From ea5dba8e0d0fa7ddc5304dbf15cf78a0ec643091 Mon Sep 17 00:00:00 2001 From: Aleksei Pavliukov Date: Sat, 29 Sep 2018 17:22:53 +0300 Subject: [PATCH] Implemented : PDFs optimization --- pdf/core/primitives.go | 67 ++ pdf/creator/creator.go | 18 +- pdf/creator/creator_test.go | 821 ++++++++++++++++++ pdf/model/colorspace.go | 6 +- pdf/model/optimize/chain.go | 34 + .../combine_duplicate_direct_objects.go | 71 ++ .../optimize/combine_duplicate_streams.go | 53 ++ .../combine_identical_indirect_objects.go | 64 ++ pdf/model/optimize/compress_streams.go | 45 + pdf/model/optimize/image.go | 138 +++ pdf/model/optimize/image_ppi.go | 203 +++++ pdf/model/optimize/object_streams.go | 40 + pdf/model/optimize/optimizer.go | 84 ++ pdf/model/optimize/options.go | 17 + pdf/model/optimizer.go | 18 + pdf/model/writer.go | 309 ++++++- 16 files changed, 1954 insertions(+), 34 deletions(-) create mode 100644 pdf/model/optimize/chain.go create mode 100644 pdf/model/optimize/combine_duplicate_direct_objects.go create mode 100644 pdf/model/optimize/combine_duplicate_streams.go create mode 100644 pdf/model/optimize/combine_identical_indirect_objects.go create mode 100644 pdf/model/optimize/compress_streams.go create mode 100644 pdf/model/optimize/image.go create mode 100644 pdf/model/optimize/image_ppi.go create mode 100644 pdf/model/optimize/object_streams.go create mode 100644 pdf/model/optimize/optimizer.go create mode 100644 pdf/model/optimize/options.go create mode 100644 pdf/model/optimizer.go diff --git a/pdf/core/primitives.go b/pdf/core/primitives.go index d7db7d69..fe98a8a4 100644 --- a/pdf/core/primitives.go +++ b/pdf/core/primitives.go @@ -74,6 +74,13 @@ type PdfObjectStream struct { Stream []byte } +// PdfObjectStreams represents the primitive PDF object streams. +// 7.5.7 Object Streams (page 45). +type PdfObjectStreams struct { + PdfObjectReference + vec []PdfObject +} + // MakeDict creates and returns an empty PdfObjectDictionary. func MakeDict() *PdfObjectDictionary { d := &PdfObjectDictionary{} @@ -203,6 +210,16 @@ func MakeStream(contents []byte, encoder StreamEncoder) (*PdfObjectStream, error return stream, nil } +// MakeObjectStreams creates an PdfObjectStreams from a list of PdfObjects. +func MakeObjectStreams(objects ...PdfObject) *PdfObjectStreams { + streams := &PdfObjectStreams{} + streams.vec = []PdfObject{} + for _, obj := range objects { + streams.vec = append(streams.vec, obj) + } + return streams +} + func (bool *PdfObjectBool) String() string { if *bool { return "true" @@ -848,3 +865,53 @@ func GetStream(obj PdfObject) (stream *PdfObjectStream, found bool) { stream, found = obj.(*PdfObjectStream) return stream, found } + +// GetObjectStreams returns the *PdfObjectStreams represented by the PdfObject. On type mismatch the found bool flag is +// false and a nil pointer is returned. +func GetObjectStreams(obj PdfObject) (objStream *PdfObjectStreams, found bool) { + objStream, found = obj.(*PdfObjectStreams) + return objStream, found +} + +// Append appends PdfObject(s) to the streams. +func (streams *PdfObjectStreams) Append(objects ...PdfObject) { + if streams == nil { + common.Log.Debug("Warn - Attempt to append to a nil streams") + return + } + if streams.vec == nil { + streams.vec = []PdfObject{} + } + + for _, obj := range objects { + streams.vec = append(streams.vec, obj) + } +} + +// Elements returns a slice of the PdfObject elements in the array. +// Preferred over accessing the array directly as type may be changed in future major versions (v3). 
+func (streams *PdfObjectStreams) Elements() []PdfObject { + if streams == nil { + return nil + } + return streams.vec +} + +// String returns a string describing `streams`. +func (streams *PdfObjectStreams) String() string { + return fmt.Sprintf("Object stream %d", streams.ObjectNumber) +} + +// Len returns the number of elements in the streams. +func (streams *PdfObjectStreams) Len() int { + if streams == nil { + return 0 + } + return len(streams.vec) +} + +// DefaultWriteString outputs the object as it is to be written to file. +func (streams *PdfObjectStreams) DefaultWriteString() string { + outStr := fmt.Sprintf("%d 0 R", (*streams).ObjectNumber) + return outStr +} diff --git a/pdf/creator/creator.go b/pdf/creator/creator.go index 6d9cf7ac..b14ade55 100644 --- a/pdf/creator/creator.go +++ b/pdf/creator/creator.go @@ -44,6 +44,8 @@ type Creator struct { // Forms. acroForm *model.PdfAcroForm + + optimizer model.Optimizer } // SetForms adds an Acroform to a PDF file. Sets the specified form for writing. @@ -101,6 +103,16 @@ func New() *Creator { return c } +// SetOptimizer sets the optimizer to optimize PDF before writing. +func (c *Creator) SetOptimizer(optimizer model.Optimizer) { + c.optimizer = optimizer +} + +// GetOptimizer returns current PDF optimizer. +func (c *Creator) GetOptimizer() model.Optimizer { + return c.optimizer +} + // SetPageMargins sets the page margins: left, right, top, bottom. // The default page margins are 10% of document width. func (c *Creator) SetPageMargins(left, right, top, bottom float64) { @@ -459,13 +471,15 @@ func (c *Creator) Draw(d Drawable) error { return nil } -// Write output of creator to io.WriteSeeker interface. -func (c *Creator) Write(ws io.WriteSeeker) error { +// Write output of creator to io.Writer interface. +func (c *Creator) Write(ws io.Writer) error { if !c.finalized { c.finalize() } pdfWriter := model.NewPdfWriter() + pdfWriter.SetOptimizer(c.optimizer) + // Form fields. if c.acroForm != nil { err := pdfWriter.SetForms(c.acroForm) diff --git a/pdf/creator/creator_test.go b/pdf/creator/creator_test.go index 7557e26f..430bda86 100644 --- a/pdf/creator/creator_test.go +++ b/pdf/creator/creator_test.go @@ -14,6 +14,7 @@ import ( goimage "image" "io/ioutil" "math" + "os" "testing" "github.com/boombuler/barcode" @@ -22,6 +23,7 @@ import ( "github.com/unidoc/unidoc/pdf/contentstream/draw" "github.com/unidoc/unidoc/pdf/core" "github.com/unidoc/unidoc/pdf/model" + "github.com/unidoc/unidoc/pdf/model/optimize" "github.com/unidoc/unidoc/pdf/model/textencoding" ) @@ -2133,3 +2135,822 @@ func TestEncrypting1(t *testing.T) { return } } + +// TestOptimizeCombineDuplicateStreams tests optimizing PDFs to reduce output file size. 
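+// The same document is written with and without the CombineDuplicateStreams option, and the optimized output is expected to be strictly smaller.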
+func TestOptimizeCombineDuplicateStreams(t *testing.T) { + c := createPdf4Optimization(t) + + err := c.WriteToFile("/tmp/7_combine_duplicate_streams_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c = createPdf4Optimization(t) + + c.SetOptimizer(optimize.New(optimize.Options{CombineDuplicateStreams: true})) + + err = c.WriteToFile("/tmp/7_combine_duplicate_streams_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/7_combine_duplicate_streams_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/7_combine_duplicate_streams_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +// TestOptimizeImageQuality tests optimizing PDFs to reduce output file size. +func TestOptimizeImageQuality(t *testing.T) { + c := New() + + imgDataJpeg, err := ioutil.ReadFile(testImageFile1) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + imgJpeg, err := NewImageFromData(imgDataJpeg) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + // JPEG encoder (DCT) with quality factor 70. + encoder := core.NewDCTEncoder() + encoder.Quality = 100 + encoder.Width = int(imgJpeg.Width()) + encoder.Height = int(imgJpeg.Height()) + imgJpeg.SetEncoder(encoder) + + imgJpeg.SetPos(250, 350) + imgJpeg.Scale(0.25, 0.25) + + err = c.Draw(imgJpeg) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + err = c.WriteToFile("/tmp/8_image_quality_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c.SetOptimizer(optimize.New(optimize.Options{ImageQuality: 20})) + + err = c.WriteToFile("/tmp/8_image_quality_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/8_image_quality_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/8_image_quality_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +func createPdf4Optimization(t *testing.T) *Creator { + c := New() + + p := NewParagraph("Test text1") + // Change to times bold font (default is helvetica). 
+ font, err := model.NewStandard14Font(model.CourierBold) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + p.SetFont(font) + p.SetPos(15, 15) + _ = c.Draw(p) + + imgData, err := ioutil.ReadFile(testImageFile1) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + img, err := NewImageFromData(imgData) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + img.SetPos(0, 100) + img.ScaleToWidth(1.0 * c.Width()) + + err = c.Draw(img) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + img1, err := NewImageFromData(imgData) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + img1.SetPos(0, 200) + img1.ScaleToWidth(1.0 * c.Width()) + + err = c.Draw(img1) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + imgData2, err := ioutil.ReadFile(testImageFile1) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + img2, err := NewImageFromData(imgData2) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + img2.SetPos(0, 500) + img2.ScaleToWidth(1.0 * c.Width()) + + c.NewPage() + p = NewParagraph("Test text2") + // Change to times bold font (default is helvetica). + font, err = model.NewStandard14Font(model.Helvetica) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + p.SetFont(font) + p.SetPos(15, 15) + _ = c.Draw(p) + + err = c.Draw(img2) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + return nil + } + + return c +} + +// TestOptimizeUseObjectStreams tests optimizing PDFs to reduce output file size. +func TestOptimizeUseObjectStreams(t *testing.T) { + c := createPdf4Optimization(t) + + err := c.WriteToFile("/tmp/9_use_object_streams_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c = createPdf4Optimization(t) + c.SetOptimizer(optimize.New(optimize.Options{UseObjectStreams: true})) + + err = c.WriteToFile("/tmp/9_use_object_streams_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/9_use_object_streams_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/9_use_object_streams_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +// TestCombineDuplicateDirectObjects tests optimizing PDFs to reduce output file size. +func TestCombineDuplicateDirectObjects(t *testing.T) { + + createDoc := func() *Creator { + c := New() + + ch1 := c.NewChapter("Introduction") + subchap1 := c.NewSubchapter(ch1, "The fundamentals") + subchap1.SetMargins(0, 0, 5, 0) + + //subCh1 := NewSubchapter(ch1, "Workflow") + + p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " + + "ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " + + "aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore " + + "eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " + + "mollit anim id est laborum.") + p.SetTextAlignment(TextAlignmentJustify) + p.SetMargins(0, 0, 5, 0) + for j := 0; j < 7; j++ { + subchap1.Add(p) + } + + subchap2 := c.NewSubchapter(ch1, "Mechanism") + subchap2.SetMargins(0, 0, 5, 0) + for j := 0; j < 15; j++ { + subchap2.Add(p) + } + + subchap3 := c.NewSubchapter(ch1, "Discussion") + subchap3.SetMargins(0, 0, 5, 0) + for j := 0; j < 19; j++ { + subchap3.Add(p) + } + + subchap4 := c.NewSubchapter(ch1, "Conclusion") + subchap4.SetMargins(0, 0, 5, 0) + for j := 0; j < 23; j++ { + subchap4.Add(p) + } + + c.Draw(ch1) + + for i := 0; i < 50; i++ { + ch2 := c.NewChapter("References") + for j := 0; j < 13; j++ { + ch2.Add(p) + } + + c.Draw(ch2) + } + + // Set a function to create the front Page. + c.CreateFrontPage(func(args FrontpageFunctionArgs) { + p := NewParagraph("Example Report") + p.SetWidth(c.Width()) + p.SetTextAlignment(TextAlignmentCenter) + p.SetFontSize(32) + p.SetPos(0, 300) + c.Draw(p) + + p.SetFontSize(22) + p.SetText("Example Report Data Results") + p.SetPos(0, 340) + c.Draw(p) + }) + + // Set a function to create the table of contents. + c.CreateTableOfContents(func(toc *TableOfContents) (*Chapter, error) { + ch := c.NewChapter("Table of contents") + ch.GetHeading().SetColor(ColorRGBFromArithmetic(0.5, 0.5, 0.5)) + ch.GetHeading().SetFontSize(28) + ch.GetHeading().SetMargins(0, 0, 0, 30) + + table := NewTable(2) + // Default, equal column sizes (4x0.25)... + table.SetColumnWidths(0.9, 0.1) + + for _, entry := range toc.entries { + // Col 1. Chapter number, title. + var str string + if entry.Subchapter == 0 { + str = fmt.Sprintf("%d. %s", entry.Chapter, entry.Title) + } else { + str = fmt.Sprintf(" %d.%d. %s", entry.Chapter, entry.Subchapter, entry.Title) + } + p := NewParagraph(str) + p.SetFontSize(14) + cell := table.NewCell() + cell.SetContent(p) + // Set the paragraph width to the cell width. + p.SetWidth(cell.Width(c.Context())) + table.SetRowHeight(table.CurRow(), p.Height()*1.2) + + // Col 1. Page number. + p = NewParagraph(fmt.Sprintf("%d", entry.PageNumber)) + p.SetFontSize(14) + cell = table.NewCell() + cell.SetContent(p) + } + err := ch.Add(table) + if err != nil { + fmt.Printf("Error adding table: %v\n", err) + return nil, err + } + + return ch, nil + }) + + addHeadersAndFooters(c) + return c + } + + c := createDoc() + + err := c.WriteToFile("/tmp/10_combine_duplicate_direct_objects_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c = createDoc() + c.SetOptimizer(optimize.New(optimize.Options{CombineDuplicateDirectObjects: true})) + + err = c.WriteToFile("/tmp/10_combine_duplicate_direct_objects_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/10_combine_duplicate_direct_objects_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/10_combine_duplicate_direct_objects_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +// TestOptimizeImagePPI tests optimizing PDFs to reduce output file size. 
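+// The same JPEG is drawn at different scales; with ImageUpperPPI limited to 144, the optimized output is expected to be smaller than the unoptimized one.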
+func TestOptimizeImagePPI(t *testing.T) { + c := New() + + imgDataJpeg, err := ioutil.ReadFile(testImageFile1) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + imgJpeg, err := NewImageFromData(imgDataJpeg) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + // JPEG encoder (DCT) with quality factor 100. + encoder := core.NewDCTEncoder() + encoder.Quality = 100 + encoder.Width = int(imgJpeg.Width()) + encoder.Height = int(imgJpeg.Height()) + imgJpeg.SetEncoder(encoder) + + imgJpeg.SetPos(250, 350) + imgJpeg.Scale(0.25, 0.25) + + err = c.Draw(imgJpeg) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c.NewPage() + + imgData, err := ioutil.ReadFile(testImageFile1) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + } + + img, err := NewImageFromData(imgData) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + } + + img.SetPos(0, 100) + img.ScaleToWidth(0.1 * c.Width()) + + err = c.Draw(img) + if err != nil { + t.Errorf("Fail: %v\n", err) + t.FailNow() + } + + err = c.Draw(imgJpeg) + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + err = c.WriteToFile("/tmp/11_image_ppi_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c.SetOptimizer(optimize.New(optimize.Options{ImageUpperPPI: 144})) + + err = c.WriteToFile("/tmp/11_image_ppi_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/11_image_ppi_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/11_image_ppi_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +// TestCombineIdenticalIndirectObjects tests optimizing PDFs to reduce output file size. +func TestCombineIdenticalIndirectObjects(t *testing.T) { + c := New() + + ch1 := c.NewChapter("Introduction") + subchap1 := c.NewSubchapter(ch1, "The fundamentals") + subchap1.SetMargins(0, 0, 5, 0) + + //subCh1 := NewSubchapter(ch1, "Workflow") + + p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " + + "ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " + + "aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore " + + "eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " + + "mollit anim id est laborum.") + p.SetTextAlignment(TextAlignmentJustify) + p.SetMargins(0, 0, 5, 0) + for j := 0; j < 5; j++ { + subchap1.Add(p) + } + + subchap2 := c.NewSubchapter(ch1, "Mechanism") + subchap2.SetMargins(0, 0, 5, 0) + for j := 0; j < 15; j++ { + subchap2.Add(p) + } + + subchap3 := c.NewSubchapter(ch1, "Discussion") + subchap3.SetMargins(0, 0, 5, 0) + for j := 0; j < 19; j++ { + subchap3.Add(p) + } + + subchap4 := c.NewSubchapter(ch1, "Conclusion") + subchap4.SetMargins(0, 0, 5, 0) + for j := 0; j < 23; j++ { + subchap4.Add(p) + } + + c.Draw(ch1) + + for i := 0; i < 50; i++ { + ch2 := c.NewChapter("References") + for j := 0; j < 13; j++ { + ch2.Add(p) + } + + c.Draw(ch2) + } + + // Set a function to create the front Page. 
+ c.CreateFrontPage(func(args FrontpageFunctionArgs) { + p := NewParagraph("Example Report") + p.SetWidth(c.Width()) + p.SetTextAlignment(TextAlignmentCenter) + p.SetFontSize(32) + p.SetPos(0, 300) + c.Draw(p) + + p.SetFontSize(22) + p.SetText("Example Report Data Results") + p.SetPos(0, 340) + c.Draw(p) + }) + + // Set a function to create the table of contents. + c.CreateTableOfContents(func(toc *TableOfContents) (*Chapter, error) { + ch := c.NewChapter("Table of contents") + ch.GetHeading().SetColor(ColorRGBFromArithmetic(0.5, 0.5, 0.5)) + ch.GetHeading().SetFontSize(28) + ch.GetHeading().SetMargins(0, 0, 0, 30) + + table := NewTable(2) + // Default, equal column sizes (4x0.25)... + table.SetColumnWidths(0.9, 0.1) + + for _, entry := range toc.entries { + // Col 1. Chapter number, title. + var str string + if entry.Subchapter == 0 { + str = fmt.Sprintf("%d. %s", entry.Chapter, entry.Title) + } else { + str = fmt.Sprintf(" %d.%d. %s", entry.Chapter, entry.Subchapter, entry.Title) + } + p := NewParagraph(str) + p.SetFontSize(14) + cell := table.NewCell() + cell.SetContent(p) + // Set the paragraph width to the cell width. + p.SetWidth(cell.Width(c.Context())) + table.SetRowHeight(table.CurRow(), p.Height()*1.2) + + // Col 1. Page number. + p = NewParagraph(fmt.Sprintf("%d", entry.PageNumber)) + p.SetFontSize(14) + cell = table.NewCell() + cell.SetContent(p) + } + err := ch.Add(table) + if err != nil { + fmt.Printf("Error adding table: %v\n", err) + return nil, err + } + + return ch, nil + }) + + addHeadersAndFooters(c) + + err := c.WriteToFile("/tmp/12_identical_indirect_objects_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c.SetOptimizer(optimize.New(optimize.Options{CombineIdenticalIndirectObjects: true})) + + err = c.WriteToFile("/tmp/12_identical_indirect_objects_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/12_identical_indirect_objects_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/12_identical_indirect_objects_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +// TestCompressStreams tests optimizing PDFs to reduce output file size. +func TestCompressStreams(t *testing.T) { + + createDoc := func() *Creator { + c := New() + p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt" + + "ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " + + "aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore" + + "eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " + + "mollit anim id est laborum.") + + p.SetMargins(0, 0, 5, 0) + c.Draw(p) + //c.NewPage() + + page := c.pages[0] + page.AddContentStreamByString(`BT +/Arial 56 Tf +20 600 Td +(The multiline example text)Tj +/Arial 30 Tf +0 30 Td +60 TL +(example text)' +(example text)' +(example text)' +(example text)' +(example text)' +(example text)' +(example text)' +(example text)' +ET`) + return c + } + + c := createDoc() + + err := c.WriteToFile("/tmp/13_compress_streams_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c = createDoc() + c.SetOptimizer(optimize.New(optimize.Options{CompressStreams: true})) + + err = c.WriteToFile("/tmp/13_compress_streams_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/13_compress_streams_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/13_compress_streams_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} + +// TestAllOptimizations tests optimizing PDFs to reduce output file size. +func TestAllOptimizations(t *testing.T) { + + createDoc := func() *Creator { + c := New() + + ch1 := c.NewChapter("Introduction") + subchap1 := c.NewSubchapter(ch1, "The fundamentals") + subchap1.SetMargins(0, 0, 5, 0) + + //subCh1 := NewSubchapter(ch1, "Workflow") + + p := NewParagraph("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " + + "ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut " + + "aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore " + + "eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt " + + "mollit anim id est laborum.") + p.SetTextAlignment(TextAlignmentJustify) + p.SetMargins(0, 0, 5, 0) + for j := 0; j < 7; j++ { + subchap1.Add(p) + } + + subchap2 := c.NewSubchapter(ch1, "Mechanism") + subchap2.SetMargins(0, 0, 5, 0) + for j := 0; j < 15; j++ { + subchap2.Add(p) + } + + subchap3 := c.NewSubchapter(ch1, "Discussion") + subchap3.SetMargins(0, 0, 5, 0) + for j := 0; j < 19; j++ { + subchap3.Add(p) + } + + subchap4 := c.NewSubchapter(ch1, "Conclusion") + subchap4.SetMargins(0, 0, 5, 0) + for j := 0; j < 23; j++ { + subchap4.Add(p) + } + + c.Draw(ch1) + + for i := 0; i < 50; i++ { + ch2 := c.NewChapter("References") + for j := 0; j < 13; j++ { + ch2.Add(p) + } + + c.Draw(ch2) + } + + // Set a function to create the front Page. + c.CreateFrontPage(func(args FrontpageFunctionArgs) { + p := NewParagraph("Example Report") + p.SetWidth(c.Width()) + p.SetTextAlignment(TextAlignmentCenter) + p.SetFontSize(32) + p.SetPos(0, 300) + c.Draw(p) + + p.SetFontSize(22) + p.SetText("Example Report Data Results") + p.SetPos(0, 340) + c.Draw(p) + }) + + // Set a function to create the table of contents. + c.CreateTableOfContents(func(toc *TableOfContents) (*Chapter, error) { + ch := c.NewChapter("Table of contents") + ch.GetHeading().SetColor(ColorRGBFromArithmetic(0.5, 0.5, 0.5)) + ch.GetHeading().SetFontSize(28) + ch.GetHeading().SetMargins(0, 0, 0, 30) + + table := NewTable(2) + // Default, equal column sizes (4x0.25)... 
+ table.SetColumnWidths(0.9, 0.1) + + for _, entry := range toc.entries { + // Col 1. Chapter number, title. + var str string + if entry.Subchapter == 0 { + str = fmt.Sprintf("%d. %s", entry.Chapter, entry.Title) + } else { + str = fmt.Sprintf(" %d.%d. %s", entry.Chapter, entry.Subchapter, entry.Title) + } + p := NewParagraph(str) + p.SetFontSize(14) + cell := table.NewCell() + cell.SetContent(p) + // Set the paragraph width to the cell width. + p.SetWidth(cell.Width(c.Context())) + table.SetRowHeight(table.CurRow(), p.Height()*1.2) + + // Col 1. Page number. + p = NewParagraph(fmt.Sprintf("%d", entry.PageNumber)) + p.SetFontSize(14) + cell = table.NewCell() + cell.SetContent(p) + } + err := ch.Add(table) + if err != nil { + fmt.Printf("Error adding table: %v\n", err) + return nil, err + } + + return ch, nil + }) + + addHeadersAndFooters(c) + return c + } + + c := createDoc() + + err := c.WriteToFile("/tmp/14_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + c = createDoc() + c.SetOptimizer(optimize.New(optimize.Options{ + CombineDuplicateDirectObjects: true, + CombineIdenticalIndirectObjects: true, + ImageUpperPPI: 50.0, + UseObjectStreams: true, + ImageQuality: 50, + CombineDuplicateStreams: true, + CompressStreams: true, + })) + + err = c.WriteToFile("/tmp/14_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + + fileInfo, err := os.Stat("/tmp/14_not_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + fileInfoOptimized, err := os.Stat("/tmp/14_optimized.pdf") + if err != nil { + t.Errorf("Fail: %v\n", err) + return + } + if fileInfoOptimized.Size() >= fileInfo.Size() { + t.Errorf("Optimization failed: size not changed %d vs %d", fileInfo.Size(), fileInfoOptimized.Size()) + } +} diff --git a/pdf/model/colorspace.go b/pdf/model/colorspace.go index 4ee7b8d5..7c9ead58 100644 --- a/pdf/model/colorspace.go +++ b/pdf/model/colorspace.go @@ -151,9 +151,9 @@ func NewPdfColorspaceFromPdfObject(obj PdfObject) (PdfColorspace, error) { return nil, errors.New("Type error") } -// determineColorspaceNameFromPdfObject determines PDF colorspace from a PdfObject. Returns the colorspace name and +// DetermineColorspaceNameFromPdfObject determines PDF colorspace from a PdfObject. Returns the colorspace name and // an error on failure. If the colorspace was not found, will return an empty string. -func determineColorspaceNameFromPdfObject(obj PdfObject) (PdfObjectName, error) { +func DetermineColorspaceNameFromPdfObject(obj PdfObject) (PdfObjectName, error) { var csName *PdfObjectName var csArray *PdfObjectArray @@ -2179,7 +2179,7 @@ func newPdfColorspaceSpecialIndexedFromPdfObject(obj PdfObject) (*PdfColorspaceS obj = array.Get(1) // Base cs cannot be another /Indexed or /Pattern space. - baseName, err := determineColorspaceNameFromPdfObject(obj) + baseName, err := DetermineColorspaceNameFromPdfObject(obj) if baseName == "Indexed" || baseName == "Pattern" { common.Log.Debug("Error: Indexed colorspace cannot have Indexed/Pattern CS as base (%v)", baseName) return nil, ErrRangeError diff --git a/pdf/model/optimize/chain.go b/pdf/model/optimize/chain.go new file mode 100644 index 00000000..e12ce2b9 --- /dev/null +++ b/pdf/model/optimize/chain.go @@ -0,0 +1,34 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. 
+ */ + +package optimize + +import ( + "github.com/unidoc/unidoc/pdf/core" + "github.com/unidoc/unidoc/pdf/model" +) + +// Chain allows to use sequence of optimizers. +// It implements interface model.Optimizer. +type Chain struct { + optimizers []model.Optimizer +} + +// Append appends optimizers to the chain. +func (c *Chain) Append(optimizers ...model.Optimizer) { + c.optimizers = append(c.optimizers, optimizers...) +} + +// Optimize optimizes PDF objects to decrease PDF size. +func (c *Chain) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + optimizedObjects = objects + for _, optimizer := range c.optimizers { + optimizedObjects, err = optimizer.Optimize(optimizedObjects) + if err != nil { + return optimizedObjects, err + } + } + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/combine_duplicate_direct_objects.go b/pdf/model/optimize/combine_duplicate_direct_objects.go new file mode 100644 index 00000000..c1ddd96b --- /dev/null +++ b/pdf/model/optimize/combine_duplicate_direct_objects.go @@ -0,0 +1,71 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "crypto/md5" + + "github.com/unidoc/unidoc/pdf/core" +) + +// CombineDuplicateDirectObjects combines duplicated direct objects by its data hash. +// It implements interface model.Optimizer. +type CombineDuplicateDirectObjects struct { +} + +// Optimize optimizes PDF objects to decrease PDF size. +func (dup *CombineDuplicateDirectObjects) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + + dictsByHash := make(map[string][]*core.PdfObjectDictionary) + var processDict func(pDict *core.PdfObjectDictionary) + processDict = func(pDict *core.PdfObjectDictionary) { + + for _, key := range pDict.Keys() { + obj := pDict.Get(key) + if dict, isDictObj := obj.(*core.PdfObjectDictionary); isDictObj { + hasher := md5.New() + hasher.Write([]byte(dict.DefaultWriteString())) + + hash := string(hasher.Sum(nil)) + dictsByHash[hash] = append(dictsByHash[hash], dict) + processDict(dict) + } + } + } + + for _, obj := range objects { + ind, isIndirectObj := obj.(*core.PdfIndirectObject) + if !isIndirectObj { + continue + } + if dict, isDictObj := ind.PdfObject.(*core.PdfObjectDictionary); isDictObj { + processDict(dict) + } + } + + indirects := make([]core.PdfObject, 0, len(dictsByHash)) + replaceTable := make(map[core.PdfObject]core.PdfObject) + + for _, dicts := range dictsByHash { + if len(dicts) < 2 { + continue + } + dict := core.MakeDict() + dict.Merge(dicts[0]) + ind := core.MakeIndirectObject(dict) + indirects = append(indirects, ind) + for i := 0; i < len(dicts); i++ { + dict := dicts[i] + replaceTable[dict] = ind + } + } + + optimizedObjects = make([]core.PdfObject, len(objects)) + copy(optimizedObjects, objects) + optimizedObjects = append(indirects, optimizedObjects...) + replaceObjectsInPlace(optimizedObjects, replaceTable) + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/combine_duplicate_streams.go b/pdf/model/optimize/combine_duplicate_streams.go new file mode 100644 index 00000000..a43b6e15 --- /dev/null +++ b/pdf/model/optimize/combine_duplicate_streams.go @@ -0,0 +1,53 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. 
+ */ + +package optimize + +import ( + "crypto/md5" + + "github.com/unidoc/unidoc/pdf/core" +) + +// CombineDuplicateStreams combines duplicated streams by its data hash. +// It implements interface model.Optimizer. +type CombineDuplicateStreams struct { +} + +// Optimize optimizes PDF objects to decrease PDF size. +func (dup *CombineDuplicateStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + replaceTable := make(map[core.PdfObject]core.PdfObject) + toDelete := make(map[core.PdfObject]struct{}) + streamsByHash := make(map[string][]*core.PdfObjectStream) + for _, obj := range objects { + if stream, isStreamObj := obj.(*core.PdfObjectStream); isStreamObj { + hasher := md5.New() + hasher.Write([]byte(stream.Stream)) + hash := string(hasher.Sum(nil)) + streamsByHash[hash] = append(streamsByHash[hash], stream) + } + } + for _, streams := range streamsByHash { + if len(streams) < 2 { + continue + } + firstStream := streams[0] + for i := 1; i < len(streams); i++ { + stream := streams[i] + replaceTable[stream] = firstStream + toDelete[stream] = struct{}{} + } + } + + optimizedObjects = make([]core.PdfObject, 0, len(objects)-len(toDelete)) + for _, obj := range objects { + if _, found := toDelete[obj]; found { + continue + } + optimizedObjects = append(optimizedObjects, obj) + } + replaceObjectsInPlace(optimizedObjects, replaceTable) + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/combine_identical_indirect_objects.go b/pdf/model/optimize/combine_identical_indirect_objects.go new file mode 100644 index 00000000..6faf62dc --- /dev/null +++ b/pdf/model/optimize/combine_identical_indirect_objects.go @@ -0,0 +1,64 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "crypto/md5" + + "github.com/unidoc/unidoc/pdf/core" +) + +// CombineIdenticalIndirectObjects combines identical indirect objects. +// It implements interface model.Optimizer. +type CombineIdenticalIndirectObjects struct { +} + +// Optimize optimizes PDF objects to decrease PDF size. 
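+// Indirect objects whose dictionaries serialize identically (compared via an MD5 hash of DefaultWriteString) are merged into a single object; Page dictionaries are skipped so the page tree is preserved.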
+func (c *CombineIdenticalIndirectObjects) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + replaceTable := make(map[core.PdfObject]core.PdfObject) + toDelete := make(map[core.PdfObject]struct{}) + + indWithDictByHash := make(map[string][]*core.PdfIndirectObject) + + for _, obj := range objects { + ind, isIndirectObj := obj.(*core.PdfIndirectObject) + if !isIndirectObj { + continue + } + if dict, isDictObj := ind.PdfObject.(*core.PdfObjectDictionary); isDictObj { + if name, isName := dict.Get("Type").(*core.PdfObjectName); isName && *name == "Page" { + continue + } + hasher := md5.New() + hasher.Write([]byte(dict.DefaultWriteString())) + + hash := string(hasher.Sum(nil)) + indWithDictByHash[hash] = append(indWithDictByHash[hash], ind) + } + } + + for _, dicts := range indWithDictByHash { + if len(dicts) < 2 { + continue + } + firstDict := dicts[0] + for i := 1; i < len(dicts); i++ { + dict := dicts[i] + replaceTable[dict] = firstDict + toDelete[dict] = struct{}{} + } + } + + optimizedObjects = make([]core.PdfObject, 0, len(objects)-len(toDelete)) + for _, obj := range objects { + if _, found := toDelete[obj]; found { + continue + } + optimizedObjects = append(optimizedObjects, obj) + } + replaceObjectsInPlace(optimizedObjects, replaceTable) + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/compress_streams.go b/pdf/model/optimize/compress_streams.go new file mode 100644 index 00000000..e58f96a0 --- /dev/null +++ b/pdf/model/optimize/compress_streams.go @@ -0,0 +1,45 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "github.com/unidoc/unidoc/pdf/core" +) + +// CompressStreams compresses uncompressed streams. +// It implements interface model.Optimizer. +type CompressStreams struct { +} + +// Optimize optimizes PDF objects to decrease PDF size. +func (c *CompressStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + optimizedObjects = make([]core.PdfObject, len(objects)) + copy(optimizedObjects, objects) + for _, obj := range objects { + stream, isStreamObj := core.GetStream(obj) + if !isStreamObj { + continue + } + if _, found := core.GetName(stream.PdfObjectDictionary.Get("Filter")); found { + continue + } + encoder := core.NewLZWEncoder() + encoder.EarlyChange = 0 + var data []byte + data, err = encoder.EncodeBytes(stream.Stream) + if err != nil { + return optimizedObjects, err + } + dict := encoder.MakeStreamDict() + // compare compressed and uncompressed sizes + if len(data)+len(dict.DefaultWriteString()) < len(stream.Stream) { + stream.Stream = data + stream.PdfObjectDictionary.Merge(dict) + stream.PdfObjectDictionary.Set("Length", core.MakeInteger(int64(len(stream.Stream)))) + } + } + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/image.go b/pdf/model/optimize/image.go new file mode 100644 index 00000000..6a7fc704 --- /dev/null +++ b/pdf/model/optimize/image.go @@ -0,0 +1,138 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "github.com/unidoc/unidoc/common" + "github.com/unidoc/unidoc/pdf/core" + "github.com/unidoc/unidoc/pdf/model" +) + +// Image optimizes images by rewrite images into JPEG format with quality equals to ImageQuality. +// TODO(a5i): Add support for inline images. +// It implements interface model.Optimizer. 
+type Image struct { + ImageQuality int +} + +// imageInfo is information about an image. +type imageInfo struct { + ColorSpace core.PdfObjectName + BitsPerComponent int + ColorComponents int + Width int + Height int + Stream *core.PdfObjectStream + PPI float64 +} + +// findImages returns images from objects. +func findImages(objects []core.PdfObject) []*imageInfo { + subTypeKey := core.PdfObjectName("Subtype") + streamProcessed := make(map[*core.PdfObjectStream]struct{}) + var err error + var images []*imageInfo + for _, obj := range objects { + stream, ok := core.GetStream(obj) + if !ok { + continue + } + if _, found := streamProcessed[stream]; found { + continue + } + streamProcessed[stream] = struct{}{} + subTypeValue := stream.PdfObjectDictionary.Get(subTypeKey) + subType, ok := core.GetName(subTypeValue) + if !ok || string(*subType) != "Image" { + continue + } + img := &imageInfo{BitsPerComponent: 8, Stream: stream} + if img.ColorSpace, err = model.DetermineColorspaceNameFromPdfObject(stream.PdfObjectDictionary.Get("ColorSpace")); err != nil { + common.Log.Error("Error determine color space %s", err) + continue + } + if val, ok := core.GetIntVal(stream.PdfObjectDictionary.Get("BitsPerComponent")); ok { + img.BitsPerComponent = val + } + if val, ok := core.GetIntVal(stream.PdfObjectDictionary.Get("Width")); ok { + img.Width = val + } + if val, ok := core.GetIntVal(stream.PdfObjectDictionary.Get("Height")); ok { + img.Height = val + } + + switch img.ColorSpace { + case "DeviceRGB": + img.ColorComponents = 3 + case "DeviceGray": + img.ColorComponents = 1 + default: + common.Log.Warning("Optimization is not supported for color space %s", img.ColorSpace) + continue + } + images = append(images, img) + } + return images +} + +// Optimize optimizes PDF objects to decrease PDF size. 
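+// Non-mask DeviceRGB and DeviceGray images are decoded and re-encoded with a DCT (JPEG) encoder at the configured ImageQuality; SMask streams are left unchanged.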
+func (i *Image) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + if i.ImageQuality <= 0 { + return objects, nil + } + images := findImages(objects) + if len(images) == 0 { + return objects, nil + } + + replaceTable := make(map[core.PdfObject]core.PdfObject) + imageMasks := make(map[core.PdfObject]struct{}) + for _, img := range images { + obj := img.Stream.PdfObjectDictionary.Get(core.PdfObjectName("SMask")) + imageMasks[obj] = struct{}{} + } + + for index, img := range images { + stream := img.Stream + if _, isMask := imageMasks[stream]; isMask { + continue + } + streamEncoder, err := core.NewEncoderFromStream(stream) + if err != nil { + common.Log.Warning("Error get encoder for the image stream %s") + continue + } + data, err := streamEncoder.DecodeStream(stream) + if err != nil { + common.Log.Warning("Error decode the image stream %s") + continue + } + encoder := core.NewDCTEncoder() + encoder.ColorComponents = img.ColorComponents + encoder.Quality = i.ImageQuality + encoder.BitsPerComponent = img.BitsPerComponent + encoder.Width = img.Width + encoder.Height = img.Height + streamData, err := encoder.EncodeBytes(data) + if err != nil { + return nil, err + } + newStream := &core.PdfObjectStream{Stream: streamData} + newStream.PdfObjectReference = stream.PdfObjectReference + newStream.PdfObjectDictionary = core.MakeDict() + newStream.PdfObjectDictionary.Merge(stream.PdfObjectDictionary) + fn := core.PdfObjectName(encoder.GetFilterName()) + newStream.PdfObjectDictionary.Set(core.PdfObjectName("Filter"), &fn) + ln := core.PdfObjectInteger(int64(len(streamData))) + newStream.PdfObjectDictionary.Set(core.PdfObjectName("Length"), &ln) + replaceTable[stream] = newStream + images[index].Stream = newStream + } + optimizedObjects = make([]core.PdfObject, len(objects)) + copy(optimizedObjects, objects) + replaceObjectsInPlace(optimizedObjects, replaceTable) + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/image_ppi.go b/pdf/model/optimize/image_ppi.go new file mode 100644 index 00000000..2996845e --- /dev/null +++ b/pdf/model/optimize/image_ppi.go @@ -0,0 +1,203 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "fmt" + "image" + "math" + + "github.com/unidoc/unidoc/common" + "github.com/unidoc/unidoc/pdf/contentstream" + "github.com/unidoc/unidoc/pdf/core" + "github.com/unidoc/unidoc/pdf/model" + "golang.org/x/image/draw" +) + +// ImagePPI optimizes images by scaling images such that the PPI (pixels per inch) is never higher than ImageUpperPPI. +// TODO(a5i): Add support for inline images. +// It implements interface model.Optimizer. 
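+// The effective PPI of each image is estimated from the page content streams (the cm scaling and Do operators); images above ImageUpperPPI, together with their soft masks, are downscaled with Catmull-Rom interpolation.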
+type ImagePPI struct { + ImageUpperPPI float64 +} + +func scaleImage(stream *core.PdfObjectStream, scale float64) error { + xImg, err := model.NewXObjectImageFromStream(stream) + if err != nil { + return err + } + i, err := xImg.ToImage() + if err != nil { + return err + } + goimg, err := i.ToGoImage() + if err != nil { + return err + } + + newW := int(math.RoundToEven(float64(i.Width) * scale)) + newH := int(math.RoundToEven(float64(i.Height) * scale)) + rect := image.Rect(0, 0, newW, newH) + var newImage draw.Image + switch xImg.ColorSpace.String() { + case "DeviceRGB": + newImage = image.NewRGBA(rect) + case "DeviceGray": + newImage = image.NewGray(rect) + default: + return fmt.Errorf("Optimization is not supported for color space %s", xImg.ColorSpace.String()) + } + draw.CatmullRom.Scale(newImage, newImage.Bounds(), goimg, goimg.Bounds(), draw.Over, &draw.Options{}) + i, err = model.ImageHandling.NewImageFromGoImage(newImage) + if err != nil { + return err + } + xImg.SetImage(i, xImg.ColorSpace) + xImg.ToPdfObject() + return nil +} + +// Optimize optimizes PDF objects to decrease PDF size. +func (i *ImagePPI) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + if i.ImageUpperPPI <= 0 { + return objects, nil + } + images := findImages(objects) + if len(images) == 0 { + return objects, nil + } + imageMasks := make(map[core.PdfObject]struct{}) + for _, img := range images { + obj := img.Stream.PdfObjectDictionary.Get(core.PdfObjectName("SMask")) + imageMasks[obj] = struct{}{} + } + imageByStream := make(map[*core.PdfObjectStream]*imageInfo) + for _, img := range images { + imageByStream[img.Stream] = img + } + var catalog *core.PdfObjectDictionary + for _, obj := range objects { + if dict, isDict := core.GetDict(obj); catalog == nil && isDict { + if tp, ok := core.GetName(dict.Get(core.PdfObjectName("Type"))); ok && *tp == "Catalog" { + catalog = dict + } + } + } + if catalog == nil { + return objects, nil + } + pages, hasPages := core.GetDict(catalog.Get(core.PdfObjectName("Pages"))) + if !hasPages { + return objects, nil + } + kids, hasKids := core.GetArray(pages.Get(core.PdfObjectName("Kids"))) + if !hasKids { + return objects, nil + } + imageByName := make(map[string]*imageInfo) + + for _, pageObj := range kids.Elements() { + page, ok := core.GetDict(pageObj) + if !ok { + continue + } + contents, hasContents := core.GetArray(page.Get("Contents")) + if !hasContents { + continue + } + resources, hasResources := core.GetDict(page.Get("Resources")) + if !hasResources { + continue + } + xObject, hasXObject := core.GetDict(resources.Get("XObject")) + if !hasXObject { + continue + } + xObjectKeys := xObject.Keys() + for _, key := range xObjectKeys { + if stream, isStream := core.GetStream(xObject.Get(key)); isStream { + if img, found := imageByStream[stream]; found { + imageByName[string(key)] = img + } + } + } + for _, obj := range contents.Elements() { + if stream, isStream := core.GetStream(obj); isStream { + streamEncoder, err := core.NewEncoderFromStream(stream) + if err != nil { + return nil, err + } + data, err := streamEncoder.DecodeStream(stream) + if err != nil { + return nil, err + } + + p := contentstream.NewContentStreamParser(string(data)) + operations, err := p.Parse() + if err != nil { + return nil, err + } + scaleX, scaleY := 1.0, 1.0 + for _, operation := range *operations { + if operation.Operand == "Q" { + scaleX, scaleY = 1.0, 1.0 + } + if operation.Operand == "cm" && len(operation.Params) == 6 { + if sx, ok := 
core.GetFloatVal(operation.Params[0]); ok { + scaleX = scaleX * sx + } + if sy, ok := core.GetFloatVal(operation.Params[3]); ok { + scaleY = scaleY * sy + } + if sx, ok := core.GetIntVal(operation.Params[0]); ok { + scaleX = scaleX * float64(sx) + } + if sy, ok := core.GetIntVal(operation.Params[3]); ok { + scaleY = scaleY * float64(sy) + } + } + if operation.Operand == "Do" && len(operation.Params) == 1 { + name, ok := core.GetName(operation.Params[0]) + if !ok { + continue + } + if img, found := imageByName[string(*name)]; found { + wInch, hInch := scaleX/72.0, scaleY/72.0 + xPPI, yPPI := float64(img.Width)/wInch, float64(img.Height)/hInch + if wInch == 0 || hInch == 0 { + xPPI = 72.0 + yPPI = 72.0 + } + img.PPI = math.Max(img.PPI, xPPI) + img.PPI = math.Max(img.PPI, yPPI) + } + } + } + } + } + } + + for _, img := range images { + if _, isMask := imageMasks[img.Stream]; isMask { + continue + } + if img.PPI <= i.ImageUpperPPI { + continue + } + scale := i.ImageUpperPPI / img.PPI + if err := scaleImage(img.Stream, scale); err != nil { + common.Log.Debug("Error scale image keep original image: %s", err) + } else { + if mask, hasMask := core.GetStream(img.Stream.PdfObjectDictionary.Get(core.PdfObjectName("SMask"))); hasMask { + if err := scaleImage(mask, scale); err != nil { + return nil, err + } + } + } + } + + return objects, nil +} diff --git a/pdf/model/optimize/object_streams.go b/pdf/model/optimize/object_streams.go new file mode 100644 index 00000000..4f03b699 --- /dev/null +++ b/pdf/model/optimize/object_streams.go @@ -0,0 +1,40 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "github.com/unidoc/unidoc/pdf/core" +) + +// ObjectStreams groups PDF objects to object streams. +// It implements interface model.Optimizer. +type ObjectStreams struct { +} + +// Optimize optimizes PDF objects to decrease PDF size. +func (o *ObjectStreams) Optimize(objects []core.PdfObject) (optimizedObjects []core.PdfObject, err error) { + objStream := &core.PdfObjectStreams{} + skippedObjects := make([]core.PdfObject, 0, len(objects)) + for _, obj := range objects { + if io, isIndirectObj := obj.(*core.PdfIndirectObject); isIndirectObj && io.GenerationNumber == 0 { + objStream.Append(obj) + } else { + skippedObjects = append(skippedObjects, obj) + } + } + if objStream.Len() == 0 { + return skippedObjects, nil + } + + optimizedObjects = make([]core.PdfObject, 0, len(skippedObjects)+objStream.Len()+1) + if objStream.Len() > 1 { + optimizedObjects = append(optimizedObjects, objStream) + } + optimizedObjects = append(optimizedObjects, objStream.Elements()...) + optimizedObjects = append(optimizedObjects, skippedObjects...) + + return optimizedObjects, nil +} diff --git a/pdf/model/optimize/optimizer.go b/pdf/model/optimize/optimizer.go new file mode 100644 index 00000000..2c425088 --- /dev/null +++ b/pdf/model/optimize/optimizer.go @@ -0,0 +1,84 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +import ( + "github.com/unidoc/unidoc/pdf/core" +) + +// New creates a optimizers chain from options. 
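+// Optimizers are appended in a fixed order: image downsampling (ImagePPI) and re-encoding (Image) first, followed by deduplication, object streams and stream compression.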
+func New(options Options) *Chain { + chain := new(Chain) + if options.ImageUpperPPI > 0 { + imageOptimizer := new(ImagePPI) + imageOptimizer.ImageUpperPPI = options.ImageUpperPPI + chain.Append(imageOptimizer) + } + if options.ImageQuality > 0 { + imageOptimizer := new(Image) + imageOptimizer.ImageQuality = options.ImageQuality + chain.Append(imageOptimizer) + } + if options.CombineDuplicateDirectObjects { + chain.Append(new(CombineDuplicateDirectObjects)) + } + if options.CombineDuplicateStreams { + chain.Append(new(CombineDuplicateStreams)) + } + if options.CombineIdenticalIndirectObjects { + chain.Append(new(CombineIdenticalIndirectObjects)) + } + if options.UseObjectStreams { + chain.Append(new(ObjectStreams)) + } + if options.CompressStreams { + chain.Append(new(CompressStreams)) + } + return chain +} + +// replaceObjectsInPlace replaces objects. objTo will be modified by the process. +func replaceObjectsInPlace(objects []core.PdfObject, objTo map[core.PdfObject]core.PdfObject) { + if objTo == nil || len(objTo) == 0 { + return + } + for i, obj := range objects { + if to, found := objTo[obj]; found { + objects[i] = to + continue + } + objTo[obj] = obj + switch t := obj.(type) { + case *core.PdfObjectArray: + values := make([]core.PdfObject, t.Len()) + copy(values, t.Elements()) + replaceObjectsInPlace(values, objTo) + for i, obj := range values { + t.Set(i, obj) + } + case *core.PdfObjectStreams: + replaceObjectsInPlace(t.Elements(), objTo) + case *core.PdfObjectStream: + values := []core.PdfObject{t.PdfObjectDictionary} + replaceObjectsInPlace(values, objTo) + t.PdfObjectDictionary = values[0].(*core.PdfObjectDictionary) + case *core.PdfObjectDictionary: + keys := t.Keys() + values := make([]core.PdfObject, len(keys)) + for i, key := range keys { + values[i] = t.Get(key) + } + replaceObjectsInPlace(values, objTo) + for i, key := range keys { + t.Set(key, values[i]) + } + case *core.PdfIndirectObject: + values := []core.PdfObject{t.PdfObject} + replaceObjectsInPlace(values, objTo) + t.PdfObject = values[0] + } + } +} diff --git a/pdf/model/optimize/options.go b/pdf/model/optimize/options.go new file mode 100644 index 00000000..db024510 --- /dev/null +++ b/pdf/model/optimize/options.go @@ -0,0 +1,17 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package optimize + +// Options describes PDF optimization parameters. +type Options struct { + CombineDuplicateStreams bool + CombineDuplicateDirectObjects bool + ImageUpperPPI float64 + ImageQuality int + UseObjectStreams bool + CombineIdenticalIndirectObjects bool + CompressStreams bool +} diff --git a/pdf/model/optimizer.go b/pdf/model/optimizer.go new file mode 100644 index 00000000..c9961297 --- /dev/null +++ b/pdf/model/optimizer.go @@ -0,0 +1,18 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package model + +import ( + "github.com/unidoc/unidoc/pdf/core" +) + +// Optimizer is the interface that performs optimization of PDF object structure for output writing. +// +// Optimize receives a slice of input `objects`, performs optimization, including removing, replacing objects and +// output the optimized slice of objects. 
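+// Ready-made implementations live in the pdf/model/optimize package and can be combined with optimize.Chain (see optimize.New).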
+type Optimizer interface { + Optimize(objects []core.PdfObject) ([]core.PdfObject, error) +} diff --git a/pdf/model/writer.go b/pdf/model/writer.go index bf7a5fc9..b9d8a906 100644 --- a/pdf/model/writer.go +++ b/pdf/model/writer.go @@ -10,8 +10,10 @@ package model import ( "bufio" + "bytes" "crypto/md5" "crypto/rand" + "encoding/binary" "errors" "fmt" "io" @@ -25,6 +27,16 @@ import ( "github.com/unidoc/unidoc/pdf/model/fonts" ) +type crossReference struct { + Type int + // Type 1 + Offset int64 + Generation int64 // and Type 0 + // Type 2 + ObjectNumber int // and Type 0 + Index int +} + var pdfCreator = "" func getPdfProducer() string { @@ -79,6 +91,9 @@ type PdfWriter struct { // Forms. acroForm *PdfAcroForm + + optimizer Optimizer + crossReferenceMap map[int]crossReference } // NewPdfWriter initializes a new PdfWriter. @@ -132,6 +147,108 @@ func NewPdfWriter() PdfWriter { return w } +// copyObject creates deep copy of the Pdf object and +// fills objectToObjectCopyMap to replace the old object to the copy of object if needed. +// Parameter objectToObjectCopyMap is needed to replace object references to its copies. +// Because many objects can contain references to another objects like pages to images. +func copyObject(obj PdfObject, objectToObjectCopyMap map[PdfObject]PdfObject) PdfObject { + if newObj, ok := objectToObjectCopyMap[obj]; ok { + return newObj + } + + switch t := obj.(type) { + case *PdfObjectArray: + newObj := &PdfObjectArray{} + objectToObjectCopyMap[obj] = newObj + for _, val := range t.Elements() { + newObj.Append(copyObject(val, objectToObjectCopyMap)) + } + return newObj + case *PdfObjectStreams: + newObj := &PdfObjectStreams{PdfObjectReference: t.PdfObjectReference} + objectToObjectCopyMap[obj] = newObj + for _, val := range t.Elements() { + newObj.Append(copyObject(val, objectToObjectCopyMap)) + } + return newObj + case *PdfObjectStream: + newObj := &PdfObjectStream{ + Stream: t.Stream, + PdfObjectReference: t.PdfObjectReference, + } + objectToObjectCopyMap[obj] = newObj + newObj.PdfObjectDictionary = copyObject(t.PdfObjectDictionary, objectToObjectCopyMap).(*PdfObjectDictionary) + return newObj + case *PdfObjectDictionary: + newObj := MakeDict() + objectToObjectCopyMap[obj] = newObj + for _, key := range t.Keys() { + val := t.Get(key) + newObj.Set(key, copyObject(val, objectToObjectCopyMap)) + } + return newObj + case *PdfIndirectObject: + newObj := &PdfIndirectObject{ + PdfObjectReference: t.PdfObjectReference, + } + objectToObjectCopyMap[obj] = newObj + newObj.PdfObject = copyObject(t.PdfObject, objectToObjectCopyMap) + return newObj + case *PdfObjectString: + newObj := &PdfObjectString{} + *newObj = *t + objectToObjectCopyMap[obj] = newObj + return newObj + case *PdfObjectName: + newObj := PdfObjectName(*t) + objectToObjectCopyMap[obj] = &newObj + return &newObj + case *PdfObjectNull: + newObj := PdfObjectNull{} + objectToObjectCopyMap[obj] = &newObj + return &newObj + case *PdfObjectInteger: + newObj := PdfObjectInteger(*t) + objectToObjectCopyMap[obj] = &newObj + return &newObj + case *PdfObjectReference: + newObj := PdfObjectReference(*t) + objectToObjectCopyMap[obj] = &newObj + return &newObj + case *PdfObjectFloat: + newObj := PdfObjectFloat(*t) + objectToObjectCopyMap[obj] = &newObj + return &newObj + case *PdfObjectBool: + newObj := PdfObjectBool(*t) + objectToObjectCopyMap[obj] = &newObj + return &newObj + default: + common.Log.Info("TODO(a5i): implement copyObject for %+v", obj) + } + // return other objects as is + return obj +} + +// copyObjects 
makes objects copy and set as working. +func (this *PdfWriter) copyObjects() { + objectToObjectCopyMap := make(map[PdfObject]PdfObject) + objects := make([]PdfObject, len(this.objects)) + objectsMap := make(map[PdfObject]bool) + for i, obj := range this.objects { + newObject := copyObject(obj, objectToObjectCopyMap) + objects[i] = newObject + if this.objectsMap[obj] { + objectsMap[newObject] = true + } + } + + this.objects = objects + this.objectsMap = objectsMap + this.infoObj = copyObject(this.infoObj, objectToObjectCopyMap).(*PdfIndirectObject) + this.root = copyObject(this.root, objectToObjectCopyMap).(*PdfIndirectObject) +} + // Set the PDF version of the output file. func (this *PdfWriter) SetVersion(majorVersion, minorVersion int) { this.majorVersion = majorVersion @@ -152,6 +269,16 @@ func (this *PdfWriter) SetOCProperties(ocProperties PdfObject) error { return nil } +// SetOptimizer sets the optimizer to optimize PDF before writing. +func (this *PdfWriter) SetOptimizer(optimizer Optimizer) { + this.optimizer = optimizer +} + +// GetOptimizer returns current PDF optimizer. +func (this *PdfWriter) GetOptimizer() Optimizer { + return this.optimizer +} + func (this *PdfWriter) hasObject(obj PdfObject) bool { // Check if already added. for _, o := range this.objects { @@ -438,6 +565,7 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) { common.Log.Trace("Write obj #%d\n", num) if pobj, isIndirect := obj.(*PdfIndirectObject); isIndirect { + this.crossReferenceMap[num] = crossReference{Type: 1, Offset: this.writePos, Generation: pobj.GenerationNumber} outStr := fmt.Sprintf("%d 0 obj\n", num) outStr += pobj.PdfObject.DefaultWriteString() outStr += "\nendobj\n" @@ -448,6 +576,7 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) { // XXX/TODO: Add a default encoder if Filter not specified? // Still need to make sure is encrypted. 
@@ -448,6 +576,7 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) {
 	// XXX/TODO: Add a default encoder if Filter not specified?
 	// Still need to make sure is encrypted.
 	if pobj, isStream := obj.(*PdfObjectStream); isStream {
+		this.crossReferenceMap[num] = crossReference{Type: 1, Offset: this.writePos, Generation: pobj.GenerationNumber}
 		outStr := fmt.Sprintf("%d 0 obj\n", num)
 		outStr += pobj.PdfObjectDictionary.DefaultWriteString()
 		outStr += "\nstream\n"
@@ -457,6 +586,46 @@ func (this *PdfWriter) writeObject(num int, obj PdfObject) {
 		return
 	}
 
+	if ostreams, isObjStreams := obj.(*PdfObjectStreams); isObjStreams {
+		this.crossReferenceMap[num] = crossReference{Type: 1, Offset: this.writePos, Generation: ostreams.GenerationNumber}
+		outStr := fmt.Sprintf("%d 0 obj\n", num)
+		var offsets []string
+		var objData string
+		var offset int64
+
+		for index, obj := range ostreams.Elements() {
+			io, isIndirect := obj.(*PdfIndirectObject)
+			if !isIndirect {
+				common.Log.Error("Object stream %d contains a non-indirect PDF object %v", num, obj)
+				continue
+			}
+			data := io.PdfObject.DefaultWriteString() + " "
+			objData = objData + data
+			offsets = append(offsets, fmt.Sprintf("%d %d", io.ObjectNumber, offset))
+			this.crossReferenceMap[int(io.ObjectNumber)] = crossReference{Type: 2, ObjectNumber: num, Index: index}
+			offset = offset + int64(len([]byte(data)))
+		}
+		offsetsStr := strings.Join(offsets, " ") + " "
+		encoder := NewFlateEncoder()
+		//encoder := NewRawEncoder()
+		dict := encoder.MakeStreamDict()
+		dict.Set(PdfObjectName("Type"), MakeName("ObjStm"))
+		n := int64(ostreams.Len())
+		dict.Set(PdfObjectName("N"), MakeInteger(n))
+		first := int64(len(offsetsStr))
+		dict.Set(PdfObjectName("First"), MakeInteger(first))
+
+		data, _ := encoder.EncodeBytes([]byte(offsetsStr + objData))
+		length := int64(len(data))
+
+		dict.Set(PdfObjectName("Length"), MakeInteger(length))
+		outStr += dict.DefaultWriteString()
+		outStr += "\nstream\n"
+		this.writeString(outStr)
+		this.writeBytes(data)
+		this.writeString("\nendstream\nendobj\n")
+		return
+	}
+
 	this.writer.WriteString(obj.DefaultWriteString())
 }
 
@@ -472,6 +641,10 @@ func (this *PdfWriter) updateObjectNumbers() {
 			so.ObjectNumber = int64(idx + 1)
 			so.GenerationNumber = 0
 		}
+		if so, isObjectStreams := obj.(*PdfObjectStreams); isObjectStreams {
+			so.ObjectNumber = int64(idx + 1)
+			so.GenerationNumber = 0
+		}
 	}
 }
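The ObjStm branch in writeObject above serializes compressed objects as described in ISO 32000-1, 7.5.7: the stream body begins with N pairs of "object-number offset", and /First is the byte position where the first object body starts. The following stand-alone sketch shows that layout; the object numbers 12 and 13 and their bodies are invented for illustration.

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Invented objects destined for one object stream.
	order := []int{12, 13}
	bodies := map[int]string{
		12: "<< /Type /Font /Subtype /Type1 >>",
		13: "(hello)",
	}

	var offsets []string
	var objData string
	var offset int
	for _, num := range order {
		data := bodies[num] + " "
		offsets = append(offsets, fmt.Sprintf("%d %d", num, offset))
		objData += data
		offset += len(data)
	}
	offsetsStr := strings.Join(offsets, " ") + " "

	// /N is the object count, /First the length of the offset-table prefix.
	fmt.Printf("/Type /ObjStm /N %d /First %d\n", len(order), len(offsetsStr))
	fmt.Printf("uncompressed stream: %q\n", offsetsStr+objData)
}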
@@ -686,24 +859,53 @@ func (this *PdfWriter) Write(writer io.Writer) error {
 	}
 
 	// Set version in the catalog.
 	this.catalog.Set("Version", MakeName(fmt.Sprintf("%d.%d", this.majorVersion, this.minorVersion)))
+	this.copyObjects()
+
+	if this.optimizer != nil {
+		var err error
+		this.objects, err = this.optimizer.Optimize(this.objects)
+		if err != nil {
+			return err
+		}
+	}
 
 	w := bufio.NewWriter(writer)
 	this.writer = w
 	this.writePos = 0
+	useCrossReferenceStream := this.majorVersion > 1 || (this.majorVersion == 1 && this.minorVersion > 4)
+	objectsInObjectStreams := make(map[PdfObject]bool)
+	if !useCrossReferenceStream {
+		for _, obj := range this.objects {
+			if objStm, isObjectStreams := obj.(*PdfObjectStreams); isObjectStreams {
+				useCrossReferenceStream = true
+				for _, obj := range objStm.Elements() {
+					objectsInObjectStreams[obj] = true
+					if io, isIndirectObj := obj.(*PdfIndirectObject); isIndirectObj {
+						objectsInObjectStreams[io.PdfObject] = true
+					}
+				}
+			}
+		}
+	}
+
+	if useCrossReferenceStream && this.majorVersion == 1 && this.minorVersion < 5 {
+		this.minorVersion = 5
+	}
 	this.writeString(fmt.Sprintf("%%PDF-%d.%d\n", this.majorVersion, this.minorVersion))
 	this.writeString("%âãÏÓ\n")
 
 	this.updateObjectNumbers()
 
-	offsets := []int64{}
-
 	// Write objects
 	common.Log.Trace("Writing %d obj", len(this.objects))
+	this.crossReferenceMap = make(map[int]crossReference)
+	this.crossReferenceMap[0] = crossReference{Type: 0, ObjectNumber: 0, Generation: 0xFFFF}
 	for idx, obj := range this.objects {
+		if skip := objectsInObjectStreams[obj]; skip {
+			continue
+		}
 		common.Log.Trace("Writing %d", idx)
-		offset := this.writePos
-		offsets = append(offsets, offset)
 
 		// Encrypt prior to writing.
 		// Encrypt dictionary should not be encrypted.
@@ -713,41 +915,90 @@ func (this *PdfWriter) Write(writer io.Writer) error {
 				common.Log.Debug("ERROR: Failed encrypting (%s)", err)
 				return err
 			}
-
 		}
 		this.writeObject(idx+1, obj)
 	}
 
 	xrefOffset := this.writePos
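The cross-reference stream written in the hunk that follows declares /W [1 4 2]: one byte for the entry type, four bytes for the second field, two for the third, all big-endian. As a worked sketch of a single row, the snippet below encodes a type 2 entry; the object stream number 15 and index 3 are made-up values, not taken from this patch.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	buf := bytes.NewBuffer(nil)

	// Type 2 entry: the object is stored inside object stream 15 at index 3.
	binary.Write(buf, binary.BigEndian, byte(2))    // field 1: entry type
	binary.Write(buf, binary.BigEndian, uint32(15)) // field 2: object stream number
	binary.Write(buf, binary.BigEndian, uint16(3))  // field 3: index within the stream

	fmt.Printf("% x\n", buf.Bytes()) // 02 00 00 00 0f 00 03
}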
- this.writeString("xref\r\n") - outStr := fmt.Sprintf("%d %d\r\n", 0, len(this.objects)+1) - this.writeString(outStr) - outStr = fmt.Sprintf("%.10d %.5d f\r\n", 0, 65535) - this.writeString(outStr) - for _, offset := range offsets { - outStr = fmt.Sprintf("%.10d %.5d n\r\n", offset, 0) - this.writeString(outStr) - } + if useCrossReferenceStream { + + crossObjNumber := len(this.crossReferenceMap) + this.crossReferenceMap[crossObjNumber] = crossReference{Type: 1, ObjectNumber: crossObjNumber, Offset: xrefOffset} + crossReferenceData := bytes.NewBuffer(nil) + + for idx := 0; idx < len(this.crossReferenceMap); idx++ { + ref := this.crossReferenceMap[idx] + switch ref.Type { + case 0: + binary.Write(crossReferenceData, binary.BigEndian, byte(0)) + binary.Write(crossReferenceData, binary.BigEndian, uint32(0)) + binary.Write(crossReferenceData, binary.BigEndian, uint16(0xFFFF)) + case 1: + binary.Write(crossReferenceData, binary.BigEndian, byte(1)) + binary.Write(crossReferenceData, binary.BigEndian, uint32(ref.Offset)) + binary.Write(crossReferenceData, binary.BigEndian, uint16(ref.Generation)) + case 2: + binary.Write(crossReferenceData, binary.BigEndian, byte(2)) + binary.Write(crossReferenceData, binary.BigEndian, uint32(ref.ObjectNumber)) + binary.Write(crossReferenceData, binary.BigEndian, uint16(ref.Index)) + } + } + crossReferenceStream, err := MakeStream(crossReferenceData.Bytes(), NewFlateEncoder()) + if err != nil { + return err + } + crossReferenceStream.ObjectNumber = int64(crossObjNumber) + crossReferenceStream.PdfObjectDictionary.Set("Type", MakeName("XRef")) + crossReferenceStream.PdfObjectDictionary.Set("W", MakeArray(MakeInteger(1), MakeInteger(4), MakeInteger(2))) + crossReferenceStream.PdfObjectDictionary.Set("Index", MakeArray(MakeInteger(0), MakeInteger(crossReferenceStream.ObjectNumber+1))) + crossReferenceStream.PdfObjectDictionary.Set("Size", MakeInteger(crossReferenceStream.ObjectNumber+1)) + crossReferenceStream.PdfObjectDictionary.Set("Info", this.infoObj) + crossReferenceStream.PdfObjectDictionary.Set("Root", this.root) + // If encrypted! + if this.crypter != nil { + crossReferenceStream.Set("Encrypt", this.encryptObj) + crossReferenceStream.Set("ID", this.ids) + common.Log.Trace("Ids: %s", this.ids) + } + + this.writeObject(int(crossReferenceStream.ObjectNumber), crossReferenceStream) + + } else { + this.writeString("xref\r\n") + outStr := fmt.Sprintf("%d %d\r\n", 0, len(this.crossReferenceMap)) + this.writeString(outStr) + for idx := 0; idx < len(this.crossReferenceMap); idx++ { + ref := this.crossReferenceMap[idx] + switch ref.Type { + case 0: + outStr = fmt.Sprintf("%.10d %.5d f\r\n", 0, 65535) + this.writeString(outStr) + case 1: + outStr = fmt.Sprintf("%.10d %.5d n\r\n", ref.Offset, 0) + this.writeString(outStr) + } + } + + // Generate & write trailer + trailer := MakeDict() + trailer.Set("Info", this.infoObj) + trailer.Set("Root", this.root) + trailer.Set("Size", MakeInteger(int64(len(this.objects)+1))) + // If encrypted! + if this.crypter != nil { + trailer.Set("Encrypt", this.encryptObj) + trailer.Set("ID", this.ids) + common.Log.Trace("Ids: %s", this.ids) + } + this.writeString("trailer\n") + this.writeString(trailer.DefaultWriteString()) + this.writeString("\n") - // Generate & write trailer - trailer := MakeDict() - trailer.Set("Info", this.infoObj) - trailer.Set("Root", this.root) - trailer.Set("Size", MakeInteger(int64(len(this.objects)+1))) - // If encrypted! 
 
-	// Generate & write trailer
-	trailer := MakeDict()
-	trailer.Set("Info", this.infoObj)
-	trailer.Set("Root", this.root)
-	trailer.Set("Size", MakeInteger(int64(len(this.objects)+1)))
-	// If encrypted!
-	if this.crypter != nil {
-		trailer.Set("Encrypt", this.encryptObj)
-		trailer.Set("ID", this.ids)
-		common.Log.Trace("Ids: %s", this.ids)
 	}
-	this.writeString("trailer\n")
-	this.writeString(trailer.DefaultWriteString())
-	this.writeString("\n")
 
 	// Make offset reference.
-	outStr = fmt.Sprintf("startxref\n%d\n", xrefOffset)
+	outStr := fmt.Sprintf("startxref\n%d\n", xrefOffset)
 	this.writeString(outStr)
 	this.writeString("%%EOF\n")
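Putting the pieces together, a hedged end-to-end sketch of using the writer with these changes: declaring PDF 1.5 output selects the cross-reference stream path above, and any model.Optimizer implementation can be attached before writing. The file name "out.pdf" and the pass-through optimizer are placeholders for illustration, not part of this change.

package main

import (
	"os"

	"github.com/unidoc/unidoc/pdf/core"
	"github.com/unidoc/unidoc/pdf/model"
)

// passthroughOptimizer satisfies model.Optimizer and leaves the objects untouched.
type passthroughOptimizer struct{}

func (passthroughOptimizer) Optimize(objects []core.PdfObject) ([]core.PdfObject, error) {
	return objects, nil
}

func main() {
	w := model.NewPdfWriter()
	w.SetVersion(1, 5) // a version above 1.4 selects the cross-reference stream path
	w.SetOptimizer(passthroughOptimizer{})
	// ... pages would be added here via the usual writer/creator APIs ...

	f, err := os.Create("out.pdf")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	if err := w.Write(f); err != nil {
		panic(err)
	}
}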