diff --git a/creator/creator_test.go b/creator/creator_test.go index 432c16fb..9b7d3287 100644 --- a/creator/creator_test.go +++ b/creator/creator_test.go @@ -3067,6 +3067,77 @@ func TestPageLabels(t *testing.T) { require.Equal(t, core.EqualObjects(genPageLabels, pageLabels), true) } +func TestReferencedPageDestinations(t *testing.T) { + testPages := func(buf *bytes.Buffer, expectedPages, expectedNullDestPages int) { + reader, err := model.NewPdfReader(bytes.NewReader(buf.Bytes())) + require.NoError(t, err) + + // Check number of pages in catalog. + numPages, err := reader.GetNumPages() + require.NoError(t, err) + require.Equal(t, expectedPages, numPages) + + // Check outline destination pages. + outlines, err := reader.GetOutlines() + require.NoError(t, err) + + var nullDestPages int + var validDestPages int + for _, entry := range outlines.Entries { + pageObj := entry.Dest.PageObj + require.NotNil(t, pageObj) + + if core.IsNullObject(entry.Dest.PageObj) { + nullDestPages++ + continue + } + + _, _, err := reader.PageFromIndirectObject(pageObj) + require.NoError(t, err) + validDestPages++ + } + + require.Equal(t, expectedPages, validDestPages) + require.Equal(t, expectedNullDestPages, nullDestPages) + } + + // Generate and test input file. + c := New() + c.AddTOC = true + + numPages := 10 + for i := 0; i < numPages; i++ { + chapter := c.NewChapter(fmt.Sprintf("Chapter %d", i+1)) + paragraph := c.NewParagraph(fmt.Sprintf("Content for chapter %d", i+1)) + chapter.Add(paragraph) + require.NoError(t, c.Draw(chapter)) + + if i < numPages-1 { + c.NewPage() + } + } + + buf := bytes.NewBuffer(nil) + require.NoError(t, c.Write(buf)) + testPages(buf, 11, 0) + + // Generate and test split input file. 
+ reader, err := model.NewPdfReader(bytes.NewReader(buf.Bytes())) + require.NoError(t, err) + + writer := model.NewPdfWriter() + for i, page := range reader.PageList { + if i%2 == 0 { + require.NoError(t, writer.AddPage(page)) + } + } + writer.AddOutlineTree(reader.GetOutlineTree()) + + buf = bytes.NewBuffer(nil) + require.NoError(t, writer.Write(buf)) + testPages(buf, 6, 5) +} + var errRenderNotSupported = errors.New("rendering pdf is not supported on this system") // renderPDFToPNGs uses ghostscript (gs) to render specified PDF file into a set of PNG images (one per page). diff --git a/model/writer.go b/model/writer.go index 1ab1c309..8a23a00b 100644 --- a/model/writer.go +++ b/model/writer.go @@ -133,6 +133,7 @@ func SetPdfTitle(title string) { type PdfWriter struct { root *core.PdfIndirectObject pages *core.PdfIndirectObject + pagesMap map[core.PdfObject]struct{} // Pages lookup table. objects []core.PdfObject // Objects to write. objectsMap map[core.PdfObject]struct{} // Quick lookup table. outlines []*core.PdfIndirectObject @@ -256,6 +257,7 @@ func NewPdfWriter() PdfWriter { pages.PdfObject = pagedict w.pages = &pages + w.pagesMap = map[core.PdfObject]struct{}{} w.addObject(w.pages) catalogDict.Set("Pages", &pages) @@ -270,114 +272,143 @@ func NewPdfWriter() PdfWriter { // fills objectToObjectCopyMap to replace the old object to the copy of object if needed. // Parameter objectToObjectCopyMap is needed to replace object references to its copies. // Because many objects can contain references to another objects like pages to images. -func copyObject(obj core.PdfObject, objectToObjectCopyMap map[core.PdfObject]core.PdfObject) core.PdfObject { +// If a skip map is provided and the writer is not set to append mode, the +// children objects of pages which are not present in the catalog are added to +// the map and the page dictionaries are replaced with null objects. 
+func (w *PdfWriter) copyObject(obj core.PdfObject, + objectToObjectCopyMap map[core.PdfObject]core.PdfObject, + skipMap map[core.PdfObject]struct{}, skip bool) core.PdfObject { if newObj, ok := objectToObjectCopyMap[obj]; ok { return newObj } + newObj := obj + skipUnusedPages := !w.appendMode && skipMap != nil switch t := obj.(type) { case *core.PdfObjectArray: - newObj := &core.PdfObjectArray{} + arrObj := core.MakeArray() + newObj = arrObj objectToObjectCopyMap[obj] = newObj for _, val := range t.Elements() { - newObj.Append(copyObject(val, objectToObjectCopyMap)) + arrObj.Append(w.copyObject(val, objectToObjectCopyMap, skipMap, skip)) } - return newObj case *core.PdfObjectStreams: - newObj := &core.PdfObjectStreams{PdfObjectReference: t.PdfObjectReference} + streamsObj := &core.PdfObjectStreams{PdfObjectReference: t.PdfObjectReference} + newObj = streamsObj objectToObjectCopyMap[obj] = newObj for _, val := range t.Elements() { - newObj.Append(copyObject(val, objectToObjectCopyMap)) + streamsObj.Append(w.copyObject(val, objectToObjectCopyMap, skipMap, skip)) } - return newObj case *core.PdfObjectStream: - newObj := &core.PdfObjectStream{ + streamObj := &core.PdfObjectStream{ Stream: t.Stream, PdfObjectReference: t.PdfObjectReference, } + newObj = streamObj objectToObjectCopyMap[obj] = newObj - newObj.PdfObjectDictionary = copyObject(t.PdfObjectDictionary, objectToObjectCopyMap).(*core.PdfObjectDictionary) - return newObj + streamObj.PdfObjectDictionary = w.copyObject(t.PdfObjectDictionary, objectToObjectCopyMap, skipMap, skip).(*core.PdfObjectDictionary) case *core.PdfObjectDictionary: - newObj := core.MakeDict() + // Check if the object is a page dictionary and search it in the + // writer pages. If not found, replace it with a null object and add + // the chain of children objects to the skip map. 
+ var unused bool + if skipUnusedPages && !skip { + if dictType, _ := core.GetNameVal(t.Get("Type")); dictType == "Page" { + _, ok := w.pagesMap[t] + skip = !ok + unused = skip + } + } + + dictObj := core.MakeDict() + newObj = dictObj objectToObjectCopyMap[obj] = newObj for _, key := range t.Keys() { - val := t.Get(key) - newObj.Set(key, copyObject(val, objectToObjectCopyMap)) + dictObj.Set(key, w.copyObject(t.Get(key), objectToObjectCopyMap, skipMap, skip)) + } + + // If an unused page dictionary is found, replace it with a null object. + if unused { + newObj = core.MakeNull() + skip = false } - return newObj case *core.PdfIndirectObject: - newObj := &core.PdfIndirectObject{ + indObj := &core.PdfIndirectObject{ PdfObjectReference: t.PdfObjectReference, } + newObj = indObj objectToObjectCopyMap[obj] = newObj - newObj.PdfObject = copyObject(t.PdfObject, objectToObjectCopyMap) - return newObj + indObj.PdfObject = w.copyObject(t.PdfObject, objectToObjectCopyMap, skipMap, skip) case *core.PdfObjectString: - newObj := &core.PdfObjectString{} - *newObj = *t + strObj := *t + newObj = &strObj objectToObjectCopyMap[obj] = newObj - return newObj case *core.PdfObjectName: - newObj := core.PdfObjectName(*t) - objectToObjectCopyMap[obj] = &newObj - return &newObj + nameObj := core.PdfObjectName(*t) + newObj = &nameObj + objectToObjectCopyMap[obj] = newObj case *core.PdfObjectNull: - newObj := core.PdfObjectNull{} - objectToObjectCopyMap[obj] = &newObj - return &newObj + newObj = core.MakeNull() + objectToObjectCopyMap[obj] = newObj case *core.PdfObjectInteger: - newObj := core.PdfObjectInteger(*t) - objectToObjectCopyMap[obj] = &newObj - return &newObj + intObj := core.PdfObjectInteger(*t) + newObj = &intObj + objectToObjectCopyMap[obj] = newObj case *core.PdfObjectReference: - newObj := core.PdfObjectReference(*t) - objectToObjectCopyMap[obj] = &newObj - return &newObj + refObj := core.PdfObjectReference(*t) + newObj = &refObj + objectToObjectCopyMap[obj] = newObj case 
*core.PdfObjectFloat: - newObj := core.PdfObjectFloat(*t) - objectToObjectCopyMap[obj] = &newObj - return &newObj + floatObj := core.PdfObjectFloat(*t) + newObj = &floatObj + objectToObjectCopyMap[obj] = newObj case *core.PdfObjectBool: - newObj := core.PdfObjectBool(*t) - objectToObjectCopyMap[obj] = &newObj - return &newObj + boolObj := core.PdfObjectBool(*t) + newObj = &boolObj + objectToObjectCopyMap[obj] = newObj case *pdfSignDictionary: - newObj := &pdfSignDictionary{ + sigObj := &pdfSignDictionary{ PdfObjectDictionary: core.MakeDict(), handler: t.handler, signature: t.signature, } + newObj = sigObj objectToObjectCopyMap[obj] = newObj for _, key := range t.Keys() { - val := t.Get(key) - newObj.Set(key, copyObject(val, objectToObjectCopyMap)) + sigObj.Set(key, w.copyObject(t.Get(key), objectToObjectCopyMap, skipMap, skip)) } - return newObj default: common.Log.Info("TODO(a5i): implement copyObject for %+v", obj) } - // return other objects as is - return obj + + if skipUnusedPages && skip { + skipMap[obj] = struct{}{} + } + + return newObj } // copyObjects makes objects copy and set as working. 
func (w *PdfWriter) copyObjects() { objectToObjectCopyMap := make(map[core.PdfObject]core.PdfObject) - objects := make([]core.PdfObject, len(w.objects)) + objects := make([]core.PdfObject, 0, len(w.objects)) objectsMap := make(map[core.PdfObject]struct{}, len(w.objects)) - for i, obj := range w.objects { - newObject := copyObject(obj, objectToObjectCopyMap) - objects[i] = newObject + skipMap := make(map[core.PdfObject]struct{}) + for _, obj := range w.objects { + newObject := w.copyObject(obj, objectToObjectCopyMap, skipMap, false) + if _, ok := skipMap[obj]; ok { + continue + } + objects = append(objects, newObject) objectsMap[newObject] = struct{}{} } w.objects = objects w.objectsMap = objectsMap - w.infoObj = copyObject(w.infoObj, objectToObjectCopyMap).(*core.PdfIndirectObject) - w.root = copyObject(w.root, objectToObjectCopyMap).(*core.PdfIndirectObject) + w.infoObj = w.copyObject(w.infoObj, objectToObjectCopyMap, nil, false).(*core.PdfIndirectObject) + w.root = w.copyObject(w.root, objectToObjectCopyMap, nil, false).(*core.PdfIndirectObject) if w.encryptObj != nil { - w.encryptObj = copyObject(w.encryptObj, objectToObjectCopyMap).(*core.PdfIndirectObject) + w.encryptObj = w.copyObject(w.encryptObj, objectToObjectCopyMap, nil, false).(*core.PdfIndirectObject) } // Update replace map. @@ -631,6 +662,8 @@ func (w *PdfWriter) AddPage(page *PdfPage) error { return errors.New("invalid Pages Kids obj (not an array)") } kids.Append(pageObj) + w.pagesMap[pDict] = struct{}{} + pageCount, ok := core.GetInt(pagesDict.Get("Count")) if !ok { return errors.New("invalid Pages Count object (not an integer)")