mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
Skip referenced pages which are not present in the catalog (#377)
* Skip referenced pages which are not present in the catalog * Improve documentation for the copyObject method of the writer * Add creator test case for checking referenced page destinations
This commit is contained in:
parent
ae20c30ae4
commit
7bf2f62c3b
@ -3067,6 +3067,77 @@ func TestPageLabels(t *testing.T) {
|
||||
require.Equal(t, core.EqualObjects(genPageLabels, pageLabels), true)
|
||||
}
|
||||
|
||||
func TestReferencedPageDestinations(t *testing.T) {
|
||||
testPages := func(buf *bytes.Buffer, expectedPages, expectedNullDestPages int) {
|
||||
reader, err := model.NewPdfReader(bytes.NewReader(buf.Bytes()))
|
||||
require.NoError(t, err)
|
||||
|
||||
// Check number of pages in catalog.
|
||||
numPages, err := reader.GetNumPages()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, expectedPages, numPages)
|
||||
|
||||
// Check outline destination pages.
|
||||
outlines, err := reader.GetOutlines()
|
||||
require.NoError(t, err)
|
||||
|
||||
var nullDestPages int
|
||||
var validDestPages int
|
||||
for _, entry := range outlines.Entries {
|
||||
pageObj := entry.Dest.PageObj
|
||||
require.NotNil(t, pageObj)
|
||||
|
||||
if core.IsNullObject(entry.Dest.PageObj) {
|
||||
nullDestPages++
|
||||
continue
|
||||
}
|
||||
|
||||
_, _, err := reader.PageFromIndirectObject(pageObj)
|
||||
require.NoError(t, err)
|
||||
validDestPages++
|
||||
}
|
||||
|
||||
require.Equal(t, expectedPages, validDestPages)
|
||||
require.Equal(t, expectedNullDestPages, nullDestPages)
|
||||
}
|
||||
|
||||
// Generate and test input file.
|
||||
c := New()
|
||||
c.AddTOC = true
|
||||
|
||||
numPages := 10
|
||||
for i := 0; i < numPages; i++ {
|
||||
chapter := c.NewChapter(fmt.Sprintf("Chapter %d", i+1))
|
||||
paragraph := c.NewParagraph(fmt.Sprintf("Content for chapter %d", i+1))
|
||||
chapter.Add(paragraph)
|
||||
require.NoError(t, c.Draw(chapter))
|
||||
|
||||
if i < numPages-1 {
|
||||
c.NewPage()
|
||||
}
|
||||
}
|
||||
|
||||
buf := bytes.NewBuffer(nil)
|
||||
require.NoError(t, c.Write(buf))
|
||||
testPages(buf, 11, 0)
|
||||
|
||||
// Generate and test split input file.
|
||||
reader, err := model.NewPdfReader(bytes.NewReader(buf.Bytes()))
|
||||
require.NoError(t, err)
|
||||
|
||||
writer := model.NewPdfWriter()
|
||||
for i, page := range reader.PageList {
|
||||
if i%2 == 0 {
|
||||
require.NoError(t, writer.AddPage(page))
|
||||
}
|
||||
}
|
||||
writer.AddOutlineTree(reader.GetOutlineTree())
|
||||
|
||||
buf = bytes.NewBuffer(nil)
|
||||
require.NoError(t, writer.Write(buf))
|
||||
testPages(buf, 6, 5)
|
||||
}
|
||||
|
||||
var errRenderNotSupported = errors.New("rendering pdf is not supported on this system")
|
||||
|
||||
// renderPDFToPNGs uses ghostscript (gs) to render specified PDF file into a set of PNG images (one per page).
|
||||
|
135
model/writer.go
135
model/writer.go
@ -133,6 +133,7 @@ func SetPdfTitle(title string) {
|
||||
type PdfWriter struct {
|
||||
root *core.PdfIndirectObject
|
||||
pages *core.PdfIndirectObject
|
||||
pagesMap map[core.PdfObject]struct{} // Pages lookup table.
|
||||
objects []core.PdfObject // Objects to write.
|
||||
objectsMap map[core.PdfObject]struct{} // Quick lookup table.
|
||||
outlines []*core.PdfIndirectObject
|
||||
@ -256,6 +257,7 @@ func NewPdfWriter() PdfWriter {
|
||||
pages.PdfObject = pagedict
|
||||
|
||||
w.pages = &pages
|
||||
w.pagesMap = map[core.PdfObject]struct{}{}
|
||||
w.addObject(w.pages)
|
||||
|
||||
catalogDict.Set("Pages", &pages)
|
||||
@ -270,114 +272,143 @@ func NewPdfWriter() PdfWriter {
|
||||
// fills objectToObjectCopyMap to replace the old object with the copy of the object if needed.
|
||||
// Parameter objectToObjectCopyMap is needed to replace object references to its copies.
|
||||
// Because many objects can contain references to other objects, like pages to images.
|
||||
func copyObject(obj core.PdfObject, objectToObjectCopyMap map[core.PdfObject]core.PdfObject) core.PdfObject {
|
||||
// If a skip map is provided and the writer is not set to append mode, the
|
||||
// children objects of pages which are not present in the catalog are added to
|
||||
// the map and the page dictionaries are replaced with null objects.
|
||||
func (w *PdfWriter) copyObject(obj core.PdfObject,
|
||||
objectToObjectCopyMap map[core.PdfObject]core.PdfObject,
|
||||
skipMap map[core.PdfObject]struct{}, skip bool) core.PdfObject {
|
||||
if newObj, ok := objectToObjectCopyMap[obj]; ok {
|
||||
return newObj
|
||||
}
|
||||
|
||||
newObj := obj
|
||||
skipUnusedPages := !w.appendMode && skipMap != nil
|
||||
switch t := obj.(type) {
|
||||
case *core.PdfObjectArray:
|
||||
newObj := &core.PdfObjectArray{}
|
||||
arrObj := core.MakeArray()
|
||||
newObj = arrObj
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
for _, val := range t.Elements() {
|
||||
newObj.Append(copyObject(val, objectToObjectCopyMap))
|
||||
arrObj.Append(w.copyObject(val, objectToObjectCopyMap, skipMap, skip))
|
||||
}
|
||||
return newObj
|
||||
case *core.PdfObjectStreams:
|
||||
newObj := &core.PdfObjectStreams{PdfObjectReference: t.PdfObjectReference}
|
||||
streamsObj := &core.PdfObjectStreams{PdfObjectReference: t.PdfObjectReference}
|
||||
newObj = streamsObj
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
for _, val := range t.Elements() {
|
||||
newObj.Append(copyObject(val, objectToObjectCopyMap))
|
||||
streamsObj.Append(w.copyObject(val, objectToObjectCopyMap, skipMap, skip))
|
||||
}
|
||||
return newObj
|
||||
case *core.PdfObjectStream:
|
||||
newObj := &core.PdfObjectStream{
|
||||
streamObj := &core.PdfObjectStream{
|
||||
Stream: t.Stream,
|
||||
PdfObjectReference: t.PdfObjectReference,
|
||||
}
|
||||
newObj = streamObj
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
newObj.PdfObjectDictionary = copyObject(t.PdfObjectDictionary, objectToObjectCopyMap).(*core.PdfObjectDictionary)
|
||||
return newObj
|
||||
streamObj.PdfObjectDictionary = w.copyObject(t.PdfObjectDictionary, objectToObjectCopyMap, skipMap, skip).(*core.PdfObjectDictionary)
|
||||
case *core.PdfObjectDictionary:
|
||||
newObj := core.MakeDict()
|
||||
// Check if the object is a page dictionary and search it in the
|
||||
// writer pages. If not found, replace it with a null object and add
|
||||
// the chain of children objects to the skip map.
|
||||
var unused bool
|
||||
if skipUnusedPages && !skip {
|
||||
if dictType, _ := core.GetNameVal(t.Get("Type")); dictType == "Page" {
|
||||
_, ok := w.pagesMap[t]
|
||||
skip = !ok
|
||||
unused = skip
|
||||
}
|
||||
}
|
||||
|
||||
dictObj := core.MakeDict()
|
||||
newObj = dictObj
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
for _, key := range t.Keys() {
|
||||
val := t.Get(key)
|
||||
newObj.Set(key, copyObject(val, objectToObjectCopyMap))
|
||||
dictObj.Set(key, w.copyObject(t.Get(key), objectToObjectCopyMap, skipMap, skip))
|
||||
}
|
||||
|
||||
// If an unused page dictionary is found, replace it with a null object.
|
||||
if unused {
|
||||
newObj = core.MakeNull()
|
||||
skip = false
|
||||
}
|
||||
return newObj
|
||||
case *core.PdfIndirectObject:
|
||||
newObj := &core.PdfIndirectObject{
|
||||
indObj := &core.PdfIndirectObject{
|
||||
PdfObjectReference: t.PdfObjectReference,
|
||||
}
|
||||
newObj = indObj
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
newObj.PdfObject = copyObject(t.PdfObject, objectToObjectCopyMap)
|
||||
return newObj
|
||||
indObj.PdfObject = w.copyObject(t.PdfObject, objectToObjectCopyMap, skipMap, skip)
|
||||
case *core.PdfObjectString:
|
||||
newObj := &core.PdfObjectString{}
|
||||
*newObj = *t
|
||||
strObj := *t
|
||||
newObj = &strObj
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
return newObj
|
||||
case *core.PdfObjectName:
|
||||
newObj := core.PdfObjectName(*t)
|
||||
objectToObjectCopyMap[obj] = &newObj
|
||||
return &newObj
|
||||
nameObj := core.PdfObjectName(*t)
|
||||
newObj = &nameObj
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
case *core.PdfObjectNull:
|
||||
newObj := core.PdfObjectNull{}
|
||||
objectToObjectCopyMap[obj] = &newObj
|
||||
return &newObj
|
||||
newObj = core.MakeNull()
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
case *core.PdfObjectInteger:
|
||||
newObj := core.PdfObjectInteger(*t)
|
||||
objectToObjectCopyMap[obj] = &newObj
|
||||
return &newObj
|
||||
intObj := core.PdfObjectInteger(*t)
|
||||
newObj = &intObj
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
case *core.PdfObjectReference:
|
||||
newObj := core.PdfObjectReference(*t)
|
||||
objectToObjectCopyMap[obj] = &newObj
|
||||
return &newObj
|
||||
refObj := core.PdfObjectReference(*t)
|
||||
newObj = &refObj
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
case *core.PdfObjectFloat:
|
||||
newObj := core.PdfObjectFloat(*t)
|
||||
objectToObjectCopyMap[obj] = &newObj
|
||||
return &newObj
|
||||
floatObj := core.PdfObjectFloat(*t)
|
||||
newObj = &floatObj
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
case *core.PdfObjectBool:
|
||||
newObj := core.PdfObjectBool(*t)
|
||||
objectToObjectCopyMap[obj] = &newObj
|
||||
return &newObj
|
||||
boolObj := core.PdfObjectBool(*t)
|
||||
newObj = &boolObj
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
case *pdfSignDictionary:
|
||||
newObj := &pdfSignDictionary{
|
||||
sigObj := &pdfSignDictionary{
|
||||
PdfObjectDictionary: core.MakeDict(),
|
||||
handler: t.handler,
|
||||
signature: t.signature,
|
||||
}
|
||||
newObj = sigObj
|
||||
objectToObjectCopyMap[obj] = newObj
|
||||
for _, key := range t.Keys() {
|
||||
val := t.Get(key)
|
||||
newObj.Set(key, copyObject(val, objectToObjectCopyMap))
|
||||
sigObj.Set(key, w.copyObject(t.Get(key), objectToObjectCopyMap, skipMap, skip))
|
||||
}
|
||||
return newObj
|
||||
default:
|
||||
common.Log.Info("TODO(a5i): implement copyObject for %+v", obj)
|
||||
}
|
||||
// return other objects as is
|
||||
return obj
|
||||
|
||||
if skipUnusedPages && skip {
|
||||
skipMap[obj] = struct{}{}
|
||||
}
|
||||
|
||||
return newObj
|
||||
}
|
||||
|
||||
// copyObjects makes objects copy and set as working.
|
||||
func (w *PdfWriter) copyObjects() {
|
||||
objectToObjectCopyMap := make(map[core.PdfObject]core.PdfObject)
|
||||
objects := make([]core.PdfObject, len(w.objects))
|
||||
objects := make([]core.PdfObject, 0, len(w.objects))
|
||||
objectsMap := make(map[core.PdfObject]struct{}, len(w.objects))
|
||||
for i, obj := range w.objects {
|
||||
newObject := copyObject(obj, objectToObjectCopyMap)
|
||||
objects[i] = newObject
|
||||
skipMap := make(map[core.PdfObject]struct{})
|
||||
for _, obj := range w.objects {
|
||||
newObject := w.copyObject(obj, objectToObjectCopyMap, skipMap, false)
|
||||
if _, ok := skipMap[obj]; ok {
|
||||
continue
|
||||
}
|
||||
objects = append(objects, newObject)
|
||||
objectsMap[newObject] = struct{}{}
|
||||
}
|
||||
|
||||
w.objects = objects
|
||||
w.objectsMap = objectsMap
|
||||
w.infoObj = copyObject(w.infoObj, objectToObjectCopyMap).(*core.PdfIndirectObject)
|
||||
w.root = copyObject(w.root, objectToObjectCopyMap).(*core.PdfIndirectObject)
|
||||
w.infoObj = w.copyObject(w.infoObj, objectToObjectCopyMap, nil, false).(*core.PdfIndirectObject)
|
||||
w.root = w.copyObject(w.root, objectToObjectCopyMap, nil, false).(*core.PdfIndirectObject)
|
||||
if w.encryptObj != nil {
|
||||
w.encryptObj = copyObject(w.encryptObj, objectToObjectCopyMap).(*core.PdfIndirectObject)
|
||||
w.encryptObj = w.copyObject(w.encryptObj, objectToObjectCopyMap, nil, false).(*core.PdfIndirectObject)
|
||||
}
|
||||
|
||||
// Update replace map.
|
||||
@ -631,6 +662,8 @@ func (w *PdfWriter) AddPage(page *PdfPage) error {
|
||||
return errors.New("invalid Pages Kids obj (not an array)")
|
||||
}
|
||||
kids.Append(pageObj)
|
||||
w.pagesMap[pDict] = struct{}{}
|
||||
|
||||
pageCount, ok := core.GetInt(pagesDict.Get("Count"))
|
||||
if !ok {
|
||||
return errors.New("invalid Pages Count object (not an integer)")
|
||||
|
Loading…
x
Reference in New Issue
Block a user