mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-02 22:17:06 +08:00
Merge in master
This commit is contained in:
commit
734cd7ddbf
28
README.md
28
README.md
@ -22,34 +22,18 @@ go get github.com/unidoc/unidoc
|
|||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
* Read and extract PDF metadata
|
* Many [features](http://unidoc.io/features) with documented examples.
|
||||||
* Merge PDF ([example](https://github.com/unidoc/unidoc-examples/blob/master/pdf/pdf_merge.go)).
|
|
||||||
* Split PDF ([example](https://github.com/unidoc/unidoc-examples/blob/master/pdf/pdf_split.go)).
|
|
||||||
* Protect PDF ([example](https://github.com/unidoc/unidoc-examples/blob/master/pdf/pdf_protect.go)).
|
|
||||||
* Unlock PDF ([example](https://github.com/unidoc/unidoc-examples/blob/master/pdf/pdf_unlock.go)).
|
|
||||||
* Rotate PDF ([example](https://github.com/unidoc/unidoc-examples/blob/master/pdf/pdf_rotate.go)).
|
|
||||||
* Crop PDF ([example](https://github.com/unidoc/unidoc-examples/blob/master/pdf/pdf_crop.go)).
|
|
||||||
* Self contained with no external dependencies
|
* Self contained with no external dependencies
|
||||||
* Developer friendly
|
* Developer friendly
|
||||||
|
|
||||||
|
## Roadmap
|
||||||
|
|
||||||
|
Our [roadmap](https://trello.com/b/JcliaYYI) is publicly available and features can be voted upon.
|
||||||
|
|
||||||
## Examples
|
## Examples
|
||||||
|
|
||||||
See the [unidoc-examples](https://github.com/unidoc/unidoc-examples/tree/master) folder.
|
See the [unidoc-examples](https://github.com/unidoc/unidoc-examples/tree/master) folder.
|
||||||
|
|
||||||
## Roadmap
|
|
||||||
|
|
||||||
The following features are on the roadmap; all of them are subject to change.
|
|
||||||
|
|
||||||
* Compress PDF
|
|
||||||
* Create PDF (high level API)
|
|
||||||
* Fill out Forms
|
|
||||||
* Create Forms
|
|
||||||
* Bindings for Python (and C#/Java if there is interest)
|
|
||||||
* Create Doc and DocX files
|
|
||||||
* Convert PDF to Word
|
|
||||||
* OCR Engine
|
|
||||||
* And many more...
|
|
||||||
|
|
||||||
## Copying/License
|
## Copying/License
|
||||||
|
|
||||||
UniDoc is licensed as [AGPL][agpl] software (with extra terms as specified in our license).
|
UniDoc is licensed as [AGPL][agpl] software (with extra terms as specified in our license).
|
||||||
@ -76,7 +60,7 @@ Contributors need to approve the [Contributor License Agreement](https://docs.go
|
|||||||
|
|
||||||
## Support
|
## Support
|
||||||
|
|
||||||
Open source users can create a GitHub issue and we will look at it. Commercial users can create a GitHub issue and also email us at support@unidoc.io, and we will assist them directly.
|
Please email us at support@unidoc.io for any queries.
|
||||||
|
|
||||||
## Stay up to date
|
## Stay up to date
|
||||||
|
|
||||||
|
@ -245,7 +245,18 @@ func (this *PdfParser) lookupByNumber(objNumber int, attemptRepairs bool) (PdfOb
|
|||||||
|
|
||||||
obj, err := this.parseIndirectObject()
|
obj, err := this.parseIndirectObject()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
common.Log.Error("Failed reading xref")
|
common.Log.Error("Failed reading xref (%s)", err)
|
||||||
|
// Offset pointing to a non-object. Try to repair the file.
|
||||||
|
if attemptRepairs {
|
||||||
|
common.Log.Error("Attempting to repair xrefs (top down)")
|
||||||
|
xrefTable, err := this.repairRebuildXrefsTopDown()
|
||||||
|
if err != nil {
|
||||||
|
common.Log.Error("Failed repair (%s)", err)
|
||||||
|
return nil, false, err
|
||||||
|
}
|
||||||
|
this.xrefs = *xrefTable
|
||||||
|
return this.lookupByNumber(objNumber, false)
|
||||||
|
}
|
||||||
return nil, false, err
|
return nil, false, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -334,29 +345,6 @@ func (this *PdfParser) Trace(obj PdfObject) (PdfObject, error) {
|
|||||||
return o, nil
|
return o, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (this *PdfParser) rebuildXrefTable() error {
|
|
||||||
newXrefs := XrefTable{}
|
|
||||||
for objNum, xref := range this.xrefs {
|
|
||||||
obj, _, err := this.lookupByNumberWrapper(objNum, false)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
actObjNum, actGenNum, err := getObjectNumber(obj)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
xref.objectNumber = int(actObjNum)
|
|
||||||
xref.generation = int(actGenNum)
|
|
||||||
newXrefs[int(actObjNum)] = xref
|
|
||||||
}
|
|
||||||
|
|
||||||
this.xrefs = newXrefs
|
|
||||||
common.Log.Debug("New xref table built")
|
|
||||||
printXrefTable(this.xrefs)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func printXrefTable(xrefTable XrefTable) {
|
func printXrefTable(xrefTable XrefTable) {
|
||||||
common.Log.Debug("=X=X=X=")
|
common.Log.Debug("=X=X=X=")
|
||||||
common.Log.Debug("Xref table:")
|
common.Log.Debug("Xref table:")
|
||||||
|
@ -24,7 +24,7 @@ import (
|
|||||||
var rePdfVersion = regexp.MustCompile(`%PDF-(\d\.\d)`)
|
var rePdfVersion = regexp.MustCompile(`%PDF-(\d\.\d)`)
|
||||||
var reEOF = regexp.MustCompile("%%EOF")
|
var reEOF = regexp.MustCompile("%%EOF")
|
||||||
var reXrefTable = regexp.MustCompile(`\s*xref\s*`)
|
var reXrefTable = regexp.MustCompile(`\s*xref\s*`)
|
||||||
var reStartXref = regexp.MustCompile(`startxref\s*(\d+)`)
|
var reStartXref = regexp.MustCompile(`startx?ref\s*(\d+)`)
|
||||||
var reNumeric = regexp.MustCompile(`^[\+-.]*([0-9.]+)`)
|
var reNumeric = regexp.MustCompile(`^[\+-.]*([0-9.]+)`)
|
||||||
var reExponential = regexp.MustCompile(`^[\+-.]*([0-9.]+)e[\+-.]*([0-9.]+)`)
|
var reExponential = regexp.MustCompile(`^[\+-.]*([0-9.]+)e[\+-.]*([0-9.]+)`)
|
||||||
var reReference = regexp.MustCompile(`^\s*(\d+)\s+(\d+)\s+R`)
|
var reReference = regexp.MustCompile(`^\s*(\d+)\s+(\d+)\s+R`)
|
||||||
@ -1070,13 +1070,22 @@ func (this *PdfParser) loadXrefs() (*PdfObjectDictionary, error) {
|
|||||||
return nil, errors.New("Startxref not found")
|
return nil, errors.New("Startxref not found")
|
||||||
}
|
}
|
||||||
if len(result) > 2 {
|
if len(result) > 2 {
|
||||||
// GH: Take the last one?
|
// GH: Take the last one? Make a test case.
|
||||||
common.Log.Error("Multiple startxref (%s)!", b2)
|
common.Log.Error("Multiple startxref (%s)!", b2)
|
||||||
return nil, errors.New("Multiple startxref entries?")
|
return nil, errors.New("Multiple startxref entries?")
|
||||||
}
|
}
|
||||||
offsetXref, _ := strconv.Atoi(result[1])
|
offsetXref, _ := strconv.ParseInt(result[1], 10, 64)
|
||||||
common.Log.Debug("startxref at %d", offsetXref)
|
common.Log.Debug("startxref at %d", offsetXref)
|
||||||
|
|
||||||
|
if offsetXref > fSize {
|
||||||
|
common.Log.Error("Xref offset outside of file")
|
||||||
|
common.Log.Error("Attempting repair")
|
||||||
|
offsetXref, err = this.repairLocateXref()
|
||||||
|
if err != nil {
|
||||||
|
common.Log.Error("Repair attempt failed (%s)")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
// Read the xref.
|
// Read the xref.
|
||||||
this.rs.Seek(int64(offsetXref), os.SEEK_SET)
|
this.rs.Seek(int64(offsetXref), os.SEEK_SET)
|
||||||
this.reader = bufio.NewReader(this.rs)
|
this.reader = bufio.NewReader(this.rs)
|
||||||
|
@ -381,10 +381,7 @@ func (this *PdfReader) GetForms() (*PdfObjectDictionary, error) {
|
|||||||
common.Log.Debug("Has Acro forms")
|
common.Log.Debug("Has Acro forms")
|
||||||
|
|
||||||
common.Log.Debug("Traverse the Acroforms structure")
|
common.Log.Debug("Traverse the Acroforms structure")
|
||||||
nofollowList := map[PdfObjectName]bool{
|
err := this.traverseObjectData(formsDict)
|
||||||
"Parent": true,
|
|
||||||
}
|
|
||||||
err := this.traverseObjectData(formsDict, nofollowList)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
common.Log.Error("Unable to traverse AcroForms (%s)", err)
|
common.Log.Error("Unable to traverse AcroForms (%s)", err)
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -442,13 +439,8 @@ func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirec
|
|||||||
(*nodeDict)["Parent"] = parent
|
(*nodeDict)["Parent"] = parent
|
||||||
}
|
}
|
||||||
|
|
||||||
// Resolve the object recursively, not following Parents or Kids fields.
|
// Resolve the object recursively.
|
||||||
// Later can refactor and use only one smart recursive function.
|
err := this.traverseObjectData(node)
|
||||||
nofollowList := map[PdfObjectName]bool{
|
|
||||||
"Parent": true,
|
|
||||||
"Kids": true,
|
|
||||||
}
|
|
||||||
err := this.traverseObjectData(node, nofollowList)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -473,20 +465,9 @@ func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirec
|
|||||||
}
|
}
|
||||||
common.Log.Debug("Kids: %s", kids)
|
common.Log.Debug("Kids: %s", kids)
|
||||||
for idx, child := range *kids {
|
for idx, child := range *kids {
|
||||||
childRef, ok := child.(*PdfObjectReference)
|
child, ok := child.(*PdfIndirectObject)
|
||||||
if !ok {
|
if !ok {
|
||||||
return errors.New("Invalid kid, non-reference")
|
common.Log.Error("Page not indirect object - (%s)", child)
|
||||||
}
|
|
||||||
|
|
||||||
common.Log.Debug("look up ref %s", childRef)
|
|
||||||
pchild, err := this.parser.LookupByReference(*childRef)
|
|
||||||
if err != nil {
|
|
||||||
common.Log.Error("Unable to lookup page ref")
|
|
||||||
return errors.New("Unable to lookup page ref")
|
|
||||||
}
|
|
||||||
child, ok := pchild.(*PdfIndirectObject)
|
|
||||||
if !ok {
|
|
||||||
common.Log.Error("Page not indirect object - %s (%s)", childRef, pchild)
|
|
||||||
return errors.New("Page not indirect object")
|
return errors.New("Page not indirect object")
|
||||||
}
|
}
|
||||||
(*kids)[idx] = child
|
(*kids)[idx] = child
|
||||||
@ -526,11 +507,10 @@ func (this *PdfReader) resolveReference(ref *PdfObjectReference) (PdfObject, boo
|
|||||||
/*
|
/*
|
||||||
* Recursively traverse through the page object data and look up
|
* Recursively traverse through the page object data and look up
|
||||||
* references to indirect objects.
|
* references to indirect objects.
|
||||||
* GH: Consider to define a smarter traversing engine, defining explicitly
|
*
|
||||||
* - how deep we can go in terms of following certain Trees by name etc.
|
* GH: Are we fully protected against circular references? (Add tests).
|
||||||
* GH: Are we fully protected against circular references?
|
|
||||||
*/
|
*/
|
||||||
func (this *PdfReader) traverseObjectData(o PdfObject, nofollowKeys map[PdfObjectName]bool) error {
|
func (this *PdfReader) traverseObjectData(o PdfObject) error {
|
||||||
common.Log.Debug("Traverse object data")
|
common.Log.Debug("Traverse object data")
|
||||||
if _, isTraversed := this.traversed[o]; isTraversed {
|
if _, isTraversed := this.traversed[o]; isTraversed {
|
||||||
return nil
|
return nil
|
||||||
@ -540,37 +520,30 @@ func (this *PdfReader) traverseObjectData(o PdfObject, nofollowKeys map[PdfObjec
|
|||||||
if io, isIndirectObj := o.(*PdfIndirectObject); isIndirectObj {
|
if io, isIndirectObj := o.(*PdfIndirectObject); isIndirectObj {
|
||||||
common.Log.Debug("io: %s", io)
|
common.Log.Debug("io: %s", io)
|
||||||
common.Log.Debug("- %s", io.PdfObject)
|
common.Log.Debug("- %s", io.PdfObject)
|
||||||
err := this.traverseObjectData(io.PdfObject, nofollowKeys)
|
err := this.traverseObjectData(io.PdfObject)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if so, isStreamObj := o.(*PdfObjectStream); isStreamObj {
|
if so, isStreamObj := o.(*PdfObjectStream); isStreamObj {
|
||||||
err := this.traverseObjectData(so.PdfObjectDictionary, nofollowKeys)
|
err := this.traverseObjectData(so.PdfObjectDictionary)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if dict, isDict := o.(*PdfObjectDictionary); isDict {
|
if dict, isDict := o.(*PdfObjectDictionary); isDict {
|
||||||
common.Log.Debug("- dict: %s", dict)
|
common.Log.Debug("- dict: %s", dict)
|
||||||
for name, v := range *dict {
|
for name, v := range *dict {
|
||||||
if nofollowKeys != nil {
|
|
||||||
if _, nofollow := nofollowKeys[name]; nofollow {
|
|
||||||
// Do not retraverse up the tree.
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ref, isRef := v.(*PdfObjectReference); isRef {
|
if ref, isRef := v.(*PdfObjectReference); isRef {
|
||||||
resolvedObj, _, err := this.resolveReference(ref)
|
resolvedObj, _, err := this.resolveReference(ref)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
(*dict)[name] = resolvedObj
|
(*dict)[name] = resolvedObj
|
||||||
err = this.traverseObjectData(resolvedObj, nofollowKeys)
|
err = this.traverseObjectData(resolvedObj)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
err := this.traverseObjectData(v, nofollowKeys)
|
err := this.traverseObjectData(v)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -589,12 +562,12 @@ func (this *PdfReader) traverseObjectData(o PdfObject, nofollowKeys map[PdfObjec
|
|||||||
}
|
}
|
||||||
(*arr)[idx] = resolvedObj
|
(*arr)[idx] = resolvedObj
|
||||||
|
|
||||||
err = this.traverseObjectData(resolvedObj, nofollowKeys)
|
err = this.traverseObjectData(resolvedObj)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
err := this.traverseObjectData(v, nofollowKeys)
|
err := this.traverseObjectData(v)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -683,11 +656,8 @@ func (this *PdfReader) GetPage(pageNumber int) (PdfObject, error) {
|
|||||||
}
|
}
|
||||||
page := this.pageList[pageNumber-1]
|
page := this.pageList[pageNumber-1]
|
||||||
|
|
||||||
nofollowList := map[PdfObjectName]bool{
|
|
||||||
"Parent": true,
|
|
||||||
}
|
|
||||||
// Look up all references related to page and load everything.
|
// Look up all references related to page and load everything.
|
||||||
err := this.traverseObjectData(page, nofollowList)
|
err := this.traverseObjectData(page)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
140
pdf/repairs.go
Normal file
140
pdf/repairs.go
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
/*
|
||||||
|
* This file is subject to the terms and conditions defined in
|
||||||
|
* file 'LICENSE.md', which is part of this source code package.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Routines related to repairing malformed pdf files.
|
||||||
|
|
||||||
|
package pdf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"regexp"
|
||||||
|
|
||||||
|
"github.com/unidoc/unidoc/common"
|
||||||
|
)
|
||||||
|
|
||||||
|
var repairReXrefTable = regexp.MustCompile(`[\r\n]\s*(xref)\s*[\r\n]`)
|
||||||
|
|
||||||
|
// Locates a standard Xref table by looking for the "xref" entry.
|
||||||
|
// Xref object stream not supported.
|
||||||
|
func (this *PdfParser) repairLocateXref() (int64, error) {
|
||||||
|
readBuf := int64(1000)
|
||||||
|
this.rs.Seek(-readBuf, os.SEEK_CUR)
|
||||||
|
|
||||||
|
curOffset, err := this.rs.Seek(0, os.SEEK_CUR)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
b2 := make([]byte, readBuf)
|
||||||
|
this.rs.Read(b2)
|
||||||
|
|
||||||
|
results := repairReXrefTable.FindAllStringIndex(string(b2), -1)
|
||||||
|
if len(results) < 1 {
|
||||||
|
common.Log.Error("Repair: xref not found!")
|
||||||
|
return 0, errors.New("Repair: xref not found")
|
||||||
|
}
|
||||||
|
|
||||||
|
localOffset := int64(results[len(results)-1][0])
|
||||||
|
xrefOffset := curOffset + localOffset
|
||||||
|
return xrefOffset, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Renumbers the xref table.
|
||||||
|
// Useful when the cross reference is pointing to an object with the wrong number.
|
||||||
|
// Update the table.
|
||||||
|
func (this *PdfParser) rebuildXrefTable() error {
|
||||||
|
newXrefs := XrefTable{}
|
||||||
|
for objNum, xref := range this.xrefs {
|
||||||
|
obj, _, err := this.lookupByNumberWrapper(objNum, false)
|
||||||
|
if err != nil {
|
||||||
|
common.Log.Error("Unable to look up object (%s)", err)
|
||||||
|
common.Log.Error("Xref table completely broken - attempting to repair ")
|
||||||
|
xrefTable, err := this.repairRebuildXrefsTopDown()
|
||||||
|
if err != nil {
|
||||||
|
common.Log.Error("Failed xref rebuild repair (%s)", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
this.xrefs = *xrefTable
|
||||||
|
common.Log.Debug("Repaired xref table built")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
actObjNum, actGenNum, err := getObjectNumber(obj)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
xref.objectNumber = int(actObjNum)
|
||||||
|
xref.generation = int(actGenNum)
|
||||||
|
newXrefs[int(actObjNum)] = xref
|
||||||
|
}
|
||||||
|
|
||||||
|
this.xrefs = newXrefs
|
||||||
|
common.Log.Debug("New xref table built")
|
||||||
|
printXrefTable(this.xrefs)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the entire file from top down.
|
||||||
|
// Currently not supporting object streams...
|
||||||
|
// Also need to detect object streams and load the object numbers.
|
||||||
|
func (this *PdfParser) repairRebuildXrefsTopDown() (*XrefTable, error) {
|
||||||
|
reRepairIndirectObject := regexp.MustCompile(`^(\d+)\s+(\d+)\s+obj`)
|
||||||
|
|
||||||
|
this.SetFileOffset(0)
|
||||||
|
|
||||||
|
xrefTable := XrefTable{}
|
||||||
|
for {
|
||||||
|
this.skipComments()
|
||||||
|
|
||||||
|
curOffset := this.GetFileOffset()
|
||||||
|
|
||||||
|
peakBuf, err := this.reader.Peek(10)
|
||||||
|
if err != nil {
|
||||||
|
// EOF
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Indirect object?
|
||||||
|
results := reRepairIndirectObject.FindIndex(peakBuf)
|
||||||
|
if len(results) > 0 {
|
||||||
|
obj, err := this.parseIndirectObject()
|
||||||
|
if err != nil {
|
||||||
|
common.Log.Error("Unable to parse indirect object (%s)", err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if indObj, ok := obj.(*PdfIndirectObject); ok {
|
||||||
|
// Make the entry for the cross ref table.
|
||||||
|
xrefEntry := XrefObject{}
|
||||||
|
xrefEntry.xtype = XREF_TABLE_ENTRY
|
||||||
|
xrefEntry.objectNumber = int(indObj.ObjectNumber)
|
||||||
|
xrefEntry.generation = int(indObj.GenerationNumber)
|
||||||
|
xrefEntry.offset = curOffset
|
||||||
|
xrefTable[int(indObj.ObjectNumber)] = xrefEntry
|
||||||
|
} else if streamObj, ok := obj.(*PdfObjectStream); ok {
|
||||||
|
// Make the entry for the cross ref table.
|
||||||
|
xrefEntry := XrefObject{}
|
||||||
|
xrefEntry.xtype = XREF_TABLE_ENTRY
|
||||||
|
xrefEntry.objectNumber = int(streamObj.ObjectNumber)
|
||||||
|
xrefEntry.generation = int(streamObj.GenerationNumber)
|
||||||
|
xrefEntry.offset = curOffset
|
||||||
|
xrefTable[int(streamObj.ObjectNumber)] = xrefEntry
|
||||||
|
} else {
|
||||||
|
return nil, fmt.Errorf("Not an indirect object or stream (%T)", obj) // Should never happen.
|
||||||
|
}
|
||||||
|
} else if string(peakBuf[0:6]) == "endobj" {
|
||||||
|
this.reader.Discard(6)
|
||||||
|
} else {
|
||||||
|
// Stop once we reach xrefs/trailer section etc. Technically this could fail for complex
|
||||||
|
// cases, but lets keep it simple for now. Add more complexity when needed (problematic user committed files).
|
||||||
|
// In general more likely that more complex files would have better understanding of the PDF standard.
|
||||||
|
common.Log.Debug("Not an object - stop repair rebuilding xref here (%s)", peakBuf)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &xrefTable, nil
|
||||||
|
}
|
@ -22,6 +22,43 @@ import (
|
|||||||
"github.com/unidoc/unidoc/license"
|
"github.com/unidoc/unidoc/license"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var pdfProducer = ""
|
||||||
|
var pdfCreator = ""
|
||||||
|
|
||||||
|
func getPdfProducer() string {
|
||||||
|
if len(pdfProducer) > 0 {
|
||||||
|
return pdfProducer
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return default.
|
||||||
|
licenseKey := license.GetLicenseKey()
|
||||||
|
return fmt.Sprintf("UniDoc Library version %s (%s) - http://unidoc.io", getUniDocVersion(), licenseKey.TypeToString())
|
||||||
|
}
|
||||||
|
|
||||||
|
func SetPdfProducer(producer string) {
|
||||||
|
licenseKey := license.GetLicenseKey()
|
||||||
|
commercial := licenseKey.Type == license.LicenseTypeCommercial
|
||||||
|
if !commercial {
|
||||||
|
// Only commercial users can modify the producer.
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
pdfProducer = producer
|
||||||
|
}
|
||||||
|
|
||||||
|
func getPdfCreator() string {
|
||||||
|
if len(pdfCreator) > 0 {
|
||||||
|
return pdfCreator
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return default.
|
||||||
|
return "UniDoc - http://unidoc.io"
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetPdfCreator stores creator in the package-level pdfCreator variable, which
// getPdfCreator returns in place of its default whenever it is non-empty.
// Passing an empty string therefore restores the default.
func SetPdfCreator(creator string) {
|
||||||
|
pdfCreator = creator
|
||||||
|
}
|
||||||
|
|
||||||
type PdfWriter struct {
|
type PdfWriter struct {
|
||||||
root *PdfIndirectObject
|
root *PdfIndirectObject
|
||||||
pages *PdfIndirectObject
|
pages *PdfIndirectObject
|
||||||
@ -45,14 +82,10 @@ func NewPdfWriter() PdfWriter {
|
|||||||
w.objectsMap = map[PdfObject]bool{}
|
w.objectsMap = map[PdfObject]bool{}
|
||||||
w.objects = []PdfObject{}
|
w.objects = []PdfObject{}
|
||||||
|
|
||||||
licenseKey := license.GetLicenseKey()
|
|
||||||
|
|
||||||
producer := fmt.Sprintf("UniDoc Library version %s (%s) - http://unidoc.io", getUniDocVersion(), licenseKey.TypeToString())
|
|
||||||
|
|
||||||
// Creation info.
|
// Creation info.
|
||||||
infoDict := PdfObjectDictionary{}
|
infoDict := PdfObjectDictionary{}
|
||||||
infoDict[PdfObjectName("Producer")] = MakeString(producer)
|
infoDict[PdfObjectName("Producer")] = MakeString(getPdfProducer())
|
||||||
infoDict[PdfObjectName("Creator")] = MakeString("FoxyUtils Online PDF https://foxyutils.com")
|
infoDict[PdfObjectName("Creator")] = MakeString(getPdfCreator())
|
||||||
infoObj := PdfIndirectObject{}
|
infoObj := PdfIndirectObject{}
|
||||||
infoObj.PdfObject = &infoDict
|
infoObj.PdfObject = &infoDict
|
||||||
w.infoObj = &infoObj
|
w.infoObj = &infoObj
|
||||||
@ -151,7 +184,16 @@ func (this *PdfWriter) addObjects(obj PdfObject) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// How to handle the parent? Make sure it is present?
|
||||||
|
if parentObj, parentIsRef := (*dict)["Parent"].(*PdfObjectReference); parentIsRef {
|
||||||
|
// Parent is a reference. Means we can drop it?
|
||||||
|
// Could refer to somewhere outside of the scope of the output doc.
|
||||||
|
// Should be done by the reader already.
|
||||||
|
// -> ERROR.
|
||||||
|
common.Log.Error("Parent is a reference object - Cannot be in writer (needs to be resolved)")
|
||||||
|
return fmt.Errorf("Parent is a reference object - Cannot be in writer (needs to be resolved) - %s", parentObj)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
Loading…
x
Reference in New Issue
Block a user