mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-13 19:29:10 +08:00
Merge branch 'development' of https://github.com/unidoc/unipdf into cmap
This commit is contained in:
commit
4cca92cdff
@ -168,16 +168,12 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT
|
||||
if !ok {
|
||||
return nil, errors.New("invalid Rect")
|
||||
}
|
||||
rect, err := array.ToFloat64Array()
|
||||
rect, err := model.NewPdfRectangle(*array)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(rect) != 4 {
|
||||
return nil, errors.New("len(Rect) != 4")
|
||||
}
|
||||
|
||||
width := rect[2] - rect[0]
|
||||
height := rect[3] - rect[1]
|
||||
width := rect.Width()
|
||||
height := rect.Height()
|
||||
|
||||
if mkDict, has := core.GetDict(wa.MK); has {
|
||||
bsDict, _ := core.GetDict(wa.BS)
|
||||
|
@ -481,6 +481,13 @@ func (c *Creator) Finalize() error {
|
||||
adjustOutlineDest = func(item *model.OutlineItem) {
|
||||
item.Dest.Page += int64(genpages)
|
||||
|
||||
// Get page indirect object.
|
||||
if page := int(item.Dest.Page); page >= 0 && page < len(c.pages) {
|
||||
item.Dest.PageObj = c.pages[page].GetPageAsIndirectObject()
|
||||
} else {
|
||||
common.Log.Debug("WARN: could not get page container for page %d", page)
|
||||
}
|
||||
|
||||
// Reverse the Y axis of the destination coordinates.
|
||||
// The user passes in the annotation coordinates as if
|
||||
// position 0, 0 is at the top left of the page.
|
||||
@ -501,15 +508,19 @@ func (c *Creator) Finalize() error {
|
||||
|
||||
// Add outline TOC item.
|
||||
if c.AddTOC {
|
||||
var tocPage int64
|
||||
var tocPage int
|
||||
if hasFrontPage {
|
||||
tocPage = 1
|
||||
}
|
||||
|
||||
c.outline.Insert(0, model.NewOutlineItem(
|
||||
"Table of Contents",
|
||||
model.NewOutlineDest(tocPage, 0, c.pageHeight),
|
||||
))
|
||||
// Create TOC outline item.
|
||||
dest := model.NewOutlineDest(int64(tocPage), 0, c.pageHeight)
|
||||
if tocPage >= 0 && tocPage < len(c.pages) {
|
||||
dest.PageObj = c.pages[tocPage].GetPageAsIndirectObject()
|
||||
} else {
|
||||
common.Log.Debug("WARN: could not get page container for page %d", tocPage)
|
||||
}
|
||||
c.outline.Insert(0, model.NewOutlineItem("Table of Contents", dest))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,6 +13,7 @@ import (
|
||||
"bytes"
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
goimage "image"
|
||||
@ -1040,11 +1041,31 @@ func TestSubchapters(t *testing.T) {
|
||||
|
||||
addHeadersAndFooters(c)
|
||||
|
||||
err := c.WriteToFile(tempFile("3_subchapters.pdf"))
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
// Finalize creator in order to get final version of the outlines.
|
||||
require.NoError(t, c.Finalize())
|
||||
|
||||
// Get outline data as JSON.
|
||||
srcJson, err := json.Marshal(c.outline)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Write output file.
|
||||
outputPath := tempFile("3_subchapters.pdf")
|
||||
require.NoError(t, c.WriteToFile(outputPath))
|
||||
|
||||
// Read output file.
|
||||
outputFile, err := os.Open(outputPath)
|
||||
require.NoError(t, err)
|
||||
defer outputFile.Close()
|
||||
|
||||
reader, err := model.NewPdfReader(outputFile)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Compare outlines JSON data.
|
||||
dstOutline, err := reader.GetOutlines()
|
||||
require.NoError(t, err)
|
||||
dstJson, err := json.Marshal(dstOutline)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, srcJson, dstJson)
|
||||
}
|
||||
|
||||
// Test creating and drawing a table.
|
||||
|
@ -47,7 +47,7 @@ func (e *Extractor) ExtractTextWithStats() (extracted string, numChars int, numM
|
||||
|
||||
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
|
||||
func (e *Extractor) ExtractPageText() (*PageText, int, int, error) {
|
||||
pt, numChars, numMisses, err := e.extractPageText(e.contents, e.resources, 0)
|
||||
pt, numChars, numMisses, err := e.extractPageText(e.contents, e.resources, transform.IdentityMatrix(), 0)
|
||||
if err != nil {
|
||||
return nil, numChars, numMisses, err
|
||||
}
|
||||
@ -60,7 +60,7 @@ func (e *Extractor) ExtractPageText() (*PageText, int, int, error) {
|
||||
// extractPageText returns the text contents of content stream `e` and resources `resources` as a
|
||||
// PageText.
|
||||
// This can be called on a page or a form XObject.
|
||||
func (e *Extractor) extractPageText(contents string, resources *model.PdfPageResources, level int) (
|
||||
func (e *Extractor) extractPageText(contents string, resources *model.PdfPageResources, parentCTM transform.Matrix, level int) (
|
||||
*PageText, int, int, error) {
|
||||
common.Log.Trace("extractPageText: level=%d", level)
|
||||
pageText := &PageText{}
|
||||
@ -118,7 +118,10 @@ func (e *Extractor) extractPageText(contents string, resources *model.PdfPageRes
|
||||
pageText.marks = append(pageText.marks, to.marks...)
|
||||
}
|
||||
inTextObj = true
|
||||
to = newTextObject(e, resources, gs, &state, &fontStack)
|
||||
|
||||
graphicsState := gs
|
||||
graphicsState.CTM = parentCTM.Mult(graphicsState.CTM)
|
||||
to = newTextObject(e, resources, graphicsState, &state, &fontStack)
|
||||
case "ET": // End Text
|
||||
// End text object, discarding text matrix. If the current
|
||||
// text object contains text marks, they are added to the
|
||||
@ -331,8 +334,9 @@ func (e *Extractor) extractPageText(contents string, resources *model.PdfPageRes
|
||||
if formResources == nil {
|
||||
formResources = resources
|
||||
}
|
||||
|
||||
tList, numChars, numMisses, err := e.extractPageText(string(formContent),
|
||||
formResources, level+1)
|
||||
formResources, parentCTM.Mult(gs.CTM), level+1)
|
||||
if err != nil {
|
||||
common.Log.Debug("ERROR: %v", err)
|
||||
return err
|
||||
@ -1133,7 +1137,7 @@ func (tm TextMark) String() string {
|
||||
func (pt *PageText) computeViews() {
|
||||
fontHeight := pt.height()
|
||||
// We sort with a y tolerance to allow for subscripts, diacritics etc.
|
||||
tol := minFloat(fontHeight*0.2, 5.0)
|
||||
tol := minFloat(fontHeight*0.19, 5.0)
|
||||
common.Log.Trace("ToTextLocation: %d elements fontHeight=%.1f tol=%.1f", len(pt.marks), fontHeight, tol)
|
||||
// Uncomment the 2 following Debug statements to see the effects of sorting.
|
||||
// common.Log.Debug("computeViews: Before sorting %s", pt)
|
||||
|
@ -176,3 +176,36 @@ func TestRepairAcroForm(t *testing.T) {
|
||||
repaired := *reader.AcroForm.Fields
|
||||
require.ElementsMatch(t, original, repaired)
|
||||
}
|
||||
|
||||
// TestAcroFormNeedsRepair exercises PdfReader.AcroFormNeedsRepair on a sample
// form document through four states: as loaded, with the AcroForm removed,
// after RepairAcroForm, and with one entry dropped from the field list.
func TestAcroFormNeedsRepair(t *testing.T) {
|
||||
f, err := os.Open("./testdata/OoPdfFormExample.pdf")
|
||||
require.NoError(t, err)
|
||||
defer f.Close()
|
||||
|
||||
reader, err := NewPdfReader(f)
|
||||
require.NoError(t, err)
|
||||
|
||||
// As loaded from the test file, the AcroForm is expected not to need repair.
|
||||
needsRepair, err := reader.AcroFormNeedsRepair()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, needsRepair, false)
|
||||
|
||||
// With the AcroForm set to nil, repair is expected to be needed.
|
||||
reader.AcroForm = nil
|
||||
|
||||
needsRepair, err = reader.AcroFormNeedsRepair()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, needsRepair, true)
|
||||
|
||||
// After RepairAcroForm with nil options, repair is expected to be no longer needed.
|
||||
require.NoError(t, reader.RepairAcroForm(nil))
|
||||
needsRepair, err = reader.AcroFormNeedsRepair()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, needsRepair, false)
|
||||
|
||||
// Dropping the first entry of the AcroForm field list is expected to make
|
// repair necessary again.
|
||||
fields := (*reader.AcroForm.Fields)[1:]
|
||||
reader.AcroForm.Fields = &fields
|
||||
needsRepair, err = reader.AcroFormNeedsRepair()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, needsRepair, true)
|
||||
}
|
||||
|
@ -16,11 +16,12 @@ import (
|
||||
// OutlineDest represents the destination of an outline item.
|
||||
// It holds the page and the position on the page an outline item points to.
|
||||
type OutlineDest struct {
|
||||
Page int64 `json:"page"`
|
||||
Mode string `json:"mode"`
|
||||
X float64 `json:"x"`
|
||||
Y float64 `json:"y"`
|
||||
Zoom float64 `json:"zoom"`
|
||||
PageObj *core.PdfIndirectObject `json:"-"`
|
||||
Page int64 `json:"page"`
|
||||
Mode string `json:"mode"`
|
||||
X float64 `json:"x"`
|
||||
Y float64 `json:"y"`
|
||||
Zoom float64 `json:"zoom"`
|
||||
}
|
||||
|
||||
// NewOutlineDest returns a new outline destination which can be used
|
||||
@ -56,10 +57,18 @@ func newOutlineDestFromPdfObject(o core.PdfObject, r *PdfReader) (*OutlineDest,
|
||||
// Page object is provided. Identify page number using the reader.
|
||||
if _, pageNum, err := r.PageFromIndirectObject(pageInd); err == nil {
|
||||
dest.Page = int64(pageNum - 1)
|
||||
} else {
|
||||
common.Log.Debug("WARN: could not get page index for page %+v", pageInd)
|
||||
}
|
||||
} else if pageNum, ok := core.GetIntVal(pageObj); ok {
|
||||
// Page number is provided.
|
||||
dest.Page = int64(pageNum)
|
||||
dest.PageObj = pageInd
|
||||
} else if pageIdx, ok := core.GetIntVal(pageObj); ok {
|
||||
// Page index is provided. Get indirect object to page.
|
||||
if pageIdx >= 0 && pageIdx < len(r.PageList) {
|
||||
dest.PageObj = r.PageList[pageIdx].GetPageAsIndirectObject()
|
||||
} else {
|
||||
common.Log.Debug("WARN: could not get page container for page %d", pageIdx)
|
||||
}
|
||||
dest.Page = int64(pageIdx)
|
||||
} else {
|
||||
return nil, fmt.Errorf("invalid outline destination page: %T", pageObj)
|
||||
}
|
||||
@ -106,14 +115,22 @@ func newOutlineDestFromPdfObject(o core.PdfObject, r *PdfReader) (*OutlineDest,
|
||||
|
||||
// ToPdfObject returns a PDF object representation of the outline destination.
|
||||
func (od OutlineDest) ToPdfObject() core.PdfObject {
|
||||
if od.Page < 0 || od.Mode == "" {
|
||||
if (od.PageObj == nil && od.Page < 0) || od.Mode == "" {
|
||||
return core.MakeNull()
|
||||
}
|
||||
|
||||
dest := core.MakeArray(
|
||||
core.MakeInteger(od.Page),
|
||||
core.MakeName(od.Mode),
|
||||
)
|
||||
// Add destination page.
|
||||
dest := core.MakeArray()
|
||||
if od.PageObj != nil {
|
||||
// Internal outline.
|
||||
dest.Append(od.PageObj)
|
||||
} else {
|
||||
// External outline.
|
||||
dest.Append(core.MakeInteger(od.Page))
|
||||
}
|
||||
|
||||
// Add destination mode.
|
||||
dest.Append(core.MakeName(od.Mode))
|
||||
|
||||
// See section 12.3.2.2 "Explicit Destinations" (page 374).
|
||||
switch od.Mode {
|
||||
@ -180,10 +197,11 @@ func (o *Outline) ToPdfOutline() *PdfOutline {
|
||||
|
||||
// Create outline items.
|
||||
var outlineItems []*PdfOutlineItem
|
||||
var lenDescendants int64
|
||||
var prev *PdfOutlineItem
|
||||
|
||||
for _, item := range o.Entries {
|
||||
outlineItem, _ := item.ToPdfOutlineItem()
|
||||
outlineItem, lenChildren := item.ToPdfOutlineItem()
|
||||
outlineItem.Parent = &outline.PdfOutlineTreeNode
|
||||
|
||||
if prev != nil {
|
||||
@ -192,15 +210,18 @@ func (o *Outline) ToPdfOutline() *PdfOutline {
|
||||
}
|
||||
|
||||
outlineItems = append(outlineItems, outlineItem)
|
||||
lenDescendants += lenChildren
|
||||
prev = outlineItem
|
||||
}
|
||||
|
||||
// Add outline linked list properties.
|
||||
lenOutlineItems := int64(len(outlineItems))
|
||||
lenDescendants += int64(lenOutlineItems)
|
||||
|
||||
if lenOutlineItems > 0 {
|
||||
outline.First = &outlineItems[0].PdfOutlineTreeNode
|
||||
outline.Last = &outlineItems[lenOutlineItems-1].PdfOutlineTreeNode
|
||||
outline.Count = &lenOutlineItems
|
||||
outline.Count = &lenDescendants
|
||||
}
|
||||
|
||||
return outline
|
||||
|
@ -570,6 +570,45 @@ func (r *PdfReader) RepairAcroForm(opts *AcroFormRepairOptions) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// AcroFormNeedsRepair returns true if the document contains widget annotations
|
||||
// linked to fields which are not referenced in the AcroForm. The AcroForm can
|
||||
// be repaired using the RepairAcroForm method of the reader.
// It also returns true for a widget annotation whose Field() is nil. A nil
// AcroForm is treated as having no fields. If the annotations of a page
// cannot be retrieved, it returns false together with that error.
|
||||
func (r *PdfReader) AcroFormNeedsRepair() (bool, error) {
|
||||
var fields []*PdfField
|
||||
if r.AcroForm != nil {
|
||||
fields = r.AcroForm.AllFields()
|
||||
}
|
||||
|
||||
// Index the AcroForm fields by pointer so each widget's field can be
// checked for membership.
fieldMap := make(map[*PdfField]struct{}, len(fields))
|
||||
for _, field := range fields {
|
||||
fieldMap[field] = struct{}{}
|
||||
}
|
||||
|
||||
// Scan the annotations of every page, stopping at the first widget that
// indicates a repair is needed.
for _, page := range r.PageList {
|
||||
annotations, err := page.GetAnnotations()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
for _, annotation := range annotations {
|
||||
// Only widget annotations are considered; all others are skipped.
widget, ok := annotation.GetContext().(*PdfAnnotationWidget)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
field := widget.Field()
|
||||
// A widget without a field counts as needing repair.
if field == nil {
|
||||
return true, nil
|
||||
}
|
||||
// A widget whose field is not among the AcroForm fields needs repair.
if _, ok := fieldMap[field]; !ok {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No widget triggered the checks above.
return false, nil
|
||||
}
|
||||
|
||||
// loadForms loads the AcroForm.
|
||||
func (r *PdfReader) loadForms() (*PdfAcroForm, error) {
|
||||
if r.parser.GetCrypter() != nil && !r.parser.IsAuthenticated() {
|
||||
|
Loading…
x
Reference in New Issue
Block a user