mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-24 13:48:49 +08:00
Merge pull request #363 from gunnsth/release/v3.7.1
Prepare unipdf release v3.7.1
This commit is contained in:
commit
f99c0cd58f
@ -168,16 +168,12 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT
|
||||
if !ok {
|
||||
return nil, errors.New("invalid Rect")
|
||||
}
|
||||
rect, err := array.ToFloat64Array()
|
||||
rect, err := model.NewPdfRectangle(*array)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(rect) != 4 {
|
||||
return nil, errors.New("len(Rect) != 4")
|
||||
}
|
||||
|
||||
width := rect[2] - rect[0]
|
||||
height := rect[3] - rect[1]
|
||||
width := rect.Width()
|
||||
height := rect.Height()
|
||||
|
||||
if mkDict, has := core.GetDict(wa.MK); has {
|
||||
bsDict, _ := core.GetDict(wa.BS)
|
||||
|
@ -12,11 +12,11 @@ import (
|
||||
|
||||
const releaseYear = 2020
|
||||
const releaseMonth = 5
|
||||
const releaseDay = 5
|
||||
const releaseDay = 25
|
||||
const releaseHour = 23
|
||||
const releaseMin = 20
|
||||
const releaseMin = 35
|
||||
|
||||
// Version holds version information. When bumping this, make sure to bump the released-at stamp as well.
|
||||
const Version = "3.7.0"
|
||||
const Version = "3.7.1"
|
||||
|
||||
var ReleasedAt = time.Date(releaseYear, releaseMonth, releaseDay, releaseHour, releaseMin, 0, 0, time.UTC)
|
||||
|
@ -481,6 +481,13 @@ func (c *Creator) Finalize() error {
|
||||
adjustOutlineDest = func(item *model.OutlineItem) {
|
||||
item.Dest.Page += int64(genpages)
|
||||
|
||||
// Get page indirect object.
|
||||
if page := int(item.Dest.Page); page >= 0 && page < len(c.pages) {
|
||||
item.Dest.PageObj = c.pages[page].GetPageAsIndirectObject()
|
||||
} else {
|
||||
common.Log.Debug("WARN: could not get page container for page %d", page)
|
||||
}
|
||||
|
||||
// Reverse the Y axis of the destination coordinates.
|
||||
// The user passes in the annotation coordinates as if
|
||||
// position 0, 0 is at the top left of the page.
|
||||
@ -501,15 +508,19 @@ func (c *Creator) Finalize() error {
|
||||
|
||||
// Add outline TOC item.
|
||||
if c.AddTOC {
|
||||
var tocPage int64
|
||||
var tocPage int
|
||||
if hasFrontPage {
|
||||
tocPage = 1
|
||||
}
|
||||
|
||||
c.outline.Insert(0, model.NewOutlineItem(
|
||||
"Table of Contents",
|
||||
model.NewOutlineDest(tocPage, 0, c.pageHeight),
|
||||
))
|
||||
// Create TOC outline item.
|
||||
dest := model.NewOutlineDest(int64(tocPage), 0, c.pageHeight)
|
||||
if tocPage >= 0 && tocPage < len(c.pages) {
|
||||
dest.PageObj = c.pages[tocPage].GetPageAsIndirectObject()
|
||||
} else {
|
||||
common.Log.Debug("WARN: could not get page container for page %d", tocPage)
|
||||
}
|
||||
c.outline.Insert(0, model.NewOutlineItem("Table of Contents", dest))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,6 +13,7 @@ import (
|
||||
"bytes"
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
goimage "image"
|
||||
@ -33,6 +34,7 @@ import (
|
||||
"github.com/unidoc/unipdf/v3/common"
|
||||
"github.com/unidoc/unipdf/v3/contentstream/draw"
|
||||
"github.com/unidoc/unipdf/v3/core"
|
||||
"github.com/unidoc/unipdf/v3/extractor"
|
||||
"github.com/unidoc/unipdf/v3/model"
|
||||
"github.com/unidoc/unipdf/v3/model/optimize"
|
||||
)
|
||||
@ -696,12 +698,31 @@ func TestParagraphChinese(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
testWriteAndRender(t, creator, "2_p_nihao.pdf")
|
||||
fname := tempFile("2_p_nihao.pdf")
|
||||
fname := testWrite(t, creator, "2_p_nihao.pdf")
|
||||
st, err := os.Stat(fname)
|
||||
require.NoError(t, err)
|
||||
t.Logf("output size: %d (%.2f MB)", st.Size(), float64(st.Size())/1024/1024)
|
||||
|
||||
t.Logf("output size: %d (%d MB)", st.Size(), st.Size()/1024/1024)
|
||||
// Check if text is extracted correctly (tests the ToUnicode map).
|
||||
f, err := os.Open(fname)
|
||||
require.NoError(t, err)
|
||||
defer f.Close()
|
||||
r, err := model.NewPdfReaderLazy(f)
|
||||
require.NoError(t, err)
|
||||
p, err := r.GetPage(1)
|
||||
require.NoError(t, err)
|
||||
e, err := extractor.New(p)
|
||||
require.NoError(t, err)
|
||||
text, err := e.ExtractText()
|
||||
require.NoError(t, err)
|
||||
expected := strings.Join(lines, "\n")
|
||||
if len(text) > len(expected) {
|
||||
// Trim off extra license data.
|
||||
text = text[:len(expected)]
|
||||
}
|
||||
require.Equal(t, expected, text)
|
||||
|
||||
testRender(t, fname)
|
||||
}
|
||||
|
||||
// Test paragraph with composite font and various unicode characters.
|
||||
@ -1020,11 +1041,31 @@ func TestSubchapters(t *testing.T) {
|
||||
|
||||
addHeadersAndFooters(c)
|
||||
|
||||
err := c.WriteToFile(tempFile("3_subchapters.pdf"))
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
}
|
||||
// Finalize creator in order to get final version of the outlines.
|
||||
require.NoError(t, c.Finalize())
|
||||
|
||||
// Get outline data as JSON.
|
||||
srcJson, err := json.Marshal(c.outline)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Write output file.
|
||||
outputPath := tempFile("3_subchapters.pdf")
|
||||
require.NoError(t, c.WriteToFile(outputPath))
|
||||
|
||||
// Read output file.
|
||||
outputFile, err := os.Open(outputPath)
|
||||
require.NoError(t, err)
|
||||
defer outputFile.Close()
|
||||
|
||||
reader, err := model.NewPdfReader(outputFile)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Compare outlines JSON data.
|
||||
dstOutline, err := reader.GetOutlines()
|
||||
require.NoError(t, err)
|
||||
dstJson, err := json.Marshal(dstOutline)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, srcJson, dstJson)
|
||||
}
|
||||
|
||||
// Test creating and drawing a table.
|
||||
@ -3113,14 +3154,20 @@ func hashFile(file string) (string, error) {
|
||||
return hex.EncodeToString(h.Sum(nil)), nil
|
||||
}
|
||||
|
||||
func testWriteAndRender(t *testing.T, c *Creator, pname string) {
|
||||
func testWriteAndRender(t *testing.T, c *Creator, pname string) string {
|
||||
pname = testWrite(t, c, pname)
|
||||
testRender(t, pname)
|
||||
return pname
|
||||
}
|
||||
|
||||
func testWrite(t *testing.T, c *Creator, pname string) string {
|
||||
pname = tempFile(pname)
|
||||
err := c.WriteToFile(pname)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %v\n", err)
|
||||
return
|
||||
return pname
|
||||
}
|
||||
testRender(t, pname)
|
||||
return pname
|
||||
}
|
||||
|
||||
func testRender(t *testing.T, pdfPath string) {
|
||||
|
@ -684,8 +684,9 @@ func (i *Invoice) drawInformation() *Table {
|
||||
return table
|
||||
}
|
||||
|
||||
func (i *Invoice) drawTotals() *Table {
|
||||
table := newTable(2)
|
||||
func (i *Invoice) generateTotalBlocks(ctx DrawContext) ([]*Block, DrawContext, error) {
|
||||
table := newTable(4)
|
||||
table.SetMargins(0, 0, 10, 10)
|
||||
|
||||
totals := [][2]*InvoiceCell{i.subtotal}
|
||||
totals = append(totals, i.totals...)
|
||||
@ -696,6 +697,7 @@ func (i *Invoice) drawTotals() *Table {
|
||||
if value.Value == "" {
|
||||
continue
|
||||
}
|
||||
table.SkipCells(2)
|
||||
|
||||
// Add description.
|
||||
cell := table.NewCell()
|
||||
@ -720,7 +722,7 @@ func (i *Invoice) drawTotals() *Table {
|
||||
cell.SetContent(p)
|
||||
}
|
||||
|
||||
return table
|
||||
return table.GeneratePageBlocks(ctx)
|
||||
}
|
||||
|
||||
func (i *Invoice) generateHeaderBlocks(ctx DrawContext) ([]*Block, DrawContext, error) {
|
||||
@ -820,20 +822,6 @@ func (i *Invoice) generateLineBlocks(ctx DrawContext) ([]*Block, DrawContext, er
|
||||
return table.GeneratePageBlocks(ctx)
|
||||
}
|
||||
|
||||
func (i *Invoice) generateTotalBlocks(ctx DrawContext) ([]*Block, DrawContext, error) {
|
||||
table := newTable(2)
|
||||
table.SetMargins(0, 0, 5, 40)
|
||||
table.SkipCells(1)
|
||||
|
||||
totalsTable := i.drawTotals()
|
||||
totalsTable.SetMargins(0, 0, 5, 0)
|
||||
|
||||
cell := table.NewCell()
|
||||
cell.SetContent(totalsTable)
|
||||
|
||||
return table.GeneratePageBlocks(ctx)
|
||||
}
|
||||
|
||||
func (i *Invoice) generateNoteBlocks(ctx DrawContext) ([]*Block, DrawContext, error) {
|
||||
division := newDivision()
|
||||
|
||||
|
@ -62,9 +62,12 @@ func TestInvoiceSimple(t *testing.T) {
|
||||
|
||||
// Set invoice totals.
|
||||
invoice.SetSubtotal("$100.00")
|
||||
invoice.AddTotalLine("Tax (10%)", "$10.00")
|
||||
invoice.AddTotalLine("Shipping", "$5.00")
|
||||
invoice.SetTotal("$115.00")
|
||||
invoice.AddTotalLine("Tax (10%)", "$10.00")
|
||||
for i := 0; i < 10; i++ {
|
||||
invoice.AddTotalLine(fmt.Sprintf("Extra tax #%d", i+1), "$10.00")
|
||||
}
|
||||
invoice.SetTotal("$215.00")
|
||||
|
||||
// Set invoice content sections.
|
||||
invoice.SetNotes("Notes", "Thank you for your business.")
|
||||
|
@ -569,7 +569,7 @@ func (table *Table) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext,
|
||||
case CellHorizontalAlignmentRight:
|
||||
if w > cw {
|
||||
ctx.X = ctx.X + w - cw - cell.indent
|
||||
ctx.Width = cw
|
||||
ctx.Width -= cell.indent
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -10,6 +10,8 @@ import (
|
||||
"io/ioutil"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/unidoc/unipdf/v3/model"
|
||||
)
|
||||
|
||||
@ -577,3 +579,38 @@ func TestTableParagraphLinks(t *testing.T) {
|
||||
t.Fatalf("Fail: %v\n", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTableHorizontalCellAlign(t *testing.T) {
|
||||
font, err := model.NewStandard14Font(model.HelveticaName)
|
||||
require.NoError(t, err)
|
||||
fontBold, err := model.NewStandard14Font(model.HelveticaBoldName)
|
||||
require.NoError(t, err)
|
||||
|
||||
c := New()
|
||||
table := c.NewTable(3)
|
||||
|
||||
drawCell := func(text string, font *model.PdfFont, align CellHorizontalAlignment) {
|
||||
p := c.NewStyledParagraph()
|
||||
p.Append(text).Style.Font = font
|
||||
|
||||
cell := table.NewCell()
|
||||
cell.SetBorder(CellBorderSideAll, CellBorderStyleSingle, 1)
|
||||
cell.SetHorizontalAlignment(align)
|
||||
cell.SetContent(p)
|
||||
}
|
||||
|
||||
// Draw table header.
|
||||
drawCell("Align left", fontBold, CellHorizontalAlignmentLeft)
|
||||
drawCell("Align center", fontBold, CellHorizontalAlignmentCenter)
|
||||
drawCell("Align right", fontBold, CellHorizontalAlignmentRight)
|
||||
|
||||
// Draw table content.
|
||||
for i := 100; i < 200; i++ {
|
||||
drawCell(fmt.Sprintf("Product #%d", i), font, CellHorizontalAlignmentLeft)
|
||||
drawCell(fmt.Sprintf("Quantity #%d", i), font, CellHorizontalAlignmentCenter)
|
||||
drawCell(fmt.Sprintf("Total: #%d.%d", i, 200-i), font, CellHorizontalAlignmentRight)
|
||||
}
|
||||
|
||||
require.NoError(t, c.Draw(table))
|
||||
testWriteAndRender(t, c, "table_horizontal_cell_align.pdf")
|
||||
}
|
||||
|
@ -47,7 +47,7 @@ func (e *Extractor) ExtractTextWithStats() (extracted string, numChars int, numM
|
||||
|
||||
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
|
||||
func (e *Extractor) ExtractPageText() (*PageText, int, int, error) {
|
||||
pt, numChars, numMisses, err := e.extractPageText(e.contents, e.resources, 0)
|
||||
pt, numChars, numMisses, err := e.extractPageText(e.contents, e.resources, transform.IdentityMatrix(), 0)
|
||||
if err != nil {
|
||||
return nil, numChars, numMisses, err
|
||||
}
|
||||
@ -60,7 +60,7 @@ func (e *Extractor) ExtractPageText() (*PageText, int, int, error) {
|
||||
// extractPageText returns the text contents of content stream `contents` and resources `resources` as a
|
||||
// PageText.
|
||||
// This can be called on a page or a form XObject.
|
||||
func (e *Extractor) extractPageText(contents string, resources *model.PdfPageResources, level int) (
|
||||
func (e *Extractor) extractPageText(contents string, resources *model.PdfPageResources, parentCTM transform.Matrix, level int) (
|
||||
*PageText, int, int, error) {
|
||||
common.Log.Trace("extractPageText: level=%d", level)
|
||||
pageText := &PageText{}
|
||||
@ -118,7 +118,10 @@ func (e *Extractor) extractPageText(contents string, resources *model.PdfPageRes
|
||||
pageText.marks = append(pageText.marks, to.marks...)
|
||||
}
|
||||
inTextObj = true
|
||||
to = newTextObject(e, resources, gs, &state, &fontStack)
|
||||
|
||||
graphicsState := gs
|
||||
graphicsState.CTM = parentCTM.Mult(graphicsState.CTM)
|
||||
to = newTextObject(e, resources, graphicsState, &state, &fontStack)
|
||||
case "ET": // End Text
|
||||
// End text object, discarding text matrix. If the current
|
||||
// text object contains text marks, they are added to the
|
||||
@ -331,8 +334,9 @@ func (e *Extractor) extractPageText(contents string, resources *model.PdfPageRes
|
||||
if formResources == nil {
|
||||
formResources = resources
|
||||
}
|
||||
|
||||
tList, numChars, numMisses, err := e.extractPageText(string(formContent),
|
||||
formResources, level+1)
|
||||
formResources, parentCTM.Mult(gs.CTM), level+1)
|
||||
if err != nil {
|
||||
common.Log.Debug("ERROR: %v", err)
|
||||
return err
|
||||
@ -1134,7 +1138,7 @@ func (tm TextMark) String() string {
|
||||
func (pt *PageText) computeViews() {
|
||||
fontHeight := pt.height()
|
||||
// We sort with a y tolerance to allow for subscripts, diacritics etc.
|
||||
tol := minFloat(fontHeight*0.2, 5.0)
|
||||
tol := minFloat(fontHeight*0.19, 5.0)
|
||||
common.Log.Trace("ToTextLocation: %d elements fontHeight=%.1f tol=%.1f", len(pt.marks), fontHeight, tol)
|
||||
// Uncomment the 2 following Debug statements to see the effects of sorting.
|
||||
// common.Log.Debug("computeViews: Before sorting %s", pt)
|
||||
|
@ -51,9 +51,7 @@ var doStress bool
|
||||
func init() {
|
||||
flag.BoolVar(&doStress, "extractor-stresstest", false, "Run text extractor stress tests.")
|
||||
common.SetLogger(common.NewConsoleLogger(common.LogLevelInfo))
|
||||
if flag.Lookup("test.v") != nil {
|
||||
isTesting = true
|
||||
}
|
||||
isTesting = true
|
||||
}
|
||||
|
||||
// TestTextExtractionFragments tests text extraction on the PDF fragments in `fragmentTests`.
|
||||
|
4
go.mod
4
go.mod
@ -6,11 +6,13 @@ require (
|
||||
github.com/adrg/sysfont v0.1.0
|
||||
github.com/boombuler/barcode v1.0.0
|
||||
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0
|
||||
github.com/sirupsen/logrus v1.6.0 // indirect
|
||||
github.com/stretchr/testify v1.4.0
|
||||
github.com/unidoc/pkcs7 v0.0.0-20200411230602-d883fd70d1df
|
||||
github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a
|
||||
github.com/unidoc/unitype v0.0.0-20200426000514-43fb032b9ce6
|
||||
github.com/unidoc/unitype v0.2.0
|
||||
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5
|
||||
golang.org/x/image v0.0.0-20181116024801-cd38e8056d9b
|
||||
golang.org/x/sys v0.0.0-20200523222454-059865788121 // indirect
|
||||
golang.org/x/text v0.3.2
|
||||
)
|
||||
|
17
go.sum
17
go.sum
@ -15,25 +15,31 @@ github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGw
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/sirupsen/logrus v1.5.0 h1:1N5EYkVAPEywqZRJd7cwnRtCb6xJx7NH3T3WUTF980Q=
|
||||
github.com/sirupsen/logrus v1.5.0/go.mod h1:+F7Ogzej0PZc/94MaYx/nvG9jOFMD2osvC3s+Squfpo=
|
||||
github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I=
|
||||
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/unidoc/pkcs7 v0.0.0-20200411230602-d883fd70d1df h1:1RV3lxQ6L6xGFNhngpP9iMjJPSwvH3p17JNbK9u5274=
|
||||
github.com/unidoc/pkcs7 v0.0.0-20200411230602-d883fd70d1df/go.mod h1:UEzOZUEpJfDpywVJMUT8QiugqEZC29pDq7kdIZhWCr8=
|
||||
github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a h1:RLtvUhe4DsUDl66m7MJ8OqBjq8jpWBXPK6/RKtqeTkc=
|
||||
github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a/go.mod h1:j+qMWZVpZFTvDey3zxUkSgPJZEX33tDgU/QIA0IzCUw=
|
||||
github.com/unidoc/unitype v0.0.0-20200426000514-43fb032b9ce6 h1:wKQZP0/WXDQ6kqniHSpdXol+895jojF+Sk4XORxxi3A=
|
||||
github.com/unidoc/unitype v0.0.0-20200426000514-43fb032b9ce6/go.mod h1:mafyug7zYmDOusqa7G0dJV45qp4b6TDAN+pHN7ZUIBU=
|
||||
github.com/unidoc/unitype v0.1.0 h1:6zJYMl8XdwFBD45Cmg8Ge13WyE92jwLuK1tk2IsRb9s=
|
||||
github.com/unidoc/unitype v0.1.0/go.mod h1:mafyug7zYmDOusqa7G0dJV45qp4b6TDAN+pHN7ZUIBU=
|
||||
github.com/unidoc/unitype v0.2.0 h1:N+ZKjwz8UDU0qa1IYzstDLffvQEctFo+bo6b6ZqW+9M=
|
||||
github.com/unidoc/unitype v0.2.0/go.mod h1:mafyug7zYmDOusqa7G0dJV45qp4b6TDAN+pHN7ZUIBU=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5 h1:58fnuSXlxZmFdJyvtTFVmVhcMLU6v5fEb/ok4wyqtNU=
|
||||
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
@ -45,12 +51,15 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w
|
||||
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200413165638-669c56c373c4 h1:opSr2sbRXk5X5/givKrrKj9HXxFpW2sdCiP8MJSKLQY=
|
||||
golang.org/x/sys v0.0.0-20200413165638-669c56c373c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200523222454-059865788121 h1:rITEj+UZHYC927n8GT97eC3zrpzXdb/voyeOuVKS46o=
|
||||
golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
|
||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
|
@ -459,25 +459,35 @@ func (cmap *CMap) toBfData() string {
|
||||
}
|
||||
sort.Slice(codes, func(i, j int) bool { return codes[i] < codes[j] })
|
||||
|
||||
// charRanges is a list of the contiguous character code ranges in `codes`.
|
||||
// Generate CMap character code ranges.
|
||||
// The code ranges are intervals of consecutive charcodes (c1 = c0 + 1)
|
||||
// mapping to consecutive runes.
|
||||
// Start with a range consisting of the current character code for both ends
|
||||
// of the interval. Check if the next character is consecutive to the upper
|
||||
// end of the interval and if it maps to the next rune. If so, increase the
|
||||
// interval to the right. Otherwise, append the current range to the
|
||||
// character ranges slice and start over. Continue the process until all
|
||||
// character codes have been mapped to code ranges.
|
||||
var charRanges []charRange
|
||||
c0, c1 := codes[0], codes[0]+1
|
||||
currCharRange := charRange{codes[0], codes[0]}
|
||||
prevRune := cmap.codeToUnicode[codes[0]]
|
||||
for _, c := range codes[1:] {
|
||||
if c != c1 {
|
||||
charRanges = append(charRanges, charRange{c0, c1})
|
||||
c0 = c
|
||||
currRune := cmap.codeToUnicode[c]
|
||||
if c == currCharRange.code1+1 && currRune == prevRune+1 {
|
||||
currCharRange.code1 = c
|
||||
} else {
|
||||
charRanges = append(charRanges, currCharRange)
|
||||
currCharRange.code0, currCharRange.code1 = c, c
|
||||
}
|
||||
c1 = c + 1
|
||||
}
|
||||
if c1 > c0 {
|
||||
charRanges = append(charRanges, charRange{c0, c1})
|
||||
prevRune = currRune
|
||||
}
|
||||
charRanges = append(charRanges, currCharRange)
|
||||
|
||||
// fbChars is a list of single character ranges. fbRanges is a list of multiple character ranges.
|
||||
var fbChars []CharCode
|
||||
var fbRanges []fbRange
|
||||
for _, cr := range charRanges {
|
||||
if cr.code0+1 == cr.code1 {
|
||||
if cr.code0 == cr.code1 {
|
||||
fbChars = append(fbChars, cr.code0)
|
||||
} else {
|
||||
fbRanges = append(fbRanges, fbRange{
|
||||
@ -512,7 +522,7 @@ func (cmap *CMap) toBfData() string {
|
||||
for j := 0; j < n; j++ {
|
||||
rng := fbRanges[i*maxBfEntries+j]
|
||||
r := rng.r0
|
||||
lines = append(lines, fmt.Sprintf("<%04x><%04x> <%04x>", rng.code0, rng.code1-1, r))
|
||||
lines = append(lines, fmt.Sprintf("<%04x><%04x> <%04x>", rng.code0, rng.code1, r))
|
||||
}
|
||||
lines = append(lines, "endbfrange")
|
||||
}
|
||||
|
@ -31,6 +31,7 @@ type TrueTypeFontEncoder struct {
|
||||
}
|
||||
|
||||
// SubsetRegistered subsets `enc` to only registered runes (that have been registered via encoding).
|
||||
// NOTE: Make sure to call this shortly before writing (once all needed runes have been registered).
|
||||
func (enc *TrueTypeFontEncoder) SubsetRegistered() {
|
||||
common.Log.Info("TTF Subset: Pruning")
|
||||
for r := range enc.runeToGIDMap {
|
||||
|
@ -343,6 +343,12 @@ type PdfAnnotationWidget struct {
|
||||
processing bool // Used in ToPdfObject serialization to avoid infinite loops for merged-in annots.
|
||||
}
|
||||
|
||||
// Field returns the parent form field of the widget annotation, if one exists.
|
||||
// NOTE: the method returns nil if the parent form field has not been parsed.
|
||||
func (widget *PdfAnnotationWidget) Field() *PdfField {
|
||||
return widget.parent
|
||||
}
|
||||
|
||||
// PdfAnnotationWatermark represents Watermark annotations.
|
||||
// (Section 12.5.6.22).
|
||||
type PdfAnnotationWatermark struct {
|
||||
|
@ -43,6 +43,8 @@ type PdfFont struct {
|
||||
// SubsetRegistered subsets the font to only the glyphs that have been registered by the encoder.
|
||||
// NOTE: This only works on fonts that support subsetting. For unsupported fonts this is a no-op, although a debug
|
||||
// message is emitted. Currently supported fonts are embedded Truetype CID fonts (type 0).
|
||||
// NOTE: Make sure to call this shortly before writing (once all needed runes have been registered).
|
||||
// If using package creator, use its EnableFontSubsetting method instead.
|
||||
func (font *PdfFont) SubsetRegistered() error {
|
||||
switch t := font.context.(type) {
|
||||
case *pdfFontType0:
|
||||
|
@ -830,7 +830,17 @@ func NewCompositePdfFontFromTTF(r io.ReadSeeker) (*PdfFont, error) {
|
||||
encoder: ttf.NewEncoder(),
|
||||
}
|
||||
|
||||
type0.toUnicodeCmap = ttf.MakeToUnicode()
|
||||
// Generate CMap for the Type 0 font, which is the inverse of ttf.Chars.
|
||||
if len(ttf.Chars) > 0 {
|
||||
codeToUnicode := make(map[cmap.CharCode]rune, len(ttf.Chars))
|
||||
for r, gid := range ttf.Chars {
|
||||
cid := cmap.CharCode(gid)
|
||||
if rn, ok := codeToUnicode[cid]; !ok || (ok && rn > r) {
|
||||
codeToUnicode[cid] = r
|
||||
}
|
||||
}
|
||||
type0.toUnicodeCmap = cmap.NewToUnicodeCMap(codeToUnicode)
|
||||
}
|
||||
|
||||
// Build Font.
|
||||
font := PdfFont{
|
||||
|
@ -6,6 +6,7 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
@ -160,3 +161,51 @@ endobj
|
||||
_ = raw
|
||||
t.Skip("Not implemented yet")
|
||||
}
|
||||
|
||||
func TestRepairAcroForm(t *testing.T) {
|
||||
f, err := os.Open("./testdata/OoPdfFormExample.pdf")
|
||||
require.NoError(t, err)
|
||||
defer f.Close()
|
||||
|
||||
reader, err := NewPdfReader(f)
|
||||
require.NoError(t, err)
|
||||
|
||||
original := *reader.AcroForm.Fields
|
||||
reader.AcroForm.Fields = nil
|
||||
require.NoError(t, reader.RepairAcroForm(nil))
|
||||
repaired := *reader.AcroForm.Fields
|
||||
require.ElementsMatch(t, original, repaired)
|
||||
}
|
||||
|
||||
func TestAcroFormNeedsRepair(t *testing.T) {
|
||||
f, err := os.Open("./testdata/OoPdfFormExample.pdf")
|
||||
require.NoError(t, err)
|
||||
defer f.Close()
|
||||
|
||||
reader, err := NewPdfReader(f)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Original AcroForm repair status check.
|
||||
needsRepair, err := reader.AcroFormNeedsRepair()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, needsRepair, false)
|
||||
|
||||
// Nil AcroForm repair status check.
|
||||
reader.AcroForm = nil
|
||||
needsRepair, err = reader.AcroFormNeedsRepair()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, needsRepair, true)
|
||||
|
||||
// Repaired AcroForm repair status check.
|
||||
require.NoError(t, reader.RepairAcroForm(nil))
|
||||
needsRepair, err = reader.AcroFormNeedsRepair()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, needsRepair, false)
|
||||
|
||||
// Missing AcroForm fields repair status check.
|
||||
fields := (*reader.AcroForm.Fields)[1:]
|
||||
reader.AcroForm.Fields = &fields
|
||||
needsRepair, err = reader.AcroFormNeedsRepair()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, needsRepair, true)
|
||||
}
|
||||
|
@ -16,11 +16,12 @@ import (
|
||||
// OutlineDest represents the destination of an outline item.
|
||||
// It holds the page and the position on the page an outline item points to.
|
||||
type OutlineDest struct {
|
||||
Page int64 `json:"page"`
|
||||
Mode string `json:"mode"`
|
||||
X float64 `json:"x"`
|
||||
Y float64 `json:"y"`
|
||||
Zoom float64 `json:"zoom"`
|
||||
PageObj *core.PdfIndirectObject `json:"-"`
|
||||
Page int64 `json:"page"`
|
||||
Mode string `json:"mode"`
|
||||
X float64 `json:"x"`
|
||||
Y float64 `json:"y"`
|
||||
Zoom float64 `json:"zoom"`
|
||||
}
|
||||
|
||||
// NewOutlineDest returns a new outline destination which can be used
|
||||
@ -56,10 +57,18 @@ func newOutlineDestFromPdfObject(o core.PdfObject, r *PdfReader) (*OutlineDest,
|
||||
// Page object is provided. Identify page number using the reader.
|
||||
if _, pageNum, err := r.PageFromIndirectObject(pageInd); err == nil {
|
||||
dest.Page = int64(pageNum - 1)
|
||||
} else {
|
||||
common.Log.Debug("WARN: could not get page index for page %+v", pageInd)
|
||||
}
|
||||
} else if pageNum, ok := core.GetIntVal(pageObj); ok {
|
||||
// Page number is provided.
|
||||
dest.Page = int64(pageNum)
|
||||
dest.PageObj = pageInd
|
||||
} else if pageIdx, ok := core.GetIntVal(pageObj); ok {
|
||||
// Page index is provided. Get indirect object to page.
|
||||
if pageIdx >= 0 && pageIdx < len(r.PageList) {
|
||||
dest.PageObj = r.PageList[pageIdx].GetPageAsIndirectObject()
|
||||
} else {
|
||||
common.Log.Debug("WARN: could not get page container for page %d", pageIdx)
|
||||
}
|
||||
dest.Page = int64(pageIdx)
|
||||
} else {
|
||||
return nil, fmt.Errorf("invalid outline destination page: %T", pageObj)
|
||||
}
|
||||
@ -106,14 +115,22 @@ func newOutlineDestFromPdfObject(o core.PdfObject, r *PdfReader) (*OutlineDest,
|
||||
|
||||
// ToPdfObject returns a PDF object representation of the outline destination.
|
||||
func (od OutlineDest) ToPdfObject() core.PdfObject {
|
||||
if od.Page < 0 || od.Mode == "" {
|
||||
if (od.PageObj == nil && od.Page < 0) || od.Mode == "" {
|
||||
return core.MakeNull()
|
||||
}
|
||||
|
||||
dest := core.MakeArray(
|
||||
core.MakeInteger(od.Page),
|
||||
core.MakeName(od.Mode),
|
||||
)
|
||||
// Add destination page.
|
||||
dest := core.MakeArray()
|
||||
if od.PageObj != nil {
|
||||
// Internal outline.
|
||||
dest.Append(od.PageObj)
|
||||
} else {
|
||||
// External outline.
|
||||
dest.Append(core.MakeInteger(od.Page))
|
||||
}
|
||||
|
||||
// Add destination mode.
|
||||
dest.Append(core.MakeName(od.Mode))
|
||||
|
||||
// See section 12.3.2.2 "Explicit Destinations" (page 374).
|
||||
switch od.Mode {
|
||||
@ -180,10 +197,11 @@ func (o *Outline) ToPdfOutline() *PdfOutline {
|
||||
|
||||
// Create outline items.
|
||||
var outlineItems []*PdfOutlineItem
|
||||
var lenDescendants int64
|
||||
var prev *PdfOutlineItem
|
||||
|
||||
for _, item := range o.Entries {
|
||||
outlineItem, _ := item.ToPdfOutlineItem()
|
||||
outlineItem, lenChildren := item.ToPdfOutlineItem()
|
||||
outlineItem.Parent = &outline.PdfOutlineTreeNode
|
||||
|
||||
if prev != nil {
|
||||
@ -192,15 +210,18 @@ func (o *Outline) ToPdfOutline() *PdfOutline {
|
||||
}
|
||||
|
||||
outlineItems = append(outlineItems, outlineItem)
|
||||
lenDescendants += lenChildren
|
||||
prev = outlineItem
|
||||
}
|
||||
|
||||
// Add outline linked list properties.
|
||||
lenOutlineItems := int64(len(outlineItems))
|
||||
lenDescendants += int64(lenOutlineItems)
|
||||
|
||||
if lenOutlineItems > 0 {
|
||||
outline.First = &outlineItems[0].PdfOutlineTreeNode
|
||||
outline.Last = &outlineItems[lenOutlineItems-1].PdfOutlineTreeNode
|
||||
outline.Count = &lenOutlineItems
|
||||
outline.Count = &lenDescendants
|
||||
}
|
||||
|
||||
return outline
|
||||
|
102
model/reader.go
102
model/reader.go
@ -507,6 +507,108 @@ func (r *PdfReader) GetOutlines() (*Outline, error) {
|
||||
return outline, nil
|
||||
}
|
||||
|
||||
// AcroFormRepairOptions holds the options used when rebuilding the AcroForm.
// It currently declares no fields.
type AcroFormRepairOptions struct{}
|
||||
|
||||
// RepairAcroForm attempts to rebuild the AcroForm fields using the widget
|
||||
// annotations present in the document pages. Pass nil for the opts parameter
|
||||
// in order to use the default options.
|
||||
// NOTE: Currently, the opts parameter is declared in order to enable adding
|
||||
// future options, but passing nil will always result in the default options
|
||||
// being used.
|
||||
func (r *PdfReader) RepairAcroForm(opts *AcroFormRepairOptions) error {
|
||||
var fields []*PdfField
|
||||
fieldCache := map[*core.PdfIndirectObject]struct{}{}
|
||||
for _, page := range r.PageList {
|
||||
annotations, err := page.GetAnnotations()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, annotation := range annotations {
|
||||
var field *PdfField
|
||||
switch t := annotation.GetContext().(type) {
|
||||
case *PdfAnnotationWidget:
|
||||
if t.parent != nil {
|
||||
field = t.parent
|
||||
break
|
||||
}
|
||||
if parentObj, ok := core.GetIndirect(t.Parent); ok {
|
||||
field, err = r.newPdfFieldFromIndirectObject(parentObj, nil)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
common.Log.Debug("WARN: could not parse form field %+v: %v", parentObj, err)
|
||||
}
|
||||
if t.container != nil {
|
||||
field, err = r.newPdfFieldFromIndirectObject(t.container, nil)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
common.Log.Debug("WARN: could not parse form field %+v: %v", t.container, err)
|
||||
}
|
||||
}
|
||||
if field == nil {
|
||||
continue
|
||||
}
|
||||
if _, ok := fieldCache[field.container]; ok {
|
||||
continue
|
||||
}
|
||||
fieldCache[field.container] = struct{}{}
|
||||
fields = append(fields, field)
|
||||
}
|
||||
}
|
||||
|
||||
if len(fields) == 0 {
|
||||
return nil
|
||||
}
|
||||
if r.AcroForm == nil {
|
||||
r.AcroForm = NewPdfAcroForm()
|
||||
}
|
||||
r.AcroForm.Fields = &fields
|
||||
return nil
|
||||
}
|
||||
|
||||
// AcroFormNeedsRepair returns true if the document contains widget annotations
|
||||
// linked to fields which are not referenced in the AcroForm. The AcroForm can
|
||||
// be repaired using the RepairAcroForm method of the reader.
|
||||
func (r *PdfReader) AcroFormNeedsRepair() (bool, error) {
|
||||
var fields []*PdfField
|
||||
if r.AcroForm != nil {
|
||||
fields = r.AcroForm.AllFields()
|
||||
}
|
||||
|
||||
fieldMap := make(map[*PdfField]struct{}, len(fields))
|
||||
for _, field := range fields {
|
||||
fieldMap[field] = struct{}{}
|
||||
}
|
||||
|
||||
for _, page := range r.PageList {
|
||||
annotations, err := page.GetAnnotations()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
for _, annotation := range annotations {
|
||||
widget, ok := annotation.GetContext().(*PdfAnnotationWidget)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
field := widget.Field()
|
||||
if field == nil {
|
||||
return true, nil
|
||||
}
|
||||
if _, ok := fieldMap[field]; !ok {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// loadForms loads the AcroForm.
|
||||
func (r *PdfReader) loadForms() (*PdfAcroForm, error) {
|
||||
if r.parser.GetCrypter() != nil && !r.parser.IsAuthenticated() {
|
||||
|
Loading…
x
Reference in New Issue
Block a user