unipdf/internal/e2etest/fdfmerge_test.go
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/

package e2etest

import (
	"io/ioutil"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/unidoc/unipdf/v3/annotator"
	"github.com/unidoc/unipdf/v3/fdf"
	"github.com/unidoc/unipdf/v3/model"
)

// FDF merge tests merge FDF form data into template PDF files and flatten the filled forms to output PDF files.
// Output files are checked with ghostscript and memory consumption is measured.
// Set environment variables:
//     UNIDOC_E2E_FORCE_TESTS to "1" to force the tests to execute.
//     UNIDOC_FDFMERGE_TESTDATA to the path of the corpus folder.
//     UNIDOC_GS_BIN_PATH to the path of the ghostscript binary (gs) for validation.
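//
// Example invocation (the corpus and gs paths below are illustrative, not part of the repository):
//
//	UNIDOC_E2E_FORCE_TESTS=1 \
//	UNIDOC_FDFMERGE_TESTDATA=/path/to/fdfmerge/corpus \
//	UNIDOC_GS_BIN_PATH=/usr/bin/gs \
//	go test -v -run TestFdfMerging ./internal/e2etest/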
var (
	fdfMergeCorpusFolder = os.Getenv("UNIDOC_FDFMERGE_TESTDATA")
)

// fdfMergeHashes defines a list of known output hashes to ensure that the output is constant.
// If a hash changes, the cause needs to be investigated and the hash should only be updated if the change is accepted.
var fdfMergeHashes = map[string]string{
	"NW_null_Business_V04.fdf":      "6e33f219994e4b9ee1e1843c976504df",
	"NW_null_Business_V05.fdf":      "ff1f8bd39f9be9844a6d85bafe07c790",
	"NW_null_Business_V05.v1.2.fdf": "ff1f8bd39f9be9844a6d85bafe07c790",
	"NW_null_Contract_V04.fdf":      "a54f4b42dc34997cfb701ef647cdbdfe",
	"N_null_Contract.fdf":           "c173340d6492984532cf51a4f5ceb4b6",
	"Network_Contract_V01.fdf":      "0ae2537bf8a8366aa97c1ca965b88d1f",
	"checkmark_check.fdf":           "8892cdb01318421f8d198233b80ab8e3",
	"checkmark_circle.fdf":          "3b1e6ef6aae2a7497b090e0960d2c163",
	"checkmark_cross.fdf":           "6b16b6d7437a3f59a7e9e72c1ecfd59b",
	"checkmark_diamond.fdf":         "123488e428914832f21e213339ed74f1",
	"checkmark_square.fdf":          "d0ac69dac7a933e440a5005b1712edeb",
	"checkmark_star.fdf":            "1326f152fb8158dffc08e5bb51cba1bc",
	"test_fail.fdf":                 "9a90cef679d6b4c13017c73c2528ca75",
}
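
// hashFile (defined in a sibling file of this package) produces the hex digests compared against
// the map above. When an output change is reviewed and accepted, the new digest reported in the
// require.Equal failure output is the value to place here for the corresponding .fdf key.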

// Test filling (fdf merge) and flattening form data and annotations.
func TestFdfMerging(t *testing.T) {
	if len(fdfMergeCorpusFolder) == 0 {
		if forceTest {
			t.Fatalf("UNIDOC_FDFMERGE_TESTDATA not set")
		}
		t.Skipf("UNIDOC_FDFMERGE_TESTDATA not set")
	}

	files, err := ioutil.ReadDir(fdfMergeCorpusFolder)
	if err != nil {
		if forceTest {
			t.Fatalf("Error opening %s: %v", fdfMergeCorpusFolder, err)
		}
		t.Skipf("Skipping fdf merge tests - unable to open UNIDOC_FDFMERGE_TESTDATA (%s)", fdfMergeCorpusFolder)
	}

	// Make a temporary folder and clean up after.
	tempdir, err := ioutil.TempDir("", "unidoc_fdfmerge")
	require.NoError(t, err)
	defer os.RemoveAll(tempdir)

	matchcount := 0
	for _, file := range files {
		if strings.ToLower(filepath.Ext(file.Name())) != ".fdf" {
			continue
		}
		fdfPath := filepath.Join(fdfMergeCorpusFolder, file.Name())
		bareName := strings.TrimSuffix(file.Name(), ".fdf")
		pdfPath := filepath.Join(fdfMergeCorpusFolder, bareName+".pdf")
		t.Logf("%s", file.Name())

		params := fdfMergeParams{
			templatePath: pdfPath,
			fdfPath:      fdfPath,
			outPath:      filepath.Join(tempdir, "filled_flatten_1_"+bareName+".pdf"),
			gsValidation: len(ghostscriptBinPath) > 0,
		}
		fdfMergeSingle(t, params)

		hash, err := hashFile(params.outPath)
		require.NoError(t, err)

		knownHash, has := fdfMergeHashes[file.Name()]
		if has {
			require.Equal(t, knownHash, hash)
			matchcount++
		} else {
			t.Logf("Output: %s", params.outPath)
			t.Logf("%s - hash: %s not in the list of known hashes", file.Name(), hash)
		}
	}

	// Ensure all the defined hashes were found.
	require.Equal(t, len(fdfMergeHashes), matchcount)
	t.Logf("FDF merge benchmark complete for %d cases in %s", matchcount, fdfMergeCorpusFolder)
}

// fdfMergeParams describes a single fdf merge case: the input files, the output path and whether
// to validate the output with ghostscript.
type fdfMergeParams struct {
	templatePath string // template PDF file.
	fdfPath      string // form data FDF file.
	outPath      string // filled and flattened output PDF file.
	gsValidation bool   // validate the output PDF with ghostscript.
}

// fdfMergeSingle runs a single fdf merge case: fills the template form with the FDF data,
// flattens the form fields and writes the result to params.outPath.
func fdfMergeSingle(t *testing.T, params fdfMergeParams) {
	measure := startMemoryMeasurement()

	fdfData, err := fdf.LoadFromPath(params.fdfPath)
	require.NoError(t, err)

	f, err := os.Open(params.templatePath)
	require.NoError(t, err)
	defer f.Close()

	pdfReader, err := model.NewPdfReader(f)
	require.NoError(t, err)

	// Populate the form data.
	err = pdfReader.AcroForm.Fill(fdfData)
	require.NoError(t, err)

	// Flatten form.
	fieldAppearance := annotator.FieldAppearance{OnlyIfMissing: true, RegenerateTextFields: true}

	// NOTE: To customize certain styles try:
	style := fieldAppearance.Style()
	style.CheckmarkRune = '✖'
	style.AutoFontSizeFraction = 0.70
	fieldAppearance.SetStyle(style)

	err = pdfReader.FlattenFields(true, fieldAppearance)
	require.NoError(t, err)

	// Write out.
	model.SetPdfProducer("UniDoc")
	pdfWriter := model.NewPdfWriter()
	pdfWriter.SetForms(nil)

	for _, p := range pdfReader.PageList {
		err = pdfWriter.AddPage(p)
		require.NoError(t, err)
	}

	fout, err := os.Create(params.outPath)
	require.NoError(t, err)
	defer fout.Close()

	err = pdfWriter.Write(fout)
	require.NoError(t, err)

	measure.Stop()
	summary := measure.Summary()
	t.Logf("%s - summary %s", params.templatePath, summary)
}
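
// Note: forceTest, ghostscriptBinPath, hashFile and startMemoryMeasurement (with its Stop and
// Summary methods) are package-level helpers defined in sibling files of the e2etest package and
// are not shown here. As a rough, illustrative sketch (not the actual implementation), the memory
// measurement can be thought of as a TotalAlloc delta taken with runtime.ReadMemStats:
//
//	var before, after runtime.MemStats
//	runtime.ReadMemStats(&before)
//	// ... run the fdf merge ...
//	runtime.ReadMemStats(&after)
//	allocated := after.TotalAlloc - before.TotalAlloc // bytes allocated during the run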