From 7a9a8ff542c4097366d272b078dcfaa48eace8c8 Mon Sep 17 00:00:00 2001
From: Gunnsteinn Hall
Date: Tue, 25 Jun 2019 08:08:51 +0000
Subject: [PATCH] Add FDF merge test case for form filling and flattening with change detection (#98)

Manually verified that the output PDFs look good; hash checks are left in
place to detect changes. If the output changes in the future, the hash
mismatch will trigger a failure, upon which the output PDFs need to be
re-checked and the hashes updated if the change is acceptable.
---
 Jenkinsfile                       |   1 +
 internal/e2etest/fdfmerge_test.go | 176 ++++++++++++++++++++++++++++++
 2 files changed, 177 insertions(+)
 create mode 100644 internal/e2etest/fdfmerge_test.go

diff --git a/Jenkinsfile b/Jenkinsfile
index c2f98986..69d47f31 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -15,6 +15,7 @@ node {
     env.UNIDOC_PASSTHROUGH_TESTDATA="/home/jenkins/corpus/unidoc-e2e-testdata"
     env.UNIDOC_ALLOBJECTS_TESTDATA="/home/jenkins/corpus/unidoc-e2e-testdata"
     env.UNIDOC_SPLIT_TESTDATA="/home/jenkins/corpus/unidoc-e2e-split-testdata"
+    env.UNIDOC_FDFMERGE_TESTDATA="/home/jenkins/corpus/fdfmerge-testdata"
     env.UNIDOC_GS_BIN_PATH="/usr/bin/gs"
     // Hack for 1.11.5 testing work.
     env.CGO_ENABLED="0"
diff --git a/internal/e2etest/fdfmerge_test.go b/internal/e2etest/fdfmerge_test.go
new file mode 100644
index 00000000..60833b05
--- /dev/null
+++ b/internal/e2etest/fdfmerge_test.go
@@ -0,0 +1,176 @@
+/*
+ * This file is subject to the terms and conditions defined in
+ * file 'LICENSE.md', which is part of this source code package.
+ */
+
+package e2etest
+
+import (
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"runtime/debug"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/unidoc/unipdf/v3/annotator"
+	"github.com/unidoc/unipdf/v3/fdf"
+	"github.com/unidoc/unipdf/v3/model"
+)
+
+// The FDF merge tests merge FDF form data into template PDF files and flatten the result to an output PDF file.
+// Output files are checked with ghostscript and memory consumption is measured.
+// Set environment variables:
+//	UNIDOC_E2E_FORCE_TESTS to "1" to force the tests to execute.
+//	UNIDOC_FDFMERGE_TESTDATA to the path of the corpus folder.
+//	UNIDOC_GS_BIN_PATH to the path of the ghostscript binary (gs) for validation.
+var (
+	fdfMergeCorpusFolder = os.Getenv("UNIDOC_FDFMERGE_TESTDATA")
+)
+
+// fdfMergeHashes defines a list of known output hashes to ensure that the output is constant.
+// If a hash changes, the cause needs to be investigated and the hash updated only if the change is accepted.
+var fdfMergeHashes = map[string]string{
+	"NW_null_Business_V04.fdf":      "45ccf325025b366d6e9c90bde53da1aa",
+	"NW_null_Business_V05.fdf":      "49c0c89cd2384c8a75f9cb20a778698f",
+	"NW_null_Business_V05.v1.2.fdf": "49c0c89cd2384c8a75f9cb20a778698f",
+	"NW_null_Contract_V04.fdf":      "80ebec761eba106cda38a4613819634e",
+	"N_null_Contract.fdf":           "557e9d5788ba418e3e5f6ffdf710a3b9",
+	"Network_Contract_V01.fdf":      "3bf058c9e4cefae222c92caa28fca603",
+	"checkmark_check.fdf":           "b95c8f8c0673e5541d28f212c0b25b5b",
+	"checkmark_circle.fdf":          "06bda9e3539e63aebdfc20f8fe3d83e9",
+	"checkmark_cross.fdf":           "34dc015cf122bffcef8c62c559fc0ac7",
+	"checkmark_diamond.fdf":         "5a3c2951da0aa2943e9007d4baed82bf",
+	"checkmark_square.fdf":          "83d97592cd75c2c62a2e6ae2962379db",
+	"checkmark_star.fdf":            "2e460f069e474714573724255fcdffda",
+	"test_fail.fdf":                 "d7eb6071341f823a64f7234a20830d74",
+}
+
+// TestFdfMerging tests filling (FDF merge) and flattening of form data and annotations.
+func TestFdfMerging(t *testing.T) {
+	if len(fdfMergeCorpusFolder) == 0 {
+		if forceTest {
+			t.Fatalf("UNIDOC_FDFMERGE_TESTDATA not set")
+		}
+		t.Skipf("UNIDOC_FDFMERGE_TESTDATA not set")
+	}
+
+	files, err := ioutil.ReadDir(fdfMergeCorpusFolder)
+	if err != nil {
+		if forceTest {
+			t.Fatalf("Error opening %s: %v", fdfMergeCorpusFolder, err)
+		}
+		t.Skipf("Skipping FDF merge tests - unable to open UNIDOC_FDFMERGE_TESTDATA (%s)", fdfMergeCorpusFolder)
+	}
+
+	// Make a temporary folder and clean up after.
+	tempdir, err := ioutil.TempDir("", "unidoc_fdfmerge")
+	require.NoError(t, err)
+	defer os.RemoveAll(tempdir)
+
+	matchcount := 0
+	for _, file := range files {
+		if strings.ToLower(filepath.Ext(file.Name())) != ".fdf" {
+			continue
+		}
+		fdfPath := filepath.Join(fdfMergeCorpusFolder, file.Name())
+		bareName := strings.TrimSuffix(file.Name(), ".fdf")
+		pdfPath := filepath.Join(fdfMergeCorpusFolder, bareName+".pdf")
+
+		// Ensure memory is garbage collected prior to running for consistency.
+		debug.FreeOSMemory()
+
+		t.Logf("%s", file.Name())
+		params := fdfMergeParams{
+			templatePath: pdfPath,
+			fdfPath:      fdfPath,
+			outPath:      filepath.Join(tempdir, "filled_flatten_1_"+bareName+".pdf"),
+			gsValidation: len(ghostscriptBinPath) > 0,
+		}
+		fdfMergeSingle(t, params)
+
+		hash, err := hashFile(params.outPath)
+		require.NoError(t, err)
+
+		knownHash, has := fdfMergeHashes[file.Name()]
+		if has {
+			require.Equal(t, knownHash, hash)
+			matchcount++
+		} else {
+			t.Logf("Output: %s", params.outPath)
+			t.Logf("%s - hash: %s not in the list of known hashes", file.Name(), hash)
+		}
+	}
+
+	// Ensure all the defined hashes were found.
+	require.Equal(t, len(fdfMergeHashes), matchcount)
+
+	t.Logf("FDF merge test complete for %d cases in %s", matchcount, fdfMergeCorpusFolder)
+}
+
+type fdfMergeParams struct {
+	templatePath string // template PDF file.
+	fdfPath      string // form data FDF file.
+	outPath      string
+	gsValidation bool
+}
+
+func fdfMergeSingle(t *testing.T, params fdfMergeParams) {
+	measure := startMemoryMeasurement()
+
+	fdfData, err := fdf.LoadFromPath(params.fdfPath)
+	require.NoError(t, err)
+
+	f, err := os.Open(params.templatePath)
+	require.NoError(t, err)
+	defer f.Close()
+
+	pdfReader, err := model.NewPdfReader(f)
+	require.NoError(t, err)
+
+	// Populate the form data.
+	err = pdfReader.AcroForm.Fill(fdfData)
+	require.NoError(t, err)
+
+	// Flatten form.
+	fieldAppearance := annotator.FieldAppearance{OnlyIfMissing: true, RegenerateTextFields: true}
+
+	// Customize certain styles of the flattened appearance (checkmark rune and auto font size).
+	style := fieldAppearance.Style()
+	style.CheckmarkRune = '✖'
+	style.AutoFontSizeFraction = 0.70
+	fieldAppearance.SetStyle(style)
+
+	err = pdfReader.FlattenFields(true, fieldAppearance)
+	require.NoError(t, err)
+
+	// Write out.
+	model.SetPdfProducer("UniDoc")
+	pdfWriter := model.NewPdfWriter()
+	pdfWriter.SetForms(nil)
+
+	for _, p := range pdfReader.PageList {
+		// FIXME: Hack needed to ensure that annotations are loaded.
+		// TODO: Remove. Resolved in PR#93.
+		{
+			_, err := p.GetAnnotations()
+			require.NoError(t, err)
+		}
+
+		err = pdfWriter.AddPage(p)
+		require.NoError(t, err)
+	}
+
+	fout, err := os.Create(params.outPath)
+	require.NoError(t, err)
+	defer fout.Close()
+
+	err = pdfWriter.Write(fout)
+	require.NoError(t, err)
+
+	measure.Stop()
+	summary := measure.Summary()
+	t.Logf("%s - summary %s", params.templatePath, summary)
+}
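
Note on shared test fixtures: forceTest, ghostscriptBinPath, startMemoryMeasurement and hashFile are not added by this patch; they are assumed to be helpers already defined elsewhere in the e2etest package. As a rough sketch of what the hash-based change detection relies on, hashFile can be thought of as returning a hex digest of the output file. The version below is illustrative only, not the patch's actual helper; MD5 is an assumption based on the 32-character hashes in fdfMergeHashes.

package e2etest

import (
	"crypto/md5"
	"encoding/hex"
	"io"
	"os"
)

// hashFile returns a hex digest of the contents of the file at path.
// Sketch only: MD5 is assumed here because the known hashes above are
// 32 hex characters; the real helper lives elsewhere in the e2etest package.
func hashFile(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	h := md5.New()
	if _, err := io.Copy(h, f); err != nil {
		return "", err
	}
	return hex.EncodeToString(h.Sum(nil)), nil
}

With a helper along these lines, accepting an intentional output change amounts to manually re-checking the new PDFs and replacing the corresponding entries in fdfMergeHashes with the newly reported digests.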