mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-07 19:29:16 +08:00
Merge pull request #329 from unidoc/v3-integration-testing
Integration test coverage
This commit is contained in:
commit
2253e29a12
6
Jenkinsfile
vendored
6
Jenkinsfile
vendored
@ -7,7 +7,11 @@ node {
|
||||
env.PATH="${root}/bin:${env.GOPATH}/bin:${env.PATH}"
|
||||
env.GOCACHE="off"
|
||||
env.UNIDOC_EXTRACT_FORCETEST="1"
|
||||
env.UNIDOC_E2E_FORCE_TESTS="1"
|
||||
env.UNIDOC_EXTRACT_TESTDATA="/home/jenkins/corpus/unidoc-extractor-testdata"
|
||||
env.UNIDOC_PASSTHROUGH_TESTDATA="/home/jenkins/corpus/unidoc-e2e-testdata"
|
||||
env.UNIDOC_ALLOBJECTS_TESTDATA="/home/jenkins/corpus/unidoc-e2e-testdata"
|
||||
env.UNIDOC_GS_BIN_PATH="/usr/bin/gs"
|
||||
|
||||
env.TMPDIR="${WORKSPACE}/temp"
|
||||
sh "mkdir -p ${env.TMPDIR}"
|
||||
@ -50,7 +54,7 @@ node {
|
||||
}
|
||||
|
||||
stage('Test coverage') {
|
||||
sh 'go test -coverprofile=coverage.out ./...'
|
||||
sh 'go test -coverprofile=coverage.out -covermode=atomic -coverpkg=./... ./...'
|
||||
sh '/home/jenkins/codecov.sh'
|
||||
sh 'gocover-cobertura < coverage.out > coverage.xml'
|
||||
step([$class: 'CoberturaPublisher', coberturaReportFile: 'coverage.xml'])
|
||||
|
@ -2,7 +2,7 @@ coverage:
|
||||
status:
|
||||
project:
|
||||
default:
|
||||
target: 35%
|
||||
target: 50%
|
||||
threshold: 1%
|
||||
patch: false
|
||||
changes: false
|
105
pdf/internal/e2etest/allobjects_test.go
Normal file
105
pdf/internal/e2etest/allobjects_test.go
Normal file
@ -0,0 +1,105 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package e2etest
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
"github.com/unidoc/unidoc/pdf/model"
|
||||
)
|
||||
|
||||
// The allobjects test probes through all objects in PDF files of a specified corpus, decoding streams.
|
||||
// Set environment variables:
|
||||
// UNIDOC_E2E_FORCE_TESTS to "1" to force the tests to execute.
|
||||
// UNIDOC_ALLOBJECTS_TESTDATA to the path of the corpus folder.
|
||||
// allObjectsCorpusFolder is the corpus folder used by the allobjects test, as
// read from the UNIDOC_ALLOBJECTS_TESTDATA environment variable. It is empty
// when the variable is not set.
var allObjectsCorpusFolder = os.Getenv("UNIDOC_ALLOBJECTS_TESTDATA")
|
||||
|
||||
func TestAllObjects(t *testing.T) {
|
||||
if len(allObjectsCorpusFolder) == 0 {
|
||||
if forceTest {
|
||||
t.Fatalf("UNIDOC_ALLOBJECTS_TESTDATA not set")
|
||||
}
|
||||
}
|
||||
|
||||
files, err := ioutil.ReadDir(allObjectsCorpusFolder)
|
||||
if err != nil {
|
||||
if forceTest {
|
||||
t.Fatalf("Error opening %s: %v", allObjectsCorpusFolder, err)
|
||||
}
|
||||
t.Skipf("Skipping allobjects test - unable to open UNIDOC_ALLOBJECTS_TESTDATA (%s)", allObjectsCorpusFolder)
|
||||
return
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
fpath := filepath.Join(allObjectsCorpusFolder, file.Name())
|
||||
t.Logf("%s", fpath)
|
||||
err := probeAllObjectsSinglePdf(fpath)
|
||||
if err != nil {
|
||||
t.Fatalf("Error: %v", err)
|
||||
}
|
||||
}
|
||||
t.Logf("allObjects test complete for %d files in %s", len(files), allObjectsCorpusFolder)
|
||||
}
|
||||
|
||||
func probeAllObjectsSinglePdf(inputPath string) error {
|
||||
f, err := os.Open(inputPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
pdfReader, err := model.NewPdfReader(f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
isEncrypted, err := pdfReader.IsEncrypted()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Try decrypting with an empty one.
|
||||
if isEncrypted {
|
||||
auth, err := pdfReader.Decrypt([]byte(""))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !auth {
|
||||
return errors.New("unauthorized read")
|
||||
}
|
||||
}
|
||||
|
||||
_, err = pdfReader.GetNumPages()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
objNums := pdfReader.GetObjectNums()
|
||||
|
||||
// Output.
|
||||
for _, objNum := range objNums {
|
||||
obj, err := pdfReader.GetIndirectObjectByNumber(objNum)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if stream, is := obj.(*core.PdfObjectStream); is {
|
||||
_, err := core.DecodeStream(stream)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
177
pdf/internal/e2etest/passthrough_test.go
Normal file
177
pdf/internal/e2etest/passthrough_test.go
Normal file
@ -0,0 +1,177 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package e2etest
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/unidoc/unidoc/common"
|
||||
"github.com/unidoc/unidoc/pdf/model"
|
||||
)
|
||||
|
||||
// The passthrough test loads each PDF in a corpus, writes it back out and performs a sanity check on the output with ghostscript.
|
||||
// Set environment variables:
|
||||
// UNIDOC_E2E_FORCE_TESTS to "1" to force the tests to execute.
|
||||
// UNIDOC_PASSTHROUGH_TESTDATA to the path of the corpus folder.
|
||||
// UNIDOC_GS_BIN_PATH to the path of the ghostscript binary (gs).
|
||||
// forceTest is true when UNIDOC_E2E_FORCE_TESTS is set to "1". The e2e tests
// then fail, rather than skip, when their corpus folder is unavailable.
var forceTest = os.Getenv("UNIDOC_E2E_FORCE_TESTS") == "1"

// passthroughCorpusFolder is the corpus folder used by the passthrough test,
// as read from UNIDOC_PASSTHROUGH_TESTDATA. It is empty when the variable is
// not set.
var passthroughCorpusFolder = os.Getenv("UNIDOC_PASSTHROUGH_TESTDATA")
|
||||
|
||||
func TestPassthrough(t *testing.T) {
|
||||
if len(passthroughCorpusFolder) == 0 {
|
||||
if forceTest {
|
||||
t.Fatalf("UNIDOC_PASSTHROUGH_TESTDATA not set")
|
||||
}
|
||||
}
|
||||
|
||||
files, err := ioutil.ReadDir(passthroughCorpusFolder)
|
||||
if err != nil {
|
||||
if forceTest {
|
||||
t.Fatalf("Error opening %s: %v", passthroughCorpusFolder, err)
|
||||
}
|
||||
t.Skipf("Skipping passthrough bench - unable to open UNIDOC_PASSTHROUGH_TESTDATA (%s)", passthroughCorpusFolder)
|
||||
return
|
||||
}
|
||||
|
||||
// Make a temporary folder and clean up after.
|
||||
tempdir, err := ioutil.TempDir("", "unidoc_passthrough")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temporary folder")
|
||||
}
|
||||
defer os.RemoveAll(tempdir)
|
||||
|
||||
for _, file := range files {
|
||||
t.Logf("%s", file.Name())
|
||||
fpath := filepath.Join(passthroughCorpusFolder, file.Name())
|
||||
params := passthroughParams{
|
||||
inputPath: fpath,
|
||||
outPath: filepath.Join(tempdir, "1.pdf"),
|
||||
gsValidation: len(ghostscriptBinPath) > 0,
|
||||
}
|
||||
err := passthroughSinglePdf(params)
|
||||
if err != nil {
|
||||
t.Fatalf("Error: %v", err)
|
||||
}
|
||||
}
|
||||
t.Logf("Passthrough benchmark complete for %d files in %s", len(files), passthroughCorpusFolder)
|
||||
}
|
||||
|
||||
// passthroughParams carries the settings for a single passthroughSinglePdf run.
type passthroughParams struct {
	// inputPath is the PDF to read; outPath is where the rewritten PDF goes.
	inputPath, outPath string
	// gsValidation requests ghostscript validation of both input and output.
	gsValidation bool
}
|
||||
|
||||
func passthroughSinglePdf(params passthroughParams) error {
|
||||
file, err := os.Open(params.inputPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
reader, err := model.NewPdfReader(file)
|
||||
if err != nil {
|
||||
common.Log.Debug("Reader create error %s\n", err)
|
||||
return err
|
||||
}
|
||||
|
||||
isEncrypted, err := reader.IsEncrypted()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if isEncrypted {
|
||||
valid, err := reader.Decrypt([]byte(""))
|
||||
if err != nil {
|
||||
common.Log.Debug("Fail to decrypt: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
if !valid {
|
||||
return fmt.Errorf("Unable to access, encrypted")
|
||||
}
|
||||
}
|
||||
|
||||
numPages, err := reader.GetNumPages()
|
||||
if err != nil {
|
||||
common.Log.Debug("Failed to get number of pages")
|
||||
return err
|
||||
}
|
||||
|
||||
if numPages < 1 {
|
||||
common.Log.Debug("Empty pdf - nothing to be done!")
|
||||
return nil
|
||||
}
|
||||
|
||||
writer := model.NewPdfWriter()
|
||||
|
||||
// Optional content.
|
||||
ocProps, err := reader.GetOCProperties()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
writer.SetOCProperties(ocProps)
|
||||
|
||||
for j := 0; j < numPages; j++ {
|
||||
page, err := reader.GetPage(j + 1)
|
||||
if err != nil {
|
||||
common.Log.Debug("Get page error %s", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Load and set outlines (table of contents).
|
||||
outlineTree := reader.GetOutlineTree()
|
||||
|
||||
err = writer.AddPage(page)
|
||||
if err != nil {
|
||||
common.Log.Debug("Add page error %s", err)
|
||||
return err
|
||||
}
|
||||
|
||||
writer.AddOutlineTree(outlineTree)
|
||||
}
|
||||
|
||||
// Copy the forms over to the new document also.
|
||||
writer.SetForms(reader.AcroForm)
|
||||
|
||||
of, err := os.Create(params.outPath)
|
||||
if err != nil {
|
||||
common.Log.Debug("Failed to create file (%s)", err)
|
||||
return err
|
||||
}
|
||||
defer of.Close()
|
||||
|
||||
err = writer.Write(of)
|
||||
if err != nil {
|
||||
common.Log.Debug("WriteFile error")
|
||||
return err
|
||||
}
|
||||
|
||||
// GS validation of input, output pdfs.
|
||||
if params.gsValidation {
|
||||
common.Log.Debug("Validating input file")
|
||||
inputWarnings, err := validatePdf(params.inputPath, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
common.Log.Debug("Validating output file")
|
||||
|
||||
warnings, err := validatePdf(params.outPath, "")
|
||||
if err != nil && warnings > inputWarnings {
|
||||
common.Log.Debug("Input warnings %d vs output %d", inputWarnings, warnings)
|
||||
return fmt.Errorf("Invalid PDF input %d/ output %d warnings", inputWarnings, warnings)
|
||||
}
|
||||
common.Log.Debug("Valid PDF!")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
71
pdf/internal/e2etest/validate.go
Normal file
71
pdf/internal/e2etest/validate.go
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package e2etest
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
|
||||
"github.com/unidoc/unidoc/common"
|
||||
)
|
||||
|
||||
// To enable ghostscript validation, the path to the binary needs to be specified.
|
||||
// Set environment variable:
|
||||
// UNIDOC_GS_BIN_PATH to the path of the ghostscript binary (gs).
|
||||
// ghostscriptBinPath is the path of the ghostscript binary, as read from the
// UNIDOC_GS_BIN_PATH environment variable. It is empty when the variable is
// not set.
var ghostscriptBinPath = os.Getenv("UNIDOC_GS_BIN_PATH")
|
||||
|
||||
// validatePdf a pdf file using Ghostscript, returns an error if unable to execute.
|
||||
// Also returns the number of output warnings, which can be used as some sort of measure
|
||||
// of validity, especially when comparing with a transformed version of same file.
|
||||
func validatePdf(path string, password string) (int, error) {
|
||||
if len(ghostscriptBinPath) == 0 {
|
||||
return 0, errors.New("UNIDOC_GS_BIN_PATH not set")
|
||||
}
|
||||
common.Log.Debug("Validating: %s", path)
|
||||
|
||||
params := []string{"-dBATCH", "-dNODISPLAY", "-dNOPAUSE"}
|
||||
if len(password) > 0 {
|
||||
params = append(params, fmt.Sprintf("-sPDFPassword=%s", password))
|
||||
}
|
||||
params = append(params, path)
|
||||
|
||||
var (
|
||||
out bytes.Buffer
|
||||
errOut bytes.Buffer
|
||||
)
|
||||
cmd := exec.Command(ghostscriptBinPath, params...)
|
||||
cmd.Stdout = &out
|
||||
cmd.Stderr = &errOut
|
||||
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
common.Log.Debug("%s", out.String())
|
||||
common.Log.Debug("%s", errOut.String())
|
||||
common.Log.Error("GS failed with error %s", err)
|
||||
return 0, fmt.Errorf("GS failed with error (%s)", err)
|
||||
}
|
||||
|
||||
outputErr := errOut.String()
|
||||
warnings := strings.Count(outputErr, "****")
|
||||
common.Log.Debug("ERROR: - %d warnings %s", warnings, outputErr)
|
||||
|
||||
if warnings > 1 {
|
||||
if len(outputErr) > 80 {
|
||||
outputErr = outputErr[:80] // Trim the output.
|
||||
}
|
||||
common.Log.Debug("ERROR: Invalid - %d warnings %s", warnings, outputErr)
|
||||
return warnings, nil
|
||||
}
|
||||
|
||||
// Valid if no error.
|
||||
return 0, nil
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user