Merge pull request #329 from unidoc/v3-integration-testing

Integration test coverage
This commit is contained in:
Gunnsteinn Hall 2019-01-28 17:35:46 +00:00 committed by GitHub
commit 2253e29a12
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 359 additions and 2 deletions

6
Jenkinsfile vendored
View File

@ -7,7 +7,11 @@ node {
env.PATH="${root}/bin:${env.GOPATH}/bin:${env.PATH}"
env.GOCACHE="off"
env.UNIDOC_EXTRACT_FORCETEST="1"
env.UNIDOC_E2E_FORCE_TESTS="1"
env.UNIDOC_EXTRACT_TESTDATA="/home/jenkins/corpus/unidoc-extractor-testdata"
env.UNIDOC_PASSTHROUGH_TESTDATA="/home/jenkins/corpus/unidoc-e2e-testdata"
env.UNIDOC_ALLOBJECTS_TESTDATA="/home/jenkins/corpus/unidoc-e2e-testdata"
env.UNIDOC_GS_BIN_PATH="/usr/bin/gs"
env.TMPDIR="${WORKSPACE}/temp"
sh "mkdir -p ${env.TMPDIR}"
@ -50,7 +54,7 @@ node {
}
stage('Test coverage') {
sh 'go test -coverprofile=coverage.out ./...'
sh 'go test -coverprofile=coverage.out -covermode=atomic -coverpkg=./... ./...'
sh '/home/jenkins/codecov.sh'
sh 'gocover-cobertura < coverage.out > coverage.xml'
step([$class: 'CoberturaPublisher', coberturaReportFile: 'coverage.xml'])

View File

@ -2,7 +2,7 @@ coverage:
status:
project:
default:
target: 35%
target: 50%
threshold: 1%
patch: false
changes: false

View File

@ -0,0 +1,105 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package e2etest
import (
"errors"
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/model"
)
// The allobjects test probes through all objects in the PDF files of a specified corpus,
// decoding every stream object it finds.
//
// Set environment variables:
// UNIDOC_E2E_FORCE_TESTS to "1" to force the tests to execute (a missing or unreadable
// corpus then fails the test instead of skipping it).
// UNIDOC_ALLOBJECTS_TESTDATA to the path of the corpus folder.
var (
// allObjectsCorpusFolder is the corpus folder read by TestAllObjects; it is taken from
// UNIDOC_ALLOBJECTS_TESTDATA once, at package initialisation.
allObjectsCorpusFolder = os.Getenv("UNIDOC_ALLOBJECTS_TESTDATA")
)
func TestAllObjects(t *testing.T) {
if len(allObjectsCorpusFolder) == 0 {
if forceTest {
t.Fatalf("UNIDOC_ALLOBJECTS_TESTDATA not set")
}
}
files, err := ioutil.ReadDir(allObjectsCorpusFolder)
if err != nil {
if forceTest {
t.Fatalf("Error opening %s: %v", allObjectsCorpusFolder, err)
}
t.Skipf("Skipping allobjects test - unable to open UNIDOC_ALLOBJECTS_TESTDATA (%s)", allObjectsCorpusFolder)
return
}
for _, file := range files {
fpath := filepath.Join(allObjectsCorpusFolder, file.Name())
t.Logf("%s", fpath)
err := probeAllObjectsSinglePdf(fpath)
if err != nil {
t.Fatalf("Error: %v", err)
}
}
t.Logf("allObjects test complete for %d files in %s", len(files), allObjectsCorpusFolder)
}
// probeAllObjectsSinglePdf opens the PDF at inputPath, decrypts it with an empty password
// when the reader reports it as encrypted, and then fetches every indirect object number
// listed by the reader, decoding each object that is a stream.
//
// It returns the first error encountered, unchanged, or nil when every object was fetched
// and every stream decoded. An encrypted file that the empty password does not unlock
// yields the error "unauthorized read".
func probeAllObjectsSinglePdf(inputPath string) error {
	in, err := os.Open(inputPath)
	if err != nil {
		return err
	}
	defer in.Close()

	rd, err := model.NewPdfReader(in)
	if err != nil {
		return err
	}

	encrypted, err := rd.IsEncrypted()
	if err != nil {
		return err
	}
	if encrypted {
		// Only the empty password is attempted.
		ok, err := rd.Decrypt([]byte(""))
		if err != nil {
			return err
		}
		if !ok {
			return errors.New("unauthorized read")
		}
	}

	// The page count is discarded; the call is made only so that its error surfaces.
	if _, err := rd.GetNumPages(); err != nil {
		return err
	}

	for _, num := range rd.GetObjectNums() {
		obj, err := rd.GetIndirectObjectByNumber(num)
		if err != nil {
			return err
		}

		stream, isStream := obj.(*core.PdfObjectStream)
		if !isStream {
			continue
		}
		if _, err := core.DecodeStream(stream); err != nil {
			return err
		}
	}

	return nil
}

View File

@ -0,0 +1,177 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package e2etest
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/model"
)
// The passthrough test loads each PDF of a corpus, writes it back out and, when a
// ghostscript binary is configured, performs a sanity check on the output with ghostscript.
//
// Set environment variables:
// UNIDOC_E2E_FORCE_TESTS to "1" to force the tests to execute (a missing or unreadable
// corpus then fails the test instead of skipping it).
// UNIDOC_PASSTHROUGH_TESTDATA to the path of the corpus folder.
// UNIDOC_GS_BIN_PATH to the path of the ghostscript binary (gs).
var (
// forceTest is true only when UNIDOC_E2E_FORCE_TESTS is exactly "1".
forceTest = os.Getenv("UNIDOC_E2E_FORCE_TESTS") == "1"
// passthroughCorpusFolder is the corpus folder read by TestPassthrough.
passthroughCorpusFolder = os.Getenv("UNIDOC_PASSTHROUGH_TESTDATA")
)
func TestPassthrough(t *testing.T) {
if len(passthroughCorpusFolder) == 0 {
if forceTest {
t.Fatalf("UNIDOC_PASSTHROUGH_TESTDATA not set")
}
}
files, err := ioutil.ReadDir(passthroughCorpusFolder)
if err != nil {
if forceTest {
t.Fatalf("Error opening %s: %v", passthroughCorpusFolder, err)
}
t.Skipf("Skipping passthrough bench - unable to open UNIDOC_PASSTHROUGH_TESTDATA (%s)", passthroughCorpusFolder)
return
}
// Make a temporary folder and clean up after.
tempdir, err := ioutil.TempDir("", "unidoc_passthrough")
if err != nil {
t.Fatalf("Failed to create temporary folder")
}
defer os.RemoveAll(tempdir)
for _, file := range files {
t.Logf("%s", file.Name())
fpath := filepath.Join(passthroughCorpusFolder, file.Name())
params := passthroughParams{
inputPath: fpath,
outPath: filepath.Join(tempdir, "1.pdf"),
gsValidation: len(ghostscriptBinPath) > 0,
}
err := passthroughSinglePdf(params)
if err != nil {
t.Fatalf("Error: %v", err)
}
}
t.Logf("Passthrough benchmark complete for %d files in %s", len(files), passthroughCorpusFolder)
}
// passthroughParams bundles the inputs of passthroughSinglePdf.
type passthroughParams struct {
// inputPath is the PDF file that is opened and read.
inputPath string
// outPath is the file that is created and receives the rewritten PDF.
outPath string
// gsValidation, when true, makes passthroughSinglePdf run validatePdf on both the
// input and the output file after writing.
gsValidation bool
}
// passthroughSinglePdf reads the PDF at params.inputPath and writes a copy to
// params.outPath, carrying over the optional content properties, every page, the outline
// tree and the AcroForm.
//
// An encrypted input is decrypted with the empty password only; if that does not unlock
// it, an error is returned. An input reporting fewer than one page is treated as a no-op:
// nil is returned and no output file is created.
//
// When params.gsValidation is set, both files are run through validatePdf. Any error from
// either run is returned, and the output is rejected when it produces more warnings than
// the input did.
func passthroughSinglePdf(params passthroughParams) error {
	file, err := os.Open(params.inputPath)
	if err != nil {
		return err
	}
	defer file.Close()

	reader, err := model.NewPdfReader(file)
	if err != nil {
		common.Log.Debug("Reader create error %s\n", err)
		return err
	}

	isEncrypted, err := reader.IsEncrypted()
	if err != nil {
		return err
	}
	if isEncrypted {
		valid, err := reader.Decrypt([]byte(""))
		if err != nil {
			common.Log.Debug("Fail to decrypt: %v", err)
			return err
		}
		if !valid {
			return fmt.Errorf("Unable to access, encrypted")
		}
	}

	numPages, err := reader.GetNumPages()
	if err != nil {
		common.Log.Debug("Failed to get number of pages")
		return err
	}
	if numPages < 1 {
		common.Log.Debug("Empty pdf - nothing to be done!")
		return nil
	}

	writer := model.NewPdfWriter()

	// Optional content.
	ocProps, err := reader.GetOCProperties()
	if err != nil {
		return err
	}
	writer.SetOCProperties(ocProps)

	// Pages are addressed 1-based by the reader.
	for j := 0; j < numPages; j++ {
		page, err := reader.GetPage(j + 1)
		if err != nil {
			common.Log.Debug("Get page error %s", err)
			return err
		}
		if err := writer.AddPage(page); err != nil {
			common.Log.Debug("Add page error %s", err)
			return err
		}
	}

	// Outlines (table of contents). The tree is obtained from the reader without
	// reference to any page, so it is handed to the writer once, after the pages,
	// instead of once per page.
	writer.AddOutlineTree(reader.GetOutlineTree())

	// Copy the forms over to the new document also.
	writer.SetForms(reader.AcroForm)

	of, err := os.Create(params.outPath)
	if err != nil {
		common.Log.Debug("Failed to create file (%s)", err)
		return err
	}
	err = writer.Write(of)
	// Close explicitly rather than with defer: the file must be fully written and
	// released before an external process reads it, and a failing Close means the
	// output cannot be trusted.
	closeErr := of.Close()
	if err != nil {
		common.Log.Debug("WriteFile error")
		return err
	}
	if closeErr != nil {
		common.Log.Debug("Failed to close file (%s)", closeErr)
		return closeErr
	}

	// GS validation of input, output pdfs.
	if params.gsValidation {
		common.Log.Debug("Validating input file")
		inputWarnings, err := validatePdf(params.inputPath, "")
		if err != nil {
			return err
		}

		common.Log.Debug("Validating output file")
		warnings, err := validatePdf(params.outPath, "")
		if err != nil {
			// A validator failure on the output is a failure of the passthrough.
			return err
		}
		// The output may not be worse than the input it was produced from.
		if warnings > inputWarnings {
			common.Log.Debug("Input warnings %d vs output %d", inputWarnings, warnings)
			return fmt.Errorf("Invalid PDF input %d/ output %d warnings", inputWarnings, warnings)
		}
		common.Log.Debug("Valid PDF!")
	}

	return nil
}

View File

@ -0,0 +1,71 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package e2etest
import (
"bytes"
"errors"
"fmt"
"os"
"os/exec"
"strings"
"github.com/unidoc/unidoc/common"
)
// To enable ghostscript validation, the path to the binary needs to be specified.
// Set environment variable:
// UNIDOC_GS_BIN_PATH to the path of the ghostscript binary (gs).
var (
// ghostscriptBinPath is the executable that validatePdf runs; when it is empty,
// validatePdf returns an error without running anything.
ghostscriptBinPath = os.Getenv("UNIDOC_GS_BIN_PATH")
)
// validatePdf validates a PDF file by running Ghostscript on it in batch mode with no
// display, passing password via -sPDFPassword when it is non-empty.
//
// An error is returned when UNIDOC_GS_BIN_PATH is not set or when the Ghostscript process
// itself fails. Otherwise the result is derived from the number of "****" markers found
// on Ghostscript's standard error: that count is returned when it exceeds one, and 0 is
// returned otherwise. The count can be used as a rough measure of validity, especially
// when comparing a file with a transformed version of itself.
func validatePdf(path string, password string) (int, error) {
	if ghostscriptBinPath == "" {
		return 0, errors.New("UNIDOC_GS_BIN_PATH not set")
	}
	common.Log.Debug("Validating: %s", path)

	args := []string{"-dBATCH", "-dNODISPLAY", "-dNOPAUSE"}
	if password != "" {
		args = append(args, "-sPDFPassword="+password)
	}
	args = append(args, path)

	var stdout, stderr bytes.Buffer
	gs := exec.Command(ghostscriptBinPath, args...)
	gs.Stdout = &stdout
	gs.Stderr = &stderr

	if runErr := gs.Run(); runErr != nil {
		common.Log.Debug("%s", stdout.String())
		common.Log.Debug("%s", stderr.String())
		common.Log.Error("GS failed with error %s", runErr)
		return 0, fmt.Errorf("GS failed with error (%s)", runErr)
	}

	report := stderr.String()
	count := strings.Count(report, "****")
	common.Log.Debug("ERROR: - %d warnings %s", count, report)

	// NOTE(review): a single marker is reported as 0 warnings; confirm that the
	// "more than one" threshold is intentional.
	if count <= 1 {
		// Valid if no error.
		return 0, nil
	}

	// Only the first 80 bytes of the report are logged.
	if len(report) > 80 {
		report = report[:80]
	}
	common.Log.Debug("ERROR: Invalid - %d warnings %s", count, report)
	return count, nil
}