Merge pull request #335 from peterwilliams97/v3.imagemark

V3.imagemark
This commit is contained in:
Gunnsteinn Hall 2019-01-31 12:50:33 +00:00 committed by GitHub
commit 2cc717d642
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 77 additions and 3 deletions

View File

@ -46,7 +46,7 @@ type ImageMark struct {
X float64
Y float64
// Angle if rotated.
// Angle in degrees, if rotated.
Angle float64
}

View File

@ -8,6 +8,7 @@ package extractor
import (
"math"
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
@ -243,6 +244,71 @@ func TestImageExtractionMulti(t *testing.T) {
}
}
// TestImageExtractionRealWorld extracts image marks from pages of PDF files
// in the corpus folder and compares them with known-good values.
//
// The test is skipped when no corpus folder is configured, unless forceTest
// is set, in which case it runs (and fails on the missing input files).
func TestImageExtractionRealWorld(t *testing.T) {
	if len(corpusFolder) == 0 && !forceTest {
		t.Skip("Corpus folder not set - skipping")
	}

	// Image is left unset (nil) in every expected mark: the extracted pixel
	// data is discarded before comparison, so only geometry is checked.
	testcases := []struct {
		Name     string
		PageNum  int
		Path     string
		Expected []ImageMark
	}{
		{
			Name:    "ICC color space",
			PageNum: 3,
			Path:    "icnp12-qinghua.pdf",
			Expected: []ImageMark{
				{
					Width:  2.877,
					Height: 22.344,
					X:      236.508,
					Y:      685.248,
					Angle:  0.0,
				},
				{
					Width:  247.44,
					Height: 0.48,
					X:      313.788,
					Y:      715.248,
					Angle:  0.0,
				},
				{
					Width:  247.44,
					Height: 0.48,
					X:      313.788,
					Y:      594.648,
					Angle:  0.0,
				},
			},
		},
	}

	for _, tcase := range testcases {
		tcase := tcase // Per-iteration copy for the subtest closure.
		// Run each case as a named subtest so failures identify the case and
		// a fatal assertion only aborts that case.
		t.Run(tcase.Name, func(t *testing.T) {
			inputPath := filepath.Join(corpusFolder, tcase.Path)

			page, err := loadPageFromPDFFile(inputPath, tcase.PageNum)
			require.NoError(t, err)

			pageExtractor, err := New(page)
			require.NoError(t, err)

			pageImages, err := pageExtractor.ExtractPageImages()
			require.NoError(t, err)

			// Must be fatal: indexing tcase.Expected below would panic with
			// an out-of-range index if more images were extracted than
			// expected.
			require.Equal(t, len(tcase.Expected), len(pageImages.Images))

			for i, img := range pageImages.Images {
				img.Image = nil // Discard image data.
				assert.Equalf(t, tcase.Expected[i], img, "i = %d", i)
			}
		})
	}
}
func BenchmarkImageExtraction(b *testing.B) {
cnt := 0
for i := 0; i < b.N; i++ {

View File

@ -1719,6 +1719,7 @@ func newPdfColorspaceICCBasedFromPdfObject(obj core.PdfObject) (*PdfColorspaceIC
}
obj = core.TraceToDirectObject(obj)
array, ok := obj.(*core.PdfObjectArray)
if !ok {
return nil, fmt.Errorf("type error")
@ -1779,6 +1780,13 @@ func newPdfColorspaceICCBasedFromPdfObject(obj core.PdfObject) (*PdfColorspaceIC
return nil, err
}
cs.Range = r
} else {
// Set defaults
cs.Range = make([]float64, 2*cs.N)
for i := 0; i < cs.N; i++ {
cs.Range[2*i] = 0.0
cs.Range[2*i+1] = 1.0
}
}
if obj := dict.Get("Metadata"); obj != nil {
@ -2513,9 +2521,9 @@ func (cs *PdfColorspaceSpecialSeparation) ImageToRGB(img Image) (Image, error) {
var altSamples []uint32
// Convert tints to color data in the alternate colorspace.
for i := 0; i < len(samples); i++ {
for _, sample := range samples {
// A single tint component is in the range 0.0 - 1.0
tint := float64(samples[i]) / maxVal
tint := float64(sample) / maxVal
// Convert the tint value to the alternate space value.
outputs, err := cs.TintTransform.Evaluate([]float64{tint})