From 520ab09a72215b8e05c90edac8f0b561e1d75a9f Mon Sep 17 00:00:00 2001 From: Gunnsteinn Hall Date: Wed, 28 Nov 2018 23:25:17 +0000 Subject: [PATCH 1/3] Addressing review comments --- pdf/contentstream/matrix.go | 157 +++++++++++++++++++++++++++ pdf/contentstream/processor.go | 164 +++------------------------- pdf/extractor/point.go | 12 +-- pdf/extractor/text.go | 81 +++++++------- pdf/extractor/text_test.go | 189 ++++++++++++++++----------------- pdf/model/font.go | 58 ++++------ pdf/model/font_composite.go | 9 -- pdf/model/fonts/font.go | 1 - 8 files changed, 325 insertions(+), 346 deletions(-) create mode 100644 pdf/contentstream/matrix.go diff --git a/pdf/contentstream/matrix.go b/pdf/contentstream/matrix.go new file mode 100644 index 00000000..6997b510 --- /dev/null +++ b/pdf/contentstream/matrix.go @@ -0,0 +1,157 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package contentstream + +import ( + "fmt" + "math" + + "github.com/unidoc/unidoc/common" +) + +// Matrix is a linear transform matrix in homogenous coordinates. +// PDF coordinate transforms are always affine so we only need 6 of these. See newMatrix. +type Matrix [9]float64 + +// IdentityMatrix returns the identity transform. +func IdentityMatrix() Matrix { + return NewMatrix(1, 0, 0, 1, 0, 0) +} + +// TranslationMatrix returns a matrix that translates by `tx`, `ty`. +func TranslationMatrix(tx, ty float64) Matrix { + return NewMatrix(1, 0, 0, 1, tx, ty) +} + +// NewMatrix returns an affine transform matrix laid out in homogenous coordinates as +// a b 0 +// c d 0 +// tx ty 1 +func NewMatrix(a, b, c, d, tx, ty float64) Matrix { + m := Matrix{ + a, b, 0, + c, d, 0, + tx, ty, 1, + } + m.fixup() + return m +} + +// String returns a string describing `m`. 
+func (m Matrix) String() string { + a, b, c, d, tx, ty := m[0], m[1], m[3], m[4], m[6], m[7] + return fmt.Sprintf("[%.4f,%.4f,%.4f,%.4f:%.4f,%.4f]", a, b, c, d, tx, ty) +} + +// Set sets `m` to affine transform a,b,c,d,tx,ty. +func (m *Matrix) Set(a, b, c, d, tx, ty float64) { + m[0], m[1] = a, b + m[3], m[4] = c, d + m[6], m[7] = tx, ty + m.fixup() +} + +// Concat sets `m` to `m` × `b`. +// `b` needs to be created by NewMatrix, i.e. it must be an affine transform. +// m00 m01 0 b00 b01 0 m00*b00 + m01*b10 m00*b01 + m01*b11 0 +// m10 m11 0 × b10 b11 0 = m10*b00 + m11*b10 m10*b01 + m11*b11 0 +// m20 m21 1 b20 b21 1 m20*b00 + m21*b10 + b20 m20*b01 + m21*b11 + b21 1 +func (m *Matrix) Concat(b Matrix) { + *m = Matrix{ + m[0]*b[0] + m[1]*b[3], m[0]*b[1] + m[1]*b[4], 0, + m[3]*b[0] + m[4]*b[3], m[3]*b[1] + m[4]*b[4], 0, + m[6]*b[0] + m[7]*b[3] + b[6], m[6]*b[1] + m[7]*b[4] + b[7], 1, + } + m.fixup() +} + +// Mult returns `m` × `b`. +func (m Matrix) Mult(b Matrix) Matrix { + m.Concat(b) + return m +} + +// Translate appends a translation of `dx`,`dy` to `m`. +// m.Translate(dx, dy) is equivalent to m.Concat(NewMatrix(1, 0, 0, 1, dx, dy)) +func (m *Matrix) Translate(dx, dy float64) { + m[6] += dx + m[7] += dy + m.fixup() +} + +// Translation returns the translation part of `m`. +func (m *Matrix) Translation() (float64, float64) { + return m[6], m[7] +} + +// ScalingX returns the X scaling of the affine transform. +func (m *Matrix) ScalingX() float64 { + return math.Hypot(m[0], m[1]) +} + +// Transform returns coordinates `x`,`y` transformed by `m`. +func (m *Matrix) Transform(x, y float64) (float64, float64) { + xp := x*m[0] + y*m[1] + m[6] + yp := x*m[3] + y*m[4] + m[7] + return xp, yp +} + +// ScalingFactorX returns X scaling of the affine transform. +func (m *Matrix) ScalingFactorX() float64 { + return math.Sqrt(m[0]*m[0] + m[1]*m[1]) +} + +// ScalingFactorY returns Y scaling of the affine transform.
+func (m *Matrix) ScalingFactorY() float64 { + return math.Sqrt(m[3]*m[3] + m[4]*m[4]) +} + +// Angle returns the angle of the affine transform. +// For simplicity, we assume the transform is a multiple of 90 degrees. +func (m *Matrix) Angle() int { + a, b, c, d := m[0], m[1], m[3], m[4] + // We are returning θ for + // a b cos θ -sin θ + // c d = sin θ cos θ + if a > 0 && d > 0 { + // 1 0 + // 0 1 + return 0 + } else if b < 0 && c > 0 { + // 0 1 + // -1 0 + return 90 + } else if a < 0 && d < 0 { + // -1 0 + // 0 -1 + return 180 + } else if b > 0 && c < 0 { + // 0 -1 + // 1 0 + return 270 + } + common.Log.Debug("ERROR: Angle not a multiple of 90°. m=%s", m) + return 0 +} + +// fixup forces `m` to have reasonable values. It is a guard against crazy values in corrupt PDF +// files. +// Currently it clamps elements to [-maxAbsNumber, maxAbsNumber] to avoid floating point exceptions. +func (m *Matrix) fixup() { + for i, x := range m { + if x > maxAbsNumber { + common.Log.Debug("FIXUP: %d -> %d", x, maxAbsNumber) + m[i] = maxAbsNumber + } else if x < -maxAbsNumber { + common.Log.Debug("FIXUP: %d -> %d", x, -maxAbsNumber) + m[i] = -maxAbsNumber + } + } +} + +// largest numbers needed in PDF transforms. Is this correct? +// TODO(gunnsth): Practical value? Need some reasoning. +const maxAbsNumber = 1e9 diff --git a/pdf/contentstream/processor.go b/pdf/contentstream/processor.go index 52d3a920..d555d876 100644 --- a/pdf/contentstream/processor.go +++ b/pdf/contentstream/processor.go @@ -7,8 +7,6 @@ package contentstream import ( "errors" - "fmt" - "math" "github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/pdf/core" @@ -62,21 +60,26 @@ type HandlerEntry struct { Handler HandlerFunc } +// HandlerConditionEnum represents the type of operand content stream processor. +// HandlerConditionEnumOperand handler handles a single operand, whereas +// HandlerConditionEnumAllOperands processes all operands.
type HandlerConditionEnum int -func (csp HandlerConditionEnum) All() bool { - return csp == HandlerConditionEnumAllOperands -} - -func (csp HandlerConditionEnum) Operand() bool { - return csp == HandlerConditionEnumOperand -} - const ( HandlerConditionEnumOperand HandlerConditionEnum = iota HandlerConditionEnumAllOperands HandlerConditionEnum = iota ) +// All returns true if `hce` is equivalent to HandlerConditionEnumAllOperands. +func (hce HandlerConditionEnum) All() bool { + return hce == HandlerConditionEnumAllOperands +} + +// Operand returns true if `hce` is equivalent to HandlerConditionEnumOperand. +func (hce HandlerConditionEnum) Operand() bool { + return hce == HandlerConditionEnumOperand +} + func NewContentStreamProcessor(ops []*ContentStreamOperation) *ContentStreamProcessor { csp := ContentStreamProcessor{} csp.graphicsStack = GraphicStateStack{} @@ -573,144 +576,3 @@ func (proc *ContentStreamProcessor) handleCommand_cm(op *ContentStreamOperation, return nil } - -// Matrix is a linear transform matrix in homogenous coordinates. -// PDF coordinate transforms are always affine so we only need 6 of these. See newMatrix. -type Matrix [9]float64 - -// IdentityMatrix returns the identity transform. -func IdentityMatrix() Matrix { - return NewMatrix(1, 0, 0, 1, 0, 0) -} - -// TranslationMatrix returns a matrix that translates by `tx`, `ty`. -func TranslationMatrix(tx, ty float64) Matrix { - return NewMatrix(1, 0, 0, 1, tx, ty) -} - -// NewMatrix returns an affine transform matrix laid out in homogenous coordinates as -// a b 0 -// c d 0 -// tx ty 1 -func NewMatrix(a, b, c, d, tx, ty float64) Matrix { - m := Matrix{ - a, b, 0, - c, d, 0, - tx, ty, 1, - } - m.fixup() - return m -} - -// String returns a string describing `m`. -func (m Matrix) String() string { - a, b, c, d, tx, ty := m[0], m[1], m[3], m[4], m[6], m[7] - return fmt.Sprintf("[%.4f,%.4f,%.4f,%.4f:%.4f,%.4f]", a, b, c, d, tx, ty) -} - -// Set sets `m` to affine transform a,b,c,d,tx,ty. 
-func (m *Matrix) Set(a, b, c, d, tx, ty float64) { - m[0], m[1] = a, b - m[3], m[4] = c, d - m[6], m[7] = tx, ty - m.fixup() -} - -// Concat sets `m` to `m` × `b`. -// `b` needs to be created by newMatrix. i.e. It must be an affine transform. -// m00 m01 0 b00 b01 0 m00*b00 + m01*b01 m00*b10 + m01*b11 0 -// m10 m11 0 × b10 b11 0 = m10*b00 + m11*b01 m10*b10 + m11*b11 0 -// m20 m21 1 b20 b21 1 m20*b00 + m21*b10 + b20 m20*b01 + m21*b11 + b21 1 -func (m *Matrix) Concat(b Matrix) { - *m = Matrix{ - m[0]*b[0] + m[1]*b[3], m[0]*b[1] + m[1]*b[4], 0, - m[3]*b[0] + m[4]*b[3], m[3]*b[1] + m[4]*b[4], 0, - m[6]*b[0] + m[7]*b[3] + b[6], m[6]*b[1] + m[7]*b[4] + b[7], 1, - } - m.fixup() -} - -// Mult returns `m` × `b`. -func (m Matrix) Mult(b Matrix) Matrix { - m.Concat(b) - return m -} - -// Translate appends a translation of `dx`,`dy` to `m`. -// m.Translate(dx, dy) is equivalent to m.Concat(NewMatrix(1, 0, 0, 1, dx, dy)) -func (m *Matrix) Translate(dx, dy float64) { - m[6] += dx - m[7] += dy - m.fixup() -} - -// Translation returns the translation part of `m`. -func (m *Matrix) Translation() (float64, float64) { - return m[6], m[7] -} - -// Translation returns the translation part of `m`. -func (m *Matrix) ScalingX() float64 { - return math.Hypot(m[0], m[1]) -} - -// Transform returns coordinates `x`,`y` transformed by `m`. -func (m *Matrix) Transform(x, y float64) (float64, float64) { - xp := x*m[0] + y*m[1] + m[6] - yp := x*m[3] + y*m[4] + m[7] - return xp, yp -} - -// ScalingFactorX returns X scaling of the affine transform. -func (m *Matrix) ScalingFactorX() float64 { - return math.Sqrt(m[0]*m[0] + m[1]*m[1]) -} - -// ScalingFactorY returns X scaling of the affine transform. -func (m *Matrix) ScalingFactorY() float64 { - return math.Sqrt(m[3]*m[3] + m[4]*m[4]) -} - -// Angle returns the angle of the affine transform. -// For simplicity, we assume the transform is a multiple of 90 degrees. 
-func (m *Matrix) Angle() int { - a, b, c, d := m[0], m[1], m[3], m[4] - // We are returning θ for - // a b cos θ -sin θ - // c d = sin θ cos θ - if a > 0 && d > 0 { - // 1 0 - // 0 1 - return 0 - } else if b < 0 && c > 0 { - // 0 1 - // -1 0 - return 90 - } else if a < 0 && d < 0 { - // -1 0 - // 0 -1 - return 180 - } else if b > 0 && c < 0 { - // 0 -1 - // 1 0 - return 270 - } - common.Log.Debug("ERROR: Angle not a mulitple of 90°. m=%s", m) - return 0 -} - -// fixup forces `m` to have reasonable values. It is a guard against crazy values in corrupt PDF -// files. -// Currently it clamps elements to [-maxAbsNumber, -maxAbsNumber] to avoid floating point exceptions. -func (m *Matrix) fixup() { - for i, x := range m { - if x > maxAbsNumber { - m[i] = maxAbsNumber - } else if x < -maxAbsNumber { - m[i] = -maxAbsNumber - } - } -} - -// largest numbers needed in PDF transforms. Is this correct? -const maxAbsNumber = 1e9 diff --git a/pdf/extractor/point.go b/pdf/extractor/point.go index 5561f453..30200c44 100644 --- a/pdf/extractor/point.go +++ b/pdf/extractor/point.go @@ -5,7 +5,7 @@ * Based on pdf/contentstream/draw/point.go */ -// XXX(peterwilliams97) Change to functional style. i.e. Return new value, don't mutate. +// FIXME(peterwilliams97) Change to functional style. i.e. Return new value, don't mutate. package extractor @@ -16,18 +16,18 @@ import ( "github.com/unidoc/unidoc/pdf/contentstream" ) -// Point defines a point in Cartesian coordinates +// Point defines a point (X,Y) in Cartesian coordinates. type Point struct { X float64 Y float64 } -// NewPoint returns a Point at 'x', 'y'. +// NewPoint returns a Point at `x`, `y`. func NewPoint(x, y float64) Point { return Point{X: x, Y: y} } -// Set sets `p` to `x`, `y`. +// Set sets `p` to coordinates `(x, y)`. func (p *Point) Set(x, y float64) { p.X, p.Y = x, y } @@ -38,12 +38,12 @@ func (p *Point) Transform(a, b, c, d, tx, ty float64) { p.transformByMatrix(m) } -// Displace returns `p` displaced by `delta`. 
+// Displace returns a new Point at location `p` + `delta`. func (p Point) Displace(delta Point) Point { return Point{p.X + delta.X, p.Y + delta.Y} } -// Rotate returns `p` rotated by `theta` degrees. +// Rotate returns a new Point that is `p` rotated by `theta` degrees. func (p Point) Rotate(theta int) Point { switch theta { case 0: diff --git a/pdf/extractor/text.go b/pdf/extractor/text.go index f23a59a2..4683a27f 100644 --- a/pdf/extractor/text.go +++ b/pdf/extractor/text.go @@ -25,13 +25,13 @@ import ( // CharcodeBytesToUnicode. // Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = �). func (e *Extractor) ExtractText() (string, error) { - text, _, _, err := e.ExtractText2() + text, _, _, err := e.ExtractTextWithStats() return text, err } -// ExtractText2 works like ExtractText but returns the number of characters in the output and the +// ExtractTextWithStats works like ExtractText but returns the number of characters in the output and // the number of characters that were not decoded. -func (e *Extractor) ExtractText2() (string, int, int, error) { +func (e *Extractor) ExtractTextWithStats() (string, int, int, error) { textList, numChars, numMisses, err := e.ExtractXYText() if err != nil { return "", numChars, numMisses, err @@ -311,6 +311,10 @@ func (to *textObject) nextLine() { // Set the text matrix, Tm, and the text line matrix, Tlm to the Matrix specified by the 6 numbers // in `f` (page 250) func (to *textObject) setTextMatrix(f []float64) { + if len(f) != 6 { + common.Log.Debug("ERROR: len(f) != 6 (%d)", len(f)) + return + } a, b, c, d, tx, ty := f[0], f[1], f[2], f[3], f[4], f[5] to.Tm = contentstream.NewMatrix(a, b, c, d, tx, ty) to.Tlm = contentstream.NewMatrix(a, b, c, d, tx, ty) @@ -358,7 +362,7 @@ func (to *textObject) showTextAdjusted(args *core.PdfObjectArray) error { // setTextLeading "TL" Set text leading.
func (to *textObject) setTextLeading(y float64) { - if to == nil { + if to == nil || to.State == nil { return } to.State.Tl = y @@ -427,7 +431,7 @@ func (to *textObject) setHorizScaling(y float64) { to.State.Th = y } -// floatParam returns the single float parameter of operatr `op`, or an error if it doesn't have +// floatParam returns the single float parameter of operator `op`, or an error if it doesn't have // a single float parameter or we aren't in a text stream. func floatParam(op *contentstream.ContentStreamOperation) (float64, error) { if len(op.Params) != 1 { @@ -444,7 +448,7 @@ func floatParam(op *contentstream.ContentStreamOperation) (float64, error) { func (to *textObject) checkOp(op *contentstream.ContentStreamOperation, numParams int, hard bool) (ok bool, err error) { if to == nil { - params := []core.PdfObject{} + var params []core.PdfObject if numParams > 0 { params = op.Params if len(params) > numParams { @@ -599,7 +603,7 @@ func newTextObject(e *Extractor, gs contentstream.GraphicsState, state *textStat } } -// renderText emits byte array `data` to the calling program. +// renderText processes and renders byte array `data` for extraction purposes. func (to *textObject) renderText(data []byte) error { font := to.getCurrentFont() @@ -629,7 +633,6 @@ func (to *textObject) renderText(data []byte) error { common.Log.Debug("%d codes=%+v runes=%q", len(charcodes), charcodes, runes) for i, r := range runes { - code := charcodes[i] // The location of the text on the page in device coordinates is given by trm, the text // rendering matrix. @@ -738,7 +741,7 @@ func newXYText(text string, trm contentstream.Matrix, end Point, spaceWidth floa // String returns a string describing `t`. 
func (t XYText) String() string { - return fmt.Sprintf("XYText{%s %.1f |%d| [%.3f,%.3f] %q}", + return fmt.Sprintf("XYText{%s %.1f |%d| [%.3f,%.3f] %q}", t.Trm.String(), t.Width(), t.Orient, t.OrientedStart.X, t.OrientedStart.Y, truncate(t.Text, 100)) } @@ -762,7 +765,7 @@ func (tl *TextList) ToText() string { tl.SortPosition() lines := tl.toLines() - texts := []string{} + texts := make([]string, 0, len(lines)) for _, l := range lines { texts = append(texts, l.Text) } @@ -797,11 +800,11 @@ type Line struct { // NOTE: Caller must sort the text list by top-to-bottom, left-to-write (for orientation adjusted so // that text is horizontal) before calling this function. func (tl TextList) toLines() []Line { - tlOrient := map[int]TextList{} + tlOrient := make(map[int]TextList, len(tl)) for _, t := range tl { tlOrient[t.Orient] = append(tlOrient[t.Orient], t) } - lines := []Line{} + lines := make([]Line, 0, 4) for _, o := range []int{0, 90, 180, 270} { lines = append(lines, tlOrient[o].toLinesOrient()...) 
} @@ -815,15 +818,15 @@ func (tl TextList) toLinesOrient() []Line { if len(tl) == 0 { return []Line{} } - lines := []Line{} - words := []string{} - x := []float64{} + var lines []Line + var words []string + var x []float64 y := tl[0].OrientedStart.Y scanning := false - averageCharWidth := ExponAve{} - wordSpacing := ExponAve{} + averageCharWidth := exponAve{} + wordSpacing := exponAve{} lastEndX := 0.0 // tl[i-1].End.X for _, t := range tl { @@ -857,15 +860,15 @@ func (tl TextList) toLinesOrient() []Line { deltaCharWidth := averageCharWidth.ave * 0.3 isSpace := false - nextWordX := lastEndX + min(deltaSpace, deltaCharWidth) + nextWordX := lastEndX + minFloat(deltaSpace, deltaCharWidth) if scanning && t.Text != " " { isSpace = nextWordX < t.OrientedStart.X } common.Log.Trace("t=%s", t) common.Log.Trace("width=%.2f delta=%.2f deltaSpace=%.2g deltaCharWidth=%.2g", - t.Width(), min(deltaSpace, deltaCharWidth), deltaSpace, deltaCharWidth) + t.Width(), minFloat(deltaSpace, deltaCharWidth), deltaSpace, deltaCharWidth) common.Log.Trace("%+q [%.1f, %.1f] lastEndX=%.2f nextWordX=%.2f (%.2f) isSpace=%t", - t.Text, t.OrientedStart.X, t.OrientedStart.Y, lastEndX, nextWordX, + t.Text, t.OrientedStart.X, t.OrientedStart.Y, lastEndX, nextWordX, nextWordX-t.OrientedStart.X, isSpace) if isSpace { @@ -890,22 +893,14 @@ func (tl TextList) toLinesOrient() []Line { return lines } -// min returns the lesser of `a` and `b`. -func min(a, b float64) float64 { - if a < b { - return a - } - return b -} - -// ExponAve implements an exponential average. -type ExponAve struct { +// exponAve implements an exponential average. +type exponAve struct { ave float64 // Current average value. running bool // Has `ave` been set? 
} // update updates the exponential average `exp.ave` and returns it -func (exp *ExponAve) update(x float64) float64 { +func (exp *exponAve) update(x float64) float64 { if !exp.running { exp.ave = x exp.running = true @@ -915,9 +910,15 @@ func (exp *ExponAve) update(x float64) float64 { return exp.ave } -// printTexts is a debugging function. XXX(peterwilliams97) Remove this. +const isDebug = false + +// printTexts is a debugging function. +// TODO(peterwilliams97) Remove this. func (tl *TextList) printTexts(message string) { - return + if !isDebug { + return + } + _, file, line, ok := runtime.Caller(1) if !ok { file = "???" @@ -943,7 +944,7 @@ func (tl *TextList) printTexts(message string) { // newLine returns the Line representation of strings `words` with y coordinate `y` and x // coordinates `x`. func newLine(y float64, x []float64, words []string) Line { - dx := []float64{} + dx := make([]float64, 0, len(x)) for i := 1; i < len(x); i++ { dx = append(dx, x[i]-x[i-1]) } @@ -1031,18 +1032,8 @@ type fontEntry struct { const maxFontCache = 10 // getFontDirect returns the font named `name` if it exists in the page's resources or an error if -// is doesn't. -// This is a direct (uncached access). +// it doesn't. Accesses page resources directly (not cached). func (to *textObject) getFontDirect(name string) (*model.PdfFont, error) { - - // This is a hack for testing. 
- switch name { - case "UniDocCourier": - return model.NewStandard14FontMustCompile(model.Courier), nil - case "UniDocHelvetica": - return model.NewStandard14FontMustCompile(model.Helvetica), nil - } - fontObj, err := to.getFontDict(name) if err != nil { return nil, err diff --git a/pdf/extractor/text_test.go b/pdf/extractor/text_test.go index d9f84b46..4c9a4cc1 100644 --- a/pdf/extractor/text_test.go +++ b/pdf/extractor/text_test.go @@ -8,7 +8,6 @@ package extractor import ( "flag" "os" - "os/user" "path/filepath" "regexp" "sort" @@ -19,18 +18,14 @@ import ( "github.com/unidoc/unidoc/pdf/model" ) -// XXX(peterwilliams97) NOTE: We do a best effort at finding the PDF file because we don't keep PDF -// test files in this repo so you will need to setup `corpusFolders` to point at the corpus directory. +// NOTE: We do a best effort at finding the PDF file because we don't keep PDF test files in this repo so you +// will need to setup UNIDOC_EXTRACT_TESTDATA to point at the corpus directory. // forceTest should be set to true to force running all tests. -const forceTest = false +// NOTE: Setting environment variable UNIDOC_EXTRACT_FORCETEST = 1 sets this to true. +var forceTest = os.Getenv("UNIDOC_EXTRACT_FORCETEST") == "1" -// corpusFolders is where we search for test files. -var corpusFolders = []string{ - "./testdata", - "~/testdata", - ".", -} +var corpusFolder = os.Getenv("UNIDOC_EXTRACT_TESTDATA") func init() { common.SetLogger(common.NewConsoleLogger(common.LogLevelError)) @@ -39,23 +34,16 @@ func init() { } } -// TestTextExtraction1 tests text extraction on the PDF fragments in `fragmentTests`. -func TestTextExtraction1(t *testing.T) { - for _, f := range fragmentTests { - f.testExtraction(t) - } -} - -type fragment struct { - name string - contents string - text string -} - -var fragmentTests = []fragment{ - - {name: "portrait", - contents: ` +// TestTextExtractionFragments tests text extraction on the PDF fragments in `fragmentTests`. 
+func TestTextExtractionFragments(t *testing.T) { + fragmentTests := []struct { + name string + contents string + text string + }{ + { + name: "portrait", + contents: ` BT /UniDocCourier 24 Tf (Hello World!)Tj @@ -63,10 +51,11 @@ var fragmentTests = []fragment{ (Doink)Tj ET `, - text: "Hello World!\nDoink", - }, - {name: "landscape", - contents: ` + text: "Hello World!\nDoink", + }, + { + name: "landscape", + contents: ` BT /UniDocCourier 24 Tf 0 1 -1 0 0 0 Tm @@ -75,10 +64,11 @@ var fragmentTests = []fragment{ (Doink)Tj ET `, - text: "Hello World!\nDoink", - }, - {name: "180 degree rotation", - contents: ` + text: "Hello World!\nDoink", + }, + { + name: "180 degree rotation", + contents: ` BT /UniDocCourier 24 Tf -1 0 0 -1 0 0 Tm @@ -87,10 +77,11 @@ var fragmentTests = []fragment{ (Doink)Tj ET `, - text: "Hello World!\nDoink", - }, - {name: "Helvetica", - contents: ` + text: "Hello World!\nDoink", + }, + { + name: "Helvetica", + contents: ` BT /UniDocHelvetica 24 Tf 0 -1 1 0 0 0 Tm @@ -99,35 +90,53 @@ var fragmentTests = []fragment{ (Doink)Tj ET `, - text: "Hello World!\nDoink", - }, -} - -// testExtraction checks that ExtractText() works on fragment `f`. -func (f fragment) testExtraction(t *testing.T) { - e := Extractor{contents: f.contents} - text, err := e.ExtractText() - if err != nil { - t.Fatalf("Error extracting text: %q err=%v", f.name, err) - return + text: "Hello World!\nDoink", + }, } - if text != f.text { - t.Fatalf("Text mismatch: %q Got %q. Expected %q", f.name, text, f.text) - return + + // Setup mock resources. 
+ resources := model.NewPdfPageResources() + { + courier := model.NewStandard14FontMustCompile(model.Courier) + helvetica := model.NewStandard14FontMustCompile(model.Helvetica) + resources.SetFontByName("UniDocHelvetica", helvetica.ToPdfObject()) + resources.SetFontByName("UniDocCourier", courier.ToPdfObject()) + } + + for _, f := range fragmentTests { + t.Run(f.name, func(t *testing.T) { + e := Extractor{resources: resources, contents: f.contents} + text, err := e.ExtractText() + if err != nil { + t.Fatalf("Error extracting text: %q err=%v", f.name, err) + return + } + if text != f.text { + t.Fatalf("Text mismatch: %q Got %q. Expected %q", f.name, text, f.text) + return + } + }) } } -// TestTextExtraction2 tests text extraction on set of PDF files. +// TestTextExtractionFiles tests text extraction on a set of PDF files. // It checks for the existence of specified strings of words on specified pages. // We currently only check within lines as our line order is still improving. -func TestTextExtraction2(t *testing.T) { - for _, test := range extract2Tests { - testExtract2(t, test.filename, test.expectedPageText) +func TestTextExtractionFiles(t *testing.T) { + if len(corpusFolder) == 0 && !forceTest { + t.Log("Corpus folder not set - skipping") + return + } + + for _, test := range fileExtractionTests { + t.Run(test.filename, func(t *testing.T) { + testExtractFile(t, test.filename, test.expectedPageText) + }) } } -// extract2Tests are the PDFs and texts we are looking for on specified pages. -var extract2Tests = []struct { +// fileExtractionTests are the PDFs and texts we are looking for on specified pages. 
+var fileExtractionTests = []struct { filename string expectedPageText map[int][]string }{ @@ -208,21 +217,27 @@ var extract2Tests = []struct { }, } -// testExtract2 tests the ExtractText2 text extractor on `filename` and compares the extracted +// testExtractFile tests the ExtractTextWithStats text extractor on `filename` and compares the extracted // text to `expectedPageText`. -// XXX(peterwilliams97) NOTE: We do a best effort at finding the PDF file because we don't keep PDF -// test files in this repo so you will need to setup `corpusFolders` to point at the corpus directory. -// If `filename` cannot be found in `corpusFolders` then the test is skipped. -func testExtract2(t *testing.T, filename string, expectedPageText map[int][]string) { - homeDir, hasHome := getHomeDir() - path, ok := searchDirectories(homeDir, hasHome, corpusFolders, filename) - if !ok { +// +// NOTE: We do a best effort at finding the PDF file because we don't keep PDF test files in this repo +// so you will need to set the environment variable UNIDOC_EXTRACT_TESTDATA to point at +// the corpus directory. +// +// If `filename` cannot be found in `corpusFolder` then the test is skipped unless the `forceTest` global +// variable is true (e.g. setting environment variable UNIDOC_EXTRACT_FORCETEST = 1).
+func testExtractFile(t *testing.T, filename string, expectedPageText map[int][]string) { + filepath := filepath.Join(corpusFolder, filename) + exists := checkFileExists(filepath) + if !exists { if forceTest { t.Fatalf("filename=%q does not exist", filename) } + t.Logf("%s not found", filename) return } - _, actualPageText := extractPageTexts(t, path) + + _, actualPageText := extractPageTexts(t, filepath) for _, pageNum := range sortedKeys(expectedPageText) { expectedSentences, ok := expectedPageText[pageNum] actualText, ok := actualPageText[pageNum] @@ -230,12 +245,12 @@ func testExtract2(t *testing.T, filename string, expectedPageText map[int][]stri t.Fatalf("%q doesn't have page %d", filename, pageNum) } if !containsSentences(t, expectedSentences, actualText) { - t.Fatalf("Text mismatch filename=%q page=%d", path, pageNum) + t.Fatalf("Text mismatch filepath=%q page=%d", filepath, pageNum) } } } -// extractPageTexts runs ExtractText2 on all pages in PDF `filename` and returns the result as a map +// extractPageTexts runs ExtractTextWithStats on all pages in PDF `filename` and returns the result as a map // {page number: page text} func extractPageTexts(t *testing.T, filename string) (int, map[int]string) { f, err := os.Open(filename) @@ -263,11 +278,11 @@ func extractPageTexts(t *testing.T, filename string) (int, map[int]string) { if err != nil { t.Fatalf("extractor.New failed. filename=%q page=%d err=%v", filename, pageNum, err) } - text, _, _, err := ex.ExtractText2() + text, _, _, err := ex.ExtractTextWithStats() if err != nil { - t.Fatalf("ExtractText2 failed. filename=%q page=%d err=%v", filename, pageNum, err) + t.Fatalf("ExtractTextWithStats failed. filename=%q page=%d err=%v", filename, pageNum, err) } - // XXX(peterwilliams97)TODO: Improve text extraction space insertion so we don't need reduceSpaces. + // TODO(peterwilliams97): Improve text extraction space insertion so we don't need reduceSpaces. 
pageText[pageNum] = reduceSpaces(text) } return numPages, pageText @@ -293,30 +308,10 @@ func reduceSpaces(text string) string { var reSpace = regexp.MustCompile(`(?m)\s+`) -// searchDirectories searches `directories` for `filename` and returns the full file path if it is -// found. `homeDir` and `hasHome` are used for home directory substitution. -func searchDirectories(homeDir string, hasHome bool, directories []string, filename string) (string, bool) { - for _, direct := range directories { - if hasHome { - direct = strings.Replace(direct, "~", homeDir, 1) - } - path := filepath.Join(direct, filename) - if _, err := os.Stat(path); err == nil { - return path, true - } - } - return "", false -} - -// getHomeDir returns the current user's home directory if it is defined and a bool to tell if it -// is defined. -func getHomeDir() (string, bool) { - usr, err := user.Current() - if err != nil { - common.Log.Error("No current user. err=%v", err) - return "", false - } - return usr.HomeDir, true +// checkFileExists returns true if `filepath` exists. +func checkFileExists(filepath string) bool { + _, err := os.Stat(filepath) + return err == nil } // sortedKeys returns the keys of `m` as a sorted slice. diff --git a/pdf/model/font.go b/pdf/model/font.go index 6c6e51d0..9cc62bd6 100644 --- a/pdf/model/font.go +++ b/pdf/model/font.go @@ -18,26 +18,30 @@ import ( "github.com/unidoc/unidoc/pdf/model/fonts" ) -// Font represents a font which is a series of glyphs. Character codes from PDF strings can be -// mapped to and from glyphs. Each glyph has metrics. -// XXX: FIXME (peterwilliams97) HACK to add GetCharMetrics() for fonts other than standard 14 -// Remove this hack. 
-type Font interface { - Encoder() textencoding.TextEncoder - SetEncoder(encoder textencoding.TextEncoder) - GetGlyphCharMetrics(glyph string) (fonts.CharMetrics, bool) - GetCharMetrics(code uint16) (fonts.CharMetrics, bool) - GetAverageCharWidth() float64 - ToPdfObject() core.PdfObject -} - // PdfFont represents an underlying font structure which can be of type: // - Type0 // - Type1 // - TrueType // etc. type PdfFont struct { - context Font // The underlying font: Type0, Type1, Truetype, etc.. + context fonts.Font // The underlying font: Type0, Type1, Truetype, etc.. +} + +// getCharCodeMetrics is a handy function for getting character metrics given a charcode. +func (font PdfFont) getCharCodeMetrics(code uint16) (fonts.CharMetrics, bool) { + var nometrics fonts.CharMetrics + + enc := font.Encoder() + if enc == nil { + return nometrics, false + } + + glyph, found := enc.CharcodeToGlyph(code) + if !found { + return nometrics, false + } + + return font.GetGlyphCharMetrics(glyph) } // GetFontDescriptor returns the font descriptor for `font`. @@ -517,18 +521,7 @@ func (font PdfFont) GetGlyphCharMetrics(glyph string) (fonts.CharMetrics, bool) // GetCharMetrics returns the char metrics for character code `code`. func (font PdfFont) GetCharMetrics(code uint16) (fonts.CharMetrics, bool) { - t := font.actualFont() - if t == nil { - common.Log.Debug("ERROR: GetCharMetrics Not implemented for font type=%#T", font.context) - return fonts.CharMetrics{}, false - } - if m, ok := t.GetCharMetrics(code); ok { - return m, ok - } - if descriptor, err := font.GetFontDescriptor(); err == nil && descriptor != nil { - return fonts.CharMetrics{Wx: descriptor.missingWidth}, true - } - return fonts.CharMetrics{}, false + return font.getCharCodeMetrics(code) } // GetRuneCharMetrics returns the char metrics for rune `r`. 
@@ -551,18 +544,9 @@ func (font PdfFont) GetRuneCharMetrics(r rune) (fonts.CharMetrics, error) { return m, nil } -// GetAverageCharWidth returns the average width of all the characters in `font`. -func (font PdfFont) GetAverageCharWidth() float64 { - t := font.actualFont() - if t == nil { - common.Log.Debug("ERROR: GetAverageCharWidth Not implemented for font type=%#T", font.context) - return 0.0 - } - return t.GetAverageCharWidth() -} - // actualFont returns the Font in font.context -func (font PdfFont) actualFont() Font { +// NOTE(gunnsth): Actually this only sanity checks the font.context as the returned font will be wrapped in an interface. +func (font PdfFont) actualFont() fonts.Font { if font.context == nil { common.Log.Debug("ERROR: actualFont. context is nil. font=%s", font) } diff --git a/pdf/model/font_composite.go b/pdf/model/font_composite.go index 951931b3..883e9622 100644 --- a/pdf/model/font_composite.go +++ b/pdf/model/font_composite.go @@ -131,15 +131,6 @@ func (font pdfFontType0) GetCharMetrics(code uint16) (fonts.CharMetrics, bool) { return font.DescendantFont.GetCharMetrics(code) } -// GetAverageCharWidth returns the average width of all the characters in `font`. -func (font pdfFontType0) GetAverageCharWidth() float64 { - if font.DescendantFont == nil { - common.Log.Debug("ERROR: No descendant. font=%s", font) - return 0.0 - } - return font.DescendantFont.GetAverageCharWidth() -} - // Encoder returns the font's text encoder. 
func (font pdfFontType0) Encoder() textencoding.TextEncoder { return font.encoder diff --git a/pdf/model/fonts/font.go b/pdf/model/fonts/font.go index 34755e7b..fc3b0f39 100644 --- a/pdf/model/fonts/font.go +++ b/pdf/model/fonts/font.go @@ -18,7 +18,6 @@ type Font interface { Encoder() textencoding.TextEncoder SetEncoder(encoder textencoding.TextEncoder) GetGlyphCharMetrics(glyph string) (CharMetrics, bool) - GetAverageCharWidth() float64 ToPdfObject() core.PdfObject } From d29f9a6a34e22b0b84114ca53a07255ea8637ecb Mon Sep 17 00:00:00 2001 From: Gunnsteinn Hall Date: Wed, 28 Nov 2018 23:25:31 +0000 Subject: [PATCH 2/3] Adding Height and Width methods for PdfRectangle --- pdf/model/structures.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pdf/model/structures.go b/pdf/model/structures.go index 9bac263a..b58ff353 100644 --- a/pdf/model/structures.go +++ b/pdf/model/structures.go @@ -11,6 +11,7 @@ package model import ( "errors" "fmt" + "math" "regexp" "strconv" @@ -58,6 +59,16 @@ func NewPdfRectangle(arr PdfObjectArray) (*PdfRectangle, error) { return &rect, nil } +// Height returns the height of `rect`. +func (rect *PdfRectangle) Height() float64 { + return math.Abs(rect.Ury - rect.Lly) +} + +// Width returns the width of `rect`. +func (rect *PdfRectangle) Width() float64 { + return math.Abs(rect.Urx - rect.Llx) +} + // Convert to a PDF object. 
func (rect *PdfRectangle) ToPdfObject() PdfObject { arr := MakeArray(MakeFloat(rect.Llx), MakeFloat(rect.Lly), MakeFloat(rect.Urx), MakeFloat(rect.Ury)) From e6b768c06cdc1d3b078b74e210581ee126be5683 Mon Sep 17 00:00:00 2001 From: Gunnsteinn Hall Date: Thu, 29 Nov 2018 01:09:34 +0000 Subject: [PATCH 3/3] Remove GetAverageCharWidth --- pdf/model/font_composite.go | 17 ----------------- pdf/model/font_simple.go | 12 ------------ pdf/model/fonts/courier.go | 5 ----- pdf/model/fonts/courier_bold.go | 5 ----- pdf/model/fonts/courier_bold_oblique.go | 5 ----- pdf/model/fonts/courier_oblique.go | 5 ----- pdf/model/fonts/font.go | 8 -------- pdf/model/fonts/helvetica.go | 5 ----- pdf/model/fonts/helvetica_bold.go | 5 ----- pdf/model/fonts/helvetica_bold_oblique.go | 5 ----- pdf/model/fonts/helvetica_oblique.go | 5 ----- pdf/model/fonts/symbol.go | 5 ----- pdf/model/fonts/times_bold.go | 5 ----- pdf/model/fonts/times_bold_italic.go | 5 ----- pdf/model/fonts/times_italic.go | 5 ----- pdf/model/fonts/times_roman.go | 5 ----- pdf/model/fonts/zapfdingbats.go | 5 ----- 17 files changed, 107 deletions(-) diff --git a/pdf/model/font_composite.go b/pdf/model/font_composite.go index 883e9622..1e26a093 100644 --- a/pdf/model/font_composite.go +++ b/pdf/model/font_composite.go @@ -244,11 +244,6 @@ func (font pdfCIDFontType0) GetCharMetrics(code uint16) (fonts.CharMetrics, bool return fonts.CharMetrics{}, true } -// GetAverageCharWidth returns the average width of all the characters in `font`. -func (font pdfCIDFontType0) GetAverageCharWidth() float64 { - return 0.0 -} - // ToPdfObject converts the pdfCIDFontType0 to a PDF representation. func (font *pdfCIDFontType0) ToPdfObject() core.PdfObject { return core.MakeNull() @@ -369,18 +364,6 @@ func (font pdfCIDFontType2) GetCharMetrics(code uint16) (fonts.CharMetrics, bool return fonts.CharMetrics{Wx: float64(w)}, true } -// GetAverageCharWidth returns the average width of all the characters in `font`. 
-func (font pdfCIDFontType2) GetAverageCharWidth() float64 { - if len(font.runeToWidthMap) == 0 { - return 0.0 - } - total := 0 - for _, w := range font.runeToWidthMap { - total += w - } - return float64(total) / float64(len(font.runeToWidthMap)) -} - // ToPdfObject converts the pdfCIDFontType2 to a PDF representation. func (font *pdfCIDFontType2) ToPdfObject() core.PdfObject { if font.container == nil { diff --git a/pdf/model/font_simple.go b/pdf/model/font_simple.go index 8e570c1a..5cbe0607 100644 --- a/pdf/model/font_simple.go +++ b/pdf/model/font_simple.go @@ -149,18 +149,6 @@ func (font pdfFontSimple) GetCharMetrics(code uint16) (fonts.CharMetrics, bool) return fonts.CharMetrics{}, false } -// GetAverageCharWidth returns the average width of all the characters in `font`. -func (font pdfFontSimple) GetAverageCharWidth() float64 { - if font.fontMetrics != nil { - return fonts.AverageCharWidth(font.fontMetrics) - } - total := 0.0 - for _, w := range font.charWidths { - total += w - } - return total / float64(len(font.charWidths)) -} - // newSimpleFontFromPdfObject creates a pdfFontSimple from dictionary `d`. Elements of `d` that // are already parsed are contained in `base`. // Standard 14 fonts need to to specify their builtin encoders in the `std14Encoder` parameter. diff --git a/pdf/model/fonts/courier.go b/pdf/model/fonts/courier.go index 89f65967..028c52b0 100644 --- a/pdf/model/fonts/courier.go +++ b/pdf/model/fonts/courier.go @@ -47,11 +47,6 @@ func (font FontCourier) GetGlyphCharMetrics(glyph string) (CharMetrics, bool) { return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. -func (font FontCourier) GetAverageCharWidth() float64 { - return AverageCharWidth(CourierCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. 
func (font FontCourier) ToPdfObject() core.PdfObject { fontDict := core.MakeDict() diff --git a/pdf/model/fonts/courier_bold.go b/pdf/model/fonts/courier_bold.go index b34250ac..8c92c9e5 100644 --- a/pdf/model/fonts/courier_bold.go +++ b/pdf/model/fonts/courier_bold.go @@ -47,11 +47,6 @@ func (font FontCourierBold) GetGlyphCharMetrics(glyph string) (CharMetrics, bool return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. -func (font FontCourierBold) GetAverageCharWidth() float64 { - return AverageCharWidth(CourierBoldCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. func (font FontCourierBold) ToPdfObject() core.PdfObject { fontDict := core.MakeDict() diff --git a/pdf/model/fonts/courier_bold_oblique.go b/pdf/model/fonts/courier_bold_oblique.go index 0d33421b..6bbc7d4e 100644 --- a/pdf/model/fonts/courier_bold_oblique.go +++ b/pdf/model/fonts/courier_bold_oblique.go @@ -48,11 +48,6 @@ func (font FontCourierBoldOblique) GetGlyphCharMetrics(glyph string) (CharMetric return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. -func (font FontCourierBoldOblique) GetAverageCharWidth() float64 { - return AverageCharWidth(CourierBoldObliqueCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. func (font FontCourierBoldOblique) ToPdfObject() core.PdfObject { fontDict := core.MakeDict() diff --git a/pdf/model/fonts/courier_oblique.go b/pdf/model/fonts/courier_oblique.go index 0420dbb2..5981d13d 100644 --- a/pdf/model/fonts/courier_oblique.go +++ b/pdf/model/fonts/courier_oblique.go @@ -47,11 +47,6 @@ func (font FontCourierOblique) GetGlyphCharMetrics(glyph string) (CharMetrics, b return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. 
-func (font FontCourierOblique) GetAverageCharWidth() float64 { - return AverageCharWidth(CourierObliqueCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. func (font FontCourierOblique) ToPdfObject() core.PdfObject { fontDict := core.MakeDict() diff --git a/pdf/model/fonts/font.go b/pdf/model/fonts/font.go index fc3b0f39..2d9a20f3 100644 --- a/pdf/model/fonts/font.go +++ b/pdf/model/fonts/font.go @@ -31,11 +31,3 @@ type CharMetrics struct { func (m CharMetrics) String() string { return fmt.Sprintf("<%q,%.1f,%.1f>", m.GlyphName, m.Wx, m.Wy) } - -func AverageCharWidth(metrics map[string]CharMetrics) float64 { - total := 0.0 - for _, m := range metrics { - total += m.Wx - } - return total / float64(len(metrics)) -} diff --git a/pdf/model/fonts/helvetica.go b/pdf/model/fonts/helvetica.go index 02ef6383..49dccf88 100644 --- a/pdf/model/fonts/helvetica.go +++ b/pdf/model/fonts/helvetica.go @@ -47,11 +47,6 @@ func (font FontHelvetica) GetGlyphCharMetrics(glyph string) (CharMetrics, bool) return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. -func (font FontHelvetica) GetAverageCharWidth() float64 { - return AverageCharWidth(HelveticaCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. func (font FontHelvetica) ToPdfObject() core.PdfObject { fontDict := core.MakeDict() diff --git a/pdf/model/fonts/helvetica_bold.go b/pdf/model/fonts/helvetica_bold.go index 12bc34e8..82491de4 100644 --- a/pdf/model/fonts/helvetica_bold.go +++ b/pdf/model/fonts/helvetica_bold.go @@ -48,11 +48,6 @@ func (font FontHelveticaBold) GetGlyphCharMetrics(glyph string) (CharMetrics, bo return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. -func (font FontHelveticaBold) GetAverageCharWidth() float64 { - return AverageCharWidth(HelveticaBoldCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. 
func (font FontHelveticaBold) ToPdfObject() core.PdfObject { fontDict := core.MakeDict() diff --git a/pdf/model/fonts/helvetica_bold_oblique.go b/pdf/model/fonts/helvetica_bold_oblique.go index 63a642d4..e265b5a2 100644 --- a/pdf/model/fonts/helvetica_bold_oblique.go +++ b/pdf/model/fonts/helvetica_bold_oblique.go @@ -47,11 +47,6 @@ func (font FontHelveticaBoldOblique) GetGlyphCharMetrics(glyph string) (CharMetr return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. -func (font FontHelveticaBoldOblique) GetAverageCharWidth() float64 { - return AverageCharWidth(HelveticaObliqueCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. func (font FontHelveticaBoldOblique) ToPdfObject() core.PdfObject { fontDict := core.MakeDict() diff --git a/pdf/model/fonts/helvetica_oblique.go b/pdf/model/fonts/helvetica_oblique.go index bdfb66aa..d2349cac 100644 --- a/pdf/model/fonts/helvetica_oblique.go +++ b/pdf/model/fonts/helvetica_oblique.go @@ -47,11 +47,6 @@ func (font FontHelveticaOblique) GetGlyphCharMetrics(glyph string) (CharMetrics, return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. -func (font FontHelveticaOblique) GetAverageCharWidth() float64 { - return AverageCharWidth(HelveticaObliqueCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. func (font FontHelveticaOblique) ToPdfObject() core.PdfObject { fontDict := core.MakeDict() diff --git a/pdf/model/fonts/symbol.go b/pdf/model/fonts/symbol.go index fb04755c..6a2b9dd4 100644 --- a/pdf/model/fonts/symbol.go +++ b/pdf/model/fonts/symbol.go @@ -48,11 +48,6 @@ func (font FontSymbol) GetGlyphCharMetrics(glyph string) (CharMetrics, bool) { return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. 
-func (font FontSymbol) GetAverageCharWidth() float64 { - return AverageCharWidth(SymbolCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. func (font FontSymbol) ToPdfObject() core.PdfObject { fontDict := core.MakeDict() diff --git a/pdf/model/fonts/times_bold.go b/pdf/model/fonts/times_bold.go index fb4c98fa..18178c2e 100644 --- a/pdf/model/fonts/times_bold.go +++ b/pdf/model/fonts/times_bold.go @@ -47,11 +47,6 @@ func (font FontTimesBold) GetGlyphCharMetrics(glyph string) (CharMetrics, bool) return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. -func (font FontTimesBold) GetAverageCharWidth() float64 { - return AverageCharWidth(TimesBoldCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. func (font FontTimesBold) ToPdfObject() core.PdfObject { fontDict := core.MakeDict() diff --git a/pdf/model/fonts/times_bold_italic.go b/pdf/model/fonts/times_bold_italic.go index 93c2eace..584425ad 100644 --- a/pdf/model/fonts/times_bold_italic.go +++ b/pdf/model/fonts/times_bold_italic.go @@ -47,11 +47,6 @@ func (font FontTimesBoldItalic) GetGlyphCharMetrics(glyph string) (CharMetrics, return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. -func (font FontTimesBoldItalic) GetAverageCharWidth() float64 { - return AverageCharWidth(TimesBoldItalicCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. func (font FontTimesBoldItalic) ToPdfObject() core.PdfObject { fontDict := core.MakeDict() diff --git a/pdf/model/fonts/times_italic.go b/pdf/model/fonts/times_italic.go index 92657446..9396c63a 100644 --- a/pdf/model/fonts/times_italic.go +++ b/pdf/model/fonts/times_italic.go @@ -47,11 +47,6 @@ func (font FontTimesItalic) GetGlyphCharMetrics(glyph string) (CharMetrics, bool return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. 
-func (font FontTimesItalic) GetAverageCharWidth() float64 { - return AverageCharWidth(TimesItalicCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. func (font FontTimesItalic) ToPdfObject() core.PdfObject { fontDict := core.MakeDict() diff --git a/pdf/model/fonts/times_roman.go b/pdf/model/fonts/times_roman.go index f2be14bd..dd4d58b1 100644 --- a/pdf/model/fonts/times_roman.go +++ b/pdf/model/fonts/times_roman.go @@ -47,11 +47,6 @@ func (font FontTimesRoman) GetGlyphCharMetrics(glyph string) (CharMetrics, bool) return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. -func (font FontTimesRoman) GetAverageCharWidth() float64 { - return AverageCharWidth(TimesRomanCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. func (font FontTimesRoman) ToPdfObject() core.PdfObject { fontDict := core.MakeDict() diff --git a/pdf/model/fonts/zapfdingbats.go b/pdf/model/fonts/zapfdingbats.go index 956eed34..cae87d64 100644 --- a/pdf/model/fonts/zapfdingbats.go +++ b/pdf/model/fonts/zapfdingbats.go @@ -48,11 +48,6 @@ func (font FontZapfDingbats) GetGlyphCharMetrics(glyph string) (CharMetrics, boo return metrics, true } -// GetAverageCharWidth returns the average width of all glyphs in the font. -func (font FontZapfDingbats) GetAverageCharWidth() float64 { - return AverageCharWidth(ZapfDingbatsCharMetrics) -} - // ToPdfObject returns a primitive PDF object representation of the font. func (font FontZapfDingbats) ToPdfObject() core.PdfObject { fontDict := core.MakeDict()