Merge branch 'extract.text' of https://github.com/peterwilliams97/unidoc into v3-peterwilliams97-extract.text

This commit is contained in:
Gunnsteinn Hall 2018-11-28 23:33:31 +00:00
commit f04f83b271
5 changed files with 268 additions and 70 deletions

View File

@ -18,6 +18,9 @@ type Extractor struct {
// accessCount is used to set fontEntry.access to an incrementing number.
accessCount int64
// textCount is an incrementing number used to identify XYText objects.
textCount int64
}
// New returns an Extractor instance for extracting content from the input PDF page.

View File

@ -13,11 +13,13 @@ import (
"runtime"
"sort"
"strings"
"unicode"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/contentstream"
"github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/model"
"golang.org/x/text/unicode/norm"
)
// ExtractText processes and extracts all text data in content streams and returns as a string.
@ -49,7 +51,7 @@ func (e *Extractor) ExtractXYText() (*TextList, int, int, error) {
cstreamParser := contentstream.NewContentStreamParser(e.contents)
operations, err := cstreamParser.Parse()
if err != nil {
common.Log.Debug("ExtractXYText: parse failed. err=%v", err)
common.Log.Debug("ERROR: ExtractXYText parse failed. err=%v", err)
return textList, state.numChars, state.numMisses, err
}
@ -309,7 +311,7 @@ func (to *textObject) nextLine() {
// setTextMatrix "Tm".
// Set the text matrix, Tm, and the text line matrix, Tlm to the Matrix specified by the 6 numbers
// in `f` (page 250)
// in `f` (page 250).
func (to *textObject) setTextMatrix(f []float64) {
if len(f) != 6 {
common.Log.Debug("ERROR: len(f) != 6 (%d)", len(f))
@ -317,25 +319,23 @@ func (to *textObject) setTextMatrix(f []float64) {
}
a, b, c, d, tx, ty := f[0], f[1], f[2], f[3], f[4], f[5]
to.Tm = contentstream.NewMatrix(a, b, c, d, tx, ty)
to.Tlm = contentstream.NewMatrix(a, b, c, d, tx, ty)
common.Log.Debug("setTextMatrix: Tm=%s", to.Tm)
to.Tlm = to.Tm
}
// showText implements the "Tj" operator (show a text string).
// It passes `charcodes` straight to renderText and returns whatever error renderText reports.
func (to *textObject) showText(charcodes []byte) error {
return to.renderText(charcodes)
}
// showTextAdjusted "TJ" Show text with adjustable spacing.
// showTextAdjusted "TJ". Show text with adjustable spacing.
func (to *textObject) showTextAdjusted(args *core.PdfObjectArray) error {
vertical := false
for _, o := range args.Elements() {
switch o.(type) {
case *core.PdfObjectFloat, *core.PdfObjectInteger:
// The following is supposed to be equivalent to the existing Unidoc implementation.
x, err := core.GetNumberAsFloat(o)
if err != nil {
common.Log.Debug("showTextAdjusted: Bad numerical arg. o=%s args=%+v", o, args)
common.Log.Debug("ERROR: showTextAdjusted. Bad numerical arg. o=%s args=%+v", o, args)
return err
}
dx, dy := -x*0.001*to.State.Tfs, 0.0
@ -344,23 +344,23 @@ func (to *textObject) showTextAdjusted(args *core.PdfObjectArray) error {
}
td := translationMatrix(Point{X: dx, Y: dy})
to.Tm = td.Mult(to.Tm)
common.Log.Debug("showTextAdjusted: dx,dy=%3f,%.3f Tm=%s", dx, dy, to.Tm)
common.Log.Trace("showTextAdjusted: dx,dy=%3f,%.3f Tm=%s", dx, dy, to.Tm)
case *core.PdfObjectString:
charcodes, ok := core.GetStringBytes(o)
if !ok {
common.Log.Debug("showTextAdjusted: Bad string arg. o=%s args=%+v", o, args)
common.Log.Trace("showTextAdjusted: Bad string arg. o=%s args=%+v", o, args)
return core.ErrTypeError
}
to.renderText(charcodes)
default:
common.Log.Debug("showTextAdjusted. Unexpected type (%T) args=%+v", o, args)
common.Log.Debug("ERROR: showTextAdjusted. Unexpected type (%T) args=%+v", o, args)
return core.ErrTypeError
}
}
return nil
}
// setTextLeading "TL" Set text leading.
// setTextLeading "TL". Set text leading.
func (to *textObject) setTextLeading(y float64) {
if to == nil || to.State == nil {
return
@ -368,7 +368,7 @@ func (to *textObject) setTextLeading(y float64) {
to.State.Tl = y
}
// setCharSpacing "Tc" Set character spacing.
// setCharSpacing "Tc". Set character spacing.
func (to *textObject) setCharSpacing(x float64) {
if to == nil {
return
@ -376,7 +376,7 @@ func (to *textObject) setCharSpacing(x float64) {
to.State.Tc = x
}
// setFont "Tf" Set font.
// setFont "Tf". Set font.
func (to *textObject) setFont(name string, size float64) error {
if to == nil {
return nil
@ -399,7 +399,7 @@ func (to *textObject) setFont(name string, size float64) error {
return nil
}
// setTextRenderMode "Tr" Set text rendering mode.
// setTextRenderMode "Tr". Set text rendering mode.
func (to *textObject) setTextRenderMode(mode int) {
if to == nil {
return
@ -407,7 +407,7 @@ func (to *textObject) setTextRenderMode(mode int) {
to.State.Tmode = RenderMode(mode)
}
// setTextRise "Ts" Set text rise.
// setTextRise "Ts". Set text rise.
func (to *textObject) setTextRise(y float64) {
if to == nil {
return
@ -415,7 +415,7 @@ func (to *textObject) setTextRise(y float64) {
to.State.Trise = y
}
// setWordSpacing "Tw" Set word spacing.
// setWordSpacing "Tw". Set word spacing.
func (to *textObject) setWordSpacing(y float64) {
if to == nil {
return
@ -423,7 +423,7 @@ func (to *textObject) setWordSpacing(y float64) {
to.State.Tw = y
}
// setHorizScaling "Tz" Set horizontal scaling.
// setHorizScaling "Tz". Set horizontal scaling.
func (to *textObject) setHorizScaling(y float64) {
if to == nil {
return
@ -577,9 +577,6 @@ type textObject struct {
Tm contentstream.Matrix // Text matrix. For the character pointer.
Tlm contentstream.Matrix // Text line matrix. For the start of line pointer.
Texts []XYText // Text gets written here.
// These fields are used to implement existing UniDoc behaviour.
xPos, yPos float64
}
// newTextState returns a default textState.
@ -610,6 +607,9 @@ func (to *textObject) renderText(data []byte) error {
charcodes := font.BytesToCharcodes(data)
runes, numChars, numMisses := font.CharcodesToUnicode(charcodes)
if numMisses > 0 {
common.Log.Debug("renderText: numChars=%d numMisses=%d", numChars, numMisses)
}
to.State.numChars += numChars
to.State.numMisses += numMisses
@ -629,10 +629,14 @@ func (to *textObject) renderText(data []byte) error {
0, tfs,
0, state.Trise)
common.Log.Debug("==========================================")
common.Log.Debug("%d codes=%+v runes=%q", len(charcodes), charcodes, runes)
common.Log.Trace("renderText: %d codes=%+v runes=%q", len(charcodes), charcodes, runes)
for i, r := range runes {
// XXX(peterwilliams97) Need to find and fix cases where this happens.
if r == "\x00" {
continue
}
code := charcodes[i]
// The location of the text on the page in device coordinates is given by trm, the text
// rendering matrix.
@ -656,14 +660,13 @@ func (to *textObject) renderText(data []byte) error {
// c is the character size in unscaled text units.
c := Point{X: m.Wx * glyphTextRatio, Y: m.Wy * glyphTextRatio}
// t0 is the end of this character.
// t is the displacement of the text cursor when the character is rendered.
// float tx = displacementX * fontSize * horizontalScaling;
// w = 0
t0 := Point{X: (c.X*tfs + w) * th}
t := Point{X: (c.X*tfs + state.Tc + w) * th}
// td, td0 are t, t0 in matrix form.
// td0 is where this char ends. td is where the next char stats.
// td0 is where this character ends. td is where the next character starts.
td0 := translationMatrix(t0)
td := translationMatrix(t)
@ -671,19 +674,17 @@ func (to *textObject) renderText(data []byte) error {
common.Log.Trace("tfs=%.3f th=%.3f Tc=%.3f w=%.3f (Tw=%.3f)", tfs, th, state.Tc, w, state.Tw)
common.Log.Trace("m=%s c=%+v t0=%+v td0=%s trm0=%s", m, c, t0, td0, td0.Mult(to.Tm).Mult(to.gs.CTM))
nextTm := td.Mult(to.Tm)
common.Log.Trace("nextTm=%s", nextTm)
xyt := newXYText(
xyt := to.newXYText(
string(r),
trm,
translation(td0.Mult(to.Tm).Mult(to.gs.CTM)),
1.0*trm.ScalingFactorY(),
spaceWidth*trm.ScalingFactorX())
common.Log.Trace("i=%d code=%d, xyt=%s", i, code, xyt)
common.Log.Trace("i=%d code=%d xyt=%s trm=%s", i, code, xyt, trm)
to.Texts = append(to.Texts, xyt)
// update the text matrix by the displacement of the text location.
to.Tm = nextTm
to.Tm = td.Mult(to.Tm)
common.Log.Trace("to.Tm=%s", to.Tm)
}
@ -714,36 +715,45 @@ func (to *textObject) moveTo(tx, ty float64) {
}
// XYText represents text drawn on a page and its position in device coordinates.
// All dimensions are in device coordinates.
type XYText struct {
Trm contentstream.Matrix
OrientedStart Point // Left of text in orientation where text is horizontal.
OrientedEnd Point // Right of text in orientation where text is horizontal.
ColorStroking model.PdfColor // Colour that text is stroked with, if any.
ColorNonStroking model.PdfColor // Colour that text is filled with, if any.
Orient int
Text string
SpaceWidth float64
Font string
FontSize float64
Text string // The text.
Orient int // The text orientation.
OrientedStart Point // Left of text in orientation where text is horizontal.
OrientedEnd Point // Right of text in orientation where text is horizontal.
Height float64 // Text height.
SpaceWidth float64 // Best guess at the width of a space in the font the text was rendered with.
count int64 // To help with reading debug logs.
}
func newXYText(text string, trm contentstream.Matrix, end Point, spaceWidth float64) XYText {
// newXYText returns an XYText for text `text` rendered with text rendering matrix `trm` and end
// of character device coordinates `end`. `spaceWidth` is our best guess at the width of a space in
// the font the text is rendered in device coordinates.
func (to *textObject) newXYText(text string, trm contentstream.Matrix, end Point,
height, spaceWidth float64) XYText {
to.e.textCount++
theta := trm.Angle()
if theta%180 == 0 {
height = trm.ScalingFactorY()
} else {
height = trm.ScalingFactorX()
}
return XYText{
Text: text,
Trm: trm,
Orient: theta,
OrientedStart: translation(trm).Rotate(theta),
OrientedEnd: end.Rotate(theta),
Orient: theta,
Height: height,
SpaceWidth: spaceWidth,
count: to.e.textCount,
}
}
// String returns a string describing `t`.
func (t XYText) String() string {
return fmt.Sprintf("XYText{%s %.1f |%d| [%.3f,%.3f] %q}",
t.Trm.String(), t.Width(), t.Orient, t.OrientedStart.X, t.OrientedStart.Y,
truncate(t.Text, 100))
return fmt.Sprintf("XYText{@%03d [%.3f,%.3f] %.1f %d° %q}",
t.count, t.OrientedStart.X, t.OrientedStart.Y, t.Width(), t.Orient, truncate(t.Text, 100))
}
// Width returns the width of `t`.Text in the text direction.
@ -755,13 +765,25 @@ func (t XYText) Width() float64 {
type TextList []XYText
// Length returns the number of elements in `tl`.
func (tl *TextList) Length() int {
return len(*tl)
func (tl TextList) Length() int {
return len(tl)
}
// height returns the largest Height among the elements of `tl`.
// The result is never below zero: an empty list, or one whose heights are all non-positive,
// yields 0.
func (tl TextList) height() float64 {
	var tallest float64
	for i := range tl {
		if h := tl[i].Height; h > tallest {
			tallest = h
		}
	}
	return tallest
}
// ToText returns the contents of `tl` as a single string.
func (tl *TextList) ToText() string {
func (tl TextList) ToText() string {
tl.printTexts("ToText: before sorting")
tl.SortPosition()
lines := tl.toLines()
@ -776,12 +798,16 @@ func (tl *TextList) ToText() string {
// Sorting is by orientation then top to bottom, left to right when page is orientated so that text
// is horizontal.
func (tl *TextList) SortPosition() {
fontHeight := tl.height()
// We sort with a y tolerance to allow for subscripts, diacritics etc.
tol := min(fontHeight*0.2, 5.0)
common.Log.Trace("SortPosition: fontHeight=%.1f tol=%.1f", fontHeight, tol)
sort.SliceStable(*tl, func(i, j int) bool {
ti, tj := (*tl)[i], (*tl)[j]
if ti.Orient != tj.Orient {
return ti.Orient < tj.Orient
}
if ti.OrientedStart.Y != tj.OrientedStart.Y {
if math.Abs(ti.OrientedStart.Y-tj.OrientedStart.Y) > tol {
return ti.OrientedStart.Y > tj.OrientedStart.Y
}
return ti.OrientedStart.X < tj.OrientedStart.X
@ -793,26 +819,31 @@ type Line struct {
Y float64 // y position of line.
Dx []float64 // x distance between successive words in line.
Text string // text in the line.
Words []string // words in the line
Words []string // words in the line.
}
// toLines returns the text and positions in `tl` as a slice of Line.
// NOTE: Caller must sort the text list by top-to-bottom, left-to-right (for orientation adjusted so
// NOTE: Caller must sort the text list top-to-bottom, left-to-right (for orientation adjusted so
// that text is horizontal) before calling this function.
func (tl TextList) toLines() []Line {
// We divide `tl` into slices which contain texts with the same orientation, extract the lines
// for each orientation then return the concatenation of these lines sorted by orientation.
tlOrient := make(map[int]TextList, len(tl))
for _, t := range tl {
tlOrient[t.Orient] = append(tlOrient[t.Orient], t)
}
lines := make([]Line, 0, 4)
for _, o := range []int{0, 90, 180, 270} {
var lines []Line
for _, o := range orientKeys(tlOrient) {
lines = append(lines, tlOrient[o].toLinesOrient()...)
}
return lines
}
// toLinesOrient returns the text and positions in `tl` as a slice of Line.
// NOTE: Caller must sort the text list top-to-bottom, left-to-right before calling this function.
// NOTE: This function only works on text lists where all text is the same orientation so it should
// only be called from toLines.
// Caller must sort the text list top-to-bottom, left-to-right (for orientation adjusted so
// that text is horizontal) before calling this function.
func (tl TextList) toLinesOrient() []Line {
tl.printTexts("toLines: before")
if len(tl) == 0 {
@ -827,13 +858,14 @@ func (tl TextList) toLinesOrient() []Line {
averageCharWidth := exponAve{}
wordSpacing := exponAve{}
lastEndX := 0.0 // tl[i-1].End.X
lastEndX := 0.0 // lastEndX is tl[i-1].OrientedEnd.X
for _, t := range tl {
if t.OrientedStart.Y < y {
if len(words) > 0 {
line := newLine(y, x, words)
if averageCharWidth.running {
line = combineDiacritics(line, averageCharWidth.ave)
line = removeDuplicates(line, averageCharWidth.ave)
}
lines = append(lines, line)
@ -893,6 +925,24 @@ func (tl TextList) toLinesOrient() []Line {
return lines
}
// orientKeys returns the keys of `tlOrient` in ascending order.
// Map iteration order is random in Go, so the keys are sorted to give callers a deterministic
// order. The result is always non-nil, even for an empty or nil map.
func orientKeys(tlOrient map[int]TextList) []int {
	// The final length is known up front, so allocate once instead of growing on append.
	keys := make([]int, 0, len(tlOrient))
	for k := range tlOrient {
		keys = append(keys, k)
	}
	sort.Ints(keys)
	return keys
}
// min returns the smaller of `a` and `b`.
// `b` is the result unless `a` compares strictly less than it, so equal arguments yield `b`.
func min(a, b float64) float64 {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
// exponAve implements an exponential average.
type exponAve struct {
ave float64 // Current average value.
@ -974,6 +1024,144 @@ func removeDuplicates(line Line, charWidth float64) Line {
return Line{Y: line.Y, Dx: dxList, Text: strings.Join(words, ""), Words: words}
}
// combineDiacritics returns `line` with diacritics close to characters combined with the characters.
// `charWidth` is the average character width for the line.
// We have to do this because PDF can render diacritics separately to the characters they attach to
// in extracted text.
//
// Consecutive words are gathered into a group while the group holds at most one non-diacritic
// (as counted by countDiacritic) and the gaps accumulated inside the group stay within 20% of
// `charWidth`. Each group is merged into one word by combine. The gap recorded before a merged
// word is the gap that preceded the first member of its group.
//
// `line` is returned unchanged when it has no gaps, or when `line.Words` does not contain exactly
// one more element than `line.Dx`.
func combineDiacritics(line Line, charWidth float64) Line {
	if len(line.Dx) == 0 {
		return line
	}
	// The loop below reads line.Words[i+1] for every line.Dx[i]. A shorter Words would index out
	// of range and a longer one would silently lose its tail, so reject both.
	if len(line.Words) != len(line.Dx)+1 {
		common.Log.Debug("ERROR: combineDiacritics. Inconsistent line. words=%d dx=%d",
			len(line.Words), len(line.Dx))
		return line
	}

	tol := charWidth * 0.2
	common.Log.Trace("combineDiacritics: charWidth=%.2f tol=%.2f", charWidth, tol)

	words := make([]string, 0, len(line.Words))
	dxList := make([]float64, 0, len(line.Dx))

	first, numChars := countDiacritic(line.Words[0])
	parts := []string{first} // Members of the group being built. Never empty.
	dx0 := 0.0               // Gap before the first member of the current group.
	delta := 0.0             // Sum of the gaps inside the current group.

	// flush emits the current group as one word. Every word except the first is preceded by a
	// gap, so len(words) == len(dxList)+1 holds after each call.
	flush := func() {
		if len(words) > 0 {
			dxList = append(dxList, dx0)
		}
		words = append(words, combine(parts))
	}

	for i, dx := range line.Dx {
		w, c := countDiacritic(line.Words[i+1])
		if numChars+c <= 1 && delta+dx <= tol {
			// Still at most one base character and close enough: extend the group.
			parts = append(parts, w)
			numChars += c
			delta += dx
			continue
		}
		flush()
		parts = []string{w}
		numChars = c
		dx0 = dx
		delta = 0.0
	}
	flush()

	return Line{Y: line.Y, Dx: dxList, Text: strings.Join(words, ""), Words: words}
}
// combine merges `parts`, a base character and the diacritics that attach to it, into one string.
//
// The non-diacritic is moved to the front with the diacritics after it, keeping the relative
// order within each class. A part counts as a diacritic when its first rune is in Unicode
// category Mn or Sk. Each part is then NFKC-normalized and space-trimmed on its own and the
// results are concatenated; the concatenation itself is not normalized again.
// NOTE(review): the TrimSpace presumably removes the space that NFKC decomposition of a spacing
// accent (e.g. U+00B4 -> U+0020 U+0301) produces — confirm.
//
// `parts` is reordered and overwritten in place.
func combine(parts []string) string {
	if len(parts) == 1 {
		// Must be a non-diacritic.
		return parts[0]
	}

	diacritic := make(map[string]bool, len(parts))
	for _, w := range parts {
		// Guard the first-rune lookup so that an empty part is classified as a non-diacritic
		// instead of panicking on an out-of-range index.
		isMark := false
		if runes := []rune(w); len(runes) > 0 {
			isMark = unicode.Is(unicode.Mn, runes[0]) || unicode.Is(unicode.Sk, runes[0])
		}
		diacritic[w] = isMark
	}
	// Base character first, marks after it.
	sort.SliceStable(parts, func(i, j int) bool { return !diacritic[parts[i]] && diacritic[parts[j]] })

	for i, w := range parts {
		parts[i] = strings.TrimSpace(norm.NFKC.String(w))
	}
	return strings.Join(parts, "")
}
// countDiacritic classifies the word `w` and returns the form of it to use when combining,
// together with the number of non-diacritics it contains (0 or 1).
//   - A word that is not exactly one rune long is returned unchanged with a count of 1.
//   - A single rune found in the `diacritics` table is replaced by its table entry, count 0.
//   - Any other single rune in Unicode category Mn or Sk is returned unchanged, count 0.
//   - Every other single rune is returned unchanged, count 1.
func countDiacritic(w string) (string, int) {
	runes := []rune(w)
	if len(runes) != 1 {
		return w, 1
	}
	r := runes[0]
	if mark, found := diacritics[r]; found {
		return mark, 0
	}
	if unicode.In(r, unicode.Mn, unicode.Sk) {
		return w, 0
	}
	return w, 1
}
// diacritics maps stand-alone (spacing or modifier) forms of diacritics to the combining mark that
// countDiacritic substitutes for them. Every value is a single code point in the range
// U+0300..U+0359. This map was copied from PdfBox.
// NOTE(review): this table was described as holding characters that are not classified as
// unicode.Mn or unicode.Sk, but some keys appear to be in those categories (e.g. U+005E and U+0060
// are Sk, U+0485 and U+0486 are Mn) — confirm. countDiacritic applies the table regardless of the
// key's category.
// The trailing comments give the Unicode character names: key -> value.
var diacritics = map[rune]string{
0x0060: "\u0300", // GRAVE ACCENT -> COMBINING GRAVE ACCENT
0x02CB: "\u0300", // MODIFIER LETTER GRAVE ACCENT -> COMBINING GRAVE ACCENT
0x0027: "\u0301", // APOSTROPHE -> COMBINING ACUTE ACCENT
0x02B9: "\u0301", // MODIFIER LETTER PRIME -> COMBINING ACUTE ACCENT
0x02CA: "\u0301", // MODIFIER LETTER ACUTE ACCENT -> COMBINING ACUTE ACCENT
0x005e: "\u0302", // CIRCUMFLEX ACCENT -> COMBINING CIRCUMFLEX ACCENT
0x02C6: "\u0302", // MODIFIER LETTER CIRCUMFLEX ACCENT -> COMBINING CIRCUMFLEX ACCENT
0x007E: "\u0303", // TILDE -> COMBINING TILDE
0x02C9: "\u0304", // MODIFIER LETTER MACRON -> COMBINING MACRON
0x00B0: "\u030A", // DEGREE SIGN -> COMBINING RING ABOVE
0x02BA: "\u030B", // MODIFIER LETTER DOUBLE PRIME -> COMBINING DOUBLE ACUTE ACCENT
0x02C7: "\u030C", // CARON -> COMBINING CARON
0x02C8: "\u030D", // MODIFIER LETTER VERTICAL LINE -> COMBINING VERTICAL LINE ABOVE
0x0022: "\u030E", // QUOTATION MARK -> COMBINING DOUBLE VERTICAL LINE ABOVE
0x02BB: "\u0312", // MODIFIER LETTER TURNED COMMA -> COMBINING TURNED COMMA ABOVE
0x02BC: "\u0313", // MODIFIER LETTER APOSTROPHE -> COMBINING COMMA ABOVE
0x0486: "\u0313", // COMBINING CYRILLIC PSILI PNEUMATA -> COMBINING COMMA ABOVE
0x055A: "\u0313", // ARMENIAN APOSTROPHE -> COMBINING COMMA ABOVE
0x02BD: "\u0314", // MODIFIER LETTER REVERSED COMMA -> COMBINING REVERSED COMMA ABOVE
0x0485: "\u0314", // COMBINING CYRILLIC DASIA PNEUMATA -> COMBINING REVERSED COMMA ABOVE
0x0559: "\u0314", // ARMENIAN MODIFIER LETTER LEFT HALF RING -> COMBINING REVERSED COMMA ABOVE
0x02D4: "\u031D", // MODIFIER LETTER UP TACK -> COMBINING UP TACK BELOW
0x02D5: "\u031E", // MODIFIER LETTER DOWN TACK -> COMBINING DOWN TACK BELOW
0x02D6: "\u031F", // MODIFIER LETTER PLUS SIGN -> COMBINING PLUS SIGN BELOW
0x02D7: "\u0320", // MODIFIER LETTER MINUS SIGN -> COMBINING MINUS SIGN BELOW
0x02B2: "\u0321", // MODIFIER LETTER SMALL J -> COMBINING PALATALIZED HOOK BELOW
0x02CC: "\u0329", // MODIFIER LETTER LOW VERTICAL LINE -> COMBINING VERTICAL LINE BELOW
0x02B7: "\u032B", // MODIFIER LETTER SMALL W -> COMBINING INVERTED DOUBLE ARCH BELOW
0x02CD: "\u0331", // MODIFIER LETTER LOW MACRON -> COMBINING MACRON BELOW
0x005F: "\u0332", // LOW LINE -> COMBINING LOW LINE
0x204E: "\u0359", // LOW ASTERISK -> COMBINING ASTERISK BELOW
}
// getCurrentFont returns the font on top of the font stack, or DefaultFont if the font stack is
// empty.
func (to *textObject) getCurrentFont() *model.PdfFont {

View File

@ -16,6 +16,7 @@ import (
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/model"
"golang.org/x/text/unicode/norm"
)
// NOTE: We do a best effort at finding the PDF file because we don't keep PDF test files in this repo so you
@ -196,8 +197,10 @@ var fileExtractionTests = []struct {
},
{filename: "Ito_Formula.pdf",
expectedPageText: map[int][]string{
// 1: []string{"In the Itô stochastic calculus"},
1: []string{"In standard, non-stochastic calculus, one computes a derivative"},
1: []string{
"In the Itô stochastic calculus",
"In standard, non-stochastic calculus, one computes a derivative"},
2: []string{"Financial Economics Itôs Formula"},
},
},
{filename: "circ2.pdf",
@ -215,6 +218,11 @@ var fileExtractionTests = []struct {
1: []string{"entropy of a system of n identical resonators in a stationary radiation field"},
},
},
{filename: "thanh.pdf",
expectedPageText: map[int][]string{
1: []string{"Hàn Thé̂ Thành"},
},
},
}
// testExtractFile tests the ExtractTextWithStats text extractor on `filename` and compares the extracted
@ -244,6 +252,7 @@ func testExtractFile(t *testing.T, filename string, expectedPageText map[int][]s
if !ok {
t.Fatalf("%q doesn't have page %d", filename, pageNum)
}
actualText = norm.NFKC.String(actualText)
if !containsSentences(t, expectedSentences, actualText) {
t.Fatalf("Text mismatch filepath=%q page=%d", filepath, pageNum)
}
@ -291,8 +300,9 @@ func extractPageTexts(t *testing.T, filename string) (int, map[int]string) {
// containsSentences returns true if all strings `expectedSentences` are contained in `actualText`.
func containsSentences(t *testing.T, expectedSentences []string, actualText string) bool {
for _, e := range expectedSentences {
e = norm.NFKC.String(e)
if !strings.Contains(actualText, e) {
t.Errorf("No match for %#q", e)
t.Errorf("No match for %q", e)
return false
}
}

View File

@ -56,7 +56,7 @@ func (font PdfFont) GetFontDescriptor() (*PdfFontDescriptor, error) {
case *pdfCIDFontType2:
return t.fontDescriptor, nil
}
common.Log.Debug("ERROR: Cannot get font descriptor for font type %t (%s)", font, font)
common.Log.Debug("ERROR: Cannot get font descriptor for font type %T (%s)", font, font)
return nil, errors.New("font descriptor not found")
}
@ -67,7 +67,6 @@ func (font PdfFont) String() string {
enc = font.context.Encoder().String()
}
return fmt.Sprintf("FONT{%T %s %s}", font.context, font.baseFields().coreString(), enc)
}
// BaseFont returns the font's "BaseFont" field.
@ -214,7 +213,7 @@ func NewStandard14FontWithEncoding(basefont Standard14Font, alphabet map[rune]in
return &PdfFont{context: &std}, encoder, nil
}
// GetAlphabet returns a map of the runes in `text`.
// GetAlphabet returns a map of the runes in `text` and their frequencies.
func GetAlphabet(text string) map[rune]int {
alphabet := map[rune]int{}
for _, r := range text {
@ -477,7 +476,7 @@ func (font PdfFont) ToPdfObject() core.PdfObject {
if t := font.actualFont(); t != nil {
return t.ToPdfObject()
}
common.Log.Debug("ERROR: ToPdfObject Not implemented for font type=%#T. Returning null object",
common.Log.Debug("ERROR: ToPdfObject Not implemented for font type=%#T. Returning null object.",
font.context)
return core.MakeNull()
}
@ -560,7 +559,7 @@ func (font PdfFont) actualFont() fonts.Font {
case *pdfCIDFontType2:
return t
default:
common.Log.Debug("ERROR: actualFont. Unknown font type %t. font=%s", t, font)
common.Log.Debug("ERROR: actualFont. Unknown font type %T. font=%s", t, font)
return nil
}
}
@ -581,7 +580,7 @@ func (font PdfFont) baseFields() *fontCommon {
case *pdfCIDFontType2:
return t.baseFields()
default:
common.Log.Debug("ERROR: base. Unknown font type %t. font=%s", t, font.String())
common.Log.Debug("ERROR: base. Unknown font type %T. font=%s", t, font.String())
return nil
}
}

View File

@ -131,7 +131,6 @@ func (font pdfFontSimple) GetGlyphCharMetrics(glyph string) (fonts.CharMetrics,
// returned to indicate whether or not the entry was found in the glyph to charcode mapping.
func (font pdfFontSimple) GetCharMetrics(code uint16) (fonts.CharMetrics, bool) {
if width, ok := font.charWidths[code]; ok {
common.Log.Debug("GetCharMetrics 1: code=%d width=%.1f font=%s", code, width, font)
return fonts.CharMetrics{Wx: width}, true
}
if isBuiltin(Standard14Font(font.basefont)) {
@ -142,12 +141,11 @@ func (font pdfFontSimple) GetCharMetrics(code uint16) (fonts.CharMetrics, bool)
if glyph, ok := font.encoder.CharcodeToGlyph(code); ok {
if metrics, ok := font.fontMetrics[glyph]; ok {
font.charWidths[code] = metrics.Wx
common.Log.Debug("GetCharMetrics 2: code=%d glyph=%q width=%.1f", code, glyph, metrics.Wx)
return metrics, true
}
}
}
common.Log.Debug("GetCharMetrics 3: code=%d font=%s", code, font)
common.Log.Debug("GetCharMetrics: No match for code=%d font=%s", code, font)
return fonts.CharMetrics{}, false
}