mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-29 13:48:54 +08:00
Fixes for text extraction corpus testing.
- Correct matrix multiplication order in text.go - Look up standard 14 font widths after applying custom encoding.
This commit is contained in:
parent
851aa267b1
commit
a9019a50a3
BIN
pdf/extractor/testdata/000026.pdf
vendored
Normal file
BIN
pdf/extractor/testdata/000026.pdf
vendored
Normal file
Binary file not shown.
@ -266,7 +266,7 @@ func (e *Extractor) ExtractXYText() (*TextList, int, int, error) {
|
||||
|
||||
err = processor.Process(e.resources)
|
||||
if err != nil {
|
||||
common.Log.Error("ERROR: Processing: err=%v", err)
|
||||
common.Log.Debug("ERROR: Processing: err=%v", err)
|
||||
}
|
||||
return textList, state.numChars, state.numMisses, err
|
||||
}
|
||||
@ -406,7 +406,7 @@ func (to *textObject) setTextRise(y float64) {
|
||||
|
||||
// setWordSpacing "Tw" Set word spacing.
|
||||
func (to *textObject) setWordSpacing(y float64) {
|
||||
// Not implemented yet
|
||||
to.State.Tw = y
|
||||
}
|
||||
|
||||
// setHorizScaling "Tz" Set horizontal scaling.
|
||||
@ -609,7 +609,10 @@ func (to *textObject) renderText(data []byte) error {
|
||||
0, tfs,
|
||||
0, state.Trise)
|
||||
|
||||
common.Log.Debug("%d codes=%+v runes=%q", len(charcodes), charcodes, runes)
|
||||
|
||||
for i, r := range runes {
|
||||
|
||||
code := charcodes[i]
|
||||
// The location of the text on the page in device coordinates is given by trm, the text
|
||||
// rendering matrix.
|
||||
@ -634,19 +637,38 @@ func (to *textObject) renderText(data []byte) error {
|
||||
c := Point{X: m.Wx * glyphTextRatio, Y: m.Wy * glyphTextRatio}
|
||||
|
||||
// t is the displacement of the text cursor when the character is rendered.
|
||||
// float tx = displacementX * fontSize * horizontalScaling;
|
||||
// w = 0
|
||||
t0 := Point{X: (c.X*tfs + w) * th}
|
||||
t := Point{X: (c.X*tfs + state.Tc + w) * th}
|
||||
|
||||
// td is t in matrix form.
|
||||
td0 := translationMatrix(t0)
|
||||
td := translationMatrix(t)
|
||||
|
||||
common.Log.Debug("%q stateMatrix=%s CTM=%s Tm=%s", r, stateMatrix, to.gs.CTM, to.Tm)
|
||||
common.Log.Debug("tfs=%.3f th=%.3f Tc=%.3f w=%.3f (Tw=%.3f)", tfs, th, state.Tc, w, state.Tw)
|
||||
common.Log.Debug("m=%s c=%+v t0=%+v td0=%s trm0=%s",
|
||||
m, c, t0, td0, td0.Mult(to.Tm).Mult(to.gs.CTM))
|
||||
common.Log.Debug("m=%s c=%+v t=%+v td=%s trm=%s",
|
||||
m, c, t, td, td.Mult(to.Tm).Mult(to.gs.CTM))
|
||||
|
||||
nextTm := to.Tm.Mult(td)
|
||||
|
||||
xyt := XYText{Text: string(r),
|
||||
Point: translation(trm),
|
||||
Orient: trm.Orientation(),
|
||||
End: translation(to.Tm.Mult(td).Mult(to.gs.CTM)),
|
||||
SpaceWidth: spaceWidth * trm.ScalingFactorX(),
|
||||
}
|
||||
// xyt := XYText{Text: string(r),
|
||||
// Point: translation(trm),
|
||||
// Orient: trm.Orientation(),
|
||||
// // Matrix nextTextRenderingMatrix = td.multiply(textMatrix).multiply(ctm); // text space -> device space
|
||||
// End: translation(td0.Mult(to.Tm).Mult(to.gs.CTM)),
|
||||
// SpaceWidth: spaceWidth * trm.ScalingFactorX(),
|
||||
// }
|
||||
xyt := newXYText(
|
||||
string(r),
|
||||
translation(trm),
|
||||
translation(td0.Mult(to.Tm).Mult(to.gs.CTM)),
|
||||
trm.Orientation(),
|
||||
spaceWidth*trm.ScalingFactorX())
|
||||
common.Log.Debug("i=%d code=%d, xyt=%s", i, code, xyt)
|
||||
to.Texts = append(to.Texts, xyt)
|
||||
|
||||
// update the text matrix by the displacement of the text location.
|
||||
@ -690,11 +712,26 @@ type XYText struct {
|
||||
SpaceWidth float64
|
||||
Font string
|
||||
FontSize float64
|
||||
counter int
|
||||
}
|
||||
|
||||
var counter int
|
||||
|
||||
func newXYText(text string, point, end Point, orient contentstream.Orientation, spaceWidth float64) XYText {
|
||||
counter++
|
||||
return XYText{
|
||||
Text: text,
|
||||
Point: point,
|
||||
End: end,
|
||||
Orient: orient,
|
||||
SpaceWidth: spaceWidth,
|
||||
counter: counter,
|
||||
}
|
||||
}
|
||||
|
||||
// String returns a string describing `t`.
|
||||
func (t XYText) String() string {
|
||||
return fmt.Sprintf("%s,%s %.1f %q",
|
||||
return fmt.Sprintf("@@%d %s,%s %.1f %q", t.counter,
|
||||
t.Point.String(), t.End.String(), t.End.X-t.X, truncate(t.Text, 100))
|
||||
}
|
||||
|
||||
@ -707,7 +744,7 @@ func (t XYText) Width() float64 {
|
||||
default:
|
||||
w = math.Abs(t.End.X - t.X)
|
||||
}
|
||||
common.Log.Trace(" Width %q (%s %s) -> %.1f", t.Text, t.Point.String(), t.End.String(), w)
|
||||
common.Log.Debug(" Width %q (%s %s) -> %.1f", t.Text, t.Point.String(), t.End.String(), w)
|
||||
return w
|
||||
}
|
||||
|
||||
@ -719,20 +756,20 @@ func (tl *TextList) Length() int {
|
||||
return len(*tl)
|
||||
}
|
||||
|
||||
// AppendText appends the location and contents of `text` to a text list.
|
||||
func (tl *TextList) AppendText(gs contentstream.GraphicsState, p, e Point, text string, spaceWidth float64) {
|
||||
t := XYText{
|
||||
Point: p,
|
||||
End: e,
|
||||
ColorStroking: gs.ColorStroking,
|
||||
ColorNonStroking: gs.ColorNonStroking,
|
||||
Orient: gs.PageOrientation(),
|
||||
Text: text,
|
||||
SpaceWidth: spaceWidth,
|
||||
}
|
||||
common.Log.Debug("AppendText: %s", t.String())
|
||||
*tl = append(*tl, t)
|
||||
}
|
||||
// // AppendText appends the location and contents of `text` to a text list.
|
||||
// func (tl *TextList) AppendText(gs contentstream.GraphicsState, p, e Point, text string, spaceWidth float64) {
|
||||
// t := XYText{
|
||||
// Point: p,
|
||||
// End: e,
|
||||
// ColorStroking: gs.ColorStroking,
|
||||
// ColorNonStroking: gs.ColorNonStroking,
|
||||
// Orient: gs.PageOrientation(),
|
||||
// Text: text,
|
||||
// SpaceWidth: spaceWidth,
|
||||
// }
|
||||
// common.Log.Debug("AppendText: %s", t.String())
|
||||
// *tl = append(*tl, t)
|
||||
// }
|
||||
|
||||
// ToText returns the contents of `tl` as a single string.
|
||||
func (tl *TextList) ToText() string {
|
||||
@ -794,6 +831,7 @@ func (tl *TextList) toLines() []Line {
|
||||
}
|
||||
portLines := portText.toLinesOrient()
|
||||
landLines := landText.toLinesOrient()
|
||||
common.Log.Debug("portText=%d landText=%d", len(portText), len(landText))
|
||||
return append(portLines, landLines...)
|
||||
}
|
||||
|
||||
@ -816,6 +854,7 @@ func (tl *TextList) toLinesOrient() []Line {
|
||||
lastEndX := 0.0 // (*tl)[i-1].End.X
|
||||
|
||||
for _, t := range *tl {
|
||||
// common.Log.Debug("%d --------------------------", i)
|
||||
if t.Y < y {
|
||||
if len(words) > 0 {
|
||||
line := newLine(y, x, words)
|
||||
@ -846,12 +885,16 @@ func (tl *TextList) toLinesOrient() []Line {
|
||||
deltaCharWidth := averageCharWidth.ave * 0.3
|
||||
|
||||
isSpace := false
|
||||
nextWordX := lastEndX + min(deltaSpace, deltaCharWidth)
|
||||
if scanning && t.Text != " " {
|
||||
nextWordX := lastEndX + min(deltaSpace, deltaCharWidth)
|
||||
isSpace = nextWordX < t.X
|
||||
common.Log.Trace("[%.1f, %.1f] lastEndX=%.1f nextWordX=%.1f",
|
||||
t.Y, t.X, lastEndX, nextWordX)
|
||||
}
|
||||
common.Log.Debug("t=%s", t)
|
||||
common.Log.Debug("width=%.2f delta=%.2f deltaSpace=%.2g deltaCharWidth=%.2g",
|
||||
t.Width(), min(deltaSpace, deltaCharWidth), deltaSpace, deltaCharWidth)
|
||||
|
||||
common.Log.Debug("%+q [%.1f, %.1f] lastEndX=%.2f nextWordX=%.2f (%.2f) isSpace=%t",
|
||||
t.Text, t.X, t.Y, lastEndX, nextWordX, nextWordX-t.X, isSpace)
|
||||
if isSpace {
|
||||
words = append(words, " ")
|
||||
x = append(x, (lastEndX+t.X)*0.5)
|
||||
@ -862,6 +905,7 @@ func (tl *TextList) toLinesOrient() []Line {
|
||||
words = append(words, t.Text)
|
||||
x = append(x, t.X)
|
||||
scanning = true
|
||||
common.Log.Debug("lastEndX=%.2f", lastEndX)
|
||||
}
|
||||
if len(words) > 0 {
|
||||
line := newLine(y, x, words)
|
||||
@ -898,7 +942,7 @@ func (exp *ExponAve) update(x float64) float64 {
|
||||
return exp.ave
|
||||
}
|
||||
|
||||
// printTexts is a debugging function. XXX Remove this.
|
||||
// printTexts is a debugging function. XXX(peterwilliams97) Remove this.
|
||||
func (tl *TextList) printTexts(message string) {
|
||||
return
|
||||
_, file, line, ok := runtime.Caller(1)
|
||||
@ -910,17 +954,17 @@ func (tl *TextList) printTexts(message string) {
|
||||
}
|
||||
prefix := fmt.Sprintf("[%s:%d]", file, line)
|
||||
|
||||
common.Log.Error("=====================================")
|
||||
common.Log.Error("printTexts %s %s", prefix, message)
|
||||
common.Log.Error("%d texts", len(*tl))
|
||||
common.Log.Debug("=====================================")
|
||||
common.Log.Debug("printTexts %s %s", prefix, message)
|
||||
common.Log.Debug("%d texts", len(*tl))
|
||||
parts := []string{}
|
||||
for i, t := range *tl {
|
||||
fmt.Printf("%5d: %s\n", i, t.String())
|
||||
parts = append(parts, t.Text)
|
||||
}
|
||||
common.Log.Error("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
|
||||
common.Log.Debug("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
|
||||
fmt.Printf("%s\n", strings.Join(parts, ""))
|
||||
common.Log.Error("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
|
||||
common.Log.Debug("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
|
||||
}
|
||||
|
||||
// newLine returns the Line representation of strings `words` with y coordinate `y` and x
|
||||
|
@ -17,7 +17,7 @@ import (
|
||||
)
|
||||
|
||||
func init() {
|
||||
common.SetLogger(common.NewConsoleLogger(common.LogLevelDebug))
|
||||
common.SetLogger(common.NewConsoleLogger(common.LogLevelError))
|
||||
if flag.Lookup("test.v") != nil {
|
||||
isTesting = true
|
||||
}
|
||||
@ -59,12 +59,20 @@ var extract2Tests = []struct {
|
||||
filename string
|
||||
expectedPageText map[int][]string
|
||||
}{
|
||||
{
|
||||
filename: "testdata/reader.pdf",
|
||||
{filename: "testdata/reader.pdf",
|
||||
expectedPageText: map[int][]string{
|
||||
1: []string{"A Research UNIX Reader:",
|
||||
"Annotated Excerpts from the Programmer's Manual,",
|
||||
"Annotated Excerpts from the Programmer’s Manual,",
|
||||
"1. Introduction",
|
||||
"To keep the size of this report",
|
||||
"last common ancestor of a radiative explosion",
|
||||
},
|
||||
},
|
||||
},
|
||||
{filename: "testdata/000026.pdf",
|
||||
expectedPageText: map[int][]string{
|
||||
1: []string{"Fresh Flower",
|
||||
"Care & Handling
",
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -85,10 +93,9 @@ func testExtract2(t *testing.T, filename string, expectedPageText map[int][]stri
|
||||
}
|
||||
|
||||
func containsSentences(t *testing.T, expectedSentences []string, actualText string) bool {
|
||||
actualSentences := asSet(strings.Split(actualText, "\n"))
|
||||
for _, e := range expectedSentences {
|
||||
if _, ok := actualSentences[e]; !ok {
|
||||
t.Errorf("No match for %q", e)
|
||||
if !strings.Contains(actualText, e) {
|
||||
t.Errorf("No match for %+q", e)
|
||||
return false
|
||||
}
|
||||
}
|
||||
@ -104,14 +111,6 @@ func sortedKeys(m map[int][]string) []int {
|
||||
return keys
|
||||
}
|
||||
|
||||
func asSet(keys []string) map[string]bool {
|
||||
set := map[string]bool{}
|
||||
for _, k := range keys {
|
||||
set[k] = true
|
||||
}
|
||||
return set
|
||||
}
|
||||
|
||||
func extractPageTexts(t *testing.T, filename string) (int, map[int]string) {
|
||||
f, err := os.Open(filename)
|
||||
if err != nil {
|
||||
|
@ -6,7 +6,7 @@
|
||||
package textencoding
|
||||
|
||||
// NewSymbolEncoder returns a SimpleEncoder that implements SymbolEncoding.
|
||||
func NewSymbolEncoder() SimpleEncoder {
|
||||
func NewSymbolEncoder() *SimpleEncoder {
|
||||
enc, _ := NewSimpleTextEncoder("SymbolEncoding", nil)
|
||||
return *enc
|
||||
return enc
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
package textencoding
|
||||
|
||||
// NewWinAnsiTextEncoder returns a SimpleEncoder that implements WinAnsiEncoding.
|
||||
func NewWinAnsiTextEncoder() SimpleEncoder {
|
||||
func NewWinAnsiTextEncoder() *SimpleEncoder {
|
||||
enc, _ := NewSimpleTextEncoder("WinAnsiEncoding", nil)
|
||||
return *enc
|
||||
return enc
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
package textencoding
|
||||
|
||||
// NewZapfDingbatsEncoder returns a SimpleEncoder that implements ZapfDingbatsEncoding.
|
||||
func NewZapfDingbatsEncoder() SimpleEncoder {
|
||||
func NewZapfDingbatsEncoder() *SimpleEncoder {
|
||||
enc, _ := NewSimpleTextEncoder("ZapfDingbatsEncoding", nil)
|
||||
return *enc
|
||||
return enc
|
||||
}
|
||||
|
@ -263,7 +263,9 @@ func newPdfFontFromPdfObject(fontObj core.PdfObject, allowType0 bool) (*PdfFont,
|
||||
font.context = type0font
|
||||
case "Type1", "Type3", "MMType1", "TrueType":
|
||||
var simplefont *pdfFontSimple
|
||||
if std, ok := loadStandard14Font(Standard14Font(base.basefont)); ok && base.subtype == "Type1" {
|
||||
std, ok := loadStandard14Font(Standard14Font(base.basefont))
|
||||
builtin := ok && base.subtype == "Type1"
|
||||
if builtin {
|
||||
font.context = &std
|
||||
|
||||
stdObj := core.TraceToDirectObject(std.ToPdfObject())
|
||||
@ -283,8 +285,6 @@ func newPdfFontFromPdfObject(fontObj core.PdfObject, allowType0 bool) (*PdfFont,
|
||||
return nil, err
|
||||
}
|
||||
|
||||
simplefont.firstChar = std.firstChar
|
||||
simplefont.lastChar = std.lastChar
|
||||
simplefont.charWidths = std.charWidths
|
||||
simplefont.fontMetrics = std.fontMetrics
|
||||
} else {
|
||||
@ -298,6 +298,19 @@ func newPdfFontFromPdfObject(fontObj core.PdfObject, allowType0 bool) (*PdfFont,
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if builtin {
|
||||
simplefont.updateStandard14Font()
|
||||
}
|
||||
if builtin && simplefont.encoder == nil && simplefont.std14Encoder == nil {
|
||||
common.Log.Error("simplefont=%s", simplefont)
|
||||
common.Log.Error("std=%s", std)
|
||||
panic("Not possible")
|
||||
}
|
||||
if len(simplefont.charWidths) == 0 {
|
||||
common.Log.Error("simplefont=%s", simplefont)
|
||||
common.Log.Error("std=%s", std)
|
||||
common.Log.Debug("ERROR: No widths. font=%s", simplefont)
|
||||
}
|
||||
font.context = simplefont
|
||||
case "CIDFontType0":
|
||||
cidfont, err := newPdfCIDFontType0FromPdfObject(d, base)
|
||||
@ -415,6 +428,7 @@ func (font PdfFont) BytesToCharcodes(data []byte) []uint16 {
|
||||
}
|
||||
|
||||
// CharcodesToUnicode converts the character codes `charcodes` to a slice of unicode strings.
|
||||
// XXX(peterwilliams97): Remove int returns.
|
||||
func (font PdfFont) CharcodesToUnicode(charcodes []uint16) ([]string, int, int) {
|
||||
charstrings := make([]string, 0, len(charcodes))
|
||||
numMisses := 0
|
||||
@ -426,7 +440,7 @@ func (font PdfFont) CharcodesToUnicode(charcodes []uint16) ([]string, int, int)
|
||||
continue
|
||||
}
|
||||
}
|
||||
// Fall back to encoding
|
||||
// Fall back to encoding.
|
||||
encoder := font.Encoder()
|
||||
if encoder != nil {
|
||||
r, ok := encoder.CharcodeToRune(code)
|
||||
|
@ -37,13 +37,12 @@ type pdfFontSimple struct {
|
||||
container *core.PdfIndirectObject
|
||||
|
||||
// These fields are specific to simple PDF fonts.
|
||||
firstChar int
|
||||
lastChar int
|
||||
charWidths []float64
|
||||
|
||||
charWidths map[uint16]float64
|
||||
// encoder is the encoder specified by the /Encoding entry in the font dict.
|
||||
encoder textencoding.TextEncoder
|
||||
encoder *textencoding.SimpleEncoder
|
||||
// std14Encoder is used for Standard 14 fonts where no /Encoding is specified in the font dict.
|
||||
std14Encoder textencoding.TextEncoder
|
||||
std14Encoder *textencoding.SimpleEncoder
|
||||
|
||||
// std14Descriptor is used for Standard 14 fonts where no /FontDescriptor is specified in the font dict.
|
||||
std14Descriptor *PdfFontDescriptor
|
||||
@ -76,14 +75,26 @@ func (font *pdfFontSimple) Encoder() textencoding.TextEncoder {
|
||||
// Standard 14 fonts have builtin encoders that we fall back to when no /Encoding is specified
|
||||
// in the font dict.
|
||||
if font.encoder == nil {
|
||||
// Need to make the font.Encoder()==nil test work when font.std14Encoder==nil and font.encoder==nil.
|
||||
// See https://golang.org/doc/faq#nil_error
|
||||
if font.std14Encoder == nil {
|
||||
return nil
|
||||
}
|
||||
return font.std14Encoder
|
||||
}
|
||||
return font.encoder
|
||||
}
|
||||
|
||||
// SetEncoder sets the encoding for the underlying font.
|
||||
// XXX(peterwilliams97) Change function signature to SetEncoder(encoder *textencoding.SimpleEncoder).
|
||||
func (font *pdfFontSimple) SetEncoder(encoder textencoding.TextEncoder) {
|
||||
font.encoder = encoder
|
||||
simple, ok := encoder.(*textencoding.SimpleEncoder)
|
||||
if !ok {
|
||||
// This can't happen.
|
||||
common.Log.Error("pdfFontSimple.SetEncoder passedbad encoder type %T", encoder)
|
||||
simple = nil
|
||||
}
|
||||
font.encoder = simple
|
||||
}
|
||||
|
||||
// GetGlyphCharMetrics returns the character metrics for the specified glyph. A bool flag is
|
||||
@ -123,30 +134,21 @@ func (font pdfFontSimple) GetGlyphCharMetrics(glyph string) (fonts.CharMetrics,
|
||||
// GetCharMetrics returns the character metrics for the specified character code. A bool flag is
|
||||
// returned to indicate whether or not the entry was found in the glyph to charcode mapping.
|
||||
func (font pdfFontSimple) GetCharMetrics(code uint16) (fonts.CharMetrics, bool) {
|
||||
metrics := fonts.CharMetrics{}
|
||||
|
||||
if int(code) < font.firstChar {
|
||||
common.Log.Debug("Code lower than firstchar (%d < %d)", code, font.firstChar)
|
||||
return metrics, false
|
||||
if width, ok := font.charWidths[code]; ok {
|
||||
common.Log.Debug("GetCharMetrics 1: code=%d width=%.1f font=%s", code, width, font)
|
||||
return fonts.CharMetrics{Wx: width}, true
|
||||
}
|
||||
|
||||
if int(code) > font.lastChar {
|
||||
common.Log.Debug("ERROR: Code higher than lastchar (%d > %d) %s",
|
||||
code, font.lastChar, font)
|
||||
return metrics, false
|
||||
if font.encoder != nil {
|
||||
if glyph, ok := font.encoder.CharcodeToGlyph(code); ok {
|
||||
if metrics, ok := font.fontMetrics[glyph]; ok {
|
||||
font.charWidths[code] = metrics.Wx
|
||||
common.Log.Debug("GetCharMetrics 2: code=%d glyph=%q width=%.1f", code, glyph, metrics.Wx)
|
||||
return metrics, true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
index := int(code) - font.firstChar
|
||||
if index >= len(font.charWidths) {
|
||||
common.Log.Debug("ERROR: Code outside of widths range (%d > %d) code=%d [%d %d] font=%s",
|
||||
index, len(font.charWidths), code, font.firstChar, font.lastChar, font.String())
|
||||
return metrics, false
|
||||
}
|
||||
|
||||
width := font.charWidths[index]
|
||||
metrics.Wx = width
|
||||
|
||||
return metrics, true
|
||||
common.Log.Debug("GetCharMetrics 3: code=%d", code)
|
||||
return fonts.CharMetrics{}, false
|
||||
}
|
||||
|
||||
// GetAverageCharWidth returns the average width of all the characters in `font`.
|
||||
@ -170,7 +172,7 @@ func (font pdfFontSimple) GetAverageCharWidth() float64 {
|
||||
// • The value of BaseFont is derived differently.
|
||||
//
|
||||
func newSimpleFontFromPdfObject(d *core.PdfObjectDictionary, base *fontCommon,
|
||||
std14Encoder textencoding.TextEncoder) (*pdfFontSimple, error) {
|
||||
std14Encoder *textencoding.SimpleEncoder) (*pdfFontSimple, error) {
|
||||
font := pdfFontSimpleFromSkeleton(base)
|
||||
font.std14Encoder = std14Encoder
|
||||
|
||||
@ -187,7 +189,7 @@ func newSimpleFontFromPdfObject(d *core.PdfObjectDictionary, base *fontCommon,
|
||||
common.Log.Debug("ERROR: Invalid FirstChar type (%T)", obj)
|
||||
return nil, core.ErrTypeError
|
||||
}
|
||||
font.firstChar = int(intVal)
|
||||
firstChar := int(intVal)
|
||||
|
||||
obj = d.Get("LastChar")
|
||||
if obj == nil {
|
||||
@ -199,9 +201,9 @@ func newSimpleFontFromPdfObject(d *core.PdfObjectDictionary, base *fontCommon,
|
||||
common.Log.Debug("ERROR: Invalid LastChar type (%T)", obj)
|
||||
return nil, core.ErrTypeError
|
||||
}
|
||||
font.lastChar = int(intVal)
|
||||
lastChar := int(intVal)
|
||||
|
||||
font.charWidths = []float64{}
|
||||
font.charWidths = map[uint16]float64{}
|
||||
obj = d.Get("Widths")
|
||||
if obj != nil {
|
||||
font.Widths = obj
|
||||
@ -218,17 +220,16 @@ func newSimpleFontFromPdfObject(d *core.PdfObjectDictionary, base *fontCommon,
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(widths) != (font.lastChar - font.firstChar + 1) {
|
||||
if len(widths) != (lastChar - firstChar + 1) {
|
||||
common.Log.Debug("ERROR: Invalid widths length != %d (%d)",
|
||||
font.lastChar-font.firstChar+1, len(widths))
|
||||
lastChar-firstChar+1, len(widths))
|
||||
return nil, core.ErrRangeError
|
||||
}
|
||||
font.charWidths = widths
|
||||
for i, w := range widths {
|
||||
font.charWidths[uint16(firstChar+i)] = w
|
||||
}
|
||||
}
|
||||
}
|
||||
if font.lastChar > 0 && len(font.charWidths) == 0 {
|
||||
common.Log.Debug("ERROR: No widths. font=%s", font)
|
||||
}
|
||||
|
||||
font.Encoding = core.TraceToDirectObject(d.Get("Encoding"))
|
||||
return font, nil
|
||||
@ -250,9 +251,8 @@ func (font *pdfFontSimple) addEncoding() error {
|
||||
return err
|
||||
}
|
||||
base := font.baseFields()
|
||||
common.Log.Trace("addEncoding: BaseFont=%q Subtype=%q Encoding=%s (%T)", base.basefont,
|
||||
base.subtype, font.Encoding, font.Encoding)
|
||||
|
||||
common.Log.Trace("addEncoding: BaseFont=%q Subtype=%q Encoding=%s (%T) differences=%d %+v",
|
||||
base.basefont, base.subtype, font.Encoding, font.Encoding, len(differences), differences)
|
||||
encoder, err = textencoding.NewSimpleTextEncoder(baseEncoder, differences)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -378,6 +378,7 @@ func NewPdfFontFromTTFFile(filePath string) (*PdfFont, error) {
|
||||
}
|
||||
|
||||
truefont := &pdfFontSimple{
|
||||
charWidths: map[uint16]float64{},
|
||||
fontCommon: fontCommon{
|
||||
subtype: "TrueType",
|
||||
},
|
||||
@ -387,8 +388,6 @@ func NewPdfFontFromTTFFile(filePath string) (*PdfFont, error) {
|
||||
// then can derive
|
||||
// TODO: Subsetting fonts.
|
||||
truefont.encoder = textencoding.NewWinAnsiTextEncoder()
|
||||
truefont.firstChar = minCode
|
||||
truefont.lastChar = maxCode
|
||||
|
||||
truefont.basefont = ttf.PostScriptName
|
||||
truefont.FirstChar = core.MakeInteger(minCode)
|
||||
@ -424,12 +423,15 @@ func NewPdfFontFromTTFFile(filePath string) (*PdfFont, error) {
|
||||
|
||||
truefont.Widths = core.MakeIndirectObject(core.MakeArrayFromFloats(vals))
|
||||
|
||||
if len(vals) < (255 - 32 + 1) {
|
||||
if len(vals) < maxCode-minCode+1 {
|
||||
common.Log.Debug("ERROR: Invalid length of widths, %d < %d", len(vals), 255-32+1)
|
||||
return nil, core.ErrRangeError
|
||||
}
|
||||
|
||||
truefont.charWidths = vals[:255-32+1]
|
||||
// truefont.charWidths = vals[:maxCode-minCode+1]
|
||||
for i := uint16(minCode); i <= maxCode; i++ {
|
||||
truefont.charWidths[i] = vals[i-minCode]
|
||||
}
|
||||
|
||||
// Use WinAnsiEncoding by default.
|
||||
truefont.Encoding = core.MakeName("WinAnsiEncoding")
|
||||
@ -511,31 +513,38 @@ func loadStandard14Font(baseFont Standard14Font) (pdfFontSimple, bool) {
|
||||
if !ok {
|
||||
return pdfFontSimple{}, false
|
||||
}
|
||||
|
||||
descriptor := builtinDescriptor(string(baseFont))
|
||||
if descriptor == nil {
|
||||
return pdfFontSimple{}, false
|
||||
}
|
||||
|
||||
std.std14Descriptor = descriptor
|
||||
se, ok := std.std14Encoder.(textencoding.SimpleEncoder)
|
||||
if !ok {
|
||||
common.Log.Debug("ERROR: Wrong encoder type: %T", std.std14Encoder)
|
||||
}
|
||||
codes := []int{}
|
||||
for c := range se.CodeToGlyph {
|
||||
codes = append(codes, int(c))
|
||||
}
|
||||
sort.Ints(codes)
|
||||
std.firstChar = codes[0]
|
||||
std.lastChar = codes[len(codes)-1]
|
||||
std.charWidths = make([]float64, len(codes))
|
||||
for i, code := range codes {
|
||||
glyph := se.CodeToGlyph[uint16(code)]
|
||||
std.charWidths[i] = std.fontMetrics[glyph].Wx
|
||||
}
|
||||
|
||||
return std, true
|
||||
}
|
||||
|
||||
func (font *pdfFontSimple) updateStandard14Font() {
|
||||
se, ok := font.Encoder().(*textencoding.SimpleEncoder)
|
||||
if !ok {
|
||||
// This can't happen.
|
||||
common.Log.Error("Wrong encoder type: %T. font=%s.", font.Encoder(), font)
|
||||
return
|
||||
}
|
||||
|
||||
codes := []uint16{}
|
||||
for c := range se.CodeToGlyph {
|
||||
codes = append(codes, c)
|
||||
}
|
||||
sort.Slice(codes, func(i, j int) bool { return codes[i] < codes[j] })
|
||||
|
||||
font.charWidths = map[uint16]float64{}
|
||||
for _, code := range codes {
|
||||
glyph := se.CodeToGlyph[uint16(code)]
|
||||
font.charWidths[code] = font.fontMetrics[glyph].Wx
|
||||
}
|
||||
}
|
||||
|
||||
var standard14Fonts = map[Standard14Font]pdfFontSimple{
|
||||
Courier: pdfFontSimple{
|
||||
fontCommon: fontCommon{
|
||||
|
Loading…
x
Reference in New Issue
Block a user