Fixed landscape rotation for text extraction.

Also compute metrics for standard 14 fonts when not created from dict.
This commit is contained in:
Peter Williams 2018-11-19 16:50:28 +11:00
parent ea8a26a7dc
commit 2f8b50af75
2 changed files with 20 additions and 13 deletions

View File

@ -805,8 +805,8 @@ func (tl *TextList) SortPosition() {
xi, xj := ti.X, tj.X
yi, yj := ti.Y, tj.Y
if ti.Orient == contentstream.OrientationLandscape {
xi, yi = yi, xi
xj, yj = yj, xj
xi, yi = yi, -xi
xj, yj = yj, -xj
}
if yi != yj {
@ -833,17 +833,21 @@ func (tl *TextList) toLines() []Line {
if t.Orient == contentstream.OrientationPortrait {
portText = append(portText, t)
} else {
t.X, t.Y = t.Y, t.X
t.X, t.Y = t.Y, -t.X
t.End.X, t.End.Y = t.End.Y, -t.End.X
t.Orient = contentstream.OrientationPortrait
landText = append(landText, t)
}
}
common.Log.Debug("toLines: portrait ^^^^^^^")
portLines := portText.toLinesOrient()
common.Log.Debug("toLines: landscape &&&&&&&")
landLines := landText.toLinesOrient()
common.Log.Debug("portText=%d landText=%d", len(portText), len(landText))
return append(portLines, landLines...)
}
// toLinesOrient return the text and positions in `tl` as a slice of Line.
// toLinesOrient returns the text and positions in `tl` as a slice of Line.
// NOTE: Caller must sort the text list top-to-bottom, left-to-right before calling this function.
func (tl *TextList) toLinesOrient() []Line {
tl.printTexts("toLines: before")

View File

@ -115,12 +115,8 @@ func DefaultFont() *PdfFont {
// NewStandard14Font returns the standard 14 font named `basefont` as a *PdfFont, or an error if
// `basefont` is not one of the standard 14 font names.
func NewStandard14Font(basefont Standard14Font) (*PdfFont, error) {
std, ok := loadStandard14Font(basefont)
if !ok {
common.Log.Debug("ERROR: Invalid standard 14 font name %#q", basefont)
return nil, ErrFontNotSupported
}
return &PdfFont{context: &std}, nil
font, _, err := NewStandard14FontWithEncoding(basefont, nil)
return font, err
}
// NewStandard14FontMustCompile returns the standard 14 font named `basefont` as a *PdfFont.
@ -137,7 +133,8 @@ func NewStandard14FontMustCompile(basefont Standard14Font) *PdfFont {
// NewStandard14FontWithEncoding returns the standard 14 font named `basefont` as a *PdfFont and
// a SimpleEncoder that encodes all the runes in `alphabet`, or an error if this is not possible.
// An error can occur if `basefont` is not one of the standard 14 font names.
func NewStandard14FontWithEncoding(basefont Standard14Font, alphabet map[rune]int) (*PdfFont, *textencoding.SimpleEncoder, error) {
func NewStandard14FontWithEncoding(basefont Standard14Font, alphabet map[rune]int) (*PdfFont,
*textencoding.SimpleEncoder, error) {
baseEncoder := "MacRomanEncoding"
common.Log.Trace("NewStandard14FontWithEncoding: basefont=%#q baseEncoder=%#q alphabet=%q",
basefont, baseEncoder, string(sortedAlphabet(alphabet)))
@ -201,9 +198,15 @@ func NewStandard14FontWithEncoding(basefont Standard14Font, alphabet map[rune]in
slotIdx++
}
}
encoder, err = textencoding.NewSimpleTextEncoder(baseEncoder, differences)
return &PdfFont{context: &std}, encoder, err
encoder, err = textencoding.NewSimpleTextEncoder(baseEncoder, differences)
if err != nil {
return nil, nil, err
}
std.std14Encoder = encoder
std.updateStandard14Font()
return &PdfFont{context: &std}, encoder, nil
}
// GetAlphabet returns a map of the runes in `text`.