Fixed landscape rotation for text extraction.

Also compute metrics for standard 14 fonts when not created from dict.
This commit is contained in:
Peter Williams 2018-11-19 16:50:28 +11:00
parent ea8a26a7dc
commit 2f8b50af75
2 changed files with 20 additions and 13 deletions

View File

@ -805,8 +805,8 @@ func (tl *TextList) SortPosition() {
xi, xj := ti.X, tj.X xi, xj := ti.X, tj.X
yi, yj := ti.Y, tj.Y yi, yj := ti.Y, tj.Y
if ti.Orient == contentstream.OrientationLandscape { if ti.Orient == contentstream.OrientationLandscape {
xi, yi = yi, xi xi, yi = yi, -xi
xj, yj = yj, xj xj, yj = yj, -xj
} }
if yi != yj { if yi != yj {
@ -833,17 +833,21 @@ func (tl *TextList) toLines() []Line {
if t.Orient == contentstream.OrientationPortrait { if t.Orient == contentstream.OrientationPortrait {
portText = append(portText, t) portText = append(portText, t)
} else { } else {
t.X, t.Y = t.Y, t.X t.X, t.Y = t.Y, -t.X
t.End.X, t.End.Y = t.End.Y, -t.End.X
t.Orient = contentstream.OrientationPortrait
landText = append(landText, t) landText = append(landText, t)
} }
} }
common.Log.Debug("toLines: portrait ^^^^^^^")
portLines := portText.toLinesOrient() portLines := portText.toLinesOrient()
common.Log.Debug("toLines: landscape &&&&&&&")
landLines := landText.toLinesOrient() landLines := landText.toLinesOrient()
common.Log.Debug("portText=%d landText=%d", len(portText), len(landText)) common.Log.Debug("portText=%d landText=%d", len(portText), len(landText))
return append(portLines, landLines...) return append(portLines, landLines...)
} }
// toLinesOrient return the text and positions in `tl` as a slice of Line. // toLinesOrient returns the text and positions in `tl` as a slice of Line.
// NOTE: Caller must sort the text list top-to-bottom, left-to-right before calling this function. // NOTE: Caller must sort the text list top-to-bottom, left-to-right before calling this function.
func (tl *TextList) toLinesOrient() []Line { func (tl *TextList) toLinesOrient() []Line {
tl.printTexts("toLines: before") tl.printTexts("toLines: before")

View File

@ -115,12 +115,8 @@ func DefaultFont() *PdfFont {
// NewStandard14Font returns the standard 14 font named `basefont` as a *PdfFont, or an error if // NewStandard14Font returns the standard 14 font named `basefont` as a *PdfFont, or an error if
// `basefont` is not one of the standard 14 font names. // `basefont` is not one of the standard 14 font names.
func NewStandard14Font(basefont Standard14Font) (*PdfFont, error) { func NewStandard14Font(basefont Standard14Font) (*PdfFont, error) {
std, ok := loadStandard14Font(basefont) font, _, err := NewStandard14FontWithEncoding(basefont, nil)
if !ok { return font, err
common.Log.Debug("ERROR: Invalid standard 14 font name %#q", basefont)
return nil, ErrFontNotSupported
}
return &PdfFont{context: &std}, nil
} }
// NewStandard14FontMustCompile returns the standard 14 font named `basefont` as a *PdfFont. // NewStandard14FontMustCompile returns the standard 14 font named `basefont` as a *PdfFont.
@ -137,7 +133,8 @@ func NewStandard14FontMustCompile(basefont Standard14Font) *PdfFont {
// NewStandard14FontWithEncoding returns the standard 14 font named `basefont` as a *PdfFont and // NewStandard14FontWithEncoding returns the standard 14 font named `basefont` as a *PdfFont and
// a SimpleEncoder that encodes all the runes in `alphabet`, or an error if this is not possible. // a SimpleEncoder that encodes all the runes in `alphabet`, or an error if this is not possible.
// An error can occur if `basefont` is not one of the standard 14 font names. // An error can occur if `basefont` is not one of the standard 14 font names.
func NewStandard14FontWithEncoding(basefont Standard14Font, alphabet map[rune]int) (*PdfFont, *textencoding.SimpleEncoder, error) { func NewStandard14FontWithEncoding(basefont Standard14Font, alphabet map[rune]int) (*PdfFont,
*textencoding.SimpleEncoder, error) {
baseEncoder := "MacRomanEncoding" baseEncoder := "MacRomanEncoding"
common.Log.Trace("NewStandard14FontWithEncoding: basefont=%#q baseEncoder=%#q alphabet=%q", common.Log.Trace("NewStandard14FontWithEncoding: basefont=%#q baseEncoder=%#q alphabet=%q",
basefont, baseEncoder, string(sortedAlphabet(alphabet))) basefont, baseEncoder, string(sortedAlphabet(alphabet)))
@ -201,9 +198,15 @@ func NewStandard14FontWithEncoding(basefont Standard14Font, alphabet map[rune]in
slotIdx++ slotIdx++
} }
} }
encoder, err = textencoding.NewSimpleTextEncoder(baseEncoder, differences)
return &PdfFont{context: &std}, encoder, err encoder, err = textencoding.NewSimpleTextEncoder(baseEncoder, differences)
if err != nil {
return nil, nil, err
}
std.std14Encoder = encoder
std.updateStandard14Font()
return &PdfFont{context: &std}, encoder, nil
} }
// GetAlphabet returns a map of the runes in `text`. // GetAlphabet returns a map of the runes in `text`.