From 2f8b50af758cf09052b4eb1de556e956a19090cd Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Mon, 19 Nov 2018 16:50:28 +1100 Subject: [PATCH] Fixed landscape rotation for text extraction. Also compute metrics for standard 14 fonts when not created from dict. --- pdf/extractor/text.go | 12 ++++++++---- pdf/model/font.go | 21 ++++++++++++--------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/pdf/extractor/text.go b/pdf/extractor/text.go index e1694861..eb4a93a4 100644 --- a/pdf/extractor/text.go +++ b/pdf/extractor/text.go @@ -805,8 +805,8 @@ func (tl *TextList) SortPosition() { xi, xj := ti.X, tj.X yi, yj := ti.Y, tj.Y if ti.Orient == contentstream.OrientationLandscape { - xi, yi = yi, xi - xj, yj = yj, xj + xi, yi = yi, -xi + xj, yj = yj, -xj } if yi != yj { @@ -833,17 +833,21 @@ func (tl *TextList) toLines() []Line { if t.Orient == contentstream.OrientationPortrait { portText = append(portText, t) } else { - t.X, t.Y = t.Y, t.X + t.X, t.Y = t.Y, -t.X + t.End.X, t.End.Y = t.End.Y, -t.End.X + t.Orient = contentstream.OrientationPortrait landText = append(landText, t) } } + common.Log.Debug("toLines: portrait ^^^^^^^") portLines := portText.toLinesOrient() + common.Log.Debug("toLines: landscape &&&&&&&") landLines := landText.toLinesOrient() common.Log.Debug("portText=%d landText=%d", len(portText), len(landText)) return append(portLines, landLines...) } -// toLinesOrient return the text and positions in `tl` as a slice of Line. +// toLinesOrient returns the text and positions in `tl` as a slice of Line. // NOTE: Caller must sort the text list top-to-bottom, left-to-write before calling this function. func (tl *TextList) toLinesOrient() []Line { tl.printTexts("toLines: before") diff --git a/pdf/model/font.go b/pdf/model/font.go index c57425fa..0af4fe99 100644 --- a/pdf/model/font.go +++ b/pdf/model/font.go @@ -115,12 +115,8 @@ func DefaultFont() *PdfFont { // NewStandard14Font returns the standard 14 font named `basefont` as a *PdfFont, or an error if it // `basefont` is not one of the standard 14 font names. func NewStandard14Font(basefont Standard14Font) (*PdfFont, error) { - std, ok := loadStandard14Font(basefont) - if !ok { - common.Log.Debug("ERROR: Invalid standard 14 font name %#q", basefont) - return nil, ErrFontNotSupported - } - return &PdfFont{context: &std}, nil + font, _, err := NewStandard14FontWithEncoding(basefont, nil) + return font, err } // NewStandard14FontMustCompile returns the standard 14 font named `basefont` as a *PdfFont. @@ -137,7 +133,8 @@ func NewStandard14FontMustCompile(basefont Standard14Font) *PdfFont { // NewStandard14FontWithEncoding returns the standard 14 font named `basefont` as a *PdfFont and // a SimpleEncoder that encodes all the runes in `alphabet`, or an error if this is not possible. // An error can occur if`basefont` is not one the standard 14 font names. -func NewStandard14FontWithEncoding(basefont Standard14Font, alphabet map[rune]int) (*PdfFont, *textencoding.SimpleEncoder, error) { +func NewStandard14FontWithEncoding(basefont Standard14Font, alphabet map[rune]int) (*PdfFont, + *textencoding.SimpleEncoder, error) { baseEncoder := "MacRomanEncoding" common.Log.Trace("NewStandard14FontWithEncoding: basefont=%#q baseEncoder=%#q alphabet=%q", basefont, baseEncoder, string(sortedAlphabet(alphabet))) @@ -201,9 +198,15 @@ func NewStandard14FontWithEncoding(basefont Standard14Font, alphabet map[rune]in slotIdx++ } } - encoder, err = textencoding.NewSimpleTextEncoder(baseEncoder, differences) - return &PdfFont{context: &std}, encoder, err + encoder, err = textencoding.NewSimpleTextEncoder(baseEncoder, differences) + if err != nil { + return nil, nil, err + } + std.std14Encoder = encoder + std.updateStandard14Font() + + return &PdfFont{context: &std}, encoder, nil } // GetAlphabet returns a map of the runes in `text`.