From c2874e4c353905c9a8cddbf4536ee8f5d083ceb5 Mon Sep 17 00:00:00 2001 From: Gunnsteinn Hall Date: Mon, 25 May 2020 00:56:03 +0000 Subject: [PATCH] Error checking for extractor. Add some comments. --- extractor/text.go | 9 ++++++++- extractor/text_test.go | 4 +--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/extractor/text.go b/extractor/text.go index a91eff75..bbfd1889 100644 --- a/extractor/text.go +++ b/extractor/text.go @@ -435,7 +435,11 @@ func (to *textObject) showTextAdjusted(args *core.PdfObjectArray) error { common.Log.Trace("showTextAdjusted: Bad string arg. o=%s args=%+v", o, args) return core.ErrTypeError } - to.renderText(charcodes) + err := to.renderText(charcodes) + if err != nil { + common.Log.Debug("Render text error: %v", err) + return err + } default: common.Log.Debug("ERROR: showTextAdjusted. Unexpected type (%T) args=%+v", o, args) return core.ErrTypeError @@ -732,6 +736,7 @@ func (to *textObject) renderText(data []byte) error { continue } + // TODO(gunnsth): Assuming 1:1 charcode[i] <-> rune[i] mapping. code := charcodes[i] // The location of the text on the page in device coordinates is given by trm, the text // rendering matrix. @@ -781,6 +786,8 @@ func (to *textObject) renderText(data []byte) error { } else if font.Encoder() == nil { common.Log.Debug("ERROR: No encoding. font=%s", font) } else { + // TODO: This lookup seems confusing. Went from bytes <-> charcodes already. + // NOTE: This is needed to register runes by the font encoder - for subsetting (optimization). original, ok := font.Encoder().CharcodeToRune(code) if ok { mark.original = string(original) diff --git a/extractor/text_test.go b/extractor/text_test.go index 92dfb976..651ef63f 100644 --- a/extractor/text_test.go +++ b/extractor/text_test.go @@ -51,9 +51,7 @@ var doStress bool func init() { flag.BoolVar(&doStress, "extractor-stresstest", false, "Run text extractor stress tests.") common.SetLogger(common.NewConsoleLogger(common.LogLevelInfo)) - if flag.Lookup("test.v") != nil { - isTesting = true - } + isTesting = true } // TestTextExtractionFragments tests text extraction on the PDF fragments in `fragmentTests`.