Error checking for extractor. Add some comments.

This commit is contained in:
Gunnsteinn Hall 2020-05-25 00:56:03 +00:00
parent 2dde46eb70
commit c2874e4c35
2 changed files with 9 additions and 4 deletions

View File

@@ -435,7 +435,11 @@ func (to *textObject) showTextAdjusted(args *core.PdfObjectArray) error {
common.Log.Trace("showTextAdjusted: Bad string arg. o=%s args=%+v", o, args) common.Log.Trace("showTextAdjusted: Bad string arg. o=%s args=%+v", o, args)
return core.ErrTypeError return core.ErrTypeError
} }
to.renderText(charcodes) err := to.renderText(charcodes)
if err != nil {
common.Log.Debug("Render text error: %v", err)
return err
}
default: default:
common.Log.Debug("ERROR: showTextAdjusted. Unexpected type (%T) args=%+v", o, args) common.Log.Debug("ERROR: showTextAdjusted. Unexpected type (%T) args=%+v", o, args)
return core.ErrTypeError return core.ErrTypeError
@@ -732,6 +736,7 @@ func (to *textObject) renderText(data []byte) error {
continue continue
} }
// TODO(gunnsth): Assuming 1:1 charcode[i] <-> rune[i] mapping.
code := charcodes[i] code := charcodes[i]
// The location of the text on the page in device coordinates is given by trm, the text // The location of the text on the page in device coordinates is given by trm, the text
// rendering matrix. // rendering matrix.
@@ -781,6 +786,8 @@ func (to *textObject) renderText(data []byte) error {
} else if font.Encoder() == nil { } else if font.Encoder() == nil {
common.Log.Debug("ERROR: No encoding. font=%s", font) common.Log.Debug("ERROR: No encoding. font=%s", font)
} else { } else {
// TODO: This lookup seems confusing. Went from bytes <-> charcodes already.
// NOTE: This is needed to register runes by the font encoder - for subsetting (optimization).
original, ok := font.Encoder().CharcodeToRune(code) original, ok := font.Encoder().CharcodeToRune(code)
if ok { if ok {
mark.original = string(original) mark.original = string(original)

View File

@@ -51,9 +51,7 @@ var doStress bool
func init() { func init() {
flag.BoolVar(&doStress, "extractor-stresstest", false, "Run text extractor stress tests.") flag.BoolVar(&doStress, "extractor-stresstest", false, "Run text extractor stress tests.")
common.SetLogger(common.NewConsoleLogger(common.LogLevelInfo)) common.SetLogger(common.NewConsoleLogger(common.LogLevelInfo))
if flag.Lookup("test.v") != nil {
isTesting = true isTesting = true
}
} }
// TestTextExtractionFragments tests text extraction on the PDF fragments in `fragmentTests`. // TestTextExtractionFragments tests text extraction on the PDF fragments in `fragmentTests`.