diff --git a/extractor/text.go b/extractor/text.go index f16645f5..5872c480 100644 --- a/extractor/text.go +++ b/extractor/text.go @@ -245,7 +245,7 @@ func (e *Extractor) extractPageText(contents string, resources *model.PdfPageRes return err } err = to.setFont(name, size) - to.invalidFont = unsupportedFontErr(err) + to.invalidFont = errors.Is(err, core.ErrNotSupported) if err != nil && !to.invalidFont { return err } @@ -372,23 +372,6 @@ func (e *Extractor) extractPageText(contents string, resources *model.PdfPageRes return pageText, state.numChars, state.numMisses, err } -// unsupportedFontErr returns true if `err` indicated that the selected font or encoding is not supported. -func unsupportedFontErr(err error) bool { - if err == model.ErrFontNotSupported || - err == model.ErrType1CFontNotSupported || - err == model.ErrType3FontNotSupported || - err == model.ErrTTCmapNotSupported { - return true - } - if err == nil { - return false - } - errStr := err.Error() - return strings.Contains(errStr, "unsupported font encoding:") || - strings.Contains(errStr, "unexpected subtable format:") || - strings.Contains(errStr, "fonts based on PostScript outlines are not supported") -} - // textResult is used for holding results of PDF form processig type textResult struct { pageText PageText diff --git a/internal/textencoding/simple.go b/internal/textencoding/simple.go index bd209beb..ebd5592c 100644 --- a/internal/textencoding/simple.go +++ b/internal/textencoding/simple.go @@ -55,7 +55,7 @@ func NewSimpleTextEncoder(baseName string, differences map[CharCode]GlyphName) ( fnc, ok := simple[baseName] if !ok { common.Log.Debug("ERROR: NewSimpleTextEncoder. Unknown encoding %q", baseName) - return nil, fmt.Errorf("unsupported font encoding: %q", baseName) + return nil, fmt.Errorf("unsupported font encoding: %q (%w)", baseName, core.ErrNotSupported) } enc := fnc() if len(differences) != 0 { diff --git a/model/const.go b/model/const.go index d6efcac4..ff2f1f4e 100644 --- a/model/const.go +++ b/model/const.go @@ -7,6 +7,9 @@ package model import ( "errors" + "fmt" + + "github.com/unidoc/unipdf/v3/core" ) // Errors when parsing/loading data in PDF. @@ -18,8 +21,8 @@ var ( errRangeError = errors.New("range check error") ErrEncrypted = errors.New("file needs to be decrypted first") ErrNoFont = errors.New("font not defined") - ErrFontNotSupported = errors.New("unsupported font") - ErrType1CFontNotSupported = errors.New("Type1C fonts are not currently supported") - ErrType3FontNotSupported = errors.New("Type3 fonts are not currently supported") - ErrTTCmapNotSupported = errors.New("unsupported TrueType cmap format") + ErrFontNotSupported = fmt.Errorf("unsupported font (%w)", core.ErrNotSupported) + ErrType1CFontNotSupported = fmt.Errorf("Type1C fonts are not currently supported (%w)", core.ErrNotSupported) + ErrType3FontNotSupported = fmt.Errorf("Type3 fonts are not currently supported (%w)", core.ErrNotSupported) + ErrTTCmapNotSupported = fmt.Errorf("unsupported TrueType cmap format (%w)", core.ErrNotSupported) ) diff --git a/model/internal/fonts/ttfparser.go b/model/internal/fonts/ttfparser.go index 42d0a94c..1e8d07cc 100644 --- a/model/internal/fonts/ttfparser.go +++ b/model/internal/fonts/ttfparser.go @@ -209,7 +209,8 @@ func (t *ttfParser) Parse() (TtfType, error) { } if version == "OTTO" { // See https://docs.microsoft.com/en-us/typography/opentype/spec/otff - return TtfType{}, errors.New("fonts based on PostScript outlines are not supported") + return TtfType{}, fmt.Errorf("fonts based on PostScript outlines are not supported (%w)", + core.ErrNotSupported) } if version != "\x00\x01\x00\x00" && version != "true" { // This is not an error. In the font_test.go example axes.txt we see version "true". @@ -376,7 +377,7 @@ func (t *ttfParser) parseCmapSubtable31(offset31 int64) error { t.f.Seek(int64(t.tables["cmap"])+offset31, os.SEEK_SET) format := t.ReadUShort() if format != 4 { - return fmt.Errorf("unexpected subtable format: %d", format) + return fmt.Errorf("unexpected subtable format: %d (%w)", format, core.ErrNotSupported) } t.Skip(2 * 2) // length, language segCount := int(t.ReadUShort() / 2)