Changed error handling. Allow partial encoding maps. Don't continue processing unsupported fonts

This commit is contained in:
Peter Williams 2018-07-04 18:00:37 +10:00
parent ec50032dc5
commit 49674d6b63
4 changed files with 41 additions and 18 deletions

View File

@ -12,6 +12,8 @@ package extractor
import (
"bytes"
"errors"
"fmt"
"os"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/contentstream"
@ -148,10 +150,10 @@ func (e *Extractor) ExtractXYText() (*TextList, error) {
return err
}
err = to.setFont(name, size)
if err == model.ErrUnsupportedFont {
common.Log.Debug("Swallow error. err=%v", err)
err = nil
}
// if err == model.ErrUnsupportedFont {
// common.Log.Debug("Swallow error. err=%v", err)
// err = nil
// }
if err != nil {
return err
}
@ -206,10 +208,10 @@ func (e *Extractor) ExtractXYText() (*TextList, error) {
})
err = processor.Process(e.resources)
if err == model.ErrUnsupportedFont {
common.Log.Debug("Swallow error. err=%v", err)
err = nil
}
// if err == model.ErrUnsupportedFont {
// common.Log.Debug("Swallow error. err=%v", err)
// err = nil
// }
if err != nil {
common.Log.Error("ERROR: Processing: err=%v", err)
return textList, err
@ -329,8 +331,8 @@ func (to *TextObject) setFont(name string, size float64) error {
if err == nil {
to.State.Tf = font
} else if err == ErrFontNotSupported {
// XXX: HACK !@#$ This is not correct. Fix it.
to.State.Tf = nil
return err
// to.State.Tf = nil
} else {
return err
}

View File

@ -8,6 +8,7 @@ package model
import (
"errors"
"fmt"
"strings"
"github.com/unidoc/unidoc/common"
. "github.com/unidoc/unidoc/pdf/core"
@ -112,7 +113,7 @@ func newPdfFontFromPdfObject(fontObj PdfObject, allowType0 bool) (*PdfFont, erro
font.context = cidfont
default:
common.Log.Debug("ERROR: Unsupported font type: font=%s", font)
return nil, ErrUnsupportedFont
return nil, fmt.Errorf("Unsupported font type: font=%s", font)
}
return font, nil
@ -159,8 +160,8 @@ func (font PdfFont) CharcodeBytesToUnicode(data []byte) (string, error) {
for _, code := range charcodes {
r, ok := encoder.CharcodeToRune(code)
if !ok {
common.Log.Debug("ERROR: No rune. code=0x%04x font=%s encoding=%s data = [% 02x]=%#q",
code, font, encoder, data, data)
common.Log.Debug("ERROR: No rune. code=0x%04x data = [% 02x]=%#q\nfont=%s\nencoding=%s ",
code, data, data, font, encoder)
r = cmap.MissingCodeRune
return string(data), ErrBadText
}
@ -311,7 +312,7 @@ func (skel fontSkeleton) toDict(subtype string) *PdfObjectDictionary {
// String returns a short description of `skel` for logging: its subtype, its base font name and,
// when a font descriptor is attached, that descriptor's own String() output.
func (skel fontSkeleton) String() string {
	descriptor := ""
	if skel.fontDescriptor != nil {
		descriptor = skel.fontDescriptor.String()
	}
	return fmt.Sprintf("FONT{%#q %#q %s}", skel.subtype, skel.basefont, descriptor)
}
@ -458,6 +459,20 @@ type PdfFontDescriptor struct {
container *PdfIndirectObject
}
// String returns a description of `descriptor` for logging. It lists the FontName and FontFamily
// entries when they are set, followed by flags telling whether each of FontFile, FontFile2 and
// FontFile3 is set.
func (descriptor *PdfFontDescriptor) String() string {
	// At most two optional names plus the three FontFile flags.
	parts := make([]string, 0, 5)
	if descriptor.FontName != nil {
		parts = append(parts, descriptor.FontName.String())
	}
	if descriptor.FontFamily != nil {
		parts = append(parts, descriptor.FontFamily.String())
	}
	parts = append(parts,
		fmt.Sprintf("FontFile=%t", descriptor.FontFile != nil),
		fmt.Sprintf("FontFile2=%t", descriptor.FontFile2 != nil),
		fmt.Sprintf("FontFile3=%t", descriptor.FontFile3 != nil),
	)
	return fmt.Sprintf("FONT_DESCRIPTOR{%s}", strings.Join(parts, ", "))
}
// newPdfFontDescriptorFromPdfObject loads the font descriptor from a PdfObject. Can either be a
// *PdfIndirectObject or a *PdfObjectDictionary.
func newPdfFontDescriptorFromPdfObject(obj PdfObject) (*PdfFontDescriptor, error) {
@ -490,7 +505,6 @@ func newPdfFontDescriptorFromPdfObject(obj PdfObject) (*PdfFontDescriptor, error
} else {
common.Log.Trace("Incompatibility: Type (Required) missing. font=%q %T",
fontname, descriptor.FontName)
// return nil, errors.New("$$$$$")
}
descriptor.FontFamily = d.Get("FontFamily")

View File

@ -21,7 +21,11 @@ type fontFile struct {
}
// String returns a description of `fontfile` for logging: its name and its encoder. When no
// encoder is set, "[None]" is shown instead so a nil encoder is never passed to the formatter.
func (fontfile *fontFile) String() string {
	encoding := "[None]"
	if fontfile.encoder != nil {
		encoding = fontfile.encoder.String()
	}
	return fmt.Sprintf("FONTFILE{%#q encoder=%s}", fontfile.name, encoding)
}
// newFontFileFromPdfObject loads a FontFile from a PdfObject. Can either be a
@ -107,6 +111,9 @@ func (fontfile *fontFile) loadFromSegments(segment1, segment2 []byte) error {
// parseAsciiPart parses the ASCII part of the FontFile.
func (fontfile *fontFile) parseAsciiPart(data []byte) error {
common.Log.Debug("parseAsciiPart: %d ", len(data))
// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~^^^~~~~~~~~~~~~~~~~~~~~~~~")
// fmt.Printf("data=%s\n", string(data))
// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~!!!~~~~~~~~~~~~~~~~~~~~~~~")
// The start of a FontFile looks like
// %!PS-AdobeFont-1.0: MyArial 003.002
// %%Title: MyArial

View File

@ -47,7 +47,7 @@ func NewCustomSimpleTextEncoder(encoding map[uint16]string, differences map[byte
r, ok := GlyphToRune(glyph)
if !ok {
common.Log.Debug("ERROR: Unknown glyph. %q", glyph)
return SimpleEncoder{}, ErrTypeError
// return SimpleEncoder{}, ErrTypeError
}
baseEncoding[code] = r
}
@ -87,7 +87,7 @@ func newSimpleTextEncoder(baseEncoding map[uint16]rune, baseName string,
}
// simpleEncoderNumEntries is the maximum number of encoding entries shown in SimpleEncoder.String()
const simpleEncoderNumEntries = 1000
// String returns a string that describes `se`.
func (se SimpleEncoder) String() string {