Changed error handling. Allow partial encoding maps. Don't continue processing unsupported fonts

This commit is contained in:
Peter Williams 2018-07-04 18:00:37 +10:00
parent ec50032dc5
commit 49674d6b63
4 changed files with 41 additions and 18 deletions

View File

@ -12,6 +12,8 @@ package extractor
import ( import (
"bytes" "bytes"
"errors" "errors"
"fmt"
"os"
"github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/contentstream" "github.com/unidoc/unidoc/pdf/contentstream"
@ -148,10 +150,10 @@ func (e *Extractor) ExtractXYText() (*TextList, error) {
return err return err
} }
err = to.setFont(name, size) err = to.setFont(name, size)
if err == model.ErrUnsupportedFont { // if err == model.ErrUnsupportedFont {
common.Log.Debug("Swallow error. err=%v", err) // common.Log.Debug("Swallow error. err=%v", err)
err = nil // err = nil
} // }
if err != nil { if err != nil {
return err return err
} }
@ -206,10 +208,10 @@ func (e *Extractor) ExtractXYText() (*TextList, error) {
}) })
err = processor.Process(e.resources) err = processor.Process(e.resources)
if err == model.ErrUnsupportedFont { // if err == model.ErrUnsupportedFont {
common.Log.Debug("Swallow error. err=%v", err) // common.Log.Debug("Swallow error. err=%v", err)
err = nil // err = nil
} // }
if err != nil { if err != nil {
common.Log.Error("ERROR: Processing: err=%v", err) common.Log.Error("ERROR: Processing: err=%v", err)
return textList, err return textList, err
@ -329,8 +331,8 @@ func (to *TextObject) setFont(name string, size float64) error {
if err == nil { if err == nil {
to.State.Tf = font to.State.Tf = font
} else if err == ErrFontNotSupported { } else if err == ErrFontNotSupported {
// XXX: HACK !@#$ This is not correct. Fix it. return err
to.State.Tf = nil // to.State.Tf = nil
} else { } else {
return err return err
} }

View File

@ -8,6 +8,7 @@ package model
import ( import (
"errors" "errors"
"fmt" "fmt"
"strings"
"github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/common"
. "github.com/unidoc/unidoc/pdf/core" . "github.com/unidoc/unidoc/pdf/core"
@ -112,7 +113,7 @@ func newPdfFontFromPdfObject(fontObj PdfObject, allowType0 bool) (*PdfFont, erro
font.context = cidfont font.context = cidfont
default: default:
common.Log.Debug("ERROR: Unsupported font type: font=%s", font) common.Log.Debug("ERROR: Unsupported font type: font=%s", font)
return nil, ErrUnsupportedFont return nil, fmt.Errorf("Unsupported font type: font=%s", font)
} }
return font, nil return font, nil
@ -159,8 +160,8 @@ func (font PdfFont) CharcodeBytesToUnicode(data []byte) (string, error) {
for _, code := range charcodes { for _, code := range charcodes {
r, ok := encoder.CharcodeToRune(code) r, ok := encoder.CharcodeToRune(code)
if !ok { if !ok {
common.Log.Debug("ERROR: No rune. code=0x%04x font=%s encoding=%s data = [% 02x]=%#q", common.Log.Debug("ERROR: No rune. code=0x%04x data = [% 02x]=%#q\nfont=%s\nencoding=%s ",
code, font, encoder, data, data) code, data, data, font, encoder)
r = cmap.MissingCodeRune r = cmap.MissingCodeRune
return string(data), ErrBadText return string(data), ErrBadText
} }
@ -311,7 +312,7 @@ func (skel fontSkeleton) toDict(subtype string) *PdfObjectDictionary {
func (skel fontSkeleton) String() string { func (skel fontSkeleton) String() string {
descriptor := "" descriptor := ""
if skel.fontDescriptor != nil { if skel.fontDescriptor != nil {
descriptor = "(has descriptor)" descriptor = skel.fontDescriptor.String()
} }
return fmt.Sprintf("FONT{%#q %#q %s}", skel.subtype, skel.basefont, descriptor) return fmt.Sprintf("FONT{%#q %#q %s}", skel.subtype, skel.basefont, descriptor)
} }
@ -458,6 +459,20 @@ type PdfFontDescriptor struct {
container *PdfIndirectObject container *PdfIndirectObject
} }
// String returns a short, human-readable summary of `descriptor` for log and debug output.
// The summary lists FontName and FontFamily (each only when set), followed by flags reporting
// whether the FontFile, FontFile2 and FontFile3 entries are present.
func (descriptor *PdfFontDescriptor) String() string {
	// At most two names plus the three FontFile flags.
	parts := make([]string, 0, 5)
	if descriptor.FontName != nil {
		parts = append(parts, descriptor.FontName.String())
	}
	if descriptor.FontFamily != nil {
		parts = append(parts, descriptor.FontFamily.String())
	}
	parts = append(parts,
		fmt.Sprintf("FontFile=%t", descriptor.FontFile != nil),
		fmt.Sprintf("FontFile2=%t", descriptor.FontFile2 != nil),
		fmt.Sprintf("FontFile3=%t", descriptor.FontFile3 != nil),
	)
	// The label previously read "FONT_DESCRIPTON" (typo).
	return fmt.Sprintf("FONT_DESCRIPTOR{%s}", strings.Join(parts, ", "))
}
// newPdfFontDescriptorFromPdfObject loads the font descriptor from a PdfObject. Can either be a // newPdfFontDescriptorFromPdfObject loads the font descriptor from a PdfObject. Can either be a
// *PdfIndirectObject or a *PdfObjectDictionary. // *PdfIndirectObject or a *PdfObjectDictionary.
func newPdfFontDescriptorFromPdfObject(obj PdfObject) (*PdfFontDescriptor, error) { func newPdfFontDescriptorFromPdfObject(obj PdfObject) (*PdfFontDescriptor, error) {
@ -490,7 +505,6 @@ func newPdfFontDescriptorFromPdfObject(obj PdfObject) (*PdfFontDescriptor, error
} else { } else {
common.Log.Trace("Incompatibility: Type (Required) missing. font=%q %T", common.Log.Trace("Incompatibility: Type (Required) missing. font=%q %T",
fontname, descriptor.FontName) fontname, descriptor.FontName)
// return nil, errors.New("$$$$$")
} }
descriptor.FontFamily = d.Get("FontFamily") descriptor.FontFamily = d.Get("FontFamily")

View File

@ -21,7 +21,11 @@ type fontFile struct {
} }
func (fontfile *fontFile) String() string { func (fontfile *fontFile) String() string {
return fmt.Sprintf("FONTFILE{%#q encoder=%s}", fontfile.name, fontfile.encoder) encoding := "[None]"
if fontfile.encoder != nil {
encoding = fontfile.encoder.String()
}
return fmt.Sprintf("FONTFILE{%#q encoder=%s}", fontfile.name, encoding)
} }
// newFontFileFromPdfObject loads a FontFile from a PdfObject. Can either be a // newFontFileFromPdfObject loads a FontFile from a PdfObject. Can either be a
@ -107,6 +111,9 @@ func (fontfile *fontFile) loadFromSegments(segment1, segment2 []byte) error {
// parseAsciiPart parses the ASCII part of the FontFile. // parseAsciiPart parses the ASCII part of the FontFile.
func (fontfile *fontFile) parseAsciiPart(data []byte) error { func (fontfile *fontFile) parseAsciiPart(data []byte) error {
common.Log.Debug("parseAsciiPart: %d ", len(data)) common.Log.Debug("parseAsciiPart: %d ", len(data))
// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~^^^~~~~~~~~~~~~~~~~~~~~~~~")
// fmt.Printf("data=%s\n", string(data))
// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~!!!~~~~~~~~~~~~~~~~~~~~~~~")
// The start of a FontFile looks like // The start of a FontFile looks like
// %!PS-AdobeFont-1.0: MyArial 003.002 // %!PS-AdobeFont-1.0: MyArial 003.002
// %%Title: MyArial // %%Title: MyArial

View File

@ -47,7 +47,7 @@ func NewCustomSimpleTextEncoder(encoding map[uint16]string, differences map[byte
r, ok := GlyphToRune(glyph) r, ok := GlyphToRune(glyph)
if !ok { if !ok {
common.Log.Debug("ERROR: Unknown glyph. %q", glyph) common.Log.Debug("ERROR: Unknown glyph. %q", glyph)
return SimpleEncoder{}, ErrTypeError // return SimpleEncoder{}, ErrTypeError
} }
baseEncoding[code] = r baseEncoding[code] = r
} }
@ -87,7 +87,7 @@ func newSimpleTextEncoder(baseEncoding map[uint16]rune, baseName string,
} }
// simpleEncoderNumEntries is the maximum number of encoding entries shown in SimpleEncoder.String() // simpleEncoderNumEntries is the maximum number of encoding entries shown in SimpleEncoder.String()
const simpleEncoderNumEntries = 0 const simpleEncoderNumEntries = 1000
// String returns a string that describes `se`. // String returns a string that describes `se`.
func (se SimpleEncoder) String() string { func (se SimpleEncoder) String() string {