mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-29 13:48:54 +08:00
Changed error handling. Allow partial encoding maps. Don't continue processing unsupported fonts
This commit is contained in:
parent
ec50032dc5
commit
49674d6b63
@ -12,6 +12,8 @@ package extractor
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/unidoc/unidoc/common"
|
||||
"github.com/unidoc/unidoc/pdf/contentstream"
|
||||
@ -148,10 +150,10 @@ func (e *Extractor) ExtractXYText() (*TextList, error) {
|
||||
return err
|
||||
}
|
||||
err = to.setFont(name, size)
|
||||
if err == model.ErrUnsupportedFont {
|
||||
common.Log.Debug("Swallow error. err=%v", err)
|
||||
err = nil
|
||||
}
|
||||
// if err == model.ErrUnsupportedFont {
|
||||
// common.Log.Debug("Swallow error. err=%v", err)
|
||||
// err = nil
|
||||
// }
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -206,10 +208,10 @@ func (e *Extractor) ExtractXYText() (*TextList, error) {
|
||||
})
|
||||
|
||||
err = processor.Process(e.resources)
|
||||
if err == model.ErrUnsupportedFont {
|
||||
common.Log.Debug("Swallow error. err=%v", err)
|
||||
err = nil
|
||||
}
|
||||
// if err == model.ErrUnsupportedFont {
|
||||
// common.Log.Debug("Swallow error. err=%v", err)
|
||||
// err = nil
|
||||
// }
|
||||
if err != nil {
|
||||
common.Log.Error("ERROR: Processing: err=%v", err)
|
||||
return textList, err
|
||||
@ -329,8 +331,8 @@ func (to *TextObject) setFont(name string, size float64) error {
|
||||
if err == nil {
|
||||
to.State.Tf = font
|
||||
} else if err == ErrFontNotSupported {
|
||||
// XXX: HACK !@#$ This is not correct. Fix it.
|
||||
to.State.Tf = nil
|
||||
return err
|
||||
// to.State.Tf = nil
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ package model
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/unidoc/unidoc/common"
|
||||
. "github.com/unidoc/unidoc/pdf/core"
|
||||
@ -112,7 +113,7 @@ func newPdfFontFromPdfObject(fontObj PdfObject, allowType0 bool) (*PdfFont, erro
|
||||
font.context = cidfont
|
||||
default:
|
||||
common.Log.Debug("ERROR: Unsupported font type: font=%s", font)
|
||||
return nil, ErrUnsupportedFont
|
||||
return nil, fmt.Errorf("Unsupported font type: font=%s", font)
|
||||
}
|
||||
|
||||
return font, nil
|
||||
@ -159,8 +160,8 @@ func (font PdfFont) CharcodeBytesToUnicode(data []byte) (string, error) {
|
||||
for _, code := range charcodes {
|
||||
r, ok := encoder.CharcodeToRune(code)
|
||||
if !ok {
|
||||
common.Log.Debug("ERROR: No rune. code=0x%04x font=%s encoding=%s data = [% 02x]=%#q",
|
||||
code, font, encoder, data, data)
|
||||
common.Log.Debug("ERROR: No rune. code=0x%04x data = [% 02x]=%#q\nfont=%s\nencoding=%s ",
|
||||
code, data, data, font, encoder)
|
||||
r = cmap.MissingCodeRune
|
||||
return string(data), ErrBadText
|
||||
}
|
||||
@ -311,7 +312,7 @@ func (skel fontSkeleton) toDict(subtype string) *PdfObjectDictionary {
|
||||
func (skel fontSkeleton) String() string {
|
||||
descriptor := ""
|
||||
if skel.fontDescriptor != nil {
|
||||
descriptor = "(has descriptor)"
|
||||
descriptor = skel.fontDescriptor.String()
|
||||
}
|
||||
return fmt.Sprintf("FONT{%#q %#q %s}", skel.subtype, skel.basefont, descriptor)
|
||||
}
|
||||
@ -458,6 +459,20 @@ type PdfFontDescriptor struct {
|
||||
container *PdfIndirectObject
|
||||
}
|
||||
|
||||
func (descriptor *PdfFontDescriptor) String() string {
|
||||
parts := []string{}
|
||||
if descriptor.FontName != nil {
|
||||
parts = append(parts, descriptor.FontName.String())
|
||||
}
|
||||
if descriptor.FontFamily != nil {
|
||||
parts = append(parts, descriptor.FontFamily.String())
|
||||
}
|
||||
parts = append(parts, fmt.Sprintf("FontFile=%t", descriptor.FontFile != nil))
|
||||
parts = append(parts, fmt.Sprintf("FontFile2=%t", descriptor.FontFile2 != nil))
|
||||
parts = append(parts, fmt.Sprintf("FontFile3=%t", descriptor.FontFile3 != nil))
|
||||
return fmt.Sprintf("FONT_DESCRIPTON{%s}", strings.Join(parts, ", "))
|
||||
}
|
||||
|
||||
// newPdfFontDescriptorFromPdfObject loads the font descriptor from a PdfObject. Can either be a
|
||||
// *PdfIndirectObject or a *PdfObjectDictionary.
|
||||
func newPdfFontDescriptorFromPdfObject(obj PdfObject) (*PdfFontDescriptor, error) {
|
||||
@ -490,7 +505,6 @@ func newPdfFontDescriptorFromPdfObject(obj PdfObject) (*PdfFontDescriptor, error
|
||||
} else {
|
||||
common.Log.Trace("Incompatibility: Type (Required) missing. font=%q %T",
|
||||
fontname, descriptor.FontName)
|
||||
// return nil, errors.New("$$$$$")
|
||||
}
|
||||
|
||||
descriptor.FontFamily = d.Get("FontFamily")
|
||||
|
@ -21,7 +21,11 @@ type fontFile struct {
|
||||
}
|
||||
|
||||
func (fontfile *fontFile) String() string {
|
||||
return fmt.Sprintf("FONTFILE{%#q encoder=%s}", fontfile.name, fontfile.encoder)
|
||||
encoding := "[None]"
|
||||
if fontfile.encoder != nil {
|
||||
encoding = fontfile.encoder.String()
|
||||
}
|
||||
return fmt.Sprintf("FONTFILE{%#q encoder=%s}", fontfile.name, encoding)
|
||||
}
|
||||
|
||||
// newFontFileFromPdfObject loads a FontFile from a PdfObject. Can either be a
|
||||
@ -107,6 +111,9 @@ func (fontfile *fontFile) loadFromSegments(segment1, segment2 []byte) error {
|
||||
// parseAsciiPart parses the ASCII part of the FontFile.
|
||||
func (fontfile *fontFile) parseAsciiPart(data []byte) error {
|
||||
common.Log.Debug("parseAsciiPart: %d ", len(data))
|
||||
// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~^^^~~~~~~~~~~~~~~~~~~~~~~~")
|
||||
// fmt.Printf("data=%s\n", string(data))
|
||||
// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~!!!~~~~~~~~~~~~~~~~~~~~~~~")
|
||||
// The start of a FontFile looks like
|
||||
// %!PS-AdobeFont-1.0: MyArial 003.002
|
||||
// %%Title: MyArial
|
||||
|
@ -47,7 +47,7 @@ func NewCustomSimpleTextEncoder(encoding map[uint16]string, differences map[byte
|
||||
r, ok := GlyphToRune(glyph)
|
||||
if !ok {
|
||||
common.Log.Debug("ERROR: Unknown glyph. %q", glyph)
|
||||
return SimpleEncoder{}, ErrTypeError
|
||||
// return SimpleEncoder{}, ErrTypeError
|
||||
}
|
||||
baseEncoding[code] = r
|
||||
}
|
||||
@ -87,7 +87,7 @@ func newSimpleTextEncoder(baseEncoding map[uint16]rune, baseName string,
|
||||
}
|
||||
|
||||
// simpleEncoderNumEntries is the maximum number of encoding entries shown in SimpleEncoder.String()
|
||||
const simpleEncoderNumEntries = 0
|
||||
const simpleEncoderNumEntries = 1000
|
||||
|
||||
// String returns a string that describes `se`.
|
||||
func (se SimpleEncoder) String() string {
|
||||
|
Loading…
x
Reference in New Issue
Block a user