mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-29 13:48:54 +08:00
Change error handling. Allow partial encoding maps. Don't continue processing unsupported fonts.
This commit is contained in:
parent
ec50032dc5
commit
49674d6b63
@ -12,6 +12,8 @@ package extractor
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
"github.com/unidoc/unidoc/common"
|
"github.com/unidoc/unidoc/common"
|
||||||
"github.com/unidoc/unidoc/pdf/contentstream"
|
"github.com/unidoc/unidoc/pdf/contentstream"
|
||||||
@ -148,10 +150,10 @@ func (e *Extractor) ExtractXYText() (*TextList, error) {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
err = to.setFont(name, size)
|
err = to.setFont(name, size)
|
||||||
if err == model.ErrUnsupportedFont {
|
// if err == model.ErrUnsupportedFont {
|
||||||
common.Log.Debug("Swallow error. err=%v", err)
|
// common.Log.Debug("Swallow error. err=%v", err)
|
||||||
err = nil
|
// err = nil
|
||||||
}
|
// }
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -206,10 +208,10 @@ func (e *Extractor) ExtractXYText() (*TextList, error) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
err = processor.Process(e.resources)
|
err = processor.Process(e.resources)
|
||||||
if err == model.ErrUnsupportedFont {
|
// if err == model.ErrUnsupportedFont {
|
||||||
common.Log.Debug("Swallow error. err=%v", err)
|
// common.Log.Debug("Swallow error. err=%v", err)
|
||||||
err = nil
|
// err = nil
|
||||||
}
|
// }
|
||||||
if err != nil {
|
if err != nil {
|
||||||
common.Log.Error("ERROR: Processing: err=%v", err)
|
common.Log.Error("ERROR: Processing: err=%v", err)
|
||||||
return textList, err
|
return textList, err
|
||||||
@ -329,8 +331,8 @@ func (to *TextObject) setFont(name string, size float64) error {
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
to.State.Tf = font
|
to.State.Tf = font
|
||||||
} else if err == ErrFontNotSupported {
|
} else if err == ErrFontNotSupported {
|
||||||
// XXX: HACK !@#$ This is not correct. Fix it.
|
return err
|
||||||
to.State.Tf = nil
|
// to.State.Tf = nil
|
||||||
} else {
|
} else {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -8,6 +8,7 @@ package model
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/unidoc/unidoc/common"
|
"github.com/unidoc/unidoc/common"
|
||||||
. "github.com/unidoc/unidoc/pdf/core"
|
. "github.com/unidoc/unidoc/pdf/core"
|
||||||
@ -112,7 +113,7 @@ func newPdfFontFromPdfObject(fontObj PdfObject, allowType0 bool) (*PdfFont, erro
|
|||||||
font.context = cidfont
|
font.context = cidfont
|
||||||
default:
|
default:
|
||||||
common.Log.Debug("ERROR: Unsupported font type: font=%s", font)
|
common.Log.Debug("ERROR: Unsupported font type: font=%s", font)
|
||||||
return nil, ErrUnsupportedFont
|
return nil, fmt.Errorf("Unsupported font type: font=%s", font)
|
||||||
}
|
}
|
||||||
|
|
||||||
return font, nil
|
return font, nil
|
||||||
@ -159,8 +160,8 @@ func (font PdfFont) CharcodeBytesToUnicode(data []byte) (string, error) {
|
|||||||
for _, code := range charcodes {
|
for _, code := range charcodes {
|
||||||
r, ok := encoder.CharcodeToRune(code)
|
r, ok := encoder.CharcodeToRune(code)
|
||||||
if !ok {
|
if !ok {
|
||||||
common.Log.Debug("ERROR: No rune. code=0x%04x font=%s encoding=%s data = [% 02x]=%#q",
|
common.Log.Debug("ERROR: No rune. code=0x%04x data = [% 02x]=%#q\nfont=%s\nencoding=%s ",
|
||||||
code, font, encoder, data, data)
|
code, data, data, font, encoder)
|
||||||
r = cmap.MissingCodeRune
|
r = cmap.MissingCodeRune
|
||||||
return string(data), ErrBadText
|
return string(data), ErrBadText
|
||||||
}
|
}
|
||||||
@ -311,7 +312,7 @@ func (skel fontSkeleton) toDict(subtype string) *PdfObjectDictionary {
|
|||||||
func (skel fontSkeleton) String() string {
|
func (skel fontSkeleton) String() string {
|
||||||
descriptor := ""
|
descriptor := ""
|
||||||
if skel.fontDescriptor != nil {
|
if skel.fontDescriptor != nil {
|
||||||
descriptor = "(has descriptor)"
|
descriptor = skel.fontDescriptor.String()
|
||||||
}
|
}
|
||||||
return fmt.Sprintf("FONT{%#q %#q %s}", skel.subtype, skel.basefont, descriptor)
|
return fmt.Sprintf("FONT{%#q %#q %s}", skel.subtype, skel.basefont, descriptor)
|
||||||
}
|
}
|
||||||
@ -458,6 +459,20 @@ type PdfFontDescriptor struct {
|
|||||||
container *PdfIndirectObject
|
container *PdfIndirectObject
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (descriptor *PdfFontDescriptor) String() string {
|
||||||
|
parts := []string{}
|
||||||
|
if descriptor.FontName != nil {
|
||||||
|
parts = append(parts, descriptor.FontName.String())
|
||||||
|
}
|
||||||
|
if descriptor.FontFamily != nil {
|
||||||
|
parts = append(parts, descriptor.FontFamily.String())
|
||||||
|
}
|
||||||
|
parts = append(parts, fmt.Sprintf("FontFile=%t", descriptor.FontFile != nil))
|
||||||
|
parts = append(parts, fmt.Sprintf("FontFile2=%t", descriptor.FontFile2 != nil))
|
||||||
|
parts = append(parts, fmt.Sprintf("FontFile3=%t", descriptor.FontFile3 != nil))
|
||||||
|
return fmt.Sprintf("FONT_DESCRIPTON{%s}", strings.Join(parts, ", "))
|
||||||
|
}
|
||||||
|
|
||||||
// newPdfFontDescriptorFromPdfObject loads the font descriptor from a PdfObject. Can either be a
|
// newPdfFontDescriptorFromPdfObject loads the font descriptor from a PdfObject. Can either be a
|
||||||
// *PdfIndirectObject or a *PdfObjectDictionary.
|
// *PdfIndirectObject or a *PdfObjectDictionary.
|
||||||
func newPdfFontDescriptorFromPdfObject(obj PdfObject) (*PdfFontDescriptor, error) {
|
func newPdfFontDescriptorFromPdfObject(obj PdfObject) (*PdfFontDescriptor, error) {
|
||||||
@ -490,7 +505,6 @@ func newPdfFontDescriptorFromPdfObject(obj PdfObject) (*PdfFontDescriptor, error
|
|||||||
} else {
|
} else {
|
||||||
common.Log.Trace("Incompatibility: Type (Required) missing. font=%q %T",
|
common.Log.Trace("Incompatibility: Type (Required) missing. font=%q %T",
|
||||||
fontname, descriptor.FontName)
|
fontname, descriptor.FontName)
|
||||||
// return nil, errors.New("$$$$$")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
descriptor.FontFamily = d.Get("FontFamily")
|
descriptor.FontFamily = d.Get("FontFamily")
|
||||||
|
@ -21,7 +21,11 @@ type fontFile struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (fontfile *fontFile) String() string {
|
func (fontfile *fontFile) String() string {
|
||||||
return fmt.Sprintf("FONTFILE{%#q encoder=%s}", fontfile.name, fontfile.encoder)
|
encoding := "[None]"
|
||||||
|
if fontfile.encoder != nil {
|
||||||
|
encoding = fontfile.encoder.String()
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("FONTFILE{%#q encoder=%s}", fontfile.name, encoding)
|
||||||
}
|
}
|
||||||
|
|
||||||
// newFontFileFromPdfObject loads a FontFile from a PdfObject. Can either be a
|
// newFontFileFromPdfObject loads a FontFile from a PdfObject. Can either be a
|
||||||
@ -107,6 +111,9 @@ func (fontfile *fontFile) loadFromSegments(segment1, segment2 []byte) error {
|
|||||||
// parseAsciiPart parses the ASCII part of the FontFile.
|
// parseAsciiPart parses the ASCII part of the FontFile.
|
||||||
func (fontfile *fontFile) parseAsciiPart(data []byte) error {
|
func (fontfile *fontFile) parseAsciiPart(data []byte) error {
|
||||||
common.Log.Debug("parseAsciiPart: %d ", len(data))
|
common.Log.Debug("parseAsciiPart: %d ", len(data))
|
||||||
|
// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~^^^~~~~~~~~~~~~~~~~~~~~~~~")
|
||||||
|
// fmt.Printf("data=%s\n", string(data))
|
||||||
|
// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~!!!~~~~~~~~~~~~~~~~~~~~~~~")
|
||||||
// The start of a FontFile looks like
|
// The start of a FontFile looks like
|
||||||
// %!PS-AdobeFont-1.0: MyArial 003.002
|
// %!PS-AdobeFont-1.0: MyArial 003.002
|
||||||
// %%Title: MyArial
|
// %%Title: MyArial
|
||||||
|
@ -47,7 +47,7 @@ func NewCustomSimpleTextEncoder(encoding map[uint16]string, differences map[byte
|
|||||||
r, ok := GlyphToRune(glyph)
|
r, ok := GlyphToRune(glyph)
|
||||||
if !ok {
|
if !ok {
|
||||||
common.Log.Debug("ERROR: Unknown glyph. %q", glyph)
|
common.Log.Debug("ERROR: Unknown glyph. %q", glyph)
|
||||||
return SimpleEncoder{}, ErrTypeError
|
// return SimpleEncoder{}, ErrTypeError
|
||||||
}
|
}
|
||||||
baseEncoding[code] = r
|
baseEncoding[code] = r
|
||||||
}
|
}
|
||||||
@ -87,7 +87,7 @@ func newSimpleTextEncoder(baseEncoding map[uint16]rune, baseName string,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// simpleEncoderNumEntries is the maximum number of encoding entries shown in SimpleEncoder.String()
|
// simpleEncoderNumEntries is the maximum number of encoding entries shown in SimpleEncoder.String()
|
||||||
const simpleEncoderNumEntries = 0
|
const simpleEncoderNumEntries = 1000
|
||||||
|
|
||||||
// String returns a string that describes `se`.
|
// String returns a string that describes `se`.
|
||||||
func (se SimpleEncoder) String() string {
|
func (se SimpleEncoder) String() string {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user