mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-26 13:48:55 +08:00

* Add PdfFont method for encoding runes to charcode bytes
* Add getter method for CMap nbits
* Take CMap nbits into account when encoding text
* Adapt font test cases to include text encoding testing
1120 lines
34 KiB
Go
1120 lines
34 KiB
Go
/*
 * This file is subject to the terms and conditions defined in
 * file 'LICENSE.md', which is part of this source code package.
 */
|
|
|
|
package model
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"sort"
|
|
"strings"
|
|
|
|
"github.com/unidoc/unipdf/v3/common"
|
|
"github.com/unidoc/unipdf/v3/core"
|
|
|
|
"github.com/unidoc/unipdf/v3/internal/cmap"
|
|
"github.com/unidoc/unipdf/v3/internal/textencoding"
|
|
"github.com/unidoc/unipdf/v3/model/internal/fonts"
|
|
)
|
|
|
|
// pdfFont is an internal interface for fonts that can be stored in PDF documents.
|
|
type pdfFont interface {
|
|
fonts.Font
|
|
// ToPdfObject returns a PDF representation of the font and implements interface Model.
|
|
ToPdfObject() core.PdfObject
|
|
// getFontDescriptor returns the font descriptor of the font.
|
|
getFontDescriptor() *PdfFontDescriptor
|
|
// baseFields returns fields that are common for PDF fonts.
|
|
baseFields() *fontCommon
|
|
}
|
|
|
|
// PdfFont represents an underlying font structure which can be of type:
|
|
// - Type0
|
|
// - Type1
|
|
// - TrueType
|
|
// etc.
|
|
type PdfFont struct {
|
|
context pdfFont // The underlying font: Type0, Type1, Truetype, etc..
|
|
}
|
|
|
|
// GetFontDescriptor returns the font descriptor for `font`.
|
|
func (font PdfFont) GetFontDescriptor() (*PdfFontDescriptor, error) {
|
|
return font.context.getFontDescriptor(), nil
|
|
}
|
|
|
|
// String returns a string that describes `font`.
|
|
func (font *PdfFont) String() string {
|
|
enc := ""
|
|
if font.context.Encoder() != nil {
|
|
enc = font.context.Encoder().String()
|
|
}
|
|
return fmt.Sprintf("FONT{%T %s %s}", font.context, font.baseFields().coreString(), enc)
|
|
}
|
|
|
|
// BaseFont returns the font's "BaseFont" field.
|
|
func (font *PdfFont) BaseFont() string {
|
|
return font.baseFields().basefont
|
|
}
|
|
|
|
// Subtype returns the font's "Subtype" field.
|
|
func (font *PdfFont) Subtype() string {
|
|
subtype := font.baseFields().subtype
|
|
if t, ok := font.context.(*pdfFontType0); ok {
|
|
subtype = subtype + ":" + t.DescendantFont.Subtype()
|
|
}
|
|
return subtype
|
|
}
|
|
|
|
// IsCID returns true if the underlying font is CID.
|
|
func (font *PdfFont) IsCID() bool {
|
|
return font.baseFields().isCIDFont()
|
|
}
|
|
|
|
// FontDescriptor returns font's PdfFontDescriptor. This may be a builtin descriptor for standard 14
|
|
// fonts but must be an explicit descriptor for other fonts.
|
|
func (font *PdfFont) FontDescriptor() *PdfFontDescriptor {
|
|
if font.baseFields().fontDescriptor != nil {
|
|
return font.baseFields().fontDescriptor
|
|
}
|
|
if d := font.context.getFontDescriptor(); d != nil {
|
|
return d
|
|
}
|
|
common.Log.Error("All fonts have a Descriptor. font=%s", font)
|
|
return nil
|
|
}
|
|
|
|
// ToUnicode returns the name of the font's "ToUnicode" field if there is one, or "" if there isn't.
|
|
func (font *PdfFont) ToUnicode() string {
|
|
if font.baseFields().toUnicodeCmap == nil {
|
|
return ""
|
|
}
|
|
return font.baseFields().toUnicodeCmap.Name()
|
|
}
|
|
|
|
// DefaultFont returns the default font, which is currently the built in Helvetica.
|
|
func DefaultFont() *PdfFont {
|
|
helvetica, ok := fonts.NewStdFontByName(HelveticaName)
|
|
if !ok {
|
|
panic("Helvetica should always be available")
|
|
}
|
|
std := stdFontToSimpleFont(helvetica)
|
|
return &PdfFont{context: &std}
|
|
}
|
|
|
|
func newStandard14Font(basefont StdFontName) (pdfFontSimple, error) {
|
|
fnt, ok := fonts.NewStdFontByName(basefont)
|
|
if !ok {
|
|
return pdfFontSimple{}, ErrFontNotSupported
|
|
}
|
|
std := stdFontToSimpleFont(fnt)
|
|
return std, nil
|
|
}
|
|
|
|
// StdFontName represents name of a standard font.
|
|
type StdFontName = fonts.StdFontName
|
|
|
|
// Names of the standard 14 fonts.
|
|
var (
|
|
CourierName = fonts.CourierName
|
|
CourierBoldName = fonts.CourierBoldName
|
|
CourierObliqueName = fonts.CourierObliqueName
|
|
CourierBoldObliqueName = fonts.CourierBoldObliqueName
|
|
HelveticaName = fonts.HelveticaName
|
|
HelveticaBoldName = fonts.HelveticaBoldName
|
|
HelveticaObliqueName = fonts.HelveticaObliqueName
|
|
HelveticaBoldObliqueName = fonts.HelveticaBoldObliqueName
|
|
SymbolName = fonts.SymbolName
|
|
ZapfDingbatsName = fonts.ZapfDingbatsName
|
|
TimesRomanName = fonts.TimesRomanName
|
|
TimesBoldName = fonts.TimesBoldName
|
|
TimesItalicName = fonts.TimesItalicName
|
|
TimesBoldItalicName = fonts.TimesBoldItalicName
|
|
)
|
|
|
|
// NewStandard14Font returns the standard 14 font named `basefont` as a *PdfFont, or an error if it
|
|
// `basefont` is not one of the standard 14 font names.
|
|
func NewStandard14Font(basefont StdFontName) (*PdfFont, error) {
|
|
std, err := newStandard14Font(basefont)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if basefont != SymbolName && basefont != ZapfDingbatsName {
|
|
// Default to using WinAnsiEncoder for text generation as it spans a large number of symbols.
|
|
std.encoder = textencoding.NewWinAnsiEncoder()
|
|
}
|
|
|
|
return &PdfFont{context: &std}, nil
|
|
}
|
|
|
|
// NewStandard14FontMustCompile returns the standard 14 font named `basefont` as a *PdfFont.
|
|
// If `basefont` is one of the 14 Standard14Font values defined above then NewStandard14FontMustCompile
|
|
// is guaranteed to succeed.
|
|
func NewStandard14FontMustCompile(basefont StdFontName) *PdfFont {
|
|
font, err := NewStandard14Font(basefont)
|
|
if err != nil {
|
|
panic(fmt.Errorf("invalid Standard14Font %#q", basefont))
|
|
}
|
|
return font
|
|
}
|
|
|
|
// NewStandard14FontWithEncoding returns the standard 14 font named `basefont` as a *PdfFont and
|
|
// a TextEncoder that encodes all the runes in `alphabet`, or an error if this is not possible.
|
|
// An error can occur if `basefont` is not one the standard 14 font names.
|
|
func NewStandard14FontWithEncoding(basefont StdFontName, alphabet map[rune]int) (*PdfFont,
|
|
textencoding.SimpleEncoder, error) {
|
|
std, err := newStandard14Font(basefont)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
enc, ok := std.Encoder().(textencoding.SimpleEncoder)
|
|
if !ok {
|
|
return nil, nil, fmt.Errorf("only simple encoding is supported, got %T", std.Encoder())
|
|
}
|
|
|
|
// collect all runes from alphabet that are missing in the encoding
|
|
// and find corresponding glyph names
|
|
missing := make(map[rune]textencoding.GlyphName)
|
|
for r := range alphabet {
|
|
if _, ok := enc.RuneToCharcode(r); !ok {
|
|
_, ok := std.fontMetrics[r]
|
|
if !ok {
|
|
common.Log.Trace("rune %#x=%q not in the font", r, r)
|
|
continue
|
|
}
|
|
glyph, ok := textencoding.RuneToGlyph(r)
|
|
if !ok {
|
|
common.Log.Debug("no glyph for rune %#x=%q", r, r)
|
|
continue
|
|
}
|
|
if len(missing) >= 255 {
|
|
return nil, nil, errors.New("too many characters for simple encoding")
|
|
}
|
|
missing[r] = glyph
|
|
}
|
|
}
|
|
|
|
// collect the list of empty indexes in the encoding that can be filed
|
|
// and join the list of runes unused in the alphabet to overwrite, if necessary
|
|
var (
|
|
gaps []textencoding.CharCode
|
|
unused []textencoding.CharCode
|
|
)
|
|
// note, that this loop will become endless if CharCode becomes a byte
|
|
for code := textencoding.CharCode(1); code <= 0xff; code++ {
|
|
r, ok := enc.CharcodeToRune(code)
|
|
if !ok {
|
|
gaps = append(gaps, code)
|
|
continue
|
|
}
|
|
if _, ok = alphabet[r]; !ok {
|
|
unused = append(unused, code)
|
|
}
|
|
}
|
|
// join into a single list of replacable charcodes, gaps first
|
|
replacable := append(gaps, unused...)
|
|
|
|
if len(replacable) < len(missing) {
|
|
return nil, nil, fmt.Errorf("need to encode %d runes, but have only %d slots",
|
|
len(missing), len(replacable))
|
|
}
|
|
|
|
// sort, make an order predictable
|
|
runes := make([]rune, 0, len(missing))
|
|
for r := range missing {
|
|
runes = append(runes, r)
|
|
}
|
|
sort.Slice(runes, func(i, j int) bool {
|
|
return runes[i] < runes[j]
|
|
})
|
|
|
|
// build a map of replacements
|
|
differences := make(map[textencoding.CharCode]textencoding.GlyphName, len(runes))
|
|
for _, r := range runes {
|
|
code := replacable[0]
|
|
replacable = replacable[1:]
|
|
|
|
differences[code] = missing[r]
|
|
}
|
|
enc = textencoding.ApplyDifferences(enc, differences)
|
|
std.SetEncoder(enc)
|
|
|
|
return &PdfFont{context: &std}, enc, nil
|
|
}
|
|
|
|
// GetAlphabet returns a map from each rune that occurs in `text` to the number of times it occurs.
// The returned map is never nil.
func GetAlphabet(text string) map[rune]int {
	counts := make(map[rune]int)
	for _, r := range text {
		counts[r] = counts[r] + 1
	}
	return counts
}
|
|
|
|
// NewPdfFontFromPdfObject loads a PdfFont from the dictionary `fontObj`. If there is a problem an
|
|
// error is returned.
|
|
func NewPdfFontFromPdfObject(fontObj core.PdfObject) (*PdfFont, error) {
|
|
return newPdfFontFromPdfObject(fontObj, true)
|
|
}
|
|
|
|
// newPdfFontFromPdfObject loads a PdfFont from the dictionary `fontObj`. If there is a problem an
|
|
// error is returned.
|
|
// The allowType0 flag indicates whether loading Type0 font should be supported. This is used to
|
|
// avoid cyclical loading.
|
|
func newPdfFontFromPdfObject(fontObj core.PdfObject, allowType0 bool) (*PdfFont, error) {
|
|
d, base, err := newFontBaseFieldsFromPdfObject(fontObj)
|
|
if err != nil {
|
|
// In the case of not yet supported fonts, we attempt to return enough information in the
|
|
// font for the caller to see some font properties.
|
|
// TODO(peterwilliams97): Add support for these fonts and remove this special error handling.
|
|
if err == ErrType3FontNotSupported || err == ErrType1CFontNotSupported {
|
|
simplefont, err2 := newSimpleFontFromPdfObject(d, base, nil)
|
|
if err2 != nil {
|
|
common.Log.Debug("ERROR: While loading simple font: font=%s err=%v", base, err2)
|
|
return nil, err
|
|
}
|
|
return &PdfFont{context: simplefont}, err
|
|
}
|
|
|
|
return nil, err
|
|
}
|
|
|
|
font := &PdfFont{}
|
|
switch base.subtype {
|
|
case "Type0":
|
|
if !allowType0 {
|
|
common.Log.Debug("ERROR: Loading type0 not allowed. font=%s", base)
|
|
return nil, errors.New("cyclical type0 loading")
|
|
}
|
|
type0font, err := newPdfFontType0FromPdfObject(d, base)
|
|
if err != nil {
|
|
common.Log.Debug("ERROR: While loading Type0 font. font=%s err=%v", base, err)
|
|
return nil, err
|
|
}
|
|
font.context = type0font
|
|
case "Type1", "Type3", "MMType1", "TrueType":
|
|
var simplefont *pdfFontSimple
|
|
fnt, builtin := fonts.NewStdFontByName(fonts.StdFontName(base.basefont))
|
|
if builtin {
|
|
std := stdFontToSimpleFont(fnt)
|
|
font.context = &std
|
|
|
|
stdObj := core.TraceToDirectObject(std.ToPdfObject())
|
|
d14, stdBase, err := newFontBaseFieldsFromPdfObject(stdObj)
|
|
|
|
if err != nil {
|
|
common.Log.Debug("ERROR: Bad Standard14\n\tfont=%s\n\tstd=%+v", base, std)
|
|
return nil, err
|
|
}
|
|
|
|
for _, k := range d.Keys() {
|
|
d14.Set(k, d.Get(k))
|
|
}
|
|
simplefont, err = newSimpleFontFromPdfObject(d14, stdBase, std.std14Encoder)
|
|
if err != nil {
|
|
common.Log.Debug("ERROR: Bad Standard14\n\tfont=%s\n\tstd=%+v", base, std)
|
|
return nil, err
|
|
}
|
|
|
|
simplefont.charWidths = std.charWidths
|
|
simplefont.fontMetrics = std.fontMetrics
|
|
} else {
|
|
simplefont, err = newSimpleFontFromPdfObject(d, base, nil)
|
|
if err != nil {
|
|
common.Log.Debug("ERROR: While loading simple font: font=%s err=%v", base, err)
|
|
return nil, err
|
|
}
|
|
}
|
|
err = simplefont.addEncoding()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if builtin {
|
|
simplefont.updateStandard14Font()
|
|
}
|
|
if builtin && simplefont.encoder == nil && simplefont.std14Encoder == nil {
|
|
// This is not possible.
|
|
common.Log.Error("simplefont=%s", simplefont)
|
|
common.Log.Error("fnt=%+v", fnt)
|
|
}
|
|
if len(simplefont.charWidths) == 0 {
|
|
common.Log.Debug("ERROR: No widths. font=%s", simplefont)
|
|
}
|
|
font.context = simplefont
|
|
case "CIDFontType0":
|
|
cidfont, err := newPdfCIDFontType0FromPdfObject(d, base)
|
|
if err != nil {
|
|
common.Log.Debug("ERROR: While loading cid font type0 font: %v", err)
|
|
return nil, err
|
|
}
|
|
font.context = cidfont
|
|
case "CIDFontType2":
|
|
cidfont, err := newPdfCIDFontType2FromPdfObject(d, base)
|
|
if err != nil {
|
|
common.Log.Debug("ERROR: While loading cid font type2 font. font=%s err=%v", base, err)
|
|
return nil, err
|
|
}
|
|
font.context = cidfont
|
|
default:
|
|
common.Log.Debug("ERROR: Unsupported font type: font=%s", base)
|
|
return nil, fmt.Errorf("unsupported font type: font=%s", base)
|
|
}
|
|
|
|
return font, nil
|
|
}
|
|
|
|
// BytesToCharcodes converts the bytes in a PDF string to character codes.
|
|
func (font *PdfFont) BytesToCharcodes(data []byte) []textencoding.CharCode {
|
|
common.Log.Trace("BytesToCharcodes: data=[% 02x]=%#q", data, data)
|
|
if type0, ok := font.context.(*pdfFontType0); ok && type0.codeToCID != nil {
|
|
if charcodes, ok := type0.bytesToCharcodes(data); ok {
|
|
return charcodes
|
|
}
|
|
}
|
|
|
|
charcodes := make([]textencoding.CharCode, 0, len(data)+len(data)%2)
|
|
if font.baseFields().isCIDFont() {
|
|
if len(data) == 1 {
|
|
data = []byte{0, data[0]}
|
|
}
|
|
if len(data)%2 != 0 {
|
|
common.Log.Debug("ERROR: Padding data=%+v to even length", data)
|
|
data = append(data, 0)
|
|
}
|
|
for i := 0; i < len(data); i += 2 {
|
|
b := uint16(data[i])<<8 | uint16(data[i+1])
|
|
charcodes = append(charcodes, textencoding.CharCode(b))
|
|
}
|
|
} else {
|
|
for _, b := range data {
|
|
charcodes = append(charcodes, textencoding.CharCode(b))
|
|
}
|
|
}
|
|
return charcodes
|
|
}
|
|
|
|
// CharcodesToUnicodeWithStats is identical to CharcodesToUnicode except returns more statistical
|
|
// information about hits and misses from the reverse mapping process.
|
|
func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCode) (runelist []rune, numHits, numMisses int) {
|
|
fontBase := font.baseFields()
|
|
runes := make([]rune, 0, len(charcodes))
|
|
numMisses = 0
|
|
for _, code := range charcodes {
|
|
if fontBase.toUnicodeCmap != nil {
|
|
if r, ok := fontBase.toUnicodeCmap.CharcodeToUnicode(cmap.CharCode(code)); ok {
|
|
runes = append(runes, r)
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Fall back to encoding.
|
|
encoder := font.Encoder()
|
|
if encoder != nil {
|
|
if r, ok := encoder.CharcodeToRune(code); ok {
|
|
runes = append(runes, r)
|
|
continue
|
|
}
|
|
}
|
|
|
|
common.Log.Debug("ERROR: No rune. code=0x%04x charcodes=[% 04x] CID=%t\n"+
|
|
"\tfont=%s\n\tencoding=%s",
|
|
code, charcodes, fontBase.isCIDFont(), font, encoder)
|
|
numMisses++
|
|
runes = append(runes, cmap.MissingCodeRune)
|
|
}
|
|
|
|
if numMisses != 0 {
|
|
common.Log.Debug("ERROR: Couldn't convert to unicode. Using input.\n"+
|
|
"\tnumChars=%d numMisses=%d\n"+
|
|
"\tfont=%s",
|
|
len(charcodes), numMisses, font)
|
|
}
|
|
|
|
return runes, len(runes), numMisses
|
|
}
|
|
|
|
// CharcodeBytesToUnicode converts PDF character codes `data` to a Go unicode string.
|
|
//
|
|
// 9.10 Extraction of Text Content (page 292)
|
|
// The process of finding glyph descriptions in OpenType fonts by a conforming reader shall be the following:
|
|
// • For Type 1 fonts using “CFF” tables, the process shall be as described in 9.6.6.2, "Encodings
|
|
// for Type 1 Fonts".
|
|
// • For TrueType fonts using “glyf” tables, the process shall be as described in 9.6.6.4,
|
|
// "Encodings for TrueType Fonts". Since this process sometimes produces ambiguous results,
|
|
// conforming writers, instead of using a simple font, shall use a Type 0 font with an Identity-H
|
|
// encoding and use the glyph indices as character codes, as described following Table 118.
|
|
func (font *PdfFont) CharcodeBytesToUnicode(data []byte) (string, int, int) {
|
|
runes, _, numMisses := font.CharcodesToUnicodeWithStats(font.BytesToCharcodes(data))
|
|
|
|
var buffer bytes.Buffer
|
|
for _, r := range runes {
|
|
buffer.WriteString(textencoding.RuneToString(r))
|
|
}
|
|
|
|
str := buffer.String()
|
|
return str, len([]rune(str)), numMisses
|
|
}
|
|
|
|
// CharcodesToUnicode converts the character codes `charcodes` to a slice of runes.
|
|
// How it works:
|
|
// 1) Use the ToUnicode CMap if there is one.
|
|
// 2) Use the underlying font's encoding.
|
|
func (font *PdfFont) CharcodesToUnicode(charcodes []textencoding.CharCode) []rune {
|
|
strlist, _, _ := font.CharcodesToUnicodeWithStats(charcodes)
|
|
return strlist
|
|
}
|
|
|
|
// RunesToCharcodeBytes maps the provided runes to charcode bytes and it
|
|
// returns the resulting slice of bytes, along with the number of runes which
|
|
// could not be converted. If the number of misses is 0, all runes were
|
|
// successfully converted.
|
|
func (font *PdfFont) RunesToCharcodeBytes(data []rune) ([]byte, int) {
|
|
// Create collection of encoders used for rune to charcode mapping:
|
|
// - if the font has a to Unicode CMap, use it first.
|
|
// - if the font has an encoder, use it as a fallback.
|
|
var encoders []textencoding.TextEncoder
|
|
if toUnicode := font.baseFields().toUnicodeCmap; toUnicode != nil {
|
|
encoders = append(encoders, textencoding.NewCMapEncoder("", nil, toUnicode))
|
|
}
|
|
if encoder := font.Encoder(); encoder != nil {
|
|
encoders = append(encoders, encoder)
|
|
}
|
|
|
|
var buffer bytes.Buffer
|
|
var numMisses int
|
|
for _, r := range data {
|
|
// Attempt to encode the current rune using each of the encoders,
|
|
// falling back to the next one in case of failure.
|
|
var encoded bool
|
|
for _, encoder := range encoders {
|
|
if encBytes := encoder.Encode(string(r)); len(encBytes) > 0 {
|
|
buffer.Write(encBytes)
|
|
encoded = true
|
|
break
|
|
}
|
|
}
|
|
|
|
if !encoded {
|
|
common.Log.Debug("ERROR: failed to map rune `%+q` to charcode", r)
|
|
numMisses++
|
|
}
|
|
}
|
|
|
|
if numMisses != 0 {
|
|
common.Log.Debug("ERROR: could not convert all runes to charcodes.\n"+
|
|
"\tnumRunes=%d numMisses=%d\n"+
|
|
"\tfont=%s encoders=%+v", len(data), numMisses, font, encoders)
|
|
}
|
|
|
|
return buffer.Bytes(), numMisses
|
|
}
|
|
|
|
// StringToCharcodeBytes maps the provided string runes to charcode bytes and
|
|
// it returns the resulting slice of bytes, along with the number of runes
|
|
// which could not be converted. If the number of misses is 0, all string runes
|
|
// were successfully converted.
|
|
func (font *PdfFont) StringToCharcodeBytes(str string) ([]byte, int) {
|
|
return font.RunesToCharcodeBytes([]rune(str))
|
|
}
|
|
|
|
// ToPdfObject converts the PdfFont object to its PDF representation.
|
|
func (font *PdfFont) ToPdfObject() core.PdfObject {
|
|
if font.context == nil {
|
|
common.Log.Debug("ERROR: font context is nil")
|
|
return core.MakeNull()
|
|
}
|
|
return font.context.ToPdfObject()
|
|
}
|
|
|
|
// Encoder returns the font's text encoder.
|
|
func (font *PdfFont) Encoder() textencoding.TextEncoder {
|
|
t := font.actualFont()
|
|
if t == nil {
|
|
common.Log.Debug("ERROR: Encoder not implemented for font type=%#T", font.context)
|
|
// TODO: Should we return a default encoding?
|
|
return nil
|
|
}
|
|
return t.Encoder()
|
|
}
|
|
|
|
// CharMetrics represents width and height metrics of a glyph.
|
|
type CharMetrics = fonts.CharMetrics
|
|
|
|
// GetRuneMetrics returns the char metrics for a rune.
|
|
// TODO(peterwilliams97) There is nothing callers can do if no CharMetrics are found so we might as
|
|
// well give them 0 width. There is no need for the bool return.
|
|
func (font *PdfFont) GetRuneMetrics(r rune) (CharMetrics, bool) {
|
|
t := font.actualFont()
|
|
if t == nil {
|
|
common.Log.Debug("ERROR: GetGlyphCharMetrics Not implemented for font type=%#T", font.context)
|
|
return fonts.CharMetrics{}, false
|
|
}
|
|
if m, ok := t.GetRuneMetrics(r); ok {
|
|
return m, true
|
|
}
|
|
if desc, err := font.GetFontDescriptor(); err == nil && desc != nil {
|
|
return fonts.CharMetrics{Wx: desc.missingWidth}, true
|
|
}
|
|
|
|
common.Log.Debug("GetGlyphCharMetrics: No metrics for font=%s", font)
|
|
return fonts.CharMetrics{}, false
|
|
}
|
|
|
|
// GetCharMetrics returns the char metrics for character code `code`.
|
|
// How it works:
|
|
// 1) It calls the GetCharMetrics function for the underlying font, either a simple font or
|
|
// a Type0 font. The underlying font GetCharMetrics() functions do direct charcode ➞ metrics
|
|
// mappings.
|
|
// 2) If the underlying font's GetCharMetrics() doesn't have a CharMetrics for `code` then a
|
|
// a CharMetrics with the FontDescriptor's /MissingWidth is returned.
|
|
// 3) If there is no /MissingWidth then a failure is returned.
|
|
// TODO(peterwilliams97) There is nothing callers can do if no CharMetrics are found so we might as
|
|
// well give them 0 width. There is no need for the bool return.
|
|
// TODO(gunnsth): Reconsider whether needed or if can map via GlyphName.
|
|
func (font *PdfFont) GetCharMetrics(code textencoding.CharCode) (CharMetrics, bool) {
|
|
var nometrics fonts.CharMetrics
|
|
|
|
// TODO(peterwilliams97): pdfFontType0.GetCharMetrics() calls pdfCIDFontType2.GetCharMetrics()
|
|
// through this function. Would it be more straightforward for
|
|
// pdfFontType0.GetCharMetrics() to call pdfCIDFontType0.GetCharMetrics()
|
|
// and pdfCIDFontType2.GetCharMetrics() directly?
|
|
|
|
switch t := font.context.(type) {
|
|
case *pdfFontSimple:
|
|
if m, ok := t.GetCharMetrics(code); ok {
|
|
return m, ok
|
|
}
|
|
case *pdfFontType0:
|
|
if m, ok := t.GetCharMetrics(code); ok {
|
|
return m, ok
|
|
}
|
|
case *pdfCIDFontType0:
|
|
if m, ok := t.GetCharMetrics(code); ok {
|
|
return m, ok
|
|
}
|
|
case *pdfCIDFontType2:
|
|
if m, ok := t.GetCharMetrics(code); ok {
|
|
return m, ok
|
|
}
|
|
default:
|
|
common.Log.Debug("ERROR: GetCharMetrics not implemented for font type=%T.", font.context)
|
|
return nometrics, false
|
|
}
|
|
|
|
if descriptor, err := font.GetFontDescriptor(); err == nil && descriptor != nil {
|
|
return fonts.CharMetrics{Wx: descriptor.missingWidth}, true
|
|
}
|
|
|
|
common.Log.Debug("GetCharMetrics: No metrics for font=%s", font)
|
|
return nometrics, false
|
|
}
|
|
|
|
// actualFont returns the Font in font.context
|
|
func (font PdfFont) actualFont() pdfFont {
|
|
if font.context == nil {
|
|
common.Log.Debug("ERROR: actualFont. context is nil. font=%s", font)
|
|
}
|
|
return font.context
|
|
}
|
|
|
|
// baseFields returns the fields of `font`.context that are common to all PDF fonts.
|
|
func (font *PdfFont) baseFields() *fontCommon {
|
|
if font.context == nil {
|
|
common.Log.Debug("ERROR: baseFields. context is nil.")
|
|
return nil
|
|
}
|
|
return font.context.baseFields()
|
|
}
|
|
|
|
// fontCommon represents the fields that are common to all PDF fonts.
|
|
type fontCommon struct {
|
|
// All fonts have these fields.
|
|
basefont string // The font's "BaseFont" field.
|
|
subtype string // The font's "Subtype" field.
|
|
name string
|
|
|
|
// These are optional fields in the PDF font.
|
|
toUnicode core.PdfObject // The stream containing toUnicodeCmap. We keep it around for ToPdfObject.
|
|
|
|
// These objects are computed from optional fields in the PDF font.
|
|
toUnicodeCmap *cmap.CMap // Computed from "ToUnicode".
|
|
fontDescriptor *PdfFontDescriptor // Computed from "FontDescriptor".
|
|
|
|
// objectNumber helps us find the font in the PDF being processed. This helps with debugging.
|
|
objectNumber int64
|
|
}
|
|
|
|
// asPdfObjectDictionary returns `base` as a core.PdfObjectDictionary.
|
|
// It is for use in font ToPdfObject functions.
|
|
// NOTE: The returned dict's "Subtype" field is set to `subtype` if `base` doesn't have a subtype.
|
|
func (base fontCommon) asPdfObjectDictionary(subtype string) *core.PdfObjectDictionary {
|
|
|
|
if subtype != "" && base.subtype != "" && subtype != base.subtype {
|
|
common.Log.Debug("ERROR: asPdfObjectDictionary. Overriding subtype to %#q %s", subtype, base)
|
|
} else if subtype == "" && base.subtype == "" {
|
|
common.Log.Debug("ERROR: asPdfObjectDictionary no subtype. font=%s", base)
|
|
} else if base.subtype == "" {
|
|
base.subtype = subtype
|
|
}
|
|
|
|
d := core.MakeDict()
|
|
d.Set("Type", core.MakeName("Font"))
|
|
d.Set("BaseFont", core.MakeName(base.basefont))
|
|
d.Set("Subtype", core.MakeName(base.subtype))
|
|
|
|
if base.fontDescriptor != nil {
|
|
d.Set("FontDescriptor", base.fontDescriptor.ToPdfObject())
|
|
}
|
|
if base.toUnicode != nil {
|
|
d.Set("ToUnicode", base.toUnicode)
|
|
} else if base.toUnicodeCmap != nil {
|
|
data := base.toUnicodeCmap.Bytes()
|
|
o, err := core.MakeStream(data, nil)
|
|
if err != nil {
|
|
common.Log.Debug("MakeStream failed. err=%v", err)
|
|
} else {
|
|
d.Set("ToUnicode", o)
|
|
}
|
|
}
|
|
return d
|
|
}
|
|
|
|
// String returns a string that describes `base`.
|
|
func (base fontCommon) String() string {
|
|
return fmt.Sprintf("FONT{%s}", base.coreString())
|
|
}
|
|
|
|
// coreString returns the contents of fontCommon.String() without the FONT{} wrapper.
|
|
func (base fontCommon) coreString() string {
|
|
descriptor := ""
|
|
if base.fontDescriptor != nil {
|
|
descriptor = base.fontDescriptor.String()
|
|
}
|
|
return fmt.Sprintf("%#q %#q %q obj=%d ToUnicode=%t flags=0x%0x %s",
|
|
base.subtype, base.basefont, base.name, base.objectNumber, base.toUnicode != nil,
|
|
base.fontFlags(), descriptor)
|
|
}
|
|
|
|
func (base fontCommon) fontFlags() int {
|
|
if base.fontDescriptor == nil {
|
|
return 0
|
|
}
|
|
return base.fontDescriptor.flags
|
|
}
|
|
|
|
// isCIDFont returns true if `base` is a CID font.
|
|
func (base fontCommon) isCIDFont() bool {
|
|
if base.subtype == "" {
|
|
common.Log.Debug("ERROR: isCIDFont. context is nil. font=%s", base)
|
|
}
|
|
isCID := false
|
|
switch base.subtype {
|
|
case "Type0", "CIDFontType0", "CIDFontType2":
|
|
isCID = true
|
|
}
|
|
common.Log.Trace("isCIDFont: isCID=%t font=%s", isCID, base)
|
|
return isCID
|
|
}
|
|
|
|
// newFontBaseFieldsFromPdfObject returns `fontObj` as a dictionary the common fields from that
|
|
// dictionary in the fontCommon return. If there is a problem an error is returned.
|
|
// The fontCommon is the group of fields common to all PDF fonts.
|
|
func newFontBaseFieldsFromPdfObject(fontObj core.PdfObject) (*core.PdfObjectDictionary, *fontCommon,
|
|
error) {
|
|
font := &fontCommon{}
|
|
|
|
if obj, ok := fontObj.(*core.PdfIndirectObject); ok {
|
|
font.objectNumber = obj.ObjectNumber
|
|
}
|
|
|
|
d, ok := core.GetDict(fontObj)
|
|
if !ok {
|
|
common.Log.Debug("ERROR: Font not given by a dictionary (%T)", fontObj)
|
|
return nil, nil, ErrFontNotSupported
|
|
}
|
|
|
|
objtype, ok := core.GetNameVal(d.Get("Type"))
|
|
if !ok {
|
|
common.Log.Debug("ERROR: Font Incompatibility. Type (Required) missing")
|
|
return nil, nil, ErrRequiredAttributeMissing
|
|
}
|
|
if objtype != "Font" {
|
|
common.Log.Debug("ERROR: Font Incompatibility. Type=%q. Should be %q.", objtype, "Font")
|
|
return nil, nil, core.ErrTypeError
|
|
}
|
|
|
|
subtype, ok := core.GetNameVal(d.Get("Subtype"))
|
|
if !ok {
|
|
common.Log.Debug("ERROR: Font Incompatibility. Subtype (Required) missing")
|
|
return nil, nil, ErrRequiredAttributeMissing
|
|
}
|
|
font.subtype = subtype
|
|
|
|
name, ok := core.GetNameVal(d.Get("Name"))
|
|
if ok {
|
|
font.name = name
|
|
}
|
|
|
|
if subtype == "Type3" {
|
|
common.Log.Debug("ERROR: Type 3 font not supported. d=%s", d)
|
|
return d, font, ErrType3FontNotSupported
|
|
}
|
|
|
|
basefont, ok := core.GetNameVal(d.Get("BaseFont"))
|
|
if !ok {
|
|
common.Log.Debug("ERROR: Font Incompatibility. BaseFont (Required) missing")
|
|
return d, font, ErrRequiredAttributeMissing
|
|
}
|
|
font.basefont = basefont
|
|
|
|
obj := d.Get("FontDescriptor")
|
|
if obj != nil {
|
|
fontDescriptor, err := newPdfFontDescriptorFromPdfObject(obj)
|
|
if err != nil {
|
|
common.Log.Debug("ERROR: Bad font descriptor. err=%v", err)
|
|
return d, font, err
|
|
}
|
|
font.fontDescriptor = fontDescriptor
|
|
}
|
|
|
|
toUnicode := d.Get("ToUnicode")
|
|
if toUnicode != nil {
|
|
font.toUnicode = core.TraceToDirectObject(toUnicode)
|
|
codemap, err := toUnicodeToCmap(font.toUnicode, font)
|
|
if err != nil {
|
|
return d, font, err
|
|
}
|
|
font.toUnicodeCmap = codemap
|
|
} else if subtype == "CIDFontType0" || subtype == "CIDFontType2" {
|
|
si, err := cmap.NewCIDSystemInfo(d.Get("CIDSystemInfo"))
|
|
if err != nil {
|
|
return d, font, err
|
|
}
|
|
|
|
cmapName := fmt.Sprintf("%s-%s-UCS2", si.Registry, si.Ordering)
|
|
if cmap.IsPredefinedCMap(cmapName) {
|
|
font.toUnicodeCmap, err = cmap.LoadPredefinedCMap(cmapName)
|
|
if err != nil {
|
|
common.Log.Debug("WARN: could not load predefined CMap %s: %v", cmapName, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
return d, font, nil
|
|
}
|
|
|
|
// toUnicodeToCmap returns a CMap of `toUnicode` if it exists.
|
|
func toUnicodeToCmap(toUnicode core.PdfObject, font *fontCommon) (*cmap.CMap, error) {
|
|
toUnicodeStream, ok := core.GetStream(toUnicode)
|
|
if !ok {
|
|
common.Log.Debug("ERROR: toUnicodeToCmap: Not a stream (%T)", toUnicode)
|
|
return nil, core.ErrTypeError
|
|
}
|
|
data, err := core.DecodeStream(toUnicodeStream)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
cm, err := cmap.LoadCmapFromData(data, !font.isCIDFont())
|
|
if err != nil {
|
|
// Show the object number of the bad cmap to help with debugging.
|
|
common.Log.Debug("ERROR: ObjectNumber=%d err=%v", toUnicodeStream.ObjectNumber, err)
|
|
}
|
|
return cm, err
|
|
}
|
|
|
|
// Font descriptor flag bits.
// 9.8.2 Font Descriptor Flags (page 283)
const (
	fontFlagFixedPitch  = 1 << 0  // 0x00001
	fontFlagSerif       = 1 << 1  // 0x00002
	fontFlagSymbolic    = 1 << 2  // 0x00004
	fontFlagScript      = 1 << 3  // 0x00008
	fontFlagNonsymbolic = 1 << 5  // 0x00020
	fontFlagItalic      = 1 << 6  // 0x00040
	fontFlagAllCap      = 1 << 16 // 0x10000
	fontFlagSmallCap    = 1 << 17 // 0x20000
	fontFlagForceBold   = 1 << 18 // 0x40000
)
|
|
|
|
// PdfFontDescriptor specifies metrics and other attributes of a font and can refer to a FontFile
|
|
// for embedded fonts.
|
|
// 9.8 Font Descriptors (page 281)
|
|
type PdfFontDescriptor struct {
|
|
FontName core.PdfObject
|
|
FontFamily core.PdfObject
|
|
FontStretch core.PdfObject
|
|
FontWeight core.PdfObject
|
|
Flags core.PdfObject
|
|
FontBBox core.PdfObject
|
|
ItalicAngle core.PdfObject
|
|
Ascent core.PdfObject
|
|
Descent core.PdfObject
|
|
Leading core.PdfObject
|
|
CapHeight core.PdfObject
|
|
XHeight core.PdfObject
|
|
StemV core.PdfObject
|
|
StemH core.PdfObject
|
|
AvgWidth core.PdfObject
|
|
MaxWidth core.PdfObject
|
|
MissingWidth core.PdfObject
|
|
FontFile core.PdfObject // PFB
|
|
FontFile2 core.PdfObject // TTF
|
|
FontFile3 core.PdfObject // OTF / CFF
|
|
CharSet core.PdfObject
|
|
|
|
flags int
|
|
missingWidth float64
|
|
*fontFile
|
|
fontFile2 *fonts.TtfType
|
|
|
|
// Additional entries for CIDFonts
|
|
Style core.PdfObject
|
|
Lang core.PdfObject
|
|
FD core.PdfObject
|
|
CIDSet core.PdfObject
|
|
|
|
// Container.
|
|
container *core.PdfIndirectObject
|
|
}
|
|
|
|
// GetDescent returns the Descent of the font `descriptor`.
|
|
func (desc *PdfFontDescriptor) GetDescent() (float64, error) {
|
|
return core.GetNumberAsFloat(desc.Descent)
|
|
}
|
|
|
|
// GetAscent returns the Ascent of the font `descriptor`.
|
|
func (desc *PdfFontDescriptor) GetAscent() (float64, error) {
|
|
return core.GetNumberAsFloat(desc.Ascent)
|
|
}
|
|
|
|
// GetCapHeight returns the CapHeight of the font `descriptor`.
|
|
func (desc *PdfFontDescriptor) GetCapHeight() (float64, error) {
|
|
return core.GetNumberAsFloat(desc.CapHeight)
|
|
}
|
|
|
|
// String returns a string describing the font descriptor.
|
|
func (desc *PdfFontDescriptor) String() string {
|
|
var parts []string
|
|
if desc.FontName != nil {
|
|
parts = append(parts, desc.FontName.String())
|
|
}
|
|
if desc.FontFamily != nil {
|
|
parts = append(parts, desc.FontFamily.String())
|
|
}
|
|
if desc.fontFile != nil {
|
|
parts = append(parts, desc.fontFile.String())
|
|
}
|
|
if desc.fontFile2 != nil {
|
|
parts = append(parts, desc.fontFile2.String())
|
|
}
|
|
parts = append(parts, fmt.Sprintf("FontFile3=%t", desc.FontFile3 != nil))
|
|
|
|
return fmt.Sprintf("FONT_DESCRIPTOR{%s}", strings.Join(parts, ", "))
|
|
}
|
|
|
|
// newPdfFontDescriptorFromPdfObject loads the font descriptor from a core.PdfObject. Can either be a
|
|
// *PdfIndirectObject or a *core.PdfObjectDictionary.
|
|
func newPdfFontDescriptorFromPdfObject(obj core.PdfObject) (*PdfFontDescriptor, error) {
|
|
descriptor := &PdfFontDescriptor{}
|
|
|
|
obj = core.ResolveReference(obj)
|
|
if ind, is := obj.(*core.PdfIndirectObject); is {
|
|
descriptor.container = ind
|
|
obj = ind.PdfObject
|
|
}
|
|
|
|
d, ok := core.GetDict(obj)
|
|
if !ok {
|
|
common.Log.Debug("ERROR: FontDescriptor not given by a dictionary (%T)", obj)
|
|
return nil, core.ErrTypeError
|
|
}
|
|
|
|
if obj := d.Get("FontName"); obj != nil {
|
|
descriptor.FontName = obj
|
|
} else {
|
|
common.Log.Debug("Incompatibility: FontName (Required) missing")
|
|
}
|
|
fontname, _ := core.GetName(descriptor.FontName)
|
|
|
|
if obj := d.Get("Type"); obj != nil {
|
|
oname, is := obj.(*core.PdfObjectName)
|
|
if !is || string(*oname) != "FontDescriptor" {
|
|
common.Log.Debug("Incompatibility: Font descriptor Type invalid (%T) font=%q %T",
|
|
obj, fontname, descriptor.FontName)
|
|
}
|
|
} else {
|
|
common.Log.Trace("Incompatibility: Type (Required) missing. font=%q %T",
|
|
fontname, descriptor.FontName)
|
|
}
|
|
|
|
descriptor.FontFamily = d.Get("FontFamily")
|
|
descriptor.FontStretch = d.Get("FontStretch")
|
|
descriptor.FontWeight = d.Get("FontWeight")
|
|
descriptor.Flags = d.Get("Flags")
|
|
descriptor.FontBBox = d.Get("FontBBox")
|
|
descriptor.ItalicAngle = d.Get("ItalicAngle")
|
|
descriptor.Ascent = d.Get("Ascent")
|
|
descriptor.Descent = d.Get("Descent")
|
|
descriptor.Leading = d.Get("Leading")
|
|
descriptor.CapHeight = d.Get("CapHeight")
|
|
descriptor.XHeight = d.Get("XHeight")
|
|
descriptor.StemV = d.Get("StemV")
|
|
descriptor.StemH = d.Get("StemH")
|
|
descriptor.AvgWidth = d.Get("AvgWidth")
|
|
descriptor.MaxWidth = d.Get("MaxWidth")
|
|
descriptor.MissingWidth = d.Get("MissingWidth")
|
|
descriptor.FontFile = d.Get("FontFile")
|
|
descriptor.FontFile2 = d.Get("FontFile2")
|
|
descriptor.FontFile3 = d.Get("FontFile3")
|
|
descriptor.CharSet = d.Get("CharSet")
|
|
descriptor.Style = d.Get("Style")
|
|
descriptor.Lang = d.Get("Lang")
|
|
descriptor.FD = d.Get("FD")
|
|
descriptor.CIDSet = d.Get("CIDSet")
|
|
|
|
if descriptor.Flags != nil {
|
|
if flags, ok := core.GetIntVal(descriptor.Flags); ok {
|
|
descriptor.flags = flags
|
|
}
|
|
}
|
|
if descriptor.MissingWidth != nil {
|
|
if missingWidth, err := core.GetNumberAsFloat(descriptor.MissingWidth); err == nil {
|
|
descriptor.missingWidth = missingWidth
|
|
}
|
|
}
|
|
|
|
if descriptor.FontFile != nil {
|
|
fontFile, err := newFontFileFromPdfObject(descriptor.FontFile)
|
|
if err != nil {
|
|
return descriptor, err
|
|
}
|
|
common.Log.Trace("fontFile=%s", fontFile)
|
|
descriptor.fontFile = fontFile
|
|
}
|
|
if descriptor.FontFile2 != nil {
|
|
fontFile2, err := fonts.NewFontFile2FromPdfObject(descriptor.FontFile2)
|
|
if err != nil {
|
|
return descriptor, err
|
|
}
|
|
common.Log.Trace("fontFile2=%s", fontFile2.String())
|
|
descriptor.fontFile2 = &fontFile2
|
|
}
|
|
return descriptor, nil
|
|
}
|
|
|
|
// ToPdfObject returns the PdfFontDescriptor as a PDF dictionary inside an indirect object.
|
|
func (desc *PdfFontDescriptor) ToPdfObject() core.PdfObject {
|
|
d := core.MakeDict()
|
|
if desc.container == nil {
|
|
desc.container = &core.PdfIndirectObject{}
|
|
}
|
|
desc.container.PdfObject = d
|
|
|
|
d.Set("Type", core.MakeName("FontDescriptor"))
|
|
|
|
if desc.FontName != nil {
|
|
d.Set("FontName", desc.FontName)
|
|
}
|
|
|
|
if desc.FontFamily != nil {
|
|
d.Set("FontFamily", desc.FontFamily)
|
|
}
|
|
|
|
if desc.FontStretch != nil {
|
|
d.Set("FontStretch", desc.FontStretch)
|
|
}
|
|
|
|
if desc.FontWeight != nil {
|
|
d.Set("FontWeight", desc.FontWeight)
|
|
}
|
|
|
|
if desc.Flags != nil {
|
|
d.Set("Flags", desc.Flags)
|
|
}
|
|
|
|
if desc.FontBBox != nil {
|
|
d.Set("FontBBox", desc.FontBBox)
|
|
}
|
|
|
|
if desc.ItalicAngle != nil {
|
|
d.Set("ItalicAngle", desc.ItalicAngle)
|
|
}
|
|
|
|
if desc.Ascent != nil {
|
|
d.Set("Ascent", desc.Ascent)
|
|
}
|
|
|
|
if desc.Descent != nil {
|
|
d.Set("Descent", desc.Descent)
|
|
}
|
|
|
|
if desc.Leading != nil {
|
|
d.Set("Leading", desc.Leading)
|
|
}
|
|
|
|
if desc.CapHeight != nil {
|
|
d.Set("CapHeight", desc.CapHeight)
|
|
}
|
|
|
|
if desc.XHeight != nil {
|
|
d.Set("XHeight", desc.XHeight)
|
|
}
|
|
|
|
if desc.StemV != nil {
|
|
d.Set("StemV", desc.StemV)
|
|
}
|
|
|
|
if desc.StemH != nil {
|
|
d.Set("StemH", desc.StemH)
|
|
}
|
|
|
|
if desc.AvgWidth != nil {
|
|
d.Set("AvgWidth", desc.AvgWidth)
|
|
}
|
|
|
|
if desc.MaxWidth != nil {
|
|
d.Set("MaxWidth", desc.MaxWidth)
|
|
}
|
|
|
|
if desc.MissingWidth != nil {
|
|
d.Set("MissingWidth", desc.MissingWidth)
|
|
}
|
|
|
|
if desc.FontFile != nil {
|
|
d.Set("FontFile", desc.FontFile)
|
|
}
|
|
|
|
if desc.FontFile2 != nil {
|
|
d.Set("FontFile2", desc.FontFile2)
|
|
}
|
|
|
|
if desc.FontFile3 != nil {
|
|
d.Set("FontFile3", desc.FontFile3)
|
|
}
|
|
|
|
if desc.CharSet != nil {
|
|
d.Set("CharSet", desc.CharSet)
|
|
}
|
|
|
|
if desc.Style != nil {
|
|
d.Set("FontName", desc.FontName)
|
|
}
|
|
|
|
if desc.Lang != nil {
|
|
d.Set("Lang", desc.Lang)
|
|
}
|
|
|
|
if desc.FD != nil {
|
|
d.Set("FD", desc.FD)
|
|
}
|
|
|
|
if desc.CIDSet != nil {
|
|
d.Set("CIDSet", desc.CIDSet)
|
|
}
|
|
|
|
return desc.container
|
|
}
|