unipdf/pdf/model/font.go

587 lines
16 KiB
Go
Raw Normal View History

/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package model
import (
"errors"
"fmt"
"github.com/unidoc/unidoc/common"
. "github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/internal/cmap"
"github.com/unidoc/unidoc/pdf/model/fonts"
"github.com/unidoc/unidoc/pdf/model/textencoding"
)
// PdfFont represents an underlying font structure which can be of type:
// - Type0
// - Type1
// - TrueType
// etc.
// The fields shared by every font kind live in the embedded fontSkeleton; everything that is
// specific to a font kind is delegated to `context`.
type PdfFont struct {
// Fields common to all PDF fonts (subtype, base font name, ToUnicode cmap, font descriptor).
fontSkeleton
context fonts.Font // The underlying font: Type0, Type1, Truetype, etc..
// NOTE(review): this field shadows the `dict` field promoted from the embedded fontSkeleton.
// The loader visible in this file only fills fontSkeleton.dict -- confirm whether this field is
// still needed.
dict *PdfObjectDictionary
}
// fontSkeleton represents the fields that are common to all PDF fonts.
// It is filled in by newFontSkeletonFromPdfObject and written back out by toDict.
type fontSkeleton struct {
// The font dictionary the skeleton was loaded from.
dict *PdfObjectDictionary
// Value of the dictionary's Subtype entry as a string, e.g. "Type0" or "TrueType".
subtype string
// Value of the dictionary's BaseFont entry as a string. Empty if BaseFont is missing or is not
// a name.
basefont string
// The BaseFont object as found in the font dictionary. nil under the same conditions in which
// `basefont` is empty.
BaseFont PdfObject
// NOTE(review): not assigned by newFontSkeletonFromPdfObject and not read by toDict, which uses
// the `subtype` string instead -- confirm whether other code relies on it.
Subtype PdfObject
// The ToUnicode entry traced to a direct object; nil if the font has none.
ToUnicode PdfObject
// ToUnicode cmap
// Parsed from `ToUnicode` when that entry is present, nil otherwise.
UCMap *cmap.CMap
// Parsed FontDescriptor entry; nil if the font dictionary has none.
fontDescriptor *PdfFontDescriptor
}
// toDict returns a PdfObjectDictionary for `skel`.
// It is for use in font ToPdfObject functions.
// The dictionary always contains Type=Font and a Subtype entry. BaseFont, FontDescriptor and
// ToUnicode are only written when they are set on `skel`.
// NOTE: The returned dict's Subtype is set to `subtype` only if `skel` doesn't have a subtype of
// its own. When both are set and disagree, the skeleton's subtype is used and the conflict is
// logged. `skel` is a value receiver, so the caller's skeleton is never modified.
func (skel fontSkeleton) toDict(subtype string) *PdfObjectDictionary {
	switch {
	case skel.subtype == "" && subtype == "":
		common.Log.Debug("ERROR: toDict no subtype. font=%s", skel)
	case skel.subtype == "":
		// Only affects the local copy of the skeleton.
		skel.subtype = subtype
	case subtype != "" && subtype != skel.subtype:
		// Only a genuine disagreement is worth reporting; a caller passing the subtype the
		// skeleton already has is the normal case for fonts loaded from a PDF.
		common.Log.Debug("ERROR: toDict. Overriding subtype to %#q %s", subtype, skel)
	}

	d := MakeDict()
	d.Set("Type", MakeName("Font"))
	d.Set("Subtype", MakeName(skel.subtype))
	if skel.BaseFont != nil {
		d.Set("BaseFont", skel.BaseFont)
	}
	if skel.fontDescriptor != nil {
		d.Set("FontDescriptor", skel.fontDescriptor.ToPdfObject())
	}
	if skel.ToUnicode != nil {
		d.Set("ToUnicode", skel.ToUnicode)
	}
	return d
}
// String returns a human readable description of `font`: the Go type of the underlying font
// followed by the description of the common font fields.
func (font PdfFont) String() string {
	skeleton := font.fontSkeleton.String()
	return fmt.Sprintf("%T %s", font.context, skeleton)
}
// String returns a human readable description of `skel`: its subtype and base font name, plus a
// marker when a font descriptor is attached.
func (skel fontSkeleton) String() string {
	// The trailing space is kept even when there is no descriptor marker to append.
	description := fmt.Sprintf("%#q %#q ", skel.subtype, skel.basefont)
	if skel.fontDescriptor == nil {
		return description
	}
	return description + "(has descriptor)"
}
// CharcodeBytesToUnicode converts PDF character codes `data` to a Go unicode string.
// The font's ToUnicode cmap is used when there is one. Otherwise each byte is treated as a
// character code and mapped through the font's encoder, with '?' standing in for codes the
// encoder doesn't know. If the font has neither, the input bytes are returned as a string.
func (font PdfFont) CharcodeBytesToUnicode(data []byte) string {
	if font.UCMap != nil {
		return font.UCMap.CharcodeBytesToUnicode(data)
	}

	encoder := font.Encoder()
	if encoder == nil {
		common.Log.Debug("CharcodeBytesToUnicode. Couldn't convert. Returning input bytes. font=%s", font)
		return string(data)
	}

	// One rune is produced per input byte.
	runes := make([]rune, len(data))
	for i, b := range data {
		r, ok := encoder.CharcodeToRune(uint16(b))
		if !ok {
			common.Log.Debug("ERROR: CharcodeBytesToUnicode: No rune. b=0x%04x font=%s encoding=%s",
				b, font, encoder)
			common.Log.Debug("ERROR: data = [% 02x]=%#q", data, data)
			r = '?'
		}
		runes[i] = r
	}
	return string(runes)
}
// isCIDFont returns true if `skel` is a CID font, i.e. its subtype is Type0, CIDFontType0 or
// CIDFontType2. A skeleton without a subtype is logged as an error and reported as not CID.
func (skel fontSkeleton) isCIDFont() bool {
	if skel.subtype == "" {
		// The skeleton has no context; the missing piece here is the subtype.
		common.Log.Debug("ERROR: isCIDFont. subtype is empty. font=%s", skel)
	}
	isCID := false
	switch skel.subtype {
	case "Type0", "CIDFontType0", "CIDFontType2":
		isCID = true
	}
	common.Log.Trace("isCIDFont: isCID=%t font=%s", isCID, skel)
	return isCID
}
// actualFont returns the Font in font.context.
// Only the font implementations listed below are accepted. For a nil context or any other type
// the problem is logged and nil is returned, which callers treat as "not implemented".
func (font PdfFont) actualFont() fonts.Font {
	if font.context == nil {
		common.Log.Debug("ERROR: actualFont. context is nil. font=%s", font)
		return nil
	}
	switch t := font.context.(type) {
	case *pdfFontSimple,
		*pdfFontType0,
		*pdfCIDFontType0,
		*pdfCIDFontType2,
		fonts.FontCourier,
		fonts.FontCourierBold,
		fonts.FontCourierBoldOblique,
		fonts.FontCourierOblique,
		fonts.FontHelvetica,
		fonts.FontHelveticaBold,
		fonts.FontHelveticaBoldOblique,
		fonts.FontHelveticaOblique,
		fonts.FontTimesRoman,
		fonts.FontTimesBold,
		fonts.FontTimesBoldItalic,
		fonts.FontTimesItalic,
		fonts.FontSymbol,
		fonts.FontZapfDingbats:
		return t
	default:
		// %T prints the dynamic type of the unexpected context.
		common.Log.Debug("ERROR: actualFont. Unknown font type %T. font=%s", t, font)
		return nil
	}
}
// Encoder returns the text encoder of the underlying font, or nil if the underlying font is
// missing or of an unsupported type.
func (font PdfFont) Encoder() textencoding.TextEncoder {
	if actual := font.actualFont(); actual != nil {
		return actual.Encoder()
	}
	common.Log.Debug("ERROR: Encoder not implemented for font type=%#T", font.context)
	// XXX: Should we return a default encoding?
	return nil
}
// SetEncoder sets the encoding for the underlying font. It does nothing beyond logging when the
// underlying font is missing or of an unsupported type.
// !@#$ Is this only possible for simple fonts?
func (font PdfFont) SetEncoder(encoder textencoding.TextEncoder) {
	if actual := font.actualFont(); actual != nil {
		actual.SetEncoder(encoder)
		return
	}
	common.Log.Debug("ERROR: SetEncoder. Not implemented for font type=%#T", font.context)
}
// GetGlyphCharMetrics returns the char metrics of the glyph named `glyph` as reported by the
// underlying font. The bool result is false, with zero metrics, when the underlying font is
// missing or of an unsupported type.
func (font PdfFont) GetGlyphCharMetrics(glyph string) (fonts.CharMetrics, bool) {
	if actual := font.actualFont(); actual != nil {
		return actual.GetGlyphCharMetrics(glyph)
	}
	common.Log.Debug("ERROR: GetGlyphCharMetrics Not implemented for font type=%#T", font.context)
	return fonts.CharMetrics{}, false
}
// NewPdfFontFromPdfObject loads a PdfFont from the font dictionary `fontObj`. Type0 fonts are
// allowed. If there is a problem an error is returned.
func NewPdfFontFromPdfObject(fontObj PdfObject) (*PdfFont, error) {
	const allowType0 = true
	return newPdfFontFromPdfObject(fontObj, allowType0)
}
// newPdfFontFromPdfObject loads a PdfFont from a dictionary. The common fields are read first,
// then the font-kind specific loader is chosen by the font's Subtype. If there is a problem an
// error is returned.
// The allowType0 flag indicates whether loading a Type0 font is permitted. It is used to avoid
// cyclical loading.
func newPdfFontFromPdfObject(fontObj PdfObject, allowType0 bool) (*PdfFont, error) {
	skeleton, err := newFontSkeletonFromPdfObject(fontObj)
	if err != nil {
		return nil, err
	}
	font := &PdfFont{fontSkeleton: *skeleton}

	subtype := skeleton.subtype
	if subtype == "Type0" && !allowType0 {
		common.Log.Debug("ERROR: Loading type0 not allowed. font=%s", font)
		return nil, errors.New("Cyclical type0 loading")
	}

	switch subtype {
	case "Type0":
		loaded, err := newPdfFontType0FromPdfObject(fontObj, skeleton)
		if err != nil {
			common.Log.Debug("ERROR: While loading Type0 font. font=%s err=%v", font, err)
			return nil, err
		}
		font.context = loaded

	case "Type1", "Type3", "MMType1", "TrueType": // !@#$
		// A Type1 font whose base font is one of the standard 14 uses the built-in font.
		std, isStandard := fonts.Standard14Fonts[font.basefont]
		if isStandard && font.subtype == "Type1" {
			font.context = std
			break
		}
		loaded, err := newSimpleFontFromPdfObject(fontObj, skeleton)
		if err != nil {
			common.Log.Debug("ERROR: While loading simple font: font=%s err=%v", font, err)
			return nil, err
		}
		font.context = loaded

	case "CIDFontType0":
		loaded, err := newPdfCIDFontType0FromPdfObject(fontObj, skeleton)
		if err != nil {
			common.Log.Debug("ERROR: While loading cid font type0 font: %v", err)
			return nil, err
		}
		font.context = loaded

	case "CIDFontType2":
		loaded, err := newPdfCIDFontType2FromPdfObject(fontObj, skeleton)
		if err != nil {
			common.Log.Debug("ERROR: While loading cid font type2 font. font=%s err=%v", font, err)
			return nil, err
		}
		font.context = loaded

	default:
		common.Log.Debug("ERROR: Unsupported font type: font=%s", font)
		return nil, ErrUnsupportedFont
	}

	return font, nil
}
// newFontSkeletonFromPdfObject loads a fontSkeleton from a dictionary. If there is a problem an
// error is returned.
// `fontObj` may be the font dictionary itself or an indirect object wrapping it.
// Errors:
//   - ErrUnsupportedFont if `fontObj` is not a dictionary.
//   - ErrRequiredAttributeMissing if Type or Subtype is missing, or the FontDescriptor is bad.
//   - ErrRangeError if Type is not the name "Font".
//   - ErrTypeError if Subtype is not a name.
//   - whatever toUnicodeToCmap returns if the ToUnicode entry can't be loaded.
func newFontSkeletonFromPdfObject(fontObj PdfObject) (*fontSkeleton, error) {
	font := &fontSkeleton{}

	dictObj := fontObj
	if ind, is := fontObj.(*PdfIndirectObject); is {
		dictObj = ind.PdfObject
	}
	d, ok := dictObj.(*PdfObjectDictionary)
	if !ok {
		common.Log.Debug("ERROR: Font not given by a dictionary (%T)", fontObj)
		return nil, ErrUnsupportedFont
	}
	font.dict = d

	// BaseFont is not enforced here; a missing or non-name entry just leaves the fields empty.
	if basefont, err := GetName(d.Get("BaseFont")); err == nil {
		font.basefont = basefont
		font.BaseFont = d.Get("BaseFont")
	}

	obj := d.Get("Type")
	if obj == nil {
		common.Log.Debug("ERROR: Font Incompatibility. Type (Required) missing")
		return nil, ErrRequiredAttributeMissing
	}
	// The type assertion result must be checked before the name is dereferenced: a Type entry
	// that is not a name yields a nil pointer.
	typeName, isName := TraceToDirectObject(obj).(*PdfObjectName)
	if !isName {
		common.Log.Debug("ERROR: Font Incompatibility. Type not a name (%T)", obj)
		return nil, ErrRangeError
	}
	if string(*typeName) != "Font" {
		common.Log.Debug("ERROR: Font Incompatibility. Type=%q Should be %q", string(*typeName), "Font")
		return nil, ErrRangeError
	}

	obj = d.Get("Subtype")
	if obj == nil {
		common.Log.Debug("ERROR: Font Incompatibility. Subtype (Required) missing")
		return nil, ErrRequiredAttributeMissing
	}
	subtype, err := GetName(TraceToDirectObject(obj))
	if err != nil {
		common.Log.Debug("ERROR: Font Incompatibility. subtype not a name (%T) font=%s", obj, font)
		return nil, ErrTypeError
	}
	font.subtype = subtype

	if obj = d.Get("FontDescriptor"); obj != nil {
		fontDescriptor, err := newPdfFontDescriptorFromPdfObject(obj)
		if err != nil {
			common.Log.Debug("ERROR: Bad font descriptor")
			return nil, ErrRequiredAttributeMissing
		}
		font.fontDescriptor = fontDescriptor
	}

	font.ToUnicode = TraceToDirectObject(d.Get("ToUnicode"))
	if font.ToUnicode != nil {
		codemap, err := toUnicodeToCmap(font.ToUnicode, font.isCIDFont())
		if err != nil {
			return nil, err
		}
		font.UCMap = codemap
	}

	return font, nil
}
// ToPdfObject converts the PdfFont object to its PDF representation by delegating to the
// underlying font. A null object is returned when the underlying font is missing or of an
// unsupported type.
func (font PdfFont) ToPdfObject() PdfObject {
	actual := font.actualFont()
	if actual == nil {
		common.Log.Debug("ERROR: ToPdfObject Not implemented for font type=%#T. Returning null object",
			font.context)
		return MakeNull()
	}
	return actual.ToPdfObject()
}
// toUnicodeToCmap decodes the stream `toUnicode` and parses it into a CMap. `isCID` is passed
// through to the cmap loader. An error is returned if `toUnicode` is not a stream, or if
// decoding or parsing fails.
// 9.10.3 ToUnicode CMaps (page 29)
// The CMap defined in the ToUnicode entry of the font dictionary shall follow the syntax for CMaps
// This CMap differs from an ordinary one in these ways:
// • The only pertinent entry in the CMap stream dictionary (see Table 120) is UseCMap, which may be
// used if the CMap is based on another ToUnicode CMap.
// • The CMap file shall contain begincodespacerange and endcodespacerange operators that are
// consistent with the encoding that the font uses. In particular, for a simple font, the
// codespace shall be one byte long.
// • It shall use the beginbfchar, endbfchar, beginbfrange, and endbfrange operators to define the
// mapping from character codes to Unicode character sequences expressed in UTF-16BE encoding
func toUnicodeToCmap(toUnicode PdfObject, isCID bool) (*cmap.CMap, error) {
	switch stream := toUnicode.(type) {
	case *PdfObjectStream:
		decoded, err := DecodeStream(stream)
		if err != nil {
			return nil, err
		}
		return cmap.LoadCmapFromData(decoded, isCID)
	default:
		common.Log.Debug("ERROR: toUnicodeToCmap: Not a stream (%T)", toUnicode)
		return nil, errors.New("Invalid ToUnicode entry - not a stream")
	}
}
// PdfFontDescriptor specifies metrics and other attributes of a font and can refer to a FontFile
// for embedded fonts.
// 9.8 Font Descriptors (page 281)
// Every exported field holds the object stored under the dictionary key of the same name, exactly
// as it was read. A field is nil when the key is absent from the dictionary.
type PdfFontDescriptor struct {
// Logged as a required entry by the loader, but loading continues without it.
FontName PdfObject
FontFamily PdfObject
FontStretch PdfObject
FontWeight PdfObject
Flags PdfObject
FontBBox PdfObject
ItalicAngle PdfObject
Ascent PdfObject
Descent PdfObject
Leading PdfObject
CapHeight PdfObject
XHeight PdfObject
StemV PdfObject
StemH PdfObject
AvgWidth PdfObject
MaxWidth PdfObject
MissingWidth PdfObject
// Embedded font program entries (presumably at most one is set per font -- TODO confirm).
FontFile PdfObject
FontFile2 PdfObject
FontFile3 PdfObject
CharSet PdfObject
// Additional entries for CIDFonts
Style PdfObject
Lang PdfObject
FD PdfObject
CIDSet PdfObject
// Container.
// The indirect object the descriptor was loaded from, if any. ToPdfObject creates one when it
// is nil and returns it.
container *PdfIndirectObject
}
// newPdfFontDescriptorFromPdfObject loads the font descriptor from `obj`, which can either be a
// *PdfIndirectObject wrapping a dictionary or a *PdfObjectDictionary. An indirect object is
// remembered as the descriptor's container.
// ErrTypeError is returned if `obj` is not a dictionary. A missing FontName and a missing or
// wrong Type are only logged.
func newPdfFontDescriptorFromPdfObject(obj PdfObject) (*PdfFontDescriptor, error) {
	var container *PdfIndirectObject
	if indirect, isIndirect := obj.(*PdfIndirectObject); isIndirect {
		container = indirect
		obj = indirect.PdfObject
	}

	d, isDict := obj.(*PdfObjectDictionary)
	if !isDict {
		common.Log.Debug("ERROR: FontDescriptor not given by a dictionary (%T)", obj)
		return nil, ErrTypeError
	}

	fontNameObj := d.Get("FontName")
	if fontNameObj == nil {
		common.Log.Debug("Incompatibility: FontName (Required) missing")
	}
	// The name is only used in the log messages below, so a failed lookup is fine here.
	fontname, _ := GetName(fontNameObj)

	typeObj := d.Get("Type")
	if typeObj == nil {
		common.Log.Trace("Incompatibility: Type (Required) missing. font=%q %T",
			fontname, fontNameObj)
	} else if typeName, isName := typeObj.(*PdfObjectName); !isName || string(*typeName) != "FontDescriptor" {
		common.Log.Debug("Incompatibility: Font descriptor Type invalid (%T) font=%q %T",
			typeObj, fontname, fontNameObj)
	}

	// The remaining entries are copied as they are; absent ones stay nil.
	return &PdfFontDescriptor{
		FontName:     fontNameObj,
		FontFamily:   d.Get("FontFamily"),
		FontStretch:  d.Get("FontStretch"),
		FontWeight:   d.Get("FontWeight"),
		Flags:        d.Get("Flags"),
		FontBBox:     d.Get("FontBBox"),
		ItalicAngle:  d.Get("ItalicAngle"),
		Ascent:       d.Get("Ascent"),
		Descent:      d.Get("Descent"),
		Leading:      d.Get("Leading"),
		CapHeight:    d.Get("CapHeight"),
		XHeight:      d.Get("XHeight"),
		StemV:        d.Get("StemV"),
		StemH:        d.Get("StemH"),
		AvgWidth:     d.Get("AvgWidth"),
		MaxWidth:     d.Get("MaxWidth"),
		MissingWidth: d.Get("MissingWidth"),
		FontFile:     d.Get("FontFile"),
		FontFile2:    d.Get("FontFile2"),
		FontFile3:    d.Get("FontFile3"),
		CharSet:      d.Get("CharSet"),
		Style:        d.Get("Style"),
		Lang:         d.Get("Lang"),
		FD:           d.Get("FD"),
		CIDSet:       d.Get("CIDSet"),
		container:    container,
	}, nil
}
// ToPdfObject returns the PdfFontDescriptor as a PDF dictionary inside an indirect object.
// The descriptor's container is reused when it has one, otherwise a new indirect object is
// created and remembered. A fresh dictionary is built on every call with Type=FontDescriptor
// followed by every entry of the descriptor that is set.
func (desc *PdfFontDescriptor) ToPdfObject() PdfObject {
	d := MakeDict()
	if desc.container == nil {
		desc.container = &PdfIndirectObject{}
	}
	desc.container.PdfObject = d

	d.Set("Type", MakeName("FontDescriptor"))

	// Entries in the order they are written to the dictionary.
	entries := []struct {
		key PdfObjectName
		val PdfObject
	}{
		{"FontName", desc.FontName},
		{"FontFamily", desc.FontFamily},
		{"FontStretch", desc.FontStretch},
		{"FontWeight", desc.FontWeight},
		{"Flags", desc.Flags},
		{"FontBBox", desc.FontBBox},
		{"ItalicAngle", desc.ItalicAngle},
		{"Ascent", desc.Ascent},
		{"Descent", desc.Descent},
		{"Leading", desc.Leading},
		{"CapHeight", desc.CapHeight},
		{"XHeight", desc.XHeight},
		{"StemV", desc.StemV},
		{"StemH", desc.StemH},
		{"AvgWidth", desc.AvgWidth},
		{"MaxWidth", desc.MaxWidth},
		{"MissingWidth", desc.MissingWidth},
		{"FontFile", desc.FontFile},
		{"FontFile2", desc.FontFile2},
		{"FontFile3", desc.FontFile3},
		{"CharSet", desc.CharSet},
		// Style is stored under its own key.
		{"Style", desc.Style},
		{"Lang", desc.Lang},
		{"FD", desc.FD},
		{"CIDSet", desc.CIDSet},
	}
	for _, entry := range entries {
		if entry.val != nil {
			d.Set(entry.key, entry.val)
		}
	}

	return desc.container
}