diff --git a/pdf/model/font.go b/pdf/model/font.go index 7dd064d2..8eb0c80a 100644 --- a/pdf/model/font.go +++ b/pdf/model/font.go @@ -22,27 +22,23 @@ import ( // - Type1 // - TrueType // etc. -// It also holds the elements common to all fonts in fontSkeleton. -// XXX: The idea behind fontSkeleton is to avoid replicating the commmon font field parsing code -// in all fonts. Is there a better way of doing this? type PdfFont struct { - fontSkeleton // The fields common to all fonts - context fonts.Font // The underlying font: Type0, Type1, Truetype, etc.. + context fonts.Font // The underlying font: Type0, Type1, Truetype, etc.. } // String returns a string that describes `font`. func (font PdfFont) String() string { - return fmt.Sprintf("%T %s", font.context, font.fontSkeleton.String()) + return fmt.Sprintf("FONT{%T %s}", font.context, font.baseFields().String()) } // BaseFont returns the font's "BaseFont" field. func (font PdfFont) BaseFont() string { - return font.basefont + return font.baseFields().basefont } // Subtype returns the font's "Subtype" field. func (font PdfFont) Subtype() string { - subtype := font.subtype + subtype := font.baseFields().subtype if t, ok := font.context.(*pdfFontType0); ok { subtype = fmt.Sprintf("%s:%s", subtype, t.DescendantFont.Subtype()) } @@ -51,26 +47,20 @@ func (font PdfFont) Subtype() string { // ToUnicode returns the name of the font's "ToUnicode" field if there is one, or "" if there isn't. func (font PdfFont) ToUnicode() string { - if font.toUnicodeCmap == nil { + if font.baseFields().toUnicodeCmap == nil { return "" } - return font.toUnicodeCmap.Name() + return font.baseFields().toUnicodeCmap.Name() } // NewStandard14Font returns the standard 14 font named `basefont` as a *PdfFont, or an error if it // `basefont` is not one the standard 14 font names. 
func NewStandard14Font(basefont string) (*PdfFont, error) { - std, ok := fonts.Standard14Fonts[basefont] + std, ok := standard14Fonts[basefont] if !ok { return nil, ErrFontNotSupported } - return &PdfFont{ - fontSkeleton: fontSkeleton{ - subtype: "Type1", - basefont: basefont, - }, - context: std, - }, nil + return &PdfFont{context: &std}, nil } // NewPdfFontFromPdfObject loads a PdfFont from the dictionary `fontObj`. If there is a problem an @@ -84,43 +74,43 @@ func NewPdfFontFromPdfObject(fontObj core.PdfObject) (*PdfFont, error) { // The allowType0 flag indicates whether loading Type0 font should be supported. This is used to // avoid cyclical loading. func newPdfFontFromPdfObject(fontObj core.PdfObject, allowType0 bool) (*PdfFont, error) { - skeleton, err := newFontSkeletonFromPdfObject(fontObj) + d, base, err := newFontBaseFieldsFromPdfObject(fontObj) if err != nil { return nil, err } - font := &PdfFont{fontSkeleton: *skeleton} - switch skeleton.subtype { + font := &PdfFont{} + switch base.subtype { case "Type0": if !allowType0 { common.Log.Debug("ERROR: Loading type0 not allowed. font=%s", font) return nil, errors.New("Cyclical type0 loading") } - type0font, err := newPdfFontType0FromPdfObject(skeleton) + type0font, err := newPdfFontType0FromPdfObject(d, base) if err != nil { - common.Log.Debug("ERROR: While loading Type0 font. font=%s err=%v", font, err) + common.Log.Debug("ERROR: While loading Type0 font. 
font=%s err=%v", base, err) return nil, err } font.context = type0font case "Type1", "Type3", "MMType1", "TrueType": // !@#$ var simplefont *pdfFontSimple - if std, ok := fonts.Standard14Fonts[font.basefont]; ok && font.subtype == "Type1" { - font.context = std + if std, ok := standard14Fonts[base.basefont]; ok && base.subtype == "Type1" { + font.context = &std stdObj := core.TraceToDirectObject(std.ToPdfObject()) - stdSkeleton, err := newFontSkeletonFromPdfObject(stdObj) + d, stdBase, err := newFontBaseFieldsFromPdfObject(stdObj) if err != nil { - common.Log.Debug("ERROR: Bad Standard14\n\tfont=%s\n\tstd=%+v", font, std) + common.Log.Debug("ERROR: Bad Standard14\n\tfont=%s\n\tstd=%+v", base, std) return nil, err } - simplefont, err = newSimpleFontFromPdfObject(stdSkeleton, true) + simplefont, err = newSimpleFontFromPdfObject(d, stdBase, true) if err != nil { - common.Log.Debug("ERROR: Bad Standard14\n\tfont=%s\n\tstd=%+v", font, std) + common.Log.Debug("ERROR: Bad Standard14\n\tfont=%s\n\tstd=%+v", base, std) return nil, err } } else { - simplefont, err = newSimpleFontFromPdfObject(skeleton, false) + simplefont, err = newSimpleFontFromPdfObject(d, base, false) if err != nil { - common.Log.Debug("ERROR: While loading simple font: font=%s err=%v", font, err) + common.Log.Debug("ERROR: While loading simple font: font=%s err=%v", base, err) return nil, err } } @@ -130,22 +120,22 @@ func newPdfFontFromPdfObject(fontObj core.PdfObject, allowType0 bool) (*PdfFont, } font.context = simplefont case "CIDFontType0": - cidfont, err := newPdfCIDFontType0FromPdfObject(skeleton) + cidfont, err := newPdfCIDFontType0FromPdfObject(d, base) if err != nil { common.Log.Debug("ERROR: While loading cid font type0 font: %v", err) return nil, err } font.context = cidfont case "CIDFontType2": - cidfont, err := newPdfCIDFontType2FromPdfObject(skeleton) + cidfont, err := newPdfCIDFontType2FromPdfObject(d, base) if err != nil { - common.Log.Debug("ERROR: While loading cid font type2 font. 
font=%s err=%v", font, err) + common.Log.Debug("ERROR: While loading cid font type2 font. font=%s err=%v", base, err) return nil, err } font.context = cidfont default: - common.Log.Debug("ERROR: Unsupported font type: font=%s", font) - return nil, fmt.Errorf("Unsupported font type: font=%s", font) + common.Log.Debug("ERROR: Unsupported font type: font=%s", base) + return nil, fmt.Errorf("Unsupported font type: font=%s", base) } return font, nil @@ -165,7 +155,7 @@ func (font PdfFont) CharcodeBytesToUnicode(data []byte) (string, int, int) { common.Log.Debug("showText: data=[% 02x]=%#q", data, data) charcodes := make([]uint16, 0, len(data)+len(data)%2) - if font.isCIDFont() { + if font.baseFields().isCIDFont() { if len(data) == 1 { data = []byte{0, data[0]} } @@ -186,8 +176,8 @@ func (font PdfFont) CharcodeBytesToUnicode(data []byte) (string, int, int) { charstrings := make([]string, 0, len(charcodes)) numMisses := 0 for _, code := range charcodes { - if font.toUnicodeCmap != nil { - r, ok := font.toUnicodeCmap.CharcodeToUnicode2(cmap.CharCode(code)) + if font.baseFields().toUnicodeCmap != nil { + r, ok := font.baseFields().toUnicodeCmap.CharcodeToUnicode2(cmap.CharCode(code)) if ok { charstrings = append(charstrings, r) continue @@ -203,7 +193,7 @@ func (font PdfFont) CharcodeBytesToUnicode(data []byte) (string, int, int) { common.Log.Debug("ERROR: No rune. 
code=0x%04x data=[% 02x]=%#q charcodes=[% 04x] CID=%t\n"+ "\tfont=%s\n\tencoding=%s", - code, data, data, charcodes, font.isCIDFont(), font, encoder) + code, data, data, charcodes, font.baseFields().isCIDFont(), font, encoder) numMisses++ charstrings = append(charstrings, cmap.MissingCodeString) } @@ -277,40 +267,36 @@ func (font PdfFont) actualFont() fonts.Font { return t case fonts.FontCourier: return t - case fonts.FontCourierBold: - return t - case fonts.FontCourierBoldOblique: - return t - case fonts.FontCourierOblique: - return t - case fonts.FontHelvetica: - return t - case fonts.FontHelveticaBold: - return t - case fonts.FontHelveticaBoldOblique: - return t - case fonts.FontHelveticaOblique: - return t - case fonts.FontTimesRoman: - return t - case fonts.FontTimesBold: - return t - case fonts.FontTimesBoldItalic: - return t - case fonts.FontTimesItalic: - return t - case fonts.FontSymbol: - return t - case fonts.FontZapfDingbats: - return t default: common.Log.Debug("ERROR: actualFont. Unknown font type %t. font=%s", t, font) return nil } } -// fontSkeleton represents the fields that are common to all PDF fonts. -type fontSkeleton struct { +// baseFields returns the fields of `font`.context that are common to all PDF fonts. +func (font PdfFont) baseFields() *fontCommon { + if font.context == nil { + common.Log.Debug("ERROR: baseFields. context is nil.") + panic("RRRR") + } + switch t := font.context.(type) { + case *pdfFontSimple: + return t.baseFields() + case *pdfFontType0: + return t.baseFields() + case *pdfCIDFontType0: + return t.baseFields() + case *pdfCIDFontType2: + return t.baseFields() + default: + //common.Log.Error("ERROR: base. Unknown font type %t. font=%s", t, font.String()) + panic(fmt.Errorf("ERROR: base. Unknown font type %t. ", t)) + return nil + } +} + +// fontCommon represents the fields that are common to all PDF fonts. +type fontCommon struct { // All fonts have these fields basefont string // The font's "BaseFont" field. 
subtype string // The font's "Subtype" field. @@ -322,68 +308,65 @@ type fontSkeleton struct { toUnicodeCmap *cmap.CMap // Computed from "ToUnicode" fontDescriptor *PdfFontDescriptor // Computed from "FontDescriptor" - // This is an internal implementation detail. It is passed to specific font types so they can parse it. - dict *core.PdfObjectDictionary - // objectNumber helps us find the font in the PDF being processed. This helps with debugging objectNumber int64 } -// toFont returns a core.PdfObjectDictionary for `skel`. +// asPdfObjectDictionary returns `base` as a core.PdfObjectDictionary. // It is for use in font ToPdfObject functions. -// NOTE: The returned dict's "Subtype" field is set to `subtype` if `skel` doesn't have a subtype. -func (skel fontSkeleton) toDict(subtype string) *core.PdfObjectDictionary { +// NOTE: The returned dict's "Subtype" field is set to `subtype` if `base` doesn't have a subtype. +func (base fontCommon) asPdfObjectDictionary(subtype string) *core.PdfObjectDictionary { - if subtype != "" && skel.subtype != "" && subtype != skel.subtype { - common.Log.Debug("ERROR: toDict. Overriding subtype to %#q %s", subtype, skel) - } else if subtype == "" && skel.subtype == "" { - common.Log.Debug("ERROR: toDict no subtype. font=%s", skel) - } else if skel.subtype == "" { - skel.subtype = subtype + if subtype != "" && base.subtype != "" && subtype != base.subtype { + common.Log.Debug("ERROR: asPdfObjectDictionary. Overriding subtype to %#q %s", subtype, base) + } else if subtype == "" && base.subtype == "" { + common.Log.Debug("ERROR: asPdfObjectDictionary no subtype. 
font=%s", base) + } else if base.subtype == "" { + base.subtype = subtype } d := core.MakeDict() d.Set("Type", core.MakeName("Font")) - d.Set("BaseFont", core.MakeName(skel.basefont)) - d.Set("Subtype", core.MakeName(skel.subtype)) + d.Set("BaseFont", core.MakeName(base.basefont)) + d.Set("Subtype", core.MakeName(base.subtype)) - if skel.fontDescriptor != nil { - d.Set("FontDescriptor", skel.fontDescriptor.ToPdfObject()) + if base.fontDescriptor != nil { + d.Set("FontDescriptor", base.fontDescriptor.ToPdfObject()) } - if skel.toUnicode != nil { - d.Set("ToUnicode", skel.toUnicode) + if base.toUnicode != nil { + d.Set("ToUnicode", base.toUnicode) } return d } -// String returns a string that describes `skel`. -func (skel fontSkeleton) String() string { +// String returns a string that describes `base`. +func (base fontCommon) String() string { descriptor := "" - if skel.fontDescriptor != nil { - descriptor = skel.fontDescriptor.String() + if base.fontDescriptor != nil { + descriptor = base.fontDescriptor.String() } - return fmt.Sprintf("FONT{%#q %#q obj=%d %s}", skel.subtype, skel.basefont, skel.objectNumber, descriptor) + return fmt.Sprintf("FONT{%#q %#q obj=%d %s}", base.subtype, base.basefont, base.objectNumber, descriptor) } -// isCIDFont returns true if `skel` is a CID font. -func (skel fontSkeleton) isCIDFont() bool { - if skel.subtype == "" { - common.Log.Debug("ERROR: isCIDFont. context is nil. font=%s", skel) +// isCIDFont returns true if `base` is a CID font. +func (base fontCommon) isCIDFont() bool { + if base.subtype == "" { + common.Log.Debug("ERROR: isCIDFont. context is nil. font=%s", base) } isCID := false - switch skel.subtype { + switch base.subtype { case "Type0", "CIDFontType0", "CIDFontType2": isCID = true } - common.Log.Trace("isCIDFont: isCID=%t font=%s", isCID, skel) + common.Log.Trace("isCIDFont: isCID=%t font=%s", isCID, base) return isCID } -// newFontSkeletonFromPdfObject loads a fontSkeleton from a dictionary. 
If there is a problem an -// error is returned. -// The fontSkeleton is the group of fields common to all PDF fonts. -func newFontSkeletonFromPdfObject(fontObj core.PdfObject) (*fontSkeleton, error) { - font := &fontSkeleton{} +// newFontBaseFieldsFromPdfObject returns `fontObj` as a dictionary the common fields from that +// dictionary in the fontCommon return. If there is a problem an error is returned. +// The fontCommon is the group of fields common to all PDF fonts. +func newFontBaseFieldsFromPdfObject(fontObj core.PdfObject) (*core.PdfObjectDictionary, *fontCommon, error) { + font := &fontCommon{} if obj, ok := fontObj.(*core.PdfIndirectObject); ok { font.objectNumber = obj.ObjectNumber @@ -394,36 +377,35 @@ func newFontSkeletonFromPdfObject(fontObj core.PdfObject) (*fontSkeleton, error) d, ok := dictObj.(*core.PdfObjectDictionary) if !ok { common.Log.Debug("ERROR: Font not given by a dictionary (%T)", fontObj) - return nil, ErrFontNotSupported + return nil, nil, ErrFontNotSupported } - font.dict = d objtype, err := core.GetName(core.TraceToDirectObject(d.Get("Type"))) if err != nil { common.Log.Debug("ERROR: Font Incompatibility. Type (Required) missing") - return nil, ErrRequiredAttributeMissing + return nil, nil, ErrRequiredAttributeMissing } if objtype != "Font" { common.Log.Debug("ERROR: Font Incompatibility. Type=%q. Should be %q.", objtype, "Font") - return nil, core.ErrTypeError + return nil, nil, core.ErrTypeError } subtype, err := core.GetName(core.TraceToDirectObject(d.Get("Subtype"))) if err != nil { common.Log.Debug("ERROR: Font Incompatibility. Subtype (Required) missing") - return nil, ErrRequiredAttributeMissing + return nil, nil, ErrRequiredAttributeMissing } font.subtype = subtype if subtype == "Type3" { common.Log.Debug("ERROR: Type 3 font not supprted. 
d=%s", d) - return nil, ErrFontNotSupported + return nil, nil, ErrFontNotSupported } basefont, err := core.GetName(core.TraceToDirectObject(d.Get("BaseFont"))) if err != nil { common.Log.Debug("ERROR: Font Incompatibility. BaseFont (Required) missing") - return nil, ErrRequiredAttributeMissing + return nil, nil, ErrRequiredAttributeMissing } font.basefont = basefont @@ -432,7 +414,7 @@ func newFontSkeletonFromPdfObject(fontObj core.PdfObject) (*fontSkeleton, error) fontDescriptor, err := newPdfFontDescriptorFromPdfObject(obj) if err != nil { common.Log.Debug("ERROR: Bad font descriptor. err=%v", err) - return nil, err + return nil, nil, err } font.fontDescriptor = fontDescriptor } @@ -441,12 +423,12 @@ func newFontSkeletonFromPdfObject(fontObj core.PdfObject) (*fontSkeleton, error) if font.toUnicode != nil { codemap, err := toUnicodeToCmap(font.toUnicode, font.isCIDFont()) if err != nil { - return nil, err + return nil, nil, err } font.toUnicodeCmap = codemap } - return font, nil + return d, font, nil } // toUnicodeToCmap returns a CMap of `toUnicode` if it exists diff --git a/pdf/model/font_composite.go b/pdf/model/font_composite.go index d60acdd2..a9ab3f3e 100644 --- a/pdf/model/font_composite.go +++ b/pdf/model/font_composite.go @@ -7,6 +7,7 @@ import ( "github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/pdf/core" + "github.com/unidoc/unidoc/pdf/internal/cmap" "github.com/unidoc/unidoc/pdf/model/fonts" "github.com/unidoc/unidoc/pdf/model/textencoding" ) @@ -87,13 +88,51 @@ import ( // associated CIDFont is called its descendant. type pdfFontType0 struct { container *core.PdfIndirectObject - *fontSkeleton + + // These fields are common to all PDF fonts. + + basefont string // The font's "BaseFont" field. + subtype string // The font's "Subtype" field. + + // These are optional fields in the PDF font + toUnicode core.PdfObject // The stream containing toUnicodeCmap. We keep it around for ToPdfObject. 
+ + // These objects are computed from optional fields in the PDF font + toUnicodeCmap *cmap.CMap // Computed from "ToUnicode" + fontDescriptor *PdfFontDescriptor // Computed from "FontDescriptor" + + // objectNumber helps us find the font in the PDF being processed. This helps with debugging + objectNumber int64 + + // These fields are specific to Type 0 fonts. encoder textencoding.TextEncoder Encoding core.PdfObject DescendantFont *PdfFont // Can be either CIDFontType0 or CIDFontType2 font. } +// pdfFontType0FromSkeleton returns a pdfFontType0 with its common fields initalized. +func pdfFontType0FromSkeleton(base *fontCommon) *pdfFontType0 { + return &pdfFontType0{ + basefont: base.basefont, + subtype: base.subtype, + toUnicode: base.toUnicode, + fontDescriptor: base.fontDescriptor, + objectNumber: base.objectNumber, + } +} + +// baseFields returns the fields of `font` that are common to all PDF fonts. +func (font *pdfFontType0) baseFields() *fontCommon { + return &fontCommon{ + basefont: font.basefont, + subtype: font.subtype, + toUnicode: font.toUnicode, + fontDescriptor: font.fontDescriptor, + objectNumber: font.objectNumber, + } +} + // GetGlyphCharMetrics returns the character metrics for the specified glyph. A bool flag is // returned to indicate whether or not the entry was found in the glyph to charcode mapping. func (font pdfFontType0) GetGlyphCharMetrics(glyph string) (fonts.CharMetrics, bool) { @@ -119,7 +158,8 @@ func (font *pdfFontType0) ToPdfObject() core.PdfObject { if font.container == nil { font.container = &core.PdfIndirectObject{} } - d := font.toDict("Type0") + d := font.baseFields().asPdfObjectDictionary("Type0") + font.container.PdfObject = d if font.encoder != nil { @@ -133,16 +173,14 @@ func (font *pdfFontType0) ToPdfObject() core.PdfObject { return font.container } -// newPdfFontType0FromPdfObject makes a pdfFontType0 based on the input `d` in skeleton. +// newPdfFontType0FromPdfObject makes a pdfFontType0 based on the input `d` in base. 
// If a problem is encountered, an error is returned. -func newPdfFontType0FromPdfObject(skeleton *fontSkeleton) (*pdfFontType0, error) { - - d := skeleton.dict +func newPdfFontType0FromPdfObject(d *core.PdfObjectDictionary, base *fontCommon) (*pdfFontType0, error) { // DescendantFonts. arr, err := core.GetArray(core.TraceToDirectObject(d.Get("DescendantFonts"))) if err != nil { - common.Log.Debug("ERROR: Invalid DescendantFonts - not an array %s", skeleton) + common.Log.Debug("ERROR: Invalid DescendantFonts - not an array %s", base) return nil, core.ErrRangeError } if len(arr) != 1 { @@ -151,14 +189,12 @@ func newPdfFontType0FromPdfObject(skeleton *fontSkeleton) (*pdfFontType0, error) } df, err := newPdfFontFromPdfObject(arr[0], false) if err != nil { - common.Log.Debug("ERROR: Failed loading descendant font: err=%v %s", err, skeleton) + common.Log.Debug("ERROR: Failed loading descendant font: err=%v %s", err, base) return nil, err } - font := &pdfFontType0{ - fontSkeleton: skeleton, - DescendantFont: df, - } + font := pdfFontType0FromSkeleton(base) + font.DescendantFont = df encoderName, err := core.GetName(core.TraceToDirectObject(d.Get("Encoding"))) // XXX: FIXME This is not valid if encoder is not Identity-H !@#$ @@ -172,14 +208,51 @@ func newPdfFontType0FromPdfObject(skeleton *fontSkeleton) (*pdfFontType0, error) // XXX: This is a stub. type pdfCIDFontType0 struct { container *core.PdfIndirectObject - skeleton *fontSkeleton // Elements common to all font types. + + // These fields are common to all PDF fonts. + + basefont string // The font's "BaseFont" field. + subtype string // The font's "Subtype" field. + + // These are optional fields in the PDF font + toUnicode core.PdfObject // The stream containing toUnicodeCmap. We keep it around for ToPdfObject. 
+ + // These objects are computed from optional fields in the PDF font + toUnicodeCmap *cmap.CMap // Computed from "ToUnicode" + fontDescriptor *PdfFontDescriptor // Computed from "FontDescriptor" + + // objectNumber helps us find the font in the PDF being processed. This helps with debugging + objectNumber int64 + + // These fields are specific to Type 0 fonts. encoder textencoding.TextEncoder // Table 117 – Entries in a CIDFont dictionary (page 269) CIDSystemInfo core.PdfObject // (Required) Dictionary that defines the character collection of the CIDFont. See Table 116. FontDescriptor core.PdfObject // (Required) Describes the CIDFont’s default metrics other than its glyph widths +} +// pdfCIDFontType0FromSkeleton returns a pdfCIDFontType0 with its common fields initalized. +func pdfCIDFontType0FromSkeleton(base *fontCommon) *pdfCIDFontType0 { + return &pdfCIDFontType0{ + basefont: base.basefont, + subtype: base.subtype, + toUnicode: base.toUnicode, + fontDescriptor: base.fontDescriptor, + objectNumber: base.objectNumber, + } +} + +// baseFields returns the fields of `font` that are common to all PDF fonts. +func (font *pdfCIDFontType0) baseFields() *fontCommon { + return &fontCommon{ + basefont: font.basefont, + subtype: font.subtype, + toUnicode: font.toUnicode, + fontDescriptor: font.fontDescriptor, + objectNumber: font.objectNumber, + } } // Encoder returns the font's text encoder. @@ -208,19 +281,18 @@ func (font *pdfCIDFontType0) ToPdfObject() core.PdfObject { // newPdfCIDFontType0FromPdfObject creates a pdfCIDFontType0 object from a dictionary (either direct // or via indirect object). If a problem occurs with loading an error is returned. // XXX: This is a stub. -func newPdfCIDFontType0FromPdfObject(skeleton *fontSkeleton) (*pdfCIDFontType0, error) { - if skeleton.subtype != "CIDFontType0" { - common.Log.Debug("ERROR: Font SubType != CIDFontType0. 
font=%s", skeleton) +func newPdfCIDFontType0FromPdfObject(d *core.PdfObjectDictionary, base *fontCommon) (*pdfCIDFontType0, error) { + if base.subtype != "CIDFontType0" { + common.Log.Debug("ERROR: Font SubType != CIDFontType0. font=%s", base) return nil, core.ErrRangeError } - font := &pdfCIDFontType0{skeleton: skeleton} - d := skeleton.dict + font := pdfCIDFontType0FromSkeleton(base) // CIDSystemInfo. obj := core.TraceToDirectObject(d.Get("CIDSystemInfo")) if obj == nil { - common.Log.Debug("ERROR: CIDSystemInfo (Required) missing. font=%s", skeleton) + common.Log.Debug("ERROR: CIDSystemInfo (Required) missing. font=%s", base) return nil, ErrRequiredAttributeMissing } font.CIDSystemInfo = obj @@ -230,10 +302,26 @@ func newPdfCIDFontType0FromPdfObject(skeleton *fontSkeleton) (*pdfCIDFontType0, // pdfCIDFontType2 represents a CIDFont Type2 font dictionary. type pdfCIDFontType2 struct { - container *core.PdfIndirectObject - *fontSkeleton // Elements common to all font types + container *core.PdfIndirectObject - encoder textencoding.TextEncoder // !@#$ In skeleton? + // These fields are common to all PDF fonts. + + basefont string // The font's "BaseFont" field. + subtype string // The font's "Subtype" field. + + // These are optional fields in the PDF font + toUnicode core.PdfObject // The stream containing toUnicodeCmap. We keep it around for ToPdfObject. + + // These objects are computed from optional fields in the PDF font + toUnicodeCmap *cmap.CMap // Computed from "ToUnicode" + fontDescriptor *PdfFontDescriptor // Computed from "FontDescriptor" + + // objectNumber helps us find the font in the PDF being processed. This helps with debugging + objectNumber int64 + + // These fields are specific to Type 0 fonts. + + encoder textencoding.TextEncoder // !@#$ In base? 
ttfParser *fonts.TtfType CIDSystemInfo core.PdfObject @@ -250,6 +338,28 @@ type pdfCIDFontType2 struct { gidToWidthMap map[uint16]int } +// pdfCIDFontType2FromSkeleton returns a pdfCIDFontType2 with its common fields initalized. +func pdfCIDFontType2FromSkeleton(base *fontCommon) *pdfCIDFontType2 { + return &pdfCIDFontType2{ + basefont: base.basefont, + subtype: base.subtype, + toUnicode: base.toUnicode, + fontDescriptor: base.fontDescriptor, + objectNumber: base.objectNumber, + } +} + +// baseFields returns the fields of `font` that are common to all PDF fonts. +func (font *pdfCIDFontType2) baseFields() *fontCommon { + return &fontCommon{ + basefont: font.basefont, + subtype: font.subtype, + toUnicode: font.toUnicode, + fontDescriptor: font.fontDescriptor, + objectNumber: font.objectNumber, + } +} + // Encoder returns the font's text encoder. func (font pdfCIDFontType2) Encoder() textencoding.TextEncoder { return font.encoder @@ -289,7 +399,7 @@ func (font *pdfCIDFontType2) ToPdfObject() core.PdfObject { if font.container == nil { font.container = &core.PdfIndirectObject{} } - d := font.toDict("CIDFontType2") + d := font.baseFields().asPdfObjectDictionary("CIDFontType2") font.container.PdfObject = d if font.CIDSystemInfo != nil { @@ -316,19 +426,18 @@ func (font *pdfCIDFontType2) ToPdfObject() core.PdfObject { // newPdfCIDFontType2FromPdfObject creates a pdfCIDFontType2 object from a dictionary (either direct // or via indirect object). If a problem occurs with loading, an error is returned. -func newPdfCIDFontType2FromPdfObject(skeleton *fontSkeleton) (*pdfCIDFontType2, error) { - if skeleton.subtype != "CIDFontType2" { - common.Log.Debug("ERROR: Font SubType != CIDFontType2. font=%s", skeleton) +func newPdfCIDFontType2FromPdfObject(d *core.PdfObjectDictionary, base *fontCommon) (*pdfCIDFontType2, error) { + if base.subtype != "CIDFontType2" { + common.Log.Debug("ERROR: Font SubType != CIDFontType2. 
font=%s", base) return nil, core.ErrRangeError } - font := &pdfCIDFontType2{fontSkeleton: skeleton} - d := skeleton.dict + font := pdfCIDFontType2FromSkeleton(base) // CIDSystemInfo. obj := d.Get("CIDSystemInfo") if obj == nil { - common.Log.Debug("ERROR: CIDSystemInfo (Required) missing. font=%s", skeleton) + common.Log.Debug("ERROR: CIDSystemInfo (Required) missing. font=%s", base) return nil, ErrRequiredAttributeMissing } font.CIDSystemInfo = obj @@ -357,8 +466,7 @@ func NewCompositePdfFontFromTTFFile(filePath string) (*PdfFont, error) { } // Prepare the inner descendant font (CIDFontType2). - skeletonCID := fontSkeleton{subtype: "CIDFontType2"} - cidfont := &pdfCIDFontType2{fontSkeleton: &skeletonCID} + cidfont := &pdfCIDFontType2{subtype: "CIDFontType2"} cidfont.ttfParser = &ttf // 2-byte character codes ➞ runes @@ -370,7 +478,7 @@ func NewCompositePdfFontFromTTFFile(filePath string) (*PdfFont, error) { return runes[i] < runes[j] }) - skeleton := fontSkeleton{ + base := fontCommon{ subtype: "Type0", basefont: ttf.PostScriptName, } @@ -476,15 +584,14 @@ func NewCompositePdfFontFromTTFFile(filePath string) (*PdfFont, error) { flags |= 1 << 2 // Symbolic. descriptor.Flags = core.MakeInteger(int64(flags)) - skeleton.fontDescriptor = descriptor + base.fontDescriptor = descriptor descendantFont := PdfFont{ - context: cidfont, - fontSkeleton: skeletonCID, + context: cidfont, } // Make root Type0 font. type0 := pdfFontType0{ - fontSkeleton: &skeleton, + fontDescriptor: descriptor, DescendantFont: &descendantFont, Encoding: core.MakeName("Identity-H"), encoder: textencoding.NewTrueTypeFontEncoder(ttf.Chars), @@ -492,8 +599,7 @@ func NewCompositePdfFontFromTTFFile(filePath string) (*PdfFont, error) { // Build Font. 
font := PdfFont{ - fontSkeleton: skeleton, - context: &type0, + context: &type0, } return &font, nil diff --git a/pdf/model/font_simple.go b/pdf/model/font_simple.go index 2015c465..9e06b503 100644 --- a/pdf/model/font_simple.go +++ b/pdf/model/font_simple.go @@ -6,6 +6,7 @@ import ( "github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/pdf/core" + "github.com/unidoc/unidoc/pdf/internal/cmap" "github.com/unidoc/unidoc/pdf/model/fonts" "github.com/unidoc/unidoc/pdf/model/textencoding" ) @@ -27,9 +28,23 @@ import ( // containing font-wide metrics and other attributes of the font. // Among those attributes is an optional font filestream containing the font program. type pdfFontSimple struct { - container *core.PdfIndirectObject - *fontSkeleton // Elements common to all font types + container *core.PdfIndirectObject + // These fields are common to all PDF fonts. + basefont string // The font's "BaseFont" field. + subtype string // The font's "Subtype" field. + + // These are optional fields in the PDF font + toUnicode core.PdfObject // The stream containing toUnicodeCmap. We keep it around for ToPdfObject. + + // These objects are computed from optional fields in the PDF font + toUnicodeCmap *cmap.CMap // Computed from "ToUnicode" + fontDescriptor *PdfFontDescriptor // Computed from "FontDescriptor" + + // objectNumber helps us find the font in the PDF being processed. This helps with debugging + objectNumber int64 + + // These fields are specific to simple PDF fonts. firstChar int lastChar int charWidths []float64 @@ -41,6 +56,31 @@ type pdfFontSimple struct { LastChar core.PdfObject Widths core.PdfObject Encoding core.PdfObject + + // Standard 14 fonts metrics + fontMetrics map[string]fonts.CharMetrics +} + +// pdfCIDFontType0FromSkeleton returns a pdfFontSimple with its common fields initalized. 
+func pdfFontSimpleFromSkeleton(base *fontCommon) *pdfFontSimple { + return &pdfFontSimple{ + basefont: base.basefont, + subtype: base.subtype, + toUnicode: base.toUnicode, + fontDescriptor: base.fontDescriptor, + objectNumber: base.objectNumber, + } +} + +// baseFields returns the fields of `font` that are common to all PDF fonts. +func (font *pdfFontSimple) baseFields() *fontCommon { + return &fontCommon{ + basefont: font.basefont, + subtype: font.subtype, + toUnicode: font.toUnicode, + fontDescriptor: font.fontDescriptor, + objectNumber: font.objectNumber, + } } // Encoder returns the font's text encoder. @@ -56,6 +96,11 @@ func (font *pdfFontSimple) SetEncoder(encoder textencoding.TextEncoder) { // GetGlyphCharMetrics returns the character metrics for the specified glyph. A bool flag is // returned to indicate whether or not the entry was found in the glyph to charcode mapping. func (font pdfFontSimple) GetGlyphCharMetrics(glyph string) (fonts.CharMetrics, bool) { + if font.fontMetrics != nil { + metrics, ok := font.fontMetrics[glyph] + return metrics, ok + } + metrics := fonts.CharMetrics{} code, found := font.encoder.GlyphToCharcode(glyph) @@ -87,7 +132,7 @@ func (font pdfFontSimple) GetGlyphCharMetrics(glyph string) (fonts.CharMetrics, } // newSimpleFontFromPdfObject creates a pdfFontSimple from dictionary `d`. Elements of `d` that -// are already parsed are contained in `skeleton`. +// are already parsed are contained in `base`. // An error is returned if there is a problem with loading. 
// !@#$ Just return a base 14 font, if obj is a base 14 font // @@ -96,20 +141,14 @@ func (font pdfFontSimple) GetGlyphCharMetrics(glyph string) (fonts.CharMetrics, // // !@#$ 9.6.6.4 Encodings for TrueType Fonts (page 265) // Need to get TrueType font's cmap -func newSimpleFontFromPdfObject(skeleton *fontSkeleton, std14 bool) (*pdfFontSimple, error) { - font := &pdfFontSimple{fontSkeleton: skeleton} - - d := skeleton.dict +func newSimpleFontFromPdfObject(d *core.PdfObjectDictionary, base *fontCommon, std14 bool) (*pdfFontSimple, error) { + font := pdfFontSimpleFromSkeleton(base) // !@#$ Failing on ~/testdata/The-Byzantine-Generals-Problem.pdf + // FirstChar is not defined in ~/testdata/shamirturing.pdf if !std14 { obj := d.Get("FirstChar") if obj == nil { - // See ~/testdata/shamirturing.pdf - // if skeleton.subtype == "TrueType" { - // common.Log.Debug("ERROR: FirstChar attribute missing. font=%s d=%s", skeleton, d) - // return nil, ErrRequiredAttributeMissing - // } obj = core.PdfObject(core.MakeInteger(0)) } font.FirstChar = obj @@ -123,11 +162,7 @@ func newSimpleFontFromPdfObject(skeleton *fontSkeleton, std14 bool) (*pdfFontSim obj = d.Get("LastChar") if obj == nil { - // if skeleton.subtype == "TrueType" { - // common.Log.Debug("ERROR: LastChar attribute missing") - // return nil, ErrRequiredAttributeMissing - // } - obj = core.PdfObject(core.MakeInteger(0)) + obj = core.PdfObject(core.MakeInteger(255)) } font.LastChar = obj intVal, ok = core.TraceToDirectObject(obj).(*core.PdfObjectInteger) @@ -140,9 +175,6 @@ func newSimpleFontFromPdfObject(skeleton *fontSkeleton, std14 bool) (*pdfFontSim font.charWidths = []float64{} obj = d.Get("Widths") if obj != nil { - // common.Log.Debug("ERROR: Widths missing from font") - // return nil, ErrRequiredAttributeMissing - // } font.Widths = obj arr, ok := core.TraceToDirectObject(obj).(*core.PdfObjectArray) @@ -173,7 +205,6 @@ func newSimpleFontFromPdfObject(skeleton *fontSkeleton, std14 bool) (*pdfFontSim // addEncoding 
adds the encoding to the font. // The order of precedence is important func (font *pdfFontSimple) addEncoding() error { - skeleton := font.fontSkeleton var baseEncoder string var differences map[byte]string var err error @@ -181,12 +212,12 @@ func (font *pdfFontSimple) addEncoding() error { // !@#$ Stop setting default encoding in getFontEncoding XXX baseEncoder, differences, err = getFontEncoding(font.Encoding) if err != nil { - common.Log.Debug("ERROR: BaseFont=%q Subtype=%q Encoding=%s (%T) err=%v", skeleton.basefont, - skeleton.subtype, font.Encoding, font.Encoding, err) + common.Log.Debug("ERROR: BaseFont=%q Subtype=%q Encoding=%s (%T) err=%v", font.basefont, + font.subtype, font.Encoding, font.Encoding, err) return err } - common.Log.Debug("addEncoding: BaseFont=%q Subtype=%q Encoding=%s (%T)", skeleton.basefont, - skeleton.subtype, font.Encoding, font.Encoding) + common.Log.Debug("addEncoding: BaseFont=%q Subtype=%q Encoding=%s (%T)", font.basefont, + font.subtype, font.Encoding, font.Encoding) encoder, err := textencoding.NewSimpleTextEncoder(baseEncoder, differences) if err != nil { @@ -196,9 +227,9 @@ func (font *pdfFontSimple) addEncoding() error { } if font.Encoder() == nil { - descriptor := skeleton.fontDescriptor + descriptor := font.fontDescriptor if descriptor != nil { - switch skeleton.subtype { + switch font.subtype { case "Type1": // XXX: !@#$ Is this the right order? Do the /Differences need to be reapplied? 
if descriptor.fontFile != nil && descriptor.fontFile.encoder != nil { @@ -212,7 +243,6 @@ func (font *pdfFontSimple) addEncoding() error { if err == nil { font.SetEncoder(encoder) } - } } } @@ -277,7 +307,7 @@ func (font *pdfFontSimple) ToPdfObject() core.PdfObject { if font.container == nil { font.container = &core.PdfIndirectObject{} } - d := font.toDict("") + d := font.baseFields().asPdfObjectDictionary("") font.container.PdfObject = d if font.FirstChar != nil { @@ -291,6 +321,8 @@ func (font *pdfFontSimple) ToPdfObject() core.PdfObject { } if font.Encoding != nil { d.Set("Encoding", font.Encoding) + } else if font.encoder != nil { + d.Set("Encoding", font.encoder.ToPdfObject()) } return font.container @@ -309,8 +341,7 @@ func NewPdfFontFromTTFFile(filePath string) (*PdfFont, error) { return nil, err } - skeleton := fontSkeleton{subtype: "TrueType"} - truefont := &pdfFontSimple{fontSkeleton: &skeleton} + truefont := &pdfFontSimple{subtype: "TrueType"} // TODO: Make more generic to allow customization... Need to know which glyphs are to be used, // then can derive @@ -403,12 +434,84 @@ func NewPdfFontFromTTFFile(filePath string) (*PdfFont, error) { descriptor.Flags = core.MakeInteger(int64(flags)) // Build Font. 
- skeleton.fontDescriptor = descriptor + truefont.fontDescriptor = descriptor font := &PdfFont{ - fontSkeleton: skeleton, - context: truefont, + context: truefont, } return font, nil } + +var standard14Fonts = map[string]pdfFontSimple{ + "Courier": {basefont: "Courier", + subtype: "Type1", + encoder: textencoding.NewWinAnsiTextEncoder(), + fontMetrics: fonts.CourierCharMetrics, + }, + "Courier-Bold": {basefont: "Courier-Bold", + subtype: "Type1", + encoder: textencoding.NewWinAnsiTextEncoder(), + fontMetrics: fonts.CourierBoldCharMetrics, + }, + "Courier-BoldOblique": {basefont: "Courier-BoldOblique", + subtype: "Type1", + encoder: textencoding.NewWinAnsiTextEncoder(), + fontMetrics: fonts.CourierBoldObliqueCharMetrics, + }, + "Courier-Oblique": {basefont: "Courier-Oblique", + subtype: "Type1", + encoder: textencoding.NewWinAnsiTextEncoder(), + fontMetrics: fonts.CourierObliqueCharMetrics, + }, + "Helvetica": {basefont: "Helvetica", + subtype: "Type1", + encoder: textencoding.NewWinAnsiTextEncoder(), + fontMetrics: fonts.HelveticaCharMetrics, + }, + "Helvetica-Bold": {basefont: "Helvetica-Bold", + subtype: "Type1", + encoder: textencoding.NewWinAnsiTextEncoder(), + fontMetrics: fonts.HelveticaBoldCharMetrics, + }, + "Helvetica-BoldOblique": {basefont: "Helvetica-BoldOblique", + subtype: "Type1", + encoder: textencoding.NewWinAnsiTextEncoder(), + fontMetrics: fonts.HelveticaBoldObliqueCharMetrics, + }, + "Helvetica-Oblique": {basefont: "Helvetica-Oblique", + subtype: "Type1", + encoder: textencoding.NewWinAnsiTextEncoder(), + fontMetrics: fonts.HelveticaObliqueCharMetrics, + }, + "Times-Roman": {basefont: "Times-Roman", + subtype: "Type1", + encoder: textencoding.NewWinAnsiTextEncoder(), + fontMetrics: fonts.TimesRomanCharMetrics, + }, 
+ "Times-Bold": {basefont: "Times-Bold", + subtype: "Type1", + encoder: textencoding.NewWinAnsiTextEncoder(), + fontMetrics: fonts.TimesBoldCharMetrics, + }, + "Times-BoldItalic": {basefont: "Times-BoldItalic", + subtype: "Type1", + encoder: textencoding.NewWinAnsiTextEncoder(), + fontMetrics: fonts.TimesBoldItalicCharMetrics, + }, + "Times-Italic": {basefont: "Times-Italic", + subtype: "Type1", + encoder: textencoding.NewWinAnsiTextEncoder(), + fontMetrics: fonts.TimesItalicCharMetrics, + }, + "Symbol": {basefont: "Symbol", + subtype: "Type1", + encoder: textencoding.NewSymbolEncoder(), + fontMetrics: fonts.SymbolCharMetrics, + }, + "ZapfDingbats": {basefont: "ZapfDingbats", + subtype: "Type1", + encoder: textencoding.NewZapfDingbatsEncoder(), + fontMetrics: fonts.ZapfDingbatsCharMetrics, + }, +}