unipdf/pdf/model/font_simple.go

414 lines
13 KiB
Go
Raw Normal View History

package model
import (
"errors"
"io/ioutil"
"github.com/unidoc/unidoc/common"
. "github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/model/fonts"
"github.com/unidoc/unidoc/pdf/model/textencoding"
)
// pdfFontSimple describes a Simple Font
//
// 9.6 Simple Fonts (page 254)
// 9.6.1 General
// There are several types of simple fonts, all of which have these properties:
// • Glyphs in the font shall be selected by single-byte character codes obtained from a string that
//   is shown by the text-showing operators. Logically, these codes index into a table of 256 glyphs;
//   the mapping from codes to glyphs is called the font's encoding. Under some circumstances, the
//   encoding may be altered by means described in 9.6.6, "Character Encoding".
// • Each glyph shall have a single set of metrics, including a horizontal displacement or width,
//   as described in 9.2.4, "Glyph Positioning and Metrics"; that is, simple fonts support only
//   horizontal writing mode.
// • Except for Type 0 fonts, Type 3 fonts in non-Tagged PDF documents, and certain standard Type 1
//   fonts, every font dictionary shall contain a subsidiary dictionary, the font descriptor,
//   containing font-wide metrics and other attributes of the font.
//   Among those attributes is an optional font filestream containing the font program.
type pdfFontSimple struct {
	// container is the indirect object returned by ToPdfObject; it is created on first use.
	container *PdfIndirectObject

	*fontSkeleton // Elements common to all font types

	// firstChar and lastChar are the parsed integer values of FirstChar and LastChar.
	firstChar int
	lastChar  int
	// charWidths holds the glyph widths; the width of character code c is at index c-firstChar.
	charWidths []float64
	// encoder maps between glyph names, character codes and runes. It may be nil until
	// SetEncoder (or addEncoding) has been called.
	encoder textencoding.TextEncoder

	// The fields below are the PDF objects written into the font dictionary by ToPdfObject
	// when they are non-nil.
	// Encoding is subject to limitations that are described in 9.6.6, "Character Encoding".
	// BaseFont is derived differently.
	FirstChar PdfObject
	LastChar  PdfObject
	Widths    PdfObject
	Encoding  PdfObject
}
// Encoder returns the font's text encoder. The result is nil if no encoder has been set yet.
func (font *pdfFontSimple) Encoder() textencoding.TextEncoder {
	return font.encoder
}
// SetEncoder sets the encoding for the underlying font, replacing any encoder set previously.
func (font *pdfFontSimple) SetEncoder(encoder textencoding.TextEncoder) {
	font.encoder = encoder
}
// GetGlyphCharMetrics returns the character metrics for the specified glyph. A bool flag is
// returned to indicate whether or not the entry was found in the glyph to charcode mapping.
//
// The flag is also false when the font has no encoder, when the glyph's character code lies
// outside [firstChar, lastChar], or when no width is stored for that code. Only the GlyphName
// and Wx fields of the returned metrics are populated.
func (font pdfFontSimple) GetGlyphCharMetrics(glyph string) (fonts.CharMetrics, bool) {
	metrics := fonts.CharMetrics{}

	// The encoder is optional (addEncoding may leave it unset); calling through a nil
	// interface would panic, so report "not found" instead.
	if font.encoder == nil {
		common.Log.Debug("No encoder for font. glyph=%q", glyph)
		return metrics, false
	}

	code, found := font.encoder.GlyphToCharcode(glyph)
	if !found {
		return metrics, false
	}
	metrics.GlyphName = glyph

	if int(code) < font.firstChar {
		common.Log.Debug("Code lower than firstchar (%d < %d)", code, font.firstChar)
		return metrics, false
	}
	if int(code) > font.lastChar {
		common.Log.Debug("Code higher than lastchar (%d > %d)", code, font.lastChar)
		return metrics, false
	}

	// Widths are stored relative to firstChar. The array can be shorter than the declared
	// code range (e.g. when Widths was absent), hence the extra length check.
	index := int(code) - font.firstChar
	if index >= len(font.charWidths) {
		common.Log.Debug("Code outside of widths range")
		return metrics, false
	}

	metrics.Wx = font.charWidths[index]
	return metrics, true
}
// newSimpleFontFromPdfObject creates a pdfFontSimple from dictionary `d`. Elements of `d` that
// are already parsed are contained in `skeleton`.
// An error is returned if there is a problem with loading.
// !@#$ Just return a base 14 font, if obj is a base 14 font
//
// The value of Encoding is subject to limitations that are described in 9.6.6, "Character Encoding".
// • The value of BaseFont is derived differently.
//
// !@#$ 9.6.6.4 Encodings for TrueType Fonts (page 265)
// Need to get TrueType font's cmap
2018-07-11 09:04:17 +10:00
func newSimpleFontFromPdfObject(skeleton *fontSkeleton, std14 bool) (*pdfFontSimple, error) {
font := &pdfFontSimple{fontSkeleton: skeleton}
d := skeleton.dict
// !@#$ Failing on ~/testdata/The-Byzantine-Generals-Problem.pdf
if !std14 {
2018-07-11 09:04:17 +10:00
obj := d.Get("FirstChar")
if obj == nil {
// See /Users/pcadmin/testdata/shamirturing.pdf
// if skeleton.subtype == "TrueType" {
// common.Log.Debug("ERROR: FirstChar attribute missing. font=%s d=%s", skeleton, d)
// return nil, ErrRequiredAttributeMissing
// }
obj = PdfObject(MakeInteger(0))
}
font.FirstChar = obj
intVal, ok := TraceToDirectObject(obj).(*PdfObjectInteger)
if !ok {
common.Log.Debug("ERROR: Invalid FirstChar type (%T)", obj)
return nil, ErrTypeError
}
font.firstChar = int(*intVal)
obj = d.Get("LastChar")
if obj == nil {
// if skeleton.subtype == "TrueType" {
// common.Log.Debug("ERROR: LastChar attribute missing")
// return nil, ErrRequiredAttributeMissing
// }
obj = PdfObject(MakeInteger(0))
}
font.LastChar = obj
intVal, ok = TraceToDirectObject(obj).(*PdfObjectInteger)
if !ok {
common.Log.Debug("ERROR: Invalid LastChar type (%T)", obj)
return nil, ErrTypeError
}
font.lastChar = int(*intVal)
font.charWidths = []float64{}
obj = d.Get("Widths")
if obj != nil {
// common.Log.Debug("ERROR: Widths missing from font")
// return nil, ErrRequiredAttributeMissing
// }
font.Widths = obj
arr, ok := TraceToDirectObject(obj).(*PdfObjectArray)
if !ok {
common.Log.Debug("ERROR: Widths attribute != array (%T)", arr)
return nil, ErrTypeError
}
widths, err := arr.ToFloat64Array()
if err != nil {
common.Log.Debug("ERROR: converting widths to array")
return nil, err
}
if len(widths) != (font.lastChar - font.firstChar + 1) {
common.Log.Debug("ERROR: Invalid widths length != %d (%d)",
font.lastChar-font.firstChar+1, len(widths))
return nil, ErrRangeError
}
font.charWidths = widths
}
}
font.Encoding = TraceToDirectObject(d.Get("Encoding"))
return font, nil
}
// addEncoding adds the encoding to the font.
// The order of precedence is important
func (font *pdfFontSimple) addEncoding() error {
skeleton := font.fontSkeleton
var baseEncoder string
var differences map[byte]string
var err error
if font.Encoding != nil {
// !@#$ Stop setting default encoding in getFontEncoding XXX
baseEncoder, differences, err = getFontEncoding(font.Encoding)
if err != nil {
common.Log.Debug("ERROR: BaseFont=%q Subtype=%q Encoding=%s (%T) err=%v", skeleton.basefont,
skeleton.subtype, font.Encoding, font.Encoding, err)
return err
}
common.Log.Debug("addEncoding: BaseFont=%q Subtype=%q Encoding=%s (%T)", skeleton.basefont,
skeleton.subtype, font.Encoding, font.Encoding)
encoder, err := textencoding.NewSimpleTextEncoder(baseEncoder, differences)
if err != nil {
return err
}
font.SetEncoder(encoder)
}
if font.Encoder() == nil {
2018-07-02 16:46:43 +10:00
descriptor := skeleton.fontDescriptor
if descriptor != nil {
switch skeleton.subtype {
case "Type1":
// XXX: !@#$ Is this the right order? Do the /Differences need to be reapplied?
if descriptor.fontFile != nil && descriptor.fontFile.encoder != nil {
common.Log.Debug("Using fontFile")
font.SetEncoder(descriptor.fontFile.encoder)
}
case "TrueType":
if descriptor.fontFile2 != nil {
common.Log.Debug("Using FontFile2")
encoder, err := descriptor.fontFile2.MakeEncoder()
if err == nil {
font.SetEncoder(encoder)
}
}
}
2018-07-02 16:46:43 +10:00
}
}
// At the end, apply the differences.
if differences != nil {
common.Log.Debug("differences=%+v font=%s", differences, font)
2018-07-03 14:26:42 +10:00
if se, ok := font.Encoder().(textencoding.SimpleEncoder); ok {
se.ApplyDifferences(differences)
font.SetEncoder(se)
}
}
return nil
}
// getFontEncoding returns the font encoding described by `obj`, the "Encoding" entry in a font
// dict. It returns the base encoding name, the /Differences map (nil if there is none) and an
// error.
//
//   - nil: the base encoding falls back to "StandardEncoding".
//   - name: the name itself is the base encoding.
//   - dictionary: BaseEncoding (default "StandardEncoding") plus the optional Differences array.
//   - anything else: ErrTypeError.
//
// Table 114 Entries in an encoding dictionary (page 263)
// 9.6.6.1 General (page 262)
// A font's encoding is the association between character codes (obtained from text strings that
// are shown) and glyph descriptions. This sub-clause describes the character encoding scheme used
// with simple PDF fonts. Composite fonts (Type 0) use a different character mapping algorithm, as
// discussed in 9.7, "Composite Fonts".
// Except for Type 3 fonts, every font program shall have a built-in encoding. Under certain
// circumstances, a PDF font dictionary may change the encoding used with the font program to match
// the requirements of the conforming writer generating the text being shown.
func getFontEncoding(obj PdfObject) (string, map[byte]string, error) {
	baseName := "StandardEncoding"

	if obj == nil {
		// Fall back to StandardEncoding
		return baseName, nil, nil
	}

	switch encoding := obj.(type) {
	case *PdfObjectName:
		return string(*encoding), nil, nil

	case *PdfObjectDictionary:
		// Type is an optional entry in an encoding dictionary (Table 114), so BaseEncoding is
		// honoured whether or not Type is present.
		if base, err := GetName(TraceToDirectObject(encoding.Get("BaseEncoding"))); err == nil {
			baseName = base
		}

		// Differences is optional too: a dictionary that only names a BaseEncoding is valid.
		diffObj := encoding.Get("Differences")
		if diffObj == nil {
			return baseName, nil, nil
		}

		diffList, err := GetArray(TraceToDirectObject(diffObj))
		if err != nil {
			common.Log.Debug("ERROR: Bad font encoding dict=%+v err=%v", encoding, err)
			return "", nil, ErrTypeError
		}

		differences, err := textencoding.FromFontDifferences(diffList)
		return baseName, differences, err

	default:
		common.Log.Debug("ERROR: Encoding not a name or dict (%T) %s", obj, obj.String())
		return "", nil, ErrTypeError
	}
}
// ToPdfObject builds the PDF representation of the font for output. The font dictionary produced
// by toDict is extended with the FirstChar, LastChar, Widths and Encoding entries that are set,
// and is returned wrapped in the font's indirect-object container (allocated on first call).
func (font *pdfFontSimple) ToPdfObject() PdfObject {
	container := font.container
	if container == nil {
		container = &PdfIndirectObject{}
		font.container = container
	}

	dict := font.toDict("")
	container.PdfObject = dict

	// Only entries that are actually present are emitted.
	if first := font.FirstChar; first != nil {
		dict.Set("FirstChar", first)
	}
	if last := font.LastChar; last != nil {
		dict.Set("LastChar", last)
	}
	if widths := font.Widths; widths != nil {
		dict.Set("Widths", widths)
	}
	if enc := font.Encoding; enc != nil {
		dict.Set("Encoding", enc)
	}

	return container
}
// NewPdfFontFromTTFFile loads a TTF font and returns a PdfFont type that can be used in text
// styling functions.
// Uses a WinAnsiTextEncoder and loads only character codes 32-255.
func NewPdfFontFromTTFFile(filePath string) (*PdfFont, error) {
2018-07-02 13:49:06 +10:00
const minCode = 32
const maxCode = 255
ttf, err := fonts.TtfParse(filePath)
if err != nil {
common.Log.Debug("ERROR: loading ttf font: %v", err)
return nil, err
}
skeleton := fontSkeleton{subtype: "TrueType"}
truefont := &pdfFontSimple{fontSkeleton: &skeleton}
// TODO: Make more generic to allow customization... Need to know which glyphs are to be used,
// then can derive
// TODO: Subsetting fonts.
truefont.encoder = textencoding.NewWinAnsiTextEncoder()
2018-07-02 13:49:06 +10:00
truefont.firstChar = minCode
truefont.lastChar = maxCode
truefont.basefont = ttf.PostScriptName
2018-07-02 13:49:06 +10:00
truefont.FirstChar = MakeInteger(minCode)
truefont.LastChar = MakeInteger(maxCode)
k := 1000.0 / float64(ttf.UnitsPerEm)
if len(ttf.Widths) <= 0 {
return nil, errors.New("ERROR: Missing required attribute (Widths)")
}
missingWidth := k * float64(ttf.Widths[0])
2018-07-02 13:49:06 +10:00
vals := make([]float64, 0, maxCode-minCode+1)
for code := minCode; code <= maxCode; code++ {
r, found := truefont.Encoder().CharcodeToRune(uint16(code))
if !found {
common.Log.Debug("Rune not found (code: %d)", code)
vals = append(vals, missingWidth)
continue
}
pos, ok := ttf.Chars[uint16(r)]
if !ok {
common.Log.Debug("Rune not in TTF Chars")
vals = append(vals, missingWidth)
continue
}
w := k * float64(ttf.Widths[pos])
vals = append(vals, w)
}
truefont.Widths = &PdfIndirectObject{PdfObject: MakeArrayFromFloats(vals)}
if len(vals) < (255 - 32 + 1) {
common.Log.Debug("ERROR: Invalid length of widths, %d < %d", len(vals), 255-32+1)
return nil, errors.New("Range check error")
}
truefont.charWidths = vals[:255-32+1]
// Use WinAnsiEncoding by default.
truefont.Encoding = MakeName("WinAnsiEncoding")
descriptor := &PdfFontDescriptor{}
descriptor.Ascent = MakeFloat(k * float64(ttf.TypoAscender))
descriptor.Descent = MakeFloat(k * float64(ttf.TypoDescender))
descriptor.CapHeight = MakeFloat(k * float64(ttf.CapHeight))
descriptor.FontBBox = MakeArrayFromFloats([]float64{k * float64(ttf.Xmin), k * float64(ttf.Ymin), k * float64(ttf.Xmax), k * float64(ttf.Ymax)})
descriptor.ItalicAngle = MakeFloat(float64(ttf.ItalicAngle))
descriptor.MissingWidth = MakeFloat(k * float64(ttf.Widths[0]))
ttfBytes, err := ioutil.ReadFile(filePath)
if err != nil {
common.Log.Debug("Unable to read file contents: %v", err)
return nil, err
}
stream, err := MakeStream(ttfBytes, NewFlateEncoder())
if err != nil {
common.Log.Debug("Unable to make stream: %v", err)
return nil, err
}
stream.PdfObjectDictionary.Set("Length1", MakeInteger(int64(len(ttfBytes))))
descriptor.FontFile2 = stream
if ttf.Bold {
descriptor.StemV = MakeInteger(120)
} else {
descriptor.StemV = MakeInteger(70)
}
// Flags.
flags := 1 << 5
if ttf.IsFixedPitch {
flags |= 1
}
if ttf.ItalicAngle != 0 {
flags |= 1 << 6
}
descriptor.Flags = MakeInteger(int64(flags))
// Build Font.
skeleton.fontDescriptor = descriptor
font := &PdfFont{
fontSkeleton: skeleton,
context: truefont,
}
return font, nil
}