mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-02 22:17:06 +08:00

Made GlyphToCode work for all tables. Moved more aliases into glyphAliases rather than leaving the duplicates in the base maps. Use SimpleEncoder explicitly for simple fonts.
283 lines
7.3 KiB
Go
283 lines
7.3 KiB
Go
/*
|
|
* This file is subject to the terms and conditions defined in
|
|
* file 'LICENSE.md', which is part of this source code package.
|
|
 */
|
|
|
|
/*
|
|
* A font file is a stream containing a Type 1 font program. It appears in PDF files as a
|
|
* /FontFile entry in a /FontDescriptor dictionary.
|
|
*
|
|
* 9.9 Embedded Font Programs (page 289)
|
|
*
|
|
* TODO: Add Type1C support
|
|
*/
|
|
|
|
package model
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"unicode"
|
|
|
|
"github.com/unidoc/unidoc/common"
|
|
"github.com/unidoc/unidoc/pdf/core"
|
|
"github.com/unidoc/unidoc/pdf/model/textencoding"
|
|
)
|
|
|
|
// fontFile represents a font file.
|
|
// Currently this is just the identifying information and the text encoder created from the font
|
|
// file's encoding section.
|
|
type fontFile struct {
|
|
name string
|
|
subtype string
|
|
encoder *textencoding.SimpleEncoder
|
|
}
|
|
|
|
// String returns a human readable description of `fontfile`.
|
|
func (fontfile *fontFile) String() string {
|
|
encoding := "[None]"
|
|
if fontfile.encoder != nil {
|
|
encoding = fontfile.encoder.String()
|
|
}
|
|
return fmt.Sprintf("FONTFILE{%#q encoder=%s}", fontfile.name, encoding)
|
|
}
|
|
|
|
// newFontFileFromPdfObject loads a FontFile from a PdfObject. Can either be a
|
|
// *PdfIndirectObject or a *PdfObjectDictionary.
|
|
func newFontFileFromPdfObject(obj core.PdfObject) (*fontFile, error) {
|
|
common.Log.Trace("newFontFileFromPdfObject: obj=%s", obj)
|
|
fontfile := &fontFile{}
|
|
|
|
obj = core.TraceToDirectObject(obj)
|
|
|
|
streamObj, ok := obj.(*core.PdfObjectStream)
|
|
if !ok {
|
|
common.Log.Debug("ERROR: FontFile must be a stream (%T)", obj)
|
|
return nil, core.ErrTypeError
|
|
}
|
|
d := streamObj.PdfObjectDictionary
|
|
data, err := core.DecodeStream(streamObj)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
subtype, ok := core.GetNameVal(d.Get("Subtype"))
|
|
if !ok {
|
|
fontfile.subtype = subtype
|
|
if subtype == "Type1C" {
|
|
// XXX: TODO Add Type1C support
|
|
common.Log.Debug("Type1C fonts are currently not supported")
|
|
return nil, ErrType1CFontNotSupported
|
|
}
|
|
}
|
|
|
|
length1, _ := core.GetIntVal(d.Get("Length1"))
|
|
length2, _ := core.GetIntVal(d.Get("Length2"))
|
|
|
|
if length1 > len(data) {
|
|
length1 = len(data)
|
|
}
|
|
if length1+length2 > len(data) {
|
|
length2 = len(data) - length1
|
|
}
|
|
|
|
segment1 := data[:length1]
|
|
segment2 := []byte{}
|
|
if length2 > 0 {
|
|
segment2 = data[length1 : length1+length2]
|
|
}
|
|
|
|
// empty streams are ignored
|
|
if length1 > 0 && length2 > 0 {
|
|
err := fontfile.loadFromSegments(segment1, segment2)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return fontfile, nil
|
|
}
|
|
|
|
// loadFromSegments loads a Type1Font object from two header-less .pfb segments.
|
|
// Based on pdfbox
|
|
func (fontfile *fontFile) loadFromSegments(segment1, segment2 []byte) error {
|
|
common.Log.Trace("loadFromSegments: %d %d", len(segment1), len(segment2))
|
|
err := fontfile.parseAsciiPart(segment1)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
common.Log.Trace("fontfile=%s", fontfile)
|
|
if len(segment2) == 0 {
|
|
return nil
|
|
}
|
|
common.Log.Trace("fontfile=%s", fontfile)
|
|
return nil
|
|
}
|
|
|
|
// parseAsciiPart parses the ASCII part of the FontFile.
|
|
func (fontfile *fontFile) parseAsciiPart(data []byte) error {
|
|
|
|
// Uncomment these lines to see the contents of the font file. For debugging.
|
|
// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~^^^~~~~~~~~~~~~~~~~~~~~~~~")
|
|
// fmt.Printf("data=%s\n", string(data))
|
|
// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~!!!~~~~~~~~~~~~~~~~~~~~~~~")
|
|
|
|
// The start of a FontFile looks like
|
|
// %!PS-AdobeFont-1.0: MyArial 003.002
|
|
// %%Title: MyArial
|
|
// or
|
|
// %!FontType1-1.0
|
|
if len(data) < 2 || string(data[:2]) != "%!" {
|
|
return errors.New("Invalid start of ASCII segment")
|
|
}
|
|
|
|
keySection, encodingSection, err := getAsciiSections(data)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
keyValues := getKeyValues(keySection)
|
|
|
|
fontfile.name = keyValues["FontName"]
|
|
if fontfile.name == "" {
|
|
common.Log.Debug("ERROR: FontFile has no /FontName")
|
|
return ErrRequiredAttributeMissing
|
|
}
|
|
|
|
if encodingSection != "" {
|
|
encodings, err := getEncodings(encodingSection)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
encoder, err := textencoding.NewCustomSimpleTextEncoder(encodings, nil)
|
|
if err != nil {
|
|
// XXX: Logging an error because we need to fix all these misses.
|
|
common.Log.Error("UNKNOWN GLYPH: err=%v", err)
|
|
return nil
|
|
}
|
|
fontfile.encoder = encoder
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Patterns used to parse the ASCII part of a Type 1 font file.
var (
	// reDictBegin matches the start of the font dictionary, e.g. "12 dict dup begin".
	reDictBegin = regexp.MustCompile(`\d+ dict\s+(dup\s+)?begin`)
	// reKeyVal matches a "/Key value def" line, capturing the key and the value.
	reKeyVal = regexp.MustCompile(`^\s*/(\S+?)\s+(.+?)\s+def\s*$`)
	// reEncoding matches a "dup <code> /<glyph> put" line, capturing the code
	// and the glyph name without any trailing ".<digits>" suffix.
	reEncoding = regexp.MustCompile(`^\s*dup\s+(\d+)\s*/(\w+?)(?:\.\d+)?\s+put$`)
)

// Literal markers delimiting sections of a Type 1 font file.
var (
	// encodingBegin marks the start of the encoding section.
	encodingBegin = "/Encoding 256 array"
	// encodingEnd marks the end of the encoding section.
	encodingEnd = "readonly def"
	// binaryStart is the text that introduces the eexec part of the font program.
	binaryStart = "currentfile eexec"
)
|
|
|
|
// getAsciiSections returns two sections of `data`, the ASCII part of the FontFile
|
|
// - the general key values in `keySection`
|
|
// - the encoding in `encodingSection`
|
|
func getAsciiSections(data []byte) (keySection, encodingSection string, err error) {
|
|
common.Log.Trace("getAsciiSections: %d ", len(data))
|
|
loc := reDictBegin.FindIndex(data)
|
|
if loc == nil {
|
|
common.Log.Debug("ERROR: getAsciiSections. No dict.")
|
|
return "", "", core.ErrTypeError
|
|
}
|
|
i0 := loc[1]
|
|
i := strings.Index(string(data[i0:]), encodingBegin)
|
|
if i < 0 {
|
|
keySection = string(data[i0:])
|
|
return keySection, "", nil
|
|
}
|
|
i1 := i0 + i
|
|
keySection = string(data[i0:i1])
|
|
|
|
i2 := i1
|
|
i = strings.Index(string(data[i2:]), encodingEnd)
|
|
if i < 0 {
|
|
common.Log.Debug("ERROR: getAsciiSections. err=%v", err)
|
|
return "", "", core.ErrTypeError
|
|
}
|
|
i3 := i2 + i
|
|
encodingSection = string(data[i2:i3])
|
|
return keySection, encodingSection, nil
|
|
}
|
|
|
|
// reEndline matches any run of line terminators. Font files do not always use
// "\n": ~/testdata/private/invoice61781040.pdf has \r line endings.
var reEndline = regexp.MustCompile(`[\n\r]+`)
|
|
|
|
// getKeyValues returns the map encoded in `data`.
|
|
func getKeyValues(data string) map[string]string {
|
|
lines := reEndline.Split(data, -1)
|
|
keyValues := map[string]string{}
|
|
for _, line := range lines {
|
|
matches := reKeyVal.FindStringSubmatch(line)
|
|
if matches == nil {
|
|
continue
|
|
}
|
|
k, v := matches[1], matches[2]
|
|
keyValues[k] = v
|
|
}
|
|
return keyValues
|
|
}
|
|
|
|
// getEncodings returns the encodings encoded in `data`.
|
|
func getEncodings(data string) (map[uint16]string, error) {
|
|
lines := strings.Split(data, "\n")
|
|
keyValues := map[uint16]string{}
|
|
for _, line := range lines {
|
|
matches := reEncoding.FindStringSubmatch(line)
|
|
if matches == nil {
|
|
continue
|
|
}
|
|
k, glyph := matches[1], matches[2]
|
|
code, err := strconv.Atoi(k)
|
|
if err != nil {
|
|
common.Log.Debug("ERROR: Bad encoding line. %q", line)
|
|
return nil, core.ErrTypeError
|
|
}
|
|
keyValues[uint16(code)] = glyph
|
|
}
|
|
common.Log.Trace("getEncodings: keyValues=%#v", keyValues)
|
|
return keyValues, nil
|
|
}
|
|
|
|
// decodeEexec returns the decoding of the eexec bytes `data`.
//
// The first 4 bytes of `data` only advance the decryption key and produce no
// output, so the result is 4 bytes shorter than `data`. If `data` has fewer
// than 4 bytes there is nothing to decode and nil is returned.
func decodeEexec(data []byte) []byte {
	const (
		c1      = 52845
		c2      = 22719
		numSkip = 4 // leading bytes that only seed the key
	)
	if len(data) < numSkip {
		return nil
	}

	// The key is a 16-bit value; uint16 arithmetic wraps around as required.
	seed := uint16(55665) // eexec key
	// Run the seed through the encoder 4 times
	for _, b := range data[:numSkip] {
		seed = (uint16(b)+seed)*c1 + c2
	}

	decoded := make([]byte, len(data)-numSkip)
	for i, b := range data[numSkip:] {
		// Each byte is XORed with the high byte of the key, then the key is
		// advanced using the cipher byte.
		decoded[i] = b ^ byte(seed>>8)
		seed = (uint16(b)+seed)*c1 + c2
	}
	return decoded
}
|
|
|
|
// isBinary returns true if `data` is binary. See Adobe Type 1 Font Format specification
// 7.2 eexec encryption
//
// `data` is considered binary if it is shorter than 4 bytes or if any of its
// first 4 bytes is neither an ASCII hexadecimal digit nor white space.
func isBinary(data []byte) bool {
	if len(data) < 4 {
		return true
	}
	// Inspect the byte values, not their indices.
	for _, b := range data[:4] {
		r := rune(b)
		if !unicode.Is(unicode.ASCII_Hex_Digit, r) && !unicode.IsSpace(r) {
			return true
		}
	}
	return false
}
|
|
|
|
// truncate returns the first `n` bytes of string `s`, or all of `s` if it is
// shorter than `n` bytes.
func truncate(s string, n int) string {
	if n <= len(s) {
		return s[:n]
	}
	return s
}
|