2018-07-02 16:46:43 +10:00
|
|
|
package model
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"regexp"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"unicode"
|
|
|
|
|
|
|
|
"github.com/unidoc/unidoc/common"
|
|
|
|
. "github.com/unidoc/unidoc/pdf/core"
|
|
|
|
"github.com/unidoc/unidoc/pdf/model/textencoding"
|
|
|
|
)
|
|
|
|
|
|
|
|
type fontFile struct {
|
|
|
|
name string
|
2018-07-03 14:26:42 +10:00
|
|
|
subtype string
|
2018-07-02 16:46:43 +10:00
|
|
|
encoder textencoding.TextEncoder
|
|
|
|
// binary []byte
|
|
|
|
}
|
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
func (fontfile *fontFile) String() string {
|
2018-07-04 18:00:37 +10:00
|
|
|
encoding := "[None]"
|
|
|
|
if fontfile.encoder != nil {
|
|
|
|
encoding = fontfile.encoder.String()
|
|
|
|
}
|
|
|
|
return fmt.Sprintf("FONTFILE{%#q encoder=%s}", fontfile.name, encoding)
|
2018-07-03 14:26:42 +10:00
|
|
|
}
|
|
|
|
|
2018-07-02 16:46:43 +10:00
|
|
|
// newFontFileFromPdfObject loads a FontFile from a PdfObject. Can either be a
|
|
|
|
// *PdfIndirectObject or a *PdfObjectDictionary.
|
|
|
|
func newFontFileFromPdfObject(obj PdfObject) (*fontFile, error) {
|
|
|
|
common.Log.Debug("newFontFileFromPdfObject: obj=%s", obj)
|
|
|
|
fontfile := &fontFile{}
|
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
obj = TraceToDirectObject(obj)
|
2018-07-02 16:46:43 +10:00
|
|
|
|
|
|
|
streamObj, ok := obj.(*PdfObjectStream)
|
|
|
|
if !ok {
|
|
|
|
common.Log.Debug("ERROR: FontFile must be a stream (%T)", obj)
|
|
|
|
return nil, ErrTypeError
|
|
|
|
}
|
|
|
|
d := streamObj.PdfObjectDictionary
|
|
|
|
data, err := DecodeStream(streamObj)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
subtype, err := GetName(TraceToDirectObject(d.Get("Subtype")))
|
|
|
|
if err != nil {
|
|
|
|
fontfile.subtype = subtype
|
|
|
|
if subtype == "Type1C" {
|
|
|
|
// XXX: TODO Add Type1C support
|
2018-07-06 16:55:39 +10:00
|
|
|
common.Log.Debug("Type1C fonts are currently not supported")
|
2018-07-03 14:26:42 +10:00
|
|
|
return nil, ErrFontNotSupported
|
|
|
|
}
|
|
|
|
}
|
2018-07-02 16:46:43 +10:00
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
length1 := int(*(TraceToDirectObject(d.Get("Length1")).(*PdfObjectInteger)))
|
|
|
|
length2 := int(*(TraceToDirectObject(d.Get("Length2")).(*PdfObjectInteger)))
|
|
|
|
if length1 > len(data) {
|
|
|
|
length1 = len(data)
|
|
|
|
}
|
|
|
|
if length1+length2 > len(data) {
|
|
|
|
length2 = len(data) - length1
|
|
|
|
}
|
2018-07-02 16:46:43 +10:00
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
segment1 := data[:length1]
|
|
|
|
segment2 := []byte{}
|
|
|
|
if length2 > 0 {
|
|
|
|
segment2 = data[length1 : length1+length2]
|
|
|
|
}
|
2018-07-02 16:46:43 +10:00
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
// empty streams are ignored
|
|
|
|
if length1 > 0 && length2 > 0 {
|
|
|
|
err := fontfile.loadFromSegments(segment1, segment2)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2018-07-02 16:46:43 +10:00
|
|
|
}
|
|
|
|
}
|
2018-07-03 14:26:42 +10:00
|
|
|
|
|
|
|
common.Log.Debug("fontfile=%s", fontfile)
|
2018-07-02 16:46:43 +10:00
|
|
|
return fontfile, nil
|
|
|
|
}
|
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
// loadFromSegments loads a Type1Font object from two header-less .pfb segments.
|
|
|
|
// Based on pdfbox
|
2018-07-02 16:46:43 +10:00
|
|
|
func (fontfile *fontFile) loadFromSegments(segment1, segment2 []byte) error {
|
|
|
|
common.Log.Debug("loadFromSegments: %d %d", len(segment1), len(segment2))
|
2018-07-03 14:26:42 +10:00
|
|
|
err := fontfile.parseAsciiPart(segment1)
|
2018-07-02 16:46:43 +10:00
|
|
|
if err != nil {
|
|
|
|
common.Log.Debug("err=%v", err)
|
|
|
|
return err
|
|
|
|
}
|
2018-07-03 14:26:42 +10:00
|
|
|
common.Log.Debug("fontfile=%s", fontfile)
|
2018-07-02 16:46:43 +10:00
|
|
|
if len(segment2) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
2018-07-03 14:26:42 +10:00
|
|
|
// err = fontfile.parseEexecPart(segment2)
|
|
|
|
// if err != nil {
|
|
|
|
// common.Log.Debug("err=%v", err)
|
|
|
|
// return err
|
|
|
|
// }
|
2018-07-02 16:46:43 +10:00
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
common.Log.Debug("fontfile=%s", fontfile)
|
2018-07-02 16:46:43 +10:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
// parseAsciiPart parses the ASCII part of the FontFile.
|
|
|
|
func (fontfile *fontFile) parseAsciiPart(data []byte) error {
|
|
|
|
common.Log.Debug("parseAsciiPart: %d ", len(data))
|
2018-07-04 18:00:37 +10:00
|
|
|
// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~^^^~~~~~~~~~~~~~~~~~~~~~~~")
|
|
|
|
// fmt.Printf("data=%s\n", string(data))
|
|
|
|
// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~!!!~~~~~~~~~~~~~~~~~~~~~~~")
|
2018-07-06 16:55:39 +10:00
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
// The start of a FontFile looks like
|
|
|
|
// %!PS-AdobeFont-1.0: MyArial 003.002
|
|
|
|
// %%Title: MyArial
|
|
|
|
// or
|
|
|
|
// %!FontType1-1.0
|
2018-07-02 16:46:43 +10:00
|
|
|
if len(data) < 2 || string(data[:2]) != "%!" {
|
|
|
|
return errors.New("Invalid start of ASCII segment")
|
|
|
|
}
|
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
keySection, encodingSection, err := getAsciiSections(data)
|
2018-07-02 16:46:43 +10:00
|
|
|
if err != nil {
|
|
|
|
common.Log.Debug("err=%v", err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
keyValues := getKeyValues(keySection)
|
|
|
|
|
|
|
|
fontfile.name = keyValues["FontName"]
|
|
|
|
if fontfile.name == "" {
|
2018-07-03 14:26:42 +10:00
|
|
|
common.Log.Debug("ERROR: FontFile has no /FontName")
|
|
|
|
return ErrRequiredAttributeMissing
|
2018-07-02 16:46:43 +10:00
|
|
|
}
|
|
|
|
|
2018-07-06 16:55:39 +10:00
|
|
|
// encodingName, ok := keyValues["Encoding"]
|
|
|
|
// !@#$ I am not sure why we don't do this
|
|
|
|
// if ok {
|
|
|
|
// encoder, err := textencoding.NewSimpleTextEncoder(encodingName, nil)
|
|
|
|
// if err != nil {
|
|
|
|
// return err
|
|
|
|
// }
|
|
|
|
// fontfile.encoder = encoder
|
|
|
|
// }
|
2018-07-02 16:46:43 +10:00
|
|
|
if encodingSection != "" {
|
|
|
|
encodings, err := getEncodings(encodingSection)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
encoder, err := textencoding.NewCustomSimpleTextEncoder(encodings, nil)
|
|
|
|
if err != nil {
|
2018-07-06 16:55:39 +10:00
|
|
|
// XXX: !@#$ We need to fix all these errors
|
|
|
|
common.Log.Error("UNKNOWN GLYPH: err=%v", err)
|
2018-07-03 14:26:42 +10:00
|
|
|
return nil
|
2018-07-02 16:46:43 +10:00
|
|
|
}
|
|
|
|
fontfile.encoder = encoder
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
// // parseEexecPart parses the binary encrypted part of the FontFile.
|
|
|
|
// func (fontfile *fontFile) parseEexecPart(data []byte) error {
|
|
|
|
// // Sometimes, fonts use hex format
|
|
|
|
// if !isBinary(data) {
|
|
|
|
// decoded, err := hex.DecodeString(string(data))
|
|
|
|
// if err != nil {
|
|
|
|
// return err
|
|
|
|
// }
|
|
|
|
// data = decoded
|
|
|
|
// }
|
|
|
|
// decoded := decodeEexec(data)
|
2018-07-06 16:55:39 +10:00
|
|
|
// fmt.Println(":::::::::::::::::::::<<>>:::::::::::::::::::::")
|
|
|
|
// fmt.Printf("%s\n", string(decoded))
|
|
|
|
// fmt.Println(":::::::::::::::::::::<><>:::::::::::::::::::::")
|
2018-07-03 14:26:42 +10:00
|
|
|
// return nil
|
|
|
|
// }
|
2018-07-02 16:46:43 +10:00
|
|
|
|
|
|
|
// Regular expressions used to pick apart the clear-text part of a font file.
var (
	// reDictBegin matches the opening of a dictionary, e.g. "12 dict dup begin".
	reDictBegin = regexp.MustCompile(`\d+ dict\s+(dup\s+)?begin`)
	// reKeyVal matches a whole "/Key value def" line; group 1 is the key and group 2
	// the value.
	reKeyVal = regexp.MustCompile(`^\s*/(\S+?)\s+(.+?)\s+def\s*$`)
	// reEncoding matches one "dup code /glyph put" entry; group 1 is the code and
	// group 2 the glyph name.
	reEncoding = regexp.MustCompile(`dup\s+(\d+)\s*/(\w+)\s+put`)
)

// Literal markers searched for in the clear-text part of a font file.
var (
	encodingBegin = "/Encoding 256 array"
	encodingEnd   = "readonly def"
	binaryStart   = "currentfile eexec"
)
|
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
// getAsciiSections returns two sections of `data`, the ASCII part of the FontFile
|
|
|
|
// - the general key values in `keySection`
|
|
|
|
// - the encoding in `encodingSection`
|
|
|
|
func getAsciiSections(data []byte) (keySection, encodingSection string, err error) {
|
|
|
|
common.Log.Debug("getAsciiSections: %d ", len(data))
|
2018-07-02 16:46:43 +10:00
|
|
|
loc := reDictBegin.FindIndex(data)
|
|
|
|
if loc == nil {
|
|
|
|
err = ErrTypeError
|
2018-07-03 14:26:42 +10:00
|
|
|
common.Log.Debug("getAsciiSections: No dict.")
|
2018-07-02 16:46:43 +10:00
|
|
|
return
|
|
|
|
}
|
|
|
|
i0 := loc[1]
|
2018-07-03 14:26:42 +10:00
|
|
|
i := strings.Index(string(data[i0:]), encodingBegin)
|
2018-07-02 16:46:43 +10:00
|
|
|
if i < 0 {
|
2018-07-03 14:26:42 +10:00
|
|
|
keySection = string(data[i0:])
|
2018-07-02 16:46:43 +10:00
|
|
|
return
|
|
|
|
}
|
|
|
|
i1 := i0 + i
|
|
|
|
keySection = string(data[i0:i1])
|
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
i2 := i1
|
2018-07-02 16:46:43 +10:00
|
|
|
i = strings.Index(string(data[i2:]), encodingEnd)
|
|
|
|
if i < 0 {
|
|
|
|
err = ErrTypeError
|
|
|
|
common.Log.Debug("err=%v", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
i3 := i2 + i
|
|
|
|
encodingSection = string(data[i2:i3])
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-07-06 16:55:39 +10:00
|
|
|
// reEndline matches a run of one or more line-ending characters, so text can be split
// into lines whether it uses "\n", "\r\n" or bare "\r" endings. Bare "\r" endings were
// seen in the test file invoice61781040.pdf.
var (
	reEndline = regexp.MustCompile(`[\n\r]+`)
)
|
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
// getKeyValues returns the map encoded in `data`.
|
2018-07-02 16:46:43 +10:00
|
|
|
func getKeyValues(data string) map[string]string {
|
2018-07-06 16:55:39 +10:00
|
|
|
// lines := strings.Split(data, "\n")
|
|
|
|
lines := reEndline.Split(data, -1)
|
2018-07-02 16:46:43 +10:00
|
|
|
keyValues := map[string]string{}
|
|
|
|
for _, line := range lines {
|
|
|
|
matches := reKeyVal.FindStringSubmatch(line)
|
|
|
|
if matches == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
k, v := matches[1], matches[2]
|
|
|
|
keyValues[k] = v
|
|
|
|
}
|
|
|
|
return keyValues
|
|
|
|
}
|
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
// getEncodings returns the encodings encoded in `data`.
|
2018-07-02 16:46:43 +10:00
|
|
|
func getEncodings(data string) (map[uint16]string, error) {
|
|
|
|
lines := strings.Split(data, "\n")
|
|
|
|
keyValues := map[uint16]string{}
|
|
|
|
for _, line := range lines {
|
|
|
|
matches := reEncoding.FindStringSubmatch(line)
|
|
|
|
if matches == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
k, glyph := matches[1], matches[2]
|
|
|
|
code, err := strconv.Atoi(k)
|
|
|
|
if err != nil {
|
|
|
|
common.Log.Debug("ERROR: Bad encoding line. %q", line)
|
|
|
|
return nil, ErrTypeCheck
|
|
|
|
}
|
2018-07-03 14:26:42 +10:00
|
|
|
// if !textencoding.KnownGlyph(glyph) {
|
|
|
|
// common.Log.Debug("ERROR: Unknown glyph %q. line=%q", glyph, line)
|
|
|
|
// return nil, ErrTypeCheck
|
|
|
|
// }
|
2018-07-02 16:46:43 +10:00
|
|
|
keyValues[uint16(code)] = glyph
|
|
|
|
}
|
|
|
|
common.Log.Debug("getEncodings: keyValues=%#v", keyValues)
|
|
|
|
return keyValues, nil
|
|
|
|
}
|
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
// decodeEexec returns the decoding of the eexec bytes `data`.
//
// The first 4 bytes of `data` only advance the cipher state and produce no output, so the
// result is 4 bytes shorter than `data`. If `data` has fewer than 4 bytes there is
// nothing to decode and an empty slice is returned.
func decodeEexec(data []byte) []byte {
	const (
		c1       = 52845
		c2       = 22719
		numLeads = 4 // leading bytes that are consumed but not emitted
	)
	if len(data) < numLeads {
		return []byte{}
	}

	// The cipher state is 16 bits wide; uint16 arithmetic wraps to exactly that width.
	seed := uint16(55665) // eexec key
	// Run the seed through the encoder 4 times
	for _, b := range data[:numLeads] {
		seed = (uint16(b)+seed)*c1 + c2
	}

	decoded := make([]byte, len(data)-numLeads)
	for i, b := range data[numLeads:] {
		// Each plaintext byte is the cipher byte XORed with the high byte of the state;
		// the state is then advanced with the cipher byte.
		decoded[i] = b ^ byte(seed>>8)
		seed = (uint16(b)+seed)*c1 + c2
	}
	return decoded
}
|
|
|
|
|
2018-07-03 14:26:42 +10:00
|
|
|
// isBinary returns true if `data` is binary. See Adobe Type 1 Font Format specification
// 7.2 eexec encryption.
//
// `data` is treated as hex-encoded (false is returned) only when each of its first 4
// bytes is an ASCII hex digit or ASCII white space. Data shorter than 4 bytes is reported
// as binary.
func isBinary(data []byte) bool {
	if len(data) < 4 {
		return true
	}
	// Range over the byte values, not the indices.
	for _, b := range data[:4] {
		r := rune(b)
		isHex := unicode.Is(unicode.ASCII_Hex_Digit, r)
		// unicode.IsSpace also accepts 0x85 and 0xA0, which are ordinary bytes in binary
		// data, so only ASCII white space counts.
		isSpace := b < 0x80 && unicode.IsSpace(r)
		if !isHex && !isSpace {
			return true
		}
	}
	return false
}
|
2018-07-03 14:26:42 +10:00
|
|
|
|
|
|
|
// truncate returns the first `n` bytes of string `s`, or all of `s` if it has no more
// than `n` bytes. A zero or negative `n` gives the empty string. The cut is made on a
// byte boundary, so a multi-byte UTF-8 character at the cut may be split.
func truncate(s string, n int) string {
	if n <= 0 {
		// Slicing with a negative bound would panic.
		return ""
	}
	if len(s) <= n {
		return s
	}
	return s[:n]
}
|