2018-03-22 14:03:47 +00:00
|
|
|
|
/*
|
|
|
|
|
* This file is subject to the terms and conditions defined in
|
|
|
|
|
* file 'LICENSE.md', which is part of this source code package.
|
|
|
|
|
*/
|
|
|
|
|
|
2018-03-22 13:01:04 +00:00
|
|
|
|
package cmap
|
|
|
|
|
|
|
|
|
|
import (
|
2018-06-27 12:25:59 +10:00
|
|
|
|
"fmt"
|
|
|
|
|
"sort"
|
|
|
|
|
"strings"
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
|
|
|
|
"github.com/unidoc/unidoc/common"
|
2018-07-15 16:28:56 +10:00
|
|
|
|
"github.com/unidoc/unidoc/pdf/core"
|
2018-08-03 21:15:21 +00:00
|
|
|
|
"github.com/unidoc/unidoc/pdf/internal/textencoding"
|
2018-03-22 13:01:04 +00:00
|
|
|
|
)
|
|
|
|
|
|
2018-06-27 12:25:59 +10:00
|
|
|
|
// CharCode is a character code or a Unicode code point.
// It is an unsigned 32-bit value, whereas Go's rune is an int32
// (see https://golang.org/doc/go1#rune).
type CharCode uint32
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-06-27 12:25:59 +10:00
|
|
|
|
// maxCodeLen is the maximum number of bytes a single character code may occupy.
const maxCodeLen = 4
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-06-27 12:25:59 +10:00
|
|
|
|
// Codespace represents a single codespace range used in the CMap.
|
|
|
|
|
type Codespace struct {
|
|
|
|
|
NumBytes int
|
|
|
|
|
Low CharCode
|
|
|
|
|
High CharCode
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
|
|
|
|
|
2018-06-27 12:25:59 +10:00
|
|
|
|
// CIDSystemInfo holds the entries of a CIDSystemInfo dictionary, e.g.
// Dict("Registry": Adobe, "Ordering": Korea1, "Supplement": 0).
type CIDSystemInfo struct {
	// Registry is the "Registry" entry, e.g. "Adobe".
	Registry string
	// Ordering is the "Ordering" entry, e.g. "Korea1".
	Ordering string
	// Supplement is the "Supplement" entry, e.g. 0.
	Supplement int
}
|
|
|
|
|
|
2018-06-27 12:25:59 +10:00
|
|
|
|
// CMap represents a character code to unicode mapping used in PDF files.
|
2018-07-25 16:14:19 +10:00
|
|
|
|
// References:
|
|
|
|
|
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/5411.ToUnicode.pdf
|
|
|
|
|
// https://github.com/adobe-type-tools/cmap-resources/releases
|
2018-06-27 12:25:59 +10:00
|
|
|
|
type CMap struct {
|
|
|
|
|
*cMapParser
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-06-27 12:25:59 +10:00
|
|
|
|
name string
|
|
|
|
|
nbits int // 8 bits for simple fonts, 16 bits for CID fonts.
|
|
|
|
|
ctype int
|
|
|
|
|
version string
|
2018-09-17 17:57:52 +10:00
|
|
|
|
usecmap string // Base this cmap on `usecmap` if `usecmap` is not empty.
|
2018-06-27 12:25:59 +10:00
|
|
|
|
systemInfo CIDSystemInfo
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-09-17 17:57:52 +10:00
|
|
|
|
// For regular cmaps.
|
2018-06-27 12:25:59 +10:00
|
|
|
|
codespaces []Codespace
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-09-17 17:57:52 +10:00
|
|
|
|
// For ToUnicode (ctype 2) cmaps.
|
|
|
|
|
codeToUnicode map[CharCode]string
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// NewToUnicodeCMap returns an identity CMap with codeToUnicode matching the `codeToUnicode` arg.
|
|
|
|
|
func NewToUnicodeCMap(codeToUnicode map[CharCode]string) *CMap {
|
|
|
|
|
return &CMap{
|
|
|
|
|
name: "Adobe-Identity-UCS",
|
|
|
|
|
ctype: 2,
|
|
|
|
|
nbits: 16,
|
|
|
|
|
systemInfo: CIDSystemInfo{
|
|
|
|
|
Registry: "Adobe",
|
|
|
|
|
Ordering: "UCS",
|
|
|
|
|
Supplement: 0,
|
|
|
|
|
},
|
|
|
|
|
codespaces: []Codespace{Codespace{Low: 0, High: 0xffff}},
|
|
|
|
|
codeToUnicode: codeToUnicode,
|
|
|
|
|
}
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
|
|
|
|
|
2018-07-25 12:00:49 +10:00
|
|
|
|
// String returns a human readable description of `cmap`.
|
2018-06-27 12:25:59 +10:00
|
|
|
|
func (cmap *CMap) String() string {
|
|
|
|
|
si := cmap.systemInfo
|
|
|
|
|
parts := []string{
|
|
|
|
|
fmt.Sprintf("nbits:%d", cmap.nbits),
|
|
|
|
|
fmt.Sprintf("type:%d", cmap.ctype),
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
2018-06-27 12:25:59 +10:00
|
|
|
|
if cmap.version != "" {
|
|
|
|
|
parts = append(parts, fmt.Sprintf("version:%s", cmap.version))
|
|
|
|
|
}
|
|
|
|
|
if cmap.usecmap != "" {
|
|
|
|
|
parts = append(parts, fmt.Sprintf("usecmap:%#q", cmap.usecmap))
|
|
|
|
|
}
|
|
|
|
|
parts = append(parts, fmt.Sprintf("systemInfo:%s", si.String()))
|
|
|
|
|
if len(cmap.codespaces) > 0 {
|
|
|
|
|
parts = append(parts, fmt.Sprintf("codespaces:%d", len(cmap.codespaces)))
|
|
|
|
|
}
|
|
|
|
|
if len(cmap.codeToUnicode) > 0 {
|
|
|
|
|
parts = append(parts, fmt.Sprintf("codeToUnicode:%d", len(cmap.codeToUnicode)))
|
|
|
|
|
}
|
|
|
|
|
return fmt.Sprintf("CMAP{%#q %s}", cmap.name, strings.Join(parts, " "))
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// newCMap returns an initialized CMap.
|
2018-06-27 12:25:59 +10:00
|
|
|
|
func newCMap(isSimple bool) *CMap {
|
|
|
|
|
nbits := 16
|
|
|
|
|
if isSimple {
|
2018-06-27 17:27:41 +10:00
|
|
|
|
nbits = 8
|
2018-06-27 12:25:59 +10:00
|
|
|
|
}
|
|
|
|
|
cmap := &CMap{
|
|
|
|
|
nbits: nbits,
|
|
|
|
|
codeToUnicode: map[CharCode]string{},
|
|
|
|
|
}
|
2018-03-22 13:01:04 +00:00
|
|
|
|
return cmap
|
|
|
|
|
}
|
|
|
|
|
|
2018-07-24 21:32:02 +10:00
|
|
|
|
// String returns a human readable description of `info`.
|
2018-06-27 14:22:47 +10:00
|
|
|
|
// It looks like "Adobe-Japan2-000".
|
2018-06-27 12:25:59 +10:00
|
|
|
|
func (info *CIDSystemInfo) String() string {
|
2018-06-27 14:22:47 +10:00
|
|
|
|
return fmt.Sprintf("%s-%s-%03d", info.Registry, info.Ordering, info.Supplement)
|
2018-06-27 12:25:59 +10:00
|
|
|
|
}
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-09-17 17:57:52 +10:00
|
|
|
|
// NewCIDSystemInfo returns the CIDSystemInfo encoded in PDFObject `obj`.
|
2018-07-15 16:28:56 +10:00
|
|
|
|
func NewCIDSystemInfo(obj core.PdfObject) (info CIDSystemInfo, err error) {
|
2018-07-24 21:32:02 +10:00
|
|
|
|
d, ok := core.GetDict(obj)
|
|
|
|
|
if !ok {
|
2018-07-25 12:00:49 +10:00
|
|
|
|
return CIDSystemInfo{}, core.ErrTypeError
|
2018-07-24 21:32:02 +10:00
|
|
|
|
}
|
2018-07-21 21:20:39 +10:00
|
|
|
|
registry, ok := core.GetStringVal(d.Get("Registry"))
|
|
|
|
|
if !ok {
|
2018-07-25 12:00:49 +10:00
|
|
|
|
return CIDSystemInfo{}, core.ErrTypeError
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
2018-07-21 21:20:39 +10:00
|
|
|
|
ordering, ok := core.GetStringVal(d.Get("Ordering"))
|
|
|
|
|
if !ok {
|
2018-07-25 12:00:49 +10:00
|
|
|
|
return CIDSystemInfo{}, core.ErrTypeError
|
2018-06-27 12:25:59 +10:00
|
|
|
|
}
|
2018-07-21 21:20:39 +10:00
|
|
|
|
supplement, ok := core.GetIntVal(d.Get("Supplement"))
|
|
|
|
|
if !ok {
|
2018-07-25 12:00:49 +10:00
|
|
|
|
return CIDSystemInfo{}, core.ErrTypeError
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
2018-07-25 12:00:49 +10:00
|
|
|
|
return CIDSystemInfo{
|
2018-06-27 12:25:59 +10:00
|
|
|
|
Registry: registry,
|
|
|
|
|
Ordering: ordering,
|
|
|
|
|
Supplement: supplement,
|
2018-07-25 12:00:49 +10:00
|
|
|
|
}, nil
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
|
|
|
|
|
2018-06-27 12:25:59 +10:00
|
|
|
|
// Name returns the name of the CMap.
|
|
|
|
|
func (cmap *CMap) Name() string {
|
|
|
|
|
return cmap.name
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
|
|
|
|
|
2018-07-15 16:28:56 +10:00
|
|
|
|
// Type returns the CMap type.
|
2018-06-27 12:25:59 +10:00
|
|
|
|
func (cmap *CMap) Type() int {
|
|
|
|
|
return cmap.ctype
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
|
|
|
|
|
2018-09-17 17:57:52 +10:00
|
|
|
|
// MissingCodeRune replaces runes that can't be decoded. '\ufffd' = <20>. Was '?'.
|
2018-07-15 16:28:56 +10:00
|
|
|
|
const MissingCodeRune = textencoding.MissingCodeRune
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-07-15 16:28:56 +10:00
|
|
|
|
// MissingCodeString replaces strings that can't be decoded.
|
|
|
|
|
var MissingCodeString = string(MissingCodeRune)
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-06-27 12:25:59 +10:00
|
|
|
|
// CharcodeBytesToUnicode converts a byte array of charcodes to a unicode string representation.
|
2018-06-27 22:01:17 +10:00
|
|
|
|
// It also returns a bool flag to tell if the conversion was successful.
|
2018-07-24 21:32:02 +10:00
|
|
|
|
// NOTE: This only works for ToUnicode cmaps.
|
2018-07-13 17:40:27 +10:00
|
|
|
|
func (cmap *CMap) CharcodeBytesToUnicode(data []byte) (string, int) {
|
2018-06-27 12:25:59 +10:00
|
|
|
|
charcodes, matched := cmap.bytesToCharcodes(data)
|
|
|
|
|
if !matched {
|
|
|
|
|
common.Log.Debug("ERROR: CharcodeBytesToUnicode. Not in codespaces. data=[% 02x] cmap=%s",
|
|
|
|
|
data, cmap)
|
2018-07-13 17:40:27 +10:00
|
|
|
|
return "", 0
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
|
|
|
|
|
2018-06-27 12:25:59 +10:00
|
|
|
|
parts := []string{}
|
2018-07-13 17:40:27 +10:00
|
|
|
|
missing := []CharCode{}
|
2018-06-27 12:25:59 +10:00
|
|
|
|
for _, code := range charcodes {
|
|
|
|
|
s, ok := cmap.codeToUnicode[code]
|
2018-03-22 13:01:04 +00:00
|
|
|
|
if !ok {
|
2018-07-13 17:40:27 +10:00
|
|
|
|
missing = append(missing, code)
|
|
|
|
|
s = MissingCodeString
|
2018-06-03 01:05:46 +00:00
|
|
|
|
}
|
2018-06-27 12:25:59 +10:00
|
|
|
|
parts = append(parts, s)
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
2018-07-13 17:40:27 +10:00
|
|
|
|
unicode := strings.Join(parts, "")
|
|
|
|
|
if len(missing) > 0 {
|
|
|
|
|
common.Log.Debug("ERROR: CharcodeBytesToUnicode. Not in map.\n"+
|
|
|
|
|
"\tdata=[% 02x]=%#q\n"+
|
|
|
|
|
"\tcharcodes=%02x\n"+
|
|
|
|
|
"\tmissing=%d %02x\n"+
|
|
|
|
|
"\tunicode=`%s`\n"+
|
|
|
|
|
"\tcmap=%s",
|
|
|
|
|
data, string(data), charcodes, len(missing), missing, unicode, cmap)
|
|
|
|
|
}
|
|
|
|
|
return unicode, len(missing)
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
|
|
|
|
|
2018-07-24 21:32:02 +10:00
|
|
|
|
// CharcodeToUnicode converts a single character code `code` to a unicode string.
|
|
|
|
|
// If `code` is not in the unicode map, "<22>" is returned.
|
|
|
|
|
// NOTE: CharcodeBytesToUnicode is typically more efficient.
|
|
|
|
|
func (cmap *CMap) CharcodeToUnicode(code CharCode) (string, bool) {
|
2018-06-27 12:25:59 +10:00
|
|
|
|
if s, ok := cmap.codeToUnicode[code]; ok {
|
2018-07-13 17:40:27 +10:00
|
|
|
|
return s, true
|
2018-06-27 12:25:59 +10:00
|
|
|
|
}
|
2018-07-13 17:40:27 +10:00
|
|
|
|
return MissingCodeString, false
|
2018-06-27 12:25:59 +10:00
|
|
|
|
}
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-06-27 12:25:59 +10:00
|
|
|
|
// bytesToCharcodes attempts to convert the entire byte array `data` to a list of character codes
|
|
|
|
|
// from the ranges specified by `cmap`'s codespaces.
|
|
|
|
|
// Returns:
|
|
|
|
|
// character code sequence (if there is a match complete match)
|
|
|
|
|
// matched?
|
|
|
|
|
// NOTE: A partial list of character codes will be returned if a complete match is not possible.
|
|
|
|
|
func (cmap *CMap) bytesToCharcodes(data []byte) ([]CharCode, bool) {
|
|
|
|
|
charcodes := []CharCode{}
|
|
|
|
|
if cmap.nbits == 8 {
|
|
|
|
|
for _, b := range data {
|
|
|
|
|
charcodes = append(charcodes, CharCode(b))
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
2018-06-27 12:25:59 +10:00
|
|
|
|
return charcodes, true
|
|
|
|
|
}
|
|
|
|
|
for i := 0; i < len(data); {
|
|
|
|
|
code, n, matched := cmap.matchCode(data[i:])
|
|
|
|
|
if !matched {
|
|
|
|
|
common.Log.Debug("ERROR: No code match at i=%d bytes=[% 02x]=%#q", i, data, string(data))
|
|
|
|
|
return charcodes, false
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
2018-06-27 12:25:59 +10:00
|
|
|
|
charcodes = append(charcodes, code)
|
|
|
|
|
i += n
|
|
|
|
|
}
|
|
|
|
|
return charcodes, true
|
|
|
|
|
}
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-09-17 17:57:52 +10:00
|
|
|
|
// matchCode attempts to match the byte array `data` with a character code in `cmap`'s codespaces.
|
2018-06-27 12:25:59 +10:00
|
|
|
|
// Returns:
|
|
|
|
|
// character code (if there is a match) of
|
|
|
|
|
// number of bytes read (if there is a match)
|
|
|
|
|
// matched?
|
|
|
|
|
func (cmap *CMap) matchCode(data []byte) (code CharCode, n int, matched bool) {
|
|
|
|
|
for j := 0; j < maxCodeLen; j++ {
|
|
|
|
|
if j < len(data) {
|
|
|
|
|
code = code<<8 | CharCode(data[j])
|
|
|
|
|
n++
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
2018-06-27 12:25:59 +10:00
|
|
|
|
matched = cmap.inCodespace(code, j+1)
|
|
|
|
|
if matched {
|
2018-07-25 12:00:49 +10:00
|
|
|
|
return code, n, true
|
2018-06-03 01:05:46 +00:00
|
|
|
|
}
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
2018-06-27 12:25:59 +10:00
|
|
|
|
// No codespace matched data. This is a serious problem.
|
|
|
|
|
common.Log.Debug("ERROR: No codespace matches bytes=[% 02x]=%#q cmap=%s",
|
|
|
|
|
data, string(data), cmap)
|
2018-07-25 12:00:49 +10:00
|
|
|
|
return 0, 0, false
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
|
|
|
|
|
2018-07-16 17:40:28 +10:00
|
|
|
|
// inCodespace returns true if `code` is in the `numBytes` byte codespace.
|
2018-06-27 12:25:59 +10:00
|
|
|
|
func (cmap *CMap) inCodespace(code CharCode, numBytes int) bool {
|
|
|
|
|
for _, cs := range cmap.codespaces {
|
|
|
|
|
if cs.Low <= code && code <= cs.High && numBytes == cs.NumBytes {
|
|
|
|
|
return true
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
2018-06-27 12:25:59 +10:00
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-07-16 17:40:28 +10:00
|
|
|
|
// LoadCmapFromDataCID parses the in-memory cmap `data` and returns the resulting CMap.
|
2018-07-24 21:32:02 +10:00
|
|
|
|
// It is a convenience function.
|
2018-06-27 12:25:59 +10:00
|
|
|
|
func LoadCmapFromDataCID(data []byte) (*CMap, error) {
|
|
|
|
|
return LoadCmapFromData(data, false)
|
|
|
|
|
}
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-07-16 17:40:28 +10:00
|
|
|
|
// LoadCmapFromData parses the in-memory cmap `data` and returns the resulting CMap.
|
2018-09-17 17:57:52 +10:00
|
|
|
|
// If `isSimple` is true, it uses 1-byte encodings, otherwise it uses the codespaces in the cmap.
|
2018-06-27 12:25:59 +10:00
|
|
|
|
//
|
2018-09-21 16:43:10 +10:00
|
|
|
|
// 9.10.3 ToUnicode CMaps (page 293).
|
2018-06-27 12:25:59 +10:00
|
|
|
|
func LoadCmapFromData(data []byte, isSimple bool) (*CMap, error) {
|
|
|
|
|
common.Log.Trace("LoadCmapFromData: isSimple=%t", isSimple)
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-06-27 12:25:59 +10:00
|
|
|
|
cmap := newCMap(isSimple)
|
|
|
|
|
cmap.cMapParser = newCMapParser(data)
|
2018-03-22 13:01:04 +00:00
|
|
|
|
|
2018-06-27 12:25:59 +10:00
|
|
|
|
// In debugging it may help to see the data being parsed.
|
|
|
|
|
// fmt.Println("===============*******===========")
|
|
|
|
|
// fmt.Printf("%s\n", string(data))
|
|
|
|
|
// fmt.Println("===============&&&&&&&===========")
|
2018-06-03 01:05:46 +00:00
|
|
|
|
|
2018-06-27 12:25:59 +10:00
|
|
|
|
err := cmap.parse()
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
2018-06-27 12:25:59 +10:00
|
|
|
|
if len(cmap.codespaces) == 0 {
|
|
|
|
|
common.Log.Debug("ERROR: No codespaces. cmap=%s", cmap)
|
|
|
|
|
return nil, ErrBadCMap
|
|
|
|
|
}
|
2018-09-17 17:57:52 +10:00
|
|
|
|
// We need to sort codespaces so that we check shorter codes first.
|
2018-06-27 12:25:59 +10:00
|
|
|
|
sort.Slice(cmap.codespaces, func(i, j int) bool {
|
|
|
|
|
return cmap.codespaces[i].Low < cmap.codespaces[j].Low
|
|
|
|
|
})
|
|
|
|
|
return cmap, nil
|
2018-03-22 13:01:04 +00:00
|
|
|
|
}
|
2018-09-17 17:57:52 +10:00
|
|
|
|
|
|
|
|
|
// Bytes returns the raw bytes of a PDF CMap corresponding to `cmap`.
|
|
|
|
|
func (cmap *CMap) Bytes() []byte {
|
|
|
|
|
common.Log.Trace("cmap.Bytes: cmap=%s", cmap.String())
|
|
|
|
|
body := cmap.toBfData()
|
|
|
|
|
whole := strings.Join([]string{cmapHeader, body, cmapTrailer}, "\n")
|
|
|
|
|
return []byte(whole)
|
|
|
|
|
}
|
|
|
|
|
|
2018-09-21 15:39:31 +10:00
|
|
|
|
type charRange struct {
|
|
|
|
|
code0 CharCode
|
|
|
|
|
code1 CharCode
|
|
|
|
|
}
|
|
|
|
|
type fbRange struct {
|
|
|
|
|
code0 CharCode
|
|
|
|
|
code1 CharCode
|
|
|
|
|
r0 rune
|
|
|
|
|
}
|
2018-09-17 17:57:52 +10:00
|
|
|
|
|
|
|
|
|
// toBfData returns the bfchar and bfrange sections of a CMap text file.
|
|
|
|
|
// Both sections are computed from cmap.codeToUnicode.
|
|
|
|
|
func (cmap *CMap) toBfData() string {
|
|
|
|
|
if len(cmap.codeToUnicode) == 0 {
|
|
|
|
|
return ""
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// codes is a sorted list of the codeToUnicode keys.
|
|
|
|
|
codes := []CharCode{}
|
|
|
|
|
for code := range cmap.codeToUnicode {
|
|
|
|
|
codes = append(codes, code)
|
|
|
|
|
}
|
|
|
|
|
sort.Slice(codes, func(i, j int) bool { return codes[i] < codes[j] })
|
|
|
|
|
|
2018-09-21 15:39:31 +10:00
|
|
|
|
// charRanges is a list of the contiguous character code ranges in `codes`.
|
2018-09-17 17:57:52 +10:00
|
|
|
|
charRanges := []charRange{}
|
|
|
|
|
c0, c1 := codes[0], codes[0]+1
|
|
|
|
|
for _, c := range codes[1:] {
|
|
|
|
|
if c != c1 {
|
|
|
|
|
charRanges = append(charRanges, charRange{c0, c1})
|
|
|
|
|
c0 = c
|
|
|
|
|
}
|
|
|
|
|
c1 = c + 1
|
|
|
|
|
}
|
2018-09-21 15:39:31 +10:00
|
|
|
|
if c1 > c0 {
|
2018-09-17 17:57:52 +10:00
|
|
|
|
charRanges = append(charRanges, charRange{c0, c1})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// fbChars is a list of single character ranges. fbRanges is a list of multiple character ranges.
|
|
|
|
|
fbChars := []CharCode{}
|
|
|
|
|
fbRanges := []fbRange{}
|
|
|
|
|
for _, cr := range charRanges {
|
|
|
|
|
if cr.code0+1 == cr.code1 {
|
|
|
|
|
fbChars = append(fbChars, cr.code0)
|
|
|
|
|
} else {
|
|
|
|
|
fbRanges = append(fbRanges, fbRange{
|
|
|
|
|
code0: cr.code0,
|
|
|
|
|
code1: cr.code1,
|
|
|
|
|
r0: []rune(cmap.codeToUnicode[cr.code0])[0],
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
common.Log.Trace("charRanges=%d fbChars=%d fbRanges=%d", len(charRanges), len(fbChars),
|
|
|
|
|
len(fbRanges))
|
|
|
|
|
|
|
|
|
|
lines := []string{}
|
|
|
|
|
if len(fbChars) > 0 {
|
|
|
|
|
numRanges := (len(fbChars) + maxBfEntries - 1) / maxBfEntries
|
|
|
|
|
for i := 0; i < numRanges; i++ {
|
|
|
|
|
n := min(len(fbChars)-i*maxBfEntries, maxBfEntries)
|
|
|
|
|
lines = append(lines, fmt.Sprintf("%d beginbfchar", n))
|
|
|
|
|
for j := 0; j < n; j++ {
|
|
|
|
|
code := fbChars[i*maxBfEntries+j]
|
|
|
|
|
s := cmap.codeToUnicode[code]
|
|
|
|
|
r := []rune(s)[0]
|
|
|
|
|
lines = append(lines, fmt.Sprintf("<%04x> <%04x>", code, r))
|
|
|
|
|
}
|
|
|
|
|
lines = append(lines, "endbfchar")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if len(fbRanges) > 0 {
|
|
|
|
|
numRanges := (len(fbRanges) + maxBfEntries - 1) / maxBfEntries
|
|
|
|
|
for i := 0; i < numRanges; i++ {
|
|
|
|
|
n := min(len(fbRanges)-i*maxBfEntries, maxBfEntries)
|
|
|
|
|
lines = append(lines, fmt.Sprintf("%d beginbfrange", n))
|
|
|
|
|
for j := 0; j < n; j++ {
|
|
|
|
|
rng := fbRanges[i*maxBfEntries+j]
|
|
|
|
|
r := rng.r0
|
|
|
|
|
lines = append(lines, fmt.Sprintf("<%04x><%04x> <%04x>", rng.code0, rng.code1-1, r))
|
|
|
|
|
}
|
|
|
|
|
lines = append(lines, "endbfrange")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return strings.Join(lines, "\n")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const (
	// maxBfEntries is the maximum number of entries in a bfchar or bfrange section.
	maxBfEntries = 100

	// cmapHeader is the text written before the bfchar/bfrange data of a CMap. It declares
	// the Adobe-UCS system info, the Adobe-Identity-UCS name, CMap type 2 and a single
	// <0000>..<FFFF> codespace range.
	cmapHeader = `
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo << /Registry (Adobe) /Ordering (UCS) /Supplement 0 >> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
`

	// cmapTrailer is the text written after the bfchar/bfrange data of a CMap.
	cmapTrailer = `endcmap
CMapName currentdict /CMap defineresource pop
end
end
`
)
|
|
|
|
|
|
|
|
|
|
// min returns the smaller of `i` and `j`.
func min(i, j int) int {
	smaller := j
	if i < j {
		smaller = i
	}
	return smaller
}
|