2018-06-27 12:25:59 +10:00
|
|
|
/*
|
|
|
|
* This file is subject to the terms and conditions defined in
|
|
|
|
* file 'LICENSE.md', which is part of this source code package.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package textencoding
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"sort"
|
2019-01-02 16:54:37 +02:00
|
|
|
"sync"
|
2019-01-01 21:17:57 +02:00
|
|
|
"unicode/utf8"
|
2018-06-27 12:25:59 +10:00
|
|
|
|
|
|
|
"github.com/unidoc/unidoc/common"
|
2018-07-15 16:28:56 +10:00
|
|
|
"github.com/unidoc/unidoc/pdf/core"
|
2019-01-01 21:17:57 +02:00
|
|
|
"golang.org/x/text/encoding"
|
2019-01-02 17:03:03 +02:00
|
|
|
xtransform "golang.org/x/text/transform"
|
2018-06-27 12:25:59 +10:00
|
|
|
)
|
|
|
|
|
2018-12-30 16:18:56 +02:00
|
|
|
// SimpleEncoder represents a 1 byte encoding.
|
|
|
|
type SimpleEncoder interface {
|
|
|
|
TextEncoder
|
|
|
|
BaseName() string
|
|
|
|
Charcodes() []CharCode
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewCustomSimpleTextEncoder returns a simpleEncoder based on map `encoding` and difference map
|
2018-07-02 16:46:43 +10:00
|
|
|
// `differences`.
|
2019-01-01 21:17:57 +02:00
|
|
|
func NewCustomSimpleTextEncoder(encoding, differences map[CharCode]GlyphName) (SimpleEncoder, error) {
|
2018-07-13 17:40:27 +10:00
|
|
|
if len(encoding) == 0 {
|
2018-12-30 16:18:56 +02:00
|
|
|
return nil, errors.New("empty custom encoding")
|
2018-07-13 17:40:27 +10:00
|
|
|
}
|
2018-12-30 16:18:56 +02:00
|
|
|
const baseName = "custom"
|
2019-01-01 21:17:57 +02:00
|
|
|
baseEncoding := make(map[byte]rune)
|
2018-07-02 16:46:43 +10:00
|
|
|
for code, glyph := range encoding {
|
|
|
|
r, ok := GlyphToRune(glyph)
|
|
|
|
if !ok {
|
|
|
|
common.Log.Debug("ERROR: Unknown glyph. %q", glyph)
|
2018-07-13 17:40:27 +10:00
|
|
|
continue
|
2018-07-02 16:46:43 +10:00
|
|
|
}
|
2019-01-01 21:17:57 +02:00
|
|
|
baseEncoding[byte(code)] = r
|
2018-07-02 16:46:43 +10:00
|
|
|
}
|
2019-01-01 21:17:57 +02:00
|
|
|
// TODO(dennwc): this seems to be incorrect - baseEncoding won't be saved when converting to PDF object
|
|
|
|
enc := newSimpleEncoderFromMap(baseName, baseEncoding)
|
|
|
|
if len(differences) != 0 {
|
|
|
|
enc = ApplyDifferences(enc, differences)
|
|
|
|
}
|
|
|
|
return enc, nil
|
2018-07-03 14:26:42 +10:00
|
|
|
}
|
|
|
|
|
2018-12-30 16:18:56 +02:00
|
|
|
// NewSimpleTextEncoder returns a simpleEncoder based on predefined encoding `baseName` and
|
2018-06-27 12:25:59 +10:00
|
|
|
// difference map `differences`.
|
2018-12-30 16:18:56 +02:00
|
|
|
func NewSimpleTextEncoder(baseName string, differences map[CharCode]GlyphName) (SimpleEncoder, error) {
|
2019-01-02 16:54:37 +02:00
|
|
|
fnc, ok := simple[baseName]
|
|
|
|
if !ok {
|
|
|
|
common.Log.Debug("ERROR: NewSimpleTextEncoder. Unknown encoding %q", baseName)
|
|
|
|
return nil, errors.New("unsupported font encoding")
|
2018-12-30 16:18:56 +02:00
|
|
|
}
|
2019-01-02 16:54:37 +02:00
|
|
|
enc := fnc()
|
2019-01-01 21:17:57 +02:00
|
|
|
if len(differences) != 0 {
|
|
|
|
enc = ApplyDifferences(enc, differences)
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
2019-01-01 21:17:57 +02:00
|
|
|
return enc, nil
|
2018-07-02 16:46:43 +10:00
|
|
|
}
|
|
|
|
|
2019-01-01 21:17:57 +02:00
|
|
|
func newSimpleEncoderFromMap(name string, encoding map[byte]rune) SimpleEncoder {
|
|
|
|
se := &simpleEncoding{
|
|
|
|
baseName: name,
|
|
|
|
decode: encoding,
|
|
|
|
encode: make(map[rune]byte, len(encoding)),
|
|
|
|
}
|
2019-03-24 00:13:35 +00:00
|
|
|
|
|
|
|
// If there is unique mapping between rune and charcode then always go with the
|
|
|
|
// lower charcode for consistency.
|
2019-01-01 21:17:57 +02:00
|
|
|
for b, r := range se.decode {
|
2019-03-24 00:13:35 +00:00
|
|
|
if b2, has := se.encode[r]; !has || b < b2 {
|
|
|
|
se.encode[r] = b
|
|
|
|
}
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
2018-12-30 16:18:56 +02:00
|
|
|
return se
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
|
|
|
|
2019-01-01 21:17:57 +02:00
|
|
|
var (
|
|
|
|
simple = make(map[string]func() SimpleEncoder)
|
|
|
|
)
|
2018-07-03 14:26:42 +10:00
|
|
|
|
2019-01-01 21:17:57 +02:00
|
|
|
// RegisterSimpleEncoding registers a SimpleEncoder constructer by PDF encoding name.
|
|
|
|
func RegisterSimpleEncoding(name string, fnc func() SimpleEncoder) {
|
|
|
|
if _, ok := simple[name]; ok {
|
|
|
|
panic("already registered")
|
2018-07-03 14:26:42 +10:00
|
|
|
}
|
2019-01-01 21:17:57 +02:00
|
|
|
simple[name] = fnc
|
|
|
|
}
|
2018-07-02 16:46:43 +10:00
|
|
|
|
2019-01-01 21:17:57 +02:00
|
|
|
var (
|
|
|
|
_ SimpleEncoder = (*simpleEncoding)(nil)
|
|
|
|
_ encoding.Encoding = (*simpleEncoding)(nil)
|
|
|
|
)
|
2018-07-03 14:26:42 +10:00
|
|
|
|
2019-01-01 21:17:57 +02:00
|
|
|
// simpleEncoding is a one byte encoding described by a pair of lookup tables.
type simpleEncoding struct {
	// baseName is the name reported by BaseName and written by ToPdfObject.
	baseName string

	// encode maps a rune to the byte that represents it.
	encode map[rune]byte
	// decode maps a byte to the rune it represents.
	decode map[byte]rune
}
|
|
|
|
|
2019-01-01 23:24:11 +02:00
|
|
|
// Encode converts the Go unicode string to a PDF encoded string.
|
|
|
|
func (enc *simpleEncoding) Encode(str string) []byte {
|
|
|
|
data, _ := enc.NewEncoder().Bytes([]byte(str))
|
2019-01-01 21:17:57 +02:00
|
|
|
return data
|
2018-11-22 22:01:04 +11:00
|
|
|
}
|
|
|
|
|
2019-01-01 23:24:11 +02:00
|
|
|
// Decode converts PDF encoded string to a Go unicode string.
|
|
|
|
func (enc *simpleEncoding) Decode(raw []byte) string {
|
|
|
|
data, _ := enc.NewDecoder().Bytes(raw)
|
|
|
|
return string(data)
|
|
|
|
}
|
|
|
|
|
2019-01-01 21:17:57 +02:00
|
|
|
// NewDecoder implements encoding.Encoding.
|
|
|
|
func (enc *simpleEncoding) NewDecoder() *encoding.Decoder {
|
|
|
|
return &encoding.Decoder{Transformer: simpleDecoder{m: enc.decode}}
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
|
|
|
|
2019-01-01 21:17:57 +02:00
|
|
|
// simpleDecoder is a transformer that turns one byte charcodes into UTF-8 text.
type simpleDecoder struct {
	// m maps a charcode byte to the rune it decodes to.
	m map[byte]rune
}
|
|
|
|
|
2019-01-02 17:03:03 +02:00
|
|
|
// Transform implements xtransform.Transformer.
|
2019-01-01 21:17:57 +02:00
|
|
|
func (enc simpleDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, _ error) {
|
|
|
|
for len(src) != 0 {
|
|
|
|
b := src[0]
|
|
|
|
src = src[1:]
|
|
|
|
|
|
|
|
r, ok := enc.m[b]
|
|
|
|
if !ok {
|
|
|
|
r = MissingCodeRune
|
|
|
|
}
|
|
|
|
if utf8.RuneLen(r) > len(dst) {
|
2019-01-02 17:03:03 +02:00
|
|
|
return nDst, nSrc, xtransform.ErrShortDst
|
2019-01-01 21:17:57 +02:00
|
|
|
}
|
|
|
|
n := utf8.EncodeRune(dst, r)
|
|
|
|
dst = dst[n:]
|
|
|
|
|
|
|
|
nSrc++
|
|
|
|
nDst += n
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
2019-01-01 21:17:57 +02:00
|
|
|
return nDst, nSrc, nil
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
|
|
|
|
2019-01-02 17:03:03 +02:00
|
|
|
// Reset implements xtransform.Transformer.
|
2019-01-01 21:17:57 +02:00
|
|
|
func (enc simpleDecoder) Reset() {}
|
|
|
|
|
|
|
|
// NewEncoder implements encoding.Encoding.
|
|
|
|
func (enc *simpleEncoding) NewEncoder() *encoding.Encoder {
|
|
|
|
return &encoding.Encoder{Transformer: simpleEncoder{m: enc.encode}}
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
|
|
|
|
2019-01-01 21:17:57 +02:00
|
|
|
// simpleEncoder is a transformer that turns UTF-8 text into one byte charcodes.
type simpleEncoder struct {
	// m maps a rune to the charcode byte that encodes it.
	m map[rune]byte
}
|
|
|
|
|
2019-01-02 17:03:03 +02:00
|
|
|
// Transform implements xtransform.Transformer.
|
2019-01-01 21:17:57 +02:00
|
|
|
func (enc simpleEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, _ error) {
|
|
|
|
for len(src) != 0 {
|
|
|
|
if !utf8.FullRune(src) && !atEOF {
|
2019-01-02 17:03:03 +02:00
|
|
|
return nDst, nSrc, xtransform.ErrShortSrc
|
2019-01-01 21:17:57 +02:00
|
|
|
} else if len(dst) == 0 {
|
2019-01-02 17:03:03 +02:00
|
|
|
return nDst, nSrc, xtransform.ErrShortDst
|
2019-01-01 21:17:57 +02:00
|
|
|
}
|
|
|
|
r, n := utf8.DecodeRune(src)
|
|
|
|
if r == utf8.RuneError {
|
|
|
|
r = MissingCodeRune
|
|
|
|
}
|
|
|
|
src = src[n:]
|
|
|
|
nSrc += n
|
|
|
|
|
|
|
|
b, ok := enc.m[r]
|
|
|
|
if !ok {
|
|
|
|
b, _ = enc.m[MissingCodeRune]
|
|
|
|
}
|
|
|
|
dst[0] = b
|
|
|
|
|
|
|
|
dst = dst[1:]
|
|
|
|
nDst++
|
2018-07-02 16:46:43 +10:00
|
|
|
}
|
2019-01-01 21:17:57 +02:00
|
|
|
return nDst, nSrc, nil
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
|
|
|
|
2019-01-02 17:03:03 +02:00
|
|
|
// Reset implements xtransform.Transformer.
|
2019-01-01 21:17:57 +02:00
|
|
|
func (enc simpleEncoder) Reset() {}
|
|
|
|
|
|
|
|
// String returns a text representation of encoding.
|
|
|
|
func (enc *simpleEncoding) String() string {
|
|
|
|
return "simpleEncoding(" + enc.baseName + ")"
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
|
|
|
|
2019-01-01 21:17:57 +02:00
|
|
|
// BaseName returns a base name of the encoder, as specified in the PDF spec.
|
|
|
|
func (enc *simpleEncoding) BaseName() string {
|
|
|
|
return enc.baseName
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
|
|
|
|
2019-01-01 21:17:57 +02:00
|
|
|
func (enc *simpleEncoding) Charcodes() []CharCode {
|
|
|
|
codes := make([]CharCode, 0, len(enc.decode))
|
|
|
|
for b := range enc.decode {
|
|
|
|
codes = append(codes, CharCode(b))
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
2019-01-01 21:17:57 +02:00
|
|
|
sort.Slice(codes, func(i, j int) bool {
|
|
|
|
return codes[i] < codes[j]
|
|
|
|
})
|
|
|
|
return codes
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
|
|
|
|
2019-01-01 21:17:57 +02:00
|
|
|
func (enc *simpleEncoding) RuneToCharcode(r rune) (CharCode, bool) {
|
|
|
|
b, ok := enc.encode[r]
|
|
|
|
return CharCode(b), ok
|
|
|
|
}
|
2018-07-03 14:26:42 +10:00
|
|
|
|
2019-01-01 21:17:57 +02:00
|
|
|
func (enc *simpleEncoding) CharcodeToRune(code CharCode) (rune, bool) {
|
|
|
|
if code > 0xff {
|
|
|
|
return MissingCodeRune, false
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
2019-01-01 21:17:57 +02:00
|
|
|
b := byte(code)
|
|
|
|
r, ok := enc.decode[b]
|
|
|
|
return r, ok
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
|
|
|
|
2019-01-01 21:17:57 +02:00
|
|
|
func (enc *simpleEncoding) ToPdfObject() core.PdfObject {
|
2019-01-06 13:49:36 +02:00
|
|
|
return core.MakeName(enc.baseName)
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|
|
|
|
|
2019-01-02 16:54:37 +02:00
|
|
|
// newSimpleMapping creates a byte-to-rune mapping that can be used to create simple encodings.
|
|
|
|
// An implementation will build reverse map only once when the encoding is first used.
|
|
|
|
func newSimpleMapping(name string, m map[byte]rune) *simpleMapping {
|
|
|
|
return &simpleMapping{
|
|
|
|
baseName: name,
|
|
|
|
decode: m,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// simpleMapping is a named byte-to-rune table whose reverse table is built lazily.
type simpleMapping struct {
	// baseName is the name given to the encoders created from the mapping.
	baseName string
	// once guards the one-time construction of encode.
	once sync.Once
	// decode maps a byte to the rune it represents.
	decode map[byte]rune
	// encode maps a rune to the byte that represents it; it is filled in by init.
	encode map[rune]byte
}
|
|
|
|
|
|
|
|
func (m *simpleMapping) init() {
|
|
|
|
m.encode = make(map[rune]byte, len(m.decode))
|
2019-03-24 00:13:35 +00:00
|
|
|
// If there is unique mapping between rune and charcode then always go with the
|
|
|
|
// lower charcode for consistency.
|
2019-01-02 16:54:37 +02:00
|
|
|
for b, r := range m.decode {
|
2019-03-24 00:13:35 +00:00
|
|
|
if b2, has := m.encode[r]; !has || b < b2 {
|
|
|
|
m.encode[r] = b
|
|
|
|
}
|
2019-01-02 16:54:37 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewEncoder creates a new SimpleEncoding from the byte-to-rune mapping.
|
|
|
|
func (m *simpleMapping) NewEncoder() SimpleEncoder {
|
|
|
|
m.once.Do(m.init)
|
|
|
|
return &simpleEncoding{
|
|
|
|
baseName: m.baseName,
|
|
|
|
encode: m.encode,
|
|
|
|
decode: m.decode,
|
|
|
|
}
|
2018-06-27 12:25:59 +10:00
|
|
|
}
|