unipdf/internal/textencoding/differences.go
2019-05-16 20:44:51 +00:00

202 lines
5.5 KiB
Go

package textencoding
import (
"bytes"
"fmt"
"sort"
"github.com/unidoc/unipdf/v3/common"
"github.com/unidoc/unipdf/v3/core"
)
// FromFontDifferences parses `diffList`, the /Differences array of an /Encoding object, into a
// map from character code to glyph name.
//
// An integer element sets the code that the next name will be assigned to; every name element
// is stored under the current code, which is then advanced by one. Any element that is neither
// an integer nor a name aborts the parse with core.ErrTypeError.
func FromFontDifferences(diffList *core.PdfObjectArray) (map[CharCode]GlyphName, error) {
	result := make(map[CharCode]GlyphName)
	var next CharCode
	for _, elem := range diffList.Elements() {
		// An integer restarts the numbering for the names that follow it.
		if start, isInt := elem.(*core.PdfObjectInteger); isInt {
			next = CharCode(*start)
			continue
		}

		name, isName := elem.(*core.PdfObjectName)
		if !isName {
			common.Log.Debug("ERROR: Bad type. obj=%s", elem)
			return nil, core.ErrTypeError
		}
		result[next] = GlyphName(string(*name))
		next++
	}
	return result, nil
}
// toFontDifferences converts `differences` (a map representing character code to glyph mappings)
// to a /Differences array for an /Encoding object.
//
// Codes are written in ascending order. Each run of consecutive codes is emitted as the first
// code of the run followed by the glyph name of every code in that run, which is the layout
// FromFontDifferences reads back. Returns nil when `differences` is empty.
func toFontDifferences(differences map[CharCode]GlyphName) *core.PdfObjectArray {
	if len(differences) == 0 {
		return nil
	}

	// Map iteration order is random, so sort the codes to get a deterministic array.
	codes := make([]CharCode, 0, len(differences))
	for c := range differences {
		codes = append(codes, c)
	}
	sort.Slice(codes, func(i, j int) bool {
		return codes[i] < codes[j]
	})

	// Worst case is one integer plus one name per code.
	diffList := make([]core.PdfObject, 0, 2*len(codes))
	for i, c := range codes {
		// A new run starts at the first code and whenever a code does not directly
		// follow the previous one; a run is introduced by its starting code.
		if i == 0 || c != codes[i-1]+1 {
			diffList = append(diffList, core.MakeInteger(int64(c)))
		}
		// The glyph name is always written, including for the first code of a new run.
		diffList = append(diffList, core.MakeName(string(differences[c])))
	}
	return core.MakeArray(diffList...)
}
// ApplyDifferences modifies or wraps the base encoding and overlays differences over it.
//
// If `differences` is empty, `base` is returned unchanged. If `base` is itself a differences
// encoding, the result is flattened: the two difference maps are merged (entries from
// `differences` win on conflicting codes) and the result wraps the original underlying base
// encoding instead of nesting one differences encoding inside another.
func ApplyDifferences(base SimpleEncoder, differences map[CharCode]GlyphName) SimpleEncoder {
	if len(differences) == 0 {
		return base
	}

	// Flatten before building the result so that the stored base and differences are the
	// merged ones. The merge goes into a fresh map so neither input map is modified.
	if d2, ok := base.(*differencesEncoding); ok {
		merged := make(map[CharCode]GlyphName, len(d2.differences)+len(differences))
		for code, glyph := range d2.differences {
			merged[code] = glyph
		}
		for code, glyph := range differences {
			merged[code] = glyph
		}
		differences = merged
		base = d2.base
	}

	d := &differencesEncoding{
		base:        base,
		differences: differences,
		decode:      make(map[byte]rune, len(differences)),
		encode:      make(map[rune]byte, len(differences)),
	}
	for code, glyph := range differences {
		// The overlay tables are 8 bit; the code is truncated to a single byte.
		b := byte(code)
		r, ok := GlyphToRune(glyph)
		if ok {
			d.encode[r] = b
		} else {
			common.Log.Debug("ERROR: No match for glyph=%q differences=%+v", glyph, differences)
		}
		// The decode entry is recorded even when the glyph is unknown, so the code no
		// longer falls through to the base encoding.
		// NOTE(review): the stored rune is whatever GlyphToRune returns on failure - confirm.
		d.decode[b] = r
	}
	return d
}
// differencesEncoding remaps characters of a base encoding and acts as a pass-through for other characters.
// Assumes that the underlying encoding is 8 bit.
type differencesEncoding struct {
// base is the encoding consulted for every rune or code that has no override below.
base SimpleEncoder
// original mapping to encode to PDF
differences map[CharCode]GlyphName
// overlaid on top of base encoding (8 bit):
// decode maps a character code byte to its overriding rune,
// encode maps a rune to its overriding character code byte.
decode map[byte]rune
encode map[rune]byte
}
// BaseName reports the name of the wrapped base encoding; the overlay has no name of its own.
func (enc *differencesEncoding) BaseName() string {
	name := enc.base.BaseName()
	return name
}
// String describes the encoding as the base encoding's description followed by the
// differences map.
func (enc *differencesEncoding) String() string {
	baseDesc := enc.base.String()
	return fmt.Sprintf("differences(%s, %v)", baseDesc, enc.differences)
}
// Charcodes returns the character codes of the base encoding together with any overlaid codes
// the base encoding does not already contain. The slice is re-sorted in ascending order only
// when at least one code had to be added.
func (enc *differencesEncoding) Charcodes() []CharCode {
	codes := enc.base.Charcodes()
	baseLen := len(codes)

	// Index the base codes so overlaid codes already present are not duplicated.
	known := make(map[CharCode]struct{}, baseLen)
	for i := range codes {
		known[codes[i]] = struct{}{}
	}

	for b := range enc.decode {
		if _, dup := known[CharCode(b)]; dup {
			continue
		}
		codes = append(codes, CharCode(b))
	}

	// Nothing was appended, so the order returned by the base encoding is kept as is.
	if len(codes) == baseLen {
		return codes
	}
	sort.Slice(codes, func(a, b int) bool {
		return codes[a] < codes[b]
	})
	return codes
}
// Encode converts a Go unicode string to a PDF encoded string, writing exactly one byte per
// rune of `str`. The match flag from RuneToCharcode is ignored, so whatever code it returns is
// written even for runes without a match.
func (enc *differencesEncoding) Encode(str string) []byte {
	var out bytes.Buffer
	chars := []rune(str)
	out.Grow(len(chars))
	for i := range chars {
		code, _ := enc.RuneToCharcode(chars[i])
		// The underlying encoding is 8 bit, so the code is narrowed to a single byte.
		out.WriteByte(byte(code))
	}
	return out.Bytes()
}
// Decode converts a PDF encoded string to a Go unicode string, producing one rune per input
// byte. The match flag from CharcodeToRune is ignored, so whatever rune it returns is used.
func (enc *differencesEncoding) Decode(raw []byte) string {
	// The underlying encoding is 8 bit: every byte is a complete character code.
	out := make([]rune, len(raw))
	for i, b := range raw {
		out[i], _ = enc.CharcodeToRune(CharCode(b))
	}
	return string(out)
}
// RuneToCharcode returns the PDF character code corresponding to rune `r`.
// Overlaid differences are consulted first; runes without an override are delegated to the
// base encoding. The bool return flag is true if there was a match, and false otherwise.
func (enc *differencesEncoding) RuneToCharcode(r rune) (CharCode, bool) {
	b, overridden := enc.encode[r]
	if !overridden {
		return enc.base.RuneToCharcode(r)
	}
	return CharCode(b), true
}
// CharcodeToRune returns the rune corresponding to character code `code`.
// Codes above 0xff are rejected with MissingCodeRune. Otherwise overlaid differences are
// consulted first and codes without an override are delegated to the base encoding.
// The bool return flag is true if there was a match, and false otherwise.
func (enc *differencesEncoding) CharcodeToRune(code CharCode) (rune, bool) {
	if code > 0xff {
		return MissingCodeRune, false
	}
	r, overridden := enc.decode[byte(code)]
	if !overridden {
		return enc.base.CharcodeToRune(code)
	}
	return r, true
}
// ToPdfObject returns the encoding as an indirect /Encoding dictionary whose /BaseEncoding is
// the base encoding's PDF object and whose /Differences is the array built from the stored
// differences. It panics if that array is nil.
func (enc *differencesEncoding) ToPdfObject() core.PdfObject {
	encDict := core.MakeDict()
	encDict.Set("Type", core.MakeName("Encoding"))
	encDict.Set("BaseEncoding", enc.base.ToPdfObject())

	if diffArr := toFontDifferences(enc.differences); diffArr != nil {
		encDict.Set("Differences", diffArr)
		return core.MakeIndirectObject(encDict)
	}
	// This should never happen, because the constructor checks that the differences
	// are not empty.
	panic("differences should not be nil")
}