unipdf/pdf/internal/cmap/utils.go
2018-06-28 11:09:59 +10:00

50 lines
1.3 KiB
Go

/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package cmap
import (
"unicode/utf16"
"github.com/unidoc/unidoc/common"
)
// hexToCharCode interprets the bytes of `shex` as a big-endian number and returns it as a CharCode.
// The first byte ends up in the most significant position; an empty `shex` yields 0.
func hexToCharCode(shex cmapHexString) CharCode {
	var result CharCode
	for i := 0; i < len(shex.b); i++ {
		// Make room for the next byte, then drop it into the low 8 bits.
		result = result<<8 | CharCode(shex.b[i])
	}
	return result
}
// hexToString converts the UTF-16BE encoded bytes held in `shex` to a Go (UTF-8) string.
// The decoding itself is delegated to utf16ToRunes.
//
// Background, from 9.10.3 ToUnicode CMaps (page 293):
// • It shall use the beginbfchar, endbfchar, beginbfrange, and endbfrange operators to define the
// mapping from character codes to Unicode character sequences expressed in UTF-16BE encoding.
func hexToString(shex cmapHexString) string {
	runes := utf16ToRunes(shex)
	return string(runes)
}
// utf16ToRunes decodes the UTF-16BE encoded bytes of `shex` into unicode runes.
//
// Special cases:
//   - A single-byte `shex` is returned as one rune holding that byte's value.
//   - Any other odd-length `shex` is padded with one trailing zero byte before decoding, and the
//     padding is reported through the debug log.
//
// Surrogate pairs are combined by utf16.Decode, which substitutes U+FFFD for invalid sequences.
// `shex` itself is never modified.
func utf16ToRunes(shex cmapHexString) []rune {
	if len(shex.b) == 1 {
		return []rune{rune(shex.b[0])}
	}
	b := shex.b
	if len(b)%2 != 0 {
		// Cap the slice at its length so that append is forced to copy. A plain append could
		// write the pad byte into spare capacity of the caller's backing array and clobber data
		// visible through other slices that share it.
		b = append(b[:len(b):len(b)], 0)
		common.Log.Debug("ERROR: utf16ToRunes. Padding shex=%#v to %+v", shex, b)
	}
	// Assemble each big-endian byte pair into one UTF-16 code unit.
	chars := make([]uint16, len(b)/2)
	for i := range chars {
		chars[i] = uint16(b[2*i])<<8 | uint16(b[2*i+1])
	}
	return utf16.Decode(chars)
}