mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-01 22:17:29 +08:00
50 lines
1.3 KiB
Go
50 lines
1.3 KiB
Go
/*
|
|
* This file is subject to the terms and conditions defined in
|
|
* file 'LICENSE.md', which is part of this source code package.
|
|
*/
|
|
|
|
package cmap
|
|
|
|
import (
|
|
"unicode/utf16"
|
|
|
|
"github.com/unidoc/unidoc/common"
|
|
)
|
|
|
|
// hexToCharCode returns the integer that is encoded in `shex` as a big-endian hex value
|
|
func hexToCharCode(shex cmapHexString) CharCode {
|
|
code := CharCode(0)
|
|
for _, v := range shex.b {
|
|
code <<= 8
|
|
code |= CharCode(v)
|
|
}
|
|
return code
|
|
}
|
|
|
|
// hexToString returns the unicode string that is UTF-16BE encoded in `shex`.
|
|
// 9.10.3 ToUnicode CMaps (page 293)
|
|
// • It shall use the beginbfchar, endbfchar, beginbfrange, and endbfrange operators to define the
|
|
// mapping from character codes to Unicode character sequences expressed in UTF-16BE encoding.
|
|
func hexToString(shex cmapHexString) string {
|
|
return string(utf16ToRunes(shex))
|
|
}
|
|
|
|
// hexToString decodes the UTF-16BE encoded string `shex` to unicode runes.
|
|
func utf16ToRunes(shex cmapHexString) []rune {
|
|
if len(shex.b) == 1 {
|
|
return []rune{rune(shex.b[0])}
|
|
}
|
|
b := shex.b
|
|
if len(b)%2 != 0 {
|
|
b = append(b, 0)
|
|
common.Log.Debug("ERROR: hexToRunes. Padding shex=%#v to %+v", shex, b)
|
|
}
|
|
n := len(b) >> 1
|
|
chars := make([]uint16, n)
|
|
for i := 0; i < n; i++ {
|
|
chars[i] = uint16(b[i<<1])<<8 + uint16(b[i<<1+1])
|
|
}
|
|
runes := utf16.Decode(chars)
|
|
return runes
|
|
}
|