unipdf/internal/cmap/utils.go
Adrian-George Bostan e2b3c6e6ba
Add predefined CMaps for Type 0 composite fonts (#246)
* Add packed predefined cmaps
* Add cmap cid range parsing
* Load base cmap for predefined cmaps
* Refactor pdfFont to Unicode methods
* Preserve CharcodeBytesToUnicode behavior
* Add support for CID-keyed Type 0 fonts
* Add method documentation for the cmap package
* Refactor and document charcode to Unicode conversion code
* Add more cmap parsing test cases
* Add more method documentation in the cmap package.
* Remove unused code from the bcmaps package
* Improve cmap test case
* Assume identity when encoder is missing on regenerating field appearance
* Add missing encoder log message
* Add inverse CMap mappings
* Add CMap encoder
* Address golint notices and small fix in the cmap package
* Keep smaller charcodes when generating cmap inverse mappings
* Update extractor test case
* Keep latest supplement charcodes/CIDs when computing inverse mappings
* Fix comment typo
2020-02-07 19:56:30 +00:00

65 lines
1.6 KiB
Go

/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package cmap
import (
"unicode/utf16"
"github.com/unidoc/unipdf/v3/common"
)
// hexToCharCode interprets the bytes of `shex` as a big-endian integer and returns it as a
// CharCode. The first byte is the most significant; an empty `shex` yields 0.
func hexToCharCode(shex cmapHexString) CharCode {
	var charcode CharCode
	for i := 0; i < len(shex.b); i++ {
		// Move the accumulated value up one byte and put the next byte in the low 8 bits.
		charcode = charcode<<8 | CharCode(shex.b[i])
	}
	return charcode
}
// hexToRunes decodes the UTF-16BE encoded string `shex` to unicode runes.
//
// 9.10.3 ToUnicode CMaps (page 293)
// • It shall use the beginbfchar, endbfchar, beginbfrange, and endbfrange operators to define the
// mapping from character codes to Unicode character sequences expressed in UTF-16BE encoding.
//
// A single-byte input is returned as one rune holding that byte's value. Any other odd-length
// input is padded with a trailing zero byte before decoding, and the padding is reported in the
// debug log. The padding is applied to a copy, so the bytes referenced by `shex` are never
// modified.
func hexToRunes(shex cmapHexString) []rune {
	if len(shex.b) == 1 {
		return []rune{rune(shex.b[0])}
	}

	b := shex.b
	if len(b)%2 != 0 {
		// Cap the slice at its own length so append has to allocate a new backing array
		// instead of writing the pad byte into spare capacity shared with the caller.
		b = append(b[:len(b):len(b)], 0)
		common.Log.Debug("ERROR: hexToRunes. Padding shex=%#v to %+v", shex, b)
	}

	// Combine each pair of bytes into one big-endian UTF-16 code unit.
	n := len(b) >> 1
	chars := make([]uint16, n)
	for i := 0; i < n; i++ {
		chars[i] = uint16(b[2*i])<<8 | uint16(b[2*i+1])
	}

	// utf16.Decode merges surrogate pairs into single runes.
	return utf16.Decode(chars)
}
// hexToRune decodes `shex` with hexToRunes and returns the first resulting rune.
// When nothing is decoded it returns MissingCodeRune. When more than one rune is decoded,
// the runes after the first are dropped. Both situations are reported in the debug log.
func hexToRune(shex cmapHexString) rune {
	decoded := hexToRunes(shex)
	switch count := len(decoded); {
	case count == 0:
		common.Log.Debug("ERROR: hexToRune. Expected at least one rune shex=%#v", shex)
		return MissingCodeRune
	case count > 1:
		common.Log.Debug("ERROR: hexToRune. Expected exactly one rune shex=%#v -> %#v", shex, decoded)
	}
	return decoded[0]
}
// min returns the smaller of `i` and `j`.
func min(i, j int) int {
	smallest := j
	if i < smallest {
		smallest = i
	}
	return smallest
}