mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-01 22:17:29 +08:00

Simplified the creator's paragraph handling of text encoding. Character codes are expanded to 16 bits instead of 8 bits.
39 lines
840 B
Go
39 lines
840 B
Go
package textencoding
|
|
|
|
import (
|
|
"errors"
|
|
)
|
|
|
|
// CID represents a character identifier, stored as an unsigned 16-bit value.
type CID uint16

// CMap maps character codes to CIDs.
type CMap interface {
	// CharacterCodesToCID converts raw character-code bytes into the
	// corresponding CIDs. It returns an error when the bytes cannot be
	// interpreted by the implementing cmap.
	CharacterCodesToCID(charcodes []byte) ([]CID, error)
}

// CMapIdentityH is a representation of the /Identity-H cmap.
// It carries no state, so the zero value is ready to use.
type CMapIdentityH struct{}

// Compile-time check that CMapIdentityH satisfies CMap.
var _ CMap = CMapIdentityH{}

// CharacterCodesToCID converts charcodes to CIDs for the Identity CMap, which
// maps 2-byte character codes (from the raw data) from 0-65535 to the same
// 2-byte CID value.
//
// raw is read as a sequence of big-endian byte pairs: the first byte of each
// pair is the high-order byte of the resulting CID. The returned slice holds
// len(raw)/2 CIDs and is non-nil (but empty) when raw is empty.
//
// An error is returned, together with a nil slice, when raw has an odd number
// of bytes and therefore cannot be split into complete 2-byte codes.
func (cmap CMapIdentityH) CharacterCodesToCID(raw []byte) ([]CID, error) {
	if len(raw)%2 != 0 {
		// The message text is kept exactly as it was so that existing
		// callers or logs matching on it keep working.
		return nil, errors.New("Range error")
	}

	// The number of CIDs is known up front, so allocate once instead of
	// growing the slice with append.
	cids := make([]CID, len(raw)/2)
	for i := range cids {
		hi := uint16(raw[2*i])
		lo := uint16(raw[2*i+1])
		cids[i] = CID(hi<<8 | lo)
	}

	return cids, nil
}
|