107 lines
3.2 KiB
Go
Raw Normal View History

/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package textencoding
import (
2018-10-16 02:32:17 +03:00
"encoding/binary"
"github.com/unidoc/unipdf/v3/common"
"github.com/unidoc/unipdf/v3/core"
)
2018-11-29 23:24:40 +02:00
// CharCode is a character code used in the specific encoding.
// It is backed by a uint16, so a single code can take values from 0 to 0xffff.
type CharCode uint16
2018-11-29 23:24:40 +02:00
// GlyphName is the name of a glyph, held as a plain string.
type GlyphName string
2018-08-03 10:18:44 +00:00
// TextEncoder defines the common methods that a text encoder implementation must have in UniDoc.
type TextEncoder interface {
// String returns a string that describes the TextEncoder instance.
String() string
// Encode converts the Go unicode string to a PDF encoded string.
Encode(str string) []byte
// Decode converts PDF encoded string to a Go unicode string.
Decode(raw []byte) string
// RuneToCharcode returns the PDF character code corresponding to rune `r`.
// The bool return flag is true if there was a match, and false otherwise.
// This is usually implemented as RuneToGlyph->GlyphToCharcode
RuneToCharcode(r rune) (CharCode, bool)
// CharcodeToRune returns the rune corresponding to character code `code`.
// The bool return flag is true if there was a match, and false otherwise.
// This is usually implemented as CharcodeToGlyph->GlyphToRune
CharcodeToRune(code CharCode) (rune, bool)
// ToPdfObject returns a PDF Object that represents the encoding.
2018-07-24 21:32:02 +10:00
ToPdfObject() core.PdfObject
}
// Convenience functions
2018-10-16 02:32:17 +03:00
// encodeString8bit converts a Go unicode string `raw` to a PDF encoded string using the encoder `enc`.
// It expects that character codes will fit into a single byte.
func encodeString8bit(enc TextEncoder, raw string) []byte {
encoded := make([]byte, 0, len(raw))
for _, r := range raw {
code, found := enc.RuneToCharcode(r)
2018-10-16 02:32:17 +03:00
if !found || code > 0xff {
common.Log.Debug("Failed to map rune to charcode for rune 0x%04x", r)
continue
}
encoded = append(encoded, byte(code))
}
return encoded
}
2018-10-16 02:32:17 +03:00
// encodeString16bit converts a Go unicode string `raw` to a PDF encoded string using the encoder `enc`.
// Each character will be encoded as two bytes.
func encodeString16bit(enc TextEncoder, raw string) []byte {
// runes -> character codes -> bytes
runes := []rune(raw)
encoded := make([]byte, 0, len(runes)*2)
for _, r := range runes {
code, ok := enc.RuneToCharcode(r)
if !ok {
common.Log.Debug("Failed to map rune to charcode. rune=%+q", r)
continue
}
// Each entry represented by 2 bytes.
var v [2]byte
binary.BigEndian.PutUint16(v[:], uint16(code))
2018-10-16 02:32:17 +03:00
encoded = append(encoded, v[:]...)
}
return encoded
}
// decodeString16bit turns the PDF encoded bytes `raw` into a Go unicode string with the help of `enc`.
// The input is consumed two bytes at a time, each pair forming one big-endian character code.
// A trailing single byte is treated as the high byte of a pair whose low byte is zero.
// Codes for which `enc.CharcodeToRune` reports no match are reported through common.Log.Debug
// and contribute nothing to the result.
func decodeString16bit(enc TextEncoder, raw []byte) string {
	total := len(raw)

	// One rune per byte pair, rounding up for a dangling last byte.
	decoded := make([]rune, 0, total/2+total%2)

	var pair [2]byte
	for pos := 0; pos < total; pos += 2 {
		pair[0], pair[1] = raw[pos], 0
		if pos+1 < total {
			pair[1] = raw[pos+1]
		}

		code := CharCode(binary.BigEndian.Uint16(pair[:]))
		if r, ok := enc.CharcodeToRune(code); ok {
			decoded = append(decoded, r)
		} else {
			common.Log.Debug("Failed to map charcode to rune. charcode=%#x", code)
		}
	}
	return string(decoded)
}