/*
 * This file is subject to the terms and conditions defined in
 * file 'LICENSE.md', which is part of this source code package.
 */
package textencoding
import (
	"encoding/binary"
"github.com/unidoc/unidoc/common"
	"github.com/unidoc/unidoc/pdf/core"
)
// CharCode is a PDF character code: the numeric code that TextEncoder
// implementations map to and from glyph names and runes.
// Its underlying type is uint16, so a code always fits in two bytes.
type CharCode uint16
2018-08-03 10:18:44 +00:00
// TextEncoder defines the common methods that a text encoder implementation must have in UniDoc.
type TextEncoder interface {
// String returns a string that describes the TextEncoder instance.
String() string
// Encode converts the Go unicode string `raw` to a PDF encoded string.
Encode(raw string) []byte
// CharcodeToGlyph returns the glyph name for character code `code`.
// The bool return flag is true if there was a match, and false otherwise.
CharcodeToGlyph(code CharCode) (string, bool)
// GlyphToCharcode returns the PDF character code corresponding to glyph name `glyph`.
// The bool return flag is true if there was a match, and false otherwise.
GlyphToCharcode(glyph string) (CharCode, bool)
// RuneToCharcode returns the PDF character code corresponding to rune `r`.
// The bool return flag is true if there was a match, and false otherwise.
// This is usually implemented as RuneToGlyph->GlyphToCharcode
RuneToCharcode(r rune) (CharCode, bool)
// CharcodeToRune returns the rune corresponding to character code `code`.
// The bool return flag is true if there was a match, and false otherwise.
// This is usually implemented as CharcodeToGlyph->GlyphToRune
CharcodeToRune(code CharCode) (rune, bool)
// RuneToGlyph returns the glyph name for rune `r`.
// The bool return flag is true if there was a match, and false otherwise.
RuneToGlyph(r rune) (string, bool)
// GlyphToRune returns the rune corresponding to glyph name `glyph`.
// The bool return flag is true if there was a match, and false otherwise.
GlyphToRune(glyph string) (rune, bool)
// ToPdfObject returns a PDF Object that represents the encoding.
2018-07-24 21:32:02 +10:00
ToPdfObject() core.PdfObject
}
// Convenience functions
2018-10-16 02:32:17 +03:00
// encodeString8bit converts a Go unicode string `raw` to a PDF encoded string using the encoder `enc`.
// It expects that character codes will fit into a single byte.
func encodeString8bit(enc TextEncoder, raw string) []byte {
encoded := make([]byte, 0, len(raw))
for _, r := range raw {
code, found := enc.RuneToCharcode(r)
2018-10-16 02:32:17 +03:00
if !found || code > 0xff {
common.Log.Debug("Failed to map rune to charcode for rune 0x%04x", r)
continue
}
encoded = append(encoded, byte(code))
}
return encoded
}
2018-10-16 02:32:17 +03:00
// encodeString16bit converts a Go unicode string `raw` to a PDF encoded string using the encoder `enc`.
// Each character will be encoded as two bytes.
func encodeString16bit(enc TextEncoder, raw string) []byte {
// runes -> character codes -> bytes
runes := []rune(raw)
encoded := make([]byte, 0, len(runes)*2)
for _, r := range runes {
code, ok := enc.RuneToCharcode(r)
if !ok {
common.Log.Debug("Failed to map rune to charcode. rune=%+q", r)
continue
}
// Each entry represented by 2 bytes.
var v [2]byte
binary.BigEndian.PutUint16(v[:], uint16(code))
2018-10-16 02:32:17 +03:00
encoded = append(encoded, v[:]...)
}
return encoded
}
// doRuneToCharcode maps rune `r` to a PDF character code in two steps: the rune is first
// resolved to a glyph name with enc.RuneToGlyph, and that glyph name is then resolved with
// enc.GlyphToCharcode, whose result is returned unchanged.
// The bool return flag is true if there was a match, and false otherwise; when the rune has
// no glyph the result is (0, false).
func doRuneToCharcode(enc TextEncoder, r rune) (CharCode, bool) {
	if glyph, found := enc.RuneToGlyph(r); found {
		return enc.GlyphToCharcode(glyph)
	}
	return 0, false
}
// doCharcodeToRune maps PDF character code `code` to a rune in two steps: the code is first
// resolved to a glyph name with enc.CharcodeToGlyph, and that glyph name is then resolved with
// enc.GlyphToRune, whose result is returned unchanged.
// The bool return flag is true if there was a match, and false otherwise; when the code has
// no glyph the result is (0, false).
func doCharcodeToRune(enc TextEncoder, code CharCode) (rune, bool) {
	if glyph, found := enc.CharcodeToGlyph(code); found {
		return enc.GlyphToRune(glyph)
	}
	return 0, false
}