Add basic support for UTF-16 text encodings (#203)

* Add UTF-16 text encoder
This commit is contained in:
Adrian-George Bostan 2019-11-28 02:47:00 +02:00 committed by Gunnsteinn Hall
parent 1e26aa81f6
commit 23aec77478
2 changed files with 73 additions and 2 deletions

View File

@ -0,0 +1,57 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package textencoding
import (
"github.com/unidoc/unipdf/v3/core"
"github.com/unidoc/unipdf/v3/internal/strutils"
)
// UTF16Encoder is a text encoder for UTF-16 encoded text. It carries only
// the name of the predefined encoding it was created for.
type UTF16Encoder struct {
	// baseName is the predefined encoding name supplied at construction time.
	baseName string
}
// NewUTF16TextEncoder builds a UTF16Encoder that remembers `baseName`, the
// name of the predefined encoding it stands for. The name is stored as given;
// no validation is performed here.
func NewUTF16TextEncoder(baseName string) UTF16Encoder {
	return UTF16Encoder{baseName: baseName}
}
// String describes `enc` by returning the encoding name it was created with.
func (enc UTF16Encoder) String() string {
	name := enc.baseName
	return name
}
// Encode turns the Go unicode string `str` into its PDF encoded byte form.
// The conversion itself is delegated to strutils.StringToUTF16; this method
// only exposes the result as a byte slice.
func (enc UTF16Encoder) Encode(str string) []byte {
	encoded := strutils.StringToUTF16(str)
	return []byte(encoded)
}
// Decode turns the PDF encoded bytes `raw` back into a Go unicode string.
// The conversion itself is delegated to strutils.UTF16ToString.
func (enc UTF16Encoder) Decode(raw []byte) string {
	decoded := strutils.UTF16ToString(raw)
	return decoded
}
// RuneToCharcode converts rune `r` to a PDF character code.
// The bool return flag is true if there was a match, and false otherwise.
//
// A rune only matches when it is non-negative and survives the conversion to
// CharCode unchanged. Runes that do not fit in CharCode would otherwise be
// silently truncated to an unrelated character code, so for those the method
// returns (0, false) instead.
func (enc UTF16Encoder) RuneToCharcode(r rune) (CharCode, bool) {
	code := CharCode(r)
	// Round-trip check: independent of the exact width of CharCode.
	if r < 0 || rune(code) != r {
		return 0, false
	}
	return code, true
}
// CharcodeToRune maps the PDF character code `code` to a rune by a direct
// numeric conversion. The bool flag signals whether a match was found; this
// encoder reports true for every code.
func (enc UTF16Encoder) CharcodeToRune(code CharCode) (rune, bool) {
	r := rune(code)
	return r, true
}
// ToPdfObject returns the PDF object that represents this encoding: a name
// object made from the stored encoding name, or a null object when no name
// was set.
func (enc UTF16Encoder) ToPdfObject() core.PdfObject {
	if enc.baseName == "" {
		return core.MakeNull()
	}
	return core.MakeName(enc.baseName)
}

View File

@ -194,9 +194,23 @@ func newPdfFontType0FromPdfObject(d *core.PdfObjectDictionary, base *fontCommon)
encoderName, ok := core.GetNameVal(d.Get("Encoding"))
if ok {
if encoderName == "Identity-H" || encoderName == "Identity-V" {
switch encoderName {
case "Identity-H", "Identity-V":
font.encoder = textencoding.NewIdentityTextEncoder(encoderName)
} else {
case
// Reference: https://www.adobe.com/content/dam/acom/en/devnet/font/pdfs/5094.CJK_CID.pdf
// Adobe-GB1-4, Adobe-GB1-5
"UniGB-UTF16-H", "UniGB-UTF16-V",
// Adobe-CNS1-4, Adobe-CNS1-5
"UniCNS-UTF16-H", "UniCNS-UTF16-V",
// Adobe-Japan1-4, Adobe-Japan1-5, Adobe-Japan1-6
"UniJIS-UTF16-H", "UniJIS-UTF16-V", "UniJIS2004-UTF16-H",
// Adobe-Japan2-0
"UniHojo-UTF16-H", "UniHojo-UTF16-V",
// Adobe-Korea1-2
"UniKS-UTF16-H", "UniKS-UTF16-V":
font.encoder = textencoding.NewUTF16TextEncoder(encoderName)
default:
common.Log.Debug("Unhandled cmap %q", encoderName)
}
}