mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-26 13:48:55 +08:00
Add basic support for UTF-16 text encodings (#203)
* Add UTF-16 text encoder
This commit is contained in:
parent
1e26aa81f6
commit
23aec77478
57
internal/textencoding/utf16.go
Normal file
57
internal/textencoding/utf16.go
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package textencoding
|
||||
|
||||
import (
|
||||
"github.com/unidoc/unipdf/v3/core"
|
||||
"github.com/unidoc/unipdf/v3/internal/strutils"
|
||||
)
|
||||
|
||||
// UTF16Encoder represents UTF-16 encoding, identified by the name of the
// predefined encoding it was created for.
type UTF16Encoder struct {
	// baseName is the name of the predefined encoding. It is the value
	// reported by String and, when non-empty, emitted as a PDF name by
	// ToPdfObject.
	baseName string
}
|
||||
|
||||
// NewUTF16TextEncoder returns a new UTF16Encoder based on the predefined
|
||||
// encoding `baseName`.
|
||||
func NewUTF16TextEncoder(baseName string) UTF16Encoder {
|
||||
return UTF16Encoder{baseName}
|
||||
}
|
||||
|
||||
// String returns a string that describes `enc`.
|
||||
func (enc UTF16Encoder) String() string {
|
||||
return enc.baseName
|
||||
}
|
||||
|
||||
// Encode converts the Go unicode string to a PDF encoded string.
|
||||
func (enc UTF16Encoder) Encode(str string) []byte {
|
||||
return []byte(strutils.StringToUTF16(str))
|
||||
}
|
||||
|
||||
// Decode converts PDF encoded string to a Go unicode string.
|
||||
func (enc UTF16Encoder) Decode(raw []byte) string {
|
||||
return strutils.UTF16ToString(raw)
|
||||
}
|
||||
|
||||
// RuneToCharcode converts rune `r` to a PDF character code.
|
||||
// The bool return flag is true if there was a match, and false otherwise.
|
||||
func (enc UTF16Encoder) RuneToCharcode(r rune) (CharCode, bool) {
|
||||
return CharCode(r), true
|
||||
}
|
||||
|
||||
// CharcodeToRune converts PDF character code `code` to a rune.
|
||||
// The bool return flag is true if there was a match, and false otherwise.
|
||||
func (enc UTF16Encoder) CharcodeToRune(code CharCode) (rune, bool) {
|
||||
return rune(code), true
|
||||
}
|
||||
|
||||
// ToPdfObject returns a PDF Object that represents the encoding.
|
||||
func (enc UTF16Encoder) ToPdfObject() core.PdfObject {
|
||||
if enc.baseName != "" {
|
||||
return core.MakeName(enc.baseName)
|
||||
}
|
||||
return core.MakeNull()
|
||||
}
|
@ -194,9 +194,23 @@ func newPdfFontType0FromPdfObject(d *core.PdfObjectDictionary, base *fontCommon)
|
||||
|
||||
encoderName, ok := core.GetNameVal(d.Get("Encoding"))
|
||||
if ok {
|
||||
if encoderName == "Identity-H" || encoderName == "Identity-V" {
|
||||
switch encoderName {
|
||||
case "Identity-H", "Identity-V":
|
||||
font.encoder = textencoding.NewIdentityTextEncoder(encoderName)
|
||||
} else {
|
||||
case
|
||||
// Reference: https://www.adobe.com/content/dam/acom/en/devnet/font/pdfs/5094.CJK_CID.pdf
|
||||
// Adobe-GB1-4, Adobe-GB1-5
|
||||
"UniGB-UTF16-H", "UniGB-UTF16-V",
|
||||
// Adobe-CNS1-4, Adobe-CNS1-5
|
||||
"UniCNS-UTF16-H", "UniCNS-UTF16-V",
|
||||
// Adobe-Japan1-4, Adobe-Japan1-5, Adobe-Japan1-6
|
||||
"UniJIS-UTF16-H", "UniJIS-UTF16-V", "UniJIS2004-UTF16-H",
|
||||
// Adobe-Japan2-0
|
||||
"UniHojo-UTF16-H", "UniHojo-UTF16-V",
|
||||
// Adobe-Korea1-2
|
||||
"UniKS-UTF16-H", "UniKS-UTF16-V":
|
||||
font.encoder = textencoding.NewUTF16TextEncoder(encoderName)
|
||||
default:
|
||||
common.Log.Debug("Unhandled cmap %q", encoderName)
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user