mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
Add basic support for UTF-16 text encodings (#203)
* Add UTF-16 text encoder
This commit is contained in:
parent
1e26aa81f6
commit
23aec77478
57
internal/textencoding/utf16.go
Normal file
57
internal/textencoding/utf16.go
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
/*
|
||||||
|
* This file is subject to the terms and conditions defined in
|
||||||
|
* file 'LICENSE.md', which is part of this source code package.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package textencoding
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/unidoc/unipdf/v3/core"
|
||||||
|
"github.com/unidoc/unipdf/v3/internal/strutils"
|
||||||
|
)
|
||||||
|
|
||||||
|
// UTF16Encoder is a text encoder for the UTF-16 encoding.
type UTF16Encoder struct {
	// baseName is the name of the predefined encoding this encoder was
	// created for; String and ToPdfObject report it back to callers.
	baseName string
}
|
||||||
|
|
||||||
|
// NewUTF16TextEncoder returns a new UTF16Encoder based on the predefined
|
||||||
|
// encoding `baseName`.
|
||||||
|
func NewUTF16TextEncoder(baseName string) UTF16Encoder {
|
||||||
|
return UTF16Encoder{baseName}
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns a string that describes `enc`.
|
||||||
|
func (enc UTF16Encoder) String() string {
|
||||||
|
return enc.baseName
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encode converts the Go unicode string to a PDF encoded string.
|
||||||
|
func (enc UTF16Encoder) Encode(str string) []byte {
|
||||||
|
return []byte(strutils.StringToUTF16(str))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decode converts PDF encoded string to a Go unicode string.
|
||||||
|
func (enc UTF16Encoder) Decode(raw []byte) string {
|
||||||
|
return strutils.UTF16ToString(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RuneToCharcode converts rune `r` to a PDF character code.
|
||||||
|
// The bool return flag is true if there was a match, and false otherwise.
|
||||||
|
func (enc UTF16Encoder) RuneToCharcode(r rune) (CharCode, bool) {
|
||||||
|
return CharCode(r), true
|
||||||
|
}
|
||||||
|
|
||||||
|
// CharcodeToRune converts PDF character code `code` to a rune.
|
||||||
|
// The bool return flag is true if there was a match, and false otherwise.
|
||||||
|
func (enc UTF16Encoder) CharcodeToRune(code CharCode) (rune, bool) {
|
||||||
|
return rune(code), true
|
||||||
|
}
|
||||||
|
|
||||||
|
// ToPdfObject returns a PDF Object that represents the encoding.
|
||||||
|
func (enc UTF16Encoder) ToPdfObject() core.PdfObject {
|
||||||
|
if enc.baseName != "" {
|
||||||
|
return core.MakeName(enc.baseName)
|
||||||
|
}
|
||||||
|
return core.MakeNull()
|
||||||
|
}
|
@ -194,9 +194,23 @@ func newPdfFontType0FromPdfObject(d *core.PdfObjectDictionary, base *fontCommon)
|
|||||||
|
|
||||||
encoderName, ok := core.GetNameVal(d.Get("Encoding"))
|
encoderName, ok := core.GetNameVal(d.Get("Encoding"))
|
||||||
if ok {
|
if ok {
|
||||||
if encoderName == "Identity-H" || encoderName == "Identity-V" {
|
switch encoderName {
|
||||||
|
case "Identity-H", "Identity-V":
|
||||||
font.encoder = textencoding.NewIdentityTextEncoder(encoderName)
|
font.encoder = textencoding.NewIdentityTextEncoder(encoderName)
|
||||||
} else {
|
case
|
||||||
|
// Reference: https://www.adobe.com/content/dam/acom/en/devnet/font/pdfs/5094.CJK_CID.pdf
|
||||||
|
// Adobe-GB1-4, Adobe-GB1-5
|
||||||
|
"UniGB-UTF16-H", "UniGB-UTF16-V",
|
||||||
|
// Adobe-CNS1-4, Adobe-CNS1-5
|
||||||
|
"UniCNS-UTF16-H", "UniCNS-UTF16-V",
|
||||||
|
// Adobe-Japan1-4, Adobe-Japan1-5, Adobe-Japan1-6
|
||||||
|
"UniJIS-UTF16-H", "UniJIS-UTF16-V", "UniJIS2004-UTF16-H",
|
||||||
|
// Adobe-Japan2-0
|
||||||
|
"UniHojo-UTF16-H", "UniHojo-UTF16-V",
|
||||||
|
// Adobe-Korea1-2
|
||||||
|
"UniKS-UTF16-H", "UniKS-UTF16-V":
|
||||||
|
font.encoder = textencoding.NewUTF16TextEncoder(encoderName)
|
||||||
|
default:
|
||||||
common.Log.Debug("Unhandled cmap %q", encoderName)
|
common.Log.Debug("Unhandled cmap %q", encoderName)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user