mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-02 22:17:06 +08:00
Merge pull request #247 from dennwc/fonts_minor_2
Minor improvements to the fonts encoders
This commit is contained in:
commit
b5abc6925e
@ -6,6 +6,8 @@
|
||||
package textencoding
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
|
||||
"github.com/unidoc/unidoc/common"
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
)
|
||||
@ -50,12 +52,13 @@ type TextEncoder interface {
|
||||
|
||||
// Convenience functions
|
||||
|
||||
// doEncode converts a Go unicode string `raw` to a PDF encoded string using the encoder `enc`.
|
||||
func doEncode(enc TextEncoder, raw string) []byte {
|
||||
encoded := []byte{}
|
||||
// encodeString8bit converts a Go unicode string `raw` to a PDF encoded string using the encoder `enc`.
|
||||
// It expects that character codes will fit into a single byte.
|
||||
func encodeString8bit(enc TextEncoder, raw string) []byte {
|
||||
encoded := make([]byte, 0, len(raw))
|
||||
for _, r := range raw {
|
||||
code, found := enc.RuneToCharcode(r)
|
||||
if !found {
|
||||
if !found || code > 0xff {
|
||||
common.Log.Debug("Failed to map rune to charcode for rune 0x%04x", r)
|
||||
continue
|
||||
}
|
||||
@ -64,6 +67,27 @@ func doEncode(enc TextEncoder, raw string) []byte {
|
||||
return encoded
|
||||
}
|
||||
|
||||
// encodeString16bit converts a Go unicode string `raw` to a PDF encoded string using the encoder `enc`.
|
||||
// Each character will be encoded as two bytes.
|
||||
func encodeString16bit(enc TextEncoder, raw string) []byte {
|
||||
// runes -> character codes -> bytes
|
||||
runes := []rune(raw)
|
||||
encoded := make([]byte, 0, len(runes)*2)
|
||||
for _, r := range runes {
|
||||
code, ok := enc.RuneToCharcode(r)
|
||||
if !ok {
|
||||
common.Log.Debug("Failed to map rune to charcode. rune=%+q", r)
|
||||
continue
|
||||
}
|
||||
|
||||
// Each entry represented by 2 bytes.
|
||||
var v [2]byte
|
||||
binary.BigEndian.PutUint16(v[:], code)
|
||||
encoded = append(encoded, v[:]...)
|
||||
}
|
||||
return encoded
|
||||
}
|
||||
|
||||
// doRuneToCharcode converts rune `r` to a PDF character code.
|
||||
// The bool return flag is true if there was a match, and false otherwise.
|
||||
func doRuneToCharcode(enc TextEncoder, r rune) (uint16, bool) {
|
||||
|
@ -6,10 +6,10 @@
|
||||
package textencoding
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/unidoc/unidoc/common"
|
||||
"github.com/unidoc/unidoc/pdf/core"
|
||||
)
|
||||
|
||||
@ -31,20 +31,7 @@ func (enc IdentityEncoder) String() string {
|
||||
|
||||
// Encode converts the Go unicode string `raw` to a PDF encoded string.
|
||||
func (enc IdentityEncoder) Encode(raw string) []byte {
|
||||
// runes -> character codes -> bytes
|
||||
var encoded bytes.Buffer
|
||||
for _, r := range raw {
|
||||
code, ok := enc.RuneToCharcode(r)
|
||||
if !ok {
|
||||
common.Log.Debug("Failed to map rune to charcode. rune=%+q", r)
|
||||
continue
|
||||
}
|
||||
|
||||
// Each entry represented by 2 bytes.
|
||||
encoded.WriteByte(byte((code & 0xff00) >> 8))
|
||||
encoded.WriteByte(byte(code & 0xff))
|
||||
}
|
||||
return encoded.Bytes()
|
||||
return encodeString16bit(enc, raw)
|
||||
}
|
||||
|
||||
// CharcodeToGlyph returns the glyph name matching character code `code`.
|
||||
@ -63,17 +50,11 @@ func (enc IdentityEncoder) CharcodeToGlyph(code uint16) (string, bool) {
|
||||
// GlyphToCharcode returns the character code matching glyph `glyph`.
|
||||
// The bool return flag is true if there was a match, and false otherwise.
|
||||
func (enc IdentityEncoder) GlyphToCharcode(glyph string) (uint16, bool) {
|
||||
// String with "uniXXXX" format where XXXX is the hexcode.
|
||||
if len(glyph) == 7 && glyph[0:3] == "uni" {
|
||||
var unicode uint16
|
||||
n, err := fmt.Sscanf(glyph, "uni%X", &unicode)
|
||||
if n == 1 && err == nil {
|
||||
return enc.RuneToCharcode(rune(unicode))
|
||||
}
|
||||
r, ok := enc.GlyphToRune(glyph)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
common.Log.Debug("Symbol encoding error: unable to find glyph->charcode entry (%s)", glyph)
|
||||
return 0, false
|
||||
return enc.RuneToCharcode(r)
|
||||
}
|
||||
|
||||
// RuneToCharcode converts rune `r` to a PDF character code.
|
||||
@ -91,7 +72,7 @@ func (enc IdentityEncoder) CharcodeToRune(code uint16) (rune, bool) {
|
||||
// RuneToGlyph returns the glyph name for rune `r`.
|
||||
// The bool return flag is true if there was a match, and false otherwise.
|
||||
func (enc IdentityEncoder) RuneToGlyph(r rune) (string, bool) {
|
||||
if r == 0x20 {
|
||||
if r == ' ' {
|
||||
return "space", true
|
||||
}
|
||||
glyph := fmt.Sprintf("uni%.4X", r)
|
||||
@ -102,14 +83,16 @@ func (enc IdentityEncoder) RuneToGlyph(r rune) (string, bool) {
|
||||
// The bool return flag is true if there was a match, and false otherwise.
|
||||
func (enc IdentityEncoder) GlyphToRune(glyph string) (rune, bool) {
|
||||
// String with "uniXXXX" format where XXXX is the hexcode.
|
||||
if len(glyph) == 7 && glyph[0:3] == "uni" {
|
||||
unicode := uint16(0)
|
||||
n, err := fmt.Sscanf(glyph, "uni%X", &unicode)
|
||||
if n == 1 && err == nil {
|
||||
return rune(unicode), true
|
||||
}
|
||||
if glyph == "space" {
|
||||
return ' ', true
|
||||
} else if !strings.HasPrefix(glyph, "uni") || len(glyph) != 7 {
|
||||
return 0, false
|
||||
}
|
||||
return 0, false
|
||||
r, err := strconv.ParseUint(glyph[3:], 16, 16)
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
return rune(r), true
|
||||
}
|
||||
|
||||
// ToPdfObject returns nil, as this is not truly a PDF object and no attempt should be made to store it in a file.
|
||||
|
@ -117,7 +117,7 @@ func (se SimpleEncoder) String() string {
|
||||
|
||||
// Encode converts a Go unicode string `raw` to a PDF encoded string.
|
||||
func (se SimpleEncoder) Encode(raw string) []byte {
|
||||
return doEncode(se, raw)
|
||||
return encodeString8bit(se, raw)
|
||||
}
|
||||
|
||||
// CharcodeToGlyph returns the glyph name for character code `code`.
|
||||
|
@ -6,7 +6,6 @@
|
||||
package textencoding
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
@ -64,20 +63,7 @@ func (enc TrueTypeFontEncoder) String() string {
|
||||
|
||||
// Encode converts the Go unicode string `raw` to a PDF encoded string.
|
||||
func (enc TrueTypeFontEncoder) Encode(raw string) []byte {
|
||||
// runes -> character codes -> bytes
|
||||
var encoded bytes.Buffer
|
||||
for _, r := range raw {
|
||||
code, ok := enc.RuneToCharcode(r)
|
||||
if !ok {
|
||||
common.Log.Debug("Failed to map rune to charcode. rune=%+q", r)
|
||||
continue
|
||||
}
|
||||
|
||||
// Each entry represented by 2 bytes.
|
||||
encoded.WriteByte(byte((code & 0xff00) >> 8))
|
||||
encoded.WriteByte(byte(code & 0xff))
|
||||
}
|
||||
return encoded.Bytes()
|
||||
return encodeString16bit(enc, raw)
|
||||
}
|
||||
|
||||
// CharcodeToGlyph returns the glyph name matching character code `code`.
|
||||
|
Loading…
x
Reference in New Issue
Block a user