mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-14 19:29:50 +08:00
Track runes in IdentityEncoder (for subsetting), track decoded runes
This commit is contained in:
parent
5efaa02e23
commit
ba1058fa37
@ -16,44 +16,69 @@ import (
|
||||
// IdentityEncoder represents a 2-byte identity encoding: runes and character
// codes are mapped to each other by plain numeric conversion.
|
||||
type IdentityEncoder struct {
|
||||
// baseName is the name of the encoding; it is the value returned by String.
baseName string
|
||||
|
||||
// registeredMap is the set of runes seen by RuneToCharcode and CharcodeToRune,
// tracked so that subsetting knows which runes are used. It is allocated
// lazily and therefore stays nil until the first rune is registered.
|
||||
registeredMap map[rune]struct{}
|
||||
}
|
||||
|
||||
// NewIdentityTextEncoder returns a new IdentityEncoder based on predefined
|
||||
// encoding `baseName`. The returned encoder has no runes registered yet.
|
||||
func NewIdentityTextEncoder(baseName string) IdentityEncoder {
|
||||
return IdentityEncoder{baseName}
|
||||
func NewIdentityTextEncoder(baseName string) *IdentityEncoder {
|
||||
return &IdentityEncoder{
|
||||
// Only the name is set here; registeredMap is left nil and is allocated
// on first use by RuneToCharcode / CharcodeToRune.
baseName: baseName,
|
||||
}
|
||||
}
|
||||
|
||||
// RegisteredRunes returns the slice of runes that have been registered as used by the encoder.
|
||||
func (enc *IdentityEncoder) RegisteredRunes() []rune {
|
||||
runes := make([]rune, len(enc.registeredMap))
|
||||
i := 0
|
||||
for r := range enc.registeredMap {
|
||||
runes[i] = r
|
||||
i++
|
||||
}
|
||||
return runes
|
||||
}
|
||||
|
||||
// String returns a string that describes `enc`: the encoder's base name.
|
||||
func (enc IdentityEncoder) String() string {
|
||||
func (enc *IdentityEncoder) String() string {
|
||||
return enc.baseName
|
||||
}
|
||||
|
||||
// Encode converts the Go unicode string to a PDF encoded string.
// The work is delegated to encodeString16bit with `enc` as the encoder.
// NOTE(review): presumably encodeString16bit calls RuneToCharcode per rune, in
// which case encoding also registers every rune of `str` — confirm in the helper.
|
||||
func (enc IdentityEncoder) Encode(str string) []byte {
|
||||
func (enc *IdentityEncoder) Encode(str string) []byte {
|
||||
return encodeString16bit(enc, str)
|
||||
}
|
||||
|
||||
// Decode converts PDF encoded string to a Go unicode string.
// The work is delegated to decodeString16bit with `enc` as the encoder.
// NOTE(review): presumably decodeString16bit calls CharcodeToRune per code, in
// which case decoding also registers every decoded rune — confirm in the helper.
|
||||
func (enc IdentityEncoder) Decode(raw []byte) string {
|
||||
func (enc *IdentityEncoder) Decode(raw []byte) string {
|
||||
return decodeString16bit(enc, raw)
|
||||
}
|
||||
|
||||
// RuneToCharcode converts rune `r` to a PDF character code.
|
||||
// The bool return flag is true if there was a match, and false otherwise.
// For this encoder the code is always CharCode(r) and the flag is always true.
// As a side effect, `r` is added to the set of registered runes.
// NOTE(review): the encoding is described as 2-byte, yet runes above 0xFFFF
// are also reported as a match — confirm how CharCode(r) behaves for them.
|
||||
func (enc IdentityEncoder) RuneToCharcode(r rune) (CharCode, bool) {
|
||||
// The pointer receiver lets the registration below persist on the caller's encoder.
func (enc *IdentityEncoder) RuneToCharcode(r rune) (CharCode, bool) {
|
||||
// Allocate the set on first use; assigning into a nil map would panic.
if enc.registeredMap == nil {
|
||||
enc.registeredMap = map[rune]struct{}{}
|
||||
}
|
||||
enc.registeredMap[r] = struct{}{} // Register use (subsetting).
|
||||
|
||||
return CharCode(r), true
|
||||
}
|
||||
|
||||
// CharcodeToRune converts PDF character code `code` to a rune.
|
||||
// The bool return flag is true if there was a match, and false otherwise.
// For this encoder the rune is always rune(code) and the flag is always true.
// As a side effect, the decoded rune is added to the set of registered runes.
|
||||
func (enc IdentityEncoder) CharcodeToRune(code CharCode) (rune, bool) {
|
||||
// The pointer receiver lets the registration below persist on the caller's encoder.
func (enc *IdentityEncoder) CharcodeToRune(code CharCode) (rune, bool) {
|
||||
// Allocate the set on first use; assigning into a nil map would panic.
if enc.registeredMap == nil {
|
||||
enc.registeredMap = map[rune]struct{}{}
|
||||
}
|
||||
// Track decoded runes too, so they are retained when the font is subset.
enc.registeredMap[rune(code)] = struct{}{}
|
||||
return rune(code), true
|
||||
}
|
||||
|
||||
// RuneToGlyph returns the glyph name for rune `r`.
|
||||
// The bool return flag is true if there was a match, and false otherwise.
|
||||
func (enc IdentityEncoder) RuneToGlyph(r rune) (GlyphName, bool) {
|
||||
func (enc *IdentityEncoder) RuneToGlyph(r rune) (GlyphName, bool) {
|
||||
if r == ' ' {
|
||||
return "space", true
|
||||
}
|
||||
@ -63,7 +88,7 @@ func (enc IdentityEncoder) RuneToGlyph(r rune) (GlyphName, bool) {
|
||||
|
||||
// GlyphToRune returns the rune corresponding to glyph name `glyph`.
|
||||
// The bool return flag is true if there was a match, and false otherwise.
|
||||
func (enc IdentityEncoder) GlyphToRune(glyph GlyphName) (rune, bool) {
|
||||
func (enc *IdentityEncoder) GlyphToRune(glyph GlyphName) (rune, bool) {
|
||||
// String with "uniXXXX" format where XXXX is the hexcode.
|
||||
if glyph == "space" {
|
||||
return ' ', true
|
||||
@ -78,7 +103,7 @@ func (enc IdentityEncoder) GlyphToRune(glyph GlyphName) (rune, bool) {
|
||||
}
|
||||
|
||||
// ToPdfObject returns a nil as it is not truly a PDF object and should not be attempted to store in file.
|
||||
func (enc IdentityEncoder) ToPdfObject() core.PdfObject {
|
||||
func (enc *IdentityEncoder) ToPdfObject() core.PdfObject {
|
||||
if enc.baseName != "" {
|
||||
return core.MakeName(enc.baseName)
|
||||
}
|
||||
|
@ -210,10 +210,14 @@ func (font *pdfFontType0) subsetRegistered() error {
|
||||
common.Log.Debug("Missing font descriptor")
|
||||
return nil
|
||||
}
|
||||
if font.encoder == nil {
|
||||
common.Log.Debug("No encoder - subsetting ignored")
|
||||
return nil
|
||||
}
|
||||
|
||||
stream, ok := core.GetStream(cidfnt.fontDescriptor.FontFile2)
|
||||
if !ok {
|
||||
common.Log.Debug("Embedded font object not found -- ABORT subsseting")
|
||||
common.Log.Debug("Embedded font object not found -- ABORT subsetting")
|
||||
return errors.New("fontfile2 not found")
|
||||
}
|
||||
decoded, err := core.DecodeStream(stream)
|
||||
@ -227,18 +231,31 @@ func (font *pdfFontType0) subsetRegistered() error {
|
||||
return err
|
||||
}
|
||||
|
||||
tenc, ok := font.encoder.(*textencoding.TrueTypeFontEncoder)
|
||||
if !ok {
|
||||
return fmt.Errorf("unsupported encoder for subsetting: %T", cidfnt.encoder)
|
||||
var runes []rune
|
||||
var subset *unitype.Font
|
||||
switch tenc := font.encoder.(type) {
|
||||
case *textencoding.TrueTypeFontEncoder:
|
||||
// Means the font has been loaded from TTF file.
|
||||
runes = tenc.RegisteredRunes()
|
||||
subset, err = fnt.SubsetKeepRunes(runes)
|
||||
if err != nil {
|
||||
common.Log.Debug("ERROR: %v", err)
|
||||
return err
|
||||
}
|
||||
// Reduce the encoder also.
|
||||
tenc.SubsetRegistered()
|
||||
case *textencoding.IdentityEncoder:
|
||||
// IdentityEncoder typically means font was parsed from PDF file.
|
||||
runes = tenc.RegisteredRunes()
|
||||
subset, err = fnt.SubsetKeepRunes(runes)
|
||||
if err != nil {
|
||||
common.Log.Debug("ERROR: %v", err)
|
||||
return err
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unsupported encoder for subsetting: %T", font.encoder)
|
||||
}
|
||||
|
||||
runes := tenc.RegisteredRunes()
|
||||
subset, err := fnt.SubsetKeepRunes(runes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Reduce the encoder also.
|
||||
tenc.SubsetRegistered()
|
||||
var buf bytes.Buffer
|
||||
err = subset.Write(&buf)
|
||||
if err != nil {
|
||||
@ -249,7 +266,7 @@ func (font *pdfFontType0) subsetRegistered() error {
|
||||
if font.toUnicodeCmap != nil {
|
||||
codeToUnicode := make(map[cmap.CharCode]rune, len(runes))
|
||||
for _, r := range runes {
|
||||
cc, ok := tenc.RuneToCharcode(r)
|
||||
cc, ok := font.encoder.RuneToCharcode(r)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user