diff --git a/internal/textencoding/identity.go b/internal/textencoding/identity.go index 10877558..be8344ec 100644 --- a/internal/textencoding/identity.go +++ b/internal/textencoding/identity.go @@ -16,44 +16,69 @@ import ( // IdentityEncoder represents an 2-byte identity encoding type IdentityEncoder struct { baseName string + + // runes registered by encoder for tracking what runes are used for subsetting. + registeredMap map[rune]struct{} } // NewIdentityTextEncoder returns a new IdentityEncoder based on predefined // encoding `baseName` and difference map `differences`. -func NewIdentityTextEncoder(baseName string) IdentityEncoder { - return IdentityEncoder{baseName} +func NewIdentityTextEncoder(baseName string) *IdentityEncoder { + return &IdentityEncoder{ + baseName: baseName, + } +} + +// RegisteredRunes returns the slice of runes that have been registered as used by the encoder. +func (enc *IdentityEncoder) RegisteredRunes() []rune { + runes := make([]rune, len(enc.registeredMap)) + i := 0 + for r := range enc.registeredMap { + runes[i] = r + i++ + } + return runes } // String returns a string that describes `enc`. -func (enc IdentityEncoder) String() string { +func (enc *IdentityEncoder) String() string { return enc.baseName } // Encode converts the Go unicode string to a PDF encoded string. -func (enc IdentityEncoder) Encode(str string) []byte { +func (enc *IdentityEncoder) Encode(str string) []byte { return encodeString16bit(enc, str) } // Decode converts PDF encoded string to a Go unicode string. -func (enc IdentityEncoder) Decode(raw []byte) string { +func (enc *IdentityEncoder) Decode(raw []byte) string { return decodeString16bit(enc, raw) } // RuneToCharcode converts rune `r` to a PDF character code. // The bool return flag is true if there was a match, and false otherwise. -func (enc IdentityEncoder) RuneToCharcode(r rune) (CharCode, bool) { +func (enc *IdentityEncoder) RuneToCharcode(r rune) (CharCode, bool) { + if enc.registeredMap == nil { + enc.registeredMap = map[rune]struct{}{} + } + enc.registeredMap[r] = struct{}{} // Register use (subsetting). + return CharCode(r), true } // CharcodeToRune converts PDF character code `code` to a rune. // The bool return flag is true if there was a match, and false otherwise. -func (enc IdentityEncoder) CharcodeToRune(code CharCode) (rune, bool) { +func (enc *IdentityEncoder) CharcodeToRune(code CharCode) (rune, bool) { + if enc.registeredMap == nil { + enc.registeredMap = map[rune]struct{}{} + } + enc.registeredMap[rune(code)] = struct{}{} return rune(code), true } // RuneToGlyph returns the glyph name for rune `r`. // The bool return flag is true if there was a match, and false otherwise. -func (enc IdentityEncoder) RuneToGlyph(r rune) (GlyphName, bool) { +func (enc *IdentityEncoder) RuneToGlyph(r rune) (GlyphName, bool) { if r == ' ' { return "space", true } @@ -63,7 +88,7 @@ func (enc IdentityEncoder) RuneToGlyph(r rune) (GlyphName, bool) { // GlyphToRune returns the rune corresponding to glyph name `glyph`. // The bool return flag is true if there was a match, and false otherwise. -func (enc IdentityEncoder) GlyphToRune(glyph GlyphName) (rune, bool) { +func (enc *IdentityEncoder) GlyphToRune(glyph GlyphName) (rune, bool) { // String with "uniXXXX" format where XXXX is the hexcode. if glyph == "space" { return ' ', true @@ -78,7 +103,7 @@ func (enc IdentityEncoder) GlyphToRune(glyph GlyphName) (rune, bool) { } // ToPdfObject returns a nil as it is not truly a PDF object and should not be attempted to store in file. -func (enc IdentityEncoder) ToPdfObject() core.PdfObject { +func (enc *IdentityEncoder) ToPdfObject() core.PdfObject { if enc.baseName != "" { return core.MakeName(enc.baseName) } diff --git a/model/font_composite.go b/model/font_composite.go index 23d69df9..47474bae 100644 --- a/model/font_composite.go +++ b/model/font_composite.go @@ -210,10 +210,14 @@ func (font *pdfFontType0) subsetRegistered() error { common.Log.Debug("Missing font descriptor") return nil } + if font.encoder == nil { + common.Log.Debug("No encoder - subsetting ignored") + return nil + } stream, ok := core.GetStream(cidfnt.fontDescriptor.FontFile2) if !ok { - common.Log.Debug("Embedded font object not found -- ABORT subsseting") + common.Log.Debug("Embedded font object not found -- ABORT subsetting") return errors.New("fontfile2 not found") } decoded, err := core.DecodeStream(stream) @@ -227,18 +231,31 @@ func (font *pdfFontType0) subsetRegistered() error { return err } - tenc, ok := font.encoder.(*textencoding.TrueTypeFontEncoder) - if !ok { - return fmt.Errorf("unsupported encoder for subsetting: %T", cidfnt.encoder) + var runes []rune + var subset *unitype.Font + switch tenc := font.encoder.(type) { + case *textencoding.TrueTypeFontEncoder: + // Means the font has been loaded from TTF file. + runes = tenc.RegisteredRunes() + subset, err = fnt.SubsetKeepRunes(runes) + if err != nil { + common.Log.Debug("ERROR: %v", err) + return err + } + // Reduce the encoder also. + tenc.SubsetRegistered() + case *textencoding.IdentityEncoder: + // IdentityEncoder typically means font was parsed from PDF file. + runes = tenc.RegisteredRunes() + subset, err = fnt.SubsetKeepRunes(runes) + if err != nil { + common.Log.Debug("ERROR: %v", err) + return err + } + default: + return fmt.Errorf("unsupported encoder for subsetting: %T", font.encoder) } - runes := tenc.RegisteredRunes() - subset, err := fnt.SubsetKeepRunes(runes) - if err != nil { - return err - } - // Reduce the encoder also. - tenc.SubsetRegistered() var buf bytes.Buffer err = subset.Write(&buf) if err != nil { @@ -249,7 +266,7 @@ func (font *pdfFontType0) subsetRegistered() error { if font.toUnicodeCmap != nil { codeToUnicode := make(map[cmap.CharCode]rune, len(runes)) for _, r := range runes { - cc, ok := tenc.RuneToCharcode(r) + cc, ok := font.encoder.RuneToCharcode(r) if !ok { continue }