Track runes in IdentityEncoder (for subsetting), track decoded runes

This commit is contained in:
Gunnsteinn Hall 2020-05-09 11:36:39 +00:00
parent 5efaa02e23
commit ba1058fa37
2 changed files with 64 additions and 22 deletions

View File

@ -16,44 +16,69 @@ import (
// IdentityEncoder represents a 2-byte identity encoding.
type IdentityEncoder struct {
	// baseName is the name of the predefined encoding the encoder is based on.
	baseName string

	// registeredMap is the set of runes registered by the encoder, tracked so
	// that the runes in use are known when subsetting.
	registeredMap map[rune]struct{}
}
// NewIdentityTextEncoder returns a new IdentityEncoder based on the predefined
// encoding `baseName`.
func NewIdentityTextEncoder(baseName string) IdentityEncoder {
return IdentityEncoder{baseName}
func NewIdentityTextEncoder(baseName string) *IdentityEncoder {
	// Only the base name is populated; the rune tracking map is left nil here.
	enc := new(IdentityEncoder)
	enc.baseName = baseName
	return enc
}
// RegisteredRunes returns the runes that have been registered as used by the
// encoder. The result is built by ranging over a map, so its order is not
// specified. An encoder with nothing registered yields an empty, non-nil slice.
func (enc *IdentityEncoder) RegisteredRunes() []rune {
	registered := make([]rune, 0, len(enc.registeredMap))
	for used := range enc.registeredMap {
		registered = append(registered, used)
	}
	return registered
}
// String returns a string that describes `enc`.
func (enc IdentityEncoder) String() string {
func (enc *IdentityEncoder) String() string {
	// The description of the encoder is simply its base encoding name.
	name := enc.baseName
	return name
}
// Encode converts the Go unicode string to a PDF encoded string.
func (enc IdentityEncoder) Encode(str string) []byte {
func (enc *IdentityEncoder) Encode(str string) []byte {
	// Delegates to encodeString16bit, passing this encoder along.
	encoded := encodeString16bit(enc, str)
	return encoded
}
// Decode converts PDF encoded string to a Go unicode string.
func (enc IdentityEncoder) Decode(raw []byte) string {
func (enc *IdentityEncoder) Decode(raw []byte) string {
	// Delegates to decodeString16bit, passing this encoder along.
	decoded := decodeString16bit(enc, raw)
	return decoded
}
// RuneToCharcode converts rune `r` to a PDF character code.
// The bool return flag is true if there was a match, and false otherwise.
func (enc IdentityEncoder) RuneToCharcode(r rune) (CharCode, bool) {
func (enc *IdentityEncoder) RuneToCharcode(r rune) (CharCode, bool) {
	// Allocate the tracking set on first use; writing to a nil map would panic.
	used := enc.registeredMap
	if used == nil {
		used = make(map[rune]struct{})
		enc.registeredMap = used
	}

	// Register use (subsetting).
	used[r] = struct{}{}

	// Identity mapping: the character code is the rune value itself.
	return CharCode(r), true
}
// CharcodeToRune converts PDF character code `code` to a rune.
// The bool return flag is true if there was a match, and false otherwise.
func (enc IdentityEncoder) CharcodeToRune(code CharCode) (rune, bool) {
func (enc *IdentityEncoder) CharcodeToRune(code CharCode) (rune, bool) {
	// Identity mapping: the rune is the character code value itself.
	decoded := rune(code)

	// Allocate the tracking set on first use, then record the decoded rune.
	if enc.registeredMap == nil {
		enc.registeredMap = make(map[rune]struct{})
	}
	enc.registeredMap[decoded] = struct{}{}

	return decoded, true
}
// RuneToGlyph returns the glyph name for rune `r`.
// The bool return flag is true if there was a match, and false otherwise.
func (enc IdentityEncoder) RuneToGlyph(r rune) (GlyphName, bool) {
func (enc *IdentityEncoder) RuneToGlyph(r rune) (GlyphName, bool) {
if r == ' ' {
return "space", true
}
@ -63,7 +88,7 @@ func (enc IdentityEncoder) RuneToGlyph(r rune) (GlyphName, bool) {
// GlyphToRune returns the rune corresponding to glyph name `glyph`.
// The bool return flag is true if there was a match, and false otherwise.
func (enc IdentityEncoder) GlyphToRune(glyph GlyphName) (rune, bool) {
func (enc *IdentityEncoder) GlyphToRune(glyph GlyphName) (rune, bool) {
// String with "uniXXXX" format where XXXX is the hexcode.
if glyph == "space" {
return ' ', true
@ -78,7 +103,7 @@ func (enc IdentityEncoder) GlyphToRune(glyph GlyphName) (rune, bool) {
}
// ToPdfObject returns the base encoding name as a PDF name object when `baseName` is set.
// Otherwise the encoder is not truly a PDF object and should not be attempted to store in file.
func (enc IdentityEncoder) ToPdfObject() core.PdfObject {
func (enc *IdentityEncoder) ToPdfObject() core.PdfObject {
if enc.baseName != "" {
return core.MakeName(enc.baseName)
}

View File

@ -210,10 +210,14 @@ func (font *pdfFontType0) subsetRegistered() error {
common.Log.Debug("Missing font descriptor")
return nil
}
if font.encoder == nil {
common.Log.Debug("No encoder - subsetting ignored")
return nil
}
stream, ok := core.GetStream(cidfnt.fontDescriptor.FontFile2)
if !ok {
common.Log.Debug("Embedded font object not found -- ABORT subsseting")
common.Log.Debug("Embedded font object not found -- ABORT subsetting")
return errors.New("fontfile2 not found")
}
decoded, err := core.DecodeStream(stream)
@ -227,18 +231,31 @@ func (font *pdfFontType0) subsetRegistered() error {
return err
}
tenc, ok := font.encoder.(*textencoding.TrueTypeFontEncoder)
if !ok {
return fmt.Errorf("unsupported encoder for subsetting: %T", cidfnt.encoder)
var runes []rune
var subset *unitype.Font
switch tenc := font.encoder.(type) {
case *textencoding.TrueTypeFontEncoder:
// Means the font has been loaded from TTF file.
runes = tenc.RegisteredRunes()
subset, err = fnt.SubsetKeepRunes(runes)
if err != nil {
common.Log.Debug("ERROR: %v", err)
return err
}
// Reduce the encoder also.
tenc.SubsetRegistered()
case *textencoding.IdentityEncoder:
// IdentityEncoder typically means font was parsed from PDF file.
runes = tenc.RegisteredRunes()
subset, err = fnt.SubsetKeepRunes(runes)
if err != nil {
common.Log.Debug("ERROR: %v", err)
return err
}
default:
return fmt.Errorf("unsupported encoder for subsetting: %T", font.encoder)
}
runes := tenc.RegisteredRunes()
subset, err := fnt.SubsetKeepRunes(runes)
if err != nil {
return err
}
// Reduce the encoder also.
tenc.SubsetRegistered()
var buf bytes.Buffer
err = subset.Write(&buf)
if err != nil {
@ -249,7 +266,7 @@ func (font *pdfFontType0) subsetRegistered() error {
if font.toUnicodeCmap != nil {
codeToUnicode := make(map[cmap.CharCode]rune, len(runes))
for _, r := range runes {
cc, ok := tenc.RuneToCharcode(r)
cc, ok := font.encoder.RuneToCharcode(r)
if !ok {
continue
}