From aef6e5e9768f140d2df16f9dfd25da7d1e5e1fbb Mon Sep 17 00:00:00 2001 From: Adrian-George Bostan Date: Fri, 8 May 2020 03:15:09 +0300 Subject: [PATCH] Fix CMap generation and serialization for composite fonts (#344) * Fix CMap charcode mapping serialization * Improve CMap generation in the NewCompositePdfFontFromTTF function --- internal/cmap/cmap.go | 32 +++++++++++++++++++++----------- model/font_composite.go | 12 +++++++++++- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/internal/cmap/cmap.go b/internal/cmap/cmap.go index 3e3ff967..1299faa5 100644 --- a/internal/cmap/cmap.go +++ b/internal/cmap/cmap.go @@ -459,25 +459,35 @@ func (cmap *CMap) toBfData() string { } sort.Slice(codes, func(i, j int) bool { return codes[i] < codes[j] }) - // charRanges is a list of the contiguous character code ranges in `codes`. + // Generate CMap character code ranges. + // The code ranges are intervals of consecutive charcodes (c1 = c0 + 1) + // mapping to consecutive runes. + // Start with a range consisting of the current character code for both ends + // of the interval. Check if the next character is consecutive to the upper + // end of the interval and if it maps to the next rune. If so, increase the + // interval to the right. Otherwise, append the current range to the + // character ranges slice and start over. Continue the process until all + // character codes have been mapped to code ranges. var charRanges []charRange - c0, c1 := codes[0], codes[0]+1 + currCharRange := charRange{codes[0], codes[0]} + prevRune := cmap.codeToUnicode[codes[0]] for _, c := range codes[1:] { - if c != c1 { - charRanges = append(charRanges, charRange{c0, c1}) - c0 = c + currRune := cmap.codeToUnicode[c] + if c == currCharRange.code1+1 && currRune == prevRune+1 { + currCharRange.code1 = c + } else { + charRanges = append(charRanges, currCharRange) + currCharRange.code0, currCharRange.code1 = c, c } - c1 = c + 1 - } - if c1 > c0 { - charRanges = append(charRanges, charRange{c0, c1}) + prevRune = currRune } + charRanges = append(charRanges, currCharRange) // fbChars is a list of single character ranges. fbRanges is a list of multiple character ranges. var fbChars []CharCode var fbRanges []fbRange for _, cr := range charRanges { - if cr.code0+1 == cr.code1 { + if cr.code0 == cr.code1 { fbChars = append(fbChars, cr.code0) } else { fbRanges = append(fbRanges, fbRange{ @@ -512,7 +522,7 @@ func (cmap *CMap) toBfData() string { for j := 0; j < n; j++ { rng := fbRanges[i*maxBfEntries+j] r := rng.r0 - lines = append(lines, fmt.Sprintf("<%04x><%04x> <%04x>", rng.code0, rng.code1-1, r)) + lines = append(lines, fmt.Sprintf("<%04x><%04x> <%04x>", rng.code0, rng.code1, r)) } lines = append(lines, "endbfrange") } diff --git a/model/font_composite.go b/model/font_composite.go index 87cfc827..23d69df9 100644 --- a/model/font_composite.go +++ b/model/font_composite.go @@ -830,7 +830,17 @@ func NewCompositePdfFontFromTTF(r io.ReadSeeker) (*PdfFont, error) { encoder: ttf.NewEncoder(), } - type0.toUnicodeCmap = ttf.MakeToUnicode() + // Generate CMap for the Type 0 font, which is the inverse of ttf.Chars. + if len(ttf.Chars) > 0 { + codeToUnicode := make(map[cmap.CharCode]rune, len(ttf.Chars)) + for r, gid := range ttf.Chars { + cid := cmap.CharCode(gid) + if rn, ok := codeToUnicode[cid]; !ok || (ok && rn > r) { + codeToUnicode[cid] = r + } + } + type0.toUnicodeCmap = cmap.NewToUnicodeCMap(codeToUnicode) + } // Build Font. font := PdfFont{