fonts: describe few issues with the code; remove unused cmap type

This commit is contained in:
Denys Smirnov 2018-12-29 19:01:05 +02:00
parent 83d8086657
commit ac7696693b
6 changed files with 24 additions and 89 deletions

View File

@ -1,36 +0,0 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package textencoding
import "github.com/unidoc/unidoc/pdf/core"
// CID represents a character identifier.
// It is stored as an unsigned 16-bit value, so it can hold identifiers in the range 0-65535.
type CID uint16
// CMap maps character codes to CIDs.
type CMap interface {
// CharacterCodesToCID converts the raw character code bytes in charcodes into a
// sequence of CIDs, returning a non-nil error when the conversion fails.
CharacterCodesToCID(charcodes []byte) ([]CID, error)
}
// CMapIdentityH is a representation of the /Identity-H cmap.
// It carries no state: its CharacterCodesToCID method maps every 2-byte character code
// to the CID with the same numeric value.
type CMapIdentityH struct {
}
// CharacterCodesToCID interprets raw as a sequence of 2-byte character codes, high byte
// first, and returns each code unchanged as a CID, as the Identity CMap maps every code
// in 0-65535 to the CID with the same value.
// It returns core.ErrRangeError when raw contains an odd number of bytes.
func (cmap CMapIdentityH) CharacterCodesToCID(raw []byte) ([]CID, error) {
	// Every character code occupies exactly two bytes, so an odd length is malformed.
	if len(raw)&1 == 1 {
		return nil, core.ErrRangeError
	}

	var cids []CID
	for rest := raw; len(rest) > 0; rest = rest[2:] {
		hi, lo := CID(rest[0]), CID(rest[1])
		cids = append(cids, hi<<8|lo)
	}
	return cids, nil
}

View File

@ -1,45 +0,0 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package textencoding
import "testing"
// TestCMapIdentityH_CharacterCodesToCID verifies that the Identity-H CMap turns pairs of
// bytes into the CIDs with the same 16-bit value and rejects input of odd length.
func TestCMapIdentityH_CharacterCodesToCID(t *testing.T) {
	identityCMap := CMapIdentityH{}

	dataPairs := []struct {
		raw      []byte
		expected []CID
		errs     bool // true when the conversion must fail
	}{
		{[]byte{0x00, 0x00, 0x04, 0xff}, []CID{0x0000, 0x04ff}, false},
		{[]byte{0x00, 0x00, 0x04}, nil, true},
	}

	for _, data := range dataPairs {
		cids, err := identityCMap.CharacterCodesToCID(data.raw)

		if data.errs {
			// A missing error is a failure in its own right; previously such a case
			// could slip through whenever the result lengths happened to match.
			if err == nil {
				t.Errorf("raw=% x: expected an error, got cids=%v", data.raw, cids)
			}
			continue
		}
		if err != nil {
			t.Errorf("raw=% x: unexpected error: %v", data.raw, err)
			continue
		}

		if len(data.expected) != len(cids) {
			t.Errorf("raw=% x: length mismatch: got %d CIDs, want %d", data.raw, len(cids), len(data.expected))
			continue
		}
		for i, want := range data.expected {
			if cids[i] != want {
				t.Errorf("raw=% x: cids[%d] = 0x%04x, want 0x%04x", data.raw, i, cids[i], want)
			}
		}
	}
}

View File

@ -17,13 +17,14 @@ import (
// GID is a glyph index.
type GID uint16
// TODO(dennwc): should not mix Identity-H CMap and Encoding in the same object
// TrueTypeFontEncoder handles text encoding for composite TrueType fonts.
// It performs mapping between character ids and glyph ids.
// It has a preloaded rune (unicode code point) to glyph index map that has been loaded from a font.
// Corresponds to Identity-H.
// Corresponds to Identity-H CMap and Identity encoding.
type TrueTypeFontEncoder struct {
runeToGIDMap map[rune]GID
cmap CMap
}
// NewTrueTypeFontEncoder creates a new text encoder for TTF fonts with a runeToGlyphIndexMap that
@ -33,7 +34,6 @@ type TrueTypeFontEncoder struct {
func NewTrueTypeFontEncoder(runeToGIDMap map[rune]GID) TrueTypeFontEncoder {
return TrueTypeFontEncoder{
runeToGIDMap: runeToGIDMap,
cmap: CMapIdentityH{},
}
}
@ -75,7 +75,7 @@ func (enc TrueTypeFontEncoder) Encode(raw string) []byte {
// The bool return flag is true if there was a match, and false otherwise.
func (enc TrueTypeFontEncoder) CharcodeToGlyph(code CharCode) (GlyphName, bool) {
r, found := enc.CharcodeToRune(code)
if found && r == 0x20 {
if found && r == ' ' {
return "space", true
}
@ -139,9 +139,10 @@ func (enc TrueTypeFontEncoder) CharcodeToRune(code CharCode) (rune, bool) {
// RuneToGlyph returns the glyph name for rune `r`.
// The bool return flag is true if there was a match, and false otherwise.
func (enc TrueTypeFontEncoder) RuneToGlyph(r rune) (GlyphName, bool) {
if r == 0x20 {
if r == ' ' {
return "space", true
}
// TODO(dennwc): this is wrong; font may override this with a "post" table that specifies glyph names
glyph := GlyphName(fmt.Sprintf("uni%.4X", r))
return glyph, true
}
@ -149,6 +150,7 @@ func (enc TrueTypeFontEncoder) RuneToGlyph(r rune) (GlyphName, bool) {
// GlyphToRune returns the rune corresponding to glyph name `glyph`.
// The bool return flag is true if there was a match, and false otherwise.
func (enc TrueTypeFontEncoder) GlyphToRune(glyph GlyphName) (rune, bool) {
// TODO(dennwc): this is wrong; font may override this with a "post" table that specifies glyph names
// String with "uniXXXX" format where XXXX is the hexcode.
if len(glyph) == 7 && glyph[0:3] == "uni" {
unicode := uint16(0)
@ -168,5 +170,6 @@ func (enc TrueTypeFontEncoder) GlyphToRune(glyph GlyphName) (rune, bool) {
// ToPdfObject returns the value produced by core.MakeNull(): the encoder is not truly a PDF
// object, and no attempt should be made to store it in a file.
func (enc TrueTypeFontEncoder) ToPdfObject() core.PdfObject {
// TODO(dennwc): reasonable question: why does it have to implement this interface then?
return core.MakeNull()
}

View File

@ -15,6 +15,11 @@ func TestWinAnsiEncoder(t *testing.T) {
t.Errorf("Glyph != space")
return
}
code, found := enc.RuneToCharcode('þ')
if !found || code != 254 {
t.Errorf("code != 254")
return
}
glyph, found = enc.RuneToGlyph('þ')
if !found || glyph != "thorn" {

View File

@ -430,14 +430,14 @@ func NewPdfFontFromTTFFile(filePath string) (*PdfFont, error) {
continue
}
pos, ok := ttf.Chars[r]
gid, ok := ttf.Chars[r]
if !ok {
common.Log.Debug("Rune not in TTF Chars")
vals = append(vals, missingWidth)
continue
}
w := k * float64(ttf.Widths[pos])
w := k * float64(ttf.Widths[gid])
vals = append(vals, w)
}

View File

@ -47,6 +47,8 @@ import (
// MakeEncoder returns an encoder built from the tables in `rec`.
func (ttf *TtfType) MakeEncoder() (*textencoding.SimpleEncoder, error) {
encoding := make(map[textencoding.CharCode]GlyphName)
// TODO(dennwc): this is a bit strange, since TTF may contain more than 256 characters
// should probably make a different encoder here
for code := textencoding.CharCode(0); code <= 256; code++ {
r := rune(code) // TODO(dennwc): make sure this conversion is valid
gid, ok := ttf.Chars[r]
@ -93,11 +95,14 @@ type TtfType struct {
UnderlineThickness int16
Xmin, Ymin, Xmax, Ymax int16
CapHeight int16
Widths []uint16
// Widths is a list of glyph widths indexed by GID.
Widths []uint16
// Chars maps rune values (unicode) to GIDs (the indexes in GlyphNames). i.e. GlyphNames[Chars[r]] is
// the glyph corresponding to rune r.
//
// TODO(dennwc): CharCode is currently defined as uint16, but some tables may store 32 bit charcodes
// not the case right now, but make sure to update it once we support those tables
// TODO(dennwc,peterwilliams97): it should map char codes to GIDs
Chars map[rune]GID
// GlyphNames is a list of glyphs from the "post" section of the TrueType file.
@ -117,6 +122,9 @@ func (ttf *TtfType) MakeToUnicode() *cmap.CMap {
glyph := ttf.GlyphNames[gid]
// TODO(dennwc): 'code' is already a rune; do we need this extra lookup?
// TODO(dennwc): this cannot be done here; glyphNames might be empty
// the parent font may specify a different encoding
// so we should remap on a higher level
r, ok := textencoding.GlyphToRune(glyph)
if !ok {
common.Log.Debug("No rune. code=0x%04x glyph=%q", code, glyph)