mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-30 13:48:51 +08:00
fonts: describe a few issues with the code; remove unused cmap type
This commit is contained in:
parent
83d8086657
commit
ac7696693b
@ -1,36 +0,0 @@
|
|||||||
/*
|
|
||||||
* This file is subject to the terms and conditions defined in
|
|
||||||
* file 'LICENSE.md', which is part of this source code package.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package textencoding
|
|
||||||
|
|
||||||
import "github.com/unidoc/unidoc/pdf/core"
|
|
||||||
|
|
||||||
// CID represents a character identifier.
|
|
||||||
type CID uint16
|
|
||||||
|
|
||||||
// CMap maps character codes to CIDs.
|
|
||||||
type CMap interface {
|
|
||||||
CharacterCodesToCID(charcodes []byte) ([]CID, error)
|
|
||||||
}
|
|
||||||
|
|
||||||
// CMapIdentityH is a representation of the /Identity-H cmap.
|
|
||||||
type CMapIdentityH struct {
|
|
||||||
}
|
|
||||||
|
|
||||||
// CharacterCodesToCID converts charcodes to CIDs for the Identity CMap, which maps
|
|
||||||
// 2-byte character codes (from the raw data) from 0-65535 to the same 2-byte CID value.
|
|
||||||
func (cmap CMapIdentityH) CharacterCodesToCID(raw []byte) ([]CID, error) {
|
|
||||||
if len(raw)%2 != 0 {
|
|
||||||
return nil, core.ErrRangeError
|
|
||||||
}
|
|
||||||
|
|
||||||
var cids []CID
|
|
||||||
for i := 0; i < len(raw); i += 2 {
|
|
||||||
b1 := CID(raw[i])
|
|
||||||
b2 := CID(raw[i+1])
|
|
||||||
cids = append(cids, (b1<<8)|b2)
|
|
||||||
}
|
|
||||||
return cids, nil
|
|
||||||
}
|
|
@ -1,45 +0,0 @@
|
|||||||
/*
|
|
||||||
* This file is subject to the terms and conditions defined in
|
|
||||||
* file 'LICENSE.md', which is part of this source code package.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package textencoding
|
|
||||||
|
|
||||||
import "testing"
|
|
||||||
|
|
||||||
func TestCMapIdentityH_CharacterCodesToCID(t *testing.T) {
|
|
||||||
identityCMap := CMapIdentityH{}
|
|
||||||
|
|
||||||
type dataPair struct {
|
|
||||||
raw []byte
|
|
||||||
expected []CID
|
|
||||||
errs bool
|
|
||||||
}
|
|
||||||
|
|
||||||
dataPairs := []dataPair{
|
|
||||||
{[]byte{0x00, 0x00, 0x04, 0xff}, []CID{0x0000, 0x04ff}, false},
|
|
||||||
{[]byte{0x00, 0x00, 0x04}, []CID{0x0000, 0x04ff}, true},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, data := range dataPairs {
|
|
||||||
cids, err := identityCMap.CharacterCodesToCID(data.raw)
|
|
||||||
if err != nil {
|
|
||||||
if data.errs {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
t.Errorf("Failed: %v", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(data.expected) != len(cids) {
|
|
||||||
t.Errorf("Length mismatch")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := 0; i < len(data.expected); i++ {
|
|
||||||
if cids[i] != data.expected[i] {
|
|
||||||
t.Errorf("Not equal")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -17,13 +17,14 @@ import (
|
|||||||
// GID is a glyph index.
|
// GID is a glyph index.
|
||||||
type GID uint16
|
type GID uint16
|
||||||
|
|
||||||
|
// TODO(dennwc): should not mix Identity-H CMap and Encoding in the same object
|
||||||
|
|
||||||
// TrueTypeFontEncoder handles text encoding for composite TrueType fonts.
|
// TrueTypeFontEncoder handles text encoding for composite TrueType fonts.
|
||||||
// It performs mapping between character ids and glyph ids.
|
// It performs mapping between character ids and glyph ids.
|
||||||
// It has a preloaded rune (unicode code point) to glyph index map that has been loaded from a font.
|
// It has a preloaded rune (unicode code point) to glyph index map that has been loaded from a font.
|
||||||
// Corresponds to Identity-H.
|
// Corresponds to Identity-H CMap and Identity encoding.
|
||||||
type TrueTypeFontEncoder struct {
|
type TrueTypeFontEncoder struct {
|
||||||
runeToGIDMap map[rune]GID
|
runeToGIDMap map[rune]GID
|
||||||
cmap CMap
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewTrueTypeFontEncoder creates a new text encoder for TTF fonts with a runeToGlyphIndexMap that
|
// NewTrueTypeFontEncoder creates a new text encoder for TTF fonts with a runeToGlyphIndexMap that
|
||||||
@ -33,7 +34,6 @@ type TrueTypeFontEncoder struct {
|
|||||||
func NewTrueTypeFontEncoder(runeToGIDMap map[rune]GID) TrueTypeFontEncoder {
|
func NewTrueTypeFontEncoder(runeToGIDMap map[rune]GID) TrueTypeFontEncoder {
|
||||||
return TrueTypeFontEncoder{
|
return TrueTypeFontEncoder{
|
||||||
runeToGIDMap: runeToGIDMap,
|
runeToGIDMap: runeToGIDMap,
|
||||||
cmap: CMapIdentityH{},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -75,7 +75,7 @@ func (enc TrueTypeFontEncoder) Encode(raw string) []byte {
|
|||||||
// The bool return flag is true if there was a match, and false otherwise.
|
// The bool return flag is true if there was a match, and false otherwise.
|
||||||
func (enc TrueTypeFontEncoder) CharcodeToGlyph(code CharCode) (GlyphName, bool) {
|
func (enc TrueTypeFontEncoder) CharcodeToGlyph(code CharCode) (GlyphName, bool) {
|
||||||
r, found := enc.CharcodeToRune(code)
|
r, found := enc.CharcodeToRune(code)
|
||||||
if found && r == 0x20 {
|
if found && r == ' ' {
|
||||||
return "space", true
|
return "space", true
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -139,9 +139,10 @@ func (enc TrueTypeFontEncoder) CharcodeToRune(code CharCode) (rune, bool) {
|
|||||||
// RuneToGlyph returns the glyph name for rune `r`.
|
// RuneToGlyph returns the glyph name for rune `r`.
|
||||||
// The bool return flag is true if there was a match, and false otherwise.
|
// The bool return flag is true if there was a match, and false otherwise.
|
||||||
func (enc TrueTypeFontEncoder) RuneToGlyph(r rune) (GlyphName, bool) {
|
func (enc TrueTypeFontEncoder) RuneToGlyph(r rune) (GlyphName, bool) {
|
||||||
if r == 0x20 {
|
if r == ' ' {
|
||||||
return "space", true
|
return "space", true
|
||||||
}
|
}
|
||||||
|
// TODO(dennwc): this is wrong; font may override this with a "post" table that specifies glyph names
|
||||||
glyph := GlyphName(fmt.Sprintf("uni%.4X", r))
|
glyph := GlyphName(fmt.Sprintf("uni%.4X", r))
|
||||||
return glyph, true
|
return glyph, true
|
||||||
}
|
}
|
||||||
@ -149,6 +150,7 @@ func (enc TrueTypeFontEncoder) RuneToGlyph(r rune) (GlyphName, bool) {
|
|||||||
// GlyphToRune returns the rune corresponding to glyph name `glyph`.
|
// GlyphToRune returns the rune corresponding to glyph name `glyph`.
|
||||||
// The bool return flag is true if there was a match, and false otherwise.
|
// The bool return flag is true if there was a match, and false otherwise.
|
||||||
func (enc TrueTypeFontEncoder) GlyphToRune(glyph GlyphName) (rune, bool) {
|
func (enc TrueTypeFontEncoder) GlyphToRune(glyph GlyphName) (rune, bool) {
|
||||||
|
// TODO(dennwc): this is wrong; font may override this with a "post" table that specifies glyph names
|
||||||
// String with "uniXXXX" format where XXXX is the hexcode.
|
// String with "uniXXXX" format where XXXX is the hexcode.
|
||||||
if len(glyph) == 7 && glyph[0:3] == "uni" {
|
if len(glyph) == 7 && glyph[0:3] == "uni" {
|
||||||
unicode := uint16(0)
|
unicode := uint16(0)
|
||||||
@ -168,5 +170,6 @@ func (enc TrueTypeFontEncoder) GlyphToRune(glyph GlyphName) (rune, bool) {
|
|||||||
|
|
||||||
// ToPdfObject returns a null object, as the encoder is not truly a PDF object and no attempt should be made to store it in a file.
|
// ToPdfObject returns a null object, as the encoder is not truly a PDF object and no attempt should be made to store it in a file.
|
||||||
func (enc TrueTypeFontEncoder) ToPdfObject() core.PdfObject {
|
func (enc TrueTypeFontEncoder) ToPdfObject() core.PdfObject {
|
||||||
|
// TODO(dennwc): reasonable question: why does it have to implement this interface then?
|
||||||
return core.MakeNull()
|
return core.MakeNull()
|
||||||
}
|
}
|
||||||
|
@ -15,6 +15,11 @@ func TestWinAnsiEncoder(t *testing.T) {
|
|||||||
t.Errorf("Glyph != space")
|
t.Errorf("Glyph != space")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
code, found := enc.RuneToCharcode('þ')
|
||||||
|
if !found || code != 254 {
|
||||||
|
t.Errorf("code != 254")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
glyph, found = enc.RuneToGlyph('þ')
|
glyph, found = enc.RuneToGlyph('þ')
|
||||||
if !found || glyph != "thorn" {
|
if !found || glyph != "thorn" {
|
||||||
|
@ -430,14 +430,14 @@ func NewPdfFontFromTTFFile(filePath string) (*PdfFont, error) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
pos, ok := ttf.Chars[r]
|
gid, ok := ttf.Chars[r]
|
||||||
if !ok {
|
if !ok {
|
||||||
common.Log.Debug("Rune not in TTF Chars")
|
common.Log.Debug("Rune not in TTF Chars")
|
||||||
vals = append(vals, missingWidth)
|
vals = append(vals, missingWidth)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
w := k * float64(ttf.Widths[pos])
|
w := k * float64(ttf.Widths[gid])
|
||||||
|
|
||||||
vals = append(vals, w)
|
vals = append(vals, w)
|
||||||
}
|
}
|
||||||
|
@ -47,6 +47,8 @@ import (
|
|||||||
// MakeEncoder returns an encoder built from the tables in `ttf`.
|
// MakeEncoder returns an encoder built from the tables in `ttf`.
|
||||||
func (ttf *TtfType) MakeEncoder() (*textencoding.SimpleEncoder, error) {
|
func (ttf *TtfType) MakeEncoder() (*textencoding.SimpleEncoder, error) {
|
||||||
encoding := make(map[textencoding.CharCode]GlyphName)
|
encoding := make(map[textencoding.CharCode]GlyphName)
|
||||||
|
// TODO(dennwc): this is a bit strange, since TTF may contain more than 256 characters
|
||||||
|
// should probably make a different encoder here
|
||||||
for code := textencoding.CharCode(0); code <= 256; code++ {
|
for code := textencoding.CharCode(0); code <= 256; code++ {
|
||||||
r := rune(code) // TODO(dennwc): make sure this conversion is valid
|
r := rune(code) // TODO(dennwc): make sure this conversion is valid
|
||||||
gid, ok := ttf.Chars[r]
|
gid, ok := ttf.Chars[r]
|
||||||
@ -93,11 +95,14 @@ type TtfType struct {
|
|||||||
UnderlineThickness int16
|
UnderlineThickness int16
|
||||||
Xmin, Ymin, Xmax, Ymax int16
|
Xmin, Ymin, Xmax, Ymax int16
|
||||||
CapHeight int16
|
CapHeight int16
|
||||||
|
// Widths is a list of glyph widths indexed by GID.
|
||||||
Widths []uint16
|
Widths []uint16
|
||||||
|
|
||||||
// Chars maps rune values (unicode) to GIDs (the indexes in GlyphNames). i.e. GlyphNames[Chars[r]] is
|
// Chars maps rune values (unicode) to GIDs (the indexes in GlyphNames). i.e. GlyphNames[Chars[r]] is
|
||||||
// the glyph corresponding to rune r.
|
// the glyph corresponding to rune r.
|
||||||
//
|
//
|
||||||
|
// TODO(dennwc): CharCode is currently defined as uint16, but some tables may store 32 bit charcodes
|
||||||
|
// not the case right now, but make sure to update it once we support those tables
|
||||||
// TODO(dennwc,peterwilliams97): it should map char codes to GIDs
|
// TODO(dennwc,peterwilliams97): it should map char codes to GIDs
|
||||||
Chars map[rune]GID
|
Chars map[rune]GID
|
||||||
// GlyphNames is a list of glyphs from the "post" section of the TrueType file.
|
// GlyphNames is a list of glyphs from the "post" section of the TrueType file.
|
||||||
@ -117,6 +122,9 @@ func (ttf *TtfType) MakeToUnicode() *cmap.CMap {
|
|||||||
glyph := ttf.GlyphNames[gid]
|
glyph := ttf.GlyphNames[gid]
|
||||||
|
|
||||||
// TODO(dennwc): 'code' is already a rune; do we need this extra lookup?
|
// TODO(dennwc): 'code' is already a rune; do we need this extra lookup?
|
||||||
|
// TODO(dennwc): this cannot be done here; glyphNames might be empty
|
||||||
|
// the parent font may specify a different encoding
|
||||||
|
// so we should remap on a higher level
|
||||||
r, ok := textencoding.GlyphToRune(glyph)
|
r, ok := textencoding.GlyphToRune(glyph)
|
||||||
if !ok {
|
if !ok {
|
||||||
common.Log.Debug("No rune. code=0x%04x glyph=%q", code, glyph)
|
common.Log.Debug("No rune. code=0x%04x glyph=%q", code, glyph)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user