From 83d8086657a56a56b957a9ee2885532cdddddc46 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Fri, 28 Dec 2018 16:48:38 +0200 Subject: [PATCH 01/11] model: reformat TODOs --- pdf/model/font.go | 7 ++++--- pdf/model/font_composite.go | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pdf/model/font.go b/pdf/model/font.go index 0f9fc10f..c0bf99ed 100644 --- a/pdf/model/font.go +++ b/pdf/model/font.go @@ -516,9 +516,10 @@ func (font *PdfFont) GetRuneMetrics(r rune) (fonts.CharMetrics, bool) { func (font *PdfFont) GetCharMetrics(code textencoding.CharCode) (fonts.CharMetrics, bool) { var nometrics fonts.CharMetrics - // XXX(peterwilliams97) pdfFontType0.GetCharMetrics() calls pdfCIDFontType2.GetCharMetrics() - // through this function. Would it be more straightforward for pdfFontType0.GetCharMetrics() to - // call pdfCIDFontType0.GetCharMetrics() and pdfCIDFontType2.GetCharMetrics() directly? + // TODO(peterwilliams97): pdfFontType0.GetCharMetrics() calls pdfCIDFontType2.GetCharMetrics() + // through this function. Would it be more straightforward for + // pdfFontType0.GetCharMetrics() to call pdfCIDFontType0.GetCharMetrics() + // and pdfCIDFontType2.GetCharMetrics() directly? switch t := font.context.(type) { case *pdfFontSimple: diff --git a/pdf/model/font_composite.go b/pdf/model/font_composite.go index 3aa5ba1f..f4f452f6 100644 --- a/pdf/model/font_composite.go +++ b/pdf/model/font_composite.go @@ -297,8 +297,8 @@ type pdfCIDFontType2 struct { defaultWidth float64 // Mapping between unicode runes to widths. 
- // TODO(dennwc): both are used only in GetGlyphCharMetrics - // we can precompute metrics and drop both + // TODO(dennwc): it is used only in GetGlyphCharMetrics + // we can precompute metrics and drop it runeToWidthMap map[rune]int } From ac7696693b1c829c60aa04916e425e6871ab3d50 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Sat, 29 Dec 2018 19:01:05 +0200 Subject: [PATCH 02/11] fonts: describe few issues with the code; remove unused cmap type --- pdf/internal/textencoding/cmap.go | 36 ------------------ pdf/internal/textencoding/cmap_test.go | 45 ----------------------- pdf/internal/textencoding/truetype.go | 13 ++++--- pdf/internal/textencoding/winansi_test.go | 5 +++ pdf/model/font_simple.go | 4 +- pdf/model/fonts/ttfparser.go | 10 ++++- 6 files changed, 24 insertions(+), 89 deletions(-) delete mode 100644 pdf/internal/textencoding/cmap.go delete mode 100644 pdf/internal/textencoding/cmap_test.go diff --git a/pdf/internal/textencoding/cmap.go b/pdf/internal/textencoding/cmap.go deleted file mode 100644 index 63423c07..00000000 --- a/pdf/internal/textencoding/cmap.go +++ /dev/null @@ -1,36 +0,0 @@ -/* - * This file is subject to the terms and conditions defined in - * file 'LICENSE.md', which is part of this source code package. - */ - -package textencoding - -import "github.com/unidoc/unidoc/pdf/core" - -// CID represents a character identifier. -type CID uint16 - -// CMap maps character codes to CIDs. -type CMap interface { - CharacterCodesToCID(charcodes []byte) ([]CID, error) -} - -// CMapIdentityH is a representation of the /Identity-H cmap. -type CMapIdentityH struct { -} - -// CharacterCodesToCID converts charcodes to CIDs for the Identity CMap, which maps -// 2-byte character codes (from the raw data) from 0-65535 to the same 2-byte CID value. 
-func (cmap CMapIdentityH) CharacterCodesToCID(raw []byte) ([]CID, error) { - if len(raw)%2 != 0 { - return nil, core.ErrRangeError - } - - var cids []CID - for i := 0; i < len(raw); i += 2 { - b1 := CID(raw[i]) - b2 := CID(raw[i+1]) - cids = append(cids, (b1<<8)|b2) - } - return cids, nil -} diff --git a/pdf/internal/textencoding/cmap_test.go b/pdf/internal/textencoding/cmap_test.go deleted file mode 100644 index 3698b6c2..00000000 --- a/pdf/internal/textencoding/cmap_test.go +++ /dev/null @@ -1,45 +0,0 @@ -/* - * This file is subject to the terms and conditions defined in - * file 'LICENSE.md', which is part of this source code package. - */ - -package textencoding - -import "testing" - -func TestCMapIdentityH_CharacterCodesToCID(t *testing.T) { - identityCMap := CMapIdentityH{} - - type dataPair struct { - raw []byte - expected []CID - errs bool - } - - dataPairs := []dataPair{ - {[]byte{0x00, 0x00, 0x04, 0xff}, []CID{0x0000, 0x04ff}, false}, - {[]byte{0x00, 0x00, 0x04}, []CID{0x0000, 0x04ff}, true}, - } - - for _, data := range dataPairs { - cids, err := identityCMap.CharacterCodesToCID(data.raw) - if err != nil { - if data.errs { - continue - } - t.Errorf("Failed: %v", err) - return - } - - if len(data.expected) != len(cids) { - t.Errorf("Length mismatch") - return - } - - for i := 0; i < len(data.expected); i++ { - if cids[i] != data.expected[i] { - t.Errorf("Not equal") - } - } - } -} diff --git a/pdf/internal/textencoding/truetype.go b/pdf/internal/textencoding/truetype.go index 6ba84cf3..b55ef087 100644 --- a/pdf/internal/textencoding/truetype.go +++ b/pdf/internal/textencoding/truetype.go @@ -17,13 +17,14 @@ import ( // GID is a glyph index. type GID uint16 +// TODO(dennwc): should not mix Identity-H CMap and Encoding in the same object + // TrueTypeFontEncoder handles text encoding for composite TrueType fonts. // It performs mapping between character ids and glyph ids. 
// It has a preloaded rune (unicode code point) to glyph index map that has been loaded from a font. -// Corresponds to Identity-H. +// Corresponds to Identity-H CMap and Identity encoding. type TrueTypeFontEncoder struct { runeToGIDMap map[rune]GID - cmap CMap } // NewTrueTypeFontEncoder creates a new text encoder for TTF fonts with a runeToGlyphIndexMap that @@ -33,7 +34,6 @@ type TrueTypeFontEncoder struct { func NewTrueTypeFontEncoder(runeToGIDMap map[rune]GID) TrueTypeFontEncoder { return TrueTypeFontEncoder{ runeToGIDMap: runeToGIDMap, - cmap: CMapIdentityH{}, } } @@ -75,7 +75,7 @@ func (enc TrueTypeFontEncoder) Encode(raw string) []byte { // The bool return flag is true if there was a match, and false otherwise. func (enc TrueTypeFontEncoder) CharcodeToGlyph(code CharCode) (GlyphName, bool) { r, found := enc.CharcodeToRune(code) - if found && r == 0x20 { + if found && r == ' ' { return "space", true } @@ -139,9 +139,10 @@ func (enc TrueTypeFontEncoder) CharcodeToRune(code CharCode) (rune, bool) { // RuneToGlyph returns the glyph name for rune `r`. // The bool return flag is true if there was a match, and false otherwise. func (enc TrueTypeFontEncoder) RuneToGlyph(r rune) (GlyphName, bool) { - if r == 0x20 { + if r == ' ' { return "space", true } + // TODO(dennwc): this is wrong; font may override this with a "post" table that specifies glyph names glyph := GlyphName(fmt.Sprintf("uni%.4X", r)) return glyph, true } @@ -149,6 +150,7 @@ func (enc TrueTypeFontEncoder) RuneToGlyph(r rune) (GlyphName, bool) { // GlyphToRune returns the rune corresponding to glyph name `glyph`. // The bool return flag is true if there was a match, and false otherwise. func (enc TrueTypeFontEncoder) GlyphToRune(glyph GlyphName) (rune, bool) { + // TODO(dennwc): this is wrong; font may override this with a "post" table that specifies glyph names // String with "uniXXXX" format where XXXX is the hexcode. 
if len(glyph) == 7 && glyph[0:3] == "uni" { unicode := uint16(0) @@ -168,5 +170,6 @@ func (enc TrueTypeFontEncoder) GlyphToRune(glyph GlyphName) (rune, bool) { // ToPdfObject returns a nil as it is not truly a PDF object and should not be attempted to store in file. func (enc TrueTypeFontEncoder) ToPdfObject() core.PdfObject { + // TODO(dennwc): reasonable question: why does it have to implement this interface then? return core.MakeNull() } diff --git a/pdf/internal/textencoding/winansi_test.go b/pdf/internal/textencoding/winansi_test.go index 8eac31b8..3f2daa6c 100644 --- a/pdf/internal/textencoding/winansi_test.go +++ b/pdf/internal/textencoding/winansi_test.go @@ -15,6 +15,11 @@ func TestWinAnsiEncoder(t *testing.T) { t.Errorf("Glyph != space") return } + code, found := enc.RuneToCharcode('þ') + if !found || code != 254 { + t.Errorf("code != 254") + return + } glyph, found = enc.RuneToGlyph('þ') if !found || glyph != "thorn" { diff --git a/pdf/model/font_simple.go b/pdf/model/font_simple.go index 417321a3..627c5c15 100644 --- a/pdf/model/font_simple.go +++ b/pdf/model/font_simple.go @@ -430,14 +430,14 @@ func NewPdfFontFromTTFFile(filePath string) (*PdfFont, error) { continue } - pos, ok := ttf.Chars[r] + gid, ok := ttf.Chars[r] if !ok { common.Log.Debug("Rune not in TTF Chars") vals = append(vals, missingWidth) continue } - w := k * float64(ttf.Widths[pos]) + w := k * float64(ttf.Widths[gid]) vals = append(vals, w) } diff --git a/pdf/model/fonts/ttfparser.go b/pdf/model/fonts/ttfparser.go index 0622528f..d463e12d 100644 --- a/pdf/model/fonts/ttfparser.go +++ b/pdf/model/fonts/ttfparser.go @@ -47,6 +47,8 @@ import ( // MakeEncoder returns an encoder built from the tables in `rec`. 
func (ttf *TtfType) MakeEncoder() (*textencoding.SimpleEncoder, error) { encoding := make(map[textencoding.CharCode]GlyphName) + // TODO(dennwc): this is a bit strange, since TTF may contain more than 256 characters + // should probably make a different encoder here for code := textencoding.CharCode(0); code <= 256; code++ { r := rune(code) // TODO(dennwc): make sure this conversion is valid gid, ok := ttf.Chars[r] @@ -93,11 +95,14 @@ type TtfType struct { UnderlineThickness int16 Xmin, Ymin, Xmax, Ymax int16 CapHeight int16 - Widths []uint16 + // Widths is a list of glyph widths indexed by GID. + Widths []uint16 // Chars maps rune values (unicode) to GIDs (the indexes in GlyphNames). i.e. GlyphNames[Chars[r]] is // the glyph corresponding to rune r. // + // TODO(dennwc): CharCode is currently defined as uint16, but some tables may store 32 bit charcodes + // not the case right now, but make sure to update it once we support those tables // TODO(dennwc,peterwilliams97): it should map char codes to GIDs Chars map[rune]GID // GlyphNames is a list of glyphs from the "post" section of the TrueType file. @@ -117,6 +122,9 @@ func (ttf *TtfType) MakeToUnicode() *cmap.CMap { glyph := ttf.GlyphNames[gid] // TODO(dennwc): 'code' is already a rune; do we need this extra lookup? + // TODO(dennwc): this cannot be done here; glyphNames might be empty + // the parent font may specify a different encoding + // so we should remap on a higher level r, ok := textencoding.GlyphToRune(glyph) if !ok { common.Log.Debug("No rune. 
code=0x%04x glyph=%q", code, glyph) From 622ae5668d5183832b15953f8cd0e16c37319305 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Sat, 29 Dec 2018 19:54:18 +0200 Subject: [PATCH 03/11] textencoding: generate table for WinAnsi encoding from CP1252 --- Gopkg.lock | 19 +- go.mod | 1 + go.sum | 2 + pdf/internal/textencoding/simple.go | 227 --------------------- pdf/internal/textencoding/winansi.go | 35 ++++ pdf/internal/textencoding/winansi_test.go | 236 ++++++++++++++++++++++ 6 files changed, 292 insertions(+), 228 deletions(-) diff --git a/Gopkg.lock b/Gopkg.lock index df964fe1..4b788aaa 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -21,9 +21,26 @@ ] revision = "cd38e8056d9b27bb2f265effa37fb0ea6b8a7f0f" +[[projects]] + name = "golang.org/x/text" + packages = [ + "encoding", + "encoding/charmap", + "encoding/internal", + "encoding/internal/identifier", + "internal/gen", + "internal/triegen", + "internal/ucd", + "transform", + "unicode/cldr", + "unicode/norm" + ] + revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0" + version = "v0.3.0" + [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "2dcb31447fae810daeea813d2c0cdaa9bce2122d0e556718741fcef5a7506eed" + inputs-digest = "564f0035f32edd92273b270310a310d5b3111a181851eb2c0e2f0d514980aa2e" solver-name = "gps-cdcl" solver-version = 1 diff --git a/go.mod b/go.mod index b0634e58..4c478663 100644 --- a/go.mod +++ b/go.mod @@ -3,4 +3,5 @@ module github.com/unidoc/unidoc require ( github.com/boombuler/barcode v1.0.0 golang.org/x/image v0.0.0-20181116024801-cd38e8056d9b + golang.org/x/text v0.3.0 ) diff --git a/go.sum b/go.sum index 19e7c2fb..398840f6 100644 --- a/go.sum +++ b/go.sum @@ -2,3 +2,5 @@ github.com/boombuler/barcode v1.0.0 h1:s1TvRnXwL2xJRaccrdcBQMZxq6X7DvsMogtmJeHDd github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= golang.org/x/image v0.0.0-20181116024801-cd38e8056d9b h1:VHyIDlv3XkfCa5/a81uzaoDkHH4rr81Z62g+xlnO8uM= golang.org/x/image 
v0.0.0-20181116024801-cd38e8056d9b/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= +golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/pdf/internal/textencoding/simple.go b/pdf/internal/textencoding/simple.go index 77aa69db..53c79dd5 100644 --- a/pdf/internal/textencoding/simple.go +++ b/pdf/internal/textencoding/simple.go @@ -283,233 +283,6 @@ func ToFontDifferences(differences map[CharCode]GlyphName) *core.PdfObjectArray return core.MakeArray(diffList...) } -var winAnsiEncoding = map[CharCode]rune{ // 224 entries - 0x20: 0x0020, // "space" - 0x21: 0x0021, // ! "exclam" - 0x22: 0x0022, // " "quotedbl" - 0x23: 0x0023, // # "numbersign" - 0x24: 0x0024, // $ "dollar" - 0x25: 0x0025, // % "percent" - 0x26: 0x0026, // & "ampersand" - 0x27: 0x0027, // \' "quotesingle" - 0x28: 0x0028, // ( "parenleft" - 0x29: 0x0029, // ) "parenright" - 0x2a: 0x002a, // * "asterisk" - 0x2b: 0x002b, // + "plus" - 0x2c: 0x002c, // , "comma" - 0x2d: 0x002d, // - "hyphen" - 0x2e: 0x002e, // . "period" - 0x2f: 0x002f, // / "slash" - 0x30: 0x0030, // 0 "zero" - 0x31: 0x0031, // 1 "one" - 0x32: 0x0032, // 2 "two" - 0x33: 0x0033, // 3 "three" - 0x34: 0x0034, // 4 "four" - 0x35: 0x0035, // 5 "five" - 0x36: 0x0036, // 6 "six" - 0x37: 0x0037, // 7 "seven" - 0x38: 0x0038, // 8 "eight" - 0x39: 0x0039, // 9 "nine" - 0x3a: 0x003a, // : "colon" - 0x3b: 0x003b, // ; "semicolon" - 0x3c: 0x003c, // < "less" - 0x3d: 0x003d, // = "equal" - 0x3e: 0x003e, // > "greater" - 0x3f: 0x003f, // ? 
"question" - 0x40: 0x0040, // @ "at" - 0x41: 0x0041, // A "A" - 0x42: 0x0042, // B "B" - 0x43: 0x0043, // C "C" - 0x44: 0x0044, // D "D" - 0x45: 0x0045, // E "E" - 0x46: 0x0046, // F "F" - 0x47: 0x0047, // G "G" - 0x48: 0x0048, // H "H" - 0x49: 0x0049, // I "I" - 0x4a: 0x004a, // J "J" - 0x4b: 0x004b, // K "K" - 0x4c: 0x004c, // L "L" - 0x4d: 0x004d, // M "M" - 0x4e: 0x004e, // N "N" - 0x4f: 0x004f, // O "O" - 0x50: 0x0050, // P "P" - 0x51: 0x0051, // Q "Q" - 0x52: 0x0052, // R "R" - 0x53: 0x0053, // S "S" - 0x54: 0x0054, // T "T" - 0x55: 0x0055, // U "U" - 0x56: 0x0056, // V "V" - 0x57: 0x0057, // W "W" - 0x58: 0x0058, // X "X" - 0x59: 0x0059, // Y "Y" - 0x5a: 0x005a, // Z "Z" - 0x5b: 0x005b, // [ "bracketleft" - 0x5c: 0x005c, // \\ "backslash" - 0x5d: 0x005d, // ] "bracketright" - 0x5e: 0x005e, // ^ "asciicircum" - 0x5f: 0x005f, // _ "underscore" - 0x60: 0x0060, // ` "grave" - 0x61: 0x0061, // a "a" - 0x62: 0x0062, // b "b" - 0x63: 0x0063, // c "c" - 0x64: 0x0064, // d "d" - 0x65: 0x0065, // e "e" - 0x66: 0x0066, // f "f" - 0x67: 0x0067, // g "g" - 0x68: 0x0068, // h "h" - 0x69: 0x0069, // i "i" - 0x6a: 0x006a, // j "j" - 0x6b: 0x006b, // k "k" - 0x6c: 0x006c, // l "l" - 0x6d: 0x006d, // m "m" - 0x6e: 0x006e, // n "n" - 0x6f: 0x006f, // o "o" - 0x70: 0x0070, // p "p" - 0x71: 0x0071, // q "q" - 0x72: 0x0072, // r "r" - 0x73: 0x0073, // s "s" - 0x74: 0x0074, // t "t" - 0x75: 0x0075, // u "u" - 0x76: 0x0076, // v "v" - 0x77: 0x0077, // w "w" - 0x78: 0x0078, // x "x" - 0x79: 0x0079, // y "y" - 0x7a: 0x007a, // z "z" - 0x7b: 0x007b, // { "braceleft" - 0x7c: 0x007c, // | "bar" - 0x7d: 0x007d, // } "braceright" - 0x7e: 0x007e, // ~ "asciitilde" - 0x7f: 0x2022, // • "bullet" - 0x80: 0x20ac, // € "Euro" - 0x81: 0x2022, // • "bullet" - 0x82: 0x201a, // ‚ "quotesinglbase" - 0x83: 0x0192, // ƒ "florin" - 0x84: 0x201e, // „ "quotedblbase" - 0x85: 0x2026, // … "ellipsis" - 0x86: 0x2020, // † "dagger" - 0x87: 0x2021, // ‡ "daggerdbl" - 0x88: 0x02c6, // ˆ "circumflex" - 0x89: 
0x2030, // ‰ "perthousand" - 0x8a: 0x0160, // Š "Scaron" - 0x8b: 0x2039, // ‹ "guilsinglleft" - 0x8c: 0x0152, // Œ "OE" - 0x8d: 0x2022, // • "bullet" - 0x8e: 0x017d, // Ž "Zcaron" - 0x8f: 0x2022, // • "bullet" - 0x90: 0x2022, // • "bullet" - 0x91: 0x2018, // ‘ "quoteleft" - 0x92: 0x2019, // ’ "quoteright" - 0x93: 0x201c, // “ "quotedblleft" - 0x94: 0x201d, // ” "quotedblright" - 0x95: 0x2022, // • "bullet" - 0x96: 0x2013, // – "endash" - 0x97: 0x2014, // — "emdash" - 0x98: 0x02dc, // ˜ "tilde" - 0x99: 0x2122, // ™ "trademark" - 0x9a: 0x0161, // š "scaron" - 0x9b: 0x203a, // › "guilsinglright" - 0x9c: 0x0153, // œ "oe" - 0x9d: 0x2022, // • "bullet" - 0x9e: 0x017e, // ž "zcaron" - 0x9f: 0x0178, // Ÿ "Ydieresis" - 0xa0: 0x0020, // "space" - 0xa1: 0x00a1, // ¡ "exclamdown" - 0xa2: 0x00a2, // ¢ "cent" - 0xa3: 0x00a3, // £ "sterling" - 0xa4: 0x00a4, // ¤ "currency" - 0xa5: 0x00a5, // ¥ "yen" - 0xa6: 0x00a6, // ¦ "brokenbar" - 0xa7: 0x00a7, // § "section" - 0xa8: 0x00a8, // ¨ "dieresis" - 0xa9: 0x00a9, // © "copyright" - 0xaa: 0x00aa, // ª "ordfeminine" - 0xab: 0x00ab, // « "guillemotleft" - 0xac: 0x00ac, // ¬ "logicalnot" - 0xad: 0x002d, // - "hyphen" - 0xae: 0x00ae, // ® "registered" - 0xaf: 0x00af, // ¯ "macron" - 0xb0: 0x00b0, // ° "degree" - 0xb1: 0x00b1, // ± "plusminus" - 0xb2: 0x00b2, // ² "twosuperior" - 0xb3: 0x00b3, // ³ "threesuperior" - 0xb4: 0x00b4, // ´ "acute" - 0xb5: 0x00b5, // µ "mu" - 0xb6: 0x00b6, // ¶ "paragraph" - 0xb7: 0x00b7, // · "periodcentered" - 0xb8: 0x00b8, // ¸ "cedilla" - 0xb9: 0x00b9, // ¹ "onesuperior" - 0xba: 0x00ba, // º "ordmasculine" - 0xbb: 0x00bb, // » "guillemotright" - 0xbc: 0x00bc, // ¼ "onequarter" - 0xbd: 0x00bd, // ½ "onehalf" - 0xbe: 0x00be, // ¾ "threequarters" - 0xbf: 0x00bf, // ¿ "questiondown" - 0xc0: 0x00c0, // À "Agrave" - 0xc1: 0x00c1, // Á "Aacute" - 0xc2: 0x00c2, //  "Acircumflex" - 0xc3: 0x00c3, // à "Atilde" - 0xc4: 0x00c4, // Ä "Adieresis" - 0xc5: 0x00c5, // Å "Aring" - 0xc6: 0x00c6, // Æ "AE" - 0xc7: 0x00c7, // Ç 
"Ccedilla" - 0xc8: 0x00c8, // È "Egrave" - 0xc9: 0x00c9, // É "Eacute" - 0xca: 0x00ca, // Ê "Ecircumflex" - 0xcb: 0x00cb, // Ë "Edieresis" - 0xcc: 0x00cc, // Ì "Igrave" - 0xcd: 0x00cd, // Í "Iacute" - 0xce: 0x00ce, // Î "Icircumflex" - 0xcf: 0x00cf, // Ï "Idieresis" - 0xd0: 0x00d0, // Ð "Eth" - 0xd1: 0x00d1, // Ñ "Ntilde" - 0xd2: 0x00d2, // Ò "Ograve" - 0xd3: 0x00d3, // Ó "Oacute" - 0xd4: 0x00d4, // Ô "Ocircumflex" - 0xd5: 0x00d5, // Õ "Otilde" - 0xd6: 0x00d6, // Ö "Odieresis" - 0xd7: 0x00d7, // × "multiply" - 0xd8: 0x00d8, // Ø "Oslash" - 0xd9: 0x00d9, // Ù "Ugrave" - 0xda: 0x00da, // Ú "Uacute" - 0xdb: 0x00db, // Û "Ucircumflex" - 0xdc: 0x00dc, // Ü "Udieresis" - 0xdd: 0x00dd, // Ý "Yacute" - 0xde: 0x00de, // Þ "Thorn" - 0xdf: 0x00df, // ß "germandbls" - 0xe0: 0x00e0, // à "agrave" - 0xe1: 0x00e1, // á "aacute" - 0xe2: 0x00e2, // â "acircumflex" - 0xe3: 0x00e3, // ã "atilde" - 0xe4: 0x00e4, // ä "adieresis" - 0xe5: 0x00e5, // å "aring" - 0xe6: 0x00e6, // æ "ae" - 0xe7: 0x00e7, // ç "ccedilla" - 0xe8: 0x00e8, // è "egrave" - 0xe9: 0x00e9, // é "eacute" - 0xea: 0x00ea, // ê "ecircumflex" - 0xeb: 0x00eb, // ë "edieresis" - 0xec: 0x00ec, // ì "igrave" - 0xed: 0x00ed, // í "iacute" - 0xee: 0x00ee, // î "icircumflex" - 0xef: 0x00ef, // ï "idieresis" - 0xf0: 0x00f0, // ð "eth" - 0xf1: 0x00f1, // ñ "ntilde" - 0xf2: 0x00f2, // ò "ograve" - 0xf3: 0x00f3, // ó "oacute" - 0xf4: 0x00f4, // ô "ocircumflex" - 0xf5: 0x00f5, // õ "otilde" - 0xf6: 0x00f6, // ö "odieresis" - 0xf7: 0x00f7, // ÷ "divide" - 0xf8: 0x00f8, // ø "oslash" - 0xf9: 0x00f9, // ù "ugrave" - 0xfa: 0x00fa, // ú "uacute" - 0xfb: 0x00fb, // û "ucircumflex" - 0xfc: 0x00fc, // ü "udieresis" - 0xfd: 0x00fd, // ý "yacute" - 0xfe: 0x00fe, // þ "thorn" - 0xff: 0x00ff, // ÿ "ydieresis" -} - // simpleEncodings is a map of the standard 8 bit character encodings. 
var simpleEncodings = map[string]map[CharCode]rune{ "MacExpertEncoding": { // 165 entries diff --git a/pdf/internal/textencoding/winansi.go b/pdf/internal/textencoding/winansi.go index 73ef67b5..c0872ae4 100644 --- a/pdf/internal/textencoding/winansi.go +++ b/pdf/internal/textencoding/winansi.go @@ -5,6 +5,41 @@ package textencoding +import "golang.org/x/text/encoding/charmap" + +var winAnsiEncoding = make(map[CharCode]rune, 256) + +func init() { + // WinAnsiEncoding is also known as CP1252 + enc := charmap.Windows1252 + + // in WinAnsiEncoding all unused and non-visual codes map to the '•' character + const bullet = '•' + replace := map[byte]rune{ + 127: bullet, // DEL + + // unused + 129: bullet, + 141: bullet, + 143: bullet, + 144: bullet, + 157: bullet, + + // typographically similar + 160: ' ', // non-breaking space -> space + 173: '-', // soft hyphen -> hyphen + } + + for i := int(' '); i < 256; i++ { + b := byte(i) + r := enc.DecodeByte(b) + if rp, ok := replace[b]; ok { + r = rp + } + winAnsiEncoding[CharCode(b)] = r + } +} + // NewWinAnsiTextEncoder returns a SimpleEncoder that implements WinAnsiEncoding. func NewWinAnsiTextEncoder() *SimpleEncoder { const baseName = "WinAnsiEncoding" diff --git a/pdf/internal/textencoding/winansi_test.go b/pdf/internal/textencoding/winansi_test.go index 3f2daa6c..86489217 100644 --- a/pdf/internal/textencoding/winansi_test.go +++ b/pdf/internal/textencoding/winansi_test.go @@ -27,3 +27,239 @@ func TestWinAnsiEncoder(t *testing.T) { return } } + +// TODO(dennwc): remove once done with encoding-related changes +func TestWinANSITable(t *testing.T) { + for code, val := range winAnsiEncoding { + if val2, ok := winAnsiEncodingTest[code]; !ok || val != val2 { + t.Fatalf("wrong table: %d %x %v %x(%c) != %x(%c)", code, code, ok, val, val, val2, val2) + } + } +} + +var winAnsiEncodingTest = map[CharCode]rune{ // 224 entries + 0x20: 0x0020, // "space" + 0x21: 0x0021, // ! 
"exclam" + 0x22: 0x0022, // " "quotedbl" + 0x23: 0x0023, // # "numbersign" + 0x24: 0x0024, // $ "dollar" + 0x25: 0x0025, // % "percent" + 0x26: 0x0026, // & "ampersand" + 0x27: 0x0027, // \' "quotesingle" + 0x28: 0x0028, // ( "parenleft" + 0x29: 0x0029, // ) "parenright" + 0x2a: 0x002a, // * "asterisk" + 0x2b: 0x002b, // + "plus" + 0x2c: 0x002c, // , "comma" + 0x2d: 0x002d, // - "hyphen" + 0x2e: 0x002e, // . "period" + 0x2f: 0x002f, // / "slash" + 0x30: 0x0030, // 0 "zero" + 0x31: 0x0031, // 1 "one" + 0x32: 0x0032, // 2 "two" + 0x33: 0x0033, // 3 "three" + 0x34: 0x0034, // 4 "four" + 0x35: 0x0035, // 5 "five" + 0x36: 0x0036, // 6 "six" + 0x37: 0x0037, // 7 "seven" + 0x38: 0x0038, // 8 "eight" + 0x39: 0x0039, // 9 "nine" + 0x3a: 0x003a, // : "colon" + 0x3b: 0x003b, // ; "semicolon" + 0x3c: 0x003c, // < "less" + 0x3d: 0x003d, // = "equal" + 0x3e: 0x003e, // > "greater" + 0x3f: 0x003f, // ? "question" + 0x40: 0x0040, // @ "at" + 0x41: 0x0041, // A "A" + 0x42: 0x0042, // B "B" + 0x43: 0x0043, // C "C" + 0x44: 0x0044, // D "D" + 0x45: 0x0045, // E "E" + 0x46: 0x0046, // F "F" + 0x47: 0x0047, // G "G" + 0x48: 0x0048, // H "H" + 0x49: 0x0049, // I "I" + 0x4a: 0x004a, // J "J" + 0x4b: 0x004b, // K "K" + 0x4c: 0x004c, // L "L" + 0x4d: 0x004d, // M "M" + 0x4e: 0x004e, // N "N" + 0x4f: 0x004f, // O "O" + 0x50: 0x0050, // P "P" + 0x51: 0x0051, // Q "Q" + 0x52: 0x0052, // R "R" + 0x53: 0x0053, // S "S" + 0x54: 0x0054, // T "T" + 0x55: 0x0055, // U "U" + 0x56: 0x0056, // V "V" + 0x57: 0x0057, // W "W" + 0x58: 0x0058, // X "X" + 0x59: 0x0059, // Y "Y" + 0x5a: 0x005a, // Z "Z" + 0x5b: 0x005b, // [ "bracketleft" + 0x5c: 0x005c, // \\ "backslash" + 0x5d: 0x005d, // ] "bracketright" + 0x5e: 0x005e, // ^ "asciicircum" + 0x5f: 0x005f, // _ "underscore" + 0x60: 0x0060, // ` "grave" + 0x61: 0x0061, // a "a" + 0x62: 0x0062, // b "b" + 0x63: 0x0063, // c "c" + 0x64: 0x0064, // d "d" + 0x65: 0x0065, // e "e" + 0x66: 0x0066, // f "f" + 0x67: 0x0067, // g "g" + 0x68: 0x0068, // h "h" + 0x69: 
0x0069, // i "i" + 0x6a: 0x006a, // j "j" + 0x6b: 0x006b, // k "k" + 0x6c: 0x006c, // l "l" + 0x6d: 0x006d, // m "m" + 0x6e: 0x006e, // n "n" + 0x6f: 0x006f, // o "o" + 0x70: 0x0070, // p "p" + 0x71: 0x0071, // q "q" + 0x72: 0x0072, // r "r" + 0x73: 0x0073, // s "s" + 0x74: 0x0074, // t "t" + 0x75: 0x0075, // u "u" + 0x76: 0x0076, // v "v" + 0x77: 0x0077, // w "w" + 0x78: 0x0078, // x "x" + 0x79: 0x0079, // y "y" + 0x7a: 0x007a, // z "z" + 0x7b: 0x007b, // { "braceleft" + 0x7c: 0x007c, // | "bar" + 0x7d: 0x007d, // } "braceright" + 0x7e: 0x007e, // ~ "asciitilde" + 0x7f: 0x2022, // • "bullet" + 0x80: 0x20ac, // € "Euro" + 0x81: 0x2022, // • "bullet" + 0x82: 0x201a, // ‚ "quotesinglbase" + 0x83: 0x0192, // ƒ "florin" + 0x84: 0x201e, // „ "quotedblbase" + 0x85: 0x2026, // … "ellipsis" + 0x86: 0x2020, // † "dagger" + 0x87: 0x2021, // ‡ "daggerdbl" + 0x88: 0x02c6, // ˆ "circumflex" + 0x89: 0x2030, // ‰ "perthousand" + 0x8a: 0x0160, // Š "Scaron" + 0x8b: 0x2039, // ‹ "guilsinglleft" + 0x8c: 0x0152, // Œ "OE" + 0x8d: 0x2022, // • "bullet" + 0x8e: 0x017d, // Ž "Zcaron" + 0x8f: 0x2022, // • "bullet" + 0x90: 0x2022, // • "bullet" + 0x91: 0x2018, // ‘ "quoteleft" + 0x92: 0x2019, // ’ "quoteright" + 0x93: 0x201c, // “ "quotedblleft" + 0x94: 0x201d, // ” "quotedblright" + 0x95: 0x2022, // • "bullet" + 0x96: 0x2013, // – "endash" + 0x97: 0x2014, // — "emdash" + 0x98: 0x02dc, // ˜ "tilde" + 0x99: 0x2122, // ™ "trademark" + 0x9a: 0x0161, // š "scaron" + 0x9b: 0x203a, // › "guilsinglright" + 0x9c: 0x0153, // œ "oe" + 0x9d: 0x2022, // • "bullet" + 0x9e: 0x017e, // ž "zcaron" + 0x9f: 0x0178, // Ÿ "Ydieresis" + 0xa0: 0x0020, // "space" + 0xa1: 0x00a1, // ¡ "exclamdown" + 0xa2: 0x00a2, // ¢ "cent" + 0xa3: 0x00a3, // £ "sterling" + 0xa4: 0x00a4, // ¤ "currency" + 0xa5: 0x00a5, // ¥ "yen" + 0xa6: 0x00a6, // ¦ "brokenbar" + 0xa7: 0x00a7, // § "section" + 0xa8: 0x00a8, // ¨ "dieresis" + 0xa9: 0x00a9, // © "copyright" + 0xaa: 0x00aa, // ª "ordfeminine" + 0xab: 0x00ab, // « "guillemotleft" 
+ 0xac: 0x00ac, // ¬ "logicalnot" + 0xad: 0x002d, // - "hyphen" + 0xae: 0x00ae, // ® "registered" + 0xaf: 0x00af, // ¯ "macron" + 0xb0: 0x00b0, // ° "degree" + 0xb1: 0x00b1, // ± "plusminus" + 0xb2: 0x00b2, // ² "twosuperior" + 0xb3: 0x00b3, // ³ "threesuperior" + 0xb4: 0x00b4, // ´ "acute" + 0xb5: 0x00b5, // µ "mu" + 0xb6: 0x00b6, // ¶ "paragraph" + 0xb7: 0x00b7, // · "periodcentered" + 0xb8: 0x00b8, // ¸ "cedilla" + 0xb9: 0x00b9, // ¹ "onesuperior" + 0xba: 0x00ba, // º "ordmasculine" + 0xbb: 0x00bb, // » "guillemotright" + 0xbc: 0x00bc, // ¼ "onequarter" + 0xbd: 0x00bd, // ½ "onehalf" + 0xbe: 0x00be, // ¾ "threequarters" + 0xbf: 0x00bf, // ¿ "questiondown" + 0xc0: 0x00c0, // À "Agrave" + 0xc1: 0x00c1, // Á "Aacute" + 0xc2: 0x00c2, //  "Acircumflex" + 0xc3: 0x00c3, // à "Atilde" + 0xc4: 0x00c4, // Ä "Adieresis" + 0xc5: 0x00c5, // Å "Aring" + 0xc6: 0x00c6, // Æ "AE" + 0xc7: 0x00c7, // Ç "Ccedilla" + 0xc8: 0x00c8, // È "Egrave" + 0xc9: 0x00c9, // É "Eacute" + 0xca: 0x00ca, // Ê "Ecircumflex" + 0xcb: 0x00cb, // Ë "Edieresis" + 0xcc: 0x00cc, // Ì "Igrave" + 0xcd: 0x00cd, // Í "Iacute" + 0xce: 0x00ce, // Î "Icircumflex" + 0xcf: 0x00cf, // Ï "Idieresis" + 0xd0: 0x00d0, // Ð "Eth" + 0xd1: 0x00d1, // Ñ "Ntilde" + 0xd2: 0x00d2, // Ò "Ograve" + 0xd3: 0x00d3, // Ó "Oacute" + 0xd4: 0x00d4, // Ô "Ocircumflex" + 0xd5: 0x00d5, // Õ "Otilde" + 0xd6: 0x00d6, // Ö "Odieresis" + 0xd7: 0x00d7, // × "multiply" + 0xd8: 0x00d8, // Ø "Oslash" + 0xd9: 0x00d9, // Ù "Ugrave" + 0xda: 0x00da, // Ú "Uacute" + 0xdb: 0x00db, // Û "Ucircumflex" + 0xdc: 0x00dc, // Ü "Udieresis" + 0xdd: 0x00dd, // Ý "Yacute" + 0xde: 0x00de, // Þ "Thorn" + 0xdf: 0x00df, // ß "germandbls" + 0xe0: 0x00e0, // à "agrave" + 0xe1: 0x00e1, // á "aacute" + 0xe2: 0x00e2, // â "acircumflex" + 0xe3: 0x00e3, // ã "atilde" + 0xe4: 0x00e4, // ä "adieresis" + 0xe5: 0x00e5, // å "aring" + 0xe6: 0x00e6, // æ "ae" + 0xe7: 0x00e7, // ç "ccedilla" + 0xe8: 0x00e8, // è "egrave" + 0xe9: 0x00e9, // é "eacute" + 0xea: 0x00ea, // ê 
"ecircumflex" + 0xeb: 0x00eb, // ë "edieresis" + 0xec: 0x00ec, // ì "igrave" + 0xed: 0x00ed, // í "iacute" + 0xee: 0x00ee, // î "icircumflex" + 0xef: 0x00ef, // ï "idieresis" + 0xf0: 0x00f0, // ð "eth" + 0xf1: 0x00f1, // ñ "ntilde" + 0xf2: 0x00f2, // ò "ograve" + 0xf3: 0x00f3, // ó "oacute" + 0xf4: 0x00f4, // ô "ocircumflex" + 0xf5: 0x00f5, // õ "otilde" + 0xf6: 0x00f6, // ö "odieresis" + 0xf7: 0x00f7, // ÷ "divide" + 0xf8: 0x00f8, // ø "oslash" + 0xf9: 0x00f9, // ù "ugrave" + 0xfa: 0x00fa, // ú "uacute" + 0xfb: 0x00fb, // û "ucircumflex" + 0xfc: 0x00fc, // ü "udieresis" + 0xfd: 0x00fd, // ý "yacute" + 0xfe: 0x00fe, // þ "thorn" + 0xff: 0x00ff, // ÿ "ydieresis" +} From 3c5fc18b01d0ac07912d2155b7375c0aa7d5b65d Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Sun, 30 Dec 2018 16:18:56 +0200 Subject: [PATCH 04/11] textencoding: refactor encodings; better handling for differences --- pdf/internal/textencoding/differences.go | 163 +++++++++++++++ pdf/internal/textencoding/simple.go | 107 +++++----- pdf/internal/textencoding/symbol.go | 4 +- pdf/internal/textencoding/truetype.go | 2 +- pdf/internal/textencoding/winansi.go | 148 +++++++++++--- pdf/internal/textencoding/winansi_test.go | 236 ---------------------- pdf/internal/textencoding/zapfdingbats.go | 4 +- pdf/model/font.go | 4 +- pdf/model/font_simple.go | 26 +-- pdf/model/font_test.go | 4 +- pdf/model/fontfile.go | 2 +- pdf/model/fonts/std.go | 9 +- pdf/model/fonts/ttfparser.go | 2 +- 13 files changed, 362 insertions(+), 349 deletions(-) create mode 100644 pdf/internal/textencoding/differences.go diff --git a/pdf/internal/textencoding/differences.go b/pdf/internal/textencoding/differences.go new file mode 100644 index 00000000..f2fe25d0 --- /dev/null +++ b/pdf/internal/textencoding/differences.go @@ -0,0 +1,163 @@ +package textencoding + +import ( + "bytes" + "fmt" + "sort" + + "github.com/unidoc/unidoc/common" + "github.com/unidoc/unidoc/pdf/core" +) + +// ApplyDifferences modifies or wraps the base encoding and 
overlays differences over it. +func ApplyDifferences(base SimpleEncoder, differences map[CharCode]GlyphName) SimpleEncoder { + if enc, ok := base.(*simpleEncoder); ok { + enc.applyDifferences(differences) + return enc + } + return newDifferencesEncoding(base, differences) +} + +func newDifferencesEncoding(base SimpleEncoder, differences map[CharCode]GlyphName) SimpleEncoder { + // TODO(dennwc): check if it's a differencesEncoding, and merge the mapping + d := &differencesEncoding{ + base: base, + differences: differences, + code2rune: make(map[CharCode]rune), + rune2code: make(map[rune]CharCode), + } + for code, glyph := range differences { + r, ok := GlyphToRune(glyph) + if ok { + d.rune2code[r] = code + } else { + common.Log.Debug("ERROR: No match for glyph=%q differences=%+v", glyph, differences) + } + d.code2rune[code] = r + } + return d +} + +// differencesEncoding remaps characters of a base encoding and acts as a pass-through for other characters. +type differencesEncoding struct { + base SimpleEncoder + + // original mapping to encode to PDF + differences map[CharCode]GlyphName + // overlaid on top of base encoding + code2rune map[CharCode]rune + rune2code map[rune]CharCode +} + +// BaseName returns base encoding name. +func (enc *differencesEncoding) BaseName() string { + return enc.base.BaseName() +} + +// String returns a string that describes the encoding. +func (enc *differencesEncoding) String() string { + return fmt.Sprintf("differences(%s, %v)", enc.base.String(), enc.differences) +} + +// Charcodes returns a slice of all charcodes in this encoding. 
+func (enc *differencesEncoding) Charcodes() []CharCode { + codes := enc.base.Charcodes() + sorted := true + for _, code := range codes { + if _, ok := enc.code2rune[code]; !ok { + codes = append(codes, code) + sorted = false + } + } + if !sorted { + sort.Slice(codes, func(i, j int) bool { + return codes[i] < codes[j] + }) + } + return codes +} + +// Encode converts a Go unicode string `raw` to a PDF encoded string. +func (enc *differencesEncoding) Encode(raw string) []byte { + runes := []rune(raw) + buf := bytes.NewBuffer(nil) + buf.Grow(len(runes)) + for _, r := range runes { + code, _ := enc.RuneToCharcode(r) + // relies on the fact that underlying encoding is 8 bit + buf.WriteByte(byte(code)) + } + return buf.Bytes() +} + +// RuneToCharcode returns the PDF character code corresponding to rune `r`. +// The bool return flag is true if there was a match, and false otherwise. +func (enc *differencesEncoding) RuneToCharcode(r rune) (CharCode, bool) { + if code, ok := enc.rune2code[r]; ok { + return code, true + } + return enc.base.RuneToCharcode(r) +} + +// CharcodeToRune returns the rune corresponding to character code `code`. +// The bool return flag is true if there was a match, and false otherwise. +func (enc *differencesEncoding) CharcodeToRune(code CharCode) (rune, bool) { + if r, ok := enc.code2rune[code]; ok { + return r, true + } + return enc.base.CharcodeToRune(code) +} + +// CharcodeToGlyph returns the glyph name for character code `code`. +// The bool return flag is true if there was a match, and false otherwise. +func (enc *differencesEncoding) CharcodeToGlyph(code CharCode) (GlyphName, bool) { + if glyph, ok := enc.differences[code]; ok { + return glyph, true + } + // TODO(dennwc): only redirects the call - remove from the interface + r, ok := enc.CharcodeToRune(code) + if !ok { + return "", false + } + return enc.RuneToGlyph(r) +} + +// GlyphToCharcode returns character code for glyph `glyph`. 
+// The bool return flag is true if there was a match, and false otherwise. +func (enc *differencesEncoding) GlyphToCharcode(glyph GlyphName) (CharCode, bool) { + // TODO: store reverse map? + for code, glyph2 := range enc.differences { + if glyph2 == glyph { + return code, true + } + } + // TODO(dennwc): only redirects the call - remove from the interface + r, ok := GlyphToRune(glyph) + if !ok { + return MissingCodeRune, false + } + return enc.RuneToCharcode(r) +} + +// RuneToGlyph returns the glyph corresponding to rune `r`. +// The bool return flag is true if there was a match, and false otherwise. +func (enc *differencesEncoding) RuneToGlyph(r rune) (GlyphName, bool) { + // TODO(dennwc): should be in the font interface + return runeToGlyph(r, glyphlistRuneToGlyphMap) +} + +// GlyphToRune returns the rune corresponding to glyph `glyph`. +// The bool return flag is true if there was a match, and false otherwise. +func (enc *differencesEncoding) GlyphToRune(glyph GlyphName) (rune, bool) { + // TODO(dennwc): should be in the font interface + return glyphToRune(glyph, glyphlistGlyphToRuneMap) +} + +// ToPdfObject returns the encoding as a PdfObject. +func (enc *differencesEncoding) ToPdfObject() core.PdfObject { + dict := core.MakeDict() + dict.Set("Type", core.MakeName("Encoding")) + dict.Set("BaseEncoding", enc.base.ToPdfObject()) + dict.Set("Differences", toFontDifferences(enc.differences)) + return core.MakeIndirectObject(dict) +} diff --git a/pdf/internal/textencoding/simple.go b/pdf/internal/textencoding/simple.go index 53c79dd5..328e3c64 100644 --- a/pdf/internal/textencoding/simple.go +++ b/pdf/internal/textencoding/simple.go @@ -20,11 +20,21 @@ import ( // MacRomanEncoding // PdfDocEncoding // StandardEncoding -// WinAnsiEncoding // ZapfDingbatsEncoding +// +// WinAnsiEncoding is implemented via charmapEncoding. -// SimpleEncoder represents a 1 byte encoding -type SimpleEncoder struct { +// SimpleEncoder represents a 1 byte encoding. 
+type SimpleEncoder interface { + TextEncoder + BaseName() string + Charcodes() []CharCode +} + +var _ SimpleEncoder = (*simpleEncoder)(nil) + +// simpleEncoder represents a 1 byte encoding +type simpleEncoder struct { baseName string baseEncoding map[CharCode]rune @@ -35,15 +45,15 @@ type SimpleEncoder struct { codeToRune map[CharCode]rune } -// NewCustomSimpleTextEncoder returns a SimpleEncoder based on map `encoding` and difference map +// NewCustomSimpleTextEncoder returns a simpleEncoder based on map `encoding` and difference map // `differences`. func NewCustomSimpleTextEncoder(encoding, differences map[CharCode]GlyphName) ( - *SimpleEncoder, error) { - baseName := "custom" - baseEncoding := make(map[CharCode]rune) + SimpleEncoder, error) { if len(encoding) == 0 { - return &SimpleEncoder{}, errors.New("empty custom encoding") + return nil, errors.New("empty custom encoding") } + const baseName = "custom" + baseEncoding := make(map[CharCode]rune) for code, glyph := range encoding { r, ok := GlyphToRune(glyph) if !ok { @@ -55,42 +65,50 @@ func NewCustomSimpleTextEncoder(encoding, differences map[CharCode]GlyphName) ( return newSimpleTextEncoder(baseEncoding, baseName, differences), nil } -// ApplyDifferences applies the encoding delta `differences` to `se`. -func (se *SimpleEncoder) ApplyDifferences(differences map[CharCode]GlyphName) { +// applyDifferences applies the encoding delta `differences` to `se`. +func (se *simpleEncoder) applyDifferences(differences map[CharCode]GlyphName) { se.differences = differences se.computeTables() } -// NewSimpleTextEncoder returns a SimpleEncoder based on predefined encoding `baseName` and +// NewSimpleTextEncoder returns a simpleEncoder based on predefined encoding `baseName` and // difference map `differences`. 
-func NewSimpleTextEncoder(baseName string, differences map[CharCode]GlyphName) (*SimpleEncoder, error) { +func NewSimpleTextEncoder(baseName string, differences map[CharCode]GlyphName) (SimpleEncoder, error) { + switch baseName { + case baseWinAnsi: + enc := NewWinAnsiTextEncoder() + if len(differences) != 0 { + enc = ApplyDifferences(enc, differences) + } + return enc, nil + } baseEncoding, ok := simpleEncodings[baseName] if !ok { common.Log.Debug("ERROR: NewSimpleTextEncoder. Unknown encoding %q", baseName) - return &SimpleEncoder{}, errors.New("unsupported font encoding") + return nil, errors.New("unsupported font encoding") } return newSimpleTextEncoder(baseEncoding, baseName, differences), nil } -// newSimpleTextEncoder returns a SimpleEncoder based on map `encoding` and difference map +// newSimpleTextEncoder returns a simpleEncoder based on map `encoding` and difference map // `differences`. func newSimpleTextEncoder(baseEncoding map[CharCode]rune, baseName string, - differences map[CharCode]GlyphName) *SimpleEncoder { + differences map[CharCode]GlyphName) SimpleEncoder { - se := SimpleEncoder{ + se := &simpleEncoder{ baseName: baseName, baseEncoding: baseEncoding, differences: differences, } se.computeTables() - return &se + return se } -// simpleEncoderNumEntries is the maximum number of encoding entries shown in SimpleEncoder.String() +// simpleEncoderNumEntries is the maximum number of encoding entries shown in simpleEncoder.String() const simpleEncoderNumEntries = 0 // String returns a string that describes `se`. 
-func (se SimpleEncoder) String() string { +func (se simpleEncoder) String() string { name := se.baseName if len(se.differences) > 0 { name = fmt.Sprintf("%s(diff)", se.baseName) @@ -100,37 +118,29 @@ func (se SimpleEncoder) String() string { fmt.Sprintf("differences=%+v", se.differences), } - codes := make([]CharCode, 0, len(se.codeToGlyph)) - for c := range se.codeToGlyph { - codes = append(codes, c) - } - sort.Slice(codes, func(i, j int) bool { - return codes[i] < codes[j] - }) - numCodes := len(codes) - if numCodes > simpleEncoderNumEntries { - numCodes = simpleEncoderNumEntries + codes := se.Charcodes() + if len(codes) > simpleEncoderNumEntries { + codes = codes[:simpleEncoderNumEntries] } - for i := 0; i < numCodes; i++ { - c := codes[i] + for _, c := range codes { parts = append(parts, fmt.Sprintf("%d=0x%02x: %q", c, c, se.codeToGlyph[c])) } return fmt.Sprintf("SIMPLE_ENCODER{%s}", strings.Join(parts, ", ")) } // BaseName returns `se`'s base name. -func (se SimpleEncoder) BaseName() string { +func (se simpleEncoder) BaseName() string { return se.baseName } // Encode converts a Go unicode string `raw` to a PDF encoded string. -func (se SimpleEncoder) Encode(raw string) []byte { +func (se simpleEncoder) Encode(raw string) []byte { return encodeString8bit(se, raw) } // Charcodes returns a slice of all charcodes in this encoding. -func (se SimpleEncoder) Charcodes() []CharCode { +func (se simpleEncoder) Charcodes() []CharCode { codes := make([]CharCode, 0, len(se.codeToGlyph)) for code := range se.codeToGlyph { codes = append(codes, code) @@ -143,7 +153,7 @@ func (se SimpleEncoder) Charcodes() []CharCode { // CharcodeToGlyph returns the glyph name for character code `code`. // The bool return flag is true if there was a match, and false otherwise. 
-func (se SimpleEncoder) CharcodeToGlyph(code CharCode) (GlyphName, bool) { +func (se simpleEncoder) CharcodeToGlyph(code CharCode) (GlyphName, bool) { glyph, ok := se.codeToGlyph[code] if !ok { common.Log.Debug("Charcode -> Glyph error: charcode not found: 0x%04x", code) @@ -153,7 +163,7 @@ func (se SimpleEncoder) CharcodeToGlyph(code CharCode) (GlyphName, bool) { // GlyphToCharcode returns character code for glyph `glyph`. // The bool return flag is true if there was a match, and false otherwise. -func (se SimpleEncoder) GlyphToCharcode(glyph GlyphName) (CharCode, bool) { +func (se simpleEncoder) GlyphToCharcode(glyph GlyphName) (CharCode, bool) { code, ok := se.glyphToCode[glyph] if !ok { common.Log.Debug("Glyph -> Charcode error: glyph not found: %q %s", glyph, se) @@ -163,13 +173,13 @@ func (se SimpleEncoder) GlyphToCharcode(glyph GlyphName) (CharCode, bool) { // RuneToCharcode returns the PDF character code corresponding to rune `r`. // The bool return flag is true if there was a match, and false otherwise. -func (se SimpleEncoder) RuneToCharcode(val rune) (CharCode, bool) { +func (se simpleEncoder) RuneToCharcode(val rune) (CharCode, bool) { return doRuneToCharcode(se, val) } // CharcodeToRune returns the rune corresponding to character code `code`. // The bool return flag is true if there was a match, and false otherwise. -func (se SimpleEncoder) CharcodeToRune(code CharCode) (rune, bool) { +func (se simpleEncoder) CharcodeToRune(code CharCode) (rune, bool) { r, ok := se.codeToRune[code] if !ok { common.Log.Debug("Charcode -> Rune error: charcode not found: 0x%04x", code) @@ -179,21 +189,21 @@ func (se SimpleEncoder) CharcodeToRune(code CharCode) (rune, bool) { // RuneToGlyph returns the glyph corresponding to rune `r`. // The bool return flag is true if there was a match, and false otherwise. 
-func (se SimpleEncoder) RuneToGlyph(r rune) (GlyphName, bool) { +func (se simpleEncoder) RuneToGlyph(r rune) (GlyphName, bool) { return runeToGlyph(r, glyphlistRuneToGlyphMap) } // GlyphToRune returns the rune corresponding to glyph `glyph`. // The bool return flag is true if there was a match, and false otherwise. -func (se SimpleEncoder) GlyphToRune(glyph GlyphName) (rune, bool) { +func (se simpleEncoder) GlyphToRune(glyph GlyphName) (rune, bool) { return glyphToRune(glyph, glyphlistGlyphToRuneMap) } // ToPdfObject returns `se` as a PdfObject -func (se SimpleEncoder) ToPdfObject() core.PdfObject { - if se.differences == nil || len(se.differences) == 0 { +func (se simpleEncoder) ToPdfObject() core.PdfObject { + if len(se.differences) == 0 { switch se.baseName { - case "MacRomanEncoding", "MacExpertEncoding", "WinAnsiEncoding": + case "MacRomanEncoding", "MacExpertEncoding": return core.MakeName(se.baseName) } return nil // Use font's built-in encoding. @@ -201,13 +211,13 @@ func (se SimpleEncoder) ToPdfObject() core.PdfObject { dict := core.MakeDict() dict.Set("Type", core.MakeName("Encoding")) dict.Set("BaseEncoding", core.MakeName(se.baseName)) - dict.Set("Differences", ToFontDifferences(se.differences)) + dict.Set("Differences", toFontDifferences(se.differences)) return core.MakeIndirectObject(dict) } -// computeTables computes the tables needed for a working SimpleEncoder from the member +// computeTables computes the tables needed for a working simpleEncoder from the member // fields `baseEncoding` and `differences`. 
-func (se *SimpleEncoder) computeTables() { +func (se *simpleEncoder) computeTables() { codeToRune := make(map[CharCode]rune) for code, r := range se.baseEncoding { codeToRune[code] = r @@ -255,9 +265,9 @@ func FromFontDifferences(diffList *core.PdfObjectArray) (map[CharCode]GlyphName, return differences, nil } -// ToFontDifferences converts `differences` (a map representing character code to glyph mappings) +// toFontDifferences converts `differences` (a map representing character code to glyph mappings) // to a /Differences array for an /Encoding object. -func ToFontDifferences(differences map[CharCode]GlyphName) *core.PdfObjectArray { +func toFontDifferences(differences map[CharCode]GlyphName) *core.PdfObjectArray { if len(differences) == 0 { return nil } @@ -1305,7 +1315,6 @@ var simpleEncodings = map[string]map[CharCode]rune{ 0xfd: 0xf8fd, // "bracerightmid" 0xfe: 0xf8fe, // "bracerightbt" }, - "WinAnsiEncoding": winAnsiEncoding, "ZapfDingbatsEncoding": { // 202 entries 0x20: 0x0020, // "space" 0x21: 0x2701, // ✁ "a1" diff --git a/pdf/internal/textencoding/symbol.go b/pdf/internal/textencoding/symbol.go index 62664000..3cc601eb 100644 --- a/pdf/internal/textencoding/symbol.go +++ b/pdf/internal/textencoding/symbol.go @@ -5,8 +5,8 @@ package textencoding -// NewSymbolEncoder returns a SimpleEncoder that implements SymbolEncoding. -func NewSymbolEncoder() *SimpleEncoder { +// NewSymbolEncoder returns a TextEncoder that implements SymbolEncoding. +func NewSymbolEncoder() TextEncoder { enc, _ := NewSimpleTextEncoder("SymbolEncoding", nil) return enc } diff --git a/pdf/internal/textencoding/truetype.go b/pdf/internal/textencoding/truetype.go index b55ef087..e868d245 100644 --- a/pdf/internal/textencoding/truetype.go +++ b/pdf/internal/textencoding/truetype.go @@ -37,7 +37,7 @@ func NewTrueTypeFontEncoder(runeToGIDMap map[rune]GID) TrueTypeFontEncoder { } } -// ttEncoderMaxNumEntries is the maximum number of encoding entries shown in SimpleEncoder.String(). 
+// ttEncoderMaxNumEntries is the maximum number of encoding entries shown in simpleEncoder.String(). const ttEncoderMaxNumEntries = 10 // String returns a string that describes `enc`. diff --git a/pdf/internal/textencoding/winansi.go b/pdf/internal/textencoding/winansi.go index c0872ae4..a52ee401 100644 --- a/pdf/internal/textencoding/winansi.go +++ b/pdf/internal/textencoding/winansi.go @@ -5,44 +5,132 @@ package textencoding -import "golang.org/x/text/encoding/charmap" +import ( + "bytes" -var winAnsiEncoding = make(map[CharCode]rune, 256) + "github.com/unidoc/unidoc/pdf/core" + "golang.org/x/text/encoding/charmap" +) -func init() { - // WinAnsiEncoding is also known as CP1252 - enc := charmap.Windows1252 +const baseWinAnsi = "WinAnsiEncoding" - // in WinAnsiEncoding all unused and non-visual codes map to the '•' character - const bullet = '•' - replace := map[byte]rune{ - 127: bullet, // DEL +// NewWinAnsiTextEncoder returns a simpleEncoder that implements WinAnsiEncoding. +func NewWinAnsiTextEncoder() SimpleEncoder { + return &charmapEncoding{ + baseName: baseWinAnsi, + charmap: charmap.Windows1252, + } +} - // unused - 129: bullet, - 141: bullet, - 143: bullet, - 144: bullet, - 157: bullet, +var _ SimpleEncoder = (*charmapEncoding)(nil) + +type charmapEncoding struct { + baseName string + charmap *charmap.Charmap +} + +// String returns a text representation of encoding. +func (enc *charmapEncoding) String() string { + return "charmapEncoding(" + enc.baseName + ")" +} + +// BaseName returns a base name of the encoder, as specified in the PDF spec. +func (enc *charmapEncoding) BaseName() string { + return enc.baseName +} + +// Encode converts a Go unicode string `raw` to a PDF encoded string. 
+func (enc *charmapEncoding) Encode(raw string) []byte { + runes := []rune(raw) + buf := bytes.NewBuffer(nil) + buf.Grow(len(runes)) + for _, r := range runes { + b, ok := enc.charmap.EncodeRune(r) + if !ok { + b, _ = enc.charmap.EncodeRune(MissingCodeRune) + } + buf.WriteByte(b) + } + return buf.Bytes() +} + +func (enc *charmapEncoding) Charcodes() []CharCode { + codes := make([]CharCode, 0, 256) + for i := 0; i < 256; i++ { + code := CharCode(i) + if _, ok := enc.CharcodeToRune(code); ok { + codes = append(codes, code) + } + } + return codes +} + +func (enc *charmapEncoding) RuneToCharcode(r rune) (CharCode, bool) { + b, ok := enc.charmap.EncodeRune(r) + return CharCode(b), ok +} + +func (enc *charmapEncoding) CharcodeToRune(code CharCode) (rune, bool) { + if code > 0xff { + return MissingCodeRune, false + } + switch enc.baseName { + case "WinAnsiEncoding": + // WinANSI in the old implementation remaps few characters + + // everything below 20 (space) is "missing" + if code < 0x20 { + return MissingCodeRune, false + } + + const bullet = '•' + switch code { + + // in WinAnsiEncoding all unused and non-visual codes map to the '•' character + case 127: // DEL + return bullet, true + case 129, 141, 143, 144, 157: // unused in WinANSI + return bullet, true // typographically similar - 160: ' ', // non-breaking space -> space - 173: '-', // soft hyphen -> hyphen - } - - for i := int(' '); i < 256; i++ { - b := byte(i) - r := enc.DecodeByte(b) - if rp, ok := replace[b]; ok { - r = rp + case 160: // non-breaking space -> space + return ' ', true + case 173: // soft hyphen -> hyphen + return '-', true } - winAnsiEncoding[CharCode(b)] = r } + r := enc.charmap.DecodeByte(byte(code)) + return r, r != MissingCodeRune } -// NewWinAnsiTextEncoder returns a SimpleEncoder that implements WinAnsiEncoding. 
-func NewWinAnsiTextEncoder() *SimpleEncoder { - const baseName = "WinAnsiEncoding" - enc := newSimpleTextEncoder(winAnsiEncoding, baseName, nil) - return enc +func (enc *charmapEncoding) CharcodeToGlyph(code CharCode) (GlyphName, bool) { + // TODO(dennwc): only redirects the call - remove from the interface + r, ok := enc.CharcodeToRune(code) + if !ok { + return "", false + } + return enc.RuneToGlyph(r) +} + +func (enc *charmapEncoding) GlyphToCharcode(glyph GlyphName) (CharCode, bool) { + // TODO(dennwc): only redirects the call - remove from the interface + r, ok := GlyphToRune(glyph) + if !ok { + return MissingCodeRune, false + } + return enc.RuneToCharcode(r) +} + +func (enc *charmapEncoding) RuneToGlyph(r rune) (GlyphName, bool) { + // TODO(dennwc): should be in the font interface + return runeToGlyph(r, glyphlistRuneToGlyphMap) +} + +func (enc *charmapEncoding) GlyphToRune(glyph GlyphName) (rune, bool) { + // TODO(dennwc): should be in the font interface + return glyphToRune(glyph, glyphlistGlyphToRuneMap) +} + +func (enc *charmapEncoding) ToPdfObject() core.PdfObject { + return core.MakeName(enc.baseName) } diff --git a/pdf/internal/textencoding/winansi_test.go b/pdf/internal/textencoding/winansi_test.go index 86489217..3f2daa6c 100644 --- a/pdf/internal/textencoding/winansi_test.go +++ b/pdf/internal/textencoding/winansi_test.go @@ -27,239 +27,3 @@ func TestWinAnsiEncoder(t *testing.T) { return } } - -// TODO(dennwc): remove once done with encoding-related changes -func TestWinANSITable(t *testing.T) { - for code, val := range winAnsiEncoding { - if val2, ok := winAnsiEncodingTest[code]; !ok || val != val2 { - t.Fatalf("wrong table: %d %x %v %x(%c) != %x(%c)", code, code, ok, val, val, val2, val2) - } - } -} - -var winAnsiEncodingTest = map[CharCode]rune{ // 224 entries - 0x20: 0x0020, // "space" - 0x21: 0x0021, // ! 
"exclam" - 0x22: 0x0022, // " "quotedbl" - 0x23: 0x0023, // # "numbersign" - 0x24: 0x0024, // $ "dollar" - 0x25: 0x0025, // % "percent" - 0x26: 0x0026, // & "ampersand" - 0x27: 0x0027, // \' "quotesingle" - 0x28: 0x0028, // ( "parenleft" - 0x29: 0x0029, // ) "parenright" - 0x2a: 0x002a, // * "asterisk" - 0x2b: 0x002b, // + "plus" - 0x2c: 0x002c, // , "comma" - 0x2d: 0x002d, // - "hyphen" - 0x2e: 0x002e, // . "period" - 0x2f: 0x002f, // / "slash" - 0x30: 0x0030, // 0 "zero" - 0x31: 0x0031, // 1 "one" - 0x32: 0x0032, // 2 "two" - 0x33: 0x0033, // 3 "three" - 0x34: 0x0034, // 4 "four" - 0x35: 0x0035, // 5 "five" - 0x36: 0x0036, // 6 "six" - 0x37: 0x0037, // 7 "seven" - 0x38: 0x0038, // 8 "eight" - 0x39: 0x0039, // 9 "nine" - 0x3a: 0x003a, // : "colon" - 0x3b: 0x003b, // ; "semicolon" - 0x3c: 0x003c, // < "less" - 0x3d: 0x003d, // = "equal" - 0x3e: 0x003e, // > "greater" - 0x3f: 0x003f, // ? "question" - 0x40: 0x0040, // @ "at" - 0x41: 0x0041, // A "A" - 0x42: 0x0042, // B "B" - 0x43: 0x0043, // C "C" - 0x44: 0x0044, // D "D" - 0x45: 0x0045, // E "E" - 0x46: 0x0046, // F "F" - 0x47: 0x0047, // G "G" - 0x48: 0x0048, // H "H" - 0x49: 0x0049, // I "I" - 0x4a: 0x004a, // J "J" - 0x4b: 0x004b, // K "K" - 0x4c: 0x004c, // L "L" - 0x4d: 0x004d, // M "M" - 0x4e: 0x004e, // N "N" - 0x4f: 0x004f, // O "O" - 0x50: 0x0050, // P "P" - 0x51: 0x0051, // Q "Q" - 0x52: 0x0052, // R "R" - 0x53: 0x0053, // S "S" - 0x54: 0x0054, // T "T" - 0x55: 0x0055, // U "U" - 0x56: 0x0056, // V "V" - 0x57: 0x0057, // W "W" - 0x58: 0x0058, // X "X" - 0x59: 0x0059, // Y "Y" - 0x5a: 0x005a, // Z "Z" - 0x5b: 0x005b, // [ "bracketleft" - 0x5c: 0x005c, // \\ "backslash" - 0x5d: 0x005d, // ] "bracketright" - 0x5e: 0x005e, // ^ "asciicircum" - 0x5f: 0x005f, // _ "underscore" - 0x60: 0x0060, // ` "grave" - 0x61: 0x0061, // a "a" - 0x62: 0x0062, // b "b" - 0x63: 0x0063, // c "c" - 0x64: 0x0064, // d "d" - 0x65: 0x0065, // e "e" - 0x66: 0x0066, // f "f" - 0x67: 0x0067, // g "g" - 0x68: 0x0068, // h "h" - 0x69: 
0x0069, // i "i" - 0x6a: 0x006a, // j "j" - 0x6b: 0x006b, // k "k" - 0x6c: 0x006c, // l "l" - 0x6d: 0x006d, // m "m" - 0x6e: 0x006e, // n "n" - 0x6f: 0x006f, // o "o" - 0x70: 0x0070, // p "p" - 0x71: 0x0071, // q "q" - 0x72: 0x0072, // r "r" - 0x73: 0x0073, // s "s" - 0x74: 0x0074, // t "t" - 0x75: 0x0075, // u "u" - 0x76: 0x0076, // v "v" - 0x77: 0x0077, // w "w" - 0x78: 0x0078, // x "x" - 0x79: 0x0079, // y "y" - 0x7a: 0x007a, // z "z" - 0x7b: 0x007b, // { "braceleft" - 0x7c: 0x007c, // | "bar" - 0x7d: 0x007d, // } "braceright" - 0x7e: 0x007e, // ~ "asciitilde" - 0x7f: 0x2022, // • "bullet" - 0x80: 0x20ac, // € "Euro" - 0x81: 0x2022, // • "bullet" - 0x82: 0x201a, // ‚ "quotesinglbase" - 0x83: 0x0192, // ƒ "florin" - 0x84: 0x201e, // „ "quotedblbase" - 0x85: 0x2026, // … "ellipsis" - 0x86: 0x2020, // † "dagger" - 0x87: 0x2021, // ‡ "daggerdbl" - 0x88: 0x02c6, // ˆ "circumflex" - 0x89: 0x2030, // ‰ "perthousand" - 0x8a: 0x0160, // Š "Scaron" - 0x8b: 0x2039, // ‹ "guilsinglleft" - 0x8c: 0x0152, // Œ "OE" - 0x8d: 0x2022, // • "bullet" - 0x8e: 0x017d, // Ž "Zcaron" - 0x8f: 0x2022, // • "bullet" - 0x90: 0x2022, // • "bullet" - 0x91: 0x2018, // ‘ "quoteleft" - 0x92: 0x2019, // ’ "quoteright" - 0x93: 0x201c, // “ "quotedblleft" - 0x94: 0x201d, // ” "quotedblright" - 0x95: 0x2022, // • "bullet" - 0x96: 0x2013, // – "endash" - 0x97: 0x2014, // — "emdash" - 0x98: 0x02dc, // ˜ "tilde" - 0x99: 0x2122, // ™ "trademark" - 0x9a: 0x0161, // š "scaron" - 0x9b: 0x203a, // › "guilsinglright" - 0x9c: 0x0153, // œ "oe" - 0x9d: 0x2022, // • "bullet" - 0x9e: 0x017e, // ž "zcaron" - 0x9f: 0x0178, // Ÿ "Ydieresis" - 0xa0: 0x0020, // "space" - 0xa1: 0x00a1, // ¡ "exclamdown" - 0xa2: 0x00a2, // ¢ "cent" - 0xa3: 0x00a3, // £ "sterling" - 0xa4: 0x00a4, // ¤ "currency" - 0xa5: 0x00a5, // ¥ "yen" - 0xa6: 0x00a6, // ¦ "brokenbar" - 0xa7: 0x00a7, // § "section" - 0xa8: 0x00a8, // ¨ "dieresis" - 0xa9: 0x00a9, // © "copyright" - 0xaa: 0x00aa, // ª "ordfeminine" - 0xab: 0x00ab, // « "guillemotleft" 
- 0xac: 0x00ac, // ¬ "logicalnot" - 0xad: 0x002d, // - "hyphen" - 0xae: 0x00ae, // ® "registered" - 0xaf: 0x00af, // ¯ "macron" - 0xb0: 0x00b0, // ° "degree" - 0xb1: 0x00b1, // ± "plusminus" - 0xb2: 0x00b2, // ² "twosuperior" - 0xb3: 0x00b3, // ³ "threesuperior" - 0xb4: 0x00b4, // ´ "acute" - 0xb5: 0x00b5, // µ "mu" - 0xb6: 0x00b6, // ¶ "paragraph" - 0xb7: 0x00b7, // · "periodcentered" - 0xb8: 0x00b8, // ¸ "cedilla" - 0xb9: 0x00b9, // ¹ "onesuperior" - 0xba: 0x00ba, // º "ordmasculine" - 0xbb: 0x00bb, // » "guillemotright" - 0xbc: 0x00bc, // ¼ "onequarter" - 0xbd: 0x00bd, // ½ "onehalf" - 0xbe: 0x00be, // ¾ "threequarters" - 0xbf: 0x00bf, // ¿ "questiondown" - 0xc0: 0x00c0, // À "Agrave" - 0xc1: 0x00c1, // Á "Aacute" - 0xc2: 0x00c2, //  "Acircumflex" - 0xc3: 0x00c3, // à "Atilde" - 0xc4: 0x00c4, // Ä "Adieresis" - 0xc5: 0x00c5, // Å "Aring" - 0xc6: 0x00c6, // Æ "AE" - 0xc7: 0x00c7, // Ç "Ccedilla" - 0xc8: 0x00c8, // È "Egrave" - 0xc9: 0x00c9, // É "Eacute" - 0xca: 0x00ca, // Ê "Ecircumflex" - 0xcb: 0x00cb, // Ë "Edieresis" - 0xcc: 0x00cc, // Ì "Igrave" - 0xcd: 0x00cd, // Í "Iacute" - 0xce: 0x00ce, // Î "Icircumflex" - 0xcf: 0x00cf, // Ï "Idieresis" - 0xd0: 0x00d0, // Ð "Eth" - 0xd1: 0x00d1, // Ñ "Ntilde" - 0xd2: 0x00d2, // Ò "Ograve" - 0xd3: 0x00d3, // Ó "Oacute" - 0xd4: 0x00d4, // Ô "Ocircumflex" - 0xd5: 0x00d5, // Õ "Otilde" - 0xd6: 0x00d6, // Ö "Odieresis" - 0xd7: 0x00d7, // × "multiply" - 0xd8: 0x00d8, // Ø "Oslash" - 0xd9: 0x00d9, // Ù "Ugrave" - 0xda: 0x00da, // Ú "Uacute" - 0xdb: 0x00db, // Û "Ucircumflex" - 0xdc: 0x00dc, // Ü "Udieresis" - 0xdd: 0x00dd, // Ý "Yacute" - 0xde: 0x00de, // Þ "Thorn" - 0xdf: 0x00df, // ß "germandbls" - 0xe0: 0x00e0, // à "agrave" - 0xe1: 0x00e1, // á "aacute" - 0xe2: 0x00e2, // â "acircumflex" - 0xe3: 0x00e3, // ã "atilde" - 0xe4: 0x00e4, // ä "adieresis" - 0xe5: 0x00e5, // å "aring" - 0xe6: 0x00e6, // æ "ae" - 0xe7: 0x00e7, // ç "ccedilla" - 0xe8: 0x00e8, // è "egrave" - 0xe9: 0x00e9, // é "eacute" - 0xea: 0x00ea, // ê 
"ecircumflex" - 0xeb: 0x00eb, // ë "edieresis" - 0xec: 0x00ec, // ì "igrave" - 0xed: 0x00ed, // í "iacute" - 0xee: 0x00ee, // î "icircumflex" - 0xef: 0x00ef, // ï "idieresis" - 0xf0: 0x00f0, // ð "eth" - 0xf1: 0x00f1, // ñ "ntilde" - 0xf2: 0x00f2, // ò "ograve" - 0xf3: 0x00f3, // ó "oacute" - 0xf4: 0x00f4, // ô "ocircumflex" - 0xf5: 0x00f5, // õ "otilde" - 0xf6: 0x00f6, // ö "odieresis" - 0xf7: 0x00f7, // ÷ "divide" - 0xf8: 0x00f8, // ø "oslash" - 0xf9: 0x00f9, // ù "ugrave" - 0xfa: 0x00fa, // ú "uacute" - 0xfb: 0x00fb, // û "ucircumflex" - 0xfc: 0x00fc, // ü "udieresis" - 0xfd: 0x00fd, // ý "yacute" - 0xfe: 0x00fe, // þ "thorn" - 0xff: 0x00ff, // ÿ "ydieresis" -} diff --git a/pdf/internal/textencoding/zapfdingbats.go b/pdf/internal/textencoding/zapfdingbats.go index 5ba70eb7..64153a96 100644 --- a/pdf/internal/textencoding/zapfdingbats.go +++ b/pdf/internal/textencoding/zapfdingbats.go @@ -5,8 +5,8 @@ package textencoding -// NewZapfDingbatsEncoder returns a SimpleEncoder that implements ZapfDingbatsEncoding. -func NewZapfDingbatsEncoder() *SimpleEncoder { +// NewZapfDingbatsEncoder returns a TextEncoder that implements ZapfDingbatsEncoding. +func NewZapfDingbatsEncoder() TextEncoder { enc, _ := NewSimpleTextEncoder("ZapfDingbatsEncoding", nil) return enc } diff --git a/pdf/model/font.go b/pdf/model/font.go index c0bf99ed..10c4c719 100644 --- a/pdf/model/font.go +++ b/pdf/model/font.go @@ -115,10 +115,10 @@ func NewStandard14FontMustCompile(basefont fonts.StdFontName) *PdfFont { } // NewStandard14FontWithEncoding returns the standard 14 font named `basefont` as a *PdfFont and -// a SimpleEncoder that encodes all the runes in `alphabet`, or an error if this is not possible. +// a TextEncoder that encodes all the runes in `alphabet`, or an error if this is not possible. // An error can occur if `basefont` is not one the standard 14 font names. 
func NewStandard14FontWithEncoding(basefont fonts.StdFontName, alphabet map[rune]int) (*PdfFont, - *textencoding.SimpleEncoder, error) { + textencoding.SimpleEncoder, error) { baseEncoder := "MacRomanEncoding" common.Log.Trace("NewStandard14FontWithEncoding: basefont=%#q baseEncoder=%#q alphabet=%q", basefont, baseEncoder, string(sortedAlphabet(alphabet))) diff --git a/pdf/model/font_simple.go b/pdf/model/font_simple.go index 627c5c15..fa7ccc07 100644 --- a/pdf/model/font_simple.go +++ b/pdf/model/font_simple.go @@ -43,9 +43,9 @@ type pdfFontSimple struct { charWidths map[textencoding.CharCode]float64 // std14Encoder is the encoder specified by the /Encoding entry in the font dict. - encoder *textencoding.SimpleEncoder + encoder textencoding.TextEncoder // std14Encoder is used for Standard 14 fonts where no /Encoding is specified in the font dict. - std14Encoder *textencoding.SimpleEncoder + std14Encoder textencoding.TextEncoder // std14Descriptor is used for Standard 14 fonts where no /FontDescriptor is specified in the font dict. std14Descriptor *PdfFontDescriptor @@ -100,16 +100,10 @@ func (font *pdfFontSimple) Encoder() textencoding.TextEncoder { } // SetEncoder sets the encoding for the underlying font. -// TODO(peterwilliams97): Change function signature to SetEncoder(encoder *textencoding.SimpleEncoder). +// TODO(peterwilliams97): Change function signature to SetEncoder(encoder *textencoding.simpleEncoder). // TODO(gunnsth): Makes sense if SetEncoder is removed from the interface fonts.Font as proposed in PR #260. func (font *pdfFontSimple) SetEncoder(encoder textencoding.TextEncoder) { - simple, ok := encoder.(*textencoding.SimpleEncoder) - if !ok { - // This can't happen. - common.Log.Error("pdfFontSimple.SetEncoder passed bad encoder type %T", encoder) - simple = nil - } - font.encoder = simple + font.encoder = encoder } // GetRuneMetrics returns the character metrics for the rune. 
@@ -166,7 +160,7 @@ func (font pdfFontSimple) GetCharMetrics(code textencoding.CharCode) (fonts.Char // • The value of BaseFont is derived differently. // func newSimpleFontFromPdfObject(d *core.PdfObjectDictionary, base *fontCommon, - std14Encoder *textencoding.SimpleEncoder) (*pdfFontSimple, error) { + std14Encoder textencoding.TextEncoder) (*pdfFontSimple, error) { font := pdfFontSimpleFromSkeleton(base) font.std14Encoder = std14Encoder @@ -239,11 +233,11 @@ func (font *pdfFontSimple) addEncoding() error { var ( baseEncoder string differences map[textencoding.CharCode]textencoding.GlyphName - encoder *textencoding.SimpleEncoder + encoder textencoding.SimpleEncoder ) if font.Encoder() != nil { - encoder, ok := font.Encoder().(*textencoding.SimpleEncoder) + encoder, ok := font.Encoder().(textencoding.SimpleEncoder) if ok && encoder != nil { baseEncoder = encoder.BaseName() } @@ -291,7 +285,7 @@ func (font *pdfFontSimple) addEncoding() error { // At the end, apply the differences. if differences != nil { common.Log.Trace("differences=%+v font=%s", differences, font.baseFields()) - encoder.ApplyDifferences(differences) + encoder = textencoding.ApplyDifferences(encoder, differences) } font.SetEncoder(encoder) } @@ -508,7 +502,7 @@ func NewPdfFontFromTTFFile(filePath string) (*PdfFont, error) { // updateStandard14Font fills the font.charWidths for standard 14 fonts. // Don't call this function with a font that is not in the standard 14. func (font *pdfFontSimple) updateStandard14Font() { - se, ok := font.Encoder().(*textencoding.SimpleEncoder) + se, ok := font.Encoder().(textencoding.SimpleEncoder) if !ok { // This can't happen. common.Log.Error("Wrong encoder type: %T. 
font=%s.", font.Encoder(), font) @@ -546,6 +540,6 @@ func stdFontToSimpleFont(f fonts.StdFont) pdfFontSimple { StemV: core.MakeFloat(l.StemV), StemH: core.MakeFloat(l.StemH), }, - std14Encoder: f.SimpleEncoder(), + std14Encoder: f.Encoder(), } } diff --git a/pdf/model/font_test.go b/pdf/model/font_test.go index ab438322..53cae596 100644 --- a/pdf/model/font_test.go +++ b/pdf/model/font_test.go @@ -798,12 +798,12 @@ endobj } -// newStandandTextEncoder returns a SimpleEncoder that implements StandardEncoding. +// newStandandTextEncoder returns a simpleEncoder that implements StandardEncoding. // The non-symbolic standard 14 fonts have StandardEncoding. func newStandandTextEncoder(t *testing.T) textencoding.SimpleEncoder { enc, err := textencoding.NewSimpleTextEncoder("StandardEncoding", nil) if err != nil { t.Fatalf("Error: %v", err) } - return *enc + return enc } diff --git a/pdf/model/fontfile.go b/pdf/model/fontfile.go index ec671421..2ceeaac0 100644 --- a/pdf/model/fontfile.go +++ b/pdf/model/fontfile.go @@ -33,7 +33,7 @@ import ( type fontFile struct { name string subtype string - encoder *textencoding.SimpleEncoder + encoder textencoding.SimpleEncoder } // String returns a human readable description of `fontfile`. diff --git a/pdf/model/fonts/std.go b/pdf/model/fonts/std.go index b7e2e992..54319a77 100644 --- a/pdf/model/fonts/std.go +++ b/pdf/model/fonts/std.go @@ -73,7 +73,7 @@ var _ Font = StdFont{} type StdFont struct { desc Descriptor metrics map[GlyphName]CharMetrics - encoder *textencoding.SimpleEncoder + encoder textencoding.TextEncoder } // NewStdFont returns a new instance of the font with a default encoder set (WinAnsiEncoding). @@ -83,7 +83,7 @@ func NewStdFont(desc Descriptor, metrics map[GlyphName]CharMetrics) StdFont { } // NewStdFontWithEncoding returns a new instance of the font with a specified encoder. 
-func NewStdFontWithEncoding(desc Descriptor, metrics map[GlyphName]CharMetrics, encoder *textencoding.SimpleEncoder) StdFont { +func NewStdFontWithEncoding(desc Descriptor, metrics map[GlyphName]CharMetrics, encoder textencoding.TextEncoder) StdFont { return StdFont{ desc: desc, metrics: metrics, @@ -98,11 +98,6 @@ func (font StdFont) Name() string { // Encoder returns the font's text encoder. func (font StdFont) Encoder() textencoding.TextEncoder { - return font.SimpleEncoder() -} - -// SimpleEncoder returns the font's text encoder. -func (font StdFont) SimpleEncoder() *textencoding.SimpleEncoder { return font.encoder } diff --git a/pdf/model/fonts/ttfparser.go b/pdf/model/fonts/ttfparser.go index d463e12d..4b6d7379 100644 --- a/pdf/model/fonts/ttfparser.go +++ b/pdf/model/fonts/ttfparser.go @@ -45,7 +45,7 @@ import ( ) // MakeEncoder returns an encoder built from the tables in `rec`. -func (ttf *TtfType) MakeEncoder() (*textencoding.SimpleEncoder, error) { +func (ttf *TtfType) MakeEncoder() (textencoding.SimpleEncoder, error) { encoding := make(map[textencoding.CharCode]GlyphName) // TODO(dennwc): this is a bit strange, since TTF may contain more than 256 characters // should probably make a different encoder here From 1742cb9c893a56b388c7c2a61c9483953f3d7a01 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Tue, 1 Jan 2019 21:17:57 +0200 Subject: [PATCH 05/11] textencoding: drop old simpleEncoder, use the new implementation --- pdf/internal/textencoding/differences.go | 111 ++- pdf/internal/textencoding/simple.go | 789 ++++-------------- pdf/internal/textencoding/simple_symbol.go | 228 +++++ pdf/internal/textencoding/simple_winansi.go | 73 ++ ...winansi_test.go => simple_winansi_test.go} | 2 +- .../textencoding/simple_zapfdingbats.go | 241 ++++++ pdf/internal/textencoding/symbol.go | 12 - pdf/internal/textencoding/winansi.go | 136 --- pdf/internal/textencoding/zapfdingbats.go | 12 - pdf/model/font_simple.go | 2 +- pdf/model/font_test.go | 20 +- 
pdf/model/fonts/std.go | 2 +- pdf/model/functions_test.go | 7 +- 13 files changed, 824 insertions(+), 811 deletions(-) create mode 100644 pdf/internal/textencoding/simple_symbol.go create mode 100644 pdf/internal/textencoding/simple_winansi.go rename pdf/internal/textencoding/{winansi_test.go => simple_winansi_test.go} (94%) create mode 100644 pdf/internal/textencoding/simple_zapfdingbats.go delete mode 100644 pdf/internal/textencoding/symbol.go delete mode 100644 pdf/internal/textencoding/winansi.go delete mode 100644 pdf/internal/textencoding/zapfdingbats.go diff --git a/pdf/internal/textencoding/differences.go b/pdf/internal/textencoding/differences.go index f2fe25d0..aeb6ad58 100644 --- a/pdf/internal/textencoding/differences.go +++ b/pdf/internal/textencoding/differences.go @@ -9,44 +9,88 @@ import ( "github.com/unidoc/unidoc/pdf/core" ) -// ApplyDifferences modifies or wraps the base encoding and overlays differences over it. -func ApplyDifferences(base SimpleEncoder, differences map[CharCode]GlyphName) SimpleEncoder { - if enc, ok := base.(*simpleEncoder); ok { - enc.applyDifferences(differences) - return enc +// FromFontDifferences converts `diffList` (a /Differences array from an /Encoding object) to a map +// representing character code to glyph mappings. +func FromFontDifferences(diffList *core.PdfObjectArray) (map[CharCode]GlyphName, error) { + differences := make(map[CharCode]GlyphName) + var n CharCode + for _, obj := range diffList.Elements() { + switch v := obj.(type) { + case *core.PdfObjectInteger: + n = CharCode(*v) + case *core.PdfObjectName: + s := string(*v) + differences[n] = GlyphName(s) + n++ + default: + common.Log.Debug("ERROR: Bad type. 
obj=%s", obj) + return nil, core.ErrTypeError + } } - return newDifferencesEncoding(base, differences) + return differences, nil } -func newDifferencesEncoding(base SimpleEncoder, differences map[CharCode]GlyphName) SimpleEncoder { +// toFontDifferences converts `differences` (a map representing character code to glyph mappings) +// to a /Differences array for an /Encoding object. +func toFontDifferences(differences map[CharCode]GlyphName) *core.PdfObjectArray { + if len(differences) == 0 { + return nil + } + + codes := make([]CharCode, 0, len(differences)) + for c := range differences { + codes = append(codes, c) + } + sort.Slice(codes, func(i, j int) bool { + return codes[i] < codes[j] + }) + + n := codes[0] + diffList := []core.PdfObject{core.MakeInteger(int64(n)), core.MakeName(string(differences[n]))} + for _, c := range codes[1:] { + if c == n+1 { + diffList = append(diffList, core.MakeName(string(differences[c]))) + } else { + diffList = append(diffList, core.MakeInteger(int64(c))) + } + n = c + } + return core.MakeArray(diffList...) +} + +// ApplyDifferences modifies or wraps the base encoding and overlays differences over it. +func ApplyDifferences(base SimpleEncoder, differences map[CharCode]GlyphName) SimpleEncoder { // TODO(dennwc): check if it's a differencesEncoding, and merge the mapping d := &differencesEncoding{ base: base, differences: differences, - code2rune: make(map[CharCode]rune), - rune2code: make(map[rune]CharCode), + decode: make(map[byte]rune), + encode: make(map[rune]byte), } for code, glyph := range differences { + b := byte(code) r, ok := GlyphToRune(glyph) if ok { - d.rune2code[r] = code + d.encode[r] = b } else { common.Log.Debug("ERROR: No match for glyph=%q differences=%+v", glyph, differences) } - d.code2rune[code] = r + d.decode[b] = r } return d } // differencesEncoding remaps characters of a base encoding and act as a pass-trough for other characters. +// Assumes that an underlying encoding is 8 bit. 
type differencesEncoding struct { base SimpleEncoder // original mapping to encode to PDF differences map[CharCode]GlyphName - // overlayed on top of base encoding - code2rune map[CharCode]rune - rune2code map[rune]CharCode + + // overlayed on top of base encoding (8 bit) + decode map[byte]rune + encode map[rune]byte } // BaseName returns base encoding name. @@ -63,8 +107,13 @@ func (enc *differencesEncoding) String() string { func (enc *differencesEncoding) Charcodes() []CharCode { codes := enc.base.Charcodes() sorted := true + seen := make(map[CharCode]struct{}, len(codes)) for _, code := range codes { - if _, ok := enc.code2rune[code]; !ok { + seen[code] = struct{}{} + } + for b := range enc.decode { + code := CharCode(b) + if _, ok := seen[code]; !ok { codes = append(codes, code) sorted = false } @@ -93,8 +142,8 @@ func (enc *differencesEncoding) Encode(raw string) []byte { // RuneToCharcode returns the PDF character code corresponding to rune `r`. // The bool return flag is true if there was a match, and false otherwise. func (enc *differencesEncoding) RuneToCharcode(r rune) (CharCode, bool) { - if code, ok := enc.rune2code[r]; ok { - return code, true + if b, ok := enc.encode[r]; ok { + return CharCode(b), true } return enc.base.RuneToCharcode(r) } @@ -102,7 +151,11 @@ func (enc *differencesEncoding) RuneToCharcode(r rune) (CharCode, bool) { // CharcodeToRune returns the rune corresponding to character code `code`. // The bool return flag is true if there was a match, and false otherwise. 
func (enc *differencesEncoding) CharcodeToRune(code CharCode) (rune, bool) { - if r, ok := enc.code2rune[code]; ok { + if code > 0xff { + return MissingCodeRune, false + } + b := byte(code) + if r, ok := enc.decode[b]; ok { return r, true } return enc.base.CharcodeToRune(code) @@ -114,12 +167,7 @@ func (enc *differencesEncoding) CharcodeToGlyph(code CharCode) (GlyphName, bool) if glyph, ok := enc.differences[code]; ok { return glyph, true } - // TODO(dennwc): only redirects the call - remove from the interface - r, ok := enc.CharcodeToRune(code) - if !ok { - return "", false - } - return enc.RuneToGlyph(r) + return enc.base.CharcodeToGlyph(code) } // GlyphToCharcode returns character code for glyph `glyph`. @@ -143,14 +191,25 @@ func (enc *differencesEncoding) GlyphToCharcode(glyph GlyphName) (CharCode, bool // The bool return flag is true if there was a match, and false otherwise. func (enc *differencesEncoding) RuneToGlyph(r rune) (GlyphName, bool) { // TODO(dennwc): should be in the font interface - return runeToGlyph(r, glyphlistRuneToGlyphMap) + code, ok := enc.RuneToCharcode(r) + if !ok { + return "", false + } + if glyph, ok := enc.differences[code]; ok { + return glyph, true + } + return enc.base.RuneToGlyph(r) } // GlyphToRune returns the rune corresponding to glyph `glyph`. // The bool return flag is true if there was a match, and false otherwise. func (enc *differencesEncoding) GlyphToRune(glyph GlyphName) (rune, bool) { // TODO(dennwc): should be in the font interface - return glyphToRune(glyph, glyphlistGlyphToRuneMap) + code, ok := enc.GlyphToCharcode(glyph) + if !ok { + return MissingCodeRune, false + } + return enc.CharcodeToRune(code) } // ToPdfObject returns the encoding as a PdfObject. 
diff --git a/pdf/internal/textencoding/simple.go b/pdf/internal/textencoding/simple.go index 328e3c64..a4a5d564 100644 --- a/pdf/internal/textencoding/simple.go +++ b/pdf/internal/textencoding/simple.go @@ -7,23 +7,15 @@ package textencoding import ( "errors" - "fmt" "sort" - "strings" + "unicode/utf8" "github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/pdf/core" + "golang.org/x/text/encoding" + "golang.org/x/text/transform" ) -// Implementations of the standard 1-byte encodings -// MacExpertEncoding -// MacRomanEncoding -// PdfDocEncoding -// StandardEncoding -// ZapfDingbatsEncoding -// -// WinAnsiEncoding is implemented via charmapEncoding. - // SimpleEncoder represents a 1 byte encoding. type SimpleEncoder interface { TextEncoder @@ -31,119 +23,180 @@ type SimpleEncoder interface { Charcodes() []CharCode } -var _ SimpleEncoder = (*simpleEncoder)(nil) - -// simpleEncoder represents a 1 byte encoding -type simpleEncoder struct { - baseName string - - baseEncoding map[CharCode]rune - differences map[CharCode]GlyphName - - codeToGlyph map[CharCode]GlyphName - glyphToCode map[GlyphName]CharCode - codeToRune map[CharCode]rune -} - // NewCustomSimpleTextEncoder returns a simpleEncoder based on map `encoding` and difference map // `differences`. -func NewCustomSimpleTextEncoder(encoding, differences map[CharCode]GlyphName) ( - SimpleEncoder, error) { +func NewCustomSimpleTextEncoder(encoding, differences map[CharCode]GlyphName) (SimpleEncoder, error) { if len(encoding) == 0 { return nil, errors.New("empty custom encoding") } const baseName = "custom" - baseEncoding := make(map[CharCode]rune) + baseEncoding := make(map[byte]rune) for code, glyph := range encoding { r, ok := GlyphToRune(glyph) if !ok { common.Log.Debug("ERROR: Unknown glyph. 
%q", glyph) continue } - baseEncoding[code] = r + baseEncoding[byte(code)] = r } - return newSimpleTextEncoder(baseEncoding, baseName, differences), nil -} - -// applyDifferences applies the encoding delta `differences` to `se`. -func (se *simpleEncoder) applyDifferences(differences map[CharCode]GlyphName) { - se.differences = differences - se.computeTables() + // TODO(dennwc): this seems to be incorrect - baseEncoding won't be saved when converting to PDF object + enc := newSimpleEncoderFromMap(baseName, baseEncoding) + if len(differences) != 0 { + enc = ApplyDifferences(enc, differences) + } + return enc, nil } // NewSimpleTextEncoder returns a simpleEncoder based on predefined encoding `baseName` and // difference map `differences`. func NewSimpleTextEncoder(baseName string, differences map[CharCode]GlyphName) (SimpleEncoder, error) { - switch baseName { - case baseWinAnsi: - enc := NewWinAnsiTextEncoder() - if len(differences) != 0 { - enc = ApplyDifferences(enc, differences) + var enc SimpleEncoder + if fnc, ok := simple[baseName]; ok { + enc = fnc() + } else { + baseEncoding, ok := simpleEncodings[baseName] + if !ok { + common.Log.Debug("ERROR: NewSimpleTextEncoder. Unknown encoding %q", baseName) + return nil, errors.New("unsupported font encoding") } - return enc, nil + // FIXME(dennwc): make a global and init once + enc = newSimpleEncoderFromMap(baseName, baseEncoding) } - baseEncoding, ok := simpleEncodings[baseName] - if !ok { - common.Log.Debug("ERROR: NewSimpleTextEncoder. Unknown encoding %q", baseName) - return nil, errors.New("unsupported font encoding") + if len(differences) != 0 { + enc = ApplyDifferences(enc, differences) } - return newSimpleTextEncoder(baseEncoding, baseName, differences), nil + return enc, nil } -// newSimpleTextEncoder returns a simpleEncoder based on map `encoding` and difference map -// `differences`. 
-func newSimpleTextEncoder(baseEncoding map[CharCode]rune, baseName string, - differences map[CharCode]GlyphName) SimpleEncoder { - - se := &simpleEncoder{ - baseName: baseName, - baseEncoding: baseEncoding, - differences: differences, +func newSimpleEncoderFromMap(name string, encoding map[byte]rune) SimpleEncoder { + se := &simpleEncoding{ + baseName: name, + decode: encoding, + encode: make(map[rune]byte, len(encoding)), + } + for b, r := range se.decode { + se.encode[r] = b } - se.computeTables() return se } -// simpleEncoderNumEntries is the maximum number of encoding entries shown in simpleEncoder.String() -const simpleEncoderNumEntries = 0 +var ( + simple = make(map[string]func() SimpleEncoder) +) -// String returns a string that describes `se`. -func (se simpleEncoder) String() string { - name := se.baseName - if len(se.differences) > 0 { - name = fmt.Sprintf("%s(diff)", se.baseName) +// RegisterSimpleEncoding registers a SimpleEncoder constructer by PDF encoding name. +func RegisterSimpleEncoding(name string, fnc func() SimpleEncoder) { + if _, ok := simple[name]; ok { + panic("already registered") } - parts := []string{ - fmt.Sprintf("%#q %d entries %d differences", name, len(se.codeToGlyph), len(se.differences)), - fmt.Sprintf("differences=%+v", se.differences), - } - - codes := se.Charcodes() - if len(codes) > simpleEncoderNumEntries { - codes = codes[:simpleEncoderNumEntries] - } - - for _, c := range codes { - parts = append(parts, fmt.Sprintf("%d=0x%02x: %q", c, c, se.codeToGlyph[c])) - } - return fmt.Sprintf("SIMPLE_ENCODER{%s}", strings.Join(parts, ", ")) + simple[name] = fnc } -// BaseName returns `se`'s base name. -func (se simpleEncoder) BaseName() string { - return se.baseName +var ( + _ SimpleEncoder = (*simpleEncoding)(nil) + _ encoding.Encoding = (*simpleEncoding)(nil) +) + +// simpleEncoding represents a 1 byte encoding. 
+type simpleEncoding struct { + baseName string + // one byte encoding: CharCode <-> byte + encode map[rune]byte + decode map[byte]rune } -// Encode converts a Go unicode string `raw` to a PDF encoded string. -func (se simpleEncoder) Encode(raw string) []byte { - return encodeString8bit(se, raw) +func (enc *simpleEncoding) Encode(raw string) []byte { + data, _ := enc.NewEncoder().Bytes([]byte(raw)) + return data } -// Charcodes returns a slice of all charcodes in this encoding. -func (se simpleEncoder) Charcodes() []CharCode { - codes := make([]CharCode, 0, len(se.codeToGlyph)) - for code := range se.codeToGlyph { - codes = append(codes, code) +// NewDecoder implements encoding.Encoding. +func (enc *simpleEncoding) NewDecoder() *encoding.Decoder { + return &encoding.Decoder{Transformer: simpleDecoder{m: enc.decode}} +} + +type simpleDecoder struct { + m map[byte]rune +} + +// Transform implements transform.Transformer. +func (enc simpleDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, _ error) { + for len(src) != 0 { + b := src[0] + src = src[1:] + + r, ok := enc.m[b] + if !ok { + r = MissingCodeRune + } + if utf8.RuneLen(r) > len(dst) { + return nDst, nSrc, transform.ErrShortDst + } + n := utf8.EncodeRune(dst, r) + dst = dst[n:] + + nSrc++ + nDst += n + } + return nDst, nSrc, nil +} + +// Reset implements transform.Transformer. +func (enc simpleDecoder) Reset() {} + +// NewEncoder implements encoding.Encoding. +func (enc *simpleEncoding) NewEncoder() *encoding.Encoder { + return &encoding.Encoder{Transformer: simpleEncoder{m: enc.encode}} +} + +type simpleEncoder struct { + m map[rune]byte +} + +// Transform implements transform.Transformer. 
+func (enc simpleEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, _ error) { + for len(src) != 0 { + if !utf8.FullRune(src) && !atEOF { + return nDst, nSrc, transform.ErrShortSrc + } else if len(dst) == 0 { + return nDst, nSrc, transform.ErrShortDst + } + r, n := utf8.DecodeRune(src) + if r == utf8.RuneError { + r = MissingCodeRune + } + src = src[n:] + nSrc += n + + b, ok := enc.m[r] + if !ok { + b, _ = enc.m[MissingCodeRune] + } + dst[0] = b + + dst = dst[1:] + nDst++ + } + return nDst, nSrc, nil +} + +// Reset implements transform.Transformer. +func (enc simpleEncoder) Reset() {} + +// String returns a text representation of encoding. +func (enc *simpleEncoding) String() string { + return "simpleEncoding(" + enc.baseName + ")" +} + +// BaseName returns a base name of the encoder, as specified in the PDF spec. +func (enc *simpleEncoding) BaseName() string { + return enc.baseName +} + +func (enc *simpleEncoding) Charcodes() []CharCode { + codes := make([]CharCode, 0, len(enc.decode)) + for b := range enc.decode { + codes = append(codes, CharCode(b)) } sort.Slice(codes, func(i, j int) bool { return codes[i] < codes[j] @@ -151,150 +204,63 @@ func (se simpleEncoder) Charcodes() []CharCode { return codes } -// CharcodeToGlyph returns the glyph name for character code `code`. -// The bool return flag is true if there was a match, and false otherwise. -func (se simpleEncoder) CharcodeToGlyph(code CharCode) (GlyphName, bool) { - glyph, ok := se.codeToGlyph[code] - if !ok { - common.Log.Debug("Charcode -> Glyph error: charcode not found: 0x%04x", code) - } - return glyph, ok +func (enc *simpleEncoding) RuneToCharcode(r rune) (CharCode, bool) { + b, ok := enc.encode[r] + return CharCode(b), ok } -// GlyphToCharcode returns character code for glyph `glyph`. -// The bool return flag is true if there was a match, and false otherwise. 
-func (se simpleEncoder) GlyphToCharcode(glyph GlyphName) (CharCode, bool) { - code, ok := se.glyphToCode[glyph] - if !ok { - common.Log.Debug("Glyph -> Charcode error: glyph not found: %q %s", glyph, se) - } - return code, ok -} - -// RuneToCharcode returns the PDF character code corresponding to rune `r`. -// The bool return flag is true if there was a match, and false otherwise. -func (se simpleEncoder) RuneToCharcode(val rune) (CharCode, bool) { - return doRuneToCharcode(se, val) -} - -// CharcodeToRune returns the rune corresponding to character code `code`. -// The bool return flag is true if there was a match, and false otherwise. -func (se simpleEncoder) CharcodeToRune(code CharCode) (rune, bool) { - r, ok := se.codeToRune[code] - if !ok { - common.Log.Debug("Charcode -> Rune error: charcode not found: 0x%04x", code) +func (enc *simpleEncoding) CharcodeToRune(code CharCode) (rune, bool) { + if code > 0xff { + return MissingCodeRune, false } + b := byte(code) + r, ok := enc.decode[b] return r, ok } -// RuneToGlyph returns the glyph corresponding to rune `r`. -// The bool return flag is true if there was a match, and false otherwise. -func (se simpleEncoder) RuneToGlyph(r rune) (GlyphName, bool) { +func (enc *simpleEncoding) CharcodeToGlyph(code CharCode) (GlyphName, bool) { + // TODO(dennwc): only redirects the call - remove from the interface + r, ok := enc.CharcodeToRune(code) + if !ok { + return "", false + } + return enc.RuneToGlyph(r) +} + +func (enc *simpleEncoding) GlyphToCharcode(glyph GlyphName) (CharCode, bool) { + // TODO(dennwc): only redirects the call - remove from the interface + r, ok := GlyphToRune(glyph) + if !ok { + return MissingCodeRune, false + } + return enc.RuneToCharcode(r) +} + +func (enc *simpleEncoding) RuneToGlyph(r rune) (GlyphName, bool) { + // TODO(dennwc): should be in the font interface return runeToGlyph(r, glyphlistRuneToGlyphMap) } -// GlyphToRune returns the rune corresponding to glyph `glyph`. 
-// The bool return flag is true if there was a match, and false otherwise. -func (se simpleEncoder) GlyphToRune(glyph GlyphName) (rune, bool) { +func (enc *simpleEncoding) GlyphToRune(glyph GlyphName) (rune, bool) { + // TODO(dennwc): should be in the font interface return glyphToRune(glyph, glyphlistGlyphToRuneMap) } -// ToPdfObject returns `se` as a PdfObject -func (se simpleEncoder) ToPdfObject() core.PdfObject { - if len(se.differences) == 0 { - switch se.baseName { - case "MacRomanEncoding", "MacExpertEncoding": - return core.MakeName(se.baseName) - } - return nil // Use font's built-in encoding. +func (enc *simpleEncoding) ToPdfObject() core.PdfObject { + switch enc.baseName { + case "MacRomanEncoding", "MacExpertEncoding", baseWinAnsi: + return core.MakeName(enc.baseName) } + // TODO(dennwc): check if this switch is necessary, or an old code was incorrect dict := core.MakeDict() dict.Set("Type", core.MakeName("Encoding")) - dict.Set("BaseEncoding", core.MakeName(se.baseName)) - dict.Set("Differences", toFontDifferences(se.differences)) + dict.Set("BaseEncoding", core.MakeName(enc.baseName)) + dict.Set("Differences", toFontDifferences(nil)) return core.MakeIndirectObject(dict) } -// computeTables computes the tables needed for a working simpleEncoder from the member -// fields `baseEncoding` and `differences`. 
-func (se *simpleEncoder) computeTables() { - codeToRune := make(map[CharCode]rune) - for code, r := range se.baseEncoding { - codeToRune[code] = r - } - for code, glyph := range se.differences { - r, ok := GlyphToRune(glyph) - if !ok { - common.Log.Debug("ERROR: No match for glyph=%q differences=%+v", glyph, - se.differences) - } - codeToRune[code] = r - } - - codeToGlyph := make(map[CharCode]GlyphName) - glyphToCode := make(map[GlyphName]CharCode) - for code, r := range codeToRune { - if glyph, ok := RuneToGlyph(r); ok { - codeToGlyph[code] = glyph - glyphToCode[glyph] = code - } - } - se.codeToGlyph = codeToGlyph - se.glyphToCode = glyphToCode - se.codeToRune = codeToRune -} - -// FromFontDifferences converts `diffList` (a /Differences array from an /Encoding object) to a map -// representing character code to glyph mappings. -func FromFontDifferences(diffList *core.PdfObjectArray) (map[CharCode]GlyphName, error) { - differences := make(map[CharCode]GlyphName) - var n CharCode - for _, obj := range diffList.Elements() { - switch v := obj.(type) { - case *core.PdfObjectInteger: - n = CharCode(*v) - case *core.PdfObjectName: - s := string(*v) - differences[n] = GlyphName(s) - n++ - default: - common.Log.Debug("ERROR: Bad type. obj=%s", obj) - return nil, core.ErrTypeError - } - } - return differences, nil -} - -// toFontDifferences converts `differences` (a map representing character code to glyph mappings) -// to a /Differences array for an /Encoding object. 
-func toFontDifferences(differences map[CharCode]GlyphName) *core.PdfObjectArray { - if len(differences) == 0 { - return nil - } - - codes := make([]CharCode, 0, len(differences)) - for c := range differences { - codes = append(codes, c) - } - sort.Slice(codes, func(i, j int) bool { - return codes[i] < codes[j] - }) - - n := codes[0] - diffList := []core.PdfObject{core.MakeInteger(int64(n)), core.MakeName(string(differences[n]))} - for _, c := range codes[1:] { - if c == n+1 { - diffList = append(diffList, core.MakeName(string(differences[c]))) - } else { - diffList = append(diffList, core.MakeInteger(int64(c))) - } - n = c - } - return core.MakeArray(diffList...) -} - // simpleEncodings is a map of the standard 8 bit character encodings. -var simpleEncodings = map[string]map[CharCode]rune{ +var simpleEncodings = map[string]map[byte]rune{ "MacExpertEncoding": { // 165 entries 0x20: 0x0020, // "space" 0x21: 0xf721, // "exclamsmall" @@ -1124,399 +1090,4 @@ var simpleEncodings = map[string]map[CharCode]rune{ 0xf9: 0x0153, // œ "oe" 0xfa: 0x00df, // ß "germandbls" }, - "SymbolEncoding": { // 189 entries - 0x20: 0x0020, // "space" - 0x21: 0x0021, // ! "exclam" - 0x22: 0x2200, // ∀ "universal" - 0x23: 0x0023, // # "numbersign" - 0x24: 0x2203, // ∃ "existential" - 0x25: 0x0025, // % "percent" - 0x26: 0x0026, // & "ampersand" - 0x27: 0x220b, // ∋ "suchthat" - 0x28: 0x0028, // ( "parenleft" - 0x29: 0x0029, // ) "parenright" - 0x2a: 0x2217, // ∗ "asteriskmath" - 0x2b: 0x002b, // + "plus" - 0x2c: 0x002c, // , "comma" - 0x2d: 0x2212, // − "minus" - 0x2e: 0x002e, // . 
"period" - 0x2f: 0x002f, // / "slash" - 0x30: 0x0030, // 0 "zero" - 0x31: 0x0031, // 1 "one" - 0x32: 0x0032, // 2 "two" - 0x33: 0x0033, // 3 "three" - 0x34: 0x0034, // 4 "four" - 0x35: 0x0035, // 5 "five" - 0x36: 0x0036, // 6 "six" - 0x37: 0x0037, // 7 "seven" - 0x38: 0x0038, // 8 "eight" - 0x39: 0x0039, // 9 "nine" - 0x3a: 0x003a, // : "colon" - 0x3b: 0x003b, // ; "semicolon" - 0x3c: 0x003c, // < "less" - 0x3d: 0x003d, // = "equal" - 0x3e: 0x003e, // > "greater" - 0x3f: 0x003f, // ? "question" - 0x40: 0x2245, // ≅ "congruent" - 0x41: 0x0391, // Α "Alpha" - 0x42: 0x0392, // Β "Beta" - 0x43: 0x03a7, // Χ "Chi" - 0x44: 0x2206, // ∆ "Delta" - 0x45: 0x0395, // Ε "Epsilon" - 0x46: 0x03a6, // Φ "Phi" - 0x47: 0x0393, // Γ "Gamma" - 0x48: 0x0397, // Η "Eta" - 0x49: 0x0399, // Ι "Iota" - 0x4a: 0x03d1, // ϑ "theta1" - 0x4b: 0x039a, // Κ "Kappa" - 0x4c: 0x039b, // Λ "Lambda" - 0x4d: 0x039c, // Μ "Mu" - 0x4e: 0x039d, // Ν "Nu" - 0x4f: 0x039f, // Ο "Omicron" - 0x50: 0x03a0, // Π "Pi" - 0x51: 0x0398, // Θ "Theta" - 0x52: 0x03a1, // Ρ "Rho" - 0x53: 0x03a3, // Σ "Sigma" - 0x54: 0x03a4, // Τ "Tau" - 0x55: 0x03a5, // Υ "Upsilon" - 0x56: 0x03c2, // ς "sigma1" - 0x57: 0x2126, // Ω "Omega" - 0x58: 0x039e, // Ξ "Xi" - 0x59: 0x03a8, // Ψ "Psi" - 0x5a: 0x0396, // Ζ "Zeta" - 0x5b: 0x005b, // [ "bracketleft" - 0x5c: 0x2234, // ∴ "therefore" - 0x5d: 0x005d, // ] "bracketright" - 0x5e: 0x22a5, // ⊥ "perpendicular" - 0x5f: 0x005f, // _ "underscore" - 0x60: 0xf8e5, // "radicalex" - 0x61: 0x03b1, // α "alpha" - 0x62: 0x03b2, // β "beta" - 0x63: 0x03c7, // χ "chi" - 0x64: 0x03b4, // δ "delta" - 0x65: 0x03b5, // ε "epsilon" - 0x66: 0x03c6, // φ "phi" - 0x67: 0x03b3, // γ "gamma" - 0x68: 0x03b7, // η "eta" - 0x69: 0x03b9, // ι "iota" - 0x6a: 0x03d5, // ϕ "phi1" - 0x6b: 0x03ba, // κ "kappa" - 0x6c: 0x03bb, // λ "lambda" - 0x6d: 0x00b5, // µ "mu" - 0x6e: 0x03bd, // ν "nu" - 0x6f: 0x03bf, // ο "omicron" - 0x70: 0x03c0, // π "pi" - 0x71: 0x03b8, // θ "theta" - 0x72: 0x03c1, // ρ "rho" - 0x73: 0x03c3, 
// σ "sigma" - 0x74: 0x03c4, // τ "tau" - 0x75: 0x03c5, // υ "upsilon" - 0x76: 0x03d6, // ϖ "omega1" - 0x77: 0x03c9, // ω "omega" - 0x78: 0x03be, // ξ "xi" - 0x79: 0x03c8, // ψ "psi" - 0x7a: 0x03b6, // ζ "zeta" - 0x7b: 0x007b, // { "braceleft" - 0x7c: 0x007c, // | "bar" - 0x7d: 0x007d, // } "braceright" - 0x7e: 0x223c, // ∼ "similar" - 0xa0: 0x20ac, // € "Euro" - 0xa1: 0x03d2, // ϒ "Upsilon1" - 0xa2: 0x2032, // ′ "minute" - 0xa3: 0x2264, // ≤ "lessequal" - 0xa4: 0x2044, // ⁄ "fraction" - 0xa5: 0x221e, // ∞ "infinity" - 0xa6: 0x0192, // ƒ "florin" - 0xa7: 0x2663, // ♣ "club" - 0xa8: 0x2666, // ♦ "diamond" - 0xa9: 0x2665, // ♥ "heart" - 0xaa: 0x2660, // ♠ "spade" - 0xab: 0x2194, // ↔ "arrowboth" - 0xac: 0x2190, // ← "arrowleft" - 0xad: 0x2191, // ↑ "arrowup" - 0xae: 0x2192, // → "arrowright" - 0xaf: 0x2193, // ↓ "arrowdown" - 0xb0: 0x00b0, // ° "degree" - 0xb1: 0x00b1, // ± "plusminus" - 0xb2: 0x2033, // ″ "second" - 0xb3: 0x2265, // ≥ "greaterequal" - 0xb4: 0x00d7, // × "multiply" - 0xb5: 0x221d, // ∝ "proportional" - 0xb6: 0x2202, // ∂ "partialdiff" - 0xb7: 0x2022, // • "bullet" - 0xb8: 0x00f7, // ÷ "divide" - 0xb9: 0x2260, // ≠ "notequal" - 0xba: 0x2261, // ≡ "equivalence" - 0xbb: 0x2248, // ≈ "approxequal" - 0xbc: 0x2026, // … "ellipsis" - 0xbd: 0xf8e6, // "arrowvertex" - 0xbe: 0xf8e7, // "arrowhorizex" - 0xbf: 0x21b5, // ↵ "carriagereturn" - 0xc0: 0x2135, // ℵ "aleph" - 0xc1: 0x2111, // ℑ "Ifraktur" - 0xc2: 0x211c, // ℜ "Rfraktur" - 0xc3: 0x2118, // ℘ "weierstrass" - 0xc4: 0x2297, // ⊗ "circlemultiply" - 0xc5: 0x2295, // ⊕ "circleplus" - 0xc6: 0x2205, // ∅ "emptyset" - 0xc7: 0x2229, // ∩ "intersection" - 0xc8: 0x222a, // ∪ "union" - 0xc9: 0x2283, // ⊃ "propersuperset" - 0xca: 0x2287, // ⊇ "reflexsuperset" - 0xcb: 0x2284, // ⊄ "notsubset" - 0xcc: 0x2282, // ⊂ "propersubset" - 0xcd: 0x2286, // ⊆ "reflexsubset" - 0xce: 0x2208, // ∈ "element" - 0xcf: 0x2209, // ∉ "notelement" - 0xd0: 0x2220, // ∠ "angle" - 0xd1: 0x2207, // ∇ "gradient" - 0xd2: 0xf6da, // 
"registerserif" - 0xd3: 0xf6d9, // "copyrightserif" - 0xd4: 0xf6db, // "trademarkserif" - 0xd5: 0x220f, // ∏ "product" - 0xd6: 0x221a, // √ "radical" - 0xd7: 0x22c5, // ⋅ "dotmath" - 0xd8: 0x00ac, // ¬ "logicalnot" - 0xd9: 0x2227, // ∧ "logicaland" - 0xda: 0x2228, // ∨ "logicalor" - 0xdb: 0x21d4, // ⇔ "arrowdblboth" - 0xdc: 0x21d0, // ⇐ "arrowdblleft" - 0xdd: 0x21d1, // ⇑ "arrowdblup" - 0xde: 0x21d2, // ⇒ "arrowdblright" - 0xdf: 0x21d3, // ⇓ "arrowdbldown" - 0xe0: 0x25ca, // ◊ "lozenge" - 0xe1: 0x2329, // 〈 "angleleft" - 0xe2: 0xf8e8, // "registersans" - 0xe3: 0xf8e9, // "copyrightsans" - 0xe4: 0xf8ea, // "trademarksans" - 0xe5: 0x2211, // ∑ "summation" - 0xe6: 0xf8eb, // "parenlefttp" - 0xe7: 0xf8ec, // "parenleftex" - 0xe8: 0xf8ed, // "parenleftbt" - 0xe9: 0xf8ee, // "bracketlefttp" - 0xea: 0xf8ef, // "bracketleftex" - 0xeb: 0xf8f0, // "bracketleftbt" - 0xec: 0xf8f1, // "bracelefttp" - 0xed: 0xf8f2, // "braceleftmid" - 0xee: 0xf8f3, // "braceleftbt" - 0xef: 0xf8f4, // "braceex" - 0xf1: 0x232a, // 〉 "angleright" - 0xf2: 0x222b, // ∫ "integral" - 0xf3: 0x2320, // ⌠ "integraltp" - 0xf4: 0xf8f5, // "integralex" - 0xf5: 0x2321, // ⌡ "integralbt" - 0xf6: 0xf8f6, // "parenrighttp" - 0xf7: 0xf8f7, // "parenrightex" - 0xf8: 0xf8f8, // "parenrightbt" - 0xf9: 0xf8f9, // "bracketrighttp" - 0xfa: 0xf8fa, // "bracketrightex" - 0xfb: 0xf8fb, // "bracketrightbt" - 0xfc: 0xf8fc, // "bracerighttp" - 0xfd: 0xf8fd, // "bracerightmid" - 0xfe: 0xf8fe, // "bracerightbt" - }, - "ZapfDingbatsEncoding": { // 202 entries - 0x20: 0x0020, // "space" - 0x21: 0x2701, // ✁ "a1" - 0x22: 0x2702, // ✂ "a2" - 0x23: 0x2703, // ✃ "a202" - 0x24: 0x2704, // ✄ "a3" - 0x25: 0x260e, // ☎ "a4" - 0x26: 0x2706, // ✆ "a5" - 0x27: 0x2707, // ✇ "a119" - 0x28: 0x2708, // ✈ "a118" - 0x29: 0x2709, // ✉ "a117" - 0x2a: 0x261b, // ☛ "a11" - 0x2b: 0x261e, // ☞ "a12" - 0x2c: 0x270c, // ✌ "a13" - 0x2d: 0x270d, // ✍ "a14" - 0x2e: 0x270e, // ✎ "a15" - 0x2f: 0x270f, // ✏ "a16" - 0x30: 0x2710, // ✐ "a105" - 0x31: 0x2711, // 
✑ "a17" - 0x32: 0x2712, // ✒ "a18" - 0x33: 0x2713, // ✓ "a19" - 0x34: 0x2714, // ✔ "a20" - 0x35: 0x2715, // ✕ "a21" - 0x36: 0x2716, // ✖ "a22" - 0x37: 0x2717, // ✗ "a23" - 0x38: 0x2718, // ✘ "a24" - 0x39: 0x2719, // ✙ "a25" - 0x3a: 0x271a, // ✚ "a26" - 0x3b: 0x271b, // ✛ "a27" - 0x3c: 0x271c, // ✜ "a28" - 0x3d: 0x271d, // ✝ "a6" - 0x3e: 0x271e, // ✞ "a7" - 0x3f: 0x271f, // ✟ "a8" - 0x40: 0x2720, // ✠ "a9" - 0x41: 0x2721, // ✡ "a10" - 0x42: 0x2722, // ✢ "a29" - 0x43: 0x2723, // ✣ "a30" - 0x44: 0x2724, // ✤ "a31" - 0x45: 0x2725, // ✥ "a32" - 0x46: 0x2726, // ✦ "a33" - 0x47: 0x2727, // ✧ "a34" - 0x48: 0x2605, // ★ "a35" - 0x49: 0x2729, // ✩ "a36" - 0x4a: 0x272a, // ✪ "a37" - 0x4b: 0x272b, // ✫ "a38" - 0x4c: 0x272c, // ✬ "a39" - 0x4d: 0x272d, // ✭ "a40" - 0x4e: 0x272e, // ✮ "a41" - 0x4f: 0x272f, // ✯ "a42" - 0x50: 0x2730, // ✰ "a43" - 0x51: 0x2731, // ✱ "a44" - 0x52: 0x2732, // ✲ "a45" - 0x53: 0x2733, // ✳ "a46" - 0x54: 0x2734, // ✴ "a47" - 0x55: 0x2735, // ✵ "a48" - 0x56: 0x2736, // ✶ "a49" - 0x57: 0x2737, // ✷ "a50" - 0x58: 0x2738, // ✸ "a51" - 0x59: 0x2739, // ✹ "a52" - 0x5a: 0x273a, // ✺ "a53" - 0x5b: 0x273b, // ✻ "a54" - 0x5c: 0x273c, // ✼ "a55" - 0x5d: 0x273d, // ✽ "a56" - 0x5e: 0x273e, // ✾ "a57" - 0x5f: 0x273f, // ✿ "a58" - 0x60: 0x2740, // ❀ "a59" - 0x61: 0x2741, // ❁ "a60" - 0x62: 0x2742, // ❂ "a61" - 0x63: 0x2743, // ❃ "a62" - 0x64: 0x2744, // ❄ "a63" - 0x65: 0x2745, // ❅ "a64" - 0x66: 0x2746, // ❆ "a65" - 0x67: 0x2747, // ❇ "a66" - 0x68: 0x2748, // ❈ "a67" - 0x69: 0x2749, // ❉ "a68" - 0x6a: 0x274a, // ❊ "a69" - 0x6b: 0x274b, // ❋ "a70" - 0x6c: 0x25cf, // ● "a71" - 0x6d: 0x274d, // ❍ "a72" - 0x6e: 0x25a0, // ■ "a73" - 0x6f: 0x274f, // ❏ "a74" - 0x70: 0x2750, // ❐ "a203" - 0x71: 0x2751, // ❑ "a75" - 0x72: 0x2752, // ❒ "a204" - 0x73: 0x25b2, // ▲ "a76" - 0x74: 0x25bc, // ▼ "a77" - 0x75: 0x25c6, // ◆ "a78" - 0x76: 0x2756, // ❖ "a79" - 0x77: 0x25d7, // ◗ "a81" - 0x78: 0x2758, // ❘ "a82" - 0x79: 0x2759, // ❙ "a83" - 0x7a: 0x275a, // ❚ "a84" - 0x7b: 0x275b, // ❛ 
"a97" - 0x7c: 0x275c, // ❜ "a98" - 0x7d: 0x275d, // ❝ "a99" - 0x7e: 0x275e, // ❞ "a100" - 0x80: 0xf8d7, // "a89" - 0x81: 0xf8d8, // "a90" - 0x82: 0xf8d9, // "a93" - 0x83: 0xf8da, // "a94" - 0x84: 0xf8db, // "a91" - 0x85: 0xf8dc, // "a92" - 0x86: 0xf8dd, // "a205" - 0x87: 0xf8de, // "a85" - 0x88: 0xf8df, // "a206" - 0x89: 0xf8e0, // "a86" - 0x8a: 0xf8e1, // "a87" - 0x8b: 0xf8e2, // "a88" - 0x8c: 0xf8e3, // "a95" - 0x8d: 0xf8e4, // "a96" - 0xa1: 0x2761, // ❡ "a101" - 0xa2: 0x2762, // ❢ "a102" - 0xa3: 0x2763, // ❣ "a103" - 0xa4: 0x2764, // ❤ "a104" - 0xa5: 0x2765, // ❥ "a106" - 0xa6: 0x2766, // ❦ "a107" - 0xa7: 0x2767, // ❧ "a108" - 0xa8: 0x2663, // ♣ "a112" - 0xa9: 0x2666, // ♦ "a111" - 0xaa: 0x2665, // ♥ "a110" - 0xab: 0x2660, // ♠ "a109" - 0xac: 0x2460, // ① "a120" - 0xad: 0x2461, // ② "a121" - 0xae: 0x2462, // ③ "a122" - 0xaf: 0x2463, // ④ "a123" - 0xb0: 0x2464, // ⑤ "a124" - 0xb1: 0x2465, // ⑥ "a125" - 0xb2: 0x2466, // ⑦ "a126" - 0xb3: 0x2467, // ⑧ "a127" - 0xb4: 0x2468, // ⑨ "a128" - 0xb5: 0x2469, // ⑩ "a129" - 0xb6: 0x2776, // ❶ "a130" - 0xb7: 0x2777, // ❷ "a131" - 0xb8: 0x2778, // ❸ "a132" - 0xb9: 0x2779, // ❹ "a133" - 0xba: 0x277a, // ❺ "a134" - 0xbb: 0x277b, // ❻ "a135" - 0xbc: 0x277c, // ❼ "a136" - 0xbd: 0x277d, // ❽ "a137" - 0xbe: 0x277e, // ❾ "a138" - 0xbf: 0x277f, // ❿ "a139" - 0xc0: 0x2780, // ➀ "a140" - 0xc1: 0x2781, // ➁ "a141" - 0xc2: 0x2782, // ➂ "a142" - 0xc3: 0x2783, // ➃ "a143" - 0xc4: 0x2784, // ➄ "a144" - 0xc5: 0x2785, // ➅ "a145" - 0xc6: 0x2786, // ➆ "a146" - 0xc7: 0x2787, // ➇ "a147" - 0xc8: 0x2788, // ➈ "a148" - 0xc9: 0x2789, // ➉ "a149" - 0xca: 0x278a, // ➊ "a150" - 0xcb: 0x278b, // ➋ "a151" - 0xcc: 0x278c, // ➌ "a152" - 0xcd: 0x278d, // ➍ "a153" - 0xce: 0x278e, // ➎ "a154" - 0xcf: 0x278f, // ➏ "a155" - 0xd0: 0x2790, // ➐ "a156" - 0xd1: 0x2791, // ➑ "a157" - 0xd2: 0x2792, // ➒ "a158" - 0xd3: 0x2793, // ➓ "a159" - 0xd4: 0x2794, // ➔ "a160" - 0xd5: 0x2192, // → "a161" - 0xd6: 0x2194, // ↔ "a163" - 0xd7: 0x2195, // ↕ "a164" - 0xd8: 0x2798, // 
➘ "a196" - 0xd9: 0x2799, // ➙ "a165" - 0xda: 0x279a, // ➚ "a192" - 0xdb: 0x279b, // ➛ "a166" - 0xdc: 0x279c, // ➜ "a167" - 0xdd: 0x279d, // ➝ "a168" - 0xde: 0x279e, // ➞ "a169" - 0xdf: 0x279f, // ➟ "a170" - 0xe0: 0x27a0, // ➠ "a171" - 0xe1: 0x27a1, // ➡ "a172" - 0xe2: 0x27a2, // ➢ "a173" - 0xe3: 0x27a3, // ➣ "a162" - 0xe4: 0x27a4, // ➤ "a174" - 0xe5: 0x27a5, // ➥ "a175" - 0xe6: 0x27a6, // ➦ "a176" - 0xe7: 0x27a7, // ➧ "a177" - 0xe8: 0x27a8, // ➨ "a178" - 0xe9: 0x27a9, // ➩ "a179" - 0xea: 0x27aa, // ➪ "a193" - 0xeb: 0x27ab, // ➫ "a180" - 0xec: 0x27ac, // ➬ "a199" - 0xed: 0x27ad, // ➭ "a181" - 0xee: 0x27ae, // ➮ "a200" - 0xef: 0x27af, // ➯ "a182" - 0xf1: 0x27b1, // ➱ "a201" - 0xf2: 0x27b2, // ➲ "a183" - 0xf3: 0x27b3, // ➳ "a184" - 0xf4: 0x27b4, // ➴ "a197" - 0xf5: 0x27b5, // ➵ "a185" - 0xf6: 0x27b6, // ➶ "a194" - 0xf7: 0x27b7, // ➷ "a198" - 0xf8: 0x27b8, // ➸ "a186" - 0xf9: 0x27b9, // ➹ "a195" - 0xfa: 0x27ba, // ➺ "a187" - 0xfb: 0x27bb, // ➻ "a188" - 0xfc: 0x27bc, // ➼ "a189" - 0xfd: 0x27bd, // ➽ "a190" - 0xfe: 0x27be, // ➾ "a191" - }, } diff --git a/pdf/internal/textencoding/simple_symbol.go b/pdf/internal/textencoding/simple_symbol.go new file mode 100644 index 00000000..a44a2ef2 --- /dev/null +++ b/pdf/internal/textencoding/simple_symbol.go @@ -0,0 +1,228 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package textencoding + +import "sync" + +const baseSymbol = "SymbolEncoding" + +var ( + symbolOnce sync.Once + symbolRuneToChar map[rune]byte +) + +func init() { + RegisterSimpleEncoding(baseSymbol, NewSymbolEncoder) +} + +// NewSymbolEncoder returns a SimpleEncoder that implements SymbolEncoding. 
+func NewSymbolEncoder() SimpleEncoder { + symbolOnce.Do(initSymbol) + return &simpleEncoding{ + baseName: baseSymbol, + encode: symbolRuneToChar, + decode: symbolCharToRune, + } +} + +func initSymbol() { + symbolRuneToChar = make(map[rune]byte, len(symbolRuneToChar)) + for b, r := range symbolCharToRune { + symbolRuneToChar[r] = b + } +} + +var symbolCharToRune = map[byte]rune{ // 189 entries + 0x20: 0x0020, // "space" + 0x21: 0x0021, // ! "exclam" + 0x22: 0x2200, // ∀ "universal" + 0x23: 0x0023, // # "numbersign" + 0x24: 0x2203, // ∃ "existential" + 0x25: 0x0025, // % "percent" + 0x26: 0x0026, // & "ampersand" + 0x27: 0x220b, // ∋ "suchthat" + 0x28: 0x0028, // ( "parenleft" + 0x29: 0x0029, // ) "parenright" + 0x2a: 0x2217, // ∗ "asteriskmath" + 0x2b: 0x002b, // + "plus" + 0x2c: 0x002c, // , "comma" + 0x2d: 0x2212, // − "minus" + 0x2e: 0x002e, // . "period" + 0x2f: 0x002f, // / "slash" + 0x30: 0x0030, // 0 "zero" + 0x31: 0x0031, // 1 "one" + 0x32: 0x0032, // 2 "two" + 0x33: 0x0033, // 3 "three" + 0x34: 0x0034, // 4 "four" + 0x35: 0x0035, // 5 "five" + 0x36: 0x0036, // 6 "six" + 0x37: 0x0037, // 7 "seven" + 0x38: 0x0038, // 8 "eight" + 0x39: 0x0039, // 9 "nine" + 0x3a: 0x003a, // : "colon" + 0x3b: 0x003b, // ; "semicolon" + 0x3c: 0x003c, // < "less" + 0x3d: 0x003d, // = "equal" + 0x3e: 0x003e, // > "greater" + 0x3f: 0x003f, // ? 
"question" + 0x40: 0x2245, // ≅ "congruent" + 0x41: 0x0391, // Α "Alpha" + 0x42: 0x0392, // Β "Beta" + 0x43: 0x03a7, // Χ "Chi" + 0x44: 0x2206, // ∆ "Delta" + 0x45: 0x0395, // Ε "Epsilon" + 0x46: 0x03a6, // Φ "Phi" + 0x47: 0x0393, // Γ "Gamma" + 0x48: 0x0397, // Η "Eta" + 0x49: 0x0399, // Ι "Iota" + 0x4a: 0x03d1, // ϑ "theta1" + 0x4b: 0x039a, // Κ "Kappa" + 0x4c: 0x039b, // Λ "Lambda" + 0x4d: 0x039c, // Μ "Mu" + 0x4e: 0x039d, // Ν "Nu" + 0x4f: 0x039f, // Ο "Omicron" + 0x50: 0x03a0, // Π "Pi" + 0x51: 0x0398, // Θ "Theta" + 0x52: 0x03a1, // Ρ "Rho" + 0x53: 0x03a3, // Σ "Sigma" + 0x54: 0x03a4, // Τ "Tau" + 0x55: 0x03a5, // Υ "Upsilon" + 0x56: 0x03c2, // ς "sigma1" + 0x57: 0x2126, // Ω "Omega" + 0x58: 0x039e, // Ξ "Xi" + 0x59: 0x03a8, // Ψ "Psi" + 0x5a: 0x0396, // Ζ "Zeta" + 0x5b: 0x005b, // [ "bracketleft" + 0x5c: 0x2234, // ∴ "therefore" + 0x5d: 0x005d, // ] "bracketright" + 0x5e: 0x22a5, // ⊥ "perpendicular" + 0x5f: 0x005f, // _ "underscore" + 0x60: 0xf8e5, // "radicalex" + 0x61: 0x03b1, // α "alpha" + 0x62: 0x03b2, // β "beta" + 0x63: 0x03c7, // χ "chi" + 0x64: 0x03b4, // δ "delta" + 0x65: 0x03b5, // ε "epsilon" + 0x66: 0x03c6, // φ "phi" + 0x67: 0x03b3, // γ "gamma" + 0x68: 0x03b7, // η "eta" + 0x69: 0x03b9, // ι "iota" + 0x6a: 0x03d5, // ϕ "phi1" + 0x6b: 0x03ba, // κ "kappa" + 0x6c: 0x03bb, // λ "lambda" + 0x6d: 0x00b5, // µ "mu" + 0x6e: 0x03bd, // ν "nu" + 0x6f: 0x03bf, // ο "omicron" + 0x70: 0x03c0, // π "pi" + 0x71: 0x03b8, // θ "theta" + 0x72: 0x03c1, // ρ "rho" + 0x73: 0x03c3, // σ "sigma" + 0x74: 0x03c4, // τ "tau" + 0x75: 0x03c5, // υ "upsilon" + 0x76: 0x03d6, // ϖ "omega1" + 0x77: 0x03c9, // ω "omega" + 0x78: 0x03be, // ξ "xi" + 0x79: 0x03c8, // ψ "psi" + 0x7a: 0x03b6, // ζ "zeta" + 0x7b: 0x007b, // { "braceleft" + 0x7c: 0x007c, // | "bar" + 0x7d: 0x007d, // } "braceright" + 0x7e: 0x223c, // ∼ "similar" + 0xa0: 0x20ac, // € "Euro" + 0xa1: 0x03d2, // ϒ "Upsilon1" + 0xa2: 0x2032, // ′ "minute" + 0xa3: 0x2264, // ≤ "lessequal" + 0xa4: 0x2044, // ⁄ "fraction" 
+ 0xa5: 0x221e, // ∞ "infinity" + 0xa6: 0x0192, // ƒ "florin" + 0xa7: 0x2663, // ♣ "club" + 0xa8: 0x2666, // ♦ "diamond" + 0xa9: 0x2665, // ♥ "heart" + 0xaa: 0x2660, // ♠ "spade" + 0xab: 0x2194, // ↔ "arrowboth" + 0xac: 0x2190, // ← "arrowleft" + 0xad: 0x2191, // ↑ "arrowup" + 0xae: 0x2192, // → "arrowright" + 0xaf: 0x2193, // ↓ "arrowdown" + 0xb0: 0x00b0, // ° "degree" + 0xb1: 0x00b1, // ± "plusminus" + 0xb2: 0x2033, // ″ "second" + 0xb3: 0x2265, // ≥ "greaterequal" + 0xb4: 0x00d7, // × "multiply" + 0xb5: 0x221d, // ∝ "proportional" + 0xb6: 0x2202, // ∂ "partialdiff" + 0xb7: 0x2022, // • "bullet" + 0xb8: 0x00f7, // ÷ "divide" + 0xb9: 0x2260, // ≠ "notequal" + 0xba: 0x2261, // ≡ "equivalence" + 0xbb: 0x2248, // ≈ "approxequal" + 0xbc: 0x2026, // … "ellipsis" + 0xbd: 0xf8e6, // "arrowvertex" + 0xbe: 0xf8e7, // "arrowhorizex" + 0xbf: 0x21b5, // ↵ "carriagereturn" + 0xc0: 0x2135, // ℵ "aleph" + 0xc1: 0x2111, // ℑ "Ifraktur" + 0xc2: 0x211c, // ℜ "Rfraktur" + 0xc3: 0x2118, // ℘ "weierstrass" + 0xc4: 0x2297, // ⊗ "circlemultiply" + 0xc5: 0x2295, // ⊕ "circleplus" + 0xc6: 0x2205, // ∅ "emptyset" + 0xc7: 0x2229, // ∩ "intersection" + 0xc8: 0x222a, // ∪ "union" + 0xc9: 0x2283, // ⊃ "propersuperset" + 0xca: 0x2287, // ⊇ "reflexsuperset" + 0xcb: 0x2284, // ⊄ "notsubset" + 0xcc: 0x2282, // ⊂ "propersubset" + 0xcd: 0x2286, // ⊆ "reflexsubset" + 0xce: 0x2208, // ∈ "element" + 0xcf: 0x2209, // ∉ "notelement" + 0xd0: 0x2220, // ∠ "angle" + 0xd1: 0x2207, // ∇ "gradient" + 0xd2: 0xf6da, // "registerserif" + 0xd3: 0xf6d9, // "copyrightserif" + 0xd4: 0xf6db, // "trademarkserif" + 0xd5: 0x220f, // ∏ "product" + 0xd6: 0x221a, // √ "radical" + 0xd7: 0x22c5, // ⋅ "dotmath" + 0xd8: 0x00ac, // ¬ "logicalnot" + 0xd9: 0x2227, // ∧ "logicaland" + 0xda: 0x2228, // ∨ "logicalor" + 0xdb: 0x21d4, // ⇔ "arrowdblboth" + 0xdc: 0x21d0, // ⇐ "arrowdblleft" + 0xdd: 0x21d1, // ⇑ "arrowdblup" + 0xde: 0x21d2, // ⇒ "arrowdblright" + 0xdf: 0x21d3, // ⇓ "arrowdbldown" + 0xe0: 0x25ca, // ◊ "lozenge" + 0xe1: 
0x2329, // 〈 "angleleft" + 0xe2: 0xf8e8, // "registersans" + 0xe3: 0xf8e9, // "copyrightsans" + 0xe4: 0xf8ea, // "trademarksans" + 0xe5: 0x2211, // ∑ "summation" + 0xe6: 0xf8eb, // "parenlefttp" + 0xe7: 0xf8ec, // "parenleftex" + 0xe8: 0xf8ed, // "parenleftbt" + 0xe9: 0xf8ee, // "bracketlefttp" + 0xea: 0xf8ef, // "bracketleftex" + 0xeb: 0xf8f0, // "bracketleftbt" + 0xec: 0xf8f1, // "bracelefttp" + 0xed: 0xf8f2, // "braceleftmid" + 0xee: 0xf8f3, // "braceleftbt" + 0xef: 0xf8f4, // "braceex" + 0xf1: 0x232a, // 〉 "angleright" + 0xf2: 0x222b, // ∫ "integral" + 0xf3: 0x2320, // ⌠ "integraltp" + 0xf4: 0xf8f5, // "integralex" + 0xf5: 0x2321, // ⌡ "integralbt" + 0xf6: 0xf8f6, // "parenrighttp" + 0xf7: 0xf8f7, // "parenrightex" + 0xf8: 0xf8f8, // "parenrightbt" + 0xf9: 0xf8f9, // "bracketrighttp" + 0xfa: 0xf8fa, // "bracketrightex" + 0xfb: 0xf8fb, // "bracketrightbt" + 0xfc: 0xf8fc, // "bracerighttp" + 0xfd: 0xf8fd, // "bracerightmid" + 0xfe: 0xf8fe, // "bracerightbt" +} diff --git a/pdf/internal/textencoding/simple_winansi.go b/pdf/internal/textencoding/simple_winansi.go new file mode 100644 index 00000000..a4e8ddad --- /dev/null +++ b/pdf/internal/textencoding/simple_winansi.go @@ -0,0 +1,73 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package textencoding + +import ( + "sync" + + "golang.org/x/text/encoding/charmap" +) + +const baseWinAnsi = "WinAnsiEncoding" + +func init() { + RegisterSimpleEncoding(baseWinAnsi, NewWinAnsiEncoder) +} + +var ( + winAnsiOnce sync.Once + winAnsiCharToRune map[byte]rune + winAnsiRuneToChar map[rune]byte +) + +// NewWinAnsiEncoder returns a simpleEncoder that implements WinAnsiEncoding. 
+func NewWinAnsiEncoder() SimpleEncoder { + winAnsiOnce.Do(initWinAnsi) + return &simpleEncoding{ + baseName: baseWinAnsi, + encode: winAnsiRuneToChar, + decode: winAnsiCharToRune, + } +} + +func initWinAnsi() { + winAnsiCharToRune = make(map[byte]rune, 256) + winAnsiRuneToChar = make(map[rune]byte, 256) + + // WinAnsiEncoding is also known as CP1252 + enc := charmap.Windows1252 + + // in WinAnsiEncoding, comparing to CP1252, all unused and + // non-visual codes are replaced with '•' character + const bullet = '•' + replace := map[byte]rune{ + 127: bullet, // DEL + + // unused + 129: bullet, + 141: bullet, + 143: bullet, + 144: bullet, + 157: bullet, + + // typographically similar + 160: ' ', // non-breaking space -> space + 173: '-', // soft hyphen -> hyphen + } + + for i := int(' '); i < 256; i++ { + b := byte(i) + r := enc.DecodeByte(b) + + // don't use replace map. since it creates duplicates + winAnsiRuneToChar[r] = b + + if rp, ok := replace[b]; ok { + r = rp + } + winAnsiCharToRune[b] = r + } +} diff --git a/pdf/internal/textencoding/winansi_test.go b/pdf/internal/textencoding/simple_winansi_test.go similarity index 94% rename from pdf/internal/textencoding/winansi_test.go rename to pdf/internal/textencoding/simple_winansi_test.go index 3f2daa6c..0226c88b 100644 --- a/pdf/internal/textencoding/winansi_test.go +++ b/pdf/internal/textencoding/simple_winansi_test.go @@ -8,7 +8,7 @@ package textencoding import "testing" func TestWinAnsiEncoder(t *testing.T) { - enc := NewWinAnsiTextEncoder() + enc := NewWinAnsiEncoder() glyph, found := enc.CharcodeToGlyph(32) if !found || glyph != "space" { diff --git a/pdf/internal/textencoding/simple_zapfdingbats.go b/pdf/internal/textencoding/simple_zapfdingbats.go new file mode 100644 index 00000000..6eae8e6b --- /dev/null +++ b/pdf/internal/textencoding/simple_zapfdingbats.go @@ -0,0 +1,241 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code 
package. + */ + +package textencoding + +import "sync" + +const baseZapfDingbats = "ZapfDingbatsEncoding" + +var ( + zapfDingbatsOnce sync.Once + zapfDingbatsRuneToChar map[rune]byte +) + +func init() { + RegisterSimpleEncoding(baseZapfDingbats, NewZapfDingbatsEncoder) +} + +// NewZapfDingbatsEncoder returns a SimpleEncoder that implements ZapfDingbatsEncoding. +func NewZapfDingbatsEncoder() SimpleEncoder { + zapfDingbatsOnce.Do(initZapfDingbats) + return &simpleEncoding{ + baseName: baseZapfDingbats, + encode: zapfDingbatsRuneToChar, + decode: zapfDingbatsCharToRune, + } +} + +func initZapfDingbats() { + zapfDingbatsRuneToChar = make(map[rune]byte, len(zapfDingbatsRuneToChar)) + for b, r := range zapfDingbatsCharToRune { + zapfDingbatsRuneToChar[r] = b + } +} + +var zapfDingbatsCharToRune = map[byte]rune{ // 202 entries + 0x20: 0x0020, // "space" + 0x21: 0x2701, // ✁ "a1" + 0x22: 0x2702, // ✂ "a2" + 0x23: 0x2703, // ✃ "a202" + 0x24: 0x2704, // ✄ "a3" + 0x25: 0x260e, // ☎ "a4" + 0x26: 0x2706, // ✆ "a5" + 0x27: 0x2707, // ✇ "a119" + 0x28: 0x2708, // ✈ "a118" + 0x29: 0x2709, // ✉ "a117" + 0x2a: 0x261b, // ☛ "a11" + 0x2b: 0x261e, // ☞ "a12" + 0x2c: 0x270c, // ✌ "a13" + 0x2d: 0x270d, // ✍ "a14" + 0x2e: 0x270e, // ✎ "a15" + 0x2f: 0x270f, // ✏ "a16" + 0x30: 0x2710, // ✐ "a105" + 0x31: 0x2711, // ✑ "a17" + 0x32: 0x2712, // ✒ "a18" + 0x33: 0x2713, // ✓ "a19" + 0x34: 0x2714, // ✔ "a20" + 0x35: 0x2715, // ✕ "a21" + 0x36: 0x2716, // ✖ "a22" + 0x37: 0x2717, // ✗ "a23" + 0x38: 0x2718, // ✘ "a24" + 0x39: 0x2719, // ✙ "a25" + 0x3a: 0x271a, // ✚ "a26" + 0x3b: 0x271b, // ✛ "a27" + 0x3c: 0x271c, // ✜ "a28" + 0x3d: 0x271d, // ✝ "a6" + 0x3e: 0x271e, // ✞ "a7" + 0x3f: 0x271f, // ✟ "a8" + 0x40: 0x2720, // ✠ "a9" + 0x41: 0x2721, // ✡ "a10" + 0x42: 0x2722, // ✢ "a29" + 0x43: 0x2723, // ✣ "a30" + 0x44: 0x2724, // ✤ "a31" + 0x45: 0x2725, // ✥ "a32" + 0x46: 0x2726, // ✦ "a33" + 0x47: 0x2727, // ✧ "a34" + 0x48: 0x2605, // ★ "a35" + 0x49: 0x2729, // ✩ "a36" + 0x4a: 0x272a, // ✪ "a37" + 0x4b: 
0x272b, // ✫ "a38" + 0x4c: 0x272c, // ✬ "a39" + 0x4d: 0x272d, // ✭ "a40" + 0x4e: 0x272e, // ✮ "a41" + 0x4f: 0x272f, // ✯ "a42" + 0x50: 0x2730, // ✰ "a43" + 0x51: 0x2731, // ✱ "a44" + 0x52: 0x2732, // ✲ "a45" + 0x53: 0x2733, // ✳ "a46" + 0x54: 0x2734, // ✴ "a47" + 0x55: 0x2735, // ✵ "a48" + 0x56: 0x2736, // ✶ "a49" + 0x57: 0x2737, // ✷ "a50" + 0x58: 0x2738, // ✸ "a51" + 0x59: 0x2739, // ✹ "a52" + 0x5a: 0x273a, // ✺ "a53" + 0x5b: 0x273b, // ✻ "a54" + 0x5c: 0x273c, // ✼ "a55" + 0x5d: 0x273d, // ✽ "a56" + 0x5e: 0x273e, // ✾ "a57" + 0x5f: 0x273f, // ✿ "a58" + 0x60: 0x2740, // ❀ "a59" + 0x61: 0x2741, // ❁ "a60" + 0x62: 0x2742, // ❂ "a61" + 0x63: 0x2743, // ❃ "a62" + 0x64: 0x2744, // ❄ "a63" + 0x65: 0x2745, // ❅ "a64" + 0x66: 0x2746, // ❆ "a65" + 0x67: 0x2747, // ❇ "a66" + 0x68: 0x2748, // ❈ "a67" + 0x69: 0x2749, // ❉ "a68" + 0x6a: 0x274a, // ❊ "a69" + 0x6b: 0x274b, // ❋ "a70" + 0x6c: 0x25cf, // ● "a71" + 0x6d: 0x274d, // ❍ "a72" + 0x6e: 0x25a0, // ■ "a73" + 0x6f: 0x274f, // ❏ "a74" + 0x70: 0x2750, // ❐ "a203" + 0x71: 0x2751, // ❑ "a75" + 0x72: 0x2752, // ❒ "a204" + 0x73: 0x25b2, // ▲ "a76" + 0x74: 0x25bc, // ▼ "a77" + 0x75: 0x25c6, // ◆ "a78" + 0x76: 0x2756, // ❖ "a79" + 0x77: 0x25d7, // ◗ "a81" + 0x78: 0x2758, // ❘ "a82" + 0x79: 0x2759, // ❙ "a83" + 0x7a: 0x275a, // ❚ "a84" + 0x7b: 0x275b, // ❛ "a97" + 0x7c: 0x275c, // ❜ "a98" + 0x7d: 0x275d, // ❝ "a99" + 0x7e: 0x275e, // ❞ "a100" + 0x80: 0xf8d7, // "a89" + 0x81: 0xf8d8, // "a90" + 0x82: 0xf8d9, // "a93" + 0x83: 0xf8da, // "a94" + 0x84: 0xf8db, // "a91" + 0x85: 0xf8dc, // "a92" + 0x86: 0xf8dd, // "a205" + 0x87: 0xf8de, // "a85" + 0x88: 0xf8df, // "a206" + 0x89: 0xf8e0, // "a86" + 0x8a: 0xf8e1, // "a87" + 0x8b: 0xf8e2, // "a88" + 0x8c: 0xf8e3, // "a95" + 0x8d: 0xf8e4, // "a96" + 0xa1: 0x2761, // ❡ "a101" + 0xa2: 0x2762, // ❢ "a102" + 0xa3: 0x2763, // ❣ "a103" + 0xa4: 0x2764, // ❤ "a104" + 0xa5: 0x2765, // ❥ "a106" + 0xa6: 0x2766, // ❦ "a107" + 0xa7: 0x2767, // ❧ "a108" + 0xa8: 0x2663, // ♣ "a112" + 0xa9: 0x2666, // ♦ 
"a111" + 0xaa: 0x2665, // ♥ "a110" + 0xab: 0x2660, // ♠ "a109" + 0xac: 0x2460, // ① "a120" + 0xad: 0x2461, // ② "a121" + 0xae: 0x2462, // ③ "a122" + 0xaf: 0x2463, // ④ "a123" + 0xb0: 0x2464, // ⑤ "a124" + 0xb1: 0x2465, // ⑥ "a125" + 0xb2: 0x2466, // ⑦ "a126" + 0xb3: 0x2467, // ⑧ "a127" + 0xb4: 0x2468, // ⑨ "a128" + 0xb5: 0x2469, // ⑩ "a129" + 0xb6: 0x2776, // ❶ "a130" + 0xb7: 0x2777, // ❷ "a131" + 0xb8: 0x2778, // ❸ "a132" + 0xb9: 0x2779, // ❹ "a133" + 0xba: 0x277a, // ❺ "a134" + 0xbb: 0x277b, // ❻ "a135" + 0xbc: 0x277c, // ❼ "a136" + 0xbd: 0x277d, // ❽ "a137" + 0xbe: 0x277e, // ❾ "a138" + 0xbf: 0x277f, // ❿ "a139" + 0xc0: 0x2780, // ➀ "a140" + 0xc1: 0x2781, // ➁ "a141" + 0xc2: 0x2782, // ➂ "a142" + 0xc3: 0x2783, // ➃ "a143" + 0xc4: 0x2784, // ➄ "a144" + 0xc5: 0x2785, // ➅ "a145" + 0xc6: 0x2786, // ➆ "a146" + 0xc7: 0x2787, // ➇ "a147" + 0xc8: 0x2788, // ➈ "a148" + 0xc9: 0x2789, // ➉ "a149" + 0xca: 0x278a, // ➊ "a150" + 0xcb: 0x278b, // ➋ "a151" + 0xcc: 0x278c, // ➌ "a152" + 0xcd: 0x278d, // ➍ "a153" + 0xce: 0x278e, // ➎ "a154" + 0xcf: 0x278f, // ➏ "a155" + 0xd0: 0x2790, // ➐ "a156" + 0xd1: 0x2791, // ➑ "a157" + 0xd2: 0x2792, // ➒ "a158" + 0xd3: 0x2793, // ➓ "a159" + 0xd4: 0x2794, // ➔ "a160" + 0xd5: 0x2192, // → "a161" + 0xd6: 0x2194, // ↔ "a163" + 0xd7: 0x2195, // ↕ "a164" + 0xd8: 0x2798, // ➘ "a196" + 0xd9: 0x2799, // ➙ "a165" + 0xda: 0x279a, // ➚ "a192" + 0xdb: 0x279b, // ➛ "a166" + 0xdc: 0x279c, // ➜ "a167" + 0xdd: 0x279d, // ➝ "a168" + 0xde: 0x279e, // ➞ "a169" + 0xdf: 0x279f, // ➟ "a170" + 0xe0: 0x27a0, // ➠ "a171" + 0xe1: 0x27a1, // ➡ "a172" + 0xe2: 0x27a2, // ➢ "a173" + 0xe3: 0x27a3, // ➣ "a162" + 0xe4: 0x27a4, // ➤ "a174" + 0xe5: 0x27a5, // ➥ "a175" + 0xe6: 0x27a6, // ➦ "a176" + 0xe7: 0x27a7, // ➧ "a177" + 0xe8: 0x27a8, // ➨ "a178" + 0xe9: 0x27a9, // ➩ "a179" + 0xea: 0x27aa, // ➪ "a193" + 0xeb: 0x27ab, // ➫ "a180" + 0xec: 0x27ac, // ➬ "a199" + 0xed: 0x27ad, // ➭ "a181" + 0xee: 0x27ae, // ➮ "a200" + 0xef: 0x27af, // ➯ "a182" + 0xf1: 0x27b1, // ➱ "a201" + 
0xf2: 0x27b2, // ➲ "a183" + 0xf3: 0x27b3, // ➳ "a184" + 0xf4: 0x27b4, // ➴ "a197" + 0xf5: 0x27b5, // ➵ "a185" + 0xf6: 0x27b6, // ➶ "a194" + 0xf7: 0x27b7, // ➷ "a198" + 0xf8: 0x27b8, // ➸ "a186" + 0xf9: 0x27b9, // ➹ "a195" + 0xfa: 0x27ba, // ➺ "a187" + 0xfb: 0x27bb, // ➻ "a188" + 0xfc: 0x27bc, // ➼ "a189" + 0xfd: 0x27bd, // ➽ "a190" + 0xfe: 0x27be, // ➾ "a191" +} diff --git a/pdf/internal/textencoding/symbol.go b/pdf/internal/textencoding/symbol.go deleted file mode 100644 index 3cc601eb..00000000 --- a/pdf/internal/textencoding/symbol.go +++ /dev/null @@ -1,12 +0,0 @@ -/* - * This file is subject to the terms and conditions defined in - * file 'LICENSE.md', which is part of this source code package. - */ - -package textencoding - -// NewSymbolEncoder returns a TextEncoder that implements SymbolEncoding. -func NewSymbolEncoder() TextEncoder { - enc, _ := NewSimpleTextEncoder("SymbolEncoding", nil) - return enc -} diff --git a/pdf/internal/textencoding/winansi.go b/pdf/internal/textencoding/winansi.go deleted file mode 100644 index a52ee401..00000000 --- a/pdf/internal/textencoding/winansi.go +++ /dev/null @@ -1,136 +0,0 @@ -/* - * This file is subject to the terms and conditions defined in - * file 'LICENSE.md', which is part of this source code package. - */ - -package textencoding - -import ( - "bytes" - - "github.com/unidoc/unidoc/pdf/core" - "golang.org/x/text/encoding/charmap" -) - -const baseWinAnsi = "WinAnsiEncoding" - -// NewWinAnsiTextEncoder returns a simpleEncoder that implements WinAnsiEncoding. -func NewWinAnsiTextEncoder() SimpleEncoder { - return &charmapEncoding{ - baseName: baseWinAnsi, - charmap: charmap.Windows1252, - } -} - -var _ SimpleEncoder = (*charmapEncoding)(nil) - -type charmapEncoding struct { - baseName string - charmap *charmap.Charmap -} - -// String returns a text representation of encoding. 
-func (enc *charmapEncoding) String() string { - return "charmapEncoding(" + enc.baseName + ")" -} - -// BaseName returns a base name of the encoder, as specified in the PDF spec. -func (enc *charmapEncoding) BaseName() string { - return enc.baseName -} - -// Encode converts a Go unicode string `raw` to a PDF encoded string. -func (enc *charmapEncoding) Encode(raw string) []byte { - runes := []rune(raw) - buf := bytes.NewBuffer(nil) - buf.Grow(len(runes)) - for _, r := range runes { - b, ok := enc.charmap.EncodeRune(r) - if !ok { - b, _ = enc.charmap.EncodeRune(MissingCodeRune) - } - buf.WriteByte(b) - } - return buf.Bytes() -} - -func (enc *charmapEncoding) Charcodes() []CharCode { - codes := make([]CharCode, 0, 256) - for i := 0; i < 256; i++ { - code := CharCode(i) - if _, ok := enc.CharcodeToRune(code); ok { - codes = append(codes, code) - } - } - return codes -} - -func (enc *charmapEncoding) RuneToCharcode(r rune) (CharCode, bool) { - b, ok := enc.charmap.EncodeRune(r) - return CharCode(b), ok -} - -func (enc *charmapEncoding) CharcodeToRune(code CharCode) (rune, bool) { - if code > 0xff { - return MissingCodeRune, false - } - switch enc.baseName { - case "WinAnsiEncoding": - // WinANSI in the old implementation remaps few characters - - // everything below 20 (space) is "missing" - if code < 0x20 { - return MissingCodeRune, false - } - - const bullet = '•' - switch code { - - // in WinAnsiEncoding all unused and non-visual codes map to the '•' character - case 127: // DEL - return bullet, true - case 129, 141, 143, 144, 157: // unused in WinANSI - return bullet, true - - // typographically similar - case 160: // non-breaking space -> space - return ' ', true - case 173: // soft hyphen -> hyphen - return '-', true - } - } - r := enc.charmap.DecodeByte(byte(code)) - return r, r != MissingCodeRune -} - -func (enc *charmapEncoding) CharcodeToGlyph(code CharCode) (GlyphName, bool) { - // TODO(dennwc): only redirects the call - remove from the interface - r, ok := 
enc.CharcodeToRune(code) - if !ok { - return "", false - } - return enc.RuneToGlyph(r) -} - -func (enc *charmapEncoding) GlyphToCharcode(glyph GlyphName) (CharCode, bool) { - // TODO(dennwc): only redirects the call - remove from the interface - r, ok := GlyphToRune(glyph) - if !ok { - return MissingCodeRune, false - } - return enc.RuneToCharcode(r) -} - -func (enc *charmapEncoding) RuneToGlyph(r rune) (GlyphName, bool) { - // TODO(dennwc): should be in the font interface - return runeToGlyph(r, glyphlistRuneToGlyphMap) -} - -func (enc *charmapEncoding) GlyphToRune(glyph GlyphName) (rune, bool) { - // TODO(dennwc): should be in the font interface - return glyphToRune(glyph, glyphlistGlyphToRuneMap) -} - -func (enc *charmapEncoding) ToPdfObject() core.PdfObject { - return core.MakeName(enc.baseName) -} diff --git a/pdf/internal/textencoding/zapfdingbats.go b/pdf/internal/textencoding/zapfdingbats.go deleted file mode 100644 index 64153a96..00000000 --- a/pdf/internal/textencoding/zapfdingbats.go +++ /dev/null @@ -1,12 +0,0 @@ -/* - * This file is subject to the terms and conditions defined in - * file 'LICENSE.md', which is part of this source code package. - */ - -package textencoding - -// NewZapfDingbatsEncoder returns a TextEncoder that implements ZapfDingbatsEncoding. 
-func NewZapfDingbatsEncoder() TextEncoder { - enc, _ := NewSimpleTextEncoder("ZapfDingbatsEncoding", nil) - return enc -} diff --git a/pdf/model/font_simple.go b/pdf/model/font_simple.go index fa7ccc07..12725935 100644 --- a/pdf/model/font_simple.go +++ b/pdf/model/font_simple.go @@ -402,7 +402,7 @@ func NewPdfFontFromTTFFile(filePath string) (*PdfFont, error) { }, } - truefont.encoder = textencoding.NewWinAnsiTextEncoder() + truefont.encoder = textencoding.NewWinAnsiEncoder() truefont.basefont = ttf.PostScriptName truefont.FirstChar = core.MakeInteger(int64(minCode)) diff --git a/pdf/model/font_test.go b/pdf/model/font_test.go index 53cae596..ddf3705b 100644 --- a/pdf/model/font_test.go +++ b/pdf/model/font_test.go @@ -119,7 +119,8 @@ func TestNewStandard14Font(t *testing.T) { "Courier": { subtype: "Type1", basefont: "Courier", - CharMetrics: fonts.CharMetrics{Wx: 600}}, + CharMetrics: fonts.CharMetrics{Wx: 600}, + }, } for in, expect := range tests { @@ -769,30 +770,31 @@ endobj 255: `/ydieresis`, } - for ccode := textencoding.CharCode(32); ccode < 255; ccode++ { - fontglyph, has := font.Encoder().CharcodeToGlyph(ccode) + enc := font.Encoder() + for code := textencoding.CharCode(32); code < 255; code++ { + fontglyph, has := enc.CharcodeToGlyph(code) if !has { - baseglyph, bad := baseEncoding.CharcodeToGlyph(ccode) + baseglyph, bad := baseEncoding.CharcodeToGlyph(code) if bad { - t.Fatalf("font not having glyph for char code %d - whereas base encoding had '%s'", ccode, baseglyph) + t.Fatalf("font not having glyph for char code %d - whereas base encoding had '%s'", code, baseglyph) } } // Check if in differencesmap first. 
- glyph, has := differencesMap[ccode] + glyph, has := differencesMap[code] if has { glyph = textencoding.GlyphName(strings.Trim(string(glyph), `/`)) if glyph != fontglyph { - t.Fatalf("Mismatch for char code %d, font has: %s and expected is: %s (differences)", ccode, fontglyph, glyph) + t.Fatalf("Mismatch for char code %d, font has: %s and expected is: %s (differences)", code, fontglyph, glyph) } continue } // If not in differences, should be according to StandardEncoding (base). - glyph, has = baseEncoding.CharcodeToGlyph(ccode) + glyph, has = baseEncoding.CharcodeToGlyph(code) if has && glyph != fontglyph { - t.Fatalf("Mismatch for char code %d (%X), font has: %s and expected is: %s (StandardEncoding)", ccode, ccode, fontglyph, glyph) + t.Fatalf("Mismatch for char code %d (%X), font has: %s and expected is: %s (StandardEncoding)", code, code, fontglyph, glyph) } } diff --git a/pdf/model/fonts/std.go b/pdf/model/fonts/std.go index 54319a77..eb9ebcfd 100644 --- a/pdf/model/fonts/std.go +++ b/pdf/model/fonts/std.go @@ -78,7 +78,7 @@ type StdFont struct { // NewStdFont returns a new instance of the font with a default encoder set (WinAnsiEncoding). 
func NewStdFont(desc Descriptor, metrics map[GlyphName]CharMetrics) StdFont { - enc := textencoding.NewWinAnsiTextEncoder() // Default + enc := textencoding.NewWinAnsiEncoder() // Default return NewStdFontWithEncoding(desc, metrics, enc) } diff --git a/pdf/model/functions_test.go b/pdf/model/functions_test.go index f636674d..b962955f 100644 --- a/pdf/model/functions_test.go +++ b/pdf/model/functions_test.go @@ -5,7 +5,6 @@ package model import ( - "fmt" "math" "testing" @@ -84,8 +83,8 @@ endobj t.Errorf("Failed: %v", err) return } - fmt.Println(testcase) - fmt.Println(outputs) + t.Log(testcase) + t.Log(outputs) if len(outputs) != len(testcase.Expected) { t.Errorf("Failed, output length mismatch") @@ -99,5 +98,5 @@ endobj } } - fmt.Printf("%s", stream.Stream) + t.Logf("%s", stream.Stream) } From 2e820f3ac517a6669d8f1a1e75e426b00ec84375 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Tue, 1 Jan 2019 22:15:22 +0200 Subject: [PATCH 06/11] textencoding: remove unused rune <-> glyph methods from the interface --- pdf/creator/paragraph.go | 32 ++++----- pdf/creator/styled_paragraph.go | 11 +-- pdf/internal/textencoding/differences.go | 25 ------- pdf/internal/textencoding/encoder.go | 28 -------- pdf/internal/textencoding/glyph_test.go | 30 ++++++++ pdf/internal/textencoding/simple.go | 12 +--- pdf/internal/textencoding/simple_test.go | 68 ------------------- .../textencoding/simple_winansi_test.go | 2 +- pdf/internal/textencoding/truetype.go | 32 --------- pdf/internal/textencoding/utils.go | 28 -------- pdf/model/font.go | 29 +++++--- pdf/model/fonts/std.go | 7 +- 12 files changed, 78 insertions(+), 226 deletions(-) delete mode 100644 pdf/internal/textencoding/simple_test.go diff --git a/pdf/creator/paragraph.go b/pdf/creator/paragraph.go index 8419907b..10bee596 100644 --- a/pdf/creator/paragraph.go +++ b/pdf/creator/paragraph.go @@ -7,6 +7,7 @@ package creator import ( "errors" + "fmt" "strconv" "github.com/unidoc/unidoc/common" @@ -496,31 +497,30 @@ func 
drawParagraphOnBlock(blk *Block, p *Paragraph, ctx DrawContext) (DrawContex shift := (p.wrapWidth*1000.0 - textWidth) / p.fontSize objs = append(objs, core.MakeFloat(-shift)) } + enc := p.textFont.Encoder() var encoded []byte isCID := p.textFont.IsCID() for _, r := range runes { - glyph, ok := p.textFont.Encoder().RuneToGlyph(r) - if !ok { - common.Log.Debug("Rune 0x%x not supported by text encoder", r) - return ctx, errors.New("unsupported rune in text encoding") - } - - if glyph == "space" { // TODO: What about \t and other spaces. + if r == ' ' { // TODO: What about \t and other spaces. if len(encoded) > 0 { objs = append(objs, core.MakeStringFromBytes(encoded)) - encoded = []byte{} + encoded = nil } objs = append(objs, core.MakeFloat(-spaceWidth)) } else { - code, ok := p.textFont.Encoder().RuneToCharcode(r) - if ok { - if isCID { - hi, lo := code>>8, code&0xff - encoded = append(encoded, byte(hi), byte(lo)) - } else { - encoded = append(encoded, byte(code)) - } + code, ok := enc.RuneToCharcode(r) + if !ok { + err := fmt.Errorf("unsupported rune in text encoding: %#x (%c)", r, r) + common.Log.Debug("%s", err) + return ctx, err + } + // TODO(dennwc): this should not be done manually; encoder should do this + if isCID { + hi, lo := code>>8, code&0xff + encoded = append(encoded, byte(hi), byte(lo)) + } else { + encoded = append(encoded, byte(code)) } } } diff --git a/pdf/creator/styled_paragraph.go b/pdf/creator/styled_paragraph.go index 70a462e9..3ca15aa6 100644 --- a/pdf/creator/styled_paragraph.go +++ b/pdf/creator/styled_paragraph.go @@ -659,16 +659,11 @@ func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext) fontSize = style.FontSize spaceWidth = spaceMetrics.Wx } + enc := style.Font.Encoder() var encStr []byte for _, rn := range chunk.Text { - glyph, found := style.Font.Encoder().RuneToGlyph(rn) - if !found { - common.Log.Debug("Rune 0x%x not supported by text encoder", r) - return ctx, errors.New("unsupported rune in text 
encoding") - } - - if glyph == "space" { + if rn == ' ' { if len(encStr) > 0 { cc.Add_rg(r, g, b). Add_Tf(fonts[idx][k], style.FontSize). @@ -684,7 +679,7 @@ func drawStyledParagraphOnBlock(blk *Block, p *StyledParagraph, ctx DrawContext) chunkWidths[k] += spaceWidth * fontSize } else { - encStr = append(encStr, style.Font.Encoder().Encode(string(rn))...) + encStr = append(encStr, enc.Encode(string(rn))...) } } diff --git a/pdf/internal/textencoding/differences.go b/pdf/internal/textencoding/differences.go index aeb6ad58..bab1f309 100644 --- a/pdf/internal/textencoding/differences.go +++ b/pdf/internal/textencoding/differences.go @@ -187,31 +187,6 @@ func (enc *differencesEncoding) GlyphToCharcode(glyph GlyphName) (CharCode, bool return enc.RuneToCharcode(r) } -// RuneToGlyph returns the glyph corresponding to rune `r`. -// The bool return flag is true if there was a match, and false otherwise. -func (enc *differencesEncoding) RuneToGlyph(r rune) (GlyphName, bool) { - // TODO(dennwc): should be in the font interface - code, ok := enc.RuneToCharcode(r) - if !ok { - return "", false - } - if glyph, ok := enc.differences[code]; ok { - return glyph, true - } - return enc.base.RuneToGlyph(r) -} - -// GlyphToRune returns the rune corresponding to glyph `glyph`. -// The bool return flag is true if there was a match, and false otherwise. -func (enc *differencesEncoding) GlyphToRune(glyph GlyphName) (rune, bool) { - // TODO(dennwc): should be in the font interface - code, ok := enc.GlyphToCharcode(glyph) - if !ok { - return MissingCodeRune, false - } - return enc.CharcodeToRune(code) -} - // ToPdfObject returns the encoding as a PdfObject. 
func (enc *differencesEncoding) ToPdfObject() core.PdfObject { dict := core.MakeDict() diff --git a/pdf/internal/textencoding/encoder.go b/pdf/internal/textencoding/encoder.go index e94f7bcd..dd0b5c56 100644 --- a/pdf/internal/textencoding/encoder.go +++ b/pdf/internal/textencoding/encoder.go @@ -44,14 +44,6 @@ type TextEncoder interface { // This is usually implemented as CharcodeToGlyph->GlyphToRune CharcodeToRune(code CharCode) (rune, bool) - // RuneToGlyph returns the glyph name for rune `r`. - // The bool return flag is true if there was a match, and false otherwise. - RuneToGlyph(r rune) (GlyphName, bool) - - // GlyphToRune returns the rune corresponding to glyph name `glyph`. - // The bool return flag is true if there was a match, and false otherwise. - GlyphToRune(glyph GlyphName) (rune, bool) - // ToPdfObject returns a PDF Object that represents the encoding. ToPdfObject() core.PdfObject } @@ -93,23 +85,3 @@ func encodeString16bit(enc TextEncoder, raw string) []byte { } return encoded } - -// doRuneToCharcode converts rune `r` to a PDF character code. -// The bool return flag is true if there was a match, and false otherwise. -func doRuneToCharcode(enc TextEncoder, r rune) (CharCode, bool) { - g, ok := enc.RuneToGlyph(r) - if !ok { - return 0, false - } - return enc.GlyphToCharcode(g) -} - -// doCharcodeToRune converts PDF character code `code` to a rune. -// The bool return flag is true if there was a match, and false otherwise. -func doCharcodeToRune(enc TextEncoder, code CharCode) (rune, bool) { - g, ok := enc.CharcodeToGlyph(code) - if !ok { - return 0, false - } - return enc.GlyphToRune(g) -} diff --git a/pdf/internal/textencoding/glyph_test.go b/pdf/internal/textencoding/glyph_test.go index bebdad86..34ce2e9c 100644 --- a/pdf/internal/textencoding/glyph_test.go +++ b/pdf/internal/textencoding/glyph_test.go @@ -25,3 +25,33 @@ func TestGlypRune(t *testing.T) { } } } + +// TestRuneToGlyph checks for known glyph->rune mappings. 
+func TestRuneToGlyph(t *testing.T) { + runes := []rune("₂₃₄₅◊fl˝ˇ¾Ðí©ºªı„δ∂ℵ⌡×÷®Ï☎①➔➨") + glyphs := []GlyphName{ + "twoinferior", "threeinferior", "fourinferior", "fiveinferior", + "lozenge", "fl", "hungarumlaut", "caron", + "threequarters", "Eth", "iacute", "copyright", + "ordmasculine", "ordfeminine", "dotlessi", "quotedblbase", + "delta", "partialdiff", "aleph", "integralbt", + "multiply", "divide", "registered", "Idieresis", + "a4", "a120", "a160", "a178", + } + + if len(runes) != len(glyphs) { + t.Fatalf("Bad test: runes=%d glyphs=%d", len(runes), len(glyphs)) + } + for i, glyph := range glyphs { + t.Run(string(glyph), func(t *testing.T) { + expected := runes[i] + r, ok := GlyphToRune(glyph) + if !ok { + t.Fatalf("no glyph %q", glyph) + } + if r != expected { + t.Fatalf("Expected 0x%04x=%c. Got 0x%04x=%c", r, r, expected, expected) + } + }) + } +} diff --git a/pdf/internal/textencoding/simple.go b/pdf/internal/textencoding/simple.go index a4a5d564..2b22f3db 100644 --- a/pdf/internal/textencoding/simple.go +++ b/pdf/internal/textencoding/simple.go @@ -224,7 +224,7 @@ func (enc *simpleEncoding) CharcodeToGlyph(code CharCode) (GlyphName, bool) { if !ok { return "", false } - return enc.RuneToGlyph(r) + return runeToGlyph(r, glyphlistRuneToGlyphMap) } func (enc *simpleEncoding) GlyphToCharcode(glyph GlyphName) (CharCode, bool) { @@ -236,16 +236,6 @@ func (enc *simpleEncoding) GlyphToCharcode(glyph GlyphName) (CharCode, bool) { return enc.RuneToCharcode(r) } -func (enc *simpleEncoding) RuneToGlyph(r rune) (GlyphName, bool) { - // TODO(dennwc): should be in the font interface - return runeToGlyph(r, glyphlistRuneToGlyphMap) -} - -func (enc *simpleEncoding) GlyphToRune(glyph GlyphName) (rune, bool) { - // TODO(dennwc): should be in the font interface - return glyphToRune(glyph, glyphlistGlyphToRuneMap) -} - func (enc *simpleEncoding) ToPdfObject() core.PdfObject { switch enc.baseName { case "MacRomanEncoding", "MacExpertEncoding", baseWinAnsi: diff --git 
a/pdf/internal/textencoding/simple_test.go b/pdf/internal/textencoding/simple_test.go deleted file mode 100644 index 45497b8f..00000000 --- a/pdf/internal/textencoding/simple_test.go +++ /dev/null @@ -1,68 +0,0 @@ -/* - * This file is subject to the terms and conditions defined in - * file 'LICENSE.md', which is part of this source code package. - */ - -package textencoding - -import ( - "fmt" - "testing" - - "github.com/unidoc/unidoc/common" -) - -// This test covers all the standard encodings in simple.go - -func init() { - common.SetLogger(common.NewConsoleLogger(common.LogLevelDebug)) -} - -// TestBasicEncodings checks for known glyph->rune mappings in the standard encodings. -func TestBasicEncodings(t *testing.T) { - for _, test := range testCases { - test.check(t) - } -} - -var testCases = []encodingTest{ - {"MacExpertEncoding", "₂₃₄₅", []GlyphName{"twoinferior", "threeinferior", "fourinferior", "fiveinferior"}}, - {"MacRomanEncoding", "◊fl˝ˇ", []GlyphName{"lozenge", "fl", "hungarumlaut", "caron"}}, - {"PdfDocEncoding", "¾Ðí©", []GlyphName{"threequarters", "Eth", "iacute", "copyright"}}, - {"StandardEncoding", "ºªı„", []GlyphName{"ordmasculine", "ordfeminine", "dotlessi", "quotedblbase"}}, - {"SymbolEncoding", "δ∂ℵ⌡", []GlyphName{"delta", "partialdiff", "aleph", "integralbt"}}, - {"WinAnsiEncoding", "×÷®Ï", []GlyphName{"multiply", "divide", "registered", "Idieresis"}}, - {"ZapfDingbatsEncoding", "☎①➔➨", []GlyphName{"a4", "a120", "a160", "a178"}}, -} - -type encodingTest struct { - encoding string - runes string - glyphs []GlyphName -} - -func (f *encodingTest) String() string { - return fmt.Sprintf("ENCODING_TEST{%#q}", f.encoding) -} - -func (f *encodingTest) check(t *testing.T) { - common.Log.Debug("encodingTest: %s", f) - runes := []rune(f.runes) - if len(runes) != len(f.glyphs) { - t.Fatalf("Bad test %s runes=%d glyphs=%d", f, len(runes), len(f.glyphs)) - } - enc, err := NewSimpleTextEncoder(f.encoding, nil) - if err != nil { - 
t.Fatalf("NewSimpleTextEncoder(%#q) failed. err=%v", f.encoding, err) - } - for i, glyph := range f.glyphs { - expected := runes[i] - r, ok := enc.GlyphToRune(glyph) - if !ok { - t.Fatalf("Encoding %#q has no glyph %q", f.encoding, glyph) - } - if r != expected { - t.Fatalf("%s: Expected 0x%04x=%c. Got 0x%04x=%c", f, r, r, expected, expected) - } - } -} diff --git a/pdf/internal/textencoding/simple_winansi_test.go b/pdf/internal/textencoding/simple_winansi_test.go index 0226c88b..59bae88c 100644 --- a/pdf/internal/textencoding/simple_winansi_test.go +++ b/pdf/internal/textencoding/simple_winansi_test.go @@ -21,7 +21,7 @@ func TestWinAnsiEncoder(t *testing.T) { return } - glyph, found = enc.RuneToGlyph('þ') + glyph, found = RuneToGlyph('þ') if !found || glyph != "thorn" { t.Errorf("Glyph != thorn") return diff --git a/pdf/internal/textencoding/truetype.go b/pdf/internal/textencoding/truetype.go index e868d245..b7d5b843 100644 --- a/pdf/internal/textencoding/truetype.go +++ b/pdf/internal/textencoding/truetype.go @@ -136,38 +136,6 @@ func (enc TrueTypeFontEncoder) CharcodeToRune(code CharCode) (rune, bool) { return 0, false } -// RuneToGlyph returns the glyph name for rune `r`. -// The bool return flag is true if there was a match, and false otherwise. -func (enc TrueTypeFontEncoder) RuneToGlyph(r rune) (GlyphName, bool) { - if r == ' ' { - return "space", true - } - // TODO(dennwc): this is wrong; font may override this with a "post" table that specifies glyph names - glyph := GlyphName(fmt.Sprintf("uni%.4X", r)) - return glyph, true -} - -// GlyphToRune returns the rune corresponding to glyph name `glyph`. -// The bool return flag is true if there was a match, and false otherwise. -func (enc TrueTypeFontEncoder) GlyphToRune(glyph GlyphName) (rune, bool) { - // TODO(dennwc): this is wrong; font may override this with a "post" table that specifies glyph names - // String with "uniXXXX" format where XXXX is the hexcode. 
- if len(glyph) == 7 && glyph[0:3] == "uni" { - unicode := uint16(0) - n, err := fmt.Sscanf(string(glyph), "uni%X", &unicode) - if n == 1 && err == nil { - return rune(unicode), true - } - } - - // Look in glyphlist. - if r, ok := glyphlistGlyphToRuneMap[glyph]; ok { - return r, true - } - - return 0, false -} - // ToPdfObject returns a nil as it is not truly a PDF object and should not be attempted to store in file. func (enc TrueTypeFontEncoder) ToPdfObject() core.PdfObject { // TODO(dennwc): reasonable question: why it have to implement this interface then? diff --git a/pdf/internal/textencoding/utils.go b/pdf/internal/textencoding/utils.go index c5f4c2d9..971620e9 100644 --- a/pdf/internal/textencoding/utils.go +++ b/pdf/internal/textencoding/utils.go @@ -31,34 +31,6 @@ func runeToGlyph(r rune, runeToGlyphMap map[rune]GlyphName) (GlyphName, bool) { return "", false } -func splitWords(raw string, encoder TextEncoder) []string { - runes := []rune(raw) - - var words []string - - startsAt := 0 - for idx, r := range runes { - glyph, ok := encoder.RuneToGlyph(r) - if !ok { - common.Log.Debug("Glyph not found for rune %s", rs(r)) - continue - } - - if glyph == "space" || glyph == "uni0020" { - word := runes[startsAt:idx] - words = append(words, string(word)) - startsAt = idx + 1 - } - } - - word := runes[startsAt:] - if len(word) > 0 { - words = append(words, string(word)) - } - - return words -} - // rs returns a string describing rune `r`. 
func rs(r rune) string { c := "unprintable" diff --git a/pdf/model/font.go b/pdf/model/font.go index 10c4c719..634782cc 100644 --- a/pdf/model/font.go +++ b/pdf/model/font.go @@ -96,11 +96,23 @@ func DefaultFont() *PdfFont { return &PdfFont{context: &std} } +func newStandard14Font(basefont fonts.StdFontName) (pdfFontSimple, error) { + fnt, ok := fonts.NewStdFontByName(basefont) + if !ok { + return pdfFontSimple{}, ErrFontNotSupported + } + std := stdFontToSimpleFont(fnt) + return std, nil +} + // NewStandard14Font returns the standard 14 font named `basefont` as a *PdfFont, or an error if it // `basefont` is not one of the standard 14 font names. func NewStandard14Font(basefont fonts.StdFontName) (*PdfFont, error) { - std, _, err := NewStandard14FontWithEncoding(basefont, nil) - return std, err + std, err := newStandard14Font(basefont) + if err != nil { + return nil, err + } + return &PdfFont{context: &std}, nil } // NewStandard14FontMustCompile returns the standard 14 font named `basefont` as a *PdfFont. @@ -119,16 +131,17 @@ func NewStandard14FontMustCompile(basefont fonts.StdFontName) *PdfFont { // An error can occur if `basefont` is not one the standard 14 font names. func NewStandard14FontWithEncoding(basefont fonts.StdFontName, alphabet map[rune]int) (*PdfFont, textencoding.SimpleEncoder, error) { + + std, err := newStandard14Font(basefont) + if err != nil { + return nil, nil, err + } + + // TODO(dennwc): what if the font is Symbol and uses different encoding? 
baseEncoder := "MacRomanEncoding" common.Log.Trace("NewStandard14FontWithEncoding: basefont=%#q baseEncoder=%#q alphabet=%q", basefont, baseEncoder, string(sortedAlphabet(alphabet))) - fnt, ok := fonts.NewStdFontByName(basefont) - if !ok { - return nil, nil, ErrFontNotSupported - } - std := stdFontToSimpleFont(fnt) - encoder, err := textencoding.NewSimpleTextEncoder(baseEncoder, nil) if err != nil { return nil, nil, err diff --git a/pdf/model/fonts/std.go b/pdf/model/fonts/std.go index eb9ebcfd..ef556840 100644 --- a/pdf/model/fonts/std.go +++ b/pdf/model/fonts/std.go @@ -103,7 +103,12 @@ func (font StdFont) Encoder() textencoding.TextEncoder { // GetRuneMetrics returns character metrics for a given rune. func (font StdFont) GetRuneMetrics(r rune) (CharMetrics, bool) { - glyph, has := font.encoder.RuneToGlyph(r) + // TODO(dennwc): rebuild tables for runes instead of glyphs + code, has := font.encoder.RuneToCharcode(r) + if !has { + return CharMetrics{}, false + } + glyph, has := font.encoder.CharcodeToGlyph(code) if !has { return CharMetrics{}, false } From 7a2cd35f48eb7464711f5987abe8458f67e3fa97 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Tue, 1 Jan 2019 22:40:11 +0200 Subject: [PATCH 07/11] fonts: rebuild font metrics tables based on runes for standard fonts --- pdf/model/font.go | 2 +- pdf/model/font_simple.go | 23 +- pdf/model/fonts/std.go | 120 ++--- pdf/model/fonts/std_courier.go | 14 +- pdf/model/fonts/std_helvetica.go | 18 +- pdf/model/fonts/std_other.go | 788 +++++++++++++++---------------- pdf/model/fonts/std_times.go | 26 +- 7 files changed, 474 insertions(+), 517 deletions(-) diff --git a/pdf/model/font.go b/pdf/model/font.go index 634782cc..95f37b3c 100644 --- a/pdf/model/font.go +++ b/pdf/model/font.go @@ -175,7 +175,7 @@ func NewStandard14FontWithEncoding(basefont fonts.StdFontName, alphabet map[rune common.Log.Debug("No glyph for rune 0x%02x=%c", r, r) continue } - if _, ok = std.fontMetrics[glyph]; !ok { + if _, ok = std.fontMetrics[r]; !ok 
{ common.Log.Trace("Glyph %q (0x%04x=%c)not in font", glyph, r, r) continue } diff --git a/pdf/model/font_simple.go b/pdf/model/font_simple.go index 12725935..1664ab69 100644 --- a/pdf/model/font_simple.go +++ b/pdf/model/font_simple.go @@ -58,7 +58,7 @@ type pdfFontSimple struct { Encoding core.PdfObject // Standard 14 fonts metrics - fontMetrics map[textencoding.GlyphName]fonts.CharMetrics + fontMetrics map[rune]fonts.CharMetrics } // pdfCIDFontType0FromSkeleton returns a pdfFontSimple with its common fields initalized. @@ -109,12 +109,17 @@ func (font *pdfFontSimple) SetEncoder(encoder textencoding.TextEncoder) { // GetRuneMetrics returns the character metrics for the rune. // A bool flag is returned to indicate whether or not the entry was found. func (font pdfFontSimple) GetRuneMetrics(r rune) (fonts.CharMetrics, bool) { + if font.fontMetrics != nil { + metrics, has := font.fontMetrics[r] + if has { + return metrics, true + } + } encoder := font.Encoder() if encoder == nil { common.Log.Debug("No encoder for fonts=%s", font) return fonts.CharMetrics{}, false } - code, found := encoder.RuneToCharcode(r) if !found { if r != ' ' { @@ -122,14 +127,6 @@ func (font pdfFontSimple) GetRuneMetrics(r rune) (fonts.CharMetrics, bool) { } return fonts.CharMetrics{}, false } - if font.fontMetrics != nil { - if glyph, found := encoder.CharcodeToGlyph(code); found { - metrics, has := font.fontMetrics[glyph] - if has { - return metrics, true - } - } - } metrics, ok := font.GetCharMetrics(code) return metrics, ok } @@ -512,9 +509,9 @@ func (font *pdfFontSimple) updateStandard14Font() { codes := se.Charcodes() font.charWidths = make(map[textencoding.CharCode]float64, len(codes)) for _, code := range codes { - // codes was built from CharcodeToGlyph mapping, so each should have a glyph - glyph, _ := se.CharcodeToGlyph(code) - font.charWidths[code] = font.fontMetrics[glyph].Wx + // codes was built from the same mapping, so each should have a rune + r, _ := 
se.CharcodeToRune(code) + font.charWidths[code] = font.fontMetrics[r].Wx } } diff --git a/pdf/model/fonts/std.go b/pdf/model/fonts/std.go index ef556840..e844fbd9 100644 --- a/pdf/model/fonts/std.go +++ b/pdf/model/fonts/std.go @@ -72,18 +72,18 @@ var _ Font = StdFont{} // StdFont represents one of the built-in fonts and it is assumed that every reader has access to it. type StdFont struct { desc Descriptor - metrics map[GlyphName]CharMetrics + metrics map[rune]CharMetrics encoder textencoding.TextEncoder } // NewStdFont returns a new instance of the font with a default encoder set (WinAnsiEncoding). -func NewStdFont(desc Descriptor, metrics map[GlyphName]CharMetrics) StdFont { +func NewStdFont(desc Descriptor, metrics map[rune]CharMetrics) StdFont { enc := textencoding.NewWinAnsiEncoder() // Default return NewStdFontWithEncoding(desc, metrics, enc) } // NewStdFontWithEncoding returns a new instance of the font with a specified encoder. -func NewStdFontWithEncoding(desc Descriptor, metrics map[GlyphName]CharMetrics, encoder textencoding.TextEncoder) StdFont { +func NewStdFontWithEncoding(desc Descriptor, metrics map[rune]CharMetrics, encoder textencoding.TextEncoder) StdFont { return StdFont{ desc: desc, metrics: metrics, @@ -103,22 +103,13 @@ func (font StdFont) Encoder() textencoding.TextEncoder { // GetRuneMetrics returns character metrics for a given rune. func (font StdFont) GetRuneMetrics(r rune) (CharMetrics, bool) { - // TODO(dennwc): rebuild tables for runes instead of glyphs - code, has := font.encoder.RuneToCharcode(r) - if !has { - return CharMetrics{}, false - } - glyph, has := font.encoder.CharcodeToGlyph(code) - if !has { - return CharMetrics{}, false - } - metrics, has := font.metrics[glyph] - return metrics, true + metrics, has := font.metrics[r] + return metrics, has } // GetMetricsTable is a method specific to standard fonts. It returns the metrics table of all glyphs. // Caller should not modify the table. 
-func (font StdFont) GetMetricsTable() map[GlyphName]CharMetrics { +func (font StdFont) GetMetricsTable() map[rune]CharMetrics { return font.metrics } @@ -138,69 +129,38 @@ func (font StdFont) ToPdfObject() core.PdfObject { return core.MakeIndirectObject(fontDict) } -// type1CommonGlyphs is list of common glyph names for some Type1. Used to unpack character metrics. -var type1CommonGlyphs = []textencoding.GlyphName{ - "A", "AE", "Aacute", "Abreve", "Acircumflex", - "Adieresis", "Agrave", "Amacron", "Aogonek", "Aring", - "Atilde", "B", "C", "Cacute", "Ccaron", - "Ccedilla", "D", "Dcaron", "Dcroat", "Delta", - "E", "Eacute", "Ecaron", "Ecircumflex", "Edieresis", - "Edotaccent", "Egrave", "Emacron", "Eogonek", "Eth", - "Euro", "F", "G", "Gbreve", "Gcommaaccent", - "H", "I", "Iacute", "Icircumflex", "Idieresis", - "Idotaccent", "Igrave", "Imacron", "Iogonek", "J", - "K", "Kcommaaccent", "L", "Lacute", "Lcaron", - "Lcommaaccent", "Lslash", "M", "N", "Nacute", - "Ncaron", "Ncommaaccent", "Ntilde", "O", "OE", - "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Ohungarumlaut", - "Omacron", "Oslash", "Otilde", "P", "Q", - "R", "Racute", "Rcaron", "Rcommaaccent", "S", - "Sacute", "Scaron", "Scedilla", "Scommaaccent", "T", - "Tcaron", "Tcommaaccent", "Thorn", "U", "Uacute", - "Ucircumflex", "Udieresis", "Ugrave", "Uhungarumlaut", "Umacron", - "Uogonek", "Uring", "V", "W", "X", - "Y", "Yacute", "Ydieresis", "Z", "Zacute", - "Zcaron", "Zdotaccent", "a", "aacute", "abreve", - "acircumflex", "acute", "adieresis", "ae", "agrave", - "amacron", "ampersand", "aogonek", "aring", "asciicircum", - "asciitilde", "asterisk", "at", "atilde", "b", - "backslash", "bar", "braceleft", "braceright", "bracketleft", - "bracketright", "breve", "brokenbar", "bullet", "c", - "cacute", "caron", "ccaron", "ccedilla", "cedilla", - "cent", "circumflex", "colon", "comma", "commaaccent", - "copyright", "currency", "d", "dagger", "daggerdbl", - "dcaron", "dcroat", "degree", "dieresis", "divide", - "dollar", 
"dotaccent", "dotlessi", "e", "eacute", - "ecaron", "ecircumflex", "edieresis", "edotaccent", "egrave", - "eight", "ellipsis", "emacron", "emdash", "endash", - "eogonek", "equal", "eth", "exclam", "exclamdown", - "f", "fi", "five", "fl", "florin", - "four", "fraction", "g", "gbreve", "gcommaaccent", - "germandbls", "grave", "greater", "greaterequal", "guillemotleft", - "guillemotright", "guilsinglleft", "guilsinglright", "h", "hungarumlaut", - "hyphen", "i", "iacute", "icircumflex", "idieresis", - "igrave", "imacron", "iogonek", "j", "k", - "kcommaaccent", "l", "lacute", "lcaron", "lcommaaccent", - "less", "lessequal", "logicalnot", "lozenge", "lslash", - "m", "macron", "minus", "mu", "multiply", - "n", "nacute", "ncaron", "ncommaaccent", "nine", - "notequal", "ntilde", "numbersign", "o", "oacute", - "ocircumflex", "odieresis", "oe", "ogonek", "ograve", - "ohungarumlaut", "omacron", "one", "onehalf", "onequarter", - "onesuperior", "ordfeminine", "ordmasculine", "oslash", "otilde", - "p", "paragraph", "parenleft", "parenright", "partialdiff", - "percent", "period", "periodcentered", "perthousand", "plus", - "plusminus", "q", "question", "questiondown", "quotedbl", - "quotedblbase", "quotedblleft", "quotedblright", "quoteleft", "quoteright", - "quotesinglbase", "quotesingle", "r", "racute", "radical", - "rcaron", "rcommaaccent", "registered", "ring", "s", - "sacute", "scaron", "scedilla", "scommaaccent", "section", - "semicolon", "seven", "six", "slash", "space", - "sterling", "summation", "t", "tcaron", "tcommaaccent", - "thorn", "three", "threequarters", "threesuperior", "tilde", - "trademark", "two", "twosuperior", "u", "uacute", - "ucircumflex", "udieresis", "ugrave", "uhungarumlaut", "umacron", - "underscore", "uogonek", "uring", "v", "w", - "x", "y", "yacute", "ydieresis", "yen", - "z", "zacute", "zcaron", "zdotaccent", "zero", +// type1CommonRunes is list of runes common for some Type1 fonts. Used to unpack character metrics. 
+var type1CommonRunes = []rune{ + 'A', 'Æ', 'Á', 'Ă', 'Â', 'Ä', 'À', 'Ā', 'Ą', 'Å', + 'Ã', 'B', 'C', 'Ć', 'Č', 'Ç', 'D', 'Ď', 'Đ', '∆', + 'E', 'É', 'Ě', 'Ê', 'Ë', 'Ė', 'È', 'Ē', 'Ę', 'Ð', + '€', 'F', 'G', 'Ğ', 'Ģ', 'H', 'I', 'Í', 'Î', 'Ï', + 'İ', 'Ì', 'Ī', 'Į', 'J', 'K', 'Ķ', 'L', 'Ĺ', 'Ľ', + 'Ļ', 'Ł', 'M', 'N', 'Ń', 'Ň', 'Ņ', 'Ñ', 'O', 'Œ', + 'Ó', 'Ô', 'Ö', 'Ò', 'Ő', 'Ō', 'Ø', 'Õ', 'P', 'Q', + 'R', 'Ŕ', 'Ř', 'Ŗ', 'S', 'Ś', 'Š', 'Ş', 'Ș', 'T', + 'Ť', 'Ţ', 'Þ', 'U', 'Ú', 'Û', 'Ü', 'Ù', 'Ű', 'Ū', + 'Ų', 'Ů', 'V', 'W', 'X', 'Y', 'Ý', 'Ÿ', 'Z', 'Ź', + 'Ž', 'Ż', 'a', 'á', 'ă', 'â', '´', 'ä', 'æ', 'à', + 'ā', '&', 'ą', 'å', '^', '~', '*', '@', 'ã', 'b', + '\\', '|', '{', '}', '[', ']', '˘', '¦', '•', 'c', + 'ć', 'ˇ', 'č', 'ç', '¸', '¢', 'ˆ', ':', ',', '\uf6c3', + '©', '¤', 'd', '†', '‡', 'ď', 'đ', '°', '¨', '÷', + '$', '˙', 'ı', 'e', 'é', 'ě', 'ê', 'ë', 'ė', 'è', + '8', '…', 'ē', '—', '–', 'ę', '=', 'ð', '!', '¡', + 'f', 'fi', '5', 'fl', 'ƒ', '4', '⁄', 'g', 'ğ', 'ģ', + 'ß', '`', '>', '≥', '«', '»', '‹', '›', 'h', '˝', + '-', 'i', 'í', 'î', 'ï', 'ì', 'ī', 'į', 'j', 'k', + 'ķ', 'l', 'ĺ', 'ľ', 'ļ', '<', '≤', '¬', '◊', 'ł', + 'm', '¯', '−', 'µ', '×', 'n', 'ń', 'ň', 'ņ', '9', + '≠', 'ñ', '#', 'o', 'ó', 'ô', 'ö', 'œ', '˛', 'ò', + 'ő', 'ō', '1', '½', '¼', '¹', 'ª', 'º', 'ø', 'õ', + 'p', '¶', '(', ')', '∂', '%', '.', '·', '‰', '+', + '±', 'q', '?', '¿', '"', '„', '“', '”', '‘', '’', + '‚', '\'', 'r', 'ŕ', '√', 'ř', 'ŗ', '®', '˚', 's', + 'ś', 'š', 'ş', 'ș', '§', ';', '7', '6', '/', ' ', + '£', '∑', 't', 'ť', 'ţ', 'þ', '3', '¾', '³', '˜', + '™', '2', '²', 'u', 'ú', 'û', 'ü', 'ù', 'ű', 'ū', + '_', 'ų', 'ů', 'v', 'w', 'x', 'y', 'ý', 'ÿ', '¥', + 'z', 'ź', 'ž', 'ż', '0', } diff --git a/pdf/model/fonts/std_courier.go b/pdf/model/fonts/std_courier.go index 406bd2bf..4319257f 100644 --- a/pdf/model/fonts/std_courier.go +++ b/pdf/model/fonts/std_courier.go @@ -118,9 +118,9 @@ var courierOnce sync.Once func initCourier() { // the only font that has same metrics for all glyphs (fixed-width) 
const wx = 600 - courierCharMetrics = make(map[GlyphName]CharMetrics, len(type1CommonGlyphs)) - for _, glyph := range type1CommonGlyphs { - courierCharMetrics[glyph] = CharMetrics{Wx: wx} + courierCharMetrics = make(map[rune]CharMetrics, len(type1CommonRunes)) + for _, r := range type1CommonRunes { + courierCharMetrics[r] = CharMetrics{Wx: wx} } // other font variant still have the same metrics courierBoldCharMetrics = courierCharMetrics @@ -130,15 +130,15 @@ func initCourier() { // courierCharMetrics are the font metrics loaded from afms/Courier.afm. See afms/MustRead.html for // license information. -var courierCharMetrics map[GlyphName]CharMetrics +var courierCharMetrics map[rune]CharMetrics // Courier-Bold font metrics loaded from afms/Courier-Bold.afm. See afms/MustRead.html for license information. -var courierBoldCharMetrics map[GlyphName]CharMetrics +var courierBoldCharMetrics map[rune]CharMetrics // courierBoldObliqueCharMetrics are the font metrics loaded from afms/Courier-BoldOblique.afm. // See afms/MustRead.html for license information. -var courierBoldObliqueCharMetrics map[GlyphName]CharMetrics +var courierBoldObliqueCharMetrics map[rune]CharMetrics // courierObliqueCharMetrics are the font metrics loaded from afms/Courier-Oblique.afm. // See afms/MustRead.html for license information. 
-var courierObliqueCharMetrics map[GlyphName]CharMetrics +var courierObliqueCharMetrics map[rune]CharMetrics diff --git a/pdf/model/fonts/std_helvetica.go b/pdf/model/fonts/std_helvetica.go index 87a4807d..d2951270 100644 --- a/pdf/model/fonts/std_helvetica.go +++ b/pdf/model/fonts/std_helvetica.go @@ -117,11 +117,11 @@ var helveticaOnce sync.Once func initHelvetica() { // unpack font metrics - helveticaCharMetrics = make(map[GlyphName]CharMetrics, len(type1CommonGlyphs)) - helveticaBoldCharMetrics = make(map[GlyphName]CharMetrics, len(type1CommonGlyphs)) - for i, glyph := range type1CommonGlyphs { - helveticaCharMetrics[glyph] = CharMetrics{Wx: float64(helveticaWx[i])} - helveticaBoldCharMetrics[glyph] = CharMetrics{Wx: float64(helveticaBoldWx[i])} + helveticaCharMetrics = make(map[rune]CharMetrics, len(type1CommonRunes)) + helveticaBoldCharMetrics = make(map[rune]CharMetrics, len(type1CommonRunes)) + for i, r := range type1CommonRunes { + helveticaCharMetrics[r] = CharMetrics{Wx: float64(helveticaWx[i])} + helveticaBoldCharMetrics[r] = CharMetrics{Wx: float64(helveticaBoldWx[i])} } helveticaObliqueCharMetrics = helveticaCharMetrics helveticaBoldObliqueCharMetrics = helveticaBoldCharMetrics @@ -129,19 +129,19 @@ func initHelvetica() { // helveticaCharMetrics are the font metrics loaded from afms/Helvetica.afm. // See afms/MustRead.html for license information. -var helveticaCharMetrics map[GlyphName]CharMetrics +var helveticaCharMetrics map[rune]CharMetrics // helveticaBoldCharMetrics are the font metrics loaded from afms/Helvetica-Bold.afm. // See afms/MustRead.html for license information. -var helveticaBoldCharMetrics map[GlyphName]CharMetrics +var helveticaBoldCharMetrics map[rune]CharMetrics // helveticaBoldObliqueCharMetrics are the font metrics loaded from afms/Helvetica-BoldOblique.afm. // See afms/MustRead.html for license information. 
-var helveticaBoldObliqueCharMetrics map[GlyphName]CharMetrics +var helveticaBoldObliqueCharMetrics map[rune]CharMetrics // helveticaObliqueCharMetrics are the font metrics loaded from afms/Helvetica-Oblique.afm. // See afms/MustRead.html for license information. -var helveticaObliqueCharMetrics map[GlyphName]CharMetrics +var helveticaObliqueCharMetrics map[rune]CharMetrics // helveticaWx are the font metrics loaded from afms/Helvetica.afm. // See afms/MustRead.html for license information. diff --git a/pdf/model/fonts/std_other.go b/pdf/model/fonts/std_other.go index f227625d..7065310b 100644 --- a/pdf/model/fonts/std_other.go +++ b/pdf/model/fonts/std_other.go @@ -70,402 +70,402 @@ func NewFontZapfDingbats() StdFont { // symbolCharMetrics are the font metrics loaded from afms/Symbol.afm. // See afms/MustRead.html for license information. -var symbolCharMetrics = map[GlyphName]CharMetrics{ - "Alpha": {Wx: 722.000000}, - "Beta": {Wx: 667.000000}, - "Chi": {Wx: 722.000000}, - "Delta": {Wx: 612.000000}, - "Epsilon": {Wx: 611.000000}, - "Eta": {Wx: 722.000000}, - "Euro": {Wx: 750.000000}, - "Gamma": {Wx: 603.000000}, - "Ifraktur": {Wx: 686.000000}, - "Iota": {Wx: 333.000000}, - "Kappa": {Wx: 722.000000}, - "Lambda": {Wx: 686.000000}, - "Mu": {Wx: 889.000000}, - "Nu": {Wx: 722.000000}, - "Omega": {Wx: 768.000000}, - "Omicron": {Wx: 722.000000}, - "Phi": {Wx: 763.000000}, - "Pi": {Wx: 768.000000}, - "Psi": {Wx: 795.000000}, - "Rfraktur": {Wx: 795.000000}, - "Rho": {Wx: 556.000000}, - "Sigma": {Wx: 592.000000}, - "Tau": {Wx: 611.000000}, - "Theta": {Wx: 741.000000}, - "Upsilon": {Wx: 690.000000}, - "Upsilon1": {Wx: 620.000000}, - "Xi": {Wx: 645.000000}, - "Zeta": {Wx: 611.000000}, - "aleph": {Wx: 823.000000}, - "alpha": {Wx: 631.000000}, - "ampersand": {Wx: 778.000000}, - "angle": {Wx: 768.000000}, - "angleleft": {Wx: 329.000000}, - "angleright": {Wx: 329.000000}, - "apple": {Wx: 790.000000}, - "approxequal": {Wx: 549.000000}, - "arrowboth": {Wx: 1042.000000}, - 
"arrowdblboth": {Wx: 1042.000000}, - "arrowdbldown": {Wx: 603.000000}, - "arrowdblleft": {Wx: 987.000000}, - "arrowdblright": {Wx: 987.000000}, - "arrowdblup": {Wx: 603.000000}, - "arrowdown": {Wx: 603.000000}, - "arrowhorizex": {Wx: 1000.000000}, - "arrowleft": {Wx: 987.000000}, - "arrowright": {Wx: 987.000000}, - "arrowup": {Wx: 603.000000}, - "arrowvertex": {Wx: 603.000000}, - "asteriskmath": {Wx: 500.000000}, - "bar": {Wx: 200.000000}, - "beta": {Wx: 549.000000}, - "braceex": {Wx: 494.000000}, - "braceleft": {Wx: 480.000000}, - "braceleftbt": {Wx: 494.000000}, - "braceleftmid": {Wx: 494.000000}, - "bracelefttp": {Wx: 494.000000}, - "braceright": {Wx: 480.000000}, - "bracerightbt": {Wx: 494.000000}, - "bracerightmid": {Wx: 494.000000}, - "bracerighttp": {Wx: 494.000000}, - "bracketleft": {Wx: 333.000000}, - "bracketleftbt": {Wx: 384.000000}, - "bracketleftex": {Wx: 384.000000}, - "bracketlefttp": {Wx: 384.000000}, - "bracketright": {Wx: 333.000000}, - "bracketrightbt": {Wx: 384.000000}, - "bracketrightex": {Wx: 384.000000}, - "bracketrighttp": {Wx: 384.000000}, - "bullet": {Wx: 460.000000}, - "carriagereturn": {Wx: 658.000000}, - "chi": {Wx: 549.000000}, - "circlemultiply": {Wx: 768.000000}, - "circleplus": {Wx: 768.000000}, - "club": {Wx: 753.000000}, - "colon": {Wx: 278.000000}, - "comma": {Wx: 250.000000}, - "congruent": {Wx: 549.000000}, - "copyrightsans": {Wx: 790.000000}, - "copyrightserif": {Wx: 790.000000}, - "degree": {Wx: 400.000000}, - "delta": {Wx: 494.000000}, - "diamond": {Wx: 753.000000}, - "divide": {Wx: 549.000000}, - "dotmath": {Wx: 250.000000}, - "eight": {Wx: 500.000000}, - "element": {Wx: 713.000000}, - "ellipsis": {Wx: 1000.000000}, - "emptyset": {Wx: 823.000000}, - "epsilon": {Wx: 439.000000}, - "equal": {Wx: 549.000000}, - "equivalence": {Wx: 549.000000}, - "eta": {Wx: 603.000000}, - "exclam": {Wx: 333.000000}, - "existential": {Wx: 549.000000}, - "five": {Wx: 500.000000}, - "florin": {Wx: 500.000000}, - "four": {Wx: 500.000000}, - 
"fraction": {Wx: 167.000000}, - "gamma": {Wx: 411.000000}, - "gradient": {Wx: 713.000000}, - "greater": {Wx: 549.000000}, - "greaterequal": {Wx: 549.000000}, - "heart": {Wx: 753.000000}, - "infinity": {Wx: 713.000000}, - "integral": {Wx: 274.000000}, - "integralbt": {Wx: 686.000000}, - "integralex": {Wx: 686.000000}, - "integraltp": {Wx: 686.000000}, - "intersection": {Wx: 768.000000}, - "iota": {Wx: 329.000000}, - "kappa": {Wx: 549.000000}, - "lambda": {Wx: 549.000000}, - "less": {Wx: 549.000000}, - "lessequal": {Wx: 549.000000}, - "logicaland": {Wx: 603.000000}, - "logicalnot": {Wx: 713.000000}, - "logicalor": {Wx: 603.000000}, - "lozenge": {Wx: 494.000000}, - "minus": {Wx: 549.000000}, - "minute": {Wx: 247.000000}, - "mu": {Wx: 576.000000}, - "multiply": {Wx: 549.000000}, - "nine": {Wx: 500.000000}, - "notelement": {Wx: 713.000000}, - "notequal": {Wx: 549.000000}, - "notsubset": {Wx: 713.000000}, - "nu": {Wx: 521.000000}, - "numbersign": {Wx: 500.000000}, - "omega": {Wx: 686.000000}, - "omega1": {Wx: 713.000000}, - "omicron": {Wx: 549.000000}, - "one": {Wx: 500.000000}, - "parenleft": {Wx: 333.000000}, - "parenleftbt": {Wx: 384.000000}, - "parenleftex": {Wx: 384.000000}, - "parenlefttp": {Wx: 384.000000}, - "parenright": {Wx: 333.000000}, - "parenrightbt": {Wx: 384.000000}, - "parenrightex": {Wx: 384.000000}, - "parenrighttp": {Wx: 384.000000}, - "partialdiff": {Wx: 494.000000}, - "percent": {Wx: 833.000000}, - "period": {Wx: 250.000000}, - "perpendicular": {Wx: 658.000000}, - "phi": {Wx: 521.000000}, - "phi1": {Wx: 603.000000}, - "pi": {Wx: 549.000000}, - "plus": {Wx: 549.000000}, - "plusminus": {Wx: 549.000000}, - "product": {Wx: 823.000000}, - "propersubset": {Wx: 713.000000}, - "propersuperset": {Wx: 713.000000}, - "proportional": {Wx: 713.000000}, - "psi": {Wx: 686.000000}, - "question": {Wx: 444.000000}, - "radical": {Wx: 549.000000}, - "radicalex": {Wx: 500.000000}, - "reflexsubset": {Wx: 713.000000}, - "reflexsuperset": {Wx: 713.000000}, - 
"registersans": {Wx: 790.000000}, - "registerserif": {Wx: 790.000000}, - "rho": {Wx: 549.000000}, - "second": {Wx: 411.000000}, - "semicolon": {Wx: 278.000000}, - "seven": {Wx: 500.000000}, - "sigma": {Wx: 603.000000}, - "sigma1": {Wx: 439.000000}, - "similar": {Wx: 549.000000}, - "six": {Wx: 500.000000}, - "slash": {Wx: 278.000000}, - "space": {Wx: 250.000000}, - "spade": {Wx: 753.000000}, - "suchthat": {Wx: 439.000000}, - "summation": {Wx: 713.000000}, - "tau": {Wx: 439.000000}, - "therefore": {Wx: 863.000000}, - "theta": {Wx: 521.000000}, - "theta1": {Wx: 631.000000}, - "three": {Wx: 500.000000}, - "trademarksans": {Wx: 786.000000}, - "trademarkserif": {Wx: 890.000000}, - "two": {Wx: 500.000000}, - "underscore": {Wx: 500.000000}, - "union": {Wx: 768.000000}, - "universal": {Wx: 713.000000}, - "upsilon": {Wx: 576.000000}, - "weierstrass": {Wx: 987.000000}, - "xi": {Wx: 493.000000}, - "zero": {Wx: 500.000000}, - "zeta": {Wx: 494.000000}, +var symbolCharMetrics = map[rune]CharMetrics{ + ' ': {Wx: 250}, + '!': {Wx: 333}, + '#': {Wx: 500}, + '%': {Wx: 833}, + '&': {Wx: 778}, + '(': {Wx: 333}, + ')': {Wx: 333}, + '+': {Wx: 549}, + ',': {Wx: 250}, + '.': {Wx: 250}, + '/': {Wx: 278}, + '0': {Wx: 500}, + '1': {Wx: 500}, + '2': {Wx: 500}, + '3': {Wx: 500}, + '4': {Wx: 500}, + '5': {Wx: 500}, + '6': {Wx: 500}, + '7': {Wx: 500}, + '8': {Wx: 500}, + '9': {Wx: 500}, + ':': {Wx: 278}, + ';': {Wx: 278}, + '<': {Wx: 549}, + '=': {Wx: 549}, + '>': {Wx: 549}, + '?': {Wx: 444}, + '[': {Wx: 333}, + ']': {Wx: 333}, + '_': {Wx: 500}, + '{': {Wx: 480}, + '|': {Wx: 200}, + '}': {Wx: 480}, + '¬': {Wx: 713}, + '°': {Wx: 400}, + '±': {Wx: 549}, + 'µ': {Wx: 576}, + '×': {Wx: 549}, + '÷': {Wx: 549}, + 'ƒ': {Wx: 500}, + 'Α': {Wx: 722}, + 'Β': {Wx: 667}, + 'Γ': {Wx: 603}, + 'Ε': {Wx: 611}, + 'Ζ': {Wx: 611}, + 'Η': {Wx: 722}, + 'Θ': {Wx: 741}, + 'Ι': {Wx: 333}, + 'Κ': {Wx: 722}, + 'Λ': {Wx: 686}, + 'Μ': {Wx: 889}, + 'Ν': {Wx: 722}, + 'Ξ': {Wx: 645}, + 'Ο': {Wx: 722}, + 'Π': {Wx: 768}, + 'Ρ': 
{Wx: 556}, + 'Σ': {Wx: 592}, + 'Τ': {Wx: 611}, + 'Υ': {Wx: 690}, + 'Φ': {Wx: 763}, + 'Χ': {Wx: 722}, + 'Ψ': {Wx: 795}, + 'α': {Wx: 631}, + 'β': {Wx: 549}, + 'γ': {Wx: 411}, + 'δ': {Wx: 494}, + 'ε': {Wx: 439}, + 'ζ': {Wx: 494}, + 'η': {Wx: 603}, + 'θ': {Wx: 521}, + 'ι': {Wx: 329}, + 'κ': {Wx: 549}, + 'λ': {Wx: 549}, + 'ν': {Wx: 521}, + 'ξ': {Wx: 493}, + 'ο': {Wx: 549}, + 'π': {Wx: 549}, + 'ρ': {Wx: 549}, + 'ς': {Wx: 439}, + 'σ': {Wx: 603}, + 'τ': {Wx: 439}, + 'υ': {Wx: 576}, + 'φ': {Wx: 521}, + 'χ': {Wx: 549}, + 'ψ': {Wx: 686}, + 'ω': {Wx: 686}, + 'ϑ': {Wx: 631}, + 'ϒ': {Wx: 620}, + 'ϕ': {Wx: 603}, + 'ϖ': {Wx: 713}, + '•': {Wx: 460}, + '…': {Wx: 1000}, + '′': {Wx: 247}, + '″': {Wx: 411}, + '⁄': {Wx: 167}, + '€': {Wx: 750}, + 'ℑ': {Wx: 686}, + '℘': {Wx: 987}, + 'ℜ': {Wx: 795}, + '\u2126': {Wx: 768}, + 'ℵ': {Wx: 823}, + '←': {Wx: 987}, + '↑': {Wx: 603}, + '→': {Wx: 987}, + '↓': {Wx: 603}, + '↔': {Wx: 1042}, + '↵': {Wx: 658}, + '⇐': {Wx: 987}, + '⇑': {Wx: 603}, + '⇒': {Wx: 987}, + '⇓': {Wx: 603}, + '⇔': {Wx: 1042}, + '∀': {Wx: 713}, + '∂': {Wx: 494}, + '∃': {Wx: 549}, + '∅': {Wx: 823}, + '∆': {Wx: 612}, + '∇': {Wx: 713}, + '∈': {Wx: 713}, + '∉': {Wx: 713}, + '∋': {Wx: 439}, + '∏': {Wx: 823}, + '∑': {Wx: 713}, + '−': {Wx: 549}, + '∗': {Wx: 500}, + '√': {Wx: 549}, + '∝': {Wx: 713}, + '∞': {Wx: 713}, + '∠': {Wx: 768}, + '∧': {Wx: 603}, + '∨': {Wx: 603}, + '∩': {Wx: 768}, + '∪': {Wx: 768}, + '∫': {Wx: 274}, + '∴': {Wx: 863}, + '∼': {Wx: 549}, + '≅': {Wx: 549}, + '≈': {Wx: 549}, + '≠': {Wx: 549}, + '≡': {Wx: 549}, + '≤': {Wx: 549}, + '≥': {Wx: 549}, + '⊂': {Wx: 713}, + '⊃': {Wx: 713}, + '⊄': {Wx: 713}, + '⊆': {Wx: 713}, + '⊇': {Wx: 713}, + '⊕': {Wx: 768}, + '⊗': {Wx: 768}, + '⊥': {Wx: 658}, + '⋅': {Wx: 250}, + '⌠': {Wx: 686}, + '⌡': {Wx: 686}, + '\u2329': {Wx: 329}, + '\u232a': {Wx: 329}, + '◊': {Wx: 494}, + '♠': {Wx: 753}, + '♣': {Wx: 753}, + '♥': {Wx: 753}, + '♦': {Wx: 753}, + '\uf6d9': {Wx: 790}, + '\uf6da': {Wx: 790}, + '\uf6db': {Wx: 890}, + '\uf8e5': {Wx: 500}, + '\uf8e6': {Wx:
603}, + '\uf8e7': {Wx: 1000}, + '\uf8e8': {Wx: 790}, + '\uf8e9': {Wx: 790}, + '\uf8ea': {Wx: 786}, + '\uf8eb': {Wx: 384}, + '\uf8ec': {Wx: 384}, + '\uf8ed': {Wx: 384}, + '\uf8ee': {Wx: 384}, + '\uf8ef': {Wx: 384}, + '\uf8f0': {Wx: 384}, + '\uf8f1': {Wx: 494}, + '\uf8f2': {Wx: 494}, + '\uf8f3': {Wx: 494}, + '\uf8f4': {Wx: 494}, + '\uf8f5': {Wx: 686}, + '\uf8f6': {Wx: 384}, + '\uf8f7': {Wx: 384}, + '\uf8f8': {Wx: 384}, + '\uf8f9': {Wx: 384}, + '\uf8fa': {Wx: 384}, + '\uf8fb': {Wx: 384}, + '\uf8fc': {Wx: 494}, + '\uf8fd': {Wx: 494}, + '\uf8fe': {Wx: 494}, + '\uf8ff': {Wx: 790}, } // zapfDingbatsCharMetrics are the font metrics loaded from afms/ZapfDingbats.afm. // See afms/MustRead.html for license information. -var zapfDingbatsCharMetrics = map[GlyphName]CharMetrics{ - "a1": {Wx: 974.000000}, - "a10": {Wx: 692.000000}, - "a100": {Wx: 668.000000}, - "a101": {Wx: 732.000000}, - "a102": {Wx: 544.000000}, - "a103": {Wx: 544.000000}, - "a104": {Wx: 910.000000}, - "a105": {Wx: 911.000000}, - "a106": {Wx: 667.000000}, - "a107": {Wx: 760.000000}, - "a108": {Wx: 760.000000}, - "a109": {Wx: 626.000000}, - "a11": {Wx: 960.000000}, - "a110": {Wx: 694.000000}, - "a111": {Wx: 595.000000}, - "a112": {Wx: 776.000000}, - "a117": {Wx: 690.000000}, - "a118": {Wx: 791.000000}, - "a119": {Wx: 790.000000}, - "a12": {Wx: 939.000000}, - "a120": {Wx: 788.000000}, - "a121": {Wx: 788.000000}, - "a122": {Wx: 788.000000}, - "a123": {Wx: 788.000000}, - "a124": {Wx: 788.000000}, - "a125": {Wx: 788.000000}, - "a126": {Wx: 788.000000}, - "a127": {Wx: 788.000000}, - "a128": {Wx: 788.000000}, - "a129": {Wx: 788.000000}, - "a13": {Wx: 549.000000}, - "a130": {Wx: 788.000000}, - "a131": {Wx: 788.000000}, - "a132": {Wx: 788.000000}, - "a133": {Wx: 788.000000}, - "a134": {Wx: 788.000000}, - "a135": {Wx: 788.000000}, - "a136": {Wx: 788.000000}, - "a137": {Wx: 788.000000}, - "a138": {Wx: 788.000000}, - "a139": {Wx: 788.000000}, - "a14": {Wx: 855.000000}, - "a140": {Wx: 788.000000}, - "a141": {Wx: 
788.000000}, - "a142": {Wx: 788.000000}, - "a143": {Wx: 788.000000}, - "a144": {Wx: 788.000000}, - "a145": {Wx: 788.000000}, - "a146": {Wx: 788.000000}, - "a147": {Wx: 788.000000}, - "a148": {Wx: 788.000000}, - "a149": {Wx: 788.000000}, - "a15": {Wx: 911.000000}, - "a150": {Wx: 788.000000}, - "a151": {Wx: 788.000000}, - "a152": {Wx: 788.000000}, - "a153": {Wx: 788.000000}, - "a154": {Wx: 788.000000}, - "a155": {Wx: 788.000000}, - "a156": {Wx: 788.000000}, - "a157": {Wx: 788.000000}, - "a158": {Wx: 788.000000}, - "a159": {Wx: 788.000000}, - "a16": {Wx: 933.000000}, - "a160": {Wx: 894.000000}, - "a161": {Wx: 838.000000}, - "a162": {Wx: 924.000000}, - "a163": {Wx: 1016.000000}, - "a164": {Wx: 458.000000}, - "a165": {Wx: 924.000000}, - "a166": {Wx: 918.000000}, - "a167": {Wx: 927.000000}, - "a168": {Wx: 928.000000}, - "a169": {Wx: 928.000000}, - "a17": {Wx: 945.000000}, - "a170": {Wx: 834.000000}, - "a171": {Wx: 873.000000}, - "a172": {Wx: 828.000000}, - "a173": {Wx: 924.000000}, - "a174": {Wx: 917.000000}, - "a175": {Wx: 930.000000}, - "a176": {Wx: 931.000000}, - "a177": {Wx: 463.000000}, - "a178": {Wx: 883.000000}, - "a179": {Wx: 836.000000}, - "a18": {Wx: 974.000000}, - "a180": {Wx: 867.000000}, - "a181": {Wx: 696.000000}, - "a182": {Wx: 874.000000}, - "a183": {Wx: 760.000000}, - "a184": {Wx: 946.000000}, - "a185": {Wx: 865.000000}, - "a186": {Wx: 967.000000}, - "a187": {Wx: 831.000000}, - "a188": {Wx: 873.000000}, - "a189": {Wx: 927.000000}, - "a19": {Wx: 755.000000}, - "a190": {Wx: 970.000000}, - "a191": {Wx: 918.000000}, - "a192": {Wx: 748.000000}, - "a193": {Wx: 836.000000}, - "a194": {Wx: 771.000000}, - "a195": {Wx: 888.000000}, - "a196": {Wx: 748.000000}, - "a197": {Wx: 771.000000}, - "a198": {Wx: 888.000000}, - "a199": {Wx: 867.000000}, - "a2": {Wx: 961.000000}, - "a20": {Wx: 846.000000}, - "a200": {Wx: 696.000000}, - "a201": {Wx: 874.000000}, - "a202": {Wx: 974.000000}, - "a203": {Wx: 762.000000}, - "a204": {Wx: 759.000000}, - "a205": {Wx: 509.000000}, - 
"a206": {Wx: 410.000000}, - "a21": {Wx: 762.000000}, - "a22": {Wx: 761.000000}, - "a23": {Wx: 571.000000}, - "a24": {Wx: 677.000000}, - "a25": {Wx: 763.000000}, - "a26": {Wx: 760.000000}, - "a27": {Wx: 759.000000}, - "a28": {Wx: 754.000000}, - "a29": {Wx: 786.000000}, - "a3": {Wx: 980.000000}, - "a30": {Wx: 788.000000}, - "a31": {Wx: 788.000000}, - "a32": {Wx: 790.000000}, - "a33": {Wx: 793.000000}, - "a34": {Wx: 794.000000}, - "a35": {Wx: 816.000000}, - "a36": {Wx: 823.000000}, - "a37": {Wx: 789.000000}, - "a38": {Wx: 841.000000}, - "a39": {Wx: 823.000000}, - "a4": {Wx: 719.000000}, - "a40": {Wx: 833.000000}, - "a41": {Wx: 816.000000}, - "a42": {Wx: 831.000000}, - "a43": {Wx: 923.000000}, - "a44": {Wx: 744.000000}, - "a45": {Wx: 723.000000}, - "a46": {Wx: 749.000000}, - "a47": {Wx: 790.000000}, - "a48": {Wx: 792.000000}, - "a49": {Wx: 695.000000}, - "a5": {Wx: 789.000000}, - "a50": {Wx: 776.000000}, - "a51": {Wx: 768.000000}, - "a52": {Wx: 792.000000}, - "a53": {Wx: 759.000000}, - "a54": {Wx: 707.000000}, - "a55": {Wx: 708.000000}, - "a56": {Wx: 682.000000}, - "a57": {Wx: 701.000000}, - "a58": {Wx: 826.000000}, - "a59": {Wx: 815.000000}, - "a6": {Wx: 494.000000}, - "a60": {Wx: 789.000000}, - "a61": {Wx: 789.000000}, - "a62": {Wx: 707.000000}, - "a63": {Wx: 687.000000}, - "a64": {Wx: 696.000000}, - "a65": {Wx: 689.000000}, - "a66": {Wx: 786.000000}, - "a67": {Wx: 787.000000}, - "a68": {Wx: 713.000000}, - "a69": {Wx: 791.000000}, - "a7": {Wx: 552.000000}, - "a70": {Wx: 785.000000}, - "a71": {Wx: 791.000000}, - "a72": {Wx: 873.000000}, - "a73": {Wx: 761.000000}, - "a74": {Wx: 762.000000}, - "a75": {Wx: 759.000000}, - "a76": {Wx: 892.000000}, - "a77": {Wx: 892.000000}, - "a78": {Wx: 788.000000}, - "a79": {Wx: 784.000000}, - "a8": {Wx: 537.000000}, - "a81": {Wx: 438.000000}, - "a82": {Wx: 138.000000}, - "a83": {Wx: 277.000000}, - "a84": {Wx: 415.000000}, - "a85": {Wx: 509.000000}, - "a86": {Wx: 410.000000}, - "a87": {Wx: 234.000000}, - "a88": {Wx: 234.000000}, - "a89": 
{Wx: 390.000000}, - "a9": {Wx: 577.000000}, - "a90": {Wx: 390.000000}, - "a91": {Wx: 276.000000}, - "a92": {Wx: 276.000000}, - "a93": {Wx: 317.000000}, - "a94": {Wx: 317.000000}, - "a95": {Wx: 334.000000}, - "a96": {Wx: 334.000000}, - "a97": {Wx: 392.000000}, - "a98": {Wx: 392.000000}, - "a99": {Wx: 668.000000}, - "space": {Wx: 278.000000}, +var zapfDingbatsCharMetrics = map[rune]CharMetrics{ + ' ': {Wx: 278}, + '→': {Wx: 838}, + '↔': {Wx: 1016}, + '↕': {Wx: 458}, + '①': {Wx: 788}, + '②': {Wx: 788}, + '③': {Wx: 788}, + '④': {Wx: 788}, + '⑤': {Wx: 788}, + '⑥': {Wx: 788}, + '⑦': {Wx: 788}, + '⑧': {Wx: 788}, + '⑨': {Wx: 788}, + '⑩': {Wx: 788}, + '■': {Wx: 761}, + '▲': {Wx: 892}, + '▼': {Wx: 892}, + '◆': {Wx: 788}, + '●': {Wx: 791}, + '◗': {Wx: 438}, + '★': {Wx: 816}, + '☎': {Wx: 719}, + '☛': {Wx: 960}, + '☞': {Wx: 939}, + '♠': {Wx: 626}, + '♣': {Wx: 776}, + '♥': {Wx: 694}, + '♦': {Wx: 595}, + '✁': {Wx: 974}, + '✂': {Wx: 961}, + '✃': {Wx: 974}, + '✄': {Wx: 980}, + '✆': {Wx: 789}, + '✇': {Wx: 790}, + '✈': {Wx: 791}, + '✉': {Wx: 690}, + '✌': {Wx: 549}, + '✍': {Wx: 855}, + '✎': {Wx: 911}, + '✏': {Wx: 933}, + '✐': {Wx: 911}, + '✑': {Wx: 945}, + '✒': {Wx: 974}, + '✓': {Wx: 755}, + '✔': {Wx: 846}, + '✕': {Wx: 762}, + '✖': {Wx: 761}, + '✗': {Wx: 571}, + '✘': {Wx: 677}, + '✙': {Wx: 763}, + '✚': {Wx: 760}, + '✛': {Wx: 759}, + '✜': {Wx: 754}, + '✝': {Wx: 494}, + '✞': {Wx: 552}, + '✟': {Wx: 537}, + '✠': {Wx: 577}, + '✡': {Wx: 692}, + '✢': {Wx: 786}, + '✣': {Wx: 788}, + '✤': {Wx: 788}, + '✥': {Wx: 790}, + '✦': {Wx: 793}, + '✧': {Wx: 794}, + '✩': {Wx: 823}, + '✪': {Wx: 789}, + '✫': {Wx: 841}, + '✬': {Wx: 823}, + '✭': {Wx: 833}, + '✮': {Wx: 816}, + '✯': {Wx: 831}, + '✰': {Wx: 923}, + '✱': {Wx: 744}, + '✲': {Wx: 723}, + '✳': {Wx: 749}, + '✴': {Wx: 790}, + '✵': {Wx: 792}, + '✶': {Wx: 695}, + '✷': {Wx: 776}, + '✸': {Wx: 768}, + '✹': {Wx: 792}, + '✺': {Wx: 759}, + '✻': {Wx: 707}, + '✼': {Wx: 708}, + '✽': {Wx: 682}, + '✾': {Wx: 701}, + '✿': {Wx: 826}, + '❀': {Wx: 815}, + '❁': {Wx: 789}, 
+ '❂': {Wx: 789}, + '❃': {Wx: 707}, + '❄': {Wx: 687}, + '❅': {Wx: 696}, + '❆': {Wx: 689}, + '❇': {Wx: 786}, + '❈': {Wx: 787}, + '❉': {Wx: 713}, + '❊': {Wx: 791}, + '❋': {Wx: 785}, + '❍': {Wx: 873}, + '❏': {Wx: 762}, + '❐': {Wx: 762}, + '❑': {Wx: 759}, + '❒': {Wx: 759}, + '❖': {Wx: 784}, + '❘': {Wx: 138}, + '❙': {Wx: 277}, + '❚': {Wx: 415}, + '❛': {Wx: 392}, + '❜': {Wx: 392}, + '❝': {Wx: 668}, + '❞': {Wx: 668}, + '❡': {Wx: 732}, + '❢': {Wx: 544}, + '❣': {Wx: 544}, + '❤': {Wx: 910}, + '❥': {Wx: 667}, + '❦': {Wx: 760}, + '❧': {Wx: 760}, + '❶': {Wx: 788}, + '❷': {Wx: 788}, + '❸': {Wx: 788}, + '❹': {Wx: 788}, + '❺': {Wx: 788}, + '❻': {Wx: 788}, + '❼': {Wx: 788}, + '❽': {Wx: 788}, + '❾': {Wx: 788}, + '❿': {Wx: 788}, + '➀': {Wx: 788}, + '➁': {Wx: 788}, + '➂': {Wx: 788}, + '➃': {Wx: 788}, + '➄': {Wx: 788}, + '➅': {Wx: 788}, + '➆': {Wx: 788}, + '➇': {Wx: 788}, + '➈': {Wx: 788}, + '➉': {Wx: 788}, + '➊': {Wx: 788}, + '➋': {Wx: 788}, + '➌': {Wx: 788}, + '➍': {Wx: 788}, + '➎': {Wx: 788}, + '➏': {Wx: 788}, + '➐': {Wx: 788}, + '➑': {Wx: 788}, + '➒': {Wx: 788}, + '➓': {Wx: 788}, + '➔': {Wx: 894}, + '➘': {Wx: 748}, + '➙': {Wx: 924}, + '➚': {Wx: 748}, + '➛': {Wx: 918}, + '➜': {Wx: 927}, + '➝': {Wx: 928}, + '➞': {Wx: 928}, + '➟': {Wx: 834}, + '➠': {Wx: 873}, + '➡': {Wx: 828}, + '➢': {Wx: 924}, + '➣': {Wx: 924}, + '➤': {Wx: 917}, + '➥': {Wx: 930}, + '➦': {Wx: 931}, + '➧': {Wx: 463}, + '➨': {Wx: 883}, + '➩': {Wx: 836}, + '➪': {Wx: 836}, + '➫': {Wx: 867}, + '➬': {Wx: 867}, + '➭': {Wx: 696}, + '➮': {Wx: 696}, + '➯': {Wx: 874}, + '➱': {Wx: 874}, + '➲': {Wx: 760}, + '➳': {Wx: 946}, + '➴': {Wx: 771}, + '➵': {Wx: 865}, + '➶': {Wx: 771}, + '➷': {Wx: 888}, + '➸': {Wx: 967}, + '➹': {Wx: 888}, + '➺': {Wx: 831}, + '➻': {Wx: 873}, + '➼': {Wx: 927}, + '➽': {Wx: 970}, + '➾': {Wx: 918}, + '\uf8d7': {Wx: 390}, + '\uf8d8': {Wx: 390}, + '\uf8d9': {Wx: 317}, + '\uf8da': {Wx: 317}, + '\uf8db': {Wx: 276}, + '\uf8dc': {Wx: 276}, + '\uf8dd': {Wx: 509}, + '\uf8de': {Wx: 509}, + '\uf8df': {Wx: 410}, + 
'\uf8e0': {Wx: 410}, + '\uf8e1': {Wx: 234}, + '\uf8e2': {Wx: 234}, + '\uf8e3': {Wx: 334}, + '\uf8e4': {Wx: 334}, } diff --git a/pdf/model/fonts/std_times.go b/pdf/model/fonts/std_times.go index d4160854..eed3592a 100644 --- a/pdf/model/fonts/std_times.go +++ b/pdf/model/fonts/std_times.go @@ -118,33 +118,33 @@ var timesOnce sync.Once func initTimes() { // unpack font metrics - timesRomanCharMetrics = make(map[GlyphName]CharMetrics, len(type1CommonGlyphs)) - timesBoldCharMetrics = make(map[GlyphName]CharMetrics, len(type1CommonGlyphs)) - timesBoldItalicCharMetrics = make(map[GlyphName]CharMetrics, len(type1CommonGlyphs)) - timesItalicCharMetrics = make(map[GlyphName]CharMetrics, len(type1CommonGlyphs)) - for i, glyph := range type1CommonGlyphs { - timesRomanCharMetrics[glyph] = CharMetrics{Wx: float64(timesRomanWx[i])} - timesBoldCharMetrics[glyph] = CharMetrics{Wx: float64(timesBoldWx[i])} - timesBoldItalicCharMetrics[glyph] = CharMetrics{Wx: float64(timesBoldItalicWx[i])} - timesItalicCharMetrics[glyph] = CharMetrics{Wx: float64(timesItalicWx[i])} + timesRomanCharMetrics = make(map[rune]CharMetrics, len(type1CommonRunes)) + timesBoldCharMetrics = make(map[rune]CharMetrics, len(type1CommonRunes)) + timesBoldItalicCharMetrics = make(map[rune]CharMetrics, len(type1CommonRunes)) + timesItalicCharMetrics = make(map[rune]CharMetrics, len(type1CommonRunes)) + for i, r := range type1CommonRunes { + timesRomanCharMetrics[r] = CharMetrics{Wx: float64(timesRomanWx[i])} + timesBoldCharMetrics[r] = CharMetrics{Wx: float64(timesBoldWx[i])} + timesBoldItalicCharMetrics[r] = CharMetrics{Wx: float64(timesBoldItalicWx[i])} + timesItalicCharMetrics[r] = CharMetrics{Wx: float64(timesItalicWx[i])} } } // timesRomanCharMetrics are the font metrics loaded from afms/Times-Roman.afm. // See afms/MustRead.html for license information. 
-var timesRomanCharMetrics map[GlyphName]CharMetrics +var timesRomanCharMetrics map[rune]CharMetrics // timesBoldCharMetrics are the font metrics loaded from afms/Times-Bold.afm. // See afms/MustRead.html for license information. -var timesBoldCharMetrics map[GlyphName]CharMetrics +var timesBoldCharMetrics map[rune]CharMetrics // timesBoldItalicCharMetrics are the font metrics loaded from afms/Times-BoldItalic.afm. // See afms/MustRead.html for license information. -var timesBoldItalicCharMetrics map[GlyphName]CharMetrics +var timesBoldItalicCharMetrics map[rune]CharMetrics // timesItalicCharMetrics font metrics loaded from afms/Times-Italic.afm. // See afms/MustRead.html for license information. -var timesItalicCharMetrics map[GlyphName]CharMetrics +var timesItalicCharMetrics map[rune]CharMetrics // timesRomanWx are the font metrics loaded from afms/Times-Roman.afm. // See afms/MustRead.html for license information. From 0327d18eb6ccff6942ed4ff90e3c0789eab322c2 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Tue, 1 Jan 2019 23:24:11 +0200 Subject: [PATCH 08/11] textencoding: remove all unrelated methods from the interface --- pdf/annotator/field_appearance.go | 32 +- pdf/internal/textencoding/differences.go | 43 +-- pdf/internal/textencoding/encoder.go | 37 ++- pdf/internal/textencoding/identity.go | 30 +- pdf/internal/textencoding/simple.go | 29 +- .../textencoding/simple_winansi_test.go | 8 +- pdf/internal/textencoding/truetype.go | 20 +- pdf/model/font.go | 81 ----- pdf/model/font_test.go | 278 +++++++++--------- 9 files changed, 217 insertions(+), 341 deletions(-) diff --git a/pdf/annotator/field_appearance.go b/pdf/annotator/field_appearance.go index 32519c6a..0c96a3e1 100644 --- a/pdf/annotator/field_appearance.go +++ b/pdf/annotator/field_appearance.go @@ -12,7 +12,6 @@ import ( "github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/pdf/contentstream" "github.com/unidoc/unidoc/pdf/core" - "github.com/unidoc/unidoc/pdf/internal/textencoding" 
"github.com/unidoc/unidoc/pdf/model" ) @@ -33,9 +32,8 @@ type FieldAppearance struct { type AppearanceStyle struct { // How much of Rect height to fill when autosizing text. AutoFontSizeFraction float64 - // Glyph used for check mark in checkboxes (for ZapfDingbats font). - // TODO(dennwc): can be a rune - CheckmarkGlyph textencoding.GlyphName + // CheckmarkRune is a rune used for check mark in checkboxes (for ZapfDingbats font). + CheckmarkRune rune BorderSize float64 BorderColor model.PdfColor @@ -73,7 +71,7 @@ func (fa FieldAppearance) Style() AppearanceStyle { // Default values returned if style not set. return AppearanceStyle{ AutoFontSizeFraction: 0.65, - CheckmarkGlyph: "a20", + CheckmarkRune: '✔', BorderSize: 0.0, BorderColor: model.NewPdfColorDeviceGray(0), FillColor: model.NewPdfColorDeviceGray(1), @@ -759,18 +757,13 @@ func genFieldCheckboxAppearance(wa *model.PdfAnnotationWidget, fbtn *model.PdfFi fontsize := style.AutoFontSizeFraction * height - checkcode, ok := zapfdb.Encoder().GlyphToCharcode(style.CheckmarkGlyph) - if !ok { - return nil, errors.New("checkmark glyph - charcode mapping not found") - } - checkrune, ok := zapfdb.Encoder().CharcodeToRune(checkcode) - if !ok { - return nil, errors.New("checkmark glyph - rune mapping not found") - } - checkmetrics, ok := zapfdb.GetRuneMetrics(checkrune) + checkmetrics, ok := zapfdb.GetRuneMetrics(style.CheckmarkRune) if !ok { return nil, errors.New("glyph not found") } + enc := zapfdb.Encoder() + checkstr := enc.Encode(string(style.CheckmarkRune)) + checkwidth := checkmetrics.Wx * fontsize / 1000.0 // TODO: Get bbox of specific glyph that is chosen. Choice of specific value will cause slight // deviations for other glyphs, but should be fairly close. @@ -791,7 +784,7 @@ func genFieldCheckboxAppearance(wa *model.PdfAnnotationWidget, fbtn *model.PdfFi Add_BT(). Add_Tf("ZaDb", fontsize). Add_Td(tx, ty). - Add_Tj(*core.MakeString(string(checkcode))). + Add_Tj(*core.MakeStringFromBytes(checkstr)). Add_ET(). 
Add_Q() @@ -1091,11 +1084,10 @@ func (style *AppearanceStyle) applyAppearanceCharacteristics(mkDict *core.PdfObj // Normal caption. if CA, has := core.GetString(mkDict.Get("CA")); has && font != nil { encoded := CA.Bytes() - if len(encoded) == 1 { - // TODO: this may be a multi-byte encoding - charcode := textencoding.CharCode(encoded[0]) - if checkglyph, has := font.Encoder().CharcodeToGlyph(charcode); has { - style.CheckmarkGlyph = checkglyph + if len(encoded) != 0 { + runes := []rune(font.Encoder().Decode(encoded)) + if len(runes) == 1 { + style.CheckmarkRune = runes[0] } } } diff --git a/pdf/internal/textencoding/differences.go b/pdf/internal/textencoding/differences.go index bab1f309..cfdf5845 100644 --- a/pdf/internal/textencoding/differences.go +++ b/pdf/internal/textencoding/differences.go @@ -126,9 +126,9 @@ func (enc *differencesEncoding) Charcodes() []CharCode { return codes } -// Encode converts a Go unicode string `raw` to a PDF encoded string. -func (enc *differencesEncoding) Encode(raw string) []byte { - runes := []rune(raw) +// Encode converts a Go unicode string to a PDF encoded string. +func (enc *differencesEncoding) Encode(str string) []byte { + runes := []rune(str) buf := bytes.NewBuffer(nil) buf.Grow(len(runes)) for _, r := range runes { @@ -139,6 +139,17 @@ func (enc *differencesEncoding) Encode(raw string) []byte { return buf.Bytes() } +// Decode converts PDF encoded string to a Go unicode string. +func (enc *differencesEncoding) Decode(raw []byte) string { + runes := make([]rune, 0, len(raw)) + // relies on the fact that underlying encoding is 8 bit + for _, b := range raw { + r, _ := enc.CharcodeToRune(CharCode(b)) + runes = append(runes, r) + } + return string(runes) +} + // RuneToCharcode returns the PDF character code corresponding to rune `r`. // The bool return flag is true if there was a match, and false otherwise. 
func (enc *differencesEncoding) RuneToCharcode(r rune) (CharCode, bool) { @@ -161,32 +172,6 @@ func (enc *differencesEncoding) CharcodeToRune(code CharCode) (rune, bool) { return enc.base.CharcodeToRune(code) } -// CharcodeToGlyph returns the glyph name for character code `code`. -// The bool return flag is true if there was a match, and false otherwise. -func (enc *differencesEncoding) CharcodeToGlyph(code CharCode) (GlyphName, bool) { - if glyph, ok := enc.differences[code]; ok { - return glyph, true - } - return enc.base.CharcodeToGlyph(code) -} - -// GlyphToCharcode returns character code for glyph `glyph`. -// The bool return flag is true if there was a match, and false otherwise. -func (enc *differencesEncoding) GlyphToCharcode(glyph GlyphName) (CharCode, bool) { - // TODO: store reverse map? - for code, glyph2 := range enc.differences { - if glyph2 == glyph { - return code, true - } - } - // TODO(dennwc): only redirects the call - remove from the interface - r, ok := GlyphToRune(glyph) - if !ok { - return MissingCodeRune, false - } - return enc.RuneToCharcode(r) -} - // ToPdfObject returns the encoding as a PdfObject. func (enc *differencesEncoding) ToPdfObject() core.PdfObject { dict := core.MakeDict() diff --git a/pdf/internal/textencoding/encoder.go b/pdf/internal/textencoding/encoder.go index dd0b5c56..d1dc772e 100644 --- a/pdf/internal/textencoding/encoder.go +++ b/pdf/internal/textencoding/encoder.go @@ -23,16 +23,11 @@ type TextEncoder interface { // String returns a string that describes the TextEncoder instance. String() string - // Encode converts the Go unicode string `raw` to a PDF encoded string. - Encode(raw string) []byte + // Encode converts the Go unicode string to a PDF encoded string. + Encode(str string) []byte - // CharcodeToGlyph returns the glyph name for character code `code`. - // The bool return flag is true if there was a match, and false otherwise. 
- CharcodeToGlyph(code CharCode) (GlyphName, bool) - - // GlyphToCharcode returns the PDF character code corresponding to glyph name `glyph`. - // The bool return flag is true if there was a match, and false otherwise. - GlyphToCharcode(glyph GlyphName) (CharCode, bool) + // Decode converts PDF encoded string to a Go unicode string. + Decode(raw []byte) string // RuneToCharcode returns the PDF character code corresponding to rune `r`. // The bool return flag is true if there was a match, and false otherwise. @@ -85,3 +80,27 @@ func encodeString16bit(enc TextEncoder, raw string) []byte { } return encoded } + +// decodeString16bit converts PDF encoded string to a Go unicode string using the encoder `enc`. +// Each character will be decoded from two bytes. +func decodeString16bit(enc TextEncoder, raw []byte) string { + // bytes -> character codes -> runes + runes := make([]rune, 0, len(raw)/2+len(raw)%2) + + for len(raw) > 0 { + if len(raw) == 1 { + raw = []byte{raw[0], 0} + } + // Each entry represented by 2 bytes. + code := CharCode(binary.BigEndian.Uint16(raw[:])) + raw = raw[2:] + + r, ok := enc.CharcodeToRune(code) + if !ok { + common.Log.Debug("Failed to map charcode to rune. charcode=%#x", code) + continue + } + runes = append(runes, r) + } + return string(runes) +} diff --git a/pdf/internal/textencoding/identity.go b/pdf/internal/textencoding/identity.go index bcb3a1d3..55148415 100644 --- a/pdf/internal/textencoding/identity.go +++ b/pdf/internal/textencoding/identity.go @@ -29,32 +29,14 @@ func (enc IdentityEncoder) String() string { return enc.baseName } -// Encode converts the Go unicode string `raw` to a PDF encoded string. -func (enc IdentityEncoder) Encode(raw string) []byte { - return encodeString16bit(enc, raw) +// Encode converts the Go unicode string to a PDF encoded string. +func (enc IdentityEncoder) Encode(str string) []byte { + return encodeString16bit(enc, str) } -// CharcodeToGlyph returns the glyph name matching character code `code`. 
-// The bool return flag is true if there was a match, and false otherwise. -func (enc IdentityEncoder) CharcodeToGlyph(code CharCode) (GlyphName, bool) { - r, found := enc.CharcodeToRune(code) - if found && r == 0x20 { - return "space", true - } - - // Returns "uniXXXX" format where XXXX is the code in hex format. - glyph := GlyphName(fmt.Sprintf("uni%.4X", code)) - return glyph, true -} - -// GlyphToCharcode returns the character code matching glyph `glyph`. -// The bool return flag is true if there was a match, and false otherwise. -func (enc IdentityEncoder) GlyphToCharcode(glyph GlyphName) (CharCode, bool) { - r, ok := enc.GlyphToRune(glyph) - if !ok { - return 0, false - } - return enc.RuneToCharcode(r) +// Decode converts PDF encoded string to a Go unicode string. +func (enc IdentityEncoder) Decode(raw []byte) string { + return decodeString16bit(enc, raw) } // RuneToCharcode converts rune `r` to a PDF character code. diff --git a/pdf/internal/textencoding/simple.go b/pdf/internal/textencoding/simple.go index 2b22f3db..77d056df 100644 --- a/pdf/internal/textencoding/simple.go +++ b/pdf/internal/textencoding/simple.go @@ -105,11 +105,18 @@ type simpleEncoding struct { decode map[byte]rune } -func (enc *simpleEncoding) Encode(raw string) []byte { - data, _ := enc.NewEncoder().Bytes([]byte(raw)) +// Encode converts the Go unicode string to a PDF encoded string. +func (enc *simpleEncoding) Encode(str string) []byte { + data, _ := enc.NewEncoder().Bytes([]byte(str)) return data } +// Decode converts PDF encoded string to a Go unicode string. +func (enc *simpleEncoding) Decode(raw []byte) string { + data, _ := enc.NewDecoder().Bytes(raw) + return string(data) +} + // NewDecoder implements encoding.Encoding. 
func (enc *simpleEncoding) NewDecoder() *encoding.Decoder { return &encoding.Decoder{Transformer: simpleDecoder{m: enc.decode}} @@ -218,24 +225,6 @@ func (enc *simpleEncoding) CharcodeToRune(code CharCode) (rune, bool) { return r, ok } -func (enc *simpleEncoding) CharcodeToGlyph(code CharCode) (GlyphName, bool) { - // TODO(dennwc): only redirects the call - remove from the interface - r, ok := enc.CharcodeToRune(code) - if !ok { - return "", false - } - return runeToGlyph(r, glyphlistRuneToGlyphMap) -} - -func (enc *simpleEncoding) GlyphToCharcode(glyph GlyphName) (CharCode, bool) { - // TODO(dennwc): only redirects the call - remove from the interface - r, ok := GlyphToRune(glyph) - if !ok { - return MissingCodeRune, false - } - return enc.RuneToCharcode(r) -} - func (enc *simpleEncoding) ToPdfObject() core.PdfObject { switch enc.baseName { case "MacRomanEncoding", "MacExpertEncoding", baseWinAnsi: diff --git a/pdf/internal/textencoding/simple_winansi_test.go b/pdf/internal/textencoding/simple_winansi_test.go index 59bae88c..708be7bf 100644 --- a/pdf/internal/textencoding/simple_winansi_test.go +++ b/pdf/internal/textencoding/simple_winansi_test.go @@ -10,9 +10,9 @@ import "testing" func TestWinAnsiEncoder(t *testing.T) { enc := NewWinAnsiEncoder() - glyph, found := enc.CharcodeToGlyph(32) - if !found || glyph != "space" { - t.Errorf("Glyph != space") + r, found := enc.CharcodeToRune(32) + if !found || r != ' ' { + t.Errorf("rune != space") return } code, found := enc.RuneToCharcode('þ') @@ -21,7 +21,7 @@ func TestWinAnsiEncoder(t *testing.T) { return } - glyph, found = RuneToGlyph('þ') + glyph, found := RuneToGlyph('þ') if !found || glyph != "thorn" { t.Errorf("Glyph != thorn") return diff --git a/pdf/internal/textencoding/truetype.go b/pdf/internal/textencoding/truetype.go index b7d5b843..e95115c0 100644 --- a/pdf/internal/textencoding/truetype.go +++ b/pdf/internal/textencoding/truetype.go @@ -66,22 +66,14 @@ func (enc TrueTypeFontEncoder) String() string { 
return fmt.Sprintf("TRUETYPE_ENCODER{%s}", strings.Join(parts, ", ")) } -// Encode converts the Go unicode string `raw` to a PDF encoded string. -func (enc TrueTypeFontEncoder) Encode(raw string) []byte { - return encodeString16bit(enc, raw) +// Encode converts the Go unicode string to a PDF encoded string. +func (enc TrueTypeFontEncoder) Encode(str string) []byte { + return encodeString16bit(enc, str) } -// CharcodeToGlyph returns the glyph name matching character code `code`. -// The bool return flag is true if there was a match, and false otherwise. -func (enc TrueTypeFontEncoder) CharcodeToGlyph(code CharCode) (GlyphName, bool) { - r, found := enc.CharcodeToRune(code) - if found && r == ' ' { - return "space", true - } - - // Returns "uniXXXX" format where XXXX is the code in hex format. - glyph := GlyphName(fmt.Sprintf("uni%.4X", code)) - return glyph, true +// Decode converts PDF encoded string to a Go unicode string. +func (enc TrueTypeFontEncoder) Decode(raw []byte) string { + return decodeString16bit(enc, raw) } // GlyphToCharcode returns character code matching the glyph name `glyph`. diff --git a/pdf/model/font.go b/pdf/model/font.go index 95f37b3c..68f21e23 100644 --- a/pdf/model/font.go +++ b/pdf/model/font.go @@ -126,87 +126,6 @@ func NewStandard14FontMustCompile(basefont fonts.StdFontName) *PdfFont { return font } -// NewStandard14FontWithEncoding returns the standard 14 font named `basefont` as a *PdfFont and -// a TextEncoder that encodes all the runes in `alphabet`, or an error if this is not possible. -// An error can occur if `basefont` is not one the standard 14 font names. -func NewStandard14FontWithEncoding(basefont fonts.StdFontName, alphabet map[rune]int) (*PdfFont, - textencoding.SimpleEncoder, error) { - - std, err := newStandard14Font(basefont) - if err != nil { - return nil, nil, err - } - - // TODO(dennwc): what if the font is Symbol and uses different encoding? 
- baseEncoder := "MacRomanEncoding" - common.Log.Trace("NewStandard14FontWithEncoding: basefont=%#q baseEncoder=%#q alphabet=%q", - basefont, baseEncoder, string(sortedAlphabet(alphabet))) - - encoder, err := textencoding.NewSimpleTextEncoder(baseEncoder, nil) - if err != nil { - return nil, nil, err - } - - // glyphCode are the encoding glyphs. We need to match them to the font glyphs. - glyphCode := make(map[textencoding.GlyphName]textencoding.CharCode) - - // slots are the indexes in the encoding where the new character codes are added. - // slots are unused indexes, which are filled first. slots1 are the used indexes. - var slots, slots1 []textencoding.CharCode - for code := textencoding.CharCode(1); code <= 0xff; code++ { - if glyph, ok := encoder.CharcodeToGlyph(code); ok { - glyphCode[glyph] = code - // Don't overwrite space - if glyph != "space" { - - slots1 = append(slots1, code) - } - } else { - slots = append(slots, code) - } - } - slots = append(slots, slots1...) - - // `glyphs` are the font glyphs that we need to encode. - var glyphs []textencoding.GlyphName - for _, r := range sortedAlphabet(alphabet) { - glyph, ok := textencoding.RuneToGlyph(r) - if !ok { - common.Log.Debug("No glyph for rune 0x%02x=%c", r, r) - continue - } - if _, ok = std.fontMetrics[r]; !ok { - common.Log.Trace("Glyph %q (0x%04x=%c)not in font", glyph, r, r) - continue - } - if len(glyphs) >= 255 { - common.Log.Debug("Too many characters for encoding") - break - } - glyphs = append(glyphs, glyph) - - } - - // Fill the slots, starting with the empty ones. 
- slotIdx := 0 - differences := make(map[textencoding.CharCode]textencoding.GlyphName) - for _, glyph := range glyphs { - if _, ok := glyphCode[glyph]; !ok { - differences[slots[slotIdx]] = glyph - slotIdx++ - } - } - - encoder, err = textencoding.NewSimpleTextEncoder(baseEncoder, differences) - if err != nil { - return nil, nil, err - } - std.std14Encoder = encoder - std.updateStandard14Font() - - return &PdfFont{context: &std}, encoder, nil -} - // GetAlphabet returns a map of the runes in `text` and their frequencies. func GetAlphabet(text string) map[rune]int { alphabet := map[rune]int{} diff --git a/pdf/model/font_test.go b/pdf/model/font_test.go index ddf3705b..539f1209 100644 --- a/pdf/model/font_test.go +++ b/pdf/model/font_test.go @@ -9,7 +9,6 @@ import ( "errors" "fmt" "io/ioutil" - "strings" "testing" "github.com/unidoc/unidoc/common" @@ -639,162 +638,161 @@ endobj // The expected encoding is StandardEncoding with the applied differences. baseEncoding := newStandandTextEncoder(t) - differencesMap := map[textencoding.CharCode]textencoding.GlyphName{ - 24: `/breve`, - 25: `/caron`, - 26: `/circumflex`, - 27: `/dotaccent`, - 28: `/hungarumlaut`, - 29: `/ogonek`, - 30: `/ring`, - 31: `/tilde`, - 39: `/quotesingle`, - 96: `/grave`, - 128: `/bullet`, - 129: `/dagger`, - 130: `/daggerdbl`, - 131: `/ellipsis`, - 132: `/emdash`, - 133: `/endash`, - 134: `/florin`, - 135: `/fraction`, - 136: `/guilsinglleft`, - 137: `/guilsinglright`, - 138: `/minus`, - 139: `/perthousand`, - 140: `/quotedblbase`, - 141: `/quotedblleft`, - 142: `/quotedblright`, - 143: `/quoteleft`, - 144: `/quoteright`, - 145: `/quotesinglbase`, - 146: `/trademark`, - 147: `/fi`, - 148: `/fl`, - 149: `/Lslash`, - 150: `/OE`, - 151: `/Scaron`, - 152: `/Ydieresis`, - 153: `/Zcaron`, - 154: `/dotlessi`, - 155: `/lslash`, - 156: `/oe`, - 157: `/scaron`, - 158: `/zcaron`, - 160: `/Euro`, - 164: `/currency`, - 166: `/brokenbar`, - 168: `/dieresis`, - 169: `/copyright`, - 170: `/ordfeminine`, - 172: 
`/logicalnot`, - 173: `/.notdef`, - 174: `/registered`, - 175: `/macron`, - 176: `/degree`, - 177: `/plusminus`, - 178: `/twosuperior`, - 179: `/threesuperior`, - 180: `/acute`, - 181: `/mu`, - 183: `/periodcentered`, - 184: `/cedilla`, - 185: `/onesuperior`, - 186: `/ordmasculine`, - 188: `/onequarter`, - 189: `/onehalf`, - 190: `/threequarters`, - 192: `/Agrave`, - 193: `/Aacute`, - 194: `/Acircumflex`, - 195: `/Atilde`, - 196: `/Adieresis`, - 197: `/Aring`, - 198: `/AE`, - 199: `/Ccedilla`, - 200: `/Egrave`, - 201: `/Eacute`, - 202: `/Ecircumflex`, - 203: `/Edieresis`, - 204: `/Igrave`, - 205: `/Iacute`, - 206: `/Icircumflex`, - 207: `/Idieresis`, - 208: `/Eth`, - 209: `/Ntilde`, - 210: `/Ograve`, - 211: `/Oacute`, - 212: `/Ocircumflex`, - 213: `/Otilde`, - 214: `/Odieresis`, - 215: `/multiply`, - 216: `/Oslash`, - 217: `/Ugrave`, - 218: `/Uacute`, - 219: `/Ucircumflex`, - 220: `/Udieresis`, - 221: `/Yacute`, - 222: `/Thorn`, - 223: `/germandbls`, - 224: `/agrave`, - 225: `/aacute`, - 226: `/acircumflex`, - 227: `/atilde`, - 228: `/adieresis`, - 229: `/aring`, - 230: `/ae`, - 231: `/ccedilla`, - 232: `/egrave`, - 233: `/eacute`, - 234: `/ecircumflex`, - 235: `/edieresis`, - 236: `/igrave`, - 237: `/iacute`, - 238: `/icircumflex`, - 239: `/idieresis`, - 240: `/eth`, - 241: `/ntilde`, - 242: `/ograve`, - 243: `/oacute`, - 244: `/ocircumflex`, - 245: `/otilde`, - 246: `/odieresis`, - 247: `/divide`, - 248: `/oslash`, - 249: `/ugrave`, - 250: `/uacute`, - 251: `/ucircumflex`, - 252: `/udieresis`, - 253: `/yacute`, - 254: `/thorn`, - 255: `/ydieresis`, + differencesMap := map[textencoding.CharCode]rune{ + 24: '˘', + 25: 'ˇ', + 26: 'ˆ', + 27: '˙', + 28: '˝', + 29: '˛', + 30: '˚', + 31: '˜', + 39: '\'', + 96: '`', + 128: '•', + 129: '†', + 130: '‡', + 131: '…', + 132: '—', + 133: '–', + 134: 'ƒ', + 135: '⁄', + 136: '‹', + 137: '›', + 138: '−', + 139: '‰', + 140: '„', + 141: '“', + 142: '”', + 143: '‘', + 144: '’', + 145: '‚', + 146: '™', + 147: 'fi', + 148: 'fl', + 
149: 'Ł', + 150: 'Œ', + 151: 'Š', + 152: 'Ÿ', + 153: 'Ž', + 154: 'ı', + 155: 'ł', + 156: 'œ', + 157: 'š', + 158: 'ž', + 160: '€', + 164: '¤', + 166: '¦', + 168: '¨', + 169: '©', + 170: 'ª', + 172: '¬', + 173: '�', + 174: '®', + 175: '¯', + 176: '°', + 177: '±', + 178: '²', + 179: '³', + 180: '´', + 181: 'µ', + 183: '·', + 184: '¸', + 185: '¹', + 186: 'º', + 188: '¼', + 189: '½', + 190: '¾', + 192: 'À', + 193: 'Á', + 194: 'Â', + 195: 'Ã', + 196: 'Ä', + 197: 'Å', + 198: 'Æ', + 199: 'Ç', + 200: 'È', + 201: 'É', + 202: 'Ê', + 203: 'Ë', + 204: 'Ì', + 205: 'Í', + 206: 'Î', + 207: 'Ï', + 208: 'Ð', + 209: 'Ñ', + 210: 'Ò', + 211: 'Ó', + 212: 'Ô', + 213: 'Õ', + 214: 'Ö', + 215: '×', + 216: 'Ø', + 217: 'Ù', + 218: 'Ú', + 219: 'Û', + 220: 'Ü', + 221: 'Ý', + 222: 'Þ', + 223: 'ß', + 224: 'à', + 225: 'á', + 226: 'â', + 227: 'ã', + 228: 'ä', + 229: 'å', + 230: 'æ', + 231: 'ç', + 232: 'è', + 233: 'é', + 234: 'ê', + 235: 'ë', + 236: 'ì', + 237: 'í', + 238: 'î', + 239: 'ï', + 240: 'ð', + 241: 'ñ', + 242: 'ò', + 243: 'ó', + 244: 'ô', + 245: 'õ', + 246: 'ö', + 247: '÷', + 248: 'ø', + 249: 'ù', + 250: 'ú', + 251: 'û', + 252: 'ü', + 253: 'ý', + 254: 'þ', + 255: 'ÿ', } enc := font.Encoder() for code := textencoding.CharCode(32); code < 255; code++ { - fontglyph, has := enc.CharcodeToGlyph(code) + fontrune, has := enc.CharcodeToRune(code) if !has { - baseglyph, bad := baseEncoding.CharcodeToGlyph(code) + baserune, bad := baseEncoding.CharcodeToRune(code) if bad { - t.Fatalf("font not having glyph for char code %d - whereas base encoding had '%s'", code, baseglyph) + t.Fatalf("font not having glyph for char code %d - whereas base encoding had %q", code, baserune) } } // Check if in differencesmap first. 
- glyph, has := differencesMap[code] + rune, has := differencesMap[code] if has { - glyph = textencoding.GlyphName(strings.Trim(string(glyph), `/`)) - if glyph != fontglyph { - t.Fatalf("Mismatch for char code %d, font has: %s and expected is: %s (differences)", code, fontglyph, glyph) + if rune != fontrune { + t.Fatalf("Mismatch for char code %d, font has: %q and expected is: %q (differences)", code, fontrune, rune) } continue } // If not in differences, should be according to StandardEncoding (base). - glyph, has = baseEncoding.CharcodeToGlyph(code) - if has && glyph != fontglyph { - t.Fatalf("Mismatch for char code %d (%X), font has: %s and expected is: %s (StandardEncoding)", code, code, fontglyph, glyph) + rune, has = baseEncoding.CharcodeToRune(code) + if has && rune != fontrune { + t.Fatalf("Mismatch for char code %d (%X), font has: %q and expected is: %q (StandardEncoding)", code, code, fontrune, rune) } } From 203b620067451027c8b12bafc7928c9ef616bf2a Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Wed, 2 Jan 2019 16:54:37 +0200 Subject: [PATCH 09/11] textencoding: init other encodings once and reformat tables --- pdf/internal/textencoding/simple.go | 879 +----------------- pdf/internal/textencoding/simple_mac.go | 121 +++ pdf/internal/textencoding/simple_other.go | 126 +++ pdf/internal/textencoding/simple_pdf.go | 118 +++ pdf/internal/textencoding/simple_symbol.go | 228 ----- .../textencoding/simple_zapfdingbats.go | 241 ----- 6 files changed, 402 insertions(+), 1311 deletions(-) create mode 100644 pdf/internal/textencoding/simple_mac.go create mode 100644 pdf/internal/textencoding/simple_other.go create mode 100644 pdf/internal/textencoding/simple_pdf.go delete mode 100644 pdf/internal/textencoding/simple_symbol.go delete mode 100644 pdf/internal/textencoding/simple_zapfdingbats.go diff --git a/pdf/internal/textencoding/simple.go b/pdf/internal/textencoding/simple.go index 77d056df..6b417c3c 100644 --- a/pdf/internal/textencoding/simple.go +++ 
b/pdf/internal/textencoding/simple.go @@ -8,6 +8,7 @@ package textencoding import ( "errors" "sort" + "sync" "unicode/utf8" "github.com/unidoc/unidoc/common" @@ -50,18 +51,12 @@ func NewCustomSimpleTextEncoder(encoding, differences map[CharCode]GlyphName) (S // NewSimpleTextEncoder returns a simpleEncoder based on predefined encoding `baseName` and // difference map `differences`. func NewSimpleTextEncoder(baseName string, differences map[CharCode]GlyphName) (SimpleEncoder, error) { - var enc SimpleEncoder - if fnc, ok := simple[baseName]; ok { - enc = fnc() - } else { - baseEncoding, ok := simpleEncodings[baseName] - if !ok { - common.Log.Debug("ERROR: NewSimpleTextEncoder. Unknown encoding %q", baseName) - return nil, errors.New("unsupported font encoding") - } - // FIXME(dennwc): make a global and init once - enc = newSimpleEncoderFromMap(baseName, baseEncoding) + fnc, ok := simple[baseName] + if !ok { + common.Log.Debug("ERROR: NewSimpleTextEncoder. Unknown encoding %q", baseName) + return nil, errors.New("unsupported font encoding") } + enc := fnc() if len(differences) != 0 { enc = ApplyDifferences(enc, differences) } @@ -238,835 +233,35 @@ func (enc *simpleEncoding) ToPdfObject() core.PdfObject { return core.MakeIndirectObject(dict) } -// simpleEncodings is a map of the standard 8 bit character encodings. -var simpleEncodings = map[string]map[byte]rune{ - "MacExpertEncoding": { // 165 entries - 0x20: 0x0020, // "space" - 0x21: 0xf721, // "exclamsmall" - 0x22: 0xf6f8, // "Hungarumlautsmall" - 0x23: 0xf7a2, // "centoldstyle" - 0x24: 0xf724, // "dollaroldstyle" - 0x25: 0xf6e4, // "dollarsuperior" - 0x26: 0xf726, // "ampersandsmall" - 0x27: 0xf7b4, // "Acutesmall" - 0x28: 0x207d, // ⁽ "parenleftsuperior" - 0x29: 0x207e, // ⁾ "parenrightsuperior" - 0x2a: 0x2025, // ‥ "twodotenleader" - 0x2b: 0x2024, // ․ "onedotenleader" - 0x2c: 0x002c, // , "comma" - 0x2d: 0x002d, // - "hyphen" - 0x2e: 0x002e, // . 
"period" - 0x2f: 0x2044, // ⁄ "fraction" - 0x30: 0xf730, // "zerooldstyle" - 0x31: 0xf731, // "oneoldstyle" - 0x32: 0xf732, // "twooldstyle" - 0x33: 0xf733, // "threeoldstyle" - 0x34: 0xf734, // "fouroldstyle" - 0x35: 0xf735, // "fiveoldstyle" - 0x36: 0xf736, // "sixoldstyle" - 0x37: 0xf737, // "sevenoldstyle" - 0x38: 0xf738, // "eightoldstyle" - 0x39: 0xf739, // "nineoldstyle" - 0x3a: 0x003a, // : "colon" - 0x3b: 0x003b, // ; "semicolon" - 0x3d: 0xf6de, // "threequartersemdash" - 0x3f: 0xf73f, // "questionsmall" - 0x44: 0xf7f0, // "Ethsmall" - 0x47: 0x00bc, // ¼ "onequarter" - 0x48: 0x00bd, // ½ "onehalf" - 0x49: 0x00be, // ¾ "threequarters" - 0x4a: 0x215b, // ⅛ "oneeighth" - 0x4b: 0x215c, // ⅜ "threeeighths" - 0x4c: 0x215d, // ⅝ "fiveeighths" - 0x4d: 0x215e, // ⅞ "seveneighths" - 0x4e: 0x2153, // ⅓ "onethird" - 0x4f: 0x2154, // ⅔ "twothirds" - 0x56: 0xfb00, // ff "ff" - 0x57: 0xfb01, // fi "fi" - 0x58: 0xfb02, // fl "fl" - 0x59: 0xfb03, // ffi "ffi" - 0x5a: 0xfb04, // ffl "ffl" - 0x5b: 0x208d, // ₍ "parenleftinferior" - 0x5d: 0x208e, // ₎ "parenrightinferior" - 0x5e: 0xf6f6, // "Circumflexsmall" - 0x5f: 0xf6e5, // "hypheninferior" - 0x60: 0xf760, // "Gravesmall" - 0x61: 0xf761, // "Asmall" - 0x62: 0xf762, // "Bsmall" - 0x63: 0xf763, // "Csmall" - 0x64: 0xf764, // "Dsmall" - 0x65: 0xf765, // "Esmall" - 0x66: 0xf766, // "Fsmall" - 0x67: 0xf767, // "Gsmall" - 0x68: 0xf768, // "Hsmall" - 0x69: 0xf769, // "Ismall" - 0x6a: 0xf76a, // "Jsmall" - 0x6b: 0xf76b, // "Ksmall" - 0x6c: 0xf76c, // "Lsmall" - 0x6d: 0xf76d, // "Msmall" - 0x6e: 0xf76e, // "Nsmall" - 0x6f: 0xf76f, // "Osmall" - 0x70: 0xf770, // "Psmall" - 0x71: 0xf771, // "Qsmall" - 0x72: 0xf772, // "Rsmall" - 0x73: 0xf773, // "Ssmall" - 0x74: 0xf774, // "Tsmall" - 0x75: 0xf775, // "Usmall" - 0x76: 0xf776, // "Vsmall" - 0x77: 0xf777, // "Wsmall" - 0x78: 0xf778, // "Xsmall" - 0x79: 0xf779, // "Ysmall" - 0x7a: 0xf77a, // "Zsmall" - 0x7b: 0x20a1, // ₡ "colonmonetary" - 0x7c: 0xf6dc, // "onefitted" - 0x7d: 0xf6dd, // 
"rupiah" - 0x7e: 0xf6fe, // "Tildesmall" - 0x81: 0xf6e9, // "asuperior" - 0x82: 0xf6e0, // "centsuperior" - 0x87: 0xf7e1, // "Aacutesmall" - 0x88: 0xf7e0, // "Agravesmall" - 0x89: 0xf7e2, // "Acircumflexsmall" - 0x8a: 0xf7e4, // "Adieresissmall" - 0x8b: 0xf7e3, // "Atildesmall" - 0x8c: 0xf7e5, // "Aringsmall" - 0x8d: 0xf7e7, // "Ccedillasmall" - 0x8e: 0xf7e9, // "Eacutesmall" - 0x8f: 0xf7e8, // "Egravesmall" - 0x90: 0xf7ea, // "Ecircumflexsmall" - 0x91: 0xf7eb, // "Edieresissmall" - 0x92: 0xf7ed, // "Iacutesmall" - 0x93: 0xf7ec, // "Igravesmall" - 0x94: 0xf7ee, // "Icircumflexsmall" - 0x95: 0xf7ef, // "Idieresissmall" - 0x96: 0xf7f1, // "Ntildesmall" - 0x97: 0xf7f3, // "Oacutesmall" - 0x98: 0xf7f2, // "Ogravesmall" - 0x99: 0xf7f4, // "Ocircumflexsmall" - 0x9a: 0xf7f6, // "Odieresissmall" - 0x9b: 0xf7f5, // "Otildesmall" - 0x9c: 0xf7fa, // "Uacutesmall" - 0x9d: 0xf7f9, // "Ugravesmall" - 0x9e: 0xf7fb, // "Ucircumflexsmall" - 0x9f: 0xf7fc, // "Udieresissmall" - 0xa1: 0x2078, // ⁸ "eightsuperior" - 0xa2: 0x2084, // ₄ "fourinferior" - 0xa3: 0x2083, // ₃ "threeinferior" - 0xa4: 0x2086, // ₆ "sixinferior" - 0xa5: 0x2088, // ₈ "eightinferior" - 0xa6: 0x2087, // ₇ "seveninferior" - 0xa7: 0xf6fd, // "Scaronsmall" - 0xa9: 0xf6df, // "centinferior" - 0xaa: 0x2082, // ₂ "twoinferior" - 0xac: 0xf7a8, // "Dieresissmall" - 0xae: 0xf6f5, // "Caronsmall" - 0xaf: 0xf6f0, // "osuperior" - 0xb0: 0x2085, // ₅ "fiveinferior" - 0xb2: 0xf6e1, // "commainferior" - 0xb3: 0xf6e7, // "periodinferior" - 0xb4: 0xf7fd, // "Yacutesmall" - 0xb6: 0xf6e3, // "dollarinferior" - 0xb9: 0xf7fe, // "Thornsmall" - 0xbb: 0x2089, // ₉ "nineinferior" - 0xbc: 0x2080, // ₀ "zeroinferior" - 0xbd: 0xf6ff, // "Zcaronsmall" - 0xbe: 0xf7e6, // "AEsmall" - 0xbf: 0xf7f8, // "Oslashsmall" - 0xc0: 0xf7bf, // "questiondownsmall" - 0xc1: 0x2081, // ₁ "oneinferior" - 0xc2: 0xf6f9, // "Lslashsmall" - 0xc9: 0xf7b8, // "Cedillasmall" - 0xcf: 0xf6fa, // "OEsmall" - 0xd0: 0x2012, // ‒ "figuredash" - 0xd1: 0xf6e6, // 
"hyphensuperior" - 0xd6: 0xf7a1, // "exclamdownsmall" - 0xd8: 0xf7ff, // "Ydieresissmall" - 0xda: 0x00b9, // ¹ "onesuperior" - 0xdb: 0x00b2, // ² "twosuperior" - 0xdc: 0x00b3, // ³ "threesuperior" - 0xdd: 0x2074, // ⁴ "foursuperior" - 0xde: 0x2075, // ⁵ "fivesuperior" - 0xdf: 0x2076, // ⁶ "sixsuperior" - 0xe0: 0x2077, // ⁷ "sevensuperior" - 0xe1: 0x2079, // ⁹ "ninesuperior" - 0xe2: 0x2070, // ⁰ "zerosuperior" - 0xe4: 0xf6ec, // "esuperior" - 0xe5: 0xf6f1, // "rsuperior" - 0xe6: 0xf6f3, // "tsuperior" - 0xe9: 0xf6ed, // "isuperior" - 0xea: 0xf6f2, // "ssuperior" - 0xeb: 0xf6eb, // "dsuperior" - 0xf1: 0xf6ee, // "lsuperior" - 0xf2: 0xf6fb, // "Ogoneksmall" - 0xf3: 0xf6f4, // "Brevesmall" - 0xf4: 0xf7af, // "Macronsmall" - 0xf5: 0xf6ea, // "bsuperior" - 0xf6: 0x207f, // ⁿ "nsuperior" - 0xf7: 0xf6ef, // "msuperior" - 0xf8: 0xf6e2, // "commasuperior" - 0xf9: 0xf6e8, // "periodsuperior" - 0xfa: 0xf6f7, // "Dotaccentsmall" - 0xfb: 0xf6fc, // "Ringsmall" - }, - "MacRomanEncoding": { // 255 entries - 0x01: 0x0001, // "controlSTX" - 0x02: 0x0002, // "controlSOT" - 0x03: 0x0003, // "controlETX" - 0x04: 0x0004, // "controlEOT" - 0x05: 0x0005, // "controlENQ" - 0x06: 0x0006, // "controlACK" - 0x07: 0x0007, // "controlBEL" - 0x08: 0x0008, // "controlBS" - 0x09: 0x0009, // "controlHT" - 0x0a: 0x000a, // "controlLF" - 0x0b: 0x000b, // "controlVT" - 0x0c: 0x000c, // "controlFF" - 0x0d: 0x000d, // "controlCR" - 0x0e: 0x000e, // "controlSO" - 0x0f: 0x000f, // "controlSI" - 0x10: 0x0010, // "controlDLE" - 0x11: 0x0011, // "controlDC1" - 0x12: 0x0012, // "controlDC2" - 0x13: 0x0013, // "controlDC3" - 0x14: 0x0014, // "controlDC4" - 0x15: 0x0015, // "controlNAK" - 0x16: 0x0016, // "controlSYN" - 0x17: 0x0017, // "controlETB" - 0x18: 0x0018, // "controlCAN" - 0x19: 0x0019, // "controlEM" - 0x1a: 0x001a, // "controlSUB" - 0x1b: 0x001b, // "controlESC" - 0x1c: 0x001c, // "controlFS" - 0x1d: 0x001d, // "controlGS" - 0x1e: 0x001e, // "controlRS" - 0x1f: 0x001f, // "controlUS" - 0x20: 0x0020, 
// "space" - 0x21: 0x0021, // ! "exclam" - 0x22: 0x0022, // " "quotedbl" - 0x23: 0x0023, // # "numbersign" - 0x24: 0x0024, // $ "dollar" - 0x25: 0x0025, // % "percent" - 0x26: 0x0026, // & "ampersand" - 0x27: 0x0027, // \' "quotesingle" - 0x28: 0x0028, // ( "parenleft" - 0x29: 0x0029, // ) "parenright" - 0x2a: 0x002a, // * "asterisk" - 0x2b: 0x002b, // + "plus" - 0x2c: 0x002c, // , "comma" - 0x2d: 0x002d, // - "hyphen" - 0x2e: 0x002e, // . "period" - 0x2f: 0x002f, // / "slash" - 0x30: 0x0030, // 0 "zero" - 0x31: 0x0031, // 1 "one" - 0x32: 0x0032, // 2 "two" - 0x33: 0x0033, // 3 "three" - 0x34: 0x0034, // 4 "four" - 0x35: 0x0035, // 5 "five" - 0x36: 0x0036, // 6 "six" - 0x37: 0x0037, // 7 "seven" - 0x38: 0x0038, // 8 "eight" - 0x39: 0x0039, // 9 "nine" - 0x3a: 0x003a, // : "colon" - 0x3b: 0x003b, // ; "semicolon" - 0x3c: 0x003c, // < "less" - 0x3d: 0x003d, // = "equal" - 0x3e: 0x003e, // > "greater" - 0x3f: 0x003f, // ? "question" - 0x40: 0x0040, // @ "at" - 0x41: 0x0041, // A "A" - 0x42: 0x0042, // B "B" - 0x43: 0x0043, // C "C" - 0x44: 0x0044, // D "D" - 0x45: 0x0045, // E "E" - 0x46: 0x0046, // F "F" - 0x47: 0x0047, // G "G" - 0x48: 0x0048, // H "H" - 0x49: 0x0049, // I "I" - 0x4a: 0x004a, // J "J" - 0x4b: 0x004b, // K "K" - 0x4c: 0x004c, // L "L" - 0x4d: 0x004d, // M "M" - 0x4e: 0x004e, // N "N" - 0x4f: 0x004f, // O "O" - 0x50: 0x0050, // P "P" - 0x51: 0x0051, // Q "Q" - 0x52: 0x0052, // R "R" - 0x53: 0x0053, // S "S" - 0x54: 0x0054, // T "T" - 0x55: 0x0055, // U "U" - 0x56: 0x0056, // V "V" - 0x57: 0x0057, // W "W" - 0x58: 0x0058, // X "X" - 0x59: 0x0059, // Y "Y" - 0x5a: 0x005a, // Z "Z" - 0x5b: 0x005b, // [ "bracketleft" - 0x5c: 0x005c, // \\ "backslash" - 0x5d: 0x005d, // ] "bracketright" - 0x5e: 0x005e, // ^ "asciicircum" - 0x5f: 0x005f, // _ "underscore" - 0x60: 0x0060, // ` "grave" - 0x61: 0x0061, // a "a" - 0x62: 0x0062, // b "b" - 0x63: 0x0063, // c "c" - 0x64: 0x0064, // d "d" - 0x65: 0x0065, // e "e" - 0x66: 0x0066, // f "f" - 0x67: 0x0067, // g "g" - 
0x68: 0x0068, // h "h" - 0x69: 0x0069, // i "i" - 0x6a: 0x006a, // j "j" - 0x6b: 0x006b, // k "k" - 0x6c: 0x006c, // l "l" - 0x6d: 0x006d, // m "m" - 0x6e: 0x006e, // n "n" - 0x6f: 0x006f, // o "o" - 0x70: 0x0070, // p "p" - 0x71: 0x0071, // q "q" - 0x72: 0x0072, // r "r" - 0x73: 0x0073, // s "s" - 0x74: 0x0074, // t "t" - 0x75: 0x0075, // u "u" - 0x76: 0x0076, // v "v" - 0x77: 0x0077, // w "w" - 0x78: 0x0078, // x "x" - 0x79: 0x0079, // y "y" - 0x7a: 0x007a, // z "z" - 0x7b: 0x007b, // { "braceleft" - 0x7c: 0x007c, // | "bar" - 0x7d: 0x007d, // } "braceright" - 0x7e: 0x007e, // ~ "asciitilde" - 0x7f: 0x007f, // "controlDEL" - 0x80: 0x00c4, // Ä "Adieresis" - 0x81: 0x00c5, // Å "Aring" - 0x82: 0x00c7, // Ç "Ccedilla" - 0x83: 0x00c9, // É "Eacute" - 0x84: 0x00d1, // Ñ "Ntilde" - 0x85: 0x00d6, // Ö "Odieresis" - 0x86: 0x00dc, // Ü "Udieresis" - 0x87: 0x00e1, // á "aacute" - 0x88: 0x00e0, // à "agrave" - 0x89: 0x00e2, // â "acircumflex" - 0x8a: 0x00e4, // ä "adieresis" - 0x8b: 0x00e3, // ã "atilde" - 0x8c: 0x00e5, // å "aring" - 0x8d: 0x00e7, // ç "ccedilla" - 0x8e: 0x00e9, // é "eacute" - 0x8f: 0x00e8, // è "egrave" - 0x90: 0x00ea, // ê "ecircumflex" - 0x91: 0x00eb, // ë "edieresis" - 0x92: 0x00ed, // í "iacute" - 0x93: 0x00ec, // ì "igrave" - 0x94: 0x00ee, // î "icircumflex" - 0x95: 0x00ef, // ï "idieresis" - 0x96: 0x00f1, // ñ "ntilde" - 0x97: 0x00f3, // ó "oacute" - 0x98: 0x00f2, // ò "ograve" - 0x99: 0x00f4, // ô "ocircumflex" - 0x9a: 0x00f6, // ö "odieresis" - 0x9b: 0x00f5, // õ "otilde" - 0x9c: 0x00fa, // ú "uacute" - 0x9d: 0x00f9, // ù "ugrave" - 0x9e: 0x00fb, // û "ucircumflex" - 0x9f: 0x00fc, // ü "udieresis" - 0xa0: 0x2020, // † "dagger" - 0xa1: 0x00b0, // ° "degree" - 0xa2: 0x00a2, // ¢ "cent" - 0xa3: 0x00a3, // £ "sterling" - 0xa4: 0x00a7, // § "section" - 0xa5: 0x2022, // • "bullet" - 0xa6: 0x00b6, // ¶ "paragraph" - 0xa7: 0x00df, // ß "germandbls" - 0xa8: 0x00ae, // ® "registered" - 0xa9: 0x00a9, // © "copyright" - 0xaa: 0x2122, // ™ "trademark" - 0xab: 
0x00b4, // ´ "acute" - 0xac: 0x00a8, // ¨ "dieresis" - 0xad: 0x2260, // ≠ "notequal" - 0xae: 0x00c6, // Æ "AE" - 0xaf: 0x00d8, // Ø "Oslash" - 0xb0: 0x221e, // ∞ "infinity" - 0xb1: 0x00b1, // ± "plusminus" - 0xb2: 0x2264, // ≤ "lessequal" - 0xb3: 0x2265, // ≥ "greaterequal" - 0xb4: 0x00a5, // ¥ "yen" - 0xb5: 0x00b5, // µ "mu" - 0xb6: 0x2202, // ∂ "partialdiff" - 0xb7: 0x2211, // ∑ "summation" - 0xb8: 0x220f, // ∏ "product" - 0xb9: 0x03c0, // π "pi" - 0xba: 0x222b, // ∫ "integral" - 0xbb: 0x00aa, // ª "ordfeminine" - 0xbc: 0x00ba, // º "ordmasculine" - 0xbd: 0x03a9, // Ω "Omegagreek" - 0xbe: 0x00e6, // æ "ae" - 0xbf: 0x00f8, // ø "oslash" - 0xc0: 0x00bf, // ¿ "questiondown" - 0xc1: 0x00a1, // ¡ "exclamdown" - 0xc2: 0x00ac, // ¬ "logicalnot" - 0xc3: 0x221a, // √ "radical" - 0xc4: 0x0192, // ƒ "florin" - 0xc5: 0x2248, // ≈ "approxequal" - 0xc6: 0x2206, // ∆ "Delta" - 0xc7: 0x00ab, // « "guillemotleft" - 0xc8: 0x00bb, // » "guillemotright" - 0xc9: 0x2026, // … "ellipsis" - 0xca: 0x00a0, // "nbspace" - 0xcb: 0x00c0, // À "Agrave" - 0xcc: 0x00c3, // à "Atilde" - 0xcd: 0x00d5, // Õ "Otilde" - 0xce: 0x0152, // Œ "OE" - 0xcf: 0x0153, // œ "oe" - 0xd0: 0x2013, // – "endash" - 0xd1: 0x2014, // — "emdash" - 0xd2: 0x201c, // “ "quotedblleft" - 0xd3: 0x201d, // ” "quotedblright" - 0xd4: 0x2018, // ‘ "quoteleft" - 0xd5: 0x2019, // ’ "quoteright" - 0xd6: 0x00f7, // ÷ "divide" - 0xd7: 0x25ca, // ◊ "lozenge" - 0xd8: 0x00ff, // ÿ "ydieresis" - 0xd9: 0x0178, // Ÿ "Ydieresis" - 0xda: 0x2044, // ⁄ "fraction" - 0xdb: 0x20ac, // € "Euro" - 0xdc: 0x2039, // ‹ "guilsinglleft" - 0xdd: 0x203a, // › "guilsinglright" - 0xde: 0xfb01, // fi "fi" - 0xdf: 0xfb02, // fl "fl" - 0xe0: 0x2021, // ‡ "daggerdbl" - 0xe1: 0x00b7, // · "middot" - 0xe2: 0x201a, // ‚ "quotesinglbase" - 0xe3: 0x201e, // „ "quotedblbase" - 0xe4: 0x2030, // ‰ "perthousand" - 0xe5: 0x00c2, //  "Acircumflex" - 0xe6: 0x00ca, // Ê "Ecircumflex" - 0xe7: 0x00c1, // Á "Aacute" - 0xe8: 0x00cb, // Ë "Edieresis" - 0xe9: 0x00c8, // È 
"Egrave" - 0xea: 0x00cd, // Í "Iacute" - 0xeb: 0x00ce, // Î "Icircumflex" - 0xec: 0x00cf, // Ï "Idieresis" - 0xed: 0x00cc, // Ì "Igrave" - 0xee: 0x00d3, // Ó "Oacute" - 0xef: 0x00d4, // Ô "Ocircumflex" - 0xf0: 0xf8ff, // "apple" - 0xf1: 0x00d2, // Ò "Ograve" - 0xf2: 0x00da, // Ú "Uacute" - 0xf3: 0x00db, // Û "Ucircumflex" - 0xf4: 0x00d9, // Ù "Ugrave" - 0xf5: 0x0131, // ı "dotlessi" - 0xf6: 0x02c6, // ˆ "circumflex" - 0xf7: 0x02dc, // ˜ "ilde" - 0xf8: 0x00af, // ¯ "macron" - 0xf9: 0x02d8, // ˘ "breve" - 0xfa: 0x02d9, // ˙ "dotaccent" - 0xfb: 0x02da, // ˚ "ring" - 0xfc: 0x00b8, // ¸ "cedilla" - 0xfd: 0x02dd, // ˝ "hungarumlaut" - 0xfe: 0x02db, // ˛ "ogonek" - 0xff: 0x02c7, // ˇ "caron" - }, - "PdfDocEncoding": { // 252 entries - 0x01: 0x0001, // "controlSTX" - 0x02: 0x0002, // "controlSOT" - 0x03: 0x0003, // "controlETX" - 0x04: 0x0004, // "controlEOT" - 0x05: 0x0005, // "controlENQ" - 0x06: 0x0006, // "controlACK" - 0x07: 0x0007, // "controlBEL" - 0x08: 0x0008, // "controlBS" - 0x09: 0x0009, // "controlHT" - 0x0a: 0x000a, // "controlLF" - 0x0b: 0x000b, // "controlVT" - 0x0c: 0x000c, // "controlFF" - 0x0d: 0x000d, // "controlCR" - 0x0e: 0x000e, // "controlSO" - 0x0f: 0x000f, // "controlSI" - 0x10: 0x0010, // "controlDLE" - 0x11: 0x0011, // "controlDC1" - 0x12: 0x0012, // "controlDC2" - 0x13: 0x0013, // "controlDC3" - 0x14: 0x0014, // "controlDC4" - 0x15: 0x0015, // "controlNAK" - 0x16: 0x0017, // "controlETB" - 0x17: 0x0017, // "controlETB" - 0x18: 0x02d8, // ˘ "breve" - 0x19: 0x02c7, // ˇ "caron" - 0x1a: 0x02c6, // ˆ "circumflex" - 0x1b: 0x02d9, // ˙ "dotaccent" - 0x1c: 0x02dd, // ˝ "hungarumlaut" - 0x1d: 0x02db, // ˛ "ogonek" - 0x1e: 0x02da, // ˚ "ring" - 0x1f: 0x02dc, // ˜ "ilde" - 0x20: 0x0020, // "space" - 0x21: 0x0021, // ! 
"exclam" - 0x22: 0x0022, // " "quotedbl" - 0x23: 0x0023, // # "numbersign" - 0x24: 0x0024, // $ "dollar" - 0x25: 0x0025, // % "percent" - 0x26: 0x0026, // & "ampersand" - 0x27: 0x0027, // \' "quotesingle" - 0x28: 0x0028, // ( "parenleft" - 0x29: 0x0029, // ) "parenright" - 0x2a: 0x002a, // * "asterisk" - 0x2b: 0x002b, // + "plus" - 0x2c: 0x002c, // , "comma" - 0x2d: 0x002d, // - "hyphen" - 0x2e: 0x002e, // . "period" - 0x2f: 0x002f, // / "slash" - 0x30: 0x0030, // 0 "zero" - 0x31: 0x0031, // 1 "one" - 0x32: 0x0032, // 2 "two" - 0x33: 0x0033, // 3 "three" - 0x34: 0x0034, // 4 "four" - 0x35: 0x0035, // 5 "five" - 0x36: 0x0036, // 6 "six" - 0x37: 0x0037, // 7 "seven" - 0x38: 0x0038, // 8 "eight" - 0x39: 0x0039, // 9 "nine" - 0x3a: 0x003a, // : "colon" - 0x3b: 0x003b, // ; "semicolon" - 0x3c: 0x003c, // < "less" - 0x3d: 0x003d, // = "equal" - 0x3e: 0x003e, // > "greater" - 0x3f: 0x003f, // ? "question" - 0x40: 0x0040, // @ "at" - 0x41: 0x0041, // A "A" - 0x42: 0x0042, // B "B" - 0x43: 0x0043, // C "C" - 0x44: 0x0044, // D "D" - 0x45: 0x0045, // E "E" - 0x46: 0x0046, // F "F" - 0x47: 0x0047, // G "G" - 0x48: 0x0048, // H "H" - 0x49: 0x0049, // I "I" - 0x4a: 0x004a, // J "J" - 0x4b: 0x004b, // K "K" - 0x4c: 0x004c, // L "L" - 0x4d: 0x004d, // M "M" - 0x4e: 0x004e, // N "N" - 0x4f: 0x004f, // O "O" - 0x50: 0x0050, // P "P" - 0x51: 0x0051, // Q "Q" - 0x52: 0x0052, // R "R" - 0x53: 0x0053, // S "S" - 0x54: 0x0054, // T "T" - 0x55: 0x0055, // U "U" - 0x56: 0x0056, // V "V" - 0x57: 0x0057, // W "W" - 0x58: 0x0058, // X "X" - 0x59: 0x0059, // Y "Y" - 0x5a: 0x005a, // Z "Z" - 0x5b: 0x005b, // [ "bracketleft" - 0x5c: 0x005c, // \\ "backslash" - 0x5d: 0x005d, // ] "bracketright" - 0x5e: 0x005e, // ^ "asciicircum" - 0x5f: 0x005f, // _ "underscore" - 0x60: 0x0060, // ` "grave" - 0x61: 0x0061, // a "a" - 0x62: 0x0062, // b "b" - 0x63: 0x0063, // c "c" - 0x64: 0x0064, // d "d" - 0x65: 0x0065, // e "e" - 0x66: 0x0066, // f "f" - 0x67: 0x0067, // g "g" - 0x68: 0x0068, // h "h" - 0x69: 
0x0069, // i "i" - 0x6a: 0x006a, // j "j" - 0x6b: 0x006b, // k "k" - 0x6c: 0x006c, // l "l" - 0x6d: 0x006d, // m "m" - 0x6e: 0x006e, // n "n" - 0x6f: 0x006f, // o "o" - 0x70: 0x0070, // p "p" - 0x71: 0x0071, // q "q" - 0x72: 0x0072, // r "r" - 0x73: 0x0073, // s "s" - 0x74: 0x0074, // t "t" - 0x75: 0x0075, // u "u" - 0x76: 0x0076, // v "v" - 0x77: 0x0077, // w "w" - 0x78: 0x0078, // x "x" - 0x79: 0x0079, // y "y" - 0x7a: 0x007a, // z "z" - 0x7b: 0x007b, // { "braceleft" - 0x7c: 0x007c, // | "bar" - 0x7d: 0x007d, // } "braceright" - 0x7e: 0x007e, // ~ "asciitilde" - 0x80: 0x2022, // • "bullet" - 0x81: 0x2020, // † "dagger" - 0x82: 0x2021, // ‡ "daggerdbl" - 0x83: 0x2026, // … "ellipsis" - 0x84: 0x2014, // — "emdash" - 0x85: 0x2013, // – "endash" - 0x86: 0x0192, // ƒ "florin" - 0x87: 0x2044, // ⁄ "fraction" - 0x88: 0x2039, // ‹ "guilsinglleft" - 0x89: 0x203a, // › "guilsinglright" - 0x8a: 0x2212, // − "minus" - 0x8b: 0x2030, // ‰ "perthousand" - 0x8c: 0x201e, // „ "quotedblbase" - 0x8d: 0x201c, // “ "quotedblleft" - 0x8e: 0x201d, // ” "quotedblright" - 0x8f: 0x2018, // ‘ "quoteleft" - 0x90: 0x2019, // ’ "quoteright" - 0x91: 0x201a, // ‚ "quotesinglbase" - 0x92: 0x2122, // ™ "trademark" - 0x93: 0xfb01, // fi "fi" - 0x94: 0xfb02, // fl "fl" - 0x95: 0x0141, // Ł "Lslash" - 0x96: 0x0152, // Œ "OE" - 0x97: 0x0160, // Š "Scaron" - 0x98: 0x0178, // Ÿ "Ydieresis" - 0x99: 0x017d, // Ž "Zcaron" - 0x9a: 0x0131, // ı "dotlessi" - 0x9b: 0x0142, // ł "lslash" - 0x9c: 0x0153, // œ "oe" - 0x9d: 0x0161, // š "scaron" - 0x9e: 0x017e, // ž "zcaron" - 0xa0: 0x20ac, // € "Euro" - 0xa1: 0x00a1, // ¡ "exclamdown" - 0xa2: 0x00a2, // ¢ "cent" - 0xa3: 0x00a3, // £ "sterling" - 0xa4: 0x00a4, // ¤ "currency" - 0xa5: 0x00a5, // ¥ "yen" - 0xa6: 0x00a6, // ¦ "brokenbar" - 0xa7: 0x00a7, // § "section" - 0xa8: 0x00a8, // ¨ "dieresis" - 0xa9: 0x00a9, // © "copyright" - 0xaa: 0x00aa, // ª "ordfeminine" - 0xab: 0x00ab, // « "guillemotleft" - 0xac: 0x00ac, // ¬ "logicalnot" - 0xae: 0x00ae, // ® 
"registered" - 0xaf: 0x00af, // ¯ "macron" - 0xb0: 0x00b0, // ° "degree" - 0xb1: 0x00b1, // ± "plusminus" - 0xb2: 0x00b2, // ² "twosuperior" - 0xb3: 0x00b3, // ³ "threesuperior" - 0xb4: 0x00b4, // ´ "acute" - 0xb5: 0x00b5, // µ "mu" - 0xb6: 0x00b6, // ¶ "paragraph" - 0xb7: 0x00b7, // · "middot" - 0xb8: 0x00b8, // ¸ "cedilla" - 0xb9: 0x00b9, // ¹ "onesuperior" - 0xba: 0x00ba, // º "ordmasculine" - 0xbb: 0x00bb, // » "guillemotright" - 0xbc: 0x00bc, // ¼ "onequarter" - 0xbd: 0x00bd, // ½ "onehalf" - 0xbe: 0x00be, // ¾ "threequarters" - 0xbf: 0x00bf, // ¿ "questiondown" - 0xc0: 0x00c0, // À "Agrave" - 0xc1: 0x00c1, // Á "Aacute" - 0xc2: 0x00c2, //  "Acircumflex" - 0xc3: 0x00c3, // à "Atilde" - 0xc4: 0x00c4, // Ä "Adieresis" - 0xc5: 0x00c5, // Å "Aring" - 0xc6: 0x00c6, // Æ "AE" - 0xc7: 0x00c7, // Ç "Ccedilla" - 0xc8: 0x00c8, // È "Egrave" - 0xc9: 0x00c9, // É "Eacute" - 0xca: 0x00ca, // Ê "Ecircumflex" - 0xcb: 0x00cb, // Ë "Edieresis" - 0xcc: 0x00cc, // Ì "Igrave" - 0xcd: 0x00cd, // Í "Iacute" - 0xce: 0x00ce, // Î "Icircumflex" - 0xcf: 0x00cf, // Ï "Idieresis" - 0xd0: 0x00d0, // Ð "Eth" - 0xd1: 0x00d1, // Ñ "Ntilde" - 0xd2: 0x00d2, // Ò "Ograve" - 0xd3: 0x00d3, // Ó "Oacute" - 0xd4: 0x00d4, // Ô "Ocircumflex" - 0xd5: 0x00d5, // Õ "Otilde" - 0xd6: 0x00d6, // Ö "Odieresis" - 0xd7: 0x00d7, // × "multiply" - 0xd8: 0x00d8, // Ø "Oslash" - 0xd9: 0x00d9, // Ù "Ugrave" - 0xda: 0x00da, // Ú "Uacute" - 0xdb: 0x00db, // Û "Ucircumflex" - 0xdc: 0x00dc, // Ü "Udieresis" - 0xdd: 0x00dd, // Ý "Yacute" - 0xde: 0x00de, // Þ "Thorn" - 0xdf: 0x00df, // ß "germandbls" - 0xe0: 0x00e0, // à "agrave" - 0xe1: 0x00e1, // á "aacute" - 0xe2: 0x00e2, // â "acircumflex" - 0xe3: 0x00e3, // ã "atilde" - 0xe4: 0x00e4, // ä "adieresis" - 0xe5: 0x00e5, // å "aring" - 0xe6: 0x00e6, // æ "ae" - 0xe7: 0x00e7, // ç "ccedilla" - 0xe8: 0x00e8, // è "egrave" - 0xe9: 0x00e9, // é "eacute" - 0xea: 0x00ea, // ê "ecircumflex" - 0xeb: 0x00eb, // ë "edieresis" - 0xec: 0x00ec, // ì "igrave" - 0xed: 0x00ed, // í 
"iacute" - 0xee: 0x00ee, // î "icircumflex" - 0xef: 0x00ef, // ï "idieresis" - 0xf0: 0x00f0, // ð "eth" - 0xf1: 0x00f1, // ñ "ntilde" - 0xf2: 0x00f2, // ò "ograve" - 0xf3: 0x00f3, // ó "oacute" - 0xf4: 0x00f4, // ô "ocircumflex" - 0xf5: 0x00f5, // õ "otilde" - 0xf6: 0x00f6, // ö "odieresis" - 0xf7: 0x00f7, // ÷ "divide" - 0xf8: 0x00f8, // ø "oslash" - 0xf9: 0x00f9, // ù "ugrave" - 0xfa: 0x00fa, // ú "uacute" - 0xfb: 0x00fb, // û "ucircumflex" - 0xfc: 0x00fc, // ü "udieresis" - 0xfd: 0x00fd, // ý "yacute" - 0xfe: 0x00fe, // þ "thorn" - 0xff: 0x00ff, // ÿ "ydieresis" - }, - "StandardEncoding": { // 149 entries - 0x20: 0x0020, // "space" - 0x21: 0x0021, // ! "exclam" - 0x22: 0x0022, // " "quotedbl" - 0x23: 0x0023, // # "numbersign" - 0x24: 0x0024, // $ "dollar" - 0x25: 0x0025, // % "percent" - 0x26: 0x0026, // & "ampersand" - 0x27: 0x2019, // ’ "quoteright" - 0x28: 0x0028, // ( "parenleft" - 0x29: 0x0029, // ) "parenright" - 0x2a: 0x002a, // * "asterisk" - 0x2b: 0x002b, // + "plus" - 0x2c: 0x002c, // , "comma" - 0x2d: 0x002d, // - "hyphen" - 0x2e: 0x002e, // . "period" - 0x2f: 0x002f, // / "slash" - 0x30: 0x0030, // 0 "zero" - 0x31: 0x0031, // 1 "one" - 0x32: 0x0032, // 2 "two" - 0x33: 0x0033, // 3 "three" - 0x34: 0x0034, // 4 "four" - 0x35: 0x0035, // 5 "five" - 0x36: 0x0036, // 6 "six" - 0x37: 0x0037, // 7 "seven" - 0x38: 0x0038, // 8 "eight" - 0x39: 0x0039, // 9 "nine" - 0x3a: 0x003a, // : "colon" - 0x3b: 0x003b, // ; "semicolon" - 0x3c: 0x003c, // < "less" - 0x3d: 0x003d, // = "equal" - 0x3e: 0x003e, // > "greater" - 0x3f: 0x003f, // ? 
"question" - 0x40: 0x0040, // @ "at" - 0x41: 0x0041, // A "A" - 0x42: 0x0042, // B "B" - 0x43: 0x0043, // C "C" - 0x44: 0x0044, // D "D" - 0x45: 0x0045, // E "E" - 0x46: 0x0046, // F "F" - 0x47: 0x0047, // G "G" - 0x48: 0x0048, // H "H" - 0x49: 0x0049, // I "I" - 0x4a: 0x004a, // J "J" - 0x4b: 0x004b, // K "K" - 0x4c: 0x004c, // L "L" - 0x4d: 0x004d, // M "M" - 0x4e: 0x004e, // N "N" - 0x4f: 0x004f, // O "O" - 0x50: 0x0050, // P "P" - 0x51: 0x0051, // Q "Q" - 0x52: 0x0052, // R "R" - 0x53: 0x0053, // S "S" - 0x54: 0x0054, // T "T" - 0x55: 0x0055, // U "U" - 0x56: 0x0056, // V "V" - 0x57: 0x0057, // W "W" - 0x58: 0x0058, // X "X" - 0x59: 0x0059, // Y "Y" - 0x5a: 0x005a, // Z "Z" - 0x5b: 0x005b, // [ "bracketleft" - 0x5c: 0x005c, // \\ "backslash" - 0x5d: 0x005d, // ] "bracketright" - 0x5e: 0x005e, // ^ "asciicircum" - 0x5f: 0x005f, // _ "underscore" - 0x60: 0x0060, // ` "grave" - 0x61: 0x0061, // a "a" - 0x62: 0x0062, // b "b" - 0x63: 0x0063, // c "c" - 0x64: 0x0064, // d "d" - 0x65: 0x0065, // e "e" - 0x66: 0x0066, // f "f" - 0x67: 0x0067, // g "g" - 0x68: 0x0068, // h "h" - 0x69: 0x0069, // i "i" - 0x6a: 0x006a, // j "j" - 0x6b: 0x006b, // k "k" - 0x6c: 0x006c, // l "l" - 0x6d: 0x006d, // m "m" - 0x6e: 0x006e, // n "n" - 0x6f: 0x006f, // o "o" - 0x70: 0x0070, // p "p" - 0x71: 0x0071, // q "q" - 0x72: 0x0072, // r "r" - 0x73: 0x0073, // s "s" - 0x74: 0x0074, // t "t" - 0x75: 0x0075, // u "u" - 0x76: 0x0076, // v "v" - 0x77: 0x0077, // w "w" - 0x78: 0x0078, // x "x" - 0x79: 0x0079, // y "y" - 0x7a: 0x007a, // z "z" - 0x7b: 0x007b, // { "braceleft" - 0x7c: 0x007c, // | "bar" - 0x7d: 0x007d, // } "braceright" - 0x7e: 0x007e, // ~ "asciitilde" - 0xa1: 0x00a1, // ¡ "exclamdown" - 0xa2: 0x00a2, // ¢ "cent" - 0xa3: 0x00a3, // £ "sterling" - 0xa4: 0x2044, // ⁄ "fraction" - 0xa5: 0x00a5, // ¥ "yen" - 0xa6: 0x0192, // ƒ "florin" - 0xa7: 0x00a7, // § "section" - 0xa8: 0x00a4, // ¤ "currency" - 0xa9: 0x0027, // \' "quotesingle" - 0xaa: 0x201c, // “ "quotedblleft" - 0xab: 
0x00ab, // « "guillemotleft" - 0xac: 0x2039, // ‹ "guilsinglleft" - 0xad: 0x203a, // › "guilsinglright" - 0xae: 0xfb01, // fi "fi" - 0xaf: 0xfb02, // fl "fl" - 0xb1: 0x2013, // – "endash" - 0xb2: 0x2020, // † "dagger" - 0xb3: 0x2021, // ‡ "daggerdbl" - 0xb4: 0x00b7, // · "middot" - 0xb6: 0x00b6, // ¶ "paragraph" - 0xb7: 0x2022, // • "bullet" - 0xb8: 0x201a, // ‚ "quotesinglbase" - 0xb9: 0x201e, // „ "quotedblbase" - 0xba: 0x201d, // ” "quotedblright" - 0xbb: 0x00bb, // » "guillemotright" - 0xbc: 0x2026, // … "ellipsis" - 0xbd: 0x2030, // ‰ "perthousand" - 0xbf: 0x00bf, // ¿ "questiondown" - 0xc1: 0x0060, // ` "grave" - 0xc2: 0x00b4, // ´ "acute" - 0xc3: 0x02c6, // ˆ "circumflex" - 0xc4: 0x02dc, // ˜ "ilde" - 0xc5: 0x00af, // ¯ "macron" - 0xc6: 0x02d8, // ˘ "breve" - 0xc7: 0x02d9, // ˙ "dotaccent" - 0xc8: 0x00a8, // ¨ "dieresis" - 0xca: 0x02da, // ˚ "ring" - 0xcb: 0x00b8, // ¸ "cedilla" - 0xcc: 0x02dd, // ˝ "hungarumlaut" - 0xcd: 0x02db, // ˛ "ogonek" - 0xce: 0x02c7, // ˇ "caron" - 0xcf: 0x2014, // — "emdash" - 0xe0: 0x00c6, // Æ "AE" - 0xe2: 0x00aa, // ª "ordfeminine" - 0xe7: 0x0141, // Ł "Lslash" - 0xe8: 0x00d8, // Ø "Oslash" - 0xe9: 0x0152, // Œ "OE" - 0xea: 0x00ba, // º "ordmasculine" - 0xf0: 0x00e6, // æ "ae" - 0xf5: 0x0131, // ı "dotlessi" - 0xf7: 0x0142, // ł "lslash" - 0xf8: 0x00f8, // ø "oslash" - 0xf9: 0x0153, // œ "oe" - 0xfa: 0x00df, // ß "germandbls" - }, +// newSimpleMapping creates a byte-to-rune mapping that can be used to create simple encodings. +// An implementation will build reverse map only once when the encoding is first used. 
+func newSimpleMapping(name string, m map[byte]rune) *simpleMapping { + return &simpleMapping{ + baseName: name, + decode: m, + } +} + +type simpleMapping struct { + baseName string + once sync.Once + decode map[byte]rune + encode map[rune]byte +} + +func (m *simpleMapping) init() { + m.encode = make(map[rune]byte, len(m.decode)) + for b, r := range m.decode { + m.encode[r] = b + } +} + +// NewEncoder creates a new SimpleEncoding from the byte-to-rune mapping. +func (m *simpleMapping) NewEncoder() SimpleEncoder { + m.once.Do(m.init) + return &simpleEncoding{ + baseName: m.baseName, + encode: m.encode, + decode: m.decode, + } } diff --git a/pdf/internal/textencoding/simple_mac.go b/pdf/internal/textencoding/simple_mac.go new file mode 100644 index 00000000..faf908d0 --- /dev/null +++ b/pdf/internal/textencoding/simple_mac.go @@ -0,0 +1,121 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package textencoding + +const ( + baseMacExpert = "MacExpertEncoding" + baseMacRoman = "MacRomanEncoding" +) + +var ( + macExpert = newSimpleMapping(baseMacExpert, macExpertCharToRune) + macRoman = newSimpleMapping(baseMacRoman, macRomanCharToRune) +) + +func init() { + RegisterSimpleEncoding(baseMacExpert, NewMacExpertEncoder) + RegisterSimpleEncoding(baseMacRoman, NewMacRomanEncoder) +} + +// NewMacExpertEncoder returns a SimpleEncoder that implements MacExpertEncoding. +func NewMacExpertEncoder() SimpleEncoder { + return macExpert.NewEncoder() +} + +// NewMacRomanEncoder returns a SimpleEncoder that implements MacRomanEncoding. 
+func NewMacRomanEncoder() SimpleEncoder { + return macRoman.NewEncoder() +} + +var macExpertCharToRune = map[byte]rune{ // 165 entries + 0x20: ' ', 0x21: '\uf721', 0x22: '\uf6f8', 0x23: '\uf7a2', 0x24: '\uf724', + 0x25: '\uf6e4', 0x26: '\uf726', 0x27: '\uf7b4', 0x28: '⁽', 0x29: '⁾', + 0x2a: '‥', 0x2b: '․', 0x2c: ',', 0x2d: '-', 0x2e: '.', + 0x2f: '⁄', 0x30: '\uf730', 0x31: '\uf731', 0x32: '\uf732', 0x33: '\uf733', + 0x34: '\uf734', 0x35: '\uf735', 0x36: '\uf736', 0x37: '\uf737', 0x38: '\uf738', + 0x39: '\uf739', 0x3a: ':', 0x3b: ';', 0x3d: '\uf6de', 0x3f: '\uf73f', + 0x44: '\uf7f0', 0x47: '¼', 0x48: '½', 0x49: '¾', 0x4a: '⅛', + 0x4b: '⅜', 0x4c: '⅝', 0x4d: '⅞', 0x4e: '⅓', 0x4f: '⅔', + 0x56: 'ff', 0x57: 'fi', 0x58: 'fl', 0x59: 'ffi', 0x5a: 'ffl', + 0x5b: '₍', 0x5d: '₎', 0x5e: '\uf6f6', 0x5f: '\uf6e5', 0x60: '\uf760', + 0x61: '\uf761', 0x62: '\uf762', 0x63: '\uf763', 0x64: '\uf764', 0x65: '\uf765', + 0x66: '\uf766', 0x67: '\uf767', 0x68: '\uf768', 0x69: '\uf769', 0x6a: '\uf76a', + 0x6b: '\uf76b', 0x6c: '\uf76c', 0x6d: '\uf76d', 0x6e: '\uf76e', 0x6f: '\uf76f', + 0x70: '\uf770', 0x71: '\uf771', 0x72: '\uf772', 0x73: '\uf773', 0x74: '\uf774', + 0x75: '\uf775', 0x76: '\uf776', 0x77: '\uf777', 0x78: '\uf778', 0x79: '\uf779', + 0x7a: '\uf77a', 0x7b: '₡', 0x7c: '\uf6dc', 0x7d: '\uf6dd', 0x7e: '\uf6fe', + 0x81: '\uf6e9', 0x82: '\uf6e0', 0x87: '\uf7e1', 0x88: '\uf7e0', 0x89: '\uf7e2', + 0x8a: '\uf7e4', 0x8b: '\uf7e3', 0x8c: '\uf7e5', 0x8d: '\uf7e7', 0x8e: '\uf7e9', + 0x8f: '\uf7e8', 0x90: '\uf7ea', 0x91: '\uf7eb', 0x92: '\uf7ed', 0x93: '\uf7ec', + 0x94: '\uf7ee', 0x95: '\uf7ef', 0x96: '\uf7f1', 0x97: '\uf7f3', 0x98: '\uf7f2', + 0x99: '\uf7f4', 0x9a: '\uf7f6', 0x9b: '\uf7f5', 0x9c: '\uf7fa', 0x9d: '\uf7f9', + 0x9e: '\uf7fb', 0x9f: '\uf7fc', 0xa1: '⁸', 0xa2: '₄', 0xa3: '₃', + 0xa4: '₆', 0xa5: '₈', 0xa6: '₇', 0xa7: '\uf6fd', 0xa9: '\uf6df', + 0xaa: '₂', 0xac: '\uf7a8', 0xae: '\uf6f5', 0xaf: '\uf6f0', 0xb0: '₅', + 0xb2: '\uf6e1', 0xb3: '\uf6e7', 0xb4: '\uf7fd', 0xb6: '\uf6e3', 
0xb9: '\uf7fe', + 0xbb: '₉', 0xbc: '₀', 0xbd: '\uf6ff', 0xbe: '\uf7e6', 0xbf: '\uf7f8', + 0xc0: '\uf7bf', 0xc1: '₁', 0xc2: '\uf6f9', 0xc9: '\uf7b8', 0xcf: '\uf6fa', + 0xd0: '‒', 0xd1: '\uf6e6', 0xd6: '\uf7a1', 0xd8: '\uf7ff', 0xda: '¹', + 0xdb: '²', 0xdc: '³', 0xdd: '⁴', 0xde: '⁵', 0xdf: '⁶', + 0xe0: '⁷', 0xe1: '⁹', 0xe2: '⁰', 0xe4: '\uf6ec', 0xe5: '\uf6f1', + 0xe6: '\uf6f3', 0xe9: '\uf6ed', 0xea: '\uf6f2', 0xeb: '\uf6eb', 0xf1: '\uf6ee', + 0xf2: '\uf6fb', 0xf3: '\uf6f4', 0xf4: '\uf7af', 0xf5: '\uf6ea', 0xf6: 'ⁿ', + 0xf7: '\uf6ef', 0xf8: '\uf6e2', 0xf9: '\uf6e8', 0xfa: '\uf6f7', 0xfb: '\uf6fc', +} + +var macRomanCharToRune = map[byte]rune{ // 255 entries + 0x1: '\x01', 0x2: '\x02', 0x3: '\x03', 0x4: '\x04', 0x5: '\x05', + 0x6: '\x06', 0x7: '\a', 0x8: '\b', 0x9: '\t', 0xa: '\n', + 0xb: '\v', 0xc: '\f', 0xd: '\r', 0xe: '\x0e', 0xf: '\x0f', + 0x10: '\x10', 0x11: '\x11', 0x12: '\x12', 0x13: '\x13', 0x14: '\x14', + 0x15: '\x15', 0x16: '\x16', 0x17: '\x17', 0x18: '\x18', 0x19: '\x19', + 0x1a: '\x1a', 0x1b: '\x1b', 0x1c: '\x1c', 0x1d: '\x1d', 0x1e: '\x1e', + 0x1f: '\x1f', 0x20: ' ', 0x21: '!', 0x22: '"', 0x23: '#', + 0x24: '$', 0x25: '%', 0x26: '&', 0x27: '\'', 0x28: '(', + 0x29: ')', 0x2a: '*', 0x2b: '+', 0x2c: ',', 0x2d: '-', + 0x2e: '.', 0x2f: '/', 0x30: '0', 0x31: '1', 0x32: '2', + 0x33: '3', 0x34: '4', 0x35: '5', 0x36: '6', 0x37: '7', + 0x38: '8', 0x39: '9', 0x3a: ':', 0x3b: ';', 0x3c: '<', + 0x3d: '=', 0x3e: '>', 0x3f: '?', 0x40: '@', 0x41: 'A', + 0x42: 'B', 0x43: 'C', 0x44: 'D', 0x45: 'E', 0x46: 'F', + 0x47: 'G', 0x48: 'H', 0x49: 'I', 0x4a: 'J', 0x4b: 'K', + 0x4c: 'L', 0x4d: 'M', 0x4e: 'N', 0x4f: 'O', 0x50: 'P', + 0x51: 'Q', 0x52: 'R', 0x53: 'S', 0x54: 'T', 0x55: 'U', + 0x56: 'V', 0x57: 'W', 0x58: 'X', 0x59: 'Y', 0x5a: 'Z', + 0x5b: '[', 0x5c: '\\', 0x5d: ']', 0x5e: '^', 0x5f: '_', + 0x60: '`', 0x61: 'a', 0x62: 'b', 0x63: 'c', 0x64: 'd', + 0x65: 'e', 0x66: 'f', 0x67: 'g', 0x68: 'h', 0x69: 'i', + 0x6a: 'j', 0x6b: 'k', 0x6c: 'l', 0x6d: 'm', 0x6e: 'n', + 0x6f: 'o', 
0x70: 'p', 0x71: 'q', 0x72: 'r', 0x73: 's', + 0x74: 't', 0x75: 'u', 0x76: 'v', 0x77: 'w', 0x78: 'x', + 0x79: 'y', 0x7a: 'z', 0x7b: '{', 0x7c: '|', 0x7d: '}', + 0x7e: '~', 0x7f: '\u007f', 0x80: 'Ä', 0x81: 'Å', 0x82: 'Ç', + 0x83: 'É', 0x84: 'Ñ', 0x85: 'Ö', 0x86: 'Ü', 0x87: 'á', + 0x88: 'à', 0x89: 'â', 0x8a: 'ä', 0x8b: 'ã', 0x8c: 'å', + 0x8d: 'ç', 0x8e: 'é', 0x8f: 'è', 0x90: 'ê', 0x91: 'ë', + 0x92: 'í', 0x93: 'ì', 0x94: 'î', 0x95: 'ï', 0x96: 'ñ', + 0x97: 'ó', 0x98: 'ò', 0x99: 'ô', 0x9a: 'ö', 0x9b: 'õ', + 0x9c: 'ú', 0x9d: 'ù', 0x9e: 'û', 0x9f: 'ü', 0xa0: '†', + 0xa1: '°', 0xa2: '¢', 0xa3: '£', 0xa4: '§', 0xa5: '•', + 0xa6: '¶', 0xa7: 'ß', 0xa8: '®', 0xa9: '©', 0xaa: '™', + 0xab: '´', 0xac: '¨', 0xad: '≠', 0xae: 'Æ', 0xaf: 'Ø', + 0xb0: '∞', 0xb1: '±', 0xb2: '≤', 0xb3: '≥', 0xb4: '¥', + 0xb5: 'µ', 0xb6: '∂', 0xb7: '∑', 0xb8: '∏', 0xb9: 'π', + 0xba: '∫', 0xbb: 'ª', 0xbc: 'º', 0xbd: 'Ω', 0xbe: 'æ', + 0xbf: 'ø', 0xc0: '¿', 0xc1: '¡', 0xc2: '¬', 0xc3: '√', + 0xc4: 'ƒ', 0xc5: '≈', 0xc6: '∆', 0xc7: '«', 0xc8: '»', + 0xc9: '…', 0xca: '\u00a0', 0xcb: 'À', 0xcc: 'Ã', 0xcd: 'Õ', + 0xce: 'Œ', 0xcf: 'œ', 0xd0: '–', 0xd1: '—', 0xd2: '“', + 0xd3: '”', 0xd4: '‘', 0xd5: '’', 0xd6: '÷', 0xd7: '◊', + 0xd8: 'ÿ', 0xd9: 'Ÿ', 0xda: '⁄', 0xdb: '€', 0xdc: '‹', + 0xdd: '›', 0xde: 'fi', 0xdf: 'fl', 0xe0: '‡', 0xe1: '·', + 0xe2: '‚', 0xe3: '„', 0xe4: '‰', 0xe5: 'Â', 0xe6: 'Ê', + 0xe7: 'Á', 0xe8: 'Ë', 0xe9: 'È', 0xea: 'Í', 0xeb: 'Î', + 0xec: 'Ï', 0xed: 'Ì', 0xee: 'Ó', 0xef: 'Ô', 0xf0: '\uf8ff', + 0xf1: 'Ò', 0xf2: 'Ú', 0xf3: 'Û', 0xf4: 'Ù', 0xf5: 'ı', + 0xf6: 'ˆ', 0xf7: '˜', 0xf8: '¯', 0xf9: '˘', 0xfa: '˙', + 0xfb: '˚', 0xfc: '¸', 0xfd: '˝', 0xfe: '˛', 0xff: 'ˇ', +} diff --git a/pdf/internal/textencoding/simple_other.go b/pdf/internal/textencoding/simple_other.go new file mode 100644 index 00000000..33b52107 --- /dev/null +++ b/pdf/internal/textencoding/simple_other.go @@ -0,0 +1,126 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this 
source code package. + */ + +package textencoding + +const ( + baseSymbol = "SymbolEncoding" + baseZapfDingbats = "ZapfDingbatsEncoding" +) + +var ( + symbol = newSimpleMapping(baseSymbol, symbolCharToRune) + zapfDingbats = newSimpleMapping(baseZapfDingbats, zapfDingbatsCharToRune) +) + +func init() { + RegisterSimpleEncoding(baseSymbol, NewSymbolEncoder) + RegisterSimpleEncoding(baseZapfDingbats, NewZapfDingbatsEncoder) +} + +// NewSymbolEncoder returns a SimpleEncoder that implements SymbolEncoding. +func NewSymbolEncoder() SimpleEncoder { + return symbol.NewEncoder() +} + +// NewZapfDingbatsEncoder returns a SimpleEncoder that implements ZapfDingbatsEncoding. +func NewZapfDingbatsEncoder() SimpleEncoder { + return zapfDingbats.NewEncoder() +} + +var symbolCharToRune = map[byte]rune{ // 189 entries + 0x20: ' ', 0x21: '!', 0x22: '∀', 0x23: '#', + 0x24: '∃', 0x25: '%', 0x26: '&', 0x27: '∋', + 0x28: '(', 0x29: ')', 0x2a: '∗', 0x2b: '+', + 0x2c: ',', 0x2d: '−', 0x2e: '.', 0x2f: '/', + 0x30: '0', 0x31: '1', 0x32: '2', 0x33: '3', + 0x34: '4', 0x35: '5', 0x36: '6', 0x37: '7', + 0x38: '8', 0x39: '9', 0x3a: ':', 0x3b: ';', + 0x3c: '<', 0x3d: '=', 0x3e: '>', 0x3f: '?', + 0x40: '≅', 0x41: 'Α', 0x42: 'Β', 0x43: 'Χ', + 0x44: '∆', 0x45: 'Ε', 0x46: 'Φ', 0x47: 'Γ', + 0x48: 'Η', 0x49: 'Ι', 0x4a: 'ϑ', 0x4b: 'Κ', + 0x4c: 'Λ', 0x4d: 'Μ', 0x4e: 'Ν', 0x4f: 'Ο', + 0x50: 'Π', 0x51: 'Θ', 0x52: 'Ρ', 0x53: 'Σ', + 0x54: 'Τ', 0x55: 'Υ', 0x56: 'ς', 0x57: 'Ω', + 0x58: 'Ξ', 0x59: 'Ψ', 0x5a: 'Ζ', 0x5b: '[', + 0x5c: '∴', 0x5d: ']', 0x5e: '⊥', 0x5f: '_', + 0x60: '\uf8e5', 0x61: 'α', 0x62: 'β', 0x63: 'χ', + 0x64: 'δ', 0x65: 'ε', 0x66: 'φ', 0x67: 'γ', + 0x68: 'η', 0x69: 'ι', 0x6a: 'ϕ', 0x6b: 'κ', + 0x6c: 'λ', 0x6d: 'µ', 0x6e: 'ν', 0x6f: 'ο', + 0x70: 'π', 0x71: 'θ', 0x72: 'ρ', 0x73: 'σ', + 0x74: 'τ', 0x75: 'υ', 0x76: 'ϖ', 0x77: 'ω', + 0x78: 'ξ', 0x79: 'ψ', 0x7a: 'ζ', 0x7b: '{', + 0x7c: '|', 0x7d: '}', 0x7e: '∼', 0xa0: '€', + 0xa1: 'ϒ', 0xa2: '′', 0xa3: '≤', 0xa4: '⁄', + 0xa5: '∞', 0xa6: 'ƒ', 0xa7: 
'♣', 0xa8: '♦', + 0xa9: '♥', 0xaa: '♠', 0xab: '↔', 0xac: '←', + 0xad: '↑', 0xae: '→', 0xaf: '↓', 0xb0: '°', + 0xb1: '±', 0xb2: '″', 0xb3: '≥', 0xb4: '×', + 0xb5: '∝', 0xb6: '∂', 0xb7: '•', 0xb8: '÷', + 0xb9: '≠', 0xba: '≡', 0xbb: '≈', 0xbc: '…', + 0xbd: '\uf8e6', 0xbe: '\uf8e7', 0xbf: '↵', 0xc0: 'ℵ', + 0xc1: 'ℑ', 0xc2: 'ℜ', 0xc3: '℘', 0xc4: '⊗', + 0xc5: '⊕', 0xc6: '∅', 0xc7: '∩', 0xc8: '∪', + 0xc9: '⊃', 0xca: '⊇', 0xcb: '⊄', 0xcc: '⊂', + 0xcd: '⊆', 0xce: '∈', 0xcf: '∉', 0xd0: '∠', + 0xd1: '∇', 0xd2: '\uf6da', 0xd3: '\uf6d9', 0xd4: '\uf6db', + 0xd5: '∏', 0xd6: '√', 0xd7: '⋅', 0xd8: '¬', + 0xd9: '∧', 0xda: '∨', 0xdb: '⇔', 0xdc: '⇐', + 0xdd: '⇑', 0xde: '⇒', 0xdf: '⇓', 0xe0: '◊', + 0xe1: '〈', 0xe2: '\uf8e8', 0xe3: '\uf8e9', 0xe4: '\uf8ea', + 0xe5: '∑', 0xe6: '\uf8eb', 0xe7: '\uf8ec', 0xe8: '\uf8ed', + 0xe9: '\uf8ee', 0xea: '\uf8ef', 0xeb: '\uf8f0', 0xec: '\uf8f1', + 0xed: '\uf8f2', 0xee: '\uf8f3', 0xef: '\uf8f4', 0xf1: '〉', + 0xf2: '∫', 0xf3: '⌠', 0xf4: '\uf8f5', 0xf5: '⌡', + 0xf6: '\uf8f6', 0xf7: '\uf8f7', 0xf8: '\uf8f8', 0xf9: '\uf8f9', + 0xfa: '\uf8fa', 0xfb: '\uf8fb', 0xfc: '\uf8fc', 0xfd: '\uf8fd', + 0xfe: '\uf8fe', +} + +var zapfDingbatsCharToRune = map[byte]rune{ // 202 entries + 0x20: ' ', 0x21: '✁', 0x22: '✂', 0x23: '✃', 0x24: '✄', + 0x25: '☎', 0x26: '✆', 0x27: '✇', 0x28: '✈', 0x29: '✉', + 0x2a: '☛', 0x2b: '☞', 0x2c: '✌', 0x2d: '✍', 0x2e: '✎', + 0x2f: '✏', 0x30: '✐', 0x31: '✑', 0x32: '✒', 0x33: '✓', + 0x34: '✔', 0x35: '✕', 0x36: '✖', 0x37: '✗', 0x38: '✘', + 0x39: '✙', 0x3a: '✚', 0x3b: '✛', 0x3c: '✜', 0x3d: '✝', + 0x3e: '✞', 0x3f: '✟', 0x40: '✠', 0x41: '✡', 0x42: '✢', + 0x43: '✣', 0x44: '✤', 0x45: '✥', 0x46: '✦', 0x47: '✧', + 0x48: '★', 0x49: '✩', 0x4a: '✪', 0x4b: '✫', 0x4c: '✬', + 0x4d: '✭', 0x4e: '✮', 0x4f: '✯', 0x50: '✰', 0x51: '✱', + 0x52: '✲', 0x53: '✳', 0x54: '✴', 0x55: '✵', 0x56: '✶', + 0x57: '✷', 0x58: '✸', 0x59: '✹', 0x5a: '✺', 0x5b: '✻', + 0x5c: '✼', 0x5d: '✽', 0x5e: '✾', 0x5f: '✿', 0x60: '❀', + 0x61: '❁', 0x62: '❂', 0x63: '❃', 0x64: '❄', 0x65: '❅', + 
0x66: '❆', 0x67: '❇', 0x68: '❈', 0x69: '❉', 0x6a: '❊', + 0x6b: '❋', 0x6c: '●', 0x6d: '❍', 0x6e: '■', 0x6f: '❏', + 0x70: '❐', 0x71: '❑', 0x72: '❒', 0x73: '▲', 0x74: '▼', + 0x75: '◆', 0x76: '❖', 0x77: '◗', 0x78: '❘', 0x79: '❙', + 0x7a: '❚', 0x7b: '❛', 0x7c: '❜', 0x7d: '❝', 0x7e: '❞', + 0x80: '\uf8d7', 0x81: '\uf8d8', 0x82: '\uf8d9', 0x83: '\uf8da', 0x84: '\uf8db', + 0x85: '\uf8dc', 0x86: '\uf8dd', 0x87: '\uf8de', 0x88: '\uf8df', 0x89: '\uf8e0', + 0x8a: '\uf8e1', 0x8b: '\uf8e2', 0x8c: '\uf8e3', 0x8d: '\uf8e4', 0xa1: '❡', + 0xa2: '❢', 0xa3: '❣', 0xa4: '❤', 0xa5: '❥', 0xa6: '❦', + 0xa7: '❧', 0xa8: '♣', 0xa9: '♦', 0xaa: '♥', 0xab: '♠', + 0xac: '①', 0xad: '②', 0xae: '③', 0xaf: '④', 0xb0: '⑤', + 0xb1: '⑥', 0xb2: '⑦', 0xb3: '⑧', 0xb4: '⑨', 0xb5: '⑩', + 0xb6: '❶', 0xb7: '❷', 0xb8: '❸', 0xb9: '❹', 0xba: '❺', + 0xbb: '❻', 0xbc: '❼', 0xbd: '❽', 0xbe: '❾', 0xbf: '❿', + 0xc0: '➀', 0xc1: '➁', 0xc2: '➂', 0xc3: '➃', 0xc4: '➄', + 0xc5: '➅', 0xc6: '➆', 0xc7: '➇', 0xc8: '➈', 0xc9: '➉', + 0xca: '➊', 0xcb: '➋', 0xcc: '➌', 0xcd: '➍', 0xce: '➎', + 0xcf: '➏', 0xd0: '➐', 0xd1: '➑', 0xd2: '➒', 0xd3: '➓', + 0xd4: '➔', 0xd5: '→', 0xd6: '↔', 0xd7: '↕', 0xd8: '➘', + 0xd9: '➙', 0xda: '➚', 0xdb: '➛', 0xdc: '➜', 0xdd: '➝', + 0xde: '➞', 0xdf: '➟', 0xe0: '➠', 0xe1: '➡', 0xe2: '➢', + 0xe3: '➣', 0xe4: '➤', 0xe5: '➥', 0xe6: '➦', 0xe7: '➧', + 0xe8: '➨', 0xe9: '➩', 0xea: '➪', 0xeb: '➫', 0xec: '➬', + 0xed: '➭', 0xee: '➮', 0xef: '➯', 0xf1: '➱', 0xf2: '➲', + 0xf3: '➳', 0xf4: '➴', 0xf5: '➵', 0xf6: '➶', 0xf7: '➷', + 0xf8: '➸', 0xf9: '➹', 0xfa: '➺', 0xfb: '➻', 0xfc: '➼', + 0xfd: '➽', 0xfe: '➾', +} diff --git a/pdf/internal/textencoding/simple_pdf.go b/pdf/internal/textencoding/simple_pdf.go new file mode 100644 index 00000000..7fb61ae6 --- /dev/null +++ b/pdf/internal/textencoding/simple_pdf.go @@ -0,0 +1,118 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. 
+ */ + +package textencoding + +const ( + basePdfDoc = "PdfDocEncoding" + baseStandard = "StandardEncoding" +) + +var ( + pdfDoc = newSimpleMapping(basePdfDoc, pdfDocCharToRune) + standard = newSimpleMapping(baseStandard, standardCharToRune) +) + +func init() { + RegisterSimpleEncoding(basePdfDoc, NewPdfDocEncoder) + RegisterSimpleEncoding(baseStandard, NewStandardEncoder) +} + +// NewPdfDocEncoder returns a SimpleEncoder that implements PdfDocEncoding. +func NewPdfDocEncoder() SimpleEncoder { + return pdfDoc.NewEncoder() +} + +// NewStandardEncoder returns a SimpleEncoder that implements StandardEncoding. +func NewStandardEncoder() SimpleEncoder { + return standard.NewEncoder() +} + +var pdfDocCharToRune = map[byte]rune{ // 252 entries + 0x1: '\x01', 0x2: '\x02', 0x3: '\x03', 0x4: '\x04', 0x5: '\x05', + 0x6: '\x06', 0x7: '\a', 0x8: '\b', 0x9: '\t', 0xa: '\n', + 0xb: '\v', 0xc: '\f', 0xd: '\r', 0xe: '\x0e', 0xf: '\x0f', + 0x10: '\x10', 0x11: '\x11', 0x12: '\x12', 0x13: '\x13', 0x14: '\x14', + 0x15: '\x15', 0x16: '\x17', 0x17: '\x17', 0x18: '˘', 0x19: 'ˇ', + 0x1a: 'ˆ', 0x1b: '˙', 0x1c: '˝', 0x1d: '˛', 0x1e: '˚', + 0x1f: '˜', 0x20: ' ', 0x21: '!', 0x22: '"', 0x23: '#', + 0x24: '$', 0x25: '%', 0x26: '&', 0x27: '\'', 0x28: '(', + 0x29: ')', 0x2a: '*', 0x2b: '+', 0x2c: ',', 0x2d: '-', + 0x2e: '.', 0x2f: '/', 0x30: '0', 0x31: '1', 0x32: '2', + 0x33: '3', 0x34: '4', 0x35: '5', 0x36: '6', 0x37: '7', + 0x38: '8', 0x39: '9', 0x3a: ':', 0x3b: ';', 0x3c: '<', + 0x3d: '=', 0x3e: '>', 0x3f: '?', 0x40: '@', 0x41: 'A', + 0x42: 'B', 0x43: 'C', 0x44: 'D', 0x45: 'E', 0x46: 'F', + 0x47: 'G', 0x48: 'H', 0x49: 'I', 0x4a: 'J', 0x4b: 'K', + 0x4c: 'L', 0x4d: 'M', 0x4e: 'N', 0x4f: 'O', 0x50: 'P', + 0x51: 'Q', 0x52: 'R', 0x53: 'S', 0x54: 'T', 0x55: 'U', + 0x56: 'V', 0x57: 'W', 0x58: 'X', 0x59: 'Y', 0x5a: 'Z', + 0x5b: '[', 0x5c: '\\', 0x5d: ']', 0x5e: '^', 0x5f: '_', + 0x60: '`', 0x61: 'a', 0x62: 'b', 0x63: 'c', 0x64: 'd', + 0x65: 'e', 0x66: 'f', 0x67: 'g', 0x68: 'h', 0x69: 'i', + 0x6a: 'j', 
0x6b: 'k', 0x6c: 'l', 0x6d: 'm', 0x6e: 'n', + 0x6f: 'o', 0x70: 'p', 0x71: 'q', 0x72: 'r', 0x73: 's', + 0x74: 't', 0x75: 'u', 0x76: 'v', 0x77: 'w', 0x78: 'x', + 0x79: 'y', 0x7a: 'z', 0x7b: '{', 0x7c: '|', 0x7d: '}', + 0x7e: '~', 0x80: '•', 0x81: '†', 0x82: '‡', 0x83: '…', + 0x84: '—', 0x85: '–', 0x86: 'ƒ', 0x87: '⁄', 0x88: '‹', + 0x89: '›', 0x8a: '−', 0x8b: '‰', 0x8c: '„', 0x8d: '“', + 0x8e: '”', 0x8f: '‘', 0x90: '’', 0x91: '‚', 0x92: '™', + 0x93: 'fi', 0x94: 'fl', 0x95: 'Ł', 0x96: 'Œ', 0x97: 'Š', + 0x98: 'Ÿ', 0x99: 'Ž', 0x9a: 'ı', 0x9b: 'ł', 0x9c: 'œ', + 0x9d: 'š', 0x9e: 'ž', 0xa0: '€', 0xa1: '¡', 0xa2: '¢', + 0xa3: '£', 0xa4: '¤', 0xa5: '¥', 0xa6: '¦', 0xa7: '§', + 0xa8: '¨', 0xa9: '©', 0xaa: 'ª', 0xab: '«', 0xac: '¬', + 0xae: '®', 0xaf: '¯', 0xb0: '°', 0xb1: '±', 0xb2: '²', + 0xb3: '³', 0xb4: '´', 0xb5: 'µ', 0xb6: '¶', 0xb7: '·', + 0xb8: '¸', 0xb9: '¹', 0xba: 'º', 0xbb: '»', 0xbc: '¼', + 0xbd: '½', 0xbe: '¾', 0xbf: '¿', 0xc0: 'À', 0xc1: 'Á', + 0xc2: 'Â', 0xc3: 'Ã', 0xc4: 'Ä', 0xc5: 'Å', 0xc6: 'Æ', + 0xc7: 'Ç', 0xc8: 'È', 0xc9: 'É', 0xca: 'Ê', 0xcb: 'Ë', + 0xcc: 'Ì', 0xcd: 'Í', 0xce: 'Î', 0xcf: 'Ï', 0xd0: 'Ð', + 0xd1: 'Ñ', 0xd2: 'Ò', 0xd3: 'Ó', 0xd4: 'Ô', 0xd5: 'Õ', + 0xd6: 'Ö', 0xd7: '×', 0xd8: 'Ø', 0xd9: 'Ù', 0xda: 'Ú', + 0xdb: 'Û', 0xdc: 'Ü', 0xdd: 'Ý', 0xde: 'Þ', 0xdf: 'ß', + 0xe0: 'à', 0xe1: 'á', 0xe2: 'â', 0xe3: 'ã', 0xe4: 'ä', + 0xe5: 'å', 0xe6: 'æ', 0xe7: 'ç', 0xe8: 'è', 0xe9: 'é', + 0xea: 'ê', 0xeb: 'ë', 0xec: 'ì', 0xed: 'í', 0xee: 'î', + 0xef: 'ï', 0xf0: 'ð', 0xf1: 'ñ', 0xf2: 'ò', 0xf3: 'ó', + 0xf4: 'ô', 0xf5: 'õ', 0xf6: 'ö', 0xf7: '÷', 0xf8: 'ø', + 0xf9: 'ù', 0xfa: 'ú', 0xfb: 'û', 0xfc: 'ü', 0xfd: 'ý', + 0xfe: 'þ', 0xff: 'ÿ', +} + +var standardCharToRune = map[byte]rune{ // 149 entries + 0x20: ' ', 0x21: '!', 0x22: '"', 0x23: '#', 0x24: '$', + 0x25: '%', 0x26: '&', 0x27: '’', 0x28: '(', 0x29: ')', + 0x2a: '*', 0x2b: '+', 0x2c: ',', 0x2d: '-', 0x2e: '.', + 0x2f: '/', 0x30: '0', 0x31: '1', 0x32: '2', 0x33: '3', + 0x34: '4', 0x35: '5', 0x36: '6', 0x37: 
'7', 0x38: '8', + 0x39: '9', 0x3a: ':', 0x3b: ';', 0x3c: '<', 0x3d: '=', + 0x3e: '>', 0x3f: '?', 0x40: '@', 0x41: 'A', 0x42: 'B', + 0x43: 'C', 0x44: 'D', 0x45: 'E', 0x46: 'F', 0x47: 'G', + 0x48: 'H', 0x49: 'I', 0x4a: 'J', 0x4b: 'K', 0x4c: 'L', + 0x4d: 'M', 0x4e: 'N', 0x4f: 'O', 0x50: 'P', 0x51: 'Q', + 0x52: 'R', 0x53: 'S', 0x54: 'T', 0x55: 'U', 0x56: 'V', + 0x57: 'W', 0x58: 'X', 0x59: 'Y', 0x5a: 'Z', 0x5b: '[', + 0x5c: '\\', 0x5d: ']', 0x5e: '^', 0x5f: '_', 0x60: '`', + 0x61: 'a', 0x62: 'b', 0x63: 'c', 0x64: 'd', 0x65: 'e', + 0x66: 'f', 0x67: 'g', 0x68: 'h', 0x69: 'i', 0x6a: 'j', + 0x6b: 'k', 0x6c: 'l', 0x6d: 'm', 0x6e: 'n', 0x6f: 'o', + 0x70: 'p', 0x71: 'q', 0x72: 'r', 0x73: 's', 0x74: 't', + 0x75: 'u', 0x76: 'v', 0x77: 'w', 0x78: 'x', 0x79: 'y', + 0x7a: 'z', 0x7b: '{', 0x7c: '|', 0x7d: '}', 0x7e: '~', + 0xa1: '¡', 0xa2: '¢', 0xa3: '£', 0xa4: '⁄', 0xa5: '¥', + 0xa6: 'ƒ', 0xa7: '§', 0xa8: '¤', 0xa9: '\'', 0xaa: '“', + 0xab: '«', 0xac: '‹', 0xad: '›', 0xae: 'fi', 0xaf: 'fl', + 0xb1: '–', 0xb2: '†', 0xb3: '‡', 0xb4: '·', 0xb6: '¶', + 0xb7: '•', 0xb8: '‚', 0xb9: '„', 0xba: '”', 0xbb: '»', + 0xbc: '…', 0xbd: '‰', 0xbf: '¿', 0xc1: '`', 0xc2: '´', + 0xc3: 'ˆ', 0xc4: '˜', 0xc5: '¯', 0xc6: '˘', 0xc7: '˙', + 0xc8: '¨', 0xca: '˚', 0xcb: '¸', 0xcc: '˝', 0xcd: '˛', + 0xce: 'ˇ', 0xcf: '—', 0xe0: 'Æ', 0xe2: 'ª', 0xe7: 'Ł', + 0xe8: 'Ø', 0xe9: 'Œ', 0xea: 'º', 0xf0: 'æ', 0xf5: 'ı', + 0xf7: 'ł', 0xf8: 'ø', 0xf9: 'œ', 0xfa: 'ß', +} diff --git a/pdf/internal/textencoding/simple_symbol.go b/pdf/internal/textencoding/simple_symbol.go deleted file mode 100644 index a44a2ef2..00000000 --- a/pdf/internal/textencoding/simple_symbol.go +++ /dev/null @@ -1,228 +0,0 @@ -/* - * This file is subject to the terms and conditions defined in - * file 'LICENSE.md', which is part of this source code package. 
- */ - -package textencoding - -import "sync" - -const baseSymbol = "SymbolEncoding" - -var ( - symbolOnce sync.Once - symbolRuneToChar map[rune]byte -) - -func init() { - RegisterSimpleEncoding(baseSymbol, NewSymbolEncoder) -} - -// NewSymbolEncoder returns a SimpleEncoder that implements SymbolEncoding. -func NewSymbolEncoder() SimpleEncoder { - symbolOnce.Do(initSymbol) - return &simpleEncoding{ - baseName: baseSymbol, - encode: symbolRuneToChar, - decode: symbolCharToRune, - } -} - -func initSymbol() { - symbolRuneToChar = make(map[rune]byte, len(symbolRuneToChar)) - for b, r := range symbolCharToRune { - symbolRuneToChar[r] = b - } -} - -var symbolCharToRune = map[byte]rune{ // 189 entries - 0x20: 0x0020, // "space" - 0x21: 0x0021, // ! "exclam" - 0x22: 0x2200, // ∀ "universal" - 0x23: 0x0023, // # "numbersign" - 0x24: 0x2203, // ∃ "existential" - 0x25: 0x0025, // % "percent" - 0x26: 0x0026, // & "ampersand" - 0x27: 0x220b, // ∋ "suchthat" - 0x28: 0x0028, // ( "parenleft" - 0x29: 0x0029, // ) "parenright" - 0x2a: 0x2217, // ∗ "asteriskmath" - 0x2b: 0x002b, // + "plus" - 0x2c: 0x002c, // , "comma" - 0x2d: 0x2212, // − "minus" - 0x2e: 0x002e, // . "period" - 0x2f: 0x002f, // / "slash" - 0x30: 0x0030, // 0 "zero" - 0x31: 0x0031, // 1 "one" - 0x32: 0x0032, // 2 "two" - 0x33: 0x0033, // 3 "three" - 0x34: 0x0034, // 4 "four" - 0x35: 0x0035, // 5 "five" - 0x36: 0x0036, // 6 "six" - 0x37: 0x0037, // 7 "seven" - 0x38: 0x0038, // 8 "eight" - 0x39: 0x0039, // 9 "nine" - 0x3a: 0x003a, // : "colon" - 0x3b: 0x003b, // ; "semicolon" - 0x3c: 0x003c, // < "less" - 0x3d: 0x003d, // = "equal" - 0x3e: 0x003e, // > "greater" - 0x3f: 0x003f, // ? 
"question" - 0x40: 0x2245, // ≅ "congruent" - 0x41: 0x0391, // Α "Alpha" - 0x42: 0x0392, // Β "Beta" - 0x43: 0x03a7, // Χ "Chi" - 0x44: 0x2206, // ∆ "Delta" - 0x45: 0x0395, // Ε "Epsilon" - 0x46: 0x03a6, // Φ "Phi" - 0x47: 0x0393, // Γ "Gamma" - 0x48: 0x0397, // Η "Eta" - 0x49: 0x0399, // Ι "Iota" - 0x4a: 0x03d1, // ϑ "theta1" - 0x4b: 0x039a, // Κ "Kappa" - 0x4c: 0x039b, // Λ "Lambda" - 0x4d: 0x039c, // Μ "Mu" - 0x4e: 0x039d, // Ν "Nu" - 0x4f: 0x039f, // Ο "Omicron" - 0x50: 0x03a0, // Π "Pi" - 0x51: 0x0398, // Θ "Theta" - 0x52: 0x03a1, // Ρ "Rho" - 0x53: 0x03a3, // Σ "Sigma" - 0x54: 0x03a4, // Τ "Tau" - 0x55: 0x03a5, // Υ "Upsilon" - 0x56: 0x03c2, // ς "sigma1" - 0x57: 0x2126, // Ω "Omega" - 0x58: 0x039e, // Ξ "Xi" - 0x59: 0x03a8, // Ψ "Psi" - 0x5a: 0x0396, // Ζ "Zeta" - 0x5b: 0x005b, // [ "bracketleft" - 0x5c: 0x2234, // ∴ "therefore" - 0x5d: 0x005d, // ] "bracketright" - 0x5e: 0x22a5, // ⊥ "perpendicular" - 0x5f: 0x005f, // _ "underscore" - 0x60: 0xf8e5, // "radicalex" - 0x61: 0x03b1, // α "alpha" - 0x62: 0x03b2, // β "beta" - 0x63: 0x03c7, // χ "chi" - 0x64: 0x03b4, // δ "delta" - 0x65: 0x03b5, // ε "epsilon" - 0x66: 0x03c6, // φ "phi" - 0x67: 0x03b3, // γ "gamma" - 0x68: 0x03b7, // η "eta" - 0x69: 0x03b9, // ι "iota" - 0x6a: 0x03d5, // ϕ "phi1" - 0x6b: 0x03ba, // κ "kappa" - 0x6c: 0x03bb, // λ "lambda" - 0x6d: 0x00b5, // µ "mu" - 0x6e: 0x03bd, // ν "nu" - 0x6f: 0x03bf, // ο "omicron" - 0x70: 0x03c0, // π "pi" - 0x71: 0x03b8, // θ "theta" - 0x72: 0x03c1, // ρ "rho" - 0x73: 0x03c3, // σ "sigma" - 0x74: 0x03c4, // τ "tau" - 0x75: 0x03c5, // υ "upsilon" - 0x76: 0x03d6, // ϖ "omega1" - 0x77: 0x03c9, // ω "omega" - 0x78: 0x03be, // ξ "xi" - 0x79: 0x03c8, // ψ "psi" - 0x7a: 0x03b6, // ζ "zeta" - 0x7b: 0x007b, // { "braceleft" - 0x7c: 0x007c, // | "bar" - 0x7d: 0x007d, // } "braceright" - 0x7e: 0x223c, // ∼ "similar" - 0xa0: 0x20ac, // € "Euro" - 0xa1: 0x03d2, // ϒ "Upsilon1" - 0xa2: 0x2032, // ′ "minute" - 0xa3: 0x2264, // ≤ "lessequal" - 0xa4: 0x2044, // ⁄ "fraction" 
- 0xa5: 0x221e, // ∞ "infinity" - 0xa6: 0x0192, // ƒ "florin" - 0xa7: 0x2663, // ♣ "club" - 0xa8: 0x2666, // ♦ "diamond" - 0xa9: 0x2665, // ♥ "heart" - 0xaa: 0x2660, // ♠ "spade" - 0xab: 0x2194, // ↔ "arrowboth" - 0xac: 0x2190, // ← "arrowleft" - 0xad: 0x2191, // ↑ "arrowup" - 0xae: 0x2192, // → "arrowright" - 0xaf: 0x2193, // ↓ "arrowdown" - 0xb0: 0x00b0, // ° "degree" - 0xb1: 0x00b1, // ± "plusminus" - 0xb2: 0x2033, // ″ "second" - 0xb3: 0x2265, // ≥ "greaterequal" - 0xb4: 0x00d7, // × "multiply" - 0xb5: 0x221d, // ∝ "proportional" - 0xb6: 0x2202, // ∂ "partialdiff" - 0xb7: 0x2022, // • "bullet" - 0xb8: 0x00f7, // ÷ "divide" - 0xb9: 0x2260, // ≠ "notequal" - 0xba: 0x2261, // ≡ "equivalence" - 0xbb: 0x2248, // ≈ "approxequal" - 0xbc: 0x2026, // … "ellipsis" - 0xbd: 0xf8e6, // "arrowvertex" - 0xbe: 0xf8e7, // "arrowhorizex" - 0xbf: 0x21b5, // ↵ "carriagereturn" - 0xc0: 0x2135, // ℵ "aleph" - 0xc1: 0x2111, // ℑ "Ifraktur" - 0xc2: 0x211c, // ℜ "Rfraktur" - 0xc3: 0x2118, // ℘ "weierstrass" - 0xc4: 0x2297, // ⊗ "circlemultiply" - 0xc5: 0x2295, // ⊕ "circleplus" - 0xc6: 0x2205, // ∅ "emptyset" - 0xc7: 0x2229, // ∩ "intersection" - 0xc8: 0x222a, // ∪ "union" - 0xc9: 0x2283, // ⊃ "propersuperset" - 0xca: 0x2287, // ⊇ "reflexsuperset" - 0xcb: 0x2284, // ⊄ "notsubset" - 0xcc: 0x2282, // ⊂ "propersubset" - 0xcd: 0x2286, // ⊆ "reflexsubset" - 0xce: 0x2208, // ∈ "element" - 0xcf: 0x2209, // ∉ "notelement" - 0xd0: 0x2220, // ∠ "angle" - 0xd1: 0x2207, // ∇ "gradient" - 0xd2: 0xf6da, // "registerserif" - 0xd3: 0xf6d9, // "copyrightserif" - 0xd4: 0xf6db, // "trademarkserif" - 0xd5: 0x220f, // ∏ "product" - 0xd6: 0x221a, // √ "radical" - 0xd7: 0x22c5, // ⋅ "dotmath" - 0xd8: 0x00ac, // ¬ "logicalnot" - 0xd9: 0x2227, // ∧ "logicaland" - 0xda: 0x2228, // ∨ "logicalor" - 0xdb: 0x21d4, // ⇔ "arrowdblboth" - 0xdc: 0x21d0, // ⇐ "arrowdblleft" - 0xdd: 0x21d1, // ⇑ "arrowdblup" - 0xde: 0x21d2, // ⇒ "arrowdblright" - 0xdf: 0x21d3, // ⇓ "arrowdbldown" - 0xe0: 0x25ca, // ◊ "lozenge" - 0xe1: 
0x2329, // 〈 "angleleft" - 0xe2: 0xf8e8, // "registersans" - 0xe3: 0xf8e9, // "copyrightsans" - 0xe4: 0xf8ea, // "trademarksans" - 0xe5: 0x2211, // ∑ "summation" - 0xe6: 0xf8eb, // "parenlefttp" - 0xe7: 0xf8ec, // "parenleftex" - 0xe8: 0xf8ed, // "parenleftbt" - 0xe9: 0xf8ee, // "bracketlefttp" - 0xea: 0xf8ef, // "bracketleftex" - 0xeb: 0xf8f0, // "bracketleftbt" - 0xec: 0xf8f1, // "bracelefttp" - 0xed: 0xf8f2, // "braceleftmid" - 0xee: 0xf8f3, // "braceleftbt" - 0xef: 0xf8f4, // "braceex" - 0xf1: 0x232a, // 〉 "angleright" - 0xf2: 0x222b, // ∫ "integral" - 0xf3: 0x2320, // ⌠ "integraltp" - 0xf4: 0xf8f5, // "integralex" - 0xf5: 0x2321, // ⌡ "integralbt" - 0xf6: 0xf8f6, // "parenrighttp" - 0xf7: 0xf8f7, // "parenrightex" - 0xf8: 0xf8f8, // "parenrightbt" - 0xf9: 0xf8f9, // "bracketrighttp" - 0xfa: 0xf8fa, // "bracketrightex" - 0xfb: 0xf8fb, // "bracketrightbt" - 0xfc: 0xf8fc, // "bracerighttp" - 0xfd: 0xf8fd, // "bracerightmid" - 0xfe: 0xf8fe, // "bracerightbt" -} diff --git a/pdf/internal/textencoding/simple_zapfdingbats.go b/pdf/internal/textencoding/simple_zapfdingbats.go deleted file mode 100644 index 6eae8e6b..00000000 --- a/pdf/internal/textencoding/simple_zapfdingbats.go +++ /dev/null @@ -1,241 +0,0 @@ -/* - * This file is subject to the terms and conditions defined in - * file 'LICENSE.md', which is part of this source code package. - */ - -package textencoding - -import "sync" - -const baseZapfDingbats = "ZapfDingbatsEncoding" - -var ( - zapfDingbatsOnce sync.Once - zapfDingbatsRuneToChar map[rune]byte -) - -func init() { - RegisterSimpleEncoding(baseZapfDingbats, NewZapfDingbatsEncoder) -} - -// NewZapfDingbatsEncoder returns a SimpleEncoder that implements ZapfDingbatsEncoding. 
-func NewZapfDingbatsEncoder() SimpleEncoder { - zapfDingbatsOnce.Do(initZapfDingbats) - return &simpleEncoding{ - baseName: baseZapfDingbats, - encode: zapfDingbatsRuneToChar, - decode: zapfDingbatsCharToRune, - } -} - -func initZapfDingbats() { - zapfDingbatsRuneToChar = make(map[rune]byte, len(zapfDingbatsRuneToChar)) - for b, r := range zapfDingbatsCharToRune { - zapfDingbatsRuneToChar[r] = b - } -} - -var zapfDingbatsCharToRune = map[byte]rune{ // 202 entries - 0x20: 0x0020, // "space" - 0x21: 0x2701, // ✁ "a1" - 0x22: 0x2702, // ✂ "a2" - 0x23: 0x2703, // ✃ "a202" - 0x24: 0x2704, // ✄ "a3" - 0x25: 0x260e, // ☎ "a4" - 0x26: 0x2706, // ✆ "a5" - 0x27: 0x2707, // ✇ "a119" - 0x28: 0x2708, // ✈ "a118" - 0x29: 0x2709, // ✉ "a117" - 0x2a: 0x261b, // ☛ "a11" - 0x2b: 0x261e, // ☞ "a12" - 0x2c: 0x270c, // ✌ "a13" - 0x2d: 0x270d, // ✍ "a14" - 0x2e: 0x270e, // ✎ "a15" - 0x2f: 0x270f, // ✏ "a16" - 0x30: 0x2710, // ✐ "a105" - 0x31: 0x2711, // ✑ "a17" - 0x32: 0x2712, // ✒ "a18" - 0x33: 0x2713, // ✓ "a19" - 0x34: 0x2714, // ✔ "a20" - 0x35: 0x2715, // ✕ "a21" - 0x36: 0x2716, // ✖ "a22" - 0x37: 0x2717, // ✗ "a23" - 0x38: 0x2718, // ✘ "a24" - 0x39: 0x2719, // ✙ "a25" - 0x3a: 0x271a, // ✚ "a26" - 0x3b: 0x271b, // ✛ "a27" - 0x3c: 0x271c, // ✜ "a28" - 0x3d: 0x271d, // ✝ "a6" - 0x3e: 0x271e, // ✞ "a7" - 0x3f: 0x271f, // ✟ "a8" - 0x40: 0x2720, // ✠ "a9" - 0x41: 0x2721, // ✡ "a10" - 0x42: 0x2722, // ✢ "a29" - 0x43: 0x2723, // ✣ "a30" - 0x44: 0x2724, // ✤ "a31" - 0x45: 0x2725, // ✥ "a32" - 0x46: 0x2726, // ✦ "a33" - 0x47: 0x2727, // ✧ "a34" - 0x48: 0x2605, // ★ "a35" - 0x49: 0x2729, // ✩ "a36" - 0x4a: 0x272a, // ✪ "a37" - 0x4b: 0x272b, // ✫ "a38" - 0x4c: 0x272c, // ✬ "a39" - 0x4d: 0x272d, // ✭ "a40" - 0x4e: 0x272e, // ✮ "a41" - 0x4f: 0x272f, // ✯ "a42" - 0x50: 0x2730, // ✰ "a43" - 0x51: 0x2731, // ✱ "a44" - 0x52: 0x2732, // ✲ "a45" - 0x53: 0x2733, // ✳ "a46" - 0x54: 0x2734, // ✴ "a47" - 0x55: 0x2735, // ✵ "a48" - 0x56: 0x2736, // ✶ "a49" - 0x57: 0x2737, // ✷ "a50" - 0x58: 0x2738, // ✸ 
"a51" - 0x59: 0x2739, // ✹ "a52" - 0x5a: 0x273a, // ✺ "a53" - 0x5b: 0x273b, // ✻ "a54" - 0x5c: 0x273c, // ✼ "a55" - 0x5d: 0x273d, // ✽ "a56" - 0x5e: 0x273e, // ✾ "a57" - 0x5f: 0x273f, // ✿ "a58" - 0x60: 0x2740, // ❀ "a59" - 0x61: 0x2741, // ❁ "a60" - 0x62: 0x2742, // ❂ "a61" - 0x63: 0x2743, // ❃ "a62" - 0x64: 0x2744, // ❄ "a63" - 0x65: 0x2745, // ❅ "a64" - 0x66: 0x2746, // ❆ "a65" - 0x67: 0x2747, // ❇ "a66" - 0x68: 0x2748, // ❈ "a67" - 0x69: 0x2749, // ❉ "a68" - 0x6a: 0x274a, // ❊ "a69" - 0x6b: 0x274b, // ❋ "a70" - 0x6c: 0x25cf, // ● "a71" - 0x6d: 0x274d, // ❍ "a72" - 0x6e: 0x25a0, // ■ "a73" - 0x6f: 0x274f, // ❏ "a74" - 0x70: 0x2750, // ❐ "a203" - 0x71: 0x2751, // ❑ "a75" - 0x72: 0x2752, // ❒ "a204" - 0x73: 0x25b2, // ▲ "a76" - 0x74: 0x25bc, // ▼ "a77" - 0x75: 0x25c6, // ◆ "a78" - 0x76: 0x2756, // ❖ "a79" - 0x77: 0x25d7, // ◗ "a81" - 0x78: 0x2758, // ❘ "a82" - 0x79: 0x2759, // ❙ "a83" - 0x7a: 0x275a, // ❚ "a84" - 0x7b: 0x275b, // ❛ "a97" - 0x7c: 0x275c, // ❜ "a98" - 0x7d: 0x275d, // ❝ "a99" - 0x7e: 0x275e, // ❞ "a100" - 0x80: 0xf8d7, // "a89" - 0x81: 0xf8d8, // "a90" - 0x82: 0xf8d9, // "a93" - 0x83: 0xf8da, // "a94" - 0x84: 0xf8db, // "a91" - 0x85: 0xf8dc, // "a92" - 0x86: 0xf8dd, // "a205" - 0x87: 0xf8de, // "a85" - 0x88: 0xf8df, // "a206" - 0x89: 0xf8e0, // "a86" - 0x8a: 0xf8e1, // "a87" - 0x8b: 0xf8e2, // "a88" - 0x8c: 0xf8e3, // "a95" - 0x8d: 0xf8e4, // "a96" - 0xa1: 0x2761, // ❡ "a101" - 0xa2: 0x2762, // ❢ "a102" - 0xa3: 0x2763, // ❣ "a103" - 0xa4: 0x2764, // ❤ "a104" - 0xa5: 0x2765, // ❥ "a106" - 0xa6: 0x2766, // ❦ "a107" - 0xa7: 0x2767, // ❧ "a108" - 0xa8: 0x2663, // ♣ "a112" - 0xa9: 0x2666, // ♦ "a111" - 0xaa: 0x2665, // ♥ "a110" - 0xab: 0x2660, // ♠ "a109" - 0xac: 0x2460, // ① "a120" - 0xad: 0x2461, // ② "a121" - 0xae: 0x2462, // ③ "a122" - 0xaf: 0x2463, // ④ "a123" - 0xb0: 0x2464, // ⑤ "a124" - 0xb1: 0x2465, // ⑥ "a125" - 0xb2: 0x2466, // ⑦ "a126" - 0xb3: 0x2467, // ⑧ "a127" - 0xb4: 0x2468, // ⑨ "a128" - 0xb5: 0x2469, // ⑩ "a129" - 0xb6: 0x2776, // ❶ 
"a130" - 0xb7: 0x2777, // ❷ "a131" - 0xb8: 0x2778, // ❸ "a132" - 0xb9: 0x2779, // ❹ "a133" - 0xba: 0x277a, // ❺ "a134" - 0xbb: 0x277b, // ❻ "a135" - 0xbc: 0x277c, // ❼ "a136" - 0xbd: 0x277d, // ❽ "a137" - 0xbe: 0x277e, // ❾ "a138" - 0xbf: 0x277f, // ❿ "a139" - 0xc0: 0x2780, // ➀ "a140" - 0xc1: 0x2781, // ➁ "a141" - 0xc2: 0x2782, // ➂ "a142" - 0xc3: 0x2783, // ➃ "a143" - 0xc4: 0x2784, // ➄ "a144" - 0xc5: 0x2785, // ➅ "a145" - 0xc6: 0x2786, // ➆ "a146" - 0xc7: 0x2787, // ➇ "a147" - 0xc8: 0x2788, // ➈ "a148" - 0xc9: 0x2789, // ➉ "a149" - 0xca: 0x278a, // ➊ "a150" - 0xcb: 0x278b, // ➋ "a151" - 0xcc: 0x278c, // ➌ "a152" - 0xcd: 0x278d, // ➍ "a153" - 0xce: 0x278e, // ➎ "a154" - 0xcf: 0x278f, // ➏ "a155" - 0xd0: 0x2790, // ➐ "a156" - 0xd1: 0x2791, // ➑ "a157" - 0xd2: 0x2792, // ➒ "a158" - 0xd3: 0x2793, // ➓ "a159" - 0xd4: 0x2794, // ➔ "a160" - 0xd5: 0x2192, // → "a161" - 0xd6: 0x2194, // ↔ "a163" - 0xd7: 0x2195, // ↕ "a164" - 0xd8: 0x2798, // ➘ "a196" - 0xd9: 0x2799, // ➙ "a165" - 0xda: 0x279a, // ➚ "a192" - 0xdb: 0x279b, // ➛ "a166" - 0xdc: 0x279c, // ➜ "a167" - 0xdd: 0x279d, // ➝ "a168" - 0xde: 0x279e, // ➞ "a169" - 0xdf: 0x279f, // ➟ "a170" - 0xe0: 0x27a0, // ➠ "a171" - 0xe1: 0x27a1, // ➡ "a172" - 0xe2: 0x27a2, // ➢ "a173" - 0xe3: 0x27a3, // ➣ "a162" - 0xe4: 0x27a4, // ➤ "a174" - 0xe5: 0x27a5, // ➥ "a175" - 0xe6: 0x27a6, // ➦ "a176" - 0xe7: 0x27a7, // ➧ "a177" - 0xe8: 0x27a8, // ➨ "a178" - 0xe9: 0x27a9, // ➩ "a179" - 0xea: 0x27aa, // ➪ "a193" - 0xeb: 0x27ab, // ➫ "a180" - 0xec: 0x27ac, // ➬ "a199" - 0xed: 0x27ad, // ➭ "a181" - 0xee: 0x27ae, // ➮ "a200" - 0xef: 0x27af, // ➯ "a182" - 0xf1: 0x27b1, // ➱ "a201" - 0xf2: 0x27b2, // ➲ "a183" - 0xf3: 0x27b3, // ➳ "a184" - 0xf4: 0x27b4, // ➴ "a197" - 0xf5: 0x27b5, // ➵ "a185" - 0xf6: 0x27b6, // ➶ "a194" - 0xf7: 0x27b7, // ➷ "a198" - 0xf8: 0x27b8, // ➸ "a186" - 0xf9: 0x27b9, // ➹ "a195" - 0xfa: 0x27ba, // ➺ "a187" - 0xfb: 0x27bb, // ➻ "a188" - 0xfc: 0x27bc, // ➼ "a189" - 0xfd: 0x27bd, // ➽ "a190" - 0xfe: 0x27be, // ➾ "a191" -} 
From 0fe2f0a27ad81178bd9abbce64998bd9b31b990e Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Wed, 2 Jan 2019 17:03:03 +0200 Subject: [PATCH 10/11] textencoding: alias x/text/transform import to avoid confusion --- pdf/internal/textencoding/simple.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pdf/internal/textencoding/simple.go b/pdf/internal/textencoding/simple.go index 6b417c3c..b31c1418 100644 --- a/pdf/internal/textencoding/simple.go +++ b/pdf/internal/textencoding/simple.go @@ -14,7 +14,7 @@ import ( "github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/pdf/core" "golang.org/x/text/encoding" - "golang.org/x/text/transform" + xtransform "golang.org/x/text/transform" ) // SimpleEncoder represents a 1 byte encoding. @@ -121,7 +121,7 @@ type simpleDecoder struct { m map[byte]rune } -// Transform implements transform.Transformer. +// Transform implements xtransform.Transformer. func (enc simpleDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, _ error) { for len(src) != 0 { b := src[0] @@ -132,7 +132,7 @@ func (enc simpleDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, r = MissingCodeRune } if utf8.RuneLen(r) > len(dst) { - return nDst, nSrc, transform.ErrShortDst + return nDst, nSrc, xtransform.ErrShortDst } n := utf8.EncodeRune(dst, r) dst = dst[n:] @@ -143,7 +143,7 @@ func (enc simpleDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, return nDst, nSrc, nil } -// Reset implements transform.Transformer. +// Reset implements xtransform.Transformer. func (enc simpleDecoder) Reset() {} // NewEncoder implements encoding.Encoding. @@ -155,13 +155,13 @@ type simpleEncoder struct { m map[rune]byte } -// Transform implements transform.Transformer. +// Transform implements xtransform.Transformer. 
func (enc simpleEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, _ error) { for len(src) != 0 { if !utf8.FullRune(src) && !atEOF { - return nDst, nSrc, transform.ErrShortSrc + return nDst, nSrc, xtransform.ErrShortSrc } else if len(dst) == 0 { - return nDst, nSrc, transform.ErrShortDst + return nDst, nSrc, xtransform.ErrShortDst } r, n := utf8.DecodeRune(src) if r == utf8.RuneError { @@ -182,7 +182,7 @@ func (enc simpleEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, return nDst, nSrc, nil } -// Reset implements transform.Transformer. +// Reset implements xtransform.Transformer. func (enc simpleEncoder) Reset() {} // String returns a text representation of encoding. From 4a376ec6519b4c0b6192d532f45dbeadfd005b41 Mon Sep 17 00:00:00 2001 From: Denys Smirnov Date: Sat, 5 Jan 2019 18:32:53 +0200 Subject: [PATCH 11/11] textencoding: define WinAnsi directly instead of using CP1252 --- pdf/internal/textencoding/simple_winansi.go | 106 ++++++++++---------- 1 file changed, 51 insertions(+), 55 deletions(-) diff --git a/pdf/internal/textencoding/simple_winansi.go b/pdf/internal/textencoding/simple_winansi.go index a4e8ddad..ed464a2d 100644 --- a/pdf/internal/textencoding/simple_winansi.go +++ b/pdf/internal/textencoding/simple_winansi.go @@ -5,69 +5,65 @@ package textencoding -import ( - "sync" - - "golang.org/x/text/encoding/charmap" -) - const baseWinAnsi = "WinAnsiEncoding" +var ( + winAnsi = newSimpleMapping(baseWinAnsi, winAnsiCharToRune) +) + func init() { RegisterSimpleEncoding(baseWinAnsi, NewWinAnsiEncoder) } -var ( - winAnsiOnce sync.Once - winAnsiCharToRune map[byte]rune - winAnsiRuneToChar map[rune]byte -) - // NewWinAnsiEncoder returns a simpleEncoder that implements WinAnsiEncoding. 
func NewWinAnsiEncoder() SimpleEncoder { - winAnsiOnce.Do(initWinAnsi) - return &simpleEncoding{ - baseName: baseWinAnsi, - encode: winAnsiRuneToChar, - decode: winAnsiCharToRune, - } + return winAnsi.NewEncoder() } -func initWinAnsi() { - winAnsiCharToRune = make(map[byte]rune, 256) - winAnsiRuneToChar = make(map[rune]byte, 256) - - // WinAnsiEncoding is also known as CP1252 - enc := charmap.Windows1252 - - // in WinAnsiEncoding, comparing to CP1252, all unused and - // non-visual codes are replaced with '•' character - const bullet = '•' - replace := map[byte]rune{ - 127: bullet, // DEL - - // unused - 129: bullet, - 141: bullet, - 143: bullet, - 144: bullet, - 157: bullet, - - // typographically similar - 160: ' ', // non-breaking space -> space - 173: '-', // soft hyphen -> hyphen - } - - for i := int(' '); i < 256; i++ { - b := byte(i) - r := enc.DecodeByte(b) - - // don't use replace map. since it creates duplicates - winAnsiRuneToChar[r] = b - - if rp, ok := replace[b]; ok { - r = rp - } - winAnsiCharToRune[b] = r - } +var winAnsiCharToRune = map[byte]rune{ // 224 entries + 0x20: ' ', 0x21: '!', 0x22: '"', 0x23: '#', 0x24: '$', + 0x25: '%', 0x26: '&', 0x27: '\'', 0x28: '(', 0x29: ')', + 0x2a: '*', 0x2b: '+', 0x2c: ',', 0x2d: '-', 0x2e: '.', + 0x2f: '/', 0x30: '0', 0x31: '1', 0x32: '2', 0x33: '3', + 0x34: '4', 0x35: '5', 0x36: '6', 0x37: '7', 0x38: '8', + 0x39: '9', 0x3a: ':', 0x3b: ';', 0x3c: '<', 0x3d: '=', + 0x3e: '>', 0x3f: '?', 0x40: '@', 0x41: 'A', 0x42: 'B', + 0x43: 'C', 0x44: 'D', 0x45: 'E', 0x46: 'F', 0x47: 'G', + 0x48: 'H', 0x49: 'I', 0x4a: 'J', 0x4b: 'K', 0x4c: 'L', + 0x4d: 'M', 0x4e: 'N', 0x4f: 'O', 0x50: 'P', 0x51: 'Q', + 0x52: 'R', 0x53: 'S', 0x54: 'T', 0x55: 'U', 0x56: 'V', + 0x57: 'W', 0x58: 'X', 0x59: 'Y', 0x5a: 'Z', 0x5b: '[', + 0x5c: '\\', 0x5d: ']', 0x5e: '^', 0x5f: '_', 0x60: '`', + 0x61: 'a', 0x62: 'b', 0x63: 'c', 0x64: 'd', 0x65: 'e', + 0x66: 'f', 0x67: 'g', 0x68: 'h', 0x69: 'i', 0x6a: 'j', + 0x6b: 'k', 0x6c: 'l', 0x6d: 'm', 0x6e: 'n', 
0x6f: 'o', + 0x70: 'p', 0x71: 'q', 0x72: 'r', 0x73: 's', 0x74: 't', + 0x75: 'u', 0x76: 'v', 0x77: 'w', 0x78: 'x', 0x79: 'y', + 0x7a: 'z', 0x7b: '{', 0x7c: '|', 0x7d: '}', 0x7e: '~', + 0x7f: '•', 0x80: '€', 0x81: '•', 0x82: '‚', 0x83: 'ƒ', + 0x84: '„', 0x85: '…', 0x86: '†', 0x87: '‡', 0x88: 'ˆ', + 0x89: '‰', 0x8a: 'Š', 0x8b: '‹', 0x8c: 'Œ', 0x8d: '•', + 0x8e: 'Ž', 0x8f: '•', 0x90: '•', 0x91: '‘', 0x92: '’', + 0x93: '“', 0x94: '”', 0x95: '•', 0x96: '–', 0x97: '—', + 0x98: '˜', 0x99: '™', 0x9a: 'š', 0x9b: '›', 0x9c: 'œ', + 0x9d: '•', 0x9e: 'ž', 0x9f: 'Ÿ', 0xa0: ' ', 0xa1: '¡', + 0xa2: '¢', 0xa3: '£', 0xa4: '¤', 0xa5: '¥', 0xa6: '¦', + 0xa7: '§', 0xa8: '¨', 0xa9: '©', 0xaa: 'ª', 0xab: '«', + 0xac: '¬', 0xad: '-', 0xae: '®', 0xaf: '¯', 0xb0: '°', + 0xb1: '±', 0xb2: '²', 0xb3: '³', 0xb4: '´', 0xb5: 'µ', + 0xb6: '¶', 0xb7: '·', 0xb8: '¸', 0xb9: '¹', 0xba: 'º', + 0xbb: '»', 0xbc: '¼', 0xbd: '½', 0xbe: '¾', 0xbf: '¿', + 0xc0: 'À', 0xc1: 'Á', 0xc2: 'Â', 0xc3: 'Ã', 0xc4: 'Ä', + 0xc5: 'Å', 0xc6: 'Æ', 0xc7: 'Ç', 0xc8: 'È', 0xc9: 'É', + 0xca: 'Ê', 0xcb: 'Ë', 0xcc: 'Ì', 0xcd: 'Í', 0xce: 'Î', + 0xcf: 'Ï', 0xd0: 'Ð', 0xd1: 'Ñ', 0xd2: 'Ò', 0xd3: 'Ó', + 0xd4: 'Ô', 0xd5: 'Õ', 0xd6: 'Ö', 0xd7: '×', 0xd8: 'Ø', + 0xd9: 'Ù', 0xda: 'Ú', 0xdb: 'Û', 0xdc: 'Ü', 0xdd: 'Ý', + 0xde: 'Þ', 0xdf: 'ß', 0xe0: 'à', 0xe1: 'á', 0xe2: 'â', + 0xe3: 'ã', 0xe4: 'ä', 0xe5: 'å', 0xe6: 'æ', 0xe7: 'ç', + 0xe8: 'è', 0xe9: 'é', 0xea: 'ê', 0xeb: 'ë', 0xec: 'ì', + 0xed: 'í', 0xee: 'î', 0xef: 'ï', 0xf0: 'ð', 0xf1: 'ñ', + 0xf2: 'ò', 0xf3: 'ó', 0xf4: 'ô', 0xf5: 'õ', 0xf6: 'ö', + 0xf7: '÷', 0xf8: 'ø', 0xf9: 'ù', 0xfa: 'ú', 0xfb: 'û', + 0xfc: 'ü', 0xfd: 'ý', 0xfe: 'þ', 0xff: 'ÿ', }