From 4a376ec6519b4c0b6192d532f45dbeadfd005b41 Mon Sep 17 00:00:00 2001
From: Denys Smirnov
Date: Sat, 5 Jan 2019 18:32:53 +0200
Subject: [PATCH] textencoding: define WinAnsi directly instead of using CP1252

---
Reviewer notes (free-form text between the "---" line and the diffstat; it is
not part of the commit message):

* Removes the "sync" and "golang.org/x/text/encoding/charmap" imports together
  with initWinAnsi, which filled winAnsiCharToRune and winAnsiRuneToChar on
  first use via winAnsiOnce.
* Adds the package-level winAnsi value, built by newSimpleMapping from the
  literal winAnsiCharToRune table. NewWinAnsiEncoder now returns
  winAnsi.NewEncoder().
* The table has one entry for every code from 0x20 to 0xff (224 entries).
  It keeps the decode-side substitutions of the removed code: 0x7f, 0x81,
  0x8d, 0x8f, 0x90 and 0x9d decode to '•' (as 0x95 does) and 0xad decodes to
  '-' (as 0x2d does).
* NOTE(review): 0xa0 appears to carry the removed "non-breaking space ->
  space" override as well; confirm the literal is U+0020 and not U+00A0,
  since the two look identical in most viewers.
* NOTE(review): the removed code filled the rune-to-byte map from the
  unreplaced CP1252 runes ("don't use replace map ... it creates
  duplicates"). newSimpleMapping and NewEncoder are not shown in this diff;
  if the reverse map is derived by ranging over winAnsiCharToRune, the byte
  chosen for '•' and '-' would depend on Go's map iteration order. Confirm
  how duplicate runes are resolved before merging.

 pdf/internal/textencoding/simple_winansi.go | 106 ++++++++++----------
 1 file changed, 51 insertions(+), 55 deletions(-)

diff --git a/pdf/internal/textencoding/simple_winansi.go b/pdf/internal/textencoding/simple_winansi.go
index a4e8ddad..ed464a2d 100644
--- a/pdf/internal/textencoding/simple_winansi.go
+++ b/pdf/internal/textencoding/simple_winansi.go
@@ -5,69 +5,65 @@
 
 package textencoding
 
-import (
-	"sync"
-
-	"golang.org/x/text/encoding/charmap"
-)
-
 const baseWinAnsi = "WinAnsiEncoding"
 
+var (
+	winAnsi = newSimpleMapping(baseWinAnsi, winAnsiCharToRune)
+)
+
 func init() {
 	RegisterSimpleEncoding(baseWinAnsi, NewWinAnsiEncoder)
 }
 
-var (
-	winAnsiOnce       sync.Once
-	winAnsiCharToRune map[byte]rune
-	winAnsiRuneToChar map[rune]byte
-)
-
 // NewWinAnsiEncoder returns a simpleEncoder that implements WinAnsiEncoding.
 func NewWinAnsiEncoder() SimpleEncoder {
-	winAnsiOnce.Do(initWinAnsi)
-	return &simpleEncoding{
-		baseName: baseWinAnsi,
-		encode:   winAnsiRuneToChar,
-		decode:   winAnsiCharToRune,
-	}
+	return winAnsi.NewEncoder()
 }
 
-func initWinAnsi() {
-	winAnsiCharToRune = make(map[byte]rune, 256)
-	winAnsiRuneToChar = make(map[rune]byte, 256)
-
-	// WinAnsiEncoding is also known as CP1252
-	enc := charmap.Windows1252
-
-	// in WinAnsiEncoding, comparing to CP1252, all unused and
-	// non-visual codes are replaced with '•' character
-	const bullet = '•'
-	replace := map[byte]rune{
-		127: bullet, // DEL
-
-		// unused
-		129: bullet,
-		141: bullet,
-		143: bullet,
-		144: bullet,
-		157: bullet,
-
-		// typographically similar
-		160: ' ', // non-breaking space -> space
-		173: '-', // soft hyphen -> hyphen
-	}
-
-	for i := int(' '); i < 256; i++ {
-		b := byte(i)
-		r := enc.DecodeByte(b)
-
-		// don't use replace map. since it creates duplicates
-		winAnsiRuneToChar[r] = b
-
-		if rp, ok := replace[b]; ok {
-			r = rp
-		}
-		winAnsiCharToRune[b] = r
-	}
+var winAnsiCharToRune = map[byte]rune{ // 224 entries
+	0x20: ' ', 0x21: '!', 0x22: '"', 0x23: '#', 0x24: '$',
+	0x25: '%', 0x26: '&', 0x27: '\'', 0x28: '(', 0x29: ')',
+	0x2a: '*', 0x2b: '+', 0x2c: ',', 0x2d: '-', 0x2e: '.',
+	0x2f: '/', 0x30: '0', 0x31: '1', 0x32: '2', 0x33: '3',
+	0x34: '4', 0x35: '5', 0x36: '6', 0x37: '7', 0x38: '8',
+	0x39: '9', 0x3a: ':', 0x3b: ';', 0x3c: '<', 0x3d: '=',
+	0x3e: '>', 0x3f: '?', 0x40: '@', 0x41: 'A', 0x42: 'B',
+	0x43: 'C', 0x44: 'D', 0x45: 'E', 0x46: 'F', 0x47: 'G',
+	0x48: 'H', 0x49: 'I', 0x4a: 'J', 0x4b: 'K', 0x4c: 'L',
+	0x4d: 'M', 0x4e: 'N', 0x4f: 'O', 0x50: 'P', 0x51: 'Q',
+	0x52: 'R', 0x53: 'S', 0x54: 'T', 0x55: 'U', 0x56: 'V',
+	0x57: 'W', 0x58: 'X', 0x59: 'Y', 0x5a: 'Z', 0x5b: '[',
+	0x5c: '\\', 0x5d: ']', 0x5e: '^', 0x5f: '_', 0x60: '`',
+	0x61: 'a', 0x62: 'b', 0x63: 'c', 0x64: 'd', 0x65: 'e',
+	0x66: 'f', 0x67: 'g', 0x68: 'h', 0x69: 'i', 0x6a: 'j',
+	0x6b: 'k', 0x6c: 'l', 0x6d: 'm', 0x6e: 'n', 0x6f: 'o',
+	0x70: 'p', 0x71: 'q', 0x72: 'r', 0x73: 's', 0x74: 't',
+	0x75: 'u', 0x76: 'v', 0x77: 'w', 0x78: 'x', 0x79: 'y',
+	0x7a: 'z', 0x7b: '{', 0x7c: '|', 0x7d: '}', 0x7e: '~',
+	0x7f: '•', 0x80: '€', 0x81: '•', 0x82: '‚', 0x83: 'ƒ',
+	0x84: '„', 0x85: '…', 0x86: '†', 0x87: '‡', 0x88: 'ˆ',
+	0x89: '‰', 0x8a: 'Š', 0x8b: '‹', 0x8c: 'Œ', 0x8d: '•',
+	0x8e: 'Ž', 0x8f: '•', 0x90: '•', 0x91: '‘', 0x92: '’',
+	0x93: '“', 0x94: '”', 0x95: '•', 0x96: '–', 0x97: '—',
+	0x98: '˜', 0x99: '™', 0x9a: 'š', 0x9b: '›', 0x9c: 'œ',
+	0x9d: '•', 0x9e: 'ž', 0x9f: 'Ÿ', 0xa0: ' ', 0xa1: '¡',
+	0xa2: '¢', 0xa3: '£', 0xa4: '¤', 0xa5: '¥', 0xa6: '¦',
+	0xa7: '§', 0xa8: '¨', 0xa9: '©', 0xaa: 'ª', 0xab: '«',
+	0xac: '¬', 0xad: '-', 0xae: '®', 0xaf: '¯', 0xb0: '°',
+	0xb1: '±', 0xb2: '²', 0xb3: '³', 0xb4: '´', 0xb5: 'µ',
+	0xb6: '¶', 0xb7: '·', 0xb8: '¸', 0xb9: '¹', 0xba: 'º',
+	0xbb: '»', 0xbc: '¼', 0xbd: '½', 0xbe: '¾', 0xbf: '¿',
+	0xc0: 'À', 0xc1: 'Á', 0xc2: 'Â', 0xc3: 'Ã', 0xc4: 'Ä',
+	0xc5: 'Å', 0xc6: 'Æ', 0xc7: 'Ç', 0xc8: 'È', 0xc9: 'É',
+	0xca: 'Ê', 0xcb: 'Ë', 0xcc: 'Ì', 0xcd: 'Í', 0xce: 'Î',
+	0xcf: 'Ï', 0xd0: 'Ð', 0xd1: 'Ñ', 0xd2: 'Ò', 0xd3: 'Ó',
+	0xd4: 'Ô', 0xd5: 'Õ', 0xd6: 'Ö', 0xd7: '×', 0xd8: 'Ø',
+	0xd9: 'Ù', 0xda: 'Ú', 0xdb: 'Û', 0xdc: 'Ü', 0xdd: 'Ý',
+	0xde: 'Þ', 0xdf: 'ß', 0xe0: 'à', 0xe1: 'á', 0xe2: 'â',
+	0xe3: 'ã', 0xe4: 'ä', 0xe5: 'å', 0xe6: 'æ', 0xe7: 'ç',
+	0xe8: 'è', 0xe9: 'é', 0xea: 'ê', 0xeb: 'ë', 0xec: 'ì',
+	0xed: 'í', 0xee: 'î', 0xef: 'ï', 0xf0: 'ð', 0xf1: 'ñ',
+	0xf2: 'ò', 0xf3: 'ó', 0xf4: 'ô', 0xf5: 'õ', 0xf6: 'ö',
+	0xf7: '÷', 0xf8: 'ø', 0xf9: 'ù', 0xfa: 'ú', 0xfb: 'û',
+	0xfc: 'ü', 0xfd: 'ý', 0xfe: 'þ', 0xff: 'ÿ',
 }