mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
Use StandardEncoding (not WinAnsiEncoding) for built-in standard fonts. Fix test cases.
Also add test cases and fix the encoding table based on observed errors.
This commit is contained in:
parent
936bf446fc
commit
ee1416433c
@ -218,7 +218,7 @@ var fileExtractionTests = []struct {
|
||||
{filename: "/rfc6962.txt.pdf",
|
||||
expectedPageText: map[int][]string{
|
||||
4: []string{
|
||||
"timestamps for certificates they then don't log",
|
||||
"timestamps for certificates they then don’t log",
|
||||
`The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",`},
|
||||
},
|
||||
},
|
||||
|
@ -97,7 +97,7 @@ var standardCharToRune = map[byte]rune{ // 149 entries
|
||||
0x4d: 'M', 0x4e: 'N', 0x4f: 'O', 0x50: 'P', 0x51: 'Q',
|
||||
0x52: 'R', 0x53: 'S', 0x54: 'T', 0x55: 'U', 0x56: 'V',
|
||||
0x57: 'W', 0x58: 'X', 0x59: 'Y', 0x5a: 'Z', 0x5b: '[',
|
||||
0x5c: '\\', 0x5d: ']', 0x5e: '^', 0x5f: '_', 0x60: '`',
|
||||
0x5c: '\\', 0x5d: ']', 0x5e: '^', 0x5f: '_', 0x60: '‘',
|
||||
0x61: 'a', 0x62: 'b', 0x63: 'c', 0x64: 'd', 0x65: 'e',
|
||||
0x66: 'f', 0x67: 'g', 0x68: 'h', 0x69: 'i', 0x6a: 'j',
|
||||
0x6b: 'k', 0x6c: 'l', 0x6d: 'm', 0x6e: 'n', 0x6f: 'o',
|
||||
@ -112,7 +112,7 @@ var standardCharToRune = map[byte]rune{ // 149 entries
|
||||
0xbc: '…', 0xbd: '‰', 0xbf: '¿', 0xc1: '`', 0xc2: '´',
|
||||
0xc3: 'ˆ', 0xc4: '˜', 0xc5: '¯', 0xc6: '˘', 0xc7: '˙',
|
||||
0xc8: '¨', 0xca: '˚', 0xcb: '¸', 0xcc: '˝', 0xcd: '˛',
|
||||
0xce: 'ˇ', 0xcf: '—', 0xe0: 'Æ', 0xe2: 'ª', 0xe7: 'Ł',
|
||||
0xe8: 'Ø', 0xe9: 'Œ', 0xea: 'º', 0xf0: 'æ', 0xf5: 'ı',
|
||||
0xf7: 'ł', 0xf8: 'ø', 0xf9: 'œ', 0xfa: 'ß',
|
||||
0xce: 'ˇ', 0xcf: '—', 0xe1: 'Æ', 0xe3: 'ª', 0xe8: 'Ł',
|
||||
0xe9: 'Ø', 0xea: 'Œ', 0xeb: 'º', 0xf1: 'æ', 0xf5: 'ı',
|
||||
0xf8: 'ł', 0xf9: 'ø', 0xfa: 'œ', 0xfb: 'ß',
|
||||
}
|
||||
|
@ -344,7 +344,7 @@ func TestFontDescriptor(t *testing.T) {
|
||||
}
|
||||
|
||||
var charcodeBytesToUnicodeTest = []fontFragmentTest{
|
||||
{"Helvetica built-in",
|
||||
{"Helvetica WinAnsiEncoding",
|
||||
"./testdata/font/simple.txt", 1,
|
||||
[]byte{32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
|
||||
53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
|
||||
@ -362,6 +362,19 @@ var charcodeBytesToUnicodeTest = []fontFragmentTest{
|
||||
"abcdefghijklmnopqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹OEŽ‘’“”•–—˜™š›oežŸ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·" +
|
||||
"¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞfzàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ",
|
||||
},
|
||||
{"Helvetica built-in",
|
||||
"./testdata/font/simple.txt", 5,
|
||||
[]byte{
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
|
||||
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
|
||||
86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
|
||||
110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
|
||||
161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 177, 178, 179, 180, 182, 183,
|
||||
184, 185, 186, 187, 188, 189, 191, 193, 194, 195, 196, 197, 198, 199, 225, 227, 232, 241, 245, 248, 249,
|
||||
250, 251},
|
||||
` !"#$%&’()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_‘abcdefghijklmnopqrstuvwxyz{|}~` +
|
||||
`¡¢£⁄¥ƒ§¤'“«‹›fifl–†‡·¶•‚„”»…‰¿` + "`" + `´ˆ˜¯˘˙ÆªŁæıłøoefz`,
|
||||
},
|
||||
{"Symbol built-in",
|
||||
"./testdata/font/simple.txt", 3,
|
||||
[]byte{32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
|
||||
@ -436,8 +449,8 @@ var charcodeBytesToUnicodeTest = []fontFragmentTest{
|
||||
},
|
||||
{"Type1 font with /Encoding with /Differences",
|
||||
"./testdata/font/noise-invariant.txt", 102,
|
||||
[]byte{96, 247, 39, 32, 147, 231, 148, 32, 232, 32, 193, 111, 180, 32, 105, 116,
|
||||
169, 115, 32, 204, 195, 196, 197, 198, 199, 168, 202, 206, 226, 234, 172, 245, 173, 151,
|
||||
[]byte{96, 248, 39, 32, 147, 232, 148, 32, 233, 32, 193, 111, 180, 32, 105, 116,
|
||||
169, 115, 32, 204, 195, 196, 197, 198, 199, 168, 202, 206, 227, 235, 172, 245, 173, 151,
|
||||
177, 151, 178, 179, 183, 185, 188, 205, 184, 189},
|
||||
"‘ł’ “Ł” Ø `o´ it's ˝ˆ˜¯˘˙¨˚ˇªº‹ı›—–—†‡•„…˛¸‰",
|
||||
},
|
||||
|
@ -76,11 +76,9 @@ type StdFont struct {
|
||||
encoder textencoding.TextEncoder
|
||||
}
|
||||
|
||||
// NewStdFont returns a new instance of the font with a default encoder set (WinAnsiEncoding).
|
||||
// TODO(gunnsth): Create test cases to verify that the default encoder is WinAnsiEncoding or StandardEncoding.
|
||||
// NewStdFont returns a new instance of the font with a default encoder set (StandardEncoding).
|
||||
func NewStdFont(desc Descriptor, metrics map[rune]CharMetrics) StdFont {
|
||||
enc := textencoding.NewWinAnsiEncoder() // Default
|
||||
return NewStdFontWithEncoding(desc, metrics, enc)
|
||||
return NewStdFontWithEncoding(desc, metrics, textencoding.NewStandardEncoder())
|
||||
}
|
||||
|
||||
// NewStdFontWithEncoding returns a new instance of the font with a specified encoder.
|
||||
|
5
pdf/model/testdata/font/simple.txt
vendored
5
pdf/model/testdata/font/simple.txt
vendored
@ -1,5 +1,5 @@
|
||||
1 0 obj
|
||||
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
|
||||
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >>
|
||||
endobj
|
||||
2 0 obj
|
||||
<< /Type /Font /Subtype /Type1 /BaseFont /Courier >>
|
||||
@ -10,3 +10,6 @@ endobj
|
||||
4 0 obj
|
||||
<< /Type /Font /Subtype /Type1 /BaseFont /ZapfDingbats >>
|
||||
endobj
|
||||
5 0 obj
|
||||
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
|
||||
endobj
|
||||
|
Loading…
x
Reference in New Issue
Block a user