mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-30 13:48:51 +08:00
330 lines
7.1 KiB
Go
330 lines
7.1 KiB
Go
/*
 * This file is subject to the terms and conditions defined in
 * file 'LICENSE.md', which is part of this source code package.
 */
|
|
|
|
package cmap
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"github.com/unidoc/unidoc/common"
|
|
)
|
|
|
|
func init() {
|
|
//common.SetLogger(common.NewConsoleLogger(common.LogLevelDebug))
|
|
common.SetLogger(common.NewConsoleLogger(common.LogLevelTrace))
|
|
}
|
|
|
|
// cmap1Data represents a basic CMap. It declares one codespace range
// (<0000> <FFFF>), eight bfchar entries and seven bfrange entries, and is the
// input parsed by TestCMapParser1.
const cmap1Data = `
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (UCS)
/Supplement 0
>> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
8 beginbfchar
<0003> <0020>
<0007> <0024>
<0033> <0050>
<0035> <0052>
<0037> <0054>
<005A> <0077>
<005C> <0079>
<005F> <007C>
endbfchar
7 beginbfrange
<000F> <0017> <002C>
<001B> <001D> <0038>
<0025> <0026> <0042>
<002F> <0031> <004C>
<0044> <004C> <0061>
<004F> <0053> <006C>
<0055> <0057> <0072>
endbfrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end
`
|
|
|
|
// TestCMapParser tests basic loading of a simple CMap.
|
|
func TestCMapParser1(t *testing.T) {
|
|
common.SetLogger(common.NewConsoleLogger(common.LogLevelTrace))
|
|
|
|
cmap, err := LoadCmapFromData([]byte(cmap1Data))
|
|
if err != nil {
|
|
t.Error("Failed: ", err)
|
|
return
|
|
}
|
|
|
|
if cmap.Name() != "Adobe-Identity-UCS" {
|
|
t.Errorf("CMap name incorrect (%s)", cmap.Name())
|
|
return
|
|
}
|
|
|
|
if cmap.Type() != 2 {
|
|
t.Errorf("CMap type incorrect")
|
|
return
|
|
}
|
|
|
|
if len(cmap.codespaces) != 1 {
|
|
t.Errorf("len codespace != 1 (%d)", len(cmap.codespaces))
|
|
return
|
|
}
|
|
|
|
if cmap.codespaces[0].low != 0 {
|
|
t.Errorf("code space low range != 0 (%d)", cmap.codespaces[0].low)
|
|
return
|
|
}
|
|
|
|
if cmap.codespaces[0].high != 0xFFFF {
|
|
t.Errorf("code space high range != 0xffff (%d)", cmap.codespaces[0].high)
|
|
return
|
|
}
|
|
|
|
expectedMappings := map[uint64]rune{
|
|
0x0003: 0x0020,
|
|
0x005F: 0x007C,
|
|
0x000F: 0x002C,
|
|
0x000F + 5: 0x002C + 5,
|
|
0x001B: 0x0038,
|
|
0x001B + 2: 0x0038 + 2,
|
|
0x002F: 0x004C,
|
|
0x0044: 0x0061,
|
|
0x004F: 0x006C,
|
|
0x0055: 0x0072,
|
|
}
|
|
|
|
for k, expected := range expectedMappings {
|
|
if v := cmap.CharcodeToUnicode(k); v != string(expected) {
|
|
t.Errorf("incorrect mapping, expecting 0x%X -> 0x%X (%#v)", k, expected, v)
|
|
return
|
|
}
|
|
}
|
|
|
|
v := cmap.CharcodeToUnicode(0x99)
|
|
if v != "?" { //!= "notdef" {
|
|
t.Errorf("Unmapped code, expected to map to undefined")
|
|
return
|
|
}
|
|
|
|
charcodes := []byte{0x00, 0x03, 0x00, 0x0F}
|
|
s := cmap.CharcodeBytesToUnicode(charcodes)
|
|
if s != " ," {
|
|
t.Error("Incorrect charcode bytes -> string mapping")
|
|
return
|
|
}
|
|
}
|
|
|
|
// cmap2Data is the CMap parsed by TestCMapParser2. It declares one codespace
// range (<0000> <FFFF>) and two bfrange entries, one of which starts at
// <0080>, i.e. a two-byte code whose high byte is zero.
//
// NOTE(review): the section header says "7 beginbfrange" but only two entries
// follow; left as-is - confirm whether the mismatch is intentional.
const cmap2Data = `
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (UCS)
/Supplement 0
>> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
7 beginbfrange
<0080> <00FF> <002C>
<802F> <902F> <0038>
endbfrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end
`
|
|
|
|
// TestCMapParser2 tests a bug that came up when 2-byte character codes had the higher byte set to 0,
|
|
// e.g. 0x0080, and the character map was not taking the number of bytes of the input codemap into account.
|
|
func TestCMapParser2(t *testing.T) {
|
|
common.SetLogger(common.NewConsoleLogger(common.LogLevelTrace))
|
|
|
|
cmap, err := LoadCmapFromData([]byte(cmap2Data))
|
|
if err != nil {
|
|
t.Error("Failed: ", err)
|
|
return
|
|
}
|
|
|
|
if cmap.Name() != "Adobe-Identity-UCS" {
|
|
t.Errorf("CMap name incorrect (%s)", cmap.Name())
|
|
return
|
|
}
|
|
|
|
if cmap.Type() != 2 {
|
|
t.Errorf("CMap type incorrect")
|
|
return
|
|
}
|
|
|
|
if len(cmap.codespaces) != 1 {
|
|
t.Errorf("len codespace != 1 (%d)", len(cmap.codespaces))
|
|
return
|
|
}
|
|
|
|
if cmap.codespaces[0].low != 0 {
|
|
t.Errorf("code space low range != 0 (%d)", cmap.codespaces[0].low)
|
|
return
|
|
}
|
|
|
|
if cmap.codespaces[0].high != 0xFFFF {
|
|
t.Errorf("code space high range != 0xffff (%d)", cmap.codespaces[0].high)
|
|
return
|
|
}
|
|
|
|
expectedMappings := map[uint64]rune{
|
|
0x0080: 0x002C,
|
|
0x802F: 0x0038,
|
|
}
|
|
|
|
for k, expected := range expectedMappings {
|
|
if v := cmap.CharcodeToUnicode(k); v != string(expected) {
|
|
t.Errorf("incorrect mapping, expecting 0x%X -> 0x%X (got 0x%X)", k, expected, v)
|
|
return
|
|
}
|
|
}
|
|
|
|
// Check byte sequence mappings.
|
|
excpectedSequenceMappings := []struct {
|
|
bytes []byte
|
|
expected string
|
|
}{
|
|
{[]byte{0x80, 0x2F, 0x00, 0x80}, string([]rune{0x0038, 0x002C})},
|
|
}
|
|
|
|
for _, exp := range excpectedSequenceMappings {
|
|
str := cmap.CharcodeBytesToUnicode(exp.bytes)
|
|
if str != exp.expected {
|
|
t.Errorf("Incorrect byte sequence mapping -> % X -> % X (got % X)", exp.bytes, []rune(exp.expected), []rune(str))
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// cmapData3 is a CMap with a mixture of 1 and 2 byte codespaces: it declares
// four codespace ranges (<00> <80>, <8100> <9fff>, <a0> <df>, <d040> <fbfc>)
// and four bfrange entries. It is the input parsed by TestCMapParser3.
//
// NOTE(review): the section header says "7 beginbfrange" but only four entries
// follow; left as-is - confirm whether the mismatch is intentional.
const cmapData3 = `
/CIDInit /ProcSet findresource begin
12 dict begin begincmap
/CIDSystemInfo
3 dict dup begin
/Registry (Adobe) def
/Supplement 2 def
end def

/CMapName /test-1 def
/CMapType 1 def

4 begincodespacerange
<00> <80>
<8100> <9fff>
<a0> <df>
<d040> <fbfc>
endcodespacerange
7 beginbfrange
<00> <80> <10>
<8100> <9f00> <1000>
<a0> <d0> <90>
<d140> <f000> <a000>
endbfrange
endcmap
`
|
|
|
|
// TestCMapParser3 test case of a CMap with mixed number of 1 and 2 bytes in the codespace range.
|
|
func TestCMapParser3(t *testing.T) {
|
|
common.SetLogger(common.NewConsoleLogger(common.LogLevelTrace))
|
|
|
|
cmap, err := LoadCmapFromData([]byte(cmapData3))
|
|
if err != nil {
|
|
t.Error("Failed: ", err)
|
|
return
|
|
}
|
|
|
|
if cmap.Name() != "test-1" {
|
|
t.Errorf("CMap name incorrect (%s)", cmap.Name())
|
|
return
|
|
}
|
|
|
|
if cmap.Type() != 1 {
|
|
t.Errorf("CMap type incorrect")
|
|
return
|
|
}
|
|
|
|
// Check codespaces.
|
|
expectedCodespaces := []struct {
|
|
numBytes int
|
|
low uint64
|
|
high uint64
|
|
}{
|
|
{1, 0x00, 0x80},
|
|
{2, 0x8100, 0x9fff},
|
|
{1, 0xa0, 0xdf},
|
|
{2, 0xd040, 0xfbfc},
|
|
}
|
|
|
|
if len(cmap.codespaces) != len(expectedCodespaces) {
|
|
t.Errorf("len codespace != %d (%d)", len(expectedCodespaces), len(cmap.codespaces))
|
|
return
|
|
}
|
|
|
|
for i, cs := range cmap.codespaces {
|
|
exp := expectedCodespaces[i]
|
|
if cs.numBytes != exp.numBytes {
|
|
t.Errorf("code space number of bytes != %d (%d)", exp.numBytes, cs.numBytes)
|
|
return
|
|
}
|
|
|
|
if cs.low != exp.low {
|
|
t.Errorf("code space low range != %d (%d)", exp.low, cs.low)
|
|
return
|
|
}
|
|
|
|
if cs.high != exp.high {
|
|
t.Errorf("code space high range != 0x%X (0x%X)", exp.high, cs.high)
|
|
return
|
|
}
|
|
}
|
|
|
|
// Check mappings.
|
|
expectedMappings := map[uint64]rune{
|
|
0x0080: 0x10 + 0x80,
|
|
0x8100: 0x1000,
|
|
0x00a0: 0x90,
|
|
0xd140: 0xa000,
|
|
}
|
|
for k, expected := range expectedMappings {
|
|
if v := cmap.CharcodeToUnicode(k); v != string(expected) {
|
|
t.Errorf("incorrect mapping, expecting 0x%X -> 0x%X (got 0x%X)", k, expected, v)
|
|
return
|
|
}
|
|
}
|
|
|
|
// Check byte sequence mappings.
|
|
excpectedSequenceMappings := []struct {
|
|
bytes []byte
|
|
expected string
|
|
}{
|
|
{[]byte{0x80, 0x81, 0x00, 0xa1, 0xd1, 0x80, 0x00}, string([]rune{0x90, 0x1000, 0x91, 0xa000 + 0x40, 0x10})},
|
|
}
|
|
|
|
for _, exp := range excpectedSequenceMappings {
|
|
str := cmap.CharcodeBytesToUnicode(exp.bytes)
|
|
if str != exp.expected {
|
|
t.Errorf("Incorrect byte sequence mapping -> % X -> % X (got % X)", exp.bytes, []rune(exp.expected), []rune(str))
|
|
return
|
|
}
|
|
}
|
|
}
|