mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-02 22:17:06 +08:00
Added a test for CharcodeBytesToUnicode for TrueType fonts with ToUnicode cmaps
This commit is contained in:
parent
79aa75acf8
commit
96dba88f57
@ -1507,6 +1507,7 @@ func NewParserFromString(txt string) *PdfParser {
|
||||
parser.reader = bufferedReader
|
||||
|
||||
parser.fileSize = int64(len(txt))
|
||||
parser.streamLengthReferenceLookupInProgress = map[int64]bool{}
|
||||
|
||||
return &parser
|
||||
}
|
||||
|
@ -2,6 +2,8 @@ package model_test
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
"github.com/unidoc/unidoc/common"
|
||||
@ -138,6 +140,53 @@ func TestCompositeFonts(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestTrueTypeToUnicode checks that CharcodeBytesToUnicode is working for a TrueType font with a
|
||||
// ToUnicode cmap.
|
||||
func TestTrueTypeToUnicode(t *testing.T) {
|
||||
numObj, err := parsePdfObjects(ttToUnicode)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to parse ttToUnicode object. err=%v", err)
|
||||
return
|
||||
}
|
||||
fontObj := numObj[9]
|
||||
font, err := model.NewPdfFontFromPdfObject(fontObj)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to create font. err=%v", err)
|
||||
return
|
||||
}
|
||||
|
||||
data := []byte{43, 40, 41, 34, 37, 42, 38, 49, 36, 38, 48, 34, 35, 36, 37, 35, 36, 58}
|
||||
expectedText := "Alerts on printing"
|
||||
actualText, numChars, numMisses := font.CharcodeBytesToUnicode(data)
|
||||
if numMisses != 0 {
|
||||
t.Errorf("Some codes not decoded. numMisses=%d", numMisses)
|
||||
return
|
||||
}
|
||||
if actualText != expectedText {
|
||||
t.Errorf("Incorrect decoding.\nexpected=%q\n actual=%q", expectedText, actualText)
|
||||
}
|
||||
if numChars != len(actualText) {
|
||||
t.Errorf("Incorrect numChars=%d expected=%d", numChars, len(actualText))
|
||||
}
|
||||
}
|
||||
|
||||
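// ttToUnicode is a PDF fragment containing a TrueType font dictionary (obj 9) and its ToUnicode cmap
// stream (obj 26).
// The Flate-encoded stream data of obj 26 is inserted with fmt.Sprintf from an escaped string
// literal so that no binary data has to appear inside the backquoted (raw) string.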
|
||||
var ttToUnicode = fmt.Sprintf(`9 0 obj
|
||||
<< /Type /Font /Subtype /TrueType /BaseFont /AHSHJL+.SFUIText /ToUnicode 26 0 R /FirstChar 33 /LastChar 79 /Widths [ 635 381 246
|
||||
583 363 282 609 252 571 523 674 560 594 542 543 609 591 637 584 874 614 614
|
||||
362 246 268 609 742 870 644 596 604 771 716 653 297 525 657 543 297 774 539
|
||||
382 382 726 968 295 538 ] >>
|
||||
endobj
|
||||
26 0 obj
|
||||
<< /Length 497 /Filter /FlateDecode >>
|
||||
stream
|
||||
%s
|
||||
endstream
|
||||
endobj
|
||||
`,
|
||||
"\x78\x01\x5d\x93\xcd\x8a\xdb\x30\x14\x46\xf7\x7e\x0a\x2d\xa7\x8b\xc1\x8a\xe5\x24\x33\x60\x0c\xc3\x94\x81\x2c\xfa\x43\xd3\x3e\x80\x6d\xc9\xc1\xd0\xd8\xc6\x71\x16\x79\xfb\x9e\xef\x66\x3a\x85\x2e\xbe\xc5\xf1\xd5\x55\xee\x51\xa4\xfc\xf5\xf0\xf9\x30\x0e\xab\xcb\xbf\x2f\x53\x77\x4c\xab\xeb\x87\x31\x2e\xe9\x32\x5d\x97\x2e\xb9\x36\x9d\x86\x31\xdb\x14\x2e\x0e\xdd\xfa\x4e\xf6\xad\x3b\x37\x73\x96\xd3\x7c\xbc\x5d\xd6\x74\x3e\x8c\xfd\xe4\xaa\x2a\x73\x2e\xff\x41\xcb\x65\x5d\x6e\xee\xe1\x25\x4e\x6d\xfa\xa4\x6f\xdf\x96\x98\x96\x61\x3c\xb9\x87\x5f\xaf\x47\xfb\x72\xbc\xce\xf3\xef\x74\x4e\xe3\xea\x7c\x56\xd7\x2e\xa6\x9e\xed\xbe\x34\xf3\xd7\xe6\x9c\x5c\x6e\xad\x8f\x87\x48\x7d\x58\x6f\x8f\x74\xfd\x5b\xf1\xf3\x36\x27\xc7\x44\x74\x6c\xee\x23\x75\x53\x4c\x97\xb9\xe9\xd2\xd2\x8c\xa7\x94\x55\xde\xd7\xd5\xdb\x5b\x9d\xa5\x31\xfe\x57\x2a\xb7\xf7\x8e\xb6\x7f\x5f\x5a\x6c\xea\x4a\xf1\x7e\xeb\xeb\xac\x2a\x0a\x90\x78\xbf\x2f\x84\x01\x24\xde\xef\x9e\x85\x25\x48\xc0\x24\xdc\x82\x84\xc5\xa5\x70\x07\x12\xef\x0b\xdb\xea\x09\x24\x2c\xee\x54\x7d\x06\x09\xb8\x15\x36\x20\xa1\x37\x08\x5b\x90\x78\x5f\x6e\x84\x1d\x48\x58\x6c\xd5\x08\x12\xf0\x49\xd5\x04\x12\x7a\x77\xc2\x1e\x24\xa0\x86\x0c\xc8\x2b\xa0\xc6\x08\xc8\x29\xf4\xf6\x42\xe4\x14\x7c\xb5\x73\x40\x4e\x61\xb1\xa6\x0a\xc8\x29\x8c\x11\x85\xc8\x29\xf4\xea\x34\x02\x72\x0a\x28\xdf\xb0\x07\x09\xa8\x31\x02\xae\x0a\xd8\x08\x71\x55\xd8\xca\xa6\xc2\x35\x98\xef\x6e\xaf\x2a\xae\x0a\x55\x9d\x64\xc0\x55\xa1\xd7\x7e\x17\xd7\x60\xbe\x0c\x43\x15\x57\x85\xc5\x36\x24\xae\xc1\x7c\x11\xc9\xaa\x12\x57\x85\xaa\x04\x39\x3f\x0b\x28\xc1\x12\x57\x05\x5f\x5b\x8c\x2b\xdf\x41\x0e\x90\x2a\xae\x0a\xfa\x3a\x58\xb6\xb7\xd0\x6b\x8b\x71\x2d\xef\xbe\xad\xaa\xb8\x2a\xf4\xea\x0f\x2d\x71\x55\xe8\x95\x11\x96\x16\x50\xfa\x25\xae\xa5\x09\x72\x07\x40\xe4\x14\x76\x96\x11\xa7\x6b\x61\x2a\xeb\x45\x8e\x73\xa8\x7c\xd1\xda\xce\xc8\xe1\xa2\xc5\x6c\xc5\x25\xfe\x7b\x5b\x75\x9f\xf5\xee\x3e\xde\x49\x77\x5d\x16\x9e\x88\x3d\x4e\x7b\x3d\x7a\x15\xc3\x98\x3e\xde\xef\x3c\xcd\xda\xc0\xf2\x07\xc5\xfd\x00\x4f")
|
||||
|
||||
// objFontObj parses `fontDict` to a make a Font, creates a PDF object from the Font and checks that
|
||||
// the new PDF object is the same as the input object
|
||||
func objFontObj(t *testing.T, fontDict string) error {
|
||||
@ -166,3 +215,66 @@ func objFontObj(t *testing.T, fontDict string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// parsePdfObjects parses a fragment of a PDF `text` (e.g. ttToUnicode above) and returns a map of
|
||||
// {object number: object} with indirect objects replaced by their values if they are in `text`.
|
||||
func parsePdfObjects(text string) (map[int64]core.PdfObject, error) {
|
||||
numObj := map[int64]core.PdfObject{}
|
||||
parser := core.NewParserFromString(text)
|
||||
for {
|
||||
obj, err := parser.ParseIndirectObject()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return numObj, err
|
||||
}
|
||||
switch t := obj.(type) {
|
||||
case *core.PdfIndirectObject:
|
||||
numObj[t.ObjectNumber] = obj
|
||||
case *core.PdfObjectStream:
|
||||
numObj[t.ObjectNumber] = obj
|
||||
}
|
||||
}
|
||||
|
||||
for _, obj := range numObj {
|
||||
iobj, ok := obj.(*core.PdfIndirectObject)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
dict, ok := iobj.PdfObject.(*core.PdfObjectDictionary)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, k := range dict.Keys() {
|
||||
if ref, ok := dict.Get(k).(*core.PdfObjectReference); ok {
|
||||
if o, ok := numObj[ref.ObjectNumber]; ok {
|
||||
dict.Set(k, o)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return numObj, nil
|
||||
}
|
||||
|
||||
// func isFontObject(obj core.PdfObject) bool {
|
||||
// var dict *core.PdfObjectDictionary
|
||||
// switch t := obj.(type) {
|
||||
// case *core.PdfIndirectObject:
|
||||
// dict = t.PdfObject.(*core.PdfObjectDictionary)
|
||||
// case *core.PdfObjectDictionary:
|
||||
// dict = t
|
||||
// default:
|
||||
// return false
|
||||
// }
|
||||
// name, err := core.GetName(dict.Get("Type"))
|
||||
// return err == nil && name == "Font"
|
||||
// }
|
||||
|
||||
// func showDict(dict *core.PdfObjectDictionary) string {
|
||||
// parts := []string{}
|
||||
// for _, k := range dict.Keys() {
|
||||
// parts = append(parts, fmt.Sprintf("%s: %T", k, dict.Get(k)))
|
||||
// }
|
||||
// return fmt.Sprintf("{%s}", strings.Join(parts, ", "))
|
||||
// }
|
||||
|
Loading…
x
Reference in New Issue
Block a user