Cleaned up text encoding interface and rune<->glyph conversions. Added test example for Symbol and ZapfDingbats font/encodings. Closes #59.

This commit is contained in:
Gunnsteinn Hall 2017-07-10 15:17:46 +00:00
parent ca74ee87a1
commit 5ce423ffcd
19 changed files with 15119 additions and 961 deletions

View File

@ -359,7 +359,7 @@ func TestParagraphStandardFonts(t *testing.T) {
"Times-BoldItalic: Lorem ipsum dolor sit amet, consectetur adipiscing elit...",
"Times-Italic: Lorem ipsum dolor sit amet, consectetur adipiscing elit...",
"\u2206\u0393\u0020\u2192\u0020\u0030", // Delta Gamma space arrowright space zero (demonstrate Symbol font)
"",
"\u2702\u0020\u2709\u261e\u2711\u2714", // a2 (scissors) space a117 (mail) a12 (finger) a17 (pen) a20 (checkmark)
}
for idx, font := range fonts {
@ -369,9 +369,13 @@ func TestParagraphStandardFonts(t *testing.T) {
p.SetLineHeight(1.2)
p.SetMargins(0, 0, 5, 0)
fmt.Printf("%s\n", names[idx])
if names[idx] == "Symbol" {
// For symbol font, need to use Symbol Encoder.
// For Symbol font, need to use Symbol encoder.
p.SetEncoder(textencoding.NewSymbolEncoder())
} else if names[idx] == "ZapfDingbats" {
// For ZapfDingbats font, need to use ZapfDingbats encoder.
p.SetEncoder(textencoding.NewZapfDingbatsEncoder())
}
err := creator.Draw(p)

View File

@ -198,7 +198,7 @@ func (p *paragraph) getTextWidth() float64 {
w := float64(0.0)
for _, rune := range p.text {
glyph, found := p.encoder.RuneToGlyphName(rune)
glyph, found := p.encoder.RuneToGlyph(rune)
if !found {
common.Log.Debug("Error! Glyph not found for rune: %s\n", rune)
return -1 // XXX/FIXME: return error.
@ -232,7 +232,7 @@ func (p *paragraph) wrapText() error {
widths := []float64{}
for _, val := range runes {
glyph, found := p.encoder.RuneToGlyphName(val)
glyph, found := p.encoder.RuneToGlyph(val)
if !found {
common.Log.Debug("Error! Glyph not found for rune: %v\n", val)
return errors.New("Glyph not found for rune") // XXX/FIXME: return error.
@ -401,7 +401,7 @@ func drawParagraphOnBlock(blk *Block, p *paragraph, ctx DrawContext) (DrawContex
w := float64(0)
spaces := 0
for _, runeVal := range runes {
glyph, found := p.encoder.RuneToGlyphName(runeVal)
glyph, found := p.encoder.RuneToGlyph(runeVal)
if !found {
common.Log.Debug("Rune 0x%x not supported by text encoder", runeVal)
return ctx, errors.New("Unsupported rune in text encoding")
@ -444,7 +444,7 @@ func drawParagraphOnBlock(blk *Block, p *paragraph, ctx DrawContext) (DrawContex
encStr := ""
for _, runeVal := range runes {
//creator.Add_Tj(core.PdfObjectString(tb.Encoder.Encode(line)))
glyph, found := p.encoder.RuneToGlyphName(runeVal)
glyph, found := p.encoder.RuneToGlyph(runeVal)
if !found {
common.Log.Debug("Rune 0x%x not supported by text encoder", runeVal)
return ctx, errors.New("Unsupported rune in text encoding")

View File

@ -135,7 +135,7 @@ func (font pdfFontTrueType) SetEncoder(encoder textencoding.TextEncoder) {
func (font pdfFontTrueType) GetGlyphCharMetrics(glyph string) (fonts.CharMetrics, bool) {
metrics := fonts.CharMetrics{}
code, found := font.Encoder.GlyphNameToCharcode(glyph)
code, found := font.Encoder.GlyphToCharcode(glyph)
if !found {
return metrics, false
}

View File

@ -17,12 +17,12 @@ import (
// Font ZapfDingbats. Implements Font interface.
// This is a built-in font and it is assumed that every reader has access to it.
type fontZapfDingbats struct {
// By default encoder is not set, which means that we use the font's built in encoding.
encoder textencoding.TextEncoder
}
func NewFontZapfDingbats() fontZapfDingbats {
font := fontZapfDingbats{}
font.encoder = textencoding.NewWinAnsiTextEncoder() // Default
return font
}
@ -46,7 +46,9 @@ func (font fontZapfDingbats) ToPdfObject() core.PdfObject {
fontDict.Set("Type", core.MakeName("Font"))
fontDict.Set("Subtype", core.MakeName("Type1"))
fontDict.Set("BaseFont", core.MakeName("ZapfDingbats"))
fontDict.Set("Encoding", font.encoder.ToPdfObject())
if font.encoder != nil {
fontDict.Set("Encoding", font.encoder.ToPdfObject())
}
obj.PdfObject = fontDict
return obj

View File

@ -8,11 +8,32 @@ package textencoding
import "github.com/unidoc/unidoc/pdf/core"
type TextEncoder interface {
// Convert a raw utf8 string (series of runes) to an encoded string (series of character codes) to be used in PDF.
Encode(raw string) string
CharcodeToGlyphName(code byte) (string, bool)
// Conversion between character code and glyph name.
// The bool return flag is true if there was a match, and false otherwise.
CharcodeToGlyph(code byte) (string, bool)
// Conversion between glyph name and character code.
// The bool return flag is true if there was a match, and false otherwise.
GlyphToCharcode(glyph string) (byte, bool)
// Convert rune to character code.
// The bool return flag is true if there was a match, and false otherwise.
RuneToCharcode(val rune) (byte, bool)
RuneToGlyphName(val rune) (string, bool)
GlyphNameToCharcode(glyph string) (byte, bool)
// Convert character code to rune.
// The bool return flag is true if there was a match, and false otherwise.
CharcodeToRune(charcode byte) (rune, bool)
// Convert rune to glyph name.
// The bool return flag is true if there was a match, and false otherwise.
RuneToGlyph(val rune) (string, bool)
// Convert glyph to rune.
// The bool return flag is true if there was a match, and false otherwise.
GlyphToRune(glyph string) (rune, bool)
ToPdfObject() core.PdfObject
}

View File

@ -0,0 +1,113 @@
package main
// Utility to generate static maps of glyph <-> character codes for text encoding.
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"strings"
)
// main reads the -encodingfile and -method flags and prints the requested
// static map for that encoding glyph list file. Any failure (missing file
// flag, unknown method, processing error) is reported on standard output and
// ends the process with exit status 1.
func main() {
	path := flag.String("encodingfile", "", "Encoding glyph list file")
	mode := flag.String("method", "charcode-to-glyph", "charcode-to-glyph/glyph-to-charcode")
	flag.Parse()

	if *path == "" {
		fmt.Printf("Please specify an encoding file, see -h for options\n")
		os.Exit(1)
	}

	// One generator per supported -method value.
	generators := map[string]func(string) error{
		"charcode-to-glyph": charcodeToGlyphListPath,
		"glyph-to-charcode": glyphToCharcodeListPath,
	}

	generate, supported := generators[*mode]
	if !supported {
		fmt.Printf("Unsupported method, see -h for options\n")
		os.Exit(1)
	}

	if err := generate(*path); err != nil {
		fmt.Printf("Error: %v\n", err)
		os.Exit(1)
	}
}
// charcodeToGlyphListPath reads the encoding glyph list at filename and prints
// one `index: "glyph",` map entry per glyph name to standard output.
//
// The file is treated as a sequence of whitespace-separated glyph names. Each
// name is numbered by its position in the file, starting at 0; that number is
// what is printed as the map key. Names equal to "notdef" consume a number but
// produce no output.
//
// It returns any error encountered while opening or reading the file.
func charcodeToGlyphListPath(filename string) error {
	f, err := os.Open(filename)
	if err != nil {
		return err
	}
	defer f.Close()

	reader := bufio.NewReader(f)
	index := -1
	for {
		line, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			return err
		}

		// ReadString returns the data read so far together with io.EOF when
		// the file does not end in a newline, so the line has to be handled
		// before EOF is acted upon or the final line would be lost.
		// strings.Fields ignores blank lines and repeated separators, which
		// would otherwise show up as empty glyph names and shift the numbering.
		for _, part := range strings.Fields(line) {
			index++
			if part == "notdef" {
				continue
			}
			fmt.Printf("\t%d: \"%s\",\n", index, part)
		}

		if err == io.EOF {
			return nil
		}
	}
}
// glyphToCharcodeListPath reads the encoding glyph list at filename and prints
// one `"glyph": index,` map entry per glyph name to standard output.
//
// The file is treated as a sequence of whitespace-separated glyph names. Each
// name is numbered by its position in the file, starting at 0; that number is
// what is printed as the map value. Names equal to "notdef" consume a number
// but produce no output.
//
// It returns any error encountered while opening or reading the file.
func glyphToCharcodeListPath(filename string) error {
	f, err := os.Open(filename)
	if err != nil {
		return err
	}
	defer f.Close()

	reader := bufio.NewReader(f)
	index := -1
	for {
		line, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			return err
		}

		// ReadString returns the data read so far together with io.EOF when
		// the file does not end in a newline, so the line has to be handled
		// before EOF is acted upon or the final line would be lost.
		// strings.Fields ignores blank lines and repeated separators, which
		// would otherwise show up as empty glyph names and shift the numbering.
		for _, part := range strings.Fields(line) {
			index++
			if part == "notdef" {
				continue
			}
			fmt.Printf("\t\"%s\": %d,\n", part, index)
		}

		if err == io.EOF {
			return nil
		}
	}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,223 @@
package main
// Utility to generate static maps of glyph <-> rune conversions for a glyphlist.
import (
"bufio"
"errors"
"flag"
"fmt"
"io"
"os"
"sort"
"strings"
)
// main reads the -glyphfile and -method flags, parses the glyph list file and
// prints the requested glyph <-> rune map as Go source on standard output.
//
// Every failure (missing file flag, parse error, unknown method) is reported
// on standard output and ends the process with exit status 1, so callers such
// as scripts can detect that no map was generated.
func main() {
	glyphlistFile := flag.String("glyphfile", "", "Glyph file to parse")
	method := flag.String("method", "glyph-to-rune", "glyph-to-rune/rune-to-glyph")
	flag.Parse()

	if len(*glyphlistFile) == 0 {
		fmt.Printf("Need to specify glyph list file via glyphfile, see -h for options\n")
		os.Exit(1)
	}

	glyphToUnicodeMap, err := parseGlyphList(*glyphlistFile)
	if err != nil {
		fmt.Printf("Failed: %v\n", err)
		os.Exit(1)
	}

	switch *method {
	case "glyph-to-rune":
		printGlyphToRuneList(glyphToUnicodeMap)
	case "rune-to-glyph":
		printRuneToGlyphList(glyphToUnicodeMap)
	default:
		fmt.Printf("Unsupported method: %s, see -h for options\n", *method)
		os.Exit(1)
	}

	// loadGlyphlist, printEncodingGlyphToRuneMap and
	// printEncodingRuneToGlyphMap are not wired to any flag. They can be
	// called from here to print maps limited to the glyphs listed in a single
	// encoding file (for example "symbol.txt").
}
// printGlyphToRuneList writes a Go map literal named glyphlistGlyphToRuneMap
// to standard output with one entry per glyph in glyphToUnicodeMap. Entries
// are emitted in ascending glyph-name order and the hexadecimal code is
// lower-cased inside a '\u....' rune literal.
func printGlyphToRuneList(glyphToUnicodeMap map[string]string) {
	names := make([]string, 0, len(glyphToUnicodeMap))
	for name := range glyphToUnicodeMap {
		names = append(names, name)
	}
	sort.Strings(names)

	fmt.Print("var glyphlistGlyphToRuneMap = map[string]rune{\n")
	for i := range names {
		hex := strings.ToLower(glyphToUnicodeMap[names[i]])
		fmt.Printf("\t\"%s\":\t'\\u%s',\n", names[i], hex)
	}
	fmt.Print("}\n")
}
// printRuneToGlyphList writes a Go map literal named glyphlistRuneToGlyphMap
// to standard output, keyed by rune. Glyphs are visited in ascending name
// order; the first glyph seen for a given (lower-cased) code gets a real
// entry, and any later glyph with the same code is emitted as a commented-out
// line tagged "// duplicate" so the literal has no repeated keys.
func printRuneToGlyphList(glyphToUnicodeMap map[string]string) {
	names := make([]string, 0, len(glyphToUnicodeMap))
	for name := range glyphToUnicodeMap {
		names = append(names, name)
	}
	sort.Strings(names)

	// Codes that already have an entry in the printed literal.
	seen := map[string]bool{}

	fmt.Print("var glyphlistRuneToGlyphMap = map[rune]string{\n")
	for _, name := range names {
		hex := strings.ToLower(glyphToUnicodeMap[name])
		if seen[hex] {
			fmt.Printf("//\t'\\u%s':\t\"%s\", // duplicate\n", hex, name)
			continue
		}
		fmt.Printf("\t'\\u%s':\t\"%s\",\n", hex, name)
		seen[hex] = true
	}
	fmt.Print("}\n")
}
// printEncodingGlyphToRuneMap writes a Go map literal named
// nameEncodingGlyphToRuneMap to standard output, with one glyph -> rune entry
// for each name in glyphs, in the order given. A glyph that has no code in
// glyphToUnicodeMap is reported inline as "'<glyph>' - NOT FOUND" instead of
// an entry.
func printEncodingGlyphToRuneMap(glyphs []string, glyphToUnicodeMap map[string]string) {
	fmt.Print("var nameEncodingGlyphToRuneMap map[string]rune = map[string]rune{\n")
	for i := 0; i < len(glyphs); i++ {
		name := glyphs[i]
		hex, known := glyphToUnicodeMap[name]
		if !known {
			fmt.Printf("'%s' - NOT FOUND\n", name)
			continue
		}
		fmt.Printf("\t\"%s\":\t'\\u%s',\n", name, strings.ToLower(hex))
	}
	fmt.Print("}\n")
}
// printEncodingRuneToGlyphMap writes a Go map literal named
// nameEncodingRuneToGlyphMap to standard output, with one rune -> glyph entry
// for each name in glyphs, in the order given. A glyph that has no code in
// glyphToUnicodeMap is reported inline as "'<glyph>' - NOT FOUND" instead of
// an entry.
func printEncodingRuneToGlyphMap(glyphs []string, glyphToUnicodeMap map[string]string) {
	fmt.Print("var nameEncodingRuneToGlyphMap map[rune]string = map[rune]string{\n")
	for i := 0; i < len(glyphs); i++ {
		name := glyphs[i]
		hex, known := glyphToUnicodeMap[name]
		if !known {
			fmt.Printf("'%s' - NOT FOUND\n", name)
			continue
		}
		fmt.Printf("\t'\\u%s':\t\"%s\",\n", strings.ToLower(hex), name)
	}
	fmt.Print("}\n")
}
// parseGlyphList reads a glyph list file and returns a map from glyph name to
// its hexadecimal code string, exactly as written in the file.
//
// Each data line has the form "name;code". Lines that are blank or start with
// '#' are skipped. When the code field is longer than four characters it is
// treated as a space-separated list of codes and the last one is kept.
//
// It returns an error if the file cannot be opened or read, or if a data line
// does not consist of exactly two ';'-separated fields.
func parseGlyphList(filename string) (map[string]string, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	reader := bufio.NewReader(f)
	glyphToUnicodeMap := map[string]string{}
	for {
		line, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			return nil, err
		}
		// ReadString hands back a final line that lacks a trailing newline
		// together with io.EOF, so the line is processed before stopping.
		atEOF := err == io.EOF

		line = strings.Trim(line, " \r\n")
		// The emptiness check must come first: indexing line[0] on a blank
		// line would panic.
		if line != "" && line[0] != '#' {
			parts := strings.Split(line, ";")
			if len(parts) != 2 {
				return nil, errors.New("Invalid part")
			}

			code := parts[1]
			if len(code) > 4 {
				// Several codes on one line: keep the last one listed.
				subparts := strings.Split(code, " ")
				code = subparts[len(subparts)-1]
			}
			glyphToUnicodeMap[parts[0]] = code
		}

		if atEOF {
			return glyphToUnicodeMap, nil
		}
	}
}
// loadGlyphlist reads an encoding glyph list file and returns its glyph names
// in file order, leaving out every "notdef" entry.
//
// The file is treated as a sequence of whitespace-separated glyph names, so
// blank lines and repeated separators do not produce empty names. The
// returned slice is non-nil even when no glyph is found.
//
// It returns any error encountered while opening or reading the file.
func loadGlyphlist(filename string) ([]string, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	glyphs := []string{}
	reader := bufio.NewReader(f)
	for {
		line, err := reader.ReadString('\n')
		if err != nil && err != io.EOF {
			return nil, err
		}

		// ReadString hands back a final line that lacks a trailing newline
		// together with io.EOF, so the line is processed before stopping.
		for _, part := range strings.Fields(line) {
			if part == "notdef" {
				continue
			}
			glyphs = append(glyphs, part)
		}

		if err == io.EOF {
			return glyphs, nil
		}
	}
}

View File

@ -0,0 +1,32 @@
notdef notdef notdef notdef notdef notdef notdef notdef
notdef notdef notdef notdef notdef notdef notdef notdef
notdef notdef notdef notdef notdef notdef notdef notdef
notdef notdef notdef notdef notdef notdef notdef notdef
space exclam quotedbl numbersign dollar percent ampersand quotesingle
parenleft parenright asterisk plus comma minus period slash
zero one two three four five six seven
eight nine colon semicolon less equal greater question
at A B C D E F G
H I J K L M N O
P Q R S T U V W
X Y Z bracketleft backslash bracketright asciicircum underscore
grave a b c d e f g
h i j k l m n o
p q r s t u v w
x y z braceleft bar braceright asciitilde notdef
Adieresis Aring Ccedilla Eacute Ntilde Odieresis Udieresis aacute
agrave acircumflex adieresis atilde aring ccedilla eacute egrave
ecircumflex edieresis iacute igrave icircumflex idieresis ntilde oacute
ograve ocircumflex odieresis otilde uacute ugrave ucircumflex udieresis
dagger degree cent sterling section bullet paragraph germandbls
registered copyright trademark acute dieresis notequal AE Oslash
infinity plusminus lessequal greaterequal yen mu partialdiff summation
Pi pi integral ordfeminine ordmasculine Omega ae oslash
questiondown exclamdown logicalnot radical florin approxequal delta guillemotleft
guillemotright ellipsis space Agrave Atilde Otilde OE oe
endash emdash quotedblleft quotedblright quoteleft quoteright divide lozenge
ydieresis Ydieresis fraction currency guilsinglleft guilsinglright fi fl
daggerdbl periodcentered quotesinglbase quotedblbase perthousand Acircumflex Ecircumflex Aacute
Edieresis Egrave Iacute Icircumflex Idieresis Igrave Oacute Ocircumflex
heart Ograve Uacute Ucircumflex Ugrave dotlessi circumflex tilde
macron breve dotaccent ring cedilla hungarumlaut ogonek caron

View File

@ -0,0 +1,189 @@
space
exclam
universal
numbersign
existential
percent
ampersand
suchthat
parenleft
parenright
asteriskmath
plus
comma
minus
period
slash
zero
one
two
three
four
five
six
seven
eight
nine
colon
semicolon
less
equal
greater
question
congruent
Alpha
Beta
Chi
Delta
Epsilon
Phi
Gamma
Eta
Iota
theta1
Kappa
Lambda
Mu
Nu
Omicron
Pi
Theta
Rho
Sigma
Tau
Upsilon
sigma1
Omega
Xi
Psi
Zeta
bracketleft
therefore
bracketright
perpendicular
underscore
radicalex
alpha
beta
chi
delta
epsilon
phi
gamma
eta
iota
phi1
kappa
lambda
mu
nu
omicron
pi
theta
rho
sigma
tau
upsilon
omega1
omega
xi
psi
zeta
braceleft
bar
braceright
similar
Euro
Upsilon1
minute
lessequal
fraction
infinity
florin
club
diamond
heart
spade
arrowboth
arrowleft
arrowup
arrowright
arrowdown
degree
plusminus
second
greaterequal
multiply
proportional
partialdiff
bullet
divide
notequal
equivalence
approxequal
ellipsis
arrowvertex
arrowhorizex
carriagereturn
aleph
Ifraktur
Rfraktur
weierstrass
circlemultiply
circleplus
emptyset
intersection
union
propersuperset
reflexsuperset
notsubset
propersubset
reflexsubset
element
notelement
angle
gradient
registerserif
copyrightserif
trademarkserif
product
radical
dotmath
logicalnot
logicaland
logicalor
arrowdblboth
arrowdblleft
arrowdblup
arrowdblright
arrowdbldown
lozenge
angleleft
registersans
copyrightsans
trademarksans
summation
parenlefttp
parenleftex
parenleftbt
bracketlefttp
bracketleftex
bracketleftbt
bracelefttp
braceleftmid
braceleftbt
braceex
angleright
integral
integraltp
integralex
integralbt
parenrighttp
parenrightex
parenrightbt
bracketrighttp
bracketrightex
bracketrightbt
bracerighttp
bracerightmid
bracerightbt

View File

@ -0,0 +1,28 @@
notdef notdef notdef notdef notdef notdef notdef notdef
notdef notdef notdef notdef notdef notdef notdef notdef
notdef notdef notdef notdef notdef notdef notdef notdef
notdef notdef notdef notdef notdef notdef notdef notdef
space exclam quotedbl numbersign dollar percent ampersand quotesingle
parenleft parenright asterisk plus comma hyphen period slash zero one
two three four five six seven eight nine colon semicolon less equal
greater question at A B C D E F G H I J K L M N O P Q R S T U V W X
Y Z bracketleft backslash bracketright asciicircum underscore
grave a b c d e f g h i j k l m n o p q r s t u v w x y z
braceleft bar braceright asciitilde bullet Euro bullet quotesinglbase
florin quotedblbase ellipsis dagger daggerdbl circumflex perthousand
Scaron guilsinglleft OE bullet Zcaron bullet bullet quoteleft quoteright
quotedblleft quotedblright bullet endash emdash tilde trademark scaron
guilsinglright oe bullet zcaron Ydieresis space exclamdown cent
sterling currency yen brokenbar section dieresis copyright
ordfeminine guillemotleft logicalnot hyphen registered macron degree
plusminus twosuperior threesuperior acute mu paragraph
periodcentered cedilla onesuperior ordmasculine guillemotright
onequarter onehalf threequarters questiondown Agrave Aacute
Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute
Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde
Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave
Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute
acircumflex atilde adieresis aring ae ccedilla egrave eacute
ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde
ograve oacute ocircumflex otilde odieresis divide oslash ugrave
uacute ucircumflex udieresis yacute thorn ydieresis

View File

@ -0,0 +1,247 @@
# -----------------------------------------------------------
# Copyright 2002, 2010, 2015 Adobe Systems Incorporated.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the
# following conditions are met:
#
# Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
#
# Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# Neither the name of Adobe Systems Incorporated nor the names
# of its contributors may be used to endorse or promote
# products derived from this software without specific prior
# written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------
# Name: ITC Zapf Dingbats Glyph List
# Table version: 2.0
# Date: September 20, 2002
# URL: https://github.com/adobe-type-tools/agl-aglfn
#
# Format: two semicolon-delimited fields:
# (1) glyph name--upper/lowercase letters and digits
# (2) Unicode scalar value--four uppercase hexadecimal digits
#
a100;275E
a101;2761
a102;2762
a103;2763
a104;2764
a105;2710
a106;2765
a107;2766
a108;2767
a109;2660
a10;2721
a110;2665
a111;2666
a112;2663
a117;2709
a118;2708
a119;2707
a11;261B
a120;2460
a121;2461
a122;2462
a123;2463
a124;2464
a125;2465
a126;2466
a127;2467
a128;2468
a129;2469
a12;261E
a130;2776
a131;2777
a132;2778
a133;2779
a134;277A
a135;277B
a136;277C
a137;277D
a138;277E
a139;277F
a13;270C
a140;2780
a141;2781
a142;2782
a143;2783
a144;2784
a145;2785
a146;2786
a147;2787
a148;2788
a149;2789
a14;270D
a150;278A
a151;278B
a152;278C
a153;278D
a154;278E
a155;278F
a156;2790
a157;2791
a158;2792
a159;2793
a15;270E
a160;2794
a161;2192
a162;27A3
a163;2194
a164;2195
a165;2799
a166;279B
a167;279C
a168;279D
a169;279E
a16;270F
a170;279F
a171;27A0
a172;27A1
a173;27A2
a174;27A4
a175;27A5
a176;27A6
a177;27A7
a178;27A8
a179;27A9
a17;2711
a180;27AB
a181;27AD
a182;27AF
a183;27B2
a184;27B3
a185;27B5
a186;27B8
a187;27BA
a188;27BB
a189;27BC
a18;2712
a190;27BD
a191;27BE
a192;279A
a193;27AA
a194;27B6
a195;27B9
a196;2798
a197;27B4
a198;27B7
a199;27AC
a19;2713
a1;2701
a200;27AE
a201;27B1
a202;2703
a203;2750
a204;2752
a205;276E
a206;2770
a20;2714
a21;2715
a22;2716
a23;2717
a24;2718
a25;2719
a26;271A
a27;271B
a28;271C
a29;2722
a2;2702
a30;2723
a31;2724
a32;2725
a33;2726
a34;2727
a35;2605
a36;2729
a37;272A
a38;272B
a39;272C
a3;2704
a40;272D
a41;272E
a42;272F
a43;2730
a44;2731
a45;2732
a46;2733
a47;2734
a48;2735
a49;2736
a4;260E
a50;2737
a51;2738
a52;2739
a53;273A
a54;273B
a55;273C
a56;273D
a57;273E
a58;273F
a59;2740
a5;2706
a60;2741
a61;2742
a62;2743
a63;2744
a64;2745
a65;2746
a66;2747
a67;2748
a68;2749
a69;274A
a6;271D
a70;274B
a71;25CF
a72;274D
a73;25A0
a74;274F
a75;2751
a76;25B2
a77;25BC
a78;25C6
a79;2756
a7;271E
a81;25D7
a82;2758
a83;2759
a84;275A
a85;276F
a86;2771
a87;2772
a88;2773
a89;2768
a8;271F
a90;2769
a91;276C
a92;276D
a93;276A
a94;276B
a95;2774
a96;2775
a97;275B
a98;275C
a99;275D
a9;2720
#END

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,418 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
/*
* The embedded glyph to unicode mappings specified in this file are distributed under the terms listed in
* ./glyphlist/zapfdingbats.txt.
*/
package textencoding
// zapfdingbatsGlyphToRuneMap maps ZapfDingbats glyph names ("a1" ... "a206")
// to their Unicode runes. The entries correspond to the name;code pairs in
// ./glyphlist/zapfdingbats.txt, with the hexadecimal codes lower-cased, and
// are listed in lexical order of glyph name.
var zapfdingbatsGlyphToRuneMap = map[string]rune{
"a1": '\u2701',
"a10": '\u2721',
"a100": '\u275e',
"a101": '\u2761',
"a102": '\u2762',
"a103": '\u2763',
"a104": '\u2764',
"a105": '\u2710',
"a106": '\u2765',
"a107": '\u2766',
"a108": '\u2767',
"a109": '\u2660',
"a11": '\u261b',
"a110": '\u2665',
"a111": '\u2666',
"a112": '\u2663',
"a117": '\u2709',
"a118": '\u2708',
"a119": '\u2707',
"a12": '\u261e',
"a120": '\u2460',
"a121": '\u2461',
"a122": '\u2462',
"a123": '\u2463',
"a124": '\u2464',
"a125": '\u2465',
"a126": '\u2466',
"a127": '\u2467',
"a128": '\u2468',
"a129": '\u2469',
"a13": '\u270c',
"a130": '\u2776',
"a131": '\u2777',
"a132": '\u2778',
"a133": '\u2779',
"a134": '\u277a',
"a135": '\u277b',
"a136": '\u277c',
"a137": '\u277d',
"a138": '\u277e',
"a139": '\u277f',
"a14": '\u270d',
"a140": '\u2780',
"a141": '\u2781',
"a142": '\u2782',
"a143": '\u2783',
"a144": '\u2784',
"a145": '\u2785',
"a146": '\u2786',
"a147": '\u2787',
"a148": '\u2788',
"a149": '\u2789',
"a15": '\u270e',
"a150": '\u278a',
"a151": '\u278b',
"a152": '\u278c',
"a153": '\u278d',
"a154": '\u278e',
"a155": '\u278f',
"a156": '\u2790',
"a157": '\u2791',
"a158": '\u2792',
"a159": '\u2793',
"a16": '\u270f',
"a160": '\u2794',
"a161": '\u2192',
"a162": '\u27a3',
"a163": '\u2194',
"a164": '\u2195',
"a165": '\u2799',
"a166": '\u279b',
"a167": '\u279c',
"a168": '\u279d',
"a169": '\u279e',
"a17": '\u2711',
"a170": '\u279f',
"a171": '\u27a0',
"a172": '\u27a1',
"a173": '\u27a2',
"a174": '\u27a4',
"a175": '\u27a5',
"a176": '\u27a6',
"a177": '\u27a7',
"a178": '\u27a8',
"a179": '\u27a9',
"a18": '\u2712',
"a180": '\u27ab',
"a181": '\u27ad',
"a182": '\u27af',
"a183": '\u27b2',
"a184": '\u27b3',
"a185": '\u27b5',
"a186": '\u27b8',
"a187": '\u27ba',
"a188": '\u27bb',
"a189": '\u27bc',
"a19": '\u2713',
"a190": '\u27bd',
"a191": '\u27be',
"a192": '\u279a',
"a193": '\u27aa',
"a194": '\u27b6',
"a195": '\u27b9',
"a196": '\u2798',
"a197": '\u27b4',
"a198": '\u27b7',
"a199": '\u27ac',
"a2": '\u2702',
"a20": '\u2714',
"a200": '\u27ae',
"a201": '\u27b1',
"a202": '\u2703',
"a203": '\u2750',
"a204": '\u2752',
"a205": '\u276e',
"a206": '\u2770',
"a21": '\u2715',
"a22": '\u2716',
"a23": '\u2717',
"a24": '\u2718',
"a25": '\u2719',
"a26": '\u271a',
"a27": '\u271b',
"a28": '\u271c',
"a29": '\u2722',
"a3": '\u2704',
"a30": '\u2723',
"a31": '\u2724',
"a32": '\u2725',
"a33": '\u2726',
"a34": '\u2727',
"a35": '\u2605',
"a36": '\u2729',
"a37": '\u272a',
"a38": '\u272b',
"a39": '\u272c',
"a4": '\u260e',
"a40": '\u272d',
"a41": '\u272e',
"a42": '\u272f',
"a43": '\u2730',
"a44": '\u2731',
"a45": '\u2732',
"a46": '\u2733',
"a47": '\u2734',
"a48": '\u2735',
"a49": '\u2736',
"a5": '\u2706',
"a50": '\u2737',
"a51": '\u2738',
"a52": '\u2739',
"a53": '\u273a',
"a54": '\u273b',
"a55": '\u273c',
"a56": '\u273d',
"a57": '\u273e',
"a58": '\u273f',
"a59": '\u2740',
"a6": '\u271d',
"a60": '\u2741',
"a61": '\u2742',
"a62": '\u2743',
"a63": '\u2744',
"a64": '\u2745',
"a65": '\u2746',
"a66": '\u2747',
"a67": '\u2748',
"a68": '\u2749',
"a69": '\u274a',
"a7": '\u271e',
"a70": '\u274b',
"a71": '\u25cf',
"a72": '\u274d',
"a73": '\u25a0',
"a74": '\u274f',
"a75": '\u2751',
"a76": '\u25b2',
"a77": '\u25bc',
"a78": '\u25c6',
"a79": '\u2756',
"a8": '\u271f',
"a81": '\u25d7',
"a82": '\u2758',
"a83": '\u2759',
"a84": '\u275a',
"a85": '\u276f',
"a86": '\u2771',
"a87": '\u2772',
"a88": '\u2773',
"a89": '\u2768',
"a9": '\u2720',
"a90": '\u2769',
"a91": '\u276c',
"a92": '\u276d',
"a93": '\u276a',
"a94": '\u276b',
"a95": '\u2774',
"a96": '\u2775',
"a97": '\u275b',
"a98": '\u275c',
"a99": '\u275d',
}
// zapfdingbatsRuneToGlyphMap maps Unicode runes to ZapfDingbats glyph names
// ("a1" ... "a206"). The entries correspond to the name;code pairs in
// ./glyphlist/zapfdingbats.txt, with the hexadecimal codes lower-cased, and
// are listed in lexical order of glyph name.
var zapfdingbatsRuneToGlyphMap = map[rune]string{
'\u2701': "a1",
'\u2721': "a10",
'\u275e': "a100",
'\u2761': "a101",
'\u2762': "a102",
'\u2763': "a103",
'\u2764': "a104",
'\u2710': "a105",
'\u2765': "a106",
'\u2766': "a107",
'\u2767': "a108",
'\u2660': "a109",
'\u261b': "a11",
'\u2665': "a110",
'\u2666': "a111",
'\u2663': "a112",
'\u2709': "a117",
'\u2708': "a118",
'\u2707': "a119",
'\u261e': "a12",
'\u2460': "a120",
'\u2461': "a121",
'\u2462': "a122",
'\u2463': "a123",
'\u2464': "a124",
'\u2465': "a125",
'\u2466': "a126",
'\u2467': "a127",
'\u2468': "a128",
'\u2469': "a129",
'\u270c': "a13",
'\u2776': "a130",
'\u2777': "a131",
'\u2778': "a132",
'\u2779': "a133",
'\u277a': "a134",
'\u277b': "a135",
'\u277c': "a136",
'\u277d': "a137",
'\u277e': "a138",
'\u277f': "a139",
'\u270d': "a14",
'\u2780': "a140",
'\u2781': "a141",
'\u2782': "a142",
'\u2783': "a143",
'\u2784': "a144",
'\u2785': "a145",
'\u2786': "a146",
'\u2787': "a147",
'\u2788': "a148",
'\u2789': "a149",
'\u270e': "a15",
'\u278a': "a150",
'\u278b': "a151",
'\u278c': "a152",
'\u278d': "a153",
'\u278e': "a154",
'\u278f': "a155",
'\u2790': "a156",
'\u2791': "a157",
'\u2792': "a158",
'\u2793': "a159",
'\u270f': "a16",
'\u2794': "a160",
'\u2192': "a161",
'\u27a3': "a162",
'\u2194': "a163",
'\u2195': "a164",
'\u2799': "a165",
'\u279b': "a166",
'\u279c': "a167",
'\u279d': "a168",
'\u279e': "a169",
'\u2711': "a17",
'\u279f': "a170",
'\u27a0': "a171",
'\u27a1': "a172",
'\u27a2': "a173",
'\u27a4': "a174",
'\u27a5': "a175",
'\u27a6': "a176",
'\u27a7': "a177",
'\u27a8': "a178",
'\u27a9': "a179",
'\u2712': "a18",
'\u27ab': "a180",
'\u27ad': "a181",
'\u27af': "a182",
'\u27b2': "a183",
'\u27b3': "a184",
'\u27b5': "a185",
'\u27b8': "a186",
'\u27ba': "a187",
'\u27bb': "a188",
'\u27bc': "a189",
'\u2713': "a19",
'\u27bd': "a190",
'\u27be': "a191",
'\u279a': "a192",
'\u27aa': "a193",
'\u27b6': "a194",
'\u27b9': "a195",
'\u2798': "a196",
'\u27b4': "a197",
'\u27b7': "a198",
'\u27ac': "a199",
'\u2702': "a2",
'\u2714': "a20",
'\u27ae': "a200",
'\u27b1': "a201",
'\u2703': "a202",
'\u2750': "a203",
'\u2752': "a204",
'\u276e': "a205",
'\u2770': "a206",
'\u2715': "a21",
'\u2716': "a22",
'\u2717': "a23",
'\u2718': "a24",
'\u2719': "a25",
'\u271a': "a26",
'\u271b': "a27",
'\u271c': "a28",
'\u2722': "a29",
'\u2704': "a3",
'\u2723': "a30",
'\u2724': "a31",
'\u2725': "a32",
'\u2726': "a33",
'\u2727': "a34",
'\u2605': "a35",
'\u2729': "a36",
'\u272a': "a37",
'\u272b': "a38",
'\u272c': "a39",
'\u260e': "a4",
'\u272d': "a40",
'\u272e': "a41",
'\u272f': "a42",
'\u2730': "a43",
'\u2731': "a44",
'\u2732': "a45",
'\u2733': "a46",
'\u2734': "a47",
'\u2735': "a48",
'\u2736': "a49",
'\u2706': "a5",
'\u2737': "a50",
'\u2738': "a51",
'\u2739': "a52",
'\u273a': "a53",
'\u273b': "a54",
'\u273c': "a55",
'\u273d': "a56",
'\u273e': "a57",
'\u273f': "a58",
'\u2740': "a59",
'\u271d': "a6",
'\u2741': "a60",
'\u2742': "a61",
'\u2743': "a62",
'\u2744': "a63",
'\u2745': "a64",
'\u2746': "a65",
'\u2747': "a66",
'\u2748': "a67",
'\u2749': "a68",
'\u274a': "a69",
'\u271e': "a7",
'\u274b': "a70",
'\u25cf': "a71",
'\u274d': "a72",
'\u25a0': "a73",
'\u274f': "a74",
'\u2751': "a75",
'\u25b2': "a76",
'\u25bc': "a77",
'\u25c6': "a78",
'\u2756': "a79",
'\u271f': "a8",
'\u25d7': "a81",
'\u2758': "a82",
'\u2759': "a83",
'\u275a': "a84",
'\u276f': "a85",
'\u2771': "a86",
'\u2772': "a87",
'\u2773': "a88",
'\u2768': "a89",
'\u2720': "a9",
'\u2769': "a90",
'\u276c': "a91",
'\u276d': "a92",
'\u276a': "a93",
'\u276b': "a94",
'\u2774': "a95",
'\u2775': "a96",
'\u275b': "a97",
'\u275c': "a98",
'\u275d': "a99",
}

View File

@ -1,3 +1,8 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package textencoding
import (
@ -14,12 +19,10 @@ func NewSymbolEncoder() SymbolEncoder {
return encoder
}
// Raw utf-8 rune string -> encoded string for use in PDF.
// Convert a raw utf8 string (series of runes) to an encoded string (series of character codes) to be used in PDF.
func (enc SymbolEncoder) Encode(raw string) string {
encoded := []byte{}
for _, rune := range raw {
// rune -> glyph -> code ?:
code, found := enc.RuneToCharcode(rune)
if !found {
continue
@ -31,8 +34,9 @@ func (enc SymbolEncoder) Encode(raw string) string {
return string(encoded)
}
// Symbol char code -> glyph name. Returns the glyph and a bool to indicate whether or not it was found.
func (enc SymbolEncoder) CharcodeToGlyphName(code byte) (string, bool) {
// Conversion between character code and glyph name.
// The bool return flag is true if there was a match, and false otherwise.
func (enc SymbolEncoder) CharcodeToGlyph(code byte) (string, bool) {
glyph, has := symbolEncodingCharcodeToGlyphMap[code]
if !has {
common.Log.Debug("Symbol encoding error: unable to find charcode->glyph entry (%v)", code)
@ -41,9 +45,22 @@ func (enc SymbolEncoder) CharcodeToGlyphName(code byte) (string, bool) {
return glyph, true
}
// Convert utf-8 input rune to a charcode.
// Conversion between glyph name and character code.
// The bool return flag is true if there was a match, and false otherwise.
func (enc SymbolEncoder) GlyphToCharcode(glyph string) (byte, bool) {
code, found := symbolEncodingGlyphToCharcodeMap[glyph]
if !found {
common.Log.Debug("Symbol encoding error: unable to find glyph->charcode entry (%s)", glyph)
return 0, false
}
return code, found
}
// Convert rune to character code.
// The bool return flag is true if there was a match, and false otherwise.
func (enc SymbolEncoder) RuneToCharcode(val rune) (byte, bool) {
glyph, found := symbolEncodingRuneToGlyphMap[val]
glyph, found := runeToGlyph(val, glyphlistRuneToGlyphMap)
if !found {
common.Log.Debug("Symbol encoding error: unable to find rune->glyph entry (%v)", val)
return 0, false
@ -58,29 +75,8 @@ func (enc SymbolEncoder) RuneToCharcode(val rune) (byte, bool) {
return code, true
}
// Convert utf-8 input rune to glyph name.
func (enc SymbolEncoder) RuneToGlyphName(val rune) (string, bool) {
glyph, found := symbolEncodingRuneToGlyphMap[val]
if !found {
common.Log.Debug("Symbol encoding error: unable to find rune->glyph entry (%v)", val)
return "", false
}
return glyph, true
}
// Convert glyph name to char code in this encoding.
func (enc SymbolEncoder) GlyphNameToCharcode(glyph string) (byte, bool) {
code, found := symbolEncodingGlyphToCharcodeMap[glyph]
if !found {
common.Log.Debug("Symbol encoding error: unable to find glyph->charcode entry (%s)", glyph)
return 0, false
}
return code, found
}
// Convert charcode to utf-8 rune.
// Convert character code to rune.
// The bool return flag is true if there was a match, and false otherwise.
func (enc SymbolEncoder) CharcodeToRune(charcode byte) (rune, bool) {
glyph, found := symbolEncodingCharcodeToGlyphMap[charcode]
if !found {
@ -88,15 +84,26 @@ func (enc SymbolEncoder) CharcodeToRune(charcode byte) (rune, bool) {
return 0, false
}
val, found := symbolEncodingGlyphToRuneMap[glyph]
val, found := glyphToRune(glyph, glyphlistGlyphToRuneMap)
if !found {
common.Log.Debug("Symbol encoding error: unable to find glyph->rune entry (%v)", glyph)
return 0, false
}
return val, true
}
// RuneToGlyph returns the glyph name for val, looked up in the general glyph
// list (glyphlistRuneToGlyphMap). It does not check that the glyph also has a
// character code in the Symbol encoding.
// The bool result is true when a glyph name was found.
func (enc SymbolEncoder) RuneToGlyph(val rune) (string, bool) {
	name, ok := runeToGlyph(val, glyphlistRuneToGlyphMap)
	return name, ok
}
// GlyphToRune returns the rune for a glyph name, looked up in the general
// glyph list (glyphlistGlyphToRuneMap).
// The bool result is true when the glyph name was found.
func (enc SymbolEncoder) GlyphToRune(glyph string) (rune, bool) {
	r, ok := glyphToRune(glyph, glyphlistGlyphToRuneMap)
	return r, ok
}
// Convert to PDF Object.
func (enc SymbolEncoder) ToPdfObject() core.PdfObject {
dict := core.MakeDict()
@ -107,6 +114,7 @@ func (enc SymbolEncoder) ToPdfObject() core.PdfObject {
return core.MakeIndirectObject(dict)
}
// Charcode to Glyph map (Symbol encoding)
var symbolEncodingCharcodeToGlyphMap map[byte]string = map[byte]string{
32: "space",
33: "exclam",
@ -299,390 +307,7 @@ var symbolEncodingCharcodeToGlyphMap map[byte]string = map[byte]string{
254: "bracerightbt",
}
var symbolEncodingGlyphToRuneMap map[string]rune = map[string]rune{
"space": '\u0020',
"exclam": '\u0021',
"universal": '\u2200',
"numbersign": '\u0023',
"existential": '\u2203',
"percent": '\u0025',
"ampersand": '\u0026',
"suchthat": '\u220b',
"parenleft": '\u0028',
"parenright": '\u0029',
"asteriskmath": '\u2217',
"plus": '\u002b',
"comma": '\u002c',
"minus": '\u2212',
"period": '\u002e',
"slash": '\u002f',
"zero": '\u0030',
"one": '\u0031',
"two": '\u0032',
"three": '\u0033',
"four": '\u0034',
"five": '\u0035',
"six": '\u0036',
"seven": '\u0037',
"eight": '\u0038',
"nine": '\u0039',
"colon": '\u003a',
"semicolon": '\u003b',
"less": '\u003c',
"equal": '\u003d',
"greater": '\u003e',
"question": '\u003f',
"congruent": '\u2245',
"Alpha": '\u0391',
"Beta": '\u0392',
"Chi": '\u03a7',
"Delta": '\u2206',
"Epsilon": '\u0395',
"Phi": '\u03a6',
"Gamma": '\u0393',
"Eta": '\u0397',
"Iota": '\u0399',
"theta1": '\u03d1',
"Kappa": '\u039a',
"Lambda": '\u039b',
"Mu": '\u039c',
"Nu": '\u039d',
"Omicron": '\u039f',
"Pi": '\u03a0',
"Theta": '\u0398',
"Rho": '\u03a1',
"Sigma": '\u03a3',
"Tau": '\u03a4',
"Upsilon": '\u03a5',
"sigma1": '\u03c2',
"Omega": '\u2126',
"Xi": '\u039e',
"Psi": '\u03a8',
"Zeta": '\u0396',
"bracketleft": '\u005b',
"therefore": '\u2234',
"bracketright": '\u005d',
"perpendicular": '\u22a5',
"underscore": '\u005f',
"radicalex": '\uf8e5',
"alpha": '\u03b1',
"beta": '\u03b2',
"chi": '\u03c7',
"delta": '\u03b4',
"epsilon": '\u03b5',
"phi": '\u03c6',
"gamma": '\u03b3',
"eta": '\u03b7',
"iota": '\u03b9',
"phi1": '\u03d5',
"kappa": '\u03ba',
"lambda": '\u03bb',
"mu": '\u00b5',
"nu": '\u03bd',
"omicron": '\u03bf',
"pi": '\u03c0',
"theta": '\u03b8',
"rho": '\u03c1',
"sigma": '\u03c3',
"tau": '\u03c4',
"upsilon": '\u03c5',
"omega1": '\u03d6',
"omega": '\u03c9',
"xi": '\u03be',
"psi": '\u03c8',
"zeta": '\u03b6',
"braceleft": '\u007b',
"bar": '\u007c',
"braceright": '\u007d',
"similar": '\u223c',
"Euro": '\u20ac',
"Upsilon1": '\u03d2',
"minute": '\u2032',
"lessequal": '\u2264',
"fraction": '\u2044',
"infinity": '\u221e',
"florin": '\u0192',
"club": '\u2663',
"diamond": '\u2666',
"heart": '\u2665',
"spade": '\u2660',
"arrowboth": '\u2194',
"arrowleft": '\u2190',
"arrowup": '\u2191',
"arrowright": '\u2192',
"arrowdown": '\u2193',
"degree": '\u00b0',
"plusminus": '\u00b1',
"second": '\u2033',
"greaterequal": '\u2265',
"multiply": '\u00d7',
"proportional": '\u221d',
"partialdiff": '\u2202',
"bullet": '\u2022',
"divide": '\u00f7',
"notequal": '\u2260',
"equivalence": '\u2261',
"approxequal": '\u2248',
"ellipsis": '\u2026',
"arrowvertex": '\uf8e6',
"arrowhorizex": '\uf8e7',
"carriagereturn": '\u21b5',
"aleph": '\u2135',
"Ifraktur": '\u2111',
"Rfraktur": '\u211c',
"weierstrass": '\u2118',
"circlemultiply": '\u2297',
"circleplus": '\u2295',
"emptyset": '\u2205',
"intersection": '\u2229',
"union": '\u222a',
"propersuperset": '\u2283',
"reflexsuperset": '\u2287',
"notsubset": '\u2284',
"propersubset": '\u2282',
"reflexsubset": '\u2286',
"element": '\u2208',
"notelement": '\u2209',
"angle": '\u2220',
"gradient": '\u2207',
"registerserif": '\uf6da',
"copyrightserif": '\uf6d9',
"trademarkserif": '\uf6db',
"product": '\u220f',
"radical": '\u221a',
"dotmath": '\u22c5',
"logicalnot": '\u00ac',
"logicaland": '\u2227',
"logicalor": '\u2228',
"arrowdblboth": '\u21d4',
"arrowdblleft": '\u21d0',
"arrowdblup": '\u21d1',
"arrowdblright": '\u21d2',
"arrowdbldown": '\u21d3',
"lozenge": '\u25ca',
"angleleft": '\u2329',
"registersans": '\uf8e8',
"copyrightsans": '\uf8e9',
"trademarksans": '\uf8ea',
"summation": '\u2211',
"parenlefttp": '\uf8eb',
"parenleftex": '\uf8ec',
"parenleftbt": '\uf8ed',
"bracketlefttp": '\uf8ee',
"bracketleftex": '\uf8ef',
"bracketleftbt": '\uf8f0',
"bracelefttp": '\uf8f1',
"braceleftmid": '\uf8f2',
"braceleftbt": '\uf8f3',
"braceex": '\uf8f4',
"angleright": '\u232a',
"integral": '\u222b',
"integraltp": '\u2320',
"integralex": '\uf8f5',
"integralbt": '\u2321',
"parenrighttp": '\uf8f6',
"parenrightex": '\uf8f7',
"parenrightbt": '\uf8f8',
"bracketrighttp": '\uf8f9',
"bracketrightex": '\uf8fa',
"bracketrightbt": '\uf8fb',
"bracerighttp": '\uf8fc',
"bracerightmid": '\uf8fd',
"bracerightbt": '\uf8fe',
}
var symbolEncodingRuneToGlyphMap map[rune]string = map[rune]string{
'\u0020': "space",
'\u0021': "exclam",
'\u2200': "universal",
'\u0023': "numbersign",
'\u2203': "existential",
'\u0025': "percent",
'\u0026': "ampersand",
'\u220b': "suchthat",
'\u0028': "parenleft",
'\u0029': "parenright",
'\u2217': "asteriskmath",
'\u002b': "plus",
'\u002c': "comma",
'\u2212': "minus",
'\u002e': "period",
'\u002f': "slash",
'\u0030': "zero",
'\u0031': "one",
'\u0032': "two",
'\u0033': "three",
'\u0034': "four",
'\u0035': "five",
'\u0036': "six",
'\u0037': "seven",
'\u0038': "eight",
'\u0039': "nine",
'\u003a': "colon",
'\u003b': "semicolon",
'\u003c': "less",
'\u003d': "equal",
'\u003e': "greater",
'\u003f': "question",
'\u2245': "congruent",
'\u0391': "Alpha",
'\u0392': "Beta",
'\u03a7': "Chi",
'\u2206': "Delta",
'\u0395': "Epsilon",
'\u03a6': "Phi",
'\u0393': "Gamma",
'\u0397': "Eta",
'\u0399': "Iota",
'\u03d1': "theta1",
'\u039a': "Kappa",
'\u039b': "Lambda",
'\u039c': "Mu",
'\u039d': "Nu",
'\u039f': "Omicron",
'\u03a0': "Pi",
'\u0398': "Theta",
'\u03a1': "Rho",
'\u03a3': "Sigma",
'\u03a4': "Tau",
'\u03a5': "Upsilon",
'\u03c2': "sigma1",
'\u2126': "Omega",
'\u039e': "Xi",
'\u03a8': "Psi",
'\u0396': "Zeta",
'\u005b': "bracketleft",
'\u2234': "therefore",
'\u005d': "bracketright",
'\u22a5': "perpendicular",
'\u005f': "underscore",
'\uf8e5': "radicalex",
'\u03b1': "alpha",
'\u03b2': "beta",
'\u03c7': "chi",
'\u03b4': "delta",
'\u03b5': "epsilon",
'\u03c6': "phi",
'\u03b3': "gamma",
'\u03b7': "eta",
'\u03b9': "iota",
'\u03d5': "phi1",
'\u03ba': "kappa",
'\u03bb': "lambda",
'\u00b5': "mu",
'\u03bd': "nu",
'\u03bf': "omicron",
'\u03c0': "pi",
'\u03b8': "theta",
'\u03c1': "rho",
'\u03c3': "sigma",
'\u03c4': "tau",
'\u03c5': "upsilon",
'\u03d6': "omega1",
'\u03c9': "omega",
'\u03be': "xi",
'\u03c8': "psi",
'\u03b6': "zeta",
'\u007b': "braceleft",
'\u007c': "bar",
'\u007d': "braceright",
'\u223c': "similar",
'\u20ac': "Euro",
'\u03d2': "Upsilon1",
'\u2032': "minute",
'\u2264': "lessequal",
'\u2044': "fraction",
'\u221e': "infinity",
'\u0192': "florin",
'\u2663': "club",
'\u2666': "diamond",
'\u2665': "heart",
'\u2660': "spade",
'\u2194': "arrowboth",
'\u2190': "arrowleft",
'\u2191': "arrowup",
'\u2192': "arrowright",
'\u2193': "arrowdown",
'\u00b0': "degree",
'\u00b1': "plusminus",
'\u2033': "second",
'\u2265': "greaterequal",
'\u00d7': "multiply",
'\u221d': "proportional",
'\u2202': "partialdiff",
'\u2022': "bullet",
'\u00f7': "divide",
'\u2260': "notequal",
'\u2261': "equivalence",
'\u2248': "approxequal",
'\u2026': "ellipsis",
'\uf8e6': "arrowvertex",
'\uf8e7': "arrowhorizex",
'\u21b5': "carriagereturn",
'\u2135': "aleph",
'\u2111': "Ifraktur",
'\u211c': "Rfraktur",
'\u2118': "weierstrass",
'\u2297': "circlemultiply",
'\u2295': "circleplus",
'\u2205': "emptyset",
'\u2229': "intersection",
'\u222a': "union",
'\u2283': "propersuperset",
'\u2287': "reflexsuperset",
'\u2284': "notsubset",
'\u2282': "propersubset",
'\u2286': "reflexsubset",
'\u2208': "element",
'\u2209': "notelement",
'\u2220': "angle",
'\u2207': "gradient",
'\uf6da': "registerserif",
'\uf6d9': "copyrightserif",
'\uf6db': "trademarkserif",
'\u220f': "product",
'\u221a': "radical",
'\u22c5': "dotmath",
'\u00ac': "logicalnot",
'\u2227': "logicaland",
'\u2228': "logicalor",
'\u21d4': "arrowdblboth",
'\u21d0': "arrowdblleft",
'\u21d1': "arrowdblup",
'\u21d2': "arrowdblright",
'\u21d3': "arrowdbldown",
'\u25ca': "lozenge",
'\u2329': "angleleft",
'\uf8e8': "registersans",
'\uf8e9': "copyrightsans",
'\uf8ea': "trademarksans",
'\u2211': "summation",
'\uf8eb': "parenlefttp",
'\uf8ec': "parenleftex",
'\uf8ed': "parenleftbt",
'\uf8ee': "bracketlefttp",
'\uf8ef': "bracketleftex",
'\uf8f0': "bracketleftbt",
'\uf8f1': "bracelefttp",
'\uf8f2': "braceleftmid",
'\uf8f3': "braceleftbt",
'\uf8f4': "braceex",
'\u232a': "angleright",
'\u222b': "integral",
'\u2320': "integraltp",
'\uf8f5': "integralex",
'\u2321': "integralbt",
'\uf8f6': "parenrighttp",
'\uf8f7': "parenrightex",
'\uf8f8': "parenrightbt",
'\uf8f9': "bracketrighttp",
'\uf8fa': "bracketrightex",
'\uf8fb': "bracketrightbt",
'\uf8fc': "bracerighttp",
'\uf8fd': "bracerightmid",
'\uf8fe': "bracerightbt",
}
// Glyph to charcode map (Symbol encoding).
var symbolEncodingGlyphToCharcodeMap map[string]byte = map[string]byte{
"space": 32,
"exclam": 33,

View File

@ -0,0 +1,56 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package textencoding
import "github.com/unidoc/unidoc/common"
// glyphToRune returns the rune registered for glyph in glyphToRuneMap.
// The bool result is true on a match; on a miss the result is (0, false).
func glyphToRune(glyph string, glyphToRuneMap map[string]rune) (rune, bool) {
	// A missing key yields the zero rune, so the raw lookup result already
	// has the (0, false) shape required for a miss.
	r, ok := glyphToRuneMap[glyph]
	return r, ok
}
// runeToGlyph returns the glyph name registered for ucode in runeToGlyphMap.
// The bool result is true on a match; on a miss the result is ("", false).
func runeToGlyph(ucode rune, runeToGlyphMap map[rune]string) (string, bool) {
	// A missing key yields the empty string, so the raw lookup result already
	// has the ("", false) shape required for a miss.
	name, ok := runeToGlyphMap[ucode]
	return name, ok
}
// splitWords breaks raw into words at every rune that the encoder maps to the
// "space" glyph. The separators themselves are not included in the result.
//
// Runes for which the encoder has no glyph are logged and stay part of the
// word being accumulated. Adjacent separators produce empty-string entries,
// while an empty trailing word is omitted. The result is never nil.
func splitWords(raw string, encoder TextEncoder) []string {
	chars := []rune(raw)
	words := []string{}

	wordStart := 0
	for pos := 0; pos < len(chars); pos++ {
		name, ok := encoder.RuneToGlyph(chars[pos])
		switch {
		case !ok:
			common.Log.Debug("Glyph not found for code: %s\n", string(chars[pos]))
		case name == "space":
			words = append(words, string(chars[wordStart:pos]))
			wordStart = pos + 1
		}
	}

	// Whatever follows the last separator forms the final word.
	if tail := chars[wordStart:]; len(tail) > 0 {
		words = append(words, string(tail))
	}
	return words
}

View File

@ -10,34 +10,7 @@ import (
"github.com/unidoc/unidoc/pdf/core"
)
func splitWords(raw string, encoder TextEncoder) []string {
runes := []rune(raw)
words := []string{}
startsAt := 0
for idx, code := range runes {
glyph, found := encoder.RuneToGlyphName(code)
if !found {
common.Log.Debug("Glyph not found for code: %s\n", string(code))
continue
}
if glyph == "space" {
word := runes[startsAt:idx]
words = append(words, string(word))
startsAt = idx + 1
}
}
word := runes[startsAt:]
if len(word) > 0 {
words = append(words, string(word))
}
return words
}
// WinAnsiEncoding.
type WinAnsiEncoder struct {
}
@ -50,11 +23,12 @@ func (winenc WinAnsiEncoder) ToPdfObject() core.PdfObject {
return core.MakeName("WinAnsiEncoding")
}
// Convert utf8 runes to WinAnsiEncoded encoded string (series of char codes).
// Convert a raw utf8 string (series of runes) to an encoded string (series of character codes) to be used in PDF.
func (winenc WinAnsiEncoder) Encode(raw string) string {
encoded := []byte{}
for _, rune := range raw {
if code, has := utf8ToWinAnsiEncodingMap[rune]; has {
code, has := winenc.RuneToCharcode(rune)
if has {
encoded = append(encoded, code)
}
}
@ -62,72 +36,77 @@ func (winenc WinAnsiEncoder) Encode(raw string) string {
return string(encoded)
}
func (winenc WinAnsiEncoder) RuneToGlyphName(val rune) (string, bool) {
code, found := winenc.RuneToCharcode(val)
if !found {
return "", false
}
glyph, found := winenc.CharcodeToGlyphName(code)
if !found {
return "", false
}
return glyph, true
}
func (winenc WinAnsiEncoder) CharcodeToGlyphName(code byte) (string, bool) {
glyph, has := winAnsiEncodingGlyphMap[code]
// Conversion between character code and glyph name.
// The bool return flag is true if there was a match, and false otherwise.
func (winenc WinAnsiEncoder) CharcodeToGlyph(code byte) (string, bool) {
glyph, has := winansiEncodingCharcodeToGlyphMap[code]
if !has {
common.Log.Debug("Charcode -> Glyph error: charcode not found: %d\n", code)
return "", false
}
return glyph, true
}
func (winenc WinAnsiEncoder) GlyphNameToCharcode(glyph string) (byte, bool) {
for code, name := range winAnsiEncodingGlyphMap {
if name == glyph {
return code, true
}
}
// Not found.
return 0, false
}
// Convert UTF-8 rune to character code. If applicable.
func (winenc WinAnsiEncoder) RuneToCharcode(val rune) (byte, bool) {
code, has := utf8ToWinAnsiEncodingMap[val]
if !has {
// Conversion between glyph name and character code.
// The bool return flag is true if there was a match, and false otherwise.
func (winenc WinAnsiEncoder) GlyphToCharcode(glyph string) (byte, bool) {
code, found := winansiEncodingGlyphToCharcodeMap[glyph]
if !found {
common.Log.Debug("Glyph -> Charcode error: glyph not found: %s\n", glyph)
return 0, false
}
return code, true
}
func (winenc WinAnsiEncoder) CharcodeToRune(charcode byte) (rune, bool) {
val, has := winAnsiEncodingToUtf8Map[charcode]
if !has {
// Convert rune to character code.
// The bool return flag is true if there was a match, and false otherwise.
func (winenc WinAnsiEncoder) RuneToCharcode(val rune) (byte, bool) {
glyph, found := winenc.RuneToGlyph(val)
if !found {
return 0, false
}
return val, true
}
// WinAnsiEncoding.
// Convert a UTF8 string to WinAnsiEncoding byte string.
func utf8ToWinAnsiEncoding(strUtf8 string) string {
encoded := []byte{}
for _, rune := range strUtf8 {
if code, has := utf8ToWinAnsiEncodingMap[rune]; has {
encoded = append(encoded, code)
}
code, found := winansiEncodingGlyphToCharcodeMap[glyph]
if !found {
common.Log.Debug("Glyph -> Charcode error: glyph not found %s\n", glyph)
return 0, false
}
return string(encoded)
return code, true
}
// Maps to enable conversion of WinAnsiEncoding character codes to glyphs, utf8 and vice versa.
var winAnsiEncodingGlyphMap = map[byte]string{
// CharcodeToRune converts a WinAnsiEncoding character code to a rune in two
// steps: character code -> glyph name, then glyph name -> rune via the general
// glyph list.
// The bool result is true only when both lookups succeed; otherwise the result
// is (0, false). Only the first lookup logs a debug message on a miss.
func (winenc WinAnsiEncoder) CharcodeToRune(charcode byte) (rune, bool) {
	name, ok := winansiEncodingCharcodeToGlyphMap[charcode]
	if !ok {
		common.Log.Debug("Charcode -> Glyph error: charcode not found: %d\n", charcode)
		return 0, false
	}

	if r, ok := glyphToRune(name, glyphlistGlyphToRuneMap); ok {
		return r, true
	}
	return 0, false
}
// RuneToGlyph returns the glyph name for val, looked up in the general glyph
// list (glyphlistRuneToGlyphMap). It does not check that the glyph also has a
// character code in WinAnsiEncoding.
// The bool result is true when a glyph name was found.
func (winenc WinAnsiEncoder) RuneToGlyph(val rune) (string, bool) {
	name, ok := runeToGlyph(val, glyphlistRuneToGlyphMap)
	return name, ok
}
// GlyphToRune returns the rune for a glyph name, looked up in the general
// glyph list (glyphlistGlyphToRuneMap).
// The bool result is true when the glyph name was found.
func (winenc WinAnsiEncoder) GlyphToRune(glyph string) (rune, bool) {
	r, ok := glyphToRune(glyph, glyphlistGlyphToRuneMap)
	return r, ok
}
// Charcode to glyph name map (WinAnsiEncoding).
var winansiEncodingCharcodeToGlyphMap = map[byte]string{
32: "space",
33: "exclam",
34: "quotedbl",
@ -354,456 +333,230 @@ var winAnsiEncodingGlyphMap = map[byte]string{
255: "ydieresis",
}
var winAnsiEncodingToUtf8Map = map[byte]rune{
32: '\u0020',
33: '\u0021',
34: '\u0022',
35: '\u0023',
36: '\u0024',
37: '\u0025',
38: '\u0026',
39: '\u0027',
40: '\u0028',
41: '\u0029',
42: '\u002a',
43: '\u002b',
44: '\u002c',
45: '\u002d',
46: '\u002e',
47: '\u002f',
48: '\u0030',
49: '\u0031',
50: '\u0032',
51: '\u0033',
52: '\u0034',
53: '\u0035',
54: '\u0036',
55: '\u0037',
56: '\u0038',
57: '\u0039',
58: '\u003a',
59: '\u003b',
60: '\u003c',
61: '\u003d',
62: '\u003e',
63: '\u003f',
64: '\u0040',
65: '\u0041',
66: '\u0042',
67: '\u0043',
68: '\u0044',
69: '\u0045',
70: '\u0046',
71: '\u0047',
72: '\u0048',
73: '\u0049',
74: '\u004a',
75: '\u004b',
76: '\u004c',
77: '\u004d',
78: '\u004e',
79: '\u004f',
80: '\u0050',
81: '\u0051',
82: '\u0052',
83: '\u0053',
84: '\u0054',
85: '\u0055',
86: '\u0056',
87: '\u0057',
88: '\u0058',
89: '\u0059',
90: '\u005a',
91: '\u005b',
92: '\u005c',
93: '\u005d',
94: '\u005e',
95: '\u005f',
96: '\u0060',
97: '\u0061',
98: '\u0062',
99: '\u0063',
100: '\u0064',
101: '\u0065',
102: '\u0066',
103: '\u0067',
104: '\u0068',
105: '\u0069',
106: '\u006a',
107: '\u006b',
108: '\u006c',
109: '\u006d',
110: '\u006e',
111: '\u006f',
112: '\u0070',
113: '\u0071',
114: '\u0072',
115: '\u0073',
116: '\u0074',
117: '\u0075',
118: '\u0076',
119: '\u0077',
120: '\u0078',
121: '\u0079',
122: '\u007a',
123: '\u007b',
124: '\u007c',
125: '\u007d',
126: '\u007e',
127: '\u2022',
128: '\u20ac',
129: '\u2022',
130: '\u201a',
131: '\u0192',
132: '\u201e',
133: '\u2026',
134: '\u2020',
135: '\u2021',
136: '\u02c6',
137: '\u2030',
138: '\u0160',
139: '\u2039',
140: '\u0152',
141: '\u2022',
142: '\u017d',
143: '\u2022',
144: '\u2022',
145: '\u2018',
146: '\u2019',
147: '\u201c',
148: '\u201d',
149: '\u2022',
150: '\u2013',
151: '\u2014',
152: '\u02dc',
153: '\u2122',
154: '\u0161',
155: '\u203a',
156: '\u0153',
157: '\u2022',
158: '\u017e',
159: '\u0178',
160: '\u0020',
161: '\u00a1',
162: '\u00a2',
163: '\u00a3',
164: '\u00a4',
165: '\u00a5',
166: '\u00a6',
167: '\u00a7',
168: '\u00a8',
169: '\u00a9',
170: '\u00aa',
171: '\u00ab',
172: '\u00ac',
173: '\u002d',
174: '\u00ae',
175: '\u00af',
176: '\u00b0',
177: '\u00b1',
178: '\u00b2',
179: '\u00b3',
180: '\u00b4',
181: '\u00b5',
182: '\u00b6',
183: '\u00b7',
184: '\u00b8',
185: '\u00b9',
186: '\u00ba',
187: '\u00bb',
188: '\u00bc',
189: '\u00bd',
190: '\u00be',
191: '\u00bf',
192: '\u00c0',
193: '\u00c1',
194: '\u00c2',
195: '\u00c3',
196: '\u00c4',
197: '\u00c5',
198: '\u00c6',
199: '\u00c7',
200: '\u00c8',
201: '\u00c9',
202: '\u00ca',
203: '\u00cb',
204: '\u00cc',
205: '\u00cd',
206: '\u00ce',
207: '\u00cf',
208: '\u00d0',
209: '\u00d1',
210: '\u00d2',
211: '\u00d3',
212: '\u00d4',
213: '\u00d5',
214: '\u00d6',
215: '\u00d7',
216: '\u00d8',
217: '\u00d9',
218: '\u00da',
219: '\u00db',
220: '\u00dc',
221: '\u00dd',
222: '\u00de',
223: '\u00df',
224: '\u00e0',
225: '\u00e1',
226: '\u00e2',
227: '\u00e3',
228: '\u00e4',
229: '\u00e5',
230: '\u00e6',
231: '\u00e7',
232: '\u00e8',
233: '\u00e9',
234: '\u00ea',
235: '\u00eb',
236: '\u00ec',
237: '\u00ed',
238: '\u00ee',
239: '\u00ef',
240: '\u00f0',
241: '\u00f1',
242: '\u00f2',
243: '\u00f3',
244: '\u00f4',
245: '\u00f5',
246: '\u00f6',
247: '\u00f7',
248: '\u00f8',
249: '\u00f9',
250: '\u00fa',
251: '\u00fb',
252: '\u00fc',
253: '\u00fd',
254: '\u00fe',
255: '\u00ff',
}
var utf8ToWinAnsiEncodingMap = map[rune]byte{
'\u0020': 32,
'\u0021': 33,
'\u0022': 34,
'\u0023': 35,
'\u0024': 36,
'\u0025': 37,
'\u0026': 38,
'\u0027': 39,
'\u0028': 40,
'\u0029': 41,
'\u002a': 42,
'\u002b': 43,
'\u002c': 44,
'\u002d': 45,
'\u002e': 46,
'\u002f': 47,
'\u0030': 48,
'\u0031': 49,
'\u0032': 50,
'\u0033': 51,
'\u0034': 52,
'\u0035': 53,
'\u0036': 54,
'\u0037': 55,
'\u0038': 56,
'\u0039': 57,
'\u003a': 58,
'\u003b': 59,
'\u003c': 60,
'\u003d': 61,
'\u003e': 62,
'\u003f': 63,
'\u0040': 64,
'\u0041': 65,
'\u0042': 66,
'\u0043': 67,
'\u0044': 68,
'\u0045': 69,
'\u0046': 70,
'\u0047': 71,
'\u0048': 72,
'\u0049': 73,
'\u004a': 74,
'\u004b': 75,
'\u004c': 76,
'\u004d': 77,
'\u004e': 78,
'\u004f': 79,
'\u0050': 80,
'\u0051': 81,
'\u0052': 82,
'\u0053': 83,
'\u0054': 84,
'\u0055': 85,
'\u0056': 86,
'\u0057': 87,
'\u0058': 88,
'\u0059': 89,
'\u005a': 90,
'\u005b': 91,
'\u005c': 92,
'\u005d': 93,
'\u005e': 94,
'\u005f': 95,
'\u0060': 96,
'\u0061': 97,
'\u0062': 98,
'\u0063': 99,
'\u0064': 100,
'\u0065': 101,
'\u0066': 102,
'\u0067': 103,
'\u0068': 104,
'\u0069': 105,
'\u006a': 106,
'\u006b': 107,
'\u006c': 108,
'\u006d': 109,
'\u006e': 110,
'\u006f': 111,
'\u0070': 112,
'\u0071': 113,
'\u0072': 114,
'\u0073': 115,
'\u0074': 116,
'\u0075': 117,
'\u0076': 118,
'\u0077': 119,
'\u0078': 120,
'\u0079': 121,
'\u007a': 122,
'\u007b': 123,
'\u007c': 124,
'\u007d': 125,
'\u007e': 126,
'\u2022': 127,
'\u20ac': 128,
// '\u2022': 129, // duplicate
'\u201a': 130,
'\u0192': 131,
'\u201e': 132,
'\u2026': 133,
'\u2020': 134,
'\u2021': 135,
'\u02c6': 136,
'\u2030': 137,
'\u0160': 138,
'\u2039': 139,
'\u0152': 140,
//'\u2022': 141, // duplicate
'\u017d': 142,
//'\u2022': 143, // duplicate
// '\u2022': 144, // duplicate
'\u2018': 145,
'\u2019': 146,
'\u201c': 147,
'\u201d': 148,
//'\u2022': 149, // duplicate
'\u2013': 150,
'\u2014': 151,
'\u02dc': 152,
'\u2122': 153,
'\u0161': 154,
'\u203a': 155,
'\u0153': 156,
//'\u2022': 157, // duplicate
'\u017e': 158,
'\u0178': 159,
//'\u0020': 160, // duplicate
'\u00a1': 161,
'\u00a2': 162,
'\u00a3': 163,
'\u00a4': 164,
'\u00a5': 165,
'\u00a6': 166,
'\u00a7': 167,
'\u00a8': 168,
'\u00a9': 169,
'\u00aa': 170,
'\u00ab': 171,
'\u00ac': 172,
//'\u002d': 173, // duplicate
'\u00ae': 174,
'\u00af': 175,
'\u00b0': 176,
'\u00b1': 177,
'\u00b2': 178,
'\u00b3': 179,
'\u00b4': 180,
'\u00b5': 181,
'\u00b6': 182,
'\u00b7': 183,
'\u00b8': 184,
'\u00b9': 185,
'\u00ba': 186,
'\u00bb': 187,
'\u00bc': 188,
'\u00bd': 189,
'\u00be': 190,
'\u00bf': 191,
'\u00c0': 192,
'\u00c1': 193,
'\u00c2': 194,
'\u00c3': 195,
'\u00c4': 196,
'\u00c5': 197,
'\u00c6': 198,
'\u00c7': 199,
'\u00c8': 200,
'\u00c9': 201,
'\u00ca': 202,
'\u00cb': 203,
'\u00cc': 204,
'\u00cd': 205,
'\u00ce': 206,
'\u00cf': 207,
'\u00d0': 208,
'\u00d1': 209,
'\u00d2': 210,
'\u00d3': 211,
'\u00d4': 212,
'\u00d5': 213,
'\u00d6': 214,
'\u00d7': 215,
'\u00d8': 216,
'\u00d9': 217,
'\u00da': 218,
'\u00db': 219,
'\u00dc': 220,
'\u00dd': 221,
'\u00de': 222,
'\u00df': 223,
'\u00e0': 224,
'\u00e1': 225,
'\u00e2': 226,
'\u00e3': 227,
'\u00e4': 228,
'\u00e5': 229,
'\u00e6': 230,
'\u00e7': 231,
'\u00e8': 232,
'\u00e9': 233,
'\u00ea': 234,
'\u00eb': 235,
'\u00ec': 236,
'\u00ed': 237,
'\u00ee': 238,
'\u00ef': 239,
'\u00f0': 240,
'\u00f1': 241,
'\u00f2': 242,
'\u00f3': 243,
'\u00f4': 244,
'\u00f5': 245,
'\u00f6': 246,
'\u00f7': 247,
'\u00f8': 248,
'\u00f9': 249,
'\u00fa': 250,
'\u00fb': 251,
'\u00fc': 252,
'\u00fd': 253,
'\u00fe': 254,
'\u00ff': 255,
// Glyph to charcode map (WinAnsiEncoding).
//
// A few glyph names occur at more than one character code in WinAnsiEncoding
// (bullet, space, hyphen). A Go map can hold only one code per name, so the
// lowest code is kept as the active entry and the remaining codes are listed
// as comments for reference. In particular "hyphen" resolves to 45 (the ASCII
// hyphen-minus position) rather than 173, so that '-' is encoded with the same
// character code as before the glyph-based maps were introduced.
var winansiEncodingGlyphToCharcodeMap = map[string]byte{
	"space": 32,
	"exclam": 33,
	"quotedbl": 34,
	"numbersign": 35,
	"dollar": 36,
	"percent": 37,
	"ampersand": 38,
	"quotesingle": 39,
	"parenleft": 40,
	"parenright": 41,
	"asterisk": 42,
	"plus": 43,
	"comma": 44,
	"hyphen": 45,
	"period": 46,
	"slash": 47,
	"zero": 48,
	"one": 49,
	"two": 50,
	"three": 51,
	"four": 52,
	"five": 53,
	"six": 54,
	"seven": 55,
	"eight": 56,
	"nine": 57,
	"colon": 58,
	"semicolon": 59,
	"less": 60,
	"equal": 61,
	"greater": 62,
	"question": 63,
	"at": 64,
	"A": 65,
	"B": 66,
	"C": 67,
	"D": 68,
	"E": 69,
	"F": 70,
	"G": 71,
	"H": 72,
	"I": 73,
	"J": 74,
	"K": 75,
	"L": 76,
	"M": 77,
	"N": 78,
	"O": 79,
	"P": 80,
	"Q": 81,
	"R": 82,
	"S": 83,
	"T": 84,
	"U": 85,
	"V": 86,
	"W": 87,
	"X": 88,
	"Y": 89,
	"Z": 90,
	"bracketleft": 91,
	"backslash": 92,
	"bracketright": 93,
	"asciicircum": 94,
	"underscore": 95,
	"grave": 96,
	"a": 97,
	"b": 98,
	"c": 99,
	"d": 100,
	"e": 101,
	"f": 102,
	"g": 103,
	"h": 104,
	"i": 105,
	"j": 106,
	"k": 107,
	"l": 108,
	"m": 109,
	"n": 110,
	"o": 111,
	"p": 112,
	"q": 113,
	"r": 114,
	"s": 115,
	"t": 116,
	"u": 117,
	"v": 118,
	"w": 119,
	"x": 120,
	"y": 121,
	"z": 122,
	"braceleft": 123,
	"bar": 124,
	"braceright": 125,
	"asciitilde": 126,
	"bullet": 127,
	"Euro": 128,
	//"bullet": 129,
	"quotesinglbase": 130,
	"florin": 131,
	"quotedblbase": 132,
	"ellipsis": 133,
	"dagger": 134,
	"daggerdbl": 135,
	"circumflex": 136,
	"perthousand": 137,
	"Scaron": 138,
	"guilsinglleft": 139,
	"OE": 140,
	//"bullet": 141,
	"Zcaron": 142,
	//"bullet": 143,
	//"bullet": 144,
	"quoteleft": 145,
	"quoteright": 146,
	"quotedblleft": 147,
	"quotedblright": 148,
	//"bullet": 149,
	"endash": 150,
	"emdash": 151,
	"tilde": 152,
	"trademark": 153,
	"scaron": 154,
	"guilsinglright": 155,
	"oe": 156,
	//"bullet": 157,
	"zcaron": 158,
	"Ydieresis": 159,
	//"space": 160,
	"exclamdown": 161,
	"cent": 162,
	"sterling": 163,
	"currency": 164,
	"yen": 165,
	"brokenbar": 166,
	"section": 167,
	"dieresis": 168,
	"copyright": 169,
	"ordfeminine": 170,
	"guillemotleft": 171,
	"logicalnot": 172,
	//"hyphen": 173,
	"registered": 174,
	"macron": 175,
	"degree": 176,
	"plusminus": 177,
	"twosuperior": 178,
	"threesuperior": 179,
	"acute": 180,
	"mu": 181,
	"paragraph": 182,
	"periodcentered": 183,
	"cedilla": 184,
	"onesuperior": 185,
	"ordmasculine": 186,
	"guillemotright": 187,
	"onequarter": 188,
	"onehalf": 189,
	"threequarters": 190,
	"questiondown": 191,
	"Agrave": 192,
	"Aacute": 193,
	"Acircumflex": 194,
	"Atilde": 195,
	"Adieresis": 196,
	"Aring": 197,
	"AE": 198,
	"Ccedilla": 199,
	"Egrave": 200,
	"Eacute": 201,
	"Ecircumflex": 202,
	"Edieresis": 203,
	"Igrave": 204,
	"Iacute": 205,
	"Icircumflex": 206,
	"Idieresis": 207,
	"Eth": 208,
	"Ntilde": 209,
	"Ograve": 210,
	"Oacute": 211,
	"Ocircumflex": 212,
	"Otilde": 213,
	"Odieresis": 214,
	"multiply": 215,
	"Oslash": 216,
	"Ugrave": 217,
	"Uacute": 218,
	"Ucircumflex": 219,
	"Udieresis": 220,
	"Yacute": 221,
	"Thorn": 222,
	"germandbls": 223,
	"agrave": 224,
	"aacute": 225,
	"acircumflex": 226,
	"atilde": 227,
	"adieresis": 228,
	"aring": 229,
	"ae": 230,
	"ccedilla": 231,
	"egrave": 232,
	"eacute": 233,
	"ecircumflex": 234,
	"edieresis": 235,
	"igrave": 236,
	"iacute": 237,
	"icircumflex": 238,
	"idieresis": 239,
	"eth": 240,
	"ntilde": 241,
	"ograve": 242,
	"oacute": 243,
	"ocircumflex": 244,
	"otilde": 245,
	"odieresis": 246,
	"divide": 247,
	"oslash": 248,
	"ugrave": 249,
	"uacute": 250,
	"ucircumflex": 251,
	"udieresis": 252,
	"yacute": 253,
	"thorn": 254,
	"ydieresis": 255,
}

View File

@ -10,13 +10,13 @@ import "testing"
func TestWinAnsiEncoder(t *testing.T) {
enc := NewWinAnsiTextEncoder()
glyph, found := enc.CharcodeToGlyphName(32)
glyph, found := enc.CharcodeToGlyph(32)
if !found || glyph != "space" {
t.Errorf("Glyph != space")
return
}
glyph, found = enc.RuneToGlyphName('þ')
glyph, found = enc.RuneToGlyph('þ')
if !found || glyph != "thorn" {
t.Errorf("Glyph != thorn")
return

View File

@ -0,0 +1,542 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package textencoding
import (
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/core"
)
// ZapfDingbatsEncoder is the text encoder for the ZapfDingbats font.
// It carries no state of its own.
type ZapfDingbatsEncoder struct{}
// NewZapfDingbatsEncoder returns a ZapfDingbatsEncoder value ready for use.
func NewZapfDingbatsEncoder() ZapfDingbatsEncoder {
	return ZapfDingbatsEncoder{}
}
// Encode converts a raw UTF-8 string (a series of runes) into an encoded
// string (a series of character codes) to be used in PDF.
// Runes for which RuneToCharcode reports no match are left out of the output.
func (enc ZapfDingbatsEncoder) Encode(raw string) string {
	var out []byte
	for _, r := range raw {
		if charcode, ok := enc.RuneToCharcode(r); ok {
			out = append(out, charcode)
		}
	}
	return string(out)
}
// CharcodeToGlyph looks up the glyph name for a ZapfDingbats character code.
// The bool result is true when the code has an entry; on a miss a debug
// message is logged and ("", false) is returned.
func (enc ZapfDingbatsEncoder) CharcodeToGlyph(code byte) (string, bool) {
	if name, ok := zapfDingbatsEncodingCharcodeToGlyphMap[code]; ok {
		return name, true
	}

	common.Log.Debug("ZapfDingbats encoding error: unable to find charcode->glyph entry (%v)", code)
	return "", false
}
// GlyphToCharcode looks up the ZapfDingbats character code for a glyph name.
// The bool result is true when the glyph has an entry; on a miss a debug
// message is logged and (0, false) is returned.
func (enc ZapfDingbatsEncoder) GlyphToCharcode(glyph string) (byte, bool) {
	if charcode, ok := zapfDingbatsEncodingGlyphToCharcodeMap[glyph]; ok {
		return charcode, true
	}

	common.Log.Debug("ZapfDingbats encoding error: unable to find glyph->charcode entry (%s)", glyph)
	return 0, false
}
// RuneToCharcode converts a rune to a ZapfDingbats character code in two
// steps: rune -> glyph name (RuneToGlyph), then glyph name -> character code.
// The bool result is true only when both steps succeed; a failure in either
// step is logged and yields (0, false).
func (enc ZapfDingbatsEncoder) RuneToCharcode(val rune) (byte, bool) {
	name, ok := enc.RuneToGlyph(val)
	if !ok {
		common.Log.Debug("ZapfDingbats encoding error: unable to find rune->glyph entry (%v)", val)
		return 0, false
	}

	if charcode, ok := zapfDingbatsEncodingGlyphToCharcodeMap[name]; ok {
		return charcode, true
	}

	common.Log.Debug("ZapfDingbats encoding error: unable to find glyph->charcode entry (%s)", name)
	return 0, false
}
// CharcodeToRune converts a ZapfDingbats character code to a rune: the code is
// first mapped to a glyph name, and the glyph name is then resolved with
// GlyphToRune, whose result is returned unchanged.
// An unknown character code is logged and yields (0, false).
func (enc ZapfDingbatsEncoder) CharcodeToRune(charcode byte) (rune, bool) {
	if name, ok := zapfDingbatsEncodingCharcodeToGlyphMap[charcode]; ok {
		return enc.GlyphToRune(name)
	}

	common.Log.Debug("ZapfDingbats encoding error: unable to find charcode->glyph entry (%d)", charcode)
	return 0, false
}
// RuneToGlyph returns the glyph name for val. The ZapfDingbats glyph list is
// consulted first; the general glyph list serves as the fallback.
// The bool result is true when either list has a match; when neither does, a
// debug message is logged and ("", false) is returned.
func (enc ZapfDingbatsEncoder) RuneToGlyph(val rune) (string, bool) {
	// Preferred source: the ZapfDingbats-specific list.
	if name, ok := runeToGlyph(val, zapfdingbatsRuneToGlyphMap); ok {
		return name, true
	}
	// Fallback: the general glyph list.
	if name, ok := runeToGlyph(val, glyphlistRuneToGlyphMap); ok {
		return name, true
	}

	common.Log.Debug("ZapfDingbats encoding error: unable to find rune->glyph entry (%v)", val)
	return "", false
}
// GlyphToRune returns the rune for a glyph name. The ZapfDingbats glyph list
// is consulted first; the general glyph list serves as the fallback.
// The bool result is true when either list has a match; when neither does, a
// debug message is logged and (0, false) is returned.
func (enc ZapfDingbatsEncoder) GlyphToRune(glyph string) (rune, bool) {
	// Preferred source: the ZapfDingbats-specific list.
	if r, ok := glyphToRune(glyph, zapfdingbatsGlyphToRuneMap); ok {
		return r, true
	}
	// Fallback: the general glyph list.
	if r, ok := glyphToRune(glyph, glyphlistGlyphToRuneMap); ok {
		return r, true
	}

	// The message names this encoder; it used to say "Symbol", which pointed
	// readers of the log at the wrong encoder.
	common.Log.Debug("ZapfDingbats encoding error: unable to find glyph->rune entry (%v)", glyph)
	return 0, false
}
// ToPdfObject builds the PDF representation of this encoding: an indirect
// object wrapping a dictionary whose only entry sets Type to the name
// "Encoding".
func (enc ZapfDingbatsEncoder) ToPdfObject() core.PdfObject {
	// No differences are written: an otherwise empty Encoding object indicates
	// that the font's built-in encoding is used.
	encDict := core.MakeDict()
	encDict.Set("Type", core.MakeName("Encoding"))

	return core.MakeIndirectObject(encDict)
}
// Charcode to glyph name map (ZapfDingbats encoding).
// Keys are character codes, values are glyph names ("space" and the "aNNN"
// names). The table has no entry for codes below 32 or for codes 127,
// 142-160, 240 and 255; lookups for those codes miss.
var zapfDingbatsEncodingCharcodeToGlyphMap = map[byte]string{
32: "space",
33: "a1",
34: "a2",
35: "a202",
36: "a3",
37: "a4",
38: "a5",
39: "a119",
40: "a118",
41: "a117",
42: "a11",
43: "a12",
44: "a13",
45: "a14",
46: "a15",
47: "a16",
48: "a105",
49: "a17",
50: "a18",
51: "a19",
52: "a20",
53: "a21",
54: "a22",
55: "a23",
56: "a24",
57: "a25",
58: "a26",
59: "a27",
60: "a28",
61: "a6",
62: "a7",
63: "a8",
64: "a9",
65: "a10",
66: "a29",
67: "a30",
68: "a31",
69: "a32",
70: "a33",
71: "a34",
72: "a35",
73: "a36",
74: "a37",
75: "a38",
76: "a39",
77: "a40",
78: "a41",
79: "a42",
80: "a43",
81: "a44",
82: "a45",
83: "a46",
84: "a47",
85: "a48",
86: "a49",
87: "a50",
88: "a51",
89: "a52",
90: "a53",
91: "a54",
92: "a55",
93: "a56",
94: "a57",
95: "a58",
96: "a59",
97: "a60",
98: "a61",
99: "a62",
100: "a63",
101: "a64",
102: "a65",
103: "a66",
104: "a67",
105: "a68",
106: "a69",
107: "a70",
108: "a71",
109: "a72",
110: "a73",
111: "a74",
112: "a203",
113: "a75",
114: "a204",
115: "a76",
116: "a77",
117: "a78",
118: "a79",
119: "a81",
120: "a82",
121: "a83",
122: "a84",
123: "a97",
124: "a98",
125: "a99",
126: "a100",
128: "a89",
129: "a90",
130: "a93",
131: "a94",
132: "a91",
133: "a92",
134: "a205",
135: "a85",
136: "a206",
137: "a86",
138: "a87",
139: "a88",
140: "a95",
141: "a96",
161: "a101",
162: "a102",
163: "a103",
164: "a104",
165: "a106",
166: "a107",
167: "a108",
168: "a112",
169: "a111",
170: "a110",
171: "a109",
172: "a120",
173: "a121",
174: "a122",
175: "a123",
176: "a124",
177: "a125",
178: "a126",
179: "a127",
180: "a128",
181: "a129",
182: "a130",
183: "a131",
184: "a132",
185: "a133",
186: "a134",
187: "a135",
188: "a136",
189: "a137",
190: "a138",
191: "a139",
192: "a140",
193: "a141",
194: "a142",
195: "a143",
196: "a144",
197: "a145",
198: "a146",
199: "a147",
200: "a148",
201: "a149",
202: "a150",
203: "a151",
204: "a152",
205: "a153",
206: "a154",
207: "a155",
208: "a156",
209: "a157",
210: "a158",
211: "a159",
212: "a160",
213: "a161",
214: "a163",
215: "a164",
216: "a196",
217: "a165",
218: "a192",
219: "a166",
220: "a167",
221: "a168",
222: "a169",
223: "a170",
224: "a171",
225: "a172",
226: "a173",
227: "a162",
228: "a174",
229: "a175",
230: "a176",
231: "a177",
232: "a178",
233: "a179",
234: "a193",
235: "a180",
236: "a199",
237: "a181",
238: "a200",
239: "a182",
241: "a201",
242: "a183",
243: "a184",
244: "a197",
245: "a185",
246: "a194",
247: "a198",
248: "a186",
249: "a195",
250: "a187",
251: "a188",
252: "a189",
253: "a190",
254: "a191",
}
// zapfDingbatsEncodingGlyphToCharcodeMap maps a ZapfDingbats glyph name
// ("space", "a1" ... "a206") to its single-byte character code.
//
// The table is laid out in charcode order, eight codes per row, and split
// into the four contiguous code ranges it covers. Codes 0-31, 127, 142-160,
// 240 and 255 have no entry, and the glyph names a80 and a113-a116 do not
// appear as keys.
var zapfDingbatsEncodingGlyphToCharcodeMap = map[string]byte{
	// Codes 32-126.
	"space": 32, "a1": 33, "a2": 34, "a202": 35, "a3": 36, "a4": 37, "a5": 38, "a119": 39,
	"a118": 40, "a117": 41, "a11": 42, "a12": 43, "a13": 44, "a14": 45, "a15": 46, "a16": 47,
	"a105": 48, "a17": 49, "a18": 50, "a19": 51, "a20": 52, "a21": 53, "a22": 54, "a23": 55,
	"a24": 56, "a25": 57, "a26": 58, "a27": 59, "a28": 60, "a6": 61, "a7": 62, "a8": 63,
	"a9": 64, "a10": 65, "a29": 66, "a30": 67, "a31": 68, "a32": 69, "a33": 70, "a34": 71,
	"a35": 72, "a36": 73, "a37": 74, "a38": 75, "a39": 76, "a40": 77, "a41": 78, "a42": 79,
	"a43": 80, "a44": 81, "a45": 82, "a46": 83, "a47": 84, "a48": 85, "a49": 86, "a50": 87,
	"a51": 88, "a52": 89, "a53": 90, "a54": 91, "a55": 92, "a56": 93, "a57": 94, "a58": 95,
	"a59": 96, "a60": 97, "a61": 98, "a62": 99, "a63": 100, "a64": 101, "a65": 102, "a66": 103,
	"a67": 104, "a68": 105, "a69": 106, "a70": 107, "a71": 108, "a72": 109, "a73": 110, "a74": 111,
	"a203": 112, "a75": 113, "a204": 114, "a76": 115, "a77": 116, "a78": 117, "a79": 118, "a81": 119,
	"a82": 120, "a83": 121, "a84": 122, "a97": 123, "a98": 124, "a99": 125, "a100": 126,

	// Codes 128-141.
	"a89": 128, "a90": 129, "a93": 130, "a94": 131, "a91": 132, "a92": 133, "a205": 134, "a85": 135,
	"a206": 136, "a86": 137, "a87": 138, "a88": 139, "a95": 140, "a96": 141,

	// Codes 161-239.
	"a101": 161, "a102": 162, "a103": 163, "a104": 164, "a106": 165, "a107": 166, "a108": 167,
	"a112": 168, "a111": 169, "a110": 170, "a109": 171, "a120": 172, "a121": 173, "a122": 174, "a123": 175,
	"a124": 176, "a125": 177, "a126": 178, "a127": 179, "a128": 180, "a129": 181, "a130": 182, "a131": 183,
	"a132": 184, "a133": 185, "a134": 186, "a135": 187, "a136": 188, "a137": 189, "a138": 190, "a139": 191,
	"a140": 192, "a141": 193, "a142": 194, "a143": 195, "a144": 196, "a145": 197, "a146": 198, "a147": 199,
	"a148": 200, "a149": 201, "a150": 202, "a151": 203, "a152": 204, "a153": 205, "a154": 206, "a155": 207,
	"a156": 208, "a157": 209, "a158": 210, "a159": 211, "a160": 212, "a161": 213, "a163": 214, "a164": 215,
	"a196": 216, "a165": 217, "a192": 218, "a166": 219, "a167": 220, "a168": 221, "a169": 222, "a170": 223,
	"a171": 224, "a172": 225, "a173": 226, "a162": 227, "a174": 228, "a175": 229, "a176": 230, "a177": 231,
	"a178": 232, "a179": 233, "a193": 234, "a180": 235, "a199": 236, "a181": 237, "a200": 238, "a182": 239,

	// Codes 241-254.
	"a201": 241, "a183": 242, "a184": 243, "a197": 244, "a185": 245, "a194": 246, "a198": 247,
	"a186": 248, "a195": 249, "a187": 250, "a188": 251, "a189": 252, "a190": 253, "a191": 254,
}