Changes after pull request review

2025-05-09 19:29:34 +08:00 · 2018-07-24 21:32:02 +10:00 · 2018-07-24 21:32:02 +10:00 · e886846c6a
commit e886846c6a
parent e2b4f908bd
20 changed files with 146 additions and 187 deletions
--- a/pdf/extractor/extractor.go
+++ b/pdf/extractor/extractor.go
@ -20,6 +20,7 @@ func New(page *model.PdfPage) (*Extractor, error) {
 		return nil, err
 	}

+	// Uncomment these lines to see the contents of the page. For debugging.
 	// fmt.Println("========================= +++ =========================")
 	// fmt.Printf("%s\n", contents)
 	// fmt.Println("========================= ::: =========================")
--- a/pdf/extractor/text.go
+++ b/pdf/extractor/text.go
@ -54,9 +54,6 @@ func (e *Extractor) ExtractXYText() (*TextList, int, int, error) {
 		return textList, state.numChars, state.numMisses, err
 	}

-	// fmt.Println("========================= xxx =========================")
-	// fmt.Printf("%s\n", e.contents)
-	// fmt.Println("========================= ||| =========================")
 	processor := contentstream.NewContentStreamProcessor(*operations)

 	processor.AddHandler(contentstream.HandlerConditionEnumAllOperands, "",
@ -64,7 +61,6 @@ func (e *Extractor) ExtractXYText() (*TextList, int, int, error) {
 			resources *model.PdfPageResources) error {

 			operand := op.Operand
-			// common.Log.Debug("++Operand: %s", op.String())

 			switch operand {
 			case "q":
@ -383,7 +379,7 @@ func (to *TextObject) setFont(name string, size float64) error {
 			(*to.fontStack)[len(*to.fontStack)-1] = font
 		}
 	} else if err == model.ErrFontNotSupported {
-		// XXX: !@#$ Do we need to handle this case in a special way?
+		// XXX: Do we need to handle this case in a special way?
 		return err
 	} else {
 		return err
--- a/pdf/internal/cmap/cmap.go
+++ b/pdf/internal/cmap/cmap.go
@ -38,14 +38,8 @@ type CIDSystemInfo struct {

 // CMap represents a character code to unicode mapping used in PDF files.
 //
-// 9.7.5 CMaps (Page 272)
-//
-// Page 278
-// c) The beginbfchar and endbfchar shall not appear in a CMap that is used as the Encoding entry of
-// a Type 0 font; however, they may appear in the definition of a ToUnicode CMap
-//
 // https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/5411.ToUnicode.pdf
-// https://github.com/adobe-type-tools/cmap-resources/releases ***
+// https://github.com/adobe-type-tools/cmap-resources/releases
 type CMap struct {
 	*cMapParser

@ -59,15 +53,12 @@ type CMap struct {
 	// For regular cmaps
 	codespaces []Codespace

-	// Text encoder to look up runes from input glyph names. !@#$ Not used
-	// encoder textencoding.TextEncoder
-
 	// For ToUnicode (ctype 2) cmaps
 	codeToUnicode     map[CharCode]string
 	toUnicodeIdentity bool
 }

-// String retuns a human readable description of `cmap`
+// String returns a human readable description of `cmap`.
 func (cmap *CMap) String() string {
 	si := cmap.systemInfo
 	parts := []string{
@ -103,7 +94,7 @@ func newCMap(isSimple bool) *CMap {
 	return cmap
 }

-// String returns a human readable description of `info`
+// String returns a human readable description of `info`.
 // It looks like "Adobe-Japan2-000".
 func (info *CIDSystemInfo) String() string {
 	return fmt.Sprintf("%s-%s-%03d", info.Registry, info.Ordering, info.Supplement)
@ -111,8 +102,11 @@ func (info *CIDSystemInfo) String() string {

 // NewCIDSystemInfo returns the CIDSystemInfo encoded in PDFObject `obj`
 func NewCIDSystemInfo(obj core.PdfObject) (info CIDSystemInfo, err error) {
-	obj = core.TraceToDirectObject(obj)
-	d := *obj.(*core.PdfObjectDictionary)
+	d, ok := core.GetDict(obj)
+	if !ok {
+		err = core.ErrTypeError
+		return
+	}
 	registry, ok := core.GetStringVal(d.Get("Registry"))
 	if !ok {
 		err = core.ErrTypeError
@ -154,7 +148,7 @@ var MissingCodeString = string(MissingCodeRune)

 // CharcodeBytesToUnicode converts a byte array of charcodes to a unicode string representation.
 // It also returns a bool flag to tell if the conversion was successful.
-// NOTE: This only works for ToUnicode cmaps
+// NOTE: This only works for ToUnicode cmaps.
 func (cmap *CMap) CharcodeBytesToUnicode(data []byte) (string, int) {
 	charcodes, matched := cmap.bytesToCharcodes(data)
 	if !matched {
@ -186,17 +180,10 @@ func (cmap *CMap) CharcodeBytesToUnicode(data []byte) (string, int) {
 	return unicode, len(missing)
 }

-// CharcodeToUnicode converts a single character code `code ` to a unicode string.
-// If `code` is not in the unicode map, "?" is returned
-// Note that CharcodeBytesToUnicode is typically more efficient.
-func (cmap *CMap) CharcodeToUnicode(code CharCode) string {
-	s, _ := cmap.CharcodeToUnicode2(code)
-	return s
-}
-
-// CharcodeToUnicode2 converts a single character code `code` to a unicode string.
-// The bool value is set to true if `code` is in the unicode map,
-func (cmap *CMap) CharcodeToUnicode2(code CharCode) (string, bool) {
+// CharcodeToUnicode converts a single character code `code` to a unicode string.
+// If `code` is not in the unicode map, MissingCodeString is returned.
+// NOTE: CharcodeBytesToUnicode is typically more efficient.
+func (cmap *CMap) CharcodeToUnicode(code CharCode) (string, bool) {
 	if s, ok := cmap.codeToUnicode[code]; ok {
 		return s, true
 	}
@ -264,7 +251,7 @@ func (cmap *CMap) inCodespace(code CharCode, numBytes int) bool {
 }

 // LoadCmapFromDataCID parses the in-memory cmap `data` and returns the resulting CMap.
-// It is a convenience function,
+// It is a convenience function.
 func LoadCmapFromDataCID(data []byte) (*CMap, error) {
 	return LoadCmapFromData(data, false)
 }
@ -273,12 +260,6 @@ func LoadCmapFromDataCID(data []byte) (*CMap, error) {
 // If isCID is true then it uses 1-byte encodings, otherwise it uses the codespaces in the cmap.
 //
 // 9.10.3 ToUnicode CMaps (page 293)
-// The CMap defined in the ToUnicode entry of the font dictionary shall follow the syntax for CMaps
-// • The CMap file shall contain begincodespacerange and endcodespacerange operators that are
-//   consistent with the encoding that the font uses. In particular, for a simple font, the
-//   codespace shall be one byte long.
-// • It shall use the beginbfchar, endbfchar, beginbfrange, and endbfrange operators to define the
-//    mapping from character codes to Unicode character sequences expressed in UTF-16BE encoding
 func LoadCmapFromData(data []byte, isSimple bool) (*CMap, error) {
 	common.Log.Trace("LoadCmapFromData: isSimple=%t", isSimple)

--- a/pdf/internal/cmap/cmap_parser.go
+++ b/pdf/internal/cmap/cmap_parser.go
@ -15,7 +15,6 @@ import (

 // parse parses the CMap file and loads into the CMap structure.
 func (cmap *CMap) parse() error {
-	inCmap := false
 	var prev cmapObject
 	for {
 		o, err := cmap.parseObject()
@ -26,17 +25,11 @@ func (cmap *CMap) parse() error {
 			common.Log.Debug("ERROR: parsing CMap: %v", err)
 			return err
 		}
-		// fmt.Printf("-- %#v\n", o)
-
 		switch t := o.(type) {
 		case cmapOperand:
 			op := t

 			switch op.Operand {
-			case begincmap:
-				inCmap = true
-			case endcmap:
-				inCmap = false
 			case begincodespacerange:
 				err := cmap.parseCodespaceRange()
 				if err != nil {
@ -95,13 +88,7 @@ func (cmap *CMap) parse() error {
 					return err
 				}
 			}
-		case cmapInt:

-		default:
-			if inCmap {
-				// Don't log this noise for now
-				// common.Log.Trace("Unhandled object: %#v", o)
-			}
 		}
 		prev = o
 	}
@ -119,7 +106,6 @@ func (cmap *CMap) parseName() error {
 		if err != nil {
 			return err
 		}
-		// fmt.Printf("^^ %d %#v\n", i, o)
 		switch t := o.(type) {
 		case cmapOperand:
 			switch t.Operand {
@ -159,7 +145,6 @@ func (cmap *CMap) parseType() error {
 		if err != nil {
 			return err
 		}
-		// fmt.Printf("^^ %d %#v\n", i, o)
 		switch t := o.(type) {
 		case cmapOperand:
 			switch t.Operand {
@ -190,7 +175,6 @@ func (cmap *CMap) parseVersion() error {
 		if err != nil {
 			return err
 		}
-		// fmt.Printf("^^ %d %#v\n", i, o)
 		switch t := o.(type) {
 		case cmapOperand:
 			switch t.Operand {
@ -228,9 +212,9 @@ func (cmap *CMap) parseSystemInfo() error {
 	done := false
 	systemInfo := CIDSystemInfo{}

+	// 50 is a generous but arbitrary limit to prevent an endless loop on badly formed cmap files.
 	for i := 0; i < 50 && !done; i++ {
 		o, err := cmap.parseObject()
-		// fmt.Printf("%2d: %#v\n", i, o)
 		if err != nil {
 			return err
 		}
@ -385,7 +369,6 @@ func (cmap *CMap) parseBfchar() error {
 			}
 			return err
 		}
-		// fmt.Printf("--- %#v\n", o)
 		var code CharCode

 		switch v := o.(type) {
@ -449,7 +432,6 @@ func (cmap *CMap) parseBfrange() error {
 			}
 			return err
 		}
-		// fmt.Printf("-== %#v\n", o)
 		switch v := o.(type) {
 		case cmapOperand:
 			if v.Operand == endbfrange {
--- a/pdf/internal/cmap/cmap_test.go
+++ b/pdf/internal/cmap/cmap_test.go
@ -105,13 +105,13 @@ func TestCMapParser1(t *testing.T) {
 	}

 	for k, expected := range expectedMappings {
-		if v := cmap.CharcodeToUnicode(k); v != string(expected) {
+		if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) {
 			t.Errorf("incorrect mapping, expecting 0x%X ➞ 0x%X (%#v)", k, expected, v)
 			return
 		}
 	}

-	v := cmap.CharcodeToUnicode(0x99)
+	v, _ := cmap.CharcodeToUnicode(0x99)
 	if v != MissingCodeString { //!= "notdef" {
 		t.Errorf("Unmapped code, expected to map to undefined")
 		return
@ -191,7 +191,7 @@ func TestCMapParser2(t *testing.T) {
 	}

 	for k, expected := range expectedMappings {
-		if v := cmap.CharcodeToUnicode(k); v != string(expected) {
+		if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) {
 			t.Errorf("incorrect mapping, expecting 0x%X ➞ 0x%X (got 0x%X)", k, expected, v)
 			return
 		}
@ -302,7 +302,7 @@ func TestCMapParser3(t *testing.T) {
 		0xd140: 0xa000,
 	}
 	for k, expected := range expectedMappings {
-		if v := cmap.CharcodeToUnicode(k); v != string(expected) {
+		if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) {
 			t.Errorf("incorrect mapping: expecting 0x%02X ➞ 0x%02X (got 0x%02X)", k, expected, v)
 			return
 		}
@ -414,7 +414,7 @@ func TestCMapParser4(t *testing.T) {
 	}

 	for k, expected := range expectedMappings {
-		if v := cmap.CharcodeToUnicode(k); v != expected {
+		if v, ok := cmap.CharcodeToUnicode(k); !ok || v != expected {
 			t.Errorf("incorrect mapping, expecting 0x%04X ➞ %+q (got %+q)", k, expected, v)
 			return
 		}
--- a/pdf/model/colorspace.go
+++ b/pdf/model/colorspace.go
@ -255,6 +255,10 @@ func (this *PdfColorspaceDeviceGray) ColorFromFloats(vals []float64) (PdfColor,

 	val := vals[0]

+	if val < 0.0 || val > 1.0 {
+		common.Log.Debug("Incompatibility: Range outside [0,1]")
+	}
+
 	// Needed for ~/testdata/acl2017_hllz.pdf
 	if val < 0.0 {
 		val = 0.0
--- a/pdf/model/font.go
+++ b/pdf/model/font.go
@ -97,7 +97,7 @@ func newPdfFontFromPdfObject(fontObj core.PdfObject, allowType0 bool) (*PdfFont,
 			return nil, err
 		}
 		font.context = type0font
-	case "Type1", "Type3", "MMType1", "TrueType": // !@#$
+	case "Type1", "Type3", "MMType1", "TrueType":
 		var simplefont *pdfFontSimple
 		if std, ok := standard14Fonts[base.basefont]; ok && base.subtype == "Type1" {
 			font.context = &std
@ -182,7 +182,7 @@ func (font PdfFont) CharcodeBytesToUnicode(data []byte) (string, int, int) {
 	numMisses := 0
 	for _, code := range charcodes {
 		if font.baseFields().toUnicodeCmap != nil {
-			r, ok := font.baseFields().toUnicodeCmap.CharcodeToUnicode2(cmap.CharCode(code))
+			r, ok := font.baseFields().toUnicodeCmap.CharcodeToUnicode(cmap.CharCode(code))
 			if ok {
 				charstrings = append(charstrings, r)
 				continue
@ -237,7 +237,6 @@ func (font PdfFont) Encoder() textencoding.TextEncoder {
 }

 // SetEncoder sets the encoding for the underlying font.
-// !@#$ Is this only possible for simple fonts?
 func (font PdfFont) SetEncoder(encoder textencoding.TextEncoder) {
 	t := font.actualFont()
 	if t == nil {
@ -383,15 +382,9 @@ func newFontBaseFieldsFromPdfObject(fontObj core.PdfObject) (*core.PdfObjectDict
 		font.objectNumber = obj.ObjectNumber
 	}

-	dictObj := core.TraceToDirectObject(fontObj)
-
-	d, ok := dictObj.(*core.PdfObjectDictionary)
+	d, ok := core.GetDict(fontObj)
 	if !ok {
-		if ref, ok := dictObj.(*core.PdfObjectReference); ok {
-			common.Log.Debug("ERROR: Font is reference %s", ref)
-		} else {
-			common.Log.Debug("ERROR: Font not given by a dictionary (%T)", fontObj)
-		}
+		common.Log.Debug("ERROR: Font not given by a dictionary (%T)", fontObj)
 		return nil, nil, ErrFontNotSupported
 	}

@ -405,7 +398,7 @@ func newFontBaseFieldsFromPdfObject(fontObj core.PdfObject) (*core.PdfObjectDict
 		return nil, nil, core.ErrTypeError
 	}

-	subtype, ok := core.GetNameVal(core.TraceToDirectObject(d.Get("Subtype")))
+	subtype, ok := core.GetNameVal(d.Get("Subtype"))
 	if !ok {
 		common.Log.Debug("ERROR: Font Incompatibility. Subtype (Required) missing")
 		return nil, nil, ErrRequiredAttributeMissing
@ -417,7 +410,7 @@ func newFontBaseFieldsFromPdfObject(fontObj core.PdfObject) (*core.PdfObjectDict
 		return nil, nil, ErrFontNotSupported
 	}

-	basefont, ok := core.GetNameVal(core.TraceToDirectObject(d.Get("BaseFont")))
+	basefont, ok := core.GetNameVal(d.Get("BaseFont"))
 	if !ok {
 		common.Log.Debug("ERROR: Font Incompatibility. BaseFont (Required) missing")
 		return nil, nil, ErrRequiredAttributeMissing
@ -467,18 +460,17 @@ func toUnicodeToCmap(toUnicode core.PdfObject, font *fontCommon) (*cmap.CMap, er
 	return cm, err
 }

+// 9.8.2 Font Descriptor Flags (page 283)
 const (
-	fontFlagFixedPitch = 1 << iota
-	fontFlagSerif
-	fontFlagSymbolic
-	fontFlagScript
-	// Bit position 5 is not defined
-	fontFlagNonsymbolic = 1 << (iota + 1)
-	fontFlagItalic
-	// Bit position 8 - 16 are not defined
-	fontFlagAllCap = 1 << (iota + 10)
-	fontFlagSmallCap
-	fontFlagForceBold
+	fontFlagFixedPitch  = 0x00001
+	fontFlagSerif       = 0x00002
+	fontFlagSymbolic    = 0x00004
+	fontFlagScript      = 0x00008
+	fontFlagNonsymbolic = 0x00020
+	fontFlagItalic      = 0x00040
+	fontFlagAllCap      = 0x10000
+	fontFlagSmallCap    = 0x20000
+	fontFlagForceBold   = 0x40000
 )

 // PdfFontDescriptor specifies metrics and other attributes of a font and can refer to a FontFile
--- a/pdf/model/font_composite.go
+++ b/pdf/model/font_composite.go
@ -1,3 +1,8 @@
+/*
+ * This file is subject to the terms and conditions defined in
+ * file 'LICENSE.md', which is part of this source code package.
+ */
+
 package model

 import (
@ -170,16 +175,18 @@ func newPdfFontType0FromPdfObject(d *core.PdfObjectDictionary, base *fontCommon)
 	font := pdfFontType0FromSkeleton(base)
 	font.DescendantFont = df

-	encoderName, ok := core.GetNameVal(core.TraceToDirectObject(d.Get("Encoding")))
-	// XXX: FIXME This is not valid if encoder is not Identity-H !@#$
-	if ok /*&& encoderName == "Identity-H"*/ {
-		font.encoder = textencoding.NewIdentityTextEncoder(encoderName)
+	encoderName, ok := core.GetNameVal(d.Get("Encoding"))
+	if ok {
+		if encoderName == "Identity-H" || encoderName == "Identity-V" {
+			font.encoder = textencoding.NewIdentityTextEncoder(encoderName)
+		} else {
+			common.Log.Debug("Unhandled cmap %q", encoderName)
+		}
 	}
 	return font, nil
 }

 // pdfCIDFontType0 represents a CIDFont Type0 font dictionary.
-// XXX: This is a stub.
 type pdfCIDFontType0 struct {
 	container *core.PdfIndirectObject
 	fontCommon
@ -216,20 +223,17 @@ func (font pdfCIDFontType0) SetEncoder(encoder textencoding.TextEncoder) {

 // GetGlyphCharMetrics returns the character metrics for the specified glyph.  A bool flag is
 // returned to indicate whether or not the entry was found in the glyph to charcode mapping.
-// XXX: This is a stub.
 func (font pdfCIDFontType0) GetGlyphCharMetrics(glyph string) (fonts.CharMetrics, bool) {
 	return fonts.CharMetrics{}, true
 }

 // ToPdfObject converts the pdfCIDFontType0 to a PDF representation.
-// XXX: This is a stub.
 func (font *pdfCIDFontType0) ToPdfObject() core.PdfObject {
 	return core.MakeNull()
 }

 // newPdfCIDFontType0FromPdfObject creates a pdfCIDFontType0 object from a dictionary (either direct
 // or via indirect object). If a problem occurs with loading an error is returned.
-// XXX: This is a stub.
 func newPdfCIDFontType0FromPdfObject(d *core.PdfObjectDictionary, base *fontCommon) (*pdfCIDFontType0, error) {
 	if base.subtype != "CIDFontType0" {
 		common.Log.Debug("ERROR: Font SubType != CIDFontType0. font=%s", base)
@ -255,7 +259,7 @@ type pdfCIDFontType2 struct {
 	fontCommon

 	// These fields are specific to Type 0 fonts.
-	encoder   textencoding.TextEncoder // !@#$ In base?
+	encoder   textencoding.TextEncoder
 	ttfParser *fonts.TtfType

 	CIDSystemInfo core.PdfObject
--- a/pdf/model/font_simple.go
+++ b/pdf/model/font_simple.go
@ -1,3 +1,8 @@
+/*
+ * This file is subject to the terms and conditions defined in
+ * file 'LICENSE.md', which is part of this source code package.
+ */
+
 package model

 import (
@ -110,17 +115,13 @@ func (font pdfFontSimple) GetGlyphCharMetrics(glyph string) (fonts.CharMetrics,
 // newSimpleFontFromPdfObject creates a pdfFontSimple from dictionary `d`. Elements of `d` that
 // are already parsed are contained in `base`.
 // An error is returned if there is a problem with loading.
-// !@#$ Just return a base 14 font, if obj is a base 14 font
 //
 // The value of Encoding is subject to limitations that are described in 9.6.6, "Character Encoding".
 // • The value of BaseFont is derived differently.
 //
-// !@#$ 9.6.6.4 Encodings for TrueType Fonts (page 265)
-//      Need to get TrueType font's cmap
 func newSimpleFontFromPdfObject(d *core.PdfObjectDictionary, base *fontCommon, std14 bool) (*pdfFontSimple, error) {
 	font := pdfFontSimpleFromSkeleton(base)

-	// !@#$ Failing on ~/testdata/The-Byzantine-Generals-Problem.pdf
 	// FirstChar is not defined in ~/testdata/shamirturing.pdf
 	if !std14 {
 		obj := d.Get("FirstChar")
@ -129,31 +130,31 @@ func newSimpleFontFromPdfObject(d *core.PdfObjectDictionary, base *fontCommon, s
 		}
 		font.FirstChar = obj

-		intVal, ok := core.TraceToDirectObject(obj).(*core.PdfObjectInteger)
+		intVal, ok := core.GetIntVal(obj)
 		if !ok {
 			common.Log.Debug("ERROR: Invalid FirstChar type (%T)", obj)
 			return nil, core.ErrTypeError
 		}
-		font.firstChar = int(*intVal)
+		font.firstChar = int(intVal)

 		obj = d.Get("LastChar")
 		if obj == nil {
-			obj = core.PdfObject(core.MakeInteger(255))
+			obj = core.MakeInteger(255)
 		}
 		font.LastChar = obj
-		intVal, ok = core.TraceToDirectObject(obj).(*core.PdfObjectInteger)
+		intVal, ok = core.GetIntVal(obj)
 		if !ok {
 			common.Log.Debug("ERROR: Invalid LastChar type (%T)", obj)
 			return nil, core.ErrTypeError
 		}
-		font.lastChar = int(*intVal)
+		font.lastChar = int(intVal)

 		font.charWidths = []float64{}
 		obj = d.Get("Widths")
 		if obj != nil {
 			font.Widths = obj

-			arr, ok := core.TraceToDirectObject(obj).(*core.PdfObjectArray)
+			arr, ok := core.GetArray(obj)
 			if !ok {
 				common.Log.Debug("ERROR: Widths attribute != array (%T)", obj)
 				return nil, core.ErrTypeError
@ -186,7 +187,7 @@ func (font *pdfFontSimple) addEncoding() error {
 	var err error

 	if font.Encoding != nil {
-		// !@#$ Stop setting default encoding in getFontEncoding XXX
+		// XXX: TODO Stop setting default encoding in getFontEncoding
 		baseEncoder, differences, err = getFontEncoding(font.Encoding)
 		if err != nil {
 			common.Log.Debug("ERROR: BaseFont=%q Subtype=%q Encoding=%s (%T) err=%v", font.basefont,
@ -209,7 +210,6 @@ func (font *pdfFontSimple) addEncoding() error {
 		if descriptor != nil {
 			switch font.subtype {
 			case "Type1":
-				// XXX: !@#$ Is this the right order? Do the /Differences need to be reapplied?
 				if descriptor.fontFile != nil && descriptor.fontFile.encoder != nil {
 					common.Log.Debug("Using fontFile")
 					font.SetEncoder(descriptor.fontFile.encoder)
@ -247,8 +247,8 @@ func (font *pdfFontSimple) addEncoding() error {
 // Except for Type 3 fonts, every font program shall have a built-in encoding. Under certain
 // circumstances, a PDF font dictionary may change the encoding used with the font program to match
 // the requirements of the conforming writer generating the text being shown.
-func getFontEncoding(obj core.PdfObject) (string, map[byte]string, error) {
-	baseName := "StandardEncoding"
+func getFontEncoding(obj core.PdfObject) (baseName string, differences map[byte]string, err error) {
+	baseName = "StandardEncoding"

 	if obj == nil {
 		// Fall back to StandardEncoding
@ -259,9 +259,9 @@ func getFontEncoding(obj core.PdfObject) (string, map[byte]string, error) {
 	case *core.PdfObjectName:
 		return string(*encoding), nil, nil
 	case *core.PdfObjectDictionary:
-		typ, ok := core.GetNameVal(core.TraceToDirectObject(encoding.Get("Type")))
+		typ, ok := core.GetNameVal(encoding.Get("Type"))
 		if ok && typ == "Encoding" {
-			base, ok := core.GetNameVal(core.TraceToDirectObject(encoding.Get("BaseEncoding")))
+			base, ok := core.GetNameVal(encoding.Get("BaseEncoding"))
 			if ok {
 				baseName = base
 			}
@ -272,7 +272,7 @@ func getFontEncoding(obj core.PdfObject) (string, map[byte]string, error) {
 			return "", nil, core.ErrTypeError
 		}

-		differences, err := textencoding.FromFontDifferences(diffList)
+		differences, err = textencoding.FromFontDifferences(diffList)
 		return baseName, differences, err
 	default:
 		common.Log.Debug("ERROR: Encoding not a name or dict (%T) %s", obj, obj.String())
--- a/pdf/model/font_test.go
+++ b/pdf/model/font_test.go
@ -1,3 +1,8 @@
+/*
+ * This file is subject to the terms and conditions defined in
+ * file 'LICENSE.md', which is part of this source code package.
+ */
+
 package model_test

 import (
@ -14,7 +19,7 @@ import (
 )

 func init() {
-	// common.SetLogger(common.NewConsoleLogger(common.LogLevelDebug))
+	common.SetLogger(common.NewConsoleLogger(common.LogLevelDebug))
 }

 var simpleFontDicts = []string{
--- a/pdf/model/fontfile.go
+++ b/pdf/model/fontfile.go
@ -1,3 +1,17 @@
+/*
+ * This file is subject to the terms and conditions defined in
+ * file 'LICENSE.md', which is part of this source code package.
+ */
+
+ /*
+  * A font file is a stream containing a Type 1 font program. It appears in PDF files as a
+  * /FontFile entry in a /FontDescriptor dictionary.
+  *
+  * 9.9 Embedded Font Programs (page 289)
+  *
+  * TODO: Add Type1C support
+*/
+
 package model

 import (
@ -13,13 +27,16 @@ import (
 	"github.com/unidoc/unidoc/pdf/model/textencoding"
 )

+// fontFile represents a font file.
+// Currently this is just the identifying information and the text encoder created from the font
+// file's encoding section.
 type fontFile struct {
 	name    string
 	subtype string
 	encoder textencoding.TextEncoder
-	// binary  []byte
 }

+// String returns a human readable description of `fontfile`.
 func (fontfile *fontFile) String() string {
 	encoding := "[None]"
 	if fontfile.encoder != nil {
@ -47,7 +64,7 @@ func newFontFileFromPdfObject(obj core.PdfObject) (*fontFile, error) {
 		return nil, err
 	}

-	subtype, ok := core.GetNameVal(core.TraceToDirectObject(d.Get("Subtype")))
+	subtype, ok := core.GetNameVal(d.Get("Subtype"))
 	if !ok {
 		fontfile.subtype = subtype
 		if subtype == "Type1C" {
@ -57,8 +74,9 @@ func newFontFileFromPdfObject(obj core.PdfObject) (*fontFile, error) {
 		}
 	}

-	length1 := int(*(core.TraceToDirectObject(d.Get("Length1")).(*core.PdfObjectInteger)))
-	length2 := int(*(core.TraceToDirectObject(d.Get("Length2")).(*core.PdfObjectInteger)))
+	length1, _ := core.GetIntVal(d.Get("Length1"))
+	length2, _ := core.GetIntVal(d.Get("Length2"))
+
 	if length1 > len(data) {
 		length1 = len(data)
 	}
@ -95,19 +113,14 @@ func (fontfile *fontFile) loadFromSegments(segment1, segment2 []byte) error {
 	if len(segment2) == 0 {
 		return nil
 	}
-	// err = fontfile.parseEexecPart(segment2)
-	// if err != nil {
-	// 	common.Log.Debug("err=%v", err)
-	// 	return err
-	// }
-
 	common.Log.Trace("fontfile=%s", fontfile)
 	return nil
 }

 // parseAsciiPart parses the ASCII part of the FontFile.
 func (fontfile *fontFile) parseAsciiPart(data []byte) error {
-	common.Log.Trace("parseAsciiPart: %d ", len(data))
+
+	// Uncomment these lines to see the contents of the font file. For debugging.
 	// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~^^^~~~~~~~~~~~~~~~~~~~~~~~")
 	// fmt.Printf("data=%s\n", string(data))
 	// fmt.Println("~~~~~~~~~~~~~~~~~~~~~~~!!!~~~~~~~~~~~~~~~~~~~~~~~")
@ -133,15 +146,6 @@ func (fontfile *fontFile) parseAsciiPart(data []byte) error {
 		return ErrRequiredAttributeMissing
 	}

-	// encodingName, ok := keyValues["Encoding"]
-	// !@#$ I am not sure why we don't do this
-	// if ok  {
-	// 	encoder, err := textencoding.NewSimpleTextEncoder(encodingName, nil)
-	// 	if err != nil {
-	// 		return err
-	// 	}
-	// 	fontfile.encoder = encoder
-	// }
 	if encodingSection != "" {
 		encodings, err := getEncodings(encodingSection)
 		if err != nil {
@ -149,7 +153,7 @@ func (fontfile *fontFile) parseAsciiPart(data []byte) error {
 		}
 		encoder, err := textencoding.NewCustomSimpleTextEncoder(encodings, nil)
 		if err != nil {
-			// XXX: !@#$ We need to fix all these errors
+			// XXX: Logging an error because we need to fix all these misses.
 			common.Log.Error("UNKNOWN GLYPH: err=%v", err)
 			return nil
 		}
@ -158,23 +162,6 @@ func (fontfile *fontFile) parseAsciiPart(data []byte) error {
 	return nil
 }

-// // parseEexecPart parses the binary encrypted part of the FontFile.
-// func (fontfile *fontFile) parseEexecPart(data []byte) error {
-// 	// Sometimes, fonts use  hex format
-// 	if !isBinary(data) {
-// 		decoded, err := hex.DecodeString(string(data))
-// 		if err != nil {
-// 			return err
-// 		}
-// 		data = decoded
-// 	}
-// 	decoded := decodeEexec(data)
-// 	fmt.Println(":::::::::::::::::::::<<>>:::::::::::::::::::::")
-// 	fmt.Printf("%s\n", string(decoded))
-// 	fmt.Println(":::::::::::::::::::::<><>:::::::::::::::::::::")
-// 	return nil
-// }
-
 var (
 	reDictBegin   = regexp.MustCompile(`\d+ dict\s+(dup\s+)?begin`)
 	reKeyVal      = regexp.MustCompile(`^\s*/(\S+?)\s+(.+?)\s+def\s*$`)
@ -216,12 +203,11 @@ func getAsciiSections(data []byte) (keySection, encodingSection string, err erro
 	return
 }

-// ~/testdata/invoice61781040.pdf has \r line endings
+// ~/testdata/private/invoice61781040.pdf has \r line endings
 var reEndline = regexp.MustCompile(`[\n\r]+`)

 // getKeyValues returns the map encoded in `data`.
 func getKeyValues(data string) map[string]string {
-	// lines := strings.Split(data, "\n")
 	lines := reEndline.Split(data, -1)
 	keyValues := map[string]string{}
 	for _, line := range lines {
@ -250,10 +236,6 @@ func getEncodings(data string) (map[uint16]string, error) {
 			common.Log.Debug("ERROR: Bad encoding line. %q", line)
 			return nil, core.ErrTypeError
 		}
-		// if !textencoding.KnownGlyph(glyph) {
-		// 	common.Log.Debug("ERROR: Unknown glyph %q. line=%q", glyph, line)
-		// 	return nil, ErrTypeCheck
-		// }
 		keyValues[uint16(code)] = glyph
 	}
 	common.Log.Trace("getEncodings: keyValues=%#v", keyValues)
--- a/pdf/model/fonts/ttfparser.go
+++ b/pdf/model/fonts/ttfparser.go
@ -115,6 +115,7 @@ func NewFontFile2FromPdfObject(obj core.PdfObject) (rec TtfType, err error) {
 		return
 	}

+	// Uncomment these lines to see the contents of the font file. For debugging.
 	// fmt.Println("===============&&&&===============")
 	// fmt.Printf("%#q", string(data))
 	// fmt.Println("===============####===============")
@ -148,7 +149,6 @@ func (t *ttfParser) Parse() (TtfRec TtfType, err error) {
 		err = errors.New("fonts based on PostScript outlines are not supported")
 		return
 	}
-	// XXX: !@#$ Not sure what to do here. Have seen version="true"
 	if version != "\x00\x01\x00\x00" {
 		common.Log.Debug("ERROR: Unrecognized TrueType file format. version=%q", version)
 	}
@ -416,7 +416,7 @@ func (t *ttfParser) ParseCmap() (err error) {
 		return
 	}
 	common.Log.Debug("ParseCmap")
-	/* version := */ t.ReadUShort()
+	t.ReadUShort() // version is ignored.
 	numTables := int(t.ReadUShort())
 	offset10 := int64(0)
 	offset31 := int64(0)
@ -428,7 +428,6 @@ func (t *ttfParser) ParseCmap() (err error) {
 			// (3,1) subtable. Windows Unicode.
 			offset31 = offset
 		}
-		//fmt.Printf("(%d,%d) subtable @ %d\n", platformID, encodingID, offset)
 	}

 	// Latin font support based on (3,1) table encoding.
@ -440,9 +439,7 @@ func (t *ttfParser) ParseCmap() (err error) {
 	}

 	// Many non-Latin fonts (including asian fonts) use subtable (1,0).
-
 	if offset10 != 0 {
-		// fmt.Printf("Offset10: %d\n", offset10)
 		err = t.parseCmapVersion(offset10)
 		if err != nil {
 			return
@ -578,18 +575,16 @@ func (t *ttfParser) ParsePost() (err error) {
 	if err = t.Seek("post"); err != nil {
 		return
 	}
-	//versionUpper := t.ReadShort()
-	//versionFraction := t.ReadUShort()

 	formatType := t.Read32Fixed()
 	t.rec.ItalicAngle = t.Read32Fixed()
 	t.rec.UnderlinePosition = t.ReadShort()
 	t.rec.UnderlineThickness = t.ReadShort()
 	t.rec.IsFixedPitch = t.ReadULong() != 0
-	/*minMemType42 := */ t.ReadULong()
-	/*maxMemType42 := */ t.ReadULong()
-	/*mimMemType1 := */ t.ReadULong()
-	/*maxMemType1 := */ t.ReadULong()
+	t.ReadULong() // minMemType42 ignored.
+	t.ReadULong() // maxMemType42 ignored.
+	t.ReadULong() // minMemType1 ignored.
+	t.ReadULong() // maxMemType1 ignored.

 	common.Log.Trace("ParsePost: formatType=%f", formatType)

@ -628,13 +623,11 @@ func (t *ttfParser) ParsePost() (err error) {
 			} else if index >= len(macGlyphNames) && index <= 32767 {
 				t.rec.GlyphNames[i] = nameArray[index-len(macGlyphNames)]
 			} else {
-				// PDFBOX-808: Index numbers between 32768 and 65535 are
-				// reserved for future use, so we should just ignore them
 				t.rec.GlyphNames[i] = ".undefined"
 			}
 		}
 	case 2.5:
-		glyphNameIndex := make([]int, t.numGlyphs) // !@#$ Check that this is parsed first
+		glyphNameIndex := make([]int, t.numGlyphs)
 		for i := 0; i < len(glyphNameIndex); i++ {
 			offset := int(t.ReadSByte())
 			glyphNameIndex[i] = i + 1 + offset
@ -645,7 +638,7 @@ func (t *ttfParser) ParsePost() (err error) {
 			t.rec.GlyphNames[i] = name
 		}
 	case 3.0:
-		// no postscript information is provided.
+		// no PostScript information is provided.
 		common.Log.Debug("No PostScript name information is provided for the font.")
 	default:
 		common.Log.Debug("ERROR: Unknown formatType=%f", formatType)
@ -710,10 +703,13 @@ func (t *ttfParser) Seek(tag string) error {
 	return nil
 }

+// Skip moves the file pointer `n` bytes forward.
 func (t *ttfParser) Skip(n int) {
 	t.f.Seek(int64(n), os.SEEK_CUR)
 }

+// ReadStr reads `length` bytes from the file and returns them as a string, or an error if there was
+// a problem.
 func (t *ttfParser) ReadStr(length int) (str string, err error) {
 	var n int
 	buf := make([]byte, length)
@ -729,31 +725,38 @@ func (t *ttfParser) ReadStr(length int) (str string, err error) {
 	return
 }

+// ReadByte reads a byte and returns it as unsigned.
 func (t *ttfParser) ReadByte() (val uint8) {
 	binary.Read(t.f, binary.BigEndian, &val)
 	return
 }

+// ReadSByte reads a byte and returns it as signed.
 func (t *ttfParser) ReadSByte() (val int8) {
 	binary.Read(t.f, binary.BigEndian, &val)
 	return
 }

+// ReadUShort reads 2 bytes and returns them as a big endian unsigned 16 bit integer.
 func (t *ttfParser) ReadUShort() (val uint16) {
 	binary.Read(t.f, binary.BigEndian, &val)
 	return
 }

+// ReadShort reads 2 bytes and returns them as a big endian signed 16 bit integer.
 func (t *ttfParser) ReadShort() (val int16) {
 	binary.Read(t.f, binary.BigEndian, &val)
 	return
 }

+// ReadULong reads 4 bytes and returns them as a big endian unsigned 32 bit integer.
 func (t *ttfParser) ReadULong() (val uint32) {
 	binary.Read(t.f, binary.BigEndian, &val)
 	return
 }

+// Read32Fixed reads 4 bytes and returns them as a float, the first 2 bytes for the whole number and
+// the second 2 bytes for the fraction.
 func (t *ttfParser) Read32Fixed() float64 {
 	whole := float64(t.ReadUShort())
 	frac := float64(t.ReadUShort()) / 65536.0
--- a/pdf/model/fuzz_test.go
+++ b/pdf/model/fuzz_test.go
@ -1,3 +1,8 @@
+/*
+ * This file is subject to the terms and conditions defined in
+ * file 'LICENSE.md', which is part of this source code package.
+ */
+
 package model

 import (
--- a/pdf/model/textencoding/cmap.go
+++ b/pdf/model/textencoding/cmap.go
@ -1,3 +1,8 @@
+/*
+ * This file is subject to the terms and conditions defined in
+ * file 'LICENSE.md', which is part of this source code package.
+ */
+
 package textencoding

 import "github.com/unidoc/unidoc/pdf/core"
--- a/pdf/model/textencoding/cmap_test.go
+++ b/pdf/model/textencoding/cmap_test.go
@ -1,3 +1,8 @@
+/*
+ * This file is subject to the terms and conditions defined in
+ * file 'LICENSE.md', which is part of this source code package.
+ */
+
 package textencoding

 import "testing"
--- a/pdf/model/textencoding/encoder.go
+++ b/pdf/model/textencoding/encoder.go
@ -7,7 +7,7 @@ package textencoding

 import (
 	"github.com/unidoc/unidoc/common"
-	. "github.com/unidoc/unidoc/pdf/core"
+	"github.com/unidoc/unidoc/pdf/core"
 )

 type TextEncoder interface {
@ -44,7 +44,7 @@ type TextEncoder interface {
 	GlyphToRune(glyph string) (rune, bool)

 	// ToPdfObject returns a PDF Object that represents the encoding.
-	ToPdfObject() PdfObject
+	ToPdfObject() core.PdfObject
 }

 // Convenience functions
--- a/pdf/model/textencoding/glyphlist/utils/encoding-list/encoding-list.go
+++ b/pdf/model/textencoding/glyphlist/utils/encoding-list/encoding-list.go
@ -63,8 +63,6 @@ func charcodeToGlyphListPath(filename string) error {

 		line = strings.Trim(line, " \r\n")

-		//fmt.Printf("%s\n", line)
-
 		parts := strings.Split(line, " ")
 		for _, part := range parts {
 			index++
@ -99,8 +97,6 @@ func glyphToCharcodeListPath(filename string) error {

 		line = strings.Trim(line, " \r\n")

-		//fmt.Printf("%s\n", line)
-
 		parts := strings.Split(line, " ")
 		for _, part := range parts {
 			index++
--- a/pdf/model/textencoding/glyphlist/utils/glyphparser/glyphparser.go
+++ b/pdf/model/textencoding/glyphlist/utils/glyphparser/glyphparser.go
@ -237,7 +237,6 @@ func loadGlyphlist(filename string) ([]string, error) {
 			if part == "notdef" {
 				continue
 			}
-			//fmt.Printf("%d: \"%s\",\n", index, part)
 			glyphs = append(glyphs, part)
 		}
 	}
--- a/pdf/model/textencoding/glyphs_glyphlist.go
+++ b/pdf/model/textencoding/glyphs_glyphlist.go
@ -17,12 +17,6 @@ import (
 // MissingCodeRune is the rune returned when there is no matching glyph. It was previously '?'.
 const MissingCodeRune = '\ufffd' // <20>

-// GlyphToRune returns true if `glyph` is in our GlyphToRune mapping.
-func KnownGlyph(glyph string) bool {
-	_, ok := GlyphToRune(glyph)
-	return ok
-}
-
 // GlyphToRune returns the rune corresponding to glyph `glyph` if there is one.
 // XXX: TODO: Can we return a string here? e.g. When we are extracting text, we want to get "ffi"
 //            rather than 'ﬃ'. We only need a glyph ➞ rune map when we need to convert back to
--- a/pdf/model/textencoding/simple.go
+++ b/pdf/model/textencoding/simple.go
@ -198,7 +198,7 @@ func (se *SimpleEncoder) makeEncoder() {
 	codeToGlyph := map[uint16]string{}
 	glyphToCode := map[string]uint16{}
 	for code, r := range codeToRune {
-		glyph := glyphlistRuneToGlyphMap[r] // !@#$ Build out this map
+		glyph := glyphlistRuneToGlyphMap[r]
 		codeToGlyph[code] = glyph
 		glyphToCode[glyph] = code
 		if glyph == "" {
@ -207,9 +207,11 @@ func (se *SimpleEncoder) makeEncoder() {
 	}
 	se.codeToGlyph = codeToGlyph
 	se.glyphToCode = glyphToCode
-	se.codeToRune = codeToRune // XXX: !@#$ Make this a string
+	se.codeToRune = codeToRune
 }

+// FromFontDifferences converts `diffList`, a /Differences array from an /Encoding object, to a map
+// representing character code to glyph mappings.
 func FromFontDifferences(diffList []core.PdfObject) (map[byte]string, error) {
 	differences := map[byte]string{}
 	var n byte
@ -229,6 +231,8 @@ func FromFontDifferences(diffList []core.PdfObject) (map[byte]string, error) {
 	return differences, nil
 }

+// ToFontDifferences converts `differences`, a map representing character code to glyph mappings,
+// to a /Differences array for an /Encoding object.
 func ToFontDifferences(differences map[byte]string) []core.PdfObject {
 	if len(differences) == 0 {
 		return []core.PdfObject{}
@ -255,6 +259,7 @@ func ToFontDifferences(differences map[byte]string) []core.PdfObject {
 	return diffList
 }

+// simpleEncodings is a map of the standard 8 bit character encodings.
 var simpleEncodings = map[string]map[uint16]rune{
 	"MacExpertEncoding": map[uint16]rune{
 		0x20: '\u0020', //     "space"