Add some comments

2025-05-13 19:29:10 +08:00 · 2020-05-25 00:54:49 +00:00 · 2020-05-25 00:54:49 +00:00 · e46c3fb426
commit e46c3fb426
parent 11dcb12637
2 changed files with 14 additions and 3 deletions
--- a/internal/textencoding/identity.go
+++ b/internal/textencoding/identity.go
@ -13,7 +13,12 @@ import (
 	"github.com/unidoc/unipdf/v3/core"
 )

-// IdentityEncoder represents an 2-byte identity encoding
+// IdentityEncoder represents a 2-byte identity encoding.
+// NOTE: In many cases this is only used to encode/decode to glyph indices and carries no Unicode
+//  meaning, except via the ToUnicode maps.
+// TODO: Using runes to indicate glyph indices and non-UTF-8 runes is confusing and not good practice.
+//  It might be better to combine the Identity encoder with a ToUnicode map and keep track of the
+//  actual runes, character codes and CMaps together.
 type IdentityEncoder struct {
 	baseName string

@ -57,6 +62,7 @@ func (enc *IdentityEncoder) Decode(raw []byte) string {

 // RuneToCharcode converts rune `r` to a PDF character code.
 // The bool return flag is true if there was a match, and false otherwise.
+// TODO: Here the `r` is an actual rune.
 func (enc *IdentityEncoder) RuneToCharcode(r rune) (CharCode, bool) {
 	if enc.registeredMap == nil {
 		enc.registeredMap = map[rune]struct{}{}
@ -68,10 +74,13 @@ func (enc *IdentityEncoder) RuneToCharcode(r rune) (CharCode, bool) {

 // CharcodeToRune converts PDF character code `code` to a rune.
 // The bool return flag is true if there was a match, and false otherwise.
+// TODO: Here the returned rune is not necessarily an actual rune but may be a glyph index (or both).
 func (enc *IdentityEncoder) CharcodeToRune(code CharCode) (rune, bool) {
 	if enc.registeredMap == nil {
 		enc.registeredMap = map[rune]struct{}{}
 	}
+
+	// TODO: The rune(code) conversion is confusing, as the result is not an actual UTF-8 rune.
 	enc.registeredMap[rune(code)] = struct{}{}
 	return rune(code), true
 }
--- a/model/font.go
+++ b/model/font.go
@ -50,6 +50,7 @@ func (font *PdfFont) SubsetRegistered() error {
 	case *pdfFontType0:
 		err := t.subsetRegistered()
 		if err != nil {
+			common.Log.Debug("Subset error: %v", err)
 			return err
 		}
 		if t.container != nil {
@ -401,6 +402,7 @@ func (font *PdfFont) BytesToCharcodes(data []byte) []textencoding.CharCode {

 	charcodes := make([]textencoding.CharCode, 0, len(data)+len(data)%2)
 	if font.baseFields().isCIDFont() {
+		// Identity only?
 		if len(data) == 1 {
 			data = []byte{0, data[0]}
 		}
@ -413,6 +415,7 @@ func (font *PdfFont) BytesToCharcodes(data []byte) []textencoding.CharCode {
 			charcodes = append(charcodes, textencoding.CharCode(b))
 		}
 	} else {
+		// Simple font: byte -> charcode.
 		for _, b := range data {
 			charcodes = append(charcodes, textencoding.CharCode(b))
 		}
@ -745,8 +748,7 @@ func (base fontCommon) isCIDFont() bool {
// newFontBaseFieldsFromPdfObject returns `fontObj` as a dictionary and the common fields from that
// dictionary in the fontCommon return.  If there is a problem an error is returned.
 // The fontCommon is the group of fields common to all PDF fonts.
-func newFontBaseFieldsFromPdfObject(fontObj core.PdfObject) (*core.PdfObjectDictionary, *fontCommon,
-	error) {
+func newFontBaseFieldsFromPdfObject(fontObj core.PdfObject) (*core.PdfObjectDictionary, *fontCommon, error) {
 	font := &fontCommon{}

 	if obj, ok := fontObj.(*core.PdfIndirectObject); ok {