Merge pull request #372 from gunnsth/release/v3.8.0

Prepare unipdf release v3.8.0
2025-04-26 13:48:55 +08:00 · 2020-06-16 08:35:52 +00:00 · 2020-06-16 08:35:52 +00:00 · 8ab0b6ff45
commit 8ab0b6ff45
parent f99c0cd58f deb563b581
17 changed files with 542 additions and 305 deletions
--- a/annotator/field_appearance.go
+++ b/annotator/field_appearance.go
@ -13,6 +13,7 @@ import (

 	"github.com/unidoc/unipdf/v3/common"
 	"github.com/unidoc/unipdf/v3/contentstream"
+	"github.com/unidoc/unipdf/v3/contentstream/draw"
 	"github.com/unidoc/unipdf/v3/core"
 	"github.com/unidoc/unipdf/v3/internal/textencoding"
 	"github.com/unidoc/unipdf/v3/model"
@ -35,6 +36,7 @@ type FieldAppearance struct {
 type AppearanceStyle struct {
 	// How much of Rect height to fill when autosizing text.
 	AutoFontSizeFraction float64
+
 	// CheckmarkRune is a rune used for check mark in checkboxes (for ZapfDingbats font).
 	CheckmarkRune rune

@ -51,6 +53,47 @@ type AppearanceStyle struct {

 	// Allow field MK appearance characteristics to override style settings.
 	AllowMK bool
+
+	// Fonts holds appearance styles for fonts.
+	Fonts *AppearanceFontStyle
+}
+
+// AppearanceFontStyle defines font style characteristics for form fields,
+// used in the filling/flattening process.
+type AppearanceFontStyle struct {
+	// Fallback represents a global font fallback, used for fields which do
+	// not specify a font in their default appearance (DA). The fallback is
+	// also used if there is a font specified in the DA, but it is not
+	// found in the AcroForm resources (DR).
+	Fallback *AppearanceFont
+
+	// FieldFallbacks defines font fallbacks for specific fields. The map keys
+	// represent the names of the fields (which can be specified by their
+	// partial or full names). Specific field fallback fonts take precedence
+	// over the global font fallback.
+	FieldFallbacks map[string]*AppearanceFont
+
+	// ForceReplace forces the replacement of fonts in the filling/flattening
+	// process, even if the default appearance (DA) specifies a valid font.
+	// If no fallback font is provided, setting this field has no effect.
+	ForceReplace bool
+}
+
+// AppearanceFont represents a font used for generating the appearance of a
+// field in the filling/flattening process.
+type AppearanceFont struct {
+	// Name represents the name of the font which will be added to the
+	// AcroForm resources (DR).
+	Name string
+
+	// Font represents the actual font used for the field appearance.
+	Font *model.PdfFont
+
+	// Size represents the size of the font used for the field appearance.
+	// If size is 0, a default font size will be used.
+	// The default font size is calculated using the available annotation
+	// height and the AutoFontSizeFraction of the AppearanceStyle.
+	Size float64
 }

 type quadding int
@ -96,6 +139,9 @@ func (fa FieldAppearance) GenerateAppearanceDict(form *model.PdfAcroForm, field
 		common.Log.Trace("Already populated - ignoring")
 		return appDict, nil
 	}
+	if form.DR == nil {
+		form.DR = model.NewPdfPageResources()
+	}

 	// Generate the appearance.
 	switch t := field.GetContext().(type) {
@ -172,26 +218,26 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT
 	if err != nil {
 		return nil, err
 	}
-	width := rect.Width()
-	height := rect.Height()
+	width, height := rect.Width(), rect.Height()

+	var rotation float64
 	if mkDict, has := core.GetDict(wa.MK); has {
 		bsDict, _ := core.GetDict(wa.BS)
 		err := style.applyAppearanceCharacteristics(mkDict, bsDict, nil)
 		if err != nil {
 			return nil, err
 		}
+		rotation, _ = core.GetNumberAsFloat(mkDict.Get("R"))
 	}

 	// Get and process the default appearance string (DA) operands.
-	da := getDA(ftxt.PdfField)
-	csp := contentstream.NewContentStreamParser(da)
-	daOps, err := csp.Parse()
+	daOps, err := contentstream.NewContentStreamParser(getDA(ftxt.PdfField)).Parse()
 	if err != nil {
 		return nil, err
 	}

 	cc := contentstream.NewContentCreator()
+
 	if style.BorderSize > 0 {
 		drawRect(cc, style, width, height)
 	}
@ -205,62 +251,44 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT

 	cc.Add_BMC("Tx")
 	cc.Add_q()
+
+	bboxWidth, bboxHeight := width, height
+	if rotation != 0 {
+		// Calculate bounding box before rotation.
+		revRotation := -rotation
+		bbox := draw.Path{Points: []draw.Point{
+			draw.NewPoint(0, 0).Rotate(revRotation),
+			draw.NewPoint(width, 0).Rotate(revRotation),
+			draw.NewPoint(0, height).Rotate(revRotation),
+			draw.NewPoint(width, height).Rotate(revRotation),
+		}}.GetBoundingBox()
+
+		// Update width and height, as the appearance is generated based on
+		// the bounding box of the annotation with no rotation.
+		width = bbox.Width
+		height = bbox.Height
+
+		// Apply rotation.
+		cc.RotateDeg(rotation)
+		cc.Translate(bbox.X, bbox.Y)
+	}
+
 	// Graphic state changes.
 	cc.Add_BT()

-	// Add DA operands.
-	var fontsize float64
-	var fontname *core.PdfObjectName
-	var font *model.PdfFont
-	autosize := true
-
-	fontsizeDef := height * style.AutoFontSizeFraction
-	for _, op := range *daOps {
-		// When Tf specified with font size is 0, it means we should set on our own based on the Rect (autosize).
-		if op.Operand == "Tf" && len(op.Params) == 2 {
-			if name, ok := core.GetName(op.Params[0]); ok {
-				fontname = name
-			}
-			num, err := core.GetNumberAsFloat(op.Params[1])
-			if err == nil {
-				fontsize = num
-			} else {
-				common.Log.Debug("ERROR invalid font size: %v", op.Params[1])
-			}
-			if fontsize == 0 {
-				// Use default if zero.
-				fontsize = fontsizeDef
-			} else {
-				// Disable autosize when font size (>0) explicitly specified.
-				autosize = false
-			}
-			// Skip over (set fontsize in code below).
-			continue
-		}
-		cc.AddOperand(*op)
-	}
-
-	// If the font name is not set or not found in the form resources, use
-	// the default fallback font (Helvetica).
-	var fontObj core.PdfObject
-	if dr != nil && fontname != nil {
-		if fObj, has := dr.GetFontByName(*fontname); has {
-			if font, err = model.NewPdfFontFromPdfObject(fObj); err != nil {
-				common.Log.Debug("ERROR: could not load appearance font: %v", err)
+	// Process DA operands.
+	apFont, hasTf, err := style.processDA(ftxt.PdfField, daOps, dr, resources, cc)
+	if err != nil {
 		return nil, err
 	}
-			fontObj = fObj
+
+	font := apFont.Font
+	fontsize := apFont.Size
+	fontname := core.MakeName(apFont.Name)
+	autosize := fontsize == 0
+	if autosize && hasTf {
+		fontsize = height * style.AutoFontSizeFraction
 	}
-	}
-	if fontObj == nil {
-		// Font not found. Reverting to Helvetica with name `Helv`.
-		if font, err = model.NewStandard14Font("Helvetica"); err != nil {
-			return nil, err
-		}
-		fontname = core.MakeName("Helv")
-		fontObj = font.ToPdfObject()
-	}
-	resources.SetFontByName(*fontname, fontObj)

 	encoder := font.Encoder()
 	if encoder == nil {
@ -461,7 +489,7 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT

 	xform := model.NewXObjectForm()
 	xform.Resources = resources
-	xform.BBox = core.MakeArrayFromFloats([]float64{0, 0, width, height})
+	xform.BBox = core.MakeArrayFromFloats([]float64{0, 0, bboxWidth, bboxHeight})
 	xform.SetContentStream(cc.Bytes(), defStreamEncoder())

 	apDict := core.MakeDict()
@ -480,16 +508,11 @@ func genFieldTextCombAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFi
 	if !ok {
 		return nil, errors.New("invalid Rect")
 	}
-	rect, err := array.ToFloat64Array()
+	rect, err := model.NewPdfRectangle(*array)
 	if err != nil {
 		return nil, err
 	}
-	if len(rect) != 4 {
-		return nil, errors.New("len(Rect) != 4")
-	}
-
-	width := rect[2] - rect[0]
-	height := rect[3] - rect[1]
+	width, height := rect.Width(), rect.Height()

 	if mkDict, has := core.GetDict(wa.MK); has {
 		bsDict, _ := core.GetDict(wa.BS)
@ -510,9 +533,7 @@ func genFieldTextCombAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFi
 	boxwidth := float64(width) / float64(maxLen)

 	// Get and process the default appearance string (DA) operands.
-	da := getDA(ftxt.PdfField)
-	csp := contentstream.NewContentStreamParser(da)
-	daOps, err := csp.Parse()
+	daOps, err := contentstream.NewContentStreamParser(getDA(ftxt.PdfField)).Parse()
 	if err != nil {
 		return nil, err
 	}
@ -529,68 +550,28 @@ func genFieldTextCombAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFi
 	}
 	cc.Add_BMC("Tx")
 	cc.Add_q()
+
 	// Graphic state changes.
 	cc.Add_BT()

-	// Add DA operands.
-	var fontsize float64
-	var fontname *core.PdfObjectName
-	var font *model.PdfFont
-	autosize := true
-
-	fontsizeDef := height * style.AutoFontSizeFraction
-	for _, op := range *daOps {
-		// If TF specified and font size is 0, it means we should set on our own based on the Rect.
-		if op.Operand == "Tf" && len(op.Params) == 2 {
-			if name, ok := core.GetName(op.Params[0]); ok {
-				fontname = name
-			}
-			num, err := core.GetNumberAsFloat(op.Params[1])
-			if err == nil {
-				fontsize = num
-			} else {
-				common.Log.Debug("ERROR invalid font size: %v", op.Params[1])
-			}
-			if fontsize == 0 {
-				// Use default if zero.
-				fontsize = fontsizeDef
-			} else {
-				// Disable autosize when font size (>0) explicitly specified.
-				autosize = false
-			}
-			// Skip over (set fontsize in code below).
-			continue
-		}
-		cc.AddOperand(*op)
-	}
-
-	// If fontname not set need to make a new font or use one defined in the resources.
-	// e.g. Helv commonly used for Helvetica.
-	if fontname == nil || dr == nil {
-		// Font not set, revert to Helvetica with name "Helv".
-		fontname = core.MakeName("Helv")
-		helv, err := model.NewStandard14Font("Helvetica")
+	// Process DA operands.
+	apFont, hasTf, err := style.processDA(ftxt.PdfField, daOps, dr, resources, cc)
 	if err != nil {
 		return nil, err
 	}
-		font = helv
-		resources.SetFontByName(*fontname, helv.ToPdfObject())
-		cc.Add_Tf(*fontname, fontsizeDef)
-	} else {
-		fontobj, has := dr.GetFontByName(*fontname)
-		if !has {
-			return nil, errors.New("font not in DR")
-		}
-		font, err = model.NewPdfFontFromPdfObject(fontobj)
-		if err != nil {
-			common.Log.Debug("ERROR loading default appearance font: %v", err)
-			return nil, err
-		}
-		resources.SetFontByName(*fontname, fontobj)
+
+	font := apFont.Font
+	fontname := core.MakeName(apFont.Name)
+	fontsize := apFont.Size
+	autosize := fontsize == 0
+	if autosize && hasTf {
+		fontsize = height * style.AutoFontSizeFraction
 	}
+
 	encoder := font.Encoder()
 	if encoder == nil {
-		common.Log.Debug("ERROR - Encoder is nil - can expect bad results")
+		common.Log.Debug("WARN: font encoder is nil. Assuming identity encoder. Output may be incorrect.")
+		encoder = textencoding.NewIdentityTextEncoder("Identity-H")
 	}

 	var text string
@ -711,26 +692,19 @@ func genFieldTextCombAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFi
 // genFieldCheckboxAppearance generates an appearance dictionary for a widget annotation `wa` referenced by
 // a button field `fbtn` with form resources `dr` (DR).
 func genFieldCheckboxAppearance(wa *model.PdfAnnotationWidget, fbtn *model.PdfFieldButton, dr *model.PdfPageResources, style AppearanceStyle) (*core.PdfObjectDictionary, error) {
-	// TODO(dennwc): unused parameters
-
 	// Get bounding Rect.
 	array, ok := core.GetArray(wa.Rect)
 	if !ok {
 		return nil, errors.New("invalid Rect")
 	}
-	rect, err := array.ToFloat64Array()
+	rect, err := model.NewPdfRectangle(*array)
 	if err != nil {
 		return nil, err
 	}
-	if len(rect) != 4 {
-		return nil, errors.New("len(Rect) != 4")
-	}
+	width, height := rect.Width(), rect.Height()

 	common.Log.Debug("Checkbox, wa BS: %v", wa.BS)

-	width := rect[2] - rect[0]
-	height := rect[3] - rect[1]
-
 	zapfdb, err := model.NewStandard14Font("ZapfDingbats")
 	if err != nil {
 		return nil, err
@ -825,26 +799,16 @@ func genFieldComboboxAppearance(form *model.PdfAcroForm, wa *model.PdfAnnotation
 	if !ok {
 		return nil, errors.New("invalid Rect")
 	}
-	rect, err := array.ToFloat64Array()
+	rect, err := model.NewPdfRectangle(*array)
 	if err != nil {
 		return nil, err
 	}
-	if len(rect) != 4 {
-		return nil, errors.New("len(Rect) != 4")
-	}
+	width, height := rect.Width(), rect.Height()

 	common.Log.Debug("Choice, wa BS: %v", wa.BS)

-	width := rect[2] - rect[0]
-	height := rect[3] - rect[1]
-
 	// Get and process the default appearance string (DA) operands.
-	da := core.MakeString("")
-	if form.DA != nil {
-		da, _ = core.GetString(form.DA)
-	}
-	csp := contentstream.NewContentStreamParser(da.String())
-	daOps, err := csp.Parse()
+	daOps, err := contentstream.NewContentStreamParser(getDA(fch.PdfField)).Parse()
 	if err != nil {
 		return nil, err
 	}
@ -857,22 +821,25 @@ func genFieldComboboxAppearance(form *model.PdfAcroForm, wa *model.PdfAnnotation
 		}
 	}

+	// See section 12.7.4.4 "Choice Fields" (pp. 444-446 PDF32000_2008).
 	dchoiceapp := core.MakeDict()
 	for _, optObj := range fch.Opt.Elements() {
+		if optArr, ok := core.GetArray(optObj); ok && optArr.Len() == 2 {
+			optObj = optArr.Get(1)
+		}
+
 		var optstr string
 		if opt, ok := core.GetString(optObj); ok {
-			optstr = opt.String()
-		} else {
-			if opt, ok := core.GetName(optObj); ok {
+			optstr = opt.Decoded()
+		} else if opt, ok := core.GetName(optObj); ok {
 			optstr = opt.String()
 		} else {
 			common.Log.Debug("ERROR: Opt not a name/string - %T", optObj)
 			return nil, errors.New("not a name/string")
 		}
-		}

 		if len(optstr) > 0 {
-			xform, err := makeComboboxTextXObjForm(width, height, optstr, style, daOps, form.DR)
+			xform, err := makeComboboxTextXObjForm(fch.PdfField, width, height, optstr, style, daOps, form.DR)
 			if err != nil {
 				return nil, err
 			}
@ -888,7 +855,9 @@ func genFieldComboboxAppearance(form *model.PdfAcroForm, wa *model.PdfAnnotation
 }

 // Make a text-based XObj Form.
-func makeComboboxTextXObjForm(width, height float64, text string, style AppearanceStyle, daOps *contentstream.ContentStreamOperations, dr *model.PdfPageResources) (*model.XObjectForm, error) {
+func makeComboboxTextXObjForm(field *model.PdfField, width, height float64,
+	text string, style AppearanceStyle, daOps *contentstream.ContentStreamOperations,
+	dr *model.PdfPageResources) (*model.XObjectForm, error) {
 	resources := model.NewPdfPageResources()

 	cc := contentstream.NewContentCreator()
@ -906,63 +875,25 @@ func makeComboboxTextXObjForm(width, height float64, text string, style Appearan
 	// Graphic state changes.
 	cc.Add_BT()

-	// Add DA operands.
-	var fontsize float64
-	var fontname *core.PdfObjectName
-	var font *model.PdfFont
-	var err error
-	autosize := true
-
-	fontsizeDef := height * style.AutoFontSizeFraction
-	for _, op := range *daOps {
-		// When Tf specified with font size is 0, it means we should set on our own based on the Rect (autosize).
-		if op.Operand == "Tf" && len(op.Params) == 2 {
-			if name, ok := core.GetName(op.Params[0]); ok {
-				fontname = name
-			}
-			num, err := core.GetNumberAsFloat(op.Params[1])
-			if err == nil {
-				fontsize = num
-			} else {
-				common.Log.Debug("ERROR invalid font size: %v", op.Params[1])
-			}
-			if fontsize == 0 {
-				// Use default if zero.
-				fontsize = fontsizeDef
-			} else {
-				// Disable autosize when font size (>0) explicitly specified.
-				autosize = false
-			}
-			// Skip over (set fontsize in code below).
-			continue
-		}
-		cc.AddOperand(*op)
-	}
-
-	// If fontname not set need to make a new font or use one defined in the resources.
-	// e.g. Helv commonly used for Helvetica.
-	if fontname == nil || dr == nil {
-		// Font not set, revert to Helvetica with name "Helv".
-		fontname = core.MakeName("Helv")
-		helv, err := model.NewStandard14Font("Helvetica")
+	// Process DA operands.
+	apFont, hasTf, err := style.processDA(field, daOps, dr, resources, cc)
 	if err != nil {
 		return nil, err
 	}
-		font = helv
-		resources.SetFontByName(*fontname, helv.ToPdfObject())
-	} else {
-		fontobj, has := dr.GetFontByName(*fontname)
-		if !has {
-			return nil, errors.New("font not in DR")
-		}
-		font, err = model.NewPdfFontFromPdfObject(fontobj)
-		if err != nil {
-			common.Log.Debug("ERROR loading default appearance font: %v", err)
-			return nil, err
-		}
-		resources.SetFontByName(*fontname, fontobj)
+
+	font := apFont.Font
+	fontsize := apFont.Size
+	fontname := core.MakeName(apFont.Name)
+	autosize := fontsize == 0
+	if autosize && hasTf {
+		fontsize = height * style.AutoFontSizeFraction
 	}
+
 	encoder := font.Encoder()
+	if encoder == nil {
+		common.Log.Debug("WARN: font encoder is nil. Assuming identity encoder. Output may be incorrect.")
+		encoder = textencoding.NewIdentityTextEncoder("Identity-H")
+	}

 	// If no text, no appearance needed.
 	if len(text) == 0 {
@ -1136,6 +1067,105 @@ func (style *AppearanceStyle) applyAppearanceCharacteristics(mkDict *core.PdfObj
 	return nil
 }

+// processDA adds the operands found in the field default appearance stream to
+// the provided content stream creator. It also provides a fallback font, based
+// on the configuration of the AppearanceStyle, if no valid font is specified
+// in the default appearance. The method returns the font to be used when
+// generating the appearance of the field and a boolean value specifying if
+// the DA stream contains any Tf operands.
+func (style *AppearanceStyle) processDA(field *model.PdfField,
+	daOps *contentstream.ContentStreamOperations, dr, resources *model.PdfPageResources,
+	cc *contentstream.ContentCreator) (*AppearanceFont, bool, error) {
+	// Check for fallback fonts.
+	var fallbackFont *AppearanceFont
+	var forceReplace bool
+	if style.Fonts != nil {
+		// Use global fallback, if one is specified.
+		if style.Fonts.Fallback != nil {
+			fallbackFont = style.Fonts.Fallback
+		}
+
+		// Use field fallback, if one is specified.
+		if fieldFallbacks := style.Fonts.FieldFallbacks; fieldFallbacks != nil {
+			if fbFont, ok := fieldFallbacks[field.PartialName()]; ok {
+				fallbackFont = fbFont
+			} else if fullName, err := field.FullName(); err == nil {
+				if fbFont, ok := fieldFallbacks[fullName]; ok {
+					fallbackFont = fbFont
+				}
+			}
+		}
+
+		forceReplace = style.Fonts.ForceReplace
+	}
+
+	// Iterate over the DA operands and extract the font, if specified.
+	var fontName string
+	var fontSize float64
+	var hasTf bool
+	if daOps != nil {
+		for _, op := range *daOps {
+			if op.Operand == "Tf" && len(op.Params) == 2 {
+				if name, ok := core.GetNameVal(op.Params[0]); ok {
+					fontName = name
+				}
+				if size, err := core.GetNumberAsFloat(op.Params[1]); err == nil {
+					fontSize = size
+				}
+				hasTf = true
+				continue
+			}
+			cc.AddOperand(*op)
+		}
+	}
+
+	var apFont *AppearanceFont
+	var apFontObj core.PdfObject
+	if forceReplace && fallbackFont != nil {
+		apFont = fallbackFont
+	} else {
+		// Check if font name was found in the DA stream and search it in the resources.
+		if dr != nil && fontName != "" {
+			if obj, ok := dr.GetFontByName(*core.MakeName(fontName)); ok {
+				if font, err := model.NewPdfFontFromPdfObject(obj); err == nil {
+					apFontObj = obj
+					apFont = &AppearanceFont{Name: fontName, Font: font, Size: fontSize}
+				} else {
+					common.Log.Debug("ERROR: could not load appearance font: %v", err)
+				}
+			}
+		}
+
+		// Use fallback font, if one was specified.
+		if apFont == nil && fallbackFont != nil {
+			apFont = fallbackFont
+		}
+
+		// Use default fallback font (Helvetica).
+		if apFont == nil {
+			font, err := model.NewStandard14Font("Helvetica")
+			if err != nil {
+				return nil, false, err
+			}
+			apFont = &AppearanceFont{Name: "Helv", Font: font, Size: fontSize}
+		}
+	}
+
+	// Add appearance font to the form resources (DR) and the appearance resources.
+	apFontName := *core.MakeName(apFont.Name)
+	if apFontObj == nil {
+		apFontObj = apFont.Font.ToPdfObject()
+	}
+	if dr != nil && !dr.HasFontByName(apFontName) {
+		dr.SetFontByName(apFontName, apFontObj)
+	}
+	if resources != nil && !resources.HasFontByName(apFontName) {
+		resources.SetFontByName(apFontName, apFontObj)
+	}
+
+	return apFont, hasTf, nil
+}
+
 // WrapContentStream ensures that the entire content stream for a `page` is wrapped within q ... Q operands.
 // Ensures that following operands that are added are not affected by additional operands that are added.
 // Implements interface model.ContentStreamWrapper.
--- a/common/logging.go
+++ b/common/logging.go
@ -221,7 +221,7 @@ func (l WriterLogger) logToWriter(f io.Writer, prefix string, format string, arg
 }

 func logToWriter(f io.Writer, prefix string, format string, args ...interface{}) {
-	_, file, line, ok := runtime.Caller(2)
+	_, file, line, ok := runtime.Caller(3)
 	if !ok {
 		file = "???"
 		line = 0
--- a/common/version.go
+++ b/common/version.go
@ -11,12 +11,12 @@ import (
 )

 const releaseYear = 2020
-const releaseMonth = 5
-const releaseDay = 25
-const releaseHour = 23
-const releaseMin = 35
+const releaseMonth = 6
+const releaseDay = 15
+const releaseHour = 20
+const releaseMin = 15

 // Version holds version information, when bumping this make sure to bump the released at stamp also.
-const Version = "3.7.1"
+const Version = "3.8.0"

 var ReleasedAt = time.Date(releaseYear, releaseMonth, releaseDay, releaseHour, releaseMin, 0, 0, time.UTC)
--- a/extractor/text.go
+++ b/extractor/text.go
@ -702,7 +702,7 @@ func (to *textObject) reset() {
 func (to *textObject) renderText(data []byte) error {
 	font := to.getCurrentFont()
 	charcodes := font.BytesToCharcodes(data)
-	runes, numChars, numMisses := font.CharcodesToUnicodeWithStats(charcodes)
+	texts, numChars, numMisses := font.CharcodesToStrings(charcodes)
 	if numMisses > 0 {
 		common.Log.Debug("renderText: numChars=%d numMisses=%d", numChars, numMisses)
 	}
@ -721,18 +721,18 @@ func (to *textObject) renderText(data []byte) error {
 		spaceMetrics, _ = model.DefaultFont().GetRuneMetrics(' ')
 	}
 	spaceWidth := spaceMetrics.Wx * glyphTextRatio
-	common.Log.Trace("spaceWidth=%.2f text=%q font=%s fontSize=%.1f", spaceWidth, runes, font, tfs)
+	common.Log.Trace("spaceWidth=%.2f text=%q font=%s fontSize=%.2f", spaceWidth, texts, font, tfs)

 	stateMatrix := transform.NewMatrix(
 		tfs*th, 0,
 		0, tfs,
 		0, state.trise)

-	common.Log.Trace("renderText: %d codes=%+v runes=%q", len(charcodes), charcodes, runes)
+	common.Log.Trace("renderText: %d codes=%+v runes=%q", len(charcodes), charcodes, len(texts))

-	for i, r := range runes {
-		// TODO(peterwilliams97): Need to find and fix cases where this happens.
-		if r == '\x00' {
+	for i, text := range texts {
+		r := []rune(text)
+		if len(r) == 1 && r[0] == '\x00' {
 			continue
 		}

@ -746,14 +746,14 @@ func (to *textObject) renderText(data []byte) error {

 		// w is the unscaled movement at the end of a word.
 		w := 0.0
-		if r == ' ' {
+		if string(r) == " " {
 			w = state.tw
 		}

 		m, ok := font.GetCharMetrics(code)
 		if !ok {
 			common.Log.Debug("ERROR: No metric for code=%d r=0x%04x=%+q %s", code, r, r, font)
-			return errors.New("no char metrics")
+			return fmt.Errorf("no char metrics: font=%s code=%d", font.String(), code)
 		}

 		// c is the character size in unscaled text units.
@ -774,7 +774,7 @@ func (to *textObject) renderText(data []byte) error {
 		common.Log.Trace("m=%s c=%+v t0=%+v td0=%s trm0=%s", m, c, t0, td0, td0.Mult(to.tm).Mult(to.gs.CTM))

 		mark := to.newTextMark(
-			string(r),
+			text,
 			trm,
 			translation(to.gs.CTM.Mult(to.tm).Mult(td0)),
 			math.Abs(spaceWidth*trm.ScalingFactorX()),
--- a/extractor/text_test.go
+++ b/extractor/text_test.go
@ -314,6 +314,11 @@ var fileExtractionTests = []struct {
 				`The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",`},
 		},
 	},
+	{filename: "Saudi.pdf",
+		pageTerms: map[int][]string{
+			10: []string{"الله"},
+		},
+	},
 	// TODO(peterwilliams97): Reinstate these 2 tests when diacritic combination is fixed.
 	// {filename: "Ito_Formula.pdf",
 	// 	pageTerms: map[int][]string{
--- a/fjson/fielddata_test.go
+++ b/fjson/fielddata_test.go
@ -148,6 +148,7 @@ func TestJSONExtractAndFill(t *testing.T) {
 	fieldDataExp, err := LoadFromJSONFile("./testdata/advancedform.json")
 	require.NoError(t, err)
 	jsonDataExp, err := fieldDataExp.JSON()
+	require.NoError(t, err)

 	// Check templates for equality.
 	require.Equal(t, jsonDataExp, jsonData)
@ -184,6 +185,7 @@ func TestJSONExtractAndFill(t *testing.T) {
 	fieldDataExp, err = LoadFromJSON(bytes.NewReader(jsonBytes))
 	require.NoError(t, err)
 	jsonDataExp, err = fieldDataExp.JSON()
+	require.NoError(t, err)

 	// Fill test PDF form fields and write to buffer.
 	f, err := os.Open(inputFilePath)
@ -212,6 +214,47 @@ func TestJSONExtractAndFill(t *testing.T) {
 	fieldData, err = LoadFromPDF(bytes.NewReader(buf.Bytes()))
 	require.NoError(t, err)
 	jsonData, err = fieldData.JSON()
+	require.NoError(t, err)
+
+	// Check field data for equality.
+	require.Equal(t, jsonDataExp, jsonData)
+}
+
+func TestJSONFillAndExtract(t *testing.T) {
+	// Read JSON fill data.
+	fieldDataExp, err := LoadFromJSONFile("./testdata/mixedfields.json")
+	require.NoError(t, err)
+	jsonDataExp, err := fieldDataExp.JSON()
+	require.NoError(t, err)
+
+	// Fill test PDF form fields and write to buffer.
+	f, err := os.Open("./testdata/mixedfields.pdf")
+	require.NoError(t, err)
+	defer f.Close()
+
+	reader, err := model.NewPdfReader(f)
+	require.NoError(t, err)
+
+	err = reader.AcroForm.Fill(fieldDataExp)
+	require.NoError(t, err)
+
+	var buf bytes.Buffer
+	writer := model.NewPdfWriter()
+	for i := range reader.PageList {
+		err := writer.AddPage(reader.PageList[i])
+		require.NoError(t, err)
+	}
+
+	err = writer.SetForms(reader.AcroForm)
+	require.NoError(t, err)
+	err = writer.Write(&buf)
+	require.NoError(t, err)
+
+	// Load field data from buffer.
+	fieldData, err := LoadFromPDF(bytes.NewReader(buf.Bytes()))
+	require.NoError(t, err)
+	jsonData, err := fieldData.JSON()
+	require.NoError(t, err)

 	// Check field data for equality.
 	require.Equal(t, jsonDataExp, jsonData)
--- a/fjson/testdata/mixedfields.json
+++ b/fjson/testdata/mixedfields.json
@ -0,0 +1,94 @@
+[
+    {
+        "name": "Given Name Text Box",
+        "value": "Jane"
+    },
+    {
+        "name": "Family Name Text Box",
+        "value": "Doe"
+    },
+    {
+        "name": "House nr Text Box",
+        "value": "100"
+    },
+    {
+        "name": "Address 2 Text Box",
+        "value": "Generic Avenue"
+    },
+    {
+        "name": "Postcode Text Box",
+        "value": "11122"
+    },
+    {
+        "name": "Country Combo Box",
+        "value": "France"
+    },
+    {
+        "name": "Height Formatted Field",
+        "value": "175"
+    },
+    {
+        "name": "City Text Box",
+        "value": "Paris"
+    },
+    {
+        "name": "Driving License Check Box",
+        "value": "Yes",
+        "options": [
+            "Yes",
+            "Off"
+        ]
+    },
+    {
+        "name": "Favourite Colour List Box",
+        "value": "Yellow"
+    },
+    {
+        "name": "Language 1 Check Box",
+        "value": "Yes",
+        "options": [
+            "Yes",
+            "Off"
+        ]
+    },
+    {
+        "name": "Language 2 Check Box",
+        "value": "Off",
+        "options": [
+            "Yes",
+            "Off"
+        ]
+    },
+    {
+        "name": "Language 3 Check Box",
+        "value": "Yes",
+        "options": [
+            "Yes",
+            "Off"
+        ]
+    },
+    {
+        "name": "Language 4 Check Box",
+        "value": "Off",
+        "options": [
+            "Yes",
+            "Off"
+        ]
+    },
+    {
+        "name": "Language 5 Check Box",
+        "value": "Yes",
+        "options": [
+            "Yes",
+            "Off"
+        ]
+    },
+    {
+        "name": "Gender List Box",
+        "value": "Woman"
+    },
+    {
+        "name": "Address 1 Text Box",
+        "value": "Generic Street"
+    }
+]
--- a/fjson/testdata/mixedfields.pdf
+++ b/fjson/testdata/mixedfields.pdf
--- a/internal/cmap/cmap.go
+++ b/internal/cmap/cmap.go
@ -21,6 +21,9 @@ const (

 	// MissingCodeRune replaces runes that can't be decoded. '\ufffd' = <20>. Was '?'.
 	MissingCodeRune = '\ufffd' // <20>
+
+	// MissingCodeString replaces strings that can't be decoded.
+	MissingCodeString = string(MissingCodeRune)
 )

 // CharCode is a character code or Unicode
@ -41,7 +44,7 @@ type charRange struct {
 type fbRange struct {
 	code0 CharCode
 	code1 CharCode
-	r0    rune
+	r0    string
 }

 // CIDSystemInfo contains information for identifying the character collection
@ -106,8 +109,8 @@ type CMap struct {
 	cidToCode map[CharCode]CharCode // CID -> charcode

 	// Used by ctype 2 CMaps.
-	codeToUnicode map[CharCode]rune // CID -> Unicode
-	unicodeToCode map[rune]CharCode // Unicode -> CID
+	codeToUnicode map[CharCode]string // CID -> Unicode string
+	unicodeToCode map[string]CharCode // Unicode string -> CID

 	// cached contains the raw CMap data. It is used by the Bytes method in
 	// order to avoid generating the data for every call.
@ -116,8 +119,13 @@ type CMap struct {
 	cached []byte
 }

-// NewToUnicodeCMap returns an identity CMap with codeToUnicode matching the `codeToUnicode` arg.
-func NewToUnicodeCMap(codeToUnicode map[CharCode]rune) *CMap {
+// NewToUnicodeCMap returns an identity CMap with codeToUnicode matching the `codeToRune` arg.
+func NewToUnicodeCMap(codeToRune map[CharCode]rune) *CMap {
+	codeToUnicode := make(map[CharCode]string, len(codeToRune))
+	for code, r := range codeToRune {
+		codeToUnicode[code] = string(r)
+	}
+
 	cmap := &CMap{
 		name:  "Adobe-Identity-UCS",
 		ctype: 2,
@ -128,13 +136,14 @@ func NewToUnicodeCMap(codeToUnicode map[CharCode]rune) *CMap {
 			Supplement: 0,
 		},
 		codespaces:    []Codespace{{Low: 0, High: 0xffff}},
-		codeToCID:     make(map[CharCode]CharCode),
-		cidToCode:     make(map[CharCode]CharCode),
 		codeToUnicode: codeToUnicode,
-		unicodeToCode: make(map[rune]CharCode),
+		unicodeToCode: make(map[string]CharCode, len(codeToRune)),
+		codeToCID:     make(map[CharCode]CharCode, len(codeToRune)),
+		cidToCode:     make(map[CharCode]CharCode, len(codeToRune)),
 	}

 	cmap.computeInverseMappings()
+
 	return cmap
 }

@ -148,8 +157,8 @@ func newCMap(isSimple bool) *CMap {
 		nbits:         nbits,
 		codeToCID:     make(map[CharCode]CharCode),
 		cidToCode:     make(map[CharCode]CharCode),
-		codeToUnicode: make(map[CharCode]rune),
-		unicodeToCode: make(map[rune]CharCode),
+		codeToUnicode: make(map[CharCode]string),
+		unicodeToCode: make(map[string]CharCode),
 	}
 }

@ -254,9 +263,9 @@ func (cmap *CMap) computeInverseMappings() {
 	}

 	// Generate Unicode -> CID map.
-	for cid, r := range cmap.codeToUnicode {
-		if c, ok := cmap.unicodeToCode[r]; !ok || (ok && c > cid) {
-			cmap.unicodeToCode[r] = cid
+	for cid, s := range cmap.codeToUnicode {
+		if c, ok := cmap.unicodeToCode[s]; !ok || (ok && c > cid) {
+			cmap.unicodeToCode[s] = cid
 		}
 	}

@ -277,19 +286,18 @@ func (cmap *CMap) CharcodeBytesToUnicode(data []byte) (string, int) {
 		return "", 0
 	}

-	var (
-		parts   []rune
-		missing []CharCode
-	)
-	for _, code := range charcodes {
+	parts := make([]string, len(charcodes))
+	var missing []CharCode
+	for i, code := range charcodes {
 		s, ok := cmap.codeToUnicode[code]
 		if !ok {
 			missing = append(missing, code)
-			s = MissingCodeRune
+			s = MissingCodeString
 		}
-		parts = append(parts, s)
+		parts[i] = s
 	}
-	unicode := string(parts)
+	unicode := strings.Join(parts, "")
+
 	if len(missing) > 0 {
 		common.Log.Debug("ERROR: CharcodeBytesToUnicode. Not in map.\n"+
 			"\tdata=[% 02x]=%#q\n"+
@ -305,17 +313,17 @@ func (cmap *CMap) CharcodeBytesToUnicode(data []byte) (string, int) {
 // CharcodeToUnicode converts a single character code `code` to a unicode string.
 // If `code` is not in the unicode map, MissingCodeString (U+FFFD) is returned.
 // NOTE: CharcodeBytesToUnicode is typically more efficient.
-func (cmap *CMap) CharcodeToUnicode(code CharCode) (rune, bool) {
+func (cmap *CMap) CharcodeToUnicode(code CharCode) (string, bool) {
 	if s, ok := cmap.codeToUnicode[code]; ok {
 		return s, true
 	}
-	return MissingCodeRune, false
+	return MissingCodeString, false
 }

-// RuneToCID maps the specified rune to a character identifier. If the provided
-// rune has no available mapping, the second return value is false.
-func (cmap *CMap) RuneToCID(r rune) (CharCode, bool) {
-	cid, ok := cmap.unicodeToCode[r]
+// StringToCID maps the specified string to a character identifier. If the provided
+// string has no available mapping, the bool return value is false.
+func (cmap *CMap) StringToCID(s string) (CharCode, bool) {
+	cid, ok := cmap.unicodeToCode[s]
 	return cid, ok
 }

@ -453,7 +461,7 @@ func (cmap *CMap) toBfData() string {
 	}

 	// codes is a sorted list of the codeToUnicode keys.
-	var codes []CharCode
+	codes := make([]CharCode, 0, len(cmap.codeToUnicode))
 	for code := range cmap.codeToUnicode {
 		codes = append(codes, code)
 	}
@ -473,7 +481,7 @@ func (cmap *CMap) toBfData() string {
 	prevRune := cmap.codeToUnicode[codes[0]]
 	for _, c := range codes[1:] {
 		currRune := cmap.codeToUnicode[c]
-		if c == currCharRange.code1+1 && currRune == prevRune+1 {
+		if c == currCharRange.code1+1 && lastRune(currRune) == lastRune(prevRune)+1 {
 			currCharRange.code1 = c
 		} else {
 			charRanges = append(charRanges, currCharRange)
@ -508,8 +516,8 @@ func (cmap *CMap) toBfData() string {
 			lines = append(lines, fmt.Sprintf("%d beginbfchar", n))
 			for j := 0; j < n; j++ {
 				code := fbChars[i*maxBfEntries+j]
-				r := cmap.codeToUnicode[code]
-				lines = append(lines, fmt.Sprintf("<%04x> <%04x>", code, r))
+				s := cmap.codeToUnicode[code]
+				lines = append(lines, fmt.Sprintf("<%04x> %s", code, hexCode(s)))
 			}
 			lines = append(lines, "endbfchar")
 		}
@ -521,8 +529,8 @@ func (cmap *CMap) toBfData() string {
 			lines = append(lines, fmt.Sprintf("%d beginbfrange", n))
 			for j := 0; j < n; j++ {
 				rng := fbRanges[i*maxBfEntries+j]
-				r := rng.r0
-				lines = append(lines, fmt.Sprintf("<%04x><%04x> <%04x>", rng.code0, rng.code1, r))
+				lines = append(lines, fmt.Sprintf("<%04x><%04x> %s",
+					rng.code0, rng.code1, hexCode(rng.r0)))
 			}
 			lines = append(lines, "endbfrange")
 		}
@ -530,6 +538,22 @@ func (cmap *CMap) toBfData() string {
 	return strings.Join(lines, "\n")
 }

+// lastRune returns the last rune in `s`.
+func lastRune(s string) rune {
+	runes := []rune(s)
+	return runes[len(runes)-1]
+}
+
+// hexCode returns the CMap hex code for `s`.
+func hexCode(s string) string {
+	runes := []rune(s)
+	codes := make([]string, len(runes))
+	for i, r := range runes {
+		codes[i] = fmt.Sprintf("%04x", r)
+	}
+	return fmt.Sprintf("<%s>", strings.Join(codes, ""))
+}
+
 const (
 	maxBfEntries = 100 // Maximum number of entries in a bfchar or bfrange section.
 	cmapHeader   = `
--- a/internal/cmap/cmap_parser.go
+++ b/internal/cmap/cmap_parser.go
@ -105,7 +105,8 @@ func (cmap *CMap) parse() error {
 func (cmap *CMap) parseName() error {
 	name := ""
 	done := false
-	for i := 0; i < 10 && !done; i++ {
+	// NOTE(peterwilliams97): We need up to 20 iterations of this loop for some PDFs I have seen.
+	for i := 0; i < 20 && !done; i++ {
 		o, err := cmap.parseObject()
 		if err != nil {
 			return err
@ -141,7 +142,6 @@ func (cmap *CMap) parseName() error {
 // parseType parses a cmap type and adds it to `cmap`.
 // cmap types are defined like this: /CMapType 1 def
 func (cmap *CMap) parseType() error {
-
 	ctype := 0
 	done := false
 	for i := 0; i < 3 && !done; i++ {
@ -171,7 +171,6 @@ func (cmap *CMap) parseType() error {
 // We don't need the version. We do this to eat up the version code in the cmap definition
 // to reduce unhandled parse object warnings.
 func (cmap *CMap) parseVersion() error {
-
 	version := ""
 	done := false
 	for i := 0; i < 3 && !done; i++ {
@ -471,7 +470,7 @@ func (cmap *CMap) parseBfchar() error {
 			}
 			return err
 		}
-		var target rune
+		var target []rune
 		switch v := o.(type) {
 		case cmapOperand:
 			if v.Operand == endbfchar {
@ -480,16 +479,16 @@ func (cmap *CMap) parseBfchar() error {
 			common.Log.Debug("ERROR: Unexpected operand. %#v", v)
 			return ErrBadCMap
 		case cmapHexString:
-			target = hexToRune(v)
+			target = hexToRunes(v)
 		case cmapName:
 			common.Log.Debug("ERROR: Unexpected name. %#v", v)
-			target = MissingCodeRune
+			target = []rune{MissingCodeRune}
 		default:
 			common.Log.Debug("ERROR: Unexpected type. %#v", o)
 			return ErrBadCMap
 		}

-		cmap.codeToUnicode[code] = target
+		cmap.codeToUnicode[code] = string(target)
 	}

 	return nil
@ -563,16 +562,17 @@ func (cmap *CMap) parseBfrange() error {
 				if !ok {
 					return errors.New("non-hex string in array")
 				}
-				r := hexToRune(hexs)
-				cmap.codeToUnicode[code] = r
+				runes := hexToRunes(hexs)
+				cmap.codeToUnicode[code] = string(runes)
 			}

 		case cmapHexString:
 			// <codeFrom> <codeTo> <dst>, maps [from,to] to [dst,dst+to-from].
-			r := hexToRune(v)
+			runes := hexToRunes(v)
+			n := len(runes)
 			for code := srcCodeFrom; code <= srcCodeTo; code++ {
-				cmap.codeToUnicode[code] = r
-				r++
+				cmap.codeToUnicode[code] = string(runes)
+				runes[n-1]++
 			}
 		default:
 			common.Log.Debug("ERROR: Unexpected type %T", o)
--- a/internal/cmap/cmap_test.go
+++ b/internal/cmap/cmap_test.go
@ -104,14 +104,14 @@ func TestCMapParser1(t *testing.T) {
 	}

 	for k, expected := range expectedMappings {
-		if v, ok := cmap.CharcodeToUnicode(k); !ok || v != expected {
+		if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) {
 			t.Errorf("incorrect mapping, expecting 0x%X ➞ 0x%X (%#v)", k, expected, v)
 			return
 		}
 	}

 	v, _ := cmap.CharcodeToUnicode(0x99)
-	if v != MissingCodeRune { //!= "notdef" {
+	if v != MissingCodeString { //!= "notdef" {
 		t.Errorf("Unmapped code, expected to map to undefined")
 		return
 	}
@ -188,7 +188,7 @@ func TestCMapParser2(t *testing.T) {
 	}

 	for k, expected := range expectedMappings {
-		if v, ok := cmap.CharcodeToUnicode(k); !ok || v != expected {
+		if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) {
 			t.Errorf("incorrect mapping, expecting 0x%X ➞ 0x%X (got 0x%X)", k, expected, v)
 			return
 		}
@ -297,7 +297,7 @@ func TestCMapParser3(t *testing.T) {
 		0xd140: 0xa000,
 	}
 	for k, expected := range expectedMappings {
-		if v, ok := cmap.CharcodeToUnicode(k); !ok || v != expected {
+		if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) {
 			t.Errorf("incorrect mapping: expecting 0x%02X ➞ 0x%02X (got 0x%02X)", k, expected, v)
 			return
 		}
@ -407,7 +407,7 @@ func TestCMapParser4(t *testing.T) {
 	}

 	for k, expected := range expectedMappings {
-		if v, ok := cmap.CharcodeToUnicode(k); !ok || v != expected {
+		if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) {
 			t.Errorf("incorrect mapping, expecting 0x%04X ➞ %+q (got %+q)", k, expected, v)
 			return
 		}
@ -520,6 +520,7 @@ var (
 		0x017b: 'Ż',
 		0x017d: 'Ž',
 	}
+
 	codeToUnicode3 = map[CharCode]rune{ // 93 entries
 		0x0124: 'Ĥ',
 		0x0125: 'ĥ',
@ -695,7 +696,7 @@ func checkCmapWriteRead(t *testing.T, codeToUnicode map[CharCode]rune) {
 		}
 		u0 := codeToUnicode[code]
 		u := cmap.codeToUnicode[code]
-		if u != u0 {
+		if u != string(u0) {
 			t.Errorf("Unicode mismatch: i=%d code0=0x%04x expected=%q test=%q", i, code, u0, u)
 			return
 		}
--- a/internal/textencoding/cmap.go
+++ b/internal/textencoding/cmap.go
@ -48,8 +48,8 @@ func (enc CMapEncoder) Decode(raw []byte) string {
 		if codes, ok := enc.codeToCID.BytesToCharcodes(raw); ok {
 			var buf bytes.Buffer
 			for _, code := range codes {
-				r, _ := enc.CharcodeToRune(CharCode(code))
-				buf.WriteRune(r)
+				s, _ := enc.charcodeToString(CharCode(code))
+				buf.WriteString(s)
 			}

 			return buf.String()
@ -67,7 +67,7 @@ func (enc CMapEncoder) RuneToCharcode(r rune) (CharCode, bool) {
 	}

 	// Map rune to CID.
-	cid, ok := enc.cidToUnicode.RuneToCID(r)
+	cid, ok := enc.cidToUnicode.StringToCID(string(r))
 	if !ok {
 		return 0, false
 	}
@ -87,8 +87,13 @@ func (enc CMapEncoder) RuneToCharcode(r rune) (CharCode, bool) {
 // CharcodeToRune converts PDF character code `code` to a rune.
 // The bool return flag is true if there was a match, and false otherwise.
 func (enc CMapEncoder) CharcodeToRune(code CharCode) (rune, bool) {
+	s, ok := enc.charcodeToString(code)
+	return ([]rune(s))[0], ok
+}
+
+func (enc CMapEncoder) charcodeToString(code CharCode) (string, bool) {
 	if enc.cidToUnicode == nil {
-		return MissingCodeRune, false
+		return MissingCodeString, false
 	}

 	// Map charcode to CID. If charcode to CID CMap is nil, assume Identity encoding.
@ -96,7 +101,7 @@ func (enc CMapEncoder) CharcodeToRune(code CharCode) (rune, bool) {
 	if enc.codeToCID != nil {
 		var ok bool
 		if cid, ok = enc.codeToCID.CharcodeToCID(cmap.CharCode(code)); !ok {
-			return MissingCodeRune, false
+			return MissingCodeString, false
 		}
 	}

--- a/internal/textencoding/glyphs_glyphlist.go
+++ b/internal/textencoding/glyphs_glyphlist.go
@ -18,7 +18,13 @@ import (
 )

 // MissingCodeRune is the rune returned when there is no matching glyph. It was previously '?'.
-const MissingCodeRune = '\ufffd' // <20>
+const (
+	// MissingCodeRune replaces runes that can't be decoded.
+	MissingCodeRune = '\ufffd' // U+FFFD REPLACEMENT CHARACTER
+
+	// MissingCodeString replaces strings that can't be decoded.
+	MissingCodeString = string(MissingCodeRune)
+)

 // GlyphToRune returns the rune corresponding to glyph `glyph` if there is one.
 // TODO: Can we return a string here? e.g. When we are extracting text, we want to get "ffi"
--- a/model/flatten.go
+++ b/model/flatten.go
@ -85,10 +85,11 @@ func (r *PdfReader) FlattenFields(allannots bool, appgen FieldAppearanceGenerato
 		var annots []*PdfAnnotation

 		// Wrap the content streams.
-		err := appgen.WrapContentStream(page)
-		if err != nil {
+		if appgen != nil {
+			if err := appgen.WrapContentStream(page); err != nil {
 				return err
 			}
+		}

 		annotations, err := page.GetAnnotations()
 		if err != nil {
--- a/model/font.go
+++ b/model/font.go
@ -420,16 +420,26 @@ func (font *PdfFont) BytesToCharcodes(data []byte) []textencoding.CharCode {
 	return charcodes
 }

-// CharcodesToUnicodeWithStats is identical to CharcodesToUnicode except returns more statistical
+// CharcodesToUnicodeWithStats is identical to CharcodesToUnicode except it returns more statistical
 // information about hits and misses from the reverse mapping process.
+// NOTE: The number of runes returned may be greater than the number of charcodes.
+// TODO(peterwilliams97): Deprecate in v4 and use only CharcodesToStrings()
 func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCode) (runelist []rune, numHits, numMisses int) {
+	texts, numHits, numMisses := font.CharcodesToStrings(charcodes)
+	return []rune(strings.Join(texts, "")), numHits, numMisses
+}
+
+// CharcodesToStrings returns the unicode strings corresponding to `charcodes`.
+// The int returns are the number of strings and the number of unconverted codes.
+// NOTE: The number of strings returned is equal to the number of charcodes.
+func (font *PdfFont) CharcodesToStrings(charcodes []textencoding.CharCode) ([]string, int, int) {
 	fontBase := font.baseFields()
-	runes := make([]rune, 0, len(charcodes))
-	numMisses = 0
+	texts := make([]string, 0, len(charcodes))
+	numMisses := 0
 	for _, code := range charcodes {
 		if fontBase.toUnicodeCmap != nil {
-			if r, ok := fontBase.toUnicodeCmap.CharcodeToUnicode(cmap.CharCode(code)); ok {
-				runes = append(runes, r)
+			if s, ok := fontBase.toUnicodeCmap.CharcodeToUnicode(cmap.CharCode(code)); ok {
+				texts = append(texts, s)
 				continue
 			}
 		}
@ -438,7 +448,7 @@ func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCo
 		encoder := font.Encoder()
 		if encoder != nil {
 			if r, ok := encoder.CharcodeToRune(code); ok {
-				runes = append(runes, r)
+				texts = append(texts, string(r))
 				continue
 			}
 		}
@ -447,7 +457,7 @@ func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCo
 			"\tfont=%s\n\tencoding=%s",
 			code, charcodes, fontBase.isCIDFont(), font, encoder)
 		numMisses++
-		runes = append(runes, cmap.MissingCodeRune)
+		texts = append(texts, cmap.MissingCodeString)
 	}

 	if numMisses != 0 {
@ -457,7 +467,7 @@ func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCo
 			len(charcodes), numMisses, font)
 	}

-	return runes, len(runes), numMisses
+	return texts, len(texts), numMisses
 }

 // CharcodeBytesToUnicode converts PDF character codes `data` to a Go unicode string.
@ -487,8 +497,8 @@ func (font *PdfFont) CharcodeBytesToUnicode(data []byte) (string, int, int) {
 //  1) Use the ToUnicode CMap if there is one.
 //  2) Use the underlying font's encoding.
 func (font *PdfFont) CharcodesToUnicode(charcodes []textencoding.CharCode) []rune {
-	strlist, _, _ := font.CharcodesToUnicodeWithStats(charcodes)
-	return strlist
+	runes, _, _ := font.CharcodesToUnicodeWithStats(charcodes)
+	return runes
 }

 // RunesToCharcodeBytes maps the provided runes to charcode bytes and it
--- a/model/form.go
+++ b/model/form.go
@ -317,26 +317,36 @@ func fillFieldValue(f *PdfField, val core.PdfObject) error {
 		default:
 			common.Log.Debug("ERROR: Unsupported text field V type: %T (%#v)", t, t)
 		}
-	case *PdfFieldButton, *PdfFieldChoice:
-		switch t := val.(type) {
+	case *PdfFieldButton:
+		// See section 12.7.4.2.3 "Check Boxes" (pp. 440-441 PDF32000_2008).
+		switch val.(type) {
 		case *core.PdfObjectName:
-			if len(t.String()) == 0 {
-				return nil
-			}
-			for _, wa := range f.Annotations {
-				wa.AS = val
-			}
+			if len(val.String()) > 0 {
 				f.V = val
+				setFieldAnnotAS(f, val)
+			}
 		case *core.PdfObjectString:
-			if len(t.String()) == 0 {
-				return nil
+			if len(val.String()) > 0 {
+				f.V = core.MakeName(val.String())
+				setFieldAnnotAS(f, f.V)
 			}
-			common.Log.Debug("Unexpected string for button/choice field. Converting to name: '%s'", t.String())
-			name := core.MakeName(t.String())
-			for _, wa := range f.Annotations {
-				wa.AS = name
+		default:
+			common.Log.Debug("ERROR: UNEXPECTED %s -> %v", f.PartialName(), val)
+			f.V = val
+		}
+	case *PdfFieldChoice:
+		// See section 12.7.4.4 "Choice Fields" (pp. 444-446 PDF32000_2008).
+		switch val.(type) {
+		case *core.PdfObjectName:
+			if len(val.String()) > 0 {
+				f.V = core.MakeString(val.String())
+				setFieldAnnotAS(f, val)
+			}
+		case *core.PdfObjectString:
+			if len(val.String()) > 0 {
+				f.V = val
+				setFieldAnnotAS(f, core.MakeName(val.String()))
 			}
-			f.V = name
 		default:
 			common.Log.Debug("ERROR: UNEXPECTED %s -> %v", f.PartialName(), val)
 			f.V = val
@ -347,3 +357,11 @@ func fillFieldValue(f *PdfField, val core.PdfObject) error {

 	return nil
 }
+
+// setFieldAnnotAS sets the appearance state (AS) of the field annotations to `val`.
+func setFieldAnnotAS(f *PdfField, val core.PdfObject) {
+	for _, wa := range f.Annotations {
+		wa.AS = val
+		wa.ToPdfObject()
+	}
+}
--- a/model/reader.go
+++ b/model/reader.go
@ -275,7 +275,7 @@ func (r *PdfReader) loadOutlines() (*PdfOutlineTreeNode, error) {
 	outlineRootObj := core.ResolveReference(outlinesObj)
 	common.Log.Trace("Outline root: %v", outlineRootObj)

-	if _, isNull := outlineRootObj.(*core.PdfObjectNull); isNull {
+	if isNull := core.IsNullObject(outlineRootObj); isNull {
 		common.Log.Trace("Outline root is null - no outlines")
 		return nil, nil
 	}