diff --git a/annotator/field_appearance.go b/annotator/field_appearance.go index 07b2d7ef..fdacaaf6 100644 --- a/annotator/field_appearance.go +++ b/annotator/field_appearance.go @@ -13,13 +13,14 @@ import ( "github.com/unidoc/unipdf/v3/common" "github.com/unidoc/unipdf/v3/contentstream" + "github.com/unidoc/unipdf/v3/contentstream/draw" "github.com/unidoc/unipdf/v3/core" "github.com/unidoc/unipdf/v3/internal/textencoding" "github.com/unidoc/unipdf/v3/model" ) // FieldAppearance implements interface model.FieldAppearanceGenerator and generates appearance streams -// for fields taking into account what value is in the field. A common use case is for generating the +// for fields taking into account what value is in the field. A common use case is for generating the // appearance stream prior to flattening fields. // // If `OnlyIfMissing` is true, the field appearance is generated only for fields that do not have an @@ -35,6 +36,7 @@ type FieldAppearance struct { type AppearanceStyle struct { // How much of Rect height to fill when autosizing text. AutoFontSizeFraction float64 + // CheckmarkRune is a rune used for check mark in checkboxes (for ZapfDingbats font). CheckmarkRune rune @@ -51,6 +53,47 @@ type AppearanceStyle struct { // Allow field MK appearance characteristics to override style settings. AllowMK bool + + // Fonts holds appearance styles for fonts. + Fonts *AppearanceFontStyle +} + +// AppearanceFontStyle defines font style characteristics for form fields, +// used in the filling/flattening process. +type AppearanceFontStyle struct { + // Fallback represents a global font fallback, used for fields which do + // not specify a font in their default appearance (DA). The fallback is + // also used if there is a font specified in the DA, but it is not + // found in the AcroForm resources (DR). + Fallback *AppearanceFont + + // FieldFallbacks defines font fallbacks for specific fields. The map keys + // represent the names of the fields (which can be specified by their + // partial or full names). Specific field fallback fonts take precedence + // over the global font fallback. + FieldFallbacks map[string]*AppearanceFont + + // ForceReplace forces the replacement of fonts in the filling/flattening + // process, even if the default appearance (DA) specify a valid font. + // If no fallback font is provided, setting this field has no effect. + ForceReplace bool +} + +// AppearanceFont represents a font used for generating the appearance of a +// field in the filling/flattening process. +type AppearanceFont struct { + // Name represents the name of the font which will be added to the + // AcroForm resources (DR). + Name string + + // Font represents the actual font used for the field appearance. + Font *model.PdfFont + + // Size represents the size of the font used for the field appearance. + // If size is 0, a default font size will be used. + // The default font size is calculated using the available annotation + // height and the AutoFontSizeFraction of the AppearanceStyle. + Size float64 } type quadding int @@ -96,6 +139,9 @@ func (fa FieldAppearance) GenerateAppearanceDict(form *model.PdfAcroForm, field common.Log.Trace("Already populated - ignoring") return appDict, nil } + if form.DR == nil { + form.DR = model.NewPdfPageResources() + } // Generate the appearance. switch t := field.GetContext().(type) { @@ -172,26 +218,26 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT if err != nil { return nil, err } - width := rect.Width() - height := rect.Height() + width, height := rect.Width(), rect.Height() + var rotation float64 if mkDict, has := core.GetDict(wa.MK); has { bsDict, _ := core.GetDict(wa.BS) err := style.applyAppearanceCharacteristics(mkDict, bsDict, nil) if err != nil { return nil, err } + rotation, _ = core.GetNumberAsFloat(mkDict.Get("R")) } // Get and process the default appearance string (DA) operands. - da := getDA(ftxt.PdfField) - csp := contentstream.NewContentStreamParser(da) - daOps, err := csp.Parse() + daOps, err := contentstream.NewContentStreamParser(getDA(ftxt.PdfField)).Parse() if err != nil { return nil, err } cc := contentstream.NewContentCreator() + if style.BorderSize > 0 { drawRect(cc, style, width, height) } @@ -205,62 +251,44 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT cc.Add_BMC("Tx") cc.Add_q() + + bboxWidth, bboxHeight := width, height + if rotation != 0 { + // Calculate bounding box before rotation. + revRotation := -rotation + bbox := draw.Path{Points: []draw.Point{ + draw.NewPoint(0, 0).Rotate(revRotation), + draw.NewPoint(width, 0).Rotate(revRotation), + draw.NewPoint(0, height).Rotate(revRotation), + draw.NewPoint(width, height).Rotate(revRotation), + }}.GetBoundingBox() + + // Update width and height, as the appearance is generated based on + // the bounding of the annotation with no rotation. + width = bbox.Width + height = bbox.Height + + // Apply rotation. + cc.RotateDeg(rotation) + cc.Translate(bbox.X, bbox.Y) + } + // Graphic state changes. cc.Add_BT() - // Add DA operands. - var fontsize float64 - var fontname *core.PdfObjectName - var font *model.PdfFont - autosize := true - - fontsizeDef := height * style.AutoFontSizeFraction - for _, op := range *daOps { - // When Tf specified with font size is 0, it means we should set on our own based on the Rect (autosize). - if op.Operand == "Tf" && len(op.Params) == 2 { - if name, ok := core.GetName(op.Params[0]); ok { - fontname = name - } - num, err := core.GetNumberAsFloat(op.Params[1]) - if err == nil { - fontsize = num - } else { - common.Log.Debug("ERROR invalid font size: %v", op.Params[1]) - } - if fontsize == 0 { - // Use default if zero. - fontsize = fontsizeDef - } else { - // Disable autosize when font size (>0) explicitly specified. - autosize = false - } - // Skip over (set fontsize in code below). - continue - } - cc.AddOperand(*op) + // Process DA operands. + apFont, hasTf, err := style.processDA(ftxt.PdfField, daOps, dr, resources, cc) + if err != nil { + return nil, err } - // If the font name is not set or not found in the form resources, use - // the default fallback font (Helvetica). - var fontObj core.PdfObject - if dr != nil && fontname != nil { - if fObj, has := dr.GetFontByName(*fontname); has { - if font, err = model.NewPdfFontFromPdfObject(fObj); err != nil { - common.Log.Debug("ERROR: could not load appearance font: %v", err) - return nil, err - } - fontObj = fObj - } + font := apFont.Font + fontsize := apFont.Size + fontname := core.MakeName(apFont.Name) + autosize := fontsize == 0 + if autosize && hasTf { + fontsize = height * style.AutoFontSizeFraction } - if fontObj == nil { - // Font not found. Reverting to Helvetica with name `Helv`. - if font, err = model.NewStandard14Font("Helvetica"); err != nil { - return nil, err - } - fontname = core.MakeName("Helv") - fontObj = font.ToPdfObject() - } - resources.SetFontByName(*fontname, fontObj) encoder := font.Encoder() if encoder == nil { @@ -461,7 +489,7 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT xform := model.NewXObjectForm() xform.Resources = resources - xform.BBox = core.MakeArrayFromFloats([]float64{0, 0, width, height}) + xform.BBox = core.MakeArrayFromFloats([]float64{0, 0, bboxWidth, bboxHeight}) xform.SetContentStream(cc.Bytes(), defStreamEncoder()) apDict := core.MakeDict() @@ -480,16 +508,11 @@ func genFieldTextCombAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFi if !ok { return nil, errors.New("invalid Rect") } - rect, err := array.ToFloat64Array() + rect, err := model.NewPdfRectangle(*array) if err != nil { return nil, err } - if len(rect) != 4 { - return nil, errors.New("len(Rect) != 4") - } - - width := rect[2] - rect[0] - height := rect[3] - rect[1] + width, height := rect.Width(), rect.Height() if mkDict, has := core.GetDict(wa.MK); has { bsDict, _ := core.GetDict(wa.BS) @@ -510,9 +533,7 @@ func genFieldTextCombAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFi boxwidth := float64(width) / float64(maxLen) // Get and process the default appearance string (DA) operands. - da := getDA(ftxt.PdfField) - csp := contentstream.NewContentStreamParser(da) - daOps, err := csp.Parse() + daOps, err := contentstream.NewContentStreamParser(getDA(ftxt.PdfField)).Parse() if err != nil { return nil, err } @@ -529,68 +550,28 @@ func genFieldTextCombAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFi } cc.Add_BMC("Tx") cc.Add_q() + // Graphic state changes. cc.Add_BT() - // Add DA operands. - var fontsize float64 - var fontname *core.PdfObjectName - var font *model.PdfFont - autosize := true - - fontsizeDef := height * style.AutoFontSizeFraction - for _, op := range *daOps { - // If TF specified and font size is 0, it means we should set on our own based on the Rect. - if op.Operand == "Tf" && len(op.Params) == 2 { - if name, ok := core.GetName(op.Params[0]); ok { - fontname = name - } - num, err := core.GetNumberAsFloat(op.Params[1]) - if err == nil { - fontsize = num - } else { - common.Log.Debug("ERROR invalid font size: %v", op.Params[1]) - } - if fontsize == 0 { - // Use default if zero. - fontsize = fontsizeDef - } else { - // Disable autosize when font size (>0) explicitly specified. - autosize = false - } - // Skip over (set fontsize in code below). - continue - } - cc.AddOperand(*op) + // Process DA operands. + apFont, hasTf, err := style.processDA(ftxt.PdfField, daOps, dr, resources, cc) + if err != nil { + return nil, err } - // If fontname not set need to make a new font or use one defined in the resources. - // e.g. Helv commonly used for Helvetica. - if fontname == nil || dr == nil { - // Font not set, revert to Helvetica with name "Helv". - fontname = core.MakeName("Helv") - helv, err := model.NewStandard14Font("Helvetica") - if err != nil { - return nil, err - } - font = helv - resources.SetFontByName(*fontname, helv.ToPdfObject()) - cc.Add_Tf(*fontname, fontsizeDef) - } else { - fontobj, has := dr.GetFontByName(*fontname) - if !has { - return nil, errors.New("font not in DR") - } - font, err = model.NewPdfFontFromPdfObject(fontobj) - if err != nil { - common.Log.Debug("ERROR loading default appearance font: %v", err) - return nil, err - } - resources.SetFontByName(*fontname, fontobj) + font := apFont.Font + fontname := core.MakeName(apFont.Name) + fontsize := apFont.Size + autosize := fontsize == 0 + if autosize && hasTf { + fontsize = height * style.AutoFontSizeFraction } + encoder := font.Encoder() if encoder == nil { - common.Log.Debug("ERROR - Encoder is nil - can expect bad results") + common.Log.Debug("WARN: font encoder is nil. Assuming identity encoder. Output may be incorrect.") + encoder = textencoding.NewIdentityTextEncoder("Identity-H") } var text string @@ -711,26 +692,19 @@ func genFieldTextCombAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFi // genFieldCheckboxAppearance generates an appearance dictionary for a widget annotation `wa` referenced by // a button field `fbtn` with form resources `dr` (DR). func genFieldCheckboxAppearance(wa *model.PdfAnnotationWidget, fbtn *model.PdfFieldButton, dr *model.PdfPageResources, style AppearanceStyle) (*core.PdfObjectDictionary, error) { - // TODO(dennwc): unused parameters - // Get bounding Rect. array, ok := core.GetArray(wa.Rect) if !ok { return nil, errors.New("invalid Rect") } - rect, err := array.ToFloat64Array() + rect, err := model.NewPdfRectangle(*array) if err != nil { return nil, err } - if len(rect) != 4 { - return nil, errors.New("len(Rect) != 4") - } + width, height := rect.Width(), rect.Height() common.Log.Debug("Checkbox, wa BS: %v", wa.BS) - width := rect[2] - rect[0] - height := rect[3] - rect[1] - zapfdb, err := model.NewStandard14Font("ZapfDingbats") if err != nil { return nil, err @@ -825,26 +799,16 @@ func genFieldComboboxAppearance(form *model.PdfAcroForm, wa *model.PdfAnnotation if !ok { return nil, errors.New("invalid Rect") } - rect, err := array.ToFloat64Array() + rect, err := model.NewPdfRectangle(*array) if err != nil { return nil, err } - if len(rect) != 4 { - return nil, errors.New("len(Rect) != 4") - } + width, height := rect.Width(), rect.Height() common.Log.Debug("Choice, wa BS: %v", wa.BS) - width := rect[2] - rect[0] - height := rect[3] - rect[1] - // Get and process the default appearance string (DA) operands. - da := core.MakeString("") - if form.DA != nil { - da, _ = core.GetString(form.DA) - } - csp := contentstream.NewContentStreamParser(da.String()) - daOps, err := csp.Parse() + daOps, err := contentstream.NewContentStreamParser(getDA(fch.PdfField)).Parse() if err != nil { return nil, err } @@ -857,22 +821,25 @@ func genFieldComboboxAppearance(form *model.PdfAcroForm, wa *model.PdfAnnotation } } + // See section 12.7.4.4 "Choice Fields" (pp. 444-446 PDF32000_2008). dchoiceapp := core.MakeDict() for _, optObj := range fch.Opt.Elements() { + if optArr, ok := core.GetArray(optObj); ok && optArr.Len() == 2 { + optObj = optArr.Get(1) + } + var optstr string if opt, ok := core.GetString(optObj); ok { + optstr = opt.Decoded() + } else if opt, ok := core.GetName(optObj); ok { optstr = opt.String() } else { - if opt, ok := core.GetName(optObj); ok { - optstr = opt.String() - } else { - common.Log.Debug("ERROR: Opt not a name/string - %T", optObj) - return nil, errors.New("not a name/string") - } + common.Log.Debug("ERROR: Opt not a name/string - %T", optObj) + return nil, errors.New("not a name/string") } if len(optstr) > 0 { - xform, err := makeComboboxTextXObjForm(width, height, optstr, style, daOps, form.DR) + xform, err := makeComboboxTextXObjForm(fch.PdfField, width, height, optstr, style, daOps, form.DR) if err != nil { return nil, err } @@ -888,7 +855,9 @@ func genFieldComboboxAppearance(form *model.PdfAcroForm, wa *model.PdfAnnotation } // Make a text-based XObj Form. -func makeComboboxTextXObjForm(width, height float64, text string, style AppearanceStyle, daOps *contentstream.ContentStreamOperations, dr *model.PdfPageResources) (*model.XObjectForm, error) { +func makeComboboxTextXObjForm(field *model.PdfField, width, height float64, + text string, style AppearanceStyle, daOps *contentstream.ContentStreamOperations, + dr *model.PdfPageResources) (*model.XObjectForm, error) { resources := model.NewPdfPageResources() cc := contentstream.NewContentCreator() @@ -906,63 +875,25 @@ func makeComboboxTextXObjForm(width, height float64, text string, style Appearan // Graphic state changes. cc.Add_BT() - // Add DA operands. - var fontsize float64 - var fontname *core.PdfObjectName - var font *model.PdfFont - var err error - autosize := true - - fontsizeDef := height * style.AutoFontSizeFraction - for _, op := range *daOps { - // When Tf specified with font size is 0, it means we should set on our own based on the Rect (autosize). - if op.Operand == "Tf" && len(op.Params) == 2 { - if name, ok := core.GetName(op.Params[0]); ok { - fontname = name - } - num, err := core.GetNumberAsFloat(op.Params[1]) - if err == nil { - fontsize = num - } else { - common.Log.Debug("ERROR invalid font size: %v", op.Params[1]) - } - if fontsize == 0 { - // Use default if zero. - fontsize = fontsizeDef - } else { - // Disable autosize when font size (>0) explicitly specified. - autosize = false - } - // Skip over (set fontsize in code below). - continue - } - cc.AddOperand(*op) + // Process DA operands. + apFont, hasTf, err := style.processDA(field, daOps, dr, resources, cc) + if err != nil { + return nil, err } - // If fontname not set need to make a new font or use one defined in the resources. - // e.g. Helv commonly used for Helvetica. - if fontname == nil || dr == nil { - // Font not set, revert to Helvetica with name "Helv". - fontname = core.MakeName("Helv") - helv, err := model.NewStandard14Font("Helvetica") - if err != nil { - return nil, err - } - font = helv - resources.SetFontByName(*fontname, helv.ToPdfObject()) - } else { - fontobj, has := dr.GetFontByName(*fontname) - if !has { - return nil, errors.New("font not in DR") - } - font, err = model.NewPdfFontFromPdfObject(fontobj) - if err != nil { - common.Log.Debug("ERROR loading default appearance font: %v", err) - return nil, err - } - resources.SetFontByName(*fontname, fontobj) + font := apFont.Font + fontsize := apFont.Size + fontname := core.MakeName(apFont.Name) + autosize := fontsize == 0 + if autosize && hasTf { + fontsize = height * style.AutoFontSizeFraction } + encoder := font.Encoder() + if encoder == nil { + common.Log.Debug("WARN: font encoder is nil. Assuming identity encoder. Output may be incorrect.") + encoder = textencoding.NewIdentityTextEncoder("Identity-H") + } // If no text, no appearance needed. if len(text) == 0 { @@ -1136,6 +1067,105 @@ func (style *AppearanceStyle) applyAppearanceCharacteristics(mkDict *core.PdfObj return nil } +// processDA adds the operands found in the field default appearance stream to +// the provided content stream creator. It also provides a fallback font, based +// on the configuration of the AppearanceStyle, if no valid font is specified +// in the default appearance. The method returns the font to be used when +// generating the appearance of the field and a boolean value specifying if +// the DA stream contains any Tf operands. +func (style *AppearanceStyle) processDA(field *model.PdfField, + daOps *contentstream.ContentStreamOperations, dr, resources *model.PdfPageResources, + cc *contentstream.ContentCreator) (*AppearanceFont, bool, error) { + // Check for fallback fonts. + var fallbackFont *AppearanceFont + var forceReplace bool + if style.Fonts != nil { + // Use global fallback, if one is specified. + if style.Fonts.Fallback != nil { + fallbackFont = style.Fonts.Fallback + } + + // Use field fallback, if one is specified. + if fieldFallbacks := style.Fonts.FieldFallbacks; fieldFallbacks != nil { + if fbFont, ok := fieldFallbacks[field.PartialName()]; ok { + fallbackFont = fbFont + } else if fullName, err := field.FullName(); err == nil { + if fbFont, ok := fieldFallbacks[fullName]; ok { + fallbackFont = fbFont + } + } + } + + forceReplace = style.Fonts.ForceReplace + } + + // Iterate over the DA operands and extract the font, if specified. + var fontName string + var fontSize float64 + var hasTf bool + if daOps != nil { + for _, op := range *daOps { + if op.Operand == "Tf" && len(op.Params) == 2 { + if name, ok := core.GetNameVal(op.Params[0]); ok { + fontName = name + } + if size, err := core.GetNumberAsFloat(op.Params[1]); err == nil { + fontSize = size + } + hasTf = true + continue + } + cc.AddOperand(*op) + } + } + + var apFont *AppearanceFont + var apFontObj core.PdfObject + if forceReplace && fallbackFont != nil { + apFont = fallbackFont + } else { + // Check if font name was found in the DA stream and search it in the resources. + if dr != nil && fontName != "" { + if obj, ok := dr.GetFontByName(*core.MakeName(fontName)); ok { + if font, err := model.NewPdfFontFromPdfObject(obj); err == nil { + apFontObj = obj + apFont = &AppearanceFont{Name: fontName, Font: font, Size: fontSize} + } else { + common.Log.Debug("ERROR: could not load appearance font: %v", err) + } + } + } + + // Use fallback font, if one was specified. + if apFont == nil && fallbackFont != nil { + apFont = fallbackFont + } + + // Use default fallback font (Helvetica). + if apFont == nil { + font, err := model.NewStandard14Font("Helvetica") + if err != nil { + return nil, false, err + } + apFont = &AppearanceFont{Name: "Helv", Font: font, Size: fontSize} + } + } + + // Add appearance font to the form resources (DR). + apFontName := *core.MakeName(apFont.Name) + if apFontObj == nil { + apFontObj = apFont.Font.ToPdfObject() + } + if dr != nil && !dr.HasFontByName(apFontName) { + dr.SetFontByName(apFontName, apFontObj) + } + if resources != nil && !resources.HasFontByName(apFontName) { + resources.SetFontByName(apFontName, apFontObj) + } + + return apFont, hasTf, nil +} + // WrapContentStream ensures that the entire content stream for a `page` is wrapped within q ... Q operands. // Ensures that following operands that are added are not affected by additional operands that are added. // Implements interface model.ContentStreamWrapper. diff --git a/common/logging.go b/common/logging.go index b7452bf6..b3e62348 100644 --- a/common/logging.go +++ b/common/logging.go @@ -221,7 +221,7 @@ func (l WriterLogger) logToWriter(f io.Writer, prefix string, format string, arg } func logToWriter(f io.Writer, prefix string, format string, args ...interface{}) { - _, file, line, ok := runtime.Caller(2) + _, file, line, ok := runtime.Caller(3) if !ok { file = "???" line = 0 diff --git a/common/version.go b/common/version.go index 64f11253..01f44ad2 100644 --- a/common/version.go +++ b/common/version.go @@ -11,12 +11,12 @@ import ( ) const releaseYear = 2020 -const releaseMonth = 5 -const releaseDay = 25 -const releaseHour = 23 -const releaseMin = 35 +const releaseMonth = 6 +const releaseDay = 15 +const releaseHour = 20 +const releaseMin = 15 // Version holds version information, when bumping this make sure to bump the released at stamp also. -const Version = "3.7.1" +const Version = "3.8.0" var ReleasedAt = time.Date(releaseYear, releaseMonth, releaseDay, releaseHour, releaseMin, 0, 0, time.UTC) diff --git a/extractor/text.go b/extractor/text.go index 01c6a06f..659b3051 100644 --- a/extractor/text.go +++ b/extractor/text.go @@ -702,7 +702,7 @@ func (to *textObject) reset() { func (to *textObject) renderText(data []byte) error { font := to.getCurrentFont() charcodes := font.BytesToCharcodes(data) - runes, numChars, numMisses := font.CharcodesToUnicodeWithStats(charcodes) + texts, numChars, numMisses := font.CharcodesToStrings(charcodes) if numMisses > 0 { common.Log.Debug("renderText: numChars=%d numMisses=%d", numChars, numMisses) } @@ -721,18 +721,18 @@ func (to *textObject) renderText(data []byte) error { spaceMetrics, _ = model.DefaultFont().GetRuneMetrics(' ') } spaceWidth := spaceMetrics.Wx * glyphTextRatio - common.Log.Trace("spaceWidth=%.2f text=%q font=%s fontSize=%.1f", spaceWidth, runes, font, tfs) + common.Log.Trace("spaceWidth=%.2f text=%q font=%s fontSize=%.2f", spaceWidth, texts, font, tfs) stateMatrix := transform.NewMatrix( tfs*th, 0, 0, tfs, 0, state.trise) - common.Log.Trace("renderText: %d codes=%+v runes=%q", len(charcodes), charcodes, runes) + common.Log.Trace("renderText: %d codes=%+v runes=%q", len(charcodes), charcodes, len(texts)) - for i, r := range runes { - // TODO(peterwilliams97): Need to find and fix cases where this happens. - if r == '\x00' { + for i, text := range texts { + r := []rune(text) + if len(r) == 1 && r[0] == '\x00' { continue } @@ -746,14 +746,14 @@ func (to *textObject) renderText(data []byte) error { // w is the unscaled movement at the end of a word. w := 0.0 - if r == ' ' { + if string(r) == " " { w = state.tw } m, ok := font.GetCharMetrics(code) if !ok { common.Log.Debug("ERROR: No metric for code=%d r=0x%04x=%+q %s", code, r, r, font) - return errors.New("no char metrics") + return fmt.Errorf("no char metrics: font=%s code=%d", font.String(), code) } // c is the character size in unscaled text units. @@ -774,7 +774,7 @@ func (to *textObject) renderText(data []byte) error { common.Log.Trace("m=%s c=%+v t0=%+v td0=%s trm0=%s", m, c, t0, td0, td0.Mult(to.tm).Mult(to.gs.CTM)) mark := to.newTextMark( - string(r), + text, trm, translation(to.gs.CTM.Mult(to.tm).Mult(td0)), math.Abs(spaceWidth*trm.ScalingFactorX()), diff --git a/extractor/text_test.go b/extractor/text_test.go index 651ef63f..89b920f3 100644 --- a/extractor/text_test.go +++ b/extractor/text_test.go @@ -314,6 +314,11 @@ var fileExtractionTests = []struct { `The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",`}, }, }, + {filename: "Saudi.pdf", + pageTerms: map[int][]string{ + 10: []string{"الله"}, + }, + }, // TODO(peterwilliams97): Reinstate these 2 tests when diacritic combination is fixed. // {filename: "Ito_Formula.pdf", // pageTerms: map[int][]string{ diff --git a/fjson/fielddata_test.go b/fjson/fielddata_test.go index e8d034af..bb7551cd 100644 --- a/fjson/fielddata_test.go +++ b/fjson/fielddata_test.go @@ -148,6 +148,7 @@ func TestJSONExtractAndFill(t *testing.T) { fieldDataExp, err := LoadFromJSONFile("./testdata/advancedform.json") require.NoError(t, err) jsonDataExp, err := fieldDataExp.JSON() + require.NoError(t, err) // Check templates for equality. require.Equal(t, jsonDataExp, jsonData) @@ -184,6 +185,7 @@ func TestJSONExtractAndFill(t *testing.T) { fieldDataExp, err = LoadFromJSON(bytes.NewReader(jsonBytes)) require.NoError(t, err) jsonDataExp, err = fieldDataExp.JSON() + require.NoError(t, err) // Fill test PDF form fields and write to buffer. f, err := os.Open(inputFilePath) @@ -212,6 +214,47 @@ func TestJSONExtractAndFill(t *testing.T) { fieldData, err = LoadFromPDF(bytes.NewReader(buf.Bytes())) require.NoError(t, err) jsonData, err = fieldData.JSON() + require.NoError(t, err) + + // Check field data for equality. + require.Equal(t, jsonDataExp, jsonData) +} + +func TestJSONFillAndExtract(t *testing.T) { + // Read JSON fill data. + fieldDataExp, err := LoadFromJSONFile("./testdata/mixedfields.json") + require.NoError(t, err) + jsonDataExp, err := fieldDataExp.JSON() + require.NoError(t, err) + + // Fill test PDF form fields and write to buffer. + f, err := os.Open("./testdata/mixedfields.pdf") + require.NoError(t, err) + defer f.Close() + + reader, err := model.NewPdfReader(f) + require.NoError(t, err) + + err = reader.AcroForm.Fill(fieldDataExp) + require.NoError(t, err) + + var buf bytes.Buffer + writer := model.NewPdfWriter() + for i := range reader.PageList { + err := writer.AddPage(reader.PageList[i]) + require.NoError(t, err) + } + + err = writer.SetForms(reader.AcroForm) + require.NoError(t, err) + err = writer.Write(&buf) + require.NoError(t, err) + + // Load field data from buffer. + fieldData, err := LoadFromPDF(bytes.NewReader(buf.Bytes())) + require.NoError(t, err) + jsonData, err := fieldData.JSON() + require.NoError(t, err) // Check field data for equality. require.Equal(t, jsonDataExp, jsonData) diff --git a/fjson/testdata/mixedfields.json b/fjson/testdata/mixedfields.json new file mode 100644 index 00000000..3ee55d32 --- /dev/null +++ b/fjson/testdata/mixedfields.json @@ -0,0 +1,94 @@ +[ + { + "name": "Given Name Text Box", + "value": "Jane" + }, + { + "name": "Family Name Text Box", + "value": "Doe" + }, + { + "name": "House nr Text Box", + "value": "100" + }, + { + "name": "Address 2 Text Box", + "value": "Generic Avenue" + }, + { + "name": "Postcode Text Box", + "value": "11122" + }, + { + "name": "Country Combo Box", + "value": "France" + }, + { + "name": "Height Formatted Field", + "value": "175" + }, + { + "name": "City Text Box", + "value": "Paris" + }, + { + "name": "Driving License Check Box", + "value": "Yes", + "options": [ + "Yes", + "Off" + ] + }, + { + "name": "Favourite Colour List Box", + "value": "Yellow" + }, + { + "name": "Language 1 Check Box", + "value": "Yes", + "options": [ + "Yes", + "Off" + ] + }, + { + "name": "Language 2 Check Box", + "value": "Off", + "options": [ + "Yes", + "Off" + ] + }, + { + "name": "Language 3 Check Box", + "value": "Yes", + "options": [ + "Yes", + "Off" + ] + }, + { + "name": "Language 4 Check Box", + "value": "Off", + "options": [ + "Yes", + "Off" + ] + }, + { + "name": "Language 5 Check Box", + "value": "Yes", + "options": [ + "Yes", + "Off" + ] + }, + { + "name": "Gender List Box", + "value": "Woman" + }, + { + "name": "Address 1 Text Box", + "value": "Generic Street" + } +] diff --git a/fjson/testdata/mixedfields.pdf b/fjson/testdata/mixedfields.pdf new file mode 100644 index 00000000..72d0d21d Binary files /dev/null and b/fjson/testdata/mixedfields.pdf differ diff --git a/internal/cmap/cmap.go b/internal/cmap/cmap.go index 1299faa5..2729f934 100644 --- a/internal/cmap/cmap.go +++ b/internal/cmap/cmap.go @@ -21,6 +21,9 @@ const ( // MissingCodeRune replaces runes that can't be decoded. '\ufffd' = �. Was '?'. MissingCodeRune = '\ufffd' // � + + // MissingCodeString replaces strings that can't be decoded. + MissingCodeString = string(MissingCodeRune) ) // CharCode is a character code or Unicode @@ -41,7 +44,7 @@ type charRange struct { type fbRange struct { code0 CharCode code1 CharCode - r0 rune + r0 string } // CIDSystemInfo contains information for identifying the character collection @@ -106,8 +109,8 @@ type CMap struct { cidToCode map[CharCode]CharCode // CID -> charcode // Used by ctype 2 CMaps. - codeToUnicode map[CharCode]rune // CID -> Unicode - unicodeToCode map[rune]CharCode // Unicode -> CID + codeToUnicode map[CharCode]string // CID -> Unicode string + unicodeToCode map[string]CharCode // Unicode rune -> CID // cached contains the raw CMap data. It is used by the Bytes method in // order to avoid generating the data for every call. @@ -116,8 +119,13 @@ type CMap struct { cached []byte } -// NewToUnicodeCMap returns an identity CMap with codeToUnicode matching the `codeToUnicode` arg. -func NewToUnicodeCMap(codeToUnicode map[CharCode]rune) *CMap { +// NewToUnicodeCMap returns an identity CMap with codeToUnicode matching the `codeToRune` arg. +func NewToUnicodeCMap(codeToRune map[CharCode]rune) *CMap { + codeToUnicode := make(map[CharCode]string, len(codeToRune)) + for code, r := range codeToRune { + codeToUnicode[code] = string(r) + } + cmap := &CMap{ name: "Adobe-Identity-UCS", ctype: 2, @@ -128,13 +136,14 @@ func NewToUnicodeCMap(codeToUnicode map[CharCode]rune) *CMap { Supplement: 0, }, codespaces: []Codespace{{Low: 0, High: 0xffff}}, - codeToCID: make(map[CharCode]CharCode), - cidToCode: make(map[CharCode]CharCode), codeToUnicode: codeToUnicode, - unicodeToCode: make(map[rune]CharCode), + unicodeToCode: make(map[string]CharCode, len(codeToRune)), + codeToCID: make(map[CharCode]CharCode, len(codeToRune)), + cidToCode: make(map[CharCode]CharCode, len(codeToRune)), } cmap.computeInverseMappings() + return cmap } @@ -148,8 +157,8 @@ func newCMap(isSimple bool) *CMap { nbits: nbits, codeToCID: make(map[CharCode]CharCode), cidToCode: make(map[CharCode]CharCode), - codeToUnicode: make(map[CharCode]rune), - unicodeToCode: make(map[rune]CharCode), + codeToUnicode: make(map[CharCode]string), + unicodeToCode: make(map[string]CharCode), } } @@ -254,9 +263,9 @@ func (cmap *CMap) computeInverseMappings() { } // Generate Unicode -> CID map. - for cid, r := range cmap.codeToUnicode { - if c, ok := cmap.unicodeToCode[r]; !ok || (ok && c > cid) { - cmap.unicodeToCode[r] = cid + for cid, s := range cmap.codeToUnicode { + if c, ok := cmap.unicodeToCode[s]; !ok || (ok && c > cid) { + cmap.unicodeToCode[s] = cid } } @@ -277,19 +286,18 @@ func (cmap *CMap) CharcodeBytesToUnicode(data []byte) (string, int) { return "", 0 } - var ( - parts []rune - missing []CharCode - ) - for _, code := range charcodes { + parts := make([]string, len(charcodes)) + var missing []CharCode + for i, code := range charcodes { s, ok := cmap.codeToUnicode[code] if !ok { missing = append(missing, code) - s = MissingCodeRune + s = MissingCodeString } - parts = append(parts, s) + parts[i] = s } - unicode := string(parts) + unicode := strings.Join(parts, "") + if len(missing) > 0 { common.Log.Debug("ERROR: CharcodeBytesToUnicode. Not in map.\n"+ "\tdata=[% 02x]=%#q\n"+ @@ -305,17 +313,17 @@ func (cmap *CMap) CharcodeBytesToUnicode(data []byte) (string, int) { // CharcodeToUnicode converts a single character code `code` to a unicode string. // If `code` is not in the unicode map, '�' is returned. // NOTE: CharcodeBytesToUnicode is typically more efficient. -func (cmap *CMap) CharcodeToUnicode(code CharCode) (rune, bool) { +func (cmap *CMap) CharcodeToUnicode(code CharCode) (string, bool) { if s, ok := cmap.codeToUnicode[code]; ok { return s, true } - return MissingCodeRune, false + return MissingCodeString, false } -// RuneToCID maps the specified rune to a character identifier. If the provided -// rune has no available mapping, the second return value is false. -func (cmap *CMap) RuneToCID(r rune) (CharCode, bool) { - cid, ok := cmap.unicodeToCode[r] +// StringToCID maps the specified string to a character identifier. If the provided +// string has no available mapping, the bool return value is false. +func (cmap *CMap) StringToCID(s string) (CharCode, bool) { + cid, ok := cmap.unicodeToCode[s] return cid, ok } @@ -453,7 +461,7 @@ func (cmap *CMap) toBfData() string { } // codes is a sorted list of the codeToUnicode keys. - var codes []CharCode + codes := make([]CharCode, 0, len(cmap.codeToUnicode)) for code := range cmap.codeToUnicode { codes = append(codes, code) } @@ -473,7 +481,7 @@ func (cmap *CMap) toBfData() string { prevRune := cmap.codeToUnicode[codes[0]] for _, c := range codes[1:] { currRune := cmap.codeToUnicode[c] - if c == currCharRange.code1+1 && currRune == prevRune+1 { + if c == currCharRange.code1+1 && lastRune(currRune) == lastRune(prevRune)+1 { currCharRange.code1 = c } else { charRanges = append(charRanges, currCharRange) @@ -508,8 +516,8 @@ func (cmap *CMap) toBfData() string { lines = append(lines, fmt.Sprintf("%d beginbfchar", n)) for j := 0; j < n; j++ { code := fbChars[i*maxBfEntries+j] - r := cmap.codeToUnicode[code] - lines = append(lines, fmt.Sprintf("<%04x> <%04x>", code, r)) + s := cmap.codeToUnicode[code] + lines = append(lines, fmt.Sprintf("<%04x> %s", code, hexCode(s))) } lines = append(lines, "endbfchar") } @@ -521,8 +529,8 @@ func (cmap *CMap) toBfData() string { lines = append(lines, fmt.Sprintf("%d beginbfrange", n)) for j := 0; j < n; j++ { rng := fbRanges[i*maxBfEntries+j] - r := rng.r0 - lines = append(lines, fmt.Sprintf("<%04x><%04x> <%04x>", rng.code0, rng.code1, r)) + lines = append(lines, fmt.Sprintf("<%04x><%04x> %s", + rng.code0, rng.code1, hexCode(rng.r0))) } lines = append(lines, "endbfrange") } @@ -530,6 +538,22 @@ func (cmap *CMap) toBfData() string { return strings.Join(lines, "\n") } +// lastRune returns the last rune in `s`. +func lastRune(s string) rune { + runes := []rune(s) + return runes[len(runes)-1] +} + +// hexCode return the CMap hex code for `s`. +func hexCode(s string) string { + runes := []rune(s) + codes := make([]string, len(runes)) + for i, r := range runes { + codes[i] = fmt.Sprintf("%04x", r) + } + return fmt.Sprintf("<%s>", strings.Join(codes, "")) +} + const ( maxBfEntries = 100 // Maximum number of entries in a bfchar or bfrange section. cmapHeader = ` diff --git a/internal/cmap/cmap_parser.go b/internal/cmap/cmap_parser.go index 9236d782..7ee40ee2 100644 --- a/internal/cmap/cmap_parser.go +++ b/internal/cmap/cmap_parser.go @@ -105,7 +105,8 @@ func (cmap *CMap) parse() error { func (cmap *CMap) parseName() error { name := "" done := false - for i := 0; i < 10 && !done; i++ { + // NOTE(peterwilliams97): We need up to 20 iterations of this loop for some PDFs I have seen. + for i := 0; i < 20 && !done; i++ { o, err := cmap.parseObject() if err != nil { return err @@ -141,7 +142,6 @@ func (cmap *CMap) parseName() error { // parseType parses a cmap type and adds it to `cmap`. // cmap names are defined like this: /CMapType 1 def func (cmap *CMap) parseType() error { - ctype := 0 done := false for i := 0; i < 3 && !done; i++ { @@ -171,7 +171,6 @@ func (cmap *CMap) parseType() error { // We don't need the version. We do this to eat up the version code in the cmap definition // to reduce unhandled parse object warnings. func (cmap *CMap) parseVersion() error { - version := "" done := false for i := 0; i < 3 && !done; i++ { @@ -471,7 +470,7 @@ func (cmap *CMap) parseBfchar() error { } return err } - var target rune + var target []rune switch v := o.(type) { case cmapOperand: if v.Operand == endbfchar { @@ -480,16 +479,16 @@ func (cmap *CMap) parseBfchar() error { common.Log.Debug("ERROR: Unexpected operand. %#v", v) return ErrBadCMap case cmapHexString: - target = hexToRune(v) + target = hexToRunes(v) case cmapName: common.Log.Debug("ERROR: Unexpected name. %#v", v) - target = MissingCodeRune + target = []rune{MissingCodeRune} default: common.Log.Debug("ERROR: Unexpected type. %#v", o) return ErrBadCMap } - cmap.codeToUnicode[code] = target + cmap.codeToUnicode[code] = string(target) } return nil @@ -563,16 +562,17 @@ func (cmap *CMap) parseBfrange() error { if !ok { return errors.New("non-hex string in array") } - r := hexToRune(hexs) - cmap.codeToUnicode[code] = r + runes := hexToRunes(hexs) + cmap.codeToUnicode[code] = string(runes) } case cmapHexString: // , maps [from,to] to [dst,dst+to-from]. - r := hexToRune(v) + runes := hexToRunes(v) + n := len(runes) for code := srcCodeFrom; code <= srcCodeTo; code++ { - cmap.codeToUnicode[code] = r - r++ + cmap.codeToUnicode[code] = string(runes) + runes[n-1]++ } default: common.Log.Debug("ERROR: Unexpected type %T", o) diff --git a/internal/cmap/cmap_test.go b/internal/cmap/cmap_test.go index 5c8da78d..de26766e 100644 --- a/internal/cmap/cmap_test.go +++ b/internal/cmap/cmap_test.go @@ -104,14 +104,14 @@ func TestCMapParser1(t *testing.T) { } for k, expected := range expectedMappings { - if v, ok := cmap.CharcodeToUnicode(k); !ok || v != expected { + if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) { t.Errorf("incorrect mapping, expecting 0x%X ➞ 0x%X (%#v)", k, expected, v) return } } v, _ := cmap.CharcodeToUnicode(0x99) - if v != MissingCodeRune { //!= "notdef" { + if v != MissingCodeString { //!= "notdef" { t.Errorf("Unmapped code, expected to map to undefined") return } @@ -188,7 +188,7 @@ func TestCMapParser2(t *testing.T) { } for k, expected := range expectedMappings { - if v, ok := cmap.CharcodeToUnicode(k); !ok || v != expected { + if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) { t.Errorf("incorrect mapping, expecting 0x%X ➞ 0x%X (got 0x%X)", k, expected, v) return } @@ -297,7 +297,7 @@ func TestCMapParser3(t *testing.T) { 0xd140: 0xa000, } for k, expected := range expectedMappings { - if v, ok := cmap.CharcodeToUnicode(k); !ok || v != expected { + if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) { t.Errorf("incorrect mapping: expecting 0x%02X ➞ 0x%02X (got 0x%02X)", k, expected, v) return } @@ -407,7 +407,7 @@ func TestCMapParser4(t *testing.T) { } for k, expected := range expectedMappings { - if v, ok := cmap.CharcodeToUnicode(k); !ok || v != expected { + if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) { t.Errorf("incorrect mapping, expecting 0x%04X ➞ %+q (got %+q)", k, expected, v) return } @@ -520,6 +520,7 @@ var ( 0x017b: 'Ż', 0x017d: 'Ž', } + codeToUnicode3 = map[CharCode]rune{ // 93 entries 0x0124: 'Ĥ', 0x0125: 'ĥ', @@ -695,7 +696,7 @@ func checkCmapWriteRead(t *testing.T, codeToUnicode map[CharCode]rune) { } u0 := codeToUnicode[code] u := cmap.codeToUnicode[code] - if u != u0 { + if u != string(u0) { t.Errorf("Unicode mismatch: i=%d code0=0x%04x expected=%q test=%q", i, code, u0, u) return } diff --git a/internal/textencoding/cmap.go b/internal/textencoding/cmap.go index b0dfbedf..e727ab56 100644 --- a/internal/textencoding/cmap.go +++ b/internal/textencoding/cmap.go @@ -48,8 +48,8 @@ func (enc CMapEncoder) Decode(raw []byte) string { if codes, ok := enc.codeToCID.BytesToCharcodes(raw); ok { var buf bytes.Buffer for _, code := range codes { - r, _ := enc.CharcodeToRune(CharCode(code)) - buf.WriteRune(r) + s, _ := enc.charcodeToString(CharCode(code)) + buf.WriteString(s) } return buf.String() @@ -67,7 +67,7 @@ func (enc CMapEncoder) RuneToCharcode(r rune) (CharCode, bool) { } // Map rune to CID. - cid, ok := enc.cidToUnicode.RuneToCID(r) + cid, ok := enc.cidToUnicode.StringToCID(string(r)) if !ok { return 0, false } @@ -87,8 +87,13 @@ func (enc CMapEncoder) RuneToCharcode(r rune) (CharCode, bool) { // CharcodeToRune converts PDF character code `code` to a rune. // The bool return flag is true if there was a match, and false otherwise. func (enc CMapEncoder) CharcodeToRune(code CharCode) (rune, bool) { + s, ok := enc.charcodeToString(code) + return ([]rune(s))[0], ok +} + +func (enc CMapEncoder) charcodeToString(code CharCode) (string, bool) { if enc.cidToUnicode == nil { - return MissingCodeRune, false + return MissingCodeString, false } // Map charcode to CID. If charcode to CID CMap is nil, assume Identity encoding. @@ -96,7 +101,7 @@ func (enc CMapEncoder) CharcodeToRune(code CharCode) (rune, bool) { if enc.codeToCID != nil { var ok bool if cid, ok = enc.codeToCID.CharcodeToCID(cmap.CharCode(code)); !ok { - return MissingCodeRune, false + return MissingCodeString, false } } diff --git a/internal/textencoding/glyphs_glyphlist.go b/internal/textencoding/glyphs_glyphlist.go index e794bea8..a5ba8f63 100644 --- a/internal/textencoding/glyphs_glyphlist.go +++ b/internal/textencoding/glyphs_glyphlist.go @@ -18,7 +18,13 @@ import ( ) // MissingCodeRune is the rune returned when there is no matching glyph. It was previously '?'. -const MissingCodeRune = '\ufffd' // � +const ( + // MissingCodeRune replaces runes that can't be decoded. . + MissingCodeRune = '\ufffd' // � + + // MissingCodeString replaces strings that can't be decoded. + MissingCodeString = string(MissingCodeRune) +) // GlyphToRune returns the rune corresponding to glyph `glyph` if there is one. // TODO: Can we return a string here? e.g. When we are extracting text, we want to get "ffi" diff --git a/model/flatten.go b/model/flatten.go index b0937252..332e9ae8 100644 --- a/model/flatten.go +++ b/model/flatten.go @@ -85,9 +85,10 @@ func (r *PdfReader) FlattenFields(allannots bool, appgen FieldAppearanceGenerato var annots []*PdfAnnotation // Wrap the content streams. - err := appgen.WrapContentStream(page) - if err != nil { - return err + if appgen != nil { + if err := appgen.WrapContentStream(page); err != nil { + return err + } } annotations, err := page.GetAnnotations() diff --git a/model/font.go b/model/font.go index af688bf4..5a8cb2fb 100644 --- a/model/font.go +++ b/model/font.go @@ -420,16 +420,26 @@ func (font *PdfFont) BytesToCharcodes(data []byte) []textencoding.CharCode { return charcodes } -// CharcodesToUnicodeWithStats is identical to CharcodesToUnicode except returns more statistical +// CharcodesToUnicodeWithStats is identical to CharcodesToUnicode except it returns more statistical // information about hits and misses from the reverse mapping process. +// NOTE: The number of runes returned may be greater than the number of charcodes. +// TODO(peterwilliams97): Deprecate in v4 and use only CharcodesToStrings() func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCode) (runelist []rune, numHits, numMisses int) { + texts, numHits, numMisses := font.CharcodesToStrings(charcodes) + return []rune(strings.Join(texts, "")), numHits, numMisses +} + +// CharcodesToStrings returns the unicode strings corresponding to `charcodes`. +// The int returns are the number of strings and the number of unconvereted codes. +// NOTE: The number of strings returned is equal to the number of charcodes +func (font *PdfFont) CharcodesToStrings(charcodes []textencoding.CharCode) ([]string, int, int) { fontBase := font.baseFields() - runes := make([]rune, 0, len(charcodes)) - numMisses = 0 + texts := make([]string, 0, len(charcodes)) + numMisses := 0 for _, code := range charcodes { if fontBase.toUnicodeCmap != nil { - if r, ok := fontBase.toUnicodeCmap.CharcodeToUnicode(cmap.CharCode(code)); ok { - runes = append(runes, r) + if s, ok := fontBase.toUnicodeCmap.CharcodeToUnicode(cmap.CharCode(code)); ok { + texts = append(texts, s) continue } } @@ -438,7 +448,7 @@ func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCo encoder := font.Encoder() if encoder != nil { if r, ok := encoder.CharcodeToRune(code); ok { - runes = append(runes, r) + texts = append(texts, string(r)) continue } } @@ -447,7 +457,7 @@ func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCo "\tfont=%s\n\tencoding=%s", code, charcodes, fontBase.isCIDFont(), font, encoder) numMisses++ - runes = append(runes, cmap.MissingCodeRune) + texts = append(texts, cmap.MissingCodeString) } if numMisses != 0 { @@ -457,7 +467,7 @@ func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCo len(charcodes), numMisses, font) } - return runes, len(runes), numMisses + return texts, len(texts), numMisses } // CharcodeBytesToUnicode converts PDF character codes `data` to a Go unicode string. @@ -487,8 +497,8 @@ func (font *PdfFont) CharcodeBytesToUnicode(data []byte) (string, int, int) { // 1) Use the ToUnicode CMap if there is one. // 2) Use the underlying font's encoding. func (font *PdfFont) CharcodesToUnicode(charcodes []textencoding.CharCode) []rune { - strlist, _, _ := font.CharcodesToUnicodeWithStats(charcodes) - return strlist + runes, _, _ := font.CharcodesToUnicodeWithStats(charcodes) + return runes } // RunesToCharcodeBytes maps the provided runes to charcode bytes and it diff --git a/model/form.go b/model/form.go index 39b3d923..79880266 100644 --- a/model/form.go +++ b/model/form.go @@ -317,26 +317,36 @@ func fillFieldValue(f *PdfField, val core.PdfObject) error { default: common.Log.Debug("ERROR: Unsupported text field V type: %T (%#v)", t, t) } - case *PdfFieldButton, *PdfFieldChoice: - switch t := val.(type) { + case *PdfFieldButton: + // See section 12.7.4.2.3 "Check Boxes" (pp. 440-441 PDF32000_2008). + switch val.(type) { case *core.PdfObjectName: - if len(t.String()) == 0 { - return nil + if len(val.String()) > 0 { + f.V = val + setFieldAnnotAS(f, val) } - for _, wa := range f.Annotations { - wa.AS = val - } - f.V = val case *core.PdfObjectString: - if len(t.String()) == 0 { - return nil + if len(val.String()) > 0 { + f.V = core.MakeName(val.String()) + setFieldAnnotAS(f, f.V) } - common.Log.Debug("Unexpected string for button/choice field. Converting to name: '%s'", t.String()) - name := core.MakeName(t.String()) - for _, wa := range f.Annotations { - wa.AS = name + default: + common.Log.Debug("ERROR: UNEXPECTED %s -> %v", f.PartialName(), val) + f.V = val + } + case *PdfFieldChoice: + // See section 12.7.4.4 "Choice Fields" (pp. 444-446 PDF32000_2008). + switch val.(type) { + case *core.PdfObjectName: + if len(val.String()) > 0 { + f.V = core.MakeString(val.String()) + setFieldAnnotAS(f, val) + } + case *core.PdfObjectString: + if len(val.String()) > 0 { + f.V = val + setFieldAnnotAS(f, core.MakeName(val.String())) } - f.V = name default: common.Log.Debug("ERROR: UNEXPECTED %s -> %v", f.PartialName(), val) f.V = val @@ -347,3 +357,11 @@ func fillFieldValue(f *PdfField, val core.PdfObject) error { return nil } + +// setFieldAnnotAS sets the appearance stream of the field annotations to `val`. +func setFieldAnnotAS(f *PdfField, val core.PdfObject) { + for _, wa := range f.Annotations { + wa.AS = val + wa.ToPdfObject() + } +} diff --git a/model/reader.go b/model/reader.go index 01a8cc8a..85500c27 100644 --- a/model/reader.go +++ b/model/reader.go @@ -275,7 +275,7 @@ func (r *PdfReader) loadOutlines() (*PdfOutlineTreeNode, error) { outlineRootObj := core.ResolveReference(outlinesObj) common.Log.Trace("Outline root: %v", outlineRootObj) - if _, isNull := outlineRootObj.(*core.PdfObjectNull); isNull { + if isNull := core.IsNullObject(outlineRootObj); isNull { common.Log.Trace("Outline root is null - no outlines") return nil, nil }