mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-26 13:48:55 +08:00
Merge pull request #372 from gunnsth/release/v3.8.0
Prepare unipdf release v3.8.0
This commit is contained in:
commit
8ab0b6ff45
@ -13,6 +13,7 @@ import (
|
||||
|
||||
"github.com/unidoc/unipdf/v3/common"
|
||||
"github.com/unidoc/unipdf/v3/contentstream"
|
||||
"github.com/unidoc/unipdf/v3/contentstream/draw"
|
||||
"github.com/unidoc/unipdf/v3/core"
|
||||
"github.com/unidoc/unipdf/v3/internal/textencoding"
|
||||
"github.com/unidoc/unipdf/v3/model"
|
||||
@ -35,6 +36,7 @@ type FieldAppearance struct {
|
||||
type AppearanceStyle struct {
|
||||
// How much of Rect height to fill when autosizing text.
|
||||
AutoFontSizeFraction float64
|
||||
|
||||
// CheckmarkRune is a rune used for check mark in checkboxes (for ZapfDingbats font).
|
||||
CheckmarkRune rune
|
||||
|
||||
@ -51,6 +53,47 @@ type AppearanceStyle struct {
|
||||
|
||||
// Allow field MK appearance characteristics to override style settings.
|
||||
AllowMK bool
|
||||
|
||||
// Fonts holds appearance styles for fonts.
|
||||
Fonts *AppearanceFontStyle
|
||||
}
|
||||
|
||||
// AppearanceFontStyle defines font style characteristics for form fields,
|
||||
// used in the filling/flattening process. It configures which font is
// substituted when the font requested by a field cannot be used.
|
||||
type AppearanceFontStyle struct {
|
||||
// Fallback represents a global font fallback, used for fields which do
|
||||
// not specify a font in their default appearance (DA). The fallback is
|
||||
// also used if there is a font specified in the DA, but it is not
|
||||
// found in the AcroForm resources (DR), or it is found but cannot be
// loaded from them. If no fallback applies, Helvetica is used.
|
||||
Fallback *AppearanceFont
|
||||
|
||||
// FieldFallbacks defines font fallbacks for specific fields. The map keys
|
||||
// represent the names of the fields (which can be specified by their
|
||||
// partial or full names). Specific field fallback fonts take precedence
|
||||
// over the global font fallback. The partial name of a field is looked
// up first; the full name is only tried when the partial name is not a
// key of the map.
|
||||
FieldFallbacks map[string]*AppearanceFont
|
||||
|
||||
// ForceReplace forces the replacement of fonts in the filling/flattening
|
||||
// process, even if the default appearance (DA) specifies a valid font.
|
||||
// If no fallback font is provided, setting this field has no effect.
|
||||
ForceReplace bool
|
||||
}
|
||||
|
||||
// AppearanceFont represents a font used for generating the appearance of a
|
||||
// field in the filling/flattening process.
|
||||
type AppearanceFont struct {
|
||||
// Name represents the name of the font which will be added to the
|
||||
// AcroForm resources (DR). A font already registered in the resources
// under the same name is left in place and is not overwritten.
|
||||
Name string
|
||||
|
||||
// Font represents the actual font used for the field appearance.
// NOTE(review): the font is converted with ToPdfObject when it is used as
// a fallback, so it presumably must not be nil - confirm.
|
||||
Font *model.PdfFont
|
||||
|
||||
// Size represents the size of the font used for the field appearance.
|
||||
// If size is 0, a default font size will be used.
|
||||
// The default font size is calculated using the available annotation
|
||||
// height and the AutoFontSizeFraction of the AppearanceStyle.
|
||||
Size float64
|
||||
}
|
||||
|
||||
type quadding int
|
||||
@ -96,6 +139,9 @@ func (fa FieldAppearance) GenerateAppearanceDict(form *model.PdfAcroForm, field
|
||||
common.Log.Trace("Already populated - ignoring")
|
||||
return appDict, nil
|
||||
}
|
||||
if form.DR == nil {
|
||||
form.DR = model.NewPdfPageResources()
|
||||
}
|
||||
|
||||
// Generate the appearance.
|
||||
switch t := field.GetContext().(type) {
|
||||
@ -172,26 +218,26 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
width := rect.Width()
|
||||
height := rect.Height()
|
||||
width, height := rect.Width(), rect.Height()
|
||||
|
||||
var rotation float64
|
||||
if mkDict, has := core.GetDict(wa.MK); has {
|
||||
bsDict, _ := core.GetDict(wa.BS)
|
||||
err := style.applyAppearanceCharacteristics(mkDict, bsDict, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rotation, _ = core.GetNumberAsFloat(mkDict.Get("R"))
|
||||
}
|
||||
|
||||
// Get and process the default appearance string (DA) operands.
|
||||
da := getDA(ftxt.PdfField)
|
||||
csp := contentstream.NewContentStreamParser(da)
|
||||
daOps, err := csp.Parse()
|
||||
daOps, err := contentstream.NewContentStreamParser(getDA(ftxt.PdfField)).Parse()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cc := contentstream.NewContentCreator()
|
||||
|
||||
if style.BorderSize > 0 {
|
||||
drawRect(cc, style, width, height)
|
||||
}
|
||||
@ -205,62 +251,44 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT
|
||||
|
||||
cc.Add_BMC("Tx")
|
||||
cc.Add_q()
|
||||
|
||||
bboxWidth, bboxHeight := width, height
|
||||
if rotation != 0 {
|
||||
// Calculate bounding box before rotation.
|
||||
revRotation := -rotation
|
||||
bbox := draw.Path{Points: []draw.Point{
|
||||
draw.NewPoint(0, 0).Rotate(revRotation),
|
||||
draw.NewPoint(width, 0).Rotate(revRotation),
|
||||
draw.NewPoint(0, height).Rotate(revRotation),
|
||||
draw.NewPoint(width, height).Rotate(revRotation),
|
||||
}}.GetBoundingBox()
|
||||
|
||||
// Update width and height, as the appearance is generated based on
|
||||
// the bounding of the annotation with no rotation.
|
||||
width = bbox.Width
|
||||
height = bbox.Height
|
||||
|
||||
// Apply rotation.
|
||||
cc.RotateDeg(rotation)
|
||||
cc.Translate(bbox.X, bbox.Y)
|
||||
}
|
||||
|
||||
// Graphic state changes.
|
||||
cc.Add_BT()
|
||||
|
||||
// Add DA operands.
|
||||
var fontsize float64
|
||||
var fontname *core.PdfObjectName
|
||||
var font *model.PdfFont
|
||||
autosize := true
|
||||
|
||||
fontsizeDef := height * style.AutoFontSizeFraction
|
||||
for _, op := range *daOps {
|
||||
// When Tf specified with font size is 0, it means we should set on our own based on the Rect (autosize).
|
||||
if op.Operand == "Tf" && len(op.Params) == 2 {
|
||||
if name, ok := core.GetName(op.Params[0]); ok {
|
||||
fontname = name
|
||||
}
|
||||
num, err := core.GetNumberAsFloat(op.Params[1])
|
||||
if err == nil {
|
||||
fontsize = num
|
||||
} else {
|
||||
common.Log.Debug("ERROR invalid font size: %v", op.Params[1])
|
||||
}
|
||||
if fontsize == 0 {
|
||||
// Use default if zero.
|
||||
fontsize = fontsizeDef
|
||||
} else {
|
||||
// Disable autosize when font size (>0) explicitly specified.
|
||||
autosize = false
|
||||
}
|
||||
// Skip over (set fontsize in code below).
|
||||
continue
|
||||
}
|
||||
cc.AddOperand(*op)
|
||||
}
|
||||
|
||||
// If the font name is not set or not found in the form resources, use
|
||||
// the default fallback font (Helvetica).
|
||||
var fontObj core.PdfObject
|
||||
if dr != nil && fontname != nil {
|
||||
if fObj, has := dr.GetFontByName(*fontname); has {
|
||||
if font, err = model.NewPdfFontFromPdfObject(fObj); err != nil {
|
||||
common.Log.Debug("ERROR: could not load appearance font: %v", err)
|
||||
// Process DA operands.
|
||||
apFont, hasTf, err := style.processDA(ftxt.PdfField, daOps, dr, resources, cc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fontObj = fObj
|
||||
|
||||
font := apFont.Font
|
||||
fontsize := apFont.Size
|
||||
fontname := core.MakeName(apFont.Name)
|
||||
autosize := fontsize == 0
|
||||
if autosize && hasTf {
|
||||
fontsize = height * style.AutoFontSizeFraction
|
||||
}
|
||||
}
|
||||
if fontObj == nil {
|
||||
// Font not found. Reverting to Helvetica with name `Helv`.
|
||||
if font, err = model.NewStandard14Font("Helvetica"); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fontname = core.MakeName("Helv")
|
||||
fontObj = font.ToPdfObject()
|
||||
}
|
||||
resources.SetFontByName(*fontname, fontObj)
|
||||
|
||||
encoder := font.Encoder()
|
||||
if encoder == nil {
|
||||
@ -461,7 +489,7 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT
|
||||
|
||||
xform := model.NewXObjectForm()
|
||||
xform.Resources = resources
|
||||
xform.BBox = core.MakeArrayFromFloats([]float64{0, 0, width, height})
|
||||
xform.BBox = core.MakeArrayFromFloats([]float64{0, 0, bboxWidth, bboxHeight})
|
||||
xform.SetContentStream(cc.Bytes(), defStreamEncoder())
|
||||
|
||||
apDict := core.MakeDict()
|
||||
@ -480,16 +508,11 @@ func genFieldTextCombAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFi
|
||||
if !ok {
|
||||
return nil, errors.New("invalid Rect")
|
||||
}
|
||||
rect, err := array.ToFloat64Array()
|
||||
rect, err := model.NewPdfRectangle(*array)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(rect) != 4 {
|
||||
return nil, errors.New("len(Rect) != 4")
|
||||
}
|
||||
|
||||
width := rect[2] - rect[0]
|
||||
height := rect[3] - rect[1]
|
||||
width, height := rect.Width(), rect.Height()
|
||||
|
||||
if mkDict, has := core.GetDict(wa.MK); has {
|
||||
bsDict, _ := core.GetDict(wa.BS)
|
||||
@ -510,9 +533,7 @@ func genFieldTextCombAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFi
|
||||
boxwidth := float64(width) / float64(maxLen)
|
||||
|
||||
// Get and process the default appearance string (DA) operands.
|
||||
da := getDA(ftxt.PdfField)
|
||||
csp := contentstream.NewContentStreamParser(da)
|
||||
daOps, err := csp.Parse()
|
||||
daOps, err := contentstream.NewContentStreamParser(getDA(ftxt.PdfField)).Parse()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -529,68 +550,28 @@ func genFieldTextCombAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFi
|
||||
}
|
||||
cc.Add_BMC("Tx")
|
||||
cc.Add_q()
|
||||
|
||||
// Graphic state changes.
|
||||
cc.Add_BT()
|
||||
|
||||
// Add DA operands.
|
||||
var fontsize float64
|
||||
var fontname *core.PdfObjectName
|
||||
var font *model.PdfFont
|
||||
autosize := true
|
||||
|
||||
fontsizeDef := height * style.AutoFontSizeFraction
|
||||
for _, op := range *daOps {
|
||||
// If TF specified and font size is 0, it means we should set on our own based on the Rect.
|
||||
if op.Operand == "Tf" && len(op.Params) == 2 {
|
||||
if name, ok := core.GetName(op.Params[0]); ok {
|
||||
fontname = name
|
||||
}
|
||||
num, err := core.GetNumberAsFloat(op.Params[1])
|
||||
if err == nil {
|
||||
fontsize = num
|
||||
} else {
|
||||
common.Log.Debug("ERROR invalid font size: %v", op.Params[1])
|
||||
}
|
||||
if fontsize == 0 {
|
||||
// Use default if zero.
|
||||
fontsize = fontsizeDef
|
||||
} else {
|
||||
// Disable autosize when font size (>0) explicitly specified.
|
||||
autosize = false
|
||||
}
|
||||
// Skip over (set fontsize in code below).
|
||||
continue
|
||||
}
|
||||
cc.AddOperand(*op)
|
||||
}
|
||||
|
||||
// If fontname not set need to make a new font or use one defined in the resources.
|
||||
// e.g. Helv commonly used for Helvetica.
|
||||
if fontname == nil || dr == nil {
|
||||
// Font not set, revert to Helvetica with name "Helv".
|
||||
fontname = core.MakeName("Helv")
|
||||
helv, err := model.NewStandard14Font("Helvetica")
|
||||
// Process DA operands.
|
||||
apFont, hasTf, err := style.processDA(ftxt.PdfField, daOps, dr, resources, cc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
font = helv
|
||||
resources.SetFontByName(*fontname, helv.ToPdfObject())
|
||||
cc.Add_Tf(*fontname, fontsizeDef)
|
||||
} else {
|
||||
fontobj, has := dr.GetFontByName(*fontname)
|
||||
if !has {
|
||||
return nil, errors.New("font not in DR")
|
||||
}
|
||||
font, err = model.NewPdfFontFromPdfObject(fontobj)
|
||||
if err != nil {
|
||||
common.Log.Debug("ERROR loading default appearance font: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
resources.SetFontByName(*fontname, fontobj)
|
||||
|
||||
font := apFont.Font
|
||||
fontname := core.MakeName(apFont.Name)
|
||||
fontsize := apFont.Size
|
||||
autosize := fontsize == 0
|
||||
if autosize && hasTf {
|
||||
fontsize = height * style.AutoFontSizeFraction
|
||||
}
|
||||
|
||||
encoder := font.Encoder()
|
||||
if encoder == nil {
|
||||
common.Log.Debug("ERROR - Encoder is nil - can expect bad results")
|
||||
common.Log.Debug("WARN: font encoder is nil. Assuming identity encoder. Output may be incorrect.")
|
||||
encoder = textencoding.NewIdentityTextEncoder("Identity-H")
|
||||
}
|
||||
|
||||
var text string
|
||||
@ -711,26 +692,19 @@ func genFieldTextCombAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFi
|
||||
// genFieldCheckboxAppearance generates an appearance dictionary for a widget annotation `wa` referenced by
|
||||
// a button field `fbtn` with form resources `dr` (DR).
|
||||
func genFieldCheckboxAppearance(wa *model.PdfAnnotationWidget, fbtn *model.PdfFieldButton, dr *model.PdfPageResources, style AppearanceStyle) (*core.PdfObjectDictionary, error) {
|
||||
// TODO(dennwc): unused parameters
|
||||
|
||||
// Get bounding Rect.
|
||||
array, ok := core.GetArray(wa.Rect)
|
||||
if !ok {
|
||||
return nil, errors.New("invalid Rect")
|
||||
}
|
||||
rect, err := array.ToFloat64Array()
|
||||
rect, err := model.NewPdfRectangle(*array)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(rect) != 4 {
|
||||
return nil, errors.New("len(Rect) != 4")
|
||||
}
|
||||
width, height := rect.Width(), rect.Height()
|
||||
|
||||
common.Log.Debug("Checkbox, wa BS: %v", wa.BS)
|
||||
|
||||
width := rect[2] - rect[0]
|
||||
height := rect[3] - rect[1]
|
||||
|
||||
zapfdb, err := model.NewStandard14Font("ZapfDingbats")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -825,26 +799,16 @@ func genFieldComboboxAppearance(form *model.PdfAcroForm, wa *model.PdfAnnotation
|
||||
if !ok {
|
||||
return nil, errors.New("invalid Rect")
|
||||
}
|
||||
rect, err := array.ToFloat64Array()
|
||||
rect, err := model.NewPdfRectangle(*array)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(rect) != 4 {
|
||||
return nil, errors.New("len(Rect) != 4")
|
||||
}
|
||||
width, height := rect.Width(), rect.Height()
|
||||
|
||||
common.Log.Debug("Choice, wa BS: %v", wa.BS)
|
||||
|
||||
width := rect[2] - rect[0]
|
||||
height := rect[3] - rect[1]
|
||||
|
||||
// Get and process the default appearance string (DA) operands.
|
||||
da := core.MakeString("")
|
||||
if form.DA != nil {
|
||||
da, _ = core.GetString(form.DA)
|
||||
}
|
||||
csp := contentstream.NewContentStreamParser(da.String())
|
||||
daOps, err := csp.Parse()
|
||||
daOps, err := contentstream.NewContentStreamParser(getDA(fch.PdfField)).Parse()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -857,22 +821,25 @@ func genFieldComboboxAppearance(form *model.PdfAcroForm, wa *model.PdfAnnotation
|
||||
}
|
||||
}
|
||||
|
||||
// See section 12.7.4.4 "Choice Fields" (pp. 444-446 PDF32000_2008).
|
||||
dchoiceapp := core.MakeDict()
|
||||
for _, optObj := range fch.Opt.Elements() {
|
||||
if optArr, ok := core.GetArray(optObj); ok && optArr.Len() == 2 {
|
||||
optObj = optArr.Get(1)
|
||||
}
|
||||
|
||||
var optstr string
|
||||
if opt, ok := core.GetString(optObj); ok {
|
||||
optstr = opt.String()
|
||||
} else {
|
||||
if opt, ok := core.GetName(optObj); ok {
|
||||
optstr = opt.Decoded()
|
||||
} else if opt, ok := core.GetName(optObj); ok {
|
||||
optstr = opt.String()
|
||||
} else {
|
||||
common.Log.Debug("ERROR: Opt not a name/string - %T", optObj)
|
||||
return nil, errors.New("not a name/string")
|
||||
}
|
||||
}
|
||||
|
||||
if len(optstr) > 0 {
|
||||
xform, err := makeComboboxTextXObjForm(width, height, optstr, style, daOps, form.DR)
|
||||
xform, err := makeComboboxTextXObjForm(fch.PdfField, width, height, optstr, style, daOps, form.DR)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -888,7 +855,9 @@ func genFieldComboboxAppearance(form *model.PdfAcroForm, wa *model.PdfAnnotation
|
||||
}
|
||||
|
||||
// Make a text-based XObj Form.
|
||||
func makeComboboxTextXObjForm(width, height float64, text string, style AppearanceStyle, daOps *contentstream.ContentStreamOperations, dr *model.PdfPageResources) (*model.XObjectForm, error) {
|
||||
func makeComboboxTextXObjForm(field *model.PdfField, width, height float64,
|
||||
text string, style AppearanceStyle, daOps *contentstream.ContentStreamOperations,
|
||||
dr *model.PdfPageResources) (*model.XObjectForm, error) {
|
||||
resources := model.NewPdfPageResources()
|
||||
|
||||
cc := contentstream.NewContentCreator()
|
||||
@ -906,63 +875,25 @@ func makeComboboxTextXObjForm(width, height float64, text string, style Appearan
|
||||
// Graphic state changes.
|
||||
cc.Add_BT()
|
||||
|
||||
// Add DA operands.
|
||||
var fontsize float64
|
||||
var fontname *core.PdfObjectName
|
||||
var font *model.PdfFont
|
||||
var err error
|
||||
autosize := true
|
||||
|
||||
fontsizeDef := height * style.AutoFontSizeFraction
|
||||
for _, op := range *daOps {
|
||||
// When Tf specified with font size is 0, it means we should set on our own based on the Rect (autosize).
|
||||
if op.Operand == "Tf" && len(op.Params) == 2 {
|
||||
if name, ok := core.GetName(op.Params[0]); ok {
|
||||
fontname = name
|
||||
}
|
||||
num, err := core.GetNumberAsFloat(op.Params[1])
|
||||
if err == nil {
|
||||
fontsize = num
|
||||
} else {
|
||||
common.Log.Debug("ERROR invalid font size: %v", op.Params[1])
|
||||
}
|
||||
if fontsize == 0 {
|
||||
// Use default if zero.
|
||||
fontsize = fontsizeDef
|
||||
} else {
|
||||
// Disable autosize when font size (>0) explicitly specified.
|
||||
autosize = false
|
||||
}
|
||||
// Skip over (set fontsize in code below).
|
||||
continue
|
||||
}
|
||||
cc.AddOperand(*op)
|
||||
}
|
||||
|
||||
// If fontname not set need to make a new font or use one defined in the resources.
|
||||
// e.g. Helv commonly used for Helvetica.
|
||||
if fontname == nil || dr == nil {
|
||||
// Font not set, revert to Helvetica with name "Helv".
|
||||
fontname = core.MakeName("Helv")
|
||||
helv, err := model.NewStandard14Font("Helvetica")
|
||||
// Process DA operands.
|
||||
apFont, hasTf, err := style.processDA(field, daOps, dr, resources, cc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
font = helv
|
||||
resources.SetFontByName(*fontname, helv.ToPdfObject())
|
||||
} else {
|
||||
fontobj, has := dr.GetFontByName(*fontname)
|
||||
if !has {
|
||||
return nil, errors.New("font not in DR")
|
||||
}
|
||||
font, err = model.NewPdfFontFromPdfObject(fontobj)
|
||||
if err != nil {
|
||||
common.Log.Debug("ERROR loading default appearance font: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
resources.SetFontByName(*fontname, fontobj)
|
||||
|
||||
font := apFont.Font
|
||||
fontsize := apFont.Size
|
||||
fontname := core.MakeName(apFont.Name)
|
||||
autosize := fontsize == 0
|
||||
if autosize && hasTf {
|
||||
fontsize = height * style.AutoFontSizeFraction
|
||||
}
|
||||
|
||||
encoder := font.Encoder()
|
||||
if encoder == nil {
|
||||
common.Log.Debug("WARN: font encoder is nil. Assuming identity encoder. Output may be incorrect.")
|
||||
encoder = textencoding.NewIdentityTextEncoder("Identity-H")
|
||||
}
|
||||
|
||||
// If no text, no appearance needed.
|
||||
if len(text) == 0 {
|
||||
@ -1136,6 +1067,105 @@ func (style *AppearanceStyle) applyAppearanceCharacteristics(mkDict *core.PdfObj
|
||||
return nil
|
||||
}
|
||||
|
||||
// processDA adds the operands found in the field default appearance stream to
|
||||
// the provided content stream creator. It also provides a fallback font, based
|
||||
// on the configuration of the AppearanceStyle, if no valid font is specified
|
||||
// in the default appearance. The method returns the font to be used when
|
||||
// generating the appearance of the field and a boolean value specifying if
|
||||
// the DA stream contains any Tf operands.
|
||||
func (style *AppearanceStyle) processDA(field *model.PdfField,
|
||||
daOps *contentstream.ContentStreamOperations, dr, resources *model.PdfPageResources,
|
||||
cc *contentstream.ContentCreator) (*AppearanceFont, bool, error) {
|
||||
// Check for fallback fonts.
|
||||
var fallbackFont *AppearanceFont
|
||||
var forceReplace bool
|
||||
if style.Fonts != nil {
|
||||
// Use global fallback, if one is specified.
|
||||
if style.Fonts.Fallback != nil {
|
||||
fallbackFont = style.Fonts.Fallback
|
||||
}
|
||||
|
||||
// Use field fallback, if one is specified.
|
||||
if fieldFallbacks := style.Fonts.FieldFallbacks; fieldFallbacks != nil {
|
||||
if fbFont, ok := fieldFallbacks[field.PartialName()]; ok {
|
||||
fallbackFont = fbFont
|
||||
} else if fullName, err := field.FullName(); err == nil {
|
||||
if fbFont, ok := fieldFallbacks[fullName]; ok {
|
||||
fallbackFont = fbFont
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
forceReplace = style.Fonts.ForceReplace
|
||||
}
|
||||
|
||||
// Iterate over the DA operands and extract the font, if specified.
|
||||
var fontName string
|
||||
var fontSize float64
|
||||
var hasTf bool
|
||||
if daOps != nil {
|
||||
for _, op := range *daOps {
|
||||
if op.Operand == "Tf" && len(op.Params) == 2 {
|
||||
if name, ok := core.GetNameVal(op.Params[0]); ok {
|
||||
fontName = name
|
||||
}
|
||||
if size, err := core.GetNumberAsFloat(op.Params[1]); err == nil {
|
||||
fontSize = size
|
||||
}
|
||||
hasTf = true
|
||||
continue
|
||||
}
|
||||
cc.AddOperand(*op)
|
||||
}
|
||||
}
|
||||
|
||||
var apFont *AppearanceFont
|
||||
var apFontObj core.PdfObject
|
||||
if forceReplace && fallbackFont != nil {
|
||||
apFont = fallbackFont
|
||||
} else {
|
||||
// Check if font name was found in the DA stream and search it in the resources.
|
||||
if dr != nil && fontName != "" {
|
||||
if obj, ok := dr.GetFontByName(*core.MakeName(fontName)); ok {
|
||||
if font, err := model.NewPdfFontFromPdfObject(obj); err == nil {
|
||||
apFontObj = obj
|
||||
apFont = &AppearanceFont{Name: fontName, Font: font, Size: fontSize}
|
||||
} else {
|
||||
common.Log.Debug("ERROR: could not load appearance font: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Use fallback font, if one was specified.
|
||||
if apFont == nil && fallbackFont != nil {
|
||||
apFont = fallbackFont
|
||||
}
|
||||
|
||||
// Use default fallback font (Helvetica).
|
||||
if apFont == nil {
|
||||
font, err := model.NewStandard14Font("Helvetica")
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
apFont = &AppearanceFont{Name: "Helv", Font: font, Size: fontSize}
|
||||
}
|
||||
}
|
||||
|
||||
// Add appearance font to the form resources (DR).
|
||||
apFontName := *core.MakeName(apFont.Name)
|
||||
if apFontObj == nil {
|
||||
apFontObj = apFont.Font.ToPdfObject()
|
||||
}
|
||||
if dr != nil && !dr.HasFontByName(apFontName) {
|
||||
dr.SetFontByName(apFontName, apFontObj)
|
||||
}
|
||||
if resources != nil && !resources.HasFontByName(apFontName) {
|
||||
resources.SetFontByName(apFontName, apFontObj)
|
||||
}
|
||||
|
||||
return apFont, hasTf, nil
|
||||
}
|
||||
|
||||
// WrapContentStream ensures that the entire content stream for a `page` is wrapped within q ... Q operands.
|
||||
// Ensures that following operands that are added are not affected by additional operands that are added.
|
||||
// Implements interface model.ContentStreamWrapper.
|
||||
|
@ -221,7 +221,7 @@ func (l WriterLogger) logToWriter(f io.Writer, prefix string, format string, arg
|
||||
}
|
||||
|
||||
func logToWriter(f io.Writer, prefix string, format string, args ...interface{}) {
|
||||
_, file, line, ok := runtime.Caller(2)
|
||||
_, file, line, ok := runtime.Caller(3)
|
||||
if !ok {
|
||||
file = "???"
|
||||
line = 0
|
||||
|
@ -11,12 +11,12 @@ import (
|
||||
)
|
||||
|
||||
const releaseYear = 2020
|
||||
const releaseMonth = 5
|
||||
const releaseDay = 25
|
||||
const releaseHour = 23
|
||||
const releaseMin = 35
|
||||
const releaseMonth = 6
|
||||
const releaseDay = 15
|
||||
const releaseHour = 20
|
||||
const releaseMin = 15
|
||||
|
||||
// Version holds version information, when bumping this make sure to bump the released at stamp also.
|
||||
const Version = "3.7.1"
|
||||
const Version = "3.8.0"
|
||||
|
||||
var ReleasedAt = time.Date(releaseYear, releaseMonth, releaseDay, releaseHour, releaseMin, 0, 0, time.UTC)
|
||||
|
@ -702,7 +702,7 @@ func (to *textObject) reset() {
|
||||
func (to *textObject) renderText(data []byte) error {
|
||||
font := to.getCurrentFont()
|
||||
charcodes := font.BytesToCharcodes(data)
|
||||
runes, numChars, numMisses := font.CharcodesToUnicodeWithStats(charcodes)
|
||||
texts, numChars, numMisses := font.CharcodesToStrings(charcodes)
|
||||
if numMisses > 0 {
|
||||
common.Log.Debug("renderText: numChars=%d numMisses=%d", numChars, numMisses)
|
||||
}
|
||||
@ -721,18 +721,18 @@ func (to *textObject) renderText(data []byte) error {
|
||||
spaceMetrics, _ = model.DefaultFont().GetRuneMetrics(' ')
|
||||
}
|
||||
spaceWidth := spaceMetrics.Wx * glyphTextRatio
|
||||
common.Log.Trace("spaceWidth=%.2f text=%q font=%s fontSize=%.1f", spaceWidth, runes, font, tfs)
|
||||
common.Log.Trace("spaceWidth=%.2f text=%q font=%s fontSize=%.2f", spaceWidth, texts, font, tfs)
|
||||
|
||||
stateMatrix := transform.NewMatrix(
|
||||
tfs*th, 0,
|
||||
0, tfs,
|
||||
0, state.trise)
|
||||
|
||||
common.Log.Trace("renderText: %d codes=%+v runes=%q", len(charcodes), charcodes, runes)
|
||||
common.Log.Trace("renderText: %d codes=%+v runes=%q", len(charcodes), charcodes, len(texts))
|
||||
|
||||
for i, r := range runes {
|
||||
// TODO(peterwilliams97): Need to find and fix cases where this happens.
|
||||
if r == '\x00' {
|
||||
for i, text := range texts {
|
||||
r := []rune(text)
|
||||
if len(r) == 1 && r[0] == '\x00' {
|
||||
continue
|
||||
}
|
||||
|
||||
@ -746,14 +746,14 @@ func (to *textObject) renderText(data []byte) error {
|
||||
|
||||
// w is the unscaled movement at the end of a word.
|
||||
w := 0.0
|
||||
if r == ' ' {
|
||||
if string(r) == " " {
|
||||
w = state.tw
|
||||
}
|
||||
|
||||
m, ok := font.GetCharMetrics(code)
|
||||
if !ok {
|
||||
common.Log.Debug("ERROR: No metric for code=%d r=0x%04x=%+q %s", code, r, r, font)
|
||||
return errors.New("no char metrics")
|
||||
return fmt.Errorf("no char metrics: font=%s code=%d", font.String(), code)
|
||||
}
|
||||
|
||||
// c is the character size in unscaled text units.
|
||||
@ -774,7 +774,7 @@ func (to *textObject) renderText(data []byte) error {
|
||||
common.Log.Trace("m=%s c=%+v t0=%+v td0=%s trm0=%s", m, c, t0, td0, td0.Mult(to.tm).Mult(to.gs.CTM))
|
||||
|
||||
mark := to.newTextMark(
|
||||
string(r),
|
||||
text,
|
||||
trm,
|
||||
translation(to.gs.CTM.Mult(to.tm).Mult(td0)),
|
||||
math.Abs(spaceWidth*trm.ScalingFactorX()),
|
||||
|
@ -314,6 +314,11 @@ var fileExtractionTests = []struct {
|
||||
`The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",`},
|
||||
},
|
||||
},
|
||||
{filename: "Saudi.pdf",
|
||||
pageTerms: map[int][]string{
|
||||
10: []string{"الله"},
|
||||
},
|
||||
},
|
||||
// TODO(peterwilliams97): Reinstate these 2 tests when diacritic combination is fixed.
|
||||
// {filename: "Ito_Formula.pdf",
|
||||
// pageTerms: map[int][]string{
|
||||
|
@ -148,6 +148,7 @@ func TestJSONExtractAndFill(t *testing.T) {
|
||||
fieldDataExp, err := LoadFromJSONFile("./testdata/advancedform.json")
|
||||
require.NoError(t, err)
|
||||
jsonDataExp, err := fieldDataExp.JSON()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Check templates for equality.
|
||||
require.Equal(t, jsonDataExp, jsonData)
|
||||
@ -184,6 +185,7 @@ func TestJSONExtractAndFill(t *testing.T) {
|
||||
fieldDataExp, err = LoadFromJSON(bytes.NewReader(jsonBytes))
|
||||
require.NoError(t, err)
|
||||
jsonDataExp, err = fieldDataExp.JSON()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Fill test PDF form fields and write to buffer.
|
||||
f, err := os.Open(inputFilePath)
|
||||
@ -212,6 +214,47 @@ func TestJSONExtractAndFill(t *testing.T) {
|
||||
fieldData, err = LoadFromPDF(bytes.NewReader(buf.Bytes()))
|
||||
require.NoError(t, err)
|
||||
jsonData, err = fieldData.JSON()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Check field data for equality.
|
||||
require.Equal(t, jsonDataExp, jsonData)
|
||||
}
|
||||
|
||||
func TestJSONFillAndExtract(t *testing.T) {
|
||||
// Read JSON fill data.
|
||||
fieldDataExp, err := LoadFromJSONFile("./testdata/mixedfields.json")
|
||||
require.NoError(t, err)
|
||||
jsonDataExp, err := fieldDataExp.JSON()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Fill test PDF form fields and write to buffer.
|
||||
f, err := os.Open("./testdata/mixedfields.pdf")
|
||||
require.NoError(t, err)
|
||||
defer f.Close()
|
||||
|
||||
reader, err := model.NewPdfReader(f)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = reader.AcroForm.Fill(fieldDataExp)
|
||||
require.NoError(t, err)
|
||||
|
||||
var buf bytes.Buffer
|
||||
writer := model.NewPdfWriter()
|
||||
for i := range reader.PageList {
|
||||
err := writer.AddPage(reader.PageList[i])
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
err = writer.SetForms(reader.AcroForm)
|
||||
require.NoError(t, err)
|
||||
err = writer.Write(&buf)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Load field data from buffer.
|
||||
fieldData, err := LoadFromPDF(bytes.NewReader(buf.Bytes()))
|
||||
require.NoError(t, err)
|
||||
jsonData, err := fieldData.JSON()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Check field data for equality.
|
||||
require.Equal(t, jsonDataExp, jsonData)
|
||||
|
94
fjson/testdata/mixedfields.json
vendored
Normal file
94
fjson/testdata/mixedfields.json
vendored
Normal file
@ -0,0 +1,94 @@
|
||||
[
|
||||
{
|
||||
"name": "Given Name Text Box",
|
||||
"value": "Jane"
|
||||
},
|
||||
{
|
||||
"name": "Family Name Text Box",
|
||||
"value": "Doe"
|
||||
},
|
||||
{
|
||||
"name": "House nr Text Box",
|
||||
"value": "100"
|
||||
},
|
||||
{
|
||||
"name": "Address 2 Text Box",
|
||||
"value": "Generic Avenue"
|
||||
},
|
||||
{
|
||||
"name": "Postcode Text Box",
|
||||
"value": "11122"
|
||||
},
|
||||
{
|
||||
"name": "Country Combo Box",
|
||||
"value": "France"
|
||||
},
|
||||
{
|
||||
"name": "Height Formatted Field",
|
||||
"value": "175"
|
||||
},
|
||||
{
|
||||
"name": "City Text Box",
|
||||
"value": "Paris"
|
||||
},
|
||||
{
|
||||
"name": "Driving License Check Box",
|
||||
"value": "Yes",
|
||||
"options": [
|
||||
"Yes",
|
||||
"Off"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Favourite Colour List Box",
|
||||
"value": "Yellow"
|
||||
},
|
||||
{
|
||||
"name": "Language 1 Check Box",
|
||||
"value": "Yes",
|
||||
"options": [
|
||||
"Yes",
|
||||
"Off"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Language 2 Check Box",
|
||||
"value": "Off",
|
||||
"options": [
|
||||
"Yes",
|
||||
"Off"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Language 3 Check Box",
|
||||
"value": "Yes",
|
||||
"options": [
|
||||
"Yes",
|
||||
"Off"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Language 4 Check Box",
|
||||
"value": "Off",
|
||||
"options": [
|
||||
"Yes",
|
||||
"Off"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Language 5 Check Box",
|
||||
"value": "Yes",
|
||||
"options": [
|
||||
"Yes",
|
||||
"Off"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Gender List Box",
|
||||
"value": "Woman"
|
||||
},
|
||||
{
|
||||
"name": "Address 1 Text Box",
|
||||
"value": "Generic Street"
|
||||
}
|
||||
]
|
BIN
fjson/testdata/mixedfields.pdf
vendored
Normal file
BIN
fjson/testdata/mixedfields.pdf
vendored
Normal file
Binary file not shown.
@ -21,6 +21,9 @@ const (
|
||||
|
||||
// MissingCodeRune replaces runes that can't be decoded. '\ufffd' = <20>. Was '?'.
|
||||
MissingCodeRune = '\ufffd' // <20>
|
||||
|
||||
// MissingCodeString replaces strings that can't be decoded.
|
||||
MissingCodeString = string(MissingCodeRune)
|
||||
)
|
||||
|
||||
// CharCode is a character code or Unicode
|
||||
@ -41,7 +44,7 @@ type charRange struct {
|
||||
type fbRange struct {
|
||||
code0 CharCode
|
||||
code1 CharCode
|
||||
r0 rune
|
||||
r0 string
|
||||
}
|
||||
|
||||
// CIDSystemInfo contains information for identifying the character collection
|
||||
@ -106,8 +109,8 @@ type CMap struct {
|
||||
cidToCode map[CharCode]CharCode // CID -> charcode
|
||||
|
||||
// Used by ctype 2 CMaps.
|
||||
codeToUnicode map[CharCode]rune // CID -> Unicode
|
||||
unicodeToCode map[rune]CharCode // Unicode -> CID
|
||||
codeToUnicode map[CharCode]string // CID -> Unicode string
|
||||
unicodeToCode map[string]CharCode // Unicode string -> CID
|
||||
|
||||
// cached contains the raw CMap data. It is used by the Bytes method in
|
||||
// order to avoid generating the data for every call.
|
||||
@ -116,8 +119,13 @@ type CMap struct {
|
||||
cached []byte
|
||||
}
|
||||
|
||||
// NewToUnicodeCMap returns an identity CMap with codeToUnicode matching the `codeToUnicode` arg.
|
||||
func NewToUnicodeCMap(codeToUnicode map[CharCode]rune) *CMap {
|
||||
// NewToUnicodeCMap returns an identity CMap with codeToUnicode matching the `codeToRune` arg.
|
||||
func NewToUnicodeCMap(codeToRune map[CharCode]rune) *CMap {
|
||||
codeToUnicode := make(map[CharCode]string, len(codeToRune))
|
||||
for code, r := range codeToRune {
|
||||
codeToUnicode[code] = string(r)
|
||||
}
|
||||
|
||||
cmap := &CMap{
|
||||
name: "Adobe-Identity-UCS",
|
||||
ctype: 2,
|
||||
@ -128,13 +136,14 @@ func NewToUnicodeCMap(codeToUnicode map[CharCode]rune) *CMap {
|
||||
Supplement: 0,
|
||||
},
|
||||
codespaces: []Codespace{{Low: 0, High: 0xffff}},
|
||||
codeToCID: make(map[CharCode]CharCode),
|
||||
cidToCode: make(map[CharCode]CharCode),
|
||||
codeToUnicode: codeToUnicode,
|
||||
unicodeToCode: make(map[rune]CharCode),
|
||||
unicodeToCode: make(map[string]CharCode, len(codeToRune)),
|
||||
codeToCID: make(map[CharCode]CharCode, len(codeToRune)),
|
||||
cidToCode: make(map[CharCode]CharCode, len(codeToRune)),
|
||||
}
|
||||
|
||||
cmap.computeInverseMappings()
|
||||
|
||||
return cmap
|
||||
}
|
||||
|
||||
@ -148,8 +157,8 @@ func newCMap(isSimple bool) *CMap {
|
||||
nbits: nbits,
|
||||
codeToCID: make(map[CharCode]CharCode),
|
||||
cidToCode: make(map[CharCode]CharCode),
|
||||
codeToUnicode: make(map[CharCode]rune),
|
||||
unicodeToCode: make(map[rune]CharCode),
|
||||
codeToUnicode: make(map[CharCode]string),
|
||||
unicodeToCode: make(map[string]CharCode),
|
||||
}
|
||||
}
|
||||
|
||||
@ -254,9 +263,9 @@ func (cmap *CMap) computeInverseMappings() {
|
||||
}
|
||||
|
||||
// Generate Unicode -> CID map.
|
||||
for cid, r := range cmap.codeToUnicode {
|
||||
if c, ok := cmap.unicodeToCode[r]; !ok || (ok && c > cid) {
|
||||
cmap.unicodeToCode[r] = cid
|
||||
for cid, s := range cmap.codeToUnicode {
|
||||
if c, ok := cmap.unicodeToCode[s]; !ok || (ok && c > cid) {
|
||||
cmap.unicodeToCode[s] = cid
|
||||
}
|
||||
}
|
||||
|
||||
@ -277,19 +286,18 @@ func (cmap *CMap) CharcodeBytesToUnicode(data []byte) (string, int) {
|
||||
return "", 0
|
||||
}
|
||||
|
||||
var (
|
||||
parts []rune
|
||||
missing []CharCode
|
||||
)
|
||||
for _, code := range charcodes {
|
||||
parts := make([]string, len(charcodes))
|
||||
var missing []CharCode
|
||||
for i, code := range charcodes {
|
||||
s, ok := cmap.codeToUnicode[code]
|
||||
if !ok {
|
||||
missing = append(missing, code)
|
||||
s = MissingCodeRune
|
||||
s = MissingCodeString
|
||||
}
|
||||
parts = append(parts, s)
|
||||
parts[i] = s
|
||||
}
|
||||
unicode := string(parts)
|
||||
unicode := strings.Join(parts, "")
|
||||
|
||||
if len(missing) > 0 {
|
||||
common.Log.Debug("ERROR: CharcodeBytesToUnicode. Not in map.\n"+
|
||||
"\tdata=[% 02x]=%#q\n"+
|
||||
@ -305,17 +313,17 @@ func (cmap *CMap) CharcodeBytesToUnicode(data []byte) (string, int) {
|
||||
// CharcodeToUnicode converts a single character code `code` to a unicode string.
|
||||
// If `code` is not in the unicode map, '<27>' is returned.
|
||||
// NOTE: CharcodeBytesToUnicode is typically more efficient.
|
||||
func (cmap *CMap) CharcodeToUnicode(code CharCode) (rune, bool) {
|
||||
func (cmap *CMap) CharcodeToUnicode(code CharCode) (string, bool) {
|
||||
if s, ok := cmap.codeToUnicode[code]; ok {
|
||||
return s, true
|
||||
}
|
||||
return MissingCodeRune, false
|
||||
return MissingCodeString, false
|
||||
}
|
||||
|
||||
// RuneToCID maps the specified rune to a character identifier. If the provided
|
||||
// rune has no available mapping, the second return value is false.
|
||||
func (cmap *CMap) RuneToCID(r rune) (CharCode, bool) {
|
||||
cid, ok := cmap.unicodeToCode[r]
|
||||
// StringToCID maps the specified string to a character identifier. If the provided
|
||||
// string has no available mapping, the bool return value is false.
|
||||
func (cmap *CMap) StringToCID(s string) (CharCode, bool) {
|
||||
cid, ok := cmap.unicodeToCode[s]
|
||||
return cid, ok
|
||||
}
|
||||
|
||||
@ -453,7 +461,7 @@ func (cmap *CMap) toBfData() string {
|
||||
}
|
||||
|
||||
// codes is a sorted list of the codeToUnicode keys.
|
||||
var codes []CharCode
|
||||
codes := make([]CharCode, 0, len(cmap.codeToUnicode))
|
||||
for code := range cmap.codeToUnicode {
|
||||
codes = append(codes, code)
|
||||
}
|
||||
@ -473,7 +481,7 @@ func (cmap *CMap) toBfData() string {
|
||||
prevRune := cmap.codeToUnicode[codes[0]]
|
||||
for _, c := range codes[1:] {
|
||||
currRune := cmap.codeToUnicode[c]
|
||||
if c == currCharRange.code1+1 && currRune == prevRune+1 {
|
||||
if c == currCharRange.code1+1 && lastRune(currRune) == lastRune(prevRune)+1 {
|
||||
currCharRange.code1 = c
|
||||
} else {
|
||||
charRanges = append(charRanges, currCharRange)
|
||||
@ -508,8 +516,8 @@ func (cmap *CMap) toBfData() string {
|
||||
lines = append(lines, fmt.Sprintf("%d beginbfchar", n))
|
||||
for j := 0; j < n; j++ {
|
||||
code := fbChars[i*maxBfEntries+j]
|
||||
r := cmap.codeToUnicode[code]
|
||||
lines = append(lines, fmt.Sprintf("<%04x> <%04x>", code, r))
|
||||
s := cmap.codeToUnicode[code]
|
||||
lines = append(lines, fmt.Sprintf("<%04x> %s", code, hexCode(s)))
|
||||
}
|
||||
lines = append(lines, "endbfchar")
|
||||
}
|
||||
@ -521,8 +529,8 @@ func (cmap *CMap) toBfData() string {
|
||||
lines = append(lines, fmt.Sprintf("%d beginbfrange", n))
|
||||
for j := 0; j < n; j++ {
|
||||
rng := fbRanges[i*maxBfEntries+j]
|
||||
r := rng.r0
|
||||
lines = append(lines, fmt.Sprintf("<%04x><%04x> <%04x>", rng.code0, rng.code1, r))
|
||||
lines = append(lines, fmt.Sprintf("<%04x><%04x> %s",
|
||||
rng.code0, rng.code1, hexCode(rng.r0)))
|
||||
}
|
||||
lines = append(lines, "endbfrange")
|
||||
}
|
||||
@ -530,6 +538,22 @@ func (cmap *CMap) toBfData() string {
|
||||
return strings.Join(lines, "\n")
|
||||
}
|
||||
|
||||
// lastRune returns the last rune in `s`, or 0 if `s` is empty.
// Invalid UTF-8 bytes decode to U+FFFD, exactly as a []rune conversion would.
func lastRune(s string) rune {
	var last rune
	// Ranging over the string decodes it in place: no []rune copy is
	// allocated, and an empty string leaves `last` at its zero value instead
	// of indexing out of range.
	for _, r := range s {
		last = r
	}
	return last
}
|
||||
|
||||
// hexCode returns the CMap hex string for `s`, e.g. "ffi" -> "<006600660069>".
// Every rune is written as UTF-16BE code units, four hex digits per unit.
// Runes above U+FFFF are written as a surrogate pair (eight hex digits) so
// that the output stays a well-formed UTF-16BE string.
func hexCode(s string) string {
	var b strings.Builder
	b.WriteByte('<')
	for _, r := range s {
		if r > 0xffff {
			// Supplementary-plane rune: split into high/low surrogates.
			// A plain %04x would emit 5+ digits here and break the
			// 4-digits-per-unit layout.
			r -= 0x10000
			fmt.Fprintf(&b, "%04x%04x", 0xd800+(r>>10), 0xdc00+(r&0x3ff))
			continue
		}
		fmt.Fprintf(&b, "%04x", r)
	}
	b.WriteByte('>')
	return b.String()
}
|
||||
|
||||
const (
|
||||
maxBfEntries = 100 // Maximum number of entries in a bfchar or bfrange section.
|
||||
cmapHeader = `
|
||||
|
@ -105,7 +105,8 @@ func (cmap *CMap) parse() error {
|
||||
func (cmap *CMap) parseName() error {
|
||||
name := ""
|
||||
done := false
|
||||
for i := 0; i < 10 && !done; i++ {
|
||||
// NOTE(peterwilliams97): We need up to 20 iterations of this loop for some PDFs I have seen.
|
||||
for i := 0; i < 20 && !done; i++ {
|
||||
o, err := cmap.parseObject()
|
||||
if err != nil {
|
||||
return err
|
||||
@ -141,7 +142,6 @@ func (cmap *CMap) parseName() error {
|
||||
// parseType parses a cmap type and adds it to `cmap`.
|
||||
// cmap names are defined like this: /CMapType 1 def
|
||||
func (cmap *CMap) parseType() error {
|
||||
|
||||
ctype := 0
|
||||
done := false
|
||||
for i := 0; i < 3 && !done; i++ {
|
||||
@ -171,7 +171,6 @@ func (cmap *CMap) parseType() error {
|
||||
// We don't need the version. We do this to eat up the version code in the cmap definition
|
||||
// to reduce unhandled parse object warnings.
|
||||
func (cmap *CMap) parseVersion() error {
|
||||
|
||||
version := ""
|
||||
done := false
|
||||
for i := 0; i < 3 && !done; i++ {
|
||||
@ -471,7 +470,7 @@ func (cmap *CMap) parseBfchar() error {
|
||||
}
|
||||
return err
|
||||
}
|
||||
var target rune
|
||||
var target []rune
|
||||
switch v := o.(type) {
|
||||
case cmapOperand:
|
||||
if v.Operand == endbfchar {
|
||||
@ -480,16 +479,16 @@ func (cmap *CMap) parseBfchar() error {
|
||||
common.Log.Debug("ERROR: Unexpected operand. %#v", v)
|
||||
return ErrBadCMap
|
||||
case cmapHexString:
|
||||
target = hexToRune(v)
|
||||
target = hexToRunes(v)
|
||||
case cmapName:
|
||||
common.Log.Debug("ERROR: Unexpected name. %#v", v)
|
||||
target = MissingCodeRune
|
||||
target = []rune{MissingCodeRune}
|
||||
default:
|
||||
common.Log.Debug("ERROR: Unexpected type. %#v", o)
|
||||
return ErrBadCMap
|
||||
}
|
||||
|
||||
cmap.codeToUnicode[code] = target
|
||||
cmap.codeToUnicode[code] = string(target)
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -563,16 +562,17 @@ func (cmap *CMap) parseBfrange() error {
|
||||
if !ok {
|
||||
return errors.New("non-hex string in array")
|
||||
}
|
||||
r := hexToRune(hexs)
|
||||
cmap.codeToUnicode[code] = r
|
||||
runes := hexToRunes(hexs)
|
||||
cmap.codeToUnicode[code] = string(runes)
|
||||
}
|
||||
|
||||
case cmapHexString:
|
||||
// <codeFrom> <codeTo> <dst>, maps [from,to] to [dst,dst+to-from].
|
||||
r := hexToRune(v)
|
||||
runes := hexToRunes(v)
|
||||
n := len(runes)
|
||||
for code := srcCodeFrom; code <= srcCodeTo; code++ {
|
||||
cmap.codeToUnicode[code] = r
|
||||
r++
|
||||
cmap.codeToUnicode[code] = string(runes)
|
||||
runes[n-1]++
|
||||
}
|
||||
default:
|
||||
common.Log.Debug("ERROR: Unexpected type %T", o)
|
||||
|
@ -104,14 +104,14 @@ func TestCMapParser1(t *testing.T) {
|
||||
}
|
||||
|
||||
for k, expected := range expectedMappings {
|
||||
if v, ok := cmap.CharcodeToUnicode(k); !ok || v != expected {
|
||||
if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) {
|
||||
t.Errorf("incorrect mapping, expecting 0x%X ➞ 0x%X (%#v)", k, expected, v)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
v, _ := cmap.CharcodeToUnicode(0x99)
|
||||
if v != MissingCodeRune { //!= "notdef" {
|
||||
if v != MissingCodeString { //!= "notdef" {
|
||||
t.Errorf("Unmapped code, expected to map to undefined")
|
||||
return
|
||||
}
|
||||
@ -188,7 +188,7 @@ func TestCMapParser2(t *testing.T) {
|
||||
}
|
||||
|
||||
for k, expected := range expectedMappings {
|
||||
if v, ok := cmap.CharcodeToUnicode(k); !ok || v != expected {
|
||||
if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) {
|
||||
t.Errorf("incorrect mapping, expecting 0x%X ➞ 0x%X (got 0x%X)", k, expected, v)
|
||||
return
|
||||
}
|
||||
@ -297,7 +297,7 @@ func TestCMapParser3(t *testing.T) {
|
||||
0xd140: 0xa000,
|
||||
}
|
||||
for k, expected := range expectedMappings {
|
||||
if v, ok := cmap.CharcodeToUnicode(k); !ok || v != expected {
|
||||
if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) {
|
||||
t.Errorf("incorrect mapping: expecting 0x%02X ➞ 0x%02X (got 0x%02X)", k, expected, v)
|
||||
return
|
||||
}
|
||||
@ -407,7 +407,7 @@ func TestCMapParser4(t *testing.T) {
|
||||
}
|
||||
|
||||
for k, expected := range expectedMappings {
|
||||
if v, ok := cmap.CharcodeToUnicode(k); !ok || v != expected {
|
||||
if v, ok := cmap.CharcodeToUnicode(k); !ok || v != string(expected) {
|
||||
t.Errorf("incorrect mapping, expecting 0x%04X ➞ %+q (got %+q)", k, expected, v)
|
||||
return
|
||||
}
|
||||
@ -520,6 +520,7 @@ var (
|
||||
0x017b: 'Ż',
|
||||
0x017d: 'Ž',
|
||||
}
|
||||
|
||||
codeToUnicode3 = map[CharCode]rune{ // 93 entries
|
||||
0x0124: 'Ĥ',
|
||||
0x0125: 'ĥ',
|
||||
@ -695,7 +696,7 @@ func checkCmapWriteRead(t *testing.T, codeToUnicode map[CharCode]rune) {
|
||||
}
|
||||
u0 := codeToUnicode[code]
|
||||
u := cmap.codeToUnicode[code]
|
||||
if u != u0 {
|
||||
if u != string(u0) {
|
||||
t.Errorf("Unicode mismatch: i=%d code0=0x%04x expected=%q test=%q", i, code, u0, u)
|
||||
return
|
||||
}
|
||||
|
@ -48,8 +48,8 @@ func (enc CMapEncoder) Decode(raw []byte) string {
|
||||
if codes, ok := enc.codeToCID.BytesToCharcodes(raw); ok {
|
||||
var buf bytes.Buffer
|
||||
for _, code := range codes {
|
||||
r, _ := enc.CharcodeToRune(CharCode(code))
|
||||
buf.WriteRune(r)
|
||||
s, _ := enc.charcodeToString(CharCode(code))
|
||||
buf.WriteString(s)
|
||||
}
|
||||
|
||||
return buf.String()
|
||||
@ -67,7 +67,7 @@ func (enc CMapEncoder) RuneToCharcode(r rune) (CharCode, bool) {
|
||||
}
|
||||
|
||||
// Map rune to CID.
|
||||
cid, ok := enc.cidToUnicode.RuneToCID(r)
|
||||
cid, ok := enc.cidToUnicode.StringToCID(string(r))
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
@ -87,8 +87,13 @@ func (enc CMapEncoder) RuneToCharcode(r rune) (CharCode, bool) {
|
||||
// CharcodeToRune converts PDF character code `code` to a rune.
|
||||
// The bool return flag is true if there was a match, and false otherwise.
|
||||
func (enc CMapEncoder) CharcodeToRune(code CharCode) (rune, bool) {
|
||||
s, ok := enc.charcodeToString(code)
|
||||
return ([]rune(s))[0], ok
|
||||
}
|
||||
|
||||
func (enc CMapEncoder) charcodeToString(code CharCode) (string, bool) {
|
||||
if enc.cidToUnicode == nil {
|
||||
return MissingCodeRune, false
|
||||
return MissingCodeString, false
|
||||
}
|
||||
|
||||
// Map charcode to CID. If charcode to CID CMap is nil, assume Identity encoding.
|
||||
@ -96,7 +101,7 @@ func (enc CMapEncoder) CharcodeToRune(code CharCode) (rune, bool) {
|
||||
if enc.codeToCID != nil {
|
||||
var ok bool
|
||||
if cid, ok = enc.codeToCID.CharcodeToCID(cmap.CharCode(code)); !ok {
|
||||
return MissingCodeRune, false
|
||||
return MissingCodeString, false
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -18,7 +18,13 @@ import (
|
||||
)
|
||||
|
||||
// MissingCodeRune is the rune returned when there is no matching glyph. It was previously '?'.
|
||||
const MissingCodeRune = '\ufffd' // <20>
|
||||
const (
|
||||
// MissingCodeRune replaces runes that can't be decoded.
|
||||
MissingCodeRune = '\ufffd' // <20>
|
||||
|
||||
// MissingCodeString replaces strings that can't be decoded.
|
||||
MissingCodeString = string(MissingCodeRune)
|
||||
)
|
||||
|
||||
// GlyphToRune returns the rune corresponding to glyph `glyph` if there is one.
|
||||
// TODO: Can we return a string here? e.g. When we are extracting text, we want to get "ffi"
|
||||
|
@ -85,10 +85,11 @@ func (r *PdfReader) FlattenFields(allannots bool, appgen FieldAppearanceGenerato
|
||||
var annots []*PdfAnnotation
|
||||
|
||||
// Wrap the content streams.
|
||||
err := appgen.WrapContentStream(page)
|
||||
if err != nil {
|
||||
if appgen != nil {
|
||||
if err := appgen.WrapContentStream(page); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
annotations, err := page.GetAnnotations()
|
||||
if err != nil {
|
||||
|
@ -420,16 +420,26 @@ func (font *PdfFont) BytesToCharcodes(data []byte) []textencoding.CharCode {
|
||||
return charcodes
|
||||
}
|
||||
|
||||
// CharcodesToUnicodeWithStats is identical to CharcodesToUnicode except returns more statistical
|
||||
// CharcodesToUnicodeWithStats is identical to CharcodesToUnicode except it returns more statistical
|
||||
// information about hits and misses from the reverse mapping process.
|
||||
// NOTE: The number of runes returned may be greater than the number of charcodes.
|
||||
// TODO(peterwilliams97): Deprecate in v4 and use only CharcodesToStrings()
|
||||
func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCode) (runelist []rune, numHits, numMisses int) {
|
||||
texts, numHits, numMisses := font.CharcodesToStrings(charcodes)
|
||||
return []rune(strings.Join(texts, "")), numHits, numMisses
|
||||
}
|
||||
|
||||
// CharcodesToStrings returns the unicode strings corresponding to `charcodes`.
|
||||
// The int returns are the number of strings and the number of unconverted codes.
|
||||
// NOTE: The number of strings returned is equal to the number of charcodes
|
||||
func (font *PdfFont) CharcodesToStrings(charcodes []textencoding.CharCode) ([]string, int, int) {
|
||||
fontBase := font.baseFields()
|
||||
runes := make([]rune, 0, len(charcodes))
|
||||
numMisses = 0
|
||||
texts := make([]string, 0, len(charcodes))
|
||||
numMisses := 0
|
||||
for _, code := range charcodes {
|
||||
if fontBase.toUnicodeCmap != nil {
|
||||
if r, ok := fontBase.toUnicodeCmap.CharcodeToUnicode(cmap.CharCode(code)); ok {
|
||||
runes = append(runes, r)
|
||||
if s, ok := fontBase.toUnicodeCmap.CharcodeToUnicode(cmap.CharCode(code)); ok {
|
||||
texts = append(texts, s)
|
||||
continue
|
||||
}
|
||||
}
|
||||
@ -438,7 +448,7 @@ func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCo
|
||||
encoder := font.Encoder()
|
||||
if encoder != nil {
|
||||
if r, ok := encoder.CharcodeToRune(code); ok {
|
||||
runes = append(runes, r)
|
||||
texts = append(texts, string(r))
|
||||
continue
|
||||
}
|
||||
}
|
||||
@ -447,7 +457,7 @@ func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCo
|
||||
"\tfont=%s\n\tencoding=%s",
|
||||
code, charcodes, fontBase.isCIDFont(), font, encoder)
|
||||
numMisses++
|
||||
runes = append(runes, cmap.MissingCodeRune)
|
||||
texts = append(texts, cmap.MissingCodeString)
|
||||
}
|
||||
|
||||
if numMisses != 0 {
|
||||
@ -457,7 +467,7 @@ func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCo
|
||||
len(charcodes), numMisses, font)
|
||||
}
|
||||
|
||||
return runes, len(runes), numMisses
|
||||
return texts, len(texts), numMisses
|
||||
}
|
||||
|
||||
// CharcodeBytesToUnicode converts PDF character codes `data` to a Go unicode string.
|
||||
@ -487,8 +497,8 @@ func (font *PdfFont) CharcodeBytesToUnicode(data []byte) (string, int, int) {
|
||||
// 1) Use the ToUnicode CMap if there is one.
|
||||
// 2) Use the underlying font's encoding.
|
||||
func (font *PdfFont) CharcodesToUnicode(charcodes []textencoding.CharCode) []rune {
|
||||
strlist, _, _ := font.CharcodesToUnicodeWithStats(charcodes)
|
||||
return strlist
|
||||
runes, _, _ := font.CharcodesToUnicodeWithStats(charcodes)
|
||||
return runes
|
||||
}
|
||||
|
||||
// RunesToCharcodeBytes maps the provided runes to charcode bytes and it
|
||||
|
@ -317,26 +317,36 @@ func fillFieldValue(f *PdfField, val core.PdfObject) error {
|
||||
default:
|
||||
common.Log.Debug("ERROR: Unsupported text field V type: %T (%#v)", t, t)
|
||||
}
|
||||
case *PdfFieldButton, *PdfFieldChoice:
|
||||
switch t := val.(type) {
|
||||
case *PdfFieldButton:
|
||||
// See section 12.7.4.2.3 "Check Boxes" (pp. 440-441 PDF32000_2008).
|
||||
switch val.(type) {
|
||||
case *core.PdfObjectName:
|
||||
if len(t.String()) == 0 {
|
||||
return nil
|
||||
}
|
||||
for _, wa := range f.Annotations {
|
||||
wa.AS = val
|
||||
}
|
||||
if len(val.String()) > 0 {
|
||||
f.V = val
|
||||
setFieldAnnotAS(f, val)
|
||||
}
|
||||
case *core.PdfObjectString:
|
||||
if len(t.String()) == 0 {
|
||||
return nil
|
||||
if len(val.String()) > 0 {
|
||||
f.V = core.MakeName(val.String())
|
||||
setFieldAnnotAS(f, f.V)
|
||||
}
|
||||
common.Log.Debug("Unexpected string for button/choice field. Converting to name: '%s'", t.String())
|
||||
name := core.MakeName(t.String())
|
||||
for _, wa := range f.Annotations {
|
||||
wa.AS = name
|
||||
default:
|
||||
common.Log.Debug("ERROR: UNEXPECTED %s -> %v", f.PartialName(), val)
|
||||
f.V = val
|
||||
}
|
||||
case *PdfFieldChoice:
|
||||
// See section 12.7.4.4 "Choice Fields" (pp. 444-446 PDF32000_2008).
|
||||
switch val.(type) {
|
||||
case *core.PdfObjectName:
|
||||
if len(val.String()) > 0 {
|
||||
f.V = core.MakeString(val.String())
|
||||
setFieldAnnotAS(f, val)
|
||||
}
|
||||
case *core.PdfObjectString:
|
||||
if len(val.String()) > 0 {
|
||||
f.V = val
|
||||
setFieldAnnotAS(f, core.MakeName(val.String()))
|
||||
}
|
||||
f.V = name
|
||||
default:
|
||||
common.Log.Debug("ERROR: UNEXPECTED %s -> %v", f.PartialName(), val)
|
||||
f.V = val
|
||||
@ -347,3 +357,11 @@ func fillFieldValue(f *PdfField, val core.PdfObject) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setFieldAnnotAS sets the appearance stream of the field annotations to `val`.
|
||||
func setFieldAnnotAS(f *PdfField, val core.PdfObject) {
|
||||
for _, wa := range f.Annotations {
|
||||
wa.AS = val
|
||||
wa.ToPdfObject()
|
||||
}
|
||||
}
|
||||
|
@ -275,7 +275,7 @@ func (r *PdfReader) loadOutlines() (*PdfOutlineTreeNode, error) {
|
||||
outlineRootObj := core.ResolveReference(outlinesObj)
|
||||
common.Log.Trace("Outline root: %v", outlineRootObj)
|
||||
|
||||
if _, isNull := outlineRootObj.(*core.PdfObjectNull); isNull {
|
||||
if isNull := core.IsNullObject(outlineRootObj); isNull {
|
||||
common.Log.Trace("Outline root is null - no outlines")
|
||||
return nil, nil
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user