Removed naked returns. Fixed godoc. Reorganized object extractors

This commit is contained in:
Peter Williams 2018-07-25 12:00:49 +10:00
parent e886846c6a
commit b1cf3494f7
15 changed files with 282 additions and 339 deletions

View File

@ -19,4 +19,5 @@ var (
ErrTypeError = errors.New("type check error")
ErrRangeError = errors.New("range range error")
ErrNotSupported = errors.New("feature not currently supported")
ErrNotANumber = errors.New("not a number")
)

View File

@ -355,26 +355,28 @@ func (array *PdfObjectArray) Append(objects ...PdfObject) {
}
}
// ToFloat64Array returns a slice of all elements in the array as a float64 slice. An error is returned if the array
// contains non-numeric objects (each element can be either PdfObjectInteger or PdfObjectFloat).
// ToFloat64Array returns a slice of all elements in the array as a float64 slice. An error is
// returned if the array contains non-numeric objects (each element can be either PdfObjectInteger
// or PdfObjectFloat).
func (array *PdfObjectArray) ToFloat64Array() ([]float64, error) {
vals := []float64{}
for _, obj := range array.Elements() {
if number, is := obj.(*PdfObjectInteger); is {
vals = append(vals, float64(*number))
} else if number, is := obj.(*PdfObjectFloat); is {
vals = append(vals, float64(*number))
} else {
return nil, fmt.Errorf("Type error")
switch t := obj.(type) {
case *PdfObjectInteger:
vals = append(vals, float64(*t))
case *PdfObjectFloat:
vals = append(vals, float64(*t))
default:
return nil, ErrTypeError
}
}
return vals, nil
}
// ToIntegerArray returns a slice of all array elements as an int slice. An error is returned if the array contains
// non-integer objects. Each element can only be PdfObjectInteger.
// ToIntegerArray returns a slice of all array elements as an int slice. An error is returned if the
// array contains non-integer objects. Each element can only be PdfObjectInteger.
func (array *PdfObjectArray) ToIntegerArray() ([]int, error) {
vals := []int{}
@ -389,6 +391,7 @@ func (array *PdfObjectArray) ToIntegerArray() ([]int, error) {
return vals, nil
}
// String returns a string describing `array`.
func (array *PdfObjectArray) String() string {
outStr := "["
for ind, o := range array.Elements() {
@ -417,19 +420,62 @@ func (array *PdfObjectArray) DefaultWriteString() string {
// GetNumberAsFloat returns the contents of `obj` as a float if it is an integer or float, or an
// error if it isn't.
func GetNumberAsFloat(obj PdfObject) (float64, error) {
return getNumberAsFloat(obj)
switch t := obj.(type) {
case *PdfObjectFloat:
return float64(*t), nil
case *PdfObjectInteger:
return float64(*t), nil
}
return 0, ErrNotANumber
}
func getNumberAsFloat(obj PdfObject) (float64, error) {
if fObj, ok := obj.(*PdfObjectFloat); ok {
return float64(*fObj), nil
}
// IsNullObject reports whether `obj` holds a *PdfObjectNull.
func IsNullObject(obj PdfObject) bool {
	switch obj.(type) {
	case *PdfObjectNull:
		return true
	default:
		return false
	}
}
if iObj, ok := obj.(*PdfObjectInteger); ok {
return float64(*iObj), nil
// GetNumbersAsFloat converts a list of pdf objects representing floats or integers to a slice of
// float64 values. Conversion stops at the first element that GetNumberAsFloat rejects; in that
// case a nil slice and that error are returned.
func GetNumbersAsFloat(objects []PdfObject) ([]float64, error) {
	// Left as a nil slice so that an empty `objects` still yields a nil result.
	var floats []float64
	for _, obj := range objects {
		val, err := GetNumberAsFloat(obj)
		if err != nil {
			return nil, err
		}
		floats = append(floats, val)
	}
	return floats, nil
}
return 0, fmt.Errorf("Not a number")
// GetNumberAsInt64 returns the contents of `obj` as an int64 if it is an integer or a float, or
// ErrNotANumber if it is neither. Floats are accepted because some implementations store a number
// that is expected to be an integer in floating point format; such a value is converted with
// int64() and a debug message is logged.
func GetNumberAsInt64(obj PdfObject) (int64, error) {
	if iObj, isInt := obj.(*PdfObjectInteger); isInt {
		return int64(*iObj), nil
	}
	if fObj, isFloat := obj.(*PdfObjectFloat); isFloat {
		common.Log.Debug("Number expected as integer was stored as float (type casting used)")
		return int64(*fObj), nil
	}
	return 0, ErrNotANumber
}
// getNumberAsFloatOrNull returns the contents of `obj` as a *float64 if it is an integer or float,
// or a nil pointer (with a nil error) if `obj` is a PdfObjectNull. In all other cases
// ErrNotANumber is returned.
func getNumberAsFloatOrNull(obj PdfObject) (*float64, error) {
	var val float64
	switch t := obj.(type) {
	case *PdfObjectNull:
		// A PDF null is a valid "no value" here, not an error.
		return nil, nil
	case *PdfObjectFloat:
		val = float64(*t)
	case *PdfObjectInteger:
		val = float64(*t)
	default:
		return nil, ErrNotANumber
	}
	return &val, nil
}
// GetAsFloat64Slice returns the array as []float64 slice.
@ -438,8 +484,7 @@ func (array *PdfObjectArray) GetAsFloat64Slice() ([]float64, error) {
slice := []float64{}
for _, obj := range array.Elements() {
obj := TraceToDirectObject(obj)
number, err := getNumberAsFloat(obj)
number, err := GetNumberAsFloat(TraceToDirectObject(obj))
if err != nil {
return nil, fmt.Errorf("Array element not a number")
}
@ -459,6 +504,7 @@ func (d *PdfObjectDictionary) Merge(another *PdfObjectDictionary) {
}
}
// String returns a string describing `d`.
func (d *PdfObjectDictionary) String() string {
outStr := "Dict("
for _, k := range d.keys {
@ -541,7 +587,6 @@ func (d *PdfObjectDictionary) Remove(key PdfObjectName) {
// Note that we take care to perform a type switch. Otherwise if we would supply a nil value
// of another type, e.g. (PdfObjectArray*)(nil), then it would not be a PdfObject(nil) and thus
// would get set.
//
func (d *PdfObjectDictionary) SetIfNotNil(key PdfObjectName, val PdfObject) {
if val != nil {
switch t := val.(type) {
@ -595,6 +640,7 @@ func (d *PdfObjectDictionary) SetIfNotNil(key PdfObjectName, val PdfObject) {
}
}
// String returns a string describing `ref`, formatted as "Ref(<object number> <generation number>)".
func (ref *PdfObjectReference) String() string {
	objNum, genNum := ref.ObjectNumber, ref.GenerationNumber
	return fmt.Sprintf("Ref(%d %d)", objNum, genNum)
}
@ -604,6 +650,7 @@ func (ref *PdfObjectReference) DefaultWriteString() string {
return fmt.Sprintf("%d %d R", ref.ObjectNumber, ref.GenerationNumber)
}
// String returns a string describing `ind`.
func (ind *PdfIndirectObject) String() string {
// Avoid printing out the object, can cause problems with circular
// references.
@ -616,6 +663,7 @@ func (ind *PdfIndirectObject) DefaultWriteString() string {
return outStr
}
// String returns a string describing `stream`: its object number followed by its dictionary.
func (stream *PdfObjectStream) String() string {
	objNum := stream.ObjectNumber
	dict := stream.PdfObjectDictionary
	return fmt.Sprintf("Object stream %d: %s", objNum, dict)
}
@ -626,6 +674,7 @@ func (stream *PdfObjectStream) DefaultWriteString() string {
return outStr
}
// String returns a string describing `null`, which is always the literal "null".
func (null *PdfObjectNull) String() string {
	const repr = "null"
	return repr
}

View File

@ -45,7 +45,7 @@ func (e *Extractor) ExtractXYText() (*TextList, int, int, error) {
textList := &TextList{}
state := newTextState()
fontStack := fontStacker{}
var to *TextObject
var to *textObject
cstreamParser := contentstream.NewContentStreamParser(e.contents)
operations, err := cstreamParser.Parse()
@ -199,7 +199,7 @@ func (e *Extractor) ExtractXYText() (*TextList, int, int, error) {
common.Log.Debug("ERROR: Tm err=%v", err)
return err
}
floats, err := model.GetNumbersAsFloat(op.Params)
floats, err := core.GetNumbersAsFloat(op.Params)
if err != nil {
common.Log.Debug("ERROR: err=%v", err)
return err
@ -269,7 +269,7 @@ func (e *Extractor) ExtractXYText() (*TextList, int, int, error) {
// moveText "Td" Moves start of text by `tx`,`ty`
// Move to the start of the next line, offset from the start of the current line by (tx, ty).
// tx and ty are in unscaled text space units.
func (to *TextObject) moveText(tx, ty float64) {
func (to *textObject) moveText(tx, ty float64) {
// Not implemented yet
}
@ -279,7 +279,7 @@ func (to *TextObject) moveText(tx, ty float64) {
// have the same effect as this code:
// ty TL
// tx ty Td
func (to *TextObject) moveTextSetLeading(tx, ty float64) {
func (to *textObject) moveTextSetLeading(tx, ty float64) {
// Not implemented yet
// The following is supposed to be equivalent to the existing Unidoc implementation.
if tx > 0 {
@ -297,14 +297,14 @@ func (to *TextObject) moveTextSetLeading(tx, ty float64) {
// where Tl denotes the current leading parameter in the text state. The negative of Tl is used
// here because Tl is the text leading expressed as a positive number. Going to the next line
// entails decreasing the y coordinate. (page 250)
func (to *TextObject) nextLine() {
func (to *textObject) nextLine() {
// Not implemented yet
}
// setTextMatrix "Tm"
// Set the text matrix, Tm, and the text line matrix, Tlm to the Matrix specified by the 6 numbers
// in `f` (page 250)
func (to *TextObject) setTextMatrix(f []float64) {
func (to *textObject) setTextMatrix(f []float64) {
// Not implemented yet
// The following is supposed to be equivalent to the existing Unidoc implementation.
tx, ty := f[4], f[5]
@ -324,12 +324,12 @@ func (to *TextObject) setTextMatrix(f []float64) {
}
// showText "Tj" Show a text string
func (to *TextObject) showText(charcodes []byte) error {
func (to *textObject) showText(charcodes []byte) error {
return to.renderText(charcodes)
}
// showTextAdjusted "TJ" Show text with adjustable spacing
func (to *TextObject) showTextAdjusted(args []core.PdfObject) error {
func (to *textObject) showTextAdjusted(args []core.PdfObject) error {
for _, o := range args {
switch o.(type) {
case *core.PdfObjectFloat, *core.PdfObjectInteger:
@ -359,17 +359,17 @@ func (to *TextObject) showTextAdjusted(args []core.PdfObject) error {
}
// setTextLeading "TL" Set text leading
func (to *TextObject) setTextLeading(y float64) {
func (to *textObject) setTextLeading(y float64) {
// Not implemented yet
}
// setCharSpacing "Tc" Set character spacing
func (to *TextObject) setCharSpacing(x float64) {
func (to *textObject) setCharSpacing(x float64) {
// Not implemented yet
}
// setFont "Tf" Set font
func (to *TextObject) setFont(name string, size float64) error {
func (to *textObject) setFont(name string, size float64) error {
font, err := to.getFont(name)
if err == nil {
to.State.Tf = font
@ -389,22 +389,22 @@ func (to *TextObject) setFont(name string, size float64) error {
}
// setTextRenderMode "Tr" Set text rendering mode
func (to *TextObject) setTextRenderMode(mode int) {
func (to *textObject) setTextRenderMode(mode int) {
// Not implemented yet
}
// setTextRise "Ts" Set text rise
func (to *TextObject) setTextRise(y float64) {
func (to *textObject) setTextRise(y float64) {
// Not implemented yet
}
// setWordSpacing "Tw" Set word spacing
func (to *TextObject) setWordSpacing(y float64) {
func (to *textObject) setWordSpacing(y float64) {
// Not implemented yet
}
// setHorizScaling "Tz" Set horizontal scaling
func (to *TextObject) setHorizScaling(y float64) {
func (to *textObject) setHorizScaling(y float64) {
// Not implemented yet
}
@ -422,11 +422,11 @@ func floatParam(op *contentstream.ContentStreamOperation) (float64, error) {
// checkOp returns true if we are in a text stream and `op` has `numParams` params.
// If `hard` is true and the number of params don't match, an error is returned.
func (to *TextObject) checkOp(op *contentstream.ContentStreamOperation, numParams int,
func (to *textObject) checkOp(op *contentstream.ContentStreamOperation, numParams int,
hard bool) (ok bool, err error) {
if to == nil {
common.Log.Debug("%#q operand outside text", op.Operand)
return
return false, nil
}
if numParams >= 0 {
if len(op.Params) != numParams {
@ -435,15 +435,13 @@ func (to *TextObject) checkOp(op *contentstream.ContentStreamOperation, numParam
}
common.Log.Debug("ERROR: %#q should have %d input params, got %d %+v",
op.Operand, numParams, len(op.Params), op.Params)
return
return false, err
}
}
ok = true
return
return true, nil
}
// fontStacker is the PDF font stack implementation.
// I think this is correct. It has worked on my tests so far.
type fontStacker []*model.PdfFont
// String returns a string describing the current state of the font stack.
@ -475,27 +473,25 @@ func (fontStack *fontStacker) pop() *model.PdfFont {
}
// peek returns the element on the top of the font stack if there is one, or nil if there isn't.
func (fontStack *fontStacker) peek() (font *model.PdfFont) {
func (fontStack *fontStacker) peek() *model.PdfFont {
if fontStack.empty() {
return
return nil
}
font = (*fontStack)[len(*fontStack)-1]
return
return (*fontStack)[len(*fontStack)-1]
}
// get returns the `idx`'th element of the font stack if there is one, or nil if there isn't.
// idx = 0: bottom of font stack
// idx = len(fontstack) - 1: top of font stack
// idx = -n is same as idx = len(fontstack) - n, so fontstack.get(-1) is same as fontstack.peek()
func (fontStack *fontStacker) get(idx int) (font *model.PdfFont) {
func (fontStack *fontStacker) get(idx int) *model.PdfFont {
if idx < 0 {
idx += fontStack.size()
}
if idx < 0 || idx > fontStack.size()-1 {
return
return nil
}
font = (*fontStack)[idx]
return
return (*fontStack)[idx]
}
// empty returns true if the font stack is empty.
@ -512,7 +508,9 @@ func (fontStack *fontStacker) size() int {
// Some of these parameters are expressed in unscaled text space units. This means that they shall
// be specified in a coordinate system that shall be defined by the text matrix, Tm but shall not be
// scaled by the font size parameter, Tfs.
type TextState struct {
// textState represents the text state.
type textState struct {
// Tc float64 // Character spacing. Unscaled text space units.
// Tw float64 // Word spacing. Unscaled text space units.
// Th float64 // Horizontal scaling
@ -537,12 +535,13 @@ type TextState struct {
// | Tfs x Th 0 0 |
// Trm = | 0 Tfs 0 | × Tm × CTM
// | 0 Trise 1 |
//
type TextObject struct {
// textObject represents a PDF text object.
type textObject struct {
e *Extractor
gs contentstream.GraphicsState
fontStack *fontStacker
State *TextState
State *textState
// Tm contentstream.Matrix // Text matrix. For the character pointer.
// Tlm contentstream.Matrix // Text line matrix. For the start of line pointer.
Texts []XYText // Text gets written here.
@ -551,16 +550,16 @@ type TextObject struct {
xPos, yPos float64
}
// newTextState returns a default TextState
func newTextState() TextState {
// newTextState returns a default textState
func newTextState() textState {
// Not implemented yet
return TextState{}
return textState{}
}
// newTextObject returns a default TextObject
func newTextObject(e *Extractor, gs contentstream.GraphicsState, state *TextState,
fontStack *fontStacker) *TextObject {
return &TextObject{
// newTextObject returns a default textObject
func newTextObject(e *Extractor, gs contentstream.GraphicsState, state *textState,
fontStack *fontStacker) *textObject {
return &textObject{
e: e,
gs: gs,
fontStack: fontStack,
@ -571,26 +570,26 @@ func newTextObject(e *Extractor, gs contentstream.GraphicsState, state *TextStat
}
// renderRawText writes `text` directly to the extracted text
func (to *TextObject) renderRawText(text string) {
func (to *textObject) renderRawText(text string) {
to.Texts = append(to.Texts, XYText{text})
}
// renderText emits byte array `data` to the calling program
func (to *TextObject) renderText(data []byte) (err error) {
func (to *textObject) renderText(data []byte) error {
text := ""
if len(*to.fontStack) == 0 {
common.Log.Debug("ERROR: No font defined. data=%#q", string(data))
text = string(data)
err = model.ErrNoFont
} else {
font := to.fontStack.peek()
var numChars, numMisses int
text, numChars, numMisses = font.CharcodeBytesToUnicode(data)
to.State.numChars += numChars
to.State.numMisses += numMisses
return model.ErrNoFont
}
font := to.fontStack.peek()
var numChars, numMisses int
text, numChars, numMisses = font.CharcodeBytesToUnicode(data)
to.State.numChars += numChars
to.State.numMisses += numMisses
to.Texts = append(to.Texts, XYText{text})
return
return nil
}
// XYText represents text and its position in device coordinates
@ -622,8 +621,8 @@ func (tl *TextList) ToText() string {
}
// getFont returns the font named `name` if it exists in the page's resources or an error if it
// doesn't
func (to *TextObject) getFont(name string) (*model.PdfFont, error) {
// doesn't.
func (to *textObject) getFont(name string) (*model.PdfFont, error) {
// This is a hack for testing.
if name == "UniDocCourier" {
@ -642,29 +641,27 @@ func (to *TextObject) getFont(name string) (*model.PdfFont, error) {
}
// getFontDict returns the font object called `name` if it exists in the page's Font resources or
// an error if it doesn't
// an error if it doesn't.
// XXX: TODO: Can we cache font values?
func (to *TextObject) getFontDict(name string) (fontObj core.PdfObject, err error) {
func (to *textObject) getFontDict(name string) (fontObj core.PdfObject, err error) {
resources := to.e.resources
if resources == nil {
common.Log.Debug("getFontDict. No resources. name=%#q", name)
return
return nil, nil
}
fontObj, found := resources.GetFontByName(core.PdfObjectName(name))
if !found {
err = errors.New("Font not in resources")
common.Log.Debug("ERROR: getFontDict: Font not found: name=%#q err=%v", name, err)
return
common.Log.Debug("ERROR: getFontDict: Font not found: name=%#q", name)
return nil, errors.New("Font not in resources")
}
return
return fontObj, nil
}
// getCharMetrics returns the character metrics for the code points in `text` for font `font`.
func getCharMetrics(font *model.PdfFont, text string) (metrics []fonts.CharMetrics, err error) {
encoder := font.Encoder()
if encoder == nil {
err = errors.New("No font encoder")
return nil, errors.New("No font encoder")
}
for _, r := range text {
glyph, found := encoder.RuneToGlyph(r)
@ -678,5 +675,5 @@ func getCharMetrics(font *model.PdfFont, text string) (metrics []fonts.CharMetri
}
metrics = append(metrics, m)
}
return
return metrics, nil
}

View File

@ -9,10 +9,8 @@ import (
"bytes"
"fmt"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/common/license"
"github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/model"
)
func procBuf(buf *bytes.Buffer) {
@ -38,19 +36,16 @@ func procBuf(buf *bytes.Buffer) {
// toFloatXY returns `objs` as 2 floats, if that's what it is, or an error if it isn't.
func toFloatXY(objs []core.PdfObject) (x, y float64, err error) {
if len(objs) != 2 {
err = fmt.Errorf("Invalid number of params: %d", len(objs))
common.Log.Debug("toFloatXY: err=%v", err)
return
return 0, 0, fmt.Errorf("Invalid number of params: %d", len(objs))
}
floats, err := model.GetNumbersAsFloat(objs)
floats, err := core.GetNumbersAsFloat(objs)
if err != nil {
return
return 0, 0, err
}
x, y = floats[0], floats[1]
return
return floats[0], floats[1], nil
}
// truncate returns the first `n` characters in string `s`
// truncate returns the first `n` characters in string `s`.
func truncate(s string, n int) string {
if len(s) < n {
return s

View File

@ -58,7 +58,7 @@ type CMap struct {
toUnicodeIdentity bool
}
// String retuns a human readable description of `cmap`.
// String returns a human readable description of `cmap`.
func (cmap *CMap) String() string {
si := cmap.systemInfo
parts := []string{
@ -104,30 +104,25 @@ func (info *CIDSystemInfo) String() string {
func NewCIDSystemInfo(obj core.PdfObject) (info CIDSystemInfo, err error) {
d, ok := core.GetDict(obj)
if !ok {
err = core.ErrTypeError
return
return CIDSystemInfo{}, core.ErrTypeError
}
registry, ok := core.GetStringVal(d.Get("Registry"))
if !ok {
err = core.ErrTypeError
return
return CIDSystemInfo{}, core.ErrTypeError
}
ordering, ok := core.GetStringVal(d.Get("Ordering"))
if !ok {
err = core.ErrTypeError
return
return CIDSystemInfo{}, core.ErrTypeError
}
supplement, ok := core.GetIntVal(d.Get("Supplement"))
if !ok {
err = core.ErrTypeError
return
return CIDSystemInfo{}, core.ErrTypeError
}
info = CIDSystemInfo{
return CIDSystemInfo{
Registry: registry,
Ordering: ordering,
Supplement: supplement,
}
return
}, nil
}
// Name returns the name of the CMap.
@ -231,13 +226,13 @@ func (cmap *CMap) matchCode(data []byte) (code CharCode, n int, matched bool) {
}
matched = cmap.inCodespace(code, j+1)
if matched {
return
return code, n, true
}
}
// No codespace matched data. This is a serious problem.
common.Log.Debug("ERROR: No codespace matches bytes=[% 02x]=%#q cmap=%s",
data, string(data), cmap)
return
return 0, 0, false
}
// inCodespace returns true if `code` is in the `numBytes` byte codespace.

View File

@ -1890,7 +1890,7 @@ func newPdfBorderStyleFromPdfObject(obj PdfObject) (*PdfBorderStyle, error) {
// Border width.
if obj := d.Get("W"); obj != nil {
val, err := getNumberAsFloat(obj)
val, err := GetNumberAsFloat(obj)
if err != nil {
common.Log.Debug("Error retrieving W: %v", err)
return nil, err

View File

@ -274,7 +274,7 @@ func (this *PdfColorspaceDeviceGray) ColorFromPdfObjects(objects []PdfObject) (P
return nil, errors.New("Range check")
}
floats, err := getNumbersAsFloat(objects)
floats, err := GetNumbersAsFloat(objects)
if err != nil {
return nil, err
}
@ -418,7 +418,7 @@ func (this *PdfColorspaceDeviceRGB) ColorFromPdfObjects(objects []PdfObject) (Pd
return nil, errors.New("Range check")
}
floats, err := getNumbersAsFloat(objects)
floats, err := GetNumbersAsFloat(objects)
if err != nil {
return nil, err
}
@ -569,7 +569,7 @@ func (this *PdfColorspaceDeviceCMYK) ColorFromPdfObjects(objects []PdfObject) (P
return nil, errors.New("Range check")
}
floats, err := getNumbersAsFloat(objects)
floats, err := GetNumbersAsFloat(objects)
if err != nil {
return nil, err
}
@ -792,7 +792,7 @@ func newPdfColorspaceCalGrayFromPdfObject(obj PdfObject) (*PdfColorspaceCalGray,
obj = dict.Get("Gamma")
if obj != nil {
obj = TraceToDirectObject(obj)
gamma, err := getNumberAsFloat(obj)
gamma, err := GetNumberAsFloat(obj)
if err != nil {
return nil, fmt.Errorf("CalGray: gamma not a number")
}
@ -850,7 +850,7 @@ func (this *PdfColorspaceCalGray) ColorFromPdfObjects(objects []PdfObject) (PdfC
return nil, errors.New("Range check")
}
floats, err := getNumbersAsFloat(objects)
floats, err := GetNumbersAsFloat(objects)
if err != nil {
return nil, err
}
@ -1177,7 +1177,7 @@ func (this *PdfColorspaceCalRGB) ColorFromPdfObjects(objects []PdfObject) (PdfCo
return nil, errors.New("Range check")
}
floats, err := getNumbersAsFloat(objects)
floats, err := GetNumbersAsFloat(objects)
if err != nil {
return nil, err
}
@ -1510,7 +1510,7 @@ func (this *PdfColorspaceLab) ColorFromPdfObjects(objects []PdfObject) (PdfColor
return nil, errors.New("Range check")
}
floats, err := getNumbersAsFloat(objects)
floats, err := GetNumbersAsFloat(objects)
if err != nil {
return nil, err
}
@ -2193,7 +2193,7 @@ func newPdfColorspaceSpecialIndexedFromPdfObject(obj PdfObject) (*PdfColorspaceS
// Get hi val.
obj = array.Get(2)
val, err := getNumberAsInt64(obj)
val, err := GetNumberAsInt64(obj)
if err != nil {
return nil, err
}
@ -2268,7 +2268,7 @@ func (this *PdfColorspaceSpecialIndexed) ColorFromPdfObjects(objects []PdfObject
return nil, errors.New("Range check")
}
floats, err := getNumbersAsFloat(objects)
floats, err := GetNumbersAsFloat(objects)
if err != nil {
return nil, err
}
@ -2483,7 +2483,7 @@ func (this *PdfColorspaceSpecialSeparation) ColorFromPdfObjects(objects []PdfObj
return nil, errors.New("Range check")
}
floats, err := getNumbersAsFloat(objects)
floats, err := GetNumbersAsFloat(objects)
if err != nil {
return nil, err
}
@ -2704,7 +2704,7 @@ func (this *PdfColorspaceDeviceN) ColorFromPdfObjects(objects []PdfObject) (PdfC
return nil, errors.New("Range check")
}
floats, err := getNumbersAsFloat(objects)
floats, err := GetNumbersAsFloat(objects)
if err != nil {
return nil, err
}

View File

@ -36,7 +36,7 @@ type fontFile struct {
encoder textencoding.TextEncoder
}
// String retuns a human readable description of `fontfile`.
// String returns a human readable description of `fontfile`.
func (fontfile *fontFile) String() string {
encoding := "[None]"
if fontfile.encoder != nil {
@ -178,15 +178,14 @@ func getAsciiSections(data []byte) (keySection, encodingSection string, err erro
common.Log.Trace("getAsciiSections: %d ", len(data))
loc := reDictBegin.FindIndex(data)
if loc == nil {
err = core.ErrTypeError
common.Log.Debug("ERROR: getAsciiSections. No dict.")
return
return "", "", core.ErrTypeError
}
i0 := loc[1]
i := strings.Index(string(data[i0:]), encodingBegin)
if i < 0 {
keySection = string(data[i0:])
return
return keySection, "", nil
}
i1 := i0 + i
keySection = string(data[i0:i1])
@ -194,13 +193,12 @@ func getAsciiSections(data []byte) (keySection, encodingSection string, err erro
i2 := i1
i = strings.Index(string(data[i2:]), encodingEnd)
if i < 0 {
err = core.ErrTypeError
common.Log.Debug("ERROR: getAsciiSections. err=%v", err)
return
return "", "", core.ErrTypeError
}
i3 := i2 + i
encodingSection = string(data[i2:i3])
return
return keySection, encodingSection, nil
}
// ~/testdata/private/invoice61781040.pdf has \r line endings

View File

@ -102,17 +102,16 @@ type ttfParser struct {
}
// NewFontFile2FromPdfObject returns a TtfType describing the TrueType font file in PdfObject `obj`.
func NewFontFile2FromPdfObject(obj core.PdfObject) (rec TtfType, err error) {
func NewFontFile2FromPdfObject(obj core.PdfObject) (TtfType, error) {
obj = core.TraceToDirectObject(obj)
streamObj, ok := obj.(*core.PdfObjectStream)
if !ok {
common.Log.Debug("ERROR: FontFile2 must be a stream (%T)", obj)
err = core.ErrTypeError
return
return TtfType{}, core.ErrTypeError
}
data, err := core.DecodeStream(streamObj)
if err != nil {
return
return TtfType{}, err
}
// Uncomment these lines to see the contents of the font file. For debugging.
@ -121,33 +120,30 @@ func NewFontFile2FromPdfObject(obj core.PdfObject) (rec TtfType, err error) {
// fmt.Println("===============####===============")
t := ttfParser{f: bytes.NewReader(data)}
rec, err = t.Parse()
return
return t.Parse()
}
// TtfParse returns a TtfType describing the TrueType font file in disk file `fileStr`.
func TtfParse(fileStr string) (rec TtfType, err error) {
func TtfParse(fileStr string) (TtfType, error) {
f, err := os.Open(fileStr)
if err != nil {
return
return TtfType{}, err
}
defer f.Close()
t := ttfParser{f: f}
rec, err = t.Parse()
return
return t.Parse()
}
// Parse returns a TtfType describing the TrueType font file in io.Reader `t`.f.
func (t *ttfParser) Parse() (TtfRec TtfType, err error) {
func (t *ttfParser) Parse() (TtfType, error) {
version, err := t.ReadStr(4)
if err != nil {
return
return TtfType{}, err
}
if version == "OTTO" {
err = errors.New("fonts based on PostScript outlines are not supported")
return
return TtfType{}, errors.New("fonts based on PostScript outlines are not supported")
}
if version != "\x00\x01\x00\x00" {
common.Log.Debug("ERROR: Unrecognized TrueType file format. version=%q", version)
@ -159,7 +155,7 @@ func (t *ttfParser) Parse() (TtfRec TtfType, err error) {
for j := 0; j < numTables; j++ {
tag, err = t.ReadStr(4)
if err != nil {
return
return TtfType{}, err
}
t.Skip(4) // checkSum
offset := t.ReadULong()
@ -169,13 +165,10 @@ func (t *ttfParser) Parse() (TtfRec TtfType, err error) {
common.Log.Trace(describeTables(t.tables))
err = t.ParseComponents()
if err != nil {
return
if err = t.ParseComponents(); err != nil {
return TtfType{}, err
}
TtfRec = t.rec
return
return t.rec, nil
}
// describeTables returns a string describing `tables`, the tables in a TrueType font file.
@ -204,62 +197,55 @@ func describeTables(tables map[string]uint32) string {
// "hmtx"
// "fpgm"
// "gasp"
func (t *ttfParser) ParseComponents() (err error) {
func (t *ttfParser) ParseComponents() error {
// Mandatory tables.
err = t.ParseHead()
if err != nil {
return
if err := t.ParseHead(); err != nil {
return err
}
err = t.ParseHhea()
if err != nil {
return
if err := t.ParseHhea(); err != nil {
return err
}
err = t.ParseMaxp()
if err != nil {
return
if err := t.ParseMaxp(); err != nil {
return err
}
err = t.ParseHmtx()
if err != nil {
return
if err := t.ParseHmtx(); err != nil {
return err
}
// Optional tables.
if _, ok := t.tables["name"]; ok {
err = t.ParseName()
if err != nil {
return
if err := t.ParseName(); err != nil {
return err
}
}
if _, ok := t.tables["OS/2"]; ok {
err = t.ParseOS2()
if err != nil {
return
if err := t.ParseOS2(); err != nil {
return err
}
}
if _, ok := t.tables["post"]; ok {
err = t.ParsePost()
if err != nil {
return
if err := t.ParsePost(); err != nil {
return err
}
}
if _, ok := t.tables["cmap"]; ok {
err = t.ParseCmap()
if err != nil {
return
if err := t.ParseCmap(); err != nil {
return err
}
}
return
return nil
}
func (t *ttfParser) ParseHead() (err error) {
err = t.Seek("head")
func (t *ttfParser) ParseHead() error {
if err := t.Seek("head"); err != nil {
return err
}
t.Skip(3 * 4) // version, fontRevision, checkSumAdjustment
magicNumber := t.ReadULong()
if magicNumber != 0x5F0F3CF5 {
err = fmt.Errorf("incorrect magic number")
return
return fmt.Errorf("incorrect magic number")
}
t.Skip(2) // flags
t.rec.UnitsPerEm = t.ReadUShort()
@ -268,47 +254,49 @@ func (t *ttfParser) ParseHead() (err error) {
t.rec.Ymin = t.ReadShort()
t.rec.Xmax = t.ReadShort()
t.rec.Ymax = t.ReadShort()
return
return nil
}
func (t *ttfParser) ParseHhea() (err error) {
err = t.Seek("hhea")
if err == nil {
t.Skip(4 + 15*2)
t.numberOfHMetrics = t.ReadUShort()
func (t *ttfParser) ParseHhea() error {
if err := t.Seek("hhea"); err != nil {
return err
}
return
t.Skip(4 + 15*2)
t.numberOfHMetrics = t.ReadUShort()
return nil
}
func (t *ttfParser) ParseMaxp() (err error) {
err = t.Seek("maxp")
if err == nil {
t.Skip(4)
t.numGlyphs = t.ReadUShort()
func (t *ttfParser) ParseMaxp() error {
if err := t.Seek("maxp"); err != nil {
return err
}
return
t.Skip(4)
t.numGlyphs = t.ReadUShort()
return nil
}
func (t *ttfParser) ParseHmtx() (err error) {
err = t.Seek("hmtx")
if err == nil {
t.rec.Widths = make([]uint16, 0, 8)
for j := uint16(0); j < t.numberOfHMetrics; j++ {
t.rec.Widths = append(t.rec.Widths, t.ReadUShort())
t.Skip(2) // lsb
}
if t.numberOfHMetrics < t.numGlyphs {
lastWidth := t.rec.Widths[t.numberOfHMetrics-1]
for j := t.numberOfHMetrics; j < t.numGlyphs; j++ {
t.rec.Widths = append(t.rec.Widths, lastWidth)
}
func (t *ttfParser) ParseHmtx() error {
if err := t.Seek("hmtx"); err != nil {
return err
}
t.rec.Widths = make([]uint16, 0, 8)
for j := uint16(0); j < t.numberOfHMetrics; j++ {
t.rec.Widths = append(t.rec.Widths, t.ReadUShort())
t.Skip(2) // lsb
}
if t.numberOfHMetrics < t.numGlyphs {
lastWidth := t.rec.Widths[t.numberOfHMetrics-1]
for j := t.numberOfHMetrics; j < t.numGlyphs; j++ {
t.rec.Widths = append(t.rec.Widths, lastWidth)
}
}
return
return nil
}
// parseCmapSubtable31 parses information from an (3,1) subtable (Windows Unicode).
func (t *ttfParser) parseCmapSubtable31(offset31 int64) (err error) {
func (t *ttfParser) parseCmapSubtable31(offset31 int64) error {
startCount := make([]uint16, 0, 8)
endCount := make([]uint16, 0, 8)
idDelta := make([]int16, 0, 8)
@ -317,8 +305,7 @@ func (t *ttfParser) parseCmapSubtable31(offset31 int64) (err error) {
t.f.Seek(int64(t.tables["cmap"])+offset31, os.SEEK_SET)
format := t.ReadUShort()
if format != 4 {
err = fmt.Errorf("unexpected subtable format: %d", format)
return
return fmt.Errorf("unexpected subtable format: %d", format)
}
t.Skip(2 * 2) // length, language
segCount := int(t.ReadUShort() / 2)
@ -366,7 +353,7 @@ func (t *ttfParser) parseCmapSubtable31(offset31 int64) (err error) {
}
}
}
return
return nil
}
// parseCmapSubtable10 parses information from an (1,0) subtable (symbol).
@ -410,10 +397,10 @@ func (t *ttfParser) parseCmapSubtable10(offset10 int64) error {
}
// ParseCmap parses the cmap table in a TrueType font.
func (t *ttfParser) ParseCmap() (err error) {
func (t *ttfParser) ParseCmap() error {
var offset int64
if err = t.Seek("cmap"); err != nil {
return
if err := t.Seek("cmap"); err != nil {
return err
}
common.Log.Debug("ParseCmap")
t.ReadUShort() // version is ignored.
@ -432,21 +419,19 @@ func (t *ttfParser) ParseCmap() (err error) {
// Latin font support based on (3,1) table encoding.
if offset31 != 0 {
err = t.parseCmapSubtable31(offset31)
if err != nil {
return
if err := t.parseCmapSubtable31(offset31); err != nil {
return err
}
}
// Many non-Latin fonts (including asian fonts) use subtable (1,0).
if offset10 != 0 {
err = t.parseCmapVersion(offset10)
if err != nil {
return
if err := t.parseCmapVersion(offset10); err != nil {
return err
}
}
return
return nil
}
func (t *ttfParser) parseCmapVersion(offset int64) error {
@ -511,9 +496,9 @@ func (t *ttfParser) parseCmapFormat6() error {
return nil
}
func (t *ttfParser) ParseName() (err error) {
if err = t.Seek("name"); err != nil {
return
func (t *ttfParser) ParseName() error {
if err := t.Seek("name"); err != nil {
return err
}
tableOffset, _ := t.f.Seek(0, os.SEEK_CUR)
t.rec.PostScriptName = ""
@ -528,28 +513,27 @@ func (t *ttfParser) ParseName() (err error) {
if nameID == 6 {
// PostScript name
t.f.Seek(int64(tableOffset)+int64(stringOffset)+int64(offset), os.SEEK_SET)
var s string
s, err = t.ReadStr(int(length))
s, err := t.ReadStr(int(length))
if err != nil {
return
return err
}
s = strings.Replace(s, "\x00", "", -1)
var re *regexp.Regexp
if re, err = regexp.Compile("[(){}<> /%[\\]]"); err != nil {
return
re, err := regexp.Compile("[(){}<> /%[\\]]")
if err != nil {
return err
}
t.rec.PostScriptName = re.ReplaceAllString(s, "")
}
}
if t.rec.PostScriptName == "" {
err = fmt.Errorf("the name PostScript was not found")
return fmt.Errorf("the name PostScript was not found")
}
return
return nil
}
func (t *ttfParser) ParseOS2() (err error) {
if err = t.Seek("OS/2"); err != nil {
return
func (t *ttfParser) ParseOS2() error {
if err := t.Seek("OS/2"); err != nil {
return err
}
version := t.ReadUShort()
t.Skip(3 * 2) // xAvgCharWidth, usWeightClass, usWidthClass
@ -567,13 +551,13 @@ func (t *ttfParser) ParseOS2() (err error) {
} else {
t.rec.CapHeight = 0
}
return
return nil
}
// ParsePost reads the "post" section in a TrueType font table and sets t.rec.GlyphNames.
func (t *ttfParser) ParsePost() (err error) {
if err = t.Seek("post"); err != nil {
return
func (t *ttfParser) ParsePost() error {
if err := t.Seek("post"); err != nil {
return err
}
formatType := t.Read32Fixed()
@ -644,7 +628,7 @@ func (t *ttfParser) ParsePost() (err error) {
common.Log.Debug("ERROR: Unknown formatType=%f", formatType)
}
return
return nil
}
// The 258 standard mac glyph names used in 'post' format 1 and 2.
@ -710,49 +694,45 @@ func (t *ttfParser) Skip(n int) {
// ReadStr reads `length` bytes from the file and returns them as a string, or an error if there was
// a problem. A read that yields fewer than `length` bytes is reported as an
// "unable to read %d bytes" error.
//
// NOTE(review): this text is a rendered commit diff, so the old named-result version and the new
// explicit-return version of the function are interleaved line by line below.
// NOTE(review): a single Read call is allowed to return fewer bytes than requested without an
// error; io.ReadFull would keep reading until the buffer is full. Confirm whether a short read
// should really be treated as a failure here.
func (t *ttfParser) ReadStr(length int) (str string, err error) {
var n int
func (t *ttfParser) ReadStr(length int) (string, error) {
buf := make([]byte, length)
n, err = t.f.Read(buf)
n, err := t.f.Read(buf)
if err != nil {
return
return "", err
} else if n != length {
return "", fmt.Errorf("unable to read %d bytes", length)
}
if n == length {
str = string(buf)
} else {
err = fmt.Errorf("unable to read %d bytes", length)
}
return
return string(buf), nil
}
// ReadByte reads a byte and returns it as unsigned.
//
// NOTE(review): the error returned by binary.Read is discarded, so a failed read cannot be told
// apart from successfully reading the value 0.
func (t *ttfParser) ReadByte() (val uint8) {
binary.Read(t.f, binary.BigEndian, &val)
// The two returns are the old (naked) and new (explicit) sides of the rendered diff.
return
return val
}
// ReadSByte reads a byte and returns it as signed.
//
// NOTE(review): the error returned by binary.Read is discarded, so a failed read cannot be told
// apart from successfully reading the value 0.
func (t *ttfParser) ReadSByte() (val int8) {
binary.Read(t.f, binary.BigEndian, &val)
// The two returns are the old (naked) and new (explicit) sides of the rendered diff.
return
return val
}
// ReadUShort reads 2 bytes and returns them as a big endian unsigned 16 bit integer.
//
// NOTE(review): the error returned by binary.Read is discarded, so a failed or short read cannot
// be told apart from successfully reading the value 0.
func (t *ttfParser) ReadUShort() (val uint16) {
binary.Read(t.f, binary.BigEndian, &val)
// The two returns are the old (naked) and new (explicit) sides of the rendered diff.
return
return val
}
// ReadShort reads 2 bytes and returns them as a big endian signed 16 bit integer.
//
// NOTE(review): the error returned by binary.Read is discarded, so a failed or short read cannot
// be told apart from successfully reading the value 0.
func (t *ttfParser) ReadShort() (val int16) {
binary.Read(t.f, binary.BigEndian, &val)
// The two returns are the old (naked) and new (explicit) sides of the rendered diff.
return
return val
}
// ReadULong reads 4 bytes and returns them as a big endian unsigned 32 bit integer.
//
// NOTE(review): the error returned by binary.Read is discarded, so a failed or short read cannot
// be told apart from successfully reading the value 0.
func (t *ttfParser) ReadULong() (val uint32) {
binary.Read(t.f, binary.BigEndian, &val)
// The two returns are the old (naked) and new (explicit) sides of the rendered diff.
return
return val
}
// ReadULong reads 4 bytes and returns them as a float, the first 2 bytes for the whole number and

View File

@ -453,11 +453,11 @@ func newPdfFunctionType2FromPdfObject(obj PdfObject) (*PdfFunctionType2, error)
if len(fun.C0) != len(fun.C1) {
common.Log.Error("C0 and C1 not matching")
return nil, errors.New("Range check")
return nil, ErrRangeError
}
// Exponent.
N, err := getNumberAsFloat(TraceToDirectObject(dict.Get("N")))
N, err := GetNumberAsFloat(TraceToDirectObject(dict.Get("N")))
if err != nil {
common.Log.Error("N missing or invalid, dict: %s", dict.String())
return nil, err

View File

@ -114,7 +114,7 @@ func newPdfOutlineFromIndirectObject(container *PdfIndirectObject) (*PdfOutline,
if obj := dict.Get("Count"); obj != nil {
// This should always be an integer, but in a few cases has been a float.
count, err := getNumberAsInt64(obj)
count, err := GetNumberAsInt64(obj)
if err != nil {
return nil, err
}

View File

@ -269,7 +269,7 @@ func newPdfTilingPatternFromDictionary(dict *PdfObjectDictionary) (*PdfTilingPat
common.Log.Debug("XStep missing")
return nil, ErrRequiredAttributeMissing
}
xStep, err := getNumberAsFloat(obj)
xStep, err := GetNumberAsFloat(obj)
if err != nil {
common.Log.Debug("Error getting XStep as float: %v", xStep)
return nil, err
@ -282,7 +282,7 @@ func newPdfTilingPatternFromDictionary(dict *PdfObjectDictionary) (*PdfTilingPat
common.Log.Debug("YStep missing")
return nil, ErrRequiredAttributeMissing
}
yStep, err := getNumberAsFloat(obj)
yStep, err := GetNumberAsFloat(obj)
if err != nil {
common.Log.Debug("Error getting YStep as float: %v", yStep)
return nil, err

View File

@ -39,7 +39,7 @@ func NewPdfPageResourcesFromDict(dict *PdfObjectDictionary) (*PdfPageResources,
if obj := dict.Get("ExtGState"); obj != nil {
r.ExtGState = obj
}
if obj := dict.Get("ColorSpace"); obj != nil && !isNullObject(obj) {
if obj := dict.Get("ColorSpace"); obj != nil && !IsNullObject(obj) {
colorspaces, err := newPdfPageResourcesColorspacesFromPdfObject(obj)
if err != nil {
return nil, err

View File

@ -35,22 +35,22 @@ func NewPdfRectangle(arr PdfObjectArray) (*PdfRectangle, error) {
}
var err error
rect.Llx, err = getNumberAsFloat(arr.Get(0))
rect.Llx, err = GetNumberAsFloat(arr.Get(0))
if err != nil {
return nil, err
}
rect.Lly, err = getNumberAsFloat(arr.Get(1))
rect.Lly, err = GetNumberAsFloat(arr.Get(1))
if err != nil {
return nil, err
}
rect.Urx, err = getNumberAsFloat(arr.Get(2))
rect.Urx, err = GetNumberAsFloat(arr.Get(2))
if err != nil {
return nil, err
}
rect.Ury, err = getNumberAsFloat(arr.Get(3))
rect.Ury, err = GetNumberAsFloat(arr.Get(3))
if err != nil {
return nil, err
}

View File

@ -6,98 +6,26 @@
package model
import (
"errors"
"github.com/unidoc/unidoc/common"
. "github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/core"
)
// getUniDocVersion returns the version string stored in common.Version.
func getUniDocVersion() string {
return common.Version
}
func getNumberAsFloat(obj PdfObject) (float64, error) {
if fObj, ok := obj.(*PdfObjectFloat); ok {
return float64(*fObj), nil
}
if iObj, ok := obj.(*PdfObjectInteger); ok {
return float64(*iObj), nil
}
return 0, errors.New("Not a number")
}
func isNullObject(obj PdfObject) bool {
_, isNull := obj.(*PdfObjectNull)
return isNull
}
// GetNumbersAsFloat converts a list of pdf objects representing floats or integers to a slice of
// float64 values. XXX: We need this exported for extractor
func GetNumbersAsFloat(objects []PdfObject) ([]float64, error) {
return getNumbersAsFloat(objects)
}
// Convert a list of pdf objects representing floats or integers to a slice of float64 values.
func getNumbersAsFloat(objects []PdfObject) ([]float64, error) {
floats := []float64{}
for _, obj := range objects {
val, err := getNumberAsFloat(obj)
if err != nil {
return nil, err
}
floats = append(floats, val)
}
return floats, nil
}
// Cases where expecting an integer, but some implementations actually
// store the number in a floating point format.
func getNumberAsInt64(obj PdfObject) (int64, error) {
if iObj, ok := obj.(*PdfObjectInteger); ok {
return int64(*iObj), nil
}
if fObj, ok := obj.(*PdfObjectFloat); ok {
common.Log.Debug("Number expected as integer was stored as float (type casting used)")
return int64(*fObj), nil
}
return 0, errors.New("Not a number")
}
func getNumberAsFloatOrNull(obj PdfObject) (*float64, error) {
if fObj, ok := obj.(*PdfObjectFloat); ok {
num := float64(*fObj)
return &num, nil
}
if iObj, ok := obj.(*PdfObjectInteger); ok {
num := float64(*iObj)
return &num, nil
}
if _, ok := obj.(*PdfObjectNull); ok {
return nil, nil
}
return nil, errors.New("Not a number")
}
// Handy function for debugging in development.
func debugObject(obj PdfObject) {
func debugObject(obj core.PdfObject) {
common.Log.Debug("obj: %T %s", obj, obj.String())
if stream, is := obj.(*PdfObjectStream); is {
decoded, err := DecodeStream(stream)
if stream, is := obj.(*core.PdfObjectStream); is {
decoded, err := core.DecodeStream(stream)
if err != nil {
common.Log.Debug("Error: %v", err)
return
}
common.Log.Debug("Decoded: %s", decoded)
} else if indObj, is := obj.(*PdfIndirectObject); is {
} else if indObj, is := obj.(*core.PdfIndirectObject); is {
common.Log.Debug("%T %v", indObj.PdfObject, indObj.PdfObject)
common.Log.Debug("%s", indObj.PdfObject.String())
}