Removed naked returns. Fixed godoc. Reorganized object extractors

This commit is contained in:
Peter Williams 2018-07-25 12:00:49 +10:00
parent e886846c6a
commit b1cf3494f7
15 changed files with 282 additions and 339 deletions

View File

@ -19,4 +19,5 @@ var (
ErrTypeError = errors.New("type check error") ErrTypeError = errors.New("type check error")
ErrRangeError = errors.New("range range error") ErrRangeError = errors.New("range range error")
ErrNotSupported = errors.New("feature not currently supported") ErrNotSupported = errors.New("feature not currently supported")
ErrNotANumber = errors.New("not a number")
) )

View File

@ -355,26 +355,28 @@ func (array *PdfObjectArray) Append(objects ...PdfObject) {
} }
} }
// ToFloat64Array returns a slice of all elements in the array as a float64 slice. An error is returned if the array // ToFloat64Array returns a slice of all elements in the array as a float64 slice. An error is
// contains non-numeric objects (each element can be either PdfObjectInteger or PdfObjectFloat). // returned if the array contains non-numeric objects (each element can be either PdfObjectInteger
// or PdfObjectFloat).
func (array *PdfObjectArray) ToFloat64Array() ([]float64, error) { func (array *PdfObjectArray) ToFloat64Array() ([]float64, error) {
vals := []float64{} vals := []float64{}
for _, obj := range array.Elements() { for _, obj := range array.Elements() {
if number, is := obj.(*PdfObjectInteger); is { switch t := obj.(type) {
vals = append(vals, float64(*number)) case *PdfObjectInteger:
} else if number, is := obj.(*PdfObjectFloat); is { vals = append(vals, float64(*t))
vals = append(vals, float64(*number)) case *PdfObjectFloat:
} else { vals = append(vals, float64(*t))
return nil, fmt.Errorf("Type error") default:
return nil, ErrTypeError
} }
} }
return vals, nil return vals, nil
} }
// ToIntegerArray returns a slice of all array elements as an int slice. An error is returned if the array contains // ToIntegerArray returns a slice of all array elements as an int slice. An error is returned if the
// non-integer objects. Each element can only be PdfObjectInteger. // array contains non-integer objects. Each element can only be PdfObjectInteger.
func (array *PdfObjectArray) ToIntegerArray() ([]int, error) { func (array *PdfObjectArray) ToIntegerArray() ([]int, error) {
vals := []int{} vals := []int{}
@ -389,6 +391,7 @@ func (array *PdfObjectArray) ToIntegerArray() ([]int, error) {
return vals, nil return vals, nil
} }
// String returns a string describing `array`.
func (array *PdfObjectArray) String() string { func (array *PdfObjectArray) String() string {
outStr := "[" outStr := "["
for ind, o := range array.Elements() { for ind, o := range array.Elements() {
@ -417,19 +420,62 @@ func (array *PdfObjectArray) DefaultWriteString() string {
// GetNumberAsFloat returns the contents of `obj` as a float if it is an integer or float, or an // GetNumberAsFloat returns the contents of `obj` as a float if it is an integer or float, or an
// error if it isn't. // error if it isn't.
func GetNumberAsFloat(obj PdfObject) (float64, error) { func GetNumberAsFloat(obj PdfObject) (float64, error) {
return getNumberAsFloat(obj) switch t := obj.(type) {
case *PdfObjectFloat:
return float64(*t), nil
case *PdfObjectInteger:
return float64(*t), nil
}
return 0, ErrNotANumber
} }
func getNumberAsFloat(obj PdfObject) (float64, error) { // IsNullObject returns true if `obj` is a PdfObjectNull.
if fObj, ok := obj.(*PdfObjectFloat); ok { func IsNullObject(obj PdfObject) bool {
return float64(*fObj), nil _, isNull := obj.(*PdfObjectNull)
} return isNull
}
if iObj, ok := obj.(*PdfObjectInteger); ok { // GetNumbersAsFloat converts a list of pdf objects representing floats or integers to a slice of
return float64(*iObj), nil // float64 values.
func GetNumbersAsFloat(objects []PdfObject) (floats []float64, err error) {
for _, obj := range objects {
val, err := GetNumberAsFloat(obj)
if err != nil {
return nil, err
}
floats = append(floats, val)
} }
return floats, nil
}
return 0, fmt.Errorf("Not a number") // GetNumberAsInt64 returns the contents of `obj` as an int64 if it is an integer or float, or an
// error if it isn't. This is for cases where an integer is expected, but some implementations
// actually store the number in a floating point format.
func GetNumberAsInt64(obj PdfObject) (int64, error) {
switch t := obj.(type) {
case *PdfObjectFloat:
common.Log.Debug("Number expected as integer was stored as float (type casting used)")
return int64(*t), nil
case *PdfObjectInteger:
return int64(*t), nil
}
return 0, ErrNotANumber
}
// getNumberAsFloatOrNull returns the contents of `obj` as a *float64 if it is an integer or float,
// or nil if `obj` is a PdfObjectNull. In other cases an error is returned.
func getNumberAsFloatOrNull(obj PdfObject) (*float64, error) {
switch t := obj.(type) {
case *PdfObjectFloat:
val := float64(*t)
return &val, nil
case *PdfObjectInteger:
val := float64(*t)
return &val, nil
case *PdfObjectNull:
return nil, nil
}
return nil, ErrNotANumber
} }
// GetAsFloat64Slice returns the array as []float64 slice. // GetAsFloat64Slice returns the array as []float64 slice.
@ -438,8 +484,7 @@ func (array *PdfObjectArray) GetAsFloat64Slice() ([]float64, error) {
slice := []float64{} slice := []float64{}
for _, obj := range array.Elements() { for _, obj := range array.Elements() {
obj := TraceToDirectObject(obj) number, err := GetNumberAsFloat(TraceToDirectObject(obj))
number, err := getNumberAsFloat(obj)
if err != nil { if err != nil {
return nil, fmt.Errorf("Array element not a number") return nil, fmt.Errorf("Array element not a number")
} }
@ -459,6 +504,7 @@ func (d *PdfObjectDictionary) Merge(another *PdfObjectDictionary) {
} }
} }
// String returns a string describing `d`.
func (d *PdfObjectDictionary) String() string { func (d *PdfObjectDictionary) String() string {
outStr := "Dict(" outStr := "Dict("
for _, k := range d.keys { for _, k := range d.keys {
@ -541,7 +587,6 @@ func (d *PdfObjectDictionary) Remove(key PdfObjectName) {
// Note that we take care to perform a type switch. Otherwise if we would supply a nil value // Note that we take care to perform a type switch. Otherwise if we would supply a nil value
// of another type, e.g. (PdfObjectArray*)(nil), then it would not be a PdfObject(nil) and thus // of another type, e.g. (PdfObjectArray*)(nil), then it would not be a PdfObject(nil) and thus
// would get set. // would get set.
//
func (d *PdfObjectDictionary) SetIfNotNil(key PdfObjectName, val PdfObject) { func (d *PdfObjectDictionary) SetIfNotNil(key PdfObjectName, val PdfObject) {
if val != nil { if val != nil {
switch t := val.(type) { switch t := val.(type) {
@ -595,6 +640,7 @@ func (d *PdfObjectDictionary) SetIfNotNil(key PdfObjectName, val PdfObject) {
} }
} }
// String returns a string describing `ref`.
func (ref *PdfObjectReference) String() string { func (ref *PdfObjectReference) String() string {
return fmt.Sprintf("Ref(%d %d)", ref.ObjectNumber, ref.GenerationNumber) return fmt.Sprintf("Ref(%d %d)", ref.ObjectNumber, ref.GenerationNumber)
} }
@ -604,6 +650,7 @@ func (ref *PdfObjectReference) DefaultWriteString() string {
return fmt.Sprintf("%d %d R", ref.ObjectNumber, ref.GenerationNumber) return fmt.Sprintf("%d %d R", ref.ObjectNumber, ref.GenerationNumber)
} }
// String returns a string describing `ind`.
func (ind *PdfIndirectObject) String() string { func (ind *PdfIndirectObject) String() string {
// Avoid printing out the object, can cause problems with circular // Avoid printing out the object, can cause problems with circular
// references. // references.
@ -616,6 +663,7 @@ func (ind *PdfIndirectObject) DefaultWriteString() string {
return outStr return outStr
} }
// String returns a string describing `stream`.
func (stream *PdfObjectStream) String() string { func (stream *PdfObjectStream) String() string {
return fmt.Sprintf("Object stream %d: %s", stream.ObjectNumber, stream.PdfObjectDictionary) return fmt.Sprintf("Object stream %d: %s", stream.ObjectNumber, stream.PdfObjectDictionary)
} }
@ -626,6 +674,7 @@ func (stream *PdfObjectStream) DefaultWriteString() string {
return outStr return outStr
} }
// String returns a string describing `null`.
func (null *PdfObjectNull) String() string { func (null *PdfObjectNull) String() string {
return "null" return "null"
} }

View File

@ -45,7 +45,7 @@ func (e *Extractor) ExtractXYText() (*TextList, int, int, error) {
textList := &TextList{} textList := &TextList{}
state := newTextState() state := newTextState()
fontStack := fontStacker{} fontStack := fontStacker{}
var to *TextObject var to *textObject
cstreamParser := contentstream.NewContentStreamParser(e.contents) cstreamParser := contentstream.NewContentStreamParser(e.contents)
operations, err := cstreamParser.Parse() operations, err := cstreamParser.Parse()
@ -199,7 +199,7 @@ func (e *Extractor) ExtractXYText() (*TextList, int, int, error) {
common.Log.Debug("ERROR: Tm err=%v", err) common.Log.Debug("ERROR: Tm err=%v", err)
return err return err
} }
floats, err := model.GetNumbersAsFloat(op.Params) floats, err := core.GetNumbersAsFloat(op.Params)
if err != nil { if err != nil {
common.Log.Debug("ERROR: err=%v", err) common.Log.Debug("ERROR: err=%v", err)
return err return err
@ -269,7 +269,7 @@ func (e *Extractor) ExtractXYText() (*TextList, int, int, error) {
// moveText "Td" Moves start of text by `tx`,`ty` // moveText "Td" Moves start of text by `tx`,`ty`
// Move to the start of the next line, offset from the start of the current line by (tx, ty). // Move to the start of the next line, offset from the start of the current line by (tx, ty).
// tx and ty are in unscaled text space units. // tx and ty are in unscaled text space units.
func (to *TextObject) moveText(tx, ty float64) { func (to *textObject) moveText(tx, ty float64) {
// Not implemented yet // Not implemented yet
} }
@ -279,7 +279,7 @@ func (to *TextObject) moveText(tx, ty float64) {
// have the same effect as this code: // have the same effect as this code:
// ty TL // ty TL
// tx ty Td // tx ty Td
func (to *TextObject) moveTextSetLeading(tx, ty float64) { func (to *textObject) moveTextSetLeading(tx, ty float64) {
// Not implemented yet // Not implemented yet
// The following is supposed to be equivalent to the existing Unidoc implementation. // The following is supposed to be equivalent to the existing Unidoc implementation.
if tx > 0 { if tx > 0 {
@ -297,14 +297,14 @@ func (to *TextObject) moveTextSetLeading(tx, ty float64) {
// where Tl denotes the current leading parameter in the text state. The negative of Tl is used // where Tl denotes the current leading parameter in the text state. The negative of Tl is used
// here because Tl is the text leading expressed as a positive number. Going to the next line // here because Tl is the text leading expressed as a positive number. Going to the next line
// entails decreasing the y coordinate. (page 250) // entails decreasing the y coordinate. (page 250)
func (to *TextObject) nextLine() { func (to *textObject) nextLine() {
// Not implemented yet // Not implemented yet
} }
// setTextMatrix "Tm" // setTextMatrix "Tm"
// Set the text matrix, Tm, and the text line matrix, Tlm to the Matrix specified by the 6 numbers // Set the text matrix, Tm, and the text line matrix, Tlm to the Matrix specified by the 6 numbers
// in `f` (page 250) // in `f` (page 250)
func (to *TextObject) setTextMatrix(f []float64) { func (to *textObject) setTextMatrix(f []float64) {
// Not implemented yet // Not implemented yet
// The following is supposed to be equivalent to the existing Unidoc implementation. // The following is supposed to be equivalent to the existing Unidoc implementation.
tx, ty := f[4], f[5] tx, ty := f[4], f[5]
@ -324,12 +324,12 @@ func (to *TextObject) setTextMatrix(f []float64) {
} }
// showText "Tj" Show a text string // showText "Tj" Show a text string
func (to *TextObject) showText(charcodes []byte) error { func (to *textObject) showText(charcodes []byte) error {
return to.renderText(charcodes) return to.renderText(charcodes)
} }
// showTextAdjusted "TJ" Show text with adjustable spacing // showTextAdjusted "TJ" Show text with adjustable spacing
func (to *TextObject) showTextAdjusted(args []core.PdfObject) error { func (to *textObject) showTextAdjusted(args []core.PdfObject) error {
for _, o := range args { for _, o := range args {
switch o.(type) { switch o.(type) {
case *core.PdfObjectFloat, *core.PdfObjectInteger: case *core.PdfObjectFloat, *core.PdfObjectInteger:
@ -359,17 +359,17 @@ func (to *TextObject) showTextAdjusted(args []core.PdfObject) error {
} }
// setTextLeading "TL" Set text leading // setTextLeading "TL" Set text leading
func (to *TextObject) setTextLeading(y float64) { func (to *textObject) setTextLeading(y float64) {
// Not implemented yet // Not implemented yet
} }
// setCharSpacing "Tc" Set character spacing // setCharSpacing "Tc" Set character spacing
func (to *TextObject) setCharSpacing(x float64) { func (to *textObject) setCharSpacing(x float64) {
// Not implemented yet // Not implemented yet
} }
// setFont "Tf" Set font // setFont "Tf" Set font
func (to *TextObject) setFont(name string, size float64) error { func (to *textObject) setFont(name string, size float64) error {
font, err := to.getFont(name) font, err := to.getFont(name)
if err == nil { if err == nil {
to.State.Tf = font to.State.Tf = font
@ -389,22 +389,22 @@ func (to *TextObject) setFont(name string, size float64) error {
} }
// setTextRenderMode "Tr" Set text rendering mode // setTextRenderMode "Tr" Set text rendering mode
func (to *TextObject) setTextRenderMode(mode int) { func (to *textObject) setTextRenderMode(mode int) {
// Not implemented yet // Not implemented yet
} }
// setTextRise "Ts" Set text rise // setTextRise "Ts" Set text rise
func (to *TextObject) setTextRise(y float64) { func (to *textObject) setTextRise(y float64) {
// Not implemented yet // Not implemented yet
} }
// setWordSpacing "Tw" Set word spacing // setWordSpacing "Tw" Set word spacing
func (to *TextObject) setWordSpacing(y float64) { func (to *textObject) setWordSpacing(y float64) {
// Not implemented yet // Not implemented yet
} }
// setHorizScaling "Tz" Set horizontal scaling // setHorizScaling "Tz" Set horizontal scaling
func (to *TextObject) setHorizScaling(y float64) { func (to *textObject) setHorizScaling(y float64) {
// Not implemented yet // Not implemented yet
} }
@ -422,11 +422,11 @@ func floatParam(op *contentstream.ContentStreamOperation) (float64, error) {
// checkOp returns true if we are in a text stream and `op` has `numParams` params. // checkOp returns true if we are in a text stream and `op` has `numParams` params.
// If `hard` is true and the number of params don't match, an error is returned. // If `hard` is true and the number of params don't match, an error is returned.
func (to *TextObject) checkOp(op *contentstream.ContentStreamOperation, numParams int, func (to *textObject) checkOp(op *contentstream.ContentStreamOperation, numParams int,
hard bool) (ok bool, err error) { hard bool) (ok bool, err error) {
if to == nil { if to == nil {
common.Log.Debug("%#q operand outside text", op.Operand) common.Log.Debug("%#q operand outside text", op.Operand)
return return false, nil
} }
if numParams >= 0 { if numParams >= 0 {
if len(op.Params) != numParams { if len(op.Params) != numParams {
@ -435,15 +435,13 @@ func (to *TextObject) checkOp(op *contentstream.ContentStreamOperation, numParam
} }
common.Log.Debug("ERROR: %#q should have %d input params, got %d %+v", common.Log.Debug("ERROR: %#q should have %d input params, got %d %+v",
op.Operand, numParams, len(op.Params), op.Params) op.Operand, numParams, len(op.Params), op.Params)
return return false, err
} }
} }
ok = true return true, nil
return
} }
// fontStacker is the PDF font stack implementation. // fontStacker is the PDF font stack implementation.
// I think this is correct. It has worked on my tests so far.
type fontStacker []*model.PdfFont type fontStacker []*model.PdfFont
// String returns a string describing the current state of the font stack. // String returns a string describing the current state of the font stack.
@ -475,27 +473,25 @@ func (fontStack *fontStacker) pop() *model.PdfFont {
} }
// peek returns the element on the top of the font stack if there is one, or nil if there isn't. // peek returns the element on the top of the font stack if there is one, or nil if there isn't.
func (fontStack *fontStacker) peek() (font *model.PdfFont) { func (fontStack *fontStacker) peek() *model.PdfFont {
if fontStack.empty() { if fontStack.empty() {
return return nil
} }
font = (*fontStack)[len(*fontStack)-1] return (*fontStack)[len(*fontStack)-1]
return
} }
// get returns the `idx`'th element of the font stack if there is one, or nil if there isn't. // get returns the `idx`'th element of the font stack if there is one, or nil if there isn't.
// idx = 0: bottom of font stack // idx = 0: bottom of font stack
// idx = len(fontstack) - 1: top of font stack // idx = len(fontstack) - 1: top of font stack
// idx = -n is same as idx = len(fontstack) - n, so fontstack.get(-1) is same as fontstack.peek() // idx = -n is same as idx = len(fontstack) - n, so fontstack.get(-1) is same as fontstack.peek()
func (fontStack *fontStacker) get(idx int) (font *model.PdfFont) { func (fontStack *fontStacker) get(idx int) *model.PdfFont {
if idx < 0 { if idx < 0 {
idx += fontStack.size() idx += fontStack.size()
} }
if idx < 0 || idx > fontStack.size()-1 { if idx < 0 || idx > fontStack.size()-1 {
return return nil
} }
font = (*fontStack)[idx] return (*fontStack)[idx]
return
} }
// empty returns true if the font stack is empty. // empty returns true if the font stack is empty.
@ -512,7 +508,9 @@ func (fontStack *fontStacker) size() int {
// Some of these parameters are expressed in unscaled text space units. This means that they shall // Some of these parameters are expressed in unscaled text space units. This means that they shall
// be specified in a coordinate system that shall be defined by the text matrix, Tm but shall not be // be specified in a coordinate system that shall be defined by the text matrix, Tm but shall not be
// scaled by the font size parameter, Tfs. // scaled by the font size parameter, Tfs.
type TextState struct {
// textState represents the text state.
type textState struct {
// Tc float64 // Character spacing. Unscaled text space units. // Tc float64 // Character spacing. Unscaled text space units.
// Tw float64 // Word spacing. Unscaled text space units. // Tw float64 // Word spacing. Unscaled text space units.
// Th float64 // Horizontal scaling // Th float64 // Horizontal scaling
@ -537,12 +535,13 @@ type TextState struct {
// | Tfs x Th 0 0 | // | Tfs x Th 0 0 |
// Trm = | 0 Tfs 0 | × Tm × CTM // Trm = | 0 Tfs 0 | × Tm × CTM
// | 0 Trise 1 | // | 0 Trise 1 |
//
type TextObject struct { // textObject represents a PDF text object.
type textObject struct {
e *Extractor e *Extractor
gs contentstream.GraphicsState gs contentstream.GraphicsState
fontStack *fontStacker fontStack *fontStacker
State *TextState State *textState
// Tm contentstream.Matrix // Text matrix. For the character pointer. // Tm contentstream.Matrix // Text matrix. For the character pointer.
// Tlm contentstream.Matrix // Text line matrix. For the start of line pointer. // Tlm contentstream.Matrix // Text line matrix. For the start of line pointer.
Texts []XYText // Text gets written here. Texts []XYText // Text gets written here.
@ -551,16 +550,16 @@ type TextObject struct {
xPos, yPos float64 xPos, yPos float64
} }
// newTextState returns a default TextState // newTextState returns a default textState
func newTextState() TextState { func newTextState() textState {
// Not implemented yet // Not implemented yet
return TextState{} return textState{}
} }
// newTextObject returns a default TextObject // newTextObject returns a default textObject
func newTextObject(e *Extractor, gs contentstream.GraphicsState, state *TextState, func newTextObject(e *Extractor, gs contentstream.GraphicsState, state *textState,
fontStack *fontStacker) *TextObject { fontStack *fontStacker) *textObject {
return &TextObject{ return &textObject{
e: e, e: e,
gs: gs, gs: gs,
fontStack: fontStack, fontStack: fontStack,
@ -571,26 +570,26 @@ func newTextObject(e *Extractor, gs contentstream.GraphicsState, state *TextStat
} }
// renderRawText writes `text` directly to the extracted text // renderRawText writes `text` directly to the extracted text
func (to *TextObject) renderRawText(text string) { func (to *textObject) renderRawText(text string) {
to.Texts = append(to.Texts, XYText{text}) to.Texts = append(to.Texts, XYText{text})
} }
// renderText emits byte array `data` to the calling program // renderText emits byte array `data` to the calling program
func (to *TextObject) renderText(data []byte) (err error) { func (to *textObject) renderText(data []byte) error {
text := "" text := ""
if len(*to.fontStack) == 0 { if len(*to.fontStack) == 0 {
common.Log.Debug("ERROR: No font defined. data=%#q", string(data)) common.Log.Debug("ERROR: No font defined. data=%#q", string(data))
text = string(data) text = string(data)
err = model.ErrNoFont return model.ErrNoFont
} else {
font := to.fontStack.peek()
var numChars, numMisses int
text, numChars, numMisses = font.CharcodeBytesToUnicode(data)
to.State.numChars += numChars
to.State.numMisses += numMisses
} }
font := to.fontStack.peek()
var numChars, numMisses int
text, numChars, numMisses = font.CharcodeBytesToUnicode(data)
to.State.numChars += numChars
to.State.numMisses += numMisses
to.Texts = append(to.Texts, XYText{text}) to.Texts = append(to.Texts, XYText{text})
return return nil
} }
// XYText represents text and its position in device coordinates // XYText represents text and its position in device coordinates
@ -622,8 +621,8 @@ func (tl *TextList) ToText() string {
} }
// getFont returns the font named `name` if it exists in the page's resources or an error if it // getFont returns the font named `name` if it exists in the page's resources or an error if it
// doesn't // doesn't.
func (to *TextObject) getFont(name string) (*model.PdfFont, error) { func (to *textObject) getFont(name string) (*model.PdfFont, error) {
// This is a hack for testing. // This is a hack for testing.
if name == "UniDocCourier" { if name == "UniDocCourier" {
@ -642,29 +641,27 @@ func (to *TextObject) getFont(name string) (*model.PdfFont, error) {
} }
// getFontDict returns the font object called `name` if it exists in the page's Font resources or // getFontDict returns the font object called `name` if it exists in the page's Font resources or
// an error if it doesn't // an error if it doesn't.
// XXX: TODO: Can we cache font values? // XXX: TODO: Can we cache font values?
func (to *TextObject) getFontDict(name string) (fontObj core.PdfObject, err error) { func (to *textObject) getFontDict(name string) (fontObj core.PdfObject, err error) {
resources := to.e.resources resources := to.e.resources
if resources == nil { if resources == nil {
common.Log.Debug("getFontDict. No resources. name=%#q", name) common.Log.Debug("getFontDict. No resources. name=%#q", name)
return return nil, nil
} }
fontObj, found := resources.GetFontByName(core.PdfObjectName(name)) fontObj, found := resources.GetFontByName(core.PdfObjectName(name))
if !found { if !found {
err = errors.New("Font not in resources") common.Log.Debug("ERROR: getFontDict: Font not found: name=%#q", name)
common.Log.Debug("ERROR: getFontDict: Font not found: name=%#q err=%v", name, err) return nil, errors.New("Font not in resources")
return
} }
return return fontObj, nil
} }
// getCharMetrics returns the character metrics for the code points in `text` for font `font`. // getCharMetrics returns the character metrics for the code points in `text` for font `font`.
func getCharMetrics(font *model.PdfFont, text string) (metrics []fonts.CharMetrics, err error) { func getCharMetrics(font *model.PdfFont, text string) (metrics []fonts.CharMetrics, err error) {
encoder := font.Encoder() encoder := font.Encoder()
if encoder == nil { if encoder == nil {
err = errors.New("No font encoder") return nil, errors.New("No font encoder")
} }
for _, r := range text { for _, r := range text {
glyph, found := encoder.RuneToGlyph(r) glyph, found := encoder.RuneToGlyph(r)
@ -678,5 +675,5 @@ func getCharMetrics(font *model.PdfFont, text string) (metrics []fonts.CharMetri
} }
metrics = append(metrics, m) metrics = append(metrics, m)
} }
return return metrics, nil
} }

View File

@ -9,10 +9,8 @@ import (
"bytes" "bytes"
"fmt" "fmt"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/common/license" "github.com/unidoc/unidoc/common/license"
"github.com/unidoc/unidoc/pdf/core" "github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/model"
) )
func procBuf(buf *bytes.Buffer) { func procBuf(buf *bytes.Buffer) {
@ -38,19 +36,16 @@ func procBuf(buf *bytes.Buffer) {
// toFloatXY returns `objs` as 2 floats, if that's what it is, or an error if it isn't // toFloatXY returns `objs` as 2 floats, if that's what it is, or an error if it isn't
func toFloatXY(objs []core.PdfObject) (x, y float64, err error) { func toFloatXY(objs []core.PdfObject) (x, y float64, err error) {
if len(objs) != 2 { if len(objs) != 2 {
err = fmt.Errorf("Invalid number of params: %d", len(objs)) return 0, 0, fmt.Errorf("Invalid number of params: %d", len(objs))
common.Log.Debug("toFloatXY: err=%v", err)
return
} }
floats, err := model.GetNumbersAsFloat(objs) floats, err := core.GetNumbersAsFloat(objs)
if err != nil { if err != nil {
return return 0, 0, err
} }
x, y = floats[0], floats[1] return floats[0], floats[1], nil
return
} }
// truncate returns the first `n` characters in string `s` // truncate returns the first `n` characters in string `s`.
func truncate(s string, n int) string { func truncate(s string, n int) string {
if len(s) < n { if len(s) < n {
return s return s

View File

@ -58,7 +58,7 @@ type CMap struct {
toUnicodeIdentity bool toUnicodeIdentity bool
} }
// String retuns a human readable description of `cmap`. // String returns a human readable description of `cmap`.
func (cmap *CMap) String() string { func (cmap *CMap) String() string {
si := cmap.systemInfo si := cmap.systemInfo
parts := []string{ parts := []string{
@ -104,30 +104,25 @@ func (info *CIDSystemInfo) String() string {
func NewCIDSystemInfo(obj core.PdfObject) (info CIDSystemInfo, err error) { func NewCIDSystemInfo(obj core.PdfObject) (info CIDSystemInfo, err error) {
d, ok := core.GetDict(obj) d, ok := core.GetDict(obj)
if !ok { if !ok {
err = core.ErrTypeError return CIDSystemInfo{}, core.ErrTypeError
return
} }
registry, ok := core.GetStringVal(d.Get("Registry")) registry, ok := core.GetStringVal(d.Get("Registry"))
if !ok { if !ok {
err = core.ErrTypeError return CIDSystemInfo{}, core.ErrTypeError
return
} }
ordering, ok := core.GetStringVal(d.Get("Ordering")) ordering, ok := core.GetStringVal(d.Get("Ordering"))
if !ok { if !ok {
err = core.ErrTypeError return CIDSystemInfo{}, core.ErrTypeError
return
} }
supplement, ok := core.GetIntVal(d.Get("Supplement")) supplement, ok := core.GetIntVal(d.Get("Supplement"))
if !ok { if !ok {
err = core.ErrTypeError return CIDSystemInfo{}, core.ErrTypeError
return
} }
info = CIDSystemInfo{ return CIDSystemInfo{
Registry: registry, Registry: registry,
Ordering: ordering, Ordering: ordering,
Supplement: supplement, Supplement: supplement,
} }, nil
return
} }
// Name returns the name of the CMap. // Name returns the name of the CMap.
@ -231,13 +226,13 @@ func (cmap *CMap) matchCode(data []byte) (code CharCode, n int, matched bool) {
} }
matched = cmap.inCodespace(code, j+1) matched = cmap.inCodespace(code, j+1)
if matched { if matched {
return return code, n, true
} }
} }
// No codespace matched data. This is a serious problem. // No codespace matched data. This is a serious problem.
common.Log.Debug("ERROR: No codespace matches bytes=[% 02x]=%#q cmap=%s", common.Log.Debug("ERROR: No codespace matches bytes=[% 02x]=%#q cmap=%s",
data, string(data), cmap) data, string(data), cmap)
return return 0, 0, false
} }
// inCodespace returns true if `code` is in the `numBytes` byte codespace. // inCodespace returns true if `code` is in the `numBytes` byte codespace.

View File

@ -1890,7 +1890,7 @@ func newPdfBorderStyleFromPdfObject(obj PdfObject) (*PdfBorderStyle, error) {
// Border width. // Border width.
if obj := d.Get("W"); obj != nil { if obj := d.Get("W"); obj != nil {
val, err := getNumberAsFloat(obj) val, err := GetNumberAsFloat(obj)
if err != nil { if err != nil {
common.Log.Debug("Error retrieving W: %v", err) common.Log.Debug("Error retrieving W: %v", err)
return nil, err return nil, err

View File

@ -274,7 +274,7 @@ func (this *PdfColorspaceDeviceGray) ColorFromPdfObjects(objects []PdfObject) (P
return nil, errors.New("Range check") return nil, errors.New("Range check")
} }
floats, err := getNumbersAsFloat(objects) floats, err := GetNumbersAsFloat(objects)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -418,7 +418,7 @@ func (this *PdfColorspaceDeviceRGB) ColorFromPdfObjects(objects []PdfObject) (Pd
return nil, errors.New("Range check") return nil, errors.New("Range check")
} }
floats, err := getNumbersAsFloat(objects) floats, err := GetNumbersAsFloat(objects)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -569,7 +569,7 @@ func (this *PdfColorspaceDeviceCMYK) ColorFromPdfObjects(objects []PdfObject) (P
return nil, errors.New("Range check") return nil, errors.New("Range check")
} }
floats, err := getNumbersAsFloat(objects) floats, err := GetNumbersAsFloat(objects)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -792,7 +792,7 @@ func newPdfColorspaceCalGrayFromPdfObject(obj PdfObject) (*PdfColorspaceCalGray,
obj = dict.Get("Gamma") obj = dict.Get("Gamma")
if obj != nil { if obj != nil {
obj = TraceToDirectObject(obj) obj = TraceToDirectObject(obj)
gamma, err := getNumberAsFloat(obj) gamma, err := GetNumberAsFloat(obj)
if err != nil { if err != nil {
return nil, fmt.Errorf("CalGray: gamma not a number") return nil, fmt.Errorf("CalGray: gamma not a number")
} }
@ -850,7 +850,7 @@ func (this *PdfColorspaceCalGray) ColorFromPdfObjects(objects []PdfObject) (PdfC
return nil, errors.New("Range check") return nil, errors.New("Range check")
} }
floats, err := getNumbersAsFloat(objects) floats, err := GetNumbersAsFloat(objects)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -1177,7 +1177,7 @@ func (this *PdfColorspaceCalRGB) ColorFromPdfObjects(objects []PdfObject) (PdfCo
return nil, errors.New("Range check") return nil, errors.New("Range check")
} }
floats, err := getNumbersAsFloat(objects) floats, err := GetNumbersAsFloat(objects)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -1510,7 +1510,7 @@ func (this *PdfColorspaceLab) ColorFromPdfObjects(objects []PdfObject) (PdfColor
return nil, errors.New("Range check") return nil, errors.New("Range check")
} }
floats, err := getNumbersAsFloat(objects) floats, err := GetNumbersAsFloat(objects)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -2193,7 +2193,7 @@ func newPdfColorspaceSpecialIndexedFromPdfObject(obj PdfObject) (*PdfColorspaceS
// Get hi val. // Get hi val.
obj = array.Get(2) obj = array.Get(2)
val, err := getNumberAsInt64(obj) val, err := GetNumberAsInt64(obj)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -2268,7 +2268,7 @@ func (this *PdfColorspaceSpecialIndexed) ColorFromPdfObjects(objects []PdfObject
return nil, errors.New("Range check") return nil, errors.New("Range check")
} }
floats, err := getNumbersAsFloat(objects) floats, err := GetNumbersAsFloat(objects)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -2483,7 +2483,7 @@ func (this *PdfColorspaceSpecialSeparation) ColorFromPdfObjects(objects []PdfObj
return nil, errors.New("Range check") return nil, errors.New("Range check")
} }
floats, err := getNumbersAsFloat(objects) floats, err := GetNumbersAsFloat(objects)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -2704,7 +2704,7 @@ func (this *PdfColorspaceDeviceN) ColorFromPdfObjects(objects []PdfObject) (PdfC
return nil, errors.New("Range check") return nil, errors.New("Range check")
} }
floats, err := getNumbersAsFloat(objects) floats, err := GetNumbersAsFloat(objects)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -36,7 +36,7 @@ type fontFile struct {
encoder textencoding.TextEncoder encoder textencoding.TextEncoder
} }
// String retuns a human readable description of `fontfile`. // String returns a human readable description of `fontfile`.
func (fontfile *fontFile) String() string { func (fontfile *fontFile) String() string {
encoding := "[None]" encoding := "[None]"
if fontfile.encoder != nil { if fontfile.encoder != nil {
@ -178,15 +178,14 @@ func getAsciiSections(data []byte) (keySection, encodingSection string, err erro
common.Log.Trace("getAsciiSections: %d ", len(data)) common.Log.Trace("getAsciiSections: %d ", len(data))
loc := reDictBegin.FindIndex(data) loc := reDictBegin.FindIndex(data)
if loc == nil { if loc == nil {
err = core.ErrTypeError
common.Log.Debug("ERROR: getAsciiSections. No dict.") common.Log.Debug("ERROR: getAsciiSections. No dict.")
return return "", "", core.ErrTypeError
} }
i0 := loc[1] i0 := loc[1]
i := strings.Index(string(data[i0:]), encodingBegin) i := strings.Index(string(data[i0:]), encodingBegin)
if i < 0 { if i < 0 {
keySection = string(data[i0:]) keySection = string(data[i0:])
return return keySection, "", nil
} }
i1 := i0 + i i1 := i0 + i
keySection = string(data[i0:i1]) keySection = string(data[i0:i1])
@ -194,13 +193,12 @@ func getAsciiSections(data []byte) (keySection, encodingSection string, err erro
i2 := i1 i2 := i1
i = strings.Index(string(data[i2:]), encodingEnd) i = strings.Index(string(data[i2:]), encodingEnd)
if i < 0 { if i < 0 {
err = core.ErrTypeError
common.Log.Debug("ERROR: getAsciiSections. err=%v", err) common.Log.Debug("ERROR: getAsciiSections. err=%v", err)
return return "", "", core.ErrTypeError
} }
i3 := i2 + i i3 := i2 + i
encodingSection = string(data[i2:i3]) encodingSection = string(data[i2:i3])
return return keySection, encodingSection, nil
} }
// ~/testdata/private/invoice61781040.pdf has \r line endings // ~/testdata/private/invoice61781040.pdf has \r line endings

View File

@ -102,17 +102,16 @@ type ttfParser struct {
} }
// NewFontFile2FromPdfObject returns a TtfType describing the TrueType font file in PdfObject `obj`. // NewFontFile2FromPdfObject returns a TtfType describing the TrueType font file in PdfObject `obj`.
func NewFontFile2FromPdfObject(obj core.PdfObject) (rec TtfType, err error) { func NewFontFile2FromPdfObject(obj core.PdfObject) (TtfType, error) {
obj = core.TraceToDirectObject(obj) obj = core.TraceToDirectObject(obj)
streamObj, ok := obj.(*core.PdfObjectStream) streamObj, ok := obj.(*core.PdfObjectStream)
if !ok { if !ok {
common.Log.Debug("ERROR: FontFile2 must be a stream (%T)", obj) common.Log.Debug("ERROR: FontFile2 must be a stream (%T)", obj)
err = core.ErrTypeError return TtfType{}, core.ErrTypeError
return
} }
data, err := core.DecodeStream(streamObj) data, err := core.DecodeStream(streamObj)
if err != nil { if err != nil {
return return TtfType{}, err
} }
// Uncomment these lines to see the contents of the font file. For debugging. // Uncomment these lines to see the contents of the font file. For debugging.
@ -121,33 +120,30 @@ func NewFontFile2FromPdfObject(obj core.PdfObject) (rec TtfType, err error) {
// fmt.Println("===============####===============") // fmt.Println("===============####===============")
t := ttfParser{f: bytes.NewReader(data)} t := ttfParser{f: bytes.NewReader(data)}
rec, err = t.Parse() return t.Parse()
return
} }
// NewFontFile2FromPdfObject returns a TtfType describing the TrueType font file in disk file `fileStr`. // NewFontFile2FromPdfObject returns a TtfType describing the TrueType font file in disk file `fileStr`.
func TtfParse(fileStr string) (rec TtfType, err error) { func TtfParse(fileStr string) (TtfType, error) {
f, err := os.Open(fileStr) f, err := os.Open(fileStr)
if err != nil { if err != nil {
return return TtfType{}, err
} }
defer f.Close() defer f.Close()
t := ttfParser{f: f} t := ttfParser{f: f}
rec, err = t.Parse() return t.Parse()
return
} }
// NewFontFile2FromPdfObject returns a TtfType describing the TrueType font file in io.Reader `t`.f. // NewFontFile2FromPdfObject returns a TtfType describing the TrueType font file in io.Reader `t`.f.
func (t *ttfParser) Parse() (TtfRec TtfType, err error) { func (t *ttfParser) Parse() (TtfType, error) {
version, err := t.ReadStr(4) version, err := t.ReadStr(4)
if err != nil { if err != nil {
return return TtfType{}, err
} }
if version == "OTTO" { if version == "OTTO" {
err = errors.New("fonts based on PostScript outlines are not supported") return TtfType{}, errors.New("fonts based on PostScript outlines are not supported")
return
} }
if version != "\x00\x01\x00\x00" { if version != "\x00\x01\x00\x00" {
common.Log.Debug("ERROR: Unrecognized TrueType file format. version=%q", version) common.Log.Debug("ERROR: Unrecognized TrueType file format. version=%q", version)
@ -159,7 +155,7 @@ func (t *ttfParser) Parse() (TtfRec TtfType, err error) {
for j := 0; j < numTables; j++ { for j := 0; j < numTables; j++ {
tag, err = t.ReadStr(4) tag, err = t.ReadStr(4)
if err != nil { if err != nil {
return return TtfType{}, err
} }
t.Skip(4) // checkSum t.Skip(4) // checkSum
offset := t.ReadULong() offset := t.ReadULong()
@ -169,13 +165,10 @@ func (t *ttfParser) Parse() (TtfRec TtfType, err error) {
common.Log.Trace(describeTables(t.tables)) common.Log.Trace(describeTables(t.tables))
err = t.ParseComponents() if err = t.ParseComponents(); err != nil {
if err != nil { return TtfType{}, err
return
} }
return t.rec, nil
TtfRec = t.rec
return
} }
// describeTables returns a string describing `tables`, the tables in a TrueType font file. // describeTables returns a string describing `tables`, the tables in a TrueType font file.
@ -204,62 +197,55 @@ func describeTables(tables map[string]uint32) string {
// "hmtx" // "hmtx"
// "fpgm" // "fpgm"
// "gasp" // "gasp"
func (t *ttfParser) ParseComponents() (err error) { func (t *ttfParser) ParseComponents() error {
// Mandatory tables. // Mandatory tables.
err = t.ParseHead() if err := t.ParseHead(); err != nil {
if err != nil { return err
return
} }
err = t.ParseHhea() if err := t.ParseHhea(); err != nil {
if err != nil { return err
return
} }
err = t.ParseMaxp() if err := t.ParseMaxp(); err != nil {
if err != nil { return err
return
} }
err = t.ParseHmtx() if err := t.ParseHmtx(); err != nil {
if err != nil { return err
return
} }
// Optional tables. // Optional tables.
if _, ok := t.tables["name"]; ok { if _, ok := t.tables["name"]; ok {
err = t.ParseName() if err := t.ParseName(); err != nil {
if err != nil { return err
return
} }
} }
if _, ok := t.tables["OS/2"]; ok { if _, ok := t.tables["OS/2"]; ok {
err = t.ParseOS2() if err := t.ParseOS2(); err != nil {
if err != nil { return err
return
} }
} }
if _, ok := t.tables["post"]; ok { if _, ok := t.tables["post"]; ok {
err = t.ParsePost() if err := t.ParsePost(); err != nil {
if err != nil { return err
return
} }
} }
if _, ok := t.tables["cmap"]; ok { if _, ok := t.tables["cmap"]; ok {
err = t.ParseCmap() if err := t.ParseCmap(); err != nil {
if err != nil { return err
return
} }
} }
return return nil
} }
func (t *ttfParser) ParseHead() (err error) { func (t *ttfParser) ParseHead() error {
err = t.Seek("head") if err := t.Seek("head"); err != nil {
return err
}
t.Skip(3 * 4) // version, fontRevision, checkSumAdjustment t.Skip(3 * 4) // version, fontRevision, checkSumAdjustment
magicNumber := t.ReadULong() magicNumber := t.ReadULong()
if magicNumber != 0x5F0F3CF5 { if magicNumber != 0x5F0F3CF5 {
err = fmt.Errorf("incorrect magic number") return fmt.Errorf("incorrect magic number")
return
} }
t.Skip(2) // flags t.Skip(2) // flags
t.rec.UnitsPerEm = t.ReadUShort() t.rec.UnitsPerEm = t.ReadUShort()
@ -268,47 +254,49 @@ func (t *ttfParser) ParseHead() (err error) {
t.rec.Ymin = t.ReadShort() t.rec.Ymin = t.ReadShort()
t.rec.Xmax = t.ReadShort() t.rec.Xmax = t.ReadShort()
t.rec.Ymax = t.ReadShort() t.rec.Ymax = t.ReadShort()
return return nil
} }
func (t *ttfParser) ParseHhea() (err error) { func (t *ttfParser) ParseHhea() error {
err = t.Seek("hhea") if err := t.Seek("hhea"); err != nil {
if err == nil { return err
t.Skip(4 + 15*2)
t.numberOfHMetrics = t.ReadUShort()
} }
return t.Skip(4 + 15*2)
t.numberOfHMetrics = t.ReadUShort()
return nil
} }
func (t *ttfParser) ParseMaxp() (err error) { func (t *ttfParser) ParseMaxp() error {
err = t.Seek("maxp") if err := t.Seek("maxp"); err != nil {
if err == nil { return err
t.Skip(4)
t.numGlyphs = t.ReadUShort()
} }
return t.Skip(4)
t.numGlyphs = t.ReadUShort()
return nil
} }
func (t *ttfParser) ParseHmtx() (err error) { func (t *ttfParser) ParseHmtx() error {
err = t.Seek("hmtx") if err := t.Seek("hmtx"); err != nil {
if err == nil { return err
t.rec.Widths = make([]uint16, 0, 8) }
for j := uint16(0); j < t.numberOfHMetrics; j++ {
t.rec.Widths = append(t.rec.Widths, t.ReadUShort()) t.rec.Widths = make([]uint16, 0, 8)
t.Skip(2) // lsb for j := uint16(0); j < t.numberOfHMetrics; j++ {
} t.rec.Widths = append(t.rec.Widths, t.ReadUShort())
if t.numberOfHMetrics < t.numGlyphs { t.Skip(2) // lsb
lastWidth := t.rec.Widths[t.numberOfHMetrics-1] }
for j := t.numberOfHMetrics; j < t.numGlyphs; j++ { if t.numberOfHMetrics < t.numGlyphs {
t.rec.Widths = append(t.rec.Widths, lastWidth) lastWidth := t.rec.Widths[t.numberOfHMetrics-1]
} for j := t.numberOfHMetrics; j < t.numGlyphs; j++ {
t.rec.Widths = append(t.rec.Widths, lastWidth)
} }
} }
return
return nil
} }
// parseCmapSubtable31 parses information from an (3,1) subtable (Windows Unicode). // parseCmapSubtable31 parses information from an (3,1) subtable (Windows Unicode).
func (t *ttfParser) parseCmapSubtable31(offset31 int64) (err error) { func (t *ttfParser) parseCmapSubtable31(offset31 int64) error {
startCount := make([]uint16, 0, 8) startCount := make([]uint16, 0, 8)
endCount := make([]uint16, 0, 8) endCount := make([]uint16, 0, 8)
idDelta := make([]int16, 0, 8) idDelta := make([]int16, 0, 8)
@ -317,8 +305,7 @@ func (t *ttfParser) parseCmapSubtable31(offset31 int64) (err error) {
t.f.Seek(int64(t.tables["cmap"])+offset31, os.SEEK_SET) t.f.Seek(int64(t.tables["cmap"])+offset31, os.SEEK_SET)
format := t.ReadUShort() format := t.ReadUShort()
if format != 4 { if format != 4 {
err = fmt.Errorf("unexpected subtable format: %d", format) return fmt.Errorf("unexpected subtable format: %d", format)
return
} }
t.Skip(2 * 2) // length, language t.Skip(2 * 2) // length, language
segCount := int(t.ReadUShort() / 2) segCount := int(t.ReadUShort() / 2)
@ -366,7 +353,7 @@ func (t *ttfParser) parseCmapSubtable31(offset31 int64) (err error) {
} }
} }
} }
return return nil
} }
// parseCmapSubtable10 parses information from an (1,0) subtable (symbol). // parseCmapSubtable10 parses information from an (1,0) subtable (symbol).
@ -410,10 +397,10 @@ func (t *ttfParser) parseCmapSubtable10(offset10 int64) error {
} }
// ParseCmap parses the cmap table in a TrueType font. // ParseCmap parses the cmap table in a TrueType font.
func (t *ttfParser) ParseCmap() (err error) { func (t *ttfParser) ParseCmap() error {
var offset int64 var offset int64
if err = t.Seek("cmap"); err != nil { if err := t.Seek("cmap"); err != nil {
return return err
} }
common.Log.Debug("ParseCmap") common.Log.Debug("ParseCmap")
t.ReadUShort() // version is ignored. t.ReadUShort() // version is ignored.
@ -432,21 +419,19 @@ func (t *ttfParser) ParseCmap() (err error) {
// Latin font support based on (3,1) table encoding. // Latin font support based on (3,1) table encoding.
if offset31 != 0 { if offset31 != 0 {
err = t.parseCmapSubtable31(offset31) if err := t.parseCmapSubtable31(offset31); err != nil {
if err != nil { return err
return
} }
} }
// Many non-Latin fonts (including asian fonts) use subtable (1,0). // Many non-Latin fonts (including asian fonts) use subtable (1,0).
if offset10 != 0 { if offset10 != 0 {
err = t.parseCmapVersion(offset10) if err := t.parseCmapVersion(offset10); err != nil {
if err != nil { return err
return
} }
} }
return return nil
} }
func (t *ttfParser) parseCmapVersion(offset int64) error { func (t *ttfParser) parseCmapVersion(offset int64) error {
@ -511,9 +496,9 @@ func (t *ttfParser) parseCmapFormat6() error {
return nil return nil
} }
func (t *ttfParser) ParseName() (err error) { func (t *ttfParser) ParseName() error {
if err = t.Seek("name"); err != nil { if err := t.Seek("name"); err != nil {
return return err
} }
tableOffset, _ := t.f.Seek(0, os.SEEK_CUR) tableOffset, _ := t.f.Seek(0, os.SEEK_CUR)
t.rec.PostScriptName = "" t.rec.PostScriptName = ""
@ -528,28 +513,27 @@ func (t *ttfParser) ParseName() (err error) {
if nameID == 6 { if nameID == 6 {
// PostScript name // PostScript name
t.f.Seek(int64(tableOffset)+int64(stringOffset)+int64(offset), os.SEEK_SET) t.f.Seek(int64(tableOffset)+int64(stringOffset)+int64(offset), os.SEEK_SET)
var s string s, err := t.ReadStr(int(length))
s, err = t.ReadStr(int(length))
if err != nil { if err != nil {
return return err
} }
s = strings.Replace(s, "\x00", "", -1) s = strings.Replace(s, "\x00", "", -1)
var re *regexp.Regexp re, err := regexp.Compile("[(){}<> /%[\\]]")
if re, err = regexp.Compile("[(){}<> /%[\\]]"); err != nil { if err != nil {
return return err
} }
t.rec.PostScriptName = re.ReplaceAllString(s, "") t.rec.PostScriptName = re.ReplaceAllString(s, "")
} }
} }
if t.rec.PostScriptName == "" { if t.rec.PostScriptName == "" {
err = fmt.Errorf("the name PostScript was not found") return fmt.Errorf("the name PostScript was not found")
} }
return return nil
} }
func (t *ttfParser) ParseOS2() (err error) { func (t *ttfParser) ParseOS2() error {
if err = t.Seek("OS/2"); err != nil { if err := t.Seek("OS/2"); err != nil {
return return err
} }
version := t.ReadUShort() version := t.ReadUShort()
t.Skip(3 * 2) // xAvgCharWidth, usWeightClass, usWidthClass t.Skip(3 * 2) // xAvgCharWidth, usWeightClass, usWidthClass
@ -567,13 +551,13 @@ func (t *ttfParser) ParseOS2() (err error) {
} else { } else {
t.rec.CapHeight = 0 t.rec.CapHeight = 0
} }
return return nil
} }
// ParsePost reads the "post" section in a TrueType font table and sets t.rec.GlyphNames. // ParsePost reads the "post" section in a TrueType font table and sets t.rec.GlyphNames.
func (t *ttfParser) ParsePost() (err error) { func (t *ttfParser) ParsePost() error {
if err = t.Seek("post"); err != nil { if err := t.Seek("post"); err != nil {
return return err
} }
formatType := t.Read32Fixed() formatType := t.Read32Fixed()
@ -644,7 +628,7 @@ func (t *ttfParser) ParsePost() (err error) {
common.Log.Debug("ERROR: Unknown formatType=%f", formatType) common.Log.Debug("ERROR: Unknown formatType=%f", formatType)
} }
return return nil
} }
// The 258 standard mac glyph names used in 'post' format 1 and 2. // The 258 standard mac glyph names used in 'post' format 1 and 2.
@ -710,49 +694,45 @@ func (t *ttfParser) Skip(n int) {
// ReadStr reads `length` bytes from the file and returns them as a string, or an error if there was // ReadStr reads `length` bytes from the file and returns them as a string, or an error if there was
// a problem. // a problem.
func (t *ttfParser) ReadStr(length int) (str string, err error) { func (t *ttfParser) ReadStr(length int) (string, error) {
var n int
buf := make([]byte, length) buf := make([]byte, length)
n, err = t.f.Read(buf) n, err := t.f.Read(buf)
if err != nil { if err != nil {
return return "", err
} else if n != length {
return "", fmt.Errorf("unable to read %d bytes", length)
} }
if n == length { return string(buf), nil
str = string(buf)
} else {
err = fmt.Errorf("unable to read %d bytes", length)
}
return
} }
// ReadByte reads a byte and returns it as unsigned. // ReadByte reads a byte and returns it as unsigned.
func (t *ttfParser) ReadByte() (val uint8) { func (t *ttfParser) ReadByte() (val uint8) {
binary.Read(t.f, binary.BigEndian, &val) binary.Read(t.f, binary.BigEndian, &val)
return return val
} }
// ReadSByte reads a byte and returns it as signed. // ReadSByte reads a byte and returns it as signed.
func (t *ttfParser) ReadSByte() (val int8) { func (t *ttfParser) ReadSByte() (val int8) {
binary.Read(t.f, binary.BigEndian, &val) binary.Read(t.f, binary.BigEndian, &val)
return return val
} }
// ReadUShort reads 2 bytes and returns them as a big endian unsigned 16 bit integer. // ReadUShort reads 2 bytes and returns them as a big endian unsigned 16 bit integer.
func (t *ttfParser) ReadUShort() (val uint16) { func (t *ttfParser) ReadUShort() (val uint16) {
binary.Read(t.f, binary.BigEndian, &val) binary.Read(t.f, binary.BigEndian, &val)
return return val
} }
// ReadShort reads 2 bytes and returns them as a big endian signed 16 bit integer. // ReadShort reads 2 bytes and returns them as a big endian signed 16 bit integer.
func (t *ttfParser) ReadShort() (val int16) { func (t *ttfParser) ReadShort() (val int16) {
binary.Read(t.f, binary.BigEndian, &val) binary.Read(t.f, binary.BigEndian, &val)
return return val
} }
// ReadULong reads 4 bytes and returns them as a big endian unsigned 32 bit integer. // ReadULong reads 4 bytes and returns them as a big endian unsigned 32 bit integer.
func (t *ttfParser) ReadULong() (val uint32) { func (t *ttfParser) ReadULong() (val uint32) {
binary.Read(t.f, binary.BigEndian, &val) binary.Read(t.f, binary.BigEndian, &val)
return return val
} }
// ReadULong reads 4 bytes and returns them as a float, the first 2 bytes for the whole number and // ReadULong reads 4 bytes and returns them as a float, the first 2 bytes for the whole number and

View File

@ -453,11 +453,11 @@ func newPdfFunctionType2FromPdfObject(obj PdfObject) (*PdfFunctionType2, error)
if len(fun.C0) != len(fun.C1) { if len(fun.C0) != len(fun.C1) {
common.Log.Error("C0 and C1 not matching") common.Log.Error("C0 and C1 not matching")
return nil, errors.New("Range check") return nil, ErrRangeError
} }
// Exponent. // Exponent.
N, err := getNumberAsFloat(TraceToDirectObject(dict.Get("N"))) N, err := GetNumberAsFloat(TraceToDirectObject(dict.Get("N")))
if err != nil { if err != nil {
common.Log.Error("N missing or invalid, dict: %s", dict.String()) common.Log.Error("N missing or invalid, dict: %s", dict.String())
return nil, err return nil, err

View File

@ -114,7 +114,7 @@ func newPdfOutlineFromIndirectObject(container *PdfIndirectObject) (*PdfOutline,
if obj := dict.Get("Count"); obj != nil { if obj := dict.Get("Count"); obj != nil {
// This should always be an integer, but in a few cases has been a float. // This should always be an integer, but in a few cases has been a float.
count, err := getNumberAsInt64(obj) count, err := GetNumberAsInt64(obj)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -269,7 +269,7 @@ func newPdfTilingPatternFromDictionary(dict *PdfObjectDictionary) (*PdfTilingPat
common.Log.Debug("XStep missing") common.Log.Debug("XStep missing")
return nil, ErrRequiredAttributeMissing return nil, ErrRequiredAttributeMissing
} }
xStep, err := getNumberAsFloat(obj) xStep, err := GetNumberAsFloat(obj)
if err != nil { if err != nil {
common.Log.Debug("Error getting XStep as float: %v", xStep) common.Log.Debug("Error getting XStep as float: %v", xStep)
return nil, err return nil, err
@ -282,7 +282,7 @@ func newPdfTilingPatternFromDictionary(dict *PdfObjectDictionary) (*PdfTilingPat
common.Log.Debug("YStep missing") common.Log.Debug("YStep missing")
return nil, ErrRequiredAttributeMissing return nil, ErrRequiredAttributeMissing
} }
yStep, err := getNumberAsFloat(obj) yStep, err := GetNumberAsFloat(obj)
if err != nil { if err != nil {
common.Log.Debug("Error getting YStep as float: %v", yStep) common.Log.Debug("Error getting YStep as float: %v", yStep)
return nil, err return nil, err

View File

@ -39,7 +39,7 @@ func NewPdfPageResourcesFromDict(dict *PdfObjectDictionary) (*PdfPageResources,
if obj := dict.Get("ExtGState"); obj != nil { if obj := dict.Get("ExtGState"); obj != nil {
r.ExtGState = obj r.ExtGState = obj
} }
if obj := dict.Get("ColorSpace"); obj != nil && !isNullObject(obj) { if obj := dict.Get("ColorSpace"); obj != nil && !IsNullObject(obj) {
colorspaces, err := newPdfPageResourcesColorspacesFromPdfObject(obj) colorspaces, err := newPdfPageResourcesColorspacesFromPdfObject(obj)
if err != nil { if err != nil {
return nil, err return nil, err

View File

@ -35,22 +35,22 @@ func NewPdfRectangle(arr PdfObjectArray) (*PdfRectangle, error) {
} }
var err error var err error
rect.Llx, err = getNumberAsFloat(arr.Get(0)) rect.Llx, err = GetNumberAsFloat(arr.Get(0))
if err != nil { if err != nil {
return nil, err return nil, err
} }
rect.Lly, err = getNumberAsFloat(arr.Get(1)) rect.Lly, err = GetNumberAsFloat(arr.Get(1))
if err != nil { if err != nil {
return nil, err return nil, err
} }
rect.Urx, err = getNumberAsFloat(arr.Get(2)) rect.Urx, err = GetNumberAsFloat(arr.Get(2))
if err != nil { if err != nil {
return nil, err return nil, err
} }
rect.Ury, err = getNumberAsFloat(arr.Get(3)) rect.Ury, err = GetNumberAsFloat(arr.Get(3))
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -6,98 +6,26 @@
package model package model
import ( import (
"errors"
"github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/common"
. "github.com/unidoc/unidoc/pdf/core" "github.com/unidoc/unidoc/pdf/core"
) )
func getUniDocVersion() string { func getUniDocVersion() string {
return common.Version return common.Version
} }
func getNumberAsFloat(obj PdfObject) (float64, error) {
if fObj, ok := obj.(*PdfObjectFloat); ok {
return float64(*fObj), nil
}
if iObj, ok := obj.(*PdfObjectInteger); ok {
return float64(*iObj), nil
}
return 0, errors.New("Not a number")
}
func isNullObject(obj PdfObject) bool {
_, isNull := obj.(*PdfObjectNull)
return isNull
}
// GetNumbersAsFloat converts a list of pdf objects representing floats or integers to a slice of
// float64 values. XXX: We need this exported for extractor
func GetNumbersAsFloat(objects []PdfObject) ([]float64, error) {
return getNumbersAsFloat(objects)
}
// Convert a list of pdf objects representing floats or integers to a slice of float64 values.
func getNumbersAsFloat(objects []PdfObject) ([]float64, error) {
floats := []float64{}
for _, obj := range objects {
val, err := getNumberAsFloat(obj)
if err != nil {
return nil, err
}
floats = append(floats, val)
}
return floats, nil
}
// Cases where expecting an integer, but some implementations actually
// store the number in a floating point format.
func getNumberAsInt64(obj PdfObject) (int64, error) {
if iObj, ok := obj.(*PdfObjectInteger); ok {
return int64(*iObj), nil
}
if fObj, ok := obj.(*PdfObjectFloat); ok {
common.Log.Debug("Number expected as integer was stored as float (type casting used)")
return int64(*fObj), nil
}
return 0, errors.New("Not a number")
}
func getNumberAsFloatOrNull(obj PdfObject) (*float64, error) {
if fObj, ok := obj.(*PdfObjectFloat); ok {
num := float64(*fObj)
return &num, nil
}
if iObj, ok := obj.(*PdfObjectInteger); ok {
num := float64(*iObj)
return &num, nil
}
if _, ok := obj.(*PdfObjectNull); ok {
return nil, nil
}
return nil, errors.New("Not a number")
}
// Handy function for debugging in development. // Handy function for debugging in development.
func debugObject(obj PdfObject) { func debugObject(obj core.PdfObject) {
common.Log.Debug("obj: %T %s", obj, obj.String()) common.Log.Debug("obj: %T %s", obj, obj.String())
if stream, is := obj.(*PdfObjectStream); is { if stream, is := obj.(*core.PdfObjectStream); is {
decoded, err := DecodeStream(stream) decoded, err := core.DecodeStream(stream)
if err != nil { if err != nil {
common.Log.Debug("Error: %v", err) common.Log.Debug("Error: %v", err)
return return
} }
common.Log.Debug("Decoded: %s", decoded) common.Log.Debug("Decoded: %s", decoded)
} else if indObj, is := obj.(*PdfIndirectObject); is { } else if indObj, is := obj.(*core.PdfIndirectObject); is {
common.Log.Debug("%T %v", indObj.PdfObject, indObj.PdfObject) common.Log.Debug("%T %v", indObj.PdfObject, indObj.PdfObject)
common.Log.Debug("%s", indObj.PdfObject.String()) common.Log.Debug("%s", indObj.PdfObject.String())
} }