Address comments on PR

This commit is contained in:
Samuel Stauffer 2020-01-06 11:11:26 -08:00
parent e85397b57a
commit 5f19bfa269
8 changed files with 115 additions and 161 deletions

View File

@ -16,7 +16,6 @@ import (
"github.com/unidoc/unipdf/v3/common"
"github.com/unidoc/unipdf/v3/core"
"github.com/unidoc/unipdf/v3/internal/parseutils"
)
// ContentStreamParser represents a content stream parser for parsing content streams in PDFs.
@ -192,19 +191,7 @@ func (csp *ContentStreamParser) parseName() (core.PdfObjectName, error) {
// we will support it in the reader (no confusion with other types, so
// no compromise).
func (csp *ContentStreamParser) parseNumber() (core.PdfObject, error) {
// NOTE(review): this span is a rendered diff hunk whose +/- markers were
// stripped (hunk header: 19 old lines -> 7 new lines), so the pre-change body
// and the post-change body appear back to back. Only one of them exists in
// any real revision of the file.
//
// Pre-change body: parseutils.ParseNumber hands back a bare float64 or int64
// behind an empty interface, which is boxed here into the matching core
// object type. Any other dynamic type is reported as an error.
num, err := parseutils.ParseNumber(csp.reader)
if err != nil {
return nil, err
}
switch num := num.(type) {
case float64:
o := core.PdfObjectFloat(num)
return &o, nil
case int64:
o := core.PdfObjectInteger(num)
return &o, nil
}
return nil, fmt.Errorf("unhandled number type %T", num)
// Post-change body: core.ParseNumber already returns a core.PdfObject, so the
// method is reduced to a plain delegation.
return core.ParseNumber(csp.reader)
}
// A string starts with '(' and ends with ')'.

View File

@ -20,7 +20,6 @@ import (
"github.com/unidoc/unipdf/v3/common"
"github.com/unidoc/unipdf/v3/core/security"
"github.com/unidoc/unipdf/v3/internal/parseutils"
)
// Regular Expressions for parsing and identifying object signatures.
@ -287,19 +286,7 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
// we will support it in the reader (no confusion with other types, so
// no compromise).
func (parser *PdfParser) parseNumber() (PdfObject, error) {
// NOTE(review): this span is a rendered diff hunk whose +/- markers were
// stripped (hunk header: 19 old lines -> 7 new lines), so the pre-change body
// and the post-change body appear back to back. Only one of them exists in
// any real revision of the file.
//
// Pre-change body: parseutils.ParseNumber hands back a bare float64 or int64
// behind an empty interface, which is boxed here into PdfObjectFloat or
// PdfObjectInteger. Any other dynamic type is reported as an error.
num, err := parseutils.ParseNumber(parser.reader)
if err != nil {
return nil, err
}
switch num := num.(type) {
case float64:
o := PdfObjectFloat(num)
return &o, nil
case int64:
o := PdfObjectInteger(num)
return &o, nil
}
return nil, fmt.Errorf("unhandled number type %T", num)
// Post-change body: the package-level ParseNumber already returns a
// PdfObject, so the method is reduced to a plain delegation.
return ParseNumber(parser.reader)
}
// A string starts with '(' and ends with ')'.

View File

@ -6,10 +6,14 @@
package core
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"reflect"
"sort"
"strconv"
"github.com/unidoc/unipdf/v3/common"
)
@ -374,3 +378,91 @@ func flattenObject(obj PdfObject, depth int) PdfObject {
}
return obj
}
// ParseNumber parses a numeric object from a buffered stream.
// Section 7.3.3.
// Integer or Float.
//
// An integer shall be written as one or more decimal digits optionally
// preceded by a sign. The value shall be interpreted as a signed
// decimal integer and shall be converted to an integer object.
//
// A real value shall be written as one or more decimal digits with an
// optional sign and a leading, trailing, or embedded PERIOD (2Eh)
// (decimal point). The value shall be interpreted as a real number
// and shall be converted to a real object.
//
// Regarding exponential numbers: 7.3.3 Numeric Objects:
// A conforming writer shall not use the PostScript syntax for numbers
// with non-decimal radices (such as 16#FFFE) or in exponential format
// (such as 6.02E23).
// Nonetheless, we sometimes get numbers with exponential format, so
// we will support it in the reader (no confusion with other types, so
// no compromise).
//
// Bytes are consumed from buf for as long as they can belong to the number;
// the first byte that cannot is left unread. A sign is accepted only as the
// very first byte or directly after an exponent marker; anywhere else it ends
// the number and stays in buf for the next token. Reaching io.EOF ends the
// number instead of failing; any other read error is returned.
//
// The result is a *PdfObjectFloat when the text contained a period or an
// exponent marker, and a *PdfObjectInteger otherwise. Text that cannot be
// converted is logged and replaced by zero while the returned error stays nil.
func ParseNumber(buf *bufio.Reader) (PdfObject, error) {
	isFloat := false
	allowSigns := true
	var r bytes.Buffer

scan:
	for {
		if common.Log.IsLogLevel(common.LogLevelTrace) {
			common.Log.Trace("Parsing number \"%s\"", r.String())
		}
		bb, err := buf.Peek(1)
		if err == io.EOF {
			// GH: EOF handling. Handle EOF like end of line. Can happen with
			// encoded object streams that the object is at the end.
			// In other cases, we will get the EOF error elsewhere at any rate.
			break
		}
		if err != nil {
			common.Log.Debug("ERROR %s", err)
			return nil, err
		}

		// Copy the byte out now: the slice returned by Peek is only valid until
		// the next read on buf.
		c := bb[0]
		switch {
		case allowSigns && (c == '-' || c == '+'):
			// Only allowed in the beginning, and after e (exponential).
			allowSigns = false
		case IsDecimalDigit(c):
			// Once a digit has been seen, a later sign is a delimiter and must
			// not be swallowed into this number.
			allowSigns = false
		case c == '.':
			isFloat = true
			allowSigns = false
		case c == 'e' || c == 'E':
			// Exponential number format; the exponent may carry its own sign.
			isFloat = true
			allowSigns = true
		default:
			break scan
		}

		// Peek succeeded, so the byte is already buffered and ReadByte cannot fail.
		_, _ = buf.ReadByte()
		r.WriteByte(c)
	}

	text := r.String()
	if isFloat {
		fVal, err := strconv.ParseFloat(text, 64)
		if err != nil {
			// Best effort: keep parsing the document with a zero placeholder.
			common.Log.Debug("Error parsing number %v err=%v. Using 0.0. Output may be incorrect", text, err)
			fVal = 0.0
		}
		objFloat := PdfObjectFloat(fVal)
		return &objFloat, nil
	}

	intVal, err := strconv.ParseInt(text, 10, 64)
	if err != nil {
		// Best effort: keep parsing the document with a zero placeholder.
		common.Log.Debug("Error parsing number %v err=%v. Using 0. Output may be incorrect", text, err)
		intVal = 0
	}
	objInt := PdfObjectInteger(intVal)
	return &objInt, nil
}

View File

@ -19,7 +19,6 @@ import (
"github.com/unidoc/unipdf/v3/common"
"github.com/unidoc/unipdf/v3/core"
"github.com/unidoc/unipdf/v3/internal/parseutils"
)
// Regular Expressions for parsing and identifying object signatures.
@ -213,19 +212,7 @@ func (parser *fdfParser) parseName() (core.PdfObjectName, error) {
// we will support it in the reader (no confusion with other types, so
// no compromise).
func (parser *fdfParser) parseNumber() (core.PdfObject, error) {
// NOTE(review): this span is a rendered diff hunk whose +/- markers were
// stripped (hunk header: 19 old lines -> 7 new lines), so the pre-change body
// and the post-change body appear back to back. Only one of them exists in
// any real revision of the file.
//
// Pre-change body: parseutils.ParseNumber hands back a bare float64 or int64
// behind an empty interface, which is boxed here into the matching core
// object type. Any other dynamic type is reported as an error.
num, err := parseutils.ParseNumber(parser.reader)
if err != nil {
return nil, err
}
switch num := num.(type) {
case float64:
o := core.PdfObjectFloat(num)
return &o, nil
case int64:
o := core.PdfObjectInteger(num)
return &o, nil
}
return nil, fmt.Errorf("unhandled number type %T", num)
// Post-change body: core.ParseNumber already returns a core.PdfObject, so the
// method is reduced to a plain delegation.
return core.ParseNumber(parser.reader)
}
// A string starts with '(' and ends with ')'.

2
go.mod
View File

@ -12,3 +12,5 @@ require (
golang.org/x/text v0.3.2
golang.org/x/tools v0.0.0-20190606174628-0139d5756a7d // indirect
)
go 1.13

View File

@ -15,7 +15,6 @@ import (
"github.com/unidoc/unipdf/v3/common"
"github.com/unidoc/unipdf/v3/core"
"github.com/unidoc/unipdf/v3/internal/parseutils"
)
// cMapParser parses CMap character to unicode mapping files.
@ -393,17 +392,19 @@ func (p *cMapParser) parseDict() (cmapDict, error) {
// parseNumber parses a PDF number and wraps it as a cmapFloat or cmapInt.
func (p *cMapParser) parseNumber() (cmapObject, error) {
// NOTE(review): this span is a rendered diff hunk whose +/- markers were
// stripped, so replaced lines and their replacements are interleaved and the
// braces do not balance as shown. Lines using parseutils / num belong to the
// pre-change revision; lines using core / o belong to the post-change one.
//
// Old: the shared helper returned a bare float64 or int64.
num, err := parseutils.ParseNumber(p.reader)
// New: core.ParseNumber returns *core.PdfObjectFloat or *core.PdfObjectInteger.
o, err := core.ParseNumber(p.reader)
if err != nil {
return nil, err
}
// Old: the raw Go value is stored in the cmap wrapper as is.
switch num := num.(type) {
case float64:
return cmapFloat{num}, nil
case int64:
return cmapInt{num}, nil
// New: the core object is dereferenced and converted back to the primitive
// that the cmap wrapper stores.
switch o := o.(type) {
case *core.PdfObjectFloat:
return cmapFloat{float64(*o)}, nil
case *core.PdfObjectInteger:
return cmapInt{int64(*o)}, nil
}
// Old and new fallback for an unexpected dynamic type; only the variable that
// is reported differs.
return nil, fmt.Errorf("unhandled number type %T", num)
return nil, fmt.Errorf("unhandled number type %T", o)
}
// parseOperand parses an operand, which is a text command represented by a word.

View File

@ -1,103 +0,0 @@
package parseutils
import (
"bufio"
"bytes"
"io"
"strconv"
"github.com/unidoc/unipdf/v3/common"
)
// ParseNumber parses a numeric object from a buffered stream.
// Section 7.3.3.
// Integer or Float.
//
// An integer shall be written as one or more decimal digits optionally
// preceded by a sign. The value shall be interpreted as a signed
// decimal integer and shall be converted to an integer object.
//
// A real value shall be written as one or more decimal digits with an
// optional sign and a leading, trailing, or embedded PERIOD (2Eh)
// (decimal point). The value shall be interpreted as a real number
// and shall be converted to a real object.
//
// Regarding exponential numbers: 7.3.3 Numeric Objects:
// A conforming writer shall not use the PostScript syntax for numbers
// with non-decimal radices (such as 16#FFFE) or in exponential format
// (such as 6.02E23).
// Nonetheless, we sometimes get numbers with exponential format, so
// we will support it in the reader (no confusion with other types, so
// no compromise).
//
// Bytes are consumed from bufr for as long as they can belong to the number;
// the first byte that cannot is left unread. A sign is accepted only as the
// very first byte or directly after an exponent marker; anywhere else it ends
// the number and stays in bufr for the next token. Reaching io.EOF ends the
// number instead of failing; any other read error is returned.
//
// The result holds a float64 when the text contained a period or an exponent
// marker, and an int64 otherwise. Text that cannot be converted is logged and
// replaced by zero while the returned error stays nil.
func ParseNumber(bufr *bufio.Reader) (interface{}, error) {
	isFloat := false
	allowSigns := true
	var r bytes.Buffer

scan:
	for {
		if common.Log.IsLogLevel(common.LogLevelTrace) {
			common.Log.Trace("Parsing number \"%s\"", r.String())
		}
		bb, err := bufr.Peek(1)
		if err == io.EOF {
			// GH: EOF handling. Handle EOF like end of line. Can happen with
			// encoded object streams that the object is at the end.
			// In other cases, we will get the EOF error elsewhere at any rate.
			break
		}
		if err != nil {
			common.Log.Debug("ERROR %s", err)
			return nil, err
		}

		// Copy the byte out now: the slice returned by Peek is only valid until
		// the next read on bufr.
		c := bb[0]
		switch {
		case allowSigns && (c == '-' || c == '+'):
			// Only allowed in the beginning, and after e (exponential).
			allowSigns = false
		case IsDecimalDigit(c):
			// Once a digit has been seen, a later sign is a delimiter and must
			// not be swallowed into this number.
			allowSigns = false
		case c == '.':
			isFloat = true
			allowSigns = false
		case c == 'e' || c == 'E':
			// Exponential number format; the exponent may carry its own sign.
			isFloat = true
			allowSigns = true
		default:
			break scan
		}

		// Peek succeeded, so the byte is already buffered and ReadByte cannot fail.
		_, _ = bufr.ReadByte()
		r.WriteByte(c)
	}

	text := r.String()
	if isFloat {
		fVal, err := strconv.ParseFloat(text, 64)
		if err != nil {
			// Best effort: keep parsing the document with a zero placeholder.
			common.Log.Debug("Error parsing number %v err=%v. Using 0.0. Output may be incorrect", text, err)
			fVal = 0.0
		}
		return fVal, nil
	}

	intVal, err := strconv.ParseInt(text, 10, 64)
	if err != nil {
		// Best effort: keep parsing the document with a zero placeholder.
		common.Log.Debug("Error parsing number %v err=%v. Using 0. Output may be incorrect", text, err)
		intVal = 0
	}
	return intVal, nil
}
// IsDecimalDigit reports whether c is an ASCII decimal digit ('0' through '9'),
// i.e. a byte that can be part of a decimal number string.
func IsDecimalDigit(c byte) bool {
	return c >= '0' && c <= '9'
}

View File

@ -14,7 +14,6 @@ import (
"github.com/unidoc/unipdf/v3/common"
pdfcore "github.com/unidoc/unipdf/v3/core"
"github.com/unidoc/unipdf/v3/internal/parseutils"
)
// PSParser is a basic Postscript parser.
@ -146,17 +145,19 @@ func (p *PSParser) skipSpaces() (int, error) {
// Numeric objects.
// Integer or Real numbers.
func (p *PSParser) parseNumber() (PSObject, error) {
// NOTE(review): this span is a rendered diff hunk whose +/- markers were
// stripped, so replaced lines and their replacements are interleaved and the
// braces do not balance as shown. Lines using parseutils / num belong to the
// pre-change revision; lines using pdfcore / o belong to the post-change one.
//
// Old: the shared helper returned a bare float64 or int64.
num, err := parseutils.ParseNumber(p.reader)
// New: pdfcore.ParseNumber returns *pdfcore.PdfObjectFloat or
// *pdfcore.PdfObjectInteger.
o, err := pdfcore.ParseNumber(p.reader)
if err != nil {
return nil, err
}
// Old: the raw Go value is passed to the PostScript object constructors, with
// the int64 converted to int for MakeInteger.
switch num := num.(type) {
case float64:
return MakeReal(num), nil
case int64:
return MakeInteger(int(num)), nil
// New: the core object is dereferenced and converted to the primitive type
// each constructor is called with.
switch o := o.(type) {
case *pdfcore.PdfObjectFloat:
return MakeReal(float64(*o)), nil
case *pdfcore.PdfObjectInteger:
return MakeInteger(int(*o)), nil
}
// Old and new fallback for an unexpected dynamic type; only the variable that
// is reported differs.
return nil, fmt.Errorf("unhandled number type %T", num)
return nil, fmt.Errorf("unhandled number type %T", o)
}
// Parse bool object.