mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
Unify and optimize number parsing
This commit is contained in:
parent
d0f9c139ad
commit
e85397b57a
@ -16,6 +16,7 @@ import (
|
||||
|
||||
"github.com/unidoc/unipdf/v3/common"
|
||||
"github.com/unidoc/unipdf/v3/core"
|
||||
"github.com/unidoc/unipdf/v3/internal/parseutils"
|
||||
)
|
||||
|
||||
// ContentStreamParser represents a content stream parser for parsing content streams in PDFs.
|
||||
@ -191,67 +192,19 @@ func (csp *ContentStreamParser) parseName() (core.PdfObjectName, error) {
|
||||
// we will support it in the reader (no confusion with other types, so
|
||||
// no compromise).
|
||||
func (csp *ContentStreamParser) parseNumber() (core.PdfObject, error) {
|
||||
isFloat := false
|
||||
allowSigns := true
|
||||
numStr := ""
|
||||
for {
|
||||
common.Log.Trace("Parsing number \"%s\"", numStr)
|
||||
bb, err := csp.reader.Peek(1)
|
||||
if err == io.EOF {
|
||||
// GH: EOF handling. Handle EOF like end of line. Can happen with
|
||||
// encoded object streams that the object is at the end.
|
||||
// In other cases, we will get the EOF error elsewhere at any rate.
|
||||
break // Handle like EOF
|
||||
}
|
||||
if err != nil {
|
||||
common.Log.Error("ERROR %s", err)
|
||||
return nil, err
|
||||
}
|
||||
if allowSigns && (bb[0] == '-' || bb[0] == '+') {
|
||||
// Only appear in the beginning, otherwise serves as a delimiter.
|
||||
b, _ := csp.reader.ReadByte()
|
||||
numStr += string(b)
|
||||
allowSigns = false // Only allowed in beginning, and after e (exponential).
|
||||
} else if core.IsDecimalDigit(bb[0]) {
|
||||
b, _ := csp.reader.ReadByte()
|
||||
numStr += string(b)
|
||||
} else if bb[0] == '.' {
|
||||
b, _ := csp.reader.ReadByte()
|
||||
numStr += string(b)
|
||||
isFloat = true
|
||||
} else if bb[0] == 'e' {
|
||||
// Exponential number format.
|
||||
b, _ := csp.reader.ReadByte()
|
||||
numStr += string(b)
|
||||
isFloat = true
|
||||
allowSigns = true
|
||||
} else {
|
||||
break
|
||||
}
|
||||
num, err := parseutils.ParseNumber(csp.reader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var o core.PdfObject
|
||||
if isFloat {
|
||||
fVal, err := strconv.ParseFloat(numStr, 64)
|
||||
if err != nil {
|
||||
common.Log.Debug("Error parsing number %q err=%v. Using 0.0. Output may be incorrect", numStr, err)
|
||||
fVal = 0.0
|
||||
}
|
||||
|
||||
objFloat := core.PdfObjectFloat(fVal)
|
||||
o = &objFloat
|
||||
} else {
|
||||
intVal, err := strconv.ParseInt(numStr, 10, 64)
|
||||
if err != nil {
|
||||
common.Log.Debug("Error parsing integer %q err=%v. Using 0. Output may be incorrect", numStr, err)
|
||||
intVal = 0
|
||||
}
|
||||
|
||||
objInt := core.PdfObjectInteger(intVal)
|
||||
o = &objInt
|
||||
switch num := num.(type) {
|
||||
case float64:
|
||||
o := core.PdfObjectFloat(num)
|
||||
return &o, nil
|
||||
case int64:
|
||||
o := core.PdfObjectInteger(num)
|
||||
return &o, nil
|
||||
}
|
||||
|
||||
return o, nil
|
||||
return nil, fmt.Errorf("unhandled number type %T", num)
|
||||
}
|
||||
|
||||
// A string starts with '(' and ends with ')'.
|
||||
|
@ -20,6 +20,7 @@ import (
|
||||
|
||||
"github.com/unidoc/unipdf/v3/common"
|
||||
"github.com/unidoc/unipdf/v3/core/security"
|
||||
"github.com/unidoc/unipdf/v3/internal/parseutils"
|
||||
)
|
||||
|
||||
// Regular Expressions for parsing and identifying object signatures.
|
||||
@ -286,69 +287,19 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
|
||||
// we will support it in the reader (no confusion with other types, so
|
||||
// no compromise).
|
||||
func (parser *PdfParser) parseNumber() (PdfObject, error) {
|
||||
isFloat := false
|
||||
allowSigns := true
|
||||
var r bytes.Buffer
|
||||
for {
|
||||
common.Log.Trace("Parsing number \"%s\"", r.String())
|
||||
bb, err := parser.reader.Peek(1)
|
||||
if err == io.EOF {
|
||||
// GH: EOF handling. Handle EOF like end of line. Can happen with
|
||||
// encoded object streams that the object is at the end.
|
||||
// In other cases, we will get the EOF error elsewhere at any rate.
|
||||
break // Handle like EOF
|
||||
}
|
||||
if err != nil {
|
||||
common.Log.Debug("ERROR %s", err)
|
||||
return nil, err
|
||||
}
|
||||
if allowSigns && (bb[0] == '-' || bb[0] == '+') {
|
||||
// Only appear in the beginning, otherwise serves as a delimiter.
|
||||
b, _ := parser.reader.ReadByte()
|
||||
r.WriteByte(b)
|
||||
allowSigns = false // Only allowed in beginning, and after e (exponential).
|
||||
} else if IsDecimalDigit(bb[0]) {
|
||||
b, _ := parser.reader.ReadByte()
|
||||
r.WriteByte(b)
|
||||
} else if bb[0] == '.' {
|
||||
b, _ := parser.reader.ReadByte()
|
||||
r.WriteByte(b)
|
||||
isFloat = true
|
||||
} else if bb[0] == 'e' || bb[0] == 'E' {
|
||||
// Exponential number format.
|
||||
b, _ := parser.reader.ReadByte()
|
||||
r.WriteByte(b)
|
||||
isFloat = true
|
||||
allowSigns = true
|
||||
} else {
|
||||
break
|
||||
}
|
||||
num, err := parseutils.ParseNumber(parser.reader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var o PdfObject
|
||||
if isFloat {
|
||||
fVal, err := strconv.ParseFloat(r.String(), 64)
|
||||
if err != nil {
|
||||
common.Log.Debug("Error parsing number %v err=%v. Using 0.0. Output may be incorrect", r.String(), err)
|
||||
fVal = 0.0
|
||||
err = nil
|
||||
}
|
||||
|
||||
objFloat := PdfObjectFloat(fVal)
|
||||
o = &objFloat
|
||||
} else {
|
||||
intVal, err := strconv.ParseInt(r.String(), 10, 64)
|
||||
if err != nil {
|
||||
common.Log.Debug("Error parsing number %v err=%v. Using 0. Output may be incorrect", r.String(), err)
|
||||
intVal = 0
|
||||
err = nil
|
||||
}
|
||||
|
||||
objInt := PdfObjectInteger(intVal)
|
||||
o = &objInt
|
||||
switch num := num.(type) {
|
||||
case float64:
|
||||
o := PdfObjectFloat(num)
|
||||
return &o, nil
|
||||
case int64:
|
||||
o := PdfObjectInteger(num)
|
||||
return &o, nil
|
||||
}
|
||||
|
||||
return o, nil
|
||||
return nil, fmt.Errorf("unhandled number type %T", num)
|
||||
}
|
||||
|
||||
// A string starts with '(' and ends with ')'.
|
||||
|
@ -19,6 +19,7 @@ import (
|
||||
|
||||
"github.com/unidoc/unipdf/v3/common"
|
||||
"github.com/unidoc/unipdf/v3/core"
|
||||
"github.com/unidoc/unipdf/v3/internal/parseutils"
|
||||
)
|
||||
|
||||
// Regular Expressions for parsing and identifying object signatures.
|
||||
@ -212,54 +213,19 @@ func (parser *fdfParser) parseName() (core.PdfObjectName, error) {
|
||||
// we will support it in the reader (no confusion with other types, so
|
||||
// no compromise).
|
||||
func (parser *fdfParser) parseNumber() (core.PdfObject, error) {
|
||||
isFloat := false
|
||||
allowSigns := true
|
||||
var r bytes.Buffer
|
||||
for {
|
||||
common.Log.Trace("Parsing number \"%s\"", r.String())
|
||||
bb, err := parser.reader.Peek(1)
|
||||
if err == io.EOF {
|
||||
// GH: EOF handling. Handle EOF like end of line. Can happen with
|
||||
// encoded object streams that the object is at the end.
|
||||
// In other cases, we will get the EOF error elsewhere at any rate.
|
||||
break // Handle like EOF
|
||||
}
|
||||
if err != nil {
|
||||
common.Log.Debug("ERROR %s", err)
|
||||
return nil, err
|
||||
}
|
||||
if allowSigns && (bb[0] == '-' || bb[0] == '+') {
|
||||
// Only appear in the beginning, otherwise serves as a delimiter.
|
||||
b, _ := parser.reader.ReadByte()
|
||||
r.WriteByte(b)
|
||||
allowSigns = false // Only allowed in beginning, and after e (exponential).
|
||||
} else if core.IsDecimalDigit(bb[0]) {
|
||||
b, _ := parser.reader.ReadByte()
|
||||
r.WriteByte(b)
|
||||
} else if bb[0] == '.' {
|
||||
b, _ := parser.reader.ReadByte()
|
||||
r.WriteByte(b)
|
||||
isFloat = true
|
||||
} else if bb[0] == 'e' {
|
||||
// Exponential number format.
|
||||
b, _ := parser.reader.ReadByte()
|
||||
r.WriteByte(b)
|
||||
isFloat = true
|
||||
allowSigns = true
|
||||
} else {
|
||||
break
|
||||
}
|
||||
num, err := parseutils.ParseNumber(parser.reader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if isFloat {
|
||||
fVal, err := strconv.ParseFloat(r.String(), 64)
|
||||
o := core.PdfObjectFloat(fVal)
|
||||
return &o, err
|
||||
} else {
|
||||
intVal, err := strconv.ParseInt(r.String(), 10, 64)
|
||||
o := core.PdfObjectInteger(intVal)
|
||||
return &o, err
|
||||
switch num := num.(type) {
|
||||
case float64:
|
||||
o := core.PdfObjectFloat(num)
|
||||
return &o, nil
|
||||
case int64:
|
||||
o := core.PdfObjectInteger(num)
|
||||
return &o, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unhandled number type %T", num)
|
||||
}
|
||||
|
||||
// A string starts with '(' and ends with ')'.
|
||||
|
@ -15,6 +15,7 @@ import (
|
||||
|
||||
"github.com/unidoc/unipdf/v3/common"
|
||||
"github.com/unidoc/unipdf/v3/core"
|
||||
"github.com/unidoc/unipdf/v3/internal/parseutils"
|
||||
)
|
||||
|
||||
// cMapParser parses CMap character to unicode mapping files.
|
||||
@ -392,49 +393,17 @@ func (p *cMapParser) parseDict() (cmapDict, error) {
|
||||
|
||||
// parseDict parseNumber a PDF number.
|
||||
func (p *cMapParser) parseNumber() (cmapObject, error) {
|
||||
isFloat := false
|
||||
allowSigns := true
|
||||
|
||||
numStr := bytes.Buffer{}
|
||||
for {
|
||||
bb, err := p.reader.Peek(1)
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if allowSigns && (bb[0] == '-' || bb[0] == '+') {
|
||||
// Only appear in the beginning, otherwise serves as a delimiter.
|
||||
b, _ := p.reader.ReadByte()
|
||||
numStr.WriteByte(b)
|
||||
allowSigns = false // Only allowed in beginning, and after e (exponential).
|
||||
} else if core.IsDecimalDigit(bb[0]) {
|
||||
b, _ := p.reader.ReadByte()
|
||||
numStr.WriteByte(b)
|
||||
} else if bb[0] == '.' {
|
||||
b, _ := p.reader.ReadByte()
|
||||
numStr.WriteByte(b)
|
||||
isFloat = true
|
||||
} else if bb[0] == 'e' {
|
||||
// Exponential number format.
|
||||
b, _ := p.reader.ReadByte()
|
||||
numStr.WriteByte(b)
|
||||
isFloat = true
|
||||
allowSigns = true
|
||||
} else {
|
||||
break
|
||||
}
|
||||
num, err := parseutils.ParseNumber(p.reader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if isFloat {
|
||||
fVal, err := strconv.ParseFloat(numStr.String(), 64)
|
||||
o := cmapFloat{fVal}
|
||||
return o, err
|
||||
switch num := num.(type) {
|
||||
case float64:
|
||||
return cmapFloat{num}, nil
|
||||
case int64:
|
||||
return cmapInt{num}, nil
|
||||
}
|
||||
intVal, err := strconv.ParseInt(numStr.String(), 10, 64)
|
||||
o := cmapInt{intVal}
|
||||
return o, err
|
||||
return nil, fmt.Errorf("unhandled number type %T", num)
|
||||
}
|
||||
|
||||
// parseOperand parses an operand, which is a text command represented by a word.
|
||||
|
103
internal/parseutils/number.go
Normal file
103
internal/parseutils/number.go
Normal file
@ -0,0 +1,103 @@
|
||||
package parseutils
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"github.com/unidoc/unipdf/v3/common"
|
||||
)
|
||||
|
||||
// ParseNumber parses a numeric objects from a buffered stream.
|
||||
// Section 7.3.3.
|
||||
// Integer or Float.
|
||||
//
|
||||
// An integer shall be written as one or more decimal digits optionally
|
||||
// preceded by a sign. The value shall be interpreted as a signed
|
||||
// decimal integer and shall be converted to an integer object.
|
||||
//
|
||||
// A real value shall be written as one or more decimal digits with an
|
||||
// optional sign and a leading, trailing, or embedded PERIOD (2Eh)
|
||||
// (decimal point). The value shall be interpreted as a real number
|
||||
// and shall be converted to a real object.
|
||||
//
|
||||
// Regarding exponential numbers: 7.3.3 Numeric Objects:
|
||||
// A conforming writer shall not use the PostScript syntax for numbers
|
||||
// with non-decimal radices (such as 16#FFFE) or in exponential format
|
||||
// (such as 6.02E23).
|
||||
// Nonetheless, we sometimes get numbers with exponential format, so
|
||||
// we will support it in the reader (no confusion with other types, so
|
||||
// no compromise).
|
||||
func ParseNumber(bufr *bufio.Reader) (interface{}, error) {
|
||||
isFloat := false
|
||||
allowSigns := true
|
||||
var r bytes.Buffer
|
||||
for {
|
||||
if common.Log.IsLogLevel(common.LogLevelTrace) {
|
||||
common.Log.Trace("Parsing number \"%s\"", r.String())
|
||||
}
|
||||
bb, err := bufr.Peek(1)
|
||||
if err == io.EOF {
|
||||
// GH: EOF handling. Handle EOF like end of line. Can happen with
|
||||
// encoded object streams that the object is at the end.
|
||||
// In other cases, we will get the EOF error elsewhere at any rate.
|
||||
break // Handle like EOF
|
||||
}
|
||||
if err != nil {
|
||||
common.Log.Debug("ERROR %s", err)
|
||||
return nil, err
|
||||
}
|
||||
if allowSigns && (bb[0] == '-' || bb[0] == '+') {
|
||||
// Only appear in the beginning, otherwise serves as a delimiter.
|
||||
b, _ := bufr.ReadByte()
|
||||
r.WriteByte(b)
|
||||
allowSigns = false // Only allowed in beginning, and after e (exponential).
|
||||
} else if IsDecimalDigit(bb[0]) {
|
||||
b, _ := bufr.ReadByte()
|
||||
r.WriteByte(b)
|
||||
} else if bb[0] == '.' {
|
||||
b, _ := bufr.ReadByte()
|
||||
r.WriteByte(b)
|
||||
isFloat = true
|
||||
} else if bb[0] == 'e' || bb[0] == 'E' {
|
||||
// Exponential number format.
|
||||
b, _ := bufr.ReadByte()
|
||||
r.WriteByte(b)
|
||||
isFloat = true
|
||||
allowSigns = true
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
var o interface{}
|
||||
if isFloat {
|
||||
fVal, err := strconv.ParseFloat(r.String(), 64)
|
||||
if err != nil {
|
||||
common.Log.Debug("Error parsing number %v err=%v. Using 0.0. Output may be incorrect", r.String(), err)
|
||||
fVal = 0.0
|
||||
err = nil
|
||||
}
|
||||
o = fVal
|
||||
} else {
|
||||
intVal, err := strconv.ParseInt(r.String(), 10, 64)
|
||||
if err != nil {
|
||||
common.Log.Debug("Error parsing number %v err=%v. Using 0. Output may be incorrect", r.String(), err)
|
||||
intVal = 0
|
||||
err = nil
|
||||
}
|
||||
o = intVal
|
||||
}
|
||||
|
||||
return o, nil
|
||||
}
|
||||
|
||||
// IsDecimalDigit reports whether c is an ASCII decimal digit ('0' through '9').
func IsDecimalDigit(c byte) bool {
	return '0' <= c && c <= '9'
}
|
58
ps/parser.go
58
ps/parser.go
@ -9,11 +9,12 @@ import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"github.com/unidoc/unipdf/v3/common"
|
||||
pdfcore "github.com/unidoc/unipdf/v3/core"
|
||||
"github.com/unidoc/unipdf/v3/internal/parseutils"
|
||||
)
|
||||
|
||||
// PSParser is a basic Postscript parser.
|
||||
@ -145,54 +146,17 @@ func (p *PSParser) skipSpaces() (int, error) {
|
||||
// Numeric objects.
|
||||
// Integer or Real numbers.
|
||||
func (p *PSParser) parseNumber() (PSObject, error) {
|
||||
isFloat := false
|
||||
allowSigns := true
|
||||
numStr := ""
|
||||
for {
|
||||
common.Log.Trace("Parsing number \"%s\"", numStr)
|
||||
bb, err := p.reader.Peek(1)
|
||||
if err == io.EOF {
|
||||
// GH: EOF handling. Handle EOF like end of line. Can happen with
|
||||
// encoded object streams that the object is at the end.
|
||||
// In other cases, we will get the EOF error elsewhere at any rate.
|
||||
break // Handle like EOF
|
||||
}
|
||||
if err != nil {
|
||||
common.Log.Debug("PS ERROR: %s", err)
|
||||
return nil, err
|
||||
}
|
||||
if allowSigns && (bb[0] == '-' || bb[0] == '+') {
|
||||
// Only appear in the beginning, otherwise serves as a delimiter.
|
||||
b, _ := p.reader.ReadByte()
|
||||
numStr += string(b)
|
||||
allowSigns = false // Only allowed in beginning, and after e (exponential).
|
||||
} else if pdfcore.IsDecimalDigit(bb[0]) {
|
||||
b, _ := p.reader.ReadByte()
|
||||
numStr += string(b)
|
||||
} else if bb[0] == '.' {
|
||||
b, _ := p.reader.ReadByte()
|
||||
numStr += string(b)
|
||||
isFloat = true
|
||||
} else if bb[0] == 'e' {
|
||||
// Exponential number format.
|
||||
// TODO: Is this supported in PS?
|
||||
b, _ := p.reader.ReadByte()
|
||||
numStr += string(b)
|
||||
isFloat = true
|
||||
allowSigns = true
|
||||
} else {
|
||||
break
|
||||
}
|
||||
num, err := parseutils.ParseNumber(p.reader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if isFloat {
|
||||
fVal, err := strconv.ParseFloat(numStr, 64)
|
||||
o := MakeReal(fVal)
|
||||
return o, err
|
||||
switch num := num.(type) {
|
||||
case float64:
|
||||
return MakeReal(num), nil
|
||||
case int64:
|
||||
return MakeInteger(int(num)), nil
|
||||
}
|
||||
intVal, err := strconv.ParseInt(numStr, 10, 64)
|
||||
o := MakeInteger(int(intVal))
|
||||
return o, err
|
||||
return nil, fmt.Errorf("unhandled number type %T", num)
|
||||
}
|
||||
|
||||
// Parse bool object.
|
||||
|
Loading…
x
Reference in New Issue
Block a user