mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-02 22:17:06 +08:00
commit
e80a3dab58
@ -74,14 +74,14 @@ func (parser *PdfParser) GetTrailer() *PdfObjectDictionary {
|
|||||||
func (parser *PdfParser) skipSpaces() (int, error) {
|
func (parser *PdfParser) skipSpaces() (int, error) {
|
||||||
cnt := 0
|
cnt := 0
|
||||||
for {
|
for {
|
||||||
bb, err := parser.reader.Peek(1)
|
b, err := parser.reader.ReadByte()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
if IsWhiteSpace(bb[0]) {
|
if IsWhiteSpace(b) {
|
||||||
parser.reader.ReadByte()
|
|
||||||
cnt++
|
cnt++
|
||||||
} else {
|
} else {
|
||||||
|
parser.reader.UnreadByte()
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -121,11 +121,11 @@ func (parser *PdfParser) skipComments() error {
|
|||||||
|
|
||||||
// Read a comment starting with '%'.
|
// Read a comment starting with '%'.
|
||||||
func (parser *PdfParser) readComment() (string, error) {
|
func (parser *PdfParser) readComment() (string, error) {
|
||||||
commentText := ""
|
var r bytes.Buffer
|
||||||
|
|
||||||
_, err := parser.skipSpaces()
|
_, err := parser.skipSpaces()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return commentText, err
|
return r.String(), err
|
||||||
}
|
}
|
||||||
|
|
||||||
isFirst := true
|
isFirst := true
|
||||||
@ -133,21 +133,21 @@ func (parser *PdfParser) readComment() (string, error) {
|
|||||||
bb, err := parser.reader.Peek(1)
|
bb, err := parser.reader.Peek(1)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
common.Log.Debug("Error %s", err.Error())
|
common.Log.Debug("Error %s", err.Error())
|
||||||
return commentText, err
|
return r.String(), err
|
||||||
}
|
}
|
||||||
if isFirst && bb[0] != '%' {
|
if isFirst && bb[0] != '%' {
|
||||||
return commentText, errors.New("Comment should start with %")
|
return r.String(), errors.New("Comment should start with %")
|
||||||
} else {
|
} else {
|
||||||
isFirst = false
|
isFirst = false
|
||||||
}
|
}
|
||||||
if (bb[0] != '\r') && (bb[0] != '\n') {
|
if (bb[0] != '\r') && (bb[0] != '\n') {
|
||||||
b, _ := parser.reader.ReadByte()
|
b, _ := parser.reader.ReadByte()
|
||||||
commentText += string(b)
|
r.WriteByte(b)
|
||||||
} else {
|
} else {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return commentText, nil
|
return r.String(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read a single line of text from current position.
|
// Read a single line of text from current position.
|
||||||
@ -171,7 +171,7 @@ func (parser *PdfParser) readTextLine() (string, error) {
|
|||||||
|
|
||||||
// Parse a name starting with '/'.
|
// Parse a name starting with '/'.
|
||||||
func (parser *PdfParser) parseName() (PdfObjectName, error) {
|
func (parser *PdfParser) parseName() (PdfObjectName, error) {
|
||||||
name := ""
|
var r bytes.Buffer
|
||||||
nameStarted := false
|
nameStarted := false
|
||||||
for {
|
for {
|
||||||
bb, err := parser.reader.Peek(1)
|
bb, err := parser.reader.Peek(1)
|
||||||
@ -179,7 +179,7 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
|
|||||||
break // Can happen when loading from object stream.
|
break // Can happen when loading from object stream.
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return PdfObjectName(name), err
|
return PdfObjectName(r.String()), err
|
||||||
}
|
}
|
||||||
|
|
||||||
if !nameStarted {
|
if !nameStarted {
|
||||||
@ -192,7 +192,7 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
|
|||||||
parser.skipSpaces()
|
parser.skipSpaces()
|
||||||
} else {
|
} else {
|
||||||
common.Log.Debug("ERROR Name starting with %s (% x)", bb, bb)
|
common.Log.Debug("ERROR Name starting with %s (% x)", bb, bb)
|
||||||
return PdfObjectName(name), fmt.Errorf("Invalid name: (%c)", bb[0])
|
return PdfObjectName(r.String()), fmt.Errorf("Invalid name: (%c)", bb[0])
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if IsWhiteSpace(bb[0]) {
|
if IsWhiteSpace(bb[0]) {
|
||||||
@ -202,22 +202,22 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
|
|||||||
} else if bb[0] == '#' {
|
} else if bb[0] == '#' {
|
||||||
hexcode, err := parser.reader.Peek(3)
|
hexcode, err := parser.reader.Peek(3)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return PdfObjectName(name), err
|
return PdfObjectName(r.String()), err
|
||||||
}
|
}
|
||||||
parser.reader.Discard(3)
|
parser.reader.Discard(3)
|
||||||
|
|
||||||
code, err := hex.DecodeString(string(hexcode[1:3]))
|
code, err := hex.DecodeString(string(hexcode[1:3]))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return PdfObjectName(name), err
|
return PdfObjectName(r.String()), err
|
||||||
}
|
}
|
||||||
name += string(code)
|
r.Write(code)
|
||||||
} else {
|
} else {
|
||||||
b, _ := parser.reader.ReadByte()
|
b, _ := parser.reader.ReadByte()
|
||||||
name += string(b)
|
r.WriteByte(b)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return PdfObjectName(name), nil
|
return PdfObjectName(r.String()), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Numeric objects.
|
// Numeric objects.
|
||||||
@ -243,9 +243,9 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
|
|||||||
func (parser *PdfParser) parseNumber() (PdfObject, error) {
|
func (parser *PdfParser) parseNumber() (PdfObject, error) {
|
||||||
isFloat := false
|
isFloat := false
|
||||||
allowSigns := true
|
allowSigns := true
|
||||||
numStr := ""
|
var r bytes.Buffer
|
||||||
for {
|
for {
|
||||||
common.Log.Trace("Parsing number \"%s\"", numStr)
|
common.Log.Trace("Parsing number \"%s\"", r.String())
|
||||||
bb, err := parser.reader.Peek(1)
|
bb, err := parser.reader.Peek(1)
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
// GH: EOF handling. Handle EOF like end of line. Can happen with
|
// GH: EOF handling. Handle EOF like end of line. Can happen with
|
||||||
@ -260,19 +260,19 @@ func (parser *PdfParser) parseNumber() (PdfObject, error) {
|
|||||||
if allowSigns && (bb[0] == '-' || bb[0] == '+') {
|
if allowSigns && (bb[0] == '-' || bb[0] == '+') {
|
||||||
// Only appear in the beginning, otherwise serves as a delimiter.
|
// Only appear in the beginning, otherwise serves as a delimiter.
|
||||||
b, _ := parser.reader.ReadByte()
|
b, _ := parser.reader.ReadByte()
|
||||||
numStr += string(b)
|
r.WriteByte(b)
|
||||||
allowSigns = false // Only allowed in beginning, and after e (exponential).
|
allowSigns = false // Only allowed in beginning, and after e (exponential).
|
||||||
} else if IsDecimalDigit(bb[0]) {
|
} else if IsDecimalDigit(bb[0]) {
|
||||||
b, _ := parser.reader.ReadByte()
|
b, _ := parser.reader.ReadByte()
|
||||||
numStr += string(b)
|
r.WriteByte(b)
|
||||||
} else if bb[0] == '.' {
|
} else if bb[0] == '.' {
|
||||||
b, _ := parser.reader.ReadByte()
|
b, _ := parser.reader.ReadByte()
|
||||||
numStr += string(b)
|
r.WriteByte(b)
|
||||||
isFloat = true
|
isFloat = true
|
||||||
} else if bb[0] == 'e' {
|
} else if bb[0] == 'e' {
|
||||||
// Exponential number format.
|
// Exponential number format.
|
||||||
b, _ := parser.reader.ReadByte()
|
b, _ := parser.reader.ReadByte()
|
||||||
numStr += string(b)
|
r.WriteByte(b)
|
||||||
isFloat = true
|
isFloat = true
|
||||||
allowSigns = true
|
allowSigns = true
|
||||||
} else {
|
} else {
|
||||||
@ -281,11 +281,11 @@ func (parser *PdfParser) parseNumber() (PdfObject, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if isFloat {
|
if isFloat {
|
||||||
fVal, err := strconv.ParseFloat(numStr, 64)
|
fVal, err := strconv.ParseFloat(r.String(), 64)
|
||||||
o := PdfObjectFloat(fVal)
|
o := PdfObjectFloat(fVal)
|
||||||
return &o, err
|
return &o, err
|
||||||
} else {
|
} else {
|
||||||
intVal, err := strconv.ParseInt(numStr, 10, 64)
|
intVal, err := strconv.ParseInt(r.String(), 10, 64)
|
||||||
o := PdfObjectInteger(intVal)
|
o := PdfObjectInteger(intVal)
|
||||||
return &o, err
|
return &o, err
|
||||||
}
|
}
|
||||||
@ -295,26 +295,26 @@ func (parser *PdfParser) parseNumber() (PdfObject, error) {
|
|||||||
func (parser *PdfParser) parseString() (PdfObjectString, error) {
|
func (parser *PdfParser) parseString() (PdfObjectString, error) {
|
||||||
parser.reader.ReadByte()
|
parser.reader.ReadByte()
|
||||||
|
|
||||||
bytes := []byte{}
|
var r bytes.Buffer
|
||||||
count := 1
|
count := 1
|
||||||
for {
|
for {
|
||||||
bb, err := parser.reader.Peek(1)
|
bb, err := parser.reader.Peek(1)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return PdfObjectString(bytes), err
|
return PdfObjectString(r.String()), err
|
||||||
}
|
}
|
||||||
|
|
||||||
if bb[0] == '\\' { // Escape sequence.
|
if bb[0] == '\\' { // Escape sequence.
|
||||||
parser.reader.ReadByte() // Skip the escape \ byte.
|
parser.reader.ReadByte() // Skip the escape \ byte.
|
||||||
b, err := parser.reader.ReadByte()
|
b, err := parser.reader.ReadByte()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return PdfObjectString(bytes), err
|
return PdfObjectString(r.String()), err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Octal '\ddd' number (base 8).
|
// Octal '\ddd' number (base 8).
|
||||||
if IsOctalDigit(b) {
|
if IsOctalDigit(b) {
|
||||||
bb, err := parser.reader.Peek(2)
|
bb, err := parser.reader.Peek(2)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return PdfObjectString(bytes), err
|
return PdfObjectString(r.String()), err
|
||||||
}
|
}
|
||||||
|
|
||||||
numeric := []byte{}
|
numeric := []byte{}
|
||||||
@ -331,29 +331,29 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
|
|||||||
common.Log.Trace("Numeric string \"%s\"", numeric)
|
common.Log.Trace("Numeric string \"%s\"", numeric)
|
||||||
code, err := strconv.ParseUint(string(numeric), 8, 32)
|
code, err := strconv.ParseUint(string(numeric), 8, 32)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return PdfObjectString(bytes), err
|
return PdfObjectString(r.String()), err
|
||||||
}
|
}
|
||||||
bytes = append(bytes, byte(code))
|
r.WriteByte(byte(code))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
switch b {
|
switch b {
|
||||||
case 'n':
|
case 'n':
|
||||||
bytes = append(bytes, '\n')
|
r.WriteRune('\n')
|
||||||
case 'r':
|
case 'r':
|
||||||
bytes = append(bytes, '\r')
|
r.WriteRune('\r')
|
||||||
case 't':
|
case 't':
|
||||||
bytes = append(bytes, '\t')
|
r.WriteRune('\t')
|
||||||
case 'b':
|
case 'b':
|
||||||
bytes = append(bytes, '\b')
|
r.WriteRune('\b')
|
||||||
case 'f':
|
case 'f':
|
||||||
bytes = append(bytes, '\f')
|
r.WriteRune('\f')
|
||||||
case '(':
|
case '(':
|
||||||
bytes = append(bytes, '(')
|
r.WriteRune('(')
|
||||||
case ')':
|
case ')':
|
||||||
bytes = append(bytes, ')')
|
r.WriteRune(')')
|
||||||
case '\\':
|
case '\\':
|
||||||
bytes = append(bytes, '\\')
|
r.WriteRune('\\')
|
||||||
}
|
}
|
||||||
|
|
||||||
continue
|
continue
|
||||||
@ -368,10 +368,10 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
b, _ := parser.reader.ReadByte()
|
b, _ := parser.reader.ReadByte()
|
||||||
bytes = append(bytes, b)
|
r.WriteByte(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
return PdfObjectString(bytes), nil
|
return PdfObjectString(r.String()), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Starts with '<' ends with '>'.
|
// Starts with '<' ends with '>'.
|
||||||
@ -379,12 +379,8 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
|
|||||||
func (parser *PdfParser) parseHexString() (PdfObjectString, error) {
|
func (parser *PdfParser) parseHexString() (PdfObjectString, error) {
|
||||||
parser.reader.ReadByte()
|
parser.reader.ReadByte()
|
||||||
|
|
||||||
hextable := []byte("0123456789abcdefABCDEF")
|
var r bytes.Buffer
|
||||||
|
|
||||||
tmp := []byte{}
|
|
||||||
for {
|
for {
|
||||||
parser.skipSpaces()
|
|
||||||
|
|
||||||
bb, err := parser.reader.Peek(1)
|
bb, err := parser.reader.Peek(1)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return PdfObjectString(""), err
|
return PdfObjectString(""), err
|
||||||
@ -396,16 +392,16 @@ func (parser *PdfParser) parseHexString() (PdfObjectString, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
b, _ := parser.reader.ReadByte()
|
b, _ := parser.reader.ReadByte()
|
||||||
if bytes.IndexByte(hextable, b) >= 0 {
|
if !IsWhiteSpace(b) {
|
||||||
tmp = append(tmp, b)
|
r.WriteByte(b)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(tmp)%2 == 1 {
|
if r.Len()%2 == 1 {
|
||||||
tmp = append(tmp, '0')
|
r.WriteRune('0')
|
||||||
}
|
}
|
||||||
|
|
||||||
buf, _ := hex.DecodeString(string(tmp))
|
buf, _ := hex.DecodeString(r.String())
|
||||||
return PdfObjectString(buf), nil
|
return PdfObjectString(buf), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ package core
|
|||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"encoding/hex"
|
||||||
//"fmt"
|
//"fmt"
|
||||||
"io"
|
"io"
|
||||||
//"os"
|
//"os"
|
||||||
@ -27,27 +28,54 @@ func makeReaderForText(txt string) (*bytes.Reader, *bufio.Reader, int64) {
|
|||||||
return bufReader, bufferedReader, int64(len(txt))
|
return bufReader, bufferedReader, int64(len(txt))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func makeParserForText(txt string) *PdfParser {
|
||||||
|
rs, reader, fileSize := makeReaderForText(txt)
|
||||||
|
return &PdfParser{rs: rs, reader: reader, fileSize: fileSize}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkSkipSpaces(b *testing.B) {
|
||||||
|
parser := makeParserForText(" \t\t \tABC")
|
||||||
|
for n := 0; n < b.N; n++ {
|
||||||
|
parser.skipSpaces()
|
||||||
|
parser.SetFileOffset(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var namePairs = map[string]string{
|
||||||
|
"/Name1": "Name1",
|
||||||
|
"/ASomewhatLongerName": "ASomewhatLongerName",
|
||||||
|
"/A;Name_With-Various***Characters?": "A;Name_With-Various***Characters?",
|
||||||
|
"/1.2": "1.2",
|
||||||
|
"/$$": "$$",
|
||||||
|
"/@pattern": "@pattern",
|
||||||
|
"/.notdef": ".notdef",
|
||||||
|
"/Lime#20Green": "Lime Green",
|
||||||
|
"/paired#28#29parentheses": "paired()parentheses",
|
||||||
|
"/The_Key_of_F#23_Minor": "The_Key_of_F#_Minor",
|
||||||
|
"/A#42": "AB",
|
||||||
|
"/": "",
|
||||||
|
"/ ": "",
|
||||||
|
"/#3CBC88#3E#3CC5ED#3E#3CD544#3E#3CC694#3E": "<BC88><C5ED><D544><C694>",
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkNameParsing(b *testing.B) {
|
||||||
|
for n := 0; n < b.N; n++ {
|
||||||
|
for str, name := range namePairs {
|
||||||
|
parser := makeParserForText(str)
|
||||||
|
o, err := parser.parseName()
|
||||||
|
if err != nil && err != io.EOF {
|
||||||
|
b.Errorf("Unable to parse name string, error: %s", err)
|
||||||
|
}
|
||||||
|
if string(o) != name {
|
||||||
|
b.Errorf("Mismatch %s != %s", o, name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestNameParsing(t *testing.T) {
|
func TestNameParsing(t *testing.T) {
|
||||||
namePairs := map[string]string{}
|
|
||||||
|
|
||||||
namePairs["/Name1"] = "Name1"
|
|
||||||
namePairs["/ASomewhatLongerName"] = "ASomewhatLongerName"
|
|
||||||
namePairs["/A;Name_With-Various***Characters?"] = "A;Name_With-Various***Characters?"
|
|
||||||
namePairs["/1.2"] = "1.2"
|
|
||||||
namePairs["/$$"] = "$$"
|
|
||||||
namePairs["/@pattern"] = "@pattern"
|
|
||||||
namePairs["/.notdef"] = ".notdef"
|
|
||||||
namePairs["/Lime#20Green"] = "Lime Green"
|
|
||||||
namePairs["/paired#28#29parentheses"] = "paired()parentheses"
|
|
||||||
namePairs["/The_Key_of_F#23_Minor"] = "The_Key_of_F#_Minor"
|
|
||||||
namePairs["/A#42"] = "AB"
|
|
||||||
namePairs["/"] = ""
|
|
||||||
namePairs["/ "] = ""
|
|
||||||
namePairs["/#3CBC88#3E#3CC5ED#3E#3CD544#3E#3CC694#3E"] = "<BC88><C5ED><D544><C694>"
|
|
||||||
|
|
||||||
for str, name := range namePairs {
|
for str, name := range namePairs {
|
||||||
parser := PdfParser{}
|
parser := makeParserForText(str)
|
||||||
parser.rs, parser.reader, parser.fileSize = makeReaderForText(str)
|
|
||||||
o, err := parser.parseName()
|
o, err := parser.parseName()
|
||||||
if err != nil && err != io.EOF {
|
if err != nil && err != io.EOF {
|
||||||
t.Errorf("Unable to parse name string, error: %s", err)
|
t.Errorf("Unable to parse name string, error: %s", err)
|
||||||
@ -58,8 +86,7 @@ func TestNameParsing(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Should fail (require starting with '/')
|
// Should fail (require starting with '/')
|
||||||
parser := PdfParser{}
|
parser := makeParserForText(" /Name")
|
||||||
parser.rs, parser.reader, parser.fileSize = makeReaderForText(" /Name")
|
|
||||||
_, err := parser.parseName()
|
_, err := parser.parseName()
|
||||||
if err == nil || err == io.EOF {
|
if err == nil || err == io.EOF {
|
||||||
t.Errorf("Should be invalid name")
|
t.Errorf("Should be invalid name")
|
||||||
@ -71,33 +98,42 @@ type testStringEntry struct {
|
|||||||
expected string
|
expected string
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestStringParsing(t *testing.T) {
|
func BenchmarkStringParsing(b *testing.B) {
|
||||||
testEntries := []testStringEntry{
|
entry := "(Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).)"
|
||||||
{"(This is a string)", "This is a string"},
|
parser := makeParserForText(entry)
|
||||||
{"(Strings may contain\n newlines and such)", "Strings may contain\n newlines and such"},
|
for n := 0; n < b.N; n++ {
|
||||||
{"(Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).)",
|
_, err := parser.parseString()
|
||||||
"Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on)."},
|
if err != nil && err != io.EOF {
|
||||||
{"(These \\\ntwo strings \\\nare the same.)", "These two strings are the same."},
|
b.Errorf("Unable to parse string, error: %s", err)
|
||||||
{"(These two strings are the same.)", "These two strings are the same."},
|
}
|
||||||
{"(\\\\)", "\\"},
|
parser.SetFileOffset(0)
|
||||||
{"(This string has an end-of-line at the end of it.\n)",
|
|
||||||
"This string has an end-of-line at the end of it.\n"},
|
|
||||||
{"(So does this one.\\n)", "So does this one.\n"},
|
|
||||||
{"(\\0053)", "\0053"},
|
|
||||||
{"(\\053)", "\053"},
|
|
||||||
{"(\\53)", "\053"},
|
|
||||||
{"(\\053)", "+"},
|
|
||||||
{"(\\53\\101)", "+A"},
|
|
||||||
}
|
}
|
||||||
for _, entry := range testEntries {
|
}
|
||||||
parser := PdfParser{}
|
|
||||||
parser.rs, parser.reader, parser.fileSize = makeReaderForText(entry.raw)
|
var stringPairs = map[string]string{
|
||||||
|
"(This is a string)": "This is a string",
|
||||||
|
"(Strings may contain\n newlines and such)": "Strings may contain\n newlines and such",
|
||||||
|
"(Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).)": "Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).",
|
||||||
|
"(These \\\ntwo strings \\\nare the same.)": "These two strings are the same.",
|
||||||
|
"(These two strings are the same.)": "These two strings are the same.",
|
||||||
|
"(\\\\)": "\\",
|
||||||
|
"(This string has an end-of-line at the end of it.\n)": "This string has an end-of-line at the end of it.\n",
|
||||||
|
"(So does this one.\\n)": "So does this one.\n",
|
||||||
|
"(\\0053)": "\0053",
|
||||||
|
"(\\53)": "\053",
|
||||||
|
"(\\053)": "+",
|
||||||
|
"(\\53\\101)": "+A",
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStringParsing(t *testing.T) {
|
||||||
|
for raw, expected := range stringPairs {
|
||||||
|
parser := makeParserForText(raw)
|
||||||
o, err := parser.parseString()
|
o, err := parser.parseString()
|
||||||
if err != nil && err != io.EOF {
|
if err != nil && err != io.EOF {
|
||||||
t.Errorf("Unable to parse string, error: %s", err)
|
t.Errorf("Unable to parse string, error: %s", err)
|
||||||
}
|
}
|
||||||
if string(o) != entry.expected {
|
if string(o) != expected {
|
||||||
t.Errorf("String Mismatch %s: \"%s\" != \"%s\"", entry.raw, o, entry.expected)
|
t.Errorf("String Mismatch %s: \"%s\" != \"%s\"", raw, o, expected)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -106,8 +142,7 @@ func TestReadTextLine(t *testing.T) {
|
|||||||
// reading text ling + rewinding should be idempotent, that is:
|
// reading text ling + rewinding should be idempotent, that is:
|
||||||
// if we rewind back len(str) bytes after reading string str we should arrive at beginning of str
|
// if we rewind back len(str) bytes after reading string str we should arrive at beginning of str
|
||||||
rawText := "abc\xb0cde"
|
rawText := "abc\xb0cde"
|
||||||
parser := PdfParser{}
|
parser := makeParserForText(rawText)
|
||||||
parser.rs, parser.reader, parser.fileSize = makeReaderForText(rawText)
|
|
||||||
s, err := parser.readTextLine()
|
s, err := parser.readTextLine()
|
||||||
if err != nil && err != io.EOF {
|
if err != nil && err != io.EOF {
|
||||||
t.Errorf("Unable to parse string, error: %s", err)
|
t.Errorf("Unable to parse string, error: %s", err)
|
||||||
@ -172,6 +207,21 @@ func TestBoolParsing(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkNumbericParsing(b *testing.B) {
|
||||||
|
txt1 := "[34.5 -3.62 1 +123.6 4. -.002 0.0]"
|
||||||
|
parser := PdfParser{}
|
||||||
|
parser.rs, parser.reader, parser.fileSize = makeReaderForText(txt1)
|
||||||
|
|
||||||
|
for n := 0; n < b.N; n++ {
|
||||||
|
_, err := parser.parseArray()
|
||||||
|
if err != nil {
|
||||||
|
b.Errorf("Error parsing array")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
parser.SetFileOffset(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestNumericParsing1(t *testing.T) {
|
func TestNumericParsing1(t *testing.T) {
|
||||||
// 7.3.3
|
// 7.3.3
|
||||||
txt1 := "[34.5 -3.62 1 +123.6 4. -.002 0.0]"
|
txt1 := "[34.5 -3.62 1 +123.6 4. -.002 0.0]"
|
||||||
@ -285,6 +335,25 @@ func TestNumericParsing3(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkHexStringParsing(b *testing.B) {
|
||||||
|
var ref bytes.Buffer
|
||||||
|
for i := 0; i < 0xff; i++ {
|
||||||
|
ref.WriteByte(byte(i))
|
||||||
|
}
|
||||||
|
parser := makeParserForText("<" + hex.EncodeToString(ref.Bytes()) + ">")
|
||||||
|
for n := 0; n < b.N; n++ {
|
||||||
|
hs, err := parser.parseHexString()
|
||||||
|
if err != nil {
|
||||||
|
b.Errorf("Error parsing hex string: %s", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if string(hs) != ref.String() {
|
||||||
|
b.Errorf("Reference and parsed hex strings mismatch")
|
||||||
|
}
|
||||||
|
parser.SetFileOffset(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestHexStringParsing(t *testing.T) {
|
func TestHexStringParsing(t *testing.T) {
|
||||||
// 7.3.4.3
|
// 7.3.4.3
|
||||||
}
|
}
|
||||||
|
@ -12,9 +12,8 @@ func IsWhiteSpace(ch byte) bool {
|
|||||||
// spaceCharacters := string([]byte{0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20})
|
// spaceCharacters := string([]byte{0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20})
|
||||||
if (ch == 0x00) || (ch == 0x09) || (ch == 0x0A) || (ch == 0x0C) || (ch == 0x0D) || (ch == 0x20) {
|
if (ch == 0x00) || (ch == 0x09) || (ch == 0x0A) || (ch == 0x0C) || (ch == 0x0D) || (ch == 0x20) {
|
||||||
return true
|
return true
|
||||||
} else {
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsFloatDigit checks if a character can be a part of a float number string.
|
// IsFloatDigit checks if a character can be a part of a float number string.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user