mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-02 22:17:06 +08:00
commit
e80a3dab58
@ -74,14 +74,14 @@ func (parser *PdfParser) GetTrailer() *PdfObjectDictionary {
|
||||
func (parser *PdfParser) skipSpaces() (int, error) {
|
||||
cnt := 0
|
||||
for {
|
||||
bb, err := parser.reader.Peek(1)
|
||||
b, err := parser.reader.ReadByte()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if IsWhiteSpace(bb[0]) {
|
||||
parser.reader.ReadByte()
|
||||
if IsWhiteSpace(b) {
|
||||
cnt++
|
||||
} else {
|
||||
parser.reader.UnreadByte()
|
||||
break
|
||||
}
|
||||
}
|
||||
@ -121,11 +121,11 @@ func (parser *PdfParser) skipComments() error {
|
||||
|
||||
// Read a comment starting with '%'.
|
||||
func (parser *PdfParser) readComment() (string, error) {
|
||||
commentText := ""
|
||||
var r bytes.Buffer
|
||||
|
||||
_, err := parser.skipSpaces()
|
||||
if err != nil {
|
||||
return commentText, err
|
||||
return r.String(), err
|
||||
}
|
||||
|
||||
isFirst := true
|
||||
@ -133,21 +133,21 @@ func (parser *PdfParser) readComment() (string, error) {
|
||||
bb, err := parser.reader.Peek(1)
|
||||
if err != nil {
|
||||
common.Log.Debug("Error %s", err.Error())
|
||||
return commentText, err
|
||||
return r.String(), err
|
||||
}
|
||||
if isFirst && bb[0] != '%' {
|
||||
return commentText, errors.New("Comment should start with %")
|
||||
return r.String(), errors.New("Comment should start with %")
|
||||
} else {
|
||||
isFirst = false
|
||||
}
|
||||
if (bb[0] != '\r') && (bb[0] != '\n') {
|
||||
b, _ := parser.reader.ReadByte()
|
||||
commentText += string(b)
|
||||
r.WriteByte(b)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return commentText, nil
|
||||
return r.String(), nil
|
||||
}
|
||||
|
||||
// Read a single line of text from current position.
|
||||
@ -171,7 +171,7 @@ func (parser *PdfParser) readTextLine() (string, error) {
|
||||
|
||||
// Parse a name starting with '/'.
|
||||
func (parser *PdfParser) parseName() (PdfObjectName, error) {
|
||||
name := ""
|
||||
var r bytes.Buffer
|
||||
nameStarted := false
|
||||
for {
|
||||
bb, err := parser.reader.Peek(1)
|
||||
@ -179,7 +179,7 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
|
||||
break // Can happen when loading from object stream.
|
||||
}
|
||||
if err != nil {
|
||||
return PdfObjectName(name), err
|
||||
return PdfObjectName(r.String()), err
|
||||
}
|
||||
|
||||
if !nameStarted {
|
||||
@ -192,7 +192,7 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
|
||||
parser.skipSpaces()
|
||||
} else {
|
||||
common.Log.Debug("ERROR Name starting with %s (% x)", bb, bb)
|
||||
return PdfObjectName(name), fmt.Errorf("Invalid name: (%c)", bb[0])
|
||||
return PdfObjectName(r.String()), fmt.Errorf("Invalid name: (%c)", bb[0])
|
||||
}
|
||||
} else {
|
||||
if IsWhiteSpace(bb[0]) {
|
||||
@ -202,22 +202,22 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
|
||||
} else if bb[0] == '#' {
|
||||
hexcode, err := parser.reader.Peek(3)
|
||||
if err != nil {
|
||||
return PdfObjectName(name), err
|
||||
return PdfObjectName(r.String()), err
|
||||
}
|
||||
parser.reader.Discard(3)
|
||||
|
||||
code, err := hex.DecodeString(string(hexcode[1:3]))
|
||||
if err != nil {
|
||||
return PdfObjectName(name), err
|
||||
return PdfObjectName(r.String()), err
|
||||
}
|
||||
name += string(code)
|
||||
r.Write(code)
|
||||
} else {
|
||||
b, _ := parser.reader.ReadByte()
|
||||
name += string(b)
|
||||
r.WriteByte(b)
|
||||
}
|
||||
}
|
||||
}
|
||||
return PdfObjectName(name), nil
|
||||
return PdfObjectName(r.String()), nil
|
||||
}
|
||||
|
||||
// Numeric objects.
|
||||
@ -243,9 +243,9 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
|
||||
func (parser *PdfParser) parseNumber() (PdfObject, error) {
|
||||
isFloat := false
|
||||
allowSigns := true
|
||||
numStr := ""
|
||||
var r bytes.Buffer
|
||||
for {
|
||||
common.Log.Trace("Parsing number \"%s\"", numStr)
|
||||
common.Log.Trace("Parsing number \"%s\"", r.String())
|
||||
bb, err := parser.reader.Peek(1)
|
||||
if err == io.EOF {
|
||||
// GH: EOF handling. Handle EOF like end of line. Can happen with
|
||||
@ -260,19 +260,19 @@ func (parser *PdfParser) parseNumber() (PdfObject, error) {
|
||||
if allowSigns && (bb[0] == '-' || bb[0] == '+') {
|
||||
// Only appear in the beginning, otherwise serves as a delimiter.
|
||||
b, _ := parser.reader.ReadByte()
|
||||
numStr += string(b)
|
||||
r.WriteByte(b)
|
||||
allowSigns = false // Only allowed in beginning, and after e (exponential).
|
||||
} else if IsDecimalDigit(bb[0]) {
|
||||
b, _ := parser.reader.ReadByte()
|
||||
numStr += string(b)
|
||||
r.WriteByte(b)
|
||||
} else if bb[0] == '.' {
|
||||
b, _ := parser.reader.ReadByte()
|
||||
numStr += string(b)
|
||||
r.WriteByte(b)
|
||||
isFloat = true
|
||||
} else if bb[0] == 'e' {
|
||||
// Exponential number format.
|
||||
b, _ := parser.reader.ReadByte()
|
||||
numStr += string(b)
|
||||
r.WriteByte(b)
|
||||
isFloat = true
|
||||
allowSigns = true
|
||||
} else {
|
||||
@ -281,11 +281,11 @@ func (parser *PdfParser) parseNumber() (PdfObject, error) {
|
||||
}
|
||||
|
||||
if isFloat {
|
||||
fVal, err := strconv.ParseFloat(numStr, 64)
|
||||
fVal, err := strconv.ParseFloat(r.String(), 64)
|
||||
o := PdfObjectFloat(fVal)
|
||||
return &o, err
|
||||
} else {
|
||||
intVal, err := strconv.ParseInt(numStr, 10, 64)
|
||||
intVal, err := strconv.ParseInt(r.String(), 10, 64)
|
||||
o := PdfObjectInteger(intVal)
|
||||
return &o, err
|
||||
}
|
||||
@ -295,26 +295,26 @@ func (parser *PdfParser) parseNumber() (PdfObject, error) {
|
||||
func (parser *PdfParser) parseString() (PdfObjectString, error) {
|
||||
parser.reader.ReadByte()
|
||||
|
||||
bytes := []byte{}
|
||||
var r bytes.Buffer
|
||||
count := 1
|
||||
for {
|
||||
bb, err := parser.reader.Peek(1)
|
||||
if err != nil {
|
||||
return PdfObjectString(bytes), err
|
||||
return PdfObjectString(r.String()), err
|
||||
}
|
||||
|
||||
if bb[0] == '\\' { // Escape sequence.
|
||||
parser.reader.ReadByte() // Skip the escape \ byte.
|
||||
b, err := parser.reader.ReadByte()
|
||||
if err != nil {
|
||||
return PdfObjectString(bytes), err
|
||||
return PdfObjectString(r.String()), err
|
||||
}
|
||||
|
||||
// Octal '\ddd' number (base 8).
|
||||
if IsOctalDigit(b) {
|
||||
bb, err := parser.reader.Peek(2)
|
||||
if err != nil {
|
||||
return PdfObjectString(bytes), err
|
||||
return PdfObjectString(r.String()), err
|
||||
}
|
||||
|
||||
numeric := []byte{}
|
||||
@ -331,29 +331,29 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
|
||||
common.Log.Trace("Numeric string \"%s\"", numeric)
|
||||
code, err := strconv.ParseUint(string(numeric), 8, 32)
|
||||
if err != nil {
|
||||
return PdfObjectString(bytes), err
|
||||
return PdfObjectString(r.String()), err
|
||||
}
|
||||
bytes = append(bytes, byte(code))
|
||||
r.WriteByte(byte(code))
|
||||
continue
|
||||
}
|
||||
|
||||
switch b {
|
||||
case 'n':
|
||||
bytes = append(bytes, '\n')
|
||||
r.WriteRune('\n')
|
||||
case 'r':
|
||||
bytes = append(bytes, '\r')
|
||||
r.WriteRune('\r')
|
||||
case 't':
|
||||
bytes = append(bytes, '\t')
|
||||
r.WriteRune('\t')
|
||||
case 'b':
|
||||
bytes = append(bytes, '\b')
|
||||
r.WriteRune('\b')
|
||||
case 'f':
|
||||
bytes = append(bytes, '\f')
|
||||
r.WriteRune('\f')
|
||||
case '(':
|
||||
bytes = append(bytes, '(')
|
||||
r.WriteRune('(')
|
||||
case ')':
|
||||
bytes = append(bytes, ')')
|
||||
r.WriteRune(')')
|
||||
case '\\':
|
||||
bytes = append(bytes, '\\')
|
||||
r.WriteRune('\\')
|
||||
}
|
||||
|
||||
continue
|
||||
@ -368,10 +368,10 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
|
||||
}
|
||||
|
||||
b, _ := parser.reader.ReadByte()
|
||||
bytes = append(bytes, b)
|
||||
r.WriteByte(b)
|
||||
}
|
||||
|
||||
return PdfObjectString(bytes), nil
|
||||
return PdfObjectString(r.String()), nil
|
||||
}
|
||||
|
||||
// Starts with '<' ends with '>'.
|
||||
@ -379,12 +379,8 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
|
||||
func (parser *PdfParser) parseHexString() (PdfObjectString, error) {
|
||||
parser.reader.ReadByte()
|
||||
|
||||
hextable := []byte("0123456789abcdefABCDEF")
|
||||
|
||||
tmp := []byte{}
|
||||
var r bytes.Buffer
|
||||
for {
|
||||
parser.skipSpaces()
|
||||
|
||||
bb, err := parser.reader.Peek(1)
|
||||
if err != nil {
|
||||
return PdfObjectString(""), err
|
||||
@ -396,16 +392,16 @@ func (parser *PdfParser) parseHexString() (PdfObjectString, error) {
|
||||
}
|
||||
|
||||
b, _ := parser.reader.ReadByte()
|
||||
if bytes.IndexByte(hextable, b) >= 0 {
|
||||
tmp = append(tmp, b)
|
||||
if !IsWhiteSpace(b) {
|
||||
r.WriteByte(b)
|
||||
}
|
||||
}
|
||||
|
||||
if len(tmp)%2 == 1 {
|
||||
tmp = append(tmp, '0')
|
||||
if r.Len()%2 == 1 {
|
||||
r.WriteRune('0')
|
||||
}
|
||||
|
||||
buf, _ := hex.DecodeString(string(tmp))
|
||||
buf, _ := hex.DecodeString(r.String())
|
||||
return PdfObjectString(buf), nil
|
||||
}
|
||||
|
||||
|
@ -8,6 +8,7 @@ package core
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/hex"
|
||||
//"fmt"
|
||||
"io"
|
||||
//"os"
|
||||
@ -27,27 +28,54 @@ func makeReaderForText(txt string) (*bytes.Reader, *bufio.Reader, int64) {
|
||||
return bufReader, bufferedReader, int64(len(txt))
|
||||
}
|
||||
|
||||
func makeParserForText(txt string) *PdfParser {
|
||||
rs, reader, fileSize := makeReaderForText(txt)
|
||||
return &PdfParser{rs: rs, reader: reader, fileSize: fileSize}
|
||||
}
|
||||
|
||||
func BenchmarkSkipSpaces(b *testing.B) {
|
||||
parser := makeParserForText(" \t\t \tABC")
|
||||
for n := 0; n < b.N; n++ {
|
||||
parser.skipSpaces()
|
||||
parser.SetFileOffset(0)
|
||||
}
|
||||
}
|
||||
|
||||
// namePairs maps raw PDF name tokens, as they appear in the input text, to the
// name value the parser is expected to produce for them.
var namePairs = map[string]string{
	// Names made of regular characters only.
	"/Name1":                             "Name1",
	"/ASomewhatLongerName":               "ASomewhatLongerName",
	"/A;Name_With-Various***Characters?": "A;Name_With-Various***Characters?",
	"/1.2":                               "1.2",
	"/$$":                                "$$",
	"/@pattern":                          "@pattern",
	"/.notdef":                           ".notdef",
	// '#' followed by two hex digits stands for the byte with that value.
	"/Lime#20Green":            "Lime Green",
	"/paired#28#29parentheses": "paired()parentheses",
	"/The_Key_of_F#23_Minor":   "The_Key_of_F#_Minor",
	"/A#42":                    "AB",
	// Hex escapes mixed with literal hex-looking characters.
	"/#3CBC88#3E#3CC5ED#3E#3CD544#3E#3CC694#3E": "<BC88><C5ED><D544><C694>",
	// A lone '/', optionally followed by whitespace, is the empty name.
	"/":  "",
	"/ ": "",
}
|
||||
|
||||
func BenchmarkNameParsing(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
for str, name := range namePairs {
|
||||
parser := makeParserForText(str)
|
||||
o, err := parser.parseName()
|
||||
if err != nil && err != io.EOF {
|
||||
b.Errorf("Unable to parse name string, error: %s", err)
|
||||
}
|
||||
if string(o) != name {
|
||||
b.Errorf("Mismatch %s != %s", o, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNameParsing(t *testing.T) {
|
||||
namePairs := map[string]string{}
|
||||
|
||||
namePairs["/Name1"] = "Name1"
|
||||
namePairs["/ASomewhatLongerName"] = "ASomewhatLongerName"
|
||||
namePairs["/A;Name_With-Various***Characters?"] = "A;Name_With-Various***Characters?"
|
||||
namePairs["/1.2"] = "1.2"
|
||||
namePairs["/$$"] = "$$"
|
||||
namePairs["/@pattern"] = "@pattern"
|
||||
namePairs["/.notdef"] = ".notdef"
|
||||
namePairs["/Lime#20Green"] = "Lime Green"
|
||||
namePairs["/paired#28#29parentheses"] = "paired()parentheses"
|
||||
namePairs["/The_Key_of_F#23_Minor"] = "The_Key_of_F#_Minor"
|
||||
namePairs["/A#42"] = "AB"
|
||||
namePairs["/"] = ""
|
||||
namePairs["/ "] = ""
|
||||
namePairs["/#3CBC88#3E#3CC5ED#3E#3CD544#3E#3CC694#3E"] = "<BC88><C5ED><D544><C694>"
|
||||
|
||||
for str, name := range namePairs {
|
||||
parser := PdfParser{}
|
||||
parser.rs, parser.reader, parser.fileSize = makeReaderForText(str)
|
||||
parser := makeParserForText(str)
|
||||
o, err := parser.parseName()
|
||||
if err != nil && err != io.EOF {
|
||||
t.Errorf("Unable to parse name string, error: %s", err)
|
||||
@ -58,8 +86,7 @@ func TestNameParsing(t *testing.T) {
|
||||
}
|
||||
|
||||
// Should fail (require starting with '/')
|
||||
parser := PdfParser{}
|
||||
parser.rs, parser.reader, parser.fileSize = makeReaderForText(" /Name")
|
||||
parser := makeParserForText(" /Name")
|
||||
_, err := parser.parseName()
|
||||
if err == nil || err == io.EOF {
|
||||
t.Errorf("Should be invalid name")
|
||||
@ -71,33 +98,42 @@ type testStringEntry struct {
|
||||
expected string
|
||||
}
|
||||
|
||||
func TestStringParsing(t *testing.T) {
|
||||
testEntries := []testStringEntry{
|
||||
{"(This is a string)", "This is a string"},
|
||||
{"(Strings may contain\n newlines and such)", "Strings may contain\n newlines and such"},
|
||||
{"(Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).)",
|
||||
"Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on)."},
|
||||
{"(These \\\ntwo strings \\\nare the same.)", "These two strings are the same."},
|
||||
{"(These two strings are the same.)", "These two strings are the same."},
|
||||
{"(\\\\)", "\\"},
|
||||
{"(This string has an end-of-line at the end of it.\n)",
|
||||
"This string has an end-of-line at the end of it.\n"},
|
||||
{"(So does this one.\\n)", "So does this one.\n"},
|
||||
{"(\\0053)", "\0053"},
|
||||
{"(\\053)", "\053"},
|
||||
{"(\\53)", "\053"},
|
||||
{"(\\053)", "+"},
|
||||
{"(\\53\\101)", "+A"},
|
||||
func BenchmarkStringParsing(b *testing.B) {
|
||||
entry := "(Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).)"
|
||||
parser := makeParserForText(entry)
|
||||
for n := 0; n < b.N; n++ {
|
||||
_, err := parser.parseString()
|
||||
if err != nil && err != io.EOF {
|
||||
b.Errorf("Unable to parse string, error: %s", err)
|
||||
}
|
||||
parser.SetFileOffset(0)
|
||||
}
|
||||
for _, entry := range testEntries {
|
||||
parser := PdfParser{}
|
||||
parser.rs, parser.reader, parser.fileSize = makeReaderForText(entry.raw)
|
||||
}
|
||||
|
||||
// stringPairs maps raw PDF literal strings, including the enclosing
// parentheses, to the decoded text the parser is expected to produce.
var stringPairs = map[string]string{
	// Plain text.
	"(This is a string)": "This is a string",
	// Unescaped newlines are kept.
	"(Strings may contain\n newlines and such)": "Strings may contain\n newlines and such",
	// Balanced, unescaped parentheses belong to the string.
	"(Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).)": "Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).",
	// A backslash directly followed by a newline does not appear in the result.
	"(These \\\ntwo strings \\\nare the same.)": "These two strings are the same.",
	// The same text written on a single line.
	"(These two strings are the same.)": "These two strings are the same.",
	// Escaped backslash.
	"(\\\\)": "\\",
	// A literal newline right before the closing parenthesis.
	"(This string has an end-of-line at the end of it.\n)": "This string has an end-of-line at the end of it.\n",
	// The same ending written with the \n escape sequence.
	"(So does this one.\\n)": "So does this one.\n",
	// Octal escapes: one to three digits; a fourth digit is ordinary text.
	"(\\0053)":    "\0053",
	"(\\53)":      "\053",
	"(\\053)":     "+",
	"(\\53\\101)": "+A",
}
|
||||
|
||||
func TestStringParsing(t *testing.T) {
|
||||
for raw, expected := range stringPairs {
|
||||
parser := makeParserForText(raw)
|
||||
o, err := parser.parseString()
|
||||
if err != nil && err != io.EOF {
|
||||
t.Errorf("Unable to parse string, error: %s", err)
|
||||
}
|
||||
if string(o) != entry.expected {
|
||||
t.Errorf("String Mismatch %s: \"%s\" != \"%s\"", entry.raw, o, entry.expected)
|
||||
if string(o) != expected {
|
||||
t.Errorf("String Mismatch %s: \"%s\" != \"%s\"", raw, o, expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -106,8 +142,7 @@ func TestReadTextLine(t *testing.T) {
|
||||
// reading text line + rewinding should be idempotent, that is:
|
||||
// if we rewind back len(str) bytes after reading string str we should arrive at beginning of str
|
||||
rawText := "abc\xb0cde"
|
||||
parser := PdfParser{}
|
||||
parser.rs, parser.reader, parser.fileSize = makeReaderForText(rawText)
|
||||
parser := makeParserForText(rawText)
|
||||
s, err := parser.readTextLine()
|
||||
if err != nil && err != io.EOF {
|
||||
t.Errorf("Unable to parse string, error: %s", err)
|
||||
@ -172,6 +207,21 @@ func TestBoolParsing(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkNumbericParsing(b *testing.B) {
|
||||
txt1 := "[34.5 -3.62 1 +123.6 4. -.002 0.0]"
|
||||
parser := PdfParser{}
|
||||
parser.rs, parser.reader, parser.fileSize = makeReaderForText(txt1)
|
||||
|
||||
for n := 0; n < b.N; n++ {
|
||||
_, err := parser.parseArray()
|
||||
if err != nil {
|
||||
b.Errorf("Error parsing array")
|
||||
return
|
||||
}
|
||||
parser.SetFileOffset(0)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNumericParsing1(t *testing.T) {
|
||||
// 7.3.3
|
||||
txt1 := "[34.5 -3.62 1 +123.6 4. -.002 0.0]"
|
||||
@ -285,6 +335,25 @@ func TestNumericParsing3(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHexStringParsing(b *testing.B) {
|
||||
var ref bytes.Buffer
|
||||
for i := 0; i < 0xff; i++ {
|
||||
ref.WriteByte(byte(i))
|
||||
}
|
||||
parser := makeParserForText("<" + hex.EncodeToString(ref.Bytes()) + ">")
|
||||
for n := 0; n < b.N; n++ {
|
||||
hs, err := parser.parseHexString()
|
||||
if err != nil {
|
||||
b.Errorf("Error parsing hex string: %s", err.Error())
|
||||
return
|
||||
}
|
||||
if string(hs) != ref.String() {
|
||||
b.Errorf("Reference and parsed hex strings mismatch")
|
||||
}
|
||||
parser.SetFileOffset(0)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHexStringParsing(t *testing.T) {
|
||||
// 7.3.4.3
|
||||
}
|
||||
|
@ -12,9 +12,8 @@ func IsWhiteSpace(ch byte) bool {
|
||||
// spaceCharacters := string([]byte{0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20})
|
||||
if (ch == 0x00) || (ch == 0x09) || (ch == 0x0A) || (ch == 0x0C) || (ch == 0x0D) || (ch == 0x20) {
|
||||
return true
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// IsFloatDigit checks if a character can be a part of a float number string.
|
||||
|
Loading…
x
Reference in New Issue
Block a user