2016-07-09 14:09:27 +00:00
/*
 * This file is subject to the terms and conditions defined in
 * file 'LICENSE.md', which is part of this source code package.
 */
2016-09-08 17:53:45 +00:00
package core
2016-07-09 14:09:27 +00:00
import (
"bufio"
"bytes"
2018-03-20 04:26:10 +03:00
"encoding/hex"
2019-04-14 22:22:41 +00:00
"fmt"
2016-07-09 14:09:27 +00:00
"io"
2018-08-03 11:01:54 +00:00
"os"
2016-07-09 14:09:27 +00:00
"testing"
2016-07-17 19:59:17 +00:00
2019-04-02 17:44:53 +00:00
"github.com/stretchr/testify/require"
2019-05-16 23:08:40 +03:00
"github.com/unidoc/unipdf/v3/common"
2016-07-09 14:09:27 +00:00
)
2017-07-23 12:21:42 +00:00
// makeReaderForText wraps txt in an in-memory seekable reader and layers a
// buffered reader on top of it. It returns both readers together with the
// length of txt in bytes.
func makeReaderForText(txt string) (*bytes.Reader, *bufio.Reader, int64) {
	seeker := bytes.NewReader([]byte(txt))
	return seeker, bufio.NewReader(seeker), int64(len(txt))
}
2018-03-20 04:26:10 +03:00
// makeParserForText returns a PdfParser whose seekable reader, buffered reader
// and file size are all derived from txt via makeReaderForText.
func makeParserForText(txt string) *PdfParser {
	parser := &PdfParser{}
	parser.rs, parser.reader, parser.fileSize = makeReaderForText(txt)
	return parser
}
// BenchmarkSkipSpaces times skipSpaces on an input that starts with a mix of
// spaces and tabs, rewinding the parser to offset 0 after every pass.
func BenchmarkSkipSpaces(b *testing.B) {
	p := makeParserForText(" \t\t \tABC")
	for i := 0; i < b.N; i++ {
		p.skipSpaces()
		p.SetFileOffset(0)
	}
}
// namePairs maps raw PDF name tokens (as they appear in the input, including
// the leading '/') to the decoded name that parseName is expected to return.
var namePairs = map[string]string{
	// Empty names.
	"/":  "",
	"/ ": "",

	// Names made of regular characters only.
	"/Name1":                             "Name1",
	"/ASomewhatLongerName":               "ASomewhatLongerName",
	"/A;Name_With-Various***Characters?": "A;Name_With-Various***Characters?",
	"/1.2":                               "1.2",
	"/$$":                                "$$",
	"/@pattern":                          "@pattern",
	"/.notdef":                           ".notdef",

	// Names containing #xx hexadecimal escapes.
	"/Lime#20Green":             "Lime Green",
	"/paired#28#29parentheses":  "paired()parentheses",
	"/The_Key_of_F#23_Minor":    "The_Key_of_F#_Minor",
	"/A#42":                     "AB",
	"/#3CBC88#3E#3CC5ED#3E#3CD544#3E#3CC694#3E": "<BC88><C5ED><D544><C694>",
}
// BenchmarkNameParsing parses every entry of namePairs on each iteration,
// building a fresh parser per entry and reporting any parse error (other than
// io.EOF) or any mismatch with the expected decoded name.
func BenchmarkNameParsing(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for raw, want := range namePairs {
			got, err := makeParserForText(raw).parseName()
			if !(err == nil || err == io.EOF) {
				b.Errorf("Unable to parse name string, error: %s", err)
			}
			if string(got) != want {
				b.Errorf("Mismatch %s != %s", got, want)
			}
		}
	}
}
2016-07-09 14:09:27 +00:00
2018-03-20 04:26:10 +03:00
func TestNameParsing ( t * testing . T ) {
2016-07-09 14:09:27 +00:00
for str , name := range namePairs {
2018-03-20 04:26:10 +03:00
parser := makeParserForText ( str )
2016-07-09 14:09:27 +00:00
o , err := parser . parseName ( )
if err != nil && err != io . EOF {
t . Errorf ( "Unable to parse name string, error: %s" , err )
}
if string ( o ) != name {
t . Errorf ( "Mismatch %s != %s" , o , name )
}
}
// Should fail (require starting with '/')
2018-03-20 04:26:10 +03:00
parser := makeParserForText ( " /Name" )
2016-07-09 14:09:27 +00:00
_ , err := parser . parseName ( )
if err == nil || err == io . EOF {
t . Errorf ( "Should be invalid name" )
}
}
2019-04-14 22:22:41 +00:00
func TestBigDictParse ( t * testing . T ) {
numObjects := 150000
var buf bytes . Buffer
buf . WriteString ( "<<" )
buf . WriteString ( "/ColorSpace <<" )
for i := 0 ; i < numObjects ; i ++ {
buf . WriteString ( fmt . Sprintf ( ` /Cs%d %d 0 R ` , i , i ) )
}
buf . WriteString ( ">>" )
buf . WriteString ( "/Font <<>> " )
buf . WriteString ( ">>" )
rs := bytes . NewReader ( buf . Bytes ( ) )
reader := bufio . NewReader ( & buf )
parser := & PdfParser { rs : rs , reader : reader , fileSize : int64 ( buf . Len ( ) ) }
val , err := parser . parseObject ( )
require . NoError ( t , err )
require . NotNil ( t , val )
d , ok := GetDict ( val )
require . True ( t , ok )
require . Equal ( t , 2 , len ( d . Keys ( ) ) )
d , ok = GetDict ( d . Get ( "ColorSpace" ) )
require . True ( t , ok )
require . Equal ( t , numObjects , len ( d . Keys ( ) ) )
}
2018-03-20 04:26:10 +03:00
func BenchmarkStringParsing ( b * testing . B ) {
entry := "(Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).)"
parser := makeParserForText ( entry )
for n := 0 ; n < b . N ; n ++ {
_ , err := parser . parseString ( )
if err != nil && err != io . EOF {
b . Errorf ( "Unable to parse string, error: %s" , err )
}
parser . SetFileOffset ( 0 )
2016-07-09 14:09:27 +00:00
}
2018-03-20 04:26:10 +03:00
}
// stringPairs maps raw PDF literal strings (including the enclosing
// parentheses) to the decoded text that parseString is expected to produce.
var stringPairs = map[string]string{
	// Plain text, raw newlines and balanced parentheses.
	"(This is a string)":                         "This is a string",
	"(Strings may contain\n newlines and such)": "Strings may contain\n newlines and such",
	"(Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).)": "Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).",
	"(This string has an end-of-line at the end of it.\n)":                                     "This string has an end-of-line at the end of it.\n",

	// A backslash directly followed by a newline joins the two lines.
	"(These \\\ntwo strings \\\nare the same.)": "These two strings are the same.",
	"(These two strings are the same.)":         "These two strings are the same.",

	// Backslash escapes.
	"(\\\\)":                "\\",
	"(So does this one.\\n)": "So does this one.\n",

	// Octal escapes of one to three digits.
	"(\\0053)":    "\0053",
	"(\\53)":      "\053",
	"(\\053)":     "+",
	"(\\53\\101)": "+A",
}
func TestStringParsing ( t * testing . T ) {
for raw , expected := range stringPairs {
parser := makeParserForText ( raw )
2016-07-09 14:09:27 +00:00
o , err := parser . parseString ( )
if err != nil && err != io . EOF {
t . Errorf ( "Unable to parse string, error: %s" , err )
}
2018-07-14 02:25:29 +00:00
if o . Str ( ) != expected {
2018-03-20 04:26:10 +03:00
t . Errorf ( "String Mismatch %s: \"%s\" != \"%s\"" , raw , o , expected )
2016-07-09 14:09:27 +00:00
}
}
}
2018-03-05 13:04:27 +03:00
func TestReadTextLine ( t * testing . T ) {
// reading text ling + rewinding should be idempotent, that is:
// if we rewind back len(str) bytes after reading string str we should arrive at beginning of str
rawText := "abc\xb0cde"
2018-03-20 04:26:10 +03:00
parser := makeParserForText ( rawText )
2018-03-05 13:04:27 +03:00
s , err := parser . readTextLine ( )
if err != nil && err != io . EOF {
t . Errorf ( "Unable to parse string, error: %s" , err )
}
if parser . GetFileOffset ( ) != int64 ( len ( s ) ) {
2018-12-11 16:06:34 +03:00
t . Errorf ( "File Offset after reading string of length %d is %d" , len ( s ) , parser . GetFileOffset ( ) )
2018-03-05 13:04:27 +03:00
}
}
2016-07-09 14:09:27 +00:00
func TestBinStringParsing ( t * testing . T ) {
// From an example O entry in Encrypt dictionary.
rawText1 := "(\xE6\x00\xEC\xC2\x02\x88\xAD\x8B\\r\x64\xA9" +
"\\)\xC6\xA8\x3E\xE2\x51\x76\x79\xAA\x02\x18\xBE\xCE\xEA" +
"\x8B\x79\x86\x72\x6A\x8C\xDB)"
parser := PdfParser { }
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( rawText1 )
2016-07-09 14:09:27 +00:00
o , err := parser . parseString ( )
if err != nil && err != io . EOF {
t . Errorf ( "Unable to parse string, error: %s" , err )
}
2018-07-14 02:25:29 +00:00
if len ( o . Str ( ) ) != 32 {
t . Errorf ( "Wrong length, should be 32 (got %d)" , len ( o . Str ( ) ) )
2016-07-09 14:09:27 +00:00
}
}
// Main challenge in the text is "\\278A" which is "\\27" octal and 8A
func TestStringParsing2 ( t * testing . T ) {
rawText := "[(\\227\\224`\\274\\31W\\216\\276\\23\\231\\246U\\33\\317\\6-)(\\210S\\377:\\322\\278A\\200$*/e]\\371|)]"
parser := PdfParser { }
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( rawText )
2016-07-09 14:09:27 +00:00
list , err := parser . parseArray ( )
2019-04-25 17:08:15 +00:00
require . NoError ( t , err )
require . Equal ( t , 2 , list . Len ( ) )
2016-07-09 14:09:27 +00:00
}
func TestBoolParsing ( t * testing . T ) {
// 7.3.2
testEntries := map [ string ] bool { }
testEntries [ "false" ] = false
testEntries [ "true" ] = true
for key , expected := range testEntries {
parser := PdfParser { }
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( key )
2016-07-09 14:09:27 +00:00
val , err := parser . parseBool ( )
2019-04-25 17:08:15 +00:00
require . NoError ( t , err )
require . Equal ( t , expected , bool ( val ) )
2016-07-09 14:09:27 +00:00
}
}
2019-04-14 22:22:41 +00:00
func BenchmarkNumericParsing ( b * testing . B ) {
2018-03-20 04:26:10 +03:00
txt1 := "[34.5 -3.62 1 +123.6 4. -.002 0.0]"
parser := PdfParser { }
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( txt1 )
for n := 0 ; n < b . N ; n ++ {
_ , err := parser . parseArray ( )
2019-04-25 17:08:15 +00:00
require . NoError ( b , err )
2018-03-20 04:26:10 +03:00
parser . SetFileOffset ( 0 )
}
}
2016-07-09 14:09:27 +00:00
func TestNumericParsing1 ( t * testing . T ) {
// 7.3.3
txt1 := "[34.5 -3.62 1 +123.6 4. -.002 0.0]"
parser := PdfParser { }
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( txt1 )
2016-07-09 14:09:27 +00:00
list , err := parser . parseArray ( )
2019-04-25 17:08:15 +00:00
require . NoError ( t , err )
require . Equal ( t , 7 , list . Len ( ) )
2016-07-09 14:09:27 +00:00
expectedFloats := map [ int ] float32 {
0 : 34.5 ,
1 : - 3.62 ,
3 : 123.6 ,
4 : 4.0 ,
5 : - 0.002 ,
6 : 0.0 ,
}
for idx , val := range expectedFloats {
2018-07-15 17:52:53 +00:00
num , ok := list . Get ( idx ) . ( * PdfObjectFloat )
2019-04-25 17:08:15 +00:00
require . True ( t , ok )
require . Equal ( t , val , float32 ( * num ) )
2016-07-09 14:09:27 +00:00
}
2018-07-15 17:52:53 +00:00
inum , ok := list . Get ( 2 ) . ( * PdfObjectInteger )
2019-04-25 17:08:15 +00:00
require . True ( t , ok )
require . Equal ( t , 1 , int ( * inum ) )
2016-07-09 14:09:27 +00:00
}
func TestNumericParsing2 ( t * testing . T ) {
// 7.3.3
txt1 := "[+4.-.002]" // 4.0 and -0.002
parser := PdfParser { }
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( txt1 )
2016-07-09 14:09:27 +00:00
list , err := parser . parseArray ( )
if err != nil {
t . Errorf ( "Error parsing array" )
return
}
2018-07-15 17:52:53 +00:00
if list . Len ( ) != 2 {
t . Errorf ( "Len list != 2 (%d)" , list . Len ( ) )
2016-07-09 14:09:27 +00:00
return
}
expectedFloats := map [ int ] float32 {
0 : 4.0 ,
1 : - 0.002 ,
}
for idx , val := range expectedFloats {
2018-07-15 17:52:53 +00:00
num , ok := list . Get ( idx ) . ( * PdfObjectFloat )
2016-07-09 14:09:27 +00:00
if ! ok {
t . Errorf ( "Idx %d not float (%f)" , idx , val )
return
}
if float32 ( * num ) != val {
2017-08-04 22:50:28 +00:00
t . Errorf ( "Idx %d, value incorrect (%f)" , idx , val )
2016-07-09 14:09:27 +00:00
}
}
}
2019-04-02 17:44:53 +00:00
func TestNumericParsingExponentials ( t * testing . T ) {
testcases := [ ] struct {
RawObj string
Expected [ ] float64
} {
{ "[+4.-.002+3e-2-2e0]" , [ ] float64 { 4.0 , - 0.002 , 0.03 , - 2.0 } } , // 7.3.3.
{ "[-1E+35 1E+35]" , [ ] float64 { - 1e35 , 1e35 } } ,
2016-07-09 14:09:27 +00:00
}
2019-04-02 17:44:53 +00:00
for _ , tcase := range testcases {
t . Run ( tcase . RawObj , func ( t * testing . T ) {
parser := PdfParser { }
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( tcase . RawObj )
list , err := parser . parseArray ( )
require . NoError ( t , err )
floats , err := list . ToFloat64Array ( )
require . NoError ( t , err )
require . Equal ( t , tcase . Expected , floats )
} )
2016-07-09 14:09:27 +00:00
}
}
2018-03-20 04:26:10 +03:00
func BenchmarkHexStringParsing ( b * testing . B ) {
var ref bytes . Buffer
for i := 0 ; i < 0xff ; i ++ {
ref . WriteByte ( byte ( i ) )
}
parser := makeParserForText ( "<" + hex . EncodeToString ( ref . Bytes ( ) ) + ">" )
for n := 0 ; n < b . N ; n ++ {
hs , err := parser . parseHexString ( )
if err != nil {
b . Errorf ( "Error parsing hex string: %s" , err . Error ( ) )
return
}
2018-07-14 02:25:29 +00:00
if hs . Str ( ) != ref . String ( ) {
2018-03-20 04:26:10 +03:00
b . Errorf ( "Reference and parsed hex strings mismatch" )
}
parser . SetFileOffset ( 0 )
}
}
2016-07-09 14:09:27 +00:00
// TestHexStringParsing is a placeholder for hex string parsing tests. It is
// reported as skipped rather than passed so the missing coverage stays
// visible in test output.
func TestHexStringParsing(t *testing.T) {
	// 7.3.4.3
	t.Skip("TODO: hex string parsing tests (7.3.4.3) are not implemented")
}
// TODO.
// Test reference to object outside of cross-ref table - should be 0
// Test xref object with offset 0, should be treated as 'f'ree.
// (compatibility with malformed writers).
func TestDictParsing1 ( t * testing . T ) {
txt1 := "<<\n\t/Name /Game /key/val/data\t[0 1 2 3.14 5]\t\n\n>>"
parser := PdfParser { }
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( txt1 )
2017-01-03 15:36:32 +00:00
dict , err := parser . ParseDict ( )
2016-07-09 14:09:27 +00:00
if err != nil {
t . Errorf ( "Error parsing dict" )
}
2017-07-08 21:04:13 +00:00
if len ( dict . Keys ( ) ) != 3 {
2016-07-09 14:09:27 +00:00
t . Errorf ( "Length of dict != 3" )
}
2017-07-08 21:04:13 +00:00
name , ok := dict . Get ( "Name" ) . ( * PdfObjectName )
2016-07-09 14:09:27 +00:00
if ! ok || * name != "Game" {
t . Errorf ( "Value error" )
}
2017-07-08 21:04:13 +00:00
key , ok := dict . Get ( "key" ) . ( * PdfObjectName )
2016-07-09 14:09:27 +00:00
if ! ok || * key != "val" {
t . Errorf ( "Value error" )
}
2017-07-08 21:04:13 +00:00
data , ok := dict . Get ( "data" ) . ( * PdfObjectArray )
2016-07-09 14:09:27 +00:00
if ! ok {
t . Errorf ( "Invalid data" )
}
2018-07-15 17:52:53 +00:00
integer , ok := data . Get ( 2 ) . ( * PdfObjectInteger )
2016-07-09 14:09:27 +00:00
if ! ok || * integer != 2 {
t . Errorf ( "Wrong data" )
}
2018-07-15 17:52:53 +00:00
float , ok := data . Get ( 3 ) . ( * PdfObjectFloat )
2016-07-09 14:09:27 +00:00
if ! ok || * float != 3.14 {
t . Error ( "Wrong data" )
}
}
func TestDictParsing2 ( t * testing . T ) {
rawText := "<< /Type /Example\n" +
"/Subtype /DictionaryExample /Version 0.01\n" +
"/IntegerItem 12 \n" +
"/StringItem (a string) /Subdictionary << /Item1 0.4\n" +
"/Item2 true /LastItem (not!) /VeryLastItem (OK)\n" +
">>\n >>"
parser := PdfParser { }
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( rawText )
2017-01-03 15:36:32 +00:00
dict , err := parser . ParseDict ( )
2016-07-09 14:09:27 +00:00
if err != nil {
t . Errorf ( "Error parsing dict" )
}
2017-07-08 21:04:13 +00:00
if len ( dict . Keys ( ) ) != 6 {
2016-07-09 14:09:27 +00:00
t . Errorf ( "Length of dict != 6" )
}
2017-07-08 21:04:13 +00:00
typeName , ok := dict . Get ( "Type" ) . ( * PdfObjectName )
2016-07-09 14:09:27 +00:00
if ! ok || * typeName != "Example" {
t . Errorf ( "Wrong type" )
}
2017-07-08 21:04:13 +00:00
str , ok := dict . Get ( "StringItem" ) . ( * PdfObjectString )
2018-07-14 02:25:29 +00:00
if ! ok || str . Str ( ) != "a string" {
2016-07-09 14:09:27 +00:00
t . Errorf ( "Invalid string item" )
}
2017-07-08 21:04:13 +00:00
subDict , ok := dict . Get ( "Subdictionary" ) . ( * PdfObjectDictionary )
2016-07-09 14:09:27 +00:00
if ! ok {
t . Errorf ( "Invalid sub dictionary" )
}
2017-07-08 21:04:13 +00:00
item2 , ok := subDict . Get ( "Item2" ) . ( * PdfObjectBool )
2016-07-09 14:09:27 +00:00
if ! ok || * item2 != true {
t . Errorf ( "Invalid bool item" )
}
2017-07-08 21:04:13 +00:00
realnum , ok := subDict . Get ( "Item1" ) . ( * PdfObjectFloat )
2016-07-09 14:09:27 +00:00
if ! ok || * realnum != 0.4 {
t . Errorf ( "Invalid real number" )
}
}
func TestDictParsing3 ( t * testing . T ) {
rawText := "<<>>"
parser := PdfParser { }
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( rawText )
2017-01-03 15:36:32 +00:00
dict , err := parser . ParseDict ( )
2016-07-09 14:09:27 +00:00
if err != nil {
t . Errorf ( "Error parsing dict" )
}
2017-07-08 21:04:13 +00:00
if len ( dict . Keys ( ) ) != 0 {
2016-07-09 14:09:27 +00:00
t . Errorf ( "Length of dict != 0" )
}
}
/*
func TestDictParsing4(t *testing.T) {
	rawText := "<</Key>>"

	parser := PdfParser{}
	parser.rs, parser.reader = makeReaderForText(rawText)

	dict, err := parser.ParseDict()
	if err != nil {
		t.Errorf("Error parsing dict (%s)", err)
		return
	}

	if len(*dict) != 1 {
		t.Errorf("Length of dict != 1")
		return
	}

	_, ok := (*dict)["Key"].(*PdfObjectNull)
	if !ok {
		t.Errorf("Invalid object (should be PDF null)")
		return
	}
}
*/
// TestArrayParsing is a placeholder for array parsing tests. It is reported
// as skipped rather than passed so the missing coverage stays visible in test
// output.
func TestArrayParsing(t *testing.T) {
	// 7.3.7.
	t.Skip("TODO: array parsing tests (7.3.7) are not implemented")
}
// TestReferenceParsing is a placeholder for reference parsing tests. It is
// reported as skipped rather than passed so the missing coverage stays
// visible in test output.
func TestReferenceParsing(t *testing.T) {
	t.Skip("TODO: reference parsing tests are not implemented")
}
// TestNullParsing is a placeholder for null object parsing tests. It is
// reported as skipped rather than passed so the missing coverage stays
// visible in test output.
func TestNullParsing(t *testing.T) {
	t.Skip("TODO: null parsing tests are not implemented")
}
// TestStreamParsing is a placeholder for stream parsing tests. It is reported
// as skipped rather than passed so the missing coverage stays visible in test
// output.
func TestStreamParsing(t *testing.T) {
	t.Skip("TODO: stream parsing tests are not implemented")
}
// TestIndirectObjParsing1 runs ParseIndirectObject over a table of raw
// indirect object definitions and hands each parsed object to a per-case
// check function.
func TestIndirectObjParsing1 ( t * testing . T ) {
2019-04-25 17:08:15 +00:00
// Each case carries a description (logged before the case runs), the raw
// text handed to the parser, and a callback that asserts on the result.
// The callbacks assert through the enclosing test's t.
testcases := [ ] struct {
description string
rawPDF string
checkFunc func ( obj PdfObject )
} {
// Catalog-style dictionary with nested ViewerPreferences / Rights
// dictionaries; the raw text continues past "endobj" into the header of
// the next object.
{ "Typical case" ,
` 1 0 obj
2016-07-09 14:09:27 +00:00
<<
/ Names 2 0 R
/ Pages 3 0 R
/ Metadata 4 0 R
/ ViewerPreferences
<<
/ Rights
<<
/ Document [ / FullSave ]
/ TimeOfUbiquitization ( D : 20071210131309 Z )
/ RightsID [ ( x \ \ Ä - z < 80 > < 83 > ã [ W < b < 99 > \ rhvèC © ðFüE ^ TN £ ^ \ jó ] ç = çø \ n < 8 f > : Ë ¹ \ ( < 9 a > \ r = § ^ \ ~ CÌÁxîÚð ^ V /= Î | Q \ r < 99 > ¢ ) ( # $ ÐJ ^ C < 98 > ^ ZX < 86 > ^ TÞ ¿ ø ¸ ^ N ] ú < 8 f > ^ N × 2 < 9 f > § ø ± D ^ Q \ r ! ' ¡ < 8 a > dp ° , l ¿ < 9 d > É < 82 > « eæ § B } « Ç8p · < 97 > \ fl ¿ ² G / x ¹ > ) ( kc2 ² µ ^ ? - © ¸ þ $ åiØ . Aé7 ^ P ½ ÒÏð ^ S ^ ^ Y × rùç ^ O̵ ¶ ¿ Hp ^ ? * NËwóúËo § ü1ª < 97 > îFÜ \ \ < 8 f > OÚ ^ P [ ¸ < 93 > 0 ^ ) ]
/ Version 1
/ Msg ( This form has document rights applied to it . These rights allow anyone completing this form , with the free Adobe Reader , to save their filled - in form locally . )
/ Form [ / Import / Export / SubmitStandalone / SpawnTemplate ]
>>
>>
/ AcroForm 5 0 R
/ Type / Catalog
>>
endobj
3 0 obj
2019-04-25 17:08:15 +00:00
` ,
// Expects object 1, generation 0, then walks ViewerPreferences -> Rights
// and checks that Version is the integer 1.
func ( obj PdfObject ) {
indirect , ok := GetIndirect ( obj )
require . True ( t , ok )
require . NotNil ( t , indirect )
require . NotNil ( t , indirect . PdfObject )
require . Equal ( t , int64 ( 1 ) , indirect . ObjectNumber )
require . Equal ( t , int64 ( 0 ) , indirect . GenerationNumber )
dict , isDict := GetDict ( indirect )
require . True ( t , isDict )
dict , isDict = GetDict ( dict . Get ( "ViewerPreferences" ) )
require . True ( t , isDict )
require . Len ( t , dict . Keys ( ) , 1 )
dict , isDict = GetDict ( dict . Get ( "Rights" ) )
require . True ( t , isDict )
version , ok := GetIntVal ( dict . Get ( "Version" ) )
require . True ( t , ok )
require . Equal ( t , 1 , version )
} ,
} ,
{
"Basic object with short inner string" ,
` 1 0 obj
( a )
endobj
` , func ( obj PdfObject ) {
indirect , ok := GetIndirect ( obj )
require . True ( t , ok )
require . NotNil ( t , indirect )
require . NotNil ( t , indirect . PdfObject )
// GetString is called on the indirect object itself, not on
// indirect.PdfObject.
str , ok := GetString ( obj )
require . True ( t , ok )
require . Equal ( t , "a" , str . String ( ) )
} ,
} ,
{ "Empty indirect object interpreted as containing null object" ,
` 1 0 obj
endobj
` ,
func ( obj PdfObject ) {
indirect , ok := GetIndirect ( obj )
require . True ( t , ok )
require . NotNil ( t , indirect )
require . NotNil ( t , indirect . PdfObject )
require . True ( t , IsNullObject ( indirect . PdfObject ) )
} ,
} ,
2016-07-09 14:09:27 +00:00
}
2019-04-25 17:08:15 +00:00
for _ , tcase := range testcases {
t . Logf ( "%s" , tcase . description )
parser := PdfParser { }
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( tcase . rawPDF )
obj , err := parser . ParseIndirectObject ( )
// io.EOF is tolerated; any other error is reported and the return below
// ends the whole test, so the remaining cases are not run.
if err != nil && err != io . EOF {
t . Errorf ( "Failed to parse indirect obj (%s)" , err )
return
}
tcase . checkFunc ( obj )
common . Log . Debug ( "Parsed obj: %s" , obj )
}
2016-07-09 14:09:27 +00:00
}
// Test /Prev and xref tables. Check if the priority order is right.
// Test recovering xref tables. Refactor to recovery.go ?
func TestXrefStreamParse ( t * testing . T ) {
rawText := ` 99 0 obj
<< / Type / XRef
/ Index [ 0 5 ]
/ W [ 1 2 2 ]
/ Filter / ASCIIHexDecode
/ Size 5
/ Length 65
>>
stream
00 0000 FFFF
02 000 F 0000
02 000 F 0001
02 000 F 0002
01 BA5E 0000 >
endstream
endobj `
parser := PdfParser { }
2019-04-18 19:22:45 +00:00
parser . xrefs . ObjectMap = make ( map [ int ] XrefObject )
2018-07-14 14:07:33 +00:00
parser . objstms = make ( objectStreams )
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( rawText )
2016-07-09 14:09:27 +00:00
xrefDict , err := parser . parseXrefStream ( nil )
if err != nil {
t . Errorf ( "Invalid xref stream object (%s)" , err )
return
}
2017-07-08 21:04:13 +00:00
typeName , ok := xrefDict . Get ( "Type" ) . ( * PdfObjectName )
2016-07-09 14:09:27 +00:00
if ! ok || * typeName != "XRef" {
t . Errorf ( "Invalid Type != XRef" )
return
}
2019-04-18 19:22:45 +00:00
if len ( parser . xrefs . ObjectMap ) != 4 {
t . Errorf ( "Wrong length (%d)" , len ( parser . xrefs . ObjectMap ) )
2016-07-09 14:09:27 +00:00
return
}
2019-04-18 19:22:45 +00:00
if parser . xrefs . ObjectMap [ 3 ] . XType != XrefTypeObjectStream {
2016-07-09 14:09:27 +00:00
t . Errorf ( "Invalid type" )
return
}
2019-04-18 19:22:45 +00:00
if parser . xrefs . ObjectMap [ 3 ] . OsObjNumber != 15 {
2016-07-09 14:09:27 +00:00
t . Errorf ( "Wrong object stream obj number" )
return
}
2019-04-18 19:22:45 +00:00
if parser . xrefs . ObjectMap [ 3 ] . OsObjIndex != 2 {
2016-07-09 14:09:27 +00:00
t . Errorf ( "Wrong object stream obj index" )
return
}
2016-07-17 19:59:17 +00:00
common . Log . Debug ( "Xref dict: %s" , xrefDict )
2016-07-09 14:09:27 +00:00
}
2019-03-14 01:07:32 +00:00
// TODO(gunnsth): Clear up. Should define clear inputs and expectation data and then run it.
2016-07-09 14:09:27 +00:00
func TestObjectParse ( t * testing . T ) {
parser := PdfParser { }
// Test object detection.
// Invalid object type.
rawText := " \t9 0 false"
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( rawText )
2016-07-09 14:09:27 +00:00
obj , err := parser . parseObject ( )
if err != nil {
t . Error ( "Should ignore tab/space" )
return
}
2019-03-14 01:07:32 +00:00
// Integer
rawText = "0"
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( rawText )
obj , err = parser . parseObject ( )
if err != nil {
t . Errorf ( "Error parsing object: %v" , err )
return
}
nump , ok := obj . ( * PdfObjectInteger )
if ! ok {
t . Errorf ( "Unable to identify integer" )
return
}
if * nump != 0 {
t . Errorf ( "Wrong value, expecting 9 (%d)" , * nump )
return
}
2016-07-09 14:09:27 +00:00
// Integer
rawText = "9 0 false"
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( rawText )
2016-07-09 14:09:27 +00:00
obj , err = parser . parseObject ( )
if err != nil {
t . Errorf ( "Error parsing object" )
return
}
2019-03-14 01:07:32 +00:00
nump , ok = obj . ( * PdfObjectInteger )
2016-07-09 14:09:27 +00:00
if ! ok {
t . Errorf ( "Unable to identify integer" )
return
}
if * nump != 9 {
t . Errorf ( "Wrong value, expecting 9 (%d)" , * nump )
return
}
// Reference
rawText = "9 0 R false"
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( rawText )
2016-07-09 14:09:27 +00:00
obj , err = parser . parseObject ( )
if err != nil {
t . Errorf ( "Error parsing object" )
return
}
refp , ok := obj . ( * PdfObjectReference )
if ! ok {
t . Errorf ( "Unable to identify reference" )
return
}
if ( * refp ) . ObjectNumber != 9 {
t . Errorf ( "Wrong value, expecting object number 9" )
return
}
// Reference
rawText = "909 0 R false"
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( rawText )
2016-07-09 14:09:27 +00:00
obj , err = parser . parseObject ( )
if err != nil {
t . Errorf ( "Error parsing object" )
return
}
refp , ok = obj . ( * PdfObjectReference )
if ! ok {
t . Errorf ( "Unable to identify reference" )
return
}
if ( * refp ) . ObjectNumber != 909 {
t . Errorf ( "Wrong value, expecting object number 9" )
return
}
// Bool
rawText = "false 9 0 R"
2017-07-23 12:21:42 +00:00
parser . rs , parser . reader , parser . fileSize = makeReaderForText ( rawText )
2016-07-09 14:09:27 +00:00
obj , err = parser . parseObject ( )
if err != nil {
t . Errorf ( "Error parsing object" )
return
}
boolp , ok := obj . ( * PdfObjectBool )
if ! ok {
t . Errorf ( "Unable to identify bool object" )
return
}
if * boolp != false {
t . Errorf ( "Wrong value, expecting false" )
return
}
}
2018-08-03 11:01:54 +00:00
// TestMinimalPDFFile test basic parsing of a minimal pdf file.
2016-07-09 14:09:27 +00:00
func TestMinimalPDFFile ( t * testing . T ) {
2018-08-03 11:01:54 +00:00
file , err := os . Open ( "./testdata/minimal.pdf" )
2019-04-18 19:22:45 +00:00
require . NoError ( t , err )
2016-07-09 14:09:27 +00:00
defer file . Close ( )
2018-08-03 11:01:54 +00:00
parser , err := NewParser ( file )
2019-04-18 19:22:45 +00:00
require . NoError ( t , err )
2016-07-09 14:09:27 +00:00
2019-04-18 19:22:45 +00:00
require . Len ( t , parser . xrefs . ObjectMap , 4 )
require . Equal ( t , 1 , parser . xrefs . ObjectMap [ 1 ] . ObjectNumber )
require . Equal ( t , int64 ( 18 ) , parser . xrefs . ObjectMap [ 1 ] . Offset )
require . Equal ( t , XrefTypeTableEntry , parser . xrefs . ObjectMap [ 1 ] . XType )
require . Equal ( t , 3 , parser . xrefs . ObjectMap [ 3 ] . ObjectNumber )
require . Equal ( t , int64 ( 178 ) , parser . xrefs . ObjectMap [ 3 ] . Offset )
require . Equal ( t , XrefTypeTableEntry , parser . xrefs . ObjectMap [ 3 ] . XType )
2016-07-09 14:09:27 +00:00
// Check catalog object.
catalogObj , err := parser . LookupByNumber ( 1 )
2019-04-18 19:22:45 +00:00
require . NoError ( t , err )
2016-07-09 14:09:27 +00:00
catalog , ok := catalogObj . ( * PdfIndirectObject )
2019-04-18 19:22:45 +00:00
require . True ( t , ok )
2016-07-09 14:09:27 +00:00
catalogDict , ok := catalog . PdfObject . ( * PdfObjectDictionary )
2019-04-18 19:22:45 +00:00
require . True ( t , ok )
2018-08-03 11:01:54 +00:00
typename , ok := catalogDict . Get ( "Type" ) . ( * PdfObjectName )
2019-04-18 19:22:45 +00:00
require . True ( t , ok )
require . Equal ( t , "Catalog" , typename . String ( ) )
2016-07-09 14:09:27 +00:00
// Check Page object.
pageObj , err := parser . LookupByNumber ( 3 )
2019-04-18 19:22:45 +00:00
require . NoError ( t , err )
2016-07-09 14:09:27 +00:00
page , ok := pageObj . ( * PdfIndirectObject )
2019-04-18 19:22:45 +00:00
require . True ( t , ok )
2016-07-09 14:09:27 +00:00
pageDict , ok := page . PdfObject . ( * PdfObjectDictionary )
2019-04-18 19:22:45 +00:00
require . True ( t , ok )
require . Len ( t , pageDict . Keys ( ) , 4 )
2018-08-03 11:01:54 +00:00
resourcesDict , ok := pageDict . Get ( "Resources" ) . ( * PdfObjectDictionary )
2019-04-18 19:22:45 +00:00
require . True ( t , ok )
require . Len ( t , resourcesDict . Keys ( ) , 1 )
2018-08-03 11:01:54 +00:00
fontDict , ok := resourcesDict . Get ( "Font" ) . ( * PdfObjectDictionary )
2019-04-18 19:22:45 +00:00
require . True ( t , ok )
2018-08-03 11:01:54 +00:00
f1Dict , ok := fontDict . Get ( "F1" ) . ( * PdfObjectDictionary )
2019-04-18 19:22:45 +00:00
require . True ( t , ok )
require . Len ( t , f1Dict . Keys ( ) , 3 )
2018-08-03 11:01:54 +00:00
baseFont , ok := f1Dict . Get ( "BaseFont" ) . ( * PdfObjectName )
2019-04-18 19:22:45 +00:00
require . True ( t , ok )
require . Equal ( t , "Times-Roman" , baseFont . String ( ) )
2016-07-09 14:09:27 +00:00
}
2019-04-24 22:04:30 +03:00
// TestPDFVersionParse tests PDF version parsing. For each test file it checks
// the major/minor version returned by parsePdfVersion and then reads from the
// parser's reader to confirm that the next bytes are the "%PDF-x.y" header.
func TestPDFVersionParse(t *testing.T) {
	testcases := []struct {
		name   string
		path   string
		major  int
		minor  int
		header string
	}{
		// The version is at the start of the file.
		{"version at start of file", "./testdata/minimal.pdf", 1, 1, "%PDF-1.1"},
		// The file has invalid data before the version.
		{"invalid data before version", "./testdata/invalidstart.pdf", 1, 3, "%PDF-1.3"},
	}

	for _, tcase := range testcases {
		tcase := tcase
		t.Run(tcase.name, func(t *testing.T) {
			f, err := os.Open(tcase.path)
			require.NoError(t, err)
			defer f.Close()

			parser := &PdfParser{
				rs:                                    f,
				ObjCache:                              make(objectCache),
				streamLengthReferenceLookupInProgress: map[int64]bool{},
			}

			// Test parsed version. require.Equal takes (expected, actual).
			majorVersion, minorVersion, err := parser.parsePdfVersion()
			require.NoError(t, err)
			require.Equal(t, tcase.major, majorVersion)
			require.Equal(t, tcase.minor, minorVersion)

			// Test file offset position.
			// NOTE(review): parser.reader is not set above, so it is
			// presumably initialised by parsePdfVersion - confirm.
			// io.ReadFull is used because a single Read may legitimately
			// return fewer bytes than requested.
			b := make([]byte, len(tcase.header))
			_, err = io.ReadFull(parser.reader, b)
			require.NoError(t, err)
			require.Equal(t, tcase.header, string(b))
		})
	}
}