Preserve and allow output of hexadecimal strings

Refactored PdfObjectString into a struct with bool flag for hex.  Fixed any code broken by the change.
Unexported non-essential functions for crypto (not used by model).  Can unexport more later or refactor to internal package.
This commit is contained in:
Gunnsteinn Hall 2018-07-14 02:25:29 +00:00
parent 5e9ff8d3ca
commit 14ee80e1fe
18 changed files with 229 additions and 122 deletions

View File

@ -171,7 +171,7 @@ func (this *ContentStreamParser) ExtractText() (string, error) {
for _, obj := range *paramList {
switch v := obj.(type) {
case *PdfObjectString:
txt += string(*v)
txt += v.Str()
case *PdfObjectFloat:
if *v < -100 {
txt += " "
@ -190,7 +190,7 @@ func (this *ContentStreamParser) ExtractText() (string, error) {
if !ok {
return "", fmt.Errorf("Invalid parameter type, not string (%T)", op.Params[0])
}
txt += string(*param)
txt += param.Str()
}
}

View File

@ -358,11 +358,11 @@ func (this *ContentStreamParser) ParseInlineImage() (*ContentStreamInlineImage,
return nil, fmt.Errorf("Failed to read inline image - invalid operand")
}
if *operand == "EI" {
if operand.Str() == "EI" {
// Image fully defined
common.Log.Trace("Inline image finished...")
return &im, nil
} else if *operand == "ID" {
} else if operand.Str() == "ID" {
// Inline image data.
// Should get a single space (0x20) followed by the data and then EI.
common.Log.Trace("ID start")

View File

@ -18,12 +18,12 @@ import (
. "github.com/unidoc/unidoc/pdf/core"
)
// Content stream parser.
// ContentStreamParser represents a content stream parser for parsing content streams in PDFs.
type ContentStreamParser struct {
reader *bufio.Reader
}
// Create a new instance of the content stream parser from an input content
// NewContentStreamParser creates a new instance of the content stream parser from an input content
// stream string.
func NewContentStreamParser(contentStr string) *ContentStreamParser {
// Each command has parameters and an operand (command).
@ -35,7 +35,7 @@ func NewContentStreamParser(contentStr string) *ContentStreamParser {
return &parser
}
// Parses all commands in content stream, returning a list of operation data.
// Parse parses all commands in content stream, returning a list of operation data.
func (this *ContentStreamParser) Parse() (*ContentStreamOperations, error) {
operations := ContentStreamOperations{}
@ -52,7 +52,7 @@ func (this *ContentStreamParser) Parse() (*ContentStreamOperations, error) {
return &operations, err
}
if isOperand {
operation.Operand = string(*obj.(*PdfObjectString))
operation.Operand, _ = GetStringVal(obj)
operations = append(operations, &operation)
break
} else {
@ -252,7 +252,7 @@ func (this *ContentStreamParser) parseNumber() (PdfObject, error) {
}
// A string starts with '(' and ends with ')'.
func (this *ContentStreamParser) parseString() (PdfObjectString, error) {
func (this *ContentStreamParser) parseString() (*PdfObjectString, error) {
this.reader.ReadByte()
bytes := []byte{}
@ -260,21 +260,21 @@ func (this *ContentStreamParser) parseString() (PdfObjectString, error) {
for {
bb, err := this.reader.Peek(1)
if err != nil {
return PdfObjectString(bytes), err
return MakeString(string(bytes)), err
}
if bb[0] == '\\' { // Escape sequence.
this.reader.ReadByte() // Skip the escape \ byte.
b, err := this.reader.ReadByte()
if err != nil {
return PdfObjectString(bytes), err
return MakeString(string(bytes)), err
}
// Octal '\ddd' number (base 8).
if IsOctalDigit(b) {
bb, err := this.reader.Peek(2)
if err != nil {
return PdfObjectString(bytes), err
return MakeString(string(bytes)), err
}
numeric := []byte{}
@ -291,7 +291,7 @@ func (this *ContentStreamParser) parseString() (PdfObjectString, error) {
common.Log.Trace("Numeric string \"%s\"", numeric)
code, err := strconv.ParseUint(string(numeric), 8, 32)
if err != nil {
return PdfObjectString(bytes), err
return MakeString(string(bytes)), err
}
bytes = append(bytes, byte(code))
continue
@ -331,11 +331,11 @@ func (this *ContentStreamParser) parseString() (PdfObjectString, error) {
bytes = append(bytes, b)
}
return PdfObjectString(bytes), nil
return MakeString(string(bytes)), nil
}
// Starts with '<' ends with '>'.
func (this *ContentStreamParser) parseHexString() (PdfObjectString, error) {
func (this *ContentStreamParser) parseHexString() (*PdfObjectString, error) {
this.reader.ReadByte()
hextable := []byte("0123456789abcdefABCDEF")
@ -346,7 +346,7 @@ func (this *ContentStreamParser) parseHexString() (PdfObjectString, error) {
bb, err := this.reader.Peek(1)
if err != nil {
return PdfObjectString(""), err
return MakeString(""), err
}
if bb[0] == '>' {
@ -365,7 +365,7 @@ func (this *ContentStreamParser) parseHexString() (PdfObjectString, error) {
}
buf, _ := hex.DecodeString(string(tmp))
return PdfObjectString(buf), nil
return MakeHexString(string(buf)), nil
}
// Starts with '[' ends with ']'. Can contain any kinds of direct objects.
@ -494,12 +494,12 @@ func (this *ContentStreamParser) parseDict() (*PdfObjectDictionary, error) {
}
// An operand is a text command represented by a word.
func (this *ContentStreamParser) parseOperand() (PdfObjectString, error) {
func (this *ContentStreamParser) parseOperand() (*PdfObjectString, error) {
bytes := []byte{}
for {
bb, err := this.reader.Peek(1)
if err != nil {
return PdfObjectString(bytes), err
return MakeString(string(bytes)), err
}
if IsDelimiter(bb[0]) {
break
@ -512,13 +512,13 @@ func (this *ContentStreamParser) parseOperand() (PdfObjectString, error) {
bytes = append(bytes, b)
}
return PdfObjectString(bytes), nil
return MakeString(string(bytes)), nil
}
// Parse a generic object. Returns the object, an error code, and a bool
// value indicating whether the object is an operand. An operand
// is contained in a pdf string object.
func (this *ContentStreamParser) parseObject() (PdfObject, error, bool) {
func (this *ContentStreamParser) parseObject() (obj PdfObject, err error, isop bool) {
// Determine the kind of object.
// parse it!
// make a list of operands, then once operand arrives put into a package.
@ -542,11 +542,11 @@ func (this *ContentStreamParser) parseObject() (PdfObject, error, bool) {
} else if bb[0] == '(' {
common.Log.Trace("->String!")
str, err := this.parseString()
return &str, err, false
return str, err, false
} else if bb[0] == '<' && bb[1] != '<' {
common.Log.Trace("->Hex String!")
str, err := this.parseHexString()
return &str, err, false
return str, err, false
} else if bb[0] == '[' {
common.Log.Trace("->Array!")
arr, err := this.parseArray()
@ -577,7 +577,7 @@ func (this *ContentStreamParser) parseObject() (PdfObject, error, bool) {
}
operand, err := this.parseOperand()
return &operand, err, true
return operand, err, true
}
}
}

View File

@ -32,7 +32,7 @@ func makeParamsFromNames(vals []core.PdfObjectName) []core.PdfObject {
func makeParamsFromStrings(vals []core.PdfObjectString) []core.PdfObject {
params := []core.PdfObject{}
for _, val := range vals {
params = append(params, core.MakeString(string(val)))
params = append(params, core.MakeString(val.Str()))
}
return params
}

View File

@ -216,7 +216,7 @@ func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCr
subfilter, ok := ed.Get("SubFilter").(*PdfObjectString)
if ok {
crypter.Subfilter = string(*subfilter)
crypter.Subfilter = subfilter.Str()
common.Log.Debug("Using subfilter %s", subfilter)
}
@ -263,22 +263,22 @@ func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCr
if !ok {
return crypter, errors.New("Encrypt dictionary missing O")
}
if len(*O) != 32 {
return crypter, fmt.Errorf("Length(O) != 32 (%d)", len(*O))
if len(O.Str()) != 32 {
return crypter, fmt.Errorf("Length(O) != 32 (%d)", len(O.Str()))
}
crypter.O = []byte(*O)
crypter.O = O.Bytes()
U, ok := ed.Get("U").(*PdfObjectString)
if !ok {
return crypter, errors.New("Encrypt dictionary missing U")
}
if len(*U) != 32 {
if len(U.Str()) != 32 {
// Strictly speaking, this is not an error.
// If O and the other entries are OK, the file can still be read.
common.Log.Debug("Warning: Length(U) != 32 (%d)", len(*U))
common.Log.Debug("Warning: Length(U) != 32 (%d)", len(U.Str()))
//return crypter, errors.New("Length(U) != 32")
}
crypter.U = []byte(*U)
crypter.U = U.Bytes()
P, ok := ed.Get("P").(*PdfObjectInteger)
if !ok {
@ -296,17 +296,17 @@ func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCr
// Default: empty ID.
// Strictly, if file is encrypted, the ID should always be specified
// but clearly not everyone is following the specification.
id0 := PdfObjectString("")
id0 := ""
if idArray, ok := trailer.Get("ID").(*PdfObjectArray); ok && len(*idArray) >= 1 {
id0obj, ok := (*idArray)[0].(*PdfObjectString)
if !ok {
return crypter, errors.New("Invalid trailer ID")
}
id0 = *id0obj
id0 = id0obj.Str()
} else {
common.Log.Debug("Trailer ID array missing or invalid!")
}
crypter.Id0 = string(id0)
crypter.Id0 = id0
return crypter, nil
}
@ -382,7 +382,7 @@ func (crypt *PdfCrypt) authenticate(password []byte) (bool, error) {
// Try user password.
common.Log.Trace("Debugging authentication - user pass")
authenticated, err := crypt.Alg6(password)
authenticated, err := crypt.alg6(password)
if err != nil {
return false, err
}
@ -396,7 +396,7 @@ func (crypt *PdfCrypt) authenticate(password []byte) (bool, error) {
// May not be necessary if only want to get all contents.
// (user pass needs to be known or empty).
common.Log.Trace("Debugging authentication - owner pass")
authenticated, err = crypt.Alg7(password)
authenticated, err = crypt.alg7(password)
if err != nil {
return false, err
}
@ -419,7 +419,7 @@ func (crypt *PdfCrypt) checkAccessRights(password []byte) (bool, AccessPermissio
perms := AccessPermissions{}
// Try owner password -> full rights.
isOwner, err := crypt.Alg7(password)
isOwner, err := crypt.alg7(password)
if err != nil {
return false, perms, err
}
@ -437,7 +437,7 @@ func (crypt *PdfCrypt) checkAccessRights(password []byte) (bool, AccessPermissio
}
// Try user password.
isUser, err := crypt.Alg6(password)
isUser, err := crypt.alg6(password)
if err != nil {
return false, perms, err
}
@ -721,17 +721,17 @@ func (crypt *PdfCrypt) Decrypt(obj PdfObject, parentObjNum, parentGenNum int64)
}
// Overwrite the encrypted with decrypted string.
decrypted := make([]byte, len(*s))
for i := 0; i < len(*s); i++ {
decrypted[i] = (*s)[i]
decrypted := make([]byte, len(s.Str()))
str := s.Str()
for i := 0; i < len(str); i++ {
decrypted[i] = str[i]
}
common.Log.Trace("Decrypt string: %s : % x", decrypted, decrypted)
decrypted, err = crypt.decryptBytes(decrypted, stringFilter, key)
if err != nil {
return err
}
*s = PdfObjectString(decrypted)
s.val = string(decrypted)
return nil
}
@ -968,16 +968,17 @@ func (crypt *PdfCrypt) Encrypt(obj PdfObject, parentObjNum, parentGenNum int64)
return err
}
encrypted := make([]byte, len(*s))
for i := 0; i < len(*s); i++ {
encrypted[i] = (*s)[i]
str := s.Str()
encrypted := make([]byte, len(str))
for i := 0; i < len(str); i++ {
encrypted[i] = str[i]
}
common.Log.Trace("Encrypt string: %s : % x", encrypted, encrypted)
encrypted, err = crypt.encryptBytes(encrypted, stringFilter, key)
if err != nil {
return err
}
*s = PdfObjectString(encrypted)
s.val = string(encrypted)
return nil
}
@ -1022,10 +1023,9 @@ func (crypt *PdfCrypt) Encrypt(obj PdfObject, parentObjNum, parentGenNum int64)
return nil
}
// Alg2 computes an encryption key.
// TODO (v3): Unexport.
func (crypt *PdfCrypt) Alg2(pass []byte) []byte {
common.Log.Trace("Alg2")
// alg2 computes an encryption key.
func (crypt *PdfCrypt) alg2(pass []byte) []byte {
common.Log.Trace("alg2")
key := crypt.paddedPass(pass)
h := md5.New()
@ -1091,10 +1091,9 @@ func (crypt *PdfCrypt) alg3Key(pass []byte) []byte {
}
// Alg3 computes the encryption dictionary's O (owner password) value.
// TODO (v3): Unexport.
func (crypt *PdfCrypt) Alg3(upass, opass []byte) (PdfObjectString, error) {
func (crypt *PdfCrypt) Alg3(upass, opass []byte) (string, error) {
// Return O string val.
O := PdfObjectString("")
O := ""
var encKey []byte
if len(opass) > 0 {
@ -1126,16 +1125,15 @@ func (crypt *PdfCrypt) Alg3(upass, opass []byte) (PdfObjectString, error) {
}
}
O = PdfObjectString(encrypted)
O = string(encrypted)
return O, nil
}
// Alg4 computes the encryption dictionarys U (user password) value (Security handlers of revision 2).
// TODO (v3): Unexport.
func (crypt *PdfCrypt) Alg4(upass []byte) (PdfObjectString, []byte, error) {
U := PdfObjectString("")
// alg4 computes the encryption dictionary's U (user password) value (Security handlers of revision 2).
func (crypt *PdfCrypt) alg4(upass []byte) (string, []byte, error) {
U := ""
ekey := crypt.Alg2(upass)
ekey := crypt.alg2(upass)
ciph, err := rc4.NewCipher(ekey)
if err != nil {
return U, ekey, errors.New("Failed rc4 ciph")
@ -1145,16 +1143,15 @@ func (crypt *PdfCrypt) Alg4(upass []byte) (PdfObjectString, []byte, error) {
encrypted := make([]byte, len(s))
ciph.XORKeyStream(encrypted, s)
U = PdfObjectString(encrypted)
U = string(encrypted)
return U, ekey, nil
}
// Alg5 computes the encryption dictionary's U (user password) value (Security handlers of revision 3 or greater).
// TODO (v3): Unexport.
func (crypt *PdfCrypt) Alg5(upass []byte) (PdfObjectString, []byte, error) {
U := PdfObjectString("")
func (crypt *PdfCrypt) Alg5(upass []byte) (string, []byte, error) {
U := ""
ekey := crypt.Alg2(upass)
ekey := crypt.alg2(upass)
h := md5.New()
h.Write([]byte(padding))
@ -1209,18 +1206,17 @@ func (crypt *PdfCrypt) Alg5(upass []byte) (PdfObjectString, []byte, error) {
return U, ekey, errors.New("Failed to gen rand number")
}
U = PdfObjectString(bb)
U = string(bb)
return U, ekey, nil
}
// Alg6 authenticates the user password.
// TODO (v3): Unexport.
func (crypt *PdfCrypt) Alg6(upass []byte) (bool, error) {
var uo PdfObjectString
// alg6 authenticates the user password.
func (crypt *PdfCrypt) alg6(upass []byte) (bool, error) {
var uo string
var err error
var key []byte
if crypt.R == 2 {
uo, key, err = crypt.Alg4(upass)
uo, key, err = crypt.alg4(upass)
} else if crypt.R >= 3 {
uo, key, err = crypt.Alg5(upass)
} else {
@ -1254,9 +1250,8 @@ func (crypt *PdfCrypt) Alg6(upass []byte) (bool, error) {
return false, nil
}
// Alg7 authenticates the owner password.
// TODO (v3): Unexport.
func (crypt *PdfCrypt) Alg7(opass []byte) (bool, error) {
// alg7 authenticates the owner password.
func (crypt *PdfCrypt) alg7(opass []byte) (bool, error) {
encKey := crypt.alg3Key(opass)
decrypted := make([]byte, len(crypt.O))
@ -1285,7 +1280,7 @@ func (crypt *PdfCrypt) Alg7(opass []byte) (bool, error) {
return false, errors.New("invalid R")
}
auth, err := crypt.Alg6(decrypted)
auth, err := crypt.alg6(decrypted)
if err != nil {
return false, nil
}

View File

@ -63,7 +63,7 @@ func TestAlg2(t *testing.T) {
crypter.Length = 128
crypter.EncryptMetadata = true
key := crypter.Alg2([]byte(""))
key := crypter.alg2([]byte(""))
keyExp := []byte{0xf8, 0x94, 0x9c, 0x5a, 0xf5, 0xa0, 0xc0, 0xca,
0x30, 0xb8, 0x91, 0xc1, 0xbb, 0x2c, 0x4f, 0xf5}

View File

@ -297,7 +297,7 @@ func (parser *PdfParser) parseNumber() (PdfObject, error) {
}
// A string starts with '(' and ends with ')'.
func (parser *PdfParser) parseString() (PdfObjectString, error) {
func (parser *PdfParser) parseString() (*PdfObjectString, error) {
parser.reader.ReadByte()
var r bytes.Buffer
@ -305,21 +305,21 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
for {
bb, err := parser.reader.Peek(1)
if err != nil {
return PdfObjectString(r.String()), err
return MakeString(r.String()), err
}
if bb[0] == '\\' { // Escape sequence.
parser.reader.ReadByte() // Skip the escape \ byte.
b, err := parser.reader.ReadByte()
if err != nil {
return PdfObjectString(r.String()), err
return MakeString(r.String()), err
}
// Octal '\ddd' number (base 8).
if IsOctalDigit(b) {
bb, err := parser.reader.Peek(2)
if err != nil {
return PdfObjectString(r.String()), err
return MakeString(r.String()), err
}
numeric := []byte{}
@ -336,7 +336,7 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
common.Log.Trace("Numeric string \"%s\"", numeric)
code, err := strconv.ParseUint(string(numeric), 8, 32)
if err != nil {
return PdfObjectString(r.String()), err
return MakeString(r.String()), err
}
r.WriteByte(byte(code))
continue
@ -376,19 +376,19 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
r.WriteByte(b)
}
return PdfObjectString(r.String()), nil
return MakeString(r.String()), nil
}
// Starts with '<' ends with '>'.
// The collected hex digits are decoded into bytes with hex.DecodeString; a decoding error is ignored.
func (parser *PdfParser) parseHexString() (PdfObjectString, error) {
func (parser *PdfParser) parseHexString() (*PdfObjectString, error) {
parser.reader.ReadByte()
var r bytes.Buffer
for {
bb, err := parser.reader.Peek(1)
if err != nil {
return PdfObjectString(""), err
return MakeString(""), err
}
if bb[0] == '>' {
@ -407,7 +407,7 @@ func (parser *PdfParser) parseHexString() (PdfObjectString, error) {
}
buf, _ := hex.DecodeString(r.String())
return PdfObjectString(buf), nil
return MakeHexString(string(buf)), nil
}
// Starts with '[' ends with ']'. Can contain any kinds of direct objects.
@ -506,7 +506,7 @@ func (parser *PdfParser) parseObject() (PdfObject, error) {
} else if bb[0] == '(' {
common.Log.Trace("->String!")
str, err := parser.parseString()
return &str, err
return str, err
} else if bb[0] == '[' {
common.Log.Trace("->Array!")
arr, err := parser.parseArray()
@ -518,7 +518,7 @@ func (parser *PdfParser) parseObject() (PdfObject, error) {
} else if bb[0] == '<' {
common.Log.Trace("->Hex string!")
str, err := parser.parseHexString()
return &str, err
return str, err
} else if bb[0] == '%' {
parser.readComment()
parser.skipSpaces()

View File

@ -132,7 +132,7 @@ func TestStringParsing(t *testing.T) {
if err != nil && err != io.EOF {
t.Errorf("Unable to parse string, error: %s", err)
}
if string(o) != expected {
if o.Str() != expected {
t.Errorf("String Mismatch %s: \"%s\" != \"%s\"", raw, o, expected)
}
}
@ -164,8 +164,8 @@ func TestBinStringParsing(t *testing.T) {
if err != nil && err != io.EOF {
t.Errorf("Unable to parse string, error: %s", err)
}
if len(string(o)) != 32 {
t.Errorf("Wrong length, should be 32 (got %d)", len(string(o)))
if len(o.Str()) != 32 {
t.Errorf("Wrong length, should be 32 (got %d)", len(o.Str()))
}
}
@ -347,7 +347,7 @@ func BenchmarkHexStringParsing(b *testing.B) {
b.Errorf("Error parsing hex string: %s", err.Error())
return
}
if string(hs) != ref.String() {
if hs.Str() != ref.String() {
b.Errorf("Reference and parsed hex strings mismatch")
}
parser.SetFileOffset(0)
@ -426,7 +426,7 @@ func TestDictParsing2(t *testing.T) {
}
str, ok := dict.Get("StringItem").(*PdfObjectString)
if !ok || *str != "a string" {
if !ok || str.Str() != "a string" {
t.Errorf("Invalid string item")
}

View File

@ -7,6 +7,7 @@ package core
import (
"bytes"
"encoding/hex"
"fmt"
"github.com/unidoc/unidoc/common"
@ -31,8 +32,10 @@ type PdfObjectInteger int64
type PdfObjectFloat float64
// PdfObjectString represents the primitive PDF string object.
// TODO (v3): Change to a struct and add a flag for hex/plaintext.
type PdfObjectString string
type PdfObjectString struct {
// val holds the string content; it is what Str() and Bytes() expose.
val string
// isHex, when true, makes DefaultWriteString emit the content in hexadecimal "<...>" notation
// instead of the regular "(...)" notation.
isHex bool
}
// PdfObjectName represents the primitive PDF name object.
type PdfObjectName string
@ -135,7 +138,13 @@ func MakeFloat(val float64) *PdfObjectFloat {
// MakeString creates a PdfObjectString from a string.
func MakeString(s string) *PdfObjectString {
str := PdfObjectString(s)
str := PdfObjectString{val: s}
return &str
}
// MakeHexString creates a PdfObjectString holding s with the hex flag set, i.e. a string intended for
// output as a hexadecimal string.
func MakeHexString(s string) *PdfObjectString {
	return &PdfObjectString{
		val:   s,
		isHex: true,
	}
}
@ -208,14 +217,38 @@ func (float *PdfObjectFloat) DefaultWriteString() string {
return fmt.Sprintf("%f", *float)
}
// String returns a string representation of the *PdfObjectString.
func (str *PdfObjectString) String() string {
return fmt.Sprintf("%s", string(*str))
return str.val
}
// Str returns the string value of the PdfObjectString. It is defined in addition to String() to make it clear
// that this method returns the underlying string directly, whereas String() could in principle include
// debug info.
func (str *PdfObjectString) Str() string {
return str.val
}
// Bytes returns the PdfObjectString content as a byte slice. The string-to-slice conversion copies the data,
// so the returned slice does not alias the stored string.
func (str *PdfObjectString) Bytes() []byte {
return []byte(str.val)
}
// DefaultWriteString outputs the object as it is to be written to file.
func (str *PdfObjectString) DefaultWriteString() string {
var output bytes.Buffer
// Handle hex representation.
if str.isHex {
shex := hex.EncodeToString(str.Bytes())
output.WriteString("<")
output.WriteString(shex)
output.WriteString(">")
return output.String()
}
// Otherwise regular string.
escapeSequences := map[byte]string{
'\n': "\\n",
'\r': "\\r",
@ -228,8 +261,8 @@ func (str *PdfObjectString) DefaultWriteString() string {
}
output.WriteString("(")
for i := 0; i < len(*str); i++ {
char := (*str)[i]
for i := 0; i < len(str.val); i++ {
char := str.val[i]
if escStr, useEsc := escapeSequences[char]; useEsc {
output.WriteString(escStr)
} else {
@ -242,7 +275,7 @@ func (str *PdfObjectString) DefaultWriteString() string {
}
func (name *PdfObjectName) String() string {
return fmt.Sprintf("%s", string(*name))
return string(*name)
}
// DefaultWriteString outputs the object as it is to be written to file.

View File

@ -0,0 +1,80 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package core
import (
"testing"
)
// TestHexStringWriteBasic checks that a string object created with MakeHexString is written out as
// hexadecimal digits enclosed in "<" and ">".
func TestHexStringWriteBasic(t *testing.T) {
	cases := map[string]string{
		" ": "<20>",
	}

	for input, want := range cases {
		got := MakeHexString(input).DefaultWriteString()
		if got == want {
			continue
		}
		t.Fatalf("%s: '%s' != '%s'\n", input, got, want)
	}
}
// TestHexStringMulti writes each sample out both as a regular string and as a hexadecimal string, parses
// the written form back, and checks that the content and the hex flag survive the round trip.
func TestHexStringMulti(t *testing.T) {
	samples := []string{
		"This is a string",
		"Strings may contain\n newlines and such",
		string([]byte{0x50, 0x01, 0x00, 0x90, 0xff, 0x49, 0xdf, 0x20, 0x32}),
		"",
	}

	for _, sample := range samples {
		// Serialize the same content in regular and in hexadecimal notation.
		plainOut := MakeString(sample).DefaultWriteString()
		hexOut := MakeHexString(sample).DefaultWriteString()

		// Set up one parser per written form.
		plainParser := makeParserForText(plainOut)
		hexParser := makeParserForText(hexOut)

		// The regular form must come back as a non-hex string with identical content.
		parsed, err := plainParser.parseObject()
		if err != nil {
			t.Fatalf("Error: %v", err)
		}
		plainObj, isStr := parsed.(*PdfObjectString)
		switch {
		case !isStr:
			t.Fatalf("Type incorrect")
		case plainObj.isHex:
			t.Fatalf("Should not be hex")
		case plainObj.Str() != sample:
			t.Fatalf("String mismatch")
		}

		// The hexadecimal form must come back flagged as hex with identical content.
		parsed, err = hexParser.parseObject()
		if err != nil {
			t.Fatalf("Error: %v", err)
		}
		hexObj, isStr := parsed.(*PdfObjectString)
		switch {
		case !isStr:
			t.Fatalf("Type incorrect")
		case !hexObj.isHex:
			t.Fatalf("Should be hex")
		case hexObj.Str() != sample:
			t.Fatalf("String mismatch")
		}
	}
}

View File

@ -29,7 +29,8 @@ func checkBounds(sliceLen, a, b int) error {
return nil
}
// Inspect analyzes the document object structure.
// Inspect analyzes the document object structure. Returns a map of object types (by name) with the instance count
// as value.
func (parser *PdfParser) Inspect() (map[string]int, error) {
return parser.inspect()
}

View File

@ -185,9 +185,9 @@ func (e *Extractor) ExtractText() (string, error) {
switch v := obj.(type) {
case *core.PdfObjectString:
if codemap != nil {
buf.WriteString(codemap.CharcodeBytesToUnicode([]byte(*v)))
buf.WriteString(codemap.CharcodeBytesToUnicode(v.Bytes()))
} else {
buf.WriteString(string(*v))
buf.WriteString(v.Str())
}
case *core.PdfObjectFloat:
if *v < -100 {
@ -212,9 +212,9 @@ func (e *Extractor) ExtractText() (string, error) {
return fmt.Errorf("Invalid parameter type, not string (%T)", op.Params[0])
}
if codemap != nil {
buf.WriteString(codemap.CharcodeBytesToUnicode([]byte(*param)))
buf.WriteString(codemap.CharcodeBytesToUnicode(param.Bytes()))
} else {
buf.WriteString(string(*param))
buf.WriteString(param.Str())
}
}

View File

@ -14,7 +14,7 @@ import (
. "github.com/unidoc/unidoc/pdf/core"
)
//
// PdfColorspace interface defines the common properties of a PDF colorspace.
// The colorspace defines the data storage format for each color and color representation.
//
// Device based colorspace, specified by name
@ -50,6 +50,7 @@ type PdfColorspace interface {
DecodeArray() []float64
}
// PdfColor interface represents a generic color in PDF.
type PdfColor interface {
}
@ -2196,7 +2197,7 @@ func newPdfColorspaceSpecialIndexedFromPdfObject(obj PdfObject) (*PdfColorspaceS
obj = TraceToDirectObject(obj)
var data []byte
if str, ok := obj.(*PdfObjectString); ok {
data = []byte(*str)
data = str.Bytes()
common.Log.Trace("Indexed string color data: % d", data)
} else if stream, ok := obj.(*PdfObjectStream); ok {
common.Log.Trace("Indexed stream: %s", obj.String())

View File

@ -21,7 +21,7 @@ import (
. "github.com/unidoc/unidoc/pdf/core"
)
// PDF page object (7.7.3.3 - Table 30).
// PdfPage represents a page in a PDF document. (7.7.3.3 - Table 30).
type PdfPage struct {
Parent PdfObject
LastModified *PdfDate
@ -115,7 +115,7 @@ func (reader *PdfReader) newPdfPageFromDict(p *PdfObjectDictionary) (*PdfPage, e
if !ok {
return nil, errors.New("Page dictionary LastModified != string")
}
lastmod, err := NewPdfDate(string(*strObj))
lastmod, err := NewPdfDate(strObj.Str())
if err != nil {
return nil, err
}
@ -801,7 +801,7 @@ func (this *PdfPage) SetContentStreams(cStreams []string, encoder StreamEncoder)
func getContentStreamAsString(cstreamObj PdfObject) (string, error) {
if cstream, ok := TraceToDirectObject(cstreamObj).(*PdfObjectString); ok {
return string(*cstream), nil
return cstream.Str(), nil
}
if cstream, ok := TraceToDirectObject(cstreamObj).(*PdfObjectStream); ok {

View File

@ -196,7 +196,7 @@ func TestPdfDateBuild(t *testing.T) {
t.Errorf("Date PDF object should be a string")
return
}
if string(*strObj) != dateStr1 {
if strObj.Str() != dateStr1 {
t.Errorf("Built date string does not match original (%s)", strObj)
return
}

View File

@ -425,7 +425,7 @@ func (this *PdfReader) GetOutlinesFlattened() ([]*PdfOutlineTreeNode, []string,
if item, isItem := node.context.(*PdfOutlineItem); isItem {
*outlineList = append(*outlineList, &item.PdfOutlineTreeNode)
title := strings.Repeat(" ", depth*2) + string(*item.Title)
title := strings.Repeat(" ", depth*2) + item.Title.Str()
*titleList = append(*titleList, title)
if item.Next != nil {
flattenFunc(item.Next, outlineList, titleList, depth)

View File

@ -129,6 +129,5 @@ func (date *PdfDate) ToPdfObject() PdfObject {
str := fmt.Sprintf("D:%.4d%.2d%.2d%.2d%.2d%.2d%c%.2d'%.2d'",
date.year, date.month, date.day, date.hour, date.minute, date.second,
date.utOffsetSign, date.utOffsetHours, date.utOffsetMins)
pdfStr := PdfObjectString(str)
return &pdfStr
return MakeString(str)
}

View File

@ -16,13 +16,13 @@ import (
"fmt"
"io"
"os"
"strings"
"time"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/common/license"
. "github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/model/fonts"
"strings"
)
var pdfCreator = ""
@ -342,8 +342,6 @@ func (this *PdfWriter) AddPage(page *PdfPage) error {
this.addObject(pageObj)
// Traverse the page and record all object references.
err := this.addObjects(pDict)
if err != nil {
@ -500,14 +498,14 @@ func (this *PdfWriter) Encrypt(userPass, ownerPass []byte, options *EncryptOptio
// Prepare the ID object for the trailer.
hashcode := md5.Sum([]byte(time.Now().Format(time.RFC850)))
id0 := PdfObjectString(hashcode[:])
id0 := string(hashcode[:])
b := make([]byte, 100)
rand.Read(b)
hashcode = md5.Sum(b)
id1 := PdfObjectString(hashcode[:])
id1 := string(hashcode[:])
common.Log.Trace("Random b: % x", b)
this.ids = &PdfObjectArray{&id0, &id1}
this.ids = MakeArray(MakeHexString(id0), MakeHexString(id1))
common.Log.Trace("Gen Id 0: % x", id0)
crypter.Id0 = string(id0)
@ -536,8 +534,8 @@ func (this *PdfWriter) Encrypt(userPass, ownerPass []byte, options *EncryptOptio
encDict.Set("V", MakeInteger(int64(crypter.V)))
encDict.Set("R", MakeInteger(int64(crypter.R)))
encDict.Set("Length", MakeInteger(int64(crypter.Length)))
encDict.Set("O", &O)
encDict.Set("U", &U)
encDict.Set("O", MakeHexString(O))
encDict.Set("U", MakeHexString(U))
this.encryptDict = encDict
// Make an object to contain it.