diff --git a/pdf/contentstream/contentstream.go b/pdf/contentstream/contentstream.go index da382708..d8f86d73 100644 --- a/pdf/contentstream/contentstream.go +++ b/pdf/contentstream/contentstream.go @@ -171,7 +171,7 @@ func (this *ContentStreamParser) ExtractText() (string, error) { for _, obj := range *paramList { switch v := obj.(type) { case *PdfObjectString: - txt += string(*v) + txt += v.Str() case *PdfObjectFloat: if *v < -100 { txt += " " @@ -190,7 +190,7 @@ func (this *ContentStreamParser) ExtractText() (string, error) { if !ok { return "", fmt.Errorf("Invalid parameter type, not string (%T)", op.Params[0]) } - txt += string(*param) + txt += param.Str() } } diff --git a/pdf/contentstream/inline-image.go b/pdf/contentstream/inline-image.go index 70f6c883..1d3fa0bd 100644 --- a/pdf/contentstream/inline-image.go +++ b/pdf/contentstream/inline-image.go @@ -358,11 +358,11 @@ func (this *ContentStreamParser) ParseInlineImage() (*ContentStreamInlineImage, return nil, fmt.Errorf("Failed to read inline image - invalid operand") } - if *operand == "EI" { + if operand.Str() == "EI" { // Image fully defined common.Log.Trace("Inline image finished...") return &im, nil - } else if *operand == "ID" { + } else if operand.Str() == "ID" { // Inline image data. // Should get a single space (0x20) followed by the data and then EI. common.Log.Trace("ID start") diff --git a/pdf/contentstream/parser.go b/pdf/contentstream/parser.go index df6847a0..04a5c05b 100644 --- a/pdf/contentstream/parser.go +++ b/pdf/contentstream/parser.go @@ -18,12 +18,12 @@ import ( . "github.com/unidoc/unidoc/pdf/core" ) -// Content stream parser. +// ContentStreamParser represents a content stream parser for parsing content streams in PDFs. type ContentStreamParser struct { reader *bufio.Reader } -// Create a new instance of the content stream parser from an input content +// NewContentStreamParser creates a new instance of the content stream parser from an input content // stream string. func NewContentStreamParser(contentStr string) *ContentStreamParser { // Each command has parameters and an operand (command). @@ -35,7 +35,7 @@ func NewContentStreamParser(contentStr string) *ContentStreamParser { return &parser } -// Parses all commands in content stream, returning a list of operation data. +// Parse parses all commands in content stream, returning a list of operation data. func (this *ContentStreamParser) Parse() (*ContentStreamOperations, error) { operations := ContentStreamOperations{} @@ -52,7 +52,7 @@ func (this *ContentStreamParser) Parse() (*ContentStreamOperations, error) { return &operations, err } if isOperand { - operation.Operand = string(*obj.(*PdfObjectString)) + operation.Operand, _ = GetStringVal(obj) operations = append(operations, &operation) break } else { @@ -252,7 +252,7 @@ func (this *ContentStreamParser) parseNumber() (PdfObject, error) { } // A string starts with '(' and ends with ')'. -func (this *ContentStreamParser) parseString() (PdfObjectString, error) { +func (this *ContentStreamParser) parseString() (*PdfObjectString, error) { this.reader.ReadByte() bytes := []byte{} @@ -260,21 +260,21 @@ func (this *ContentStreamParser) parseString() (PdfObjectString, error) { for { bb, err := this.reader.Peek(1) if err != nil { - return PdfObjectString(bytes), err + return MakeString(string(bytes)), err } if bb[0] == '\\' { // Escape sequence. this.reader.ReadByte() // Skip the escape \ byte. b, err := this.reader.ReadByte() if err != nil { - return PdfObjectString(bytes), err + return MakeString(string(bytes)), err } // Octal '\ddd' number (base 8). if IsOctalDigit(b) { bb, err := this.reader.Peek(2) if err != nil { - return PdfObjectString(bytes), err + return MakeString(string(bytes)), err } numeric := []byte{} @@ -291,7 +291,7 @@ func (this *ContentStreamParser) parseString() (PdfObjectString, error) { common.Log.Trace("Numeric string \"%s\"", numeric) code, err := strconv.ParseUint(string(numeric), 8, 32) if err != nil { - return PdfObjectString(bytes), err + return MakeString(string(bytes)), err } bytes = append(bytes, byte(code)) continue @@ -331,11 +331,11 @@ func (this *ContentStreamParser) parseString() (PdfObjectString, error) { bytes = append(bytes, b) } - return PdfObjectString(bytes), nil + return MakeString(string(bytes)), nil } // Starts with '<' ends with '>'. -func (this *ContentStreamParser) parseHexString() (PdfObjectString, error) { +func (this *ContentStreamParser) parseHexString() (*PdfObjectString, error) { this.reader.ReadByte() hextable := []byte("0123456789abcdefABCDEF") @@ -346,7 +346,7 @@ func (this *ContentStreamParser) parseHexString() (PdfObjectString, error) { bb, err := this.reader.Peek(1) if err != nil { - return PdfObjectString(""), err + return MakeString(""), err } if bb[0] == '>' { @@ -365,7 +365,7 @@ func (this *ContentStreamParser) parseHexString() (PdfObjectString, error) { } buf, _ := hex.DecodeString(string(tmp)) - return PdfObjectString(buf), nil + return MakeHexString(string(buf)), nil } // Starts with '[' ends with ']'. Can contain any kinds of direct objects. @@ -494,12 +494,12 @@ func (this *ContentStreamParser) parseDict() (*PdfObjectDictionary, error) { } // An operand is a text command represented by a word. -func (this *ContentStreamParser) parseOperand() (PdfObjectString, error) { +func (this *ContentStreamParser) parseOperand() (*PdfObjectString, error) { bytes := []byte{} for { bb, err := this.reader.Peek(1) if err != nil { - return PdfObjectString(bytes), err + return MakeString(string(bytes)), err } if IsDelimiter(bb[0]) { break @@ -512,13 +512,13 @@ func (this *ContentStreamParser) parseOperand() (PdfObjectString, error) { bytes = append(bytes, b) } - return PdfObjectString(bytes), nil + return MakeString(string(bytes)), nil } // Parse a generic object. Returns the object, an error code, and a bool // value indicating whether the object is an operand. An operand // is contained in a pdf string object. -func (this *ContentStreamParser) parseObject() (PdfObject, error, bool) { +func (this *ContentStreamParser) parseObject() (obj PdfObject, err error, isop bool) { // Determine the kind of object. // parse it! // make a list of operands, then once operand arrives put into a package. @@ -542,11 +542,11 @@ func (this *ContentStreamParser) parseObject() (PdfObject, error, bool) { } else if bb[0] == '(' { common.Log.Trace("->String!") str, err := this.parseString() - return &str, err, false + return str, err, false } else if bb[0] == '<' && bb[1] != '<' { common.Log.Trace("->Hex String!") str, err := this.parseHexString() - return &str, err, false + return str, err, false } else if bb[0] == '[' { common.Log.Trace("->Array!") arr, err := this.parseArray() @@ -577,7 +577,7 @@ func (this *ContentStreamParser) parseObject() (PdfObject, error, bool) { } operand, err := this.parseOperand() - return &operand, err, true + return operand, err, true } } } diff --git a/pdf/contentstream/utils.go b/pdf/contentstream/utils.go index a6c0e7e6..5672d67f 100644 --- a/pdf/contentstream/utils.go +++ b/pdf/contentstream/utils.go @@ -32,7 +32,7 @@ func makeParamsFromNames(vals []core.PdfObjectName) []core.PdfObject { func makeParamsFromStrings(vals []core.PdfObjectString) []core.PdfObject { params := []core.PdfObject{} for _, val := range vals { - params = append(params, core.MakeString(string(val))) + params = append(params, core.MakeString(val.Str())) } return params } diff --git a/pdf/core/crypt.go b/pdf/core/crypt.go index 16686512..e86c9b21 100644 --- a/pdf/core/crypt.go +++ b/pdf/core/crypt.go @@ -216,7 +216,7 @@ func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCr subfilter, ok := ed.Get("SubFilter").(*PdfObjectString) if ok { - crypter.Subfilter = string(*subfilter) + crypter.Subfilter = subfilter.Str() common.Log.Debug("Using subfilter %s", subfilter) } @@ -263,22 +263,22 @@ func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCr if !ok { return crypter, errors.New("Encrypt dictionary missing O") } - if len(*O) != 32 { - return crypter, fmt.Errorf("Length(O) != 32 (%d)", len(*O)) + if len(O.Str()) != 32 { + return crypter, fmt.Errorf("Length(O) != 32 (%d)", len(O.Str())) } - crypter.O = []byte(*O) + crypter.O = O.Bytes() U, ok := ed.Get("U").(*PdfObjectString) if !ok { return crypter, errors.New("Encrypt dictionary missing U") } - if len(*U) != 32 { + if len(U.Str()) != 32 { // Strictly this does not cause an error. // If O is OK and others then can still read the file. - common.Log.Debug("Warning: Length(U) != 32 (%d)", len(*U)) + common.Log.Debug("Warning: Length(U) != 32 (%d)", len(U.Str())) //return crypter, errors.New("Length(U) != 32") } - crypter.U = []byte(*U) + crypter.U = U.Bytes() P, ok := ed.Get("P").(*PdfObjectInteger) if !ok { @@ -296,17 +296,17 @@ func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCr // Default: empty ID. // Strictly, if file is encrypted, the ID should always be specified // but clearly not everyone is following the specification. - id0 := PdfObjectString("") + id0 := "" if idArray, ok := trailer.Get("ID").(*PdfObjectArray); ok && len(*idArray) >= 1 { id0obj, ok := (*idArray)[0].(*PdfObjectString) if !ok { return crypter, errors.New("Invalid trailer ID") } - id0 = *id0obj + id0 = id0obj.Str() } else { common.Log.Debug("Trailer ID array missing or invalid!") } - crypter.Id0 = string(id0) + crypter.Id0 = id0 return crypter, nil } @@ -382,7 +382,7 @@ func (crypt *PdfCrypt) authenticate(password []byte) (bool, error) { // Try user password. common.Log.Trace("Debugging authentication - user pass") - authenticated, err := crypt.Alg6(password) + authenticated, err := crypt.alg6(password) if err != nil { return false, err } @@ -396,7 +396,7 @@ func (crypt *PdfCrypt) authenticate(password []byte) (bool, error) { // May not be necessary if only want to get all contents. // (user pass needs to be known or empty). common.Log.Trace("Debugging authentication - owner pass") - authenticated, err = crypt.Alg7(password) + authenticated, err = crypt.alg7(password) if err != nil { return false, err } @@ -419,7 +419,7 @@ func (crypt *PdfCrypt) checkAccessRights(password []byte) (bool, AccessPermissio perms := AccessPermissions{} // Try owner password -> full rights. - isOwner, err := crypt.Alg7(password) + isOwner, err := crypt.alg7(password) if err != nil { return false, perms, err } @@ -437,7 +437,7 @@ func (crypt *PdfCrypt) checkAccessRights(password []byte) (bool, AccessPermissio } // Try user password. - isUser, err := crypt.Alg6(password) + isUser, err := crypt.alg6(password) if err != nil { return false, perms, err } @@ -721,17 +721,17 @@ func (crypt *PdfCrypt) Decrypt(obj PdfObject, parentObjNum, parentGenNum int64) } // Overwrite the encrypted with decrypted string. - decrypted := make([]byte, len(*s)) - for i := 0; i < len(*s); i++ { - decrypted[i] = (*s)[i] + decrypted := make([]byte, len(s.Str())) + str := s.Str() + for i := 0; i < len(str); i++ { + decrypted[i] = str[i] } common.Log.Trace("Decrypt string: %s : % x", decrypted, decrypted) decrypted, err = crypt.decryptBytes(decrypted, stringFilter, key) if err != nil { return err } - *s = PdfObjectString(decrypted) - + s.val = string(decrypted) return nil } @@ -968,16 +968,17 @@ func (crypt *PdfCrypt) Encrypt(obj PdfObject, parentObjNum, parentGenNum int64) return err } - encrypted := make([]byte, len(*s)) - for i := 0; i < len(*s); i++ { - encrypted[i] = (*s)[i] + str := s.Str() + encrypted := make([]byte, len(str)) + for i := 0; i < len(str); i++ { + encrypted[i] = str[i] } common.Log.Trace("Encrypt string: %s : % x", encrypted, encrypted) encrypted, err = crypt.encryptBytes(encrypted, stringFilter, key) if err != nil { return err } - *s = PdfObjectString(encrypted) + s.val = string(encrypted) return nil } @@ -1022,10 +1023,9 @@ func (crypt *PdfCrypt) Encrypt(obj PdfObject, parentObjNum, parentGenNum int64) return nil } -// Alg2 computes an encryption key. -// TODO (v3): Unexport. -func (crypt *PdfCrypt) Alg2(pass []byte) []byte { - common.Log.Trace("Alg2") +// alg2 computes an encryption key. +func (crypt *PdfCrypt) alg2(pass []byte) []byte { + common.Log.Trace("alg2") key := crypt.paddedPass(pass) h := md5.New() @@ -1091,10 +1091,9 @@ func (crypt *PdfCrypt) alg3Key(pass []byte) []byte { } // Alg3 computes the encryption dictionary’s O (owner password) value. -// TODO (v3): Unexport. -func (crypt *PdfCrypt) Alg3(upass, opass []byte) (PdfObjectString, error) { +func (crypt *PdfCrypt) Alg3(upass, opass []byte) (string, error) { // Return O string val. - O := PdfObjectString("") + O := "" var encKey []byte if len(opass) > 0 { @@ -1126,16 +1125,15 @@ func (crypt *PdfCrypt) Alg3(upass, opass []byte) (PdfObjectString, error) { } } - O = PdfObjectString(encrypted) + O = string(encrypted) return O, nil } -// Alg4 computes the encryption dictionary’s U (user password) value (Security handlers of revision 2). -// TODO (v3): Unexport. -func (crypt *PdfCrypt) Alg4(upass []byte) (PdfObjectString, []byte, error) { - U := PdfObjectString("") +// alg4 computes the encryption dictionary’s U (user password) value (Security handlers of revision 2). +func (crypt *PdfCrypt) alg4(upass []byte) (string, []byte, error) { + U := "" - ekey := crypt.Alg2(upass) + ekey := crypt.alg2(upass) ciph, err := rc4.NewCipher(ekey) if err != nil { return U, ekey, errors.New("Failed rc4 ciph") @@ -1145,16 +1143,15 @@ func (crypt *PdfCrypt) Alg4(upass []byte) (PdfObjectString, []byte, error) { encrypted := make([]byte, len(s)) ciph.XORKeyStream(encrypted, s) - U = PdfObjectString(encrypted) + U = string(encrypted) return U, ekey, nil } // Alg5 computes the encryption dictionary’s U (user password) value (Security handlers of revision 3 or greater). -// TODO (v3): Unexport. -func (crypt *PdfCrypt) Alg5(upass []byte) (PdfObjectString, []byte, error) { - U := PdfObjectString("") +func (crypt *PdfCrypt) Alg5(upass []byte) (string, []byte, error) { + U := "" - ekey := crypt.Alg2(upass) + ekey := crypt.alg2(upass) h := md5.New() h.Write([]byte(padding)) @@ -1209,18 +1206,17 @@ func (crypt *PdfCrypt) Alg5(upass []byte) (PdfObjectString, []byte, error) { return U, ekey, errors.New("Failed to gen rand number") } - U = PdfObjectString(bb) + U = string(bb) return U, ekey, nil } -// Alg6 authenticates the user password. -// TODO (v3): Unexport. -func (crypt *PdfCrypt) Alg6(upass []byte) (bool, error) { - var uo PdfObjectString +// alg6 authenticates the user password. +func (crypt *PdfCrypt) alg6(upass []byte) (bool, error) { + var uo string var err error var key []byte if crypt.R == 2 { - uo, key, err = crypt.Alg4(upass) + uo, key, err = crypt.alg4(upass) } else if crypt.R >= 3 { uo, key, err = crypt.Alg5(upass) } else { @@ -1254,9 +1250,8 @@ func (crypt *PdfCrypt) Alg6(upass []byte) (bool, error) { return false, nil } -// Alg7 authenticates the owner password. -// TODO (v3): Unexport. -func (crypt *PdfCrypt) Alg7(opass []byte) (bool, error) { +// alg7 authenticates the owner password. +func (crypt *PdfCrypt) alg7(opass []byte) (bool, error) { encKey := crypt.alg3Key(opass) decrypted := make([]byte, len(crypt.O)) @@ -1285,7 +1280,7 @@ func (crypt *PdfCrypt) Alg7(opass []byte) (bool, error) { return false, errors.New("invalid R") } - auth, err := crypt.Alg6(decrypted) + auth, err := crypt.alg6(decrypted) if err != nil { return false, nil } diff --git a/pdf/core/crypt_test.go b/pdf/core/crypt_test.go index b85615e1..0f47b875 100644 --- a/pdf/core/crypt_test.go +++ b/pdf/core/crypt_test.go @@ -63,7 +63,7 @@ func TestAlg2(t *testing.T) { crypter.Length = 128 crypter.EncryptMetadata = true - key := crypter.Alg2([]byte("")) + key := crypter.alg2([]byte("")) keyExp := []byte{0xf8, 0x94, 0x9c, 0x5a, 0xf5, 0xa0, 0xc0, 0xca, 0x30, 0xb8, 0x91, 0xc1, 0xbb, 0x2c, 0x4f, 0xf5} diff --git a/pdf/core/parser.go b/pdf/core/parser.go index 6aac638f..bbc8ffb0 100644 --- a/pdf/core/parser.go +++ b/pdf/core/parser.go @@ -297,7 +297,7 @@ func (parser *PdfParser) parseNumber() (PdfObject, error) { } // A string starts with '(' and ends with ')'. -func (parser *PdfParser) parseString() (PdfObjectString, error) { +func (parser *PdfParser) parseString() (*PdfObjectString, error) { parser.reader.ReadByte() var r bytes.Buffer @@ -305,21 +305,21 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) { for { bb, err := parser.reader.Peek(1) if err != nil { - return PdfObjectString(r.String()), err + return MakeString(r.String()), err } if bb[0] == '\\' { // Escape sequence. parser.reader.ReadByte() // Skip the escape \ byte. b, err := parser.reader.ReadByte() if err != nil { - return PdfObjectString(r.String()), err + return MakeString(r.String()), err } // Octal '\ddd' number (base 8). if IsOctalDigit(b) { bb, err := parser.reader.Peek(2) if err != nil { - return PdfObjectString(r.String()), err + return MakeString(r.String()), err } numeric := []byte{} @@ -336,7 +336,7 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) { common.Log.Trace("Numeric string \"%s\"", numeric) code, err := strconv.ParseUint(string(numeric), 8, 32) if err != nil { - return PdfObjectString(r.String()), err + return MakeString(r.String()), err } r.WriteByte(byte(code)) continue @@ -376,19 +376,19 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) { r.WriteByte(b) } - return PdfObjectString(r.String()), nil + return MakeString(r.String()), nil } // Starts with '<' ends with '>'. // Currently not converting the hex codes to characters. -func (parser *PdfParser) parseHexString() (PdfObjectString, error) { +func (parser *PdfParser) parseHexString() (*PdfObjectString, error) { parser.reader.ReadByte() var r bytes.Buffer for { bb, err := parser.reader.Peek(1) if err != nil { - return PdfObjectString(""), err + return MakeString(""), err } if bb[0] == '>' { @@ -407,7 +407,7 @@ func (parser *PdfParser) parseHexString() (PdfObjectString, error) { } buf, _ := hex.DecodeString(r.String()) - return PdfObjectString(buf), nil + return MakeHexString(string(buf)), nil } // Starts with '[' ends with ']'. Can contain any kinds of direct objects. @@ -506,7 +506,7 @@ func (parser *PdfParser) parseObject() (PdfObject, error) { } else if bb[0] == '(' { common.Log.Trace("->String!") str, err := parser.parseString() - return &str, err + return str, err } else if bb[0] == '[' { common.Log.Trace("->Array!") arr, err := parser.parseArray() @@ -518,7 +518,7 @@ func (parser *PdfParser) parseObject() (PdfObject, error) { } else if bb[0] == '<' { common.Log.Trace("->Hex string!") str, err := parser.parseHexString() - return &str, err + return str, err } else if bb[0] == '%' { parser.readComment() parser.skipSpaces() diff --git a/pdf/core/parser_test.go b/pdf/core/parser_test.go index 011c49fd..a4a51faa 100644 --- a/pdf/core/parser_test.go +++ b/pdf/core/parser_test.go @@ -132,7 +132,7 @@ func TestStringParsing(t *testing.T) { if err != nil && err != io.EOF { t.Errorf("Unable to parse string, error: %s", err) } - if string(o) != expected { + if o.Str() != expected { t.Errorf("String Mismatch %s: \"%s\" != \"%s\"", raw, o, expected) } } @@ -164,8 +164,8 @@ func TestBinStringParsing(t *testing.T) { if err != nil && err != io.EOF { t.Errorf("Unable to parse string, error: %s", err) } - if len(string(o)) != 32 { - t.Errorf("Wrong length, should be 32 (got %d)", len(string(o))) + if len(o.Str()) != 32 { + t.Errorf("Wrong length, should be 32 (got %d)", len(o.Str())) } } @@ -347,7 +347,7 @@ func BenchmarkHexStringParsing(b *testing.B) { b.Errorf("Error parsing hex string: %s", err.Error()) return } - if string(hs) != ref.String() { + if hs.Str() != ref.String() { b.Errorf("Reference and parsed hex strings mismatch") } parser.SetFileOffset(0) @@ -426,7 +426,7 @@ func TestDictParsing2(t *testing.T) { } str, ok := dict.Get("StringItem").(*PdfObjectString) - if !ok || *str != "a string" { + if !ok || str.Str() != "a string" { t.Errorf("Invalid string item") } diff --git a/pdf/core/primitives.go b/pdf/core/primitives.go index dba8a6e0..98435615 100644 --- a/pdf/core/primitives.go +++ b/pdf/core/primitives.go @@ -7,6 +7,7 @@ package core import ( "bytes" + "encoding/hex" "fmt" "github.com/unidoc/unidoc/common" @@ -31,8 +32,10 @@ type PdfObjectInteger int64 type PdfObjectFloat float64 // PdfObjectString represents the primitive PDF string object. -// TODO (v3): Change to a struct and add a flag for hex/plaintext. -type PdfObjectString string +type PdfObjectString struct { + val string + isHex bool +} // PdfObjectName represents the primitive PDF name object. type PdfObjectName string @@ -135,7 +138,13 @@ func MakeFloat(val float64) *PdfObjectFloat { // MakeString creates an PdfObjectString from a string. func MakeString(s string) *PdfObjectString { - str := PdfObjectString(s) + str := PdfObjectString{val: s} + return &str +} + +// MakeHexString creates an PdfObjectString from a string intended for output as a hexadecimal string. +func MakeHexString(s string) *PdfObjectString { + str := PdfObjectString{val: s, isHex: true} return &str } @@ -208,14 +217,38 @@ func (float *PdfObjectFloat) DefaultWriteString() string { return fmt.Sprintf("%f", *float) } +// String returns a string representation of the *PdfObjectString. func (str *PdfObjectString) String() string { - return fmt.Sprintf("%s", string(*str)) + return str.val +} + +// Str returns the string value of the PdfObjectString. Defined in addition to String() function to clarify that +// this function returns the underlying string directly, whereas the String function technically could include +// debug info. +func (str *PdfObjectString) Str() string { + return str.val +} + +// Bytes returns the PdfObjectString content as a []byte array. +func (str *PdfObjectString) Bytes() []byte { + return []byte(str.val) } // DefaultWriteString outputs the object as it is to be written to file. func (str *PdfObjectString) DefaultWriteString() string { var output bytes.Buffer + // Handle hex representation. + if str.isHex { + shex := hex.EncodeToString(str.Bytes()) + output.WriteString("<") + output.WriteString(shex) + output.WriteString(">") + return output.String() + } + + // Otherwise regular string. + escapeSequences := map[byte]string{ '\n': "\\n", '\r': "\\r", @@ -228,8 +261,8 @@ func (str *PdfObjectString) DefaultWriteString() string { } output.WriteString("(") - for i := 0; i < len(*str); i++ { - char := (*str)[i] + for i := 0; i < len(str.val); i++ { + char := str.val[i] if escStr, useEsc := escapeSequences[char]; useEsc { output.WriteString(escStr) } else { @@ -242,7 +275,7 @@ func (str *PdfObjectString) DefaultWriteString() string { } func (name *PdfObjectName) String() string { - return fmt.Sprintf("%s", string(*name)) + return string(*name) } // DefaultWriteString outputs the object as it is to be written to file. diff --git a/pdf/core/primitives_test.go b/pdf/core/primitives_test.go new file mode 100644 index 00000000..a0cb426d --- /dev/null +++ b/pdf/core/primitives_test.go @@ -0,0 +1,80 @@ +/* + * This file is subject to the terms and conditions defined in + * file 'LICENSE.md', which is part of this source code package. + */ + +package core + +import ( + "testing" +) + +func TestHexStringWriteBasic(t *testing.T) { + testcases := map[string]string{ + " ": "<20>", + } + + for src, expected := range testcases { + strObj := MakeHexString(src) + ws := strObj.DefaultWriteString() + + if ws != expected { + t.Fatalf("%s: '%s' != '%s'\n", src, ws, expected) + } + } +} + +// Test writing and parsing back of hexadecimal and regular strings. +func TestHexStringMulti(t *testing.T) { + testcases := []string{ + "This is a string", + "Strings may contain\n newlines and such", + string([]byte{0x50, 0x01, 0x00, 0x90, 0xff, 0x49, 0xdf, 0x20, 0x32}), + "", + } + + for _, testcase := range testcases { + // Make *PdfObject representations for regular and hexadecimal strings. + s := MakeString(testcase) + shex := MakeHexString(testcase) + + // Write out. + writestr := s.DefaultWriteString() + writestrhex := shex.DefaultWriteString() + + // Parse back. + parser1 := makeParserForText(writestr) + parser2 := makeParserForText(writestrhex) + + // Check that representation is correct. + obj1, err := parser1.parseObject() + if err != nil { + t.Fatalf("Error: %v", err) + } + strObj1, ok := obj1.(*PdfObjectString) + if !ok { + t.Fatalf("Type incorrect") + } + if strObj1.isHex != false { + t.Fatalf("Should not be hex") + } + if strObj1.Str() != testcase { + t.Fatalf("String mismatch") + } + + obj2, err := parser2.parseObject() + if err != nil { + t.Fatalf("Error: %v", err) + } + strObj2, ok := obj2.(*PdfObjectString) + if !ok { + t.Fatalf("Type incorrect") + } + if strObj2.isHex != true { + t.Fatalf("Should be hex") + } + if strObj2.Str() != testcase { + t.Fatalf("String mismatch") + } + } +} diff --git a/pdf/core/utils.go b/pdf/core/utils.go index 10f0f30f..54dd126e 100644 --- a/pdf/core/utils.go +++ b/pdf/core/utils.go @@ -29,7 +29,8 @@ func checkBounds(sliceLen, a, b int) error { return nil } -// Inspect analyzes the document object structure. +// Inspect analyzes the document object structure. Returns a map of object types (by name) with the instance count +// as value. func (parser *PdfParser) Inspect() (map[string]int, error) { return parser.inspect() } diff --git a/pdf/extractor/text.go b/pdf/extractor/text.go index 6b333db4..ba05947e 100644 --- a/pdf/extractor/text.go +++ b/pdf/extractor/text.go @@ -185,9 +185,9 @@ func (e *Extractor) ExtractText() (string, error) { switch v := obj.(type) { case *core.PdfObjectString: if codemap != nil { - buf.WriteString(codemap.CharcodeBytesToUnicode([]byte(*v))) + buf.WriteString(codemap.CharcodeBytesToUnicode(v.Bytes())) } else { - buf.WriteString(string(*v)) + buf.WriteString(v.Str()) } case *core.PdfObjectFloat: if *v < -100 { @@ -212,9 +212,9 @@ func (e *Extractor) ExtractText() (string, error) { return fmt.Errorf("Invalid parameter type, not string (%T)", op.Params[0]) } if codemap != nil { - buf.WriteString(codemap.CharcodeBytesToUnicode([]byte(*param))) + buf.WriteString(codemap.CharcodeBytesToUnicode(param.Bytes())) } else { - buf.WriteString(string(*param)) + buf.WriteString(param.Str()) } } diff --git a/pdf/model/colorspace.go b/pdf/model/colorspace.go index ce9ee44c..79cdae94 100644 --- a/pdf/model/colorspace.go +++ b/pdf/model/colorspace.go @@ -14,7 +14,7 @@ import ( . "github.com/unidoc/unidoc/pdf/core" ) -// +// PdfColorspace interface defines the common properties of a PDF colorspace. // The colorspace defines the data storage format for each color and color representation. // // Device based colorspace, specified by name @@ -50,6 +50,7 @@ type PdfColorspace interface { DecodeArray() []float64 } +// PdfColor interface represents a generic color in PDF. type PdfColor interface { } @@ -2196,7 +2197,7 @@ func newPdfColorspaceSpecialIndexedFromPdfObject(obj PdfObject) (*PdfColorspaceS obj = TraceToDirectObject(obj) var data []byte if str, ok := obj.(*PdfObjectString); ok { - data = []byte(*str) + data = str.Bytes() common.Log.Trace("Indexed string color data: % d", data) } else if stream, ok := obj.(*PdfObjectStream); ok { common.Log.Trace("Indexed stream: %s", obj.String()) diff --git a/pdf/model/page.go b/pdf/model/page.go index c1c7df1e..83aa2b76 100644 --- a/pdf/model/page.go +++ b/pdf/model/page.go @@ -21,7 +21,7 @@ import ( . "github.com/unidoc/unidoc/pdf/core" ) -// PDF page object (7.7.3.3 - Table 30). +// PdfPage represents a page in a PDF document. (7.7.3.3 - Table 30). type PdfPage struct { Parent PdfObject LastModified *PdfDate @@ -115,7 +115,7 @@ func (reader *PdfReader) newPdfPageFromDict(p *PdfObjectDictionary) (*PdfPage, e if !ok { return nil, errors.New("Page dictionary LastModified != string") } - lastmod, err := NewPdfDate(string(*strObj)) + lastmod, err := NewPdfDate(strObj.Str()) if err != nil { return nil, err } @@ -801,7 +801,7 @@ func (this *PdfPage) SetContentStreams(cStreams []string, encoder StreamEncoder) func getContentStreamAsString(cstreamObj PdfObject) (string, error) { if cstream, ok := TraceToDirectObject(cstreamObj).(*PdfObjectString); ok { - return string(*cstream), nil + return cstream.Str(), nil } if cstream, ok := TraceToDirectObject(cstreamObj).(*PdfObjectStream); ok { diff --git a/pdf/model/page_test.go b/pdf/model/page_test.go index 6b1e813c..d47fc3eb 100644 --- a/pdf/model/page_test.go +++ b/pdf/model/page_test.go @@ -196,7 +196,7 @@ func TestPdfDateBuild(t *testing.T) { t.Errorf("Date PDF object should be a string") return } - if string(*strObj) != dateStr1 { + if strObj.Str() != dateStr1 { t.Errorf("Built date string does not match original (%s)", strObj) return } diff --git a/pdf/model/reader.go b/pdf/model/reader.go index 152f020f..dde9652f 100644 --- a/pdf/model/reader.go +++ b/pdf/model/reader.go @@ -425,7 +425,7 @@ func (this *PdfReader) GetOutlinesFlattened() ([]*PdfOutlineTreeNode, []string, if item, isItem := node.context.(*PdfOutlineItem); isItem { *outlineList = append(*outlineList, &item.PdfOutlineTreeNode) - title := strings.Repeat(" ", depth*2) + string(*item.Title) + title := strings.Repeat(" ", depth*2) + item.Title.Str() *titleList = append(*titleList, title) if item.Next != nil { flattenFunc(item.Next, outlineList, titleList, depth) diff --git a/pdf/model/structures.go b/pdf/model/structures.go index 65194cc0..fa628960 100644 --- a/pdf/model/structures.go +++ b/pdf/model/structures.go @@ -129,6 +129,5 @@ func (date *PdfDate) ToPdfObject() PdfObject { str := fmt.Sprintf("D:%.4d%.2d%.2d%.2d%.2d%.2d%c%.2d'%.2d'", date.year, date.month, date.day, date.hour, date.minute, date.second, date.utOffsetSign, date.utOffsetHours, date.utOffsetMins) - pdfStr := PdfObjectString(str) - return &pdfStr + return MakeString(str) } diff --git a/pdf/model/writer.go b/pdf/model/writer.go index c38baacf..910f8f38 100644 --- a/pdf/model/writer.go +++ b/pdf/model/writer.go @@ -16,13 +16,13 @@ import ( "fmt" "io" "os" + "strings" "time" "github.com/unidoc/unidoc/common" "github.com/unidoc/unidoc/common/license" . "github.com/unidoc/unidoc/pdf/core" "github.com/unidoc/unidoc/pdf/model/fonts" - "strings" ) var pdfCreator = "" @@ -342,8 +342,6 @@ func (this *PdfWriter) AddPage(page *PdfPage) error { this.addObject(pageObj) - - // Traverse the page and record all object references. err := this.addObjects(pDict) if err != nil { @@ -500,14 +498,14 @@ func (this *PdfWriter) Encrypt(userPass, ownerPass []byte, options *EncryptOptio // Prepare the ID object for the trailer. hashcode := md5.Sum([]byte(time.Now().Format(time.RFC850))) - id0 := PdfObjectString(hashcode[:]) + id0 := string(hashcode[:]) b := make([]byte, 100) rand.Read(b) hashcode = md5.Sum(b) - id1 := PdfObjectString(hashcode[:]) + id1 := string(hashcode[:]) common.Log.Trace("Random b: % x", b) - this.ids = &PdfObjectArray{&id0, &id1} + this.ids = MakeArray(MakeHexString(id0), MakeHexString(id1)) common.Log.Trace("Gen Id 0: % x", id0) crypter.Id0 = string(id0) @@ -536,8 +534,8 @@ func (this *PdfWriter) Encrypt(userPass, ownerPass []byte, options *EncryptOptio encDict.Set("V", MakeInteger(int64(crypter.V))) encDict.Set("R", MakeInteger(int64(crypter.R))) encDict.Set("Length", MakeInteger(int64(crypter.Length))) - encDict.Set("O", &O) - encDict.Set("U", &U) + encDict.Set("O", MakeHexString(O)) + encDict.Set("U", MakeHexString(U)) this.encryptDict = encDict // Make an object to contain it.