mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-26 13:48:55 +08:00
Preserve and allow output of hexadecimal strings
Refactored PdfObjectString into a struct with bool flag for hex. Fixed any code broken by the change. Unexported non-essential functions for crypto (not used by model). Can unexport more later or refactor to internal package.
This commit is contained in:
parent
5e9ff8d3ca
commit
14ee80e1fe
@ -171,7 +171,7 @@ func (this *ContentStreamParser) ExtractText() (string, error) {
|
||||
for _, obj := range *paramList {
|
||||
switch v := obj.(type) {
|
||||
case *PdfObjectString:
|
||||
txt += string(*v)
|
||||
txt += v.Str()
|
||||
case *PdfObjectFloat:
|
||||
if *v < -100 {
|
||||
txt += " "
|
||||
@ -190,7 +190,7 @@ func (this *ContentStreamParser) ExtractText() (string, error) {
|
||||
if !ok {
|
||||
return "", fmt.Errorf("Invalid parameter type, not string (%T)", op.Params[0])
|
||||
}
|
||||
txt += string(*param)
|
||||
txt += param.Str()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -358,11 +358,11 @@ func (this *ContentStreamParser) ParseInlineImage() (*ContentStreamInlineImage,
|
||||
return nil, fmt.Errorf("Failed to read inline image - invalid operand")
|
||||
}
|
||||
|
||||
if *operand == "EI" {
|
||||
if operand.Str() == "EI" {
|
||||
// Image fully defined
|
||||
common.Log.Trace("Inline image finished...")
|
||||
return &im, nil
|
||||
} else if *operand == "ID" {
|
||||
} else if operand.Str() == "ID" {
|
||||
// Inline image data.
|
||||
// Should get a single space (0x20) followed by the data and then EI.
|
||||
common.Log.Trace("ID start")
|
||||
|
@ -18,12 +18,12 @@ import (
|
||||
. "github.com/unidoc/unidoc/pdf/core"
|
||||
)
|
||||
|
||||
// Content stream parser.
|
||||
// ContentStreamParser represents a content stream parser for parsing content streams in PDFs.
|
||||
type ContentStreamParser struct {
|
||||
reader *bufio.Reader
|
||||
}
|
||||
|
||||
// Create a new instance of the content stream parser from an input content
|
||||
// NewContentStreamParser creates a new instance of the content stream parser from an input content
|
||||
// stream string.
|
||||
func NewContentStreamParser(contentStr string) *ContentStreamParser {
|
||||
// Each command has parameters and an operand (command).
|
||||
@ -35,7 +35,7 @@ func NewContentStreamParser(contentStr string) *ContentStreamParser {
|
||||
return &parser
|
||||
}
|
||||
|
||||
// Parses all commands in content stream, returning a list of operation data.
|
||||
// Parse parses all commands in content stream, returning a list of operation data.
|
||||
func (this *ContentStreamParser) Parse() (*ContentStreamOperations, error) {
|
||||
operations := ContentStreamOperations{}
|
||||
|
||||
@ -52,7 +52,7 @@ func (this *ContentStreamParser) Parse() (*ContentStreamOperations, error) {
|
||||
return &operations, err
|
||||
}
|
||||
if isOperand {
|
||||
operation.Operand = string(*obj.(*PdfObjectString))
|
||||
operation.Operand, _ = GetStringVal(obj)
|
||||
operations = append(operations, &operation)
|
||||
break
|
||||
} else {
|
||||
@ -252,7 +252,7 @@ func (this *ContentStreamParser) parseNumber() (PdfObject, error) {
|
||||
}
|
||||
|
||||
// A string starts with '(' and ends with ')'.
|
||||
func (this *ContentStreamParser) parseString() (PdfObjectString, error) {
|
||||
func (this *ContentStreamParser) parseString() (*PdfObjectString, error) {
|
||||
this.reader.ReadByte()
|
||||
|
||||
bytes := []byte{}
|
||||
@ -260,21 +260,21 @@ func (this *ContentStreamParser) parseString() (PdfObjectString, error) {
|
||||
for {
|
||||
bb, err := this.reader.Peek(1)
|
||||
if err != nil {
|
||||
return PdfObjectString(bytes), err
|
||||
return MakeString(string(bytes)), err
|
||||
}
|
||||
|
||||
if bb[0] == '\\' { // Escape sequence.
|
||||
this.reader.ReadByte() // Skip the escape \ byte.
|
||||
b, err := this.reader.ReadByte()
|
||||
if err != nil {
|
||||
return PdfObjectString(bytes), err
|
||||
return MakeString(string(bytes)), err
|
||||
}
|
||||
|
||||
// Octal '\ddd' number (base 8).
|
||||
if IsOctalDigit(b) {
|
||||
bb, err := this.reader.Peek(2)
|
||||
if err != nil {
|
||||
return PdfObjectString(bytes), err
|
||||
return MakeString(string(bytes)), err
|
||||
}
|
||||
|
||||
numeric := []byte{}
|
||||
@ -291,7 +291,7 @@ func (this *ContentStreamParser) parseString() (PdfObjectString, error) {
|
||||
common.Log.Trace("Numeric string \"%s\"", numeric)
|
||||
code, err := strconv.ParseUint(string(numeric), 8, 32)
|
||||
if err != nil {
|
||||
return PdfObjectString(bytes), err
|
||||
return MakeString(string(bytes)), err
|
||||
}
|
||||
bytes = append(bytes, byte(code))
|
||||
continue
|
||||
@ -331,11 +331,11 @@ func (this *ContentStreamParser) parseString() (PdfObjectString, error) {
|
||||
bytes = append(bytes, b)
|
||||
}
|
||||
|
||||
return PdfObjectString(bytes), nil
|
||||
return MakeString(string(bytes)), nil
|
||||
}
|
||||
|
||||
// Starts with '<' ends with '>'.
|
||||
func (this *ContentStreamParser) parseHexString() (PdfObjectString, error) {
|
||||
func (this *ContentStreamParser) parseHexString() (*PdfObjectString, error) {
|
||||
this.reader.ReadByte()
|
||||
|
||||
hextable := []byte("0123456789abcdefABCDEF")
|
||||
@ -346,7 +346,7 @@ func (this *ContentStreamParser) parseHexString() (PdfObjectString, error) {
|
||||
|
||||
bb, err := this.reader.Peek(1)
|
||||
if err != nil {
|
||||
return PdfObjectString(""), err
|
||||
return MakeString(""), err
|
||||
}
|
||||
|
||||
if bb[0] == '>' {
|
||||
@ -365,7 +365,7 @@ func (this *ContentStreamParser) parseHexString() (PdfObjectString, error) {
|
||||
}
|
||||
|
||||
buf, _ := hex.DecodeString(string(tmp))
|
||||
return PdfObjectString(buf), nil
|
||||
return MakeHexString(string(buf)), nil
|
||||
}
|
||||
|
||||
// Starts with '[' ends with ']'. Can contain any kinds of direct objects.
|
||||
@ -494,12 +494,12 @@ func (this *ContentStreamParser) parseDict() (*PdfObjectDictionary, error) {
|
||||
}
|
||||
|
||||
// An operand is a text command represented by a word.
|
||||
func (this *ContentStreamParser) parseOperand() (PdfObjectString, error) {
|
||||
func (this *ContentStreamParser) parseOperand() (*PdfObjectString, error) {
|
||||
bytes := []byte{}
|
||||
for {
|
||||
bb, err := this.reader.Peek(1)
|
||||
if err != nil {
|
||||
return PdfObjectString(bytes), err
|
||||
return MakeString(string(bytes)), err
|
||||
}
|
||||
if IsDelimiter(bb[0]) {
|
||||
break
|
||||
@ -512,13 +512,13 @@ func (this *ContentStreamParser) parseOperand() (PdfObjectString, error) {
|
||||
bytes = append(bytes, b)
|
||||
}
|
||||
|
||||
return PdfObjectString(bytes), nil
|
||||
return MakeString(string(bytes)), nil
|
||||
}
|
||||
|
||||
// Parse a generic object. Returns the object, an error code, and a bool
|
||||
// value indicating whether the object is an operand. An operand
|
||||
// is contained in a pdf string object.
|
||||
func (this *ContentStreamParser) parseObject() (PdfObject, error, bool) {
|
||||
func (this *ContentStreamParser) parseObject() (obj PdfObject, err error, isop bool) {
|
||||
// Determine the kind of object.
|
||||
// parse it!
|
||||
// make a list of operands, then once operand arrives put into a package.
|
||||
@ -542,11 +542,11 @@ func (this *ContentStreamParser) parseObject() (PdfObject, error, bool) {
|
||||
} else if bb[0] == '(' {
|
||||
common.Log.Trace("->String!")
|
||||
str, err := this.parseString()
|
||||
return &str, err, false
|
||||
return str, err, false
|
||||
} else if bb[0] == '<' && bb[1] != '<' {
|
||||
common.Log.Trace("->Hex String!")
|
||||
str, err := this.parseHexString()
|
||||
return &str, err, false
|
||||
return str, err, false
|
||||
} else if bb[0] == '[' {
|
||||
common.Log.Trace("->Array!")
|
||||
arr, err := this.parseArray()
|
||||
@ -577,7 +577,7 @@ func (this *ContentStreamParser) parseObject() (PdfObject, error, bool) {
|
||||
}
|
||||
|
||||
operand, err := this.parseOperand()
|
||||
return &operand, err, true
|
||||
return operand, err, true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -32,7 +32,7 @@ func makeParamsFromNames(vals []core.PdfObjectName) []core.PdfObject {
|
||||
func makeParamsFromStrings(vals []core.PdfObjectString) []core.PdfObject {
|
||||
params := []core.PdfObject{}
|
||||
for _, val := range vals {
|
||||
params = append(params, core.MakeString(string(val)))
|
||||
params = append(params, core.MakeString(val.Str()))
|
||||
}
|
||||
return params
|
||||
}
|
||||
|
@ -216,7 +216,7 @@ func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCr
|
||||
|
||||
subfilter, ok := ed.Get("SubFilter").(*PdfObjectString)
|
||||
if ok {
|
||||
crypter.Subfilter = string(*subfilter)
|
||||
crypter.Subfilter = subfilter.Str()
|
||||
common.Log.Debug("Using subfilter %s", subfilter)
|
||||
}
|
||||
|
||||
@ -263,22 +263,22 @@ func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCr
|
||||
if !ok {
|
||||
return crypter, errors.New("Encrypt dictionary missing O")
|
||||
}
|
||||
if len(*O) != 32 {
|
||||
return crypter, fmt.Errorf("Length(O) != 32 (%d)", len(*O))
|
||||
if len(O.Str()) != 32 {
|
||||
return crypter, fmt.Errorf("Length(O) != 32 (%d)", len(O.Str()))
|
||||
}
|
||||
crypter.O = []byte(*O)
|
||||
crypter.O = O.Bytes()
|
||||
|
||||
U, ok := ed.Get("U").(*PdfObjectString)
|
||||
if !ok {
|
||||
return crypter, errors.New("Encrypt dictionary missing U")
|
||||
}
|
||||
if len(*U) != 32 {
|
||||
if len(U.Str()) != 32 {
|
||||
// Strictly this does not cause an error.
|
||||
// If O is OK and others then can still read the file.
|
||||
common.Log.Debug("Warning: Length(U) != 32 (%d)", len(*U))
|
||||
common.Log.Debug("Warning: Length(U) != 32 (%d)", len(U.Str()))
|
||||
//return crypter, errors.New("Length(U) != 32")
|
||||
}
|
||||
crypter.U = []byte(*U)
|
||||
crypter.U = U.Bytes()
|
||||
|
||||
P, ok := ed.Get("P").(*PdfObjectInteger)
|
||||
if !ok {
|
||||
@ -296,17 +296,17 @@ func PdfCryptMakeNew(parser *PdfParser, ed, trailer *PdfObjectDictionary) (PdfCr
|
||||
// Default: empty ID.
|
||||
// Strictly, if file is encrypted, the ID should always be specified
|
||||
// but clearly not everyone is following the specification.
|
||||
id0 := PdfObjectString("")
|
||||
id0 := ""
|
||||
if idArray, ok := trailer.Get("ID").(*PdfObjectArray); ok && len(*idArray) >= 1 {
|
||||
id0obj, ok := (*idArray)[0].(*PdfObjectString)
|
||||
if !ok {
|
||||
return crypter, errors.New("Invalid trailer ID")
|
||||
}
|
||||
id0 = *id0obj
|
||||
id0 = id0obj.Str()
|
||||
} else {
|
||||
common.Log.Debug("Trailer ID array missing or invalid!")
|
||||
}
|
||||
crypter.Id0 = string(id0)
|
||||
crypter.Id0 = id0
|
||||
|
||||
return crypter, nil
|
||||
}
|
||||
@ -382,7 +382,7 @@ func (crypt *PdfCrypt) authenticate(password []byte) (bool, error) {
|
||||
|
||||
// Try user password.
|
||||
common.Log.Trace("Debugging authentication - user pass")
|
||||
authenticated, err := crypt.Alg6(password)
|
||||
authenticated, err := crypt.alg6(password)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
@ -396,7 +396,7 @@ func (crypt *PdfCrypt) authenticate(password []byte) (bool, error) {
|
||||
// May not be necessary if only want to get all contents.
|
||||
// (user pass needs to be known or empty).
|
||||
common.Log.Trace("Debugging authentication - owner pass")
|
||||
authenticated, err = crypt.Alg7(password)
|
||||
authenticated, err = crypt.alg7(password)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
@ -419,7 +419,7 @@ func (crypt *PdfCrypt) checkAccessRights(password []byte) (bool, AccessPermissio
|
||||
perms := AccessPermissions{}
|
||||
|
||||
// Try owner password -> full rights.
|
||||
isOwner, err := crypt.Alg7(password)
|
||||
isOwner, err := crypt.alg7(password)
|
||||
if err != nil {
|
||||
return false, perms, err
|
||||
}
|
||||
@ -437,7 +437,7 @@ func (crypt *PdfCrypt) checkAccessRights(password []byte) (bool, AccessPermissio
|
||||
}
|
||||
|
||||
// Try user password.
|
||||
isUser, err := crypt.Alg6(password)
|
||||
isUser, err := crypt.alg6(password)
|
||||
if err != nil {
|
||||
return false, perms, err
|
||||
}
|
||||
@ -721,17 +721,17 @@ func (crypt *PdfCrypt) Decrypt(obj PdfObject, parentObjNum, parentGenNum int64)
|
||||
}
|
||||
|
||||
// Overwrite the encrypted with decrypted string.
|
||||
decrypted := make([]byte, len(*s))
|
||||
for i := 0; i < len(*s); i++ {
|
||||
decrypted[i] = (*s)[i]
|
||||
decrypted := make([]byte, len(s.Str()))
|
||||
str := s.Str()
|
||||
for i := 0; i < len(str); i++ {
|
||||
decrypted[i] = str[i]
|
||||
}
|
||||
common.Log.Trace("Decrypt string: %s : % x", decrypted, decrypted)
|
||||
decrypted, err = crypt.decryptBytes(decrypted, stringFilter, key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*s = PdfObjectString(decrypted)
|
||||
|
||||
s.val = string(decrypted)
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -968,16 +968,17 @@ func (crypt *PdfCrypt) Encrypt(obj PdfObject, parentObjNum, parentGenNum int64)
|
||||
return err
|
||||
}
|
||||
|
||||
encrypted := make([]byte, len(*s))
|
||||
for i := 0; i < len(*s); i++ {
|
||||
encrypted[i] = (*s)[i]
|
||||
str := s.Str()
|
||||
encrypted := make([]byte, len(str))
|
||||
for i := 0; i < len(str); i++ {
|
||||
encrypted[i] = str[i]
|
||||
}
|
||||
common.Log.Trace("Encrypt string: %s : % x", encrypted, encrypted)
|
||||
encrypted, err = crypt.encryptBytes(encrypted, stringFilter, key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*s = PdfObjectString(encrypted)
|
||||
s.val = string(encrypted)
|
||||
|
||||
return nil
|
||||
}
|
||||
@ -1022,10 +1023,9 @@ func (crypt *PdfCrypt) Encrypt(obj PdfObject, parentObjNum, parentGenNum int64)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Alg2 computes an encryption key.
|
||||
// TODO (v3): Unexport.
|
||||
func (crypt *PdfCrypt) Alg2(pass []byte) []byte {
|
||||
common.Log.Trace("Alg2")
|
||||
// alg2 computes an encryption key.
|
||||
func (crypt *PdfCrypt) alg2(pass []byte) []byte {
|
||||
common.Log.Trace("alg2")
|
||||
key := crypt.paddedPass(pass)
|
||||
|
||||
h := md5.New()
|
||||
@ -1091,10 +1091,9 @@ func (crypt *PdfCrypt) alg3Key(pass []byte) []byte {
|
||||
}
|
||||
|
||||
// Alg3 computes the encryption dictionary’s O (owner password) value.
|
||||
// TODO (v3): Unexport.
|
||||
func (crypt *PdfCrypt) Alg3(upass, opass []byte) (PdfObjectString, error) {
|
||||
func (crypt *PdfCrypt) Alg3(upass, opass []byte) (string, error) {
|
||||
// Return O string val.
|
||||
O := PdfObjectString("")
|
||||
O := ""
|
||||
|
||||
var encKey []byte
|
||||
if len(opass) > 0 {
|
||||
@ -1126,16 +1125,15 @@ func (crypt *PdfCrypt) Alg3(upass, opass []byte) (PdfObjectString, error) {
|
||||
}
|
||||
}
|
||||
|
||||
O = PdfObjectString(encrypted)
|
||||
O = string(encrypted)
|
||||
return O, nil
|
||||
}
|
||||
|
||||
// Alg4 computes the encryption dictionary’s U (user password) value (Security handlers of revision 2).
|
||||
// TODO (v3): Unexport.
|
||||
func (crypt *PdfCrypt) Alg4(upass []byte) (PdfObjectString, []byte, error) {
|
||||
U := PdfObjectString("")
|
||||
// alg4 computes the encryption dictionary’s U (user password) value (Security handlers of revision 2).
|
||||
func (crypt *PdfCrypt) alg4(upass []byte) (string, []byte, error) {
|
||||
U := ""
|
||||
|
||||
ekey := crypt.Alg2(upass)
|
||||
ekey := crypt.alg2(upass)
|
||||
ciph, err := rc4.NewCipher(ekey)
|
||||
if err != nil {
|
||||
return U, ekey, errors.New("Failed rc4 ciph")
|
||||
@ -1145,16 +1143,15 @@ func (crypt *PdfCrypt) Alg4(upass []byte) (PdfObjectString, []byte, error) {
|
||||
encrypted := make([]byte, len(s))
|
||||
ciph.XORKeyStream(encrypted, s)
|
||||
|
||||
U = PdfObjectString(encrypted)
|
||||
U = string(encrypted)
|
||||
return U, ekey, nil
|
||||
}
|
||||
|
||||
// Alg5 computes the encryption dictionary’s U (user password) value (Security handlers of revision 3 or greater).
|
||||
// TODO (v3): Unexport.
|
||||
func (crypt *PdfCrypt) Alg5(upass []byte) (PdfObjectString, []byte, error) {
|
||||
U := PdfObjectString("")
|
||||
func (crypt *PdfCrypt) Alg5(upass []byte) (string, []byte, error) {
|
||||
U := ""
|
||||
|
||||
ekey := crypt.Alg2(upass)
|
||||
ekey := crypt.alg2(upass)
|
||||
|
||||
h := md5.New()
|
||||
h.Write([]byte(padding))
|
||||
@ -1209,18 +1206,17 @@ func (crypt *PdfCrypt) Alg5(upass []byte) (PdfObjectString, []byte, error) {
|
||||
return U, ekey, errors.New("Failed to gen rand number")
|
||||
}
|
||||
|
||||
U = PdfObjectString(bb)
|
||||
U = string(bb)
|
||||
return U, ekey, nil
|
||||
}
|
||||
|
||||
// Alg6 authenticates the user password.
|
||||
// TODO (v3): Unexport.
|
||||
func (crypt *PdfCrypt) Alg6(upass []byte) (bool, error) {
|
||||
var uo PdfObjectString
|
||||
// alg6 authenticates the user password.
|
||||
func (crypt *PdfCrypt) alg6(upass []byte) (bool, error) {
|
||||
var uo string
|
||||
var err error
|
||||
var key []byte
|
||||
if crypt.R == 2 {
|
||||
uo, key, err = crypt.Alg4(upass)
|
||||
uo, key, err = crypt.alg4(upass)
|
||||
} else if crypt.R >= 3 {
|
||||
uo, key, err = crypt.Alg5(upass)
|
||||
} else {
|
||||
@ -1254,9 +1250,8 @@ func (crypt *PdfCrypt) Alg6(upass []byte) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Alg7 authenticates the owner password.
|
||||
// TODO (v3): Unexport.
|
||||
func (crypt *PdfCrypt) Alg7(opass []byte) (bool, error) {
|
||||
// alg7 authenticates the owner password.
|
||||
func (crypt *PdfCrypt) alg7(opass []byte) (bool, error) {
|
||||
encKey := crypt.alg3Key(opass)
|
||||
|
||||
decrypted := make([]byte, len(crypt.O))
|
||||
@ -1285,7 +1280,7 @@ func (crypt *PdfCrypt) Alg7(opass []byte) (bool, error) {
|
||||
return false, errors.New("invalid R")
|
||||
}
|
||||
|
||||
auth, err := crypt.Alg6(decrypted)
|
||||
auth, err := crypt.alg6(decrypted)
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
|
@ -63,7 +63,7 @@ func TestAlg2(t *testing.T) {
|
||||
crypter.Length = 128
|
||||
crypter.EncryptMetadata = true
|
||||
|
||||
key := crypter.Alg2([]byte(""))
|
||||
key := crypter.alg2([]byte(""))
|
||||
|
||||
keyExp := []byte{0xf8, 0x94, 0x9c, 0x5a, 0xf5, 0xa0, 0xc0, 0xca,
|
||||
0x30, 0xb8, 0x91, 0xc1, 0xbb, 0x2c, 0x4f, 0xf5}
|
||||
|
@ -297,7 +297,7 @@ func (parser *PdfParser) parseNumber() (PdfObject, error) {
|
||||
}
|
||||
|
||||
// A string starts with '(' and ends with ')'.
|
||||
func (parser *PdfParser) parseString() (PdfObjectString, error) {
|
||||
func (parser *PdfParser) parseString() (*PdfObjectString, error) {
|
||||
parser.reader.ReadByte()
|
||||
|
||||
var r bytes.Buffer
|
||||
@ -305,21 +305,21 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
|
||||
for {
|
||||
bb, err := parser.reader.Peek(1)
|
||||
if err != nil {
|
||||
return PdfObjectString(r.String()), err
|
||||
return MakeString(r.String()), err
|
||||
}
|
||||
|
||||
if bb[0] == '\\' { // Escape sequence.
|
||||
parser.reader.ReadByte() // Skip the escape \ byte.
|
||||
b, err := parser.reader.ReadByte()
|
||||
if err != nil {
|
||||
return PdfObjectString(r.String()), err
|
||||
return MakeString(r.String()), err
|
||||
}
|
||||
|
||||
// Octal '\ddd' number (base 8).
|
||||
if IsOctalDigit(b) {
|
||||
bb, err := parser.reader.Peek(2)
|
||||
if err != nil {
|
||||
return PdfObjectString(r.String()), err
|
||||
return MakeString(r.String()), err
|
||||
}
|
||||
|
||||
numeric := []byte{}
|
||||
@ -336,7 +336,7 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
|
||||
common.Log.Trace("Numeric string \"%s\"", numeric)
|
||||
code, err := strconv.ParseUint(string(numeric), 8, 32)
|
||||
if err != nil {
|
||||
return PdfObjectString(r.String()), err
|
||||
return MakeString(r.String()), err
|
||||
}
|
||||
r.WriteByte(byte(code))
|
||||
continue
|
||||
@ -376,19 +376,19 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
|
||||
r.WriteByte(b)
|
||||
}
|
||||
|
||||
return PdfObjectString(r.String()), nil
|
||||
return MakeString(r.String()), nil
|
||||
}
|
||||
|
||||
// Starts with '<' ends with '>'.
|
||||
// The hexadecimal digits are decoded into the bytes they represent.
|
||||
func (parser *PdfParser) parseHexString() (PdfObjectString, error) {
|
||||
func (parser *PdfParser) parseHexString() (*PdfObjectString, error) {
|
||||
parser.reader.ReadByte()
|
||||
|
||||
var r bytes.Buffer
|
||||
for {
|
||||
bb, err := parser.reader.Peek(1)
|
||||
if err != nil {
|
||||
return PdfObjectString(""), err
|
||||
return MakeString(""), err
|
||||
}
|
||||
|
||||
if bb[0] == '>' {
|
||||
@ -407,7 +407,7 @@ func (parser *PdfParser) parseHexString() (PdfObjectString, error) {
|
||||
}
|
||||
|
||||
buf, _ := hex.DecodeString(r.String())
|
||||
return PdfObjectString(buf), nil
|
||||
return MakeHexString(string(buf)), nil
|
||||
}
|
||||
|
||||
// Starts with '[' ends with ']'. Can contain any kinds of direct objects.
|
||||
@ -506,7 +506,7 @@ func (parser *PdfParser) parseObject() (PdfObject, error) {
|
||||
} else if bb[0] == '(' {
|
||||
common.Log.Trace("->String!")
|
||||
str, err := parser.parseString()
|
||||
return &str, err
|
||||
return str, err
|
||||
} else if bb[0] == '[' {
|
||||
common.Log.Trace("->Array!")
|
||||
arr, err := parser.parseArray()
|
||||
@ -518,7 +518,7 @@ func (parser *PdfParser) parseObject() (PdfObject, error) {
|
||||
} else if bb[0] == '<' {
|
||||
common.Log.Trace("->Hex string!")
|
||||
str, err := parser.parseHexString()
|
||||
return &str, err
|
||||
return str, err
|
||||
} else if bb[0] == '%' {
|
||||
parser.readComment()
|
||||
parser.skipSpaces()
|
||||
|
@ -132,7 +132,7 @@ func TestStringParsing(t *testing.T) {
|
||||
if err != nil && err != io.EOF {
|
||||
t.Errorf("Unable to parse string, error: %s", err)
|
||||
}
|
||||
if string(o) != expected {
|
||||
if o.Str() != expected {
|
||||
t.Errorf("String Mismatch %s: \"%s\" != \"%s\"", raw, o, expected)
|
||||
}
|
||||
}
|
||||
@ -164,8 +164,8 @@ func TestBinStringParsing(t *testing.T) {
|
||||
if err != nil && err != io.EOF {
|
||||
t.Errorf("Unable to parse string, error: %s", err)
|
||||
}
|
||||
if len(string(o)) != 32 {
|
||||
t.Errorf("Wrong length, should be 32 (got %d)", len(string(o)))
|
||||
if len(o.Str()) != 32 {
|
||||
t.Errorf("Wrong length, should be 32 (got %d)", len(o.Str()))
|
||||
}
|
||||
}
|
||||
|
||||
@ -347,7 +347,7 @@ func BenchmarkHexStringParsing(b *testing.B) {
|
||||
b.Errorf("Error parsing hex string: %s", err.Error())
|
||||
return
|
||||
}
|
||||
if string(hs) != ref.String() {
|
||||
if hs.Str() != ref.String() {
|
||||
b.Errorf("Reference and parsed hex strings mismatch")
|
||||
}
|
||||
parser.SetFileOffset(0)
|
||||
@ -426,7 +426,7 @@ func TestDictParsing2(t *testing.T) {
|
||||
}
|
||||
|
||||
str, ok := dict.Get("StringItem").(*PdfObjectString)
|
||||
if !ok || *str != "a string" {
|
||||
if !ok || str.Str() != "a string" {
|
||||
t.Errorf("Invalid string item")
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,7 @@ package core
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
|
||||
"github.com/unidoc/unidoc/common"
|
||||
@ -31,8 +32,10 @@ type PdfObjectInteger int64
|
||||
type PdfObjectFloat float64
|
||||
|
||||
// PdfObjectString represents the primitive PDF string object.
|
||||
// TODO (v3): Change to a struct and add a flag for hex/plaintext.
|
||||
type PdfObjectString string
|
||||
type PdfObjectString struct {
	// val holds the string content.
	val string
	// isHex marks the string for output in hexadecimal form (<...>) rather than literal form ((...)).
	isHex bool
}
|
||||
|
||||
// PdfObjectName represents the primitive PDF name object.
|
||||
type PdfObjectName string
|
||||
@ -135,7 +138,13 @@ func MakeFloat(val float64) *PdfObjectFloat {
|
||||
|
||||
// MakeString creates a PdfObjectString from a string.
|
||||
func MakeString(s string) *PdfObjectString {
|
||||
str := PdfObjectString(s)
|
||||
str := PdfObjectString{val: s}
|
||||
return &str
|
||||
}
|
||||
|
||||
// MakeHexString creates an PdfObjectString from a string intended for output as a hexadecimal string.
|
||||
func MakeHexString(s string) *PdfObjectString {
|
||||
str := PdfObjectString{val: s, isHex: true}
|
||||
return &str
|
||||
}
|
||||
|
||||
@ -208,14 +217,38 @@ func (float *PdfObjectFloat) DefaultWriteString() string {
|
||||
return fmt.Sprintf("%f", *float)
|
||||
}
|
||||
|
||||
// String returns a string representation of the *PdfObjectString.
|
||||
func (str *PdfObjectString) String() string {
|
||||
return fmt.Sprintf("%s", string(*str))
|
||||
return str.val
|
||||
}
|
||||
|
||||
// Str returns the string value of the PdfObjectString. Defined in addition to String() function to clarify that
|
||||
// this function returns the underlying string directly, whereas the String function technically could include
|
||||
// debug info.
|
||||
func (str *PdfObjectString) Str() string {
|
||||
return str.val
|
||||
}
|
||||
|
||||
// Bytes returns the PdfObjectString content as a []byte array.
|
||||
func (str *PdfObjectString) Bytes() []byte {
|
||||
return []byte(str.val)
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
func (str *PdfObjectString) DefaultWriteString() string {
|
||||
var output bytes.Buffer
|
||||
|
||||
// Handle hex representation.
|
||||
if str.isHex {
|
||||
shex := hex.EncodeToString(str.Bytes())
|
||||
output.WriteString("<")
|
||||
output.WriteString(shex)
|
||||
output.WriteString(">")
|
||||
return output.String()
|
||||
}
|
||||
|
||||
// Otherwise regular string.
|
||||
|
||||
escapeSequences := map[byte]string{
|
||||
'\n': "\\n",
|
||||
'\r': "\\r",
|
||||
@ -228,8 +261,8 @@ func (str *PdfObjectString) DefaultWriteString() string {
|
||||
}
|
||||
|
||||
output.WriteString("(")
|
||||
for i := 0; i < len(*str); i++ {
|
||||
char := (*str)[i]
|
||||
for i := 0; i < len(str.val); i++ {
|
||||
char := str.val[i]
|
||||
if escStr, useEsc := escapeSequences[char]; useEsc {
|
||||
output.WriteString(escStr)
|
||||
} else {
|
||||
@ -242,7 +275,7 @@ func (str *PdfObjectString) DefaultWriteString() string {
|
||||
}
|
||||
|
||||
func (name *PdfObjectName) String() string {
|
||||
return fmt.Sprintf("%s", string(*name))
|
||||
return string(*name)
|
||||
}
|
||||
|
||||
// DefaultWriteString outputs the object as it is to be written to file.
|
||||
|
80
pdf/core/primitives_test.go
Normal file
80
pdf/core/primitives_test.go
Normal file
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* This file is subject to the terms and conditions defined in
|
||||
* file 'LICENSE.md', which is part of this source code package.
|
||||
*/
|
||||
|
||||
package core
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestHexStringWriteBasic(t *testing.T) {
|
||||
testcases := map[string]string{
|
||||
" ": "<20>",
|
||||
}
|
||||
|
||||
for src, expected := range testcases {
|
||||
strObj := MakeHexString(src)
|
||||
ws := strObj.DefaultWriteString()
|
||||
|
||||
if ws != expected {
|
||||
t.Fatalf("%s: '%s' != '%s'\n", src, ws, expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test writing and parsing back of hexadecimal and regular strings.
|
||||
func TestHexStringMulti(t *testing.T) {
|
||||
testcases := []string{
|
||||
"This is a string",
|
||||
"Strings may contain\n newlines and such",
|
||||
string([]byte{0x50, 0x01, 0x00, 0x90, 0xff, 0x49, 0xdf, 0x20, 0x32}),
|
||||
"",
|
||||
}
|
||||
|
||||
for _, testcase := range testcases {
|
||||
// Make *PdfObject representations for regular and hexadecimal strings.
|
||||
s := MakeString(testcase)
|
||||
shex := MakeHexString(testcase)
|
||||
|
||||
// Write out.
|
||||
writestr := s.DefaultWriteString()
|
||||
writestrhex := shex.DefaultWriteString()
|
||||
|
||||
// Parse back.
|
||||
parser1 := makeParserForText(writestr)
|
||||
parser2 := makeParserForText(writestrhex)
|
||||
|
||||
// Check that representation is correct.
|
||||
obj1, err := parser1.parseObject()
|
||||
if err != nil {
|
||||
t.Fatalf("Error: %v", err)
|
||||
}
|
||||
strObj1, ok := obj1.(*PdfObjectString)
|
||||
if !ok {
|
||||
t.Fatalf("Type incorrect")
|
||||
}
|
||||
if strObj1.isHex != false {
|
||||
t.Fatalf("Should not be hex")
|
||||
}
|
||||
if strObj1.Str() != testcase {
|
||||
t.Fatalf("String mismatch")
|
||||
}
|
||||
|
||||
obj2, err := parser2.parseObject()
|
||||
if err != nil {
|
||||
t.Fatalf("Error: %v", err)
|
||||
}
|
||||
strObj2, ok := obj2.(*PdfObjectString)
|
||||
if !ok {
|
||||
t.Fatalf("Type incorrect")
|
||||
}
|
||||
if strObj2.isHex != true {
|
||||
t.Fatalf("Should be hex")
|
||||
}
|
||||
if strObj2.Str() != testcase {
|
||||
t.Fatalf("String mismatch")
|
||||
}
|
||||
}
|
||||
}
|
@ -29,7 +29,8 @@ func checkBounds(sliceLen, a, b int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Inspect analyzes the document object structure.
|
||||
// Inspect analyzes the document object structure. Returns a map of object types (by name) with the instance count
|
||||
// as value.
|
||||
func (parser *PdfParser) Inspect() (map[string]int, error) {
|
||||
return parser.inspect()
|
||||
}
|
||||
|
@ -185,9 +185,9 @@ func (e *Extractor) ExtractText() (string, error) {
|
||||
switch v := obj.(type) {
|
||||
case *core.PdfObjectString:
|
||||
if codemap != nil {
|
||||
buf.WriteString(codemap.CharcodeBytesToUnicode([]byte(*v)))
|
||||
buf.WriteString(codemap.CharcodeBytesToUnicode(v.Bytes()))
|
||||
} else {
|
||||
buf.WriteString(string(*v))
|
||||
buf.WriteString(v.Str())
|
||||
}
|
||||
case *core.PdfObjectFloat:
|
||||
if *v < -100 {
|
||||
@ -212,9 +212,9 @@ func (e *Extractor) ExtractText() (string, error) {
|
||||
return fmt.Errorf("Invalid parameter type, not string (%T)", op.Params[0])
|
||||
}
|
||||
if codemap != nil {
|
||||
buf.WriteString(codemap.CharcodeBytesToUnicode([]byte(*param)))
|
||||
buf.WriteString(codemap.CharcodeBytesToUnicode(param.Bytes()))
|
||||
} else {
|
||||
buf.WriteString(string(*param))
|
||||
buf.WriteString(param.Str())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -14,7 +14,7 @@ import (
|
||||
. "github.com/unidoc/unidoc/pdf/core"
|
||||
)
|
||||
|
||||
//
|
||||
// PdfColorspace interface defines the common properties of a PDF colorspace.
|
||||
// The colorspace defines the data storage format for each color and color representation.
|
||||
//
|
||||
// Device based colorspace, specified by name
|
||||
@ -50,6 +50,7 @@ type PdfColorspace interface {
|
||||
DecodeArray() []float64
|
||||
}
|
||||
|
||||
// PdfColor interface represents a generic color in PDF.
|
||||
type PdfColor interface {
|
||||
}
|
||||
|
||||
@ -2196,7 +2197,7 @@ func newPdfColorspaceSpecialIndexedFromPdfObject(obj PdfObject) (*PdfColorspaceS
|
||||
obj = TraceToDirectObject(obj)
|
||||
var data []byte
|
||||
if str, ok := obj.(*PdfObjectString); ok {
|
||||
data = []byte(*str)
|
||||
data = str.Bytes()
|
||||
common.Log.Trace("Indexed string color data: % d", data)
|
||||
} else if stream, ok := obj.(*PdfObjectStream); ok {
|
||||
common.Log.Trace("Indexed stream: %s", obj.String())
|
||||
|
@ -21,7 +21,7 @@ import (
|
||||
. "github.com/unidoc/unidoc/pdf/core"
|
||||
)
|
||||
|
||||
// PDF page object (7.7.3.3 - Table 30).
|
||||
// PdfPage represents a page in a PDF document. (7.7.3.3 - Table 30).
|
||||
type PdfPage struct {
|
||||
Parent PdfObject
|
||||
LastModified *PdfDate
|
||||
@ -115,7 +115,7 @@ func (reader *PdfReader) newPdfPageFromDict(p *PdfObjectDictionary) (*PdfPage, e
|
||||
if !ok {
|
||||
return nil, errors.New("Page dictionary LastModified != string")
|
||||
}
|
||||
lastmod, err := NewPdfDate(string(*strObj))
|
||||
lastmod, err := NewPdfDate(strObj.Str())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -801,7 +801,7 @@ func (this *PdfPage) SetContentStreams(cStreams []string, encoder StreamEncoder)
|
||||
|
||||
func getContentStreamAsString(cstreamObj PdfObject) (string, error) {
|
||||
if cstream, ok := TraceToDirectObject(cstreamObj).(*PdfObjectString); ok {
|
||||
return string(*cstream), nil
|
||||
return cstream.Str(), nil
|
||||
}
|
||||
|
||||
if cstream, ok := TraceToDirectObject(cstreamObj).(*PdfObjectStream); ok {
|
||||
|
@ -196,7 +196,7 @@ func TestPdfDateBuild(t *testing.T) {
|
||||
t.Errorf("Date PDF object should be a string")
|
||||
return
|
||||
}
|
||||
if string(*strObj) != dateStr1 {
|
||||
if strObj.Str() != dateStr1 {
|
||||
t.Errorf("Built date string does not match original (%s)", strObj)
|
||||
return
|
||||
}
|
||||
|
@ -425,7 +425,7 @@ func (this *PdfReader) GetOutlinesFlattened() ([]*PdfOutlineTreeNode, []string,
|
||||
|
||||
if item, isItem := node.context.(*PdfOutlineItem); isItem {
|
||||
*outlineList = append(*outlineList, &item.PdfOutlineTreeNode)
|
||||
title := strings.Repeat(" ", depth*2) + string(*item.Title)
|
||||
title := strings.Repeat(" ", depth*2) + item.Title.Str()
|
||||
*titleList = append(*titleList, title)
|
||||
if item.Next != nil {
|
||||
flattenFunc(item.Next, outlineList, titleList, depth)
|
||||
|
@ -129,6 +129,5 @@ func (date *PdfDate) ToPdfObject() PdfObject {
|
||||
str := fmt.Sprintf("D:%.4d%.2d%.2d%.2d%.2d%.2d%c%.2d'%.2d'",
|
||||
date.year, date.month, date.day, date.hour, date.minute, date.second,
|
||||
date.utOffsetSign, date.utOffsetHours, date.utOffsetMins)
|
||||
pdfStr := PdfObjectString(str)
|
||||
return &pdfStr
|
||||
return MakeString(str)
|
||||
}
|
||||
|
@ -16,13 +16,13 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/unidoc/unidoc/common"
|
||||
"github.com/unidoc/unidoc/common/license"
|
||||
. "github.com/unidoc/unidoc/pdf/core"
|
||||
"github.com/unidoc/unidoc/pdf/model/fonts"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var pdfCreator = ""
|
||||
@ -342,8 +342,6 @@ func (this *PdfWriter) AddPage(page *PdfPage) error {
|
||||
|
||||
this.addObject(pageObj)
|
||||
|
||||
|
||||
|
||||
// Traverse the page and record all object references.
|
||||
err := this.addObjects(pDict)
|
||||
if err != nil {
|
||||
@ -500,14 +498,14 @@ func (this *PdfWriter) Encrypt(userPass, ownerPass []byte, options *EncryptOptio
|
||||
|
||||
// Prepare the ID object for the trailer.
|
||||
hashcode := md5.Sum([]byte(time.Now().Format(time.RFC850)))
|
||||
id0 := PdfObjectString(hashcode[:])
|
||||
id0 := string(hashcode[:])
|
||||
b := make([]byte, 100)
|
||||
rand.Read(b)
|
||||
hashcode = md5.Sum(b)
|
||||
id1 := PdfObjectString(hashcode[:])
|
||||
id1 := string(hashcode[:])
|
||||
common.Log.Trace("Random b: % x", b)
|
||||
|
||||
this.ids = &PdfObjectArray{&id0, &id1}
|
||||
this.ids = MakeArray(MakeHexString(id0), MakeHexString(id1))
|
||||
common.Log.Trace("Gen Id 0: % x", id0)
|
||||
|
||||
crypter.Id0 = string(id0)
|
||||
@ -536,8 +534,8 @@ func (this *PdfWriter) Encrypt(userPass, ownerPass []byte, options *EncryptOptio
|
||||
encDict.Set("V", MakeInteger(int64(crypter.V)))
|
||||
encDict.Set("R", MakeInteger(int64(crypter.R)))
|
||||
encDict.Set("Length", MakeInteger(int64(crypter.Length)))
|
||||
encDict.Set("O", &O)
|
||||
encDict.Set("U", &U)
|
||||
encDict.Set("O", MakeHexString(O))
|
||||
encDict.Set("U", MakeHexString(U))
|
||||
this.encryptDict = encDict
|
||||
|
||||
// Make an object to contain it.
|
||||
|
Loading…
x
Reference in New Issue
Block a user