mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-26 13:48:55 +08:00
More flexibility in parsing dates etc to support more PDFs with the new outlines handling
PdfDate parsing is more flexible (the UT offset is now optional). An outline Count, which should be an integer, can now also be a float.
This commit is contained in:
parent
1593b0ebd4
commit
eb13f3b8be
@ -70,17 +70,20 @@ func newPdfOutlineFromDict(dict *PdfObjectDictionary) (*PdfOutline, error) {
|
||||
typeVal, ok := obj.(*PdfObjectName)
|
||||
if ok {
|
||||
if *typeVal != "Outlines" {
|
||||
return nil, fmt.Errorf("Type != Outlines (%s)", *typeVal)
|
||||
common.Log.Error("Type != Outlines (%s)", *typeVal)
|
||||
// Should be "Outlines" if there, but some files have other types
|
||||
// Log as an error but do not quit.
|
||||
// Might be a good idea to log this kind of deviation from the standard separately.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if obj, hasCount := (*dict)["Count"]; hasCount {
|
||||
countVal, ok := obj.(*PdfObjectInteger)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("Count not an integer (%T)", obj)
|
||||
// This should always be an integer, but in a few cases has been a float.
|
||||
count, err := getNumberAsInt64(obj)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
count := int64(*countVal)
|
||||
outline.Count = &count
|
||||
}
|
||||
|
||||
|
38
pdf/page.go
38
pdf/page.go
@ -17,6 +17,8 @@ import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
|
||||
"github.com/unidoc/unidoc/common"
|
||||
)
|
||||
|
||||
type PdfRectangle struct {
|
||||
@ -38,6 +40,21 @@ func getNumberAsFloat(obj PdfObject) (float64, error) {
|
||||
return 0, errors.New("Not a number")
|
||||
}
|
||||
|
||||
// Cases where expecting an integer, but some implementations actually
|
||||
// store the number in a floating point format.
|
||||
func getNumberAsInt64(obj PdfObject) (int64, error) {
|
||||
if iObj, ok := obj.(*PdfObjectInteger); ok {
|
||||
return int64(*iObj), nil
|
||||
}
|
||||
|
||||
if fObj, ok := obj.(*PdfObjectFloat); ok {
|
||||
common.Log.Debug("Number expected as integer was stored as float (type casting used)")
|
||||
return int64(*fObj), nil
|
||||
}
|
||||
|
||||
return 0, errors.New("Not a number")
|
||||
}
|
||||
|
||||
func getNumberAsFloatOrNull(obj PdfObject) (*float64, error) {
|
||||
if fObj, ok := obj.(*PdfObjectFloat); ok {
|
||||
num := float64(*fObj)
|
||||
@ -112,7 +129,7 @@ type PdfDate struct {
|
||||
utOffsetMins int64 // mm (00-59)
|
||||
}
|
||||
|
||||
var reDate = regexp.MustCompile(`\s*D\s*:\s*(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})([+-Z])(\d{2})'(\d{2})?`)
|
||||
// reDate matches a PDF date string (see 7.9.4 Dates) of the form
// "D:YYYYMMDDHHmmSSOHH'mm". Capture groups 1-6 are the year, month, day,
// hour, minute and second. Groups 7-9 are the UT offset sign, offset hours
// and offset minutes; they are optional because some producers omit the
// offset, and are empty strings when absent.
//
// The hyphen in the sign class is escaped on purpose: an unescaped `[+-Z]`
// is the character range '+'..'Z', which also accepts digits, '/', ':' and
// other characters as the offset sign.
var reDate = regexp.MustCompile(`\s*D\s*:\s*(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})([+\-Z])?(\d{2})?'?(\d{2})?`)
|
||||
|
||||
// Make a new PdfDate object from a PDF date string (see 7.9.4 Dates).
|
||||
// format: "D: YYYYMMDDHHmmSSOHH'mm"
|
||||
@ -134,9 +151,22 @@ func NewPdfDate(dateStr string) (PdfDate, error) {
|
||||
d.hour, _ = strconv.ParseInt(matches[0][4], 10, 32)
|
||||
d.minute, _ = strconv.ParseInt(matches[0][5], 10, 32)
|
||||
d.second, _ = strconv.ParseInt(matches[0][6], 10, 32)
|
||||
d.utOffsetSign = matches[0][7][0]
|
||||
d.utOffsetHours, _ = strconv.ParseInt(matches[0][8], 10, 32)
|
||||
d.utOffsetMins, _ = strconv.ParseInt(matches[0][9], 10, 32)
|
||||
// Some poor implementations do not include the offset.
|
||||
if len(matches[0][7]) > 0 {
|
||||
d.utOffsetSign = matches[0][7][0]
|
||||
} else {
|
||||
d.utOffsetSign = '+'
|
||||
}
|
||||
if len(matches[0][8]) > 0 {
|
||||
d.utOffsetHours, _ = strconv.ParseInt(matches[0][8], 10, 32)
|
||||
} else {
|
||||
d.utOffsetHours = 0
|
||||
}
|
||||
if len(matches[0][9]) > 0 {
|
||||
d.utOffsetMins, _ = strconv.ParseInt(matches[0][9], 10, 32)
|
||||
} else {
|
||||
d.utOffsetMins = 0
|
||||
}
|
||||
|
||||
return d, nil
|
||||
}
|
||||
|
@ -125,6 +125,48 @@ func TestDateParse(t *testing.T) {
|
||||
t.Errorf("Invalid offset minutes")
|
||||
return
|
||||
}
|
||||
|
||||
// Case 5: Missing some more parameters.
|
||||
// Seems that many implementations consider some stuff optional...
|
||||
// Not following the standard, but we need to handle it.
|
||||
// D:20050823042205
|
||||
str = "D:20050823042205"
|
||||
date, err = NewPdfDate(str)
|
||||
if err != nil {
|
||||
t.Errorf("Fail: %s", err)
|
||||
return
|
||||
}
|
||||
if date.year != 2005 {
|
||||
t.Errorf("Year != 2005")
|
||||
return
|
||||
}
|
||||
if date.month != 8 {
|
||||
t.Errorf("month != 8")
|
||||
return
|
||||
}
|
||||
if date.day != 23 {
|
||||
t.Errorf("Day != 23")
|
||||
return
|
||||
}
|
||||
if date.hour != 04 {
|
||||
t.Errorf("Hour != 11 (%d)", date.hour)
|
||||
return
|
||||
}
|
||||
if date.minute != 22 {
|
||||
t.Errorf("Minute != 29 (%d)", date.minute)
|
||||
}
|
||||
if date.second != 05 {
|
||||
t.Errorf("Second != 37 (%d)", date.second)
|
||||
return
|
||||
}
|
||||
if date.utOffsetHours != 0 {
|
||||
t.Errorf("Invalid offset hours")
|
||||
return
|
||||
}
|
||||
if date.utOffsetMins != 0 {
|
||||
t.Errorf("Invalid offset minutes")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Test parsing and building the date.
|
||||
|
Loading…
x
Reference in New Issue
Block a user