mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
Add more flexibility in parsing dates, etc., to support more PDFs with the new outlines handling.
PdfDate is more flexible (the offset is now optional). Count, which should be an integer, can now also be a float.
This commit is contained in:
parent
1593b0ebd4
commit
eb13f3b8be
@ -70,17 +70,20 @@ func newPdfOutlineFromDict(dict *PdfObjectDictionary) (*PdfOutline, error) {
|
|||||||
typeVal, ok := obj.(*PdfObjectName)
|
typeVal, ok := obj.(*PdfObjectName)
|
||||||
if ok {
|
if ok {
|
||||||
if *typeVal != "Outlines" {
|
if *typeVal != "Outlines" {
|
||||||
return nil, fmt.Errorf("Type != Outlines (%s)", *typeVal)
|
common.Log.Error("Type != Outlines (%s)", *typeVal)
|
||||||
|
// Should be "Outlines" if there, but some files have other types
|
||||||
|
// Log as an error but do not quit.
|
||||||
|
// Might be a good idea to log this kind of deviation from the standard separately.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if obj, hasCount := (*dict)["Count"]; hasCount {
|
if obj, hasCount := (*dict)["Count"]; hasCount {
|
||||||
countVal, ok := obj.(*PdfObjectInteger)
|
// This should always be an integer, but in a few cases has been a float.
|
||||||
if !ok {
|
count, err := getNumberAsInt64(obj)
|
||||||
return nil, fmt.Errorf("Count not an integer (%T)", obj)
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
count := int64(*countVal)
|
|
||||||
outline.Count = &count
|
outline.Count = &count
|
||||||
}
|
}
|
||||||
|
|
||||||
|
38
pdf/page.go
38
pdf/page.go
@ -17,6 +17,8 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/unidoc/unidoc/common"
|
||||||
)
|
)
|
||||||
|
|
||||||
type PdfRectangle struct {
|
type PdfRectangle struct {
|
||||||
@ -38,6 +40,21 @@ func getNumberAsFloat(obj PdfObject) (float64, error) {
|
|||||||
return 0, errors.New("Not a number")
|
return 0, errors.New("Not a number")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cases where expecting an integer, but some implementations actually
|
||||||
|
// store the number in a floating point format.
|
||||||
|
func getNumberAsInt64(obj PdfObject) (int64, error) {
|
||||||
|
if iObj, ok := obj.(*PdfObjectInteger); ok {
|
||||||
|
return int64(*iObj), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if fObj, ok := obj.(*PdfObjectFloat); ok {
|
||||||
|
common.Log.Debug("Number expected as integer was stored as float (type casting used)")
|
||||||
|
return int64(*fObj), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0, errors.New("Not a number")
|
||||||
|
}
|
||||||
|
|
||||||
func getNumberAsFloatOrNull(obj PdfObject) (*float64, error) {
|
func getNumberAsFloatOrNull(obj PdfObject) (*float64, error) {
|
||||||
if fObj, ok := obj.(*PdfObjectFloat); ok {
|
if fObj, ok := obj.(*PdfObjectFloat); ok {
|
||||||
num := float64(*fObj)
|
num := float64(*fObj)
|
||||||
@ -112,7 +129,7 @@ type PdfDate struct {
|
|||||||
utOffsetMins int64 // mm (00-59)
|
utOffsetMins int64 // mm (00-59)
|
||||||
}
|
}
|
||||||
|
|
||||||
var reDate = regexp.MustCompile(`\s*D\s*:\s*(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})([+-Z])(\d{2})'(\d{2})?`)
|
// reDate matches a PDF date string of the form "D:YYYYMMDDHHmmSSOHH'mm".
// Capture groups: 1 year, 2 month, 3 day, 4 hour, 5 minute, 6 second,
// 7 offset sign ('+', '-' or 'Z'), 8 offset hours, 9 offset minutes.
// Groups 7-9 are optional because some producers omit the offset.
// The '-' is placed first in the character class so that it is a literal;
// written as [+-Z] it would denote the range '+'..'Z', which includes digits.
var reDate = regexp.MustCompile(`\s*D\s*:\s*(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})([-+Z])?(\d{2})?'?(\d{2})?`)
|
||||||
|
|
||||||
// Make a new PdfDate object from a PDF date string (see 7.9.4 Dates).
|
// Make a new PdfDate object from a PDF date string (see 7.9.4 Dates).
|
||||||
// format: "D: YYYYMMDDHHmmSSOHH'mm"
|
// format: "D: YYYYMMDDHHmmSSOHH'mm"
|
||||||
@ -134,9 +151,22 @@ func NewPdfDate(dateStr string) (PdfDate, error) {
|
|||||||
d.hour, _ = strconv.ParseInt(matches[0][4], 10, 32)
|
d.hour, _ = strconv.ParseInt(matches[0][4], 10, 32)
|
||||||
d.minute, _ = strconv.ParseInt(matches[0][5], 10, 32)
|
d.minute, _ = strconv.ParseInt(matches[0][5], 10, 32)
|
||||||
d.second, _ = strconv.ParseInt(matches[0][6], 10, 32)
|
d.second, _ = strconv.ParseInt(matches[0][6], 10, 32)
|
||||||
d.utOffsetSign = matches[0][7][0]
|
// Some poor implementations do not include the offset.
|
||||||
d.utOffsetHours, _ = strconv.ParseInt(matches[0][8], 10, 32)
|
if len(matches[0][7]) > 0 {
|
||||||
d.utOffsetMins, _ = strconv.ParseInt(matches[0][9], 10, 32)
|
d.utOffsetSign = matches[0][7][0]
|
||||||
|
} else {
|
||||||
|
d.utOffsetSign = '+'
|
||||||
|
}
|
||||||
|
if len(matches[0][8]) > 0 {
|
||||||
|
d.utOffsetHours, _ = strconv.ParseInt(matches[0][8], 10, 32)
|
||||||
|
} else {
|
||||||
|
d.utOffsetHours = 0
|
||||||
|
}
|
||||||
|
if len(matches[0][9]) > 0 {
|
||||||
|
d.utOffsetMins, _ = strconv.ParseInt(matches[0][9], 10, 32)
|
||||||
|
} else {
|
||||||
|
d.utOffsetMins = 0
|
||||||
|
}
|
||||||
|
|
||||||
return d, nil
|
return d, nil
|
||||||
}
|
}
|
||||||
|
@ -125,6 +125,48 @@ func TestDateParse(t *testing.T) {
|
|||||||
t.Errorf("Invalid offset minutes")
|
t.Errorf("Invalid offset minutes")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Case 5: Missing some more parameters.
|
||||||
|
// Seems that many implementations consider some stuff optional...
|
||||||
|
// Not following the standard, but we need to handle it.
|
||||||
|
// D:20050823042205
|
||||||
|
str = "D:20050823042205"
|
||||||
|
date, err = NewPdfDate(str)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Fail: %s", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if date.year != 2005 {
|
||||||
|
t.Errorf("Year != 2005")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if date.month != 8 {
|
||||||
|
t.Errorf("month != 8")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if date.day != 23 {
|
||||||
|
t.Errorf("Day != 23")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if date.hour != 04 {
|
||||||
|
t.Errorf("Hour != 11 (%d)", date.hour)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if date.minute != 22 {
|
||||||
|
t.Errorf("Minute != 29 (%d)", date.minute)
|
||||||
|
}
|
||||||
|
if date.second != 05 {
|
||||||
|
t.Errorf("Second != 37 (%d)", date.second)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if date.utOffsetHours != 0 {
|
||||||
|
t.Errorf("Invalid offset hours")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if date.utOffsetMins != 0 {
|
||||||
|
t.Errorf("Invalid offset minutes")
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test parsing and building the date.
|
// Test parsing and building the date.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user