Merge branch 'master' into compositefonts

This commit is contained in:
Gunnsteinn Hall 2018-06-07 14:55:37 +00:00
commit ce9c6ba114
49 changed files with 8558 additions and 283 deletions

21
LICENSE.md Normal file
View File

@ -0,0 +1,21 @@
## Licensing Information
This library (UniDoc) has a dual license, a commercial one suitable for closed source projects and an
AGPL license that can be used in open source software.
Depending on your needs, you must choose one of them and follow its policies. Details of the policies
and agreements for each license type are available in the [LICENSE.COMMERCIAL](LICENSE.COMMERCIAL)
and [LICENSE.AGPL](LICENSE.AGPL) files.
In brief, purchasing a license is mandatory as soon as you develop activities
distributing the UniDoc software inside your product or deploying it on a network
without disclosing the source code of your own applications under the AGPL license.
These activities include:
* offering services as an application service provider or over-network application programming interface (API)
* creating/manipulating documents for users in a web/server/cloud application
* shipping UniDoc with a closed source product
Please see [pricing](http://unidoc.io/pricing) to purchase a commercial license or contact sales at sales@unidoc.io
for more info.

View File

@ -1,27 +1,35 @@
# UniDoc
[UniDoc](http://unidoc.io) is a fast and powerful open source PDF library for Go (golang). The library is written and supported by the owners of the [FoxyUtils.com](https://foxyutils.com) website, where the library is used to power many of the PDF services offered.
[UniDoc](http://unidoc.io) is a powerful PDF library for Go (golang). The library is written and supported by the owners of the [FoxyUtils.com](https://foxyutils.com) website, where the library is used to power many of the PDF services offered.
[![wercker status](https://app.wercker.com/status/22b50db125a6d376080f3f0c80d085fa/s/master "wercker status")](https://app.wercker.com/project/bykey/22b50db125a6d376080f3f0c80d085fa)
[![GoDoc](https://godoc.org/github.com/unidoc/unidoc?status.svg)](https://godoc.org/github.com/unidoc/unidoc)
# Version 2
Version 2.0.0 has been released. Version 2 represents a major improvement over version 1 with capabilities for modifying
and generating PDF contents. The library has been split up into three major packages and a
few smaller ones. The **core** package contains core PDF file parsing functionality and
primitive objects, whereas the **model** subpackage provides a higher level interface to the PDF.
The **creator** package provides a convenient interface for creating image and text based PDF files
and reports.
See the release announcement: [https://unidoc.io/news/unidoc-v2-released](https://unidoc.io/news/unidoc-v2-released)
## Installation
~~~
go get github.com/unidoc/unidoc/...
~~~
## Getting Rid of the Watermark - Get a License
Out of the box, unidoc is unlicensed and outputs a watermark on all pages, which is fine for prototyping.
To use unidoc in your projects, you need to get a license. We have 3 license types:
* Community: For open source AGPLv3 projects
* Business Individual
* Business Unlimited
Get your license on [https://unidoc.io](https://unidoc.io).
To load your license, simply do:
```
unidocLicenseKey := "... your license here ..."
err := license.SetLicenseKey(unidocLicenseKey)
if err != nil {
fmt.Printf("Error loading license: %v\n", err)
os.Exit(1)
}
```
## Examples
Multiple examples are provided in our example repository.
@ -62,8 +70,6 @@ Contributors need to approve the [Contributor License Agreement](https://docs.go
Please email us at support@unidoc.io for any queries.
Technical support is included with a purchase of a license, as listed on our [pricing](http://unidoc.io/pricing) page.
If you have any specific tasks that need to be done, we offer consulting in certain cases.
Please contact us with a brief summary of what you need and we will get back to you with a quote, if appropriate.

View File

@ -158,7 +158,6 @@ func licenseKeyDecode(content string) (LicenseKey, error) {
}
ret.CreatedAt = time.Unix(ret.CreatedAtInt, 0)
ret.ExpiresAt = time.Unix(ret.ExpiresAtInt, 0)
return ret, nil
}

View File

@ -7,24 +7,18 @@ package license
import (
"fmt"
"strings"
"time"
"github.com/unidoc/unidoc/common"
)
const (
LicenseTypeCommercial = "commercial"
LicenseTypeOpensource = "opensource"
LicenseTierUnlicensed = "unlicensed"
LicenseTierCommunity = "community"
LicenseTierIndividual = "individual"
LicenseTierBusiness = "business"
)
const opensourceLicenseId = "01aa523c-b4c6-4d57-bbdd-5a88d2bd5300"
const opensourceLicenseUuid = "01aa523c-b4c6-4d57-bbdd-5a88d2bd5301"
func getSupportedFeatures() []string {
return []string{"unidoc", "unidoc-cli"}
}
// Make sure all time is at least after this for sanity check.
var testTime = time.Date(2010, 1, 1, 0, 0, 0, 0, time.UTC)
@ -32,12 +26,9 @@ type LicenseKey struct {
LicenseId string `json:"license_id"`
CustomerId string `json:"customer_id"`
CustomerName string `json:"customer_name"`
Type string `json:"type"`
Features []string `json:"features"`
Tier string `json:"tier"`
CreatedAt time.Time `json:"-"`
CreatedAtInt int64 `json:"created_at"`
ExpiresAt time.Time `json:"-"`
ExpiresAtInt int64 `json:"expires_at"`
CreatedBy string `json:"created_by"`
CreatorName string `json:"creator_name"`
CreatorEmail string `json:"creator_email"`
@ -56,37 +47,10 @@ func (this *LicenseKey) Validate() error {
return fmt.Errorf("Invalid license: Customer Name")
}
if this.Features == nil || len(this.Features) < 1 {
return fmt.Errorf("Invalid license: No features")
}
for _, feature := range this.Features {
found := false
for _, sf := range getSupportedFeatures() {
if sf == feature {
found = true
break
}
}
if !found {
return fmt.Errorf("Invalid license: Unsupported feature %s", feature)
}
}
if testTime.After(this.CreatedAt) {
return fmt.Errorf("Invalid license: Created At is invalid")
}
if this.CreatedAt.After(this.ExpiresAt) {
return fmt.Errorf("Invalid license: Created At cannot be Greater than Expires At")
}
if common.ReleasedAt.After(this.ExpiresAt) {
return fmt.Errorf("Expired license, expired at: %s", common.UtcTimeFormat(this.ExpiresAt))
}
if len(this.CreatorName) < 1 {
return fmt.Errorf("Invalid license: Creator name")
}
@ -99,39 +63,40 @@ func (this *LicenseKey) Validate() error {
}
func (this *LicenseKey) TypeToString() string {
ret := "AGPLv3 Open Source License"
if this.Type == LicenseTypeCommercial {
ret = "Commercial License"
if this.Tier == LicenseTierUnlicensed {
return "Unlicensed"
}
return ret
if this.Tier == LicenseTierCommunity {
return "AGPLv3 Open Source Community License"
}
if this.Tier == LicenseTierIndividual || this.Tier == "indie" {
return "Commercial License - Individual"
}
return "Commercial License - Business"
}
func (this *LicenseKey) ToString() string {
str := fmt.Sprintf("License Id: %s\n", this.LicenseId)
str += fmt.Sprintf("Customer Id: %s\n", this.CustomerId)
str += fmt.Sprintf("Customer Name: %s\n", this.CustomerName)
str += fmt.Sprintf("Type: %s\n", this.Type)
str += fmt.Sprintf("Features: %s\n", strings.Join(this.Features, ", "))
str += fmt.Sprintf("Tier: %s\n", this.Tier)
str += fmt.Sprintf("Created At: %s\n", common.UtcTimeFormat(this.CreatedAt))
str += fmt.Sprintf("Expires At: %s\n", common.UtcTimeFormat(this.ExpiresAt))
str += fmt.Sprintf("Creator: %s <%s>\n", this.CreatorName, this.CreatorEmail)
return str
}
func MakeOpensourceLicenseKey() *LicenseKey {
// IsLicensed reports whether the key carries any tier other than
// LicenseTierUnlicensed.
func (lk *LicenseKey) IsLicensed() bool {
return lk.Tier != LicenseTierUnlicensed
}
func MakeUnlicensedKey() *LicenseKey {
lk := LicenseKey{}
lk.LicenseId = opensourceLicenseId
lk.CustomerId = opensourceLicenseUuid
lk.CustomerName = "Open Source Evangelist"
lk.Type = LicenseTypeOpensource
lk.Features = getSupportedFeatures()
lk.CustomerName = "Unlicensed"
lk.Tier = LicenseTierUnlicensed
lk.CreatedAt = time.Now().UTC()
lk.CreatedAtInt = lk.CreatedAt.Unix()
lk.ExpiresAt = lk.CreatedAt.AddDate(10, 0, 0)
lk.ExpiresAtInt = lk.ExpiresAt.Unix()
lk.CreatorName = "UniDoc Support"
lk.CreatorEmail = "support@unidoc.io"
return &lk
}

View File

@ -7,7 +7,7 @@
package license
// Defaults to the unlicensed key until SetLicenseKey installs a valid license.
var licenseKey *LicenseKey = MakeOpensourceLicenseKey()
var licenseKey *LicenseKey = MakeUnlicensedKey()
// Sets and validates the license key.
func SetLicenseKey(content string) error {
@ -27,5 +27,11 @@ func SetLicenseKey(content string) error {
}
func GetLicenseKey() *LicenseKey {
return licenseKey
if licenseKey == nil {
return nil
}
// Copy.
lk2 := *licenseKey
return &lk2
}

View File

@ -10,13 +10,13 @@ import (
"time"
)
const releaseYear = 2017
const releaseMonth = 8
const releaseDay = 11
const releaseHour = 12
const releaseMin = 05
const releaseYear = 2018
const releaseMonth = 5
const releaseDay = 20
const releaseHour = 23
const releaseMin = 30
// Holds version information. When bumping this, make sure to bump the ReleasedAt timestamp as well.
const Version = "2.0.1"
const Version = "2.1.0"
var ReleasedAt = time.Date(releaseYear, releaseMonth, releaseDay, releaseHour, releaseMin, 0, 0, time.UTC)

4
doc.go
View File

@ -39,4 +39,8 @@
// and graphical reports. It is designed with simplicity in mind, with the goal of
// making it easy to create reports without needing any knowledge about the PDF
// format or specifications.
//
// - pdf/extractor: Package extractor is used for quickly extracting PDF content
// through a simple interface. Currently offers functionality for extracting textual
// content.
package unidoc

View File

@ -103,14 +103,17 @@ func (this *ContentStreamOperations) Bytes() []byte {
return buf.Bytes()
}
// Parses and extracts all text data in content streams and returns as a string.
// ExtractText parses and extracts all text data in content streams and returns as a string.
// Does not take into account Encoding table, the output is simply the character codes.
//
// Deprecated: More advanced text extraction is offered in package extractor with character encoding support.
func (this *ContentStreamParser) ExtractText() (string, error) {
operations, err := this.Parse()
if err != nil {
return "", err
}
inText := false
xPos, yPos := float64(-1), float64(-1)
txt := ""
for _, op := range *operations {
if op.Operand == "BT" {
@ -122,6 +125,41 @@ func (this *ContentStreamParser) ExtractText() (string, error) {
// Move to next line...
txt += "\n"
}
if op.Operand == "Tm" {
if len(op.Params) != 6 {
continue
}
xfloat, ok := op.Params[4].(*PdfObjectFloat)
if !ok {
xint, ok := op.Params[4].(*PdfObjectInteger)
if !ok {
continue
}
xfloat = MakeFloat(float64(*xint))
}
yfloat, ok := op.Params[5].(*PdfObjectFloat)
if !ok {
yint, ok := op.Params[5].(*PdfObjectInteger)
if !ok {
continue
}
yfloat = MakeFloat(float64(*yint))
}
if yPos == -1 {
yPos = float64(*yfloat)
} else if yPos > float64(*yfloat) {
txt += "\n"
xPos = float64(*xfloat)
yPos = float64(*yfloat)
continue
}
if xPos == -1 {
xPos = float64(*xfloat)
} else if xPos < float64(*xfloat) {
txt += "\t"
xPos = float64(*xfloat)
}
}
if inText && op.Operand == "TJ" {
if len(op.Params) < 1 {
continue
@ -131,8 +169,17 @@ func (this *ContentStreamParser) ExtractText() (string, error) {
return "", fmt.Errorf("Invalid parameter type, no array (%T)", op.Params[0])
}
for _, obj := range *paramList {
if strObj, ok := obj.(*PdfObjectString); ok {
txt += string(*strObj)
switch v := obj.(type) {
case *PdfObjectString:
txt += string(*v)
case *PdfObjectFloat:
if *v < -100 {
txt += " "
}
case *PdfObjectInteger:
if *v < -100 {
txt += " "
}
}
}
} else if inText && op.Operand == "Tj" {

View File

@ -0,0 +1,25 @@
package contentstream
import (
"testing"
)
// TestOperandTJSpacing checks that ExtractText turns the large negative
// adjustments inside a TJ array into word spaces, while a small adjustment
// (the 5 between "h" and "ypothesized") does not split the word.
func TestOperandTJSpacing(t *testing.T) {
	content := `BT
[(are)-328(h)5(ypothesized)-328(to)-327(in\003uence)-328(the)-328(stability)-328(of)-328(the)-328(upstream)-327(glaciers,)-328(and)-328(thus)-328(of)-328(the)-328(entire)-327(ice)-328(sheet)]TJ
ET`
	referenceText := "are hypothesized to in\003uence the stability of the upstream glaciers, and thus of the entire ice sheet"

	cStreamParser := NewContentStreamParser(content)
	text, err := cStreamParser.ExtractText()
	if err != nil {
		// Stop here: comparing the text after a failed extraction is meaningless.
		t.Fatalf("ExtractText failed: %v", err)
	}
	if text != referenceText {
		// Quote both values so whitespace and control bytes are visible in the report.
		t.Errorf("Text mismatch: got %q, want %q", text, referenceText)
	}
}

View File

@ -331,7 +331,7 @@ func (this *ContentStreamParser) ParseInlineImage() (*ContentStreamInlineImage,
im.BitsPerComponent = valueObj
} else if *param == "CS" || *param == "ColorSpace" {
im.ColorSpace = valueObj
} else if *param == "D" {
} else if *param == "D" || *param == "Decode" {
im.Decode = valueObj
} else if *param == "DP" || *param == "DecodeParms" {
im.DecodeParms = valueObj

View File

@ -74,14 +74,14 @@ func (parser *PdfParser) GetTrailer() *PdfObjectDictionary {
func (parser *PdfParser) skipSpaces() (int, error) {
cnt := 0
for {
bb, err := parser.reader.Peek(1)
b, err := parser.reader.ReadByte()
if err != nil {
return 0, err
}
if IsWhiteSpace(bb[0]) {
parser.reader.ReadByte()
if IsWhiteSpace(b) {
cnt++
} else {
parser.reader.UnreadByte()
break
}
}
@ -121,11 +121,11 @@ func (parser *PdfParser) skipComments() error {
// Read a comment starting with '%'.
func (parser *PdfParser) readComment() (string, error) {
commentText := ""
var r bytes.Buffer
_, err := parser.skipSpaces()
if err != nil {
return commentText, err
return r.String(), err
}
isFirst := true
@ -133,45 +133,45 @@ func (parser *PdfParser) readComment() (string, error) {
bb, err := parser.reader.Peek(1)
if err != nil {
common.Log.Debug("Error %s", err.Error())
return commentText, err
return r.String(), err
}
if isFirst && bb[0] != '%' {
return commentText, errors.New("Comment should start with %")
return r.String(), errors.New("Comment should start with %")
} else {
isFirst = false
}
if (bb[0] != '\r') && (bb[0] != '\n') {
b, _ := parser.reader.ReadByte()
commentText += string(b)
r.WriteByte(b)
} else {
break
}
}
return commentText, nil
return r.String(), nil
}
// Read a single line of text from current position.
func (parser *PdfParser) readTextLine() (string, error) {
lineStr := ""
var r bytes.Buffer
for {
bb, err := parser.reader.Peek(1)
if err != nil {
common.Log.Debug("Error %s", err.Error())
return lineStr, err
return r.String(), err
}
if (bb[0] != '\r') && (bb[0] != '\n') {
b, _ := parser.reader.ReadByte()
lineStr += string(b)
r.WriteByte(b)
} else {
break
}
}
return lineStr, nil
return r.String(), nil
}
// Parse a name starting with '/'.
func (parser *PdfParser) parseName() (PdfObjectName, error) {
name := ""
var r bytes.Buffer
nameStarted := false
for {
bb, err := parser.reader.Peek(1)
@ -179,7 +179,7 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
break // Can happen when loading from object stream.
}
if err != nil {
return PdfObjectName(name), err
return PdfObjectName(r.String()), err
}
if !nameStarted {
@ -192,7 +192,7 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
parser.skipSpaces()
} else {
common.Log.Debug("ERROR Name starting with %s (% x)", bb, bb)
return PdfObjectName(name), fmt.Errorf("Invalid name: (%c)", bb[0])
return PdfObjectName(r.String()), fmt.Errorf("Invalid name: (%c)", bb[0])
}
} else {
if IsWhiteSpace(bb[0]) {
@ -202,22 +202,22 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
} else if bb[0] == '#' {
hexcode, err := parser.reader.Peek(3)
if err != nil {
return PdfObjectName(name), err
return PdfObjectName(r.String()), err
}
parser.reader.Discard(3)
code, err := hex.DecodeString(string(hexcode[1:3]))
if err != nil {
return PdfObjectName(name), err
return PdfObjectName(r.String()), err
}
name += string(code)
r.Write(code)
} else {
b, _ := parser.reader.ReadByte()
name += string(b)
r.WriteByte(b)
}
}
}
return PdfObjectName(name), nil
return PdfObjectName(r.String()), nil
}
// Numeric objects.
@ -243,9 +243,9 @@ func (parser *PdfParser) parseName() (PdfObjectName, error) {
func (parser *PdfParser) parseNumber() (PdfObject, error) {
isFloat := false
allowSigns := true
numStr := ""
var r bytes.Buffer
for {
common.Log.Trace("Parsing number \"%s\"", numStr)
common.Log.Trace("Parsing number \"%s\"", r.String())
bb, err := parser.reader.Peek(1)
if err == io.EOF {
// GH: EOF handling. Handle EOF like end of line. Can happen with
@ -260,19 +260,19 @@ func (parser *PdfParser) parseNumber() (PdfObject, error) {
if allowSigns && (bb[0] == '-' || bb[0] == '+') {
// Only appear in the beginning, otherwise serves as a delimiter.
b, _ := parser.reader.ReadByte()
numStr += string(b)
r.WriteByte(b)
allowSigns = false // Only allowed in beginning, and after e (exponential).
} else if IsDecimalDigit(bb[0]) {
b, _ := parser.reader.ReadByte()
numStr += string(b)
r.WriteByte(b)
} else if bb[0] == '.' {
b, _ := parser.reader.ReadByte()
numStr += string(b)
r.WriteByte(b)
isFloat = true
} else if bb[0] == 'e' {
// Exponential number format.
b, _ := parser.reader.ReadByte()
numStr += string(b)
r.WriteByte(b)
isFloat = true
allowSigns = true
} else {
@ -281,11 +281,16 @@ func (parser *PdfParser) parseNumber() (PdfObject, error) {
}
if isFloat {
fVal, err := strconv.ParseFloat(numStr, 64)
fVal, err := strconv.ParseFloat(r.String(), 64)
if err != nil {
common.Log.Debug("Error parsing number %v err=%v. Using 0.0. Output may be incorrect", r.String(), err)
fVal = 0.0
err = nil
}
o := PdfObjectFloat(fVal)
return &o, err
} else {
intVal, err := strconv.ParseInt(numStr, 10, 64)
intVal, err := strconv.ParseInt(r.String(), 10, 64)
o := PdfObjectInteger(intVal)
return &o, err
}
@ -295,26 +300,26 @@ func (parser *PdfParser) parseNumber() (PdfObject, error) {
func (parser *PdfParser) parseString() (PdfObjectString, error) {
parser.reader.ReadByte()
bytes := []byte{}
var r bytes.Buffer
count := 1
for {
bb, err := parser.reader.Peek(1)
if err != nil {
return PdfObjectString(bytes), err
return PdfObjectString(r.String()), err
}
if bb[0] == '\\' { // Escape sequence.
parser.reader.ReadByte() // Skip the escape \ byte.
b, err := parser.reader.ReadByte()
if err != nil {
return PdfObjectString(bytes), err
return PdfObjectString(r.String()), err
}
// Octal '\ddd' number (base 8).
if IsOctalDigit(b) {
bb, err := parser.reader.Peek(2)
if err != nil {
return PdfObjectString(bytes), err
return PdfObjectString(r.String()), err
}
numeric := []byte{}
@ -331,29 +336,29 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
common.Log.Trace("Numeric string \"%s\"", numeric)
code, err := strconv.ParseUint(string(numeric), 8, 32)
if err != nil {
return PdfObjectString(bytes), err
return PdfObjectString(r.String()), err
}
bytes = append(bytes, byte(code))
r.WriteByte(byte(code))
continue
}
switch b {
case 'n':
bytes = append(bytes, '\n')
r.WriteRune('\n')
case 'r':
bytes = append(bytes, '\r')
r.WriteRune('\r')
case 't':
bytes = append(bytes, '\t')
r.WriteRune('\t')
case 'b':
bytes = append(bytes, '\b')
r.WriteRune('\b')
case 'f':
bytes = append(bytes, '\f')
r.WriteRune('\f')
case '(':
bytes = append(bytes, '(')
r.WriteRune('(')
case ')':
bytes = append(bytes, ')')
r.WriteRune(')')
case '\\':
bytes = append(bytes, '\\')
r.WriteRune('\\')
}
continue
@ -368,10 +373,10 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
}
b, _ := parser.reader.ReadByte()
bytes = append(bytes, b)
r.WriteByte(b)
}
return PdfObjectString(bytes), nil
return PdfObjectString(r.String()), nil
}
// Starts with '<' ends with '>'.
@ -379,12 +384,8 @@ func (parser *PdfParser) parseString() (PdfObjectString, error) {
func (parser *PdfParser) parseHexString() (PdfObjectString, error) {
parser.reader.ReadByte()
hextable := []byte("0123456789abcdefABCDEF")
tmp := []byte{}
var r bytes.Buffer
for {
parser.skipSpaces()
bb, err := parser.reader.Peek(1)
if err != nil {
return PdfObjectString(""), err
@ -396,16 +397,16 @@ func (parser *PdfParser) parseHexString() (PdfObjectString, error) {
}
b, _ := parser.reader.ReadByte()
if bytes.IndexByte(hextable, b) >= 0 {
tmp = append(tmp, b)
if !IsWhiteSpace(b) {
r.WriteByte(b)
}
}
if len(tmp)%2 == 1 {
tmp = append(tmp, '0')
if r.Len()%2 == 1 {
r.WriteRune('0')
}
buf, _ := hex.DecodeString(string(tmp))
buf, _ := hex.DecodeString(r.String())
return PdfObjectString(buf), nil
}

View File

@ -8,6 +8,7 @@ package core
import (
"bufio"
"bytes"
"encoding/hex"
//"fmt"
"io"
//"os"
@ -27,27 +28,54 @@ func makeReaderForText(txt string) (*bytes.Reader, *bufio.Reader, int64) {
return bufReader, bufferedReader, int64(len(txt))
}
// makeParserForText builds a PdfParser whose seekable source, buffered reader
// and file size all come from makeReaderForText(txt).
func makeParserForText(txt string) *PdfParser {
	parser := &PdfParser{}
	parser.rs, parser.reader, parser.fileSize = makeReaderForText(txt)
	return parser
}
// BenchmarkSkipSpaces measures skipSpaces on an input that starts with a run of
// whitespace followed by non-whitespace bytes. The file offset is reset to 0
// after every iteration so each pass sees the same input.
func BenchmarkSkipSpaces(b *testing.B) {
	parser := makeParserForText(" \t\t \tABC")
	for n := 0; n < b.N; n++ {
		// Surface failures instead of silently timing a broken code path.
		if _, err := parser.skipSpaces(); err != nil {
			b.Errorf("Unable to skip spaces, error: %s", err)
			return
		}
		parser.SetFileOffset(0)
	}
}
var namePairs = map[string]string{
"/Name1": "Name1",
"/ASomewhatLongerName": "ASomewhatLongerName",
"/A;Name_With-Various***Characters?": "A;Name_With-Various***Characters?",
"/1.2": "1.2",
"/$$": "$$",
"/@pattern": "@pattern",
"/.notdef": ".notdef",
"/Lime#20Green": "Lime Green",
"/paired#28#29parentheses": "paired()parentheses",
"/The_Key_of_F#23_Minor": "The_Key_of_F#_Minor",
"/A#42": "AB",
"/": "",
"/ ": "",
"/#3CBC88#3E#3CC5ED#3E#3CD544#3E#3CC694#3E": "<BC88><C5ED><D544><C694>",
}
// BenchmarkNameParsing parses every raw name in namePairs on each iteration and
// reports any parse failure (other than io.EOF) or mismatch with the expected
// decoded name.
func BenchmarkNameParsing(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for raw, want := range namePairs {
			got, err := makeParserForText(raw).parseName()
			// io.EOF is tolerated: the name may run to the end of the input.
			if !(err == nil || err == io.EOF) {
				b.Errorf("Unable to parse name string, error: %s", err)
			}
			if string(got) == want {
				continue
			}
			b.Errorf("Mismatch %s != %s", got, want)
		}
	}
}
func TestNameParsing(t *testing.T) {
namePairs := map[string]string{}
namePairs["/Name1"] = "Name1"
namePairs["/ASomewhatLongerName"] = "ASomewhatLongerName"
namePairs["/A;Name_With-Various***Characters?"] = "A;Name_With-Various***Characters?"
namePairs["/1.2"] = "1.2"
namePairs["/$$"] = "$$"
namePairs["/@pattern"] = "@pattern"
namePairs["/.notdef"] = ".notdef"
namePairs["/Lime#20Green"] = "Lime Green"
namePairs["/paired#28#29parentheses"] = "paired()parentheses"
namePairs["/The_Key_of_F#23_Minor"] = "The_Key_of_F#_Minor"
namePairs["/A#42"] = "AB"
namePairs["/"] = ""
namePairs["/ "] = ""
namePairs["/#3CBC88#3E#3CC5ED#3E#3CD544#3E#3CC694#3E"] = "<BC88><C5ED><D544><C694>"
for str, name := range namePairs {
parser := PdfParser{}
parser.rs, parser.reader, parser.fileSize = makeReaderForText(str)
parser := makeParserForText(str)
o, err := parser.parseName()
if err != nil && err != io.EOF {
t.Errorf("Unable to parse name string, error: %s", err)
@ -58,8 +86,7 @@ func TestNameParsing(t *testing.T) {
}
// Should fail (require starting with '/')
parser := PdfParser{}
parser.rs, parser.reader, parser.fileSize = makeReaderForText(" /Name")
parser := makeParserForText(" /Name")
_, err := parser.parseName()
if err == nil || err == io.EOF {
t.Errorf("Should be invalid name")
@ -71,37 +98,60 @@ type testStringEntry struct {
expected string
}
func TestStringParsing(t *testing.T) {
testEntries := []testStringEntry{
{"(This is a string)", "This is a string"},
{"(Strings may contain\n newlines and such)", "Strings may contain\n newlines and such"},
{"(Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).)",
"Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on)."},
{"(These \\\ntwo strings \\\nare the same.)", "These two strings are the same."},
{"(These two strings are the same.)", "These two strings are the same."},
{"(\\\\)", "\\"},
{"(This string has an end-of-line at the end of it.\n)",
"This string has an end-of-line at the end of it.\n"},
{"(So does this one.\\n)", "So does this one.\n"},
{"(\\0053)", "\0053"},
{"(\\053)", "\053"},
{"(\\53)", "\053"},
{"(\\053)", "+"},
{"(\\53\\101)", "+A"},
func BenchmarkStringParsing(b *testing.B) {
entry := "(Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).)"
parser := makeParserForText(entry)
for n := 0; n < b.N; n++ {
_, err := parser.parseString()
if err != nil && err != io.EOF {
b.Errorf("Unable to parse string, error: %s", err)
}
parser.SetFileOffset(0)
}
for _, entry := range testEntries {
parser := PdfParser{}
parser.rs, parser.reader, parser.fileSize = makeReaderForText(entry.raw)
}
var stringPairs = map[string]string{
"(This is a string)": "This is a string",
"(Strings may contain\n newlines and such)": "Strings may contain\n newlines and such",
"(Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).)": "Strings may contain balanced parenthesis () and\nspecial characters (*!&}^% and so on).",
"(These \\\ntwo strings \\\nare the same.)": "These two strings are the same.",
"(These two strings are the same.)": "These two strings are the same.",
"(\\\\)": "\\",
"(This string has an end-of-line at the end of it.\n)": "This string has an end-of-line at the end of it.\n",
"(So does this one.\\n)": "So does this one.\n",
"(\\0053)": "\0053",
"(\\53)": "\053",
"(\\053)": "+",
"(\\53\\101)": "+A",
}
func TestStringParsing(t *testing.T) {
for raw, expected := range stringPairs {
parser := makeParserForText(raw)
o, err := parser.parseString()
if err != nil && err != io.EOF {
t.Errorf("Unable to parse string, error: %s", err)
}
if string(o) != entry.expected {
t.Errorf("String Mismatch %s: \"%s\" != \"%s\"", entry.raw, o, entry.expected)
if string(o) != expected {
t.Errorf("String Mismatch %s: \"%s\" != \"%s\"", raw, o, expected)
}
}
}
// TestReadTextLine verifies that readTextLine returns the bytes it consumed
// unchanged, including a byte >= 0x80 that is not valid UTF-8 on its own, and
// that the file offset advances by exactly the length of the returned string.
func TestReadTextLine(t *testing.T) {
	// Reading a text line and rewinding should be idempotent: rewinding len(s)
	// bytes after reading string s must land back at the beginning of s.
	rawText := "abc\xb0cde"

	parser := makeParserForText(rawText)
	s, err := parser.readTextLine()
	if err != nil && err != io.EOF {
		t.Errorf("Unable to parse string, error: %s", err)
	}
	// The input has no line terminator, so the whole input must come back as is.
	if s != rawText {
		t.Errorf("Text line mismatch: got %q, want %q", s, rawText)
	}
	if parser.GetFileOffset() != int64(len(s)) {
		t.Errorf("File offset after reading string of length %d is %d", len(s), parser.GetFileOffset())
	}
}
func TestBinStringParsing(t *testing.T) {
// From an example O entry in Encrypt dictionary.
rawText1 := "(\xE6\x00\xEC\xC2\x02\x88\xAD\x8B\\r\x64\xA9" +
@ -157,6 +207,21 @@ func TestBoolParsing(t *testing.T) {
}
}
func BenchmarkNumbericParsing(b *testing.B) {
txt1 := "[34.5 -3.62 1 +123.6 4. -.002 0.0]"
parser := PdfParser{}
parser.rs, parser.reader, parser.fileSize = makeReaderForText(txt1)
for n := 0; n < b.N; n++ {
_, err := parser.parseArray()
if err != nil {
b.Errorf("Error parsing array")
return
}
parser.SetFileOffset(0)
}
}
func TestNumericParsing1(t *testing.T) {
// 7.3.3
txt1 := "[34.5 -3.62 1 +123.6 4. -.002 0.0]"
@ -270,6 +335,25 @@ func TestNumericParsing3(t *testing.T) {
}
}
func BenchmarkHexStringParsing(b *testing.B) {
var ref bytes.Buffer
for i := 0; i < 0xff; i++ {
ref.WriteByte(byte(i))
}
parser := makeParserForText("<" + hex.EncodeToString(ref.Bytes()) + ">")
for n := 0; n < b.N; n++ {
hs, err := parser.parseHexString()
if err != nil {
b.Errorf("Error parsing hex string: %s", err.Error())
return
}
if string(hs) != ref.String() {
b.Errorf("Reference and parsed hex strings mismatch")
}
parser.SetFileOffset(0)
}
}
func TestHexStringParsing(t *testing.T) {
// 7.3.4.3
}

View File

@ -12,9 +12,8 @@ func IsWhiteSpace(ch byte) bool {
// spaceCharacters := string([]byte{0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20})
if (ch == 0x00) || (ch == 0x09) || (ch == 0x0A) || (ch == 0x0C) || (ch == 0x0D) || (ch == 0x20) {
return true
} else {
return false
}
return false
}
// IsFloatDigit checks if a character can be a part of a float number string.

5523
pdf/creator/1 Normal file

File diff suppressed because it is too large Load Diff

View File

@ -7,9 +7,9 @@ package creator
import (
"errors"
"fmt"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/contentstream"
"github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/model"
@ -344,13 +344,15 @@ func mergeContents(contents *contentstream.ContentStreamOperations, resources *m
// To properly add contents from a block, we need to handle the resources that the block is
// using and make sure it is accessible in the modified Page.
//
// Currently only supporting: Font, XObject, Colormap resources
// Currently supporting: Font, XObject, Colormap, Pattern, Shading, GState resources
// from the block.
//
xobjectMap := map[core.PdfObjectName]core.PdfObjectName{}
fontMap := map[core.PdfObjectName]core.PdfObjectName{}
csMap := map[core.PdfObjectName]core.PdfObjectName{}
patternMap := map[core.PdfObjectName]core.PdfObjectName{}
shadingMap := map[core.PdfObjectName]core.PdfObjectName{}
gstateMap := map[core.PdfObjectName]core.PdfObjectName{}
for _, op := range *contentsToAdd {
@ -426,14 +428,86 @@ func mergeContents(contents *contentstream.ContentStreamOperations, resources *m
}
useName = useName + "0"
}
}
resources.SetColorspaceByName(useName, cs)
csMap[*name] = useName
resources.SetColorspaceByName(useName, cs)
csMap[*name] = useName
} else {
common.Log.Debug("Colorspace not found")
}
}
useName := csMap[*name]
op.Params[0] = &useName
if useName, has := csMap[*name]; has {
op.Params[0] = &useName
} else {
common.Log.Debug("Error: Colorspace %s not found", *name)
}
}
}
case "SCN", "scn":
if len(op.Params) == 1 {
if name, ok := op.Params[0].(*core.PdfObjectName); ok {
if _, processed := patternMap[*name]; !processed {
var useName core.PdfObjectName
p, found := resourcesToAdd.GetPatternByName(*name)
if found {
useName = *name
for {
p2, found := resources.GetPatternByName(useName)
if !found || p2 == p {
break
}
useName = useName + "0"
}
err := resources.SetPatternByName(useName, p.ToPdfObject())
if err != nil {
return err
}
patternMap[*name] = useName
}
}
if useName, has := patternMap[*name]; has {
op.Params[0] = &useName
}
}
}
case "sh":
// Shading.
if len(op.Params) == 1 {
if name, ok := op.Params[0].(*core.PdfObjectName); ok {
if _, processed := shadingMap[*name]; !processed {
var useName core.PdfObjectName
// Process if not already processed.
sh, found := resourcesToAdd.GetShadingByName(*name)
if found {
useName = *name
for {
sh2, found := resources.GetShadingByName(useName)
if !found || sh == sh2 {
break
}
useName = useName + "0"
}
err := resources.SetShadingByName(useName, sh.ToPdfObject())
if err != nil {
common.Log.Debug("ERROR Set shading: %v", err)
return err
}
shadingMap[*name] = useName
} else {
common.Log.Debug("Shading not found")
}
}
if useName, has := shadingMap[*name]; has {
op.Params[0] = &useName
} else {
common.Log.Debug("Error: Shading %s not found", *name)
}
}
}
case "gs":

View File

@ -115,7 +115,7 @@ func (chap *Chapter) Add(d Drawable) error {
case *Chapter:
common.Log.Debug("Error: Cannot add chapter to a chapter")
return errors.New("Type check error")
case *Paragraph, *Image, *Block, *Subchapter, *Table:
case *Paragraph, *Image, *Block, *Subchapter, *Table, *PageBreak:
chap.contents = append(chap.contents, d)
default:
common.Log.Debug("Unsupported: %T", d)

View File

@ -41,6 +41,15 @@ type Creator struct {
finalized bool
toc *TableOfContents
// Forms.
acroForm *model.PdfAcroForm
}
// SetForms sets the AcroForm to be written out with the PDF. The form is only
// stored on the Creator here; this method always returns nil.
func (c *Creator) SetForms(form *model.PdfAcroForm) error {
c.acroForm = form
return nil
}
// FrontpageFunctionArgs holds the input arguments to a front page drawing function.
@ -453,6 +462,14 @@ func (c *Creator) Write(ws io.WriteSeeker) error {
}
pdfWriter := model.NewPdfWriter()
// Form fields.
if c.acroForm != nil {
errF := pdfWriter.SetForms(c.acroForm)
if errF != nil {
common.Log.Debug("Failure: %v", errF)
return errF
}
}
// Pdf Writer access hook. Can be used to encrypt, etc. via the PdfWriter instance.
if c.pdfWriterAccessFunc != nil {

View File

@ -20,6 +20,7 @@ import (
"github.com/boombuler/barcode/qr"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/model"
"github.com/unidoc/unidoc/pdf/model/fonts"
"github.com/unidoc/unidoc/pdf/model/textencoding"
@ -75,6 +76,7 @@ func TestTemplate1(t *testing.T) {
return
}
// TestImage1 tests loading an image and adding to file at an absolute position.
func TestImage1(t *testing.T) {
creator := New()
@ -106,6 +108,45 @@ func TestImage1(t *testing.T) {
}
}
// TestImageWithEncoder tests loading an image and inserting it with an
// explicitly specified encoder.
func TestImageWithEncoder(t *testing.T) {
creator := New()
imgData, err := ioutil.ReadFile(testImageFile1)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
img, err := NewImageFromData(imgData)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
// JPEG encoder (DCT) with quality factor 70.
// The encoder dimensions are taken from the loaded image.
encoder := core.NewDCTEncoder()
encoder.Quality = 70
encoder.Width = int(img.Width())
encoder.Height = int(img.Height())
img.SetEncoder(encoder)
// Position the image and scale it to the creator's full width.
img.SetPos(0, 100)
img.ScaleToWidth(1.0 * creator.Width())
err = creator.Draw(img)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
// Write the result to a fixed path under /tmp.
err = creator.WriteToFile("/tmp/1_dct.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
}
func TestShapes1(t *testing.T) {
creator := New()
@ -573,23 +614,31 @@ func TestParagraphStandardFonts(t *testing.T) {
func TestParagraphChinese(t *testing.T) {
creator := New()
p := NewParagraph("你好")
font, err := model.NewCompositePdfFontFromTTFFile(testWts11TTFFile)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
lines := []string{
"你好",
"你好你好你好你好",
"河上白云",
}
p.SetFont(font)
for _, line := range lines {
p := NewParagraph(line)
err = creator.Draw(p)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
font, err := model.NewCompositePdfFontFromTTFFile(testWts11TTFFile)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
p.SetFont(font)
err = creator.Draw(p)
if err != nil {
t.Errorf("Fail: %v\n", err)
return
}
}
err = creator.WriteToFile("/tmp/2_p_nihao.pdf")
err := creator.WriteToFile("/tmp/2_p_nihao.pdf")
if err != nil {
t.Errorf("Fail: %v\n", err)
return

71
pdf/creator/curve.go Normal file
View File

@ -0,0 +1,71 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package creator
import (
"fmt"
"strings"
"github.com/unidoc/unidoc/pdf/model"
)
// NewCurve creates a Curve running from (x1,y1) to (x2,y2) that is shaped by the control point (cx,cy).
// The curve starts out with an RGB line color of (0,0,0) and a line width of 1.0.
func NewCurve(x1, y1, cx, cy, x2, y2 float64) *Curve {
	return &Curve{
		x1:        x1,
		y1:        y1,
		cx:        cx,
		cy:        cy,
		x2:        x2,
		y2:        y2,
		lineColor: model.NewPdfColorDeviceRGB(0, 0, 0),
		lineWidth: 1.0,
	}
}
// Curve represents a cubic Bezier curve with a control point.
// It is written to the content stream with the `v` curve operator (see GeneratePageBlocks).
type Curve struct {
	// Start point.
	x1 float64
	y1 float64
	cx float64 // control point
	cy float64
	// End point.
	x2 float64
	y2 float64
	// Stroke color and stroke width of the curve.
	lineColor *model.PdfColorDeviceRGB
	lineWidth float64
}
// SetWidth sets the line width used when the curve is stroked.
func (c *Curve) SetWidth(width float64) {
	c.lineWidth = width
}
// SetColor sets the line color. The color is stored as a DeviceRGB color built from col.ToRGB().
func (c *Curve) SetColor(col Color) {
	c.lineColor = model.NewPdfColorDeviceRGB(col.ToRGB())
}
// GeneratePageBlocks renders the curve into a single block that has the size of the page.
// The draw context is handed back unchanged.
func (c *Curve) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, error) {
	// All y values are subtracted from the page height before they are written out.
	startY := ctx.PageHeight - c.y1
	controlY := ctx.PageHeight - c.cy
	endY := ctx.PageHeight - c.y2

	content := strings.Join([]string{
		fmt.Sprintf("%.2f w", c.lineWidth),                                                     // line width
		fmt.Sprintf("%.3f %.3f %.3f RG", c.lineColor[0], c.lineColor[1], c.lineColor[2]),       // line color
		fmt.Sprintf("%.2f %.2f m", c.x1, startY),                                               // move to
		fmt.Sprintf("%.5f %.5f %.5f %.5f v S", c.cx, controlY, c.x2, endY),                     // curve and stroke
	}, "\n")

	block := NewBlock(ctx.PageWidth, ctx.PageHeight)
	if err := block.addContentsByString(content); err != nil {
		return nil, ctx, err
	}
	return []*Block{block}, ctx, nil
}

71
pdf/creator/curve_test.go Normal file
View File

@ -0,0 +1,71 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package creator
import "testing"
// TestNewCurve draws a single green curve with a line width of 3 on a new page and writes the
// result to /tmp/curve.pdf.
func TestNewCurve(t *testing.T) {
	creator := New()
	creator.NewPage()
	curve := NewCurve(20, 20, 15, 35, 40, 150)
	curve.SetWidth(3.0)
	curve.SetColor(ColorGreen)
	err := creator.Draw(curve)
	if err != nil {
		t.Errorf("Fail: %v", err)
		return
	}
	err = creator.WriteToFile("/tmp/curve.pdf")
	if err != nil {
		t.Errorf("Fail: %v", err)
		return
	}
}
// CreateCurve is a test helper returning a curve from (x1,y1) to (x2,y2) with control point (cx,cy),
// a line width of 1 and the given color.
func CreateCurve(x1, y1, cx, cy, x2, y2 float64, color Color) *Curve {
	curve := NewCurve(x1, y1, cx, cy, x2, y2)
	curve.SetWidth(1)
	curve.SetColor(color)
	return curve
}
// CreateLine is a test helper returning a red line from (x1,y1) to (x2,y2) with the given line width.
func CreateLine(x1, y1, x2, y2, width float64) *Line {
	line := NewLine(x1, y1, x2, y2)
	line.SetLineWidth(width)
	line.SetColor(ColorRed)
	return line
}
// TestNewCurveWithGlass combines several lines and curves into one figure on a single page and
// writes the result to /tmp/curve_glass.pdf. A failure to draw any of the shapes fails the test.
func TestNewCurveWithGlass(t *testing.T) {
	creator := New()
	creator.NewPage()

	shapes := []Drawable{
		// Width 200
		CreateLine(30, 200, 270, 200, 1),
		// Curve up
		CreateCurve(50, 200, 75, 145, 150, 150, ColorRed),
		CreateCurve(150, 150, 205, 145, 250, 200, ColorGreen),
		// Curve down
		CreateCurve(50, 200, 75, 245, 150, 250, ColorBlue),
		CreateCurve(150, 250, 225, 245, 250, 200, ColorBlack),
		// Vertical line
		CreateLine(50, 200, 51, 400, 1),
		CreateLine(250, 200, 251, 400, 1),
		// Curve down
		CreateCurve(51, 399, 75, 445, 150, 450, ColorRed),
		CreateCurve(150, 450, 225, 445, 251, 399, ColorGreen),
	}
	// Draw in order and stop at the first failure instead of silently dropping the error.
	for _, shape := range shapes {
		if err := creator.Draw(shape); err != nil {
			t.Errorf("Fail: %v", err)
			return
		}
	}

	err := creator.WriteToFile("/tmp/curve_glass.pdf")
	if err != nil {
		t.Errorf("Fail: %v", err)
		return
	}
}

112
pdf/creator/filled_curve.go Normal file
View File

@ -0,0 +1,112 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package creator
import (
pdfcontent "github.com/unidoc/unidoc/pdf/contentstream"
"github.com/unidoc/unidoc/pdf/contentstream/draw"
pdfcore "github.com/unidoc/unidoc/pdf/core"
pdf "github.com/unidoc/unidoc/pdf/model"
)
// FilledCurve represents a closed path of Bezier curves with a border and fill.
// The colors are set through SetFillColor and SetBorderColor; they are nil until then.
type FilledCurve struct {
	// Segments of the path, in the order in which they were appended.
	curves        []draw.CubicBezierCurve
	FillEnabled   bool // Show fill?
	fillColor     *pdf.PdfColorDeviceRGB
	BorderEnabled bool // Show border?
	BorderWidth   float64
	borderColor   *pdf.PdfColorDeviceRGB
}
// NewFilledCurve returns a new filled curve that has no curve segments yet.
func NewFilledCurve() *FilledCurve {
	return &FilledCurve{
		curves: []draw.CubicBezierCurve{},
	}
}
// AppendCurve adds a Bezier curve segment to the end of the path and returns the filled curve
// itself, so that calls can be chained.
func (fc *FilledCurve) AppendCurve(curve draw.CubicBezierCurve) *FilledCurve {
	fc.curves = append(fc.curves, curve)
	return fc
}
// SetFillColor sets the color the path is filled with.
func (fc *FilledCurve) SetFillColor(color Color) {
	fc.fillColor = pdf.NewPdfColorDeviceRGB(color.ToRGB())
}
// SetBorderColor sets the color the border of the path is stroked with.
func (fc *FilledCurve) SetBorderColor(color Color) {
	fc.borderColor = pdf.NewPdfColorDeviceRGB(color.ToRGB())
}
// draw draws the filled curve. Can specify a graphics state (gsName) for setting opacity etc. Otherwise leave empty ("").
// Returns the content stream as a byte array, the bounding box and an error on failure.
//
// A fill or border color that was never set is not written to the content stream, the path is then
// painted with whatever color is current at that point.
func (fc *FilledCurve) draw(gsName string) ([]byte, *pdf.PdfRectangle, error) {
	bpath := draw.NewCubicBezierPath()
	for _, c := range fc.curves {
		bpath = bpath.AppendCurve(c)
	}

	creator := pdfcontent.NewContentCreator()
	creator.Add_q()

	// The colors are nil until SetFillColor/SetBorderColor is called; guard against dereferencing them.
	if fc.FillEnabled && fc.fillColor != nil {
		creator.Add_rg(fc.fillColor.R(), fc.fillColor.G(), fc.fillColor.B())
	}
	if fc.BorderEnabled {
		if fc.borderColor != nil {
			creator.Add_RG(fc.borderColor.R(), fc.borderColor.G(), fc.borderColor.B())
		}
		creator.Add_w(fc.BorderWidth)
	}
	if len(gsName) > 0 {
		// If a graphics state is provided, use it. (can support transparency).
		// Any non-empty name counts, including names that are a single character long.
		creator.Add_gs(pdfcore.PdfObjectName(gsName))
	}

	draw.DrawBezierPathWithCreator(bpath, creator)
	creator.Add_h() // Close the path.

	if fc.FillEnabled && fc.BorderEnabled {
		creator.Add_B() // fill and stroke.
	} else if fc.FillEnabled {
		creator.Add_f() // Fill.
	} else if fc.BorderEnabled {
		creator.Add_S() // Stroke.
	}
	creator.Add_Q()

	// Get bounding box.
	pathBbox := bpath.GetBoundingBox()
	if fc.BorderEnabled {
		// Account for stroke width: half of the border width is added on every side.
		pathBbox.Height += fc.BorderWidth
		pathBbox.Width += fc.BorderWidth
		pathBbox.X -= fc.BorderWidth / 2
		pathBbox.Y -= fc.BorderWidth / 2
	}

	// Bounding box - global coordinate system.
	bbox := &pdf.PdfRectangle{}
	bbox.Llx = pathBbox.X
	bbox.Lly = pathBbox.Y
	bbox.Urx = pathBbox.X + pathBbox.Width
	bbox.Ury = pathBbox.Y + pathBbox.Height
	return creator.Bytes(), bbox, nil
}
// GeneratePageBlocks draws the filled curve on page blocks.
// A single block with the size of the page is returned and the draw context is passed back
// unchanged. An error from building or adding the content stream is returned as is.
func (this *FilledCurve) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, error) {
	block := NewBlock(ctx.PageWidth, ctx.PageHeight)

	contents, _, err := this.draw("")
	if err != nil {
		// Do not add contents of a failed draw to the block.
		return nil, ctx, err
	}

	err = block.addContentsByString(string(contents))
	if err != nil {
		return nil, ctx, err
	}
	return []*Block{block}, ctx, nil
}

View File

@ -0,0 +1,46 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package creator
import (
"testing"
"github.com/unidoc/unidoc/pdf/contentstream/draw"
)
// CreateFillCurve is a test helper that forwards its eight coordinates unchanged to
// draw.NewCubicBezierCurve.
func CreateFillCurve(x0, y0, x1, y1, x2, y2, x3, y3 float64) draw.CubicBezierCurve {
	return draw.NewCubicBezierCurve(x0, y0, x1, y1, x2, y2, x3, y3)
}
// TestNewFilledCurve builds a filled curve (green fill, blue border of width 2) out of five Bezier
// segments, draws it on a new page and writes the result to /tmp/filledCurve.pdf.
func TestNewFilledCurve(t *testing.T) {
	filledCurve := NewFilledCurve()
	filledCurve.FillEnabled = true
	filledCurve.BorderEnabled = true
	filledCurve.BorderWidth = 2
	filledCurve.SetFillColor(ColorGreen)
	filledCurve.SetBorderColor(ColorBlue)

	// Up Left
	filledCurve.AppendCurve(CreateFillCurve(300, 300, 230, 350, 200, 280, 220, 220))
	// Down Left
	filledCurve.AppendCurve(CreateFillCurve(225, 240, 240, 180, 260, 160, 300, 180))
	// Down Right
	filledCurve.AppendCurve(CreateFillCurve(305, 170, 335, 165, 350, 185, 365, 220))
	// Up Right
	filledCurve.AppendCurve(CreateFillCurve(365, 240, 385, 315, 350, 325, 300, 300))
	// Leaf
	filledCurve.AppendCurve(CreateFillCurve(300, 300, 290, 350, 295, 370, 300, 390))

	creator := New()
	creator.NewPage()

	// A draw failure must fail the test rather than produce a PDF without the shape.
	err := creator.Draw(filledCurve)
	if err != nil {
		t.Errorf("Fail: %v", err)
		return
	}

	err = creator.WriteToFile("/tmp/filledCurve.pdf")
	if err != nil {
		t.Errorf("Fail: %v", err)
		return
	}
}

View File

@ -21,6 +21,7 @@ import (
// The Image type is used to draw an image onto PDF.
type Image struct {
xobj *model.XObjectImage
img *model.Image
// Rotation angle.
angle float64
@ -46,20 +47,15 @@ type Image struct {
// Rotional origin. Default (0,0 - upper left corner of block).
rotOriginX, rotOriginY float64
// Encoder
encoder core.StreamEncoder
}
// NewImage create a new image from a unidoc image (model.Image).
func NewImage(img *model.Image) (*Image, error) {
image := &Image{}
// Create the XObject image.
ximg, err := model.NewXObjectImageFromImage(img, nil, core.NewFlateEncoder())
if err != nil {
common.Log.Error("Failed to create xobject image: %s", err)
return nil, err
}
image.xobj = ximg
image.img = img
// Image original size in points = pixel size.
image.origWidth = float64(img.Width)
@ -113,6 +109,11 @@ func NewImageFromGoImage(goimg goimage.Image) (*Image, error) {
return NewImage(img)
}
// SetEncoder sets the encoding/compression mechanism for the image.
// The encoder is only stored here; it is read when the XObject image is built (see makeXObject).
func (img *Image) SetEncoder(encoder core.StreamEncoder) {
	img.encoder = encoder
}
// Height returns Image's document height.
func (img *Image) Height() float64 {
return img.height
@ -141,8 +142,32 @@ func (img *Image) GetMargins() (float64, float64, float64, float64) {
return img.margins.left, img.margins.right, img.margins.top, img.margins.bottom
}
// makeXObject builds the encoded XObject image that goes into the PDF and stores it on the image.
// The flate encoder is used when no encoder has been set.
func (img *Image) makeXObject() error {
	enc := img.encoder
	if enc == nil {
		// No explicit choice was made, fall back to flate.
		enc = core.NewFlateEncoder()
	}

	xobj, err := model.NewXObjectImageFromImage(img.img, nil, enc)
	if err != nil {
		common.Log.Error("Failed to create xobject image: %s", err)
		return err
	}

	img.xobj = xobj
	return nil
}
// GeneratePageBlocks generate the Page blocks. Draws the Image on a block, implementing the Drawable interface.
func (img *Image) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, error) {
if img.xobj == nil {
// Build the XObject Image if not already prepared.
img.makeXObject()
}
blocks := []*Block{}
origCtx := ctx

31
pdf/creator/pagebreak.go Normal file
View File

@ -0,0 +1,31 @@
package creator
// PageBreak represents a page break for a chapter.
// It carries no state; its effect comes from GeneratePageBlocks.
type PageBreak struct {
}
// NewPageBreak creates a new page break.
func NewPageBreak() *PageBreak {
	return &PageBreak{}
}
// GeneratePageBlocks produces the blocks for a page break and returns the context for the next page.
func (p *PageBreak) GeneratePageBlocks(ctx DrawContext) ([]*Block, DrawContext, error) {
	// Two empty blocks are returned: the first one takes the space from the current position to the
	// bottom of the page (nothing more is added there), the second one starts a new page.
	remainder := NewBlock(ctx.PageWidth, ctx.PageHeight-ctx.Y)
	nextPage := NewBlock(ctx.PageWidth, ctx.PageHeight)

	// Continue on the following page, in the upper left corner inside of the margins.
	ctx.Page++
	ctx.X = ctx.Margins.left
	ctx.Y = ctx.Margins.top
	ctx.Width = ctx.PageWidth - ctx.Margins.left - ctx.Margins.right
	ctx.Height = ctx.PageHeight - ctx.Margins.top - ctx.Margins.bottom

	return []*Block{remainder, nextPage}, ctx, nil
}

View File

@ -116,6 +116,11 @@ func (p *Paragraph) SetText(text string) {
p.text = text
}
// Text returns the text content of the Paragraph.
func (p *Paragraph) Text() string {
	return p.text
}
// SetEnableWrap sets the line wrapping enabled flag.
func (p *Paragraph) SetEnableWrap(enableWrap bool) {
p.enableWrap = enableWrap
@ -238,9 +243,9 @@ func (p *Paragraph) wrapText() error {
metrics, found := p.textFont.GetGlyphCharMetrics(glyph)
if !found {
common.Log.Debug("Glyph char metrics not found! %s\n", glyph)
common.Log.Debug("Font: %#v", p.textFont)
common.Log.Debug("Encoder: %#v", p.textFont.Encoder())
common.Log.Debug("Glyph char metrics not found! %s (%s)\n", glyph, string(val))
common.Log.Trace("Font: %#v", p.textFont)
common.Log.Trace("Encoder: %#v", p.textFont.Encoder())
return errors.New("Glyph char metrics missing") // XXX/FIXME: return error.
}

View File

@ -124,7 +124,7 @@ func (subchap *Subchapter) Add(d Drawable) {
switch d.(type) {
case *Chapter, *Subchapter:
common.Log.Debug("Error: Cannot add chapter or subchapter to a subchapter")
case *Paragraph, *Image, *Block, *Table:
case *Paragraph, *Image, *Block, *Table, *PageBreak:
subchap.contents = append(subchap.contents, d)
default:
common.Log.Debug("Unsupported: %T", d)

View File

@ -480,8 +480,8 @@ func (cell *TableCell) SetBorder(style CellBorderStyle, width float64) {
}
// SetBorderColor sets the cell's border color.
func (cell *TableCell) SetBorderColor(color rgbColor) {
cell.borderColor = model.NewPdfColorDeviceRGB(color.r, color.g, color.b)
func (cell *TableCell) SetBorderColor(col Color) {
cell.borderColor = model.NewPdfColorDeviceRGB(col.ToRGB())
}
// SetBackgroundColor sets the cell's background color.

8
pdf/extractor/const.go Normal file
View File

@ -0,0 +1,8 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package extractor
// isTesting is switched on by the package tests. When it is set, procBuf returns the extracted
// text untouched.
var isTesting = false

10
pdf/extractor/doc.go Normal file
View File

@ -0,0 +1,10 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
package extractor

View File

@ -0,0 +1,28 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package extractor
import "github.com/unidoc/unidoc/pdf/model"
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	// contents holds the content streams of the page as returned by GetAllContentStreams.
	contents string
	// resources are the resources of the page, passed to the content stream processor.
	resources *model.PdfPageResources
}
// New creates an Extractor for the given PDF page. The content streams of the page are read up
// front; a failure to get them is returned as the error.
func New(page *model.PdfPage) (*Extractor, error) {
	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}

	return &Extractor{
		contents:  contents,
		resources: page.Resources,
	}, nil
}

233
pdf/extractor/text.go Normal file
View File

@ -0,0 +1,233 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package extractor
import (
"bytes"
"errors"
"fmt"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/contentstream"
"github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/internal/cmap"
"github.com/unidoc/unidoc/pdf/model"
)
// ExtractText processes and extracts all text data in content streams and returns as a string. Takes into
// account character encoding via CMaps in the PDF file.
// The text is processed linearly e.g. in the order in which it appears. A best effort is done to add
// spaces and newlines.
//
// When parsing or processing fails, the contents of the buffer at that point are returned together
// with the error. On success the buffer is passed through procBuf before it is returned.
func (e *Extractor) ExtractText() (string, error) {
	var buf bytes.Buffer

	cstreamParser := contentstream.NewContentStreamParser(e.contents)
	operations, err := cstreamParser.Parse()
	if err != nil {
		return buf.String(), err
	}

	processor := contentstream.NewContentStreamProcessor(*operations)

	// State that is shared between the invocations of the handler below.
	// codemap is the ToUnicode CMap loaded by the last Tf operation (nil when there is none).
	var codemap *cmap.CMap
	// inText is true between a BT and the following ET.
	inText := false
	// Translation of the last Tm that was taken into account, -1 means not set yet.
	xPos, yPos := float64(-1), float64(-1)

	processor.AddHandler(contentstream.HandlerConditionEnumAllOperands, "",
		func(op *contentstream.ContentStreamOperation, gs contentstream.GraphicsState, resources *model.PdfPageResources) error {
			operand := op.Operand
			switch operand {
			case "BT":
				inText = true
			case "ET":
				inText = false
			case "Tf":
				// Font selection: look for a ToUnicode CMap of the selected font.
				if !inText {
					common.Log.Debug("Tf operand outside text")
					return nil
				}

				if len(op.Params) != 2 {
					common.Log.Debug("Error Tf should only get 2 input params, got %d", len(op.Params))
					return errors.New("Incorrect parameter count")
				}

				// The CMap of the previous font does not apply any longer.
				codemap = nil

				fontName, ok := op.Params[0].(*core.PdfObjectName)
				if !ok {
					common.Log.Debug("Error Tf font input not a name")
					return errors.New("Tf range error")
				}

				// Without resources the font cannot be looked up, text is then written unmapped.
				if resources == nil {
					return nil
				}

				fontObj, found := resources.GetFontByName(*fontName)
				if !found {
					common.Log.Debug("Font not found...")
					return errors.New("Font not in resources")
				}

				fontObj = core.TraceToDirectObject(fontObj)
				if fontDict, isDict := fontObj.(*core.PdfObjectDictionary); isDict {
					toUnicode := fontDict.Get("ToUnicode")
					if toUnicode != nil {
						toUnicode = core.TraceToDirectObject(toUnicode)
						toUnicodeStream, ok := toUnicode.(*core.PdfObjectStream)
						if !ok {
							return errors.New("Invalid ToUnicode entry - not a stream")
						}

						decoded, err := core.DecodeStream(toUnicodeStream)
						if err != nil {
							return err
						}

						codemap, err = cmap.LoadCmapFromData(decoded)
						if err != nil {
							return err
						}
					}
				}
			case "T*":
				// Move to the start of the next line.
				if !inText {
					common.Log.Debug("T* operand outside text")
					return nil
				}
				buf.WriteString("\n")
			case "Td", "TD":
				if !inText {
					common.Log.Debug("Td/TD operand outside text")
					return nil
				}

				// Params: [tx ty], corresponds to Tm=Tlm=[1 0 0;0 1 0;tx ty 1]*Tm
				if len(op.Params) != 2 {
					common.Log.Debug("Td/TD invalid arguments")
					return nil
				}
				tx, err := getNumberAsFloat(op.Params[0])
				if err != nil {
					common.Log.Debug("Td Float parse error")
					return nil
				}
				ty, err := getNumberAsFloat(op.Params[1])
				if err != nil {
					common.Log.Debug("Td Float parse error")
					return nil
				}

				// A positive tx is written as a space, a negative ty as a newline.
				if tx > 0 {
					buf.WriteString(" ")
				}
				if ty < 0 {
					// TODO: More flexible space characters?
					buf.WriteString("\n")
				}
			case "Tm":
				if !inText {
					common.Log.Debug("Tm operand outside text")
					return nil
				}

				// Params: a,b,c,d,e,f as in Tm = [a b 0; c d 0; e f 1].
				// The last two (e,f) represent translation.
				if len(op.Params) != 6 {
					return errors.New("Tm: Invalid number of inputs")
				}
				// e and f can be given either as float or as integer objects.
				xfloat, ok := op.Params[4].(*core.PdfObjectFloat)
				if !ok {
					xint, ok := op.Params[4].(*core.PdfObjectInteger)
					if !ok {
						return nil
					}
					xfloat = core.MakeFloat(float64(*xint))
				}
				yfloat, ok := op.Params[5].(*core.PdfObjectFloat)
				if !ok {
					yint, ok := op.Params[5].(*core.PdfObjectInteger)
					if !ok {
						return nil
					}
					yfloat = core.MakeFloat(float64(*yint))
				}

				// A y value below the stored one is written as a newline; both positions are
				// stored in that case and the x comparison below is skipped.
				if yPos == -1 {
					yPos = float64(*yfloat)
				} else if yPos > float64(*yfloat) {
					buf.WriteString("\n")
					xPos = float64(*xfloat)
					yPos = float64(*yfloat)
					return nil
				}

				// An x value above the stored one is written as a tab.
				if xPos == -1 {
					xPos = float64(*xfloat)
				} else if xPos < float64(*xfloat) {
					buf.WriteString("\t")
					xPos = float64(*xfloat)
				}
			case "TJ":
				// Show text from an array of strings and numbers.
				if !inText {
					common.Log.Debug("TJ operand outside text")
					return nil
				}
				if len(op.Params) < 1 {
					return nil
				}
				paramList, ok := op.Params[0].(*core.PdfObjectArray)
				if !ok {
					return fmt.Errorf("Invalid parameter type, no array (%T)", op.Params[0])
				}
				for _, obj := range *paramList {
					switch v := obj.(type) {
					case *core.PdfObjectString:
						if codemap != nil {
							buf.WriteString(codemap.CharcodeBytesToUnicode([]byte(*v)))
						} else {
							buf.WriteString(string(*v))
						}
					case *core.PdfObjectFloat:
						// Numbers below -100 are written as a space.
						if *v < -100 {
							buf.WriteString(" ")
						}
					case *core.PdfObjectInteger:
						if *v < -100 {
							buf.WriteString(" ")
						}
					}
				}
			case "Tj":
				// Show a single text string.
				if !inText {
					common.Log.Debug("Tj operand outside text")
					return nil
				}
				if len(op.Params) < 1 {
					return nil
				}
				param, ok := op.Params[0].(*core.PdfObjectString)
				if !ok {
					return fmt.Errorf("Invalid parameter type, not string (%T)", op.Params[0])
				}
				if codemap != nil {
					buf.WriteString(codemap.CharcodeBytesToUnicode([]byte(*param)))
				} else {
					buf.WriteString(string(*param))
				}
			}

			return nil
		})

	err = processor.Process(e.resources)
	if err != nil {
		common.Log.Error("Error processing: %v", err)
		return buf.String(), err
	}

	procBuf(&buf)

	return buf.String(), nil
}

View File

@ -0,0 +1,43 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package extractor
import (
"flag"
"testing"
)
// init sets isTesting when the "test.v" flag is registered.
func init() {
	if flag.Lookup("test.v") != nil {
		isTesting = true
	}
}
const testContents1 = `
BT
/F1 24 Tf
(Hello World!)Tj
0 -10 Td
(Doink)Tj
ET
`
const testExpected1 = "Hello World!\nDoink"
// TestTextExtraction1 extracts the text of testContents1 with an Extractor that has no resources
// and compares the result with testExpected1.
func TestTextExtraction1(t *testing.T) {
	e := Extractor{}
	e.contents = testContents1
	s, err := e.ExtractText()
	if err != nil {
		t.Errorf("Error extracting text: %v", err)
		return
	}
	if s != testExpected1 {
		t.Errorf("Text mismatch (%s)", s)
		// Hex dump of both strings, makes differences in whitespace visible.
		t.Errorf("Text mismatch (% X vs % X)", s, testExpected1)
		return
	}
}

48
pdf/extractor/utils.go Normal file
View File

@ -0,0 +1,48 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package extractor
import (
"bytes"
"errors"
"fmt"
"github.com/unidoc/unidoc/common/license"
"github.com/unidoc/unidoc/pdf/core"
)
// getNumberAsFloat converts a numeric PdfObject (float or integer object) to a float64.
// An error is returned for any other kind of object.
func getNumberAsFloat(obj core.PdfObject) (float64, error) {
	switch num := obj.(type) {
	case *core.PdfObjectFloat:
		return float64(*num), nil
	case *core.PdfObjectInteger:
		return float64(*num), nil
	default:
		return 0, errors.New("Not a number")
	}
}
// procBuf post-processes the extracted text. The buffer is left untouched while testing and when
// a license key is present that reports itself as licensed. Otherwise a notice is printed and
// appended to the buffer; a buffer of more than 100 bytes loses its last 100 bytes first.
func procBuf(buf *bytes.Buffer) {
	if isTesting {
		return
	}
	if lk := license.GetLicenseKey(); lk != nil && lk.IsLicensed() {
		return
	}

	fmt.Printf("Unlicensed copy of unidoc\n")
	fmt.Printf("To get rid of the watermark and keep entire text - Please get a license on https://unidoc.io\n")

	if buf.Len() <= 100 {
		buf.WriteString("- [Unlicensed UniDoc - Get a license on https://unidoc.io]")
		return
	}
	buf.Truncate(buf.Len() - 100)
	buf.WriteString("... [Truncated - Unlicensed UniDoc - Get a license on https://unidoc.io]")
}

405
pdf/internal/cmap/cmap.go Normal file
View File

@ -0,0 +1,405 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package cmap
import (
"bytes"
"errors"
"io"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/model/textencoding"
)
// CMap represents a character code to unicode mapping used in PDF files.
type CMap struct {
	// Embedded parser, supplies the objects of the CMap data while it is being parsed.
	*cMapParser
	// Text encoder to look up runes from input glyph names.
	encoder textencoding.TextEncoder
	// map of character code to string (sequence of runes) for 1-4 byte codes separately.
	// The map for codes of n bytes is stored at index n-1.
	codeMap [4]map[uint64]string
	// Values of the CMapName and CMapType entries.
	name  string
	ctype int
	// Codespace ranges in the order in which they were parsed.
	codespaces []codespace
}
// codespace represents a single codespace range used in the CMap.
type codespace struct {
	// Number of bytes of the hex strings the range was given with.
	numBytes int
	// Lower and upper bound of the range.
	low  uint64
	high uint64
}
// Name returns the name of the CMap. It is empty if no CMap name was parsed.
func (cmap *CMap) Name() string {
	return cmap.name
}
// Type returns the type of the CMap. It is 0 if no CMap type was parsed.
func (cmap *CMap) Type() int {
	return cmap.ctype
}
// CharcodeBytesToUnicode converts a byte array of charcodes to a unicode string representation.
// At every position the shortest code (1 to 4 bytes) that has a mapping is used. When none of the
// candidate codes is mapped, the bytes that were tried are skipped without any output.
func (cmap *CMap) CharcodeBytesToUnicode(src []byte) string {
	// Maximum number of possible bytes per code.
	const maxCodeLen = 4

	var out bytes.Buffer
	for pos := 0; pos < len(src); {
		// Number of bytes that can be tried at this position.
		limit := len(src) - pos
		if limit > maxCodeLen {
			limit = maxCodeLen
		}

		var code uint64
		consumed := limit
		for n := 1; n <= limit; n++ {
			code = code<<8 | uint64(src[pos+n-1])
			if mapped, ok := cmap.codeMap[n-1][code]; ok {
				out.WriteString(mapped)
				consumed = n
				break
			}
		}
		pos += consumed
	}
	return out.String()
}
// CharcodeToUnicode converts a single character code to unicode string. The maps for 1 to 4 byte
// codes are searched in that order; "?" is returned when the code is in none of them.
// Note that CharcodeBytesToUnicode is typically more efficient.
func (cmap *CMap) CharcodeToUnicode(srcCode uint64) string {
	for _, codes := range cmap.codeMap {
		if mapped, ok := codes[srcCode]; ok {
			return mapped
		}
	}
	// Not found.
	return "?"
}
// newCMap returns a CMap with an empty list of codespaces and empty code maps.
func newCMap() *CMap {
	cm := &CMap{codespaces: []codespace{}}
	// Maps for 1-4 bytes are initialized. Minimal overhead if not used (most commonly used are 1-2 bytes).
	for i := range cm.codeMap {
		cm.codeMap[i] = map[uint64]string{}
	}
	return cm
}
// LoadCmapFromData parses the CMap data given as a byte slice and returns a CMap that can be used
// for character code to unicode conversion. On a parse error the CMap as far as it was loaded is
// returned together with the error.
func LoadCmapFromData(data []byte) (*CMap, error) {
	cm := newCMap()
	cm.cMapParser = newCMapParser(data)

	if err := cm.parse(); err != nil {
		return cm, err
	}
	return cm, nil
}
// parse parses the CMap file and loads into the CMap structure.
// Objects are read until the end of the data. Codespace range, bfchar and bfrange sections are
// handed to their parse functions, the CMap name and type are stored and everything else is skipped.
func (cmap *CMap) parse() error {
	for {
		o, err := cmap.parseObject()
		if err != nil {
			// The end of the data ends the parsing without an error.
			if err == io.EOF {
				break
			}
			common.Log.Debug("Error parsing CMap: %v", err)
			return err
		}

		if op, isOp := o.(cmapOperand); isOp {
			common.Log.Trace("Operand: %s", op.Operand)

			if op.Operand == begincodespacerange {
				err := cmap.parseCodespaceRange()
				if err != nil {
					return err
				}
			} else if op.Operand == beginbfchar {
				err := cmap.parseBfchar()
				if err != nil {
					return err
				}
			} else if op.Operand == beginbfrange {
				err := cmap.parseBfrange()
				if err != nil {
					return err
				}
			}
		} else if n, isName := o.(cmapName); isName {
			// For the name and type entries the value is the object that follows the name.
			if n.Name == cmapname {
				o, err := cmap.parseObject()
				if err != nil {
					if err == io.EOF {
						break
					}
					return err
				}
				name, ok := o.(cmapName)
				if !ok {
					return errors.New("CMap name not a name")
				}
				cmap.name = name.Name
			} else if n.Name == cmaptype {
				o, err := cmap.parseObject()
				if err != nil {
					if err == io.EOF {
						break
					}
					return err
				}
				typeInt, ok := o.(cmapInt)
				if !ok {
					return errors.New("CMap type not an integer")
				}
				cmap.ctype = int(typeInt.val)
			}
		} else {
			common.Log.Trace("Unhandled object: %T %#v", o, o)
		}
	}

	return nil
}
// parseCodespaceRange parses the codespace range section of a CMap.
// The section is a list of <low> <high> hex string pairs that is ended by the endcodespacerange
// operand. Every pair is appended to the codespaces of the CMap. Reaching the end of the data
// ends the section without an error.
// An error is returned when an entry is not a hex string, on any other operand and when the
// low and high bounds of a pair differ in their number of bytes.
func (cmap *CMap) parseCodespaceRange() error {
	for {
		o, err := cmap.parseObject()
		if err != nil {
			if err == io.EOF {
				break
			}
			return err
		}

		hexLow, isHex := o.(cmapHexString)
		if !isHex {
			if op, isOperand := o.(cmapOperand); isOperand {
				if op.Operand == endcodespacerange {
					return nil
				}
				return errors.New("Unexpected operand")
			}
			// Neither a hex string nor an operand: do not go on with a zero valued low bound.
			return errors.New("Non-hex low")
		}

		o, err = cmap.parseObject()
		if err != nil {
			if err == io.EOF {
				break
			}
			return err
		}

		hexHigh, ok := o.(cmapHexString)
		if !ok {
			return errors.New("Non-hex high")
		}

		if hexLow.numBytes != hexHigh.numBytes {
			return errors.New("Unequal number of bytes in range")
		}

		low := hexToUint64(hexLow)
		high := hexToUint64(hexHigh)
		numBytes := hexLow.numBytes

		cspace := codespace{numBytes: numBytes, low: low, high: high}
		cmap.codespaces = append(cmap.codespaces, cspace)

		common.Log.Trace("Codespace low: 0x%X, high: 0x%X", low, high)
	}

	return nil
}
// parseBfchar parses a bfchar section of a CMap file.
// The section is a list of <srcCode> <target> pairs that is ended by the endbfchar operand. The
// target is either a hex string or a glyph name. Every pair is stored in the code map for the
// number of bytes of the source code. Reaching the end of the data ends the section without an error.
func (cmap *CMap) parseBfchar() error {
	for {
		// Src code.
		o, err := cmap.parseObject()
		if err != nil {
			if err == io.EOF {
				break
			}
			return err
		}
		var srcCode uint64
		var numBytes int

		switch v := o.(type) {
		case cmapOperand:
			if v.Operand == endbfchar {
				return nil
			}
			return errors.New("Unexpected operand")
		case cmapHexString:
			srcCode = hexToUint64(v)
			numBytes = v.numBytes
		default:
			return errors.New("Unexpected type")
		}

		// Target code.
		o, err = cmap.parseObject()
		if err != nil {
			if err == io.EOF {
				break
			}
			return err
		}
		var toCode string

		switch v := o.(type) {
		case cmapOperand:
			if v.Operand == endbfchar {
				return nil
			}
			return errors.New("Unexpected operand")
		case cmapHexString:
			toCode = hexToString(v)
		case cmapName:
			// Glyph name: "?" unless an encoder is set that knows a rune for the name.
			toCode = "?"
			if cmap.encoder != nil {
				if r, found := cmap.encoder.GlyphToRune(v.Name); found {
					toCode = string(r)
				}
			}
		default:
			return errors.New("Unexpected type")
		}

		// The number of bytes selects the map, so it has to be within 1-4.
		if numBytes <= 0 || numBytes > 4 {
			return errors.New("Invalid code length")
		}

		cmap.codeMap[numBytes-1][srcCode] = toCode
	}

	return nil
}
// parseBfrange parses a bfrange section of a CMap file.
// The section is a list of <srcCodeFrom> <srcCodeTo> <target> entries that is ended by the
// endbfrange operand. Every source code of a range is stored in the code map for the number of
// bytes of srcCodeFrom. Reaching the end of the data ends the section without an error.
func (cmap *CMap) parseBfrange() error {
	for {
		// The specifications are in pairs of 3.
		// <srcCodeFrom> <srcCodeTo> <target>
		// where target can be either <destFrom> as a hex code, or a list.

		// Src code from.
		var srcCodeFrom uint64
		var numBytes int
		{
			o, err := cmap.parseObject()
			if err != nil {
				if err == io.EOF {
					break
				}
				return err
			}

			switch v := o.(type) {
			case cmapOperand:
				if v.Operand == endbfrange {
					return nil
				}
				return errors.New("Unexpected operand")
			case cmapHexString:
				srcCodeFrom = hexToUint64(v)
				numBytes = v.numBytes
			default:
				return errors.New("Unexpected type")
			}
		}

		// Src code to.
		var srcCodeTo uint64
		{
			o, err := cmap.parseObject()
			if err != nil {
				if err == io.EOF {
					break
				}
				return err
			}

			switch v := o.(type) {
			case cmapOperand:
				if v.Operand == endbfrange {
					return nil
				}
				return errors.New("Unexpected operand")
			case cmapHexString:
				srcCodeTo = hexToUint64(v)
			default:
				return errors.New("Unexpected type")
			}
		}

		// target(s).
		o, err := cmap.parseObject()
		if err != nil {
			if err == io.EOF {
				break
			}
			return err
		}

		// The number of bytes selects the map, so it has to be within 1-4.
		if numBytes <= 0 || numBytes > 4 {
			return errors.New("Invalid code length")
		}

		switch v := o.(type) {
		case cmapArray:
			// One target per source code, the array has to cover the range exactly.
			sc := srcCodeFrom
			for _, o := range v.Array {
				hexs, ok := o.(cmapHexString)
				if !ok {
					return errors.New("Non-hex string in array")
				}
				cmap.codeMap[numBytes-1][sc] = hexToString(hexs)
				sc++
			}
			if sc != srcCodeTo+1 {
				return errors.New("Invalid number of items in array")
			}
		case cmapHexString:
			// <srcCodeFrom> <srcCodeTo> <dstCode>, maps [from,to] to [dstCode,dstCode+to-from].
			// in hex format.
			target := hexToUint64(v)
			i := uint64(0)
			for sc := srcCodeFrom; sc <= srcCodeTo; sc++ {
				r := target + i
				// Convert the code point through rune: a direct string(uint64) conversion is
				// rejected by go vet. Values above the last code point (0x10FFFF) map to the
				// replacement character, which is what the direct conversion yields as well.
				dst := "\uFFFD"
				if r <= 0x10FFFF {
					dst = string(rune(r))
				}
				cmap.codeMap[numBytes-1][sc] = dst
				i++
			}
		default:
			return errors.New("Unexpected type")
		}
	}

	return nil
}

View File

@ -0,0 +1,329 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package cmap
import (
"testing"
"github.com/unidoc/unidoc/common"
)
// init installs a console logger at trace level for the tests of this package.
func init() {
	//common.SetLogger(common.NewConsoleLogger(common.LogLevelDebug))
	common.SetLogger(common.NewConsoleLogger(common.LogLevelTrace))
}
// cmap1Data represents a basic CMap.
const cmap1Data = `
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (UCS)
/Supplement 0
>> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
8 beginbfchar
<0003> <0020>
<0007> <0024>
<0033> <0050>
<0035> <0052>
<0037> <0054>
<005A> <0077>
<005C> <0079>
<005F> <007C>
endbfchar
7 beginbfrange
<000F> <0017> <002C>
<001B> <001D> <0038>
<0025> <0026> <0042>
<002F> <0031> <004C>
<0044> <004C> <0061>
<004F> <0053> <006C>
<0055> <0057> <0072>
endbfrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end
`
// TestCMapParser1 tests basic loading of a simple CMap (cmap1Data): name, type, codespace range
// and a selection of the bfchar and bfrange mappings.
func TestCMapParser1(t *testing.T) {
	common.SetLogger(common.NewConsoleLogger(common.LogLevelTrace))

	cmap, err := LoadCmapFromData([]byte(cmap1Data))
	if err != nil {
		t.Error("Failed: ", err)
		return
	}

	if cmap.Name() != "Adobe-Identity-UCS" {
		t.Errorf("CMap name incorrect (%s)", cmap.Name())
		return
	}

	if cmap.Type() != 2 {
		t.Errorf("CMap type incorrect")
		return
	}

	if len(cmap.codespaces) != 1 {
		t.Errorf("len codespace != 1 (%d)", len(cmap.codespaces))
		return
	}

	if cmap.codespaces[0].low != 0 {
		t.Errorf("code space low range != 0 (%d)", cmap.codespaces[0].low)
		return
	}

	if cmap.codespaces[0].high != 0xFFFF {
		t.Errorf("code space high range != 0xffff (%d)", cmap.codespaces[0].high)
		return
	}

	// Character code -> expected rune. The entries written as a sum are codes inside of a bfrange.
	expectedMappings := map[uint64]rune{
		0x0003:     0x0020,
		0x005F:     0x007C,
		0x000F:     0x002C,
		0x000F + 5: 0x002C + 5,
		0x001B:     0x0038,
		0x001B + 2: 0x0038 + 2,
		0x002F:     0x004C,
		0x0044:     0x0061,
		0x004F:     0x006C,
		0x0055:     0x0072,
	}

	for k, expected := range expectedMappings {
		if v := cmap.CharcodeToUnicode(k); v != string(expected) {
			t.Errorf("incorrect mapping, expecting 0x%X -> 0x%X (%#v)", k, expected, v)
			return
		}
	}

	// A code without a mapping is returned as "?".
	v := cmap.CharcodeToUnicode(0x99)
	if v != "?" { //!= "notdef" {
		t.Errorf("Unmapped code, expected to map to undefined")
		return
	}

	// Two 2-byte codes: 0x0003 -> " " and 0x000F -> ",".
	charcodes := []byte{0x00, 0x03, 0x00, 0x0F}
	s := cmap.CharcodeBytesToUnicode(charcodes)
	if s != " ," {
		t.Error("Incorrect charcode bytes -> string mapping")
		return
	}
}
// cmap2Data is a CMap with a single 2-byte codespace range <0000>-<FFFF> whose bfrange section
// contains source codes with a zero high byte (<0080>-<00FF>) as well as codes with a non-zero
// high byte (<802F>-<902F>). It is the input for TestCMapParser2.
// Note that the section header announces 7 bfrange entries while only 2 are listed.
const cmap2Data = `
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (UCS)
/Supplement 0
>> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
7 beginbfrange
<0080> <00FF> <002C>
<802F> <902F> <0038>
endbfrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end
`
// TestCMapParser2 covers a bug that showed up when 2-byte character codes had the higher byte
// set to 0 (e.g. 0x0080) and the character map did not take the number of bytes of the input
// code into account.
func TestCMapParser2(t *testing.T) {
	common.SetLogger(common.NewConsoleLogger(common.LogLevelTrace))

	cmap, err := LoadCmapFromData([]byte(cmap2Data))
	if err != nil {
		t.Fatal("Failed: ", err)
	}

	// Header information.
	if cmap.Name() != "Adobe-Identity-UCS" {
		t.Fatalf("CMap name incorrect (%s)", cmap.Name())
	}
	if cmap.Type() != 2 {
		t.Fatalf("CMap type incorrect")
	}

	// Codespace range.
	if len(cmap.codespaces) != 1 {
		t.Fatalf("len codespace != 1 (%d)", len(cmap.codespaces))
	}
	if cmap.codespaces[0].low != 0 {
		t.Fatalf("code space low range != 0 (%d)", cmap.codespaces[0].low)
	}
	if cmap.codespaces[0].high != 0xFFFF {
		t.Fatalf("code space high range != 0xffff (%d)", cmap.codespaces[0].high)
	}

	// Individual code -> rune mappings (first code of each bfrange).
	wantMappings := map[uint64]rune{
		0x0080: 0x002C,
		0x802F: 0x0038,
	}
	for code, want := range wantMappings {
		got := cmap.CharcodeToUnicode(code)
		if got != string(want) {
			t.Fatalf("incorrect mapping, expecting 0x%X -> 0x%X (got 0x%X)", code, want, got)
		}
	}

	// Check byte sequence mappings.
	sequenceCases := []struct {
		bytes    []byte
		expected string
	}{
		{[]byte{0x80, 0x2F, 0x00, 0x80}, string([]rune{0x0038, 0x002C})},
	}
	for _, tc := range sequenceCases {
		got := cmap.CharcodeBytesToUnicode(tc.bytes)
		if got != tc.expected {
			t.Fatalf("Incorrect byte sequence mapping -> % X -> % X (got % X)", tc.bytes, []rune(tc.expected), []rune(got))
		}
	}
}
// cmapData3 is a CMap with a mixture of 1 and 2 byte codespaces: it declares four codespace
// ranges (two 1-byte, two 2-byte) and bfrange entries for both code lengths.
// It is named test-1, has CMapType 1 and is the input for TestCMapParser3.
// Note that the section header announces 7 bfrange entries while only 4 are listed.
const cmapData3 = `
/CIDInit /ProcSet findresource begin
12 dict begin begincmap
/CIDSystemInfo
3 dict dup begin
/Registry (Adobe) def
/Supplement 2 def
end def
/CMapName /test-1 def
/CMapType 1 def
4 begincodespacerange
<00> <80>
<8100> <9fff>
<a0> <df>
<d040> <fbfc>
endcodespacerange
7 beginbfrange
<00> <80> <10>
<8100> <9f00> <1000>
<a0> <d0> <90>
<d140> <f000> <a000>
endbfrange
endcmap
`
// TestCMapParser3 tests a CMap (cmapData3) whose codespace ranges mix 1-byte and 2-byte codes.
// It verifies the parsed codespaces, individual code mappings and the decoding of a byte
// sequence that switches between 1-byte and 2-byte codes.
func TestCMapParser3(t *testing.T) {
	common.SetLogger(common.NewConsoleLogger(common.LogLevelTrace))

	cmap, err := LoadCmapFromData([]byte(cmapData3))
	if err != nil {
		t.Fatal("Failed: ", err)
	}

	// Header information.
	if cmap.Name() != "test-1" {
		t.Fatalf("CMap name incorrect (%s)", cmap.Name())
	}
	if cmap.Type() != 1 {
		t.Fatalf("CMap type incorrect")
	}

	// Check codespaces.
	wantCodespaces := []struct {
		numBytes int
		low      uint64
		high     uint64
	}{
		{1, 0x00, 0x80},
		{2, 0x8100, 0x9fff},
		{1, 0xa0, 0xdf},
		{2, 0xd040, 0xfbfc},
	}
	if len(cmap.codespaces) != len(wantCodespaces) {
		t.Fatalf("len codespace != %d (%d)", len(wantCodespaces), len(cmap.codespaces))
	}
	for idx, got := range cmap.codespaces {
		want := wantCodespaces[idx]
		if got.numBytes != want.numBytes {
			t.Fatalf("code space number of bytes != %d (%d)", want.numBytes, got.numBytes)
		}
		if got.low != want.low {
			t.Fatalf("code space low range != %d (%d)", want.low, got.low)
		}
		if got.high != want.high {
			t.Fatalf("code space high range != 0x%X (0x%X)", want.high, got.high)
		}
	}

	// Check mappings.
	wantMappings := map[uint64]rune{
		0x0080: 0x10 + 0x80,
		0x8100: 0x1000,
		0x00a0: 0x90,
		0xd140: 0xa000,
	}
	for code, want := range wantMappings {
		got := cmap.CharcodeToUnicode(code)
		if got != string(want) {
			t.Fatalf("incorrect mapping, expecting 0x%X -> 0x%X (got 0x%X)", code, want, got)
		}
	}

	// Check byte sequence mappings.
	sequenceCases := []struct {
		bytes    []byte
		expected string
	}{
		{[]byte{0x80, 0x81, 0x00, 0xa1, 0xd1, 0x80, 0x00}, string([]rune{0x90, 0x1000, 0x91, 0xa000 + 0x40, 0x10})},
	}
	for _, tc := range sequenceCases {
		got := cmap.CharcodeBytesToUnicode(tc.bytes)
		if got != tc.expected {
			t.Fatalf("Incorrect byte sequence mapping -> % X -> % X (got % X)", tc.bytes, []rune(tc.expected), []rune(got))
		}
	}
}

View File

@ -0,0 +1,23 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package cmap
import "regexp"
// Keywords that occur in CMap data: the CIDSystemInfo key, the operators that open and close
// the codespacerange, bfchar and bfrange sections, and the CMapName / CMapType keys.
// NOTE(review): "cisSystemInfo" looks like a misspelling of "cid..."; it is left unchanged
// here because its usages are outside this view.
const (
cisSystemInfo = "/CIDSystemInfo"
begincodespacerange = "begincodespacerange"
endcodespacerange = "endcodespacerange"
beginbfchar = "beginbfchar"
endbfchar = "endbfchar"
beginbfrange = "beginbfrange"
endbfrange = "endbfrange"
cmapname = "CMapName"
cmaptype = "CMapType"
)
// reNumeric matches a numeric token at the start of a string: an optional run of sign and
// decimal point characters ('+', '-', '.') followed by a captured run of digits and decimal
// points.
//
// The '-' inside the character class is escaped so that it is a literal. Written as `[\+-.]`
// the class was interpreted as the range '+'..'.', which also accepted ',' as a leading
// character.
var reNumeric = regexp.MustCompile(`^[+\-.]*([0-9.]+)`)

472
pdf/internal/cmap/parser.go Normal file
View File

@ -0,0 +1,472 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package cmap
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"strconv"
"encoding/hex"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/core"
)
// cMapParser parses CMap character to unicode mapping files.
// All input is consumed through a buffered reader, which lets the parse methods peek at
// upcoming bytes before deciding how to read them.
type cMapParser struct {
// reader is the buffered source of the CMap data being parsed.
reader *bufio.Reader
}
// newCMapParser creates a new instance of the PDF CMap parser that reads from the given
// input data through a buffered reader.
func newCMapParser(content []byte) *cMapParser {
	return &cMapParser{
		reader: bufio.NewReader(bytes.NewBuffer(content)),
	}
}
// parseObject detects the signature at the current position and parses the corresponding
// object. Leading whitespace and '%' comments are skipped first. The object kind is chosen
// from the next two bytes:
//
//	'/'            name
//	'('            string
//	'['            array
//	'<<'           dictionary
//	'<'            hex string
//	digit or '-N'  number
//	anything else  operand
//
// An error is returned when two bytes cannot be peeked or when the selected parser fails.
func (p *cMapParser) parseObject() (cmapObject, error) {
	p.skipSpaces()
	for {
		next, err := p.reader.Peek(2)
		if err != nil {
			return nil, err
		}
		first, second := next[0], next[1]

		switch {
		case first == '%':
			// Comment: consume it and look at what follows.
			p.parseComment()
			p.skipSpaces()
			continue
		case first == '/':
			return p.parseName()
		case first == '(':
			return p.parseString()
		case first == '[':
			return p.parseArray()
		case first == '<' && second == '<':
			return p.parseDict()
		case first == '<':
			return p.parseHexString()
		case core.IsDecimalDigit(first) || (first == '-' && core.IsDecimalDigit(second)):
			number, err := p.parseNumber()
			if err != nil {
				return nil, err
			}
			return number, nil
		default:
			// Operand?
			operand, err := p.parseOperand()
			if err != nil {
				return nil, err
			}
			return operand, nil
		}
	}
}
// skipSpaces consumes whitespace bytes up to the next non-whitespace byte.
// It returns the number of bytes skipped. If peeking fails (including at end of input) the
// error is returned together with a count of 0, regardless of how many bytes were skipped.
func (p *cMapParser) skipSpaces() (int, error) {
	skipped := 0
	for {
		next, err := p.reader.Peek(1)
		if err != nil {
			return 0, err
		}
		if !core.IsWhiteSpace(next[0]) {
			return skipped, nil
		}
		p.reader.ReadByte()
		skipped++
	}
}
// parseComment reads a comment line starting with '%', after skipping leading whitespace.
// The returned text includes the '%' and runs up to, but not including, the terminating
// '\r' or '\n', which is left unread. An error is returned if the first byte is not '%' or
// if the input ends before a line terminator is found; the text read so far is returned
// alongside the error.
func (p *cMapParser) parseComment() (string, error) {
	var comment bytes.Buffer

	if _, err := p.skipSpaces(); err != nil {
		return comment.String(), err
	}

	for first := true; ; first = false {
		next, err := p.reader.Peek(1)
		if err != nil {
			common.Log.Debug("Error %s", err.Error())
			return comment.String(), err
		}
		if first && next[0] != '%' {
			return comment.String(), errors.New("Comment should start with %")
		}
		if next[0] == '\r' || next[0] == '\n' {
			break
		}
		ch, _ := p.reader.ReadByte()
		comment.WriteByte(ch)
	}

	return comment.String(), nil
}
// parseName parses a name starting with '/'.
// The name ends at whitespace, at one of the characters / [ ( ] < >, or at end of input; the
// terminating byte is left unread. A '#' introduces a two-digit hex escape that is decoded
// into a single byte. If the input is already exhausted an empty name is returned without
// error; if the first byte is not '/', an error is returned.
func (p *cMapParser) parseName() (cmapName, error) {
	var name bytes.Buffer

	// The leading '/' is mandatory.
	next, err := p.reader.Peek(1)
	if err == io.EOF {
		return cmapName{Name: name.String()}, nil // Can happen when loading from object stream.
	}
	if err != nil {
		return cmapName{Name: name.String()}, err
	}
	if next[0] != '/' {
		common.Log.Debug("ERROR Name starting with %s (% x)", next, next)
		return cmapName{Name: name.String()}, fmt.Errorf("Invalid name: (%c)", next[0])
	}
	p.reader.ReadByte()

	for {
		next, err := p.reader.Peek(1)
		if err == io.EOF {
			break // Can happen when loading from object stream.
		}
		if err != nil {
			return cmapName{Name: name.String()}, err
		}

		ch := next[0]
		if core.IsWhiteSpace(ch) {
			break
		}

		switch ch {
		case '/', '[', '(', ']', '<', '>':
			// Looks like start of next statement.
			return cmapName{Name: name.String()}, nil
		case '#':
			// "#XX" escape: the two characters after '#' are hex digits of one byte.
			hexcode, err := p.reader.Peek(3)
			if err != nil {
				return cmapName{Name: name.String()}, err
			}
			p.reader.Discard(3)

			code, err := hex.DecodeString(string(hexcode[1:3]))
			if err != nil {
				return cmapName{Name: name.String()}, err
			}
			name.Write(code)
		default:
			b, _ := p.reader.ReadByte()
			// Appended as a rune, i.e. bytes >= 0x80 are stored UTF-8 encoded.
			name.WriteRune(rune(b))
		}
	}

	return cmapName{Name: name.String()}, nil
}
// parseString parses a literal string, which starts with '(' and ends with the matching ')'.
// The opening '(' is expected to be the next byte and is consumed without being checked.
//
// Unescaped parentheses may be nested and are kept in the result. A backslash starts an
// escape sequence:
//   - \ddd (one to three octal digits) yields the byte with that value,
//   - \n \r \t \b \f yield the corresponding control character,
//   - \( \) \\ yield the literal character,
//   - any other escaped character is dropped.
//
// If the input ends before the closing ')', the text collected so far is returned together
// with the read error.
func (p *cMapParser) parseString() (cmapString, error) {
	p.reader.ReadByte() // Consume the opening '('.

	buf := bytes.Buffer{}
	count := 1 // Nesting depth of unescaped parentheses.
	for {
		bb, err := p.reader.Peek(1)
		if err != nil {
			return cmapString{buf.String()}, err
		}

		if bb[0] == '\\' { // Escape sequence.
			p.reader.ReadByte() // Skip the escape \ byte.
			b, err := p.reader.ReadByte()
			if err != nil {
				return cmapString{buf.String()}, err
			}

			// Octal '\ddd' number (base 8).
			if core.IsOctalDigit(b) {
				// Look at up to two more digits. Fewer than two bytes may be left (e.g. "\1)"
				// at the very end of the data); Peek then returns what is available along
				// with io.EOF, which must not be treated as a failure here.
				bb, err := p.reader.Peek(2)
				if err != nil && err != io.EOF {
					return cmapString{buf.String()}, err
				}

				numeric := []byte{b}
				for _, val := range bb {
					if !core.IsOctalDigit(val) {
						break
					}
					numeric = append(numeric, val)
				}
				// The first digit has already been read; discard the additional ones.
				p.reader.Discard(len(numeric) - 1)

				common.Log.Trace("Numeric string \"%s\"", numeric)
				code, err := strconv.ParseUint(string(numeric), 8, 32)
				if err != nil {
					return cmapString{buf.String()}, err
				}
				buf.WriteByte(byte(code))
				continue
			}

			switch b {
			case 'n':
				buf.WriteByte('\n')
			case 'r':
				buf.WriteByte('\r')
			case 't':
				buf.WriteByte('\t')
			case 'b':
				buf.WriteByte('\b')
			case 'f':
				buf.WriteByte('\f')
			case '(':
				buf.WriteByte('(')
			case ')':
				buf.WriteByte(')')
			case '\\':
				buf.WriteByte('\\')
			}

			continue
		} else if bb[0] == '(' {
			count++
		} else if bb[0] == ')' {
			count--
			if count == 0 {
				// Closing parenthesis of the string itself: consume it and stop.
				p.reader.ReadByte()
				break
			}
		}

		b, _ := p.reader.ReadByte()
		buf.WriteByte(b)
	}

	return cmapString{buf.String()}, nil
}
// parseHexString parses a hex string, which starts with '<' and ends with '>'.
// The opening '<' is expected to be the next byte and is consumed without being checked.
// Whitespace between digits is skipped and any other non-hex character is ignored. An odd
// number of digits is padded with a trailing '0'. The result holds the decoded bytes and
// their count; the bytes are not converted to characters here.
func (p *cMapParser) parseHexString() (cmapHexString, error) {
	p.reader.ReadByte()

	var digits bytes.Buffer
	for {
		p.skipSpaces()

		if _, err := p.reader.Peek(1); err != nil {
			return cmapHexString{numBytes: 0, b: []byte("")}, err
		}

		ch, _ := p.reader.ReadByte()
		if ch == '>' {
			break
		}

		isHexDigit := ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F')
		if isHexDigit {
			digits.WriteByte(ch)
		}
	}

	// Complete a dangling half byte.
	if digits.Len()%2 == 1 {
		digits.WriteByte('0')
	}

	// Only hex digits were collected and the count is even, so decoding cannot fail.
	decoded, _ := hex.DecodeString(digits.String())
	return cmapHexString{numBytes: digits.Len() / 2, b: decoded}, nil
}
// parseArray parses an array, which starts with '[' and ends with ']' and can contain any
// kinds of direct objects. The opening '[' is expected to be the next byte and is consumed
// without being checked. On error, the elements parsed so far are returned with the error.
func (p *cMapParser) parseArray() (cmapArray, error) {
	arr := cmapArray{Array: []cmapObject{}}

	p.reader.ReadByte()

	for {
		p.skipSpaces()

		next, err := p.reader.Peek(1)
		if err != nil {
			return arr, err
		}
		if next[0] == ']' {
			p.reader.ReadByte()
			return arr, nil
		}

		element, err := p.parseObject()
		if err != nil {
			return arr, err
		}
		arr.Array = append(arr.Array, element)
	}
}
// parseDict reads and parses a dictionary object enclosed with '<<' and '>>'.
// Each entry is a name key followed by a value object; the keyword "def", which optionally
// follows key/value definitions in CMaps, is skipped. On error, the entries parsed so far are
// returned together with the error.
func (p *cMapParser) parseDict() (cmapDict, error) {
	common.Log.Trace("Reading PDF Dict!")

	dict := makeDict()

	// Pass the '<<'
	c, _ := p.reader.ReadByte()
	if c != '<' {
		return dict, errors.New("Invalid dict")
	}
	c, _ = p.reader.ReadByte()
	if c != '<' {
		return dict, errors.New("Invalid dict")
	}

	for {
		p.skipSpaces()

		bb, err := p.reader.Peek(2)
		if err != nil {
			return dict, err
		}

		if (bb[0] == '>') && (bb[1] == '>') {
			p.reader.ReadByte()
			p.reader.ReadByte()
			break
		}

		key, err := p.parseName()
		common.Log.Trace("Key: %s", key.Name)
		if err != nil {
			common.Log.Debug("ERROR Returning name err %s", err)
			return dict, err
		}

		p.skipSpaces()

		val, err := p.parseObject()
		if err != nil {
			return dict, err
		}
		dict.Dict[key.Name] = val

		// Skip "def" which optionally follows key value dict definitions in CMaps.
		// Fewer than three bytes may be left when the closing '>>' is the last thing in the
		// data; Peek then returns the available bytes along with io.EOF. That is not an
		// error here: the next iteration decides whether the dictionary is terminated.
		p.skipSpaces()
		bb, err = p.reader.Peek(3)
		if err != nil && err != io.EOF {
			return dict, err
		}
		if string(bb) == "def" {
			p.reader.Discard(3)
		}
	}

	return dict, nil
}
// parseNumber parses an integer or floating point number at the current position.
// Digits, '.', a lowercase 'e' exponent marker and signs are consumed; a sign is accepted only
// as the first character or directly after 'e', anywhere else it ends the number and is left
// unread. A number containing '.' or 'e' is returned as cmapFloat, otherwise as cmapInt.
// A conversion error from strconv is returned together with the (zero valued) object.
func (p *cMapParser) parseNumber() (cmapObject, error) {
	isFloat := false
	allowSigns := true
	numStr := bytes.Buffer{}

scan:
	for {
		bb, err := p.reader.Peek(1)
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}

		ch := bb[0]
		switch {
		case allowSigns && (ch == '-' || ch == '+'):
			// Only appear in the beginning, otherwise serves as a delimiter.
			allowSigns = false // Only allowed in beginning, and after e (exponential).
		case core.IsDecimalDigit(ch):
			// Once a digit has been read a following sign starts a new token.
			allowSigns = false
		case ch == '.':
			isFloat = true
			allowSigns = false
		case ch == 'e':
			// Exponential number format.
			isFloat = true
			allowSigns = true
		default:
			break scan
		}

		p.reader.ReadByte()
		numStr.WriteByte(ch)
	}

	if isFloat {
		fVal, err := strconv.ParseFloat(numStr.String(), 64)
		return cmapFloat{fVal}, err
	}

	intVal, err := strconv.ParseInt(numStr.String(), 10, 64)
	return cmapInt{intVal}, err
}
// parseOperand parses an operand, a text command represented by a word.
// Bytes are collected until a delimiter, whitespace or the end of input is reached; the
// terminating byte is left unread. An error is returned if no byte could be collected.
func (p *cMapParser) parseOperand() (cmapOperand, error) {
	var word bytes.Buffer

	for {
		next, err := p.reader.Peek(1)
		if err == io.EOF {
			break
		}
		if err != nil {
			return cmapOperand{}, err
		}
		if core.IsDelimiter(next[0]) || core.IsWhiteSpace(next[0]) {
			break
		}

		ch, _ := p.reader.ReadByte()
		word.WriteByte(ch)
	}

	if word.Len() == 0 {
		return cmapOperand{}, fmt.Errorf("Invalid operand (empty)")
	}

	return cmapOperand{Operand: word.String()}, nil
}

View File

@ -0,0 +1,48 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package cmap
// cmapObject is the common (empty) interface for every object kind declared below.
type cmapObject interface {
}
// cmapName is a name object; Name holds the text without the leading '/'.
type cmapName struct {
Name string
}
// cmapOperand is an operand, i.e. a command word.
type cmapOperand struct {
Operand string
}
// cmapHexString is a hex string ('<...>') in decoded form.
type cmapHexString struct {
numBytes int // original number of bytes in the raw representation
b []byte // decoded bytes of the hex digits
}
// cmapString is a literal string ('(...)'); String holds the content.
type cmapString struct {
String string
}
// cmapArray is an array ('[...]') of objects.
type cmapArray struct {
Array []cmapObject
}
// cmapDict is a dictionary ('<<...>>') mapping name keys to objects.
type cmapDict struct {
Dict map[string]cmapObject
}
// cmapFloat is a floating point number.
type cmapFloat struct {
val float64
}
// cmapInt is an integer number.
type cmapInt struct {
val int64
}
// makeDict returns a cmapDict whose Dict map is allocated and empty, so that entries can be
// assigned right away.
func makeDict() cmapDict {
	return cmapDict{Dict: map[string]cmapObject{}}
}

View File

@ -0,0 +1,34 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package cmap
import "bytes"
// hexToUint64 interprets the bytes of a hex string as a big-endian unsigned integer.
// An empty hex string yields 0. With more than 8 bytes the leading bytes are shifted out,
// i.e. only the last 8 bytes contribute to the result.
func hexToUint64(shex cmapHexString) uint64 {
	var acc uint64
	for i := range shex.b {
		acc = acc<<8 | uint64(shex.b[i])
	}
	return acc
}
// hexToString decodes the bytes of a hex string as a sequence of big-endian 16-bit code units
// (<HHLL> with 2 bytes HH and LL per unit) and returns the text UTF-8 encoded.
//
// A high surrogate (0xD800-0xDBFF) directly followed by a low surrogate (0xDC00-0xDFFF) is
// combined into the single code point it represents, so characters outside the Basic
// Multilingual Plane are decoded correctly. A surrogate without its partner is written as
// U+FFFD. A trailing odd byte is ignored.
func hexToString(shex cmapHexString) string {
	var buf bytes.Buffer

	data := shex.b
	for i := 0; i+1 < len(data); i += 2 {
		r := rune(data[i])<<8 | rune(data[i+1])

		// Surrogate pair: merge both units into one code point and skip the second unit.
		if 0xD800 <= r && r < 0xDC00 && i+3 < len(data) {
			lo := rune(data[i+2])<<8 | rune(data[i+3])
			if 0xDC00 <= lo && lo < 0xE000 {
				r = 0x10000 + (r-0xD800)<<10 + (lo - 0xDC00)
				i += 2
			}
		}

		// WriteRune emits U+FFFD for a value that is not a valid code point, which covers
		// unpaired surrogates.
		buf.WriteRune(r)
	}

	return buf.String()
}

View File

@ -596,6 +596,15 @@ func NewPdfAnnotationRedact() *PdfAnnotationRedact {
return redactAnnotation
}
// NewPdfAnnotationWidget creates a new widget annotation on top of a fresh PdfAnnotation
// (from NewPdfAnnotation) and registers the widget as that annotation's context.
func NewPdfAnnotationWidget() *PdfAnnotationWidget {
	widget := &PdfAnnotationWidget{PdfAnnotation: NewPdfAnnotation()}
	widget.PdfAnnotation.SetContext(widget)
	return widget
}
// Used for PDF parsing. Loads a PDF annotation model from a PDF primitive dictionary object.
// Loads the common PDF annotation dictionary, and anything needed for the annotation subtype.
func (r *PdfReader) newPdfAnnotationFromIndirectObject(container *PdfIndirectObject) (*PdfAnnotation, error) {

View File

@ -81,6 +81,7 @@ func newPdfFunctionFromPdfObject(obj PdfObject) (PdfFunction, error) {
return nil, errors.New("Invalid function type")
}
} else {
common.Log.Debug("Function Type error: %#v", obj)
return nil, errors.New("Type error")
}
}

View File

@ -73,6 +73,61 @@ func (this *Image) SetSamples(samples []uint32) {
this.Data = data
}
// Resample resamples the image data converting from current BitsPerComponent to a target BitsPerComponent
// value. Sets the image's BitsPerComponent to the target value following resampling.
// If the target equals the current BitsPerComponent the image is left untouched.
//
// For example, converting an 8-bit RGB image to 1-bit grayscale (common for scanned images):
//
//	// Convert RGB image to grayscale.
//	rgbColorSpace := pdf.NewPdfColorspaceDeviceRGB()
//	grayImage, err := rgbColorSpace.ImageToGray(rgbImage)
//	if err != nil {
//	  return err
//	}
//	// Resample as 1 bit.
//	grayImage.Resample(1)
func (this *Image) Resample(targetBitsPerComponent int64) {
	samples := this.GetSamples()

	// Bring every sample into the value range of the target bit depth by shifting:
	// e.g. current bits: 8, target bits: 1 -> shift right by 8-1 = 7, keeping the top bit.
	if targetBitsPerComponent < this.BitsPerComponent {
		downsampling := this.BitsPerComponent - targetBitsPerComponent
		for i := range samples {
			samples[i] >>= uint(downsampling)
		}
	} else if targetBitsPerComponent > this.BitsPerComponent {
		upsampling := targetBitsPerComponent - this.BitsPerComponent
		for i := range samples {
			samples[i] <<= uint(upsampling)
		}
	} else {
		return
	}

	// Image data are stored row by row. If the number of bits per row is not a multiple of 8, the end of the
	// row needs to be padded with extra bits to fill out the last byte.
	// Thus the packing into bytes is done on a row by row basis below.
	data := []byte{}
	samplesPerRow := this.Width * int64(this.ColorComponents)
	available := int64(len(samples))
	for i := int64(0); i < this.Height; i++ {
		// Half-open range [ind1, ind2) covering all samples of row i. The upper bound of a
		// slice expression is exclusive, so it must not be reduced by one or the last sample
		// of the row would be lost.
		ind1 := i * samplesPerRow
		ind2 := (i + 1) * samplesPerRow

		// Guard against fewer samples than Width*Height*ColorComponents.
		if ind2 > available {
			ind2 = available
		}
		if ind1 >= ind2 {
			break
		}

		resampled := sampling.ResampleUint32(samples[ind1:ind2], int(targetBitsPerComponent), 8)
		for _, val := range resampled {
			data = append(data, byte(val))
		}
	}

	this.Data = data
	this.BitsPerComponent = int64(targetBitsPerComponent)
}
// Converts the unidoc Image to a golang Image structure.
func (this *Image) ToGoImage() (goimage.Image, error) {
common.Log.Trace("Converting to go image")
@ -122,7 +177,7 @@ func (this *Image) ToGoImage() (goimage.Image, error) {
r := uint16(samples[i])<<8 | uint16(samples[i+1])
g := uint16(samples[i+2])<<8 | uint16(samples[i+3])
b := uint16(samples[i+4])<<8 | uint16(samples[i+5])
a := uint16(0)
a := uint16(0xffff) // Default: solid (0xffff) whereas transparent=0.
if this.alphaData != nil && len(this.alphaData) > aidx+1 {
a = (uint16(this.alphaData[aidx]) << 8) | uint16(this.alphaData[aidx+1])
aidx += 2
@ -132,7 +187,7 @@ func (this *Image) ToGoImage() (goimage.Image, error) {
r := uint8(samples[i] & 0xff)
g := uint8(samples[i+1] & 0xff)
b := uint8(samples[i+2] & 0xff)
a := uint8(0)
a := uint8(0xff) // Default: solid (0xff) whereas transparent=0.
if this.alphaData != nil && len(this.alphaData) > aidx {
a = uint8(this.alphaData[aidx])
aidx++

66
pdf/model/image_test.go Normal file
View File

@ -0,0 +1,66 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package model
import (
"testing"
)
// TestImageResampling checks resampling of 8-bit, single component image data to 1 bit per
// component, including the zero padding of the last byte of a row.
func TestImageResampling(t *testing.T) {
	img := Image{}

	// Case 1:
	// Data:
	// 4x8bit: 00000001 11101000 01101110 00001010
	// Resample as 1bit:
	//
	// 4x8bit: 00000001 11101000 01101110 00001010
	// Downsample to 1bit
	// 4x8bit: 00000000 00000001 00000000 00000000
	// 4x1bit: 0100
	// Padding with 4x00
	// -> 01000000 = 64 decimal
	//
	img.BitsPerComponent = 8
	img.Data = []byte{1, 232, 110, 10}
	//int(this.Width) * int(this.Height) * this.ColorComponents
	img.Width = 4
	img.ColorComponents = 1
	img.Height = 1
	img.Resample(1)
	if len(img.Data) != 1 {
		t.Errorf("Incorrect length != 1 (%d)", len(img.Data))
		return
	}
	if img.Data[0] != 64 {
		t.Errorf("Value != 64 (%d)", img.Data[0])
	}

	// Case 2:
	// Data:
	// 12x8bit: 00000001 11101000 01101110 00001010 00000001 11101000 01101110 00001010 00000001 11101000 01101110 00001010
	//          0        1        0        0        0        1        0        0        0        1        0        0
	// 010001000100
	// -> 01000100 0100(0000)
	// -> 68 64
	img.BitsPerComponent = 8
	img.Data = []byte{1, 232, 110, 10, 1, 232, 110, 10, 1, 232, 110, 10}
	img.Width = 12
	img.ColorComponents = 1
	img.Height = 1
	img.Resample(1)

	if len(img.Data) != 2 {
		t.Errorf("Incorrect length != 2 (%d)", len(img.Data))
		return
	}
	if img.Data[0] != 68 {
		t.Errorf("Value != 68 (%d)", img.Data[0])
	}
	if img.Data[1] != 64 {
		t.Errorf("Value != 64 (%d)", img.Data[1])
	}
}

View File

@ -419,7 +419,7 @@ func (this *PdfPage) getResources() (*PdfPageResources, error) {
}
if obj := dict.Get("Resources"); obj != nil {
prDict, ok := obj.(*PdfObjectDictionary)
prDict, ok := TraceToDirectObject(obj).(*PdfObjectDictionary)
if !ok {
return nil, errors.New("Invalid resource dict!")
}

View File

@ -81,21 +81,34 @@ func (this *PdfTilingPattern) IsColored() bool {
}
}
// GetContentStream returns the pattern cell's decoded content stream.
// It delegates to GetContentStreamWithEncoder and drops the encoder.
func (this *PdfTilingPattern) GetContentStream() ([]byte, error) {
	content, _, err := this.GetContentStreamWithEncoder()
	return content, err
}
// GetContentStreamWithEncoder returns the pattern cell's decoded content stream together with
// the stream encoder obtained from the same stream object via NewEncoderFromStream.
// The pattern's container has to be a *PdfObjectStream, otherwise ErrTypeError is returned.
// Errors from decoding the stream or from creating the encoder are logged and returned.
// TODO (v3): Change GetContentStreamWithEncoder to GetContentStream
func (this *PdfTilingPattern) GetContentStreamWithEncoder() ([]byte, StreamEncoder, error) {
// The content lives in the stream object that backs this pattern.
streamObj, ok := this.container.(*PdfObjectStream)
if !ok {
common.Log.Debug("Tiling pattern container not a stream (got %T)", this.container)
// NOTE(review): the paired two-value / three-value return statements in this function look
// like the removed and added sides of a diff - confirm against the actual file.
return nil, ErrTypeError
return nil, nil, ErrTypeError
}
decoded, err := DecodeStream(streamObj)
if err != nil {
common.Log.Debug("Failed decoding stream, err: %v", err)
return nil, err
return nil, nil, err
}
return decoded, nil
// The encoder is derived from the same stream object that was decoded above.
encoder, err := NewEncoderFromStream(streamObj)
if err != nil {
common.Log.Debug("Failed finding decoding encoder: %v", err)
return nil, nil, err
}
return decoded, encoder, nil
}
// Set the pattern cell's content stream.

View File

@ -724,7 +724,11 @@ func (this *PdfReader) GetPage(pageNumber int) (*PdfPage, error) {
if len(this.pageList) < pageNumber {
return nil, errors.New("Invalid page number (page count too short)")
}
page := this.PageList[pageNumber-1]
idx := pageNumber - 1
if idx < 0 {
return nil, fmt.Errorf("Page numbering must start at 1")
}
page := this.PageList[idx]
return page, nil
}

View File

@ -71,7 +71,7 @@ type PdfShadingType1 struct {
*PdfShading
Domain *PdfObjectArray
Matrix *PdfObjectArray
Function PdfFunction
Function []PdfFunction
}
// Shading type 2: Axial shading.
@ -79,7 +79,7 @@ type PdfShadingType2 struct {
*PdfShading
Coords *PdfObjectArray
Domain *PdfObjectArray
Function PdfFunction
Function []PdfFunction
Extend *PdfObjectArray
}
@ -88,7 +88,7 @@ type PdfShadingType3 struct {
*PdfShading
Coords *PdfObjectArray
Domain *PdfObjectArray
Function PdfFunction
Function []PdfFunction
Extend *PdfObjectArray
}
@ -99,7 +99,7 @@ type PdfShadingType4 struct {
BitsPerComponent *PdfObjectInteger
BitsPerFlag *PdfObjectInteger
Decode *PdfObjectArray
Function PdfFunction
Function []PdfFunction
}
// Shading type 5: Lattice-form Gouraud-shaded triangle mesh.
@ -109,7 +109,7 @@ type PdfShadingType5 struct {
BitsPerComponent *PdfObjectInteger
VerticesPerRow *PdfObjectInteger
Decode *PdfObjectArray
Function PdfFunction
Function []PdfFunction
}
// Shading type 6: Coons patch mesh.
@ -119,7 +119,7 @@ type PdfShadingType6 struct {
BitsPerComponent *PdfObjectInteger
BitsPerFlag *PdfObjectInteger
Decode *PdfObjectArray
Function PdfFunction
Function []PdfFunction
}
// Shading type 7: Tensor-product patch mesh.
@ -129,7 +129,7 @@ type PdfShadingType7 struct {
BitsPerComponent *PdfObjectInteger
BitsPerFlag *PdfObjectInteger
Decode *PdfObjectArray
Function PdfFunction
Function []PdfFunction
}
// Used for PDF parsing. Loads the PDF shading from a PDF object.
@ -330,14 +330,26 @@ func newPdfShadingType1FromDictionary(dict *PdfObjectDictionary) (*PdfShadingTyp
obj := dict.Get("Function")
if obj == nil {
common.Log.Debug("Required attribute missing: Function")
return nil, errors.New("Required attribute missing")
return nil, ErrRequiredAttributeMissing
}
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
shading.Function = []PdfFunction{}
if array, is := obj.(*PdfObjectArray); is {
for _, obj := range *array {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = append(shading.Function, function)
}
} else {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = append(shading.Function, function)
}
shading.Function = function
return &shading, nil
}
@ -350,7 +362,7 @@ func newPdfShadingType2FromDictionary(dict *PdfObjectDictionary) (*PdfShadingTyp
obj := dict.Get("Coords")
if obj == nil {
common.Log.Debug("Required attribute missing: Coords")
return nil, errors.New("Required attribute missing")
return nil, ErrRequiredAttributeMissing
}
arr, ok := obj.(*PdfObjectArray)
if !ok {
@ -378,14 +390,26 @@ func newPdfShadingType2FromDictionary(dict *PdfObjectDictionary) (*PdfShadingTyp
obj = dict.Get("Function")
if obj == nil {
common.Log.Debug("Required attribute missing: Function")
return nil, errors.New("Required attribute missing")
return nil, ErrRequiredAttributeMissing
}
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
shading.Function = []PdfFunction{}
if array, is := obj.(*PdfObjectArray); is {
for _, obj := range *array {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = append(shading.Function, function)
}
} else {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = append(shading.Function, function)
}
shading.Function = function
// Extend (optional).
if obj := dict.Get("Extend"); obj != nil {
@ -393,11 +417,11 @@ func newPdfShadingType2FromDictionary(dict *PdfObjectDictionary) (*PdfShadingTyp
arr, ok := obj.(*PdfObjectArray)
if !ok {
common.Log.Debug("Matrix not an array (got %T)", obj)
return nil, errors.New("Type check error")
return nil, ErrTypeCheck
}
if len(*arr) != 2 {
common.Log.Debug("Extend length not 2 (got %d)", len(*arr))
return nil, errors.New("Invalid attribute")
return nil, ErrInvalidAttribute
}
shading.Extend = arr
}
@ -440,15 +464,27 @@ func newPdfShadingType3FromDictionary(dict *PdfObjectDictionary) (*PdfShadingTyp
// Function (required).
obj = dict.Get("Function")
if obj == nil {
common.Log.Debug("Required attribute missing: Function")
common.Log.Debug("Required attribute missing: Function")
return nil, ErrRequiredAttributeMissing
}
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
shading.Function = []PdfFunction{}
if array, is := obj.(*PdfObjectArray); is {
for _, obj := range *array {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = append(shading.Function, function)
}
} else {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = append(shading.Function, function)
}
shading.Function = function
// Extend (optional).
if obj := dict.Get("Extend"); obj != nil {
@ -524,15 +560,29 @@ func newPdfShadingType4FromDictionary(dict *PdfObjectDictionary) (*PdfShadingTyp
}
shading.Decode = arr
// Function (optional).
// Function (required).
obj = dict.Get("Function")
if obj != nil {
if obj == nil {
common.Log.Debug("Required attribute missing: Function")
return nil, ErrRequiredAttributeMissing
}
shading.Function = []PdfFunction{}
if array, is := obj.(*PdfObjectArray); is {
for _, obj := range *array {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = append(shading.Function, function)
}
} else {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = function
shading.Function = append(shading.Function, function)
}
return &shading, nil
@ -596,12 +646,25 @@ func newPdfShadingType5FromDictionary(dict *PdfObjectDictionary) (*PdfShadingTyp
// Function (optional).
if obj := dict.Get("Function"); obj != nil {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
// Function (required).
shading.Function = []PdfFunction{}
if array, is := obj.(*PdfObjectArray); is {
for _, obj := range *array {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = append(shading.Function, function)
}
} else {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = append(shading.Function, function)
}
shading.Function = function
}
return &shading, nil
@ -665,12 +728,24 @@ func newPdfShadingType6FromDictionary(dict *PdfObjectDictionary) (*PdfShadingTyp
// Function (optional).
if obj := dict.Get("Function"); obj != nil {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
shading.Function = []PdfFunction{}
if array, is := obj.(*PdfObjectArray); is {
for _, obj := range *array {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = append(shading.Function, function)
}
} else {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = append(shading.Function, function)
}
shading.Function = function
}
return &shading, nil
@ -734,12 +809,24 @@ func newPdfShadingType7FromDictionary(dict *PdfObjectDictionary) (*PdfShadingTyp
// Function (optional).
if obj := dict.Get("Function"); obj != nil {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
shading.Function = []PdfFunction{}
if array, is := obj.(*PdfObjectArray); is {
for _, obj := range *array {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = append(shading.Function, function)
}
} else {
function, err := newPdfFunctionFromPdfObject(obj)
if err != nil {
common.Log.Debug("Error parsing function: %v", err)
return nil, err
}
shading.Function = append(shading.Function, function)
}
shading.Function = function
}
return &shading, nil
@ -791,7 +878,15 @@ func (this *PdfShadingType1) ToPdfObject() PdfObject {
d.Set("Matrix", this.Matrix)
}
if this.Function != nil {
d.Set("Function", this.Function.ToPdfObject())
if len(this.Function) == 1 {
d.Set("Function", this.Function[0].ToPdfObject())
} else {
farr := MakeArray()
for _, f := range this.Function {
farr.Append(f.ToPdfObject())
}
d.Set("Function", farr)
}
}
return this.container
@ -816,7 +911,15 @@ func (this *PdfShadingType2) ToPdfObject() PdfObject {
d.Set("Domain", this.Domain)
}
if this.Function != nil {
d.Set("Function", this.Function.ToPdfObject())
if len(this.Function) == 1 {
d.Set("Function", this.Function[0].ToPdfObject())
} else {
farr := MakeArray()
for _, f := range this.Function {
farr.Append(f.ToPdfObject())
}
d.Set("Function", farr)
}
}
if this.Extend != nil {
d.Set("Extend", this.Extend)
@ -841,7 +944,15 @@ func (this *PdfShadingType3) ToPdfObject() PdfObject {
d.Set("Domain", this.Domain)
}
if this.Function != nil {
d.Set("Function", this.Function.ToPdfObject())
if len(this.Function) == 1 {
d.Set("Function", this.Function[0].ToPdfObject())
} else {
farr := MakeArray()
for _, f := range this.Function {
farr.Append(f.ToPdfObject())
}
d.Set("Function", farr)
}
}
if this.Extend != nil {
d.Set("Extend", this.Extend)
@ -872,7 +983,15 @@ func (this *PdfShadingType4) ToPdfObject() PdfObject {
d.Set("Decode", this.Decode)
}
if this.Function != nil {
d.Set("Function", this.Function.ToPdfObject())
if len(this.Function) == 1 {
d.Set("Function", this.Function[0].ToPdfObject())
} else {
farr := MakeArray()
for _, f := range this.Function {
farr.Append(f.ToPdfObject())
}
d.Set("Function", farr)
}
}
return this.container
@ -900,7 +1019,15 @@ func (this *PdfShadingType5) ToPdfObject() PdfObject {
d.Set("Decode", this.Decode)
}
if this.Function != nil {
d.Set("Function", this.Function.ToPdfObject())
if len(this.Function) == 1 {
d.Set("Function", this.Function[0].ToPdfObject())
} else {
farr := MakeArray()
for _, f := range this.Function {
farr.Append(f.ToPdfObject())
}
d.Set("Function", farr)
}
}
return this.container
@ -928,7 +1055,15 @@ func (this *PdfShadingType6) ToPdfObject() PdfObject {
d.Set("Decode", this.Decode)
}
if this.Function != nil {
d.Set("Function", this.Function.ToPdfObject())
if len(this.Function) == 1 {
d.Set("Function", this.Function[0].ToPdfObject())
} else {
farr := MakeArray()
for _, f := range this.Function {
farr.Append(f.ToPdfObject())
}
d.Set("Function", farr)
}
}
return this.container
@ -956,7 +1091,15 @@ func (this *PdfShadingType7) ToPdfObject() PdfObject {
d.Set("Decode", this.Decode)
}
if this.Function != nil {
d.Set("Function", this.Function.ToPdfObject())
if len(this.Function) == 1 {
d.Set("Function", this.Function[0].ToPdfObject())
} else {
farr := MakeArray()
for _, f := range this.Function {
farr.Append(f.ToPdfObject())
}
d.Set("Function", farr)
}
}
return this.container

View File

@ -21,6 +21,8 @@ import (
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/common/license"
. "github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/model/fonts"
"strings"
)
var pdfCreator = ""
@ -264,6 +266,7 @@ func (this *PdfWriter) AddPage(page *PdfPage) error {
obj := page.ToPdfObject()
common.Log.Trace("==========")
common.Log.Trace("Appending to page list %T", obj)
procPage(page)
pageObj, ok := obj.(*PdfIndirectObject)
if !ok {
@ -339,6 +342,8 @@ func (this *PdfWriter) AddPage(page *PdfPage) error {
this.addObject(pageObj)
// Traverse the page and record all object references.
err := this.addObjects(pDict)
if err != nil {
@ -348,6 +353,34 @@ func (this *PdfWriter) AddPage(page *PdfPage) error {
return nil
}
// procPage appends an "unlicensed" text notice to page `p`, unless
// license.GetLicenseKey() returns a non-nil key that reports IsLicensed().
// The notice uses a Helvetica font registered in the page resources under the
// name "UF1" and is added to the page as an additional content stream.
func procPage(p *PdfPage) {
	if key := license.GetLicenseKey(); key != nil && key.IsLicensed() {
		// Licensed: leave the page untouched.
		return
	}

	// Register the font referenced by the notice's Tf operator.
	helvetica := fonts.NewFontHelvetica()
	p.Resources.SetFontByName("UF1", helvetica.ToPdfObject())

	const notice = "Unlicensed UniDoc - Get a license on https://unidoc.io"

	// Content stream operators, one per line, wrapped in q/Q so the graphics
	// state changes do not leak into the rest of the page.
	operators := []string{
		"q",
		"BT",
		"/UF1 14 Tf",
		"1 0 0 rg",
		"10 10 Td",
		fmt.Sprintf("(%s) Tj", notice),
		"ET",
		"Q",
	}
	p.AddContentStreamByString(strings.Join(operators, "\n"))

	// Update page object (the returned object itself is not needed here).
	p.ToPdfObject()
}
// Add outlines to a PDF file.
func (this *PdfWriter) AddOutlineTree(outlineTree *PdfOutlineTreeNode) {
this.outlineTree = outlineTree
@ -518,6 +551,13 @@ func (this *PdfWriter) Encrypt(userPass, ownerPass []byte, options *EncryptOptio
// Write the pdf out.
func (this *PdfWriter) Write(ws io.WriteSeeker) error {
common.Log.Trace("Write()")
lk := license.GetLicenseKey()
if lk == nil || !lk.IsLicensed() {
fmt.Printf("Unlicensed copy of unidoc\n")
fmt.Printf("To get rid of the watermark - Please get a license on https://unidoc.io\n")
}
// Outlines.
if this.outlineTree != nil {
common.Log.Trace("OutlineTree: %+v", this.outlineTree)

View File

@ -37,6 +37,8 @@ type XObjectForm struct {
primitive *PdfObjectStream
}
// ErrTypeCheck is returned when a PDF object does not have the expected concrete type
// (for example when an SMask entry is not a *PdfObjectStream).
var ErrTypeCheck = errors.New("Type check error")
// Create a brand new XObject Form. Creates a new underlying PDF object stream primitive.
func NewXObjectForm() *XObjectForm {
xobj := &XObjectForm{}
@ -198,6 +200,7 @@ type XObjectImage struct {
Intent PdfObject
ImageMask PdfObject
Mask PdfObject
Matte PdfObject
Decode PdfObject
Interpolate PdfObject
Alternatives PdfObject
@ -226,6 +229,16 @@ func NewXObjectImage() *XObjectImage {
// If encoder is nil, uses raw encoding (none).
func NewXObjectImageFromImage(img *Image, cs PdfColorspace, encoder StreamEncoder) (*XObjectImage, error) {
	// Delegate to UpdateXObjectImageFromImage, passing a freshly created
	// XObjectImage as the source of the default masks.
	return UpdateXObjectImageFromImage(NewXObjectImage(), img, cs, encoder)
}
// UpdateXObjectImageFromImage creates a new XObject Image from an Image object `img` and default
// masks from `xobjIn`.
// The default masks are overridden if img.hasAlpha.
// If `encoder` is nil, uses raw encoding (none).
func UpdateXObjectImageFromImage(xobjIn *XObjectImage, img *Image, cs PdfColorspace,
encoder StreamEncoder) (*XObjectImage, error) {
xobj := NewXObjectImage()
if encoder == nil {
encoder = NewRawEncoder()
@ -281,11 +294,78 @@ func NewXObjectImageFromImage(img *Image, cs PdfColorspace, encoder StreamEncode
smask.Height = &img.Height
smask.ColorSpace = NewPdfColorspaceDeviceGray()
xobj.SMask = smask.ToPdfObject()
} else {
xobj.SMask = xobjIn.SMask
xobj.ImageMask = xobjIn.ImageMask
if xobj.ColorSpace.GetNumComponents() == 1 {
smaskMatteToGray(xobj)
}
}
return xobj, nil
}
// smaskMatteToGray converts the Matte value in the SMask image referenced by `xobj` (if there is
// one) to a single gray component, replacing the "Matte" entry of the SMask stream dictionary.
//
// Returns nil when `xobj` has no SMask or the SMask has no Matte entry.
// Returns ErrTypeCheck when the SMask is not a *PdfObjectStream or the Matte entry is not a
// *PdfObjectArray, and propagates any error from toGray.
func smaskMatteToGray(xobj *XObjectImage) error {
	if xobj.SMask == nil {
		return nil
	}
	stream, ok := xobj.SMask.(*PdfObjectStream)
	if !ok {
		common.Log.Debug("SMask is not *PdfObjectStream")
		return ErrTypeCheck
	}
	dict := stream.PdfObjectDictionary
	matte := dict.Get("Matte")
	if matte == nil {
		return nil
	}

	// Use a checked assertion: a Matte entry of any other type (e.g. malformed input)
	// must produce an error rather than a panic.
	matteArr, ok := matte.(*PdfObjectArray)
	if !ok {
		common.Log.Debug("Matte is not *PdfObjectArray")
		return ErrTypeCheck
	}

	gray, err := toGray(matteArr)
	if err != nil {
		return err
	}
	grayMatte := MakeArrayFromFloats([]float64{gray})
	dict.SetIfNotNil("Matte", grayMatte)
	return nil
}
// toGray converts a 1, 3 or 4 dimensional color `matte` to gray.
//
// A 1-component color is returned as is, a 3-component color is interpreted as DeviceRGB and a
// 4-component color as DeviceCMYK (converted to RGB first, then to gray).
// An error is returned if `matte` cannot be converted to a float array, if it is not a 1, 3 or 4
// dimensional color, or if any of the colorspace conversions fail.
func toGray(matte *PdfObjectArray) (float64, error) {
	colors, err := matte.ToFloat64Array()
	if err != nil {
		// Do not continue with an unreliable `colors` slice; report the real cause to the caller.
		common.Log.Debug("Bad Matte array: matte=%s err=%v", matte, err)
		return 0.0, err
	}

	switch len(colors) {
	case 1:
		return colors[0], nil
	case 3:
		cs := PdfColorspaceDeviceRGB{}
		rgbColor, err := cs.ColorFromFloats(colors)
		if err != nil {
			return 0.0, err
		}
		// NOTE(review): unchecked assertion; assumes the DeviceRGB colorspace yields
		// *PdfColorDeviceRGB - confirm.
		return rgbColor.(*PdfColorDeviceRGB).ToGray().Val(), nil
	case 4:
		cs := PdfColorspaceDeviceCMYK{}
		cmykColor, err := cs.ColorFromFloats(colors)
		if err != nil {
			return 0.0, err
		}
		rgbColor, err := cs.ColorToRGB(cmykColor.(*PdfColorDeviceCMYK))
		if err != nil {
			return 0.0, err
		}
		return rgbColor.(*PdfColorDeviceRGB).ToGray().Val(), nil
	}

	// Any other number of components is not a supported Matte color.
	err = errors.New("Bad Matte color")
	common.Log.Error("toGray: matte=%s err=%v", matte, err)
	return 0.0, err
}
// Build the image xobject from a stream object.
// An image dictionary is the dictionary portion of a stream object representing an image XObject.
func NewXObjectImageFromStream(stream *PdfObjectStream) (*XObjectImage, error) {
@ -351,6 +431,7 @@ func NewXObjectImageFromStream(stream *PdfObjectStream) (*XObjectImage, error) {
img.Alternatives = dict.Get("Alternatives")
img.SMask = dict.Get("SMask")
img.SMaskInData = dict.Get("SMaskInData")
img.Matte = dict.Get("Matte")
img.Name = dict.Get("Name")
img.StructParent = dict.Get("StructParent")
img.ID = dict.Get("ID")
@ -493,6 +574,7 @@ func (ximg *XObjectImage) ToPdfObject() PdfObject {
dict.SetIfNotNil("Alternatives", ximg.Alternatives)
dict.SetIfNotNil("SMask", ximg.SMask)
dict.SetIfNotNil("SMaskInData", ximg.SMaskInData)
dict.SetIfNotNil("Matte", ximg.Matte)
dict.SetIfNotNil("Name", ximg.Name)
dict.SetIfNotNil("StructParent", ximg.StructParent)
dict.SetIfNotNil("ID", ximg.ID)