mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-05 19:30:30 +08:00

* Track runes in IdentityEncoder (for subsetting), track decoded runes * Working with the identity encoder in font_composite.go * Add GetFilterArray to multi encoder. Add comments. * Add NewFromContents constructor to extractor only requiring contents and resources * golint fixes * Optimizer compress streams - improved detection of raw streams * Optimize - CleanContentStream optimizer that removes redundant operands * WIP Optimize - clean fonts Will support both font file reduction and subsetting. (WIP) * Optimize - image processing - try combined DCT and Flate * Update options.go * Update optimizer.go * Create utils.go for optimize with common methods needed for optimization * Optimizer - add font subsetting method Covers XObject Forms, annotations etc. Uses extractor package to extract text marks covering what fonts and glyphs are used. Package truetype used for subsetting. * Add some comments * Fix cmap parsing rune conversion * Error checking for extractor. Add some comments. * Update Jenkinsfile * Update modules
121 lines
3.8 KiB
Go
121 lines
3.8 KiB
Go
/*
|
|
* This file is subject to the terms and conditions defined in
|
|
* file 'LICENSE.md', which is part of this source code package.
|
|
*/
|
|
|
|
package textencoding
|
|
|
|
import (
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/unidoc/unipdf/v3/core"
|
|
)
|
|
|
|
// IdentityEncoder represents a 2-byte identity encoding.
//
// NOTE: The encoder is frequently used only to map to and from glyph indices, in which case the
// values it handles carry no unicode meaning of their own, except via the ToUnicode maps.
//
// TODO: Using runes to stand in for glyph indices (values that are not real utf8 runes) is
// confusing. It might be better to combine the Identity encoder with a ToUnicode map and keep
// track of the actual runes, character codes and CMaps together.
type IdentityEncoder struct {
	// baseName is the name of the predefined encoding this encoder was created for.
	baseName string

	// registeredMap is the set of runes registered by the encoder; it tracks which runes are
	// used so that fonts can be subset. It is nil until the first rune is registered.
	registeredMap map[rune]struct{}
}
|
|
|
|
// NewIdentityTextEncoder returns a new IdentityEncoder based on predefined
|
|
// encoding `baseName` and difference map `differences`.
|
|
func NewIdentityTextEncoder(baseName string) *IdentityEncoder {
|
|
return &IdentityEncoder{
|
|
baseName: baseName,
|
|
}
|
|
}
|
|
|
|
// RegisteredRunes returns the slice of runes that have been registered as used by the encoder.
|
|
func (enc *IdentityEncoder) RegisteredRunes() []rune {
|
|
runes := make([]rune, len(enc.registeredMap))
|
|
i := 0
|
|
for r := range enc.registeredMap {
|
|
runes[i] = r
|
|
i++
|
|
}
|
|
return runes
|
|
}
|
|
|
|
// String returns a string that describes `enc`.
|
|
func (enc *IdentityEncoder) String() string {
|
|
return enc.baseName
|
|
}
|
|
|
|
// Encode converts the Go unicode string to a PDF encoded string.
|
|
func (enc *IdentityEncoder) Encode(str string) []byte {
|
|
return encodeString16bit(enc, str)
|
|
}
|
|
|
|
// Decode converts PDF encoded string to a Go unicode string.
|
|
func (enc *IdentityEncoder) Decode(raw []byte) string {
|
|
return decodeString16bit(enc, raw)
|
|
}
|
|
|
|
// RuneToCharcode converts rune `r` to a PDF character code.
|
|
// The bool return flag is true if there was a match, and false otherwise.
|
|
// TODO: Here the `r` is an actual rune.
|
|
func (enc *IdentityEncoder) RuneToCharcode(r rune) (CharCode, bool) {
|
|
if enc.registeredMap == nil {
|
|
enc.registeredMap = map[rune]struct{}{}
|
|
}
|
|
enc.registeredMap[r] = struct{}{} // Register use (subsetting).
|
|
|
|
return CharCode(r), true
|
|
}
|
|
|
|
// CharcodeToRune converts PDF character code `code` to a rune.
|
|
// The bool return flag is true if there was a match, and false otherwise.
|
|
// TODO: Here the `r` is not necessarily an actual rune but a glyph index (unless both).
|
|
func (enc *IdentityEncoder) CharcodeToRune(code CharCode) (rune, bool) {
|
|
if enc.registeredMap == nil {
|
|
enc.registeredMap = map[rune]struct{}{}
|
|
}
|
|
|
|
// TODO: The rune(code) is confusing and is not an actual utf8 rune.
|
|
enc.registeredMap[rune(code)] = struct{}{}
|
|
return rune(code), true
|
|
}
|
|
|
|
// RuneToGlyph returns the glyph name for rune `r`.
|
|
// The bool return flag is true if there was a match, and false otherwise.
|
|
func (enc *IdentityEncoder) RuneToGlyph(r rune) (GlyphName, bool) {
|
|
if r == ' ' {
|
|
return "space", true
|
|
}
|
|
glyph := GlyphName(fmt.Sprintf("uni%.4X", r))
|
|
return glyph, true
|
|
}
|
|
|
|
// GlyphToRune returns the rune corresponding to glyph name `glyph`.
|
|
// The bool return flag is true if there was a match, and false otherwise.
|
|
func (enc *IdentityEncoder) GlyphToRune(glyph GlyphName) (rune, bool) {
|
|
// String with "uniXXXX" format where XXXX is the hexcode.
|
|
if glyph == "space" {
|
|
return ' ', true
|
|
} else if !strings.HasPrefix(string(glyph), "uni") || len(glyph) != 7 {
|
|
return 0, false
|
|
}
|
|
r, err := strconv.ParseUint(string(glyph[3:]), 16, 16)
|
|
if err != nil {
|
|
return 0, false
|
|
}
|
|
return rune(r), true
|
|
}
|
|
|
|
// ToPdfObject returns a nil as it is not truly a PDF object and should not be attempted to store in file.
|
|
func (enc *IdentityEncoder) ToPdfObject() core.PdfObject {
|
|
if enc.baseName != "" {
|
|
return core.MakeName(enc.baseName)
|
|
}
|
|
return core.MakeNull()
|
|
}
|