mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-02 22:17:06 +08:00

* Prepared skeleton and basic component implementations for the jbig2 encoding. * Added Bitset. Implemented Bitmap. * Decoder with old Arithmetic Decoder * Partly working arithmetic * Working arithmetic decoder. * MMR patched. * rebuild to apache. * Working generic * Working generic * Decoded full document * Update Jenkinsfile go version [master] (#398) * Update Jenkinsfile go version * Decoded AnnexH document * Minor issues fixed. * Update README.md * Fixed generic region errors. Added benchmark. Added bitmap unpadder. Added Bitmap toImage method. * Fixed endofpage error * Added integration test. * Decoded all test files without errors. Implemented JBIG2Global. * Merged with v3 version * Fixed the EOF in the globals issue * Fixed the JBIG2 ChocolateData Decode * JBIG2 Added license information * Minor fix in jbig2 encoding. * Applied the logging convention * Cleaned unnecessary imports * Go modules clear unused imports * checked out the README.md * Moved trace to Debug. Fixed the build integrate tag in the document_decode_test.go * Initial encoder skeleton * Applied UniPDF Developer Guide. Fixed lint issues. * Cleared documentation, fixed style issues. * Added jbig2 doc.go files. Applied unipdf guide style. * Minor code style changes. * Minor naming and style issues fixes. * Minor naming changes. Style issues fixed. * Review r11 fixes. * Added JBIG2 Encoder skeleton. * Moved Document and Page to jbig2/document package. Created decoder package responsible for decoding jbig2 stream. * Implemented raster functions. * Added raster uni low test funcitons. * Added raster low test functions * untracked files on jbig2-encoder: c869089 Added raster low test functions * index on jbig2-encoder: c869089 Added raster low test functions * Added morph files. 
* implemented jbig2 encoder basics * JBIG2 Encoder - Generic method * Added jbig2 image encode ttests, black/white image tests * cleaned and tested jbig2 package * unfinished jbig2 classified encoder * jbig2 minor style changes * minor jbig2 encoder changes * prepared JBIG2 Encoder * Style and lint fixes * Minor changes and lints * Fixed shift unsinged value build errors * Minor naming change * Added jbig2 encode, image gondels. Fixed jbig2 decode bug. * Provided jbig2 core.DecodeGlobals function. * Fixed JBIG2Encoder `r6` revision issues. * Removed public JBIG2Encoder document. * Minor style changes * added NewJBIG2Encoder function. * fixed JBIG2Encoder 'r9' revision issues. * Cleared 'r9' commented code. * Updated ACKNOWLEDGEMENETS. Fixed JBIG2Encoder 'r10' revision issues. Co-authored-by: Gunnsteinn Hall <gunnsteinn.hall@gmail.com>
538 lines
15 KiB
Go
538 lines
15 KiB
Go
/*
|
|
* This file is subject to the terms and conditions defined in
|
|
* file 'LICENSE.md', which is part of this source code package.
|
|
*/
|
|
|
|
package document
|
|
|
|
import (
|
|
"fmt"
|
|
"math"
|
|
|
|
"github.com/unidoc/unipdf/v3/common"
|
|
|
|
"github.com/unidoc/unipdf/v3/internal/jbig2/basic"
|
|
"github.com/unidoc/unipdf/v3/internal/jbig2/bitmap"
|
|
"github.com/unidoc/unipdf/v3/internal/jbig2/document/segments"
|
|
"github.com/unidoc/unipdf/v3/internal/jbig2/errors"
|
|
"github.com/unidoc/unipdf/v3/internal/jbig2/writer"
|
|
)
|
|
|
|
// EncodingMethod defines the method of encoding used for a given page.
type EncodingMethod int

// Available encoding methods. The zero value is GenericEM.
const (
	// GenericEM is the default method (presumably generic region coding - confirm against the encoder).
	GenericEM EncodingMethod = iota
	// CorrelationEM is the second method (presumably correlation based classification - confirm against the encoder).
	CorrelationEM
	// RankHausEM is the third method (presumably rank Hausdorff classification - confirm against the encoder).
	RankHausEM
)
|
|
|
|
// Page represents JBIG2 Page structure.
|
|
// It contains all the included segments header definitions mapped to
|
|
// their number relation to the document and the resultant page bitmap.
|
|
// NOTE: page numeration starts from 1 and the association to 0'th page means the segments
|
|
// are associated to global segments.
|
|
type Page struct {
|
|
// Segments relation of the page number to their structures.
|
|
Segments []*segments.Header
|
|
// PageNumber defines this page number.
|
|
PageNumber int
|
|
// Bitmap represents the page image.
|
|
Bitmap *bitmap.Bitmap
|
|
|
|
// Page parameters
|
|
FinalHeight int
|
|
FinalWidth int
|
|
ResolutionX int
|
|
ResolutionY int
|
|
|
|
IsLossless bool
|
|
|
|
// Document is a relation to page's document
|
|
Document *Document
|
|
// FirstSegmentNumber defines first segment number for given page
|
|
FirstSegmentNumber int
|
|
// EncodingMethod defines
|
|
EncodingMethod EncodingMethod
|
|
}
|
|
|
|
// AddEndOfPageSegment adds the end of page segment.
|
|
func (p *Page) AddEndOfPageSegment() {
|
|
seg := &segments.Header{
|
|
Type: segments.TEndOfPage,
|
|
PageAssociation: p.PageNumber,
|
|
}
|
|
p.Segments = append(p.Segments, seg)
|
|
}
|
|
|
|
// AddGenericRegion adds the generic region to the page context.
|
|
// 'bm' - bitmap containing data to encode
|
|
// 'xloc' - x location of the generic region
|
|
// 'yloc' - y location of the generic region
|
|
// 'template' - generic region template
|
|
// 'tp' - is the generic region type
|
|
// 'duplicateLineRemoval' - is the flag that defines if the generic region segment should remove duplicated lines
|
|
func (p *Page) AddGenericRegion(bm *bitmap.Bitmap, xloc, yloc, template int, tp segments.Type, duplicateLineRemoval bool) error {
|
|
const processName = "Page.AddGenericRegion"
|
|
// create generic region segment
|
|
genReg := &segments.GenericRegion{}
|
|
if err := genReg.InitEncode(bm, xloc, yloc, template, duplicateLineRemoval); err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
// create segment header for the generic region
|
|
genRegSegmentHeader := &segments.Header{
|
|
Type: segments.TImmediateGenericRegion,
|
|
PageAssociation: p.PageNumber,
|
|
SegmentData: genReg,
|
|
}
|
|
p.Segments = append(p.Segments, genRegSegmentHeader)
|
|
return nil
|
|
}
|
|
|
|
// AddPageInformationSegment adds the page information segment to the page segments.
|
|
func (p *Page) AddPageInformationSegment() {
|
|
// prepare page info segment data
|
|
pageInfo := &segments.PageInformationSegment{
|
|
PageBMWidth: p.FinalWidth,
|
|
PageBMHeight: p.FinalHeight,
|
|
ResolutionX: p.ResolutionX,
|
|
ResolutionY: p.ResolutionY,
|
|
IsLossless: p.IsLossless,
|
|
}
|
|
|
|
// and the page info header
|
|
pageInfoHeader := &segments.Header{
|
|
PageAssociation: p.PageNumber,
|
|
SegmentDataLength: uint64(pageInfo.Size()),
|
|
SegmentData: pageInfo,
|
|
Type: segments.TPageInformation,
|
|
}
|
|
p.Segments = append(p.Segments, pageInfoHeader)
|
|
}
|
|
|
|
// addTextRegionSegment adds text region segment to the given page.
|
|
// arguments:
|
|
// - referredTo is the referred to segments header
|
|
// - globalSymbolsMap - is the mapping between global symbols id and their classes.
|
|
// - localSymbolsMap - is the mapping between this page exclusive symbols id and their' classes.
|
|
// - comps - are the components numbers for this page.
|
|
// - inLL - is the slice of the lower-left corners of the boxes for each symbol
|
|
// - symbols - the slice of symbols
|
|
// - assignments -
|
|
func (p *Page) addTextRegionSegment(referredTo []*segments.Header, globalSymbolsMap, localSymbolsMap map[int]int, comps []int, inLL *bitmap.Points, symbols *bitmap.Bitmaps, classIDs *basic.IntSlice, boxes *bitmap.Boxes, symbits, sbNumInstances int) {
|
|
textRegion := &segments.TextRegion{NumberOfSymbols: uint32(sbNumInstances)}
|
|
textRegion.InitEncode(globalSymbolsMap, localSymbolsMap, comps, inLL, symbols, classIDs, boxes, p.FinalWidth, p.FinalHeight, symbits)
|
|
|
|
textRegionHeader := &segments.Header{
|
|
RTSegments: referredTo,
|
|
SegmentData: textRegion,
|
|
PageAssociation: p.PageNumber,
|
|
Type: segments.TImmediateTextRegion,
|
|
}
|
|
|
|
// if the text region referes only to global symbol dictionary
|
|
// it shold be stored just after page information segment
|
|
tp := segments.TPageInformation
|
|
if localSymbolsMap != nil {
|
|
// otherwise store it after local symbol dictionary
|
|
tp = segments.TSymbolDictionary
|
|
}
|
|
|
|
var index int
|
|
for ; index < len(p.Segments); index++ {
|
|
if p.Segments[index].Type == tp {
|
|
index++
|
|
break
|
|
}
|
|
}
|
|
p.Segments = append(p.Segments, nil)
|
|
copy(p.Segments[index+1:], p.Segments[index:])
|
|
p.Segments[index] = textRegionHeader
|
|
}
|
|
|
|
// Encode encodes segments into provided 'w' writer.
|
|
func (p *Page) Encode(w writer.BinaryWriter) (n int, err error) {
|
|
const processName = "Page.Encode"
|
|
var temp int
|
|
for _, seg := range p.Segments {
|
|
if temp, err = seg.Encode(w); err != nil {
|
|
return n, errors.Wrap(err, processName, "")
|
|
}
|
|
n += temp
|
|
}
|
|
return n, nil
|
|
}
|
|
|
|
// GetBitmap implements segments.Pager interface.
|
|
func (p *Page) GetBitmap() (bm *bitmap.Bitmap, err error) {
|
|
common.Log.Trace(fmt.Sprintf("[PAGE][#%d] GetBitmap begins...", p.PageNumber))
|
|
defer func() {
|
|
if err != nil {
|
|
common.Log.Trace(fmt.Sprintf("[PAGE][#%d] GetBitmap failed. %v", p.PageNumber, err))
|
|
} else {
|
|
common.Log.Trace(fmt.Sprintf("[PAGE][#%d] GetBitmap finished", p.PageNumber))
|
|
}
|
|
}()
|
|
|
|
if p.Bitmap != nil {
|
|
return p.Bitmap, nil
|
|
}
|
|
|
|
err = p.composePageBitmap()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return p.Bitmap, nil
|
|
}
|
|
|
|
// GetHeight gets the page height.
|
|
func (p *Page) GetHeight() (int, error) {
|
|
return p.getHeight()
|
|
}
|
|
|
|
// GetResolutionX gets the 'x' resolution of the page.
|
|
func (p *Page) GetResolutionX() (int, error) {
|
|
return p.getResolutionX()
|
|
}
|
|
|
|
// GetResolutionY gets the 'y' resolution of the page.
|
|
func (p *Page) GetResolutionY() (int, error) {
|
|
return p.getResolutionY()
|
|
}
|
|
|
|
// GetWidth gets the page width.
|
|
func (p *Page) GetWidth() (int, error) {
|
|
return p.getWidth()
|
|
}
|
|
|
|
// GetSegment implements segments.Pager interface.
|
|
func (p *Page) GetSegment(number int) (*segments.Header, error) {
|
|
const processName = "Page.GetSegment"
|
|
|
|
for _, h := range p.Segments {
|
|
if h.SegmentNumber == uint32(number) {
|
|
return h, nil
|
|
}
|
|
}
|
|
containedIDS := make([]uint32, len(p.Segments))
|
|
for i, h := range p.Segments {
|
|
containedIDS[i] = h.SegmentNumber
|
|
}
|
|
return nil, errors.Errorf(processName, "segment with number: '%d' not found in the page: '%d'. Known segment numbers: %v", number, p.PageNumber, containedIDS)
|
|
}
|
|
|
|
// String implements Stringer interface.
|
|
func (p *Page) String() string {
|
|
return fmt.Sprintf("Page #%d", p.PageNumber)
|
|
}
|
|
|
|
// newPage is the creator for the Page structure.
|
|
func newPage(d *Document, pageNumber int) *Page {
|
|
return &Page{Document: d, PageNumber: pageNumber, Segments: []*segments.Header{}}
|
|
}
|
|
|
|
// composePageBitmap composes the segment's bitmaps
|
|
// as a single page Bitmap.
|
|
func (p *Page) composePageBitmap() error {
|
|
const processName = "composePageBitmap"
|
|
if p.PageNumber == 0 {
|
|
return nil
|
|
}
|
|
h := p.getPageInformationSegment()
|
|
if h == nil {
|
|
return errors.Error(processName, "page information segment not found")
|
|
}
|
|
|
|
// get the Segment data
|
|
seg, err := h.GetSegmentData()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
pageInformation, ok := seg.(*segments.PageInformationSegment)
|
|
if !ok {
|
|
return errors.Error(processName, "page information segment is of invalid type")
|
|
}
|
|
|
|
if err = p.createPage(pageInformation); err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
p.clearSegmentData()
|
|
return nil
|
|
}
|
|
|
|
func (p *Page) createPage(i *segments.PageInformationSegment) error {
|
|
var err error
|
|
if !i.IsStripe || i.PageBMHeight != -1 {
|
|
// Page 79, 4)
|
|
err = p.createNormalPage(i)
|
|
} else {
|
|
err = p.createStripedPage(i)
|
|
}
|
|
return err
|
|
}
|
|
|
|
func (p *Page) createNormalPage(i *segments.PageInformationSegment) error {
|
|
const processName = "createNormalPage"
|
|
p.Bitmap = bitmap.New(i.PageBMWidth, i.PageBMHeight)
|
|
|
|
// Page 79, 3)
|
|
// if default pixel value is not 0, byte will be filled with 0xff
|
|
if i.DefaultPixelValue() != 0 {
|
|
p.Bitmap.SetDefaultPixel()
|
|
}
|
|
|
|
for _, h := range p.Segments {
|
|
switch h.Type {
|
|
case 6, 7, 22, 23, 38, 39, 42, 43:
|
|
common.Log.Trace("Getting Segment: %d", h.SegmentNumber)
|
|
s, err := h.GetSegmentData()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
r, ok := s.(segments.Regioner)
|
|
if !ok {
|
|
common.Log.Debug("Segment: %T is not a Regioner", s)
|
|
return errors.Errorf(processName, "invalid jbig2 segment type - not a Regioner: %T", s)
|
|
}
|
|
|
|
regionBitmap, err := r.GetRegionBitmap()
|
|
if err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
|
|
if p.fitsPage(i, regionBitmap) {
|
|
p.Bitmap = regionBitmap
|
|
} else {
|
|
regionInfo := r.GetRegionInfo()
|
|
op := p.getCombinationOperator(i, regionInfo.CombinaionOperator)
|
|
err = bitmap.Blit(regionBitmap, p.Bitmap, int(regionInfo.XLocation), int(regionInfo.YLocation), op)
|
|
if err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (p *Page) createStripedPage(i *segments.PageInformationSegment) error {
|
|
const processName = "createStripedPage"
|
|
pageStripes, err := p.collectPageStripes()
|
|
if err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
|
|
var startLine int
|
|
for _, sd := range pageStripes {
|
|
if eos, ok := sd.(*segments.EndOfStripe); ok {
|
|
startLine = eos.LineNumber() + 1
|
|
} else {
|
|
r := sd.(segments.Regioner)
|
|
regionInfo := r.GetRegionInfo()
|
|
op := p.getCombinationOperator(i, regionInfo.CombinaionOperator)
|
|
regionBitmap, err := r.GetRegionBitmap()
|
|
if err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
|
|
err = bitmap.Blit(regionBitmap, p.Bitmap, int(regionInfo.XLocation), startLine, op)
|
|
if err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (p *Page) collectPageStripes() (stripes []segments.Segmenter, err error) {
|
|
const processName = "collectPageStripes"
|
|
var s segments.Segmenter
|
|
|
|
for _, h := range p.Segments {
|
|
switch h.Type {
|
|
case 6, 7, 22, 23, 38, 39, 42, 43:
|
|
s, err = h.GetSegmentData()
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, processName, "")
|
|
}
|
|
stripes = append(stripes, s)
|
|
case 50:
|
|
s, err = h.GetSegmentData()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
eos, ok := s.(*segments.EndOfStripe)
|
|
if !ok {
|
|
return nil, errors.Errorf(processName, "EndOfStripe is not of valid type: '%T'", s)
|
|
}
|
|
|
|
stripes = append(stripes, eos)
|
|
p.FinalHeight = eos.LineNumber()
|
|
}
|
|
}
|
|
return stripes, nil
|
|
}
|
|
|
|
func (p *Page) clearSegmentData() {
|
|
for i := range p.Segments {
|
|
p.Segments[i].CleanSegmentData()
|
|
}
|
|
}
|
|
|
|
// countRegions counts the region segments in the Page.
|
|
func (p *Page) countRegions() int {
|
|
var regionCount int
|
|
|
|
for _, h := range p.Segments {
|
|
switch h.Type {
|
|
case 6, 7, 22, 23, 38, 39, 42, 43:
|
|
regionCount++
|
|
}
|
|
}
|
|
return regionCount
|
|
}
|
|
|
|
func (p *Page) fitsPage(i *segments.PageInformationSegment, regionBitmap *bitmap.Bitmap) bool {
|
|
return p.countRegions() == 1 &&
|
|
i.DefaultPixelValue() == 0 &&
|
|
i.PageBMWidth == regionBitmap.Width &&
|
|
i.PageBMHeight == regionBitmap.Height
|
|
}
|
|
|
|
func (p *Page) getCombinationOperator(i *segments.PageInformationSegment, newOperator bitmap.CombinationOperator) bitmap.CombinationOperator {
|
|
if i.CombinationOperatorOverrideAllowed() {
|
|
return newOperator
|
|
}
|
|
return i.CombinationOperator()
|
|
}
|
|
|
|
// getPageInformationSegment returns the associated page information segment.
|
|
func (p *Page) getPageInformationSegment() *segments.Header {
|
|
for _, s := range p.Segments {
|
|
if s.Type == segments.TPageInformation {
|
|
return s
|
|
}
|
|
}
|
|
common.Log.Debug("Page information segment not found for page: %s.", p)
|
|
return nil
|
|
}
|
|
|
|
func (p *Page) getHeight() (int, error) {
|
|
const processName = "getHeight"
|
|
if p.FinalHeight != 0 {
|
|
return p.FinalHeight, nil
|
|
}
|
|
|
|
h := p.getPageInformationSegment()
|
|
if h == nil {
|
|
return 0, errors.Error(processName, "nil page information")
|
|
}
|
|
|
|
s, err := h.GetSegmentData()
|
|
if err != nil {
|
|
return 0, errors.Wrap(err, processName, "")
|
|
}
|
|
|
|
pi, ok := s.(*segments.PageInformationSegment)
|
|
if !ok {
|
|
return 0, errors.Errorf(processName, "page information segment is of invalid type: '%T'", s)
|
|
}
|
|
|
|
if pi.PageBMHeight == math.MaxInt32 {
|
|
_, err = p.GetBitmap()
|
|
if err != nil {
|
|
return 0, errors.Wrap(err, processName, "")
|
|
}
|
|
} else {
|
|
p.FinalHeight = pi.PageBMHeight
|
|
}
|
|
return p.FinalHeight, nil
|
|
}
|
|
|
|
func (p *Page) getWidth() (int, error) {
|
|
const processName = "getWidth"
|
|
if p.FinalWidth != 0 {
|
|
return p.FinalWidth, nil
|
|
}
|
|
|
|
h := p.getPageInformationSegment()
|
|
if h == nil {
|
|
return 0, errors.Error(processName, "nil page information")
|
|
}
|
|
|
|
s, err := h.GetSegmentData()
|
|
if err != nil {
|
|
return 0, errors.Wrap(err, processName, "")
|
|
}
|
|
|
|
pi, ok := s.(*segments.PageInformationSegment)
|
|
if !ok {
|
|
return 0, errors.Errorf(processName, "page information segment is of invalid type: '%T'", s)
|
|
}
|
|
p.FinalWidth = pi.PageBMWidth
|
|
return p.FinalWidth, nil
|
|
}
|
|
|
|
func (p *Page) getResolutionX() (int, error) {
|
|
const processName = "getResolutionX"
|
|
if p.ResolutionX != 0 {
|
|
return p.ResolutionX, nil
|
|
}
|
|
h := p.getPageInformationSegment()
|
|
if h == nil {
|
|
return 0, errors.Error(processName, "nil page information")
|
|
}
|
|
|
|
s, err := h.GetSegmentData()
|
|
if err != nil {
|
|
return 0, errors.Wrap(err, processName, "")
|
|
}
|
|
|
|
pi, ok := s.(*segments.PageInformationSegment)
|
|
if !ok {
|
|
return 0, errors.Errorf(processName, "page information segment is of invalid type: '%T'", s)
|
|
}
|
|
p.ResolutionX = pi.ResolutionX
|
|
return p.ResolutionX, nil
|
|
}
|
|
|
|
func (p *Page) getResolutionY() (int, error) {
|
|
const processName = "getResolutionY"
|
|
if p.ResolutionY != 0 {
|
|
return p.ResolutionY, nil
|
|
}
|
|
h := p.getPageInformationSegment()
|
|
if h == nil {
|
|
return 0, errors.Error(processName, "nil page information")
|
|
}
|
|
|
|
s, err := h.GetSegmentData()
|
|
if err != nil {
|
|
return 0, errors.Wrap(err, processName, "")
|
|
}
|
|
|
|
pi, ok := s.(*segments.PageInformationSegment)
|
|
if !ok {
|
|
return 0, errors.Errorf(processName, "page information segment is of invalid type:'%T'", s)
|
|
}
|
|
p.ResolutionY = pi.ResolutionY
|
|
return p.ResolutionY, nil
|
|
}
|
|
|
|
// lastSegmentNumber gets the number of the last segment in the page.
|
|
func (p *Page) lastSegmentNumber() (last uint32, err error) {
|
|
const processName = "lastSegmentNumber"
|
|
if len(p.Segments) == 0 {
|
|
return last, errors.Errorf(processName, "no segments found in the page '%d'", p.PageNumber)
|
|
}
|
|
return p.Segments[len(p.Segments)-1].SegmentNumber, nil
|
|
}
|
|
|
|
func (p *Page) nextSegmentNumber() uint32 {
|
|
return p.Document.nextSegmentNumber()
|
|
}
|