mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-05 19:30:30 +08:00

* Prepared skeleton and basic component implementations for the jbig2 encoding. * Added Bitset. Implemented Bitmap. * Decoder with old Arithmetic Decoder * Partly working arithmetic * Working arithmetic decoder. * MMR patched. * rebuild to apache. * Working generic * Working generic * Decoded full document * Update Jenkinsfile go version [master] (#398) * Update Jenkinsfile go version * Decoded AnnexH document * Minor issues fixed. * Update README.md * Fixed generic region errors. Added benchmark. Added bitmap unpadder. Added Bitmap toImage method. * Fixed endofpage error * Added integration test. * Decoded all test files without errors. Implemented JBIG2Global. * Merged with v3 version * Fixed the EOF in the globals issue * Fixed the JBIG2 ChocolateData Decode * JBIG2 Added license information * Minor fix in jbig2 encoding. * Applied the logging convention * Cleaned unnecessary imports * Go modules clear unused imports * checked out the README.md * Moved trace to Debug. Fixed the build integrate tag in the document_decode_test.go * Initial encoder skeleton * Applied UniPDF Developer Guide. Fixed lint issues. * Cleared documentation, fixed style issues. * Added jbig2 doc.go files. Applied unipdf guide style. * Minor code style changes. * Minor naming and style issues fixes. * Minor naming changes. Style issues fixed. * Review r11 fixes. * Added JBIG2 Encoder skeleton. * Moved Document and Page to jbig2/document package. Created decoder package responsible for decoding jbig2 stream. * Implemented raster functions. * Added raster uni low test funcitons. * Added raster low test functions * untracked files on jbig2-encoder: c869089 Added raster low test functions * index on jbig2-encoder: c869089 Added raster low test functions * Added morph files. * implemented jbig2 encoder basics * JBIG2 Encoder - Generic method * Added jbig2 image encode ttests, black/white image tests * cleaned and tested jbig2 package * unfinished jbig2 classified encoder * jbig2 minor style changes * minor jbig2 encoder changes * prepared JBIG2 Encoder * Style and lint fixes * Minor changes and lints * Fixed shift unsinged value build errors * Minor naming change * Added jbig2 encode, image gondels. Fixed jbig2 decode bug. * Provided jbig2 core.DecodeGlobals function. * Fixed JBIG2Encoder `r6` revision issues. * Removed public JBIG2Encoder document. * Minor style changes * added NewJBIG2Encoder function. * fixed JBIG2Encoder 'r9' revision issues. * Cleared 'r9' commented code. * Updated ACKNOWLEDGEMENETS. Fixed JBIG2Encoder 'r10' revision issues. Co-authored-by: Gunnsteinn Hall <gunnsteinn.hall@gmail.com>
269 lines
8.2 KiB
Go
269 lines
8.2 KiB
Go
/*
|
|
* This file is subject to the terms and conditions defined in
|
|
* file 'LICENSE.md', which is part of this source code package.
|
|
*/
|
|
|
|
package classer
|
|
|
|
import (
|
|
"image"
|
|
|
|
"github.com/unidoc/unipdf/v3/common"
|
|
|
|
"github.com/unidoc/unipdf/v3/internal/jbig2/basic"
|
|
"github.com/unidoc/unipdf/v3/internal/jbig2/bitmap"
|
|
"github.com/unidoc/unipdf/v3/internal/jbig2/errors"
|
|
)
|
|
|
|
// Classer holds all the data accumulated during the classification
|
|
// process that can be used for a compressed jbig2-type representation
|
|
// of a set of images.
|
|
type Classer struct {
|
|
// BaseIndex is number of components already processed on fully processed pages.
|
|
BaseIndex int
|
|
// Settings are current classer settings.
|
|
Settings Settings
|
|
|
|
// Number of components on each page - 'nacomps'- for each page added to the classer a new entry to the slice
|
|
// is added with the value of components per page.
|
|
ComponentsNumber *basic.IntSlice
|
|
// Width * Height of each template without extra border pixels - 'naarea'.
|
|
TemplateAreas *basic.IntSlice
|
|
|
|
// Widths is max width of original src images.
|
|
Widths map[int]int
|
|
// Heights is max height of original src images.
|
|
Heights map[int]int
|
|
|
|
// NumberOfClasses is the current number of classes - 'nclass'.
|
|
NumberOfClasses int
|
|
// ClassInstances is the slice of bitmaps for each class. Unbordered - 'pixaa'.
|
|
ClassInstances *bitmap.BitmapsArray
|
|
// UndilatedTemplates for each class. Bordered and not dilated - 'pixat'.
|
|
UndilatedTemplates *bitmap.Bitmaps
|
|
// DilatedTemplates for each class. Bordered and dilated - 'pixatd'.
|
|
DilatedTemplates *bitmap.Bitmaps
|
|
|
|
// Hash table to find templates by their size - 'dahash'.
|
|
TemplatesSize basic.IntsMap
|
|
// FgTemplates - foreground areas of undilated templates. Used for rank < 1.0 - 'nafgt'.
|
|
FgTemplates *basic.NumSlice
|
|
|
|
// CentroidPoints centroids of all bordered cc.
|
|
CentroidPoints *bitmap.Points
|
|
// CentroidPointsTemplates centroids of all bordered template cc.
|
|
CentroidPointsTemplates *bitmap.Points
|
|
// ClassIDs is the slice of class ids for each component - 'naclass'.
|
|
ClassIDs *basic.IntSlice
|
|
// ComponentPageNumbers is the slice of page numbers for each component - 'napage'.
|
|
// The index is the component id.
|
|
ComponentPageNumbers *basic.IntSlice
|
|
// PtaUL is the slice of UL corners at which the template
|
|
// is to be placed for each component.
|
|
PtaUL *bitmap.Points
|
|
// PtaLL is the slice of LL corners at which the template
|
|
// is to be placed for each component.
|
|
PtaLL *bitmap.Points
|
|
}
|
|
|
|
// Init initializes the classer with the provided settings.
|
|
func Init(settings Settings) (*Classer, error) {
|
|
const processName = "classer.Init"
|
|
c := &Classer{
|
|
Settings: settings,
|
|
Widths: map[int]int{},
|
|
Heights: map[int]int{},
|
|
TemplatesSize: basic.IntsMap{},
|
|
TemplateAreas: &basic.IntSlice{},
|
|
ComponentPageNumbers: &basic.IntSlice{},
|
|
ClassIDs: &basic.IntSlice{},
|
|
ComponentsNumber: &basic.IntSlice{},
|
|
CentroidPoints: &bitmap.Points{},
|
|
CentroidPointsTemplates: &bitmap.Points{},
|
|
UndilatedTemplates: &bitmap.Bitmaps{},
|
|
DilatedTemplates: &bitmap.Bitmaps{},
|
|
ClassInstances: &bitmap.BitmapsArray{},
|
|
FgTemplates: &basic.NumSlice{},
|
|
}
|
|
if err := c.Settings.Validate(); err != nil {
|
|
return nil, errors.Wrap(err, processName, "")
|
|
}
|
|
return c, nil
|
|
}
|
|
|
|
// AddPage adds the 'inputPage' to the classer 'c'.
|
|
func (c *Classer) AddPage(inputPage *bitmap.Bitmap, pageNumber int, method Method) (err error) {
|
|
const processName = "Classer.AddPage"
|
|
c.Widths[pageNumber] = inputPage.Width
|
|
c.Heights[pageNumber] = inputPage.Height
|
|
|
|
if err = c.verifyMethod(method); err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
|
|
comps, boxes, err := inputPage.GetComponents(c.Settings.Components, c.Settings.MaxCompWidth, c.Settings.MaxCompHeight)
|
|
if err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
|
|
common.Log.Debug("Components: %v", comps)
|
|
// add the computed components to the page using provided method.
|
|
if err = c.addPageComponents(inputPage, boxes, comps, pageNumber, method); err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ComputeLLCorners computes the position of the LL (lower left) corners.
|
|
func (c *Classer) ComputeLLCorners() (err error) {
|
|
const processName = "Classer.ComputeLLCorners"
|
|
if c.PtaUL == nil {
|
|
return errors.Error(processName, "UL Corners not defined")
|
|
}
|
|
n := len(*c.PtaUL)
|
|
|
|
c.PtaLL = &bitmap.Points{}
|
|
|
|
var (
|
|
x1, y1 float32
|
|
iClass, h int
|
|
bm *bitmap.Bitmap
|
|
)
|
|
for i := 0; i < n; i++ {
|
|
x1, y1, err = c.PtaUL.GetGeometry(i)
|
|
if err != nil {
|
|
common.Log.Debug("Getting PtaUL failed: %v", err)
|
|
return errors.Wrap(err, processName, "PtaUL Geometry")
|
|
}
|
|
iClass, err = c.ClassIDs.Get(i)
|
|
if err != nil {
|
|
common.Log.Debug("Getting ClassID failed: %v", err)
|
|
return errors.Wrap(err, processName, "ClassID")
|
|
}
|
|
bm, err = c.UndilatedTemplates.GetBitmap(iClass)
|
|
if err != nil {
|
|
common.Log.Debug("Getting UndilatedTemplates failed: %v", err)
|
|
return errors.Wrap(err, processName, "Undilated Templates")
|
|
}
|
|
h = bm.Height
|
|
// Add the global LL corner point.
|
|
c.PtaLL.AddPoint(x1, y1+float32(h)) // )-1-2*float32(JbAddedPixels)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
/**
|
|
|
|
Private methods and functions
|
|
|
|
*/
|
|
|
|
// addPageComponents adds the components to the 'inputPage'.
|
|
func (c *Classer) addPageComponents(inputPage *bitmap.Bitmap, boxas *bitmap.Boxes, components *bitmap.Bitmaps, pageNumber int, method Method) error {
|
|
const processName = "Classer.AddPageComponents"
|
|
if inputPage == nil {
|
|
return errors.Error(processName, "nil input page")
|
|
}
|
|
if boxas == nil || components == nil || len(*boxas) == 0 {
|
|
common.Log.Trace("AddPageComponents: %s. No components found", inputPage)
|
|
return nil
|
|
}
|
|
var err error
|
|
switch method {
|
|
case RankHaus:
|
|
err = c.classifyRankHaus(boxas, components, pageNumber)
|
|
case Correlation:
|
|
err = c.classifyCorrelation(boxas, components, pageNumber)
|
|
default:
|
|
common.Log.Debug("Unknown classify method: '%v'", method)
|
|
return errors.Error(processName, "unknown classify method")
|
|
}
|
|
if err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
|
|
if err = c.getULCorners(inputPage, boxas); err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
n := len(*boxas)
|
|
c.BaseIndex += n
|
|
if err = c.ComponentsNumber.Add(n); err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// getULCorners get the ul corners.
|
|
func (c *Classer) getULCorners(s *bitmap.Bitmap, boxa *bitmap.Boxes) error {
|
|
const processName = "getULCorners"
|
|
if s == nil {
|
|
return errors.Error(processName, "nil image bitmap")
|
|
}
|
|
if boxa == nil {
|
|
return errors.Error(processName, "nil bounds")
|
|
}
|
|
if c.PtaUL == nil {
|
|
c.PtaUL = &bitmap.Points{}
|
|
}
|
|
|
|
n := len(*boxa)
|
|
var (
|
|
index, iClass, idelX, idelY int
|
|
x1, y1, x2, y2 float32
|
|
err error
|
|
box *image.Rectangle
|
|
t *bitmap.Bitmap
|
|
pt image.Point
|
|
)
|
|
|
|
for i := 0; i < n; i++ {
|
|
index = c.BaseIndex + i
|
|
if x1, y1, err = c.CentroidPoints.GetGeometry(index); err != nil {
|
|
return errors.Wrap(err, processName, "CentroidPoints")
|
|
}
|
|
if iClass, err = c.ClassIDs.Get(index); err != nil {
|
|
return errors.Wrap(err, processName, "ClassIDs.Get")
|
|
}
|
|
if x2, y2, err = c.CentroidPointsTemplates.GetGeometry(iClass); err != nil {
|
|
return errors.Wrap(err, processName, "CentroidPointsTemplates")
|
|
}
|
|
delX := x2 - x1
|
|
delY := y2 - y1
|
|
if delX >= 0 {
|
|
idelX = int(delX + 0.5)
|
|
} else {
|
|
idelX = int(delX - 0.5)
|
|
}
|
|
if delY >= 0 {
|
|
idelY = int(delY + 0.5)
|
|
} else {
|
|
idelY = int(delY - 0.5)
|
|
}
|
|
if box, err = boxa.Get(i); err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
x, y := box.Min.X, box.Min.Y
|
|
|
|
// finalPositionForAligment()
|
|
t, err = c.UndilatedTemplates.GetBitmap(iClass)
|
|
if err != nil {
|
|
return errors.Wrap(err, processName, "UndilatedTemplates.Get(iClass)")
|
|
}
|
|
|
|
pt, err = finalAlignmentPositioning(s, x, y, idelX, idelY, t)
|
|
if err != nil {
|
|
return errors.Wrap(err, processName, "")
|
|
}
|
|
|
|
c.PtaUL.AddPoint(float32(x-idelX+pt.X), float32(y-idelY+pt.Y))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *Classer) verifyMethod(method Method) error {
|
|
if method != RankHaus && method != Correlation {
|
|
return errors.Error("verifyMethod", "invalid classer method")
|
|
}
|
|
return nil
|
|
}
|