mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-29 13:48:54 +08:00

* Prepared skeleton and basic component implementations for the jbig2 encoding. * Added Bitset. Implemented Bitmap. * Decoder with old Arithmetic Decoder * Partly working arithmetic * Working arithmetic decoder. * MMR patched. * rebuild to apache. * Working generic * Decoded full document * Decoded AnnexH document * Minor issues fixed. * Update README.md * Fixed generic region errors. Added benchmark. Added bitmap unpadder. Added Bitmap toImage method. * Fixed endofpage error * Added integration test. * Decoded all test files without errors. Implemented JBIG2Global. * Merged with v3 version * Fixed the EOF in the globals issue * Fixed the JBIG2 ChocolateData Decode * JBIG2 Added license information * Minor fix in jbig2 encoding. * Applied the logging convention * Cleaned unnecessary imports * Go modules clear unused imports * checked out the README.md * Moved trace to Debug. Fixed the build integrate tag in the document_decode_test.go * Applied UniPDF Developer Guide. Fixed lint issues. * Cleared documentation, fixed style issues. * Added jbig2 doc.go files. Applied unipdf guide style. * Minor code style changes. * Minor naming and style issues fixes. * Minor naming changes. Style issues fixed. * Review r11 fixes. * Integrate jbig2 tests with build system * Added jbig2 integration test golden files. * Minor jbig2 integration test fix * Removed jbig2 integration image assertions * Fixed jbig2 rowstride issue. Implemented jbig2 bit writer * Changed golden files logic. Fixes r13 issues.
291 lines
7.3 KiB
Go
291 lines
7.3 KiB
Go
/*
|
|
* This file is subject to the terms and conditions defined in
|
|
* file 'LICENSE.md', which is part of this source code package.
|
|
*/
|
|
|
|
package jbig2
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"runtime/debug"
|
|
|
|
"github.com/unidoc/unipdf/v3/common"
|
|
|
|
"github.com/unidoc/unipdf/v3/internal/jbig2/reader"
|
|
"github.com/unidoc/unipdf/v3/internal/jbig2/segments"
|
|
)
|
|
|
|
// fileHeaderID is the 8-byte ID string that opens a standalone jbig2 file, see D.4.1.
// The second to fourth bytes spell "JB2".
var fileHeaderID = []byte{0x97, 'J', 'B', '2', '\r', '\n', 0x1A, '\n'}
|
|
|
|
// Document is the jbig2 document model containing pages and global segments.
|
|
// By creating new document with method NewDocument or NewDocumentWithGlobals
|
|
// all the jbig2 encoded data segment headers are decoded.
|
|
// In order to decode whole document, all of it's pages should be decoded using GetBitmap method.
|
|
// PDF encoded documents should contains only one Page with the number 1.
|
|
type Document struct {
|
|
// Pages contains all pages of this document.
|
|
Pages map[int]*Page
|
|
|
|
// NumberOfPagesUnknown defines if the ammount of the pages is known.
|
|
NumberOfPagesUnknown bool
|
|
|
|
// NumberOfPages - D.4.3 - Number of pages field (4 bytes). Only presented if NumberOfPagesUnknown is true.
|
|
NumberOfPages uint32
|
|
|
|
// GBUseExtTemplate defines wether extended Template is used.
|
|
GBUseExtTemplate bool
|
|
|
|
// SubInputStream is the source data stream wrapped into a SubInputStream.
|
|
InputStream *reader.Reader
|
|
|
|
// GlobalSegments contains all segments that aren't associated with a page.
|
|
GlobalSegments Globals
|
|
|
|
// OrganisationType is the document segment organization.
|
|
OrganizationType segments.OrganizationType
|
|
|
|
fileHeaderLength uint8
|
|
}
|
|
|
|
// NewDocument creates new Document for the 'data' byte slice.
|
|
func NewDocument(data []byte) (*Document, error) {
|
|
return NewDocumentWithGlobals(data, nil)
|
|
}
|
|
|
|
// NewDocumentWithGlobals creates new Document for the provided encoded 'data'
|
|
// byte slice and the 'globals' Globals.
|
|
func NewDocumentWithGlobals(data []byte, globals Globals) (*Document, error) {
|
|
d := &Document{
|
|
Pages: make(map[int]*Page),
|
|
InputStream: reader.New(data),
|
|
OrganizationType: segments.OSequential,
|
|
NumberOfPagesUnknown: true,
|
|
GlobalSegments: globals,
|
|
fileHeaderLength: 9,
|
|
}
|
|
|
|
if d.GlobalSegments == nil {
|
|
d.GlobalSegments = Globals(make(map[int]*segments.Header))
|
|
}
|
|
|
|
// mapData map the data stream
|
|
if err := d.mapData(); err != nil {
|
|
return nil, err
|
|
}
|
|
return d, nil
|
|
}
|
|
|
|
// GetNumberOfPages gets the amount of Pages in the given document.
|
|
func (d *Document) GetNumberOfPages() (uint32, error) {
|
|
if d.NumberOfPagesUnknown || d.NumberOfPages == 0 {
|
|
if len(d.Pages) == 0 {
|
|
d.mapData()
|
|
}
|
|
return uint32(len(d.Pages)), nil
|
|
}
|
|
return d.NumberOfPages, nil
|
|
}
|
|
|
|
// GetPage implements segments.Documenter interface.
|
|
// NOTE: in order to decode all document images, get page by page (page numeration starts from '1') and
|
|
// decode them by calling 'GetBitmap' method.
|
|
func (d *Document) GetPage(pageNumber int) (segments.Pager, error) {
|
|
if pageNumber < 0 {
|
|
common.Log.Debug("JBIG2 Page - GetPage: %d. Page cannot be lower than 0. %s", pageNumber, debug.Stack())
|
|
return nil, fmt.Errorf("invalid jbig2 document - provided invalid page number: %d", pageNumber)
|
|
}
|
|
|
|
if pageNumber > len(d.Pages) {
|
|
common.Log.Debug("Page not found: %d. %s", pageNumber, debug.Stack())
|
|
return nil, errors.New("invalid jbig2 document - page not found")
|
|
}
|
|
|
|
p, ok := d.Pages[pageNumber]
|
|
if !ok {
|
|
common.Log.Debug("Page not found: %d. %s", pageNumber, debug.Stack())
|
|
return nil, errors.New("invalid jbig2 document - page not found")
|
|
}
|
|
|
|
return p, nil
|
|
}
|
|
|
|
// GetGlobalSegment implements segments.Documenter interface.
|
|
func (d *Document) GetGlobalSegment(i int) *segments.Header {
|
|
if d.GlobalSegments == nil {
|
|
common.Log.Debug("Trying to get Global segment from nil Globals")
|
|
return nil
|
|
}
|
|
return d.GlobalSegments[i]
|
|
}
|
|
|
|
func (d *Document) determineRandomDataOffsets(segmentHeaders []*segments.Header, offset uint64) {
|
|
if d.OrganizationType != segments.ORandom {
|
|
return
|
|
}
|
|
|
|
for _, s := range segmentHeaders {
|
|
s.SegmentDataStartOffset = offset
|
|
offset += s.SegmentDataLength
|
|
}
|
|
}
|
|
|
|
func (d *Document) isFileHeaderPresent() (bool, error) {
|
|
d.InputStream.Mark()
|
|
|
|
for _, magicByte := range fileHeaderID {
|
|
b, err := d.InputStream.ReadByte()
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
if magicByte != b {
|
|
d.InputStream.Reset()
|
|
return false, nil
|
|
}
|
|
}
|
|
|
|
d.InputStream.Reset()
|
|
return true, nil
|
|
}
|
|
|
|
func (d *Document) mapData() error {
|
|
// Get the header list
|
|
var (
|
|
segmentHeaders []*segments.Header
|
|
offset int64
|
|
kind segments.Type
|
|
)
|
|
|
|
isFileHeaderPresent, err := d.isFileHeaderPresent()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Parse the file header if exists.
|
|
if isFileHeaderPresent {
|
|
if err = d.parseFileHeader(); err != nil {
|
|
return err
|
|
}
|
|
offset += int64(d.fileHeaderLength)
|
|
}
|
|
|
|
var (
|
|
page *Page
|
|
segmentNo int
|
|
reachedEOF bool
|
|
)
|
|
|
|
// type 51 is the EndOfFile segment kind
|
|
for kind != 51 && !reachedEOF {
|
|
segmentNo++
|
|
|
|
// get new segment
|
|
segment, err := segments.NewHeader(d, d.InputStream, offset, d.OrganizationType)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
common.Log.Trace("Decoding segment number: %d, Type: %s", segmentNo, segment.Type)
|
|
|
|
kind = segment.Type
|
|
if kind != segments.TEndOfFile {
|
|
if segment.PageAssociation != 0 {
|
|
page = d.Pages[segment.PageAssociation]
|
|
|
|
if page == nil {
|
|
page = newPage(d, segment.PageAssociation)
|
|
d.Pages[segment.PageAssociation] = page
|
|
}
|
|
|
|
page.Segments[int(segment.SegmentNumber)] = segment
|
|
} else {
|
|
d.GlobalSegments.AddSegment(int(segment.SegmentNumber), segment)
|
|
}
|
|
}
|
|
|
|
segmentHeaders = append(segmentHeaders, segment)
|
|
offset = d.InputStream.StreamPosition()
|
|
|
|
if d.OrganizationType == segments.OSequential {
|
|
offset += int64(segment.SegmentDataLength)
|
|
}
|
|
|
|
reachedEOF, err = d.reachedEOF(offset)
|
|
if err != nil {
|
|
common.Log.Debug("jbig2 document reached EOF with error: %v", err)
|
|
return err
|
|
}
|
|
}
|
|
d.determineRandomDataOffsets(segmentHeaders, uint64(offset))
|
|
return nil
|
|
}
|
|
|
|
func (d *Document) parseFileHeader() error {
|
|
// D.4.1 ID string read will be skipped.
|
|
_, err := d.InputStream.Seek(8, io.SeekStart)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// D.4.2 Header flag (1 byte)
|
|
// Bit 3-7 are reserverd and must be 0
|
|
_, err = d.InputStream.ReadBits(5)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Bit 2 - extended templates are used
|
|
b, err := d.InputStream.ReadBit()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if b == 1 {
|
|
d.GBUseExtTemplate = true
|
|
}
|
|
|
|
// Bit 1 - Indicates if amount of pages are unknown.
|
|
b, err = d.InputStream.ReadBit()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if b != 1 {
|
|
d.NumberOfPagesUnknown = false
|
|
}
|
|
|
|
// Bit 0 - Indicates file organisation type.
|
|
b, err = d.InputStream.ReadBit()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
d.OrganizationType = segments.OrganizationType(b)
|
|
|
|
// D.4.3 Number of pages
|
|
if !d.NumberOfPagesUnknown {
|
|
d.NumberOfPages, err = d.InputStream.ReadUnsignedInt()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
d.fileHeaderLength = 13
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (d *Document) reachedEOF(offset int64) (bool, error) {
|
|
_, err := d.InputStream.Seek(offset, io.SeekStart)
|
|
if err != nil {
|
|
common.Log.Debug("reachedEOF - d.InputStream.Seek failed: %v", err)
|
|
return false, err
|
|
}
|
|
|
|
_, err = d.InputStream.ReadBits(32)
|
|
if err == io.EOF {
|
|
return true, nil
|
|
} else if err != nil {
|
|
return false, err
|
|
}
|
|
return false, nil
|
|
}
|