mirror of
https://github.com/unidoc/unipdf.git
synced 2025-05-01 22:17:29 +08:00

* Fixing platform independent integer size * Cleared test logs. * Cleared unnecessary int32 * Defined precise integer size for jbig2 segments.
351 lines
8.4 KiB
Go
351 lines
8.4 KiB
Go
/*
|
|
* This file is subject to the terms and conditions defined in
|
|
* file 'LICENSE.md', which is part of this source code package.
|
|
*/
|
|
|
|
package segments
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"fmt"
|
|
"io"
|
|
"math"
|
|
"strings"
|
|
|
|
"github.com/unidoc/unipdf/v3/common"
|
|
|
|
"github.com/unidoc/unipdf/v3/internal/jbig2/reader"
|
|
)
|
|
|
|
// Header is the segment header used to define the segment parameters - see 7.2.
|
|
type Header struct {
|
|
SegmentNumber uint32
|
|
Type Type
|
|
RetainFlag bool
|
|
PageAssociation int
|
|
PageAssociationFieldSize bool
|
|
RTSegments []*Header
|
|
HeaderLength int64
|
|
SegmentDataLength uint64
|
|
SegmentDataStartOffset uint64
|
|
Reader reader.StreamReader
|
|
SegmentData Segmenter
|
|
RTSNumbers []int
|
|
}
|
|
|
|
// NewHeader creates new segment header for the provided document from the stream reader.
|
|
func NewHeader(d Documenter, r reader.StreamReader, offset int64, organizationType OrganizationType) (*Header, error) {
|
|
h := &Header{Reader: r}
|
|
if err := h.parse(d, r, offset, organizationType); err != nil {
|
|
return nil, err
|
|
}
|
|
return h, nil
|
|
}
|
|
|
|
// CleanSegmentData cleans the segment's data setting it's segment data to nil.
|
|
func (h *Header) CleanSegmentData() {
|
|
if h.SegmentData != nil {
|
|
h.SegmentData = nil
|
|
}
|
|
}
|
|
|
|
// GetSegmentData gets the segment's data returning the Segmenter instance.
|
|
func (h *Header) GetSegmentData() (Segmenter, error) {
|
|
var segmentDataPart Segmenter
|
|
|
|
if h.SegmentData != nil {
|
|
segmentDataPart = h.SegmentData
|
|
}
|
|
|
|
if segmentDataPart == nil {
|
|
creator, ok := kindMap[h.Type]
|
|
if !ok {
|
|
return nil, fmt.Errorf("type: %s/ %d creator not found. ", h.Type, h.Type)
|
|
}
|
|
segmentDataPart = creator()
|
|
|
|
common.Log.Trace("[SEGMENT-HEADER][#%d] GetSegmentData at Offset: %04X", h.SegmentNumber, h.SegmentDataStartOffset)
|
|
subReader, err := h.subInputReader()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := segmentDataPart.Init(h, subReader); err != nil {
|
|
common.Log.Debug("Init failed: %v for type: %T", err, segmentDataPart)
|
|
return nil, err
|
|
}
|
|
h.SegmentData = segmentDataPart
|
|
}
|
|
return segmentDataPart, nil
|
|
}
|
|
|
|
// String implements Stringer interface.
|
|
func (h *Header) String() string {
|
|
sb := &strings.Builder{}
|
|
sb.WriteString("\n[SEGMENT-HEADER]\n")
|
|
sb.WriteString(fmt.Sprintf("\t- SegmentNumber: %v\n", h.SegmentNumber))
|
|
sb.WriteString(fmt.Sprintf("\t- Type: %v\n", h.Type))
|
|
sb.WriteString(fmt.Sprintf("\t- RetainFlag: %v\n", h.RetainFlag))
|
|
sb.WriteString(fmt.Sprintf("\t- PageAssociation: %v\n", h.PageAssociation))
|
|
sb.WriteString(fmt.Sprintf("\t- PageAssociationFieldSize: %v\n", h.PageAssociationFieldSize))
|
|
sb.WriteString("\t- RTSEGMENTS:\n")
|
|
for _, rt := range h.RTSNumbers {
|
|
sb.WriteString(fmt.Sprintf("\t\t- %d\n", rt))
|
|
}
|
|
sb.WriteString(fmt.Sprintf("\t- HeaderLength: %v\n", h.HeaderLength))
|
|
sb.WriteString(fmt.Sprintf("\t- SegmentDataLength: %v\n", h.SegmentDataLength))
|
|
sb.WriteString(fmt.Sprintf("\t- SegmentDataStartOffset: %v\n", h.SegmentDataStartOffset))
|
|
|
|
return sb.String()
|
|
}
|
|
|
|
// parses the current segment header for the provided document 'd'.
|
|
func (h *Header) parse(
|
|
d Documenter, r reader.StreamReader,
|
|
offset int64, organizationType OrganizationType,
|
|
) (err error) {
|
|
common.Log.Trace("[SEGMENT-HEADER][PARSE] Begins")
|
|
defer func() {
|
|
if err != nil {
|
|
common.Log.Trace("[SEGMENT-HEADER][PARSE] Failed. %v", err)
|
|
} else {
|
|
common.Log.Trace("[SEGMENT-HEADER][PARSE] Finished")
|
|
}
|
|
}()
|
|
|
|
_, err = r.Seek(offset, io.SeekStart)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// 7.2.2 Segment Number.
|
|
if err = h.readSegmentNumber(r); err != nil {
|
|
return err
|
|
}
|
|
|
|
// 7.2.3 Segment header flags.
|
|
if err = h.readHeaderFlags(r); err != nil {
|
|
return err
|
|
}
|
|
|
|
// 7.2.4 Amount of referred-to segment.
|
|
var countOfRTS uint64
|
|
countOfRTS, err = h.readNumberOfReferredToSegments(r)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// 7.2.5 Refered-tp segment numbers.
|
|
h.RTSNumbers, err = h.readReferedToSegmentNumbers(r, int(countOfRTS))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// 7.2.6 Segment page association.
|
|
err = h.readSegmentPageAssociation(d, r, countOfRTS, h.RTSNumbers...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if h.Type != TEndOfFile {
|
|
// 7.2.7 Segment data length (Contains the length of the data).
|
|
if err = h.readSegmentDataLength(r); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
h.readDataStartOffset(r, organizationType)
|
|
h.readHeaderLength(r, offset)
|
|
|
|
common.Log.Trace("%s", h)
|
|
return nil
|
|
}
|
|
|
|
// readSegmentNumber reads the segment number.
|
|
func (h *Header) readSegmentNumber(r reader.StreamReader) error {
|
|
// 7.2.2
|
|
b := make([]byte, 4)
|
|
_, err := r.Read(b)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// BigEndian number
|
|
h.SegmentNumber = binary.BigEndian.Uint32(b)
|
|
return nil
|
|
}
|
|
|
|
// readHeaderFlags reads the header flag values.
|
|
func (h *Header) readHeaderFlags(r reader.StreamReader) error {
|
|
// 7.2.3
|
|
bit, err := h.Reader.ReadBit()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Bit 7: Retain Flag
|
|
if bit != 0 {
|
|
h.RetainFlag = true
|
|
}
|
|
|
|
bit, err = h.Reader.ReadBit()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Bit 6: Size of the page association field
|
|
if bit != 0 {
|
|
h.PageAssociationFieldSize = true
|
|
}
|
|
|
|
// Bit 5-0 Contains the values (between 0 - 62 with gaps) for segment types
|
|
tp, err := h.Reader.ReadBits(6)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
h.Type = Type(int(tp))
|
|
|
|
return nil
|
|
}
|
|
|
|
// readNumberOfReferredToSegments gets the amount of referred-to segments.
|
|
func (h *Header) readNumberOfReferredToSegments(r reader.StreamReader) (uint64, error) {
|
|
// 7.2.4
|
|
countOfRTS, err := r.ReadBits(3)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
countOfRTS &= 0xf
|
|
var retainBit []byte
|
|
|
|
if countOfRTS <= 4 {
|
|
// short format
|
|
retainBit = make([]byte, 5)
|
|
for i := 0; i <= 4; i++ {
|
|
b, err := r.ReadBit()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
retainBit[i] = byte(b)
|
|
}
|
|
} else {
|
|
// long format
|
|
countOfRTS, err = r.ReadBits(29)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
countOfRTS &= math.MaxInt32
|
|
arrayLength := (countOfRTS + 8) >> 3
|
|
arrayLength <<= 3
|
|
retainBit = make([]byte, arrayLength)
|
|
|
|
var i uint64
|
|
for i = 0; i < arrayLength; i++ {
|
|
b, err := r.ReadBit()
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
retainBit[i] = byte(b)
|
|
}
|
|
}
|
|
return countOfRTS, nil
|
|
}
|
|
|
|
// readReferedToSegmentNumbers gathers all segment numbers of referred-to segments. The
|
|
// segment itself is in rtSegments the array.
|
|
func (h *Header) readReferedToSegmentNumbers(r reader.StreamReader, countOfRTS int) ([]int, error) {
|
|
// 7.2.5
|
|
rtsNumbers := make([]int, countOfRTS)
|
|
|
|
if countOfRTS > 0 {
|
|
rtsSize := byte(1)
|
|
|
|
if h.SegmentNumber > 256 {
|
|
rtsSize = 2
|
|
|
|
if h.SegmentNumber > 65536 {
|
|
rtsSize = 4
|
|
}
|
|
}
|
|
|
|
h.RTSegments = make([]*Header, countOfRTS)
|
|
var (
|
|
bits uint64
|
|
err error
|
|
)
|
|
for i := 0; i < countOfRTS; i++ {
|
|
bits, err = r.ReadBits(rtsSize << 3)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
rtsNumbers[i] = int(bits & math.MaxInt32)
|
|
}
|
|
}
|
|
return rtsNumbers, nil
|
|
}
|
|
|
|
// readSegmentPageAssociation gets the segment's associated page number.
|
|
func (h *Header) readSegmentPageAssociation(d Documenter, r reader.StreamReader, countOfRTS uint64, rtsNumbers ...int) error {
|
|
// 7.2.6
|
|
if !h.PageAssociationFieldSize {
|
|
// Short format
|
|
bits, err := r.ReadBits(8)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
h.PageAssociation = int(bits & 0xFF)
|
|
} else {
|
|
// Long format
|
|
bits, err := r.ReadBits(32)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
h.PageAssociation = int(bits & math.MaxInt32)
|
|
}
|
|
|
|
if countOfRTS > 0 {
|
|
page, _ := d.GetPage(h.PageAssociation)
|
|
var i uint64
|
|
|
|
for i = 0; i < countOfRTS; i++ {
|
|
if page != nil {
|
|
h.RTSegments[i] = page.GetSegment(rtsNumbers[i])
|
|
} else {
|
|
h.RTSegments[i] = d.GetGlobalSegment(rtsNumbers[i])
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// readSegmentDataLength contains the length of the data part in bytes.
|
|
func (h *Header) readSegmentDataLength(r reader.StreamReader) (err error) {
|
|
// 7.2.7
|
|
h.SegmentDataLength, err = r.ReadBits(32)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Set the 4bytes mask
|
|
h.SegmentDataLength &= math.MaxInt32
|
|
return nil
|
|
}
|
|
|
|
// readDataStartOffset sets the offset of the current reader if
|
|
// the organisation type is OSequential.
|
|
func (h *Header) readDataStartOffset(r reader.StreamReader, organizationType OrganizationType) {
|
|
if organizationType == OSequential {
|
|
h.SegmentDataStartOffset = uint64(r.StreamPosition())
|
|
}
|
|
}
|
|
|
|
func (h *Header) readHeaderLength(r reader.StreamReader, offset int64) {
|
|
h.HeaderLength = r.StreamPosition() - offset
|
|
}
|
|
|
|
func (h *Header) subInputReader() (reader.StreamReader, error) {
|
|
return reader.NewSubstreamReader(h.Reader, h.SegmentDataStartOffset, h.SegmentDataLength)
|
|
}
|