2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2018-03-22 14:03:47 +00:00
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2022-03-13 12:41:53 +00:00
package extractor ; import ( _e "bytes" ; _d "errors" ; _cag "fmt" ; _fb "github.com/unidoc/unipdf/v3/common" ; _ag "github.com/unidoc/unipdf/v3/contentstream" ; _gdd "github.com/unidoc/unipdf/v3/core" ; _fg "github.com/unidoc/unipdf/v3/internal/license" ; _dc "github.com/unidoc/unipdf/v3/internal/textencoding" ;
_afd "github.com/unidoc/unipdf/v3/internal/transform" ; _ac "github.com/unidoc/unipdf/v3/model" ; _dd "golang.org/x/text/unicode/norm" ; _cg "golang.org/x/xerrors" ; _ca "image/color" ; _c "io" ; _gc "math" ; _a "regexp" ; _af "sort" ; _cf "strings" ; _gd "unicode" ; _g "unicode/utf8" ;
) ; func ( _decad * textTable ) computeBbox ( ) _ac . PdfRectangle { var _dgadd _ac . PdfRectangle ; _gdgc := false ; for _defc := 0 ; _defc < _decad . _dcbdf ; _defc ++ { for _cbdd := 0 ; _cbdd < _decad . _agac ; _cbdd ++ { _eefa := _decad . get ( _cbdd , _defc ) ; if _eefa == nil { continue ;
} ; if ! _gdgc { _dgadd = _eefa . PdfRectangle ; _gdgc = true ; } else { _dgadd = _gcae ( _dgadd , _eefa . PdfRectangle ) ; } ; } ; } ; return _dgadd ; } ; func _acded ( _fccb , _cfcda int ) int { if _fccb < _cfcda { return _fccb ; } ; return _cfcda ; } ; func ( _cecg * textObject ) setWordSpacing ( _bfe float64 ) { if _cecg == nil { return ;
} ; _cecg . _fga . _eag = _bfe ; } ;
// New returns an Extractor instance for extracting content from the input PDF page.
//
// It returns an error when the page's content streams or its media box cannot be
// obtained. A media box whose lower/upper coordinates are reversed on an axis is
// logged and swapped into order before the extractor is returned.
func New(page *_ac.PdfPage) (*Extractor, error) {
	// Usage-tracking key; decodes to "extractor.New".
	const usageKey = "\u0065\u0078\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077"

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}
	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _cag.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	ex := &Extractor{
		_gg:  contents,
		_cb:  page.Resources,
		_fe:  *mediaBox,
		_gde: map[string]fontEntry{},
		_agb: map[string]textResult{},
	}

	// Normalize the X extent of the media box.
	if ex._fe.Llx > ex._fe.Urx {
		_fb.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", ex._fe)
		ex._fe.Llx, ex._fe.Urx = ex._fe.Urx, ex._fe.Llx
	}
	// Normalize the Y extent of the media box.
	if ex._fe.Lly > ex._fe.Ury {
		_fb.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", ex._fe)
		ex._fe.Lly, ex._fe.Ury = ex._fe.Ury, ex._fe.Lly
	}

	_fg.TrackUse(usageKey)
	return ex, nil
}

// putComposite stores the paragraphs `paras` with bounding rectangle `bbox` as the
// composite cell at position (x, y) of the table, keyed by `_addg(x, y)`.
// An empty `paras` is logged as an error and nothing is stored.
func (t *textTable) putComposite(x, y int, paras paraList, bbox _ac.PdfRectangle) {
	if len(paras) == 0 {
		_fb.Log.Error("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073")
		return
	}

	cell := compositeCell{PdfRectangle: bbox, paraList: paras}
	if _agede {
		// Debug trace of the cell as supplied, before its bbox is updated.
		_cag.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a", x, y, cell.String())
	}
	cell.updateBBox()
	t._cead[_addg(x, y)] = cell
}

// eventNeighbours sorts `events` in place and sweeps over them to find, for each
// paragraph index that has an opening event, the indexes of the paragraphs that
// are open at the same time. The result maps each such paragraph (looked up in
// the receiver by index) to those indexes, in no particular order; a paragraph
// without any overlap maps to nil.
//
// Events are ordered by `_eefg`; on ties, events with `_bgdfg` set come first.
// `_bgdfg` is treated as "this event opens the paragraph `_deed`"; events
// without it close the paragraph.
func (paras paraList) eventNeighbours(events []event) map[*textPara][]int {
	_af.Slice(events, func(i, j int) bool {
		left, right := events[i], events[j]
		switch {
		case left._eefg != right._eefg:
			return left._eefg < right._eefg
		case left._bgdfg != right._bgdfg:
			return left._bgdfg
		default:
			return i < j
		}
	})

	overlaps := make(map[int]intSet)
	open := make(intSet)
	for _, ev := range events {
		if !ev._bgdfg {
			open.del(ev._deed)
			continue
		}
		// Opening event: link it both ways with everything currently open.
		overlaps[ev._deed] = make(intSet)
		for other := range open {
			if other == ev._deed {
				continue
			}
			overlaps[ev._deed].add(other)
			overlaps[other].add(ev._deed)
		}
		open.add(ev._deed)
	}

	result := map[*textPara][]int{}
	for idx, others := range overlaps {
		para := paras[idx]
		if len(others) == 0 {
			result[para] = nil
			continue
		}
		neighbours := make([]int, 0, len(others))
		for other := range others {
			neighbours = append(neighbours, other)
		}
		result[para] = neighbours
	}
	return result
}

// maxDepth returns the difference between the bag's `_egbb` value and the lower
// Y coordinate of its rectangle.
func (b *wordBag) maxDepth() float64 {
	depth := b._egbb - b.Lly
	return depth
}
// setCharSpacing stores `charSpace` in the `_feba` field of the text state and,
// when the `_agcb` debug flag is on, logs the new value together with the state.
// Calling it on a nil receiver is a no-op.
func (to *textObject) setCharSpacing(charSpace float64) {
	if to == nil {
		return
	}
	to._fga._feba = charSpace
	if _agcb {
		_fb.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", charSpace, to._fga.String())
	}
}

// _dccd returns the distinct second-level keys of the nested map `grid`, sorted
// in increasing order. A key that appears under several first-level keys is
// reported once.
func _dccd(grid map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(grid))
	seen := make(map[float64]struct{}, len(grid))
	for _, inner := range grid {
		for key := range inner {
			if _, dup := seen[key]; dup {
				continue
			}
			seen[key] = struct{}{}
			keys = append(keys, key)
		}
	}
	_af.Float64s(keys)
	return keys
}

const _bf = 20

// _gegd removes the element at index `idx` from `words` and returns the
// shortened slice. The removal is done in place: later elements are shifted
// down by one in the backing array shared with `words`.
//
// The vacated last slot of the backing array is set to nil so that the array
// does not keep a stale pointer to a word alive after removal.
// Like any slice index operation it panics if `idx` is out of range.
func _gegd(words []*textWord, idx int) []*textWord {
	last := len(words) - 1
	copy(words[idx:], words[idx+1:])
	// Drop the duplicate pointer left behind by the shift.
	words[last] = nil
	return words[:last]
}
2021-09-23 22:37:42 +00:00
2022-02-05 21:34:53 +00:00
// String returns a description of `b`.
2022-03-13 12:41:53 +00:00
// String formats the bag as its rectangle, its `_aege` value (labelled
// "fontsize" in the output), the number of words and the quoted word texts.
// Words are listed in the order given by depthIndexes.
func (b *wordBag) String() string {
	var texts []string
	for _, depthIdx := range b.depthIndexes() {
		words := b._cfeeb[depthIdx]
		for i := range words {
			texts = append(texts, words[i]._gebf)
		}
	}
	count := len(texts)
	return _cag.Sprintf("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071", b.PdfRectangle, b._aege, count, texts)
}
2021-10-22 10:53:20 +00:00
2022-03-13 12:41:53 +00:00
// TableCell is a cell in a TextTable.
type TableCell struct {
2021-10-22 10:53:20 +00:00
2022-03-13 12:41:53 +00:00
// Text is the extracted text.
Text string ;
2021-07-30 00:21:16 +00:00
2022-03-13 12:41:53 +00:00
// Marks holds the TextMarks corresponding to the text in Text.
Marks TextMarkArray ; } ;
// extractFormImages extracts the images of the form XObject named `_cfg`, looked up in the
// resources `_bge`. It returns nil without doing anything when the lookup yields a nil form.
// The form's content stream is processed with the form's own resources, falling back to `_bge`
// when the form has none. On success the `_acg` counter is incremented.
// The graphics state parameter `_ebg` is not used in this function.
func ( _fdg * imageExtractContext ) extractFormImages ( _cfg * _gdd . PdfObjectName , _ebg _ag . GraphicsState , _bge * _ac . PdfPageResources ) error { _aff , _ea := _bge . GetXObjectFormByName ( * _cfg ) ; if _ea != nil { return _ea ; } ; if _aff == nil { return nil ;
} ; _dec , _ea := _aff . GetContentStream ( ) ; if _ea != nil { return _ea ; } ; _bgef := _aff . Resources ; if _bgef == nil { _bgef = _bge ; } ; _ea = _fdg . extractContentStreamImages ( string ( _dec ) , _bgef ) ; if _ea != nil { return _ea ; } ; _fdg . _acg ++ ; return nil ; } ;
// renderText turns the raw string bytes `_gdcd` into text marks.
// The bytes are decoded with the current font into character codes and their texts. For each
// decoded text a textMark is built with newTextMark and appended to `_bece`, and the matrix
// `_bgc` (labelled "tm" in the debug output below) is advanced by the glyph displacement.
// It returns nil immediately when the `_dcb` flag is set ("Invalid font" is logged), and an
// error when the font has no metrics for a character code.
func ( _eab * textObject ) renderText ( _gdcd [ ] byte ) error { if _eab . _dcb { _fb . Log . Debug ( "\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e" ) ;
return nil ; } ;
// NOTE(review): `_gda` is dereferenced right here, so the `_gda == nil` branch inside the loop
// below can only ever be reached with a non-nil font.
_gda := _eab . getCurrentFont ( ) ; _dfg := _gda . BytesToCharcodes ( _gdcd ) ; _cafc , _edce , _cacf := _gda . CharcodesToStrings ( _dfg ) ; if _cacf > 0 { _fb . Log . Debug ( "\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064" , _edce , _cacf ) ;
} ;
// Accumulate the decode counters in the text state, then derive the scaling values:
// `_acf` is the state's `_bgg` divided by 100, and `_afff` is the glyph-space scale `_gef`
// (1/1000), except for fonts whose subtype is "Type3", where it is 1.
_eab . _fga . _ededf += _edce ; _eab . _fga . _eca += _cacf ; _agf := _eab . _fga ; _cffc := _agf . _cef ; _acf := _agf . _bgg / 100.0 ; _afff := _gef ; if _gda . Subtype ( ) == "\u0054\u0079\u0070e\u0033" { _afff = 1 ; } ;
// Width of a space: rune metrics for ' ', then char metrics for code 32, then the default font.
_gcee , _dcf := _gda . GetRuneMetrics ( ' ' ) ; if ! _dcf { _gcee , _dcf = _gda . GetCharMetrics ( 32 ) ;
} ; if ! _dcf { _gcee , _ = _ac . DefaultFont ( ) . GetRuneMetrics ( ' ' ) ; } ; _fefg := _gcee . Wx * _afff ; _fb . Log . Trace ( "\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066" , _fefg , _cafc , _gda , _cffc ) ;
_gbfa := _afd . NewMatrix ( _cffc * _acf , 0 , 0 , _cffc , 0 , _agf . _fec ) ; if _agcb { _fb . Log . Info ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071" , len ( _dfg ) , _dfg , _cafc ) ;
} ;
// NOTE(review): the last verb of this trace format is %q but the argument is an int (len) —
// confirm whether the slice itself was meant to be logged.
_fb . Log . Trace ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071" , len ( _dfg ) , _dfg , len ( _cafc ) ) ; _aeb := _eab . getFillColor ( ) ;
_aef := _eab . getStrokeColor ( ) ;
// One iteration per decoded text. Texts consisting of a single NUL rune are skipped.
// `_aebb` (taken from the state's `_eag`) is added only when the text is a single space (32).
for _ffa , _dad := range _cafc { _afe := [ ] rune ( _dad ) ; if len ( _afe ) == 1 && _afe [ 0 ] == '\x00' { continue ; } ; _eaca := _dfg [ _ffa ] ; _aee := _eab . _gbc . CTM . Mult ( _eab . _bgc ) . Mult ( _gbfa ) ; _aebb := 0.0 ; if len ( _afe ) == 1 && _afe [ 0 ] == 32 { _aebb = _agf . _eag ;
} ; _afgc , _egff := _gda . GetCharMetrics ( _eaca ) ; if ! _egff { _fb . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073" , _eaca , _afe , _afe , _gda ) ;
return _cag . Errorf ( "\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064" , _gda . String ( ) , _eaca ) ; } ; _ecfc := _afd . Point { X : _afgc . Wx * _afff , Y : _afgc . Wy * _afff } ;
// `_fcfd` is the horizontal displacement without the state's `_feba` term and `_gbeg` the one
// including it; only the X components are set.
_fcfd := _afd . Point { X : ( _ecfc . X * _cffc + _aebb ) * _acf } ; _gbeg := _afd . Point { X : ( _ecfc . X * _cffc + _agf . _feba + _aebb ) * _acf } ; if _agcb { _fb . Log . Info ( "\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066" , _cffc , _agf . _feba , _agf . _eag , _acf ) ;
_fb . Log . Info ( "\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f" , _ecfc , _fcfd , _gbeg ) ; } ; _ddd := _babf ( _fcfd ) ; _ebce := _babf ( _gbeg ) ; _acff := _eab . _gbc . CTM . Mult ( _eab . _bgc ) . Mult ( _ddd ) ;
if _ccc { _fb . Log . Info ( "e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a" + "\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a" + "\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073" , _eab . _gbc . CTM , _eab . _bgc , _ebce , _fgfb ( _eab . _gbc . CTM . Mult ( _eab . _bgc ) . Mult ( _ebce ) ) , _ddd , _acff , _fgfb ( _acff ) ) ;
} ;
// NOTE(review): when newTextMark reports false the loop continues without reaching the
// `_bgc . Concat` at the end of the body (the NUL skip above does the same), so the matrix is
// not advanced for skipped texts — confirm this is intended.
_dbdg , _efbd := _eab . newTextMark ( _dc . ExpandLigatures ( _afe ) , _aee , _fgfb ( _acff ) , _gc . Abs ( _fefg * _aee . ScalingFactorX ( ) ) , _gda , _eab . _fga . _feba , _aeb , _aef ) ; if ! _efbd { _fb . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067" ) ;
continue ; } ; if _gda == nil { _fb . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e" ) ; } else if _gda . Encoder ( ) == nil { _fb . Log . Debug ( "E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073" , _gda ) ;
} else { if _bgfdf , _bgdc := _gda . Encoder ( ) . CharcodeToRune ( _eaca ) ; _bgdc { _dbdg . _bcfd = string ( _bgfdf ) ; } ; } ; _fb . Log . Trace ( "i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073" , _ffa , _eaca , _dbdg , _aee ) ;
_eab . _bece = append ( _eab . _bece , & _dbdg ) ; _eab . _bgc . Concat ( _ebce ) ; } ; return nil ; } ;
// Error message texts for font handling failures: invalid font object type (`_fgg`),
// font not found (`_eb`) and invalid font stream type (`_fa`).
const ( _fgg = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065" ;
_eb = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064" ;
_fa = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065" ; ) ;
// _gfaf reports whether rectangle `inner` lies entirely within rectangle `outer`.
// Edges that coincide count as inside.
func _gfaf(outer, inner _ac.PdfRectangle) bool {
	// Horizontal extent first; comparisons are kept in `<=` form so that NaN
	// coordinates yield false exactly as before.
	if !(outer.Llx <= inner.Llx && inner.Urx <= outer.Urx) {
		return false
	}
	return outer.Lly <= inner.Lly && inner.Ury <= outer.Ury
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// String returns a string describing the current state of the textState stack.
// The first line reports the number of entries; each following line holds the index of an
// entry and its String() value, or "<nil>" for a nil entry. Lines are joined with "\n".
func ( _ebd * stateStack ) String ( ) string { _gcg := [ ] string { _cag . Sprintf ( "\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064" , len ( * _ebd ) ) } ; for _cgf , _aad := range * _ebd { _cedd := "\u003c\u006e\u0069l\u003e" ;
if _aad != nil { _cedd = _aad . String ( ) ; } ; _gcg = append ( _gcg , _cag . Sprintf ( "\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073" , _cgf , _cedd ) ) ; } ; return _cf . Join ( _gcg , "\u000a" ) ; } ;
// _dedc maps stand-alone (spacing or modifier) diacritic runes to a string holding the matching
// combining mark, e.g. 0x0060 (grave accent) to U+0300 (combining grave accent).
// Presumably used to merge a separately drawn accent with its base letter — confirm at the use site.
var ( _dedc = map [ rune ] string { 0x0060 : "\u0300" , 0x02CB : "\u0300" , 0x0027 : "\u0301" , 0x00B4 : "\u0301" , 0x02B9 : "\u0301" , 0x02CA : "\u0301" , 0x005E : "\u0302" , 0x02C6 : "\u0302" , 0x007E : "\u0303" , 0x02DC : "\u0303" , 0x00AF : "\u0304" , 0x02C9 : "\u0304" , 0x02D8 : "\u0306" , 0x02D9 : "\u0307" , 0x00A8 : "\u0308" , 0x00B0 : "\u030a" , 0x02DA : "\u030a" , 0x02BA : "\u030b" , 0x02DD : "\u030b" , 0x02C7 : "\u030c" , 0x02C8 : "\u030d" , 0x0022 : "\u030e" , 0x02BB : "\u0312" , 0x02BC : "\u0313" , 0x0486 : "\u0313" , 0x055A : "\u0313" , 0x02BD : "\u0314" , 0x0485 : "\u0314" , 0x0559 : "\u0314" , 0x02D4 : "\u031d" , 0x02D5 : "\u031e" , 0x02D6 : "\u031f" , 0x02D7 : "\u0320" , 0x02B2 : "\u0321" , 0x00B8 : "\u0327" , 0x02CC : "\u0329" , 0x02B7 : "\u032b" , 0x02CD : "\u0331" , 0x005F : "\u0332" , 0x204E : "\u0359" } ;
) ;
// _dgfa is a TextMark with Text "[X]", Original " ", Meta set and white fill and stroke colors.
// NOTE(review): the TextMark field docs say the colors are nil for Meta marks, while this value
// uses white — confirm which is intended.
var _dgfa = TextMark { Text : "\u005b\u0058\u005d" , Original : "\u0020" , Meta : true , FillColor : _ca . White , StrokeColor : _ca . White } ;
// newTextMark builds the textMark for the text `_dag`.
// `_ccge` is the matrix the text is rendered with, `_egbf` the end point, `_egfb` the font,
// `_adfe` is stored in the mark's `_dcabg` field and `_dcbd` / `_cgdb` in its `_bfc` / `_bdba`
// color fields. The parameter `_gee` is not used in this function.
// The second result is false when the media box `_fe` has a positive width and `_aaga` reports
// false for the mark's box against it; the box is replaced by `_aaga`'s result in either case.
func ( _ebea * textObject ) newTextMark ( _dag string , _ccge _afd . Matrix , _egbf _afd . Point , _gee float64 , _egfb * _ac . PdfFont , _adfe float64 , _dcbd , _cgdb _ca . Color ) ( textMark , bool ) { _cgef := _ccge . Angle ( ) ;
// `_dba` is the matrix angle passed through `_cagd` with `_ebgc` (presumably a rounding step —
// confirm). The height `_begc` is the Y scale of the matrix unless `_dba` % 180 is 90,
// in which case the X scale is used.
_dba := _cagd ( _cgef , _ebgc ) ; var _begc float64 ; if _dba % 180 != 90 { _begc = _ccge . ScalingFactorY ( ) ; } else { _begc = _ccge . ScalingFactorX ( ) ; } ;
// The box runs from the start point `_fgfb ( _ccge )` to the end point `_egbf` and is extended
// by the height in the direction selected by the orientation. Any orientation other than
// 0/90/180/270 is treated as 0. Coordinates are then put in ascending order.
_agd := _fgfb ( _ccge ) ; _afdc := _ac . PdfRectangle { Llx : _agd . X , Lly : _agd . Y , Urx : _egbf . X , Ury : _egbf . Y } ; switch _dba % 360 { case 90 : _afdc . Urx -= _begc ;
case 180 : _afdc . Ury -= _begc ; case 270 : _afdc . Urx += _begc ; case 0 : _afdc . Ury += _begc ; default : _dba = 0 ; _afdc . Ury += _begc ; } ; if _afdc . Llx > _afdc . Urx { _afdc . Llx , _afdc . Urx = _afdc . Urx , _afdc . Llx ; } ; if _afdc . Lly > _afdc . Ury { _afdc . Lly , _afdc . Ury = _afdc . Ury , _afdc . Lly ;
} ; _aabc := true ; if _ebea . _defa . _fe . Width ( ) > 0 { _cgg , _bgdg := _aaga ( _afdc , _ebea . _defa . _fe ) ; if ! _bgdg { _aabc = false ; _fb . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q" , _afdc , _ebea . _defa . _fe , _dag ) ;
} ; _afdc = _cgg ; } ;
// `_bgfde` is a second rectangle derived from `_afdc` by remapping its coordinates against the
// media box for orientations 90, 180 and 270 (for 90 and 270 on a local copy of the media box
// with Urx and Ury exchanged). It becomes the mark's PdfRectangle; `_afdc` itself is kept in
// the `_bagc` field.
_bgfde := _afdc ; _aecd := _ebea . _defa . _fe ; switch _dba % 360 { case 90 : _aecd . Urx , _aecd . Ury = _aecd . Ury , _aecd . Urx ; _bgfde = _ac . PdfRectangle { Llx : _aecd . Urx - _afdc . Ury , Urx : _aecd . Urx - _afdc . Lly , Lly : _afdc . Llx , Ury : _afdc . Urx } ;
case 180 : _bgfde = _ac . PdfRectangle { Llx : _aecd . Urx - _afdc . Llx , Urx : _aecd . Urx - _afdc . Urx , Lly : _aecd . Ury - _afdc . Lly , Ury : _aecd . Ury - _afdc . Ury } ; case 270 : _aecd . Urx , _aecd . Ury = _aecd . Ury , _aecd . Urx ; _bgfde = _ac . PdfRectangle { Llx : _afdc . Ury , Urx : _afdc . Lly , Lly : _aecd . Ury - _afdc . Llx , Ury : _aecd . Ury - _afdc . Urx } ;
} ; if _bgfde . Llx > _bgfde . Urx { _bgfde . Llx , _bgfde . Urx = _bgfde . Urx , _bgfde . Llx ; } ; if _bgfde . Lly > _bgfde . Ury { _bgfde . Lly , _bgfde . Ury = _bgfde . Ury , _bgfde . Lly ; } ; _adca := textMark { _cadaf : _dag , PdfRectangle : _bgfde , _bagc : _afdc , _afac : _egfb , _gba : _begc , _dcabg : _adfe , _ccbb : _ccge , _cgfb : _egbf , _adbf : _dba , _bfc : _dcbd , _bdba : _cgdb } ;
if _abb { _fb . Log . Info ( "n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073" , _agd , _egbf , _adca . String ( ) ) ; } ; return _adca , _aabc ;
} ;
// _fccc maps each markKind to its display name: "stroke", "fill" or "augment".
var _fccc = map [ markKind ] string { _gagc : "\u0073\u0074\u0072\u006f\u006b\u0065" , _ceaac : "\u0066\u0069\u006c\u006c" , _adbg : "\u0061u\u0067\u006d\u0065\u006e\u0074" } ;
2021-07-30 00:21:16 +00:00
2022-03-13 12:41:53 +00:00
// String returns a description of `state`.
// It lists the `_feba`, `_eag` and `_cef` values (labelled tc, tw and tfs) and
// the base font name, or "[NOT SET]" when no font is set.
func (state *textState) String() string {
	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	if font := state._eedd; font != nil {
		fontName = font.BaseFont()
	}
	const layout = "\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071"
	return _cag.Sprintf(layout, state._feba, state._eag, state._cef, fontName)
}

// _cbcbf classifies a shape with extents `first` and `second`.
// It returns `_eccgd` when `first` is at least `minLength` and `_dgbd(second, first)`
// holds, `_aafafg` when the same is true with the roles swapped, and `_dgdb` otherwise.
func _cbcbf(first, second, minLength float64) rulingKind {
	switch {
	case first >= minLength && _dgbd(second, first):
		return _eccgd
	case second >= minLength && _dgbd(first, second):
		return _aafafg
	default:
		return _dgdb
	}
}

// _ade reports whether `fonts` contains an entry whose FontName equals `name`.
func _ade(fonts []Font, name string) bool {
	for i := range fonts {
		if fonts[i].FontName == name {
			return true
		}
	}
	return false
}

// imageExtractContext holds the state used while extracting images:
// the collected image marks, three integer counters, a per-stream image cache
// and the extraction options.
type imageExtractContext struct {
	_caa  []ImageMark
	_ebc  int
	_cdge int
	_acg  int
	_gfb  map[*_gdd.PdfObjectStream]*cachedImage
	_ga   *ImageExtractOptions
}

// _ffcc returns a new slice holding the elements of `in` in reverse order.
// The input slice is not modified.
func _ffcc(in []int) []int {
	n := len(in)
	out := make([]int, n)
	for i := range out {
		out[i] = in[n-1-i]
	}
	return out
}

// blocked reports whether `word` is separated from the bag by something
// registered in the bag's `_fedd` (checked when the word is strictly left or
// right of the bag) or `_bba` (checked when it is strictly below or above).
// Each hit is logged when the `_ccfb` debug flag is on.
func (bag *wordBag) blocked(word *textWord) bool {
	// Horizontal separation: word left of the bag, or bag left of the word.
	switch {
	case word.Urx < bag.Llx:
		if bag._fedd.blocks(_fgbec(word.PdfRectangle), _eacc(bag.PdfRectangle)) {
			if _ccfb {
				_fb.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073", word, bag)
			}
			return true
		}
	case bag.Urx < word.Llx:
		if bag._fedd.blocks(_fgbec(bag.PdfRectangle), _eacc(word.PdfRectangle)) {
			if _ccfb {
				_fb.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s", word, bag)
			}
			return true
		}
	}

	// Vertical separation: word below the bag, or bag below the word.
	switch {
	case word.Ury < bag.Lly:
		if bag._bba.blocks(_ebgf(word.PdfRectangle), _dgba(bag.PdfRectangle)) {
			if _ccfb {
				_fb.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073", word, bag)
			}
			return true
		}
	case bag.Ury < word.Lly:
		if bag._bba.blocks(_ebgf(bag.PdfRectangle), _dgba(word.PdfRectangle)) {
			if _ccfb {
				_fb.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s", word, bag)
			}
			return true
		}
	}
	return false
}

// setTextLeading stores `leading` in the `_dgc` field of the text state.
// Calling it on a nil receiver is a no-op.
func (to *textObject) setTextLeading(leading float64) {
	if to != nil {
		to._fga._dgc = leading
	}
}

// _aab returns a new subpath whose point list holds the single point `start`.
func _aab(start _afd.Point) *subpath {
	points := []_afd.Point{start}
	return &subpath{_egd: points}
}

// _gefb returns `_cafa(a, b)` unless `_gcceb` reports true for that value, in
// which case it returns `_bdga(a, b)` instead.
func _gefb(a, b bounded) float64 {
	if primary := _cafa(a, b); !_gcceb(primary) {
		return primary
	}
	return _bdga(a, b)
}
2021-10-22 10:53:20 +00:00
2022-03-13 12:41:53 +00:00
// String returns a human readable description of `s`.
// The members are collected, sorted in increasing order and formatted with "%+v".
func ( _ceca intSet ) String ( ) string { var _bbbef [ ] int ; for _eedc := range _ceca { if _ceca . has ( _eedc ) { _bbbef = append ( _bbbef , _eedc ) ; } ; } ; _af . Ints ( _bbbef ) ; return _cag . Sprintf ( "\u0025\u002b\u0076" , _bbbef ) ; } ;
// _dfbc logs the number of ruling lists in `_cbed` together with the title `_gfag`, then prints
// one line per list to standard output. Debug helper; it is not gated by a flag itself.
func _dfbc ( _gfag string , _cbed [ ] rulingList ) { _fb . Log . Info ( "\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073" , len ( _cbed ) , _gfag ) ;
for _bcac , _aabcg := range _cbed { _cag . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _bcac , _aabcg . String ( ) ) ; } ; } ;
// getComposite returns the paragraphs and rectangle (via parasBBox) of the composite cell stored
// under `_addg ( _adecb , _eggff )`. It returns nil and the zero rectangle when no cell is stored
// there. When `_agede` is on, the looked-up value is printed before the presence check.
func ( _eagb * textTable ) getComposite ( _adecb , _eggff int ) ( paraList , _ac . PdfRectangle ) { _faafd , _gaeag := _eagb . _cead [ _addg ( _adecb , _eggff ) ] ;
if _agede { _cag . Printf ( "\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a" , _adecb , _eggff , _faafd . String ( ) ) ; } ; if ! _gaeag { return nil , _ac . PdfRectangle { } ;
} ; return _faafd . parasBBox ( ) ; } ;
// merge combines the rulings in the list into a single new ruling.
// `_agcgg`, `_aaec` and Color are taken from the first element; `_edcba` is the mean of all
// elements' `_edcba`; `_bfeag` is the minimum and `_cbba` the maximum over the list.
// NOTE(review): the list is indexed at 0 unconditionally, so an empty list panics — callers
// presumably guarantee at least one element.
func ( _egac rulingList ) merge ( ) * ruling { _cacfc := _egac [ 0 ] . _edcba ; _eeccc := _egac [ 0 ] . _bfeag ; _fbcd := _egac [ 0 ] . _cbba ; for _ , _acabc := range _egac [ 1 : ] { _cacfc += _acabc . _edcba ; if _acabc . _bfeag < _eeccc { _eeccc = _acabc . _bfeag ;
} ; if _acabc . _cbba > _fbcd { _fbcd = _acabc . _cbba ; } ; } ; _gedg := & ruling { _agcgg : _egac [ 0 ] . _agcgg , _aaec : _egac [ 0 ] . _aaec , Color : _egac [ 0 ] . Color , _edcba : _cacfc / float64 ( len ( _egac ) ) , _bfeag : _eeccc , _cbba : _fbcd } ; if _cecd { _fb . Log . Info ( "\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073" , len ( _egac ) , _gedg ) ;
for _badef , _fgbdg := range _egac { _cag . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _badef , _fgbdg ) ; } ; } ; return _gedg ; } ;
// tidied returns a cleaned-up copy of the list: duplicates are removed, the result is passed
// through snapToGroups and then sorted. It returns nil when snapToGroups returns nil.
// `_gdde` is only used as a title in the debug log.
func ( _gbgb rulingList ) tidied ( _gdde string ) rulingList { _egcf := _gbgb . removeDuplicates ( ) ; _egcf . log ( "\u0075n\u0069\u0071\u0075\u0065\u0073" ) ;
_fdgfe := _egcf . snapToGroups ( ) ; if _fdgfe == nil { return nil ; } ; _fdgfe . sort ( ) ; if _adce { _fb . Log . Info ( "\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064" , _gdde , len ( _gbgb ) , len ( _egcf ) , len ( _fdgfe ) ) ;
} ; _fdgfe . log ( "\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d" ) ; return _fdgfe ; } ;
// alignsPrimary reports whether the two rulings have the same `_agcgg` value and their `_edcba`
// values differ by less than half of `_gbbd`.
func ( _bbaa * ruling ) alignsPrimary ( _ecab * ruling ) bool { return _bbaa . _agcgg == _ecab . _agcgg && _gc . Abs ( _bbaa . _edcba - _ecab . _edcba ) < _gbbd * 0.5 ; } ;
// findTableGrid tries to build a table from the paragraphs in the list laid over the tiling
// `_fbabg`. The table has one column per entry of `_egfga` and one row per entry of `_ebbb`.
// For every tile present in `_cgdc` the paragraphs inside it (inTile) are stored as a composite
// cell; tiles without paragraphs are counted, and the search is abandoned (nil, nil) once more
// than `( 1.0 - _cfef )` of all positions are empty.
// On success it returns the table after reduceTiling and subdivide, plus the set of paragraphs
// that were placed in cells.
func ( _abde paraList ) findTableGrid ( _fbabg gridTiling ) ( * textTable , map [ * textPara ] struct { } ) { _gfgcb := len ( _fbabg . _egfga ) ;
_cdaa := len ( _fbabg . _ebbb ) ; _acfg := textTable { _ebabc : true , _agac : _gfgcb , _dcbdf : _cdaa , _abccf : make ( map [ uint64 ] * textPara , _gfgcb * _cdaa ) , _cead : make ( map [ uint64 ] compositeCell , _gfgcb * _cdaa ) } ; _edaa := make ( map [ * textPara ] struct { } ) ; _fafaa := int ( ( 1.0 - _cfef ) * float64 ( _gfgcb * _cdaa ) ) ;
_affe := 0 ; if _bdea { _fb . Log . Info ( "\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064" , _gfgcb , _cdaa ) ; } ; for _egca , _egffg := range _fbabg . _ebbb { _fdeae , _abag := _fbabg . _cgdc [ _egffg ] ;
if ! _abag { continue ; } ; for _caga , _cfff := range _fbabg . _egfga { _fbcdfg , _gcgec := _fdeae [ _cfff ] ; if ! _gcgec { continue ; } ; _eebb := _abde . inTile ( _fbcdfg ) ; if len ( _eebb ) == 0 { _affe ++ ; if _affe > _fafaa { if _bdea { _fb . Log . Info ( "\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064" , _affe ) ;
} ; return nil , nil ; } ; } else { _acfg . putComposite ( _caga , _egca , _eebb , _fbcdfg . PdfRectangle ) ; for _ , _eafef := range _eebb { _edaa [ _eafef ] = struct { } { } ; } ; } ; } ; } ;
// NOTE(review): `_beafe` counts first-row cells that are nil or whose `_affbf` flag is false,
// yet the failure message below reads "!numHeader=0" — the condition may be inverted relative
// to its intent. Confirm before relying on it.
_beafe := 0 ; for _cdfe := 0 ; _cdfe < _gfgcb ; _cdfe ++ { _cdeb := _acfg . get ( _cdfe , 0 ) ; if _cdeb == nil || ! _cdeb . _affbf { _beafe ++ ;
} ; } ; if _beafe == 0 { if _bdea { _fb . Log . Info ( "\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030" ) ; } ; return nil , nil ; } ; _eebg := _acfg . reduceTiling ( _fbabg , _faf ) ; _eebg = _eebg . subdivide ( ) ; return _eebg , _edaa ; } ;
// logComposite prints two grids describing the table's composite cells when the `_agede` debug
// flag is on: first the number of paragraphs per cell, then the merged cell text shortened
// with `_bgdca ( ... , 12 )`. `_fceba` is used as a title. It does nothing when the flag is off.
func ( _fdef * textTable ) logComposite ( _fceba string ) { if ! _agede { return ;
} ; _fb . Log . Info ( "\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073" , _fdef . _agac , _fdef . _dcbdf , _fceba ) ; _cag . Printf ( "\u0025\u0035\u0073 \u007c" , "" ) ; for _egaff := 0 ; _egaff < _fdef . _agac ; _egaff ++ { _cag . Printf ( "\u0025\u0033\u0064 \u007c" , _egaff ) ;
} ; _cag . Println ( "" ) ; _cag . Printf ( "\u0025\u0035\u0073 \u002b" , "" ) ; for _cdffg := 0 ; _cdffg < _fdef . _agac ; _cdffg ++ { _cag . Printf ( "\u0025\u0033\u0073 \u002b" , "\u002d\u002d\u002d" ) ; } ; _cag . Println ( "" ) ; for _fafac := 0 ; _fafac < _fdef . _dcbdf ; _fafac ++ { _cag . Printf ( "\u0025\u0035\u0064 \u007c" , _fafac ) ;
for _eddff := 0 ; _eddff < _fdef . _agac ; _eddff ++ { _dgfg , _ := _fdef . _cead [ _addg ( _eddff , _fafac ) ] . parasBBox ( ) ; _cag . Printf ( "\u0025\u0033\u0064 \u007c" , len ( _dgfg ) ) ; } ; _cag . Println ( "" ) ; } ; _fb . Log . Info ( "\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073" , _fdef . _agac , _fdef . _dcbdf , _fceba ) ;
_cag . Printf ( "\u0025\u0035\u0073 \u007c" , "" ) ; for _gbaff := 0 ; _gbaff < _fdef . _agac ; _gbaff ++ { _cag . Printf ( "\u0025\u0031\u0032\u0064\u0020\u007c" , _gbaff ) ; } ; _cag . Println ( "" ) ; _cag . Printf ( "\u0025\u0035\u0073 \u002b" , "" ) ; for _ffcb := 0 ; _ffcb < _fdef . _agac ;
_ffcb ++ { _cag . Print ( "\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b" ) ; } ; _cag . Println ( "" ) ; for _efabg := 0 ; _efabg < _fdef . _dcbdf ; _efabg ++ { _cag . Printf ( "\u0025\u0035\u0064 \u007c" , _efabg ) ; for _cbbef := 0 ; _cbbef < _fdef . _agac ;
_cbbef ++ { _efaga , _ := _fdef . _cead [ _addg ( _cbbef , _efabg ) ] . parasBBox ( ) ; _aacag := "" ; _gcfac := _efaga . merge ( ) ; if _gcfac != nil { _aacag = _gcfac . text ( ) ; } ; _aacag = _cag . Sprintf ( "\u0025\u0071" , _bgdca ( _aacag , 12 ) ) ; _aacag = _aacag [ 1 : len ( _aacag ) - 1 ] ; _cag . Printf ( "\u0025\u0031\u0032\u0073\u0020\u007c" , _aacag ) ;
} ; _cag . Println ( "" ) ; } ; } ;
// String returns a description of `tm`.
// The description holds the mark's rectangle, its `_gba` value (labelled
// "fontsize") and its text in double quotes.
func (tm *textMark) String() string {
	const layout = "\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022"
	return _cag.Sprintf(layout, tm.PdfRectangle, tm._gba, tm._cadaf)
}

// bbox returns the rectangle stored in the table.
func (t *textTable) bbox() _ac.PdfRectangle {
	rect := t.PdfRectangle
	return rect
}
// _dgbd reports whether `num` divided by the larger of `_efff` and `den` is
// below the threshold `_dda`.
func _dgbd(num, den float64) bool {
	divisor := _gc.Max(_efff, den)
	return num/divisor < _dda
}

// writeText writes the paragraph's text to `w`.
// A paragraph without a table (`_ccec` is nil) is written with writeCellText.
// Otherwise the table is written row by row: each cell's text (or a tab for a
// missing cell) is followed by a space, and rows are separated by a newline.
// Errors returned by `w` are not checked.
func (para *textPara) writeText(w _c.Writer) {
	table := para._ccec
	if table == nil {
		para.writeCellText(w)
		return
	}
	for row := 0; row < table._dcbdf; row++ {
		for col := 0; col < table._agac; col++ {
			if cell := table.get(col, row); cell != nil {
				cell.writeCellText(w)
			} else {
				w.Write([]byte("\u0009"))
			}
			w.Write([]byte("\u0020"))
		}
		// No trailing newline after the last row.
		if row < table._dcbdf-1 {
			w.Write([]byte("\u000a"))
		}
	}
}

// size returns the number of entries on the stack.
func (stack *stateStack) size() int {
	entries := *stack
	return len(entries)
}

// textResult bundles a PageText with two integer values.
type textResult struct {
	_gga  PageText
	_febb int
	_gcf  int
}

const _gef = 1.0 / 1000.0

// stroke appends a pathSection built from the current subpaths (`_fegc`) and the
// current stroke color to `*sections`. When the `_adce` debug flag is on it
// prints a summary, and when `_gbbagd` is also on it prints the subpaths up to
// and including index 10.
func (ss *shapesState) stroke(sections *[]pathSection) {
	section := pathSection{_bbdc: ss._fegc, Color: ss._afbf.getStrokeColor()}
	*sections = append(*sections, section)

	if !_adce {
		return
	}
	_cag.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*sections), ss, ss._afbf.getStrokeColor(), section.bbox())
	if !_gbbagd {
		return
	}
	for i, sp := range ss._fegc {
		_cag.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sp)
		if i == 10 {
			break
		}
	}
}

// log writes the list to the log under the title `title` and prints one line per
// ruling to standard output. It does nothing unless the `_adce` debug flag is on.
func (rulings rulingList) log(title string) {
	if !_adce {
		return
	}
	_fb.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", title, rulings.String())
	for i := range rulings {
		_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, rulings[i].String())
	}
}

// nextLine calls moveLP with a zero X offset and the negated `_dgc` value of the
// text state as the Y offset.
func (to *textObject) nextLine() {
	dy := -to._fga._dgc
	to.moveLP(0, dy)
}
2021-01-07 14:20:10 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
//
//	ex, _ := New(page)
//	// handle errors
//	pageText, _, _, err := ex.ExtractPageText()
//	// handle errors
//	text := pageText.Text()
//	textMarks := pageText.Marks()
//
//	start := strings.Index(text, term)
//	end := start + len(term)
//	spanMarks, err := textMarks.RangeOffset(start, end)
//	// handle errors
//	bbox, ok := spanMarks.BBox()
//	// handle errors
type TextMark struct {
2020-11-23 22:15:56 +00:00
2020-12-06 13:03:03 +00:00
// Text is the extracted text.
Text string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// BBox is the bounding box of the text.
2022-03-13 12:41:53 +00:00
BBox _ac . PdfRectangle ;
2021-01-07 14:20:10 +00:00
// Font is the font the text was drawn with.
2022-03-13 12:41:53 +00:00
Font * _ac . PdfFont ;
2021-01-07 14:20:10 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
//
//	text, textMarks := pageText.Text(), pageText.Marks()
//	marks := textMarks.Elements()
//
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-03-13 12:41:53 +00:00
FillColor _ca . Color ;
2021-01-07 14:20:10 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-03-13 12:41:53 +00:00
StrokeColor _ca . Color ;
2021-01-07 14:20:10 +00:00
// Orientation is the text orientation.
2022-03-13 12:41:53 +00:00
Orientation int ; } ;
// _bcbef reports whether the x extents of the two rectangles overlap (touching edges count).
func _bcbef ( _gfaa , _ecded _ac . PdfRectangle ) bool { return _ecded . Llx <= _gfaa . Urx && _gfaa . Llx <= _ecded . Urx ; } ;
// updateBBox folds the rectangle of every paragraph in the cell into the cell's own rectangle
// using `_gcae`. The cell's existing rectangle is the starting value; it is not reset first.
func ( _gcffe * compositeCell ) updateBBox ( ) { for _ , _edcd := range _gcffe . paraList { _gcffe . PdfRectangle = _gcae ( _gcffe . PdfRectangle , _edcd . PdfRectangle ) ;
} ; } ;
// _eeb reports whether the left edge of word `_bbf` lies in the half-open interval
// [`_afad.Urx`, `_afad.Urx`+`_bdf`), i.e. the word starts at or right of the bag's right edge by
// less than `_bdf`.
func _eeb ( _afad * wordBag , _bbf * textWord , _bdf float64 ) bool { return _afad . Urx <= _bbf . Llx && _bbf . Llx < _afad . Urx + _bdf ; } ;
// toTextMarks returns the TextMarks of the paragraph, threading the running offset `*_ffcdf`
// through the helpers it calls. Without a table (`_ccec` is nil) it defers to toCellTextMarks.
// With a table it walks the cells row by row: a missing cell contributes a tab mark, every cell
// is followed by a space mark, and every row except the last is followed by a line-feed mark.
func ( _gedba * textPara ) toTextMarks ( _ffcdf * int ) [ ] TextMark { if _gedba . _ccec == nil { return _gedba . toCellTextMarks ( _ffcdf ) ;
} ; var _feabg [ ] TextMark ; for _dcdb := 0 ; _dcdb < _gedba . _ccec . _dcbdf ; _dcdb ++ { for _faeg := 0 ; _faeg < _gedba . _ccec . _agac ; _faeg ++ { _dbgc := _gedba . _ccec . get ( _faeg , _dcdb ) ; if _dbgc == nil { _feabg = _bafe ( _feabg , _ffcdf , "\u0009" ) ; } else { _acbb := _dbgc . toCellTextMarks ( _ffcdf ) ;
_feabg = append ( _feabg , _acbb ... ) ; } ; _feabg = _bafe ( _feabg , _ffcdf , "\u0020" ) ; } ; if _dcdb < _gedba . _ccec . _dcbdf - 1 { _feabg = _bafe ( _feabg , _ffcdf , "\u000a" ) ; } ; } ; return _feabg ; } ;
// emptyCompositeRow reports whether no composite cell in row `_caabe` holds any paragraph.
// Cells that are absent from the `_cead` map count as empty.
func ( _cdffa * textTable ) emptyCompositeRow ( _caabe int ) bool { for _cccc := 0 ;
_cccc < _cdffa . _agac ; _cccc ++ { if _bbcc , _eddac := _cdffa . _cead [ _addg ( _cccc , _caabe ) ] ; _eddac { if len ( _bbcc . paraList ) > 0 { return false ; } ; } ; } ; return true ; } ;
// The four markKind values, numbered 0 to 3 by iota.
// NOTE(review): what each kind stands for is not visible here — confirm at the use sites.
const ( _ebff markKind = iota ; _gagc ; _ceaac ; _adbg ; ) ;
// newSubPath clears the current path and, when the `_efca` debug switch is on, logs the state.
func ( _gcge * shapesState ) newSubPath ( ) { _gcge . clearPath ( ) ;
if _efca { _fb . Log . Info ( "\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073" , _gcge ) ; } ; } ;
// log dumps the tiling under the title `_dddb`: the sizes and contents of the `_egfga` (printed as
// "llx") and `_ebbb` (printed as "lly") coordinate lists, then every tile found in `_cgdc`, which
// is indexed first by an `_ebbb` value and then by an `_egfga` value.
// It does nothing unless the `_bdea` debug switch is on.
func ( _gcfa gridTiling ) log ( _dddb string ) { if ! _bdea { return ; } ; _fb . Log . Info ( "\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071" , len ( _gcfa . _egfga ) , len ( _gcfa . _ebbb ) , _dddb ) ;
_cag . Printf ( "\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a" , _gcfa . _egfga ) ; _cag . Printf ( "\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a" , _gcfa . _ebbb ) ; for _cddbg , _cfab := range _gcfa . _ebbb { _acfa , _fgbgc := _gcfa . _cgdc [ _cfab ] ;
if ! _fgbgc { continue ; } ; _cag . Printf ( "%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a" , _cddbg , _cfab ) ; for _ccba , _dgda := range _gcfa . _egfga { _gdaa , _gdbeb := _acfa [ _dgda ] ; if ! _gdbeb { continue ; } ; _cag . Printf ( "\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a" , _ccba , _gdaa . String ( ) ) ;
} ; } ; } ;
// getFontDirect looks up the font object named `_fgff` with getFontDict and builds a PdfFont from
// it. A lookup error is returned as is. A construction error is logged at debug level and
// returned together with whatever NewPdfFontFromPdfObject produced.
func ( _cdb * textObject ) getFontDirect ( _fgff string ) ( * _ac . PdfFont , error ) { _face , _dddg := _cdb . getFontDict ( _fgff ) ; if _dddg != nil { return nil , _dddg ; } ; _dbccc , _dddg := _ac . NewPdfFontFromPdfObject ( _face ) ; if _dddg != nil { _fb . Log . Debug ( "\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fgff , _dddg ) ;
} ; return _dbccc , _dddg ; } ;
// aligned reports whether enough of the rulings line up with each other.
// Lists with fewer than two rulings are never aligned. Otherwise the first ruling becomes a
// reference with a match count of 0; each later ruling either increments the count of one
// reference it is gridIntersecting with, or becomes a new reference itself. The result is true
// when the share of references that were never matched, relative to the list length, is at most
// 1.0 - `_adcf`.
// NOTE(review): the references are scanned by ranging over a map, so which reference receives a
// match is not deterministic when several qualify — confirm this cannot change the result.
func ( _ccd rulingList ) aligned ( ) bool { if len ( _ccd ) < 2 { return false ; } ; _dbee := make ( map [ * ruling ] int ) ; _dbee [ _ccd [ 0 ] ] = 0 ; for _ , _gabb := range _ccd [ 1 : ] { _ggec := false ; for _cbbge := range _dbee { if _gabb . gridIntersecting ( _cbbge ) { _dbee [ _cbbge ] ++ ;
_ggec = true ; break ; } ; } ; if ! _ggec { _dbee [ _gabb ] = 0 ; } ; } ; _adfbf := 0 ; for _ , _aggde := range _dbee { if _aggde == 0 { _adfbf ++ ; } ; } ; _fddga := float64 ( _adfbf ) / float64 ( len ( _ccd ) ) ; _ffga := _fddga <= 1.0 - _adcf ; if _adce { _fb . Log . Info ( "\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073" , _ffga , _fddga , _adfbf , len ( _ccd ) , _ccd . String ( ) ) ;
} ; return _ffga ; } ;
// pathSection is a list of subpaths together with an embedded color.
type pathSection struct { _bbdc [ ] * subpath ; _ca . Color ; } ;
// toTextMarks returns the TextMarks of all paragraphs whose `_affbf` flag is not set, with offsets
// counted from 0. Between a paragraph and the next list entry it inserts one space mark when
// `_abae` holds for the pair and two line-feed marks otherwise; two line-feed marks are appended
// at the very end.
func ( _cffd paraList ) toTextMarks ( ) [ ] TextMark { _acea := 0 ; var _gcea [ ] TextMark ; for _bbbed , _cbdg := range _cffd { if _cbdg . _affbf { continue ; } ; _gccc := _cbdg . toTextMarks ( & _acea ) ;
_gcea = append ( _gcea , _gccc ... ) ; if _bbbed != len ( _cffd ) - 1 { if _abae ( _cbdg , _cffd [ _bbbed + 1 ] ) { _gcea = _bafe ( _gcea , & _acea , "\u0020" ) ; } else { _gcea = _bafe ( _gcea , & _acea , "\u000a" ) ; _gcea = _bafe ( _gcea , & _acea , "\u000a" ) ; } ; } ; } ; _gcea = _bafe ( _gcea , & _acea , "\u000a" ) ;
_gcea = _bafe ( _gcea , & _acea , "\u000a" ) ; return _gcea ; } ;
// rulingKind is the integer type used to classify rulings.
type rulingKind int ;
// String returns a description of the cell: its rectangle, its paragraph count and, when it has
// paragraphs, their merged text shortened by `_bgdca` with the argument 50.
func ( _dbaa compositeCell ) String ( ) string { _ebeb := "" ; if len ( _dbaa . paraList ) > 0 { _ebeb = _bgdca ( _dbaa . paraList . merge ( ) . text ( ) , 50 ) ; } ; return _cag . Sprintf ( "\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071" , _dbaa . PdfRectangle , len ( _dbaa . paraList ) , _ebeb ) ;
} ;
// scanBand scans the words of `_dfad` whose `_fgbda` value lies in [`_eddb`, `_gdeb`] widened on
// both sides by `_dbga` times `_dcea._aege`, and counts those that
//   - are accepted by the filter `_aggb`,
//   - pass the size test: when `_gdgd` > 0, the smaller of the relative difference and the ratio
//     of the word's `_abeg` and the bag's `_aege` must not exceed `_gdgd`,
//   - are not blocked by `_dcea`.
// Unless `_ddgdg` is set, each counted word is pulled into `_dcea` and the removals are applied to
// `_dfad` at the end; when `_ddgdg` is set nothing is moved and the scan of a depth bucket stops
// at its first counted word. Unless `_afga` is set, the range [`_eddb`, `_gdeb`] grows to include
// every counted word. The count is returned. The title `_gdg` is not used in the body.
// NOTE(review): `_fgbda` is presumably the word depth and `_abeg`/`_aege` font sizes — confirm.
func ( _dfad * wordBag ) scanBand ( _gdg string , _dcea * wordBag , _aggb func ( _fedf * wordBag , _dafb * textWord ) bool , _eddb , _gdeb , _gdgd float64 , _ddgdg , _afga bool ) int { _eaf := _dcea . _aege ; var _ebag map [ int ] map [ * textWord ] struct { } ; if ! _ddgdg { _ebag = _dfad . makeRemovals ( ) ;
} ; _afaec := _dbga * _eaf ; _efaf := 0 ; for _ , _baba := range _dfad . depthBand ( _eddb - _afaec , _gdeb + _afaec ) { if len ( _dfad . _cfeeb [ _baba ] ) == 0 { continue ; } ; for _ , _bdef := range _dfad . _cfeeb [ _baba ] { if ! ( _eddb - _afaec <= _bdef . _fgbda && _bdef . _fgbda <= _gdeb + _afaec ) { continue ;
} ; if ! _aggb ( _dcea , _bdef ) { continue ; } ; _babg := 2.0 * _gc . Abs ( _bdef . _abeg - _dcea . _aege ) / ( _bdef . _abeg + _dcea . _aege ) ; _dbbb := _gc . Max ( _bdef . _abeg / _dcea . _aege , _dcea . _aege / _bdef . _abeg ) ; _dcab := _gc . Min ( _babg , _dbbb ) ; if _gdgd > 0 && _dcab > _gdgd { continue ;
} ; if _dcea . blocked ( _bdef ) { continue ; } ; if ! _ddgdg { _dcea . pullWord ( _bdef , _baba , _ebag ) ; } ; _efaf ++ ; if ! _afga { if _bdef . _fgbda < _eddb { _eddb = _bdef . _fgbda ; } ; if _bdef . _fgbda > _gdeb { _gdeb = _bdef . _fgbda ; } ; } ; if _ddgdg { break ; } ; } ; } ; if ! _ddgdg { _dfad . applyRemovals ( _ebag ) ;
} ; return _efaf ; } ;
// extractInlineImage converts the inline image `_ecg` to RGB and records it as an ImageMark.
// The color space comes from the image and falls back to DeviceGray when none is given. Width,
// height, angle and position of the mark are taken from the CTM of the graphics state `_bgd`.
// The mark is appended to `_caa` and the counter `_ebc` is incremented. Any error from the image
// or color space conversion is returned unchanged.
func ( _efc * imageExtractContext ) extractInlineImage ( _ecg * _ag . ContentStreamInlineImage , _bgd _ag . GraphicsState , _cba * _ac . PdfPageResources ) error { _da , _ggd := _ecg . ToImage ( _cba ) ; if _ggd != nil { return _ggd ; } ; _egg , _ggd := _ecg . GetColorSpace ( _cba ) ;
if _ggd != nil { return _ggd ; } ; if _egg == nil { _egg = _ac . NewPdfColorspaceDeviceGray ( ) ; } ; _cff , _ggd := _egg . ImageToRGB ( * _da ) ; if _ggd != nil { return _ggd ; } ; _dca := ImageMark { Image : & _cff , Width : _bgd . CTM . ScalingFactorX ( ) , Height : _bgd . CTM . ScalingFactorY ( ) , Angle : _bgd . CTM . Angle ( ) } ;
_dca . X , _dca . Y = _bgd . CTM . Translation ( ) ; _efc . _caa = append ( _efc . _caa , _dca ) ; _efc . _ebc ++ ; return nil ; } ;
// _gbfad matches a whole string made of optional white space, then either decimal digits with an
// optional trailing period or a run of the letters 'I', 'i' and 'v', then optional white space and
// an optional closing parenthesis.
// NOTE(review): presumably used to recognise list item labels such as "1.", "iv" or "2)" — confirm.
var _gbfad = _a . MustCompile ( "\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024" ) ;
// yMean returns the average of the Y coordinates of the two points stored in the ruling.
func (_eabg lineRuling) yMean() float64 {
	ySum := _eabg._gffd.Y + _eabg._eeebc.Y
	return 0.5 * ySum
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
//
// NOTE(review): the field names are obfuscated. From the types alone, the struct holds a string,
// the page resources, a rectangle, a font map and a text-result map (both keyed by string) and two
// integer values; presumably the string is the page content stream, the rectangle the media box
// and the maps caches — confirm against New.
type Extractor struct { _gg string ; _cb * _ac . PdfPageResources ; _fe _ac . PdfRectangle ; _gde map [ string ] fontEntry ; _agb map [ string ] textResult ; _df int64 ; _ae int ; } ;
// Sentinel errors shared by the package.
// _agc carries the message "type check error".
var ( _agc = _d . New ( "\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072" ) ;
// _ge carries the message "range check error".
_ge = _d . New ( "\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072" ) ; ) ;
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// String returns a string describing `tm`.
//
// The description contains, in order: the mark's Offset, its Text (quoted, then as hexadecimal
// code points), the lower-left and upper-right corners of its BBox, a description of its Font
// shortened to at most 50 bytes plus "...", and the suffix " *M*" when Meta is set.
func (_bcca TextMark) String() string {
	_agbdf := _bcca.BBox

	var _eege string
	if _bcca.Font != nil {
		_eege = _bcca.Font.String()
		if len(_eege) > 50 {
			// Shorten on a rune boundary: slicing at a fixed byte index could split a
			// multi-byte UTF-8 sequence and leave invalid text in the result. A valid
			// sequence needs at most UTFMax-1 steps back; the bound keeps malformed
			// input from being shortened further than that.
			_cut := 50
			for _back := 0; _back < _g.UTFMax-1 && _cut > 0 && !_g.RuneStart(_eege[_cut]); _back++ {
				_cut--
			}
			_eege = _eege[:_cut] + "\u002e\u002e\u002e"
		}
	}

	var _cadg string
	if _bcca.Meta {
		_cadg = "\u0020\u002a\u004d\u002a"
	}

	return _cag.Sprintf("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d", _bcca.Offset, _bcca.Text, []rune(_bcca.Text), _agbdf.Llx, _agbdf.Lly, _agbdf.Urx, _agbdf.Ury, _eege, _cadg)
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// Tables returns the tables extracted from the page.
// When the `_agede` debug switch is on, the number of tables is logged first.
func (_ggb PageText) Tables() []TextTable {
	tables := _ggb._dcgg
	if _agede {
		_fb.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}

// _aaga returns the rectangle common to `_acgf` and `_faca` and true when `_ecde` accepts the
// pair; otherwise it returns the zero rectangle and false.
func _aaga(_acgf, _faca _ac.PdfRectangle) (_ac.PdfRectangle, bool) {
	if _ecde(_acgf, _faca) {
		common := _ac.PdfRectangle{
			Llx: _gc.Max(_acgf.Llx, _faca.Llx),
			Urx: _gc.Min(_acgf.Urx, _faca.Urx),
			Lly: _gc.Max(_acgf.Lly, _faca.Lly),
			Ury: _gc.Min(_acgf.Ury, _faca.Ury),
		}
		return common, true
	}
	return _ac.PdfRectangle{}, false
}

// readBefore reports whether the paragraph with index `_aedb` is read before the one with index
// `_bddd`. `_bege` is passed through to llyRange.
//
// That is the case when `_cbdb` accepts the pair and the first paragraph has the larger Lly, or
// when the first paragraph's `_gbaa` rectangle ends left of the second's and no third paragraph
// returned by llyRange for the Lly span of the pair overlaps, in x, the interval between the
// larger Llx and the smaller Urx of the two `_gbaa` rectangles.
func (_gcab paraList) readBefore(_bege []int, _aedb, _bddd int) bool {
	first, second := _gcab[_aedb], _gcab[_bddd]
	if _cbdb(first, second) && first.Lly > second.Lly {
		return true
	}
	if !(first._gbaa.Urx < second._gbaa.Llx) {
		return false
	}

	yLow, yHigh := first.Lly, second.Lly
	if yLow > yHigh {
		yLow, yHigh = yHigh, yLow
	}
	xFrom := _gc.Max(first._gbaa.Llx, second._gbaa.Llx)
	xTo := _gc.Min(first._gbaa.Urx, second._gbaa.Urx)

	for _, idx := range _gcab.llyRange(_bege, yLow, yHigh) {
		if idx == _aedb || idx == _bddd {
			continue
		}
		between := _gcab[idx]
		if between._gbaa.Llx <= xTo && xFrom <= between._gbaa.Urx {
			return false
		}
	}
	return true
}

// showText passes the bytes `_bfd` to renderText and returns its error.
func (_cfa *textObject) showText(_bfd []byte) error {
	err := _cfa.renderText(_bfd)
	return err
}

// complete reports whether every tile stored in the tiling is complete.
func (_cebe gridTiling) complete() bool {
	for _, row := range _cebe._cgdc {
		for _, tile := range row {
			if tile.complete() {
				continue
			}
			return false
		}
	}
	return true
}

// _edde compares `_dab` and `_abdcb` with `_bdga` and returns that value unless `_gcceb` holds for
// it, in which case the result of `_cafa` is returned instead.
func _edde(_dab, _abdcb bounded) float64 {
	primary := _bdga(_dab, _abdcb)
	if _gcceb(primary) {
		return _cafa(_dab, _abdcb)
	}
	return primary
}

// _afbfd maps each ruling kind to its name: "none", "horizontal" or "vertical".
var _afbfd = map[rulingKind]string{
	_dgdb:   "\u006e\u006f\u006e\u0065",
	_eccgd:  "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",
	_aafafg: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",
}
// Boolean switches and small integer/float settings used by the extraction code.
// NOTE(review): the names are obfuscated; the role of each value has to be read off its use sites.
const ( _ecgaa = true ; _acdb = true ; _bfdb = true ; _bfae = false ; _ebfga = false ; _gfgc = 6 ; _bdfg = 3.0 ; _gfcfb = 200 ; _ggfg = true ; _aebe = true ; _ffac = true ; _gcbg = true ; _efdb = false ; ) ;
// Numeric tolerances, thresholds and limits used by the extraction code.
// NOTE(review): the names are obfuscated; the role of each value has to be read off its use sites.
const ( _cbd = 1.0e-6 ; _eada = 1.0e-4 ; _ebgc = 10 ; _eddf = 6 ; _dbga = 0.5 ; _aafe = 0.12 ;
_bafd = 0.19 ; _gdcf = 0.04 ; _bffb = 0.04 ; _cafad = 1.0 ; _cegb = 0.04 ; _effe = 0.4 ; _fgeb = 0.7 ; _ebba = 1.0 ; _faae = 0.1 ; _ddbc = 1.4 ; _fecd = 0.46 ; _gdf = 0.02 ; _bbce = 0.2 ; _ffcd = 0.5 ; _dfed = 4 ; _eaeg = 4.0 ; _eecc = 6 ; _cfef = 0.3 ; _bbdee = 0.01 ; _aaa = 0.02 ; _fee = 2 ; _edcf = 2 ; _edaeb = 500 ;
_dcbf = 4.0 ; _cefe = 4.0 ; _dda = 0.05 ; _efff = 0.1 ; _bade = 2.0 ; _gbbd = 2.0 ; _dcbg = 1.5 ; _faf = 3.0 ; _adcf = 0.25 ; ) ;
// _afbfb returns the keys of the outer map in descending order.
func _afbfb ( _dbeb map [ float64 ] map [ float64 ] gridTile ) [ ] float64 { _bdde := make ( [ ] float64 , 0 , len ( _dbeb ) ) ; for _eedf := range _dbeb { _bdde = append ( _bdde , _eedf ) ;
} ; _af . Float64s ( _bdde ) ; _cfede := len ( _bdde ) ; for _babfe := 0 ; _babfe < _cfede / 2 ; _babfe ++ { _bdde [ _babfe ] , _bdde [ _cfede - 1 - _babfe ] = _bdde [ _cfede - 1 - _babfe ] , _bdde [ _babfe ] ; } ; return _bdde ; } ;
// arrangeText consumes the words of the bag and arranges them into lines and then into one
// paragraph, which is returned; nil is returned when no line could be formed.
//
// The bag is sorted and, when `_acdb` is on, de-duplicated. For every depth index, while words
// remain there, a new line is started at the first word in reading order. Words are then pulled
// into the line one at a time from the band of `_dbga` times the first word's `_abeg` around its
// `_fgbda`: candidates whose `_dgfaa` gap to the line's last word exceeds `_ddbc` times `_abeg`
// are skipped, a gap below minus `_fecd` times `_abeg` ends the line at once, and among the
// remaining candidates the one that `_cafa` orders first is taken. Finished lines are sorted with
// `_edde` and handed to `_efagec` together with the bag's rectangle.
// NOTE(review): `_abeg` is presumably the font size and `_fgbda` the depth of a word — confirm.
func ( _cgfae * wordBag ) arrangeText ( ) * textPara { _cgfae . sort ( ) ;
if _acdb { _cgfae . removeDuplicates ( ) ; } ; var _caecb [ ] * textLine ; for _ , _gcege := range _cgfae . depthIndexes ( ) { for ! _cgfae . empty ( _gcege ) { _cedb := _cgfae . firstReadingIndex ( _gcege ) ; _cbga := _cgfae . firstWord ( _cedb ) ; _ebab := _eade ( _cgfae , _cedb ) ; _abec := _cbga . _abeg ;
_deeg := _cbga . _fgbda - _dbga * _abec ; _adfeg := _cbga . _fgbda + _dbga * _abec ; _fcdc := _ddbc * _abec ; _gdcff := _fecd * _abec ; _cgbb : for { var _gafce * textWord ; _gdff := 0 ; for _ , _ecaff := range _cgfae . depthBand ( _deeg , _adfeg ) { _abbg := _cgfae . highestWord ( _ecaff , _deeg , _adfeg ) ;
if _abbg == nil { continue ; } ; _bagg := _dgfaa ( _abbg , _ebab . _cadc [ len ( _ebab . _cadc ) - 1 ] ) ; if _bagg < - _gdcff { break _cgbb ; } ; if _bagg > _fcdc { continue ; } ; if _gafce != nil && _cafa ( _abbg , _gafce ) >= 0 { continue ; } ; _gafce = _abbg ; _gdff = _ecaff ; } ; if _gafce == nil { break ;
} ; _ebab . pullWord ( _cgfae , _gafce , _gdff ) ; } ; _ebab . markWordBoundaries ( ) ; _caecb = append ( _caecb , _ebab ) ; } ; } ; if len ( _caecb ) == 0 { return nil ; } ; _af . Slice ( _caecb , func ( _abccc , _dffb int ) bool { return _edde ( _caecb [ _abccc ] , _caecb [ _dffb ] ) < 0 } ) ; _ebge := _efagec ( _cgfae . PdfRectangle , _caecb ) ;
if _addb { _fb . Log . Info ( "\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073" , _ebge . String ( ) ) ; if _dfgd { for _cgda , _faaa := range _ebge . _ddeb { _cag . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _cgda , _faaa . String ( ) ) ;
if _ceac { for _cgdgd , _cgeff := range _faaa . _cadc { _cag . Printf ( "\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a" , _cgdgd , _cgeff . String ( ) ) ; for _cceb , _cfeg := range _cgeff . _ggabc { _cag . Printf ( "\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n" , _cceb , _cfeg . String ( ) ) ;
} ; } ; } ; } ; } ; } ; return _ebge ; } ;
// findTables returns the tables found among the paragraphs. It first records the neighbours of
// every paragraph and sorts the list in place with `_gefb`, then collects the tables found from
// the grid tilings `_aabb` (when `_ggfg` is on) followed by those found from the text alone (when
// `_aebe` is on).
func ( _cdacd paraList ) findTables ( _aabb [ ] gridTiling ) [ ] * textTable { _cdacd . addNeighbours ( ) ; _af . Slice ( _cdacd , func ( _egbbd , _accdc int ) bool { return _gefb ( _cdacd [ _egbbd ] , _cdacd [ _accdc ] ) < 0 } ) ; var _gaag [ ] * textTable ;
if _ggfg { _abffb := _cdacd . findGridTables ( _aabb ) ; _gaag = append ( _gaag , _abffb ... ) ; } ; if _aebe { _gfdce := _cdacd . findTextTables ( ) ; _gaag = append ( _gaag , _gfdce ... ) ; } ; return _gaag ; } ;
// computeText returns the concatenation of the `_cadaf` strings of the word's marks.
func ( _dbcbc * textWord ) computeText ( ) string { _ebfgd := make ( [ ] string , len ( _dbcbc . _ggabc ) ) ;
for _afbc , _gefeg := range _dbcbc . _ggabc { _ebfgd [ _afbc ] = _gefeg . _cadaf ; } ; return _cf . Join ( _ebfgd , "" ) ; } ;
// _aaca classifies a rectangle as a ruling: `_eccgd` ("horizontal" in `_afbfd`) when it is wider
// than tall and at least `_dcbf` wide, `_aafafg` ("vertical") when it is not wider than tall and at
// least `_dcbf` high, and `_dgdb` ("none") otherwise.
func _aaca ( _bgde _ac . PdfRectangle ) rulingKind { _eaabb := _bgde . Width ( ) ; _aebbf := _bgde . Height ( ) ; if _eaabb > _aebbf { if _eaabb >= _dcbf { return _eccgd ;
} ; } else { if _aebbf >= _dcbf { return _aafafg ; } ; } ; return _dgdb ; } ;
// _efeb classifies the segment between two points by passing the absolute x and y differences and
// the length `_dcbf` to `_cbcbf`.
func _efeb ( _fcedg , _bdbb _afd . Point ) rulingKind { _gcacc := _gc . Abs ( _fcedg . X - _bdbb . X ) ; _gefd := _gc . Abs ( _fcedg . Y - _bdbb . Y ) ; return _cbcbf ( _gcacc , _gefd , _dcbf ) ; } ;
// makeRemovals returns a map holding one empty word set for every key of `_cfeeb`.
func ( _afae * wordBag ) makeRemovals ( ) map [ int ] map [ * textWord ] struct { } { _gdae := make ( map [ int ] map [ * textWord ] struct { } , len ( _afae . _cfeeb ) ) ;
for _edbg := range _afae . _cfeeb { _gdae [ _edbg ] = make ( map [ * textWord ] struct { } ) ; } ; return _gdae ; } ;
// fontsize returns the `_eabc` value of the paragraph's first line.
// It panics when the paragraph has no lines.
func ( _gbaf * textPara ) fontsize ( ) float64 { return _gbaf . _ddeb [ 0 ] . _eabc } ;
// getDown returns, for every column, the `_aegf` paragraph of the cell in the table's last row.
// It returns nil when one of those paragraphs is taken or when the `_cegf` of a paragraph differs
// from the paragraph in the next column.
// NOTE(review): the result of get is dereferenced without a nil check — confirm that the last row
// is always fully populated when this is called.
func ( _bdaag * textTable ) getDown ( ) paraList { _ebdad := make ( paraList , _bdaag . _agac ) ;
for _ccefc := 0 ; _ccefc < _bdaag . _agac ; _ccefc ++ { _baeb := _bdaag . get ( _ccefc , _bdaag . _dcbdf - 1 ) . _aegf ; if _baeb . taken ( ) { return nil ; } ; _ebdad [ _ccefc ] = _baeb ; } ; for _ddad := 0 ; _ddad < _bdaag . _agac - 1 ; _ddad ++ { if _ebdad [ _ddad ] . _cegf != _ebdad [ _ddad + 1 ] { return nil ;
} ; } ; return _ebdad ; } ;
// extractPageResourcesToFont appends one Font entry to `_ddc.Fonts` for every font in the font
// dictionary of `_geg` whose descriptor font name is not yet present according to `_ade`.
//
// Each entry records the font name, the PdfFont, its subtype, whether it is a CID or simple font,
// whether its ToUnicode data is non-empty, and the decoded embedded font program with a file name
// built from the font name and the suffix ".pfb" (FontFile), ".ttf" (FontFile2) or ".cff"
// (FontFile3). A font without an embedded program only produces a debug log entry.
// An error is returned when the resources have no font dictionary, a listed font cannot be found
// or loaded, or a font stream cannot be decoded.
// NOTE(review): the result of FontDescriptor() and its FontName are used without a nil check —
// confirm that neither can be nil here.
func ( _ddc * PageFonts ) extractPageResourcesToFont ( _geg * _ac . PdfPageResources ) error { _afg , _ed := _gdd . GetDict ( _geg . Font ) ; if ! _ed { return _d . New ( _fgg ) ; } ; for _ , _cab := range _afg . Keys ( ) { var ( _gf = true ; _cae [ ] byte ; _cd string ;
) ; _gb , _fggc := _geg . GetFontByName ( _cab ) ; if ! _fggc { return _d . New ( _eb ) ; } ; _agg , _ef := _ac . NewPdfFontFromPdfObject ( _gb ) ; if _ef != nil { return _ef ; } ; _ec := _agg . FontDescriptor ( ) ; _ff := _agg . FontDescriptor ( ) . FontName . String ( ) ; _bc := _agg . Subtype ( ) ;
if _ade ( _ddc . Fonts , _ff ) { continue ; } ; if len ( _agg . ToUnicode ( ) ) == 0 { _gf = false ; } ; if _ec . FontFile != nil { if _cgb , _edd := _gdd . GetStream ( _ec . FontFile ) ; _edd { _cae , _ef = _gdd . DecodeStream ( _cgb ) ; if _ef != nil { return _ef ; } ; _cd = _ff + "\u002e\u0070\u0066\u0062" ;
} ; } else if _ec . FontFile2 != nil { if _fd , _efg := _gdd . GetStream ( _ec . FontFile2 ) ; _efg { _cae , _ef = _gdd . DecodeStream ( _fd ) ; if _ef != nil { return _ef ; } ; _cd = _ff + "\u002e\u0074\u0074\u0066" ; } ; } else if _ec . FontFile3 != nil { if _bcg , _ebf := _gdd . GetStream ( _ec . FontFile3 ) ;
_ebf { _cae , _ef = _gdd . DecodeStream ( _bcg ) ; if _ef != nil { return _ef ; } ; _cd = _ff + "\u002e\u0063\u0066\u0066" ; } ; } ; if len ( _cd ) < 1 { _fb . Log . Debug ( _fa ) ; } ; _fbg := Font { FontName : _ff , PdfFont : _agg , IsCID : _agg . IsCID ( ) , IsSimple : _agg . IsSimple ( ) , ToUnicode : _gf , FontType : _bc , FontData : _cae , FontFileName : _cd , FontDescriptor : _ec } ;
_ddc . Fonts = append ( _ddc . Fonts , _fbg ) ; } ; return nil ; } ;
// toGrids partitions the rulings into groups of connected rulings and returns those groups that
// isActualGrid accepts, each replaced by the list isActualGrid returns and then snapped with
// snapToGroups.
//
// The rulings are visited in an order that puts those with more connections first (ties are broken
// with comp). A ruling joins the first existing group containing a ruling connected to it and
// otherwise starts a new group. Groups are then ordered by decreasing size and sorted internally
// with comp.
// NOTE(review): the first element of the ordering is read unconditionally, so an empty list would
// panic — confirm that callers never pass one.
func ( _bdbbd rulingList ) toGrids ( ) [ ] rulingList { if _adce { _fb . Log . Info ( "t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073" , _bdbbd ) ; } ; _dgfbb := _bdbbd . intersections ( ) ; if _adce { _fb . Log . Info ( "\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020" , len ( _bdbbd ) , len ( _dgfbb ) ) ;
for _ , _cdagg := range _cdgaf ( _dgfbb ) { _cag . Printf ( "\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n" , _cdagg , _dgfbb [ _cdagg ] ) ; } ; } ; _bead := make ( map [ int ] intSet , len ( _bdbbd ) ) ; for _bccdd := range _bdbbd { _acfb := _bdbbd . connections ( _dgfbb , _bccdd ) ; if len ( _acfb ) > 0 { _bead [ _bccdd ] = _acfb ;
} ; } ; if _adce { _fb . Log . Info ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064" , len ( _bead ) ) ; for _ , _efcg := range _cdgaf ( _bead ) { _cag . Printf ( "\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n" , _efcg , _bead [ _efcg ] ) ;
} ; } ; _gbgad := _dffge ( len ( _bdbbd ) , func ( _gdda , _acce int ) bool { _bdbff , _fcdcc := len ( _bead [ _gdda ] ) , len ( _bead [ _acce ] ) ; if _bdbff != _fcdcc { return _bdbff > _fcdcc ; } ; return _bdbbd . comp ( _gdda , _acce ) ; } ) ; if _adce { _fb . Log . Info ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076" , _gbgad ) ;
} ; _gadc := [ ] [ ] int { { _gbgad [ 0 ] } } ; _caceg : for _ , _abbc := range _gbgad [ 1 : ] { for _ebee , _gaed := range _gadc { for _ , _gfceb := range _gaed { if _bead [ _gfceb ] . has ( _abbc ) { _gadc [ _ebee ] = append ( _gaed , _abbc ) ; continue _caceg ; } ; } ; } ; _gadc = append ( _gadc , [ ] int { _abbc } ) ;
} ; if _adce { _fb . Log . Info ( "\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076" , _gadc ) ; } ; _af . SliceStable ( _gadc , func ( _eadf , _bfcc int ) bool { return len ( _gadc [ _eadf ] ) > len ( _gadc [ _bfcc ] ) } ) ; for _ , _egga := range _gadc { _af . Slice ( _egga , func ( _gdcbg , _abfb int ) bool { return _bdbbd . comp ( _egga [ _gdcbg ] , _egga [ _abfb ] ) } ) ;
} ; _faff := make ( [ ] rulingList , len ( _gadc ) ) ; for _gece , _fbc := range _gadc { _dafg := make ( rulingList , len ( _fbc ) ) ; for _cegfc , _egda := range _fbc { _dafg [ _cegfc ] = _bdbbd [ _egda ] ; } ; _faff [ _gece ] = _dafg ; } ; if _adce { _fb . Log . Info ( "\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076" , _faff ) ;
} ; var _gddbb [ ] rulingList ; for _ , _ccecg := range _faff { if _aggd , _bacb := _ccecg . isActualGrid ( ) ; _bacb { _ccecg = _aggd ; _ccecg = _ccecg . snapToGroups ( ) ; _gddbb = append ( _gddbb , _ccecg ) ; } ; } ; if _adce { _dfbc ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073" , _gddbb ) ;
_fb . Log . Info ( "\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064" , len ( _faff ) , len ( _gddbb ) ) ; } ; return _gddbb ; } ;
// _cagd rounds `_dadb` to the nearest multiple of `_defee` and returns it as an int.
// A `_defee` of 0 is treated as 1.
func _cagd ( _dadb float64 , _defee int ) int { if _defee == 0 { _defee = 1 ;
} ; _badd := float64 ( _defee ) ; return int ( _gc . Round ( _dadb / _badd ) * _badd ) ; } ;
// vertsHorzs splits the list by ruling kind and returns the `_aafafg` ("vertical" in `_afbfd`)
// rulings first and the `_eccgd` ("horizontal") rulings second. Rulings of any other kind are
// left out of both results.
func ( _facc rulingList ) vertsHorzs ( ) ( rulingList , rulingList ) { var _cgbcg , _cfafb rulingList ; for _ , _bfee := range _facc { switch _bfee . _agcgg { case _aafafg : _cgbcg = append ( _cgbcg , _bfee ) ;
case _eccgd : _cfafb = append ( _cfafb , _bfee ) ; } ; } ; return _cgbcg , _cfafb ; } ;
// getFontDict returns the font object registered under the name `_fbgfe` in the resources `_fgf`.
// When there are no resources it logs at debug level and returns (nil, nil); when the name is not
// found it logs and returns the error "font not in resources".
func ( _aeg * textObject ) getFontDict ( _fbgfe string ) ( _bcbee _gdd . PdfObject , _eacd error ) { _ecd := _aeg . _fgf ; if _ecd == nil { _fb . Log . Debug ( "g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071" , _fbgfe ) ;
return nil , nil ; } ; _bcbee , _ggad := _ecd . GetFontByName ( _gdd . PdfObjectName ( _fbgfe ) ) ; if ! _ggad { _fb . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071" , _fbgfe ) ;
return nil , _d . New ( "f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073" ) ; } ; return _bcbee , nil ; } ;
// bbox returns the paragraph's rectangle.
func ( _abgfd * textPara ) bbox ( ) _ac . PdfRectangle { return _abgfd . PdfRectangle } ;
// asTiling converts the rulings of a grid into a gridTiling.
//
// Steps, in order:
//  1. Adjacent rulings that align in both their primary and secondary coordinate are reported as
//     duplicates in the error log; they are not removed.
//  2. The list is sorted with sortStrict and split into vertical and horizontal rulings, whose
//     primary coordinates give the column and row boundaries. When there is exactly one column or
//     one row boundary an empty tiling is returned.
//  3. One tile is built with `_gcgf` for every cell between neighbouring boundaries, from the
//     rulings found on its four sides, and stored at index row*columns+column.
//  4. Horizontal pass: in every row, a tile whose `_fcgbfd` flag is set absorbs the tiles to its
//     right until one with the `_fgdd` flag set has been absorbed; the results are stored per row
//     under the tile's Llx.
//  5. Vertical pass: from the top row down, each tile not yet absorbed extends into the rows below
//     while its `_cccb` flag is unset and the tile below starts at the same Llx and ends at the
//     same Urx. Tiles reaching row 0 get `_cccb` set. Tiles that are complete are stored under
//     the Lly of the row they started in and their Llx.
// NOTE(review): step 2 indexes the boundary lists when they are empty, and the duplicate check
// slices the list from index 1, so an empty ruling list would panic — confirm that callers only
// pass lists accepted by isActualGrid.
func ( _fggge rulingList ) asTiling ( ) gridTiling { if _bdea { _fb . Log . Info ( "r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d" , len ( _fggge ) ) ;
} ; for _cacc , _ddgce := range _fggge [ 1 : ] { _fgcd := _fggge [ _cacc ] ; if _fgcd . alignsPrimary ( _ddgce ) && _fgcd . alignsSec ( _ddgce ) { _fb . Log . Error ( "a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073" , _ddgce , _fgcd ) ;
} ; } ; _fggge . sortStrict ( ) ; _fggge . log ( "\u0073n\u0061\u0070\u0070\u0065\u0064" ) ; _aeccd , _cddgg := _fggge . vertsHorzs ( ) ; _ggbaa := _aeccd . primaries ( ) ; _gacf := _cddgg . primaries ( ) ; _dbdc := len ( _ggbaa ) - 1 ; _ffdd := len ( _gacf ) - 1 ; if _dbdc == 0 || _ffdd == 0 { return gridTiling { } ;
} ; _fgfa := _ac . PdfRectangle { Llx : _ggbaa [ 0 ] , Urx : _ggbaa [ _dbdc ] , Lly : _gacf [ 0 ] , Ury : _gacf [ _ffdd ] } ; if _bdea { _fb . Log . Info ( "\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064" , len ( _aeccd ) ) ;
for _acgaf , _gafca := range _aeccd { _cag . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _acgaf , _gafca ) ; } ; _fb . Log . Info ( "\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064" , len ( _cddgg ) ) ;
for _cbbff , _edff := range _cddgg { _cag . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _cbbff , _edff ) ; } ; _fb . Log . Info ( "\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f" , _dbdc , _ffdd , _ggbaa , _gacf ) ;
} ; _gbde := make ( [ ] gridTile , _dbdc * _ffdd ) ; for _fgbgbe := _ffdd - 1 ; _fgbgbe >= 0 ; _fgbgbe -- { _gagcd := _gacf [ _fgbgbe ] ; _gdgg := _gacf [ _fgbgbe + 1 ] ; for _bfed := 0 ; _bfed < _dbdc ; _bfed ++ { _fedaa := _ggbaa [ _bfed ] ; _bfeef := _ggbaa [ _bfed + 1 ] ; _gbgc := _aeccd . findPrimSec ( _fedaa , _gagcd ) ;
_fbec := _aeccd . findPrimSec ( _bfeef , _gagcd ) ; _geaa := _cddgg . findPrimSec ( _gagcd , _fedaa ) ; _cefg := _cddgg . findPrimSec ( _gdgg , _fedaa ) ; _gafad := _ac . PdfRectangle { Llx : _fedaa , Urx : _bfeef , Lly : _gagcd , Ury : _gdgg } ; _bgegd := _gcgf ( _gafad , _gbgc , _fbec , _geaa , _cefg ) ;
_gbde [ _fgbgbe * _dbdc + _bfed ] = _bgegd ; if _bdea { _cag . Printf ( "\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a" , _bfed , _fgbgbe , _bgegd . String ( ) , _bgegd . Width ( ) , _bgegd . Height ( ) ) ;
} ; } ; } ; if _bdea { _fb . Log . Info ( "r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066" , _fgfa ) ;
} ; _accd := make ( [ ] map [ float64 ] gridTile , _ffdd ) ; for _adec := _ffdd - 1 ; _adec >= 0 ; _adec -- { if _bdea { _cag . Printf ( "\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a" , _adec ) ; } ; _accd [ _adec ] = make ( map [ float64 ] gridTile , _dbdc ) ; for _edecd := 0 ; _edecd < _dbdc ;
_edecd ++ { _ggff := _gbde [ _adec * _dbdc + _edecd ] ; if _bdea { _cag . Printf ( "\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _edecd , _ggff ) ; } ; if ! _ggff . _fcgbfd { continue ; } ; _fcbbg := _edecd ; for _ebceb := _edecd + 1 ; ! _ggff . _fgdd && _ebceb < _dbdc ;
_ebceb ++ { _cgff := _gbde [ _adec * _dbdc + _ebceb ] ; _ggff . Urx = _cgff . Urx ; _ggff . _gbac = _ggff . _gbac || _cgff . _gbac ; _ggff . _cccb = _ggff . _cccb || _cgff . _cccb ; _ggff . _fgdd = _cgff . _fgdd ; if _bdea { _cag . Printf ( "\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a" , _ebceb , _cgff , _ggff ) ;
} ; _fcbbg = _ebceb ; } ; if _bdea { _cag . Printf ( " \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n" , _edecd , _fcbbg , _ggff ) ; } ; _edecd = _fcbbg ; _accd [ _adec ] [ _ggff . Llx ] = _ggff ; } ; } ; _afee := make ( map [ float64 ] map [ float64 ] gridTile , _ffdd ) ;
_ggbcc := make ( map [ float64 ] map [ float64 ] struct { } , _ffdd ) ; for _fbfge := _ffdd - 1 ; _fbfge >= 0 ; _fbfge -- { _abacf := _gbde [ _fbfge * _dbdc ] . Lly ; _afee [ _abacf ] = make ( map [ float64 ] gridTile , _dbdc ) ; _ggbcc [ _abacf ] = make ( map [ float64 ] struct { } , _dbdc ) ; } ; if _bdea { _fb . Log . Info ( "\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066" , _fgfa ) ;
} ; for _bffa := _ffdd - 1 ; _bffa >= 0 ; _bffa -- { _abace := _gbde [ _bffa * _dbdc ] . Lly ; _agdag := _accd [ _bffa ] ; if _bdea { _cag . Printf ( "\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a" , _bffa ) ; } ; for _ , _bfag := range _aggbbd ( _agdag ) { if _ , _ccca := _ggbcc [ _abace ] [ _bfag ] ;
_ccca { continue ; } ; _fgabd := _agdag [ _bfag ] ; if _bdea { _cag . Printf ( " \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a" , _fgabd . String ( ) ) ; } ; for _ggfa := _bffa - 1 ; _ggfa >= 0 ; _ggfa -- { if _fgabd . _cccb { break ; } ; _ffdf := _accd [ _ggfa ] ; _dggc , _decg := _ffdf [ _bfag ] ;
if ! _decg { break ; } ; if _dggc . Urx != _fgabd . Urx { break ; } ; _fgabd . _cccb = _dggc . _cccb ; _fgabd . Lly = _dggc . Lly ; if _bdea { _cag . Printf ( "\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a" , _dggc . String ( ) , _fgabd . String ( ) ) ;
} ; _ggbcc [ _dggc . Lly ] [ _dggc . Llx ] = struct { } { } ; } ; if _bffa == 0 { _fgabd . _cccb = true ; } ; if _fgabd . complete ( ) { _afee [ _abace ] [ _bfag ] = _fgabd ; } ; } ; } ; _dcgf := gridTiling { PdfRectangle : _fgfa , _egfga : _dccd ( _afee ) , _ebbb : _afbfb ( _afee ) , _cgdc : _afee } ; _dcgf . log ( "\u0043r\u0065\u0061\u0074\u0065\u0064" ) ;
return _dcgf ; } ;
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it replaces the text, text marks and tables stored in the
// receiver with those computed from the marks that `_ecde` accepts for `bbox`.
// Can be called multiple times in a row with different bounding boxes.
//
// The selected marks are processed separately for the orientations 0, 90, 180 and 270, in that
// order; marks with any other orientation value are ignored.
func (_gdab *PageText) ApplyArea(bbox _ac.PdfRectangle) {
	selected := make([]*textMark, 0, len(_gdab._aged))
	for _, mark := range _gdab._aged {
		if _ecde(mark.bbox(), bbox) {
			selected = append(selected, mark)
		}
	}

	var paras paraList
	remaining := len(selected)
	for angle := 0; angle < 360 && remaining > 0; angle += 90 {
		// At most `remaining` marks can still have this orientation, so that is the
		// capacity to reserve. The previous hint, len(selected)-remaining, was 0 on the
		// first pass and afterwards the number of marks already consumed.
		group := make([]*textMark, 0, remaining)
		for _, mark := range selected {
			if mark._adbf == angle {
				group = append(group, mark)
			}
		}
		if len(group) > 0 {
			found := _dgg(group, _gdab._abea, nil, nil)
			paras = append(paras, found...)
			remaining -= len(group)
		}
	}

	buf := new(_e.Buffer)
	paras.writeText(buf)
	_gdab._cged = buf.String()
	_gdab._gccf = paras.toTextMarks()
	_gdab._dcgg = paras.tables()
}

// gridTile is a rectangle with four boolean flags.
// NOTE(review): the flags presumably record which sides of the tile are bounded by a ruling
// (`_fcgbfd` left, `_fgdd` right, `_cccb` bottom, `_gbac` top) — confirm against the code that
// builds tiles.
type gridTile struct {
	_ac.PdfRectangle
	_gbac, _fcgbfd, _cccb, _fgdd bool
}

// primMinMax returns the smallest and the largest `_edcba` value among the rulings.
// It panics when the list is empty.
func (_afcc rulingList) primMinMax() (float64, float64) {
	lowest, highest := _afcc[0]._edcba, _afcc[0]._edcba
	for _, entry := range _afcc[1:] {
		switch value := entry._edcba; {
		case value < lowest:
			lowest = value
		case value > highest:
			highest = value
		}
	}
	return lowest, highest
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// String returns a human readable description of `ss`: the number of
// subpaths held in `_fegc` and the value of the `_dcef` flag.
func (ss *shapesState) String() string {
	const format = "\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d"
	count := len(ss._fegc)
	return _cag.Sprintf(format, count, ss._dcef)
}

// textWord is a rectangle together with its text and the marks it was built
// from.
type textWord struct {
	_ac.PdfRectangle
	// _fgbda is the value wordBag.highestWord range-checks.
	// NOTE(review): presumably the word's depth on the page — confirm.
	_fgbda float64
	// _gebf is the word's text (textLine.text concatenates it).
	_gebf string
	// _ggabc holds the marks belonging to the word.
	_ggabc []*textMark
	// _abeg scales the tolerances in wordBag.removeDuplicates and
	// wordBag.firstReadingIndex. NOTE(review): presumably a font size — confirm.
	_abeg float64
	// _gcefe makes textLine.text and textLine.toTextMarks emit a space
	// before the word.
	_gcefe bool
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (e *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	ctx := &imageExtractContext{_ga: options}
	if err := ctx.extractContentStreamImages(e._gg, e._cb); err != nil {
		return nil, err
	}
	images := &PageImages{Images: ctx._caa}
	return images, nil
}

// RenderMode bit flags. iota starts at 0 in this block, so the values are
// 1, 2 and 4 respectively.
const (
	RenderModeStroke RenderMode = 1 << iota
	RenderModeFill
	RenderModeClip
)

// markKind is an integer enumeration; its values are declared elsewhere.
type markKind int

// _agad returns the upper y coordinate of `_ddfb` minus the lower y
// coordinate of the bounding box of `_egaf`.
func _agad(_ddfb _ac.PdfRectangle, _egaf bounded) float64 {
	box := _egaf.bbox()
	return _ddfb.Ury - box.Lly
}

// _ecde reports whether both _bcbef and _aecg accept the pair of rectangles.
// _aecg is only consulted when _bcbef succeeds.
func _ecde(_dacg, _gec _ac.PdfRectangle) bool {
	if !_bcbef(_dacg, _gec) {
		return false
	}
	return _aecg(_dacg, _gec)
}
// hasLines reports whether _ecde accepts the cell's rectangle paired with the
// rectangle of at least one of the given lines. When the `_agede` debug flag
// is set, every comparison is printed to standard output.
func (cell compositeCell) hasLines(lines []*textLine) bool {
	total := len(lines)
	for idx, line := range lines {
		hit := _ecde(cell.PdfRectangle, line.PdfRectangle)
		if _agede {
			_cag.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", hit, idx, total)
			_cag.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", cell)
			_cag.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if hit {
			return true
		}
	}
	return false
}

// writeText writes the text of every paragraph whose `_affbf` flag is unset.
// Between a written paragraph and its successor in the list it emits a single
// space when _abae accepts the pair, otherwise two newlines. Two newlines are
// always written at the end. Errors returned by the writer are not inspected.
func (paras paraList) writeText(w _c.Writer) {
	last := len(paras) - 1
	for idx, para := range paras {
		if para._affbf {
			continue
		}
		para.writeText(w)
		if idx == last {
			continue
		}
		if _abae(para, paras[idx+1]) {
			w.Write([]byte("\u0020"))
			continue
		}
		w.Write([]byte("\u000a"))
		w.Write([]byte("\u000a"))
	}
	w.Write([]byte("\u000a"))
	w.Write([]byte("\u000a"))
}

// taken reports whether the paragraph is nil or has its `_cfeff` flag set.
func (p *textPara) taken() bool {
	if p == nil {
		return true
	}
	return p._cfeff
}

// isExportable returns true when `_ebabc` is set. Otherwise it inspects the
// cell at column 0 of every row and returns true as soon as one of them is
// missing, or has text longer than one rune that `_gbfad` does not match.
// It returns false when every row fails that test.
func (t *textTable) isExportable() bool {
	if t._ebabc {
		return true
	}
	for row := 0; row < t._dcbdf; row++ {
		cell := t.get(0, row)
		if cell == nil {
			return true
		}
		text := cell.text()
		runes := _g.RuneCountInString(text)
		matched := _gbfad.MatchString(text)
		if runes > 1 && !matched {
			return true
		}
	}
	return false
}

// getDepthIdx converts `_fbe` with _agfb and clamps the result to the range
// spanned by the first and last entries of depthIndexes().
func (b *wordBag) getDepthIdx(_fbe float64) int {
	indexes := b.depthIndexes()
	idx := _agfb(_fbe)
	switch first, last := indexes[0], indexes[len(indexes)-1]; {
	case idx < first:
		return first
	case idx > last:
		return last
	default:
		return idx
	}
}

// _aggbbd returns the keys of the map sorted in increasing order.
func _aggbbd(tiles map[float64]gridTile) []float64 {
	keys := make([]float64, len(tiles))
	next := 0
	for key := range tiles {
		keys[next] = key
		next++
	}
	_af.Float64s(keys)
	return keys
}

// _daca is a package-level constant; its use is outside this section.
const _daca = 10

// quadraticTo adds the point given by the last two arguments to the current
// path. The first two arguments are ignored.
func (ss *shapesState) quadraticTo(_gbb, _feaf, x, y float64) {
	if _efca {
		_fb.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	ss.addPoint(x, y)
}

// sort orders the words of every bin in `_cfeeb` in place, using _cafa as the
// comparison function.
func (b *wordBag) sort() {
	for _, bin := range b._cfeeb {
		words := bin
		less := func(i, j int) bool { return _cafa(words[i], words[j]) < 0 }
		_af.Slice(words, less)
	}
}

// topoOrder runs a depth-first traversal over the paragraphs, following an
// edge from i to j whenever readBefore(ordering, i, j) holds, and records
// each index after all of its successors. The recorded sequence is passed
// through _ffcc before being returned.
func (paras paraList) topoOrder() []int {
	if _dfgc {
		_fb.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	count := len(paras)
	visited := make([]bool, count)
	order := make([]int, 0, count)
	llyOrder := paras.llyOrdering()

	var visit func(idx int)
	visit = func(idx int) {
		visited[idx] = true
		for next := range visited {
			if !visited[next] && paras.readBefore(llyOrder, idx, next) {
				visit(next)
			}
		}
		order = append(order, idx)
	}
	for idx := range visited {
		if !visited[idx] {
			visit(idx)
		}
	}
	return _ffcc(order)
}

// stateStack is a stack of text states.
type stateStack []*textState

// highestWord returns the first word in bin `_caacg` whose `_fgbda` value lies
// in the closed range [`_gdbe`, `_bfef`], or nil when there is none.
func (b *wordBag) highestWord(_caacg int, _gdbe, _bfef float64) *textWord {
	for _, word := range b._cfeeb[_caacg] {
		inRange := _gdbe <= word._fgbda && word._fgbda <= _bfef
		if inRange {
			return word
		}
	}
	return nil
}

// xMean returns the mean of the x coordinates of the ruling's two end points.
func (r lineRuling) xMean() float64 {
	sum := r._gffd.X + r._eeebc.X
	return 0.5 * sum
}
2022-02-05 21:34:53 +00:00
// String returns a description of `l`.
2022-03-13 12:41:53 +00:00
// String formats the line as its `_bcgf` value, its rectangle, its font size
// (`_eabc`) and its quoted text.
func (l *textLine) String() string {
	const format = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _cag.Sprintf(format, l._bcgf, l.PdfRectangle, l._eabc, l.text())
}

// splitSec sorts the list in place by (`_bfeag`, `_cbba`) and partitions it
// into groups. A group starts from the first ruling not yet assigned and
// absorbs, in list order, every unassigned ruling for which alignsSec holds
// against some ruling already in the group. The list must not be empty.
func (rl rulingList) splitSec() []rulingList {
	_af.Slice(rl, func(i, j int) bool {
		a, b := rl[i], rl[j]
		if a._bfeag == b._bfeag {
			return a._cbba < b._cbba
		}
		return a._bfeag < b._bfeag
	})

	assigned := make(map[*ruling]struct{}, len(rl))
	// collect builds the group seeded by `seed`, marking every member as assigned.
	collect := func(seed *ruling) rulingList {
		group := rulingList{seed}
		assigned[seed] = struct{}{}
		for _, candidate := range rl {
			if _, done := assigned[candidate]; done {
				continue
			}
			for _, member := range group {
				if !candidate.alignsSec(member) {
					continue
				}
				group = append(group, candidate)
				assigned[candidate] = struct{}{}
				break
			}
		}
		return group
	}

	first, rest := rl[0], rl[1:]
	groups := []rulingList{collect(first)}
	for _, r := range rest {
		if _, done := assigned[r]; !done {
			groups = append(groups, collect(r))
		}
	}
	return groups
}

// _bafc stamps `_agec` with the current offset, appends it to `_fgbg` and
// advances the offset by the byte length of the mark's text.
func _bafc(_fgbg []TextMark, _cbfa *int, _agec TextMark) []TextMark {
	_agec.Offset = *_cbfa
	out := append(_fgbg, _agec)
	*_cbfa += len(_agec.Text)
	return out
}

// clear resets the subpath to its zero value.
func (sp *subpath) clear() {
	var zero subpath
	*sp = zero
}

// textMark is the internal representation of a piece of text on a page.
// The field notes below follow the mapping used by ToTextMark.
type textMark struct {
	_ac.PdfRectangle
	_adbf  int            // exported as Orientation
	_cadaf string         // exported as Text
	_bcfd  string         // exported as Original
	_afac  *_ac.PdfFont   // exported as Font
	_gba   float64        // exported as FontSize
	_dcabg float64
	_ccbb  _afd.Matrix
	_cgfb  _afd.Point
	_bagc  _ac.PdfRectangle // exported as BBox
	_bfc   _ca.Color      // exported as FillColor
	_bdba  _ca.Color      // exported as StrokeColor
}

// allWords returns the words of every bin in `_cfeeb`. Bins are visited in
// map iteration order, so the order across bins is unspecified.
func (b *wordBag) allWords() []*textWord {
	var words []*textWord
	for idx := range b._cfeeb {
		words = append(words, b._cfeeb[idx]...)
	}
	return words
}

// bbox returns the word's rectangle.
func (w *textWord) bbox() _ac.PdfRectangle {
	return w.PdfRectangle
}

// minDepth returns `_egbb` reduced by the distance between the bag's upper y
// coordinate and `_aege`.
func (b *wordBag) minDepth() float64 {
	return b._egbb - (b.Ury - b._aege)
}

// secMinMax returns the smallest `_bfeag` and the largest `_cbba` found in
// the list. The list must not be empty: element 0 is read unconditionally.
func (rl rulingList) secMinMax() (float64, float64) {
	lo, hi := rl[0]._bfeag, rl[0]._cbba
	for i := 1; i < len(rl); i++ {
		r := rl[i]
		if r._bfeag < lo {
			lo = r._bfeag
		}
		if r._cbba > hi {
			hi = r._cbba
		}
	}
	return lo, hi
}

// moveLP concatenates a translation by (`_bedg`, `_gbed`) onto `_bab` and
// then copies `_bab` into `_bgc`.
func (to *textObject) moveLP(_bedg, _gbed float64) {
	translation := _afd.NewMatrix(1, 0, 0, 1, _bedg, _gbed)
	to._bab.Concat(translation)
	to._bgc = to._bab
}

// checkWidth returns the difference `_cgdgg - _faaf` and whether it does not
// exceed `_gbbd`.
func (r rectRuling) checkWidth(_faaf, _cgdgg float64) (float64, bool) {
	width := _cgdgg - _faaf
	return width, width <= _gbbd
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// Elements returns the TextMarks in `ma`.
func (ma *TextMarkArray) Elements() []TextMark {
	return ma._dgf
}

// pop removes the top state from the stack and returns a pointer to a copy
// of it. It returns nil when the stack is empty.
func (stack *stateStack) pop() *textState {
	if stack.empty() {
		return nil
	}
	states := *stack
	last := len(states) - 1
	top := *states[last]
	*stack = states[:last]
	return &top
}

// _cggg passes the absolute y distance and the absolute x distance between
// the two points, in that order, to _dgbd and returns its verdict.
func _cggg(_bcae, _aedbg _afd.Point) bool {
	dx := _gc.Abs(_bcae.X - _aedbg.X)
	dy := _gc.Abs(_bcae.Y - _aedbg.Y)
	return _dgbd(dy, dx)
}

// _abae returns true when either paragraph has its `_affbf` flag set;
// otherwise it returns what _gcceb says about the difference of their depths.
func _abae(_cdff, _gcgc *textPara) bool {
	if _cdff._affbf || _gcgc._affbf {
		return true
	}
	gap := _cdff.depth() - _gcgc.depth()
	return _gcceb(gap)
}

// toCellTextMarks concatenates the text marks of the paragraph's lines.
// When `_ecgaa` is set and a line other than the last ends in a hyphen, the
// line's marks are passed through _fafc and no separator follows. Every other
// non-final line is followed by the separator that _aaee derives from the
// `_bcgf` values of the line and its successor.
func (p *textPara) toCellTextMarks(_cbbg *int) []TextMark {
	var marks []TextMark
	lastIdx := len(p._ddeb) - 1
	for idx, line := range p._ddeb {
		isLast := idx == lastIdx
		lineMarks := line.toTextMarks(_cbbg)
		hyphenated := _ecgaa && line.endsInHyphen() && !isLast
		if hyphenated {
			lineMarks = _fafc(lineMarks, _cbbg)
		}
		marks = append(marks, lineMarks...)
		if !hyphenated && !isLast {
			separator := _aaee(line._bcgf, p._ddeb[idx+1]._bcgf)
			marks = _bafe(marks, _cbbg, separator)
		}
	}
	return marks
}

// inDiacriticArea compares the receiver with `_ffge`: the sum of the
// differences of their left and right edges must be smaller in magnitude than
// Width()*_ffcd, and the difference of their lower edges smaller in magnitude
// than Height()*_ffcd.
func (tm *textMark) inDiacriticArea(_ffge *textMark) bool {
	dLeft := tm.Llx - _ffge.Llx
	dRight := tm.Urx - _ffge.Urx
	dBottom := tm.Lly - _ffge.Lly
	if _gc.Abs(dLeft+dRight) < tm.Width()*_ffcd {
		return _gc.Abs(dBottom) < tm.Height()*_ffcd
	}
	return false
}

// _aecc flattens the lists into one and returns the result of vertsHorzs on it.
func _aecc(_fcfe []rulingList) (rulingList, rulingList) {
	var all rulingList
	for idx := range _fcfe {
		all = append(all, _fcfe[idx]...)
	}
	return all.vertsHorzs()
}

// toTextMarks returns the text marks of the line's words. A space mark is
// inserted through _bafe before every word whose `_gcefe` flag is set.
func (l *textLine) toTextMarks(_bfgd *int) []TextMark {
	var marks []TextMark
	for _, word := range l._cadc {
		if word._gcefe {
			marks = _bafe(marks, _bfgd, "\u0020")
		}
		wordMarks := word.toTextMarks(_bfgd)
		marks = append(marks, wordMarks...)
	}
	return marks
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// ToTextMark returns the public view of `tm`. The Offset field of the result
// is left at its zero value.
func (tm *textMark) ToTextMark() TextMark {
	var public TextMark
	public.Text = tm._cadaf
	public.Original = tm._bcfd
	public.BBox = tm._bagc
	public.Font = tm._afac
	public.FontSize = tm._gba
	public.FillColor = tm._bfc
	public.StrokeColor = tm._bdba
	public.Orientation = tm._adbf
	return public
}
// _gff adapts the three-argument predicate `_adc` into a two-argument one by
// binding its final float64 argument to `_gbbag`.
func _gff ( _adc func ( * wordBag , * textWord , float64 ) bool , _gbbag float64 ) func ( * wordBag , * textWord ) bool { return func ( _ecdd * wordBag , _dabf * textWord ) bool { return _adc ( _ecdd , _dabf , _gbbag ) } ; } ;
// extractContentStreamImages parses the content stream `_ggg` and runs a
// content stream processor over it with `processOperand` registered for all
// operands, using `_ebaf` as the resources. The `_gfb` cache map and the
// `_ga` options are created on first use when nil. Parse and Process errors
// are returned unchanged.
func ( _cga * imageExtractContext ) extractContentStreamImages ( _ggg string , _ebaf * _ac . PdfPageResources ) error { _bd := _ag . NewContentStreamParser ( _ggg ) ;
_ee , _cfe := _bd . Parse ( ) ; if _cfe != nil { return _cfe ; } ; if _cga . _gfb == nil { _cga . _gfb = map [ * _gdd . PdfObjectStream ] * cachedImage { } ; } ; if _cga . _ga == nil { _cga . _ga = & ImageExtractOptions { } ; } ; _aec := _ag . NewContentStreamProcessor ( * _ee ) ; _aec . AddHandler ( _ag . HandlerConditionEnumAllOperands , "" , _cga . processOperand ) ;
return _aec . Process ( _ebaf ) ; } ;
// add inserts `_eggbc` into the set.
func ( _agff intSet ) add ( _eggbc int ) { _agff [ _eggbc ] = struct { } { } } ;
// _cgfe reports whether the two values differ by at most `_bade`.
func _cgfe ( _cgea , _bbgg float64 ) bool { return _gc . Abs ( _cgea - _bbgg ) <= _bade } ;
// _ecdfc builds a table with `_agac` = 2 and `_dcbdf` = 2 and stores the four
// paragraphs at (0,0), (1,0), (0,1) and (1,1), in argument order.
func _ecdfc ( _gcfcg , _cdbb , _bacg , _caacf * textPara ) * textTable { _affc := & textTable { _agac : 2 , _dcbdf : 2 , _abccf : make ( map [ uint64 ] * textPara , 4 ) } ;
_affc . put ( 0 , 0 , _gcfcg ) ; _affc . put ( 1 , 0 , _cdbb ) ; _affc . put ( 0 , 1 , _bacg ) ; _affc . put ( 1 , 1 , _caacf ) ; return _affc ; } ;
// complete reports whether numBorders() equals 4.
func ( _caabc gridTile ) complete ( ) bool { return _caabc . numBorders ( ) == 4 } ;
// _gfe returns a text state with `_bgg` set to 100, `_acgdg` set to
// RenderModeFill and `_beb` set to `_dcc`; all other fields are zero.
func _gfe ( _dcc _ac . PdfRectangle ) textState { return textState { _bgg : 100 , _acgdg : RenderModeFill , _beb : _dcc } ;
} ;
// intersections splits the list's indexes by ruling kind (`_aafafg` versus
// `_eccgd`) and returns, for every ruling that intersects a ruling of the
// other kind, the set of indexes it intersects. The relation is recorded in
// both directions. It returns nil when there are fewer than `_fee`+1 rulings
// of the first kind or fewer than `_edcf`+1 of the second, and also (after a
// debug log entry) when the two kinds together exceed `_edaeb`.
func ( _accbc rulingList ) intersections ( ) map [ int ] intSet { var _gffa , _cgbeg [ ] int ; for _decbg , _gaec := range _accbc { switch _gaec . _agcgg { case _aafafg : _gffa = append ( _gffa , _decbg ) ; case _eccgd : _cgbeg = append ( _cgbeg , _decbg ) ; } ; } ; if len ( _gffa ) < _fee + 1 || len ( _cgbeg ) < _edcf + 1 { return nil ;
} ; if len ( _gffa ) + len ( _cgbeg ) > _edaeb { _fb . Log . Debug ( "\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064" , len ( _accbc ) , len ( _gffa ) , len ( _cgbeg ) ) ;
return nil ; } ; _bggdd := make ( map [ int ] intSet , len ( _gffa ) + len ( _cgbeg ) ) ; for _ , _cbad := range _gffa { for _ , _fceb := range _cgbeg { if _accbc [ _cbad ] . intersects ( _accbc [ _fceb ] ) { if _ , _cdbaf := _bggdd [ _cbad ] ; ! _cdbaf { _bggdd [ _cbad ] = make ( intSet ) ; } ;
if _ , _gcd := _bggdd [ _fceb ] ; ! _gcd { _bggdd [ _fceb ] = make ( intSet ) ; } ; _bggdd [ _cbad ] . add ( _fceb ) ; _bggdd [ _fceb ] . add ( _cbad ) ; } ; } ; } ; return _bggdd ; } ;
// subpath holds a sequence of points and a boolean flag.
// NOTE(review): `_ega` presumably records that the path was closed — confirm
// against subpath.close.
type subpath struct { _egd [ ] _afd . Point ; _ega bool ; } ;
// applyTables returns a new list holding one newTablePara() per table,
// followed by every paragraph of the receiver whose `_cfeff` flag is unset.
func ( _cgfc paraList ) applyTables ( _gfbd [ ] * textTable ) paraList { var _acede paraList ;
for _ , _ddeg := range _gfbd { _acede = append ( _acede , _ddeg . newTablePara ( ) ) ; } ; for _ , _cfedf := range _cgfc { if _cfedf . _cfeff { continue ; } ; _acede = append ( _acede , _cfedf ) ; } ; return _acede ; } ;
// removeDuplicates drops words that duplicate another word of the same bin.
// For every depth index it takes the first word of that bin, derives a
// tolerance (`_bbce` times the word's `_abeg`) and examines each bin returned
// by depthBand for the range starting at that word's `_fgbda`. Within a bin,
// a word is a duplicate of an earlier surviving word when both have the same
// text and all four rectangle coordinates differ by less than the tolerance.
// Surviving words are compacted in place; a bin left empty is deleted.
func ( _bdc * wordBag ) removeDuplicates ( ) { if _ecfa { _fb . Log . Info ( "r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071" , _bdc . text ( ) ) ;
} ;
// Bins emptied by an earlier iteration are skipped by the length check.
for _ , _fbffd := range _bdc . depthIndexes ( ) { if len ( _bdc . _cfeeb [ _fbffd ] ) == 0 { continue ; } ; _adaf := _bdc . _cfeeb [ _fbffd ] [ 0 ] ; _dbgae := _bbce * _adaf . _abeg ; _addc := _adaf . _fgbda ; for _ , _cdad := range _bdc . depthBand ( _addc , _addc + _dbgae ) { _cffa := map [ * textWord ] struct { } { } ;
_fbda := _bdc . _cfeeb [ _cdad ] ; for _ , _edccad := range _fbda { if _ , _gcac := _cffa [ _edccad ] ; _gcac { continue ; } ; for _ , _ddba := range _fbda { if _ , _efba := _cffa [ _ddba ] ; _efba { continue ; } ; if _ddba != _edccad && _ddba . _gebf == _edccad . _gebf && _gc . Abs ( _ddba . Llx - _edccad . Llx ) < _dbgae && _gc . Abs ( _ddba . Urx - _edccad . Urx ) < _dbgae && _gc . Abs ( _ddba . Lly - _edccad . Lly ) < _dbgae && _gc . Abs ( _ddba . Ury - _edccad . Ury ) < _dbgae { _cffa [ _ddba ] = struct { } { } ;
} ; } ; } ;
// Compact the survivors to the front of the slice, then truncate it.
if len ( _cffa ) > 0 { _bgeg := 0 ; for _ , _ccbd := range _fbda { if _ , _gae := _cffa [ _ccbd ] ; ! _gae { _fbda [ _bgeg ] = _ccbd ; _bgeg ++ ; } ; } ; _bdc . _cfeeb [ _cdad ] = _fbda [ : len ( _fbda ) - len ( _cffa ) ] ; if len ( _bdc . _cfeeb [ _cdad ] ) == 0 { delete ( _bdc . _cfeeb , _cdad ) ;
} ; } ; } ; } ; } ;
// compositeRowCorridors maps every row index from 1 to `_dcbdf`-1 that has at
// least one entry in `_cead` to the value _dedd computes from that row's
// composite cells. Row 0 is never included.
func ( _agfbe * textTable ) compositeRowCorridors ( ) map [ int ] [ ] float64 { _bacd := make ( map [ int ] [ ] float64 , _agfbe . _dcbdf ) ; if _agede { _fb . Log . Info ( "c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064" , _agfbe . _dcbdf ) ;
} ; for _dadbd := 1 ; _dadbd < _agfbe . _dcbdf ; _dadbd ++ { var _efcb [ ] compositeCell ; for _aagb := 0 ; _aagb < _agfbe . _agac ; _aagb ++ { if _bgcb , _dage := _agfbe . _cead [ _addg ( _aagb , _dadbd ) ] ; _dage { _efcb = append ( _efcb , _bgcb ) ; } ; } ; if len ( _efcb ) == 0 { continue ;
} ; _aagfa := _dedd ( _efcb ) ; _bacd [ _dadbd ] = _aagfa ; if _agede { _cag . Printf ( "\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a" , _dadbd , _aagfa ) ; } ; } ; return _bacd ; } ;
// getFillColor converts the non-stroking colorspace and color of `_gbc` with _aecce.
func ( _cgfa * textObject ) getFillColor ( ) _ca . Color { return _aecce ( _cgfa . _gbc . ColorspaceNonStroking , _cgfa . _gbc . ColorNonStroking ) ;
} ;
// _efae returns the negated lower y coordinate of the bounding box of `_faab`.
func _efae ( _faab bounded ) float64 { return - _faab . bbox ( ) . Lly } ;
// _addg packs two indexes into a single key: the first one multiplied by
// 0x1000000 plus the second. Keys are unique only while the second index
// stays below 0x1000000.
func _addg ( _cbag , _cdeca int ) uint64 { return uint64 ( _cbag ) * 0x1000000 + uint64 ( _cdeca ) } ;
// _dgba builds a ruling of kind `_eccgd` from the rectangle: `_edcba` is its
// lower y coordinate and `_bfeag`/`_cbba` are its left and right x coordinates.
func _dgba ( _dgfb _ac . PdfRectangle ) * ruling { return & ruling { _agcgg : _eccgd , _edcba : _dgfb . Lly , _bfeag : _dgfb . Llx , _cbba : _dgfb . Urx } ;
} ;
// reorder permutes the list in place so that position i holds the element
// previously at index `_cggd`[i]. Positions beyond len(`_cggd`) end up nil.
func ( _egbg paraList ) reorder ( _cggd [ ] int ) { _gdfa := make ( paraList , len ( _egbg ) ) ; for _gcbe , _afdgf := range _cggd { _gdfa [ _gcbe ] = _egbg [ _afdgf ] ; } ; copy ( _egbg , _gdfa ) ; } ;
// closePath closes the most recent subpath. When the `_dcef` flag is set a
// new subpath is first created from `_dffc` and the flag is cleared. When the
// flag is unset and there is no subpath at all, the call does nothing apart
// from optional debug logging.
func ( _dgbb * shapesState ) closePath ( ) { if _dgbb . _dcef { _dgbb . _fegc = append ( _dgbb . _fegc , _aab ( _dgbb . _dffc ) ) ;
_dgbb . _dcef = false ; } else if len ( _dgbb . _fegc ) == 0 { if _efca { _fb . Log . Debug ( "\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068" ) ; } ; _dgbb . _dcef = false ; return ; } ; _dgbb . _fegc [ len ( _dgbb . _fegc ) - 1 ] . close ( ) ;
if _efca { _fb . Log . Info ( "\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073" , _dgbb ) ; } ; } ;
// getStrokeColor converts the stroking colorspace and color of `_gbc` with _aecce.
func ( _fcaa * textObject ) getStrokeColor ( ) _ca . Color { return _aecce ( _fcaa . _gbc . ColorspaceStroking , _fcaa . _gbc . ColorStroking ) ; } ;
// lineRuling is a ruling described by a kind, a mark kind, a color and two
// end points.
type lineRuling struct {
	_fbab rulingKind
	_ebbe markKind
	_ca.Color
	_gffd  _afd.Point
	_eeebc _afd.Point
}

// moveTextSetLeading stores the negated `_bdb` in `_fga._dgc` and then moves
// the line position by (`_aaf`, `_bdb`) through moveLP.
func (to *textObject) moveTextSetLeading(_aaf, _bdb float64) {
	to._fga._dgc = -_bdb
	to.moveLP(_aaf, _bdb)
}

// firstReadingIndex starts from bin `_cded` and scans the bins returned by
// depthBand for the range that begins at (`_cded`+1)*`_eddf` and extends by
// `_eaeg` times the `_abeg` value of the first word of bin `_cded`. It returns
// the index of the bin whose first word _cafa orders before all others,
// keeping the earliest candidate on ties.
func (b *wordBag) firstReadingIndex(_cded int) int {
	firstSize := b.firstWord(_cded)._abeg
	bandStart := float64(_cded+1) * _eddf
	// Kept as a single expression so the arithmetic matches the original exactly.
	bandEnd := bandStart + _eaeg*firstSize

	best := _cded
	for _, candidate := range b.depthBand(bandStart, bandEnd) {
		if _cafa(b.firstWord(candidate), b.firstWord(best)) < 0 {
			best = candidate
		}
	}
	return best
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// String returns a human readable description of `path`.
// With five points or fewer the point count and all points are printed;
// otherwise the count, the first two points and the last point are printed.
func ( _befe * subpath ) String ( ) string { _gfae := _befe . _egd ; _bccf := len ( _gfae ) ; if _bccf <= 5 { return _cag . Sprintf ( "\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f" , _bccf , _gfae ) ; } ; return _cag . Sprintf ( "\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f" , _bccf , _gfae [ 0 ] , _gfae [ 1 ] , _gfae [ _bccf - 1 ] ) ;
} ;
// split builds a table from the composite cell using the corridors `_bdfd`
// and `_fafa`. The table has len(`_fafa`)+1 as `_agac` and len(`_bdfd`)+1 as
// `_dcbdf`.
//
// Steps, in order:
//   - both corridor slices are passed through _gagd together with the cell's
//     edges (Ury/Lly for `_bdfd`, Llx/Urx for `_fafa`);
//     NOTE(review): presumably this adds the cell edges as outer boundaries —
//     confirm against _gagd;
//   - the cell's paragraph list is sorted IN PLACE by Lly, then Llx;
//   - a rectangle is recorded for every pair of consecutive boundaries;
//   - every line is assigned to the first rectangle (rows outermost) that
//     _gfaf accepts; lines accepted by no rectangle are dropped;
//   - every rectangle holding at least one line gets a paragraph built by
//     _efagec, stored in the table with put.
//
// Extensive diagnostics are printed when the `_agede` debug flag is set.
func ( _aacg compositeCell ) split ( _bdfd , _fafa [ ] float64 ) * textTable { _geedd := len ( _bdfd ) + 1 ; _cbgae := len ( _fafa ) + 1 ; if _agede { _fb . Log . Info ( "\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a" + "\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066" , _cbgae , _geedd , _aacg , _bdfd , _fafa ) ;
_cag . Printf ( "\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a" , len ( _aacg . paraList ) ) ; for _fdgag , _gbcg := range _aacg . paraList { _cag . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _fdgag , _gbcg . String ( ) ) ;
} ; _cag . Printf ( "\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a" , len ( _aacg . lines ( ) ) ) ; for _aafaf , _cfd := range _aacg . lines ( ) { _cag . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _aafaf , _cfd ) ; } ; } ; _bdfd = _gagd ( _bdfd , _aacg . Ury , _aacg . Lly ) ;
_fafa = _gagd ( _fafa , _aacg . Llx , _aacg . Urx ) ; _agfd := make ( map [ uint64 ] * textPara , _cbgae * _geedd ) ; _dacf := textTable { _agac : _cbgae , _dcbdf : _geedd , _abccf : _agfd } ; _acgfg := _aacg . paraList ; _af . Slice ( _acgfg , func ( _fefcg , _adee int ) bool { _gdbeg , _ffgc := _acgfg [ _fefcg ] , _acgfg [ _adee ] ;
_cbdc , _adad := _gdbeg . Lly , _ffgc . Lly ; if _cbdc != _adad { return _cbdc < _adad ; } ; return _gdbeg . Llx < _ffgc . Llx ; } ) ; _edag := make ( map [ uint64 ] _ac . PdfRectangle , _cbgae * _geedd ) ; for _aafdf , _abga := range _bdfd [ 1 : ] { _abfc := _bdfd [ _aafdf ] ; for _bcfdf , _gdca := range _fafa [ 1 : ] { _fbag := _fafa [ _bcfdf ] ;
_edag [ _addg ( _bcfdf , _aafdf ) ] = _ac . PdfRectangle { Llx : _fbag , Urx : _gdca , Lly : _abga , Ury : _abfc } ; } ; } ; if _agede { _fb . Log . Info ( "\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073" ) ;
_cag . Printf ( "\u0020\u0020\u0020\u0020" ) ; for _eeae := 0 ; _eeae < _cbgae ; _eeae ++ { _cag . Printf ( "\u0025\u0033\u0030\u0064\u002c\u0020" , _eeae ) ; } ; _cag . Println ( ) ; for _aagf := 0 ; _aagf < _geedd ; _aagf ++ { _cag . Printf ( "\u0020\u0020\u0025\u0032\u0064\u003a" , _aagf ) ;
for _agbca := 0 ; _agbca < _cbgae ; _agbca ++ { _cag . Printf ( "\u00256\u002e\u0032\u0066\u002c\u0020" , _edag [ _addg ( _agbca , _aagf ) ] ) ; } ; _cag . Println ( ) ; } ; } ; _cffaf := func ( _acgac * textLine ) ( int , int ) { for _edfa := 0 ; _edfa < _geedd ; _edfa ++ { for _defeeb := 0 ; _defeeb < _cbgae ;
_defeeb ++ { if _gfaf ( _edag [ _addg ( _defeeb , _edfa ) ] , _acgac . PdfRectangle ) { return _defeeb , _edfa ; } ; } ; } ; return - 1 , - 1 ; } ; _afec := make ( map [ uint64 ] [ ] * textLine , _cbgae * _geedd ) ; for _ , _dbe := range _acgfg . lines ( ) { _fcbdc , _ceec := _cffaf ( _dbe ) ; if _fcbdc < 0 { continue ;
} ; _afec [ _addg ( _fcbdc , _ceec ) ] = append ( _afec [ _addg ( _fcbdc , _ceec ) ] , _dbe ) ; } ; for _agee := 0 ; _agee < len ( _bdfd ) - 1 ; _agee ++ { _ffed := _bdfd [ _agee ] ; _aafb := _bdfd [ _agee + 1 ] ; for _cgcd := 0 ; _cgcd < len ( _fafa ) - 1 ; _cgcd ++ { _cabg := _fafa [ _cgcd ] ; _fdfe := _fafa [ _cgcd + 1 ] ;
_eedb := _ac . PdfRectangle { Llx : _cabg , Urx : _fdfe , Lly : _aafb , Ury : _ffed } ; _cdfc := _afec [ _addg ( _cgcd , _agee ) ] ; if len ( _cdfc ) == 0 { continue ; } ; _bdfa := _efagec ( _eedb , _cdfc ) ; _dacf . put ( _cgcd , _agee , _bdfa ) ; } ; } ; return & _dacf ; } ;
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
//
// NOTE(review): the bare date/time lines inside this struct are not Go; they
// look like version-control residue left by whatever produced this file —
// confirm and strip them at the source.
type ImageMark struct { Image * _ac . Image ;
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ; Height float64 ;
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// Position of the image in PDF coordinates (lower left corner).
X float64 ; Y float64 ;
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// Angle in degrees, if rotated.
Angle float64 ; } ;
// alignsSec reports whether the [`_bfeag`, `_cbba`] ranges of the two rulings
// overlap, allowing a gap of up to `_gbbd` + 1.0 between them.
func ( _efgd * ruling ) alignsSec ( _ecfbg * ruling ) bool { const _bdcc = _gbbd + 1.0 ; return _efgd . _bfeag - _bdcc <= _ecfbg . _cbba && _ecfbg . _bfeag - _bdcc <= _efgd . _cbba ; } ;
// removeDuplicates rebuilds the point list, dropping every point that _cdbg
// considers equal to the point kept just before it. An empty subpath is left
// untouched.
func ( _dada * subpath ) removeDuplicates ( ) { if len ( _dada . _egd ) == 0 { return ;
} ; _ggbc := [ ] _afd . Point { _dada . _egd [ 0 ] } ; for _ , _gced := range _dada . _egd [ 1 : ] { if ! _cdbg ( _gced , _ggbc [ len ( _ggbc ) - 1 ] ) { _ggbc = append ( _ggbc , _gced ) ; } ; } ; _dada . _egd = _ggbc ; } ;
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// String returns a string describing `pt`.
// The result is a header line with the number of marks prefixed by "-", one
// line per mark, and the same header prefixed by "+", joined with newlines.
func ( _gafc PageText ) String ( ) string { _eabf := _cag . Sprintf ( "P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073" , len ( _gafc . _aged ) ) ; _fcgb := [ ] string { "\u002d" + _eabf } ; for _ , _ddf := range _gafc . _aged { _fcgb = append ( _fcgb , _ddf . String ( ) ) ;
} ; _fcgb = append ( _fcgb , "\u002b" + _eabf ) ; return _cf . Join ( _fcgb , "\u000a" ) ; } ;
// computeEBBoxes computes the `_gbaa` rectangle of every paragraph.
//
// Each `_gbaa` starts as a copy of the paragraph's own rectangle. For every
// paragraph, the neighbours returned by yNeighbours(0) bound how far it may
// grow: the largest Urx of neighbours lying entirely to its left and the
// smallest Llx of neighbours lying entirely to its right. The rectangle is
// then widened to the Llx or Urx of any other paragraph whose Ury does not
// exceed this paragraph's Lly, as long as that edge stays within those bounds.
// Paragraphs processed earlier are seen with their already widened `_gbaa`.
// When `_bfae` is set the paragraphs' own rectangles are finally replaced by
// their `_gbaa`.
func ( _ceeb paraList ) computeEBBoxes ( ) { if _dccc { _fb . Log . Info ( "\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a" ) ; } ; for _ , _bccd := range _ceeb { _bccd . _gbaa = _bccd . PdfRectangle ;
} ; _facec := _ceeb . yNeighbours ( 0 ) ; for _cafe , _gfff := range _ceeb { _acde := _gfff . _gbaa ; _gdgb , _egad := - 1.0e9 , + 1.0e9 ; for _ , _afdd := range _facec [ _gfff ] { _ggbe := _ceeb [ _afdd ] . _gbaa ; if _ggbe . Urx < _acde . Llx { _gdgb = _gc . Max ( _gdgb , _ggbe . Urx ) ; } else if _acde . Urx < _ggbe . Llx { _egad = _gc . Min ( _egad , _ggbe . Llx ) ;
} ; } ; for _fgec , _edcca := range _ceeb { _dgad := _edcca . _gbaa ; if _cafe == _fgec || _dgad . Ury > _acde . Lly { continue ; } ; if _gdgb <= _dgad . Llx && _dgad . Llx < _acde . Llx { _acde . Llx = _dgad . Llx ; } else if _dgad . Urx <= _egad && _acde . Urx < _dgad . Urx { _acde . Urx = _dgad . Urx ;
} ; } ; if _dccc { _cag . Printf ( "\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a" , _cafe , _gfff . _gbaa , _acde , _bgdca ( _gfff . text ( ) , 50 ) ) ; } ; _gfff . _gbaa = _acde ; } ; if _bfae { for _ , _fdcc := range _ceeb { _fdcc . PdfRectangle = _fdcc . _gbaa ;
} ; } ; } ;
// _dgfaa returns the left edge of `_abgf`'s bounding box minus the right edge
// of `_gdgf`'s bounding box.
func _dgfaa ( _abgf , _gdgf bounded ) float64 { return _abgf . bbox ( ) . Llx - _gdgf . bbox ( ) . Urx } ;
// textPara is a paragraph: a rectangle, a second rectangle `_gbaa` maintained
// by computeEBBoxes, its lines (`_ddeb`), an optional table, two flags and
// four links to other paragraphs.
// `_cfeff` is the flag tested by taken() and applyTables; `_affbf` makes
// paraList.writeText skip the paragraph.
type textPara struct { _ac . PdfRectangle ; _gbaa _ac . PdfRectangle ; _ddeb [ ] * textLine ; _ccec * textTable ; _cfeff bool ; _affbf bool ; _bddfg * textPara ; _cegf * textPara ;
_dcaba * textPara ; _aegf * textPara ; } ;
// addPoint converts (`_bff`, `_bbc`) with devicePoint and adds it to the
// subpath returned by establishSubpath. When no subpath is returned, the
// point is remembered in `_dffc` and the `_dcef` flag is set instead.
func ( _eegb * shapesState ) addPoint ( _bff , _bbc float64 ) { _ebgg := _eegb . establishSubpath ( ) ; _fcca := _eegb . devicePoint ( _bff , _bbc ) ; if _ebgg == nil { _eegb . _dcef = true ; _eegb . _dffc = _fcca ; } else { _ebgg . add ( _fcca ) ;
} ; } ;
// text returns the concatenated text of the line's words, with a single space
// inserted before every word whose `_gcefe` flag is set.
func ( _ebgd * textLine ) text ( ) string { var _fcgbf [ ] string ; for _ , _gfdc := range _ebgd . _cadc { if _gfdc . _gcefe { _fcgbf = append ( _fcgbf , "\u0020" ) ; } ; _fcgbf = append ( _fcgbf , _gfdc . _gebf ) ; } ; return _cf . Join ( _fcgbf , "" ) ; } ;
// computeViews rebuilds the text, text-mark and table views of the page from
// all of its marks.
//
// Rulings are gathered from `_cagb` (when `_ffac` is set) and from `_bfa`
// (when `_gcbg` is set) and converted with toTilings. The marks are then
// processed one orientation at a time (0, 90, 180, 270), passing the rulings
// and tilings to _dgg, and the resulting paragraphs are concatenated in that
// order. The loop stops early once every mark has been consumed.
func ( _baaa * PageText ) computeViews ( ) { var _bgcd rulingList ;
if _ffac { _cacg := _fbgcd ( _baaa . _cagb ) ; _bgcd = append ( _bgcd , _cacg ... ) ; } ; if _gcbg { _gcfb := _bac ( _baaa . _bfa ) ; _bgcd = append ( _bgcd , _gcfb ... ) ; } ; _bgcd , _feac := _bgcd . toTilings ( ) ; var _dafa paraList ; _gfce := len ( _baaa . _aged ) ; for _bfga := 0 ; _bfga < 360 && _gfce > 0 ;
_bfga += 90 { _ceb := make ( [ ] * textMark , 0 , len ( _baaa . _aged ) - _gfce ) ; for _ , _fcfa := range _baaa . _aged { if _fcfa . _adbf == _bfga { _ceb = append ( _ceb , _fcfa ) ; } ; } ; if len ( _ceb ) > 0 { _fbgc := _dgg ( _ceb , _baaa . _abea , _bgcd , _feac ) ; _dafa = append ( _dafa , _fbgc ... ) ;
_gfce -= len ( _ceb ) ; } ; } ; _abcd := new ( _e . Buffer ) ; _dafa . writeText ( _abcd ) ; _baaa . _cged = _abcd . String ( ) ; _baaa . _gccf = _dafa . toTextMarks ( ) ; _baaa . _dcgg = _dafa . tables ( ) ; if _agede { _fb . Log . Info ( "\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064" , len ( _baaa . _dcgg ) ) ;
} ; } ;
// augmentGrid splits the list with vertsHorzs and adds synthetic rulings
// (marked `_aaec` = `_adbg`) where one group's bounding box extends beyond
// the other's by more than `_bade`:
//   - a ruling of kind `_aafafg` at the left and/or right edge of the second
//     group's box, spanning the first group's box in y;
//   - a ruling of kind `_eccgd` at the bottom and/or top edge of the first
//     group's box, spanning the second group's box in x.
// If either group is empty, or nothing had to be added, the two groups are
// returned as produced by vertsHorzs. Otherwise both lists are logged and the
// augmented groups are returned.
func ( _cagg rulingList ) augmentGrid ( ) ( rulingList , rulingList ) { _gfeb , _agecb := _cagg . vertsHorzs ( ) ; if len ( _gfeb ) == 0 || len ( _agecb ) == 0 { return _gfeb , _agecb ; } ; _caeb , _aaad := _gfeb , _agecb ; _adgd := _gfeb . bbox ( ) ; _gcbb := _agecb . bbox ( ) ; if _adce { _fb . Log . Info ( "\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066" , _adgd ) ;
_fb . Log . Info ( "\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066" , _gcbb ) ; } ; var _dfcdf , _daga , _fdea , _eabgf * ruling ; if _gcbb . Llx < _adgd . Llx - _bade { _dfcdf = & ruling { _aaec : _adbg , _agcgg : _aafafg , _edcba : _gcbb . Llx , _bfeag : _adgd . Lly , _cbba : _adgd . Ury } ;
_gfeb = append ( rulingList { _dfcdf } , _gfeb ... ) ; } ; if _gcbb . Urx > _adgd . Urx + _bade { _daga = & ruling { _aaec : _adbg , _agcgg : _aafafg , _edcba : _gcbb . Urx , _bfeag : _adgd . Lly , _cbba : _adgd . Ury } ; _gfeb = append ( _gfeb , _daga ) ; } ; if _adgd . Lly < _gcbb . Lly - _bade { _fdea = & ruling { _aaec : _adbg , _agcgg : _eccgd , _edcba : _adgd . Lly , _bfeag : _gcbb . Llx , _cbba : _gcbb . Urx } ;
_agecb = append ( rulingList { _fdea } , _agecb ... ) ; } ; if _adgd . Ury > _gcbb . Ury + _bade { _eabgf = & ruling { _aaec : _adbg , _agcgg : _eccgd , _edcba : _adgd . Ury , _bfeag : _gcbb . Llx , _cbba : _gcbb . Urx } ; _agecb = append ( _agecb , _eabgf ) ; } ; if len ( _gfeb ) + len ( _agecb ) == len ( _cagg ) { return _caeb , _aaad ;
} ; _cgbcf := append ( _gfeb , _agecb ... ) ; _cagg . log ( "u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064" ) ; _cgbcf . log ( "\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d" ) ; return _gfeb , _agecb ; } ;
// textState holds the text-related graphics state values.
// _gfe initialises `_bgg` to 100, `_acgdg` to RenderModeFill and `_beb` to
// the rectangle it is given; textObject.moveTextSetLeading writes `_dgc` and
// textObject.setWordSpacing writes `_eag`.
type textState struct { _feba float64 ; _eag float64 ; _bgg float64 ;
_dgc float64 ; _cef float64 ; _acgdg RenderMode ; _fec float64 ; _eedd * _ac . PdfFont ; _beb _ac . PdfRectangle ; _ededf int ; _eca int ; } ;
// extractTables looks for tables among the paragraphs using the tilings in
// `_cagbe` and returns the list produced by applyTables for the tables found.
// Lists shorter than `_eecc` are returned unchanged.
func ( _cbfgg paraList ) extractTables ( _cagbe [ ] gridTiling ) paraList { if _agede { _fb . Log . Debug ( "\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d" , len ( _cbfgg ) ) ;
} ; if len ( _cbfgg ) < _eecc { return _cbfgg ; } ; _aced := _cbfgg . findTables ( _cagbe ) ; if _agede { _fb . Log . Info ( "c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d" , len ( _aced ) ) ;
for _cfcdb , _cccge := range _aced { _cccge . log ( _cag . Sprintf ( "c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064" , _cfcdb ) ) ; } ; } ; return _cbfgg . applyTables ( _aced ) ; } ;
// String returns a human readable description of `vecs`.
// An empty list has a fixed description. Otherwise the list is split with
// vertsHorzs and the sizes of both groups are printed; when neither group is
// empty, a rectangle built from the `_edcba` values of the first and last
// ruling of each group is printed as well.
func (vecs rulingList) String() string {
	if len(vecs) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := vecs.vertsHorzs()
	nVerts, nHorzs := len(verts), len(horzs)
	if nVerts == 0 || nHorzs == 0 {
		return _cag.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", nVerts, nHorzs)
	}

	var extent _ac.PdfRectangle
	extent.Llx = verts[0]._edcba
	extent.Urx = verts[nVerts-1]._edcba
	extent.Lly = horzs[nHorzs-1]._edcba
	extent.Ury = horzs[0]._edcba
	return _cag.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", nVerts, nHorzs, extent)
}
2022-02-05 21:34:53 +00:00
// String returns a string describing `ma`.
2022-03-13 12:41:53 +00:00
// String returns a string describing the array: "EMPTY" when it holds no
// marks, otherwise the number of marks followed by the first and last mark.
func (_ddb TextMarkArray) String() string {
	marks := _ddb._dgf
	if len(marks) == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	first, last := marks[0], marks[len(marks)-1]
	return _cag.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", len(marks), first, last)
}

// _cbdb returns the result of _bcbef applied to the _gbaa fields of the two
// paragraphs.
func _cbdb(_gfcc, _efgg *textPara) bool {
	lhs, rhs := _gfcc._gbaa, _efgg._gbaa
	return _bcbef(lhs, rhs)
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// NewFromContents creates a new extractor from contents and page resources.
// The extractor starts with empty fontEntry and textResult maps, the call is
// reported through the license package's TrackUse, and the returned error is
// always nil.
func NewFromContents(contents string, resources *_ac.PdfPageResources) (*Extractor, error) {
	const usageKey = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"
	extractor := &Extractor{
		_gg:  contents,
		_cb:  resources,
		_gde: make(map[string]fontEntry),
		_agb: make(map[string]textResult),
	}
	_fg.TrackUse(usageKey)
	return extractor, nil
}

// applyRemovals deletes words from the bag. `_gabc` maps a key of the bag's
// _cfeeb map to the set of words to drop from that entry. An entry that would
// become empty is deleted from the map altogether.
func (_dadg *wordBag) applyRemovals(_gabc map[int]map[*textWord]struct{}) {
	for key, doomed := range _gabc {
		if len(doomed) == 0 {
			continue
		}
		words := _dadg._cfeeb[key]
		remaining := len(words) - len(doomed)
		if remaining == 0 {
			delete(_dadg._cfeeb, key)
			continue
		}
		// The survivors slice is sized on the expectation that every doomed
		// word is present in `words`.
		survivors := make([]*textWord, remaining)
		next := 0
		for _, word := range words {
			if _, drop := doomed[word]; drop {
				continue
			}
			survivors[next] = word
			next++
		}
		_dadg._cfeeb[key] = survivors
	}
}

// _dffge returns the integers 0.._aeea-1 sorted by `_aeedg`, where
// `_aeedg(a, b)` reports whether a should come before b.
func _dffge(_aeea int, _aeedg func(int, int) bool) []int {
	order := make([]int, _aeea)
	for i := 0; i < len(order); i++ {
		order[i] = i
	}
	less := func(i, j int) bool { return _aeedg(order[i], order[j]) }
	_af.Slice(order, less)
	return order
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// Text returns the extracted page text.
func (_bcce PageText) Text() string { return _bcce._cged }

// _eadb merges word bags into the larger bags that overlap them.
//
// The bags are first sorted (in place) by decreasing area, then decreasing
// height, then index. Each bag that has not been absorbed then absorbs every
// later, not yet absorbed bag whose rectangle satisfies _gfaf against its own
// rectangle with Llx moved left by the bag's _aege. The bags that were not
// absorbed are returned.
//
// NOTE(review): _gfaf is presumably a containment test - confirm.
func _eadb(_fegdg []*wordBag) []*wordBag {
	if len(_fegdg) <= 1 {
		return _fegdg
	}
	if _addb {
		_fb.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	_af.Slice(_fegdg, func(_fffb, _dgbf int) bool {
		a, b := _fegdg[_fffb], _fegdg[_dgbf]
		areaA := a.Width() * a.Height()
		areaB := b.Width() * b.Height()
		if areaA != areaB {
			return areaA > areaB
		}
		if a.Height() != b.Height() {
			return a.Height() > b.Height()
		}
		return _fffb < _dgbf
	})

	var merged []*wordBag
	absorbed := make(intSet)
	for i := 0; i < len(_fegdg); i++ {
		if absorbed.has(i) {
			continue
		}
		host := _fegdg[i]
		for j := i + 1; j < len(_fegdg); j++ {
			// Skip bags that an earlier host already absorbed. The previous
			// code tested the outer index here, which is never absorbed at
			// this point, so a bag could be absorbed into several hosts.
			if absorbed.has(j) {
				continue
			}
			guest := _fegdg[j]
			reach := host.PdfRectangle
			reach.Llx -= host._aege
			if _gfaf(reach, guest.PdfRectangle) {
				host.absorb(guest)
				absorbed.add(j)
			}
		}
		merged = append(merged, host)
	}

	// Sanity check: every input bag is either kept or absorbed.
	if len(_fegdg) != len(merged)+len(absorbed) {
		_fb.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_fegdg), len(merged), len(absorbed))
	}
	return merged
}

// _cdbg reports whether the two points have identical X and Y coordinates.
func _cdbg(_ecfge, _gggfe _afd.Point) bool {
	return _ecfge.X == _gggfe.X && _ecfge.Y == _gggfe.Y
}

// depth returns the smallest depth() among the cells with second index 0,
// ignoring missing cells and cells whose _affbf flag is set. It returns 1e10
// when no cell qualifies.
func (_deag *textTable) depth() float64 {
	minDepth := 1e10
	for x := 0; x < _deag._agac; x++ {
		cell := _deag.get(x, 0)
		if cell == nil || cell._affbf {
			continue
		}
		minDepth = _gc.Min(minDepth, cell.depth())
	}
	return minDepth
}

// get returns the cell stored for (`_dcgae`, `_afefg`), or nil if there is none.
func (_fgadb *textTable) get(_dcgae, _afefg int) *textPara {
	return _fgadb._abccf[_addg(_dcgae, _afefg)]
}

// getRight returns the _cegf neighbours of the cells whose first index is
// _agac-1, one per value of the second index. It returns nil if any of these
// neighbours is taken() or if consecutive neighbours are not linked through
// their _aegf field.
func (_gdcac *textTable) getRight() paraList {
	cells := make(paraList, _gdcac._dcbdf)
	for y := 0; y < _gdcac._dcbdf; y++ {
		cell := _gdcac.get(_gdcac._agac-1, y)._cegf
		if cell.taken() {
			return nil
		}
		cells[y] = cell
	}
	for y := 0; y < _gdcac._dcbdf-1; y++ {
		if cells[y]._aegf != cells[y+1] {
			return nil
		}
	}
	return cells
}

// asRuling converts the line to a ruling. For kind _aafafg the primary
// coordinate is xMean() and the extent is the Y range of the end points; for
// kind _eccgd it is yMean() and the X range. Any other kind is logged as an
// error and yields (nil, false).
func (_fccg lineRuling) asRuling() (*ruling, bool) {
	r := ruling{_agcgg: _fccg._fbab, Color: _fccg.Color, _aaec: _gagc}
	switch _fccg._fbab {
	case _aafafg:
		r._edcba = _fccg.xMean()
		r._bfeag = _gc.Min(_fccg._gffd.Y, _fccg._eeebc.Y)
		r._cbba = _gc.Max(_fccg._gffd.Y, _fccg._eeebc.Y)
	case _eccgd:
		r._edcba = _fccg.yMean()
		r._bfeag = _gc.Min(_fccg._gffd.X, _fccg._eeebc.X)
		r._cbba = _gc.Max(_fccg._gffd.X, _fccg._eeebc.X)
	default:
		_fb.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _fccg._fbab)
		return nil, false
	}
	return &r, true
}

// reset sets both matrices of the text object to the identity and clears _bece.
func (_aecf *textObject) reset() {
	_aecf._bgc = _afd.IdentityMatrix()
	_aecf._bab = _afd.IdentityMatrix()
	_aecf._bece = nil
}

// growTable repeatedly extends the table with the cells returned by getDown
// and getRight until neither yields a usable set of cells.
func (_dcba *textTable) growTable() {
	// appendRow stores `cells` under a new value of the second index.
	appendRow := func(cells paraList) {
		_dcba._dcbdf++
		for x := 0; x < _dcba._agac; x++ {
			cell := cells[x]
			_dcba.put(x, _dcba._dcbdf-1, cell)
		}
	}
	// appendColumn stores `cells` under a new value of the first index.
	appendColumn := func(cells paraList) {
		_dcba._agac++
		for y := 0; y < _dcba._dcbdf; y++ {
			cell := cells[y]
			_dcba.put(_dcba._agac-1, y, cell)
		}
	}

	if _adga {
		_dcba.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}

	for pass := 0; ; pass++ {
		changed := false
		down := _dcba.getDown()
		right := _dcba.getRight()
		if _adga {
			_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", pass, _dcba)
			_cag.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", down)
			_cag.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", right)
		}
		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				appendRow(down)
				// The table changed, so the right-hand cells are recomputed
				// before a column is added.
				if right = _dcba.getRight(); right != nil {
					appendColumn(right)
					_dcba.put(_dcba._agac-1, _dcba._dcbdf-1, corner)
				}
				changed = true
			}
		}
		if !changed && down != nil {
			appendRow(down)
			changed = true
		}
		if !changed && right != nil {
			appendColumn(right)
			changed = true
		}
		if !changed {
			break
		}
	}
}

// _acca returns a new word bag holding only `_ebfg`. The word is stored under
// the key _agfb(_ebfg._fgbda); the bag takes the word's rectangle and _abeg,
// and keeps `_dfcfe`, `_baf` and `_begf` as given.
func _acca(_ebfg *textWord, _dfcfe float64, _baf, _begf rulingList) *wordBag {
	key := _agfb(_ebfg._fgbda)
	words := []*textWord{_ebfg}
	bag := wordBag{
		_cfeeb:       map[int][]*textWord{key: words},
		PdfRectangle: _ebfg.PdfRectangle,
		_aege:        _ebfg._abeg,
		_egbb:        _dfcfe,
		_fedd:        _baf,
		_bba:         _begf,
	}
	return &bag
}

// endsInHyphen reports whether the line ends in a hyphen. The last rune of
// the last word must be in unicode.Hyphen; then either that word has its
// _gcefe flag set and passes _bdbg, or the text of the whole line passes
// _bdbg. The line must contain at least one word.
func (_bfgae *textLine) endsInHyphen() bool {
	lastWord := _bfgae._cadc[len(_bfgae._cadc)-1]
	text := lastWord._gebf
	r, size := _g.DecodeLastRuneInString(text)
	if size <= 0 || !_gd.Is(_gd.Hyphen, r) {
		return false
	}
	if lastWord._gcefe && _bdbg(text) {
		return true
	}
	return _bdbg(_bfgae.text())
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// String returns a description of the word: its _fgbda value, its rectangle,
// its font size (_abeg) and its text (_gebf).
func (_ccac *textWord) String() string {
	return _cag.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022", _ccac._fgbda, _ccac.PdfRectangle, _ccac._abeg, _ccac._gebf)
}

// textLine is a run of words together with its bounding rectangle.
type textLine struct {
	_ac.PdfRectangle
	_bcgf float64     // initialised from the first word's _fgbda by _eade
	_cadc []*textWord // the words of the line
	_eabc float64     // initialised from the first word's _abeg by _eade
}

// _agbg returns a new textObject wired to the given extractor, resources,
// graphics state, text state and state stack, with both matrices set to the
// identity.
func _agbg(_gdce *Extractor, _fcb *_ac.PdfPageResources, _egfg _ag.GraphicsState, _bdbf *textState, _gcef *stateStack) *textObject {
	return &textObject{
		_defa: _gdce,
		_fgf:  _fcb,
		_gbc:  _egfg,
		_aece: _gcef,
		_fga:  _bdbf,
		_bgc:  _afd.IdentityMatrix(),
		_bab:  _afd.IdentityMatrix(),
	}
}

// _efag returns the single numeric parameter of `_bed` as a float64. It
// returns an error if the operation does not have exactly one parameter or if
// that parameter cannot be read as a number.
func _efag(_bed *_ag.ContentStreamOperation) (float64, error) {
	if len(_bed.Params) != 1 {
		err := _d.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
		_fb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _bed.Operand, 1, len(_bed.Params), _bed.Params)
		return 0.0, err
	}
	return _gdd.GetNumberAsFloat(_bed.Params[0])
}

// comp is the ordering used to sort rulings: it reports whether element
// `_bdaa` sorts before element `_dcccb`. Rulings are ordered by decreasing
// kind, then by _edcba, _bfeag and _cbba; the direction of the last three
// comparisons is flipped for kinds other than _eccgd. Two rulings of kind
// _dgdb never compare as less.
func (_agcfa rulingList) comp(_bdaa, _dcccb int) bool {
	a, b := _agcfa[_bdaa], _agcfa[_dcccb]
	kindA, kindB := a._agcgg, b._agcgg
	if kindA != kindB {
		return kindA > kindB
	}
	if kindA == _dgdb {
		return false
	}
	// orient keeps the comparison result for _eccgd rulings and inverts it
	// for the other kind.
	orient := func(less bool) bool {
		if kindA == _eccgd {
			return less
		}
		return !less
	}
	va, vb := a._edcba, b._edcba
	if va != vb {
		return orient(va > vb)
	}
	va, vb = a._bfeag, b._bfeag
	if va != vb {
		return orient(va < vb)
	}
	return orient(a._cbba < b._cbba)
}

// reduce returns the table with the rows and columns that
// emptyCompositeRow / emptyCompositeColumn report as empty removed. The
// receiver itself is returned when nothing is removed.
func (_ecgb *textTable) reduce() *textTable {
	keptRows := make([]int, 0, _ecgb._dcbdf)
	keptCols := make([]int, 0, _ecgb._agac)
	for y := 0; y < _ecgb._dcbdf; y++ {
		if !_ecgb.emptyCompositeRow(y) {
			keptRows = append(keptRows, y)
		}
	}
	for x := 0; x < _ecgb._agac; x++ {
		if !_ecgb.emptyCompositeColumn(x) {
			keptCols = append(keptCols, x)
		}
	}
	if len(keptRows) == _ecgb._dcbdf && len(keptCols) == _ecgb._agac {
		return _ecgb
	}

	numCells := len(keptCols) * len(keptRows)
	reduced := textTable{
		_ebabc: _ecgb._ebabc,
		_agac:  len(keptCols),
		_dcbdf: len(keptRows),
		_abccf: make(map[uint64]*textPara, numCells),
		// The composite-cell map is allocated too, as reduceTiling does
		// before it calls putComposite. Previously it was left nil here.
		// NOTE(review): this presumes putComposite stores into _cead - confirm.
		_cead: make(map[uint64]compositeCell, numCells),
	}
	if _agede {
		_fb.Log.Info("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064", _ecgb._agac, _ecgb._dcbdf, len(keptCols), len(keptRows))
		_fb.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_fb.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}
	for newY, oldY := range keptRows {
		for newX, oldX := range keptCols {
			paras, rect := _ecgb.getComposite(oldX, oldY)
			if paras == nil {
				continue
			}
			if _agede {
				_cag.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newX, newY, oldX, oldY, _bgdca(paras.merge().text(), 50))
			}
			reduced.putComposite(newX, newY, paras, rect)
		}
	}
	return &reduced
}

// extractXObjectImage records an ImageMark for the image XObject named
// `_daf` in `_cc`, positioned and scaled by the CTM of `_faea`. Decoded
// images are cached per XObject in _gfb. A name that resolves to no XObject,
// or to no image, is silently ignored.
func (_gdec *imageExtractContext) extractXObjectImage(_daf *_gdd.PdfObjectName, _faea _ag.GraphicsState, _cc *_ac.PdfPageResources) error {
	// Only the object itself is needed as the cache key; the second result of
	// GetXObjectByName is not used.
	xobj, _ := _cc.GetXObjectByName(*_daf)
	if xobj == nil {
		return nil
	}
	cached, found := _gdec._gfb[xobj]
	if !found {
		ximg, err := _cc.GetXObjectImageByName(*_daf)
		if err != nil {
			return err
		}
		if ximg == nil {
			return nil
		}
		img, err := ximg.ToImage()
		if err != nil {
			return err
		}
		cached = &cachedImage{_dfa: img, _ecf: ximg.ColorSpace}
		_gdec._gfb[xobj] = cached
	}

	img := cached._dfa
	colorSpace := cached._ecf
	rgb, err := colorSpace.ImageToRGB(*img)
	if err != nil {
		return err
	}
	_fb.Log.Debug("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073", _faea.CTM.String())
	mark := ImageMark{
		Image:  &rgb,
		Width:  _faea.CTM.ScalingFactorX(),
		Height: _faea.CTM.ScalingFactorY(),
		Angle:  _faea.CTM.Angle(),
	}
	mark.X, mark.Y = _faea.CTM.Translation()
	_gdec._caa = append(_gdec._caa, mark)
	_gdec._cdge++
	return nil
}

// writeCellText writes the text of the paragraph's lines to `_ccef`. When
// _ecgaa is set, a line other than the last that ends in a hyphen has its
// final rune removed and is joined directly to the next line; otherwise
// consecutive lines are separated by the string that _aaee returns for their
// _bcgf values. Errors from `_ccef` are not reported.
func (_dbfg *textPara) writeCellText(_ccef _c.Writer) {
	for i, line := range _dbfg._ddeb {
		text := line.text()
		isLast := i == len(_dbfg._ddeb)-1
		dehyphenate := _ecgaa && line.endsInHyphen() && !isLast
		if dehyphenate {
			text = _gaaf(text)
		}
		_ccef.Write([]byte(text))
		if !(dehyphenate || isLast) {
			_ccef.Write([]byte(_aaee(line._bcgf, _dbfg._ddeb[i+1]._bcgf)))
		}
	}
}

// toTextMarks returns the TextMark form of every mark in the word, appended
// through _bafc together with `_gcca`.
func (_cgfaf *textWord) toTextMarks(_gcca *int) []TextMark {
	var marks []TextMark
	for _, tm := range _cgfaf._ggabc {
		marks = _bafc(marks, _gcca, tm.ToTextMark())
	}
	return marks
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// The two int results are passed through from extractPageText. An
// ErrColorOutOfRange error from extractPageText is tolerated; any other error
// aborts the extraction.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
func (_dg *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pageText, stat1, stat2, err := _dg.extractPageText(_dg._gg, _dg._cb, _afd.IdentityMatrix(), 0)
	if err != nil && err != _ac.ErrColorOutOfRange {
		return nil, 0, 0, err
	}
	pageText.computeViews()
	err = _degfb(pageText)
	if err != nil {
		return nil, 0, 0, err
	}
	return pageText, stat1, stat2, nil
}

// llyOrdering returns the indexes of the paragraphs ordered by increasing
// Lly. The sort is stable, so paragraphs with equal Lly keep their order.
func (_fbdf paraList) llyOrdering() []int {
	order := make([]int, len(_fbdf))
	for i := range _fbdf {
		order[i] = i
	}
	_af.SliceStable(order, func(_cdcf, _bgaf int) bool {
		a, b := order[_cdcf], order[_bgaf]
		return _fbdf[a].Lly < _fbdf[b].Lly
	})
	return order
}

// compositeCell is a list of paragraphs together with a rectangle.
type compositeCell struct {
	_ac.PdfRectangle
	paraList
}

// reduceTiling returns the table with some empty rows and columns removed.
// A row is dropped when it is not the first, the _ebbb values of `_eacge` for
// it and the previous row differ by less than `_aegg`, and emptyCompositeRow
// reports it empty. A column is dropped when it is not the last, the _egfga
// values for it and the next column differ by less than `_aegg`, and
// emptyCompositeColumn reports it empty. The receiver itself is returned when
// nothing is dropped.
func (_dbeg *textTable) reduceTiling(_eacge gridTiling, _aegg float64) *textTable {
	keptRows := make([]int, 0, _dbeg._dcbdf)
	keptCols := make([]int, 0, _dbeg._agac)
	colCoords := _eacge._egfga
	rowCoords := _eacge._ebbb
	for y := 0; y < _dbeg._dcbdf; y++ {
		drop := y > 0 && _gc.Abs(rowCoords[y-1]-rowCoords[y]) < _aegg && _dbeg.emptyCompositeRow(y)
		if !drop {
			keptRows = append(keptRows, y)
		}
	}
	for x := 0; x < _dbeg._agac; x++ {
		drop := x < _dbeg._agac-1 && _gc.Abs(colCoords[x+1]-colCoords[x]) < _aegg && _dbeg.emptyCompositeColumn(x)
		if !drop {
			keptCols = append(keptCols, x)
		}
	}
	if len(keptRows) == _dbeg._dcbdf && len(keptCols) == _dbeg._agac {
		return _dbeg
	}

	reduced := textTable{
		_ebabc: _dbeg._ebabc,
		_agac:  len(keptCols),
		_dcbdf: len(keptRows),
		_cead:  make(map[uint64]compositeCell, len(keptCols)*len(keptRows)),
	}
	if _agede {
		_fb.Log.Info("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064", _dbeg._agac, _dbeg._dcbdf, len(keptCols), len(keptRows))
		_fb.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_fb.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}
	for newY, oldY := range keptRows {
		for newX, oldX := range keptCols {
			paras, rect := _dbeg.getComposite(oldX, oldY)
			if len(paras) == 0 {
				continue
			}
			if _agede {
				_cag.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newX, newY, oldX, oldY, _bgdca(paras.merge().text(), 50))
			}
			reduced.putComposite(newX, newY, paras, rect)
		}
	}
	return &reduced
}

// _eade starts a new text line from the word that firstWord returns for
// `_bfgf`. The line takes that word's rectangle, _abeg and _fgbda, and the
// word is moved from the bag into the line.
func _eade(_fgeg *wordBag, _bfgf int) *textLine {
	word := _fgeg.firstWord(_bfgf)
	line := textLine{PdfRectangle: word.PdfRectangle, _eabc: word._abeg, _bcgf: word._fgbda}
	line.pullWord(_fgeg, word, _bfgf)
	return &line
}

// bbox returns a rectangle built from primMinMax and secMinMax. Which of the
// two supplies the X range and which the Y range is decided by the kind of
// the first ruling. An empty list is logged as an error and yields the zero
// rectangle.
func (_bbab rulingList) bbox() _ac.PdfRectangle {
	var box _ac.PdfRectangle
	if len(_bbab) == 0 {
		_fb.Log.Error("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073")
		return _ac.PdfRectangle{}
	}
	if _bbab[0]._agcgg == _eccgd {
		box.Llx, box.Urx = _bbab.secMinMax()
		box.Lly, box.Ury = _bbab.primMinMax()
	} else {
		box.Llx, box.Urx = _bbab.primMinMax()
		box.Lly, box.Ury = _bbab.secMinMax()
	}
	return box
}

// _dedd looks for vertical gaps between the text lines of the cells in
// `_dbgcg`.
//
// All lines of all cells are sorted by _bcgf (then Llx) and scanned in that
// order. Whenever a line lies entirely below the lowest line seen so far, the
// midpoint of the gap is recorded and a new group of lines is started. The
// recorded midpoints are returned only if every cell has lines in every group
// (per hasLines); otherwise nil is returned. nil is also returned when the
// cells contain no lines at all.
func _dedd(_dbgcg []compositeCell) []float64 {
	var lines []*textLine
	numParas := 0
	for _, cell := range _dbgcg {
		numParas += len(cell.paraList)
		lines = append(lines, cell.lines()...)
	}
	_af.Slice(lines, func(_cgae, _bdab int) bool {
		a, b := lines[_cgae], lines[_bdab]
		da, db := a._bcgf, b._bcgf
		if !_gcceb(da - db) {
			return da < db
		}
		return a.Llx < b.Llx
	})
	if _agede {
		_cag.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", numParas, len(lines))
		for i, line := range lines {
			_cag.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, line)
		}
	}

	// Without any lines there is nothing to separate. Indexing lines[0] below
	// would panic.
	if len(lines) == 0 {
		return nil
	}

	var corridors []float64
	lowest := lines[0] // line with the smallest Lly seen so far
	var groups [][]*textLine
	current := []*textLine{lowest}
	for i, line := range lines[1:] {
		if line.Ury < lowest.Lly {
			border := 0.5 * (line.Ury + lowest.Lly)
			if _agede {
				_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a", i, line.Ury, lowest.Lly, border, lowest, line)
			}
			corridors = append(corridors, border)
			groups = append(groups, current)
			current = nil
		}
		current = append(current, line)
		if line.Lly < lowest.Lly {
			lowest = line
		}
	}
	if len(current) > 0 {
		groups = append(groups, current)
	}

	if _agede {
		_cag.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", corridors)
	}
	if _agede {
		_fb.Log.Info("\u0072\u006f\u0077\u003d\u0025\u0064", len(_dbgcg))
		for i, cell := range _dbgcg {
			_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, cell)
		}
		_fb.Log.Info("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d", len(groups))
		for i, group := range groups {
			_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a", i, len(group))
			for j, line := range group {
				_cag.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", j, line)
			}
		}
	}

	// The corridors are only valid if every group is present in every cell.
	allSpan := true
	for gi, group := range groups {
		groupSpans := true
		for ci, cell := range _dbgcg {
			if _agede {
				_cag.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", gi, len(groups), ci, len(_dbgcg), cell)
			}
			if !cell.hasLines(group) {
				if _agede {
					_cag.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", gi, len(groups), ci, len(_dbgcg))
				}
				groupSpans = false
				break
			}
		}
		if !groupSpans {
			allSpan = false
			break
		}
	}
	if !allSpan {
		if _agede {
			_fb.Log.Info("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg")
		}
		corridors = nil
	}
	if _agede && corridors != nil {
		_cag.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", corridors)
	}
	return corridors
}

// rectRuling is a coloured rectangle tagged with a ruling kind and a mark kind.
type rectRuling struct {
	_bbgc   rulingKind
	_gdecgd markKind
	_ca.Color
	_ac.PdfRectangle
}

// _fgbec returns a ruling of kind _aafafg at x = `_becea`.Urx that spans the
// Y range of the rectangle.
func _fgbec(_becea _ac.PdfRectangle) *ruling {
	return &ruling{_agcgg: _aafafg, _edcba: _becea.Urx, _bfeag: _becea.Lly, _cbba: _becea.Ury}
}

// findTextTables grows a table from every paragraph that is not taken, has
// non-zero width and for which isAtom returns a table. Tables with fewer
// than _eecc cells after growing are discarded.
func (_ccfgc paraList) findTextTables() []*textTable {
	var tables []*textTable
	for _, para := range _ccfgc {
		if para.taken() || para.Width() == 0 {
			continue
		}
		table := para.isAtom()
		if table == nil {
			continue
		}
		table.growTable()
		if table._agac*table._dcbdf < _eecc {
			continue
		}
		table.markCells()
		table.log("\u0067\u0072\u006fw\u006e")
		tables = append(tables, table)
	}
	return tables
}

// _aded reports whether the X ranges of the word and the bag overlap once
// each side is widened by `_adfb`.
func _aded(_efea *wordBag, _aggbb *textWord, _adfb float64) bool {
	return _aggbb.Llx < _efea.Urx+_adfb && _efea.Llx-_adfb < _aggbb.Urx
}
// Font represents the font properties on a PDF page.
type Font struct {
	// PdfFont is the model-level font object.
	PdfFont *_ac.PdfFont

	// FontName is the font name taken from the font properties.
	FontName string

	// FontType is the Subtype entry of the font dictionary inside the page resources.
	// Examples: Type0, Type1, MMType1, Type3, TrueType, CIDFont.
	FontType string

	// ToUnicode is true if the font provides a `ToUnicode` mapping.
	ToUnicode bool

	// IsCID is true if the underlying font is a composite font.
	// A composite font is represented by a font dictionary whose Subtype is `Type0`.
	IsCID bool

	// IsSimple is true if the font is a simple font.
	// A simple font is limited to 8-bit character codes.
	IsSimple bool

	// FontData is the raw data of the embedded font file.
	// Its format can be TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CFF).
	// The data comes from the `FontFile`, `FontFile2` or `FontFile3` entry of the Font Descriptor;
	// at most one of these entries supplies the FontData value.
	FontData []byte

	// FontFileName is a file name for the font, in the format
	// (Font Name) + (Font Type Extension), for example: helvetica.ttf.
	FontFileName string

	// FontDescriptor holds the metrics and other font attributes from the PDF structure (Font Descriptor).
	FontDescriptor *_ac.PdfFontDescriptor
}

// blocks reports whether some ruling in the list lies between `_fcea` and
// `_bffg`. The two rulings must overlap along their extent ([_bfeag, _cbba]);
// a list ruling counts when its _edcba is between theirs (with tolerance
// _gbbd) and its extent covers the whole overlap.
func (_gfca rulingList) blocks(_fcea, _bffg *ruling) bool {
	if _fcea._bfeag > _bffg._cbba || _bffg._bfeag > _fcea._cbba {
		return false
	}
	overlapLo := _gc.Max(_fcea._bfeag, _bffg._bfeag)
	overlapHi := _gc.Min(_fcea._cbba, _bffg._cbba)

	// Order the pair so that _fcea has the smaller _edcba.
	if _fcea._edcba > _bffg._edcba {
		_fcea, _bffg = _bffg, _fcea
	}

	for _, candidate := range _gfca {
		between := _fcea._edcba <= candidate._edcba+_gbbd && candidate._edcba <= _bffg._edcba+_gbbd
		if !between {
			continue
		}
		if candidate._bfeag <= overlapHi && overlapLo <= candidate._cbba {
			return true
		}
	}
	return false
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	IncludeInlineStencilMasks bool
}

// _ddcd returns the keys of `_ecaaa` in increasing order.
func _ddcd(_ecaaa map[int][]float64) []int {
	keys := make([]int, len(_ecaaa))
	n := 0
	for key := range _ecaaa {
		keys[n] = key
		n++
	}
	_af.Ints(keys)
	return keys
}

// yNeighbours returns the result of eventNeighbours for the vertical extents
// of the paragraphs. Each paragraph contributes an entering event at its Lly
// and a leaving event at its Ury; when `_ebebg` is non-zero both are pushed
// outwards by `_ebebg` times the paragraph's fontsize().
func (_bfcad paraList) yNeighbours(_ebebg float64) map[*textPara][]int {
	events := make([]event, 2*len(_bfcad))
	if _ebebg == 0 {
		for i, para := range _bfcad {
			events[2*i] = event{para.Lly, true, i}
			events[2*i+1] = event{para.Ury, false, i}
		}
	} else {
		for i, para := range _bfcad {
			events[2*i] = event{para.Lly - _ebebg*para.fontsize(), true, i}
			events[2*i+1] = event{para.Ury + _ebebg*para.fontsize(), false, i}
		}
	}
	return _bfcad.eventNeighbours(events)
}

// put stores `_baggd` as the cell at (`_gfef`, `_deece`).
func (_gbgec *textTable) put(_gfef, _deece int, _baggd *textPara) {
	_gbgec._abccf[_addg(_gfef, _deece)] = _baggd
}

// wordBag is a collection of words grouped into integer-keyed bins, together
// with a bounding rectangle.
type wordBag struct {
	_ac.PdfRectangle
	_aege      float64
	_fedd, _bba rulingList
	_egbb      float64
	_cfeeb     map[int][]*textWord // bins; _acca keys them with _agfb
}

// _agfb maps `_efcc` to an integer bin index using the bin size _eddf.
// Negative values are moved one bin further down than plain truncation gives.
func _agfb(_efcc float64) int {
	var index int
	if _efcc >= 0 {
		index = int(_efcc / _eddf)
	} else {
		index = int(_efcc/_eddf) - 1
	}
	return index
}

// Ruling kinds. asRuling treats _aafafg rulings as having an x primary
// coordinate and a y extent, and _eccgd rulings the other way round; _dgdb is
// the zero value and is neither.
const (
	_dgdb rulingKind = iota
	_eccgd
	_aafafg
)

// _gaaf returns `_ebaeb` without its last rune. An empty string is returned
// unchanged (it previously caused an out-of-range slice panic).
func _gaaf(_ebaeb string) string {
	runes := []rune(_ebaeb)
	if len(runes) == 0 {
		return ""
	}
	return string(runes[:len(runes)-1])
}

// encloses reports whether the interval [`_dfde`, `_ccfbc`] lies within the
// ruling's extent, allowing a tolerance of _bade at either end.
func (_bfcaf *ruling) encloses(_dfde, _ccfbc float64) bool {
	return _bfcaf._bfeag-_bade <= _dfde && _ccfbc <= _bfcaf._cbba+_bade
}

// push appends a copy of `_edec` to the stack, so later changes to the
// caller's state do not affect the saved one.
func (_acgd *stateStack) push(_edec *textState) {
	saved := *_edec
	*_acgd = append(*_acgd, &saved)
}

// intSet is a set of ints.
type intSet map[int]struct{}

// text returns the output of writeText as a string.
func (_bbdb *textPara) text() string {
	buf := new(_e.Buffer)
	_bbdb.writeText(buf)
	return buf.String()
}

// setTextRise stores `_aead` in the _fec field of the text state. It is a
// no-op on a nil text object.
func (_cbe *textObject) setTextRise(_aead float64) {
	if _cbe == nil {
		return
	}
	_cbe._fga._fec = _aead
}

// gridIntersecting reports whether both ends of the two rulings' extents
// match according to _cgfe.
func (_abdbf *ruling) gridIntersecting(_cbgf *ruling) bool {
	return _cgfe(_abdbf._bfeag, _cbgf._bfeag) && _cgfe(_abdbf._cbba, _cbgf._cbba)
}
// PageText represents the layout of text on a device page.
// The _cged field holds the extracted text returned by Text(). The roles of
// the other unexported fields are not evident from this declaration alone.
type PageText struct { _aged [ ] * textMark ; _cged string ; _gccf [ ] TextMark ; _dcgg [ ] TextTable ; _abea _ac . PdfRectangle ; _cagb [ ] pathSection ; _bfa [ ] pathSection ; } ;
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
// Marks with Meta set, or whose Text satisfies _feaba, are ignored. The bool
// result is false when no mark contributed to the rectangle.
func (_adb *TextMarkArray) BBox() (_ac.PdfRectangle, bool) {
	var (
		box   _ac.PdfRectangle
		found bool
	)
	for _, mark := range _adb._dgf {
		if mark.Meta || _feaba(mark.Text) {
			continue
		}
		if !found {
			box = mark.BBox
			found = true
			continue
		}
		box = _gcae(box, mark.BBox)
	}
	return box, found
}

// _eacc returns a ruling of kind _aafafg at x = `_bgefd`.Llx that spans the Y
// range of the rectangle.
func _eacc(_bgefd _ac.PdfRectangle) *ruling {
	r := ruling{_agcgg: _aafafg}
	r._edcba = _bgefd.Llx
	r._bfeag = _bgefd.Lly
	r._cbba = _bgefd.Ury
	return &r
}

// absorb merges `_fded` into the word: the rectangle becomes _gcae of the two
// rectangles and the marks of `_fded` are appended.
func (_gffc *textWord) absorb(_fded *textWord) {
	combined := _gcae(_gffc.PdfRectangle, _fded.PdfRectangle)
	_gffc.PdfRectangle = combined
	_gffc._ggabc = append(_gffc._ggabc, _fded._ggabc...)
}

// toTilings tidies the rulings, converts them to grids and returns the tidied
// rulings together with one tiling per grid. An empty list yields (nil, nil).
func (_dafbe rulingList) toTilings() (rulingList, []gridTiling) {
	_dafbe.log("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s")
	if len(_dafbe) == 0 {
		return nil, nil
	}
	_dafbe = _dafbe.tidied("\u0061\u006c\u006c")
	_dafbe.log("\u0074\u0069\u0064\u0069\u0065\u0064")

	grids := _dafbe.toGrids()
	tilings := make([]gridTiling, len(grids))
	for i := range grids {
		tilings[i] = grids[i].asTiling()
	}
	return _dafbe, tilings
}

// markWordBoundaries sets the _gcefe flag on every word, other than the
// first, whose _dgfaa value relative to the preceding word is at least _gdf
// times the line's _eabc. The line must contain at least one word.
func (_bggd *textLine) markWordBoundaries() {
	threshold := _gdf * _bggd._eabc
	// Index i in the tail slice is the index of the preceding word in the
	// full slice.
	for i, word := range _bggd._cadc[1:] {
		previous := _bggd._cadc[i]
		if _dgfaa(word, previous) >= threshold {
			word._gcefe = true
		}
	}
}

// removeDuplicates sorts the list in place and returns a list in which runs
// of consecutive rulings that are equals() are collapsed to their first
// element. An empty list yields nil.
func (_fafd rulingList) removeDuplicates() rulingList {
	if len(_fafd) == 0 {
		return nil
	}
	_fafd.sort()
	unique := rulingList{_fafd[0]}
	for _, candidate := range _fafd[1:] {
		last := unique[len(unique)-1]
		if !candidate.equals(last) {
			unique = append(unique, candidate)
		}
	}
	return unique
}

// setTextRenderMode stores `_gcc` as the RenderMode of the text state. It is
// a no-op on a nil text object.
func (_gceg *textObject) setTextRenderMode(_gcc int) {
	if _gceg != nil {
		_gceg._fga._acgdg = RenderMode(_gcc)
	}
}

// setFont stores `_caab` in the _cef field of the text state and then looks
// up the font named `_efac`. If the lookup fails the error is returned and
// the current font is left unchanged, although _cef has already been updated.
// It is a no-op on a nil text object.
func (_fca *textObject) setFont(_efac string, _caab float64) error {
	if _fca == nil {
		return nil
	}
	state := _fca._fga
	state._cef = _caab
	font, err := _fca.getFont(_efac)
	if err != nil {
		return err
	}
	_fca._fga._eedd = font
	return nil
}

// _bdbg reports whether `_ege` has at least _dfed runes, ends in a rune from
// unicode.Hyphen, and has a non-space rune immediately before it.
func _bdbg(_ege string) bool {
	if _g.RuneCountInString(_ege) < _dfed {
		return false
	}
	last, lastSize := _g.DecodeLastRuneInString(_ege)
	if lastSize <= 0 || !_gd.Is(_gd.Hyphen, last) {
		return false
	}
	head := _ege[:len(_ege)-lastSize]
	prev, prevSize := _g.DecodeLastRuneInString(head)
	return prevSize > 0 && !_gd.IsSpace(prev)
}

// primaries returns the distinct _edcba values of the rulings in increasing
// order.
func (_eecf rulingList) primaries() []float64 {
	seen := make(map[float64]struct{}, len(_eecf))
	for _, r := range _eecf {
		seen[r._edcba] = struct{}{}
	}
	values := make([]float64, 0, len(seen))
	for v := range seen {
		values = append(values, v)
	}
	_af.Float64s(values)
	return values
}

// del removes `_gddad` from the set.
func (_deefg intSet) del(_gddad int) {
	delete(_deefg, _gddad)
}
// _efagec returns a paragraph with the given rectangle and lines.
func _efagec(_bbfg _ac.PdfRectangle, _afca []*textLine) *textPara {
	para := textPara{PdfRectangle: _bbfg}
	para._ddeb = _afca
	return &para
}

// _eeea builds a ruling from the segment `_bcde`-`_fde` with colour `_ggcd`.
// The kind is determined by _efeb; a segment of kind _dgdb yields (nil, false).
func _eeea(_bcde, _fde _afd.Point, _ggcd _ca.Color) (*ruling, bool) {
	kind := _efeb(_bcde, _fde)
	line := lineRuling{_gffd: _bcde, _eeebc: _fde, _fbab: kind, Color: _ggcd}
	if line._fbab == _dgdb {
		return nil, false
	}
	return line.asRuling()
}

// isQuadrilateral reports whether the subpath has four points, or five
// points of which the last coincides with the first.
func (_ddaf *subpath) isQuadrilateral() bool {
	points := _ddaf._egd
	switch len(points) {
	case 4:
		return true
	case 5:
		first, last := points[0], points[4]
		return first.X == last.X && first.Y == last.Y
	default:
		return false
	}
}

// connections returns the set of list indexes reachable from `_adfc` by
// repeatedly following `_bfce`, where an index i is linked to j when
// `_bfce`[i] contains j.
func (_gdbda rulingList) connections(_bfce map[int]intSet, _adfc int) intSet {
	reached := make(intSet)
	visited := make(intSet)

	var visit func(int)
	visit = func(_baeg int) {
		if visited.has(_baeg) {
			return
		}
		visited.add(_baeg)
		for i := range _gdbda {
			if _bfce[i].has(_baeg) {
				reached.add(i)
			}
		}
		for i := range _gdbda {
			if reached.has(i) {
				visit(i)
			}
		}
	}

	visit(_adfc)
	return reached
}

// has reports whether `_gecdb` is a member of the set.
func (_fbgd intSet) has(_gecdb int) bool {
	if _, present := _fbgd[_gecdb]; present {
		return true
	}
	return false
}
// String returns a one-line description of the ruling: its kind, primary coordinate,
// secondary extent (start, end and length), the `_aaec` field, colour and, when non-zero,
// its width.
func (_cfaf *ruling) String() string {
	if _cfaf._agcgg == _dgdb {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}
	// Axis labels for the primary and secondary coordinates. They are swapped for
	// rulings of kind `_eccgd`.
	primaryAxis, secondaryAxis := "\u0078", "\u0079"
	if _cfaf._agcgg == _eccgd {
		primaryAxis, secondaryAxis = "\u0079", "\u0078"
	}
	widthDesc := ""
	if _cfaf._geba != 0.0 {
		widthDesc = _cag.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _cfaf._geba)
	}
	return _cag.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",
		_cfaf._agcgg,
		primaryAxis, _cfaf._edcba,
		secondaryAxis, _cfaf._bfeag, _cfaf._cbba,
		_cfaf._cbba-_cfaf._bfeag,
		_cfaf._aaec, _cfaf.Color, widthDesc)
}

// newTablePara returns a textPara that wraps the table. The paragraph's bounding box and
// its `_gbaa` rectangle are both set to the table's computed bounding box.
func (_abdca *textTable) newTablePara() *textPara {
	box := _abdca.computeBbox()
	para := &textPara{PdfRectangle: box, _gbaa: box, _ccec: _abdca}
	if _agede {
		_fb.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// _bafe passes a copy of the template mark `_dgfa`, with its Text replaced by `_ddgaa`,
// to `_bafc` together with `_ebec` and `_fafe`, and returns `_bafc`'s result.
func _bafe(_ebec []TextMark, _fafe *int, _ddgaa string) []TextMark {
	mark := _dgfa
	mark.Text = _ddgaa
	return _bafc(_ebec, _fafe, mark)
}

// _ddgc classifies the segment `_agda`-`_bgag` by passing its absolute x and y extents
// and the tolerance `_dda` to `_cbcbf`.
func _ddgc(_agda, _bgag _afd.Point) rulingKind {
	return _cbcbf(_gc.Abs(_agda.X-_bgag.X), _gc.Abs(_agda.Y-_bgag.Y), _dda)
}

// bbox returns the bounding box of the text mark.
func (_fbga *textMark) bbox() _ac.PdfRectangle {
	return _fbga.PdfRectangle
}

// pullWord appends `_aedf` to the line and removes it from bin `_debe` of `_edbbe`.
func (_facgb *textLine) pullWord(_edbbe *wordBag, _aedf *textWord, _debe int) {
	_facgb.appendWord(_aedf)
	_edbbe.removeWord(_aedf, _debe)
}

// textTable is a table of paragraphs. toTextTable exports `_agac` as the table width and
// `_dcbdf` as the table height.
type textTable struct {
	_ac.PdfRectangle
	_agac  int // Number of columns.
	_dcbdf int // Number of rows.
	_ebabc bool
	_abccf map[uint64]*textPara
	_cead  map[uint64]compositeCell // Looked up with `_addg(column, row)` keys.
}

// NOTE(review): `_ad` is not referenced in this part of the file; presumably a debug switch.
var _ad = false

// bbox returns the smallest rectangle that contains every point of every subpath in the
// section. The first subpath must contain at least one point.
func (_gbge pathSection) bbox() _ac.PdfRectangle {
	origin := _gbge._bbdc[0]._egd[0]
	box := _ac.PdfRectangle{Llx: origin.X, Urx: origin.X, Lly: origin.Y, Ury: origin.Y}
	for i, sp := range _gbge._bbdc {
		pts := sp._egd
		if i == 0 {
			// The first point seeded the box.
			pts = pts[1:]
		}
		for _, pt := range pts {
			switch {
			case pt.X < box.Llx:
				box.Llx = pt.X
			case pt.X > box.Urx:
				box.Urx = pt.X
			}
			switch {
			case pt.Y < box.Lly:
				box.Lly = pt.Y
			case pt.Y > box.Ury:
				box.Ury = pt.Y
			}
		}
	}
	return box
}

// lastpointEstablished returns a point and a flag. The flag is false, and the point is
// meaningful, when the saved point `_dffc` is marked by `_dcef`, or when the last subpath
// has its `_ega` flag set (in which case its last point is returned). Otherwise it returns
// the zero point and true. establishSubpath starts a new subpath when the flag is false.
func (_edae *shapesState) lastpointEstablished() (_afd.Point, bool) {
	if _edae._dcef {
		return _edae._dffc, false
	}
	if n := len(_edae._fegc); n > 0 {
		if tail := _edae._fegc[n-1]; tail._ega {
			return tail.last(), false
		}
	}
	return _afd.Point{}, true
}

// _aecce converts PDF colour `_eegg` in colorspace `_gbbg` to a Go colour.
// It returns black if either argument is nil, if the conversion to RGB fails, or if the
// converted colour is not a *PdfColorDeviceRGB.
func _aecce(_gbbg _ac.PdfColorspace, _eegg _ac.PdfColor) _ca.Color {
	if _gbbg == nil || _eegg == nil {
		return _ca.Black
	}
	converted, err := _gbbg.ColorToRGB(_eegg)
	if err != nil {
		_fb.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", _eegg, _gbbg, err)
		return _ca.Black
	}
	rgb, isRGB := converted.(*_ac.PdfColorDeviceRGB)
	if !isRGB {
		_fb.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", converted)
		return _ca.Black
	}
	return _ca.NRGBA{
		R: uint8(rgb.R() * 255),
		G: uint8(rgb.G() * 255),
		B: uint8(rgb.B() * 255),
		A: uint8(255),
	}
}

// findGridTables returns the tables found in the paragraphs for each tiling in `_fegaf`.
// For every tiling, the paragraphs returned by findTableGrid alongside the table have
// their `_cfeff` flag set, whether or not a table was found.
func (_gcga paraList) findGridTables(_fegaf []gridTiling) []*textTable {
	if _agede {
		_fb.Log.Info("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073", len(_gcga))
		for i, para := range _gcga {
			_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
	}
	var tables []*textTable
	for i, tiling := range _fegaf {
		table, used := _gcga.findTableGrid(tiling)
		if table != nil {
			table.log(_cag.Sprintf("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064", i))
			tables = append(tables, table)
			table.markCells()
		}
		for para := range used {
			para._cfeff = true
		}
	}
	if _agede {
		_fb.Log.Info("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s", len(tables))
	}
	return tables
}

// numBorders returns how many of the tile's four border flags are set.
func (_dabd gridTile) numBorders() int {
	count := 0
	for _, present := range [...]bool{_dabd._fcgbfd, _dabd._fgdd, _dabd._cccb, _dabd._gbac} {
		if present {
			count++
		}
	}
	return count
}

// depth returns -1 if the `_affbf` flag is set, otherwise the `_bcgf` value of the
// paragraph's first line if it has lines, otherwise the depth of its table `_ccec`.
func (_eggg *textPara) depth() float64 {
	switch {
	case _eggg._affbf:
		return -1.0
	case len(_eggg._ddeb) > 0:
		return _eggg._ddeb[0]._bcgf
	default:
		return _eggg._ccec.depth()
	}
}

// _geabe rounds `_dccfe` to the nearest multiple of `_eada`.
func _geabe(_dccfe float64) float64 {
	steps := _gc.Round(_dccfe / _eada)
	return _eada * steps
}
// String returns the name registered for the kind in `_afbfd`, or a "Not a ruling"
// message containing the numeric value when the kind is not registered.
func (_gdfe rulingKind) String() string {
	name, known := _afbfd[_gdfe]
	if !known {
		return _cag.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _gdfe)
	}
	return name
}

// snapToGroupsDirection groups rulings whose primary coordinates lie within `_gbbd` of the
// group's first ruling, sets the primary coordinate of every ruling in a group to the
// group mean, splits each group along the secondary direction, merges each split and
// returns the merged rulings sorted with sortStrict.
//
// The receiver is sorted in place and the primary coordinates of its elements are modified.
// An empty list yields nil (the previous implementation panicked on it).
func (_bffd rulingList) snapToGroupsDirection() rulingList {
	if len(_bffd) == 0 {
		return nil
	}
	_bffd.sortStrict()

	// `groups` is keyed by the first ruling of each group. `leaders` records the keys in
	// creation order so that the final pass does not depend on map iteration order: its
	// duplicate check compares each merged ruling with the previously emitted one.
	groups := make(map[*ruling]rulingList, len(_bffd))
	leaders := make([]*ruling, 0, len(_bffd))
	leader := _bffd[0]
	startGroup := func(r *ruling) {
		leader = r
		groups[leader] = rulingList{r}
		leaders = append(leaders, r)
	}
	startGroup(_bffd[0])
	for _, r := range _bffd[1:] {
		if r._edcba < leader._edcba-_cbd {
			_fb.Log.Error("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073", leader, r)
		}
		if r._edcba > leader._edcba+_gbbd {
			startGroup(r)
		} else {
			groups[leader] = append(groups[leader], r)
		}
	}

	// Snap every ruling to the mean primary coordinate of its group.
	means := make(map[*ruling]float64, len(groups))
	leaderOf := make(map[*ruling]*ruling, len(_bffd))
	for lead, members := range groups {
		means[lead] = members.mergePrimary()
		for _, r := range members {
			leaderOf[r] = lead
		}
	}
	for _, r := range _bffd {
		r._edcba = means[leaderOf[r]]
	}

	// Merge the secondary-direction splits of each group, dropping a merged ruling that
	// aligns with the one emitted just before it.
	merged := make(rulingList, 0, len(_bffd))
	for _, lead := range leaders {
		for i, part := range groups[lead].splitSec() {
			r := part.merge()
			if len(merged) > 0 {
				prev := merged[len(merged)-1]
				if prev.alignsPrimary(r) && prev.alignsSec(r) {
					_fb.Log.Error("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073", i, prev, r)
					continue
				}
			}
			merged = append(merged, r)
		}
	}
	merged.sortStrict()
	return merged
}

// llyRange returns the part of `_aceaa` whose paragraphs have `_ddff` <= Lly <= `_fag`.
// `_aceaa` must hold the indexes of all paragraphs in order of increasing Lly, as the
// binary searches below rely on that order. It returns nil for an empty paragraph list
// (the previous implementation panicked on it) or when no paragraph can be in range.
func (_ddcc paraList) llyRange(_aceaa []int, _ddff, _fag float64) []int {
	n := len(_ddcc)
	if n == 0 {
		return nil
	}
	if _fag < _ddcc[_aceaa[0]].Lly || _ddff > _ddcc[_aceaa[n-1]].Lly {
		return nil
	}
	lo := _af.Search(n, func(i int) bool { return _ddcc[_aceaa[i]].Lly >= _ddff })
	hi := _af.Search(n, func(i int) bool { return _ddcc[_aceaa[i]].Lly > _fag })
	return _aceaa[lo:hi]
}
// String returns a string describing the tile: its rectangle followed by one character
// per border, "L", "R", "B" and "T" for `_fcgbfd`, `_fgdd`, `_cccb` and `_gbac`
// respectively, with "_" standing in for each border flag that is not set.
func (_fbca gridTile) String() string {
	flag := func(set bool, label string) string {
		if !set {
			return "\u005f"
		}
		return label
	}
	left := flag(_fbca._fcgbfd, "\u004c")
	right := flag(_fbca._fgdd, "\u0052")
	bottom := flag(_fbca._cccb, "\u0042")
	top := flag(_fbca._gbac, "\u0054")
	return _cag.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", _fbca.PdfRectangle, left, right, bottom, top)
}

// fill appends a pathSection made of the current subpaths and the text object's fill
// colour to `*_baab`. When `_adce` is set it prints a summary, and when `_gbbagd` is also
// set it prints the subpaths with indexes 0 to 10.
func (_ecfb *shapesState) fill(_baab *[]pathSection) {
	section := pathSection{_bbdc: _ecfb._fegc, Color: _ecfb._afbf.getFillColor()}
	*_baab = append(*_baab, section)
	if !_adce {
		return
	}
	box := section.bbox()
	_cag.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*_baab), len(section._bbdc), _ecfb, section.Color, box, box.Width(), box.Height())
	if !_gbbagd {
		return
	}
	for i, sp := range section._bbdc {
		_cag.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sp)
		if i == 10 {
			break
		}
	}
}

// paraList is a sequence of textPara.
type paraList []*textPara

// checkOp checks that operation `_dbd` has exactly `_edge` parameters; a negative `_edge`
// disables the check. It returns (true, nil) when the check passes. On a mismatch it logs
// the problem and returns false together with an error if `_gafb` is set, or a nil error
// otherwise. When the receiver is nil it also logs that the operand occurred outside a
// text object, showing at most `_edge` of the parameters.
func (_cgdg *textObject) checkOp(_dbd *_ag.ContentStreamOperation, _edge int, _gafb bool) (_ffe bool, _ecgae error) {
	if _cgdg == nil {
		var shown []_gdd.PdfObject
		if _edge > 0 {
			shown = _dbd.Params
			if len(shown) > _edge {
				shown = shown[:_edge]
			}
		}
		_fb.Log.Debug("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076", _dbd.Operand, shown)
	}
	if _edge < 0 || len(_dbd.Params) == _edge {
		return true, nil
	}
	if _gafb {
		_ecgae = _d.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	}
	_fb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _dbd.Operand, _edge, len(_dbd.Params), _dbd.Params)
	return false, _ecgae
}
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int

// emptyCompositeColumn returns true if no composite cell in column `_cebgg` contains any
// paragraphs.
func (_acffc *textTable) emptyCompositeColumn(_cebgg int) bool {
	for row := 0; row < _acffc._dcbdf; row++ {
		cell, found := _acffc._cead[_addg(_cebgg, row)]
		if found && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// removeWord removes `_fad` from bin `_bgac` of the bag and deletes the bin if that
// leaves it empty.
func (_cdgd *wordBag) removeWord(_fad *textWord, _bgac int) {
	remaining := _eadbg(_cdgd._cfeeb[_bgac], _fad)
	if len(remaining) != 0 {
		_cdgd._cfeeb[_bgac] = remaining
		return
	}
	delete(_cdgd._cfeeb, _bgac)
}

// _bac returns the rulings made from the filled paths `_aabd`. `_eefaa` is applied to the
// sections first. Subpaths that are not quadrilaterals, or that makeRectRuling rejects,
// are skipped.
func _bac(_aabd []pathSection) rulingList {
	_eefaa(_aabd)
	if _adce {
		_fb.Log.Info("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs", len(_aabd))
	}
	var rulings rulingList
	for _, section := range _aabd {
		for _, sp := range section._bbdc {
			if !sp.isQuadrilateral() {
				if _adce {
					_fb.Log.Error("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073", sp)
				}
				continue
			}
			r, ok := sp.makeRectRuling(section.Color)
			if ok {
				rulings = append(rulings, r)
			} else if _caea {
				_fb.Log.Error("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073", sp)
			}
		}
	}
	if _adce {
		_fb.Log.Info("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073", rulings.String())
	}
	return rulings
}

// _fddf returns a sorted wordBag containing `_eeaf`. The bag is created from the first
// word with `_acca`; each later word is added to the bin given by `_agfb` of its `_fgbda`
// value and the bag's bounding box is extended to include it. `_eeaf` must not be empty.
func _fddf(_eeaf []*textWord, _agbc float64, _abac, _deeb rulingList) *wordBag {
	bag := _acca(_eeaf[0], _agbc, _abac, _deeb)
	for i := 1; i < len(_eeaf); i++ {
		word := _eeaf[i]
		bin := _agfb(word._fgbda)
		bag._cfeeb[bin] = append(bag._cfeeb[bin], word)
		bag.PdfRectangle = _gcae(bag.PdfRectangle, word.PdfRectangle)
	}
	bag.sort()
	return bag
}

// intersects returns true if one of the two rulings has kind `_aafafg` and the other kind
// `_eccgd`, and the primary coordinate of each lies within the secondary extent of the
// other, widened by `_bade` at both ends.
func (_abge *ruling) intersects(_fcff *ruling) bool {
	orthogonal := (_abge._agcgg == _aafafg && _fcff._agcgg == _eccgd) ||
		(_fcff._agcgg == _aafafg && _abge._agcgg == _eccgd)
	// spans reports whether the primary coordinate of `b` is inside the extent of `a`.
	spans := func(a, b *ruling) bool {
		return a._bfeag-_bade <= b._edcba && b._edcba <= a._cbba+_bade
	}
	overlap1 := spans(_abge, _fcff)
	overlap2 := spans(_fcff, _abge)
	result := orthogonal && overlap1 && overlap2
	if _adce {
		_cag.Printf("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a", orthogonal, overlap1, overlap2, result, _abge, _fcff)
	}
	return result
}

// _aaee returns a space if `_gcceb` accepts the difference of the two values and a
// newline otherwise.
func _aaee(_dcdbe, _abbe float64) string {
	if _gcceb(_dcdbe - _abbe) {
		return "\u0020"
	}
	return "\u000a"
}

// _feaba returns true if `_cdbc` contains only Unicode white space. It returns true for
// the empty string.
func _feaba(_cdbc string) bool {
	for _, r := range _cdbc {
		if _gd.IsSpace(r) {
			continue
		}
		return false
	}
	return true
}

// _fbgcd returns the rulings made from the stroked paths `_edeac`. `_eefaa` is applied to
// the sections first. Each pair of consecutive points in a subpath is offered to `_eeea`
// and the segments it accepts are collected.
func _fbgcd(_edeac []pathSection) rulingList {
	_eefaa(_edeac)
	if _adce {
		_fb.Log.Info("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073", len(_edeac))
	}
	var rulings rulingList
	for _, section := range _edeac {
		for _, sp := range section._bbdc {
			pts := sp._egd
			for i := 1; i < len(pts); i++ {
				if r, ok := _eeea(pts[i-1], pts[i], section.Color); ok {
					rulings = append(rulings, r)
				}
			}
		}
	}
	if _adce {
		_fb.Log.Info("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073", rulings)
	}
	return rulings
}

// empty returns true if the stack holds no states.
func (_dee *stateStack) empty() bool {
	return len(*_dee) == 0
}

// merge sorts the paragraphs into reading order and returns one paragraph containing all
// their lines, with a bounding box covering all of them. It returns nil for an empty list.
func (_ebfe paraList) merge() *textPara {
	_fb.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_ebfe))
	if len(_ebfe) == 0 {
		return nil
	}
	_ebfe.sortReadingOrder()
	head := _ebfe[0]
	box, lines := head.PdfRectangle, head._ddeb
	for i := 1; i < len(_ebfe); i++ {
		box = _gcae(box, _ebfe[i].PdfRectangle)
		lines = append(lines, _ebfe[i]._ddeb...)
	}
	return _efagec(box, lines)
}

// _eggb walks the non-nil entries of `_geede` in the key order given by `_ddcd` and
// clamps the last value of an entry to the first value of the compared entry when it is
// larger. Maps with fewer than two entries are left untouched.
func _eggb(_geede map[int][]float64) {
	if len(_geede) <= 1 {
		return
	}
	keys := _ddcd(_geede)
	if _agede {
		_fb.Log.Info("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076", keys)
	}
	// Find the first key with a non-nil entry. If there is none, `first` and `prevKey`
	// are left at the last index and key.
	var first, prevKey int
	for first, prevKey = range keys {
		if _geede[prevKey] != nil {
			break
		}
	}
	for offset, key := range keys[first:] {
		next := _geede[key]
		if next == nil {
			continue
		}
		if _agede {
			_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a", first+offset, prevKey, key)
		}
		// NOTE(review): `prev` is read from `key`, not `prevKey`, so it is the same slice
		// as `next` and is then stored under `prevKey`. This looks like it was meant to be
		// `_geede[prevKey]`; the existing behaviour is kept here. Confirm intent.
		prev := _geede[key]
		if last := len(prev) - 1; prev[last] > next[0] {
			prev[last] = next[0]
			_geede[prevKey] = prev
		}
		prevKey = key
	}
}

// _fdbe returns the result of `_dgbd` applied to the absolute x and y distances between
// the two points.
func _fdbe(_gafa, _dbdf _afd.Point) bool {
	return _dgbd(_gc.Abs(_gafa.X-_dbdf.X), _gc.Abs(_gafa.Y-_dbdf.Y))
}

// _gcgf returns the grid tile for rectangle `_cgabb`. Each border flag is set when the
// corresponding ruling is non-nil and encloses the matching extent of the rectangle:
// `_agdfd` and `_aedfc` are tested against Lly..Ury, `_dgbca` and `_ecgd` against Llx..Urx.
func _gcgf(_cgabb _ac.PdfRectangle, _agdfd, _aedfc, _dgbca, _ecgd *ruling) gridTile {
	encloses := func(r *ruling, lo, hi float64) bool {
		return r != nil && r.encloses(lo, hi)
	}
	tile := gridTile{PdfRectangle: _cgabb}
	tile._fcgbfd = encloses(_agdfd, _cgabb.Lly, _cgabb.Ury)
	tile._fgdd = encloses(_aedfc, _cgabb.Lly, _cgabb.Ury)
	tile._cccb = encloses(_dgbca, _cgabb.Llx, _cgabb.Urx)
	tile._gbac = encloses(_ecgd, _cgabb.Llx, _cgabb.Urx)
	return tile
}

// NOTE(review): event is not used in this part of the file; presumably an entry in a
// sweep over coordinates. Confirm against its users.
type event struct {
	_eefg  float64
	_bgdfg bool
	_deed  int
}

// top returns the last state on the stack, or nil if the stack is empty.
func (_ebac *stateStack) top() *textState {
	if _ebac.empty() {
		return nil
	}
	last := _ebac.size() - 1
	return (*_ebac)[last]
}

// absorb pulls every word of `_cegg` into the receiver with pullWord, recording the
// removals in the structure returned by `_cegg.makeRemovals`, and then applies those
// removals to `_cegg`.
func (_efbc *wordBag) absorb(_cegg *wordBag) {
	removals := _cegg.makeRemovals()
	for bin, words := range _cegg._cfeeb {
		for _, word := range words {
			_efbc.pullWord(word, bin, removals)
		}
	}
	_cegg.applyRemovals(removals)
}

// getCurrentFont returns the font of the current text state, or the default font if none
// is set.
func (_dbcb *textObject) getCurrentFont() *_ac.PdfFont {
	if font := _dbcb._fga._eedd; font != nil {
		return font
	}
	_fb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e")
	return _ac.DefaultFont()
}

// establishSubpath returns the subpath that new segments should be added to. A new
// subpath is started at the point returned by lastpointEstablished when its flag is
// false. It returns nil if there are no subpaths; otherwise it clears `_dcef` and returns
// the last subpath.
func (_egcg *shapesState) establishSubpath() *subpath {
	if pt, established := _egcg.lastpointEstablished(); !established {
		_egcg._fegc = append(_egcg._fegc, _aab(pt))
	}
	n := len(_egcg._fegc)
	if n == 0 {
		return nil
	}
	_egcg._dcef = false
	return _egcg._fegc[n-1]
}

// gridTiling is a rectangle together with two coordinate lists and a two-level map of
// grid tiles keyed by float64 coordinates.
type gridTiling struct {
	_ac.PdfRectangle
	_egfga []float64
	_ebbb  []float64
	_cgdc  map[float64]map[float64]gridTile
}

// bbox returns the bounding box of the line.
func (_egag *textLine) bbox() _ac.PdfRectangle {
	return _egag.PdfRectangle
}
// makeRectRuling converts a quadrilateral subpath filled with colour `_efdf` to a ruling.
// The four sides are classified with `_ddgc`. The subpath is accepted when it has two
// `_eccgd` (horizontal) and two `_aafafg` (vertical) sides, or two sides of one kind, none
// of the other, and the two start points pass `_cggg` or `_fdbe` respectively.
// It returns false as the second value if the subpath is rejected or the resulting
// rectangle is not a ruling. The subpath must have at least 4 points.
func (_cgab *subpath) makeRectRuling(_efdf _ca.Color) (*ruling, bool) {
	if _caea {
		_fb.Log.Info("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076", _cgab)
	}
	pts := _cgab._egd[:4]

	// kinds[i] is the kind of the side from point i to point (i+1)%4.
	kinds := make(map[int]rulingKind, len(pts))
	for i, from := range pts {
		to := _cgab._egd[(i+1)%4]
		kinds[i] = _ddgc(from, to)
		if _caea {
			_cag.Printf("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066", i, kinds[i], from, to)
		}
	}
	if _caea {
		_cag.Printf("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a", kinds)
	}

	var verts, horzs []int
	for i, kind := range kinds {
		if kind == _eccgd {
			horzs = append(horzs, i)
		} else if kind == _aafafg {
			verts = append(verts, i)
		}
	}
	if _caea {
		_cag.Printf("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(horzs), horzs)
		_cag.Printf("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(verts), verts)
	}

	var ok bool
	switch {
	case len(horzs) == 2 && len(verts) == 2:
		ok = true
	case len(horzs) == 2 && len(verts) == 0:
		ok = _cggg(pts[horzs[0]], pts[horzs[1]])
	case len(verts) == 2 && len(horzs) == 0:
		ok = _fdbe(pts[verts[0]], pts[verts[1]])
	}
	if _caea {
		_cag.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
	}
	if !ok {
		if _caea {
			_fb.Log.Error("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v", _cgab)
			_cag.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
		}
		return &ruling{}, false
	}

	// When one kind is missing, use the sides that are not of the other kind instead.
	if len(verts) == 0 {
		for i, kind := range kinds {
			if kind != _eccgd {
				verts = append(verts, i)
			}
		}
	}
	if len(horzs) == 0 {
		for i, kind := range kinds {
			if kind != _aafafg {
				horzs = append(horzs, i)
			}
		}
	}
	if _caea {
		_fb.Log.Info("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076", len(horzs), len(verts), len(pts), horzs, verts, pts)
	}

	// Order the start points of the two horizontal sides by Y and of the two vertical
	// sides by X.
	upper, lower := pts[horzs[1]], pts[horzs[0]]
	if pts[horzs[0]].Y > pts[horzs[1]].Y {
		upper, lower = pts[horzs[0]], pts[horzs[1]]
	}
	larger, smaller := pts[verts[1]], pts[verts[0]]
	if pts[verts[0]].X > pts[verts[1]].X {
		larger, smaller = pts[verts[0]], pts[verts[1]]
	}

	box := _ac.PdfRectangle{Llx: larger.X, Urx: smaller.X, Lly: lower.Y, Ury: upper.Y}
	// Normalise so that the lower-left corner really is the lower-left corner.
	if box.Llx > box.Urx {
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	rect := rectRuling{PdfRectangle: box, _bbgc: _aaca(box), Color: _efdf}
	if rect._bbgc == _dgdb {
		if _caea {
			_fb.Log.Error("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c")
		}
		return nil, false
	}
	result, isRuling := rect.asRuling()
	if !isRuling {
		if _caea {
			_fb.Log.Error("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg")
		}
		return nil, false
	}
	if _adce {
		_cag.Printf("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a", result.String())
	}
	return result, true
}

// equals returns true if the two rulings have the same kind and their primary coordinate
// and both ends of their secondary extent are equal according to `_cgfe`.
func (_gfeg *ruling) equals(_ccbe *ruling) bool {
	if _gfeg._agcgg != _ccbe._agcgg {
		return false
	}
	if !_cgfe(_gfeg._edcba, _ccbe._edcba) {
		return false
	}
	if !_cgfe(_gfeg._bfeag, _ccbe._bfeag) {
		return false
	}
	return _cgfe(_gfeg._cbba, _ccbe._cbba)
}

// toTextTable converts the table to the exported TextTable form. Cells is indexed by row
// and then by column. Positions with no paragraph are left as zero TableCell values. The
// text mark offsets of every cell start at 0.
func (_bbadf *textTable) toTextTable() TextTable {
	if _agede {
		_fb.Log.Info("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064", _bbadf._agac, _bbadf._dcbdf)
	}
	cells := make([][]TableCell, _bbadf._dcbdf)
	for y := range cells {
		cells[y] = make([]TableCell, _bbadf._agac)
		row := cells[y]
		for x := range row {
			para := _bbadf.get(x, y)
			if para == nil {
				continue
			}
			if _agede {
				_cag.Printf("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", x, y, para)
			}
			row[x].Text = para.text()
			offset := 0
			row[x].Marks._dgf = para.toTextMarks(&offset)
		}
	}
	return TextTable{W: _bbadf._agac, H: _bbadf._dcbdf, Cells: cells}
}

// mergePrimary returns the mean primary coordinate of the rulings. The list must not be
// empty.
func (_gcdg rulingList) mergePrimary() float64 {
	total := _gcdg[0]._edcba
	for i := 1; i < len(_gcdg); i++ {
		total += _gcdg[i]._edcba
	}
	return total / float64(len(_gcdg))
}
2022-02-05 21:34:53 +00:00
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
2022-03-13 12:41:53 +00:00
func (_bcbe *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if _bcbe == nil {
		return nil, _d.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _cag.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}
	marks := _bcbe._dgf
	n := len(marks)
	if n == 0 {
		return _bcbe, nil
	}

	// Clamp the requested range to the offsets covered by the marks.
	if start < marks[0].Offset {
		start = marks[0].Offset
	}
	if end > marks[n-1].Offset+1 {
		end = marks[n-1].Offset + 1
	}

	// iStart is the first mark whose text ends at or after `start`.
	iStart := _af.Search(n, func(i int) bool { return marks[i].Offset+len(marks[i].Text)-1 >= start })
	if !(0 <= iStart && iStart < n) {
		return nil, _cag.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, iStart, n, marks[0], marks[n-1])
	}

	// iEnd is the first mark that starts at or after `end`. It is an exclusive bound, so
	// the value n (no such mark) is valid and means the range runs to the last mark.
	// Rejecting n, as was done previously, made every range that covers the last mark
	// fail even though that mark satisfies tm.Offset < end.
	iEnd := _af.Search(n, func(i int) bool { return marks[i].Offset > end-1 })
	if iEnd <= iStart {
		return nil, _cag.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_dgf: marks[iStart:iEnd]}, nil
}

// findPrimSec returns the first ruling whose primary coordinate matches `_degb` according
// to `_gcceb` and whose secondary extent, widened by `_bade` at both ends, contains
// `_adccd`. It returns nil if there is no such ruling.
func (_gbgae rulingList) findPrimSec(_degb, _adccd float64) *ruling {
	for _, r := range _gbgae {
		if !_gcceb(r._edcba - _degb) {
			continue
		}
		if r._bfeag-_bade <= _adccd && _adccd <= r._cbba+_bade {
			return r
		}
	}
	return nil
}

// sortStrict sorts the rulings in place by decreasing kind, then by increasing primary
// coordinate (coordinates that `_gcceb` treats as equal are considered tied), then by
// increasing start and end of the secondary extent.
func (_ebcf rulingList) sortStrict() {
	_af.Slice(_ebcf, func(i, j int) bool {
		a, b := _ebcf[i], _ebcf[j]
		if a._agcgg != b._agcgg {
			return a._agcgg > b._agcgg
		}
		if !_gcceb(a._edcba - b._edcba) {
			return a._edcba < b._edcba
		}
		if a._bfeag != b._bfeag {
			return a._bfeag < b._bfeag
		}
		return a._cbba < b._cbba
	})
}

// moveText moves the text position by (`_fbbd`, `_cbbc`) by delegating to moveLP.
func (_dbbc *textObject) moveText(_fbbd, _cbbc float64) {
	_dbbc.moveLP(_fbbd, _cbbc)
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// String returns a short description of the table, formatted as
// "<_agac> x <_dcbdf> <_ebabc>" using "%d x %d %t".
func (_bdcd *textTable) String() string {
	return _cag.Sprintf("%d x %d %t", _bdcd._agac, _bdcd._dcbdf, _bdcd._ebabc)
}

// shapesState is the path-construction state kept while a content stream is
// processed by extractPageText.
type shapesState struct {
	_eacf _afd.Matrix // Tracks the graphics state CTM (logged as "ctm=%s").
	_bag  _afd.Matrix // Outer transform; devicePoint applies _bag.Mult(_eacf).
	_fegc []*subpath
	_dcef bool       // Set to true by moveTo.
	_dffc _afd.Point // Device-space point recorded by moveTo.
	_afbf *textObject
}

// contains reports whether rectangle _gfed lies inside the tile.
//
// A tile with fewer than three borders (numBorders) never contains anything.
// Otherwise each side whose flag is set is checked: _gfed may overshoot that
// side by at most _dcbg. Sides whose flag is unset are not checked.
func (_agae gridTile) contains(_gfed _ac.PdfRectangle) bool {
	if _agae.numBorders() < 3 {
		return false
	}
	switch {
	case _agae._fcgbfd && _gfed.Llx < _agae.Llx-_dcbg:
		return false
	case _agae._fgdd && _gfed.Urx > _agae.Urx+_dcbg:
		return false
	case _agae._cccb && _gfed.Lly < _agae.Lly-_dcbg:
		return false
	case _agae._gbac && _gfed.Ury > _agae.Ury+_dcbg:
		return false
	}
	return true
}

// processOperand is the content stream handler used for image extraction.
//
// Only operations with exactly one parameter are considered:
//   - "BI": the inline image is extracted, unless it is an image mask and
//     IncludeInlineStencilMasks is disabled.
//   - "Do": image XObjects are extracted directly, form XObjects are
//     searched via extractFormImages; other XObject types are ignored.
//
// Every other operation is ignored and nil is returned.
func (_de *imageExtractContext) processOperand(_fae *_ag.ContentStreamOperation, _ab _ag.GraphicsState, _fggg *_ac.PdfPageResources) error {
	if len(_fae.Params) != 1 {
		return nil
	}

	switch _fae.Operand {
	case "BI":
		inline, ok := _fae.Params[0].(*_ag.ContentStreamInlineImage)
		if !ok {
			return nil
		}
		if isMask, found := _gdd.GetBoolVal(inline.ImageMask); found && isMask && !_de._ga.IncludeInlineStencilMasks {
			return nil
		}
		return _de.extractInlineImage(inline, _ab, _fggg)

	case "Do":
		name, ok := _gdd.GetName(_fae.Params[0])
		if !ok {
			_fb.Log.Debug("ERROR: Type")
			return _agc
		}
		_, xtype := _fggg.GetXObjectByName(*name)
		switch xtype {
		case _ac.XObjectTypeImage:
			return _de.extractXObjectImage(name, _ab, _fggg)
		case _ac.XObjectTypeForm:
			return _de.extractFormImages(name, _ab, _fggg)
		}
	}
	return nil
}

// extractPageText parses the content stream _eda and collects its text marks
// and its stroked/filled paths into a PageText.
//
// _fggcd are the resources used to process the stream, _afgf is the transform
// that is multiplied with the graphics state CTM, and _ebfd is the form
// XObject nesting level. When _ebfd exceeds _bf the function gives up with a
// "form stack overflow" error.
//
// It returns the page text, the two counters kept in the text state (_ededf
// and _eca) and the first error met. The PageText is returned even on error.
//
// Form XObjects invoked with "Do" are processed recursively and their results
// are cached in the extractor under the XObject name.
func (_dbb *Extractor) extractPageText(_eda string, _fggcd *_ac.PdfPageResources, _afgf _afd.Matrix, _ebfd int) (*PageText, int, int, error) {
	_fb.Log.Trace("extractPageText: level=%d", _ebfd)

	pageText := &PageText{_abea: _dbb._fe}
	state := _gfe(_dbb._fe)
	var saved stateStack
	to := _agbg(_dbb, _fggcd, _ag.GraphicsState{}, &state, &saved)
	shapes := shapesState{_bag: _afgf, _eacf: _afd.IdentityMatrix(), _afbf: to}
	var inTextObj bool

	if _ebfd > _bf {
		err := _d.New("form stack overflow")
		_fb.Log.Debug("ERROR: extractPageText. recursion level=%d err=%v", _ebfd, err)
		return pageText, state._ededf, state._eca, err
	}

	parser := _ag.NewContentStreamParser(_eda)
	operations, err := parser.Parse()
	if err != nil {
		_fb.Log.Debug("ERROR: extractPageText parse failed. err=%v", err)
		return pageText, state._ededf, state._eca, err
	}

	processor := _ag.NewContentStreamProcessor(*operations)
	processor.AddHandler(_ag.HandlerConditionEnumAllOperands, "", func(op *_ag.ContentStreamOperation, gs _ag.GraphicsState, resources *_ac.PdfPageResources) error {
		if _dbcdf {
			_fb.Log.Info("&&& op=%s", op)
		}

		switch op.Operand {
		// ---- Graphics state stack ----
		case "q":
			if _efca {
				_fb.Log.Info("ctm=%s", shapes._eacf)
			}
			saved.push(&state)
		case "Q":
			if !saved.empty() {
				state = *saved.pop()
			}
			shapes._eacf = gs.CTM
			if _efca {
				_fb.Log.Info("ctm=%s", shapes._eacf)
			}

		// ---- Text objects ----
		case "BT":
			if inTextObj {
				// Nested BT: keep the marks collected so far.
				_fb.Log.Debug("BT called while in a text object")
				pageText._aged = append(pageText._aged, to._bece...)
			}
			inTextObj = true
			textGS := gs
			textGS.CTM = _afgf.Mult(textGS.CTM)
			to = _agbg(_dbb, resources, textGS, &state, &saved)
			shapes._afbf = to
		case "ET":
			if !inTextObj {
				_fb.Log.Debug("ET called outside of a text object")
			}
			inTextObj = false
			pageText._aged = append(pageText._aged, to._bece...)
			to.reset()

		// ---- Text positioning ----
		case "T*":
			to.nextLine()
		case "Td":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			x, y, err := _acfgd(op.Params)
			if err != nil {
				return err
			}
			to.moveText(x, y)
		case "TD":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			x, y, err := _acfgd(op.Params)
			if err != nil {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.moveTextSetLeading(x, y)

		// ---- Text showing ----
		case "Tj":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: Tj op=%s err=%v", op, err)
				return err
			}
			data, ok := _gdd.GetStringBytes(op.Params[0])
			if !ok {
				_fb.Log.Debug("ERROR: Tj op=%s GetStringBytes failed", op)
				return _gdd.ErrTypeError
			}
			return to.showText(data)
		case "TJ":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: TJ err=%v", err)
				return err
			}
			args, ok := _gdd.GetArray(op.Params[0])
			if !ok {
				_fb.Log.Debug("ERROR: TJ op=%s GetArrayVal failed", op)
				// Previously the outer parse error was returned here. That
				// error is always nil while the handler runs, so the failure
				// was silently dropped. Report it like the Tj case does.
				return _gdd.ErrTypeError
			}
			return to.showTextAdjusted(args)
		case "'":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: ' err=%v", err)
				return err
			}
			data, ok := _gdd.GetStringBytes(op.Params[0])
			if !ok {
				_fb.Log.Debug("ERROR: ' op=%s GetStringBytes failed", op)
				return _gdd.ErrTypeError
			}
			to.nextLine()
			return to.showText(data)
		case "\"":
			if ok, err := to.checkOp(op, 3, true); !ok {
				_fb.Log.Debug("ERROR: \" err=%v", err)
				return err
			}
			aw, ac, err := _acfgd(op.Params[:2])
			if err != nil {
				return err
			}
			data, ok := _gdd.GetStringBytes(op.Params[2])
			if !ok {
				_fb.Log.Debug("ERROR: \" op=%s GetStringBytes failed", op)
				return _gdd.ErrTypeError
			}
			// PDF operand order is `aw ac string "`: the first number is the
			// word spacing and the second the character spacing. The two
			// were previously applied the other way round.
			// NOTE(review): assumes _acfgd returns the operands in order, as
			// its use for Td/TD suggests.
			to.setWordSpacing(aw)
			to.setCharSpacing(ac)
			to.nextLine()
			return to.showText(data)

		// ---- Text state ----
		case "TL":
			leading, err := _efag(op)
			if err != nil {
				_fb.Log.Debug("ERROR: TL err=%v", err)
				return err
			}
			to.setTextLeading(leading)
		case "Tc":
			spacing, err := _efag(op)
			if err != nil {
				_fb.Log.Debug("ERROR: Tc err=%v", err)
				return err
			}
			to.setCharSpacing(spacing)
		case "Tf":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_fb.Log.Debug("ERROR: Tf err=%v", err)
				return err
			}
			fontName, ok := _gdd.GetNameVal(op.Params[0])
			if !ok {
				_fb.Log.Debug("ERROR: Tf op=%s GetNameVal failed", op)
				return _gdd.ErrTypeError
			}
			size, err := _gdd.GetNumberAsFloat(op.Params[1])
			// The conversion error must be tested here; the code used to
			// re-test the GetNameVal flag, which is always true at this point.
			if err != nil {
				_fb.Log.Debug("ERROR: Tf op=%s GetFloatVal failed. err=%v", op, err)
				return err
			}
			err = to.setFont(fontName, size)
			// Unsupported fonts are remembered on the text object instead of
			// aborting the extraction.
			to._dcb = _cg.Is(err, _gdd.ErrNotSupported)
			if err != nil && !to._dcb {
				return err
			}
		case "Tm":
			if ok, err := to.checkOp(op, 6, true); !ok {
				_fb.Log.Debug("ERROR: Tm err=%v", err)
				return err
			}
			floats, err := _gdd.GetNumbersAsFloat(op.Params)
			if err != nil {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setTextMatrix(floats)
		case "Tr":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: Tr err=%v", err)
				return err
			}
			mode, ok := _gdd.GetIntVal(op.Params[0])
			if !ok {
				_fb.Log.Debug("ERROR: Tr op=%s GetIntVal failed", op)
				return _gdd.ErrTypeError
			}
			to.setTextRenderMode(mode)
		case "Ts":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: Ts err=%v", err)
				return err
			}
			rise, err := _gdd.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setTextRise(rise)
		case "Tw":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			spacing, err := _gdd.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setWordSpacing(spacing)
		case "Tz":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			scaling, err := _gdd.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setHorizScaling(scaling)

		// ---- CTM ----
		case "cm":
			shapes._eacf = gs.CTM
			if shapes._eacf.Singular() {
				// Replace a singular CTM by a pure translation.
				fixed := _afd.IdentityMatrix().Translate(shapes._eacf.Translation())
				_fb.Log.Debug("Singular ctm=%s\u2192%s", shapes._eacf, fixed)
				shapes._eacf = fixed
			}
			if _efca {
				_fb.Log.Info("ctm=%s", shapes._eacf)
			}

		// ---- Path construction ----
		case "m":
			if len(op.Params) != 2 {
				// Best effort: a malformed moveto is logged and skipped.
				_fb.Log.Debug("WARN: error while processing `m` operator: %v. Output may be incorrect.", _ge)
				return nil
			}
			pt, err := _gdd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.moveTo(pt[0], pt[1])
		case "l":
			if len(op.Params) != 2 {
				// Best effort: a malformed lineto is logged and skipped.
				_fb.Log.Debug("WARN: error while processing `l` operator: %v. Output may be incorrect.", _ge)
				return nil
			}
			pt, err := _gdd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.lineTo(pt[0], pt[1])
		case "c":
			if len(op.Params) != 6 {
				return _ge
			}
			pts, err := _gdd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_fb.Log.Debug("Cubic bezier params: %.2f", pts)
			shapes.cubicTo(pts[0], pts[1], pts[2], pts[3], pts[4], pts[5])
		case "v", "y":
			if len(op.Params) != 4 {
				return _ge
			}
			pts, err := _gdd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_fb.Log.Debug("Cubic bezier params: %.2f", pts)
			shapes.quadraticTo(pts[0], pts[1], pts[2], pts[3])
		case "h":
			shapes.closePath()
		case "re":
			if len(op.Params) != 4 {
				return _ge
			}
			rect, err := _gdd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.drawRectangle(rect[0], rect[1], rect[2], rect[3])
			shapes.closePath()

		// ---- Path painting ----
		case "S":
			shapes.stroke(&pageText._cagb)
			shapes.clearPath()
		case "s":
			shapes.closePath()
			shapes.stroke(&pageText._cagb)
			shapes.clearPath()
		case "F":
			shapes.fill(&pageText._bfa)
			shapes.clearPath()
		case "f", "f*":
			shapes.closePath()
			shapes.fill(&pageText._bfa)
			shapes.clearPath()
		case "B", "B*":
			shapes.fill(&pageText._bfa)
			shapes.stroke(&pageText._cagb)
			shapes.clearPath()
		case "b", "b*":
			shapes.closePath()
			shapes.fill(&pageText._bfa)
			shapes.stroke(&pageText._cagb)
			shapes.clearPath()
		case "n":
			shapes.clearPath()

		// ---- XObjects ----
		case "Do":
			if len(op.Params) == 0 {
				_fb.Log.Debug("ERROR: expected XObject name operand for Do operator. Got %+v.", op.Params)
				return _gdd.ErrRangeError
			}
			name, ok := _gdd.GetName(op.Params[0])
			if !ok {
				_fb.Log.Debug("ERROR: invalid Do operator XObject name operand: %+v.", op.Params[0])
				return _gdd.ErrTypeError
			}
			_, xtype := resources.GetXObjectByName(*name)
			if xtype != _ac.XObjectTypeForm {
				// Only form XObjects can carry text.
				break
			}

			formResult, cached := _dbb._agb[name.String()]
			if !cached {
				xform, err := resources.GetXObjectFormByName(*name)
				if err != nil {
					_fb.Log.Debug("ERROR: %v", err)
					return err
				}
				formContent, err := xform.GetContentStream()
				if err != nil {
					_fb.Log.Debug("ERROR: %v", err)
					return err
				}
				// A form without its own resources uses the caller's.
				formResources := xform.Resources
				if formResources == nil {
					formResources = resources
				}
				formText, cnt1, cnt2, err := _dbb.extractPageText(string(formContent), formResources, _afgf.Mult(gs.CTM), _ebfd+1)
				if err != nil {
					_fb.Log.Debug("ERROR: %v", err)
					return err
				}
				formResult = textResult{*formText, cnt1, cnt2}
				_dbb._agb[name.String()] = formResult
			}

			shapes._eacf = gs.CTM
			if _efca {
				_fb.Log.Info("ctm=%s", shapes._eacf)
			}
			pageText._aged = append(pageText._aged, formResult._gga._aged...)
			pageText._cagb = append(pageText._cagb, formResult._gga._cagb...)
			pageText._bfa = append(pageText._bfa, formResult._gga._bfa...)
			state._ededf += formResult._febb
			state._eca += formResult._gcf

		// ---- Colors: mirror the processor's graphics state into the text object ----
		case "rg", "g", "k", "cs", "sc", "scn":
			to._gbc.ColorspaceNonStroking = gs.ColorspaceNonStroking
			to._gbc.ColorNonStroking = gs.ColorNonStroking
		case "RG", "G", "K", "CS", "SC", "SCN":
			to._gbc.ColorspaceStroking = gs.ColorspaceStroking
			to._gbc.ColorStroking = gs.ColorStroking
		}
		return nil
	})

	err = processor.Process(_fggcd)
	return pageText, state._ededf, state._eca, err
}

// setHorizScaling stores _affb in the text state field _bgg ("Tz" operator).
// It is a no-op on a nil receiver.
func (_ccfg *textObject) setHorizScaling(_affb float64) {
	if _ccfg == nil {
		return
	}
	_ccfg._fga._bgg = _affb
}

// _eefaa replaces, in place, every point of every subpath in _bgfdc by the
// point obtained by passing its X and Y through _geabe. The log message names
// this step "granularize". Nothing is done when _eada is negative.
//
// With the _adce debug flag set, each point that _cdbg reports as changed is
// printed together with the applied delta.
func _eefaa(_bgfdc []pathSection) {
	if _eada < 0.0 {
		return
	}
	if _adce {
		_fb.Log.Info("granularize: %d subpath sections", len(_bgfdc))
	}
	for secIdx, section := range _bgfdc {
		for subIdx, sub := range section._bbdc {
			for ptIdx, before := range sub._egd {
				after := _afd.Point{X: _geabe(before.X), Y: _geabe(before.Y)}
				sub._egd[ptIdx] = after
				if !_adce || _cdbg(before, after) {
					continue
				}
				delta := _afd.Point{X: after.X - before.X, Y: after.Y - before.Y}
				_cag.Printf("%4d - %4d - %4d: %.2f \u2192 %.2f (%g)\n", secIdx, subIdx, ptIdx, before, after, delta)
			}
		}
	}
}

// log dumps the table, prefixed by _bggf, when the _agede debug flag is set:
// first a summary line, then one line per non-nil cell with its position,
// bounding box, text truncated by _bgdca(..., 50) and rune count.
func (_cacgb *textTable) log(_bggf string) {
	if !_agede {
		return
	}
	_fb.Log.Info("~~~ %s: %d x %d grid=%t\n      %6.2f", _bggf, _cacgb._agac, _cacgb._dcbdf, _cacgb._ebabc, _cacgb.PdfRectangle)
	for y := 0; y < _cacgb._dcbdf; y++ {
		for x := 0; x < _cacgb._agac; x++ {
			cell := _cacgb.get(x, y)
			if cell == nil {
				continue
			}
			_cag.Printf("%4d %2d: %6.2f %q %d\n", x, y, cell.PdfRectangle, _bgdca(cell.text(), 50), _g.RuneCountInString(cell.text()))
		}
	}
}

// sort sorts the rulings in place using the list's comp method as the
// less function.
func (_dgcf rulingList) sort() {
	_af.Slice(_dgcf, _dgcf.comp)
}

// _bdga returns _efae(_gedbb) - _efae(_gfbae).
func _bdga(_gedbb, _gfbae bounded) float64 {
	return _efae(_gedbb) - _efae(_gfbae)
}

// moveTo handles the "m" operator: it sets the _dcef flag and records the
// device-space position of (_cage, _cddb) in _dffc.
func (_cbfg *shapesState) moveTo(_cage, _cddb float64) {
	_cbfg._dcef = true
	_cbfg._dffc = _cbfg.devicePoint(_cage, _cddb)
	if _efca {
		_fb.Log.Info("moveTo: %.2f,%.2f device=%.2f", _cage, _cddb, _cbfg._dffc)
	}
}

// firstWord returns the first word stored under index _efbg of _cfeeb.
// The caller must make sure that entry exists and is not empty.
func (_febf *wordBag) firstWord(_efbg int) *textWord {
	return _febf._cfeeb[_efbg][0]
}
// textObject holds the state used while the operators of one text object
// (BT ... ET) are processed.
type textObject struct {
	_defa *Extractor            // Owning extractor; holds the font cache used by getFont.
	_fgf  *_ac.PdfPageResources // Resources in effect for the text object.
	_gbc  _ag.GraphicsState     // Graphics state in effect for the text object.
	_fga  *textState
	_aece *stateStack
	_bgc  _afd.Matrix // Set by setTextMatrix and advanced by showTextAdjusted.
	_bab  _afd.Matrix // Set to the same value as _bgc by setTextMatrix.
	_bece []*textMark // Marks produced by this text object.
	_dcb  bool
}

// _fgfb returns the translation part of matrix _fbd as a point.
func _fgfb(_fbd _afd.Matrix) _afd.Point {
	tx, ty := _fbd.Translation()
	return _afd.Point{X: tx, Y: ty}
}

// _cafa returns the difference between the left edges (Llx) of the bounding
// boxes of _gfcf and _ebbc.
func _cafa(_gfcf, _ebbc bounded) float64 {
	return _gfcf.bbox().Llx - _ebbc.bbox().Llx
}

// _ccbf builds a textWord from the marks _gbcb, which must not be empty.
//
// The word's rectangle is the combination (via _gcae) of all mark
// rectangles, _abeg is the largest _gba among the marks and _fgbda is the
// distance from the top of _eaagc (Ury) down to the bottom of the word (Lly).
func _ccbf(_gbcb []*textMark, _eaagc _ac.PdfRectangle) *textWord {
	box := _gbcb[0].PdfRectangle
	largest := _gbcb[0]._gba
	for _, mark := range _gbcb[1:] {
		box = _gcae(box, mark.PdfRectangle)
		if mark._gba > largest {
			largest = mark._gba
		}
	}
	return &textWord{
		PdfRectangle: box,
		_ggabc:       _gbcb,
		_fgbda:       _eaagc.Ury - box.Lly,
		_abeg:        largest,
	}
}

// getFont returns the font registered under resource name _gegg.
//
// When the extractor has a font cache (_gde != nil) fonts are cached under
// the string form of their font object. Every lookup increments the access
// counter _df; a hit stamps the entry with the current counter value. Before
// a new font is inserted into a cache that already holds _daca or more
// entries, the entry with the smallest stamp is evicted.
//
// An error is returned when the font dictionary cannot be found or the font
// cannot be loaded.
func (_fcc *textObject) getFont(_gegg string) (*_ac.PdfFont, error) {
	ext := _fcc._defa
	caching := ext._gde != nil

	// A single dictionary lookup serves both the cache key and the miss path.
	fontObj, err := _fcc.getFontDict(_gegg)
	if err != nil {
		if caching {
			_fb.Log.Debug("ERROR: getFont: name=%s, error: %s", _gegg, err.Error())
		}
		return nil, err
	}

	var key string
	if caching {
		key = fontObj.String()
		ext._df++
		if entry, ok := ext._gde[key]; ok {
			// Map values are copies: the refreshed stamp has to be written
			// back, otherwise a hit never updates the entry's recency.
			entry._fabc = ext._df
			ext._gde[key] = entry
			return entry._fab, nil
		}
	}

	font, err := _fcc.getFontDirect(_gegg)
	if err != nil {
		return nil, err
	}

	if caching {
		fresh := fontEntry{font, ext._df}
		if len(ext._gde) >= _daca {
			// Evict the entry with the smallest access stamp.
			victim, found := "", false
			oldest := ext._df
			for k, e := range ext._gde {
				if !found || e._fabc < oldest {
					victim, oldest, found = k, e._fabc, true
				}
			}
			if found {
				delete(ext._gde, victim)
			}
		}
		ext._gde[key] = fresh
	}
	return font, nil
}

// _degfb returns nil when a license key is present and licensed, or when the
// _ad flag is set. Otherwise it prints a notice to standard output and
// returns an error. The _cddba argument is not used.
func _degfb(_cddba *PageText) error {
	key := _fg.GetLicenseKey()
	if (key != nil && key.IsLicensed()) || _ad {
		return nil
	}
	_cag.Printf("Unlicensed copy of UniPDF\n")
	_cag.Println("- Get a free trial license on https://unidoc.io")
	return _d.New("unipdf license code required")
}

// showTextAdjusted handles the "TJ" operator. The elements of _fcf are
// processed in order:
//   - a number n moves the text matrix _bgc by -n * 0.001 * _cef along x;
//   - a string is rendered with renderText;
//   - anything else aborts with ErrTypeError.
func (_ageb *textObject) showTextAdjusted(_fcf *_gdd.PdfObjectArray) error {
	// Never set to true here, so the axis swap below is currently inert.
	vertical := false
	for _, elem := range _fcf.Elements() {
		switch elem.(type) {
		case *_gdd.PdfObjectFloat, *_gdd.PdfObjectInteger:
			adjust, err := _gdd.GetNumberAsFloat(elem)
			if err != nil {
				_fb.Log.Debug("ERROR: showTextAdjusted. Bad numerical arg. o=%s args=%+v", elem, _fcf)
				return err
			}
			dx, dy := -adjust*0.001*_ageb._fga._cef, 0.0
			if vertical {
				dy, dx = dx, dy
			}
			shift := _babf(_afd.Point{X: dx, Y: dy})
			_ageb._bgc.Concat(shift)
		case *_gdd.PdfObjectString:
			data, ok := _gdd.GetStringBytes(elem)
			if !ok {
				_fb.Log.Trace("showTextAdjusted: Bad string arg. o=%s args=%+v", elem, _fcf)
				return _gdd.ErrTypeError
			}
			_ageb.renderText(data)
		default:
			_fb.Log.Debug("ERROR: showTextAdjusted. Unexpected type (%T) args=%+v", elem, _fcf)
			return _gdd.ErrTypeError
		}
	}
	return nil
}

// devicePoint transforms (_ccg, _fcbdf) by _bag.Mult(_eacf) and returns the
// result as a point.
func (_cfbb *shapesState) devicePoint(_ccg, _fcbdf float64) _afd.Point {
	m := _cfbb._bag.Mult(_cfbb._eacf)
	_ccg, _fcbdf = m.Transform(_ccg, _fcbdf)
	return _afd.NewPoint(_ccg, _fcbdf)
}

// _adbc empties the word bag _gddb into a list of smaller bags (the log
// output calls them "paraWords").
//
// For each depth index, the first word in reading order seeds a new bag
// created by _acca. The bag is then grown by repeated scanBand passes until a
// full pass adds nothing:
//  1. a "vertical" scan over the bag's depth range widened by the
//     maxIntraDepthGap;
//  2. a "horizontal" scan within the bag's depth range using the
//     maxIntraReadingGap;
//  3. only when 1 and 2 added nothing, a counting scan followed, if the count
//     passes the thresholds below, by an "other" scan.
//
// All gaps are multiples of the bag's _aege value.
func _adbc(_gddb *wordBag, _eddaa float64, _dfbd, _cbcd rulingList) []*wordBag {
	var bags []*wordBag
	for _, depthIdx := range _gddb.depthIndexes() {
		grew := false
		for !_gddb.empty(depthIdx) {
			readingIdx := _gddb.firstReadingIndex(depthIdx)
			seed := _gddb.firstWord(readingIdx)
			bag := _acca(seed, _eddaa, _dfbd, _cbcd)
			_gddb.removeWord(seed, readingIdx)
			if _gbfc {
				_fb.Log.Info("firstWord ^^^^ %s", seed.String())
			}

			// Do-while loop: run at least once, repeat while a pass grew the bag.
			for again := true; again; again = grew {
				grew = false
				otherGap := _ebba * bag._aege
				maxIntraReadingGap := _effe * bag._aege
				maxIntraDepthGap := _cafad * bag._aege
				if _gbfc {
					_fb.Log.Info("paraWords depth %.2f - %.2f maxIntraDepthGap=%.2f maxIntraReadingGap=%.2f", bag.minDepth(), bag.maxDepth(), maxIntraDepthGap, maxIntraReadingGap)
				}

				if _gddb.scanBand("vertical", bag, _gff(_aded, 0), bag.minDepth()-maxIntraDepthGap, bag.maxDepth()+maxIntraDepthGap, _cegb, false, false) > 0 {
					grew = true
				}
				if _gddb.scanBand("horizontal", bag, _gff(_aded, maxIntraReadingGap), bag.minDepth(), bag.maxDepth(), _fgeb, false, false) > 0 {
					grew = true
				}
				if grew {
					continue
				}

				// Count candidate words first, then pull them in only when
				// the count passes the thresholds.
				count := _gddb.scanBand("", bag, _gff(_eeb, otherGap), bag.minDepth(), bag.maxDepth(), _faae, true, false)
				if count > 0 {
					span := (bag.maxDepth() - bag.minDepth()) / bag._aege
					if (count > 1 && float64(count) > 0.3*span) || count <= 10 {
						if _gddb.scanBand("other", bag, _gff(_eeb, otherGap), bag.minDepth(), bag.maxDepth(), _faae, false, true) > 0 {
							grew = true
						}
					}
				}
			}
			bags = append(bags, bag)
		}
	}
	return bags
}

// depthBand returns the result of depthRange for the depth indexes of _deec
// and _bfdf, or nil when the bag holds no words.
func (_fbff *wordBag) depthBand(_deec, _bfdf float64) []int {
	if len(_fbff._cfeeb) == 0 {
		return nil
	}
	return _fbff.depthRange(_fbff.getDepthIdx(_deec), _fbff.getDepthIdx(_bfdf))
}

// cubicTo handles the "c" operator. Only the final point (_geff, _bbde) is
// added to the path; the four control-point coordinates are ignored.
func (_ggf *shapesState) cubicTo(_fgbe, _eeged, _bdd, _gbdd, _geff, _bbde float64) {
	if _efca {
		_fb.Log.Info("cubicTo:")
	}
	_ggf.addPoint(_geff, _bbde)
}

// setTextMatrix handles the "Tm" operator: both _bgc and _bab are set to the
// matrix built from the six values in _age. A slice of any other length is
// logged and ignored.
func (_aag *textObject) setTextMatrix(_age []float64) {
	if len(_age) != 6 {
		_fb.Log.Debug("ERROR: len(f) != 6 (%d)", len(_age))
		return
	}
	_aag._bgc = _afd.NewMatrix(_age[0], _age[1], _age[2], _age[3], _age[4], _age[5])
	_aag._bab = _aag._bgc
}

// _cdgaf returns the keys of _fcgg in ascending order.
func _cdgaf(_fcgg map[int]intSet) []int {
	keys := make([]int, 0, len(_fcgg))
	for k := range _fcgg {
		keys = append(keys, k)
	}
	_af.Ints(keys)
	return keys
}

// ruling is a single ruling with an embedded color.
//
// findPrimSec matches _edcba against its first argument and tests its second
// argument against the [_bfeag, _cbba] interval.
type ruling struct {
	_agcgg rulingKind
	_aaec  markKind
	_ca.Color
	_edcba float64
	_bfeag float64
	_cbba  float64
	_geba  float64
}

// lines returns the lines (_ddeb) of all paragraphs in the list, concatenated
// in list order.
func (_fgbgb paraList) lines() []*textLine {
	var all []*textLine
	for _, para := range _fgbgb {
		all = append(all, para._ddeb...)
	}
	return all
}

// subdivide splits the composite cells of the table along its row and column
// corridors and returns a new table holding the resulting cells.
//
// The receiver itself is returned when there are no row corridors or no
// column corridors. Otherwise _fbaf supplies, for rows and columns, the
// offset of each composite cell in the new table and the new table's size.
// Each composite cell is split and its sub-cells are stored at that offset.
func (_fecbg *textTable) subdivide() *textTable {
	_fecbg.logComposite("subdivide")

	rowCorridors := _fecbg.compositeRowCorridors()
	colCorridors := _fecbg.compositeColCorridors()
	if _agede {
		_fb.Log.Info("subdivide:\n\trowCorridors=%s\n\tcolCorridors=%s", _beba(rowCorridors), _beba(colCorridors))
	}
	if len(rowCorridors) == 0 || len(colCorridors) == 0 {
		return _fecbg
	}

	_eggb(rowCorridors)
	_eggb(colCorridors)
	if _agede {
		_fb.Log.Info("subdivide fixed:\n\trowCorridors=%s\n\tcolCorridors=%s", _beba(rowCorridors), _beba(colCorridors))
	}

	yOffsets, numRows := _fbaf(_fecbg._dcbdf, rowCorridors)
	xOffsets, numCols := _fbaf(_fecbg._agac, colCorridors)
	cells := make(map[uint64]*textPara, numCols*numRows)
	result := &textTable{
		PdfRectangle: _fecbg.PdfRectangle,
		_ebabc:       _fecbg._ebabc,
		_dcbdf:       numRows,
		_agac:        numCols,
		_abccf:       cells,
	}
	if _agede {
		_fb.Log.Info("subdivide: composite = %d x %d cells= %d x %d\n"+
			"\trowCorridors=%s\n"+
			"\tcolCorridors=%s\n"+
			"\tyOffsets=%+v\n"+
			"\txOffsets=%+v",
			_fecbg._agac, _fecbg._dcbdf, numCols, numRows, _beba(rowCorridors), _beba(colCorridors), yOffsets, xOffsets)
	}

	for y := 0; y < _fecbg._dcbdf; y++ {
		y0 := yOffsets[y]
		for x := 0; x < _fecbg._agac; x++ {
			x0 := xOffsets[x]
			if _agede {
				_cag.Printf("%6d, %2d: x0=%d y0=%d\n", x, y, x0, y0)
			}
			composite, ok := _fecbg._cead[_addg(x, y)]
			if !ok {
				continue
			}
			sub := composite.split(rowCorridors[y], colCorridors[x])
			for sy := 0; sy < sub._dcbdf; sy++ {
				for sx := 0; sx < sub._agac; sx++ {
					cell := sub.get(sx, sy)
					result.put(x0+sx, y0+sy, cell)
					if _agede {
						_cag.Printf("%8d, %2d: %s\n", x0+sx, y0+sy, cell)
					}
				}
			}
		}
	}
	return result
}

// _bfeb groups the text marks _feca, in order, into words (the log output
// names this step "makeTextWords"). _cgeac is the rectangle passed on to
// _ccbf, appendMark and _agad.
//
// A mark for which _feaba reports true ends the current word. Any other mark
// starts a new word when there is none, or when its gap to the current word
// (_dgfaa) or its depth offset, both divided by the word's _abeg, fall outside
// the _aafe / _bafd / _gdcf limits. Otherwise it is appended to the current
// word.
//
// When _bfdb is set, a mark and its predecessor are first passed through
// _ffdcd; if exactly one of them is recognised and lies in the other's
// diacritic area, it is attached to the word with addDiacritic instead.
// Words whose computed text satisfies _feaba are dropped.
func _bfeb(_feca []*textMark, _cgeac _ac.PdfRectangle) []*textWord {
	var words []*textWord
	var current *textWord
	if _abb {
		_fb.Log.Info("makeTextWords: %d marks", len(_feca))
	}

	// flush finishes the word under construction, if any.
	flush := func() {
		if current == nil {
			return
		}
		content := current.computeText()
		if !_feaba(content) {
			current._gebf = content
			words = append(words, current)
			if _abb {
				_fb.Log.Info("addNewWord: %d: word=%s", len(words)-1, current.String())
				for i, m := range current._ggabc {
					_cag.Printf("%4d: %s\n", i, m.String())
				}
			}
		}
		current = nil
	}

	for _, mark := range _feca {
		if _bfdb && current != nil && len(current._ggabc) > 0 {
			last := current._ggabc[len(current._ggabc)-1]
			markDia, markIsDia := _ffdcd(mark._cadaf)
			lastDia, lastIsDia := _ffdcd(last._cadaf)
			if markIsDia && !lastIsDia && last.inDiacriticArea(mark) {
				current.addDiacritic(markDia)
				continue
			}
			if lastIsDia && !markIsDia && mark.inDiacriticArea(last) {
				// The diacritic came first: swap it for the base mark, then
				// re-attach it as a diacritic.
				current._ggabc = current._ggabc[:len(current._ggabc)-1]
				current.appendMark(mark, _cgeac)
				current.addDiacritic(lastDia)
				continue
			}
		}

		if _feaba(mark._cadaf) {
			flush()
			continue
		}
		if current == nil {
			current = _ccbf([]*textMark{mark}, _cgeac)
			continue
		}

		scale := current._abeg
		depthDelta := _gc.Abs(_agad(_cgeac, mark)-current._fgbda) / scale
		gap := _dgfaa(mark, current) / scale
		if gap >= _aafe || !(-_bafd <= gap && depthDelta <= _gdcf) {
			flush()
			current = _ccbf([]*textMark{mark}, _cgeac)
			continue
		}
		current.appendMark(mark, _cgeac)
	}
	flush()
	return words
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	_dgf []TextMark // The underlying marks.
}

// inTile returns the paras in `_becd` whose bounding boxes `_gded` contains, keeping their
// relative order.
func (_becd paraList) inTile(_gded gridTile) paraList {
	var inside paraList
	for _, para := range _becd {
		if !_gded.contains(para.PdfRectangle) {
			continue
		}
		inside = append(inside, para)
	}
	if _agede {
		// Debug dump of the tile and the paras found inside it.
		_cag.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _gded, len(inside))
		for i, para := range inside {
			_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
		_cag.Println("")
	}
	return inside
}

// xNeighbours creates two events for every para, one at its left x edge (flagged true) and one
// at its right x edge (flagged false), both tagged with the para's index, and returns the
// result of eventNeighbours on those events. When `_ecdde` is non-zero, each edge is moved
// outwards by `_ecdde` times the para's fontsize().
func (_dadbf paraList) xNeighbours(_ecdde float64) map[*textPara][]int {
	events := make([]event, 2*len(_dadbf))
	for i, para := range _dadbf {
		lo, hi := para.Llx, para.Urx
		if _ecdde != 0 {
			lo = para.Llx - _ecdde*para.fontsize()
			hi = para.Urx + _ecdde*para.fontsize()
		}
		events[2*i] = event{lo, true, i}
		events[2*i+1] = event{hi, false, i}
	}
	return _dadbf.eventNeighbours(events)
}

// isActualGrid augments the rulings with augmentGrid and checks that the two resulting lists
// are long enough (at least `_fee`+1 and `_edcf`+1 entries) and aligned.
// On success it returns the first list followed by the second, and true; otherwise nil, false.
func (_aagd rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := _aagd.augmentGrid()
	if len(verts) < _fee+1 || len(horzs) < _edcf+1 {
		if _adce {
			_fb.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(verts), len(horzs), _fee+1, _edcf+1)
		}
		return nil, false
	}
	if _adce {
		_fb.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", _aagd, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for i, r := range _aagd {
			_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", i, r)
		}
	}

	if _efdb {
		// Only the outermost rulings are compared: each pair must meet within the
		// tolerance applied by _gfdcf.
		v0, v1 := verts[0], verts[len(verts)-1]
		h0, h1 := horzs[0], horzs[len(horzs)-1]
		cornersMeet := _gfdcf(v0._edcba-h0._bfeag) &&
			_gfdcf(v1._edcba-h0._cbba) &&
			_gfdcf(h0._edcba-v0._cbba) &&
			_gfdcf(h1._edcba-v0._bfeag)
		if !cornersMeet {
			if _adce {
				_fb.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", v0, v1, h0, h1)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			if _cecd {
				_fb.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _adce {
				_fb.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(horzs))
			}
			return nil, false
		}
	}

	grid := append(verts, horzs...)
	return grid, true
}

// tables converts the table attached to each para (field _ccec) to a TextTable, skipping paras
// without a table and tables for which isExportable() is false.
func (_gecd paraList) tables() []TextTable {
	var exported []TextTable
	if _agede {
		_fb.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range _gecd {
		table := para._ccec
		if table == nil || !table.isExportable() {
			continue
		}
		exported = append(exported, table.toTextTable())
	}
	return exported
}

// empty reports whether the bag has no entry for key `_cgbe` in its _cfeeb map.
func (_bbag *wordBag) empty(_cgbe int) bool {
	_, present := _bbag._cfeeb[_cgbe]
	return !present
}
// _fafc removes the last rune of the last mark in `_abgc` and adjusts the running offset
// `*_eccg` to match.
//
// If the last mark holds exactly one rune, the whole mark is dropped and the offset is set to
// the end of the new last mark. Otherwise the rune is stripped (via _gaaf) from the mark's text
// and the offset is moved by the change in byte length.
//
// Fixes relative to the previous version:
//   - The trimmed text is now stored back into the slice element; before, it was written to a
//     local copy of the mark and lost, leaving the text and the offset inconsistent.
//   - An empty `_abgc`, or one whose only mark is a single rune, no longer indexes element -1.
func _fafc(_abgc []TextMark, _eccg *int) []TextMark {
	if len(_abgc) == 0 {
		return _abgc
	}
	lastIdx := len(_abgc) - 1
	last := _abgc[lastIdx]

	if len([]rune(last.Text)) == 1 {
		_abgc = _abgc[:lastIdx]
		if len(_abgc) == 0 {
			// No mark remains: rewind to where the removed mark began.
			// NOTE(review): assumes TextMark.Offset is the start offset of TextMark.Text,
			// as implied by the Offset+len(Text) computation below. Confirm.
			*_eccg = last.Offset
			return _abgc
		}
		prev := _abgc[len(_abgc)-1]
		*_eccg = prev.Offset + len(prev.Text)
		return _abgc
	}

	trimmed := _gaaf(last.Text)
	*_eccg += len(trimmed) - len(last.Text)
	// Write through the slice so the caller sees the shortened text.
	_abgc[lastIdx].Text = trimmed
	return _abgc
}

// _fbaf computes a running start offset for each index in [0, _aacf): index i starts where
// index i-1 ended, and each index occupies len(_adeg[i])+1 slots. It returns the per-index
// offsets and the total number of slots.
func _fbaf(_aacf int, _adeg map[int][]float64) ([]int, int) {
	offsets := make([]int, _aacf)
	total := 0
	for i := range offsets {
		offsets[i] = total
		total += len(_adeg[i]) + 1
	}
	return offsets, total
}

// _gcae returns the smallest rectangle that contains both `_egafg` and `_bbef`.
func _gcae(_egafg, _bbef _ac.PdfRectangle) _ac.PdfRectangle {
	return _ac.PdfRectangle{
		Llx: _gc.Min(_egafg.Llx, _bbef.Llx),
		Lly: _gc.Min(_egafg.Lly, _bbef.Lly),
		Urx: _gc.Max(_egafg.Urx, _bbef.Urx),
		Ury: _gc.Max(_egafg.Ury, _bbef.Ury),
	}
}

// isAtom checks whether the para, its _cegf neighbour, its _aegf neighbour and the para
// diagonal to it form a consistent 2x2 arrangement of untaken paras. The diagonal is reached
// as _cegf._aegf and must equal _aegf._cegf. It returns the table built by _ecdfc from those
// four paras, or nil if any is taken or the diagonal is inconsistent.
func (_feeg *textPara) isAtom() *textTable {
	right, below := _feeg._cegf, _feeg._aegf
	if right.taken() || below.taken() {
		return nil
	}
	diagonal := right._aegf
	if diagonal.taken() || diagonal != below._cegf {
		return nil
	}
	return _ecdfc(_feeg, right, below, diagonal)
}
2021-09-23 22:37:42 +00:00
2022-03-13 12:41:53 +00:00
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_ged PageText) Marks() *TextMarkArray {
	marks := TextMarkArray{_dgf: _ged._gccf}
	return &marks
}

// depthIndexes returns the keys of the bag's _cfeeb map in increasing order, or nil when the
// map is empty.
func (_dffg *wordBag) depthIndexes() []int {
	n := len(_dffg._cfeeb)
	if n == 0 {
		return nil
	}
	keys := make([]int, 0, n)
	for key := range _dffg._cfeeb {
		keys = append(keys, key)
	}
	// Map iteration order is random, so sort for a deterministic result.
	_af.Ints(keys)
	return keys
}
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct {
	Fonts []Font
}

// sortReadingOrder reorders the paras in place: it computes their extended bounding boxes
// (computeEBBoxes), pre-sorts them with the _edde comparator, then applies the order returned
// by topoOrder. Lists of zero or one para are left untouched.
func (_aafa paraList) sortReadingOrder() {
	_fb.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_aafa))
	if len(_aafa) <= 1 {
		return
	}
	_aafa.computeEBBoxes()
	// The less function must be a strict ordering: less(i, i) has to be false. The previous
	// `<= 0` comparison reported equal elements as "less" in both directions, which breaks
	// the contract of sort.Slice.
	_af.Slice(_aafa, func(i, j int) bool {
		return _edde(_aafa[i], _aafa[j]) < 0
	})
	order := _aafa.topoOrder()
	_aafa.reorder(order)
}

// last returns the final point of the subpath. The point list (_egd) must not be empty.
func (_babfg *subpath) last() _afd.Point {
	points := _babfg._egd
	return points[len(points)-1]
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20> ).
2022-03-13 12:41:53 +00:00
func (_gce *Extractor) ExtractText() (string, error) {
	// Same as ExtractTextWithStats, with the two character counts dropped.
	text, _, _, err := _gce.ExtractTextWithStats()
	return text, err
}

// text returns the text (_gebf) of every word from allWords(), joined by single spaces.
func (_ddca *wordBag) text() string {
	words := _ddca.allWords()
	parts := make([]string, 0, len(words))
	for _, word := range words {
		parts = append(parts, word._gebf)
	}
	return _cf.Join(parts, "\u0020")
}

// _babf returns the translation matrix that moves by the coordinates of `_dfbf`.
func _babf(_dfbf _afd.Point) _afd.Matrix {
	return _afd.TranslationMatrix(_dfbf.X, _dfbf.Y)
}

// log writes a debug listing of the paras under the heading `_eede`. It does nothing unless
// the _dfgc debug switch is enabled. Nil paras are skipped.
func (_bfgdgb paraList) log(_eede string) {
	if !_dfgc {
		return
	}
	_fb.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _eede, len(_bfgdgb))
	for i, para := range _bfgdgb {
		if para == nil {
			continue
		}
		text := para.text()
		// Paras that carry a table are tagged with the table's dimensions.
		tag := "\u0020\u0020"
		if table := para._ccec; table != nil {
			tag = _cag.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", table._agac, table._dcbdf)
		}
		_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", i, para.PdfRectangle, tag, _bgdca(text, 50))
	}
}

// rulingList is a list of rulings.
type rulingList []*ruling

// bounded is implemented by anything that can report a bounding box.
type bounded interface {
	bbox() _ac.PdfRectangle
}

// Debug switches. Each one is false, either directly or because it is derived with && from a
// switch that is false.
const (
	_dccc  = false
	_abb   = false
	_dbcdf = false
	_ccc   = false
	_efca  = false
	_agcb  = false
	_gbfc  = false
	_dfgc  = false

	_addb = false
	_dfgd = _addb && true
	_ceac = _dfgd && false
	_ecfa = _addb && true

	_agede = false
	_adga  = _agede && false
	_effg  = _agede && true

	_adce   = false
	_gbbagd = _adce && false
	_cecd   = _adce && false
	_bdea   = _adce && true
	_caea   = _adce && false
	_ccfb   = _adce && false
)

// _ebgf builds a ruling of kind _eccgd from the top edge of `_cfge`: its _edcba is the
// rectangle's Ury and it runs from Llx (_bfeag) to Urx (_cbba).
func _ebgf(_cfge _ac.PdfRectangle) *ruling {
	top := ruling{
		_agcgg: _eccgd,
		_edcba: _cfge.Ury,
		_bfeag: _cfge.Llx,
		_cbba:  _cfge.Urx,
	}
	return &top
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
2022-03-13 12:41:53 +00:00
func (_cfc *Extractor) ExtractTextWithStats() (_fgb string, _fddg int, _feab int, _ebe error) {
	pageText, _fddg, _feab, _ebe := _cfc.ExtractPageText()
	if _ebe == nil {
		return pageText.Text(), _fddg, _feab, nil
	}
	// On failure the counts from ExtractPageText are still passed through.
	return "", _fddg, _feab, _ebe
}

// parasBBox returns the cell's paras and its bounding rectangle.
func (_ggae compositeCell) parasBBox() (paraList, _ac.PdfRectangle) {
	paras, bbox := _ggae.paraList, _ggae.PdfRectangle
	return paras, bbox
}

// _beba formats `_dbgf` for debug output as "{key: values, key: values, ...}", with keys in
// the order returned by _ddcd and values printed with two decimals.
func _beba(_dbgf map[int][]float64) string {
	keys := _ddcd(_dbgf)
	entries := make([]string, len(_dbgf))
	for i, key := range keys {
		entries[i] = _cag.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", key, _dbgf[key])
	}
	joined := _cf.Join(entries, "\u002c\u0020")
	return _cag.Sprintf("\u007b\u0025\u0073\u007d", joined)
}

// _acfgd converts exactly two PDF objects to float64 values.
// It returns an error if `_daae` does not have two elements or if the numeric conversion by
// GetNumbersAsFloat fails.
func _acfgd(_daae []_gdd.PdfObject) (_bcff, _bada float64, _efdbc error) {
	if n := len(_daae); n != 2 {
		return 0, 0, _cag.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", n)
	}
	values, _efdbc := _gdd.GetNumbersAsFloat(_daae)
	if _efdbc != nil {
		return 0, 0, _efdbc
	}
	return values[0], values[1], nil
}

// _eadbg returns `_cafaa` with the first occurrence of `_cfffa` (compared by pointer) removed
// via _gegd. If the word is not present, an error is logged and nil is returned.
func _eadbg(_cafaa []*textWord, _cfffa *textWord) []*textWord {
	for i := range _cafaa {
		if _cafaa[i] == _cfffa {
			return _gegd(_cafaa, i)
		}
	}
	_fb.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", _cfffa)
	return nil
}

// pullWord adds `_ggef` to the bag under key `_becf`: the bag's rectangle grows to include the
// word, the bag's _aege is raised to the word's _abeg if that is larger, and the word is
// appended to the bag's list for `_becf`. The word is also recorded in `_cada[_becf]`, which
// must already be a non-nil map.
func (_dffa *wordBag) pullWord(_ggef *textWord, _becf int, _cada map[int]map[*textWord]struct{}) {
	_dffa.PdfRectangle = _gcae(_dffa.PdfRectangle, _ggef.PdfRectangle)
	if _dffa._aege < _ggef._abeg {
		_dffa._aege = _ggef._abeg
	}
	bin := _dffa._cfeeb[_becf]
	_dffa._cfeeb[_becf] = append(bin, _ggef)
	_cada[_becf][_ggef] = struct{}{}
}

// depthRange returns, in increasing order, the keys of the bag's _cfeeb map that lie in the
// closed interval [_aadg, _fbba]. It returns nil when there are none.
func (_ead *wordBag) depthRange(_aadg, _fbba int) []int {
	var keys []int
	for key := range _ead._cfeeb {
		if key < _aadg || key > _fbba {
			continue
		}
		keys = append(keys, key)
	}
	if len(keys) == 0 {
		return nil
	}
	_af.Ints(keys)
	return keys
}
// ExtractFonts returns all font information from the page extractor, including
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
//
// The argument `previousPageFonts` is used when trying to build a complete font catalog for multiple pages or the entire document.
// The entries from `previousPageFonts` are added to the returned result unless already included in the page, i.e. no duplicate entries.
//
// NOTE: If previousPageFonts is nil, all fonts from the page will be returned. Use it when building up a full list of fonts for a document or page range.
func (_acc *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	var pageFonts PageFonts
	if err := pageFonts.extractPageResourcesToFont(_acc._cb); err != nil {
		return nil, err
	}
	if previousPageFonts == nil {
		return &PageFonts{Fonts: pageFonts.Fonts}, nil
	}
	// Carry over earlier fonts that _ade does not find in this page's list by name.
	for _, font := range previousPageFonts.Fonts {
		if _ade(pageFonts.Fonts, font.FontName) {
			continue
		}
		pageFonts.Fonts = append(pageFonts.Fonts, font)
	}
	return &PageFonts{Fonts: pageFonts.Fonts}, nil
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// Len returns the number of TextMarks in `ma`.
func (_effb *TextMarkArray) Len() int {
	// A nil array is treated as empty.
	if _effb != nil {
		return len(_effb._dgf)
	}
	return 0
}

// _ffdcd looks up a single-rune string in the _dedc table.
// It returns the mapped string and true when `_dcfa` is exactly one rune long and that rune is
// a key of _dedc; otherwise the lookup's zero value (or "" for a wrong length) and false.
func _ffdcd(_dcfa string) (string, bool) {
	runes := []rune(_dcfa)
	if len(runes) != 1 {
		return "", false
	}
	mapped, found := _dedc[runes[0]]
	return mapped, found
}

// _aecg reports whether the y extents of the two rectangles overlap (touching counts).
func _aecg(_dfeeg, _gaa _ac.PdfRectangle) bool {
	if _dfeeg.Lly > _gaa.Ury {
		return false
	}
	return _gaa.Lly <= _dfeeg.Ury
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// Append appends `mark` to the mark array.
func (_eegf *TextMarkArray) Append(mark TextMark) {
	// The receiver must be non-nil: unlike Len, this method dereferences it.
	marks := _eegf._dgf
	_eegf._dgf = append(marks, mark)
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	Images []ImageMark
}

// markCells sets the _cfeff flag on every non-nil cell of the table.
func (_bged *textTable) markCells() {
	for y := 0; y < _bged._dcbdf; y++ {
		for x := 0; x < _bged._agac; x++ {
			if cell := _bged.get(x, y); cell != nil {
				cell._cfeff = true
			}
		}
	}
}

// lineTo adds the point (`_edga`, `_acga`) to the shapes state via addPoint, logging it first
// when the _efca debug switch is on.
func (_gca *shapesState) lineTo(_edga, _acga float64) {
	if _efca {
		_fb.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _edga, _acga, _gca.devicePoint(_edga, _acga))
	}
	_gca.addPoint(_edga, _acga)
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_faa PageText) ToText() string {
	return _faa.Text()
}

// _bgdca shortens `_cegfg` to at most `_cdecf` bytes. Strings shorter than `_cdecf` are
// returned unchanged.
//
// The cut is moved back to the nearest rune boundary so the result never ends in a partial
// UTF-8 sequence; the previous version sliced at the raw byte index and could split a
// multi-byte rune.
func _bgdca(_cegfg string, _cdecf int) string {
	if len(_cegfg) < _cdecf {
		return _cegfg
	}
	end := _cdecf
	for end > 0 && end < len(_cegfg) && !_g.RuneStart(_cegfg[end]) {
		end--
	}
	return _cegfg[:end]
}

// appendWord adds `_becg` to the line: the word is appended to _cadc, the line's rectangle is
// grown to include the word, and the line's _eabc and _bcgf are raised to the word's _abeg and
// _fgbda when those are larger.
func (_dbcdd *textLine) appendWord(_becg *textWord) {
	_dbcdd._cadc = append(_dbcdd._cadc, _becg)
	_dbcdd.PdfRectangle = _gcae(_dbcdd.PdfRectangle, _becg.PdfRectangle)
	if _dbcdd._eabc < _becg._abeg {
		_dbcdd._eabc = _becg._abeg
	}
	if _dbcdd._bcgf < _becg._fgbda {
		_dbcdd._bcgf = _becg._fgbda
	}
}

// _gagd returns `_bceb`, followed by the values of `_dfbff` that lie strictly between
// `_bceb` and `_cbdf`, followed by `_cbdf`. Scanning stops at the first value that reaches the
// larger bound, so `_dfbff` is expected to be in increasing order.
func _gagd(_dfbff []float64, _bceb, _cbdf float64) []float64 {
	lo, hi := _bceb, _cbdf
	if hi < lo {
		lo, hi = hi, lo
	}
	out := make([]float64, 0, len(_dfbff)+2)
	out = append(out, _bceb)
	for _, v := range _dfbff {
		if v <= lo {
			continue
		}
		if v >= hi {
			break
		}
		out = append(out, v)
	}
	return append(out, _cbdf)
}

// _gfdcf reports whether the magnitude of `_cfdd` is below the _gbbd tolerance.
func _gfdcf(_cfdd float64) bool {
	return _gc.Abs(_cfdd) < _gbbd
}

// addDiacritic appends `_edcfc` to the text of the word's last mark and normalizes that text
// to NFKC. The word must contain at least one mark.
func (_fbbce *textWord) addDiacritic(_edcfc string) {
	last := _fbbce._ggabc[len(_fbbce._ggabc)-1]
	last._cadaf = _dd.NFKC.String(last._cadaf + _edcfc)
}

// cachedImage pairs an image with a colorspace.
type cachedImage struct {
	_dfa *_ac.Image
	_ecf _ac.PdfColorspace
}

// addNeighbours fills in the four neighbour links of every para:
// _bddfg (left), _cegf (right), _aegf (below) and _dcaba (above).
//
// Horizontal neighbours are chosen from the candidates returned by yNeighbours, vertical ones
// from those returned by xNeighbours. In each direction the closest candidate is taken, and
// only if no other candidate on that side overlaps it along the search axis and the pair
// passes the overlap test (_aecg for left/right, _bcbef for above/below). A final pass clears
// any link that is not reciprocated by the para it points to.
func (_eebe paraList) addNeighbours() {
	// splitLeftRight partitions the candidates into those entirely left of para and those
	// entirely right of it. Candidates that overlap para in x are dropped.
	splitLeftRight := func(indexes []int, para *textPara) ([]*textPara, []*textPara) {
		left := make([]*textPara, 0, len(indexes)-1)
		right := make([]*textPara, 0, len(indexes)-1)
		for _, idx := range indexes {
			other := _eebe[idx]
			switch {
			case other.Urx <= para.Llx:
				left = append(left, other)
			case other.Llx >= para.Urx:
				right = append(right, other)
			}
		}
		return left, right
	}
	// splitAboveBelow partitions the candidates into those entirely above para and those
	// entirely below it. Candidates that overlap para in y are dropped.
	splitAboveBelow := func(indexes []int, para *textPara) ([]*textPara, []*textPara) {
		above := make([]*textPara, 0, len(indexes)-1)
		below := make([]*textPara, 0, len(indexes)-1)
		for _, idx := range indexes {
			other := _eebe[idx]
			switch {
			case other.Ury <= para.Lly:
				below = append(below, other)
			case other.Lly >= para.Ury:
				above = append(above, other)
			}
		}
		return above, below
	}

	// Left and right neighbours.
	neighbours := _eebe.yNeighbours(_aaa)
	for _, para := range _eebe {
		indexes := neighbours[para]
		if len(indexes) == 0 {
			continue
		}
		lefts, rights := splitLeftRight(indexes, para)
		if len(lefts) == 0 && len(rights) == 0 {
			continue
		}
		if len(lefts) > 0 {
			// Closest on the left is the one with the largest right edge.
			best := lefts[0]
			for _, cand := range lefts[1:] {
				if cand.Urx >= best.Urx {
					best = cand
				}
			}
			// Reject it if another left candidate overlaps it in x.
			for _, cand := range lefts {
				if cand != best && cand.Urx > best.Llx {
					best = nil
					break
				}
			}
			if best != nil && _aecg(para.PdfRectangle, best.PdfRectangle) {
				para._bddfg = best
			}
		}
		if len(rights) > 0 {
			// Closest on the right is the one with the smallest left edge.
			best := rights[0]
			for _, cand := range rights[1:] {
				if cand.Llx <= best.Llx {
					best = cand
				}
			}
			for _, cand := range rights {
				if cand != best && cand.Llx < best.Urx {
					best = nil
					break
				}
			}
			if best != nil && _aecg(para.PdfRectangle, best.PdfRectangle) {
				para._cegf = best
			}
		}
	}

	// Below and above neighbours.
	neighbours = _eebe.xNeighbours(_bbdee)
	for _, para := range _eebe {
		indexes := neighbours[para]
		if len(indexes) == 0 {
			continue
		}
		aboves, belows := splitAboveBelow(indexes, para)
		if len(aboves) == 0 && len(belows) == 0 {
			continue
		}
		if len(belows) > 0 {
			// Closest below is the one with the highest top edge.
			best := belows[0]
			for _, cand := range belows[1:] {
				if cand.Ury >= best.Ury {
					best = cand
				}
			}
			for _, cand := range belows {
				if cand != best && cand.Ury > best.Lly {
					best = nil
					break
				}
			}
			if best != nil && _bcbef(para.PdfRectangle, best.PdfRectangle) {
				para._aegf = best
			}
		}
		if len(aboves) > 0 {
			// Closest above is the one with the lowest bottom edge.
			best := aboves[0]
			for _, cand := range aboves[1:] {
				if cand.Lly <= best.Lly {
					best = cand
				}
			}
			for _, cand := range aboves {
				if cand != best && cand.Lly < best.Ury {
					best = nil
					break
				}
			}
			if best != nil && _bcbef(para.PdfRectangle, best.PdfRectangle) {
				para._dcaba = best
			}
		}
	}

	// Drop one-sided links. The order of these checks matters because clearing a link on
	// one para changes what later paras observe.
	for _, para := range _eebe {
		if para._bddfg != nil && para._bddfg._cegf != para {
			para._bddfg = nil
		}
		if para._dcaba != nil && para._dcaba._aegf != para {
			para._dcaba = nil
		}
		if para._cegf != nil && para._cegf._bddfg != para {
			para._cegf = nil
		}
		if para._aegf != nil && para._aegf._dcaba != para {
			para._aegf = nil
		}
	}
}

// asRuling converts the rectangle to a ruling lying along its centre line.
// For kind _aafafg the ruling sits at the x midpoint and spans Lly..Ury; for kind _eccgd it
// sits at the y midpoint and spans Llx..Urx. The width comes from checkWidth on the other
// pair of edges. It returns nil, false if checkWidth rejects the rectangle or the kind is
// neither of the two.
func (_agcf rectRuling) asRuling() (*ruling, bool) {
	r := ruling{_agcgg: _agcf._bbgc, Color: _agcf.Color, _aaec: _ceaac}

	switch _agcf._bbgc {
	case _aafafg:
		r._edcba = 0.5 * (_agcf.Llx + _agcf.Urx)
		r._bfeag = _agcf.Lly
		r._cbba = _agcf.Ury
		width, ok := _agcf.checkWidth(_agcf.Llx, _agcf.Urx)
		if !ok {
			if _caea {
				_fb.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _agcf)
			}
			return nil, false
		}
		r._geba = width
	case _eccgd:
		r._edcba = 0.5 * (_agcf.Lly + _agcf.Ury)
		r._bfeag = _agcf.Llx
		r._cbba = _agcf.Urx
		width, ok := _agcf.checkWidth(_agcf.Lly, _agcf.Ury)
		if !ok {
			if _caea {
				_fb.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _agcf)
			}
			return nil, false
		}
		r._geba = width
	default:
		_fb.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _agcf._bbgc)
		return nil, false
	}
	return &r, true
}

// drawRectangle adds a closed rectangular subpath with one corner at (`_ebb`, `_cfee`),
// width `_caed` and height `_ffc`.
func (_edf *shapesState) drawRectangle(_ebb, _cfee, _caed, _ffc float64) {
	if _efca {
		ll := _edf.devicePoint(_ebb, _cfee)
		ur := _edf.devicePoint(_ebb+_caed, _cfee+_ffc)
		rect := _ac.PdfRectangle{Llx: ll.X, Lly: ll.Y, Urx: ur.X, Ury: ur.Y}
		_fb.Log.Info("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066", rect)
	}
	farX, farY := _ebb+_caed, _cfee+_ffc
	_edf.newSubPath()
	_edf.moveTo(_ebb, _cfee)
	_edf.lineTo(farX, _cfee)
	_edf.lineTo(farX, farY)
	_edf.lineTo(_ebb, farY)
	_edf.closePath()
}

// _dgg turns the text marks of a page into paras in reading order.
//
// The marks are grouped into words (_bfeb), the words are arranged by _fddf, _adbc and _eadb
// using the page height and the vertical/horizontal rulings from `_dece`, and each resulting
// element is turned into a para with arrangeText. If there are at least _eecc paras, tables
// are extracted using `_dafdc`. It returns nil when there are no marks or no words.
func _dgg(_bfdc []*textMark, _eagf _ac.PdfRectangle, _dece rulingList, _dafdc []gridTiling) paraList {
	_fb.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_bfdc), _eagf)
	if len(_bfdc) == 0 {
		return nil
	}
	words := _bfeb(_bfdc, _eagf)
	if len(words) == 0 {
		return nil
	}

	_dece.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := _dece.vertsHorzs()
	pageHeight := _eagf.Ury
	bag := _fddf(words, pageHeight, verts, horzs)
	groups := _eadb(_adbc(bag, pageHeight, verts, horzs))

	paras := make(paraList, 0, len(groups))
	for _, group := range groups {
		if para := group.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}
	if len(paras) >= _eecc {
		paras = paras.extractTables(_dafdc)
	}
	paras.sortReadingOrder()
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// String returns a description of `p`.
func (_efaee *textPara) String() string {
	if _efaee._affbf {
		return _cag.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", _efaee.PdfRectangle)
	}
	// Paras that carry a table are prefixed with the table's dimensions.
	var tableTag string
	if table := _efaee._ccec; table != nil {
		tableTag = _cag.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", table._agac, table._dcbdf)
	}
	return _cag.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", _efaee.PdfRectangle, tableTag, len(_efaee._ddeb), _bgdca(_efaee.text(), 50))
}

// compositeColCorridors returns a map with a nil entry for every index in [0, _agac).
func (_adef *textTable) compositeColCorridors() map[int][]float64 {
	width := _adef._agac
	corridors := make(map[int][]float64, width)
	if _agede {
		_fb.Log.Info("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020", _adef._agac)
	}
	for x := 0; x < width; x++ {
		corridors[x] = nil
	}
	return corridors
}

// clearPath resets the state's _fegc to nil and its _dcef flag to false.
func (_bdg *shapesState) clearPath() {
	_bdg._fegc, _bdg._dcef = nil, false
	if _efca {
		_fb.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _bdg)
	}
}

// appendMark adds `_fffa` to the word: the mark is appended to _ggabc, the word's rectangle is
// grown to include it, _abeg is raised to the mark's _gba if that is larger, and _fgbda is
// recomputed as the distance from the top of `_cgcfa` down to the bottom of the word.
func (_effaf *textWord) appendMark(_fffa *textMark, _cgcfa _ac.PdfRectangle) {
	_effaf._ggabc = append(_effaf._ggabc, _fffa)
	_effaf.PdfRectangle = _gcae(_effaf.PdfRectangle, _fffa.PdfRectangle)
	if _effaf._abeg < _fffa._gba {
		_effaf._abeg = _fffa._gba
	}
	_effaf._fgbda = _cgcfa.Ury - _effaf.PdfRectangle.Lly
}

// close closes the subpath: unless _cdbg reports that the first and last points already
// match, the first point is appended again. It then sets the _ega flag and removes duplicate
// points. The subpath must contain at least one point.
func (_gfceg *subpath) close() {
	first := _gfceg._egd[0]
	if !_cdbg(first, _gfceg.last()) {
		_gfceg.add(first)
	}
	_gfceg._ega = true
	_gfceg.removeDuplicates()
}

// _gcceb reports whether the magnitude of `_gccfa` is below the _cbd tolerance.
func _gcceb(_gccfa float64) bool {
	magnitude := _gc.Abs(_gccfa)
	return magnitude < _cbd
}

// _beee returns the larger of its two arguments.
func _beee(_ffbg, _bcbf int) int {
	if _ffbg <= _bcbf {
		return _bcbf
	}
	return _ffbg
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
2022-03-13 12:41:53 +00:00
type TextTable struct {
	W, H  int
	Cells [][]TableCell
}

// fontEntry pairs a font with an int64 value.
type fontEntry struct {
	_fab  *_ac.PdfFont
	_fabc int64
}

// add appends the given points to the subpath.
func (_dacc *subpath) add(_ebae ..._afd.Point) {
	points := _dacc._egd
	_dacc._egd = append(points, _ebae...)
}

// snapToGroups splits the rulings with vertsHorzs, applies snapToGroupsDirection to each
// non-empty list, and returns the first list followed by the second.
func (_abgff rulingList) snapToGroups() rulingList {
	verts, horzs := _abgff.vertsHorzs()
	if len(verts) != 0 {
		verts = verts.snapToGroupsDirection()
	}
	if len(horzs) != 0 {
		horzs = horzs.snapToGroupsDirection()
	}
	snapped := append(verts, horzs...)
	snapped.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return snapped
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// String returns a description of `k`.
func (_eabcb markKind) String() string {
	// Known kinds have a name in the _fccc table; anything else is reported by number.
	if name, known := _fccc[_eabcb]; known {
		return name
	}
	return _cag.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _eabcb)
}