2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2018-03-22 14:03:47 +00:00
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2022-09-23 18:05:51 +00:00
package extractor ; import ( _f "bytes" ; _g "errors" ; _caa "fmt" ; _d "github.com/unidoc/unipdf/v3/common" ; _gcf "github.com/unidoc/unipdf/v3/contentstream" ; _aa "github.com/unidoc/unipdf/v3/core" ; _gd "github.com/unidoc/unipdf/v3/internal/license" ; _ad "github.com/unidoc/unipdf/v3/internal/textencoding" ;
_bab "github.com/unidoc/unipdf/v3/internal/transform" ; _dc "github.com/unidoc/unipdf/v3/model" ; _ac "golang.org/x/text/unicode/norm" ; _af "golang.org/x/xerrors" ; _ba "image/color" ; _gc "io" ; _ca "math" ; _c "regexp" ; _e "sort" ; _ce "strings" ; _bb "unicode" ; _b "unicode/utf8" ;
)

// _aefa is a meta TextMark whose Text is "[X]" and whose Original is a single
// space; both its fill and stroke colours are white.
var _aefa = TextMark{
	Text:        "\u005b\u0058\u005d",
	Original:    "\u0020",
	Meta:        true,
	FillColor:   _ba.White,
	StrokeColor: _ba.White,
}

// numBorders returns how many of the tile's four border flags are set (0-4).
func (_ecbf gridTile) numBorders() int {
	count := 0
	for _, present := range [...]bool{_ecbf._gcfbe, _ecbf._ebbb, _ecbf._ecbgb, _ecbf._faaa} {
		if present {
			count++
		}
	}
	return count
}

// log writes the ruling list to the log under the label `_fade` followed by
// one numbered line per ruling on stdout. It does nothing unless the
// package-level debug flag `_cage` is set.
func (_afcb rulingList) log(_fade string) {
	if !_cage {
		return
	}
	_d.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _fade, _afcb.String())
	for idx, r := range _afcb {
		_caa.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, r.String())
	}
}

// computeBbox folds the rectangles of all non-nil cells of the table into one
// rectangle using `_abga` (presumably a rectangle union - confirm against its
// definition). A table without any cell yields the zero PdfRectangle.
func (_fbfe *textTable) computeBbox() _dc.PdfRectangle {
	var bbox _dc.PdfRectangle
	started := false
	for row := 0; row < _fbfe._fcedd; row++ {
		for col := 0; col < _fbfe._bgdee; col++ {
			cell := _fbfe.get(col, row)
			switch {
			case cell == nil:
				// Empty cell: nothing to add.
			case !started:
				// The first cell seeds the box so the zero rectangle is never merged in.
				bbox = cell.PdfRectangle
				started = true
			default:
				bbox = _abga(bbox, cell.PdfRectangle)
			}
		}
	}
	return bbox
}
2022-02-05 21:34:53 +00:00
2022-09-23 18:05:51 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	_aaf string
	_gdc *_dc.PdfPageResources
	// _cbb is the rectangle text marks are clipped against in newTextMark,
	// where it is logged as "mediaBox".
	_cbb _dc.PdfRectangle
	_aae *_dc.PdfRectangle
	_ab  map[string]fontEntry
	_bf  map[string]textResult
	_abd int64
	_ff  int
	_da  *Options
}

type markKind int

// depth returns -1 when the `_bedda` flag is set, otherwise the `_gddd` value
// of the paragraph's first line, and, when the paragraph has no lines, the
// depth of its table `_affa`.
func (_aca *textPara) depth() float64 {
	switch {
	case _aca._bedda:
		return -1.0
	case len(_aca._ddaf) > 0:
		return _aca._ddaf[0]._gddd
	default:
		return _aca._affa.depth()
	}
}

// split partitions the composite cell into a table of
// (len(_bgagd)+1) columns by (len(_gffbc)+1) rows. `_gffbc` holds the row
// corridors and `_bgagd` the column corridors (named so in the debug output).
// Each text line of the cell is assigned to the first sub-rectangle that
// contains it, and each non-empty sub-rectangle becomes one table cell built
// by `_dcfc`. The cell's paragraph list is sorted in place as a side effect.
func (_acgf compositeCell) split(_gffbc, _bgagd []float64) *textTable {
	numRows := len(_gffbc) + 1
	numCols := len(_bgagd) + 1
	if _eea {
		_d.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066", numCols, numRows, _acgf, _gffbc, _bgagd)
		_caa.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a", len(_acgf.paraList))
		for idx, para := range _acgf.paraList {
			_caa.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, para.String())
		}
		_caa.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", len(_acgf.lines()))
		for idx, ln := range _acgf.lines() {
			_caa.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, ln)
		}
	}

	// Extend the corridor lists with the cell's own edges (see `_aebga`).
	_gffbc = _aebga(_gffbc, _acgf.Ury, _acgf.Lly)
	_bgagd = _aebga(_bgagd, _acgf.Llx, _acgf.Urx)

	result := textTable{
		_bgdee: numCols,
		_fcedd: numRows,
		_fddab: make(map[uint64]*textPara, numCols*numRows),
	}

	// Order the paragraphs by lower edge, then by left edge.
	paras := _acgf.paraList
	_e.Slice(paras, func(i, j int) bool {
		a, b := paras[i], paras[j]
		if a.Lly != b.Lly {
			return a.Lly < b.Lly
		}
		return a.Llx < b.Llx
	})

	// One rectangle per (column, row) pair, bounded by consecutive corridor values.
	rects := make(map[uint64]_dc.PdfRectangle, numCols*numRows)
	for row, lower := range _gffbc[1:] {
		upper := _gffbc[row]
		for col, right := range _bgagd[1:] {
			left := _bgagd[col]
			rects[_abdec(col, row)] = _dc.PdfRectangle{Llx: left, Urx: right, Lly: lower, Ury: upper}
		}
	}
	if _eea {
		_d.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073")
		_caa.Printf("\u0020\u0020\u0020\u0020")
		for col := 0; col < numCols; col++ {
			_caa.Printf("\u0025\u0033\u0030\u0064\u002c\u0020", col)
		}
		_caa.Println()
		for row := 0; row < numRows; row++ {
			_caa.Printf("\u0020\u0020\u0025\u0032\u0064\u003a", row)
			for col := 0; col < numCols; col++ {
				_caa.Printf("\u00256\u002e\u0032\u0066\u002c\u0020", rects[_abdec(col, row)])
			}
			_caa.Println()
		}
	}

	// locate returns the (column, row) of the first rectangle for which
	// `_gcdf(rect, line)` holds, or (-1, -1) when there is none.
	locate := func(ln *textLine) (int, int) {
		for row := 0; row < numRows; row++ {
			for col := 0; col < numCols; col++ {
				if _gcdf(rects[_abdec(col, row)], ln.PdfRectangle) {
					return col, row
				}
			}
		}
		return -1, -1
	}

	linesByCell := make(map[uint64][]*textLine, numCols*numRows)
	for _, ln := range paras.lines() {
		col, row := locate(ln)
		if col < 0 {
			continue
		}
		key := _abdec(col, row)
		linesByCell[key] = append(linesByCell[key], ln)
	}

	for row := 0; row < len(_gffbc)-1; row++ {
		upper := _gffbc[row]
		lower := _gffbc[row+1]
		for col := 0; col < len(_bgagd)-1; col++ {
			left := _bgagd[col]
			right := _bgagd[col+1]
			bounds := _dc.PdfRectangle{Llx: left, Urx: right, Lly: lower, Ury: upper}
			cellLines := linesByCell[_abdec(col, row)]
			if len(cellLines) == 0 {
				continue
			}
			result.put(col, row, _dcfc(bounds, cellLines))
		}
	}
	return &result
}

// logComposite prints two grids for the table's composite cells when the
// debug flag `_eea` is set: first the number of paragraphs per cell, then the
// first 12 bytes of each cell's merged text. `_ggaab` is a label for the log.
func (_agfc *textTable) logComposite(_ggaab string) {
	if !_eea {
		return
	}

	// Grid 1: paragraph counts.
	_d.Log.Info("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _agfc._bgdee, _agfc._fcedd, _ggaab)
	_caa.Printf("\u0025\u0035\u0073 \u007c", "")
	for col := 0; col < _agfc._bgdee; col++ {
		_caa.Printf("\u0025\u0033\u0064 \u007c", col)
	}
	_caa.Println("")
	_caa.Printf("\u0025\u0035\u0073 \u002b", "")
	for col := 0; col < _agfc._bgdee; col++ {
		_caa.Printf("\u0025\u0033\u0073 \u002b", "\u002d\u002d\u002d")
	}
	_caa.Println("")
	for row := 0; row < _agfc._fcedd; row++ {
		_caa.Printf("\u0025\u0035\u0064 \u007c", row)
		for col := 0; col < _agfc._bgdee; col++ {
			cellParas, _ := _agfc._fgge[_abdec(col, row)].parasBBox()
			_caa.Printf("\u0025\u0033\u0064 \u007c", len(cellParas))
		}
		_caa.Println("")
	}

	// Grid 2: truncated cell text.
	_d.Log.Info("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _agfc._bgdee, _agfc._fcedd, _ggaab)
	_caa.Printf("\u0025\u0035\u0073 \u007c", "")
	for col := 0; col < _agfc._bgdee; col++ {
		_caa.Printf("\u0025\u0031\u0032\u0064\u0020\u007c", col)
	}
	_caa.Println("")
	_caa.Printf("\u0025\u0035\u0073 \u002b", "")
	for col := 0; col < _agfc._bgdee; col++ {
		_caa.Print("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b")
	}
	_caa.Println("")
	for row := 0; row < _agfc._fcedd; row++ {
		_caa.Printf("\u0025\u0035\u0064 \u007c", row)
		for col := 0; col < _agfc._bgdee; col++ {
			cellParas, _ := _agfc._fgge[_abdec(col, row)].parasBBox()
			cellText := ""
			if merged := cellParas.merge(); merged != nil {
				cellText = merged.text()
			}
			// Quote the text to escape control characters, then strip the quotes.
			cellText = _caa.Sprintf("\u0025\u0071", _ebfce(cellText, 12))
			cellText = cellText[1 : len(cellText)-1]
			_caa.Printf("\u0025\u0031\u0032\u0073\u0020\u007c", cellText)
		}
		_caa.Println("")
	}
}

// stroke appends a pathSection made of the current subpaths and the current
// stroke colour to `*_cgcad`. With the debug flag `_cage` set it also prints a
// summary, and with `_eabg` set the subpaths up to and including index 10.
func (_decb *shapesState) stroke(_cgcad *[]pathSection) {
	section := pathSection{_bffc: _decb._bfg, Color: _decb._degb.getStrokeColor()}
	*_cgcad = append(*_cgcad, section)
	if !_cage {
		return
	}
	_caa.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*_cgcad), _decb, _decb._degb.getStrokeColor(), section.bbox())
	if !_eabg {
		return
	}
	for idx, sp := range _decb._bfg {
		_caa.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", idx, sp)
		if idx == 10 {
			break
		}
	}
}

// findTextTables tries to grow a table from every paragraph that is neither
// taken nor zero-width. Candidates come from isAtom, are extended with
// growTable, and are kept only when columns*rows reaches `_baeag`; kept tables
// have their cells marked.
func (_gbbe paraList) findTextTables() []*textTable {
	var found []*textTable
	for _, para := range _gbbe {
		if para.taken() || para.Width() == 0 {
			continue
		}
		candidate := para.isAtom()
		if candidate == nil {
			continue
		}
		candidate.growTable()
		if candidate._bgdee*candidate._fcedd < _baeag {
			continue
		}
		candidate.markCells()
		candidate.log("\u0067\u0072\u006fw\u006e")
		found = append(found, candidate)
	}
	return found
}

// quadraticTo adds only the end point (`_fddb`, `_edfc`) to the current path;
// the first two arguments are not used.
func (_afe *shapesState) quadraticTo(_dbgf, _bcb, _fddb, _edfc float64) {
	if _fgeac {
		_d.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	_afe.addPoint(_fddb, _edfc)
}

// clearPath drops the accumulated subpaths and resets the `_dcgc` flag.
func (_dba *shapesState) clearPath() {
	_dba._bfg = nil
	_dba._dcgc = false
	if _fgeac {
		_d.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _dba)
	}
}

// sortReadingOrder reorders the paragraphs in place: a sort by `_aecf`
// followed by the permutation returned by topoOrder. Lists of zero or one
// paragraph are left untouched.
func (_ggdd paraList) sortReadingOrder() {
	_d.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_ggdd))
	if len(_ggdd) <= 1 {
		return
	}
	_ggdd.computeEBBoxes()
	// NOTE(review): the comparator reports true for equal elements (<= 0),
	// which is not a strict ordering; kept as-is to preserve output order.
	byLine := func(i, j int) bool {
		return _aecf(_ggdd[i], _ggdd[j]) <= 0
	}
	_e.Slice(_ggdd, byLine)
	_ggdd.reorder(_ggdd.topoOrder())
}

// newTextMark builds a textMark for the text `_dcaf` rendered with matrix
// `_fgdbg` and ending at `_bbcf`.
//
// The mark's orientation comes from `_cbbb(angle, _aaaa)`; any orientation
// that is not 0, 90, 180 or 270 (mod 360) is treated as 0. The glyph height
// is the matrix's Y scaling factor, or the X scaling factor for 90/270
// degrees. When the extractor's media box has positive width the rectangle is
// clipped against it via `_cedb`.
//
// The boolean result is false when `_cedb` reports the mark as not being on
// the page, and true otherwise.
func (_eddd *textObject) newTextMark(_dcaf string, _fgdbg _bab.Matrix, _bbcf _bab.Point, _cecf float64, _abdd *_dc.PdfFont, _fabe float64, _bcca, _bggc _ba.Color, _afggc _aa.PdfObject, _egfe []string, _ecbb int) (textMark, bool) {
	orient := _cbbb(_fgdbg.Angle(), _aaaa)

	var height float64
	if orient%180 == 90 {
		height = _fgdbg.ScalingFactorX()
	} else {
		height = _fgdbg.ScalingFactorY()
	}

	// Rectangle from the start point to the end point, grown by the glyph
	// height in the direction given by the orientation.
	start := _fdacf(_fgdbg)
	raw := _dc.PdfRectangle{Llx: start.X, Lly: start.Y, Urx: _bbcf.X, Ury: _bbcf.Y}
	switch orient % 360 {
	case 90:
		raw.Urx -= height
	case 180:
		raw.Ury -= height
	case 270:
		raw.Urx += height
	case 0:
		raw.Ury += height
	default:
		orient = 0
		raw.Ury += height
	}
	if raw.Llx > raw.Urx {
		raw.Llx, raw.Urx = raw.Urx, raw.Llx
	}
	if raw.Lly > raw.Ury {
		raw.Lly, raw.Ury = raw.Ury, raw.Lly
	}

	onPage := true
	if _eddd._ebe._cbb.Width() > 0 {
		clipped, ok := _cedb(raw, _eddd._ebe._cbb)
		if !ok {
			onPage = false
			_d.Log.Debug("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q", raw, _eddd._ebe._cbb, _dcaf)
		}
		raw = clipped
	}

	// Second rectangle: the first one re-expressed relative to the media box
	// according to the orientation.
	rotated := raw
	box := _eddd._ebe._cbb
	switch orient % 360 {
	case 90:
		box.Urx, box.Ury = box.Ury, box.Urx
		rotated = _dc.PdfRectangle{
			Llx: box.Urx - raw.Ury,
			Urx: box.Urx - raw.Lly,
			Lly: raw.Llx,
			Ury: raw.Urx,
		}
	case 180:
		rotated = _dc.PdfRectangle{
			Llx: box.Urx - raw.Llx,
			Urx: box.Urx - raw.Urx,
			Lly: box.Ury - raw.Lly,
			Ury: box.Ury - raw.Ury,
		}
	case 270:
		box.Urx, box.Ury = box.Ury, box.Urx
		rotated = _dc.PdfRectangle{
			Llx: raw.Ury,
			Urx: raw.Lly,
			Lly: box.Ury - raw.Llx,
			Ury: box.Ury - raw.Urx,
		}
	}
	if rotated.Llx > rotated.Urx {
		rotated.Llx, rotated.Urx = rotated.Urx, rotated.Llx
	}
	if rotated.Lly > rotated.Ury {
		rotated.Lly, rotated.Ury = rotated.Ury, rotated.Lly
	}

	mark := textMark{
		_cbge:        _dcaf,
		PdfRectangle: rotated,
		_gde:         raw,
		_eead:        _abdd,
		_beaf:        height,
		_bddca:       _fabe,
		_adcg:        _fgdbg,
		_ffb:         _bbcf,
		_gcce:        orient,
		_fgeee:       _bcca,
		_cab:         _bggc,
		_abda:        _afggc,
		_bfga:        _egfe,
		Th:           _eddd._cgf._abbd,
		Tw:           _eddd._cgf._fgd,
		_gffe:        _ecbb,
	}
	if _agb {
		_d.Log.Info("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073", start, _bbcf, mark.String())
	}
	return mark, onPage
}

// _fbag returns a textObject bound to the given extractor, resources,
// graphics state, text state and state stack, with both of its matrices set
// to the identity.
func _fbag(_cdd *Extractor, _agef *_dc.PdfPageResources, _gab _gcf.GraphicsState, _afga *textState, _fceb *stateStack) *textObject {
	obj := &textObject{
		_ebe:  _cdd,
		_eff:  _agef,
		_fdf:  _gab,
		_cdbd: _fceb,
		_cgf:  _afga,
		_aafd: _bab.IdentityMatrix(),
		_acd:  _bab.IdentityMatrix(),
	}
	return obj
}

// _caca rounds `_fcbf` to the nearest multiple of the package constant `_bacb`.
func _caca(_fcbf float64) float64 {
	steps := _ca.Round(_fcbf / _bacb)
	return _bacb * steps
}

// xNeighbours builds one opening event at each paragraph's left edge and one
// closing event at its right edge, then hands them to eventNeighbours. When
// `_abeb` is non-zero each edge is pushed outwards by `_abeb` times the
// paragraph's font size.
func (_bbcb paraList) xNeighbours(_abeb float64) map[*textPara][]int {
	events := make([]event, 2*len(_bbcb))
	if _abeb == 0 {
		for idx, para := range _bbcb {
			events[2*idx] = event{para.Llx, true, idx}
			events[2*idx+1] = event{para.Urx, false, idx}
		}
	} else {
		for idx, para := range _bbcb {
			events[2*idx] = event{para.Llx - _abeb*para.fontsize(), true, idx}
			events[2*idx+1] = event{para.Urx + _abeb*para.fontsize(), false, idx}
		}
	}
	return _bbcb.eventNeighbours(events)
}

// applyRemovals deletes from the bag, bin by bin, the words listed in `_aega`
// (bin index -> set of words). A bin whose size equals the size of its removal
// set is deleted outright.
//
// NOTE(review): the new bin is sized as len(bin)-len(set), so every word in a
// removal set is assumed to be present in that bin - confirm with callers.
func (_babfc *wordBag) applyRemovals(_aega map[int]map[*textWord]struct{}) {
	for bin, doomed := range _aega {
		if len(doomed) == 0 {
			continue
		}
		words := _babfc._adcb[bin]
		remaining := len(words) - len(doomed)
		if remaining == 0 {
			delete(_babfc._adcb, bin)
			continue
		}
		kept := make([]*textWord, remaining)
		next := 0
		for _, word := range words {
			if _, gone := doomed[word]; gone {
				continue
			}
			kept[next] = word
			next++
		}
		_babfc._adcb[bin] = kept
	}
}

// put stores `_dadf` as the cell at column `_ebbbc`, row `_adfg`.
func (_gafa *textTable) put(_ebbbc, _adfg int, _dadf *textPara) {
	key := _abdec(_ebbbc, _adfg)
	_gafa._fddab[key] = _dadf
}

// absorb merges `_gacf` into the receiver: the rectangles are combined with
// `_abga` and the marks of `_gacf` are appended.
func (_eegc *textWord) absorb(_gacf *textWord) {
	_eegc.PdfRectangle = _abga(_eegc.PdfRectangle, _gacf.PdfRectangle)
	_eegc._gbaed = append(_eegc._gbaed, _gacf._gbaed...)
}

// setCharSpacing stores `_ddd` as the character spacing of the text state.
// It is a no-op on a nil receiver.
func (_efb *textObject) setCharSpacing(_ddd float64) {
	if _efb == nil {
		return
	}
	_efb._cgf._cace = _ddd
	if _gfaee {
		_d.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _ddd, _efb._cgf.String())
	}
}

// _ebfce returns `_fccff` cut down to at most `_caeg` bytes.
//
// NOTE(review): the cut is made on a byte index, so a multi-byte UTF-8
// sequence can be split.
func _ebfce(_fccff string, _caeg int) string {
	if len(_fccff) >= _caeg {
		return _fccff[:_caeg]
	}
	return _fccff
}

// updateBBox merges the rectangle of every paragraph into the cell's own
// rectangle using `_abga`.
func (_acfg *compositeCell) updateBBox() {
	for _, para := range _acfg.paraList {
		_acfg.PdfRectangle = _abga(_acfg.PdfRectangle, para.PdfRectangle)
	}
}

// _fgbcc classifies the segment between two points by passing its absolute X
// and Y extents, together with the tolerance `_dagaf`, to `_afgac`.
func _fgbcc(_cdbf, _gfbfa _bab.Point) rulingKind {
	dx := _ca.Abs(_cdbf.X - _gfbfa.X)
	dy := _ca.Abs(_cdbf.Y - _gfbfa.Y)
	return _afgac(dx, dy, _dagaf)
}

// ruling is a ruling line with a kind, a mark kind, a colour and four
// coordinates.
type ruling struct {
	_fabfb rulingKind
	_fcgb  markKind
	_ba.Color
	_efbdg float64
	_becdd float64
	_aggb  float64
	_gebag float64
}

// _aecf compares two bounded values: it returns `_aaed(a, b)` unless `_edeg`
// accepts that value, in which case `_deba(a, b)` decides.
func _aecf(_cgfb, _ffdg bounded) float64 {
	if primary := _aaed(_cgfb, _ffdg); !_edeg(primary) {
		return primary
	}
	return _deba(_cgfb, _ffdg)
}

// extractPageResourcesToFont appends one Font entry to `_aba.Fonts` for every
// font in the page resources whose name is not already recorded (per `_dca`).
//
// The embedded font program, when present, is decoded from the first non-nil
// of FontFile, FontFile2 and FontFile3, and the file name becomes the font
// name plus ".pfb", ".ttf" or ".cff" respectively. A font without an embedded
// program is still recorded, after a debug message.
//
// It returns an error when the resources have no font dictionary, when a
// listed font cannot be looked up or constructed, or when a font stream
// cannot be decoded.
func (_aba *PageFonts) extractPageResourcesToFont(_ge *_dc.PdfPageResources) error {
	fontDict, ok := _aa.GetDict(_ge.Font)
	if !ok {
		return _g.New(_bbf)
	}
	for _, key := range fontDict.Keys() {
		var (
			fontData []byte
			fileName string
		)
		fontObj, found := _ge.GetFontByName(key)
		if !found {
			return _g.New(_dac)
		}
		font, err := _dc.NewPdfFontFromPdfObject(fontObj)
		if err != nil {
			return err
		}
		// NOTE(review): the descriptor is dereferenced without a nil check;
		// confirm FontDescriptor() cannot return nil for these fonts.
		desc := font.FontDescriptor()
		fontName := font.FontDescriptor().FontName.String()
		subtype := font.Subtype()
		if _dca(_aba.Fonts, fontName) {
			continue
		}
		hasToUnicode := len(font.ToUnicode()) != 0

		switch {
		case desc.FontFile != nil:
			if stream, isStream := _aa.GetStream(desc.FontFile); isStream {
				fontData, err = _aa.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0070\u0066\u0062"
			}
		case desc.FontFile2 != nil:
			if stream, isStream := _aa.GetStream(desc.FontFile2); isStream {
				fontData, err = _aa.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0074\u0074\u0066"
			}
		case desc.FontFile3 != nil:
			if stream, isStream := _aa.GetStream(desc.FontFile3); isStream {
				fontData, err = _aa.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0063\u0066\u0066"
			}
		}
		if len(fileName) < 1 {
			_d.Log.Debug(_eg)
		}

		_aba.Fonts = append(_aba.Fonts, Font{
			FontName:       fontName,
			PdfFont:        font,
			IsCID:          font.IsCID(),
			IsSimple:       font.IsSimple(),
			ToUnicode:      hasToUnicode,
			FontType:       subtype,
			FontData:       fontData,
			FontFileName:   fileName,
			FontDescriptor: desc,
		})
	}
	return nil
}

// toTextMarks returns the TextMarks of all words of the line in order. A
// space mark is added through `_egag` before every word whose `_gabe` flag is
// set. `_dafg` is passed on to `_egag` and to each word's toTextMarks.
func (_dgea *textLine) toTextMarks(_dafg *int) []TextMark {
	var marks []TextMark
	for _, word := range _dgea._ccfce {
		if word._gabe {
			marks = _egag(marks, _dafg, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_dafg)...)
	}
	return marks
}
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_dfca PageText) ToText() string {
	text := _dfca.Text()
	return text
}

// size returns the number of text states currently on the stack.
func (_dcdd *stateStack) size() int {
	stack := *_dcdd
	return len(stack)
}

// toTilings tidies the rulings, groups them with toGrids and converts every
// grid with asTiling. It returns the tidied rulings together with the
// tilings, or (nil, nil) for an empty list.
func (_aafe rulingList) toTilings() (rulingList, []gridTiling) {
	_aafe.log("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s")
	if len(_aafe) == 0 {
		return nil, nil
	}
	_aafe = _aafe.tidied("\u0061\u006c\u006c")
	_aafe.log("\u0074\u0069\u0064\u0069\u0065\u0064")

	grids := _aafe.toGrids()
	tilings := make([]gridTiling, len(grids))
	for idx := range grids {
		tilings[idx] = grids[idx].asTiling()
	}
	return _aafe, tilings
}

// computeViews derives the page's text (`_dbdg`), text marks (`_efda`) and
// tables (`_fdc`) from its raw marks `_gfc`.
//
// Rulings are gathered from `_ggeb` (when `_efg` is set) and from `_agc`
// (when `_abcf` is set) and turned into tilings. The marks are then processed
// one orientation at a time (0, 90, 180, 270), each group being converted to
// paragraphs by `_fdef`.
func (_fbg *PageText) computeViews() {
	var rulings rulingList
	if _efg {
		rulings = append(rulings, _gfge(_fbg._ggeb)...)
	}
	if _abcf {
		rulings = append(rulings, _cega(_fbg._agc)...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(_fbg._gfc)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		// NOTE(review): the capacity hint is the number of marks already
		// consumed, not the number remaining; kept unchanged.
		group := make([]*textMark, 0, len(_fbg._gfc)-remaining)
		for _, mark := range _fbg._gfc {
			if mark._gcce == orient {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _fdef(group, _fbg._fdbe, rulings, tilings)...)
		remaining -= len(group)
	}

	out := new(_f.Buffer)
	paras.writeText(out)
	_fbg._dbdg = out.String()
	_fbg._efda = paras.toTextMarks()
	_fbg._fdc = paras.tables()
	if _eea {
		_d.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(_fbg._fdc))
	}
}

// textWord is a bounded sequence of text marks.
type textWord struct {
	_dc.PdfRectangle
	_cffg float64
	// _bgdg is the word's text.
	_bgdg string
	// _gbaed holds the marks that make up the word.
	_gbaed []*textMark
	_debab float64
	// _gabe makes textLine.toTextMarks emit a space mark before the word.
	_gabe bool
}
// String returns a description of the table: its column count, its row count
// and the `_bebc` flag.
func (_egdbe *textTable) String() string {
	const layout = "\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074"
	return _caa.Sprintf(layout, _egdbe._bgdee, _egdbe._fcedd, _egdbe._bebc)
}

// devicePoint transforms (`_fgee`, `_gbgg`) by the product of the state's
// `_gcbe` and `_eabb` matrices and returns the result as a point.
func (_gcbc *shapesState) devicePoint(_fgee, _gbgg float64) _bab.Point {
	combined := _gcbc._gcbe.Mult(_gcbc._eabb)
	x, y := combined.Transform(_fgee, _gbgg)
	return _bab.NewPoint(x, y)
}

// showTextAdjusted processes the elements of `_ddfc` in order. A number n
// concatenates a translation of -n*0.001*`_fdd` along X onto the `_aafd`
// matrix; a string is passed to renderText. Any other element type, or a
// string whose bytes cannot be read, yields core.ErrTypeError.
func (_afdf *textObject) showTextAdjusted(_ddfc *_aa.PdfObjectArray) error {
	// Never set in this function; the swap below is kept for parity.
	vertical := false
	for _, elem := range _ddfc.Elements() {
		switch elem.(type) {
		case *_aa.PdfObjectFloat, *_aa.PdfObjectInteger:
			amount, err := _aa.GetNumberAsFloat(elem)
			if err != nil {
				_d.Log.Debug("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _ddfc)
				return err
			}
			dx, dy := -amount*0.001*_afdf._cgf._fdd, 0.0
			if vertical {
				dy, dx = dx, dy
			}
			shift := _aec(_bab.Point{X: dx, Y: dy})
			_afdf._aafd.Concat(shift)
		case *_aa.PdfObjectString:
			direct := _aa.TraceToDirectObject(elem)
			raw, ok := _aa.GetStringBytes(direct)
			if !ok {
				_d.Log.Trace("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _ddfc)
				return _aa.ErrTypeError
			}
			_afdf.renderText(direct, raw)
		default:
			_d.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _ddfc)
			return _aa.ErrTypeError
		}
	}
	return nil
}

// _badg returns a subpath whose only point is `_bfbcc`.
func _badg(_bfbcc _bab.Point) *subpath {
	points := []_bab.Point{_bfbcc}
	return &subpath{_bcbc: points}
}

// _cabb builds a ruling from the segment `_bdcf`-`_dgcab` with colour
// `_bcaa`. It returns (nil, false) when `_fgbcc` classifies the segment as
// `_aebb`; otherwise it returns whatever asRuling returns.
func _cabb(_bdcf, _dgcab _bab.Point, _bcaa _ba.Color) (*ruling, bool) {
	kind := _fgbcc(_bdcf, _dgcab)
	line := lineRuling{_becgc: _bdcf, _facd: _dgcab, _gfdf: kind, Color: _bcaa}
	if line._gfdf == _aebb {
		return nil, false
	}
	return line.asRuling()
}

// _gdfc reports whether the magnitude of `_bedf` is below the tolerance `_geff`.
func _gdfc(_bedf float64) bool {
	magnitude := _ca.Abs(_bedf)
	return magnitude < _geff
}

// _dbef returns a ruling of kind `_bcaef` placed at the rectangle's Urx and
// spanning from its Lly to its Ury.
func _dbef(_gfcc _dc.PdfRectangle) *ruling {
	r := ruling{_fabfb: _bcaef}
	r._efbdg = _gfcc.Urx
	r._becdd = _gfcc.Lly
	r._aggb = _gfcc.Ury
	return &r
}

// _gcdf reports whether `_ffcc` lies inside `_cedf`, edges included.
func _gcdf(_cedf, _ffcc _dc.PdfRectangle) bool {
	insideX := _cedf.Llx <= _ffcc.Llx && _ffcc.Urx <= _cedf.Urx
	insideY := _cedf.Lly <= _ffcc.Lly && _ffcc.Ury <= _cedf.Ury
	return insideX && insideY
}

// encloses reports whether the interval [`_abcfb`, `_afgf`] fits within the
// ruling's extent [`_becdd`, `_aggb`] widened by the tolerance `_aecg` on
// both sides.
func (_cgdd *ruling) encloses(_abcfb, _afgf float64) bool {
	if _abcfb < _cgdd._becdd-_aecg {
		return false
	}
	return _afgf <= _cgdd._aggb+_aecg
}

// blocks reports whether some ruling in the list lies between `_egecg` and
// `_ccgf` (by `_efbdg`, with tolerance `_geff`) and overlaps the extent the
// two share. Rulings whose extents do not overlap are never blocked.
func (_fbbge rulingList) blocks(_egecg, _ccgf *ruling) bool {
	if _egecg._becdd > _ccgf._aggb || _ccgf._becdd > _egecg._aggb {
		return false
	}
	sharedLo := _ca.Max(_egecg._becdd, _ccgf._becdd)
	sharedHi := _ca.Min(_egecg._aggb, _ccgf._aggb)
	// Order the pair so that `_egecg` has the smaller `_efbdg`.
	if _egecg._efbdg > _ccgf._efbdg {
		_egecg, _ccgf = _ccgf, _egecg
	}
	for _, r := range _fbbge {
		between := _egecg._efbdg <= r._efbdg+_geff && r._efbdg <= _ccgf._efbdg+_geff
		if !between {
			continue
		}
		if r._becdd <= sharedHi && sharedLo <= r._aggb {
			return true
		}
	}
	return false
}

// stateStack is a stack of text states.
type stateStack []*textState

// push appends a copy of `*_dfb` to the stack, so later changes to the
// caller's state do not affect the saved one.
func (_abb *stateStack) push(_dfb *textState) {
	saved := new(textState)
	*saved = *_dfb
	*_abb = append(*_abb, saved)
}

// complete reports whether all four borders of the tile are present.
func (_fddde gridTile) complete() bool {
	const allBorders = 4
	return _fddde.numBorders() == allBorders
}

// tables returns a TextTable for every paragraph that has a table (`_affa`)
// for which isExportable is true.
func (_bggff paraList) tables() []TextTable {
	var exported []TextTable
	if _eea {
		_d.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range _bggff {
		table := para._affa
		if table == nil || !table.isExportable() {
			continue
		}
		exported = append(exported, table.toTextTable())
	}
	return exported
}

// _cadb returns a 2x2 table with `_dedb` at (0,0), `_gdce` at (1,0),
// `_gbaaf` at (0,1) and `_bdcag` at (1,1), as (column, row).
func _cadb(_dedb, _gdce, _gbaaf, _bdcag *textPara) *textTable {
	table := &textTable{
		_bgdee: 2,
		_fcedd: 2,
		_fddab: make(map[uint64]*textPara, 4),
	}
	table.put(0, 0, _dedb)
	table.put(1, 0, _gdce)
	table.put(0, 1, _gbaaf)
	table.put(1, 1, _bdcag)
	return table
}

// getFontDirect looks up the font dictionary named `_gbfd` and builds a
// PdfFont from it. A lookup error is returned as is; a construction error is
// logged and returned together with whatever font value the constructor
// produced.
func (_cbde *textObject) getFontDirect(_gbfd string) (*_dc.PdfFont, error) {
	fontObj, err := _cbde.getFontDict(_gbfd)
	if err != nil {
		return nil, err
	}
	font, err := _dc.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		_d.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _gbfd, err)
	}
	return font, err
}

// reduce returns a table without the composite rows and columns that are
// empty. When nothing is empty the receiver itself is returned; otherwise a
// new table is built from the surviving composite cells.
func (_dcba *textTable) reduce() *textTable {
	keptRows := make([]int, 0, _dcba._fcedd)
	keptCols := make([]int, 0, _dcba._bgdee)
	for row := 0; row < _dcba._fcedd; row++ {
		if _dcba.emptyCompositeRow(row) {
			continue
		}
		keptRows = append(keptRows, row)
	}
	for col := 0; col < _dcba._bgdee; col++ {
		if _dcba.emptyCompositeColumn(col) {
			continue
		}
		keptCols = append(keptCols, col)
	}
	if len(keptRows) == _dcba._fcedd && len(keptCols) == _dcba._bgdee {
		return _dcba
	}

	reduced := textTable{
		_bebc:  _dcba._bebc,
		_bgdee: len(keptCols),
		_fcedd: len(keptRows),
		_fddab: make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _eea {
		_d.Log.Info("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064", _dcba._bgdee, _dcba._fcedd, len(keptCols), len(keptRows))
		_d.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_d.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}
	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			cellParas, cellBox := _dcba.getComposite(oldCol, oldRow)
			if cellParas == nil {
				continue
			}
			if _eea {
				_caa.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newCol, newRow, oldCol, oldRow, _ebfce(cellParas.merge().text(), 50))
			}
			reduced.putComposite(newCol, newRow, cellParas, cellBox)
		}
	}
	return &reduced
}

const _gdcag = 1.0 / 1000.0

// endsInHyphen reports whether the line ends in a hyphen. The last rune of
// the last word must be in unicode.Hyphen; the answer is then given by
// `_dgcf` applied to that word (when its `_gabe` flag is set) or, failing
// that, to the text of the whole line.
//
// NOTE(review): indexes the last word unconditionally, so the line is assumed
// to contain at least one word - confirm with callers.
func (_abfb *textLine) endsInHyphen() bool {
	words := _abfb._ccfce
	last := words[len(words)-1]
	lastText := last._bgdg
	lastRune, size := _b.DecodeLastRuneInString(lastText)
	if size <= 0 {
		return false
	}
	if !_bb.Is(_bb.Hyphen, lastRune) {
		return false
	}
	if last._gabe && _dgcf(lastText) {
		return true
	}
	return _dgcf(_abfb.text())
}

// _dbcfe builds a word bag from `_cgce`: the bag is created from the first
// word via `_afccc`, the remaining words are filed under the bin `_ebcf`
// computes from their `_cffg`, and the bag is sorted.
//
// NOTE(review): `_cgce` must not be empty; the first element is read
// unconditionally.
func _dbcfe(_cgce []*textWord, _abce float64, _gga, _gcd rulingList) *wordBag {
	bag := _afccc(_cgce[0], _abce, _gga, _gcd)
	for idx := 1; idx < len(_cgce); idx++ {
		word := _cgce[idx]
		bin := _ebcf(word._cffg)
		bag._adcb[bin] = append(bag._adcb[bin], word)
		bag.PdfRectangle = _abga(bag.PdfRectangle, word.PdfRectangle)
	}
	bag.sort()
	return bag
}
// TableCell is a cell in a TextTable.
type TableCell struct {
	// Text is the extracted text.
	Text string
	// Marks holds the TextMarks corresponding to the text in Text.
	Marks TextMarkArray
}

// has reports whether `_bbfdce` is a member of the set.
func (_defc intSet) has(_bbfdce int) bool {
	if _, member := _defc[_bbfdce]; member {
		return true
	}
	return false
}
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20> ).
func ( _bag * Extractor ) ExtractText ( ) ( string , error ) { _gdca , _ , _ , _cbaa := _bag . ExtractTextWithStats ( ) ; return _gdca , _cbaa ; } ;
// New returns an Extractor instance for extracting content from the input PDF page.
// It is equivalent to calling NewWithOptions with nil options.
func New ( page * _dc . PdfPage ) ( * Extractor , error ) { return NewWithOptions ( page , nil ) } ;
// gridTiling is a rectangle together with two float64 slices and a two-level map of
// gridTile values keyed by float64.
// NOTE(review): the slices and the map keys are presumably the coordinates of the grid
// lines in each direction — confirm against the code that builds the tiling.
type gridTiling struct { _dc . PdfRectangle ; _gggb [ ] float64 ; _gceb [ ] float64 ; _begc map [ float64 ] map [ float64 ] gridTile ; } ;
// appendMark adds `_egdgc` to the word's list of marks, grows the word's rectangle to
// include the mark's rectangle, raises the word's _debab to the mark's _beaf when that is
// larger, and recomputes _cffg as `_eeea.Ury` minus the word's lower y coordinate.
// NOTE(review): `_eeea` looks like the page bounds, which would make _cffg the word's
// depth measured from the top of the page — confirm with callers.
func ( _acfab * textWord ) appendMark ( _egdgc * textMark , _eeea _dc . PdfRectangle ) { _acfab . _gbaed = append ( _acfab . _gbaed , _egdgc ) ;
_acfab . PdfRectangle = _abga ( _acfab . PdfRectangle , _egdgc . PdfRectangle ) ; if _egdgc . _beaf > _acfab . _debab { _acfab . _debab = _egdgc . _beaf ; } ; _acfab . _cffg = _eeea . Ury - _acfab . PdfRectangle . Lly ; } ;
// _dee reports whether the horizontal extents [Llx, Urx] of the two rectangles overlap.
// Rectangles that merely touch count as overlapping because both comparisons are inclusive.
func _dee ( _ffge , _cgfc _dc . PdfRectangle ) bool { return _cgfc . Llx <= _ffge . Urx && _ffge . Llx <= _cgfc . Urx ;
} ;
// _ebbda returns the outer keys of `_ddca` in descending order: the keys are collected,
// sorted ascending and then reversed in place.
func _ebbda ( _ddca map [ float64 ] map [ float64 ] gridTile ) [ ] float64 { _bcgf := make ( [ ] float64 , 0 , len ( _ddca ) ) ; for _gaad := range _ddca { _bcgf = append ( _bcgf , _gaad ) ; } ; _e . Float64s ( _bcgf ) ; _caba := len ( _bcgf ) ; for _baeb := 0 ; _baeb < _caba / 2 ; _baeb ++ { _bcgf [ _baeb ] , _bcgf [ _caba - 1 - _baeb ] = _bcgf [ _caba - 1 - _baeb ] , _bcgf [ _baeb ] ;
} ; return _bcgf ; } ;
// getFontDict looks up the font named `_babf` in the text object's resources (_eff).
// When the text object has no resources it logs a debug message and returns (nil, nil);
// when the resources do not contain the font it logs and returns an error.
func ( _aed * textObject ) getFontDict ( _babf string ) ( _bbff _aa . PdfObject , _gad error ) { _dae := _aed . _eff ; if _dae == nil { _d . Log . Debug ( "g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071" , _babf ) ;
return nil , nil ; } ; _bbff , _cbca := _dae . GetFontByName ( _aa . PdfObjectName ( _babf ) ) ; if ! _cbca { _d . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071" , _babf ) ;
return nil , _g . New ( "f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073" ) ; } ; return _bbff , nil ; } ;
// writeText writes the text of every paragraph in the list to `_dfda`, skipping paragraphs
// whose _bedda flag is set. After each written paragraph except the one at the last index
// it writes a single space when _adfb(paragraph, next paragraph) is true and two newlines
// otherwise. Two newlines are always written at the end.
// The "next paragraph" is the next list element even if that element is itself skipped.
// Errors returned by Write are ignored.
func ( _effb paraList ) writeText ( _dfda _gc . Writer ) { for _afdd , _bda := range _effb { if _bda . _bedda { continue ;
} ; _bda . writeText ( _dfda ) ; if _afdd != len ( _effb ) - 1 { if _adfb ( _bda , _effb [ _afdd + 1 ] ) { _dfda . Write ( [ ] byte ( "\u0020" ) ) ; } else { _dfda . Write ( [ ] byte ( "\u000a" ) ) ; _dfda . Write ( [ ] byte ( "\u000a" ) ) ; } ; } ; } ; _dfda . Write ( [ ] byte ( "\u000a" ) ) ; _dfda . Write ( [ ] byte ( "\u000a" ) ) ;
} ;
// snapToGroupsDirection groups the rulings in the list by their primary coordinate
// (_efbdg), snaps every ruling of a group to one shared primary value and returns the
// merged rulings.
//
// Steps:
//  1. The list is sorted with sortStrict.
//  2. Rulings are walked in order. A ruling starts a new group when its _efbdg exceeds the
//     current group leader's _efbdg by more than _geff; otherwise it joins the current
//     group. A ruling whose _efbdg is below the leader's by more than _agfe is reported
//     as an ordering error but is still grouped.
//  3. Each group's mergePrimary() value is written back into the _efbdg of every ruling
//     in that group (the input rulings are mutated).
//  4. Each group is split with splitSec and every part is merged. A merged ruling that
//     aligns (primary and secondary) with the most recently appended result is logged
//     as a duplicate and dropped. Groups are visited in map iteration order.
//  5. The result is sorted with sortStrict.
//
// The list must not be empty: _cacb[0] is read unconditionally.
func ( _cacb rulingList ) snapToGroupsDirection ( ) rulingList { _cacb . sortStrict ( ) ; _cfffb := make ( map [ * ruling ] rulingList , len ( _cacb ) ) ; _afdc := _cacb [ 0 ] ; _ccfde := func ( _ffdc * ruling ) { _afdc = _ffdc ; _cfffb [ _afdc ] = rulingList { _ffdc } } ; _ccfde ( _cacb [ 0 ] ) ;
for _ , _geada := range _cacb [ 1 : ] { if _geada . _efbdg < _afdc . _efbdg - _agfe { _d . Log . Error ( "\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073" , _afdc , _geada ) ;
} ; if _geada . _efbdg > _afdc . _efbdg + _geff { _ccfde ( _geada ) ; } else { _cfffb [ _afdc ] = append ( _cfffb [ _afdc ] , _geada ) ; } ; } ; _edbb := make ( map [ * ruling ] float64 , len ( _cfffb ) ) ; _becfg := make ( map [ * ruling ] * ruling , len ( _cacb ) ) ; for _bged , _fbddd := range _cfffb { _edbb [ _bged ] = _fbddd . mergePrimary ( ) ;
for _ , _agae := range _fbddd { _becfg [ _agae ] = _bged ; } ; } ; for _ , _fbdge := range _cacb { _fbdge . _efbdg = _edbb [ _becfg [ _fbdge ] ] ; } ; _cdgg := make ( rulingList , 0 , len ( _cacb ) ) ; for _ , _gfbde := range _cfffb { _bcce := _gfbde . splitSec ( ) ; for _fdaa , _fbfg := range _bcce { _gffg := _fbfg . merge ( ) ;
if len ( _cdgg ) > 0 { _dbeae := _cdgg [ len ( _cdgg ) - 1 ] ; if _dbeae . alignsPrimary ( _gffg ) && _dbeae . alignsSec ( _gffg ) { _d . Log . Error ( "\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073" , _fdaa , _dbeae , _gffg ) ;
continue ; } ; } ; _cdgg = append ( _cdgg , _gffg ) ; } ; } ; _cdgg . sortStrict ( ) ; return _cdgg ; } ;
// paraList is a list of pointers to textPara.
type paraList [ ] * textPara ;
// extractContentStreamImages parses the content stream `_cga` and runs a content stream
// processor over it with processOperand registered as the handler for all operands,
// using `_gge` as the resources passed to Process.
// The image cache (_egc) and the options (_cbc) are initialised first if they are nil.
// A parse error, or the error returned by Process, is returned to the caller.
func ( _cce * imageExtractContext ) extractContentStreamImages ( _cga string , _gge * _dc . PdfPageResources ) error { _acf := _gcf . NewContentStreamParser ( _cga ) ;
_ef , _fgc := _acf . Parse ( ) ; if _fgc != nil { return _fgc ; } ; if _cce . _egc == nil { _cce . _egc = map [ * _aa . PdfObjectStream ] * cachedImage { } ; } ; if _cce . _cbc == nil { _cce . _cbc = & ImageExtractOptions { } ; } ; _dcf := _gcf . NewContentStreamProcessor ( * _ef ) ; _dcf . AddHandler ( _gcf . HandlerConditionEnumAllOperands , "" , _cce . processOperand ) ;
return _dcf . Process ( _gge ) ; } ;
// moveText forwards both offsets unchanged to moveLP.
func ( _ddf * textObject ) moveText ( _abdbd , _acfc float64 ) { _ddf . moveLP ( _abdbd , _acfc ) } ;
// Font represents the font properties on a PDF page.
type Font struct {
// PdfFont is the model-level font object these properties belong to.
PdfFont * _dc . PdfFont ;
// FontName represents the Font Name from the font properties.
FontName string ;
// FontType represents the Font Subtype entry in the font dictionary inside page resources.
// Examples: Type0, Type1, MMType1, Type3, TrueType, CIDFont.
FontType string ;
// ToUnicode is true if the font provides a `ToUnicode` mapping.
ToUnicode bool ;
// IsCID is true if the underlying font is a composite font.
// A composite font is represented by a font dictionary whose Subtype is `Type0`.
IsCID bool ;
// IsSimple is true if the font is a simple font.
// A simple font is limited to 8-bit character codes (0-255).
IsSimple bool ;
// FontData represents the raw data of the embedded font file.
// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CFF).
// FontData is taken from the `FontFile`, `FontFile2` or `FontFile3` entry of the Font Descriptor.
// At most one of `FontFile`, `FontFile2` or `FontFile3` provides the FontData value.
FontData [ ] byte ;
// FontFileName is a name representing the font. It has the format:
// (Font Name) + (Font Type Extension), example: helvetica.ttf.
FontFileName string ;
// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
FontDescriptor * _dc . PdfFontDescriptor ; } ;
// clear resets the subpath to its zero value.
func ( _bbfa * subpath ) clear ( ) { * _bbfa = subpath { } } ;
// bbox returns the mark's embedded rectangle.
func ( _cbfe * textMark ) bbox ( ) _dc . PdfRectangle { return _cbfe . PdfRectangle } ;
// markKind values, numbered 0 to 3 by iota.
// NOTE(review): the meaning of the individual kinds is not visible here; _cefcf is the
// kind assigned to the rulings that augmentGrid synthesises.
const ( _ggccf markKind = iota ; _bagd ; _gfefd ; _cefcf ; ) ;
// makeRectRuling tries to interpret the first four points of the subpath as a rectangle
// and to convert that rectangle into a ruling of colour `_bddba`. The boolean result
// reports success.
//
// The four edges (point i to point (i+1)%4) are classified with _gefd. The path is
// accepted when it has
//   - two horizontal (_gegc) and two vertical (_bcaef) edges, or
//   - two horizontal edges, no vertical edge, and _cacgd accepts the two horizontal start points, or
//   - two vertical edges, no horizontal edge, and _addda accepts the two vertical start points.
//
// When one direction has no edges, the indexes of all edges that are not of the other
// direction are used in its place. The rectangle is built from the extreme Y of the two
// "horizontal" points and the extreme X of the two "vertical" points and is normalised so
// that Llx <= Urx and Lly <= Ury.
//
// Failure returns differ: a rejected edge pattern returns a non-nil empty ruling with
// false, while a rectangle of kind _aebb or a failed asRuling returns (nil, false).
// The subpath must hold at least four points, because _bcbc[:4] is taken unconditionally.
// The edge kinds are kept in a map, so the order of the collected index slices follows
// map iteration order.
func ( _beca * subpath ) makeRectRuling ( _bddba _ba . Color ) ( * ruling , bool ) { if _agca { _d . Log . Info ( "\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076" , _beca ) ;
} ; _afba := _beca . _bcbc [ : 4 ] ; _ccgd := make ( map [ int ] rulingKind , len ( _afba ) ) ; for _cddcgc , _gafff := range _afba { _edef := _beca . _bcbc [ ( _cddcgc + 1 ) % 4 ] ; _ccgd [ _cddcgc ] = _gefd ( _gafff , _edef ) ; if _agca { _caa . Printf ( "\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066" , _cddcgc , _ccgd [ _cddcgc ] , _gafff , _edef ) ;
} ; } ; if _agca { _caa . Printf ( "\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a" , _ccgd ) ; } ; var _cagge , _dacgg [ ] int ; for _fbcgb , _cccd := range _ccgd { switch _cccd { case _gegc : _dacgg = append ( _dacgg , _fbcgb ) ; case _bcaef : _cagge = append ( _cagge , _fbcgb ) ;
} ; } ; if _agca { _caa . Printf ( "\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a" , len ( _dacgg ) , _dacgg ) ; _caa . Printf ( "\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a" , len ( _cagge ) , _cagge ) ;
} ; _aaeb := ( len ( _dacgg ) == 2 && len ( _cagge ) == 2 ) || ( len ( _dacgg ) == 2 && len ( _cagge ) == 0 && _cacgd ( _afba [ _dacgg [ 0 ] ] , _afba [ _dacgg [ 1 ] ] ) ) || ( len ( _cagge ) == 2 && len ( _dacgg ) == 0 && _addda ( _afba [ _cagge [ 0 ] ] , _afba [ _cagge [ 1 ] ] ) ) ; if _agca { _caa . Printf ( " \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a" , len ( _dacgg ) , len ( _cagge ) , _aaeb ) ;
} ; if ! _aaeb { if _agca { _d . Log . Error ( "\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v" , _beca ) ; _caa . Printf ( " \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a" , len ( _dacgg ) , len ( _cagge ) , _aaeb ) ;
} ; return & ruling { } , false ; } ; if len ( _cagge ) == 0 { for _egdgg , _gbbb := range _ccgd { if _gbbb != _gegc { _cagge = append ( _cagge , _egdgg ) ; } ; } ; } ; if len ( _dacgg ) == 0 { for _cefa , _bfea := range _ccgd { if _bfea != _bcaef { _dacgg = append ( _dacgg , _cefa ) ; } ; } ; } ; if _agca { _d . Log . Info ( "\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a" + "\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a" + "\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a" + "\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076" , len ( _dacgg ) , len ( _cagge ) , len ( _afba ) , _dacgg , _cagge , _afba ) ;
} ; var _dbabf , _efgd , _dgfe , _bgde _bab . Point ; if _afba [ _dacgg [ 0 ] ] . Y > _afba [ _dacgg [ 1 ] ] . Y { _dgfe , _bgde = _afba [ _dacgg [ 0 ] ] , _afba [ _dacgg [ 1 ] ] ; } else { _dgfe , _bgde = _afba [ _dacgg [ 1 ] ] , _afba [ _dacgg [ 0 ] ] ; } ; if _afba [ _cagge [ 0 ] ] . X > _afba [ _cagge [ 1 ] ] . X { _dbabf , _efgd = _afba [ _cagge [ 0 ] ] , _afba [ _cagge [ 1 ] ] ;
} else { _dbabf , _efgd = _afba [ _cagge [ 1 ] ] , _afba [ _cagge [ 0 ] ] ; } ; _aggba := _dc . PdfRectangle { Llx : _dbabf . X , Urx : _efgd . X , Lly : _bgde . Y , Ury : _dgfe . Y } ; if _aggba . Llx > _aggba . Urx { _aggba . Llx , _aggba . Urx = _aggba . Urx , _aggba . Llx ; } ; if _aggba . Lly > _aggba . Ury { _aggba . Lly , _aggba . Ury = _aggba . Ury , _aggba . Lly ;
} ; _gfgf := rectRuling { PdfRectangle : _aggba , _fgdfae : _bgeae ( _aggba ) , Color : _bddba } ; if _gfgf . _fgdfae == _aebb { if _agca { _d . Log . Error ( "\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c" ) ;
} ; return nil , false ; } ; _bbec , _edbd := _gfgf . asRuling ( ) ; if ! _edbd { if _agca { _d . Log . Error ( "\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg" ) ; } ; return nil , false ; } ; if _cage { _caa . Printf ( "\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a" , _bbec . String ( ) ) ;
} ; return _bbec , true ; } ;
// BBox returns the smallest axis-aligned rectangle that encloses the TextMarks in `ma`.
// Marks flagged as Meta and marks whose Text is rejected by _fcgba do not contribute.
// The boolean result is false when no mark contributed, in which case the rectangle is
// the zero value.
func (ma *TextMarkArray) BBox() (_dc.PdfRectangle, bool) {
	var (
		bbox  _dc.PdfRectangle
		found bool
	)
	for _, mark := range ma._bgbd {
		if mark.Meta || _fcgba(mark.Text) {
			continue
		}
		if !found {
			// The first contributing mark seeds the rectangle.
			bbox, found = mark.BBox, true
			continue
		}
		bbox = _abga(bbox, mark.BBox)
	}
	return bbox, found
}

// sort orders the words of every bucket in the bag in place, using _deba as the
// three-way comparison.
func (bag *wordBag) sort() {
	for _, words := range bag._adcb {
		less := func(i, j int) bool {
			return _deba(words[i], words[j]) < 0
		}
		_e.Slice(words, less)
	}
}

// _edeg reports whether the absolute value of `_cebfd` is below the _agfe tolerance.
func _edeg(_cebfd float64) bool {
	magnitude := _ca.Abs(_cebfd)
	return magnitude < _agfe
}
// allWords returns the words of every bucket in the bag as one slice.
// Buckets are visited in map iteration order.
func (bag *wordBag) allWords() []*textWord {
	var words []*textWord
	for _, bucket := range bag._adcb {
		words = append(words, bucket...)
	}
	return words
}

// firstWord returns the first word of the bucket stored under `_bgaa`.
func (bag *wordBag) firstWord(_bgaa int) *textWord {
	bucket := bag._adcb[_bgaa]
	return bucket[0]
}

// splitSec sorts the list in place by (_becdd, _aggb) and partitions it into groups.
// A group is seeded with the first ruling not yet assigned; every later unassigned ruling
// that alignsSec with any current member of the group is added to it.
// The list must not be empty.
func (list rulingList) splitSec() []rulingList {
	_e.Slice(list, func(i, j int) bool {
		a, b := list[i], list[j]
		if a._becdd == b._becdd {
			return a._aggb < b._aggb
		}
		return a._becdd < b._becdd
	})

	assigned := make(map[*ruling]struct{}, len(list))

	// collect builds the group seeded by `seed`, marking every member as assigned.
	collect := func(seed *ruling) rulingList {
		group := rulingList{seed}
		assigned[seed] = struct{}{}
		for _, candidate := range list {
			if _, done := assigned[candidate]; done {
				continue
			}
			for _, member := range group {
				if !candidate.alignsSec(member) {
					continue
				}
				group = append(group, candidate)
				assigned[candidate] = struct{}{}
				break
			}
		}
		return group
	}

	groups := []rulingList{collect(list[0])}
	for _, r := range list[1:] {
		if _, done := assigned[r]; !done {
			groups = append(groups, collect(r))
		}
	}
	return groups
}

// firstReadingIndex returns the bucket index, among `_ceee` and the indexes returned by
// depthBand for the band starting at (_ceee+1)*_caaee and extending by _ffed times the
// _debab of the first word of bucket `_ceee`, whose first word compares lowest under _deba.
func (bag *wordBag) firstReadingIndex(_ceee int) int {
	scale := bag.firstWord(_ceee)._debab
	bandStart := float64(_ceee+1) * _caaee
	bandEnd := bandStart + _ffed*scale

	best := _ceee
	for _, idx := range bag.depthBand(bandStart, bandEnd) {
		if _deba(bag.firstWord(idx), bag.firstWord(best)) < 0 {
			best = idx
		}
	}
	return best
}

// intersections returns, for every ruling index that takes part in at least one
// vertical/horizontal intersection, the set of indexes of the rulings it intersects.
// It returns nil when there are fewer than _cddcc+1 vertical or _eadb+1 horizontal
// rulings, or when the number of vertical plus horizontal rulings exceeds _gddb.
func (list rulingList) intersections() map[int]intSet {
	var verts, horzs []int
	for i, r := range list {
		if r._fabfb == _bcaef {
			verts = append(verts, i)
		} else if r._fabfb == _gegc {
			horzs = append(horzs, i)
		}
	}
	if len(verts) < _cddcc+1 || len(horzs) < _eadb+1 {
		return nil
	}
	if len(verts)+len(horzs) > _gddb {
		_d.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064", len(list), len(verts), len(horzs))
		return nil
	}

	crossings := make(map[int]intSet, len(verts)+len(horzs))
	for _, v := range verts {
		for _, h := range horzs {
			if !list[v].intersects(list[h]) {
				continue
			}
			if _, ok := crossings[v]; !ok {
				crossings[v] = make(intSet)
			}
			if _, ok := crossings[h]; !ok {
				crossings[h] = make(intSet)
			}
			crossings[v].add(h)
			crossings[h].add(v)
		}
	}
	return crossings
}

// textLine is a rectangle holding a slice of words (_ccfce) and two float64 values.
// NOTE(review): _bfcdd is multiplied by _fde to get the word-gap threshold in
// markWordBoundaries; the meaning of _gddd is not visible here — confirm both.
type textLine struct {
	_dc.PdfRectangle
	_gddd  float64
	_ccfce []*textWord
	_bfcdd float64
}

// reorder rearranges the list in place so that the element at position i becomes the
// element that was at position _cad[i].
func (paras paraList) reorder(_cad []int) {
	rearranged := make(paraList, len(paras))
	for dst, src := range _cad {
		rearranged[dst] = paras[src]
	}
	copy(paras, rearranged)
}
// String returns a description of `v`: its kind, primary coordinate, secondary range and
// its length, its _fcgb value, colour and, when non-zero, its width.
func (v *ruling) String() string {
	if v._fabfb == _aebb {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}

	// Axis labels: the primary axis is printed first; for _gegc rulings the labels swap.
	primary, secondary := "\u0078", "\u0079"
	if v._fabfb == _gegc {
		primary, secondary = "\u0079", "\u0078"
	}

	var width string
	if v._gebag != 0.0 {
		width = _caa.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", v._gebag)
	}

	length := v._aggb - v._becdd
	return _caa.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",
		v._fabfb, primary, v._efbdg, secondary, v._becdd, v._aggb, length, v._fcgb, v.Color, width)
}

// _ffffg returns a new slice holding the elements of `_feba` in reverse order.
func _ffffg(_feba []int) []int {
	n := len(_feba)
	reversed := make([]int, n)
	for i := range reversed {
		reversed[i] = _feba[n-1-i]
	}
	return reversed
}

// emptyCompositeRow reports whether no composite cell stored under _abdec(x, _dfafc), for
// x in [0, _bgdee), has a non-empty paraList.
func (t *textTable) emptyCompositeRow(_dfafc int) bool {
	for x := 0; x < t._bgdee; x++ {
		cell, ok := t._fgge[_abdec(x, _dfafc)]
		if ok && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// markWordBoundaries sets _gabe on every word after the first whose _ggee value relative
// to the preceding word is at least _fde times the line's _bfcdd.
func (line *textLine) markWordBoundaries() {
	threshold := _fde * line._bfcdd
	words := line._ccfce
	// i indexes the tail slice, so words[i] is the word just before `word`.
	for i, word := range line._ccfce[1:] {
		if gap := _ggee(word, words[i]); gap >= threshold {
			word._gabe = true
		}
	}
}
// String returns a string describing `pt`: a header line, one line per element and a
// footer line, joined with newlines.
func (pt PageText) String() string {
	summary := _caa.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(pt._gfc))
	lines := make([]string, 0, len(pt._gfc)+2)
	lines = append(lines, "\u002d"+summary)
	for _, element := range pt._gfc {
		lines = append(lines, element.String())
	}
	lines = append(lines, "\u002b"+summary)
	return _ce.Join(lines, "\u000a")
}

// cachedImage pairs an image with a colorspace.
type cachedImage struct {
	_dga *_dc.Image
	_fcc _dc.PdfColorspace
}

// augmentGrid splits the list into vertical and horizontal rulings and, where the bounding
// box of one direction extends beyond the other's by more than _aecg, adds a synthetic
// ruling (kind _cefcf) on that side. It returns the (possibly augmented) vertical and
// horizontal lists. When either direction is empty the split lists are returned unchanged.
func (rl rulingList) augmentGrid() (rulingList, rulingList) {
	verts, horzs := rl.vertsHorzs()
	if len(verts) == 0 || len(horzs) == 0 {
		return verts, horzs
	}
	origVerts, origHorzs := verts, horzs

	bboxV := verts.bbox()
	bboxH := horzs.bbox()
	if _cage {
		_d.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066", bboxV)
		_d.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066", bboxH)
	}

	// Horizontals reach further left than the verticals: add a vertical on the left.
	if bboxH.Llx < bboxV.Llx-_aecg {
		left := &ruling{_fcgb: _cefcf, _fabfb: _bcaef, _efbdg: bboxH.Llx, _becdd: bboxV.Lly, _aggb: bboxV.Ury}
		verts = append(rulingList{left}, verts...)
	}
	// Horizontals reach further right than the verticals: add a vertical on the right.
	if bboxH.Urx > bboxV.Urx+_aecg {
		right := &ruling{_fcgb: _cefcf, _fabfb: _bcaef, _efbdg: bboxH.Urx, _becdd: bboxV.Lly, _aggb: bboxV.Ury}
		verts = append(verts, right)
	}
	// Verticals reach further down than the horizontals: add a horizontal at the bottom.
	if bboxV.Lly < bboxH.Lly-_aecg {
		bottom := &ruling{_fcgb: _cefcf, _fabfb: _gegc, _efbdg: bboxV.Lly, _becdd: bboxH.Llx, _aggb: bboxH.Urx}
		horzs = append(rulingList{bottom}, horzs...)
	}
	// Verticals reach further up than the horizontals: add a horizontal at the top.
	if bboxV.Ury > bboxH.Ury+_aecg {
		top := &ruling{_fcgb: _cefcf, _fabfb: _gegc, _efbdg: bboxV.Ury, _becdd: bboxH.Llx, _aggb: bboxH.Urx}
		horzs = append(horzs, top)
	}

	if len(verts)+len(horzs) == len(rl) {
		return origVerts, origHorzs
	}

	combined := append(verts, horzs...)
	rl.log("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064")
	combined.log("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d")
	return verts, horzs
}

// bbox returns the paragraph's embedded rectangle.
func (para *textPara) bbox() _dc.PdfRectangle {
	return para.PdfRectangle
}

// _daeba replaces, in place, both coordinates of every point of every subpath in the
// given sections with the result of _caca. It does nothing when _bacb is negative.
func _daeba(_bgfec []pathSection) {
	if _bacb < 0.0 {
		return
	}
	if _cage {
		_d.Log.Info("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073", len(_bgfec))
	}
	for sectionIdx, section := range _bgfec {
		for pathIdx, path := range section._bffc {
			for pointIdx, before := range path._bcbc {
				path._bcbc[pointIdx] = _bab.Point{X: _caca(before.X), Y: _caca(before.Y)}
				if !_cage {
					continue
				}
				after := path._bcbc[pointIdx]
				if _aada(before, after) {
					continue
				}
				delta := _bab.Point{X: after.X - before.X, Y: after.Y - before.Y}
				_caa.Printf("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a", sectionIdx, pathIdx, pointIdx, before, after, delta)
			}
		}
	}
}
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	// Images holds one ImageMark per extracted image.
	Images []ImageMark
}

// extractTables finds tables among the paragraphs using the grid tilings `_bfdd` and
// returns the result of applying them to the list. Lists with fewer than _baeag
// paragraphs are returned unchanged without searching for tables.
func (_cae paraList) extractTables(_bfdd []gridTiling) paraList {
	if _eea {
		_d.Log.Debug("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_cae))
	}
	if len(_cae) < _baeag {
		return _cae
	}
	_gcga := _cae.findTables(_bfdd)
	if _eea {
		_d.Log.Info("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(_gcga))
		for _afdag, _efagd := range _gcga {
			_efagd.log(_caa.Sprintf("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064", _afdag))
		}
	}
	return _cae.applyTables(_gcga)
}

// _eeac returns `_degea` with its last rune removed.
// An empty string is returned unchanged; previously it caused a slice-bounds panic.
func _eeac(_degea string) string {
	_afcfg := []rune(_degea)
	if len(_afcfg) == 0 {
		// Nothing to strip; slicing to len-1 would be out of range.
		return ""
	}
	return string(_afcfg[:len(_afcfg)-1])
}
// _abga returns the union of the two rectangles: the minimum of the lower-left
// coordinates and the maximum of the upper-right coordinates.
func _abga ( _aff , _agdg _dc . PdfRectangle ) _dc . PdfRectangle { return _dc . PdfRectangle { Llx : _ca . Min ( _aff . Llx , _agdg . Llx ) , Lly : _ca . Min ( _aff . Lly , _agdg . Lly ) , Urx : _ca . Max ( _aff . Urx , _agdg . Urx ) , Ury : _ca . Max ( _aff . Ury , _agdg . Ury ) } ; } ;
// parasBBox returns the cell's paragraph list and its rectangle.
func ( _cbfed compositeCell ) parasBBox ( ) ( paraList , _dc . PdfRectangle ) { return _cbfed . paraList , _cbfed . PdfRectangle ;
} ;
// removeDuplicates removes words that duplicate another word in the same bucket.
//
// For every index returned by depthIndexes whose bucket is non-empty, the first word of
// the bucket gives a tolerance (_feae times its _debab) and a depth (_cffg). Every bucket
// in the band depthBand(depth, depth+tolerance) is then scanned: a word is a duplicate of
// another word in that bucket when it is a different word with the same _bgdg string and
// each of its four rectangle coordinates differs by less than the tolerance. The word
// reached first in the outer scan is kept and the other is marked.
// Marked words are removed by compacting the bucket in place; a bucket that becomes empty
// is deleted from the bag.
func ( _dab * wordBag ) removeDuplicates ( ) { if _feaa { _d . Log . Info ( "r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071" , _dab . text ( ) ) ; } ; for _ , _bace := range _dab . depthIndexes ( ) { if len ( _dab . _adcb [ _bace ] ) == 0 { continue ;
} ; _ggab := _dab . _adcb [ _bace ] [ 0 ] ; _eaggd := _feae * _ggab . _debab ; _aagb := _ggab . _cffg ; for _ , _gbbg := range _dab . depthBand ( _aagb , _aagb + _eaggd ) { _dfbc := map [ * textWord ] struct { } { } ; _afda := _dab . _adcb [ _gbbg ] ; for _ , _bgdad := range _afda { if _ , _aefe := _dfbc [ _bgdad ] ;
_aefe { continue ; } ; for _ , _cagd := range _afda { if _ , _gefff := _dfbc [ _cagd ] ; _gefff { continue ; } ; if _cagd != _bgdad && _cagd . _bgdg == _bgdad . _bgdg && _ca . Abs ( _cagd . Llx - _bgdad . Llx ) < _eaggd && _ca . Abs ( _cagd . Urx - _bgdad . Urx ) < _eaggd && _ca . Abs ( _cagd . Lly - _bgdad . Lly ) < _eaggd && _ca . Abs ( _cagd . Ury - _bgdad . Ury ) < _eaggd { _dfbc [ _cagd ] = struct { } { } ;
} ; } ; } ; if len ( _dfbc ) > 0 { _ddeb := 0 ; for _ , _eadg := range _afda { if _ , _beed := _dfbc [ _eadg ] ; ! _beed { _afda [ _ddeb ] = _eadg ; _ddeb ++ ; } ; } ; _dab . _adcb [ _gbbg ] = _afda [ : len ( _afda ) - len ( _dfbc ) ] ; if len ( _dab . _adcb [ _gbbg ] ) == 0 { delete ( _dab . _adcb , _gbbg ) ;
} ; } ; } ; } ; } ;
// textMark is the internal record for a mark: a rectangle plus the values captured
// for it (strings, font, matrix, point, colours, a PDF object and several numbers).
// NOTE(review): the individual field meanings are obscured by obfuscation. _beaf is the
// value appendMark uses to raise a word's _debab — confirm the rest against the
// code that fills the struct.
type textMark struct { _dc . PdfRectangle ; _gcce int ; _cbge string ; _bcabg string ; _eead * _dc . PdfFont ; _beaf float64 ; _bddca float64 ; _adcg _bab . Matrix ; _ffb _bab . Point ; _gde _dc . PdfRectangle ; _fgeee _ba . Color ; _cab _ba . Color ; _abda _aa . PdfObject ;
_bfga [ ] string ; Tw float64 ; Th float64 ; _gffe int ; } ;
// findTables returns the tables found among the paragraphs.
// It first calls addNeighbours and sorts the list in place with _aeb. Tables from
// findGridTables (using the tilings `_ecbe`) are collected when _ccde is set, followed
// by tables from findTextTables when _aeca is set.
func ( _daec paraList ) findTables ( _ecbe [ ] gridTiling ) [ ] * textTable { _daec . addNeighbours ( ) ; _e . Slice ( _daec , func ( _dffb , _cgfg int ) bool { return _aeb ( _daec [ _dffb ] , _daec [ _cgfg ] ) < 0 } ) ; var _adcab [ ] * textTable ;
if _ccde { _cgcabc := _daec . findGridTables ( _ecbe ) ; _adcab = append ( _adcab , _cgcabc ... ) ; } ; if _aeca { _agcb := _daec . findTextTables ( ) ; _adcab = append ( _adcab , _agcb ... ) ; } ; return _adcab ; } ;
// intSet is a set of ints, implemented as a map with empty struct values.
type intSet map [ int ] struct { } ;
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int ;
// toGrids partitions the rulings into groups of connected rulings and returns those
// groups that isActualGrid accepts, each replaced by the list isActualGrid returned and
// then passed through snapToGroups.
//
// Steps:
//  1. intersections() gives the intersecting ruling indexes; connections() is then
//     evaluated for every ruling index and kept when non-empty.
//  2. The indexes are ordered by _gagd: more connections first, ties broken by comp.
//  3. Walking that order, an index joins the first existing group that has a member whose
//     connection set contains it; otherwise it starts a new group.
//  4. Groups are stably sorted by decreasing size and each group's indexes are sorted
//     with comp, then converted to rulingLists.
//
// The first element of the ordering is read unconditionally.
// NOTE(review): this presumably requires a non-empty ruling list — confirm what _gagd
// returns for length 0.
func ( _egcc rulingList ) toGrids ( ) [ ] rulingList { if _cage { _d . Log . Info ( "t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073" , _egcc ) ; } ; _aecfd := _egcc . intersections ( ) ; if _cage { _d . Log . Info ( "\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020" , len ( _egcc ) , len ( _aecfd ) ) ;
for _ , _fffff := range _dbed ( _aecfd ) { _caa . Printf ( "\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n" , _fffff , _aecfd [ _fffff ] ) ; } ; } ; _cgdf := make ( map [ int ] intSet , len ( _egcc ) ) ; for _bdbgf := range _egcc { _bafa := _egcc . connections ( _aecfd , _bdbgf ) ; if len ( _bafa ) > 0 { _cgdf [ _bdbgf ] = _bafa ;
} ; } ; if _cage { _d . Log . Info ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064" , len ( _cgdf ) ) ; for _ , _gbaa := range _dbed ( _cgdf ) { _caa . Printf ( "\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n" , _gbaa , _cgdf [ _gbaa ] ) ;
} ; } ; _effa := _gagd ( len ( _egcc ) , func ( _dadg , _effba int ) bool { _egbc , _edac := len ( _cgdf [ _dadg ] ) , len ( _cgdf [ _effba ] ) ; if _egbc != _edac { return _egbc > _edac ; } ; return _egcc . comp ( _dadg , _effba ) ; } ) ; if _cage { _d . Log . Info ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076" , _effa ) ;
} ; _faaf := [ ] [ ] int { { _effa [ 0 ] } } ; _gdeg : for _ , _dbgfa := range _effa [ 1 : ] { for _bfee , _bade := range _faaf { for _ , _bece := range _bade { if _cgdf [ _bece ] . has ( _dbgfa ) { _faaf [ _bfee ] = append ( _bade , _dbgfa ) ; continue _gdeg ; } ; } ; } ; _faaf = append ( _faaf , [ ] int { _dbgfa } ) ;
} ; if _cage { _d . Log . Info ( "\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076" , _faaf ) ; } ; _e . SliceStable ( _faaf , func ( _cebdb , _edde int ) bool { return len ( _faaf [ _cebdb ] ) > len ( _faaf [ _edde ] ) } ) ; for _ , _gecf := range _faaf { _e . Slice ( _gecf , func ( _defb , _bdcfd int ) bool { return _egcc . comp ( _gecf [ _defb ] , _gecf [ _bdcfd ] ) } ) ;
} ; _aedbb := make ( [ ] rulingList , len ( _faaf ) ) ; for _eeaa , _adaf := range _faaf { _dcae := make ( rulingList , len ( _adaf ) ) ; for _bggfa , _bgef := range _adaf { _dcae [ _bggfa ] = _egcc [ _bgef ] ; } ; _aedbb [ _eeaa ] = _dcae ; } ; if _cage { _d . Log . Info ( "\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076" , _aedbb ) ;
} ; var _ggcgg [ ] rulingList ; for _ , _fcfbf := range _aedbb { if _bcec , _cccg := _fcfbf . isActualGrid ( ) ; _cccg { _fcfbf = _bcec ; _fcfbf = _fcfbf . snapToGroups ( ) ; _ggcgg = append ( _ggcgg , _fcfbf ) ; } ; } ; if _cage { _cccdb ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073" , _ggcgg ) ;
_d . Log . Info ( "\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064" , len ( _aedbb ) , len ( _ggcgg ) ) ; } ; return _ggcgg ; } ;
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct { _bgbd [ ] TextMark } ; func ( _gafg * textObject ) getCurrentFont ( ) * _dc . PdfFont { _dbbg := _gafg . _cgf . _gcb ; if _dbbg == nil { _d . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e" ) ;
return _dc . DefaultFont ( ) ; } ; return _dbbg ; } ; func ( _beafc rulingList ) isActualGrid ( ) ( rulingList , bool ) { _dbgfe , _aecga := _beafc . augmentGrid ( ) ; if ! ( len ( _dbgfe ) >= _cddcc + 1 && len ( _aecga ) >= _eadb + 1 ) { if _cage { _d . Log . Info ( "\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064" , len ( _dbgfe ) , len ( _aecga ) , _cddcc + 1 , _eadb + 1 ) ;
} ; return nil , false ; } ; if _cage { _d . Log . Info ( "\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074" , _beafc , len ( _dbgfe ) >= 2 , len ( _aecga ) >= 2 , len ( _dbgfe ) >= 2 && len ( _aecga ) >= 2 ) ;
for _dabb , _cegd := range _beafc { _caa . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a" , _dabb , _cegd ) ; } ; } ; if _gfef { _fddca , _bbae := _dbgfe [ 0 ] , _dbgfe [ len ( _dbgfe ) - 1 ] ; _ebdbb , _gcccb := _aecga [ 0 ] , _aecga [ len ( _aecga ) - 1 ] ; if ! ( _gdfc ( _fddca . _efbdg - _ebdbb . _becdd ) && _gdfc ( _bbae . _efbdg - _ebdbb . _aggb ) && _gdfc ( _ebdbb . _efbdg - _fddca . _aggb ) && _gdfc ( _gcccb . _efbdg - _fddca . _becdd ) ) { if _cage { _d . Log . Info ( "\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073" , _fddca , _bbae , _ebdbb , _gcccb ) ;
} ; return nil , false ; } ; } else { if ! _dbgfe . aligned ( ) { if _baea { _d . Log . Info ( "i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064" , len ( _dbgfe ) ) ;
} ; return nil , false ; } ; if ! _aecga . aligned ( ) { if _cage { _d . Log . Info ( "i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064" , len ( _aecga ) ) ;
} ; return nil , false ; } ; } ; _addd := append ( _dbgfe , _aecga ... ) ; return _addd , true ; } ; func ( _egdf * textPara ) writeCellText ( _dccc _gc . Writer ) { for _aede , _gage := range _egdf . _ddaf { _fbbb := _gage . text ( ) ; _adbf := _cggce && _gage . endsInHyphen ( ) && _aede != len ( _egdf . _ddaf ) - 1 ;
if _adbf { _fbbb = _eeac ( _fbbb ) ; } ; _dccc . Write ( [ ] byte ( _fbbb ) ) ; if ! ( _adbf || _aede == len ( _egdf . _ddaf ) - 1 ) { _dccc . Write ( [ ] byte ( _gfga ( _gage . _gddd , _egdf . _ddaf [ _aede + 1 ] . _gddd ) ) ) ; } ; } ; } ; type event struct { _dfec float64 ; _efga bool ; _agafc int ; } ; type rulingKind int ;
// _daadd returns the keys of `_gaga` sorted in ascending order.
func _daadd(_gaga map[int][]float64) []int {
	keys := make([]int, 0, len(_gaga))
	for key := range _gaga {
		keys = append(keys, key)
	}
	_e.Ints(keys)
	return keys
}

// cubicTo records the point (`_ffg`, `_acdcd`) with addPoint.
// The first four arguments (`_fgdf`, `_gebf`, `_cbeg`, `_eafc`) are not used by this implementation;
// presumably they are the curve's control points — confirm against the caller.
func (_afa *shapesState) cubicTo(_fgdf, _gebf, _cbeg, _eafc, _ffg, _acdcd float64) {
	if _fgeac {
		_d.Log.Info("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a")
	}
	_afa.addPoint(_ffg, _acdcd)
}

// removeWord replaces the word list stored under key `_cfef` in `_gaeg._adcb` with the result of
// `_cafea(list, _agea)`. When that result is empty the key is deleted from the map.
func (_gaeg *wordBag) removeWord(_agea *textWord, _cfef int) {
	remaining := _cafea(_gaeg._adcb[_cfef], _agea)
	if len(remaining) > 0 {
		_gaeg._adcb[_cfef] = remaining
		return
	}
	delete(_gaeg._adcb, _cfef)
}

// merge combines the paragraphs in `_ceda` into a single textPara.
// The list is first sorted with sortReadingOrder (this reorders `_ceda` in place). The result is
// built by `_dcfc` from the union (`_abga`) of the paragraphs' rectangles and the concatenation of
// their `_ddaf` line slices. Returns nil when `_ceda` is empty.
func (_ceda paraList) merge() *textPara {
	_d.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_ceda))
	if len(_ceda) == 0 {
		return nil
	}
	_ceda.sortReadingOrder()

	total := 0
	for _, para := range _ceda {
		total += len(para._ddaf)
	}
	// Collect the lines in a freshly allocated slice. Appending directly onto the first
	// paragraph's `_ddaf` could write into that paragraph's backing array when it has spare
	// capacity, corrupting any other slice that shares it.
	lines := make([]*textLine, 0, total)
	lines = append(lines, _ceda[0]._ddaf...)
	bbox := _ceda[0].PdfRectangle
	for _, para := range _ceda[1:] {
		bbox = _abga(bbox, para.PdfRectangle)
		lines = append(lines, para._ddaf...)
	}
	return _dcfc(bbox, lines)
}
// NewWithOptions returns an Extractor instance for extracting content from the input PDF page with options.
// It fails if the page's content streams or media box cannot be obtained. A media box whose
// lower-left/upper-right coordinates are reversed is normalized (and the fact is logged).
func NewWithOptions(page *_dc.PdfPage, options *Options) (*Extractor, error) {
	// Key passed to _gd.TrackUse once the extractor has been built.
	const usageKey = "\u0065x\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077W\u0069\u0074\u0068\u004f\u0070\u0074\u0069\u006f\u006e\u0073"

	streams, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}
	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _caa.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	ex := &Extractor{
		_aaf: streams,
		_gdc: page.Resources,
		_cbb: *mediaBox,
		_aae: page.CropBox,
		_ab:  map[string]fontEntry{},
		_bf:  map[string]textResult{},
		_da:  options,
	}
	// Swap reversed X coordinates of the media box copy.
	if ex._cbb.Llx > ex._cbb.Urx {
		_d.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", ex._cbb)
		ex._cbb.Llx, ex._cbb.Urx = ex._cbb.Urx, ex._cbb.Llx
	}
	// Swap reversed Y coordinates of the media box copy.
	if ex._cbb.Lly > ex._cbb.Ury {
		_d.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", ex._cbb)
		ex._cbb.Lly, ex._cbb.Ury = ex._cbb.Ury, ex._cbb.Lly
	}
	_gd.TrackUse(usageKey)
	return ex, nil
}

// depthBand returns the sorted depth indexes of `_cacf` that lie between the depth indexes of
// `_dedf` and `_dddc` (each clamped by getDepthIdx). Returns nil when the bag holds no words.
func (_cacf *wordBag) depthBand(_dedf, _dddc float64) []int {
	if len(_cacf._adcb) == 0 {
		return nil
	}
	lowIdx := _cacf.getDepthIdx(_dedf)
	highIdx := _cacf.getDepthIdx(_dddc)
	return _cacf.depthRange(lowIdx, highIdx)
}

// isExportable reports whether the table should be exported.
// It returns true when `_gbfa._bebc` is set. Otherwise it returns true as soon as a row is found
// whose column-0 cell is missing, or holds text of more than one rune that does not match `_fcfg`.
func (_gbfa *textTable) isExportable() bool {
	if _gbfa._bebc {
		return true
	}
	for row := 0; row < _gbfa._fcedd; row++ {
		cell := _gbfa.get(0, row)
		if cell == nil {
			return true
		}
		cellText := cell.text()
		if _b.RuneCountInString(cellText) > 1 && !_fcfg.MatchString(cellText) {
			return true
		}
	}
	return false
}

// getComposite returns the paragraphs and bounding box (via parasBBox) of the composite cell
// stored in `_acfa._fgge` under the key `_abdec(_ffdea, _dadcd)`.
// If there is no such entry it returns nil and a zero rectangle.
func (_acfa *textTable) getComposite(_ffdea, _dadcd int) (paraList, _dc.PdfRectangle) {
	cell, found := _acfa._fgge[_abdec(_ffdea, _dadcd)]
	if _eea {
		_caa.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a", _ffdea, _dadcd, cell.String())
	}
	if found {
		return cell.parasBBox()
	}
	return nil, _dc.PdfRectangle{}
}

// compositeColCorridors returns a map with one key per column index (0 to `_bgfee._bgdee`-1),
// each mapped to a nil slice.
func (_bgfee *textTable) compositeColCorridors() map[int][]float64 {
	width := _bgfee._bgdee
	corridors := make(map[int][]float64, width)
	if _eea {
		_d.Log.Info("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020", _bgfee._bgdee)
	}
	for col := 0; col < width; col++ {
		corridors[col] = nil
	}
	return corridors
}

// shapesState is the receiver of the path-building methods in this file (e.g. cubicTo, newSubPath).
// NOTE(review): the meaning of the individual fields is not visible in this part of the file.
type shapesState struct {
	_eabb _bab.Matrix
	_gcbe _bab.Matrix
	_bfg  []*subpath
	_dcgc bool
	_fcaf _bab.Point
	_degb *textObject
}

// moveLP concatenates a translation by (`_cffa`, `_dfaf`) onto `_dcfe._acd` and then copies the
// result into `_dcfe._aafd`.
func (_dcfe *textObject) moveLP(_cffa, _dfaf float64) {
	translation := _bab.NewMatrix(1, 0, 0, 1, _cffa, _dfaf)
	_dcfe._acd.Concat(translation)
	_dcfe._aafd = _dcfe._acd
}

// Package-level switches and numeric settings.
// NOTE(review): only some are referenced in this part of the file (`_cggce` gates the hyphen
// handling in writeCellText, `_dadc` gates removeDuplicates in arrangeText); confirm the others
// against their users.
const (
	_cggce = true
	_dadc  = true
	_fdfd  = true
	_gce   = false
	_cfgc  = false
	_gbad  = 6
	_fcbe  = 3.0
	_cdbdb = 200
	_ccde  = true
	_aeca  = true
	_efg   = true
	_abcf  = true
	_gfef  = false
)

// getDepthIdx returns the depth index `_ebcf(_gafb)` clamped to the range spanned by the first and
// last entries of depthIndexes().
// It indexes depthIndexes() unconditionally, so it must not be called when that list is empty.
func (_gfbc *wordBag) getDepthIdx(_gafb float64) int {
	indexes := _gfbc.depthIndexes()
	idx := _ebcf(_gafb)
	first, last := indexes[0], indexes[len(indexes)-1]
	switch {
	case idx < first:
		return first
	case idx > last:
		return last
	}
	return idx
}

// bounded is implemented by objects that have a bounding box.
type bounded interface {
	bbox() _dc.PdfRectangle
}

// arrangeText groups the words of `_fbec` into text lines and returns them as a textPara built by
// `_dcfc`, or nil if no line was formed.
// For every depth index, a line is seeded from the first word in reading order and then grown by
// repeatedly pulling the best-matching word from the depth band around the seed until no
// candidate remains. Words are removed from the bag as they are pulled into lines.
func (_fbec *wordBag) arrangeText() *textPara {
	_fbec.sort()
	if _dadc {
		_fbec.removeDuplicates()
	}

	var lines []*textLine
	for _, depthIdx := range _fbec.depthIndexes() {
		for !_fbec.empty(depthIdx) {
			// Seed a new line. `seed` must be fetched before `_fcfb` pulls it out of the bag.
			seedIdx := _fbec.firstReadingIndex(depthIdx)
			seed := _fbec.firstWord(seedIdx)
			line := _fcfb(_fbec, seedIdx)

			// Search limits, all scaled by the seed word's font size.
			fontsize := seed._debab
			minDepth := seed._cffg - _gdgc*fontsize
			maxDepth := seed._cffg + _gdgc*fontsize
			maxGap := _bdbc * fontsize
			maxOverlap := _agdd * fontsize

		grow:
			for {
				var next *textWord
				nextDepthIdx := 0
				for _, bandIdx := range _fbec.depthBand(minDepth, maxDepth) {
					candidate := _fbec.highestWord(bandIdx, minDepth, maxDepth)
					if candidate == nil {
						continue
					}
					// `_ggee` measures the candidate against the last word of the line.
					gap := _ggee(candidate, line._ccfce[len(line._ccfce)-1])
					if gap < -maxOverlap {
						// Stop growing this line altogether.
						break grow
					}
					if gap > maxGap {
						continue
					}
					// Keep the candidate with the smallest Llx (see `_deba`).
					if next != nil && _deba(candidate, next) >= 0 {
						continue
					}
					next = candidate
					nextDepthIdx = bandIdx
				}
				if next == nil {
					break
				}
				line.pullWord(_fbec, next, nextDepthIdx)
			}

			line.markWordBoundaries()
			lines = append(lines, line)
		}
	}
	if len(lines) == 0 {
		return nil
	}

	_e.Slice(lines, func(_fbba, _dgd int) bool {
		return _aecf(lines[_fbba], lines[_dgd]) < 0
	})
	para := _dcfc(_fbec.PdfRectangle, lines)

	// Optional debug dump of the paragraph, its lines, words and marks.
	if _daag {
		_d.Log.Info("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073", para.String())
		if _gegf {
			for lineIdx, ln := range para._ddaf {
				_caa.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", lineIdx, ln.String())
				if _dbf {
					for wordIdx, word := range ln._ccfce {
						_caa.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", wordIdx, word.String())
						for markIdx, mark := range word._gbaed {
							_caa.Printf("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n", markIdx, mark.String())
						}
					}
				}
			}
		}
	}
	return para
}

// _dggd builds a textWord from the marks in `_dgfca`, which must not be empty.
// The word's rectangle is the union (`_abga`) of the marks' rectangles, `_debab` is the largest
// `_beaf` among the marks, and `_cffg` is `_fbbgf.Ury` minus the word rectangle's Lly.
func _dggd(_dgfca []*textMark, _fbbgf _dc.PdfRectangle) *textWord {
	bbox := _dgfca[0].PdfRectangle
	largest := _dgfca[0]._beaf
	for i := 1; i < len(_dgfca); i++ {
		mark := _dgfca[i]
		bbox = _abga(bbox, mark.PdfRectangle)
		if mark._beaf > largest {
			largest = mark._beaf
		}
	}
	return &textWord{
		PdfRectangle: bbox,
		_gbaed:       _dgfca,
		_cffg:        _fbbgf.Ury - bbox.Lly,
		_debab:       largest,
	}
}

// showText forwards its arguments to renderText and returns its error.
func (_gbg *textObject) showText(_degd _aa.PdfObject, _bad []byte) error {
	return _gbg.renderText(_degd, _bad)
}

// connections returns the set of indexes into `_agddd` that are reachable from index `_dcgb`
// through the relation `_aeedg`: index i is added when `_aeedg[i]` contains an index that has
// already been visited. The search starts at `_dcgb` and recurses over every index collected.
func (_agddd rulingList) connections(_aeedg map[int]intSet, _dcgb int) intSet {
	connected := make(intSet)
	visited := make(intSet)

	var visit func(int)
	visit = func(_efae int) {
		if visited.has(_efae) {
			return
		}
		visited.add(_efae)
		for i := range _agddd {
			if _aeedg[i].has(_efae) {
				connected.add(i)
			}
		}
		for i := range _agddd {
			if connected.has(i) {
				visit(i)
			}
		}
	}

	visit(_dcgb)
	return connected
}

// equals reports whether the two rulings have the same kind (`_fabfb`) and whether `_efca`
// accepts each pair of their `_efbdg`, `_becdd` and `_aggb` values.
func (_ecae *ruling) equals(_cbddg *ruling) bool {
	if _ecae._fabfb != _cbddg._fabfb {
		return false
	}
	if !_efca(_ecae._efbdg, _cbddg._efbdg) {
		return false
	}
	if !_efca(_ecae._becdd, _cbddg._becdd) {
		return false
	}
	return _efca(_ecae._aggb, _cbddg._aggb)
}

// _cggf returns the larger of `_fcff` and `_agead`.
func _cggf(_fcff, _agead int) int {
	if _agead >= _fcff {
		return _agead
	}
	return _fcff
}

// add appends the points `_bdbg` to the subpath's `_bcbc` slice.
func (_gfae *subpath) add(_bdbg ..._bab.Point) {
	_gfae._bcbc = append(_gfae._bcbc, _bdbg...)
}
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	IncludeInlineStencilMasks bool
}

// _bbbb returns, for each index i in [0, `_ebfd`), the running offset obtained by reserving
// `len(_fgead[i]) + 1` slots for every preceding index, together with the total number of slots.
func _bbbb(_ebfd int, _fgead map[int][]float64) ([]int, int) {
	offsets := make([]int, _ebfd)
	total := 0
	for i := range offsets {
		offsets[i] = total
		total += len(_fgead[i]) + 1
	}
	return offsets, total
}

// getDown returns the `_bgce` neighbours of the cells in the table's last row, one per column.
// It returns nil if any of those neighbours reports taken(), or if the `_becf` link of one
// neighbour is not the neighbour collected for the next column.
func (_cbdff *textTable) getDown() paraList {
	width := _cbdff._bgdee
	cells := make(paraList, width)
	for col := 0; col < width; col++ {
		neighbour := _cbdff.get(col, _cbdff._fcedd-1)._bgce
		if neighbour.taken() {
			return nil
		}
		cells[col] = neighbour
	}
	for col := 0; col+1 < width; col++ {
		if cells[col]._becf != cells[col+1] {
			return nil
		}
	}
	return cells
}

// _dca reports whether `_eeb` contains a font whose FontName equals `_cg`.
func _dca(_eeb []Font, _cg string) bool {
	for i := range _eeb {
		if _eeb[i].FontName == _cg {
			return true
		}
	}
	return false
}

// setTextLeading stores `_ecf` in the text state's `_add` field. A nil receiver is a no-op.
func (_ebf *textObject) setTextLeading(_ecf float64) {
	if _ebf != nil {
		_ebf._cgf._add = _ecf
	}
}

// setFont stores the size `_cbf` in the text state and then looks up the font named `_bfe` with
// getFont. On lookup failure the error is returned (the size has already been stored); otherwise
// the font is stored in the text state. A nil receiver is a no-op that returns nil.
func (_ebfb *textObject) setFont(_bfe string, _cbf float64) error {
	if _ebfb == nil {
		return nil
	}
	_ebfb._cgf._fdd = _cbf
	font, err := _ebfb.getFont(_bfe)
	if err != nil {
		return err
	}
	_ebfb._cgf._gcb = font
	return nil
}

// toTextMarks returns the TextMarks of all paragraphs in `_agcae`, skipping paragraphs whose
// `_bedda` flag is set. After every paragraph except the last element of the list, `_egag` appends
// either a single space (when `_adfb` accepts the paragraph and its successor) or two newlines.
// Two newlines are always appended at the end.
func (_agcae paraList) toTextMarks() []TextMark {
	var marks []TextMark
	offset := 0
	lastIdx := len(_agcae) - 1
	for i, para := range _agcae {
		if para._bedda {
			continue
		}
		marks = append(marks, para.toTextMarks(&offset)...)
		if i == lastIdx {
			continue
		}
		if _adfb(para, _agcae[i+1]) {
			marks = _egag(marks, &offset, "\u0020")
			continue
		}
		marks = _egag(marks, &offset, "\u000a")
		marks = _egag(marks, &offset, "\u000a")
	}
	marks = _egag(marks, &offset, "\u000a")
	marks = _egag(marks, &offset, "\u000a")
	return marks
}

// _cdce binds the float64 argument `_gebfd` to `_eced`, returning a two-argument function.
func _cdce(_eced func(*wordBag, *textWord, float64) bool, _gebfd float64) func(*wordBag, *textWord) bool {
	return func(_aeag *wordBag, _gccf *textWord) bool {
		return _eced(_aeag, _gccf, _gebfd)
	}
}

// depthRange returns, in ascending order, the keys of `_gfed._adcb` that lie in the closed range
// [`_ddfba`, `_bead`]. Returns nil if there are none.
func (_gfed *wordBag) depthRange(_ddfba, _bead int) []int {
	var inRange []int
	for depthIdx := range _gfed._adcb {
		if depthIdx < _ddfba || depthIdx > _bead {
			continue
		}
		inRange = append(inRange, depthIdx)
	}
	if len(inRange) == 0 {
		return nil
	}
	_e.Ints(inRange)
	return inRange
}

// text returns the `_bgdg` strings of allWords() joined with single spaces.
func (_dgbfb *wordBag) text() string {
	words := _dgbfb.allWords()
	parts := make([]string, 0, len(words))
	for _, word := range words {
		parts = append(parts, word._bgdg)
	}
	return _ce.Join(parts, "\u0020")
}

// _cccdb logs the number of ruling lists in `_dfag` under the title `_fedc`, then prints each one.
func _cccdb(_fedc string, _dfag []rulingList) {
	_d.Log.Info("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073", len(_dfag), _fedc)
	for i := range _dfag {
		_caa.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, _dfag[i].String())
	}
}

// _fcgba reports whether every rune of `_baae` is a space according to unicode.IsSpace.
// An empty string yields true.
func _fcgba(_baae string) bool {
	for _, r := range _baae {
		if _bb.IsSpace(r) {
			continue
		}
		return false
	}
	return true
}

// String returns a description of the composite cell: its rectangle, its paragraph count and the
// merged text of its paragraphs passed through `_ebfce` with the argument 50.
func (_dfgb compositeCell) String() string {
	var summary string
	if len(_dfgb.paraList) > 0 {
		merged := _dfgb.paraList.merge()
		summary = _ebfce(merged.text(), 50)
	}
	return _caa.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071", _dfgb.PdfRectangle, len(_dfgb.paraList), summary)
}

// toTextTable converts the internal table to a public TextTable.
// Cells is indexed [row][column]; positions for which get returns nil keep the zero TableCell.
func (_agab *textTable) toTextTable() TextTable {
	width, height := _agab._bgdee, _agab._fcedd
	if _eea {
		_d.Log.Info("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064", _agab._bgdee, _agab._fcedd)
	}
	cells := make([][]TableCell, height)
	for row := 0; row < height; row++ {
		cells[row] = make([]TableCell, width)
		for col := 0; col < width; col++ {
			para := _agab.get(col, row)
			if para == nil {
				continue
			}
			if _eea {
				_caa.Printf("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", col, row, para)
			}
			cell := &cells[row][col]
			cell.Text = para.text()
			// Each cell's marks are numbered from a fresh offset of 0.
			offset := 0
			cell.Marks._bgbd = para.toTextMarks(&offset)
		}
	}
	return TextTable{W: width, H: height, Cells: cells}
}
// String returns a description of `_aaeaa`: its entry in `_bbdf`, or a "Not a mark" message
// containing the numeric value when there is no such entry.
func (_aaeaa markKind) String() string {
	if name, known := _bbdf[_aaeaa]; known {
		return name
	}
	return _caa.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _aaeaa)
}
// Elements returns the TextMarks in `_fgab`.
// A nil receiver yields nil, matching Len, which reports 0 for a nil receiver.
func (_fgab *TextMarkArray) Elements() []TextMark {
	if _fgab == nil {
		return nil
	}
	return _fgab._bgbd
}

// isAtom checks whether `_bdeac` and three of its neighbours form a closed 2x2 arrangement and,
// if so, returns the table built from them by `_cadb`; otherwise it returns nil.
// The neighbours are `_bdeac._becf`, `_bdeac._bgce` and the `_bgce` neighbour of the former, which
// must be the same paragraph as the `_becf` neighbour of the latter. None of them may report
// taken().
// NOTE(review): `_becf`/`_bgce` look like the right/below links — confirm.
func (_bdeac *textPara) isAtom() *textTable {
	viaBecf := _bdeac._becf
	viaBgce := _bdeac._bgce
	if viaBecf.taken() || viaBgce.taken() {
		return nil
	}
	corner := viaBecf._bgce
	if corner.taken() || corner != viaBgce._becf {
		return nil
	}
	return _cadb(_bdeac, viaBecf, viaBgce, corner)
}

// aligned reports whether enough rulings of `_debd` match up with each other.
// Each ruling after the first is tested with gridIntersecting against the rulings collected so
// far; on the first hit that ruling's counter is incremented, otherwise the new ruling is added
// with a counter of 0. The result is true when the fraction of collected rulings whose counter is
// still 0, relative to len(`_debd`), is at most 1.0 - `_ega`. Lists shorter than 2 yield false.
func (_debd rulingList) aligned() bool {
	if len(_debd) < 2 {
		return false
	}
	matches := make(map[*ruling]int)
	matches[_debd[0]] = 0
	for _, candidate := range _debd[1:] {
		matched := false
		for seen := range matches {
			if candidate.gridIntersecting(seen) {
				matches[seen]++
				matched = true
				break
			}
		}
		if !matched {
			matches[candidate] = 0
		}
	}

	unmatched := 0
	for _, count := range matches {
		if count == 0 {
			unmatched++
		}
	}
	fraction := float64(unmatched) / float64(len(_debd))
	isAligned := fraction <= 1.0-_ega
	if _cage {
		_d.Log.Info("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", isAligned, fraction, unmatched, len(_debd), _debd.String())
	}
	return isAligned
}

// wordBag is a rectangle plus a collection of words grouped by integer depth index (`_adcb`).
// `_bbce` and `_dce` are the ruling lists consulted by blocked() for horizontal and vertical
// separation respectively.
// NOTE(review): the meaning of `_gffd` and `_deae` is not visible here, beyond `_deae` being the
// minuend in maxDepth().
type wordBag struct {
	_dc.PdfRectangle
	_gffd       float64
	_bbce, _dce rulingList
	_deae       float64
	_adcb       map[int][]*textWord
}

// toTextMarks converts every mark of the word with ToTextMark and accumulates the results with
// `_fgbc`, which also receives the running offset `_dfbac`.
func (_bgaeg *textWord) toTextMarks(_dfbac *int) []TextMark {
	var marks []TextMark
	for i := range _bgaeg._gbaed {
		marks = _fgbc(marks, _dfbac, _bgaeg._gbaed[i].ToTextMark())
	}
	return marks
}

// _agag concatenates all ruling lists in `_eggg` and returns the result of vertsHorzs() on the
// combined list.
func _agag(_eggg []rulingList) (rulingList, rulingList) {
	var all rulingList
	for i := range _eggg {
		all = append(all, _eggg[i]...)
	}
	return all.vertsHorzs()
}

// hasLines reports whether `_dggg` accepts the cell's rectangle together with the rectangle of at
// least one line in `_adfdd`.
func (_aga compositeCell) hasLines(_adfdd []*textLine) bool {
	for i, line := range _adfdd {
		hit := _dggg(_aga.PdfRectangle, line.PdfRectangle)
		if _eea {
			_caa.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", hit, i, len(_adfdd))
			_caa.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", _aga)
			_caa.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if hit {
			return true
		}
	}
	return false
}

// newSubPath calls clearPath and, when the `_fgeac` debug switch is on, logs the state.
func (_ceg *shapesState) newSubPath() {
	_ceg.clearPath()
	if _fgeac {
		_d.Log.Info("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073", _ceg)
	}
}

// _bdb returns the single parameter of the operation `_abf` as a float64.
// It returns an error if the operation does not have exactly one parameter, or if that parameter
// cannot be converted by GetNumberAsFloat.
func _bdb(_abf *_gcf.ContentStreamOperation) (float64, error) {
	if len(_abf.Params) == 1 {
		return _aa.GetNumberAsFloat(_abf.Params[0])
	}
	err := _g.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	_d.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _abf.Operand, 1, len(_abf.Params), _abf.Params)
	return 0.0, err
}

// _fgac converts the two objects in `_aedeb` to float64 values.
// It returns an error if `_aedeb` does not hold exactly two objects or if GetNumbersAsFloat fails.
func _fgac(_aedeb []_aa.PdfObject) (_ggaba, _ddafac float64, _fcbag error) {
	if len(_aedeb) != 2 {
		return 0, 0, _caa.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", len(_aedeb))
	}
	values, _fcbag := _aa.GetNumbersAsFloat(_aedeb)
	if _fcbag != nil {
		return 0, 0, _fcbag
	}
	return values[0], values[1], nil
}

// log writes a debug dump of the table (size, `_bebc` flag, rectangle and every non-nil cell)
// under the title `_dcaa`. It does nothing unless the `_eea` debug switch is on.
func (_ddff *textTable) log(_dcaa string) {
	if !_eea {
		return
	}
	_d.Log.Info("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066", _dcaa, _ddff._bgdee, _ddff._fcedd, _ddff._bebc, _ddff.PdfRectangle)
	for row := 0; row < _ddff._fcedd; row++ {
		for col := 0; col < _ddff._bgdee; col++ {
			cell := _ddff.get(col, row)
			if cell == nil {
				continue
			}
			_caa.Printf("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a", col, row, cell.PdfRectangle, _ebfce(cell.text(), 50), _b.RuneCountInString(cell.text()))
		}
	}
}

// maxDepth returns `_cdab._deae` minus the bag rectangle's Lly.
func (_cdab *wordBag) maxDepth() float64 {
	return _cdab._deae - _cdab.Lly
}

// bbox returns the bounding box of the rulings in `_cfbgg`.
// The kind of the first ruling decides how the axes are filled: for kind `_gegc` the X extent comes
// from secMinMax() and the Y extent from primMinMax(); for any other kind it is the other way
// round. An empty list logs an error and yields the zero rectangle.
func (_cfbgg rulingList) bbox() _dc.PdfRectangle {
	if len(_cfbgg) == 0 {
		_d.Log.Error("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073")
		return _dc.PdfRectangle{}
	}
	var box _dc.PdfRectangle
	if _cfbgg[0]._fabfb == _gegc {
		box.Llx, box.Urx = _cfbgg.secMinMax()
		box.Lly, box.Ury = _cfbgg.primMinMax()
	} else {
		box.Llx, box.Urx = _cfbgg.primMinMax()
		box.Lly, box.Ury = _cfbgg.secMinMax()
	}
	return box
}

// _gfga returns the separator to write between two values `_aggd` and `_ddab`: a space when
// `_edeg` accepts their difference, otherwise a newline.
func _gfga(_aggd, _ddab float64) string {
	if _edeg(_aggd - _ddab) {
		return "\u0020"
	}
	return "\u000a"
}

// xMean returns the mean of the X coordinates of the ruling's two end points.
func (_gfec lineRuling) xMean() float64 {
	return 0.5 * (_gfec._becgc.X + _gfec._facd.X)
}
// String returns a human readable description of `_cbfb`: the counts of the two lists returned by
// vertsHorzs() and, when both are non-empty, a rectangle formed from the `_efbdg` values of the
// first and last ruling of each list.
func (_cbfb rulingList) String() string {
	if len(_cbfb) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := _cbfb.vertsHorzs()
	numVerts, numHorzs := len(verts), len(horzs)
	if numVerts == 0 || numHorzs == 0 {
		return _caa.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", numVerts, numHorzs)
	}
	extent := _dc.PdfRectangle{
		Llx: verts[0]._efbdg,
		Urx: verts[numVerts-1]._efbdg,
		Lly: horzs[numHorzs-1]._efbdg,
		Ury: horzs[0]._efbdg,
	}
	return _caa.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", numVerts, numHorzs, extent)
}

// textState holds text-related state values.
// NOTE(review): only a few fields can be identified from this part of the file, assuming the
// textObject field `_cgf` is of this type: `_fgd` is written by setWordSpacing, `_add` by
// setTextLeading, and `_fdd`/`_gcb` (size and font) by setFont. Confirm the others.
type textState struct {
	_cace float64
	_fgd  float64
	_abbd float64
	_add  float64
	_fdd  float64
	_efbd RenderMode
	_bffe float64
	_gcb  *_dc.PdfFont
	_eegd _dc.PdfRectangle
	_efde int
	_cdc  int
}

// _deba returns the difference between the Llx of `_ebac`'s bounding box and the Llx of `_edae`'s
// bounding box. The result is negative when `_ebac` starts to the left of `_edae`.
func _deba(_ebac, _edae bounded) float64 {
	first := _ebac.bbox()
	second := _edae.bbox()
	return first.Llx - second.Llx
}
// String returns a string describing `_bacf`: its rectangle followed by four one-character
// markers. Each marker is the letter L, R, B or T when the corresponding flag (`_gcfbe`, `_ebbb`,
// `_ecbgb`, `_faaa`) is set, and an underscore otherwise.
func (_bacf gridTile) String() string {
	marker := func(_gdag bool, _ceaac string) string {
		if !_gdag {
			return "\u005f"
		}
		return _ceaac
	}
	left := marker(_bacf._gcfbe, "\u004c")
	right := marker(_bacf._ebbb, "\u0052")
	bottom := marker(_bacf._ecbgb, "\u0042")
	top := marker(_bacf._faaa, "\u0054")
	return _caa.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", _bacf.PdfRectangle, left, right, bottom, top)
}
// String returns a description of `_gecc`: its `_gddd` value, rectangle, font size and text.
func (_gecc *textLine) String() string {
	return _caa.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",
		_gecc._gddd, _gecc.PdfRectangle, _gecc._bfcdd, _gecc.text())
}

// log writes a debug dump of the non-nil paragraphs in `_egbb` under the title `_dbcfd`.
// Paragraphs that have a table (`_affa`) are tagged with the table's dimensions.
// It does nothing unless the `_cfee` debug switch is on.
func (_egbb paraList) log(_dbcfd string) {
	if !_cfee {
		return
	}
	_d.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _dbcfd, len(_egbb))
	for i, para := range _egbb {
		if para == nil {
			continue
		}
		paraText := para.text()
		tableTag := "\u0020\u0020"
		if table := para._affa; table != nil {
			tableTag = _caa.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", table._bgdee, table._fcedd)
		}
		_caa.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", i, para.PdfRectangle, tableTag, _ebfce(paraText, 50))
	}
}

// appendWord adds `_cbfdf` to the line's words, grows the line's rectangle to include the word,
// and raises the line's font size (`_bfcdd`) and `_gddd` to the word's values when those are larger.
func (_edg *textLine) appendWord(_cbfdf *textWord) {
	_edg._ccfce = append(_edg._ccfce, _cbfdf)
	_edg.PdfRectangle = _abga(_edg.PdfRectangle, _cbfdf.PdfRectangle)
	if _edg._bfcdd < _cbfdf._debab {
		_edg._bfcdd = _cbfdf._debab
	}
	if _edg._gddd < _cbfdf._cffg {
		_edg._gddd = _cbfdf._cffg
	}
}

// bbox returns the word's bounding rectangle.
func (_feac *textWord) bbox() _dc.PdfRectangle {
	return _feac.PdfRectangle
}

// _fdga reports whether the Y ranges of `_gbcc` and `_aeec` overlap (touching counts as overlap).
func _fdga(_gbcc, _aeec _dc.PdfRectangle) bool {
	if _gbcc.Lly > _aeec.Ury {
		return false
	}
	return _aeec.Lly <= _gbcc.Ury
}

// yNeighbours returns, for each paragraph, the indexes of the paragraphs whose Y ranges overlap
// its own. When `_cfeff` is non-zero every range is first widened at both ends by `_cfeff` times
// the paragraph's fontsize(). The overlap computation itself is done by eventNeighbours.
func (_fgged paraList) yNeighbours(_cfeff float64) map[*textPara][]int {
	events := make([]event, 2*len(_fgged))
	if _cfeff == 0 {
		for i, para := range _fgged {
			events[2*i] = event{_dfec: para.Lly, _efga: true, _agafc: i}
			events[2*i+1] = event{_dfec: para.Ury, _efga: false, _agafc: i}
		}
		return _fgged.eventNeighbours(events)
	}
	for i, para := range _fgged {
		events[2*i] = event{_dfec: para.Lly - _cfeff*para.fontsize(), _efga: true, _agafc: i}
		events[2*i+1] = event{_dfec: para.Ury + _cfeff*para.fontsize(), _efga: false, _agafc: i}
	}
	return _fgged.eventNeighbours(events)
}

// primaries returns the distinct `_efbdg` values of the rulings in `_eacg`, in ascending order.
func (_eacg rulingList) primaries() []float64 {
	seen := make(map[float64]struct{}, len(_eacg))
	for i := range _eacg {
		seen[_eacg[i]._efbdg] = struct{}{}
	}
	values := make([]float64, 0, len(seen))
	for value := range seen {
		values = append(values, value)
	}
	_e.Float64s(values)
	return values
}

// _fcfb creates a text line seeded with the first word of `_daca` at depth index `_afb`.
// The line starts with that word's rectangle, font size and `_cffg` value, and the word is then
// moved into the line with pullWord.
func _fcfb(_daca *wordBag, _afb int) *textLine {
	seed := _daca.firstWord(_afb)
	line := &textLine{
		PdfRectangle: seed.PdfRectangle,
		_bfcdd:       seed._debab,
		_gddd:        seed._cffg,
	}
	line.pullWord(_daca, seed, _afb)
	return line
}

// _aebga returns `_fdeeb`, followed by the values of `_becad` lying strictly between the smaller
// and the larger of `_fdeeb` and `_dffc`, followed by `_dffc`.
// Scanning of `_becad` stops at the first value that exceeds the lower bound and reaches the upper
// bound, so `_becad` is presumably sorted in ascending order — confirm with callers.
func _aebga(_becad []float64, _fdeeb, _dffc float64) []float64 {
	lower, upper := _fdeeb, _dffc
	if upper < lower {
		lower, upper = upper, lower
	}
	result := make([]float64, 0, len(_becad)+2)
	result = append(result, _fdeeb)
	for _, value := range _becad {
		if value <= lower {
			continue
		}
		if value >= upper {
			break
		}
		result = append(result, value)
	}
	return append(result, _dffc)
}

// eventNeighbours sweeps over the interval events in `_adae` and returns, for each paragraph, the
// indexes of the paragraphs whose intervals are open at the same time as its own.
// `_adae` is sorted in place by position (`_dfec`); at equal positions, events with `_efga` set
// (interval starts) come before the others. A paragraph with no neighbours maps to nil.
func (_gbde paraList) eventNeighbours(_adae []event) map[*textPara][]int {
	_e.Slice(_adae, func(_daeec, _gcdeeb int) bool {
		first, second := _adae[_daeec], _adae[_gcdeeb]
		if first._dfec != second._dfec {
			return first._dfec < second._dfec
		}
		if first._efga != second._efga {
			return first._efga
		}
		return _daeec < _gcdeeb
	})

	overlaps := make(map[int]intSet)
	open := make(intSet)
	for _, ev := range _adae {
		idx := ev._agafc
		if !ev._efga {
			// End of interval: the paragraph is no longer open.
			open.del(idx)
			continue
		}
		// Start of interval: pair it with every interval that is currently open.
		overlaps[idx] = make(intSet)
		for other := range open {
			if other != idx {
				overlaps[idx].add(other)
				overlaps[other].add(idx)
			}
		}
		open.add(idx)
	}

	neighbours := map[*textPara][]int{}
	for idx, others := range overlaps {
		para := _gbde[idx]
		if len(others) == 0 {
			neighbours[para] = nil
			continue
		}
		list := make([]int, 0, len(others))
		for other := range others {
			list = append(list, other)
		}
		neighbours[para] = list
	}
	return neighbours
}

// findGridTables runs findTableGrid for every tiling in `_cedd` and returns the tables found.
// Each table found is logged and has markCells() called on it. Every paragraph in the second value
// returned by findTableGrid gets its `_bgcb` flag set, whether or not a table was found.
func (_cgaee paraList) findGridTables(_cedd []gridTiling) []*textTable {
	if _eea {
		_d.Log.Info("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073", len(_cgaee))
		for i, para := range _cgaee {
			_caa.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
	}

	var tables []*textTable
	for i, tiling := range _cedd {
		table, used := _cgaee.findTableGrid(tiling)
		if table != nil {
			table.log(_caa.Sprintf("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064", i))
			tables = append(tables, table)
			table.markCells()
		}
		for para := range used {
			para._bgcb = true
		}
	}

	if _eea {
		_d.Log.Info("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s", len(tables))
	}
	return tables
}

// Debug switches. Every one of them is either false or derived from a switch that is false, so
// all the logging they guard is disabled.
const (
	_aedg  = false
	_agb   = false
	_dccf  = false
	_eece  = false
	_fgeac = false
	_gfaee = false
	_aebe  = false
	_cfee  = false
	_daag  = false
	_gegf  = _daag && true
	_dbf   = _gegf && false
	_feaa  = _daag && true
	_eea   = false
	_agbf  = _eea && false
	_gggd  = _eea && true
	_cage  = false
	_eabg  = _cage && false
	_baea  = _cage && false
	_dgfd  = _cage && true
	_agca  = _cage && false
	_gcdd  = _cage && false
)

// Sentinel errors: `_cc` is the "type check error" and `_cb` the "range check error".
var (
	_cc = _g.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")
	_cb = _g.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)

// lines returns the lines (`_ddaf`) of all paragraphs in `_ffcg`, concatenated in list order.
func (_ffcg paraList) lines() []*textLine {
	var all []*textLine
	for i := range _ffcg {
		all = append(all, _ffcg[i]._ddaf...)
	}
	return all
}
// Len returns the number of TextMarks in `_fgea`. A nil receiver has length 0.
func (_fgea *TextMarkArray) Len() int {
	if _fgea != nil {
		return len(_fgea._bgbd)
	}
	return 0
}

// textPara is a paragraph: a rectangle plus the text lines (`_ddaf`) it contains.
// `_affa` is the table attached to the paragraph, if any. `_bgcb` is set by findGridTables, and
// paragraphs with `_bedda` set are skipped by paraList.toTextMarks.
// NOTE(review): `_egdgfd`, `_becf`, `_gffb` and `_bgce` are links to neighbouring paragraphs; which
// direction each one points in should be confirmed against addNeighbours.
type textPara struct {
	_dc.PdfRectangle
	_aecfg  _dc.PdfRectangle
	_ddaf   []*textLine
	_affa   *textTable
	_bgcb   bool
	_bedda  bool
	_egdgfd *textPara
	_becf   *textPara
	_gffb   *textPara
	_bgce   *textPara
}

// setTextMatrix builds a matrix from the six values in `_cfcd` and stores it in both `_aafd` and
// `_acd`. If `_cfcd` does not hold exactly six values the call is logged and ignored.
func (_bfdf *textObject) setTextMatrix(_cfcd []float64) {
	if len(_cfcd) != 6 {
		_d.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", len(_cfcd))
		return
	}
	matrix := _bab.NewMatrix(_cfcd[0], _cfcd[1], _cfcd[2], _cfcd[3], _cfcd[4], _cfcd[5])
	_bfdf._aafd = matrix
	_bfdf._acd = _bfdf._aafd
}

// textResult bundles a PageText with two integer values.
// NOTE(review): the meaning of `_eab` and `_fadb` is not visible in this part of the file.
type textResult struct {
	_fbf  PageText
	_eab  int
	_fadb int
}

// extractXObjectImage extracts the image XObject named `_eb` from the resources `_egd` and appends
// an ImageMark for it to the context.
// Decoded images are cached in `_cba._egc`, keyed by the XObject, so each is converted with
// ToImage only once. The mark's size, angle and position are taken from the CTM of the graphics
// state `_cf`. Returns nil without doing anything if the XObject or the image is not found.
func (_cba *imageExtractContext) extractXObjectImage(_eb *_aa.PdfObjectName, _cf _gcf.GraphicsState, _egd *_dc.PdfPageResources) error {
	xobj, _ := _egd.GetXObjectByName(*_eb)
	if xobj == nil {
		return nil
	}

	cached, inCache := _cba._egc[xobj]
	if !inCache {
		ximg, err := _egd.GetXObjectImageByName(*_eb)
		if err != nil {
			return err
		}
		if ximg == nil {
			return nil
		}
		img, err := ximg.ToImage()
		if err != nil {
			return err
		}
		cached = &cachedImage{_dga: img, _fcc: ximg.ColorSpace}
		_cba._egc[xobj] = cached
	}

	rgbImg, err := cached._fcc.ImageToRGB(*cached._dga)
	if err != nil {
		return err
	}
	_d.Log.Debug("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073", _cf.CTM.String())

	mark := ImageMark{
		Image:  &rgbImg,
		Width:  _cf.CTM.ScalingFactorX(),
		Height: _cf.CTM.ScalingFactorY(),
		Angle:  _cf.CTM.Angle(),
	}
	mark.X, mark.Y = _cf.CTM.Translation()
	_cba._ggf = append(_cba._ggf, mark)
	_cba._gfa++
	return nil
}

// _ggce converts the color `_bgfa` in colorspace `_gefc` to a color.Color.
// It returns black if either argument is nil, if the conversion to RGB fails, or if the converted
// color is not a *PdfColorDeviceRGB. Otherwise each component is scaled by 255 and the result is
// fully opaque.
func _ggce(_gefc _dc.PdfColorspace, _bgfa _dc.PdfColor) _ba.Color {
	if _gefc == nil || _bgfa == nil {
		return _ba.Black
	}
	converted, err := _gefc.ColorToRGB(_bgfa)
	if err != nil {
		_d.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", _bgfa, _gefc, err)
		return _ba.Black
	}
	rgb, isRGB := converted.(*_dc.PdfColorDeviceRGB)
	if !isRGB {
		_d.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", converted)
		return _ba.Black
	}
	return _ba.NRGBA{
		R: uint8(rgb.R() * 255),
		G: uint8(rgb.G() * 255),
		B: uint8(rgb.B() * 255),
		A: uint8(255),
	}
}

// blocked reports whether a ruling separates the word `_gecg` from the bag `_gbb`.
// When the word lies entirely to one side of the bag horizontally, the `_bbce` rulings are tested
// between the facing edges built by `_dbef` and `_dggfd`; when it lies entirely above or below,
// the `_dce` rulings are tested between the edges built by `_adabd` and `_ggdge`.
func (_gbb *wordBag) blocked(_gecg *textWord) bool {
	// Horizontal separation.
	switch {
	case _gecg.Urx < _gbb.Llx:
		from := _dbef(_gecg.PdfRectangle)
		to := _dggfd(_gbb.PdfRectangle)
		if _gbb._bbce.blocks(from, to) {
			if _gcdd {
				_d.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _gecg, _gbb)
			}
			return true
		}
	case _gbb.Urx < _gecg.Llx:
		from := _dbef(_gbb.PdfRectangle)
		to := _dggfd(_gecg.PdfRectangle)
		if _gbb._bbce.blocks(from, to) {
			if _gcdd {
				_d.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s", _gecg, _gbb)
			}
			return true
		}
	}

	// Vertical separation.
	switch {
	case _gecg.Ury < _gbb.Lly:
		from := _adabd(_gecg.PdfRectangle)
		to := _ggdge(_gbb.PdfRectangle)
		if _gbb._dce.blocks(from, to) {
			if _gcdd {
				_d.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _gecg, _gbb)
			}
			return true
		}
	case _gbb.Ury < _gecg.Lly:
		from := _adabd(_gbb.PdfRectangle)
		to := _ggdge(_gecg.PdfRectangle)
		if _gbb._dce.blocks(from, to) {
			if _gcdd {
				_d.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s", _gecg, _gbb)
			}
			return true
		}
	}
	return false
}

// _gagd returns the indexes 0 to `_caad`-1 ordered by the less-function `_cfgf`, which is called
// with pairs of those indexes.
func _gagd(_caad int, _cfgf func(int, int) bool) []int {
	order := make([]int, _caad)
	for i := 0; i < _caad; i++ {
		order[i] = i
	}
	_e.Slice(order, func(_bffd, _abefd int) bool {
		return _cfgf(order[_bffd], order[_abefd])
	})
	return order
}

// setWordSpacing stores `_gaff` in the text state's `_fgd` field. A nil receiver is a no-op.
func (_feg *textObject) setWordSpacing(_gaff float64) {
	if _feg != nil {
		_feg._cgf._fgd = _gaff
	}
}

// addDiacritic appends `_faca` to the `_cbge` text of the word's last mark and replaces that text
// with its NFKC-normalized form. The word must have at least one mark.
func (_acbe *textWord) addDiacritic(_faca string) {
	last := _acbe._gbaed[len(_acbe._gbaed)-1]
	last._cbge = _ac.NFKC.String(last._cbge + _faca)
}
2022-06-27 19:58:38 +00:00
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the cell in the (0-offset) x'th column of the y'th row.
2022-09-23 18:05:51 +00:00
// _adabd (defined on the next line, after the TextTable struct) returns a ruling of kind _gegc
// (named "horizontal" in _ebfc) whose _efbdg is the rectangle's Ury and whose _becdd.._aggb range is
// Llx..Urx, i.e. it runs along the rectangle's top edge.
//
// addNeighbours (method on paraList, opened on the next line) links every paragraph to at most one
// neighbour on each of its four sides:
//   - _cafbg splits candidate indices into paragraphs entirely left of (Urx <= Llx) and entirely
//     right of (Llx >= Urx) the reference paragraph; _cbec does the same for above (Lly >= Ury)
//     and below (Ury <= Lly), returning (above, below).
//   - From yNeighbours(_cddaf): the left candidate with the largest Urx and the right candidate with
//     the smallest Llx are chosen. A choice is dropped when another candidate on the same side
//     overlaps its horizontal extent, and is stored in _egdgfd (left) / _becf (right) only if _fdga
//     accepts the two rectangles.
//   - From xNeighbours(_eaga): the below candidate with the largest Ury and the above candidate with
//     the smallest Lly are chosen the same way and stored in _bgce (below) / _gffb (above) if _dee
//     accepts the two rectangles.
//   - A final pass clears every link that is not reciprocated by the neighbour's opposite link.
type TextTable struct { W , H int ; Cells [ ] [ ] TableCell ; } ; func _adabd ( _eebe _dc . PdfRectangle ) * ruling { return & ruling { _fabfb : _gegc , _efbdg : _eebe . Ury , _becdd : _eebe . Llx , _aggb : _eebe . Urx } ; } ; func ( _cdbdg paraList ) addNeighbours ( ) { _cafbg := func ( _gaged [ ] int , _adbgf * textPara ) ( [ ] * textPara , [ ] * textPara ) { _fgcef := make ( [ ] * textPara , 0 , len ( _gaged ) - 1 ) ;
_gaffb := make ( [ ] * textPara , 0 , len ( _gaged ) - 1 ) ; for _ , _gegbdd := range _gaged { _eebf := _cdbdg [ _gegbdd ] ; if _eebf . Urx <= _adbgf . Llx { _fgcef = append ( _fgcef , _eebf ) ; } else if _eebf . Llx >= _adbgf . Urx { _gaffb = append ( _gaffb , _eebf ) ; } ; } ; return _fgcef , _gaffb ;
} ; _cbec := func ( _bbgc [ ] int , _daeed * textPara ) ( [ ] * textPara , [ ] * textPara ) { _gaaa := make ( [ ] * textPara , 0 , len ( _bbgc ) - 1 ) ; _dgad := make ( [ ] * textPara , 0 , len ( _bbgc ) - 1 ) ; for _ , _gfefa := range _bbgc { _dggeg := _cdbdg [ _gfefa ] ; if _dggeg . Ury <= _daeed . Lly { _dgad = append ( _dgad , _dggeg ) ;
} else if _dggeg . Lly >= _daeed . Ury { _gaaa = append ( _gaaa , _dggeg ) ; } ; } ; return _gaaa , _dgad ; } ; _dagd := _cdbdg . yNeighbours ( _cddaf ) ; for _ , _daed := range _cdbdg { _ddfcc := _dagd [ _daed ] ; if len ( _ddfcc ) == 0 { continue ; } ; _bfdg , _dfagg := _cafbg ( _ddfcc , _daed ) ;
if len ( _bfdg ) == 0 && len ( _dfagg ) == 0 { continue ; } ; if len ( _bfdg ) > 0 { _bfdb := _bfdg [ 0 ] ; for _ , _fgbec := range _bfdg [ 1 : ] { if _fgbec . Urx >= _bfdb . Urx { _bfdb = _fgbec ; } ; } ; for _ , _gafgd := range _bfdg { if _gafgd != _bfdb && _gafgd . Urx > _bfdb . Llx { _bfdb = nil ;
break ; } ; } ; if _bfdb != nil && _fdga ( _daed . PdfRectangle , _bfdb . PdfRectangle ) { _daed . _egdgfd = _bfdb ; } ; } ; if len ( _dfagg ) > 0 { _bfdde := _dfagg [ 0 ] ; for _ , _cefgc := range _dfagg [ 1 : ] { if _cefgc . Llx <= _bfdde . Llx { _bfdde = _cefgc ; } ; } ; for _ , _gagb := range _dfagg { if _gagb != _bfdde && _gagb . Llx < _bfdde . Urx { _bfdde = nil ;
break ; } ; } ; if _bfdde != nil && _fdga ( _daed . PdfRectangle , _bfdde . PdfRectangle ) { _daed . _becf = _bfdde ; } ; } ; } ; _dagd = _cdbdg . xNeighbours ( _eaga ) ; for _ , _caag := range _cdbdg { _abfc := _dagd [ _caag ] ; if len ( _abfc ) == 0 { continue ; } ; _befc , _gbcb := _cbec ( _abfc , _caag ) ;
if len ( _befc ) == 0 && len ( _gbcb ) == 0 { continue ; } ; if len ( _gbcb ) > 0 { _bacc := _gbcb [ 0 ] ; for _ , _egdbd := range _gbcb [ 1 : ] { if _egdbd . Ury >= _bacc . Ury { _bacc = _egdbd ; } ; } ; for _ , _ebbce := range _gbcb { if _ebbce != _bacc && _ebbce . Ury > _bacc . Lly { _bacc = nil ;
break ; } ; } ; if _bacc != nil && _dee ( _caag . PdfRectangle , _bacc . PdfRectangle ) { _caag . _bgce = _bacc ; } ; } ; if len ( _befc ) > 0 { _fdbca := _befc [ 0 ] ; for _ , _bdgg := range _befc [ 1 : ] { if _bdgg . Lly <= _fdbca . Lly { _fdbca = _bdgg ; } ; } ; for _ , _egegd := range _befc { if _egegd != _fdbca && _egegd . Lly < _fdbca . Ury { _fdbca = nil ;
break ; } ; } ; if _fdbca != nil && _dee ( _caag . PdfRectangle , _fdbca . PdfRectangle ) { _caag . _gffb = _fdbca ; } ; } ; } ; for _ , _gfdef := range _cdbdg { if _gfdef . _egdgfd != nil && _gfdef . _egdgfd . _becf != _gfdef { _gfdef . _egdgfd = nil ; } ; if _gfdef . _gffb != nil && _gfdef . _gffb . _bgce != _gfdef { _gfdef . _gffb = nil ;
// _ceebc (opened on the next line, after the end of addNeighbours) computes y coordinates that
// separate groups of text lines within the given composite cells (the debug output calls them
// row corridors):
//   - all lines of all cells are collected and sorted by _gddd, falling back to Llx when _edeg
//     accepts the _gddd difference — NOTE(review): _edeg is defined elsewhere; presumably a
//     "close to zero" test, confirm;
//   - walking the sorted lines, a line whose Ury lies below the lowest Lly seen so far closes the
//     current group and records the midpoint between the two values as a border;
//   - if any cell fails hasLines for any group, the borders are discarded and nil is returned.
// NOTE(review): `_dafeda[0]` panics when the cells contain no lines — confirm callers guarantee
// at least one.
} ; if _gfdef . _becf != nil && _gfdef . _becf . _egdgfd != _gfdef { _gfdef . _becf = nil ; } ; if _gfdef . _bgce != nil && _gfdef . _bgce . _gffb != _gfdef { _gfdef . _bgce = nil ; } ; } ; } ; func _ceebc ( _cacgb [ ] compositeCell ) [ ] float64 { var _dafeda [ ] * textLine ; _bagag := 0 ; for _ , _beaag := range _cacgb { _bagag += len ( _beaag . paraList ) ;
_dafeda = append ( _dafeda , _beaag . lines ( ) ... ) ; } ; _e . Slice ( _dafeda , func ( _dabc , _efbf int ) bool { _egggb , _fefd := _dafeda [ _dabc ] , _dafeda [ _efbf ] ; _fagd , _cddd := _egggb . _gddd , _fefd . _gddd ; if ! _edeg ( _fagd - _cddd ) { return _fagd < _cddd ; } ; return _egggb . Llx < _fefd . Llx ;
} ) ; if _eea { _caa . Printf ( "\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a" , _bagag , len ( _dafeda ) ) ; for _bbbf , _gbed := range _dafeda { _caa . Printf ( "\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a" , _bbbf , _gbed ) ;
} ; } ; var _bgccd [ ] float64 ; _afec := _dafeda [ 0 ] ; var _bcea [ ] [ ] * textLine ; _dagc := [ ] * textLine { _afec } ; for _afeca , _fcdbe := range _dafeda [ 1 : ] { if _fcdbe . Ury < _afec . Lly { _gcccbb := 0.5 * ( _fcdbe . Ury + _afec . Lly ) ; if _eea { _caa . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a" + "\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a" , _afeca , _fcdbe . Ury , _afec . Lly , _gcccbb , _afec , _fcdbe ) ;
} ; _bgccd = append ( _bgccd , _gcccbb ) ; _bcea = append ( _bcea , _dagc ) ; _dagc = nil ; } ; _dagc = append ( _dagc , _fcdbe ) ; if _fcdbe . Lly < _afec . Lly { _afec = _fcdbe ; } ; } ; if len ( _dagc ) > 0 { _bcea = append ( _bcea , _dagc ) ; } ; if _eea { _caa . Printf ( " \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a" , _bgccd ) ;
} ; if _eea { _d . Log . Info ( "\u0072\u006f\u0077\u003d\u0025\u0064" , len ( _cacgb ) ) ; for _efcgg , _dgabb := range _cacgb { _caa . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _efcgg , _dgabb ) ; } ; _d . Log . Info ( "\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d" , len ( _bcea ) ) ;
for _ddag , _bgad := range _bcea { _caa . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a" , _ddag , len ( _bgad ) ) ; for _cade , _ebcd := range _bgad { _caa . Printf ( "\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a" , _cade , _ebcd ) ; } ; } ; } ; _gffbf := true ; for _degbbb , _efbfc := range _bcea { _cefd := true ;
for _degae , _bgca := range _cacgb { if _eea { _caa . Printf ( "\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a" , _degbbb , len ( _bcea ) , _degae , len ( _cacgb ) , _bgca ) ;
} ; if ! _bgca . hasLines ( _efbfc ) { if _eea { _caa . Printf ( "\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a" , _degbbb , len ( _bcea ) , _degae , len ( _cacgb ) ) ;
} ; _cefd = false ; break ; } ; } ; if ! _cefd { _gffbf = false ; break ; } ; } ; if ! _gffbf { if _eea { _d . Log . Info ( "\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg" ) ;
} ; _bgccd = nil ; } ; if _eea && _bgccd != nil { _caa . Printf ( "\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a" , _bgccd ) ; } ; return _bgccd ;
// newTablePara (opened on the next line) wraps the table in a textPara whose PdfRectangle and
// _aecfg are the table's computeBbox() result and whose _affa points back at the table.
} ; func ( _bcfb * textTable ) newTablePara ( ) * textPara { _egbdb := _bcfb . computeBbox ( ) ; _fbccc := & textPara { PdfRectangle : _egbdb , _aecfg : _egbdb , _affa : _bcfb } ; if _eea { _d . Log . Info ( "\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073" , _fbccc ) ;
} ; return _fbccc ; } ;
// String formats the word for debugging: its _cffg value, its bounding rectangle, its _debab value
// (labelled "fontsize" in the output) and its _bgdg text in double quotes.
func (w *textWord) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _caa.Sprintf(layout, w._cffg, w.PdfRectangle, w._debab, w._bgdg)
}

// rectRuling bundles a rulingKind, a markKind, a colour and a PDF rectangle.
type rectRuling struct {
	_fgdfae rulingKind
	_afdbf  markKind
	_ba.Color
	_dc.PdfRectangle
}
// String returns a one-line description of the mark: offset, quoted text, the text's runes in hex,
// the bounding box corners, the font description (truncated to 50 bytes plus "...") and a " *M*"
// suffix for meta marks.
func (_bga TextMark) String() string {
	_cgca := _bga.BBox
	var _agcd string
	if _bga.Font != nil {
		_agcd = _bga.Font.String()
		if len(_agcd) > 50 {
			_agcd = _agcd[:50] + "\u002e\u002e\u002e"
		}
	}
	var _gffc string
	if _bga.Meta {
		_gffc = "\u0020\u002a\u004d\u002a"
	}
	return _caa.Sprintf("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d", _bga.Offset, _bga.Text, []rune(_bga.Text), _cgca.Llx, _cgca.Lly, _cgca.Urx, _cgca.Ury, _agcd, _gffc)
}

// highestWord returns the first word stored under index `_gfgc` of _adcb whose _cffg value lies in
// the closed interval [_acg, _edfe], or nil when there is none.
func (_bcdad *wordBag) highestWord(_gfgc int, _acg, _edfe float64) *textWord {
	for _, _gdbb := range _bcdad._adcb[_gfgc] {
		if _acg <= _gdbb._cffg && _gdbb._cffg <= _edfe {
			return _gdbb
		}
	}
	return nil
}

// subpath is a list of points plus a boolean flag.
type subpath struct {
	_bcbc []_bab.Point
	_adge bool
}

// renderText decodes the string operand `_adf` with the current font and appends one text mark per
// decoded text to _egce._dcfd, advancing the text matrix _egce._aafd after each mark.
//
// `_cagc` is passed through unchanged to newTextMark. Nothing is rendered when _egce._dcg is set.
// An error is returned when the font has no metrics for one of the character codes; marks created
// before that point remain appended.
func (_egce *textObject) renderText(_cagc _aa.PdfObject, _adf []byte) error {
	if _egce._dcg {
		_d.Log.Debug("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e")
		return nil
	}

	font := _egce.getCurrentFont()
	charcodes := font.BytesToCharcodes(_adf)
	texts, numChars, numMisses := font.CharcodesToStrings(charcodes)
	if numMisses > 0 {
		_d.Log.Debug("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064", numChars, numMisses)
	}
	_egce._cgf._efde += numChars
	_egce._cgf._cdc += numMisses

	state := _egce._cgf
	fontSize := state._fdd
	horizScale := state._abbd / 100.0
	glyphRatio := _gdcag
	if font.Subtype() == "\u0054\u0079\u0070e\u0033" {
		glyphRatio = 1
	}

	// Width of a space: try the rune, then character code 32, then the default font.
	spaceMetrics, ok := font.GetRuneMetrics(' ')
	if !ok {
		spaceMetrics, ok = font.GetCharMetrics(32)
	}
	if !ok {
		spaceMetrics, _ = _dc.DefaultFont().GetRuneMetrics(' ')
	}
	spaceWidth := spaceMetrics.Wx * glyphRatio
	_d.Log.Trace("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066", spaceWidth, texts, font, fontSize)

	stateMatrix := _bab.NewMatrix(fontSize*horizScale, 0, 0, fontSize, 0, state._bffe)
	if _gfaee {
		_d.Log.Info("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071", len(charcodes), charcodes, texts)
	}
	// The last verb is %q: pass the decoded texts themselves. Passing len(texts) here would print
	// the count as a quoted character.
	_d.Log.Trace("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071", len(charcodes), charcodes, texts)

	fillColor := _egce.getFillColor()
	strokeColor := _egce.getStrokeColor()

	for i, text := range texts {
		runes := []rune(text)
		// A lone NUL rune produces no mark and does not advance the text matrix.
		if len(runes) == 1 && runes[0] == '\x00' {
			continue
		}
		code := charcodes[i]
		trm := _egce._fdf.CTM.Mult(_egce._aafd).Mult(stateMatrix)

		// The _fgd spacing is only added for a single space character.
		wordSpacing := 0.0
		if len(runes) == 1 && runes[0] == 32 {
			wordSpacing = state._fgd
		}

		metrics, found := font.GetCharMetrics(code)
		if !found {
			_d.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073", code, runes, runes, font)
			return _caa.Errorf("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064", font.String(), code)
		}

		glyph := _bab.Point{X: metrics.Wx * glyphRatio, Y: metrics.Wy * glyphRatio}
		// t0 excludes the _cace spacing and locates the end of the glyph; t includes it and is the
		// displacement applied to the text matrix.
		t0 := _bab.Point{X: (glyph.X*fontSize + wordSpacing) * horizScale}
		t := _bab.Point{X: (glyph.X*fontSize + state._cace + wordSpacing) * horizScale}
		if _gfaee {
			_d.Log.Info("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066", fontSize, state._cace, state._fgd, horizScale)
			_d.Log.Info("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f", glyph, t0, t)
		}
		td0 := _aec(t0)
		td := _aec(t)
		end := _egce._fdf.CTM.Mult(_egce._aafd).Mult(td0)
		if _eece {
			_d.Log.Info("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073", _egce._fdf.CTM, _egce._aafd, td, _fdacf(_egce._fdf.CTM.Mult(_egce._aafd).Mult(td)), td0, end, _fdacf(end))
		}

		mark, onPage := _egce.newTextMark(_ad.ExpandLigatures(runes), trm, _fdacf(end), _ca.Abs(spaceWidth*trm.ScalingFactorX()), font, _egce._cgf._cace, fillColor, strokeColor, _cagc, texts, i)
		if !onPage {
			// Skipped marks do not advance the text matrix.
			_d.Log.Debug("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067")
			continue
		}

		// font has already been dereferenced above, so the nil branch is kept only for parity.
		if font == nil {
			_d.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e")
		} else if font.Encoder() == nil {
			_d.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073", font)
		} else {
			if r, ok := font.Encoder().CharcodeToRune(code); ok {
				mark._bcabg = string(r)
			}
		}
		_d.Log.Trace("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073", i, code, mark, trm)
		_egce._dcfd = append(_egce._dcfd, &mark)
		_egce._aafd.Concat(td)
	}
	return nil
}

// _gefd classifies the segment between two points by passing |dx|, |dy| and the threshold _adfd
// to _afgac.
func _gefd(_bfbf, _cafe _bab.Point) rulingKind {
	_fgbe := _ca.Abs(_bfbf.X - _cafe.X)
	_cabg := _ca.Abs(_bfbf.Y - _cafe.Y)
	return _afgac(_fgbe, _cabg, _adfd)
}

// compositeCell is a PDF rectangle together with a list of paragraphs.
type compositeCell struct {
	_dc.PdfRectangle
	paraList
}

// _cacgd returns _aged(|dy|, |dx|) for the two points.
// NOTE(review): _aged is defined elsewhere; presumably it tests that the first value is negligible
// relative to the second — confirm.
func _cacgd(_cgcab, _cadag _bab.Point) bool {
	_agbg := _ca.Abs(_cgcab.X - _cadag.X)
	_feaed := _ca.Abs(_cgcab.Y - _cadag.Y)
	return _aged(_feaed, _agbg)
}
// ApplyArea rebuilds the page text from only those marks that _dggg accepts for the area `bbox`.
// Every call overwrites the text, text marks and tables held by the receiver, so it may be called
// repeatedly with different bounding boxes.
func (_gbc *PageText) ApplyArea(bbox _dc.PdfRectangle) {
	inArea := make([]*textMark, 0, len(_gbc._gfc))
	for _, mark := range _gbc._gfc {
		if !_dggg(mark.bbox(), bbox) {
			continue
		}
		inArea = append(inArea, mark)
	}

	// Marks are processed in groups by their _gcce value (0, 90, 180, 270), stopping early once
	// every mark has been consumed.
	var paras paraList
	remaining := len(inArea)
	for angle := 0; angle < 360; angle += 90 {
		if remaining <= 0 {
			break
		}
		group := make([]*textMark, 0, len(inArea)-remaining)
		for _, mark := range inArea {
			if mark._gcce == angle {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _fdef(group, _gbc._fdbe, nil, nil)...)
		remaining -= len(group)
	}

	var buf _f.Buffer
	paras.writeText(&buf)
	_gbc._dbdg = buf.String()
	_gbc._efda = paras.toTextMarks()
	_gbc._fdc = paras.tables()
}

// yMean returns the average of the Y coordinates of the ruling's two end points.
func (_eeee lineRuling) yMean() float64 {
	sum := _eeee._becgc.Y + _eeee._facd.Y
	return 0.5 * sum
}
// _gbda reports whether _dee holds for the _aecfg rectangles of two paragraphs.
// _aaed returns _aab(_ggcg) - _aab(_afgb).
// closePath (method on *shapesState): if _dcgc is set, a new subpath created by _badg(_gae._fcaf)
// is appended and the flag cleared; otherwise, if there are no subpaths at all, it returns without
// closing anything (logging when _fgeac is set). In every other case the last subpath is closed.
func _gbda ( _gdf , _eafcf * textPara ) bool { return _dee ( _gdf . _aecfg , _eafcf . _aecfg ) } ; func _aaed ( _ggcg , _afgb bounded ) float64 { return _aab ( _ggcg ) - _aab ( _afgb ) } ; func ( _gae * shapesState ) closePath ( ) { if _gae . _dcgc { _gae . _bfg = append ( _gae . _bfg , _badg ( _gae . _fcaf ) ) ;
_gae . _dcgc = false ; } else if len ( _gae . _bfg ) == 0 { if _fgeac { _d . Log . Debug ( "\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068" ) ; } ; _gae . _dcgc = false ; return ; } ; _gae . _bfg [ len ( _gae . _bfg ) - 1 ] . close ( ) ;
// _ggee (defined on the next line) returns _egfg.bbox().Llx - _bdcd.bbox().Urx, i.e. the horizontal
// gap between the right edge of the second box and the left edge of the first.
// fill appends a pathSection holding the current subpaths (_adbg._bfg) and the current fill colour
// to *_faea. With _cage set it prints a summary; with _eabg also set it prints the subpaths,
// stopping after index 10.
if _fgeac { _d . Log . Info ( "\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073" , _gae ) ; } ; } ; func _ggee ( _egfg , _bdcd bounded ) float64 { return _egfg . bbox ( ) . Llx - _bdcd . bbox ( ) . Urx } ; func ( _adbg * shapesState ) fill ( _faea * [ ] pathSection ) { _gcae := pathSection { _bffc : _adbg . _bfg , Color : _adbg . _degb . getFillColor ( ) } ;
* _faea = append ( * _faea , _gcae ) ; if _cage { _ebd := _gcae . bbox ( ) ; _caa . Printf ( "\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a" , len ( * _faea ) , len ( _gcae . _bffc ) , _adbg , _gcae . Color , _ebd , _ebd . Width ( ) , _ebd . Height ( ) ) ;
// mergePrimary (opened on the next line) returns the arithmetic mean of _efbdg over the list.
// It reads element 0 unconditionally, so the list must be non-empty.
if _eabg { for _abbc , _gccc := range _gcae . _bffc { _caa . Printf ( "\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a" , _abbc , _gccc ) ; if _abbc == 10 { break ; } ; } ; } ; } ; } ; func ( _dggge rulingList ) mergePrimary ( ) float64 { _bedc := _dggge [ 0 ] . _efbdg ; for _ , _gecd := range _dggge [ 1 : ] { _bedc += _gecd . _efbdg ;
// growTable (method on *textTable, opened on the next line) repeatedly extends the table:
//   - _gfaea adds a row: _fcedd is incremented and paras[x] is put at (x, _fcedd-1) for x < _bgdee;
//   - _afedg adds a column: _bgdee is incremented and paras[y] is put at (_bgdee-1, y) for y < _fcedd;
//   - each pass calls getDown() and getRight(). When both are non-nil, and the last element of the
//     down list is not taken() and is also the last element of the right list, a row is added,
//     getRight() is evaluated again and, if non-nil, a column is added and that shared element is
//     put in the new bottom-right corner. Otherwise a row is added if getDown() was non-nil, else
//     a column if getRight() was non-nil;
//   - the loop ends on the first pass that adds nothing.
} ; return _bedc / float64 ( len ( _dggge ) ) ; } ; func ( _cfeg * textTable ) growTable ( ) { _gfaea := func ( _cbebe paraList ) { _cfeg . _fcedd ++ ; for _gffbg := 0 ; _gffbg < _cfeg . _bgdee ; _gffbg ++ { _acbc := _cbebe [ _gffbg ] ; _cfeg . put ( _gffbg , _cfeg . _fcedd - 1 , _acbc ) ; } ; } ;
_afedg := func ( _ebfbg paraList ) { _cfeg . _bgdee ++ ; for _gcbb := 0 ; _gcbb < _cfeg . _fcedd ; _gcbb ++ { _fbcga := _ebfbg [ _gcbb ] ; _cfeg . put ( _cfeg . _bgdee - 1 , _gcbb , _fbcga ) ; } ; } ; if _agbf { _cfeg . log ( "\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce" ) ; } ; for _bgdf := 0 ;
; _bgdf ++ { _faegd := false ; _adgf := _cfeg . getDown ( ) ; _cacbf := _cfeg . getRight ( ) ; if _agbf { _caa . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _bgdf , _cfeg ) ; _caa . Printf ( "\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a" , _adgf ) ;
_caa . Printf ( "\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a" , _cacbf ) ; } ; if _adgf != nil && _cacbf != nil { _ecccf := _adgf [ len ( _adgf ) - 1 ] ; if ! _ecccf . taken ( ) && _ecccf == _cacbf [ len ( _cacbf ) - 1 ] { _gfaea ( _adgf ) ; if _cacbf = _cfeg . getRight ( ) ;
// putComposite (opened on the next line, after the end of growTable) stores the paragraphs `_eage`
// with rectangle `_abagd` as a compositeCell in _fgge under key _abdec(_ffcfb, _ffdd), after calling
// updateBBox() on the cell. An empty `_eage` is logged as an error and ignored.
_cacbf != nil { _afedg ( _cacbf ) ; _cfeg . put ( _cfeg . _bgdee - 1 , _cfeg . _fcedd - 1 , _ecccf ) ; } ; _faegd = true ; } ; } ; if ! _faegd && _adgf != nil { _gfaea ( _adgf ) ; _faegd = true ; } ; if ! _faegd && _cacbf != nil { _afedg ( _cacbf ) ; _faegd = true ; } ; if ! _faegd { break ; } ; } ; } ; func ( _ffbfg * textTable ) putComposite ( _ffcfb , _ffdd int , _eage paraList , _abagd _dc . PdfRectangle ) { if len ( _eage ) == 0 { _d . Log . Error ( "\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073" ) ;
return ; } ; _gbag := compositeCell { PdfRectangle : _abagd , paraList : _eage } ; if _eea { _caa . Printf ( "\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a" , _ffcfb , _ffdd , _gbag . String ( ) ) ;
// _afgac (opened on the next line) classifies an extent pair: _gegc when _ccac >= _ebeb and
// _aged(_dfcgb, _ccac) holds; _bcaef when _dfcgb >= _ebeb and _aged(_ccac, _dfcgb) holds;
// otherwise _aebb.
} ; _gbag . updateBBox ( ) ; _ffbfg . _fgge [ _abdec ( _ffcfb , _ffdd ) ] = _gbag ; } ; func _afgac ( _ccac , _dfcgb , _ebeb float64 ) rulingKind { if _ccac >= _ebeb && _aged ( _dfcgb , _ccac ) { return _gegc ; } ; if _dfcgb >= _ebeb && _aged ( _ccac , _dfcgb ) { return _bcaef ; } ; return _aebb ;
// lineRuling (declared on the next line) carries a rulingKind, a markKind, a colour and two points.
// pathSection.bbox returns the bounding rectangle of all points in all subpaths of the section.
// It reads _bffc[0]._bcbc[0] unconditionally, so the section needs a first subpath with at least
// one point.
} ; type lineRuling struct { _gfdf rulingKind ; _fgdc markKind ; _ba . Color ; _becgc , _facd _bab . Point ; } ; func ( _effd pathSection ) bbox ( ) _dc . PdfRectangle { _ggde := _effd . _bffc [ 0 ] . _bcbc [ 0 ] ; _dbca := _dc . PdfRectangle { Llx : _ggde . X , Urx : _ggde . X , Lly : _ggde . Y , Ury : _ggde . Y } ;
_fgga := func ( _ffaec _bab . Point ) { if _ffaec . X < _dbca . Llx { _dbca . Llx = _ffaec . X ; } else if _ffaec . X > _dbca . Urx { _dbca . Urx = _ffaec . X ; } ; if _ffaec . Y < _dbca . Lly { _dbca . Lly = _ffaec . Y ; } else if _ffaec . Y > _dbca . Ury { _dbca . Ury = _ffaec . Y ; } ; } ;
// _cedb (opened on the next line) returns the intersection of two rectangles and true, or a zero
// rectangle and false when _dggg(_cagb, _ccc) is false (presumably: the rectangles do not overlap).
// intSet.del removes `_afge` from the set.
for _ , _aadf := range _effd . _bffc [ 0 ] . _bcbc [ 1 : ] { _fgga ( _aadf ) ; } ; for _ , _cggc := range _effd . _bffc [ 1 : ] { for _ , _bee := range _cggc . _bcbc { _fgga ( _bee ) ; } ; } ; return _dbca ; } ; func _cedb ( _cagb , _ccc _dc . PdfRectangle ) ( _dc . PdfRectangle , bool ) { if ! _dggg ( _cagb , _ccc ) { return _dc . PdfRectangle { } , false ;
} ; return _dc . PdfRectangle { Llx : _ca . Max ( _cagb . Llx , _ccc . Llx ) , Urx : _ca . Min ( _cagb . Urx , _ccc . Urx ) , Lly : _ca . Max ( _cagb . Lly , _ccc . Lly ) , Ury : _ca . Min ( _cagb . Ury , _ccc . Ury ) } , true ; } ; func ( _ecegd intSet ) del ( _afge int ) { delete ( _ecegd , _afge ) } ;
// String returns a multi-line description of the stack: a header with the number of entries,
// followed by one indexed line per entry ("<nil>" for nil entries).
func (_abec *stateStack) String() string {
	_adc := []string{_caa.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(*_abec))}
	for _fdac, _dacg := range *_abec {
		_eegg := "\u003c\u006e\u0069l\u003e"
		if _dacg != nil {
			_eegg = _dacg.String()
		}
		_adc = append(_adc, _caa.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", _fdac, _eegg))
	}
	return _ce.Join(_adc, "\u000a")
}

// _ggdf returns nil when the licence key is non-nil and IsLicensed(), or when _ag is set.
// Otherwise it prints a notice to standard output and returns an error. The argument is unused.
func _ggdf(_fgeff *PageText) error {
	_eebfa := _gd.GetLicenseKey()
	if _eebfa != nil && _eebfa.IsLicensed() || _ag {
		return nil
	}
	_caa.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_caa.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _g.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}

// vertsHorzs splits the list by kind: rulings of kind _bcaef ("vertical" in _ebfc) are returned
// first, rulings of kind _gegc ("horizontal") second. Rulings of any other kind are dropped.
func (_aaad rulingList) vertsHorzs() (rulingList, rulingList) {
	var _gaecc, _cbed rulingList
	for _, _cbff := range _aaad {
		switch _cbff._fabfb {
		case _bcaef:
			_gaecc = append(_gaecc, _cbff)
		case _gegc:
			_cbed = append(_cbed, _cbff)
		}
	}
	return _gaecc, _cbed
}

// textTable is a table under construction. _bgdee and _fcedd are the column and row counts;
// _fddab and _fgge map _abdec(x, y) keys to paragraphs and to composite cells respectively.
type textTable struct {
	_dc.PdfRectangle
	_bgdee, _fcedd int
	_bebc          bool
	_fddab         map[uint64]*textPara
	_fgge          map[uint64]compositeCell
}

// _ebfc maps each rulingKind to its display name.
var _ebfc = map[rulingKind]string{_aebb: "\u006e\u006f\u006e\u0065", _gegc: "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c", _bcaef: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"}

// getStrokeColor converts the graphics state's stroking colour, in its stroking colour space,
// with _ggce.
func (_faef *textObject) getStrokeColor() _ba.Color {
	return _ggce(_faef._fdf.ColorspaceStroking, _faef._fdf.ColorStroking)
}

// findTableGrid tries to build a table from the tiling `_bgade`.
//
// Every tile present in _bgade._begc is filled with the paragraphs returned by inTile; those
// paragraphs are also recorded in the returned set. Tiles without paragraphs are counted, and more
// than int((1-_cefb)*columns*rows) of them abort with (nil, nil). The table is finally passed
// through reduceTiling and subdivide.
//
// NOTE(review): the row-0 check below reads cells through get (map _fddab), while this function
// only fills _fgge through putComposite, so get appears to return nil for every column here —
// confirm the intent.
func (_gefb paraList) findTableGrid(_bgade gridTiling) (*textTable, map[*textPara]struct{}) {
	_aagg := len(_bgade._gggb)
	_fabea := len(_bgade._gceb)
	_bggca := textTable{_bebc: true, _bgdee: _aagg, _fcedd: _fabea, _fddab: make(map[uint64]*textPara, _aagg*_fabea), _fgge: make(map[uint64]compositeCell, _aagg*_fabea)}
	_fbge := make(map[*textPara]struct{})
	_fgfgg := int((1.0 - _cefb) * float64(_aagg*_fabea))
	_gfaed := 0
	if _dgfd {
		_d.Log.Info("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064", _aagg, _fabea)
	}
	for _aedff, _aaacb := range _bgade._gceb {
		_abba, _dbcb := _bgade._begc[_aaacb]
		if !_dbcb {
			continue
		}
		for _acaee, _eeacc := range _bgade._gggb {
			_cfedb, _gfdee := _abba[_eeacc]
			if !_gfdee {
				continue
			}
			_aaag := _gefb.inTile(_cfedb)
			if len(_aaag) == 0 {
				_gfaed++
				if _gfaed > _fgfgg {
					if _dgfd {
						_d.Log.Info("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064", _gfaed)
					}
					return nil, nil
				}
			} else {
				_bggca.putComposite(_acaee, _aedff, _aaag, _cfedb.PdfRectangle)
				for _, _gfdge := range _aaag {
					_fbge[_gfdge] = struct{}{}
				}
			}
		}
	}
	// Count row-0 cells that are missing or do not have _bedda set; give up when there are none.
	_fdae := 0
	for _dbaf := 0; _dbaf < _aagg; _dbaf++ {
		_fbffc := _bggca.get(_dbaf, 0)
		if _fbffc == nil || !_fbffc._bedda {
			_fdae++
		}
	}
	if _fdae == 0 {
		if _dgfd {
			_d.Log.Info("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030")
		}
		return nil, nil
	}
	_egda := _bggca.reduceTiling(_bgade, _dedfe)
	_egda = _egda.subdivide()
	return _egda, _fbge
}

// checkWidth returns _abgc - _acfgf and whether that difference is at most _geff.
// The receiver is not used.
func (_bgcfc rectRuling) checkWidth(_acfgf, _abgc float64) (float64, bool) {
	_dgde := _abgc - _acfgf
	_dagg := _dgde <= _geff
	return _dgde, _dagg
}

// establishSubpath returns the last subpath. When lastpointEstablished() reports false, a new
// subpath built from the returned point is appended first. Returns nil if there is still no
// subpath; otherwise clears _dcgc.
func (_ecga *shapesState) establishSubpath() *subpath {
	_baga, _eddb := _ecga.lastpointEstablished()
	if !_eddb {
		_ecga._bfg = append(_ecga._bfg, _badg(_baga))
	}
	if len(_ecga._bfg) == 0 {
		return nil
	}
	_ecga._dcgc = false
	return _ecga._bfg[len(_ecga._bfg)-1]
}

// complete reports whether every tile in _begc reports complete().
func (_afgc gridTiling) complete() bool {
	for _, _cfff := range _afgc._begc {
		for _, _bgeg := range _cfff {
			if !_bgeg.complete() {
				return false
			}
		}
	}
	return true
}

// getFont returns the font named `_dfeb`, using the cache _fbbf._ebe._ab when it is non-nil.
//
// The cache key is the String() of the font dictionary. Each lookup increments the counter _abd;
// a hit stamps the entry with the new counter value and returns the cached font. On a miss the
// font is loaded with getFontDirect and inserted; when the cache already holds _adaa or more
// entries, the entry with the smallest stamp is evicted first.
//
// Errors from getFontDict and getFontDirect are returned unchanged.
func (_fbbf *textObject) getFont(_dfeb string) (*_dc.PdfFont, error) {
	if _fbbf._ebe._ab != nil {
		_ebcb, _ggg := _fbbf.getFontDict(_dfeb)
		if _ggg != nil {
			_d.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073", _dfeb, _ggg.Error())
			return nil, _ggg
		}
		_fbbf._ebe._abd++
		_key := _ebcb.String()
		_gec, _abde := _fbbf._ebe._ab[_key]
		if _abde {
			// The map stores fontEntry values, so _gec is a copy: write it back, otherwise the
			// access stamp is lost and eviction would ignore how recently the font was used.
			_gec._dcgaa = _fbbf._ebe._abd
			_fbbf._ebe._ab[_key] = _gec
			return _gec._bcab, nil
		}
	}
	_dddf, _dacd := _fbbf.getFontDict(_dfeb)
	if _dacd != nil {
		return nil, _dacd
	}
	_babab, _dacd := _fbbf.getFontDirect(_dfeb)
	if _dacd != nil {
		return nil, _dacd
	}
	if _fbbf._ebe._ab != nil {
		_fcb := fontEntry{_babab, _fbbf._ebe._abd}
		if len(_fbbf._ebe._ab) >= _adaa {
			// Evict the entry with the smallest stamp.
			var _fdbc []string
			for _bfcd := range _fbbf._ebe._ab {
				_fdbc = append(_fdbc, _bfcd)
			}
			_e.Slice(_fdbc, func(_fbgf, _dffe int) bool {
				return _fbbf._ebe._ab[_fdbc[_fbgf]]._dcgaa < _fbbf._ebe._ab[_fdbc[_dffe]]._dcgaa
			})
			delete(_fbbf._ebe._ab, _fdbc[0])
		}
		_fbbf._ebe._ab[_dddf.String()] = _fcb
	}
	return _babab, nil
}

// _adfb returns true when either paragraph has _bedda set; otherwise it returns _edeg applied to
// the difference of the two depth() values.
func _adfb(_bgdc, _afggd *textPara) bool {
	if _bgdc._bedda || _afggd._bedda {
		return true
	}
	return _edeg(_bgdc.depth() - _afggd.depth())
}

// _cega converts filled path sections into rulings. It first calls _daeba on the sections, then
// tries makeRectRuling on every subpath for which isQuadrilateral() is true; successful results
// are collected. Rejected subpaths are only logged (under _cage / _agca).
func _cega(_egdfg []pathSection) rulingList {
	_daeba(_egdfg)
	if _cage {
		_d.Log.Info("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs", len(_egdfg))
	}
	var _fead rulingList
	for _, _adac := range _egdfg {
		for _, _egge := range _adac._bffc {
			if !_egge.isQuadrilateral() {
				if _cage {
					_d.Log.Error("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073", _egge)
				}
				continue
			}
			if _febc, _gdee := _egge.makeRectRuling(_adac.Color); _gdee {
				_fead = append(_fead, _febc)
			} else {
				if _agca {
					_d.Log.Error("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073", _egge)
				}
			}
		}
	}
	if _cage {
		_d.Log.Info("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073", _fead.String())
	}
	return _fead
}

// drawRectangle starts a new subpath and traces the rectangle with corner (_fdfg, _ddb), width
// _fbcc and height _gegb, then closes the path. With _fgeac set the device-space rectangle is
// logged first.
func (_dbdf *shapesState) drawRectangle(_fdfg, _ddb, _fbcc, _gegb float64) {
	if _fgeac {
		_dgbf := _dbdf.devicePoint(_fdfg, _ddb)
		_eae := _dbdf.devicePoint(_fdfg+_fbcc, _ddb+_gegb)
		_cfb := _dc.PdfRectangle{Llx: _dgbf.X, Lly: _dgbf.Y, Urx: _eae.X, Ury: _eae.Y}
		_d.Log.Info("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066", _cfb)
	}
	_dbdf.newSubPath()
	_dbdf.moveTo(_fdfg, _ddb)
	_dbdf.lineTo(_fdfg+_fbcc, _ddb)
	_dbdf.lineTo(_fdfg+_fbcc, _ddb+_gegb)
	_dbdf.lineTo(_fdfg, _ddb+_gegb)
	_dbdf.closePath()
}

// _fgbc appends `_fbga` to `_ccfd` with its Offset set to *_eeffc, then advances *_eeffc by the
// byte length of the mark's Text. The extended slice is returned.
func _fgbc(_ccfd []TextMark, _eeffc *int, _fbga TextMark) []TextMark {
	_fbga.Offset = *_eeffc
	_ccfd = append(_ccfd, _fbga)
	*_eeffc += len(_fbga.Text)
	return _ccfd
}

// setHorizScaling stores `_bddd` in _dffa._cgf._abbd; it does nothing on a nil receiver.
func (_dffa *textObject) setHorizScaling(_bddd float64) {
	if _dffa == nil {
		return
	}
	_dffa._cgf._abbd = _bddd
}

// emptyCompositeColumn reports whether no composite cell in column `_dece` holds any paragraph.
func (_ebebb *textTable) emptyCompositeColumn(_dece int) bool {
	for _daeg := 0; _daeg < _ebebb._fcedd; _daeg++ {
		if _bedcc, _fdafd := _ebebb._fgge[_abdec(_dece, _daeg)]; _fdafd {
			if len(_bedcc.paraList) > 0 {
				return false
			}
		}
	}
	return true
}

// text returns what writeText emits for the paragraph.
func (_fgbb *textPara) text() string {
	_cfga := new(_f.Buffer)
	_fgbb.writeText(_cfga)
	return _cfga.String()
}

// secMinMax returns the smallest _becdd and the largest _aggb over the list.
// It reads element 0 unconditionally, so the list must be non-empty.
func (_aedf rulingList) secMinMax() (float64, float64) {
	_cgcadg, _cgeb := _aedf[0]._becdd, _aedf[0]._aggb
	for _, _bcdc := range _aedf[1:] {
		if _bcdc._becdd < _cgcadg {
			_cgcadg = _bcdc._becdd
		}
		if _bcdc._aggb > _cgeb {
			_cgeb = _bcdc._aggb
		}
	}
	return _cgcadg, _cgeb
}

// computeText concatenates the _cbge text of every element of _gbaed, in order.
func (_afgcb *textWord) computeText() string {
	_fafb := make([]string, len(_afgcb._gbaed))
	for _aecfga, _bdae := range _afgcb._gbaed {
		_fafb[_aecfga] = _bdae._cbge
	}
	return _ce.Join(_fafb, "")
}
// ExtractPageImages returns the images found on the extractor's page, each with its data and its
// position and size information.
// `options` controls the extraction and may be nil to use the defaults; by default, inline stencil
// masks are not extracted.
func (e *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	ctx := &imageExtractContext{_cbc: options}
	if err := ctx.extractContentStreamImages(e._aaf, e._gdc); err != nil {
		return nil, err
	}
	result := &PageImages{Images: ctx._ggf}
	return result, nil
}
// String returns a description of the word bag: its bounding box, its font
// size, the number of words it holds and the text of each word, visited in
// depthIndexes() order.
func (_dgfg *wordBag) String() string {
	var texts []string
	for _, idx := range _dgfg.depthIndexes() {
		for _, w := range _dgfg._adcb[idx] {
			texts = append(texts, w._bgdg)
		}
	}
	return _caa.Sprintf("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071", _dgfg.PdfRectangle, _dgfg._gffd, len(texts), texts)
}

// empty reports whether the bag has no entry stored under key `_fedb`.
func (_dbba *wordBag) empty(_fedb int) bool {
	if _, present := _dbba._adcb[_fedb]; present {
		return false
	}
	return true
}

// _cbbb rounds `_cedg` to the nearest multiple of `_cfba`.
// A zero `_cfba` is treated as 1 so the division is always defined.
func _cbbb(_cedg float64, _cfba int) int {
	step := _cfba
	if step == 0 {
		step = 1
	}
	unit := float64(step)
	return int(_ca.Round(_cedg/unit) * unit)
}

// get returns the paragraph stored for cell (`_dbce`, `_eeege`), or nil when
// the table holds nothing under that key.
func (_fgca *textTable) get(_dbce, _eeege int) *textPara {
	key := _abdec(_dbce, _eeege)
	return _fgca._fddab[key]
}
// snapToGroups splits the list with vertsHorzs, runs snapToGroupsDirection on
// each non-empty half, and returns the two results concatenated (first half
// followed by second half). The result is passed to log() before returning.
func (_acecg rulingList) snapToGroups() rulingList {
	verts, horzs := _acecg.vertsHorzs()
	if len(verts) != 0 {
		verts = verts.snapToGroupsDirection()
	}
	if len(horzs) != 0 {
		horzs = horzs.snapToGroupsDirection()
	}
	snapped := append(verts, horzs...)
	snapped.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return snapped
}

// The rulingKind values, numbered 0, 1, 2 in declaration order.
const (
	_aebb rulingKind = iota
	_gegc
	_bcaef
)

// Error message texts related to font handling.
const (
	// "ERROR: Can't convert font object, invalid type"
	_bbf = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
	// "ERROR: Can't get font properties, font not found"
	_dac = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"
	// "ERROR: Can't get font stream, invalid type"
	_eg = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
)
// inDiacriticArea reports whether `_edfa` sits close enough to the receiver:
// the sum of the left-edge and right-edge offsets must be smaller than _cebd
// times the receiver's width, and the bottom-edge offset must be smaller than
// _cebd times the receiver's height.
func (_dbbae *textMark) inDiacriticArea(_edfa *textMark) bool {
	dLeft := _dbbae.Llx - _edfa.Llx
	dRight := _dbbae.Urx - _edfa.Urx
	if !(_ca.Abs(dLeft+dRight) < _dbbae.Width()*_cebd) {
		return false
	}
	dBottom := _dbbae.Lly - _edfa.Lly
	return _ca.Abs(dBottom) < _dbbae.Height()*_cebd
}

// lineTo adds the point (`_cefg`, `_fdbg`) to the shapes state via addPoint.
// When the _fgeac flag is set, the call is logged with the point's device
// coordinates first.
func (_bfeb *shapesState) lineTo(_cefg, _fdbg float64) {
	if _fgeac {
		devPt := _bfeb.devicePoint(_cefg, _fdbg)
		_d.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _cefg, _fdbg, devPt)
	}
	_bfeb.addPoint(_cefg, _fdbg)
}
2022-06-27 19:58:38 +00:00
2022-09-10 15:35:04 +00:00
// String returns a description of `k`.
2022-09-23 18:05:51 +00:00
func (_ceff rulingKind) String() string {
	// Kinds registered in _ebfc are reported by their registered name.
	if name, known := _ebfc[_ceff]; known {
		return name
	}
	// Anything else falls back to a "Not a ruling: <n>" description.
	return _caa.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _ceff)
}
// PageText represents the layout of text on a device page.
type PageText struct {
	_gfc         []*textMark
	_dbdg        string
	_efda        []TextMark
	_fdc         []TextTable
	_fdbe        _dc.PdfRectangle
	_ggeb, _agc  []pathSection
	// _ced is the value handed out by GetContentStreamOps.
	_ced *_gcf.ContentStreamOperations
}

// nextLine moves the line position by (0, -_add), where _add is read from the
// text object's current state.
func (_fdb *textObject) nextLine() {
	dy := -_fdb._cgf._add
	_fdb.moveLP(0, dy)
}
// _ebcf maps `_efdg` to an integer bin of width _caaee.
// Non-negative values are divided and truncated; negative values are divided,
// truncated and then shifted down by one.
func _ebcf(_efdg float64) int {
	if _efdg >= 0 {
		return int(_efdg / _caaee)
	}
	return int(_efdg/_caaee) - 1
}

// _fcfg matches the pattern ^\s*(\d+\.?|[Iiv]+)\s*\)?$ .
var _fcfg = _c.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")
// _bbdf maps each markKind value to its name: "stroke", "fill" or "augment".
var _bbdf = map [ markKind ] string { _bagd : "\u0073\u0074\u0072\u006f\u006b\u0065" , _gfefd : "\u0066\u0069\u006c\u006c" , _cefcf : "\u0061u\u0067\u006d\u0065\u006e\u0074" } ;
// findPrimSec returns the first ruling in the list for which
// _edeg(ruling._efbdg - _gfaf) holds and `_bgeb` lies within
// [ruling._becdd - _aecg, ruling._aggb + _aecg]. It returns nil if no ruling
// qualifies.
// NOTE(review): _edeg is presumably a "close to zero" test - confirm.
func ( _cacfa rulingList ) findPrimSec ( _gfaf , _bgeb float64 ) * ruling { for _ , _bebg := range _cacfa { if _edeg ( _bebg . _efbdg - _gfaf ) && _bebg . _becdd - _aecg <= _bgeb && _bgeb <= _bebg . _aggb + _aecg { return _bebg ;
} ; } ; return nil ; } ;
// imageExtractContext carries the state of one image extraction run.
// _ggf collects the extracted image marks, _fe is incremented once per
// extracted inline image, _dg once per processed form XObject, and _cbc holds
// the caller-supplied options.
// NOTE(review): _gfa and _egc are not written in this part of the file;
// presumably an XObject image counter and an image cache - confirm.
type imageExtractContext struct { _ggf [ ] ImageMark ; _fe int ; _gfa int ; _dg int ; _egc map [ * _aa . PdfObjectStream ] * cachedImage ; _cbc * ImageExtractOptions ; } ;
// removeDuplicates drops every point for which _aada(point, previously kept
// point) is true, keeping the first point of each such run. An empty point
// list is left untouched.
func ( _fgf * subpath ) removeDuplicates ( ) { if len ( _fgf . _bcbc ) == 0 { return ; } ; _fgdg := [ ] _bab . Point { _fgf . _bcbc [ 0 ] } ;
for _ , _cbfd := range _fgf . _bcbc [ 1 : ] { if ! _aada ( _cbfd , _fgdg [ len ( _fgdg ) - 1 ] ) { _fgdg = append ( _fgdg , _cbfd ) ; } ; } ; _fgf . _bcbc = _fgdg ; } ;
// _aab returns the negated lower y coordinate (Lly) of `_ceb`'s bounding box.
func _aab ( _ceb bounded ) float64 { return - _ceb . bbox ( ) . Lly } ;
// toCellTextMarks converts the lines of the paragraph (_ddaf) to text marks,
// advancing the running offset `*_bbca`.
// When _cggce is set and a line other than the last ends in a hyphen, the
// line's marks are passed through _fgdbf and no separator follows. Otherwise
// every line except the last is followed by the mark that _egag appends for
// the string returned by _gfga(line._gddd, nextLine._gddd).
func ( _debb * textPara ) toCellTextMarks ( _bbca * int ) [ ] TextMark { var _ddeg [ ] TextMark ;
for _gcea , _dcad := range _debb . _ddaf { _decd := _dcad . toTextMarks ( _bbca ) ; _ebefd := _cggce && _dcad . endsInHyphen ( ) && _gcea != len ( _debb . _ddaf ) - 1 ; if _ebefd { _decd = _fgdbf ( _decd , _bbca ) ; } ; _ddeg = append ( _ddeg , _decd ... ) ; if ! ( _ebefd || _gcea == len ( _debb . _ddaf ) - 1 ) { _ddeg = _egag ( _ddeg , _bbca , _gfga ( _dcad . _gddd , _debb . _ddaf [ _gcea + 1 ] . _gddd ) ) ;
} ; } ; return _ddeg ; } ;
// extractFormImages extracts the images contained in the form XObject named
// `_afc`. The form is looked up in `_cag`; a missing form (nil without error)
// is not an error. The form's content stream is processed with the form's own
// resources, falling back to `_cag` when the form has none, and _dg is
// incremented on success. `_fcf` is not used by this function.
func ( _fagg * imageExtractContext ) extractFormImages ( _afc * _aa . PdfObjectName , _fcf _gcf . GraphicsState , _cag * _dc . PdfPageResources ) error { _dfc , _ged := _cag . GetXObjectFormByName ( * _afc ) ; if _ged != nil { return _ged ; } ; if _dfc == nil { return nil ;
} ; _egf , _ged := _dfc . GetContentStream ( ) ; if _ged != nil { return _ged ; } ; _gba := _dfc . Resources ; if _gba == nil { _gba = _cag ; } ; _ged = _fagg . extractContentStreamImages ( string ( _egf ) , _gba ) ; if _ged != nil { return _ged ; } ; _fagg . _dg ++ ; return nil ; } ;
// moveTo sets the _dcgc flag and stores the device-space position of
// (`_adab`, `_dfebb`) in _fcaf. The move is logged when _fgeac is set.
func ( _cbac * shapesState ) moveTo ( _adab , _dfebb float64 ) { _cbac . _dcgc = true ;
_cbac . _fcaf = _cbac . devicePoint ( _adab , _dfebb ) ; if _fgeac { _d . Log . Info ( "\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066" , _adab , _dfebb , _cbac . _fcaf ) ;
} ; } ;
// bbox returns the line's embedded PdfRectangle.
func ( _gafc * textLine ) bbox ( ) _dc . PdfRectangle { return _gafc . PdfRectangle } ;
// pathSection is a list of subpaths together with an embedded color.
type pathSection struct { _bffc [ ] * subpath ; _ba . Color ; } ;
// top returns the last element of the stack without removing it, or nil when
// the stack is empty.
func ( _abdf * stateStack ) top ( ) * textState { if _abdf . empty ( ) { return nil ; } ; return ( * _abdf ) [ _abdf . size ( ) - 1 ] ; } ;
// reduceTiling returns a table without the rows and columns judged redundant.
// Row y (y > 0) is dropped when _agfa._gceb[y-1] and _agfa._gceb[y] differ by
// less than `_ccddd` and emptyCompositeRow(y) holds. Column x (x < width-1) is
// dropped when _agfa._gggb[x+1] and _agfa._gggb[x] differ by less than
// `_ccddd` and emptyCompositeColumn(x) holds.
// If nothing is dropped the receiver itself is returned. Otherwise a new table
// is built and the non-empty composite cells of the kept rows and columns are
// copied to their compacted positions.
func ( _deeg * textTable ) reduceTiling ( _agfa gridTiling , _ccddd float64 ) * textTable { _acac := make ( [ ] int , 0 , _deeg . _fcedd ) ;
_bggfg := make ( [ ] int , 0 , _deeg . _bgdee ) ; _cdad := _agfa . _gggb ; _fbfeb := _agfa . _gceb ; for _ffbf := 0 ; _ffbf < _deeg . _fcedd ; _ffbf ++ { _fbfgb := _ffbf > 0 && _ca . Abs ( _fbfeb [ _ffbf - 1 ] - _fbfeb [ _ffbf ] ) < _ccddd && _deeg . emptyCompositeRow ( _ffbf ) ; if ! _fbfgb { _acac = append ( _acac , _ffbf ) ;
} ; } ; for _bebgc := 0 ; _bebgc < _deeg . _bgdee ; _bebgc ++ { _ecgc := _bebgc < _deeg . _bgdee - 1 && _ca . Abs ( _cdad [ _bebgc + 1 ] - _cdad [ _bebgc ] ) < _ccddd && _deeg . emptyCompositeColumn ( _bebgc ) ; if ! _ecgc { _bggfg = append ( _bggfg , _bebgc ) ; } ; } ; if len ( _acac ) == _deeg . _fcedd && len ( _bggfg ) == _deeg . _bgdee { return _deeg ;
} ; _bacfa := textTable { _bebc : _deeg . _bebc , _bgdee : len ( _bggfg ) , _fcedd : len ( _acac ) , _fgge : make ( map [ uint64 ] compositeCell , len ( _bggfg ) * len ( _acac ) ) } ; if _eea { _d . Log . Info ( "\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064" , _deeg . _bgdee , _deeg . _fcedd , len ( _bggfg ) , len ( _acac ) ) ;
_d . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076" , _bggfg ) ; _d . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076" , _acac ) ; } ; for _ebaee , _aaead := range _acac { for _cgfce , _abadf := range _bggfg { _ceeee , _gdcb := _deeg . getComposite ( _abadf , _aaead ) ;
if len ( _ceeee ) == 0 { continue ; } ; if _eea { _caa . Printf ( "\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n" , _cgfce , _ebaee , _abadf , _aaead , _ebfce ( _ceeee . merge ( ) . text ( ) , 50 ) ) ; } ; _bacfa . putComposite ( _cgfce , _ebaee , _ceeee , _gdcb ) ;
} ; } ; return & _bacfa ; } ;
// _aec returns the translation matrix that moves by (`_bgcg`.X, `_bgcg`.Y).
func _aec ( _bgcg _bab . Point ) _bab . Matrix { return _bab . TranslationMatrix ( _bgcg . X , _bgcg . Y ) } ;
// absorb pulls every word of `_dgfaa` into the receiver with pullWord, recording
// each pulled word in a removals map, and then applies those removals to
// `_dgfaa`.
func ( _gaec * wordBag ) absorb ( _dgfaa * wordBag ) { _cgff := _dgfaa . makeRemovals ( ) ; for _bdec , _cfa := range _dgfaa . _adcb { for _ , _acca := range _cfa { _gaec . pullWord ( _acca , _bdec , _cgff ) ;
} ; } ; _dgfaa . applyRemovals ( _cgff ) ; } ;
// String returns a description of the paragraph: its bounding box, an optional
// "[WxH] " tag when the paragraph carries a table, the number of lines and the
// paragraph text shortened by _ebfce(text, 50). A paragraph flagged _bedda is
// described as "[EMPTY]".
func (_cbcc *textPara) String() string {
	if _cbcc._bedda {
		return _caa.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", _cbcc.PdfRectangle)
	}
	var tableTag string
	if tbl := _cbcc._affa; tbl != nil {
		tableTag = _caa.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", tbl._bgdee, tbl._fcedd)
	}
	return _caa.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", _cbcc.PdfRectangle, tableTag, len(_cbcc._ddaf), _ebfce(_cbcc.text(), 50))
}

// subdivide splits the composite cells of the table along the row and column
// corridors computed by compositeRowCorridors and compositeColCorridors and
// returns the resulting table of individual paragraphs. When either corridor
// map is empty the receiver itself is returned.
func (_ebadc *textTable) subdivide() *textTable {
	_ebadc.logComposite("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e")
	rowCorr := _ebadc.compositeRowCorridors()
	colCorr := _ebadc.compositeColCorridors()
	if _eea {
		_d.Log.Info("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073", _eafa(rowCorr), _eafa(colCorr))
	}
	if len(rowCorr) == 0 || len(colCorr) == 0 {
		return _ebadc
	}

	_cgfbe(rowCorr)
	_cgfbe(colCorr)
	if _eea {
		_d.Log.Info("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073", _eafa(rowCorr), _eafa(colCorr))
	}

	// Per-row / per-column start offsets in the subdivided table, plus its size.
	yOffsets, newH := _bbbb(_ebadc._fcedd, rowCorr)
	xOffsets, newW := _bbbb(_ebadc._bgdee, colCorr)
	result := &textTable{
		PdfRectangle: _ebadc.PdfRectangle,
		_bebc:        _ebadc._bebc,
		_fcedd:       newH,
		_bgdee:       newW,
		_fddab:       make(map[uint64]*textPara, newW*newH),
	}
	if _eea {
		_d.Log.Info("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076", _ebadc._bgdee, _ebadc._fcedd, newW, newH, _eafa(rowCorr), _eafa(colCorr), yOffsets, xOffsets)
	}

	for y := 0; y < _ebadc._fcedd; y++ {
		y0 := yOffsets[y]
		for x := 0; x < _ebadc._bgdee; x++ {
			x0 := xOffsets[x]
			if _eea {
				_caa.Printf("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a", x, y, x0, y0)
			}
			composite, found := _ebadc._fgge[_abdec(x, y)]
			if !found {
				continue
			}
			// Split the composite cell and copy each piece to its final slot.
			pieces := composite.split(rowCorr[y], colCorr[x])
			for py := 0; py < pieces._fcedd; py++ {
				for px := 0; px < pieces._bgdee; px++ {
					piece := pieces.get(px, py)
					result.put(x0+px, y0+py, piece)
					if _eea {
						_caa.Printf("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", x0+px, y0+py, piece)
					}
				}
			}
		}
	}
	return result
}
// GetContentStreamOps returns the content stream operations stored in the
// page text.
func (_efcd *PageText) GetContentStreamOps() *_gcf.ContentStreamOperations {
	ops := _efcd._ced
	return ops
}

// processOperand handles one content stream operation during image
// extraction. Only single-parameter "BI" (inline image) and "Do" (XObject)
// operations are acted on; everything else is ignored and nil is returned.
// Inline images flagged as image masks are skipped unless the options request
// inline stencil masks. A "Do" whose parameter is not a name returns _cc.
func (_ec *imageExtractContext) processOperand(_acc *_gcf.ContentStreamOperation, _ae _gcf.GraphicsState, _ggfb *_dc.PdfPageResources) error {
	if len(_acc.Params) != 1 {
		return nil
	}
	switch _acc.Operand {
	case "\u0042\u0049":
		inline, isInline := _acc.Params[0].(*_gcf.ContentStreamInlineImage)
		if !isInline {
			return nil
		}
		isMask, isBool := _aa.GetBoolVal(inline.ImageMask)
		if isBool && isMask && !_ec._cbc.IncludeInlineStencilMasks {
			return nil
		}
		return _ec.extractInlineImage(inline, _ae, _ggfb)
	case "\u0044\u006f":
		name, isName := _aa.GetName(_acc.Params[0])
		if !isName {
			_d.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065")
			return _cc
		}
		_, xobjKind := _ggfb.GetXObjectByName(*name)
		if xobjKind == _dc.XObjectTypeImage {
			return _ec.extractXObjectImage(name, _ae, _ggfb)
		}
		if xobjKind == _dc.XObjectTypeForm {
			return _ec.extractFormImages(name, _ae, _ggfb)
		}
	}
	return nil
}

// toTextMarks converts the paragraph to text marks, advancing the running
// offset `*_bcf`. A paragraph without a table is converted by
// toCellTextMarks. For a table paragraph the cells are visited row by row:
// a missing cell contributes a tab mark, every cell is followed by a space
// mark, and every row except the last is followed by a newline mark.
func (_cffe *textPara) toTextMarks(_bcf *int) []TextMark {
	if _cffe._affa == nil {
		return _cffe.toCellTextMarks(_bcf)
	}
	var marks []TextMark
	for row := 0; row < _cffe._affa._fcedd; row++ {
		for col := 0; col < _cffe._affa._bgdee; col++ {
			if cellPara := _cffe._affa.get(col, row); cellPara != nil {
				marks = append(marks, cellPara.toCellTextMarks(_bcf)...)
			} else {
				marks = _egag(marks, _bcf, "\u0009")
			}
			marks = _egag(marks, _bcf, "\u0020")
		}
		if row < _cffe._affa._fcedd-1 {
			marks = _egag(marks, _bcf, "\u000a")
		}
	}
	return marks
}
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText,
// followed by the two integer statistics reported by the underlying extraction.
// An ErrColorOutOfRange error from the extraction is tolerated; any other error is returned.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
func (_gdcg *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pageText, numChars, numMisses, err := _gdcg.extractPageText(_gdcg._aaf, _gdcg._gdc, _bab.IdentityMatrix(), 0)
	if err != nil && err != _dc.ErrColorOutOfRange {
		return nil, 0, 0, err
	}
	pageText.computeViews()
	if err = _ggdf(pageText); err != nil {
		return nil, 0, 0, err
	}
	// Restrict the result to the stored area only when the options ask for it.
	if opts := _gdcg._da; opts != nil && opts.ApplyCropBox && _gdcg._aae != nil {
		pageText.ApplyArea(*_gdcg._aae)
	}
	return pageText, numChars, numMisses, nil
}

// _dgcf reports whether `_eed` has at least _dgcga runes, ends in a rune of the
// Unicode Hyphen class, and has a non-space rune directly before that hyphen.
func _dgcf(_eed string) bool {
	if _b.RuneCountInString(_eed) < _dgcga {
		return false
	}
	last, size := _b.DecodeLastRuneInString(_eed)
	if size <= 0 {
		return false
	}
	if !_bb.Is(_bb.Hyphen, last) {
		return false
	}
	prev, prevSize := _b.DecodeLastRuneInString(_eed[:len(_eed)-size])
	if prevSize <= 0 {
		return false
	}
	return !_bb.IsSpace(prev)
}

// moveTextSetLeading stores -`_eeg` in the text state's _add field and then
// moves the line position by (`_cbag`, `_eeg`).
func (_dacef *textObject) moveTextSetLeading(_cbag, _eeg float64) {
	state := &_dacef._cgf
	state._add = -_eeg
	_dacef.moveLP(_cbag, _eeg)
}

// sortStrict sorts the rulings in place: first by descending _fabfb, then by
// ascending _efbdg (values whose difference satisfies _edeg are treated as
// tied), then by ascending _becdd and finally by ascending _aggb.
func (_feaf rulingList) sortStrict() {
	less := func(_gcde, _gcgb int) bool {
		a, b := _feaf[_gcde], _feaf[_gcgb]
		if a._fabfb != b._fabfb {
			return a._fabfb > b._fabfb
		}
		if !_edeg(a._efbdg - b._efbdg) {
			return a._efbdg < b._efbdg
		}
		if a._becdd != b._becdd {
			return a._becdd < b._becdd
		}
		return a._aggb < b._aggb
	}
	_e.Slice(_feaf, less)
}

// pullWord appends `_fgb` to the line and then removes it from `_egcec` at
// index `_feab`.
func (_decc *textLine) pullWord(_egcec *wordBag, _fgb *textWord, _feab int) {
	_decc.appendWord(_fgb)
	_egcec.removeWord(_fgb, _feab)
}
// String returns a human readable description of `ss`.
func (_dfd *shapesState) String() string {
	return _caa.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", len(_dfd._bfg), _dfd._dcgc)
}

// compositeRowCorridors returns, for every row index from 1 upwards that has at
// least one composite cell, the value computed by _ceebc over that row's
// composite cells. Rows without composite cells have no entry.
func (_geffff *textTable) compositeRowCorridors() map[int][]float64 {
	_edec := make(map[int][]float64, _geffff._fcedd)
	if _eea {
		_d.Log.Info("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064", _geffff._fcedd)
	}
	for _accbe := 1; _accbe < _geffff._fcedd; _accbe++ {
		var _ggfg []compositeCell
		for _cadab := 0; _cadab < _geffff._bgdee; _cadab++ {
			if _dedg, _gdeb := _geffff._fgge[_abdec(_cadab, _accbe)]; _gdeb {
				_ggfg = append(_ggfg, _dedg)
			}
		}
		if len(_ggfg) == 0 {
			continue
		}
		_abfg := _ceebc(_ggfg)
		_edec[_accbe] = _abfg
		if _eea {
			_caa.Printf("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a", _accbe, _abfg)
		}
	}
	return _edec
}

// getRight returns the _becf neighbours of the cells in the last column, one
// per row. It returns nil when any of those neighbours is already taken, or
// when the _bgce link of one neighbour is not the neighbour of the next row.
func (_debe *textTable) getRight() paraList {
	_bbgb := make(paraList, _debe._fcedd)
	for _bbad := 0; _bbad < _debe._fcedd; _bbad++ {
		_dbecf := _debe.get(_debe._bgdee-1, _bbad)._becf
		if _dbecf.taken() {
			return nil
		}
		_bbgb[_bbad] = _dbecf
	}
	for _efgg := 0; _efgg < _debe._fcedd-1; _efgg++ {
		if _bbgb[_efgg]._bgce != _bbgb[_efgg+1] {
			return nil
		}
	}
	return _bbgb
}

// comp is the less function used by rulingList.sort. Rulings are ordered by
// descending kind (_fabfb); rulings of kind _aebb are never "less". Within a
// kind the keys are _efbdg, then _becdd, then _aggb, with the direction of each
// comparison inverted for every kind other than _gegc.
func (_gcfb rulingList) comp(_cfbf, _ddcd int) bool {
	_eeag, _cead := _gcfb[_cfbf], _gcfb[_ddcd]
	_bcacd, _dfaff := _eeag._fabfb, _cead._fabfb
	if _bcacd != _dfaff {
		return _bcacd > _dfaff
	}
	if _bcacd == _aebb {
		return false
	}
	_geeaf := func(_fcba bool) bool {
		if _bcacd == _gegc {
			return _fcba
		}
		return !_fcba
	}
	_feda, _gfde := _eeag._efbdg, _cead._efbdg
	if _feda != _gfde {
		return _geeaf(_feda > _gfde)
	}
	_feda, _gfde = _eeag._becdd, _cead._becdd
	if _feda != _gfde {
		return _geeaf(_feda < _gfde)
	}
	if _eeag._aggb == _cead._aggb {
		// Fully tied. Without this guard the inverted branch would answer
		// "less" for both argument orders, which is not a valid ordering.
		return false
	}
	return _geeaf(_eeag._aggb < _cead._aggb)
}

// _ggba partitions the words of `_fece` into a list of word bags. For each
// depth index it repeatedly seeds a new bag (via _afccc) with the first word
// in reading order and then grows it with scanBand passes until a full round
// adds nothing. `_fece` is emptied in the process.
func _ggba(_fece *wordBag, _ebda float64, _edc, _ggac rulingList) []*wordBag {
	var _cbcd []*wordBag
	for _, _eccc := range _fece.depthIndexes() {
		_fgfe := false
		for !_fece.empty(_eccc) {
			_cdaa := _fece.firstReadingIndex(_eccc)
			_dbdc := _fece.firstWord(_cdaa)
			_dffaa := _afccc(_dbdc, _ebda, _edc, _ggac)
			_fece.removeWord(_dbdc, _cdaa)
			if _aebe {
				_d.Log.Info("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073", _dbdc.String())
			}
			// Keep scanning while the previous round added at least one word.
			for _ceaa := true; _ceaa; _ceaa = _fgfe {
				_fgfe = false
				// Gap limits scale with the bag's current font size.
				_egdgf := _adbb * _dffaa._gffd
				_dgcb := _cgda * _dffaa._gffd
				_efbc := _egde * _dffaa._gffd
				if _aebe {
					_d.Log.Info("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066", _dffaa.minDepth(), _dffaa.maxDepth(), _efbc, _dgcb)
				}
				if _fece.scanBand("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c", _dffaa, _cdce(_dad, 0), _dffaa.minDepth()-_efbc, _dffaa.maxDepth()+_efbc, _gdcga, false, false) > 0 {
					_fgfe = true
				}
				if _fece.scanBand("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c", _dffaa, _cdce(_dad, _dgcb), _dffaa.minDepth(), _dffaa.maxDepth(), _acgg, false, false) > 0 {
					_fgfe = true
				}
				if _fgfe {
					continue
				}
				// Nothing added by the two passes above: first make a scanBand
				// call with its second-to-last flag set, then a real one.
				_afca := _fece.scanBand("", _dffaa, _cdce(_dddb, _egdgf), _dffaa.minDepth(), _dffaa.maxDepth(), _bfa, true, false)
				if _afca > 0 {
					_cbbbf := (_dffaa.maxDepth() - _dffaa.minDepth()) / _dffaa._gffd
					if (_afca > 1 && float64(_afca) > 0.3*_cbbbf) || _afca <= 10 {
						if _fece.scanBand("\u006f\u0074\u0068e\u0072", _dffaa, _cdce(_dddb, _egdgf), _dffaa.minDepth(), _dffaa.maxDepth(), _bfa, false, true) > 0 {
							_fgfe = true
						}
					}
				}
			}
			_cbcd = append(_cbcd, _dffaa)
		}
	}
	return _cbcd
}

// pullWord adds `_bfba` to the bag under key `_faab`, growing the bag's
// bounding box and font size as needed, and records the word in `_cdeb` so
// that it can be removed from its previous owner. `_cdeb[_faab]` must already
// exist (see makeRemovals).
func (_deca *wordBag) pullWord(_bfba *textWord, _faab int, _cdeb map[int]map[*textWord]struct{}) {
	_deca.PdfRectangle = _abga(_deca.PdfRectangle, _bfba.PdfRectangle)
	if _bfba._debab > _deca._gffd {
		_deca._gffd = _bfba._debab
	}
	_deca._adcb[_faab] = append(_deca._adcb[_faab], _bfba)
	_cdeb[_faab][_bfba] = struct{}{}
}

// extractInlineImage converts the inline image `_efa` to RGB and appends an
// ImageMark for it, with size, angle and position taken from the CTM of
// `_gdb`. A missing color space defaults to DeviceGray. The inline image
// counter _fe is incremented on success.
func (_fcca *imageExtractContext) extractInlineImage(_efa *_gcf.ContentStreamInlineImage, _gdb _gcf.GraphicsState, _dgc *_dc.PdfPageResources) error {
	_cde, _bgb := _efa.ToImage(_dgc)
	if _bgb != nil {
		return _bgb
	}
	_bge, _bgb := _efa.GetColorSpace(_dgc)
	if _bgb != nil {
		return _bgb
	}
	if _bge == nil {
		_bge = _dc.NewPdfColorspaceDeviceGray()
	}
	_bcd, _bgb := _bge.ImageToRGB(*_cde)
	if _bgb != nil {
		return _bgb
	}
	_fba := ImageMark{Image: &_bcd, Width: _gdb.CTM.ScalingFactorX(), Height: _gdb.CTM.ScalingFactorY(), Angle: _gdb.CTM.Angle()}
	_fba.X, _fba.Y = _gdb.CTM.Translation()
	_fcca._ggf = append(_fcca._ggf, _fba)
	_fcca._fe++
	return nil
}

// Numeric tuning constants used throughout the package. Among the uses visible
// nearby: _caaee is the bin width in _ebcf, _cebd the tolerance factor in
// inDiacriticArea, _dgcga the minimum rune count in _dgcf, _aecg the tolerance
// in findPrimSec, _geff the tolerance base in alignsSec and _geed the tolerance
// in gridTile.contains.
const (
	_agfe  = 1.0e-6
	_bacb  = 1.0e-4
	_aaaa  = 10
	_caaee = 6
	_gdgc  = 0.5
	_gaac  = 0.12
	_aabe  = 0.19
	_gfbbb = 0.04
	_fggf  = 0.04
	_egde  = 1.0
	_gdcga = 0.04
	_cgda  = 0.4
	_acgg  = 0.7
	_adbb  = 1.0
	_bfa   = 0.1
	_bdbc  = 1.4
	_agdd  = 0.46
	_fde   = 0.02
	_feae  = 0.2
	_cebd  = 0.5
	_dgcga = 4
	_ffed  = 4.0
	_baeag = 6
	_cefb  = 0.3
	_eaga  = 0.01
	_cddaf = 0.02
	_cddcc = 2
	_eadb  = 2
	_gddb  = 500
	_dagaf = 4.0
	_dbaa  = 4.0
	_adfd  = 0.05
	_bgffc = 0.1
	_aecg  = 2.0
	_geff  = 2.0
	_geed  = 1.5
	_dedfe = 3.0
	_ega   = 0.25
)

// inTile returns the paragraphs of the list whose bounding box is contained in
// `_eaaeg`, in their original order.
func (_gceg paraList) inTile(_eaaeg gridTile) paraList {
	var _fagda paraList
	for _, _abaf := range _gceg {
		if _eaaeg.contains(_abaf.PdfRectangle) {
			_fagda = append(_fagda, _abaf)
		}
	}
	if _eea {
		_caa.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _eaaeg, len(_fagda))
		for _dage, _eabef := range _fagda {
			_caa.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _dage, _eabef)
		}
		_caa.Println("")
	}
	return _fagda
}

// _gfbf merges word bags that lie inside other word bags. The bags are sorted
// by descending area (ties broken by descending height); each bag that has not
// itself been absorbed then absorbs every later, not yet absorbed bag whose
// rectangle is contained in its own rectangle widened to the left by one font
// size. The surviving bags are returned. `_cffb` is reordered in place.
func _gfbf(_cffb []*wordBag) []*wordBag {
	if len(_cffb) <= 1 {
		return _cffb
	}
	if _daag {
		_d.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	_e.Slice(_cffb, func(_aafda, _eccb int) bool {
		_abgb, _bae := _cffb[_aafda], _cffb[_eccb]
		_aac := _abgb.Width() * _abgb.Height()
		_dcdb := _bae.Width() * _bae.Height()
		if _aac != _dcdb {
			return _aac > _dcdb
		}
		if _abgb.Height() != _bae.Height() {
			return _abgb.Height() > _bae.Height()
		}
		return _aafda < _eccb
	})
	var _cggb []*wordBag
	_gac := make(intSet)
	for _ccdg := 0; _ccdg < len(_cffb); _ccdg++ {
		if _gac.has(_ccdg) {
			continue
		}
		_beee := _cffb[_ccdg]
		for _bce := _ccdg + 1; _bce < len(_cffb); _bce++ {
			// Skip candidates that an earlier bag already absorbed. The
			// original tested the outer index here, which is never in the set
			// at this point, so the check had no effect.
			if _gac.has(_bce) {
				continue
			}
			_fcd := _cffb[_bce]
			_cddad := _beee.PdfRectangle
			_cddad.Llx -= _beee._gffd
			if _gcdf(_cddad, _fcd.PdfRectangle) {
				_beee.absorb(_fcd)
				_gac.add(_bce)
			}
		}
		_cggb = append(_cggb, _beee)
	}
	// Sanity check: every input bag is either kept or absorbed.
	if len(_cffb) != len(_cggb)+len(_gac) {
		_d.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_cffb), len(_cggb), len(_gac))
	}
	return _cggb
}

// _fgdbf removes the last rune from the text marks `_dafc` and moves the
// running offset `*_ebdc` back accordingly.
// If the last mark holds exactly one rune, the whole mark is dropped and the
// offset is set to the end of the new last mark (or to the dropped mark's own
// offset when no mark remains). Otherwise the last mark's Text is shortened by
// _eeac and the offset is reduced by the number of bytes removed.
// An empty `_dafc` is returned unchanged.
func _fgdbf(_dafc []TextMark, _ebdc *int) []TextMark {
	if len(_dafc) == 0 {
		return _dafc
	}
	_ggab := len(_dafc) - 1
	_fdbge := _dafc[_ggab]
	if len([]rune(_fdbge.Text)) == 1 {
		_dafc = _dafc[:_ggab]
		if _ggab == 0 {
			// Nothing left to measure against: rewind to where the dropped
			// mark started instead of indexing an empty slice.
			*_ebdc = _fdbge.Offset
			return _dafc
		}
		_ggaa := _dafc[_ggab-1]
		*_ebdc = _ggaa.Offset + len(_ggaa.Text)
		return _dafc
	}
	_face := _eeac(_fdbge.Text)
	*_ebdc += len(_face) - len(_fdbge.Text)
	// Write the shortened text back into the slice element. The original
	// updated only a local copy, so the mark kept its trailing rune while the
	// offset had already been reduced.
	_dafc[_ggab].Text = _face
	return _dafc
}

// empty reports whether the stack holds no text states.
func (_ceeb *stateStack) empty() bool { return len(*_ceeb) == 0 }

// fontsize returns the _bfcdd value of the paragraph's first line.
// The paragraph must contain at least one line.
func (_fecc *textPara) fontsize() float64 { return _fecc._ddaf[0]._bfcdd }
// _ag is a package-level boolean switch, off by default.
var _ag = false

// _acda looks up `_gadf` in _adabg when it consists of exactly one rune.
// It returns the mapped string and true on a hit; for strings of any other
// length it returns ("", false).
func _acda(_gadf string) (string, bool) {
	if runes := []rune(_gadf); len(runes) == 1 {
		mapped, found := _adabg[runes[0]]
		return mapped, found
	}
	return "", false
}

// merge combines the rulings of the list into a single new ruling. Kind,
// _fcgb and Color are taken from the first ruling, _efbdg is the mean over all
// rulings, and [_becdd, _aggb] spans the smallest _becdd to the largest _aggb.
// The list must not be empty.
func (_eaee rulingList) merge() *ruling {
	head := _eaee[0]
	sum, lo, hi := head._efbdg, head._becdd, head._aggb
	for _, r := range _eaee[1:] {
		sum += r._efbdg
		if r._becdd < lo {
			lo = r._becdd
		}
		if r._aggb > hi {
			hi = r._aggb
		}
	}
	merged := &ruling{
		_fabfb: head._fabfb,
		_fcgb:  head._fcgb,
		Color:  head.Color,
		_efbdg: sum / float64(len(_eaee)),
		_becdd: lo,
		_aggb:  hi,
	}
	if _baea {
		_d.Log.Info("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073", len(_eaee), merged)
		for idx, r := range _eaee {
			_caa.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, r)
		}
	}
	return merged
}

// add inserts `_dbgbe` into the set.
func (_dagaa intSet) add(_dbgbe int) {
	_dagaa[_dbgbe] = struct{}{}
}

// contains reports whether `_gdda` fits inside the tile. A tile with fewer
// than three borders contains nothing. Each border that is present is checked
// with a tolerance of _geed; sides without a border are not checked.
func (_ddcdd gridTile) contains(_gdda _dc.PdfRectangle) bool {
	switch {
	case _ddcdd.numBorders() < 3:
		return false
	case _ddcdd._gcfbe && _gdda.Llx < _ddcdd.Llx-_geed:
		return false
	case _ddcdd._ebbb && _gdda.Urx > _ddcdd.Urx+_geed:
		return false
	case _ddcdd._ecbgb && _gdda.Lly < _ddcdd.Lly-_geed:
		return false
	case _ddcdd._faaa && _gdda.Ury > _ddcdd.Ury+_geed:
		return false
	}
	return true
}

// last returns the final point of the subpath, which must not be empty.
func (_daacb *subpath) last() _bab.Point {
	pts := _daacb._bcbc
	return pts[len(pts)-1]
}
// llyOrdering returns the indexes of the list's paragraphs ordered by
// ascending Lly. Paragraphs with equal Lly keep their relative order.
func (_bggcf paraList) llyOrdering() []int {
	order := make([]int, 0, len(_bggcf))
	for idx := 0; idx < len(_bggcf); idx++ {
		order = append(order, idx)
	}
	byLly := func(_badd, _abad int) bool {
		return _bggcf[order[_badd]].Lly < _bggcf[order[_abad]].Lly
	}
	_e.SliceStable(order, byLly)
	return order
}

// alignsSec reports whether the [_becdd, _aggb] ranges of the two rulings
// overlap once each lower bound is relaxed by _geff + 1.0.
func (_agade *ruling) alignsSec(_gdfa *ruling) bool {
	const slack = _geff + 1.0
	if !(_agade._becdd-slack <= _gdfa._aggb) {
		return false
	}
	return _gdfa._becdd-slack <= _agade._aggb
}
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct {
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Text is the extracted text.
Text string ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// BBox is the bounding box of the text.
2022-09-23 18:05:51 +00:00
BBox _dc . PdfRectangle ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Font is the font the text was drawn with.
2022-09-23 18:05:51 +00:00
Font * _dc . PdfFont ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-09-23 18:05:51 +00:00
FillColor _ba . Color ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-09-23 18:05:51 +00:00
StrokeColor _ba . Color ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Orientation is the text orientation
Orientation int ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// DirectObject is the underlying PdfObject (Text Object) that represents the visible texts. This is introduced to get
// a simple access to the TextObject in case editing or replacment of some text is needed. E.g during redaction.
2022-09-23 18:05:51 +00:00
DirectObject _aa . PdfObject ;
2022-07-13 21:28:43 +00:00
// ObjString is a decoded string operand of a text-showing operator. It has the same value as `Text` attribute except
// when many glyphs are represented with the same Text Object that contains multiple length string operand in which case
// ObjString spans more than one character string that falls in different TextMark objects.
2022-09-23 18:05:51 +00:00
ObjString [ ] string ; Tw float64 ; Th float64 ; Tc float64 ; Index int ; } ; func ( _ggfd * stateStack ) pop ( ) * textState { if _ggfd . empty ( ) { return nil ; } ; _fbb := * ( * _ggfd ) [ len ( * _ggfd ) - 1 ] ; * _ggfd = ( * _ggfd ) [ : len ( * _ggfd ) - 1 ] ; return & _fbb ; } ; func ( _abgef rulingList ) sort ( ) { _e . Slice ( _abgef , _abgef . comp ) } ;
// makeRemovals returns a map with one empty word set for every key of the bag's `_adcb` map.
func (bag *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	removals := make(map[int]map[*textWord]struct{}, len(bag._adcb))
	for key := range bag._adcb {
		removals[key] = map[*textWord]struct{}{}
	}
	return removals
}

// setTextRenderMode stores `mode`, converted to a RenderMode, in the text object's state.
// It does nothing when called on a nil receiver.
func (to *textObject) setTextRenderMode(mode int) {
	if to != nil {
		to._cgf._efbd = RenderMode(mode)
	}
}

// applyTables returns a new list that starts with one paragraph per table in `tables`
// (built with newTablePara) followed by every paragraph of the receiver whose `_bgcb`
// flag is not set.
func (paras paraList) applyTables(tables []*textTable) paraList {
	var merged paraList
	for _, table := range tables {
		merged = append(merged, table.newTablePara())
	}
	for _, para := range paras {
		if !para._bgcb {
			merged = append(merged, para)
		}
	}
	return merged
}

// text returns the concatenated text of the words in the line. A single space is emitted
// before each word whose `_gabe` flag is set.
func (line *textLine) text() string {
	var pieces []string
	for _, word := range line._ccfce {
		if word._gabe {
			pieces = append(pieces, "\u0020")
		}
		pieces = append(pieces, word._bgdg)
	}
	return _ce.Join(pieces, "")
}

const _adaa = 10
2022-06-27 19:58:38 +00:00
2022-09-10 15:35:04 +00:00
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
2022-09-23 18:05:51 +00:00
func ( _cgcd * Extractor ) ExtractTextWithStats ( ) ( _gca string , _fda int , _def int , _gdbe error ) { _bbc , _fda , _def , _gdbe := _cgcd . ExtractPageText ( ) ; if _gdbe != nil { return "" , _fda , _def , _gdbe ; } ; return _bbc . Text ( ) , _fda , _def , nil ; } ; func _bgfd ( _ecgb [ ] * textWord , _fgdgb int ) [ ] * textWord { _ageg := len ( _ecgb ) ;
copy ( _ecgb [ _fgdgb : ] , _ecgb [ _fgdgb + 1 : ] ) ; return _ecgb [ : _ageg - 1 ] ; } ; func ( _faecb paraList ) readBefore ( _cgfd [ ] int , _egeca , _cagg int ) bool { _cdec , _dfba := _faecb [ _egeca ] , _faecb [ _cagg ] ; if _gbda ( _cdec , _dfba ) && _cdec . Lly > _dfba . Lly { return true ;
} ; if ! ( _cdec . _aecfg . Urx < _dfba . _aecfg . Llx ) { return false ; } ; _bbac , _dgcd := _cdec . Lly , _dfba . Lly ; if _bbac > _dgcd { _dgcd , _bbac = _bbac , _dgcd ; } ; _gfefg := _ca . Max ( _cdec . _aecfg . Llx , _dfba . _aecfg . Llx ) ; _eaag := _ca . Min ( _cdec . _aecfg . Urx , _dfba . _aecfg . Urx ) ;
_acecb := _faecb . llyRange ( _cgfd , _bbac , _dgcd ) ; for _ , _aaaf := range _acecb { if _aaaf == _egeca || _aaaf == _cagg { continue ; } ; _gcfd := _faecb [ _aaaf ] ; if _gcfd . _aecfg . Llx <= _eaag && _gfefg <= _gcfd . _aecfg . Urx { return false ; } ; } ; return true ; } ; func ( _geb * textObject ) setTextRise ( _efab float64 ) { if _geb == nil { return ;
} ; _geb . _cgf . _bffe = _efab ; } ; func ( _fgbca * ruling ) gridIntersecting ( _gdbbf * ruling ) bool { return _efca ( _fgbca . _becdd , _gdbbf . _becdd ) && _efca ( _fgbca . _aggb , _gdbbf . _aggb ) ; } ; func ( _faec * wordBag ) minDepth ( ) float64 { return _faec . _deae - ( _faec . Ury - _faec . _gffd ) } ;
// _dbed returns the keys of `sets` in ascending order.
func _dbed(sets map[int]intSet) []int {
	keys := make([]int, 0, len(sets))
	for key := range sets {
		keys = append(keys, key)
	}
	_e.Ints(keys)
	return keys
}

// _aged reports whether `num` divided by the larger of `_bgffc` and `den` is below `_adfd`.
func _aged(num, den float64) bool {
	denominator := _ca.Max(_bgffc, den)
	return num/denominator < _adfd
}
// topoOrder performs a depth-first traversal of the paragraphs using readBefore as the edge
// relation. Each paragraph index is appended to a list after all paragraphs it is read before
// have been visited; that list is passed to `_ffffg` and the result is returned.
func (paras paraList) topoOrder() []int {
	if _cfee {
		_d.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	count := len(paras)
	visited := make([]bool, count)
	postOrder := make([]int, 0, count)
	llyOrder := paras.llyOrdering()

	var visit func(idx int)
	visit = func(idx int) {
		visited[idx] = true
		for next := 0; next < count; next++ {
			// readBefore is only consulted for paragraphs not yet visited.
			if !visited[next] && paras.readBefore(llyOrder, idx, next) {
				visit(next)
			}
		}
		postOrder = append(postOrder, idx)
	}

	for idx := 0; idx < count; idx++ {
		if visited[idx] {
			continue
		}
		visit(idx)
	}
	return _ffffg(postOrder)
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// String returns a human readable description of the subpath: the number of points followed
// by all points when there are at most 5, otherwise by the first two points and the last one.
func (sp *subpath) String() string {
	points := sp._bcbc
	count := len(points)
	if count > 5 {
		return _caa.Sprintf("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f", count, points[0], points[1], points[count-1])
	}
	return _caa.Sprintf("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f", count, points)
}

// _eafa formats `cells` as "{key: values, ...}" with the keys in the order returned by `_daadd`.
func _eafa(cells map[int][]float64) string {
	keys := _daadd(cells)
	entries := make([]string, len(cells))
	for i, key := range keys {
		entries[i] = _caa.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", key, cells[key])
	}
	joined := _ce.Join(entries, "\u002c\u0020")
	return _caa.Sprintf("\u007b\u0025\u0073\u007d", joined)
}

// _bdgf returns the distinct keys of the inner maps of `grid` in ascending order.
func _bdgf(grid map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(grid))
	seen := make(map[float64]struct{}, len(grid))
	for _, row := range grid {
		for key := range row {
			if _, dup := seen[key]; !dup {
				keys = append(keys, key)
				seen[key] = struct{}{}
			}
		}
	}
	_e.Float64s(keys)
	return keys
}

// _dggfd returns a ruling of kind `_bcaef` whose `_efbdg` is the rectangle's Llx and whose
// `_becdd`/`_aggb` are its Lly/Ury.
func _dggfd(rect _dc.PdfRectangle) *ruling {
	r := ruling{_fabfb: _bcaef}
	r._efbdg = rect.Llx
	r._becdd = rect.Lly
	r._aggb = rect.Ury
	return &r
}

// markCells sets the `_bgcb` flag on every non-nil cell of the table.
func (table *textTable) markCells() {
	for y := 0; y < table._fcedd; y++ {
		for x := 0; x < table._bgdee; x++ {
			if cell := table.get(x, y); cell != nil {
				cell._bgcb = true
			}
		}
	}
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// After `start` and `end` (offsets in the extracted text) are clamped to the offsets covered by
// `ma`, the selected marks are those tm with `start` < tm.Offset + len(tm.Text) and
// tm.Offset < `end`.
// An error is returned when `ma` is nil, when `end` < `start`, or when the searched indexes
// fall outside the array. An empty `ma` is returned as is.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
// NOTE(review): `end` is clamped to the last mark's Offset+1, so when the range reaches the last
// mark the end search finds no mark and the out-of-range error is returned — confirm that this
// is intended.
func (ma *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if ma == nil {
		return nil, _g.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _caa.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}

	marks := ma._bgbd
	count := len(marks)
	if count == 0 {
		return ma, nil
	}

	// Clamp the requested range to the offsets covered by the marks.
	if firstOffset := marks[0].Offset; start < firstOffset {
		start = firstOffset
	}
	if limit := marks[count-1].Offset + 1; end > limit {
		end = limit
	}

	// Index of the first mark whose last character is at or after `start`.
	iStart := _e.Search(count, func(i int) bool {
		return marks[i].Offset+len(marks[i].Text)-1 >= start
	})
	if iStart < 0 || iStart >= count {
		return nil, _caa.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, iStart, count, marks[0], marks[count-1])
	}

	// Index of the first mark that starts after the last requested character.
	iEnd := _e.Search(count, func(i int) bool {
		return marks[i].Offset > end-1
	})
	if iEnd < 0 || iEnd >= count {
		return nil, _caa.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076", end, iEnd, count, marks[0], marks[count-1])
	}
	if iEnd <= iStart {
		return nil, _caa.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_bgbd: marks[iStart:iEnd]}, nil
}

// reset sets the `_aafd` and `_acd` matrices to the identity matrix and clears `_dcfd`.
func (to *textObject) reset() {
	to._aafd = _bab.IdentityMatrix()
	to._acd = _bab.IdentityMatrix()
	to._dcfd = nil
}

// _fdef builds the list of paragraphs for a page from its text marks.
// The marks are grouped into words (`_gdgfd`), the words are bagged and divided using the
// vertical and horizontal rulings (`_dbcfe`, `_ggba`, `_gfbf`), each bag is arranged into a
// paragraph, tables are extracted when there are at least `_baeag` paragraphs, and the result
// is sorted with sortReadingOrder. It returns nil when there are no marks or no words.
func _fdef(marks []*textMark, pageSize _dc.PdfRectangle, rulings rulingList, tilings []gridTiling) paraList {
	_d.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(marks), pageSize)
	if len(marks) == 0 {
		return nil
	}
	words := _gdgfd(marks, pageSize)
	if len(words) == 0 {
		return nil
	}

	rulings.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := rulings.vertsHorzs()
	pageBag := _dbcfe(words, pageSize.Ury, verts, horzs)
	bags := _gfbf(_ggba(pageBag, pageSize.Ury, verts, horzs))

	paras := make(paraList, 0, len(bags))
	for _, bag := range bags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}
	if len(paras) >= _baeag {
		paras = paras.extractTables(tilings)
	}
	paras.sortReadingOrder()
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}

// _gfge converts stroked path sections into rulings. After passing `sections` to `_daeba`,
// every pair of consecutive points in every subpath is offered to `_cabb` together with the
// section's color; the rulings it accepts are collected and returned.
func _gfge(sections []pathSection) rulingList {
	_daeba(sections)
	if _cage {
		_d.Log.Info("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073", len(sections))
	}

	var rulings rulingList
	for _, section := range sections {
		for _, sp := range section._bffc {
			points := sp._bcbc
			// Subpaths with fewer than two points have no segments and contribute nothing.
			for i := 1; i < len(points); i++ {
				if r, ok := _cabb(points[i-1], points[i], section.Color); ok {
					rulings = append(rulings, r)
				}
			}
		}
	}

	if _cage {
		_d.Log.Info("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073", rulings)
	}
	return rulings
}
2022-07-13 21:28:43 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
2022-09-23 18:05:51 +00:00
type ImageMark struct { Image * _dc . Image ;
2022-07-13 21:28:43 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ; Height float64 ;
// Position of the image in PDF coordinates (lower left corner).
X float64 ; Y float64 ;
// Angle in degrees, if rotated.
2022-09-23 18:05:51 +00:00
Angle float64 ; } ; func ( _fecd * textTable ) depth ( ) float64 { _dfgg := 1e10 ; for _decg := 0 ; _decg < _fecd . _bgdee ; _decg ++ { _gbfdb := _fecd . get ( _decg , 0 ) ; if _gbfdb == nil || _gbfdb . _bedda { continue ; } ; _dfgg = _ca . Min ( _dfgg , _gbfdb . depth ( ) ) ; } ; return _dfgg ;
2022-09-10 15:35:04 +00:00
} ;
2022-09-23 18:05:51 +00:00
// ExtractFonts returns all font information from the page extractor, including
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
//
// The argument `previousPageFonts` is used when building a font catalog for several pages or a
// whole document: its entries are appended to the result unless a font with the same name was
// already found on the page, so the result holds no duplicate entries.
//
// NOTE: If previousPageFonts is nil, only the fonts from the page are returned.
func (e *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	pageFonts := PageFonts{}
	if err := pageFonts.extractPageResourcesToFont(e._gdc); err != nil {
		return nil, err
	}
	if previousPageFonts != nil {
		for _, font := range previousPageFonts.Fonts {
			if _dca(pageFonts.Fonts, font.FontName) {
				continue
			}
			pageFonts.Fonts = append(pageFonts.Fonts, font)
		}
	}
	return &PageFonts{Fonts: pageFonts.Fonts}, nil
}

// alignsPrimary reports whether the two rulings have the same kind and their `_efbdg`
// values differ by less than half of `_geff`.
func (r *ruling) alignsPrimary(other *ruling) bool {
	if r._fabfb != other._fabfb {
		return false
	}
	return _ca.Abs(r._efbdg-other._efbdg) < _geff*0.5
}

// isQuadrilateral reports whether the subpath has 4 points, or 5 points of which the first and
// the last coincide.
func (sp *subpath) isQuadrilateral() bool {
	switch len(sp._bcbc) {
	case 4:
		return true
	case 5:
		first, last := sp._bcbc[0], sp._bcbc[4]
		return first.X == last.X && first.Y == last.Y
	default:
		return false
	}
}

// _cgfbe walks the non-nil entries of `cells` in the key order returned by `_daadd` and, where
// the last value of the examined slice exceeds the first value of the current entry, lowers it
// to that first value and stores the slice under the previously visited key.
// Maps with at most one entry are left untouched.
func _cgfbe(cells map[int][]float64) {
	if len(cells) <= 1 {
		return
	}
	keys := _daadd(cells)
	if _eea {
		_d.Log.Info("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076", keys)
	}

	// Find the first key that has a non-nil entry.
	var first, prevKey int
	for first, prevKey = range keys {
		if cells[prevKey] != nil {
			break
		}
	}

	for offset, key := range keys[first:] {
		current := cells[key]
		if current == nil {
			continue
		}
		if _eea {
			_caa.Printf("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a", first+offset, prevKey, key)
		}
		// NOTE(review): this reads the entry for `key` again rather than for `prevKey`, so
		// `examined` and `current` are the same slice. Looks unintended — confirm against
		// the unobfuscated source before changing.
		examined := cells[key]
		if tail := len(examined) - 1; examined[tail] > current[0] {
			examined[tail] = current[0]
			cells[prevKey] = examined
		}
		prevKey = key
	}
}

// close appends the first point to the subpath when `_aada` reports that the first and last
// points differ, marks the subpath with `_adge` and then removes duplicate points.
func (sp *subpath) close() {
	start := sp._bcbc[0]
	if !_aada(start, sp.last()) {
		sp.add(sp._bcbc[0])
	}
	sp._adge = true
	sp.removeDuplicates()
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// String returns a string describing `ma`: a fixed marker when the array is empty, otherwise
// the number of elements together with the first and last marks.
func (ma TextMarkArray) String() string {
	count := len(ma._bgbd)
	if count == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	first, last := ma._bgbd[0], ma._bgbd[count-1]
	return _caa.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", count, first, last)
}

// Render mode flags. Each constant occupies its own bit (1, 2 and 4).
const (
	RenderModeStroke RenderMode = 1 << iota
	RenderModeFill
	RenderModeClip
)
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// Tables returns the tables extracted from the page.
func (pt PageText) Tables() []TextTable {
	tables := pt._fdc
	if _eea {
		_d.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}

// depthIndexes returns the keys of the bag's `_adcb` map in ascending order, or nil when the
// map is empty.
func (bag *wordBag) depthIndexes() []int {
	if len(bag._adcb) == 0 {
		return nil
	}
	indexes := make([]int, 0, len(bag._adcb))
	for idx := range bag._adcb {
		indexes = append(indexes, idx)
	}
	_e.Ints(indexes)
	return indexes
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// String returns a description of `tm`: its bounding rectangle, its `_beaf` value (labelled
// "fontsize" in the output) and its text in double quotes.
func (tm *textMark) String() string {
	const layout = "\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022"
	return _caa.Sprintf(layout, tm.PdfRectangle, tm._beaf, tm._cbge)
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// String returns a human readable description of `s`: its members, as reported by has(), in
// ascending order.
func (s intSet) String() string {
	var members []int
	for candidate := range s {
		if !s.has(candidate) {
			continue
		}
		members = append(members, candidate)
	}
	_e.Ints(members)
	return _caa.Sprintf("\u0025\u002b\u0076", members)
}

// tidied returns the rulings with duplicates removed, snapped to groups and sorted.
// It returns nil when snapToGroups yields nil. `title` is only used in the debug log.
func (rl rulingList) tidied(title string) rulingList {
	uniques := rl.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()
	if _cage {
		_d.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", title, len(rl), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// _dad reports whether the x-extent of `word` overlaps the x-extent of `bag` widened by
// `tol` on both sides.
func _dad(bag *wordBag, word *textWord, tol float64) bool {
	startsBeforeRight := word.Llx < bag.Urx+tol
	return startsBeforeRight && bag.Llx-tol < word.Urx
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (pt PageText) Marks() *TextMarkArray {
	marks := TextMarkArray{_bgbd: pt._efda}
	return &marks
}

// _cafea removes the first occurrence of `word` (compared by pointer) from `words` using
// `_bgfd` and returns the shortened slice. When `word` is not present an error is logged and
// nil is returned.
func _cafea(words []*textWord, word *textWord) []*textWord {
	for i := range words {
		if words[i] == word {
			return _bgfd(words, i)
		}
	}
	_d.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", word)
	return nil
}

// _aada reports whether the two points have identical X and Y coordinates.
func _aada(p, q _bab.Point) bool {
	if p.X != q.X {
		return false
	}
	return p.Y == q.Y
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// Append appends `mark` to the mark array.
func ( _bcgg * TextMarkArray ) Append ( mark TextMark ) { _bcgg . _bgbd = append ( _bcgg . _bgbd , mark ) } ; func ( _cegf * shapesState ) lastpointEstablished ( ) ( _bab . Point , bool ) { if _cegf . _dcgc { return _cegf . _fcaf , false ; } ; _dafe := len ( _cegf . _bfg ) ; if _dafe > 0 && _cegf . _bfg [ _dafe - 1 ] . _adge { return _cegf . _bfg [ _dafe - 1 ] . last ( ) , false ;
} ; return _bab . Point { } , true ; } ; func ( _acae * textTable ) bbox ( ) _dc . PdfRectangle { return _acae . PdfRectangle } ; func _dgggee ( _bbfdc , _gffff int ) int { if _bbfdc < _gffff { return _bbfdc ; } ; return _gffff ; } ; func ( _gdd * shapesState ) addPoint ( _abc , _gged float64 ) { _fbcgf := _gdd . establishSubpath ( ) ;
_cdeg := _gdd . devicePoint ( _abc , _gged ) ; if _fbcgf == nil { _gdd . _dcgc = true ; _gdd . _fcaf = _cdeg ; } else { _fbcgf . add ( _cdeg ) ; } ; } ; func ( _dgefd * textPara ) writeText ( _adfdc _gc . Writer ) { if _dgefd . _affa == nil { _dgefd . writeCellText ( _adfdc ) ; return ; } ; for _faed := 0 ;
_faed < _dgefd . _affa . _fcedd ; _faed ++ { for _eaae := 0 ; _eaae < _dgefd . _affa . _bgdee ; _eaae ++ { _eee := _dgefd . _affa . get ( _eaae , _faed ) ; if _eee == nil { _adfdc . Write ( [ ] byte ( "\u0009" ) ) ; } else { _eee . writeCellText ( _adfdc ) ; } ; _adfdc . Write ( [ ] byte ( "\u0020" ) ) ;
} ; if _faed < _dgefd . _affa . _fcedd - 1 { _adfdc . Write ( [ ] byte ( "\u000a" ) ) ; } ; } ; } ; func _fdacf ( _efbg _bab . Matrix ) _bab . Point { _cddc , _abg := _efbg . Translation ( ) ; return _bab . Point { X : _cddc , Y : _abg } ; } ; func ( _fbae * textObject ) getFillColor ( ) _ba . Color { return _ggce ( _fbae . _fdf . ColorspaceNonStroking , _fbae . _fdf . ColorNonStroking ) ;
2022-09-10 15:35:04 +00:00
} ;
2022-09-23 18:05:51 +00:00
// Options extractor options.
type Options struct {
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// ApplyCropBox will extract page text based on page cropbox if set to `true`.
ApplyCropBox bool ; } ; const _ccfc = 20 ; func _dddb ( _bceg * wordBag , _bcaf * textWord , _fac float64 ) bool { return _bceg . Urx <= _bcaf . Llx && _bcaf . Llx < _bceg . Urx + _fac ; } ; func ( _gefec rectRuling ) asRuling ( ) ( * ruling , bool ) { _bgdcf := ruling { _fabfb : _gefec . _fgdfae , Color : _gefec . Color , _fcgb : _gfefd } ;
switch _gefec . _fgdfae { case _bcaef : _bgdcf . _efbdg = 0.5 * ( _gefec . Llx + _gefec . Urx ) ; _bgdcf . _becdd = _gefec . Lly ; _bgdcf . _aggb = _gefec . Ury ; _accb , _ggbb := _gefec . checkWidth ( _gefec . Llx , _gefec . Urx ) ; if ! _ggbb { if _agca { _d . Log . Error ( "\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076" , _gefec ) ;
} ; return nil , false ; } ; _bgdcf . _gebag = _accb ; case _gegc : _bgdcf . _efbdg = 0.5 * ( _gefec . Lly + _gefec . Ury ) ; _bgdcf . _becdd = _gefec . Llx ; _bgdcf . _aggb = _gefec . Urx ; _abdce , _gdbg := _gefec . checkWidth ( _gefec . Lly , _gefec . Ury ) ; if ! _gdbg { if _agca { _d . Log . Error ( "\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076" , _gefec ) ;
} ; return nil , false ; } ; _bgdcf . _gebag = _abdce ; default : _d . Log . Error ( "\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064" , _gefec . _fgdfae ) ; return nil , false ; } ; return & _bgdcf , true ; } ; func ( _dafed rulingList ) primMinMax ( ) ( float64 , float64 ) { _aggde , _gfgg := _dafed [ 0 ] . _efbdg , _dafed [ 0 ] . _efbdg ;
for _ , _edgd := range _dafed [ 1 : ] { if _edgd . _efbdg < _aggde { _aggde = _edgd . _efbdg ; } else if _edgd . _efbdg > _gfgg { _gfgg = _edgd . _efbdg ; } ; } ; return _aggde , _gfgg ; } ; func ( _fgdfa * wordBag ) scanBand ( _gaa string , _caae * wordBag , _ccdd func ( _dagb * wordBag , _adcbd * textWord ) bool , _bgge , _fcfc , _cfea float64 , _cfbg , _abab bool ) int { _ebde := _caae . _gffd ;
var _dgcg map [ int ] map [ * textWord ] struct { } ; if ! _cfbg { _dgcg = _fgdfa . makeRemovals ( ) ; } ; _ebb := _gdgc * _ebde ; _egecc := 0 ; for _ , _bcae := range _fgdfa . depthBand ( _bgge - _ebb , _fcfc + _ebb ) { if len ( _fgdfa . _adcb [ _bcae ] ) == 0 { continue ; } ; for _ , _bbcd := range _fgdfa . _adcb [ _bcae ] { if ! ( _bgge - _ebb <= _bbcd . _cffg && _bbcd . _cffg <= _fcfc + _ebb ) { continue ;
} ; if ! _ccdd ( _caae , _bbcd ) { continue ; } ; _ddg := 2.0 * _ca . Abs ( _bbcd . _debab - _caae . _gffd ) / ( _bbcd . _debab + _caae . _gffd ) ; _cda := _ca . Max ( _bbcd . _debab / _caae . _gffd , _caae . _gffd / _bbcd . _debab ) ; _afce := _ca . Min ( _ddg , _cda ) ; if _cfea > 0 && _afce > _cfea { continue ;
} ; if _caae . blocked ( _bbcd ) { continue ; } ; if ! _cfbg { _caae . pullWord ( _bbcd , _bcae , _dgcg ) ; } ; _egecc ++ ; if ! _abab { if _bbcd . _cffg < _bgge { _bgge = _bbcd . _cffg ; } ; if _bbcd . _cffg > _fcfc { _fcfc = _bbcd . _cffg ; } ; } ; if _cfbg { break ; } ; } ; } ; if ! _cfbg { _fgdfa . applyRemovals ( _dgcg ) ;
} ; return _egecc ; } ; func _abdec ( _dagba , _cecd int ) uint64 { return uint64 ( _dagba ) * 0x1000000 + uint64 ( _cecd ) } ; func _bgeae ( _ggddg _dc . PdfRectangle ) rulingKind { _fegd := _ggddg . Width ( ) ; _cdbb := _ggddg . Height ( ) ; if _fegd > _cdbb { if _fegd >= _dagaf { return _gegc ;
} ; } else { if _cdbb >= _dagaf { return _bcaef ; } ; } ; return _aebb ; } ; func _ggdge ( _gcaea _dc . PdfRectangle ) * ruling { return & ruling { _fabfb : _gegc , _efbdg : _gcaea . Lly , _becdd : _gcaea . Llx , _aggb : _gcaea . Urx } ; } ; func _egag ( _fffc [ ] TextMark , _aeeg * int , _fddc string ) [ ] TextMark { _fgef := _aefa ;
_fgef . Text = _fddc ; return _fgbc ( _fffc , _aeeg , _fgef ) ; } ; func ( _efcfg * ruling ) intersects ( _dega * ruling ) bool { _fdbef := ( _efcfg . _fabfb == _bcaef && _dega . _fabfb == _gegc ) || ( _dega . _fabfb == _bcaef && _efcfg . _fabfb == _gegc ) ; _adefa := func ( _baac , _cddcce * ruling ) bool { return _baac . _becdd - _aecg <= _cddcce . _efbdg && _cddcce . _efbdg <= _baac . _aggb + _aecg ;
} ; _cccfg := _adefa ( _efcfg , _dega ) ; _gcgd := _adefa ( _dega , _efcfg ) ; if _cage { _caa . Printf ( "\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a" + "\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a" + " \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a" , _fdbef , _cccfg , _gcgd , _fdbef && _cccfg && _gcgd , _efcfg , _dega ) ;
} ; return _fdbef && _cccfg && _gcgd ; } ; func _fcce ( _daaab map [ float64 ] gridTile ) [ ] float64 { _fbab := make ( [ ] float64 , 0 , len ( _daaab ) ) ; for _bgga := range _daaab { _fbab = append ( _fbab , _bgga ) ; } ; _e . Float64s ( _fbab ) ; return _fbab ; } ; func ( _ffec lineRuling ) asRuling ( ) ( * ruling , bool ) { _fagf := ruling { _fabfb : _ffec . _gfdf , Color : _ffec . Color , _fcgb : _bagd } ;
switch _ffec . _gfdf { case _bcaef : _fagf . _efbdg = _ffec . xMean ( ) ; _fagf . _becdd = _ca . Min ( _ffec . _becgc . Y , _ffec . _facd . Y ) ; _fagf . _aggb = _ca . Max ( _ffec . _becgc . Y , _ffec . _facd . Y ) ; case _gegc : _fagf . _efbdg = _ffec . yMean ( ) ; _fagf . _becdd = _ca . Min ( _ffec . _becgc . X , _ffec . _facd . X ) ;
_fagf . _aggb = _ca . Max ( _ffec . _becgc . X , _ffec . _facd . X ) ; default : _d . Log . Error ( "\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064" , _ffec . _gfdf ) ; return nil , false ; } ; return & _fagf , true ; } ;
2022-09-10 15:35:04 +00:00
// NewFromContents creates a new extractor from contents and page resources.
2022-09-23 18:05:51 +00:00
func NewFromContents ( contents string , resources * _dc . PdfPageResources ) ( * Extractor , error ) { const _ee = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s" ; _aafa := & Extractor { _aaf : contents , _gdc : resources , _ab : map [ string ] fontEntry { } , _bf : map [ string ] textResult { } } ;
_gd . TrackUse ( _ee ) ; return _aafa , nil ; } ; func _gdgfd ( _gffdf [ ] * textMark , _afaf _dc . PdfRectangle ) [ ] * textWord { var _bbece [ ] * textWord ; var _fdag * textWord ; if _agb { _d . Log . Info ( "\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073" , len ( _gffdf ) ) ;
} ; _eeaad := func ( ) { if _fdag != nil { _fbed := _fdag . computeText ( ) ; if ! _fcgba ( _fbed ) { _fdag . _bgdg = _fbed ; _bbece = append ( _bbece , _fdag ) ; if _agb { _d . Log . Info ( "\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073" , len ( _bbece ) - 1 , _fdag . String ( ) ) ;
for _bggb , _cfdc := range _fdag . _gbaed { _caa . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _bggb , _cfdc . String ( ) ) ; } ; } ; } ; _fdag = nil ; } ; } ; for _ , _afgge := range _gffdf { if _fdfd && _fdag != nil && len ( _fdag . _gbaed ) > 0 { _ecfb := _fdag . _gbaed [ len ( _fdag . _gbaed ) - 1 ] ;
_cbgb , _cadd := _acda ( _afgge . _cbge ) ; _ebaec , _ffbfd := _acda ( _ecfb . _cbge ) ; if _cadd && ! _ffbfd && _ecfb . inDiacriticArea ( _afgge ) { _fdag . addDiacritic ( _cbgb ) ; continue ; } ; if _ffbfd && ! _cadd && _afgge . inDiacriticArea ( _ecfb ) { _fdag . _gbaed = _fdag . _gbaed [ : len ( _fdag . _gbaed ) - 1 ] ;
_fdag . appendMark ( _afgge , _afaf ) ; _fdag . addDiacritic ( _ebaec ) ; continue ; } ; } ; _eaad := _fcgba ( _afgge . _cbge ) ; if _eaad { _eeaad ( ) ; continue ; } ; if _fdag == nil && ! _eaad { _fdag = _dggd ( [ ] * textMark { _afgge } , _afaf ) ; continue ; } ; _ccda := _fdag . _debab ; _bddcf := _ca . Abs ( _bgag ( _afaf , _afgge ) - _fdag . _cffg ) / _ccda ;
_cfdce := _ggee ( _afgge , _fdag ) / _ccda ; if _cfdce >= _gaac || ! ( - _aabe <= _cfdce && _bddcf <= _gfbbb ) { _eeaad ( ) ; _fdag = _dggd ( [ ] * textMark { _afgge } , _afaf ) ; continue ; } ; _fdag . appendMark ( _afgge , _afaf ) ; } ; _eeaad ( ) ; return _bbece ; } ; var ( _adabg = map [ rune ] string { 0x0060 : "\u0300" , 0x02CB : "\u0300" , 0x0027 : "\u0301" , 0x00B4 : "\u0301" , 0x02B9 : "\u0301" , 0x02CA : "\u0301" , 0x005E : "\u0302" , 0x02C6 : "\u0302" , 0x007E : "\u0303" , 0x02DC : "\u0303" , 0x00AF : "\u0304" , 0x02C9 : "\u0304" , 0x02D8 : "\u0306" , 0x02D9 : "\u0307" , 0x00A8 : "\u0308" , 0x00B0 : "\u030a" , 0x02DA : "\u030a" , 0x02BA : "\u030b" , 0x02DD : "\u030b" , 0x02C7 : "\u030c" , 0x02C8 : "\u030d" , 0x0022 : "\u030e" , 0x02BB : "\u0312" , 0x02BC : "\u0313" , 0x0486 : "\u0313" , 0x055A : "\u0313" , 0x02BD : "\u0314" , 0x0485 : "\u0314" , 0x0559 : "\u0314" , 0x02D4 : "\u031d" , 0x02D5 : "\u031e" , 0x02D6 : "\u031f" , 0x02D7 : "\u0320" , 0x02B2 : "\u0321" , 0x00B8 : "\u0327" , 0x02CC : "\u0329" , 0x02B7 : "\u032b" , 0x02CD : "\u0331" , 0x005F : "\u0332" , 0x204E : "\u0359" } ;
) ; func ( _gabf rulingList ) asTiling ( ) gridTiling { if _dgfd { _d . Log . Info ( "r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d" , len ( _gabf ) ) ;
} ; for _ddba , _eacf := range _gabf [ 1 : ] { _bbba := _gabf [ _ddba ] ; if _bbba . alignsPrimary ( _eacf ) && _bbba . alignsSec ( _eacf ) { _d . Log . Error ( "a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073" , _eacf , _bbba ) ;
} ; } ; _gabf . sortStrict ( ) ; _gabf . log ( "\u0073n\u0061\u0070\u0070\u0065\u0064" ) ; _dcfbf , _bcge := _gabf . vertsHorzs ( ) ; _fbce := _dcfbf . primaries ( ) ; _egacf := _bcge . primaries ( ) ; _gebb := len ( _fbce ) - 1 ; _eebg := len ( _egacf ) - 1 ; if _gebb == 0 || _eebg == 0 { return gridTiling { } ;
} ; _fdca := _dc . PdfRectangle { Llx : _fbce [ 0 ] , Urx : _fbce [ _gebb ] , Lly : _egacf [ 0 ] , Ury : _egacf [ _eebg ] } ; if _dgfd { _d . Log . Info ( "\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064" , len ( _dcfbf ) ) ;
for _gfbd , _fagb := range _dcfbf { _caa . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _gfbd , _fagb ) ; } ; _d . Log . Info ( "\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064" , len ( _bcge ) ) ;
for _cffbc , _gcdee := range _bcge { _caa . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _cffbc , _gcdee ) ; } ; _d . Log . Info ( "\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f" , _gebb , _eebg , _fbce , _egacf ) ;
} ; _ggdc := make ( [ ] gridTile , _gebb * _eebg ) ; for _ggff := _eebg - 1 ; _ggff >= 0 ; _ggff -- { _gdbd := _egacf [ _ggff ] ; _eaddg := _egacf [ _ggff + 1 ] ; for _cagdb := 0 ; _cagdb < _gebb ; _cagdb ++ { _edaa := _fbce [ _cagdb ] ; _abgf := _fbce [ _cagdb + 1 ] ; _ddga := _dcfbf . findPrimSec ( _edaa , _gdbd ) ;
_fgcg := _dcfbf . findPrimSec ( _abgf , _gdbd ) ; _dgaa := _bcge . findPrimSec ( _gdbd , _edaa ) ; _gggbd := _bcge . findPrimSec ( _eaddg , _edaa ) ; _gacd := _dc . PdfRectangle { Llx : _edaa , Urx : _abgf , Lly : _gdbd , Ury : _eaddg } ; _baad := _bceb ( _gacd , _ddga , _fgcg , _dgaa , _gggbd ) ;
_ggdc [ _ggff * _gebb + _cagdb ] = _baad ; if _dgfd { _caa . Printf ( "\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a" , _cagdb , _ggff , _baad . String ( ) , _baad . Width ( ) , _baad . Height ( ) ) ;
} ; } ; } ; if _dgfd { _d . Log . Info ( "r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066" , _fdca ) ;
} ; _bdf := make ( [ ] map [ float64 ] gridTile , _eebg ) ; for _geffb := _eebg - 1 ; _geffb >= 0 ; _geffb -- { if _dgfd { _caa . Printf ( "\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a" , _geffb ) ; } ; _bdf [ _geffb ] = make ( map [ float64 ] gridTile , _gebb ) ; for _gddc := 0 ; _gddc < _gebb ;
_gddc ++ { _eefeb := _ggdc [ _geffb * _gebb + _gddc ] ; if _dgfd { _caa . Printf ( "\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _gddc , _eefeb ) ; } ; if ! _eefeb . _gcfbe { continue ; } ; _gbdc := _gddc ; for _fccd := _gddc + 1 ; ! _eefeb . _ebbb && _fccd < _gebb ;
_fccd ++ { _cadg := _ggdc [ _geffb * _gebb + _fccd ] ; _eefeb . Urx = _cadg . Urx ; _eefeb . _faaa = _eefeb . _faaa || _cadg . _faaa ; _eefeb . _ecbgb = _eefeb . _ecbgb || _cadg . _ecbgb ; _eefeb . _ebbb = _cadg . _ebbb ; if _dgfd { _caa . Printf ( "\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a" , _fccd , _cadg , _eefeb ) ;
} ; _gbdc = _fccd ; } ; if _dgfd { _caa . Printf ( " \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n" , _gddc , _gbdc , _eefeb ) ; } ; _gddc = _gbdc ; _bdf [ _geffb ] [ _eefeb . Llx ] = _eefeb ; } ; } ; _ddebc := make ( map [ float64 ] map [ float64 ] gridTile , _eebg ) ;
_ffaca := make ( map [ float64 ] map [ float64 ] struct { } , _eebg ) ; for _gecccb := _eebg - 1 ; _gecccb >= 0 ; _gecccb -- { _adgbf := _ggdc [ _gecccb * _gebb ] . Lly ; _ddebc [ _adgbf ] = make ( map [ float64 ] gridTile , _gebb ) ; _ffaca [ _adgbf ] = make ( map [ float64 ] struct { } , _gebb ) ; } ; if _dgfd { _d . Log . Info ( "\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066" , _fdca ) ;
} ; for _ebec := _eebg - 1 ; _ebec >= 0 ; _ebec -- { _gcfdf := _ggdc [ _ebec * _gebb ] . Lly ; _cgac := _bdf [ _ebec ] ; if _dgfd { _caa . Printf ( "\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a" , _ebec ) ; } ; for _ , _cacd := range _fcce ( _cgac ) { if _ , _cgec := _ffaca [ _gcfdf ] [ _cacd ] ;
_cgec { continue ; } ; _eabbd := _cgac [ _cacd ] ; if _dgfd { _caa . Printf ( " \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a" , _eabbd . String ( ) ) ; } ; for _dabe := _ebec - 1 ; _dabe >= 0 ; _dabe -- { if _eabbd . _ecbgb { break ; } ; _bgeag := _bdf [ _dabe ] ; _aeee , _bbedb := _bgeag [ _cacd ] ;
if ! _bbedb { break ; } ; if _aeee . Urx != _eabbd . Urx { break ; } ; _eabbd . _ecbgb = _aeee . _ecbgb ; _eabbd . Lly = _aeee . Lly ; if _dgfd { _caa . Printf ( "\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a" , _aeee . String ( ) , _eabbd . String ( ) ) ;
} ; _ffaca [ _aeee . Lly ] [ _aeee . Llx ] = struct { } { } ; } ; if _ebec == 0 { _eabbd . _ecbgb = true ; } ; if _eabbd . complete ( ) { _ddebc [ _gcfdf ] [ _cacd ] = _eabbd ; } ; } ; } ; _acdb := gridTiling { PdfRectangle : _fdca , _gggb : _bdgf ( _ddebc ) , _gceb : _ebbda ( _ddebc ) , _begc : _ddebc } ;
_acdb . log ( "\u0043r\u0065\u0061\u0074\u0065\u0064" ) ; return _acdb ; } ; func _aeb ( _fgfg , _becga bounded ) float64 { _gdbba := _deba ( _fgfg , _becga ) ; if ! _edeg ( _gdbba ) { return _gdbba ; } ; return _aaed ( _fgfg , _becga ) ; } ; func _efca ( _gebd , _fabda float64 ) bool { return _ca . Abs ( _gebd - _fabda ) <= _aecg } ;
// fontEntry pairs a font with an int64 value.
// NOTE(review): `_dcgaa` is not used in this part of the file — presumably bookkeeping
// for a font cache (access counter or timestamp); confirm against its users.
type fontEntry struct {
	_bcab  *_dc.PdfFont
	_dcgaa int64
}

// extractPageText parses the content stream `contents` and replays its operators to
// collect text marks (`_gfc`), stroked paths (`_ggeb`) and filled paths (`_agc`) into a
// PageText.
//
// `resources` are the resources passed to the content stream processor, `parentCTM` is
// the transform of the enclosing stream (it is multiplied with the graphics-state CTM
// for text objects and nested forms) and `level` is the form XObject nesting depth.
// Form XObjects referenced by `Do` are processed recursively with `level+1`; their
// results are cached in the extractor's `_bf` map keyed by XObject name.
//
// It returns the PageText, the two integer counters `_efde` and `_cdc` of the text
// state, and an error. When `level` exceeds `_ccfc` a "form stack overflow" error is
// returned before any parsing happens. The PageText is returned even on error.
func (e *Extractor) extractPageText(contents string, resources *_dc.PdfPageResources, parentCTM _bab.Matrix, level int) (*PageText, int, int, error) {
	// Shared "ERROR: err=%v" debug format used by many operator handlers below.
	const errFmt = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076"

	_d.Log.Trace("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d", level)
	pageText := &PageText{_fdbe: e._cbb}
	state := _bea(e._cbb)
	var savedStates stateStack
	to := _fbag(e, resources, _gcf.GraphicsState{}, &state, &savedStates)
	shapes := shapesState{_gcbe: parentCTM, _eabb: _bab.IdentityMatrix(), _degb: to}
	var inTextObj bool

	// Guard against unbounded recursion through nested form XObjects.
	if level > _ccfc {
		err := _g.New("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077")
		_d.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076", level, err)
		return pageText, state._efde, state._cdc, err
	}

	parser := _gcf.NewContentStreamParser(contents)
	operations, err := parser.Parse()
	if err != nil {
		_d.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
		return pageText, state._efde, state._cdc, err
	}
	pageText._ced = operations

	processor := _gcf.NewContentStreamProcessor(*operations)
	processor.AddHandler(_gcf.HandlerConditionEnumAllOperands, "", func(op *_gcf.ContentStreamOperation, gs _gcf.GraphicsState, res *_dc.PdfPageResources) error {
		operand := op.Operand
		if _dccf {
			_d.Log.Info("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s", op)
		}
		switch operand {
		case "\u0071": // q
			if _fgeac {
				_d.Log.Info("\u0063\u0074\u006d\u003d\u0025\u0073", shapes._eabb)
			}
			savedStates.push(&state)

		case "\u0051": // Q
			if !savedStates.empty() {
				state = *savedStates.pop()
			}
			shapes._eabb = gs.CTM
			if _fgeac {
				_d.Log.Info("\u0063\u0074\u006d\u003d\u0025\u0073", shapes._eabb)
			}

		case "\u0042\u0054": // BT
			if inTextObj {
				// Nested BT: keep the marks gathered so far before replacing the text object.
				_d.Log.Debug("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074")
				pageText._gfc = append(pageText._gfc, to._dcfd...)
			}
			inTextObj = true
			textGS := gs
			textGS.CTM = parentCTM.Mult(textGS.CTM)
			to = _fbag(e, res, textGS, &state, &savedStates)
			shapes._degb = to

		case "\u0045\u0054": // ET
			if !inTextObj {
				_d.Log.Debug("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074")
			}
			inTextObj = false
			pageText._gfc = append(pageText._gfc, to._dcfd...)
			to.reset()

		case "\u0054\u002a": // T*
			to.nextLine()

		case "\u0054\u0064": // Td
			if ok, err := to.checkOp(op, 2, true); !ok {
				_d.Log.Debug(errFmt, err)
				return err
			}
			x, y, err := _fgac(op.Params)
			if err != nil {
				return err
			}
			to.moveText(x, y)

		case "\u0054\u0044": // TD
			if ok, err := to.checkOp(op, 2, true); !ok {
				_d.Log.Debug(errFmt, err)
				return err
			}
			x, y, err := _fgac(op.Params)
			if err != nil {
				_d.Log.Debug(errFmt, err)
				return err
			}
			to.moveTextSetLeading(x, y)

		case "\u0054\u006a": // Tj
			if ok, err := to.checkOp(op, 1, true); !ok {
				_d.Log.Debug("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076", op, err)
				return err
			}
			obj := _aa.TraceToDirectObject(op.Params[0])
			data, ok := _aa.GetStringBytes(obj)
			if !ok {
				_d.Log.Debug("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064", op)
				return _aa.ErrTypeError
			}
			return to.showText(obj, data)

		case "\u0054\u004a": // TJ
			if ok, err := to.checkOp(op, 1, true); !ok {
				_d.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			arr, ok := _aa.GetArray(op.Params[0])
			if !ok {
				_d.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064", op)
				// Report the type mismatch like the other text-showing operators do.
				// Previously the captured outer parse error (always nil here) was
				// returned, which silently dropped the failure.
				return _aa.ErrTypeError
			}
			return to.showTextAdjusted(arr)

		case "\u0027": // '
			if ok, err := to.checkOp(op, 1, true); !ok {
				_d.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			obj := _aa.TraceToDirectObject(op.Params[0])
			data, ok := _aa.GetStringBytes(obj)
			if !ok {
				_d.Log.Debug("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064", op)
				return _aa.ErrTypeError
			}
			to.nextLine()
			return to.showText(obj, data)

		case "\u0022": // "
			if ok, err := to.checkOp(op, 3, true); !ok {
				_d.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			// Operand order is `aw ac string`: word spacing first, then character spacing.
			wordSpacing, charSpacing, err := _fgac(op.Params[:2])
			if err != nil {
				return err
			}
			obj := _aa.TraceToDirectObject(op.Params[2])
			data, ok := _aa.GetStringBytes(obj)
			if !ok {
				_d.Log.Debug("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064", op)
				return _aa.ErrTypeError
			}
			to.setWordSpacing(wordSpacing)
			to.setCharSpacing(charSpacing)
			to.nextLine()
			return to.showText(obj, data)

		case "\u0054\u004c": // TL
			leading, err := _bdb(op)
			if err != nil {
				_d.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			to.setTextLeading(leading)

		case "\u0054\u0063": // Tc
			spacing, err := _bdb(op)
			if err != nil {
				_d.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			to.setCharSpacing(spacing)

		case "\u0054\u0066": // Tf
			if ok, err := to.checkOp(op, 2, true); !ok {
				_d.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			fontName, ok := _aa.GetNameVal(op.Params[0])
			if !ok {
				_d.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064", op)
				return _aa.ErrTypeError
			}
			fontSize, err := _aa.GetNumberAsFloat(op.Params[1])
			// Check the conversion error itself; the earlier `ok` is always true here,
			// so testing it again let invalid font sizes through unnoticed.
			if err != nil {
				_d.Log.Debug("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076", op, err)
				return err
			}
			err = to.setFont(fontName, fontSize)
			// An unsupported font is recorded on the text object instead of aborting.
			to._dcg = _af.Is(err, _aa.ErrNotSupported)
			if err != nil && !to._dcg {
				return err
			}

		case "\u0054\u006d": // Tm
			if ok, err := to.checkOp(op, 6, true); !ok {
				_d.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			matrix, err := _aa.GetNumbersAsFloat(op.Params)
			if err != nil {
				_d.Log.Debug(errFmt, err)
				return err
			}
			to.setTextMatrix(matrix)

		case "\u0054\u0072": // Tr
			if ok, err := to.checkOp(op, 1, true); !ok {
				_d.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			mode, ok := _aa.GetIntVal(op.Params[0])
			if !ok {
				_d.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064", op)
				return _aa.ErrTypeError
			}
			to.setTextRenderMode(mode)

		case "\u0054\u0073": // Ts
			if ok, err := to.checkOp(op, 1, true); !ok {
				_d.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			rise, err := _aa.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_d.Log.Debug(errFmt, err)
				return err
			}
			to.setTextRise(rise)

		case "\u0054\u0077": // Tw
			if ok, err := to.checkOp(op, 1, true); !ok {
				_d.Log.Debug(errFmt, err)
				return err
			}
			spacing, err := _aa.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_d.Log.Debug(errFmt, err)
				return err
			}
			to.setWordSpacing(spacing)

		case "\u0054\u007a": // Tz
			if ok, err := to.checkOp(op, 1, true); !ok {
				_d.Log.Debug(errFmt, err)
				return err
			}
			scaling, err := _aa.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_d.Log.Debug(errFmt, err)
				return err
			}
			to.setHorizScaling(scaling)

		case "\u0063\u006d": // cm
			shapes._eabb = gs.CTM
			if shapes._eabb.Singular() {
				// Replace a singular CTM by a pure translation so it stays usable.
				fixed := _bab.IdentityMatrix().Translate(shapes._eabb.Translation())
				_d.Log.Debug("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s", shapes._eabb, fixed)
				shapes._eabb = fixed
			}
			if _fgeac {
				_d.Log.Info("\u0063\u0074\u006d\u003d\u0025\u0073", shapes._eabb)
			}

		case "\u006d": // m
			if len(op.Params) != 2 {
				// Best effort: a malformed move-to is logged and ignored.
				_d.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e", _cb)
				return nil
			}
			xy, err := _aa.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.moveTo(xy[0], xy[1])

		case "\u006c": // l
			if len(op.Params) != 2 {
				// Best effort: a malformed line-to is logged and ignored.
				_d.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e", _cb)
				return nil
			}
			xy, err := _aa.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.lineTo(xy[0], xy[1])

		case "\u0063": // c
			if len(op.Params) != 6 {
				return _cb
			}
			pts, err := _aa.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_d.Log.Debug("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f", pts)
			shapes.cubicTo(pts[0], pts[1], pts[2], pts[3], pts[4], pts[5])

		case "\u0076", "\u0079": // v, y
			if len(op.Params) != 4 {
				return _cb
			}
			pts, err := _aa.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_d.Log.Debug("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f", pts)
			shapes.quadraticTo(pts[0], pts[1], pts[2], pts[3])

		case "\u0068": // h
			shapes.closePath()

		case "\u0072\u0065": // re
			if len(op.Params) != 4 {
				return _cb
			}
			rect, err := _aa.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.drawRectangle(rect[0], rect[1], rect[2], rect[3])
			shapes.closePath()

		case "\u0053": // S
			shapes.stroke(&pageText._ggeb)
			shapes.clearPath()

		case "\u0073": // s
			shapes.closePath()
			shapes.stroke(&pageText._ggeb)
			shapes.clearPath()

		case "\u0046": // F
			shapes.fill(&pageText._agc)
			shapes.clearPath()

		case "\u0066", "\u0066\u002a": // f, f*
			shapes.closePath()
			shapes.fill(&pageText._agc)
			shapes.clearPath()

		case "\u0042", "\u0042\u002a": // B, B*
			shapes.fill(&pageText._agc)
			shapes.stroke(&pageText._ggeb)
			shapes.clearPath()

		case "\u0062", "\u0062\u002a": // b, b*
			shapes.closePath()
			shapes.fill(&pageText._agc)
			shapes.stroke(&pageText._ggeb)
			shapes.clearPath()

		case "\u006e": // n
			shapes.clearPath()

		case "\u0044\u006f": // Do
			if len(op.Params) == 0 {
				_d.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e", op.Params)
				return _aa.ErrRangeError
			}
			name, ok := _aa.GetName(op.Params[0])
			if !ok {
				_d.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e", op.Params[0])
				return _aa.ErrTypeError
			}
			// Only form XObjects carry a content stream to recurse into.
			_, xobjType := res.GetXObjectByName(*name)
			if xobjType != _dc.XObjectTypeForm {
				break
			}
			formResult, ok := e._bf[name.String()]
			if !ok {
				xform, err := res.GetXObjectFormByName(*name)
				if err != nil {
					_d.Log.Debug("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v", err)
					return err
				}
				formContents, err := xform.GetContentStream()
				if err != nil {
					_d.Log.Debug("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v", err)
					return err
				}
				// A form without its own resources uses the current ones.
				formResources := xform.Resources
				if formResources == nil {
					formResources = res
				}
				formText, cntEfde, cntCdc, err := e.extractPageText(string(formContents), formResources, parentCTM.Mult(gs.CTM), level+1)
				if err != nil {
					_d.Log.Debug("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v", err)
					return err
				}
				formResult = textResult{*formText, cntEfde, cntCdc}
				e._bf[name.String()] = formResult
			}
			shapes._eabb = gs.CTM
			if _fgeac {
				_d.Log.Info("\u0063\u0074\u006d\u003d\u0025\u0073", shapes._eabb)
			}
			pageText._gfc = append(pageText._gfc, formResult._fbf._gfc...)
			pageText._ggeb = append(pageText._ggeb, formResult._fbf._ggeb...)
			pageText._agc = append(pageText._agc, formResult._fbf._agc...)
			state._efde += formResult._eab
			state._cdc += formResult._fadb

		case "\u0072\u0067", "\u0067", "\u006b", "\u0063\u0073", "\u0073\u0063", "\u0073\u0063\u006e": // rg, g, k, cs, sc, scn
			to._fdf.ColorspaceNonStroking = gs.ColorspaceNonStroking
			to._fdf.ColorNonStroking = gs.ColorNonStroking

		case "\u0052\u0047", "\u0047", "\u004b", "\u0043\u0053", "\u0053\u0043", "\u0053\u0043\u004e": // RG, G, K, CS, SC, SCN
			to._fdf.ColorspaceStroking = gs.ColorspaceStroking
			to._fdf.ColorStroking = gs.ColorStroking
		}
		return nil
	})

	err = processor.Process(resources)
	return pageText, state._efde, state._cdc, err
}

// _dggg reports whether both `_dee` and `_fdga` hold for the rectangle pair (`_bed`, `_bfgb`).
// `_fdga` is only evaluated when `_dee` returns true.
func _dggg(_bed, _bfgb _dc.PdfRectangle) bool {
	return _dee(_bed, _bfgb) && _fdga(_bed, _bfgb)
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// PageFonts holds the fonts extracted from a single PDF page.
type PageFonts struct {
	// Fonts lists the extracted fonts.
	Fonts []Font
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// Text returns the text that was extracted for the page.
func (pt PageText) Text() string {
	text := pt._dbdg
	return text
}

// _bea returns a textState whose `_abbd` field is 100, whose `_efbd` field is
// RenderModeFill and whose `_eegd` field is `rect`. All other fields are left at
// their zero values.
func _bea(rect _dc.PdfRectangle) textState {
	var state textState
	state._abbd = 100
	state._efbd = RenderModeFill
	state._eegd = rect
	return state
}

// gridTile is a rectangle together with four boolean flags.
// numBorders counts how many of the four flags are set, so each flag marks one
// border of the tile.
// NOTE(review): which side each flag stands for is not established by this
// declaration — confirm against the function that constructs tiles.
type gridTile struct {
	_dc.PdfRectangle
	_faaa   bool
	_gcfbe  bool
	_ecbgb  bool
	_ebbb   bool
}
// _dcfc creates a textPara with bounding box `bbox` and lines `lines`.
func _dcfc(bbox _dc.PdfRectangle, lines []*textLine) *textPara {
	para := new(textPara)
	para.PdfRectangle = bbox
	para._ddaf = lines
	return para
}

// rulingList is a list of rulings.
type rulingList []*ruling

// llyRange returns the sub-slice of `ordering` (indexes into `paras`) whose paras have
// Lly in the closed range [`minVal`, `maxVal`]. It returns nil when the range lies
// entirely outside the Lly values of the first and last ordered paras.
// The binary searches require `ordering` to list the paras by non-decreasing Lly.
// NOTE(review): `paras` and `ordering` must be non-empty; the first/last lookups are unguarded.
func (paras paraList) llyRange(ordering []int, minVal, maxVal float64) []int {
	count := len(paras)
	llyAt := func(i int) float64 { return paras[ordering[i]].Lly }
	if maxVal < llyAt(0) || minVal > llyAt(count-1) {
		return nil
	}
	first := _e.Search(count, func(i int) bool { return llyAt(i) >= minVal })
	end := _e.Search(count, func(i int) bool { return llyAt(i) > maxVal })
	return ordering[first:end]
}

// _afccc creates a wordBag that contains only `word`. The word is stored under the key
// computed by `_ebcf` from its `_cffg` field; the bag takes the word's bounding box and
// `_debab` value, and records `_cgd`, `_abgg` and `_fea` unchanged.
func _afccc(word *textWord, _cgd float64, _abgg, _fea rulingList) *wordBag {
	key := _ebcf(word._cffg)
	return &wordBag{
		_adcb:        map[int][]*textWord{key: {word}},
		PdfRectangle: word.PdfRectangle,
		_gffd:        word._debab,
		_deae:        _cgd,
		_bbce:        _abgg,
		_dce:         _fea,
	}
}

// _addda applies `_aged` to the absolute X and Y distances between points `a` and `b`.
func _addda(a, b _bab.Point) bool {
	return _aged(_ca.Abs(a.X-b.X), _ca.Abs(a.Y-b.Y))
}

// removeDuplicates sorts `rulings` in place and returns a new list in which every
// ruling that `equals` the previously kept one has been dropped.
// It returns nil for an empty list.
func (rulings rulingList) removeDuplicates() rulingList {
	if len(rulings) == 0 {
		return nil
	}
	rulings.sort()
	last := rulings[0]
	unique := rulingList{last}
	for i := 1; i < len(rulings); i++ {
		if r := rulings[i]; !r.equals(last) {
			unique = append(unique, r)
			last = r
		}
	}
	return unique
}

// computeEBBoxes computes the `_aecfg` box of every para in `paras`.
// Each box starts as the para's own bounding box. Its left/right edges are then widened
// to the edges of other paras that lie at or below it (other.Ury <= box.Lly), as long
// as that does not cross the nearest box among the para's `yNeighbours(0)` on that side.
// When `_gce` is set, the bounding boxes are finally replaced by the computed boxes.
func (paras paraList) computeEBBoxes() {
	if _aedg {
		_d.Log.Info("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a")
	}
	for _, para := range paras {
		para._aecfg = para.PdfRectangle
	}

	neighbours := paras.yNeighbours(0)
	for i, para := range paras {
		ebox := para._aecfg

		// Horizontal limits set by the closest neighbours to the left and right.
		leftLimit, rightLimit := -1.0e9, 1.0e9
		for _, j := range neighbours[para] {
			other := paras[j]._aecfg
			switch {
			case other.Urx < ebox.Llx:
				leftLimit = _ca.Max(leftLimit, other.Urx)
			case ebox.Urx < other.Llx:
				rightLimit = _ca.Min(rightLimit, other.Llx)
			}
		}

		// Widen towards paras below, staying within the limits.
		for j, otherPara := range paras {
			other := otherPara._aecfg
			if i == j {
				continue
			}
			if other.Ury > ebox.Lly {
				continue
			}
			switch {
			case leftLimit <= other.Llx && other.Llx < ebox.Llx:
				ebox.Llx = other.Llx
			case other.Urx <= rightLimit && ebox.Urx < other.Urx:
				ebox.Urx = other.Urx
			}
		}

		if _aedg {
			_caa.Printf("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a", i, para._aecfg, ebox, _ebfce(para.text(), 50))
		}
		para._aecfg = ebox
	}

	if !_gce {
		return
	}
	for _, para := range paras {
		para.PdfRectangle = para._aecfg
	}
}

// taken returns true when `para` is nil or its `_bgcb` flag is set.
func (para *textPara) taken() bool {
	if para == nil {
		return true
	}
	return para._bgcb
}

// log prints the tiling, labelled with `title`, when the `_dgfd` debug flag is set.
// It writes the `_gggb` and `_gceb` coordinate lists and then every tile stored in
// `_begc`, visiting keys in the order of those two lists.
func (tiling gridTiling) log(title string) {
	if !_dgfd {
		return
	}
	_d.Log.Info("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071", len(tiling._gggb), len(tiling._gceb), title)
	_caa.Printf("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a", tiling._gggb)
	_caa.Printf("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a", tiling._gceb)
	for rowIdx, rowKey := range tiling._gceb {
		if row, haveRow := tiling._begc[rowKey]; haveRow {
			_caa.Printf("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a", rowIdx, rowKey)
			for colIdx, colKey := range tiling._gggb {
				if tile, haveTile := row[colKey]; haveTile {
					_caa.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", colIdx, tile.String())
				}
			}
		}
	}
}

// _bceb builds the gridTile for rectangle `rect`. Each border flag is set when the
// corresponding ruling is non-nil and `encloses` the matching side of the rectangle:
// `_acff` and `_cbdb` are tested against the vertical extent [Lly, Ury] and set `_gcfbe`
// and `_ebbb`; `_dfcge` and `_aafga` are tested against the horizontal extent [Llx, Urx]
// and set `_ecbgb` and `_faaa`.
func _bceb(rect _dc.PdfRectangle, _acff, _cbdb, _dfcge, _aafga *ruling) gridTile {
	covers := func(r *ruling, lo, hi float64) bool {
		return r != nil && r.encloses(lo, hi)
	}
	return gridTile{
		PdfRectangle: rect,
		_gcfbe:       covers(_acff, rect.Lly, rect.Ury),
		_ebbb:        covers(_cbdb, rect.Lly, rect.Ury),
		_ecbgb:       covers(_dfcge, rect.Llx, rect.Urx),
		_faaa:        covers(_aafga, rect.Llx, rect.Urx),
	}
}

// checkOp validates the operand count of `op`.
// When `to` is nil it first logs that the operator was used outside a text object,
// showing at most `numParams` operands. If `numParams` is negative the count is not
// checked. On a mismatch it logs the problem and returns false together with an
// "incorrect parameter count" error when `hard` is true, or a nil error otherwise.
func (to *textObject) checkOp(op *_gcf.ContentStreamOperation, numParams int, hard bool) (ok bool, err error) {
	if to == nil {
		var shown []_aa.PdfObject
		if numParams > 0 {
			shown = op.Params
			if len(shown) > numParams {
				shown = shown[:numParams]
			}
		}
		_d.Log.Debug("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076", op.Operand, shown)
	}
	if numParams < 0 || len(op.Params) == numParams {
		return true, nil
	}
	if hard {
		err = _g.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	}
	_d.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", op.Operand, numParams, len(op.Params), op.Params)
	return false, err
}

// textObject holds the state used while processing the text operators of a content stream.
type textObject struct {
	_ebe  *Extractor             // Extractor this text object is attached to.
	_eff  *_dc.PdfPageResources  // Page resources.
	_fdf  _gcf.GraphicsState     // Graphics state; extractPageText updates its colors on color operators.
	_cgf  *textState             // Text state.
	_cdbd *stateStack            // Stack of text states.
	_aafd _bab.Matrix            // NOTE(review): presumably the text matrix — confirm.
	_acd  _bab.Matrix            // NOTE(review): presumably the text line matrix — confirm.
	_dcfd []*textMark            // Text marks; extractPageText appends them to the page text.
	_dcg  bool                   // Set by extractPageText when setFont failed with ErrNotSupported.
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// String returns a short description of `state`: the values of its `_cace`, `_fgd`
// and `_fdd` fields and the base font name of its `_gcb` font, or a placeholder when
// no font is set.
func (state *textState) String() string {
	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	if font := state._gcb; font != nil {
		fontName = font.BaseFont()
	}
	return _caa.Sprintf(
		"\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071",
		state._cace,
		state._fgd,
		state._fdd,
		fontName,
	)
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// ToTextMark converts the internal mark `tm` into its exported TextMark form by
// copying the corresponding fields. TextMark fields that have no counterpart here keep
// their zero values.
func (tm *textMark) ToTextMark() TextMark {
	var out TextMark
	out.Text = tm._cbge
	out.Original = tm._bcabg
	out.BBox = tm._gde
	out.Font = tm._eead
	out.FontSize = tm._beaf
	out.FillColor = tm._fgeee
	out.StrokeColor = tm._cab
	out.Orientation = tm._gcce
	out.DirectObject = tm._abda
	out.ObjString = tm._bfga
	out.Tw = tm.Tw
	out.Th = tm.Th
	out.Tc = tm._bddca
	out.Index = tm._gffe
	return out
}

// _bgag returns the distance from the bottom (Lly) of `b`'s bounding box up to the
// top (Ury) of `rect`.
func _bgag(rect _dc.PdfRectangle, b bounded) float64 {
	top := rect.Ury
	bottom := b.bbox().Lly
	return top - bottom
}