2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2018-03-22 14:03:47 +00:00
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2022-07-13 21:28:43 +00:00
package extractor ; import ( _ae "bytes" ; _gd "errors" ; _fc "fmt" ; _fdd "github.com/unidoc/unipdf/v3/common" ; _aeg "github.com/unidoc/unipdf/v3/contentstream" ; _dd "github.com/unidoc/unipdf/v3/core" ; _gg "github.com/unidoc/unipdf/v3/internal/license" ; _gf "github.com/unidoc/unipdf/v3/internal/textencoding" ;
_ebf "github.com/unidoc/unipdf/v3/internal/transform" ; _ee "github.com/unidoc/unipdf/v3/model" ; _eb "golang.org/x/text/unicode/norm" ; _bd "golang.org/x/xerrors" ; _c "image/color" ; _f "io" ; _fd "math" ; _d "regexp" ; _e "sort" ; _a "strings" ; _b "unicode" ; _df "unicode/utf8" ;
) ; func _ebced ( _gdbfg int , _ceebd map [ int ] [ ] float64 ) ( [ ] int , int ) { _gagg := make ( [ ] int , _gdbfg ) ; _ddceg := 0 ; for _dead := 0 ; _dead < _gdbfg ; _dead ++ { _gagg [ _dead ] = _ddceg ; _ddceg += len ( _ceebd [ _dead ] ) + 1 ; } ; return _gagg , _ddceg ; } ;
2022-02-05 21:34:53 +00:00
2022-07-13 21:28:43 +00:00
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
// Marks flagged as Meta and marks whose Text is rejected by _bceaa are skipped.
// The bool result is false when no mark contributed to the rectangle.
func (_cccc *TextMarkArray) BBox() (_ee.PdfRectangle, bool) {
	var (
		bounds _ee.PdfRectangle
		found  bool
	)
	for _, mark := range _cccc._fef {
		if mark.Meta || _bceaa(mark.Text) {
			continue
		}
		if !found {
			bounds, found = mark.BBox, true
			continue
		}
		bounds = _dcd(bounds, mark.BBox)
	}
	return bounds, found
}

// splitSec sorts `_fdec` in place by (_gcfe, _fbdf) and then partitions it into
// groups: a ruling joins a group when it alignsSec with any ruling already in it.
// NOTE(review): indexes element 0, so it panics on an empty list — confirm callers
// never pass one.
func (_fdec rulingList) splitSec() []rulingList {
	_e.Slice(_fdec, func(i, j int) bool {
		a, b := _fdec[i], _fdec[j]
		if a._gcfe == b._gcfe {
			return a._fbdf < b._fbdf
		}
		return a._gcfe < b._gcfe
	})

	// taken records every ruling that has already been placed in a group.
	taken := make(map[*ruling]struct{}, len(_fdec))
	grow := func(seed *ruling) rulingList {
		group := rulingList{seed}
		taken[seed] = struct{}{}
		for _, cand := range _fdec {
			if _, done := taken[cand]; done {
				continue
			}
			for _, member := range group {
				if cand.alignsSec(member) {
					group = append(group, cand)
					taken[cand] = struct{}{}
					break
				}
			}
		}
		return group
	}

	groups := []rulingList{grow(_fdec[0])}
	for _, r := range _fdec[1:] {
		if _, done := taken[r]; !done {
			groups = append(groups, grow(r))
		}
	}
	return groups
}

// yNeighbours builds one "enter" event at the lower y bound and one "leave"
// event at the upper y bound of every para, then hands them to eventNeighbours.
// When `_cbgf` is non-zero, both bounds are widened by `_cbgf` times the para's
// fontsize().
func (_feeaf paraList) yNeighbours(_cbgf float64) map[*textPara][]int {
	events := make([]event, 2*len(_feeaf))
	for i, para := range _feeaf {
		lo, hi := para.Lly, para.Ury
		if _cbgf != 0 {
			lo = para.Lly - _cbgf*para.fontsize()
			hi = para.Ury + _cbgf*para.fontsize()
		}
		events[2*i] = event{lo, true, i}
		events[2*i+1] = event{hi, false, i}
	}
	return _feeaf.eventNeighbours(events)
}

// _bfbf reports whether the left edge of `_eceb` lies in the half-open interval
// [Urx, Urx+_afeg) measured from the right edge of `_cgac`.
func _bfbf(_cgac *wordBag, _eceb *textWord, _afeg float64) bool {
	left := _eceb.Llx
	if !(_cgac.Urx <= left) {
		return false
	}
	return left < _cgac.Urx+_afeg
}

// _dfdf snaps every point of every path in `_gdaeb` through _ddaccg, in place.
// It does nothing when the package-level _gage is negative. When _cfcd is set,
// points that _fabdg reports as changed are printed together with their delta.
func _dfdf(_gdaeb []pathSection) {
	if _gage < 0.0 {
		return
	}
	if _cfcd {
		_fdd.Log.Info("granularize: %d subpath sections", len(_gdaeb))
	}
	for secIdx, section := range _gdaeb {
		for pathIdx, path := range section._fdee {
			for ptIdx, before := range path._bdb {
				after := _ebf.Point{X: _ddaccg(before.X), Y: _ddaccg(before.Y)}
				path._bdb[ptIdx] = after
				if !_cfcd || _fabdg(before, after) {
					continue
				}
				delta := _ebf.Point{X: after.X - before.X, Y: after.Y - before.Y}
				_fc.Printf("%4d - %4d - %4d: %.2f \u2192 %.2f (%g)\n", secIdx, pathIdx, ptIdx, before, after, delta)
			}
		}
	}
}

// connections returns the set of ruling indexes reachable from `_gacb` by
// repeatedly following `_eabce`: index j is added whenever _eabce[j] contains an
// index that has been visited.
func (_fcd rulingList) connections(_eabce map[int]intSet, _gacb int) intSet {
	reached := make(intSet)
	seen := make(intSet)

	var walk func(int)
	walk = func(idx int) {
		if seen.has(idx) {
			return
		}
		seen.add(idx)
		for j := range _fcd {
			if _eabce[j].has(idx) {
				reached.add(j)
			}
		}
		for j := range _fcd {
			if reached.has(j) {
				walk(j)
			}
		}
	}

	walk(_gacb)
	return reached
}

// isExportable reports whether the table should be exported. A table with
// _ebfb set is always exportable. Otherwise it is exportable as soon as one of
// the first-column cells (0, y) is missing, or holds text longer than one rune
// that does not match _gefge.
func (_fffg *textTable) isExportable() bool {
	if _fffg._ebfb {
		return true
	}
	trivial := func(y int) bool {
		cell := _fffg.get(0, y)
		if cell == nil {
			return false
		}
		text := cell.text()
		runes := _df.RuneCountInString(text)
		matched := _gefge.MatchString(text)
		return runes <= 1 || matched
	}
	for y := 0; y < _fffg._dcbg; y++ {
		if !trivial(y) {
			return true
		}
	}
	return false
}

// lastpointEstablished returns (_dac, false) when _bfdb is set, otherwise the
// last point of the final sub-path with false when that sub-path has _caee set,
// and (zero point, true) in every other case.
// NOTE(review): the meaning of the bool is not visible here — confirm with callers.
func (_gbc *shapesState) lastpointEstablished() (_ebf.Point, bool) {
	if _gbc._bfdb {
		return _gbc._dac, false
	}
	if n := len(_gbc._ebd); n > 0 && _gbc._ebd[n-1]._caee {
		return _gbc._ebd[n-1].last(), false
	}
	return _ebf.Point{}, true
}

// writeText writes the para to `_gegbb`. Without an attached table (_cbfe) it
// delegates to writeCellText. With a table it writes row by row: each cell's
// text (or a tab for a missing cell) followed by a space, and a newline between
// rows. Write errors are ignored, as in the rest of this file.
func (_dgcd *textPara) writeText(_gegbb _f.Writer) {
	table := _dgcd._cbfe
	if table == nil {
		_dgcd.writeCellText(_gegbb)
		return
	}
	for y := 0; y < table._dcbg; y++ {
		for x := 0; x < table._bfgf; x++ {
			if cell := table.get(x, y); cell != nil {
				cell.writeCellText(_gegbb)
			} else {
				_gegbb.Write([]byte("\t"))
			}
			_gegbb.Write([]byte(" "))
		}
		if y < table._dcbg-1 {
			_gegbb.Write([]byte("\n"))
		}
	}
}

// log dumps the tiling under the title `_ecbe` when the package-level _gedg
// debug flag is set; otherwise it is a no-op.
func (_gdeg gridTiling) log(_ecbe string) {
	if !_gedg {
		return
	}
	_fdd.Log.Info("tiling: %d x %d %q", len(_gdeg._facc), len(_gdeg._aeccd), _ecbe)
	_fc.Printf("   llx=%.2f\n", _gdeg._facc)
	_fc.Printf("   lly=%.2f\n", _gdeg._aeccd)
	for rowIdx, rowKey := range _gdeg._aeccd {
		row, haveRow := _gdeg._fgbg[rowKey]
		if !haveRow {
			continue
		}
		_fc.Printf("%4d: %6.2f\n", rowIdx, rowKey)
		for colIdx, colKey := range _gdeg._facc {
			tile, haveTile := row[colKey]
			if !haveTile {
				continue
			}
			_fc.Printf("%8d: %s\n", colIdx, tile.String())
		}
	}
}

// reorder rearranges `_gdbd` in place so that position i holds the element that
// was at index _ggefc[i].
func (_gdbd paraList) reorder(_ggefc []int) {
	shuffled := make(paraList, len(_gdbd))
	for dst, src := range _ggefc {
		shuffled[dst] = _gdbd[src]
	}
	copy(_gdbd, shuffled)
}
2021-12-14 01:08:28 +00:00
2022-07-13 21:28:43 +00:00
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct {
	Fonts []Font
}

// _dba builds a textObject bound to the given extractor, resources, graphics
// state, text state and state stack, with both of its matrices set to identity.
func _dba(_feac *Extractor, _cge *_ee.PdfPageResources, _bbfg _aeg.GraphicsState, _deca *textState, _addc *stateStack) *textObject {
	obj := &textObject{
		_ecfa: _feac,
		_faa:  _cge,
		_cddc: _bbfg,
		_bfdc: _addc,
		_bbca: _deca,
	}
	obj._ccgf = _ebf.IdentityMatrix()
	obj._afbg = _ebf.IdentityMatrix()
	return obj
}

// pop removes the top state from the stack and returns a pointer to a copy of
// it. It returns nil when the stack is empty.
func (_ddf *stateStack) pop() *textState {
	if _ddf.empty() {
		return nil
	}
	stack := *_ddf
	last := len(stack) - 1
	top := *stack[last]
	*_ddf = stack[:last]
	return &top
}

// computeViews rebuilds the derived views of the page text (_aec, _fddc and
// _ccbc) from the raw marks in _gcea. Rulings are collected from _ccdb and
// _ccdc when the _dcag / _fddd switches are on, and the marks are processed
// separately for each of the rotations 0, 90, 180 and 270.
func (_ceab *PageText) computeViews() {
	var rulings rulingList
	if _dcag {
		rulings = append(rulings, _bgfd(_ceab._ccdb)...)
	}
	if _fddd {
		rulings = append(rulings, _bfed(_ceab._ccdc)...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(_ceab._gcea)
	for rotation := 0; rotation < 360 && remaining > 0; rotation += 90 {
		// At most `remaining` marks can still match; the previous hint
		// (len - remaining) was zero on the first pass.
		group := make([]*textMark, 0, remaining)
		for _, mark := range _ceab._gcea {
			if mark._gdba == rotation {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _bafb(group, _ceab._cace, rulings, tilings)...)
		remaining -= len(group)
	}

	buf := new(_ae.Buffer)
	paras.writeText(buf)
	_ceab._aec = buf.String()
	_ceab._fddc = paras.toTextMarks()
	_ceab._ccbc = paras.tables()
	if _fabc {
		_fdd.Log.Info("computeViews: tables=%d", len(_ceab._ccbc))
	}
}

// bbox returns the rectangle spanned by the rulings. The kind of the first
// ruling decides which of primMinMax / secMinMax feeds the x and y extents.
// An empty list is logged as an error and yields the zero rectangle.
func (_acdgd rulingList) bbox() _ee.PdfRectangle {
	if len(_acdgd) == 0 {
		_fdd.Log.Error("rulingList.bbox: no rulings")
		return _ee.PdfRectangle{}
	}
	var box _ee.PdfRectangle
	switch _acdgd[0]._deeb {
	case _dcce:
		box.Llx, box.Urx = _acdgd.secMinMax()
		box.Lly, box.Ury = _acdgd.primMinMax()
	default:
		box.Llx, box.Urx = _acdgd.primMinMax()
		box.Lly, box.Ury = _acdgd.secMinMax()
	}
	return box
}

// push stores a copy of `_bcff` on top of the stack, so later changes to the
// caller's state do not affect the saved one.
func (_bbg *stateStack) push(_bcff *textState) {
	snapshot := *_bcff
	*_bbg = append(*_bbg, &snapshot)
}

// _eaea appends a copy of the template mark _cff carrying the text `_bfgdf` to
// `_dgb` via _aeaf.
func _eaea(_dgb []TextMark, _faac *int, _bfgdf string) []TextMark {
	mark := _cff
	mark.Text = _bfgdf
	return _aeaf(_dgb, _faac, mark)
}

// readBefore reports whether para `_beac` should be read before para `_dbfe`.
// It is true when _fgaf accepts the pair and the first para has the larger Lly,
// or when the first para's _ddebf box ends left of the second's and no other
// para returned by llyRange for the Lly interval between them overlaps the
// [max Llx, min Urx] band of the two boxes.
func (_baaac paraList) readBefore(_edfg []int, _beac, _dbfe int) bool {
	first, second := _baaac[_beac], _baaac[_dbfe]
	if _fgaf(first, second) && first.Lly > second.Lly {
		return true
	}
	if !(first._ddebf.Urx < second._ddebf.Llx) {
		return false
	}

	lo, hi := first.Lly, second.Lly
	if lo > hi {
		hi, lo = lo, hi
	}
	left := _fd.Max(first._ddebf.Llx, second._ddebf.Llx)
	right := _fd.Min(first._ddebf.Urx, second._ddebf.Urx)

	for _, idx := range _baaac.llyRange(_edfg, lo, hi) {
		if idx == _beac || idx == _dbfe {
			continue
		}
		other := _baaac[idx]
		if other._ddebf.Llx <= right && left <= other._ddebf.Urx {
			return false
		}
	}
	return true
}

// imageExtractContext carries the state of one image extraction run: the
// collected image marks (_ccg), three counters, a per-stream image cache (_be)
// and the extraction options (_ec).
type imageExtractContext struct {
	_ccg  []ImageMark
	_fag  int
	_bg   int
	_fdbg int
	_be   map[*_dd.PdfObjectStream]*cachedImage
	_ec   *ImageExtractOptions
}

// _aaef returns `_cceb`, followed by the values of `_dfgbg` strictly between the
// smaller and the larger of the two bounds, followed by `_aadg`. Scanning stops
// at the first value at or above the larger bound, so `_dfgbg` is presumably
// sorted ascending — confirm with callers.
func _aaef(_dfgbg []float64, _cceb, _aadg float64) []float64 {
	lo, hi := _cceb, _aadg
	if hi < lo {
		lo, hi = hi, lo
	}
	out := make([]float64, 0, len(_dfgbg)+2)
	out = append(out, _cceb)
	for _, v := range _dfgbg {
		if v <= lo {
			continue
		}
		if v >= hi {
			break
		}
		out = append(out, v)
	}
	return append(out, _aadg)
}

// reset restores both matrices of the text object to identity and clears _abbb.
func (_efac *textObject) reset() {
	_efac._ccgf = _ebf.IdentityMatrix()
	_efac._afbg = _ebf.IdentityMatrix()
	_efac._abbb = nil
}

// _cdbaf drains `_cfeg` into a list of smaller word bags. For every depth index
// it repeatedly seeds a new bag from the first word and grows it with scanBand
// passes ("vertical", "horizontal", then an unnamed counting pass that may be
// followed by "other") until a full round adds nothing.
func _cdbaf(_cfeg *wordBag, _bbcfb float64, _fcbee, _beba rulingList) []*wordBag {
	var bags []*wordBag
	for _, depthIdx := range _cfeg.depthIndexes() {
		grew := false
		for !_cfeg.empty(depthIdx) {
			readingIdx := _cfeg.firstReadingIndex(depthIdx)
			seed := _cfeg.firstWord(readingIdx)
			bag := _eaag(seed, _bbcfb, _fcbee, _beba)
			_cfeg.removeWord(seed, readingIdx)
			if _fba {
				_fdd.Log.Info("firstWord ^^^^ %s", seed.String())
			}

			// Repeat while the previous round pulled in at least one word.
			for again := true; again; again = grew {
				grew = false
				gapA := _feeg * bag._bbgb
				gapReading := _ffdd * bag._bbgb
				gapDepth := _bbff * bag._bbgb
				if _fba {
					_fdd.Log.Info("paraWords depth %.2f - %.2f maxIntraDepthGap=%.2f maxIntraReadingGap=%.2f", bag.minDepth(), bag.maxDepth(), gapDepth, gapReading)
				}
				if _cfeg.scanBand("vertical", bag, _cafa(_bfdcd, 0), bag.minDepth()-gapDepth, bag.maxDepth()+gapDepth, _bacgd, false, false) > 0 {
					grew = true
				}
				if _cfeg.scanBand("horizontal", bag, _cafa(_bfdcd, gapReading), bag.minDepth(), bag.maxDepth(), _ffab, false, false) > 0 {
					grew = true
				}
				if grew {
					continue
				}
				count := _cfeg.scanBand("", bag, _cafa(_bfbf, gapA), bag.minDepth(), bag.maxDepth(), _gdgg, true, false)
				if count <= 0 {
					continue
				}
				span := (bag.maxDepth() - bag.minDepth()) / bag._bbgb
				if (count > 1 && float64(count) > 0.3*span) || count <= 10 {
					if _cfeg.scanBand("other", bag, _cafa(_bfbf, gapA), bag.minDepth(), bag.maxDepth(), _gdgg, false, true) > 0 {
						grew = true
					}
				}
			}
			bags = append(bags, bag)
		}
	}
	return bags
}

// pullWord appends `_dffbf` to the line and removes it from `_ffdbc` at index
// `_cdab`.
func (_cead *textLine) pullWord(_ffdbc *wordBag, _dffbf *textWord, _cdab int) {
	_cead.appendWord(_dffbf)
	_ffdbc.removeWord(_dffbf, _cdab)
}

// getFont returns the font named `_fca`.
//
// When the extractor has a font cache (_ac), the cache is keyed by the string
// form of the font dictionary; a hit refreshes the entry's access stamp and
// returns the cached font. On a miss the font is loaded with getFontDirect and
// inserted, first evicting the entry with the smallest access stamp when the
// cache already holds _fagb entries.
//
// Errors from getFontDict and getFontDirect are returned unchanged.
func (_begd *textObject) getFont(_fca string) (*_ee.PdfFont, error) {
	ext := _begd._ecfa
	if ext._ac != nil {
		dict, err := _begd.getFontDict(_fca)
		if err != nil {
			_fdd.Log.Debug("ERROR: getFont: name=%s, error: %s", _fca, err.Error())
			return nil, err
		}
		ext._cb++
		key := dict.String()
		if entry, ok := ext._ac[key]; ok {
			// Map values are copies: the refreshed stamp must be stored
			// back, otherwise eviction never sees that the entry was used.
			entry._fdfe = ext._cb
			ext._ac[key] = entry
			return entry._ebe, nil
		}
	}

	// NOTE(review): on a cache miss this looks the dictionary up a second
	// time; kept because getFontDict is not visible from here.
	dict, err := _begd.getFontDict(_fca)
	if err != nil {
		return nil, err
	}
	font, err := _begd.getFontDirect(_fca)
	if err != nil {
		return nil, err
	}

	if ext._ac != nil {
		fresh := fontEntry{font, ext._cb}
		if len(ext._ac) >= _fagb {
			var keys []string
			for k := range ext._ac {
				keys = append(keys, k)
			}
			_e.Slice(keys, func(i, j int) bool {
				return ext._ac[keys[i]]._fdfe < ext._ac[keys[j]]._fdfe
			})
			delete(ext._ac, keys[0])
		}
		ext._ac[dict.String()] = fresh
	}
	return font, nil
}

// depthRange returns, in ascending order, the keys of _eed that lie in the
// closed interval [_bfag, _beaa], or nil when there are none.
func (_cfcg *wordBag) depthRange(_bfag, _beaa int) []int {
	var keys []int
	for k := range _cfcg._eed {
		if _bfag <= k && k <= _beaa {
			keys = append(keys, k)
		}
	}
	if len(keys) == 0 {
		return nil
	}
	_e.Ints(keys)
	return keys
}

// isActualGrid checks whether the rulings form a grid. It augments the list via
// augmentGrid, requires at least _bdacg+1 rulings in the first returned list and
// _bfc+1 in the second, and then verifies alignment: by comparing the outermost
// rulings when _gbb is set, otherwise via aligned() on each list. On success
// the two lists are returned concatenated together with true.
func (_eeca rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := _eeca.augmentGrid()
	if len(verts) < _bdacg+1 || len(horzs) < _bfc+1 {
		if _cfcd {
			_fdd.Log.Info("isActualGrid: Not aligned. %d x %d < %d x %d", len(verts), len(horzs), _bdacg+1, _bfc+1)
		}
		return nil, false
	}
	if _cfcd {
		_fdd.Log.Info("isActualGrid: %s : %t & %t \u2192 %t", _eeca, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for i, r := range _eeca {
			_fc.Printf("%4d: %v\n", i, r)
		}
	}

	if _gbb {
		v0, v1 := verts[0], verts[len(verts)-1]
		h0, h1 := horzs[0], horzs[len(horzs)-1]
		if !(_dbefe(v0._dadb-h0._gcfe) && _dbefe(v1._dadb-h0._fbdf) && _dbefe(h0._dadb-v0._fbdf) && _dbefe(h1._dadb-v0._gcfe)) {
			if _cfcd {
				_fdd.Log.Info("isActualGrid:  Not aligned.\n\tv0=%s\n\tv1=%s\n\th0=%s\n\th1=%s", v0, v1, h0, h1)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			if _gega {
				_fdd.Log.Info("isActualGrid: Not aligned verts. %d", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _cfcd {
				_fdd.Log.Info("isActualGrid: Not aligned horzs. %d", len(horzs))
			}
			return nil, false
		}
	}

	grid := append(verts, horzs...)
	return grid, true
}
2022-06-06 22:48:24 +00:00
2022-07-13 21:28:43 +00:00
// Marks returns the TextMark collection for a page. It represents all the text on the page.
// The returned array shares its backing slice with the page text.
func (_aee PageText) Marks() *TextMarkArray {
	marks := new(TextMarkArray)
	marks._fef = _aee._fddc
	return marks
}
2022-06-06 22:48:24 +00:00
2022-07-13 21:28:43 +00:00
// Elements returns the TextMarks in `ma`.
func (_gcee *TextMarkArray) Elements() []TextMark {
	return _gcee._fef
}

// _ddc reports whether both _dffc and _gdef hold for the two rectangles.
func _ddc(_fdcg, _gdgd _ee.PdfRectangle) bool {
	if !_dffc(_fdcg, _gdgd) {
		return false
	}
	return _gdef(_fdcg, _gdgd)
}

// empty reports whether the stack holds no states.
func (_cabe *stateStack) empty() bool {
	return len(*_cabe) == 0
}

// intersects reports whether the two rulings are of the two different kinds
// _fbeg and _dcce and each one's _dadb lies within the other's
// [_gcfe, _fbdf] interval, widened by the tolerance _ebcb on both sides.
func (_fafa *ruling) intersects(_ggbg *ruling) bool {
	orthogonal := (_fafa._deeb == _fbeg && _ggbg._deeb == _dcce) ||
		(_ggbg._deeb == _fbeg && _fafa._deeb == _dcce)

	// spans reports whether b's _dadb falls inside a's tolerant extent.
	spans := func(a, b *ruling) bool {
		return a._gcfe-_ebcb <= b._dadb && b._dadb <= a._fbdf+_ebcb
	}
	o1 := spans(_fafa, _ggbg)
	o2 := spans(_ggbg, _fafa)
	result := orthogonal && o1 && o2

	if _cfcd {
		_fc.Printf("    intersects:  orthogonal=%t o1=%t o2=%t \u2192 %t\n"+
			"       v=%s\n"+
			"      w=%s\n",
			orthogonal, o1, o2, result, _fafa, _ggbg)
	}
	return result
}

// complete reports whether the tile has all four borders.
func (_fcee gridTile) complete() bool {
	return _fcee.numBorders() == 4
}

// quadraticTo records only the end point (_ddea, _bdf) of the curve; the
// control point arguments are not used.
func (_ffgc *shapesState) quadraticTo(_ececg, _dacf, _ddea, _bdf float64) {
	if _agcb {
		_fdd.Log.Info("quadraticTo:")
	}
	_ffgc.addPoint(_ddea, _bdf)
}

// merge combines the rulings into one: kind, _fbgc and Color come from the
// first ruling, _dadb is the mean of all _dadb values, and the extent is the
// smallest _gcfe to the largest _fbdf.
// NOTE(review): indexes element 0, so it panics on an empty list.
func (_fbfaa rulingList) merge() *ruling {
	head := _fbfaa[0]
	sum, lo, hi := head._dadb, head._gcfe, head._fbdf
	for _, r := range _fbfaa[1:] {
		sum += r._dadb
		if r._gcfe < lo {
			lo = r._gcfe
		}
		if r._fbdf > hi {
			hi = r._fbdf
		}
	}

	merged := &ruling{
		_deeb: head._deeb,
		_fbgc: head._fbgc,
		Color: head.Color,
		_dadb: sum / float64(len(_fbfaa)),
		_gcfe: lo,
		_fbdf: hi,
	}
	if _gega {
		_fdd.Log.Info("merge: %2d vecs %s", len(_fbfaa), merged)
		for i, r := range _fbfaa {
			_fc.Printf("%4d: %s\n", i, r)
		}
	}
	return merged
}
2022-06-06 22:48:24 +00:00
2022-07-13 21:28:43 +00:00
// Append appends `mark` to the mark array.
func (_bage *TextMarkArray) Append(mark TextMark) {
	_bage._fef = append(_bage._fef, mark)
}

// drawRectangle adds a closed rectangular sub-path with one corner at
// (_gbe, _fggf), width `_fab` and height `_abgc`. When _agcb is set, the
// rectangle is also logged in device coordinates.
func (_afbe *shapesState) drawRectangle(_gbe, _fggf, _fab, _abgc float64) {
	if _agcb {
		ll := _afbe.devicePoint(_gbe, _fggf)
		ur := _afbe.devicePoint(_gbe+_fab, _fggf+_abgc)
		rect := _ee.PdfRectangle{Llx: ll.X, Lly: ll.Y, Urx: ur.X, Ury: ur.Y}
		_fdd.Log.Info("drawRectangle: %6.2f", rect)
	}

	farX, farY := _gbe+_fab, _fggf+_abgc
	_afbe.newSubPath()
	_afbe.moveTo(_gbe, _fggf)
	_afbe.lineTo(farX, _fggf)
	_afbe.lineTo(farX, farY)
	_afbe.lineTo(_gbe, farY)
	_afbe.closePath()
}

// _bbf returns a textState initialised with _cfc = 100, the fill render mode
// and `_acf` stored in _gede.
func _bbf(_acf _ee.PdfRectangle) textState {
	var state textState
	state._cfc = 100
	state._dfab = RenderModeFill
	state._gede = _acf
	return state
}
2022-06-06 22:48:24 +00:00
// Font represents the font properties on a PDF page.
2022-07-13 21:28:43 +00:00
type Font struct { PdfFont * _ee . PdfFont ;
2022-06-06 22:48:24 +00:00
// FontName represents Font Name from font properties.
FontName string ;
// FontType represents Font Subtype entry in the font dictionary inside page resources.
// Examples : type0, Type1, MMType1, Type3, TrueType, CIDFont.
FontType string ;
// ToUnicode is true if font provides a `ToUnicode` mapping.
ToUnicode bool ;
// IsCID is true if underlying font is a composite font.
// Composite font is represented by a font dictionary whose Subtype is `Type0`
IsCID bool ;
// IsSimple is true if font is simple font.
// A simple font is limited to only 8 bit (255) character codes.
IsSimple bool ;
// FontData represents the raw data of the embedded font file.
// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CCF).
// FontData value can be indicates from `FontFile`, `FontFile2` or `FontFile3` inside Font Descriptor.
// At most, only one of `FontFile`, `FontFile2` or `FontFile3` will be FontData value.
FontData [ ] byte ;
// FontFileName is a name representing the font. it has format:
// (Font Name) + (Font Type Extension), example: helvetica.ttf.
FontFileName string ;
// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
2022-07-13 21:28:43 +00:00
FontDescriptor * _ee . PdfFontDescriptor ; } ; func _dffc ( _feacc , _fff _ee . PdfRectangle ) bool { return _fff . Llx <= _feacc . Urx && _feacc . Llx <= _fff . Urx ; } ; func ( _dceg * textObject ) moveTextSetLeading ( _cbe , _eacg float64 ) { _dceg . _bbca . _caa = - _eacg ; _dceg . moveLP ( _cbe , _eacg ) ;
} ; func ( _dbec * ruling ) gridIntersecting ( _dgff * ruling ) bool { return _agac ( _dbec . _gcfe , _dgff . _gcfe ) && _agac ( _dbec . _fbdf , _dgff . _fbdf ) ; } ; func _dada ( _gecb [ ] _dd . PdfObject ) ( _gedgd , _badb float64 , _fdcd error ) { if len ( _gecb ) != 2 { return 0 , 0 , _fc . Errorf ( "\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064" , len ( _gecb ) ) ;
} ; _ggbbf , _fdcd := _dd . GetNumbersAsFloat ( _gecb ) ; if _fdcd != nil { return 0 , 0 , _fdcd ; } ; return _ggbbf [ 0 ] , _ggbbf [ 1 ] , nil ; } ;
2022-06-06 22:48:24 +00:00
2022-07-13 21:28:43 +00:00
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
//
// Only marks whose bounding box satisfies _ddc against `bbox` are kept. They
// are then processed separately for each of the rotations 0, 90, 180 and 270,
// without rulings or tilings, and the text, mark and table views are replaced.
func (_accf *PageText) ApplyArea(bbox _ee.PdfRectangle) {
	selected := make([]*textMark, 0, len(_accf._gcea))
	for _, mark := range _accf._gcea {
		if _ddc(mark.bbox(), bbox) {
			selected = append(selected, mark)
		}
	}

	var paras paraList
	remaining := len(selected)
	for rotation := 0; rotation < 360 && remaining > 0; rotation += 90 {
		// At most `remaining` marks can still match this rotation; the
		// previous hint (len - remaining) was zero on the first pass.
		group := make([]*textMark, 0, remaining)
		for _, mark := range selected {
			if mark._gdba == rotation {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _bafb(group, _accf._cace, nil, nil)...)
		remaining -= len(group)
	}

	buf := new(_ae.Buffer)
	paras.writeText(buf)
	_accf._aec = buf.String()
	_accf._fddc = paras.toTextMarks()
	_accf._ccbc = paras.tables()
}

// _fdgd returns the translation matrix that moves the origin to `_gcba`.
func _fdgd(_gcba _ebf.Point) _ebf.Matrix {
	return _ebf.TranslationMatrix(_gcba.X, _gcba.Y)
}
2022-06-06 22:48:24 +00:00
2022-07-13 21:28:43 +00:00
// PageText represents the layout of text on a device page.
type PageText struct {
	_gcea []*textMark                     // raw marks, the input of the views
	_aec  string                          // text view
	_fddc []TextMark                      // exported mark view
	_ccbc []TextTable                     // table view
	_cace _ee.PdfRectangle                // passed to _bafb as the page rectangle
	_ccdb []pathSection                   // path sections fed to _bgfd
	_ccdc []pathSection                   // path sections fed to _bfed
	_eda  *_aeg.ContentStreamOperations
}

// cubicTo records only the end point (_dda, _ebccc) of the curve; the control
// point arguments are not used.
func (_dgd *shapesState) cubicTo(_dadg, _fagd, _afdg, _acaf, _dda, _ebccc float64) {
	if _agcb {
		_fdd.Log.Info("cubicTo:")
	}
	_dgd.addPoint(_dda, _ebccc)
}

// _fbbc classifies a ruling from its two extents: _dcce when `_bdfc` is at
// least `_acdb` and _baba(_ecfde, _bdfc) holds, _fbeg for the mirrored test,
// and _gcbc otherwise.
func _fbbc(_bdfc, _ecfde, _acdb float64) rulingKind {
	switch {
	case _bdfc >= _acdb && _baba(_ecfde, _bdfc):
		return _dcce
	case _ecfde >= _acdb && _baba(_bdfc, _ecfde):
		return _fbeg
	default:
		return _gcbc
	}
}

// topoOrder runs a depth-first traversal over the paras, following an edge
// from i to j whenever readBefore(i, j) holds, collects the indexes in
// post-order and returns that sequence passed through _gefab.
func (_aadf paraList) topoOrder() []int {
	if _bcfe {
		_fdd.Log.Info("topoOrder:")
	}
	count := len(_aadf)
	visited := make([]bool, count)
	postOrder := make([]int, 0, count)
	llyOrder := _aadf.llyOrdering()

	var visit func(node int)
	visit = func(node int) {
		visited[node] = true
		for next := 0; next < count; next++ {
			if !visited[next] && _aadf.readBefore(llyOrder, node, next) {
				visit(next)
			}
		}
		postOrder = append(postOrder, node)
	}

	for node := 0; node < count; node++ {
		if visited[node] {
			continue
		}
		visit(node)
	}
	return _gefab(postOrder)
}
2022-06-06 22:48:24 +00:00
2022-07-13 21:28:43 +00:00
// String returns a human readable description of `vecs`: "{ EMPTY }" for an
// empty list, the two counts from vertsHorzs when either is zero, and otherwise
// the counts plus the rectangle spanned by the first and last ruling of each list.
func (_eebed rulingList) String() string {
	if len(_eebed) == 0 {
		return "{ EMPTY }"
	}
	verts, horzs := _eebed.vertsHorzs()
	nVerts, nHorzs := len(verts), len(horzs)
	if nVerts == 0 || nHorzs == 0 {
		return _fc.Sprintf("{%d x %d}", nVerts, nHorzs)
	}
	box := _ee.PdfRectangle{
		Llx: verts[0]._dadb,
		Urx: verts[nVerts-1]._dadb,
		Lly: horzs[nHorzs-1]._dadb,
		Ury: horzs[0]._dadb,
	}
	return _fc.Sprintf("{%d x %d: %6.2f}", nVerts, nHorzs, box)
}

// equals reports whether the two rulings have the same kind and their _dadb,
// _gcfe and _fbdf values all match according to _agac.
func (_bfadd *ruling) equals(_gddf *ruling) bool {
	if _bfadd._deeb != _gddf._deeb {
		return false
	}
	return _agac(_bfadd._dadb, _gddf._dadb) &&
		_agac(_bfadd._gcfe, _gddf._gcfe) &&
		_agac(_bfadd._fbdf, _gddf._fbdf)
}

// _cca reports true when either para has _faaed set; otherwise it applies
// _dafec to the difference of the two depths.
func _cca(_fcaf, _egef *textPara) bool {
	if _fcaf._faaed || _egef._faaed {
		return true
	}
	gap := _fcaf.depth() - _egef.depth()
	return _dafec(gap)
}

// log dumps the list under the title `_fedbd` when the _cfcd debug flag is set;
// otherwise it is a no-op.
func (_edcd rulingList) log(_fedbd string) {
	if !_cfcd {
		return
	}
	_fdd.Log.Info("### %10s: vecs=%s", _fedbd, _edcd.String())
	for i, r := range _edcd {
		_fc.Printf("%4d: %s\n", i, r.String())
	}
}

// computeText concatenates the _cdg strings of the word's marks, in order.
func (_ecfae *textWord) computeText() string {
	var sb _a.Builder
	for _, mark := range _ecfae._eedb {
		sb.WriteString(mark._cdg)
	}
	return sb.String()
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
2022-07-13 21:28:43 +00:00
// ExtractPageImages runs image extraction over the extractor's content stream
// and resources with the given options (which may be nil) and returns the
// collected image marks, or the first extraction error.
func (_aca *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	ctx := &imageExtractContext{_ec: options}
	if err := ctx.extractContentStreamImages(_aca._bc, _aca._abc); err != nil {
		return nil, err
	}
	return &PageImages{Images: ctx._ccg}, nil
}

// empty reports whether the bag has no entry for index `_ffa`.
func (_dgga *wordBag) empty(_ffa int) bool {
	if _, present := _dgga._eed[_ffa]; present {
		return false
	}
	return true
}

// _baf reports whether any font in `_eba` has the FontName `_cec`.
func _baf(_eba []Font, _cec string) bool {
	for i := range _eba {
		if _eba[i].FontName == _cec {
			return true
		}
	}
	return false
}

// _ffadb converts a PDF color in the given colorspace to a Go color. It falls
// back to black when either argument is nil, when the conversion to RGB fails,
// or when the converted color is not a *PdfColorDeviceRGB.
func _ffadb(_abade _ee.PdfColorspace, _cffdf _ee.PdfColor) _c.Color {
	if _abade == nil || _cffdf == nil {
		return _c.Black
	}
	converted, err := _abade.ColorToRGB(_cffdf)
	if err != nil {
		_fdd.Log.Debug("WARN: could not convert color %v (%v) to RGB: %s", _cffdf, _abade, err)
		return _c.Black
	}
	rgb, isRGB := converted.(*_ee.PdfColorDeviceRGB)
	if !isRGB {
		_fdd.Log.Debug("WARN: converted color is not in the RGB colorspace: %v", converted)
		return _c.Black
	}
	return _c.NRGBA{
		R: uint8(rgb.R() * 255),
		G: uint8(rgb.G() * 255),
		B: uint8(rgb.B() * 255),
		A: uint8(255),
	}
}

// secMinMax returns the smallest _gcfe and the largest _fbdf over all rulings.
// NOTE(review): indexes element 0, so it panics on an empty list.
func (_gffa rulingList) secMinMax() (float64, float64) {
	lo, hi := _gffa[0]._gcfe, _gffa[0]._fbdf
	for _, r := range _gffa[1:] {
		if r._gcfe < lo {
			lo = r._gcfe
		}
		if r._fbdf > hi {
			hi = r._fbdf
		}
	}
	return lo, hi
}

// _cacf builds a 2x2 table from four paras, placed at (0,0), (1,0), (0,1) and
// (1,1) in argument order.
func _cacf(_bdef, _ebff, _ccfcd, _facb *textPara) *textTable {
	table := &textTable{_bfgf: 2, _dcbg: 2, _dbfec: make(map[uint64]*textPara, 4)}
	table.put(0, 0, _bdef)
	table.put(1, 0, _ebff)
	table.put(0, 1, _ccfcd)
	table.put(1, 1, _facb)
	return table
}

// tables returns the exported form of every para table that isExportable.
func (_afgd paraList) tables() []TextTable {
	var exported []TextTable
	if _fabc {
		_fdd.Log.Info("paras.tables:")
	}
	for _, para := range _afgd {
		table := para._cbfe
		if table == nil || !table.isExportable() {
			continue
		}
		exported = append(exported, table.toTextTable())
	}
	return exported
}

// alignsPrimary reports whether the rulings have the same kind and their _dadb
// values differ by less than half of _gade.
func (_fadac *ruling) alignsPrimary(_fced *ruling) bool {
	if _fadac._deeb != _fced._deeb {
		return false
	}
	return _fd.Abs(_fadac._dadb-_fced._dadb) < _gade*0.5
}

// _cff is the template for synthetic marks: a white Meta mark whose Text is
// overwritten by callers such as _eaea.
var _cff = TextMark{
	Text:        "[X]",
	Original:    " ",
	Meta:        true,
	FillColor:   _c.White,
	StrokeColor: _c.White,
}

// merge sorts the paras into reading order and combines them into a single
// para covering the union of their rectangles and holding all their _ddgc
// entries in that order. It returns nil for an empty list.
func (_cggac paraList) merge() *textPara {
	_fdd.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_cggac))
	if len(_cggac) == 0 {
		return nil
	}
	_cggac.sortReadingOrder()
	head := _cggac[0]
	bounds, lines := head.PdfRectangle, head._ddgc
	for _, para := range _cggac[1:] {
		bounds = _dcd(bounds, para.PdfRectangle)
		lines = append(lines, para._ddgc...)
	}
	return _dfde(bounds, lines)
}

// textTable is a grid of paras: _bfgf by _dcbg cells stored in _dbfec, with
// composite cells in _facee. _ebfb marks tables that are always exportable.
type textTable struct {
	_ee.PdfRectangle
	_bfgf, _dcbg int
	_ebfb        bool
	_dbfec       map[uint64]*textPara
	_facee       map[uint64]compositeCell
}

// _bafb turns text marks into paragraphs in reading order: marks are converted
// with _eaff, bagged with _fgcd, split with _cdbaf, post-processed with _ecfd
// and arranged into paras; tables are extracted when at least _aeeg paras
// result. It returns nil when there are no marks or _eaff yields nothing.
func _bafb(_aefa []*textMark, _bbcacd _ee.PdfRectangle, _dgbe rulingList, _ffbdg []gridTiling) paraList {
	_fdd.Log.Trace("makeTextPage: %d elements pageSize=%.2f", len(_aefa), _bbcacd)
	if len(_aefa) == 0 {
		return nil
	}
	words := _eaff(_aefa, _bbcacd)
	if len(words) == 0 {
		return nil
	}
	_dgbe.log("makeTextPage")
	verts, horzs := _dgbe.vertsHorzs()

	pageBag := _fgcd(words, _bbcacd.Ury, verts, horzs)
	bags := _ecfd(_cdbaf(pageBag, _bbcacd.Ury, verts, horzs))

	paras := make(paraList, 0, len(bags))
	for _, bag := range bags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}
	if len(paras) >= _aeeg {
		paras = paras.extractTables(_ffbdg)
	}
	paras.sortReadingOrder()
	paras.log("sorted in reading order")
	return paras
}

// _gedf returns the smaller of the two ints.
func _gedf(_fggdc, _gbggc int) int {
	if _gbggc <= _fggdc {
		return _gbggc
	}
	return _fggdc
}

// clearPath drops all sub-paths and resets the _bfdb flag.
func (_bagc *shapesState) clearPath() {
	_bagc._ebd = nil
	_bagc._bfdb = false
	if _agcb {
		_fdd.Log.Info("CLEAR: ss=%s", _bagc)
	}
}

// The three rulingKind values; _gcbc is the zero value.
const (
	_gcbc rulingKind = iota
	_dcce
	_fbeg
)
// textResult bundles a PageText with two integer counters. Values of this type are stored in
// the Extractor's `_ef` map.
type textResult struct {
	_ace PageText
	_dgg int
	_abg int
}

// _ddaccg rounds `_bafc` to the nearest multiple of `_gage`.
func _ddaccg(_bafc float64) float64 {
	steps := _fd.Round(_bafc / _gage)
	return _gage * steps
}

// String returns a description of the cell: its bounding box, the number of paragraphs it
// holds and, when it has any paragraphs, the first 50 bytes of their merged text.
func (cell compositeCell) String() string {
	var preview string
	if len(cell.paraList) > 0 {
		merged := cell.paraList.merge()
		preview = _fgaa(merged.text(), 50)
	}
	return _fc.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071", cell.PdfRectangle, len(cell.paraList), preview)
}
2021-12-14 01:08:28 +00:00
2022-07-13 21:28:43 +00:00
// String returns a description of `state`.
func ( _ccfa * textState ) String ( ) string { _afad := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]" ; if _ccfa . _gfgf != nil { _afad = _ccfa . _gfgf . BaseFont ( ) ; } ; return _fc . Sprintf ( "\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071" , _ccfa . _dcfg , _ccfa . _eff , _ccfa . _gaa , _afad ) ;
} ; func ( _bafd * textLine ) text ( ) string { var _eedf [ ] string ; for _ , _abef := range _bafd . _bbfe { if _abef . _faaf { _eedf = append ( _eedf , "\u0020" ) ; } ; _eedf = append ( _eedf , _abef . _fbgbed ) ; } ; return _a . Join ( _eedf , "" ) ; } ; func _cdge ( _gedae map [ float64 ] map [ float64 ] gridTile ) [ ] float64 { _aeafg := make ( [ ] float64 , 0 , len ( _gedae ) ) ;
for _eeaa := range _gedae { _aeafg = append ( _aeafg , _eeaa ) ; } ; _e . Float64s ( _aeafg ) ; _dccea := len ( _aeafg ) ; for _fcdg := 0 ; _fcdg < _dccea / 2 ; _fcdg ++ { _aeafg [ _fcdg ] , _aeafg [ _dccea - 1 - _fcdg ] = _aeafg [ _dccea - 1 - _fcdg ] , _aeafg [ _fcdg ] ; } ; return _aeafg ; } ; func ( _gfbf rulingList ) toTilings ( ) ( rulingList , [ ] gridTiling ) { _gfbf . log ( "\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s" ) ;
if len ( _gfbf ) == 0 { return nil , nil ; } ; _gfbf = _gfbf . tidied ( "\u0061\u006c\u006c" ) ; _gfbf . log ( "\u0074\u0069\u0064\u0069\u0065\u0064" ) ; _ggca := _gfbf . toGrids ( ) ; _abbg := make ( [ ] gridTiling , len ( _ggca ) ) ; for _ccgg , _dacg := range _ggca { _abbg [ _ccgg ] = _dacg . asTiling ( ) ;
} ; return _gfbf , _abbg ; } ; const ( _ddcc = 1.0e-6 ; _gage = 1.0e-4 ; _edaa = 10 ; _ebcfe = 6 ; _fdfd = 0.5 ; _adea = 0.12 ; _bgbe = 0.19 ; _ddfd = 0.04 ; _cffb = 0.04 ; _bbff = 1.0 ; _bacgd = 0.04 ; _ffdd = 0.4 ; _ffab = 0.7 ; _feeg = 1.0 ; _gdgg = 0.1 ; _deef = 1.4 ; _fgcgf = 0.46 ; _eca = 0.02 ; _acddf = 0.2 ;
_cbggc = 0.5 ; _eddg = 4 ; _gbdb = 4.0 ; _aeeg = 6 ; _dace = 0.3 ; _aeeb = 0.01 ; _dcfc = 0.02 ; _bdacg = 2 ; _bfc = 2 ; _cbce = 500 ; _ecefb = 4.0 ; _gffg = 4.0 ; _ddge = 0.05 ; _gdaa = 0.1 ; _ebcb = 2.0 ; _gade = 2.0 ; _caea = 1.5 ; _fcgf = 3.0 ; _eeef = 0.25 ; ) ; func _fgaa ( _gbfcc string , _edfcf int ) string { if len ( _gbfcc ) < _edfcf { return _gbfcc ;
} ; return _gbfcc [ : _edfcf ] ; } ; const _fdge = 20 ; func ( _dfafd * textTable ) toTextTable ( ) TextTable { if _fabc { _fdd . Log . Info ( "t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064" , _dfafd . _bfgf , _dfafd . _dcbg ) ; } ;
_dffcb := make ( [ ] [ ] TableCell , _dfafd . _dcbg ) ; for _ffbc := 0 ; _ffbc < _dfafd . _dcbg ; _ffbc ++ { _dffcb [ _ffbc ] = make ( [ ] TableCell , _dfafd . _bfgf ) ; for _daff := 0 ; _daff < _dfafd . _bfgf ; _daff ++ { _eafcc := _dfafd . get ( _daff , _ffbc ) ; if _eafcc == nil { continue ;
} ; if _fabc { _fc . Printf ( "\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a" , _daff , _ffbc , _eafcc ) ; } ; _dffcb [ _ffbc ] [ _daff ] . Text = _eafcc . text ( ) ; _gagc := 0 ; _dffcb [ _ffbc ] [ _daff ] . Marks . _fef = _eafcc . toTextMarks ( & _gagc ) ; } ; } ; return TextTable { W : _dfafd . _bfgf , H : _dfafd . _dcbg , Cells : _dffcb } ;
2022-03-13 12:41:53 +00:00
} ;
2021-12-14 01:08:28 +00:00
2022-07-13 21:28:43 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	IncludeInlineStencilMasks bool
}

// _bbae removes the element at index `_adbcb` from `_gdfaa` by shifting the tail down one
// position. The returned slice is one element shorter and shares the input's backing array.
func _bbae(_gdfaa []*textWord, _adbcb int) []*textWord {
	return append(_gdfaa[:_adbcb], _gdfaa[_adbcb+1:]...)
}

// rulingKind identifies the kind of a ruling.
type rulingKind int

// bbox returns the bounding box of the line.
func (line *textLine) bbox() _ee.PdfRectangle {
	return line.PdfRectangle
}
// devicePoint transforms the point (`_dacd`, `_abdc`) by the product of the state's `_affd`
// and `_dfdd` matrices and returns the result.
func (ss *shapesState) devicePoint(_dacd, _abdc float64) _ebf.Point {
	combined := ss._affd.Mult(ss._dfdd)
	x, y := combined.Transform(_dacd, _abdc)
	return _ebf.NewPoint(x, y)
}

// addPoint adds the transformed point (`_fddb`, `_fgee`) to the subpath returned by
// establishSubpath. When there is no such subpath the point is remembered in `_dac` and the
// `_bfdb` flag is set instead.
func (ss *shapesState) addPoint(_fddb, _fgee float64) {
	current := ss.establishSubpath()
	pt := ss.devicePoint(_fddb, _fgee)
	if current != nil {
		current.add(pt)
		return
	}
	ss._bfdb = true
	ss._dac = pt
}
// String returns a human readable description of `ss`: the number of subpaths and the value
// of the fresh flag.
func (ss *shapesState) String() string {
	numSubpaths := len(ss._ebd)
	fresh := ss._bfdb
	return _fc.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", numSubpaths, fresh)
}
// String returns a description of `l`: its `_bbcac` value, bounding box, font size and text.
func (line *textLine) String() string {
	return _fc.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022", line._bbcac, line.PdfRectangle, line._edad, line.text())
}

// fontEntry pairs a font with an int64 value. Values of this type are stored in the
// Extractor's `_ac` map.
type fontEntry struct {
	_ebe  *_ee.PdfFont
	_fdfe int64
}

// _ebgc returns `_gdde` with its last rune removed. An empty string is returned unchanged
// rather than panicking on a negative slice bound.
func _ebgc(_gdde string) string {
	runes := []rune(_gdde)
	if len(runes) == 0 {
		return ""
	}
	return string(runes[:len(runes)-1])
}

// maxDepth returns the difference between the bag's `_ecef` value and its lower y coordinate.
func (bag *wordBag) maxDepth() float64 {
	return bag._ecef - bag.Lly
}

// parasBBox returns the cell's paragraphs and its bounding box.
func (cell compositeCell) parasBBox() (paraList, _ee.PdfRectangle) {
	return cell.paraList, cell.PdfRectangle
}

// _bceaa reports whether every rune in `_gded` is white space. It returns true for the empty
// string.
func _bceaa(_gded string) bool {
	for _, r := range _gded {
		if !_b.IsSpace(r) {
			return false
		}
	}
	return true
}

// isQuadrilateral reports whether the subpath has exactly 4 points, or 5 points where the
// last point coincides with the first.
func (sp *subpath) isQuadrilateral() bool {
	switch len(sp._bdb) {
	case 4:
		return true
	case 5:
		first, last := sp._bdb[0], sp._bdb[4]
		return first.X == last.X && first.Y == last.Y
	default:
		return false
	}
}

// rulingList is a list of rulings.
type rulingList []*ruling
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText,
// together with the two integer statistics reported by extractPageText. A
// model.ErrColorOutOfRange error from the extraction is tolerated; any other error, or a
// failed license check, is returned.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
func (e *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pageText, statA, statB, err := e.extractPageText(e._bc, e._abc, _ebf.IdentityMatrix(), 0)
	if err != nil && err != _ee.ErrColorOutOfRange {
		return nil, 0, 0, err
	}
	pageText.computeViews()
	if err = _fddaa(pageText); err != nil {
		return nil, 0, 0, err
	}
	return pageText, statA, statB, nil
}

// applyRemovals removes words from the bag. `_eaba` maps a key of the bag's `_eed` map to the
// set of words to remove from that entry. Entries that become empty are deleted.
func (bag *wordBag) applyRemovals(_eaba map[int]map[*textWord]struct{}) {
	for key, removed := range _eaba {
		if len(removed) == 0 {
			continue
		}
		words := bag._eed[key]
		numKept := len(words) - len(removed)
		if numKept == 0 {
			delete(bag._eed, key)
			continue
		}
		kept := make([]*textWord, numKept)
		next := 0
		for _, word := range words {
			if _, gone := removed[word]; gone {
				continue
			}
			kept[next] = word
			next++
		}
		bag._eed[key] = kept
	}
}

// text returns the text of the paragraph as produced by writeText.
func (para *textPara) text() string {
	var buf _ae.Buffer
	para.writeText(&buf)
	return buf.String()
}

// newTablePara returns a textPara that wraps the table. Both of the paragraph's rectangles
// are set to the table's computed bounding box.
func (table *textTable) newTablePara() *textPara {
	bbox := table.computeBbox()
	para := &textPara{PdfRectangle: bbox, _ddebf: bbox, _cbfe: table}
	if _fabc {
		_fdd.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// inDiacriticArea reports whether `_ceadb` lies close enough to the receiver: the sum of the
// left-edge and right-edge x offsets must be within `_cbggc` times the receiver's width, and
// the bottom-edge y offset within `_cbggc` times its height.
func (mark *textMark) inDiacriticArea(_ceadb *textMark) bool {
	dLeft := mark.Llx - _ceadb.Llx
	dRight := mark.Urx - _ceadb.Urx
	dBottom := mark.Lly - _ceadb.Lly
	if !(_fd.Abs(dLeft+dRight) < mark.Width()*_cbggc) {
		return false
	}
	return _fd.Abs(dBottom) < mark.Height()*_cbggc
}

// _aacb maps spacing accent and modifier runes to the corresponding combining mark.
var _aacb = map[rune]string{
	0x0060: "\u0300",
	0x02CB: "\u0300",
	0x0027: "\u0301",
	0x00B4: "\u0301",
	0x02B9: "\u0301",
	0x02CA: "\u0301",
	0x005E: "\u0302",
	0x02C6: "\u0302",
	0x007E: "\u0303",
	0x02DC: "\u0303",
	0x00AF: "\u0304",
	0x02C9: "\u0304",
	0x02D8: "\u0306",
	0x02D9: "\u0307",
	0x00A8: "\u0308",
	0x00B0: "\u030a",
	0x02DA: "\u030a",
	0x02BA: "\u030b",
	0x02DD: "\u030b",
	0x02C7: "\u030c",
	0x02C8: "\u030d",
	0x0022: "\u030e",
	0x02BB: "\u0312",
	0x02BC: "\u0313",
	0x0486: "\u0313",
	0x055A: "\u0313",
	0x02BD: "\u0314",
	0x0485: "\u0314",
	0x0559: "\u0314",
	0x02D4: "\u031d",
	0x02D5: "\u031e",
	0x02D6: "\u031f",
	0x02D7: "\u0320",
	0x02B2: "\u0321",
	0x00B8: "\u0327",
	0x02CC: "\u0329",
	0x02B7: "\u032b",
	0x02CD: "\u0331",
	0x005F: "\u0332",
	0x204E: "\u0359",
}

// closePath closes the last subpath. If the fresh flag is set, a new subpath starting at the
// remembered point `_dac` is created first. If the flag is not set and there are no subpaths,
// nothing is closed.
func (ss *shapesState) closePath() {
	switch {
	case ss._bfdb:
		ss._ebd = append(ss._ebd, _gbef(ss._dac))
		ss._bfdb = false
	case len(ss._ebd) == 0:
		if _agcb {
			_fdd.Log.Debug("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068")
		}
		ss._bfdb = false
		return
	}
	last := len(ss._ebd) - 1
	ss._ebd[last].close()
	if _agcb {
		_fdd.Log.Info("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073", ss)
	}
}

// bbox returns the bounding box of the mark.
func (mark *textMark) bbox() _ee.PdfRectangle {
	return mark.PdfRectangle
}
// _dcfaf returns the difference between the left x coordinates of the bounding boxes of
// `_aefg` and `_fdgdee`. It is negative when `_aefg` starts to the left of `_fdgdee`.
func _dcfaf(_aefg, _fdgdee bounded) float64 {
	left := _aefg.bbox().Llx
	right := _fdgdee.bbox().Llx
	return left - right
}

// _fddaa performs the license check. It returns nil when a licensed key is present or the
// package-level `_ab` flag is set; otherwise it prints a notice to standard output and
// returns an error. The `_aebd` argument is not used.
func _fddaa(_aebd *PageText) error {
	key := _gg.GetLicenseKey()
	if licensed := key != nil && key.IsLicensed(); licensed || _ab {
		return nil
	}
	_fc.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_fc.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _gd.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}
// String returns a string describing `ma`: the number of marks plus the first and last mark,
// or "EMPTY" when there are none.
func (ma TextMarkArray) String() string {
	count := len(ma._fef)
	if count == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	first, last := ma._fef[0], ma._fef[count-1]
	return _fc.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", count, first, last)
}

// setWordSpacing sets the word spacing of the text state to `_acg`. It is a no-op on a nil
// receiver.
func (to *textObject) setWordSpacing(_acg float64) {
	if to == nil {
		return
	}
	to._bbca._eff = _acg
}

// snapToGroupsDirection groups rulings whose primary coordinates (`_dadb`) lie within
// `_gade` of the first ruling of their group, sets every ruling's primary coordinate to its
// group's mergePrimary value, and then merges each group's secondary sections into single
// rulings. The result is returned in sortStrict order. The receiver's elements are modified
// in place. An empty list yields nil instead of panicking on the first-element access.
func (rulings rulingList) snapToGroupsDirection() rulingList {
	if len(rulings) == 0 {
		return nil
	}
	rulings.sortStrict()

	// groups maps the first ruling of each group to all rulings in that group.
	groups := make(map[*ruling]rulingList, len(rulings))
	leader := rulings[0]
	startGroup := func(r *ruling) {
		leader = r
		groups[leader] = rulingList{r}
	}
	startGroup(rulings[0])
	for _, r := range rulings[1:] {
		if r._dadb < leader._dadb-_ddcc {
			_fdd.Log.Error("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073", leader, r)
		}
		if r._dadb > leader._dadb+_gade {
			startGroup(r)
		} else {
			groups[leader] = append(groups[leader], r)
		}
	}

	// Snap every ruling to the merged primary coordinate of its group.
	merged := make(map[*ruling]float64, len(groups))
	leaderOf := make(map[*ruling]*ruling, len(rulings))
	for lead, members := range groups {
		merged[lead] = members.mergePrimary()
		for _, member := range members {
			leaderOf[member] = lead
		}
	}
	for _, r := range rulings {
		r._dadb = merged[leaderOf[r]]
	}

	// Merge each secondary section; a result that aligns with the previously appended
	// ruling in both directions is reported and dropped.
	result := make(rulingList, 0, len(rulings))
	for _, members := range groups {
		sections := members.splitSec()
		for i, section := range sections {
			r := section.merge()
			if len(result) > 0 {
				prev := result[len(result)-1]
				if prev.alignsPrimary(r) && prev.alignsSec(r) {
					_fdd.Log.Error("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073", i, prev, r)
					continue
				}
			}
			result = append(result, r)
		}
	}
	result.sortStrict()
	return result
}

// sort orders the words of every entry in the bag by increasing left x coordinate.
func (bag *wordBag) sort() {
	for _, words := range bag._eed {
		_e.Slice(words, func(i, j int) bool {
			return _dcfaf(words[i], words[j]) < 0
		})
	}
}
// TableCell is a cell in a TextTable.
type TableCell struct {
// Text is the extracted text.
Text string ;
// Marks holds the TextMarks corresponding to the text in Text.
Marks TextMarkArray ; } ;
// _bfdcd reports whether the x extent of `_bdbgf` overlaps the x extent of `_cbfad` after the
// latter is widened by `_bccd` on both sides.
func _bfdcd ( _cbfad * wordBag , _bdbgf * textWord , _bccd float64 ) bool { return _bdbgf . Llx < _cbfad . Urx + _bccd && _cbfad . Llx - _bccd < _bdbgf . Urx ; } ;
// blocked reports whether `_agg` is separated from the bag by something in the bag's `_dffb`
// or `_gdfa` lists. `_dffb` is consulted when the word lies entirely to the left or right of
// the bag, `_gdfa` when it lies entirely below or above it
// (presumably these are the vertical and horizontal rulings; confirm against wordBag).
func ( _efc * wordBag ) blocked ( _agg * textWord ) bool { if _agg . Urx < _efc . Llx { _gcgc := _bfbg ( _agg . PdfRectangle ) ;
_dcae := _bbffe ( _efc . PdfRectangle ) ; if _efc . _dffb . blocks ( _gcgc , _dcae ) { if _acdge { _fdd . Log . Info ( "\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073" , _agg , _efc ) ; } ; return true ; } ; } else if _efc . Urx < _agg . Llx { _cdc := _bfbg ( _efc . PdfRectangle ) ;
_dfaf := _bbffe ( _agg . PdfRectangle ) ; if _efc . _dffb . blocks ( _cdc , _dfaf ) { if _acdge { _fdd . Log . Info ( "b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s" , _agg , _efc ) ; } ; return true ; } ; } ; if _agg . Ury < _efc . Lly { _feff := _edaf ( _agg . PdfRectangle ) ;
_gbde := _eeeeb ( _efc . PdfRectangle ) ; if _efc . _gdfa . blocks ( _feff , _gbde ) { if _acdge { _fdd . Log . Info ( "\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073" , _agg , _efc ) ; } ; return true ; } ; } else if _efc . Ury < _agg . Lly { _cbgg := _edaf ( _efc . PdfRectangle ) ;
_adc := _eeeeb ( _agg . PdfRectangle ) ; if _efc . _gdfa . blocks ( _cbgg , _adc ) { if _acdge { _fdd . Log . Info ( "b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s" , _agg , _efc ) ; } ; return true ; } ; } ; return false ; } ;
// subpath is a sequence of points (`_bdb`) plus a boolean flag (`_caee`).
type subpath struct { _bdb [ ] _ebf . Point ;
_caee bool ; } ;
// _acgf maps each markKind to its display name.
var _acgf = map [ markKind ] string { _aeaec : "\u0073\u0074\u0072\u006f\u006b\u0065" , _cabg : "\u0066\u0069\u006c\u006c" , _eggf : "\u0061u\u0067\u006d\u0065\u006e\u0074" } ;
// top returns the last element of the stack, or nil when the stack is empty.
func ( _efda * stateStack ) top ( ) * textState { if _efda . empty ( ) { return nil ; } ;
return ( * _efda ) [ _efda . size ( ) - 1 ] ; } ;
// _dcd returns the smallest axis-aligned rectangle that contains both `_fdgbb` and `_fabdb`.
func _dcd ( _fdgbb , _fabdb _ee . PdfRectangle ) _ee . PdfRectangle { return _ee . PdfRectangle { Llx : _fd . Min ( _fdgbb . Llx , _fabdb . Llx ) , Lly : _fd . Min ( _fdgbb . Lly , _fabdb . Lly ) , Urx : _fd . Max ( _fdgbb . Urx , _fabdb . Urx ) , Ury : _fd . Max ( _fdgbb . Ury , _fabdb . Ury ) } ;
} ;
// computeBbox returns the union of the bounding boxes of all non-nil cells in the table, or
// the zero rectangle when the table has no cells.
func ( _bdfe * textTable ) computeBbox ( ) _ee . PdfRectangle { var _gabc _ee . PdfRectangle ; _dfabb := false ; for _debgg := 0 ; _debgg < _bdfe . _dcbg ; _debgg ++ { for _efdc := 0 ; _efdc < _bdfe . _bfgf ; _efdc ++ { _cafgb := _bdfe . get ( _efdc , _debgg ) ; if _cafgb == nil { continue ;
} ; if ! _dfabb { _gabc = _cafgb . PdfRectangle ; _dfabb = true ; } else { _gabc = _dcd ( _gabc , _cafgb . PdfRectangle ) ; } ; } ; } ; return _gabc ; } ;
// _edddc returns the keys of `_ffeg` in increasing order.
func _edddc ( _ffeg map [ int ] intSet ) [ ] int { _gggga := make ( [ ] int , 0 , len ( _ffeg ) ) ; for _eedc := range _ffeg { _gggga = append ( _gggga , _eedc ) ;
} ; _e . Ints ( _gggga ) ; return _gggga ; } ;
// _dafec reports whether the absolute value of `_ebbef` is below the tolerance `_ddcc`.
func _dafec ( _ebbef float64 ) bool { return _fd . Abs ( _ebbef ) < _ddcc } ;
// getStrokeColor converts the graphics state's stroking color, in its stroking colorspace,
// to a color.Color using `_ffadb`.
func ( _aeb * textObject ) getStrokeColor ( ) _c . Color { return _ffadb ( _aeb . _cddc . ColorspaceStroking , _aeb . _cddc . ColorStroking ) ; } ;
// renderText processes the string bytes `_ffbd` with the current font. The bytes are
// converted to charcodes and then to strings; for every string that is not a lone NUL a text
// mark is created and appended to `_gga . _abbb`, and the text matrix `_gga . _ccgf` is advanced
// by the glyph displacement. `_cea` is passed through to newTextMark unchanged.
// It returns nil immediately when the `_bad` flag is set, and an error when a charcode has no
// metrics in the font. Marks rejected by newTextMark are skipped without advancing the matrix.
func ( _gga * textObject ) renderText ( _cea _dd . PdfObject , _ffbd [ ] byte ) error { if _gga . _bad { _fdd . Log . Debug ( "\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e" ) ;
return nil ; } ; _cbeg := _gga . getCurrentFont ( ) ; _aaab := _cbeg . BytesToCharcodes ( _ffbd ) ; _cfb , _eacf , _eab := _cbeg . CharcodesToStrings ( _aaab ) ; if _eab > 0 { _fdd . Log . Debug ( "\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064" , _eacf , _eab ) ;
// The character and miss counts are accumulated on the text state. Glyph metrics are scaled
// by `_feca`, except for Type3 fonts where the factor is 1. The space width is taken from the
// font's rune metrics, then its charcode-32 metrics, then the default font.
} ; _gga . _bbca . _ded += _eacf ; _gga . _bbca . _fgce += _eab ; _afae := _gga . _bbca ; _acd := _afae . _gaa ; _cfcc := _afae . _cfc / 100.0 ; _bdgb := _feca ; if _cbeg . Subtype ( ) == "\u0054\u0079\u0070e\u0033" { _bdgb = 1 ; } ; _fgcg , _cecd := _cbeg . GetRuneMetrics ( ' ' ) ; if ! _cecd { _fgcg , _cecd = _cbeg . GetCharMetrics ( 32 ) ;
} ; if ! _cecd { _fgcg , _ = _ee . DefaultFont ( ) . GetRuneMetrics ( ' ' ) ; } ; _fdc := _fgcg . Wx * _bdgb ; _fdd . Log . Trace ( "\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066" , _fdc , _cfb , _cbeg , _acd ) ;
_fcfd := _ebf . NewMatrix ( _acd * _cfcc , 0 , 0 , _acd , 0 , _afae . _bacg ) ; if _ccfgb { _fdd . Log . Info ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071" , len ( _aaab ) , _aaab , _cfb ) ;
// NOTE(review): the last Trace argument below is len ( _cfb ) but its verb is %q; confirm
// whether %d or the strings themselves were intended.
} ; _fdd . Log . Trace ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071" , len ( _aaab ) , _aaab , len ( _cfb ) ) ; _cddcb := _gga . getFillColor ( ) ;
// One iteration per decoded string. The word spacing `_eff` is added only when the string is
// a single space (rune 32).
_ggab := _gga . getStrokeColor ( ) ; for _dfea , _gddb := range _cfb { _gdc := [ ] rune ( _gddb ) ; if len ( _gdc ) == 1 && _gdc [ 0 ] == '\x00' { continue ; } ; _eeg := _aaab [ _dfea ] ; _cac := _gga . _cddc . CTM . Mult ( _gga . _ccgf ) . Mult ( _fcfd ) ; _fda := 0.0 ; if len ( _gdc ) == 1 && _gdc [ 0 ] == 32 { _fda = _afae . _eff ;
} ; _ebg , _agff := _cbeg . GetCharMetrics ( _eeg ) ; if ! _agff { _fdd . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073" , _eeg , _gdc , _gdc , _cbeg ) ;
return _fc . Errorf ( "\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064" , _cbeg . String ( ) , _eeg ) ; } ; _gfge := _ebf . Point { X : _ebg . Wx * _bdgb , Y : _ebg . Wy * _bdgb } ;
// `_ccfad` is the displacement without the `_dcfg` spacing term, `_geg` the displacement
// including it. The former positions the end of the mark, the latter advances the matrix.
_ccfad := _ebf . Point { X : ( _gfge . X * _acd + _fda ) * _cfcc } ; _geg := _ebf . Point { X : ( _gfge . X * _acd + _afae . _dcfg + _fda ) * _cfcc } ; if _ccfgb { _fdd . Log . Info ( "\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066" , _acd , _afae . _dcfg , _afae . _eff , _cfcc ) ;
_fdd . Log . Info ( "\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f" , _gfge , _ccfad , _geg ) ; } ; _bgdc := _fdgd ( _ccfad ) ; _beeg := _fdgd ( _geg ) ; _ggfd := _gga . _cddc . CTM . Mult ( _gga . _ccgf ) . Mult ( _bgdc ) ;
if _gebg { _fdd . Log . Info ( "e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a" + "\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a" + "\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073" , _gga . _cddc . CTM , _gga . _ccgf , _beeg , _ebfe ( _gga . _cddc . CTM . Mult ( _gga . _ccgf ) . Mult ( _beeg ) ) , _bgdc , _ggfd , _ebfe ( _ggfd ) ) ;
} ; _ebcc , _eadg := _gga . newTextMark ( _gf . ExpandLigatures ( _gdc ) , _cac , _ebfe ( _ggfd ) , _fd . Abs ( _fdc * _cac . ScalingFactorX ( ) ) , _cbeg , _gga . _bbca . _dcfg , _cddcb , _ggab , _cea , _cfb , _dfea ) ; if ! _eadg { _fdd . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067" ) ;
// NOTE(review): `_cbeg` is tested for nil below although it has already been dereferenced
// above, so the nil branch looks unreachable; confirm.
continue ; } ; if _cbeg == nil { _fdd . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e" ) ; } else if _cbeg . Encoder ( ) == nil { _fdd . Log . Debug ( "E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073" , _cbeg ) ;
} else { if _acff , _egac := _cbeg . Encoder ( ) . CharcodeToRune ( _eeg ) ; _egac { _ebcc . _ccgfc = string ( _acff ) ; } ; } ; _fdd . Log . Trace ( "i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073" , _dfea , _eeg , _ebcc , _cac ) ;
_gga . _abbb = append ( _gga . _abbb , & _ebcc ) ; _gga . _ccgf . Concat ( _beeg ) ; } ; return nil ; } ;
// Text returns the extracted page text.
func (pt PageText) Text() string {
	return pt._aec
}

// _dfde returns a new textPara with bounding box `_fbce` and lines `_cbba`.
func _dfde(_fbce _ee.PdfRectangle, _cbba []*textLine) *textPara {
	para := textPara{PdfRectangle: _fbce, _ddgc: _cbba}
	return &para
}

// taken reports whether the paragraph is nil or has its `_bccb` flag set.
func (para *textPara) taken() bool {
	if para == nil {
		return true
	}
	return para._bccb
}
// _ddfa returns the negated lower y coordinate of the bounding box of `_aac`.
func _ddfa(_aac bounded) float64 {
	box := _aac.bbox()
	return -box.Lly
}

// contains reports whether `_effc` fits inside the tile. A tile with fewer than 3 borders
// contains nothing. For every side whose border flag is set, `_effc` may extend past that
// side by at most `_caea`.
func (tile gridTile) contains(_effc _ee.PdfRectangle) bool {
	if tile.numBorders() < 3 {
		return false
	}
	switch {
	case tile._affea && _effc.Llx < tile.Llx-_caea:
		return false
	case tile._daca && _effc.Urx > tile.Urx+_caea:
		return false
	case tile._ffca && _effc.Lly < tile.Lly-_caea:
		return false
	case tile._bfab && _effc.Ury > tile.Ury+_caea:
		return false
	}
	return true
}

// checkWidth returns the width `_fddg` - `_bbade` and whether it is no greater than `_gade`.
func (rectRuling) checkWidth(_bbade, _fddg float64) (float64, bool) {
	width := _fddg - _bbade
	return width, width <= _gade
}

// extractInlineImage converts the inline image `_aaa` to RGB and records it as an ImageMark
// whose size, angle and position come from the CTM of `_ega`. A missing colorspace defaults
// to DeviceGray. The context's `_fag` counter is incremented on success.
func (ctx *imageExtractContext) extractInlineImage(_aaa *_aeg.ContentStreamInlineImage, _ega _aeg.GraphicsState, _bca *_ee.PdfPageResources) error {
	img, err := _aaa.ToImage(_bca)
	if err != nil {
		return err
	}
	colorspace, err := _aaa.GetColorSpace(_bca)
	if err != nil {
		return err
	}
	if colorspace == nil {
		colorspace = _ee.NewPdfColorspaceDeviceGray()
	}
	rgb, err := colorspace.ImageToRGB(*img)
	if err != nil {
		return err
	}
	ctm := _ega.CTM
	mark := ImageMark{
		Image:  &rgb,
		Width:  ctm.ScalingFactorX(),
		Height: ctm.ScalingFactorY(),
		Angle:  ctm.Angle(),
	}
	mark.X, mark.Y = ctm.Translation()
	ctx._ccg = append(ctx._ccg, mark)
	ctx._fag++
	return nil
}

// setTextRise sets the text rise of the text state to `_ggeg`. It is a no-op on a nil
// receiver.
func (to *textObject) setTextRise(_ggeg float64) {
	if to == nil {
		return
	}
	to._bbca._bacg = _ggeg
}

// sortReadingOrder reorders the paragraphs in place: they are first sorted with the `_cbcdb`
// comparison and then rearranged according to topoOrder. Lists with fewer than two
// paragraphs are left untouched.
func (paras paraList) sortReadingOrder() {
	_fdd.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(paras))
	if len(paras) <= 1 {
		return
	}
	paras.computeEBBoxes()
	before := func(i, j int) bool {
		return _cbcdb(paras[i], paras[j]) <= 0
	}
	_e.Slice(paras, before)
	order := paras.topoOrder()
	paras.reorder(order)
}

// toTextMarks returns the TextMarks of all words in the line. A space mark is inserted before
// every word whose `_faaf` flag is set. `_egbc` is the running offset passed to the helpers.
func (line *textLine) toTextMarks(_egbc *int) []TextMark {
	var marks []TextMark
	for _, word := range line._bbfe {
		if word._faaf {
			marks = _eaea(marks, _egbc, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_egbc)...)
	}
	return marks
}

// lineTo adds the point (`_aba`, `_agbf`) to the current path.
func (ss *shapesState) lineTo(_aba, _agbf float64) {
	if _agcb {
		_fdd.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _aba, _agbf, ss.devicePoint(_aba, _agbf))
	}
	ss.addPoint(_aba, _agbf)
}

// rectRuling is a rectangle with a ruling kind, a mark kind and a color.
type rectRuling struct {
	_ffabf rulingKind
	_fgae  markKind
	_c.Color
	_ee.PdfRectangle
}
// String returns a string describing `tm`: its offset, text (quoted and as hex runes),
// bounding box corners, the font description cut to 50 bytes, and a " *M*" suffix for meta
// marks.
func (tm TextMark) String() string {
	box := tm.BBox
	fontDesc := ""
	if tm.Font != nil {
		fontDesc = tm.Font.String()
		if len(fontDesc) > 50 {
			fontDesc = fontDesc[:50] + "\u002e\u002e\u002e"
		}
	}
	metaTag := ""
	if tm.Meta {
		metaTag = "\u0020\u002a\u004d\u002a"
	}
	return _fc.Sprintf("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d", tm.Offset, tm.Text, []rune(tm.Text), box.Llx, box.Lly, box.Urx, box.Ury, fontDesc, metaTag)
}

// ruling is a ruling with a kind, a mark kind, a color and four coordinates. `_dadb` is the
// primary coordinate used for grouping in snapToGroupsDirection.
type ruling struct {
	_deeb rulingKind
	_fbgc markKind
	_c.Color
	_dadb  float64
	_gcfe  float64
	_fbdf  float64
	_cecfb float64
}
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	_bc  string
	_abc *_ee.PdfPageResources
	_da  _ee.PdfRectangle
	_ac  map[string]fontEntry
	_ef  map[string]textResult
	_cb  int64
	_ce  int
}

// moveLP translates the `_afbg` matrix by (`_bag`, `_fege`) and sets `_ccgf` to the result.
func (to *textObject) moveLP(_bag, _fege float64) {
	shift := _ebf.NewMatrix(1, 0, 0, 1, _bag, _fege)
	to._afbg.Concat(shift)
	to._ccgf = to._afbg
}

// _aefcd maps each rulingKind to its display name.
var _aefcd = map[rulingKind]string{
	_gcbc: "\u006e\u006f\u006e\u0065",
	_dcce: "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",
	_fbeg: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",
}

// removeDuplicates sorts the list in place and returns a new list in which runs of equal
// adjacent rulings are collapsed to their first element. An empty list yields nil.
func (rulings rulingList) removeDuplicates() rulingList {
	if len(rulings) == 0 {
		return nil
	}
	rulings.sort()
	unique := rulingList{rulings[0]}
	for _, r := range rulings[1:] {
		if last := unique[len(unique)-1]; !r.equals(last) {
			unique = append(unique, r)
		}
	}
	return unique
}

// _fdafg reports whether `_eged` has at least `_eddg` runes, ends with a hyphen rune, and has
// a non-space rune immediately before that hyphen.
func _fdafg(_eged string) bool {
	if _df.RuneCountInString(_eged) < _eddg {
		return false
	}
	last, lastSize := _df.DecodeLastRuneInString(_eged)
	if lastSize <= 0 || !_b.Is(_b.Hyphen, last) {
		return false
	}
	head := _eged[:len(_eged)-lastSize]
	prev, prevSize := _df.DecodeLastRuneInString(head)
	return prevSize > 0 && !_b.IsSpace(prev)
}

// _bcag removes the first occurrence of `_fcbea` from `_aafga` and returns the shortened
// slice. When the word is not present an error is logged and nil is returned.
func _bcag(_aafga []*textWord, _fcbea *textWord) []*textWord {
	for i := range _aafga {
		if _aafga[i] == _fcbea {
			return _bbae(_aafga, i)
		}
	}
	_fdd.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", _fcbea)
	return nil
}
2022-06-27 19:58:38 +00:00
// ToText returns the page text as a single string.
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
2022-07-13 21:28:43 +00:00
func (pt PageText) ToText() string {
	return pt.Text()
}

// removeDuplicates collapses runs of consecutive points that `_fabdg` reports as equal,
// keeping the first point of each run.
func (sp *subpath) removeDuplicates() {
	if len(sp._bdb) == 0 {
		return
	}
	deduped := []_ebf.Point{sp._bdb[0]}
	for _, pt := range sp._bdb[1:] {
		if _fabdg(pt, deduped[len(deduped)-1]) {
			continue
		}
		deduped = append(deduped, pt)
	}
	sp._bdb = deduped
}
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// String returns a description of `b`: its bounding box, font size, word count and the text
// of every word, visited in the order given by depthIndexes.
func ( _cbfca * wordBag ) String ( ) string { var _gbed [ ] string ; for _ , _fgfd := range _cbfca . depthIndexes ( ) { _aggg := _cbfca . _eed [ _fgfd ] ; for _ , _dafc := range _aggg { _gbed = append ( _gbed , _dafc . _fbgbed ) ; } ; } ; return _fc . Sprintf ( "\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071" , _cbfca . PdfRectangle , _cbfca . _bbgb , len ( _gbed ) , _gbed ) ;
} ;
// xNeighbours builds two events per paragraph, one at its left x coordinate (flag true) and
// one at its right x coordinate (flag false), and returns the result of eventNeighbours on
// them. When `_ffgbf` is non-zero each paragraph's x extent is first widened on both sides by
// `_ffgbf` times the paragraph's font size.
func ( _gbfea paraList ) xNeighbours ( _ffgbf float64 ) map [ * textPara ] [ ] int { _ceea := make ( [ ] event , 2 * len ( _gbfea ) ) ; if _ffgbf == 0 { for _abgf , _cfeec := range _gbfea { _ceea [ 2 * _abgf ] = event { _cfeec . Llx , true , _abgf } ; _ceea [ 2 * _abgf + 1 ] = event { _cfeec . Urx , false , _abgf } ;
} ; } else { for _ccbcb , _efab := range _gbfea { _ceea [ 2 * _ccbcb ] = event { _efab . Llx - _ffgbf * _efab . fontsize ( ) , true , _ccbcb } ; _ceea [ 2 * _ccbcb + 1 ] = event { _efab . Urx + _ffgbf * _efab . fontsize ( ) , false , _ccbcb } ; } ; } ; return _gbfea . eventNeighbours ( _ceea ) ; } ;
// showText renders the string bytes `_agf` by delegating to renderText.
func ( _ddd * textObject ) showText ( _dfg _dd . PdfObject , _agf [ ] byte ) error { return _ddd . renderText ( _dfg , _agf ) ;
} ;
// _bdegc returns the indexes 0 .. `_dbb`-1 sorted with `_bffe` as the less function over the
// index values.
func _bdegc ( _dbb int , _bffe func ( int , int ) bool ) [ ] int { _gfbef := make ( [ ] int , _dbb ) ; for _gfagf := range _gfbef { _gfbef [ _gfagf ] = _gfagf ; } ; _e . Slice ( _gfbef , func ( _baebg , _cabc int ) bool { return _bffe ( _gfbef [ _baebg ] , _gfbef [ _cabc ] ) } ) ; return _gfbef ;
} ;
// _gbef returns a new subpath whose only point is `_efg`.
func _gbef ( _efg _ebf . Point ) * subpath { return & subpath { _bdb : [ ] _ebf . Point { _efg } } } ; func ( _gda * imageExtractContext ) extractContentStreamImages ( _cd string , _dg * _ee . PdfPageResources ) error { _dbe := _aeg . NewContentStreamParser ( _cd ) ; _caf , _ga := _dbe . Parse ( ) ;
if _ga != nil { return _ga ; } ; if _gda . _be == nil { _gda . _be = map [ * _dd . PdfObjectStream ] * cachedImage { } ; } ; if _gda . _ec == nil { _gda . _ec = & ImageExtractOptions { } ; } ; _dae := _aeg . NewContentStreamProcessor ( * _caf ) ; _dae . AddHandler ( _aeg . HandlerConditionEnumAllOperands , "" , _gda . processOperand ) ;
return _dae . Process ( _dg ) ; } ; func ( _bagb rulingList ) augmentGrid ( ) ( rulingList , rulingList ) { _acbd , _gadge := _bagb . vertsHorzs ( ) ; if len ( _acbd ) == 0 || len ( _gadge ) == 0 { return _acbd , _gadge ; } ; _dffa , _ebggf := _acbd , _gadge ; _gbfef := _acbd . bbox ( ) ; _acfda := _gadge . bbox ( ) ;
if _cfcd { _fdd . Log . Info ( "\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066" , _gbfef ) ; _fdd . Log . Info ( "\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066" , _acfda ) ;
} ; var _ecgf , _fgbeb , _eccd , _cdfc * ruling ; if _acfda . Llx < _gbfef . Llx - _ebcb { _ecgf = & ruling { _fbgc : _eggf , _deeb : _fbeg , _dadb : _acfda . Llx , _gcfe : _gbfef . Lly , _fbdf : _gbfef . Ury } ; _acbd = append ( rulingList { _ecgf } , _acbd ... ) ; } ; if _acfda . Urx > _gbfef . Urx + _ebcb { _fgbeb = & ruling { _fbgc : _eggf , _deeb : _fbeg , _dadb : _acfda . Urx , _gcfe : _gbfef . Lly , _fbdf : _gbfef . Ury } ;
_acbd = append ( _acbd , _fgbeb ) ; } ; if _gbfef . Lly < _acfda . Lly - _ebcb { _eccd = & ruling { _fbgc : _eggf , _deeb : _dcce , _dadb : _gbfef . Lly , _gcfe : _acfda . Llx , _fbdf : _acfda . Urx } ; _gadge = append ( rulingList { _eccd } , _gadge ... ) ; } ; if _gbfef . Ury > _acfda . Ury + _ebcb { _cdfc = & ruling { _fbgc : _eggf , _deeb : _dcce , _dadb : _gbfef . Ury , _gcfe : _acfda . Llx , _fbdf : _acfda . Urx } ;
_gadge = append ( _gadge , _cdfc ) ; } ; if len ( _acbd ) + len ( _gadge ) == len ( _bagb ) { return _dffa , _ebggf ; } ; _cfcef := append ( _acbd , _gadge ... ) ; _bagb . log ( "u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064" ) ; _cfcef . log ( "\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d" ) ;
return _acbd , _gadge ; } ; func _eefd ( _cfbb _ee . PdfRectangle ) rulingKind { _bfbff := _cfbb . Width ( ) ; _edec := _cfbb . Height ( ) ; if _bfbff > _edec { if _bfbff >= _ecefb { return _dcce ; } ; } else { if _edec >= _ecefb { return _fbeg ; } ; } ; return _gcbc ; } ; func ( _gfcb rulingList ) aligned ( ) bool { if len ( _gfcb ) < 2 { return false ;
} ; _efbd := make ( map [ * ruling ] int ) ; _efbd [ _gfcb [ 0 ] ] = 0 ; for _ , _dagg := range _gfcb [ 1 : ] { _edaab := false ; for _debe := range _efbd { if _dagg . gridIntersecting ( _debe ) { _efbd [ _debe ] ++ ; _edaab = true ; break ; } ; } ; if ! _edaab { _efbd [ _dagg ] = 0 ; } ; } ; _dgad := 0 ; for _ , _bfgc := range _efbd { if _bfgc == 0 { _dgad ++ ;
} ; } ; _fbfb := float64 ( _dgad ) / float64 ( len ( _gfcb ) ) ; _dafag := _fbfb <= 1.0 - _eeef ; if _cfcd { _fdd . Log . Info ( "\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073" , _dafag , _fbfb , _dgad , len ( _gfcb ) , _gfcb . String ( ) ) ;
} ; return _dafag ; } ; func ( _cbfg * wordBag ) depthIndexes ( ) [ ] int { if len ( _cbfg . _eed ) == 0 { return nil ; } ; _gbg := make ( [ ] int , len ( _cbfg . _eed ) ) ; _feaf := 0 ; for _cccbf := range _cbfg . _eed { _gbg [ _feaf ] = _cccbf ; _feaf ++ ; } ; _e . Ints ( _gbg ) ; return _gbg ; } ; func ( _ccfd * imageExtractContext ) extractXObjectImage ( _ceb * _dd . PdfObjectName , _edf _aeg . GraphicsState , _daf * _ee . PdfPageResources ) error { _edgg , _ := _daf . GetXObjectByName ( * _ceb ) ;
if _edgg == nil { return nil ; } ; _fgd , _ddb := _ccfd . _be [ _edgg ] ; if ! _ddb { _bfeb , _afcf := _daf . GetXObjectImageByName ( * _ceb ) ; if _afcf != nil { return _afcf ; } ; if _bfeb == nil { return nil ; } ; _bbb , _afcf := _bfeb . ToImage ( ) ; if _afcf != nil { return _afcf ; } ; _fgd = & cachedImage { _db : _bbb , _eebb : _bfeb . ColorSpace } ;
_ccfd . _be [ _edgg ] = _fgd ; } ; _dfd := _fgd . _db ; _cab := _fgd . _eebb ; _gdag , _cf := _cab . ImageToRGB ( * _dfd ) ; if _cf != nil { return _cf ; } ; _fdd . Log . Debug ( "@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073" , _edf . CTM . String ( ) ) ; _efa := ImageMark { Image : & _gdag , Width : _edf . CTM . ScalingFactorX ( ) , Height : _edf . CTM . ScalingFactorY ( ) , Angle : _edf . CTM . Angle ( ) } ;
_efa . X , _efa . Y = _edf . CTM . Translation ( ) ; _ccfd . _ccg = append ( _ccfd . _ccg , _efa ) ; _ccfd . _bg ++ ; return nil ; } ; type textMark struct { _ee . PdfRectangle ; _gdba int ; _cdg string ; _ccgfc string ; _dbdg * _ee . PdfFont ; _cfccd float64 ; _fgcc float64 ; _cadda _ebf . Matrix ;
_dfgd _ebf . Point ; _dgde _ee . PdfRectangle ; _afbgf _c . Color ; _bbacf _c . Color ; _aafgb _dd . PdfObject ; _bfgda [ ] string ; Tw float64 ; Th float64 ; _efb int ; } ;
2022-06-06 22:48:24 +00:00
2022-07-13 21:28:43 +00:00
// String returns a description of the paragraph: its rectangle, an optional "[WxH] " prefix
// taken from the attached table `_cbfe` (its `_bfgf` and `_dcbg` fields), the number of lines
// and the quoted text passed through `_fgaa(..., 50)`. A paragraph whose `_faaed` flag is set
// is printed as "[EMPTY]".
func (_eegae *textPara) String() string {
	if _eegae._faaed {
		return _fc.Sprintf("%6.2f [EMPTY]", _eegae.PdfRectangle)
	}
	var tablePrefix string
	if _eegae._cbfe != nil {
		tablePrefix = _fc.Sprintf("[%dx%d] ", _eegae._cbfe._bfgf, _eegae._cbfe._dcbg)
	}
	return _fc.Sprintf("%6.2f %s%d lines %q", _eegae.PdfRectangle, tablePrefix, len(_eegae._ddgc), _fgaa(_eegae.text(), 50))
}

// _ffcag returns the distinct inner keys of the two-level map `_dggc`, sorted ascending.
func _ffcag(_dggc map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_dggc))
	seen := make(map[float64]struct{}, len(_dggc))
	for _, inner := range _dggc {
		for key := range inner {
			if _, dup := seen[key]; !dup {
				keys = append(keys, key)
				seen[key] = struct{}{}
			}
		}
	}
	_e.Float64s(keys)
	return keys
}

// getDepthIdx converts `_ggcc` to a depth index with `_cfe` and clamps it to the range of
// depth indexes present in the bag. It panics when the bag has no depth indexes, because
// depthIndexes then returns nil; callers must check for an empty bag first.
func (_dee *wordBag) getDepthIdx(_ggcc float64) int {
	indexes := _dee.depthIndexes()
	depthIdx := _cfe(_ggcc)
	lowest := indexes[0]
	if depthIdx < lowest {
		return lowest
	}
	highest := indexes[len(indexes)-1]
	if depthIdx > highest {
		return highest
	}
	return depthIdx
}

// moveText forwards its arguments unchanged to moveLP.
func (_gfaf *textObject) moveText(_ffe, _gdbf float64) {
	_gfaf.moveLP(_ffe, _gdbf)
}
// _bgfd builds rulings from stroked path sections. After `_dfdf` has pre-processed the
// sections, every pair of consecutive points of every subpath with at least two points is
// offered to `_cgad` together with the section colour; the rulings it accepts are collected.
func _bgfd(_gceg []pathSection) rulingList {
	_dfdf(_gceg)
	if _cfcd {
		_fdd.Log.Info("makeStrokeRulings: %d strokes", len(_gceg))
	}
	var rulings rulingList
	for _, section := range _gceg {
		for _, path := range section._fdee {
			points := path._bdb
			if len(points) < 2 {
				continue
			}
			for i := 1; i < len(points); i++ {
				if r, ok := _cgad(points[i-1], points[i], section.Color); ok {
					rulings = append(rulings, r)
				}
			}
		}
	}
	if _cfcd {
		_fdd.Log.Info("makeStrokeRulings: %s", rulings)
	}
	return rulings
}

// processOperand is the content stream handler used for image extraction. It handles two
// operators, each only when given exactly one parameter:
//   - "BI": extracts the inline image, unless the image is a stencil mask and
//     IncludeInlineStencilMasks is not set in the options;
//   - "Do": extracts the named XObject when it is an image, or descends into it when it is a
//     form. A non-name parameter yields the error `_gdd`.
//
// Everything else is ignored and returns nil.
func (_ffc *imageExtractContext) processOperand(_ggf *_aeg.ContentStreamOperation, _dca _aeg.GraphicsState, _cee *_ee.PdfPageResources) error {
	if len(_ggf.Params) != 1 {
		return nil
	}
	switch _ggf.Operand {
	case "BI":
		inline, ok := _ggf.Params[0].(*_aeg.ContentStreamInlineImage)
		if !ok {
			return nil
		}
		if isMask, isBool := _dd.GetBoolVal(inline.ImageMask); isBool && isMask && !_ffc._ec.IncludeInlineStencilMasks {
			return nil
		}
		return _ffc.extractInlineImage(inline, _dca, _cee)
	case "Do":
		name, ok := _dd.GetName(_ggf.Params[0])
		if !ok {
			_fdd.Log.Debug("ERROR: Type")
			return _gdd
		}
		_, xtype := _cee.GetXObjectByName(*name)
		if xtype == _ee.XObjectTypeImage {
			return _ffc.extractXObjectImage(name, _dca, _cee)
		}
		if xtype == _ee.XObjectTypeForm {
			return _ffc.extractFormImages(name, _dca, _cee)
		}
	}
	return nil
}

// numBorders returns how many of the tile's four boolean fields `_affea`, `_daca`, `_ffca` and
// `_bfab` are set.
func (_egec gridTile) numBorders() int {
	count := 0
	for _, set := range [...]bool{_egec._affea, _egec._daca, _egec._ffca, _egec._bfab} {
		if set {
			count++
		}
	}
	return count
}

// _gdabd classifies the segment between two points by passing its absolute X extent, absolute
// Y extent and the threshold `_ecefb` to `_fbbc`.
func _gdabd(_dgcf, _eafaf _ebf.Point) rulingKind {
	dx := _fd.Abs(_dgcf.X - _eafaf.X)
	dy := _fd.Abs(_dgcf.Y - _eafaf.Y)
	return _fbbc(dx, dy, _ecefb)
}

// toTextMarks returns the paragraph's text marks. A paragraph without a table (`_cbfe` nil)
// delegates to toCellTextMarks. For a table paragraph the cells are visited row by row: a
// missing cell contributes "\t" and a present cell its own marks (both through `_eaea` /
// toCellTextMarks with the running offset `_accad`), every cell is followed by " ", and every
// row except the last by "\n".
func (_gaef *textPara) toTextMarks(_accad *int) []TextMark {
	if _gaef._cbfe == nil {
		return _gaef.toCellTextMarks(_accad)
	}
	var marks []TextMark
	for row := 0; row < _gaef._cbfe._dcbg; row++ {
		for col := 0; col < _gaef._cbfe._bfgf; col++ {
			if cell := _gaef._cbfe.get(col, row); cell != nil {
				marks = append(marks, cell.toCellTextMarks(_accad)...)
			} else {
				marks = _eaea(marks, _accad, "\t")
			}
			marks = _eaea(marks, _accad, " ")
		}
		if row < _gaef._cbfe._dcbg-1 {
			marks = _eaea(marks, _accad, "\n")
		}
	}
	return marks
}

// _bfed builds rulings from filled path sections. After `_dfdf` has pre-processed the
// sections, each subpath that isQuadrilateral is converted with makeRectRuling; subpaths that
// fail either step are skipped (and logged when the matching debug flag is on).
func _bfed(_cdgg []pathSection) rulingList {
	_dfdf(_cdgg)
	if _cfcd {
		_fdd.Log.Info("makeFillRulings: %d fills", len(_cdgg))
	}
	var rulings rulingList
	for _, section := range _cdgg {
		for _, path := range section._fdee {
			if !path.isQuadrilateral() {
				if _cfcd {
					_fdd.Log.Error("!isQuadrilateral: %s", path)
				}
				continue
			}
			r, ok := path.makeRectRuling(section.Color)
			if ok {
				rulings = append(rulings, r)
				continue
			}
			if _cbfcg {
				_fdd.Log.Error("!makeRectRuling: %s", path)
			}
		}
	}
	if _cfcd {
		_fdd.Log.Info("makeFillRulings: %s", rulings.String())
	}
	return rulings
}

// bbox returns the paragraph's embedded PdfRectangle.
func (_face *textPara) bbox() _ee.PdfRectangle {
	return _face.PdfRectangle
}
// Tables returns the tables extracted from the page.
func (_fgda PageText) Tables() []TextTable {
	if _fabc {
		_fdd.Log.Info("Tables: %d", len(_fgda._ccbc))
	}
	return _fgda._ccbc
}

// makeRectRuling tries to turn the first four points of the subpath into a ruling coloured
// `_baffd`. Each side (point i to point (i+1)%4) is classified with `_adeaf`. The path is
// accepted when it has two horizontal and two vertical sides, or two sides of one kind, none
// of the other, and the matching pair check (`_bgdca` / `_abbgg`) passes. The rectangle
// spanned by the sides is then classified with `_eefd` and converted with asRuling.
//
// The boolean result is false when no ruling could be made; the ruling result is then an
// empty ruling for a rejected side pattern and nil for the later failures. The subpath must
// have at least four points.
func (_ggbc *subpath) makeRectRuling(_baffd _c.Color) (*ruling, bool) {
	if _cbfcg {
		_fdd.Log.Info("makeRectRuling: path=%v", _ggbc)
	}
	points := _ggbc._bdb[:4]
	kinds := make(map[int]rulingKind, len(points))
	for i, from := range points {
		to := _ggbc._bdb[(i+1)%4]
		kinds[i] = _adeaf(from, to)
		if _cbfcg {
			_fc.Printf("%4d: %s = %6.2f - %6.2f", i, kinds[i], from, to)
		}
	}
	if _cbfcg {
		_fc.Printf("   kinds=%+v\n", kinds)
	}

	var verts, horzs []int
	for i, kind := range kinds {
		if kind == _dcce {
			horzs = append(horzs, i)
		} else if kind == _fbeg {
			verts = append(verts, i)
		}
	}
	if _cbfcg {
		_fc.Printf("   horzs=%d %+v\n", len(horzs), horzs)
		_fc.Printf("   verts=%d %+v\n", len(verts), verts)
	}

	var accepted bool
	switch {
	case len(horzs) == 2 && len(verts) == 2:
		accepted = true
	case len(horzs) == 2 && len(verts) == 0:
		accepted = _bgdca(points[horzs[0]], points[horzs[1]])
	case len(verts) == 2 && len(horzs) == 0:
		accepted = _abbgg(points[verts[0]], points[verts[1]])
	}
	if _cbfcg {
		_fc.Printf("   horzs=%d verts=%d ok=%t\n", len(horzs), len(verts), accepted)
	}
	if !accepted {
		if _cbfcg {
			_fdd.Log.Error("!!makeRectRuling: path=%v", _ggbc)
			_fc.Printf("   horzs=%d verts=%d ok=%t\n", len(horzs), len(verts), accepted)
		}
		return &ruling{}, false
	}

	// When one kind is missing entirely, use the sides that are not of the other kind.
	if len(verts) == 0 {
		for i, kind := range kinds {
			if kind != _dcce {
				verts = append(verts, i)
			}
		}
	}
	if len(horzs) == 0 {
		for i, kind := range kinds {
			if kind != _fbeg {
				horzs = append(horzs, i)
			}
		}
	}
	if _cbfcg {
		_fdd.Log.Info("makeRectRuling: horzs=%d verts=%d points=%d\n"+
			"\t horzs=%+v\n"+
			"\t verts=%+v\n"+
			"\tpoints=%+v",
			len(horzs), len(verts), len(points), horzs, verts, points)
	}

	// Order the start points of the two horizontal sides by Y and of the two vertical sides
	// by X.
	yHi, yLo := points[horzs[1]], points[horzs[0]]
	if points[horzs[0]].Y > points[horzs[1]].Y {
		yHi, yLo = points[horzs[0]], points[horzs[1]]
	}
	xHi, xLo := points[verts[1]], points[verts[0]]
	if points[verts[0]].X > points[verts[1]].X {
		xHi, xLo = points[verts[0]], points[verts[1]]
	}

	// The X bounds are filled in "backwards" and then normalised by the swaps below.
	rect := _ee.PdfRectangle{Llx: xHi.X, Urx: xLo.X, Lly: yLo.Y, Ury: yHi.Y}
	if rect.Llx > rect.Urx {
		rect.Llx, rect.Urx = rect.Urx, rect.Llx
	}
	if rect.Lly > rect.Ury {
		rect.Lly, rect.Ury = rect.Ury, rect.Lly
	}

	candidate := rectRuling{PdfRectangle: rect, _ffabf: _eefd(rect), Color: _baffd}
	if candidate._ffabf == _gcbc {
		if _cbfcg {
			_fdd.Log.Error("makeRectRuling: kind=nil")
		}
		return nil, false
	}
	result, isRuling := candidate.asRuling()
	if !isRuling {
		if _cbfcg {
			_fdd.Log.Error("makeRectRuling: !isRuling")
		}
		return nil, false
	}
	if _cfcd {
		_fc.Printf("   r=%s\n", result.String())
	}
	return result, true
}

// depthBand returns depthRange over the depth indexes that `_gaad` and `_begdg` map to
// (clamped by getDepthIdx), or nil when the bag's `_eed` map is empty.
func (_cbfda *wordBag) depthBand(_gaad, _begdg float64) []int {
	if len(_cbfda._eed) == 0 {
		return nil
	}
	first := _cbfda.getDepthIdx(_gaad)
	last := _cbfda.getDepthIdx(_begdg)
	return _cbfda.depthRange(first, last)
}
// String returns a description of the word: its `_baed` value, its rectangle, its `_aaaa`
// value (printed under the label "fontsize") and its text `_fbgbed` in double quotes.
func (_edbbd *textWord) String() string {
	return _fc.Sprintf("%.2f %6.2f fontsize=%.2f \"%s\"",
		_edbbd._baed, _edbbd.PdfRectangle, _edbbd._aaaa, _edbbd._fbgbed)
}

// removeDuplicates drops words that repeat another word in the same depth bin. For each depth
// index, the first word of the bin fixes a tolerance (`_acddf` times its `_aaaa`) and a band
// of bins starting at its `_baed`. Within each bin of that band, a word is a duplicate of
// another when the texts are equal and all four rectangle coordinates differ by less than the
// tolerance. Duplicates are removed in place and bins that become empty are deleted.
func (_bbfa *wordBag) removeDuplicates() {
	if _bdge {
		_fdd.Log.Info("removeDuplicates: %q", _bbfa.text())
	}
	for _, depthIdx := range _bbfa.depthIndexes() {
		if len(_bbfa._eed[depthIdx]) == 0 {
			continue
		}
		first := _bbfa._eed[depthIdx][0]
		tol := _acddf * first._aaaa
		depth := first._baed
		near := func(a, b float64) bool { return _fd.Abs(a-b) < tol }

		for _, bandIdx := range _bbfa.depthBand(depth, depth+tol) {
			duplicates := map[*textWord]struct{}{}
			words := _bbfa._eed[bandIdx]
			for _, keep := range words {
				if _, dropped := duplicates[keep]; dropped {
					continue
				}
				for _, other := range words {
					if _, dropped := duplicates[other]; dropped {
						continue
					}
					if other != keep && other._fbgbed == keep._fbgbed &&
						near(other.Llx, keep.Llx) && near(other.Urx, keep.Urx) &&
						near(other.Lly, keep.Lly) && near(other.Ury, keep.Ury) {
						duplicates[other] = struct{}{}
					}
				}
			}
			if len(duplicates) == 0 {
				continue
			}
			// Compact the surviving words to the front of the bin, then truncate.
			next := 0
			for _, word := range words {
				if _, dropped := duplicates[word]; dropped {
					continue
				}
				words[next] = word
				next++
			}
			_bbfa._eed[bandIdx] = words[:len(words)-len(duplicates)]
			if len(_bbfa._eed[bandIdx]) == 0 {
				delete(_bbfa._eed, bandIdx)
			}
		}
	}
}

// eventNeighbours sweeps over `_baeac` (sorted in place by position `_fdbf`, with "enter"
// events before "leave" events at equal positions) and records, for every paragraph index,
// the indexes of the paragraphs whose intervals are open while it enters. The result maps each
// paragraph of `_efaee` that has an enter event to those indexes (in no particular order), or
// to nil when there are none.
func (_efaee paraList) eventNeighbours(_baeac []event) map[*textPara][]int {
	_e.Slice(_baeac, func(i, j int) bool {
		a, b := _baeac[i], _baeac[j]
		if a._fdbf != b._fdbf {
			return a._fdbf < b._fdbf
		}
		if a._eccae != b._eccae {
			return a._eccae
		}
		return i < j
	})

	overlaps := make(map[int]intSet)
	open := make(intSet)
	for _, ev := range _baeac {
		if !ev._eccae {
			open.del(ev._dedbf)
			continue
		}
		overlaps[ev._dedbf] = make(intSet)
		for other := range open {
			if other != ev._dedbf {
				overlaps[ev._dedbf].add(other)
				overlaps[other].add(ev._dedbf)
			}
		}
		open.add(ev._dedbf)
	}

	neighbours := map[*textPara][]int{}
	for idx, set := range overlaps {
		para := _efaee[idx]
		if len(set) == 0 {
			neighbours[para] = nil
			continue
		}
		indexes := make([]int, 0, len(set))
		for other := range set {
			indexes = append(indexes, other)
		}
		neighbours[para] = indexes
	}
	return neighbours
}

// applyTables returns a new list made of one table paragraph (newTablePara) per table in
// `_feefg`, followed by every paragraph of `_eeac` whose `_bccb` flag is not set.
func (_eeac paraList) applyTables(_feefg []*textTable) paraList {
	var result paraList
	for _, tbl := range _feefg {
		result = append(result, tbl.newTablePara())
	}
	for _, para := range _eeac {
		if !para._bccb {
			result = append(result, para)
		}
	}
	return result
}
2022-06-27 19:58:38 +00:00
// String returns a string describing the grid tile.
2022-07-13 21:28:43 +00:00
func (_cggfcf gridTile) String() string {
	// flag yields the letter when the field is set and "_" otherwise.
	flag := func(set bool, letter string) string {
		if !set {
			return "_"
		}
		return letter
	}
	l := flag(_cggfcf._affea, "L")
	r := flag(_cggfcf._daca, "R")
	b := flag(_cggfcf._ffca, "B")
	t := flag(_cggfcf._bfab, "T")
	return _fc.Sprintf("%6.2f %1s%1s%1s%1s", _cggfcf.PdfRectangle, l, r, b, t)
}

// setTextRenderMode stores `_bcecf` as the RenderMode in the text state `_bbca`. It does
// nothing on a nil text object.
func (_geee *textObject) setTextRenderMode(_bcecf int) {
	if _geee != nil {
		_geee._bbca._dfab = RenderMode(_bcecf)
	}
}

// cachedImage pairs a decoded image with the colorspace needed to convert it.
type cachedImage struct {
	_db   *_ee.Image
	_eebb _ee.PdfColorspace
}
// String returns a human readable description of the set: its members in ascending order,
// formatted with %+v.
func (_aggfa intSet) String() string {
	var members []int
	for member := range _aggfa {
		if !_aggfa.has(member) {
			continue
		}
		members = append(members, member)
	}
	_e.Ints(members)
	return _fc.Sprintf("%+v", members)
}

// checkOp validates the parameter count of `_ede`. When `_dff` is non-negative and the
// operation does not have exactly `_dff` parameters it returns false, together with an
// "incorrect parameter count" error if `_feed` is set (nil otherwise). In all other cases it
// returns true and nil. Calling it on a nil text object additionally logs that the operand
// appeared outside a text object.
func (_eafa *textObject) checkOp(_ede *_aeg.ContentStreamOperation, _dff int, _feed bool) (_bec bool, _bgb error) {
	if _eafa == nil {
		var shown []_dd.PdfObject
		if _dff > 0 {
			shown = _ede.Params
			if len(shown) > _dff {
				shown = shown[:_dff]
			}
		}
		_fdd.Log.Debug("%#q operand outside text. params=%+v", _ede.Operand, shown)
	}
	if _dff >= 0 && len(_ede.Params) != _dff {
		if _feed {
			_bgb = _gd.New("incorrect parameter count")
		}
		_fdd.Log.Debug("ERROR: %#q should have %d input params, got %d %+v", _ede.Operand, _dff, len(_ede.Params), _ede.Params)
		return false, _bgb
	}
	return true, nil
}

// firstWord returns the first word stored under depth index `_ggfb`. It panics when that bin
// is missing or empty.
func (_cdf *wordBag) firstWord(_ggfb int) *textWord {
	bin := _cdf._eed[_ggfb]
	return bin[0]
}

// intSet is a set of ints.
type intSet map[int]struct{}

// compositeRowCorridors returns, for every row from 1 up that has at least one composite cell
// in `_facee`, the values that `_ggec` computes from the cells of that row, keyed by row
// index. Row 0 is never visited.
func (_ddbbc *textTable) compositeRowCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, _ddbbc._dcbg)
	if _fabc {
		_fdd.Log.Info("compositeRowCorridors: h=%d", _ddbbc._dcbg)
	}
	for row := 1; row < _ddbbc._dcbg; row++ {
		var rowCells []compositeCell
		for col := 0; col < _ddbbc._bfgf; col++ {
			cell, present := _ddbbc._facee[_gdeed(col, row)]
			if present {
				rowCells = append(rowCells, cell)
			}
		}
		if len(rowCells) == 0 {
			continue
		}
		gaps := _ggec(rowCells)
		corridors[row] = gaps
		if _fabc {
			_fc.Printf("   %2d: %6.2f\n", row, gaps)
		}
	}
	return corridors
}

// compositeColCorridors returns a map with a nil entry for every column index of the table.
func (_egcdf *textTable) compositeColCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, _egcdf._bfgf)
	if _fabc {
		_fdd.Log.Info("compositeColCorridors: w=%d ", _egcdf._bfgf)
	}
	for col := 0; col < _egcdf._bfgf; col++ {
		corridors[col] = nil
	}
	return corridors
}

// hasLines reports whether `_ddc` is true for the cell's rectangle and the rectangle of at
// least one line in `_bbad`.
func (_fbffg compositeCell) hasLines(_bbad []*textLine) bool {
	for i, line := range _bbad {
		hit := _ddc(_fbffg.PdfRectangle, line.PdfRectangle)
		if _fabc {
			_fc.Printf("      ^^^intersects=%t %d of %d\n", hit, i, len(_bbad))
			_fc.Printf("     ^^^composite=%s\n", _fbffg)
			_fc.Printf("       line=%s\n", line)
		}
		if hit {
			return true
		}
	}
	return false
}

// _fgcd creates a word bag from the first word with `_eaag`, files every further word under
// the depth index `_cfe` gives for its `_baed`, grows the bag's rectangle with `_dcd` to
// take in each word, and sorts the bag. `_fcac` must not be empty.
func _fgcd(_fcac []*textWord, _cgff float64, _cafd, _cgggg rulingList) *wordBag {
	bag := _eaag(_fcac[0], _cgff, _cafd, _cgggg)
	for i := 1; i < len(_fcac); i++ {
		word := _fcac[i]
		depthIdx := _cfe(word._baed)
		bag._eed[depthIdx] = append(bag._eed[depthIdx], word)
		bag.PdfRectangle = _dcd(bag.PdfRectangle, word.PdfRectangle)
	}
	bag.sort()
	return bag
}

// _gdef reports whether the Y extents of the two rectangles overlap or touch.
func _gdef(_ccfda, _bgc _ee.PdfRectangle) bool {
	if _ccfda.Lly > _bgc.Ury {
		return false
	}
	return _bgc.Lly <= _ccfda.Ury
}

// subdivide splits the table's composite cells along the row and column corridors and returns
// a new table holding the resulting cells. Each composite cell at (x, y) is split with its
// row/column corridors and its parts are placed at the offsets `_ebced` computes for that
// row and column. When either corridor map is empty the receiver itself is returned.
func (_gdcbe *textTable) subdivide() *textTable {
	_gdcbe.logComposite("subdivide")
	rowCorridors := _gdcbe.compositeRowCorridors()
	colCorridors := _gdcbe.compositeColCorridors()
	if _fabc {
		_fdd.Log.Info("subdivide:\n\trowCorridors=%s\n\tcolCorridors=%s", _cfcgf(rowCorridors), _cfcgf(colCorridors))
	}
	if len(rowCorridors) == 0 || len(colCorridors) == 0 {
		return _gdcbe
	}
	_adbb(rowCorridors)
	_adbb(colCorridors)
	if _fabc {
		_fdd.Log.Info("subdivide fixed:\n\trowCorridors=%s\n\tcolCorridors=%s", _cfcgf(rowCorridors), _cfcgf(colCorridors))
	}

	yOffsets, height := _ebced(_gdcbe._dcbg, rowCorridors)
	xOffsets, width := _ebced(_gdcbe._bfgf, colCorridors)
	result := &textTable{
		PdfRectangle: _gdcbe.PdfRectangle,
		_ebfb:        _gdcbe._ebfb,
		_dcbg:        height,
		_bfgf:        width,
		_dbfec:       make(map[uint64]*textPara, width*height),
	}
	if _fabc {
		_fdd.Log.Info("subdivide: composite = %d x %d cells= %d x %d\n"+
			"\trowCorridors=%s\n"+
			"\tcolCorridors=%s\n"+
			"\tyOffsets=%+v\n"+
			"\txOffsets=%+v",
			_gdcbe._bfgf, _gdcbe._dcbg, width, height, _cfcgf(rowCorridors), _cfcgf(colCorridors), yOffsets, xOffsets)
	}

	for y := 0; y < _gdcbe._dcbg; y++ {
		y0 := yOffsets[y]
		for x := 0; x < _gdcbe._bfgf; x++ {
			x0 := xOffsets[x]
			if _fabc {
				_fc.Printf("%6d, %2d: x0=%d y0=%d\n", x, y, x0, y0)
			}
			composite, present := _gdcbe._facee[_gdeed(x, y)]
			if !present {
				continue
			}
			parts := composite.split(rowCorridors[y], colCorridors[x])
			for py := 0; py < parts._dcbg; py++ {
				for px := 0; px < parts._bfgf; px++ {
					para := parts.get(px, py)
					result.put(x0+px, y0+py, para)
					if _fabc {
						_fc.Printf("%8d, %2d: %s\n", x0+px, y0+py, para)
					}
				}
			}
		}
	}
	return result
}

// getFillColor converts the non-stroking colour of the graphics state `_cddc` with `_ffadb`,
// using the non-stroking colorspace.
func (_ccc *textObject) getFillColor() _c.Color {
	state := _ccc._cddc
	return _ffadb(state.ColorspaceNonStroking, state.ColorNonStroking)
}

// alignsSec reports whether the [`_gcfe`, `_fbdf`] spans of the two rulings overlap once the
// lower end of each is relaxed by `_gade`+1.
func (_efgca *ruling) alignsSec(_eaead *ruling) bool {
	const slack = _gade + 1.0
	if _efgca._gcfe-slack > _eaead._fbdf {
		return false
	}
	return _eaead._gcfe-slack <= _efgca._fbdf
}

// _afcb returns the single parameter of `_dcee` as a float64. An operation that does not have
// exactly one parameter yields 0 and an "incorrect parameter count" error.
func _afcb(_dcee *_aeg.ContentStreamOperation) (float64, error) {
	if len(_dcee.Params) == 1 {
		return _dd.GetNumberAsFloat(_dcee.Params[0])
	}
	err := _gd.New("incorrect parameter count")
	_fdd.Log.Debug("ERROR: %#q should have %d input params, got %d %+v", _dcee.Operand, 1, len(_dcee.Params), _dcee.Params)
	return 0.0, err
}

// _dffd concatenates the given ruling lists and returns the vertsHorzs split of the result.
func _dffd(_fcacd []rulingList) (rulingList, rulingList) {
	var all rulingList
	for i := range _fcacd {
		all = append(all, _fcacd[i]...)
	}
	return all.vertsHorzs()
}

// stateStack is a list of text state pointers.
type stateStack []*textState
// intersections returns, for every ruling index that takes part in at least one
// vertical/horizontal intersection, the set of indexes of the rulings it intersects. It
// returns nil when there are not more than `_bdacg` vertical and `_bfc` horizontal rulings,
// or when their combined number exceeds `_cbce`.
func (_gaae rulingList) intersections() map[int]intSet {
	var vertIdx, horzIdx []int
	for i, r := range _gaae {
		if r._deeb == _fbeg {
			vertIdx = append(vertIdx, i)
		} else if r._deeb == _dcce {
			horzIdx = append(horzIdx, i)
		}
	}
	if len(vertIdx) < _bdacg+1 || len(horzIdx) < _bfc+1 {
		return nil
	}
	if len(vertIdx)+len(horzIdx) > _cbce {
		_fdd.Log.Debug("intersections: TOO MANY rulings vecs=%d = %d x %d", len(_gaae), len(vertIdx), len(horzIdx))
		return nil
	}

	crossings := make(map[int]intSet, len(vertIdx)+len(horzIdx))
	for _, v := range vertIdx {
		for _, h := range horzIdx {
			if !_gaae[v].intersects(_gaae[h]) {
				continue
			}
			if _, ok := crossings[v]; !ok {
				crossings[v] = make(intSet)
			}
			if _, ok := crossings[h]; !ok {
				crossings[h] = make(intSet)
			}
			crossings[v].add(h)
			crossings[h].add(v)
		}
	}
	return crossings
}

// arrangeText empties the word bag into text lines and returns them as a paragraph, or nil
// when no line was produced. The bag is sorted first and, when `_gdff` is set, de-duplicated.
//
// Lines are built one at a time: a new line is started (`_agd`) at the first reading index of
// the current depth bin, and words are pulled into it from the depth band around the first
// word (its `_baed` plus/minus `_fdfd` times its `_aaaa`). In each pass the candidate from
// every bin of the band is the highestWord; a candidate whose gap to the line's last word
// (`_efgdf`) is below -`_fgcgf`*`_aaaa` ends the line, one whose gap is above
// `_deef`*`_aaaa` is skipped, and among the rest the one ordered first by `_dcfaf` wins.
// Finished lines are sorted with `_cbcdb` and combined by `_dfde`.
func (_gdfad *wordBag) arrangeText() *textPara {
	_gdfad.sort()
	if _gdff {
		_gdfad.removeDuplicates()
	}

	var lines []*textLine
	for _, depthIdx := range _gdfad.depthIndexes() {
		for !_gdfad.empty(depthIdx) {
			readingIdx := _gdfad.firstReadingIndex(depthIdx)
			seed := _gdfad.firstWord(readingIdx)
			line := _agd(_gdfad, readingIdx)

			size := seed._aaaa
			bandLo := seed._baed - _fdfd*size
			bandHi := seed._baed + _fdfd*size
			maxGap := _deef * size
			maxOverlap := _fgcgf * size

		fillLine:
			for {
				var best *textWord
				bestIdx := 0
				for _, bandIdx := range _gdfad.depthBand(bandLo, bandHi) {
					candidate := _gdfad.highestWord(bandIdx, bandLo, bandHi)
					if candidate == nil {
						continue
					}
					gap := _efgdf(candidate, line._bbfe[len(line._bbfe)-1])
					if gap < -maxOverlap {
						break fillLine
					}
					if gap > maxGap {
						continue
					}
					if best != nil && _dcfaf(candidate, best) >= 0 {
						continue
					}
					best = candidate
					bestIdx = bandIdx
				}
				if best == nil {
					break
				}
				line.pullWord(_gdfad, best, bestIdx)
			}

			line.markWordBoundaries()
			lines = append(lines, line)
		}
	}
	if len(lines) == 0 {
		return nil
	}

	_e.Slice(lines, func(i, j int) bool {
		return _cbcdb(lines[i], lines[j]) < 0
	})
	para := _dfde(_gdfad.PdfRectangle, lines)

	if _ccfcg {
		_fdd.Log.Info("arrangeText !!! para=%s", para.String())
		if _efgc {
			for i, line := range para._ddgc {
				_fc.Printf("%4d: %s\n", i, line.String())
				if !_fdaf {
					continue
				}
				for j, word := range line._bbfe {
					_fc.Printf("%8d: %s\n", j, word.String())
					for k, mark := range word._eedb {
						_fc.Printf("%12d: %s\n", k, mark.String())
					}
				}
			}
		}
	}
	return para
}

// _afab returns `_dcfaf(_bcdf, _acdd)` unless `_dafec` is true for that value, in which case
// it falls back to `_aecb(_bcdf, _acdd)`.
func _afab(_bcdf, _acdd bounded) float64 {
	primary := _dcfaf(_bcdf, _acdd)
	if _dafec(primary) {
		return _aecb(_bcdf, _acdd)
	}
	return primary
}

// snapToGroups applies snapToGroupsDirection separately to the vertical and the horizontal
// rulings of `_dddbf` (skipping an empty group) and returns the verticals followed by the
// horizontals.
func (_dddbf rulingList) snapToGroups() rulingList {
	verts, horzs := _dddbf.vertsHorzs()
	if len(verts) > 0 {
		verts = verts.snapToGroupsDirection()
	}
	if len(horzs) > 0 {
		horzs = horzs.snapToGroupsDirection()
	}
	snapped := append(verts, horzs...)
	snapped.log("snapToGroups")
	return snapped
}
// Len returns the number of TextMarks in `ma`.
// A nil receiver is treated as an empty array and reports 0.
func (_cged *TextMarkArray) Len() int {
	if _cged != nil {
		return len(_cged._fef)
	}
	return 0
}

// Package-level switches and numeric thresholds. Their consumers are not
// all visible from this part of the file.
const (
	_effg  = true
	_gdff  = true
	_cgd   = true
	_ggdf  = false
	_ggef  = false
	_dafbd = 6
	_ffad  = 3.0
	_fbgg  = 200
	_gdgbe = true
	_abf   = true
	_dcag  = true
	_fddd  = true
	_gbb   = false
)

// markCells sets the `_bccb` flag on every non-nil cell that get(x, y)
// returns for x in [0, _bfgf) and y in [0, _dcbg).
func (_ffeb *textTable) markCells() {
	for y := 0; y < _ffeb._dcbg; y++ {
		for x := 0; x < _ffeb._bfgf; x++ {
			if cell := _ffeb.get(x, y); cell != nil {
				cell._bccb = true
			}
		}
	}
}
// ToTextMark returns the public view of `tm`.
// Each exported TextMark field is filled from the matching private field.
func (_gcff *textMark) ToTextMark() TextMark {
	var out TextMark
	out.Text = _gcff._cdg
	out.Original = _gcff._ccgfc
	out.BBox = _gcff._dgde
	out.Font = _gcff._dbdg
	out.FontSize = _gcff._cfccd
	out.FillColor = _gcff._afbgf
	out.StrokeColor = _gcff._bbacf
	out.Orientation = _gcff._gdba
	out.DirectObject = _gcff._aafgb
	out.ObjString = _gcff._bfgda
	out.Tw = _gcff.Tw
	out.Th = _gcff.Th
	out.Tc = _gcff._fgcc
	out.Index = _gcff._efb
	return out
}

// blocks reports whether some ruling in the list lies between the two given
// rulings: its `_dadb` is within [lower._dadb, upper._dadb] (with `_gade`
// slack) and its [_gcfe, _fbdf] span overlaps the span shared by both
// arguments. It returns false immediately when the two arguments do not
// overlap in their [_gcfe, _fbdf] spans.
func (_acga rulingList) blocks(_bgbb, _afeb *ruling) bool {
	if _bgbb._gcfe > _afeb._fbdf || _afeb._gcfe > _bgbb._fbdf {
		return false
	}
	// Span common to both rulings.
	overlapLo := _fd.Max(_bgbb._gcfe, _afeb._gcfe)
	overlapHi := _fd.Min(_bgbb._fbdf, _afeb._fbdf)

	// Order the pair by `_dadb` without touching the parameters.
	lower, upper := _bgbb, _afeb
	if lower._dadb > upper._dadb {
		lower, upper = upper, lower
	}
	for _, r := range _acga {
		if lower._dadb <= r._dadb+_gade && r._dadb <= upper._dadb+_gade {
			if r._gcfe <= overlapHi && overlapLo <= r._fbdf {
				return true
			}
		}
	}
	return false
}

// pathSection is a list of subpaths together with an embedded color.
type pathSection struct {
	_fdee []*subpath
	_c.Color
}

// encloses reports whether the interval [_ebda, _afggb] fits inside the
// ruling's [_gcfe, _fbdf] span widened by `_ebcb` on each side.
func (_dfdb *ruling) encloses(_ebda, _afggb float64) bool {
	lo := _dfdb._gcfe - _ebcb
	hi := _dfdb._fbdf + _ebcb
	return lo <= _ebda && _afggb <= hi
}

// getRight collects, for each row, the `_ffbf` neighbour of the cell in the
// last column. It returns nil when any neighbour is taken, or when
// consecutive neighbours are not linked through `_feb`.
func (_fgdcf *textTable) getRight() paraList {
	cells := make(paraList, _fgdcf._dcbg)
	for y := range cells {
		next := _fgdcf.get(_fgdcf._bfgf-1, y)._ffbf
		if next.taken() {
			return nil
		}
		cells[y] = next
	}
	for y := 0; y+1 < len(cells); y++ {
		if cells[y]._feb != cells[y+1] {
			return nil
		}
	}
	return cells
}

// clear resets the subpath to its zero value.
func (_debd *subpath) clear() {
	var zero subpath
	*_debd = zero
}

// pullWord adds `_eaeb` to bin `_gfdb` of the bag, growing the bag's
// bounding box and its `_bbgb` maximum as needed, and records the word in
// the removals set `_bef` under the same bin.
func (_bdde *wordBag) pullWord(_eaeb *textWord, _gfdb int, _bef map[int]map[*textWord]struct{}) {
	_bdde.PdfRectangle = _dcd(_bdde.PdfRectangle, _eaeb.PdfRectangle)
	if _eaeb._aaaa > _bdde._bbgb {
		_bdde._bbgb = _eaeb._aaaa
	}
	bin := _bdde._eed[_gfdb]
	_bdde._eed[_gfdb] = append(bin, _eaeb)
	_bef[_gfdb][_eaeb] = struct{}{}
}

// _fcgea returns the larger of two ints.
func _fcgea(_gabac, _gegd int) int {
	if _gegd >= _gabac {
		return _gegd
	}
	return _gabac
}

// gridTile is a rectangle with four boolean flags.
type gridTile struct {
	_ee.PdfRectangle
	_bfab, _affea, _ffca, _daca bool
}

// appendMark appends `_cfbc` to the word, extends the word's bounding box,
// raises `_aaaa` to the mark's `_cfccd` when that is larger, and recomputes
// `_baed` as the distance from the top of `_gfbab` to the word's lower edge.
func (_bbace *textWord) appendMark(_cfbc *textMark, _gfbab _ee.PdfRectangle) {
	_bbace._eedb = append(_bbace._eedb, _cfbc)
	_bbace.PdfRectangle = _dcd(_bbace.PdfRectangle, _cfbc.PdfRectangle)
	if _cfbc._cfccd > _bbace._aaaa {
		_bbace._aaaa = _cfbc._cfccd
	}
	_bbace._baed = _gfbab.Ury - _bbace.PdfRectangle.Lly
}

// Font-related error messages.
const (
	// "ERROR: Can't convert font object, invalid type"
	_ed = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
	// "ERROR: Can't get font properties, font not found"
	_egf = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"
	// "ERROR: Can't get font stream, invalid type"
	_fe = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
)
// mergePrimary returns the arithmetic mean of `_dadb` over all rulings in
// the list. The list must be non-empty (element 0 is read unconditionally).
func (_fcea rulingList) mergePrimary() float64 {
	total := _fcea[0]._dadb
	for i := 1; i < len(_fcea); i++ {
		total += _fcea[i]._dadb
	}
	count := float64(len(_fcea))
	return total / count
}

// _gcca returns the distance from the top edge of `_cgffe` down to the
// lower edge of the bounding box of `_cddg`.
func _gcca(_cgffe _ee.PdfRectangle, _cddg bounded) float64 {
	box := _cddg.bbox()
	return _cgffe.Ury - box.Lly
}
// _abbbd looks up `_cfdc` in the `_aacb` table. The lookup is attempted only
// when the string consists of exactly one rune; otherwise ("", false) is
// returned.
func _abbbd(_cfdc string) (string, bool) {
	if _df.RuneCountInString(_cfdc) != 1 {
		return "", false
	}
	r, _ := _df.DecodeRuneInString(_cfdc)
	mapped, found := _aacb[r]
	return mapped, found
}

// addDiacritic appends `_dfee` to the text of the word's last mark and
// stores the NFKC-normalized result. The word must contain at least one mark.
func (_efef *textWord) addDiacritic(_dfee string) {
	last := _efef._eedb[len(_efef._eedb)-1]
	last._cdg = _eb.NFKC.String(last._cdg + _dfee)
}

// _fbdab builds the gridTile for rectangle `_bdfb`. Each flag is set when
// the corresponding ruling is non-nil and encloses the matching side of the
// rectangle: `_aafec` and `_fggd` are checked against the [Lly, Ury] range,
// `_fdfcc` and `_degf` against the [Llx, Urx] range.
func _fbdab(_bdfb _ee.PdfRectangle, _aafec, _fggd, _fdfcc, _degf *ruling) gridTile {
	tile := gridTile{PdfRectangle: _bdfb}
	tile._affea = _aafec != nil && _aafec.encloses(_bdfb.Lly, _bdfb.Ury)
	tile._daca = _fggd != nil && _fggd.encloses(_bdfb.Lly, _bdfb.Ury)
	tile._ffca = _fdfcc != nil && _fdfcc.encloses(_bdfb.Llx, _bdfb.Urx)
	tile._bfab = _degf != nil && _degf.encloses(_bdfb.Llx, _bdfb.Urx)
	return tile
}

// log writes a description of the table and of each non-nil cell. It does
// nothing unless the `_fabc` debug switch is on.
func (_fgbgb *textTable) log(_gcae string) {
	if !_fabc {
		return
	}
	_fdd.Log.Info("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066", _gcae, _fgbgb._bfgf, _fgbgb._dcbg, _fgbgb._ebfb, _fgbgb.PdfRectangle)
	for y := 0; y < _fgbgb._dcbg; y++ {
		for x := 0; x < _fgbgb._bfgf; x++ {
			cell := _fgbgb.get(x, y)
			if cell == nil {
				continue
			}
			_fc.Printf("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a", x, y, cell.PdfRectangle, _fgaa(cell.text(), 50), _df.RuneCountInString(cell.text()))
		}
	}
}

// _bgdca passes the absolute Y difference and the absolute X difference of
// the two points (in that order) to `_baba` and returns its verdict.
func _bgdca(_eadc, _dcba _ebf.Point) bool {
	dx := _fd.Abs(_eadc.X - _dcba.X)
	dy := _fd.Abs(_eadc.Y - _dcba.Y)
	return _baba(dy, dx)
}

// putComposite stores `_gfceb` with bounding box `_edeb` as the composite
// cell at (`_egbgc`, `_dbffe`). An empty `_gfceb` is logged as an error and
// nothing is stored.
func (_ccab *textTable) putComposite(_egbgc, _dbffe int, _gfceb paraList, _edeb _ee.PdfRectangle) {
	if len(_gfceb) == 0 {
		_fdd.Log.Error("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073")
		return
	}
	cell := compositeCell{PdfRectangle: _edeb, paraList: _gfceb}
	if _fabc {
		_fc.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a", _egbgc, _dbffe, cell.String())
	}
	cell.updateBBox()
	key := _gdeed(_egbgc, _dbffe)
	_ccab._facee[key] = cell
}

// _eeeeb returns a `_dcce` ruling along the lower edge of `_agfgd`,
// spanning Llx..Urx.
func _eeeeb(_agfgd _ee.PdfRectangle) *ruling {
	r := ruling{_deeb: _dcce, _dadb: _agfgd.Lly, _gcfe: _agfgd.Llx, _fbdf: _agfgd.Urx}
	return &r
}
// NewFromContents creates a new extractor from contents and page resources.
// The extractor starts with empty font and text-result caches; the call is
// reported to the license tracker before returning. The error is always nil.
func NewFromContents(contents string, resources *_ee.PdfPageResources) (*Extractor, error) {
	const _ff = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"
	ext := &Extractor{
		_bc:  contents,
		_abc: resources,
		_ac:  map[string]fontEntry{},
		_ef:  map[string]textResult{},
	}
	_gg.TrackUse(_ff)
	return ext, nil
}

// absorb moves every word of `_edd` into the receiver, bin by bin, and then
// applies the accumulated removals to `_edd`.
func (_cccb *wordBag) absorb(_edd *wordBag) {
	removals := _edd.makeRemovals()
	for bin, words := range _edd._eed {
		for _, w := range words {
			_cccb.pullWord(w, bin, removals)
		}
	}
	_edd.applyRemovals(removals)
}

// moveTo sets the `_bfdb` flag and stores the device-space image of
// (`_gfac`, `_edgd`) in `_dac`.
func (_cgeg *shapesState) moveTo(_gfac, _edgd float64) {
	_cgeg._bfdb = true
	_cgeg._dac = _cgeg.devicePoint(_gfac, _edgd)
	if !_agcb {
		return
	}
	_fdd.Log.Info("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066", _gfac, _edgd, _cgeg._dac)
}

// textWord is a bounding box plus the text marks that make up a word.
type textWord struct {
	_ee.PdfRectangle
	_baed   float64
	_fbgbed string
	_eedb   []*textMark
	_aaaa   float64
	_faaf   bool
}

// _gdcb logs the number of ruling lists under the title `_dgcdg` and prints
// each list on its own line.
func _gdcb(_dgcdg string, _defag []rulingList) {
	_fdd.Log.Info("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073", len(_defag), _dgcdg)
	for i := range _defag {
		_fc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, _defag[i].String())
	}
}

// findPrimSec returns the first ruling whose `_dadb` matches `_ggbdc`
// according to `_dafec` and whose [_gcfe, _fbdf] span, widened by `_ebcb`,
// contains `_gbfa`. It returns nil when there is no such ruling.
func (_caaf rulingList) findPrimSec(_ggbdc, _gbfa float64) *ruling {
	for _, r := range _caaf {
		if !_dafec(r._dadb - _ggbdc) {
			continue
		}
		if r._gcfe-_ebcb <= _gbfa && _gbfa <= r._fbdf+_ebcb {
			return r
		}
	}
	return nil
}

// _ebgg reports whether rectangle `_cefe` lies entirely within `_afdc`
// (edges may coincide).
func _ebgg(_afdc, _cefe _ee.PdfRectangle) bool {
	withinX := _afdc.Llx <= _cefe.Llx && _cefe.Urx <= _afdc.Urx
	if !withinX {
		return false
	}
	return _afdc.Lly <= _cefe.Lly && _cefe.Ury <= _afdc.Ury
}

// sort orders the list in place using the list's `comp` comparator.
func (_eefdg rulingList) sort() {
	less := _eefdg.comp
	_e.Slice(_eefdg, less)
}

// add inserts `_dfff` into the set.
func (_bcdgb intSet) add(_dfff int) {
	var present struct{}
	_bcdgb[_dfff] = present
}
// _cbcdb compares two bounded values: the result of `_aecb` is used unless
// `_dafec` accepts it, in which case the result of `_dcfaf` is returned
// instead.
func _cbcdb(_fgga, _cbga bounded) float64 {
	if primary := _aecb(_fgga, _cbga); !_dafec(primary) {
		return primary
	}
	return _dcfaf(_fgga, _cbga)
}
// String returns a description of `v`.
// A ruling of kind `_gcbc` is reported as "NOT RULING". Otherwise the axis
// labels are "x"/"y", swapped for kind `_dcce`, and a width suffix is added
// only when `_cecfb` is non-zero.
func (_aadff *ruling) String() string {
	if _aadff._deeb == _gcbc {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}
	primaryAxis, secondaryAxis := "\u0078", "\u0079"
	if _aadff._deeb == _dcce {
		primaryAxis, secondaryAxis = "\u0079", "\u0078"
	}
	var widthSuffix string
	if _aadff._cecfb != 0.0 {
		widthSuffix = _fc.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _aadff._cecfb)
	}
	length := _aadff._fbdf - _aadff._gcfe
	return _fc.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073", _aadff._deeb, primaryAxis, _aadff._dadb, secondaryAxis, _aadff._gcfe, _aadff._fbdf, length, _aadff._fbgc, _aadff.Color, widthSuffix)
}

// lineRuling is a coloured line segment between two points, tagged with a
// ruling kind and a mark kind.
type lineRuling struct {
	_bebg  rulingKind
	_ebggc markKind
	_c.Color
	_bdgf, _agba _ebf.Point
}
// String returns a description of `k`.
// Known kinds are looked up in `_acgf`; anything else is rendered as
// "Not a mark: <number>".
func (_abad markKind) String() string {
	if name, known := _acgf[_abad]; known {
		return name
	}
	return _fc.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _abad)
}

// markWordBoundaries sets `_faaf` on every word after the first for which
// `_efgdf(word, previous)` is at least `_eca` times the line's `_edad`.
// The line must contain at least one word.
func (_gdgc *textLine) markWordBoundaries() {
	threshold := _eca * _gdgc._edad
	words := _gdgc._bbfe
	for i, w := range words[1:] {
		// words[i] is the word immediately before w.
		if _efgdf(w, words[i]) >= threshold {
			w._faaf = true
		}
	}
}

// extractTables finds tables among the paragraphs with the help of the
// tilings `_efdef` and returns the list produced by applyTables. Lists
// shorter than `_aeeg` are returned unchanged.
func (_egbb paraList) extractTables(_efdef []gridTiling) paraList {
	if _fabc {
		_fdd.Log.Debug("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_egbb))
	}
	if len(_egbb) < _aeeg {
		return _egbb
	}
	tables := _egbb.findTables(_efdef)
	if _fabc {
		_fdd.Log.Info("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(tables))
		for i, tbl := range tables {
			title := _fc.Sprintf("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064", i)
			tbl.log(title)
		}
	}
	return _egbb.applyTables(tables)
}

// textLine is a bounding box plus the words on one line of text.
type textLine struct {
	_ee.PdfRectangle
	_bbcac float64
	_bbfe  []*textWord
	_edad  float64
}

// _edaf returns a `_dcce` ruling along the upper edge of `_dedb`,
// spanning Llx..Urx.
func _edaf(_dedb _ee.PdfRectangle) *ruling {
	r := ruling{_deeb: _dcce, _dadb: _dedb.Ury, _gcfe: _dedb.Llx, _fbdf: _dedb.Urx}
	return &r
}

// _bfbg returns a `_fbeg` ruling along the right edge of `_geadf`,
// spanning Lly..Ury.
func _bfbg(_geadf _ee.PdfRectangle) *ruling {
	r := ruling{_deeb: _fbeg, _dadb: _geadf.Urx, _gcfe: _geadf.Lly, _fbdf: _geadf.Ury}
	return &r
}

// complete reports whether every tile stored in `_fgbg` is complete.
func (_aeaa gridTiling) complete() bool {
	for _, row := range _aeaa._fgbg {
		for _, tile := range row {
			if tile.complete() {
				continue
			}
			return false
		}
	}
	return true
}

// Debug switches. All are off; several are derived from a parent switch so
// that they can only be on when the parent is on.
const (
	_cgea  = false
	_ebfca = false
	_ggcd  = false
	_gebg  = false
	_agcb  = false
	_ccfgb = false
	_fba   = false
	_bcfe  = false
	_ccfcg = false
	_efgc  = _ccfcg && true
	_fdaf  = _efgc && false
	_bdge  = _ccfcg && true
	_fabc  = false
	_dga   = _fabc && false
	_edfaf = _fabc && true
	_cfcd  = false
	_ffee  = _cfcd && false
	_gega  = _cfcd && false
	_gedg  = _cfcd && true
	_cbfcg = _cfcd && false
	_acdge = _cfcd && false
)

// textPara is a bounding box plus the lines of a paragraph, an optional
// table, two flags and links to four other paragraphs.
type textPara struct {
	_ee.PdfRectangle
	_ddebf _ee.PdfRectangle
	_ddgc  []*textLine
	_cbfe  *textTable
	_bccb  bool
	_faaed bool
	_ggfbf *textPara
	_ffbf  *textPara
	_dccd  *textPara
	_feb   *textPara
}
// String returns a description of `k`.
// Known kinds are looked up in `_aefcd`; anything else is rendered as
// "Not a ruling: <number>".
func (_fdab rulingKind) String() string {
	if name, known := _aefcd[_fdab]; known {
		return name
	}
	return _fc.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _fdab)
}

// Sentinel errors.
var (
	// "type check error"
	_gdd = _gd.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")
	// "range check error"
	_fg = _gd.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)

// _cafa binds the float64 argument of `_baffg` to `_cefdc` and returns the
// resulting two-argument predicate.
func _cafa(_baffg func(*wordBag, *textWord, float64) bool, _cefdc float64) func(*wordBag, *textWord) bool {
	bound := func(bag *wordBag, word *textWord) bool {
		return _baffg(bag, word, _cefdc)
	}
	return bound
}

// del removes `_beab` from the set; removing an absent value is a no-op.
func (_bffd intSet) del(_beab int) {
	delete(_bffd, _beab)
}
// ExtractFonts returns all font information from the page extractor, including
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
//
// The argument `previousPageFonts` is used when trying to build a complete font catalog for multiple pages or the entire document.
// The entries from `previousPageFonts` are added to the returned result unless already included in the page, i.e. no duplicate entries.
//
// NOTE: If previousPageFonts is nil, all fonts from the page will be returned. Use it when building up a full list of fonts for a document or page range.
func (_ge *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	_dfe := PageFonts{}
	if _cbg := _dfe.extractPageResourcesToFont(_ge._abc); _cbg != nil {
		return nil, _cbg
	}
	if previousPageFonts != nil {
		// Carry over earlier fonts whose name is not already present.
		for _, _af := range previousPageFonts.Fonts {
			if !_baf(_dfe.Fonts, _af.FontName) {
				_dfe.Fonts = append(_dfe.Fonts, _af)
			}
		}
	}
	return &PageFonts{Fonts: _dfe.Fonts}, nil
}

// setHorizScaling stores `_gece` in the text state's `_cfc` field.
// A nil receiver is ignored.
func (_gged *textObject) setHorizScaling(_gece float64) {
	if _gged == nil {
		return
	}
	_gged._bbca._cfc = _gece
}

// vertsHorzs splits the list by kind: rulings of kind `_fbeg` go to the first
// result and rulings of kind `_dcce` to the second. Other kinds are dropped.
func (_gcfg rulingList) vertsHorzs() (rulingList, rulingList) {
	var _fbbf, _ebce rulingList
	for _, _bced := range _gcfg {
		switch _bced._deeb {
		case _fbeg:
			_fbbf = append(_fbbf, _bced)
		case _dcce:
			_ebce = append(_ebce, _bced)
		}
	}
	return _fbbf, _ebce
}

// writeText writes the text of every paragraph not flagged `_faaed` to
// `_gbgg`. Between one paragraph and the next entry in the list it writes a
// single space when `_cca` accepts the pair, otherwise two newlines; two
// newlines are always written at the end. Write errors are not reported
// (the method has no error result).
func (_gccc paraList) writeText(_gbgg _f.Writer) {
	for _cdac, _ceba := range _gccc {
		if _ceba._faaed {
			continue
		}
		_ceba.writeText(_gbgg)
		if _cdac != len(_gccc)-1 {
			if _cca(_ceba, _gccc[_cdac+1]) {
				_gbgg.Write([]byte("\u0020"))
			} else {
				_gbgg.Write([]byte("\u000a"))
				_gbgg.Write([]byte("\u000a"))
			}
		}
	}
	_gbgg.Write([]byte("\u000a"))
	_gbgg.Write([]byte("\u000a"))
}

// stroke appends the current subpaths, together with the current stroke
// colour, to `*_agaf` as a new pathSection. Diagnostics are printed only when
// the `_cfcd` / `_ffee` debug switches are on.
func (_gddd *shapesState) stroke(_agaf *[]pathSection) {
	_cbbc := pathSection{_fdee: _gddd._ebd, Color: _gddd._cgga.getStrokeColor()}
	*_agaf = append(*_agaf, _cbbc)
	if _cfcd {
		_fc.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*_agaf), _gddd, _gddd._cgga.getStrokeColor(), _cbbc.bbox())
		if _ffee {
			for _gefg, _fbcg := range _gddd._ebd {
				_fc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", _gefg, _fbcg)
				// Only the first 11 subpaths are printed.
				if _gefg == 10 {
					break
				}
			}
		}
	}
}

// bbox returns the smallest axis-aligned rectangle containing every point of
// every subpath in the section.
//
// A section without any points (no subpaths, or only nil/empty subpaths)
// yields the zero rectangle instead of panicking on an out-of-range index.
func (_edgcg pathSection) bbox() _ee.PdfRectangle {
	var _acee _ee.PdfRectangle
	seeded := false
	for _, _dfgf := range _edgcg._fdee {
		if _dfgf == nil {
			continue
		}
		for _, _ceg := range _dfgf._bdb {
			if !seeded {
				// The first point seen initialises all four edges.
				_acee = _ee.PdfRectangle{Llx: _ceg.X, Urx: _ceg.X, Lly: _ceg.Y, Ury: _ceg.Y}
				seeded = true
				continue
			}
			if _ceg.X < _acee.Llx {
				_acee.Llx = _ceg.X
			} else if _ceg.X > _acee.Urx {
				_acee.Urx = _ceg.X
			}
			if _ceg.Y < _acee.Lly {
				_acee.Lly = _ceg.Y
			} else if _ceg.Y > _acee.Ury {
				_acee.Ury = _ceg.Y
			}
		}
	}
	return _acee
}

// _bbffe returns a `_fbeg` ruling along the left edge of `_gfdd`,
// spanning Lly..Ury.
func _bbffe(_gfdd _ee.PdfRectangle) *ruling {
	return &ruling{_deeb: _fbeg, _dadb: _gfdd.Llx, _gcfe: _gfdd.Lly, _fbdf: _gfdd.Ury}
}

// _agac reports whether the two values differ by at most `_ebcb`.
func _agac(_gggd, _fegg float64) bool {
	return _fd.Abs(_gggd-_fegg) <= _ebcb
}
// markKind is an integer enumeration; its String method looks names up in `_acgf`.
type markKind int ;
// String returns a human readable description of `path`.
// Up to five points are printed in full; longer paths show the point count,
// the first two points and the last point.
func ( _fdgde * subpath ) String ( ) string { _bcde := _fdgde . _bdb ; _aafg := len ( _bcde ) ; if _aafg <= 5 { return _fc . Sprintf ( "\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f" , _aafg , _bcde ) ; } ; return _fc . Sprintf ( "\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f" , _aafg , _bcde [ 0 ] , _bcde [ 1 ] , _bcde [ _aafg - 1 ] ) ;
} ;
// _ab is a package-level flag, initially false; its users are outside this part of the file.
var _ab = false ;
// asTiling converts the rulings into a gridTiling: it sorts the list, splits
// it into verticals and horizontals, builds one tile per cell of the grid
// formed by their distinct primary coordinates, coalesces tiles horizontally
// and then vertically, and keeps the coalesced tiles that are complete.
//
// An empty gridTiling is returned when the list is empty or when there are
// fewer than two distinct vertical or horizontal primaries (previously these
// inputs could panic on an out-of-range index).
func ( _dacgg rulingList ) asTiling ( ) gridTiling { if _gedg { _fdd . Log . Info ( "r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d" , len ( _dacgg ) ) ;
} ;
// Guard: slicing [1:] below would panic on an empty list.
if len ( _dacgg ) == 0 { return gridTiling { } ; } ;
// Report adjacent rulings that coincide on both axes.
for _daba , _gfda := range _dacgg [ 1 : ] { _efgbc := _dacgg [ _daba ] ; if _efgbc . alignsPrimary ( _gfda ) && _efgbc . alignsSec ( _gfda ) { _fdd . Log . Error ( "a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073" , _gfda , _efgbc ) ;
} ; } ; _dacgg . sortStrict ( ) ; _dacgg . log ( "\u0073n\u0061\u0070\u0070\u0065\u0064" ) ; _acbe , _bdga := _dacgg . vertsHorzs ( ) ; _ggafc := _acbe . primaries ( ) ; _cdee := _bdga . primaries ( ) ; _bege := len ( _ggafc ) - 1 ; _ddee := len ( _cdee ) - 1 ;
// `<= 0` also covers -1, i.e. no verticals or no horizontals at all.
if _bege <= 0 || _ddee <= 0 { return gridTiling { } ;
} ; _cbdb := _ee . PdfRectangle { Llx : _ggafc [ 0 ] , Urx : _ggafc [ _bege ] , Lly : _cdee [ 0 ] , Ury : _cdee [ _ddee ] } ; if _gedg { _fdd . Log . Info ( "\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064" , len ( _acbe ) ) ;
for _degb , _cdfe := range _acbe { _fc . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _degb , _cdfe ) ; } ; _fdd . Log . Info ( "\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064" , len ( _bdga ) ) ;
for _daag , _eebeb := range _bdga { _fc . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _daag , _eebeb ) ; } ; _fdd . Log . Info ( "\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f" , _bege , _ddee , _ggafc , _cdee ) ;
} ;
// One tile per grid cell, stored row-major at index y*_bege + x.
_cfgf := make ( [ ] gridTile , _bege * _ddee ) ; for _edea := _ddee - 1 ; _edea >= 0 ; _edea -- { _aebb := _cdee [ _edea ] ; _eebd := _cdee [ _edea + 1 ] ; for _fgcdc := 0 ; _fgcdc < _bege ; _fgcdc ++ { _bcee := _ggafc [ _fgcdc ] ; _eccb := _ggafc [ _fgcdc + 1 ] ; _dcea := _acbe . findPrimSec ( _bcee , _aebb ) ;
_fgcdf := _acbe . findPrimSec ( _eccb , _aebb ) ; _acabf := _bdga . findPrimSec ( _aebb , _bcee ) ; _fbcae := _bdga . findPrimSec ( _eebd , _bcee ) ; _dedc := _ee . PdfRectangle { Llx : _bcee , Urx : _eccb , Lly : _aebb , Ury : _eebd } ; _faea := _fbdab ( _dedc , _dcea , _fgcdf , _acabf , _fbcae ) ;
_cfgf [ _edea * _bege + _fgcdc ] = _faea ; if _gedg { _fc . Printf ( "\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a" , _fgcdc , _edea , _faea . String ( ) , _faea . Width ( ) , _faea . Height ( ) ) ;
} ; } ; } ; if _gedg { _fdd . Log . Info ( "r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066" , _cbdb ) ;
} ;
// Horizontal pass: per row, extend each tile that has `_affea` set to the right until a tile with `_daca` is reached.
_bcfc := make ( [ ] map [ float64 ] gridTile , _ddee ) ; for _bebd := _ddee - 1 ; _bebd >= 0 ; _bebd -- { if _gedg { _fc . Printf ( "\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a" , _bebd ) ; } ; _bcfc [ _bebd ] = make ( map [ float64 ] gridTile , _bege ) ; for _ddac := 0 ; _ddac < _bege ;
_ddac ++ { _bfdf := _cfgf [ _bebd * _bege + _ddac ] ; if _gedg { _fc . Printf ( "\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _ddac , _bfdf ) ; } ; if ! _bfdf . _affea { continue ; } ; _gaba := _ddac ; for _abegc := _ddac + 1 ; ! _bfdf . _daca && _abegc < _bege ; _abegc ++ { _adb := _cfgf [ _bebd * _bege + _abegc ] ;
_bfdf . Urx = _adb . Urx ; _bfdf . _bfab = _bfdf . _bfab || _adb . _bfab ; _bfdf . _ffca = _bfdf . _ffca || _adb . _ffca ; _bfdf . _daca = _adb . _daca ; if _gedg { _fc . Printf ( "\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a" , _abegc , _adb , _bfdf ) ;
} ; _gaba = _abegc ; } ; if _gedg { _fc . Printf ( " \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n" , _ddac , _gaba , _bfdf ) ; } ; _ddac = _gaba ; _bcfc [ _bebd ] [ _bfdf . Llx ] = _bfdf ; } ; } ; _acgeg := make ( map [ float64 ] map [ float64 ] gridTile , _ddee ) ;
_cdeea := make ( map [ float64 ] map [ float64 ] struct { } , _ddee ) ; for _gccb := _ddee - 1 ; _gccb >= 0 ; _gccb -- { _bdfcb := _cfgf [ _gccb * _bege ] . Lly ; _acgeg [ _bdfcb ] = make ( map [ float64 ] gridTile , _bege ) ; _cdeea [ _bdfcb ] = make ( map [ float64 ] struct { } , _bege ) ; } ; if _gedg { _fdd . Log . Info ( "\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066" , _cbdb ) ;
} ;
// Vertical pass: extend each row tile downwards through rows holding a tile with the same Llx and Urx; `_cdeea` records tiles already merged into one above.
for _cgdc := _ddee - 1 ; _cgdc >= 0 ; _cgdc -- { _eacag := _cfgf [ _cgdc * _bege ] . Lly ; _efde := _bcfc [ _cgdc ] ; if _gedg { _fc . Printf ( "\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a" , _cgdc ) ; } ; for _ , _bgbf := range _gcac ( _efde ) { if _ , _deedb := _cdeea [ _eacag ] [ _bgbf ] ;
_deedb { continue ; } ; _baccf := _efde [ _bgbf ] ; if _gedg { _fc . Printf ( " \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a" , _baccf . String ( ) ) ; } ; for _ggegb := _cgdc - 1 ; _ggegb >= 0 ; _ggegb -- { if _baccf . _ffca { break ; } ; _geege := _bcfc [ _ggegb ] ; _aace , _ecac := _geege [ _bgbf ] ;
if ! _ecac { break ; } ; if _aace . Urx != _baccf . Urx { break ; } ; _baccf . _ffca = _aace . _ffca ; _baccf . Lly = _aace . Lly ; if _gedg { _fc . Printf ( "\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a" , _aace . String ( ) , _baccf . String ( ) ) ;
} ; _cdeea [ _aace . Lly ] [ _aace . Llx ] = struct { } { } ; } ; if _cgdc == 0 { _baccf . _ffca = true ; } ; if _baccf . complete ( ) { _acgeg [ _eacag ] [ _bgbf ] = _baccf ; } ; } ; } ; _ebca := gridTiling { PdfRectangle : _cbdb , _facc : _ffcag ( _acgeg ) , _aeccd : _cdge ( _acgeg ) , _fgbg : _acgeg } ;
_ebca . log ( "\u0043r\u0065\u0061\u0074\u0065\u0064" ) ; return _ebca ; } ;
// paraList is a list of paragraphs.
type paraList [ ] * textPara ;
// primaries returns the distinct `_dadb` values of the rulings in ascending order.
func ( _dccdg rulingList ) primaries ( ) [ ] float64 { _agcd := make ( map [ float64 ] struct { } , len ( _dccdg ) ) ; for _ , _bfdd := range _dccdg { _agcd [ _bfdd . _dadb ] = struct { } { } ;
} ; _ccbd := make ( [ ] float64 , len ( _agcd ) ) ; _cdfb := 0 ; for _eace := range _agcd { _ccbd [ _cdfb ] = _eace ; _cdfb ++ ; } ; _e . Float64s ( _ccbd ) ; return _ccbd ; } ;
// has reports whether `_feec` is in the set.
func ( _aecfb intSet ) has ( _feec int ) bool { _ , _fbbb := _aecfb [ _feec ] ; return _fbbb } ;
// isAtom returns the table that `_cacf` builds from the paragraph, its `_ffbf`
// and `_feb` neighbours and the paragraph reached from both of them. It returns
// nil when any of those neighbours is taken or the two routes disagree.
func ( _fbbfd * textPara ) isAtom ( ) * textTable { _fgabb := _fbbfd ;
_agbaf := _fbbfd . _ffbf ; _afcbg := _fbbfd . _feb ; if _agbaf . taken ( ) || _afcbg . taken ( ) { return nil ; } ; _abea := _agbaf . _feb ; if _abea . taken ( ) || _abea != _afcbg . _ffbf { return nil ; } ; return _cacf ( _fgabb , _agbaf , _afcbg , _abea ) ; } ;
// _cfe maps `_acb` to an integer bin of width `_ebcfe`: the quotient is
// truncated for non-negative values and truncated then decremented for
// negative ones.
func _cfe ( _acb float64 ) int { var _daea int ;
if _acb >= 0 { _daea = int ( _acb / _ebcfe ) ; } else { _daea = int ( _acb / _ebcfe ) - 1 ; } ; return _daea ; } ;
// xMean returns the mean X coordinate of the line's two end points.
func ( _ddaf lineRuling ) xMean ( ) float64 { return 0.5 * ( _ddaf . _bdgf . X + _ddaf . _agba . X ) } ;
// _ecfd merges word bags: the bags are sorted by decreasing area (then
// decreasing height), and each remaining bag absorbs every later bag whose
// rectangle lies inside its own rectangle extended to the left by `_bbgb`.
// Bags that were absorbed are omitted from the result.
func _ecfd ( _dfcf [ ] * wordBag ) [ ] * wordBag { if len ( _dfcf ) <= 1 { return _dfcf ;
} ; if _ccfcg { _fdd . Log . Info ( "\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a" ) ; } ; _e . Slice ( _dfcf , func ( _eec , _gac int ) bool { _faaa , _efff := _dfcf [ _eec ] , _dfcf [ _gac ] ; _agggd := _faaa . Width ( ) * _faaa . Height ( ) ; _eega := _efff . Width ( ) * _efff . Height ( ) ;
if _agggd != _eega { return _agggd > _eega ; } ; if _faaa . Height ( ) != _efff . Height ( ) { return _faaa . Height ( ) > _efff . Height ( ) ; } ; return _eec < _gac ; } ) ; var _beb [ ] * wordBag ; _gcgf := make ( intSet ) ; for _dcfa := 0 ; _dcfa < len ( _dfcf ) ; _dcfa ++ { if _gcgf . has ( _dcfa ) { continue ;
} ; _gdfe := _dfcf [ _dcfa ] ; for _gdfce := _dcfa + 1 ; _gdfce < len ( _dfcf ) ; _gdfce ++ {
// Skip candidates that an earlier bag has already absorbed (this used to test the outer index, which is never absorbed at this point).
if _gcgf . has ( _gdfce ) { continue ; } ; _ggbee := _dfcf [ _gdfce ] ; _bfad := _gdfe . PdfRectangle ; _bfad . Llx -= _gdfe . _bbgb ; if _ebgg ( _bfad , _ggbee . PdfRectangle ) { _gdfe . absorb ( _ggbee ) ;
_gcgf . add ( _gdfce ) ; } ; } ; _beb = append ( _beb , _gdfe ) ; } ;
// Consistency check: every input bag is either kept or absorbed.
if len ( _dfcf ) != len ( _beb ) + len ( _gcgf ) { _fdd . Log . Error ( "\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064" , len ( _dfcf ) , len ( _beb ) , len ( _gcgf ) ) ;
} ; return _beb ; } ;
// reduceTiling drops rows and columns whose composite cells are empty and
// whose coordinate in the tiling differs from the neighbouring one by less
// than `_bebfd`. The receiver is returned unchanged when nothing is dropped;
// otherwise a new table holding the surviving composite cells is returned.
func ( _fadc * textTable ) reduceTiling ( _geafb gridTiling , _bebfd float64 ) * textTable { _egbd := make ( [ ] int , 0 , _fadc . _dcbg ) ; _eagdf := make ( [ ] int , 0 , _fadc . _bfgf ) ; _egdac := _geafb . _facc ; _ebaa := _geafb . _aeccd ; for _gbfc := 0 ; _gbfc < _fadc . _dcbg ;
_gbfc ++ { _abeb := _gbfc > 0 && _fd . Abs ( _ebaa [ _gbfc - 1 ] - _ebaa [ _gbfc ] ) < _bebfd && _fadc . emptyCompositeRow ( _gbfc ) ; if ! _abeb { _egbd = append ( _egbd , _gbfc ) ; } ; } ; for _cbgae := 0 ; _cbgae < _fadc . _bfgf ; _cbgae ++ { _egga := _cbgae < _fadc . _bfgf - 1 && _fd . Abs ( _egdac [ _cbgae + 1 ] - _egdac [ _cbgae ] ) < _bebfd && _fadc . emptyCompositeColumn ( _cbgae ) ;
if ! _egga { _eagdf = append ( _eagdf , _cbgae ) ; } ; } ; if len ( _egbd ) == _fadc . _dcbg && len ( _eagdf ) == _fadc . _bfgf { return _fadc ; } ; _fgbb := textTable { _ebfb : _fadc . _ebfb , _bfgf : len ( _eagdf ) , _dcbg : len ( _egbd ) , _facee : make ( map [ uint64 ] compositeCell , len ( _eagdf ) * len ( _egbd ) ) } ;
if _fabc { _fdd . Log . Info ( "\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064" , _fadc . _bfgf , _fadc . _dcbg , len ( _eagdf ) , len ( _egbd ) ) ; _fdd . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076" , _eagdf ) ;
_fdd . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076" , _egbd ) ; } ; for _gfdf , _dgea := range _egbd { for _fece , _ccfe := range _eagdf { _effgg , _bgbfd := _fadc . getComposite ( _ccfe , _dgea ) ; if len ( _effgg ) == 0 { continue ;
} ; if _fabc { _fc . Printf ( "\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n" , _fece , _gfdf , _ccfe , _dgea , _fgaa ( _effgg . merge ( ) . text ( ) , 50 ) ) ; } ; _fgbb . putComposite ( _fece , _gfdf , _effgg , _bgbfd ) ;
} ; } ; return & _fgbb ; } ;
// _gefge matches, between optional whitespace, either a run of digits with an
// optional trailing '.' or a run of the letters I, i and v, optionally
// followed by ')'.
var _gefge = _d . MustCompile ( "\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024" ) ;
// newTextMark builds a textMark for text `_bbgc` drawn with matrix `_faeda`
// and ending at `_eegc`. The rotation is derived from the matrix angle via
// `_fgccb` and the box height from the matrix scaling factor. When the media
// box has positive width the box is passed through `_bagd` against it; the
// returned bool is false when `_bagd` reports failure. The mark's embedded
// rectangle is the box re-expressed for the detected rotation.
func ( _cggf * textObject ) newTextMark ( _bbgc string , _faeda _ebf . Matrix , _eegc _ebf . Point , _dadd float64 , _egg * _ee . PdfFont , _cfcf float64 , _bacc , _fada _c . Color , _bfagg _dd . PdfObject , _efcc [ ] string , _dfge int ) ( textMark , bool ) { _cdca := _faeda . Angle ( ) ;
_efe := _fgccb ( _cdca , _edaa ) ; var _gced float64 ; if _efe % 180 != 90 { _gced = _faeda . ScalingFactorY ( ) ; } else { _gced = _faeda . ScalingFactorX ( ) ; } ; _fgde := _ebfe ( _faeda ) ; _bfga := _ee . PdfRectangle { Llx : _fgde . X , Lly : _fgde . Y , Urx : _eegc . X , Ury : _eegc . Y } ;
switch _efe % 360 { case 90 : _bfga . Urx -= _gced ; case 180 : _bfga . Ury -= _gced ; case 270 : _bfga . Urx += _gced ; case 0 : _bfga . Ury += _gced ; default : _efe = 0 ; _bfga . Ury += _gced ; } ; if _bfga . Llx > _bfga . Urx { _bfga . Llx , _bfga . Urx = _bfga . Urx , _bfga . Llx ; } ; if _bfga . Lly > _bfga . Ury { _bfga . Lly , _bfga . Ury = _bfga . Ury , _bfga . Lly ;
} ; _efba := true ; if _cggf . _ecfa . _da . Width ( ) > 0 { _aegcb , _ggde := _bagd ( _bfga , _cggf . _ecfa . _da ) ; if ! _ggde { _efba = false ; _fdd . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q" , _bfga , _cggf . _ecfa . _da , _bbgc ) ;
} ; _bfga = _aegcb ; } ; _cdae := _bfga ; _cbfac := _cggf . _ecfa . _da ; switch _efe % 360 { case 90 : _cbfac . Urx , _cbfac . Ury = _cbfac . Ury , _cbfac . Urx ; _cdae = _ee . PdfRectangle { Llx : _cbfac . Urx - _bfga . Ury , Urx : _cbfac . Urx - _bfga . Lly , Lly : _bfga . Llx , Ury : _bfga . Urx } ;
case 180 : _cdae = _ee . PdfRectangle { Llx : _cbfac . Urx - _bfga . Llx , Urx : _cbfac . Urx - _bfga . Urx , Lly : _cbfac . Ury - _bfga . Lly , Ury : _cbfac . Ury - _bfga . Ury } ; case 270 : _cbfac . Urx , _cbfac . Ury = _cbfac . Ury , _cbfac . Urx ; _cdae = _ee . PdfRectangle { Llx : _bfga . Ury , Urx : _bfga . Lly , Lly : _cbfac . Ury - _bfga . Llx , Ury : _cbfac . Ury - _bfga . Urx } ;
} ; if _cdae . Llx > _cdae . Urx { _cdae . Llx , _cdae . Urx = _cdae . Urx , _cdae . Llx ; } ; if _cdae . Lly > _cdae . Ury { _cdae . Lly , _cdae . Ury = _cdae . Ury , _cdae . Lly ; } ; _fdcc := textMark { _cdg : _bbgc , PdfRectangle : _cdae , _dgde : _bfga , _dbdg : _egg , _cfccd : _gced , _fgcc : _cfcf , _cadda : _faeda , _dfgd : _eegc , _gdba : _efe , _afbgf : _bacc , _bbacf : _fada , _aafgb : _bfagg , _bfgda : _efcc , Th : _cggf . _bbca . _cfc , Tw : _cggf . _bbca . _eff , _efb : _dfge } ;
if _ebfca { _fdd . Log . Info ( "n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073" , _fgde , _eegc , _fdcc . String ( ) ) ; } ; return _fdcc , _efba ;
} ;
// _ebfe returns the translation component of the matrix as a point.
func _ebfe ( _cce _ebf . Matrix ) _ebf . Point { _gdbfa , _efdf := _cce . Translation ( ) ; return _ebf . Point { X : _gdbfa , Y : _efdf } ; } ;
// _eaff groups the marks into words. A mark that `_bceaa` accepts ends the
// current word; a mark is also split off into a new word when its gap or
// depth offset relative to the current word (scaled by the word's `_aaaa`)
// exceeds the `_adea` / `_bgbe` / `_ddfd` limits. With `_cgd` on, a mark that
// maps through `_abbbd` and sits in the diacritic area of its neighbour is
// folded into that neighbour instead. Words whose computed text `_bceaa`
// accepts are discarded.
func _eaff ( _eggfcb [ ] * textMark , _aagb _ee . PdfRectangle ) [ ] * textWord { var _adbc [ ] * textWord ; var _adbe * textWord ; if _ebfca { _fdd . Log . Info ( "\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073" , len ( _eggfcb ) ) ;
} ;
// _cbab finishes the word being built (if any) and resets the builder.
_cbab := func ( ) { if _adbe != nil { _dcgc := _adbe . computeText ( ) ; if ! _bceaa ( _dcgc ) { _adbe . _fbgbed = _dcgc ; _adbc = append ( _adbc , _adbe ) ; if _ebfca { _fdd . Log . Info ( "\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073" , len ( _adbc ) - 1 , _adbe . String ( ) ) ;
for _edgcgb , _ddbbg := range _adbe . _eedb { _fc . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _edgcgb , _ddbbg . String ( ) ) ; } ; } ; } ; _adbe = nil ; } ; } ; for _ , _effff := range _eggfcb { if _cgd && _adbe != nil && len ( _adbe . _eedb ) > 0 { _bfaddg := _adbe . _eedb [ len ( _adbe . _eedb ) - 1 ] ;
_dfffd , _gfcf := _abbbd ( _effff . _cdg ) ; _aadda , _cfga := _abbbd ( _bfaddg . _cdg ) ; if _gfcf && ! _cfga && _bfaddg . inDiacriticArea ( _effff ) { _adbe . addDiacritic ( _dfffd ) ; continue ; } ; if _cfga && ! _gfcf && _effff . inDiacriticArea ( _bfaddg ) { _adbe . _eedb = _adbe . _eedb [ : len ( _adbe . _eedb ) - 1 ] ;
_adbe . appendMark ( _effff , _aagb ) ; _adbe . addDiacritic ( _aadda ) ; continue ; } ; } ; _eaab := _bceaa ( _effff . _cdg ) ; if _eaab { _cbab ( ) ; continue ; } ; if _adbe == nil && ! _eaab { _adbe = _bbegf ( [ ] * textMark { _effff } , _aagb ) ; continue ; } ; _dddba := _adbe . _aaaa ; _bbcfd := _fd . Abs ( _gcca ( _aagb , _effff ) - _adbe . _baed ) / _dddba ;
_fdddf := _efgdf ( _effff , _adbe ) / _dddba ; if _fdddf >= _adea || ! ( - _bgbe <= _fdddf && _bbcfd <= _ddfd ) { _cbab ( ) ; _adbe = _bbegf ( [ ] * textMark { _effff } , _aagb ) ; continue ; } ; _adbe . appendMark ( _effff , _aagb ) ; } ; _cbab ( ) ; return _adbc ; } ;
// text returns the `_fbgbed` text of all words in the bag joined by single spaces.
func ( _cbdg * wordBag ) text ( ) string { _aedf := _cbdg . allWords ( ) ;
_age := make ( [ ] string , len ( _aedf ) ) ; for _egb , _bddgf := range _aedf { _age [ _egb ] = _bddgf . _fbgbed ; } ; return _a . Join ( _age , "\u0020" ) ; } ;
// String returns a short description of the table in the form "<_bfgf> x <_dcbg> <_ebfb>":
// the two integer dimensions (elsewhere in this file `_bfgf` bounds the column loops and
// `_dcbg` the row loops) followed by the boolean `_ebfb` flag.
func (t *textTable) String() string {
	// Decodes to "%d x %d %t".
	const layout = "\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074"
	return _fc.Sprintf(layout, t._bfgf, t._dcbg, t._ebfb)
}
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
//
// It delegates to ExtractPageText and flattens the returned page text with Text(). If
// ExtractPageText fails, the text is empty and the counts and error are passed through unchanged.
func (_bce *Extractor) ExtractTextWithStats() (_cde string, _bde int, _add int, _cdd error) {
	_page, _bde, _add, _cdd := _bce.ExtractPageText()
	if _cdd == nil {
		return _page.Text(), _bde, _add, nil
	}
	return "", _bde, _add, _cdd
}

// extractPageText parses the content stream `_ggg` and processes it with resources `_ea`.
// `_fge` is multiplied onto the CTM when a text object starts and when recursing into form
// XObjects; `_gggb` is the form nesting level, which is rejected with a "form stack overflow"
// error once it exceeds `_fdge`. It returns the page text and the `_ded` / `_fgce` counters of
// the text state.
func ( _bcec * Extractor ) extractPageText ( _ggg string , _ea * _ee . PdfPageResources , _fge _ebf . Matrix , _gggb int ) ( * PageText , int , int , error ) { _fdd . Log . Trace ( "\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d" , _gggb ) ;
_gag := & PageText { _cace : _bcec . _da } ; _cgc := _bbf ( _bcec . _da ) ; var _fga stateStack ; _ddg := _dba ( _bcec , _ea , _aeg . GraphicsState { } , & _cgc , & _fga ) ; _gdgb := shapesState { _affd : _fge , _dfdd : _ebf . IdentityMatrix ( ) , _cgga : _ddg } ; var _ccd bool ; if _gggb > _fdge { _eac := _gd . New ( "\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077" ) ;
_fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076" , _gggb , _eac ) ;
return _gag , _cgc . _ded , _cgc . _fgce , _eac ; } ; _fdbb := _aeg . NewContentStreamParser ( _ggg ) ; _gc , _eee := _fdbb . Parse ( ) ; if _eee != nil { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _eee ) ;
return _gag , _cgc . _ded , _cgc . _fgce , _eee ; } ; _gag . _eda = _gc ; _afe := _aeg . NewContentStreamProcessor ( * _gc ) ; _afe . AddHandler ( _aeg . HandlerConditionEnumAllOperands , "" , func ( _feg * _aeg . ContentStreamOperation , _edgb _aeg . GraphicsState , _bbd * _ee . PdfPageResources ) error { _dfbg := _feg . Operand ;
if _ggcd { _fdd . Log . Info ( "\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s" , _feg ) ; } ; switch _dfbg { case "\u0071" : if _agcb { _fdd . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _gdgb . _dfdd ) ; } ; _fga . push ( & _cgc ) ; case "\u0051" : if ! _fga . empty ( ) { _cgc = * _fga . pop ( ) ;
} ; _gdgb . _dfdd = _edgb . CTM ; if _agcb { _fdd . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _gdgb . _dfdd ) ; } ; case "\u0042\u0054" : if _ccd { _fdd . Log . Debug ( "\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
_gag . _gcea = append ( _gag . _gcea , _ddg . _abbb ... ) ; } ; _ccd = true ; _dbeb := _edgb ; _dbeb . CTM = _fge . Mult ( _dbeb . CTM ) ; _ddg = _dba ( _bcec , _bbd , _dbeb , & _cgc , & _fga ) ; _gdgb . _cgga = _ddg ; case "\u0045\u0054" : if ! _ccd { _fdd . Log . Debug ( "\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
} ; _ccd = false ; _gag . _gcea = append ( _gag . _gcea , _ddg . _abbb ... ) ; _ddg . reset ( ) ; case "\u0054\u002a" : _ddg . nextLine ( ) ; case "\u0054\u0064" : if _egc , _ggd := _ddg . checkOp ( _feg , 2 , true ) ; ! _egc { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _ggd ) ;
return _ggd ; } ; _fae , _eag , _afd := _dada ( _feg . Params ) ; if _afd != nil { return _afd ; } ; _ddg . moveText ( _fae , _eag ) ; case "\u0054\u0044" : if _bdeg , _fec := _ddg . checkOp ( _feg , 2 , true ) ; ! _bdeg { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fec ) ;
return _fec ; } ; _bbcd , _egcd , _ged := _dada ( _feg . Params ) ; if _ged != nil { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _ged ) ; return _ged ; } ; _ddg . moveTextSetLeading ( _bbcd , _egcd ) ; case "\u0054\u006a" : if _fed , _acab := _ddg . checkOp ( _feg , 1 , true ) ;
! _fed { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076" , _feg , _acab ) ; return _acab ; } ; _ebfa := _dd . TraceToDirectObject ( _feg . Params [ 0 ] ) ; _dec , _dbea := _dd . GetStringBytes ( _ebfa ) ;
if ! _dbea { _fdd . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064" , _feg ) ; return _dd . ErrTypeError ;
} ; return _ddg . showText ( _ebfa , _dec ) ; case "\u0054\u004a" : if _def , _bfb := _ddg . checkOp ( _feg , 1 , true ) ; ! _def { _fdd . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bfb ) ; return _bfb ; } ; _ceef , _fad := _dd . GetArray ( _feg . Params [ 0 ] ) ;
// TJ operand is not an array: report a type error, as the Tj, ' and " cases do. The previous
// code returned `_eee`, the content-stream parse error, which is always nil by the time this
// handler runs, so the failure was logged as an error but reported to the processor as success.
if ! _fad { _fdd . Log . Debug ( "\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _feg ) ; return _dd . ErrTypeError ; } ; return _ddg . showTextAdjusted ( _ceef ) ;
case "\u0027" : if _ege , _bdac := _ddg . checkOp ( _feg , 1 , true ) ; ! _ege { _fdd . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bdac ) ; return _bdac ; } ; _fb := _dd . TraceToDirectObject ( _feg . Params [ 0 ] ) ; _ecd , _fdf := _dd . GetStringBytes ( _fb ) ;
if ! _fdf { _fdd . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _feg ) ; return _dd . ErrTypeError ; } ; _ddg . nextLine ( ) ; return _ddg . showText ( _fb , _ecd ) ;
case "\u0022" : if _aegf , _bbdf := _ddg . checkOp ( _feg , 3 , true ) ; ! _aegf { _fdd . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bbdf ) ; return _bbdf ; } ; _deg , _ceeb , _ggb := _dada ( _feg . Params [ : 2 ] ) ; if _ggb != nil { return _ggb ;
} ; _aag := _dd . TraceToDirectObject ( _feg . Params [ 2 ] ) ; _bbbe , _aed := _dd . GetStringBytes ( _aag ) ; if ! _aed { _fdd . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _feg ) ;
return _dd . ErrTypeError ; } ; _ddg . setCharSpacing ( _deg ) ; _ddg . setWordSpacing ( _ceeb ) ; _ddg . nextLine ( ) ; return _ddg . showText ( _aag , _bbbe ) ; case "\u0054\u004c" : _daec , _egfe := _afcb ( _feg ) ; if _egfe != nil { _fdd . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _egfe ) ;
return _egfe ; } ; _ddg . setTextLeading ( _daec ) ; case "\u0054\u0063" : _ebc , _aegc := _afcb ( _feg ) ; if _aegc != nil { _fdd . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _aegc ) ; return _aegc ; } ; _ddg . setCharSpacing ( _ebc ) ;
case "\u0054\u0066" : if _gec , _gbd := _ddg . checkOp ( _feg , 2 , true ) ; ! _gec { _fdd . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gbd ) ; return _gbd ; } ; _geea , _cef := _dd . GetNameVal ( _feg . Params [ 0 ] ) ; if ! _cef { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064" , _feg ) ;
return _dd . ErrTypeError ; } ;
// Tf: the second operand is the font size. Check the conversion error itself. The previous
// guard re-tested `_cef` (the result of the font-name lookup, already known to be true here),
// so a non-numeric size was never detected and setFont was called with whatever value came back.
_gfad , _afa := _dd . GetNumberAsFloat ( _feg . Params [ 1 ] ) ; if _afa != nil { _fdd . Log . Debug ( "\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _feg , _afa ) ;
return _afa ; } ; _afa = _ddg . setFont ( _geea , _gfad ) ; _ddg . _bad = _bd . Is ( _afa , _dd . ErrNotSupported ) ; if _afa != nil && ! _ddg . _bad { return _afa ; } ; case "\u0054\u006d" : if _gge , _gdac := _ddg . checkOp ( _feg , 6 , true ) ; ! _gge { _fdd . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gdac ) ;
return _gdac ; } ; _feag , _bgd := _dd . GetNumbersAsFloat ( _feg . Params ) ; if _bgd != nil { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bgd ) ; return _bgd ; } ; _ddg . setTextMatrix ( _feag ) ; case "\u0054\u0072" : if _afb , _fbb := _ddg . checkOp ( _feg , 1 , true ) ;
! _afb { _fdd . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fbb ) ; return _fbb ; } ; _fcf , _ead := _dd . GetIntVal ( _feg . Params [ 0 ] ) ; if ! _ead { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _feg ) ;
return _dd . ErrTypeError ; } ; _ddg . setTextRenderMode ( _fcf ) ; case "\u0054\u0073" : if _fdfa , _beg := _ddg . checkOp ( _feg , 1 , true ) ; ! _fdfa { _fdd . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _beg ) ; return _beg ;
} ; _ggeb , _fbc := _dd . GetNumberAsFloat ( _feg . Params [ 0 ] ) ; if _fbc != nil { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fbc ) ; return _fbc ; } ; _ddg . setTextRise ( _ggeb ) ; case "\u0054\u0077" : if _fegb , _gddc := _ddg . checkOp ( _feg , 1 , true ) ;
! _fegb { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gddc ) ; return _gddc ; } ; _agb , _bae := _dd . GetNumberAsFloat ( _feg . Params [ 0 ] ) ; if _bae != nil { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bae ) ;
return _bae ; } ; _ddg . setWordSpacing ( _agb ) ; case "\u0054\u007a" : if _bfae , _cgf := _ddg . checkOp ( _feg , 1 , true ) ; ! _bfae { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _cgf ) ; return _cgf ; } ; _bddg , _ccb := _dd . GetNumberAsFloat ( _feg . Params [ 0 ] ) ;
if _ccb != nil { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _ccb ) ; return _ccb ; } ; _ddg . setHorizScaling ( _bddg ) ; case "\u0063\u006d" : _gdgb . _dfdd = _edgb . CTM ; if _gdgb . _dfdd . Singular ( ) { _eae := _ebf . IdentityMatrix ( ) . Translate ( _gdgb . _dfdd . Translation ( ) ) ;
_fdd . Log . Debug ( "S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s" , _gdgb . _dfdd , _eae ) ; _gdgb . _dfdd = _eae ; } ; if _agcb { _fdd . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _gdgb . _dfdd ) ; } ; case "\u006d" : if len ( _feg . Params ) != 2 { _fdd . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _fg ) ;
return nil ; } ; _fde , _bdg := _dd . GetNumbersAsFloat ( _feg . Params ) ; if _bdg != nil { return _bdg ; } ; _gdgb . moveTo ( _fde [ 0 ] , _fde [ 1 ] ) ; case "\u006c" : if len ( _feg . Params ) != 2 { _fdd . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _fg ) ;
return nil ; } ; _bfg , _aaf := _dd . GetNumbersAsFloat ( _feg . Params ) ; if _aaf != nil { return _aaf ; } ; _gdgb . lineTo ( _bfg [ 0 ] , _bfg [ 1 ] ) ; case "\u0063" : if len ( _feg . Params ) != 6 { return _fg ; } ; _dcaa , _gcb := _dd . GetNumbersAsFloat ( _feg . Params ) ; if _gcb != nil { return _gcb ;
} ; _fdd . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _dcaa ) ; _gdgb . cubicTo ( _dcaa [ 0 ] , _dcaa [ 1 ] , _dcaa [ 2 ] , _dcaa [ 3 ] , _dcaa [ 4 ] , _dcaa [ 5 ] ) ; case "\u0076" , "\u0079" : if len ( _feg . Params ) != 4 { return _fg ;
} ; _ceff , _cad := _dd . GetNumbersAsFloat ( _feg . Params ) ; if _cad != nil { return _cad ; } ; _fdd . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _ceff ) ; _gdgb . quadraticTo ( _ceff [ 0 ] , _ceff [ 1 ] , _ceff [ 2 ] , _ceff [ 3 ] ) ;
case "\u0068" : _gdgb . closePath ( ) ; case "\u0072\u0065" : if len ( _feg . Params ) != 4 { return _fg ; } ; _cbb , _aea := _dd . GetNumbersAsFloat ( _feg . Params ) ; if _aea != nil { return _aea ; } ; _gdgb . drawRectangle ( _cbb [ 0 ] , _cbb [ 1 ] , _cbb [ 2 ] , _cbb [ 3 ] ) ; _gdgb . closePath ( ) ;
case "\u0053" : _gdgb . stroke ( & _gag . _ccdb ) ; _gdgb . clearPath ( ) ; case "\u0073" : _gdgb . closePath ( ) ; _gdgb . stroke ( & _gag . _ccdb ) ; _gdgb . clearPath ( ) ; case "\u0046" : _gdgb . fill ( & _gag . _ccdc ) ; _gdgb . clearPath ( ) ; case "\u0066" , "\u0066\u002a" : _gdgb . closePath ( ) ;
_gdgb . fill ( & _gag . _ccdc ) ; _gdgb . clearPath ( ) ; case "\u0042" , "\u0042\u002a" : _gdgb . fill ( & _gag . _ccdc ) ; _gdgb . stroke ( & _gag . _ccdb ) ; _gdgb . clearPath ( ) ; case "\u0062" , "\u0062\u002a" : _gdgb . closePath ( ) ; _gdgb . fill ( & _gag . _ccdc ) ; _gdgb . stroke ( & _gag . _ccdb ) ;
_gdgb . clearPath ( ) ; case "\u006e" : _gdgb . clearPath ( ) ; case "\u0044\u006f" : if len ( _feg . Params ) == 0 { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e" , _feg . Params ) ;
return _dd . ErrRangeError ; } ; _cbf , _edgc := _dd . GetName ( _feg . Params [ 0 ] ) ; if ! _edgc { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e" , _feg . Params [ 0 ] ) ;
return _dd . ErrTypeError ; } ; _ , _cae := _bbd . GetXObjectByName ( * _cbf ) ; if _cae != _ee . XObjectTypeForm { break ; } ; _fgge , _edgc := _bcec . _ef [ _cbf . String ( ) ] ; if ! _edgc { _cecf , _fcgd := _bbd . GetXObjectFormByName ( * _cbf ) ; if _fcgd != nil { _fdd . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _fcgd ) ;
return _fcgd ; } ; _gcc , _fcgd := _cecf . GetContentStream ( ) ; if _fcgd != nil { _fdd . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _fcgd ) ; return _fcgd ; } ; _ccfde := _cecf . Resources ; if _ccfde == nil { _ccfde = _bbd ; } ; _cfd , _cgb , _gff , _fcgd := _bcec . extractPageText ( string ( _gcc ) , _ccfde , _fge . Mult ( _edgb . CTM ) , _gggb + 1 ) ;
if _fcgd != nil { _fdd . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _fcgd ) ; return _fcgd ; } ; _fgge = textResult { * _cfd , _cgb , _gff } ; _bcec . _ef [ _cbf . String ( ) ] = _fgge ; } ; _gdgb . _dfdd = _edgb . CTM ; if _agcb { _fdd . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _gdgb . _dfdd ) ;
} ; _gag . _gcea = append ( _gag . _gcea , _fgge . _ace . _gcea ... ) ; _gag . _ccdb = append ( _gag . _ccdb , _fgge . _ace . _ccdb ... ) ; _gag . _ccdc = append ( _gag . _ccdc , _fgge . _ace . _ccdc ... ) ; _cgc . _ded += _fgge . _dgg ; _cgc . _fgce += _fgge . _abg ; case "\u0072\u0067" , "\u0067" , "\u006b" , "\u0063\u0073" , "\u0073\u0063" , "\u0073\u0063\u006e" : _ddg . _cddc . ColorspaceNonStroking = _edgb . ColorspaceNonStroking ;
_ddg . _cddc . ColorNonStroking = _edgb . ColorNonStroking ; case "\u0052\u0047" , "\u0047" , "\u004b" , "\u0043\u0053" , "\u0053\u0043" , "\u0053\u0043\u004e" : _ddg . _cddc . ColorspaceStroking = _edgb . ColorspaceStroking ; _ddg . _cddc . ColorStroking = _edgb . ColorStroking ;
} ; return nil ; } ) ; _eee = _afe . Process ( _ea ) ; return _gag , _cgc . _ded , _cgc . _fgce , _eee ; } ;
// highestWord scans the words stored under index `_bgg` in `_eed` and returns the first one
// whose `_baed` value lies in the closed interval [`_fabd`, `_fbcc`], or nil if none does.
// NOTE(review): the name suggests the bucket is ordered so that the first hit is the highest
// word; the ordering is not established in this function - confirm against the code that fills `_eed`.
func ( _ecg * wordBag ) highestWord ( _bgg int , _fabd , _fbcc float64 ) * textWord { for _ , _bbacc := range _ecg . _eed [ _bgg ] { if _fabd <= _bbacc . _baed && _bbacc . _baed <= _fbcc { return _bbacc ;
} ; } ; return nil ; } ;
// _cgad builds a ruling from the line segment `_egdc`-`_fagg` drawn in colour `_edage`.
// The segment is classified by `_gdabd`; a segment of kind `_gcbc` is rejected with (nil, false),
// anything else is converted with lineRuling.asRuling.
func _cgad ( _egdc , _fagg _ebf . Point , _edage _c . Color ) ( * ruling , bool ) { _ecca := lineRuling { _bdgf : _egdc , _agba : _fagg , _bebg : _gdabd ( _egdc , _fagg ) , Color : _edage } ; if _ecca . _bebg == _gcbc { return nil , false ; } ; return _ecca . asRuling ( ) ;
} ;
// textState holds the text parameters tracked while a content stream is processed.
// Of its fields, this part of the file shows: `_ded` and `_fgce` are the two integer counters
// returned by extractPageText and incremented by the totals of nested forms; `_gaa` scales
// the numeric adjustments of the TJ operator (presumably the font size - TODO confirm).
type textState struct { _dcfg float64 ; _eff float64 ; _cfc float64 ; _caa float64 ; _gaa float64 ; _dfab RenderMode ; _bacg float64 ; _gfgf * _ee . PdfFont ; _gede _ee . PdfRectangle ; _ded int ; _fgce int ; } ;
// appendWord adds `_bgce` to the line's word list, grows the line's rectangle with `_dcd` to
// take in the word's rectangle, and raises the line's `_edad` and `_bbcac` to the word's
// `_aaaa` and `_baed` values when those are larger.
func ( _efad * textLine ) appendWord ( _bgce * textWord ) { _efad . _bbfe = append ( _efad . _bbfe , _bgce ) ;
_efad . PdfRectangle = _dcd ( _efad . PdfRectangle , _bgce . PdfRectangle ) ; if _bgce . _aaaa > _efad . _edad { _efad . _edad = _bgce . _aaaa ; } ; if _bgce . _baed > _efad . _bbcac { _efad . _bbcac = _bgce . _baed ; } ; } ;
func ( _ebfd * textTable ) reduce ( ) * textTable { _baca := make ( [ ] int , 0 , _ebfd . _dcbg ) ;
_dcca := make ( [ ] int , 0 , _ebfd . _bfgf ) ; for _dagge := 0 ; _dagge < _ebfd . _dcbg ; _dagge ++ { if ! _ebfd . emptyCompositeRow ( _dagge ) { _baca = append ( _baca , _dagge ) ; } ; } ; for _abcg := 0 ; _abcg < _ebfd . _bfgf ; _abcg ++ { if ! _ebfd . emptyCompositeColumn ( _abcg ) { _dcca = append ( _dcca , _abcg ) ;
} ; } ; if len ( _baca ) == _ebfd . _dcbg && len ( _dcca ) == _ebfd . _bfgf { return _ebfd ; } ; _fcge := textTable { _ebfb : _ebfd . _ebfb , _bfgf : len ( _dcca ) , _dcbg : len ( _baca ) , _dbfec : make ( map [ uint64 ] * textPara , len ( _dcca ) * len ( _baca ) ) } ; if _fabc { _fdd . Log . Info ( "\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064" , _ebfd . _bfgf , _ebfd . _dcbg , len ( _dcca ) , len ( _baca ) ) ;
_fdd . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076" , _dcca ) ; _fdd . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076" , _baca ) ; } ; for _bgab , _geeed := range _baca { for _geec , _bgcb := range _dcca { _dega , _eadca := _ebfd . getComposite ( _bgcb , _geeed ) ;
if _dega == nil { continue ; } ; if _fabc { _fc . Printf ( "\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n" , _geec , _bgab , _bgcb , _geeed , _fgaa ( _dega . merge ( ) . text ( ) , 50 ) ) ; } ; _fcge . putComposite ( _geec , _bgab , _dega , _eadca ) ;
} ; } ; return & _fcge ; } ; func ( _aecgg compositeCell ) split ( _cfeb , _befa [ ] float64 ) * textTable { _ffba := len ( _cfeb ) + 1 ; _cdde := len ( _befa ) + 1 ; if _fabc { _fdd . Log . Info ( "\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a" + "\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066" , _cdde , _ffba , _aecgg , _cfeb , _befa ) ;
_fc . Printf ( "\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a" , len ( _aecgg . paraList ) ) ; for _adge , _gaac := range _aecgg . paraList { _fc . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _adge , _gaac . String ( ) ) ; } ;
_fc . Printf ( "\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a" , len ( _aecgg . lines ( ) ) ) ; for _bbgg , _dffe := range _aecgg . lines ( ) { _fc . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _bbgg , _dffe ) ; } ; } ; _cfeb = _aaef ( _cfeb , _aecgg . Ury , _aecgg . Lly ) ;
_befa = _aaef ( _befa , _aecgg . Llx , _aecgg . Urx ) ; _bdeb := make ( map [ uint64 ] * textPara , _cdde * _ffba ) ; _ebec := textTable { _bfgf : _cdde , _dcbg : _ffba , _dbfec : _bdeb } ; _dfda := _aecgg . paraList ; _e . Slice ( _dfda , func ( _abee , _abde int ) bool { _abeec , _dbdd := _dfda [ _abee ] , _dfda [ _abde ] ;
_bfdbd , _bdbb := _abeec . Lly , _dbdd . Lly ; if _bfdbd != _bdbb { return _bfdbd < _bdbb ; } ; return _abeec . Llx < _dbdd . Llx ; } ) ; _bggc := make ( map [ uint64 ] _ee . PdfRectangle , _cdde * _ffba ) ; for _adcd , _eceec := range _cfeb [ 1 : ] { _caca := _cfeb [ _adcd ] ; for _bbbf , _ebea := range _befa [ 1 : ] { _adae := _befa [ _bbbf ] ;
_bggc [ _gdeed ( _bbbf , _adcd ) ] = _ee . PdfRectangle { Llx : _adae , Urx : _ebea , Lly : _eceec , Ury : _caca } ; } ; } ; if _fabc { _fdd . Log . Info ( "\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073" ) ;
_fc . Printf ( "\u0020\u0020\u0020\u0020" ) ; for _eagf := 0 ; _eagf < _cdde ; _eagf ++ { _fc . Printf ( "\u0025\u0033\u0030\u0064\u002c\u0020" , _eagf ) ; } ; _fc . Println ( ) ; for _fce := 0 ; _fce < _ffba ; _fce ++ { _fc . Printf ( "\u0020\u0020\u0025\u0032\u0064\u003a" , _fce ) ; for _bdee := 0 ;
_bdee < _cdde ; _bdee ++ { _fc . Printf ( "\u00256\u002e\u0032\u0066\u002c\u0020" , _bggc [ _gdeed ( _bdee , _fce ) ] ) ; } ; _fc . Println ( ) ; } ; } ; _agedf := func ( _bcbe * textLine ) ( int , int ) { for _aegfa := 0 ; _aegfa < _ffba ; _aegfa ++ { for _agcf := 0 ; _agcf < _cdde ; _agcf ++ { if _ebgg ( _bggc [ _gdeed ( _agcf , _aegfa ) ] , _bcbe . PdfRectangle ) { return _agcf , _aegfa ;
} ; } ; } ; return - 1 , - 1 ; } ; _dabf := make ( map [ uint64 ] [ ] * textLine , _cdde * _ffba ) ; for _ , _fegbb := range _dfda . lines ( ) { _fbfae , _bcdd := _agedf ( _fegbb ) ; if _fbfae < 0 { continue ; } ; _dabf [ _gdeed ( _fbfae , _bcdd ) ] = append ( _dabf [ _gdeed ( _fbfae , _bcdd ) ] , _fegbb ) ;
} ; for _baffe := 0 ; _baffe < len ( _cfeb ) - 1 ; _baffe ++ { _dbge := _cfeb [ _baffe ] ; _dbac := _cfeb [ _baffe + 1 ] ; for _fefg := 0 ; _fefg < len ( _befa ) - 1 ; _fefg ++ { _fbda := _befa [ _fefg ] ; _dcge := _befa [ _fefg + 1 ] ; _efgdfb := _ee . PdfRectangle { Llx : _fbda , Urx : _dcge , Lly : _dbac , Ury : _dbge } ;
_baccd := _dabf [ _gdeed ( _fefg , _baffe ) ] ; if len ( _baccd ) == 0 { continue ; } ; _fbgb := _dfde ( _efgdfb , _baccd ) ; _ebec . put ( _fefg , _baffe , _fbgb ) ; } ; } ; return & _ebec ; } ;
// asRuling converts the line segment into a ruling of the same kind and colour.
// For kind `_fbeg` the primary coordinate `_dadb` is the segment's mean x and the extent
// [`_gcfe`, `_fbdf`] is its y range; for kind `_dcce` the primary coordinate is the mean y and
// the extent is the x range. Any other kind is logged as an error and yields (nil, false).
func ( _edfe lineRuling ) asRuling ( ) ( * ruling , bool ) { _deeg := ruling { _deeb : _edfe . _bebg , Color : _edfe . Color , _fbgc : _aeaec } ;
switch _edfe . _bebg { case _fbeg : _deeg . _dadb = _edfe . xMean ( ) ; _deeg . _gcfe = _fd . Min ( _edfe . _bdgf . Y , _edfe . _agba . Y ) ; _deeg . _fbdf = _fd . Max ( _edfe . _bdgf . Y , _edfe . _agba . Y ) ; case _dcce : _deeg . _dadb = _edfe . yMean ( ) ; _deeg . _gcfe = _fd . Min ( _edfe . _bdgf . X , _edfe . _agba . X ) ;
_deeg . _fbdf = _fd . Max ( _edfe . _bdgf . X , _edfe . _agba . X ) ; default : _fdd . Log . Error ( "\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064" , _edfe . _bebg ) ; return nil , false ; } ; return & _deeg , true ; } ;
// addNeighbours fills in the four neighbour links of every paragraph in the list:
// `_ggfbf` (smaller x), `_ffbf` (larger x), `_feb` (smaller y) and `_dccd` (larger y).
// Links are kept only when they are reciprocal (see the final loop).
//
// The helper `_cafac` splits candidate indexes into paragraphs lying entirely at smaller x than
// `_caaa` (Urx <= Llx) and entirely at larger x (Llx >= Urx); `_acgg` does the same vertically,
// returning (larger-y paragraphs, smaller-y paragraphs). Both size their result slices with
// capacity len(candidates)-1, so they must not be called with an empty candidate slice; the
// callers below skip empty candidate lists before calling them.
func ( _cdbg paraList ) addNeighbours ( ) { _cafac := func ( _cedgg [ ] int , _caaa * textPara ) ( [ ] * textPara , [ ] * textPara ) { _efdab := make ( [ ] * textPara , 0 , len ( _cedgg ) - 1 ) ;
_fcbc := make ( [ ] * textPara , 0 , len ( _cedgg ) - 1 ) ; for _ , _abecd := range _cedgg { _fggb := _cdbg [ _abecd ] ; if _fggb . Urx <= _caaa . Llx { _efdab = append ( _efdab , _fggb ) ; } else if _fggb . Llx >= _caaa . Urx { _fcbc = append ( _fcbc , _fggb ) ; } ; } ; return _efdab , _fcbc ; } ;
_acgg := func ( _acafc [ ] int , _cefde * textPara ) ( [ ] * textPara , [ ] * textPara ) { _afdad := make ( [ ] * textPara , 0 , len ( _acafc ) - 1 ) ; _afcbe := make ( [ ] * textPara , 0 , len ( _acafc ) - 1 ) ; for _ , _ebbaf := range _acafc { _dadf := _cdbg [ _ebbaf ] ; if _dadf . Ury <= _cefde . Lly { _afcbe = append ( _afcbe , _dadf ) ;
} else if _dadf . Lly >= _cefde . Ury { _afdad = append ( _afdad , _dadf ) ; } ; } ; return _afdad , _afcbe ; } ;
// Horizontal pass. Candidates come from yNeighbours(`_dcfc`), a map from paragraph to indexes
// into this list. On the smaller-x side the candidate with the greatest Urx is picked, on the
// larger-x side the one with the smallest Llx. The pick is dropped if any other candidate on
// that side overlaps it in x, and is linked only if the `_gdef` check on the two rectangles passes.
_adce := _cdbg . yNeighbours ( _dcfc ) ; for _ , _ebaf := range _cdbg { _eaeda := _adce [ _ebaf ] ; if len ( _eaeda ) == 0 { continue ; } ; _acadfc , _ccebc := _cafac ( _eaeda , _ebaf ) ;
if len ( _acadfc ) == 0 && len ( _ccebc ) == 0 { continue ; } ; if len ( _acadfc ) > 0 { _acea := _acadfc [ 0 ] ; for _ , _aedg := range _acadfc [ 1 : ] { if _aedg . Urx >= _acea . Urx { _acea = _aedg ; } ; } ; for _ , _dbgd := range _acadfc { if _dbgd != _acea && _dbgd . Urx > _acea . Llx { _acea = nil ;
break ; } ; } ; if _acea != nil && _gdef ( _ebaf . PdfRectangle , _acea . PdfRectangle ) { _ebaf . _ggfbf = _acea ; } ; } ; if len ( _ccebc ) > 0 { _gdffge := _ccebc [ 0 ] ; for _ , _faedb := range _ccebc [ 1 : ] { if _faedb . Llx <= _gdffge . Llx { _gdffge = _faedb ; } ; } ; for _ , _bcab := range _ccebc { if _bcab != _gdffge && _bcab . Llx < _gdffge . Urx { _gdffge = nil ;
break ; } ; } ; if _gdffge != nil && _gdef ( _ebaf . PdfRectangle , _gdffge . PdfRectangle ) { _ebaf . _ffbf = _gdffge ; } ; } ; } ;
// Vertical pass, same scheme with xNeighbours(`_aeeb`). On the smaller-y side the candidate with
// the greatest Ury is picked (stored in `_feb`), on the larger-y side the one with the smallest Lly
// (stored in `_dccd`). The pick is linked only if the `_dffc` check on the two rectangles passes.
_adce = _cdbg . xNeighbours ( _aeeb ) ; for _ , _fgeg := range _cdbg { _ebab := _adce [ _fgeg ] ; if len ( _ebab ) == 0 { continue ; } ; _bcac , _gaeff := _acgg ( _ebab , _fgeg ) ;
if len ( _bcac ) == 0 && len ( _gaeff ) == 0 { continue ; } ; if len ( _gaeff ) > 0 { _dggf := _gaeff [ 0 ] ; for _ , _fgca := range _gaeff [ 1 : ] { if _fgca . Ury >= _dggf . Ury { _dggf = _fgca ; } ; } ; for _ , _cfee := range _gaeff { if _cfee != _dggf && _cfee . Ury > _dggf . Lly { _dggf = nil ;
break ; } ; } ; if _dggf != nil && _dffc ( _fgeg . PdfRectangle , _dggf . PdfRectangle ) { _fgeg . _feb = _dggf ; } ; } ; if len ( _bcac ) > 0 { _faab := _bcac [ 0 ] ; for _ , _cbcc := range _bcac [ 1 : ] { if _cbcc . Lly <= _faab . Lly { _faab = _cbcc ; } ; } ; for _ , _cdedd := range _bcac { if _cdedd != _faab && _cdedd . Lly < _faab . Ury { _faab = nil ;
break ; } ; } ; if _faab != nil && _dffc ( _fgeg . PdfRectangle , _faab . PdfRectangle ) { _fgeg . _dccd = _faab ; } ; } ; } ;
// Reciprocity pass: clear any link whose target does not point back at this paragraph through
// the opposite link (`_ggfbf` <-> `_ffbf`, `_dccd` <-> `_feb`).
for _ , _gdaee := range _cdbg { if _gdaee . _ggfbf != nil && _gdaee . _ggfbf . _ffbf != _gdaee { _gdaee . _ggfbf = nil ; } ; if _gdaee . _dccd != nil && _gdaee . _dccd . _feb != _gdaee { _gdaee . _dccd = nil ;
} ; if _gdaee . _ffbf != nil && _gdaee . _ffbf . _ggfbf != _gdaee { _gdaee . _ffbf = nil ; } ; if _gdaee . _feb != nil && _gdaee . _feb . _dccd != _gdaee { _gdaee . _feb = nil ; } ; } ; } ;
// findTables links neighbouring paragraphs, sorts the list in place with the `_afab` comparator,
// and then collects tables: grid-based ones from findGridTables(`_egffe`) when `_gdgbe` is set,
// followed by text-based ones from findTextTables() when `_abf` is set.
// Note that both addNeighbours and the sort modify the receiver's elements / order.
func ( _cgdb paraList ) findTables ( _egffe [ ] gridTiling ) [ ] * textTable { _cgdb . addNeighbours ( ) ;
_e . Slice ( _cgdb , func ( _cacc , _daddd int ) bool { return _afab ( _cgdb [ _cacc ] , _cgdb [ _daddd ] ) < 0 } ) ; var _cbdbg [ ] * textTable ; if _gdgbe { _cfgb := _cgdb . findGridTables ( _egffe ) ; _cbdbg = append ( _cbdbg , _cfgb ... ) ; } ; if _abf { _dagb := _cgdb . findTextTables ( ) ;
_cbdbg = append ( _cbdbg , _dagb ... ) ; } ; return _cbdbg ; } ;
// depth returns the smallest depth() among the cells get(x, 0) for x in [0, `_bfgf`), skipping
// cells that are nil or have `_faaed` set. If no cell qualifies the initial value 1e10 is returned.
func ( _agae * textTable ) depth ( ) float64 { _ebbf := 1e10 ; for _gbee := 0 ; _gbee < _agae . _bfgf ; _gbee ++ { _ggabg := _agae . get ( _gbee , 0 ) ; if _ggabg == nil || _ggabg . _faaed { continue ; } ; _ebbf = _fd . Min ( _ebbf , _ggabg . depth ( ) ) ;
} ; return _ebbf ; } ;
// primMinMax returns the minimum and maximum primary coordinate (`_dadb`) over the rulings.
// The list must be non-empty: element 0 is read unconditionally, so an empty list panics.
func ( _geag rulingList ) primMinMax ( ) ( float64 , float64 ) { _gdccg , _bagbe := _geag [ 0 ] . _dadb , _geag [ 0 ] . _dadb ; for _ , _ebgd := range _geag [ 1 : ] { if _ebgd . _dadb < _gdccg { _gdccg = _ebgd . _dadb ; } else if _ebgd . _dadb > _bagbe { _bagbe = _ebgd . _dadb ;
} ; } ; return _gdccg , _bagbe ; } ;
func ( _ebcf * textObject ) showTextAdjusted ( _bcae * _dd . PdfObjectArray ) error { _egeg := false ; for _ , _egfd := range _bcae . Elements ( ) { switch _egfd . ( type ) { case * _dd . PdfObjectFloat , * _dd . PdfObjectInteger : _ece , _bff := _dd . GetNumberAsFloat ( _egfd ) ;
// Numeric array element: a conversion failure aborts with that error.
if _bff != nil { _fdd . Log . Debug ( "\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _egfd , _bcae ) ;
// The adjustment is -value * 0.001 * `_gaa`, applied along X (it would be applied along Y
// only if `_egeg` were true, which it never is in this function) and concatenated onto the
// `_ccgf` matrix as a translation built by `_fdgd`.
// String elements are rendered with renderText; any other element type is a type error.
return _bff ; } ; _bgag , _daee := - _ece * 0.001 * _ebcf . _bbca . _gaa , 0.0 ; if _egeg { _daee , _bgag = _bgag , _daee ; } ; _aafd := _fdgd ( _ebf . Point { X : _bgag , Y : _daee } ) ; _ebcf . _ccgf . Concat ( _aafd ) ; case * _dd . PdfObjectString : _cadg := _dd . TraceToDirectObject ( _egfd ) ;
_cba , _eacgd := _dd . GetStringBytes ( _cadg ) ; if ! _eacgd { _fdd . Log . Trace ( "s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _egfd , _bcae ) ;
return _dd . ErrTypeError ; } ; _ebcf . renderText ( _cadg , _cba ) ; default : _fdd . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _egfd , _bcae ) ;
return _dd . ErrTypeError ; } ; } ; return nil ; } ;
// textObject is the state of the text object currently being processed.
// As used in this part of the file: `_bbca` is the shared text state, `_cddc` the graphics state
// whose stroking / non-stroking colours are refreshed by the colour operators, `_ccgf` the matrix
// that TJ adjustments are concatenated onto, `_abbb` the marks collected so far (appended to the
// page text at ET), and `_bad` records that setFont failed with ErrNotSupported.
type textObject struct { _ecfa * Extractor ; _faa * _ee . PdfPageResources ; _cddc _aeg . GraphicsState ; _bbca * textState ; _bfdc * stateStack ; _ccgf _ebf . Matrix ; _afbg _ebf . Matrix ; _abbb [ ] * textMark ; _bad bool ; } ;
// _bbegf creates a textWord from the marks in `_bedd`, which must be non-empty (element 0 is
// read unconditionally). The word's rectangle is the `_dcd` combination of all mark rectangles,
// `_aaaa` is the largest `_cfccd` of the marks, and `_baed` is `_bdcfc`.Ury minus the word's Lly.
// The slice `_bedd` is stored in the word as-is, not copied.
func _bbegf ( _bedd [ ] * textMark , _bdcfc _ee . PdfRectangle ) * textWord { _acbaf := _bedd [ 0 ] . PdfRectangle ;
_cdebb := _bedd [ 0 ] . _cfccd ; for _ , _bebfe := range _bedd [ 1 : ] { _acbaf = _dcd ( _acbaf , _bebfe . PdfRectangle ) ; if _bebfe . _cfccd > _cdebb { _cdebb = _bebfe . _cfccd ; } ; } ; return & textWord { PdfRectangle : _acbaf , _eedb : _bedd , _baed : _bdcfc . Ury - _acbaf . Lly , _aaaa : _cdebb } ;
} ;
// _bagd returns the intersection of the rectangles `_gbgd` and `_dgfc` and true, or a zero
// rectangle and false when the `_ddc` check on the pair fails (presumably "rectangles overlap").
func _bagd ( _gbgd , _dgfc _ee . PdfRectangle ) ( _ee . PdfRectangle , bool ) { if ! _ddc ( _gbgd , _dgfc ) { return _ee . PdfRectangle { } , false ; } ; return _ee . PdfRectangle { Llx : _fd . Max ( _gbgd . Llx , _dgfc . Llx ) , Urx : _fd . Min ( _gbgd . Urx , _dgfc . Urx ) , Lly : _fd . Max ( _gbgd . Lly , _dgfc . Lly ) , Ury : _fd . Min ( _gbgd . Ury , _dgfc . Ury ) } , true ;
} ;
// newSubPath clears the current path and, when `_agcb` is set, logs the shapes state.
func ( _decc * shapesState ) newSubPath ( ) { _decc . clearPath ( ) ; if _agcb { _fdd . Log . Info ( "\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073" , _decc ) ; } ; } ;
// New returns an Extractor instance for extracting content from the input PDF page.
func New(page *_ee.PdfPage) (*Extractor, error) {
	// Key reported to the license usage tracker ("extractor.New").
	const trackKey = "\u0065\u0078\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077"

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}
	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _fc.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	ex := &Extractor{
		_bc:  contents,
		_abc: page.Resources,
		_da:  *mediaBox,
		_ac:  map[string]fontEntry{},
		_ef:  map[string]textResult{},
	}

	// Normalize the media box so that Llx <= Urx and Lly <= Ury.
	if ex._da.Llx > ex._da.Urx {
		_fdd.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", ex._da)
		ex._da.Llx, ex._da.Urx = ex._da.Urx, ex._da.Llx
	}
	if ex._da.Lly > ex._da.Ury {
		_fdd.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", ex._da)
		ex._da.Lly, ex._da.Ury = ex._da.Ury, ex._da.Lly
	}

	_gg.TrackUse(trackKey)
	return ex, nil
}

// findGridTables runs findTableGrid for every tiling in `_decb` and returns the tables that were
// found. Each found table has markCells called on it, and every paragraph reported as used by
// findTableGrid gets its `_bccb` flag set. Verbose output is emitted when `_fabc` is set.
func (_fegd paraList) findGridTables(_decb []gridTiling) []*textTable {
	if _fabc {
		_fdd.Log.Info("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073", len(_fegd))
		for i, para := range _fegd {
			_fc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
	}

	var tables []*textTable
	for i, tiling := range _decb {
		table, usedParas := _fegd.findTableGrid(tiling)
		if table != nil {
			table.log(_fc.Sprintf("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064", i))
			tables = append(tables, table)
			table.markCells()
		}
		for para := range usedParas {
			para._bccb = true
		}
	}

	if _fabc {
		_fdd.Log.Info("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s", len(tables))
	}
	return tables
}

// emptyCompositeRow returns true if no composite cell in row `_gefd` (columns 0.._bfgf-1)
// contains any paragraphs.
func (_fccg *textTable) emptyCompositeRow(_gefd int) bool {
	for x := 0; x < _fccg._bfgf; x++ {
		cell, ok := _fccg._facee[_gdeed(x, _gefd)]
		if ok && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// makeRemovals returns a map with one empty word set for every key of the bag's `_eed` map.
func (_fbe *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	removals := make(map[int]map[*textWord]struct{}, len(_fbe._eed))
	for key := range _fbe._eed {
		removals[key] = make(map[*textWord]struct{})
	}
	return removals
}

// _adbb (logged as "fixCells") walks the keys of `_ddbb` in the order returned by _bfdcb,
// starting at the first key with a non-nil entry, and clamps the last element of an entry to the
// first element of that entry when it is larger.
//
// NOTE(review): `prev` below is read from the current key rather than from `prevKey`, so the
// comparison is between an entry and itself and the result is stored under `prevKey`. This looks
// like it was meant to read `_ddbb[prevKey]` - confirm before changing; behavior is preserved here.
func _adbb(_ddbb map[int][]float64) {
	if len(_ddbb) <= 1 {
		return
	}
	keys := _bfdcb(_ddbb)
	if _fabc {
		_fdd.Log.Info("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076", keys)
	}

	// Locate the first key that has a non-nil entry.
	var start, prevKey int
	for start, prevKey = range keys {
		if _ddbb[prevKey] != nil {
			break
		}
	}

	for i, key := range keys[start:] {
		cur := _ddbb[key]
		if cur == nil {
			continue
		}
		if _fabc {
			_fc.Printf("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a", start+i, prevKey, key)
		}
		prev := _ddbb[key]
		if prev[len(prev)-1] > cur[0] {
			prev[len(prev)-1] = cur[0]
			_ddbb[prevKey] = prev
		}
		prevKey = key
	}
}

// _cfcgf formats `_ggfae` as "{k: v, k: v, ...}" with keys in the order returned by _bfdcb and
// values printed with two decimals.
func _cfcgf(_ggfae map[int][]float64) string {
	keys := _bfdcb(_ggfae)
	parts := make([]string, len(_ggfae))
	for i, key := range keys {
		parts[i] = _fc.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", key, _ggfae[key])
	}
	joined := _a.Join(parts, "\u002c\u0020")
	return _fc.Sprintf("\u007b\u0025\u0073\u007d", joined)
}

// setFont stores the font size `_gce` in the text state and then looks up the font named `_dfa`.
// The size is stored even if the lookup fails; the font is only stored on success.
// A nil receiver is a no-op.
func (_gfd *textObject) setFont(_dfa string, _gce float64) error {
	if _gfd == nil {
		return nil
	}
	_gfd._bbca._gaa = _gce

	font, err := _gfd.getFont(_dfa)
	if err != nil {
		return err
	}
	_gfd._bbca._gfgf = font
	return nil
}

// getFontDirect builds a PdfFont from the font dictionary named `_ecec`.
// A failure in NewPdfFontFromPdfObject is logged and returned together with whatever font value
// that call produced.
func (_debg *textObject) getFontDirect(_ecec string) (*_ee.PdfFont, error) {
	fontObj, err := _debg.getFontDict(_ecec)
	if err != nil {
		return nil, err
	}
	font, err := _ee.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		_fdd.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _ecec, err)
	}
	return font, err
}

// toCellTextMarks returns the TextMarks of all lines in the paragraph, updating `*_ceag` as marks
// are produced. When `_effg` is set and a non-final line ends in a hyphen, the trailing hyphen is
// removed with _gfce and no separator is inserted; otherwise the separator chosen by _dddb is
// appended between consecutive lines.
func (_edfga *textPara) toCellTextMarks(_ceag *int) []TextMark {
	var marks []TextMark
	lastLine := len(_edfga._ddgc) - 1
	for i, line := range _edfga._ddgc {
		lineMarks := line.toTextMarks(_ceag)
		joinHyphen := _effg && line.endsInHyphen() && i != lastLine
		if joinHyphen {
			lineMarks = _gfce(lineMarks, _ceag)
		}
		marks = append(marks, lineMarks...)
		if !joinHyphen && i != lastLine {
			sep := _dddb(line._bbcac, _edfga._ddgc[i+1]._bbcac)
			marks = _eaea(marks, _ceag, sep)
		}
	}
	return marks
}

// size returns the number of entries on the stack.
func (_accd *stateStack) size() int {
	return len(*_accd)
}
// _agd creates a textLine seeded from the word returned by `_cbae.firstWord(_eaad)`: the line
// takes that word's rectangle, `_aaaa` and `_baed` values, and the word is then pulled out of the
// bag into the line via pullWord.
func _agd(_cbae *wordBag, _eaad int) *textLine {
	word := _cbae.firstWord(_eaad)
	line := &textLine{
		PdfRectangle: word.PdfRectangle,
		_edad:        word._aaaa,
		_bbcac:       word._baed,
	}
	line.pullWord(_cbae, word, _eaad)
	return line
}
2022-06-27 19:58:38 +00:00
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
type TextTable struct {
	// W and H are the table dimensions.
	// NOTE(review): presumably width in columns and height in rows - confirm against users.
	W, H int
	// Cells holds the table cells.
	Cells [][]TableCell
}

// _dbefe returns true if the absolute value of `_gfbc` is below the `_gade` threshold.
func _dbefe(_gfbc float64) bool {
	return _fd.Abs(_gfbc) < _gade
}

// absorb merges `_eeeef` into the receiver: the rectangle becomes the result of _dcd applied to
// both rectangles, and the marks of `_eeeef` are appended to the receiver's marks.
func (_fffa *textWord) absorb(_eeeef *textWord) {
	_fffa.PdfRectangle = _dcd(_fffa.PdfRectangle, _eeeef.PdfRectangle)
	_fffa._eedb = append(_fffa._eedb, _eeeef._eedb...)
}

// llyOrdering returns the indexes of the paragraphs sorted (stably) by increasing Lly.
func (_afbee paraList) llyOrdering() []int {
	order := make([]int, len(_afbee))
	for i := range _afbee {
		order[i] = i
	}
	_e.SliceStable(order, func(a, b int) bool {
		return _afbee[order[a]].Lly < _afbee[order[b]].Lly
	})
	return order
}

// last returns the final point of the subpath. It panics if the subpath has no points.
func (_dbeac *subpath) last() _ebf.Point {
	points := _dbeac._bdb
	return points[len(points)-1]
}

// writeCellText writes the text of every line in the paragraph to `_ggad`.
// When `_effg` is set and a non-final line ends in a hyphen, the line text is passed through
// _ebgc and no separator is written; otherwise the separator chosen by _dddb is written between
// consecutive lines. Errors from Write are not checked.
func (_ddgcd *textPara) writeCellText(_ggad _f.Writer) {
	lastLine := len(_ddgcd._ddgc) - 1
	for i, line := range _ddgcd._ddgc {
		text := line.text()
		joinHyphen := _effg && line.endsInHyphen() && i != lastLine
		if joinHyphen {
			text = _ebgc(text)
		}
		_ggad.Write([]byte(text))
		if !joinHyphen && i != lastLine {
			sep := _dddb(line._bbcac, _ddgcd._ddgc[i+1]._bbcac)
			_ggad.Write([]byte(sep))
		}
	}
}
2022-06-27 19:58:38 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20> ).
2022-07-13 21:28:43 +00:00
func (_abbe *Extractor) ExtractText() (string, error) {
	// Only the text and the error of ExtractTextWithStats are passed on.
	text, _, _, err := _abbe.ExtractTextWithStats()
	return text, err
}

// extractFormImages extracts the images from the form XObject named `_feef` in `_dge`.
// A missing form (nil without error) is silently skipped. The form's own resources are used when
// present, otherwise `_dge` is used. The `_fdbg` counter is incremented on success.
func (_bac *imageExtractContext) extractFormImages(_feef *_dd.PdfObjectName, _gef _aeg.GraphicsState, _dge *_ee.PdfPageResources) error {
	form, err := _dge.GetXObjectFormByName(*_feef)
	if err != nil {
		return err
	}
	if form == nil {
		return nil
	}

	content, err := form.GetContentStream()
	if err != nil {
		return err
	}

	resources := form.Resources
	if resources == nil {
		resources = _dge
	}

	if err = _bac.extractContentStreamImages(string(content), resources); err != nil {
		return err
	}
	_bac._fdbg++
	return nil
}

// bbox returns the word's rectangle; it makes textWord satisfy the bounded interface.
func (_fdfbg *textWord) bbox() _ee.PdfRectangle {
	return _fdfbg.PdfRectangle
}

// findTableGrid tries to build a table from the paragraphs that fall in the tiles of `_baea`.
// It returns the table and the set of paragraphs placed in it, or (nil, nil) when
//   - more than int((1-_dace) * number of tiles) tiles contain no paragraphs, or
//   - no cell in row 0 is nil or has `_faaed` unset (logged as "!numHeader=0").
//
// On success the table is passed through reduceTiling and subdivide before being returned.
func (_dbeg paraList) findTableGrid(_baea gridTiling) (*textTable, map[*textPara]struct{}) {
	width := len(_baea._facc)
	height := len(_baea._aeccd)
	table := textTable{
		_ebfb:  true,
		_bfgf:  width,
		_dcbg:  height,
		_dbfec: make(map[uint64]*textPara, width*height),
		_facee: make(map[uint64]compositeCell, width*height),
	}
	used := make(map[*textPara]struct{})
	maxEmpty := int((1.0 - _dace) * float64(width*height))
	numEmpty := 0
	if _gedg {
		_fdd.Log.Info("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064", width, height)
	}

	for y, rowKey := range _baea._aeccd {
		row, found := _baea._fgbg[rowKey]
		if !found {
			continue
		}
		for x, colKey := range _baea._facc {
			tile, found := row[colKey]
			if !found {
				continue
			}
			paras := _dbeg.inTile(tile)
			if len(paras) > 0 {
				table.putComposite(x, y, paras, tile.PdfRectangle)
				for _, para := range paras {
					used[para] = struct{}{}
				}
				continue
			}
			// Empty tile: give up once too many tiles are empty.
			numEmpty++
			if numEmpty > maxEmpty {
				if _gedg {
					_fdd.Log.Info("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064", numEmpty)
				}
				return nil, nil
			}
		}
	}

	// Count the row-0 cells that are nil or do not have `_faaed` set.
	count := 0
	for x := 0; x < width; x++ {
		cell := table.get(x, 0)
		if cell == nil || !cell._faaed {
			count++
		}
	}
	if count == 0 {
		if _gedg {
			_fdd.Log.Info("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030")
		}
		return nil, nil
	}

	result := table.reduceTiling(_baea, _fcgf)
	result = result.subdivide()
	return result, used
}

// growTable repeatedly extends the table by one row (from getDown) and/or one column (from
// getRight) until neither is available.
// When both are available and share the same, not yet taken, final element, the row is added
// first, getRight is re-queried, and if a column is still available it is added and the shared
// element is placed in the new bottom-right cell. Otherwise a row is preferred over a column.
func (_effca *textTable) growTable() {
	addRow := func(_fggea paraList) {
		_effca._dcbg++
		for x := 0; x < _effca._bfgf; x++ {
			_effca.put(x, _effca._dcbg-1, _fggea[x])
		}
	}
	addColumn := func(_egdcd paraList) {
		_effca._bfgf++
		for y := 0; y < _effca._dcbg; y++ {
			_effca.put(_effca._bfgf-1, y, _egdcd[y])
		}
	}

	if _dga {
		_effca.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}

	for step := 0; ; step++ {
		grown := false
		down := _effca.getDown()
		right := _effca.getRight()
		if _dga {
			_fc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", step, _effca)
			_fc.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", down)
			_fc.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", right)
		}

		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				addRow(down)
				if right = _effca.getRight(); right != nil {
					addColumn(right)
					_effca.put(_effca._bfgf-1, _effca._dcbg-1, corner)
				}
				grown = true
			}
		}

		if grown {
			continue
		}
		switch {
		case down != nil:
			addRow(down)
		case right != nil:
			addColumn(right)
		default:
			// Nothing left to add.
			return
		}
	}
}

// getComposite returns the paragraphs and bounding rectangle (via parasBBox) of the composite
// cell at (`_egaa`, `_dcbf`), or (nil, empty rectangle) when there is no such cell.
func (_bbdb *textTable) getComposite(_egaa, _dcbf int) (paraList, _ee.PdfRectangle) {
	cell, found := _bbdb._facee[_gdeed(_egaa, _dcbf)]
	if _fabc {
		_fc.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a", _egaa, _dcbf, cell.String())
	}
	if !found {
		return nil, _ee.PdfRectangle{}
	}
	return cell.parasBBox()
}

// _eaag returns a wordBag that contains only `_gbcb`, stored under the key _cfe(_gbcb._baed).
// The bag takes the word's rectangle and `_aaaa` value; `_cbbe`, `_egaf` and `_acge` are stored
// in the `_ecef`, `_dffb` and `_gdfa` fields respectively.
func _eaag(_gbcb *textWord, _cbbe float64, _egaf, _acge rulingList) *wordBag {
	key := _cfe(_gbcb._baed)
	return &wordBag{
		_eed:         map[int][]*textWord{key: {_gbcb}},
		PdfRectangle: _gbcb.PdfRectangle,
		_bbgb:        _gbcb._aaaa,
		_ecef:        _cbbe,
		_dffb:        _egaf,
		_gdfa:        _acge,
	}
}

// Text rendering mode flags. They are distinct bits (1, 2 and 4), so they can be combined.
const (
	RenderModeStroke RenderMode = 1 << iota
	RenderModeFill
	RenderModeClip
)

const _feca = 1.0 / 1000.0

// asRuling converts the rectangle to a ruling of kind `_ffabf`.
// For kind `_fbeg` (logged as rulingVert) the ruling sits at the mean of Llx and Urx and runs
// from Lly to Ury; for kind `_dcce` (logged as rulingHorz) it sits at the mean of Lly and Ury and
// runs from Llx to Urx. It returns (nil, false) if checkWidth rejects the rectangle or the kind
// is neither of the two.
func (_cgcaa rectRuling) asRuling() (*ruling, bool) {
	result := ruling{_deeb: _cgcaa._ffabf, Color: _cgcaa.Color, _fbgc: _cabg}

	switch _cgcaa._ffabf {
	case _fbeg:
		result._dadb = 0.5 * (_cgcaa.Llx + _cgcaa.Urx)
		result._gcfe = _cgcaa.Lly
		result._fbdf = _cgcaa.Ury
		width, ok := _cgcaa.checkWidth(_cgcaa.Llx, _cgcaa.Urx)
		if !ok {
			if _cbfcg {
				_fdd.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _cgcaa)
			}
			return nil, false
		}
		result._cecfb = width
	case _dcce:
		result._dadb = 0.5 * (_cgcaa.Lly + _cgcaa.Ury)
		result._gcfe = _cgcaa.Llx
		result._fbdf = _cgcaa.Urx
		width, ok := _cgcaa.checkWidth(_cgcaa.Lly, _cgcaa.Ury)
		if !ok {
			if _cbfcg {
				_fdd.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _cgcaa)
			}
			return nil, false
		}
		result._cecfb = width
	default:
		_fdd.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _cgcaa._ffabf)
		return nil, false
	}
	return &result, true
}
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// String returns a string describing `pt`.
func (_gdfc PageText) String() string {
	summary := _fc.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_gdfc._gcea))
	// The summary is repeated as a "-" header line and a "+" footer line around the elements.
	parts := []string{"\u002d" + summary}
	for _, mark := range _gdfc._gcea {
		parts = append(parts, mark.String())
	}
	parts = append(parts, "\u002b"+summary)
	return _a.Join(parts, "\u000a")
}

// lines returns the lines of all paragraphs, concatenated in paragraph order.
func (_aae paraList) lines() []*textLine {
	var all []*textLine
	for _, para := range _aae {
		all = append(all, para._ddgc...)
	}
	return all
}

// bounded is implemented by objects that have a bounding rectangle.
type bounded interface {
	bbox() _ee.PdfRectangle
}

// close closes the subpath: the first point is appended again unless _fabdg reports that it
// matches the last point, the `_caee` flag is set and duplicate points are removed.
func (_gggg *subpath) close() {
	if !_fabdg(_gggg._bdb[0], _gggg.last()) {
		_gggg.add(_gggg._bdb[0])
	}
	_gggg._caee = true
	_gggg.removeDuplicates()
}

// _ggec (logged as "rowBorders") returns the y values of the horizontal gaps that separate
// groups of lines in the cells `_ggae`.
//
// The lines of all cells are sorted by `_bbcac` (ties, as decided by _dafec, broken by Llx).
// A border is placed midway between a line's Ury and the lowest Lly seen so far whenever the line
// lies entirely below that Lly; the lines between borders form a group.
// The borders are discarded (nil is returned) unless hasLines reports true for every cell and
// every group. nil is also returned when the cells contain no lines at all.
func _ggec(_ggae []compositeCell) []float64 {
	var lines []*textLine
	numParas := 0
	for _, cell := range _ggae {
		numParas += len(cell.paraList)
		lines = append(lines, cell.lines()...)
	}
	_e.Slice(lines, func(i, j int) bool {
		a, b := lines[i], lines[j]
		da, db := a._bbcac, b._bbcac
		if !_dafec(da - db) {
			return da < db
		}
		return a.Llx < b.Llx
	})
	if _fabc {
		_fc.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", numParas, len(lines))
		for i, line := range lines {
			_fc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, line)
		}
	}

	// Without any lines there is nothing to separate; indexing lines[0] below would panic.
	if len(lines) == 0 {
		return nil
	}

	var borders []float64
	lowest := lines[0] // Line with the smallest Lly seen so far.
	var groups [][]*textLine
	group := []*textLine{lowest}
	for i, line := range lines[1:] {
		if line.Ury < lowest.Lly {
			border := 0.5 * (line.Ury + lowest.Lly)
			if _fabc {
				_fc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a", i, line.Ury, lowest.Lly, border, lowest, line)
			}
			borders = append(borders, border)
			groups = append(groups, group)
			group = nil
		}
		group = append(group, line)
		if line.Lly < lowest.Lly {
			lowest = line
		}
	}
	if len(group) > 0 {
		groups = append(groups, group)
	}

	if _fabc {
		_fc.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", borders)
	}
	if _fabc {
		_fdd.Log.Info("\u0072\u006f\u0077\u003d\u0025\u0064", len(_ggae))
		for i, cell := range _ggae {
			_fc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, cell)
		}
		_fdd.Log.Info("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d", len(groups))
		for i, g := range groups {
			_fc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a", i, len(g))
			for j, line := range g {
				_fc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", j, line)
			}
		}
	}

	// The borders are only usable if every cell has lines in every group.
	allSpan := true
	for gi, g := range groups {
		groupSpans := true
		for ci, cell := range _ggae {
			if _fabc {
				_fc.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", gi, len(groups), ci, len(_ggae), cell)
			}
			if !cell.hasLines(g) {
				if _fabc {
					_fc.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", gi, len(groups), ci, len(_ggae))
				}
				groupSpans = false
				break
			}
		}
		if !groupSpans {
			allSpan = false
			break
		}
	}
	if !allSpan {
		if _fabc {
			_fdd.Log.Info("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg")
		}
		borders = nil
	}
	if _fabc && borders != nil {
		_fc.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", borders)
	}
	return borders
}
const _fagb = 10

// setTextLeading stores `_cbd` in the `_caa` field of the text state.
// A nil receiver is a no-op.
func (_daed *textObject) setTextLeading(_cbd float64) {
	if _daed != nil {
		_daed._bbca._caa = _cbd
	}
}

// compositeCell is a rectangle together with the paragraphs associated with it.
type compositeCell struct {
	_ee.PdfRectangle
	paraList
}
// GetContentStreamOps returns the contentStreamOps field of `pt`.
func (_ced *PageText) GetContentStreamOps() *_aeg.ContentStreamOperations {
	return _ced._eda
}

// depth returns -1 if `_faaed` is set, otherwise the `_bbcac` value of the first line if the
// paragraph has lines, otherwise the depth of `_cbfe`.
func (_bcgb *textPara) depth() float64 {
	switch {
	case _bcgb._faaed:
		return -1.0
	case len(_bcgb._ddgc) > 0:
		return _bcgb._ddgc[0]._bbcac
	default:
		return _bcgb._cbfe.depth()
	}
}

// nextLine moves via moveLP by (0, -`_caa`), where `_caa` is the value stored by setTextLeading.
func (_eaf *textObject) nextLine() {
	leading := _eaf._bbca._caa
	_eaf.moveLP(0, -leading)
}

// getCurrentFont returns the font stored in the text state, or the default font (after logging a
// debug message) when none has been set.
func (_cfce *textObject) getCurrentFont() *_ee.PdfFont {
	if font := _cfce._bbca._gfgf; font != nil {
		return font
	}
	_fdd.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e")
	return _ee.DefaultFont()
}
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	_fef []TextMark
}

// setCharSpacing stores `_gfg` in the `_dcfg` field of the text state and logs the new state when
// `_ccfgb` is set. A nil receiver is a no-op.
func (_dfbe *textObject) setCharSpacing(_gfg float64) {
	if _dfbe == nil {
		return
	}
	_dfbe._bbca._dcfg = _gfg
	if _ccfgb {
		_fdd.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _gfg, _dfbe._bbca.String())
	}
}

// fontsize returns the `_edad` value of the paragraph's first line.
// It panics if the paragraph has no lines.
func (_gdga *textPara) fontsize() float64 {
	return _gdga._ddgc[0]._edad
}

// yMean returns the mean of the Y coordinates of the ruling's two end points.
func (_fbdd lineRuling) yMean() float64 {
	return 0.5 * (_fbdd._bdgf.Y + _fbdd._agba.Y)
}

// endsInHyphen returns true if the line ends in a hyphen that passes the _fdafg check.
// The last rune of the final word must be in the Unicode Hyphen range table. The check then
// succeeds if the final word has `_faaf` set and its text passes _fdafg, or if the text of the
// whole line passes _fdafg. It panics if the line has no words.
func (_dbd *textLine) endsInHyphen() bool {
	lastWord := _dbd._bbfe[len(_dbd._bbfe)-1]
	wordText := lastWord._fbgbed
	lastRune, size := _df.DecodeLastRuneInString(wordText)
	if size <= 0 || !_b.Is(_b.Hyphen, lastRune) {
		return false
	}
	if lastWord._faaf && _fdafg(wordText) {
		return true
	}
	return _fdafg(_dbd.text())
}

// _gfce removes the last rune from the last mark in `_fddag` and moves `*_fdfb` back by the
// number of bytes removed. It returns the updated slice.
//
// If the last mark holds a single rune the whole mark is dropped. Otherwise the mark's Text is
// replaced, in place in the slice, by the result of _ebgc, so that the marks stay consistent
// with the adjusted offset. An empty `_fddag` is returned unchanged.
func _gfce(_fddag []TextMark, _fdfb *int) []TextMark {
	if len(_fddag) == 0 {
		return _fddag
	}
	lastIdx := len(_fddag) - 1
	last := _fddag[lastIdx]

	if len([]rune(last.Text)) == 1 {
		_fddag = _fddag[:lastIdx]
		if lastIdx == 0 {
			// No preceding mark to take the offset from: step back over the removed text,
			// mirroring the byte arithmetic of the multi-rune case below.
			*_fdfb -= len(last.Text)
		} else {
			prev := _fddag[lastIdx-1]
			*_fdfb = prev.Offset + len(prev.Text)
		}
		return _fddag
	}

	trimmed := _ebgc(last.Text)
	*_fdfb += len(trimmed) - len(last.Text)
	// Write through the slice: `last` is a copy, so assigning to it would lose the change.
	_fddag[lastIdx].Text = trimmed
	return _fddag
}

// _aecb returns the difference of the _ddfa values of `_gdfg` and `_eafc`.
func _aecb(_gdfg, _eafc bounded) float64 {
	return _ddfa(_gdfg) - _ddfa(_eafc)
}

// emptyCompositeColumn returns true if no composite cell in column `_faaab` (rows 0.._dcbg-1)
// contains any paragraphs.
func (_agde *textTable) emptyCompositeColumn(_faaab int) bool {
	for y := 0; y < _agde._dcbg; y++ {
		cell, ok := _agde._facee[_gdeed(_faaab, y)]
		if ok && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// wordBag is a rectangle together with a collection of words grouped under integer keys.
type wordBag struct {
	_ee.PdfRectangle
	_bbgb        float64
	_dffb, _gdfa rulingList
	_ecef        float64
	_eed         map[int][]*textWord
}

// fill appends a pathSection made of the current subpaths (`_ebd`) and the current fill color to
// `*_defa`. Details are printed when `_cfcd` is set; with `_ffee` also set, the subpaths up to
// and including index 10 are printed.
func (_cafgc *shapesState) fill(_defa *[]pathSection) {
	section := pathSection{_fdee: _cafgc._ebd, Color: _cafgc._cgga.getFillColor()}
	*_defa = append(*_defa, section)
	if !_cfcd {
		return
	}

	box := section.bbox()
	_fc.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*_defa), len(section._fdee), _cafgc, section.Color, box, box.Width(), box.Height())
	if _ffee {
		for i, sp := range section._fdee {
			_fc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sp)
			if i == 10 {
				break
			}
		}
	}
}

// gridTiling is a rectangle divided into tiles. The tiles are stored in a two-level map keyed by
// float64 values; findTableGrid looks tiles up as `_fgbg[v][u]` with v taken from `_aeccd` and u
// taken from `_facc`.
type gridTiling struct {
	_ee.PdfRectangle
	_facc  []float64
	_aeccd []float64
	_fgbg  map[float64]map[float64]gridTile
}
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct {
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Text is the extracted text.
Text string ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// BBox is the bounding box of the text.
BBox _ee . PdfRectangle ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Font is the font the text was drawn with.
Font * _ee . PdfFont ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
FillColor _c . Color ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
StrokeColor _c . Color ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Orientation is the text orientation
Orientation int ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// DirectObject is the underlying PdfObject (Text Object) that represents the visible texts. This is introduced to get
// a simple access to the TextObject in case editing or replacment of some text is needed. E.g during redaction.
DirectObject _dd . PdfObject ;
// ObjString is a decoded string operand of a text-showing operator. It has the same value as `Text` attribute except
// when many glyphs are represented with the same Text Object that contains multiple length string operand in which case
// ObjString spans more than one character string that falls in different TextMark objects.
ObjString [ ] string ; Tw float64 ; Th float64 ; Tc float64 ; Index int ; } ; func ( _feeeb * subpath ) add ( _agc ... _ebf . Point ) { _feeeb . _bdb = append ( _feeeb . _bdb , _agc ... ) } ; func ( _fbaf rulingList ) comp ( _gddcg , _aaage int ) bool { _dbebg , _bfbdf := _fbaf [ _gddcg ] , _fbaf [ _aaage ] ;
_geaf , _cefeg := _dbebg . _deeb , _bfbdf . _deeb ; if _geaf != _cefeg { return _geaf > _cefeg ; } ; if _geaf == _gcbc { return false ; } ; _fddcf := func ( _cfaeb bool ) bool { if _geaf == _dcce { return _cfaeb ; } ; return ! _cfaeb ; } ; _ddce , _afcc := _dbebg . _dadb , _bfbdf . _dadb ;
if _ddce != _afcc { return _fddcf ( _ddce > _afcc ) ; } ; _ddce , _afcc = _dbebg . _gcfe , _bfbdf . _gcfe ; if _ddce != _afcc { return _fddcf ( _ddce < _afcc ) ; } ; return _fddcf ( _dbebg . _fbdf < _bfbdf . _fbdf ) ; } ;
2022-06-27 19:58:38 +00:00
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int

// _fgaf returns the result of _dffc applied to the `_ddebf` rectangles of the two paragraphs.
func _fgaf(_fdad, _bgdee *textPara) bool {
	return _dffc(_fdad._ddebf, _bgdee._ddebf)
}

// setTextMatrix builds a matrix from the six values in `_dbf` and stores it in both `_ccgf` and
// `_afbg`. If `_dbf` does not have exactly six elements the call is logged and ignored.
func (_affe *textObject) setTextMatrix(_dbf []float64) {
	if len(_dbf) != 6 {
		_fdd.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", len(_dbf))
		return
	}
	matrix := _ebf.NewMatrix(_dbf[0], _dbf[1], _dbf[2], _dbf[3], _dbf[4], _dbf[5])
	_affe._ccgf = matrix
	_affe._afbg = _affe._ccgf
}

// establishSubpath returns the current (last) subpath. If lastpointEstablished reports false, a
// new subpath created by _gbef from the returned point is appended first. It returns nil when
// there are no subpaths; otherwise it also clears the `_bfdb` flag.
func (_cecc *shapesState) establishSubpath() *subpath {
	point, established := _cecc.lastpointEstablished()
	if !established {
		_cecc._ebd = append(_cecc._ebd, _gbef(point))
	}
	count := len(_cecc._ebd)
	if count == 0 {
		return nil
	}
	_cecc._bfdb = false
	return _cecc._ebd[count-1]
}

// toTextMarks returns the TextMarks for the marks in the word, appending each one through _aeaf,
// which is also passed `_efaa`.
func (_dcgd *textWord) toTextMarks(_efaa *int) []TextMark {
	var marks []TextMark
	for _, mark := range _dcgd._eedb {
		marks = _aeaf(marks, _efaa, mark.ToTextMark())
	}
	return marks
}

// computeEBBoxes computes the `_ddebf` rectangle of every paragraph.
// Each `_ddebf` starts as the paragraph's own rectangle. It is then widened horizontally towards
// the edges of paragraphs whose Ury does not exceed this paragraph's Lly, but not beyond the
// nearest paragraph to the left or right among those returned by yNeighbours(0).
// If `_ggdf` is set, the results are also copied back into the paragraphs' rectangles.
func (_dgcc paraList) computeEBBoxes() {
	if _cgea {
		_fdd.Log.Info("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a")
	}
	for _, para := range _dgcc {
		para._ddebf = para.PdfRectangle
	}

	neighbours := _dgcc.yNeighbours(0)
	for i, para := range _dgcc {
		box := para._ddebf

		// Horizontal limits set by the neighbours to the left and right of `para`.
		leftLimit, rightLimit := -1.0e9, +1.0e9
		for _, idx := range neighbours[para] {
			other := _dgcc[idx]._ddebf
			switch {
			case other.Urx < box.Llx:
				leftLimit = _fd.Max(leftLimit, other.Urx)
			case box.Urx < other.Llx:
				rightLimit = _fd.Min(rightLimit, other.Llx)
			}
		}

		// Widen towards the edges of paragraphs lying at or below `para`, within the limits.
		for j, otherPara := range _dgcc {
			other := otherPara._ddebf
			if i == j || other.Ury > box.Lly {
				continue
			}
			switch {
			case leftLimit <= other.Llx && other.Llx < box.Llx:
				box.Llx = other.Llx
			case other.Urx <= rightLimit && box.Urx < other.Urx:
				box.Urx = other.Urx
			}
		}

		if _cgea {
			_fc.Printf("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a", i, para._ddebf, box, _fgaa(para.text(), 50))
		}
		para._ddebf = box
	}

	if _ggdf {
		for _, para := range _dgcc {
			para.PdfRectangle = para._ddebf
		}
	}
}

// _adeaf returns the ruling kind that _fbbc assigns to the segment between `_dcaaa` and `_egbg`,
// based on its absolute X and Y extents and the `_ddge` tolerance.
func _adeaf(_dcaaa, _egbg _ebf.Point) rulingKind {
	dx := _fd.Abs(_dcaaa.X - _egbg.X)
	dy := _fd.Abs(_dcaaa.Y - _egbg.Y)
	return _fbbc(dx, dy, _ddge)
}
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` < tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// `start` is clamped up to the offset of the first mark and `end` is clamped down to one past the
// offset of the last mark before the search is made.
// An error is returned when `ma` is nil, when `end` < `start`, or when no mark overlaps the range.
// An empty `ma` is returned unchanged.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
func (_acdg *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if _acdg == nil {
		return nil, _gd.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _fc.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}
	marks := _acdg._fef
	n := len(marks)
	if n == 0 {
		return _acdg, nil
	}
	if start < marks[0].Offset {
		start = marks[0].Offset
	}
	if end > marks[n-1].Offset+1 {
		end = marks[n-1].Offset + 1
	}

	// First mark whose last byte is at or after `start`.
	iStart := _e.Search(n, func(i int) bool { return marks[i].Offset+len(marks[i].Text)-1 >= start })
	if !(0 <= iStart && iStart < n) {
		err := _fc.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, iStart, n, marks[0], marks[n-1])
		return nil, err
	}

	// First mark that starts at or after `end`. sort.Search returns n when there is no such mark, which
	// is the normal result when the range reaches the last mark, so n is a valid exclusive upper bound.
	// (Rejecting n made every range that included the last mark fail with "Out of range".)
	iEnd := _e.Search(n, func(i int) bool { return marks[i].Offset > end-1 })
	if !(0 <= iEnd && iEnd <= n) {
		err := _fc.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076", end, iEnd, n, marks[0], marks[n-1])
		return nil, err
	}
	if iEnd <= iStart {
		return nil, _fc.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_fef: marks[iStart:iEnd]}, nil
}

// _aeaf sets the Offset of `_bab` to the current value of `*_aadd`, appends it to `_ddad` and advances
// `*_aadd` by the byte length of the mark's Text. The extended slice is returned.
func _aeaf(_ddad []TextMark, _aadd *int, _bab TextMark) []TextMark {
	_bab.Offset = *_aadd
	_ddad = append(_ddad, _bab)
	*_aadd += len(_bab.Text)
	return _ddad
}

// extractPageResourcesToFont appends a Font entry to `_ffg.Fonts` for every key of the Font dictionary
// in the page resources `_bb`, skipping fonts for which _baf reports the name is already in `_ffg.Fonts`.
// The embedded font program, if any, is decoded from FontFile, FontFile2 or FontFile3 (checked in that
// order) and given a file name made of the font name plus ".pfb", ".ttf" or ".cff" respectively.
// An error is returned if the Font entry is not a dictionary, a named font cannot be found, the font
// cannot be loaded, or a font stream cannot be decoded.
// NOTE(review): the font descriptor is dereferenced without a nil check — confirm FontDescriptor()
// cannot return nil for the fonts that reach this method.
func (_ffg *PageFonts) extractPageResourcesToFont(_bb *_ee.PdfPageResources) error {
	fontDict, ok := _dd.GetDict(_bb.Font)
	if !ok {
		return _gd.New(_ed)
	}
	for _, name := range fontDict.Keys() {
		var (
			hasToUnicode = true
			data         []byte
			fileName     string
		)
		fontObj, found := _bb.GetFontByName(name)
		if !found {
			return _gd.New(_egf)
		}
		pdfFont, err := _ee.NewPdfFontFromPdfObject(fontObj)
		if err != nil {
			return err
		}
		desc := pdfFont.FontDescriptor()
		fontName := desc.FontName.String()
		subtype := pdfFont.Subtype()
		if _baf(_ffg.Fonts, fontName) {
			continue
		}
		if len(pdfFont.ToUnicode()) == 0 {
			hasToUnicode = false
		}
		// Only the first non-nil FontFile* entry is considered, even if it turns out not to be a stream.
		if desc.FontFile != nil {
			if stream, isStream := _dd.GetStream(desc.FontFile); isStream {
				data, err = _dd.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0070\u0066\u0062"
			}
		} else if desc.FontFile2 != nil {
			if stream, isStream := _dd.GetStream(desc.FontFile2); isStream {
				data, err = _dd.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0074\u0074\u0066"
			}
		} else if desc.FontFile3 != nil {
			if stream, isStream := _dd.GetStream(desc.FontFile3); isStream {
				data, err = _dd.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0063\u0066\u0066"
			}
		}
		if len(fileName) < 1 {
			_fdd.Log.Debug(_fe)
		}
		entry := Font{
			FontName:       fontName,
			PdfFont:        pdfFont,
			IsCID:          pdfFont.IsCID(),
			IsSimple:       pdfFont.IsSimple(),
			ToUnicode:      hasToUnicode,
			FontType:       subtype,
			FontData:       data,
			FontFileName:   fileName,
			FontDescriptor: desc,
		}
		_ffg.Fonts = append(_ffg.Fonts, entry)
	}
	return nil
}

// removeWord removes `_gaf` (via _bcag) from the words stored under key `_baeb` in the bag's `_eed` map.
// The map entry is deleted when no words remain under that key.
func (_acfe *wordBag) removeWord(_gaf *textWord, _baeb int) {
	remaining := _bcag(_acfe._eed[_baeb], _gaf)
	if len(remaining) == 0 {
		delete(_acfe._eed, _baeb)
	} else {
		_acfe._eed[_baeb] = remaining
	}
}

// shapesState holds two matrices, the list of subpaths collected so far, a flag, a point and the
// owning text object. The meaning of the individual fields is not visible in this part of the file.
type shapesState struct {
	_dfdd _ebf.Matrix
	_affd _ebf.Matrix
	_ebd  []*subpath
	_bfdb bool
	_dac  _ebf.Point
	_cgga *textObject
}

// llyRange returns the sub-slice of `_beea` whose paras have `_abec` <= Lly <= `_bcge`.
// `_beea` holds indexes into the para list and is binary searched, so it is presumably ordered by
// increasing Lly and as long as the para list — TODO confirm against the caller.
// nil is returned when the requested interval lies entirely outside the Lly values covered by `_beea`.
func (_dgeg paraList) llyRange(_beea []int, _abec, _bcge float64) []int {
	n := len(_dgeg)
	if _bcge < _dgeg[_beea[0]].Lly || _abec > _dgeg[_beea[n-1]].Lly {
		return nil
	}
	lo := _e.Search(n, func(i int) bool { return _dgeg[_beea[i]].Lly >= _abec })
	hi := _e.Search(n, func(i int) bool { return _dgeg[_beea[i]].Lly > _bcge })
	return _beea[lo:hi]
}

// _baba reports whether `_agafb` divided by the larger of `_gdaa` and `_fcgfe` is less than `_ddge`.
func _baba(_agafb, _fcgfe float64) bool {
	return _agafb/_fd.Max(_gdaa, _fcgfe) < _ddge
}

// allWords returns every word held in the bag. The words are gathered by ranging over the `_eed` map,
// so the order of the result is not deterministic.
func (_affa *wordBag) allWords() []*textWord {
	var words []*textWord
	for _, group := range _affa._eed {
		words = append(words, group...)
	}
	return words
}

// _fgccb rounds `_abbf` to the nearest multiple of `_agdd` and returns it as an int.
// A zero `_agdd` is treated as 1.
func _fgccb(_abbf float64, _agdd int) int {
	if _agdd == 0 {
		_agdd = 1
	}
	step := float64(_agdd)
	return int(_fd.Round(_abbf/step) * step)
}

// scanBand scans the words of `_gbeg` whose `_baed` value lies in [`_ffbb`-m, `_aecf`+m], where
// m = `_fdfd` * `_baag._bbgb` as read on entry, and counts those that
//   - are accepted by the `_bcc` predicate,
//   - pass the size test: when `_ggc` > 0, the smaller of the relative difference and the ratio of the
//     word's `_aaaa` and `_baag._bbgb` must not exceed `_ggc`, and
//   - are not blocked by `_baag`.
// When `_eeee` is false each counted word is pulled into `_baag` and the removals are applied to `_gbeg`
// at the end. When `_eeee` is true nothing is moved and the scan of each depth bucket stops at the first
// counted word.
// Unless `_bacf` is set, the band [`_ffbb`, `_aecf`] grows to include the `_baed` of every counted word.
// `_baaa` is not used.
func (_gbeg *wordBag) scanBand(_baaa string, _baag *wordBag, _bcc func(_dfc *wordBag, _agbb *textWord) bool, _ffbb, _aecf, _ggc float64, _eeee, _bacf bool) int {
	base := _baag._bbgb
	var removals map[int]map[*textWord]struct{}
	if !_eeee {
		removals = _gbeg.makeRemovals()
	}
	margin := _fdfd * base
	count := 0
	for _, depthIdx := range _gbeg.depthBand(_ffbb-margin, _aecf+margin) {
		if len(_gbeg._eed[depthIdx]) == 0 {
			continue
		}
		for _, word := range _gbeg._eed[depthIdx] {
			if !(_ffbb-margin <= word._baed && word._baed <= _aecf+margin) {
				continue
			}
			if !_bcc(_baag, word) {
				continue
			}
			relDiff := 2.0 * _fd.Abs(word._aaaa-_baag._bbgb) / (word._aaaa + _baag._bbgb)
			ratio := _fd.Max(word._aaaa/_baag._bbgb, _baag._bbgb/word._aaaa)
			mismatch := _fd.Min(relDiff, ratio)
			if _ggc > 0 && mismatch > _ggc {
				continue
			}
			if _baag.blocked(word) {
				continue
			}
			if !_eeee {
				_baag.pullWord(word, depthIdx, removals)
			}
			count++
			if !_bacf {
				if word._baed < _ffbb {
					_ffbb = word._baed
				}
				if word._baed > _aecf {
					_aecf = word._baed
				}
			}
			if _eeee {
				break
			}
		}
	}
	if !_eeee {
		_gbeg.applyRemovals(removals)
	}
	return count
}

// _gefab returns a new slice holding the elements of `_cgcb` in reverse order.
func _gefab(_cgcb []int) []int {
	reversed := make([]int, len(_cgcb))
	for i, v := range _cgcb {
		reversed[len(_cgcb)-1-i] = v
	}
	return reversed
}

// inTile returns the paras whose PdfRectangle is contained in tile `_ccbg`.
// When `_fabc` is set the tile and the selected paras are printed.
func (_acadf paraList) inTile(_ccbg gridTile) paraList {
	var inside paraList
	for _, para := range _acadf {
		if _ccbg.contains(para.PdfRectangle) {
			inside = append(inside, para)
		}
	}
	if _fabc {
		_fc.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _ccbg, len(inside))
		for i, para := range inside {
			_fc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
		_fc.Println("")
	}
	return inside
}
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// String returns a description of `tm`: its rectangle, its `_cfccd` value and its `_cdg` text.
func (_fggg *textMark) String() string {
	const layout = "\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022"
	return _fc.Sprintf(layout, _fggg.PdfRectangle, _fggg._cfccd, _fggg._cdg)
}

// updateBBox grows the cell's PdfRectangle, via _dcd, by the PdfRectangle of each para it holds.
func (_abdd *compositeCell) updateBBox() {
	for i := range _abdd.paraList {
		_abdd.PdfRectangle = _dcd(_abdd.PdfRectangle, _abdd.paraList[i].PdfRectangle)
	}
}

// get returns the para stored in the table under the key _gdeed(`_afabg`, `_dcfe`).
func (_ebcfd *textTable) get(_afabg, _dcfe int) *textPara {
	key := _gdeed(_afabg, _dcfe)
	return _ebcfd._dbfec[key]
}

// getDown collects the `_feb` para of every cell in the table's last row (row `_dcbg`-1).
// It returns nil if any of those paras is taken, or if the `_ffbf` link of a collected para is not the
// para collected for the next column.
func (_dced *textTable) getDown() paraList {
	cells := make(paraList, _dced._bfgf)
	for x := range cells {
		cell := _dced.get(x, _dced._dcbg-1)._feb
		if cell.taken() {
			return nil
		}
		cells[x] = cell
	}
	for x := 1; x < len(cells); x++ {
		if cells[x-1]._ffbf != cells[x] {
			return nil
		}
	}
	return cells
}

// put stores `_bgafg` in the table under the key _gdeed(`_daac`, `_babe`).
func (_feacb *textTable) put(_daac, _babe int, _bgafg *textPara) {
	key := _gdeed(_daac, _babe)
	_feacb._dbfec[key] = _bgafg
}

// toTextMarks returns the TextMarks of all paras in the list that do not have `_faaed` set.
// Between a para and its successor a single space mark is added when _cca reports true for the pair,
// otherwise two newline marks are added. Two newline marks always terminate the result.
func (_gadeg paraList) toTextMarks() []TextMark {
	offset := 0
	var marks []TextMark
	last := len(_gadeg) - 1
	for i, para := range _gadeg {
		if para._faaed {
			continue
		}
		marks = append(marks, para.toTextMarks(&offset)...)
		if i == last {
			continue
		}
		if _cca(para, _gadeg[i+1]) {
			marks = _eaea(marks, &offset, "\u0020")
			continue
		}
		marks = _eaea(marks, &offset, "\u000a")
		marks = _eaea(marks, &offset, "\u000a")
	}
	marks = _eaea(marks, &offset, "\u000a")
	marks = _eaea(marks, &offset, "\u000a")
	return marks
}

// _dddb returns a space when _dafec accepts the difference `_gbfe` - `_aab`, and a newline otherwise.
func _dddb(_gbfe, _aab float64) string {
	if _dafec(_gbfe - _aab) {
		return "\u0020"
	}
	return "\u000a"
}

// minDepth returns `_ecef` minus the difference between the bag's Ury and its `_bbgb` value.
func (_bbdc *wordBag) minDepth() float64 {
	top := _bbdc.Ury - _bbdc._bbgb
	return _bbdc._ecef - top
}
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// PageImages holds the images extracted from a PDF page. Each entry carries spatial information:
// display position and size.
type PageImages struct {
	Images []ImageMark
}

// _bfdcb returns the keys of `_gdae` sorted in increasing order.
func _bfdcb(_gdae map[int][]float64) []int {
	keys := make([]int, 0, len(_gdae))
	for k := range _gdae {
		keys = append(keys, k)
	}
	_e.Ints(keys)
	return keys
}

// bbox returns the table's PdfRectangle.
func (_bfeee *textTable) bbox() _ee.PdfRectangle {
	return _bfeee.PdfRectangle
}
// The four markKind values, numbered from zero in declaration order.
const (
	_ebecg markKind = iota
	_aeaec
	_cabg
	_eggf
)

// findTextTables tries to grow a table from every para that is not taken and has a non-zero width.
// A para is used as a seed only if isAtom returns a table for it. The grown table is kept when its
// `_bfgf` * `_dcbg` product is at least `_aeeg`; its cells are then marked and the table is logged.
func (_ddfca paraList) findTextTables() []*textTable {
	var tables []*textTable
	for _, para := range _ddfca {
		switch {
		case para.taken(), para.Width() == 0:
			continue
		}
		table := para.isAtom()
		if table == nil {
			continue
		}
		table.growTable()
		if size := table._bfgf * table._dcbg; size < _aeeg {
			continue
		}
		table.markCells()
		table.log("\u0067\u0072\u006fw\u006e")
		tables = append(tables, table)
	}
	return tables
}

// event is a record of a float64 value, a boolean flag and an int.
// The meaning of the fields is not visible in this part of the file.
type event struct {
	_fdbf  float64
	_eccae bool
	_dedbf int
}
// String returns a string describing the current state of the textState stack.
// The first line gives the number of entries; it is followed by one line per entry holding the entry's
// index and its String() value, or "<nil>" for a nil entry. Lines are joined with newlines.
func ( _fcc * stateStack ) String ( ) string { _dcf := [ ] string { _fc . Sprintf ( "\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064" , len ( * _fcc ) ) } ; for _ccfc , _acag := range * _fcc { _eaa := "\u003c\u006e\u0069l\u003e" ;
if _acag != nil { _eaa = _acag . String ( ) ; } ; _dcf = append ( _dcf , _fc . Sprintf ( "\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073" , _ccfc , _eaa ) ) ; } ; return _a . Join ( _dcf , "\u000a" ) ; } ;
// _abbgg passes the absolute X difference and the absolute Y difference of the two points to _baba
// and returns its result.
func _abbgg ( _fdgdc , _ecfc _ebf . Point ) bool { _gefga := _fd . Abs ( _fdgdc . X - _ecfc . X ) ;
_edce := _fd . Abs ( _fdgdc . Y - _ecfc . Y ) ; return _baba ( _gefga , _edce ) ; } ;
// _gcac returns the keys of `_eceeb` sorted in increasing order.
func _gcac ( _eceeb map [ float64 ] gridTile ) [ ] float64 { _dgbg := make ( [ ] float64 , 0 , len ( _eceeb ) ) ; for _eabf := range _eceeb { _dgbg = append ( _dgbg , _eabf ) ; } ; _e . Float64s ( _dgbg ) ; return _dgbg ;
} ;
// tidied returns a cleaned-up copy of the ruling list: duplicates are removed, the result is passed
// through snapToGroups and then sorted. It returns nil when snapToGroups returns nil.
// `_bcea` is only used as a label in the log line written when `_cfcd` is set.
func ( _dbc rulingList ) tidied ( _bcea string ) rulingList { _gegc := _dbc . removeDuplicates ( ) ; _gegc . log ( "\u0075n\u0069\u0071\u0075\u0065\u0073" ) ; _edbg := _gegc . snapToGroups ( ) ; if _edbg == nil { return nil ; } ; _edbg . sort ( ) ; if _cfcd { _fdd . Log . Info ( "\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064" , _bcea , len ( _dbc ) , len ( _gegc ) , len ( _edbg ) ) ;
} ; _edbg . log ( "\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d" ) ; return _edbg ; } ;
// log writes a debug listing of the paras, titled `_ddgd`. It does nothing unless `_bcfe` is set.
// After a header line with the number of paras, one line is printed for each non-nil para: its index,
// its PdfRectangle, a "[AxB]" tag built from the `_bfgf` and `_dcbg` fields of its `_cbfe` value (two
// spaces when `_cbfe` is nil) and its text passed through _fgaa with a limit of 50.
func ( _deec paraList ) log ( _ddgd string ) { if ! _bcfe { return ; } ; _fdd . Log . Info ( "%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d" , _ddgd , len ( _deec ) ) ;
for _gab , _bbeg := range _deec { if _bbeg == nil { continue ; } ; _fedbf := _bbeg . text ( ) ; _acfc := "\u0020\u0020" ; if _bbeg . _cbfe != nil { _acfc = _fc . Sprintf ( "\u005b%\u0064\u0078\u0025\u0064\u005d" , _bbeg . _cbfe . _bfgf , _bbeg . _cbfe . _dcbg ) ; } ; _fc . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a" , _gab , _bbeg . PdfRectangle , _acfc , _fgaa ( _fedbf , 50 ) ) ;
} ; } ;
// getFontDict looks up the font named `_aecg` in the text object's `_faa` resources.
// When the resources are nil a debug message is logged and (nil, nil) is returned, i.e. no object and
// no error. When the resources have no font of that name an error is returned.
func ( _bbcf * textObject ) getFontDict ( _aecg string ) ( _fdfg _dd . PdfObject , _dgfb error ) { _fdgb := _bbcf . _faa ; if _fdgb == nil { _fdd . Log . Debug ( "g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071" , _aecg ) ;
return nil , nil ; } ; _fdfg , _agfg := _fdgb . GetFontByName ( _dd . PdfObjectName ( _aecg ) ) ; if ! _agfg { _fdd . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071" , _aecg ) ;
return nil , _gd . New ( "f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073" ) ; } ; return _fdfg , nil ; } ;
// logComposite prints two debug grids for the table, titled `_bddb`. It does nothing unless `_fabc` is set.
// Both grids have one column per value of 0..`_bfgf`-1 and one row per value of 0..`_dcbg`-1, and read
// their cells from `_facee` under the key _gdeed(column, row):
//   - the first grid shows the number of paras returned by the cell's parasBBox();
//   - the second grid shows the text of those paras after merge(), passed through _fgaa with a limit of
//     12 and %q-quoted with the surrounding quotes stripped. The text is empty when merge() returns nil.
func ( _fcdgg * textTable ) logComposite ( _bddb string ) { if ! _fabc { return ; } ; _fdd . Log . Info ( "\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073" , _fcdgg . _bfgf , _fcdgg . _dcbg , _bddb ) ;
_fc . Printf ( "\u0025\u0035\u0073 \u007c" , "" ) ; for _eacac := 0 ; _eacac < _fcdgg . _bfgf ; _eacac ++ { _fc . Printf ( "\u0025\u0033\u0064 \u007c" , _eacac ) ; } ; _fc . Println ( "" ) ; _fc . Printf ( "\u0025\u0035\u0073 \u002b" , "" ) ; for _gcdg := 0 ; _gcdg < _fcdgg . _bfgf ; _gcdg ++ { _fc . Printf ( "\u0025\u0033\u0073 \u002b" , "\u002d\u002d\u002d" ) ;
} ; _fc . Println ( "" ) ; for _bbag := 0 ; _bbag < _fcdgg . _dcbg ; _bbag ++ { _fc . Printf ( "\u0025\u0035\u0064 \u007c" , _bbag ) ; for _dcgb := 0 ; _dcgb < _fcdgg . _bfgf ; _dcgb ++ { _bdddc , _ := _fcdgg . _facee [ _gdeed ( _dcgb , _bbag ) ] . parasBBox ( ) ; _fc . Printf ( "\u0025\u0033\u0064 \u007c" , len ( _bdddc ) ) ;
} ; _fc . Println ( "" ) ; } ; _fdd . Log . Info ( "\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073" , _fcdgg . _bfgf , _fcdgg . _dcbg , _bddb ) ; _fc . Printf ( "\u0025\u0035\u0073 \u007c" , "" ) ; for _bceab := 0 ; _bceab < _fcdgg . _bfgf ;
_bceab ++ { _fc . Printf ( "\u0025\u0031\u0032\u0064\u0020\u007c" , _bceab ) ; } ; _fc . Println ( "" ) ; _fc . Printf ( "\u0025\u0035\u0073 \u002b" , "" ) ; for _dadba := 0 ; _dadba < _fcdgg . _bfgf ; _dadba ++ { _fc . Print ( "\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b" ) ;
} ; _fc . Println ( "" ) ; for _gbeb := 0 ; _gbeb < _fcdgg . _dcbg ; _gbeb ++ { _fc . Printf ( "\u0025\u0035\u0064 \u007c" , _gbeb ) ; for _abga := 0 ; _abga < _fcdgg . _bfgf ; _abga ++ { _aaedg , _ := _fcdgg . _facee [ _gdeed ( _abga , _gbeb ) ] . parasBBox ( ) ; _eaeg := "" ; _fgef := _aaedg . merge ( ) ;
if _fgef != nil { _eaeg = _fgef . text ( ) ; } ; _eaeg = _fc . Sprintf ( "\u0025\u0071" , _fgaa ( _eaeg , 12 ) ) ; _eaeg = _eaeg [ 1 : len ( _eaeg ) - 1 ] ; _fc . Printf ( "\u0025\u0031\u0032\u0073\u0020\u007c" , _eaeg ) ; } ; _fc . Println ( "" ) ; } ; } ;
// _efgdf returns the Llx of `_dgc`'s bounding box minus the Urx of `_abed`'s bounding box.
func _efgdf ( _dgc , _abed bounded ) float64 { return _dgc . bbox ( ) . Llx - _abed . bbox ( ) . Urx } ;
// sortStrict sorts the rulings in place. The sort keys, in order of precedence, are:
//  1. `_deeb`, larger values first;
//  2. `_dadb`, smaller values first, used only when _dafec rejects the difference of the two values;
//  3. `_gcfe`, smaller values first;
//  4. `_fbdf`, smaller values first.
func (_afggg rulingList) sortStrict() {
	less := func(i, j int) bool {
		a, b := _afggg[i], _afggg[j]
		switch {
		case a._deeb != b._deeb:
			return a._deeb > b._deeb
		case !_dafec(a._dadb - b._dadb):
			return a._dadb < b._dadb
		case a._gcfe != b._gcfe:
			return a._gcfe < b._gcfe
		default:
			return a._fbdf < b._fbdf
		}
	}
	_e.Slice(_afggg, less)
}

// _fabdg reports whether the two points have exactly equal X and Y coordinates.
func _fabdg(_afce, _fgff _ebf.Point) bool {
	if _afce.X != _fgff.X {
		return false
	}
	return _afce.Y == _fgff.Y
}
// firstReadingIndex returns the depth index, among `_ggaf` and the indexes returned by depthBand for
// the interval described below, whose first word sorts earliest according to _dcfaf.
// The interval starts at (`_ggaf`+1) * `_ebcfe` and extends by `_gbdb` times the `_aaaa` value of the
// first word at `_ggaf`.
func (_egab *wordBag) firstReadingIndex(_ggaf int) int {
	size := _egab.firstWord(_ggaf)._aaaa
	lo := float64(_ggaf+1) * _ebcfe
	hi := lo + _gbdb*size
	best := _ggaf
	for _, idx := range _egab.depthBand(lo, hi) {
		if _dcfaf(_egab.firstWord(idx), _egab.firstWord(best)) < 0 {
			best = idx
		}
	}
	return best
}

// toGrids partitions the rulings into groups of connected rulings and returns the groups that
// isActualGrid accepts, each passed through snapToGroups.
// The steps are:
//  1. Compute the intersections of the rulings and, from them, the connections of each ruling.
//  2. Order the ruling indexes by decreasing number of connections, ties broken by `comp`.
//  3. Walk that ordering, adding each ruling to the first existing group that has a member connected
//     to it, or starting a new group.
//  4. Order the groups by decreasing size (stable) and the members of each group by `comp`.
//  5. Convert the index groups to ruling lists and keep those that form an actual grid.
// nil is returned when there are no rulings or no group forms a grid.
// Extra diagnostics are written when `_cfcd` is set.
func (_ceeec rulingList) toGrids() []rulingList {
	if _cfcd {
		_fdd.Log.Info("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073", _ceeec)
	}
	intersects := _ceeec.intersections()
	if _cfcd {
		_fdd.Log.Info("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020", len(_ceeec), len(intersects))
		for _, k := range _edddc(intersects) {
			_fc.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", k, intersects[k])
		}
	}

	connects := make(map[int]intSet, len(_ceeec))
	for i := range _ceeec {
		conn := _ceeec.connections(intersects, i)
		if len(conn) > 0 {
			connects[i] = conn
		}
	}
	if _cfcd {
		_fdd.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064", len(connects))
		for _, k := range _edddc(connects) {
			_fc.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", k, connects[k])
		}
	}

	ordering := _bdegc(len(_ceeec), func(i, j int) bool {
		ni, nj := len(connects[i]), len(connects[j])
		if ni != nj {
			return ni > nj
		}
		return _ceeec.comp(i, j)
	})
	if _cfcd {
		_fdd.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076", ordering)
	}

	// An empty ruling list has an empty ordering; indexing ordering[0] below would panic.
	if len(ordering) == 0 {
		return nil
	}

	igrids := [][]int{{ordering[0]}}
nextRuling:
	for _, idx := range ordering[1:] {
		for g, members := range igrids {
			for _, member := range members {
				if connects[member].has(idx) {
					igrids[g] = append(members, idx)
					continue nextRuling
				}
			}
		}
		igrids = append(igrids, []int{idx})
	}
	if _cfcd {
		_fdd.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076", igrids)
	}

	_e.SliceStable(igrids, func(i, j int) bool { return len(igrids[i]) > len(igrids[j]) })
	for _, members := range igrids {
		_e.Slice(members, func(i, j int) bool { return _ceeec.comp(members[i], members[j]) })
	}

	grids := make([]rulingList, len(igrids))
	for g, members := range igrids {
		grid := make(rulingList, len(members))
		for k, idx := range members {
			grid[k] = _ceeec[idx]
		}
		grids[g] = grid
	}
	if _cfcd {
		_fdd.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076", grids)
	}

	var actualGrids []rulingList
	for _, grid := range grids {
		if actual, ok := grid.isActualGrid(); ok {
			actualGrids = append(actualGrids, actual.snapToGroups())
		}
	}
	if _cfcd {
		_gdcb("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073", actualGrids)
		_fdd.Log.Info("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064", len(grids), len(actualGrids))
	}
	return actualGrids
}
// ImageMark describes one image drawn on a page together with where it is drawn.
// All coordinates are in device coordinates.
type ImageMark struct {
	Image *_ee.Image

	// Width and Height are the dimensions of the image as displayed in the PDF.
	Width  float64
	Height float64

	// X and Y give the position of the image in PDF coordinates (lower left corner).
	X float64
	Y float64

	// Angle is the rotation in degrees, if the image is rotated.
	Angle float64
}

// _gdeed packs two ints into a single uint64 key: `_gefaf` is shifted left by 24 bits (a multiplication
// by 0x1000000) and `_fbgbe` is added to it.
func _gdeed(_gefaf, _fbgbe int) uint64 {
	hi := uint64(_gefaf) << 24
	lo := uint64(_fbgbe)
	return hi + lo
}