2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2018-03-22 14:03:47 +00:00
2020-08-27 21:45:09 +00:00
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
2024-03-27 22:34:33 +00:00
package extractor ; import ( _fe "bytes" ; _d "errors" ; _ae "fmt" ; _fc "github.com/unidoc/unipdf/v3/common" ; _ba "github.com/unidoc/unipdf/v3/contentstream" ; _bad "github.com/unidoc/unipdf/v3/core" ; _ff "github.com/unidoc/unipdf/v3/internal/license" ; _bbg "github.com/unidoc/unipdf/v3/internal/textencoding" ;
_g "github.com/unidoc/unipdf/v3/internal/transform" ; _aec "github.com/unidoc/unipdf/v3/model" ; _ag "golang.org/x/image/draw" ; _da "golang.org/x/text/unicode/norm" ; _ec "image" ; _eg "image/color" ; _bc "io" ; _ea "math" ; _fb "reflect" ; _f "regexp" ; _a "sort" ; _bb "strings" ;
_be "unicode" ; _e "unicode/utf8" ; ) ;
2024-01-22 01:16:41 +00:00
2024-03-27 22:34:33 +00:00
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
type TextTable struct { _aec . PdfRectangle ; W , H int ; Cells [ ] [ ] TableCell ; } ;

// reorder permutes the list in place: for every position i of `_dedb`, slot i receives the element
// that was at index `_dedb[i]`. The scratch list has len(receiver) entries, so slots not covered by
// `_dedb` are copied back as zero values, and a `_dedb` longer than the receiver indexes out of range.
func ( _adde paraList ) reorder ( _dedb [ ] int ) { _cacg := make ( paraList , len ( _adde ) ) ; for _gffg , _feag := range _dedb { _cacg [ _gffg ] = _adde [ _feag ] ; } ; copy ( _adde , _cacg ) ; } ;

// push appends a (shallow) copy of `*_cfdc` to the stack. The stack stores a pointer to the copy,
// not to the caller's value.
func ( _dce * stateStack ) push ( _cfdc * textState ) { _bbc := * _cfdc ;
* _dce = append ( * _dce , & _bbc ) } ;

// _gbgd is a package-level constant; its use is not visible in this part of the file.
const _gbgd = 10 ;

// isActualGrid decides whether the rulings form a grid and, if so, returns the combined rulings.
//
// It takes the two lists returned by augmentGrid (the log messages below call them "verts" and
// "horzs") and fails with (nil, false) when either list has fewer than `_aaaa+1` / `_acebd+1` entries.
// When the `_aaeb` flag is set, the first and last ruling of each list are compared against each
// other through `_ecag`; otherwise both lists must pass aligned(). On success the second list is
// appended to the first and returned with true.
// `_eceg` (and `_geda` for one message) only gate diagnostic logging.
func ( _fgac rulingList ) isActualGrid ( ) ( rulingList , bool ) { _ceae , _fgcbf := _fgac . augmentGrid ( ) ; if ! ( len ( _ceae ) >= _aaaa + 1 && len ( _fgcbf ) >= _acebd + 1 ) { if _eceg { _fc . Log . Info ( "\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064" , len ( _ceae ) , len ( _fgcbf ) , _aaaa + 1 , _acebd + 1 ) ;
} ; return nil , false ; } ; if _eceg { _fc . Log . Info ( "\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074" , _fgac , len ( _ceae ) >= 2 , len ( _fgcbf ) >= 2 , len ( _ceae ) >= 2 && len ( _fgcbf ) >= 2 ) ;
for _ddgb , _cbgf := range _fgac { _ae . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a" , _ddgb , _cbgf ) ; } ; } ; if _aaeb { _geeg , _bdfc := _ceae [ 0 ] , _ceae [ len ( _ceae ) - 1 ] ; _gdaab , _bgdac := _fgcbf [ 0 ] , _fgcbf [ len ( _fgcbf ) - 1 ] ; if ! ( _ecag ( _geeg . _gbgc - _gdaab . _fgad ) && _ecag ( _bdfc . _gbgc - _gdaab . _ababc ) && _ecag ( _gdaab . _gbgc - _geeg . _ababc ) && _ecag ( _bgdac . _gbgc - _geeg . _fgad ) ) { if _eceg { _fc . Log . Info ( "\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073" , _geeg , _bdfc , _gdaab , _bgdac ) ;
} ; return nil , false ; } ; } else { if ! _ceae . aligned ( ) { if _geda { _fc . Log . Info ( "i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064" , len ( _ceae ) ) ;
} ; return nil , false ; } ; if ! _fgcbf . aligned ( ) { if _eceg { _fc . Log . Info ( "i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064" , len ( _fgcbf ) ) ;
} ; return nil , false ; } ; } ; _edcd := append ( _ceae , _fgcbf ... ) ; return _edcd , true ; } ;

// computeEBBoxes fills the `_bbbc` box of every para in the list.
//
// Each `_bbbc` starts as the para's own PdfRectangle. Then, para by para:
//   - the neighbours returned by yNeighbours(0) give the nearest box edge to the left
//     (largest Urx below this Llx) and to the right (smallest Llx above this Urx);
//   - every other para whose Ury is not above this para's Lly may widen the box: Llx moves out to
//     that para's Llx if it lies between the left limit and the current Llx, otherwise Urx moves
//     out to that para's Urx if it lies between the current Urx and the right limit.
//
// Boxes are written back as the loop proceeds, so later paras see the already-widened boxes of
// earlier ones. When `_acge` is set the widened box also replaces each para's PdfRectangle.
// `_acgbb` only gates diagnostic output.
func ( _ddbg paraList ) computeEBBoxes ( ) { if _acgbb { _fc . Log . Info ( "\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a" ) ; } ; for _ , _ddebb := range _ddbg { _ddebb . _bbbc = _ddebb . PdfRectangle ;
} ; _cbab := _ddbg . yNeighbours ( 0 ) ; for _fcagd , _ecga := range _ddbg { _fgeea := _ecga . _bbbc ; _dbfdb , _ggbg := - 1.0e9 , + 1.0e9 ; for _ , _aga := range _cbab [ _ecga ] { _gbdc := _ddbg [ _aga ] . _bbbc ; if _gbdc . Urx < _fgeea . Llx { _dbfdb = _ea . Max ( _dbfdb , _gbdc . Urx ) ; } else if _fgeea . Urx < _gbdc . Llx { _ggbg = _ea . Min ( _ggbg , _gbdc . Llx ) ;
} ; } ; for _aace , _dgceg := range _ddbg { _bagag := _dgceg . _bbbc ; if _fcagd == _aace || _bagag . Ury > _fgeea . Lly { continue ; } ; if _dbfdb <= _bagag . Llx && _bagag . Llx < _fgeea . Llx { _fgeea . Llx = _bagag . Llx ; } else if _bagag . Urx <= _ggbg && _fgeea . Urx < _bagag . Urx { _fgeea . Urx = _bagag . Urx ;
} ; } ; if _acgbb { _ae . Printf ( "\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a" , _fcagd , _ecga . _bbbc , _fgeea , _bgfd ( _ecga . text ( ) , 50 ) ) ; } ; _ecga . _bbbc = _fgeea ; } ; if _acge { for _ , _ccaga := range _ddbg { _ccaga . PdfRectangle = _ccaga . _bbbc ;
} ; } ; } ;

// newSubPath resets the current path via clearPath and, when `_bcge` is set, logs the state.
func ( _gagac * shapesState ) newSubPath ( ) { _gagac . clearPath ( ) ; if _bcge { _fc . Log . Info ( "\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073" , _gagac ) ; } ; } ;

// applyRemovals deletes words from the bag. `_bcag` maps a key of the bag's `_gbbd` map to the set
// of words to drop from that key's slice. A key whose slice would become empty is deleted from the
// map; otherwise the slice is replaced by a new one holding the surviving words in their original order.
// NOTE(review): the new slice is sized len(words)-len(removals), so this assumes every word in a
// removal set is actually present in the corresponding slice — otherwise the fill loop overruns it.
func ( _aded * wordBag ) applyRemovals ( _bcag map [ int ] map [ * textWord ] struct { } ) { for _cea , _afac := range _bcag { if len ( _afac ) == 0 { continue ;
} ; _dfga := _aded . _gbbd [ _cea ] ; _cegf := len ( _dfga ) - len ( _afac ) ; if _cegf == 0 { delete ( _aded . _gbbd , _cea ) ; continue ; } ; _fgag := make ( [ ] * textWord , _cegf ) ; _deg := 0 ; for _ , _bef := range _dfga { if _ , _gcc := _afac [ _bef ] ; ! _gcc { _fgag [ _deg ] = _bef ; _deg ++ ;
} ; } ; _aded . _gbbd [ _cea ] = _fgag ; } ; } ;

// _dddc returns `_dgc(a, b)` unless `_ecfbd` reports true for that value, in which case it falls
// back to `_bcea(a, b)`. (wordBag.sort elsewhere in this file uses `_bcea(...) < 0` as an ordering.)
func _dddc ( _cgda , _fded bounded ) float64 { _geaf := _dgc ( _cgda , _fded ) ; if ! _ecfbd ( _geaf ) { return _geaf ; } ; return _bcea ( _cgda , _fded ) ; } ;

// lists is a slice of *list.
type lists [ ] * list ;

// showText forwards its arguments unchanged to renderText and returns its error.
func ( _gabg * textObject ) showText ( _gag _bad . PdfObject , _dcc [ ] byte , _gcae int ) error { return _gabg . renderText ( _gag , _dcc , _gcae ) ;
} ;

// _ecea creates a wordBag holding the single word `_cdgf`. The word is stored under the key
// `_dafa(_cdgf._aecg)`; the bag takes the word's rectangle and its `_aeegf` value, and records
// `_gdag`, `_gfeg` and `_afabd` in the `_aeceg`, `_eddc` and `_gbfd` fields respectively.
func _ecea ( _cdgf * textWord , _gdag float64 , _gfeg , _afabd rulingList ) * wordBag { _acffg := _dafa ( _cdgf . _aecg ) ; _gfg := [ ] * textWord { _cdgf } ; _addbg := wordBag { _gbbd : map [ int ] [ ] * textWord { _acffg : _gfg } , PdfRectangle : _cdgf . PdfRectangle , _aad : _cdgf . _aeegf , _aeceg : _gdag , _eddc : _gfeg , _gbfd : _afabd } ;
return & _addbg ; } ;
2024-01-22 01:16:41 +00:00
2024-03-27 22:34:33 +00:00
// ExtractFonts returns all font information from the page extractor, including
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
//
// The argument `previousPageFonts` is used when trying to build a complete font catalog for multiple pages or the entire document.
// The entries from `previousPageFonts` are added to the returned result unless already included in the page, i.e. no duplicate entries.
//
// NOTE: If previousPageFonts is nil, all fonts from the page will be returned. Use it when building up a full list of fonts for a document or page range.
func (_ee *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	// Start with the fonts found in this page's own resources.
	var collected PageFonts
	if err := collected.extractPageResourcesToFont(_ee._gf); err != nil {
		return nil, err
	}

	// Merge in fonts gathered from earlier pages. `_ggb` is consulted with the font name so that
	// an entry is only appended when the accumulated list does not already report it.
	if previousPageFonts != nil {
		for _, prev := range previousPageFonts.Fonts {
			if _ggb(collected.Fonts, prev.FontName) {
				continue
			}
			collected.Fonts = append(collected.Fonts, prev)
		}
	}

	return &PageFonts{Fonts: collected.Fonts}, nil
}
2024-01-22 01:16:41 +00:00
2024-03-27 22:34:33 +00:00
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct {
	Fonts []Font
}

// _aggg builds the gridTile for rectangle `_cebg`.
//
// Each of the four boolean fields is true only when the matching ruling is non-nil and its
// encloses() check succeeds: `_ecfeg` and `_dbeb` are tested against the rectangle's y-range
// (Lly..Ury), `_dbacf` and `_fegb` against its x-range (Llx..Urx).
func _aggg(_cebg _aec.PdfRectangle, _ecfeg, _dbeb, _dbacf, _fegb *ruling) gridTile {
	tile := gridTile{PdfRectangle: _cebg}
	if _ecfeg != nil {
		tile._cbfd = _ecfeg.encloses(_cebg.Lly, _cebg.Ury)
	}
	if _dbeb != nil {
		tile._cbdbf = _dbeb.encloses(_cebg.Lly, _cebg.Ury)
	}
	if _dbacf != nil {
		tile._fcgc = _dbacf.encloses(_cebg.Llx, _cebg.Urx)
	}
	if _fegb != nil {
		tile._fbbf = _fegb.encloses(_cebg.Llx, _cebg.Urx)
	}
	return tile
}

// aligned reports whether enough of the rulings line up with each other.
//
// Rulings are processed in order. Each one either bumps the counter of a previously recorded
// ruling it is gridIntersecting with, or is recorded itself with a counter of zero. The fraction
// of recorded rulings whose counter is still zero (relative to the full list length) must not
// exceed `1.0 - _efea`. Lists with fewer than two rulings are never aligned.
func (_aaddg rulingList) aligned() bool {
	if len(_aaddg) < 2 {
		return false
	}

	hits := map[*ruling]int{_aaddg[0]: 0}
nextRuling:
	for _, candidate := range _aaddg[1:] {
		for recorded := range hits {
			if candidate.gridIntersecting(recorded) {
				hits[recorded]++
				continue nextRuling
			}
		}
		// No recorded ruling matched: start tracking this one.
		hits[candidate] = 0
	}

	unmatched := 0
	for _, count := range hits {
		if count == 0 {
			unmatched++
		}
	}

	fraction := float64(unmatched) / float64(len(_aaddg))
	ok := fraction <= 1.0-_efea
	if _eceg {
		_fc.Log.Info("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", ok, fraction, unmatched, len(_aaddg), _aaddg.String())
	}
	return ok
}

// textWord is a run of text marks treated as one word, together with its bounding rectangle.
type textWord struct {
	_aec.PdfRectangle
	_aecg  float64     // compared against _gadb(...) and used as the bag key source in this file
	_eedc  string      // text of the word; assigned from computeText() when the word is finalized
	_ebfa  []*textMark // the marks that make up the word
	_aeegf float64     // used as the normalizing divisor for gap/offset tests when building words
	_ceff  bool
}

// _aagaf rounds `_gagec` to the nearest multiple of `_edge`.
func _aagaf(_gagec float64) float64 {
	steps := _ea.Round(_gagec / _edge)
	return _edge * steps
}
2024-01-22 01:16:41 +00:00
2024-03-27 22:34:33 +00:00
// String returns a description of `state`.
func ( _bdge * textState ) String ( ) string { _gce := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]" ; if _bdge . _cbad != nil { _gce = _bdge . _cbad . BaseFont ( ) ; } ; return _ae . Sprintf ( "\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071" , _bdge . _cga , _bdge . _ecd , _bdge . _dgad , _gce ) ;
2024-01-22 01:16:41 +00:00
} ;
2024-03-27 22:34:33 +00:00
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func ( _cbcg PageText ) Marks ( ) * TextMarkArray { return & TextMarkArray { _bade : _cbcg . _fgb } } ;

// _eefcd groups the marks in `_bdbgb` into words (its log messages call it "makeTextWords").
//
// Marks are consumed in order while a current word is being built:
//   - when `_ccgb` is set and the current word has marks, `_eabc` is used to detect a diacritic
//     on either the new mark or the word's last mark; a match inside the other's diacritic area is
//     folded into the word with addDiacritic instead of being added as a separate mark;
//   - a mark whose text satisfies `_dfca` finishes the current word and is itself dropped;
//   - with no current word, the mark starts a new one via `_fbccc`;
//   - otherwise two values normalized by the word's `_aeegf` are compared with the thresholds
//     `_fbdbc`, `_babe` and `_accc`; the mark either starts a new word or is appended.
//
// The `_aefaa` closure finishes the current word: it stores computeText() in `_eedc` and appends
// the word to the result unless `_dfca` reports true for that text. `_baf` gates diagnostics.
func _eefcd ( _bdbgb [ ] * textMark , _cdda _aec . PdfRectangle ) [ ] * textWord { var _ecgca [ ] * textWord ; var _gdgaa * textWord ; if _baf { _fc . Log . Info ( "\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073" , len ( _bdbgb ) ) ;
} ; _aefaa := func ( ) { if _gdgaa != nil { _ccbc := _gdgaa . computeText ( ) ; if ! _dfca ( _ccbc ) { _gdgaa . _eedc = _ccbc ; _ecgca = append ( _ecgca , _gdgaa ) ; if _baf { _fc . Log . Info ( "\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073" , len ( _ecgca ) - 1 , _gdgaa . String ( ) ) ;
for _efee , _abc := range _gdgaa . _ebfa { _ae . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _efee , _abc . String ( ) ) ; } ; } ; } ; _gdgaa = nil ; } ; } ; for _ , _dabfd := range _bdbgb { if _ccgb && _gdgaa != nil && len ( _gdgaa . _ebfa ) > 0 { _ebbb := _gdgaa . _ebfa [ len ( _gdgaa . _ebfa ) - 1 ] ;
_gcbe , _effb := _eabc ( _dabfd . _gded ) ; _fggc , _baaf := _eabc ( _ebbb . _gded ) ; if _effb && ! _baaf && _ebbb . inDiacriticArea ( _dabfd ) { _gdgaa . addDiacritic ( _gcbe ) ; continue ; } ; if _baaf && ! _effb && _dabfd . inDiacriticArea ( _ebbb ) { _gdgaa . _ebfa = _gdgaa . _ebfa [ : len ( _gdgaa . _ebfa ) - 1 ] ;
_gdgaa . appendMark ( _dabfd , _cdda ) ; _gdgaa . addDiacritic ( _fggc ) ; continue ; } ; } ; _fdeaa := _dfca ( _dabfd . _gded ) ; if _fdeaa { _aefaa ( ) ; continue ; } ; if _gdgaa == nil && ! _fdeaa { _gdgaa = _fbccc ( [ ] * textMark { _dabfd } , _cdda ) ; continue ; } ; _gfdac := _gdgaa . _aeegf ;
_aefde := _ea . Abs ( _gadb ( _cdda , _dabfd ) - _gdgaa . _aecg ) / _gfdac ; _cfcfa := _cdfb ( _dabfd , _gdgaa ) / _gfdac ; if _cfcfa >= _fbdbc || ! ( - _babe <= _cfcfa && _aefde <= _accc ) { _aefaa ( ) ; _gdgaa = _fbccc ( [ ] * textMark { _dabfd } , _cdda ) ; continue ; } ; _gdgaa . appendMark ( _dabfd , _cdda ) ;
} ; _aefaa ( ) ; return _ecgca ; } ;

// _cgfcc is the license gate. It returns nil when the license key is non-nil and reports
// IsLicensed(), or when `_gg` is set. Otherwise it prints an "unlicensed copy" notice and a hint
// about obtaining a trial license to standard output and returns an error.
// The `_bgdee` argument is not used by the body.
func _cgfcc ( _bgdee * PageText ) error { _cggc := _ff . GetLicenseKey ( ) ; if _cggc != nil && _cggc . IsLicensed ( ) || _gg { return nil ; } ; _ae . Printf ( "\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a" ) ;
_ae . Println ( "-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f" ) ;
return _d . New ( "\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064" ) ; } ;

// _ggdd reports whether all marks of all words on the line carry the same `_ebgag` value
// (true for a line without marks). -1 is used as the "nothing seen yet" sentinel.
// The break only leaves the inner loop; the result stays false once a mismatch was found.
func _ggdd ( _cfb * textLine ) bool { _fecd := true ; _bgbc := - 1 ; for _ , _ffcd := range _cfb . _bfag { for _ , _daef := range _ffcd . _ebfa { _afbf := _daef . _ebgag ;
if _bgbc == - 1 { _bgbc = _afbf ; } else { if _bgbc != _afbf { _fecd = false ; break ; } ; } ; } ; } ; return _fecd ; } ;

// add inserts `_ddebde` into the set.
func ( _adccc intSet ) add ( _ddebde int ) { _adccc [ _ddebde ] = struct { } { } } ;

// getRight collects, for every row index below `_agdc`, the `_aabe` para of the cell in the last
// column (`_afcga - 1`). It returns nil if any of those paras is taken(), or if the collected
// paras are not chained through `_ccee` (entry i's `_ccee` must be entry i+1).
func ( _aeef * textTable ) getRight ( ) paraList { _agfc := make ( paraList , _aeef . _agdc ) ;
for _fcbb := 0 ; _fcbb < _aeef . _agdc ; _fcbb ++ { _dbdbe := _aeef . get ( _aeef . _afcga - 1 , _fcbb ) . _aabe ; if _dbdbe . taken ( ) { return nil ; } ; _agfc [ _fcbb ] = _dbdbe ; } ; for _febc := 0 ; _febc < _aeef . _agdc - 1 ; _febc ++ { if _agfc [ _febc ] . _ccee != _agfc [ _febc + 1 ] { return nil ;
} ; } ; return _agfc ; } ;

// bbox returns the rectangle spanned by the rulings. An empty list is logged as an error and
// yields the zero rectangle. The kind (`_egdf`) of the first ruling decides the axes: for
// `_bfgb` the x-extent comes from secMinMax and the y-extent from primMinMax, otherwise the
// other way round.
func ( _eaggc rulingList ) bbox ( ) _aec . PdfRectangle { var _bdfg _aec . PdfRectangle ; if len ( _eaggc ) == 0 { _fc . Log . Error ( "r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073" ) ;
return _aec . PdfRectangle { } ; } ; if _eaggc [ 0 ] . _egdf == _bfgb { _bdfg . Llx , _bdfg . Urx = _eaggc . secMinMax ( ) ; _bdfg . Lly , _bdfg . Ury = _eaggc . primMinMax ( ) ; } else { _bdfg . Llx , _bdfg . Urx = _eaggc . primMinMax ( ) ; _bdfg . Lly , _bdfg . Ury = _eaggc . secMinMax ( ) ; } ; return _bdfg ;
} ;

// _gbca returns the smaller of its two arguments.
func _gbca ( _gbddb , _bfcfa int ) int { if _gbddb < _bfcfa { return _gbddb ; } ; return _bfcfa ; } ;

// _age binds the float64 argument of the three-argument predicate `_dad` to `_fecag` and returns
// the resulting two-argument predicate.
func _age ( _dad func ( * wordBag , * textWord , float64 ) bool , _fecag float64 ) func ( * wordBag , * textWord ) bool { return func ( _fafb * wordBag , _ggee * textWord ) bool { return _dad ( _fafb , _ggee , _fecag ) } ;
} ;

// _cbace converts `_cbfbd` into an RGBA image of the same width and height: pixels whose R, G and
// B components sum to zero are painted with `_cecf`, all other pixels become transparent.
// A pixel whose colour cannot be read is logged at debug level and left at the RGBA zero value.
func _cbace ( _cbfbd * _aec . Image , _cecf _eg . Color ) _ec . Image { _fdgab , _bgdda := int ( _cbfbd . Width ) , int ( _cbfbd . Height ) ; _ccfda := _ec . NewRGBA ( _ec . Rect ( 0 , 0 , _fdgab , _bgdda ) ) ; for _gcebe := 0 ; _gcebe < _bgdda ; _gcebe ++ { for _eadae := 0 ; _eadae < _fdgab ;
_eadae ++ { _daea , _cfef := _cbfbd . ColorAt ( _eadae , _gcebe ) ; if _cfef != nil { _fc . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e" , _eadae , _gcebe ) ;
continue ; } ; _fecdb , _afaf , _efbd , _ := _daea . RGBA ( ) ; var _bacee _eg . Color ; if _fecdb + _afaf + _efbd == 0 { _bacee = _cecf ; } else { _bacee = _eg . Transparent ; } ; _ccfda . Set ( _eadae , _gcebe , _bacee ) ; } ; } ; return _ccfda ; } ;

// primMinMax returns the smallest and largest `_gbgc` value in the list.
// It reads element 0 unconditionally, so the list must not be empty.
func ( _fbbdf rulingList ) primMinMax ( ) ( float64 , float64 ) { _ffgc , _bgge := _fbbdf [ 0 ] . _gbgc , _fbbdf [ 0 ] . _gbgc ;
for _ , _gggff := range _fbbdf [ 1 : ] { if _gggff . _gbgc < _ffgc { _ffgc = _gggff . _gbgc ; } else if _gggff . _gbgc > _bgge { _bgge = _gggff . _gbgc ; } ; } ; return _ffgc , _bgge ; } ;

// _ggcce copies the package-level template mark `_dfba`, sets its Text to `_bfgg`, and passes it
// together with `_gfff` and `_fgee` to `_fccge`, returning that call's result.
func _ggcce ( _gfff [ ] TextMark , _fgee * int , _bfgg string ) [ ] TextMark { _ecdb := _dfba ; _ecdb . Text = _bfgg ;
return _fccge ( _gfff , _fgee , _ecdb ) ; } ;

// log is a debugging aid and does nothing unless `_gbead` is set. It logs `_aaaf` with the
// table's two dimensions, its `_fbccb` flag and its rectangle, then prints every non-nil cell
// with its position, rectangle, text truncated through `_bgfd(..., 50)` and rune count.
func ( _abfdf * textTable ) log ( _aaaf string ) { if ! _gbead { return ; } ; _fc . Log . Info ( "~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066" , _aaaf , _abfdf . _afcga , _abfdf . _agdc , _abfdf . _fbccb , _abfdf . PdfRectangle ) ;
for _gdcd := 0 ; _gdcd < _abfdf . _agdc ; _gdcd ++ { for _fbbb := 0 ; _fbbb < _abfdf . _afcga ; _fbbb ++ { _gfcb := _abfdf . get ( _fbbb , _gdcd ) ; if _gfcb == nil { continue ; } ; _ae . Printf ( "%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a" , _fbbb , _gdcd , _gfcb . PdfRectangle , _bgfd ( _gfcb . text ( ) , 50 ) , _e . RuneCountInString ( _gfcb . text ( ) ) ) ;
} ; } ; } ;

// shapesState tracks path construction: two transform matrices, the subpaths built so far
// (`_edc`), a flag (`_effc`) that makes closePath start a fresh subpath from the stored point
// `_cgbcf`, and a back-reference to a text object.
type shapesState struct { _affc _g . Matrix ; _gdfbg _g . Matrix ; _edc [ ] * subpath ; _effc bool ; _cgbcf _g . Point ; _bbdg * textObject ; } ;

// sort orders every word slice in the bag in place, using `_bcea(a, b) < 0` as "a before b".
func ( _eac * wordBag ) sort ( ) { for _ , _dbdb := range _eac . _gbbd { _a . Slice ( _dbdb , func ( _efg , _gbbg int ) bool { return _bcea ( _dbdb [ _efg ] , _dbdb [ _gbbg ] ) < 0 } ) ;
} ; } ;

// _gbgg returns the keys of `_bccaa` in ascending order.
func _gbgg ( _bccaa map [ float64 ] [ ] * textLine ) [ ] float64 { _gfddd := [ ] float64 { } ; for _aeag := range _bccaa { _gfddd = append ( _gfddd , _aeag ) ; } ; _a . Float64s ( _gfddd ) ; return _gfddd ; } ;
2023-09-07 17:40:17 +00:00
2024-03-27 22:34:33 +00:00
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	Images []ImageMark
}

// _bbfff feeds the absolute x and y distances between the two points to `_gdba` and returns
// its verdict.
func _bbfff(_edfec, _ccaag _g.Point) bool {
	return _gdba(_ea.Abs(_edfec.X-_ccaag.X), _ea.Abs(_edfec.Y-_ccaag.Y))
}

// readBefore reports whether the para at index `_eefg` should be read before the para at index
// `_bdcd`. `_gaea` is the index ordering handed to llyRange.
//
// The answer is true when `_dcaa` accepts the pair and the first para has the larger Lly.
// Otherwise the first para's `_bbbc` box must lie strictly left of the second's, and no third
// para whose Lly falls between the two may horizontally overlap the x-interval
// [max(Llx), min(Urx)] computed from the two boxes.
func (_acgef paraList) readBefore(_gaea []int, _eefg, _bdcd int) bool {
	first, second := _acgef[_eefg], _acgef[_bdcd]

	if _dcaa(first, second) && first.Lly > second.Lly {
		return true
	}
	if !(first._bbbc.Urx < second._bbbc.Llx) {
		return false
	}

	// Vertical band between the two paras, low edge first.
	low, high := first.Lly, second.Lly
	if low > high {
		low, high = high, low
	}

	left := _ea.Max(first._bbbc.Llx, second._bbbc.Llx)
	right := _ea.Min(first._bbbc.Urx, second._bbbc.Urx)

	for _, idx := range _acgef.llyRange(_gaea, low, high) {
		if idx == _eefg || idx == _bdcd {
			continue
		}
		// A para in the band that overlaps the x-interval blocks the ordering.
		between := _acgef[idx]
		if between._bbbc.Llx <= right && left <= between._bbbc.Urx {
			return false
		}
	}
	return true
}
2023-09-07 17:40:17 +00:00
2024-03-27 22:34:33 +00:00
// TableInfo gets table information of the textmark `tm`.
func ( _fga * TextMark ) TableInfo ( ) ( * TextTable , [ ] [ ] int ) { if ! _fga . _adgb { return nil , nil ; } ; _bcad := _fga . _dfeb ; _fdd := _bcad . getCellInfo ( * _fga ) ; return _bcad , _fdd ; } ;

// sort orders the rulings in place using the list's comp method as the less function.
func ( _gbbgf rulingList ) sort ( ) { _a . Slice ( _gbbgf , _gbbgf . comp ) } ;

// llyOrdering returns the indices of the paras ordered by ascending Lly. The sort is stable, so
// paras with equal Lly keep their relative order.
func ( _dbcc paraList ) llyOrdering ( ) [ ] int { _eadead := make ( [ ] int , len ( _dbcc ) ) ;
for _fcccb := range _dbcc { _eadead [ _fcccb ] = _fcccb ; } ; _a . SliceStable ( _eadead , func ( _cgg , _adbc int ) bool { _cdea , _egeg := _eadead [ _cgg ] , _eadead [ _adbc ] ; return _dbcc [ _cdea ] . Lly < _dbcc [ _egeg ] . Lly ; } ) ; return _eadead ; } ;

// eventNeighbours runs a sweep over `_accfd` and returns, for each para that has an entering
// event, the indices of the paras that were active at the same time.
//
// Events are sorted in place by `_dfada`; on ties, events with `_aedb` set (entering) come before
// the others, then the slice index is used. An entering event links its para (`_dacaa`) with every
// para currently in the active set, in both directions, and then joins the set; any other event
// removes its para from the set. Paras without neighbours map to nil.
// The order of indices within each result slice follows map iteration and is not deterministic.
func ( _edde paraList ) eventNeighbours ( _accfd [ ] event ) map [ * textPara ] [ ] int { _a . Slice ( _accfd , func ( _ebgbf , _ffgec int ) bool { _fade , _gccb := _accfd [ _ebgbf ] , _accfd [ _ffgec ] ;
_gacfe , _gegce := _fade . _dfada , _gccb . _dfada ; if _gacfe != _gegce { return _gacfe < _gegce ; } ; if _fade . _aedb != _gccb . _aedb { return _fade . _aedb ; } ; return _ebgbf < _ffgec ; } ) ; _cagdc := make ( map [ int ] intSet ) ; _cdecb := make ( intSet ) ; for _ , _bbgbc := range _accfd { if _bbgbc . _aedb { _cagdc [ _bbgbc . _dacaa ] = make ( intSet ) ;
for _cbcfa := range _cdecb { if _cbcfa != _bbgbc . _dacaa { _cagdc [ _bbgbc . _dacaa ] . add ( _cbcfa ) ; _cagdc [ _cbcfa ] . add ( _bbgbc . _dacaa ) ; } ; } ; _cdecb . add ( _bbgbc . _dacaa ) ; } else { _cdecb . del ( _bbgbc . _dacaa ) ; } ; } ; _ccac := map [ * textPara ] [ ] int { } ; for _dgggb , _befcc := range _cagdc { _dcfge := _edde [ _dgggb ] ;
if len ( _befcc ) == 0 { _ccac [ _dcfge ] = nil ; continue ; } ; _feade := make ( [ ] int , len ( _befcc ) ) ; _bbgf := 0 ; for _feef := range _befcc { _feade [ _bbgf ] = _feef ; _bbgf ++ ; } ; _ccac [ _dcfge ] = _feade ; } ; return _ccac ; } ;

// secMinMax returns the smallest `_fgad` and the largest `_ababc` value in the list.
// It reads element 0 unconditionally, so the list must not be empty.
func ( _fdde rulingList ) secMinMax ( ) ( float64 , float64 ) { _bdgb , _cfbaa := _fdde [ 0 ] . _fgad , _fdde [ 0 ] . _ababc ;
for _ , _bcgf := range _fdde [ 1 : ] { if _bcgf . _fgad < _bdgb { _bdgb = _bcgf . _fgad ; } ; if _bcgf . _ababc > _cfbaa { _cfbaa = _bcgf . _ababc ; } ; } ; return _bdgb , _cfbaa ; } ;

// _fgacb removes the first occurrence of `_dadf` (compared by pointer) from `_gbdga` through
// `_eedce` and returns the result. If the word is not in the slice an error is logged and nil is
// returned — callers that assign the result back lose the whole slice in that case.
func _fgacb ( _gbdga [ ] * textWord , _dadf * textWord ) [ ] * textWord { for _gebg , _gcgcd := range _gbdga { if _gcgcd == _dadf { return _eedce ( _gbdga , _gebg ) ;
} ; } ; _fc . Log . Error ( "\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073" , _dadf ) ;
return nil ; } ;

// closePath closes the most recent subpath.
// If `_effc` is set, a new subpath created by `_bbcb` from the stored point `_cgbcf` is appended
// first and the flag is cleared. If the flag is not set and there are no subpaths, the call only
// clears the flag (and optionally logs) and returns. `_bcge` gates the diagnostics.
func ( _dca * shapesState ) closePath ( ) { if _dca . _effc { _dca . _edc = append ( _dca . _edc , _bbcb ( _dca . _cgbcf ) ) ; _dca . _effc = false ; } else if len ( _dca . _edc ) == 0 { if _bcge { _fc . Log . Debug ( "\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068" ) ;
} ; _dca . _effc = false ; return ; } ; _dca . _edc [ len ( _dca . _edc ) - 1 ] . close ( ) ; if _bcge { _fc . Log . Info ( "\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073" , _dca ) ; } ; } ;

// emptyCompositeColumn reports whether column `_fbbge` has no content in the `_gaeb` map: for
// every row index below `_agdc`, the entry keyed by `_fgged(_fbbge, row)` is either absent or has
// an empty paraList.
func ( _cfdcb * textTable ) emptyCompositeColumn ( _fbbge int ) bool { for _eadaa := 0 ;
_eadaa < _cfdcb . _agdc ; _eadaa ++ { if _cgfe , _ffdaa := _cfdcb . _gaeb [ _fgged ( _fbbge , _eadaa ) ] ; _ffdaa { if len ( _cgfe . paraList ) > 0 { return false ; } ; } ; } ; return true ; } ;

// drawRectangle records a closed rectangular path with one corner at (`_dabf`, `_fgc`), width
// `_daed` and height `_aedf`: it starts a new subpath, moves to the corner, draws three edges with
// lineTo and finishes with closePath. When `_bcge` is set the rectangle is logged in device
// coordinates first.
func ( _bdddg * shapesState ) drawRectangle ( _dabf , _fgc , _daed , _aedf float64 ) { if _bcge { _fffc := _bdddg . devicePoint ( _dabf , _fgc ) ;
_bdc := _bdddg . devicePoint ( _dabf + _daed , _fgc + _aedf ) ; _bbaf := _aec . PdfRectangle { Llx : _fffc . X , Lly : _fffc . Y , Urx : _bdc . X , Ury : _bdc . Y } ; _fc . Log . Info ( "d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066" , _bbaf ) ;
} ; _bdddg . newSubPath ( ) ; _bdddg . moveTo ( _dabf , _fgc ) ; _bdddg . lineTo ( _dabf + _daed , _fgc ) ; _bdddg . lineTo ( _dabf + _daed , _fgc + _aedf ) ; _bdddg . lineTo ( _dabf , _fgc + _aedf ) ; _bdddg . closePath ( ) ; } ;

// _bgbf is a debugging helper: it logs the number of grids with the title `_bgdea` and prints
// each grid's String() on its own line.
func _bgbf ( _bgdea string , _efgcf [ ] rulingList ) { _fc . Log . Info ( "\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073" , len ( _efgcf ) , _bgdea ) ;
for _cccb , _cdccb := range _efgcf { _ae . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _cccb , _cdccb . String ( ) ) ; } ; } ;

// _geaa creates a textPara with rectangle `_fecf` and lines `_caga`.
func _geaa ( _fecf _aec . PdfRectangle , _caga [ ] * textLine ) * textPara { return & textPara { PdfRectangle : _fecf , _bfagf : _caga } ; } ;
// writeText writes the text of every para to `_cbcgc`, skipping paras whose `_egbea` flag is set.
// After each written para except the one at the last index, a single space is emitted when
// `_gceb(para, next)` is true and two newlines otherwise; two newlines always end the output.
// Errors returned by Write are ignored.
func ( _dafdc paraList ) writeText ( _cbcgc _bc . Writer ) { for _bgad , _fbgb := range _dafdc { if _fbgb . _egbea { continue ; } ; _fbgb . writeText ( _cbcgc ) ; if _bgad != len ( _dafdc ) - 1 { if _gceb ( _fbgb , _dafdc [ _bgad + 1 ] ) { _cbcgc . Write ( [ ] byte ( "\u0020" ) ) ; } else { _cbcgc . Write ( [ ] byte ( "\u000a" ) ) ;
_cbcgc . Write ( [ ] byte ( "\u000a" ) ) ; } ; } ; } ; _cbcgc . Write ( [ ] byte ( "\u000a" ) ) ; _cbcgc . Write ( [ ] byte ( "\u000a" ) ) ; } ;

// bbox returns the line's bounding rectangle.
func ( _cebc * textLine ) bbox ( ) _aec . PdfRectangle { return _cebc . PdfRectangle } ;

// snapToGroupsDirection merges rulings whose primary coordinate (`_gbgc`) is nearly equal and
// returns the merged rulings in sortStrict order.
//
// Steps:
//  1. The receiver is sorted with sortStrict and walked once. A ruling whose `_gbgc` exceeds the
//     current group leader's by more than `_cabc` starts a new group; otherwise it joins the
//     current group. A ruling more than `_bedg` below the leader is logged as an ordering error.
//  2. Every ruling's `_gbgc` is overwritten (in place) with its group's mergePrimary() value.
//  3. Each group is split with splitSec(), every part is collapsed with merge(), and the result is
//     appended unless it aligns (primary and secondary) with the previously appended ruling, which
//     is logged as a duplicate.
//
// Element 0 is read unconditionally, so the receiver must not be empty.
func ( _abfg rulingList ) snapToGroupsDirection ( ) rulingList { _abfg . sortStrict ( ) ;
_addg := make ( map [ * ruling ] rulingList , len ( _abfg ) ) ; _aabcd := _abfg [ 0 ] ; _abbb := func ( _baef * ruling ) { _aabcd = _baef ; _addg [ _aabcd ] = rulingList { _baef } } ; _abbb ( _abfg [ 0 ] ) ; for _ , _dgcedb := range _abfg [ 1 : ] { if _dgcedb . _gbgc < _aabcd . _gbgc - _bedg { _fc . Log . Error ( "\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073" , _aabcd , _dgcedb ) ;
} ; if _dgcedb . _gbgc > _aabcd . _gbgc + _cabc { _abbb ( _dgcedb ) ; } else { _addg [ _aabcd ] = append ( _addg [ _aabcd ] , _dgcedb ) ; } ; } ; _bcee := make ( map [ * ruling ] float64 , len ( _addg ) ) ; _ffgeb := make ( map [ * ruling ] * ruling , len ( _abfg ) ) ; for _fbab , _ffbf := range _addg { _bcee [ _fbab ] = _ffbf . mergePrimary ( ) ;
for _ , _gcffa := range _ffbf { _ffgeb [ _gcffa ] = _fbab ; } ; } ; for _ , _aeca := range _abfg { _aeca . _gbgc = _bcee [ _ffgeb [ _aeca ] ] ; } ; _fbedf := make ( rulingList , 0 , len ( _abfg ) ) ; for _ , _ggefd := range _addg { _beag := _ggefd . splitSec ( ) ; for _dddfg , _feaa := range _beag { _geebc := _feaa . merge ( ) ;
if len ( _fbedf ) > 0 { _cafcg := _fbedf [ len ( _fbedf ) - 1 ] ; if _cafcg . alignsPrimary ( _geebc ) && _cafcg . alignsSec ( _geebc ) { _fc . Log . Error ( "\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073" , _dddfg , _cafcg , _geebc ) ;
continue ; } ; } ; _fbedf = append ( _fbedf , _geebc ) ; } ; } ; _fbedf . sortStrict ( ) ; return _fbedf ; } ;
2023-11-11 11:29:03 +00:00
2024-03-27 22:34:33 +00:00
// String returns a human readable description of `vecs`.
func (_bfgcb rulingList) String() string {
	if len(_bfgcb) == 0 {
		return "{ EMPTY }"
	}
	verts, horzs := _bfgcb.vertsHorzs()
	numVerts, numHorzs := len(verts), len(horzs)
	if numVerts == 0 || numHorzs == 0 {
		return _ae.Sprintf("{%d x %d}", numVerts, numHorzs)
	}
	// NOTE(review): using the first/last entries as the extremes presumes
	// vertsHorzs returns ordered lists — confirm against its definition.
	bbox := _aec.PdfRectangle{
		Llx: verts[0]._gbgc,
		Urx: verts[numVerts-1]._gbgc,
		Lly: horzs[numHorzs-1]._gbgc,
		Ury: horzs[0]._gbgc,
	}
	return _ae.Sprintf("{%d x %d: %6.2f}", numVerts, numHorzs, bbox)
}

// maxDepth returns the bag's `_aeceg` value minus its Lly.
func (_ddd *wordBag) maxDepth() float64 {
	return _ddd._aeceg - _ddd.Lly
}

// clearPath drops the current path (`_edc`) and clears the `_effc` flag.
// The state is logged when the `_bcge` debug switch is on.
func (_egdd *shapesState) clearPath() {
	_egdd._edc = nil
	_egdd._effc = false
	if _bcge {
		_fc.Log.Info("CLEAR: ss=%s", _egdd)
	}
}

// intersections maps the index of every ruling in the list to the set of
// indexes of the rulings of the other kind that it intersects.
// It returns nil when there are fewer than `_aaaa`+1 rulings of kind `_eebe`
// or fewer than `_acebd`+1 rulings of kind `_bfgb`, and also when the combined
// count exceeds `_edfde`.
func (_bacc rulingList) intersections() map[int]intSet {
	var firstKind, secondKind []int
	for idx, r := range _bacc {
		switch r._egdf {
		case _eebe:
			firstKind = append(firstKind, idx)
		case _bfgb:
			secondKind = append(secondKind, idx)
		}
	}
	if len(firstKind) < _aaaa+1 || len(secondKind) < _acebd+1 {
		return nil
	}
	if len(firstKind)+len(secondKind) > _edfde {
		_fc.Log.Debug("intersections: TOO MANY rulings vecs=%d = %d x %d", len(_bacc), len(firstKind), len(secondKind))
		return nil
	}
	crossings := make(map[int]intSet, len(firstKind)+len(secondKind))
	for _, a := range firstKind {
		for _, b := range secondKind {
			if !_bacc[a].intersects(_bacc[b]) {
				continue
			}
			// Create the sets lazily so only intersecting rulings get an entry.
			if _, ok := crossings[a]; !ok {
				crossings[a] = make(intSet)
			}
			if _, ok := crossings[b]; !ok {
				crossings[b] = make(intSet)
			}
			crossings[a].add(b)
			crossings[b].add(a)
		}
	}
	return crossings
}

// _bfc reports whether the two rectangles overlap (or touch) in the y direction.
func _bfc(_gebe, _ccgg _aec.PdfRectangle) bool {
	return _gebe.Lly <= _ccgg.Ury && _ccgg.Lly <= _gebe.Ury
}

// put stores `_caed` in the table's cell map under the key `_fgged(_gffd, _abed)`.
func (_acdec *textTable) put(_gffd, _abed int, _caed *textPara) {
	_acdec._bfdff[_fgged(_gffd, _abed)] = _caed
}

// getFontDirect looks up the font dictionary named `_faf` and builds a PdfFont from it.
// A lookup failure is returned as-is. A construction failure is logged at debug
// level and returned together with whatever NewPdfFontFromPdfObject produced.
func (_eec *textObject) getFontDirect(_faf string) (*_aec.PdfFont, error) {
	fontObj, err := _eec.getFontDict(_faf)
	if err != nil {
		return nil, err
	}
	font, err := _aec.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		_fc.Log.Debug("getFontDirect: NewPdfFontFromPdfObject failed. name=%#q err=%v", _faf, err)
	}
	return font, err
}

// _bada builds a ruling of kind `_bfgb` along the bottom edge of `_efcce`:
// primary coordinate Lly, spanning Llx to Urx.
func _bada(_efcce _aec.PdfRectangle) *ruling {
	return &ruling{
		_egdf:  _bfgb,
		_gbgc:  _efcce.Lly,
		_fgad:  _efcce.Llx,
		_ababc: _efcce.Urx,
	}
}

// alignsPrimary reports whether both rulings have the same kind and their
// primary coordinates differ by less than half of `_cabc`.
func (_dgfcd *ruling) alignsPrimary(_aegeb *ruling) bool {
	if _dgfcd._egdf != _aegeb._egdf {
		return false
	}
	return _ea.Abs(_dgfcd._gbgc-_aegeb._gbgc) < _cabc*0.5
}

// endsInHyphen reports whether the line's last word ends in a Unicode hyphen
// and the `_ecfgf` check accepts either that word (when its `_ceff` flag is set)
// or the text of the whole line.
// A line with no words returns false instead of panicking on the index.
func (_adag *textLine) endsInHyphen() bool {
	if len(_adag._bfag) == 0 {
		return false
	}
	lastWord := _adag._bfag[len(_adag._bfag)-1]
	wordText := lastWord._eedc
	// Cheap filter first: computing the full line text is deferred until needed.
	lastRune, size := _e.DecodeLastRuneInString(wordText)
	if size <= 0 || !_be.Is(_be.Hyphen, lastRune) {
		return false
	}
	if lastWord._ceff && _ecfgf(wordText) {
		return true
	}
	return _ecfgf(_adag.text())
}

// split divides the composite cell into a (len(_gbeab)+1) x (len(_bebdg)+1)
// table using `_bebdg` as row boundaries and `_gbeab` as column boundaries.
// Each text line goes to the first sub-rectangle accepted by `_dfdf`; lines
// that fit none are dropped. Only non-empty sub-cells are stored.
// The cell's paraList is sorted in place by (Lly, Llx).
func (_egba compositeCell) split(_bebdg, _gbeab []float64) *textTable {
	numRows := len(_bebdg) + 1
	numCols := len(_gbeab) + 1
	if _gbead {
		_fc.Log.Info("compositeCell.split: %d x %d\n\tcomposite=%s\n"+
			"\trowCorridors=%6.2f\n\tcolCorridors=%6.2f", numCols, numRows, _egba, _bebdg, _gbeab)
		_ae.Printf("    %d paras\n", len(_egba.paraList))
		for i, para := range _egba.paraList {
			_ae.Printf("%4d: %s\n", i, para.String())
		}
		_ae.Printf("    %d lines\n", len(_egba.lines()))
		for i, line := range _egba.lines() {
			_ae.Printf("%4d: %s\n", i, line)
		}
	}

	// NOTE(review): _afgc presumably brackets the corridors with the cell's own
	// edges (top→bottom for rows, left→right for columns) — confirm.
	_bebdg = _afgc(_bebdg, _egba.Ury, _egba.Lly)
	_gbeab = _afgc(_gbeab, _egba.Llx, _egba.Urx)

	cells := make(map[uint64]*textPara, numCols*numRows)
	table := textTable{_afcga: numCols, _agdc: numRows, _bfdff: cells}

	paras := _egba.paraList
	_a.Slice(paras, func(i, j int) bool {
		pi, pj := paras[i], paras[j]
		yi, yj := pi.Lly, pj.Lly
		if yi != yj {
			return yi < yj
		}
		return pi.Llx < pj.Llx
	})

	// Rectangle of every sub-cell, keyed like the table cells.
	rects := make(map[uint64]_aec.PdfRectangle, numCols*numRows)
	for y, lly := range _bebdg[1:] {
		ury := _bebdg[y]
		for x, urx := range _gbeab[1:] {
			llx := _gbeab[x]
			rects[_fgged(x, y)] = _aec.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
		}
	}
	if _gbead {
		_fc.Log.Info("compositeCell.split: rects")
		_ae.Printf("    ")
		for x := 0; x < numCols; x++ {
			_ae.Printf("%30d, ", x)
		}
		_ae.Println()
		for y := 0; y < numRows; y++ {
			_ae.Printf("  %2d:", y)
			for x := 0; x < numCols; x++ {
				_ae.Printf("%6.2f, ", rects[_fgged(x, y)])
			}
			_ae.Println()
		}
	}

	// locate returns the (x, y) of the first sub-cell accepting the line, or (-1, -1).
	locate := func(line *textLine) (int, int) {
		for y := 0; y < numRows; y++ {
			for x := 0; x < numCols; x++ {
				if _dfdf(rects[_fgged(x, y)], line.PdfRectangle) {
					return x, y
				}
			}
		}
		return -1, -1
	}

	cellLines := make(map[uint64][]*textLine, numCols*numRows)
	for _, line := range paras.lines() {
		x, y := locate(line)
		if x < 0 {
			continue
		}
		key := _fgged(x, y)
		cellLines[key] = append(cellLines[key], line)
	}

	for y := 0; y < len(_bebdg)-1; y++ {
		ury, lly := _bebdg[y], _bebdg[y+1]
		for x := 0; x < len(_gbeab)-1; x++ {
			llx, urx := _gbeab[x], _gbeab[x+1]
			rect := _aec.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			lines := cellLines[_fgged(x, y)]
			if len(lines) == 0 {
				continue
			}
			table.put(x, y, _geaa(rect, lines))
		}
	}
	return &table
}

// _gfbf converts structure elements into a tree of lists, recursing into each
// element's children (`_ccaac`). An element receives the lines recorded in
// `_edgb` under its MCID (`_fab`) only when the MCID is not -1, its `_ecdd`
// entry is an object reference, and that reference deep-equals the reference
// of `_cfbg` (which must be an indirect object).
func _gfbf(_fedfg []structElement, _edgb map[int][]*textLine, _cfbg _bad.PdfObject) []*list {
	result := []*list{}
	for _, elem := range _fedfg {
		children := elem._ccaac
		mcid := int(elem._fab)
		tag := elem._aeff
		lines := []*textLine{}
		sublists := []*list{}

		pageRef, isRef := elem._ecdd.(*_bad.PdfObjectReference)
		if !isRef {
			_fc.Log.Debug("failed ot cast to *core.PdfObjectReference")
		}
		if mcid != -1 && pageRef != nil {
			if mcidLines, found := _edgb[mcid]; found {
				if indirect, isIndirect := _cfbg.(*_bad.PdfIndirectObject); isIndirect {
					if _fb.DeepEqual(*pageRef, indirect.PdfObjectReference) {
						lines = mcidLines
					}
				}
			}
		}
		if children != nil {
			sublists = _gfbf(children, _edgb, _cfbg)
		}
		result = append(result, _deec(lines, tag, sublists))
	}
	return result
}

// subdivide splits every composite cell of the table along its row and column
// corridors and returns a new table holding the resulting sub-cells.
// The receiver is returned unchanged when either corridor set is empty.
func (_afdd *textTable) subdivide() *textTable {
	_afdd.logComposite("subdivide")
	rowCorridors := _afdd.compositeRowCorridors()
	colCorridors := _afdd.compositeColCorridors()
	if _gbead {
		_fc.Log.Info("subdivide:\n\trowCorridors=%s\n\tcolCorridors=%s", _dbeg(rowCorridors), _dbeg(colCorridors))
	}
	if len(rowCorridors) == 0 || len(colCorridors) == 0 {
		return _afdd
	}
	_bedae(rowCorridors)
	_bedae(colCorridors)
	if _gbead {
		_fc.Log.Info("subdivide fixed:\n\trowCorridors=%s\n\tcolCorridors=%s", _dbeg(rowCorridors), _dbeg(colCorridors))
	}

	yOffsets, numRows := _fbacb(_afdd._agdc, rowCorridors)
	xOffsets, numCols := _fbacb(_afdd._afcga, colCorridors)
	cells := make(map[uint64]*textPara, numCols*numRows)
	result := &textTable{
		PdfRectangle: _afdd.PdfRectangle,
		_fbccb:       _afdd._fbccb,
		_agdc:        numRows,
		_afcga:       numCols,
		_bfdff:       cells,
	}
	if _gbead {
		_fc.Log.Info("subdivide: composite = %d x %d cells= %d x %d\n"+
			"\trowCorridors=%s\n"+
			"\tcolCorridors=%s\n"+
			"\tyOffsets=%+v\n"+
			"\txOffsets=%+v",
			_afdd._afcga, _afdd._agdc, numCols, numRows, _dbeg(rowCorridors), _dbeg(colCorridors), yOffsets, xOffsets)
	}

	for y := 0; y < _afdd._agdc; y++ {
		y0 := yOffsets[y]
		for x := 0; x < _afdd._afcga; x++ {
			x0 := xOffsets[x]
			if _gbead {
				_ae.Printf("%6d, %2d: x0=%d y0=%d\n", x, y, x0, y0)
			}
			composite, ok := _afdd._gaeb[_fgged(x, y)]
			if !ok {
				continue
			}
			sub := composite.split(rowCorridors[y], colCorridors[x])
			// Copy the sub-table into the result at this composite's offset.
			for sy := 0; sy < sub._agdc; sy++ {
				for sx := 0; sx < sub._afcga; sx++ {
					para := sub.get(sx, sy)
					result.put(x0+sx, y0+sy, para)
					if _gbead {
						_ae.Printf("%8d, %2d: %s\n", x0+sx, y0+sy, para)
					}
				}
			}
		}
	}
	return result
}

// makeRemovals returns a map with one empty word set for every key in the bag's
// `_gbbd` map.
func (_ebeg *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	removals := make(map[int]map[*textWord]struct{}, len(_ebeg._gbbd))
	for depthIdx := range _ebeg._gbbd {
		removals[depthIdx] = make(map[*textWord]struct{})
	}
	return removals
}
2024-01-22 01:16:41 +00:00
2024-03-27 22:34:33 +00:00
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set stored in the receiver PageText.
// Can be called multiple times in a row with different bounding boxes.
func (_eggd *PageText) ApplyArea(bbox _aec.PdfRectangle) {
	// Keep only the marks accepted by _fbdc for the requested area.
	marks := make([]*textMark, 0, len(_eggd._dbfe))
	for _, mark := range _eggd._dbfe {
		if _fbdc(mark.bbox(), bbox) {
			marks = append(marks, mark)
		}
	}

	var paras paraList
	remaining := len(marks)
	// Handle the marks one `_eeacf` value at a time (0, 90, 180, 270), stopping
	// early once every mark has been consumed.
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		group := make([]*textMark, 0, len(marks)-remaining)
		for _, mark := range marks {
			if mark._eeacf == orient {
				group = append(group, mark)
			}
		}
		if len(group) > 0 {
			groupParas := _fdba(group, _eggd._babf, nil, nil, _eggd._cdgg._cgff)
			paras = append(paras, groupParas...)
			remaining -= len(group)
		}
	}

	buf := new(_fe.Buffer)
	paras.writeText(buf)
	_eggd._eede = buf.String()
	_eggd._fgb = paras.toTextMarks()
	_eggd._gba = paras.tables()
}

// _eccfd returns the larger of two ints.
func _eccfd(_ebgd, _cced int) int {
	if _ebgd > _cced {
		return _ebgd
	}
	return _cced
}

// _dbdad builds one "bullet" list per line stored under the first key returned
// by `_gbgg(_fbdbb)`. Nested lists come from `_fgeda` and continuation lines
// from `_eagc`. An empty, non-nil slice is returned when there are no keys.
func _dbdad(_ddga []*textLine, _fbdbb map[float64][]*textLine) []*list {
	keys := _gbgg(_fbdbb)
	result := []*list{}
	if len(keys) == 0 {
		return result
	}
	nextKeyIdx := 1
	topLines := _fbdbb[keys[0]]
	for i, line := range topLines {
		nested := []*list{}
		depth := line._gaca
		// -1 is passed on when there is no following top-level line.
		nextDepth := -1.0
		if i < len(topLines)-1 {
			nextDepth = topLines[i+1]._gaca
		}
		if nextKeyIdx < len(keys) {
			nested = _fgeda(_ddga, _fbdbb, keys, nextKeyIdx, depth, nextDepth)
		}
		// The item's own text stops at the first nested line when there is one.
		limit := nextDepth
		if len(nested) > 0 {
			if first := nested[0]; len(first._gagag) > 0 {
				limit = first._gagag[0]._gaca
			}
		}
		itemLines := []*textLine{line}
		itemLines = append(itemLines, _eagc(line, _ddga, keys, depth, limit)...)
		item := _deec(itemLines, "bullet", nested)
		item._begg = _aadg(itemLines, "")
		result = append(result, item)
	}
	return result
}

// list is a node in a tree of extracted lists: its text lines, a tag string,
// its child lists and a text string.
type list struct {
	_gagag []*textLine
	_efac  string
	_bffd  []*list
	_begg  string
}

// _fbbg groups the lines of every paragraph, including the lines of the
// paragraphs held in a paragraph's table (`_caaa`), by the `_ebgag` value of
// the first mark of the line's first word. Lines rejected by `_ggdd` are
// logged and skipped.
func _fbbg(_ccad *paraList) map[int][]*textLine {
	grouped := map[int][]*textLine{}
	collect := func(lines []*textLine) {
		for _, line := range lines {
			if !_ggdd(line) {
				_fc.Log.Debug("groupLines: The text line contains more than one mcid number. It should be split.")
				continue
			}
			mcid := line._bfag[0]._ebfa[0]._ebgag
			grouped[mcid] = append(grouped[mcid], line)
		}
	}
	for _, para := range *_ccad {
		collect(para._bfagf)
		if para._caaa == nil {
			continue
		}
		for _, cell := range para._caaa._bfdff {
			collect(cell._bfagf)
		}
	}
	return grouped
}

// _cffd returns the single parameter of `_feca` as a float64.
// It returns an error when the operation does not have exactly one parameter.
func _cffd(_feca *_ba.ContentStreamOperation) (float64, error) {
	if len(_feca.Params) != 1 {
		err := _d.New("incorrect parameter count")
		_fc.Log.Debug("ERROR: %#q should have %d input params, got %d %+v", _feca.Operand, 1, len(_feca.Params), _feca.Params)
		return 0.0, err
	}
	return _bad.GetNumberAsFloat(_feca.Params[0])
}

// devicePoint transforms (`_fdfc`, `_bgg`) by the product `_gdfbg.Mult(_affc)`
// and returns the result as a Point.
func (_cca *shapesState) devicePoint(_fdfc, _bgg float64) _g.Point {
	matrix := _cca._gdfbg.Mult(_cca._affc)
	x, y := matrix.Transform(_fdfc, _bgg)
	return _g.NewPoint(x, y)
}

// _cffa starts a new text line from the first word of `_abgcc` at index `_egce`
// and pulls that word out of the bag into the line.
func _cffa(_abgcc *wordBag, _egce int) *textLine {
	word := _abgcc.firstWord(_egce)
	line := textLine{PdfRectangle: word.PdfRectangle, _fgcb: word._aeegf, _gaca: word._aecg}
	line.pullWord(_abgcc, word, _egce)
	return &line
}

// getComposite returns the result of parasBBox for the composite cell at
// (`_bffg`, `_ffef`), or (nil, zero rectangle) when the table has no such cell.
func (_eddf *textTable) getComposite(_bffg, _ffef int) (paraList, _aec.PdfRectangle) {
	cell, ok := _eddf._gaeb[_fgged(_bffg, _ffef)]
	if _gbead {
		_ae.Printf("        getComposite(%d,%d)->%s\n", _bffg, _ffef, cell.String())
	}
	if !ok {
		return nil, _aec.PdfRectangle{}
	}
	return cell.parasBBox()
}

// compositeCell is a rectangle together with the paragraphs it holds.
type compositeCell struct {
	_aec.PdfRectangle
	paraList
}

// _deec returns a list with the given lines, tag and child lists.
func _deec(_fgce []*textLine, _gcge string, _baagd []*list) *list {
	return &list{_gagag: _fgce, _efac: _gcge, _bffd: _baagd}
}

// splitSec sorts the rulings in place by (`_fgad`, `_ababc`) and partitions
// them into groups. A ruling joins a group when alignsSec accepts it against a
// ruling already in that group.
// An empty list yields nil instead of panicking on the first-element access.
func (_cbed rulingList) splitSec() []rulingList {
	if len(_cbed) == 0 {
		return nil
	}
	_a.Slice(_cbed, func(i, j int) bool {
		ri, rj := _cbed[i], _cbed[j]
		if ri._fgad != rj._fgad {
			return ri._fgad < rj._fgad
		}
		return ri._ababc < rj._ababc
	})

	assigned := make(map[*ruling]struct{}, len(_cbed))
	// gather builds the group seeded by `seed`, claiming every still-unassigned
	// ruling that aligns with a ruling in the group at the time it is examined.
	gather := func(seed *ruling) rulingList {
		group := rulingList{seed}
		assigned[seed] = struct{}{}
		for _, candidate := range _cbed {
			if _, taken := assigned[candidate]; taken {
				continue
			}
			for _, member := range group {
				if candidate.alignsSec(member) {
					group = append(group, candidate)
					assigned[candidate] = struct{}{}
					break
				}
			}
		}
		return group
	}

	groups := []rulingList{gather(_cbed[0])}
	for _, r := range _cbed[1:] {
		if _, taken := assigned[r]; taken {
			continue
		}
		groups = append(groups, gather(r))
	}
	return groups
}

// reset sets both matrices (`_acc`, `_dde`) to identity and clears `_fcee`.
func (_bbff *textObject) reset() {
	_bbff._acc = _g.IdentityMatrix()
	_bbff._dde = _g.IdentityMatrix()
	_bbff._fcee = nil
}

// equals reports whether both rulings have the same kind and `_cdffd` accepts
// each pair of their primary, start and end coordinates.
func (_gfee *ruling) equals(_bfgd *ruling) bool {
	if _gfee._egdf != _bfgd._egdf {
		return false
	}
	return _cdffd(_gfee._gbgc, _bfgd._gbgc) &&
		_cdffd(_gfee._fgad, _bfgd._fgad) &&
		_cdffd(_gfee._ababc, _bfgd._ababc)
}

// blocked reports whether one of the bag's ruling sets (`_eddc` for horizontal
// separation, `_gbfd` for vertical separation) blocks the gap between the word
// `_aca` and the bag. The x direction is checked first.
func (_abb *wordBag) blocked(_aca *textWord) bool {
	if _aca.Urx < _abb.Llx {
		// Word lies entirely to the left of the bag.
		from := _dgea(_aca.PdfRectangle)
		to := _fdfb(_abb.PdfRectangle)
		if _abb._eddc.blocks(from, to) {
			if _dfbb {
				_fc.Log.Info("blocked \u2190x: %s %s", _aca, _abb)
			}
			return true
		}
	} else if _abb.Urx < _aca.Llx {
		// Word lies entirely to the right of the bag.
		from := _dgea(_abb.PdfRectangle)
		to := _fdfb(_aca.PdfRectangle)
		if _abb._eddc.blocks(from, to) {
			if _dfbb {
				_fc.Log.Info("blocked x\u2192 : %s %s", _aca, _abb)
			}
			return true
		}
	}
	if _aca.Ury < _abb.Lly {
		// Word lies entirely below the bag.
		from := _ggabg(_aca.PdfRectangle)
		to := _bada(_abb.PdfRectangle)
		if _abb._gbfd.blocks(from, to) {
			if _dfbb {
				_fc.Log.Info("blocked \u2190y: %s %s", _aca, _abb)
			}
			return true
		}
	} else if _abb.Ury < _aca.Lly {
		// Word lies entirely above the bag.
		from := _ggabg(_abb.PdfRectangle)
		to := _bada(_aca.PdfRectangle)
		if _abb._gbfd.blocks(from, to) {
			if _dfbb {
				_fc.Log.Info("blocked y\u2192 : %s %s", _aca, _abb)
			}
			return true
		}
	}
	return false
}

// _bbbba returns the y values of the gaps ("row corridors") that separate
// groups of lines across all the given cells. A corridor is the midpoint
// between the lowest Lly seen so far and the Ury of the next line below it.
// It returns nil when the cells hold no lines, or when some cell's hasLines
// rejects one of the line groups.
func _bbbba(_abeeg []compositeCell) []float64 {
	var lines []*textLine
	numParas := 0
	for _, cell := range _abeeg {
		numParas += len(cell.paraList)
		lines = append(lines, cell.lines()...)
	}
	_a.Slice(lines, func(i, j int) bool {
		li, lj := lines[i], lines[j]
		di, dj := li._gaca, lj._gaca
		if !_ecfbd(di - dj) {
			return di < dj
		}
		return li.Llx < lj.Llx
	})
	if _gbead {
		_ae.Printf("    rowBorders: %d paras %d lines\n", numParas, len(lines))
		for i, line := range lines {
			_ae.Printf("%8d: %s\n", i, line)
		}
	}
	// Without lines there is nothing to separate; previously lines[0] panicked here.
	if len(lines) == 0 {
		return nil
	}

	var corridors []float64
	var groups [][]*textLine
	lowest := lines[0]
	current := []*textLine{lowest}
	for i, line := range lines[1:] {
		if line.Ury < lowest.Lly {
			border := 0.5 * (line.Ury + lowest.Lly)
			if _gbead {
				_ae.Printf("%4d: %6.2f < %6.2f border=%6.2f\n"+
					"\t q=%s\n\t p=%s\n", i, line.Ury, lowest.Lly, border, lowest, line)
			}
			corridors = append(corridors, border)
			groups = append(groups, current)
			current = nil
		}
		current = append(current, line)
		if line.Lly < lowest.Lly {
			lowest = line
		}
	}
	if len(current) > 0 {
		groups = append(groups, current)
	}

	if _gbead {
		_ae.Printf("        rowCorridors=%6.2f\n", corridors)
	}
	if _gbead {
		_fc.Log.Info("row=%d", len(_abeeg))
		for i, cell := range _abeeg {
			_ae.Printf("%4d: %s\n", i, cell)
		}
		_fc.Log.Info("groups=%d", len(groups))
		for i, group := range groups {
			_ae.Printf("%4d: %d\n", i, len(group))
			for j, line := range group {
				_ae.Printf("%8d: %s\n", j, line)
			}
		}
	}

	// The corridors are only usable if every cell accepts every group.
	spansAll := true
check:
	for gi, group := range groups {
		for ci, cell := range _abeeg {
			if _gbead {
				_ae.Printf("   ~~~group %d of %d cell %d of %d %s\n", gi, len(groups), ci, len(_abeeg), cell)
			}
			if !cell.hasLines(group) {
				if _gbead {
					_ae.Printf("   !!!group %d of %d cell %d of %d OUT\n", gi, len(groups), ci, len(_abeeg))
				}
				spansAll = false
				break check
			}
		}
	}
	if !spansAll {
		if _gbead {
			_fc.Log.Info("row corridors don't span all cells in row. ignoring")
		}
		corridors = nil
	}
	if _gbead && corridors != nil {
		_ae.Printf("        ***rowCorridors=%6.2f\n", corridors)
	}
	return corridors
}
2024-01-22 01:16:41 +00:00
2024-02-11 21:29:32 +00:00
// New returns an Extractor instance for extracting content from the input PDF page.
2024-03-27 22:34:33 +00:00
func New(page *_aec.PdfPage) (*Extractor, error) {
	// Delegates to NewWithOptions, passing nil for the options argument.
	return NewWithOptions(page, nil)
}
2023-11-11 11:29:03 +00:00
2024-03-27 22:34:33 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	IncludeInlineStencilMasks bool
}

var _gg = false

// _eagbe returns the distinct inner-map keys of `_degdd` in ascending order.
func _eagbe(_degdd map[float64]map[float64]gridTile) []float64 {
	seen := make(map[float64]struct{}, len(_degdd))
	keys := make([]float64, 0, len(_degdd))
	for _, inner := range _degdd {
		for key := range inner {
			if _, dup := seen[key]; !dup {
				keys = append(keys, key)
				seen[key] = struct{}{}
			}
		}
	}
	_a.Float64s(keys)
	return keys
}
2024-01-22 01:16:41 +00:00
2024-03-27 22:34:33 +00:00
// String returns a description of the word bag.
func (_bgbb *wordBag) String() string {
	var texts []string
	for _, depthIdx := range _bgbb.depthIndexes() {
		for _, word := range _bgbb._gbbd[depthIdx] {
			texts = append(texts, word._eedc)
		}
	}
	return _ae.Sprintf("%.2f fontsize=%.2f %d %q", _bgbb.PdfRectangle, _bgbb._aad, len(texts), texts)
}

// checkOp validates the parameter count of the operation `_edfd`.
// A nil receiver is only logged. When `_agbe` is non-negative and differs from
// the number of parameters, the result is false, and the error is non-nil only
// if `_ecb` is set. Otherwise it returns (true, nil).
func (_dfe *textObject) checkOp(_edfd *_ba.ContentStreamOperation, _agbe int, _ecb bool) (_gbdf bool, _dbad error) {
	if _dfe == nil {
		// Log at most `_agbe` parameters of the stray operand.
		var shown []_bad.PdfObject
		if _agbe > 0 {
			shown = _edfd.Params
			if len(shown) > _agbe {
				shown = shown[:_agbe]
			}
		}
		_fc.Log.Debug("%#q operand outside text. params=%+v", _edfd.Operand, shown)
	}
	if _agbe < 0 || len(_edfd.Params) == _agbe {
		return true, nil
	}
	if _ecb {
		_dbad = _d.New("incorrect parameter count")
	}
	_fc.Log.Debug("ERROR: %#q should have %d input params, got %d %+v", _edfd.Operand, _agbe, len(_edfd.Params), _edfd.Params)
	return false, _dbad
}

// rectRuling is a rectangle tagged with a ruling kind, a mark kind and a color.
type rectRuling struct {
	_cbae rulingKind
	_bgc  markKind
	_eg.Color
	_aec.PdfRectangle
}

// _adgf concatenates the given ruling lists and returns the result of
// vertsHorzs on the combined list.
func _adgf(_bgadg []rulingList) (rulingList, rulingList) {
	var combined rulingList
	for _, group := range _bgadg {
		combined = append(combined, group...)
	}
	return combined.vertsHorzs()
}

// computeBbox folds the rectangles of all non-nil cells together with `_agfb`.
// It returns the zero rectangle when the table has no non-nil cell.
func (_ebfdd *textTable) computeBbox() _aec.PdfRectangle {
	var bbox _aec.PdfRectangle
	seeded := false
	for y := 0; y < _ebfdd._agdc; y++ {
		for x := 0; x < _ebfdd._afcga; x++ {
			cell := _ebfdd.get(x, y)
			switch {
			case cell == nil:
				// Empty cell: contributes nothing.
			case !seeded:
				bbox = cell.PdfRectangle
				seeded = true
			default:
				bbox = _agfb(bbox, cell.PdfRectangle)
			}
		}
	}
	return bbox
}

// stroke appends the current path, paired with the current stroke color, to `*_gfdf`.
// When `_eceg` is set it prints a summary. When `_ggbbe` is also set it prints
// the path entries up to and including index 10.
func (_geagf *shapesState) stroke(_gfdf *[]pathSection) {
	section := pathSection{_gbag: _geagf._edc, Color: _geagf._bbdg.getStrokeColor()}
	*_gfdf = append(*_gfdf, section)
	if !_eceg {
		return
	}
	_ae.Printf("    STROKE: %d strokes ss=%s color=%+v %6.2f\n", len(*_gfdf), _geagf, _geagf._bbdg.getStrokeColor(), section.bbox())
	if !_ggbbe {
		return
	}
	for i, subpath := range _geagf._edc {
		_ae.Printf("%8d: %s\n", i, subpath)
		if i == 10 {
			break
		}
	}
}
2024-01-22 01:16:41 +00:00
2024-03-27 22:34:33 +00:00
// Text gets the extracted text contained in the list.
func (_ffffa *list) Text() string {
	builder := &_bb.Builder{}
	last := ""
	_ggge(_ffffa, builder, &last)
	return builder.String()
}

// processOperand inspects one content stream operation and extracts any images
// it refers to:
//   - "BI": an inline image. Skipped when it is an image mask and
//     IncludeInlineStencilMasks is off.
//   - "Do": an image XObject or a form XObject.
//   - "scn"/"SCN" while a Pattern color space is active: images inside a tiling pattern.
//   - "cs"/"CS": records whether the selected color space is "Pattern".
//
// Operations that match none of these are ignored.
func (_fbg *imageExtractContext) processOperand(_cbg *_ba.ContentStreamOperation, _afd _ba.GraphicsState, _egg *_aec.PdfPageResources) error {
	switch {
	case _cbg.Operand == "BI" && len(_cbg.Params) == 1:
		inlineImg, ok := _cbg.Params[0].(*_ba.ContentStreamInlineImage)
		if !ok {
			return nil
		}
		if isMask, found := _bad.GetBoolVal(inlineImg.ImageMask); found {
			if isMask && !_fbg._gfc.IncludeInlineStencilMasks {
				return nil
			}
		}
		return _fbg.extractInlineImage(inlineImg, _afd, _egg)

	case _cbg.Operand == "Do" && len(_cbg.Params) == 1:
		name, ok := _bad.GetName(_cbg.Params[0])
		if !ok {
			_fc.Log.Debug("ERROR: Type")
			return _ad
		}
		_, xobjType := _egg.GetXObjectByName(*name)
		switch xobjType {
		case _aec.XObjectTypeImage:
			return _fbg.extractXObjectImage(name, _afd, _egg)
		case _aec.XObjectTypeForm:
			return _fbg.extractFormImages(name, _afd, _egg)
		}

	case _fbg._ece && (_cbg.Operand == "scn" || _cbg.Operand == "SCN") && len(_cbg.Params) == 1:
		name, ok := _bad.GetName(_cbg.Params[0])
		if !ok {
			_fc.Log.Debug("ERROR: Type")
			return _ad
		}
		pattern, ok := _egg.GetPatternByName(*name)
		if !ok {
			_fc.Log.Debug("ERROR: Pattern not found")
			return nil
		}
		if pattern.IsTiling() {
			tiling := pattern.GetAsTilingPattern()
			content, err := tiling.GetContentStream()
			if err != nil {
				return err
			}
			if err = _fbg.extractContentStreamImages(string(content), tiling.Resources); err != nil {
				return err
			}
		}

	case (_cbg.Operand == "cs" || _cbg.Operand == "CS") && len(_cbg.Params) >= 1:
		_fbg._ece = _cbg.Params[0].String() == "Pattern"
	}
	return nil
}

// _degd returns the intersection of the two rectangles and true, or the zero
// rectangle and false when `_fbdc` reports that they do not overlap.
func _degd(_agfbf, _fcfe _aec.PdfRectangle) (_aec.PdfRectangle, bool) {
	if !_fbdc(_agfbf, _fcfe) {
		return _aec.PdfRectangle{}, false
	}
	overlap := _aec.PdfRectangle{
		Llx: _ea.Max(_agfbf.Llx, _fcfe.Llx),
		Urx: _ea.Min(_agfbf.Urx, _fcfe.Urx),
		Lly: _ea.Max(_agfbf.Lly, _fcfe.Lly),
		Ury: _ea.Min(_agfbf.Ury, _fcfe.Ury),
	}
	return overlap, true
}

// rulingKind identifies the kind of a ruling.
type rulingKind int

// parseStructTreeRoot fills the receiver from a structure tree root object:
// the "K" entry becomes the parsed child elements and the "Type" entry becomes
// the type string. A nil `_gbgf` is ignored.
//
// If `_gbgf` is not a dictionary the problem is logged and the receiver is left
// untouched; earlier code went on to dereference the missing dictionary.
// A missing "Type" entry now yields an empty type string instead of a nil
// method call.
func (_ecddc *structTreeRoot) parseStructTreeRoot(_gbgf _bad.PdfObject) {
	if _gbgf == nil {
		return
	}
	dict, ok := _bad.GetDict(_gbgf)
	if !ok {
		_fc.Log.Debug("parseStructTreeRoot: dictionary not found.")
		return
	}

	kidsObj := dict.Get("K")
	typeName := ""
	if typeObj := dict.Get("Type"); typeObj != nil {
		typeName = typeObj.String()
	}

	// "K" may be an array of kids or a single reference; anything else gives no kids.
	var kids *_bad.PdfObjectArray
	switch k := kidsObj.(type) {
	case *_bad.PdfObjectArray:
		kids = k
	case *_bad.PdfObjectReference:
		kids = _bad.MakeArray(kidsObj)
	}

	elements := []structElement{}
	if kids != nil {
		for _, kid := range kids.Elements() {
			elem := &structElement{}
			elem.parseStructElement(kid)
			elements = append(elements, *elem)
		}
	}
	_ecddc._bagdg = elements
	_ecddc._gage = typeName
}

// _dbef returns `_gdba(|Δy|, |Δx|)` for the two points.
func _dbef(_bfac, _efggf _g.Point) bool {
	dx := _ea.Abs(_bfac.X - _efggf.X)
	dy := _ea.Abs(_bfac.Y - _efggf.Y)
	return _gdba(dy, dx)
}

// absorb pulls every word of `_cdba` into the receiver and then applies the
// accumulated removals to `_cdba`.
func (_gabgd *wordBag) absorb(_cdba *wordBag) {
	removals := _cdba.makeRemovals()
	for depthIdx, words := range _cdba._gbbd {
		for _, word := range words {
			_gabgd.pullWord(word, depthIdx, removals)
		}
	}
	_cdba.applyRemovals(removals)
}

// _gdfd matches either of the patterns `_cgce` or `_ddac`.
var _gdfd *_f.Regexp = _f.MustCompile(_cgce + "|" + _ddac)

// ruling is a line segment with a kind, a mark kind and a color.
// `_gbgc` is its primary coordinate and `_fgad`/`_ababc` are the two ends of
// its extent (see `_bada`, which builds one from a rectangle's bottom edge).
type ruling struct {
	_egdf   rulingKind
	_dcebd  markKind
	_eg.Color
	_gbgc   float64
	_fgad   float64
	_ababc  float64
	_aecega float64
}
2024-01-22 01:16:41 +00:00
2024-03-27 22:34:33 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
// NewWithOptions fills _ffd from the page content streams, _gf from the page
// resources, _c from the media box, _af from the crop box, _fg from the caller
// options, _ga from the structure tree root and _bd from the containing object.
type Extractor struct { _ffd string ; _gf * _aec . PdfPageResources ; _c _aec . PdfRectangle ; _af * _aec . PdfRectangle ; _fba map [ string ] fontEntry ; _gfe map [ string ] textResult ; _fec int64 ; _fff int ; _fg * Options ; _ga * _bad . PdfObject ; _bd _bad . PdfObject ; } ;
// isExportable returns true at once when _fbccb is set. Otherwise it looks at
// get(0, i) for every i below _agdc and returns true as soon as one of those
// cells is nil, or has text longer than one rune that _aecab does not match.
// It returns false when every such cell is present and short or matched.
func ( _feed * textTable ) isExportable ( ) bool { if _feed . _fbccb { return true ;
} ; _badc := func ( _fbge int ) bool { _acbdb := _feed . get ( 0 , _fbge ) ; if _acbdb == nil { return false ; } ; _fdcd := _acbdb . text ( ) ; _cdffe := _e . RuneCountInString ( _fdcd ) ; _dddce := _aecab . MatchString ( _fdcd ) ; return _cdffe <= 1 || _dddce ; } ; for _bdebc := 0 ; _bdebc < _feed . _agdc ;
_bdebc ++ { if ! _badc ( _bdebc ) { return true ; } ; } ; return false ; } ;
// getDepthIdx converts _fag with _dafa and clamps the result to the first and
// last values returned by depthIndexes. It indexes element 0 of that slice, so
// depthIndexes must not be empty.
func ( _fgbc * wordBag ) getDepthIdx ( _fag float64 ) int { _geea := _fgbc . depthIndexes ( ) ; _gbbb := _dafa ( _fag ) ; if _gbbb < _geea [ 0 ] { return _geea [ 0 ] ; } ; if _gbbb > _geea [ len ( _geea ) - 1 ] { return _geea [ len ( _geea ) - 1 ] ;
} ; return _gbbb ; } ;
// writeText writes the paragraph to _fage. Without a table (_caaa == nil) it
// delegates to writeCellText. With a table it walks the cells in nested loops
// (outer bound _agdc, inner bound _afcga): a nil cell is written as a tab, any
// other cell through its own writeCellText, and each cell is followed by a
// space. A newline separates outer iterations but is not written after the
// last one. Errors returned by Write are ignored.
func ( _bbdc * textPara ) writeText ( _fage _bc . Writer ) { if _bbdc . _caaa == nil { _bbdc . writeCellText ( _fage ) ; return ; } ; for _faffg := 0 ; _faffg < _bbdc . _caaa . _agdc ; _faffg ++ { for _egbeg := 0 ; _egbeg < _bbdc . _caaa . _afcga ; _egbeg ++ { _dgfb := _bbdc . _caaa . get ( _egbeg , _faffg ) ;
if _dgfb == nil { _fage . Write ( [ ] byte ( "\u0009" ) ) ; } else { _dgfb . writeCellText ( _fage ) ; } ; _fage . Write ( [ ] byte ( "\u0020" ) ) ; } ; if _faffg < _bbdc . _caaa . _agdc - 1 { _fage . Write ( [ ] byte ( "\u000a" ) ) ; } ; } ; } ;
// _bedae walks the entries of _agfe in the key order produced by _gged. Maps
// with at most one entry are left alone. Leading keys with nil slices are
// skipped; for every later non-nil entry the last element of a slice is
// lowered to the first element of the current entry when it is larger, and the
// slice is stored under the previously visited key.
//
// NOTE(review): the slice that gets clamped is read from _agfe[_daga] (the
// current key), i.e. the same slice as _cfdfb, yet it is written back under
// _agfe[_agaba] (the previous key). That compares an entry with itself and
// aliases two keys to one slice. It looks like the read was meant to use
// _agaba - confirm before changing.
// NOTE(review): a non-nil but empty slice would make the index expressions
// panic - confirm callers never store one.
func _bedae ( _agfe map [ int ] [ ] float64 ) { if len ( _agfe ) <= 1 { return ;
} ; _dggb := _gged ( _agfe ) ; if _gbead { _fc . Log . Info ( "\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076" , _dggb ) ; } ; var _edec , _agaba int ; for _edec , _agaba = range _dggb { if _agfe [ _agaba ] != nil { break ; } ; } ; for _dgbce , _daga := range _dggb [ _edec : ] { _cfdfb := _agfe [ _daga ] ;
if _cfdfb == nil { continue ; } ; if _gbead { _ae . Printf ( "\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a" , _edec + _dgbce , _agaba , _daga ) ; } ; _cadffc := _agfe [ _daga ] ; if _cadffc [ len ( _cadffc ) - 1 ] > _cfdfb [ 0 ] { _cadffc [ len ( _cadffc ) - 1 ] = _cfdfb [ 0 ] ;
_agfe [ _agaba ] = _cadffc ; } ; _agaba = _daga ; } ; } ;
// cachedImage pairs an image with a colorspace.
type cachedImage struct { _cb * _aec . Image ; _ecf _aec . PdfColorspace ; } ;
// _acfc sorts _cbgda in place by ascending _gaca and then groups the lines in
// a map keyed by the rounded value of _egfdf(line).
func _acfc ( _cbgda [ ] * textLine ) map [ float64 ] [ ] * textLine { _a . Slice ( _cbgda , func ( _aebff , _dgdf int ) bool { return _cbgda [ _aebff ] . _gaca < _cbgda [ _dgdf ] . _gaca } ) ;
_abdc := map [ float64 ] [ ] * textLine { } ; for _ , _cdfd := range _cbgda { _addd := _egfdf ( _cdfd ) ; _addd = _ea . Round ( _addd ) ; _abdc [ _addd ] = append ( _abdc [ _addd ] , _cdfd ) ; } ; return _abdc ; } ;
// removeDuplicates drops words that duplicate another word in the same depth
// bucket. For each non-empty bucket it takes the first word, derives a
// tolerance _dfdcc * _aeegf from it and visits the buckets returned by
// depthBand(_aecg, _aecg+tolerance). Inside a visited bucket a word is marked
// when another, not yet marked, word has the same _eedc and all four bounding
// box coordinates within the tolerance. Marked words are filtered out in
// place and a bucket that becomes empty is deleted from the map.
func ( _bbda * wordBag ) removeDuplicates ( ) { if _gabga { _fc . Log . Info ( "r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071" , _bbda . text ( ) ) ;
} ; for _ , _ggbad := range _bbda . depthIndexes ( ) { if len ( _bbda . _gbbd [ _ggbad ] ) == 0 { continue ; } ; _abgf := _bbda . _gbbd [ _ggbad ] [ 0 ] ; _ddgab := _dfdcc * _abgf . _aeegf ; _ddad := _abgf . _aecg ; for _ , _gfce := range _bbda . depthBand ( _ddad , _ddad + _ddgab ) { _fdec := map [ * textWord ] struct { } { } ;
_ecfe := _bbda . _gbbd [ _gfce ] ; for _ , _aagg := range _ecfe { if _ , _abgec := _fdec [ _aagg ] ; _abgec { continue ; } ; for _ , _afcg := range _ecfe { if _ , _cefc := _fdec [ _afcg ] ; _cefc { continue ; } ; if _afcg != _aagg && _afcg . _eedc == _aagg . _eedc && _ea . Abs ( _afcg . Llx - _aagg . Llx ) < _ddgab && _ea . Abs ( _afcg . Urx - _aagg . Urx ) < _ddgab && _ea . Abs ( _afcg . Lly - _aagg . Lly ) < _ddgab && _ea . Abs ( _afcg . Ury - _aagg . Ury ) < _ddgab { _fdec [ _afcg ] = struct { } { } ;
} ; } ; } ; if len ( _fdec ) > 0 { _cfgdb := 0 ; for _ , _gecb := range _ecfe { if _ , _addec := _fdec [ _gecb ] ; ! _addec { _ecfe [ _cfgdb ] = _gecb ; _cfgdb ++ ; } ; } ; _bbda . _gbbd [ _gfce ] = _ecfe [ : len ( _ecfe ) - len ( _fdec ) ] ; if len ( _bbda . _gbbd [ _gfce ] ) == 0 { delete ( _bbda . _gbbd , _gfce ) ;
} ; } ; } ; } ; } ;
// last returns the final point of _acfg. It indexes len-1, so _acfg must not
// be empty.
func ( _feeg * subpath ) last ( ) _g . Point { return _feeg . _acfg [ len ( _feeg . _acfg ) - 1 ] } ;
// lineRuling carries a ruling kind, a mark kind, a colour and two points.
type lineRuling struct { _cdbfg rulingKind ; _cdac markKind ; _eg . Color ; _bcfgb , _befe _g . Point ; } ;
// _cgdbc returns a new slice holding the elements of _fcff in reverse order.
// The input is not modified.
func _cgdbc ( _fcff [ ] int ) [ ] int { _babfd := make ( [ ] int , len ( _fcff ) ) ;
for _cafc , _gcee := range _fcff { _babfd [ len ( _fcff ) - 1 - _cafc ] = _gcee ; } ; return _babfd ; } ;
// toTextMarks concatenates the text marks of every paragraph whose _egbea flag
// is not set. After each such paragraph except the last list entry, a space is
// appended when _gceb(paragraph, next) holds and two newlines otherwise. Two
// newlines are always appended at the very end. _gbdgg is a running counter
// shared with the helpers through a pointer (presumably a text offset -
// confirm against _ggcce).
func ( _bffce paraList ) toTextMarks ( ) [ ] TextMark { _gbdgg := 0 ; var _eadea [ ] TextMark ; for _egde , _egbe := range _bffce { if _egbe . _egbea { continue ; } ; _gegc := _egbe . toTextMarks ( & _gbdgg ) ;
_eadea = append ( _eadea , _gegc ... ) ; if _egde != len ( _bffce ) - 1 { if _gceb ( _egbe , _bffce [ _egde + 1 ] ) { _eadea = _ggcce ( _eadea , & _gbdgg , "\u0020" ) ; } else { _eadea = _ggcce ( _eadea , & _gbdgg , "\u000a" ) ; _eadea = _ggcce ( _eadea , & _gbdgg , "\u000a" ) ; } ; } ; } ; _eadea = _ggcce ( _eadea , & _gbdgg , "\u000a" ) ;
_eadea = _ggcce ( _eadea , & _gbdgg , "\u000a" ) ; return _eadea ; } ;
// logComposite is a debugging aid that does nothing unless _gbead is set. It
// prints two grids to standard output, each preceded by a log line carrying
// _cgfef: the first shows the number of paragraphs per composite cell, the
// second the merged cell text truncated by _bgfd(text, 12) and quoted with %q
// (the surrounding quotes are stripped).
func ( _aegf * textTable ) logComposite ( _cgfef string ) { if ! _gbead { return ; } ; _fc . Log . Info ( "\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073" , _aegf . _afcga , _aegf . _agdc , _cgfef ) ;
_ae . Printf ( "\u0025\u0035\u0073 \u007c" , "" ) ; for _eedee := 0 ; _eedee < _aegf . _afcga ; _eedee ++ { _ae . Printf ( "\u0025\u0033\u0064 \u007c" , _eedee ) ; } ; _ae . Println ( "" ) ; _ae . Printf ( "\u0025\u0035\u0073 \u002b" , "" ) ; for _cdgab := 0 ; _cdgab < _aegf . _afcga ; _cdgab ++ { _ae . Printf ( "\u0025\u0033\u0073 \u002b" , "\u002d\u002d\u002d" ) ;
} ; _ae . Println ( "" ) ; for _cbbad := 0 ; _cbbad < _aegf . _agdc ; _cbbad ++ { _ae . Printf ( "\u0025\u0035\u0064 \u007c" , _cbbad ) ; for _agdcc := 0 ; _agdcc < _aegf . _afcga ; _agdcc ++ { _fdce , _ := _aegf . _gaeb [ _fgged ( _agdcc , _cbbad ) ] . parasBBox ( ) ; _ae . Printf ( "\u0025\u0033\u0064 \u007c" , len ( _fdce ) ) ;
} ; _ae . Println ( "" ) ; } ; _fc . Log . Info ( "\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073" , _aegf . _afcga , _aegf . _agdc , _cgfef ) ; _ae . Printf ( "\u0025\u0035\u0073 \u007c" , "" ) ; for _ecaf := 0 ; _ecaf < _aegf . _afcga ; _ecaf ++ { _ae . Printf ( "\u0025\u0031\u0032\u0064\u0020\u007c" , _ecaf ) ;
} ; _ae . Println ( "" ) ; _ae . Printf ( "\u0025\u0035\u0073 \u002b" , "" ) ; for _ggcd := 0 ; _ggcd < _aegf . _afcga ; _ggcd ++ { _ae . Print ( "\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b" ) ; } ; _ae . Println ( "" ) ; for _bfgdb := 0 ; _bfgdb < _aegf . _agdc ;
_bfgdb ++ { _ae . Printf ( "\u0025\u0035\u0064 \u007c" , _bfgdb ) ; for _ddebd := 0 ; _ddebd < _aegf . _afcga ; _ddebd ++ { _gabdg , _ := _aegf . _gaeb [ _fgged ( _ddebd , _bfgdb ) ] . parasBBox ( ) ; _fcbe := "" ; _addbd := _gabdg . merge ( ) ; if _addbd != nil { _fcbe = _addbd . text ( ) ; } ;
_fcbe = _ae . Sprintf ( "\u0025\u0071" , _bgfd ( _fcbe , 12 ) ) ; _fcbe = _fcbe [ 1 : len ( _fcbe ) - 1 ] ; _ae . Printf ( "\u0025\u0031\u0032\u0073\u0020\u007c" , _fcbe ) ; } ; _ae . Println ( "" ) ; } ; } ;
// bbox returns the embedded PdfRectangle of the paragraph.
func ( _gdbc * textPara ) bbox ( ) _aec . PdfRectangle { return _gdbc . PdfRectangle } ;
// parseStructElement fills the receiver from the dictionary behind _aaag and
// returns early (after a debug log) when no dictionary is found. The "S" entry
// is stored as a string in _aeff, the "Pg" entry in _ecdd, and the "K" entry
// decides the rest:
//   - integer: stored in _fab;
//   - reference: wrapped into a one-element array, _fab is set to -1 and every
//     non-integer element is parsed recursively into _ccaac;
//   - array: a single integer element is stored in _fab and the function
//     returns; otherwise integers overwrite _fab in turn and every other
//     element is parsed recursively into _ccaac.
// Any other "K" type leaves _fab and _ccaac untouched.
//
// NOTE(review): in the reference branch the only array element is the
// reference itself, so the integer checks there presumably never match -
// confirm against MakeArray.
func ( _cadb * structElement ) parseStructElement ( _aaag _bad . PdfObject ) { _ggcc , _dbga := _bad . GetDict ( _aaag ) ;
if ! _dbga { _fc . Log . Debug ( "\u0070\u0061\u0072\u0073\u0065\u0053\u0074\u0072u\u0063\u0074\u0045le\u006d\u0065\u006e\u0074\u003a\u0020d\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006f\u0062\u006a\u0065\u0063t\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075n\u0064\u002e" ) ;
return ; } ; _bcafg := _ggcc . Get ( "\u0053" ) ; _cdgd := _ggcc . Get ( "\u0050\u0067" ) ; _gaadf := "" ; if _bcafg != nil { _gaadf = _bcafg . String ( ) ; } ; _badb := _ggcc . Get ( "\u004b" ) ; _cadb . _aeff = _gaadf ; _cadb . _ecdd = _cdgd ; switch _feg := _badb . ( type ) { case * _bad . PdfObjectInteger : _cadb . _aeff = _gaadf ;
_cadb . _fab = int64 ( * _feg ) ; _cadb . _ecdd = _cdgd ; case * _bad . PdfObjectReference : _efeaa := * _bad . MakeArray ( _feg ) ; var _ddce int64 = - 1 ; _cadb . _fab = _ddce ; if _efeaa . Len ( ) == 1 { _cadc := _efeaa . Elements ( ) [ 0 ] ; _cbgb , _ddfag := _cadc . ( * _bad . PdfObjectInteger ) ;
if _ddfag { _ddce = int64 ( * _cbgb ) ; _cadb . _fab = _ddce ; _cadb . _aeff = _gaadf ; _cadb . _ecdd = _cdgd ; return ; } ; } ; _eedg := [ ] structElement { } ; for _ , _agcce := range _efeaa . Elements ( ) { _geeb , _dge := _agcce . ( * _bad . PdfObjectInteger ) ; if _dge { _ddce = int64 ( * _geeb ) ;
_cadb . _fab = _ddce ; _cadb . _aeff = _gaadf ; } else { _gbfa := & structElement { } ; _gbfa . parseStructElement ( _agcce ) ; _eedg = append ( _eedg , * _gbfa ) ; } ; _ddce = - 1 ; } ; _cadb . _ccaac = _eedg ; case * _bad . PdfObjectArray : _efaa := _badb . ( * _bad . PdfObjectArray ) ; var _ecgb int64 = - 1 ;
_cadb . _fab = _ecgb ; if _efaa . Len ( ) == 1 { _gdea := _efaa . Elements ( ) [ 0 ] ; _eece , _gdee := _gdea . ( * _bad . PdfObjectInteger ) ; if _gdee { _ecgb = int64 ( * _eece ) ; _cadb . _fab = _ecgb ; _cadb . _aeff = _gaadf ; _cadb . _ecdd = _cdgd ; return ; } ; } ; _cdbb := [ ] structElement { } ;
for _ , _egge := range _efaa . Elements ( ) { _cadfb , _cfeeg := _egge . ( * _bad . PdfObjectInteger ) ; if _cfeeg { _ecgb = int64 ( * _cadfb ) ; _cadb . _fab = _ecgb ; _cadb . _aeff = _gaadf ; _cadb . _ecdd = _cdgd ; } else { _fedfc := & structElement { } ; _fedfc . parseStructElement ( _egge ) ;
_cdbb = append ( _cdbb , * _fedfc ) ; } ; _ecgb = - 1 ; } ; _cadb . _ccaac = _cdbb ; } ; } ;
// extractContentStreamImages parses the content stream _ggc and runs a
// content stream processor over it with processOperand registered for all
// operands, using _ef as resources. The image cache _cf and the options _gfc
// are created on first use when they are nil. It returns the parse error or
// the error returned by Process.
func ( _ffgb * imageExtractContext ) extractContentStreamImages ( _ggc string , _ef * _aec . PdfPageResources ) error { _dbf := _ba . NewContentStreamParser ( _ggc ) ; _eff , _bga := _dbf . Parse ( ) ;
if _bga != nil { return _bga ; } ; if _ffgb . _cf == nil { _ffgb . _cf = map [ * _bad . PdfObjectStream ] * cachedImage { } ; } ; if _ffgb . _gfc == nil { _ffgb . _gfc = & ImageExtractOptions { } ; } ; _fdc := _ba . NewContentStreamProcessor ( * _eff ) ; _fdc . AddHandler ( _ba . HandlerConditionEnumAllOperands , "" , _ffgb . processOperand ) ;
return _fdc . Process ( _ef ) ; } ;
// emptyCompositeRow reports whether none of the composite cells stored for
// index _ebde (first index running below _afcga) has a non-empty paraList.
// Cells missing from the map count as empty.
func ( _badea * textTable ) emptyCompositeRow ( _ebde int ) bool { for _fcdg := 0 ; _fcdg < _badea . _afcga ; _fcdg ++ { if _bdba , _ggcba := _badea . _gaeb [ _fgged ( _fcdg , _ebde ) ] ; _ggcba { if len ( _bdba . paraList ) > 0 { return false ; } ; } ; } ;
return true ; } ;
// pathSection groups a list of subpaths with a colour.
type pathSection struct { _gbag [ ] * subpath ; _eg . Color ; } ;
2024-01-22 01:16:41 +00:00
2024-03-27 22:34:33 +00:00
// NewWithOptions returns an Extractor instance for extracting content from the input PDF page with options.
func NewWithOptions(page *_aec.PdfPage, options *Options) (*Extractor, error) {
	// Usage key reported to the license tracker on success.
	const usageKey = "\u0065x\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077W\u0069\u0074\u0068\u004f\u0070\u0074\u0069\u006f\u006e\u0073"

	streams, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}

	// A missing structure tree root is only reported, not treated as an error.
	treeRoot, tagged := page.GetStructTreeRoot()
	if !tagged {
		_fc.Log.Info("T\u0068\u0065\u0020\u0070\u0064\u0066\u0020\u0064\u006f\u0063\u0075\u006d\u0065\u006e\u0074\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020\u0074\u0061\u0067g\u0065d\u002e\u0020\u0053\u0074r\u0075\u0063t\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e\u0027\u0074\u0020\u0065\u0078\u0069\u0073\u0074\u002e")
	}
	container := page.GetContainingPdfObject()

	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _ae.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	// Work on a copy of the media box and put reversed corners in order.
	box := *mediaBox
	if box.Llx > box.Urx {
		_fc.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", box)
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		_fc.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", box)
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	extractor := &Extractor{
		_ffd: streams,
		_gf:  page.Resources,
		_c:   box,
		_af:  page.CropBox,
		_fba: map[string]fontEntry{},
		_gfe: map[string]textResult{},
		_fg:  options,
		_ga:  treeRoot,
		_bd:  container,
	}
	_ff.TrackUse(usageKey)
	return extractor, nil
}

// lastpointEstablished returns the stored point _cgbcf with false when _effc
// is set, otherwise the last point of the final subpath with false when that
// subpath has _bbdf set, and otherwise a zero point with true.
func (_efe *shapesState) lastpointEstablished() (_g.Point, bool) {
	count := len(_efe._edc)
	switch {
	case _efe._effc:
		return _efe._cgbcf, false
	case count > 0 && _efe._edc[count-1]._bbdf:
		return _efe._edc[count-1].last(), false
	default:
		return _g.Point{}, true
	}
}

// encloses reports whether the interval [_dcae, _edgdc] lies inside the
// interval [_fgad, _ababc] of the ruling widened by _ecce on both sides.
func (_efddf *ruling) encloses(_dcae, _edgdc float64) bool {
	if !(_efddf._fgad-_ecce <= _dcae) {
		return false
	}
	return _edgdc <= _efddf._ababc+_ecce
}

// _fbdc reports whether both _bae and _bfc hold for the two rectangles.
func _fbdc(_dfa, _gfaf _aec.PdfRectangle) bool {
	if !_bae(_dfa, _gfaf) {
		return false
	}
	return _bfc(_dfa, _gfaf)
}
2024-02-11 21:29:32 +00:00
2024-01-22 01:16:41 +00:00
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
2024-03-27 22:34:33 +00:00
func (_ge *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	// A fresh context per call collects the images found in the page content.
	ctx := &imageExtractContext{_gfc: options}
	if err := ctx.extractContentStreamImages(_ge._ffd, _ge._gf); err != nil {
		return nil, err
	}
	return &PageImages{Images: ctx._bff}, nil
}

// depthBand returns depthRange applied to the depth indexes of the two given
// values, or nil when the bag holds no buckets.
func (_dfedb *wordBag) depthBand(_aefee, _cgffb float64) []int {
	if len(_dfedb._gbbd) == 0 {
		return nil
	}
	first := _dfedb.getDepthIdx(_aefee)
	second := _dfedb.getDepthIdx(_cgffb)
	return _dfedb.depthRange(first, second)
}

// lineTo adds the point (_adgdc, _cagf) to the state, logging it together
// with its device point first when _bcge is set.
func (_geee *shapesState) lineTo(_adgdc, _cagf float64) {
	if _bcge {
		_fc.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _adgdc, _cagf, _geee.devicePoint(_adgdc, _cagf))
	}
	_geee.addPoint(_adgdc, _cagf)
}

// _affac returns the text lines of the first child of _ebga whose tag is
// "LBody", "Span" or "InlineShape". An "LBody" child without lines of its own
// is searched recursively. It returns nil when no such child exists.
func _affac(_ebga *list) []*textLine {
	for _, child := range _ebga._bffd {
		switch child._efac {
		case "\u0053\u0070\u0061\u006e", "I\u006e\u006c\u0069\u006e\u0065\u0053\u0068\u0061\u0070\u0065":
			return child._gagag
		case "\u004c\u0042\u006fd\u0079":
			if len(child._gagag) == 0 {
				return _affac(child)
			}
			return child._gagag
		}
	}
	return nil
}

// comp is the ordering predicate for two rulings of the list, addressed by
// index. Rulings of different kinds are ordered by descending kind; two
// rulings of kind _cbfe never precede each other. Otherwise _gbgc (descending),
// then _fgad and _ababc (ascending) decide for kind _bfgb, and the negation of
// each comparison decides for the remaining kind.
func (_bfaag rulingList) comp(_gcffb, _dadbc int) bool {
	left, right := _bfaag[_gcffb], _bfaag[_dadbc]
	kind := left._egdf
	if kind != right._egdf {
		return kind > right._egdf
	}
	if kind == _cbfe {
		return false
	}
	// Every comparison result is inverted unless the kind is _bfgb.
	invert := kind != _bfgb
	if left._gbgc != right._gbgc {
		return (left._gbgc > right._gbgc) != invert
	}
	if left._fgad != right._fgad {
		return (left._fgad < right._fgad) != invert
	}
	return (left._ababc < right._ababc) != invert
}

// _debga maps each mark kind to its display name.
var _debga = map[markKind]string{
	_bggb:  "\u0073\u0074\u0072\u006f\u006b\u0065",
	_gagfa: "\u0066\u0069\u006c\u006c",
	_dbed:  "\u0061u\u0067\u006d\u0065\u006e\u0074",
}
2024-02-11 21:29:32 +00:00
// String returns a description of `k`.
2024-03-27 22:34:33 +00:00
// String looks the kind up in _bcccf and falls back to a "Not a ruling"
// message carrying the numeric value for kinds missing from that table.
func (_dfff rulingKind) String() string {
	_bfcef, _gacf := _bcccf[_dfff]
	if !_gacf {
		return _ae.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _dfff)
	}
	return _bfcef
}

// markWordBoundaries sets _ceff on every word of the line, from the second
// word on, whose gap to its predecessor (as measured by _cdfb) is at least
// _gdda times the line's _fgcb.
//
// Lines with fewer than two words have no gap to inspect and are left
// untouched; without this guard slicing from index 1 panics on an empty line.
func (_fgfd *textLine) markWordBoundaries() {
	if len(_fgfd._bfag) < 2 {
		return
	}
	_fcae := _gdda * _fgfd._fgcb
	// _edef indexes the sliced view, so _bfag[_edef] is the predecessor of _gdab.
	for _edef, _gdab := range _fgfd._bfag[1:] {
		if _cdfb(_gdab, _fgfd._bfag[_edef]) >= _fcae {
			_gdab._ceff = true
		}
	}
}
2024-02-11 21:29:32 +00:00
2024-03-27 22:34:33 +00:00
// String returns a description of `tm`.
func (_gfag *textMark) String() string {
	const layout = "\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022"
	return _ae.Sprintf(layout, _gfag.PdfRectangle, _gfag._cbbae, _gfag._gded)
}

// _faff collects the structure elements found three levels below _cgdg whose
// _aeff tag is "L". The result is never nil.
func _faff(_cgdg structElement) []structElement {
	found := []structElement{}
	for _, child := range _cgdg._ccaac {
		for _, grandchild := range child._ccaac {
			for _, candidate := range grandchild._ccaac {
				if candidate._aeff != "\u004c" {
					continue
				}
				found = append(found, candidate)
			}
		}
	}
	return found
}

// exists reports whether the array already holds a mark with the same
// DirectObject and BBox (compared with reflect.DeepEqual) and the same Text.
func (_abaf *TextMarkArray) exists(_fcacd TextMark) bool {
	for _, current := range _abaf.Elements() {
		if !_fb.DeepEqual(_fcacd.DirectObject, current.DirectObject) {
			continue
		}
		if !_fb.DeepEqual(_fcacd.BBox, current.BBox) {
			continue
		}
		if current.Text == _fcacd.Text {
			return true
		}
	}
	return false
}

// _ddac is a regular expression source with four alternatives, each anchored
// at the start of the text.
var _ddac string = "\u005e\u005b\u0061\u002d\u007a\u0041\u002dZ\u005d\u0028\u005c)\u007c\u005c\u002e)\u007c\u005e[\u005c\u0064\u005d\u002b\u0028\u005c)\u007c\\.\u0029\u007c\u005e\u005c\u0028\u005b\u0061\u002d\u007a\u0041\u002d\u005a\u005d\u005c\u0029\u007c\u005e\u005c\u0028\u005b\u005c\u0064\u005d\u002b\u005c\u0029"
// _edcb converts the children of _edgfg into list items. An "LI" child becomes
// one item built by _deec from its text lines, the marker "bullet" and its own
// nested items, with _begg set from _aadg. The first "LBody" child ends the
// scan and returns only its own items; the first "L" child ends the scan and
// returns the items gathered so far followed by its own.
func _edcb(_edgfg *list) []*list {
	var items []*list
	for _, child := range _edgfg._bffd {
		tag := child._efac
		if tag == "\u004c\u0049" {
			lines := _affac(child)
			nested := _edcb(child)
			item := _deec(lines, "\u0062\u0075\u006c\u006c\u0065\u0074", nested)
			item._begg = _aadg(lines, "")
			items = append(items, item)
		} else if tag == "\u004c\u0042\u006fd\u0079" {
			return _edcb(child)
		} else if tag == "\u004c" {
			return append(items, _edcb(child)...)
		}
	}
	return items
}

// allWords returns the words of every bucket in one slice. Buckets are visited
// in map iteration order.
func (_fbeg *wordBag) allWords() []*textWord {
	var words []*textWord
	for _, bucket := range _fbeg._gbbd {
		words = append(words, bucket...)
	}
	return words
}

// _dgc returns _aadc of the first argument minus _aadc of the second.
func _dgc(_bbbe, _eade bounded) float64 {
	first := _aadc(_bbbe)
	second := _aadc(_eade)
	return first - second
}

// pop removes the top state from the stack and returns a pointer to a copy of
// it, or nil when the stack is empty.
func (_acgb *stateStack) pop() *textState {
	if _acgb.empty() {
		return nil
	}
	stack := *_acgb
	last := len(stack) - 1
	top := *stack[last]
	*_acgb = stack[:last]
	return &top
}

// _eedce removes the element at index _cbbbd by shifting the tail down in
// place and returns the slice shortened by one.
func _eedce(_edcf []*textWord, _cbbbd int) []*textWord {
	return append(_edcf[:_cbbbd], _edcf[_cbbbd+1:]...)
}

// contains reports whether the rectangle fits inside the tile. A tile with
// fewer than three borders contains nothing. Only sides whose border flag is
// set are checked, each with a tolerance of _ebcd.
func (_aaaca gridTile) contains(_eafb _aec.PdfRectangle) bool {
	switch {
	case _aaaca.numBorders() < 3:
		return false
	case _aaaca._cbfd && _eafb.Llx < _aaaca.Llx-_ebcd:
		return false
	case _aaaca._cbdbf && _eafb.Urx > _aaaca.Urx+_ebcd:
		return false
	case _aaaca._fcgc && _eafb.Lly < _aaaca.Lly-_ebcd:
		return false
	case _aaaca._fbbf && _eafb.Ury > _aaaca.Ury+_ebcd:
		return false
	}
	return true
}
2024-02-11 21:29:32 +00:00
2024-03-27 22:34:33 +00:00
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
func (_ecae *TextMarkArray) BBox() (_aec.PdfRectangle, bool) {
	var _cfe _aec.PdfRectangle
	// A nil array has no marks; answer like Len does instead of dereferencing it.
	if _ecae == nil {
		return _cfe, false
	}
	_aeea := false
	for _, _cfea := range _ecae._bade {
		// Marks flagged Meta, or whose text _dfca accepts, do not contribute.
		if _cfea.Meta || _dfca(_cfea.Text) {
			continue
		}
		if _aeea {
			_cfe = _agfb(_cfe, _cfea.BBox)
		} else {
			_cfe = _cfea.BBox
			_aeea = true
		}
	}
	return _cfe, _aeea
}

// removeDuplicates sorts the list in place and returns a new list in which a
// ruling is dropped when equals reports it the same as the last ruling kept.
// It returns nil for an empty list.
func (_bcbgf rulingList) removeDuplicates() rulingList {
	if len(_bcbgf) == 0 {
		return nil
	}
	_bcbgf.sort()
	_bcgcf := rulingList{_bcbgf[0]}
	for _, _daebe := range _bcbgf[1:] {
		if _daebe.equals(_bcgcf[len(_bcgcf)-1]) {
			continue
		}
		_bcgcf = append(_bcgcf, _daebe)
	}
	return _bcgcf
}

var (
	// _abec maps spacing accent and modifier runes to the corresponding
	// combining marks.
	_abec = map[rune]string{0x0060: "\u0300", 0x02CB: "\u0300", 0x0027: "\u0301", 0x00B4: "\u0301", 0x02B9: "\u0301", 0x02CA: "\u0301", 0x005E: "\u0302", 0x02C6: "\u0302", 0x007E: "\u0303", 0x02DC: "\u0303", 0x00AF: "\u0304", 0x02C9: "\u0304", 0x02D8: "\u0306", 0x02D9: "\u0307", 0x00A8: "\u0308", 0x00B0: "\u030a", 0x02DA: "\u030a", 0x02BA: "\u030b", 0x02DD: "\u030b", 0x02C7: "\u030c", 0x02C8: "\u030d", 0x0022: "\u030e", 0x02BB: "\u0312", 0x02BC: "\u0313", 0x0486: "\u0313", 0x055A: "\u0313", 0x02BD: "\u0314", 0x0485: "\u0314", 0x0559: "\u0314", 0x02D4: "\u031d", 0x02D5: "\u031e", 0x02D6: "\u031f", 0x02D7: "\u0320", 0x02B2: "\u0321", 0x00B8: "\u0327", 0x02CC: "\u0329", 0x02B7: "\u032b", 0x02CD: "\u0331", 0x005F: "\u0332", 0x204E: "\u0359"}
)

// toGrids partitions the rulings into groups of connected rulings and returns
// the groups that isActualGrid accepts, each replaced by the list returned by
// isActualGrid and then passed through snapToGroups.
//
// Rulings are visited in the order produced by _dbega (most connections
// first, ties broken by comp). A ruling joins the first existing group holding
// a ruling connected to it and otherwise starts a new group. Groups are then
// sorted by descending size and their members by comp. When _eceg is set the
// intermediate results are logged.
//
// An empty ordering yields nil; indexing its first element used to panic.
func (_agbbf rulingList) toGrids() []rulingList {
	if _eceg {
		_fc.Log.Info("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073", _agbbf)
	}
	_bdfb := _agbbf.intersections()
	if _eceg {
		_fc.Log.Info("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020", len(_agbbf), len(_bdfb))
		for _, _dffa := range _fcea(_bdfb) {
			_ae.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", _dffa, _bdfb[_dffa])
		}
	}

	// Connections per ruling index; rulings without connections get no entry.
	_bdgg := make(map[int]intSet, len(_agbbf))
	for _gecc := range _agbbf {
		_bccg := _agbbf.connections(_bdfb, _gecc)
		if len(_bccg) > 0 {
			_bdgg[_gecc] = _bccg
		}
	}
	if _eceg {
		_fc.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064", len(_bdgg))
		for _, _gcgac := range _fcea(_bdgg) {
			_ae.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", _gcgac, _bdgg[_gcgac])
		}
	}

	_bcace := _dbega(len(_agbbf), func(_fcfeg, _cdde int) bool {
		_fdbbb, _eggeg := len(_bdgg[_fcfeg]), len(_bdgg[_cdde])
		if _fdbbb != _eggeg {
			return _fdbbb > _eggeg
		}
		return _agbbf.comp(_fcfeg, _cdde)
	})
	if _eceg {
		_fc.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076", _bcace)
	}
	// Nothing to group: seeding the first group below needs one index.
	if len(_bcace) == 0 {
		return nil
	}

	_gede := [][]int{{_bcace[0]}}
_fdaa:
	for _, _gebc := range _bcace[1:] {
		for _acef, _adcc := range _gede {
			for _, _beff := range _adcc {
				if _bdgg[_beff].has(_gebc) {
					_gede[_acef] = append(_adcc, _gebc)
					continue _fdaa
				}
			}
		}
		_gede = append(_gede, []int{_gebc})
	}
	if _eceg {
		_fc.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076", _gede)
	}

	_a.SliceStable(_gede, func(_dgbad, _bebdb int) bool { return len(_gede[_dgbad]) > len(_gede[_bebdb]) })
	for _, _geaaa := range _gede {
		_a.Slice(_geaaa, func(_cagd, _degef int) bool { return _agbbf.comp(_geaaa[_cagd], _geaaa[_degef]) })
	}

	// Turn the index groups into ruling lists.
	_cacd := make([]rulingList, len(_gede))
	for _fgbb, _afbb := range _gede {
		_gcacb := make(rulingList, len(_afbb))
		for _efgcg, _dggc := range _afbb {
			_gcacb[_efgcg] = _agbbf[_dggc]
		}
		_cacd[_fgbb] = _gcacb
	}
	if _eceg {
		_fc.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076", _cacd)
	}

	var _aabb []rulingList
	for _, _cffg := range _cacd {
		if _eefc, _ffab := _cffg.isActualGrid(); _ffab {
			_cffg = _eefc
			_cffg = _cffg.snapToGroups()
			_aabb = append(_aabb, _cffg)
		}
	}
	if _eceg {
		_bgbf("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073", _aabb)
		_fc.Log.Info("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064", len(_cacd), len(_aabb))
	}
	return _aabb
}
2024-02-11 21:29:32 +00:00
2024-03-27 22:34:33 +00:00
// Len returns the number of TextMarks in `ma`.
func (_fef *TextMarkArray) Len() int {
	// A nil array is treated as empty.
	if _fef != nil {
		return len(_fef._bade)
	}
	return 0
}
2024-02-11 21:29:32 +00:00
// Font represents the font properties on a PDF page.
2024-03-27 22:34:33 +00:00
type Font struct { PdfFont * _aec . PdfFont ;
2024-02-11 21:29:32 +00:00
// FontName represents Font Name from font properties.
FontName string ;
// FontType represents Font Subtype entry in the font dictionary inside page resources.
// Examples: Type0, Type1, MMType1, Type3, TrueType, CIDFont.
FontType string ;
// ToUnicode is true if font provides a `ToUnicode` mapping.
ToUnicode bool ;
// IsCID is true if underlying font is a composite font.
// Composite font is represented by a font dictionary whose Subtype is `Type0`
IsCID bool ;
// IsSimple is true if font is simple font.
// A simple font is limited to only 8 bit (255) character codes.
IsSimple bool ;
// FontData represents the raw data of the embedded font file.
// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CFF).
// The FontData value is taken from `FontFile`, `FontFile2` or `FontFile3` inside the Font Descriptor.
// At most, only one of `FontFile`, `FontFile2` or `FontFile3` will be FontData value.
FontData [ ] byte ;
// FontFileName is a name representing the font. It has the format:
// (Font Name) + (Font Type Extension), example: helvetica.ttf.
FontFileName string ;
// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
2024-03-27 22:34:33 +00:00
FontDescriptor * _aec . PdfFontDescriptor ; } ;
// inDiacriticArea compares the receiver's box with that of _cdcf. It returns
// true when the absolute sum of the Llx and Urx differences is below
// Width()*_gccc and the absolute Lly difference is below Height()*_gccc,
// both measured on the receiver.
func ( _dfag * textMark ) inDiacriticArea ( _cdcf * textMark ) bool { _dcea := _dfag . Llx - _cdcf . Llx ; _dgfd := _dfag . Urx - _cdcf . Urx ; _eabf := _dfag . Lly - _cdcf . Lly ; return _ea . Abs ( _dcea + _dgfd ) < _dfag . Width ( ) * _gccc && _ea . Abs ( _eabf ) < _dfag . Height ( ) * _gccc ;
} ;
// addDiacritic appends _fadd to the _gded text of the last element of _ebfa
// and normalises the result to NFKC. It indexes len-1, so _ebfa must not be
// empty.
// NOTE(review): the change only persists if _ebfa holds pointers - confirm.
func ( _gecac * textWord ) addDiacritic ( _fadd string ) { _afgf := _gecac . _ebfa [ len ( _gecac . _ebfa ) - 1 ] ; _afgf . _gded += _fadd ; _afgf . _gded = _da . NFKC . String ( _afgf . _gded ) ; } ;
// asRuling converts the rectangle into a ruling that keeps its kind and colour
// and has mark kind _gagfa. For kind _eebe (logged as "rulingVert") _gbgc is
// the mean of Llx and Urx, _fgad/_ababc are Lly/Ury and the width comes from
// checkWidth(Llx, Urx). For kind _bfgb (logged as "rulingHorz") _gbgc is the
// mean of Lly and Ury, _fgad/_ababc are Llx/Urx and the width comes from
// checkWidth(Lly, Ury). It returns nil and false when checkWidth fails
// (logged only if _bdf is set) or when the kind is neither of the two (always
// logged).
func ( _bafe rectRuling ) asRuling ( ) ( * ruling , bool ) { _decb := ruling { _egdf : _bafe . _cbae , Color : _bafe . Color , _dcebd : _gagfa } ;
switch _bafe . _cbae { case _eebe : _decb . _gbgc = 0.5 * ( _bafe . Llx + _bafe . Urx ) ; _decb . _fgad = _bafe . Lly ; _decb . _ababc = _bafe . Ury ; _agbfa , _bbag := _bafe . checkWidth ( _bafe . Llx , _bafe . Urx ) ; if ! _bbag { if _bdf { _fc . Log . Error ( "\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076" , _bafe ) ;
} ; return nil , false ; } ; _decb . _aecega = _agbfa ; case _bfgb : _decb . _gbgc = 0.5 * ( _bafe . Lly + _bafe . Ury ) ; _decb . _fgad = _bafe . Llx ; _decb . _ababc = _bafe . Urx ; _eagcf , _gbcb := _bafe . checkWidth ( _bafe . Lly , _bafe . Ury ) ; if ! _gbcb { if _bdf { _fc . Log . Error ( "\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076" , _bafe ) ;
} ; return nil , false ; } ; _decb . _aecega = _eagcf ; default : _fc . Log . Error ( "\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064" , _bafe . _cbae ) ; return nil , false ; } ; return & _decb , true ; } ;
// bbox returns the embedded PdfRectangle of the table.
func ( _eaac * textTable ) bbox ( ) _aec . PdfRectangle { return _eaac . PdfRectangle } ;
2024-02-11 21:29:32 +00:00
2024-03-27 22:34:33 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20> ).
func (_aaf *Extractor) ExtractText() (string, error) {
	// Only the text and the error of ExtractTextWithStats are passed on.
	text, _, _, err := _aaf.ExtractTextWithStats()
	return text, err
}

// _egf returns the translation matrix that moves by the coordinates of _dfgf.
func _egf(_dfgf _g.Point) _g.Matrix {
	dx, dy := _dfgf.X, _dfgf.Y
	return _g.TranslationMatrix(dx, dy)
}

// reduceTiling returns a table without the rows and columns that are both
// empty and closer than _gfafd to a neighbouring grid line of _eecc. A row is
// dropped when it is not the first, lies within _gfafd of the previous
// _eceeed value and emptyCompositeRow holds. A column is dropped when it is
// not the last, lies within _gfafd of the next _fecg value and
// emptyCompositeColumn holds. The receiver itself is returned when nothing is
// dropped; otherwise the surviving non-empty composite cells are copied into a
// new table at their compacted positions.
func (_eccgb *textTable) reduceTiling(_eecc gridTiling, _gfafd float64) *textTable {
	keptRows := make([]int, 0, _eccgb._agdc)
	keptCols := make([]int, 0, _eccgb._afcga)
	colEdges, rowEdges := _eecc._fecg, _eecc._eceeed

	for row := 0; row < _eccgb._agdc; row++ {
		if row > 0 && _ea.Abs(rowEdges[row-1]-rowEdges[row]) < _gfafd && _eccgb.emptyCompositeRow(row) {
			continue
		}
		keptRows = append(keptRows, row)
	}
	for col := 0; col < _eccgb._afcga; col++ {
		if col < _eccgb._afcga-1 && _ea.Abs(colEdges[col+1]-colEdges[col]) < _gfafd && _eccgb.emptyCompositeColumn(col) {
			continue
		}
		keptCols = append(keptCols, col)
	}

	// Nothing was dropped, so the table can be reused as it is.
	if len(keptRows) == _eccgb._agdc && len(keptCols) == _eccgb._afcga {
		return _eccgb
	}

	reduced := &textTable{
		_fbccb: _eccgb._fbccb,
		_afcga: len(keptCols),
		_agdc:  len(keptRows),
		_gaeb:  make(map[uint64]compositeCell, len(keptCols)*len(keptRows)),
	}
	if _gbead {
		_fc.Log.Info("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064", _eccgb._afcga, _eccgb._agdc, len(keptCols), len(keptRows))
		_fc.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_fc.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}

	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			paras, cellBox := _eccgb.getComposite(oldCol, oldRow)
			if len(paras) == 0 {
				continue
			}
			if _gbead {
				_ae.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newCol, newRow, oldCol, oldRow, _bgfd(paras.merge().text(), 50))
			}
			reduced.putComposite(newCol, newRow, paras, cellBox)
		}
	}
	return reduced
}

// toTilings tidies the rulings, splits them into grids with toGrids and
// returns the tidied rulings together with one tiling per grid. An empty list
// yields nil, nil.
func (_dffg rulingList) toTilings() (rulingList, []gridTiling) {
	_dffg.log("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s")
	if len(_dffg) == 0 {
		return nil, nil
	}
	_dffg = _dffg.tidied("\u0061\u006c\u006c")
	_dffg.log("\u0074\u0069\u0064\u0069\u0065\u0064")

	grids := _dffg.toGrids()
	tilings := make([]gridTiling, len(grids))
	for idx := range grids {
		tilings[idx] = grids[idx].asTiling()
	}
	return _dffg, tilings
}

// findTables links neighbouring paragraphs, sorts the list in place with _abaa
// and returns the tables found by findGridTables (when _eecg is set) followed
// by those found by findTextTables (when _ebada is set).
func (_cdbab paraList) findTables(_cgcge []gridTiling) []*textTable {
	_cdbab.addNeighbours()
	_a.Slice(_cdbab, func(first, second int) bool {
		return _abaa(_cdbab[first], _cdbab[second]) < 0
	})

	var tables []*textTable
	if _eecg {
		tables = append(tables, _cdbab.findGridTables(_cgcge)...)
	}
	if _ebada {
		tables = append(tables, _cdbab.findTextTables()...)
	}
	return tables
}

// Error message texts for the font handling code.
const (
	_aea = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
	_bf  = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"
	_de  = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
)
// getFillColor returns the non-stroking (fill) color of the text object's
// graphics state, converted to an image/color value by _ffdg.
func (_fedb *textObject) getFillColor() _eg.Color {
	space := _fedb._ggd.ColorspaceNonStroking
	value := _fedb._ggd.ColorNonStroking
	return _ffdg(space, value)
}

// event is a small record holding a float64 position, a bool flag and an int
// index. NOTE(review): the meaning of each field is not visible in this part
// of the file; confirm against the code that builds and sorts events.
type event struct {
	_dfada float64
	_aedb  bool
	_dacaa int
}

// _faadf scans _aead in order and returns the _gaca value of the last line
// that (a) has a _gaca greater than that of _bbgd and (b) has a rounded Llx
// not smaller than the rounded Llx of _bbgd. The scan stops at the first line
// that satisfies (a) but not (b). It returns -1 when no line qualifies.
// The _fddf argument is not used.
func _faadf(_bbgd *textLine, _aead []*textLine, _fddf []float64) float64 {
	result := float64(-1)
	for _, candidate := range _aead {
		if !(candidate._gaca > _bbgd._gaca) {
			continue
		}
		if !(_ea.Round(candidate.Llx) >= _ea.Round(_bbgd.Llx)) {
			break
		}
		result = candidate._gaca
	}
	return result
}

// _aced returns the keys of _ccec sorted in descending order.
func _aced(_ccec map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_ccec))
	for key := range _ccec {
		keys = append(keys, key)
	}
	// Sort ascending, then reverse in place to get descending order.
	_a.Float64s(keys)
	for lo, hi := 0, len(keys)-1; lo < hi; lo, hi = lo+1, hi-1 {
		keys[lo], keys[hi] = keys[hi], keys[lo]
	}
	return keys
}

// del removes _agfae from the set. Removing a missing element is a no-op.
func (_efdgb intSet) del(_agfae int) {
	delete(_efdgb, _agfae)
}
// _fcfa merges word bags whose rectangles overlap (as decided by _dfdf) and
// returns the surviving bags. The input slice is sorted in place: larger area
// first, then larger height, then lower index.
//
// Each surviving bag absorbs every later bag that overlaps its rectangle
// after the rectangle's left edge has been extended by the bag's _aad value.
// A bag that has already been absorbed is never absorbed a second time; the
// previous code tested the outer index here, which was always false, so an
// emptied bag could be merged again into another bag.
func _fcfa(_cadff []*wordBag) []*wordBag {
	if len(_cadff) <= 1 {
		return _cadff
	}
	if _fbbd {
		_fc.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	_a.Slice(_cadff, func(i, j int) bool {
		bi, bj := _cadff[i], _cadff[j]
		areaI := bi.Width() * bi.Height()
		areaJ := bj.Width() * bj.Height()
		if areaI != areaJ {
			return areaI > areaJ
		}
		if bi.Height() != bj.Height() {
			return bi.Height() > bj.Height()
		}
		return i < j
	})

	var merged []*wordBag
	absorbed := make(intSet)
	for i := 0; i < len(_cadff); i++ {
		if absorbed.has(i) {
			continue
		}
		bag := _cadff[i]
		for j := i + 1; j < len(_cadff); j++ {
			// Skip bags that were already merged into an earlier bag.
			if absorbed.has(j) {
				continue
			}
			other := _cadff[j]
			// Recomputed every iteration because absorb may grow bag's rectangle.
			reach := bag.PdfRectangle
			reach.Llx -= bag._aad
			if _dfdf(reach, other.PdfRectangle) {
				bag.absorb(other)
				absorbed.add(j)
			}
		}
		merged = append(merged, bag)
	}
	if len(_cadff) != len(merged)+len(absorbed) {
		_fc.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_cadff), len(merged), len(absorbed))
	}
	return merged
}

// _dgde builds a ruling from the line segment _dadac-_ebeff with color _faeb.
// The segment is classified by _cgbfe; when the classification is _cbfe the
// function returns (nil, false), otherwise it returns the result of asRuling.
func _dgde(_dadac, _ebeff _g.Point, _faeb _eg.Color) (*ruling, bool) {
	line := lineRuling{_bcfgb: _dadac, _befe: _ebeff, _cdbfg: _cgbfe(_dadac, _ebeff), Color: _faeb}
	if line._cdbfg == _cbfe {
		return nil, false
	}
	return line.asRuling()
}

// llyRange returns the sub-slice of _cgbcc whose paragraphs have an Lly in
// [_aecc, _cfgd]. _cgbcc holds indexes into the list and is binary-searched,
// so it is expected to order the paragraphs by increasing Lly (assumption
// based on the use of sort.Search; confirm against the caller).
// It returns nil for an empty list or when the range misses every paragraph.
func (_gfbc paraList) llyRange(_cgbcc []int, _aecc, _cfgd float64) []int {
	n := len(_gfbc)
	if n == 0 {
		// Nothing to search; previously this indexed _cgbcc[0] and panicked.
		return nil
	}
	if _cfgd < _gfbc[_cgbcc[0]].Lly || _aecc > _gfbc[_cgbcc[n-1]].Lly {
		return nil
	}
	first := _a.Search(n, func(i int) bool { return _gfbc[_cgbcc[i]].Lly >= _aecc })
	last := _a.Search(n, func(i int) bool { return _gfbc[_cgbcc[i]].Lly > _cfgd })
	return _cgbcc[first:last]
}

// checkWidth returns the difference _fdga-_fagdc and whether that difference
// is no larger than _cabc.
func (_aageb rectRuling) checkWidth(_fagdc, _fdga float64) (float64, bool) {
	width := _fdga - _fagdc
	return width, width <= _cabc
}

// extractFormImages looks up the form XObject named *_ed in _cdag and extracts
// the images in its content stream. The form's own resources are used when
// present, otherwise _cdag is used. A missing form (nil without error) is not
// an error. On success the context's _bgb counter is incremented.
// The _cgb argument is not used.
func (_gad *imageExtractContext) extractFormImages(_ed *_bad.PdfObjectName, _cgb _ba.GraphicsState, _cdag *_aec.PdfPageResources) error {
	form, err := _cdag.GetXObjectFormByName(*_ed)
	if err != nil {
		return err
	}
	if form == nil {
		return nil
	}
	content, err := form.GetContentStream()
	if err != nil {
		return err
	}
	resources := form.Resources
	if resources == nil {
		resources = _cdag
	}
	if err = _gad.extractContentStreamImages(string(content), resources); err != nil {
		return err
	}
	_gad._bgb++
	return nil
}

// _afgg returns a new textObject wired to the given extractor, resources,
// graphics state, text state and state stack, with both of its matrices set
// to the identity matrix.
func _afgg(_gabd *Extractor, _fcdc *_aec.PdfPageResources, _gdfb _ba.GraphicsState, _aag *textState, _bdde *stateStack) *textObject {
	return &textObject{
		_eegd: _gabd,
		_bcg:  _fcdc,
		_ggd:  _gdfb,
		_aed:  _bdde,
		_cbgd: _aag,
		_acc:  _g.IdentityMatrix(),
		_dde:  _g.IdentityMatrix(),
	}
}

// Ruling kinds, numbered from zero in declaration order. _cbfe is the zero
// value and is the kind that _dgde rejects.
const (
	_cbfe rulingKind = iota
	_bfgb
	_eebe
)

// bbox returns the rectangle embedded in the text mark.
func (_cbdb *textMark) bbox() _aec.PdfRectangle {
	return _cbdb.PdfRectangle
}
// NewFromContents creates a new extractor from a content stream string and
// the page resources it refers to. The returned error is always nil.
func NewFromContents(contents string, resources *_aec.PdfPageResources) (*Extractor, error) {
	// Usage key reported to the license tracker.
	const usageKey = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"
	extractor := &Extractor{
		_ffd: contents,
		_gf:  resources,
		_fba: map[string]fontEntry{},
		_gfe: map[string]textResult{},
	}
	_ff.TrackUse(usageKey)
	return extractor, nil
}

// getStrokeColor returns the stroking color of the text object's graphics
// state, converted to an image/color value by _ffdg.
func (_bgdg *textObject) getStrokeColor() _eg.Color {
	space := _bgdg._ggd.ColorspaceStroking
	value := _bgdg._ggd.ColorStroking
	return _ffdg(space, value)
}

// gridIntersecting reports whether both the _fgad values and the _ababc
// values of the two rulings satisfy _cdffd.
func (_bbbgc *ruling) gridIntersecting(_cdee *ruling) bool {
	if !_cdffd(_bbbgc._fgad, _cdee._fgad) {
		return false
	}
	return _cdffd(_bbbgc._ababc, _cdee._ababc)
}

// _fdfb returns a ruling of kind _eebe placed at the rectangle's Llx and
// spanning from its Lly to its Ury.
func _fdfb(_caeba _aec.PdfRectangle) *ruling {
	r := new(ruling)
	r._egdf = _eebe
	r._gbgc = _caeba.Llx
	r._fgad = _caeba.Lly
	r._ababc = _caeba.Ury
	return r
}

// _fcbd builds a word bag from _aaff. The bag is created from the first word
// by _ecea; every further word is appended to the bucket selected by _dafa
// from the word's _aecg value and the bag's rectangle is grown to include it.
// The bag is sorted before it is returned. _aaff must not be empty.
func _fcbd(_aaff []*textWord, _cgbd float64, _acbc, _gebb rulingList) *wordBag {
	bag := _ecea(_aaff[0], _cgbd, _acbc, _gebb)
	for i := 1; i < len(_aaff); i++ {
		word := _aaff[i]
		bucket := _dafa(word._aecg)
		bag._gbbd[bucket] = append(bag._gbbd[bucket], word)
		bag.PdfRectangle = _agfb(bag.PdfRectangle, word.PdfRectangle)
	}
	bag.sort()
	return bag
}

// _cdfb returns the horizontal gap from the right edge (Urx) of _eeac to the
// left edge (Llx) of _bde. The result is negative when the boxes overlap
// horizontally.
func _cdfb(_bde, _eeac bounded) float64 {
	left := _bde.bbox().Llx
	right := _eeac.bbox().Urx
	return left - right
}
// _ffdg converts a PDF color in the given colorspace to an opaque NRGBA
// color. It returns black when either argument is nil, when the conversion to
// RGB fails, or when the converted color is not a *PdfColorDeviceRGB; the two
// failure cases are logged at debug level.
func _ffdg(_acged _aec.PdfColorspace, _bbfffb _aec.PdfColor) _eg.Color {
	if _acged == nil || _bbfffb == nil {
		return _eg.Black
	}
	converted, err := _acged.ColorToRGB(_bbfffb)
	if err != nil {
		_fc.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", _bbfffb, _acged, err)
		return _eg.Black
	}
	rgb, ok := converted.(*_aec.PdfColorDeviceRGB)
	if !ok {
		_fc.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", converted)
		return _eg.Black
	}
	// Each component is multiplied by 255 and truncated to a byte.
	red := uint8(rgb.R() * 255)
	green := uint8(rgb.G() * 255)
	blue := uint8(rgb.B() * 255)
	return _eg.NRGBA{R: red, G: green, B: blue, A: uint8(255)}
}

// merge combines the rulings in the list into a single new ruling. The
// result takes its kind, _dcebd and Color from the first ruling, the mean of
// all _gbgc values, the smallest _fgad and the largest _ababc.
// The list must not be empty.
func (_bfdfa rulingList) merge() *ruling {
	head := _bfdfa[0]
	sum, lo, hi := head._gbgc, head._fgad, head._ababc
	for _, r := range _bfdfa[1:] {
		sum += r._gbgc
		if r._fgad < lo {
			lo = r._fgad
		}
		if r._ababc > hi {
			hi = r._ababc
		}
	}
	merged := &ruling{
		_egdf:  _bfdfa[0]._egdf,
		_dcebd: _bfdfa[0]._dcebd,
		Color:  _bfdfa[0].Color,
		_gbgc:  sum / float64(len(_bfdfa)),
		_fgad:  lo,
		_ababc: hi,
	}
	if _geda {
		_fc.Log.Info("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073", len(_bfdfa), merged)
		for i, r := range _bfdfa {
			_ae.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r)
		}
	}
	return merged
}
// String returns a string describing `ma`: the number of marks together with
// the first and the last mark, or "EMPTY" when there are no marks.
func (_adab TextMarkArray) String() string {
	count := len(_adab._bade)
	if count == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	first := _adab._bade[0]
	last := _adab._bade[count-1]
	return _ae.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", count, first, last)
}

// top returns the most recently pushed text state, or nil when the stack is
// empty. The state is not removed from the stack.
func (_deeb *stateStack) top() *textState {
	if _deeb.empty() {
		return nil
	}
	return (*_deeb)[_deeb.size()-1]
}

// size returns the number of states on the stack.
func (_bbcd *stateStack) size() int {
	return len(*_bbcd)
}

// _ggeeg converts filled path sections into rulings. The sections are first
// passed to _aeefg. Every subpath that is a quadrilateral and for which
// makeRectRuling succeeds contributes one ruling with the section's color;
// all other subpaths are skipped (and optionally logged).
func _ggeeg(_cbefb []pathSection) rulingList {
	_aeefg(_cbefb)
	if _eceg {
		_fc.Log.Info("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs", len(_cbefb))
	}
	var rulings rulingList
	for _, section := range _cbefb {
		for _, path := range section._gbag {
			if !path.isQuadrilateral() {
				if _eceg {
					_fc.Log.Error("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073", path)
				}
				continue
			}
			r, ok := path.makeRectRuling(section.Color)
			if ok {
				rulings = append(rulings, r)
			} else if _bdf {
				_fc.Log.Error("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073", path)
			}
		}
	}
	if _eceg {
		_fc.Log.Info("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073", rulings.String())
	}
	return rulings
}

// _bgfd returns _bdgea shortened to at most _ggae bytes. Strings that already
// fit are returned unchanged. The cut is moved back to the start of a UTF-8
// sequence so that a multi-byte character is never split; the result can
// therefore be a few bytes shorter than _ggae. For text made of single-byte
// characters the result is exactly the first _ggae bytes.
func _bgfd(_bdgea string, _ggae int) string {
	if len(_bdgea) <= _ggae {
		return _bdgea
	}
	cut := _ggae
	// _bdgea[cut] is the first byte that would be dropped; back off while it
	// is a continuation byte so the kept prefix ends on a rune boundary.
	for cut > 0 && !_e.RuneStart(_bdgea[cut]) {
		cut--
	}
	return _bdgea[:cut]
}

// text returns the text of the line: the _eedc strings of its words
// concatenated, with a single space inserted before every word whose _ceff
// flag is set.
func (_ffca *textLine) text() string {
	var sb _bb.Builder
	for _, word := range _ffca._bfag {
		if word._ceff {
			sb.WriteString("\u0020")
		}
		sb.WriteString(word._eedc)
	}
	return sb.String()
}

// markKind is an integer enumeration; its values are declared elsewhere in
// the file.
type markKind int
// _gbeb returns a text state with _aafe set to 100, _ffgba set to
// RenderModeFill and _edfa set to the given rectangle. All other fields keep
// their zero values.
func _gbeb(_ccfa _aec.PdfRectangle) textState {
	var state textState
	state._aafe = 100
	state._ffgba = RenderModeFill
	state._edfa = _ccfa
	return state
}
// String returns a description of `p`: its rectangle, the table dimensions
// when the paragraph holds a table, the number of lines and the first part
// of its text. A paragraph flagged as empty is shown as "[EMPTY]".
func (_beda *textPara) String() string {
	if _beda._egbea {
		return _ae.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", _beda.PdfRectangle)
	}
	var tableDesc string
	if table := _beda._caaa; table != nil {
		tableDesc = _ae.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", table._afcga, table._agdc)
	}
	preview := _bgfd(_beda.text(), 50)
	return _ae.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", _beda.PdfRectangle, tableDesc, len(_beda._bfagf), preview)
}

// highestWord returns the first word stored under index _ddeb whose _aecg
// value lies in [_adbg, _adeb], or nil when there is none.
func (_bdgec *wordBag) highestWord(_ddeb int, _adbg, _adeb float64) *textWord {
	words := _bdgec._gbbd[_ddeb]
	for i := range words {
		if depth := words[i]._aecg; _adbg <= depth && depth <= _adeb {
			return words[i]
		}
	}
	return nil
}

// getDown returns, for every column of the table, the paragraph linked
// through _ccee from the cell in the last row. It returns nil when any of
// those paragraphs is taken (nil or already claimed) or when consecutive
// paragraphs are not chained through their _aabe links.
func (_ecdaf *textTable) getDown() paraList {
	below := make(paraList, _ecdaf._afcga)
	lastRow := _ecdaf._agdc - 1
	for col := range below {
		next := _ecdaf.get(col, lastRow)._ccee
		if next.taken() {
			return nil
		}
		below[col] = next
	}
	for col := 1; col < _ecdaf._afcga; col++ {
		if below[col-1]._aabe != below[col] {
			return nil
		}
	}
	return below
}

// moveTo sets the _effc flag and stores the device-space position of
// (_eedeg, _aceb) in _cgbcf.
func (_gdg *shapesState) moveTo(_eedeg, _aceb float64) {
	_gdg._effc = true
	_gdg._cgbcf = _gdg.devicePoint(_eedeg, _aceb)
	if !_bcge {
		return
	}
	_fc.Log.Info("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066", _eedeg, _aceb, _gdg._cgbcf)
}

// computeViews derives the cached views of the page text from its
// paragraphs: the plain text (_eede), the text marks (_fgb) and the tables
// (_gba).
func (_bfef *PageText) computeViews() {
	paras := _bfef.getParagraphs()
	var buf _fe.Buffer
	paras.writeText(&buf)
	_bfef._eede = buf.String()
	_bfef._fgb = paras.toTextMarks()
	_bfef._gba = paras.tables()
	if _gbead {
		_fc.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(_bfef._gba))
	}
}
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// `start` and `end` are offsets in the extracted text. They are first clamped
// to the offset of the first mark and to one past the offset of the last mark.
// The result runs from the first mark whose last byte is at or after `start`
// up to, but not including, the first mark whose Offset is at or after `end`.
// NOTE: TextMarks can contain multiple characters, e.g. "ffi" for the ffi ligature, so the first
// and last elements of the returned TextMarkArray may only partially overlap text[start:end].
// An error is returned when `ma` is nil, when end < start, or when either
// boundary search does not land on an existing mark. An empty array is
// returned unchanged.
// NOTE(review): the end search is rejected when it equals the number of
// marks, so a range reaching the last mark yields an error - confirm this is
// intended.
func (_egec *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if _egec == nil {
		return nil, _d.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _ae.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}
	marks := _egec._bade
	count := len(marks)
	if count == 0 {
		return _egec, nil
	}
	// Clamp the requested range to the offsets covered by the marks.
	if lowest := marks[0].Offset; start < lowest {
		start = lowest
	}
	if highest := marks[count-1].Offset + 1; end > highest {
		end = highest
	}

	iStart := _a.Search(count, func(i int) bool {
		return marks[i].Offset+len(marks[i].Text)-1 >= start
	})
	if iStart < 0 || iStart >= count {
		err := _ae.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, iStart, count, marks[0], marks[count-1])
		return nil, err
	}

	iEnd := _a.Search(count, func(i int) bool {
		return marks[i].Offset > end-1
	})
	if iEnd < 0 || iEnd >= count {
		err := _ae.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076", end, iEnd, count, marks[0], marks[count-1])
		return nil, err
	}

	if iEnd <= iStart {
		return nil, _ae.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_bade: marks[iStart:iEnd]}, nil
}

// quadraticTo handles a quadratic curve segment by adding only its end point
// (_ggab, _cade) to the current path; the control point (_ggbf, _gefe) is
// ignored.
func (_aefe *shapesState) quadraticTo(_ggbf, _gefe, _ggab, _cade float64) {
	if _bcge {
		_fc.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	_aefe.addPoint(_ggab, _cade)
}

// yMean returns the mean of the Y coordinates of the line's two end points.
func (_abbgd lineRuling) yMean() float64 {
	sum := _abbgd._bcfgb.Y + _abbgd._befe.Y
	return 0.5 * sum
}
// ExtractTextWithStats works like ExtractText but also returns the two
// counters reported by ExtractPageText: the number of characters in the
// output (`numChars`) and the number of characters that were not decoded
// (`numMisses`). On error the text is empty and the counters and the error
// from ExtractPageText are passed through.
func (_eae *Extractor) ExtractTextWithStats() (_bge string, _dgb int, _fbe int, _gfb error) {
	page, _dgb, _fbe, _gfb := _eae.ExtractPageText()
	if _gfb == nil {
		return page.Text(), _dgb, _fbe, nil
	}
	return "", _dgb, _fbe, _gfb
}

// establishSubpath returns the subpath that new points should be added to.
// When lastpointEstablished reports false, a new subpath is started from the
// point it returned. It returns nil when there is still no subpath;
// otherwise it clears the _effc flag and returns the last subpath.
func (_bfbd *shapesState) establishSubpath() *subpath {
	point, established := _bfbd.lastpointEstablished()
	if !established {
		_bfbd._edc = append(_bfbd._edc, _bbcb(point))
	}
	count := len(_bfbd._edc)
	if count == 0 {
		return nil
	}
	_bfbd._effc = false
	return _bfbd._edc[count-1]
}

// _gdba reports whether _cgdcd divided by the larger of _bedf and _dbac is
// below the threshold _dccc.
func _gdba(_cgdcd, _dbac float64) bool {
	ratio := _cgdcd / _ea.Max(_bedf, _dbac)
	return ratio < _dccc
}
// findGridTables looks for one table per grid tiling in _eccf. Every table
// returned by findTableGrid is logged, collected and has its cells marked;
// every paragraph in the second result of findTableGrid gets its _dcddf flag
// set, whether or not a table was found for that tiling.
func (_gadbe paraList) findGridTables(_eccf []gridTiling) []*textTable {
	if _gbead {
		_fc.Log.Info("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073", len(_gadbe))
		for i, para := range _gadbe {
			_ae.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
	}

	var tables []*textTable
	for i, tiling := range _eccf {
		table, used := _gadbe.findTableGrid(tiling)
		if table != nil {
			table.log(_ae.Sprintf("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064", i))
			tables = append(tables, table)
			table.markCells()
		}
		for para := range used {
			para._dcddf = true
		}
	}

	if _gbead {
		_fc.Log.Info("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s", len(tables))
	}
	return tables
}

// _cgce is a regular expression source string. NOTE(review): decoded, it
// looks like a case-insensitive pattern for Roman-numeral list markers such
// as "iv)", "iv." or "(iv)" - confirm where it is compiled and used.
var _cgce string = "\u0028\u003f\u0069\u0029\u005e\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028?\u003a\u0044\u007cM\u0029\u007c\u0044\u003f\u0043{\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028\u003f\u003a\u004c\u007c\u0043\u0029\u007cL\u003f\u0058\u007b\u0030\u002c\u0033}\u0029\u0028\u0049\u0028\u003f\u003a\u0056\u007c\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u005c\u0029\u007c\u005c\u002e\u0029\u007c\u005e\u005c\u0028\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028\u003f\u003aD\u007cM\u0029\u007c\u0044\u003f\u0043\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028?\u003a\u004c\u007c\u0043\u0029\u007c\u004c?\u0058\u007b0\u002c\u0033\u007d\u0029(\u0049\u0028\u003f\u003a\u0056|\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u005c\u0029"
// The next line holds three definitions:
//   - _egfdf returns the Llx of the first word of a line (the line must have
//     at least one word).
//   - textLine is a rectangle together with a float64 value _gaca, the words
//     of the line (_bfag) and a float64 value _fgcb.
//   - newTextMark (continues on the following lines) builds a textMark for
//     the text _egcg drawn with matrix _ecdc and ending at point _bcged. It
//     returns the mark and a flag that is false when the mark was found to
//     lie outside the extractor's _c rectangle.
func _egfdf ( _acbb * textLine ) float64 { return _acbb . _bfag [ 0 ] . Llx } ; type textLine struct { _aec . PdfRectangle ; _gaca float64 ; _bfag [ ] * textWord ; _fgcb float64 ; } ; func ( _bfd * textObject ) newTextMark ( _egcg string , _ecdc _g . Matrix , _bcged _g . Point , _dgabb float64 , _abe * _aec . PdfFont , _bbcf float64 , _dfbd , _fgbg _eg . Color , _gada _bad . PdfObject , _ggaa [ ] string , _gcac int , _cgdb int ) ( textMark , bool ) { _cdga := _ecdc . Angle ( ) ;
// The matrix angle is rounded by _begdb to a multiple of _ceba. The height
// used for the box is the Y scaling factor, or the X scaling factor when the
// rounded angle modulo 180 is 90. The box initially spans from the point
// returned by _eee for the matrix to the end point _bcged.
_bggg := _begdb ( _cdga , _ceba ) ; var _geec float64 ; if _bggg % 180 != 90 { _geec = _ecdc . ScalingFactorY ( ) ; } else { _geec = _ecdc . ScalingFactorX ( ) ; } ; _ggfe := _eee ( _ecdc ) ; _fdcc := _aec . PdfRectangle { Llx : _ggfe . X , Lly : _ggfe . Y , Urx : _bcged . X , Ury : _bcged . Y } ;
// The box is extended by the height in a direction chosen by the
// orientation; orientations other than 0/90/180/270 are treated as 0. The
// corners are then swapped where needed so that Llx <= Urx and Lly <= Ury.
switch _bggg % 360 { case 90 : _fdcc . Urx -= _geec ; case 180 : _fdcc . Ury -= _geec ; case 270 : _fdcc . Urx += _geec ; case 0 : _fdcc . Ury += _geec ; default : _bggg = 0 ; _fdcc . Ury += _geec ; } ; if _fdcc . Llx > _fdcc . Urx { _fdcc . Llx , _fdcc . Urx = _fdcc . Urx , _fdcc . Llx ; } ; if _fdcc . Lly > _fdcc . Ury { _fdcc . Lly , _fdcc . Ury = _fdcc . Ury , _fdcc . Lly ;
// When the extractor's _c rectangle (logged below as the media box) has a
// positive width, the box is replaced by the rectangle returned by _degd; a
// false second result marks the text as outside the page.
} ; _dgef := true ; if _bfd . _eegd . _c . Width ( ) > 0 { _dgaa , _dged := _degd ( _fdcc , _bfd . _eegd . _c ) ; if ! _dged { _dgef = false ; _fc . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q" , _fdcc , _bfd . _eegd . _c , _egcg ) ;
// _gagc is a second rectangle derived from the box and the _c rectangle
// according to the orientation (for 0 it equals the box). It becomes the
// mark's embedded PdfRectangle, while the box itself is stored in _gega.
// NOTE(review): this looks like a mapping into a rotated coordinate frame -
// confirm the intended geometry before changing it.
} ; _fdcc = _dgaa ; } ; _gagc := _fdcc ; _gbba := _bfd . _eegd . _c ; switch _bggg % 360 { case 90 : _gbba . Urx , _gbba . Ury = _gbba . Ury , _gbba . Urx ; _gagc = _aec . PdfRectangle { Llx : _gbba . Urx - _fdcc . Ury , Urx : _gbba . Urx - _fdcc . Lly , Lly : _fdcc . Llx , Ury : _fdcc . Urx } ;
case 180 : _gagc = _aec . PdfRectangle { Llx : _gbba . Urx - _fdcc . Llx , Urx : _gbba . Urx - _fdcc . Urx , Lly : _gbba . Ury - _fdcc . Lly , Ury : _gbba . Ury - _fdcc . Ury } ; case 270 : _gbba . Urx , _gbba . Ury = _gbba . Ury , _gbba . Urx ; _gagc = _aec . PdfRectangle { Llx : _fdcc . Ury , Urx : _fdcc . Lly , Lly : _gbba . Ury - _fdcc . Llx , Ury : _gbba . Ury - _fdcc . Urx } ;
} ; if _gagc . Llx > _gagc . Urx { _gagc . Llx , _gagc . Urx = _gagc . Urx , _gagc . Llx ; } ; if _gagc . Lly > _gagc . Ury { _gagc . Lly , _gagc . Ury = _gagc . Ury , _gagc . Lly ; } ; _cefe := textMark { _gded : _egcg , PdfRectangle : _gagc , _gega : _fdcc , _bccc : _abe , _cbbae : _geec , _dcfd : _bbcf , _ddebg : _ecdc , _bdbd : _bcged , _eeacf : _bggg , _fdag : _dfbd , _aabcb : _fgbg , _gafcd : _gada , _agff : _ggaa , Th : _bfd . _cbgd . _aafe , Tw : _bfd . _cbgd . _ecd , _ebgag : _cgdb , _daeff : _gcac } ;
if _baf { _fc . Log . Info ( "n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073" , _ggfe , _bcged , _cefe . String ( ) ) ; } ; return _cefe , _dgef ;
// arrangeText starts on the next line (after the brace that closes
// newTextMark). It sorts the bag, removes duplicates when _cfee is set, and
// then repeatedly pulls words out of the bag into lines until the bag is
// empty. The lines are sorted with _dddc and wrapped into a paragraph by
// _geaa. It returns nil when no line was produced.
} ; func ( _gdbcc * wordBag ) arrangeText ( ) * textPara { _gdbcc . sort ( ) ; if _cfee { _gdbcc . removeDuplicates ( ) ; } ; var _fcaeb [ ] * textLine ; for _ , _gdbd := range _gdbcc . depthIndexes ( ) { for ! _gdbcc . empty ( _gdbd ) { _daedb := _gdbcc . firstReadingIndex ( _gdbd ) ; _eggb := _gdbcc . firstWord ( _daedb ) ;
// A line is started by _cffa. Its depth band is the first word's _aecg plus
// or minus _ddfa times the word's _aeegf value. Inside the labelled loop the
// candidate from each index of the band is compared with the last word of
// the line using the gap computed by _cdfb: a gap below -_decc*_aeegf ends
// the line, a gap above _ggad*_aeegf skips the candidate, and among the rest
// the one that _bcea orders first is pulled into the line.
_efgg := _cffa ( _gdbcc , _daedb ) ; _fgfee := _eggb . _aeegf ; _fcfff := _eggb . _aecg - _ddfa * _fgfee ; _cffbd := _eggb . _aecg + _ddfa * _fgfee ; _cfeaf := _ggad * _fgfee ; _caeeb := _decc * _fgfee ; _dceg : for { var _bdbdg * textWord ; _dcgb := 0 ; for _ , _gbbaa := range _gdbcc . depthBand ( _fcfff , _cffbd ) { _faaa := _gdbcc . highestWord ( _gbbaa , _fcfff , _cffbd ) ;
if _faaa == nil { continue ; } ; _accf := _cdfb ( _faaa , _efgg . _bfag [ len ( _efgg . _bfag ) - 1 ] ) ; if _accf < - _caeeb { break _dceg ; } ; if _accf > _cfeaf { continue ; } ; if _bdbdg != nil && _bcea ( _faaa , _bdbdg ) >= 0 { continue ; } ; _bdbdg = _faaa ; _dcgb = _gbbaa ; } ; if _bdbdg == nil { break ;
} ; _efgg . pullWord ( _gdbcc , _bdbdg , _dcgb ) ; } ; _efgg . markWordBoundaries ( ) ; _fcaeb = append ( _fcaeb , _efgg ) ; } ; } ; if len ( _fcaeb ) == 0 { return nil ; } ; _a . Slice ( _fcaeb , func ( _efdgd , _eaad int ) bool { return _dddc ( _fcaeb [ _efdgd ] , _fcaeb [ _eaad ] ) < 0 } ) ; _ebab := _geaa ( _gdbcc . PdfRectangle , _fcaeb ) ;
// Optional debug dump of the paragraph, its lines, words and marks.
if _fbbd { _fc . Log . Info ( "\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073" , _ebab . String ( ) ) ; if _ecda { for _dfbag , _gegg := range _ebab . _bfagf { _ae . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _dfbag , _gegg . String ( ) ) ;
if _baag { for _cbbab , _fccfg := range _gegg . _bfag { _ae . Printf ( "\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a" , _cbbab , _fccfg . String ( ) ) ; for _bbfd , _aeab := range _fccfg . _ebfa { _ae . Printf ( "\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n" , _bbfd , _aeab . String ( ) ) ;
// After arrangeText ends, the next line declares a block of untyped boolean
// and numeric constants (among them _cfee, _eecg and _ebada, which gate
// duplicate removal and the two table finders in this file), followed by
// _fgdfed, which classifies the segment between two points by passing the
// absolute X and Y differences and _dccc to _gdgbf.
} ; } ; } ; } ; } ; } ; return _ebab ; } ; const ( _bfab = true ; _cfee = true ; _ccgb = true ; _acge = false ; _cbaf = false ; _fgeg = 6 ; _bbbgd = 3.0 ; _bgdcg = 200 ; _eecg = true ; _ebada = true ; _bffc = true ; _gbdg = true ; _aaeb = false ; ) ; func _fgdfed ( _gcga , _ddfg _g . Point ) rulingKind { _bfea := _ea . Abs ( _gcga . X - _ddfg . X ) ;
// removeWord (starting on the next line) removes _accg from the words stored
// under index _bcbg via _fgacb and deletes the index from the map when no
// words remain.
_ggbgca := _ea . Abs ( _gcga . Y - _ddfg . Y ) ; return _gdgbf ( _bfea , _ggbgca , _dccc ) ; } ; func ( _fdbc * wordBag ) removeWord ( _accg * textWord , _bcbg int ) { _adgg := _fdbc . _gbbd [ _bcbg ] ; _adgg = _fgacb ( _adgg , _accg ) ; if len ( _adgg ) == 0 { delete ( _fdbc . _gbbd , _bcbg ) ;
// The next line also defines textWord.absorb, which grows the word's
// rectangle to include _bdcfa's and appends _bdcfa's marks, and _fccge, which
// sets the mark's Offset to *_gfde, appends the mark to _abee and advances
// *_gfde by the byte length of the mark's Text.
} else { _fdbc . _gbbd [ _bcbg ] = _adgg ; } ; } ; func ( _eebg * textWord ) absorb ( _bdcfa * textWord ) { _eebg . PdfRectangle = _agfb ( _eebg . PdfRectangle , _bdcfa . PdfRectangle ) ; _eebg . _ebfa = append ( _eebg . _ebfa , _bdcfa . _ebfa ... ) ; } ; func _fccge ( _abee [ ] TextMark , _gfde * int , _gagga TextMark ) [ ] TextMark { _gagga . Offset = * _gfde ;
_abee = append ( _abee , _gagga ) ; * _gfde += len ( _gagga . Text ) ; return _abee ; } ;
// PageText represents the layout of text on a device page.
type PageText struct {
	_dbfe []*textMark // the page's text marks
	_eede string      // extracted text, set by computeViews
	_fgb  []TextMark  // public text marks, set by computeViews
	_gba  []TextTable // tables, set by computeViews
	_babf _aec.PdfRectangle
	_afgd []pathSection
	_ddae []pathSection
	_cegb *_bad.PdfObject
	_dfgd _bad.PdfObject
	_cafg *_ba.ContentStreamOperations
	_cdgg PageTextOptions
}

// setFont stores the size _ged in the current text state and then looks up
// the font named _cfcd. On success the font is stored in the text state as
// well; on failure the lookup error is returned and only the size has been
// updated. A nil receiver is a no-op.
func (_dae *textObject) setFont(_cfcd string, _ged float64) error {
	if _dae == nil {
		return nil
	}
	state := _dae._cbgd
	state._dgad = _ged
	font, err := _dae.getFont(_cfcd)
	if err == nil {
		_dae._cbgd._cbad = font
	}
	return err
}

// mergePrimary returns the mean of the _gbgc values of the rulings in the
// list. The list must not be empty.
func (_gcdg rulingList) mergePrimary() float64 {
	total := _gcdg[0]._gbgc
	for i := 1; i < len(_gcdg); i++ {
		total += _gcdg[i]._gbgc
	}
	return total / float64(len(_gcdg))
}

// Mark kinds, numbered from zero in declaration order.
const (
	_acbdc markKind = iota
	_bggb
	_gagfa
	_dbed
)

// _aeefg replaces, in place, both coordinates of every point of every
// subpath in _acdef with the value returned by _aagaf. It does nothing when
// _edge is negative. With _eceg set, every point for which _ceab reports
// false on the old and new value is printed together with the difference.
func _aeefg(_acdef []pathSection) {
	if _edge < 0.0 {
		return
	}
	if _eceg {
		_fc.Log.Info("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073", len(_acdef))
	}
	for si, section := range _acdef {
		for pi, path := range section._gbag {
			for ki, before := range path._acfg {
				path._acfg[ki] = _g.Point{X: _aagaf(before.X), Y: _aagaf(before.Y)}
				if !_eceg {
					continue
				}
				after := path._acfg[ki]
				if _ceab(before, after) {
					continue
				}
				delta := _g.Point{X: after.X - before.X, Y: after.Y - before.Y}
				_ae.Printf("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a", si, pi, ki, before, after, delta)
			}
		}
	}
}

// depth returns -1 for a paragraph flagged as empty, the _gaca value of the
// first line when the paragraph has lines, and otherwise the depth of the
// paragraph's table.
func (_cfade *textPara) depth() float64 {
	switch {
	case _cfade._egbea:
		return -1.0
	case len(_cfade._bfagf) > 0:
		return _cfade._bfagf[0]._gaca
	default:
		return _cfade._caaa.depth()
	}
}
// String returns a description of `w`: its _aecg value, rectangle, font size
// (_aeegf) and text.
func (_aeacd *textWord) String() string {
	return _ae.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022", _aeacd._aecg, _aeacd.PdfRectangle, _aeacd._aeegf, _aeacd._eedc)
}

// textPara is a paragraph: a rectangle, a second rectangle (_bbbc), the lines
// of text, an optional table, two flags and four links to other paragraphs.
// _dcddf is the flag read by taken(); _egbea marks an empty paragraph.
// getDown follows _ccee from the bottom row and isAtom follows _aabe and
// _ccee to form a 2x2 candidate; the roles of _bceaa and _ggfbb are not
// visible in this part of the file.
type textPara struct {
	_aec.PdfRectangle
	_bbbc  _aec.PdfRectangle
	_bfagf []*textLine
	_caaa  *textTable
	_dcddf bool
	_egbea bool
	_bceaa *textPara
	_aabe  *textPara
	_ggfbb *textPara
	_ccee  *textPara
	_ddef  []list
}

// _begdb rounds _eefe to the nearest multiple of _cgbb and returns it as an
// int. A _cgbb of 0 is treated as 1.
func _begdb(_eefe float64, _cgbb int) int {
	if _cgbb == 0 {
		_cgbb = 1
	}
	step := float64(_cgbb)
	return int(_ea.Round(_eefe/step) * step)
}

// sortReadingOrder sorts the paragraphs in place using the comparison _dddc,
// after computeEBBoxes has been called on the list. Lists with fewer than two
// paragraphs are left untouched.
//
// The less function is strict (`< 0`), as sort.Slice expects and as the other
// _dddc-based sort in this file does. The previous `<= 0` reported "less" in
// both directions for paragraphs that compare equal.
func (_bbcce paraList) sortReadingOrder() {
	_fc.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_bbcce))
	if len(_bbcce) <= 1 {
		return
	}
	_bbcce.computeEBBoxes()
	_a.Slice(_bbcce, func(i, j int) bool {
		return _dddc(_bbcce[i], _bbcce[j]) < 0
	})
}

// growTable extends the table one row and/or one column at a time for as long
// as getDown or getRight returns a full row or column of free paragraphs.
// When both are available and share the same free corner paragraph, the row
// is added first, getRight is evaluated again and, if it still succeeds, the
// column and the corner cell are added too.
func (_cadfbb *textTable) growTable() {
	// addRow appends `row` as a new last row.
	addRow := func(row paraList) {
		_cadfbb._agdc++
		for x := 0; x < _cadfbb._afcga; x++ {
			cell := row[x]
			_cadfbb.put(x, _cadfbb._agdc-1, cell)
		}
	}
	// addColumn appends `column` as a new last column.
	addColumn := func(column paraList) {
		_cadfbb._afcga++
		for y := 0; y < _cadfbb._agdc; y++ {
			cell := column[y]
			_cadfbb.put(_cadfbb._afcga-1, y, cell)
		}
	}

	if _efcc {
		_cadfbb.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}
	for step := 0; ; step++ {
		grown := false
		down := _cadfbb.getDown()
		right := _cadfbb.getRight()
		if _efcc {
			_ae.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", step, _cadfbb)
			_ae.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", down)
			_ae.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", right)
		}
		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				addRow(down)
				// The table changed, so the right-hand column must be
				// recomputed before it can be added.
				if right = _cadfbb.getRight(); right != nil {
					addColumn(right)
					_cadfbb.put(_cadfbb._afcga-1, _cadfbb._agdc-1, corner)
				}
				grown = true
			}
		}
		if !grown && down != nil {
			addRow(down)
			grown = true
		}
		if !grown && right != nil {
			addColumn(right)
			grown = true
		}
		if !grown {
			break
		}
	}
}

// isAtom tries to form a table from the paragraph, the paragraphs linked
// through its _aabe and _ccee fields, and the paragraph diagonal to it. It
// returns nil when any of the three neighbours is taken or when the two
// paths to the diagonal paragraph do not agree; otherwise it returns the
// table built by _eadd.
func (_cfcf *textPara) isAtom() *textTable {
	origin := _cfcf
	right := _cfcf._aabe
	below := _cfcf._ccee
	if right.taken() || below.taken() {
		return nil
	}
	diagonal := right._ccee
	if diagonal.taken() || diagonal != below._aabe {
		return nil
	}
	return _eadd(origin, right, below, diagonal)
}

// connections returns the set of ruling indexes reachable from _gege:
// starting at _gege, every index i whose set _fgcd[i] contains an already
// visited index is added, and the search then continues from the added
// indexes. The start index itself is only included if it is reached this way.
func (_cfba rulingList) connections(_fgcd map[int]intSet, _gege int) intSet {
	connected := make(intSet)
	visited := make(intSet)
	var visit func(int)
	visit = func(from int) {
		if visited.has(from) {
			return
		}
		visited.add(from)
		for i := range _cfba {
			if _fgcd[i].has(from) {
				connected.add(i)
			}
		}
		for i := range _cfba {
			if connected.has(i) {
				visit(i)
			}
		}
	}
	visit(_gege)
	return connected
}
// String returns a description of `t` in the form "<_afcga> x <_agdc> <_fbccb>",
// where the first two values are integers and the last is a boolean.
func (_adgge *textTable) String() string {
	width, height := _adgge._afcga, _adgge._agdc
	flag := _adgge._fbccb
	return _ae.Sprintf("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074", width, height, flag)
}
// ToTextMark returns the public view of `tm`. The BBox of the result is the
// mark's _gega rectangle, not its embedded PdfRectangle. Fields of TextMark
// that are not listed here (for example Offset) are left at their zero value.
func (_fcgg *textMark) ToTextMark() TextMark {
	var public TextMark
	public.Text = _fcgg._gded
	public.Original = _fcgg._dcff
	public.BBox = _fcgg._gega
	public.Font = _fcgg._bccc
	public.FontSize = _fcgg._cbbae
	public.FillColor = _fcgg._fdag
	public.StrokeColor = _fcgg._aabcb
	public.Orientation = _fcgg._eeacf
	public.DirectObject = _fcgg._gafcd
	public.ObjString = _fcgg._agff
	public.Tw = _fcgg.Tw
	public.Th = _fcgg.Th
	public.Tc = _fcgg._dcfd
	public.Index = _fcgg._daeff
	return public
}

// alignsSec reports whether the [_fgad, _ababc] ranges of the two rulings
// overlap, allowing a tolerance of _cabc+1.0 on either side.
func (_afed *ruling) alignsSec(_adbcg *ruling) bool {
	const tolerance = _cabc + 1.0
	if !(_afed._fgad-tolerance <= _adbcg._ababc) {
		return false
	}
	return _adbcg._fgad-tolerance <= _afed._ababc
}
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
type ImageMark struct {
	Image *_aec.Image

	// Dimensions of the image as displayed in the PDF.
	Width, Height float64

	// Position of the image in PDF coordinates (lower left corner).
	X, Y float64

	// Angle in degrees, if rotated.
	Angle float64
}

// appendMark adds the mark _cddge to the word: the mark is appended to the
// word's marks, the word's rectangle is grown to include the mark's
// rectangle, the word's _aeegf is raised to the mark's _cbbae when that is
// larger, and _aecg is recomputed as the Ury of _adcaec minus the word's Lly.
func (_acdeb *textWord) appendMark(_cddge *textMark, _adcaec _aec.PdfRectangle) {
	_acdeb._ebfa = append(_acdeb._ebfa, _cddge)
	_acdeb.PdfRectangle = _agfb(_acdeb.PdfRectangle, _cddge.PdfRectangle)
	if size := _cddge._cbbae; size > _acdeb._aeegf {
		_acdeb._aeegf = size
	}
	_acdeb._aecg = _adcaec.Ury - _acdeb.PdfRectangle.Lly
}

// sortStrict sorts the rulings in place by, in order of priority: kind
// (_egdf) descending; _gbgc ascending, where values whose difference
// satisfies _ecfbd are treated as equal; _fgad ascending; _ababc ascending.
func (_bgfe rulingList) sortStrict() {
	less := func(i, j int) bool {
		a, b := _bgfe[i], _bgfe[j]
		if a._egdf != b._egdf {
			return a._egdf > b._egdf
		}
		if !_ecfbd(a._gbgc - b._gbgc) {
			return a._gbgc < b._gbgc
		}
		if a._fgad != b._fgad {
			return a._fgad < b._fgad
		}
		return a._ababc < b._ababc
	}
	_a.Slice(_bgfe, less)
}

// taken reports whether the paragraph is unavailable: it is nil or its
// _dcddf flag is set.
func (_gbdgb *textPara) taken() bool {
	if _gbdgb == nil {
		return true
	}
	return _gbdgb._dcddf
}
// _bcccf maps each rulingKind to its display name ("none", "horizontal", "vertical").
var _bcccf = map[rulingKind]string{
	_cbfe: "\u006e\u006f\u006e\u0065",
	_bfgb: "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",
	_eebe: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",
}

// complete reports whether numBorders() returns 4 for the tile.
func (_feagc gridTile) complete() bool {
	const allSides = 4
	borders := _feagc.numBorders()
	return borders == allSides
}
// toTextMarks returns the TextMarks for the paragraph, threading the running
// offset `_bbbgg` through the helpers it calls.
//
// A paragraph without a table (`_caaa == nil`) delegates to toCellTextMarks.
// Otherwise the table is walked row by row: a missing cell contributes a tab,
// every cell is followed by a space, and every row except the last is followed
// by a newline. If the table is exportable, its TextTable is attached to the
// marks through `_cbabg`.
func (_gefd *textPara) toTextMarks(_bbbgg *int) []TextMark {
	table := _gefd._caaa
	if table == nil {
		return _gefd.toCellTextMarks(_bbbgg)
	}

	var marks []TextMark
	for y := 0; y < table._agdc; y++ {
		for x := 0; x < table._afcga; x++ {
			if cell := table.get(x, y); cell != nil {
				cellMarks := cell.toCellTextMarks(_bbbgg)
				marks = append(marks, cellMarks...)
			} else {
				marks = _ggcce(marks, _bbbgg, "\u0009")
			}
			marks = _ggcce(marks, _bbbgg, "\u0020")
		}
		if y < table._agdc-1 {
			marks = _ggcce(marks, _bbbgg, "\u000a")
		}
	}

	if table.isExportable() {
		exported := table.toTextTable()
		marks = _cbabg(marks, &exported)
	}
	return marks
}

// blocks reports whether some ruling in the list lies between `_bafa` and
// `_bfbb`: its `_gbgc` is within the two arguments' `_gbgc` values (with `_cabc`
// slack) and its [_fgad, _ababc] interval covers the overlap of the arguments'
// intervals. It returns false immediately when those intervals do not overlap.
func (_gebad rulingList) blocks(_bafa, _bfbb *ruling) bool {
	first, second := _bafa, _bfbb
	if first._fgad > second._ababc || second._fgad > first._ababc {
		return false
	}
	overlapLo := _ea.Max(first._fgad, second._fgad)
	overlapHi := _ea.Min(first._ababc, second._ababc)

	// Order the pair so that `first` has the smaller primary coordinate.
	if first._gbgc > second._gbgc {
		first, second = second, first
	}

	for _, r := range _gebad {
		if !(first._gbgc <= r._gbgc+_cabc && r._gbgc <= second._gbgc+_cabc) {
			continue
		}
		if r._fgad <= overlapHi && overlapLo <= r._ababc {
			return true
		}
	}
	return false
}

// _abaa returns `_bcea(_bcaf, _bcff)` unless that value is within the `_ecfbd`
// tolerance of zero, in which case it falls back to `_dgc(_bcaf, _bcff)`.
func _abaa(_bcaf, _bcff bounded) float64 {
	if primary := _bcea(_bcaf, _bcff); !_ecfbd(primary) {
		return primary
	}
	return _dgc(_bcaf, _bcff)
}

// _gged returns the keys of `_gefc` in ascending order.
func _gged(_gefc map[int][]float64) []int {
	keys := make([]int, 0, len(_gefc))
	for key := range _gefc {
		keys = append(keys, key)
	}
	_a.Ints(keys)
	return keys
}

// topoOrder returns an ordering of paragraph indexes computed by a depth-first
// traversal of the readBefore relation: a paragraph's index is emitted only
// after every unvisited paragraph it is read before has been emitted, and the
// resulting list is passed through `_cgdbc` before being returned.
func (_adae paraList) topoOrder() []int {
	if _beee {
		_fc.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}

	count := len(_adae)
	seen := make([]bool, count)
	emitted := make([]int, 0, count)
	ordering := _adae.llyOrdering()

	var visit func(idx int)
	visit = func(idx int) {
		seen[idx] = true
		for next := range seen {
			if seen[next] {
				continue
			}
			if _adae.readBefore(ordering, idx, next) {
				visit(next)
			}
		}
		emitted = append(emitted, idx)
	}

	for start := range seen {
		if seen[start] {
			continue
		}
		visit(start)
	}
	return _cgdbc(emitted)
}

// _eeef returns the lines whose `_gaca` is strictly greater than `_fda` and,
// unless `_fege` is exactly -1, also strictly less than `_fege`.
func _eeef(_fggb []*textLine, _fege, _fda float64) []*textLine {
	var kept []*textLine
	for _, line := range _fggb {
		if line._gaca > _fda && (_fege == -1 || line._gaca < _fege) {
			kept = append(kept, line)
		}
	}
	return kept
}
// String returns a human readable description of `s`: its members in
// ascending order, formatted with %+v.
func (_fecc intSet) String() string {
	// Every key produced by ranging over the set is a member, so no extra
	// membership check is needed.
	members := make([]int, 0, len(_fecc))
	for member := range _fecc {
		members = append(members, member)
	}
	_a.Ints(members)
	return _ae.Sprintf("\u0025\u002b\u0076", members)
}

// _eadd builds a 2x2 textTable from four paragraphs. The arguments are stored
// at (0,0), (1,0), (0,1) and (1,1) respectively.
func _eadd(_ceac, _egga, _feaad, _eaba *textPara) *textTable {
	table := &textTable{_afcga: 2, _agdc: 2, _bfdff: make(map[uint64]*textPara, 4)}
	table.put(0, 0, _ceac)
	table.put(1, 0, _egga)
	table.put(0, 1, _feaad)
	table.put(1, 1, _eaba)
	return table
}

// _agfb returns the smallest rectangle that contains both `_fcacdd` and `_ffge`.
func _agfb(_fcacdd, _ffge _aec.PdfRectangle) _aec.PdfRectangle {
	return _aec.PdfRectangle{
		Llx: _ea.Min(_fcacdd.Llx, _ffge.Llx),
		Lly: _ea.Min(_fcacdd.Lly, _ffge.Lly),
		Urx: _ea.Max(_fcacdd.Urx, _ffge.Urx),
		Ury: _ea.Max(_fcacdd.Ury, _ffge.Ury),
	}
}

// _fdba turns text marks into a list of paragraphs (it logs itself as
// "makeTextPage").
//
// Steps, in order: build words from the marks (`_eefcd`), split the rulings
// into two lists with vertsHorzs, build a word bag (`_fcbd`), divide it into
// per-paragraph bags (`_fcaa`, then `_fcfa`), arrange each bag into a paragraph,
// optionally extract tables, and finally sort. Table extraction and the
// topological sort are skipped when `_adad` is true; table extraction also
// requires at least `_dbdg` paragraphs.
//
// It returns nil when there are no marks or no words.
func _fdba(_fbage []*textMark, _cddb _aec.PdfRectangle, _aeffa rulingList, _cfdga []gridTiling, _adad bool) paraList {
	_fc.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_fbage), _cddb)
	if len(_fbage) == 0 {
		return nil
	}
	words := _eefcd(_fbage, _cddb)
	if len(words) == 0 {
		return nil
	}

	_aeffa.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	listA, listB := _aeffa.vertsHorzs()
	pageBag := _fcbd(words, _cddb.Ury, listA, listB)
	paraBags := _fcaa(pageBag, _cddb.Ury, listA, listB)
	paraBags = _fcfa(paraBags)

	paras := make(paraList, 0, len(paraBags))
	for _, bag := range paraBags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}

	if !_adad && len(paras) >= _dbdg {
		paras = paras.extractTables(_cfdga)
	}
	paras.sortReadingOrder()
	if !_adad {
		paras.sortTopoOrder()
	}
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}

// structElement is a node of a tree of structure elements; `_ccaac` holds the
// child nodes. NOTE(review): the meaning of the remaining fields is not evident
// from this part of the file.
type structElement struct {
	_aeff  string
	_ccaac []structElement
	_fab   int64
	_ecdd  _bad.PdfObject
}

// makeRectRuling tries to interpret the first four points of the subpath as an
// axis-aligned rectangle and to convert that rectangle into a ruling of color
// `_gfabf`.
//
// Each of the four sides (point i to point (i+1)%4) is classified with
// `_fgdfed`. The path is accepted when it has two horizontal and two vertical
// sides, or two sides of one kind, none of the other, and the matching
// `_dbef` / `_bbfff` check succeeds on the two sides' start points. The second
// return value is false when the path is rejected or when the resulting
// rectangle is not a ruling.
func (_dgcd *subpath) makeRectRuling(_gfabf _eg.Color) (*ruling, bool) {
	if _bdf {
		_fc.Log.Info("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076", _dgcd)
	}
	// A rectangle needs four corners. Without this guard the slice expression
	// below panics, or silently picks up stale backing-array slots.
	if len(_dgcd._acfg) < 4 {
		return nil, false
	}

	corners := _dgcd._acfg[:4]
	kinds := make(map[int]rulingKind, len(corners))
	for i, from := range corners {
		to := _dgcd._acfg[(i+1)%4]
		kinds[i] = _fgdfed(from, to)
		if _bdf {
			_ae.Printf("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066", i, kinds[i], from, to)
		}
	}
	if _bdf {
		_ae.Printf("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a", kinds)
	}

	// Walk the sides by index rather than ranging over the map, so that the
	// index lists are always in ascending order instead of random map order.
	var vertSides, horzSides []int
	for i := range corners {
		switch kinds[i] {
		case _bfgb:
			horzSides = append(horzSides, i)
		case _eebe:
			vertSides = append(vertSides, i)
		}
	}
	if _bdf {
		_ae.Printf("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(horzSides), horzSides)
		_ae.Printf("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(vertSides), vertSides)
	}

	accepted := (len(horzSides) == 2 && len(vertSides) == 2) ||
		(len(horzSides) == 2 && len(vertSides) == 0 && _dbef(corners[horzSides[0]], corners[horzSides[1]])) ||
		(len(vertSides) == 2 && len(horzSides) == 0 && _bbfff(corners[vertSides[0]], corners[vertSides[1]]))
	if _bdf {
		_ae.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzSides), len(vertSides), accepted)
	}
	if !accepted {
		if _bdf {
			_fc.Log.Error("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v", _dgcd)
			_ae.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzSides), len(vertSides), accepted)
		}
		// Kept as a non-nil empty ruling: callers may rely on this return value.
		return &ruling{}, false
	}

	// When only one kind of side was found, treat the remaining sides as the other kind.
	if len(vertSides) == 0 {
		for i := range corners {
			if kinds[i] != _bfgb {
				vertSides = append(vertSides, i)
			}
		}
	}
	if len(horzSides) == 0 {
		for i := range corners {
			if kinds[i] != _eebe {
				horzSides = append(horzSides, i)
			}
		}
	}
	if _bdf {
		_fc.Log.Info("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076", len(horzSides), len(vertSides), len(corners), horzSides, vertSides, corners)
	}

	// Pick the extreme points of each pair of sides.
	var largerX, smallerX, largerY, smallerY _g.Point
	if corners[horzSides[0]].Y > corners[horzSides[1]].Y {
		largerY, smallerY = corners[horzSides[0]], corners[horzSides[1]]
	} else {
		largerY, smallerY = corners[horzSides[1]], corners[horzSides[0]]
	}
	if corners[vertSides[0]].X > corners[vertSides[1]].X {
		largerX, smallerX = corners[vertSides[0]], corners[vertSides[1]]
	} else {
		largerX, smallerX = corners[vertSides[1]], corners[vertSides[0]]
	}

	// The X bounds are filled in reversed; the swaps below normalize the rectangle.
	rect := _aec.PdfRectangle{Llx: largerX.X, Urx: smallerX.X, Lly: smallerY.Y, Ury: largerY.Y}
	if rect.Llx > rect.Urx {
		rect.Llx, rect.Urx = rect.Urx, rect.Llx
	}
	if rect.Lly > rect.Ury {
		rect.Lly, rect.Ury = rect.Ury, rect.Lly
	}

	candidate := rectRuling{PdfRectangle: rect, _cbae: _faab(rect), Color: _gfabf}
	if candidate._cbae == _cbfe {
		if _bdf {
			_fc.Log.Error("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c")
		}
		return nil, false
	}
	result, isRuling := candidate.asRuling()
	if !isRuling {
		if _bdf {
			_fc.Log.Error("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg")
		}
		return nil, false
	}
	if _eceg {
		_ae.Printf("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a", result.String())
	}
	return result, true
}

// _fcaa drains the word bag `_bdaa` into a list of smaller word bags.
//
// For every depth index, while words remain at that index, the first word in
// reading order seeds a new bag (`_ecea`) and is removed from `_bdaa`. The new
// bag is then grown by repeated scanBand passes until a pass adds nothing:
//   - a "vertical" and a "horizontal" pass, each of which pulls words in;
//   - only if neither added anything, a counting-only pass followed, when the
//     count passes the thresholds below, by a pulling "other" pass.
//
// The gaps used by the passes are multiples of the bag's `_aad` value.
func _fcaa(_bdaa *wordBag, _gbaf float64, _caag, _edb rulingList) []*wordBag {
	var result []*wordBag
	for _, depthIdx := range _bdaa.depthIndexes() {
		grew := false
		for !_bdaa.empty(depthIdx) {
			readingIdx := _bdaa.firstReadingIndex(depthIdx)
			seed := _bdaa.firstWord(readingIdx)
			bag := _ecea(seed, _gbaf, _caag, _edb)
			_bdaa.removeWord(seed, readingIdx)
			if _fedc {
				_fc.Log.Info("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073", seed.String())
			}

			// Keep scanning until a full pass adds no word to the bag.
			for again := true; again; again = grew {
				grew = false
				lineGap := _gefa * bag._aad
				readingGap := _debe * bag._aad
				depthGap := _fefg * bag._aad
				if _fedc {
					_fc.Log.Info("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066", bag.minDepth(), bag.maxDepth(), depthGap, readingGap)
				}

				if _bdaa.scanBand("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c", bag, _age(_gcda, 0), bag.minDepth()-depthGap, bag.maxDepth()+depthGap, _fdgc, false, false) > 0 {
					grew = true
				}
				if _bdaa.scanBand("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c", bag, _age(_gcda, readingGap), bag.minDepth(), bag.maxDepth(), _facc, false, false) > 0 {
					grew = true
				}
				if grew {
					continue
				}

				// Counting-only pass (freeze=true): nothing is moved yet.
				candidates := _bdaa.scanBand("", bag, _age(_cecg, lineGap), bag.minDepth(), bag.maxDepth(), _fbc, true, false)
				if candidates > 0 {
					depthSpan := (bag.maxDepth() - bag.minDepth()) / bag._aad
					if (candidates > 1 && float64(candidates) > 0.3*depthSpan) || candidates <= 10 {
						if _bdaa.scanBand("\u006f\u0074\u0068e\u0072", bag, _age(_cecg, lineGap), bag.minDepth(), bag.maxDepth(), _fbc, false, true) > 0 {
							grew = true
						}
					}
				}
			}
			result = append(result, bag)
		}
	}
	return result
}

// getParagraphs builds the paragraphs of the page.
//
// Rulings are gathered from `_afgd` (when `_bffc` is set) and from `_ddae`
// (when `_gbdg` is set) and converted with toTilings. The text marks are then
// processed one orientation at a time (0, 90, 180, 270), each group being
// handed to `_fdba`; the loop stops early once every mark has been consumed.
func (_cbe *PageText) getParagraphs() paraList {
	var rulings rulingList
	if _bffc {
		fromStrokes := _efda(_cbe._afgd)
		rulings = append(rulings, fromStrokes...)
	}
	if _gbdg {
		fromFills := _ggeeg(_cbe._ddae)
		rulings = append(rulings, fromFills...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(_cbe._dbfe)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		// At most `remaining` marks can still match, so that is the useful
		// capacity hint (the number already consumed is 0 on the first pass).
		group := make([]*textMark, 0, remaining)
		for _, mark := range _cbe._dbfe {
			if mark._eeacf == orient {
				group = append(group, mark)
			}
		}
		if len(group) > 0 {
			groupParas := _fdba(group, _cbe._babf, rulings, tilings, _cbe._cdgg._cgff)
			paras = append(paras, groupParas...)
			remaining -= len(group)
		}
	}
	return paras
}
// PageTextOptions holds various options available in the extraction process.
type PageTextOptions struct {
	_cdec bool
	_cgff bool
}

// _dafa converts `_dabfc` into an integer bin index using bins of width
// `_fdedc`: the quotient is truncated toward zero and, for negative inputs,
// reduced by one.
func _dafa(_dabfc float64) int {
	bin := int(_dabfc / _fdedc)
	if _dabfc >= 0 {
		return bin
	}
	return bin - 1
}

// appendWord adds `_bcaga` to the line, grows the line's bounding box to
// include the word, and raises the line's `_fgcb` and `_gaca` to the word's
// `_aeegf` and `_aecg` when those are larger.
func (_dcadb *textLine) appendWord(_bcaga *textWord) {
	line, word := _dcadb, _bcaga
	line._bfag = append(line._bfag, word)
	line.PdfRectangle = _agfb(line.PdfRectangle, word.PdfRectangle)
	if line._fgcb < word._aeegf {
		line._fgcb = word._aeegf
	}
	if line._gaca < word._aecg {
		line._gaca = word._aecg
	}
}

// xMean returns the mean of the X coordinates of the line's two end points.
func (_gbgdg lineRuling) xMean() float64 {
	sum := _gbgdg._bcfgb.X + _gbgdg._befe.X
	return 0.5 * sum
}
// String returns a description of `v`.
//
// A ruling whose kind is `_cbfe` is reported as "NOT RULING". Otherwise the
// output lists the kind, the primary coordinate, the secondary range and its
// length, `_dcebd`, the color, and the width when it is non-zero. The axis
// labels are "x"/"y", swapped for kind `_bfgb`.
func (_addde *ruling) String() string {
	if _addde._egdf == _cbfe {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}

	primaryAxis, secondaryAxis := "\u0078", "\u0079"
	if _addde._egdf == _bfgb {
		primaryAxis, secondaryAxis = "\u0079", "\u0078"
	}

	widthText := ""
	if _addde._aecega != 0.0 {
		widthText = _ae.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _addde._aecega)
	}

	return _ae.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",
		_addde._egdf, primaryAxis, _addde._gbgc, secondaryAxis, _addde._fgad, _addde._ababc,
		_addde._ababc-_addde._fgad, _addde._dcebd, _addde.Color, widthText)
}

// stateStack is a stack of text states.
type stateStack []*textState

// gridTile is a rectangle with four boolean flags. NOTE(review): the flags
// presumably mark which of the four borders are present (see numBorders);
// which flag maps to which side is not visible here.
type gridTile struct {
	_aec.PdfRectangle
	_fbbf, _cbfd, _fcgc, _cbdbf bool
}

// _gegca returns `_fafgc` with its last rune removed. An empty input yields an
// empty string; slicing to len-1 on it would otherwise panic.
func _gegca(_fafgc string) string {
	runes := []rune(_fafgc)
	if len(runes) == 0 {
		return ""
	}
	return string(runes[:len(runes)-1])
}
// String returns a human readable description of `path`: the point count
// followed by all points when there are at most five, or by the first two
// points and the last point otherwise.
func (_gfae *subpath) String() string {
	points := _gfae._acfg
	count := len(points)
	if count > 5 {
		return _ae.Sprintf("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f", count, points[0], points[1], points[count-1])
	}
	return _ae.Sprintf("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f", count, points)
}

// Render mode flags. Each constant is a distinct bit, so they can be combined.
const (
	RenderModeStroke RenderMode = 1 << iota
	RenderModeFill
	RenderModeClip
)

// textMark is the internal representation of a piece of text placed on a
// page. ToTextMark copies most of these fields into the exported TextMark.
type textMark struct {
	_aec.PdfRectangle
	_eeacf int          // exported as TextMark.Orientation
	_gded  string       // exported as TextMark.Text
	_dcff  string       // exported as TextMark.Original
	_bccc  *_aec.PdfFont // exported as TextMark.Font
	_cbbae float64      // exported as TextMark.FontSize
	_dcfd  float64      // exported as TextMark.Tc
	_ddebg _g.Matrix
	_bdbd  _g.Point
	_gega  _aec.PdfRectangle // exported as TextMark.BBox
	_fdag  _eg.Color         // exported as TextMark.FillColor
	_aabcb _eg.Color         // exported as TextMark.StrokeColor
	_gafcd _bad.PdfObject    // exported as TextMark.DirectObject
	_agff  []string          // exported as TextMark.ObjString
	Tw     float64
	Th     float64
	_ebgag int
	_daeff int // exported as TextMark.Index
}
// String returns a human readable description of `ss`: the number of subpaths
// and the value of the `_effc` flag.
func (_beb *shapesState) String() string {
	subpathCount := len(_beb._edc)
	return _ae.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", subpathCount, _beb._effc)
}

// _ecaa returns a newline when the two values differ by more than the `_ecfbd`
// tolerance and a space otherwise.
func _ecaa(_gfbfg, _bedab float64) string {
	if _ecfbd(_gfbfg - _bedab) {
		return "\u0020"
	}
	return "\u000a"
}

// extractTables finds tables among the paragraphs with findTables and returns
// the paragraph list produced by applyTables. Lists with fewer than `_dbdg`
// paragraphs are returned unchanged.
func (_ccbbc paraList) extractTables(_acgdf []gridTiling) paraList {
	if _gbead {
		_fc.Log.Debug("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_ccbbc))
	}
	if len(_ccbbc) < _dbdg {
		return _ccbbc
	}

	tables := _ccbbc.findTables(_acgdf)
	if _gbead {
		_fc.Log.Info("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(tables))
		for idx, table := range tables {
			title := _ae.Sprintf("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064", idx)
			table.log(title)
		}
	}
	return _ccbbc.applyTables(tables)
}
// TableCell is a cell in a TextTable.
type TableCell struct {
	_aec.PdfRectangle

	// Text is the extracted text.
	Text string

	// Marks holds the TextMarks corresponding to the text in Text.
	Marks TextMarkArray
}

// updateBBox grows the cell's bounding box so that it includes the bounding
// box of every paragraph in the cell.
func (_efgf *compositeCell) updateBBox() {
	cell := _efgf
	for _, para := range cell.paraList {
		cell.PdfRectangle = _agfb(cell.PdfRectangle, para.PdfRectangle)
	}
}

// augmentGrid splits the rulings with vertsHorzs and adds up to four synthetic
// rulings (marked with `_dbed`) where the bounding box of one list extends
// beyond the bounding box of the other by more than `_ecce`:
//   - a ruling of kind `_eebe` at the other list's left and/or right edge,
//     spanning the first list's vertical extent;
//   - a ruling of kind `_bfgb` at the other list's bottom and/or top edge,
//     spanning the second list's horizontal extent.
//
// If either list is empty, or nothing was added, the two lists are returned as
// they came from vertsHorzs.
func (_agcg rulingList) augmentGrid() (rulingList, rulingList) {
	verts, horzs := _agcg.vertsHorzs()
	if len(verts) == 0 || len(horzs) == 0 {
		return verts, horzs
	}
	originalVerts, originalHorzs := verts, horzs

	vertBox := verts.bbox()
	horzBox := horzs.bbox()
	if _eceg {
		_fc.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066", vertBox)
		_fc.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066", horzBox)
	}

	if horzBox.Llx < vertBox.Llx-_ecce {
		left := &ruling{_dcebd: _dbed, _egdf: _eebe, _gbgc: horzBox.Llx, _fgad: vertBox.Lly, _ababc: vertBox.Ury}
		verts = append(rulingList{left}, verts...)
	}
	if horzBox.Urx > vertBox.Urx+_ecce {
		right := &ruling{_dcebd: _dbed, _egdf: _eebe, _gbgc: horzBox.Urx, _fgad: vertBox.Lly, _ababc: vertBox.Ury}
		verts = append(verts, right)
	}
	if vertBox.Lly < horzBox.Lly-_ecce {
		bottom := &ruling{_dcebd: _dbed, _egdf: _bfgb, _gbgc: vertBox.Lly, _fgad: horzBox.Llx, _ababc: horzBox.Urx}
		horzs = append(rulingList{bottom}, horzs...)
	}
	if vertBox.Ury > horzBox.Ury+_ecce {
		top := &ruling{_dcebd: _dbed, _egdf: _bfgb, _gbgc: vertBox.Ury, _fgad: horzBox.Llx, _ababc: horzBox.Urx}
		horzs = append(horzs, top)
	}

	// Same total as the input list: nothing was added.
	if len(verts)+len(horzs) == len(_agcg) {
		return originalVerts, originalHorzs
	}

	combined := append(verts, horzs...)
	_agcg.log("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064")
	combined.log("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d")
	return verts, horzs
}

// xNeighbours builds two events per paragraph, one at its left X edge (flagged
// true) and one at its right X edge (flagged false), and returns the result of
// eventNeighbours on them. When `_fbabf` is non-zero each edge is pushed
// outwards by `_fbabf` times the paragraph's fontsize().
func (_ecad paraList) xNeighbours(_fbabf float64) map[*textPara][]int {
	events := make([]event, 2*len(_ecad))
	for idx, para := range _ecad {
		lo, hi := para.Llx, para.Urx
		if !(_fbabf == 0) {
			lo -= _fbabf * para.fontsize()
			hi += _fbabf * para.fontsize()
		}
		events[2*idx] = event{lo, true, idx}
		events[2*idx+1] = event{hi, false, idx}
	}
	return _ecad.eventNeighbours(events)
}

// moveTextSetLeading stores the negated `_edgf` in the state's `_gbff` field
// and then calls moveLP with both arguments.
func (_cbd *textObject) moveTextSetLeading(_gdcc, _edgf float64) {
	_cbd._cbgd._gbff = -_edgf
	_cbd.moveLP(_gdcc, _edgf)
}
// Text returns the concatenated text content of every list in `_cbag`.
func (_cbag *lists) Text() string {
	var sb _bb.Builder
	for _, item := range *_cbag {
		sb.WriteString(item.Text())
	}
	return sb.String()
}

// scanBand counts, and unless `_ddcc` is set also moves, words from this bag
// into `_eada`.
//
// Candidates are the words whose `_aecg` lies within [`_aeda`, `_deb`] widened
// by `_ddfa` times the initial `_aad` of `_eada`. A candidate is skipped when
// `_bbefg` rejects it, when `_agbg` is positive and the font size mismatch
// exceeds it, or when `_eada.blocked` reports true. Unless `_eagf` is set, the
// band is widened to include each accepted word. When `_ddcc` is set nothing
// is moved and at most one word per depth index is counted.
//
// `_beeb` is not used by the function body. The return value is the number of
// accepted words.
func (_ecef *wordBag) scanBand(_beeb string, _eada *wordBag, _bbefg func(_gbbe *wordBag, _cac *textWord) bool, _aeda, _deb, _agbg float64, _ddcc, _eagf bool) int {
	freeze, fixedBand := _ddcc, _eagf
	bandLo, bandHi := _aeda, _deb

	initialSize := _eada._aad
	var removals map[int]map[*textWord]struct{}
	if !freeze {
		removals = _ecef.makeRemovals()
	}
	slack := _ddfa * initialSize

	accepted := 0
	for _, depthIdx := range _ecef.depthBand(bandLo-slack, bandHi+slack) {
		if len(_ecef._gbbd[depthIdx]) == 0 {
			continue
		}
		for _, word := range _ecef._gbbd[depthIdx] {
			if !(bandLo-slack <= word._aecg && word._aecg <= bandHi+slack) {
				continue
			}
			if !_bbefg(_eada, word) {
				continue
			}

			// Two measures of font size mismatch; the smaller one is compared to the limit.
			relDiff := 2.0 * _ea.Abs(word._aeegf-_eada._aad) / (word._aeegf + _eada._aad)
			ratio := _ea.Max(word._aeegf/_eada._aad, _eada._aad/word._aeegf)
			mismatch := _ea.Min(relDiff, ratio)
			if _agbg > 0 && mismatch > _agbg {
				continue
			}
			if _eada.blocked(word) {
				continue
			}

			if !freeze {
				_eada.pullWord(word, depthIdx, removals)
			}
			accepted++

			if !fixedBand {
				if word._aecg < bandLo {
					bandLo = word._aecg
				}
				if word._aecg > bandHi {
					bandHi = word._aecg
				}
			}
			if freeze {
				break
			}
		}
	}

	if !freeze {
		_ecef.applyRemovals(removals)
	}
	return accepted
}

// findPrimSec returns the first ruling whose `_gbgc` equals `_cafcf` within
// the `_ecfbd` tolerance and whose [_fgad, _ababc] interval, widened by
// `_ecce` on both ends, contains `_ecfee`. It returns nil when none matches.
func (_fcdcc rulingList) findPrimSec(_cafcf, _ecfee float64) *ruling {
	for _, candidate := range _fcdcc {
		if !_ecfbd(candidate._gbgc - _cafcf) {
			continue
		}
		if candidate._fgad-_ecce <= _ecfee && _ecfee <= candidate._ababc+_ecce {
			return candidate
		}
	}
	return nil
}
// addNeighbours fills in the four neighbour links of every paragraph:
// `_bceaa` (a paragraph entirely to the left), `_aabe` (entirely to the
// right), `_ccee` (entirely below) and `_ggfbb` (entirely above).
//
// Horizontal candidates come from yNeighbours(_ffff) and must pass `_bfc`;
// vertical candidates come from xNeighbours(_fgde) and must pass `_bae`. In
// each direction the nearest candidate is chosen, and it is rejected if any
// other candidate on that side overlaps it along that axis. Finally every link
// that is not reciprocated by the linked paragraph is cleared.
func (_bcgag paraList) addNeighbours() {
	paras := _bcgag

	// splitSides partitions the indexed paragraphs into those entirely left of
	// `ref` and those entirely right of it.
	splitSides := func(indexes []int, ref *textPara) ([]*textPara, []*textPara) {
		lefts := make([]*textPara, 0, len(indexes)-1)
		rights := make([]*textPara, 0, len(indexes)-1)
		for _, idx := range indexes {
			other := paras[idx]
			if other.Urx <= ref.Llx {
				lefts = append(lefts, other)
			} else if other.Llx >= ref.Urx {
				rights = append(rights, other)
			}
		}
		return lefts, rights
	}

	// splitLevels partitions the indexed paragraphs into those entirely above
	// `ref` (first result) and those entirely below it (second result).
	splitLevels := func(indexes []int, ref *textPara) ([]*textPara, []*textPara) {
		aboves := make([]*textPara, 0, len(indexes)-1)
		belows := make([]*textPara, 0, len(indexes)-1)
		for _, idx := range indexes {
			other := paras[idx]
			if other.Ury <= ref.Lly {
				belows = append(belows, other)
			} else if other.Lly >= ref.Ury {
				aboves = append(aboves, other)
			}
		}
		return aboves, belows
	}

	// Left and right neighbours.
	neighbours := paras.yNeighbours(_ffff)
	for _, para := range paras {
		indexes := neighbours[para]
		if len(indexes) == 0 {
			continue
		}
		lefts, rights := splitSides(indexes, para)
		if len(lefts) == 0 && len(rights) == 0 {
			continue
		}

		if len(lefts) > 0 {
			nearest := lefts[0]
			for _, cand := range lefts[1:] {
				if cand.Urx >= nearest.Urx {
					nearest = cand
				}
			}
			for _, cand := range lefts {
				if cand != nearest && cand.Urx > nearest.Llx {
					nearest = nil
					break
				}
			}
			if nearest != nil && _bfc(para.PdfRectangle, nearest.PdfRectangle) {
				para._bceaa = nearest
			}
		}

		if len(rights) > 0 {
			nearest := rights[0]
			for _, cand := range rights[1:] {
				if cand.Llx <= nearest.Llx {
					nearest = cand
				}
			}
			for _, cand := range rights {
				if cand != nearest && cand.Llx < nearest.Urx {
					nearest = nil
					break
				}
			}
			if nearest != nil && _bfc(para.PdfRectangle, nearest.PdfRectangle) {
				para._aabe = nearest
			}
		}
	}

	// Below and above neighbours.
	neighbours = paras.xNeighbours(_fgde)
	for _, para := range paras {
		indexes := neighbours[para]
		if len(indexes) == 0 {
			continue
		}
		aboves, belows := splitLevels(indexes, para)
		if len(aboves) == 0 && len(belows) == 0 {
			continue
		}

		if len(belows) > 0 {
			nearest := belows[0]
			for _, cand := range belows[1:] {
				if cand.Ury >= nearest.Ury {
					nearest = cand
				}
			}
			for _, cand := range belows {
				if cand != nearest && cand.Ury > nearest.Lly {
					nearest = nil
					break
				}
			}
			if nearest != nil && _bae(para.PdfRectangle, nearest.PdfRectangle) {
				para._ccee = nearest
			}
		}

		if len(aboves) > 0 {
			nearest := aboves[0]
			for _, cand := range aboves[1:] {
				if cand.Lly <= nearest.Lly {
					nearest = cand
				}
			}
			for _, cand := range aboves {
				if cand != nearest && cand.Lly < nearest.Ury {
					nearest = nil
					break
				}
			}
			if nearest != nil && _bae(para.PdfRectangle, nearest.PdfRectangle) {
				para._ggfbb = nearest
			}
		}
	}

	// Drop every link that the linked paragraph does not return.
	for _, para := range paras {
		if link := para._bceaa; link != nil && link._aabe != para {
			para._bceaa = nil
		}
		if link := para._ggfbb; link != nil && link._ccee != para {
			para._ggfbb = nil
		}
		if link := para._aabe; link != nil && link._bceaa != para {
			para._aabe = nil
		}
		if link := para._ccee; link != nil && link._ggfbb != para {
			para._ccee = nil
		}
	}
}

// removeDuplicates drops every point that `_ceab` reports as equal to the
// point kept immediately before it. An empty path is left untouched.
func (_bdbe *subpath) removeDuplicates() {
	if len(_bdbe._acfg) == 0 {
		return
	}
	kept := []_g.Point{_bdbe._acfg[0]}
	for _, point := range _bdbe._acfg[1:] {
		previous := kept[len(kept)-1]
		if _ceab(point, previous) {
			continue
		}
		kept = append(kept, point)
	}
	_bdbe._acfg = kept
}

// asRuling converts the line into a ruling marked with `_bggb`. For kind
// `_eebe` the primary coordinate is xMean() and the range spans the end
// points' Y values; for kind `_bfgb` it is yMean() and the range spans the X
// values. Any other kind is logged as an error and yields (nil, false).
func (_cfda lineRuling) asRuling() (*ruling, bool) {
	start, end := _cfda._bcfgb, _cfda._befe
	result := ruling{_egdf: _cfda._cdbfg, Color: _cfda.Color, _dcebd: _bggb}

	if _cfda._cdbfg == _eebe {
		result._gbgc = _cfda.xMean()
		result._fgad = _ea.Min(start.Y, end.Y)
		result._ababc = _ea.Max(start.Y, end.Y)
	} else if _cfda._cdbfg == _bfgb {
		result._gbgc = _cfda.yMean()
		result._fgad = _ea.Min(start.X, end.X)
		result._ababc = _ea.Max(start.X, end.X)
	} else {
		_fc.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _cfda._cdbfg)
		return nil, false
	}
	return &result, true
}
// Elements returns the TextMarks in `ma`.
func (_ddc *TextMarkArray) Elements() []TextMark {
	marks := _ddc._bade
	return marks
}

// Tuning constants used by the extraction code.
const (
	_bedg  = 1.0e-6 // tolerance used by _ecfbd
	_edge  = 1.0e-4
	_ceba  = 10
	_fdedc = 6 // bin width used by _dafa
	_ddfa  = 0.5
	_fbdbc = 0.12
	_babe  = 0.19
	_accc  = 0.04
	_agbf  = 0.04
	_fefg  = 1.0
	_fdgc  = 0.04
	_debe  = 0.4
	_facc  = 0.7
	_gefa  = 1.0
	_fbc   = 0.1
	_ggad  = 1.4
	_decc  = 0.46
	_gdda  = 0.02
	_dfdcc = 0.2
	_gccc  = 0.5
	_abdda = 4 // minimum rune count required by _ecfgf
	_dgce  = 4.0
	_dbdg  = 6
	_gcfa  = 0.3
	_fgde  = 0.01
	_ffff  = 0.02
	_aaaa  = 2
	_acebd = 2
	_edfde = 500
	_ebede = 4.0
	_gcb   = 4.0
	_dccc  = 0.05
	_bedf  = 0.1
	_ecce  = 2.0
	_cabc  = 2.0
	_ebcd  = 1.5
	_acggf = 3.0
	_efea  = 0.25
)

// _dgea returns a ruling of kind `_eebe` placed on the right edge of `_ebadb`
// and spanning the rectangle's full height.
func _dgea(_ebadb _aec.PdfRectangle) *ruling {
	edge := &ruling{_egdf: _eebe}
	edge._gbgc = _ebadb.Urx
	edge._fgad = _ebadb.Lly
	edge._ababc = _ebadb.Ury
	return edge
}

// get returns the paragraph stored under the key `_fgged(_gaaf, _fabb)`, or
// nil when the table has no entry for it.
func (_bdgfd *textTable) get(_gaaf, _fabb int) *textPara {
	key := _fgged(_gaaf, _fabb)
	return _bdgfd._bfdff[key]
}

// _ecfgf reports whether `_bcfc` has at least `_abdda` runes, ends with a rune
// in the unicode.Hyphen range table, and has a non-space rune immediately
// before that final rune.
func _ecfgf(_bcfc string) bool {
	if _e.RuneCountInString(_bcfc) < _abdda {
		return false
	}
	last, lastSize := _e.DecodeLastRuneInString(_bcfc)
	if lastSize <= 0 {
		return false
	}
	if !_be.Is(_be.Hyphen, last) {
		return false
	}
	before, beforeSize := _e.DecodeLastRuneInString(_bcfc[:len(_bcfc)-lastSize])
	if beforeSize <= 0 {
		return false
	}
	return !_be.IsSpace(before)
}

// _ecfbd reports whether the absolute value of `_gddab` is below `_bedg`.
func _ecfbd(_gddab float64) bool {
	magnitude := _ea.Abs(_gddab)
	return magnitude < _bedg
}
// toTextMarks returns the TextMarks of all words in the line, in order. A
// space mark is inserted through `_ggcce` before every word whose `_ceff` flag
// is set. `_gcfb` is the running offset shared with the helpers.
func (_fbgg *textLine) toTextMarks(_gcfb *int) []TextMark {
	var marks []TextMark
	for _, word := range _fbgg._bfag {
		if word._ceff {
			marks = _ggcce(marks, _gcfb, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_gcfb)...)
	}
	return marks
}

// _gdgbf classifies a pair of extents. It returns `_bfgb` when `_ecbb` is at
// least `_geedg` and `_gdba(_fbbee, _ecbb)` holds, `_eebe` when the same is
// true with the two extents swapped, and `_cbfe` otherwise.
func _gdgbf(_ecbb, _fbbee, _geedg float64) rulingKind {
	switch {
	case _ecbb >= _geedg && _gdba(_fbbee, _ecbb):
		return _bfgb
	case _fbbee >= _geedg && _gdba(_ecbb, _fbbee):
		return _eebe
	default:
		return _cbfe
	}
}

// _cgbfe classifies the segment between two points by passing its absolute X
// and Y extents to `_gdgbf` with a minimum length of `_ebede`.
func _cgbfe(_daac, _geab _g.Point) rulingKind {
	extentX := _ea.Abs(_daac.X - _geab.X)
	extentY := _ea.Abs(_daac.Y - _geab.Y)
	return _gdgbf(extentX, extentY, _ebede)
}

// _bcea returns the difference between the left edges (Llx) of the two
// bounding boxes, first minus second.
func _bcea(_beefb, _baaba bounded) float64 {
	firstLeft := _beefb.bbox().Llx
	secondLeft := _baaba.bbox().Llx
	return firstLeft - secondLeft
}

// setTextMatrix builds a matrix from the six values in `_eabe` and stores it
// in both `_acc` and `_dde`. A slice of any other length is logged and ignored.
func (_daf *textObject) setTextMatrix(_eabe []float64) {
	if len(_eabe) != 6 {
		_fc.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", len(_eabe))
		return
	}
	matrix := _g.NewMatrix(_eabe[0], _eabe[1], _eabe[2], _eabe[3], _eabe[4], _eabe[5])
	_daf._acc = matrix
	_daf._dde = _daf._acc
}

// setTextRenderMode stores `_cde` as the RenderMode of the text state. It is a
// no-op on a nil receiver.
func (_eba *textObject) setTextRenderMode(_cde int) {
	if _eba != nil {
		_eba._cbgd._ffgba = RenderMode(_cde)
	}
}

// depthIndexes returns the keys of the bag's `_gbbd` map in ascending order,
// or nil when the map is empty.
func (_abfef *wordBag) depthIndexes() []int {
	if len(_abfef._gbbd) == 0 {
		return nil
	}
	indexes := make([]int, 0, len(_abfef._gbbd))
	for depthIdx := range _abfef._gbbd {
		indexes = append(indexes, depthIdx)
	}
	_a.Ints(indexes)
	return indexes
}

// has reports whether `_abfgb` is a member of the set.
func (_agdcg intSet) has(_abfgb int) bool {
	if _, found := _agdcg[_abfgb]; found {
		return true
	}
	return false
}

// sortTopoOrder reorders the list in place according to topoOrder.
func (_ecabf paraList) sortTopoOrder() {
	_ecabf.reorder(_ecabf.topoOrder())
}

// inTile returns the paragraphs whose bounding box `_ccea` contains.
func (_agad paraList) inTile(_ccea gridTile) paraList {
	var inside paraList
	for _, para := range _agad {
		if !_ccea.contains(para.PdfRectangle) {
			continue
		}
		inside = append(inside, para)
	}

	if _gbead {
		_ae.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _ccea, len(inside))
		for idx, para := range inside {
			_ae.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, para)
		}
		_ae.Println("")
	}
	return inside
}

// _gdec is a list of single-character strings, mostly bullet-like symbols plus
// the letter "o". NOTE(review): presumably the markers recognised as list
// bullets; confirm against the code that reads it.
var _gdec = []string{
	"\u2756",
	"\u27a2",
	"\u2713",
	"\u2022",
	"\uf0a7",
	"\u25a1",
	"\u2212",
	"\u25a0",
	"\u25aa",
	"\u006f",
}
// _gadb returns the distance from the top edge (Ury) of `_edfc` to the bottom
// edge (Lly) of the bounding box of `_abga`.
func _gadb(_edfc _aec.PdfRectangle, _abga bounded) float64 {
	return _edfc.Ury - _abga.bbox().Lly
}

// _gceb returns true when either para has its `_egbea` flag set; otherwise it
// returns the result of `_ecfbd` applied to the difference of the two depths.
func _gceb(_ecaeg, _cddg *textPara) bool {
	if _ecaeg._egbea || _cddg._egbea {
		return true
	}
	depthDiff := _ecaeg.depth() - _cddg.depth()
	return _ecfbd(depthDiff)
}

// fontEntry pairs a font with an int64 value.
// NOTE(review): the meaning of `_egfcg` is not visible in this part of the file.
type fontEntry struct {
	_gfab  *_aec.PdfFont
	_egfcg int64
}

// intersects reports whether `_cgfg` and `_ddedb` cross each other.
//
// Both conditions must hold:
//   - one ruling is of kind `_eebe` and the other of kind `_bfgb`;
//   - each ruling's `_gbgc` coordinate lies inside the other's
//     [`_fgad`, `_ababc`] extent, widened by the tolerance `_ecce` on both ends.
//
// When the `_eceg` debug flag is set, the intermediate results are printed.
func (_cgfg *ruling) intersects(_ddedb *ruling) bool {
	orthogonal := (_cgfg._egdf == _eebe && _ddedb._egdf == _bfgb) ||
		(_ddedb._egdf == _eebe && _cgfg._egdf == _bfgb)
	// overlaps reports whether b's `_gbgc` falls within a's extent (with tolerance).
	overlaps := func(a, b *ruling) bool {
		return a._fgad-_ecce <= b._gbgc && b._gbgc <= a._ababc+_ecce
	}
	o1 := overlaps(_cgfg, _ddedb)
	o2 := overlaps(_ddedb, _cgfg)
	result := orthogonal && o1 && o2
	if _eceg {
		_ae.Printf("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a", orthogonal, o1, o2, result, _cgfg, _ddedb)
	}
	return result
}

// _fbccc builds a textWord from `_afec`. The word's rectangle is the `_agfb`
// combination of all mark rectangles, `_aeegf` is the largest `_cbbae` among
// the marks and `_aecg` is measured from the top of `_agcgg` to the bottom of
// the word. `_afec` must contain at least one mark.
func _fbccc(_afec []*textMark, _agcgg _aec.PdfRectangle) *textWord {
	box := _afec[0].PdfRectangle
	largest := _afec[0]._cbbae
	for _, mark := range _afec[1:] {
		box = _agfb(box, mark.PdfRectangle)
		if mark._cbbae > largest {
			largest = mark._cbbae
		}
	}
	return &textWord{
		PdfRectangle: box,
		_ebfa:        _afec,
		_aecg:        _agcgg.Ury - box.Lly,
		_aeegf:       largest,
	}
}

// asTiling converts the rulings into a gridTiling.
//
// Steps:
//  1. Log any pair of consecutive rulings that align on both coordinates.
//  2. Sort the rulings, split them with vertsHorzs and take the primaries of
//     each direction as the x and y grid lines.
//  3. Create one elementary tile per grid cell via `_aggg`.
//  4. Coalesce tiles horizontally: a tile with `_cbfd` set absorbs the tiles
//     to its right until one with `_cbdbf` set has been absorbed.
//  5. Coalesce the row tiles vertically: a tile absorbs the tiles directly
//     below it (same Llx and Urx) until `_fcgc` is set; row 0 always sets it.
//     Only tiles whose complete() is true are kept.
//
// An empty gridTiling is returned when either direction yields fewer than two
// grid lines. This now also covers an empty ruling list and a list with no
// rulings in one direction, both of which previously caused an out-of-range
// panic.
func (_ecfef rulingList) asTiling() gridTiling {
	if _ddb {
		_fc.Log.Info("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(_ecfef))
	}

	// Index loop rather than ranging over _ecfef[1:], which panics when the
	// list is empty.
	for i := 1; i < len(_ecfef); i++ {
		prev, cur := _ecfef[i-1], _ecfef[i]
		if prev.alignsPrimary(cur) && prev.alignsSec(cur) {
			_fc.Log.Error("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073", cur, prev)
		}
	}

	_ecfef.sortStrict()
	_ecfef.log("\u0073n\u0061\u0070\u0070\u0065\u0064")

	verts, horzs := _ecfef.vertsHorzs()
	xs := verts.primaries()
	ys := horzs.primaries()
	nx := len(xs) - 1 // number of tile columns
	ny := len(ys) - 1 // number of tile rows
	// `<= 0` (not `== 0`): an empty primaries slice gives -1 and must not
	// reach the xs[0] / ys[0] accesses below.
	if nx <= 0 || ny <= 0 {
		return gridTiling{}
	}

	bounds := _aec.PdfRectangle{Llx: xs[0], Urx: xs[nx], Lly: ys[0], Ury: ys[ny]}
	if _ddb {
		_fc.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064", len(verts))
		for i, v := range verts {
			_ae.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, v)
		}
		_fc.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064", len(horzs))
		for i, v := range horzs {
			_ae.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, v)
		}
		_fc.Log.Info("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f", nx, ny, xs, ys)
	}

	// Elementary tiles, stored row-major: index = y*nx + x.
	tiles := make([]gridTile, nx*ny)
	for y := ny - 1; y >= 0; y-- {
		lly := ys[y]
		ury := ys[y+1]
		for x := 0; x < nx; x++ {
			llx := xs[x]
			urx := xs[x+1]
			vertAtLlx := verts.findPrimSec(llx, lly)
			vertAtUrx := verts.findPrimSec(urx, lly)
			horzAtLly := horzs.findPrimSec(lly, llx)
			horzAtUry := horzs.findPrimSec(ury, llx)
			cellRect := _aec.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			tile := _aggg(cellRect, vertAtLlx, vertAtUrx, horzAtLly, horzAtUry)
			tiles[y*nx+x] = tile
			if _ddb {
				_ae.Printf("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a", x, y, tile.String(), tile.Width(), tile.Height())
			}
		}
	}

	if _ddb {
		_fc.Log.Info("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", bounds)
	}

	// Horizontal coalescing: rowTiles[y] maps Llx -> merged tile in row y.
	rowTiles := make([]map[float64]gridTile, ny)
	for y := ny - 1; y >= 0; y-- {
		if _ddb {
			_ae.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", y)
		}
		rowTiles[y] = make(map[float64]gridTile, nx)
		for x := 0; x < nx; x++ {
			tile := tiles[y*nx+x]
			if _ddb {
				_ae.Printf("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", x, tile)
			}
			if !tile._cbfd {
				continue
			}
			last := x
			for x2 := x + 1; !tile._cbdbf && x2 < nx; x2++ {
				next := tiles[y*nx+x2]
				tile.Urx = next.Urx
				tile._fbbf = tile._fbbf || next._fbbf
				tile._fcgc = tile._fcgc || next._fcgc
				tile._cbdbf = next._cbdbf
				if _ddb {
					_ae.Printf("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a", x2, next, tile)
				}
				last = x2
			}
			if _ddb {
				_ae.Printf(" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n", x, last, tile)
			}
			// Skip the columns that were absorbed into this tile.
			x = last
			rowTiles[y][tile.Llx] = tile
		}
	}

	// merged[lly][llx] holds the final tiles; absorbed[lly][llx] marks row
	// tiles that have already been swallowed by a tile above them.
	merged := make(map[float64]map[float64]gridTile, ny)
	absorbed := make(map[float64]map[float64]struct{}, ny)
	for y := ny - 1; y >= 0; y-- {
		lly := tiles[y*nx].Lly
		merged[lly] = make(map[float64]gridTile, nx)
		absorbed[lly] = make(map[float64]struct{}, nx)
	}

	if _ddb {
		_fc.Log.Info("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", bounds)
	}

	// Vertical coalescing, from the highest row index down to row 0.
	for y := ny - 1; y >= 0; y-- {
		lly := tiles[y*nx].Lly
		row := rowTiles[y]
		if _ddb {
			_ae.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", y)
		}
		for _, llx := range _affg(row) {
			if _, done := absorbed[lly][llx]; done {
				continue
			}
			tile := row[llx]
			if _ddb {
				_ae.Printf(" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a", tile.String())
			}
			for y2 := y - 1; y2 >= 0 && !tile._fcgc; y2-- {
				below, ok := rowTiles[y2][llx]
				if !ok {
					break
				}
				if below.Urx != tile.Urx {
					break
				}
				tile._fcgc = below._fcgc
				tile.Lly = below.Lly
				if _ddb {
					_ae.Printf("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a", below.String(), tile.String())
				}
				absorbed[below.Lly][below.Llx] = struct{}{}
			}
			if y == 0 {
				tile._fcgc = true
			}
			if tile.complete() {
				merged[lly][llx] = tile
			}
		}
	}

	tiling := gridTiling{
		PdfRectangle: bounds,
		_fecg:        _eagbe(merged),
		_eceeed:      _aced(merged),
		_bbfc:        merged,
	}
	tiling.log("\u0043r\u0065\u0061\u0074\u0065\u0064")
	return tiling
}
// Options holds the extractor options.
type Options struct {
	// DisableDocumentTags specifies whether to use the document tags during list extraction.
	DisableDocumentTags bool

	// ApplyCropBox will extract page text based on page cropbox if set to `true`.
	ApplyCropBox bool

	// UseSimplerExtractionProcess will skip topological text ordering and table processing.
	//
	// NOTE: While the extra processing is normally beneficial, it can also lead to problems
	// when it does not work. This flag lets the user control that process.
	//
	// Skipping some extraction steps also reduces processing time.
	UseSimplerExtractionProcess bool
}

// _edacb draws the image `_cafgf` through the mask `_aecaa` and returns the
// result as a new RGBA image.
//
// The output canvas is anchored at (0, 0) and is as wide and as tall as the
// larger of the two inputs in each dimension. Whichever input does not already
// have that size is first rescaled to it with bilinear interpolation. The
// image is then composited onto an empty canvas via DrawMask with the Over
// operator.
//
// Fix: when the mask needed rescaling, the previous code passed the *image* as
// the scaling source (with the mask's bounds as source rectangle), replacing
// the mask content with a crop of the image. The mask is now scaled from
// itself.
func _edacb(_cafgf, _aecaa _ec.Image) _ec.Image {
	maskSize, imgSize := _aecaa.Bounds().Size(), _cafgf.Bounds().Size()

	width, height := maskSize.X, maskSize.Y
	if imgSize.X > width {
		width = imgSize.X
	}
	if imgSize.Y > height {
		height = imgSize.Y
	}
	canvas := _ec.Rect(0, 0, width, height)

	if maskSize.X != width || maskSize.Y != height {
		scaledMask := _ec.NewRGBA(canvas)
		_ag.BiLinear.Scale(scaledMask, canvas, _aecaa, _aecaa.Bounds(), _ag.Over, nil)
		_aecaa = scaledMask
	}
	if imgSize.X != width || imgSize.Y != height {
		scaledImg := _ec.NewRGBA(canvas)
		_ag.BiLinear.Scale(scaledImg, canvas, _cafgf, _cafgf.Bounds(), _ag.Over, nil)
		_cafgf = scaledImg
	}

	out := _ec.NewRGBA(canvas)
	_ag.DrawMask(out, canvas, _cafgf, _ec.Point{}, _aecaa, _ec.Point{}, _ag.Over)
	return out
}

// add appends `_ddeg` to the subpath's point list.
func (_ccaa *subpath) add(_ddeg ..._g.Point) {
	_ccaa._acfg = append(_ccaa._acfg, _ddeg...)
}
// findTableGrid tries to build a table from the paras in `_dfeg` that fall
// inside the tiles of `_cggfb`.
//
// Every tile present in the tiling is filled with the paras returned by inTile.
// The attempt is abandoned (nil, nil) as soon as the number of empty tiles
// exceeds (1 - `_gcfa`) of the grid size, or when the row-0 check below counts
// zero cells. On success it returns the table after reduceTiling and
// subdivide, together with the set of paras that were placed in it.
func (_dfeg paraList) findTableGrid(_cggfb gridTiling) (*textTable, map[*textPara]struct{}) {
	cols := len(_cggfb._fecg)
	rows := len(_cggfb._eceeed)
	table := textTable{
		_fbccb: true,
		_afcga: cols,
		_agdc:  rows,
		_bfdff: make(map[uint64]*textPara, cols*rows),
		_gaeb:  make(map[uint64]compositeCell, cols*rows),
	}
	table.PdfRectangle = _cggfb.PdfRectangle
	used := make(map[*textPara]struct{})
	maxEmpty := int((1.0 - _gcfa) * float64(cols*rows))
	numEmpty := 0
	if _ddb {
		_fc.Log.Info("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064", cols, rows)
	}

	for y, lly := range _cggfb._eceeed {
		rowTiles, haveRow := _cggfb._bbfc[lly]
		if !haveRow {
			continue
		}
		for x, llx := range _cggfb._fecg {
			tile, haveTile := rowTiles[llx]
			if !haveTile {
				continue
			}
			paras := _dfeg.inTile(tile)
			if len(paras) > 0 {
				table.putComposite(x, y, paras, tile.PdfRectangle)
				for _, para := range paras {
					used[para] = struct{}{}
				}
				continue
			}
			numEmpty++
			if numEmpty > maxEmpty {
				if _ddb {
					_fc.Log.Info("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064", numEmpty)
				}
				return nil, nil
			}
		}
	}

	// Count the row-0 cells that are missing or do not have `_egbea` set.
	count := 0
	for x := 0; x < cols; x++ {
		if cell := table.get(x, 0); cell == nil || !cell._egbea {
			count++
		}
	}
	if count == 0 {
		if _ddb {
			_fc.Log.Info("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030")
		}
		return nil, nil
	}

	result := table.reduceTiling(_cggfb, _acggf)
	result = result.subdivide()
	return result, used
}

// _aadc returns the negated bottom edge (Lly) of the bounding box of `_afdg`.
func _aadc(_afdg bounded) float64 {
	box := _afdg.bbox()
	return -box.Lly
}

// minDepth returns `_aeceg` reduced by the difference between the bag's top
// edge (Ury) and `_aad`.
func (_agcd *wordBag) minDepth() float64 {
	span := _agcd.Ury - _agcd._aad
	return _agcd._aeceg - span
}
// _cdffd reports whether `_bdfbb` and `_beed` differ by no more than `_ecce`.
func _cdffd(_bdfbb, _beed float64) bool {
	diff := _bdfbb - _beed
	return _ea.Abs(diff) <= _ecce
}

// writeCellText writes the text of the para's lines to `_gagfc`.
//
// When `_bfab` is set, a line that ends in a hyphen and is not the last line is
// passed through `_gegca` and joined directly to the next line. All other
// non-final lines are followed by the separator `_ecaa` returns for the two
// lines' `_gaca` values. Errors from the writer are ignored.
func (_afcc *textPara) writeCellText(_gagfc _bc.Writer) {
	lastIdx := len(_afcc._bfagf) - 1
	for idx, line := range _afcc._bfagf {
		text := line.text()
		isLast := idx == lastIdx
		dehyphenate := _bfab && line.endsInHyphen() && !isLast
		if dehyphenate {
			text = _gegca(text)
		}
		_gagfc.Write([]byte(text))
		if !dehyphenate && !isLast {
			sep := _ecaa(line._gaca, _afcc._bfagf[idx+1]._gaca)
			_gagfc.Write([]byte(sep))
		}
	}
}

// findTextTables returns the tables grown from the paras in the list.
//
// Paras that are already taken or have zero width are skipped, as are paras
// for which isAtom returns nil. A grown table is kept only when its cell count
// (`_afcga` x `_agdc`) is at least `_dbdg`.
func (_cdgc paraList) findTextTables() []*textTable {
	var tables []*textTable
	for _, para := range _cdgc {
		if para.taken() || para.Width() == 0 {
			continue
		}
		table := para.isAtom()
		if table == nil {
			continue
		}
		table.growTable()
		if table._afcga*table._agdc >= _dbdg {
			table.markCells()
			table.log("\u0067\u0072\u006fw\u006e")
			tables = append(tables, table)
		}
	}
	return tables
}

// extractInlineImage converts the inline image `_bcc` to RGB and appends an
// ImageMark for it to the context.
//
// The image's colorspace is used for the conversion, with DeviceGray as the
// fallback when none is returned. The mark's size, angle and position are
// taken from the CTM of `_gabf`. The `_aa` counter is incremented on success.
// The first error encountered is returned unchanged.
func (_agb *imageExtractContext) extractInlineImage(_bcc *_ba.ContentStreamInlineImage, _gabf _ba.GraphicsState, _fee *_aec.PdfPageResources) error {
	img, err := _bcc.ToImage(_fee)
	if err != nil {
		return err
	}
	cs, err := _bcc.GetColorSpace(_fee)
	if err != nil {
		return err
	}
	if cs == nil {
		cs = _aec.NewPdfColorspaceDeviceGray()
	}
	rgb, err := cs.ImageToRGB(*img)
	if err != nil {
		return err
	}

	ctm := _gabf.CTM
	mark := ImageMark{
		Image:  &rgb,
		Width:  ctm.ScalingFactorX(),
		Height: ctm.ScalingFactorY(),
		Angle:  ctm.Angle(),
	}
	mark.X, mark.Y = ctm.Translation()
	_agb._bff = append(_agb._bff, mark)
	_agb._aa++
	return nil
}

// vertsHorzs splits the list by kind: the first result holds the rulings of
// kind `_eebe`, the second those of kind `_bfgb`. Rulings of any other kind
// are dropped.
func (_agegf rulingList) vertsHorzs() (rulingList, rulingList) {
	var first, second rulingList
	for _, r := range _agegf {
		if r._egdf == _eebe {
			first = append(first, r)
		} else if r._egdf == _bfgb {
			second = append(second, r)
		}
	}
	return first, second
}

// complete reports whether every tile in the tiling is complete.
func (_cceb gridTiling) complete() bool {
	for _, row := range _cceb._bbfc {
		for _, tile := range row {
			if tile.complete() {
				continue
			}
			return false
		}
	}
	return true
}

// gridTiling is a rectangle subdivided into tiles.
//
// `_bbfc` is a two-level map of tiles; findTableGrid and log index its outer
// level with the values in `_eceeed` and its inner level with the values in
// `_fecg`.
type gridTiling struct {
	_aec.PdfRectangle
	_fecg   []float64
	_eceeed []float64
	_bbfc   map[float64]map[float64]gridTile
}

// _bdce reports whether `_ffce` equals the first byte of any string in `_gdec`.
func _bdce(_ffce byte) bool {
	for _, marker := range _gdec {
		if marker[0] == _ffce {
			return true
		}
	}
	return false
}

// reduce returns the table with its empty composite rows and columns removed.
// The receiver itself is returned when nothing is empty.
//
// NOTE(review): the reduced table allocates only `_bfdff`, whereas
// findTableGrid also allocates `_gaeb` before calling putComposite. Confirm
// that putComposite does not write to a nil `_gaeb` map here.
func (_befc *textTable) reduce() *textTable {
	keptRows := make([]int, 0, _befc._agdc)
	keptCols := make([]int, 0, _befc._afcga)
	for y := 0; y < _befc._agdc; y++ {
		if _befc.emptyCompositeRow(y) {
			continue
		}
		keptRows = append(keptRows, y)
	}
	for x := 0; x < _befc._afcga; x++ {
		if _befc.emptyCompositeColumn(x) {
			continue
		}
		keptCols = append(keptCols, x)
	}
	if len(keptRows) == _befc._agdc && len(keptCols) == _befc._afcga {
		return _befc
	}

	reduced := textTable{
		_fbccb: _befc._fbccb,
		_afcga: len(keptCols),
		_agdc:  len(keptRows),
		_bfdff: make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _gbead {
		_fc.Log.Info("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064", _befc._afcga, _befc._agdc, len(keptCols), len(keptRows))
		_fc.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_fc.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}

	// (x, y) are coordinates in the reduced table, (x0, y0) in the original.
	for y, y0 := range keptRows {
		for x, x0 := range keptCols {
			cell, cellBox := _befc.getComposite(x0, y0)
			if cell == nil {
				continue
			}
			if _gbead {
				_ae.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", x, y, x0, y0, _bgfd(cell.merge().text(), 50))
			}
			reduced.putComposite(x, y, cell, cellBox)
		}
	}
	return &reduced
}

// depth returns the smallest depth() among the row-0 cells that exist and do
// not have `_egbea` set, or 1e10 when there is no such cell.
func (_fdccb *textTable) depth() float64 {
	minDepth := 1e10
	for x := 0; x < _fdccb._afcga; x++ {
		cell := _fdccb.get(x, 0)
		if cell != nil && !cell._egbea {
			minDepth = _ea.Min(minDepth, cell.depth())
		}
	}
	return minDepth
}

// _eee returns the translation component of `_dgfag` as a point.
func _eee(_dgfag _g.Matrix) _g.Point {
	tx, ty := _dgfag.Translation()
	return _g.Point{X: tx, Y: ty}
}

// compositeColCorridors returns a map with one nil entry per table column
// (keys 0 .. `_afcga`-1).
func (_gaffb *textTable) compositeColCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, _gaffb._afcga)
	if _gbead {
		_fc.Log.Info("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020", _gaffb._afcga)
	}
	for x := 0; x < _gaffb._afcga; x++ {
		corridors[x] = nil
	}
	return corridors
}

// _faab classifies the rectangle `_bagf` as a ruling kind.
//
// A rectangle wider than it is tall is `_bfgb` when its width is at least
// `_ebede`. Otherwise it is `_eebe` when its height is at least `_ebede`.
// Anything else is `_cbfe`.
func _faab(_bagf _aec.PdfRectangle) rulingKind {
	width := _bagf.Width()
	height := _bagf.Height()
	switch {
	case width > height && width >= _ebede:
		return _bfgb
	case width <= height && height >= _ebede:
		return _eebe
	}
	return _cbfe
}

// _ggge writes the text `_aeced` produces for `_aebg` to `_gcgc`, then
// recurses into each entry of `_aebg._bffd` with three spaces appended to the
// indent `*_edfe`.
func _ggge(_aebg *list, _gcgc *_bb.Builder, _edfe *string) {
	_gcgc.WriteString(_aeced(_aebg, _edfe))
	for _, child := range _aebg._bffd {
		childIndent := *_edfe + "\u0020\u0020\u0020"
		_ggge(child, _gcgc, &childIndent)
	}
}

// _ecag reports whether the magnitude of `_dggg` is below `_cabc`.
func _ecag(_dggg float64) bool {
	magnitude := _ea.Abs(_dggg)
	return magnitude < _cabc
}
// _fegba converts exactly two PDF objects to floats.
// It returns an error when `_beac` does not hold two elements or when the
// numeric conversion fails.
func _fegba(_beac []_bad.PdfObject) (_ceade, _ffaba float64, _ggdgg error) {
	if n := len(_beac); n != 2 {
		return 0, 0, _ae.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", n)
	}
	values, _ggdgg := _bad.GetNumbersAsFloat(_beac)
	if _ggdgg != nil {
		return 0, 0, _ggdgg
	}
	return values[0], values[1], nil
}

// extractPageResourcesToFont appends a Font entry to `_fccg.Fonts` for every
// font in the Font dictionary of `_ffaa` whose name is not already listed
// (as decided by `_ggb`).
//
// For each new font the embedded font program is decoded from the first
// non-nil descriptor entry, tried in this order, and given a file name made of
// the font name plus an extension:
//
//	FontFile  -> ".pfb"
//	FontFile2 -> ".ttf"
//	FontFile3 -> ".cff"
//
// A font without a decoded file is still recorded; `_de` is logged at debug
// level in that case.
//
// NOTE(review): the result of FontDescriptor() and its FontName are
// dereferenced without a nil check. Confirm that neither can be nil here.
func (_fccg *PageFonts) extractPageResourcesToFont(_ffaa *_aec.PdfPageResources) error {
	fontDict, isDict := _bad.GetDict(_ffaa.Font)
	if !isDict {
		return _d.New(_aea)
	}
	for _, key := range fontDict.Keys() {
		var (
			fontData []byte
			fileName string
		)
		fontObj, found := _ffaa.GetFontByName(key)
		if !found {
			return _d.New(_bf)
		}
		font, err := _aec.NewPdfFontFromPdfObject(fontObj)
		if err != nil {
			return err
		}
		descriptor := font.FontDescriptor()
		fontName := font.FontDescriptor().FontName.String()
		subtype := font.Subtype()
		if _ggb(_fccg.Fonts, fontName) {
			continue
		}
		hasToUnicode := len(font.ToUnicode()) != 0

		switch {
		case descriptor.FontFile != nil:
			if stream, ok := _bad.GetStream(descriptor.FontFile); ok {
				fontData, err = _bad.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0070\u0066\u0062"
			}
		case descriptor.FontFile2 != nil:
			if stream, ok := _bad.GetStream(descriptor.FontFile2); ok {
				fontData, err = _bad.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0074\u0074\u0066"
			}
		case descriptor.FontFile3 != nil:
			if stream, ok := _bad.GetStream(descriptor.FontFile3); ok {
				fontData, err = _bad.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0063\u0066\u0066"
			}
		}
		if len(fileName) < 1 {
			_fc.Log.Debug(_de)
		}

		_fccg.Fonts = append(_fccg.Fonts, Font{
			FontName:       fontName,
			PdfFont:        font,
			IsCID:          font.IsCID(),
			IsSimple:       font.IsSimple(),
			ToUnicode:      hasToUnicode,
			FontType:       subtype,
			FontData:       fontData,
			FontFileName:   fileName,
			FontDescriptor: descriptor,
		})
	}
	return nil
}

// _gabfb decodes the image XObject held in the stream `_fgbf` and passes it,
// together with `_ebfe`, to `_cbace`. It returns (nil, nil) when `_fgbf` is
// not a stream.
func _gabfb(_fgbf _bad.PdfObject, _ebfe _eg.Color) (_ec.Image, error) {
	stream, isStream := _bad.GetStream(_fgbf)
	if !isStream {
		return nil, nil
	}
	ximg, err := _aec.NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}
	img, err := ximg.ToImage()
	if err != nil {
		return nil, err
	}
	return _cbace(img, _ebfe), nil
}

// bbox returns the smallest rectangle containing every point of every subpath
// in the section. The first subpath must contain at least one point.
func (_feba pathSection) bbox() _aec.PdfRectangle {
	origin := _feba._gbag[0]._acfg[0]
	box := _aec.PdfRectangle{Llx: origin.X, Urx: origin.X, Lly: origin.Y, Ury: origin.Y}
	// Revisiting the origin point is harmless: it lies on the box already.
	for _, sp := range _feba._gbag {
		for _, pt := range sp._acfg {
			switch {
			case pt.X < box.Llx:
				box.Llx = pt.X
			case pt.X > box.Urx:
				box.Urx = pt.X
			}
			switch {
			case pt.Y < box.Lly:
				box.Lly = pt.Y
			case pt.Y > box.Ury:
				box.Ury = pt.Y
			}
		}
	}
	return box
}

// setCharSpacing stores `_gbf` in the text state's `_cga` field and, when the
// `_eged` debug flag is set, logs the new state. It is a no-op on a nil
// receiver.
func (_afba *textObject) setCharSpacing(_gbf float64) {
	if _afba == nil {
		return
	}
	_afba._cbgd._cga = _gbf
	if !_eged {
		return
	}
	_fc.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _gbf, _afba._cbgd.String())
}

// getCellInfo locates the first cell whose Marks contain `_gcg`.
// It returns {{row}, {column}} for that cell, or nil when no cell matches.
func (_ecbf TextTable) getCellInfo(_gcg TextMark) [][]int {
	for rowIdx, row := range _ecbf.Cells {
		for colIdx := range row {
			marks := &row[colIdx].Marks
			if !marks.exists(_gcg) {
				continue
			}
			return [][]int{{rowIdx}, {colIdx}}
		}
	}
	return nil
}

// _dfba is a meta TextMark with text "[X]", original text " " and white fill
// and stroke colors.
var _dfba = TextMark{
	Text:        "\u005b\u0058\u005d",
	Original:    "\u0020",
	Meta:        true,
	FillColor:   _eg.White,
	StrokeColor: _eg.White,
}

// wordBag is a rectangle holding words grouped under integer keys in `_gbbd`;
// depthIndexes returns those keys sorted.
// NOTE(review): the roles of `_aad`, `_aeceg`, `_eddc` and `_gbfd` are not
// fully visible in this part of the file.
type wordBag struct {
	_aec.PdfRectangle
	_aad         float64
	_eddc, _gbfd rulingList
	_aeceg       float64
	_gbbd        map[int][]*textWord
}
// String returns a one-line description of the text mark: its offset, quoted
// text, the text's runes in hex, its bounding box, its font description
// (cut to 50 bytes plus "...") and a " *M*" suffix for meta marks.
func (_eag TextMark) String() string {
	fontDesc := ""
	if _eag.Font != nil {
		fontDesc = _eag.Font.String()
		if len(fontDesc) > 50 {
			fontDesc = fontDesc[:50] + "\u002e\u002e\u002e"
		}
	}
	metaTag := ""
	if _eag.Meta {
		metaTag = "\u0020\u002a\u004d\u002a"
	}
	box := _eag.BBox
	return _ae.Sprintf("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d", _eag.Offset, _eag.Text, []rune(_eag.Text), box.Llx, box.Lly, box.Urx, box.Ury, fontDesc, metaTag)
}

// lines returns the lines (`_bfagf`) of all paras in the list, concatenated in
// list order. The result is nil when there are no lines.
func (_abag paraList) lines() []*textLine {
	var all []*textLine
	for i := range _abag {
		all = append(all, _abag[i]._bfagf...)
	}
	return all
}

// subpath is a sequence of points plus a boolean flag.
// NOTE(review): the meaning of `_bbdf` is not visible in this part of the file.
type subpath struct {
	_acfg []_g.Point
	_bbdf bool
}
2024-02-11 21:29:32 +00:00
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
2024-03-27 22:34:33 +00:00
// RenderMode is the integer-valued text rendering mode (Tmode).
type RenderMode int

// snapToGroups splits the rulings with vertsHorzs, applies
// snapToGroupsDirection to each non-empty half and returns the two halves
// concatenated, first half first.
func (_baeb rulingList) snapToGroups() rulingList {
	first, second := _baeb.vertsHorzs()
	if len(first) > 0 {
		first = first.snapToGroupsDirection()
	}
	if len(second) > 0 {
		second = second.snapToGroupsDirection()
	}
	snapped := append(first, second...)
	snapped.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return snapped
}

// _fgeda builds the list items for level `_abfdb` of `_bdaf` and, recursively,
// for the levels after it.
//
// The candidate lines for the level are `_eadg[_bdaf[_abfdb]]`, narrowed by
// `_eeef` using `_cffb` and `_fegd`. Each selected line becomes one "bullet"
// item. Its sub-items come from the next level, bounded by the line's own
// `_gaca` and the `_gaca` of the next selected line (or `_cffb` for the last
// one). Its extra lines come from `_eagc`, bounded by the first sub-item's
// first line when there is one. The result is never nil.
func _fgeda(_eagb []*textLine, _eadg map[float64][]*textLine, _bdaf []float64, _abfdb int, _fegd, _cffb float64) []*list {
	items := []*list{}
	level := _abfdb
	_abfdb = _abfdb + 1

	candidates := _eadg[_bdaf[level]]
	selected := _eeef(candidates, _cffb, _fegd)
	for idx, line := range selected {
		start := line._gaca
		// The item extends to the next selected line, or to `_cffb` for the last one.
		end := _cffb
		if idx < len(selected)-1 {
			end = selected[idx+1]._gaca
		}

		children := []*list{}
		if _abfdb < len(_bdaf) {
			children = _fgeda(_eagb, _eadg, _bdaf, _abfdb, start, end)
		}

		// Lines belonging to this item stop where its first child begins.
		var bodyEnd float64
		bodyEnd = end
		if len(children) > 0 {
			if firstChild := children[0]; len(firstChild._gagag) > 0 {
				bodyEnd = firstChild._gagag[0]._gaca
			}
		}

		itemLines := []*textLine{line}
		itemLines = append(itemLines, _eagc(line, _eagb, _bdaf, start, bodyEnd)...)

		item := _deec(itemLines, "\u0062\u0075\u006c\u006c\u0065\u0074", children)
		item._begg = _aadg(itemLines, "")
		items = append(items, item)
	}
	return items
}

// _cdab is the constant 1/1000.
const _cdab = 1.0 / 1000.0

// bounded is implemented by anything that has a bounding box.
type bounded interface {
	bbox() _aec.PdfRectangle
}
// setWordSpacing stores `_aebf` in the text state's `_ecd` field.
// It is a no-op on a nil receiver.
func (_eddg *textObject) setWordSpacing(_aebf float64) {
	if _eddg != nil {
		_eddg._cbgd._ecd = _aebf
	}
}

// applyTables returns a new list that starts with one table para per entry of
// `_bebdbf` (from newTablePara), followed by the paras of the receiver that do
// not have `_dcddf` set.
func (_adba paraList) applyTables(_bebdbf []*textTable) paraList {
	var out paraList
	for _, table := range _bebdbf {
		out = append(out, table.newTablePara())
	}
	for _, para := range _adba {
		if !para._dcddf {
			out = append(out, para)
		}
	}
	return out
}

// moveText forwards the offsets (`_aef`, `_gbg`) to moveLP.
func (_eaef *textObject) moveText(_aef, _gbg float64) {
	_eaef.moveLP(_aef, _gbg)
}
2024-02-11 21:29:32 +00:00
2024-03-27 22:34:33 +00:00
// String returns a multi-line description of the stack: a header line with the
// number of entries, then one line per entry with its index. Nil entries are
// shown as "<nil>".
func (_ab *stateStack) String() string {
	lines := make([]string, 0, len(*_ab)+1)
	lines = append(lines, _ae.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(*_ab)))
	for idx, state := range *_ab {
		desc := "\u003c\u006e\u0069l\u003e"
		if state != nil {
			desc = state.String()
		}
		lines = append(lines, _ae.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", idx, desc))
	}
	return _bb.Join(lines, "\u000a")
}

// _fgged packs two ints into a single key: `_geggf` times 0x1000000 plus
// `_cadfe`.
func _fgged(_geggf, _cadfe int) uint64 {
	high := uint64(_geggf) * 0x1000000
	return high + uint64(_cadfe)
}
func _ceab ( _gdbgf , _dbfdc _g . Point ) bool { return _gdbgf . X == _dbfdc . X && _gdbgf . Y == _dbfdc . Y } ; func ( _gdabc gridTiling ) log ( _gbeg string ) { if ! _ddb { return ; } ; _fc . Log . Info ( "\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071" , len ( _gdabc . _fecg ) , len ( _gdabc . _eceeed ) , _gbeg ) ;
_ae . Printf ( "\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a" , _gdabc . _fecg ) ; _ae . Printf ( "\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a" , _gdabc . _eceeed ) ; for _befa , _dgec := range _gdabc . _eceeed { _fabc , _fffd := _gdabc . _bbfc [ _dgec ] ;
if ! _fffd { continue ; } ; _ae . Printf ( "%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a" , _befa , _dgec ) ; for _daee , _efba := range _gdabc . _fecg { _efcb , _ecff := _fabc [ _efba ] ; if ! _ecff { continue ; } ; _ae . Printf ( "\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a" , _daee , _efcb . String ( ) ) ;
} ; } ; } ; func _ffeed ( _gfdgc * _aec . Image , _cgag _eg . Color ) _ec . Image { _ebdbg , _eaega := int ( _gfdgc . Width ) , int ( _gfdgc . Height ) ; _aefbb := _ec . NewRGBA ( _ec . Rect ( 0 , 0 , _ebdbg , _eaega ) ) ; for _bfge := 0 ; _bfge < _eaega ; _bfge ++ { for _gcdaf := 0 ; _gcdaf < _ebdbg ;
_gcdaf ++ { _cdacb , _fgade := _gfdgc . ColorAt ( _gcdaf , _bfge ) ; if _fgade != nil { _fc . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e" , _gcdaf , _bfge ) ;
continue ; } ; _babc , _dbdgc , _cgacg , _ := _cdacb . RGBA ( ) ; var _cbfbe _eg . Color ; if _babc + _dbdgc + _cgacg == 0 { _cbfbe = _eg . Transparent ; } else { _cbfbe = _cgag ; } ; _aefbb . Set ( _gcdaf , _bfge , _cbfbe ) ; } ; } ; return _aefbb ; } ; const _bfa = 20 ; func ( _ffe * textObject ) getCurrentFont ( ) * _aec . PdfFont { _bgda := _ffe . _cbgd . _cbad ;
if _bgda == nil { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e" ) ; return _aec . DefaultFont ( ) ;
} ; return _bgda ; } ; type rulingList [ ] * ruling ; func ( _cffba compositeCell ) String ( ) string { _eedd := "" ; if len ( _cffba . paraList ) > 0 { _eedd = _bgfd ( _cffba . paraList . merge ( ) . text ( ) , 50 ) ; } ; return _ae . Sprintf ( "\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071" , _cffba . PdfRectangle , len ( _cffba . paraList ) , _eedd ) ;
} ; func ( _dgeaa * textTable ) markCells ( ) { for _cdcbf := 0 ; _cdcbf < _dgeaa . _agdc ; _cdcbf ++ { for _abagd := 0 ; _abagd < _dgeaa . _afcga ; _abagd ++ { _bgga := _dgeaa . get ( _abagd , _cdcbf ) ; if _bgga != nil { _bgga . _dcddf = true ; } ; } ; } ; } ; func ( _egfc * textObject ) getFont ( _gaff string ) ( * _aec . PdfFont , error ) { if _egfc . _eegd . _fba != nil { _gegf , _afge := _egfc . getFontDict ( _gaff ) ;
if _afge != nil { _fc . Log . Debug ( "\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073" , _gaff , _afge . Error ( ) ) ; return nil , _afge ;
} ; _egfc . _eegd . _fec ++ ; _bfeg , _gcfd := _egfc . _eegd . _fba [ _gegf . String ( ) ] ; if _gcfd { _bfeg . _egfcg = _egfc . _eegd . _fec ; return _bfeg . _gfab , nil ; } ; } ; _efde , _bfb := _egfc . getFontDict ( _gaff ) ; if _bfb != nil { return nil , _bfb ; } ; _gee , _bfb := _egfc . getFontDirect ( _gaff ) ;
if _bfb != nil { return nil , _bfb ; } ; if _egfc . _eegd . _fba != nil { _dbda := fontEntry { _gee , _egfc . _eegd . _fec } ; if len ( _egfc . _eegd . _fba ) >= _gbgd { var _beeg [ ] string ; for _fede := range _egfc . _eegd . _fba { _beeg = append ( _beeg , _fede ) ; } ; _a . Slice ( _beeg , func ( _fdg , _begf int ) bool { return _egfc . _eegd . _fba [ _beeg [ _fdg ] ] . _egfcg < _egfc . _eegd . _fba [ _beeg [ _begf ] ] . _egfcg ;
} ) ; delete ( _egfc . _eegd . _fba , _beeg [ 0 ] ) ; } ; _egfc . _eegd . _fba [ _efde . String ( ) ] = _dbda ; } ; return _gee , nil ; } ; func _dcaa ( _gbgaa , _aegc * textPara ) bool { return _bae ( _gbgaa . _bbbc , _aegc . _bbbc ) } ; func _dbega ( _egdcb int , _fagg func ( int , int ) bool ) [ ] int { _cead := make ( [ ] int , _egdcb ) ;
for _gggb := range _cead { _cead [ _gggb ] = _gggb ; } ; _a . Slice ( _cead , func ( _dffc , _abfccd int ) bool { return _fagg ( _cead [ _dffc ] , _cead [ _abfccd ] ) } ) ; return _cead ; } ; func ( _bgbca compositeCell ) hasLines ( _gbcdc [ ] * textLine ) bool { for _cbabc , _cede := range _gbcdc { _eecb := _fbdc ( _bgbca . PdfRectangle , _cede . PdfRectangle ) ;
if _gbead { _ae . Printf ( "\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a" , _eecb , _cbabc , len ( _gbcdc ) ) ; _ae . Printf ( "\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a" , _bgbca ) ;
_ae . Printf ( "\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a" , _cede ) ; } ; if _eecb { return true ; } ; } ; return false ; } ; func ( _dgae * textObject ) getFontDict ( _agc string ) ( _bbef _bad . PdfObject , _egfd error ) { _abab := _dgae . _bcg ;
if _abab == nil { _fc . Log . Debug ( "g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071" , _agc ) ; return nil , nil ; } ; _bbef , _ccfd := _abab . GetFontByName ( _bad . PdfObjectName ( _agc ) ) ;
if ! _ccfd { _fc . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071" , _agc ) ;
return nil , _d . New ( "f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073" ) ; } ; return _bbef , nil ; } ; func ( _cged * textObject ) setTextLeading ( _dba float64 ) { if _cged == nil { return ; } ; _cged . _cbgd . _gbff = _dba ;
} ; func _gcda ( _cgba * wordBag , _dgdb * textWord , _eeaca float64 ) bool { return _dgdb . Llx < _cgba . Urx + _eeaca && _cgba . Llx - _eeaca < _dgdb . Urx ; } ; func ( _gef * imageExtractContext ) extractXObjectImage ( _fdf * _bad . PdfObjectName , _gbd _ba . GraphicsState , _ffb * _aec . PdfPageResources ) error { _cda , _ := _ffb . GetXObjectByName ( * _fdf ) ;
if _cda == nil { return nil ; } ; _cge , _cbc := _gef . _cf [ _cda ] ; if ! _cbc { _egae , _fca := _ffb . GetXObjectImageByName ( * _fdf ) ; if _fca != nil { return _fca ; } ; if _egae == nil { return nil ; } ; _acb , _fca := _egae . ToImage ( ) ; if _fca != nil { return _fca ; } ; var _eaf _ec . Image ;
if _egae . Mask != nil { if _eaf , _fca = _gabfb ( _egae . Mask , _eg . Opaque ) ; _fca != nil { _fc . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a \u0063\u006f\u0075\u006c\u0064 \u006eo\u0074\u0020\u0067\u0065\u0074\u0020\u0065\u0078\u0070\u006c\u0069\u0063\u0069\u0074\u0020\u0069\u006d\u0061\u0067e\u0020\u006d\u0061\u0073\u006b\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" ) ;
} ; } else if _egae . SMask != nil { _eaf , _fca = _ddcae ( _egae . SMask , _eg . Opaque ) ; if _fca != nil { _fc . Log . Debug ( "W\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0067\u0065\u0074\u0020\u0073\u006f\u0066\u0074\u0020\u0069\u006da\u0067e\u0020\u006d\u0061\u0073k\u002e\u0020O\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063\u0074\u002e" ) ;
} ; } ; if _eaf != nil { _fad , _eggf := _acb . ToGoImage ( ) ; if _eggf != nil { return _eggf ; } ; _fad = _edacb ( _fad , _eaf ) ; switch _egae . ColorSpace . String ( ) { case "\u0044\u0065\u0076\u0069\u0063\u0065\u0047\u0072\u0061\u0079" , "\u0049n\u0064\u0065\u0078\u0065\u0064" : _acb , _eggf = _aec . ImageHandling . NewGrayImageFromGoImage ( _fad ) ;
if _eggf != nil { return _eggf ; } ; default : _acb , _eggf = _aec . ImageHandling . NewImageFromGoImage ( _fad ) ; if _eggf != nil { return _eggf ; } ; } ; } ; _cge = & cachedImage { _cb : _acb , _ecf : _egae . ColorSpace } ; _gef . _cf [ _cda ] = _cge ; } ; _fcd := _cge . _cb ; _cgec := _cge . _ecf ;
_gca , _cdc := _cgec . ImageToRGB ( * _fcd ) ; if _cdc != nil { return _cdc ; } ; _fc . Log . Debug ( "@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073" , _gbd . CTM . String ( ) ) ; _gcf := ImageMark { Image : & _gca , Width : _gbd . CTM . ScalingFactorX ( ) , Height : _gbd . CTM . ScalingFactorY ( ) , Angle : _gbd . CTM . Angle ( ) } ;
_gcf . X , _gcf . Y = _gbd . CTM . Translation ( ) ; _gef . _bff = append ( _gef . _bff , _gcf ) ; _gef . _cdb ++ ; return nil ; } ; func _ggabg ( _gbbac _aec . PdfRectangle ) * ruling { return & ruling { _egdf : _bfgb , _gbgc : _gbbac . Ury , _fgad : _gbbac . Llx , _ababc : _gbbac . Urx } ;
} ; func ( _bcca * shapesState ) addPoint ( _bcb , _dbcf float64 ) { _adgbc := _bcca . establishSubpath ( ) ; _ecac := _bcca . devicePoint ( _bcb , _dbcf ) ; if _adgbc == nil { _bcca . _effc = true ; _bcca . _cgbcf = _ecac ; } else { _adgbc . add ( _ecac ) ; } ; } ; func _fbacb ( _gaaag int , _gbdd map [ int ] [ ] float64 ) ( [ ] int , int ) { _bgfb := make ( [ ] int , _gaaag ) ;
_dgfg := 0 ; for _acgag := 0 ; _acgag < _gaaag ; _acgag ++ { _bgfb [ _acgag ] = _dgfg ; _dgfg += len ( _gbdd [ _acgag ] ) + 1 ; } ; return _bgfb , _dgfg ; } ; func ( _dceb * subpath ) close ( ) { if ! _ceab ( _dceb . _acfg [ 0 ] , _dceb . last ( ) ) { _dceb . add ( _dceb . _acfg [ 0 ] ) ; } ; _dceb . _bbdf = true ;
_dceb . removeDuplicates ( ) ; } ; func ( _gcfe * textObject ) renderText ( _gfa _bad . PdfObject , _fbaa [ ] byte , _dgfa int ) error { if _gcfe . _bbcdf { _fc . Log . Debug ( "\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e" ) ;
return nil ; } ; _gcef := _gcfe . getCurrentFont ( ) ; _cbb := _gcef . BytesToCharcodes ( _fbaa ) ; _ebad , _aage , _feea := _gcef . CharcodesToStrings ( _cbb ) ; if _feea > 0 { _fc . Log . Debug ( "\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064" , _aage , _feea ) ;
} ; _gcfe . _cbgd . _cgee += _aage ; _gcfe . _cbgd . _aece += _feea ; _gbde := _gcfe . _cbgd ; _cdbfc := _gbde . _dgad ; _ggbc := _gbde . _aafe / 100.0 ; _acff := _cdab ; if _gcef . Subtype ( ) == "\u0054\u0079\u0070e\u0033" { _acff = 1 ; } ; _egac , _fccc := _gcef . GetRuneMetrics ( ' ' ) ;
if ! _fccc { _egac , _fccc = _gcef . GetCharMetrics ( 32 ) ; } ; if ! _fccc { _egac , _ = _aec . DefaultFont ( ) . GetRuneMetrics ( ' ' ) ; } ; _cbgc := _egac . Wx * _acff ; _fc . Log . Trace ( "\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066" , _cbgc , _ebad , _gcef , _cdbfc ) ;
_dcbd := _g . NewMatrix ( _cdbfc * _ggbc , 0 , 0 , _cdbfc , 0 , _gbde . _egc ) ; if _eged { _fc . Log . Info ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071" , len ( _cbb ) , _cbb , _ebad ) ;
} ; _fc . Log . Trace ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071" , len ( _cbb ) , _cbb , len ( _ebad ) ) ; _cdccc := _gcfe . getFillColor ( ) ;
_gde := _gcfe . getStrokeColor ( ) ; for _dbbf , _gdae := range _ebad { _gdce := [ ] rune ( _gdae ) ; if len ( _gdce ) == 1 && _gdce [ 0 ] == '\x00' { continue ; } ; _fbb := _cbb [ _dbbf ] ; _dbfd := _gcfe . _ggd . CTM . Mult ( _gcfe . _acc ) . Mult ( _dcbd ) ; _gefb := 0.0 ; if len ( _gdce ) == 1 && _gdce [ 0 ] == 32 { _gefb = _gbde . _ecd ;
} ; _afab , _abd := _gcef . GetCharMetrics ( _fbb ) ; if ! _abd { _fc . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073" , _fbb , _gdce , _gdce , _gcef ) ;
return _ae . Errorf ( "\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064" , _gcef . String ( ) , _fbb ) ; } ; _eafg := _g . Point { X : _afab . Wx * _acff , Y : _afab . Wy * _acff } ;
_fadb := _g . Point { X : ( _eafg . X * _cdbfc + _gefb ) * _ggbc } ; _efd := _g . Point { X : ( _eafg . X * _cdbfc + _gbde . _cga + _gefb ) * _ggbc } ; if _eged { _fc . Log . Info ( "\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066" , _cdbfc , _gbde . _cga , _gbde . _ecd , _ggbc ) ;
_fc . Log . Info ( "\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f" , _eafg , _fadb , _efd ) ; } ; _dbgcb := _egf ( _fadb ) ; _ecfd := _egf ( _efd ) ; _fadf := _gcfe . _ggd . CTM . Mult ( _gcfe . _acc ) . Mult ( _dbgcb ) ;
if _egdce { _fc . Log . Info ( "e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a" + "\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a" + "\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073" , _gcfe . _ggd . CTM , _gcfe . _acc , _ecfd , _eee ( _gcfe . _ggd . CTM . Mult ( _gcfe . _acc ) . Mult ( _ecfd ) ) , _dbgcb , _fadf , _eee ( _fadf ) ) ;
} ; _fcdfc , _accb := _gcfe . newTextMark ( _bbg . ExpandLigatures ( _gdce ) , _dbfd , _eee ( _fadf ) , _ea . Abs ( _cbgc * _dbfd . ScalingFactorX ( ) ) , _gcef , _gcfe . _cbgd . _cga , _cdccc , _gde , _gfa , _ebad , _dbbf , _dgfa ) ; if ! _accb { _fc . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067" ) ;
continue ; } ; if _gcef == nil { _fc . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e" ) ; } else if _gcef . Encoder ( ) == nil { _fc . Log . Debug ( "E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073" , _gcef ) ;
} else { if _bacf , _efc := _gcef . Encoder ( ) . CharcodeToRune ( _fbb ) ; _efc { _fcdfc . _dcff = string ( _bacf ) ; } ; } ; _fc . Log . Trace ( "i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073" , _dbbf , _fbb , _fcdfc , _dbfd ) ;
_gcfe . _fcee = append ( _gcfe . _fcee , & _fcdfc ) ; _gcfe . _acc . Concat ( _ecfd ) ; } ; return nil ; } ; func ( _bfga * textPara ) fontsize ( ) float64 { return _bfga . _bfagf [ 0 ] . _fgcb } ; func ( _fbed * textObject ) showTextAdjusted ( _bbbb * _bad . PdfObjectArray , _eabea int ) error { _dda := false ;
for _ , _ceg := range _bbbb . Elements ( ) { switch _ceg . ( type ) { case * _bad . PdfObjectFloat , * _bad . PdfObjectInteger : _dee , _ddg := _bad . GetNumberAsFloat ( _ceg ) ; if _ddg != nil { _fc . Log . Debug ( "\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _ceg , _bbbb ) ;
return _ddg ; } ; _cfad , _fgf := - _dee * 0.001 * _fbed . _cbgd . _dgad , 0.0 ; if _dda { _fgf , _cfad = _cfad , _fgf ; } ; _fdcb := _egf ( _g . Point { X : _cfad , Y : _fgf } ) ; _fbed . _acc . Concat ( _fdcb ) ; case * _bad . PdfObjectString : _bed := _bad . TraceToDirectObject ( _ceg ) ; _adga , _cgcd := _bad . GetStringBytes ( _bed ) ;
if ! _cgcd { _fc . Log . Trace ( "s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _ceg , _bbbb ) ;
return _bad . ErrTypeError ; } ; _fbed . renderText ( _bed , _adga , _eabea ) ; default : _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _ceg , _bbbb ) ;
return _bad . ErrTypeError ; } ; } ; return nil ; } ; func ( _fbgga * textTable ) compositeRowCorridors ( ) map [ int ] [ ] float64 { _egaa := make ( map [ int ] [ ] float64 , _fbgga . _agdc ) ; if _gbead { _fc . Log . Info ( "c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064" , _fbgga . _agdc ) ;
} ; for _aggga := 1 ; _aggga < _fbgga . _agdc ; _aggga ++ { var _eaefa [ ] compositeCell ; for _bdcg := 0 ; _bdcg < _fbgga . _afcga ; _bdcg ++ { if _gbagc , _dedfb := _fbgga . _gaeb [ _fgged ( _bdcg , _aggga ) ] ; _dedfb { _eaefa = append ( _eaefa , _gbagc ) ; } ; } ; if len ( _eaefa ) == 0 { continue ;
} ; _ccgc := _bbbba ( _eaefa ) ; _egaa [ _aggga ] = _ccgc ; if _gbead { _ae . Printf ( "\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a" , _aggga , _ccgc ) ; } ; } ; return _egaa ; } ; type textResult struct { _cgeb PageText ; _eefd int ; _cdbf int ;
} ; func _efda ( _ggdgea [ ] pathSection ) rulingList { _aeefg ( _ggdgea ) ; if _eceg { _fc . Log . Info ( "\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073" , len ( _ggdgea ) ) ;
} ; var _fcccg rulingList ; for _ , _dccf := range _ggdgea { for _ , _eebd := range _dccf . _gbag { if len ( _eebd . _acfg ) < 2 { continue ; } ; _bbde := _eebd . _acfg [ 0 ] ; for _ , _fecfg := range _eebd . _acfg [ 1 : ] { if _efge , _ddag := _dgde ( _bbde , _fecfg , _dccf . Color ) ; _ddag { _fcccg = append ( _fcccg , _efge ) ;
} ; _bbde = _fecfg ; } ; } ; } ; if _eceg { _fc . Log . Info ( "m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073" , _fcccg ) ; } ; return _fcccg ; } ; type paraList [ ] * textPara ; func _fcea ( _eaadd map [ int ] intSet ) [ ] int { _egdbb := make ( [ ] int , 0 , len ( _eaadd ) ) ;
for _bgfg := range _eaadd { _egdbb = append ( _egdbb , _bgfg ) ; } ; _a . Ints ( _egdbb ) ; return _egdbb ; } ; func ( _eeff * Extractor ) extractPageText ( _dcd string , _gdc * _aec . PdfPageResources , _dbg _g . Matrix , _caba int ) ( * PageText , int , int , error ) { _fc . Log . Trace ( "\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d" , _caba ) ;
_bcf := & PageText { _babf : _eeff . _c , _cegb : _eeff . _ga , _dfgd : _eeff . _bd } ; _cc := _gbeb ( _eeff . _c ) ; var _dab stateStack ; _eda := _afgg ( _eeff , _gdc , _ba . GraphicsState { } , & _cc , & _dab ) ; _dea := shapesState { _gdfbg : _dbg , _affc : _g . IdentityMatrix ( ) , _bbdg : _eda } ;
var _baa bool ; _gaf := - 1 ; if _caba > _bfa { _geg := _d . New ( "\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077" ) ; _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076" , _caba , _geg ) ;
return _bcf , _cc . _cgee , _cc . _aece , _geg ; } ; _gea := _ba . NewContentStreamParser ( _dcd ) ; _fcf , _dcg := _gea . Parse ( ) ; if _dcg != nil { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dcg ) ;
return _bcf , _cc . _cgee , _cc . _aece , _dcg ; } ; _bcf . _cafg = _fcf ; _agf := _ba . NewContentStreamProcessor ( * _fcf ) ; _agf . AddHandler ( _ba . HandlerConditionEnumAllOperands , "" , func ( _ebbf * _ba . ContentStreamOperation , _faa _ba . GraphicsState , _edd * _aec . PdfPageResources ) error { _dd := _ebbf . Operand ;
if _ecab { _fc . Log . Info ( "\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s" , _ebbf ) ; } ; switch _dd { case "\u0071" : if _bcge { _fc . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _dea . _affc ) ; } ; _dab . push ( & _cc ) ; case "\u0051" : if ! _dab . empty ( ) { _cc = * _dab . pop ( ) ;
} ; _dea . _affc = _faa . CTM ; if _bcge { _fc . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _dea . _affc ) ; } ; case "\u0042\u0044\u0043" : _efa , _bdg := _bad . GetDict ( _ebbf . Params [ 1 ] ) ; if ! _bdg { _fc . Log . Debug ( "\u0045\u0052\u0052O\u0052\u003a\u0020\u0042D\u0043\u0020\u006f\u0070\u003d\u0025\u0073 \u0047\u0065\u0074\u0044\u0069\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _ebbf ) ;
return _dcg ; } ; _ffcf := _efa . Get ( "\u004d\u0043\u0049\u0044" ) ; if _ffcf != nil { _cfd , _fbd := _bad . GetIntVal ( _ffcf ) ; if ! _fbd { _fc . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u0042\u0044C\u0020\u006f\u0070=\u0025\u0073\u002e\u0020\u0042\u0061\u0064\u0020\u006eum\u0065\u0072\u0069c\u0061\u006c \u006f\u0062\u006a\u0065\u0063\u0074.\u0020\u006f=\u0025\u0073" , _ebbf , _ffcf ) ;
} ; _gaf = _cfd ; } else { _gaf = - 1 ; } ; case "\u0045\u004d\u0043" : _gaf = - 1 ; case "\u0042\u0054" : if _baa { _fc . Log . Debug ( "\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
_bcf . _dbfe = append ( _bcf . _dbfe , _eda . _fcee ... ) ; } ; _baa = true ; _bab := _faa ; _bab . CTM = _dbg . Mult ( _bab . CTM ) ; _eda = _afgg ( _eeff , _edd , _bab , & _cc , & _dab ) ; _dea . _bbdg = _eda ; case "\u0045\u0054" : if ! _baa { _fc . Log . Debug ( "\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
} ; _baa = false ; _bcf . _dbfe = append ( _bcf . _dbfe , _eda . _fcee ... ) ; _eda . reset ( ) ; case "\u0054\u002a" : _eda . nextLine ( ) ; case "\u0054\u0064" : if _fcdd , _fbdb := _eda . checkOp ( _ebbf , 2 , true ) ; ! _fcdd { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fbdb ) ;
return _fbdb ; } ; _gafa , _bgd , _dfg := _fegba ( _ebbf . Params ) ; if _dfg != nil { return _dfg ; } ; _eda . moveText ( _gafa , _bgd ) ; case "\u0054\u0044" : if _bgaf , _gac := _eda . checkOp ( _ebbf , 2 , true ) ; ! _bgaf { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gac ) ;
return _gac ; } ; _dgge , _ecg , _cgc := _fegba ( _ebbf . Params ) ; if _cgc != nil { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _cgc ) ; return _cgc ; } ; _eda . moveTextSetLeading ( _dgge , _ecg ) ; case "\u0054\u006a" : if _fdb , _afg := _eda . checkOp ( _ebbf , 1 , true ) ;
! _fdb { _fc . Log . Debug ( "\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076" , _ebbf , _afg ) ; return _afg ; } ; _edgc := _bad . TraceToDirectObject ( _ebbf . Params [ 0 ] ) ; _aae , _gge := _bad . GetStringBytes ( _edgc ) ;
if ! _gge { _fc . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064" , _ebbf ) ; return _bad . ErrTypeError ;
} ; return _eda . showText ( _edgc , _aae , _gaf ) ; case "\u0054\u004a" : if _cafd , _cgbc := _eda . checkOp ( _ebbf , 1 , true ) ; ! _cafd { _fc . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _cgbc ) ; return _cgbc ; } ;
_gec , _cff := _bad . GetArray ( _ebbf . Params [ 0 ] ) ; if ! _cff { _fc . Log . Debug ( "\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _ebbf ) ;
return _dcg ; } ; return _eda . showTextAdjusted ( _gec , _gaf ) ; case "\u0027" : if _ccf , _bfg := _eda . checkOp ( _ebbf , 1 , true ) ; ! _ccf { _fc . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bfg ) ; return _bfg ; } ; _baab := _bad . TraceToDirectObject ( _ebbf . Params [ 0 ] ) ;
_dgf , _fbeb := _bad . GetStringBytes ( _baab ) ; if ! _fbeb { _fc . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _ebbf ) ; return _bad . ErrTypeError ;
} ; _eda . nextLine ( ) ; return _eda . showText ( _baab , _dgf , _gaf ) ; case "\u0022" : if _dgab , _cfa := _eda . checkOp ( _ebbf , 3 , true ) ; ! _dgab { _fc . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _cfa ) ; return _cfa ;
} ; _dff , _afc , _dcf := _fegba ( _ebbf . Params [ : 2 ] ) ; if _dcf != nil { return _dcf ; } ; _edgd := _bad . TraceToDirectObject ( _ebbf . Params [ 2 ] ) ; _dcde , _ggba := _bad . GetStringBytes ( _edgd ) ; if ! _ggba { _fc . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _ebbf ) ;
return _bad . ErrTypeError ; } ; _eda . setCharSpacing ( _dff ) ; _eda . setWordSpacing ( _afc ) ; _eda . nextLine ( ) ; return _eda . showText ( _edgd , _dcde , _gaf ) ; case "\u0054\u004c" : _bbb , _ceed := _cffd ( _ebbf ) ; if _ceed != nil { _fc . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _ceed ) ;
return _ceed ; } ; _eda . setTextLeading ( _bbb ) ; case "\u0054\u0063" : _bag , _bcfg := _cffd ( _ebbf ) ; if _bcfg != nil { _fc . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bcfg ) ; return _bcfg ; } ; _eda . setCharSpacing ( _bag ) ;
case "\u0054\u0066" : if _aaed , _afag := _eda . checkOp ( _ebbf , 2 , true ) ; ! _aaed { _fc . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _afag ) ; return _afag ; } ; _ccfc , _bee := _bad . GetNameVal ( _ebbf . Params [ 0 ] ) ;
if ! _bee { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064" , _ebbf ) ; return _bad . ErrTypeError ; } ; _acg , _bcd := _bad . GetNumberAsFloat ( _ebbf . Params [ 1 ] ) ;
if ! _bee { _fc . Log . Debug ( "\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _ebbf , _bcd ) ;
return _bcd ; } ; _bcd = _eda . setFont ( _ccfc , _acg ) ; _eda . _bbcdf = _d . Is ( _bcd , _bad . ErrNotSupported ) ; if _bcd != nil && ! _eda . _bbcdf { return _bcd ; } ; case "\u0054\u006d" : if _bfaa , _fge := _eda . checkOp ( _ebbf , 6 , true ) ; ! _bfaa { _fc . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fge ) ;
return _fge ; } ; _gbe , _bdd := _bad . GetNumbersAsFloat ( _ebbf . Params ) ; if _bdd != nil { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bdd ) ; return _bdd ; } ; _eda . setTextMatrix ( _gbe ) ; case "\u0054\u0072" : if _gbc , _dag := _eda . checkOp ( _ebbf , 1 , true ) ;
! _gbc { _fc . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dag ) ; return _dag ; } ; _eddd , _adf := _bad . GetIntVal ( _ebbf . Params [ 0 ] ) ; if ! _adf { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _ebbf ) ;
return _bad . ErrTypeError ; } ; _eda . setTextRenderMode ( _eddd ) ; case "\u0054\u0073" : if _dagc , _fdfa := _eda . checkOp ( _ebbf , 1 , true ) ; ! _dagc { _fc . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fdfa ) ;
return _fdfa ; } ; _fgg , _dbd := _bad . GetNumberAsFloat ( _ebbf . Params [ 0 ] ) ; if _dbd != nil { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dbd ) ; return _dbd ; } ; _eda . setTextRise ( _fgg ) ; case "\u0054\u0077" : if _eccg , _adec := _eda . checkOp ( _ebbf , 1 , true ) ;
! _eccg { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _adec ) ; return _adec ; } ; _dfde , _bgbe := _bad . GetNumberAsFloat ( _ebbf . Params [ 0 ] ) ; if _bgbe != nil { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bgbe ) ;
return _bgbe ; } ; _eda . setWordSpacing ( _dfde ) ; case "\u0054\u007a" : if _acga , _gfdd := _eda . checkOp ( _ebbf , 1 , true ) ; ! _acga { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gfdd ) ; return _gfdd ; } ; _baad , _eggfb := _bad . GetNumberAsFloat ( _ebbf . Params [ 0 ] ) ;
if _eggfb != nil { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _eggfb ) ; return _eggfb ; } ; _eda . setHorizScaling ( _baad ) ; case "\u0063\u006d" : _dea . _affc = _faa . CTM ; if _dea . _affc . Singular ( ) { _aebe := _g . IdentityMatrix ( ) . Translate ( _dea . _affc . Translation ( ) ) ;
_fc . Log . Debug ( "S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s" , _dea . _affc , _aebe ) ; _dea . _affc = _aebe ; } ; if _bcge { _fc . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _dea . _affc ) ; } ; case "\u006d" : if len ( _ebbf . Params ) != 2 { _fc . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _ac ) ;
return nil ; } ; _ebg , _gcaf := _bad . GetNumbersAsFloat ( _ebbf . Params ) ; if _gcaf != nil { return _gcaf ; } ; _dea . moveTo ( _ebg [ 0 ] , _ebg [ 1 ] ) ; case "\u006c" : if len ( _ebbf . Params ) != 2 { _fc . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _ac ) ;
return nil ; } ; _ace , _fea := _bad . GetNumbersAsFloat ( _ebbf . Params ) ; if _fea != nil { return _fea ; } ; _dea . lineTo ( _ace [ 0 ] , _ace [ 1 ] ) ; case "\u0063" : if len ( _ebbf . Params ) != 6 { return _ac ; } ; _ccg , _bged := _bad . GetNumbersAsFloat ( _ebbf . Params ) ; if _bged != nil { return _bged ;
} ; _fc . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _ccg ) ; _dea . cubicTo ( _ccg [ 0 ] , _ccg [ 1 ] , _ccg [ 2 ] , _ccg [ 3 ] , _ccg [ 4 ] , _ccg [ 5 ] ) ; case "\u0076" , "\u0079" : if len ( _ebbf . Params ) != 4 { return _ac ;
} ; _efb , _dcb := _bad . GetNumbersAsFloat ( _ebbf . Params ) ; if _dcb != nil { return _dcb ; } ; _fc . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _efb ) ; _dea . quadraticTo ( _efb [ 0 ] , _efb [ 1 ] , _efb [ 2 ] , _efb [ 3 ] ) ;
case "\u0068" : _dea . closePath ( ) ; case "\u0072\u0065" : if len ( _ebbf . Params ) != 4 { return _ac ; } ; _ded , _cba := _bad . GetNumbersAsFloat ( _ebbf . Params ) ; if _cba != nil { return _cba ; } ; _dea . drawRectangle ( _ded [ 0 ] , _ded [ 1 ] , _ded [ 2 ] , _ded [ 3 ] ) ; _dea . closePath ( ) ;
case "\u0053" : _dea . stroke ( & _bcf . _afgd ) ; _dea . clearPath ( ) ; case "\u0073" : _dea . closePath ( ) ; _dea . stroke ( & _bcf . _afgd ) ; _dea . clearPath ( ) ; case "\u0046" : _dea . fill ( & _bcf . _ddae ) ; _dea . clearPath ( ) ; case "\u0066" , "\u0066\u002a" : _dea . closePath ( ) ; _dea . fill ( & _bcf . _ddae ) ;
_dea . clearPath ( ) ; case "\u0042" , "\u0042\u002a" : _dea . fill ( & _bcf . _ddae ) ; _dea . stroke ( & _bcf . _afgd ) ; _dea . clearPath ( ) ; case "\u0062" , "\u0062\u002a" : _dea . closePath ( ) ; _dea . fill ( & _bcf . _ddae ) ; _dea . stroke ( & _bcf . _afgd ) ; _dea . clearPath ( ) ; case "\u006e" : _dea . clearPath ( ) ;
case "\u0044\u006f" : if len ( _ebbf . Params ) == 0 { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e" , _ebbf . Params ) ;
return _bad . ErrRangeError ; } ; _afde , _acgg := _bad . GetName ( _ebbf . Params [ 0 ] ) ; if ! _acgg { _fc . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e" , _ebbf . Params [ 0 ] ) ;
return _bad . ErrTypeError ; } ; _ , _feb := _edd . GetXObjectByName ( * _afde ) ; if _feb != _aec . XObjectTypeForm { break ; } ; _adgc , _acgg := _eeff . _gfe [ _afde . String ( ) ] ; if ! _acgg { _dbb , _aebb := _edd . GetXObjectFormByName ( * _afde ) ; if _aebb != nil { _fc . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _aebb ) ;
return _aebb ; } ; _fcac , _aebb := _dbb . GetContentStream ( ) ; if _aebb != nil { _fc . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _aebb ) ; return _aebb ; } ; _gff := _dbb . Resources ; if _gff == nil { _gff = _edd ; } ; _fae := _faa . CTM ; if _dfdc , _dgag := _bad . GetArray ( _dbb . Matrix ) ;
_dgag { _ebc , _gdd := _dfdc . GetAsFloat64Slice ( ) ; if _gdd != nil { return _gdd ; } ; if len ( _ebc ) != 6 { return _ac ; } ; _gbcd := _g . NewMatrix ( _ebc [ 0 ] , _ebc [ 1 ] , _ebc [ 2 ] , _ebc [ 3 ] , _ebc [ 4 ] , _ebc [ 5 ] ) ; _fae = _faa . CTM . Mult ( _gbcd ) ; } ; _dedc , _ege , _bgfc , _aebb := _eeff . extractPageText ( string ( _fcac ) , _gff , _dbg . Mult ( _fae ) , _caba + 1 ) ;
if _aebb != nil { _fc . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _aebb ) ; return _aebb ; } ; _adgc = textResult { * _dedc , _ege , _bgfc } ; _eeff . _gfe [ _afde . String ( ) ] = _adgc ; } ; _dea . _affc = _faa . CTM ; if _bcge { _fc . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _dea . _affc ) ;
} ; _bcf . _dbfe = append ( _bcf . _dbfe , _adgc . _cgeb . _dbfe ... ) ; _bcf . _afgd = append ( _bcf . _afgd , _adgc . _cgeb . _afgd ... ) ; _bcf . _ddae = append ( _bcf . _ddae , _adgc . _cgeb . _ddae ... ) ; _cc . _cgee += _adgc . _eefd ; _cc . _aece += _adgc . _cdbf ; case "\u0072\u0067" , "\u0067" , "\u006b" , "\u0063\u0073" , "\u0073\u0063" , "\u0073\u0063\u006e" : _eda . _ggd . ColorspaceNonStroking = _faa . ColorspaceNonStroking ;
_eda . _ggd . ColorNonStroking = _faa . ColorNonStroking ; case "\u0052\u0047" , "\u0047" , "\u004b" , "\u0043\u0053" , "\u0053\u0043" , "\u0053\u0043\u004e" : _eda . _ggd . ColorspaceStroking = _faa . ColorspaceStroking ; _eda . _ggd . ColorStroking = _faa . ColorStroking ; } ; return nil ;
} ) ; _dcg = _agf . Process ( _gdc ) ; return _bcf , _cc . _cgee , _cc . _aece , _dcg ; } ; type imageExtractContext struct { _bff [ ] ImageMark ; _aa int ; _cdb int ; _bgb int ; _cf map [ * _bad . PdfObjectStream ] * cachedImage ; _gfc * ImageExtractOptions ; _ece bool ; } ;
// Text returns the extracted page text.
func (_fcgaf PageText) Text() string { return _fcgaf._eede }

// _gffa drops the last rune of the last mark in `_edac` and keeps `*_fafda` (the running text
// offset) consistent with the shortened text.
//
//   - If the last mark holds exactly one rune, the whole mark is removed and the offset is rewound
//     to the end of the new last mark (or to the removed mark's own Offset when nothing is left).
//   - Otherwise the mark's Text is replaced by _gegca(Text) and the offset is moved by the change
//     in byte length.
//
// An empty `_edac` is returned unchanged. The (possibly shortened) slice is returned; the trimmed
// text is written back into the slice element so the caller sees it.
func _gffa(_edac []TextMark, _fafda *int) []TextMark {
	if len(_edac) == 0 {
		// Nothing to trim; the original code would have panicked here.
		return _edac
	}
	lastIdx := len(_edac) - 1
	last := _edac[lastIdx]
	if len([]rune(last.Text)) == 1 {
		_edac = _edac[:lastIdx]
		if len(_edac) == 0 {
			// The only mark was removed: the text now ends where that mark started.
			*_fafda = last.Offset
			return _edac
		}
		prev := _edac[len(_edac)-1]
		*_fafda = prev.Offset + len(prev.Text)
		return _edac
	}
	trimmed := _gegca(last.Text)
	*_fafda += len(trimmed) - len(last.Text)
	// Store into the slice element: assigning to the local copy `last` would be lost.
	_edac[lastIdx].Text = trimmed
	return _edac
}

// _dbeg renders `_ffabf` as "{key: values, key: values, ...}" with the keys in the order
// returned by _gged.
func _dbeg(_ffabf map[int][]float64) string {
	keys := _gged(_ffabf)
	parts := make([]string, len(_ffabf))
	for i, k := range keys {
		parts[i] = _ae.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", k, _ffabf[k])
	}
	return _ae.Sprintf("\u007b\u0025\u0073\u007d", _bb.Join(parts, "\u002c\u0020"))
}

// log writes the list summary to the logger and every ruling to stdout, labelled with `_acgcf`.
// It does nothing unless the _eceg debug constant is true.
func (_gedcf rulingList) log(_acgcf string) {
	if !_eceg {
		return
	}
	_fc.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _acgcf, _gedcf.String())
	for i, r := range _gedcf {
		_ae.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r.String())
	}
}

// putComposite stores a compositeCell built from `_efdgde` and `_dacd` in the table's _gaeb map
// under the key _fgged(_ccebe, _decbf). An empty `_efdgde` is logged as an error and ignored.
func (_cggee *textTable) putComposite(_ccebe, _decbf int, _efdgde paraList, _dacd _aec.PdfRectangle) {
	if len(_efdgde) == 0 {
		_fc.Log.Error("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073")
		return
	}
	cell := compositeCell{PdfRectangle: _dacd, paraList: _efdgde}
	if _gbead {
		_ae.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a", _ccebe, _decbf, cell.String())
	}
	cell.updateBBox()
	_cggee._gaeb[_fgged(_ccebe, _decbf)] = cell
}

// toTextTable converts the internal table into the exported TextTable: one row per _agdc, one
// column per _afcga. Cells for which get() returns nil are left as zero-value TableCells.
func (_cbcf *textTable) toTextTable() TextTable {
	if _gbead {
		_fc.Log.Info("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064", _cbcf._afcga, _cbcf._agdc)
	}
	rows := make([][]TableCell, _cbcf._agdc)
	for y := 0; y < _cbcf._agdc; y++ {
		rows[y] = make([]TableCell, _cbcf._afcga)
		for x := 0; x < _cbcf._afcga; x++ {
			para := _cbcf.get(x, y)
			if para == nil {
				continue
			}
			if _gbead {
				_ae.Printf("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", x, y, para)
			}
			rows[y][x].Text = para.text()
			// Mark offsets restart at zero for every cell.
			offset := 0
			rows[y][x].Marks._bade = para.toTextMarks(&offset)
		}
	}
	out := TextTable{W: _cbcf._afcga, H: _cbcf._agdc, Cells: rows}
	out.PdfRectangle = _cbcf.bbox()
	return out
}

// yNeighbours builds two events per paragraph (one at Lly flagged true, one at Ury flagged false)
// and hands them to eventNeighbours. When `_geaea` is non-zero each interval is widened on both
// sides by `_geaea` times the paragraph's fontsize().
func (_cagg paraList) yNeighbours(_geaea float64) map[*textPara][]int {
	events := make([]event, 2*len(_cagg))
	if _geaea == 0 {
		for i, para := range _cagg {
			events[2*i] = event{para.Lly, true, i}
			events[2*i+1] = event{para.Ury, false, i}
		}
	} else {
		for i, para := range _cagg {
			events[2*i] = event{para.Lly - _geaea*para.fontsize(), true, i}
			events[2*i+1] = event{para.Ury + _geaea*para.fontsize(), false, i}
		}
	}
	return _cagg.eventNeighbours(events)
}

// _eagc selects lines from `_adabc` relative to `_ffgf`, considering only lines whose _gaca is at
// least `_fafe`.
//
//   - `_ecaef` != -1: lines with _gaca < `_ecaef` whose text differs from `_ffgf`'s are collected;
//     the scan stops at the first such line whose rounded Llx is left of `_ffgf`'s rounded Llx.
//   - `_ecaef` == -1: lines with the same _gaca as `_ffgf` are collected when their text differs;
//     other lines are collected when _faadf(...) returns a value other than -1 and the line's
//     _gaca does not exceed it.
//
// The result is always a non-nil slice.
func _eagc(_ffgf *textLine, _adabc []*textLine, _eccga []float64, _fafe, _ecaef float64) []*textLine {
	selected := []*textLine{}
	for _, line := range _adabc {
		if !(line._gaca >= _fafe) {
			continue
		}
		if _ecaef != -1 && line._gaca < _ecaef {
			if line.text() != _ffgf.text() {
				if _ea.Round(line.Llx) < _ea.Round(_ffgf.Llx) {
					break
				}
				selected = append(selected, line)
			}
		} else if _ecaef == -1 {
			if line._gaca == _ffgf._gaca {
				if line.text() != _ffgf.text() {
					selected = append(selected, line)
				}
				continue
			}
			limit := _faadf(_ffgf, _adabc, _eccga)
			if limit != -1 && line._gaca <= limit {
				selected = append(selected, line)
			}
		}
	}
	return selected
}

// depthRange returns, in ascending order, the keys of _gbbd that lie in [`_aadb`, `_gbbc`].
// It returns nil when no key is in range.
func (_aege *wordBag) depthRange(_aadb, _gbbc int) []int {
	var keys []int
	for k := range _aege._gbbd {
		if _aadb <= k && k <= _gbbc {
			keys = append(keys, k)
		}
	}
	if len(keys) == 0 {
		return nil
	}
	_a.Ints(keys)
	return keys
}

// firstReadingIndex starts from `_abbd` and scans the indexes returned by depthBand for the band
// that begins at (`_abbd`+1)*_fdedc and extends by _dgce times the _aeegf of firstWord(`_abbd`).
// It returns the index whose first word compares lowest under _bcea.
func (_dcaf *wordBag) firstReadingIndex(_abbd int) int {
	size := _dcaf.firstWord(_abbd)._aeegf
	lo := float64(_abbd+1) * _fdedc
	hi := lo + _dgce*size
	best := _abbd
	for _, idx := range _dcaf.depthBand(lo, hi) {
		if _bcea(_dcaf.firstWord(idx), _dcaf.firstWord(best)) < 0 {
			best = idx
		}
	}
	return best
}

// tables returns the TextTable form of every paragraph table (_caaa) that is non-nil and
// reports isExportable().
func (_bcadd paraList) tables() []TextTable {
	var out []TextTable
	if _gbead {
		_fc.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range _bcadd {
		table := para._caaa
		if table != nil && table.isExportable() {
			out = append(out, table.toTextTable())
		}
	}
	return out
}

// textObject holds the state of a text object being processed: the owning extractor, page
// resources, graphics state, text state and its stack, two matrices, and the marks collected.
type textObject struct {
	_eegd  *Extractor
	_bcg   *_aec.PdfPageResources
	_ggd   _ba.GraphicsState
	_cbgd  *textState
	_aed   *stateStack
	_acc   _g.Matrix
	_dde   _g.Matrix
	_fcee  []*textMark
	_bbcdf bool
}

// newTablePara wraps the table in a textPara whose PdfRectangle and _bbbc are both the table's
// computeBbox() result.
func (_begcf *textTable) newTablePara() *textPara {
	bbox := _begcf.computeBbox()
	para := &textPara{PdfRectangle: bbox, _bbbc: bbox, _caaa: _begcf}
	if _gbead {
		_fc.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// getListLines returns the paragraph lines whose first word starts with a byte accepted by _bdce,
// followed by the lines selected by _dbgcg. Lines with no words, or whose first word is empty,
// are skipped for the first-byte test instead of panicking.
func (_afdee *textPara) getListLines() []*textLine {
	var out []*textLine
	matched := _dbgcg(_afdee._bfagf)
	for _, line := range _afdee._bfagf {
		if len(line._bfag) == 0 || len(line._bfag[0]._eedc) == 0 {
			continue
		}
		if _bdce(line._bfag[0]._eedc[0]) {
			out = append(out, line)
		}
	}
	out = append(out, matched...)
	return out
}

// _cbabg returns copies of `_cgeef` with _adgb set to true and _dfeb set to `_ecdfd`.
// The input marks are not modified; the result is nil for an empty input.
func _cbabg(_cgeef []TextMark, _ecdfd *TextTable) []TextMark {
	var out []TextMark
	for _, mark := range _cgeef {
		mark._adgb = true
		mark._dfeb = _ecdfd
		out = append(out, mark)
	}
	return out
}

// intSet is a set of ints.
type intSet map[int]struct{}
// numBorders counts how many of the tile's four border flags are set.
func (_bbca gridTile) numBorders() int {
	count := 0
	for _, set := range [...]bool{_bbca._cbfd, _bbca._cbdbf, _bbca._fcgc, _bbca._fbbf} {
		if set {
			count++
		}
	}
	return count
}

// _dfdf reports whether `_dafb` lies within `_adfd` on both axes (edges may coincide).
func _dfdf(_adfd, _dafb _aec.PdfRectangle) bool {
	insideX := _adfd.Llx <= _dafb.Llx && _dafb.Urx <= _adfd.Urx
	insideY := _adfd.Lly <= _dafb.Lly && _dafb.Ury <= _adfd.Ury
	return insideX && insideY
}
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
//
// Replace with a function like Extract() (*PageText, error)
func (_bbf *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pageText, numChars, numMisses, err := _bbf.extractPageText(_bbf._ffd, _bbf._gf, _g.IdentityMatrix(), 0)
	// ErrColorOutOfRange is tolerated; any other error aborts the extraction.
	if err != nil && err != _aec.ErrColorOutOfRange {
		return nil, 0, 0, err
	}
	opts := _bbf._fg
	if opts != nil {
		pageText._cdgg._cgff = opts.UseSimplerExtractionProcess
	}
	pageText.computeViews()
	if err = _cgfcc(pageText); err != nil {
		return nil, 0, 0, err
	}
	if opts != nil {
		if opts.ApplyCropBox && _bbf._af != nil {
			pageText.ApplyArea(*_bbf._af)
		}
		pageText._cdgg._cdec = opts.DisableDocumentTags
	}
	return pageText, numChars, numMisses, nil
}

// cubicTo records only the final point (`_ede`, `_ggcb`) of the segment; the other four
// arguments are not used.
func (_bcac *shapesState) cubicTo(_fac, _acea, _dcdd, _aeace, _ede, _ggcb float64) {
	if _bcge {
		_fc.Log.Info("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a")
	}
	_bcac.addPoint(_ede, _ggcb)
}
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct{ _bade []TextMark }

// primaries returns the distinct _gbgc values of the rulings, sorted ascending.
func (_daedfd rulingList) primaries() []float64 {
	seen := make(map[float64]struct{}, len(_daedfd))
	for _, r := range _daedfd {
		seen[r._gbgc] = struct{}{}
	}
	values := make([]float64, 0, len(seen))
	for v := range seen {
		values = append(values, v)
	}
	_a.Float64s(values)
	return values
}

// textTable is the internal table representation: a bounding rectangle, two int dimensions
// (_afcga, _agdc), a flag, and two maps keyed by uint64 holding paragraph cells and
// composite cells.
type textTable struct {
	_aec.PdfRectangle
	_afcga, _agdc int
	_fbccb        bool
	_bfdff        map[uint64]*textPara
	_gaeb         map[uint64]compositeCell
}

// clear resets the subpath to its zero value.
func (_fceg *subpath) clear() {
	var zero subpath
	*_fceg = zero
}
// tidied returns the rulings after removeDuplicates, snapToGroups and sort, logging the
// intermediate lists. It returns nil when snapToGroups yields nil. `_dadd` is only used as a
// label in the debug log.
func (_bdebd rulingList) tidied(_dadd string) rulingList {
	uniques := _bdebd.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")
	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()
	if _eceg {
		_fc.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", _dadd, len(_bdebd), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// _eabc looks up the single rune of `_bcgg` in the _abec table. It returns ("", false) when
// `_bcgg` is not exactly one rune long.
func _eabc(_bcgg string) (string, bool) {
	runes := []rune(_bcgg)
	if len(runes) == 1 {
		repl, found := _abec[runes[0]]
		return repl, found
	}
	return "", false
}

// _ggb reports whether any font in `_dc` has FontName equal to `_gfd`.
func _ggb(_dc []Font, _gfd string) bool {
	for i := range _dc {
		if _dc[i].FontName == _gfd {
			return true
		}
	}
	return false
}
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_bdgc PageText) ToText() string {
	// Thin alias kept for backward compatibility.
	text := _bdgc.Text()
	return text
}
// String returns a description of `k`.
func (_dgaec markKind) String() string {
	if name, known := _debga[_dgaec]; known {
		return name
	}
	return _ae.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _dgaec)
}

// merge sorts the paragraphs into reading order and combines them into one textPara built by
// _geaa from the union of their rectangles and the concatenation of their lines.
// It returns nil for an empty list.
//
// The lines are gathered into a freshly allocated slice. Appending directly onto the first
// paragraph's _bfagf (as the code did before) could write into that paragraph's backing array
// whenever it had spare capacity.
func (_ccge paraList) merge() *textPara {
	_fc.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_ccge))
	if len(_ccge) == 0 {
		return nil
	}
	_ccge.sortReadingOrder()

	total := 0
	for _, para := range _ccge {
		total += len(para._bfagf)
	}
	var lines []*textLine
	if total > 0 {
		lines = make([]*textLine, 0, total)
	}

	bbox := _ccge[0].PdfRectangle
	lines = append(lines, _ccge[0]._bfagf...)
	for _, para := range _ccge[1:] {
		bbox = _agfb(bbox, para.PdfRectangle)
		lines = append(lines, para._bfagf...)
	}
	return _geaa(bbox, lines)
}

// _aeced splits the list's _begg text on newlines and returns every non-empty line prefixed
// with `*_ffcc` and terminated by a newline.
func _aeced(_dagg *list, _ffcc *string) string {
	var sb _bb.Builder
	for _, line := range _bb.Split(_dagg._begg, "\u000a") {
		if line == "" {
			continue
		}
		sb.WriteString(*_ffcc)
		sb.WriteString(line)
		sb.WriteString("\u000a")
	}
	return sb.String()
}

// moveLP concatenates a translation by (`_bgaa`, `_aee`) onto the _dde matrix and then copies
// _dde into _acc.
func (_gaga *textObject) moveLP(_bgaa, _aee float64) {
	_gaga._dde.Concat(_g.NewMatrix(1, 0, 0, 1, _bgaa, _aee))
	_gaga._acc = _gaga._dde
}

// _dbgcg returns the lines of `_bfae` whose text is matched by the _gdfd pattern.
// The result is always a non-nil slice.
func _dbgcg(_bfae []*textLine) []*textLine {
	matched := []*textLine{}
	for _, line := range _bfae {
		if _gdfd.Find([]byte(line.text())) != nil {
			matched = append(matched, line)
		}
	}
	return matched
}

// log prints one summary line to the logger and one line per non-nil paragraph to stdout,
// labelled with `_ggda`. It does nothing unless the _beee debug constant is true.
func (_bgag paraList) log(_ggda string) {
	if !_beee {
		return
	}
	_fc.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _ggda, len(_bgag))
	for i, para := range _bgag {
		if para == nil {
			continue
		}
		text := para.text()
		// Two blanks unless the paragraph carries a table, in which case its dimensions are shown.
		tableTag := "\u0020\u0020"
		if para._caaa != nil {
			tableTag = _ae.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", para._caaa._afcga, para._caaa._agdc)
		}
		_ae.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", i, para.PdfRectangle, tableTag, _bgfd(text, 50))
	}
}

// textState holds the text-state values tracked while processing text operators.
// _gbff is the vertical step used by nextLine; _egc and _aafe are written by setTextRise and
// setHorizScaling respectively.
// NOTE(review): _cgee and _aece look like running counters — confirm against extractPageText.
type textState struct {
	_cga   float64
	_ecd   float64
	_aafe  float64
	_gbff  float64
	_dgad  float64
	_ffgba RenderMode
	_egc   float64
	_cbad  *_aec.PdfFont
	_edfa  _aec.PdfRectangle
	_cgee  int
	_aece  int
}

// nextLine moves the line position down by the text state's _gbff value.
func (_eea *textObject) nextLine() { _eea.moveLP(0, -_eea._cbgd._gbff) }
// Compile-time debug switches. All base switches are false, so every derived switch is false
// as well. Among the ones used in the visible code: _bcge gates shape logging, _beee gates
// paraList.log, _gbead gates table logging and _eceg gates ruling/fill logging.
const (
	_acgbb = false
	_baf   = false
	_ecab  = false
	_egdce = false
	_bcge  = false
	_eged  = false
	_fedc  = false
	_beee  = false

	_fbbd  = false
	_ecda  = _fbbd && true
	_baag  = _ecda && false
	_gabga = _fbbd && true

	_gbead = false
	_efcc  = _gbead && false
	_caee  = _gbead && true

	_eceg  = false
	_ggbbe = _eceg && false
	_geda  = _eceg && false
	_ddb   = _eceg && true
	_bdf   = _eceg && false
	_dfbb  = _eceg && false
)

// toTextMarks converts each element of the word's _ebfa into a TextMark and accumulates them
// through _fccge, which also receives the running offset `_cdffa`.
func (_cffge *textWord) toTextMarks(_cdffa *int) []TextMark {
	var marks []TextMark
	for _, tm := range _cffge._ebfa {
		exported := tm.ToTextMark()
		marks = _fccge(marks, _cdffa, exported)
	}
	return marks
}

// pullWord appends `_ecaca` to the line and then removes it from `_abdbf` at index `_dfgdb`.
func (_dcdg *textLine) pullWord(_abdbf *wordBag, _ecaca *textWord, _dfgdb int) {
	_dcdg.appendWord(_ecaca)
	_abdbf.removeWord(_ecaca, _dfgdb)
}
// GetContentStreamOps returns the contentStreamOps field of `pt`.
func (_cbce *PageText) GetContentStreamOps() *_ba.ContentStreamOperations {
	ops := _cbce._cafg
	return ops
}

// text returns what writeText emits for the paragraph, as a string.
func (_ggef *textPara) text() string {
	var buf _fe.Buffer
	_ggef.writeText(&buf)
	return buf.String()
}
// List returns all the list objects detected on the page.
// It detects all the bullet point Lists from a given pdf page and builds a slice of bullet list objects.
// A given bullet list object has a tree structure.
// Each bullet point list is extracted with the text content it contains and all the sub lists found under it as children in the tree.
// The rest content of the pdf is ignored and only text in the bullet point lists are extracted.
// The list extraction is done in two ways.
// 1. If the document is tagged then the lists are extracted using the tags provided in the document.
// 2. Otherwise the bullet lists are extracted from the raw text using regex matching.
// By default the document tag is used if available.
// However this can be disabled using `DisableDocumentTags` in the `Options` object.
// Sometimes disabling document tags option might give a better bullet list extraction if the document was tagged incorrectly.
//
// options := &Options{
// DisableDocumentTags: false, // this means use document tag if available
// }
// ex, err := NewWithOptions(page, options)
// // handle error
// pageText, _, _, err := ex.ExtractPageText()
// // handle error
// lists := pageText.List()
// txt := lists.Text()
func (_cabcb PageText) List() lists {
	useTags := !_cabcb._cdgg._cdec
	paras := _cabcb.getParagraphs()
	hasStructTree := _cabcb._cegb != nil && *_cabcb._cegb != nil

	// The text-matching result is always computed; it is the fallback when tags cannot be used.
	result := paras.list()
	if !(hasStructTree && useTags) {
		return result
	}

	linesByKey := _fbbg(&paras)
	root := &structTreeRoot{}
	root.parseStructTreeRoot(*_cabcb._cegb)
	if root._bagdg == nil {
		_fc.Log.Debug("\u004c\u0069\u0073\u0074\u003a\u0020\u0073t\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e'\u0074\u0020\u0068\u0061\u0076e\u0020\u0061\u006e\u0079\u0020\u0063\u006f\u006e\u0074e\u006e\u0074\u002c\u0020\u0075\u0073\u0069\u006e\u0067\u0020\u0074\u0065\u0078\u0074\u0020\u006d\u0061\u0074\u0063\u0068\u0069\u006e\u0067\u0020\u006d\u0065\u0074\u0068\u006f\u0064\u0020\u0069\u006e\u0073\u0074\u0065\u0061\u0064\u002e")
		return result
	}
	result = root.buildList(linesByKey, _cabcb._dfgd)
	return result
}

// computeText concatenates the _gded strings of the word's marks, in order, with no separator.
func (_bfcfg *textWord) computeText() string {
	var sb _bb.Builder
	for _, tm := range _bfcfg._ebfa {
		sb.WriteString(tm._gded)
	}
	return sb.String()
}
// String returns a description of `l`.
func (_cgde *textLine) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _ae.Sprintf(layout, _cgde._gaca, _cgde.PdfRectangle, _cgde._fgcb, _cgde.text())
}

// _afgc returns `_eegge`, then the values of `_baada` strictly between the two bounds, then
// `_ebff`. Values at or below the lower bound are skipped and the scan stops at the first value
// at or above the upper bound, so `_baada` is expected to be in ascending order.
func _afgc(_baada []float64, _eegge, _ebff float64) []float64 {
	lower, upper := _eegge, _ebff
	if upper < lower {
		lower, upper = upper, lower
	}
	out := make([]float64, 0, len(_baada)+2)
	out = append(out, _eegge)
	for _, v := range _baada {
		if v <= lower {
			continue
		}
		if v >= upper {
			break
		}
		out = append(out, v)
	}
	return append(out, _ebff)
}

// list gathers each paragraph's getListLines() result and all paragraph lines, then builds the
// list objects with _acfc and _dbdad.
func (_bbbbg paraList) list() []*list {
	var (
		listLines []*textLine
		allLines  []*textLine
	)
	for _, para := range _bbbbg {
		listLines = append(listLines, para.getListLines()...)
		allLines = append(allLines, para._bfagf...)
	}
	return _dbdad(allLines, _acfc(listLines))
}

// _cecg reports whether the word's Llx is at or past the bag's Urx and less than `_fffg`
// beyond it.
func _cecg(_dcfc *wordBag, _gbef *textWord, _fffg float64) bool {
	if !(_dcfc.Urx <= _gbef.Llx) {
		return false
	}
	return _gbef.Llx < _dcfc.Urx+_fffg
}

// toCellTextMarks returns the marks of every line of the paragraph, advancing the running
// offset `_fcfdf`. When _bfab is set and a line other than the last ends in a hyphen, the line's
// final rune is removed with _gffa and no separator follows it; otherwise a separator from
// _ggcce/_ecaa is inserted between consecutive lines.
func (_cccc *textPara) toCellTextMarks(_fcfdf *int) []TextMark {
	var marks []TextMark
	for i, line := range _cccc._bfagf {
		lineMarks := line.toTextMarks(_fcfdf)
		isLast := i == len(_cccc._bfagf)-1
		dehyphenate := _bfab && line.endsInHyphen() && !isLast
		if dehyphenate {
			lineMarks = _gffa(lineMarks, _fcfdf)
		}
		marks = append(marks, lineMarks...)
		if !dehyphenate && !isLast {
			marks = _ggcce(marks, _fcfdf, _ecaa(line._gaca, _cccc._bfagf[i+1]._gaca))
		}
	}
	return marks
}

// setTextRise stores `_gdf` in the text state's _egc field. A nil receiver is a no-op.
func (_eca *textObject) setTextRise(_gdf float64) {
	if _eca != nil {
		_eca._cbgd._egc = _gdf
	}
}
// String returns a string describing `pt`.
func (_fde PageText) String() string {
	header := _ae.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_fde._dbfe))
	// One line per mark, bracketed by "-header" and "+header".
	parts := make([]string, 0, len(_fde._dbfe)+2)
	parts = append(parts, "\u002d"+header)
	for _, mark := range _fde._dbfe {
		parts = append(parts, mark.String())
	}
	parts = append(parts, "\u002b"+header)
	return _bb.Join(parts, "\u000a")
}

// _affg returns the keys of `_gfbd` sorted ascending.
func _affg(_gfbd map[float64]gridTile) []float64 {
	keys := make([]float64, len(_gfbd))
	keys = keys[:0]
	for k := range _gfbd {
		keys = append(keys, k)
	}
	_a.Float64s(keys)
	return keys
}
// Tables returns the tables extracted from the page.
func (_gbb PageText) Tables() []TextTable {
	tables := _gbb._gba
	if _gbead {
		_fc.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}

// _bae reports whether the two rectangles overlap horizontally (touching edges count).
func _bae(_ecee, _bfbe _aec.PdfRectangle) bool {
	if !(_bfbe.Llx <= _ecee.Urx) {
		return false
	}
	return _ecee.Llx <= _bfbe.Urx
}

// text joins the _eedc strings of allWords() with single spaces.
func (_ebcf *wordBag) text() string {
	var sb _bb.Builder
	for i, word := range _ebcf.allWords() {
		if i > 0 {
			sb.WriteString("\u0020")
		}
		sb.WriteString(word._eedc)
	}
	return sb.String()
}

// _bbcb returns a new subpath whose only point is `_ddcd`.
func _bbcb(_ddcd _g.Point) *subpath {
	sp := subpath{_acfg: []_g.Point{_ddcd}}
	return &sp
}
// _dfca reports whether every rune of `_fcgae` is Unicode white space.
// The empty string yields true.
func _dfca(_fcgae string) bool {
	allSpace := true
	for _, r := range _fcgae {
		if !_be.IsSpace(r) {
			allSpace = false
			break
		}
	}
	return allSpace
}

// Sentinel errors for operand validation.
var (
	_ad = _d.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")
	_ac = _d.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)

// firstWord returns element 0 of the _gbbd entry stored under `_geef`.
func (_fagc *wordBag) firstWord(_geef int) *textWord {
	words := _fagc._gbbd[_geef]
	return words[0]
}

// empty reports whether the stack holds no elements.
func (_bbcc *stateStack) empty() bool {
	return !(len(*_bbcc) > 0)
}

// parasBBox returns the cell's paragraphs together with its rectangle.
func (_acbd compositeCell) parasBBox() (paraList, _aec.PdfRectangle) {
	paras, bbox := _acbd.paraList, _acbd.PdfRectangle
	return paras, bbox
}
// Append appends `mark` to the mark array.
func (_abg *TextMarkArray) Append(mark TextMark) {
	marks := append(_abg._bade, mark)
	_abg._bade = marks
}

// structTreeRoot holds a slice of structElement values and a string.
type structTreeRoot struct {
	_bagdg []structElement
	_gage  string
}

// setHorizScaling stores `_fcdf` in the text state's _aafe field. A nil receiver is a no-op.
func (_aeac *textObject) setHorizScaling(_fcdf float64) {
	if _aeac != nil {
		_aeac._cbgd._aafe = _fcdf
	}
}

// pullWord adds `_gafg` to the bag at index `_cbaa`: the bag rectangle grows to include the
// word, _aad is raised to the word's _aeegf when that is larger, and the word is recorded in
// `_dcad[_cbaa]`.
func (_ecfb *wordBag) pullWord(_gafg *textWord, _cbaa int, _dcad map[int]map[*textWord]struct{}) {
	_ecfb.PdfRectangle = _agfb(_ecfb.PdfRectangle, _gafg.PdfRectangle)
	if size := _gafg._aeegf; size > _ecfb._aad {
		_ecfb._aad = size
	}
	bin := append(_ecfb._gbbd[_cbaa], _gafg)
	_ecfb._gbbd[_cbaa] = bin
	_dcad[_cbaa][_gafg] = struct{}{}
}

// getTextMarkAtOffset returns a pointer to a copy of the first mark whose Offset equals `_dfb`,
// or nil when there is none. Writes through the returned pointer do not reach the array.
func (_eegg *TextMarkArray) getTextMarkAtOffset(_dfb int) *TextMark {
	for i := range _eegg._bade {
		if _eegg._bade[i].Offset != _dfb {
			continue
		}
		found := _eegg._bade[i]
		return &found
	}
	return nil
}

// _aadg writes each line as `_cffe` followed by the line text. A line is followed by a newline
// when the next line's _gaca differs from its own (the last line is compared against 0), and by
// a space otherwise.
func _aadg(_bdbg []*textLine, _cffe string) string {
	var sb _bb.Builder
	last := len(_bdbg) - 1
	for i, line := range _bdbg {
		text := line.text()
		current := line._gaca
		next := 0.0
		if i < last {
			next = _bdbg[i+1]._gaca
		}
		sb.WriteString(_cffe)
		sb.WriteString(text)
		if next == current {
			sb.WriteString("\u0020")
		} else {
			sb.WriteString("\u000a")
		}
	}
	return sb.String()
}
// String returns a string describing `i`.
func (_efec gridTile) String() string {
	// Each border flag prints as its letter (L, R, B, T) when set and as "_" otherwise.
	flag := func(set bool, letter string) string {
		if !set {
			return "\u005f"
		}
		return letter
	}
	l := flag(_efec._cbfd, "\u004c")
	r := flag(_efec._cbdbf, "\u0052")
	b := flag(_efec._fcgc, "\u0042")
	t := flag(_efec._fbbf, "\u0054")
	return _ae.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", _efec.PdfRectangle, l, r, b, t)
}

// isQuadrilateral reports whether the subpath has four points, or five points of which the
// first and the fifth coincide.
func (_bbab *subpath) isQuadrilateral() bool {
	switch len(_bbab._acfg) {
	case 4:
		return true
	case 5:
		first, closing := _bbab._acfg[0], _bbab._acfg[4]
		return first.X == closing.X && first.Y == closing.Y
	default:
		return false
	}
}

// fill appends a pathSection made of the current subpaths (_edc) and the current fill color
// to `*_cdgb`. With the _eceg debug constant enabled it also prints a summary to stdout.
func (_abgc *shapesState) fill(_cdgb *[]pathSection) {
	section := pathSection{_gbag: _abgc._edc, Color: _abgc._bbdg.getFillColor()}
	*_cdgb = append(*_cdgb, section)
	if !_eceg {
		return
	}
	bbox := section.bbox()
	_ae.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*_cdgb), len(section._gbag), _abgc, section.Color, bbox, bbox.Width(), bbox.Height())
	if !_ggbbe {
		return
	}
	// Print the subpaths at indexes 0 through 10 only.
	for i, sp := range section._gbag {
		_ae.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sp)
		if i == 10 {
			break
		}
	}
}

// _ddcae decodes `_bbgfa` as an image XObject stream and returns the result of passing the
// decoded image and `_cdced` to _ffeed. It returns (nil, nil) when `_bbgfa` is not a stream,
// and (nil, err) when decoding fails.
func _ddcae(_bbgfa _bad.PdfObject, _cdced _eg.Color) (_ec.Image, error) {
	stream, isStream := _bad.GetStream(_bbgfa)
	if !isStream {
		return nil, nil
	}
	ximg, err := _aec.NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}
	img, err := ximg.ToImage()
	if err != nil {
		return nil, err
	}
	return _ffeed(img, _cdced), nil
}
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct {
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Text is the extracted text.
Text string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// BBox is the bounding box of the text.
2024-03-27 22:34:33 +00:00
BBox _aec . PdfRectangle ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Font is the font the text was drawn with.
2024-03-27 22:34:33 +00:00
Font * _aec . PdfFont ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2024-03-27 22:34:33 +00:00
FillColor _eg . Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2024-03-27 22:34:33 +00:00
StrokeColor _eg . Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Orientation is the text orientation
Orientation int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// DirectObject is the underlying PdfObject (Text Object) that represents the visible texts. This is introduced to get
// a simple access to the TextObject in case editing or replacement of some text is needed. E.g during redaction.
2024-03-27 22:34:33 +00:00
DirectObject _bad . PdfObject ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// ObjString is a decoded string operand of a text-showing operator. It has the same value as `Text` attribute except
// when many glyphs are represented with the same Text Object that contains multiple length string operand in which case
// ObjString spans more than one character string that falls in different TextMark objects.
//
// NOTE(review): the declaration line below also carries Tw, Th, Tc and Index plus the unexported
// _adgb and _dfeb fields, none of which have documentation of their own. Tw, Th and Tc presumably
// mirror the PDF text-state word spacing, horizontal scaling and character spacing — confirm.
// _cbabg sets _adgb to true and points _dfeb at a *TextTable when it tags marks.
// The same line then declares _aecab, a package-level compiled regular expression.
2024-03-27 22:34:33 +00:00
ObjString [ ] string ; Tw float64 ; Th float64 ; Tc float64 ; Index int ; _adgb bool ; _dfeb * TextTable ; } ; var _aecab = _f . MustCompile ( "\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024" ) ;
// bbox returns the PdfRectangle value held by the word.
func (_acege *textWord) bbox() _aec.PdfRectangle {
	return _acege.PdfRectangle
}

// buildList collects the list-related structure elements under the tree root and converts
// them into []*list.
//
// Steps, as implemented:
//  1. A nil receiver is logged and yields nil.
//  2. The top element is chosen. When the root has exactly one child, that child is used only
//     if its tag is one of "Document", "Sect", "Part", "Div" or "Art"; otherwise there is no
//     top element and nil is returned. For any other child count a synthetic element is built
//     that wraps all of the root's children and carries the root's own tag.
//  3. Direct children of the top element tagged "L" are kept as they are; children tagged
//     "Table" are replaced by whatever _faff returns for them. Other tags are ignored.
//  4. The collected elements, together with _ccfcb and _fcdb, are handed to _gfbf, and every
//     value it returns is expanded with _edcb; the expansions are concatenated in order.
//
// The result is nil when nothing is produced.
func (_caa *structTreeRoot) buildList(_ccfcb map[int][]*textLine, _fcdb _bad.PdfObject) []*list {
	if _caa == nil {
		_fc.Log.Debug("\u0062\u0075\u0069\u006c\u0064\u004c\u0069\u0073\u0074\u003a\u0020t\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0069\u0073 \u006e\u0069\u006c")
		return nil
	}

	// Pick the element whose direct children will be scanned.
	var top *structElement
	if rootKids := _caa._bagdg; len(rootKids) != 1 {
		top = &structElement{_ccaac: rootKids, _aeff: _caa._gage}
	} else {
		switch rootKids[0]._aeff {
		case "\u0044\u006f\u0063\u0075\u006d\u0065\u006e\u0074", "\u0053\u0065\u0063\u0074", "\u0050\u0061\u0072\u0074", "\u0044\u0069\u0076", "\u0041\u0072\u0074":
			top = &rootKids[0]
		}
	}
	if top == nil {
		_fc.Log.Debug("\u0062\u0075\u0069\u006cd\u004c\u0069\u0073\u0074\u003a\u0020\u0074\u006f\u0070\u0045l\u0065m\u0065\u006e\u0074\u0020\u0069\u0073\u0020n\u0069\u006c")
		return nil
	}

	// Gather list elements; kept as a non-nil empty slice so _gfbf sees the same value as before.
	listElems := []structElement{}
	for _, child := range top._ccaac {
		switch child._aeff {
		case "\u004c":
			listElems = append(listElems, child)
		case "\u0054\u0061\u0062l\u0065":
			listElems = append(listElems, _faff(child)...)
		}
	}

	var result []*list
	for _, item := range _gfbf(listElems, _ccfcb, _fcdb) {
		result = append(result, _edcb(item)...)
	}
	return result
}

// empty reports whether the bag's _gbbd map has no entry for key _ebba.
func (_ccfb *wordBag) empty(_ebba int) bool {
	if _, present := _ccfb._gbbd[_ebba]; present {
		return false
	}
	return true
}