2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2018-03-22 14:03:47 +00:00
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2022-06-06 22:48:24 +00:00
package extractor ; import ( _cf "bytes" ; _c "errors" ; _be "fmt" ; _ff "github.com/unidoc/unipdf/v3/common" ; _bd "github.com/unidoc/unipdf/v3/contentstream" ; _ca "github.com/unidoc/unipdf/v3/core" ; _dg "github.com/unidoc/unipdf/v3/internal/license" ; _a "github.com/unidoc/unipdf/v3/internal/textencoding" ;
_de "github.com/unidoc/unipdf/v3/internal/transform" ; _ee "github.com/unidoc/unipdf/v3/model" ; _gf "golang.org/x/text/unicode/norm" ; _da "golang.org/x/xerrors" ; _ga "image/color" ; _d "io" ; _f "math" ; _cd "regexp" ; _ef "sort" ; _df "strings" ; _g "unicode" ; _e "unicode/utf8" ;
) ;
2022-03-13 12:41:53 +00:00
2022-06-06 22:48:24 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20> ).
func (_bcg *Extractor) ExtractText() (string, error) {
	text, _, _, err := _bcg.ExtractTextWithStats()
	return text, err
}

// reorder permutes the list in place: after the call, position i holds the
// element that was at index `_ebfc[i]` before the call.
func (_beca paraList) reorder(_ebfc []int) {
	permuted := make(paraList, len(_beca))
	for dst := range _ebfc {
		permuted[dst] = _beca[_ebfc[dst]]
	}
	copy(_beca, permuted)
}

// augmentGrid splits the list with vertsHorzs and, when the bounding box of
// one list sticks out past the bounding box of the other by more than
// `_cbfg`, adds a synthetic ruling (kind `_ggebb`) on that side.
// If either list is empty, or nothing was added, the lists produced by
// vertsHorzs are returned unchanged.
func (_agcac rulingList) augmentGrid() (rulingList, rulingList) {
	verts, horzs := _agcac.vertsHorzs()
	if len(verts) == 0 || len(horzs) == 0 {
		return verts, horzs
	}
	origVerts, origHorzs := verts, horzs
	boxV := verts.bbox()
	boxH := horzs.bbox()
	if _dgac {
		// "augmentGrid: bboxV=%6.2f"
		_ff.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066", boxV)
		// "augmentGrid: bboxH=%6.2f"
		_ff.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066", boxH)
	}

	// Left and right edges: prepend / append a `_fafbf` ruling spanning boxV's y-range.
	if boxH.Llx < boxV.Llx-_cbfg {
		left := &ruling{_bggf: _ggebb, _cgac: _fafbf, _facf: boxH.Llx, _fgbfa: boxV.Lly, _ebeb: boxV.Ury}
		verts = append(rulingList{left}, verts...)
	}
	if boxH.Urx > boxV.Urx+_cbfg {
		right := &ruling{_bggf: _ggebb, _cgac: _fafbf, _facf: boxH.Urx, _fgbfa: boxV.Lly, _ebeb: boxV.Ury}
		verts = append(verts, right)
	}

	// Bottom and top edges: prepend / append a `_dfbe` ruling spanning boxH's x-range.
	if boxV.Lly < boxH.Lly-_cbfg {
		bottom := &ruling{_bggf: _ggebb, _cgac: _dfbe, _facf: boxV.Lly, _fgbfa: boxH.Llx, _ebeb: boxH.Urx}
		horzs = append(rulingList{bottom}, horzs...)
	}
	if boxV.Ury > boxH.Ury+_cbfg {
		top := &ruling{_bggf: _ggebb, _cgac: _dfbe, _facf: boxV.Ury, _fgbfa: boxH.Llx, _ebeb: boxH.Urx}
		horzs = append(horzs, top)
	}

	if len(verts)+len(horzs) == len(_agcac) {
		return origVerts, origHorzs
	}
	combined := append(verts, horzs...)
	_agcac.log("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064") // "unaugmented"
	combined.log("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d")                // "augmented"
	return verts, horzs
}

// _dbgg builds a gridTile for rectangle `_dbea`. Each border flag is set when
// the corresponding ruling is non-nil and encloses the rectangle's extent:
// the first two rulings are tested against the y-range, the last two against
// the x-range.
func _dbgg(_dbea _ee.PdfRectangle, _cagb, _gfcfb, _bebf, _gdffa *ruling) gridTile {
	tile := gridTile{PdfRectangle: _dbea}
	tile._gdge = _cagb != nil && _cagb.encloses(_dbea.Lly, _dbea.Ury)
	tile._geaa = _gfcfb != nil && _gfcfb.encloses(_dbea.Lly, _dbea.Ury)
	tile._gaaf = _bebf != nil && _bebf.encloses(_dbea.Llx, _dbea.Urx)
	tile._efab = _gdffa != nil && _gdffa.encloses(_dbea.Llx, _dbea.Urx)
	return tile
}

// minDepth returns `_fec - (Ury - _adbbf)` for the bag.
func (_bbbd *wordBag) minDepth() float64 {
	offset := _bbbd.Ury - _bbbd._adbbf
	return _bbbd._fec - offset
}

// text returns everything writeText emits for the paragraph, as a string.
func (_bbdd *textPara) text() string {
	var buf _cf.Buffer
	_bbdd.writeText(&buf)
	return buf.String()
}

// showText forwards `_fee` to renderText.
func (_cecg *textObject) showText(_fee []byte) error {
	return _cecg.renderText(_fee)
}
func _agfg ( _fcge bounded ) float64 { return - _fcge . bbox ( ) . Lly } ; func ( _fgeaf lineRuling ) asRuling ( ) ( * ruling , bool ) { _fega := ruling { _cgac : _fgeaf . _fbga , Color : _fgeaf . Color , _bggf : _edeaf } ; switch _fgeaf . _fbga { case _fafbf : _fega . _facf = _fgeaf . xMean ( ) ;
_fega . _fgbfa = _f . Min ( _fgeaf . _aagg . Y , _fgeaf . _aafd . Y ) ; _fega . _ebeb = _f . Max ( _fgeaf . _aagg . Y , _fgeaf . _aafd . Y ) ; case _dfbe : _fega . _facf = _fgeaf . yMean ( ) ; _fega . _fgbfa = _f . Min ( _fgeaf . _aagg . X , _fgeaf . _aafd . X ) ; _fega . _ebeb = _f . Max ( _fgeaf . _aagg . X , _fgeaf . _aafd . X ) ;
default : _ff . Log . Error ( "\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064" , _fgeaf . _fbga ) ; return nil , false ; } ; return & _fega , true ; } ; func ( _fef * imageExtractContext ) extractXObjectImage ( _acd * _ca . PdfObjectName , _ffb _bd . GraphicsState , _gcc * _ee . PdfPageResources ) error { _fbg , _ := _gcc . GetXObjectByName ( * _acd ) ;
if _fbg == nil { return nil ; } ; _deb , _cgga := _fef . _aca [ _fbg ] ; if ! _cgga { _acf , _fdg := _gcc . GetXObjectImageByName ( * _acd ) ; if _fdg != nil { return _fdg ; } ; if _acf == nil { return nil ; } ; _bgg , _fdg := _acf . ToImage ( ) ; if _fdg != nil { return _fdg ; } ; _deb = & cachedImage { _efe : _bgg , _cda : _acf . ColorSpace } ;
_fef . _aca [ _fbg ] = _deb ; } ; _edc := _deb . _efe ; _faa := _deb . _cda ; _ade , _fff := _faa . ImageToRGB ( * _edc ) ; if _fff != nil { return _fff ; } ; _ff . Log . Debug ( "@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073" , _ffb . CTM . String ( ) ) ; _ccge := ImageMark { Image : & _ade , Width : _ffb . CTM . ScalingFactorX ( ) , Height : _ffb . CTM . ScalingFactorY ( ) , Angle : _ffb . CTM . Angle ( ) } ;
_ccge . X , _ccge . Y = _ffb . CTM . Translation ( ) ; _fef . _eee = append ( _fef . _eee , _ccge ) ; _fef . _abef ++ ; return nil ; } ; func _gfb ( _aeaa float64 ) int { var _gecd int ; if _aeaa >= 0 { _gecd = int ( _aeaa / _fdbf ) ; } else { _gecd = int ( _aeaa / _fdbf ) - 1 ; } ; return _gecd ;
} ; func _fbfd ( _fgcf , _bgbfb , _gabac float64 ) rulingKind { if _fgcf >= _gabac && _abac ( _bgbfb , _fgcf ) { return _dfbe ; } ; if _bgbfb >= _gabac && _abac ( _fgcf , _bgbfb ) { return _fafbf ; } ; return _fbdff ; } ; func ( _dfefb * textTable ) get ( _faagb , _dgdae int ) * textPara { return _dfefb . _dbfba [ _cgccd ( _faagb , _dgdae ) ] ;
} ; type imageExtractContext struct { _eee [ ] ImageMark ; _fd int ; _abef int ; _cgg int ; _aca map [ * _ca . PdfObjectStream ] * cachedImage ; _dfc * ImageExtractOptions ; } ; func _eabca ( _aedc * wordBag , _gbdg * textWord , _ceeg float64 ) bool { return _aedc . Urx <= _gbdg . Llx && _gbdg . Llx < _aedc . Urx + _ceeg ;
2022-03-13 12:41:53 +00:00
} ;
2021-09-23 22:37:42 +00:00
2022-06-06 22:48:24 +00:00
// Tables returns the tables extracted from the page.
func (_cacc PageText) Tables() []TextTable {
	tables := _cacc._fdacg
	if _bcag {
		// "Tables: %d"
		_ff.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// PageText represents the layout of text on a device page.
type PageText struct {
	_fcb   []*textMark      // marks that computeViews groups by their `_bdaa` value
	_daaf  string           // text written by the paragraph list in computeViews
	_gdbg  []TextMark       // result of the paragraph list's toTextMarks
	_fdacg []TextTable      // result of the paragraph list's tables; returned by Tables
	_dda   _ee.PdfRectangle // rectangle handed to the paragraph builder in computeViews
	_dccc  []pathSection    // converted to rulings in computeViews when `_ebggd` is set
	_aacb  []pathSection    // converted to rulings in computeViews when `_degd` is set
}
2021-10-22 10:53:20 +00:00
2022-06-06 22:48:24 +00:00
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
func (_fed *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pageText, numChars, numMisses, err := _fed.extractPageText(_fed._gc, _fed._ea, _de.IdentityMatrix(), 0)
	// ErrColorOutOfRange is tolerated; every other error aborts the extraction.
	if err != nil && err != _ee.ErrColorOutOfRange {
		return nil, 0, 0, err
	}
	pageText.computeViews()
	if err = _effec(pageText); err != nil {
		return nil, 0, 0, err
	}
	return pageText, numChars, numMisses, nil
}
2021-10-22 10:53:20 +00:00
2022-06-06 22:48:24 +00:00
// String returns a description of `p`.
func (_fbag *textPara) String() string {
	if _fbag._gbdd {
		// "%6.2f [EMPTY]"
		return _be.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", _fbag.PdfRectangle)
	}
	var dims string
	if table := _fbag._cegd; table != nil {
		// "[%dx%d] "
		dims = _be.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", table._aage, table._eabcaa)
	}
	preview := _ggece(_fbag.text(), 50)
	// "%6.2f %s%d lines %q"
	return _be.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", _fbag.PdfRectangle, dims, len(_fbag._cecgd), preview)
}

// setTextRenderMode stores `_abf` as the text state's RenderMode.
// It is a no-op on a nil receiver.
func (_cbb *textObject) setTextRenderMode(_abf int) {
	if _cbb != nil {
		_cbb._ecb._ccc = RenderMode(_abf)
	}
}

// _gcag appends a copy of the `_caea` template mark, with its Text set to
// `_afef`, to `_caccc` via `_gefg` and returns the extended slice.
func _gcag(_caccc []TextMark, _efce *int, _afef string) []TextMark {
	mark := _caea
	mark.Text = _afef
	return _gefg(_caccc, _efce, mark)
}

// setWordSpacing stores `_baf` in the text state's `_bbfa` field.
// It is a no-op on a nil receiver.
func (_fafb *textObject) setWordSpacing(_baf float64) {
	if _fafb != nil {
		_fafb._ecb._bbfa = _baf
	}
}
2021-07-30 00:21:16 +00:00
2022-06-06 22:48:24 +00:00
// String returns a description of `state`.
func (_aac *textState) String() string {
	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]" // "[NOT SET]"
	if font := _aac._fea; font != nil {
		fontName = font.BaseFont()
	}
	// "tc=%.2f tw=%.2f tfs=%.2f font=%q"
	return _be.Sprintf("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071", _aac._bace, _aac._bbfa, _aac._ggda, fontName)
}

// fill appends a pathSection made of the state's current `_deff` paths and
// fill colour to `*_aaba`. With `_dgac` set it prints a summary line, and
// with `_acg` also set it prints the first 11 sub-paths (indices 0..10).
func (_gdbe *shapesState) fill(_aaba *[]pathSection) {
	section := pathSection{_fbdc: _gdbe._deff, Color: _gdbe._agbc.getFillColor()}
	*_aaba = append(*_aaba, section)
	if !_dgac {
		return
	}
	box := section.bbox()
	_be.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*_aaba), len(section._fbdc), _gdbe, section.Color, box, box.Width(), box.Height())
	if !_acg {
		return
	}
	for i, sub := range section._fbdc {
		// "%8d: %s\n"
		_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sub)
		if i == 10 {
			break
		}
	}
}

// depth returns the smallest depth() among the non-nil, non-empty cells
// fetched with get(x, 0) for x in [0, `_aage`), or 1e10 if there is none.
func (_gbfg *textTable) depth() float64 {
	shallowest := 1e10
	for x := 0; x < _gbfg._aage; x++ {
		cell := _gbfg.get(x, 0)
		if cell != nil && !cell._gbdd {
			shallowest = _f.Min(shallowest, cell.depth())
		}
	}
	return shallowest
}

// _efa returns the difference of the two bounding boxes' left edges
// (`_caag` minus `_aeab`).
func _efa(_caag, _aeab bounded) float64 {
	left := _caag.bbox().Llx
	return left - _aeab.bbox().Llx
}

// checkWidth returns `_baaeb - _bcbbg` and whether that value is at most `_fbfc`.
func (_gafc rectRuling) checkWidth(_bcbbg, _baaeb float64) (float64, bool) {
	width := _baaeb - _bcbbg
	return width, width <= _fbfc
}

// updateBBox folds the rectangle of every paragraph in the cell into the
// cell's own rectangle using `_bgcf`.
func (_bcgec *compositeCell) updateBBox() {
	for i := range _bcgec.paraList {
		_bcgec.PdfRectangle = _bgcf(_bcgec.PdfRectangle, _bcgec.paraList[i].PdfRectangle)
	}
}

// _efgf reports whether every rune of `_eabb` is Unicode white space.
// The empty string yields true.
func _efgf(_eabb string) bool {
	for _, r := range _eabb {
		if _g.IsSpace(r) {
			continue
		}
		return false
	}
	return true
}

// toTextTable converts the table into the exported TextTable form.
// Cells is indexed [y][x]; positions for which get returns nil keep the zero
// TableCell. Mark offsets restart at 0 for every cell.
func (_afaf *textTable) toTextTable() TextTable {
	w, h := _afaf._aage, _afaf._eabcaa
	if _bcag {
		// "toTextTable: %d x %d"
		_ff.Log.Info("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064", w, h)
	}
	cells := make([][]TableCell, h)
	for y := range cells {
		row := make([]TableCell, w)
		cells[y] = row
		for x := range row {
			para := _afaf.get(x, y)
			if para == nil {
				continue
			}
			if _bcag {
				// "%4d %2d: %s\n"
				_be.Printf("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", x, y, para)
			}
			row[x].Text = para.text()
			offset := 0
			row[x].Marks._beaa = para.toTextMarks(&offset)
		}
	}
	return TextTable{W: w, H: h, Cells: cells}
}
2021-12-14 01:08:28 +00:00
2022-04-27 00:10:33 +00:00
// String returns a description of `w`.
2022-06-06 22:48:24 +00:00
func ( _gbga * textWord ) String ( ) string { return _be . Sprintf ( "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022" , _gbga . _aagef , _gbga . PdfRectangle , _gbga . _eedb , _gbga . _bfdfd ) ;
2022-04-27 00:10:33 +00:00
} ;
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
2022-06-06 22:48:24 +00:00
func (_eaea *TextMarkArray) BBox() (_ee.PdfRectangle, bool) {
	var box _ee.PdfRectangle
	// A nil array has no marks; mirror the nil guard used by Len.
	if _eaea == nil {
		return box, false
	}
	found := false
	for _, mark := range _eaea._beaa {
		// Meta marks and all-whitespace marks do not contribute to the box.
		if mark.Meta || _efgf(mark.Text) {
			continue
		}
		if !found {
			box = mark.BBox
			found = true
			continue
		}
		box = _bgcf(box, mark.BBox)
	}
	return box, found
}

// splitSec sorts the list in place by (`_fgbfa`, `_ebeb`) and partitions it
// into groups. A group is seeded with the first ruling not yet grouped, then
// grown by one pass over the list that adds every ungrouped ruling for which
// alignsSec holds against some ruling already in the group.
// An empty list yields nil.
func (_ffedf rulingList) splitSec() []rulingList {
	if len(_ffedf) == 0 {
		return nil
	}
	_ef.Slice(_ffedf, func(i, j int) bool {
		a, b := _ffedf[i], _ffedf[j]
		if a._fgbfa != b._fgbfa {
			return a._fgbfa < b._fgbfa
		}
		return a._ebeb < b._ebeb
	})

	grouped := make(map[*ruling]struct{}, len(_ffedf))
	grow := func(seed *ruling) rulingList {
		group := rulingList{seed}
		grouped[seed] = struct{}{}
		for _, cand := range _ffedf {
			if _, done := grouped[cand]; done {
				continue
			}
			for _, member := range group {
				if cand.alignsSec(member) {
					group = append(group, cand)
					grouped[cand] = struct{}{}
					break
				}
			}
		}
		return group
	}

	groups := []rulingList{grow(_ffedf[0])}
	for _, r := range _ffedf[1:] {
		if _, done := grouped[r]; done {
			continue
		}
		groups = append(groups, grow(r))
	}
	return groups
}

// _caea is the template for synthetic marks: a white Meta mark whose Text is
// "[X]" and whose Original is a single space. `_gcag` copies it and
// overrides Text.
var _caea = TextMark{Text: "\u005b\u0058\u005d", Original: "\u0020", Meta: true, FillColor: _ga.White, StrokeColor: _ga.White}

// _af reports whether any font in `_abe` has FontName equal to `_dee`.
func _af(_abe []Font, _dee string) bool {
	for i := range _abe {
		if _abe[i].FontName == _dee {
			return true
		}
	}
	return false
}

// aligned reports whether enough rulings in the list intersect one another.
// Each ruling after the first either bumps the counter of one tracked ruling
// it gridIntersects, or becomes a tracked ruling with counter 0. The list is
// aligned when the share of tracked rulings still at 0 is at most
// 1.0 - `_cece`. Lists shorter than 2 are never aligned.
func (_bbffd rulingList) aligned() bool {
	if len(_bbffd) < 2 {
		return false
	}
	hits := make(map[*ruling]int)
	hits[_bbffd[0]] = 0
	for _, r := range _bbffd[1:] {
		matched := false
		// NOTE(review): map iteration order decides which tracked ruling is
		// credited when several intersect `r`.
		for tracked := range hits {
			if r.gridIntersecting(tracked) {
				hits[tracked]++
				matched = true
				break
			}
		}
		if !matched {
			hits[r] = 0
		}
	}

	unmatched := 0
	for _, n := range hits {
		if n == 0 {
			unmatched++
		}
	}
	frac := float64(unmatched) / float64(len(_bbffd))
	ok := frac <= 1.0-_cece
	if _dgac {
		// "aligned=%t unmatched=%.2f=%d/%d vecs=%s"
		_ff.Log.Info("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", ok, frac, unmatched, len(_bbffd), _bbffd.String())
	}
	return ok
}

// toTextMarks concatenates the marks of every non-empty paragraph, sharing a
// single running offset. Between a paragraph and its successor it inserts
// one " " mark when `_bdaf` holds for the pair and two "\n" marks otherwise.
// Two "\n" marks are always appended at the end.
func (_ffdb paraList) toTextMarks() []TextMark {
	offset := 0
	var marks []TextMark
	last := len(_ffdb) - 1
	for i, para := range _ffdb {
		if para._gbdd {
			continue
		}
		marks = append(marks, para.toTextMarks(&offset)...)
		if i == last {
			continue
		}
		if _bdaf(para, _ffdb[i+1]) {
			marks = _gcag(marks, &offset, "\u0020")
			continue
		}
		marks = _gcag(marks, &offset, "\u000a")
		marks = _gcag(marks, &offset, "\u000a")
	}
	marks = _gcag(marks, &offset, "\u000a")
	marks = _gcag(marks, &offset, "\u000a")
	return marks
}

// _ggece returns `_ffbf` truncated to at most `_bcdd` bytes. When the cut
// would land inside a multi-byte UTF-8 sequence it is moved back (by at most
// UTFMax-1 bytes) to the start of that sequence, so a valid string is never
// split mid-rune.
func _ggece(_ffbf string, _bcdd int) string {
	if len(_ffbf) <= _bcdd {
		return _ffbf
	}
	cut := _bcdd
	for back := 0; back < _e.UTFMax-1 && cut > 0 && !_e.RuneStart(_ffbf[cut]); back++ {
		cut--
	}
	return _ffbf[:cut]
}
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// Len returns the number of TextMarks in `ma`.
func (_dcbd *TextMarkArray) Len() int {
	if _dcbd != nil {
		return len(_dcbd._beaa)
	}
	return 0
}

// getStrokeColor resolves the graphics state's stroking colour within its
// stroking colourspace using `_ccfa`.
func (_ggg *textObject) getStrokeColor() _ga.Color {
	gs := _ggg._bcgf
	return _ccfa(gs.ColorspaceStroking, gs.ColorStroking)
}

// setHorizScaling stores `_geea` in the text state's `_cfa` field.
// It is a no-op on a nil receiver.
func (_ege *textObject) setHorizScaling(_geea float64) {
	if _ege != nil {
		_ege._ecb._cfa = _geea
	}
}

// inDiacriticArea reports whether `_bafeb` sits close to the receiver: the
// summed left-edge and right-edge offsets must be smaller in magnitude than
// `_fabde` times the receiver's width, and the bottom-edge offset smaller in
// magnitude than `_fabde` times its height.
func (_cbga *textMark) inDiacriticArea(_bafeb *textMark) bool {
	dLeft := _cbga.Llx - _bafeb.Llx
	dRight := _cbga.Urx - _bafeb.Urx
	dBottom := _cbga.Lly - _bafeb.Lly
	if !(_f.Abs(dLeft+dRight) < _cbga.Width()*_fabde) {
		return false
	}
	return _f.Abs(dBottom) < _cbga.Height()*_fabde
}

// _fcg returns `_dfcb`.Ury minus the lower y-coordinate of `_eccde`'s
// bounding box.
func _fcg(_dfcb _ee.PdfRectangle, _eccde bounded) float64 {
	bottom := _eccde.bbox().Lly
	return _dfcb.Ury - bottom
}
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	_gc  string                // passed as the first argument to extractPageText by ExtractPageText
	_ea  *_ee.PdfPageResources // page resources passed to extractPageText
	_eab _ee.PdfRectangle
	_bg  map[string]fontEntry // font cache used by textObject.getFont; nil disables caching
	_bb  map[string]textResult
	_bge int64 // counter bumped on every cached getFont call; stamps cache entries
	_gaf int
}

// removeDuplicates drops words that duplicate another word in the same
// `_aceg` bucket: identical `_bfdfd` and all four rectangle edges within a
// tolerance of `_gbbc` times the font size of the first word of the bucket
// that is currently driving the scan. Buckets that become empty are deleted.
func (_ebgd *wordBag) removeDuplicates() {
	if _ced {
		// "removeDuplicates: %q"
		_ff.Log.Info("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071", _ebgd.text())
	}
	for _, seedIdx := range _ebgd.depthIndexes() {
		if len(_ebgd._aceg[seedIdx]) == 0 {
			continue
		}
		seed := _ebgd._aceg[seedIdx][0]
		tol := _gbbc * seed._eedb
		depth := seed._aagef
		for _, idx := range _ebgd.depthBand(depth, depth+tol) {
			dups := map[*textWord]struct{}{}
			words := _ebgd._aceg[idx]
			for _, a := range words {
				if _, gone := dups[a]; gone {
					continue
				}
				for _, b := range words {
					if _, gone := dups[b]; gone {
						continue
					}
					if b == a || b._bfdfd != a._bfdfd {
						continue
					}
					if _f.Abs(b.Llx-a.Llx) < tol && _f.Abs(b.Urx-a.Urx) < tol &&
						_f.Abs(b.Lly-a.Lly) < tol && _f.Abs(b.Ury-a.Ury) < tol {
						dups[b] = struct{}{}
					}
				}
			}
			if len(dups) == 0 {
				continue
			}
			// Compact the survivors to the front, then trim the tail.
			kept := 0
			for _, w := range words {
				if _, gone := dups[w]; !gone {
					words[kept] = w
					kept++
				}
			}
			_ebgd._aceg[idx] = words[:len(words)-len(dups)]
			if len(_ebgd._aceg[idx]) == 0 {
				delete(_ebgd._aceg, idx)
			}
		}
	}
}

// hasLines reports whether `_gddc` holds for the cell's rectangle and the
// rectangle of at least one line in `_bgdb`.
func (_cdcg compositeCell) hasLines(_bgdb []*textLine) bool {
	for i, ln := range _bgdb {
		hit := _gddc(_cdcg.PdfRectangle, ln.PdfRectangle)
		if _bcag {
			_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", hit, i, len(_bgdb))
			_be.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", _cdcg)
			_be.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", ln)
		}
		if hit {
			return true
		}
	}
	return false
}

// _dadcg binds `_feeea` as the third argument of `_dgcd` and returns the
// resulting two-argument predicate.
func _dadcg(_dgcd func(*wordBag, *textWord, float64) bool, _feeea float64) func(*wordBag, *textWord) bool {
	return func(bag *wordBag, word *textWord) bool {
		return _dgcd(bag, word, _feeea)
	}
}

// markKind values. asRuling tags rulings built from lines with `_edeaf`;
// augmentGrid tags the rulings it synthesises with `_ggebb`.
const (
	_gged markKind = iota
	_edeaf
	_cgbe
	_ggebb
)

// getCurrentFont returns the text state's font, or the model's default font
// (after logging a debug message) when none is set.
func (_bfg *textObject) getCurrentFont() *_ee.PdfFont {
	if font := _bfg._ecb._fea; font != nil {
		return font
	}
	// "ERROR: No font defined. Using default."
	_ff.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e")
	return _ee.DefaultFont()
}

// _cfff reports whether the two values differ by at most `_cbfg`.
func _cfff(_fabdeb, _egbg float64) bool {
	return _f.Abs(_fabdeb-_egbg) <= _cbfg
}

const _cbf = 20

// contains reports whether `_fbcfd` fits inside the tile. The tile needs at
// least 3 borders, and on every side that has a border the rectangle may
// overshoot by no more than `_ebcf`.
func (_bfgcc gridTile) contains(_fbcfd _ee.PdfRectangle) bool {
	switch {
	case _bfgcc.numBorders() < 3:
		return false
	case _bfgcc._gdge && _fbcfd.Llx < _bfgcc.Llx-_ebcf:
		return false
	case _bfgcc._geaa && _fbcfd.Urx > _bfgcc.Urx+_ebcf:
		return false
	case _bfgcc._gaaf && _fbcfd.Lly < _bfgcc.Lly-_ebcf:
		return false
	case _bfgcc._efab && _fbcfd.Ury > _bfgcc.Ury+_ebcf:
		return false
	}
	return true
}

// getFont returns the font named `_bgaa`, consulting the extractor's font
// cache (keyed by the font dictionary's String()) when it is enabled.
//
// On a cache hit the entry's stamp is refreshed and written back to the map,
// so that eviction removes the least recently used entry. On a miss the font
// is loaded with getFontDirect and inserted; if the cache already holds
// `_bda` or more entries, the entry with the smallest stamp is evicted first.
// Errors from getFontDict and getFontDirect are returned unchanged.
func (_ebgb *textObject) getFont(_bgaa string) (*_ee.PdfFont, error) {
	ext := _ebgb._decb
	if ext._bg != nil {
		dict, err := _ebgb.getFontDict(_bgaa)
		if err != nil {
			// "ERROR: getFont: name=%s, error: %s"
			_ff.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073", _bgaa, err.Error())
			return nil, err
		}
		ext._bge++
		key := dict.String()
		if entry, ok := ext._bg[key]; ok {
			// Map values are copies: store the refreshed stamp back,
			// otherwise the update is lost.
			entry._eea = ext._bge
			ext._bg[key] = entry
			return entry._afgf, nil
		}
	}

	dict, err := _ebgb.getFontDict(_bgaa)
	if err != nil {
		return nil, err
	}
	font, err := _ebgb.getFontDirect(_bgaa)
	if err != nil {
		return nil, err
	}

	if ext._bg != nil {
		if len(ext._bg) >= _bda {
			// Evict the entry with the smallest stamp.
			var (
				oldestKey   string
				oldestStamp int64
				found       bool
			)
			for key, entry := range ext._bg {
				if !found || entry._eea < oldestStamp {
					oldestKey, oldestStamp, found = key, entry._eea, true
				}
			}
			if found {
				delete(ext._bg, oldestKey)
			}
		}
		ext._bg[dict.String()] = fontEntry{font, ext._bge}
	}
	return font, nil
}

// compositeRowCorridors computes `_edgg` over the composite cells of each
// row y in [1, `_eabcaa`) and returns the results keyed by y. Rows without
// composite cells get no entry; row 0 is never examined.
func (_cdecb *textTable) compositeRowCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, _cdecb._eabcaa)
	if _bcag {
		// "compositeRowCorridors: h=%d"
		_ff.Log.Info("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064", _cdecb._eabcaa)
	}
	for y := 1; y < _cdecb._eabcaa; y++ {
		var row []compositeCell
		for x := 0; x < _cdecb._aage; x++ {
			if cell, ok := _cdecb._ebgbb[_cgccd(x, y)]; ok {
				row = append(row, cell)
			}
		}
		if len(row) == 0 {
			continue
		}
		gaps := _edgg(row)
		corridors[y] = gaps
		if _bcag {
			// "   %2d: %6.2f\n"
			_be.Printf("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a", y, gaps)
		}
	}
	return corridors
}

// _efcg reports whether the x-ranges of `_gcgc` and `_gbbe` overlap once the
// bag's range is widened by `_gcda` on both sides.
func _efcg(_gcgc *wordBag, _gbbe *textWord, _gcda float64) bool {
	return _gbbe.Llx < _gcgc.Urx+_gcda && _gcgc.Llx-_gcda < _gbbe.Urx
}

const _feef = 1.0 / 1000.0

// extractTables runs findTables with `_bcbgb` and applies the result with
// applyTables. Lists with fewer than `_gfec` paragraphs are returned as is.
func (_bcff paraList) extractTables(_bcbgb []gridTiling) paraList {
	if _bcag {
		_ff.Log.Debug("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_bcff))
	}
	if len(_bcff) < _gfec {
		return _bcff
	}
	tables := _bcff.findTables(_bcbgb)
	if _bcag {
		_ff.Log.Info("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(tables))
		for i, table := range tables {
			// "combined %d"
			table.log(_be.Sprintf("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064", i))
		}
	}
	return _bcff.applyTables(tables)
}
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// String returns a description of `k`.
func (_gcbae rulingKind) String() string {
	if name, known := _bddd[_gcbae]; known {
		return name
	}
	// "Not a ruling: %d"
	return _be.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _gcbae)
}

// reduceTiling drops rows and columns of the table that are both closer than
// `_ffcef` to a neighbouring grid line of `_bfad` and empty, according to
// emptyCompositeRow / emptyCompositeColumn. Row 0 and the last column are
// always kept. The receiver itself is returned when nothing is dropped;
// otherwise a new table holding the surviving composite cells is returned.
func (_fcfb *textTable) reduceTiling(_bfad gridTiling, _ffcef float64) *textTable {
	keepRows := make([]int, 0, _fcfb._eabcaa)
	keepCols := make([]int, 0, _fcfb._aage)
	xs := _bfad._dgcdc
	ys := _bfad._dgbfg

	for y := 0; y < _fcfb._eabcaa; y++ {
		drop := y > 0 && _f.Abs(ys[y-1]-ys[y]) < _ffcef && _fcfb.emptyCompositeRow(y)
		if drop {
			continue
		}
		keepRows = append(keepRows, y)
	}
	for x := 0; x < _fcfb._aage; x++ {
		drop := x < _fcfb._aage-1 && _f.Abs(xs[x+1]-xs[x]) < _ffcef && _fcfb.emptyCompositeColumn(x)
		if drop {
			continue
		}
		keepCols = append(keepCols, x)
	}
	if len(keepRows) == _fcfb._eabcaa && len(keepCols) == _fcfb._aage {
		return _fcfb
	}

	reduced := textTable{
		_efea:   _fcfb._efea,
		_aage:   len(keepCols),
		_eabcaa: len(keepRows),
		_ebgbb:  make(map[uint64]compositeCell, len(keepCols)*len(keepRows)),
	}
	if _bcag {
		// "reduceTiling: %dx%d -> %dx%d"
		_ff.Log.Info("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064", _fcfb._aage, _fcfb._eabcaa, len(keepCols), len(keepRows))
		// "reducedCols: %+v"
		_ff.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keepCols)
		// "reducedRows: %+v"
		_ff.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keepRows)
	}
	for newY, oldY := range keepRows {
		for newX, oldX := range keepCols {
			paras, rect := _fcfb.getComposite(oldX, oldY)
			if len(paras) == 0 {
				continue
			}
			if _bcag {
				_be.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newX, newY, oldX, oldY, _ggece(paras.merge().text(), 50))
			}
			reduced.putComposite(newX, newY, paras, rect)
		}
	}
	return &reduced
}

// _bcfgd returns the larger of its two arguments.
func _bcfgd(_dcef, _baced int) int {
	if _baced >= _dcef {
		return _baced
	}
	return _dcef
}

// devicePoint transforms (`_baac`, `_fggdc`) by `_gfaf`.Mult(`_gcgd`) and
// returns the result as a Point.
func (_eagc *shapesState) devicePoint(_baac, _fggdc float64) _de.Point {
	m := _eagc._gfaf.Mult(_eagc._gcgd)
	x, y := m.Transform(_baac, _fggdc)
	return _de.NewPoint(x, y)
}

// _ecgf converts exactly two operands to float64 values. It returns an error
// when the operand count is not 2 or when the number conversion fails.
func _ecgf(_aggbg []_ca.PdfObject) (_ecbca, _agecc float64, _acfae error) {
	if n := len(_aggbg); n != 2 {
		// "invalid number of params: %d"
		return 0, 0, _be.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", n)
	}
	nums, err := _ca.GetNumbersAsFloat(_aggbg)
	if err != nil {
		return 0, 0, err
	}
	return nums[0], nums[1], nil
}

// fontEntry is one slot of the Extractor font cache.
type fontEntry struct {
	_afgf *_ee.PdfFont // the cached font
	_eea  int64        // access-counter stamp that getFont compares when evicting
}

// eventNeighbours sweeps over `_edeff` (sorted in place by `_ebgdd`, with
// `_ddfe` events first on ties). An event with `_ddfe` set adds its `_egca`
// index to the active set and links it with every index already active; any
// other event removes its index. The result maps each paragraph that had a
// `_ddfe` event to the indexes it was linked with (nil when there are none).
func (_gfgc paraList) eventNeighbours(_edeff []event) map[*textPara][]int {
	_ef.Slice(_edeff, func(i, j int) bool {
		a, b := _edeff[i], _edeff[j]
		if a._ebgdd != b._ebgdd {
			return a._ebgdd < b._ebgdd
		}
		if a._ddfe != b._ddfe {
			return a._ddfe
		}
		return i < j
	})

	links := make(map[int]intSet)
	active := make(intSet)
	for _, ev := range _edeff {
		if !ev._ddfe {
			active.del(ev._egca)
			continue
		}
		links[ev._egca] = make(intSet)
		for other := range active {
			if other == ev._egca {
				continue
			}
			links[ev._egca].add(other)
			links[other].add(ev._egca)
		}
		active.add(ev._egca)
	}

	neighbours := map[*textPara][]int{}
	for idx, set := range links {
		para := _gfgc[idx]
		if len(set) == 0 {
			neighbours[para] = nil
			continue
		}
		list := make([]int, 0, len(set))
		for other := range set {
			list = append(list, other)
		}
		neighbours[para] = list
	}
	return neighbours
}

// newSubPath calls clearPath and, with `_gcga` set, logs the state.
func (_ceadd *shapesState) newSubPath() {
	_ceadd.clearPath()
	if _gcga {
		// "newSubPath: %s"
		_ff.Log.Info("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073", _ceadd)
	}
}

// computeViews fills the page's text (`_daaf`), marks (`_gdbg`) and tables
// (`_fdacg`). Rulings come from `_dccc` / `_aacb` when `_ebggd` / `_degd`
// are set. Marks are bucketed by `_bdaa` value 0, 90, 180 and 270, in that
// order, and each non-empty bucket is turned into paragraphs by `_ebdb`.
func (_cab *PageText) computeViews() {
	var rulings rulingList
	if _ebggd {
		rulings = append(rulings, _adeeb(_cab._dccc)...)
	}
	if _degd {
		rulings = append(rulings, _bgbcf(_cab._aacb)...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(_cab._fcb)
	for angle := 0; angle < 360 && remaining > 0; angle += 90 {
		bucket := make([]*textMark, 0, len(_cab._fcb)-remaining)
		for _, mark := range _cab._fcb {
			if mark._bdaa == angle {
				bucket = append(bucket, mark)
			}
		}
		if len(bucket) == 0 {
			continue
		}
		paras = append(paras, _ebdb(bucket, _cab._dda, rulings, tilings)...)
		remaining -= len(bucket)
	}

	var buf _cf.Buffer
	paras.writeText(&buf)
	_cab._daaf = buf.String()
	_cab._gdbg = paras.toTextMarks()
	_cab._fdacg = paras.tables()
	if _bcag {
		// "computeViews: tables=%d"
		_ff.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(_cab._fdacg))
	}
}

// _gefg sets `_agfd`.Offset to `*_dagaa`, appends the mark to `_eabaed`, and
// advances `*_dagaa` by the byte length of the mark's Text.
func _gefg(_eabaed []TextMark, _dagaa *int, _agfd TextMark) []TextMark {
	_agfd.Offset = *_dagaa
	*_dagaa += len(_agfd.Text)
	return append(_eabaed, _agfd)
}

// textResult groups a PageText with two integer counters.
type textResult struct {
	_bcc PageText
	_fgc int
	_cfd int
}
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// Append appends `mark` to the mark array.
func (_efdb *TextMarkArray) Append(mark TextMark) {
	marks := append(_efdb._beaa, mark)
	_efdb._beaa = marks
}

// _dege reports whether the x-ranges of the two rectangles overlap; touching
// edges count as overlapping.
func _dege(_dbcbg, _fdfb _ee.PdfRectangle) bool {
	if _fdfb.Llx > _dbcbg.Urx {
		return false
	}
	return _dbcbg.Llx <= _fdfb.Urx
}
2022-04-27 00:10:33 +00:00
// Elements returns the TextMarks in `ma`.
2022-06-06 22:48:24 +00:00
func (_cadfg *TextMarkArray) Elements() []TextMark {
	// A nil array has no elements; mirror the nil guard used by Len.
	if _cadfg == nil {
		return nil
	}
	return _cadfg._beaa
}

// scanBand visits the words of the receiver whose depth lies in
// [`_dcfa`-margin, `_bdfb`+margin], where margin is `_bafe` times the font
// size `_abbd` had on entry, and counts those that
//   - satisfy the predicate `_cebf` against `_abbd`,
//   - have a font size close enough to `_abbd`'s (only checked when
//     `_gbd` > 0), and
//   - are not blocked by `_abbd`.
//
// Unless `_cbgd` is set, every counted word is pulled into `_abbd` and
// removed from the receiver once the scan ends. When `_cbgd` is set, nothing
// is moved and each depth bucket stops at its first counted word. Unless
// `_dbae` is set, the depth range grows to include each counted word.
// `_decd` is not used. The return value is the number of counted words.
func (_gege *wordBag) scanBand(_decd string, _abbd *wordBag, _cebf func(_cca *wordBag, _cgdf *textWord) bool, _dcfa, _bdfb, _gbd float64, _cbgd, _dbae bool) int {
	startFontsize := _abbd._adbbf
	var removals map[int]map[*textWord]struct{}
	if !_cbgd {
		removals = _gege.makeRemovals()
	}
	margin := _bafe * startFontsize
	count := 0
	for _, idx := range _gege.depthBand(_dcfa-margin, _bdfb+margin) {
		if len(_gege._aceg[idx]) == 0 {
			continue
		}
		for _, word := range _gege._aceg[idx] {
			// The range is re-evaluated per word because it may have grown.
			if !(_dcfa-margin <= word._aagef && word._aagef <= _bdfb+margin) {
				continue
			}
			if !_cebf(_abbd, word) {
				continue
			}
			// `_abbd._adbbf` is read afresh here rather than taken from
			// startFontsize, exactly as in the original.
			relDiff := 2.0 * _f.Abs(word._eedb-_abbd._adbbf) / (word._eedb + _abbd._adbbf)
			ratio := _f.Max(word._eedb/_abbd._adbbf, _abbd._adbbf/word._eedb)
			mismatch := _f.Min(relDiff, ratio)
			if _gbd > 0 && mismatch > _gbd {
				continue
			}
			if _abbd.blocked(word) {
				continue
			}
			if !_cbgd {
				_abbd.pullWord(word, idx, removals)
			}
			count++
			if !_dbae {
				if word._aagef < _dcfa {
					_dcfa = word._aagef
				}
				if word._aagef > _bdfb {
					_bdfb = word._aagef
				}
			}
			if _cbgd {
				break
			}
		}
	}
	if !_cbgd {
		_gege.applyRemovals(removals)
	}
	return count
}

// Tuning constants. Only the uses visible in this part of the file are noted.
const (
	_bgcb  = 1.0e-6
	_efdg  = 1.0e-4
	_eacf  = 10
	_fdbf  = 6   // divisor used by `_gfb`
	_bafe  = 0.5 // scanBand: depth margin as a fraction of font size
	_acff  = 0.12
	_dagf  = 0.19
	_dgfg  = 0.04
	_ebec  = 0.04
	_dddf  = 1.0
	_gfcf  = 0.04
	_bgbf  = 0.4
	_eecb  = 0.7
	_effd  = 1.0
	_debad = 0.1
	_cgb   = 1.4
	_fgee  = 0.46
	_edeb  = 0.02
	_gbbc  = 0.2 // removeDuplicates: tolerance as a fraction of font size
	_fabde = 0.5 // inDiacriticArea: fraction of mark width / height
	_cfaf  = 4
	_fdab  = 4.0
	_gfec  = 6 // extractTables: minimum number of paragraphs
	_befcc = 0.3
	_bce   = 0.01
	_gdgbd = 0.02
	_abed  = 2
	_gcad  = 2
	_ffbcf = 500
	_acaa  = 4.0
	_dbeb  = 4.0
	_acad  = 0.05
	_cbgf  = 0.1
	_cbfg  = 2.0 // augmentGrid and `_cfff`: distance tolerance
	_fbfc  = 2.0 // rectRuling.checkWidth: maximum width
	_ebcf  = 1.5 // gridTile.contains: allowed overshoot
	_faag  = 3.0
	_cece  = 0.25 // rulingList.aligned: threshold is 1.0 - `_cece`
)

// allWords returns the words of every `_aceg` bucket in one slice. Buckets
// are visited in map iteration order, so the order across buckets is not
// fixed.
func (_bgee *wordBag) allWords() []*textWord {
	var words []*textWord
	for _, bucket := range _bgee._aceg {
		words = append(words, bucket...)
	}
	return words
}

// empty reports whether the stack holds no elements.
func (_bagf *stateStack) empty() bool {
	return len(*_bagf) == 0
}
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// String returns a description of `v`.
func (_cafg *ruling) String() string {
	kind := _cafg._cgac
	if kind == _fbdff {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}

	// Axis labels: the primary/secondary labels swap when the kind is `_dfbe`.
	primary, secondary := "\u0078", "\u0079"
	if kind == _dfbe {
		primary, secondary = secondary, primary
	}

	// The width suffix is only shown for a non-zero width.
	var widthSuffix string
	if width := _cafg._gaeb; width != 0.0 {
		widthSuffix = _be.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", width)
	}

	span := _cafg._ebeb - _cafg._fgbfa
	return _be.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",
		kind, primary, _cafg._facf, secondary, _cafg._fgbfa, _cafg._ebeb, span, _cafg._bggf, _cafg.Color, widthSuffix)
}
2022-04-27 00:10:33 +00:00
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
2022-06-06 22:48:24 +00:00
type RenderMode int

// _acaf reports whether `_bcac` has at least `_cfaf` runes, ends in a rune in unicode.Hyphen,
// and has a non-space rune immediately before that final rune.
func _acaf(_bcac string) bool {
	if _e.RuneCountInString(_bcac) < _cfaf {
		return false
	}
	_bbee, _fcfa := _e.DecodeLastRuneInString(_bcac)
	if _fcfa <= 0 || !_g.Is(_g.Hyphen, _bbee) {
		return false
	}
	_bbee, _fcfa = _e.DecodeLastRuneInString(_bcac[:len(_bcac)-_fcfa])
	return _fcfa > 0 && !_g.IsSpace(_bbee)
}

// _efdf reports whether rectangle `_febc` lies entirely inside `_eaec` (edges may coincide).
func _efdf(_eaec, _febc _ee.PdfRectangle) bool {
	return _eaec.Llx <= _febc.Llx && _febc.Urx <= _eaec.Urx && _eaec.Lly <= _febc.Lly && _febc.Ury <= _eaec.Ury
}

// _addg maps spacing accent and modifier runes to a one-rune string in the range
// U+0300..U+0359.
var (
	_addg = map[rune]string{
		0x0060: "\u0300", 0x02CB: "\u0300",
		0x0027: "\u0301", 0x00B4: "\u0301", 0x02B9: "\u0301", 0x02CA: "\u0301",
		0x005E: "\u0302", 0x02C6: "\u0302",
		0x007E: "\u0303", 0x02DC: "\u0303",
		0x00AF: "\u0304", 0x02C9: "\u0304",
		0x02D8: "\u0306",
		0x02D9: "\u0307",
		0x00A8: "\u0308",
		0x00B0: "\u030a", 0x02DA: "\u030a",
		0x02BA: "\u030b", 0x02DD: "\u030b",
		0x02C7: "\u030c",
		0x02C8: "\u030d",
		0x0022: "\u030e",
		0x02BB: "\u0312",
		0x02BC: "\u0313", 0x0486: "\u0313", 0x055A: "\u0313",
		0x02BD: "\u0314", 0x0485: "\u0314", 0x0559: "\u0314",
		0x02D4: "\u031d",
		0x02D5: "\u031e",
		0x02D6: "\u031f",
		0x02D7: "\u0320",
		0x02B2: "\u0321",
		0x00B8: "\u0327",
		0x02CC: "\u0329",
		0x02B7: "\u032b",
		0x02CD: "\u0331",
		0x005F: "\u0332",
		0x204E: "\u0359",
	}
)

// _gdag builds a ruling from the line segment `_aebfe`-`_ceac` with color `_afgg`.
// It returns (nil, false) when `_gebfb` classifies the segment as `_fbdff`.
func _gdag(_aebfe, _ceac _de.Point, _afgg _ga.Color) (*ruling, bool) {
	_geac := lineRuling{_aagg: _aebfe, _aafd: _ceac, _fbga: _gebfb(_aebfe, _ceac), Color: _afgg}
	if _geac._fbga == _fbdff {
		return nil, false
	}
	return _geac.asRuling()
}

// _ccag returns `_agfg(_ccdgc) - _agfg(_ffdf)`.
func _ccag(_ccdgc, _ffdf bounded) float64 { return _agfg(_ccdgc) - _agfg(_ffdf) }

// _bccdg returns a line feed when `_dcfga(_dadd - _dagc)` is false, and a single space otherwise.
func _bccdg(_dadd, _dagc float64) string {
	_bcdf := !_dcfga(_dadd - _dagc)
	if _bcdf {
		return "\u000a"
	}
	return "\u0020"
}

// _ebbgg returns the keys of `_dafac` in ascending order.
func _ebbgg(_dafac map[float64]gridTile) []float64 {
	_aagb := make([]float64, 0, len(_dafac))
	for _ffgdc := range _dafac {
		_aagb = append(_aagb, _ffgdc)
	}
	_ef.Float64s(_aagb)
	return _aagb
}

// extractInlineImage converts the inline image `_beac` to RGB and appends an ImageMark for it.
// The mark's size, angle and position come from the CTM of `_cc`. A nil colorspace falls back
// to DeviceGray. On success the inline-image counter `_fd` is incremented.
func (_dfa *imageExtractContext) extractInlineImage(_beac *_bd.ContentStreamInlineImage, _cc _bd.GraphicsState, _bgc *_ee.PdfPageResources) error {
	_daa, _gg := _beac.ToImage(_bgc)
	if _gg != nil {
		return _gg
	}
	_fdf, _gg := _beac.GetColorSpace(_bgc)
	if _gg != nil {
		return _gg
	}
	if _fdf == nil {
		_fdf = _ee.NewPdfColorspaceDeviceGray()
	}
	_db, _gg := _fdf.ImageToRGB(*_daa)
	if _gg != nil {
		return _gg
	}
	_dcee := ImageMark{Image: &_db, Width: _cc.CTM.ScalingFactorX(), Height: _cc.CTM.ScalingFactorY(), Angle: _cc.CTM.Angle()}
	_dcee.X, _dcee.Y = _cc.CTM.Translation()
	_dfa._eee = append(_dfa._eee, _dcee)
	_dfa._fd++
	return nil
}

// extractFormImages extracts the images in the form XObject named `_eg`, using the form's own
// resources or, when it has none, the parent resources `_fgb`. An unknown form name is not an
// error. On success the form counter `_cgg` is incremented.
func (_gde *imageExtractContext) extractFormImages(_eg *_ca.PdfObjectName, _ag _bd.GraphicsState, _fgb *_ee.PdfPageResources) error {
	_gff, _gded := _fgb.GetXObjectFormByName(*_eg)
	if _gded != nil {
		return _gded
	}
	if _gff == nil {
		return nil
	}
	_adc, _gded := _gff.GetContentStream()
	if _gded != nil {
		return _gded
	}
	_aec := _gff.Resources
	if _aec == nil {
		_aec = _fgb
	}
	_gded = _gde.extractContentStreamImages(string(_adc), _aec)
	if _gded != nil {
		return _gded
	}
	_gde._cgg++
	return nil
}

// _beafe returns `_gfeg` with the first occurrence of `_gfgd` (compared by pointer) removed.
// When `_gfgd` is absent an error is logged and `_gfeg` is returned unchanged. Returning nil
// here previously made wordBag.removeWord delete the whole depth bucket and drop other words.
func _beafe(_gfeg []*textWord, _gfgd *textWord) []*textWord {
	for _bdbfb, _feegb := range _gfeg {
		if _feegb == _gfgd {
			return _cgbed(_gfeg, _bdbfb)
		}
	}
	_ff.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", _gfgd)
	return _gfeg
}

// depthBand returns the depth indexes between those of `_dbaa` and `_daga`, as selected by
// depthRange. It returns nil for an empty bag.
func (_gbcd *wordBag) depthBand(_dbaa, _daga float64) []int {
	if len(_gbcd._aceg) == 0 {
		return nil
	}
	return _gbcd.depthRange(_gbcd.getDepthIdx(_dbaa), _gbcd.getDepthIdx(_daga))
}

// depthIndexes returns the bag's depth indexes in ascending order, or nil for an empty bag.
func (_dgdd *wordBag) depthIndexes() []int {
	if len(_dgdd._aceg) == 0 {
		return nil
	}
	_dfea := make([]int, len(_dgdd._aceg))
	_fcbg := 0
	for _dfcg := range _dgdd._aceg {
		_dfea[_fcbg] = _dfcg
		_fcbg++
	}
	_ef.Ints(_dfea)
	return _dfea
}

// _dea starts a new textLine from the first word of `_cgdc` at depth index `_fbcf` and pulls
// that word out of the bag into the line.
func _dea(_cgdc *wordBag, _fbcf int) *textLine {
	_fedg := _cgdc.firstWord(_fbcf)
	_bddgc := textLine{PdfRectangle: _fedg.PdfRectangle, _caccd: _fedg._eedb, _decg: _fedg._aagef}
	_bddgc.pullWord(_cgdc, _fedg, _fbcf)
	return &_bddgc
}

// pathSection is a group of subpaths that share one color.
type pathSection struct {
	_fbdc []*subpath
	_ga.Color
}

// writeCellText writes the text of the paragraph's lines to `_afab`. Between consecutive lines
// it writes the separator chosen by `_bccdg`. When `_bcfgf` is set and a non-final line ends in
// a hyphen, that line is passed through `_bgdcd` and no separator follows it.
// Errors returned by Write are ignored.
func (_ddegb *textPara) writeCellText(_afab _d.Writer) {
	for _gdbb, _cbbd := range _ddegb._cecgd {
		_aefc := _cbbd.text()
		_ffgc := _bcfgf && _cbbd.endsInHyphen() && _gdbb != len(_ddegb._cecgd)-1
		if _ffgc {
			_aefc = _bgdcd(_aefc)
		}
		_afab.Write([]byte(_aefc))
		if !(_ffgc || _gdbb == len(_ddegb._cecgd)-1) {
			_afab.Write([]byte(_bccdg(_cbbd._decg, _ddegb._cecgd[_gdbb+1]._decg)))
		}
	}
}

// getFillColor returns the current non-stroking color of the graphics state as a color.Color.
func (_aea *textObject) getFillColor() _ga.Color {
	return _ccfa(_aea._bcgf.ColorspaceNonStroking, _aea._bcgf.ColorNonStroking)
}

// asRuling converts the rectangle into a ruling of kind `_defb._bbbf`. The primary coordinate is
// the rectangle's centre along its short axis and the extent is its long axis. It returns
// (nil, false) when checkWidth rejects the rectangle or the kind is neither `_fafbf` nor `_dfbe`.
func (_defb rectRuling) asRuling() (*ruling, bool) {
	_dbdf := ruling{_cgac: _defb._bbbf, Color: _defb.Color, _bggf: _cgbe}
	switch _defb._bbbf {
	case _fafbf:
		_dbdf._facf = 0.5 * (_defb.Llx + _defb.Urx)
		_dbdf._fgbfa = _defb.Lly
		_dbdf._ebeb = _defb.Ury
		_ccgb, _fgfd := _defb.checkWidth(_defb.Llx, _defb.Urx)
		if !_fgfd {
			if _fbgb {
				_ff.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _defb)
			}
			return nil, false
		}
		_dbdf._gaeb = _ccgb
	case _dfbe:
		_dbdf._facf = 0.5 * (_defb.Lly + _defb.Ury)
		_dbdf._fgbfa = _defb.Llx
		_dbdf._ebeb = _defb.Urx
		_dddc, _beed := _defb.checkWidth(_defb.Lly, _defb.Ury)
		if !_beed {
			if _fbgb {
				_ff.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _defb)
			}
			return nil, false
		}
		_dbdf._gaeb = _dddc
	default:
		_ff.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _defb._bbbf)
		return nil, false
	}
	return &_dbdf, true
}

// _ccfa converts the PDF color `_fcgcg` in colorspace `_bcfe` to an opaque color.NRGBA, with
// each component scaled by 255 and truncated. It returns color.Black when either argument is
// nil, when the RGB conversion fails, or when the converted color is not a *PdfColorDeviceRGB.
func _ccfa(_bcfe _ee.PdfColorspace, _fcgcg _ee.PdfColor) _ga.Color {
	if _bcfe == nil || _fcgcg == nil {
		return _ga.Black
	}
	_eebd, _bgbe := _bcfe.ColorToRGB(_fcgcg)
	if _bgbe != nil {
		_ff.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", _fcgcg, _bcfe, _bgbe)
		return _ga.Black
	}
	_gcfde, _egdgf := _eebd.(*_ee.PdfColorDeviceRGB)
	if !_egdgf {
		_ff.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", _eebd)
		return _ga.Black
	}
	return _ga.NRGBA{R: uint8(_gcfde.R() * 255), G: uint8(_gcfde.G() * 255), B: uint8(_gcfde.B() * 255), A: uint8(255)}
}

// _beafa reports whether the two points have exactly equal coordinates.
func _beafa(_fabdg, _dddff _de.Point) bool { return _fabdg.X == _dddff.X && _fabdg.Y == _dddff.Y }

// getRight returns, for each of the `_eabcaa` cells in column `_aage-1`, the paragraph linked
// through that cell's `_cacae` field. It returns nil if any of those paragraphs is taken, or
// if the `_fdec` link of one result is not the next result.
func (_bdca *textTable) getRight() paraList {
	_cfeb := make(paraList, _bdca._eabcaa)
	for _gecdd := 0; _gecdd < _bdca._eabcaa; _gecdd++ {
		_ggcag := _bdca.get(_bdca._aage-1, _gecdd)._cacae
		if _ggcag.taken() {
			return nil
		}
		_cfeb[_gecdd] = _ggcag
	}
	for _eacag := 0; _eacag < _bdca._eabcaa-1; _eacag++ {
		if _cfeb[_eacag]._fdec != _cfeb[_eacag+1] {
			return nil
		}
	}
	return _cfeb
}

// merge combines the rulings into one. The primary coordinate `_facf` is the mean of all
// inputs and the extent [`_fgbfa`, `_ebeb`] is their union. Kind, `_bggf` and Color are taken
// from the first element. The receiver must be non-empty.
func (_febf rulingList) merge() *ruling {
	_efge := _febf[0]._facf
	_fedgab := _febf[0]._fgbfa
	_dceg := _febf[0]._ebeb
	for _, _fabdd := range _febf[1:] {
		_efge += _fabdd._facf
		if _fabdd._fgbfa < _fedgab {
			_fedgab = _fabdd._fgbfa
		}
		if _fabdd._ebeb > _dceg {
			_dceg = _fabdd._ebeb
		}
	}
	_baegd := &ruling{_cgac: _febf[0]._cgac, _bggf: _febf[0]._bggf, Color: _febf[0].Color, _facf: _efge / float64(len(_febf)), _fgbfa: _fedgab, _ebeb: _dceg}
	if _edcb {
		_ff.Log.Info("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073", len(_febf), _baegd)
		for _cfafd, _gfef := range _febf {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _cfafd, _gfef)
		}
	}
	return _baegd
}

// makeRemovals returns a map with one empty word set for every depth index in the bag.
func (_eeea *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	_ffc := make(map[int]map[*textWord]struct{}, len(_eeea._aceg))
	for _dddg := range _eeea._aceg {
		_ffc[_dddg] = make(map[*textWord]struct{})
	}
	return _ffc
}

// _fbde returns `_agecd`, then the values of `_ecdf` strictly between min(`_agecd`, `_ddba`) and
// max(`_agecd`, `_ddba`), then `_ddba`. Scanning stops at the first value at or above the upper
// bound, so `_ecdf` is expected to be ascending.
func _fbde(_ecdf []float64, _agecd, _ddba float64) []float64 {
	_eecdg, _efggg := _agecd, _ddba
	if _efggg < _eecdg {
		_eecdg, _efggg = _efggg, _eecdg
	}
	_dfeg := make([]float64, 0, len(_ecdf)+2)
	_dfeg = append(_dfeg, _agecd)
	for _, _aede := range _ecdf {
		if _aede <= _eecdg {
			continue
		} else if _aede >= _efggg {
			break
		}
		_dfeg = append(_dfeg, _aede)
	}
	_dfeg = append(_dfeg, _ddba)
	return _dfeg
}

// removeWord removes `_feee` from the bucket at depth index `_gdgb` and deletes the bucket if
// it becomes empty.
func (_ddgf *wordBag) removeWord(_feee *textWord, _gdgb int) {
	_aaf := _ddgf._aceg[_gdgb]
	_aaf = _beafe(_aaf, _feee)
	if len(_aaf) == 0 {
		delete(_ddgf._aceg, _gdgb)
	} else {
		_ddgf._aceg[_gdgb] = _aaf
	}
}

// cachedImage pairs a decoded image with its colorspace.
type cachedImage struct {
	_efe *_ee.Image
	_cda _ee.PdfColorspace
}
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// String returns a string describing the current state of the textState stack.
func (_dbcbe *stateStack) String() string {
	stack := *_dbcbe
	header := _be.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(stack))
	lines := []string{header}
	for idx, state := range stack {
		var desc string
		if state != nil {
			desc = state.String()
		} else {
			desc = "\u003c\u006e\u0069l\u003e"
		}
		lines = append(lines, _be.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", idx, desc))
	}
	return _df.Join(lines, "\u000a")
}

// readBefore reports whether the paragraph at index `_gaac` is read before the one at `_bfb`.
// That holds when `_efae` accepts the pair and the first has the larger Lly. It also holds when
// the first's `_dbfe` box lies wholly left of the second's and no other paragraph in the Lly
// range between them (looked up through the ordering `_fcad`) overlaps horizontally the
// interval between the larger Llx and the smaller Urx of the two boxes.
func (_dadb paraList) readBefore(_fcad []int, _gaac, _bfb int) bool {
	first, second := _dadb[_gaac], _dadb[_bfb]
	if _efae(first, second) && first.Lly > second.Lly {
		return true
	}
	if !(first._dbfe.Urx < second._dbfe.Llx) {
		return false
	}

	lo, hi := first.Lly, second.Lly
	if lo > hi {
		lo, hi = hi, lo
	}
	left := _f.Max(first._dbfe.Llx, second._dbfe.Llx)
	right := _f.Min(first._dbfe.Urx, second._dbfe.Urx)

	for _, idx := range _dadb.llyRange(_fcad, lo, hi) {
		if idx == _gaac || idx == _bfb {
			continue
		}
		other := _dadb[idx]
		if other._dbfe.Llx <= right && left <= other._dbfe.Urx {
			return false
		}
	}
	return true
}

// setTextLeading stores `_befc` in the text state's `_bgbc` field. A nil receiver is a no-op.
func (_ebge *textObject) setTextLeading(_befc float64) {
	if _ebge != nil {
		_ebge._ecb._bgbc = _befc
	}
}

// encloses reports whether [`_dffb`, `_accgb`] fits inside the ruling's extent widened by
// `_cbfg` at both ends.
func (_cbba *ruling) encloses(_dffb, _accgb float64) bool {
	lower := _cbba._fgbfa - _cbfg
	upper := _cbba._ebeb + _cbfg
	return lower <= _dffb && _accgb <= upper
}

// inTile returns the paragraphs whose rectangles `_eeedb.contains` accepts, in their original
// order. When `_bcag` is set the result is also printed to standard output.
func (_fdcf paraList) inTile(_eeedb gridTile) paraList {
	var inside paraList
	for i := range _fdcf {
		if para := _fdcf[i]; _eeedb.contains(para.PdfRectangle) {
			inside = append(inside, para)
		}
	}
	if !_bcag {
		return inside
	}
	_be.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _eeedb, len(inside))
	for i, para := range inside {
		_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
	}
	_be.Println("")
	return inside
}

// endsInHyphen reports whether the line ends in a hyphen that `_acaf` accepts. The last word
// must end in a unicode.Hyphen rune. The check then passes if the word's `_adacg` flag is set
// and `_acaf` accepts the word text, or if `_acaf` accepts the text of the whole line.
// The line must contain at least one word.
func (_fgce *textLine) endsInHyphen() bool {
	lastWord := _fgce._eaab[len(_fgce._eaab)-1]
	wordText := lastWord._bfdfd
	lastRune, size := _e.DecodeLastRuneInString(wordText)
	if size <= 0 || !_g.Is(_g.Hyphen, lastRune) {
		return false
	}
	return (lastWord._adacg && _acaf(wordText)) || _acaf(_fgce.text())
}

// bounded is implemented by anything that has a bounding box.
type bounded interface {
	bbox() _ee.PdfRectangle
}

// topoOrder orders the paragraph indexes by a depth-first traversal of the readBefore relation.
// Each index is recorded after every index reachable from it, and the recorded sequence is
// passed through `_dbac` before being returned.
func (_accc paraList) topoOrder() []int {
	if _fgga {
		_ff.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	count := len(_accc)
	visited := make([]bool, count)
	postOrder := make([]int, 0, count)
	llyOrder := _accc.llyOrdering()

	var visit func(_feecc int)
	visit = func(from int) {
		visited[from] = true
		for to := 0; to < count; to++ {
			if visited[to] || !_accc.readBefore(llyOrder, from, to) {
				continue
			}
			visit(to)
		}
		postOrder = append(postOrder, from)
	}
	for i := range visited {
		if !visited[i] {
			visit(i)
		}
	}
	return _dbac(postOrder)
}

type markKind int

// blocked reports whether a ruling held by the bag separates word `_aae` from the bag. The
// horizontal check uses the `_degg` rulings when the word is wholly left or right of the bag.
// The vertical check uses the `_aegf` rulings when the word is wholly below or above it.
func (_fcbb *wordBag) blocked(_aae *textWord) bool {
	switch {
	case _aae.Urx < _fcbb.Llx:
		if _fcbb._degg.blocks(_gbee(_aae.PdfRectangle), _beb(_fcbb.PdfRectangle)) {
			if _beab {
				_ff.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _aae, _fcbb)
			}
			return true
		}
	case _fcbb.Urx < _aae.Llx:
		if _fcbb._degg.blocks(_gbee(_fcbb.PdfRectangle), _beb(_aae.PdfRectangle)) {
			if _beab {
				_ff.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s", _aae, _fcbb)
			}
			return true
		}
	}

	switch {
	case _aae.Ury < _fcbb.Lly:
		if _fcbb._aegf.blocks(_egfd(_aae.PdfRectangle), _cdge(_fcbb.PdfRectangle)) {
			if _beab {
				_ff.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _aae, _fcbb)
			}
			return true
		}
	case _fcbb.Ury < _aae.Lly:
		if _fcbb._aegf.blocks(_egfd(_fcbb.PdfRectangle), _cdge(_aae.PdfRectangle)) {
			if _beab {
				_ff.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s", _aae, _fcbb)
			}
			return true
		}
	}
	return false
}

// xMean returns the mean X coordinate of the line's two end points.
func (_efed lineRuling) xMean() float64 {
	sum := _efed._aagg.X + _efed._aafd.X
	return 0.5 * sum
}
2022-04-27 00:10:33 +00:00
// String returns a string describing `ma`.
2022-06-06 22:48:24 +00:00
func (_bdb TextMarkArray) String() string {
	_ccb := len(_bdb._beaa)
	if _ccb == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	_fgea := _bdb._beaa[0]
	_abae := _bdb._beaa[_ccb-1]
	return _be.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", _ccb, _fgea, _abae)
}

// pullWord adds word `_bdfbb` to the bag at depth index `_bgag`. It grows the bag's rectangle to
// include the word, raises `_adbbf` to the word's `_eedb` if that is larger, and records the
// word in the removal set `_dbfb[_bgag]`, which must already exist.
func (_agcf *wordBag) pullWord(_bdfbb *textWord, _bgag int, _dbfb map[int]map[*textWord]struct{}) {
	_agcf.PdfRectangle = _bgcf(_agcf.PdfRectangle, _bdfbb.PdfRectangle)
	if _bdfbb._eedb > _agcf._adbbf {
		_agcf._adbbf = _bdfbb._eedb
	}
	_agcf._aceg[_bgag] = append(_agcf._aceg[_bgag], _bdfbb)
	_dbfb[_bgag][_bdfbb] = struct{}{}
}

// add inserts `_cdefb` into the set.
func (_aaefc intSet) add(_cdefb int) { _aaefc[_cdefb] = struct{}{} }

// _dfefd removes the last rune from the text of the last mark in `_fgbag` and keeps the running
// byte offset `*_gbaf` consistent.
//
//   - A last mark holding exactly one rune is dropped. The offset becomes the end of the new
//     last mark, or the dropped mark's own Offset when no marks remain.
//   - Otherwise the mark's Text is replaced by `_bgdcd(Text)` and the offset is reduced by the
//     number of bytes removed.
//
// An empty `_fgbag` is returned unchanged. Earlier versions indexed past the slice in both
// empty cases and wrote the shortened text to a local copy, so the stored mark never changed.
func _dfefd(_fgbag []TextMark, _gbaf *int) []TextMark {
	if len(_fgbag) == 0 {
		return _fgbag
	}
	last := len(_fgbag) - 1
	_gfbd := _fgbag[last]
	if len([]rune(_gfbd.Text)) == 1 {
		_fgbag = _fgbag[:last]
		if last == 0 {
			*_gbaf = _gfbd.Offset
		} else {
			_badfd := _fgbag[last-1]
			*_gbaf = _badfd.Offset + len(_badfd.Text)
		}
		return _fgbag
	}
	_bgdc := _bgdcd(_gfbd.Text)
	*_gbaf += len(_bgdc) - len(_gfbd.Text)
	// Write through the slice so the caller sees the shortened text.
	_fgbag[last].Text = _bgdc
	return _fgbag
}

// blocks reports whether any ruling in the list lies between `_ddgc` and `_edec`. The ruling's
// primary coordinate must fall between theirs (with tolerance `_fbfc`) and its extent must
// overlap the extent the two share. It returns false when their extents do not overlap.
func (_gefffe rulingList) blocks(_ddgc, _edec *ruling) bool {
	if _ddgc._fgbfa > _edec._ebeb || _edec._fgbfa > _ddgc._ebeb {
		return false
	}
	_beaff := _f.Max(_ddgc._fgbfa, _edec._fgbfa)
	_fcab := _f.Min(_ddgc._ebeb, _edec._ebeb)
	if _ddgc._facf > _edec._facf {
		_ddgc, _edec = _edec, _ddgc
	}
	for _, _ecf := range _gefffe {
		if _ddgc._facf <= _ecf._facf+_fbfc && _ecf._facf <= _edec._facf+_fbfc && _ecf._fgbfa <= _fcab && _beaff <= _ecf._ebeb {
			return true
		}
	}
	return false
}

// toGrids partitions the rulings into groups of connected rulings and returns the groups that
// isActualGrid accepts, each passed through snapToGroups.
//
// Rulings are visited in the order produced by `_ddec`: most connections first, ties broken by
// `comp`. Each ruling joins the first existing group containing a ruling connected to it, or
// starts a new group. Groups are then ordered largest first. An empty list yields nil.
func (_cddgd rulingList) toGrids() []rulingList {
	if _dgac {
		_ff.Log.Info("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073", _cddgd)
	}
	_faeb := _cddgd.intersections()
	if _dgac {
		_ff.Log.Info("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020", len(_cddgd), len(_faeb))
		for _, _efdc := range _aacc(_faeb) {
			_be.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", _efdc, _faeb[_efdc])
		}
	}
	_caga := make(map[int]intSet, len(_cddgd))
	for _ddegbe := range _cddgd {
		_cefc := _cddgd.connections(_faeb, _ddegbe)
		if len(_cefc) > 0 {
			_caga[_ddegbe] = _cefc
		}
	}
	if _dgac {
		_ff.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064", len(_caga))
		for _, _abgfg := range _aacc(_caga) {
			_be.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", _abgfg, _caga[_abgfg])
		}
	}
	_cfaa := _ddec(len(_cddgd), func(_cdcf, _abdca int) bool {
		_edbdc, _aefcf := len(_caga[_cdcf]), len(_caga[_abdca])
		if _edbdc != _aefcf {
			return _edbdc > _aefcf
		}
		return _cddgd.comp(_cdcf, _abdca)
	})
	if _dgac {
		_ff.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076", _cfaa)
	}
	// Nothing to group; the seed below would otherwise index an empty ordering.
	if len(_cfaa) == 0 {
		return nil
	}
	_geab := [][]int{{_cfaa[0]}}
_gfbb:
	for _, _abefc := range _cfaa[1:] {
		for _caed, _ggfcc := range _geab {
			for _, _fgdc := range _ggfcc {
				if _caga[_fgdc].has(_abefc) {
					_geab[_caed] = append(_ggfcc, _abefc)
					continue _gfbb
				}
			}
		}
		_geab = append(_geab, []int{_abefc})
	}
	if _dgac {
		_ff.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076", _geab)
	}
	_ef.SliceStable(_geab, func(_ecbc, _beccg int) bool { return len(_geab[_ecbc]) > len(_geab[_beccg]) })
	for _, _edeg := range _geab {
		_ef.Slice(_edeg, func(_ffbd, _ebgf int) bool { return _cddgd.comp(_edeg[_ffbd], _edeg[_ebgf]) })
	}
	_agade := make([]rulingList, len(_geab))
	for _cdgc, _fdbbc := range _geab {
		_dfdg := make(rulingList, len(_fdbbc))
		for _bafc, _gccgd := range _fdbbc {
			_dfdg[_bafc] = _cddgd[_gccgd]
		}
		_agade[_cdgc] = _dfdg
	}
	if _dgac {
		_ff.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076", _agade)
	}
	var _abba []rulingList
	for _, _gdca := range _agade {
		if _fgec, _cecc := _gdca.isActualGrid(); _cecc {
			_gdca = _fgec
			_gdca = _gdca.snapToGroups()
			_abba = append(_abba, _gdca)
		}
	}
	if _dgac {
		_dgced("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073", _abba)
		_ff.Log.Info("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064", len(_agade), len(_abba))
	}
	return _abba
}

// compositeColCorridors returns a map with a nil entry for every index in [0, `_aage`).
func (_gbcg *textTable) compositeColCorridors() map[int][]float64 {
	_cbcc := make(map[int][]float64, _gbcg._aage)
	if _bcag {
		_ff.Log.Info("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020", _gbcg._aage)
	}
	for _cfbe := 0; _cfbe < _gbcg._aage; _cfbe++ {
		_cbcc[_cfbe] = nil
	}
	return _cbcc
}
2021-01-07 14:20:10 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct {
2020-11-23 22:15:56 +00:00
2020-12-06 13:03:03 +00:00
// Text is the extracted text.
Text string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// BBox is the bounding box of the text.
2022-06-06 22:48:24 +00:00
BBox _ee . PdfRectangle ;
2021-01-07 14:20:10 +00:00
// Font is the font the text was drawn with.
2022-06-06 22:48:24 +00:00
Font * _ee . PdfFont ;
2021-01-07 14:20:10 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-06-06 22:48:24 +00:00
FillColor _ga . Color ;
2021-01-07 14:20:10 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-06-06 22:48:24 +00:00
StrokeColor _ga . Color ;
2021-01-07 14:20:10 +00:00
// Orientation is the text orientation.
2022-06-06 22:48:24 +00:00
Orientation int ; } ;
// _faab looks up the replacement string for `_fdcg` in the `_addg` table.
// It returns ("", false) unless `_fdcg` is exactly one rune long; otherwise it returns the
// table entry for that rune and whether the rune is present in the table.
func _faab ( _fdcg string ) ( string , bool ) { _gcef := [ ] rune ( _fdcg ) ; if len ( _gcef ) != 1 { return "" , false ; } ; _eage , _dfgcf := _addg [ _gcef [ 0 ] ] ; return _eage , _dfgcf ; } ;
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
func (_ddgg *PageText) ApplyArea(bbox _ee.PdfRectangle) {
	// Keep only the marks that `_gddc` accepts for `bbox`.
	selected := make([]*textMark, 0, len(_ddgg._fcb))
	for _, mark := range _ddgg._fcb {
		if _gddc(mark.bbox(), bbox) {
			selected = append(selected, mark)
		}
	}

	// Build paragraphs per orientation (0, 90, 180, 270) without rulings or tilings.
	var paras paraList
	remaining := len(selected)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		// At most `remaining` marks can match this pass. The previous hint (len - remaining)
		// was 0 on the first pass and never helped.
		marks := make([]*textMark, 0, remaining)
		for _, mark := range selected {
			if mark._bdaa == orient {
				marks = append(marks, mark)
			}
		}
		if len(marks) > 0 {
			paras = append(paras, _ebdb(marks, _ddgg._dda, nil, nil)...)
			remaining -= len(marks)
		}
	}

	text := new(_cf.Buffer)
	paras.writeText(text)
	_ddgg._daaf = text.String()
	_ddgg._gdbg = paras.toTextMarks()
	_ddgg._fdacg = paras.tables()
}

// bbox returns the rectangle spanned by the rulings. The kind of the first ruling decides which
// of primMinMax and secMinMax supplies the X range and which the Y range. For an empty list it
// logs an error and returns the zero rectangle.
func (_dgbf rulingList) bbox() _ee.PdfRectangle {
	var _bfed _ee.PdfRectangle
	if len(_dgbf) == 0 {
		_ff.Log.Error("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073")
		return _ee.PdfRectangle{}
	}
	if _dgbf[0]._cgac == _dfbe {
		_bfed.Llx, _bfed.Urx = _dgbf.secMinMax()
		_bfed.Lly, _bfed.Ury = _dgbf.primMinMax()
	} else {
		_bfed.Llx, _bfed.Urx = _dgbf.primMinMax()
		_bfed.Lly, _bfed.Ury = _dgbf.secMinMax()
	}
	return _bfed
}
// String returns a human readable description of `path`.
// Paths with more than five points are abbreviated to the first two points
// and the last one.
func (_dgcc *subpath) String() string {
	points := _dgcc._eaeg
	count := len(points)
	if count > 5 {
		return _be.Sprintf("%d: %6.2f %6.2f ... %6.2f", count, points[0], points[1], points[count-1])
	}
	return _be.Sprintf("%d: %6.2f", count, points)
}

// extractPageText parses the content stream `_dfg` and runs it through a
// content stream processor whose single handler reacts to every operator.
//
// Parameters:
//   - _dfg:  content stream source.
//   - _beag: resources handed to the processor and to the initial text object.
//   - _eddc: matrix that is multiplied onto the CTM when a text object is
//     opened (BT) and when a form XObject is entered (Do).
//   - _afe:  form recursion level; a level above _cbf aborts with
//     "form stack overflow".
//
// It returns the collected PageText together with the _acc and _deeb counters
// of the text state and the first error encountered. The PageText and the
// counters are returned even when the error is non-nil.
//
// Form XObjects are extracted recursively and their results are cached in the
// extractor's _bb map, keyed by XObject name.
func (_edea *Extractor) extractPageText(_dfg string, _beag *_ee.PdfPageResources, _eddc _de.Matrix, _afe int) (*PageText, int, int, error) {
	_ff.Log.Trace("extractPageText: level=%d", _afe)
	pageText := &PageText{_dda: _edea._eab}
	state := _eabc(_edea._eab)
	var savedStates stateStack
	to := _gabb(_edea, _beag, _bd.GraphicsState{}, &state, &savedStates)
	shapes := shapesState{_gfaf: _eddc, _gcgd: _de.IdentityMatrix(), _agbc: to}
	var inTextObj bool

	if _afe > _cbf {
		overflow := _c.New("form stack overflow")
		_ff.Log.Debug("ERROR: extractPageText. recursion level=%d err=%v", _afe, overflow)
		return pageText, state._acc, state._deeb, overflow
	}

	parser := _bd.NewContentStreamParser(_dfg)
	operations, err := parser.Parse()
	if err != nil {
		_ff.Log.Debug("ERROR: extractPageText parse failed. err=%v", err)
		return pageText, state._acc, state._deeb, err
	}

	processor := _bd.NewContentStreamProcessor(*operations)
	processor.AddHandler(_bd.HandlerConditionEnumAllOperands, "", func(op *_bd.ContentStreamOperation, gs _bd.GraphicsState, resources *_ee.PdfPageResources) error {
		operand := op.Operand
		if _bfc {
			_ff.Log.Info("&&& op=%s", op)
		}

		switch operand {
		case "q":
			if _gcga {
				_ff.Log.Info("ctm=%s", shapes._gcgd)
			}
			savedStates.push(&state)

		case "Q":
			if !savedStates.empty() {
				state = *savedStates.pop()
			}
			shapes._gcgd = gs.CTM
			if _gcga {
				_ff.Log.Info("ctm=%s", shapes._gcgd)
			}

		case "BT":
			if inTextObj {
				// Nested BT: keep the marks of the text object being replaced.
				_ff.Log.Debug("BT called while in a text object")
				pageText._fcb = append(pageText._fcb, to._fbf...)
			}
			inTextObj = true
			textGS := gs
			textGS.CTM = _eddc.Mult(textGS.CTM)
			to = _gabb(_edea, resources, textGS, &state, &savedStates)
			shapes._agbc = to

		case "ET":
			if !inTextObj {
				_ff.Log.Debug("ET called outside of a text object")
			}
			inTextObj = false
			pageText._fcb = append(pageText._fcb, to._fbf...)
			to.reset()

		case "T*":
			to.nextLine()

		case "Td":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_ff.Log.Debug("ERROR: err=%v", err)
				return err
			}
			tx, ty, err := _ecgf(op.Params)
			if err != nil {
				return err
			}
			to.moveText(tx, ty)

		case "TD":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_ff.Log.Debug("ERROR: err=%v", err)
				return err
			}
			tx, ty, err := _ecgf(op.Params)
			if err != nil {
				_ff.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.moveTextSetLeading(tx, ty)

		case "Tj":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ff.Log.Debug("ERROR: Tj op=%s err=%v", op, err)
				return err
			}
			charcodes, ok := _ca.GetStringBytes(op.Params[0])
			if !ok {
				_ff.Log.Debug("ERROR: Tj op=%s GetStringBytes failed", op)
				return _ca.ErrTypeError
			}
			return to.showText(charcodes)

		case "TJ":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ff.Log.Debug("ERROR: TJ err=%v", err)
				return err
			}
			args, ok := _ca.GetArray(op.Params[0])
			if !ok {
				_ff.Log.Debug("ERROR: TJ op=%s GetArrayVal failed", op)
				// The original returned the enclosing function's parse error
				// here, which is always nil while the handler runs, so a
				// malformed TJ is skipped rather than aborting extraction.
				// NOTE(review): sibling operators return ErrTypeError in this
				// situation; confirm whether the leniency is intended.
				return nil
			}
			return to.showTextAdjusted(args)

		case "'":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ff.Log.Debug("ERROR: ' err=%v", err)
				return err
			}
			charcodes, ok := _ca.GetStringBytes(op.Params[0])
			if !ok {
				_ff.Log.Debug("ERROR: ' op=%s GetStringBytes failed", op)
				return _ca.ErrTypeError
			}
			to.nextLine()
			return to.showText(charcodes)

		case "\"":
			if ok, err := to.checkOp(op, 3, true); !ok {
				_ff.Log.Debug("ERROR: \" err=%v", err)
				return err
			}
			// Operand order is `aw ac string "`: word spacing first, then
			// character spacing (equivalent to `aw Tw ac Tc string '`).
			wordSpacing, charSpacing, err := _ecgf(op.Params[:2])
			if err != nil {
				return err
			}
			charcodes, ok := _ca.GetStringBytes(op.Params[2])
			if !ok {
				_ff.Log.Debug("ERROR: \" op=%s GetStringBytes failed", op)
				return _ca.ErrTypeError
			}
			to.setWordSpacing(wordSpacing)
			to.setCharSpacing(charSpacing)
			to.nextLine()
			return to.showText(charcodes)

		case "TL":
			leading, err := _fgge(op)
			if err != nil {
				_ff.Log.Debug("ERROR: TL err=%v", err)
				return err
			}
			to.setTextLeading(leading)

		case "Tc":
			spacing, err := _fgge(op)
			if err != nil {
				_ff.Log.Debug("ERROR: Tc err=%v", err)
				return err
			}
			to.setCharSpacing(spacing)

		case "Tf":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_ff.Log.Debug("ERROR: Tf err=%v", err)
				return err
			}
			fontName, ok := _ca.GetNameVal(op.Params[0])
			if !ok {
				_ff.Log.Debug("ERROR: Tf op=%s GetNameVal failed", op)
				return _ca.ErrTypeError
			}
			fontSize, err := _ca.GetNumberAsFloat(op.Params[1])
			// Check the conversion error itself; the previous code re-tested
			// the `ok` of the name lookup, which is always true at this point.
			if err != nil {
				_ff.Log.Debug("ERROR: Tf op=%s GetFloatVal failed. err=%v", op, err)
				return err
			}
			err = to.setFont(fontName, fontSize)
			// An unsupported font is remembered on the text object instead of
			// aborting the extraction.
			to._ecc = _da.Is(err, _ca.ErrNotSupported)
			if err != nil && !to._ecc {
				return err
			}

		case "Tm":
			if ok, err := to.checkOp(op, 6, true); !ok {
				_ff.Log.Debug("ERROR: Tm err=%v", err)
				return err
			}
			matrix, err := _ca.GetNumbersAsFloat(op.Params)
			if err != nil {
				_ff.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setTextMatrix(matrix)

		case "Tr":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ff.Log.Debug("ERROR: Tr err=%v", err)
				return err
			}
			mode, ok := _ca.GetIntVal(op.Params[0])
			if !ok {
				_ff.Log.Debug("ERROR: Tr op=%s GetIntVal failed", op)
				return _ca.ErrTypeError
			}
			to.setTextRenderMode(mode)

		case "Ts":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ff.Log.Debug("ERROR: Ts err=%v", err)
				return err
			}
			rise, err := _ca.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_ff.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setTextRise(rise)

		case "Tw":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ff.Log.Debug("ERROR: err=%v", err)
				return err
			}
			spacing, err := _ca.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_ff.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setWordSpacing(spacing)

		case "Tz":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ff.Log.Debug("ERROR: err=%v", err)
				return err
			}
			scaling, err := _ca.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_ff.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setHorizScaling(scaling)

		case "cm":
			shapes._gcgd = gs.CTM
			if shapes._gcgd.Singular() {
				// Replace a singular CTM by a pure translation.
				fallback := _de.IdentityMatrix().Translate(shapes._gcgd.Translation())
				_ff.Log.Debug("Singular ctm=%s\u2192%s", shapes._gcgd, fallback)
				shapes._gcgd = fallback
			}
			if _gcga {
				_ff.Log.Info("ctm=%s", shapes._gcgd)
			}

		case "m":
			if len(op.Params) != 2 {
				_ff.Log.Debug("WARN: error while processing `m` operator: %v. Output may be incorrect.", _fg)
				return nil
			}
			coords, err := _ca.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.moveTo(coords[0], coords[1])

		case "l":
			if len(op.Params) != 2 {
				_ff.Log.Debug("WARN: error while processing `l` operator: %v. Output may be incorrect.", _fg)
				return nil
			}
			coords, err := _ca.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.lineTo(coords[0], coords[1])

		case "c":
			if len(op.Params) != 6 {
				return _fg
			}
			coords, err := _ca.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_ff.Log.Debug("Cubic bezier params: %.2f", coords)
			shapes.cubicTo(coords[0], coords[1], coords[2], coords[3], coords[4], coords[5])

		case "v", "y":
			if len(op.Params) != 4 {
				return _fg
			}
			coords, err := _ca.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_ff.Log.Debug("Cubic bezier params: %.2f", coords)
			shapes.quadraticTo(coords[0], coords[1], coords[2], coords[3])

		case "h":
			shapes.closePath()

		case "re":
			if len(op.Params) != 4 {
				return _fg
			}
			coords, err := _ca.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.drawRectangle(coords[0], coords[1], coords[2], coords[3])
			shapes.closePath()

		case "S":
			shapes.stroke(&pageText._dccc)
			shapes.clearPath()

		case "s":
			shapes.closePath()
			shapes.stroke(&pageText._dccc)
			shapes.clearPath()

		case "F":
			shapes.fill(&pageText._aacb)
			shapes.clearPath()

		case "f", "f*":
			shapes.closePath()
			shapes.fill(&pageText._aacb)
			shapes.clearPath()

		case "B", "B*":
			shapes.fill(&pageText._aacb)
			shapes.stroke(&pageText._dccc)
			shapes.clearPath()

		case "b", "b*":
			shapes.closePath()
			shapes.fill(&pageText._aacb)
			shapes.stroke(&pageText._dccc)
			shapes.clearPath()

		case "n":
			shapes.clearPath()

		case "Do":
			if len(op.Params) == 0 {
				_ff.Log.Debug("ERROR: expected XObject name operand for Do operator. Got %+v.", op.Params)
				return _ca.ErrRangeError
			}
			name, ok := _ca.GetName(op.Params[0])
			if !ok {
				_ff.Log.Debug("ERROR: invalid Do operator XObject name operand: %+v.", op.Params[0])
				return _ca.ErrTypeError
			}
			// Only form XObjects carry a content stream of their own.
			_, xobjType := resources.GetXObjectByName(*name)
			if xobjType != _ee.XObjectTypeForm {
				break
			}
			formResult, cached := _edea._bb[name.String()]
			if !cached {
				form, err := resources.GetXObjectFormByName(*name)
				if err != nil {
					_ff.Log.Debug("ERROR: %v", err)
					return err
				}
				formStream, err := form.GetContentStream()
				if err != nil {
					_ff.Log.Debug("ERROR: %v", err)
					return err
				}
				// A form without resources of its own uses the current ones.
				formResources := form.Resources
				if formResources == nil {
					formResources = resources
				}
				formText, formAcc, formDeeb, err := _edea.extractPageText(string(formStream), formResources, _eddc.Mult(gs.CTM), _afe+1)
				if err != nil {
					_ff.Log.Debug("ERROR: %v", err)
					return err
				}
				formResult = textResult{*formText, formAcc, formDeeb}
				_edea._bb[name.String()] = formResult
			}
			shapes._gcgd = gs.CTM
			if _gcga {
				_ff.Log.Info("ctm=%s", shapes._gcgd)
			}
			pageText._fcb = append(pageText._fcb, formResult._bcc._fcb...)
			pageText._dccc = append(pageText._dccc, formResult._bcc._dccc...)
			pageText._aacb = append(pageText._aacb, formResult._bcc._aacb...)
			state._acc += formResult._fgc
			state._deeb += formResult._cfd

		case "rg", "g", "k", "cs", "sc", "scn":
			to._bcgf.ColorspaceNonStroking = gs.ColorspaceNonStroking
			to._bcgf.ColorNonStroking = gs.ColorNonStroking

		case "RG", "G", "K", "CS", "SC", "SCN":
			to._bcgf.ColorspaceStroking = gs.ColorspaceStroking
			to._bcgf.ColorStroking = gs.ColorStroking
		}
		return nil
	})

	err = processor.Process(_beag)
	return pageText, state._acc, state._deeb, err
}

// _bcd returns the translation component of `_ecca` as a point.
func _bcd(_ecca _de.Matrix) _de.Point {
	tx, ty := _ecca.Translation()
	return _de.Point{X: tx, Y: ty}
}

// applyTables returns a new list that starts with one paragraph per table in
// `_bbaeb` (created by newTablePara), followed by every paragraph of the
// receiver whose _ebad flag is not set. The receiver is not modified.
func (_gfefd paraList) applyTables(_bbaeb []*textTable) paraList {
	var result paraList
	for _, table := range _bbaeb {
		result = append(result, table.newTablePara())
	}
	for _, para := range _gfefd {
		if para._ebad {
			continue
		}
		result = append(result, para)
	}
	return result
}
2022-03-13 12:41:53 +00:00
2022-04-27 00:10:33 +00:00
// String returns a human readable description of `s`.
2022-06-06 22:48:24 +00:00
func (_fafd intSet) String() string {
	// Collect the members and sort them so the output is deterministic.
	members := make([]int, 0, len(_fafd))
	for member := range _fafd {
		members = append(members, member)
	}
	_ef.Ints(members)
	return _be.Sprintf("%+v", members)
}

// gridTile is a rectangle with four boolean flags.
type gridTile struct {
	_ee.PdfRectangle
	_efab, _gdge, _gaaf, _geaa bool
}

// _efb (logged as "mergeWordBags") merges word bags that _efdf accepts as
// belonging together.
//
// The input slice is sorted in place by decreasing area, then by decreasing
// height. Walking that order, every bag that has not itself been absorbed
// absorbs each later, not yet absorbed bag for which _efdf reports true; the
// absorbing bag's rectangle is first extended to the left by its _adbbf
// value. The surviving bags are returned.
func _efb(_dgce []*wordBag) []*wordBag {
	if len(_dgce) <= 1 {
		return _dgce
	}
	if _dagg {
		_ff.Log.Info("mergeWordBags:")
	}

	_ef.Slice(_dgce, func(i, j int) bool {
		a, b := _dgce[i], _dgce[j]
		areaA := a.Width() * a.Height()
		areaB := b.Width() * b.Height()
		if areaA != areaB {
			return areaA > areaB
		}
		if a.Height() != b.Height() {
			return a.Height() > b.Height()
		}
		return i < j
	})

	var merged []*wordBag
	absorbed := make(intSet)
	for i := 0; i < len(_dgce); i++ {
		if absorbed.has(i) {
			continue
		}
		keeper := _dgce[i]
		for j := i + 1; j < len(_dgce); j++ {
			// Skip bags that an earlier keeper already absorbed. The previous
			// code tested the outer index here, which can never be set at
			// this point, so such bags were offered for absorption again.
			if absorbed.has(j) {
				continue
			}
			candidate := _dgce[j]
			// Recomputed every round because absorb may change the keeper.
			reach := keeper.PdfRectangle
			reach.Llx -= keeper._adbbf
			if _efdf(reach, candidate.PdfRectangle) {
				keeper.absorb(candidate)
				absorbed.add(j)
			}
		}
		merged = append(merged, keeper)
	}

	// Every input bag must be either kept or absorbed.
	if len(_dgce) != len(merged)+len(absorbed) {
		_ff.Log.Error("mergeWordBags: %d\u2192%d absorbed=%d", len(_dgce), len(merged), len(absorbed))
	}
	return merged
}

// _cgccd packs two ints into a single uint64 key: `_gbgc` is scaled by
// 0x1000000 and `_bcgee` is added to it.
func _cgccd(_gbgc, _bcgee int) uint64 {
	return uint64(_gbgc)*0x1000000 + uint64(_bcgee)
}

// toTextMarks converts the marks of the word (_gceff) to TextMarks, threading
// each one through _gefg together with the running value `_cfeg`.
func (_bcbed *textWord) toTextMarks(_cfeg *int) []TextMark {
	var marks []TextMark
	for _, mark := range _bcbed._gceff {
		marks = _gefg(marks, _cfeg, mark.ToTextMark())
	}
	return marks
}

// merge sorts the paragraphs into reading order and combines them into one
// paragraph whose rectangle is the _bgcf combination of all rectangles and
// whose lines are the concatenation of all lines. It returns nil for an
// empty list.
func (_afbd paraList) merge() *textPara {
	_ff.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_afbd))
	if len(_afbd) == 0 {
		return nil
	}
	_afbd.sortReadingOrder()

	bounds := _afbd[0].PdfRectangle
	lines := _afbd[0]._cecgd
	for _, para := range _afbd[1:] {
		bounds = _bgcf(bounds, para.PdfRectangle)
		lines = append(lines, para._cecgd...)
	}
	return _dafa(bounds, lines)
}

// top returns the last element of the stack without removing it, or nil when
// the stack is empty.
func (_fga *stateStack) top() *textState {
	if _fga.empty() {
		return nil
	}
	return (*_fga)[_fga.size()-1]
}

// _fadf returns the smaller of its two arguments.
func _fadf(_adca, _bfbcg int) int {
	if _adca < _bfbcg {
		return _adca
	}
	return _bfbcg
}

// rectRuling is a rectangle tagged with a ruling kind, a mark kind and a
// colour.
type rectRuling struct {
	_bbbf rulingKind
	_fcgd markKind
	_ga.Color
	_ee.PdfRectangle
}

// primMinMax returns the smallest and largest _facf value among the rulings.
// The list must not be empty.
func (_cgage rulingList) primMinMax() (float64, float64) {
	lo, hi := _cgage[0]._facf, _cgage[0]._facf
	for _, r := range _cgage[1:] {
		if r._facf < lo {
			lo = r._facf
		} else if r._facf > hi {
			hi = r._facf
		}
	}
	return lo, hi
}

// _gced builds a word bag from `_cfc`: the bag is created from the first word
// by _cgdg, each remaining word is appended to the bin selected by _gfb and
// its rectangle is folded into the bag rectangle, and the bag is sorted.
// `_cfc` must contain at least one word.
func _gced(_cfc []*textWord, _gaba float64, _eafb, _ebf rulingList) *wordBag {
	bag := _cgdg(_cfc[0], _gaba, _eafb, _ebf)
	for _, word := range _cfc[1:] {
		bin := _gfb(word._aagef)
		bag._aceg[bin] = append(bag._aceg[bin], word)
		bag.PdfRectangle = _bgcf(bag.PdfRectangle, word.PdfRectangle)
	}
	bag.sort()
	return bag
}
2022-03-13 12:41:53 +00:00
2022-06-06 22:48:24 +00:00
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_gabc PageText) ToText() string {
	return _gabc.Text()
}

// isQuadrilateral reports whether the subpath has four points, or five points
// of which the first and the last coincide.
func (_afdf *subpath) isQuadrilateral() bool {
	points := _afdf._eaeg
	switch n := len(points); {
	case n < 4 || n > 5:
		return false
	case n == 5:
		first, last := points[0], points[4]
		return first.X == last.X && first.Y == last.Y
	}
	return true
}

// text concatenates the _bfdfd strings of the line's words, inserting a
// single space before every word whose _adacg flag is set.
func (_bde *textLine) text() string {
	var sb _df.Builder
	for _, word := range _bde._eaab {
		if word._adacg {
			sb.WriteString("\u0020")
		}
		sb.WriteString(word._bfdfd)
	}
	return sb.String()
}

// addPoint converts (`_ffd`, `_eccg`) with devicePoint and adds it to the
// subpath returned by establishSubpath. When there is no such subpath the
// point is stored in _ddcc and _ccef is set instead.
func (_acbf *shapesState) addPoint(_ffd, _eccg float64) {
	current := _acbf.establishSubpath()
	point := _acbf.devicePoint(_ffd, _eccg)
	if current != nil {
		current.add(point)
		return
	}
	_acbf._ccef = true
	_acbf._ddcc = point
}

// sortReadingOrder reorders the paragraphs in place: it computes the extended
// bounding boxes, sorts with the _cbae comparison and finally applies the
// permutation produced by topoOrder.
func (_dbfbf paraList) sortReadingOrder() {
	_ff.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_dbfbf))
	if len(_dbfbf) <= 1 {
		return
	}
	_dbfbf.computeEBBoxes()
	// NOTE(review): the comparison uses <= 0, so equal elements compare as
	// "less" in both directions; kept as is to preserve the resulting order.
	_ef.Slice(_dbfbf, func(i, j int) bool {
		return _cbae(_dbfbf[i], _dbfbf[j]) <= 0
	})
	order := _dbfbf.topoOrder()
	_dbfbf.reorder(order)
}

// lineTo adds the point (`_ffad`, `_bcfa`) to the current path, logging it
// first when _gcga is enabled.
func (_dceb *shapesState) lineTo(_ffad, _bcfa float64) {
	if _gcga {
		_ff.Log.Info("lineTo(%.2f,%.2f p=%.2f", _ffad, _bcfa, _dceb.devicePoint(_ffad, _bcfa))
	}
	_dceb.addPoint(_ffad, _bcfa)
}

// _gabb creates a text object bound to the given extractor, resources,
// graphics state, text state and state stack. Its two matrices _geff and
// _abg start out as the identity matrix.
func _gabb(_agf *Extractor, _bccd *_ee.PdfPageResources, _cffe _bd.GraphicsState, _cac *textState, _gdg *stateStack) *textObject {
	to := &textObject{
		_decb: _agf,
		_bca:  _bccd,
		_bcgf: _cffe,
		_accf: _gdg,
		_ecb:  _cac,
		_geff: _de.IdentityMatrix(),
		_abg:  _de.IdentityMatrix(),
	}
	return to
}

// putComposite stores a composite cell built from `_gdcd` and `_ceabe` in the
// table's _ebgbb map under the key _cgccd(`_cefeb`, `_gdbce`). The cell's
// bounding box is updated before it is stored. An empty `_gdcd` is logged as
// an error and nothing is stored.
func (_bcgecb *textTable) putComposite(_cefeb, _gdbce int, _gdcd paraList, _ceabe _ee.PdfRectangle) {
	if len(_gdcd) == 0 {
		_ff.Log.Error("textTable) putComposite: empty paras")
		return
	}
	cell := compositeCell{PdfRectangle: _ceabe, paraList: _gdcd}
	if _bcag {
		_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020putComposite(%d,%d)<-%s\n", _cefeb, _gdbce, cell.String())
	}
	cell.updateBBox()
	key := _cgccd(_cefeb, _gdbce)
	_bcgecb._ebgbb[key] = cell
}

// _dcfga reports whether the absolute value of `_bdcaa` is below _bgcb.
func _dcfga(_bdcaa float64) bool {
	magnitude := _f.Abs(_bdcaa)
	return magnitude < _bgcb
}

// yMean returns the mean of the Y coordinates of the ruling's two end points.
func (_ggaea lineRuling) yMean() float64 {
	sum := _ggaea._aagg.Y + _ggaea._aafd.Y
	return 0.5 * sum
}

// Package-level tuning constants. Their consumers are outside this section of
// the file.
const (
	_bcfgf = true
	_adfc  = true
	_aagc  = true
	_aecd  = false
	_afae  = false
	_dbe   = 6
	_ecdd  = 3.0
	_feeb  = 200
	_bcba  = true
	_deee  = true
	_ebggd = true
	_degd  = true
	_ecge  = false
)

// moveLP concatenates a translation by (`_fbge`, `_fdd`) onto the _abg matrix
// and then copies the result into _geff.
func (_afgd *textObject) moveLP(_fbge, _fdd float64) {
	translation := _de.NewMatrix(1, 0, 0, 1, _fbge, _fdd)
	_afgd._abg.Concat(translation)
	_afgd._geff = _afgd._abg
}
2022-02-05 21:34:53 +00:00
2022-06-06 22:48:24 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
type ImageMark struct { Image * _ee . Image ;
2021-12-14 01:08:28 +00:00
2022-06-06 22:48:24 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ; Height float64 ;
// Position of the image in PDF coordinates (lower left corner).
X float64 ; Y float64 ;
// Angle in degrees, if rotated.
Angle float64 ; } ; type gridTiling struct { _ee . PdfRectangle ; _dgcdc [ ] float64 ; _dgbfg [ ] float64 ; _dcaac map [ float64 ] map [ float64 ] gridTile ; } ; type textMark struct { _ee . PdfRectangle ; _bdaa int ; _fdbb string ; _aad string ; _ggfc * _ee . PdfFont ; _ceba float64 ;
_eggg float64 ; _abd _de . Matrix ; _ddf _de . Point ; _ebdd _ee . PdfRectangle ; _eaee _ga . Color ; _fca _ga . Color ; } ; func ( _acbcd * wordBag ) empty ( _eafbc int ) bool { _ , _fag := _acbcd . _aceg [ _eafbc ] ; return ! _fag } ; func ( _bdcf * PageFonts ) extractPageResourcesToFont ( _fb * _ee . PdfPageResources ) error { _dc , _bf := _ca . GetDict ( _fb . Font ) ;
if ! _bf { return _c . New ( _ab ) ; } ; for _ , _dd := range _dc . Keys ( ) { var ( _dac = true ; _fgg [ ] byte ; _bbg string ; ) ; _ae , _dge := _fb . GetFontByName ( _dd ) ; if ! _dge { return _c . New ( _eac ) ; } ; _def , _bea := _ee . NewPdfFontFromPdfObject ( _ae ) ; if _bea != nil { return _bea ;
} ; _bc := _def . FontDescriptor ( ) ; _gfd := _def . FontDescriptor ( ) . FontName . String ( ) ; _gcd := _def . Subtype ( ) ; if _af ( _bdcf . Fonts , _gfd ) { continue ; } ; if len ( _def . ToUnicode ( ) ) == 0 { _dac = false ; } ; if _bc . FontFile != nil { if _gd , _gae := _ca . GetStream ( _bc . FontFile ) ;
_gae { _fgg , _bea = _ca . DecodeStream ( _gd ) ; if _bea != nil { return _bea ; } ; _bbg = _gfd + "\u002e\u0070\u0066\u0062" ; } ; } else if _bc . FontFile2 != nil { if _ac , _bbgd := _ca . GetStream ( _bc . FontFile2 ) ; _bbgd { _fgg , _bea = _ca . DecodeStream ( _ac ) ; if _bea != nil { return _bea ;
} ; _bbg = _gfd + "\u002e\u0074\u0074\u0066" ; } ; } else if _bc . FontFile3 != nil { if _cg , _bac := _ca . GetStream ( _bc . FontFile3 ) ; _bac { _fgg , _bea = _ca . DecodeStream ( _cg ) ; if _bea != nil { return _bea ; } ; _bbg = _gfd + "\u002e\u0063\u0066\u0066" ; } ; } ; if len ( _bbg ) < 1 { _ff . Log . Debug ( _ad ) ;
} ; _bbf := Font { FontName : _gfd , PdfFont : _def , IsCID : _def . IsCID ( ) , IsSimple : _def . IsSimple ( ) , ToUnicode : _dac , FontType : _gcd , FontData : _fgg , FontFileName : _bbg , FontDescriptor : _bc } ; _bdcf . Fonts = append ( _bdcf . Fonts , _bbf ) ; } ; return nil ; } ; func _bgdcd ( _gbea string ) string { _bdgg := [ ] rune ( _gbea ) ;
return string ( _bdgg [ : len ( _bdgg ) - 1 ] ) } ; const ( _fbdff rulingKind = iota ; _dfbe ; _fafbf ; ) ;
// String returns a description of `k`.
// Kinds missing from the _bbfbab table are rendered as "Not a mark: <n>".
func (_fgff markKind) String() string {
	if name, known := _bbfbab[_fgff]; known {
		return name
	}
	return _be.Sprintf("Not a mark: %d", _fgff)
}

// subdivide splits the composite cells of the table along its row and column
// corridors and returns the resulting table of plain cells.
//
// When either corridor map is empty the receiver itself is returned.
// Otherwise the corridors are adjusted by _aaeac, _bfaa supplies the row and
// column offsets and the new dimensions, and every composite cell is split
// and its parts are placed at the cell's offsets in the new table.
func (_feegc *textTable) subdivide() *textTable {
	_feegc.logComposite("subdivide")

	rowCorridors := _feegc.compositeRowCorridors()
	colCorridors := _feegc.compositeColCorridors()
	if _bcag {
		_ff.Log.Info("subdivide:\n\trowCorridors=%s\n\tcolCorridors=%s", _beda(rowCorridors), _beda(colCorridors))
	}
	if len(rowCorridors) == 0 || len(colCorridors) == 0 {
		return _feegc
	}

	_aaeac(rowCorridors)
	_aaeac(colCorridors)
	if _bcag {
		_ff.Log.Info("subdivide fixed:\n\trowCorridors=%s\n\tcolCorridors=%s", _beda(rowCorridors), _beda(colCorridors))
	}

	yOffsets, height := _bfaa(_feegc._eabcaa, rowCorridors)
	xOffsets, width := _bfaa(_feegc._aage, colCorridors)
	cells := make(map[uint64]*textPara, width*height)
	result := &textTable{
		PdfRectangle: _feegc.PdfRectangle,
		_efea:        _feegc._efea,
		_eabcaa:      height,
		_aage:        width,
		_dbfba:       cells,
	}
	if _bcag {
		_ff.Log.Info("subdivide: composite = %d x %d cells= %d x %d\n"+
			"\trowCorridors=%s\n"+
			"\tcolCorridors=%s\n"+
			"\tyOffsets=%+v\n"+
			"\txOffsets=%+v",
			_feegc._aage, _feegc._eabcaa, width, height,
			_beda(rowCorridors), _beda(colCorridors), yOffsets, xOffsets)
	}

	for row := 0; row < _feegc._eabcaa; row++ {
		y0 := yOffsets[row]
		for col := 0; col < _feegc._aage; col++ {
			x0 := xOffsets[col]
			if _bcag {
				_be.Printf("%6d, %2d: x0=%d y0=%d\n", col, row, x0, y0)
			}
			composite, present := _feegc._ebgbb[_cgccd(col, row)]
			if !present {
				continue
			}
			parts := composite.split(rowCorridors[row], colCorridors[col])
			for subRow := 0; subRow < parts._eabcaa; subRow++ {
				for subCol := 0; subCol < parts._aage; subCol++ {
					para := parts.get(subCol, subRow)
					result.put(x0+subCol, y0+subRow, para)
					if _bcag {
						_be.Printf("%8d, %2d: %s\n", x0+subCol, y0+subRow, para)
					}
				}
			}
		}
	}
	return result
}

// _beb creates a ruling of kind _fafbf from the left edge of `_bbef`: the
// primary coordinate is Llx and the extent runs from Lly to Ury.
func _beb(_bbef _ee.PdfRectangle) *ruling {
	edge := &ruling{_cgac: _fafbf}
	edge._facf = _bbef.Llx
	edge._fgbfa = _bbef.Lly
	edge._ebeb = _bbef.Ury
	return edge
}

// bbox returns the smallest rectangle that contains every point of every
// subpath in the section. The first subpath must contain at least one point.
func (_degc pathSection) bbox() _ee.PdfRectangle {
	origin := _degc._fbdc[0]._eaeg[0]
	box := _ee.PdfRectangle{Llx: origin.X, Urx: origin.X, Lly: origin.Y, Ury: origin.Y}

	// extend grows box so that it includes p.
	extend := func(p _de.Point) {
		if p.X < box.Llx {
			box.Llx = p.X
		} else if p.X > box.Urx {
			box.Urx = p.X
		}
		if p.Y < box.Lly {
			box.Lly = p.Y
		} else if p.Y > box.Ury {
			box.Ury = p.Y
		}
	}

	for _, p := range _degc._fbdc[0]._eaeg[1:] {
		extend(p)
	}
	for _, sp := range _degc._fbdc[1:] {
		for _, p := range sp._eaeg {
			extend(p)
		}
	}
	return box
}

// getFontDirect looks up the font dictionary named `_deba` and builds a font
// from it. A failure to build the font is logged and returned together with
// whatever NewPdfFontFromPdfObject produced.
func (_cdf *textObject) getFontDirect(_deba string) (*_ee.PdfFont, error) {
	fontObj, err := _cdf.getFontDict(_deba)
	if err != nil {
		return nil, err
	}
	font, err := _ee.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		_ff.Log.Debug("getFontDirect: NewPdfFontFromPdfObject failed. name=%#q err=%v", _deba, err)
	}
	return font, err
}

// _aaeac (logged as "fixCells") adjusts the float lists stored in `_ggbf` in
// place, visiting the keys in the order returned by _aegg and skipping nil
// entries.
//
// NOTE(review): both slices compared in the loop are read from the same key,
// so the check is "last element > first element of the same entry", and on a
// hit that entry is also stored under the previously visited key. This looks
// like it was meant to compare against the previous key's entry; behaviour is
// kept unchanged here - confirm before altering.
func _aaeac(_ggbf map[int][]float64) {
	if len(_ggbf) <= 1 {
		return
	}
	keys := _aegg(_ggbf)
	if _bcag {
		_ff.Log.Info("fixCells: keys=%+v", keys)
	}

	// Find the first key with a non-nil entry.
	var start, prevKey int
	for start, prevKey = range keys {
		if _ggbf[prevKey] != nil {
			break
		}
	}

	for offset, key := range keys[start:] {
		current := _ggbf[key]
		if current == nil {
			continue
		}
		if _bcag {
			_be.Printf("%4d: k0=%d k1=%d\n", start+offset, prevKey, key)
		}
		candidate := _ggbf[key]
		last := len(candidate) - 1
		if candidate[last] > current[0] {
			candidate[last] = current[0]
			_ggbf[prevKey] = candidate
		}
		prevKey = key
	}
}

// _dafa creates a paragraph with the given rectangle and lines.
func _dafa(_acag _ee.PdfRectangle, _fgaf []*textLine) *textPara {
	para := &textPara{PdfRectangle: _acag}
	para._cecgd = _fgaf
	return para
}
// TextTable is a table of extracted text.
// Cells are stored top-to-bottom, then left-to-right:
// Cells[y] is row y (0-offset) and Cells[y][x] is the cell in column x
// (0-offset) of that row.
// NOTE(review): W and H are presumably the column and row counts — confirm
// against the code that fills Cells.
type TextTable struct {
	W, H  int
	Cells [][]TableCell
}

// _cdge returns a new ruling of kind _dfbe whose _facf coordinate is the lower
// edge (Lly) of `rect` and whose _fgbfa.._ebeb extent runs from rect.Llx to
// rect.Urx.
func _cdge(rect _ee.PdfRectangle) *ruling {
	bottom := ruling{
		_cgac:  _dfbe,
		_facf:  rect.Lly,
		_fgbfa: rect.Llx,
		_ebeb:  rect.Urx,
	}
	return &bottom
}

// has reports whether `key` is a member of the set.
func (set intSet) has(key int) bool {
	_, present := set[key]
	return present
}
2021-12-14 01:08:28 +00:00
2022-04-27 00:10:33 +00:00
// TableCell is a cell in a TextTable.
type TableCell struct {
2021-12-14 01:08:28 +00:00
2022-04-27 00:10:33 +00:00
// Text is the extracted text.
Text string ;
2021-12-14 01:08:28 +00:00
2022-04-27 00:10:33 +00:00
// Marks returns the TextMarks corresponding to the text in Text.
2022-06-06 22:48:24 +00:00
Marks TextMarkArray ; } ; func ( _eba * textObject ) checkOp ( _afb * _bd . ContentStreamOperation , _gdc int , _cgca bool ) ( _bgea bool , _dfd error ) { if _eba == nil { var _aeg [ ] _ca . PdfObject ; if _gdc > 0 { _aeg = _afb . Params ; if len ( _aeg ) > _gdc { _aeg = _aeg [ : _gdc ] ;
} ; } ; _ff . Log . Debug ( "\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076" , _afb . Operand , _aeg ) ; } ; if _gdc >= 0 { if len ( _afb . Params ) != _gdc { if _cgca { _dfd = _c . New ( "\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et" ) ;
} ; _ff . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076" , _afb . Operand , _gdc , len ( _afb . Params ) , _afb . Params ) ;
return false , _dfd ; } ; } ; return true , nil ; } ; func ( _agda rulingList ) mergePrimary ( ) float64 { _afcd := _agda [ 0 ] . _facf ; for _ , _feag := range _agda [ 1 : ] { _afcd += _feag . _facf ; } ; return _afcd / float64 ( len ( _agda ) ) ; } ; func _cffd ( _gfcd , _dcbdc _ee . PdfRectangle ) bool { return _gfcd . Lly <= _dcbdc . Ury && _dcbdc . Lly <= _gfcd . Ury ;
} ; func _abac ( _bfgb , _dfdd float64 ) bool { return _bfgb / _f . Max ( _cbgf , _dfdd ) < _acad } ; func ( _dcb * textObject ) reset ( ) { _dcb . _geff = _de . IdentityMatrix ( ) ; _dcb . _abg = _de . IdentityMatrix ( ) ; _dcb . _fbf = nil ; } ; func ( _gdf * textObject ) showTextAdjusted ( _dacf * _ca . PdfObjectArray ) error { _badf := false ;
for _ , _bdff := range _dacf . Elements ( ) { switch _bdff . ( type ) { case * _ca . PdfObjectFloat , * _ca . PdfObjectInteger : _acec , _ddg := _ca . GetNumberAsFloat ( _bdff ) ; if _ddg != nil { _ff . Log . Debug ( "\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _bdff , _dacf ) ;
return _ddg ; } ; _cggd , _dbcb := - _acec * 0.001 * _gdf . _ecb . _ggda , 0.0 ; if _badf { _dbcb , _cggd = _cggd , _dbcb ; } ; _gdfd := _ebb ( _de . Point { X : _cggd , Y : _dbcb } ) ; _gdf . _geff . Concat ( _gdfd ) ; case * _ca . PdfObjectString : _dbg , _fgfg := _ca . GetStringBytes ( _bdff ) ;
if ! _fgfg { _ff . Log . Trace ( "s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _bdff , _dacf ) ;
return _ca . ErrTypeError ; } ; _gdf . renderText ( _dbg ) ; default : _ff . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _bdff , _dacf ) ;
return _ca . ErrTypeError ; } ; } ; return nil ; } ; func ( _gcf * stateStack ) push ( _ebc * textState ) { _bfd := * _ebc ; * _gcf = append ( * _gcf , & _bfd ) } ; func _edbd ( _aecg _de . Point ) * subpath { return & subpath { _eaeg : [ ] _de . Point { _aecg } } } ; func ( _cgcde * textWord ) appendMark ( _gdgea * textMark , _bfef _ee . PdfRectangle ) { _cgcde . _gceff = append ( _cgcde . _gceff , _gdgea ) ;
_cgcde . PdfRectangle = _bgcf ( _cgcde . PdfRectangle , _gdgea . PdfRectangle ) ; if _gdgea . _ceba > _cgcde . _eedb { _cgcde . _eedb = _gdgea . _ceba ; } ; _cgcde . _aagef = _bfef . Ury - _cgcde . PdfRectangle . Lly ; } ; type textState struct { _bace float64 ; _bbfa float64 ; _cfa float64 ;
_bgbc float64 ; _ggda float64 ; _ccc RenderMode ; _fefb float64 ; _fea * _ee . PdfFont ; _gbc _ee . PdfRectangle ; _acc int ; _deeb int ; } ; func _egfd ( _ebadc _ee . PdfRectangle ) * ruling { return & ruling { _cgac : _dfbe , _facf : _ebadc . Ury , _fgbfa : _ebadc . Llx , _ebeb : _ebadc . Urx } ;
} ; func ( _gbddb rulingList ) secMinMax ( ) ( float64 , float64 ) { _cedg , _cgdfe := _gbddb [ 0 ] . _fgbfa , _gbddb [ 0 ] . _ebeb ; for _ , _fcccd := range _gbddb [ 1 : ] { if _fcccd . _fgbfa < _cedg { _cedg = _fcccd . _fgbfa ; } ; if _fcccd . _ebeb > _cgdfe { _cgdfe = _fcccd . _ebeb ; } ; } ;
return _cedg , _cgdfe ; } ; func ( _cdef rulingList ) removeDuplicates ( ) rulingList { if len ( _cdef ) == 0 { return nil ; } ; _cdef . sort ( ) ; _eccdec := rulingList { _cdef [ 0 ] } ; for _ , _ebag := range _cdef [ 1 : ] { if _ebag . equals ( _eccdec [ len ( _eccdec ) - 1 ] ) { continue ; } ; _eccdec = append ( _eccdec , _ebag ) ;
} ; return _eccdec ; } ; func ( _eaadb paraList ) findTableGrid ( _efeac gridTiling ) ( * textTable , map [ * textPara ] struct { } ) { _gaff := len ( _efeac . _dgcdc ) ; _cdfcd := len ( _efeac . _dgbfg ) ; _gcbgae := textTable { _efea : true , _aage : _gaff , _eabcaa : _cdfcd , _dbfba : make ( map [ uint64 ] * textPara , _gaff * _cdfcd ) , _ebgbb : make ( map [ uint64 ] compositeCell , _gaff * _cdfcd ) } ;
_bebd := make ( map [ * textPara ] struct { } ) ; _fgdd := int ( ( 1.0 - _befcc ) * float64 ( _gaff * _cdfcd ) ) ; _gegc := 0 ; if _gfdb { _ff . Log . Info ( "\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064" , _gaff , _cdfcd ) ;
} ; for _fbfa , _bfggd := range _efeac . _dgbfg { _cffeg , _fedb := _efeac . _dcaac [ _bfggd ] ; if ! _fedb { continue ; } ; for _aeeb , _bgeec := range _efeac . _dgcdc { _dade , _cegf := _cffeg [ _bgeec ] ; if ! _cegf { continue ; } ; _efede := _eaadb . inTile ( _dade ) ; if len ( _efede ) == 0 { _gegc ++ ;
if _gegc > _fgdd { if _gfdb { _ff . Log . Info ( "\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064" , _gegc ) ; } ; return nil , nil ; } ; } else { _gcbgae . putComposite ( _aeeb , _fbfa , _efede , _dade . PdfRectangle ) ; for _ , _fage := range _efede { _bebd [ _fage ] = struct { } { } ;
} ; } ; } ; } ; _acdc := 0 ; for _afbe := 0 ; _afbe < _gaff ; _afbe ++ { _ebfdd := _gcbgae . get ( _afbe , 0 ) ; if _ebfdd == nil || ! _ebfdd . _gbdd { _acdc ++ ; } ; } ; if _acdc == 0 { if _gfdb { _ff . Log . Info ( "\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030" ) ; } ; return nil , nil ;
} ; _dbfec := _gcbgae . reduceTiling ( _efeac , _faag ) ; _dbfec = _dbfec . subdivide ( ) ; return _dbfec , _bebd ; } ; func ( _agbcfa * textTable ) computeBbox ( ) _ee . PdfRectangle { var _aacfd _ee . PdfRectangle ; _eefde := false ; for _bcea := 0 ; _bcea < _agbcfa . _eabcaa ; _bcea ++ { for _aeaf := 0 ;
_aeaf < _agbcfa . _aage ; _aeaf ++ { _dcde := _agbcfa . get ( _aeaf , _bcea ) ; if _dcde == nil { continue ; } ; if ! _eefde { _aacfd = _dcde . PdfRectangle ; _eefde = true ; } else { _aacfd = _bgcf ( _aacfd , _dcde . PdfRectangle ) ; } ; } ; } ; return _aacfd ; } ; func ( _cdda * textObject ) nextLine ( ) { _cdda . moveLP ( 0 , - _cdda . _ecb . _bgbc ) } ;
var _ge = false

// fontsize returns the _caccd value of the paragraph's first line.
// The paragraph must contain at least one line.
func (p *textPara) fontsize() float64 {
	first := p._cecgd[0]
	return first._caccd
}

// shapesState holds the state used while collecting path shapes: two
// matrices, the subpaths gathered so far, a flag and point that clearPath and
// sibling methods maintain, and the associated text object.
type shapesState struct {
	_gcgd _de.Matrix
	_gfaf _de.Matrix
	_deff []*subpath // cleared by clearPath
	_ccef bool       // reset to false by clearPath
	_ddcc _de.Point
	_agbc *textObject
}

// Compile-time switches. Every one of them evaluates to false; the derived
// flags are expressed in terms of a master flag (_dagg, _bcag or _dgac) so
// that a whole group can be enabled by flipping a single constant.
// Those referenced in this file gate diagnostic output.
const (
	_afdb  = false
	_ggfb  = false
	_bfc   = false
	_gagc  = false
	_gcga  = false
	_gcff  = false
	_abbdd = false
	_fgga  = false

	_dagg  = false
	_dfcfb = _dagg && true
	_cbd   = _dfcfb && false
	_ced   = _dagg && true

	_bcag  = false
	_dcfc  = _bcag && false
	_fbdcg = _bcag && true

	_dgac = false
	_acg  = _dgac && false
	_edcb = _dgac && false
	_gfdb = _dgac && true
	_fbgb = _dgac && false
	_beab = _dgac && false
)

// sortStrict sorts the rulings in place by, in order of precedence:
// descending _cgac; ascending _facf (only when _dcfga reports false for the
// difference of the two values); ascending _fgbfa; ascending _ebeb.
func (rl rulingList) sortStrict() {
	before := func(i, j int) bool {
		a, b := rl[i], rl[j]
		switch {
		case a._cgac != b._cgac:
			return a._cgac > b._cgac
		case !_dcfga(a._facf - b._facf):
			return a._facf < b._facf
		case a._fgbfa != b._fgbfa:
			return a._fgbfa < b._fgbfa
		default:
			return a._ebeb < b._ebeb
		}
	}
	_ef.Slice(rl, before)
}

// clearPath discards the collected subpaths and resets the _ccef flag.
func (ss *shapesState) clearPath() {
	ss._deff, ss._ccef = nil, false
	if _gcga {
		// "CLEAR: ss=%s"
		_ff.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", ss)
	}
}

// bbox returns the bounding rectangle of the line.
func (l *textLine) bbox() _ee.PdfRectangle {
	return l.PdfRectangle
}
// isActualGrid checks whether the rulings form a grid.
//
// The list is first passed through augmentGrid. The result must contain at
// least _abed+1 rulings in the first list and _gcad+1 in the second. Then,
// if _ecge is set, the outermost rulings of each list must meet at their ends
// (as judged by _egcg); otherwise both lists must be aligned().
// On success the two augmented lists are returned concatenated, with true.
// On failure nil, false is returned.
func (rl rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := rl.augmentGrid()
	if !(len(verts) >= _abed+1 && len(horzs) >= _gcad+1) {
		if _dgac {
			// "isActualGrid: Not aligned. %d x %d < %d x %d"
			_ff.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(verts), len(horzs), _abed+1, _gcad+1)
		}
		return nil, false
	}
	if _dgac {
		// "isActualGrid: %s : %t & %t → %t"
		_ff.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", rl, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for i, r := range rl {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", i, r)
		}
	}
	if _ecge {
		v0, v1 := verts[0], verts[len(verts)-1]
		h0, h1 := horzs[0], horzs[len(horzs)-1]
		cornersMeet := _egcg(v0._facf-h0._fgbfa) &&
			_egcg(v1._facf-h0._ebeb) &&
			_egcg(h0._facf-v0._ebeb) &&
			_egcg(h1._facf-v0._fgbfa)
		if !cornersMeet {
			if _dgac {
				// "isActualGrid:  Not aligned.\n\tv0=%s\n\tv1=%s\n\th0=%s\n\th1=%s"
				_ff.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", v0, v1, h0, h1)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			if _edcb {
				// "isActualGrid: Not aligned verts. %d"
				_ff.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _dgac {
				// "isActualGrid: Not aligned horzs. %d"
				_ff.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(horzs))
			}
			return nil, false
		}
	}
	grid := append(verts, horzs...)
	return grid, true
}

// processOperand is the content stream handler that extracts images.
// It handles two operators, each only when it has exactly one parameter:
//   - "BI": an inline image. Images whose ImageMask is true are skipped
//     unless the IncludeInlineStencilMasks option is set.
//   - "Do": an XObject, dispatched by type to extractXObjectImage (images) or
//     extractFormImages (forms). Other XObject types are ignored.
//
// Everything else is ignored and nil is returned.
func (ctx *imageExtractContext) processOperand(op *_bd.ContentStreamOperation, gs _bd.GraphicsState, resources *_ee.PdfPageResources) error {
	switch {
	case op.Operand == "\u0042\u0049" && len(op.Params) == 1:
		inline, isInline := op.Params[0].(*_bd.ContentStreamInlineImage)
		if !isInline {
			return nil
		}
		if isMask, found := _ca.GetBoolVal(inline.ImageMask); found {
			if isMask && !ctx._dfc.IncludeInlineStencilMasks {
				return nil
			}
		}
		return ctx.extractInlineImage(inline, gs, resources)
	case op.Operand == "\u0044\u006f" && len(op.Params) == 1:
		name, isName := _ca.GetName(op.Params[0])
		if !isName {
			// "ERROR: Type"
			_ff.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065")
			return _bdc
		}
		_, xobjType := resources.GetXObjectByName(*name)
		switch xobjType {
		case _ee.XObjectTypeImage:
			return ctx.extractXObjectImage(name, gs, resources)
		case _ee.XObjectTypeForm:
			return ctx.extractFormImages(name, gs, resources)
		}
	}
	return nil
}

// findGridTables runs findTableGrid for every tiling in `tilings` and returns
// the tables found. For each table its cells are marked via markCells, and
// every paragraph reported as used by findTableGrid gets its _ebad flag set.
func (paras paraList) findGridTables(tilings []gridTiling) []*textTable {
	if _bcag {
		// "findGridTables: %d paras"
		_ff.Log.Info("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073", len(paras))
		for i, para := range paras {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
	}
	var tables []*textTable
	for i, tiling := range tilings {
		table, used := paras.findTableGrid(tiling)
		if table != nil {
			// "findTableWithGrids: %d"
			table.log(_be.Sprintf("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064", i))
			tables = append(tables, table)
			table.markCells()
		}
		for para := range used {
			para._ebad = true
		}
	}
	if _bcag {
		// "findGridTables: %d tables"
		_ff.Log.Info("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s", len(tables))
	}
	return tables
}

// last returns the final point of the subpath. The subpath must not be empty.
func (sp *subpath) last() _de.Point {
	n := len(sp._eaeg)
	return sp._eaeg[n-1]
}

// highestWord returns the first word in bucket `_bggc` whose _aagef lies in
// the closed range [_aee, _ddeg], or nil if there is none.
func (bag *wordBag) highestWord(_bggc int, _aee, _ddeg float64) *textWord {
	for _, word := range bag._aceg[_bggc] {
		if word._aagef < _aee || word._aagef > _ddeg {
			continue
		}
		return word
	}
	return nil
}

// logComposite prints two grids describing the table's composite cells, each
// headed by `title`: first the number of paragraphs per cell, then the
// (quoted, truncated via _ggece(…, 12)) merged text per cell.
// Does nothing unless _bcag is set.
func (t *textTable) logComposite(title string) {
	if !_bcag {
		return
	}

	// "~~~Para %d x %d %s"
	_ff.Log.Info("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", t._aage, t._eabcaa, title)
	_be.Printf("\u0025\u0035\u0073 \u007c", "")
	for x := 0; x < t._aage; x++ {
		_be.Printf("\u0025\u0033\u0064 \u007c", x)
	}
	_be.Println("")
	_be.Printf("\u0025\u0035\u0073 \u002b", "")
	for x := 0; x < t._aage; x++ {
		_be.Printf("\u0025\u0033\u0073 \u002b", "\u002d\u002d\u002d")
	}
	_be.Println("")
	for y := 0; y < t._eabcaa; y++ {
		_be.Printf("\u0025\u0035\u0064 \u007c", y)
		for x := 0; x < t._aage; x++ {
			cellParas, _ := t._ebgbb[_cgccd(x, y)].parasBBox()
			_be.Printf("\u0025\u0033\u0064 \u007c", len(cellParas))
		}
		_be.Println("")
	}

	// "~~~Text %d x %d %s"
	_ff.Log.Info("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", t._aage, t._eabcaa, title)
	_be.Printf("\u0025\u0035\u0073 \u007c", "")
	for x := 0; x < t._aage; x++ {
		_be.Printf("\u0025\u0031\u0032\u0064\u0020\u007c", x)
	}
	_be.Println("")
	_be.Printf("\u0025\u0035\u0073 \u002b", "")
	for x := 0; x < t._aage; x++ {
		_be.Print("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b")
	}
	_be.Println("")
	for y := 0; y < t._eabcaa; y++ {
		_be.Printf("\u0025\u0035\u0064 \u007c", y)
		for x := 0; x < t._aage; x++ {
			cellParas, _ := t._ebgbb[_cgccd(x, y)].parasBBox()
			shown := ""
			merged := cellParas.merge()
			if merged != nil {
				shown = merged.text()
			}
			// Quote with %q, then strip the surrounding quote characters.
			shown = _be.Sprintf("\u0025\u0071", _ggece(shown, 12))
			shown = shown[1 : len(shown)-1]
			_be.Printf("\u0025\u0031\u0032\u0073\u0020\u007c", shown)
		}
		_be.Println("")
	}
}

// _beda formats `_agfa` as "{k: v, k: v, …}" using the key order returned by
// _aegg and "%d: %.2f" for each entry.
func _beda(_agfa map[int][]float64) string {
	keys := _aegg(_agfa)
	entries := make([]string, len(_agfa))
	for i, key := range keys {
		entries[i] = _be.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", key, _agfa[key])
	}
	joined := _df.Join(entries, "\u002c\u0020")
	return _be.Sprintf("\u007b\u0025\u0073\u007d", joined)
}

// _fgge returns the single parameter of `op` as a float64.
// An "incorrect parameter count" error is returned when `op` does not have
// exactly one parameter.
func _fgge(op *_bd.ContentStreamOperation) (float64, error) {
	if len(op.Params) == 1 {
		return _ca.GetNumberAsFloat(op.Params[0])
	}
	err := _c.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	// "ERROR: %#q should have %d input params, got %d %+v"
	_ff.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", op.Operand, 1, len(op.Params), op.Params)
	return 0.0, err
}

// close closes the subpath: the first point is appended again unless _beafa
// reports that it already matches the last point, the _feeg flag is set, and
// duplicate points are removed.
func (sp *subpath) close() {
	first := sp._eaeg[0]
	if !_beafa(first, sp.last()) {
		sp.add(sp._eaeg[0])
	}
	sp._feeg = true
	sp.removeDuplicates()
}

// _ebdb builds the list of paragraphs for a page from its text marks.
//
// The marks are grouped into words (_abcdc), the words into a page-wide bag
// (_gced) which is divided into per-paragraph bags (_ggfcf, then _efb), and
// each bag is arranged into a paragraph. When there are at least _gfec
// paragraphs, tables are extracted using `_cage`. The result is sorted into
// reading order. Returns nil if there are no marks or no words.
func _ebdb(_gebf []*textMark, _bcaf _ee.PdfRectangle, _efbf rulingList, _cage []gridTiling) paraList {
	// "makeTextPage: %d elements pageSize=%.2f"
	_ff.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_gebf), _bcaf)
	if len(_gebf) == 0 {
		return nil
	}
	words := _abcdc(_gebf, _bcaf)
	if len(words) == 0 {
		return nil
	}
	_efbf.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := _efbf.vertsHorzs()
	pageBag := _gced(words, _bcaf.Ury, verts, horzs)
	paraBags := _ggfcf(pageBag, _bcaf.Ury, verts, horzs)
	paraBags = _efb(paraBags)

	paras := make(paraList, 0, len(paraBags))
	for _, bag := range paraBags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}
	if len(paras) >= _gfec {
		paras = paras.extractTables(_cage)
	}
	paras.sortReadingOrder()
	// "sorted in reading order"
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}

// Render mode bit flags; the values are 1, 2 and 4 and may be combined.
const (
	RenderModeStroke RenderMode = 1 << iota
	RenderModeFill
	RenderModeClip
)

// pop removes the top entry of the stack and returns a pointer to a copy of
// it, or nil if the stack is empty.
func (stack *stateStack) pop() *textState {
	if stack.empty() {
		return nil
	}
	topIdx := len(*stack) - 1
	top := *(*stack)[topIdx]
	*stack = (*stack)[:topIdx]
	return &top
}

// growTable repeatedly extends the table by one row (from getDown) and/or one
// column (from getRight) until neither is available.
//
// When both are available and they share the same untaken last paragraph, the
// row is added first, then the column is re-queried and added if still
// available, with the shared paragraph placed in the new corner cell.
// Otherwise a row is preferred over a column.
func (t *textTable) growTable() {
	appendRow := func(row paraList) {
		t._eabcaa++
		for x := 0; x < t._aage; x++ {
			t.put(x, t._eabcaa-1, row[x])
		}
	}
	appendColumn := func(col paraList) {
		t._aage++
		for y := 0; y < t._eabcaa; y++ {
			t.put(t._aage-1, y, col[y])
		}
	}

	if _dcfc {
		// "growTable"
		t.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}
	for pass := 0; ; pass++ {
		grown := false
		down := t.getDown()
		right := t.getRight()
		if _dcfc {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", pass, t)
			_be.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", down)
			_be.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", right)
		}
		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				appendRow(down)
				// The table changed shape, so the right-hand column must
				// be looked up again before it can be added.
				if right = t.getRight(); right != nil {
					appendColumn(right)
					t.put(t._aage-1, t._eabcaa-1, corner)
				}
				grown = true
			}
		}
		switch {
		case grown:
			// Already extended diagonally on this pass.
		case down != nil:
			appendRow(down)
		case right != nil:
			appendColumn(right)
		default:
			return
		}
	}
}

// complete reports whether the tile has all four borders.
func (tile gridTile) complete() bool {
	return tile.numBorders() == 4
}
// ExtractPageImages returns the images found in the extractor's page content,
// together with the position and size information recorded for each image.
// `options` controls the extraction and may be nil, in which case default
// options are used. With the default options inline stencil masks are not
// extracted.
func (e *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	ctx := &imageExtractContext{_dfc: options}
	if err := ctx.extractContentStreamImages(e._gc, e._ea); err != nil {
		return nil, err
	}
	return &PageImages{Images: ctx._eee}, nil
}

// _cgbed removes the element at index `_egcf` from `_ffbce` by shifting the
// following elements down one place, and returns the shortened slice.
// The input slice's backing array is modified.
func _cgbed(_ffbce []*textWord, _egcf int) []*textWord {
	last := len(_ffbce) - 1
	copy(_ffbce[_egcf:], _ffbce[_egcf+1:])
	return _ffbce[:last]
}

// _fcfe returns the horizontal gap between the left edge of `_gag` and the
// right edge of `_ggeg` (negative when they overlap horizontally).
func _fcfe(_gag, _ggeg bounded) float64 {
	left := _gag.bbox().Llx
	right := _ggeg.bbox().Urx
	return left - right
}

// findTables returns the tables found among the paragraphs.
// Neighbours are computed and the list is sorted in place (by _aef) first.
// Grid-based detection runs when _bcba is set and text-based detection when
// _deee is set; the results are concatenated in that order.
func (paras paraList) findTables(_bgcgg []gridTiling) []*textTable {
	paras.addNeighbours()
	_ef.Slice(paras, func(i, j int) bool {
		return _aef(paras[i], paras[j]) < 0
	})

	var tables []*textTable
	if _bcba {
		tables = append(tables, paras.findGridTables(_bgcgg)...)
	}
	if _deee {
		tables = append(tables, paras.findTextTables()...)
	}
	return tables
}

// clear resets the subpath to its zero value.
func (sp *subpath) clear() {
	*sp = subpath{}
}

// _gebfb classifies the segment between two points by passing its absolute X
// and Y extents, and the tolerance _acaa, to _fbfd.
func _gebfb(_dcfaf, _fgdb _de.Point) rulingKind {
	dx := _f.Abs(_dcfaf.X - _fgdb.X)
	dy := _f.Abs(_dcfaf.Y - _fgdb.Y)
	return _fbfd(dx, dy, _acaa)
}

// _beacge matches `^\s*(\d+\.?|[Iiv]+)\s*\)?$`: a run of digits with an
// optional trailing dot, or a run of the letters I, i, v, optionally followed
// by a closing parenthesis.
var _beacge = _cd.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")

// textWord is a group of text marks with their combined bounding box.
type textWord struct {
	_ee.PdfRectangle
	_aagef float64 // compared against depth ranges in wordBag.highestWord
	_bfdfd string
	_gceff []*textMark // the marks that make up the word
	_eedb  float64
	_adacg bool // set by textLine.markWordBoundaries
}

// gridIntersecting reports whether both ends of the _fgbfa.._ebeb extent of
// `r` match those of `_eegcf`, as judged by _cfff.
func (r *ruling) gridIntersecting(_eegcf *ruling) bool {
	if !_cfff(r._fgbfa, _eegcf._fgbfa) {
		return false
	}
	return _cfff(r._ebeb, _eegcf._ebeb)
}

// _ggfcf partitions the words of `_caca` into a list of word bags, removing
// the words from `_caca` as they are assigned.
//
// For each depth index, while words remain at that index, a new bag is seeded
// with the first word in reading order and then grown by repeated scanBand
// passes until a pass adds nothing. Each pass tries a "vertical" and a
// "horizontal" band; only when neither adds anything does it count the words
// in a wider band and, if that count passes the thresholds below, absorb them
// with an "other" band.
func _ggfcf(_caca *wordBag, _acac float64, _gcac, _bec rulingList) []*wordBag {
	var bags []*wordBag
	for _, depthIdx := range _caca.depthIndexes() {
		grew := false
		for !_caca.empty(depthIdx) {
			readingIdx := _caca.firstReadingIndex(depthIdx)
			seed := _caca.firstWord(readingIdx)
			bag := _cgdg(seed, _acac, _gcac, _bec)
			_caca.removeWord(seed, readingIdx)
			if _abbdd {
				// "firstWord ^^^^ %s"
				_ff.Log.Info("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073", seed.String())
			}

			// Keep scanning for as long as the previous pass added words.
			for first := true; first; first = grew {
				grew = false
				// The three gaps scale with the bag's _adbbf value.
				wideGap := _effd * bag._adbbf
				readingGap := _bgbf * bag._adbbf
				depthGap := _dddf * bag._adbbf
				if _abbdd {
					// "paraWords depth %.2f - %.2f maxIntraDepthGap=%.2f maxIntraReadingGap=%.2f"
					_ff.Log.Info("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066", bag.minDepth(), bag.maxDepth(), depthGap, readingGap)
				}
				// "vertical"
				if _caca.scanBand("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c", bag, _dadcg(_efcg, 0), bag.minDepth()-depthGap, bag.maxDepth()+depthGap, _gfcf, false, false) > 0 {
					grew = true
				}
				// "horizontal"
				if _caca.scanBand("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c", bag, _dadcg(_efcg, readingGap), bag.minDepth(), bag.maxDepth(), _eecb, false, false) > 0 {
					grew = true
				}
				if grew {
					continue
				}
				count := _caca.scanBand("", bag, _dadcg(_eabca, wideGap), bag.minDepth(), bag.maxDepth(), _debad, true, false)
				if count > 0 {
					span := (bag.maxDepth() - bag.minDepth()) / bag._adbbf
					if (count > 1 && float64(count) > 0.3*span) || count <= 10 {
						// "other"
						if _caca.scanBand("\u006f\u0074\u0068e\u0072", bag, _dadcg(_eabca, wideGap), bag.minDepth(), bag.maxDepth(), _debad, false, true) > 0 {
							grew = true
						}
					}
				}
			}
			bags = append(bags, bag)
		}
	}
	return bags
}
// NewFromContents returns a new Extractor for the given content stream text
// and page resources. The returned error is always nil.
func NewFromContents(contents string, resources *_ee.PdfPageResources) (*Extractor, error) {
	// "extractor.NewFromContents"
	const usageKey = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"
	extractor := &Extractor{
		_gc: contents,
		_ea: resources,
		_bg: map[string]fontEntry{},
		_bb: map[string]textResult{},
	}
	_dg.TrackUse(usageKey)
	return extractor, nil
}

// _bebb returns the keys of `_dbad` sorted in descending order.
func _bebb(_dbad map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_dbad))
	for key := range _dbad {
		keys = append(keys, key)
	}
	_ef.Float64s(keys)
	// Reverse the ascending order in place.
	for lo, hi := 0, len(keys)-1; lo < hi; lo, hi = lo+1, hi-1 {
		keys[lo], keys[hi] = keys[hi], keys[lo]
	}
	return keys
}

// extractContentStreamImages parses the content stream `_fc` and runs a
// content stream processor over it with processOperand registered for all
// operands. The image cache and the options are initialised on first use.
// Returns the parse error or the error from processing.
func (ctx *imageExtractContext) extractContentStreamImages(_fc string, _ede *_ee.PdfPageResources) error {
	operations, err := _bd.NewContentStreamParser(_fc).Parse()
	if err != nil {
		return err
	}
	if ctx._aca == nil {
		ctx._aca = map[*_ca.PdfObjectStream]*cachedImage{}
	}
	if ctx._dfc == nil {
		ctx._dfc = &ImageExtractOptions{}
	}
	processor := _bd.NewContentStreamProcessor(*operations)
	processor.AddHandler(_bd.HandlerConditionEnumAllOperands, "", ctx.processOperand)
	return processor.Process(_ede)
}

// _bcbac replaces, in place, the coordinates of every point of every subpath
// in `_afgfg` with the result of _ffgcc. It does nothing when _efdg is
// negative.
func _bcbac(_afgfg []pathSection) {
	if _efdg < 0.0 {
		return
	}
	if _dgac {
		// "granularize: %d subpath sections"
		_ff.Log.Info("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073", len(_afgfg))
	}
	for secIdx, section := range _afgfg {
		for pathIdx, path := range section._fbdc {
			for ptIdx, before := range path._eaeg {
				path._eaeg[ptIdx] = _de.Point{X: _ffgcc(before.X), Y: _ffgcc(before.Y)}
				if !_dgac {
					continue
				}
				after := path._eaeg[ptIdx]
				if !_beafa(before, after) {
					delta := _de.Point{X: after.X - before.X, Y: after.Y - before.Y}
					// "%4d - %4d - %4d: %.2f → %.2f (%g)\n"
					_be.Printf("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a", secIdx, pathIdx, ptIdx, before, after, delta)
				}
			}
		}
	}
}

// _bagc returns the distinct inner keys of `_gagd` in ascending order.
func _bagc(_gagd map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_gagd))
	seen := make(map[float64]struct{}, len(_gagd))
	for _, inner := range _gagd {
		for key := range inner {
			if _, dup := seen[key]; !dup {
				keys = append(keys, key)
				seen[key] = struct{}{}
			}
		}
	}
	_ef.Float64s(keys)
	return keys
}

// wordBag is a collection of words with a bounding box.
type wordBag struct {
	_ee.PdfRectangle
	_adbbf       float64 // scales the gap thresholds used when growing a bag
	_degg, _aegf rulingList
	_fec         float64
	_aceg        map[int][]*textWord // words bucketed by an integer index
}

// isExportable reports whether the table should be exported.
// Tables with _efea set always are. Otherwise the table is exportable unless
// every cell in column 0 is present and its text is at most one rune long or
// matches _beacge.
func (t *textTable) isExportable() bool {
	if t._efea {
		return true
	}
	trivialFirstCell := func(y int) bool {
		cell := t.get(0, y)
		if cell == nil {
			return false
		}
		text := cell.text()
		if _e.RuneCountInString(text) <= 1 {
			return true
		}
		return _beacge.MatchString(text)
	}
	for y := 0; y < t._eabcaa; y++ {
		if !trivialFirstCell(y) {
			return true
		}
	}
	return false
}

// connections returns the set of indexes of `rl` that are reachable from
// `_fcgca` by following `_bega`, where index j is linked to i when
// _bega[j] contains i.
func (rl rulingList) connections(_bega map[int]intSet, _fcgca int) intSet {
	reached := make(intSet)
	visited := make(intSet)
	var walk func(int)
	walk = func(from int) {
		if visited.has(from) {
			return
		}
		visited.add(from)
		for idx := range rl {
			if _bega[idx].has(from) {
				reached.add(idx)
			}
		}
		for idx := range rl {
			if reached.has(idx) {
				walk(idx)
			}
		}
	}
	walk(_fcgca)
	return reached
}

// put stores `_ddea` as the cell at (`_aeec`, `_fcdbf`).
func (t *textTable) put(_aeec, _fcdbf int, _ddea *textPara) {
	key := _cgccd(_aeec, _fcdbf)
	t._dbfba[key] = _ddea
}

// Sentinel errors.
var (
	// _bdc is the "type check error".
	_bdc = _c.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")
	// _fg is the "range check error".
	_fg = _c.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)

// emptyCompositeRow reports whether no composite cell in row `_dffcc` holds
// any paragraphs.
func (t *textTable) emptyCompositeRow(_dffcc int) bool {
	for x := 0; x < t._aage; x++ {
		cell, found := t._ebgbb[_cgccd(x, _dffcc)]
		if found && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// markWordBoundaries sets the _adacg flag on every word (after the first)
// whose gap to the preceding word, as measured by _fcfe, is at least
// _edeb times the line's _caccd value.
func (l *textLine) markWordBoundaries() {
	minGap := _edeb * l._caccd
	// i indexes the tail slice, so l._eaab[i] is the word before `word`.
	for i, word := range l._eaab[1:] {
		previous := l._eaab[i]
		if _fcfe(word, previous) >= minGap {
			word._adacg = true
		}
	}
}
// TextMarkArray is a collection of TextMarks, held in the unexported _beaa
// slice.
type TextMarkArray struct {
	_beaa []TextMark
}
// PageImages holds the images extracted from a PDF page together with their
// spatial information (display position and size).
type PageImages struct {
	Images []ImageMark
}

// _bdaae creates a textWord from `_bbac`, which must not be empty.
// The word's bounding box is the union of the marks' boxes, its _eedb is the
// largest _ceba among the marks, and its _aagef is `_febcf`.Ury minus the
// word's Lly.
func _bdaae(_bbac []*textMark, _febcf _ee.PdfRectangle) *textWord {
	first := _bbac[0]
	box, largest := first.PdfRectangle, first._ceba
	for _, mark := range _bbac[1:] {
		box = _bgcf(box, mark.PdfRectangle)
		if mark._ceba > largest {
			largest = mark._ceba
		}
	}
	word := textWord{
		PdfRectangle: box,
		_gceff:       _bbac,
		_aagef:       _febcf.Ury - box.Lly,
		_eedb:        largest,
	}
	return &word
}

// yNeighbours returns, via eventNeighbours, the neighbours of each paragraph
// along the Y axis. Each paragraph contributes an opening event at its Lly
// and a closing event at its Ury; when `_bdab` is non-zero both are widened
// outwards by _bdab times the paragraph's fontsize().
func (paras paraList) yNeighbours(_bdab float64) map[*textPara][]int {
	events := make([]event, 2*len(paras))
	for i, para := range paras {
		low, high := para.Lly, para.Ury
		// fontsize() is only consulted when padding was requested.
		if _bdab != 0 {
			low -= _bdab * para.fontsize()
			high += _bdab * para.fontsize()
		}
		events[2*i] = event{low, true, i}
		events[2*i+1] = event{high, false, i}
	}
	return paras.eventNeighbours(events)
}

// Log messages for font lookup failures.
const (
	// _ab is "ERROR: Can't convert font object, invalid type".
	_ab = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
	// _eac is "ERROR: Can't get font properties, font not found".
	_eac = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"
	// _ad is "ERROR: Can't get font stream, invalid type".
	_ad = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
)
// _fgdg rounds `_agcg` to the nearest multiple of `_dfbcf` and returns the result as
// an int. A `_dfbcf` of 0 is treated as 1.
func _fgdg(_agcg float64, _dfbcf int) int {
	step := 1.0
	if _dfbcf != 0 {
		step = float64(_dfbcf)
	}
	return int(_f.Round(_agcg/step) * step)
}

// _bgbcf converts the filled sub-paths in `_cagea` to rulings ("makeFillRulings" in
// its log output). Sub-paths that are not quadrilaterals, or for which makeRectRuling
// reports failure, are skipped.
func _bgbcf(_cagea []pathSection) rulingList {
	_bcbac(_cagea)
	if _dgac {
		_ff.Log.Info("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs", len(_cagea))
	}
	var rulings rulingList
	for _, section := range _cagea {
		for _, subpath := range section._fbdc {
			if !subpath.isQuadrilateral() {
				if _dgac {
					_ff.Log.Error("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073", subpath)
				}
				continue
			}
			made, ok := subpath.makeRectRuling(section.Color)
			if ok {
				rulings = append(rulings, made)
				continue
			}
			if _fbgb {
				_ff.Log.Error("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073", subpath)
			}
		}
	}
	if _dgac {
		_ff.Log.Info("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073", rulings.String())
	}
	return rulings
}

// getDown returns, for every column of the table's last row, the paragraph linked
// through that cell's `_fdec` field. It returns nil if any of those paragraphs is
// taken (or missing) or if consecutive ones are not chained through `_cacae`.
func (_fbed *textTable) getDown() paraList {
	below := make(paraList, _fbed._aage)
	for col := range below {
		para := _fbed.get(col, _fbed._eabcaa-1)._fdec
		if para.taken() {
			return nil
		}
		below[col] = para
	}
	for col := 1; col < len(below); col++ {
		if below[col-1]._cacae != below[col] {
			return nil
		}
	}
	return below
}

// setTextMatrix sets both `_geff` and `_abg` of the text object to the matrix built
// from the six values in `_fgf`. Slices of any other length are logged and ignored.
func (_ddca *textObject) setTextMatrix(_fgf []float64) {
	if len(_fgf) != 6 {
		_ff.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", len(_fgf))
		return
	}
	matrix := _de.NewMatrix(_fgf[0], _fgf[1], _fgf[2], _fgf[3], _fgf[4], _fgf[5])
	_ddca._geff = matrix
	_ddca._abg = _ddca._geff
}

// log prints the table dimensions, bounding box and every non-nil cell, prefixed by
// `_cegbe`. It does nothing unless the `_bcag` debug flag is set.
func (_bbcga *textTable) log(_cegbe string) {
	if !_bcag {
		return
	}
	_ff.Log.Info("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066", _cegbe, _bbcga._aage, _bbcga._eabcaa, _bbcga._efea, _bbcga.PdfRectangle)
	for row := 0; row < _bbcga._eabcaa; row++ {
		for col := 0; col < _bbcga._aage; col++ {
			if cell := _bbcga.get(col, row); cell != nil {
				_be.Printf("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a", col, row, cell.PdfRectangle, _ggece(cell.text(), 50), _e.RuneCountInString(cell.text()))
			}
		}
	}
}

// depthRange returns the sorted keys of `_aceg` that lie in [`_aabd`, `_aeea`], or nil
// if there are none.
func (_deceb *wordBag) depthRange(_aabd, _aeea int) []int {
	var keys []int
	for key := range _deceb._aceg {
		if key < _aabd || key > _aeea {
			continue
		}
		keys = append(keys, key)
	}
	if len(keys) > 0 {
		_ef.Ints(keys)
		return keys
	}
	return nil
}

// firstWord returns the first word stored under key `_aag` of `_aceg`.
func (_eccgc *wordBag) firstWord(_aag int) *textWord {
	words := _eccgc._aceg[_aag]
	return words[0]
}

// equals reports whether the two rulings have the same kind (`_cgac`) and whether
// `_cfff` accepts each pair of their `_facf`, `_fgbfa` and `_ebeb` values.
func (_ebegc *ruling) equals(_baefcf *ruling) bool {
	if _ebegc._cgac != _baefcf._cgac {
		return false
	}
	if !_cfff(_ebegc._facf, _baefcf._facf) {
		return false
	}
	if !_cfff(_ebegc._fgbfa, _baefcf._fgbfa) {
		return false
	}
	return _cfff(_ebegc._ebeb, _baefcf._ebeb)
}

// moveTextSetLeading stores -`_dgca` in the text state's `_bgbc` field and then calls
// moveLP(`_gdbc`, `_dgca`).
func (_eegc *textObject) moveTextSetLeading(_gdbc, _dgca float64) {
	_eegc._ecb._bgbc = -_dgca
	_eegc.moveLP(_gdbc, _dgca)
}

// tables returns the exportable tables attached to the paragraphs of the list,
// converted with toTextTable.
func (_cebab paraList) tables() []TextTable {
	var exported []TextTable
	if _bcag {
		_ff.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range _cebab {
		table := para._cegd
		if table == nil || !table.isExportable() {
			continue
		}
		exported = append(exported, table.toTextTable())
	}
	return exported
}

// primaries returns the distinct `_facf` values of the rulings in ascending order.
func (_fcga rulingList) primaries() []float64 {
	seen := make(map[float64]struct{}, len(_fcga))
	for _, r := range _fcga {
		seen[r._facf] = struct{}{}
	}
	values := make([]float64, 0, len(seen))
	for v := range seen {
		values = append(values, v)
	}
	_ef.Float64s(values)
	return values
}

// log prints the tiling's coordinates and tiles, labelled with `_ddddb`. It does
// nothing unless the `_gfdb` debug flag is set.
func (_efdd gridTiling) log(_ddddb string) {
	if !_gfdb {
		return
	}
	_ff.Log.Info("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071", len(_efdd._dgcdc), len(_efdd._dgbfg), _ddddb)
	_be.Printf("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a", _efdd._dgcdc)
	_be.Printf("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a", _efdd._dgbfg)
	for i, lly := range _efdd._dgbfg {
		if row, found := _efdd._dcaac[lly]; found {
			_be.Printf("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a", i, lly)
			for j, llx := range _efdd._dgcdc {
				if tile, present := row[llx]; present {
					_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", j, tile.String())
				}
			}
		}
	}
}

// addDiacritic appends `_gfbc` to the text of the word's last mark and normalizes the
// result to NFKC.
func (_dbadd *textWord) addDiacritic(_gfbc string) {
	last := _dbadd._gceff[len(_dbadd._gceff)-1]
	last._fdbb = _gf.NFKC.String(last._fdbb + _gfbc)
}

// pullWord appends `_agbf` to the line and removes it from `_eded` at index `_eagf`.
func (_gcbg *textLine) pullWord(_eded *wordBag, _agbf *textWord, _eagf int) {
	_gcbg.appendWord(_agbf)
	_eded.removeWord(_agbf, _eagf)
}

// reduce returns a copy of the table without its empty composite rows and columns.
// If there is nothing to remove the receiver itself is returned.
func (_cfdgf *textTable) reduce() *textTable {
	keptRows := make([]int, 0, _cfdgf._eabcaa)
	keptCols := make([]int, 0, _cfdgf._aage)
	for row := 0; row < _cfdgf._eabcaa; row++ {
		if _cfdgf.emptyCompositeRow(row) {
			continue
		}
		keptRows = append(keptRows, row)
	}
	for col := 0; col < _cfdgf._aage; col++ {
		if _cfdgf.emptyCompositeColumn(col) {
			continue
		}
		keptCols = append(keptCols, col)
	}
	if len(keptRows) == _cfdgf._eabcaa && len(keptCols) == _cfdgf._aage {
		return _cfdgf
	}
	reduced := &textTable{
		_efea:   _cfdgf._efea,
		_aage:   len(keptCols),
		_eabcaa: len(keptRows),
		_dbfba:  make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _bcag {
		_ff.Log.Info("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064", _cfdgf._aage, _cfdgf._eabcaa, len(keptCols), len(keptRows))
		_ff.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_ff.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}
	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			paras, bbox := _cfdgf.getComposite(oldCol, oldRow)
			if paras == nil {
				continue
			}
			if _bcag {
				_be.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newCol, newRow, oldCol, oldRow, _ggece(paras.merge().text(), 50))
			}
			reduced.putComposite(newCol, newRow, paras, bbox)
		}
	}
	return reduced
}

// alignsPrimary reports whether both rulings have the same kind and their `_facf`
// values differ by less than half of `_fbfc`.
func (_bfgcb *ruling) alignsPrimary(_dfbfa *ruling) bool {
	if _bfgcb._cgac != _dfbfa._cgac {
		return false
	}
	return _f.Abs(_bfgcb._facf-_dfbfa._facf) < _fbfc*0.5
}

// _effec returns nil when a license key is present and licensed, or when `_ge` is
// set. Otherwise it prints a notice to standard output and returns an error.
func _effec(_cbdgb *PageText) error {
	key := _dg.GetLicenseKey()
	licensed := key != nil && key.IsLicensed()
	if licensed || _ge {
		return nil
	}
	_be.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_be.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _c.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}

// _aegg returns the keys of `_ddaee` in ascending order.
func _aegg(_ddaee map[int][]float64) []int {
	keys := make([]int, 0, len(_ddaee))
	for key := range _ddaee {
		keys = append(keys, key)
	}
	_ef.Ints(keys)
	return keys
}

// _bfbe classifies the segment between the two points by passing its absolute x and
// y extents, together with `_acad`, to `_fbfd`.
func _bfbe(_bgbd, _ffdcd _de.Point) rulingKind {
	return _fbfd(_f.Abs(_bgbd.X-_ffdcd.X), _f.Abs(_bgbd.Y-_ffdcd.Y), _acad)
}

// taken reports whether the paragraph is nil or has its `_ebad` flag set.
func (_gadcf *textPara) taken() bool {
	if _gadcf == nil {
		return true
	}
	return _gadcf._ebad
}

// numBorders returns how many of the tile's four border flags are set.
func (_aefcg gridTile) numBorders() int {
	count := 0
	for _, set := range [...]bool{_aefcg._gdge, _aefcg._geaa, _aefcg._gaaf, _aefcg._efab} {
		if set {
			count++
		}
	}
	return count
}

// llyOrdering returns the indexes of the paragraphs ordered by increasing Lly.
// Paragraphs with equal Lly keep their relative order.
func (_cgcd paraList) llyOrdering() []int {
	order := make([]int, 0, len(_cgcd))
	for i := 0; i < len(_cgcd); i++ {
		order = append(order, i)
	}
	_ef.SliceStable(order, func(a, b int) bool {
		return _cgcd[order[a]].Lly < _cgcd[order[b]].Lly
	})
	return order
}
// String returns a description of the text mark: its bounding box, font size and text.
func (_fba *textMark) String() string {
	bbox, size, text := _fba.PdfRectangle, _fba._ceba, _fba._fdbb
	return _be.Sprintf("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022", bbox, size, text)
}

// lines returns the lines of all paragraphs in the list, in list order.
func (_dddgb paraList) lines() []*textLine {
	var all []*textLine
	for i := range _dddgb {
		all = append(all, _dddgb[i]._cecgd...)
	}
	return all
}

// _egcg reports whether the magnitude of `_gaab` is below the tolerance `_fbfc`.
func _egcg(_gaab float64) bool {
	magnitude := _f.Abs(_gaab)
	return magnitude < _fbfc
}

// _bgcf returns the smallest rectangle that contains both `_fgcb` and `_ccgc`.
func _bgcf(_fgcb, _ccgc _ee.PdfRectangle) _ee.PdfRectangle {
	var union _ee.PdfRectangle
	union.Llx = _f.Min(_fgcb.Llx, _ccgc.Llx)
	union.Lly = _f.Min(_fgcb.Lly, _ccgc.Lly)
	union.Urx = _f.Max(_fgcb.Urx, _ccgc.Urx)
	union.Ury = _f.Max(_fgcb.Ury, _ccgc.Ury)
	return union
}

// toCellTextMarks returns the text marks of all lines of the paragraph.
// When `_bcfgf` is set and a line other than the last ends in a hyphen, its marks are
// passed through `_dfefd` and no separator follows it. Otherwise every line except the
// last is followed by the separator `_bccdg` derives from the `_decg` values of the
// line and its successor. `_ddee` is handed on to the helpers that create the marks.
func (_ccea *textPara) toCellTextMarks(_ddee *int) []TextMark {
	var marks []TextMark
	last := len(_ccea._cecgd) - 1
	for i, line := range _ccea._cecgd {
		lineMarks := line.toTextMarks(_ddee)
		hyphenated := _bcfgf && line.endsInHyphen() && i != last
		if hyphenated {
			lineMarks = _dfefd(lineMarks, _ddee)
		}
		marks = append(marks, lineMarks...)
		if hyphenated || i == last {
			continue
		}
		marks = _gcag(marks, _ddee, _bccdg(line._decg, _ccea._cecgd[i+1]._decg))
	}
	return marks
}

// textLine is a sequence of words (`_eaab`) with their bounding box.
// NOTE(review): `_decg` looks like the line's depth on the page and `_caccd` like its
// font size; confirm against the code that fills them in.
type textLine struct {
	_ee.PdfRectangle
	_decg  float64
	_eaab  []*textWord
	_caccd float64
}

// writeText writes the paragraph's text to `_dgaf`.
// A paragraph without a table is written with writeCellText. For a table every cell is
// written followed by a space (a tab stands in for a missing cell) and rows are
// separated by newlines. Errors returned by the writer are ignored.
func (_feaa *textPara) writeText(_dgaf _d.Writer) {
	table := _feaa._cegd
	if table == nil {
		_feaa.writeCellText(_dgaf)
		return
	}
	for row := 0; row < table._eabcaa; row++ {
		for col := 0; col < table._aage; col++ {
			if cell := table.get(col, row); cell != nil {
				cell.writeCellText(_dgaf)
			} else {
				_dgaf.Write([]byte("\u0009"))
			}
			_dgaf.Write([]byte("\u0020"))
		}
		if row < table._eabcaa-1 {
			_dgaf.Write([]byte("\u000a"))
		}
	}
}

// textTable is a grid of paragraphs. `_aage` is the number of columns and `_eabcaa`
// the number of rows, judging by how they bound the arguments of get(column, row).
// `_dbfba` holds the plain cells and `_ebgbb` the composite cells.
// NOTE(review): `_efea` is printed as "grid" by log; presumably it marks tables found
// from ruling grids. Confirm.
type textTable struct {
	_ee.PdfRectangle
	_aage   int
	_eabcaa int
	_efea   bool
	_dbfba  map[uint64]*textPara
	_ebgbb  map[uint64]compositeCell
}
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct {
	Fonts []Font
}

const _bda = 10

// renderText decodes `_fgad` with the current font and appends one text mark per
// decoded string to `_aga._fbf`, advancing the text matrix `_aga._geff` after every
// mark that is emitted.
//
// Nothing is done when the text object's `_ecc` flag is set (logged as "Invalid font").
// Strings consisting of a single NUL rune are skipped, as are marks that newTextMark
// reports as lying outside the page; in both cases the text matrix is not advanced.
// An error is returned when the font has no metrics for one of the character codes.
func (_aga *textObject) renderText(_fgad []byte) error {
	if _aga._ecc {
		_ff.Log.Debug("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e")
		return nil
	}

	font := _aga.getCurrentFont()
	charcodes := font.BytesToCharcodes(_fgad)
	texts, numChars, numMisses := font.CharcodesToStrings(charcodes)
	if numMisses > 0 {
		_ff.Log.Debug("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064", numChars, numMisses)
	}
	_aga._ecb._acc += numChars
	_aga._ecb._deeb += numMisses

	// The short names below follow the labels used in the debug output of this
	// function: tfs (font size) and th (horizontal scaling as a fraction).
	state := _aga._ecb
	tfs := state._ggda
	th := state._cfa / 100.0

	// Glyph metrics are scaled by `_feef`, except for Type3 fonts.
	glyphScale := _feef
	if font.Subtype() == "\u0054\u0079\u0070e\u0033" {
		glyphScale = 1
	}

	// Width of a space: by rune, then by char code 32, then from the default font.
	spaceMetrics, ok := font.GetRuneMetrics(' ')
	if !ok {
		spaceMetrics, ok = font.GetCharMetrics(32)
	}
	if !ok {
		spaceMetrics, _ = _ee.DefaultFont().GetRuneMetrics(' ')
	}
	spaceWidth := spaceMetrics.Wx * glyphScale
	_ff.Log.Trace("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066", spaceWidth, texts, font, tfs)

	stateMatrix := _de.NewMatrix(tfs*th, 0, 0, tfs, 0, state._fefb)
	if _gcff {
		_ff.Log.Info("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071", len(charcodes), charcodes, texts)
	}
	// The %q verb is given the decoded strings themselves. Passing len(texts), as
	// this call used to, printed the count as a quoted character literal.
	_ff.Log.Trace("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071", len(charcodes), charcodes, texts)

	fillColor := _aga.getFillColor()
	strokeColor := _aga.getStrokeColor()

	for i, text := range texts {
		runes := []rune(text)
		if len(runes) == 1 && runes[0] == '\x00' {
			continue
		}
		code := charcodes[i]

		// Rendering matrix at the start of this glyph.
		trm := _aga._bcgf.CTM.Mult(_aga._geff).Mult(stateMatrix)

		// `_bbfa` (logged as "tw") is added only for a single space.
		wordSpacing := 0.0
		if len(runes) == 1 && runes[0] == 32 {
			wordSpacing = state._bbfa
		}

		metrics, found := font.GetCharMetrics(code)
		if !found {
			_ff.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073", code, runes, runes, font)
			return _be.Errorf("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064", font.String(), code)
		}

		// t0 is the displacement to the end of the glyph; t additionally includes
		// `_bace` (logged as "tc") and is used to advance the text matrix.
		glyphSize := _de.Point{X: metrics.Wx * glyphScale, Y: metrics.Wy * glyphScale}
		t0 := _de.Point{X: (glyphSize.X*tfs + wordSpacing) * th}
		t := _de.Point{X: (glyphSize.X*tfs + state._bace + wordSpacing) * th}
		if _gcff {
			_ff.Log.Info("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066", tfs, state._bace, state._bbfa, th)
			_ff.Log.Info("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f", glyphSize, t0, t)
		}
		td0 := _ebb(t0)
		td := _ebb(t)
		end := _aga._bcgf.CTM.Mult(_aga._geff).Mult(td0)
		if _gagc {
			_ff.Log.Info("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073", _aga._bcgf.CTM, _aga._geff, td, _bcd(_aga._bcgf.CTM.Mult(_aga._geff).Mult(td)), td0, end, _bcd(end))
		}

		mark, onPage := _aga.newTextMark(_a.ExpandLigatures(runes), trm, _bcd(end), _f.Abs(spaceWidth*trm.ScalingFactorX()), font, _aga._ecb._bace, fillColor, strokeColor)
		if !onPage {
			_ff.Log.Debug("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067")
			continue
		}

		// NOTE(review): font has already been dereferenced above, so the nil branch
		// is only reachable if its methods tolerate a nil receiver. Kept as is.
		if font == nil {
			_ff.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e")
		} else if font.Encoder() == nil {
			_ff.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073", font)
		} else if r, decoded := font.Encoder().CharcodeToRune(code); decoded {
			mark._aad = string(r)
		}
		_ff.Log.Trace("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073", i, code, mark, trm)
		_aga._fbf = append(_aga._fbf, &mark)
		_aga._geff.Concat(td)
	}
	return nil
}

// _edgg looks for horizontal gaps ("row corridors" in its debug output) that separate
// the lines of all cells in `_dgeb`.
//
// The lines of every cell are sorted by `_decg` (ties, as decided by `_dcfga`, by Llx)
// and split into groups wherever a line's top lies below the lowest bottom seen so
// far; the midpoint of each such gap is a candidate border. The borders are returned
// only if every cell has lines in every group (hasLines), otherwise nil is returned.
// A row without any lines yields nil.
func _edgg(_dgeb []compositeCell) []float64 {
	var lines []*textLine
	numParas := 0
	for _, cell := range _dgeb {
		numParas += len(cell.paraList)
		lines = append(lines, cell.lines()...)
	}
	_ef.Slice(lines, func(i, j int) bool {
		a, b := lines[i], lines[j]
		da, db := a._decg, b._decg
		if !_dcfga(da - db) {
			return da < db
		}
		return a.Llx < b.Llx
	})
	if _bcag {
		_be.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", numParas, len(lines))
		for i, line := range lines {
			_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, line)
		}
	}

	// Without lines there is nothing to separate; lines[0] below would panic.
	if len(lines) == 0 {
		return nil
	}

	var borders []float64
	lowest := lines[0]
	var groups [][]*textLine
	group := []*textLine{lowest}
	for i, line := range lines[1:] {
		if line.Ury < lowest.Lly {
			border := 0.5 * (line.Ury + lowest.Lly)
			if _bcag {
				_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a", i, line.Ury, lowest.Lly, border, lowest, line)
			}
			borders = append(borders, border)
			groups = append(groups, group)
			group = nil
		}
		group = append(group, line)
		if line.Lly < lowest.Lly {
			lowest = line
		}
	}
	if len(group) > 0 {
		groups = append(groups, group)
	}
	if _bcag {
		_be.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", borders)
	}
	if _bcag {
		_ff.Log.Info("\u0072\u006f\u0077\u003d\u0025\u0064", len(_dgeb))
		for i, cell := range _dgeb {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, cell)
		}
		_ff.Log.Info("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d", len(groups))
		for i, g := range groups {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a", i, len(g))
			for j, line := range g {
				_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", j, line)
			}
		}
	}

	// The borders are only usable if every cell has lines in every group.
	allSpan := true
	for gi, g := range groups {
		groupSpans := true
		for ci, cell := range _dgeb {
			if _bcag {
				_be.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", gi, len(groups), ci, len(_dgeb), cell)
			}
			if !cell.hasLines(g) {
				if _bcag {
					_be.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", gi, len(groups), ci, len(_dgeb))
				}
				groupSpans = false
				break
			}
		}
		if !groupSpans {
			allSpan = false
			break
		}
	}
	if !allSpan {
		if _bcag {
			_ff.Log.Info("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg")
		}
		borders = nil
	}
	if _bcag && borders != nil {
		_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", borders)
	}
	return borders
}

// setCharSpacing stores `_gfg` in the text state's `_bace` field. It is a no-op on a
// nil text object.
func (_ebe *textObject) setCharSpacing(_gfg float64) {
	if _ebe == nil {
		return
	}
	_ebe._ecb._bace = _gfg
	if _gcff {
		_ff.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _gfg, _ebe._ecb.String())
	}
}

// _aacc returns the keys of `_bgdbd` in ascending order.
func _aacc(_bgdbd map[int]intSet) []int {
	keys := make([]int, 0, len(_bgdbd))
	for key := range _bgdbd {
		keys = append(keys, key)
	}
	_ef.Ints(keys)
	return keys
}

// _efae applies `_dege` to the `_dbfe` rectangles of the two paragraphs.
func _efae(_abge, _cddg *textPara) bool {
	return _dege(_abge._dbfe, _cddg._dbfe)
}
// getFontDict looks up the font named `_dfef` in the text object's resources.
// It returns (nil, nil) when the text object has no resources and an error when the
// resources do not contain the font.
func (_eaed *textObject) getFontDict(_dfef string) (_ece _ca.PdfObject, _fded error) {
	resources := _eaed._bca
	if resources == nil {
		_ff.Log.Debug("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071", _dfef)
		return nil, nil
	}
	fontObj, found := resources.GetFontByName(_ca.PdfObjectName(_dfef))
	if found {
		return fontObj, nil
	}
	_ff.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071", _dfef)
	return nil, _c.New("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073")
}

// toTextMarks returns the text marks of all words of the line. A space mark is
// inserted (via `_gcag`) before each word whose `_adacg` flag is set. `_abcf` is
// handed on to the helpers that create the marks.
func (_agaa *textLine) toTextMarks(_abcf *int) []TextMark {
	var marks []TextMark
	for i := range _agaa._eaab {
		word := _agaa._eaab[i]
		if word._adacg {
			marks = _gcag(marks, _abcf, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_abcf)...)
	}
	return marks
}

// _bbfbab maps each markKind to its display name.
var _bbfbab = map[markKind]string{
	_edeaf: "\u0073\u0074\u0072\u006f\u006b\u0065",
	_cgbe:  "\u0066\u0069\u006c\u006c",
	_ggebb: "\u0061u\u0067\u006d\u0065\u006e\u0074",
}
// markCells sets the `_ebad` flag (the one textPara.taken reports) on every non-nil
// cell of the table.
func (_bdbe *textTable) markCells() {
	for row := 0; row < _bdbe._eabcaa; row++ {
		for col := 0; col < _bdbe._aage; col++ {
			cell := _bdbe.get(col, row)
			if cell == nil {
				continue
			}
			cell._ebad = true
		}
	}
}

// maxDepth returns the bag's `_fec` value minus the bottom (Lly) of its bounding box.
func (_aaef *wordBag) maxDepth() float64 {
	bottom := _aaef.Lly
	return _aaef._fec - bottom
}
// parasBBox returns the cell's paragraph list and its bounding rectangle.
func ( _ddcfg compositeCell ) parasBBox ( ) ( paraList , _ee . PdfRectangle ) { return _ddcfg . paraList , _ddcfg . PdfRectangle ; } ;
// _ggef passes the absolute x and y distances between the two points to `_abac` and
// returns its verdict.
func _ggef ( _abcfc , _ecad _de . Point ) bool { _abgb := _f . Abs ( _abcfc . X - _ecad . X ) ; _cgeg := _f . Abs ( _abcfc . Y - _ecad . Y ) ; return _abac ( _abgb , _cgeg ) ;
} ;
// emptyCompositeColumn reports whether none of the composite cells stored for column
// `_fcbfe` (rows 0 to `_eabcaa`-1) contains a paragraph.
func ( _cgbdg * textTable ) emptyCompositeColumn ( _fcbfe int ) bool { for _adfcd := 0 ; _adfcd < _cgbdg . _eabcaa ; _adfcd ++ { if _gaga , _egdbe := _cgbdg . _ebgbb [ _cgccd ( _fcbfe , _adfcd ) ] ; _egdbe { if len ( _gaga . paraList ) > 0 { return false ; } ; } ; } ; return true ; } ;
// textPara is a paragraph: a bounding box plus the lines (`_cecgd`) it contains.
// `_cegd` is the table attached to the paragraph, if any, and `_ebad` is the flag
// reported by taken(). `_bbff`, `_cacae`, `_decgf` and `_fdec` are the neighbour links
// filled in by paraList.addNeighbours: the paragraphs lying entirely to the left
// (smaller x), to the right (larger x), at larger y and at smaller y respectively.
// NOTE(review): the roles of `_dbfe` and `_gbdd` are not visible here; confirm.
type textPara struct { _ee . PdfRectangle ;
_dbfe _ee . PdfRectangle ; _cecgd [ ] * textLine ; _cegd * textTable ; _ebad bool ; _gbdd bool ; _bbff * textPara ; _cacae * textPara ; _decgf * textPara ; _fdec * textPara ; } ;
// _gbee returns a ruling of kind `_fafbf` positioned at the rectangle's Urx and
// spanning from its Lly to its Ury.
func _gbee ( _gaeba _ee . PdfRectangle ) * ruling { return & ruling { _cgac : _fafbf , _facf : _gaeba . Urx , _fgbfa : _gaeba . Lly , _ebeb : _gaeba . Ury } ;
} ;
// _ffgcc rounds `_decda` to the nearest multiple of `_efdg`.
func _ffgcc ( _decda float64 ) float64 { return _efdg * _f . Round ( _decda / _efdg ) } ;
// _egcd concatenates the ruling lists in `_edefg` and returns the result of calling
// vertsHorzs on the combined list.
func _egcd ( _edefg [ ] rulingList ) ( rulingList , rulingList ) { var _agecb rulingList ; for _ , _ccbd := range _edefg { _agecb = append ( _agecb , _ccbd ... ) ; } ; return _agecb . vertsHorzs ( ) ;
} ;
// del removes `_ffaf` from the set.
func ( _gfbgb intSet ) del ( _ffaf int ) { delete ( _gfbgb , _ffaf ) } ;
// findTextTables tries to grow a table from every paragraph that is not taken and has
// a non-zero width. Tables with fewer than `_gfec` cells are discarded; the cells of
// the remaining tables are marked (markCells) and the tables are returned.
func ( _acecc paraList ) findTextTables ( ) [ ] * textTable { var _gadc [ ] * textTable ; for _ , _bdgf := range _acecc { if _bdgf . taken ( ) || _bdgf . Width ( ) == 0 { continue ; } ; _abgeb := _bdgf . isAtom ( ) ; if _abgeb == nil { continue ;
} ; _abgeb . growTable ( ) ; if _abgeb . _aage * _abgeb . _eabcaa < _gfec { continue ; } ; _abgeb . markCells ( ) ; _abgeb . log ( "\u0067\u0072\u006fw\u006e" ) ; _gadc = append ( _gadc , _abgeb ) ; } ; return _gadc ; } ;
// arrangeText arranges the words of the bag into lines and returns them as a
// paragraph, or nil if no lines were produced. Words are removed from the bag as they
// are pulled into lines.
func ( _bbfe * wordBag ) arrangeText ( ) * textPara { _bbfe . sort ( ) ;
if _adfc { _bbfe . removeDuplicates ( ) ; } ; var _fdgc [ ] * textLine ; for _ , _ddfg := range _bbfe . depthIndexes ( ) { for ! _bbfe . empty ( _ddfg ) { _cddd := _bbfe . firstReadingIndex ( _ddfg ) ; _ebbg := _bbfe . firstWord ( _cddd ) ; _defd := _dea ( _bbfe , _cddd ) ; _aeaba := _ebbg . _eedb ;
// The depth band searched for further words and the two gap thresholds are all
// proportional to `_eedb` of the line's first word.
_eaga := _ebbg . _aagef - _bafe * _aeaba ; _dbacb := _ebbg . _aagef + _bafe * _aeaba ; _fgbaa := _cgb * _aeaba ; _fce := _fgee * _aeaba ; _dgfgg : for { var _fbff * textWord ; _fcac := 0 ; for _ , _cgag := range _bbfe . depthBand ( _eaga , _dbacb ) { _gebb := _bbfe . highestWord ( _cgag , _eaga , _dbacb ) ;
// A candidate whose gap to the line's last word (`_fcfe`) is below -`_fce` ends the
// line; one whose gap exceeds `_fgbaa` is ignored. Among the rest, the candidate that
// `_efa` orders first is kept.
if _gebb == nil { continue ; } ; _efbe := _fcfe ( _gebb , _defd . _eaab [ len ( _defd . _eaab ) - 1 ] ) ; if _efbe < - _fce { break _dgfgg ; } ; if _efbe > _fgbaa { continue ; } ; if _fbff != nil && _efa ( _gebb , _fbff ) >= 0 { continue ; } ; _fbff = _gebb ; _fcac = _cgag ; } ; if _fbff == nil { break ;
} ; _defd . pullWord ( _bbfe , _fbff , _fcac ) ; } ; _defd . markWordBoundaries ( ) ; _fdgc = append ( _fdgc , _defd ) ; } ; } ; if len ( _fdgc ) == 0 { return nil ; } ; _ef . Slice ( _fdgc , func ( _agadd , _ebfcf int ) bool { return _cbae ( _fdgc [ _agadd ] , _fdgc [ _ebfcf ] ) < 0 } ) ; _acaac := _dafa ( _bbfe . PdfRectangle , _fdgc ) ;
// Optional debug dump of the paragraph, its lines, words and marks.
if _dagg { _ff . Log . Info ( "\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073" , _acaac . String ( ) ) ; if _dfcfb { for _bcfc , _gaag := range _acaac . _cecgd { _be . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _bcfc , _gaag . String ( ) ) ;
if _cbd { for _dgdc , _dcaag := range _gaag . _eaab { _be . Printf ( "\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a" , _dgdc , _dcaag . String ( ) ) ; for _ffgd , _fbdf := range _dcaag . _gceff { _be . Printf ( "\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n" , _ffgd , _fbdf . String ( ) ) ;
} ; } ; } ; } ; } ; } ; return _acaac ; } ;
// addNeighbours fills in the neighbour links (`_bbff`, `_cacae`, `_decgf`, `_fdec`)
// of every paragraph in the list.
// Candidates come from yNeighbours (for the horizontal links) and xNeighbours (for the
// vertical links). In each direction the nearest candidate is chosen, rejected if
// another candidate overlaps it along that direction, and accepted only if `_cffd`
// (horizontal) or `_dege` (vertical) approves the pair of rectangles. Finally every
// link that is not reciprocated by the paragraph it points to is cleared.
func ( _bdcc paraList ) addNeighbours ( ) { _bedae := func ( _egfde [ ] int , _feed * textPara ) ( [ ] * textPara , [ ] * textPara ) { _fefe := make ( [ ] * textPara , 0 , len ( _egfde ) - 1 ) ; _bcbabc := make ( [ ] * textPara , 0 , len ( _egfde ) - 1 ) ; for _ , _bada := range _egfde { _ddcfa := _bdcc [ _bada ] ;
if _ddcfa . Urx <= _feed . Llx { _fefe = append ( _fefe , _ddcfa ) ; } else if _ddcfa . Llx >= _feed . Urx { _bcbabc = append ( _bcbabc , _ddcfa ) ; } ; } ; return _fefe , _bcbabc ; } ; _cbgg := func ( _fgeda [ ] int , _gbeg * textPara ) ( [ ] * textPara , [ ] * textPara ) { _bfab := make ( [ ] * textPara , 0 , len ( _fgeda ) - 1 ) ;
_aeeea := make ( [ ] * textPara , 0 , len ( _fgeda ) - 1 ) ; for _ , _fabgd := range _fgeda { _bfeca := _bdcc [ _fabgd ] ; if _bfeca . Ury <= _gbeg . Lly { _aeeea = append ( _aeeea , _bfeca ) ; } else if _bfeca . Lly >= _gbeg . Ury { _bfab = append ( _bfab , _bfeca ) ; } ; } ; return _bfab , _aeeea ;
} ; _cgge := _bdcc . yNeighbours ( _gdgbd ) ; for _ , _edbf := range _bdcc { _eedf := _cgge [ _edbf ] ; if len ( _eedf ) == 0 { continue ; } ; _cfcc , _abdcc := _bedae ( _eedf , _edbf ) ; if len ( _cfcc ) == 0 && len ( _abdcc ) == 0 { continue ; } ; if len ( _cfcc ) > 0 { _cgdbd := _cfcc [ 0 ] ; for _ , _ebcg := range _cfcc [ 1 : ] { if _ebcg . Urx >= _cgdbd . Urx { _cgdbd = _ebcg ;
} ; } ; for _ , _dbacc := range _cfcc { if _dbacc != _cgdbd && _dbacc . Urx > _cgdbd . Llx { _cgdbd = nil ; break ; } ; } ; if _cgdbd != nil && _cffd ( _edbf . PdfRectangle , _cgdbd . PdfRectangle ) { _edbf . _bbff = _cgdbd ; } ; } ; if len ( _abdcc ) > 0 { _eecc := _abdcc [ 0 ] ; for _ , _eddfc := range _abdcc [ 1 : ] { if _eddfc . Llx <= _eecc . Llx { _eecc = _eddfc ;
} ; } ; for _ , _dgfa := range _abdcc { if _dgfa != _eecc && _dgfa . Llx < _eecc . Urx { _eecc = nil ; break ; } ; } ; if _eecc != nil && _cffd ( _edbf . PdfRectangle , _eecc . PdfRectangle ) { _edbf . _cacae = _eecc ; } ; } ; } ; _cgge = _bdcc . xNeighbours ( _bce ) ; for _ , _cbaab := range _bdcc { _cbdb := _cgge [ _cbaab ] ;
if len ( _cbdb ) == 0 { continue ; } ; _efebe , _bdbg := _cbgg ( _cbdb , _cbaab ) ; if len ( _efebe ) == 0 && len ( _bdbg ) == 0 { continue ; } ; if len ( _bdbg ) > 0 { _cegdg := _bdbg [ 0 ] ; for _ , _acba := range _bdbg [ 1 : ] { if _acba . Ury >= _cegdg . Ury { _cegdg = _acba ; } ; } ; for _ , _cdfe := range _bdbg { if _cdfe != _cegdg && _cdfe . Ury > _cegdg . Lly { _cegdg = nil ;
break ; } ; } ; if _cegdg != nil && _dege ( _cbaab . PdfRectangle , _cegdg . PdfRectangle ) { _cbaab . _fdec = _cegdg ; } ; } ; if len ( _efebe ) > 0 { _acdd := _efebe [ 0 ] ; for _ , _gfge := range _efebe [ 1 : ] { if _gfge . Lly <= _acdd . Lly { _acdd = _gfge ; } ; } ; for _ , _feaae := range _efebe { if _feaae != _acdd && _feaae . Lly < _acdd . Ury { _acdd = nil ;
break ; } ; } ; if _acdd != nil && _dege ( _cbaab . PdfRectangle , _acdd . PdfRectangle ) { _cbaab . _decgf = _acdd ; } ; } ; } ; for _ , _dcfd := range _bdcc { if _dcfd . _bbff != nil && _dcfd . _bbff . _cacae != _dcfd { _dcfd . _bbff = nil ; } ; if _dcfd . _decgf != nil && _dcfd . _decgf . _fdec != _dcfd { _dcfd . _decgf = nil ;
} ; if _dcfd . _cacae != nil && _dcfd . _cacae . _bbff != _dcfd { _dcfd . _cacae = nil ; } ; if _dcfd . _fdec != nil && _dcfd . _fdec . _decgf != _dcfd { _dcfd . _fdec = nil ; } ; } ; } ;
// setFont stores `_gfc` in the text state's `_ggda` field, then looks up the font
// named `_bbe` with getFont and stores it in the text state. The lookup error, if any,
// is returned; note that `_ggda` has already been updated by then. It is a no-op on a
// nil text object.
func ( _aggc * textObject ) setFont ( _bbe string , _gfc float64 ) error { if _aggc == nil { return nil ;
} ; _aggc . _ecb . _ggda = _gfc ; _fgba , _adb := _aggc . getFont ( _bbe ) ; if _adb != nil { return _adb ; } ; _aggc . _ecb . _fea = _fgba ; return nil ; } ;
// absorb pulls every word of `_abab` into the receiver and then applies the recorded
// removals to `_abab`.
func ( _ecae * wordBag ) absorb ( _abab * wordBag ) { _eaaa := _abab . makeRemovals ( ) ; for _aebe , _fggc := range _abab . _aceg { for _ , _bfgd := range _fggc { _ecae . pullWord ( _bfgd , _aebe , _eaaa ) ;
} ; } ; _abab . applyRemovals ( _eaaa ) ; } ;
// _adeeb converts the stroked sub-paths in `_gdcg` to rulings ("makeStrokeRulings" in
// its log output): every pair of consecutive points of a sub-path is offered to
// `_gdag`, and the rulings it accepts are collected.
func _adeeb ( _gdcg [ ] pathSection ) rulingList { _bcbac ( _gdcg ) ; if _dgac { _ff . Log . Info ( "\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073" , len ( _gdcg ) ) ;
} ; var _bggbf rulingList ; for _ , _ecaf := range _gdcg { for _ , _dacfb := range _ecaf . _fbdc { if len ( _dacfb . _eaeg ) < 2 { continue ; } ; _abdb := _dacfb . _eaeg [ 0 ] ; for _ , _eebb := range _dacfb . _eaeg [ 1 : ] { if _affb , _geca := _gdag ( _abdb , _eebb , _ecaf . Color ) ; _geca { _bggbf = append ( _bggbf , _affb ) ;
} ; _abdb = _eebb ; } ; } ; } ; if _dgac { _ff . Log . Info ( "m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073" , _bggbf ) ; } ; return _bggbf ; } ;
func ( _bcbb * textObject ) newTextMark ( _efeba string , _gfafc _de . Matrix , _bacg _de . Point , _ggca float64 , _egc * _ee . PdfFont , _faaa float64 , _aecdf , _fdff _ga . Color ) ( textMark , bool ) { _ddag := _gfafc . Angle ( ) ;
_dcgg := _fgdg ( _ddag , _eacf ) ; var _dcaa float64 ; if _dcgg % 180 != 90 { _dcaa = _gfafc . ScalingFactorY ( ) ; } else { _dcaa = _gfafc . ScalingFactorX ( ) ; } ; _aedd := _bcd ( _gfafc ) ; _fabb := _ee . PdfRectangle { Llx : _aedd . X , Lly : _aedd . Y , Urx : _bacg . X , Ury : _bacg . Y } ;
switch _dcgg % 360 { case 90 : _fabb . Urx -= _dcaa ; case 180 : _fabb . Ury -= _dcaa ; case 270 : _fabb . Urx += _dcaa ; case 0 : _fabb . Ury += _dcaa ; default : _dcgg = 0 ; _fabb . Ury += _dcaa ; } ; if _fabb . Llx > _fabb . Urx { _fabb . Llx , _fabb . Urx = _fabb . Urx , _fabb . Llx ; } ; if _fabb . Lly > _fabb . Ury { _fabb . Lly , _fabb . Ury = _fabb . Ury , _fabb . Lly ;
} ; _ffade := true ; if _bcbb . _decb . _eab . Width ( ) > 0 { _bddfd , _deebe := _ccad ( _fabb , _bcbb . _decb . _eab ) ; if ! _deebe { _ffade = false ; _ff . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q" , _fabb , _bcbb . _decb . _eab , _efeba ) ;
} ; _fabb = _bddfd ; } ; _cga := _fabb ; _gccg := _bcbb . _decb . _eab ; switch _dcgg % 360 { case 90 : _gccg . Urx , _gccg . Ury = _gccg . Ury , _gccg . Urx ; _cga = _ee . PdfRectangle { Llx : _gccg . Urx - _fabb . Ury , Urx : _gccg . Urx - _fabb . Lly , Lly : _fabb . Llx , Ury : _fabb . Urx } ;
case 180 : _cga = _ee . PdfRectangle { Llx : _gccg . Urx - _fabb . Llx , Urx : _gccg . Urx - _fabb . Urx , Lly : _gccg . Ury - _fabb . Lly , Ury : _gccg . Ury - _fabb . Ury } ; case 270 : _gccg . Urx , _gccg . Ury = _gccg . Ury , _gccg . Urx ; _cga = _ee . PdfRectangle { Llx : _fabb . Ury , Urx : _fabb . Lly , Lly : _gccg . Ury - _fabb . Llx , Ury : _gccg . Ury - _fabb . Urx } ;
} ; if _cga . Llx > _cga . Urx { _cga . Llx , _cga . Urx = _cga . Urx , _cga . Llx ; } ; if _cga . Lly > _cga . Ury { _cga . Lly , _cga . Ury = _cga . Ury , _cga . Lly ; } ; _gddf := textMark { _fdbb : _efeba , PdfRectangle : _cga , _ebdd : _fabb , _ggfc : _egc , _ceba : _dcaa , _eggg : _faaa , _abd : _gfafc , _ddf : _bacg , _bdaa : _dcgg , _eaee : _aecdf , _fca : _fdff } ;
if _ggfb { _ff . Log . Info ( "n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073" , _aedd , _bacg , _gddf . String ( ) ) ; } ; return _gddf , _ffade ;
} ; type ruling struct { _cgac rulingKind ; _bggf markKind ; _ga . Color ; _facf float64 ; _fgbfa float64 ; _ebeb float64 ; _gaeb float64 ; } ; func ( _cdad * subpath ) makeRectRuling ( _ggdag _ga . Color ) ( * ruling , bool ) { if _fbgb { _ff . Log . Info ( "\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076" , _cdad ) ;
} ; _bdfa := _cdad . _eaeg [ : 4 ] ; _dagb := make ( map [ int ] rulingKind , len ( _bdfa ) ) ; for _cfdg , _effe := range _bdfa { _ebfd := _cdad . _eaeg [ ( _cfdg + 1 ) % 4 ] ; _dagb [ _cfdg ] = _bfbe ( _effe , _ebfd ) ; if _fbgb { _be . Printf ( "\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066" , _cfdg , _dagb [ _cfdg ] , _effe , _ebfd ) ;
} ; } ; if _fbgb { _be . Printf ( "\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a" , _dagb ) ; } ; var _gdad , _dbfa [ ] int ; for _cddca , _dfcfe := range _dagb { switch _dfcfe { case _dfbe : _dbfa = append ( _dbfa , _cddca ) ; case _fafbf : _gdad = append ( _gdad , _cddca ) ;
} ; } ; if _fbgb { _be . Printf ( "\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a" , len ( _dbfa ) , _dbfa ) ; _be . Printf ( "\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a" , len ( _gdad ) , _gdad ) ;
} ; _acbb := ( len ( _dbfa ) == 2 && len ( _gdad ) == 2 ) || ( len ( _dbfa ) == 2 && len ( _gdad ) == 0 && _gcgg ( _bdfa [ _dbfa [ 0 ] ] , _bdfa [ _dbfa [ 1 ] ] ) ) || ( len ( _gdad ) == 2 && len ( _dbfa ) == 0 && _ggef ( _bdfa [ _gdad [ 0 ] ] , _bdfa [ _gdad [ 1 ] ] ) ) ; if _fbgb { _be . Printf ( " \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a" , len ( _dbfa ) , len ( _gdad ) , _acbb ) ;
} ; if ! _acbb { if _fbgb { _ff . Log . Error ( "\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v" , _cdad ) ; _be . Printf ( " \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a" , len ( _dbfa ) , len ( _gdad ) , _acbb ) ;
} ; return & ruling { } , false ; } ; if len ( _gdad ) == 0 { for _bcgc , _cgbc := range _dagb { if _cgbc != _dfbe { _gdad = append ( _gdad , _bcgc ) ; } ; } ; } ; if len ( _dbfa ) == 0 { for _gbba , _begb := range _dagb { if _begb != _fafbf { _dbfa = append ( _dbfa , _gbba ) ; } ; } ; } ; if _fbgb { _ff . Log . Info ( "\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a" + "\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a" + "\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a" + "\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076" , len ( _dbfa ) , len ( _gdad ) , len ( _bdfa ) , _dbfa , _gdad , _bdfa ) ;
} ; var _bbfba , _gbeab , _febcc , _abdce _de . Point ; if _bdfa [ _dbfa [ 0 ] ] . Y > _bdfa [ _dbfa [ 1 ] ] . Y { _febcc , _abdce = _bdfa [ _dbfa [ 0 ] ] , _bdfa [ _dbfa [ 1 ] ] ; } else { _febcc , _abdce = _bdfa [ _dbfa [ 1 ] ] , _bdfa [ _dbfa [ 0 ] ] ; } ; if _bdfa [ _gdad [ 0 ] ] . X > _bdfa [ _gdad [ 1 ] ] . X { _bbfba , _gbeab = _bdfa [ _gdad [ 0 ] ] , _bdfa [ _gdad [ 1 ] ] ;
} else { _bbfba , _gbeab = _bdfa [ _gdad [ 1 ] ] , _bdfa [ _gdad [ 0 ] ] ; } ; _gdbea := _ee . PdfRectangle { Llx : _bbfba . X , Urx : _gbeab . X , Lly : _abdce . Y , Ury : _febcc . Y } ; if _gdbea . Llx > _gdbea . Urx { _gdbea . Llx , _gdbea . Urx = _gdbea . Urx , _gdbea . Llx ; } ; if _gdbea . Lly > _gdbea . Ury { _gdbea . Lly , _gdbea . Ury = _gdbea . Ury , _gdbea . Lly ;
} ; _aaggb := rectRuling { PdfRectangle : _gdbea , _bbbf : _ccdd ( _gdbea ) , Color : _ggdag } ; if _aaggb . _bbbf == _fbdff { if _fbgb { _ff . Log . Error ( "\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c" ) ;
} ; return nil , false ; } ; _cddb , _ddgfa := _aaggb . asRuling ( ) ; if ! _ddgfa { if _fbgb { _ff . Log . Error ( "\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg" ) ; } ; return nil , false ; } ; if _dgac { _be . Printf ( "\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a" , _cddb . String ( ) ) ;
} ; return _cddb , true ; } ; func _adff ( _dffg , _bege , _efdga , _fcfg * textPara ) * textTable { _aefg := & textTable { _aage : 2 , _eabcaa : 2 , _dbfba : make ( map [ uint64 ] * textPara , 4 ) } ; _aefg . put ( 0 , 0 , _dffg ) ; _aefg . put ( 1 , 0 , _bege ) ; _aefg . put ( 0 , 1 , _efdga ) ; _aefg . put ( 1 , 1 , _fcfg ) ;
return _aefg ; } ;
// String formats the line for debugging: the `_decg` value, the bounding
// rectangle, the `_caccd` value (labelled "fontsize" in the output) and the
// quoted text of the line.
func (line *textLine) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _be.Sprintf(layout, line._decg, line.PdfRectangle, line._caccd, line.text())
}

// getComposite looks up the composite cell stored for (`_ffac`, `_aedf`) and
// returns the result of its parasBBox method. When no cell is stored it returns
// a nil paraList and an empty rectangle. The lookup is traced to stdout when
// the `_bcag` flag is set.
func (table *textTable) getComposite(_ffac, _aedf int) (paraList, _ee.PdfRectangle) {
	cell, found := table._ebgbb[_cgccd(_ffac, _aedf)]
	if _bcag {
		_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a", _ffac, _aedf, cell.String())
	}
	if found {
		return cell.parasBBox()
	}
	return nil, _ee.PdfRectangle{}
}

// _eabc returns a fresh textState with `_cfa` set to 100, `_ccc` set to
// RenderModeFill and `_gbc` set to the rectangle `_age`.
func _eabc(_age _ee.PdfRectangle) textState {
	var state textState
	state._cfa = 100
	state._ccc = RenderModeFill
	state._gbc = _age
	return state
}
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
//
// `start` is raised to the offset of the first mark and `end` is lowered to one
// past the offset of the last mark before the marks are searched, so a range
// that extends up to (or beyond) the last mark returns the marks through the
// end of the array.
//
// An error is returned when the receiver is nil, when end < start, when no mark
// reaches `start`, or when the selected span is empty. An array without marks
// is returned unchanged.
//
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
func (_adgd *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if _adgd == nil {
		return nil, _c.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _be.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}

	marks := _adgd._beaa
	n := len(marks)
	if n == 0 {
		return _adgd, nil
	}

	// Clamp the requested range to the offsets covered by the marks.
	if start < marks[0].Offset {
		start = marks[0].Offset
	}
	if limit := marks[n-1].Offset + 1; end > limit {
		end = limit
	}

	// First mark whose last byte is at or after `start`.
	iStart := _ef.Search(n, func(i int) bool {
		return marks[i].Offset+len(marks[i].Text)-1 >= start
	})
	if iStart >= n {
		return nil, _be.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, iStart, n, marks[0], marks[n-1])
	}

	// First mark that starts at or after `end`. sort.Search yields n when there is
	// no such mark, which happens whenever the range reaches the last mark; n is
	// a valid exclusive upper bound for the slice below, so it is not an error.
	// (Rejecting iEnd == n made every range that included the last mark fail.)
	iEnd := _ef.Search(n, func(i int) bool {
		return marks[i].Offset > end-1
	})
	if iEnd <= iStart {
		return nil, _be.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_beaa: marks[iStart:iEnd]}, nil
}

// sort orders the rulings in place using the list's comp method as the
// less-than function.
func (_fdbd rulingList) sort() {
	_ef.Slice(_fdbd, _fdbd.comp)
}

// computeText returns the concatenation of the `_fdbb` text of every mark in
// the word's `_gceff` list, in list order.
func (_agcd *textWord) computeText() string {
	var sb _df.Builder
	for _, mark := range _agcd._gceff {
		sb.WriteString(mark._fdbb)
	}
	return sb.String()
}
// String returns a string describing the tile: its rectangle followed by one
// character per flag, "L", "R", "B" and "T" for `_gdge`, `_geaa`, `_gaaf` and
// `_efab` respectively, with "_" standing in for a flag that is not set.
func (tile gridTile) String() string {
	// mark yields label when set is true and the placeholder otherwise.
	mark := func(set bool, label string) string {
		if !set {
			return "\u005f"
		}
		return label
	}
	left := mark(tile._gdge, "\u004c")
	right := mark(tile._geaa, "\u0052")
	bottom := mark(tile._gaaf, "\u0042")
	top := mark(tile._efab, "\u0054")
	return _be.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", tile.PdfRectangle, left, right, bottom, top)
}
// String returns a multi-line description of the page text: a summary line
// prefixed with "-", one line per element of `_fcb`, and the same summary line
// prefixed with "+".
func (pt PageText) String() string {
	summary := _be.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(pt._fcb))
	lines := make([]string, 0, len(pt._fcb)+2)
	lines = append(lines, "\u002d"+summary)
	for _, mark := range pt._fcb {
		lines = append(lines, mark.String())
	}
	lines = append(lines, "\u002b"+summary)
	return _df.Join(lines, "\u000a")
}

// snapToGroups splits the rulings with vertsHorzs, runs snapToGroupsDirection
// on each non-empty half, and returns the first half followed by the second.
// The combined list is passed to log before it is returned.
func (rulings rulingList) snapToGroups() rulingList {
	verts, horzs := rulings.vertsHorzs()
	if len(verts) > 0 {
		verts = verts.snapToGroupsDirection()
	}
	if len(horzs) > 0 {
		horzs = horzs.snapToGroupsDirection()
	}
	snapped := append(verts, horzs...)
	snapped.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return snapped
}
// New returns an Extractor instance for extracting content from the input PDF page.
// It fails when the page's content streams or media box cannot be obtained.
// A media box whose X or Y coordinates are reversed is logged and swapped into
// order before the extractor is returned.
func New(page *_ee.PdfPage) (*Extractor, error) {
	const usageKey = "\u0065\u0078\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077"

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}
	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _be.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	ext := &Extractor{
		_gc:  contents,
		_ea:  page.Resources,
		_eab: *mediaBox,
		_bg:  map[string]fontEntry{},
		_bb:  map[string]textResult{},
	}

	box := &ext._eab
	if box.Llx > box.Urx {
		_ff.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", *box)
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		_ff.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", *box)
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	_dg.TrackUse(usageKey)
	return ext, nil
}

// String describes the cell: its rectangle, the number of paragraphs and, when
// there is at least one paragraph, the merged paragraph text passed through
// `_ggece` with the argument 50.
func (cell compositeCell) String() string {
	var summary string
	if len(cell.paraList) > 0 {
		summary = _ggece(cell.paraList.merge().text(), 50)
	}
	return _be.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071", cell.PdfRectangle, len(cell.paraList), summary)
}

// stroke appends a pathSection made of the current subpaths (`_deff`) and the
// current stroke color to `*_cddc`. With the `_dgac` flag set the new section is
// printed, and with `_acg` also set the subpaths up to index 10 are listed.
func (ss *shapesState) stroke(_cddc *[]pathSection) {
	section := pathSection{_fbdc: ss._deff, Color: ss._agbc.getStrokeColor()}
	*_cddc = append(*_cddc, section)
	if !_dgac {
		return
	}
	_be.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*_cddc), ss, ss._agbc.getStrokeColor(), section.bbox())
	if !_acg {
		return
	}
	for idx, sp := range ss._deff {
		_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", idx, sp)
		if idx == 10 {
			break
		}
	}
}

// newTablePara wraps the table in a textPara whose rectangle and `_dbfe` field
// are both the table's computed bounding box and whose `_cegd` field points
// back at the table.
func (table *textTable) newTablePara() *textPara {
	bbox := table.computeBbox()
	para := &textPara{PdfRectangle: bbox, _dbfe: bbox, _cegd: table}
	if _bcag {
		_ff.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// sort orders the words under every key of `_aceg` in place; a word comes first
// when `_efa` of the pair is negative.
func (bag *wordBag) sort() {
	for _, words := range bag._aceg {
		_ef.Slice(words, func(a, b int) bool {
			return _efa(words[a], words[b]) < 0
		})
	}
}

// _bfaa returns, for each index i in [0, _cdaa), the running total of
// len(_egeb[j])+1 over all j < i, together with the grand total over all
// `_cdaa` indexes.
func _bfaa(_cdaa int, _egeb map[int][]float64) ([]int, int) {
	starts := make([]int, _cdaa)
	total := 0
	for i := range starts {
		starts[i] = total
		total += len(_egeb[i]) + 1
	}
	return starts, total
}
// Font represents the font properties on a PDF page.
type Font struct {
	// PdfFont is the model font this entry was built from.
	PdfFont *_ee.PdfFont

	// FontName is the font name taken from the font properties.
	FontName string

	// FontType is the Subtype entry of the font dictionary in the page resources.
	// Examples: type0, Type1, MMType1, Type3, TrueType, CIDFont.
	FontType string

	// ToUnicode reports whether the font provides a `ToUnicode` mapping.
	ToUnicode bool

	// IsCID reports whether the underlying font is a composite font, i.e. a
	// font dictionary whose Subtype is `Type0`.
	IsCID bool

	// IsSimple reports whether the font is a simple font.
	// A simple font is limited to 8 bit (255) character codes.
	IsSimple bool

	// FontData is the raw data of the embedded font file. Its format can be
	// TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CFF).
	// The data comes from the `FontFile`, `FontFile2` or `FontFile3` entry of
	// the font descriptor; at most one of them supplies the value.
	FontData []byte

	// FontFileName is a file name for the font in the form
	// (Font Name) + (Font Type Extension), for example: helvetica.ttf.
	FontFileName string

	// FontDescriptor holds the metrics and other attributes from the font
	// descriptor in the PDF structure.
	FontDescriptor *_ee.PdfFontDescriptor
}

// _bdaf reports true when either paragraph has its `_gbdd` flag set; otherwise
// it returns `_dcfga` applied to the difference between the two paragraphs'
// depth() values.
func _bdaf(_bddaf, _cdfc *textPara) bool {
	return _bddaf._gbdd || _cdfc._gbdd || _dcfga(_bddaf.depth()-_cdfc.depth())
}
// String returns a string describing the mark: its offset, the quoted text and
// its runes in hex, the corners of its bounding box, the font description (cut
// to 50 bytes plus "..." when longer) and a " *M*" suffix for meta marks.
func (tm TextMark) String() string {
	box := tm.BBox

	fontDesc := ""
	if tm.Font != nil {
		fontDesc = tm.Font.String()
		if len(fontDesc) > 50 {
			fontDesc = fontDesc[:50] + "\u002e\u002e\u002e"
		}
	}

	metaTag := ""
	if tm.Meta {
		metaTag = "\u0020\u002a\u004d\u002a"
	}

	return _be.Sprintf("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d", tm.Offset, tm.Text, []rune(tm.Text), box.Llx, box.Lly, box.Urx, box.Ury, fontDesc, metaTag)
}

// drawRectangle starts a new subpath and traces the rectangle with corner
// (`_fbcd`, `_dadc`), width `_debc` and height `_cege` as a moveTo followed by
// three lineTo calls and a closePath. With the `_gcga` flag set the rectangle
// in device coordinates is logged first.
func (ss *shapesState) drawRectangle(_fbcd, _dadc, _debc, _cege float64) {
	if _gcga {
		ll := ss.devicePoint(_fbcd, _dadc)
		ur := ss.devicePoint(_fbcd+_debc, _dadc+_cege)
		rect := _ee.PdfRectangle{Llx: ll.X, Lly: ll.Y, Urx: ur.X, Ury: ur.Y}
		_ff.Log.Info("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066", rect)
	}
	right, top := _fbcd+_debc, _dadc+_cege
	ss.newSubPath()
	ss.moveTo(_fbcd, _dadc)
	ss.lineTo(right, _dadc)
	ss.lineTo(right, top)
	ss.lineTo(_fbcd, top)
	ss.closePath()
}

// moveTo sets the `_ccef` flag and stores the device-space position of
// (`_baef`, `_eeb`) in `_ddcc`. The move is logged when `_gcga` is set.
func (ss *shapesState) moveTo(_baef, _eeb float64) {
	ss._ccef = true
	ss._ddcc = ss.devicePoint(_baef, _eeb)
	if !_gcga {
		return
	}
	_ff.Log.Info("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066", _baef, _eeb, ss._ddcc)
}
// paraList.llyRange (begins part-way through the next code line, after String)
// returns the slice of `_egf` whose paragraphs have Lly in [`_dgcb`, `_gafdb`].
// `_egf` is indexed from 0 to len(paras)-1 and binary-searched on Lly, so it is
// presumably an ordering of the paragraph indexes by increasing Lly - confirm.
// It returns nil when the interval lies entirely outside the Lly values of the
// first and last entries.
// NOTE(review): `_egf[0]` is read without a length check, so an empty list
// would panic - callers presumably never pass one; confirm.

// String returns a human readable description of the shapes state: the number
// of subpaths in `_deff` and the value of the `_ccef` flag (labelled "fresh").
func ( _adgc * shapesState ) String ( ) string { return _be . Sprintf ( "\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d" , len ( _adgc . _deff ) , _adgc . _ccef ) ; } ; func ( _fdba paraList ) llyRange ( _egf [ ] int , _dgcb , _gafdb float64 ) [ ] int { _ddda := len ( _fdba ) ;
// Lower bound: first entry with Lly >= `_dgcb`. Upper bound: first entry with
// Lly > `_gafdb`.
if _gafdb < _fdba [ _egf [ 0 ] ] . Lly || _dgcb > _fdba [ _egf [ _ddda - 1 ] ] . Lly { return nil ; } ; _cdgg := _ef . Search ( _ddda , func ( _dffd int ) bool { return _fdba [ _egf [ _dffd ] ] . Lly >= _dgcb } ) ; _cegec := _ef . Search ( _ddda , func ( _aacf int ) bool { return _fdba [ _egf [ _aacf ] ] . Lly > _gafdb } ) ;
// rulingList.comp (begins part-way through the next line) is the less-than
// function used when sorting rulings. Rulings of different kinds are ordered by
// descending kind value; two rulings of kind `_fbdff` are never "less".
// Within a kind the keys are `_facf`, then `_fgbfa`, then `_ebeb`, and the
// helper `_fggb` inverts each comparison unless the kind is `_dfbe`.
// NOTE(review): for kinds other than `_dfbe` the final comparison is inverted
// too, so two rulings with identical keys each compare "less" than the other,
// which is not a strict ordering - confirm whether that matters to callers.
return _egf [ _cdgg : _cegec ] ; } ; func ( _ddb rulingList ) comp ( _ecaa , _ddcefd int ) bool { _cacg , _ffed := _ddb [ _ecaa ] , _ddb [ _ddcefd ] ; _ceaa , _eedee := _cacg . _cgac , _ffed . _cgac ; if _ceaa != _eedee { return _ceaa > _eedee ; } ; if _ceaa == _fbdff { return false ;
} ; _fggb := func ( _feffg bool ) bool { if _ceaa == _dfbe { return _feffg ; } ; return ! _feffg ; } ; _eabe , _cdag := _cacg . _facf , _ffed . _facf ; if _eabe != _cdag { return _fggb ( _eabe > _cdag ) ; } ; _eabe , _cdag = _cacg . _fgbfa , _ffed . _fgbfa ; if _eabe != _cdag { return _fggb ( _eabe < _cdag ) ;
// compositeCell.split (begins part-way through the next line) divides the cell
// into a textTable using the separators in `_gdgf` and `_fdfe`, which the log
// message labels "rowCorridors" and "colCorridors". The table gets
// len(`_fdfe`)+1 as `_aage` and len(`_gdgf`)+1 as `_eabcaa`.
// It returns a pointer to the new table; grid positions that receive no text
// lines are left without an entry.
} ; return _fggb ( _cacg . _ebeb < _ffed . _ebeb ) ; } ; func ( _agba compositeCell ) split ( _gdgf , _fdfe [ ] float64 ) * textTable { _cbgaf := len ( _gdgf ) + 1 ; _egb := len ( _fdfe ) + 1 ; if _bcag { _ff . Log . Info ( "\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a" + "\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066" , _egb , _cbgaf , _agba , _gdgf , _fdfe ) ;
_be . Printf ( "\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a" , len ( _agba . paraList ) ) ; for _ddagc , _abefa := range _agba . paraList { _be . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _ddagc , _abefa . String ( ) ) ;
// After the optional debug dump, both separator lists are passed through
// `_fbde` together with the cell's own edges (Ury/Lly for `_gdgf`, Llx/Urx for
// `_fdfe`); the loops below then treat consecutive entries as cell boundaries.
} ; _be . Printf ( "\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a" , len ( _agba . lines ( ) ) ) ; for _cbee , _begg := range _agba . lines ( ) { _be . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _cbee , _begg ) ; } ; } ; _gdgf = _fbde ( _gdgf , _agba . Ury , _agba . Lly ) ;
// The cell's paragraph list is sorted in place by Lly and then Llx. `_bgeb` is
// the same slice as `_agba.paraList`, so the caller's slice is reordered too.
_fdfe = _fbde ( _fdfe , _agba . Llx , _agba . Urx ) ; _fccc := make ( map [ uint64 ] * textPara , _egb * _cbgaf ) ; _afga := textTable { _aage : _egb , _eabcaa : _cbgaf , _dbfba : _fccc } ; _bgeb := _agba . paraList ; _ef . Slice ( _bgeb , func ( _gegeg , _eecd int ) bool { _eacg , _cgdd := _bgeb [ _gegeg ] , _bgeb [ _eecd ] ;
// `_aded` maps each grid position, keyed by `_cgccd(x, y)`, to the rectangle
// bounded by consecutive entries of the two separator lists.
_ggdgb , _gdef := _eacg . Lly , _cgdd . Lly ; if _ggdgb != _gdef { return _ggdgb < _gdef ; } ; return _eacg . Llx < _cgdd . Llx ; } ) ; _aded := make ( map [ uint64 ] _ee . PdfRectangle , _egb * _cbgaf ) ; for _ceec , _dcca := range _gdgf [ 1 : ] { _bbcd := _gdgf [ _ceec ] ; for _fbffa , _cadb := range _fdfe [ 1 : ] { _gccb := _fdfe [ _fbffa ] ;
_aded [ _cgccd ( _fbffa , _ceec ) ] = _ee . PdfRectangle { Llx : _gccb , Urx : _cadb , Lly : _dcca , Ury : _bbcd } ; } ; } ; if _bcag { _ff . Log . Info ( "\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073" ) ;
_be . Printf ( "\u0020\u0020\u0020\u0020" ) ; for _fgdf := 0 ; _fgdf < _egb ; _fgdf ++ { _be . Printf ( "\u0025\u0033\u0030\u0064\u002c\u0020" , _fgdf ) ; } ; _be . Println ( ) ; for _ddff := 0 ; _ddff < _cbgaf ; _ddff ++ { _be . Printf ( "\u0020\u0020\u0025\u0032\u0064\u003a" , _ddff ) ;
// `_afda` finds the grid position of a text line: the first position, scanning
// rows and then columns, whose rectangle satisfies `_efdf` with the line's
// rectangle. It returns (-1, -1) when no position matches.
for _cffda := 0 ; _cffda < _egb ; _cffda ++ { _be . Printf ( "\u00256\u002e\u0032\u0066\u002c\u0020" , _aded [ _cgccd ( _cffda , _ddff ) ] ) ; } ; _be . Println ( ) ; } ; } ; _afda := func ( _fgebcf * textLine ) ( int , int ) { for _faea := 0 ; _faea < _cbgaf ; _faea ++ { for _fdbg := 0 ; _fdbg < _egb ;
// Lines are grouped by grid position in `_edebb`; lines that match no position
// are dropped.
_fdbg ++ { if _efdf ( _aded [ _cgccd ( _fdbg , _faea ) ] , _fgebcf . PdfRectangle ) { return _fdbg , _faea ; } ; } ; } ; return - 1 , - 1 ; } ; _edebb := make ( map [ uint64 ] [ ] * textLine , _egb * _cbgaf ) ; for _ , _bbega := range _bgeb . lines ( ) { _bfgg , _fbdfd := _afda ( _bbega ) ; if _bfgg < 0 { continue ;
// For every grid position that received lines, a paragraph is built with
// `_dafa` from the position's rectangle and its lines and stored with put.
} ; _edebb [ _cgccd ( _bfgg , _fbdfd ) ] = append ( _edebb [ _cgccd ( _bfgg , _fbdfd ) ] , _bbega ) ; } ; for _beaf := 0 ; _beaf < len ( _gdgf ) - 1 ; _beaf ++ { _bccda := _gdgf [ _beaf ] ; _gaeda := _gdgf [ _beaf + 1 ] ; for _fedgb := 0 ; _fedgb < len ( _fdfe ) - 1 ; _fedgb ++ { _cccd := _fdfe [ _fedgb ] ;
_bfdbd := _fdfe [ _fedgb + 1 ] ; _bggb := _ee . PdfRectangle { Llx : _cccd , Urx : _bfdbd , Lly : _gaeda , Ury : _bccda } ; _ffbcg := _edebb [ _cgccd ( _fedgb , _beaf ) ] ; if len ( _ffbcg ) == 0 { continue ; } ; _cegg := _dafa ( _bggb , _ffbcg ) ; _afga . put ( _fedgb , _beaf , _cegg ) ; } ; } ; return & _afga ;
// _aef (on the next line) returns `_efa` of the two bounded values unless
// `_dcfga` accepts that result, in which case it falls back to `_ccag`.
//
// paraList.xNeighbours (begins on the next line as well) builds two events per
// paragraph, one at its left edge flagged true and one at its right edge
// flagged false, and returns the result of eventNeighbours for them. When
// `_aegfg` is non-zero each edge is pushed outwards by `_aegfg` times the
// paragraph's fontsize().
} ; func _aef ( _efcf , _caee bounded ) float64 { _ggae := _efa ( _efcf , _caee ) ; if ! _dcfga ( _ggae ) { return _ggae ; } ; return _ccag ( _efcf , _caee ) ; } ; func ( _cgaag paraList ) xNeighbours ( _aegfg float64 ) map [ * textPara ] [ ] int { _fcfd := make ( [ ] event , 2 * len ( _cgaag ) ) ;
if _aegfg == 0 { for _ecff , _ffbg := range _cgaag { _fcfd [ 2 * _ecff ] = event { _ffbg . Llx , true , _ecff } ; _fcfd [ 2 * _ecff + 1 ] = event { _ffbg . Urx , false , _ecff } ; } ; } else { for _egdg , _bcfcf := range _cgaag { _fcfd [ 2 * _egdg ] = event { _bcfcf . Llx - _aegfg * _bcfcf . fontsize ( ) , true , _egdg } ;
// The next line also declares rulingList, a slice of ruling pointers, and opens
// textWord.absorb, which grows the word's rectangle with `_bgcf` to take in
// `_bbacf`'s rectangle and appends `_bbacf`'s marks (`_gceff`) to its own.
_fcfd [ 2 * _egdg + 1 ] = event { _bcfcf . Urx + _aegfg * _bcfcf . fontsize ( ) , false , _egdg } ; } ; } ; return _cgaag . eventNeighbours ( _fcfd ) ; } ; type rulingList [ ] * ruling ; func ( _dcdde * textWord ) absorb ( _bbacf * textWord ) { _dcdde . PdfRectangle = _bgcf ( _dcdde . PdfRectangle , _bbacf . PdfRectangle ) ;
// rulingList.findPrimSec (begins part-way through the next line) returns the
// first ruling for which `_dcfga(_facf - _ceacc)` holds and `_eaeda` lies
// within [`_fgbfa` - `_cbfg`, `_ebeb` + `_cbfg`], or nil when there is none.
_dcdde . _gceff = append ( _dcdde . _gceff , _bbacf . _gceff ... ) ; } ; func ( _aggca rulingList ) findPrimSec ( _ceacc , _eaeda float64 ) * ruling { for _ , _dcbb := range _aggca { if _dcfga ( _dcbb . _facf - _ceacc ) && _dcbb . _fgbfa - _cbfg <= _eaeda && _eaeda <= _dcbb . _ebeb + _cbfg { return _dcbb ;
// The next line declares subpath (a list of points plus a bool flag) and opens
// rulingList.vertsHorzs, which partitions the rulings by kind: those of kind
// `_fafbf` form the first result and those of kind `_dfbe` the second. Rulings
// of any other kind are dropped.
} ; } ; return nil ; } ; type subpath struct { _eaeg [ ] _de . Point ; _feeg bool ; } ; func ( _baea rulingList ) vertsHorzs ( ) ( rulingList , rulingList ) { var _cggf , _ffgbg rulingList ; for _ , _dgdag := range _baea { switch _dgdag . _cgac { case _fafbf : _cggf = append ( _cggf , _dgdag ) ;
// The next line closes vertsHorzs and declares event: a float64 position, a
// bool flag and an int index, filled positionally by xNeighbours above.
case _dfbe : _ffgbg = append ( _ffgbg , _dgdag ) ; } ; } ; return _cggf , _ffgbg ; } ; type event struct { _ebgdd float64 ; _ddfe bool ; _egca int ; } ;
// String returns a short description of the table in the form
// "<_aage> x <_eabcaa> <_efea>".
func (table *textTable) String() string {
	const layout = "\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074"
	return _be.Sprintf(layout, table._aage, table._eabcaa, table._efea)
}

// _ebb returns the translation matrix that moves by the coordinates of `_adg`.
func _ebb(_adg _de.Point) _de.Matrix {
	return _de.TranslationMatrix(_adg.X, _adg.Y)
}

// _ccad returns the intersection of the two rectangles and true, or an empty
// rectangle and false when `_gddc` reports that they do not overlap.
func _ccad(_dcd, _dff _ee.PdfRectangle) (_ee.PdfRectangle, bool) {
	var overlap _ee.PdfRectangle
	if !_gddc(_dcd, _dff) {
		return overlap, false
	}
	overlap.Llx = _f.Max(_dcd.Llx, _dff.Llx)
	overlap.Urx = _f.Min(_dcd.Urx, _dff.Urx)
	overlap.Lly = _f.Max(_dcd.Lly, _dff.Lly)
	overlap.Ury = _f.Min(_dcd.Ury, _dff.Ury)
	return overlap, true
}

// moveText forwards its two offsets to moveLP.
func (to *textObject) moveText(_dedd, _ecg float64) {
	to.moveLP(_dedd, _ecg)
}
// ExtractFonts returns all font information from the page extractor, including
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
//
// `previousPageFonts` supports building a font catalog across several pages or
// a whole document: each of its entries is appended to the result unless a font
// with the same FontName is already present, so the result has no duplicates.
//
// NOTE: If previousPageFonts is nil, only the fonts from this page are returned.
func (ex *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	var collected PageFonts
	if err := collected.extractPageResourcesToFont(ex._ea); err != nil {
		return nil, err
	}
	if previousPageFonts == nil {
		return &PageFonts{Fonts: collected.Fonts}, nil
	}
	for _, prev := range previousPageFonts.Fonts {
		if _af(collected.Fonts, prev.FontName) {
			continue
		}
		collected.Fonts = append(collected.Fonts, prev)
	}
	return &PageFonts{Fonts: collected.Fonts}, nil
}

// firstReadingIndex starts from depth index `_bged` and examines the indexes
// returned by depthBand for the interval that begins at (`_bged`+1)*`_fdbf` and
// extends by `_fdab` times the `_eedb` value of the first word at `_bged`.
// It returns the index whose first word compares lowest under `_efa`, keeping
// `_bged` itself when no candidate is lower.
func (bag *wordBag) firstReadingIndex(_bged int) int {
	scale := bag.firstWord(_bged)._eedb
	lo := float64(_bged+1) * _fdbf
	hi := lo + _fdab*scale
	best := _bged
	for _, cand := range bag.depthBand(lo, hi) {
		if _efa(bag.firstWord(cand), bag.firstWord(best)) < 0 {
			best = cand
		}
	}
	return best
}

// tidied removes duplicate rulings, snaps the remainder to groups and sorts the
// result. It returns nil when snapToGroups returns nil. `_dcgga` is only used
// as a label in the log output.
func (rulings rulingList) tidied(_dcgga string) rulingList {
	uniques := rulings.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()

	if _dgac {
		_ff.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", _dcgga, len(rulings), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// _cbae returns `_ccag` of the two bounded values unless `_dcfga` accepts that
// result, in which case it falls back to `_efa`.
func _cbae(_afc, _eabcb bounded) float64 {
	if primary := _ccag(_afc, _eabcb); !_dcfga(primary) {
		return primary
	}
	return _efa(_afc, _eabcb)
}

// intersects reports whether the two rulings are of the two different kinds
// `_fafbf` and `_dfbe` and each one's `_facf` value lies within the other's
// [`_fgbfa` - `_cbfg`, `_ebeb` + `_cbfg`] interval. The individual checks are
// printed when the `_dgac` flag is set.
func (r *ruling) intersects(_acfd *ruling) bool {
	// spans reports whether b's `_facf` falls inside a's padded interval.
	spans := func(a, b *ruling) bool {
		return a._fgbfa-_cbfg <= b._facf && b._facf <= a._ebeb+_cbfg
	}

	orthogonal := (r._cgac == _fafbf && _acfd._cgac == _dfbe) || (_acfd._cgac == _fafbf && r._cgac == _dfbe)
	overThis := spans(r, _acfd)
	overOther := spans(_acfd, r)
	result := orthogonal && overThis && overOther

	if _dgac {
		_be.Printf("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a", orthogonal, overThis, overOther, result, r, _acfd)
	}
	return result
}

// alignsSec reports whether the [`_fgbfa`, `_ebeb`] intervals of the two
// rulings overlap once each lower bound is relaxed by `_fbfc` + 1.0.
func (r *ruling) alignsSec(_ddbg *ruling) bool {
	const slack = _fbfc + 1.0
	startsBeforeOtherEnds := r._fgbfa-slack <= _ddbg._ebeb
	otherStartsBeforeEnd := _ddbg._fgbfa-slack <= r._ebeb
	return startsBeforeOtherEnds && otherStartsBeforeEnd
}

// _gddc reports whether both `_dege` and `_cffd` hold for the two rectangles;
// `_cffd` is only evaluated when `_dege` holds.
func _gddc(_bgd, _ggbe _ee.PdfRectangle) bool {
	if !_dege(_bgd, _ggbe) {
		return false
	}
	return _cffd(_bgd, _ggbe)
}
// ToTextMark returns the public view of the mark, copying its text, original
// text, bounding box, font, font size, fill and stroke colors and orientation.
// The remaining TextMark fields are left at their zero values.
func (tm *textMark) ToTextMark() TextMark {
	var public TextMark
	public.Text = tm._fdbb
	public.Original = tm._aad
	public.BBox = tm._ebdd
	public.Font = tm._ggfc
	public.FontSize = tm._ceba
	public.FillColor = tm._eaee
	public.StrokeColor = tm._fca
	public.Orientation = tm._bdaa
	return public
}

// writeText writes the text of every paragraph whose `_gbdd` flag is not set to
// `_efgb`. After each written paragraph except the last list element it writes
// a single space when `_bdaf` holds for the paragraph and its successor, and
// two newlines otherwise. Two newlines are always written at the end.
// Errors from the writer are not checked.
func (paras paraList) writeText(_efgb _d.Writer) {
	lastIdx := len(paras) - 1
	for idx, para := range paras {
		if para._gbdd {
			continue
		}
		para.writeText(_efgb)
		if idx == lastIdx {
			continue
		}
		if _bdaf(para, paras[idx+1]) {
			_efgb.Write([]byte("\u0020"))
			continue
		}
		_efgb.Write([]byte("\u000a"))
		_efgb.Write([]byte("\u000a"))
	}
	_efgb.Write([]byte("\u000a"))
	_efgb.Write([]byte("\u000a"))
}

// textObject groups what is needed to process one text object: the owning
// extractor, the page resources, the graphics state, the text state and its
// stack, two matrices, the text marks collected so far and a bool flag.
type textObject struct {
	_decb *Extractor
	_bca  *_ee.PdfPageResources
	_bcgf _bd.GraphicsState
	_ecb  *textState
	_accf *stateStack
	_geff _de.Matrix
	_abg  _de.Matrix
	_fbf  []*textMark
	_ecc  bool
}

// establishSubpath returns the most recent subpath. When lastpointEstablished
// reports false, a new subpath created by `_edbd` from the returned point is
// appended first. It returns nil when there are no subpaths; otherwise the
// `_ccef` flag is cleared before the last subpath is returned.
func (ss *shapesState) establishSubpath() *subpath {
	point, established := ss.lastpointEstablished()
	if !established {
		ss._deff = append(ss._deff, _edbd(point))
	}
	count := len(ss._deff)
	if count == 0 {
		return nil
	}
	ss._ccef = false
	return ss._deff[count-1]
}

// bbox returns the word's bounding rectangle.
func (word *textWord) bbox() _ee.PdfRectangle {
	return word.PdfRectangle
}
// String returns a description of `b`.
// The description holds the bag's rectangle, its `_adbbf` value, the number of
// words and the quoted word texts, visited in depthIndexes order.
func (_dgb *wordBag) String() string {
	var texts []string
	for _, depthIdx := range _dgb.depthIndexes() {
		for _, word := range _dgb._aceg[depthIdx] {
			texts = append(texts, word._bfdfd)
		}
	}
	return _be.Sprintf("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071", _dgb.PdfRectangle, _dgb._adbbf, len(texts), texts)
}

// getDepthIdx converts `_edg` to an index with _gfb and clamps it to the range
// spanned by the first and last entries of depthIndexes.
// The bag must hold at least one depth index; the first entry is read unconditionally.
func (_gaed *wordBag) getDepthIdx(_edg float64) int {
	indexes := _gaed.depthIndexes()
	idx := _gfb(_edg)
	switch first, last := indexes[0], indexes[len(indexes)-1]; {
	case idx < first:
		return first
	case idx > last:
		return last
	}
	return idx
}

// quadraticTo records only the final point (`_befe`, `_dbd`) via addPoint; the first
// two coordinates are not used. A log line is written when `_gcga` is set.
func (_feda *shapesState) quadraticTo(_bdbf, _dfce, _befe, _dbd float64) {
	if _gcga {
		_ff.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	_feda.addPoint(_befe, _dbd)
}

// isAtom tries to build a table from the receiver and three linked paragraphs:
// its `_cacae` link, its `_fdec` link and the `_fdec` link of the first of those.
// It returns nil when any of the three linked paragraphs reports taken(), or when
// the fourth paragraph is not also the `_cacae` link of the receiver's `_fdec`
// paragraph. Otherwise the result of _adff on the four paragraphs is returned.
func (_acade *textPara) isAtom() *textTable {
	viaCacae := _acade._cacae
	viaFdec := _acade._fdec
	if viaCacae.taken() || viaFdec.taken() {
		return nil
	}
	corner := viaCacae._fdec
	if corner.taken() || corner != viaFdec._cacae {
		return nil
	}
	return _adff(_acade, viaCacae, viaFdec, corner)
}

// bbox returns the mark's embedded PdfRectangle.
func (_efgd *textMark) bbox() _ee.PdfRectangle {
	return _efgd.PdfRectangle
}

// computeEBBoxes fills in the `_dbfe` rectangle of every paragraph in the list.
//
// Each `_dbfe` starts as a copy of the paragraph's PdfRectangle. For every
// paragraph, the yNeighbours(0) entries give a left limit (largest Urx of a
// neighbour lying wholly to the left) and a right limit (smallest Llx of a
// neighbour lying wholly to the right). The rectangle is then widened to the Llx
// or Urx of any other paragraph whose Ury does not exceed this paragraph's Lly and
// whose edge stays inside those limits. When `_aecd` is set the computed
// rectangles finally replace the paragraphs' PdfRectangle values.
func (_efda paraList) computeEBBoxes() {
	if _afdb {
		_ff.Log.Info("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a")
	}
	for _, para := range _efda {
		para._dbfe = para.PdfRectangle
	}

	neighbours := _efda.yNeighbours(0)
	for idx, para := range _efda {
		box := para._dbfe

		// Horizontal limits imposed by the neighbours on either side.
		leftLimit, rightLimit := -1.0e9, +1.0e9
		for _, nIdx := range neighbours[para] {
			nBox := _efda[nIdx]._dbfe
			switch {
			case nBox.Urx < box.Llx:
				leftLimit = _f.Max(leftLimit, nBox.Urx)
			case box.Urx < nBox.Llx:
				rightLimit = _f.Min(rightLimit, nBox.Llx)
			}
		}

		// Widen towards paragraphs that do not reach above this one's lower edge.
		for otherIdx, other := range _efda {
			oBox := other._dbfe
			if idx == otherIdx || oBox.Ury > box.Lly {
				continue
			}
			switch {
			case leftLimit <= oBox.Llx && oBox.Llx < box.Llx:
				box.Llx = oBox.Llx
			case oBox.Urx <= rightLimit && box.Urx < oBox.Urx:
				box.Urx = oBox.Urx
			}
		}

		if _afdb {
			_be.Printf("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a", idx, para._dbfe, box, _ggece(para.text(), 50))
		}
		para._dbfe = box
	}

	if _aecd {
		for _, para := range _efda {
			para.PdfRectangle = para._dbfe
		}
	}
}
// String returns a human readable description of `vecs`.
// An empty list has a fixed description. Otherwise the sizes of the two lists
// returned by vertsHorzs are shown and, when both are non-empty, a rectangle built
// from the `_facf` values of the first and last entries of each list.
func (_egbc rulingList) String() string {
	if len(_egbc) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := _egbc.vertsHorzs()
	numVerts, numHorzs := len(verts), len(horzs)
	if numVerts == 0 || numHorzs == 0 {
		return _be.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", numVerts, numHorzs)
	}
	extent := _ee.PdfRectangle{
		Llx: verts[0]._facf,
		Urx: verts[numVerts-1]._facf,
		Lly: horzs[numHorzs-1]._facf,
		Ury: horzs[0]._facf,
	}
	return _be.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", numVerts, numHorzs, extent)
}

// _bddd maps each ruling kind to its display name.
var _bddd = map[rulingKind]string{
	_fbdff: "\u006e\u006f\u006e\u0065",
	_dfbe:  "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",
	_fafbf: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",
}

// bbox returns the paragraph's embedded PdfRectangle.
func (_bdfe *textPara) bbox() _ee.PdfRectangle {
	return _bdfe.PdfRectangle
}
// _cgdg creates a word bag that initially holds only `_ceff`.
// The word is stored under the index _gfb computes from its `_aagef` value. The bag
// takes the word's rectangle and `_eedb` value, and keeps `_fdbe` and the two
// ruling lists for later use.
func _cgdg(_ceff *textWord, _fdbe float64, _egga, _ddgga rulingList) *wordBag {
	depthIdx := _gfb(_ceff._aagef)
	bag := wordBag{
		_aceg:        map[int][]*textWord{depthIdx: {_ceff}},
		PdfRectangle: _ceff.PdfRectangle,
		_adbbf:       _ceff._eedb,
		_fec:         _fdbe,
		_degg:        _egga,
		_aegf:        _ddgga,
	}
	return &bag
}

// log writes a debug listing of the paragraphs, headed by the label `_dceee`.
// It does nothing unless `_fgga` is set. Nil entries are skipped. For paragraphs
// whose `_cegd` is non-nil, its `_aage` x `_eabcaa` dimensions are shown as well.
func (_gbgea paraList) log(_dceee string) {
	if !_fgga {
		return
	}
	_ff.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _dceee, len(_gbgea))
	for idx, para := range _gbgea {
		if para == nil {
			continue
		}
		text := para.text()
		dims := "\u0020\u0020"
		if para._cegd != nil {
			dims = _be.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", para._cegd._aage, para._cegd._eabcaa)
		}
		_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", idx, para.PdfRectangle, dims, _ggece(text, 50))
	}
}

// snapToGroupsDirection snaps rulings with close `_facf` values onto a common value
// and merges the pieces of each group.
//
// The list is sorted with sortStrict and then cut into groups: a new group starts
// whenever a ruling's `_facf` exceeds that of the current group leader by more than
// _fbfc. Every ruling's `_facf` is overwritten with its group's mergePrimary value.
// Each group is split with splitSec, every part is merged into a single ruling, and
// the merged rulings are returned sorted with sortStrict. A merged ruling that
// aligns with the previously appended one on both axes is logged and dropped.
//
// An empty receiver returns nil. Previously the unconditional `_ddage[0]` access
// panicked in that case.
//
// NOTE(review): the groups are visited in map iteration order, which Go leaves
// unspecified, so which of two aligned rulings is dropped may vary between runs.
func (_ddage rulingList) snapToGroupsDirection() rulingList {
	if len(_ddage) == 0 {
		return nil
	}
	_ddage.sortStrict()

	// Partition into groups keyed by the ruling that opened the group.
	groups := make(map[*ruling]rulingList, len(_ddage))
	leader := _ddage[0]
	startGroup := func(r *ruling) {
		leader = r
		groups[leader] = rulingList{r}
	}
	startGroup(_ddage[0])
	for _, r := range _ddage[1:] {
		if r._facf < leader._facf-_bgcb {
			_ff.Log.Error("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073", leader, r)
		}
		if r._facf > leader._facf+_fbfc {
			startGroup(r)
		} else {
			groups[leader] = append(groups[leader], r)
		}
	}

	// Snap every member onto the merged value of its group.
	snapped := make(map[*ruling]float64, len(groups))
	leaderOf := make(map[*ruling]*ruling, len(_ddage))
	for groupLeader, members := range groups {
		snapped[groupLeader] = members.mergePrimary()
		for _, member := range members {
			leaderOf[member] = groupLeader
		}
	}
	for _, r := range _ddage {
		r._facf = snapped[leaderOf[r]]
	}

	// Merge the parts of each group, dropping immediate duplicates.
	merged := make(rulingList, 0, len(_ddage))
	for _, members := range groups {
		for partIdx, part := range members.splitSec() {
			candidate := part.merge()
			if len(merged) > 0 {
				previous := merged[len(merged)-1]
				if previous.alignsPrimary(candidate) && previous.alignsSec(candidate) {
					_ff.Log.Error("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073", partIdx, previous, candidate)
					continue
				}
			}
			merged = append(merged, candidate)
		}
	}
	merged.sortStrict()
	return merged
}

// toTextMarks returns the text marks of the paragraph.
// Without a `_cegd` table this is simply toCellTextMarks. With a table, the cells
// are visited with the second index of get as the outer loop and the first as the
// inner loop: a nil cell contributes a tab, any other cell its own marks, every
// cell is followed by a space, and every outer iteration except the last by a
// line feed. `_ecaeg` is passed through to the helpers unchanged.
func (_bdfbbg *textPara) toTextMarks(_ecaeg *int) []TextMark {
	if _bdfbbg._cegd == nil {
		return _bdfbbg.toCellTextMarks(_ecaeg)
	}
	var marks []TextMark
	for outer := 0; outer < _bdfbbg._cegd._eabcaa; outer++ {
		for inner := 0; inner < _bdfbbg._cegd._aage; inner++ {
			if cell := _bdfbbg._cegd.get(inner, outer); cell == nil {
				marks = _gcag(marks, _ecaeg, "\u0009")
			} else {
				marks = append(marks, cell.toCellTextMarks(_ecaeg)...)
			}
			marks = _gcag(marks, _ecaeg, "\u0020")
		}
		if outer < _bdfbbg._cegd._eabcaa-1 {
			marks = _gcag(marks, _ecaeg, "\u000a")
		}
	}
	return marks
}

// intersections returns, for each ruling index, the set of indexes of the rulings
// it intersects. Only pairs made of one _fafbf ruling and one _dfbe ruling are
// tested. The result is nil when there are fewer than _abed+1 rulings of the first
// kind or fewer than _gcad+1 of the second, or when the two kinds together exceed
// _ffbcf rulings. Indexes without any intersection have no entry in the map.
func (_aedcb rulingList) intersections() map[int]intSet {
	var vertIdxs, horzIdxs []int
	for idx, r := range _aedcb {
		switch r._cgac {
		case _fafbf:
			vertIdxs = append(vertIdxs, idx)
		case _dfbe:
			horzIdxs = append(horzIdxs, idx)
		}
	}
	if len(vertIdxs) < _abed+1 || len(horzIdxs) < _gcad+1 {
		return nil
	}
	if len(vertIdxs)+len(horzIdxs) > _ffbcf {
		_ff.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064", len(_aedcb), len(vertIdxs), len(horzIdxs))
		return nil
	}

	crossings := make(map[int]intSet, len(vertIdxs)+len(horzIdxs))
	for _, v := range vertIdxs {
		for _, h := range horzIdxs {
			if !_aedcb[v].intersects(_aedcb[h]) {
				continue
			}
			if _, ok := crossings[v]; !ok {
				crossings[v] = make(intSet)
			}
			if _, ok := crossings[h]; !ok {
				crossings[h] = make(intSet)
			}
			crossings[v].add(h)
			crossings[h].add(v)
		}
	}
	return crossings
}
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
// The counts reported by ExtractPageText are returned even when it fails; the text
// is empty in that case.
func (_eae *Extractor) ExtractTextWithStats() (_cad string, _cgc int, _dbb int, _eegf error) {
	pageText, numChars, numMisses, err := _eae.ExtractPageText()
	if err == nil {
		_cad = pageText.Text()
	}
	return _cad, numChars, numMisses, err
}
2021-12-14 01:08:28 +00:00
2022-04-27 00:10:33 +00:00
// Marks returns the TextMark collection for a page. It represents all the text on the page.
2022-06-06 22:48:24 +00:00
func ( _fcda PageText ) Marks ( ) * TextMarkArray { return & TextMarkArray { _beaa : _fcda . _gdbg } } ; func ( _ddfd rulingList ) toTilings ( ) ( rulingList , [ ] gridTiling ) { _ddfd . log ( "\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s" ) ; if len ( _ddfd ) == 0 { return nil , nil ;
} ; _ddfd = _ddfd . tidied ( "\u0061\u006c\u006c" ) ; _ddfd . log ( "\u0074\u0069\u0064\u0069\u0065\u0064" ) ; _badfg := _ddfd . toGrids ( ) ; _edfga := make ( [ ] gridTiling , len ( _badfg ) ) ; for _ecdc , _abbg := range _badfg { _edfga [ _ecdc ] = _abbg . asTiling ( ) ; } ; return _ddfd , _edfga ;
2022-03-13 12:41:53 +00:00
} ;
2021-12-14 01:08:28 +00:00
2022-06-06 22:48:24 +00:00
// Text returns the extracted page text.
func (_ddab PageText) Text() string {
	return _ddab._daaf
}

// removeDuplicates drops every point for which _beafa holds against the point
// kept immediately before it. An empty subpath is left untouched.
func (_gdgd *subpath) removeDuplicates() {
	if len(_gdgd._eaeg) == 0 {
		return
	}
	kept := []_de.Point{_gdgd._eaeg[0]}
	for _, pt := range _gdgd._eaeg[1:] {
		if _beafa(pt, kept[len(kept)-1]) {
			continue
		}
		kept = append(kept, pt)
	}
	_gdgd._eaeg = kept
}

// closePath closes the most recent subpath.
// When `_ccef` is set, a new subpath built from `_ddcc` is appended first and the
// flag is cleared. When the flag is clear and there is no subpath at all, the call
// only logs (if `_gcga` is set) and returns.
func (_fegg *shapesState) closePath() {
	switch {
	case _fegg._ccef:
		_fegg._deff = append(_fegg._deff, _edbd(_fegg._ddcc))
		_fegg._ccef = false
	case len(_fegg._deff) == 0:
		if _gcga {
			_ff.Log.Debug("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068")
		}
		_fegg._ccef = false
		return
	}
	_fegg._deff[len(_fegg._deff)-1].close()
	if _gcga {
		_ff.Log.Info("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073", _fegg)
	}
}

// _ccdd classifies a rectangle as a ruling kind: _dfbe when it is wider than tall
// and at least _acaa wide, _fafbf when it is not wider than tall and at least
// _acaa tall, and _fbdff otherwise.
func _ccdd(_bgec _ee.PdfRectangle) rulingKind {
	width := _bgec.Width()
	height := _bgec.Height()
	wider := width > height
	switch {
	case wider && width >= _acaa:
		return _dfbe
	case !wider && height >= _acaa:
		return _fafbf
	}
	return _fbdff
}

// text returns the texts of allWords joined by single spaces.
func (_ddaf *wordBag) text() string {
	words := _ddaf.allWords()
	texts := make([]string, len(words))
	for idx := range words {
		texts[idx] = words[idx]._bfdfd
	}
	return _df.Join(texts, "\u0020")
}

// _abcdc groups the text marks into words.
//
// Marks are consumed in order. A mark for which _efgf holds on its text ends the
// word being built. Any other mark is appended to the current word unless the gap
// and depth tests against the word fail, in which case the word is ended and a new
// one is started from the mark. When `_aagc` is set, a mark and the previous mark
// of the word may instead be combined as a diacritic pair, decided by _faab and
// inDiacriticArea. A finished word is kept only if _efgf does not hold for its
// computed text.
func _abcdc(_acca []*textMark, _fbba _ee.PdfRectangle) []*textWord {
	var words []*textWord
	var current *textWord
	if _ggfb {
		_ff.Log.Info("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073", len(_acca))
	}

	// flush ends the word under construction and stores it when its text qualifies.
	flush := func() {
		if current == nil {
			return
		}
		text := current.computeText()
		if !_efgf(text) {
			current._bfdfd = text
			words = append(words, current)
			if _ggfb {
				_ff.Log.Info("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", len(words)-1, current.String())
				for markIdx, wordMark := range current._gceff {
					_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", markIdx, wordMark.String())
				}
			}
		}
		current = nil
	}

	for _, mark := range _acca {
		if _aagc && current != nil && len(current._gceff) > 0 {
			prev := current._gceff[len(current._gceff)-1]
			markDia, markIsDia := _faab(mark._fdbb)
			prevDia, prevIsDia := _faab(prev._fdbb)
			if markIsDia && !prevIsDia && prev.inDiacriticArea(mark) {
				current.addDiacritic(markDia)
				continue
			}
			if prevIsDia && !markIsDia && mark.inDiacriticArea(prev) {
				// Swap: the previous mark becomes the diacritic of this one.
				current._gceff = current._gceff[:len(current._gceff)-1]
				current.appendMark(mark, _fbba)
				current.addDiacritic(prevDia)
				continue
			}
		}

		if _efgf(mark._fdbb) {
			flush()
			continue
		}
		if current == nil {
			current = _bdaae([]*textMark{mark}, _fbba)
			continue
		}

		scale := current._eedb
		depthGap := _f.Abs(_fcg(_fbba, mark)-current._aagef) / scale
		gap := _fcfe(mark, current) / scale
		if gap >= _acff || !(-_dagf <= gap && depthGap <= _dgfg) {
			flush()
			current = _bdaae([]*textMark{mark}, _fbba)
			continue
		}
		current.appendMark(mark, _fbba)
	}
	flush()
	return words
}

// lineRuling holds a ruling kind, a mark kind, an embedded colour and two points.
type lineRuling struct {
	_fbga rulingKind
	_bcbg markKind
	_ga.Color
	_aagg, _aafd _de.Point
}

// bbox returns the table's embedded PdfRectangle.
func (_decbc *textTable) bbox() _ee.PdfRectangle {
	return _decbc.PdfRectangle
}

// add appends the given points to the subpath.
func (_gcb *subpath) add(_cdeb ..._de.Point) {
	_gcb._eaeg = append(_gcb._eaeg, _cdeb...)
}

// intSet is a set of ints.
type intSet map[int]struct{}
// _gcgg takes the absolute X and Y differences of the two points and returns
// _abac(|dy|, |dx|).
// NOTE(review): _abac is defined elsewhere; its meaning is not visible here.
func _gcgg ( _gegb , _abega _de . Point ) bool { _abdcb := _f . Abs ( _gegb . X - _abega . X ) ; _gdec := _f . Abs ( _gegb . Y - _abega . Y ) ; return _abac ( _gdec , _abdcb ) ; } ;
// asTiling converts the rulings of one grid into a gridTiling.
//
// Steps, in order:
//  1. Log an error for any consecutive pair of rulings that aligns on both axes.
//  2. Sort, split with vertsHorzs and take the primaries() of each list.
//  3. Build one tile per cell between adjacent primaries using _dbgg.
//  4. Merge tiles along each row (the debug log calls this "coalesce horizontal").
//  5. Merge the row tiles across rows ("coalesce vertical"), keeping only tiles for
//     which complete() holds.
//
// An empty gridTiling is returned when either list has exactly one primary.
// Extensive debug output is produced when `_gfdb` is set.
//
// NOTE(review): `_bfcg [ 1 : ]` panics for an empty receiver, and the `== 0` test
// does not catch a list with no primaries (len-1 == -1) — confirm callers never
// pass such input.
func ( _bfcg rulingList ) asTiling ( ) gridTiling { if _gfdb { _ff . Log . Info ( "r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d" , len ( _bfcg ) ) ;
} ;
// Step 1: report (but keep) consecutive rulings that align on both axes.
for _aggf , _gdaa := range _bfcg [ 1 : ] { _eecec := _bfcg [ _aggf ] ; if _eecec . alignsPrimary ( _gdaa ) && _eecec . alignsSec ( _gdaa ) { _ff . Log . Error ( "a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073" , _gdaa , _eecec ) ;
} ; } ;
// Step 2: `_decaf` and `_bedb` are the numbers of cells along each axis
// (one fewer than the number of primaries).
_bfcg . sortStrict ( ) ; _bfcg . log ( "\u0073n\u0061\u0070\u0070\u0065\u0064" ) ; _abda , _feecd := _bfcg . vertsHorzs ( ) ; _bdge := _abda . primaries ( ) ; _feecf := _feecd . primaries ( ) ; _decaf := len ( _bdge ) - 1 ; _bedb := len ( _feecf ) - 1 ; if _decaf == 0 || _bedb == 0 { return gridTiling { } ;
} ; _fbcc := _ee . PdfRectangle { Llx : _bdge [ 0 ] , Urx : _bdge [ _decaf ] , Lly : _feecf [ 0 ] , Ury : _feecf [ _bedb ] } ; if _gfdb { _ff . Log . Info ( "\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064" , len ( _abda ) ) ;
for _bgeeb , _bdfgg := range _abda { _be . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _bgeeb , _bdfgg ) ; } ; _ff . Log . Info ( "\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064" , len ( _feecd ) ) ;
for _acfa , _cfacb := range _feecd { _be . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _acfa , _cfacb ) ; } ; _ff . Log . Info ( "\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f" , _decaf , _bedb , _bdge , _feecf ) ;
} ;
// Step 3: `_gdce` is a row-major array of tiles (row index times `_decaf` plus
// column index). Each tile is built by _dbgg from its rectangle and the four
// rulings looked up with findPrimSec.
_gdce := make ( [ ] gridTile , _decaf * _bedb ) ; for _eacfe := _bedb - 1 ; _eacfe >= 0 ; _eacfe -- { _gabaa := _feecf [ _eacfe ] ; _cbdg := _feecf [ _eacfe + 1 ] ; for _gfdedc := 0 ; _gfdedc < _decaf ; _gfdedc ++ { _efcfa := _bdge [ _gfdedc ] ; _begba := _bdge [ _gfdedc + 1 ] ; _dabd := _abda . findPrimSec ( _efcfa , _gabaa ) ;
_fcbgg := _abda . findPrimSec ( _begba , _gabaa ) ; _gabeg := _feecd . findPrimSec ( _gabaa , _efcfa ) ; _ddad := _feecd . findPrimSec ( _cbdg , _efcfa ) ; _dffc := _ee . PdfRectangle { Llx : _efcfa , Urx : _begba , Lly : _gabaa , Ury : _cbdg } ; _gabea := _dbgg ( _dffc , _dabd , _fcbgg , _gabeg , _ddad ) ;
_gdce [ _eacfe * _decaf + _gfdedc ] = _gabea ; if _gfdb { _be . Printf ( "\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a" , _gfdedc , _eacfe , _gabea . String ( ) , _gabea . Width ( ) , _gabea . Height ( ) ) ;
} ; } ; } ; if _gfdb { _ff . Log . Info ( "r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066" , _fbcc ) ;
} ;
// Step 4: per row, `_daff[row]` maps a merged tile's Llx to the tile. A tile
// without `_gdge` is skipped. Otherwise it absorbs the tiles to its right while
// its `_geaa` flag is clear: it takes the neighbour's Urx and `_geaa`, and ORs in
// the neighbour's `_efab` and `_gaaf`. The column index then jumps past the
// absorbed tiles.
// NOTE(review): the four tile flags are presumably border flags — confirm against gridTile.
_daff := make ( [ ] map [ float64 ] gridTile , _bedb ) ; for _cgfe := _bedb - 1 ; _cgfe >= 0 ; _cgfe -- { if _gfdb { _be . Printf ( "\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a" , _cgfe ) ; } ; _daff [ _cgfe ] = make ( map [ float64 ] gridTile , _decaf ) ; for _bcbab := 0 ; _bcbab < _decaf ;
_bcbab ++ { _bfae := _gdce [ _cgfe * _decaf + _bcbab ] ; if _gfdb { _be . Printf ( "\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _bcbab , _bfae ) ; } ; if ! _bfae . _gdge { continue ; } ; _aefcc := _bcbab ; for _ccac := _bcbab + 1 ; ! _bfae . _geaa && _ccac < _decaf ;
_ccac ++ { _ffcc := _gdce [ _cgfe * _decaf + _ccac ] ; _bfae . Urx = _ffcc . Urx ; _bfae . _efab = _bfae . _efab || _ffcc . _efab ; _bfae . _gaaf = _bfae . _gaaf || _ffcc . _gaaf ; _bfae . _geaa = _ffcc . _geaa ; if _gfdb { _be . Printf ( "\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a" , _ccac , _ffcc , _bfae ) ;
} ; _aefcc = _ccac ; } ; if _gfdb { _be . Printf ( " \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n" , _bcbab , _aefcc , _bfae ) ; } ; _bcbab = _aefcc ; _daff [ _cgfe ] [ _bfae . Llx ] = _bfae ; } ; } ; _gcfee := make ( map [ float64 ] map [ float64 ] gridTile , _bedb ) ;
_efbed := make ( map [ float64 ] map [ float64 ] struct { } , _bedb ) ; for _bbae := _bedb - 1 ; _bbae >= 0 ; _bbae -- { _fcfea := _gdce [ _bbae * _decaf ] . Lly ; _gcfee [ _fcfea ] = make ( map [ float64 ] gridTile , _decaf ) ; _efbed [ _fcfea ] = make ( map [ float64 ] struct { } , _decaf ) ; } ; if _gfdb { _ff . Log . Info ( "\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066" , _fbcc ) ;
} ;
// Step 5: rows are visited from index `_bedb`-1 down to 0. `_gcfee` (keyed by row
// Lly, then tile Llx) receives the final tiles; `_efbed` records tiles already
// absorbed into a tile from an earlier-visited row so they are not started again.
// A tile extends into lower-index rows while its `_gaaf` flag is clear and the
// next row holds a tile with the same Llx and Urx. `_gaaf` is forced to true once
// row 0 is being processed.
for _cdea := _bedb - 1 ; _cdea >= 0 ; _cdea -- { _febca := _gdce [ _cdea * _decaf ] . Lly ; _gfadd := _daff [ _cdea ] ; if _gfdb { _be . Printf ( "\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a" , _cdea ) ; } ; for _ , _ecaad := range _ebbgg ( _gfadd ) { if _ , _aaacb := _efbed [ _febca ] [ _ecaad ] ;
_aaacb { continue ; } ; _fcbf := _gfadd [ _ecaad ] ; if _gfdb { _be . Printf ( " \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a" , _fcbf . String ( ) ) ; } ; for _gdff := _cdea - 1 ; _gdff >= 0 ; _gdff -- { if _fcbf . _gaaf { break ; } ; _eeac := _daff [ _gdff ] ; _bfgf , _adfa := _eeac [ _ecaad ] ;
if ! _adfa { break ; } ; if _bfgf . Urx != _fcbf . Urx { break ; } ; _fcbf . _gaaf = _bfgf . _gaaf ; _fcbf . Lly = _bfgf . Lly ; if _gfdb { _be . Printf ( "\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a" , _bfgf . String ( ) , _fcbf . String ( ) ) ;
} ; _efbed [ _bfgf . Lly ] [ _bfgf . Llx ] = struct { } { } ; } ; if _cdea == 0 { _fcbf . _gaaf = true ; } ; if _fcbf . complete ( ) { _gcfee [ _febca ] [ _ecaad ] = _fcbf ; } ; } ; } ; _acfde := gridTiling { PdfRectangle : _fbcc , _dgcdc : _bagc ( _gcfee ) , _dgbfg : _bebb ( _gcfee ) , _dcaac : _gcfee } ;
_acfde . log ( "\u0043r\u0065\u0061\u0074\u0065\u0064" ) ; return _acfde ; } ;
// size returns the number of entries on the stack.
func ( _babg * stateStack ) size ( ) int { return len ( * _babg ) } ;
// applyRemovals removes words from the bag. `_abaf` maps an `_aceg` key to the set
// of words to drop from that entry. Keys with an empty set are ignored. When the
// number of words left for a key is zero the key is deleted, otherwise the entry is
// replaced by a new slice holding the surviving words in their original order.
// NOTE(review): the new slice is sized len(entry)-len(set), so every word in the
// set is assumed to be present in the entry.
func ( _bgfe * wordBag ) applyRemovals ( _abaf map [ int ] map [ * textWord ] struct { } ) { for _abgf , _ggc := range _abaf { if len ( _ggc ) == 0 { continue ;
} ; _gbb := _bgfe . _aceg [ _abgf ] ; _gca := len ( _gbb ) - len ( _ggc ) ; if _gca == 0 { delete ( _bgfe . _aceg , _abgf ) ; continue ; } ; _ecd := make ( [ ] * textWord , _gca ) ; _dbgd := 0 ; for _ , _fcdb := range _gbb { if _ , _eaca := _ggc [ _fcdb ] ; ! _eaca { _ecd [ _dbgd ] = _fcdb ; _dbgd ++ ;
} ; } ; _bgfe . _aceg [ _abgf ] = _ecd ; } ; } ;
// cubicTo records only the final point (`_gbab`, `_cfe`) via addPoint; the first
// four coordinates are not used. A log line is written when `_gcga` is set.
func ( _cacf * shapesState ) cubicTo ( _baefc , _fcbc , _fad , _ecgd , _gbab , _cfe float64 ) { if _gcga { _ff . Log . Info ( "\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a" ) ; } ; _cacf . addPoint ( _gbab , _cfe ) ; } ;
// appendWord adds the word to the line, grows the line's rectangle with _bgcf to
// take in the word's rectangle, and raises the line's `_caccd` and `_decg` to the
// word's `_eedb` and `_aagef` when those are larger.
func ( _beace * textLine ) appendWord ( _bbea * textWord ) { _beace . _eaab = append ( _beace . _eaab , _bbea ) ;
_beace . PdfRectangle = _bgcf ( _beace . PdfRectangle , _bbea . PdfRectangle ) ; if _bbea . _eedb > _beace . _caccd { _beace . _caccd = _bbea . _eedb ; } ; if _bbea . _aagef > _beace . _decg { _beace . _decg = _bbea . _aagef ; } ; } ;
// paraList is a sequence of paragraphs.
type paraList [ ] * textPara ;
// lastpointEstablished returns a point and a flag. The flag is false, and the point
// meaningful, when `_ccef` is set (point `_ddcc`) or when the most recent subpath
// has `_feeg` set (that subpath's last() point). Otherwise it returns the zero
// point and true.
func ( _gcdb * shapesState ) lastpointEstablished ( ) ( _de . Point , bool ) { if _gcdb . _ccef { return _gcdb . _ddcc , false ;
} ; _dbcdg := len ( _gcdb . _deff ) ; if _dbcdg > 0 && _gcdb . _deff [ _dbcdg - 1 ] . _feeg { return _gcdb . _deff [ _dbcdg - 1 ] . last ( ) , false ; } ; return _de . Point { } , true ; } ;
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	IncludeInlineStencilMasks bool
}

// compositeCell is a rectangle together with the paragraphs associated with it.
type compositeCell struct {
	_ee.PdfRectangle
	paraList
}

// complete reports whether every tile stored in `_dcaac` is complete.
func (_dacgb gridTiling) complete() bool {
	for _, row := range _dacgb._dcaac {
		for _, tile := range row {
			if tile.complete() {
				continue
			}
			return false
		}
	}
	return true
}

// _ddec returns the integers 0.._cadc-1 sorted with `_adde` as the less function.
// `_adde` receives the values being ordered, not their positions in the slice.
func _ddec(_cadc int, _adde func(int, int) bool) []int {
	order := make([]int, _cadc)
	for idx := 0; idx < len(order); idx++ {
		order[idx] = idx
	}
	less := func(a, b int) bool {
		return _adde(order[a], order[b])
	}
	_ef.Slice(order, less)
	return order
}

// _dgced logs the number of grids under the title `_baeg` and prints one
// description line per grid to standard output.
func _dgced(_baeg string, _fbea []rulingList) {
	_ff.Log.Info("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073", len(_fbea), _baeg)
	for idx := range _fbea {
		_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, _fbea[idx].String())
	}
}

// stateStack is a stack of text states.
type stateStack []*textState

// log writes a debug listing of the rulings, headed by the title `_bgde`.
// It does nothing unless `_dgac` is set.
func (_adabe rulingList) log(_bgde string) {
	if !_dgac {
		return
	}
	_ff.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _bgde, _adabe.String())
	for idx, r := range _adabe {
		_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, r.String())
	}
}

// depth returns -1 when `_gbdd` is set, otherwise the `_decg` value of the first
// entry of `_cecgd` when there is one, otherwise the depth of `_cegd`.
func (_aaea *textPara) depth() float64 {
	switch {
	case _aaea._gbdd:
		return -1.0
	case len(_aaea._cecgd) > 0:
		return _aaea._cecgd[0]._decg
	}
	return _aaea._cegd.depth()
}

// rulingKind identifies the kind of a ruling.
type rulingKind int

// _dbac returns a new slice holding the elements of `_dbebf` in reverse order.
func _dbac(_dbebf []int) []int {
	count := len(_dbebf)
	reversed := make([]int, count)
	for idx := range reversed {
		reversed[idx] = _dbebf[count-1-idx]
	}
	return reversed
}

// setTextRise stores `_adcf` in the `_fefb` field of the text state.
// It is a no-op on a nil receiver.
func (_gfce *textObject) setTextRise(_adcf float64) {
	if _gfce != nil {
		_gfce._ecb._fefb = _adcf
	}
}