unipdf/extractor/extractor.go

788 lines
181 KiB
Go
Raw Normal View History

2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2021-09-23 22:37:42 +00:00
package extractor ;import (_d "bytes";_g "errors";_gfc "fmt";_fgc "github.com/unidoc/unipdf/v3/common";_ca "github.com/unidoc/unipdf/v3/contentstream";_e "github.com/unidoc/unipdf/v3/core";_fge "github.com/unidoc/unipdf/v3/internal/license";_dd "github.com/unidoc/unipdf/v3/internal/textencoding";
_gfa "github.com/unidoc/unipdf/v3/internal/transform";_bbe "github.com/unidoc/unipdf/v3/model";_fd "golang.org/x/text/unicode/norm";_bf "golang.org/x/xerrors";_a "image/color";_f "io";_gf "math";_be "regexp";_c "sort";_fg "strings";_bg "unicode";_bb "unicode/utf8";
);type compositeCell struct{_bbe .PdfRectangle ;paraList ;};func (_ccc *textObject )setTextMatrix (_cg []float64 ){if len (_cg )!=6{_fgc .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_cg ));
return ;};_cceb ,_ffd ,_gda ,_cgd ,_dfd ,_bcd :=_cg [0],_cg [1],_cg [2],_cg [3],_cg [4],_cg [5];_ccc ._adb =_gfa .NewMatrix (_cceb ,_ffd ,_gda ,_cgd ,_dfd ,_bcd );_ccc ._eca =_ccc ._adb ;};func (_fdabd *textTable )emptyRow (_edfgc int )bool {for _efaa :=0;
_efaa < _fdabd ._caea ;_efaa ++{_gddfe :=_fdabd .get (_efaa ,_edfgc );if _gddfe !=nil &&_gddfe .text ()!=""{return false ;};};return true ;};func (_ggdb paraList )llyRange (_bfaf []int ,_dbg ,_bgda float64 )[]int {_gedc :=len (_ggdb );if _bgda < _ggdb [_bfaf [0]].Lly ||_dbg > _ggdb [_bfaf [_gedc -1]].Lly {return nil ;
};_fbdf :=_c .Search (_gedc ,func (_cbfg int )bool {return _ggdb [_bfaf [_cbfg ]].Lly >=_dbg });_dgfac :=_c .Search (_gedc ,func (_fgecd int )bool {return _ggdb [_bfaf [_fgecd ]].Lly > _bgda });return _bfaf [_fbdf :_dgfac ];};func (_debe rectRuling )asRuling ()(*ruling ,bool ){_dbgg :=ruling {_ccfa :_debe ._bbabc ,Color :_debe .Color ,_bdcf :_ebdbd };
switch _debe ._bbabc {case _bgecg :_dbgg ._eacd =0.5*(_debe .Llx +_debe .Urx );_dbgg ._dgbc =_debe .Lly ;_dbgg ._cgcfd =_debe .Ury ;_fdaaa ,_edda :=_debe .checkWidth (_debe .Llx ,_debe .Urx );if !_edda {if _fcce {_fgc .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_debe );
};return nil ,false ;};_dbgg ._fdaf =_fdaaa ;case _eeca :_dbgg ._eacd =0.5*(_debe .Lly +_debe .Ury );_dbgg ._dgbc =_debe .Llx ;_dbgg ._cgcfd =_debe .Urx ;_edfg ,_gbed :=_debe .checkWidth (_debe .Lly ,_debe .Ury );if !_gbed {if _fcce {_fgc .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_debe );
};return nil ,false ;};_dbgg ._fdaf =_edfg ;default:_fgc .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_debe ._bbabc );return nil ,false ;};return &_dbgg ,true ;};func (_cbdb *ruling )alignsPrimary (_babf *ruling )bool {return _cbdb ._ccfa ==_babf ._ccfa &&_gf .Abs (_cbdb ._eacd -_babf ._eacd )< _eedc *0.5;
};type textObject struct{_ebba *Extractor ;_gbde *_bbe .PdfPageResources ;_fag _ca .GraphicsState ;_eada *textState ;_fef *stateStack ;_adb _gfa .Matrix ;_eca _gfa .Matrix ;_cebe []*textMark ;_fga bool ;};func (_agbdf paraList )findGridTables (_fgcde []gridTiling )[]*textTable {if _bdfb {_fgc .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_agbdf ));
for _cggac ,_dcba :=range _agbdf {_gfc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cggac ,_dcba );};};var _gbga []*textTable ;for _aabda ,_eceeg :=range _fgcde {_dgd ,_aafb :=_agbdf .findTableGrid (_eceeg );if _dgd !=nil {_dgd .log (_gfc .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_aabda ));
_gbga =append (_gbga ,_dgd );_dgd .markCells ();};for _gfed :=range _aafb {_gfed ._eeeg =true ;};};if _bdfb {_fgc .Log .Info ("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s",len (_gbga ));
};return _gbga ;};func (_fcfa rulingList )merge ()*ruling {_gbdbd :=_fcfa [0]._eacd ;_aaaa :=_fcfa [0]._dgbc ;_afdfe :=_fcfa [0]._cgcfd ;for _ ,_efea :=range _fcfa [1:]{_gbdbd +=_efea ._eacd ;if _efea ._dgbc < _aaaa {_aaaa =_efea ._dgbc ;};if _efea ._cgcfd > _afdfe {_afdfe =_efea ._cgcfd ;
};};_cbge :=&ruling {_ccfa :_fcfa [0]._ccfa ,_bdcf :_fcfa [0]._bdcf ,Color :_fcfa [0].Color ,_eacd :_gbdbd /float64 (len (_fcfa )),_dgbc :_aaaa ,_cgcfd :_afdfe };if _ggfe {_fgc .Log .Info ("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073",len (_fcfa ),_cbge );
for _cfcc ,_ccff :=range _fcfa {_gfc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cfcc ,_ccff );};};return _cbge ;};func (_efgdb *ruling )encloses (_ageg ,_feceb float64 )bool {return _efgdb ._dgbc -_ddee <=_ageg &&_feceb <=_efgdb ._cgcfd +_ddee ;
};func (_eddb *wordBag )maxDepth ()float64 {return _eddb ._gdddb -_eddb .Lly };func (_ddbfa *textLine )pullWord (_aadce *wordBag ,_cgga *textWord ,_cbec int ){_ddbfa .appendWord (_cgga );_aadce .removeWord (_cgga ,_cbec );};
2020-08-27 21:45:09 +00:00
2021-09-23 22:37:42 +00:00
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	// Images lists the image marks extracted from the page.
	Images []ImageMark
}
2021-03-23 23:12:52 +00:00
2021-09-23 22:37:42 +00:00
// Text returns the extracted page text.
func (pt PageText) Text() string {
	return pt._fca
}

// Untyped numeric constants used by name throughout the package, for example
// as comparison tolerances.
// NOTE(review): the identifiers are obfuscated, so the individual meaning of
// each value cannot be read off this declaration.
const (
	_beeff = 1.0e-6
	_adde  = 1.0e-4
	_gebg  = 10
	_gecf  = 6
	_acge  = 0.5
	_cdfeb = 0.12
	_efba  = 0.19
	_bcdc  = 0.04
	_dgfdb = 0.04
	_efbe  = 1.0
	_geec  = 0.04
	_adaf  = 0.4
	_edaa  = 0.7
	_fdee  = 1.0
	_ggfea = 0.1
	_fcdg  = 1.4
	_aecc  = 0.46
	_cbbb  = 0.02
	_acgeg = 0.2
	_ffga  = 0.5
	_dfdf  = 4
	_bbfce = 4.0
	_ggeg  = 6
	_becad = 0.3
	_gcce  = 0.01
	_dcea  = 0.02
	_acef  = 2
	_gegf  = 2
	_fbe   = 500
	_bdfg  = 4.0
	_gebf  = 4.0
	_ffec  = 0.05
	_aaf   = 0.1
	_ddee  = 2.0
	_eedc  = 2.0
	_afda  = 1.5
	_gbce  = 3.0
	_agd   = 0.25
)
2021-05-31 17:17:31 +00:00
2021-08-13 01:33:42 +00:00
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
2021-09-23 22:37:42 +00:00
// ApplyArea rebuilds the receiver's text, text marks and tables from the
// stored marks that _eda accepts for `bbox`.
//
// The stored marks (_cgb) are not modified, so the method can be called again
// with another box; every call overwrites _fca, _gffb and _dbee.
func (pt *PageText) ApplyArea(bbox _bbe.PdfRectangle) {
	selected := make([]*textMark, 0, len(pt._cgb))
	for _, mark := range pt._cgb {
		if _eda(mark.bbox(), bbox) {
			selected = append(selected, mark)
		}
	}

	// The selected marks are grouped by orientation (0, 90, 180, 270) and every
	// group is laid out separately. The loop stops early once all selected
	// marks have been consumed.
	var paras paraList
	remaining := len(selected)
	for rot := 0; rot < 360 && remaining > 0; rot += 90 {
		// At most `remaining` marks can still match this orientation, so use
		// that as the capacity hint. The previous hint was the number of marks
		// already consumed, which is zero on the first pass.
		group := make([]*textMark, 0, remaining)
		for _, mark := range selected {
			if mark._degdg == rot {
				group = append(group, mark)
			}
		}
		if len(group) > 0 {
			paras = append(paras, _fgcb(group, pt._fbg, nil, nil)...)
			remaining -= len(group)
		}
	}

	buf := new(_d.Buffer)
	paras.writeText(buf)
	pt._fca = buf.String()
	pt._gffb = paras.toTextMarks()
	pt._dbee = paras.tables()
}

// textResult bundles a PageText with two integer values.
type textResult struct {
	_befd PageText
	_agg  int
	_aebf int
}

// textLine is a rectangle holding an ordered list of words plus two float
// attributes.
type textLine struct {
	_bbe.PdfRectangle
	_fdga float64
	_gfbc []*textWord
	_ddac float64
}

// asRuling converts the line into a ruling of the same kind and colour.
//
// For kind _bgecg the primary coordinate is the mean x and the extent is the
// y range of the two end points; for kind _eeca it is the mean y and the x
// range. Any other kind is logged as an error and yields (nil, false).
func (lr lineRuling) asRuling() (*ruling, bool) {
	r := ruling{_ccfa: lr._fgecf, Color: lr.Color, _bdcf: _gdaea}
	p, q := lr._adbgb, lr._gagb
	switch lr._fgecf {
	case _bgecg:
		r._eacd = lr.xMean()
		r._dgbc = _gf.Min(p.Y, q.Y)
		r._cgcfd = _gf.Max(p.Y, q.Y)
	case _eeca:
		r._eacd = lr.yMean()
		r._dgbc = _gf.Min(p.X, q.X)
		r._cgcfd = _gf.Max(p.X, q.X)
	default:
		_fgc.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", lr._fgecf)
		return nil, false
	}
	return &r, true
}
2021-05-31 17:17:31 +00:00
2021-08-13 01:33:42 +00:00
// String returns a human readable description of the shapes state.
2021-09-23 22:37:42 +00:00
func (ss *shapesState) String() string {
	subpathCount := len(ss._gbag)
	return _gfc.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", subpathCount, ss._cea)
}

// snapToGroupsDirection groups rulings whose primary coordinates lie within
// _eedc of the first ruling of the group, snaps every ruling in a group to the
// group's merged primary coordinate, and then merges the rulings of each group
// along the secondary direction. The result is sorted with sortStrict.
//
// The receiver must not be empty: its first element is read unconditionally.
func (vecs rulingList) snapToGroupsDirection() rulingList {
	vecs.sortStrict()

	// Phase 1: partition the sorted rulings into groups keyed by the ruling
	// that opened the group.
	groups := make(map[*ruling]rulingList, len(vecs))
	head := vecs[0]
	groups[head] = rulingList{head}
	for _, v := range vecs[1:] {
		if v._eacd < head._eacd-_beeff {
			_fgc.Log.Error("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073", head, v)
		}
		if v._eacd > head._eacd+_eedc {
			head = v
			groups[head] = rulingList{v}
			continue
		}
		groups[head] = append(groups[head], v)
	}

	// Phase 2: compute the merged primary coordinate of every group and record
	// which group each ruling belongs to.
	snapped := make(map[*ruling]float64, len(groups))
	owner := make(map[*ruling]*ruling, len(vecs))
	for key, members := range groups {
		snapped[key] = members.mergePrimary()
		for _, member := range members {
			owner[member] = key
		}
	}

	// Phase 3: move every ruling onto its group's primary coordinate.
	for _, v := range vecs {
		v._eacd = snapped[owner[v]]
	}

	// Phase 4: merge along the secondary direction, skipping a merged ruling
	// that aligns with the one appended just before it.
	merged := make(rulingList, 0, len(vecs))
	for _, members := range groups {
		for i, section := range members.splitSec() {
			candidate := section.merge()
			if n := len(merged); n > 0 {
				previous := merged[n-1]
				if previous.alignsPrimary(candidate) && previous.alignsSec(candidate) {
					_fgc.Log.Error("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073", i, previous, candidate)
					continue
				}
			}
			merged = append(merged, candidate)
		}
	}
	merged.sortStrict()
	return merged
}
// String returns a human readable description of the ruling list.
func (vecs rulingList) String() string {
	if len(vecs) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := vecs.vertsHorzs()
	numVerts, numHorzs := len(verts), len(horzs)
	if numVerts == 0 || numHorzs == 0 {
		return _gfc.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", numVerts, numHorzs)
	}
	// The box is spanned by the first and last rulings of each direction.
	box := _bbe.PdfRectangle{
		Llx: verts[0]._eacd,
		Urx: verts[numVerts-1]._eacd,
		Lly: horzs[numHorzs-1]._eacd,
		Ury: horzs[0]._eacd,
	}
	return _gfc.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", numVerts, numHorzs, box)
}

// textPara is a rectangle holding text lines, an optional table, two boolean
// flags and links to four other paragraphs.
type textPara struct {
	_bbe.PdfRectangle
	_beeg  _bbe.PdfRectangle
	_dbed  []*textLine
	_gbfb  *textTable
	_eeeg  bool
	_eecg  bool
	_fffe  *textPara
	_gadgg *textPara
	_gbceb *textPara
	_cedg  *textPara
}

// _aebd packs two ints into a single uint64 key: the first value is shifted
// up by 24 bits (multiplied by 0x1000000) and the second is added to it.
func _aebd(_bbde, _deef int) uint64 {
	return (uint64(_bbde) << 24) + uint64(_deef)
}
2021-06-21 14:01:56 +00:00
2021-08-13 01:33:42 +00:00
// RangeOffset returns the TextMarks of the receiver that overlap text[start:end] in the extracted text.
// A mark tm is included when `start` < tm.Offset + len(tm.Text) and tm.Offset < `end`, where
// `start` and `end` are offsets in the extracted text.
// NOTE: A TextMark can contain multiple characters, e.g. "ffi" for the ffi ligature, so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
2021-09-23 22:37:42 +00:00
// RangeOffset returns the marks that overlap text[start:end].
//
// `start` and `end` are clamped to the offsets covered by the array. An error
// is returned for a nil receiver, for end < start, when no mark ends at or
// after `start`, and when the selected range is empty. An empty array is
// returned unchanged.
func (ma *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if ma == nil {
		return nil, _g.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _gfc.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}
	marks := ma._fbcc
	n := len(marks)
	if n == 0 {
		return ma, nil
	}
	if first := marks[0].Offset; start < first {
		start = first
	}
	if limit := marks[n-1].Offset + 1; end > limit {
		end = limit
	}

	// First mark whose last byte is at or after `start`.
	iStart := _c.Search(n, func(i int) bool { return marks[i].Offset+len(marks[i].Text)-1 >= start })
	if !(0 <= iStart && iStart < n) {
		return nil, _gfc.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, iStart, n, marks[0], marks[n-1])
	}

	// First mark that starts at or after `end`. sort.Search returns n when no
	// such mark exists, which simply means the range runs up to and including
	// the last mark. That case used to be rejected as "out of range", so a
	// range reaching the end of the text could never be returned.
	iEnd := _c.Search(n, func(i int) bool { return marks[i].Offset > end-1 })
	if iEnd <= iStart {
		return nil, _gfc.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_fbcc: marks[iStart:iEnd]}, nil
}

// topoOrder returns paragraph indexes produced by a depth-first traversal over
// the readBefore relation; the visit order is passed through _bbdfb before it
// is returned.
func (paras paraList) topoOrder() []int {
	if _fcde {
		_fgc.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	n := len(paras)
	visited := make([]bool, n)
	order := make([]int, 0, n)
	llyOrder := paras.llyOrdering()

	var visit func(i int)
	visit = func(i int) {
		visited[i] = true
		for j := 0; j < n; j++ {
			if !visited[j] && paras.readBefore(llyOrder, i, j) {
				visit(j)
			}
		}
		// A paragraph is emitted only after everything reachable from it.
		order = append(order, i)
	}
	for i := 0; i < n; i++ {
		if !visited[i] {
			visit(i)
		}
	}
	return _bbdfb(order)
}

// lines returns the lines of all paragraphs, concatenated in paragraph order.
func (paras paraList) lines() []*textLine {
	var all []*textLine
	for _, para := range paras {
		all = append(all, para._dbed...)
	}
	return all
}

// _cddf creates a textObject for the given extractor, resources, graphics
// state, text state and state stack, with both matrices set to identity.
func _cddf(_dff *Extractor, _ddd *_bbe.PdfPageResources, _cafe _ca.GraphicsState, _dddd *textState, _faff *stateStack) *textObject {
	return &textObject{
		_ebba: _dff,
		_gbde: _ddd,
		_fag:  _cafe,
		_fef:  _faff,
		_eada: _dddd,
		_adb:  _gfa.IdentityMatrix(),
		_eca:  _gfa.IdentityMatrix(),
	}
}

// removeDuplicates sorts the receiver in place and returns a new list without
// consecutive rulings that are equal according to ruling.equals. It returns
// nil for an empty list.
func (vecs rulingList) removeDuplicates() rulingList {
	if len(vecs) == 0 {
		return nil
	}
	vecs.sort()
	unique := rulingList{vecs[0]}
	for _, v := range vecs[1:] {
		if v.equals(unique[len(unique)-1]) {
			continue
		}
		unique = append(unique, v)
	}
	return unique
}

// fill appends the current subpaths, together with the current fill colour,
// to `*_beba` as a new path section. Diagnostics are printed when _bcbc is set.
func (ss *shapesState) fill(_beba *[]pathSection) {
	section := pathSection{_dfaa: ss._gbag, Color: ss._cdbf.getFillColor()}
	*_beba = append(*_beba, section)
	if !_bcbc {
		return
	}
	box := section.bbox()
	_gfc.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*_beba), len(section._dfaa), ss, section.Color, box, box.Width(), box.Height())
	if !_cgdfea {
		return
	}
	for i, sp := range section._dfaa {
		_gfc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sp)
		// Only the subpaths with index 0 through 10 are printed.
		if i == 10 {
			break
		}
	}
}

// sort orders the list in place using rulingList.comp.
func (vecs rulingList) sort() {
	_c.Slice(vecs, vecs.comp)
}

// applyRemovals deletes from the bag every word listed in `_feea`, which maps
// a bag key to the set of words to drop under that key. A key whose list
// becomes empty is removed from the bag.
func (bag *wordBag) applyRemovals(_feea map[int]map[*textWord]struct{}) {
	for key, doomed := range _feea {
		if len(doomed) == 0 {
			continue
		}
		words := bag._gbec[key]
		keep := len(words) - len(doomed)
		if keep == 0 {
			delete(bag._gbec, key)
			continue
		}
		// NOTE(review): this assumes every word in `doomed` is present in
		// `words`; otherwise `keep` underestimates the survivors.
		survivors := make([]*textWord, keep)
		next := 0
		for _, word := range words {
			if _, gone := doomed[word]; !gone {
				survivors[next] = word
				next++
			}
		}
		bag._gbec[key] = survivors
	}
}

// setCharSpacing stores `_abb` in the text state's _cfa field. It is a no-op
// on a nil receiver.
func (to *textObject) setCharSpacing(_abb float64) {
	if to == nil {
		return
	}
	to._eada._cfa = _abb
	if _bafe {
		_fgc.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _abb, to._eada.String())
	}
}

// setTextLeading stores `_efee` in the text state's _cac field. It is a no-op
// on a nil receiver.
func (to *textObject) setTextLeading(_efee float64) {
	if to == nil {
		return
	}
	to._eada._cac = _efee
}

// _gadg returns the difference of _bfage applied to the two arguments.
func _gadg(_bgec, _bbfa bounded) float64 {
	return _bfage(_bgec) - _bfage(_bbfa)
}

// bounded is implemented by anything that can report a bounding rectangle.
type bounded interface {
	bbox() _bbe.PdfRectangle
}

// _fcge applies _dgad to the _beeg rectangles of the two paragraphs.
func _fcge(_dagb, _ebed *textPara) bool {
	return _dgad(_dagb._beeg, _ebed._beeg)
}
// clear resets the subpath to its zero value.
func (sp *subpath) clear() {
	var empty subpath
	*sp = empty
}
2021-06-21 14:01:56 +00:00
2021-09-23 22:37:42 +00:00
// String returns the name of the mark kind, or a "Not a mark" message for an unknown value.
func (_dgab markKind )String ()string {_ebg ,_egcg :=_gccf [_dgab ];if !_egcg {return _gfc .Sprintf ("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064",_dgab );};return _ebg ;};func _dbfa (_eaadb _bbe .PdfRectangle ,_dfc []*textLine )*textPara {return &textPara {PdfRectangle :_eaadb ,_dbed :_dfc };
2021-08-13 01:33:42 +00:00
};
2021-09-23 22:37:42 +00:00
// ToTextMark returns the public view of the text mark.
// ToTextMark copies the text, original text, bounding box, font, font size,
// fill colour, stroke colour and orientation of the internal mark into an
// exported TextMark value.
func (_fegd *textMark )ToTextMark ()TextMark {return TextMark {Text :_fegd ._cgge ,Original :_fegd ._aabb ,BBox :_fegd ._acfa ,Font :_fegd ._fabf ,FontSize :_fegd ._gbae ,FillColor :_fegd ._dafb ,StrokeColor :_fegd ._aada ,Orientation :_fegd ._degdg };
// makeRectRuling, which starts on the following line, tries to interpret the
// first four points of the subpath as a rectangle and to convert it into a
// ruling drawn in the given colour. It returns false when the four edges do
// not classify as two _eeca (horizontal) plus two _bgecg (vertical) edges or
// as one accepted pair of a single kind, when _gfcd reports kind _fgcbb for
// the rectangle, or when rectRuling.asRuling rejects it.
// NOTE(review): `_gbee [:4]` panics when the point slice has capacity below 4;
// confirm that callers only pass subpaths with at least four points.
};func (_bfcfe *subpath )makeRectRuling (_acda _a .Color )(*ruling ,bool ){if _fcce {_fgc .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_bfcfe );};_gbdeb :=_bfcfe ._gbee [:4];
// Classify every edge, running from point i to point (i+1)%4, with _caeg.
_eeef :=make (map[int ]rulingKind ,len (_gbdeb ));for _cgaf ,_cgce :=range _gbdeb {_fbee :=_bfcfe ._gbee [(_cgaf +1)%4];_eeef [_cgaf ]=_caeg (_cgce ,_fbee );if _fcce {_gfc .Printf ("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066",_cgaf ,_eeef [_cgaf ],_cgce ,_fbee );
// Collect the edge indexes by kind: _cbef receives the _eeca edges and _gada
// the _bgecg edges.
// NOTE(review): the indexes are gathered by ranging over a map, so their order
// inside _cbef and _gada is unspecified.
};};if _fcce {_gfc .Printf ("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_eeef );};var _gada ,_cbef []int ;for _egeba ,_ddeae :=range _eeef {switch _ddeae {case _eeca :_cbef =append (_cbef ,_egeba );case _bgecg :_gada =append (_gada ,_egeba );
};};if _fcce {_gfc .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_cbef ),_cbef );_gfc .Printf ("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_gada ),_gada );
// The shape is accepted with two edges of each kind, or with exactly two edges
// of one kind and none of the other when _effb (for _eeca edges) or _fbfe (for
// _bgecg edges) accepts that pair.
};_agcf :=(len (_cbef )==2&&len (_gada )==2)||(len (_cbef )==2&&len (_gada )==0&&_effb (_gbdeb [_cbef [0]],_gbdeb [_cbef [1]]))||(len (_gada )==2&&len (_cbef )==0&&_fbfe (_gbdeb [_gada [0]],_gbdeb [_gada [1]]));if _fcce {_gfc .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_cbef ),len (_gada ),_agcf );
};if !_agcf {if _fcce {_fgc .Log .Error ("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v",_bfcfe );_gfc .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_cbef ),len (_gada ),_agcf );
// NOTE(review): the rejection on the next line returns a non-nil empty ruling,
// whereas the later failure paths return nil.
// When one kind has no edges, every edge that is not of the other kind is used
// in its place.
};return &ruling {},false ;};if len (_gada )==0{for _gdcg ,_faabd :=range _eeef {if _faabd !=_eeca {_gada =append (_gada ,_gdcg );};};};if len (_cbef )==0{for _febec ,_bdfee :=range _eeef {if _bdfee !=_bgecg {_cbef =append (_cbef ,_febec );};};};if _fcce {_fgc .Log .Info ("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_cbef ),len (_gada ),len (_gbdeb ),_cbef ,_gada ,_gbdeb );
// Pick the start points of the two selected edges per kind, ordered by Y and
// by X respectively, and build the rectangle from them. The coordinates are
// swapped afterwards where needed so that Llx <= Urx and Lly <= Ury.
};var _addc ,_eccf ,_badge ,_deba _gfa .Point ;if _gbdeb [_cbef [0]].Y > _gbdeb [_cbef [1]].Y {_badge ,_deba =_gbdeb [_cbef [0]],_gbdeb [_cbef [1]];}else {_badge ,_deba =_gbdeb [_cbef [1]],_gbdeb [_cbef [0]];};if _gbdeb [_gada [0]].X > _gbdeb [_gada [1]].X {_addc ,_eccf =_gbdeb [_gada [0]],_gbdeb [_gada [1]];
}else {_addc ,_eccf =_gbdeb [_gada [1]],_gbdeb [_gada [0]];};_dbdg :=_bbe .PdfRectangle {Llx :_addc .X ,Urx :_eccf .X ,Lly :_deba .Y ,Ury :_badge .Y };if _dbdg .Llx > _dbdg .Urx {_dbdg .Llx ,_dbdg .Urx =_dbdg .Urx ,_dbdg .Llx ;};if _dbdg .Lly > _dbdg .Ury {_dbdg .Lly ,_dbdg .Ury =_dbdg .Ury ,_dbdg .Lly ;
// The kind of the rectangle ruling is decided by _gfcd; kind _fgcbb is
// rejected.
};_dgadc :=rectRuling {PdfRectangle :_dbdg ,_bbabc :_gfcd (_dbdg ),Color :_acda };if _dgadc ._bbabc ==_fgcbb {if _fcce {_fgc .Log .Error ("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c");
};return nil ,false ;};_dffa ,_gbgb :=_dgadc .asRuling ();if !_gbgb {if _fcce {_fgc .Log .Error ("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg");};return nil ,false ;};if _bcbc {_gfc .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_dffa .String ());
// compositeColCorridors, which starts on the following line, returns a map
// that holds a nil entry for every column index from 0 up to, but excluding,
// _caea.
};return _dffa ,true ;};func (_bcaae *textTable )compositeColCorridors ()map[int ][]float64 {_ddaff :=make (map[int ][]float64 ,_bcaae ._caea );if _bdfb {_fgc .Log .Info ("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020",_bcaae ._caea );
// getFillColor, at the end of the following line, passes the non-stroking
// colorspace and colour of the graphics state to _accf and returns the result.
};for _eegg :=0;_eegg < _bcaae ._caea ;_eegg ++{_ddaff [_eegg ]=nil ;};return _ddaff ;};func (_fbc *textObject )getFillColor ()_a .Color {return _accf (_fbc ._fag .ColorspaceNonStroking ,_fbc ._fag .ColorNonStroking );};
2021-06-21 14:01:56 +00:00
2021-09-23 22:37:42 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
type ImageMark struct{Image *_bbe .Image ;
2021-08-13 01:33:42 +00:00
2021-09-23 22:37:42 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ;Height float64 ;
2021-08-13 01:33:42 +00:00
2021-09-23 22:37:42 +00:00
// Position of the image in PDF coordinates (lower left corner).
X float64 ;Y float64 ;
2021-06-21 14:01:56 +00:00
2021-09-23 22:37:42 +00:00
// Angle in degrees, if rotated.
Angle float64 ;};func (_gfabd *wordBag )firstReadingIndex (_aaef int )int {_gede :=_gfabd .firstWord (_aaef )._gddg ;_cdaa :=float64 (_aaef +1)*_gecf ;_bfbb :=_cdaa +_bbfce *_gede ;_dbfc :=_aaef ;for _ ,_dfgb :=range _gfabd .depthBand (_cdaa ,_bfbb ){if _gffd (_gfabd .firstWord (_dfgb ),_gfabd .firstWord (_dbfc ))< 0{_dbfc =_dfgb ;
};};return _dbfc ;};const _ggc =10;func (_eadaa *shapesState )addPoint (_cadb ,_eadd float64 ){_aaae :=_eadaa .establishSubpath ();_geda :=_eadaa .devicePoint (_cadb ,_eadd );if _aaae ==nil {_eadaa ._cea =true ;_eadaa ._dgfbe =_geda ;}else {_aaae .add (_geda );
};};func _bfda (_eadc []float64 ,_ecfg ,_efdc float64 )[]float64 {_abfe ,_agga :=_ecfg ,_efdc ;if _agga < _abfe {_abfe ,_agga =_agga ,_abfe ;};_eggb :=make ([]float64 ,0,len (_eadc )+2);_eggb =append (_eggb ,_ecfg );for _ ,_bfdfg :=range _eadc {if _bfdfg <=_abfe {continue ;
}else if _bfdfg >=_agga {break ;};_eggb =append (_eggb ,_bfdfg );};_eggb =append (_eggb ,_efdc );return _eggb ;};const (_fgcbb rulingKind =iota ;_eeca ;_bgecg ;);func (_abfb rulingList )secMinMax ()(float64 ,float64 ){_egag ,_efaf :=_abfb [0]._dgbc ,_abfb [0]._cgcfd ;
for _ ,_eddf :=range _abfb [1:]{if _eddf ._dgbc < _egag {_egag =_eddf ._dgbc ;};if _eddf ._cgcfd > _efaf {_efaf =_eddf ._cgcfd ;};};return _egag ,_efaf ;};
2021-06-21 14:01:56 +00:00
2021-09-23 22:37:42 +00:00
// ExtractTextWithStats works like ExtractText but also returns the number of characters in the
// output (`numChars`) and the number of characters that were not decoded (`numMisses`).
// When ExtractPageText fails, the text is empty and the two counts and the error are passed on.
func (_ggg *Extractor) ExtractTextWithStats() (_eebg string, _bdb int, _eb int, _gdc error) {
	page, _bdb, _eb, _gdc := _ggg.ExtractPageText()
	if _gdc == nil {
		return page.Text(), _bdb, _eb, nil
	}
	return "", _bdb, _eb, _gdc
}

// getDown returns, for every column, the paragraph linked through _cedg from
// the cell in the last row. It returns nil when such a paragraph is missing or
// has its _eeeg flag set, or when the paragraphs found are not chained to one
// another through their _gadgg links.
func (table *textTable) getDown() paraList {
	cells := make(paraList, table._caea)
	for col := range cells {
		next := table.get(col, table._dccb-1)._cedg
		if next == nil || next._eeeg {
			return nil
		}
		cells[col] = next
	}
	for col := 0; col+1 < len(cells); col++ {
		if cells[col]._gadgg != cells[col+1] {
			return nil
		}
	}
	return cells
}

// subpath is a list of points plus a boolean flag.
type subpath struct {
	_gbee []_gfa.Point
	_aef  bool
}
2021-06-21 14:01:56 +00:00
2021-08-13 01:33:42 +00:00
// Append appends `mark` to the mark array.
2021-09-23 22:37:42 +00:00
func (_egae *TextMarkArray )Append (mark TextMark ){_egae ._fbcc =append (_egae ._fbcc ,mark )};const _fdb =20;func (_bgafc paraList )toTextMarks ()[]TextMark {_dgba :=0;var _ebeg []TextMark ;for _gdaa ,_ffeb :=range _bgafc {if _ffeb ._eecg {continue ;
};_eedd :=_ffeb .toTextMarks (&_dgba );_ebeg =append (_ebeg ,_eedd ...);if _gdaa !=len (_bgafc )-1{if _cgcc (_ffeb ,_bgafc [_gdaa +1]){_ebeg =_acea (_ebeg ,&_dgba ,"\u0020");}else {_ebeg =_acea (_ebeg ,&_dgba ,"\u000a");_ebeg =_acea (_ebeg ,&_dgba ,"\u000a");
};};};_ebeg =_acea (_ebeg ,&_dgba ,"\u000a");_ebeg =_acea (_ebeg ,&_dgba ,"\u000a");return _ebeg ;};func _bae (_eege string )bool {if _bb .RuneCountInString (_eege )< _dfdf {return false ;};_caae ,_ggbb :=_bb .DecodeLastRuneInString (_eege );if _ggbb <=0||!_bg .Is (_bg .Hyphen ,_caae ){return false ;
};_caae ,_ggbb =_bb .DecodeLastRuneInString (_eege [:len (_eege )-_ggbb ]);return _ggbb > 0&&!_bg .IsSpace (_caae );};
2021-05-31 17:17:31 +00:00
2021-09-23 22:37:42 +00:00
// String returns the name of the ruling kind, or a "Not a ruling" message for an unknown value.
func (kind rulingKind) String() string {
	if name, known := _cfgg[kind]; known {
		return name
	}
	return _gfc.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", kind)
}

// _gbgd returns the distance from the upper edge (Ury) of `_bbadf` down to the
// lower edge (Lly) of the bounding box of `_fgad`.
func _gbgd(_bbadf _bbe.PdfRectangle, _fgad bounded) float64 {
	box := _fgad.bbox()
	return _bbadf.Ury - box.Lly
}
// put stores the paragraph in the table under the key that _aebd builds from
// the two indexes.
func (table *textTable) put(_acbeg, _dcfae int, _ddcdf *textPara) {
	key := _aebd(_acbeg, _dcfae)
	table._bffe[key] = _ddcdf
}

// closePath closes the most recent subpath. If the _cea flag is set, a new
// subpath is first created from the stored point _dgfbe with _cbdf. If the
// flag is clear and there are no subpaths, nothing is closed.
func (ss *shapesState) closePath() {
	switch {
	case ss._cea:
		ss._gbag = append(ss._gbag, _cbdf(ss._dgfbe))
		ss._cea = false
	case len(ss._gbag) == 0:
		if _egf {
			_fgc.Log.Debug("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068")
		}
		ss._cea = false
		return
	}
	ss._gbag[len(ss._gbag)-1].close()
	if _egf {
		_fgc.Log.Info("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073", ss)
	}
}
2021-05-31 17:17:31 +00:00
2021-09-23 22:37:42 +00:00
// String returns a human readable description of the subpath.
func (_cagb *subpath )String ()string {_daf :=_cagb ._gbee ;_acc :=len (_daf );if _acc <=5{return _gfc .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_acc ,_daf );};return _gfc .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_acc ,_daf [0],_daf [1],_daf [_acc -1]);
};func _afaa (_ggda []*textMark ,_ddgb _bbe .PdfRectangle )[]*textWord {var _dgdc []*textWord ;var _gaeab *textWord ;if _dega {_fgc .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_ggda ));
};_fegdca :=func (){if _gaeab !=nil {_fffa :=_gaeab .computeText ();if !_bedg (_fffa ){_gaeab ._fadea =_fffa ;_dgdc =append (_dgdc ,_gaeab );if _dega {_fgc .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_dgdc )-1,_gaeab .String ());
for _ggfb ,_dbfbg :=range _gaeab ._cgceb {_gfc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ggfb ,_dbfbg .String ());};};};_gaeab =nil ;};};for _ ,_fgcf :=range _ggda {if _eaad &&_gaeab !=nil &&len (_gaeab ._cgceb )> 0{_aadcf :=_gaeab ._cgceb [len (_gaeab ._cgceb )-1];
_caad ,_gecfd :=_bceg (_fgcf ._cgge );_dgbfb ,_gefg :=_bceg (_aadcf ._cgge );if _gecfd &&!_gefg &&_aadcf .inDiacriticArea (_fgcf ){_gaeab .addDiacritic (_caad );continue ;};if _gefg &&!_gecfd &&_fgcf .inDiacriticArea (_aadcf ){_gaeab ._cgceb =_gaeab ._cgceb [:len (_gaeab ._cgceb )-1];
_gaeab .appendMark (_fgcf ,_ddgb );_gaeab .addDiacritic (_dgbfb );continue ;};};_gcac :=_bedg (_fgcf ._cgge );if _gcac {_fegdca ();continue ;};if _gaeab ==nil &&!_gcac {_gaeab =_gebd ([]*textMark {_fgcf },_ddgb );continue ;};_fdfeg :=_gaeab ._gddg ;_egggf :=_gf .Abs (_gbgd (_ddgb ,_fgcf )-_gaeab ._fdcbf )/_fdfeg ;
_ccga :=_cfdac (_fgcf ,_gaeab )/_fdfeg ;if _ccga >=_cdfeb ||!(-_efba <=_ccga &&_egggf <=_bcdc ){_fegdca ();_gaeab =_gebd ([]*textMark {_fgcf },_ddgb );continue ;};_gaeab .appendMark (_fgcf ,_ddgb );};_fegdca ();return _dgdc ;};func (_caeef *textLine )endsInHyphen ()bool {_bggb :=_caeef ._gfbc [len (_caeef ._gfbc )-1];
_bgaff :=_bggb ._fadea ;_fbbca ,_dede :=_bb .DecodeLastRuneInString (_bgaff );if _dede <=0||!_bg .Is (_bg .Hyphen ,_fbbca ){return false ;};if _bggb ._aaad &&_bae (_bgaff ){return true ;};return _bae (_caeef .text ());};func (_eacde *textWord )addDiacritic (_ddeab string ){_befgc :=_eacde ._cgceb [len (_eacde ._cgceb )-1];
_befgc ._cgge +=_ddeab ;_befgc ._cgge =_fd .NFKC .String (_befgc ._cgge );};func (_agbf *compositeCell )updateBBox (){for _ ,_gbad :=range _agbf .paraList {_agbf .PdfRectangle =_eacc (_agbf .PdfRectangle ,_gbad .PdfRectangle );};};func _gafb (_fbadd _bbe .PdfRectangle )*ruling {return &ruling {_ccfa :_bgecg ,_eacd :_fbadd .Urx ,_dgbc :_fbadd .Lly ,_cgcfd :_fbadd .Ury };
2021-08-13 01:33:42 +00:00
};
2021-09-23 22:37:42 +00:00
// PageText represents the layout of text on a device page.
type PageText struct {
	_cgb  []*textMark
	_fca  string
	_gffb []TextMark
	_dbee []TextTable
	_fbg  _bbe.PdfRectangle
	_fcf  []pathSection
	_daag []pathSection
}

// showTextAdjusted walks the elements of the array `_ace` in order.
//
// A numeric element is scaled by -0.001 and by `_eada._gbaf`, and the result is
// applied to the matrix `_adb` as a translation along X. A string element is
// passed to renderText. Any other element type, or an element that cannot be
// converted, ends processing with an error.
func (_bdge *textObject) showTextAdjusted(_ace *_e.PdfObjectArray) error {
	// vertical is never set to true in this function, so the swap below never runs.
	vertical := false
	for _, elem := range _ace.Elements() {
		switch elem.(type) {
		case *_e.PdfObjectString:
			charcodes, ok := _e.GetStringBytes(elem)
			if !ok {
				_fgc.Log.Trace("showTextAdjusted: Bad string arg. o=%s args=%+v", elem, _ace)
				return _e.ErrTypeError
			}
			// NOTE(review): the result of renderText is discarded here, whereas
			// showText returns it to the caller — confirm this is intentional.
			_bdge.renderText(charcodes)
		case *_e.PdfObjectFloat, *_e.PdfObjectInteger:
			adjust, err := _e.GetNumberAsFloat(elem)
			if err != nil {
				_fgc.Log.Debug("ERROR: showTextAdjusted. Bad numerical arg. o=%s args=%+v", elem, _ace)
				return err
			}
			dx := -adjust * 0.001 * _bdge._eada._gbaf
			dy := 0.0
			if vertical {
				dx, dy = dy, dx
			}
			_bdge._adb.Concat(_bggd(_gfa.Point{X: dx, Y: dy}))
		default:
			_fgc.Log.Debug("ERROR: showTextAdjusted. Unexpected type (%T) args=%+v", elem, _ace)
			return _e.ErrTypeError
		}
	}
	return nil
}

// writeText writes the text of every paragraph in the list to `_eec`.
//
// Paragraphs whose `_eecg` flag is set are skipped. Between a written paragraph
// and its successor in the list a single space is emitted when `_cgcc` reports
// true for the pair, otherwise two newlines. Two newlines always terminate the
// output. Errors returned by the writer are not inspected.
func (_afcac paraList) writeText(_eec _f.Writer) {
	last := len(_afcac) - 1
	for i, para := range _afcac {
		if para._eecg {
			continue
		}
		para.writeText(_eec)
		if i == last {
			continue
		}
		if _cgcc(para, _afcac[i+1]) {
			_eec.Write([]byte(" "))
			continue
		}
		_eec.Write([]byte("\n"))
		_eec.Write([]byte("\n"))
	}
	_eec.Write([]byte("\n"))
	_eec.Write([]byte("\n"))
}

// showText forwards the bytes `_fcb` to renderText and returns its result.
func (_gde *textObject) showText(_fcb []byte) error {
	return _gde.renderText(_fcb)
}

// bbox returns the rectangle embedded in the word.
func (_ecddg *textWord) bbox() _bbe.PdfRectangle {
	return _ecddg.PdfRectangle
}
// _bggd returns the matrix that translates by the X and Y components of `_bcbga`.
func _bggd(_bcbga _gfa.Point) _gfa.Matrix {
	return _gfa.TranslationMatrix(_bcbga.X, _bcbga.Y)
}

var (
	// _ef is returned when an operand does not have the expected type.
	_ef = _g.New("type check error")
	// _aa is returned when an operator has an unexpected number of operands.
	_aa = _g.New("range check error")
)

// extractPageText parses the content stream `_ebb` and processes every operator
// in it, collecting text marks and stroked/filled path sections into a PageText.
//
// `_fac` are the resources used to process the stream, `_dcf` is the matrix
// that is pre-multiplied with the graphics-state CTM when a text object is
// opened or a form XObject is entered, and `_aabf` is the current form nesting
// level. When `_aabf` exceeds `_fdb` the function stops with a
// "form stack overflow" error.
//
// Besides the PageText it returns the two counters `_ccb` and `_eacg` of the
// text state, and the first error reported by parsing or processing. The
// PageText and counters are returned even when the error is non-nil.
//
// Form XObjects referenced by `Do` are extracted recursively and cached in
// `_ee` under the XObject name.
func (_bacg *Extractor) extractPageText(_ebb string, _fac *_bbe.PdfPageResources, _dcf _gfa.Matrix, _aabf int) (*PageText, int, int, error) {
	_fgc.Log.Trace("extractPageText: level=%d", _aabf)
	pageText := &PageText{_fbg: _bacg._caf}
	state := _bcc(_bacg._caf)
	var saved stateStack
	to := _cddf(_bacg, _fac, _ca.GraphicsState{}, &state, &saved)
	shapes := shapesState{_bce: _dcf, _beg: _gfa.IdentityMatrix(), _cdbf: to}
	var inTextObj bool

	if _aabf > _fdb {
		overflow := _g.New("form stack overflow")
		_fgc.Log.Debug("ERROR: extractPageText. recursion level=%d err=%v", _aabf, overflow)
		return pageText, state._ccb, state._eacg, overflow
	}

	parser := _ca.NewContentStreamParser(_ebb)
	operations, procErr := parser.Parse()
	if procErr != nil {
		_fgc.Log.Debug("ERROR: extractPageText parse failed. err=%v", procErr)
		return pageText, state._ccb, state._eacg, procErr
	}

	processor := _ca.NewContentStreamProcessor(*operations)
	processor.AddHandler(_ca.HandlerConditionEnumAllOperands, "", func(op *_ca.ContentStreamOperation, gs _ca.GraphicsState, resources *_bbe.PdfPageResources) error {
		operand := op.Operand
		if _egab {
			_fgc.Log.Info("&&& op=%s", op)
		}

		switch operand {
		case "q":
			if _egf {
				_fgc.Log.Info("ctm=%s", shapes._beg)
			}
			saved.push(&state)

		case "Q":
			if !saved.empty() {
				state = *saved.pop()
			}
			shapes._beg = gs.CTM
			if _egf {
				_fgc.Log.Info("ctm=%s", shapes._beg)
			}

		case "BT":
			if inTextObj {
				// Nested BT: keep the marks collected so far before starting over.
				_fgc.Log.Debug("BT called while in a text object")
				pageText._cgb = append(pageText._cgb, to._cebe...)
			}
			inTextObj = true
			textGS := gs
			textGS.CTM = _dcf.Mult(textGS.CTM)
			to = _cddf(_bacg, resources, textGS, &state, &saved)
			shapes._cdbf = to

		case "ET":
			if !inTextObj {
				_fgc.Log.Debug("ET called outside of a text object")
			}
			inTextObj = false
			pageText._cgb = append(pageText._cgb, to._cebe...)
			to.reset()

		case "T*":
			to.nextLine()

		case "Td":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_fgc.Log.Debug("ERROR: err=%v", err)
				return err
			}
			x, y, err := _edbf(op.Params)
			if err != nil {
				return err
			}
			to.moveText(x, y)

		case "TD":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_fgc.Log.Debug("ERROR: err=%v", err)
				return err
			}
			x, y, err := _edbf(op.Params)
			if err != nil {
				_fgc.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.moveTextSetLeading(x, y)

		case "Tj":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fgc.Log.Debug("ERROR: Tj op=%s err=%v", op, err)
				return err
			}
			charcodes, ok := _e.GetStringBytes(op.Params[0])
			if !ok {
				_fgc.Log.Debug("ERROR: Tj op=%s GetStringBytes failed", op)
				return _e.ErrTypeError
			}
			return to.showText(charcodes)

		case "TJ":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fgc.Log.Debug("ERROR: TJ err=%v", err)
				return err
			}
			args, ok := _e.GetArray(op.Params[0])
			if !ok {
				_fgc.Log.Debug("ERROR: TJ op=%s GetArrayVal failed", op)
				// Report the type failure explicitly. The outer parse error is
				// always nil while the handler runs, so returning it would
				// silently swallow this failure.
				return _e.ErrTypeError
			}
			return to.showTextAdjusted(args)

		case "'":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fgc.Log.Debug("ERROR: ' err=%v", err)
				return err
			}
			charcodes, ok := _e.GetStringBytes(op.Params[0])
			if !ok {
				_fgc.Log.Debug("ERROR: ' op=%s GetStringBytes failed", op)
				return _e.ErrTypeError
			}
			to.nextLine()
			return to.showText(charcodes)

		case "\"":
			if ok, err := to.checkOp(op, 3, true); !ok {
				_fgc.Log.Debug("ERROR: \" err=%v", err)
				return err
			}
			wordSpacing, charSpacing, err := _edbf(op.Params[:2])
			if err != nil {
				return err
			}
			charcodes, ok := _e.GetStringBytes(op.Params[2])
			if !ok {
				_fgc.Log.Debug("ERROR: \" op=%s GetStringBytes failed", op)
				return _e.ErrTypeError
			}
			// Operand order is `aw ac string "`: the first number is the word
			// spacing and the second is the character spacing.
			to.setWordSpacing(wordSpacing)
			to.setCharSpacing(charSpacing)
			to.nextLine()
			return to.showText(charcodes)

		case "TL":
			leading, err := _gbeg(op)
			if err != nil {
				_fgc.Log.Debug("ERROR: TL err=%v", err)
				return err
			}
			to.setTextLeading(leading)

		case "Tc":
			spacing, err := _gbeg(op)
			if err != nil {
				_fgc.Log.Debug("ERROR: Tc err=%v", err)
				return err
			}
			to.setCharSpacing(spacing)

		case "Tf":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_fgc.Log.Debug("ERROR: Tf err=%v", err)
				return err
			}
			name, ok := _e.GetNameVal(op.Params[0])
			if !ok {
				_fgc.Log.Debug("ERROR: Tf op=%s GetNameVal failed", op)
				return _e.ErrTypeError
			}
			size, err := _e.GetNumberAsFloat(op.Params[1])
			// Check the conversion error itself, not the name lookup flag,
			// which is already known to be true here.
			if err != nil {
				_fgc.Log.Debug("ERROR: Tf op=%s GetFloatVal failed. err=%v", op, err)
				return err
			}
			err = to.setFont(name, size)
			// An unsupported font is recorded on the text object rather than
			// aborting processing.
			to._fga = _bf.Is(err, _e.ErrNotSupported)
			if err != nil && !to._fga {
				return err
			}

		case "Tm":
			if ok, err := to.checkOp(op, 6, true); !ok {
				_fgc.Log.Debug("ERROR: Tm err=%v", err)
				return err
			}
			values, err := _e.GetNumbersAsFloat(op.Params)
			if err != nil {
				_fgc.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setTextMatrix(values)

		case "Tr":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fgc.Log.Debug("ERROR: Tr err=%v", err)
				return err
			}
			mode, ok := _e.GetIntVal(op.Params[0])
			if !ok {
				_fgc.Log.Debug("ERROR: Tr op=%s GetIntVal failed", op)
				return _e.ErrTypeError
			}
			to.setTextRenderMode(mode)

		case "Ts":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fgc.Log.Debug("ERROR: Ts err=%v", err)
				return err
			}
			rise, err := _e.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_fgc.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setTextRise(rise)

		case "Tw":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fgc.Log.Debug("ERROR: err=%v", err)
				return err
			}
			spacing, err := _e.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_fgc.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setWordSpacing(spacing)

		case "Tz":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fgc.Log.Debug("ERROR: err=%v", err)
				return err
			}
			scaling, err := _e.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_fgc.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setHorizScaling(scaling)

		case "cm":
			shapes._beg = gs.CTM
			if shapes._beg.Singular() {
				// Fall back to a pure translation when the CTM is singular.
				replacement := _gfa.IdentityMatrix().Translate(shapes._beg.Translation())
				_fgc.Log.Debug("Singular ctm=%s\u2192%s", shapes._beg, replacement)
				shapes._beg = replacement
			}
			if _egf {
				_fgc.Log.Info("ctm=%s", shapes._beg)
			}

		case "m":
			if len(op.Params) != 2 {
				// A malformed move is logged and skipped, not treated as fatal.
				_fgc.Log.Debug("WARN: error while processing `m` operator: %v. Output may be incorrect.", _aa)
				return nil
			}
			pt, err := _e.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.moveTo(pt[0], pt[1])

		case "l":
			if len(op.Params) != 2 {
				// A malformed line segment is logged and skipped, not treated as fatal.
				_fgc.Log.Debug("WARN: error while processing `l` operator: %v. Output may be incorrect.", _aa)
				return nil
			}
			pt, err := _e.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.lineTo(pt[0], pt[1])

		case "c":
			if len(op.Params) != 6 {
				return _aa
			}
			pts, err := _e.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_fgc.Log.Debug("Cubic bezier params: %.2f", pts)
			shapes.cubicTo(pts[0], pts[1], pts[2], pts[3], pts[4], pts[5])

		case "v", "y":
			if len(op.Params) != 4 {
				return _aa
			}
			pts, err := _e.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_fgc.Log.Debug("Cubic bezier params: %.2f", pts)
			shapes.quadraticTo(pts[0], pts[1], pts[2], pts[3])

		case "h":
			shapes.closePath()

		case "re":
			if len(op.Params) != 4 {
				return _aa
			}
			rect, err := _e.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.drawRectangle(rect[0], rect[1], rect[2], rect[3])
			shapes.closePath()

		case "S":
			shapes.stroke(&pageText._fcf)
			shapes.clearPath()

		case "s":
			shapes.closePath()
			shapes.stroke(&pageText._fcf)
			shapes.clearPath()

		case "F":
			shapes.fill(&pageText._daag)
			shapes.clearPath()

		case "f", "f*":
			shapes.closePath()
			shapes.fill(&pageText._daag)
			shapes.clearPath()

		case "B", "B*":
			shapes.fill(&pageText._daag)
			shapes.stroke(&pageText._fcf)
			shapes.clearPath()

		case "b", "b*":
			shapes.closePath()
			shapes.fill(&pageText._daag)
			shapes.stroke(&pageText._fcf)
			shapes.clearPath()

		case "n":
			shapes.clearPath()

		case "Do":
			if len(op.Params) == 0 {
				_fgc.Log.Debug("ERROR: expected XObject name operand for Do operator. Got %+v.", op.Params)
				return _e.ErrRangeError
			}
			name, ok := _e.GetName(op.Params[0])
			if !ok {
				_fgc.Log.Debug("ERROR: invalid Do operator XObject name operand: %+v.", op.Params[0])
				return _e.ErrTypeError
			}
			// Only form XObjects can contribute text; everything else is ignored.
			if _, xtype := resources.GetXObjectByName(*name); xtype != _bbe.XObjectTypeForm {
				return nil
			}
			cached, found := _bacg._ee[name.String()]
			if !found {
				form, err := resources.GetXObjectFormByName(*name)
				if err != nil {
					_fgc.Log.Debug("ERROR: %v", err)
					return err
				}
				stream, err := form.GetContentStream()
				if err != nil {
					_fgc.Log.Debug("ERROR: %v", err)
					return err
				}
				// A form without its own resources uses those of its parent.
				formResources := form.Resources
				if formResources == nil {
					formResources = resources
				}
				formText, count1, count2, err := _bacg.extractPageText(string(stream), formResources, _dcf.Mult(gs.CTM), _aabf+1)
				if err != nil {
					_fgc.Log.Debug("ERROR: %v", err)
					return err
				}
				cached = textResult{*formText, count1, count2}
				_bacg._ee[name.String()] = cached
			}
			shapes._beg = gs.CTM
			if _egf {
				_fgc.Log.Info("ctm=%s", shapes._beg)
			}
			pageText._cgb = append(pageText._cgb, cached._befd._cgb...)
			pageText._fcf = append(pageText._fcf, cached._befd._fcf...)
			pageText._daag = append(pageText._daag, cached._befd._daag...)
			state._ccb += cached._agg
			state._eacg += cached._aebf

		case "rg", "g", "k", "cs", "sc", "scn":
			to._fag.ColorspaceNonStroking = gs.ColorspaceNonStroking
			to._fag.ColorNonStroking = gs.ColorNonStroking

		case "RG", "G", "K", "CS", "SC", "SCN":
			to._fag.ColorspaceStroking = gs.ColorspaceStroking
			to._fag.ColorStroking = gs.ColorStroking
		}
		return nil
	})

	procErr = processor.Process(_fac)
	return pageText, state._ccb, state._eacg, procErr
}

// lineTo appends the point (`_adbg`, `_edec`) to the current path via addPoint,
// logging the point and its device-space position when `_egf` is set.
func (_afcg *shapesState) lineTo(_adbg, _edec float64) {
	if _egf {
		_fgc.Log.Info("lineTo(%.2f,%.2f p=%.2f", _adbg, _edec, _afcg.devicePoint(_adbg, _edec))
	}
	_afcg.addPoint(_adbg, _edec)
}

// _cfdac returns the left edge of `_aedd` minus the right edge of `_bbecc`,
// both taken from their bounding boxes.
func _cfdac(_aedd, _bbecc bounded) float64 {
	return _aedd.bbox().Llx - _bbecc.bbox().Urx
}

// _edbg returns the translation component of `_acba` as a point.
func _edbg(_acba _gfa.Matrix) _gfa.Point {
	tx, ty := _acba.Translation()
	return _gfa.Point{X: tx, Y: ty}
}
// getFontDirect looks up `_abd` with getFontDict and builds a PdfFont from the
// returned object.
//
// A lookup failure is returned as (nil, err). A construction failure is logged
// and returned together with whatever NewPdfFontFromPdfObject produced.
func (_ceg *textObject) getFontDirect(_abd string) (*_bbe.PdfFont, error) {
	fontObj, err := _ceg.getFontDict(_abd)
	if err != nil {
		return nil, err
	}
	font, err := _bbe.NewPdfFontFromPdfObject(fontObj)
	if err == nil {
		return font, nil
	}
	_fgc.Log.Debug("getFontDirect: NewPdfFontFromPdfObject failed. name=%#q err=%v", _abd, err)
	return font, err
}
2021-06-21 14:01:56 +00:00
2021-07-30 00:21:16 +00:00
// TableCell is a cell in a TextTable.
type TableCell struct{
2021-06-21 14:01:56 +00:00
2021-07-30 00:21:16 +00:00
// Text is the extracted text.
Text string ;
2021-05-31 17:17:31 +00:00
2021-07-30 00:21:16 +00:00
// Marks returns the TextMarks corresponding to the text in Text.
2021-09-23 22:37:42 +00:00
Marks TextMarkArray ;};func _dgad (_ffcdd ,_afeb _bbe .PdfRectangle )bool {return _afeb .Llx <=_ffcdd .Urx &&_ffcdd .Llx <=_afeb .Urx ;};func _aeae (_egff _bbe .PdfRectangle )*ruling {return &ruling {_ccfa :_eeca ,_eacd :_egff .Lly ,_dgbc :_egff .Llx ,_cgcfd :_egff .Urx };
};func (_dagbd paraList )extractTables (_fcfb []gridTiling )paraList {if _bdfb {_fgc .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_dagbd ));
};if len (_dagbd )< _ggeg {return _dagbd ;};_bafcb :=_dagbd .findTables (_fcfb );if _bdfb {_fgc .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_bafcb ));
for _dbbc ,_fcbdef :=range _bafcb {_fcbdef .log (_gfc .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_dbbc ));};};return _dagbd .applyTables (_bafcb );};func (_gagda *wordBag )depthBand (_bgaf ,_dada float64 )[]int {if len (_gagda ._gbec )==0{return nil ;
};return _gagda .depthRange (_gagda .getDepthIdx (_bgaf ),_gagda .getDepthIdx (_dada ));};func (_cacd *shapesState )stroke (_gaee *[]pathSection ){_cbc :=pathSection {_dfaa :_cacd ._gbag ,Color :_cacd ._cdbf .getStrokeColor ()};*_gaee =append (*_gaee ,_cbc );
if _bcbc {_gfc .Printf ("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",len (*_gaee ),_cacd ,_cacd ._cdbf .getStrokeColor (),_cbc .bbox ());
if _cgdfea {for _abeg ,_cbd :=range _cacd ._gbag {_gfc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_abeg ,_cbd );if _abeg ==10{break ;};};};};};type intSet map[int ]struct{};func _cecbd (_caa func (*wordBag ,*textWord ,float64 )bool ,_fagd float64 )func (*wordBag ,*textWord )bool {return func (_def *wordBag ,_gggb *textWord )bool {return _caa (_def ,_gggb ,_fagd )};
};
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	_fbcc []TextMark
}

// _efag returns the keys of `_agcg` in ascending order.
func _efag(_agcg map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_agcg))
	for k := range _agcg {
		keys = append(keys, k)
	}
	_c.Float64s(keys)
	return keys
}

// _ggacf replaces, in place, the coordinates of every point of every subpath
// in `_dfcd` with the result of `_abdd`.
//
// It does nothing when `_adde` is negative. With `_bcbc` set, each point that
// `_fded` reports as changed is printed together with its displacement.
func _ggacf(_dfcd []pathSection) {
	if _adde < 0.0 {
		return
	}
	if _bcbc {
		_fgc.Log.Info("granularize: %d subpath sections", len(_dfcd))
	}
	for si, section := range _dfcd {
		for pi, path := range section._dfaa {
			for i, before := range path._gbee {
				after := _gfa.Point{X: _abdd(before.X), Y: _abdd(before.Y)}
				path._gbee[i] = after
				if !_bcbc || _fded(before, after) {
					continue
				}
				delta := _gfa.Point{X: after.X - before.X, Y: after.Y - before.Y}
				_gfc.Printf("%4d - %4d - %4d: %.2f \u2192 %.2f (%g)\n", si, pi, i, before, after, delta)
			}
		}
	}
}

// processOperand handles a single content stream operation for image
// extraction. Only `BI` and `Do` operations with exactly one operand are
// acted on; everything else returns nil.
//
//   - BI: the inline image is extracted, unless it is flagged as an image mask
//     and IncludeInlineStencilMasks is off.
//   - Do: image XObjects are extracted directly and form XObjects are searched
//     for images. An operand that is not a name yields `_ef`.
func (_ead *imageExtractContext) processOperand(_dg *_ca.ContentStreamOperation, _cag _ca.GraphicsState, _ed *_bbe.PdfPageResources) error {
	if len(_dg.Params) != 1 {
		return nil
	}
	switch _dg.Operand {
	case "BI":
		inline, ok := _dg.Params[0].(*_ca.ContentStreamInlineImage)
		if !ok {
			return nil
		}
		if isMask, found := _e.GetBoolVal(inline.ImageMask); found && isMask && !_ead._fde.IncludeInlineStencilMasks {
			return nil
		}
		return _ead.extractInlineImage(inline, _cag, _ed)
	case "Do":
		name, ok := _e.GetName(_dg.Params[0])
		if !ok {
			_fgc.Log.Debug("ERROR: Type")
			return _ef
		}
		_, xtype := _ed.GetXObjectByName(*name)
		switch xtype {
		case _bbe.XObjectTypeImage:
			return _ead.extractXObjectImage(name, _cag, _ed)
		case _bbe.XObjectTypeForm:
			return _ead.extractFormImages(name, _cag, _ed)
		}
	}
	return nil
}

// reorder rearranges the list in place so that position i holds the element
// that was previously at index `_dabg[i]`.
func (_bdbd paraList) reorder(_dabg []int) {
	reordered := make(paraList, len(_bdbd))
	for dst, src := range _dabg {
		reordered[dst] = _bdbd[src]
	}
	copy(_bdbd, reordered)
}

// shapesState holds the state used while collecting path sections from a
// content stream.
type shapesState struct {
	_beg   _gfa.Matrix
	_bce   _gfa.Matrix
	_gbag  []*subpath
	_cea   bool
	_dgfbe _gfa.Point
	_cdbf  *textObject
}

// eventNeighbours sweeps over `_faae` and returns, for every paragraph that
// has an entering event, the indexes of the paragraphs that were active at the
// same time as it.
//
// Events are ordered by `_fcead`; at equal positions entering events
// (`_bege` true) come before leaving ones. Events that compare equal keep
// their input order. A stable sort provides that guarantee; breaking ties on
// slice indexes inside the comparator does not, because indexes refer to
// positions that change while sorting. `_faae` is sorted in place.
func (_acdd paraList) eventNeighbours(_faae []event) map[*textPara][]int {
	_c.SliceStable(_faae, func(i, j int) bool {
		a, b := _faae[i], _faae[j]
		if a._fcead != b._fcead {
			return a._fcead < b._fcead
		}
		if a._bege != b._bege {
			return a._bege
		}
		return false
	})

	neighbours := make(map[int]intSet)
	active := make(intSet)
	for _, ev := range _faae {
		if !ev._bege {
			active.del(ev._decb)
			continue
		}
		neighbours[ev._decb] = make(intSet)
		for other := range active {
			if other != ev._decb {
				// Neighbourhood is symmetric: record it on both sides.
				neighbours[ev._decb].add(other)
				neighbours[other].add(ev._decb)
			}
		}
		active.add(ev._decb)
	}

	result := map[*textPara][]int{}
	for idx, set := range neighbours {
		para := _acdd[idx]
		if len(set) == 0 {
			result[para] = nil
			continue
		}
		list := make([]int, 0, len(set))
		for n := range set {
			list = append(list, n)
		}
		result[para] = list
	}
	return result
}

// _gfbdcc returns the keys of `_dcgcg` in ascending order.
func _gfbdcc(_dcgcg map[int][]float64) []int {
	keys := make([]int, 0, len(_dcgcg))
	for k := range _dcgcg {
		keys = append(keys, k)
	}
	_c.Ints(keys)
	return keys
}

// textWord is a run of text marks with a bounding rectangle.
type textWord struct {
	_bbe.PdfRectangle
	_fdcbf float64
	_fadea string
	_cgceb []*textMark
	_gddg  float64
	_aaad  bool
}
// String returns a string describing the tile: its rectangle followed by one
// character per flag (`L`, `R`, `B`, `T`), with `_` standing in for an unset
// flag.
func (_fgcdf gridTile) String() string {
	flag := func(set bool, label string) string {
		if !set {
			return "_"
		}
		return label
	}
	left := flag(_fgcdf._bcbb, "L")
	right := flag(_fgcdf._ddgab, "R")
	bottom := flag(_fgcdf._ebdg, "B")
	top := flag(_fgcdf._bacb, "T")
	return _gfc.Sprintf("%6.2f %1s%1s%1s%1s", _fgcdf.PdfRectangle, left, right, bottom, top)
}

// del removes `_gcab` from the set.
func (_adeg intSet) del(_gcab int) {
	delete(_adeg, _gcab)
}

// _adgg converts stroked path sections into rulings.
//
// The sections are first passed to `_ggacf`. Every pair of consecutive points
// of every subpath is then offered to `_fgga` along with the section colour,
// and the rulings it accepts are collected.
func _adgg(_cabd []pathSection) rulingList {
	_ggacf(_cabd)
	if _bcbc {
		_fgc.Log.Info("makeStrokeRulings: %d strokes", len(_cabd))
	}
	var rulings rulingList
	for _, section := range _cabd {
		for _, path := range section._dfaa {
			points := path._gbee
			if len(points) < 2 {
				continue
			}
			for i := 1; i < len(points); i++ {
				r, ok := _fgga(points[i-1], points[i], section.Color)
				if ok {
					rulings = append(rulings, r)
				}
			}
		}
	}
	if _bcbc {
		_fgc.Log.Info("makeStrokeRulings: %s", rulings)
	}
	return rulings
}

// reduceTiling returns a table without the empty rows and columns whose grid
// line lies closer than `_gddfg` to a neighbouring one in `_bead`.
//
// A row is dropped when it is not the first row, its `_ccge` coordinate is
// within `_gddfg` of the previous row's, and it is empty. A column is dropped
// when it is not the last column, its `_fcgfg` coordinate is within `_gddfg`
// of the next column's, and it is empty. When nothing is dropped the receiver
// itself is returned; otherwise the surviving composite cells are copied into
// a new table.
func (_febgc *textTable) reduceTiling(_bead gridTiling, _gddfg float64) *textTable {
	keptRows := make([]int, 0, _febgc._dccb)
	keptCols := make([]int, 0, _febgc._caea)
	xs := _bead._fcgfg
	ys := _bead._ccge

	for row := 0; row < _febgc._dccb; row++ {
		if row > 0 && _gf.Abs(ys[row-1]-ys[row]) < _gddfg && _febgc.emptyRow(row) {
			continue
		}
		keptRows = append(keptRows, row)
	}
	for col := 0; col < _febgc._caea; col++ {
		if col < _febgc._caea-1 && _gf.Abs(xs[col+1]-xs[col]) < _gddfg && _febgc.emptyColumn(col) {
			continue
		}
		keptCols = append(keptCols, col)
	}
	if len(keptRows) == _febgc._dccb && len(keptCols) == _febgc._caea {
		return _febgc
	}

	reduced := textTable{
		_gbaca: _febgc._gbaca,
		_caea:  len(keptCols),
		_dccb:  len(keptRows),
		_aeffd: make(map[uint64]compositeCell, len(keptCols)*len(keptRows)),
	}
	if _bdfb {
		_fgc.Log.Info("reduceTiling: %dx%d -> %dx%d", _febgc._caea, _febgc._dccb, len(keptCols), len(keptRows))
		_fgc.Log.Info("reducedCols: %+v", keptCols)
		_fgc.Log.Info("reducedRows: %+v", keptRows)
	}
	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			paras, rect := _febgc.getComposite(oldCol, oldRow)
			if len(paras) == 0 {
				continue
			}
			if _bdfb {
				_gfc.Printf("\u0020 %2d, %2d (%2d, %2d) %q\n", newCol, newRow, oldCol, oldRow, _fbfea(paras.merge().text(), 50))
			}
			reduced.putComposite(newCol, newRow, paras, rect)
		}
	}
	return &reduced
}

// gridTiling is a rectangle together with two coordinate lists and a
// two-level map of grid tiles.
type gridTiling struct {
	_bbe.PdfRectangle
	_fcgfg []float64
	_ccge  []float64
	_dddg  map[float64]map[float64]gridTile
}

// writeCellText writes the text of the paragraph's lines to `_acgc`.
//
// When `_becd` is set and a line other than the last ends in a hyphen, its
// text is passed through `_gfge` and nothing is written between it and the
// next line. Otherwise consecutive lines are separated by the string that
// `_fcffc` returns for their `_fdga` values. Errors returned by the writer
// are not inspected.
func (_aaeg *textPara) writeCellText(_acgc _f.Writer) {
	lastIdx := len(_aaeg._dbed) - 1
	for i, line := range _aaeg._dbed {
		text := line.text()
		joinHyphen := _becd && line.endsInHyphen() && i != lastIdx
		if joinHyphen {
			text = _gfge(text)
		}
		_acgc.Write([]byte(text))
		if joinHyphen || i == lastIdx {
			continue
		}
		_acgc.Write([]byte(_fcffc(line._fdga, _aaeg._dbed[i+1]._fdga)))
	}
}

// toTextMarks returns the TextMarks of all words in the line, with a space
// mark inserted through `_acea` before each word whose `_aaad` flag is set.
// `_fbad` is handed to `_acea` and to the words' toTextMarks.
func (_gabf *textLine) toTextMarks(_fbad *int) []TextMark {
	var marks []TextMark
	for _, word := range _gabf._gfbc {
		if word._aaad {
			marks = _acea(marks, _fbad, " ")
		}
		marks = append(marks, word.toTextMarks(_fbad)...)
	}
	return marks
}

var _bc = false

// toTextMarks returns the TextMarks of the paragraph.
//
// Without a table (`_gbfb` nil) it defers to toCellTextMarks. With a table it
// visits the cells row by row: a missing cell contributes a tab mark, a
// present cell its own marks, and every cell is followed by a space mark. Rows
// other than the last are followed by a newline mark.
func (_cbg *textPara) toTextMarks(_cbgf *int) []TextMark {
	table := _cbg._gbfb
	if table == nil {
		return _cbg.toCellTextMarks(_cbgf)
	}
	var marks []TextMark
	for row := 0; row < table._dccb; row++ {
		for col := 0; col < table._caea; col++ {
			if cell := table.get(col, row); cell != nil {
				marks = append(marks, cell.toCellTextMarks(_cbgf)...)
			} else {
				marks = _acea(marks, _cbgf, "\t")
			}
			marks = _acea(marks, _cbgf, " ")
		}
		if row < table._dccb-1 {
			marks = _acea(marks, _cbgf, "\n")
		}
	}
	return marks
}
// String returns a description of the table in the form
// "<columns> x <rows> <flag>", using `_caea`, `_dccb` and `_gbaca`.
func (_eddba *textTable) String() string {
	return _gfc.Sprintf("%d x %d %t", _eddba._caea, _eddba._dccb, _eddba._gbaca)
}

// reduce returns a table without the receiver's empty rows and columns.
//
// When no row or column is empty the receiver itself is returned; otherwise
// the non-nil cells of the surviving rows and columns are copied into a new
// table.
func (_ffcfa *textTable) reduce() *textTable {
	keptRows := make([]int, 0, _ffcfa._dccb)
	keptCols := make([]int, 0, _ffcfa._caea)
	for row := 0; row < _ffcfa._dccb; row++ {
		if _ffcfa.emptyRow(row) {
			continue
		}
		keptRows = append(keptRows, row)
	}
	for col := 0; col < _ffcfa._caea; col++ {
		if _ffcfa.emptyColumn(col) {
			continue
		}
		keptCols = append(keptCols, col)
	}
	if len(keptRows) == _ffcfa._dccb && len(keptCols) == _ffcfa._caea {
		return _ffcfa
	}

	reduced := textTable{
		_gbaca: _ffcfa._gbaca,
		_caea:  len(keptCols),
		_dccb:  len(keptRows),
		_bffe:  make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _bdfb {
		_fgc.Log.Info("reduce: %dx%d -> %dx%d", _ffcfa._caea, _ffcfa._dccb, len(keptCols), len(keptRows))
		_fgc.Log.Info("reducedCols: %+v", keptCols)
		_fgc.Log.Info("reducedRows: %+v", keptRows)
	}
	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			cell := _ffcfa.get(oldCol, oldRow)
			if cell == nil {
				continue
			}
			if _bdfb {
				_gfc.Printf("\u0020 %2d, %2d (%2d, %2d) %q\n", newCol, newRow, oldCol, oldRow, _fbfea(cell.text(), 50))
			}
			reduced.put(newCol, newRow, cell)
		}
	}
	return &reduced
}

// _cgcc reports true when either paragraph has its `_eecg` flag set, and
// otherwise returns what `_bfab` reports for the difference of their depths.
func _cgcc(_egebg, _bgbfe *textPara) bool {
	if !_egebg._eecg && !_bgbfe._eecg {
		return _bfab(_egebg.depth() - _bgbfe.depth())
	}
	return true
}

// rulingKind is the integer type used for ruling kinds.
type rulingKind int

// _gdea is the compiled form of the pattern
//
//	^\s*(\d+\.?|[Iiv]+)\s*\)?$
//
// i.e. optional whitespace, then either digits with an optional trailing dot
// or a run of the letters I, i, v, then optional whitespace and an optional
// closing parenthesis.
var _gdea = _be.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")
// pop removes the top entry of the stack and returns a pointer to a copy of it.
// It returns nil when the stack is empty.
func (_ffb *stateStack) pop() *textState {
	if _ffb.empty() {
		return nil
	}
	last := len(*_ffb) - 1
	popped := *(*_ffb)[last]
	*_ffb = (*_ffb)[:last]
	return &popped
}

// inDiacriticArea reports whether `_begdg` lies close to the receiver: the
// summed left-edge and right-edge offsets must be below the receiver's width
// times `_ffga`, and the bottom-edge offset below its height times `_ffga`.
func (_eeeba *textMark) inDiacriticArea(_begdg *textMark) bool {
	dLeft := _eeeba.Llx - _begdg.Llx
	dRight := _eeeba.Urx - _begdg.Urx
	dBottom := _eeeba.Lly - _begdg.Lly
	if _gf.Abs(dLeft+dRight) >= _eeeba.Width()*_ffga {
		return false
	}
	return _gf.Abs(dBottom) < _eeeba.Height()*_ffga
}

// markKind values, numbered from 0 in declaration order.
const (
	_eabd markKind = iota
	_gdaea
	_ebdbd
	_deegc
)

// extractXObjectImage appends an ImageMark for the image XObject named `_df`,
// positioned and scaled by the CTM of `_ddga`. Decoded images are cached per
// XObject stream in `_ec._feb`. A name that does not resolve to a stream or to
// an image is skipped without error.
func (_ec *imageExtractContext) extractXObjectImage(_df *_e.PdfObjectName, _ddga _ca.GraphicsState, _bd *_bbe.PdfPageResources) error {
	// The second result is deliberately discarded; a nil stream is the only
	// condition handled here.
	stream, _ := _bd.GetXObjectByName(*_df)
	if stream == nil {
		return nil
	}

	cached, found := _ec._feb[stream]
	if !found {
		ximg, err := _bd.GetXObjectImageByName(*_df)
		if err != nil {
			return err
		}
		if ximg == nil {
			return nil
		}
		img, err := ximg.ToImage()
		if err != nil {
			return err
		}
		cached = &cachedImage{_bfb: img, _faf: ximg.ColorSpace}
		_ec._feb[stream] = cached
	}

	rgb, err := cached._faf.ImageToRGB(*cached._bfb)
	if err != nil {
		return err
	}
	_fgc.Log.Debug("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073", _ddga.CTM.String())

	mark := ImageMark{
		Image:  &rgb,
		Width:  _ddga.CTM.ScalingFactorX(),
		Height: _ddga.CTM.ScalingFactorY(),
		Angle:  _ddga.CTM.Angle(),
	}
	mark.X, mark.Y = _ddga.CTM.Translation()
	_ec._aad = append(_ec._aad, mark)
	_ec._fe++
	return nil
}

// connections returns the indexes of all rulings reachable from ruling `_caaf`
// by following the intersection sets in `_fdegc` transitively.
func (_fcdeb rulingList) connections(_fdegc map[int]intSet, _caaf int) intSet {
	connected := make(intSet)
	seen := make(intSet)

	var visit func(int)
	visit = func(idx int) {
		if seen.has(idx) {
			return
		}
		seen.add(idx)
		// First record everything that intersects idx ...
		for i := range _fcdeb {
			if _fdegc[i].has(idx) {
				connected.add(i)
			}
		}
		// ... then expand from every ruling connected so far.
		for i := range _fcdeb {
			if connected.has(i) {
				visit(i)
			}
		}
	}
	visit(_caaf)
	return connected
}

// extractContentStreamImages parses the content stream `_db` and runs
// `processOperand` over every operation, using `_fgf` as the resources.
// The image cache and the options are created on first use.
func (_gfg *imageExtractContext) extractContentStreamImages(_db string, _fgf *_bbe.PdfPageResources) error {
	ops, err := _ca.NewContentStreamParser(_db).Parse()
	if err != nil {
		return err
	}
	if _gfg._feb == nil {
		_gfg._feb = map[*_e.PdfObjectStream]*cachedImage{}
	}
	if _gfg._fde == nil {
		_gfg._fde = &ImageExtractOptions{}
	}
	processor := _ca.NewContentStreamProcessor(*ops)
	processor.AddHandler(_ca.HandlerConditionEnumAllOperands, "", _gfg.processOperand)
	return processor.Process(_fgf)
}

// _dfccb returns the keys of `_ffba` in increasing order.
func _dfccb(_ffba map[int]intSet) []int {
	keys := make([]int, len(_ffba))
	n := 0
	for k := range _ffba {
		keys[n] = k
		n++
	}
	_c.Ints(keys)
	return keys
}

// checkWidth returns `_aabde - _adfac` and whether that value is at most `_eedc`.
func (_eabc rectRuling) checkWidth(_adfac, _aabde float64) (float64, bool) {
	width := _aabde - _adfac
	return width, width <= _eedc
}

// text returns the `_fadea` strings of all words in the bag joined by single spaces.
func (_dgec *wordBag) text() string {
	var parts []string
	for _, w := range _dgec.allWords() {
		parts = append(parts, w._fadea)
	}
	return _fg.Join(parts, "\u0020")
}

// textTable is the internal table representation.
type textTable struct {
	_bbe.PdfRectangle
	// _caea is the number of columns; toTextTable reports it as W.
	_caea int
	// _dccb is the number of rows; toTextTable reports it as H.
	_dccb int
	// _gbaca is printed by String and carried over by reduce.
	// NOTE(review): its meaning is not visible in this part of the file.
	_gbaca bool
	// _bffe holds the cell paragraphs.
	_bffe map[uint64]*textPara
	// _aeffd holds composite cells, looked up with the key _aebd(x, y).
	_aeffd map[uint64]compositeCell
}
2021-05-31 17:17:31 +00:00
2021-08-13 01:33:42 +00:00
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
2021-09-23 22:37:42 +00:00
type TextTable struct {
	// W and H are the number of columns and rows.
	W, H int
	// Cells holds H rows of W cells each.
	Cells [][]TableCell
}

// yMean returns the mean of the Y coordinates of the two end points.
func (_ecdg lineRuling) yMean() float64 {
	return 0.5 * (_ecdg._adbgb.Y + _ecdg._gagb.Y)
}

// isActualGrid augments the rulings with `augmentGrid` and checks that the
// resulting verticals and horizontals form a grid: there must be at least
// `_acef`+1 and `_gegf`+1 of them respectively, and they must pass either the
// outer-corner check (when `_dfe` is set) or the `aligned` check on each list.
// On success it returns the verticals followed by the horizontals and true.
func (_bedd rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := _bedd.augmentGrid()
	if len(verts) < _acef+1 || len(horzs) < _gegf+1 {
		if _bcbc {
			_fgc.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(verts), len(horzs), _acef+1, _gegf+1)
		}
		return nil, false
	}
	if _bcbc {
		_fgc.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", _bedd, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for i, r := range _bedd {
			_gfc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", i, r)
		}
	}

	if _dfe {
		// Compare the primary coordinate of each outermost ruling with the
		// ends of the outermost rulings of the other orientation.
		v0, v1 := verts[0], verts[len(verts)-1]
		h0, h1 := horzs[0], horzs[len(horzs)-1]
		if !_cbebd(v0._eacd-h0._dgbc) || !_cbebd(v1._eacd-h0._cgcfd) || !_cbebd(h0._eacd-v0._cgcfd) || !_cbebd(h1._eacd-v0._dgbc) {
			if _bcbc {
				_fgc.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", v0, v1, h0, h1)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			if _ggfe {
				_fgc.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _bcbc {
				_fgc.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(horzs))
			}
			return nil, false
		}
	}
	return append(verts, horzs...), true
}

// appendMark adds `_fefgf` to the word, grows the word's bounding box to
// include it, raises `_gddg` to the mark's `_gbae` if that is larger, and
// recomputes `_fdcbf` as the distance from the top of `_ebfgf` to the bottom
// of the word.
func (_gbcfg *textWord) appendMark(_fefgf *textMark, _ebfgf _bbe.PdfRectangle) {
	_gbcfg._cgceb = append(_gbcfg._cgceb, _fefgf)
	_gbcfg.PdfRectangle = _eacc(_gbcfg.PdfRectangle, _fefgf.PdfRectangle)
	if _gbcfg._gddg < _fefgf._gbae {
		_gbcfg._gddg = _fefgf._gbae
	}
	_gbcfg._fdcbf = _ebfgf.Ury - _gbcfg.PdfRectangle.Lly
}

// _becae reports whether `_dcaa` divided by the larger of `_aaf` and `_febg`
// is below `_ffec`.
func _becae(_dcaa, _febg float64) bool {
	denom := _gf.Max(_aaf, _febg)
	return _dcaa/denom < _ffec
}

// top returns the last entry of the stack without removing it, or nil when
// the stack is empty.
func (_ggf *stateStack) top() *textState {
	if !_ggf.empty() {
		return (*_ggf)[_ggf.size()-1]
	}
	return nil
}

// add appends the given points to the subpath.
func (_becg *subpath) add(_gce ..._gfa.Point) {
	_becg._gbee = append(_becg._gbee, _gce...)
}

// removeDuplicates drops words that repeat another word of the same depth bin:
// same `_fadea` text and all four bounding-box edges closer than a tolerance
// of `_acgeg` times the `_gddg` of the first word of the bin being scanned.
// Bins left without words are deleted.
func (_bbae *wordBag) removeDuplicates() {
	if _ddff {
		_fgc.Log.Info("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071", _bbae.text())
	}
	for _, depthIdx := range _bbae.depthIndexes() {
		if len(_bbae._gbec[depthIdx]) == 0 {
			continue
		}
		first := _bbae._gbec[depthIdx][0]
		tol := _acgeg * first._gddg
		minDepth := first._fdcbf
		near := func(u, v float64) bool { return _gf.Abs(u-v) < tol }

		for _, idx := range _bbae.depthBand(minDepth, minDepth+tol) {
			dups := map[*textWord]struct{}{}
			words := _bbae._gbec[idx]
			for _, keep := range words {
				if _, isDup := dups[keep]; isDup {
					continue
				}
				for _, other := range words {
					if _, isDup := dups[other]; isDup {
						continue
					}
					if other != keep && other._fadea == keep._fadea &&
						near(other.Llx, keep.Llx) && near(other.Urx, keep.Urx) &&
						near(other.Lly, keep.Lly) && near(other.Ury, keep.Ury) {
						dups[other] = struct{}{}
					}
				}
			}
			if len(dups) == 0 {
				continue
			}
			// Compact the survivors to the front of the bin, then truncate.
			n := 0
			for _, w := range words {
				if _, isDup := dups[w]; isDup {
					continue
				}
				words[n] = w
				n++
			}
			_bbae._gbec[idx] = words[:len(words)-len(dups)]
			if len(_bbae._gbec[idx]) == 0 {
				delete(_bbae._gbec, idx)
			}
		}
	}
}

// sort orders the words of every depth bin with the `_gffd` comparison.
func (_acadc *wordBag) sort() {
	for _, bin := range _acadc._gbec {
		words := bin
		_c.Slice(words, func(i, j int) bool {
			return _gffd(words[i], words[j]) < 0
		})
	}
}

// log writes the rulings to the log and to standard output, prefixed by
// `_bcbgb`. It does nothing unless `_bcbc` is set.
func (_eabda rulingList) log(_bcbgb string) {
	if _bcbc {
		_fgc.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _bcbgb, _eabda.String())
		for i, r := range _eabda {
			_gfc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r.String())
		}
	}
}
2021-08-13 01:33:42 +00:00
2021-09-23 22:37:42 +00:00
// String returns a description of `b`.
func (_fabg *wordBag) String() string {
	var texts []string
	for _, depthIdx := range _fabg.depthIndexes() {
		for _, w := range _fabg._gbec[depthIdx] {
			texts = append(texts, w._fadea)
		}
	}
	return _gfc.Sprintf("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071", _fabg.PdfRectangle, _fabg._ggcf, len(texts), texts)
}

// gridIntersecting reports whether `_bdgea` accepts both the `_dgbc` values
// and the `_cgcfd` values of the two rulings.
func (_bdde *ruling) gridIntersecting(_agag *ruling) bool {
	return _bdgea(_bdde._dgbc, _agag._dgbc) && _bdgea(_bdde._cgcfd, _agag._cgcfd)
}

// xMean returns the mean of the X coordinates of the two end points.
func (_gfcf lineRuling) xMean() float64 {
	return 0.5 * (_gfcf._adbgb.X + _gfcf._gagb.X)
}

// depth returns the smallest depth among the cells of row 0, ignoring missing
// cells and cells with `_eecg` set. It returns 1e10 when no cell qualifies.
func (_feccb *textTable) depth() float64 {
	minDepth := 1e10
	for x := 0; x < _feccb._caea; x++ {
		cell := _feccb.get(x, 0)
		if cell == nil || cell._eecg {
			continue
		}
		minDepth = _gf.Min(minDepth, cell.depth())
	}
	return minDepth
}

// _gfge returns `_ggbc` without its last rune. An empty string is returned
// unchanged; previously it caused a slice-bounds panic.
func _gfge(_ggbc string) string {
	runes := []rune(_ggbc)
	if len(runes) == 0 {
		return ""
	}
	return string(runes[:len(runes)-1])
}
func _gdee (_efcf ,_eacf _bbe .PdfRectangle )bool {return _efcf .Lly <=_eacf .Ury &&_eacf .Lly <=_efcf .Ury ;};func (_gfecd *textTable )newTablePara ()*textPara {_bfdc :=_gfecd .computeBbox ();_cfgda :=&textPara {PdfRectangle :_bfdc ,_beeg :_bfdc ,_gbfb :_gfecd };
if _bdfb {_fgc .Log .Info ("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073",_cfgda );};return _cfgda ;};func _fgga (_agge ,_fbeaa _gfa .Point ,_dced _a .Color )(*ruling ,bool ){_ecbce :=lineRuling {_adbgb :_agge ,_gagb :_fbeaa ,_fgecf :_cadd (_agge ,_fbeaa ),Color :_dced };
if _ecbce ._fgecf ==_fgcbb {return nil ,false ;};return _ecbce .asRuling ();};func _ecac (_dggd float64 ,_bbeca int )int {if _bbeca ==0{_bbeca =1;};_dbag :=float64 (_bbeca );return int (_gf .Round (_dggd /_dbag )*_dbag );};func _dbcc (_cgde *wordBag ,_efbf *textWord ,_acf float64 )bool {return _efbf .Llx < _cgde .Urx +_acf &&_cgde .Llx -_acf < _efbf .Urx ;
};const (RenderModeStroke RenderMode =1<<iota ;RenderModeFill ;RenderModeClip ;);func _fded (_ebbb ,_dcfd _gfa .Point )bool {return _ebbb .X ==_dcfd .X &&_ebbb .Y ==_dcfd .Y };func (_cdfb paraList )sortReadingOrder (){_fgc .Log .Trace ("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_cdfb ));
if len (_cdfb )<=1{return ;};_cdfb .computeEBBoxes ();_c .Slice (_cdfb ,func (_ddcd ,_bafbc int )bool {return _dcaf (_cdfb [_ddcd ],_cdfb [_bafbc ])<=0});_aag :=_cdfb .topoOrder ();_cdfb .reorder (_aag );};func (_eaf rulingList )intersections ()map[int ]intSet {var _febeb ,_ecdgc []int ;
for _ebfa ,_cff :=range _eaf {switch _cff ._ccfa {case _bgecg :_febeb =append (_febeb ,_ebfa );case _eeca :_ecdgc =append (_ecdgc ,_ebfa );};};if len (_febeb )< _acef +1||len (_ecdgc )< _gegf +1{return nil ;};if len (_febeb )+len (_ecdgc )> _fbe {_fgc .Log .Debug ("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",len (_eaf ),len (_febeb ),len (_ecdgc ));
return nil ;};_cdbc :=make (map[int ]intSet ,len (_febeb )+len (_ecdgc ));for _ ,_ffed :=range _febeb {for _ ,_agae :=range _ecdgc {if _eaf [_ffed ].intersects (_eaf [_agae ]){if _ ,_aeca :=_cdbc [_ffed ];!_aeca {_cdbc [_ffed ]=make (intSet );};if _ ,_fgae :=_cdbc [_agae ];
!_fgae {_cdbc [_agae ]=make (intSet );};_cdbc [_ffed ].add (_agae );_cdbc [_agae ].add (_ffed );};};};return _cdbc ;};func (_cbcf *textWord )computeText ()string {_dfdfe :=make ([]string ,len (_cbcf ._cgceb ));for _bafeg ,_ceba :=range _cbcf ._cgceb {_dfdfe [_bafeg ]=_ceba ._cgge ;
};return _fg .Join (_dfdfe ,"");};func _fgcb (_ccbd []*textMark ,_ccee _bbe .PdfRectangle ,_fdbcb rulingList ,_fdaae []gridTiling )paraList {_fgc .Log .Trace ("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066",len (_ccbd ),_ccee );
if len (_ccbd )==0{return nil ;};_bagb :=_afaa (_ccbd ,_ccee );if len (_bagb )==0{return nil ;};_fdbcb .log ("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065");_cggg ,_aege :=_fdbcb .vertsHorzs ();_dcgc :=_fede (_bagb ,_ccee .Ury ,_cggg ,_aege );
_cfe :=_fdfe (_dcgc ,_ccee .Ury ,_cggg ,_aege );_cfe =_gbdc (_cfe );_ecb :=make (paraList ,0,len (_cfe ));for _ ,_gfeg :=range _cfe {_gaga :=_gfeg .arrangeText ();if _gaga !=nil {_ecb =append (_ecb ,_gaga );};};if len (_ecb )>=_ggeg {_ecb =_ecb .extractTables (_fdaae );
};_ecb .sortReadingOrder ();_ecb .log ("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072");return _ecb ;};func (_gee *shapesState )cubicTo (_bdbge ,_fgecc ,_fab ,_ced ,_gcc ,_gbgg float64 ){if _egf {_fgc .Log .Info ("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a");
};_gee .addPoint (_gcc ,_gbgg );};func _bcc (_fdeb _bbe .PdfRectangle )textState {return textState {_fbd :100,_fgec :RenderModeFill ,_dda :_fdeb };};func (_fdbg *textObject )nextLine (){_fdbg .moveLP (0,-_fdbg ._eada ._cac )};func (_faed compositeCell )parasBBox ()(paraList ,_bbe .PdfRectangle ){return _faed .paraList ,_faed .PdfRectangle ;
};func _bceg (_dadeb string )(string ,bool ){_dbcae :=[]rune (_dadeb );if len (_dbcae )!=1{return "",false ;};_ccbed ,_cbceb :=_dfcad [_dbcae [0]];return _ccbed ,_cbceb ;};type wordBag struct{_bbe .PdfRectangle ;_ggcf float64 ;_ffbe ,_ddaa rulingList ;
_gdddb float64 ;_gbec map[int ][]*textWord ;};func _dcga (_cdcb string ,_cefbb []rulingList ){_fgc .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_cefbb ),_cdcb );for _ddbcg ,_feefa :=range _cefbb {_gfc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ddbcg ,_feefa .String ());
};};func (_aeba *textPara )text ()string {_beegg :=new (_d .Buffer );_aeba .writeText (_beegg );return _beegg .String ();};func _cbgb (_fgeba ,_caba ,_fdec ,_fabad *textPara )*textTable {_ddadd :=&textTable {_caea :2,_dccb :2,_bffe :make (map[uint64 ]*textPara ,4)};
_ddadd .put (0,0,_fgeba );_ddadd .put (1,0,_caba );_ddadd .put (0,1,_fdec );_ddadd .put (1,1,_fabad );return _ddadd ;};func (_aggc *shapesState )moveTo (_agfa ,_bcag float64 ){_aggc ._cea =true ;_aggc ._dgfbe =_aggc .devicePoint (_agfa ,_bcag );if _egf {_fgc .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_agfa ,_bcag ,_aggc ._dgfbe );
};};func (_afbd *ruling )equals (_bacd *ruling )bool {return _afbd ._ccfa ==_bacd ._ccfa &&_bdgea (_afbd ._eacd ,_bacd ._eacd )&&_bdgea (_afbd ._dgbc ,_bacd ._dgbc )&&_bdgea (_afbd ._cgcfd ,_bacd ._cgcfd );};func (_faa *textLine )appendWord (_bece *textWord ){_faa ._gfbc =append (_faa ._gfbc ,_bece );
_faa .PdfRectangle =_eacc (_faa .PdfRectangle ,_bece .PdfRectangle );if _bece ._gddg > _faa ._ddac {_faa ._ddac =_bece ._gddg ;};if _bece ._fdcbf > _faa ._fdga {_faa ._fdga =_bece ._fdcbf ;};};func _deea (_gcfe *wordBag ,_dfbf int )*textLine {_ggccd :=_gcfe .firstWord (_dfbf );
_dcdc :=textLine {PdfRectangle :_ggccd .PdfRectangle ,_ddac :_ggccd ._gddg ,_fdga :_ggccd ._fdcbf };_dcdc .pullWord (_gcfe ,_ggccd ,_dfbf );return &_dcdc ;};func _bedg (_fffg string )bool {for _ ,_dgecd :=range _fffg {if !_bg .IsSpace (_dgecd ){return false ;
};};return true ;};func (_ddfa compositeCell )split (_geae ,_fece []float64 )*textTable {_gdaff :=len (_geae )+1;_cgcf :=len (_fece )+1;if _bdfb {_fgc .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_cgcf ,_gdaff ,_ddfa ,_geae ,_fece );
_gfc .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_ddfa .paraList ));for _dcbe ,_baca :=range _ddfa .paraList {_gfc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dcbe ,_baca .String ());};
_gfc .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_ddfa .lines ()));for _dfddd ,_ddbc :=range _ddfa .lines (){_gfc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dfddd ,_ddbc );};};_geae =_bfda (_geae ,_ddfa .Ury ,_ddfa .Lly );
_fece =_bfda (_fece ,_ddfa .Llx ,_ddfa .Urx );_befgea :=make (map[uint64 ]*textPara ,_cgcf *_gdaff );_bfdb :=textTable {_caea :_cgcf ,_dccb :_gdaff ,_bffe :_befgea };_abfg :=_ddfa .paraList ;_c .Slice (_abfg ,func (_aafe ,_ffca int )bool {_agdb ,_ebdd :=_abfg [_aafe ],_abfg [_ffca ];
_egegf ,_efca :=_agdb .Lly ,_ebdd .Lly ;if _egegf !=_efca {return _egegf < _efca ;};return _agdb .Llx < _ebdd .Llx ;});_dfgg :=make (map[uint64 ]_bbe .PdfRectangle ,_cgcf *_gdaff );for _abdf ,_eeaf :=range _geae [1:]{_dafc :=_geae [_abdf ];for _cefb ,_cfgd :=range _fece [1:]{_bbgd :=_fece [_cefb ];
_dfgg [_aebd (_cefb ,_abdf )]=_bbe .PdfRectangle {Llx :_bbgd ,Urx :_cfgd ,Lly :_eeaf ,Ury :_dafc };};};if _bdfb {_fgc .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073");
_gfc .Printf ("\u0020\u0020\u0020\u0020");for _ecaa :=0;_ecaa < _cgcf ;_ecaa ++{_gfc .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_ecaa );};_gfc .Println ();for _gcfgf :=0;_gcfgf < _gdaff ;_gcfgf ++{_gfc .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_gcfgf );
for _bbdg :=0;_bbdg < _cgcf ;_bbdg ++{_gfc .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_dfgg [_aebd (_bbdg ,_gcfgf )]);};_gfc .Println ();};};_fegc :=func (_efbfa *textLine )(int ,int ){for _fada :=0;_fada < _gdaff ;_fada ++{for _gecef :=0;_gecef < _cgcf ;
_gecef ++{if _cced (_dfgg [_aebd (_gecef ,_fada )],_efbfa .PdfRectangle ){return _gecef ,_fada ;};};};return -1,-1;};_cbfe :=make (map[uint64 ][]*textLine ,_cgcf *_gdaff );for _ ,_dbef :=range _abfg .lines (){_cbce ,_cbbg :=_fegc (_dbef );if _cbce < 0{continue ;
};_cbfe [_aebd (_cbce ,_cbbg )]=append (_cbfe [_aebd (_cbce ,_cbbg )],_dbef );};for _gfbg :=0;_gfbg < len (_geae )-1;_gfbg ++{_aebb :=_geae [_gfbg ];_cfgdg :=_geae [_gfbg +1];for _dcbb :=0;_dcbb < len (_fece )-1;_dcbb ++{_egegff :=_fece [_dcbb ];_aegb :=_fece [_dcbb +1];
_caga :=_bbe .PdfRectangle {Llx :_egegff ,Urx :_aegb ,Lly :_cfgdg ,Ury :_aebb };_eaba :=_cbfe [_aebd (_dcbb ,_gfbg )];if len (_eaba )==0{continue ;};_gdab :=_dbfa (_caga ,_eaba );_bfdb .put (_dcbb ,_gfbg ,_gdab );};};return &_bfdb ;};func (_cfda *wordBag )removeWord (_aegae *textWord ,_cgegag int ){_geef :=_cfda ._gbec [_cgegag ];
_geef =_egecd (_geef ,_aegae );if len (_geef )==0{delete (_cfda ._gbec ,_cgegag );}else {_cfda ._gbec [_cgegag ]=_geef ;};};func (_ccg rulingList )toGrids ()[]rulingList {if _bcbc {_fgc .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_ccg );
};_ceef :=_ccg .intersections ();if _bcbc {_fgc .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_ccg ),len (_ceef ));
for _ ,_befe :=range _dfccb (_ceef ){_gfc .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_befe ,_ceef [_befe ]);};};_gcg :=make (map[int ]intSet ,len (_ccg ));for _bcbf :=range _ccg {_caecd :=_ccg .connections (_ceef ,_bcbf );if len (_caecd )> 0{_gcg [_bcbf ]=_caecd ;
};};if _bcbc {_fgc .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_gcg ));for _ ,_fefc :=range _dfccb (_gcg ){_gfc .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_fefc ,_gcg [_fefc ]);
};};_ageaf :=_edfe (len (_ccg ),func (_eeae ,_efbb int )bool {_edac ,_dfcc :=len (_gcg [_eeae ]),len (_gcg [_efbb ]);if _edac !=_dfcc {return _edac > _dfcc ;};return _ccg .comp (_eeae ,_efbb );});if _bcbc {_fgc .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_ageaf );
};_ggbg :=[][]int {{_ageaf [0]}};_efgc :for _ ,_acdc :=range _ageaf [1:]{for _adgf ,_eba :=range _ggbg {for _ ,_befff :=range _eba {if _gcg [_befff ].has (_acdc ){_ggbg [_adgf ]=append (_eba ,_acdc );continue _efgc ;};};};_ggbg =append (_ggbg ,[]int {_acdc });
};if _bcbc {_fgc .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_ggbg );};_c .SliceStable (_ggbg ,func (_cdff ,_gebc int )bool {return len (_ggbg [_cdff ])> len (_ggbg [_gebc ])});for _ ,_cbead :=range _ggbg {_c .Slice (_cbead ,func (_ecdb ,_ddad int )bool {return _ccg .comp (_cbead [_ecdb ],_cbead [_ddad ])});
};_gaac :=make ([]rulingList ,len (_ggbg ));for _gcdaa ,_cgef :=range _ggbg {_bcaa :=make (rulingList ,len (_cgef ));for _cfc ,_ddefb :=range _cgef {_bcaa [_cfc ]=_ccg [_ddefb ];};_gaac [_gcdaa ]=_bcaa ;};if _bcbc {_fgc .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_gaac );
};var _adfb []rulingList ;for _ ,_fbbd :=range _gaac {if _bcfee ,_acfd :=_fbbd .isActualGrid ();_acfd {_fbbd =_bcfee ;_fbbd =_fbbd .snapToGroups ();_adfb =append (_adfb ,_fbbd );};};if _bcbc {_dcga ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_adfb );
_fgc .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_gaac ),len (_adfb ));};return _adfb ;};func (_eadf *wordBag )scanBand (_edd string ,_dga *wordBag ,_gbbf func (_ecfc *wordBag ,_fecf *textWord )bool ,_cdca ,_gfad ,_dgb float64 ,_gegc ,_bfcb bool )int {_acbae :=_dga ._ggcf ;
var _cfd map[int ]map[*textWord ]struct{};if !_gegc {_cfd =_eadf .makeRemovals ();};_gfab :=_acge *_acbae ;_cacdg :=0;for _ ,_cadbf :=range _eadf .depthBand (_cdca -_gfab ,_gfad +_gfab ){if len (_eadf ._gbec [_cadbf ])==0{continue ;};for _ ,_dafe :=range _eadf ._gbec [_cadbf ]{if !(_cdca -_gfab <=_dafe ._fdcbf &&_dafe ._fdcbf <=_gfad +_gfab ){continue ;
};if !_gbbf (_dga ,_dafe ){continue ;};_facf :=2.0*_gf .Abs (_dafe ._gddg -_dga ._ggcf )/(_dafe ._gddg +_dga ._ggcf );_dba :=_gf .Max (_dafe ._gddg /_dga ._ggcf ,_dga ._ggcf /_dafe ._gddg );_ddaf :=_gf .Min (_facf ,_dba );if _dgb > 0&&_ddaf > _dgb {continue ;
};if _dga .blocked (_dafe ){continue ;};if !_gegc {_dga .pullWord (_dafe ,_cadbf ,_cfd );};_cacdg ++;if !_bfcb {if _dafe ._fdcbf < _cdca {_cdca =_dafe ._fdcbf ;};if _dafe ._fdcbf > _gfad {_gfad =_dafe ._fdcbf ;};};if _gegc {break ;};};};if !_gegc {_eadf .applyRemovals (_cfd );
};return _cacdg ;};func (_cgecd *textTable )getComposite (_fcdcf ,_bfdaa int )(paraList ,_bbe .PdfRectangle ){_bdbdg ,_fdda :=_cgecd ._aeffd [_aebd (_fcdcf ,_bfdaa )];if _bdfb {_gfc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a",_fcdcf ,_bfdaa ,_bdbdg .String ());
};if !_fdda {return nil ,_bbe .PdfRectangle {};};return _bdbdg .parasBBox ();};func (_badga *textTable )toTextTable ()TextTable {if _bdfb {_fgc .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_badga ._caea ,_badga ._dccb );
};_gadf :=make ([][]TableCell ,_badga ._dccb );for _edeb :=0;_edeb < _badga ._dccb ;_edeb ++{_gadf [_edeb ]=make ([]TableCell ,_badga ._caea );for _dbbb :=0;_dbbb < _badga ._caea ;_dbbb ++{_bggab :=_badga .get (_dbbb ,_edeb );if _bggab ==nil {continue ;
};if _bdfb {_gfc .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_dbbb ,_edeb ,_bggab );};_gadf [_edeb ][_dbbb ].Text =_bggab .text ();_ffcg :=0;_gadf [_edeb ][_dbbb ].Marks ._fbcc =_bggab .toTextMarks (&_ffcg );};};return TextTable {W :_badga ._caea ,H :_badga ._dccb ,Cells :_gadf };
2021-07-30 00:21:16 +00:00
};
2021-05-31 17:17:31 +00:00
2021-09-23 22:37:42 +00:00
// String returns a description of `state`.
func (_caed *textState) String() string {
	var fontName string
	if _caed._ebfb == nil {
		// Placeholder shown when no font is set.
		fontName = "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	} else {
		fontName = _caed._ebfb.BaseFont()
	}
	return _gfc.Sprintf("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071", _caed._cfa, _caed._bea, _caed._gbaf, fontName)
}

// cachedImage pairs a decoded image with the colorspace of the XObject it was
// decoded from.
type cachedImage struct {
	_bfb *_bbe.Image
	_faf _bbe.PdfColorspace
}
2021-06-21 14:01:56 +00:00
2021-09-23 22:37:42 +00:00
// Len returns the number of TextMarks in `ma`.
func (_beaa *TextMarkArray) Len() int {
	if _beaa != nil {
		return len(_beaa._fbcc)
	}
	return 0
}

// xNeighbours builds two events per paragraph, one at its left edge and one at
// its right edge, and hands them to `eventNeighbours`. When `_cfdace` is
// non-zero each edge is pushed outwards by `_cfdace` times the paragraph's
// fontsize.
func (_gadbd paraList) xNeighbours(_cfdace float64) map[*textPara][]int {
	events := make([]event, 2*len(_gadbd))
	for i, para := range _gadbd {
		left, right := para.Llx, para.Urx
		// fontsize() is only consulted when a tolerance was requested.
		if !(_cfdace == 0) {
			left = para.Llx - _cfdace*para.fontsize()
			right = para.Urx + _cfdace*para.fontsize()
		}
		events[2*i] = event{left, true, i}
		events[2*i+1] = event{right, false, i}
	}
	return _gadbd.eventNeighbours(events)
}

// quadraticTo adds only the end point (`_egdb`, `_ecf`) to the path; the
// other two arguments are not used.
func (_acad *shapesState) quadraticTo(_ggbd, _aeed, _egdb, _ecf float64) {
	if _egf {
		_fgc.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	_acad.addPoint(_egdb, _ecf)
}
// taken reports whether the paragraph is nil or has its `_eeeg` flag set.
func (_dbeb *textPara) taken() bool {
	if _dbeb == nil {
		return true
	}
	return _dbeb._eeeg
}

// clearPath discards the accumulated subpaths and resets the `_cea` flag.
func (_bgbf *shapesState) clearPath() {
	_bgbf._gbag = nil
	_bgbf._cea = false
	if _egf {
		_fgc.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _bgbf)
	}
}

// _egaf returns nil when a licensed key is present or `_bc` is set. Otherwise
// it prints a notice to standard output and returns an error. `_beabe` is not
// used.
func _egaf(_beabe *PageText) error {
	key := _fge.GetLicenseKey()
	if (key != nil && key.IsLicensed()) || _bc {
		return nil
	}
	_gfc.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_gfc.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _g.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}

// _eeaff picks a ruling kind from two extents and a minimum length `_fdgf`:
// `_eeca` when `_dffgd` reaches the minimum and `_becae(_eaeeg, _dffgd)`
// holds, `_bgecg` for the mirrored case, and `_fgcbb` otherwise.
func _eeaff(_dffgd, _eaeeg, _fdgf float64) rulingKind {
	switch {
	case _dffgd >= _fdgf && _becae(_eaeeg, _dffgd):
		return _eeca
	case _eaeeg >= _fdgf && _becae(_dffgd, _eaeeg):
		return _bgecg
	}
	return _fgcbb
}

// mergePrimary returns the mean `_eacd` of the rulings. The list must not be
// empty.
func (_bdba rulingList) mergePrimary() float64 {
	sum := _bdba[0]._eacd
	for i := 1; i < len(_bdba); i++ {
		sum += _bdba[i]._eacd
	}
	return sum / float64(len(_bdba))
}

// _dcaf compares two bounded values: it returns `_gadg(a, b)` unless `_bfab`
// accepts that value, in which case it falls back to `_gffd(a, b)`.
func _dcaf(_gdae, _bdgf bounded) float64 {
	diff := _gadg(_gdae, _bdgf)
	if _bfab(diff) {
		return _gffd(_gdae, _bdgf)
	}
	return diff
}

// inTile returns the paragraphs whose bounding box `_gcfefe` contains, in
// their original order.
func (_edacb paraList) inTile(_gcfefe gridTile) paraList {
	var inside paraList
	for _, para := range _edacb {
		if !_gcfefe.contains(para.PdfRectangle) {
			continue
		}
		inside = append(inside, para)
	}
	if _bdfb {
		_gfc.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _gcfefe, len(inside))
		for i, para := range inside {
			_gfc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
		_gfc.Println("")
	}
	return inside
}

// computeBbox returns the union (via `_eacc`) of the bounding boxes of all
// non-nil cells, or the zero rectangle when the table has no cells.
func (_faefe *textTable) computeBbox() _bbe.PdfRectangle {
	var bbox _bbe.PdfRectangle
	haveBox := false
	for y := 0; y < _faefe._dccb; y++ {
		for x := 0; x < _faefe._caea; x++ {
			cell := _faefe.get(x, y)
			switch {
			case cell == nil:
				// Missing cell: nothing to add.
			case haveBox:
				bbox = _eacc(bbox, cell.PdfRectangle)
			default:
				bbox = cell.PdfRectangle
				haveBox = true
			}
		}
	}
	return bbox
}

// fontsize returns the `_ddac` of the paragraph's first line. The paragraph
// must have at least one line.
func (_eff *textPara) fontsize() float64 {
	firstLine := _eff._dbed[0]
	return firstLine._ddac
}

// moveText forwards its arguments to moveLP.
func (_ga *textObject) moveText(_efe, _gag float64) {
	_ga.moveLP(_efe, _gag)
}
// empty reports whether the stack has no entries.
func (_ddef *stateStack) empty() bool {
	return len(*_ddef) == 0
}

// add inserts `_fccbb` into the set.
func (_dcgb intSet) add(_fccbb int) {
	_dcgb[_fccbb] = struct{}{}
}

// depth returns -1 when `_eecg` is set, otherwise the `_fdga` of the first
// line, or the depth of the paragraph's table when it has no lines.
func (_ffecd *textPara) depth() float64 {
	switch {
	case _ffecd._eecg:
		return -1.0
	case len(_ffecd._dbed) > 0:
		return _ffecd._dbed[0]._fdga
	}
	return _ffecd._gbfb.depth()
}

// findPrimSec returns the first ruling whose `_eacd` matches `_bcdeg` (as
// judged by `_bfab`) and whose [`_dgbc`, `_cgcfd`] range, widened by `_ddee`
// on both sides, contains `_gbge`. It returns nil when there is none.
func (_gfgc rulingList) findPrimSec(_bcdeg, _gbge float64) *ruling {
	for _, r := range _gfgc {
		if !_bfab(r._eacd - _bcdeg) {
			continue
		}
		if r._dgbc-_ddee <= _gbge && _gbge <= r._cgcfd+_ddee {
			return r
		}
	}
	return nil
}

// absorb pulls every word of `_aacc` into the receiver and then applies the
// collected removals to `_aacc`.
func (_ebe *wordBag) absorb(_aacc *wordBag) {
	removals := _aacc.makeRemovals()
	for depthIdx, words := range _aacc._gbec {
		for _, w := range words {
			_ebe.pullWord(w, depthIdx, removals)
		}
	}
	_aacc.applyRemovals(removals)
}

// _geeaa returns a ruling of kind `_bgecg` placed at the left edge of `_fgdf`
// and spanning from its Lly to its Ury.
func _geeaa(_fgdf _bbe.PdfRectangle) *ruling {
	return &ruling{_ccfa: _bgecg, _eacd: _fgdf.Llx, _dgbc: _fgdf.Lly, _cgcfd: _fgdf.Ury}
}

// comp is the ordering used to sort rulings by index. Rulings are ordered by
// kind first (larger kind first), then by `_eacd`, `_dgbc` and `_cgcfd`; for
// kinds other than `_eeca` the sense of those three comparisons is inverted.
// Two rulings of kind `_fgcbb` never compare as less.
func (_dgbb rulingList) comp(_efbae, _edad int) bool {
	a, b := _dgbb[_efbae], _dgbb[_edad]
	kind := a._ccfa
	if kind != b._ccfa {
		return kind > b._ccfa
	}
	if kind == _fgcbb {
		return false
	}
	// invert is true for every kind except _eeca; `x != invert` negates x
	// exactly when invert is set.
	invert := kind != _eeca
	if a._eacd != b._eacd {
		return (a._eacd > b._eacd) != invert
	}
	if a._dgbc != b._dgbc {
		return (a._dgbc < b._dgbc) != invert
	}
	return (a._cgcfd < b._cgcfd) != invert
}

// _egecd returns `_ffecf` with the first occurrence of `_adege` removed.
// When the word is not present the error is logged and the slice is returned
// unchanged. It used to return nil, which made removeWord drop every other
// word stored in the same depth bin.
func _egecd(_ffecf []*textWord, _adege *textWord) []*textWord {
	for i, w := range _ffecf {
		if w == _adege {
			return _efcfa(_ffecf, i)
		}
	}
	_fgc.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", _adege)
	return _ffecf
}

// merge sorts the paragraphs into reading order (reordering the receiver in
// place) and combines them into one paragraph whose bounding box is the union
// of theirs and whose lines are their lines concatenated. It returns nil for
// an empty list.
func (_aacg paraList) merge() *textPara {
	_fgc.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_aacg))
	if len(_aacg) == 0 {
		return nil
	}
	_aacg.sortReadingOrder()

	total := 0
	for _, para := range _aacg {
		total += len(para._dbed)
	}
	// Collect the lines in a fresh slice so that appending never writes into
	// spare capacity of the first paragraph's own line slice.
	lines := make([]*textLine, 0, total)
	bbox := _aacg[0].PdfRectangle
	lines = append(lines, _aacg[0]._dbed...)
	for _, para := range _aacg[1:] {
		bbox = _eacc(bbox, para.PdfRectangle)
		lines = append(lines, para._dbed...)
	}
	return _dbfa(bbox, lines)
}

// establishSubpath returns the current (last) subpath, first starting a new
// one when `lastpointEstablished` reports false. It returns nil when there
// are no subpaths, and otherwise clears the `_cea` flag.
func (_cee *shapesState) establishSubpath() *subpath {
	point, established := _cee.lastpointEstablished()
	if !established {
		_cee._gbag = append(_cee._gbag, _cbdf(point))
	}
	n := len(_cee._gbag)
	if n == 0 {
		return nil
	}
	_cee._cea = false
	return _cee._gbag[n-1]
}

// toCellTextMarks returns the text marks of all lines of the paragraph,
// advancing `*_gaed` as the per-line helpers do. A line that ends in a hyphen
// and is not the last one is passed through `_baaf` when `_becd` is set;
// between other consecutive lines `_acea` inserts a separator chosen by
// `_fcffc` from the two lines' `_fdga` values.
func (_aagg *textPara) toCellTextMarks(_gaed *int) []TextMark {
	var marks []TextMark
	lastIdx := len(_aagg._dbed) - 1
	for i, line := range _aagg._dbed {
		lineMarks := line.toTextMarks(_gaed)
		isLast := i == lastIdx
		dehyphenate := _becd && line.endsInHyphen() && !isLast
		if dehyphenate {
			lineMarks = _baaf(lineMarks, _gaed)
		}
		marks = append(marks, lineMarks...)
		if dehyphenate || isLast {
			continue
		}
		marks = _acea(marks, _gaed, _fcffc(line._fdga, _aagg._dbed[i+1]._fdga))
	}
	return marks
}

// logComposite prints two grids describing the composite cells of the table
// to standard output: the number of paragraphs per cell, then the first
// characters of each cell's merged text. It does nothing unless `_bdfb` is set.
func (_geac *textTable) logComposite(_gdgg string) {
	if !_bdfb {
		return
	}

	// Grid 1: paragraph counts.
	_fgc.Log.Info("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _geac._caea, _geac._dccb, _gdgg)
	_gfc.Printf("\u0025\u0035\u0073 \u007c", "")
	for x := 0; x < _geac._caea; x++ {
		_gfc.Printf("\u0025\u0033\u0064 \u007c", x)
	}
	_gfc.Println("")
	_gfc.Printf("\u0025\u0035\u0073 \u002b", "")
	for x := 0; x < _geac._caea; x++ {
		_gfc.Printf("\u0025\u0033\u0073 \u002b", "\u002d\u002d\u002d")
	}
	_gfc.Println("")
	for y := 0; y < _geac._dccb; y++ {
		_gfc.Printf("\u0025\u0035\u0064 \u007c", y)
		for x := 0; x < _geac._caea; x++ {
			paras, _ := _geac._aeffd[_aebd(x, y)].parasBBox()
			_gfc.Printf("\u0025\u0033\u0064 \u007c", len(paras))
		}
		_gfc.Println("")
	}

	// Grid 2: truncated cell text.
	_fgc.Log.Info("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _geac._caea, _geac._dccb, _gdgg)
	_gfc.Printf("\u0025\u0035\u0073 \u007c", "")
	for x := 0; x < _geac._caea; x++ {
		_gfc.Printf("\u0025\u0031\u0032\u0064\u0020\u007c", x)
	}
	_gfc.Println("")
	_gfc.Printf("\u0025\u0035\u0073 \u002b", "")
	for x := 0; x < _geac._caea; x++ {
		_gfc.Print("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b")
	}
	_gfc.Println("")
	for y := 0; y < _geac._dccb; y++ {
		_gfc.Printf("\u0025\u0035\u0064 \u007c", y)
		for x := 0; x < _geac._caea; x++ {
			paras, _ := _geac._aeffd[_aebd(x, y)].parasBBox()
			cellText := ""
			if merged := paras.merge(); merged != nil {
				cellText = merged.text()
			}
			// Quote the text to escape control characters, then strip the
			// surrounding quote characters again.
			cellText = _gfc.Sprintf("\u0025\u0071", _fbfea(cellText, 12))
			cellText = cellText[1 : len(cellText)-1]
			_gfc.Printf("\u0025\u0031\u0032\u0073\u0020\u007c", cellText)
		}
		_gfc.Println("")
	}
}

// checkOp validates the operand count of `_dge`. A nil receiver only triggers
// a debug message showing at most `_cge` operands. When `_cge` is
// non-negative and the operation does not have exactly `_cge` operands,
// checkOp returns false, together with an error if `_gbff` is set. In every
// other case it returns true and a nil error.
func (_gbb *textObject) checkOp(_dge *_ca.ContentStreamOperation, _cge int, _gbff bool) (_fgg bool, _afe error) {
	if _gbb == nil {
		var shown []_e.PdfObject
		if _cge > 0 {
			shown = _dge.Params
			if len(shown) > _cge {
				shown = shown[:_cge]
			}
		}
		_fgc.Log.Debug("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076", _dge.Operand, shown)
	}
	if _cge >= 0 && len(_dge.Params) != _cge {
		if _gbff {
			_afe = _g.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
		}
		_fgc.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _dge.Operand, _cge, len(_dge.Params), _dge.Params)
		return false, _afe
	}
	return true, nil
}
2021-06-21 14:01:56 +00:00
2021-09-23 22:37:42 +00:00
// New returns an Extractor instance for extracting content from the input PDF page.
//
// The page's content streams, resources and MediaBox are captured up front.
// A MediaBox whose X or Y coordinates are reversed is normalized in the
// returned Extractor. An error is returned when the content streams or the
// MediaBox cannot be read from `page`.
func New(page *_bbe.PdfPage) (*Extractor, error) {
	const usageKey = "extractor.New"

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}
	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _gfc.Errorf("extractor requires mediaBox. %v", err)
	}

	ext := &Extractor{
		_ddf: contents,
		_da:  page.Resources,
		_caf: *mediaBox,
		_ea:  map[string]fontEntry{},
		_ee:  map[string]textResult{},
	}

	// Normalize the (copied) MediaBox so lower-left really is lower-left.
	box := &ext._caf
	if box.Llx > box.Urx {
		_fgc.Log.Info("MediaBox has X coordinates reversed. %.2f Fixing.", ext._caf)
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		_fgc.Log.Info("MediaBox has Y coordinates reversed. %.2f Fixing.", ext._caf)
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	_fge.TrackUse(usageKey)
	return ext, nil
}

// setTextRenderMode stores `_agc`, converted to a RenderMode, in the current
// text state. A nil receiver is a no-op.
func (_deg *textObject) setTextRenderMode(_agc int) {
	if _deg != nil {
		_deg._eada._fgec = RenderMode(_agc)
	}
}

// _fede builds a wordBag from `_bcgbf`. The bag is seeded from the first word
// via _dca; every further word is appended to the bin selected by _fbgb and the
// bag's bounding box is grown to include it. The bag is sorted before it is
// returned. `_bcgbf` must not be empty.
func _fede(_bcgbf []*textWord, _abbb float64, _abeb, _bfbf rulingList) *wordBag {
	bag := _dca(_bcgbf[0], _abbb, _abeb, _bfbf)
	for i := 1; i < len(_bcgbf); i++ {
		w := _bcgbf[i]
		bin := _fbgb(w._fdcbf)
		bag._gbec[bin] = append(bag._gbec[bin], w)
		bag.PdfRectangle = _eacc(bag.PdfRectangle, w.PdfRectangle)
	}
	bag.sort()
	return bag
}

// aligned reports whether the fraction of rulings in `_fegdc` that have no
// grid-intersecting partner is at most 1.0-_agd. Lists with fewer than two
// rulings are never aligned.
func (_fegdc rulingList) aligned() bool {
	if len(_fegdc) < 2 {
		return false
	}

	// matches[r] counts the later rulings that were found to grid-intersect r.
	matches := map[*ruling]int{_fegdc[0]: 0}
	for _, candidate := range _fegdc[1:] {
		found := false
		for known := range matches {
			if candidate.gridIntersecting(known) {
				matches[known]++
				found = true
				break
			}
		}
		if !found {
			matches[candidate] = 0
		}
	}

	unmatched := 0
	for _, count := range matches {
		if count == 0 {
			unmatched++
		}
	}
	ratio := float64(unmatched) / float64(len(_fegdc))
	ok := ratio <= 1.0-_agd
	if _bcbc {
		_fgc.Log.Info("aligned=%t unmatched=%.2f=%d/%d vecs=%s",
			ok, ratio, unmatched, len(_fegdc), _fegdc.String())
	}
	return ok
}

// complete reports whether the tile has all four borders.
func (_bdbf gridTile) complete() bool {
	return _bdbf.numBorders() == 4
}

// getStrokeColor returns the stroking color of the graphics state, converted
// by _accf from the stroking colorspace and color.
func (_aca *textObject) getStrokeColor() _a.Color {
	gs := _aca._fag
	return _accf(gs.ColorspaceStroking, gs.ColorStroking)
}

// _cadd classifies the segment between `_dgca` and `_abac` by passing its
// absolute X and Y extents, together with _bdfg, to _eeaff.
func _cadd(_dgca, _abac _gfa.Point) rulingKind {
	dx := _gf.Abs(_dgca.X - _abac.X)
	dy := _gf.Abs(_dgca.Y - _abac.Y)
	return _eeaff(dx, dy, _bdfg)
}
2021-06-21 14:01:56 +00:00
2021-09-23 22:37:42 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	// Populated by New from the page: content streams, resources, MediaBox.
	_ddf string
	_da  *_bbe.PdfPageResources
	_caf _bbe.PdfRectangle

	// Created empty by New.
	_ea map[string]fontEntry
	_ee map[string]textResult

	_ae  int64
	_eab int
}

// yNeighbours returns, via eventNeighbours, a map from each para to the
// indexes of paras whose vertical extents interact with it. Each para
// contributes an "enter" event at its Lly and a "leave" event at its Ury; when
// `_babda` is non-zero both are pushed outwards by `_babda` times the para's
// font size.
func (_dfca paraList) yNeighbours(_babda float64) map[*textPara][]int {
	events := make([]event, 2*len(_dfca))
	for i, p := range _dfca {
		lo, hi := p.Lly, p.Ury
		if _babda != 0 {
			lo -= _babda * p.fontsize()
			hi += _babda * p.fontsize()
		}
		events[2*i] = event{lo, true, i}
		events[2*i+1] = event{hi, false, i}
	}
	return _dfca.eventNeighbours(events)
}

// _agbe compares `_ffg` and `_affd` using _gffd and, when that result is
// treated as zero by _bfab, falls back to _gadg.
func _agbe(_ffg, _affd bounded) float64 {
	if primary := _gffd(_ffg, _affd); !_bfab(primary) {
		return primary
	}
	return _gadg(_ffg, _affd)
}

// _dbcf lays out `_afbf` consecutive rows. Row i occupies len(_cbad[i])+1
// slots; the returned slice holds the starting slot of every row and the
// returned int is the total number of slots.
func _dbcf(_afbf int, _cbad map[int][]float64) ([]int, int) {
	starts := make([]int, _afbf)
	next := 0
	for row := range starts {
		starts[row] = next
		next += len(_cbad[row]) + 1
	}
	return starts, next
}

// devicePoint transforms (`_dfgc`, `_dddde`) by the product of the state's
// _bce and _beg matrices and returns the result as a Point.
func (_gbffc *shapesState) devicePoint(_dfgc, _dddde float64) _gfa.Point {
	m := _gbffc._bce.Mult(_gbffc._beg)
	x, y := m.Transform(_dfgc, _dddde)
	return _gfa.NewPoint(x, y)
}
2021-07-30 00:21:16 +00:00
2021-09-23 22:37:42 +00:00
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall
// cause glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of
// the three. Stroking, filling, and clipping shall have the same effects for a text object as they
// do for a path object (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int

// _bbdfb returns a new slice holding the elements of `_ecbc` in reverse order.
// The input is not modified.
func _bbdfb(_ecbc []int) []int {
	n := len(_ecbc)
	reversed := make([]int, n)
	for i := 0; i < n; i++ {
		reversed[n-1-i] = _ecbc[i]
	}
	return reversed
}

// blocked reports whether a ruling separates `_abegb` from the bag. The X
// direction is checked against the bag's _ffbe rulings and the Y direction
// against its _ddaa rulings; a direction is only checked when the word lies
// entirely to one side of the bag in that direction.
func (_caec *wordBag) blocked(_abegb *textWord) bool {
	switch {
	case _abegb.Urx < _caec.Llx:
		from := _gafb(_abegb.PdfRectangle)
		to := _geeaa(_caec.PdfRectangle)
		if _caec._ffbe.blocks(from, to) {
			if _eeeb {
				_fgc.Log.Info("blocked \u2190x: %s %s", _abegb, _caec)
			}
			return true
		}
	case _caec.Urx < _abegb.Llx:
		from := _gafb(_caec.PdfRectangle)
		to := _geeaa(_abegb.PdfRectangle)
		if _caec._ffbe.blocks(from, to) {
			if _eeeb {
				_fgc.Log.Info("blocked x\u2192 : %s %s", _abegb, _caec)
			}
			return true
		}
	}

	switch {
	case _abegb.Ury < _caec.Lly:
		from := _gabfc(_abegb.PdfRectangle)
		to := _aeae(_caec.PdfRectangle)
		if _caec._ddaa.blocks(from, to) {
			if _eeeb {
				_fgc.Log.Info("blocked \u2190y: %s %s", _abegb, _caec)
			}
			return true
		}
	case _caec.Ury < _abegb.Lly:
		from := _gabfc(_caec.PdfRectangle)
		to := _aeae(_abegb.PdfRectangle)
		if _caec._ddaa.blocks(from, to) {
			if _eeeb {
				_fgc.Log.Info("blocked y\u2192 : %s %s", _abegb, _caec)
			}
			return true
		}
	}
	return false
}

// removeDuplicates drops every point that _fded considers equal to the point
// kept immediately before it. An empty subpath is left unchanged.
func (_aedg *subpath) removeDuplicates() {
	pts := _aedg._gbee
	if len(pts) == 0 {
		return
	}
	kept := []_gfa.Point{pts[0]}
	for i := 1; i < len(pts); i++ {
		if _fded(pts[i], kept[len(kept)-1]) {
			continue
		}
		kept = append(kept, pts[i])
	}
	_aedg._gbee = kept
}

// _bdgea reports whether `_adfec` and `_adag` differ by no more than _ddee.
func _bdgea(_adfec, _adag float64) bool {
	diff := _gf.Abs(_adfec - _adag)
	return diff <= _ddee
}
// close closes the subpath: unless the last point already equals the first
// (per _fded), the first point is appended again. The subpath is then flagged
// as closed and consecutive duplicate points are removed.
// The subpath must contain at least one point.
func (_cgf *subpath) close() {
	first := _cgf._gbee[0]
	if !_fded(first, _cgf.last()) {
		_cgf.add(first)
	}
	_cgf._aef = true
	_cgf.removeDuplicates()
}

// _eda reports whether both _dgad and _gdee hold for the two rectangles.
func _eda(_ded, _abbg _bbe.PdfRectangle) bool {
	if !_dgad(_ded, _abbg) {
		return false
	}
	return _gdee(_ded, _abbg)
}

// _gebd builds a textWord from `_added`. Its rectangle is the union of the
// marks' rectangles, its _gddg is the largest _gbae among the marks, and its
// _fdcbf is the distance from the top of `_gcgd` down to the bottom of the
// word. `_added` must not be empty.
func _gebd(_added []*textMark, _gcgd _bbe.PdfRectangle) *textWord {
	bounds := _added[0].PdfRectangle
	largest := _added[0]._gbae
	for i := 1; i < len(_added); i++ {
		m := _added[i]
		bounds = _eacc(bounds, m.PdfRectangle)
		if m._gbae > largest {
			largest = m._gbae
		}
	}
	return &textWord{
		PdfRectangle: bounds,
		_cgceb:       _added,
		_fdcbf:       _gcgd.Ury - bounds.Lly,
		_gddg:        largest,
	}
}

// getFontDict looks up the font object named `_cdga` in the text object's
// resources. It returns (nil, nil) when there are no resources at all and an
// error when the resources do not contain the named font.
func (_bdf *textObject) getFontDict(_cdga string) (_fdaa _e.PdfObject, _acbe error) {
	resources := _bdf._gbde
	if resources == nil {
		_fgc.Log.Debug("getFontDict. No resources. name=%#q", _cdga)
		return nil, nil
	}

	fontObj, found := resources.GetFontByName(_e.PdfObjectName(_cdga))
	if found {
		return fontObj, nil
	}
	_fgc.Log.Debug("ERROR: getFontDict: Font not found: name=%#q", _cdga)
	return nil, _g.New("font not in resources")
}
2021-07-30 00:21:16 +00:00
2021-09-23 22:37:42 +00:00
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_ge *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	ctx := &imageExtractContext{_fde: options}
	if err := ctx.extractContentStreamImages(_ge._ddf, _ge._da); err != nil {
		return nil, err
	}
	return &PageImages{Images: ctx._aad}, nil
}

// paraList is a sequence of text paragraphs.
type paraList []*textPara

// findTableGrid tries to build a table whose cells are the tiles of `_fabdd`
// filled with the paras of `_acbfa` that fall in each tile (per inTile).
//
// It returns (nil, nil) as soon as the number of empty tiles exceeds
// (1-_becad) of all tiles, and also when no column has a first-row cell that
// is missing or has _eecg unset. Otherwise it returns the table after
// reduceTiling and subdivide, together with the set of paras that were placed
// in some tile.
func (_acbfa paraList) findTableGrid(_fabdd gridTiling) (*textTable, map[*textPara]struct{}) {
	w := len(_fabdd._fcgfg)
	h := len(_fabdd._ccge)
	grid := textTable{
		_gbaca: true,
		_caea:  w,
		_dccb:  h,
		_bffe:  make(map[uint64]*textPara, w*h),
		_aeffd: make(map[uint64]compositeCell, w*h),
	}
	placed := make(map[*textPara]struct{})
	maxEmpty := int((1.0 - _becad) * float64(w*h))
	numEmpty := 0
	if _eedf {
		_fgc.Log.Info("findTableGrid: %d x %d", w, h)
	}

	for y, rowKey := range _fabdd._ccge {
		row, haveRow := _fabdd._dddg[rowKey]
		if !haveRow {
			continue
		}
		for x, colKey := range _fabdd._fcgfg {
			tile, haveTile := row[colKey]
			if !haveTile {
				continue
			}
			inside := _acbfa.inTile(tile)
			if len(inside) > 0 {
				grid.putComposite(x, y, inside, tile.PdfRectangle)
				for _, p := range inside {
					placed[p] = struct{}{}
				}
				continue
			}
			numEmpty++
			if numEmpty > maxEmpty {
				if _eedf {
					_fgc.Log.Info("!numEmpty=%d", numEmpty)
				}
				return nil, nil
			}
		}
	}

	numHeader := 0
	for x := 0; x < w; x++ {
		if top := grid.get(x, 0); top == nil || !top._eecg {
			numHeader++
		}
	}
	if numHeader == 0 {
		if _eedf {
			_fgc.Log.Info("!numHeader=0")
		}
		return nil, nil
	}

	reduced := grid.reduceTiling(_fabdd, _gbce)
	reduced = reduced.subdivide()
	return reduced, placed
}

// setWordSpacing stores `_fecc` in the current text state's _bea field.
// A nil receiver is a no-op.
func (_befg *textObject) setWordSpacing(_fecc float64) {
	if _befg != nil {
		_befg._eada._bea = _fecc
	}
}

// setFont records the font size `_ddea` and then resolves the font named
// `_ffef` with getFont. The size is stored even if the lookup fails; the font
// itself is stored only on success. A nil receiver is a no-op.
func (_gac *textObject) setFont(_ffef string, _ddea float64) error {
	if _gac == nil {
		return nil
	}
	_gac._eada._gbaf = _ddea
	font, err := _gac.getFont(_ffef)
	if err == nil {
		_gac._eada._ebfb = font
	}
	return err
}

// compositeRowCorridors returns, for each row index from 1 up to the table
// height, the result of _feca over that row's composite cells. Rows without
// composite cells get no entry.
func (_deec *textTable) compositeRowCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, _deec._dccb)
	if _bdfb {
		_fgc.Log.Info("compositeRowCorridors: h=%d", _deec._dccb)
	}
	for y := 1; y < _deec._dccb; y++ {
		var rowCells []compositeCell
		for x := 0; x < _deec._caea; x++ {
			c, present := _deec._aeffd[_aebd(x, y)]
			if present {
				rowCells = append(rowCells, c)
			}
		}
		if len(rowCells) == 0 {
			continue
		}
		rowCorridors := _feca(rowCells)
		corridors[y] = rowCorridors
		if _bdfb {
			_gfc.Printf("   %2d: %6.2f\n", y, rowCorridors)
		}
	}
	return corridors
}

// bbox returns the mark's bounding rectangle.
func (_ddcb *textMark) bbox() _bbe.PdfRectangle {
	return _ddcb.PdfRectangle
}

// _bfab reports whether the magnitude of `_afefe` is below _beeff.
func _bfab(_afefe float64) bool {
	magnitude := _gf.Abs(_afefe)
	return magnitude < _beeff
}

// vertsHorzs splits the list by kind: rulings of kind _bgecg are returned
// first, rulings of kind _eeca second. Rulings of any other kind are dropped.
func (_aage rulingList) vertsHorzs() (rulingList, rulingList) {
	var first, second rulingList
	for _, r := range _aage {
		if r._ccfa == _bgecg {
			first = append(first, r)
		} else if r._ccfa == _eeca {
			second = append(second, r)
		}
	}
	return first, second
}
2021-05-31 17:17:31 +00:00
2021-09-23 22:37:42 +00:00
// String returns a description of `l`: its _fdga value, rectangle, font size
// and text.
func (_abdc *textLine) String() string {
	const layout = "%.2f %6.2f fontsize=%.2f \"%s\""
	return _gfc.Sprintf(layout, _abdc._fdga, _abdc.PdfRectangle, _abdc._ddac, _abdc.text())
}

// intersects reports whether the two rulings are of the kinds _bgecg and _eeca
// (in either order) and each one's _eacd lies within the other's
// [_dgbc, _cgcfd] span, widened by _ddee at both ends.
func (_fdfef *ruling) intersects(_aacf *ruling) bool {
	orthogonal := (_fdfef._ccfa == _bgecg && _aacf._ccfa == _eeca) ||
		(_aacf._ccfa == _bgecg && _fdfef._ccfa == _eeca)

	// spans reports whether b's _eacd falls inside a's tolerant span.
	spans := func(a, b *ruling) bool {
		return a._dgbc-_ddee <= b._eacd && b._eacd <= a._cgcfd+_ddee
	}
	o1 := spans(_fdfef, _aacf)
	o2 := spans(_aacf, _fdfef)
	result := orthogonal && o1 && o2

	if _bcbc {
		_gfc.Printf("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+
			"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+
			" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",
			orthogonal, o1, o2, result, _fdfef, _aacf)
	}
	return result
}

// _fcbde concatenates all the lists in `_bfgf` and splits the result with
// vertsHorzs.
func _fcbde(_bfgf []rulingList) (rulingList, rulingList) {
	var combined rulingList
	for i := range _bfgf {
		combined = append(combined, _bfgf[i]...)
	}
	return combined.vertsHorzs()
}

// _fdfe drains `_bfgb` into a list of new word bags.
//
// For every depth index it repeatedly takes the first word in reading order,
// starts a bag from it (via _dca) and then grows that bag with scanBand until
// a full pass adds nothing. Each pass scans a "vertical" band, then a
// "horizontal" band; only when neither added words does it probe (without
// pulling) to the right and, depending on how many words were found relative
// to the bag's depth, pull them with the "other" scan.
func _fdfe(_bfgb *wordBag, _faab float64, _bbd, _dged rulingList) []*wordBag {
	var bags []*wordBag
	for _, depthIdx := range _bfgb.depthIndexes() {
		grew := false
		for !_bfgb.empty(depthIdx) {
			readingIdx := _bfgb.firstReadingIndex(depthIdx)
			seed := _bfgb.firstWord(readingIdx)
			bag := _dca(seed, _faab, _bbd, _dged)
			_bfgb.removeWord(seed, readingIdx)
			if _cegg {
				_fgc.Log.Info("firstWord ^^^^ %s", seed.String())
			}

			// Keep scanning for as long as the previous pass added something.
			for again := true; again; again = grew {
				grew = false
				rightGap := _fdee * bag._ggcf
				readingGap := _adaf * bag._ggcf
				depthGap := _efbe * bag._ggcf
				if _cegg {
					_fgc.Log.Info("paraWords depth %.2f - %.2f maxIntraDepthGap=%.2f maxIntraReadingGap=%.2f",
						bag.minDepth(), bag.maxDepth(), depthGap, readingGap)
				}

				if _bfgb.scanBand("vertical", bag, _cecbd(_dbcc, 0),
					bag.minDepth()-depthGap, bag.maxDepth()+depthGap, _geec, false, false) > 0 {
					grew = true
				}
				if _bfgb.scanBand("horizontal", bag, _cecbd(_dbcc, readingGap),
					bag.minDepth(), bag.maxDepth(), _edaa, false, false) > 0 {
					grew = true
				}
				if grew {
					continue
				}

				found := _bfgb.scanBand("", bag, _cecbd(_cedf, rightGap),
					bag.minDepth(), bag.maxDepth(), _ggfea, true, false)
				if found > 0 {
					depthInFonts := (bag.maxDepth() - bag.minDepth()) / bag._ggcf
					if (found > 1 && float64(found) > 0.3*depthInFonts) || found <= 10 {
						if _bfgb.scanBand("other", bag, _cecbd(_cedf, rightGap),
							bag.minDepth(), bag.maxDepth(), _ggfea, false, true) > 0 {
							grew = true
						}
					}
				}
			}
			bags = append(bags, bag)
		}
	}
	return bags
}

// _cgag builds a gridTile for `_eecae`. Each of the four border flags is set
// when the corresponding ruling is non-nil and encloses the tile's extent:
// the first two rulings are tested against the Y extent (Lly..Ury), the last
// two against the X extent (Llx..Urx).
func _cgag(_eecae _bbe.PdfRectangle, _dbba, _ccfb, _dgega, _aadced *ruling) gridTile {
	x0, x1 := _eecae.Llx, _eecae.Urx
	y0, y1 := _eecae.Lly, _eecae.Ury
	return gridTile{
		PdfRectangle: _eecae,
		_bcbb:        _dbba != nil && _dbba.encloses(y0, y1),
		_ddgab:       _ccfb != nil && _ccfb.encloses(y0, y1),
		_ebdg:        _dgega != nil && _dgega.encloses(x0, x1),
		_bacb:        _aadced != nil && _aadced.encloses(x0, x1),
	}
}

// bbox returns the para's bounding rectangle.
func (_begg *textPara) bbox() _bbe.PdfRectangle {
	return _begg.PdfRectangle
}

// ruling describes a ruling: its kind, the kind of mark it came from, its
// color and four float64 values.
type ruling struct {
	_ccfa rulingKind
	_bdcf markKind
	_a.Color
	_eacd  float64
	_dgbc  float64
	_cgcfd float64
	_fdaf  float64
}

// markWordBoundaries flags (via _aaad) every word, other than the first, whose
// gap to the preceding word as measured by _cfdac is at least _cbbb times the
// line's font size. The line must contain at least one word.
func (_cbf *textLine) markWordBoundaries() {
	threshold := _cbbb * _cbf._ddac
	words := _cbf._gfbc
	for i := 1; i < len(words); i++ {
		if _cfdac(words[i], words[i-1]) >= threshold {
			words[i]._aaad = true
		}
	}
	_ = words[1:] // preserves the original's bounds check on an empty line
}
2021-05-31 17:17:31 +00:00
2021-08-13 01:33:42 +00:00
// String returns a string describing `pt`.
2021-09-23 22:37:42 +00:00
// String returns a string describing `pt`: a header line, one line per text
// mark, and a footer line.
func (_gfce PageText) String() string {
	summary := _gfc.Sprintf("PageText: %d elements", len(_gfce._cgb))
	lines := make([]string, 0, len(_gfce._cgb)+2)
	lines = append(lines, "-"+summary)
	for _, mark := range _gfce._cgb {
		lines = append(lines, mark.String())
	}
	lines = append(lines, "+"+summary)
	return _fg.Join(lines, "\n")
}

// moveLP concatenates a translation by (`_afga`, `_eeg`) onto the _eca matrix
// and then copies _eca into _adb.
func (_bcab *textObject) moveLP(_afga, _eeg float64) {
	_bcab._eca.Concat(_gfa.NewMatrix(1, 0, 0, 1, _afga, _eeg))
	_bcab._adb = _bcab._eca
}

// _eacc returns the smallest rectangle that contains both `_cgdb` and `_gddc`.
func _eacc(_cgdb, _gddc _bbe.PdfRectangle) _bbe.PdfRectangle {
	return _bbe.PdfRectangle{
		Llx: _gf.Min(_cgdb.Llx, _gddc.Llx),
		Lly: _gf.Min(_cgdb.Lly, _gddc.Lly),
		Urx: _gf.Max(_cgdb.Urx, _gddc.Urx),
		Ury: _gf.Max(_cgdb.Ury, _gddc.Ury),
	}
}

// hasLines reports whether _eda holds between the cell's rectangle and the
// rectangle of at least one line in `_cedd`.
func (_gfaa compositeCell) hasLines(_cedd []*textLine) bool {
	for i, line := range _cedd {
		hit := _eda(_gfaa.PdfRectangle, line.PdfRectangle)
		if _bdfb {
			_gfc.Printf("      ^^^intersects=%t %d of %d\n", hit, i, len(_cedd))
			_gfc.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", _gfaa)
			_gfc.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if hit {
			return true
		}
	}
	return false
}

// splitSec sorts the list in place by (_dgbc, _cgcfd) and partitions it into
// groups. A ruling joins a group when alignsSec holds between it and any
// ruling already in that group; every ruling ends up in exactly one group.
// An empty list yields nil.
func (_bfbeb rulingList) splitSec() []rulingList {
	// Guard: the grouping below seeds itself from element 0.
	if len(_bfbeb) == 0 {
		return nil
	}
	_c.Slice(_bfbeb, func(i, j int) bool {
		a, b := _bfbeb[i], _bfbeb[j]
		if a._dgbc != b._dgbc {
			return a._dgbc < b._dgbc
		}
		return a._cgcfd < b._cgcfd
	})

	assigned := make(map[*ruling]struct{}, len(_bfbeb))
	// collect gathers the group seeded by `seed` from the unassigned rulings.
	collect := func(seed *ruling) rulingList {
		group := rulingList{seed}
		assigned[seed] = struct{}{}
		for _, candidate := range _bfbeb {
			if _, done := assigned[candidate]; done {
				continue
			}
			for _, member := range group {
				if candidate.alignsSec(member) {
					group = append(group, candidate)
					assigned[candidate] = struct{}{}
					break
				}
			}
		}
		return group
	}

	groups := []rulingList{collect(_bfbeb[0])}
	for _, r := range _bfbeb[1:] {
		if _, done := assigned[r]; done {
			continue
		}
		groups = append(groups, collect(r))
	}
	return groups
}

// _fagda appends `_deab` to `_dggf` with its Offset set to the current value
// of `*_eedg`, then advances `*_eedg` by the byte length of the mark's text.
func _fagda(_dggf []TextMark, _eedg *int, _deab TextMark) []TextMark {
	_deab.Offset = *_eedg
	*_eedg += len(_deab.Text)
	return append(_dggf, _deab)
}

// bbox returns the bounding rectangle of all points of all subpaths in the
// section. The first subpath must contain at least one point.
func (_baag pathSection) bbox() _bbe.PdfRectangle {
	origin := _baag._dfaa[0]._gbee[0]
	box := _bbe.PdfRectangle{Llx: origin.X, Urx: origin.X, Lly: origin.Y, Ury: origin.Y}
	grow := func(p _gfa.Point) {
		if p.X < box.Llx {
			box.Llx = p.X
		} else if p.X > box.Urx {
			box.Urx = p.X
		}
		if p.Y < box.Lly {
			box.Lly = p.Y
		} else if p.Y > box.Ury {
			box.Ury = p.Y
		}
	}
	for _, p := range _baag._dfaa[0]._gbee[1:] {
		grow(p)
	}
	for _, sp := range _baag._dfaa[1:] {
		for _, p := range sp._gbee {
			grow(p)
		}
	}
	return box
}

// _baaf removes the last rune from the text of the last mark in `_bdbe` and
// keeps `*_ecacg` (the running byte offset) in step.
//
// If the last mark holds exactly one rune the mark itself is dropped and the
// offset is reset to the end of the new last mark, or to the dropped mark's
// own Offset when no marks remain. Otherwise the text is shortened with _gfge
// and the shortened text is stored back into the slice. An empty `_bdbe` is
// returned unchanged.
func _baaf(_bdbe []TextMark, _ecacg *int) []TextMark {
	if len(_bdbe) == 0 {
		return _bdbe
	}
	lastIdx := len(_bdbe) - 1
	last := _bdbe[lastIdx]

	if len([]rune(last.Text)) == 1 {
		_bdbe = _bdbe[:lastIdx]
		if lastIdx == 0 {
			// _fagda sets a mark's Offset to the running offset at append time,
			// so this undoes the append of the only mark.
			*_ecacg = last.Offset
			return _bdbe
		}
		prev := _bdbe[lastIdx-1]
		*_ecacg = prev.Offset + len(prev.Text)
		return _bdbe
	}

	trimmed := _gfge(last.Text)
	*_ecacg += len(trimmed) - len(last.Text)
	// Write through to the slice element: `last` is only a copy.
	_bdbe[lastIdx].Text = trimmed
	return _bdbe
}

// _bfage returns the negated Lly of `_efad`'s bounding box.
func _bfage(_efad bounded) float64 {
	return -_efad.bbox().Lly
}
2021-05-31 17:17:31 +00:00
2021-09-23 22:37:42 +00:00
// String returns a string describing `tm`: offset, quoted text, the text's
// runes in hex, the bounding box corners, the font description (cut to 50
// bytes plus "...") and a " *M*" marker when the mark is a Meta mark.
func (_gbg TextMark) String() string {
	box := _gbg.BBox

	fontDesc := ""
	if _gbg.Font != nil {
		fontDesc = _gbg.Font.String()
		if len(fontDesc) > 50 {
			fontDesc = fontDesc[:50] + "..."
		}
	}

	metaTag := ""
	if _gbg.Meta {
		metaTag = " *M*"
	}

	return _gfc.Sprintf("{TextMark: %d %q=%02x (%6.2f, %6.2f) (%6.2f, %6.2f) %s%s}",
		_gbg.Offset, _gbg.Text, []rune(_gbg.Text),
		box.Llx, box.Lly, box.Urx, box.Ury, fontDesc, metaTag)
}

var (
	// _dfcad maps a rune to a string holding a single combining mark in the
	// U+0300..U+0359 range.
	_dfcad = map[rune]string{
		0x0060: "\u0300",
		0x02CB: "\u0300",
		0x0027: "\u0301",
		0x00B4: "\u0301",
		0x02B9: "\u0301",
		0x02CA: "\u0301",
		0x005E: "\u0302",
		0x02C6: "\u0302",
		0x007E: "\u0303",
		0x02DC: "\u0303",
		0x00AF: "\u0304",
		0x02C9: "\u0304",
		0x02D8: "\u0306",
		0x02D9: "\u0307",
		0x00A8: "\u0308",
		0x00B0: "\u030a",
		0x02DA: "\u030a",
		0x02BA: "\u030b",
		0x02DD: "\u030b",
		0x02C7: "\u030c",
		0x02C8: "\u030d",
		0x0022: "\u030e",
		0x02BB: "\u0312",
		0x02BC: "\u0313",
		0x0486: "\u0313",
		0x055A: "\u0313",
		0x02BD: "\u0314",
		0x0485: "\u0314",
		0x0559: "\u0314",
		0x02D4: "\u031d",
		0x02D5: "\u031e",
		0x02D6: "\u031f",
		0x02D7: "\u0320",
		0x02B2: "\u0321",
		0x00B8: "\u0327",
		0x02CC: "\u0329",
		0x02B7: "\u032b",
		0x02CD: "\u0331",
		0x005F: "\u0332",
		0x204E: "\u0359",
	}
)

// setHorizScaling stores `_edb` in the current text state's _fbd field.
// A nil receiver is a no-op.
func (_fcd *textObject) setHorizScaling(_edb float64) {
	if _fcd != nil {
		_fcd._eada._fbd = _edb
	}
}

// drawRectangle starts a new subpath and traces the rectangle with corner
// (`_bcde`, `_fffc`), width `_dbc` and height `_fbfg`, then closes the path.
// When _egf is set, the rectangle is first logged in device coordinates.
func (_bcdf *shapesState) drawRectangle(_bcde, _fffc, _dbc, _fbfg float64) {
	xFar := _bcde + _dbc
	yFar := _fffc + _fbfg

	if _egf {
		near := _bcdf.devicePoint(_bcde, _fffc)
		far := _bcdf.devicePoint(xFar, yFar)
		deviceRect := _bbe.PdfRectangle{Llx: near.X, Lly: near.Y, Urx: far.X, Ury: far.Y}
		_fgc.Log.Info("drawRectangle: %6.2f", deviceRect)
	}

	_bcdf.newSubPath()
	_bcdf.moveTo(_bcde, _fffc)
	_bcdf.lineTo(xFar, _fffc)
	_bcdf.lineTo(xFar, yFar)
	_bcdf.lineTo(_bcde, yFar)
	_bcdf.closePath()
}

// extractInlineImage converts the inline image `_gba` to RGB and appends an
// ImageMark for it to the context. The mark's size, angle and position are
// taken from the CTM of `_ac`. A missing colorspace defaults to DeviceGray.
// Any conversion error is returned unchanged.
func (_gfd *imageExtractContext) extractInlineImage(_gba *_ca.ContentStreamInlineImage, _ac _ca.GraphicsState, _cda *_bbe.PdfPageResources) error {
	img, err := _gba.ToImage(_cda)
	if err != nil {
		return err
	}

	colorspace, err := _gba.GetColorSpace(_cda)
	if err != nil {
		return err
	}
	if colorspace == nil {
		colorspace = _bbe.NewPdfColorspaceDeviceGray()
	}

	rgb, err := colorspace.ImageToRGB(*img)
	if err != nil {
		return err
	}

	ctm := _ac.CTM
	mark := ImageMark{
		Image:  &rgb,
		Width:  ctm.ScalingFactorX(),
		Height: ctm.ScalingFactorY(),
		Angle:  ctm.Angle(),
	}
	mark.X, mark.Y = ctm.Translation()

	_gfd._aad = append(_gfd._aad, mark)
	_gfd._ce++
	return nil
}
// String returns a description of `w`: its _fdcbf value, rectangle, font size
// and text.
func (_gdgd *textWord) String() string {
	const layout = "%.2f %6.2f fontsize=%.2f \"%s\""
	return _gfc.Sprintf(layout, _gdgd._fdcbf, _gdgd.PdfRectangle, _gdgd._gddg, _gdgd._fadea)
}

// get returns the cell stored at (`_gbdff`, `_afcc`), or nil if there is none.
func (_bbbf *textTable) get(_gbdff, _afcc int) *textPara {
	key := _aebd(_gbdff, _afcc)
	return _bbbf._bffe[key]
}

// rectRuling is a rectangle together with a ruling kind, a mark kind and a
// color.
type rectRuling struct {
	_bbabc rulingKind
	_eaee  markKind
	_a.Color
	_bbe.PdfRectangle
}

// _beaac returns the smaller of the two ints.
func _beaac(_eaec, _dace int) int {
	if _dace <= _eaec {
		return _dace
	}
	return _eaec
}

// _efcfa removes the element at index `_fccaa` from `_ddbb` by shifting the
// tail down one place. The backing array of `_ddbb` is reused.
func _efcfa(_ddbb []*textWord, _fccaa int) []*textWord {
	return append(_ddbb[:_fccaa], _ddbb[_fccaa+1:]...)
}

// _cbebd reports whether the magnitude of `_bbgga` is below _eedc.
func _cbebd(_bbgga float64) bool {
	magnitude := _gf.Abs(_bbgga)
	return magnitude < _eedc
}
2021-05-31 17:17:31 +00:00
2021-08-13 01:33:42 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
2021-09-23 22:37:42 +00:00
// ExtractText returns the text of ExtractTextWithStats, discarding the two
// statistics values, together with any error.
func (_egd *Extractor) ExtractText() (string, error) {
	text, _, _, err := _egd.ExtractTextWithStats()
	return text, err
}

// allWords returns every word held in the bag's bins. The bins are visited in
// map iteration order, so the order of the result is unspecified.
func (_dfbc *wordBag) allWords() []*textWord {
	var words []*textWord
	for _, bin := range _dfbc._gbec {
		words = append(words, bin...)
	}
	return words
}

// _gbdc merges word bags that are contained in other word bags.
//
// `_feef` is sorted in place by decreasing area (ties: decreasing height, then
// index). Walking that order, each bag that has not itself been absorbed
// absorbs every later, not yet absorbed bag whose rectangle lies inside its
// own rectangle extended to the left by its _ggcf. The surviving bags are
// returned.
func _gbdc(_feef []*wordBag) []*wordBag {
	if len(_feef) <= 1 {
		return _feef
	}
	if _acd {
		_fgc.Log.Info("mergeWordBags:")
	}

	_c.Slice(_feef, func(i, j int) bool {
		a, b := _feef[i], _feef[j]
		areaA := a.Width() * a.Height()
		areaB := b.Width() * b.Height()
		if areaA != areaB {
			return areaA > areaB
		}
		if a.Height() != b.Height() {
			return a.Height() > b.Height()
		}
		return i < j
	})

	var merged []*wordBag
	absorbed := make(intSet)
	for outer := 0; outer < len(_feef); outer++ {
		if absorbed.has(outer) {
			continue
		}
		host := _feef[outer]
		for inner := outer + 1; inner < len(_feef); inner++ {
			// Skip bags that an earlier host already took. The original tested
			// the outer index here, which is never absorbed at this point, so
			// a bag could be absorbed a second time.
			if absorbed.has(inner) {
				continue
			}
			guest := _feef[inner]
			reach := host.PdfRectangle
			reach.Llx -= host._ggcf
			if _cced(reach, guest.PdfRectangle) {
				host.absorb(guest)
				absorbed.add(inner)
			}
		}
		merged = append(merged, host)
	}

	if len(_feef) != len(merged)+len(absorbed) {
		_fgc.Log.Error("mergeWordBags: %d\u2192%d absorbed=%d", len(_feef), len(merged), len(absorbed))
	}
	return merged
}

// _edfe returns the indexes 0.._gcdf-1 sorted so that index a comes before
// index b when `_aedc`(a, b) is true.
func _edfe(_gcdf int, _aedc func(int, int) bool) []int {
	order := make([]int, _gcdf)
	for i := range order {
		order[i] = i
	}
	_c.Slice(order, func(a, b int) bool {
		return _aedc(order[a], order[b])
	})
	return order
}
2021-05-31 17:17:31 +00:00
2021-09-23 22:37:42 +00:00
// Elements returns the TextMarks in `ma`.
func (_gad *TextMarkArray) Elements() []TextMark {
	return _gad._fbcc
}

// _cedf reports whether `_befb` starts at or after the right edge of `_cef`
// and less than `_bdgb` beyond it.
func _cedf(_cef *wordBag, _befb *textWord, _bdgb float64) bool {
	start := _befb.Llx
	return _cef.Urx <= start && start < _cef.Urx+_bdgb
}

// contains reports whether `_egcd` fits inside the tile. Tiles with fewer than
// three borders contain nothing. Only sides that have a border are checked,
// each with a tolerance of _afda.
func (_eded gridTile) contains(_egcd _bbe.PdfRectangle) bool {
	if _eded.numBorders() < 3 {
		return false
	}
	outside := (_eded._bcbb && _egcd.Llx < _eded.Llx-_afda) ||
		(_eded._ddgab && _egcd.Urx > _eded.Urx+_afda) ||
		(_eded._ebdg && _egcd.Lly < _eded.Lly-_afda) ||
		(_eded._bacb && _egcd.Ury > _eded.Ury+_afda)
	return !outside
}

// getRight returns, for every row, the _gadgg neighbour of the cell in the
// last column. It returns nil if any such neighbour is missing or has _eeeg
// set, or if the neighbours are not chained row to row through _cedg.
func (_cgdc *textTable) getRight() paraList {
	column := make(paraList, _cgdc._dccb)
	lastCol := _cgdc._caea - 1
	for y := range column {
		neighbour := _cgdc.get(lastCol, y)._gadgg
		if neighbour == nil || neighbour._eeeg {
			return nil
		}
		column[y] = neighbour
	}
	for y := 0; y+1 < _cgdc._dccb; y++ {
		if column[y]._cedg != column[y+1] {
			return nil
		}
	}
	return column
}

// _abdd rounds `_fafed` to the nearest multiple of _adde.
func _abdd(_fafed float64) float64 {
	steps := _gf.Round(_fafed / _adde)
	return _adde * steps
}

// _gfcd classifies a rectangle by its longer side: _eeca when it is wider than
// tall and at least _bdfg wide, _bgecg when it is at least as tall as wide and
// at least _bdfg tall, and _fgcbb otherwise.
func _gfcd(_ccbea _bbe.PdfRectangle) rulingKind {
	width := _ccbea.Width()
	height := _ccbea.Height()
	switch {
	case width > height:
		if width >= _bdfg {
			return _eeca
		}
	default:
		if height >= _bdfg {
			return _bgecg
		}
	}
	return _fgcbb
}

// depthIndexes returns the keys of the bag's bins in increasing order, or nil
// when the bag has no bins.
func (_dadeg *wordBag) depthIndexes() []int {
	if len(_dadeg._gbec) == 0 {
		return nil
	}
	keys := make([]int, len(_dadeg._gbec))
	next := 0
	for k := range _dadeg._gbec {
		keys[next] = k
		next++
	}
	_c.Ints(keys)
	return keys
}

// snapToGroups applies snapToGroupsDirection separately to the two kinds
// returned by vertsHorzs and returns the concatenation of both results.
func (_agcfg rulingList) snapToGroups() rulingList {
	first, second := _agcfg.vertsHorzs()
	if len(first) > 0 {
		first = first.snapToGroupsDirection()
	}
	if len(second) > 0 {
		second = second.snapToGroupsDirection()
	}
	snapped := append(first, second...)
	snapped.log("snapToGroups")
	return snapped
}

// log prints the non-nil paras in the list under the heading `_bcfba`.
// It does nothing unless _fcde is set.
func (_fcab paraList) log(_bcfba string) {
	if !_fcde {
		return
	}
	_fgc.Log.Info("%8s: %d paras =======-------=======", _bcfba, len(_fcab))
	for i, p := range _fcab {
		if p == nil {
			continue
		}
		text := p.text()
		tableTag := "\u0020\u0020"
		if p._gbfb != nil {
			tableTag = _gfc.Sprintf("[%dx%d]", p._gbfb._caea, p._gbfb._dccb)
		}
		_gfc.Printf("%4d: %6.2f %s %q\n", i, p.PdfRectangle, tableTag, _fbfea(text, 50))
	}
}

// _gbcb walks the non-nil entries of `_gdcge` in the key order given by
// _gfbdcc and may clamp the last value of an entry to its first value,
// storing the clamped slice under the previously visited key.
func _gbcb(_gdcge map[int][]float64) {
	if len(_gdcge) <= 1 {
		return
	}
	keys := _gfbdcc(_gdcge)
	if _bdfb {
		_fgc.Log.Info("fixCells: keys=%+v", keys)
	}

	// Locate the first key that has a non-nil entry.
	var first, prevKey int
	for first, prevKey = range keys {
		if _gdcge[prevKey] != nil {
			break
		}
	}

	for i, key := range keys[first:] {
		cur := _gdcge[key]
		if cur == nil {
			continue
		}
		if _bdfb {
			_gfc.Printf("%4d: k0=%d k1=%d\n", first+i, prevKey, key)
		}
		// NOTE(review): `prev` is read with `key`, not `prevKey`, so it is the
		// same slice as `cur`; the comparison below is then between the last
		// and first value of one entry. This looks like it was meant to read
		// the previous entry — confirm intent before changing.
		prev := _gdcge[key]
		if prev[len(prev)-1] > cur[0] {
			prev[len(prev)-1] = cur[0]
			_gdcge[prevKey] = prev
		}
		prevKey = key
	}
}

// fontEntry pairs a font with an int64 value.
type fontEntry struct {
	_gaec *_bbe.PdfFont
	_fdca int64
}

// _eafe formats `_fgfg` as "{key: values, ...}" using the key order given by
// _gfbdcc.
func _eafe(_fgfg map[int][]float64) string {
	keys := _gfbdcc(_fgfg)
	parts := make([]string, len(_fgfg))
	for i, k := range keys {
		parts[i] = _gfc.Sprintf("%d: %.2f", k, _fgfg[k])
	}
	joined := _fg.Join(parts, ", ")
	return _gfc.Sprintf("{%s}", joined)
}

// _cced reports whether `_dgeg` lies entirely within `_aabad` (edges may
// touch).
func _cced(_aabad, _dgeg _bbe.PdfRectangle) bool {
	insideX := _aabad.Llx <= _dgeg.Llx && _dgeg.Urx <= _aabad.Urx
	insideY := _aabad.Lly <= _dgeg.Lly && _dgeg.Ury <= _aabad.Ury
	return insideX && insideY
}

// _fcffc returns a space when the two values are equal according to _bfab and
// a newline otherwise.
func _fcffc(_bbff, _bbda float64) string {
	if _bfab(_bbff - _bbda) {
		return " "
	}
	return "\n"
}

// _cfgg gives a printable name for each rulingKind.
var _cfgg = map[rulingKind]string{
	_fgcbb: "none",
	_eeca:  "horizontal",
	_bgecg: "vertical",
}

// depthRange returns, in increasing order, the bin keys k of the bag with
// `_cfaa` <= k <= `_fbga`, or nil when there are none.
func (_fccf *wordBag) depthRange(_cfaa, _fbga int) []int {
	var keys []int
	for k := range _fccf._gbec {
		if k < _cfaa || k > _fbga {
			continue
		}
		keys = append(keys, k)
	}
	if len(keys) == 0 {
		return nil
	}
	_c.Ints(keys)
	return keys
}

// _fbgb converts `_cecd` to a bin index by dividing by _gecf and truncating;
// values that are not >= 0 are shifted down by one bin.
func _fbgb(_cecd float64) int {
	bin := int(_cecd / _gecf)
	if !(_cecd >= 0) {
		bin--
	}
	return bin
}

// log prints the table's size, grid flag and rectangle under the heading
// `_fade`, followed by one line per non-nil cell. It does nothing unless _bdfb
// is set.
func (_dggdd *textTable) log(_fade string) {
	if !_bdfb {
		return
	}
	_fgc.Log.Info("~~~ %s: %d x %d grid=%t\n\u0020\u0020\u0020\u0020\u0020\u0020%6.2f",
		_fade, _dggdd._caea, _dggdd._dccb, _dggdd._gbaca, _dggdd.PdfRectangle)
	for y := 0; y < _dggdd._dccb; y++ {
		for x := 0; x < _dggdd._caea; x++ {
			c := _dggdd.get(x, y)
			if c == nil {
				continue
			}
			_gfc.Printf("%4d %2d: %6.2f %q %d\n",
				x, y, c.PdfRectangle, _fbfea(c.text(), 50), _bb.RuneCountInString(c.text()))
		}
	}
}

// size returns the number of states on the stack.
func (_bgd *stateStack) size() int {
	stack := *_bgd
	return len(stack)
}

// _dca creates a wordBag that holds only `_fdfc`. The word goes in the bin
// selected by _fbgb; the bag takes the word's rectangle and font size and
// records `_edc`, `_edf` and `_gcde`.
func _dca(_fdfc *textWord, _edc float64, _edf, _gcde rulingList) *wordBag {
	bin := _fbgb(_fdfc._fdcbf)
	return &wordBag{
		_gbec:        map[int][]*textWord{bin: {_fdfc}},
		PdfRectangle: _fdfc.PdfRectangle,
		_ggcf:        _fdfc._gddg,
		_gdddb:       _edc,
		_ffbe:        _edf,
		_ddaa:        _gcde,
	}
}

// _gbeg returns the single parameter of `_cgg` as a float64. It fails with
// "incorrect parameter count" unless the operation has exactly one parameter.
func _gbeg(_cgg *_ca.ContentStreamOperation) (float64, error) {
	if len(_cgg.Params) == 1 {
		return _e.GetNumberAsFloat(_cgg.Params[0])
	}
	err := _g.New("incorrect parameter count")
	_fgc.Log.Debug("ERROR: %#q should have %d input params, got %d %+v",
		_cgg.Operand, 1, len(_cgg.Params), _cgg.Params)
	return 0.0, err
}

// _acde returns the sorted, de-duplicated set of inner keys found across all
// rows of `_bgcg`.
func _acde(_bgcg map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_bgcg))
	seen := make(map[float64]struct{}, len(_bgcg))
	for _, row := range _bgcg {
		for k := range row {
			if _, dup := seen[k]; dup {
				continue
			}
			keys = append(keys, k)
			seen[k] = struct{}{}
		}
	}
	_c.Float64s(keys)
	return keys
}

// getCurrentFont returns the font of the current text state, or the model's
// default font (with a debug log entry) when none has been set.
func (_bfag *textObject) getCurrentFont() *_bbe.PdfFont {
	if font := _bfag._eada._ebfb; font != nil {
		return font
	}
	_fgc.Log.Debug("ERROR: No font defined. Using default.")
	return _bbe.DefaultFont()
}

// arrangeText drains the bag into text lines and returns them as a textPara
// built by _dbfa, or nil if no line could be formed.
//
// The bag is sorted first (and de-duplicated when _ggcc is set). For each
// depth index a line is started from the first word in reading order and then
// extended one word at a time from the words within a depth band around that
// first word. The finished lines are ordered with _dcaf.
func (_bfgd *wordBag) arrangeText() *textPara {
	_bfgd.sort()
	if _ggcc {
		_bfgd.removeDuplicates()
	}

	var lines []*textLine
	for _, depthIdx := range _bfgd.depthIndexes() {
		for !_bfgd.empty(depthIdx) {
			readingIdx := _bfgd.firstReadingIndex(depthIdx)
			seed := _bfgd.firstWord(readingIdx)
			line := _deea(_bfgd, readingIdx)

			fontsize := seed._gddg
			minDepth := seed._fdcbf - _acge*fontsize
			maxDepth := seed._fdcbf + _acge*fontsize
			maxGap := _fcdg * fontsize
			maxOverlap := _aecc * fontsize

		extend:
			for {
				var best *textWord
				bestIdx := 0
				for _, bandIdx := range _bfgd.depthBand(minDepth, maxDepth) {
					w := _bfgd.highestWord(bandIdx, minDepth, maxDepth)
					if w == nil {
						continue
					}
					gap := _cfdac(w, line._gfbc[len(line._gfbc)-1])
					if gap < -maxOverlap {
						// Overlaps the end of the line too far: the line is done.
						break extend
					}
					if gap > maxGap {
						continue
					}
					if best != nil && _gffd(w, best) >= 0 {
						continue
					}
					best = w
					bestIdx = bandIdx
				}
				if best == nil {
					break
				}
				line.pullWord(_bfgd, best, bestIdx)
			}

			line.markWordBoundaries()
			lines = append(lines, line)
		}
	}
	if len(lines) == 0 {
		return nil
	}

	_c.Slice(lines, func(i, j int) bool {
		return _dcaf(lines[i], lines[j]) < 0
	})
	para := _dbfa(_bfgd.PdfRectangle, lines)

	if _acd {
		_fgc.Log.Info("arrangeText !!! para=%s", para.String())
		if _cgfc {
			for i, ln := range para._dbed {
				_gfc.Printf("%4d: %s\n", i, ln.String())
				if !_dgfd {
					continue
				}
				for j, w := range ln._gfbc {
					_gfc.Printf("%8d: %s\n", j, w.String())
					for k, m := range w._cgceb {
						_gfc.Printf("%12d: %s\n", k, m.String())
					}
				}
			}
		}
	}
	return para
}
2021-07-30 00:21:16 +00:00
2021-09-23 22:37:42 +00:00
// String returns a string describing `ma`: a fixed marker when the array holds no marks,
// otherwise the number of marks followed by the first and the last mark.
func (ma TextMarkArray) String() string {
	marks := ma._fbcc
	if len(marks) == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	first, last := marks[0], marks[len(marks)-1]
	return _gfc.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", len(marks), first, last)
}

// bbox returns the table's PdfRectangle.
func (t *textTable) bbox() _bbe.PdfRectangle {
	return t.PdfRectangle
}

// push appends a copy of `_gbd` to the stack, so that later changes made through the
// caller's pointer do not alter the saved entry.
func (stack *stateStack) push(_gbd *textState) {
	saved := new(textState)
	*saved = *_gbd
	*stack = append(*stack, saved)
}
2021-07-30 00:21:16 +00:00
2021-09-23 22:37:42 +00:00
// String returns a human readable description of `s`.
func (_afbfb intSet )String ()string {var _dgfea []int ;for _dfgbe :=range _afbfb {if _afbfb .has (_dfgbe ){_dgfea =append (_dgfea ,_dfgbe );};};_c .Ints (_dgfea );return _gfc .Sprintf ("\u0025\u002b\u0076",_dgfea );};func (_cagd paraList )addNeighbours (){_gadfc :=func (_gcbcf []int ,_ggggc *textPara )([]*textPara ,[]*textPara ){_eeed :=make ([]*textPara ,0,len (_gcbcf )-1);
// addNeighbours (opened on the line above) fills in the four neighbour links of every para
// in the list: `_fffe` (left), `_gadgg` (right), `_cedg` (below) and `_gbceb` (above).
//
// The first helper, _gadfc, splits the candidate indexes into paras lying entirely to the
// left of the given para (Urx <= its Llx) and entirely to its right (Llx >= its Urx).
// Both helpers allocate with capacity len(indexes)-1, so they must not be called with an
// empty index list; the callers below skip empty lists first.
_afcgda :=make ([]*textPara ,0,len (_gcbcf )-1);for _ ,_cdgdc :=range _gcbcf {_baage :=_cagd [_cdgdc ];if _baage .Urx <=_ggggc .Llx {_eeed =append (_eeed ,_baage );}else if _baage .Llx >=_ggggc .Urx {_afcgda =append (_afcgda ,_baage );};};return _eeed ,_afcgda ;
// The second helper, _gafd, does the same vertically and returns (above, below): paras with
// Lly >= the given para's Ury are above, paras with Ury <= its Lly are below.
};_gafd :=func (_ebdba []int ,_gabc *textPara )([]*textPara ,[]*textPara ){_ceggb :=make ([]*textPara ,0,len (_ebdba )-1);_bdcdb :=make ([]*textPara ,0,len (_ebdba )-1);for _ ,_gcgb :=range _ebdba {_fgbe :=_cagd [_gcgb ];if _fgbe .Ury <=_gabc .Lly {_bdcdb =append (_bdcdb ,_fgbe );
}else if _fgbe .Lly >=_gabc .Ury {_ceggb =append (_ceggb ,_fgbe );};};return _ceggb ,_bdcdb ;};_bcdca :=_cagd .yNeighbours (_dcea );for _ ,_gcfa :=range _cagd {_bcfbaa :=_bcdca [_gcfa ];if len (_bcfbaa )==0{continue ;};_gabbf ,_bcce :=_gadfc (_bcfbaa ,_gcfa );
// Horizontal pass. Left link: take the left candidate with the greatest Urx, discard it if
// any other left candidate extends past its Llx, and accept it only if _gdee approves the
// pair of rectangles. The right link mirrors this with the smallest Llx.
if len (_gabbf )==0&&len (_bcce )==0{continue ;};if len (_gabbf )> 0{_cfdfb :=_gabbf [0];for _ ,_egbgf :=range _gabbf [1:]{if _egbgf .Urx >=_cfdfb .Urx {_cfdfb =_egbgf ;};};for _ ,_aefff :=range _gabbf {if _aefff !=_cfdfb &&_aefff .Urx > _cfdfb .Llx {_cfdfb =nil ;
break ;};};if _cfdfb !=nil &&_gdee (_gcfa .PdfRectangle ,_cfdfb .PdfRectangle ){_gcfa ._fffe =_cfdfb ;};};if len (_bcce )> 0{_bfgbb :=_bcce [0];for _ ,_ebgc :=range _bcce [1:]{if _ebgc .Llx <=_bfgbb .Llx {_bfgbb =_ebgc ;};};for _ ,_bega :=range _bcce {if _bega !=_bfgbb &&_bega .Llx < _bfgbb .Urx {_bfgbb =nil ;
break ;};};if _bfgbb !=nil &&_gdee (_gcfa .PdfRectangle ,_bfgbb .PdfRectangle ){_gcfa ._gadgg =_bfgbb ;};};};_bcdca =_cagd .xNeighbours (_gcce );for _ ,_ccfbb :=range _cagd {_fegdg :=_bcdca [_ccfbb ];if len (_fegdg )==0{continue ;};_gggad ,_fcgb :=_gafd (_fegdg ,_ccfbb );
// Vertical pass. Below link: take the below candidate with the greatest Ury, discard it if
// any other below candidate rises past its Lly, and accept it only if _dgad approves. The
// above link mirrors this with the smallest Lly.
if len (_gggad )==0&&len (_fcgb )==0{continue ;};if len (_fcgb )> 0{_egdf :=_fcgb [0];for _ ,_acbc :=range _fcgb [1:]{if _acbc .Ury >=_egdf .Ury {_egdf =_acbc ;};};for _ ,_gfga :=range _fcgb {if _gfga !=_egdf &&_gfga .Ury > _egdf .Lly {_egdf =nil ;break ;
};};if _egdf !=nil &&_dgad (_ccfbb .PdfRectangle ,_egdf .PdfRectangle ){_ccfbb ._cedg =_egdf ;};};if len (_gggad )> 0{_eddde :=_gggad [0];for _ ,_fdafe :=range _gggad [1:]{if _fdafe .Lly <=_eddde .Lly {_eddde =_fdafe ;};};for _ ,_bgac :=range _gggad {if _bgac !=_eddde &&_bgac .Lly < _eddde .Ury {_eddde =nil ;
// The final loop (starting on the next line) clears every link that is not reciprocated,
// e.g. a left link is kept only if that neighbour's right link points back at this para.
break ;};};if _eddde !=nil &&_dgad (_ccfbb .PdfRectangle ,_eddde .PdfRectangle ){_ccfbb ._gbceb =_eddde ;};};};for _ ,_gaca :=range _cagd {if _gaca ._fffe !=nil &&_gaca ._fffe ._gadgg !=_gaca {_gaca ._fffe =nil ;};if _gaca ._gbceb !=nil &&_gaca ._gbceb ._cedg !=_gaca {_gaca ._gbceb =nil ;
};if _gaca ._gadgg !=nil &&_gaca ._gadgg ._fffe !=_gaca {_gaca ._gadgg =nil ;};if _gaca ._cedg !=nil &&_gaca ._cedg ._gbceb !=_gaca {_gaca ._cedg =nil ;};};};func (_ebbc *wordBag )firstWord (_cdbb int )*textWord {return _ebbc ._gbec [_cdbb ][0]};func (_facfg rulingList )augmentGrid ()(rulingList ,rulingList ){_bfdf ,_egfg :=_facfg .vertsHorzs ();
// firstWord (on the line above) returns element 0 of the word slice stored under index
// `_cdbb`; it panics if that slice is missing or empty.
//
// augmentGrid (opened on the line above) splits the list with vertsHorzs and, when both
// parts are non-empty, adds up to four synthetic rulings wherever the bounding box of one
// part extends beyond the bounding box of the other by more than `_ddee`. It returns the
// (possibly extended) first and second parts; if nothing was added the original parts are
// returned unchanged.
if len (_bfdf )==0||len (_egfg )==0{return _bfdf ,_egfg ;};_baef ,_cdad :=_bfdf ,_egfg ;_dbdfd :=_bfdf .bbox ();_dbcd :=_egfg .bbox ();if _bcbc {_fgc .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_dbdfd );
_fgc .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_dbcd );};var _dbagd ,_effa ,_ddffe ,_edee *ruling ;if _dbcd .Llx < _dbdfd .Llx -_ddee {_dbagd =&ruling {_bdcf :_deegc ,_ccfa :_bgecg ,_eacd :_dbcd .Llx ,_dgbc :_dbdfd .Lly ,_cgcfd :_dbdfd .Ury };
_bfdf =append (rulingList {_dbagd },_bfdf ...);};if _dbcd .Urx > _dbdfd .Urx +_ddee {_effa =&ruling {_bdcf :_deegc ,_ccfa :_bgecg ,_eacd :_dbcd .Urx ,_dgbc :_dbdfd .Lly ,_cgcfd :_dbdfd .Ury };_bfdf =append (_bfdf ,_effa );};if _dbdfd .Lly < _dbcd .Lly -_ddee {_ddffe =&ruling {_bdcf :_deegc ,_ccfa :_eeca ,_eacd :_dbdfd .Lly ,_dgbc :_dbcd .Llx ,_cgcfd :_dbcd .Urx };
_egfg =append (rulingList {_ddffe },_egfg ...);};if _dbdfd .Ury > _dbcd .Ury +_ddee {_edee =&ruling {_bdcf :_deegc ,_ccfa :_eeca ,_eacd :_dbdfd .Ury ,_dgbc :_dbcd .Llx ,_cgcfd :_dbcd .Urx };_egfg =append (_egfg ,_edee );};if len (_bfdf )+len (_egfg )==len (_facfg ){return _baef ,_cdad ;
// growTable (opened on the line below) repeatedly extends the table by one row and/or one
// column until neither getDown nor getRight offers anything more. _adbd appends a row
// (incrementing `_dccb`), _acca appends a column (incrementing `_caea`).
};_dec :=append (_bfdf ,_egfg ...);_facfg .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_dec .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");return _bfdf ,_egfg ;};func (_afabb *textTable )growTable (){_adbd :=func (_adafa paraList ){_afabb ._dccb ++;
for _adada :=0;_adada < _afabb ._caea ;_adada ++{_afed :=_adafa [_adada ];_afabb .put (_adada ,_afabb ._dccb -1,_afed );};};_acca :=func (_dccg paraList ){_afabb ._caea ++;for _fdgg :=0;_fdgg < _afabb ._dccb ;_fdgg ++{_ffbf :=_dccg [_fdgg ];_afabb .put (_afabb ._caea -1,_fdgg ,_ffbf );
};};if _agba {_afabb .log ("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce");};for _fcgeg :=0;;_fcgeg ++{_debfa :=false ;_babge :=_afabb .getDown ();_cbbd :=_afabb .getRight ();if _agba {_gfc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fcgeg ,_afabb );
// When both a row and a column are offered and they end in the same, non-`_eeeg` para, the
// row is added first, getRight is queried again, and the shared para is placed in the new
// bottom-right corner. Otherwise a row is preferred over a column.
_gfc .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a",_babge );_gfc .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a",_cbbd );};if _babge !=nil &&_cbbd !=nil {_fcgea :=_babge [len (_babge )-1];
if _fcgea !=nil &&!_fcgea ._eeeg &&_fcgea ==_cbbd [len (_cbbd )-1]{_adbd (_babge );if _cbbd =_afabb .getRight ();_cbbd !=nil {_acca (_cbbd );_afabb .put (_afabb ._caea -1,_afabb ._dccb -1,_fcgea );};_debfa =true ;};};if !_debfa &&_babge !=nil {_adbd (_babge );
// _fbfea (on the line below) returns `_fafd` cut to at most `_fcffga` bytes.
// NOTE(review): the cut is byte-wise, so it can split a multi-byte UTF-8 sequence, and a
// negative `_fcffga` would panic — confirm callers only pass small positive limits.
//
// _bdgde (opened on the line below) returns the keys of the map sorted in descending order.
_debfa =true ;};if !_debfa &&_cbbd !=nil {_acca (_cbbd );_debfa =true ;};if !_debfa {break ;};};};func _fbfea (_fafd string ,_fcffga int )string {if len (_fafd )< _fcffga {return _fafd ;};return _fafd [:_fcffga ];};func _bdgde (_gfgee map[float64 ]map[float64 ]gridTile )[]float64 {_egabf :=make ([]float64 ,0,len (_gfgee ));
// newTextMark (opened on the line below) builds a textMark for text `_bgca` drawn with
// matrix `_fgb` and ending at point `_deeg`. The second result is false when the page's
// `_caf` rectangle has a positive width and _agea reports that the mark's box does not
// intersect it.
for _fecd :=range _gfgee {_egabf =append (_egabf ,_fecd );};_c .Float64s (_egabf );_egca :=len (_egabf );for _gaad :=0;_gaad < _egca /2;_gaad ++{_egabf [_gaad ],_egabf [_egca -1-_gaad ]=_egabf [_egca -1-_gaad ],_egabf [_gaad ];};return _egabf ;};func (_ffff *textObject )newTextMark (_bgca string ,_fgb _gfa .Matrix ,_deeg _gfa .Point ,_gceg float64 ,_gfae *_bbe .PdfFont ,_fafe float64 ,_dfbcb ,_fggba _a .Color )(textMark ,bool ){_bgae :=_fgb .Angle ();
// `_bfbg` is the orientation derived from the matrix angle via _ecac. The extent `_eace`
// is the Y scaling factor, or the X scaling factor when the orientation is 90 or 270.
// The box runs from the matrix origin (`_fcec`) to `_deeg` and is then widened by `_eace`
// on the side selected by the orientation; any other orientation value is treated as 0.
_bfbg :=_ecac (_bgae ,_gebg );var _eace float64 ;if _bfbg %180!=90{_eace =_fgb .ScalingFactorY ();}else {_eace =_fgb .ScalingFactorX ();};_fcec :=_edbg (_fgb );_abdb :=_bbe .PdfRectangle {Llx :_fcec .X ,Lly :_fcec .Y ,Urx :_deeg .X ,Ury :_deeg .Y };switch _bfbg %360{case 90:_abdb .Urx -=_eace ;
case 180:_abdb .Ury -=_eace ;case 270:_abdb .Urx +=_eace ;case 0:_abdb .Ury +=_eace ;default:_bfbg =0;_abdb .Ury +=_eace ;};if _abdb .Llx > _abdb .Urx {_abdb .Llx ,_abdb .Urx =_abdb .Urx ,_abdb .Llx ;};if _abdb .Lly > _abdb .Ury {_abdb .Lly ,_abdb .Ury =_abdb .Ury ,_abdb .Lly ;
// The box is replaced by whatever _agea returns, which is the zero rectangle when there is
// no intersection.
};_fefa :=true ;if _ffff ._ebba ._caf .Width ()> 0{_gfaba ,_cgc :=_agea (_abdb ,_ffff ._ebba ._caf );if !_cgc {_fefa =false ;_fgc .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_abdb ,_ffff ._ebba ._caf ,_bgca );
// `_ffdb` is a second rectangle derived from the box and the `_caf` rectangle according to
// the orientation (unchanged for orientation 0); it becomes the mark's embedded
// PdfRectangle, while the unrotated box is kept in `_acfa`.
};_abdb =_gfaba ;};_ffdb :=_abdb ;_ggdf :=_ffff ._ebba ._caf ;switch _bfbg %360{case 90:_ggdf .Urx ,_ggdf .Ury =_ggdf .Ury ,_ggdf .Urx ;_ffdb =_bbe .PdfRectangle {Llx :_ggdf .Urx -_abdb .Ury ,Urx :_ggdf .Urx -_abdb .Lly ,Lly :_abdb .Llx ,Ury :_abdb .Urx };
case 180:_ffdb =_bbe .PdfRectangle {Llx :_ggdf .Urx -_abdb .Llx ,Urx :_ggdf .Urx -_abdb .Urx ,Lly :_ggdf .Ury -_abdb .Lly ,Ury :_ggdf .Ury -_abdb .Ury };case 270:_ggdf .Urx ,_ggdf .Ury =_ggdf .Ury ,_ggdf .Urx ;_ffdb =_bbe .PdfRectangle {Llx :_abdb .Ury ,Urx :_abdb .Lly ,Lly :_ggdf .Ury -_abdb .Llx ,Ury :_ggdf .Ury -_abdb .Urx };
};if _ffdb .Llx > _ffdb .Urx {_ffdb .Llx ,_ffdb .Urx =_ffdb .Urx ,_ffdb .Llx ;};if _ffdb .Lly > _ffdb .Ury {_ffdb .Lly ,_ffdb .Ury =_ffdb .Ury ,_ffdb .Lly ;};_eaccf :=textMark {_cgge :_bgca ,PdfRectangle :_ffdb ,_acfa :_abdb ,_fabf :_gfae ,_gbae :_eace ,_gfabg :_fafe ,_gbfe :_fgb ,_gcfc :_deeg ,_degdg :_bfbg ,_dafb :_dfbcb ,_aada :_fggba };
if _dega {_fgc .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_fcec ,_deeg ,_eaccf .String ());};return _eaccf ,_fefa ;
// intSet.has reports whether `_eddc` is a key of the set; rulingList is a slice of *ruling.
};func (_fcbb intSet )has (_eddc int )bool {_ ,_eaab :=_fcbb [_eddc ];return _eaab };type rulingList []*ruling ;
2021-01-07 14:20:10 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct{
2020-11-23 22:15:56 +00:00
2020-12-06 13:03:03 +00:00
// Text is the extracted text.
Text string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// BBox is the bounding box of the text.
2021-09-23 22:37:42 +00:00
BBox _bbe .PdfRectangle ;
2021-01-07 14:20:10 +00:00
// Font is the font the text was drawn with.
2021-09-23 22:37:42 +00:00
Font *_bbe .PdfFont ;
2021-01-07 14:20:10 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2021-09-23 22:37:42 +00:00
FillColor _a .Color ;
2021-01-07 14:20:10 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2021-09-23 22:37:42 +00:00
StrokeColor _a .Color ;
2021-01-07 14:20:10 +00:00
// Orientation is the text orientation
2021-09-23 22:37:42 +00:00
Orientation int ;};func (_bacbb paraList )findTextTables ()[]*textTable {var _gead []*textTable ;for _ ,_efccd :=range _bacbb {if _efccd .taken ()||_efccd .Width ()==0{continue ;};_gdbg :=_efccd .isAtom ();if _gdbg ==nil {continue ;};_gdbg .growTable ();
if _gdbg ._caea *_gdbg ._dccb < _ggeg {continue ;};_gdbg .markCells ();_gdbg .log ("\u0067\u0072\u006fw\u006e");_gead =append (_gead ,_gdbg );};return _gead ;};func (_afab rulingList )toTilings ()(rulingList ,[]gridTiling ){_afab .log ("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s");
if len (_afab )==0{return nil ,nil ;};_afab =_afab .tidied ("\u0061\u006c\u006c");_afab .log ("\u0074\u0069\u0064\u0069\u0065\u0064");_cabf :=_afab .toGrids ();_cegc :=make ([]gridTiling ,len (_cabf ));for _afee ,_befa :=range _cabf {_cegc [_afee ]=_befa .asTiling ();
};return _afab ,_cegc ;};func (_gagd *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_fced :=make (map[int ]map[*textWord ]struct{},len (_gagd ._gbec ));for _fdabc :=range _gagd ._gbec {_fced [_fdabc ]=make (map[*textWord ]struct{});};return _fced ;
};func (_gabfb rulingList )primMinMax ()(float64 ,float64 ){_cgda ,_fbcf :=_gabfb [0]._eacd ,_gabfb [0]._eacd ;for _ ,_gadb :=range _gabfb [1:]{if _gadb ._eacd < _cgda {_cgda =_gadb ._eacd ;}else if _gadb ._eacd > _fbcf {_fbcf =_gadb ._eacd ;};};return _cgda ,_fbcf ;
};func (_cged *wordBag )highestWord (_fdeg int ,_gdbff ,_begd float64 )*textWord {for _ ,_dfab :=range _cged ._gbec [_fdeg ]{if _gdbff <=_dfab ._fdcbf &&_dfab ._fdcbf <=_begd {return _dfab ;};};return nil ;};func _edbf (_bfebg []_e .PdfObject )(_gcag ,_gaecf float64 ,_ccbcf error ){if len (_bfebg )!=2{return 0,0,_gfc .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_bfebg ));
};_gdggc ,_ccbcf :=_e .GetNumbersAsFloat (_bfebg );if _ccbcf !=nil {return 0,0,_ccbcf ;};return _gdggc [0],_gdggc [1],nil ;};func (_cabda gridTile )numBorders ()int {_fffb :=0;if _cabda ._bcbb {_fffb ++;};if _cabda ._ddgab {_fffb ++;};if _cabda ._ebdg {_fffb ++;
};if _cabda ._bacb {_fffb ++;};return _fffb ;};func _accf (_bebdf _bbe .PdfColorspace ,_eccc _bbe .PdfColor )_a .Color {if _bebdf ==nil ||_eccc ==nil {return _a .Black ;};_bdgff ,_gfbe :=_bebdf .ColorToRGB (_eccc );if _gfbe !=nil {_fgc .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_eccc ,_bebdf ,_gfbe );
return _a .Black ;};_bedgf ,_gafe :=_bdgff .(*_bbe .PdfColorDeviceRGB );if !_gafe {_fgc .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_bdgff );
return _a .Black ;};return _a .NRGBA {R :uint8 (_bedgf .R ()*255),G :uint8 (_bedgf .G ()*255),B :uint8 (_bedgf .B ()*255),A :uint8 (255)};};type markKind int ;func (_dcag *wordBag )minDepth ()float64 {return _dcag ._gdddb -(_dcag .Ury -_dcag ._ggcf )};
// isExportable reports whether the table should be exported. A table with `_gbaca` set is
// always exportable. Otherwise it is exportable as soon as one row has a first-column cell
// that is missing, or whose text is longer than one rune and does not match `_gdea`.
func (t *textTable) isExportable() bool {
	if t._gbaca {
		return true
	}
	// trivial reports whether the first cell of the row exists and holds at most one rune
	// or text matching _gdea.
	trivial := func(_cegd int) bool {
		cell := t.get(0, _cegd)
		if cell == nil {
			return false
		}
		text := cell.text()
		if _bb.RuneCountInString(text) <= 1 {
			return true
		}
		return _gdea.MatchString(text)
	}
	row := 0
	for row < t._dccb {
		if !trivial(row) {
			return true
		}
		row++
	}
	return false
}

// isQuadrilateral reports whether the subpath has four points, or five points of which the
// first and the last coincide.
func (sp *subpath) isQuadrilateral() bool {
	points := sp._gbee
	switch len(points) {
	case 4:
		return true
	case 5:
		first, last := points[0], points[4]
		return first.X == last.X && first.Y == last.Y
	default:
		return false
	}
}

// _effb passes the absolute Y distance and the absolute X distance between the two points
// (in that order) to _becae and returns its verdict.
func _effb(_baad, _aebe _gfa.Point) bool {
	dx := _gf.Abs(_baad.X - _aebe.X)
	dy := _gf.Abs(_baad.Y - _aebe.Y)
	return _becae(dy, dx)
}

// _cad is the factor 1/1000 applied to glyph metrics.
const _cad = 1.0 / 1000.0
// String returns a description of `v`: its kind, primary coordinate, the two secondary
// coordinates with their difference, `_bdcf`, its color and, when non-zero, its width.
func (v *ruling) String() string {
	if v._ccfa == _fgcbb {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}
	primary, secondary := "\u0078", "\u0079"
	if v._ccfa == _eeca {
		primary, secondary = secondary, primary
	}
	var width string
	if v._fdaf != 0.0 {
		width = _gfc.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", v._fdaf)
	}
	extent := v._cgcfd - v._dgbc
	return _gfc.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",
		v._ccfa, primary, v._eacd, secondary, v._dgbc, v._cgcfd, extent, v._bdcf, v.Color, width)
}

// _acea appends a copy of the placeholder mark `_edbge`, with its Text replaced by `_bggg`,
// to `_dddb` by way of _fagda.
func _acea(_dddb []TextMark, _debf *int, _bggg string) []TextMark {
	placeholder := _edbge
	placeholder.Text = _bggg
	return _fagda(_dddb, _debf, placeholder)
}

// empty reports whether the bag has no entry under index `_gfcea`.
func (bag *wordBag) empty(_gfcea int) bool {
	if _, present := bag._gbec[_gfcea]; present {
		return false
	}
	return true
}

// sortStrict orders the rulings by descending kind, then by ascending primary coordinate
// (unless _bfab treats the difference as negligible), then by ascending `_dgbc`, and finally
// by ascending `_cgcfd`.
func (rulings rulingList) sortStrict() {
	less := func(i, j int) bool {
		a, b := rulings[i], rulings[j]
		switch {
		case a._ccfa != b._ccfa:
			return a._ccfa > b._ccfa
		case !_bfab(a._eacd - b._eacd):
			return a._eacd < b._eacd
		case a._dgbc != b._dgbc:
			return a._dgbc < b._dgbc
		default:
			return a._cgcfd < b._cgcfd
		}
	}
	_c.Slice(rulings, less)
}

// _facg returns the larger of its two arguments.
func _facg(_egdd, _fagg int) int {
	if _fagg >= _egdd {
		return _fagg
	}
	return _egdd
}
// String returns a description of `tm`: its rectangle, its `_gbae` value and its text.
func (tm *textMark) String() string {
	const layout = "\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022"
	return _gfc.Sprintf(layout, tm.PdfRectangle, tm._gbae, tm._cgge)
}

// tidied returns the rulings after duplicate removal, snapping to groups and sorting.
// It returns nil when snapToGroups yields nil. `_bebab` is only used to label the log output.
func (rulings rulingList) tidied(_bebab string) rulingList {
	uniques := rulings.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()

	if _bcbc {
		_fgc.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064",
			_bebab, len(rulings), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}
// String returns a description of `p`: its rectangle followed either by an empty marker
// (when `_eecg` is set) or by the optional table dimensions, the number of lines and the
// text cut to 50 bytes by _fbfea.
func (p *textPara) String() string {
	if p._eecg {
		return _gfc.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", p.PdfRectangle)
	}
	var dims string
	if table := p._gbfb; table != nil {
		dims = _gfc.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", table._caea, table._dccb)
	}
	preview := _fbfea(p.text(), 50)
	return _gfc.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", p.PdfRectangle, dims, len(p._dbed), preview)
}
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (pt PageText) ToText() string {
	return pt.Text()
}

// reset restores both text matrices (`_adb` and `_eca`) to the identity matrix and drops
// the collected marks (`_cebe`).
func (to *textObject) reset() {
	to._adb = _gfa.IdentityMatrix()
	to._eca = _gfa.IdentityMatrix()
	to._cebe = nil
}

// complete reports whether every tile of the tiling is complete.
func (tiling gridTiling) complete() bool {
	for _, row := range tiling._dddg {
		for _, tile := range row {
			if !tile.complete() {
				return false
			}
		}
	}
	return true
}

// _gffd returns the difference between the Llx of `_edga`'s bounding box and the Llx of
// `_dadd`'s bounding box; it is negative when `_edga` starts further left.
func _gffd(_edga, _dadd bounded) float64 {
	return _edga.bbox().Llx - _dadd.bbox().Llx
}

// _feca looks for horizontal corridors that separate the text lines of the cells in
// `_feabb`. All lines are gathered and sorted by `_fdga` (ties broken by Llx); a corridor is
// recorded midway between the lowest line seen so far and the next line whenever that next
// line lies entirely below it. The corridors are only returned when every resulting group of
// lines is accepted by hasLines for every cell; otherwise, or when the cells contain no
// lines at all, the result is nil.
func _feca(_feabb []compositeCell) []float64 {
	var lines []*textLine
	numParas := 0
	for _, cell := range _feabb {
		numParas += len(cell.paraList)
		lines = append(lines, cell.lines()...)
	}
	_c.Slice(lines, func(i, j int) bool {
		a, b := lines[i], lines[j]
		if da, db := a._fdga, b._fdga; !_bfab(da - db) {
			return da < db
		}
		return a.Llx < b.Llx
	})
	if _bdfb {
		_gfc.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", numParas, len(lines))
		for i, line := range lines {
			_gfc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, line)
		}
	}

	// Without any line there is nothing to separate; the unguarded lines[0] access below
	// would otherwise panic.
	if len(lines) == 0 {
		return nil
	}

	var corridors []float64
	var groups [][]*textLine
	lowest := lines[0]
	current := []*textLine{lowest}
	for i, line := range lines[1:] {
		if line.Ury < lowest.Lly {
			border := 0.5 * (line.Ury + lowest.Lly)
			if _bdfb {
				_gfc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a", i, line.Ury, lowest.Lly, border, lowest, line)
			}
			corridors = append(corridors, border)
			groups = append(groups, current)
			current = nil
		}
		current = append(current, line)
		if line.Lly < lowest.Lly {
			lowest = line
		}
	}
	if len(current) > 0 {
		groups = append(groups, current)
	}

	if _bdfb {
		_gfc.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", corridors)
	}
	if _bdfb {
		_fgc.Log.Info("\u0072\u006f\u0077\u003d\u0025\u0064", len(_feabb))
		for i, cell := range _feabb {
			_gfc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, cell)
		}
		_fgc.Log.Info("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d", len(groups))
		for i, group := range groups {
			_gfc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a", i, len(group))
			for j, line := range group {
				_gfc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", j, line)
			}
		}
	}

	// The corridors are valid only if every group is accepted by every cell.
	allSpanned := true
checkGroups:
	for g, group := range groups {
		for c, cell := range _feabb {
			if _bdfb {
				_gfc.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", g, len(groups), c, len(_feabb), cell)
			}
			if !cell.hasLines(group) {
				if _bdfb {
					_gfc.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", g, len(groups), c, len(_feabb))
				}
				allSpanned = false
				break checkGroups
			}
		}
	}
	if !allSpanned {
		if _bdfb {
			_fgc.Log.Info("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg")
		}
		corridors = nil
	}
	if _bdfb && corridors != nil {
		_gfc.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", corridors)
	}
	return corridors
}
// setTextRise stores `_aaba` in the text state's `_gdf` field. It is a no-op on a nil
// receiver.
func (to *textObject) setTextRise(_aaba float64) {
	if to != nil {
		to._eada._gdf = _aaba
	}
}

// _afbae converts the subpaths of the given path sections into rulings. `_ffac` is first
// passed to _ggacf. Subpaths that are not quadrilaterals, or for which makeRectRuling
// fails, are skipped (and logged when the corresponding debug flag is set).
func _afbae(_ffac []pathSection) rulingList {
	_ggacf(_ffac)
	if _bcbc {
		_fgc.Log.Info("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs", len(_ffac))
	}

	var rulings rulingList
	for _, section := range _ffac {
		for _, sub := range section._dfaa {
			if !sub.isQuadrilateral() {
				if _bcbc {
					_fgc.Log.Error("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073", sub)
				}
				continue
			}
			made, ok := sub.makeRectRuling(section.Color)
			if !ok {
				if _fcce {
					_fgc.Log.Error("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073", sub)
				}
				continue
			}
			rulings = append(rulings, made)
		}
	}

	if _bcbc {
		_fgc.Log.Info("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073", rulings.String())
	}
	return rulings
}
// Tables returns the tables extracted from the page.
func (pt PageText) Tables() []TextTable {
	tables := pt._dbee
	if _bdfb {
		_fgc.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}

// moveTextSetLeading stores the negated `_gcd` in the text state's `_cac` field and then
// moves the line position by (`_gfac`, `_gcd`) via moveLP.
func (to *textObject) moveTextSetLeading(_gfac, _gcd float64) {
	to._eada._cac = -_gcd
	to.moveLP(_gfac, _gcd)
}

// _agea returns the intersection of the two rectangles and true, or the zero rectangle and
// false when _eda reports that they do not intersect.
func _agea(_cbeaag, _begf _bbe.PdfRectangle) (_bbe.PdfRectangle, bool) {
	var overlap _bbe.PdfRectangle
	if !_eda(_cbeaag, _begf) {
		return overlap, false
	}
	overlap.Llx = _gf.Max(_cbeaag.Llx, _begf.Llx)
	overlap.Urx = _gf.Min(_cbeaag.Urx, _begf.Urx)
	overlap.Lly = _gf.Max(_cbeaag.Lly, _begf.Lly)
	overlap.Ury = _gf.Min(_cbeaag.Ury, _begf.Ury)
	return overlap, true
}

// Debug switches. All of them evaluate to false; the derived ones can only become true when
// the flag they are and-ed with is enabled.
const (
	_bcfb = false
	_dega = false
	_egab = false
	_edbd = false
	_egf  = false
	_bafe = false
	_cegg = false
	_fcde = false

	// Switches derived from _acd.
	_acd  = false
	_cgfc = _acd && true
	_dgfd = _cgfc && false
	_ddff = _acd && true

	// Switches derived from _bdfb.
	_bdfb = false
	_agba = _bdfb && false
	_gdbb = _bdfb && true

	// Switches derived from _bcbc.
	_bcbc   = false
	_cgdfea = _bcbc && false
	_ggfe   = _bcbc && false
	_eedf   = _bcbc && true
	_fcce   = _bcbc && false
	_eeeb   = _bcbc && false
)

// textState holds the text parameters consulted while rendering text.
type textState struct {
	// _cfa is added to every glyph displacement in renderText.
	_cfa float64
	// _bea is added to the displacement only for a lone space in renderText.
	_bea float64
	// _fbd is divided by 100 and used as the horizontal scale in renderText.
	_fbd float64
	// _cac is set (negated) by moveTextSetLeading.
	_cac float64
	// _gbaf is used as the font size in renderText.
	_gbaf float64
	_fgec RenderMode
	// _gdf is set by setTextRise.
	_gdf float64
	// _ebfb is the font returned by getCurrentFont when it is not nil.
	_ebfb *_bbe.PdfFont
	_dda  _bbe.PdfRectangle
	// _ccb and _eacg accumulate the two counts returned by CharcodesToStrings in renderText.
	_ccb  int
	_eacg int
}
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
//
// The two int results are passed through unchanged from extractPageText. On success the
// page text has had computeViews applied and has been checked by _egaf; any error yields
// (nil, 0, 0, err).
func (_bad *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_bac ,_egg ,_becf ,_ebd :=_bad .extractPageText (_bad ._ddf ,_bad ._da ,_gfa .IdentityMatrix (),0);if _ebd !=nil {return nil ,0,0,_ebd ;};_bac .computeViews ();_ebd =_egaf (_bac );if _ebd !=nil {return nil ,0,0,_ebd ;
// renderText (opened on the line below) decodes `_ddde` with the current font and appends
// one text mark per decoded string to `_cebe`, advancing the text matrix `_adb` after each
// one. It returns nil without doing anything when `_fga` is set, and an error when a
// character code has no metrics in the font.
};return _bac ,_egg ,_becf ,nil ;};func (_faffc *textObject )renderText (_ddde []byte )error {if _faffc ._fga {_fgc .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");
return nil ;};_cgdf :=_faffc .getCurrentFont ();_fbcg :=_cgdf .BytesToCharcodes (_ddde );_bdce ,_dadc ,_ddbg :=_cgdf .CharcodesToStrings (_fbcg );if _ddbg > 0{_fgc .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_dadc ,_ddbg );
// The two counts are accumulated on the text state. `_acaf` scales glyph metrics: `_cad`
// normally, 1 for fonts whose subtype is "Type3". The space width is taken from the font's
// rune metrics, then from its metrics for char code 32, then from the default font.
};_faffc ._eada ._ccb +=_dadc ;_faffc ._eada ._eacg +=_ddbg ;_ada :=_faffc ._eada ;_dcg :=_ada ._gbaf ;_bbee :=_ada ._fbd /100.0;_acaf :=_cad ;if _cgdf .Subtype ()=="\u0054\u0079\u0070e\u0033"{_acaf =1;};_bfef ,_fggb :=_cgdf .GetRuneMetrics (' ');if !_fggb {_bfef ,_fggb =_cgdf .GetCharMetrics (32);
};if !_fggb {_bfef ,_ =_bbe .DefaultFont ().GetRuneMetrics (' ');};_fce :=_bfef .Wx *_acaf ;_fgc .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_fce ,_bdce ,_cgdf ,_dcg );
// `_fcc` combines the font size, the horizontal scale and the `_gdf` offset.
_fcc :=_gfa .NewMatrix (_dcg *_bbee ,0,0,_dcg ,0,_ada ._gdf );if _bafe {_fgc .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_fbcg ),_fbcg ,_bdce );
// NOTE(review): the trace call below formats len(_bdce), an int, with the %q verb.
};_fgc .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_fbcg ),_fbcg ,len (_bdce ));_eed :=_faffc .getFillColor ();
// Decoded strings consisting of a single NUL rune are skipped. `_efeea` takes the state's
// `_bea` value only when the decoded string is a lone space (rune 32).
_cbea :=_faffc .getStrokeColor ();for _ecee ,_egeg :=range _bdce {_eeee :=[]rune (_egeg );if len (_eeee )==1&&_eeee [0]=='\x00'{continue ;};_fdf :=_fbcg [_ecee ];_cde :=_faffc ._fag .CTM .Mult (_faffc ._adb ).Mult (_fcc );_efeea :=0.0;if len (_eeee )==1&&_eeee [0]==32{_efeea =_ada ._bea ;
};_adbb ,_ddbf :=_cgdf .GetCharMetrics (_fdf );if !_ddbf {_fgc .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_fdf ,_eeee ,_eeee ,_cgdf );
return _gfc .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_cgdf .String (),_fdf );};_acb :=_gfa .Point {X :_adbb .Wx *_acaf ,Y :_adbb .Wy *_acaf };
// Two displacements are computed: `_efd` without `_cfa` (used for the mark's end point)
// and `_fff` with `_cfa` (used to advance the text matrix).
_efd :=_gfa .Point {X :(_acb .X *_dcg +_efeea )*_bbee };_fff :=_gfa .Point {X :(_acb .X *_dcg +_ada ._cfa +_efeea )*_bbee };if _bafe {_fgc .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_dcg ,_ada ._cfa ,_ada ._bea ,_bbee );
_fgc .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_acb ,_efd ,_fff );};_ffc :=_bggd (_efd );_cga :=_bggd (_fff );_degd :=_faffc ._fag .CTM .Mult (_faffc ._adb ).Mult (_ffc );
if _edbd {_fgc .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_faffc ._fag .CTM ,_faffc ._adb ,_cga ,_edbg (_faffc ._fag .CTM .Mult (_faffc ._adb ).Mult (_cga )),_ffc ,_degd ,_edbg (_degd ));
// Marks that newTextMark reports as lying outside the page are skipped; note that the text
// matrix is not advanced for them because of the `continue`.
};_bbg ,_ddgc :=_faffc .newTextMark (_dd .ExpandLigatures (_eeee ),_cde ,_edbg (_degd ),_gf .Abs (_fce *_cde .ScalingFactorX ()),_cgdf ,_faffc ._eada ._cfa ,_eed ,_cbea );if !_ddgc {_fgc .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");
// NOTE(review): `_cgdf` has already been dereferenced several times above, so the nil
// check below can never be the first place a nil font is observed.
continue ;};if _cgdf ==nil {_fgc .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _cgdf .Encoder ()==nil {_fgc .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_cgdf );
// When the font's encoder can map the char code to a rune, that rune is stored in the
// mark's `_aabb` field.
}else {if _cfb ,_bgga :=_cgdf .Encoder ().CharcodeToRune (_fdf );_bgga {_bbg ._aabb =string (_cfb );};};_fgc .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_ecee ,_fdf ,_bbg ,_cde );
// imageExtractContext (declared on the line below, after renderText ends) holds a list of
// image marks, three int fields, a map keyed by PDF stream objects holding *cachedImage
// values, and the extraction options.
_faffc ._cebe =append (_faffc ._cebe ,&_bbg );_faffc ._adb .Concat (_cga );};return nil ;};type imageExtractContext struct{_aad []ImageMark ;_ce int ;_fe int ;_ba int ;_feb map[*_e .PdfObjectStream ]*cachedImage ;_fde *ImageExtractOptions ;};
// NewFromContents creates a new extractor from contents and page resources.
// The returned error is always nil.
func NewFromContents(contents string, resources *_bbe.PdfPageResources) (*Extractor, error) {
	const usageKey = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"
	extractor := &Extractor{
		_ddf: contents,
		_da:  resources,
		_ea:  map[string]fontEntry{},
		_ee:  map[string]textResult{},
	}
	_fge.TrackUse(usageKey)
	return extractor, nil
}

// isAtom checks whether the para is the top-left corner of a 2x2 arrangement: its `_gadgg`
// and `_cedg` neighbours must both exist and not have `_eeeg` set, and the `_cedg`
// neighbour of the former must exist, not have `_eeeg` set and be the `_gadgg` neighbour of
// the latter. If so the four paras are handed to _cbgb; otherwise nil is returned.
func (p *textPara) isAtom() *textTable {
	right, below := p._gadgg, p._cedg
	if right == nil || right._eeeg || below == nil || below._eeeg {
		return nil
	}
	diagonal := right._cedg
	if diagonal == nil || diagonal._eeeg || diagonal != below._gadgg {
		return nil
	}
	return _cbgb(p, right, below, diagonal)
}

// textMark is the internal representation of a piece of rendered text.
type textMark struct {
	// The embedded rectangle and `_acfa` are both filled in by newTextMark.
	_bbe.PdfRectangle
	_degdg int
	_cgge  string
	_aabb  string
	_fabf  *_bbe.PdfFont
	_gbae  float64
	_gfabg float64
	_gbfe  _gfa.Matrix
	_gcfc  _gfa.Point
	_acfa  _bbe.PdfRectangle
	_dafb  _a.Color
	_aada  _a.Color
}

// event bundles a float64, a bool and an int.
type event struct {
	_fcead float64
	_bege  bool
	_decb  int
}

// lineRuling is a colored segment between two points, tagged with a ruling kind and a mark
// kind.
type lineRuling struct {
	_fgecf rulingKind
	_addd  markKind
	_a.Color
	_adbgb, _gagb _gfa.Point
}

// _edbge is the template for placeholder marks: a white meta mark whose Text is "[X]" and
// whose Original is a single space.
var _edbge = TextMark{
	Text:        "\u005b\u0058\u005d",
	Original:    "\u0020",
	Meta:        true,
	FillColor:   _a.White,
	StrokeColor: _a.White,
}
// gridTile is a rectangular tile of a ruling grid together with four boolean
// edge flags.
// NOTE(review): rulingList.asTiling only starts a merged run on a tile whose
// `_bcbb` is set, extends it rightwards until `_ddgab` is set and downwards
// until `_ebdg` is set, so the flags presumably mean top (`_bacb`), left
// (`_bcbb`), bottom (`_ebdg`) and right (`_ddgab`) — confirm against `_cgag`.
type gridTile struct {
	_bbe.PdfRectangle
	_bacb, _bcbb, _ebdg, _ddgab bool
}

// lastpointEstablished returns a candidate "last point" for the current path.
// If the state's `_cea` flag is set it returns the stored point `_dgfbe`;
// otherwise, if the most recent subpath has its `_aef` flag set, it returns
// that subpath's last point. In both of those cases the boolean is false. When
// neither applies it returns the zero point and true.
// NOTE(review): the boolean is true exactly when NO point was found, which
// reads inverted relative to the method name — confirm callers expect this.
func (_dccc *shapesState) lastpointEstablished() (_gfa.Point, bool) {
	if _dccc._cea {
		return _dccc._dgfbe, false
	}
	if n := len(_dccc._gbag); n > 0 && _dccc._gbag[n-1]._aef {
		return _dccc._gbag[n-1].last(), false
	}
	return _gfa.Point{}, true
}

// writeText writes the paragraph's text to `_gfaea`. A paragraph without a
// table is written with writeCellText. A table paragraph is written row by
// row: every cell is followed by a space, a missing cell is written as a tab,
// and rows are separated (not terminated) by a newline.
// Write errors are ignored; the method has no way to report them.
func (_adebe *textPara) writeText(_gfaea _f.Writer) {
	table := _adebe._gbfb
	if table == nil {
		_adebe.writeCellText(_gfaea)
		return
	}
	for y := 0; y < table._dccb; y++ {
		for x := 0; x < table._caea; x++ {
			if cell := table.get(x, y); cell == nil {
				_gfaea.Write([]byte("\u0009"))
			} else {
				cell.writeCellText(_gfaea)
			}
			_gfaea.Write([]byte("\u0020"))
		}
		if y < table._dccb-1 {
			_gfaea.Write([]byte("\u000a"))
		}
	}
}

// bbox returns the rectangle spanned by the rulings in `_bedc`. The kind of
// the first ruling decides which of primMinMax/secMinMax supplies the x range
// and which the y range: for kind `_eeca` the secondary range is x and the
// primary range is y, otherwise the other way round. An empty list logs an
// error and yields the zero rectangle.
func (_bedc rulingList) bbox() _bbe.PdfRectangle {
	if len(_bedc) == 0 {
		_fgc.Log.Error("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073")
		return _bbe.PdfRectangle{}
	}
	var box _bbe.PdfRectangle
	if _bedc[0]._ccfa == _eeca {
		box.Llx, box.Urx = _bedc.secMinMax()
		box.Lly, box.Ury = _bedc.primMinMax()
	} else {
		box.Llx, box.Urx = _bedc.primMinMax()
		box.Lly, box.Ury = _bedc.secMinMax()
	}
	return box
}

// String returns a description of the composite cell: its rectangle, the
// number of paragraphs it holds and the merged paragraph text passed through
// `_fbfea` with a limit of 50.
func (_gecfa compositeCell) String() string {
	summary := ""
	if len(_gecfa.paraList) > 0 {
		summary = _fbfea(_gecfa.paraList.merge().text(), 50)
	}
	return _gfc.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071", _gecfa.PdfRectangle, len(_gecfa.paraList), summary)
}

// pullWord adds `_ebbf` to the bag under depth index `_acbfd`: the bag's
// rectangle is grown to include the word, `_ggcf` is raised to the word's
// `_gddg` if that is larger, and the word is recorded in `_ggfc[_acbfd]`.
// NOTE(review): `_ggfc[_acbfd]` must already be a non-nil map or the final
// assignment panics — confirm callers guarantee this.
func (_cgbf *wordBag) pullWord(_ebbf *textWord, _acbfd int, _ggfc map[int]map[*textWord]struct{}) {
	_cgbf.PdfRectangle = _eacc(_cgbf.PdfRectangle, _ebbf.PdfRectangle)
	if _ebbf._gddg > _cgbf._ggcf {
		_cgbf._ggcf = _ebbf._gddg
	}
	_cgbf._gbec[_acbfd] = append(_cgbf._gbec[_acbfd], _ebbf)
	_ggfc[_acbfd][_ebbf] = struct{}{}
}

// computeViews processes the page's text marks and populates the page text
// (`_fca`), the text marks (`_gffb`) and the tables (`_dbee`).
//
// Rulings are collected from `_fcf` and `_daag` (each behind its compile-time
// switch) and converted with toTilings. Marks are then processed one
// orientation at a time (0, 90, 180, 270), stopping early once every mark has
// been consumed.
func (_beca *PageText) computeViews() {
	var rulings rulingList
	if _fgebb {
		rulings = append(rulings, _adgg(_beca._fcf)...)
	}
	if _gaa {
		rulings = append(rulings, _afbae(_beca._daag)...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(_beca._cgb)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		// At most `remaining` marks can still match; use that as the capacity
		// hint so the common single-orientation page needs no regrowth.
		marks := make([]*textMark, 0, remaining)
		for _, mark := range _beca._cgb {
			if mark._degdg == orient {
				marks = append(marks, mark)
			}
		}
		if len(marks) == 0 {
			continue
		}
		paras = append(paras, _fgcb(marks, _beca._fbg, rulings, tilings)...)
		remaining -= len(marks)
	}

	buf := new(_d.Buffer)
	paras.writeText(buf)
	_beca._fca = buf.String()
	_beca._gffb = paras.toTextMarks()
	_beca._dbee = paras.tables()
	if _bdfb {
		_fgc.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(_beca._dbee))
	}
}

// getDepthIdx converts `_cccc` to a depth index with `_fbgb` and clamps it to
// the range between the first and last entries of depthIndexes().
// NOTE(review): indexes depthIndexes()[0] unconditionally, so it assumes the
// bag has at least one depth index — confirm.
func (_fbfc *wordBag) getDepthIdx(_cccc float64) int {
	indexes := _fbfc.depthIndexes()
	idx := _fbgb(_cccc)
	if lowest := indexes[0]; idx < lowest {
		return lowest
	}
	if highest := indexes[len(indexes)-1]; idx > highest {
		return highest
	}
	return idx
}

// findTables returns the tables found among the paragraphs. It first links
// neighbours and sorts the list in place using `_agbe`, then collects grid
// tables (from the tilings in `_dgcbce`) and text tables, each behind its
// compile-time switch.
func (_afdaea paraList) findTables(_dgcbce []gridTiling) []*textTable {
	_afdaea.addNeighbours()
	_c.Slice(_afdaea, func(i, j int) bool { return _agbe(_afdaea[i], _afdaea[j]) < 0 })

	var tables []*textTable
	if _dffe {
		tables = append(tables, _afdaea.findGridTables(_dgcbce)...)
	}
	if _fgdb {
		tables = append(tables, _afdaea.findTextTables()...)
	}
	return tables
}

// _fbfe applies `_becae` to the absolute x and y distances between the two
// points and returns its verdict.
func _fbfe(_deaba, _bcdff _gfa.Point) bool {
	dx := _gf.Abs(_deaba.X - _bcdff.X)
	dy := _gf.Abs(_deaba.Y - _bcdff.Y)
	return _becae(dx, dy)
}

// absorb merges `_cedfb` into the word: the rectangle grows to cover both and
// the other word's `_cgceb` entries are appended.
func (_aafc *textWord) absorb(_cedfb *textWord) {
	_aafc.PdfRectangle = _eacc(_aafc.PdfRectangle, _cedfb.PdfRectangle)
	_aafc._cgceb = append(_aafc._cgceb, _cedfb._cgceb...)
}

// readBefore reports whether paragraph `_ffa` should be read before paragraph
// `_afa`. `_fbde` is an index ordering that is passed on to llyRange.
//
// The answer is true when `_fcge` holds for the pair and the first paragraph
// has the larger Lly. Otherwise the first paragraph's `_beeg` box must end
// strictly left of the second's, and no third paragraph returned by llyRange
// for the pair's Lly span may overlap, in x, the span between max(Llx) and
// min(Urx) of the two boxes.
func (_bde paraList) readBefore(_fbde []int, _ffa, _afa int) bool {
	a, b := _bde[_ffa], _bde[_afa]
	if _fcge(a, b) && a.Lly > b.Lly {
		return true
	}
	if !(a._beeg.Urx < b._beeg.Llx) {
		return false
	}

	lo, hi := a.Lly, b.Lly
	if lo > hi {
		lo, hi = hi, lo
	}
	left := _gf.Max(a._beeg.Llx, b._beeg.Llx)
	right := _gf.Min(a._beeg.Urx, b._beeg.Urx)

	for _, idx := range _bde.llyRange(_fbde, lo, hi) {
		if idx == _ffa || idx == _afa {
			continue
		}
		other := _bde[idx]
		if other._beeg.Llx <= right && left <= other._beeg.Urx {
			return false
		}
	}
	return true
}
// String returns a string describing the current state of the textState stack:
// a header line with the stack size followed by one indexed line per entry,
// with nil entries shown as "<nil>".
func (_gdef *stateStack) String() string {
	stack := *_gdef
	lines := make([]string, 0, len(stack)+1)
	lines = append(lines, _gfc.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(stack)))
	for i := 0; i < len(stack); i++ {
		desc := "\u003c\u006e\u0069l\u003e"
		if state := stack[i]; state != nil {
			desc = state.String()
		}
		lines = append(lines, _gfc.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", i, desc))
	}
	return _fg.Join(lines, "\u000a")
}

// subdivide splits the table's composite cells along the row and column
// corridors and returns a new table holding the resulting cells. If either
// corridor set is empty the receiver itself is returned unchanged.
//
// `_dbcf` supplies, for each original row/column, the offset of its first
// sub-row/sub-column in the new table, plus the new total count. Each
// composite cell is split with its row's and column's corridors and the pieces
// are placed at the matching offsets.
func (_dabc *textTable) subdivide() *textTable {
	_dabc.logComposite("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e")

	rowCorridors := _dabc.compositeRowCorridors()
	colCorridors := _dabc.compositeColCorridors()
	if _bdfb {
		_fgc.Log.Info("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073", _eafe(rowCorridors), _eafe(colCorridors))
	}
	if len(rowCorridors) == 0 || len(colCorridors) == 0 {
		return _dabc
	}

	_gbcb(rowCorridors)
	_gbcb(colCorridors)
	if _bdfb {
		_fgc.Log.Info("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073", _eafe(rowCorridors), _eafe(colCorridors))
	}

	yOffsets, newRows := _dbcf(_dabc._dccb, rowCorridors)
	xOffsets, newCols := _dbcf(_dabc._caea, colCorridors)
	result := &textTable{
		PdfRectangle: _dabc.PdfRectangle,
		_gbaca:       _dabc._gbaca,
		_dccb:        newRows,
		_caea:        newCols,
		_bffe:        make(map[uint64]*textPara, newCols*newRows),
	}
	if _bdfb {
		_fgc.Log.Info("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076", _dabc._caea, _dabc._dccb, newCols, newRows, _eafe(rowCorridors), _eafe(colCorridors), yOffsets, xOffsets)
	}

	for y := 0; y < _dabc._dccb; y++ {
		y0 := yOffsets[y]
		for x := 0; x < _dabc._caea; x++ {
			x0 := xOffsets[x]
			if _bdfb {
				_gfc.Printf("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a", x, y, x0, y0)
			}
			composite, present := _dabc._aeffd[_aebd(x, y)]
			if !present {
				continue
			}
			pieces := composite.split(rowCorridors[y], colCorridors[x])
			for dy := 0; dy < pieces._dccb; dy++ {
				for dx := 0; dx < pieces._caea; dx++ {
					para := pieces.get(dx, dy)
					result.put(x0+dx, y0+dy, para)
					if _bdfb {
						_gfc.Printf("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", x0+dx, y0+dy, para)
					}
				}
			}
		}
	}
	return result
}

// emptyColumn reports whether column `_efeff` has no cell with non-empty text
// in any row of the table.
func (_dacc *textTable) emptyColumn(_efeff int) bool {
	for y := 0; y < _dacc._dccb; y++ {
		cell := _dacc.get(_efeff, y)
		if cell == nil || cell.text() == "" {
			continue
		}
		return false
	}
	return true
}

// tables returns the exported form of every exportable table attached to a
// paragraph in the list, in list order. The result is nil when there are none.
func (_affg paraList) tables() []TextTable {
	if _bdfb {
		_fgc.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	var exported []TextTable
	for _, para := range _affg {
		table := para._gbfb
		if table == nil || !table.isExportable() {
			continue
		}
		exported = append(exported, table.toTextTable())
	}
	return exported
}

// stateStack is a stack of text states.
type stateStack []*textState
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_aace PageText) Marks() *TextMarkArray {
	return &TextMarkArray{_fbcc: _aace._gffb}
}

// extractFormImages extracts the images referenced by the content stream of
// the form XObject named `_eg` in `_bdg`. The form's own resources are used
// when present, otherwise `_bdg`. A missing form is not an error. On success
// the context's `_ba` counter is incremented. `_febe` is currently unused.
func (_eaa *imageExtractContext) extractFormImages(_eg *_e.PdfObjectName, _febe _ca.GraphicsState, _bdg *_bbe.PdfPageResources) error {
	form, err := _bdg.GetXObjectFormByName(*_eg)
	if err != nil {
		return err
	}
	if form == nil {
		return nil
	}
	contents, err := form.GetContentStream()
	if err != nil {
		return err
	}
	resources := form.Resources
	if resources == nil {
		resources = _bdg
	}
	if err = _eaa.extractContentStreamImages(string(contents), resources); err != nil {
		return err
	}
	_eaa._ba++
	return nil
}

// pathSection is a list of subpaths together with a colour.
type pathSection struct {
	_dfaa []*subpath
	_a.Color
}

// _gccf maps each mark kind to its display name.
var _gccf = map[markKind]string{
	_gdaea: "\u0073\u0074\u0072\u006f\u006b\u0065",
	_ebdbd: "\u0066\u0069\u006c\u006c",
	_deegc: "\u0061u\u0067\u006d\u0065\u006e\u0074",
}

// alignsSec reports whether the secondary ranges [`_dgbc`, `_cgcfd`] of the
// two rulings overlap once each lower bound is relaxed by `_eedc`+1.
func (_cefbdf *ruling) alignsSec(_fcfg *ruling) bool {
	const tolerance = _eedc + 1.0
	return _cefbdf._dgbc-tolerance <= _fcfg._cgcfd && _fcfg._dgbc-tolerance <= _cefbdf._cgcfd
}

// newSubPath clears the current path and, when `_egf` is enabled, logs the
// resulting state.
func (_abde *shapesState) newSubPath() {
	_abde.clearPath()
	if _egf {
		_fgc.Log.Info("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073", _abde)
	}
}

// asTiling builds a gridTiling from the rulings in `_gedg`.
//
// The list is sorted in place with sortStrict and split into vertical and
// horizontal rulings. The distinct primary coordinates of each give the grid
// lines; every grid cell becomes a gridTile via `_cgag`. Tiles are then
// coalesced horizontally within each row and vertically across rows, and only
// tiles for which complete() holds are kept.
//
// The zero gridTiling is returned when the list is empty or when there are
// fewer than two distinct vertical or horizontal grid lines. The original code
// panicked on an empty list and on a list with no vertical or no horizontal
// rulings.
func (_gedg rulingList) asTiling() gridTiling {
	if _eedf {
		_fgc.Log.Info("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(_gedg))
	}
	if len(_gedg) == 0 {
		return gridTiling{}
	}

	// Report adjacent rulings that coincide in both coordinates.
	for i := 1; i < len(_gedg); i++ {
		prev, cur := _gedg[i-1], _gedg[i]
		if prev.alignsPrimary(cur) && prev.alignsSec(cur) {
			_fgc.Log.Error("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073", cur, prev)
		}
	}

	_gedg.sortStrict()
	_gedg.log("\u0073n\u0061\u0070\u0070\u0065\u0064")

	verts, horzs := _gedg.vertsHorzs()
	llxs := verts.primaries()
	llys := horzs.primaries()
	w := len(llxs) - 1
	h := len(llys) - 1
	// <= rather than ==: with no verticals or no horizontals the count is -1.
	if w <= 0 || h <= 0 {
		return gridTiling{}
	}
	bounds := _bbe.PdfRectangle{Llx: llxs[0], Urx: llxs[w], Lly: llys[0], Ury: llys[h]}

	if _eedf {
		_fgc.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064", len(verts))
		for i, r := range verts {
			_gfc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r)
		}
		_fgc.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064", len(horzs))
		for i, r := range horzs {
			_gfc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r)
		}
		_fgc.Log.Info("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f", w, h, llxs, llys)
	}

	// Step 1: one tile per grid cell, stored row-major (index y*w+x).
	tiles := make([]gridTile, w*h)
	for y := h - 1; y >= 0; y-- {
		lly := llys[y]
		ury := llys[y+1]
		for x := 0; x < w; x++ {
			llx := llxs[x]
			urx := llxs[x+1]
			vertAtLlx := verts.findPrimSec(llx, lly)
			vertAtUrx := verts.findPrimSec(urx, lly)
			horzAtLly := horzs.findPrimSec(lly, llx)
			horzAtUry := horzs.findPrimSec(ury, llx)
			rect := _bbe.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			tile := _cgag(rect, vertAtLlx, vertAtUrx, horzAtLly, horzAtUry)
			tiles[y*w+x] = tile
			if _eedf {
				_gfc.Printf("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a", x, y, tile.String(), tile.Width(), tile.Height())
			}
		}
	}

	// Step 2: coalesce horizontally. A run starts at a tile with `_bcbb` set
	// and absorbs tiles to its right until `_ddgab` is set. Each row maps the
	// run's Llx to the merged tile.
	if _eedf {
		_fgc.Log.Info("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", bounds)
	}
	rows := make([]map[float64]gridTile, h)
	for y := h - 1; y >= 0; y-- {
		if _eedf {
			_gfc.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", y)
		}
		rows[y] = make(map[float64]gridTile, w)
		for x := 0; x < w; x++ {
			tile := tiles[y*w+x]
			if _eedf {
				_gfc.Printf("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", x, tile)
			}
			if !tile._bcbb {
				continue
			}
			lastX := x
			for nx := x + 1; !tile._ddgab && nx < w; nx++ {
				next := tiles[y*w+nx]
				tile.Urx = next.Urx
				tile._bacb = tile._bacb || next._bacb
				tile._ebdg = tile._ebdg || next._ebdg
				tile._ddgab = next._ddgab
				if _eedf {
					_gfc.Printf("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a", nx, next, tile)
				}
				lastX = nx
			}
			if _eedf {
				_gfc.Printf(" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n", x, lastX, tile)
			}
			// Skip the tiles that were just absorbed into this run.
			x = lastX
			rows[y][tile.Llx] = tile
		}
	}

	// Step 3: coalesce vertically. `cells` is keyed by a row's Lly, then by
	// Llx; `absorbed` marks tiles already merged into a tile from a higher
	// row.
	cells := make(map[float64]map[float64]gridTile, h)
	absorbed := make(map[float64]map[float64]struct{}, h)
	for y := h - 1; y >= 0; y-- {
		rowLly := tiles[y*w].Lly
		cells[rowLly] = make(map[float64]gridTile, w)
		absorbed[rowLly] = make(map[float64]struct{}, w)
	}
	if _eedf {
		_fgc.Log.Info("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", bounds)
	}
	for y := h - 1; y >= 0; y-- {
		rowLly := tiles[y*w].Lly
		row := rows[y]
		if _eedf {
			_gfc.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", y)
		}
		for _, llx := range _efag(row) {
			if _, done := absorbed[rowLly][llx]; done {
				continue
			}
			tile := row[llx]
			if _eedf {
				_gfc.Printf(" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a", tile.String())
			}
			// Extend downwards through rows whose tile at the same Llx has the
			// same Urx, until `_ebdg` is set.
			for ny := y - 1; ny >= 0; ny-- {
				if tile._ebdg {
					break
				}
				next, found := rows[ny][llx]
				if !found {
					break
				}
				if next.Urx != tile.Urx {
					break
				}
				tile._ebdg = next._ebdg
				tile.Lly = next.Lly
				if _eedf {
					_gfc.Printf("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a", next.String(), tile.String())
				}
				absorbed[next.Lly][next.Llx] = struct{}{}
			}
			if y == 0 {
				tile._ebdg = true
			}
			if tile.complete() {
				cells[rowLly][llx] = tile
			}
		}
	}

	tiling := gridTiling{PdfRectangle: bounds, _fcgfg: _acde(cells), _ccge: _bdgde(cells), _dddg: cells}
	tiling.log("\u0043r\u0065\u0061\u0074\u0065\u0064")
	return tiling
}

// putComposite stores the paragraphs `_dcbd` with bounding box `_agfg` as the
// composite cell at (`_aaggg`, `_abfaa`). The cell's updateBBox is called
// before it is stored. An empty paragraph list is logged as an error and
// ignored.
func (_dcedf *textTable) putComposite(_aaggg, _abfaa int, _dcbd paraList, _agfg _bbe.PdfRectangle) {
	if len(_dcbd) == 0 {
		_fgc.Log.Error("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073")
		return
	}
	cell := compositeCell{PdfRectangle: _agfg, paraList: _dcbd}
	if _bdfb {
		_gfc.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a", _aaggg, _abfaa, cell.String())
	}
	cell.updateBBox()
	_dcedf._aeffd[_aebd(_aaggg, _abfaa)] = cell
}

// toTextMarks converts the word's marks to TextMarks, accumulating them with
// `_fagda`, which also receives the `_bdgbd` pointer.
func (_gfgg *textWord) toTextMarks(_bdgbd *int) []TextMark {
	var marks []TextMark
	for _, mark := range _gfgg._cgceb {
		marks = _fagda(marks, _bdgbd, mark.ToTextMark())
	}
	return marks
}

// _cbdf returns a new subpath whose only point is `_afcgd`.
func _cbdf(_afcgd _gfa.Point) *subpath {
	return &subpath{_gbee: []_gfa.Point{_afcgd}}
}

// Compile-time switches and tuning constants.
// In this part of the file, `_dffe` and `_fgdb` enable grid-table and
// text-table detection in paraList.findTables, `_fgebb` and `_gaa` enable the
// two ruling sources in PageText.computeViews, and `_adeb` enables copying the
// `_beeg` boxes back onto the paragraphs in paraList.computeEBBoxes.
const (
	_becd  = true
	_ggcc  = true
	_eaad  = true
	_adeb  = false
	_gabg  = false
	_ebfc  = 6
	_fbffc = 3.0
	_ggfeg = 200
	_dffe  = true
	_fgdb  = true
	_fgebb = true
	_gaa   = true
	_dfe   = false
)

// log prints the tiling for debugging, titled `_ceae`: the llx and lly
// coordinate lists followed by every tile, row by row. It does nothing unless
// `_eedf` is enabled.
func (_baagg gridTiling) log(_ceae string) {
	if !_eedf {
		return
	}
	_fgc.Log.Info("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071", len(_baagg._fcgfg), len(_baagg._ccge), _ceae)
	_gfc.Printf("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a", _baagg._fcgfg)
	_gfc.Printf("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a", _baagg._ccge)
	for i, lly := range _baagg._ccge {
		row, hasRow := _baagg._dddg[lly]
		if !hasRow {
			continue
		}
		_gfc.Printf("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a", i, lly)
		for j, llx := range _baagg._fcgfg {
			tile, hasTile := row[llx]
			if !hasTile {
				continue
			}
			_gfc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", j, tile.String())
		}
	}
}

// llyOrdering returns the indexes of the paragraphs ordered by increasing Lly.
// The sort is stable, so paragraphs with equal Lly keep their list order.
func (_egegb paraList) llyOrdering() []int {
	order := make([]int, len(_egegb))
	for i := range _egegb {
		order[i] = i
	}
	_c.SliceStable(order, func(i, j int) bool {
		return _egegb[order[i]].Lly < _egegb[order[j]].Lly
	})
	return order
}

// primaries returns the distinct primary coordinates (`_eacd`) of the rulings
// in increasing order.
func (_caef rulingList) primaries() []float64 {
	distinct := make(map[float64]struct{}, len(_caef))
	for _, r := range _caef {
		distinct[r._eacd] = struct{}{}
	}
	values := make([]float64, 0, len(distinct))
	for v := range distinct {
		values = append(values, v)
	}
	_c.Float64s(values)
	return values
}

// bbox returns the line's bounding rectangle.
func (_bgbff *textLine) bbox() _bbe.PdfRectangle {
	return _bgbff.PdfRectangle
}
// blocks reports whether some ruling in `_afdf` lies between `_feab` and
// `_gedba`. The two rulings must overlap in their secondary ranges
// [`_dgbc`, `_cgcfd`]; otherwise the answer is false. A ruling in the list
// qualifies when its primary coordinate lies between the two primaries (with
// `_eedc` added to each upper limit) and its secondary range intersects the
// overlap of the two rulings.
func (_afdf rulingList) blocks(_feab, _gedba *ruling) bool {
	if _feab._dgbc > _gedba._cgcfd || _gedba._dgbc > _feab._cgcfd {
		return false
	}
	overlapLo := _gf.Max(_feab._dgbc, _gedba._dgbc)
	overlapHi := _gf.Min(_feab._cgcfd, _gedba._cgcfd)

	// Order the pair by primary coordinate.
	near, far := _feab, _gedba
	if near._eacd > far._eacd {
		near, far = far, near
	}

	for _, candidate := range _afdf {
		if !(near._eacd <= candidate._eacd+_eedc) {
			continue
		}
		if !(candidate._eacd <= far._eacd+_eedc) {
			continue
		}
		if !(candidate._dgbc <= overlapHi) {
			continue
		}
		if !(overlapLo <= candidate._cgcfd) {
			continue
		}
		return true
	}
	return false
}

// last returns the final point of the subpath. It panics if the subpath has
// no points.
func (_fbff *subpath) last() _gfa.Point {
	points := _fbff._gbee
	return points[len(points)-1]
}
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	IncludeInlineStencilMasks bool
}

// computeEBBoxes computes the `_beeg` box of every paragraph in the list.
//
// Each box starts as the paragraph's own rectangle. For each paragraph, the
// neighbours returned by yNeighbours(0) give the nearest box edge lying
// entirely to its left and entirely to its right. The paragraph's box is then
// widened to the Llx or Urx of any paragraph whose box top is not above this
// paragraph's box bottom, as long as the new edge stays within those limits.
// When `_adeb` is enabled the boxes are copied back onto the paragraphs'
// rectangles.
func (_aacag paraList) computeEBBoxes() {
	if _bcfb {
		_fgc.Log.Info("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a")
	}
	for _, para := range _aacag {
		para._beeg = para.PdfRectangle
	}

	neighbours := _aacag.yNeighbours(0)
	for i, para := range _aacag {
		box := para._beeg

		// Nearest box edges of the neighbours strictly left / right of `box`.
		leftLimit, rightLimit := -1.0e9, +1.0e9
		for _, j := range neighbours[para] {
			nb := _aacag[j]._beeg
			if nb.Urx < box.Llx {
				leftLimit = _gf.Max(leftLimit, nb.Urx)
			} else if box.Urx < nb.Llx {
				rightLimit = _gf.Min(rightLimit, nb.Llx)
			}
		}

		for j, other := range _aacag {
			ob := other._beeg
			if i == j || ob.Ury > box.Lly {
				continue
			}
			if leftLimit <= ob.Llx && ob.Llx < box.Llx {
				box.Llx = ob.Llx
			} else if ob.Urx <= rightLimit && box.Urx < ob.Urx {
				box.Urx = ob.Urx
			}
		}

		if _bcfb {
			_gfc.Printf("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a", i, para._beeg, box, _fbfea(para.text(), 50))
		}
		para._beeg = box
	}

	if _adeb {
		for _, para := range _aacag {
			para.PdfRectangle = para._beeg
		}
	}
}

// _gabfc returns a ruling of kind `_eeca` for the upper edge of `_dbdf`: its
// primary coordinate is Ury and its secondary range is Llx..Urx.
func _gabfc(_dbdf _bbe.PdfRectangle) *ruling {
	return &ruling{_ccfa: _eeca, _eacd: _dbdf.Ury, _dgbc: _dbdf.Llx, _cgcfd: _dbdf.Urx}
}

// applyTables returns a new list holding one table paragraph per table in
// `_bgaee`, followed by every paragraph of the receiver that is not flagged
// as a table cell (`_eeeg`).
func (_caffa paraList) applyTables(_bgaee []*textTable) paraList {
	var result paraList
	for _, table := range _bgaee {
		result = append(result, table.newTablePara())
	}
	for _, para := range _caffa {
		if para._eeeg {
			continue
		}
		result = append(result, para)
	}
	return result
}

// getFont returns the font named `_abg`, going through the extractor's font
// cache when one is configured (non-nil `_ea`).
//
// Every call with a cache bumps the access counter `_ae`. A cache hit stamps
// the entry with the current counter and returns it. A miss loads the font
// with getFontDirect and stores it, evicting the least recently used entry
// first when the cache already holds `_ggc` entries. Errors from
// getFontDirect are returned unchanged and nothing is cached.
func (_gggd *textObject) getFont(_abg string) (*_bbe.PdfFont, error) {
	if _gggd._ebba._ea != nil {
		_gggd._ebba._ae++
		if entry, hit := _gggd._ebba._ea[_abg]; hit {
			// The map stores fontEntry by value, so the refreshed stamp has to
			// be written back. Without this the stamp was lost and eviction
			// degraded to first-in-first-out.
			entry._fdca = _gggd._ebba._ae
			_gggd._ebba._ea[_abg] = entry
			return entry._gaec, nil
		}
	}

	font, err := _gggd.getFontDirect(_abg)
	if err != nil {
		return nil, err
	}

	if _gggd._ebba._ea != nil {
		if len(_gggd._ebba._ea) >= _ggc {
			// Evict the entry with the oldest stamp; one pass is enough.
			var (
				oldestKey   string
				oldestEntry fontEntry
				found       bool
			)
			for key, entry := range _gggd._ebba._ea {
				if !found || entry._fdca < oldestEntry._fdca {
					oldestKey, oldestEntry, found = key, entry, true
				}
			}
			if found {
				delete(_gggd._ebba._ea, oldestKey)
			}
		}
		_gggd._ebba._ea[_abg] = fontEntry{_gaec: font, _fdca: _gggd._ebba._ae}
	}
	return font, nil
}

// text returns the line's text: the `_fadea` of each word in order, preceded
// by a space for words whose `_aaad` flag is set.
func (_cfg *textLine) text() string {
	var sb _fg.Builder
	for _, word := range _cfg._gfbc {
		if word._aaad {
			sb.WriteString("\u0020")
		}
		sb.WriteString(word._fadea)
	}
	return sb.String()
}

// _caeg classifies the segment between the two points by passing its absolute
// x and y extents, together with `_ffec`, to `_eeaff`.
func _caeg(_ceab, _aceg _gfa.Point) rulingKind {
	dx := _gf.Abs(_ceab.X - _aceg.X)
	dy := _gf.Abs(_ceab.Y - _aceg.Y)
	return _eeaff(dx, dy, _ffec)
}

// markCells sets the `_eeeg` flag on every non-nil cell of the table.
func (_cffb *textTable) markCells() {
	for y := 0; y < _cffb._dccb; y++ {
		for x := 0; x < _cffb._caea; x++ {
			if cell := _cffb.get(x, y); cell != nil {
				cell._eeeg = true
			}
		}
	}
}
// BBox returns the smallest axis-aligned rectangle that encloses all the
// TextMarks in the array, and true. Meta marks and marks whose text `_bedg`
// reports true for (presumably whitespace-only text — confirm) are ignored.
// If no mark remains, the zero rectangle and false are returned.
func (_fdab *TextMarkArray) BBox() (_bbe.PdfRectangle, bool) {
	var (
		bounds _bbe.PdfRectangle
		found  bool
	)
	for _, mark := range _fdab._fbcc {
		if mark.Meta || _bedg(mark.Text) {
			continue
		}
		if !found {
			// The first usable mark seeds the rectangle.
			bounds, found = mark.BBox, true
			continue
		}
		bounds = _eacc(bounds, mark.BBox)
	}
	return bounds, found
}