unipdf/extractor/extractor.go

846 lines
186 KiB
Go
Raw Normal View History

2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2022-06-06 22:48:24 +00:00
package extractor ;import (_cf "bytes";_c "errors";_be "fmt";_ff "github.com/unidoc/unipdf/v3/common";_bd "github.com/unidoc/unipdf/v3/contentstream";_ca "github.com/unidoc/unipdf/v3/core";_dg "github.com/unidoc/unipdf/v3/internal/license";_a "github.com/unidoc/unipdf/v3/internal/textencoding";
_de "github.com/unidoc/unipdf/v3/internal/transform";_ee "github.com/unidoc/unipdf/v3/model";_gf "golang.org/x/text/unicode/norm";_da "golang.org/x/xerrors";_ga "image/color";_d "io";_f "math";_cd "regexp";_ef "sort";_df "strings";_g "unicode";_e "unicode/utf8";
);
2022-03-13 12:41:53 +00:00
2022-06-06 22:48:24 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
func (_bcg *Extractor )ExtractText ()(string ,error ){_cdb ,_ ,_ ,_fcf :=_bcg .ExtractTextWithStats ();return _cdb ,_fcf ;};func (_beca paraList )reorder (_ebfc []int ){_decbg :=make (paraList ,len (_beca ));for _dbcg ,_bgad :=range _ebfc {_decbg [_dbcg ]=_beca [_bgad ];
};copy (_beca ,_decbg );};func (_agcac rulingList )augmentGrid ()(rulingList ,rulingList ){_bccae ,_faagg :=_agcac .vertsHorzs ();if len (_bccae )==0||len (_faagg )==0{return _bccae ,_faagg ;};_gbfc ,_ddef :=_bccae ,_faagg ;_ggce :=_bccae .bbox ();_ddfgd :=_faagg .bbox ();
if _dgac {_ff .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_ggce );_ff .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_ddfgd );
};var _fbffg ,_bbege ,_edff ,_cdcgc *ruling ;if _ddfgd .Llx < _ggce .Llx -_cbfg {_fbffg =&ruling {_bggf :_ggebb ,_cgac :_fafbf ,_facf :_ddfgd .Llx ,_fgbfa :_ggce .Lly ,_ebeb :_ggce .Ury };_bccae =append (rulingList {_fbffg },_bccae ...);};if _ddfgd .Urx > _ggce .Urx +_cbfg {_bbege =&ruling {_bggf :_ggebb ,_cgac :_fafbf ,_facf :_ddfgd .Urx ,_fgbfa :_ggce .Lly ,_ebeb :_ggce .Ury };
_bccae =append (_bccae ,_bbege );};if _ggce .Lly < _ddfgd .Lly -_cbfg {_edff =&ruling {_bggf :_ggebb ,_cgac :_dfbe ,_facf :_ggce .Lly ,_fgbfa :_ddfgd .Llx ,_ebeb :_ddfgd .Urx };_faagg =append (rulingList {_edff },_faagg ...);};if _ggce .Ury > _ddfgd .Ury +_cbfg {_cdcgc =&ruling {_bggf :_ggebb ,_cgac :_dfbe ,_facf :_ggce .Ury ,_fgbfa :_ddfgd .Llx ,_ebeb :_ddfgd .Urx };
_faagg =append (_faagg ,_cdcgc );};if len (_bccae )+len (_faagg )==len (_agcac ){return _gbfc ,_ddef ;};_bdea :=append (_bccae ,_faagg ...);_agcac .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_bdea .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");
return _bccae ,_faagg ;};func _dbgg (_dbea _ee .PdfRectangle ,_cagb ,_gfcfb ,_bebf ,_gdffa *ruling )gridTile {_ecda :=_dbea .Llx ;_cfaaa :=_dbea .Urx ;_cdae :=_dbea .Lly ;_ddbf :=_dbea .Ury ;return gridTile {PdfRectangle :_dbea ,_gdge :_cagb !=nil &&_cagb .encloses (_cdae ,_ddbf ),_geaa :_gfcfb !=nil &&_gfcfb .encloses (_cdae ,_ddbf ),_gaaf :_bebf !=nil &&_bebf .encloses (_ecda ,_cfaaa ),_efab :_gdffa !=nil &&_gdffa .encloses (_ecda ,_cfaaa )};
};func (_bbbd *wordBag )minDepth ()float64 {return _bbbd ._fec -(_bbbd .Ury -_bbbd ._adbbf )};func (_bbdd *textPara )text ()string {_bdgd :=new (_cf .Buffer );_bbdd .writeText (_bdgd );return _bdgd .String ();};func (_cecg *textObject )showText (_fee []byte )error {return _cecg .renderText (_fee )};
func _agfg (_fcge bounded )float64 {return -_fcge .bbox ().Lly };func (_fgeaf lineRuling )asRuling ()(*ruling ,bool ){_fega :=ruling {_cgac :_fgeaf ._fbga ,Color :_fgeaf .Color ,_bggf :_edeaf };switch _fgeaf ._fbga {case _fafbf :_fega ._facf =_fgeaf .xMean ();
_fega ._fgbfa =_f .Min (_fgeaf ._aagg .Y ,_fgeaf ._aafd .Y );_fega ._ebeb =_f .Max (_fgeaf ._aagg .Y ,_fgeaf ._aafd .Y );case _dfbe :_fega ._facf =_fgeaf .yMean ();_fega ._fgbfa =_f .Min (_fgeaf ._aagg .X ,_fgeaf ._aafd .X );_fega ._ebeb =_f .Max (_fgeaf ._aagg .X ,_fgeaf ._aafd .X );
default:_ff .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_fgeaf ._fbga );return nil ,false ;};return &_fega ,true ;};func (_fef *imageExtractContext )extractXObjectImage (_acd *_ca .PdfObjectName ,_ffb _bd .GraphicsState ,_gcc *_ee .PdfPageResources )error {_fbg ,_ :=_gcc .GetXObjectByName (*_acd );
if _fbg ==nil {return nil ;};_deb ,_cgga :=_fef ._aca [_fbg ];if !_cgga {_acf ,_fdg :=_gcc .GetXObjectImageByName (*_acd );if _fdg !=nil {return _fdg ;};if _acf ==nil {return nil ;};_bgg ,_fdg :=_acf .ToImage ();if _fdg !=nil {return _fdg ;};_deb =&cachedImage {_efe :_bgg ,_cda :_acf .ColorSpace };
_fef ._aca [_fbg ]=_deb ;};_edc :=_deb ._efe ;_faa :=_deb ._cda ;_ade ,_fff :=_faa .ImageToRGB (*_edc );if _fff !=nil {return _fff ;};_ff .Log .Debug ("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_ffb .CTM .String ());_ccge :=ImageMark {Image :&_ade ,Width :_ffb .CTM .ScalingFactorX (),Height :_ffb .CTM .ScalingFactorY (),Angle :_ffb .CTM .Angle ()};
_ccge .X ,_ccge .Y =_ffb .CTM .Translation ();_fef ._eee =append (_fef ._eee ,_ccge );_fef ._abef ++;return nil ;};func _gfb (_aeaa float64 )int {var _gecd int ;if _aeaa >=0{_gecd =int (_aeaa /_fdbf );}else {_gecd =int (_aeaa /_fdbf )-1;};return _gecd ;
};func _fbfd (_fgcf ,_bgbfb ,_gabac float64 )rulingKind {if _fgcf >=_gabac &&_abac (_bgbfb ,_fgcf ){return _dfbe ;};if _bgbfb >=_gabac &&_abac (_fgcf ,_bgbfb ){return _fafbf ;};return _fbdff ;};func (_dfefb *textTable )get (_faagb ,_dgdae int )*textPara {return _dfefb ._dbfba [_cgccd (_faagb ,_dgdae )];
};type imageExtractContext struct{_eee []ImageMark ;_fd int ;_abef int ;_cgg int ;_aca map[*_ca .PdfObjectStream ]*cachedImage ;_dfc *ImageExtractOptions ;};func _eabca (_aedc *wordBag ,_gbdg *textWord ,_ceeg float64 )bool {return _aedc .Urx <=_gbdg .Llx &&_gbdg .Llx < _aedc .Urx +_ceeg ;
2022-03-13 12:41:53 +00:00
};
2021-09-23 22:37:42 +00:00
2022-06-06 22:48:24 +00:00
// Tables returns the tables extracted from the page.
// When the _bcag debug switch is on, the table count is logged first.
func (_cacc PageText) Tables() []TextTable {
	tables := _cacc._fdacg
	if _bcag {
		_ff.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// PageText represents the layout of text on a device page.
type PageText struct {
	// Text marks collected for the page; computeViews groups them by their _bdaa value.
	_fcb []*textMark
	// Extracted text, filled in by computeViews.
	_daaf string
	// Flat TextMark view of the page, filled in by computeViews.
	_gdbg []TextMark
	// Tables found on the page, filled in by computeViews and returned by Tables.
	_fdacg []TextTable
	// Rectangle handed to the paragraph builder in computeViews.
	// NOTE(review): presumably the page bounds -- confirm against the caller.
	_dda _ee.PdfRectangle
	// Path sections that computeViews turns into rulings via _adeeb when _ebggd is set.
	_dccc []pathSection
	// Path sections that computeViews turns into rulings via _bgbcf when _degd is set.
	_aacb []pathSection
}
2021-10-22 10:53:20 +00:00
2022-06-06 22:48:24 +00:00
// ExtractPageText returns the text contents of the page behind this Extractor
// as a PageText, together with the two integer statistics reported by
// extractPageText. A model.ErrColorOutOfRange from the extraction step is
// tolerated; any other error aborts the call.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
func (_fed *Extractor) ExtractPageText() (*PageText, int, int, error) {
	page, statA, statB, err := _fed.extractPageText(_fed._gc, _fed._ea, _de.IdentityMatrix(), 0)
	if err != nil && err != _ee.ErrColorOutOfRange {
		return nil, 0, 0, err
	}

	// Derive the text, mark and table views before handing the page out.
	page.computeViews()

	if err = _effec(page); err != nil {
		return nil, 0, 0, err
	}
	return page, statA, statB, nil
}
2021-10-22 10:53:20 +00:00
2022-06-06 22:48:24 +00:00
// String returns a description of the paragraph: its rectangle, an optional
// "[WxH] " prefix when `_cegd` is set, the number of entries in `_cecgd`, and
// the first 50 bytes of its text. Paragraphs flagged `_gbdd` are described by
// their rectangle only.
func (_fbag *textPara) String() string {
	if _fbag._gbdd {
		return _be.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", _fbag.PdfRectangle)
	}

	var dims string
	if tbl := _fbag._cegd; tbl != nil {
		dims = _be.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", tbl._aage, tbl._eabcaa)
	}

	lineCount := len(_fbag._cecgd)
	preview := _ggece(_fbag.text(), 50)
	return _be.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", _fbag.PdfRectangle, dims, lineCount, preview)
}

// setTextRenderMode stores `_abf` as the RenderMode of the text state.
// A nil receiver is a no-op.
func (_cbb *textObject) setTextRenderMode(_abf int) {
	if _cbb != nil {
		_cbb._ecb._ccc = RenderMode(_abf)
	}
}

// _gcag appends a copy of the template mark _caea, with its Text replaced by
// `_afef`, to `_caccc` through _gefg and returns the extended slice.
func _gcag(_caccc []TextMark, _efce *int, _afef string) []TextMark {
	mark := _caea
	mark.Text = _afef
	return _gefg(_caccc, _efce, mark)
}

// setWordSpacing stores `_baf` in the `_bbfa` field of the text state.
// A nil receiver is a no-op.
func (_fafb *textObject) setWordSpacing(_baf float64) {
	if _fafb != nil {
		_fafb._ecb._bbfa = _baf
	}
}
2021-07-30 00:21:16 +00:00
2022-06-06 22:48:24 +00:00
// String returns a description of the text state: the three numeric fields
// `_bace`, `_bbfa` and `_ggda`, and the base font name (or a placeholder when
// no font is set).
func (_aac *textState) String() string {
	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	if font := _aac._fea; font != nil {
		fontName = font.BaseFont()
	}
	return _be.Sprintf("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071", _aac._bace, _aac._bbfa, _aac._ggda, fontName)
}

// fill appends the current path (`_deff`), tagged with the fill colour of the
// text object `_agbc`, to the list `_aaba` points at. With the _dgac debug
// switch on, the new section is printed; with _acg also on, its first eleven
// sub-paths are printed too.
func (_gdbe *shapesState) fill(_aaba *[]pathSection) {
	section := pathSection{_fbdc: _gdbe._deff, Color: _gdbe._agbc.getFillColor()}
	*_aaba = append(*_aaba, section)

	if !_dgac {
		return
	}
	box := section.bbox()
	_be.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*_aaba), len(section._fbdc), _gdbe, section.Color, box, box.Width(), box.Height())
	if !_acg {
		return
	}
	for idx, sub := range section._fbdc {
		_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", idx, sub)
		if idx == 10 {
			break
		}
	}
}

// depth returns the smallest depth() among the non-nil, non-`_gbdd` cells in
// row 0 of the table, or 1e10 when there is no such cell.
func (_gbfg *textTable) depth() float64 {
	best := 1e10
	for col := 0; col < _gbfg._aage; col++ {
		cell := _gbfg.get(col, 0)
		if cell != nil && !cell._gbdd {
			best = _f.Min(best, cell.depth())
		}
	}
	return best
}

// _efa returns the difference of the left edges of the two bounding boxes
// (`_caag` minus `_aeab`).
func _efa(_caag, _aeab bounded) float64 {
	left, otherLeft := _caag.bbox().Llx, _aeab.bbox().Llx
	return left - otherLeft
}

// checkWidth returns the width `_baaeb - _bcbbg` and whether it does not
// exceed _fbfc. The receiver is not consulted.
func (_gafc rectRuling) checkWidth(_bcbbg, _baaeb float64) (float64, bool) {
	width := _baaeb - _bcbbg
	return width, width <= _fbfc
}

// updateBBox folds the rectangle of every paragraph in the cell into the
// cell's own rectangle using _bgcf.
func (_bcgec *compositeCell) updateBBox() {
	box := _bcgec.PdfRectangle
	for _, para := range _bcgec.paraList {
		box = _bgcf(box, para.PdfRectangle)
	}
	_bcgec.PdfRectangle = box
}

// _efgf reports whether `_eabb` consists solely of Unicode white space.
// The empty string counts as all white space.
func _efgf(_eabb string) bool {
	for _, r := range _eabb {
		if _g.IsSpace(r) {
			continue
		}
		return false
	}
	return true
}

// toTextTable converts the internal table into the exported TextTable. Cells
// is indexed [row][column]; positions for which get returns nil are left as
// zero-valued TableCells. Mark offsets restart at 0 for every cell.
func (_afaf *textTable) toTextTable() TextTable {
	if _bcag {
		_ff.Log.Info("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064", _afaf._aage, _afaf._eabcaa)
	}

	cells := make([][]TableCell, _afaf._eabcaa)
	for row := range cells {
		line := make([]TableCell, _afaf._aage)
		cells[row] = line
		for col := range line {
			para := _afaf.get(col, row)
			if para == nil {
				continue
			}
			if _bcag {
				_be.Printf("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", col, row, para)
			}
			line[col].Text = para.text()
			offset := 0
			line[col].Marks._beaa = para.toTextMarks(&offset)
		}
	}
	return TextTable{W: _afaf._aage, H: _afaf._eabcaa, Cells: cells}
}
2021-12-14 01:08:28 +00:00
2022-04-27 00:10:33 +00:00
// String returns a description of `w`.
2022-06-06 22:48:24 +00:00
func (_gbga *textWord )String ()string {return _be .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_gbga ._aagef ,_gbga .PdfRectangle ,_gbga ._eedb ,_gbga ._bfdfd );
2022-04-27 00:10:33 +00:00
};
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
2022-06-06 22:48:24 +00:00
func (_eaea *TextMarkArray )BBox ()(_ee .PdfRectangle ,bool ){var _dbf _ee .PdfRectangle ;_dcfg :=false ;for _ ,_ddce :=range _eaea ._beaa {if _ddce .Meta ||_efgf (_ddce .Text ){continue ;};if _dcfg {_dbf =_bgcf (_dbf ,_ddce .BBox );}else {_dbf =_ddce .BBox ;
_dcfg =true ;};};return _dbf ,_dcfg ;};func (_ffedf rulingList )splitSec ()[]rulingList {_ef .Slice (_ffedf ,func (_dfddf ,_feefd int )bool {_adbf ,_abegg :=_ffedf [_dfddf ],_ffedf [_feefd ];if _adbf ._fgbfa !=_abegg ._fgbfa {return _adbf ._fgbfa < _abegg ._fgbfa ;
};return _adbf ._ebeb < _abegg ._ebeb ;});_acfc :=make (map[*ruling ]struct{},len (_ffedf ));_ffcea :=func (_baegb *ruling )rulingList {_ggcb :=rulingList {_baegb };_acfc [_baegb ]=struct{}{};for _ ,_ddfc :=range _ffedf {if _ ,_ffcf :=_acfc [_ddfc ];_ffcf {continue ;
};for _ ,_bfba :=range _ggcb {if _ddfc .alignsSec (_bfba ){_ggcb =append (_ggcb ,_ddfc );_acfc [_ddfc ]=struct{}{};break ;};};};return _ggcb ;};_gaae :=[]rulingList {_ffcea (_ffedf [0])};for _ ,_cfdb :=range _ffedf [1:]{if _ ,_bgdf :=_acfc [_cfdb ];_bgdf {continue ;
};_gaae =append (_gaae ,_ffcea (_cfdb ));};return _gaae ;};var _caea =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_ga .White ,StrokeColor :_ga .White };func _af (_abe []Font ,_dee string )bool {for _ ,_abeg :=range _abe {if _abeg .FontName ==_dee {return true ;
};};return false ;};func (_bbffd rulingList )aligned ()bool {if len (_bbffd )< 2{return false ;};_egeg :=make (map[*ruling ]int );_egeg [_bbffd [0]]=0;for _ ,_abcg :=range _bbffd [1:]{_cgeb :=false ;for _beeg :=range _egeg {if _abcg .gridIntersecting (_beeg ){_egeg [_beeg ]++;
_cgeb =true ;break ;};};if !_cgeb {_egeg [_abcg ]=0;};};_cfacc :=0;for _ ,_abgd :=range _egeg {if _abgd ==0{_cfacc ++;};};_cdbe :=float64 (_cfacc )/float64 (len (_bbffd ));_ccgf :=_cdbe <=1.0-_cece ;if _dgac {_ff .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_ccgf ,_cdbe ,_cfacc ,len (_bbffd ),_bbffd .String ());
};return _ccgf ;};func (_ffdb paraList )toTextMarks ()[]TextMark {_aadb :=0;var _ggbc []TextMark ;for _eabf ,_daae :=range _ffdb {if _daae ._gbdd {continue ;};_bcfac :=_daae .toTextMarks (&_aadb );_ggbc =append (_ggbc ,_bcfac ...);if _eabf !=len (_ffdb )-1{if _bdaf (_daae ,_ffdb [_eabf +1]){_ggbc =_gcag (_ggbc ,&_aadb ,"\u0020");
}else {_ggbc =_gcag (_ggbc ,&_aadb ,"\u000a");_ggbc =_gcag (_ggbc ,&_aadb ,"\u000a");};};};_ggbc =_gcag (_ggbc ,&_aadb ,"\u000a");_ggbc =_gcag (_ggbc ,&_aadb ,"\u000a");return _ggbc ;};func _ggece (_ffbf string ,_bcdd int )string {if len (_ffbf )< _bcdd {return _ffbf ;
};return _ffbf [:_bcdd ];};
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// Len returns the number of TextMarks in `ma`. A nil array has length 0.
func (_dcbd *TextMarkArray) Len() int {
	if _dcbd != nil {
		return len(_dcbd._beaa)
	}
	return 0
}

// getStrokeColor converts the stroking colour of the graphics state `_bcgf`,
// interpreted in its stroking colour space, with _ccfa.
func (_ggg *textObject) getStrokeColor() _ga.Color {
	gs := _ggg._bcgf
	return _ccfa(gs.ColorspaceStroking, gs.ColorStroking)
}

// setHorizScaling stores `_geea` in the `_cfa` field of the text state.
// A nil receiver is a no-op.
func (_ege *textObject) setHorizScaling(_geea float64) {
	if _ege != nil {
		_ege._ecb._cfa = _geea
	}
}

// inDiacriticArea reports whether `_bafeb` sits close enough to the receiver:
// the sum of the left-edge and right-edge offsets must be smaller in
// magnitude than _fabde times the receiver's width, and the lower-edge
// offset smaller in magnitude than _fabde times its height.
func (_cbga *textMark) inDiacriticArea(_bafeb *textMark) bool {
	dLeft := _cbga.Llx - _bafeb.Llx
	dRight := _cbga.Urx - _bafeb.Urx
	dBottom := _cbga.Lly - _bafeb.Lly

	if !(_f.Abs(dLeft+dRight) < _cbga.Width()*_fabde) {
		return false
	}
	return _f.Abs(dBottom) < _cbga.Height()*_fabde
}

// _fcg returns the distance from the top of `_dfcb` down to the lower edge
// of the bounding box of `_eccde`.
func _fcg(_dfcb _ee.PdfRectangle, _eccde bounded) float64 {
	bottom := _eccde.bbox().Lly
	return _dfcb.Ury - bottom
}
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	// Content stream text handed to extractPageText.
	_gc string
	// Page resources handed to extractPageText.
	_ea *_ee.PdfPageResources
	// NOTE(review): presumably the page media box -- confirm; not used in this part of the file.
	_eab _ee.PdfRectangle
	// Font cache keyed by the String() of the font dictionary; nil disables caching (see getFont).
	_bg map[string]fontEntry
	// NOTE(review): presumably a cache of per-form text results -- confirm.
	_bb map[string]textResult
	// Monotonic counter used by getFont to stamp font cache accesses.
	_bge int64
	_gaf int
}

// removeDuplicates drops words that duplicate another word in the same depth
// bucket: same text and all four rectangle edges closer than _gbbc times the
// `_eedb` value of the first word of the bucket being scanned from. Buckets
// that become empty are removed from `_aceg`.
func (_ebgd *wordBag) removeDuplicates() {
	if _ced {
		_ff.Log.Info("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071", _ebgd.text())
	}

	for _, depthIdx := range _ebgd.depthIndexes() {
		if len(_ebgd._aceg[depthIdx]) == 0 {
			continue
		}
		first := _ebgd._aceg[depthIdx][0]
		tol := _gbbc * first._eedb
		depth := first._aagef

		for _, bandIdx := range _ebgd.depthBand(depth, depth+tol) {
			dups := map[*textWord]struct{}{}
			words := _ebgd._aceg[bandIdx]

			// Mark every word that matches an earlier, still-kept word.
			for _, keep := range words {
				if _, dropped := dups[keep]; dropped {
					continue
				}
				for _, cand := range words {
					if _, dropped := dups[cand]; dropped {
						continue
					}
					if cand != keep && cand._bfdfd == keep._bfdfd &&
						_f.Abs(cand.Llx-keep.Llx) < tol &&
						_f.Abs(cand.Urx-keep.Urx) < tol &&
						_f.Abs(cand.Lly-keep.Lly) < tol &&
						_f.Abs(cand.Ury-keep.Ury) < tol {
						dups[cand] = struct{}{}
					}
				}
			}
			if len(dups) == 0 {
				continue
			}

			// Compact the bucket in place, keeping the surviving words in order.
			n := 0
			for _, w := range words {
				if _, dropped := dups[w]; !dropped {
					words[n] = w
					n++
				}
			}
			_ebgd._aceg[bandIdx] = words[:len(words)-len(dups)]
			if len(_ebgd._aceg[bandIdx]) == 0 {
				delete(_ebgd._aceg, bandIdx)
			}
		}
	}
}

// hasLines reports whether _gddc holds for the cell's rectangle and the
// rectangle of at least one line in `_bgdb`. With the _bcag debug switch on,
// every comparison is printed.
func (_cdcg compositeCell) hasLines(_bgdb []*textLine) bool {
	for i, line := range _bgdb {
		hit := _gddc(_cdcg.PdfRectangle, line.PdfRectangle)
		if _bcag {
			_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", hit, i, len(_bgdb))
			_be.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", _cdcg)
			_be.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if hit {
			return true
		}
	}
	return false
}

// _dadcg binds the float argument `_feeea` of the three-argument predicate
// `_dgcd`, yielding a two-argument predicate.
func _dadcg(_dgcd func(*wordBag, *textWord, float64) bool, _feeea float64) func(*wordBag, *textWord) bool {
	return func(bag *wordBag, word *textWord) bool {
		return _dgcd(bag, word, _feeea)
	}
}

// markKind values. _edeaf is the kind asRuling assigns to rulings made from
// lines; _ggebb is the kind augmentGrid assigns to the rulings it synthesises.
const (
	_gged markKind = iota
	_edeaf
	_cgbe
	_ggebb
)

// getCurrentFont returns the font of the current text state, or
// model.DefaultFont() (after a debug log entry) when none is set.
func (_bfg *textObject) getCurrentFont() *_ee.PdfFont {
	if font := _bfg._ecb._fea; font != nil {
		return font
	}
	_ff.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e")
	return _ee.DefaultFont()
}

// _cfff reports whether the two values differ by at most _cbfg.
func _cfff(_fabdeb, _egbg float64) bool {
	return _f.Abs(_fabdeb-_egbg) <= _cbfg
}

const _cbf = 20

// contains reports whether rectangle `_fbcfd` fits inside the tile. Tiles
// with fewer than three borders never contain anything. Only sides that have
// a border are checked, each with a tolerance of _ebcf.
func (_bfgcc gridTile) contains(_fbcfd _ee.PdfRectangle) bool {
	if _bfgcc.numBorders() < 3 {
		return false
	}
	switch {
	case _bfgcc._gdge && _fbcfd.Llx < _bfgcc.Llx-_ebcf: // sticks out on the left
		return false
	case _bfgcc._geaa && _fbcfd.Urx > _bfgcc.Urx+_ebcf: // sticks out on the right
		return false
	case _bfgcc._gaaf && _fbcfd.Lly < _bfgcc.Lly-_ebcf: // sticks out at the bottom
		return false
	case _bfgcc._efab && _fbcfd.Ury > _bfgcc.Ury+_ebcf: // sticks out at the top
		return false
	}
	return true
}

// getFont returns the font registered under resource name `_bgaa`.
//
// When the owner's font cache (`_decb._bg`) is non-nil it is consulted first,
// keyed by the String() of the font dictionary. Every lookup advances the
// access counter `_decb._bge`. On a miss the font is loaded with
// getFontDirect and stored; if the cache already holds _bda or more entries,
// the entry with the smallest access stamp is evicted first.
//
// The cache stores fontEntry values, not pointers, so a hit must write the
// refreshed stamp back into the map. Without that write-back the stamp is
// updated on a copy only, and eviction follows insertion order instead of
// most recent use.
func (_ebgb *textObject) getFont(_bgaa string) (*_ee.PdfFont, error) {
	if _ebgb._decb._bg != nil {
		dict, err := _ebgb.getFontDict(_bgaa)
		if err != nil {
			_ff.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073", _bgaa, err.Error())
			return nil, err
		}
		_ebgb._decb._bge++
		key := dict.String()
		if entry, hit := _ebgb._decb._bg[key]; hit {
			entry._eea = _ebgb._decb._bge
			// Persist the refreshed stamp; `entry` is a copy of the map value.
			_ebgb._decb._bg[key] = entry
			return entry._afgf, nil
		}
	}

	dict, err := _ebgb.getFontDict(_bgaa)
	if err != nil {
		return nil, err
	}
	font, err := _ebgb.getFontDirect(_bgaa)
	if err != nil {
		return nil, err
	}

	if _ebgb._decb._bg != nil {
		fresh := fontEntry{_afgf: font, _eea: _ebgb._decb._bge}
		if len(_ebgb._decb._bg) >= _bda {
			// Evict the entry with the oldest access stamp.
			var keys []string
			for k := range _ebgb._decb._bg {
				keys = append(keys, k)
			}
			_ef.Slice(keys, func(i, j int) bool {
				return _ebgb._decb._bg[keys[i]]._eea < _ebgb._decb._bg[keys[j]]._eea
			})
			delete(_ebgb._decb._bg, keys[0])
		}
		_ebgb._decb._bg[dict.String()] = fresh
	}
	return font, nil
}

// compositeRowCorridors returns, for every row from 1 upwards that has at
// least one composite cell, the value _edgg computes from that row's cells,
// keyed by row index.
func (_cdecb *textTable) compositeRowCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, _cdecb._eabcaa)
	if _bcag {
		_ff.Log.Info("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064", _cdecb._eabcaa)
	}

	for row := 1; row < _cdecb._eabcaa; row++ {
		var cells []compositeCell
		for col := 0; col < _cdecb._aage; col++ {
			if cell, ok := _cdecb._ebgbb[_cgccd(col, row)]; ok {
				cells = append(cells, cell)
			}
		}
		if len(cells) == 0 {
			continue
		}
		gaps := _edgg(cells)
		corridors[row] = gaps
		if _bcag {
			_be.Printf("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a", row, gaps)
		}
	}
	return corridors
}

// _efcg reports whether the x range of `_gbbe` overlaps the x range of
// `_gcgc` widened by `_gcda` on both sides.
func _efcg(_gcgc *wordBag, _gbbe *textWord, _gcda float64) bool {
	startsBeforeEnd := _gbbe.Llx < _gcgc.Urx+_gcda
	endsAfterStart := _gcgc.Llx-_gcda < _gbbe.Urx
	return startsBeforeEnd && endsAfterStart
}

const _feef = 1.0 / 1000.0

// extractTables finds tables among the paragraphs with findTables, using the
// grid tilings `_bcbgb`, and returns the result of applyTables. Lists with
// fewer than _gfec paragraphs are returned unchanged.
func (_bcff paraList) extractTables(_bcbgb []gridTiling) paraList {
	if _bcag {
		_ff.Log.Debug("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_bcff))
	}
	if len(_bcff) < _gfec {
		return _bcff
	}

	tables := _bcff.findTables(_bcbgb)
	if _bcag {
		_ff.Log.Info("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(tables))
		for i, tbl := range tables {
			tbl.log(_be.Sprintf("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064", i))
		}
	}
	return _bcff.applyTables(tables)
}
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// String returns a description of the ruling kind: its name from the _bddd
// table, or a "not a ruling" message carrying the numeric value.
func (_gcbae rulingKind) String() string {
	if name, known := _bddd[_gcbae]; known {
		return name
	}
	return _be.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _gcbae)
}

// reduceTiling returns a table with redundant rows and columns removed.
// A row is dropped when it is not the first, its `_dgbfg` coordinate is
// within `_ffcef` of the previous row's, and emptyCompositeRow holds. A
// column is dropped when it is not the last, its `_dgcdc` coordinate is
// within `_ffcef` of the next column's, and emptyCompositeColumn holds.
// When nothing is dropped the receiver itself is returned.
func (_fcfb *textTable) reduceTiling(_bfad gridTiling, _ffcef float64) *textTable {
	keptRows := make([]int, 0, _fcfb._eabcaa)
	keptCols := make([]int, 0, _fcfb._aage)
	colPos := _bfad._dgcdc
	rowPos := _bfad._dgbfg

	for row := 0; row < _fcfb._eabcaa; row++ {
		redundant := row > 0 && _f.Abs(rowPos[row-1]-rowPos[row]) < _ffcef && _fcfb.emptyCompositeRow(row)
		if redundant {
			continue
		}
		keptRows = append(keptRows, row)
	}
	for col := 0; col < _fcfb._aage; col++ {
		redundant := col < _fcfb._aage-1 && _f.Abs(colPos[col+1]-colPos[col]) < _ffcef && _fcfb.emptyCompositeColumn(col)
		if redundant {
			continue
		}
		keptCols = append(keptCols, col)
	}

	if len(keptRows) == _fcfb._eabcaa && len(keptCols) == _fcfb._aage {
		return _fcfb
	}

	reduced := textTable{
		_efea:   _fcfb._efea,
		_aage:   len(keptCols),
		_eabcaa: len(keptRows),
		_ebgbb:  make(map[uint64]compositeCell, len(keptCols)*len(keptRows)),
	}
	if _bcag {
		_ff.Log.Info("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064", _fcfb._aage, _fcfb._eabcaa, len(keptCols), len(keptRows))
		_ff.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_ff.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}

	// Copy the surviving composite cells to their new (column, row) positions.
	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			paras, extra := _fcfb.getComposite(oldCol, oldRow)
			if len(paras) == 0 {
				continue
			}
			if _bcag {
				_be.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newCol, newRow, oldCol, oldRow, _ggece(paras.merge().text(), 50))
			}
			reduced.putComposite(newCol, newRow, paras, extra)
		}
	}
	return &reduced
}

// _bcfgd returns the larger of the two ints.
func _bcfgd(_dcef, _baced int) int {
	if _dcef > _baced {
		return _dcef
	}
	return _baced
}

// devicePoint transforms (`_baac`, `_fggdc`) by the product
// `_gfaf`.Mult(`_gcgd`) and returns the result as a transform.Point.
func (_eagc *shapesState) devicePoint(_baac, _fggdc float64) _de.Point {
	m := _eagc._gfaf.Mult(_eagc._gcgd)
	x, y := m.Transform(_baac, _fggdc)
	return _de.NewPoint(x, y)
}

// _ecgf interprets `_aggbg` as exactly two numbers and returns them as
// floats. It returns an error when the argument count is not two or when
// GetNumbersAsFloat fails.
func _ecgf(_aggbg []_ca.PdfObject) (_ecbca, _agecc float64, _acfae error) {
	if len(_aggbg) != 2 {
		return 0, 0, _be.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", len(_aggbg))
	}
	nums, err := _ca.GetNumbersAsFloat(_aggbg)
	if err != nil {
		return 0, 0, err
	}
	return nums[0], nums[1], nil
}

// fontEntry is one slot of the font cache maintained by getFont.
type fontEntry struct {
	// The cached font.
	_afgf *_ee.PdfFont
	// Access-counter value stamped on the entry; the smallest stamp is evicted first.
	_eea int64
}

// eventNeighbours sweeps the events in order of `_ebgdd` and returns, for
// every paragraph that has an enter event, the indexes of the paragraphs that
// were active at the same time. `_ddfe` marks an enter event, `_egca` is the
// paragraph's index in the receiver. Paragraphs without overlap map to nil.
//
// The events are sorted in place. At equal `_ebgdd`, enter events come before
// leave events; remaining ties keep their input order. A stable sort is used
// for this: inside sort.Slice the comparator's indices are current slice
// positions, so an `i < j` tie-break does not preserve input order.
func (_gfgc paraList) eventNeighbours(_edeff []event) map[*textPara][]int {
	_ef.SliceStable(_edeff, func(i, j int) bool {
		a, b := _edeff[i], _edeff[j]
		if a._ebgdd != b._ebgdd {
			return a._ebgdd < b._ebgdd
		}
		if a._ddfe != b._ddfe {
			return a._ddfe
		}
		return false
	})

	overlaps := make(map[int]intSet)
	active := make(intSet)
	for _, ev := range _edeff {
		if !ev._ddfe {
			active.del(ev._egca)
			continue
		}
		// Entering: pair this paragraph with everything currently active.
		overlaps[ev._egca] = make(intSet)
		for other := range active {
			if other != ev._egca {
				overlaps[ev._egca].add(other)
				overlaps[other].add(ev._egca)
			}
		}
		active.add(ev._egca)
	}

	result := map[*textPara][]int{}
	for idx, set := range overlaps {
		para := _gfgc[idx]
		if len(set) == 0 {
			result[para] = nil
			continue
		}
		members := make([]int, len(set))
		n := 0
		for m := range set {
			members[n] = m
			n++
		}
		result[para] = members
	}
	return result
}

// newSubPath calls clearPath and, with the _gcga debug switch on, logs the state.
func (_ceadd *shapesState) newSubPath() {
	_ceadd.clearPath()
	if _gcga {
		_ff.Log.Info("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073", _ceadd)
	}
}

// computeViews derives the page's text (`_daaf`), text marks (`_gdbg`) and
// tables (`_fdacg`) from the raw marks in `_fcb`. Rulings are gathered from
// `_dccc` and `_aacb` (each behind its own switch) and turned into tilings.
// Marks are then processed in groups by `_bdaa` value 0, 90, 180 and 270,
// stopping early once every mark has been consumed.
func (_cab *PageText) computeViews() {
	var rulings rulingList
	if _ebggd {
		rulings = append(rulings, _adeeb(_cab._dccc)...)
	}
	if _degd {
		rulings = append(rulings, _bgbcf(_cab._aacb)...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(_cab._fcb)
	for angle := 0; angle < 360 && remaining > 0; angle += 90 {
		group := make([]*textMark, 0, len(_cab._fcb)-remaining)
		for _, mark := range _cab._fcb {
			if mark._bdaa == angle {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _ebdb(group, _cab._dda, rulings, tilings)...)
		remaining -= len(group)
	}

	buf := new(_cf.Buffer)
	paras.writeText(buf)
	_cab._daaf = buf.String()
	_cab._gdbg = paras.toTextMarks()
	_cab._fdacg = paras.tables()
	if _bcag {
		_ff.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(_cab._fdacg))
	}
}

// _gefg stamps `_agfd` with the current offset, appends it to `_eabaed`, and
// advances the offset by the byte length of the mark's Text.
func _gefg(_eabaed []TextMark, _dagaa *int, _agfd TextMark) []TextMark {
	_agfd.Offset = *_dagaa
	*_dagaa += len(_agfd.Text)
	return append(_eabaed, _agfd)
}

// textResult bundles a PageText with two integer counters.
// NOTE(review): presumably the statistics returned by extractPageText -- confirm.
type textResult struct {
	_bcc PageText
	_fgc int
	_cfd int
}
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// Append appends `mark` to the mark array.
func (_efdb *TextMarkArray) Append(mark TextMark) {
	marks := append(_efdb._beaa, mark)
	_efdb._beaa = marks
}

// _dege reports whether the x ranges of the two rectangles overlap or touch.
func _dege(_dbcbg, _fdfb _ee.PdfRectangle) bool {
	if _fdfb.Llx > _dbcbg.Urx {
		return false
	}
	return _dbcbg.Llx <= _fdfb.Urx
}
2022-04-27 00:10:33 +00:00
// Elements returns the TextMarks in `ma`.
2022-06-06 22:48:24 +00:00
func (_cadfg *TextMarkArray )Elements ()[]TextMark {return _cadfg ._beaa };func (_gege *wordBag )scanBand (_decd string ,_abbd *wordBag ,_cebf func (_cca *wordBag ,_cgdf *textWord )bool ,_dcfa ,_bdfb ,_gbd float64 ,_cbgd ,_dbae bool )int {_cegb :=_abbd ._adbbf ;
var _bgca map[int ]map[*textWord ]struct{};if !_cbgd {_bgca =_gege .makeRemovals ();};_bafdd :=_bafe *_cegb ;_fbcea :=0;for _ ,_cbac :=range _gege .depthBand (_dcfa -_bafdd ,_bdfb +_bafdd ){if len (_gege ._aceg [_cbac ])==0{continue ;};for _ ,_dgeg :=range _gege ._aceg [_cbac ]{if !(_dcfa -_bafdd <=_dgeg ._aagef &&_dgeg ._aagef <=_bdfb +_bafdd ){continue ;
};if !_cebf (_abbd ,_dgeg ){continue ;};_dcac :=2.0*_f .Abs (_dgeg ._eedb -_abbd ._adbbf )/(_dgeg ._eedb +_abbd ._adbbf );_ggac :=_f .Max (_dgeg ._eedb /_abbd ._adbbf ,_abbd ._adbbf /_dgeg ._eedb );_gdfb :=_f .Min (_dcac ,_ggac );if _gbd > 0&&_gdfb > _gbd {continue ;
};if _abbd .blocked (_dgeg ){continue ;};if !_cbgd {_abbd .pullWord (_dgeg ,_cbac ,_bgca );};_fbcea ++;if !_dbae {if _dgeg ._aagef < _dcfa {_dcfa =_dgeg ._aagef ;};if _dgeg ._aagef > _bdfb {_bdfb =_dgeg ._aagef ;};};if _cbgd {break ;};};};if !_cbgd {_gege .applyRemovals (_bgca );
};return _fbcea ;};const (_bgcb =1.0e-6;_efdg =1.0e-4;_eacf =10;_fdbf =6;_bafe =0.5;_acff =0.12;_dagf =0.19;_dgfg =0.04;_ebec =0.04;_dddf =1.0;_gfcf =0.04;_bgbf =0.4;_eecb =0.7;_effd =1.0;_debad =0.1;_cgb =1.4;_fgee =0.46;_edeb =0.02;_gbbc =0.2;_fabde =0.5;
_cfaf =4;_fdab =4.0;_gfec =6;_befcc =0.3;_bce =0.01;_gdgbd =0.02;_abed =2;_gcad =2;_ffbcf =500;_acaa =4.0;_dbeb =4.0;_acad =0.05;_cbgf =0.1;_cbfg =2.0;_fbfc =2.0;_ebcf =1.5;_faag =3.0;_cece =0.25;);func (_bgee *wordBag )allWords ()[]*textWord {var _gcbd []*textWord ;
for _ ,_bacf :=range _bgee ._aceg {_gcbd =append (_gcbd ,_bacf ...);};return _gcbd ;};func (_bagf *stateStack )empty ()bool {return len (*_bagf )==0};
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// String returns a description of the ruling: kind, primary coordinate,
// secondary range and its length, mark kind, colour and, when `_gaeb` is
// non-zero, the width. The axis labels are swapped for kind _dfbe. Rulings of
// kind _fbdff are described by a fixed placeholder.
func (_cafg *ruling) String() string {
	if _cafg._cgac == _fbdff {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}

	primaryAxis, secondaryAxis := "\u0078", "\u0079"
	if _cafg._cgac == _dfbe {
		primaryAxis, secondaryAxis = "\u0079", "\u0078"
	}

	var widthPart string
	if _cafg._gaeb != 0.0 {
		widthPart = _be.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _cafg._gaeb)
	}

	length := _cafg._ebeb - _cafg._fgbfa
	return _be.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073", _cafg._cgac, primaryAxis, _cafg._facf, secondaryAxis, _cafg._fgbfa, _cafg._ebeb, length, _cafg._bggf, _cafg.Color, widthPart)
}
2022-04-27 00:10:33 +00:00
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
2022-06-06 22:48:24 +00:00
type RenderMode int ;func _acaf (_bcac string )bool {if _e .RuneCountInString (_bcac )< _cfaf {return false ;};_bbee ,_fcfa :=_e .DecodeLastRuneInString (_bcac );if _fcfa <=0||!_g .Is (_g .Hyphen ,_bbee ){return false ;};_bbee ,_fcfa =_e .DecodeLastRuneInString (_bcac [:len (_bcac )-_fcfa ]);
return _fcfa > 0&&!_g .IsSpace (_bbee );};func _efdf (_eaec ,_febc _ee .PdfRectangle )bool {return _eaec .Llx <=_febc .Llx &&_febc .Urx <=_eaec .Urx &&_eaec .Lly <=_febc .Lly &&_febc .Ury <=_eaec .Ury ;};var (_addg =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};
);func _gdag (_aebfe ,_ceac _de .Point ,_afgg _ga .Color )(*ruling ,bool ){_geac :=lineRuling {_aagg :_aebfe ,_aafd :_ceac ,_fbga :_gebfb (_aebfe ,_ceac ),Color :_afgg };if _geac ._fbga ==_fbdff {return nil ,false ;};return _geac .asRuling ();};func _ccag (_ccdgc ,_ffdf bounded )float64 {return _agfg (_ccdgc )-_agfg (_ffdf )};
// _bccdg returns the separator written between two cell lines: a single space when
// _dcfga reports true for the difference a-b, otherwise a line feed.
func _bccdg(a, b float64) string {
	if _dcfga(a - b) {
		return "\u0020"
	}
	return "\u000a"
}

// _ebbgg returns the keys of `tiles` sorted in increasing order.
func _ebbgg(tiles map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(tiles))
	for key := range tiles {
		keys = append(keys, key)
	}
	_ef.Float64s(keys)
	return keys
}

// extractInlineImage converts the inline image `inline` to RGB and appends an
// ImageMark for it to the context. The mark's size, angle and position are taken
// from the CTM of `gs`. A missing color space is treated as DeviceGray.
// Any conversion error is returned and nothing is recorded.
func (ctx *imageExtractContext) extractInlineImage(inline *_bd.ContentStreamInlineImage, gs _bd.GraphicsState, resources *_ee.PdfPageResources) error {
	img, err := inline.ToImage(resources)
	if err != nil {
		return err
	}
	cs, err := inline.GetColorSpace(resources)
	if err != nil {
		return err
	}
	if cs == nil {
		cs = _ee.NewPdfColorspaceDeviceGray()
	}
	rgb, err := cs.ImageToRGB(*img)
	if err != nil {
		return err
	}
	mark := ImageMark{
		Image:  &rgb,
		Width:  gs.CTM.ScalingFactorX(),
		Height: gs.CTM.ScalingFactorY(),
		Angle:  gs.CTM.Angle(),
	}
	mark.X, mark.Y = gs.CTM.Translation()
	ctx._eee = append(ctx._eee, mark)
	ctx._fd++
	return nil
}

// extractFormImages looks up the form XObject called `name` in `resources` and
// extracts the images in its content stream. The form's own resources are used when
// present, otherwise `resources`. A missing form is not an error.
func (ctx *imageExtractContext) extractFormImages(name *_ca.PdfObjectName, gs _bd.GraphicsState, resources *_ee.PdfPageResources) error {
	form, err := resources.GetXObjectFormByName(*name)
	if err != nil {
		return err
	}
	if form == nil {
		return nil
	}
	content, err := form.GetContentStream()
	if err != nil {
		return err
	}
	formResources := form.Resources
	if formResources == nil {
		formResources = resources
	}
	if err = ctx.extractContentStreamImages(string(content), formResources); err != nil {
		return err
	}
	ctx._cgg++
	return nil
}

// _beafe returns `words` with `word` removed (using _cgbed at the index where it is found).
// If `words` does not contain `word`, an error is logged and `words` is returned unchanged.
// Returning nil here, as was done before, made wordBag.removeWord drop every word
// stored at the same depth index.
func _beafe(words []*textWord, word *textWord) []*textWord {
	for i, w := range words {
		if w == word {
			return _cgbed(words, i)
		}
	}
	_ff.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", word)
	return words
}

// depthBand returns the result of depthRange for the depth indexes of `minDepth` and
// `maxDepth`, or nil if the bag holds no words.
func (b *wordBag) depthBand(minDepth, maxDepth float64) []int {
	if len(b._aceg) == 0 {
		return nil
	}
	return b.depthRange(b.getDepthIdx(minDepth), b.getDepthIdx(maxDepth))
}

// depthIndexes returns the keys of the bag's depth-index map in increasing order,
// or nil if the bag holds no words.
func (b *wordBag) depthIndexes() []int {
	if len(b._aceg) == 0 {
		return nil
	}
	indexes := make([]int, 0, len(b._aceg))
	for idx := range b._aceg {
		indexes = append(indexes, idx)
	}
	_ef.Ints(indexes)
	return indexes
}

// _dea creates a textLine seeded from the first word of `b` at depth index `depthIdx`
// and pulls that word from the bag into the line.
func _dea(b *wordBag, depthIdx int) *textLine {
	first := b.firstWord(depthIdx)
	line := textLine{PdfRectangle: first.PdfRectangle, _caccd: first._eedb, _decg: first._aagef}
	line.pullWord(b, first, depthIdx)
	return &line
}

// pathSection is a list of subpaths together with a color.
type pathSection struct {
	_fbdc []*subpath
	_ga.Color
}

// writeCellText writes the text of the paragraph's lines to `w`.
// When _bcfgf is set, a line that ends in a hyphen and is not the last line has its
// text passed through _bgdcd and is joined directly to the next line. All other
// lines except the last are followed by the separator chosen by _bccdg.
// Errors from `w` are ignored because the method has no error return.
func (p *textPara) writeCellText(w _d.Writer) {
	lastIdx := len(p._cecgd) - 1
	for i, line := range p._cecgd {
		text := line.text()
		dehyphenate := _bcfgf && line.endsInHyphen() && i != lastIdx
		if dehyphenate {
			text = _bgdcd(text)
		}
		w.Write([]byte(text))
		if dehyphenate || i == lastIdx {
			continue
		}
		w.Write([]byte(_bccdg(line._decg, p._cecgd[i+1]._decg)))
	}
}

// getFillColor returns the non-stroking color of the text object's graphics state
// converted by _ccfa.
func (to *textObject) getFillColor() _ga.Color {
	return _ccfa(to._bcgf.ColorspaceNonStroking, to._bcgf.ColorNonStroking)
}

// asRuling converts the rectangle to a ruling of the rectangle's kind.
// For _fafbf (logged as "rulingVert") the primary coordinate is the mean of Llx and
// Urx and the extent is [Lly, Ury]; for _dfbe (logged as "rulingHorz") the roles of
// x and y are swapped. It returns (nil, false) when checkWidth rejects the rectangle
// or the kind is neither of the two.
func (r rectRuling) asRuling() (*ruling, bool) {
	out := ruling{_cgac: r._bbbf, Color: r.Color, _bggf: _cgbe}
	switch r._bbbf {
	case _fafbf:
		out._facf = 0.5 * (r.Llx + r.Urx)
		out._fgbfa = r.Lly
		out._ebeb = r.Ury
		width, ok := r.checkWidth(r.Llx, r.Urx)
		if !ok {
			if _fbgb {
				_ff.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", r)
			}
			return nil, false
		}
		out._gaeb = width
	case _dfbe:
		out._facf = 0.5 * (r.Lly + r.Ury)
		out._fgbfa = r.Llx
		out._ebeb = r.Urx
		width, ok := r.checkWidth(r.Lly, r.Ury)
		if !ok {
			if _fbgb {
				_ff.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", r)
			}
			return nil, false
		}
		out._gaeb = width
	default:
		_ff.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", r._bbbf)
		return nil, false
	}
	return &out, true
}

// _ccfa converts PDF color `col` in colorspace `cs` to an opaque color.NRGBA.
// It returns color.Black when either argument is nil, when the conversion to RGB
// fails, or when the converted color is not a *PdfColorDeviceRGB.
func _ccfa(cs _ee.PdfColorspace, col _ee.PdfColor) _ga.Color {
	if cs == nil || col == nil {
		return _ga.Black
	}
	converted, err := cs.ColorToRGB(col)
	if err != nil {
		_ff.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", col, cs, err)
		return _ga.Black
	}
	rgb, ok := converted.(*_ee.PdfColorDeviceRGB)
	if !ok {
		_ff.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", converted)
		return _ga.Black
	}
	return _ga.NRGBA{
		R: uint8(rgb.R() * 255),
		G: uint8(rgb.G() * 255),
		B: uint8(rgb.B() * 255),
		A: uint8(255),
	}
}

// _beafa reports whether points `a` and `b` have exactly equal coordinates.
func _beafa(a, b _de.Point) bool {
	return a.X == b.X && a.Y == b.Y
}

// getRight collects, for every y in [0, _eabcaa), the _cacae of the cell at
// (_aage-1, y). It returns nil if any of them is already taken or if the _fdec of
// an entry is not the entry that follows it.
func (t *textTable) getRight() paraList {
	right := make(paraList, t._eabcaa)
	for y := 0; y < t._eabcaa; y++ {
		para := t.get(t._aage-1, y)._cacae
		if para.taken() {
			return nil
		}
		right[y] = para
	}
	for y := 0; y < t._eabcaa-1; y++ {
		if right[y]._fdec != right[y+1] {
			return nil
		}
	}
	return right
}

// merge combines the rulings in the list into a single ruling. The kind, _bggf and
// color come from the first element, the primary coordinate is the mean over all
// elements and the extent is the union of all extents.
// The list must not be empty.
func (vecs rulingList) merge() *ruling {
	first := vecs[0]
	primarySum := first._facf
	lo := first._fgbfa
	hi := first._ebeb
	for _, v := range vecs[1:] {
		primarySum += v._facf
		if v._fgbfa < lo {
			lo = v._fgbfa
		}
		if v._ebeb > hi {
			hi = v._ebeb
		}
	}
	merged := &ruling{
		_cgac:  first._cgac,
		_bggf:  first._bggf,
		Color:  first.Color,
		_facf:  primarySum / float64(len(vecs)),
		_fgbfa: lo,
		_ebeb:  hi,
	}
	if _edcb {
		_ff.Log.Info("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073", len(vecs), merged)
		for i, v := range vecs {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, v)
		}
	}
	return merged
}

// makeRemovals returns a map with one empty word set for every depth index
// currently present in the bag.
func (b *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	removals := make(map[int]map[*textWord]struct{}, len(b._aceg))
	for depthIdx := range b._aceg {
		removals[depthIdx] = make(map[*textWord]struct{})
	}
	return removals
}

// _fbde returns `start`, followed by the elements of `vals` strictly between
// min(start, end) and max(start, end), followed by `end`.
// Iteration stops at the first element >= the upper bound, so `vals` is expected to
// be in increasing order.
func _fbde(vals []float64, start, end float64) []float64 {
	lo, hi := start, end
	if hi < lo {
		lo, hi = hi, lo
	}
	out := make([]float64, 0, len(vals)+2)
	out = append(out, start)
	for _, v := range vals {
		if v <= lo {
			continue
		}
		if v >= hi {
			break
		}
		out = append(out, v)
	}
	return append(out, end)
}

// removeWord removes `word` from the words stored at depth index `depthIdx`.
// The depth index is deleted from the bag when no words remain at it.
func (b *wordBag) removeWord(word *textWord, depthIdx int) {
	remaining := _beafe(b._aceg[depthIdx], word)
	if len(remaining) == 0 {
		delete(b._aceg, depthIdx)
		return
	}
	b._aceg[depthIdx] = remaining
}

// cachedImage pairs an image with a colorspace.
type cachedImage struct {
	_efe *_ee.Image
	_cda _ee.PdfColorspace
}
// String returns a string describing the current state of the textState stack.
func (stack *stateStack) String() string {
	// One header line followed by one line per stack entry.
	lines := make([]string, 0, len(*stack)+1)
	lines = append(lines, _be.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(*stack)))
	for i, state := range *stack {
		desc := "\u003c\u006e\u0069l\u003e"
		if state != nil {
			desc = state.String()
		}
		lines = append(lines, _be.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", i, desc))
	}
	return _df.Join(lines, "\u000a")
}

// readBefore reports whether paragraph `i` is read before paragraph `j`.
// It is true when _efae holds for the pair and `i` has the greater Lly. Otherwise it
// requires the _dbfe box of `i` to end left of the start of the _dbfe box of `j`,
// and no other paragraph returned by llyRange for the Lly span of the two may
// overlap the x range formed from the two boxes.
// `ordering` is passed through to llyRange.
func (paras paraList) readBefore(ordering []int, i, j int) bool {
	a := paras[i]
	b := paras[j]
	if _efae(a, b) && a.Lly > b.Lly {
		return true
	}
	if !(a._dbfe.Urx < b._dbfe.Llx) {
		return false
	}

	// Lly span covered by the two paragraphs, lower bound first.
	loY, hiY := a.Lly, b.Lly
	if loY > hiY {
		loY, hiY = hiY, loY
	}
	maxLlx := _f.Max(a._dbfe.Llx, b._dbfe.Llx)
	minUrx := _f.Min(a._dbfe.Urx, b._dbfe.Urx)

	for _, k := range paras.llyRange(ordering, loY, hiY) {
		if k == i || k == j {
			continue
		}
		other := paras[k]
		if other._dbfe.Llx <= minUrx && maxLlx <= other._dbfe.Urx {
			return false
		}
	}
	return true
}

// setTextLeading stores `leading` in the text object's state. It is a no-op on a
// nil text object.
func (to *textObject) setTextLeading(leading float64) {
	if to != nil {
		to._ecb._bgbc = leading
	}
}

// encloses reports whether the ruling's extent, widened by _cbfg at both ends,
// covers the interval [lo, hi].
func (r *ruling) encloses(lo, hi float64) bool {
	start := r._fgbfa - _cbfg
	end := r._ebeb + _cbfg
	return start <= lo && hi <= end
}

// inTile returns the paragraphs whose rectangles `tile` contains.
// The result is nil when there are none.
func (paras paraList) inTile(tile gridTile) paraList {
	var inside paraList
	for _, para := range paras {
		if !tile.contains(para.PdfRectangle) {
			continue
		}
		inside = append(inside, para)
	}
	if _bcag {
		_be.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", tile, len(inside))
		for i, para := range inside {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
		_be.Println("")
	}
	return inside
}

// endsInHyphen reports whether the line ends in a hyphen as judged by _acaf.
// The last word's text is checked first so that the full line text is only built
// when that word ends with a rune in unicode.Hyphen.
// The line must contain at least one word.
func (line *textLine) endsInHyphen() bool {
	lastWord := line._eaab[len(line._eaab)-1]
	wordText := lastWord._bfdfd
	r, size := _e.DecodeLastRuneInString(wordText)
	if size <= 0 {
		return false
	}
	if !_g.Is(_g.Hyphen, r) {
		return false
	}
	if lastWord._adacg && _acaf(wordText) {
		return true
	}
	return _acaf(line.text())
}

// bounded is implemented by objects that have a bounding box.
type bounded interface {
	bbox() _ee.PdfRectangle
}

// topoOrder runs a depth-first traversal over the paragraphs, where paragraph i has
// an edge to paragraph j when readBefore(i, j) holds, and returns the post-order
// visit list passed through _dbac.
func (paras paraList) topoOrder() []int {
	if _fgga {
		_ff.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	n := len(paras)
	visited := make([]bool, n)
	postOrder := make([]int, 0, n)
	llyOrder := paras.llyOrdering()

	var visit func(idx int)
	visit = func(idx int) {
		visited[idx] = true
		for next := 0; next < n; next++ {
			if !visited[next] && paras.readBefore(llyOrder, idx, next) {
				visit(next)
			}
		}
		postOrder = append(postOrder, idx)
	}

	for idx := 0; idx < n; idx++ {
		if visited[idx] {
			continue
		}
		visit(idx)
	}
	return _dbac(postOrder)
}

type markKind int

// blocked reports whether a ruling separates `word` from the bag.
// When the two are disjoint in x, the bag's _degg rulings are tested against the
// rulings derived from the facing rectangles; when they are disjoint in y, the bag's
// _aegf rulings are tested in the same way.
func (bag *wordBag) blocked(word *textWord) bool {
	switch {
	case word.Urx < bag.Llx:
		from := _gbee(word.PdfRectangle)
		to := _beb(bag.PdfRectangle)
		if bag._degg.blocks(from, to) {
			if _beab {
				_ff.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073", word, bag)
			}
			return true
		}
	case bag.Urx < word.Llx:
		from := _gbee(bag.PdfRectangle)
		to := _beb(word.PdfRectangle)
		if bag._degg.blocks(from, to) {
			if _beab {
				_ff.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s", word, bag)
			}
			return true
		}
	}

	switch {
	case word.Ury < bag.Lly:
		from := _egfd(word.PdfRectangle)
		to := _cdge(bag.PdfRectangle)
		if bag._aegf.blocks(from, to) {
			if _beab {
				_ff.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073", word, bag)
			}
			return true
		}
	case bag.Ury < word.Lly:
		from := _egfd(bag.PdfRectangle)
		to := _cdge(word.PdfRectangle)
		if bag._aegf.blocks(from, to) {
			if _beab {
				_ff.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s", word, bag)
			}
			return true
		}
	}
	return false
}

// xMean returns the mean of the x coordinates of the line's two end points.
func (line lineRuling) xMean() float64 {
	sum := line._aagg.X + line._aafd.X
	return 0.5 * sum
}
// String returns a string describing the TextMarkArray.
func (ma TextMarkArray) String() string {
	n := len(ma._beaa)
	if n == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	// Only the first and last marks are shown to keep the description short.
	first := ma._beaa[0]
	last := ma._beaa[n-1]
	return _be.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", n, first, last)
}

// pullWord adds `word` to the bag at depth index `depthIdx` and records it in
// `removals[depthIdx]`. The bag's rectangle is replaced by _bgcf of the bag's and the
// word's rectangles, and the bag's _adbbf is raised to the word's _eedb when that is
// larger. `removals` must already contain a set for `depthIdx`.
func (b *wordBag) pullWord(word *textWord, depthIdx int, removals map[int]map[*textWord]struct{}) {
	b.PdfRectangle = _bgcf(b.PdfRectangle, word.PdfRectangle)
	if word._eedb > b._adbbf {
		b._adbbf = word._eedb
	}
	b._aceg[depthIdx] = append(b._aceg[depthIdx], word)
	removals[depthIdx][word] = struct{}{}
}

// add inserts `v` into the set.
func (s intSet) add(v int) {
	s[v] = struct{}{}
}

// _dfefd removes the last rune from the last mark in `marks` and updates `*offset`
// to match, returning the resulting slice.
//
// If the last mark holds a single rune the whole mark is dropped and `*offset` is set
// to the end of the new last mark, or to the dropped mark's Offset when no marks
// remain. Otherwise the mark's Text is replaced by _bgdcd(Text) and `*offset` is
// moved by the change in byte length.
//
// The shortened text is written back to the slice element; the previous code only
// assigned it to a local copy, leaving the stored mark unchanged. An empty `marks`
// is returned as is instead of indexing out of range.
func _dfefd(marks []TextMark, offset *int) []TextMark {
	if len(marks) == 0 {
		return marks
	}
	lastIdx := len(marks) - 1
	last := marks[lastIdx]

	if len([]rune(last.Text)) == 1 {
		marks = marks[:lastIdx]
		if len(marks) == 0 {
			// Nothing precedes the dropped mark, so the text now ends where it started.
			*offset = last.Offset
			return marks
		}
		prev := marks[len(marks)-1]
		*offset = prev.Offset + len(prev.Text)
		return marks
	}

	trimmed := _bgdcd(last.Text)
	*offset += len(trimmed) - len(last.Text)
	marks[lastIdx].Text = trimmed
	return marks
}

// blocks reports whether any ruling in the list lies between rulings `a` and `b`.
// `a` and `b` must overlap in extent. A ruling in the list blocks when its primary
// coordinate is between those of `a` and `b` (with tolerance _fbfc) and its extent
// overlaps the extent shared by `a` and `b`.
func (vecs rulingList) blocks(a, b *ruling) bool {
	if a._fgbfa > b._ebeb || b._fgbfa > a._ebeb {
		return false
	}
	sharedLo := _f.Max(a._fgbfa, b._fgbfa)
	sharedHi := _f.Min(a._ebeb, b._ebeb)
	// Order the pair so that `a` has the smaller primary coordinate.
	if a._facf > b._facf {
		a, b = b, a
	}
	for _, v := range vecs {
		if a._facf <= v._facf+_fbfc && v._facf <= b._facf+_fbfc &&
			v._fgbfa <= sharedHi && sharedLo <= v._ebeb {
			return true
		}
	}
	return false
}

// toGrids partitions the rulings into groups of connected rulings and returns those
// groups that isActualGrid accepts, each passed through snapToGroups.
//
// Rulings are visited in the order produced by _ddec (more connections first, ties
// broken by comp). Each ruling joins the first existing group that has a member
// connected to it, or starts a new group. Groups are then ordered largest first and
// their members sorted with comp.
// The list must not be empty.
func (vecs rulingList) toGrids() []rulingList {
	if _dgac {
		_ff.Log.Info("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073", vecs)
	}
	intersects := vecs.intersections()
	if _dgac {
		_ff.Log.Info("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020", len(vecs), len(intersects))
		for _, key := range _aacc(intersects) {
			_be.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", key, intersects[key])
		}
	}

	// connects[i] is the set of rulings connected to ruling i; empty sets are omitted.
	connects := make(map[int]intSet, len(vecs))
	for i := range vecs {
		set := vecs.connections(intersects, i)
		if len(set) > 0 {
			connects[i] = set
		}
	}
	if _dgac {
		_ff.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064", len(connects))
		for _, key := range _aacc(connects) {
			_be.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", key, connects[key])
		}
	}

	ordering := _ddec(len(vecs), func(i, j int) bool {
		ni, nj := len(connects[i]), len(connects[j])
		if ni != nj {
			return ni > nj
		}
		return vecs.comp(i, j)
	})
	if _dgac {
		_ff.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076", ordering)
	}

	// Groups of ruling indexes, seeded with the first ruling in `ordering`.
	igrids := [][]int{{ordering[0]}}
nextRuling:
	for _, idx := range ordering[1:] {
		for g, group := range igrids {
			for _, member := range group {
				if connects[member].has(idx) {
					igrids[g] = append(group, idx)
					continue nextRuling
				}
			}
		}
		igrids = append(igrids, []int{idx})
	}
	if _dgac {
		_ff.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076", igrids)
	}

	_ef.SliceStable(igrids, func(i, j int) bool { return len(igrids[i]) > len(igrids[j]) })
	for _, group := range igrids {
		_ef.Slice(group, func(i, j int) bool { return vecs.comp(group[i], group[j]) })
	}

	// Replace the indexes by the rulings they refer to.
	grids := make([]rulingList, len(igrids))
	for g, group := range igrids {
		grid := make(rulingList, len(group))
		for k, idx := range group {
			grid[k] = vecs[idx]
		}
		grids[g] = grid
	}
	if _dgac {
		_ff.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076", grids)
	}

	var actualGrids []rulingList
	for _, grid := range grids {
		if actual, ok := grid.isActualGrid(); ok {
			grid = actual
			grid = grid.snapToGroups()
			actualGrids = append(actualGrids, grid)
		}
	}
	if _dgac {
		_dgced("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073", actualGrids)
		_ff.Log.Info("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064", len(grids), len(actualGrids))
	}
	return actualGrids
}

// compositeColCorridors returns a map with a nil entry for every index in
// [0, _aage), where _aage is the value logged as "w".
func (t *textTable) compositeColCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, t._aage)
	if _bcag {
		_ff.Log.Info("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020", t._aage)
	}
	for x := 0; x < t._aage; x++ {
		corridors[x] = nil
	}
	return corridors
}
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
//	ex, _ := New(page)
//	// handle errors
//	pageText, _, _, err := ex.ExtractPageText()
//	// handle errors
//	text := pageText.Text()
//	textMarks := pageText.Marks()
//
//	start := strings.Index(text, term)
//	end := start + len(term)
//	spanMarks, err := textMarks.RangeOffset(start, end)
//	// handle errors
//	bbox, ok := spanMarks.BBox()
//	// handle errors
type TextMark struct{
2020-11-23 22:15:56 +00:00
2020-12-06 13:03:03 +00:00
// Text is the extracted text.
Text string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// BBox is the bounding box of the text.
2022-06-06 22:48:24 +00:00
BBox _ee .PdfRectangle ;
2021-01-07 14:20:10 +00:00
// Font is the font the text was drawn with.
2022-06-06 22:48:24 +00:00
Font *_ee .PdfFont ;
2021-01-07 14:20:10 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-06-06 22:48:24 +00:00
FillColor _ga .Color ;
2021-01-07 14:20:10 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-06-06 22:48:24 +00:00
StrokeColor _ga .Color ;
2021-01-07 14:20:10 +00:00
// Orientation is the text orientation
2022-06-06 22:48:24 +00:00
Orientation int ;};func _faab (_fdcg string )(string ,bool ){_gcef :=[]rune (_fdcg );if len (_gcef )!=1{return "",false ;};_eage ,_dfgcf :=_addg [_gcef [0]];return _eage ,_dfgcf ;};
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
func (pt *PageText) ApplyArea(bbox _ee.PdfRectangle) {
	// Keep only the marks that _gddc accepts for `bbox`.
	marks := make([]*textMark, 0, len(pt._fcb))
	for _, mark := range pt._fcb {
		if _gddc(mark.bbox(), bbox) {
			marks = append(marks, mark)
		}
	}

	// Build paragraphs separately for each orientation 0, 90, 180 and 270, stopping
	// early once every selected mark has been used.
	var paras paraList
	remaining := len(marks)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		// At most `remaining` marks can still match, so that is the capacity bound.
		// (The previous hint, len(marks)-remaining, was 0 on the first pass.)
		oriented := make([]*textMark, 0, remaining)
		for _, mark := range marks {
			if mark._bdaa == orient {
				oriented = append(oriented, mark)
			}
		}
		if len(oriented) == 0 {
			continue
		}
		paras = append(paras, _ebdb(oriented, pt._dda, nil, nil)...)
		remaining -= len(oriented)
	}

	// Replace the page's text, text marks and tables with those of the selected area.
	buf := new(_cf.Buffer)
	paras.writeText(buf)
	pt._daaf = buf.String()
	pt._gdbg = paras.toTextMarks()
	pt._fdacg = paras.tables()
}

// bbox returns the bounding box of the rulings in the list. The kind of the first
// ruling decides the axes: for _dfbe the secondary range gives x and the primary
// range gives y, otherwise the primary range gives x and the secondary range gives y.
// For an empty list an error is logged and the zero rectangle is returned.
func (vecs rulingList) bbox() _ee.PdfRectangle {
	if len(vecs) == 0 {
		_ff.Log.Error("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073")
		return _ee.PdfRectangle{}
	}
	var box _ee.PdfRectangle
	if vecs[0]._cgac == _dfbe {
		box.Llx, box.Urx = vecs.secMinMax()
		box.Lly, box.Ury = vecs.primMinMax()
	} else {
		box.Llx, box.Urx = vecs.primMinMax()
		box.Lly, box.Ury = vecs.secMinMax()
	}
	return box
}
// String returns a human readable description of `path`.
func (_dgcc *subpath )String ()string {_agbce :=_dgcc ._eaeg ;_dggg :=len (_agbce );if _dggg <=5{return _be .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_dggg ,_agbce );};return _be .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_dggg ,_agbce [0],_agbce [1],_agbce [_dggg -1]);
};func (_edea *Extractor )extractPageText (_dfg string ,_beag *_ee .PdfPageResources ,_eddc _de .Matrix ,_afe int )(*PageText ,int ,int ,error ){_ff .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_afe );
_gdd :=&PageText {_dda :_edea ._eab };_bgeg :=_eabc (_edea ._eab );var _daad stateStack ;_gdb :=_gabb (_edea ,_beag ,_bd .GraphicsState {},&_bgeg ,&_daad );_bag :=shapesState {_gfaf :_eddc ,_gcgd :_de .IdentityMatrix (),_agbc :_gdb };var _feb bool ;if _afe > _cbf {_bga :=_c .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");
_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_afe ,_bga );
return _gdd ,_bgeg ._acc ,_bgeg ._deeb ,_bga ;};_aa :=_bd .NewContentStreamParser (_dfg );_fdaa ,_cec :=_aa .Parse ();if _cec !=nil {_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cec );
return _gdd ,_bgeg ._acc ,_bgeg ._deeb ,_cec ;};_ada :=_bd .NewContentStreamProcessor (*_fdaa );_ada .AddHandler (_bd .HandlerConditionEnumAllOperands ,"",func (_afg *_bd .ContentStreamOperation ,_ceb _bd .GraphicsState ,_gfa *_ee .PdfPageResources )error {_bdd :=_afg .Operand ;
if _bfc {_ff .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_afg );};switch _bdd {case "\u0071":if _gcga {_ff .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_bag ._gcgd );};_daad .push (&_bgeg );case "\u0051":if !_daad .empty (){_bgeg =*_daad .pop ();
};_bag ._gcgd =_ceb .CTM ;if _gcga {_ff .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_bag ._gcgd );};case "\u0042\u0054":if _feb {_ff .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
_gdd ._fcb =append (_gdd ._fcb ,_gdb ._fbf ...);};_feb =true ;_faac :=_ceb ;_faac .CTM =_eddc .Mult (_faac .CTM );_gdb =_gabb (_edea ,_gfa ,_faac ,&_bgeg ,&_daad );_bag ._agbc =_gdb ;case "\u0045\u0054":if !_feb {_ff .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
};_feb =false ;_gdd ._fcb =append (_gdd ._fcb ,_gdb ._fbf ...);_gdb .reset ();case "\u0054\u002a":_gdb .nextLine ();case "\u0054\u0064":if _fac ,_bcb :=_gdb .checkOp (_afg ,2,true );!_fac {_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bcb );
return _bcb ;};_ddc ,_dca ,_dba :=_ecgf (_afg .Params );if _dba !=nil {return _dba ;};_gdb .moveText (_ddc ,_dca );case "\u0054\u0044":if _dfca ,_acea :=_gdb .checkOp (_afg ,2,true );!_dfca {_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_acea );
return _acea ;};_gbgd ,_fde ,_gec :=_ecgf (_afg .Params );if _gec !=nil {_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gec );return _gec ;};_gdb .moveTextSetLeading (_gbgd ,_fde );case "\u0054\u006a":if _fab ,_fdgg :=_gdb .checkOp (_afg ,1,true );
!_fab {_ff .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_afg ,_fdgg );return _fdgg ;};_bgb ,_bab :=_ca .GetStringBytes (_afg .Params [0]);if !_bab {_ff .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_afg );
return _ca .ErrTypeError ;};return _gdb .showText (_bgb );case "\u0054\u004a":if _faf ,_ggd :=_gdb .checkOp (_afg ,1,true );!_faf {_ff .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ggd );return _ggd ;
};_cff ,_gab :=_ca .GetArray (_afg .Params [0]);if !_gab {_ff .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_afg );
return _cec ;};return _gdb .showTextAdjusted (_cff );case "\u0027":if _dcf ,_agg :=_gdb .checkOp (_afg ,1,true );!_dcf {_ff .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_agg );return _agg ;};_gee ,_dece :=_ca .GetStringBytes (_afg .Params [0]);
if !_dece {_ff .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_afg );return _ca .ErrTypeError ;};_gdb .nextLine ();return _gdb .showText (_gee );
case "\u0022":if _bgcg ,_afad :=_gdb .checkOp (_afg ,3,true );!_bgcg {_ff .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_afad );return _afad ;};_cbc ,_efeg ,_cbcg :=_ecgf (_afg .Params [:2]);if _cbcg !=nil {return _cbcg ;
};_eec ,_dfe :=_ca .GetStringBytes (_afg .Params [2]);if !_dfe {_ff .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_afg );
return _ca .ErrTypeError ;};_gdb .setCharSpacing (_cbc );_gdb .setWordSpacing (_efeg );_gdb .nextLine ();return _gdb .showText (_eec );case "\u0054\u004c":_dcg ,_ebg :=_fgge (_afg );if _ebg !=nil {_ff .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ebg );
return _ebg ;};_gdb .setTextLeading (_dcg );case "\u0054\u0063":_gba ,_bgfb :=_fgge (_afg );if _bgfb !=nil {_ff .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bgfb );return _bgfb ;};_gdb .setCharSpacing (_gba );
case "\u0054\u0066":if _ddcf ,_bcf :=_gdb .checkOp (_afg ,2,true );!_ddcf {_ff .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bcf );return _bcf ;};_cce ,_fcc :=_ca .GetNameVal (_afg .Params [0]);if !_fcc {_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_afg );
return _ca .ErrTypeError ;};_deca ,_fdac :=_ca .GetNumberAsFloat (_afg .Params [1]);if !_fcc {_ff .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_afg ,_fdac );
return _fdac ;};_fdac =_gdb .setFont (_cce ,_deca );_gdb ._ecc =_da .Is (_fdac ,_ca .ErrNotSupported );if _fdac !=nil &&!_gdb ._ecc {return _fdac ;};case "\u0054\u006d":if _gce ,_ec :=_gdb .checkOp (_afg ,6,true );!_gce {_ff .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ec );
return _ec ;};_eaa ,_dgc :=_ca .GetNumbersAsFloat (_afg .Params );if _dgc !=nil {_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dgc );return _dgc ;};_gdb .setTextMatrix (_eaa );case "\u0054\u0072":if _baa ,_gcg :=_gdb .checkOp (_afg ,1,true );
!_baa {_ff .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gcg );return _gcg ;};_cae ,_bed :=_ca .GetIntVal (_afg .Params [0]);if !_bed {_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_afg );
return _ca .ErrTypeError ;};_gdb .setTextRenderMode (_cae );case "\u0054\u0073":if _ead ,_adf :=_gdb .checkOp (_afg ,1,true );!_ead {_ff .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_adf );return _adf ;
};_fgd ,_bae :=_ca .GetNumberAsFloat (_afg .Params [0]);if _bae !=nil {_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bae );return _bae ;};_gdb .setTextRise (_fgd );case "\u0054\u0077":if _fdacc ,_fdc :=_gdb .checkOp (_afg ,1,true );
!_fdacc {_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fdc );return _fdc ;};_ceg ,_cdd :=_ca .GetNumberAsFloat (_afg .Params [0]);if _cdd !=nil {_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cdd );
return _cdd ;};_gdb .setWordSpacing (_ceg );case "\u0054\u007a":if _feba ,_gfac :=_gdb .checkOp (_afg ,1,true );!_feba {_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gfac );return _gfac ;};_cddf ,_geg :=_ca .GetNumberAsFloat (_afg .Params [0]);
if _geg !=nil {_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_geg );return _geg ;};_gdb .setHorizScaling (_cddf );case "\u0063\u006d":_bag ._gcgd =_ceb .CTM ;if _bag ._gcgd .Singular (){_ddd :=_de .IdentityMatrix ().Translate (_bag ._gcgd .Translation ());
_ff .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_bag ._gcgd ,_ddd );_bag ._gcgd =_ddd ;};if _gcga {_ff .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_bag ._gcgd );};case "\u006d":if len (_afg .Params )!=2{_ff .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_fg );
return nil ;};_gge ,_fabf :=_ca .GetNumbersAsFloat (_afg .Params );if _fabf !=nil {return _fabf ;};_bag .moveTo (_gge [0],_gge [1]);case "\u006c":if len (_afg .Params )!=2{_ff .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_fg );
return nil ;};_dfaf ,_eece :=_ca .GetNumbersAsFloat (_afg .Params );if _eece !=nil {return _eece ;};_bag .lineTo (_dfaf [0],_dfaf [1]);case "\u0063":if len (_afg .Params )!=6{return _fg ;};_adea ,_cggg :=_ca .GetNumbersAsFloat (_afg .Params );if _cggg !=nil {return _cggg ;
};_ff .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_adea );_bag .cubicTo (_adea [0],_adea [1],_adea [2],_adea [3],_adea [4],_adea [5]);case "\u0076","\u0079":if len (_afg .Params )!=4{return _fg ;
};_afd ,_fbe :=_ca .GetNumbersAsFloat (_afg .Params );if _fbe !=nil {return _fbe ;};_ff .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_afd );_bag .quadraticTo (_afd [0],_afd [1],_afd [2],_afd [3]);
case "\u0068":_bag .closePath ();case "\u0072\u0065":if len (_afg .Params )!=4{return _fg ;};_bbc ,_ffg :=_ca .GetNumbersAsFloat (_afg .Params );if _ffg !=nil {return _ffg ;};_bag .drawRectangle (_bbc [0],_bbc [1],_bbc [2],_bbc [3]);_bag .closePath ();
case "\u0053":_bag .stroke (&_gdd ._dccc );_bag .clearPath ();case "\u0073":_bag .closePath ();_bag .stroke (&_gdd ._dccc );_bag .clearPath ();case "\u0046":_bag .fill (&_gdd ._aacb );_bag .clearPath ();case "\u0066","\u0066\u002a":_bag .closePath ();_bag .fill (&_gdd ._aacb );
_bag .clearPath ();case "\u0042","\u0042\u002a":_bag .fill (&_gdd ._aacb );_bag .stroke (&_gdd ._dccc );_bag .clearPath ();case "\u0062","\u0062\u002a":_bag .closePath ();_bag .fill (&_gdd ._aacb );_bag .stroke (&_gdd ._dccc );_bag .clearPath ();case "\u006e":_bag .clearPath ();
case "\u0044\u006f":if len (_afg .Params )==0{_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_afg .Params );
return _ca .ErrRangeError ;};_adee ,_fgbc :=_ca .GetName (_afg .Params [0]);if !_fgbc {_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_afg .Params [0]);
return _ca .ErrTypeError ;};_ ,_aaa :=_gfa .GetXObjectByName (*_adee );if _aaa !=_ee .XObjectTypeForm {break ;};_ccd ,_fgbc :=_edea ._bb [_adee .String ()];if !_fgbc {_cea ,_bdf :=_gfa .GetXObjectFormByName (*_adee );if _bdf !=nil {_ff .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_bdf );
return _bdf ;};_caa ,_bdf :=_cea .GetContentStream ();if _bdf !=nil {_ff .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_bdf );return _bdf ;};_fdgb :=_cea .Resources ;if _fdgb ==nil {_fdgb =_gfa ;};_dcce ,_geb ,_fdb ,_bdf :=_edea .extractPageText (string (_caa ),_fdgb ,_eddc .Mult (_ceb .CTM ),_afe +1);
if _bdf !=nil {_ff .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_bdf );return _bdf ;};_ccd =textResult {*_dcce ,_geb ,_fdb };_edea ._bb [_adee .String ()]=_ccd ;};_bag ._gcgd =_ceb .CTM ;if _gcga {_ff .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_bag ._gcgd );
};_gdd ._fcb =append (_gdd ._fcb ,_ccd ._bcc ._fcb ...);_gdd ._dccc =append (_gdd ._dccc ,_ccd ._bcc ._dccc ...);_gdd ._aacb =append (_gdd ._aacb ,_ccd ._bcc ._aacb ...);_bgeg ._acc +=_ccd ._fgc ;_bgeg ._deeb +=_ccd ._cfd ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_gdb ._bcgf .ColorspaceNonStroking =_ceb .ColorspaceNonStroking ;
_gdb ._bcgf .ColorNonStroking =_ceb .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_gdb ._bcgf .ColorspaceStroking =_ceb .ColorspaceStroking ;_gdb ._bcgf .ColorStroking =_ceb .ColorStroking ;
};return nil ;});_cec =_ada .Process (_beag );return _gdd ,_bgeg ._acc ,_bgeg ._deeb ,_cec ;};

// _bcd returns the translation component of matrix `_ecca` as a point.
func _bcd(_ecca _de.Matrix) _de.Point {
	tx, ty := _ecca.Translation()
	return _de.Point{X: tx, Y: ty}
}

// applyTables builds a new paraList: first one paragraph per table in `_bbaeb` (as produced by
// newTablePara), then every paragraph of the receiver whose `_ebad` flag is not set.
// The receiver itself is not modified.
func (_gfefd paraList) applyTables(_bbaeb []*textTable) paraList {
	var result paraList
	for _, table := range _bbaeb {
		result = append(result, table.newTablePara())
	}
	for _, para := range _gfefd {
		if !para._ebad {
			result = append(result, para)
		}
	}
	return result
}
2022-03-13 12:41:53 +00:00
2022-04-27 00:10:33 +00:00
// String returns a human readable description of `s`.
2022-06-06 22:48:24 +00:00
func (_fafd intSet )String ()string {var _fedcf []int ;for _gebeb :=range _fafd {if _fafd .has (_gebeb ){_fedcf =append (_fedcf ,_gebeb );};};_ef .Ints (_fedcf );return _be .Sprintf ("\u0025\u002b\u0076",_fedcf );};type gridTile struct{_ee .PdfRectangle ;
_efab ,_gdge ,_gaaf ,_geaa bool ;};func _efb (_dgce []*wordBag )[]*wordBag {if len (_dgce )<=1{return _dgce ;};if _dagg {_ff .Log .Info ("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a");};_ef .Slice (_dgce ,func (_ceged ,_dbbc int )bool {_bdbc ,_cega :=_dgce [_ceged ],_dgce [_dbbc ];
_bgfa :=_bdbc .Width ()*_bdbc .Height ();_bfdg :=_cega .Width ()*_cega .Height ();if _bgfa !=_bfdg {return _bgfa > _bfdg ;};if _bdbc .Height ()!=_cega .Height (){return _bdbc .Height ()> _cega .Height ();};return _ceged < _dbbc ;});var _gcdf []*wordBag ;
_face :=make (intSet );for _gcdfe :=0;_gcdfe < len (_dgce );_gcdfe ++{if _face .has (_gcdfe ){continue ;};_bbfb :=_dgce [_gcdfe ];for _cfdf :=_gcdfe +1;_cfdf < len (_dgce );_cfdf ++{if _face .has (_gcdfe ){continue ;};_fffa :=_dgce [_cfdf ];_dbaab :=_bbfb .PdfRectangle ;
_dbaab .Llx -=_bbfb ._adbbf ;if _efdf (_dbaab ,_fffa .PdfRectangle ){_bbfb .absorb (_fffa );_face .add (_cfdf );};};_gcdf =append (_gcdf ,_bbfb );};if len (_dgce )!=len (_gcdf )+len (_face ){_ff .Log .Error ("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064",len (_dgce ),len (_gcdf ),len (_face ));
};return _gcdf ;};func _cgccd (_gbgc ,_bcgee int )uint64 {return uint64 (_gbgc )*0x1000000+uint64 (_bcgee )};func (_bcbed *textWord )toTextMarks (_cfeg *int )[]TextMark {var _fadfg []TextMark ;for _ ,_eddgd :=range _bcbed ._gceff {_fadfg =_gefg (_fadfg ,_cfeg ,_eddgd .ToTextMark ());
};return _fadfg ;};func (_afbd paraList )merge ()*textPara {_ff .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_afbd ));
if len (_afbd )==0{return nil ;};_afbd .sortReadingOrder ();_gcde :=_afbd [0].PdfRectangle ;_gbbg :=_afbd [0]._cecgd ;for _ ,_gcadb :=range _afbd [1:]{_gcde =_bgcf (_gcde ,_gcadb .PdfRectangle );_gbbg =append (_gbbg ,_gcadb ._cecgd ...);};return _dafa (_gcde ,_gbbg );
};func (_fga *stateStack )top ()*textState {if _fga .empty (){return nil ;};return (*_fga )[_fga .size ()-1];};func _fadf (_adca ,_bfbcg int )int {if _adca < _bfbcg {return _adca ;};return _bfbcg ;};type rectRuling struct{_bbbf rulingKind ;_fcgd markKind ;
_ga .Color ;_ee .PdfRectangle ;};func (_cgage rulingList )primMinMax ()(float64 ,float64 ){_cgdb ,_fcag :=_cgage [0]._facf ,_cgage [0]._facf ;for _ ,_gfdd :=range _cgage [1:]{if _gfdd ._facf < _cgdb {_cgdb =_gfdd ._facf ;}else if _gfdd ._facf > _fcag {_fcag =_gfdd ._facf ;
};};return _cgdb ,_fcag ;};func _gced (_cfc []*textWord ,_gaba float64 ,_eafb ,_ebf rulingList )*wordBag {_cge :=_cgdg (_cfc [0],_gaba ,_eafb ,_ebf );for _ ,_dedg :=range _cfc [1:]{_feac :=_gfb (_dedg ._aagef );_cge ._aceg [_feac ]=append (_cge ._aceg [_feac ],_dedg );
_cge .PdfRectangle =_bgcf (_cge .PdfRectangle ,_dedg .PdfRectangle );};_cge .sort ();return _cge ;};
2022-03-13 12:41:53 +00:00
2022-06-06 22:48:24 +00:00
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_gabc PageText) ToText() string { return _gabc.Text() }

// isQuadrilateral reports whether the subpath has 4 points, or 5 points with the last point
// equal to the first one.
func (_afdf *subpath) isQuadrilateral() bool {
	n := len(_afdf._eaeg)
	if n < 4 || n > 5 {
		return false
	}
	if n == 5 {
		first, last := _afdf._eaeg[0], _afdf._eaeg[4]
		if first.X != last.X || first.Y != last.Y {
			return false
		}
	}
	return true
}

// text returns the line's text: the `_bfdfd` strings of its words concatenated, with a single
// space inserted before every word whose `_adacg` flag is set.
func (_bde *textLine) text() string {
	var sb _df.Builder
	for _, w := range _bde._eaab {
		if w._adacg {
			sb.WriteString("\u0020")
		}
		sb.WriteString(w._bfdfd)
	}
	return sb.String()
}

// addPoint adds the device-space image of (`_ffd`, `_eccg`) to the subpath returned by
// establishSubpath. When no subpath is returned, the point is instead recorded in `_ddcc` and
// the `_ccef` flag is set.
func (_acbf *shapesState) addPoint(_ffd, _eccg float64) {
	sp := _acbf.establishSubpath()
	pt := _acbf.devicePoint(_ffd, _eccg)
	if sp == nil {
		_acbf._ccef = true
		_acbf._ddcc = pt
		return
	}
	sp.add(pt)
}

// sortReadingOrder sorts the paragraphs in place: first by `_cbae`, then by the permutation
// returned by topoOrder. Lists of length 0 or 1 are left untouched.
func (_dbfbf paraList) sortReadingOrder() {
	_ff.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_dbfbf))
	if len(_dbfbf) <= 1 {
		return
	}
	_dbfbf.computeEBBoxes()
	// The comparator must be strict: sort.Slice requires less(i, j) to be false for elements
	// that compare equal, so `< 0` is used rather than `<= 0`.
	_ef.Slice(_dbfbf, func(_dedc, _bbcf int) bool {
		return _cbae(_dbfbf[_dedc], _dbfbf[_bbcf]) < 0
	})
	order := _dbfbf.topoOrder()
	_dbfbf.reorder(order)
}

// lineTo adds the point (`_ffad`, `_bcfa`) to the current path via addPoint, logging it first
// when the `_gcga` flag is enabled.
func (_dceb *shapesState) lineTo(_ffad, _bcfa float64) {
	if _gcga {
		_ff.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _ffad, _bcfa, _dceb.devicePoint(_ffad, _bcfa))
	}
	_dceb.addPoint(_ffad, _bcfa)
}

// _gabb creates a textObject bound to the given extractor, resources, graphics state, text state
// and state stack, with both of its matrices (`_geff`, `_abg`) set to the identity.
func _gabb(_agf *Extractor, _bccd *_ee.PdfPageResources, _cffe _bd.GraphicsState, _cac *textState, _gdg *stateStack) *textObject {
	return &textObject{
		_decb: _agf,
		_bca:  _bccd,
		_bcgf: _cffe,
		_accf: _gdg,
		_ecb:  _cac,
		_geff: _de.IdentityMatrix(),
		_abg:  _de.IdentityMatrix(),
	}
}

// putComposite stores a composite cell made of paragraphs `_gdcd` and rectangle `_ceabe` under
// the key `_cgccd(_cefeb, _gdbce)`. The cell's updateBBox is called before it is stored. An
// empty paragraph list is logged as an error and nothing is stored.
func (_bcgecb *textTable) putComposite(_cefeb, _gdbce int, _gdcd paraList, _ceabe _ee.PdfRectangle) {
	if len(_gdcd) == 0 {
		_ff.Log.Error("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073")
		return
	}
	cell := compositeCell{PdfRectangle: _ceabe, paraList: _gdcd}
	if _bcag {
		_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a", _cefeb, _gdbce, cell.String())
	}
	cell.updateBBox()
	_bcgecb._ebgbb[_cgccd(_cefeb, _gdbce)] = cell
}

// _dcfga reports whether the absolute value of `_bdcaa` is below the tolerance `_bgcb`.
func _dcfga(_bdcaa float64) bool { return _f.Abs(_bdcaa) < _bgcb }

// yMean returns the average of the y coordinates of the ruling's two end points.
func (_ggaea lineRuling) yMean() float64 {
	return 0.5 * (_ggaea._aagg.Y + _ggaea._aafd.Y)
}

// Package-level switches and thresholds.
// NOTE(review): the purpose of each constant is not visible in this part of the file.
const (
	_bcfgf = true
	_adfc  = true
	_aagc  = true
	_aecd  = false
	_afae  = false
	_dbe   = 6
	_ecdd  = 3.0
	_feeb  = 200
	_bcba  = true
	_deee  = true
	_ebggd = true
	_degd  = true
	_ecge  = false
)

// moveLP concatenates a translation by (`_fbge`, `_fdd`) onto the matrix `_abg` and then copies
// the result into `_geff`.
func (_afgd *textObject) moveLP(_fbge, _fdd float64) {
	_afgd._abg.Concat(_de.NewMatrix(1, 0, 0, 1, _fbge, _fdd))
	_afgd._geff = _afgd._abg
}
2022-02-05 21:34:53 +00:00
2022-06-06 22:48:24 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
type ImageMark struct{Image *_ee .Image ;
2021-12-14 01:08:28 +00:00
2022-06-06 22:48:24 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ;Height float64 ;
// Position of the image in PDF coordinates (lower left corner).
X float64 ;Y float64 ;
// Angle in degrees, if rotated.
Angle float64 ;};type gridTiling struct{_ee .PdfRectangle ;_dgcdc []float64 ;_dgbfg []float64 ;_dcaac map[float64 ]map[float64 ]gridTile ;};type textMark struct{_ee .PdfRectangle ;_bdaa int ;_fdbb string ;_aad string ;_ggfc *_ee .PdfFont ;_ceba float64 ;
_eggg float64 ;_abd _de .Matrix ;_ddf _de .Point ;_ebdd _ee .PdfRectangle ;_eaee _ga .Color ;_fca _ga .Color ;};func (_acbcd *wordBag )empty (_eafbc int )bool {_ ,_fag :=_acbcd ._aceg [_eafbc ];return !_fag };func (_bdcf *PageFonts )extractPageResourcesToFont (_fb *_ee .PdfPageResources )error {_dc ,_bf :=_ca .GetDict (_fb .Font );
if !_bf {return _c .New (_ab );};for _ ,_dd :=range _dc .Keys (){var (_dac =true ;_fgg []byte ;_bbg string ;);_ae ,_dge :=_fb .GetFontByName (_dd );if !_dge {return _c .New (_eac );};_def ,_bea :=_ee .NewPdfFontFromPdfObject (_ae );if _bea !=nil {return _bea ;
};_bc :=_def .FontDescriptor ();_gfd :=_def .FontDescriptor ().FontName .String ();_gcd :=_def .Subtype ();if _af (_bdcf .Fonts ,_gfd ){continue ;};if len (_def .ToUnicode ())==0{_dac =false ;};if _bc .FontFile !=nil {if _gd ,_gae :=_ca .GetStream (_bc .FontFile );
_gae {_fgg ,_bea =_ca .DecodeStream (_gd );if _bea !=nil {return _bea ;};_bbg =_gfd +"\u002e\u0070\u0066\u0062";};}else if _bc .FontFile2 !=nil {if _ac ,_bbgd :=_ca .GetStream (_bc .FontFile2 );_bbgd {_fgg ,_bea =_ca .DecodeStream (_ac );if _bea !=nil {return _bea ;
};_bbg =_gfd +"\u002e\u0074\u0074\u0066";};}else if _bc .FontFile3 !=nil {if _cg ,_bac :=_ca .GetStream (_bc .FontFile3 );_bac {_fgg ,_bea =_ca .DecodeStream (_cg );if _bea !=nil {return _bea ;};_bbg =_gfd +"\u002e\u0063\u0066\u0066";};};if len (_bbg )< 1{_ff .Log .Debug (_ad );
};_bbf :=Font {FontName :_gfd ,PdfFont :_def ,IsCID :_def .IsCID (),IsSimple :_def .IsSimple (),ToUnicode :_dac ,FontType :_gcd ,FontData :_fgg ,FontFileName :_bbg ,FontDescriptor :_bc };_bdcf .Fonts =append (_bdcf .Fonts ,_bbf );};return nil ;};func _bgdcd (_gbea string )string {_bdgg :=[]rune (_gbea );
return string (_bdgg [:len (_bdgg )-1])};const (_fbdff rulingKind =iota ;_dfbe ;_fafbf ;);
// String returns a description of the mark kind: its entry in `_bbfbab`, or a "Not a mark"
// message carrying the numeric value when there is no entry.
func (_fgff markKind) String() string {
	if label, known := _bbfbab[_fgff]; known {
		return label
	}
	return _be.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _fgff)
}

// subdivide splits the composite cells of the table along its row and column corridors and
// returns a new table holding the resulting cells. If either corridor set is empty the receiver
// itself is returned. Both corridor maps are passed through `_aaeac` before use.
func (_feegc *textTable) subdivide() *textTable {
	_feegc.logComposite("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e")
	rowCorridors := _feegc.compositeRowCorridors()
	colCorridors := _feegc.compositeColCorridors()
	if _bcag {
		_ff.Log.Info("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073", _beda(rowCorridors), _beda(colCorridors))
	}
	if len(rowCorridors) == 0 || len(colCorridors) == 0 {
		return _feegc
	}
	_aaeac(rowCorridors)
	_aaeac(colCorridors)
	if _bcag {
		_ff.Log.Info("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073", _beda(rowCorridors), _beda(colCorridors))
	}

	// Offsets of each composite row/column in the subdivided table, plus the new dimensions.
	yOffsets, numRows := _bfaa(_feegc._eabcaa, rowCorridors)
	xOffsets, numCols := _bfaa(_feegc._aage, colCorridors)
	cells := make(map[uint64]*textPara, numCols*numRows)
	result := &textTable{
		PdfRectangle: _feegc.PdfRectangle,
		_efea:        _feegc._efea,
		_eabcaa:      numRows,
		_aage:        numCols,
		_dbfba:       cells,
	}
	if _bcag {
		_ff.Log.Info("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076", _feegc._aage, _feegc._eabcaa, numCols, numRows, _beda(rowCorridors), _beda(colCorridors), yOffsets, xOffsets)
	}

	for y := 0; y < _feegc._eabcaa; y++ {
		y0 := yOffsets[y]
		for x := 0; x < _feegc._aage; x++ {
			x0 := xOffsets[x]
			if _bcag {
				_be.Printf("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a", x, y, x0, y0)
			}
			composite, present := _feegc._ebgbb[_cgccd(x, y)]
			if !present {
				continue
			}
			// Split this composite cell and copy its pieces to their offset position.
			sub := composite.split(rowCorridors[y], colCorridors[x])
			for dy := 0; dy < sub._eabcaa; dy++ {
				for dx := 0; dx < sub._aage; dx++ {
					piece := sub.get(dx, dy)
					result.put(x0+dx, y0+dy, piece)
					if _bcag {
						_be.Printf("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", x0+dx, y0+dy, piece)
					}
				}
			}
		}
	}
	return result
}

// _beb returns a ruling of kind `_fafbf` located at the left edge of `_bbef`, spanning the
// rectangle's Lly..Ury range.
func _beb(_bbef _ee.PdfRectangle) *ruling {
	return &ruling{
		_cgac:  _fafbf,
		_facf:  _bbef.Llx,
		_fgbfa: _bbef.Lly,
		_ebeb:  _bbef.Ury,
	}
}

// bbox returns the smallest rectangle containing every point of every subpath in the section.
// The section must contain at least one subpath with at least one point.
func (_degc pathSection) bbox() _ee.PdfRectangle {
	seed := _degc._fbdc[0]._eaeg[0]
	rect := _ee.PdfRectangle{Llx: seed.X, Urx: seed.X, Lly: seed.Y, Ury: seed.Y}
	grow := func(p _de.Point) {
		switch {
		case p.X < rect.Llx:
			rect.Llx = p.X
		case p.X > rect.Urx:
			rect.Urx = p.X
		}
		switch {
		case p.Y < rect.Lly:
			rect.Lly = p.Y
		case p.Y > rect.Ury:
			rect.Ury = p.Y
		}
	}
	for _, p := range _degc._fbdc[0]._eaeg[1:] {
		grow(p)
	}
	for _, sp := range _degc._fbdc[1:] {
		for _, p := range sp._eaeg {
			grow(p)
		}
	}
	return rect
}

// getFontDirect looks up the font dictionary named `_deba` and builds a PdfFont from it.
// A lookup failure returns (nil, err). A construction failure is logged and returned together
// with whatever font value NewPdfFontFromPdfObject produced.
func (_cdf *textObject) getFontDirect(_deba string) (*_ee.PdfFont, error) {
	fontObj, err := _cdf.getFontDict(_deba)
	if err != nil {
		return nil, err
	}
	font, err := _ee.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		_ff.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _deba, err)
	}
	return font, err
}

// _aaeac walks the entries of `_ggbf` in the key order returned by `_aegg`, starting at the
// first key with a non-nil slice, and clamps the last element of a slice to a first element
// when it is larger. Maps with fewer than two entries are left untouched.
//
// NOTE(review): both slices compared in the loop are read from the same key (`key`), not from
// the previous key, so the comparison is between the last and first elements of one slice and
// the result is stored under the previous key. This looks unintended; behaviour is kept as is.
func _aaeac(_ggbf map[int][]float64) {
	if len(_ggbf) <= 1 {
		return
	}
	keys := _aegg(_ggbf)
	if _bcag {
		_ff.Log.Info("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076", keys)
	}
	var start, prevKey int
	for start, prevKey = range keys {
		if _ggbf[prevKey] != nil {
			break
		}
	}
	for offset, key := range keys[start:] {
		current := _ggbf[key]
		if current == nil {
			continue
		}
		if _bcag {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a", start+offset, prevKey, key)
		}
		other := _ggbf[key]
		last := len(other) - 1
		if other[last] > current[0] {
			other[last] = current[0]
			_ggbf[prevKey] = other
		}
		prevKey = key
	}
}

// _dafa returns a textPara with rectangle `_acag` and lines `_fgaf`.
func _dafa(_acag _ee.PdfRectangle, _fgaf []*textLine) *textPara {
	return &textPara{
		PdfRectangle: _acag,
		_cecgd:       _fgaf,
	}
}
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
type TextTable struct {
	W, H  int
	Cells [][]TableCell
}

// _cdge returns a ruling of kind `_dfbe` located at the bottom edge (Lly) of `_dafd`, spanning
// the rectangle's Llx..Urx range.
func _cdge(_dafd _ee.PdfRectangle) *ruling {
	return &ruling{
		_cgac:  _dfbe,
		_facf:  _dafd.Lly,
		_fgbfa: _dafd.Llx,
		_ebeb:  _dafd.Urx,
	}
}

// has reports whether `_dgbfb` is a member of the set.
func (_egdd intSet) has(_dgbfb int) bool {
	_, member := _egdd[_dgbfb]
	return member
}
2021-12-14 01:08:28 +00:00
2022-04-27 00:10:33 +00:00
// TableCell is a cell in a TextTable.
type TableCell struct{
2021-12-14 01:08:28 +00:00
2022-04-27 00:10:33 +00:00
// Text is the extracted text.
Text string ;
2021-12-14 01:08:28 +00:00
2022-04-27 00:10:33 +00:00
// Marks returns the TextMarks corresponding to the text in Text.
2022-06-06 22:48:24 +00:00
Marks TextMarkArray ;};func (_eba *textObject )checkOp (_afb *_bd .ContentStreamOperation ,_gdc int ,_cgca bool )(_bgea bool ,_dfd error ){if _eba ==nil {var _aeg []_ca .PdfObject ;if _gdc > 0{_aeg =_afb .Params ;if len (_aeg )> _gdc {_aeg =_aeg [:_gdc ];
};};_ff .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_afb .Operand ,_aeg );};if _gdc >=0{if len (_afb .Params )!=_gdc {if _cgca {_dfd =_c .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");
};_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_afb .Operand ,_gdc ,len (_afb .Params ),_afb .Params );
return false ,_dfd ;};};return true ,nil ;};func (_agda rulingList )mergePrimary ()float64 {_afcd :=_agda [0]._facf ;for _ ,_feag :=range _agda [1:]{_afcd +=_feag ._facf ;};return _afcd /float64 (len (_agda ));};func _cffd (_gfcd ,_dcbdc _ee .PdfRectangle )bool {return _gfcd .Lly <=_dcbdc .Ury &&_dcbdc .Lly <=_gfcd .Ury ;
};func _abac (_bfgb ,_dfdd float64 )bool {return _bfgb /_f .Max (_cbgf ,_dfdd )< _acad };func (_dcb *textObject )reset (){_dcb ._geff =_de .IdentityMatrix ();_dcb ._abg =_de .IdentityMatrix ();_dcb ._fbf =nil ;};func (_gdf *textObject )showTextAdjusted (_dacf *_ca .PdfObjectArray )error {_badf :=false ;
for _ ,_bdff :=range _dacf .Elements (){switch _bdff .(type ){case *_ca .PdfObjectFloat ,*_ca .PdfObjectInteger :_acec ,_ddg :=_ca .GetNumberAsFloat (_bdff );if _ddg !=nil {_ff .Log .Debug ("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_bdff ,_dacf );
return _ddg ;};_cggd ,_dbcb :=-_acec *0.001*_gdf ._ecb ._ggda ,0.0;if _badf {_dbcb ,_cggd =_cggd ,_dbcb ;};_gdfd :=_ebb (_de .Point {X :_cggd ,Y :_dbcb });_gdf ._geff .Concat (_gdfd );case *_ca .PdfObjectString :_dbg ,_fgfg :=_ca .GetStringBytes (_bdff );
if !_fgfg {_ff .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_bdff ,_dacf );
return _ca .ErrTypeError ;};_gdf .renderText (_dbg );default:_ff .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_bdff ,_dacf );
return _ca .ErrTypeError ;};};return nil ;};func (_gcf *stateStack )push (_ebc *textState ){_bfd :=*_ebc ;*_gcf =append (*_gcf ,&_bfd )};func _edbd (_aecg _de .Point )*subpath {return &subpath {_eaeg :[]_de .Point {_aecg }}};func (_cgcde *textWord )appendMark (_gdgea *textMark ,_bfef _ee .PdfRectangle ){_cgcde ._gceff =append (_cgcde ._gceff ,_gdgea );
_cgcde .PdfRectangle =_bgcf (_cgcde .PdfRectangle ,_gdgea .PdfRectangle );if _gdgea ._ceba > _cgcde ._eedb {_cgcde ._eedb =_gdgea ._ceba ;};_cgcde ._aagef =_bfef .Ury -_cgcde .PdfRectangle .Lly ;};type textState struct{_bace float64 ;_bbfa float64 ;_cfa float64 ;
_bgbc float64 ;_ggda float64 ;_ccc RenderMode ;_fefb float64 ;_fea *_ee .PdfFont ;_gbc _ee .PdfRectangle ;_acc int ;_deeb int ;};func _egfd (_ebadc _ee .PdfRectangle )*ruling {return &ruling {_cgac :_dfbe ,_facf :_ebadc .Ury ,_fgbfa :_ebadc .Llx ,_ebeb :_ebadc .Urx };
};func (_gbddb rulingList )secMinMax ()(float64 ,float64 ){_cedg ,_cgdfe :=_gbddb [0]._fgbfa ,_gbddb [0]._ebeb ;for _ ,_fcccd :=range _gbddb [1:]{if _fcccd ._fgbfa < _cedg {_cedg =_fcccd ._fgbfa ;};if _fcccd ._ebeb > _cgdfe {_cgdfe =_fcccd ._ebeb ;};};
return _cedg ,_cgdfe ;};func (_cdef rulingList )removeDuplicates ()rulingList {if len (_cdef )==0{return nil ;};_cdef .sort ();_eccdec :=rulingList {_cdef [0]};for _ ,_ebag :=range _cdef [1:]{if _ebag .equals (_eccdec [len (_eccdec )-1]){continue ;};_eccdec =append (_eccdec ,_ebag );
};return _eccdec ;};func (_eaadb paraList )findTableGrid (_efeac gridTiling )(*textTable ,map[*textPara ]struct{}){_gaff :=len (_efeac ._dgcdc );_cdfcd :=len (_efeac ._dgbfg );_gcbgae :=textTable {_efea :true ,_aage :_gaff ,_eabcaa :_cdfcd ,_dbfba :make (map[uint64 ]*textPara ,_gaff *_cdfcd ),_ebgbb :make (map[uint64 ]compositeCell ,_gaff *_cdfcd )};
_bebd :=make (map[*textPara ]struct{});_fgdd :=int ((1.0-_befcc )*float64 (_gaff *_cdfcd ));_gegc :=0;if _gfdb {_ff .Log .Info ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064",_gaff ,_cdfcd );
};for _fbfa ,_bfggd :=range _efeac ._dgbfg {_cffeg ,_fedb :=_efeac ._dcaac [_bfggd ];if !_fedb {continue ;};for _aeeb ,_bgeec :=range _efeac ._dgcdc {_dade ,_cegf :=_cffeg [_bgeec ];if !_cegf {continue ;};_efede :=_eaadb .inTile (_dade );if len (_efede )==0{_gegc ++;
if _gegc > _fgdd {if _gfdb {_ff .Log .Info ("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064",_gegc );};return nil ,nil ;};}else {_gcbgae .putComposite (_aeeb ,_fbfa ,_efede ,_dade .PdfRectangle );for _ ,_fage :=range _efede {_bebd [_fage ]=struct{}{};
};};};};_acdc :=0;for _afbe :=0;_afbe < _gaff ;_afbe ++{_ebfdd :=_gcbgae .get (_afbe ,0);if _ebfdd ==nil ||!_ebfdd ._gbdd {_acdc ++;};};if _acdc ==0{if _gfdb {_ff .Log .Info ("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030");};return nil ,nil ;
};_dbfec :=_gcbgae .reduceTiling (_efeac ,_faag );_dbfec =_dbfec .subdivide ();return _dbfec ,_bebd ;};func (_agbcfa *textTable )computeBbox ()_ee .PdfRectangle {var _aacfd _ee .PdfRectangle ;_eefde :=false ;for _bcea :=0;_bcea < _agbcfa ._eabcaa ;_bcea ++{for _aeaf :=0;
_aeaf < _agbcfa ._aage ;_aeaf ++{_dcde :=_agbcfa .get (_aeaf ,_bcea );if _dcde ==nil {continue ;};if !_eefde {_aacfd =_dcde .PdfRectangle ;_eefde =true ;}else {_aacfd =_bgcf (_aacfd ,_dcde .PdfRectangle );};};};return _aacfd ;};func (_cdda *textObject )nextLine (){_cdda .moveLP (0,-_cdda ._ecb ._bgbc )};
var _ge =false ;func (_efac *textPara )fontsize ()float64 {return _efac ._cecgd [0]._caccd };type shapesState struct{_gcgd _de .Matrix ;_gfaf _de .Matrix ;_deff []*subpath ;_ccef bool ;_ddcc _de .Point ;_agbc *textObject ;};const (_afdb =false ;_ggfb =false ;
_bfc =false ;_gagc =false ;_gcga =false ;_gcff =false ;_abbdd =false ;_fgga =false ;_dagg =false ;_dfcfb =_dagg &&true ;_cbd =_dfcfb &&false ;_ced =_dagg &&true ;_bcag =false ;_dcfc =_bcag &&false ;_fbdcg =_bcag &&true ;_dgac =false ;_acg =_dgac &&false ;
_edcb =_dgac &&false ;_gfdb =_dgac &&true ;_fbgb =_dgac &&false ;_beab =_dgac &&false ;);func (_ggad rulingList )sortStrict (){_ef .Slice (_ggad ,func (_dagd ,_bccgf int )bool {_fgafa ,_eebba :=_ggad [_dagd ],_ggad [_bccgf ];_cbdc ,_beacg :=_fgafa ._cgac ,_eebba ._cgac ;
if _cbdc !=_beacg {return _cbdc > _beacg ;};_aedb ,_gdcgd :=_fgafa ._facf ,_eebba ._facf ;if !_dcfga (_aedb -_gdcgd ){return _aedb < _gdcgd ;};_aedb ,_gdcgd =_fgafa ._fgbfa ,_eebba ._fgbfa ;if _aedb !=_gdcgd {return _aedb < _gdcgd ;};return _fgafa ._ebeb < _eebba ._ebeb ;
});};func (_eag *shapesState )clearPath (){_eag ._deff =nil ;_eag ._ccef =false ;if _gcga {_ff .Log .Info ("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073",_eag );};};func (_gfde *textLine )bbox ()_ee .PdfRectangle {return _gfde .PdfRectangle };
// isActualGrid reports whether the rulings form a grid. The rulings are first
// passed through augmentGrid. The check fails when fewer than _abed+1 vertical
// or _gcad+1 horizontal rulings result, or when the alignment test selected by
// _ecge does not hold. On success the vertical rulings followed by the
// horizontal rulings are returned together with true.
func (rl rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := rl.augmentGrid()
	if len(verts) < _abed+1 || len(horzs) < _gcad+1 {
		if _dgac {
			_ff.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(verts), len(horzs), _abed+1, _gcad+1)
		}
		return nil, false
	}
	if _dgac {
		_ff.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", rl, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for i, r := range rl {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", i, r)
		}
	}

	if _ecge {
		// Compare the outermost rulings of each orientation against each other.
		firstV, lastV := verts[0], verts[len(verts)-1]
		firstH, lastH := horzs[0], horzs[len(horzs)-1]
		cornersMeet := _egcg(firstV._facf-firstH._fgbfa) &&
			_egcg(lastV._facf-firstH._ebeb) &&
			_egcg(firstH._facf-firstV._ebeb) &&
			_egcg(lastH._facf-firstV._fgbfa)
		if !cornersMeet {
			if _dgac {
				_ff.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", firstV, lastV, firstH, lastH)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			if _edcb {
				_ff.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _dgac {
				_ff.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(horzs))
			}
			return nil, false
		}
	}

	grid := append(verts, horzs...)
	return grid, true
}

// processOperand is the content stream handler used for image extraction.
// Only operations with exactly one parameter are considered: an inline image
// operation is forwarded to extractInlineImage (stencil masks are skipped
// unless IncludeInlineStencilMasks is set), and an XObject invocation is
// forwarded to extractXObjectImage or extractFormImages depending on the
// XObject type. Everything else is ignored and yields nil.
func (ctx *imageExtractContext) processOperand(op *_bd.ContentStreamOperation, gs _bd.GraphicsState, resources *_ee.PdfPageResources) error {
	if len(op.Params) != 1 {
		return nil
	}
	switch op.Operand {
	case "\u0042\u0049":
		inline, ok := op.Params[0].(*_bd.ContentStreamInlineImage)
		if !ok {
			return nil
		}
		isMask, found := _ca.GetBoolVal(inline.ImageMask)
		if found && isMask && !ctx._dfc.IncludeInlineStencilMasks {
			return nil
		}
		return ctx.extractInlineImage(inline, gs, resources)
	case "\u0044\u006f":
		name, ok := _ca.GetName(op.Params[0])
		if !ok {
			_ff.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065")
			return _bdc
		}
		_, xtype := resources.GetXObjectByName(*name)
		if xtype == _ee.XObjectTypeImage {
			return ctx.extractXObjectImage(name, gs, resources)
		}
		if xtype == _ee.XObjectTypeForm {
			return ctx.extractFormImages(name, gs, resources)
		}
	}
	return nil
}

// findGridTables calls findTableGrid once per tiling. Every non-nil table is
// collected and has markCells invoked on it, and every paragraph key of the
// second findTableGrid result gets its _ebad flag set.
func (paras paraList) findGridTables(tilings []gridTiling) []*textTable {
	if _bcag {
		_ff.Log.Info("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073", len(paras))
		for i, p := range paras {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, p)
		}
	}

	var tables []*textTable
	for i, tiling := range tilings {
		table, used := paras.findTableGrid(tiling)
		if table != nil {
			table.log(_be.Sprintf("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064", i))
			tables = append(tables, table)
			table.markCells()
		}
		for p := range used {
			p._ebad = true
		}
	}

	if _bcag {
		_ff.Log.Info("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s", len(tables))
	}
	return tables
}

// last returns the final point of the subpath. It panics on an empty subpath.
func (sp *subpath) last() _de.Point {
	end := len(sp._eaeg) - 1
	return sp._eaeg[end]
}

// highestWord returns the first word stored under index depthIdx whose _aagef
// value lies in the closed interval [minDepth, maxDepth], or nil if none does.
func (bag *wordBag) highestWord(depthIdx int, minDepth, maxDepth float64) *textWord {
	for _, w := range bag._aceg[depthIdx] {
		d := w._aagef
		if minDepth <= d && d <= maxDepth {
			return w
		}
	}
	return nil
}

// logComposite prints two text grids describing the composite cells of the
// table: the number of paragraphs per cell, then a truncated rendering of the
// merged text per cell. It does nothing unless _bcag is enabled.
func (t *textTable) logComposite(title string) {
	if !_bcag {
		return
	}

	// First grid: paragraph counts.
	_ff.Log.Info("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", t._aage, t._eabcaa, title)
	_be.Printf("\u0025\u0035\u0073 \u007c", "")
	for x := 0; x < t._aage; x++ {
		_be.Printf("\u0025\u0033\u0064 \u007c", x)
	}
	_be.Println("")
	_be.Printf("\u0025\u0035\u0073 \u002b", "")
	for x := 0; x < t._aage; x++ {
		_be.Printf("\u0025\u0033\u0073 \u002b", "\u002d\u002d\u002d")
	}
	_be.Println("")
	for y := 0; y < t._eabcaa; y++ {
		_be.Printf("\u0025\u0035\u0064 \u007c", y)
		for x := 0; x < t._aage; x++ {
			cellParas, _ := t._ebgbb[_cgccd(x, y)].parasBBox()
			_be.Printf("\u0025\u0033\u0064 \u007c", len(cellParas))
		}
		_be.Println("")
	}

	// Second grid: cell text.
	_ff.Log.Info("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", t._aage, t._eabcaa, title)
	_be.Printf("\u0025\u0035\u0073 \u007c", "")
	for x := 0; x < t._aage; x++ {
		_be.Printf("\u0025\u0031\u0032\u0064\u0020\u007c", x)
	}
	_be.Println("")
	_be.Printf("\u0025\u0035\u0073 \u002b", "")
	for x := 0; x < t._aage; x++ {
		_be.Print("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b")
	}
	_be.Println("")
	for y := 0; y < t._eabcaa; y++ {
		_be.Printf("\u0025\u0035\u0064 \u007c", y)
		for x := 0; x < t._aage; x++ {
			cellParas, _ := t._ebgbb[_cgccd(x, y)].parasBBox()
			text := ""
			if merged := cellParas.merge(); merged != nil {
				text = merged.text()
			}
			// Quote the truncated text, then strip the surrounding quote marks.
			quoted := _be.Sprintf("\u0025\u0071", _ggece(text, 12))
			_be.Printf("\u0025\u0031\u0032\u0073\u0020\u007c", quoted[1:len(quoted)-1])
		}
		_be.Println("")
	}
}

// _beda formats the map as a brace-enclosed, comma-separated list of
// "key: values" entries, visiting the keys in the order produced by _aegg.
func _beda(m map[int][]float64) string {
	keys := _aegg(m)
	entries := make([]string, len(m))
	for i, key := range keys {
		entries[i] = _be.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", key, m[key])
	}
	joined := _df.Join(entries, "\u002c\u0020")
	return _be.Sprintf("\u007b\u0025\u0073\u007d", joined)
}

// _fgge returns the single parameter of op as a float64. An operation that
// does not carry exactly one parameter is logged and yields an error.
func _fgge(op *_bd.ContentStreamOperation) (float64, error) {
	if n := len(op.Params); n != 1 {
		err := _c.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
		_ff.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", op.Operand, 1, n, op.Params)
		return 0.0, err
	}
	return _ca.GetNumberAsFloat(op.Params[0])
}

// close appends the first point again when _beafa reports that it differs from
// the last point, sets the _feeg flag and then removes duplicate points.
func (sp *subpath) close() {
	start := sp._eaeg[0]
	if !_beafa(start, sp.last()) {
		sp.add(start)
	}
	sp._feeg = true
	sp.removeDuplicates()
}

// _ebdb builds the paragraphs of a page from its text marks. The marks are
// turned into words (_abcdc), gathered into a word bag (_gced), split into
// several bags (_ggfcf, post-processed by _efb) and each bag is arranged into
// a paragraph. When at least _gfec paragraphs result, tables are extracted
// using tilings. The paragraphs are finally sorted via sortReadingOrder.
// It returns nil when there are no marks or no words.
func _ebdb(marks []*textMark, pageSize _ee.PdfRectangle, rulings rulingList, tilings []gridTiling) paraList {
	_ff.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(marks), pageSize)
	if len(marks) == 0 {
		return nil
	}
	words := _abcdc(marks, pageSize)
	if len(words) == 0 {
		return nil
	}

	rulings.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := rulings.vertsHorzs()
	pageBag := _gced(words, pageSize.Ury, verts, horzs)
	bags := _ggfcf(pageBag, pageSize.Ury, verts, horzs)
	bags = _efb(bags)

	paras := make(paraList, 0, len(bags))
	for _, bag := range bags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}
	if len(paras) >= _gfec {
		paras = paras.extractTables(tilings)
	}
	paras.sortReadingOrder()
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}

// Render mode bit flags. The values are 1, 2 and 4 so they can be combined.
const (
	RenderModeStroke RenderMode = 1 << iota
	RenderModeFill
	RenderModeClip
)

// pop removes the top entry of the stack and returns a pointer to a copy of
// it. It returns nil when the stack is empty.
func (stack *stateStack) pop() *textState {
	if stack.empty() {
		return nil
	}
	entries := *stack
	topIdx := len(entries) - 1
	top := *entries[topIdx]
	*stack = entries[:topIdx]
	return &top
}

// growTable repeatedly extends the table by one row (from getDown) and/or one
// column (from getRight) until neither is available. When both are available
// and they share an untaken final cell, a row and, if still possible, a column
// are added in the same step with that shared cell placed in the new corner.
func (t *textTable) growTable() {
	// addRow appends cells as a new last row.
	addRow := func(cells paraList) {
		t._eabcaa++
		for x := 0; x < t._aage; x++ {
			t.put(x, t._eabcaa-1, cells[x])
		}
	}
	// addColumn appends cells as a new last column.
	addColumn := func(cells paraList) {
		t._aage++
		for y := 0; y < t._eabcaa; y++ {
			t.put(t._aage-1, y, cells[y])
		}
	}

	if _dcfc {
		t.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}
	for step := 0; ; step++ {
		down := t.getDown()
		right := t.getRight()
		if _dcfc {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", step, t)
			_be.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", down)
			_be.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", right)
		}

		grown := false
		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				addRow(down)
				// The table is one row taller now, so the right neighbours
				// have to be fetched again.
				if right = t.getRight(); right != nil {
					addColumn(right)
					t.put(t._aage-1, t._eabcaa-1, corner)
				}
				grown = true
			}
		}

		switch {
		case grown:
			// Already extended in this step.
		case down != nil:
			addRow(down)
		case right != nil:
			addColumn(right)
		default:
			return
		}
	}
}

// complete reports whether the tile has all four borders.
func (tile gridTile) complete() bool {
	return tile.numBorders() == 4
}
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (e *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	ctx := &imageExtractContext{_dfc: options}
	if err := ctx.extractContentStreamImages(e._gc, e._ea); err != nil {
		return nil, err
	}
	return &PageImages{Images: ctx._eee}, nil
}

// _cgbed removes the element at index idx from words, shifting the following
// elements down in place, and returns the shortened slice.
func _cgbed(words []*textWord, idx int) []*textWord {
	return append(words[:idx], words[idx+1:]...)
}

// _fcfe returns the horizontal distance from the right edge of b's bounding
// box to the left edge of a's bounding box.
func _fcfe(a, b bounded) float64 {
	left := a.bbox().Llx
	right := b.bbox().Urx
	return left - right
}

// findTables calls addNeighbours on the paragraphs, sorts them with _aef and
// collects the tables found by findGridTables (when _bcba is set) and by
// findTextTables (when _deee is set).
func (paras paraList) findTables(tilings []gridTiling) []*textTable {
	paras.addNeighbours()
	_ef.Slice(paras, func(i, j int) bool {
		return _aef(paras[i], paras[j]) < 0
	})

	var tables []*textTable
	if _bcba {
		tables = append(tables, paras.findGridTables(tilings)...)
	}
	if _deee {
		tables = append(tables, paras.findTextTables()...)
	}
	return tables
}

// clear resets the subpath to its zero value.
func (sp *subpath) clear() {
	*sp = subpath{}
}

// _gebfb classifies the segment between p1 and p2 by passing the absolute
// x and y extents together with _acaa to _fbfd.
func _gebfb(p1, p2 _de.Point) rulingKind {
	dx := _f.Abs(p1.X - p2.X)
	dy := _f.Abs(p1.Y - p2.Y)
	return _fbfd(dx, dy, _acaa)
}

// _beacge is the pattern isExportable applies to the text of first-column cells.
var _beacge = _cd.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")

// textWord is a word built from one or more text marks.
type textWord struct {
	_ee.PdfRectangle
	_aagef float64
	_bfdfd string
	_gceff []*textMark
	_eedb  float64
	_adacg bool
}

// gridIntersecting reports whether both the _fgbfa and the _ebeb values of the
// two rulings are equal according to _cfff.
func (r *ruling) gridIntersecting(other *ruling) bool {
	if !_cfff(r._fgbfa, other._fgbfa) {
		return false
	}
	return _cfff(r._ebeb, other._ebeb)
}

// _ggfcf splits the words of page into separate word bags. For each depth
// index it repeatedly takes the first word in reading order, starts a new bag
// from it via _cgdg, and then keeps scanning page with scanBand to pull more
// words into the bag until a full pass adds nothing. The bags are returned in
// creation order.
func _ggfcf(page *wordBag, pageHeight float64, verts, horzs rulingList) []*wordBag {
	var bags []*wordBag
	for _, depthIdx := range page.depthIndexes() {
		for !page.empty(depthIdx) {
			firstIdx := page.firstReadingIndex(depthIdx)
			first := page.firstWord(firstIdx)
			bag := _cgdg(first, pageHeight, verts, horzs)
			page.removeWord(first, firstIdx)
			if _abbdd {
				_ff.Log.Info("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073", first.String())
			}

			for {
				grown := false
				// The gaps scale with the bag's _adbbf, which is re-read on
				// every pass.
				gapA := _effd * bag._adbbf
				readingGap := _bgbf * bag._adbbf
				depthGap := _dddf * bag._adbbf
				if _abbdd {
					_ff.Log.Info("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066", bag.minDepth(), bag.maxDepth(), depthGap, readingGap)
				}
				if page.scanBand("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c", bag, _dadcg(_efcg, 0), bag.minDepth()-depthGap, bag.maxDepth()+depthGap, _gfcf, false, false) > 0 {
					grown = true
				}
				if page.scanBand("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c", bag, _dadcg(_efcg, readingGap), bag.minDepth(), bag.maxDepth(), _eecb, false, false) > 0 {
					grown = true
				}

				if !grown {
					// The first scanBand call here is made with its
					// second-to-last flag set and returns a count. Only when
					// that count passes the test below is the scan repeated
					// with the last flag set instead.
					count := page.scanBand("", bag, _dadcg(_eabca, gapA), bag.minDepth(), bag.maxDepth(), _debad, true, false)
					if count > 0 {
						depthLines := (bag.maxDepth() - bag.minDepth()) / bag._adbbf
						if (count > 1 && float64(count) > 0.3*depthLines) || count <= 10 {
							if page.scanBand("\u006f\u0074\u0068e\u0072", bag, _dadcg(_eabca, gapA), bag.minDepth(), bag.maxDepth(), _debad, false, true) > 0 {
								grown = true
							}
						}
					}
				}

				if !grown {
					break
				}
			}
			bags = append(bags, bag)
		}
	}
	return bags
}
// NewFromContents creates a new extractor from contents and page resources.
func NewFromContents(contents string, resources *_ee.PdfPageResources) (*Extractor, error) {
	const usageKey = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"

	extractor := &Extractor{
		_gc: contents,
		_ea: resources,
		_bg: map[string]fontEntry{},
		_bb: map[string]textResult{},
	}
	_dg.TrackUse(usageKey)
	return extractor, nil
}

// _bebb returns the outer keys of tiles sorted in descending order.
func _bebb(tiles map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(tiles))
	for key := range tiles {
		keys = append(keys, key)
	}
	_ef.Float64s(keys)
	// Reverse the ascending result in place.
	for lo, hi := 0, len(keys)-1; lo < hi; lo, hi = lo+1, hi-1 {
		keys[lo], keys[hi] = keys[hi], keys[lo]
	}
	return keys
}

// extractContentStreamImages parses contents and processes every operation
// with processOperand. The image cache and the options are initialised with
// empty values when they are nil. Parse and processing errors are returned.
func (ctx *imageExtractContext) extractContentStreamImages(contents string, resources *_ee.PdfPageResources) error {
	parser := _bd.NewContentStreamParser(contents)
	operations, err := parser.Parse()
	if err != nil {
		return err
	}

	if ctx._aca == nil {
		ctx._aca = map[*_ca.PdfObjectStream]*cachedImage{}
	}
	if ctx._dfc == nil {
		ctx._dfc = &ImageExtractOptions{}
	}

	processor := _bd.NewContentStreamProcessor(*operations)
	processor.AddHandler(_bd.HandlerConditionEnumAllOperands, "", ctx.processOperand)
	return processor.Process(resources)
}

// _bcbac replaces, in place, the coordinates of every point of every subpath
// in sections with the result of _ffgcc. It does nothing when _efdg is
// negative.
func _bcbac(sections []pathSection) {
	if _efdg < 0.0 {
		return
	}
	if _dgac {
		_ff.Log.Info("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073", len(sections))
	}
	for si, section := range sections {
		for pi, path := range section._fbdc {
			for ki, before := range path._eaeg {
				path._eaeg[ki] = _de.Point{X: _ffgcc(before.X), Y: _ffgcc(before.Y)}
				if !_dgac {
					continue
				}
				after := path._eaeg[ki]
				if !_beafa(before, after) {
					delta := _de.Point{X: after.X - before.X, Y: after.Y - before.Y}
					_be.Printf("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a", si, pi, ki, before, after, delta)
				}
			}
		}
	}
}

// _bagc returns the distinct inner keys of tiles sorted in ascending order.
func _bagc(tiles map[float64]map[float64]gridTile) []float64 {
	seen := make(map[float64]struct{}, len(tiles))
	keys := make([]float64, 0, len(tiles))
	for _, row := range tiles {
		for key := range row {
			if _, dup := seen[key]; !dup {
				keys = append(keys, key)
				seen[key] = struct{}{}
			}
		}
	}
	_ef.Float64s(keys)
	return keys
}

// wordBag is a collection of words bucketed by an integer index in _aceg.
type wordBag struct {
	_ee.PdfRectangle
	_adbbf       float64
	_degg, _aegf rulingList
	_fec         float64
	_aceg        map[int][]*textWord
}

// isExportable reports whether the table should be exported. A table with
// _efea set always is. Otherwise the table is exportable unless every row has
// a first-column cell whose text is at most one rune long or matches _beacge.
func (t *textTable) isExportable() bool {
	if t._efea {
		return true
	}
	for y := 0; y < t._eabcaa; y++ {
		cell := t.get(0, y)
		if cell == nil {
			return true
		}
		text := cell.text()
		if _e.RuneCountInString(text) <= 1 || _beacge.MatchString(text) {
			continue
		}
		return true
	}
	return false
}

// connections returns the set of ruling indexes transitively linked to start
// through links, where links[i] is the set of indexes that i is linked to.
func (rl rulingList) connections(links map[int]intSet, start int) intSet {
	connected := make(intSet)
	visited := make(intSet)

	var visit func(int)
	visit = func(idx int) {
		if visited.has(idx) {
			return
		}
		visited.add(idx)
		for i := range rl {
			if links[i].has(idx) {
				connected.add(i)
			}
		}
		for i := range rl {
			if connected.has(i) {
				visit(i)
			}
		}
	}

	visit(start)
	return connected
}

// put stores para as the cell at position (x, y).
func (t *textTable) put(x, y int, para *textPara) {
	key := _cgccd(x, y)
	t._dbfba[key] = para
}

// Sentinel errors of the package.
var (
	_bdc = _c.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")
	_fg  = _c.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)

// emptyCompositeRow reports whether no composite cell in row y holds any
// paragraph.
func (t *textTable) emptyCompositeRow(y int) bool {
	for x := 0; x < t._aage; x++ {
		cell, ok := t._ebgbb[_cgccd(x, y)]
		if ok && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// markWordBoundaries sets _adacg on every word, other than the first, whose
// gap to the preceding word (as measured by _fcfe) is at least _edeb times
// the line's _caccd.
func (line *textLine) markWordBoundaries() {
	minGap := _edeb * line._caccd
	for prevIdx, word := range line._eaab[1:] {
		// word is element prevIdx+1, so _eaab[prevIdx] is its predecessor.
		prev := line._eaab[prevIdx]
		if _fcfe(word, prev) >= minGap {
			word._adacg = true
		}
	}
}
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	// _beaa holds the marks of the collection.
	_beaa []TextMark
}
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	Images []ImageMark
}

// _bdaae builds a word from marks. The word's rectangle is the union of the
// mark rectangles, _eedb is the largest _ceba among the marks and _aagef is
// the distance from the top of pageSize down to the bottom of the word.
// marks must not be empty.
func _bdaae(marks []*textMark, pageSize _ee.PdfRectangle) *textWord {
	first := marks[0]
	bounds := first.PdfRectangle
	largest := first._ceba
	for _, mark := range marks[1:] {
		bounds = _bgcf(bounds, mark.PdfRectangle)
		if mark._ceba > largest {
			largest = mark._ceba
		}
	}
	return &textWord{
		PdfRectangle: bounds,
		_gceff:       marks,
		_aagef:       pageSize.Ury - bounds.Lly,
		_eedb:        largest,
	}
}

// yNeighbours creates two events per paragraph, one at its lower and one at
// its upper y coordinate, and hands them to eventNeighbours. When tolerance is
// non-zero each interval is widened on both sides by tolerance times the
// paragraph's fontsize.
func (paras paraList) yNeighbours(tolerance float64) map[*textPara][]int {
	events := make([]event, 2*len(paras))
	for i, para := range paras {
		low, high := para.Lly, para.Ury
		if tolerance != 0 {
			// fontsize is only consulted when a tolerance was requested.
			low = para.Lly - tolerance*para.fontsize()
			high = para.Ury + tolerance*para.fontsize()
		}
		events[2*i] = event{low, true, i}
		events[2*i+1] = event{high, false, i}
	}
	return paras.eventNeighbours(events)
}

// Message texts for font-related failures. They are used outside this part of
// the file.
const (
	_ab  = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
	_eac = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"
	_ad  = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
)
// _fgdg rounds x to the nearest multiple of step. A step of 0 is treated as 1.
func _fgdg(x float64, step int) int {
	if step == 0 {
		step = 1
	}
	unit := float64(step)
	multiples := _f.Round(x / unit)
	return int(multiples * unit)
}

// _bgbcf converts the subpaths of sections into rulings. The sections are
// first passed to _bcbac. Subpaths that are not quadrilaterals, or for which
// makeRectRuling reports failure, are left out.
func _bgbcf(sections []pathSection) rulingList {
	_bcbac(sections)
	if _dgac {
		_ff.Log.Info("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs", len(sections))
	}

	var rulings rulingList
	for _, section := range sections {
		for _, path := range section._fbdc {
			if !path.isQuadrilateral() {
				if _dgac {
					_ff.Log.Error("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073", path)
				}
				continue
			}
			r, ok := path.makeRectRuling(section.Color)
			if ok {
				rulings = append(rulings, r)
				continue
			}
			if _fbgb {
				_ff.Log.Error("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073", path)
			}
		}
	}

	if _dgac {
		_ff.Log.Info("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073", rulings.String())
	}
	return rulings
}

// getDown returns the _fdec neighbours of the cells in the last row, one per
// column. It returns nil when any of them is taken or when the _cacae
// neighbour of one is not the next one in the list.
func (t *textTable) getDown() paraList {
	width := t._aage
	cells := make(paraList, width)
	for x := range cells {
		below := t.get(x, t._eabcaa-1)._fdec
		if below.taken() {
			return nil
		}
		cells[x] = below
	}
	for x := 1; x < width; x++ {
		if cells[x-1]._cacae != cells[x] {
			return nil
		}
	}
	return cells
}

// setTextMatrix sets both _geff and _abg to the matrix described by the six
// values in f. A slice of any other length is logged and ignored.
func (to *textObject) setTextMatrix(f []float64) {
	if len(f) != 6 {
		_ff.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", len(f))
		return
	}
	to._geff = _de.NewMatrix(f[0], f[1], f[2], f[3], f[4], f[5])
	to._abg = to._geff
}

// log prints the table dimensions and every non-nil cell. It does nothing
// unless _bcag is enabled.
func (t *textTable) log(title string) {
	if !_bcag {
		return
	}
	_ff.Log.Info("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066", title, t._aage, t._eabcaa, t._efea, t.PdfRectangle)
	for y := 0; y < t._eabcaa; y++ {
		for x := 0; x < t._aage; x++ {
			cell := t.get(x, y)
			if cell == nil {
				continue
			}
			_be.Printf("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a", x, y, cell.PdfRectangle, _ggece(cell.text(), 50), _e.RuneCountInString(cell.text()))
		}
	}
}

// depthRange returns, in ascending order, the keys of _aceg that lie in the
// closed interval [lo, hi], or nil when there are none.
func (bag *wordBag) depthRange(lo, hi int) []int {
	var indexes []int
	for idx := range bag._aceg {
		if idx < lo || idx > hi {
			continue
		}
		indexes = append(indexes, idx)
	}
	if len(indexes) == 0 {
		return nil
	}
	_ef.Ints(indexes)
	return indexes
}

// firstWord returns the first word stored under index idx.
func (bag *wordBag) firstWord(idx int) *textWord {
	words := bag._aceg[idx]
	return words[0]
}

// equals reports whether the two rulings have the same _cgac and whether
// their _facf, _fgbfa and _ebeb values are equal according to _cfff.
func (r *ruling) equals(other *ruling) bool {
	if r._cgac != other._cgac {
		return false
	}
	return _cfff(r._facf, other._facf) &&
		_cfff(r._fgbfa, other._fgbfa) &&
		_cfff(r._ebeb, other._ebeb)
}

// moveTextSetLeading stores -ty as the _bgbc value of the text state and then
// moves the line position by (tx, ty).
func (to *textObject) moveTextSetLeading(tx, ty float64) {
	to._ecb._bgbc = -ty
	to.moveLP(tx, ty)
}

// tables returns the exportable tables attached to the paragraphs, converted
// with toTextTable.
func (paras paraList) tables() []TextTable {
	if _bcag {
		_ff.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	var out []TextTable
	for _, para := range paras {
		table := para._cegd
		if table == nil || !table.isExportable() {
			continue
		}
		out = append(out, table.toTextTable())
	}
	return out
}

// primaries returns the distinct _facf values of the rulings in ascending
// order.
func (rl rulingList) primaries() []float64 {
	distinct := make(map[float64]struct{}, len(rl))
	for _, r := range rl {
		distinct[r._facf] = struct{}{}
	}
	values := make([]float64, 0, len(distinct))
	for v := range distinct {
		values = append(values, v)
	}
	_ef.Float64s(values)
	return values
}

// log prints the tiling coordinates and all tiles that are present. It does
// nothing unless _gfdb is enabled.
func (tiling gridTiling) log(title string) {
	if !_gfdb {
		return
	}
	_ff.Log.Info("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071", len(tiling._dgcdc), len(tiling._dgbfg), title)
	_be.Printf("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a", tiling._dgcdc)
	_be.Printf("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a", tiling._dgbfg)
	for yi, lly := range tiling._dgbfg {
		row, ok := tiling._dcaac[lly]
		if !ok {
			continue
		}
		_be.Printf("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a", yi, lly)
		for xi, llx := range tiling._dgcdc {
			if tile, present := row[llx]; present {
				_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", xi, tile.String())
			}
		}
	}
}

// addDiacritic appends text to the _fdbb string of the word's last mark and
// stores the NFKC-normalized result.
func (w *textWord) addDiacritic(text string) {
	lastMark := w._gceff[len(w._gceff)-1]
	lastMark._fdbb = _gf.NFKC.String(lastMark._fdbb + text)
}

// pullWord appends word to the line and removes it from bag under index
// depthIdx.
func (line *textLine) pullWord(bag *wordBag, word *textWord, depthIdx int) {
	line.appendWord(word)
	bag.removeWord(word, depthIdx)
}

// reduce returns a table without the empty composite rows and columns. When
// there are none, the receiver itself is returned. Otherwise a new table is
// built and the remaining composite cells are copied to their new positions.
func (t *textTable) reduce() *textTable {
	keptRows := make([]int, 0, t._eabcaa)
	keptCols := make([]int, 0, t._aage)
	for y := 0; y < t._eabcaa; y++ {
		if !t.emptyCompositeRow(y) {
			keptRows = append(keptRows, y)
		}
	}
	for x := 0; x < t._aage; x++ {
		if !t.emptyCompositeColumn(x) {
			keptCols = append(keptCols, x)
		}
	}
	if len(keptRows) == t._eabcaa && len(keptCols) == t._aage {
		return t
	}

	reduced := textTable{
		_efea:   t._efea,
		_aage:   len(keptCols),
		_eabcaa: len(keptRows),
		_dbfba:  make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _bcag {
		_ff.Log.Info("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064", t._aage, t._eabcaa, len(keptCols), len(keptRows))
		_ff.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_ff.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}

	for newY, oldY := range keptRows {
		for newX, oldX := range keptCols {
			cellParas, cellBox := t.getComposite(oldX, oldY)
			if cellParas == nil {
				continue
			}
			if _bcag {
				_be.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newX, newY, oldX, oldY, _ggece(cellParas.merge().text(), 50))
			}
			reduced.putComposite(newX, newY, cellParas, cellBox)
		}
	}
	return &reduced
}

// alignsPrimary reports whether the two rulings have the same _cgac and their
// _facf values differ by less than half of _fbfc.
func (r *ruling) alignsPrimary(other *ruling) bool {
	if r._cgac != other._cgac {
		return false
	}
	return _f.Abs(r._facf-other._facf) < _fbfc*0.5
}

// _effec returns nil when a license key is present and licensed, or when _ge
// is set. Otherwise it prints a notice to standard output and returns an
// error. The argument is not used.
func _effec(_ *PageText) error {
	key := _dg.GetLicenseKey()
	if (key != nil && key.IsLicensed()) || _ge {
		return nil
	}
	_be.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_be.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _c.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}

// _aegg returns the keys of m in ascending order.
func _aegg(m map[int][]float64) []int {
	keys := make([]int, 0, len(m))
	for key := range m {
		keys = append(keys, key)
	}
	_ef.Ints(keys)
	return keys
}

// _bfbe classifies the segment between p1 and p2 by passing the absolute
// x and y extents together with _acad to _fbfd.
func _bfbe(p1, p2 _de.Point) rulingKind {
	dx := _f.Abs(p1.X - p2.X)
	dy := _f.Abs(p1.Y - p2.Y)
	return _fbfd(dx, dy, _acad)
}

// taken reports whether the paragraph is nil or has its _ebad flag set.
func (para *textPara) taken() bool {
	if para == nil {
		return true
	}
	return para._ebad
}

// numBorders counts how many of the four border flags of the tile are set.
func (tile gridTile) numBorders() int {
	count := 0
	for _, present := range [...]bool{tile._gdge, tile._geaa, tile._gaaf, tile._efab} {
		if present {
			count++
		}
	}
	return count
}

// llyOrdering returns the paragraph indexes stably sorted by ascending Lly.
func (paras paraList) llyOrdering() []int {
	order := make([]int, len(paras))
	for i := range order {
		order[i] = i
	}
	_ef.SliceStable(order, func(a, b int) bool {
		return paras[order[a]].Lly < paras[order[b]].Lly
	})
	return order
}
// String returns a description of `tm`.
func (tm *textMark) String() string {
	const layout = "\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022"
	return _be.Sprintf(layout, tm.PdfRectangle, tm._ceba, tm._fdbb)
}

// lines returns the lines of all paragraphs, concatenated in paragraph order.
func (paras paraList) lines() []*textLine {
	var all []*textLine
	for _, para := range paras {
		all = append(all, para._cecgd...)
	}
	return all
}

// _egcg reports whether the absolute value of v is below _fbfc.
func _egcg(v float64) bool {
	magnitude := _f.Abs(v)
	return magnitude < _fbfc
}

// _bgcf returns the smallest rectangle that contains both a and b.
func _bgcf(a, b _ee.PdfRectangle) _ee.PdfRectangle {
	var union _ee.PdfRectangle
	union.Llx = _f.Min(a.Llx, b.Llx)
	union.Lly = _f.Min(a.Lly, b.Lly)
	union.Urx = _f.Max(a.Urx, b.Urx)
	union.Ury = _f.Max(a.Ury, b.Ury)
	return union
}

// toCellTextMarks returns the text marks of all lines of the paragraph.
// Marks of a non-final line that ends in a hyphen are passed through _dfefd
// when _bcfgf is set. In every other case a non-final line is followed by the
// result of _gcag, called with the depths (_decg) of the line and its
// successor. offset is forwarded to the helpers.
func (para *textPara) toCellTextMarks(offset *int) []TextMark {
	var marks []TextMark
	lastIdx := len(para._cecgd) - 1
	for i, line := range para._cecgd {
		lineMarks := line.toTextMarks(offset)
		joinHyphen := _bcfgf && line.endsInHyphen() && i != lastIdx
		if joinHyphen {
			lineMarks = _dfefd(lineMarks, offset)
		}
		marks = append(marks, lineMarks...)
		if joinHyphen || i == lastIdx {
			continue
		}
		next := para._cecgd[i+1]
		marks = _gcag(marks, offset, _bccdg(line._decg, next._decg))
	}
	return marks
}

// textLine is a sequence of words together with its bounding rectangle.
type textLine struct {
	_ee.PdfRectangle
	_decg  float64
	_eaab  []*textWord
	_caccd float64
}

// writeText writes the paragraph's text to w. A paragraph without a table is
// written with writeCellText. For a table, the cells are written row by row:
// a missing cell is written as a tab, every cell is followed by a space and
// every row except the last by a newline. Write errors are not checked.
func (para *textPara) writeText(w _d.Writer) {
	table := para._cegd
	if table == nil {
		para.writeCellText(w)
		return
	}
	for y := 0; y < table._eabcaa; y++ {
		for x := 0; x < table._aage; x++ {
			if cell := table.get(x, y); cell != nil {
				cell.writeCellText(w)
			} else {
				w.Write([]byte("\u0009"))
			}
			w.Write([]byte("\u0020"))
		}
		if y < table._eabcaa-1 {
			w.Write([]byte("\u000a"))
		}
	}
}

// textTable is a grid of paragraphs with _aage columns and _eabcaa rows.
// Plain cells are kept in _dbfba and composite cells in _ebgbb.
type textTable struct {
	_ee.PdfRectangle
	_aage, _eabcaa int
	_efea          bool
	_dbfba         map[uint64]*textPara
	_ebgbb         map[uint64]compositeCell
}
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct {
	Fonts []Font
}

const _bda = 10

// renderText processes the bytes `_fgad` of a text-showing operation.
//
// The bytes are converted to character codes and strings with the current
// font. For every decoded string a textMark is created from the current
// transformation matrices and appended to `_aga._fbf`, and the text matrix
// `_aga._geff` is advanced by the glyph displacement. The character and miss
// counters of the text state are updated as well.
//
// It returns an error when the font has no metrics for one of the character
// codes. When the text object is flagged as having an invalid font nothing is
// processed and nil is returned.
func (_aga *textObject) renderText(_fgad []byte) error {
	if _aga._ecc {
		_ff.Log.Debug("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e")
		return nil
	}
	_feec := _aga.getCurrentFont()
	_fgeb := _feec.BytesToCharcodes(_fgad)
	_fcd, _aecb, _gece := _feec.CharcodesToStrings(_fgeb)
	if _gece > 0 {
		_ff.Log.Debug("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064", _aecb, _gece)
	}
	_aga._ecb._acc += _aecb
	_aga._ecb._deeb += _gece
	_bbd := _aga._ecb
	_ffe := _bbd._ggda
	_fbd := _bbd._cfa / 100.0
	// Type3 fonts use a glyph scale of 1 instead of the default `_feef`.
	_cead := _feef
	if _feec.Subtype() == "\u0054\u0079\u0070e\u0033" {
		_cead = 1
	}
	// Width of a space: by rune, then by character code 32, then from the
	// default font.
	_aed, _eaf := _feec.GetRuneMetrics(' ')
	if !_eaf {
		_aed, _eaf = _feec.GetCharMetrics(32)
	}
	if !_eaf {
		_aed, _ = _ee.DefaultFont().GetRuneMetrics(' ')
	}
	_dbcd := _aed.Wx * _cead
	_ff.Log.Trace("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066", _dbcd, _fcd, _feec, _ffe)
	_bfda := _de.NewMatrix(_ffe*_fbd, 0, 0, _ffe, 0, _bbd._fefb)
	if _gcff {
		_ff.Log.Info("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071", len(_fgeb), _fgeb, _fcd)
	}
	// The decoded strings are logged under `runes=%q`; the original passed
	// their count, which %q rendered as a quoted character.
	_ff.Log.Trace("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071", len(_fgeb), _fgeb, _fcd)
	_feg := _aga.getFillColor()
	_aba := _aga.getStrokeColor()
	for _fabg, _fgebc := range _fcd {
		_fggd := []rune(_fgebc)
		// A lone NUL rune produces no mark.
		if len(_fggd) == 1 && _fggd[0] == '\x00' {
			continue
		}
		_fcff := _fgeb[_fabg]
		_eeee := _aga._bcgf.CTM.Mult(_aga._geff).Mult(_bfda)
		// `_bbfa` is added to the displacement of a single space only.
		_egg := 0.0
		if len(_fggd) == 1 && _fggd[0] == 32 {
			_egg = _bbd._bbfa
		}
		_bddf, _deed := _feec.GetCharMetrics(_fcff)
		if !_deed {
			_ff.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073", _fcff, _fggd, _fggd, _feec)
			return _be.Errorf("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064", _feec.String(), _fcff)
		}
		_gecf := _de.Point{X: _bddf.Wx * _cead, Y: _bddf.Wy * _cead}
		// `_bdg` is the displacement without `_bace`; `_ecbb` includes it.
		_bdg := _de.Point{X: (_gecf.X*_ffe + _egg) * _fbd}
		_ecbb := _de.Point{X: (_gecf.X*_ffe + _bbd._bace + _egg) * _fbd}
		if _gcff {
			_ff.Log.Info("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _ffe, _bbd._bace, _bbd._bbfa, _fbd)
			_ff.Log.Info("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f", _gecf, _bdg, _ecbb)
		}
		_dad := _ebb(_bdg)
		_efg := _ebb(_ecbb)
		_dga := _aga._bcgf.CTM.Mult(_aga._geff).Mult(_dad)
		if _gagc {
			_ff.Log.Info("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073", _aga._bcgf.CTM, _aga._geff, _efg, _bcd(_aga._bcgf.CTM.Mult(_aga._geff).Mult(_efg)), _dad, _dga, _bcd(_dga))
		}
		_gdcb, _bfaf := _aga.newTextMark(_a.ExpandLigatures(_fggd), _eeee, _bcd(_dga), _f.Abs(_dbcd*_eeee.ScalingFactorX()), _feec, _aga._ecb._bace, _feg, _aba)
		if !_bfaf {
			// NOTE(review): skipping here also skips the Concat below, so the
			// text matrix is not advanced for off-page marks - confirm intended.
			_ff.Log.Debug("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067")
			continue
		}
		if _feec == nil {
			_ff.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e")
		} else if _feec.Encoder() == nil {
			_ff.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073", _feec)
		} else {
			if _deg, _dfcf := _feec.Encoder().CharcodeToRune(_fcff); _dfcf {
				_gdcb._aad = string(_deg)
			}
		}
		_ff.Log.Trace("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073", _fabg, _fcff, _gdcb, _eeee)
		_aga._fbf = append(_aga._fbf, &_gdcb)
		_aga._geff.Concat(_efg)
	}
	return nil
}

// _edgg computes the y positions of the horizontal gaps ("row corridors")
// between groups of text lines in the cells `_dgeb`.
//
// All lines of all cells are sorted by `_decg` (ties within the `_dcfga`
// tolerance are broken by Llx) and split into groups wherever a line lies
// completely below the lowest line seen so far; the midpoint of each such gap
// is a candidate corridor. The corridors are only returned when every cell has
// lines in every group (as decided by hasLines); otherwise nil is returned.
// nil is also returned when the cells contain no lines at all.
func _edgg(_dgeb []compositeCell) []float64 {
	var _acab []*textLine
	_cgfg := 0
	for _, _ggfg := range _dgeb {
		_cgfg += len(_ggfg.paraList)
		_acab = append(_acab, _ggfg.lines()...)
	}
	_ef.Slice(_acab, func(_edae, _fdgge int) bool {
		_accb, _bffg := _acab[_edae], _acab[_fdgge]
		_ebae, _eeaa := _accb._decg, _bffg._decg
		if !_dcfga(_ebae - _eeaa) {
			return _ebae < _eeaa
		}
		return _accb.Llx < _bffg.Llx
	})
	if _bcag {
		_be.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", _cgfg, len(_acab))
		for _cdaed, _gbgde := range _acab {
			_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", _cdaed, _gbgde)
		}
	}
	// Without any lines there are no corridors; the original indexed
	// `_acab[0]` unconditionally and panicked on empty input.
	if len(_acab) == 0 {
		return nil
	}
	var _gcdfa []float64
	// `_addb` tracks the line with the lowest Lly seen so far.
	_addb := _acab[0]
	var _bccdd [][]*textLine
	_dbee := []*textLine{_addb}
	for _egec, _gffc := range _acab[1:] {
		if _gffc.Ury < _addb.Lly {
			_ebfg := 0.5 * (_gffc.Ury + _addb.Lly)
			if _bcag {
				_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a", _egec, _gffc.Ury, _addb.Lly, _ebfg, _addb, _gffc)
			}
			_gcdfa = append(_gcdfa, _ebfg)
			_bccdd = append(_bccdd, _dbee)
			_dbee = nil
		}
		_dbee = append(_dbee, _gffc)
		if _gffc.Lly < _addb.Lly {
			_addb = _gffc
		}
	}
	if len(_dbee) > 0 {
		_bccdd = append(_bccdd, _dbee)
	}
	if _bcag {
		_be.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", _gcdfa)
	}
	if _bcag {
		_ff.Log.Info("\u0072\u006f\u0077\u003d\u0025\u0064", len(_dgeb))
		for _decc, _adce := range _dgeb {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _decc, _adce)
		}
		_ff.Log.Info("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d", len(_bccdd))
		for _gbfd, _afca := range _bccdd {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a", _gbfd, len(_afca))
			for _efceg, _fgbdg := range _afca {
				_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", _efceg, _fgbdg)
			}
		}
	}
	// Every group must have lines in every cell of the row.
	_efdcb := true
	for _bgdcg, _ddbb := range _bccdd {
		_fegag := true
		for _fecce, _eeeecb := range _dgeb {
			if _bcag {
				_be.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", _bgdcg, len(_bccdd), _fecce, len(_dgeb), _eeeecb)
			}
			if !_eeeecb.hasLines(_ddbb) {
				if _bcag {
					_be.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", _bgdcg, len(_bccdd), _fecce, len(_dgeb))
				}
				_fegag = false
				break
			}
		}
		if !_fegag {
			_efdcb = false
			break
		}
	}
	if !_efdcb {
		if _bcag {
			_ff.Log.Info("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg")
		}
		_gcdfa = nil
	}
	if _bcag && _gcdfa != nil {
		_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", _gcdfa)
	}
	return _gcdfa
}

// setCharSpacing stores `_gfg` as the character spacing (`_bace`) of the text
// state. It is a no-op on a nil text object.
func (_ebe *textObject) setCharSpacing(_gfg float64) {
	if _ebe == nil {
		return
	}
	_ebe._ecb._bace = _gfg
	if _gcff {
		_ff.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _gfg, _ebe._ecb.String())
	}
}

// _aacc returns the keys of `_bgdbd` in ascending order.
func _aacc(_bgdbd map[int]intSet) []int {
	_egef := make([]int, 0, len(_bgdbd))
	for _gcefb := range _bgdbd {
		_egef = append(_egef, _gcefb)
	}
	_ef.Ints(_egef)
	return _egef
}

// _efae reports the result of `_dege` applied to the `_dbfe` rectangles of the
// two paragraphs.
func _efae(_abge, _cddg *textPara) bool {
	return _dege(_abge._dbfe, _cddg._dbfe)
}
// getFontDict looks up the font object named `_dfef` in the resources of the
// text object.
//
// When the text object has no resources it returns (nil, nil). When the
// resources do not contain the font it returns an error.
func (_eaed *textObject) getFontDict(_dfef string) (_ece _ca.PdfObject, _fded error) {
	resources := _eaed._bca
	if resources == nil {
		_ff.Log.Debug("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071", _dfef)
		return nil, nil
	}
	fontObj, found := resources.GetFontByName(_ca.PdfObjectName(_dfef))
	if found {
		return fontObj, nil
	}
	_ff.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071", _dfef)
	return nil, _c.New("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073")
}

// toTextMarks returns the TextMarks of the words in the line, in word order.
// A word whose `_adacg` flag is set is preceded by a space mark created with
// `_gcag`. `_abcf` is the running offset passed on to the helpers.
func (_agaa *textLine) toTextMarks(_abcf *int) []TextMark {
	var marks []TextMark
	for _, word := range _agaa._eaab {
		if word._adacg {
			marks = _gcag(marks, _abcf, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_abcf)...)
	}
	return marks
}

// _bbfbab maps each markKind to its display name.
var _bbfbab = map[markKind]string{
	_edeaf: "\u0073\u0074\u0072\u006f\u006b\u0065",
	_cgbe:  "\u0066\u0069\u006c\u006c",
	_ggebb: "\u0061u\u0067\u006d\u0065\u006e\u0074",
}
// markCells sets the `_ebad` flag on every non-nil cell of the table.
func (_bdbe *textTable) markCells() {
	for row := 0; row < _bdbe._eabcaa; row++ {
		for col := 0; col < _bdbe._aage; col++ {
			if cell := _bdbe.get(col, row); cell != nil {
				cell._ebad = true
			}
		}
	}
}

// maxDepth returns the difference between `_fec` and the bottom edge (Lly) of
// the bag.
func (_aaef *wordBag) maxDepth() float64 {
	bottom := _aaef.Lly
	return _aaef._fec - bottom
}
// parasBBox returns the paragraphs of the cell and its bounding box.
func (_ddcfg compositeCell )parasBBox ()(paraList ,_ee .PdfRectangle ){return _ddcfg .paraList ,_ddcfg .PdfRectangle ;};
// _ggef passes the absolute x and y distances between the two points to
// `_abac` and returns its verdict.
func _ggef (_abcfc ,_ecad _de .Point )bool {_abgb :=_f .Abs (_abcfc .X -_ecad .X );_cgeg :=_f .Abs (_abcfc .Y -_ecad .Y );return _abac (_abgb ,_cgeg );
};
// emptyCompositeColumn reports whether no composite cell in column `_fcbfe`
// contains any paragraphs.
func (_cgbdg *textTable )emptyCompositeColumn (_fcbfe int )bool {for _adfcd :=0;_adfcd < _cgbdg ._eabcaa ;_adfcd ++{if _gaga ,_egdbe :=_cgbdg ._ebgbb [_cgccd (_fcbfe ,_adfcd )];_egdbe {if len (_gaga .paraList )> 0{return false ;};};};return true ;};
// textPara is a paragraph: a bounding box, its text lines (`_cecgd`), an
// optional table (`_cegd`) and links to neighbouring paragraphs.
// addNeighbours sets `_bbff`/`_cacae` to the nearest paragraph on the left/right
// and `_decgf`/`_fdec` to the nearest paragraph above/below.
type textPara struct{_ee .PdfRectangle ;
_dbfe _ee .PdfRectangle ;_cecgd []*textLine ;_cegd *textTable ;_ebad bool ;_gbdd bool ;_bbff *textPara ;_cacae *textPara ;_decgf *textPara ;_fdec *textPara ;};
// _gbee returns a ruling of kind `_fafbf` built from the Urx, Lly and Ury of
// `_gaeba` (presumably the right-hand edge of the rectangle - confirm).
func _gbee (_gaeba _ee .PdfRectangle )*ruling {return &ruling {_cgac :_fafbf ,_facf :_gaeba .Urx ,_fgbfa :_gaeba .Lly ,_ebeb :_gaeba .Ury };
};
// _ffgcc rounds `_decda` to the nearest multiple of `_efdg`.
func _ffgcc (_decda float64 )float64 {return _efdg *_f .Round (_decda /_efdg )};
// _egcd concatenates the ruling lists and returns the result of vertsHorzs on
// the combined list.
func _egcd (_edefg []rulingList )(rulingList ,rulingList ){var _agecb rulingList ;for _ ,_ccbd :=range _edefg {_agecb =append (_agecb ,_ccbd ...);};return _agecb .vertsHorzs ();
};
// del removes `_ffaf` from the set.
func (_gfbgb intSet )del (_ffaf int ){delete (_gfbgb ,_ffaf )};
// findTextTables returns the tables found among the paragraphs. Paragraphs that
// are already taken or have zero width are skipped. A table is seeded with
// isAtom, enlarged with growTable and kept only when it has at least `_gfec`
// cells; the cells of a kept table are marked with markCells.
func (_acecc paraList )findTextTables ()[]*textTable {var _gadc []*textTable ;for _ ,_bdgf :=range _acecc {if _bdgf .taken ()||_bdgf .Width ()==0{continue ;
};_abgeb :=_bdgf .isAtom ();if _abgeb ==nil {continue ;
};_abgeb .growTable ();if _abgeb ._aage *_abgeb ._eabcaa < _gfec {continue ;};_abgeb .markCells ();_abgeb .log ("\u0067\u0072\u006fw\u006e");_gadc =append (_gadc ,_abgeb );};return _gadc ;};
// arrangeText arranges the words of the bag into text lines and returns them
// as a paragraph, or nil when no lines were built.
//
// The bag is sorted (and de-duplicated when `_adfc` is set). A line is started
// from the first word in reading order of each depth index and extended by
// repeatedly pulling the best candidate word from the depth band around that
// first word, until no candidate is left or a candidate lies too far back.
// The finished lines are ordered with `_cbae` before the paragraph is built.
func (_bbfe *wordBag )arrangeText ()*textPara {_bbfe .sort ();
if _adfc {_bbfe .removeDuplicates ();};var _fdgc []*textLine ;for _ ,_ddfg :=range _bbfe .depthIndexes (){for !_bbfe .empty (_ddfg ){_cddd :=_bbfe .firstReadingIndex (_ddfg );_ebbg :=_bbfe .firstWord (_cddd );_defd :=_dea (_bbfe ,_cddd );_aeaba :=_ebbg ._eedb ;
_eaga :=_ebbg ._aagef -_bafe *_aeaba ;_dbacb :=_ebbg ._aagef +_bafe *_aeaba ;_fgbaa :=_cgb *_aeaba ;_fce :=_fgee *_aeaba ;_dgfgg :for {var _fbff *textWord ;_fcac :=0;for _ ,_cgag :=range _bbfe .depthBand (_eaga ,_dbacb ){_gebb :=_bbfe .highestWord (_cgag ,_eaga ,_dbacb );
if _gebb ==nil {continue ;};_efbe :=_fcfe (_gebb ,_defd ._eaab [len (_defd ._eaab )-1]);if _efbe < -_fce {break _dgfgg ;};if _efbe > _fgbaa {continue ;};if _fbff !=nil &&_efa (_gebb ,_fbff )>=0{continue ;};_fbff =_gebb ;_fcac =_cgag ;};if _fbff ==nil {break ;
};_defd .pullWord (_bbfe ,_fbff ,_fcac );};_defd .markWordBoundaries ();_fdgc =append (_fdgc ,_defd );};};if len (_fdgc )==0{return nil ;};_ef .Slice (_fdgc ,func (_agadd ,_ebfcf int )bool {return _cbae (_fdgc [_agadd ],_fdgc [_ebfcf ])< 0});_acaac :=_dafa (_bbfe .PdfRectangle ,_fdgc );
if _dagg {_ff .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_acaac .String ());if _dfcfb {for _bcfc ,_gaag :=range _acaac ._cecgd {_be .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bcfc ,_gaag .String ());
if _cbd {for _dgdc ,_dcaag :=range _gaag ._eaab {_be .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dgdc ,_dcaag .String ());for _ffgd ,_fbdf :=range _dcaag ._gceff {_be .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_ffgd ,_fbdf .String ());
};};};};};};return _acaac ;};
// addNeighbours links every paragraph to its nearest left (`_bbff`), right
// (`_cacae`), above (`_decgf`) and below (`_fdec`) neighbour.
//
// Candidates come from yNeighbours (for left/right) and xNeighbours (for
// above/below). A candidate is only accepted when no other candidate on the
// same side overlaps it and the pair passes `_cffd` (left/right) or `_dege`
// (above/below). Finally every link that is not reciprocated by the other
// paragraph is cleared.
func (_bdcc paraList )addNeighbours (){_bedae :=func (_egfde []int ,_feed *textPara )([]*textPara ,[]*textPara ){_fefe :=make ([]*textPara ,0,len (_egfde )-1);_bcbabc :=make ([]*textPara ,0,len (_egfde )-1);for _ ,_bada :=range _egfde {_ddcfa :=_bdcc [_bada ];
if _ddcfa .Urx <=_feed .Llx {_fefe =append (_fefe ,_ddcfa );}else if _ddcfa .Llx >=_feed .Urx {_bcbabc =append (_bcbabc ,_ddcfa );};};return _fefe ,_bcbabc ;};_cbgg :=func (_fgeda []int ,_gbeg *textPara )([]*textPara ,[]*textPara ){_bfab :=make ([]*textPara ,0,len (_fgeda )-1);
_aeeea :=make ([]*textPara ,0,len (_fgeda )-1);for _ ,_fabgd :=range _fgeda {_bfeca :=_bdcc [_fabgd ];if _bfeca .Ury <=_gbeg .Lly {_aeeea =append (_aeeea ,_bfeca );}else if _bfeca .Lly >=_gbeg .Ury {_bfab =append (_bfab ,_bfeca );};};return _bfab ,_aeeea ;
};
// Left and right neighbours.
_cgge :=_bdcc .yNeighbours (_gdgbd );for _ ,_edbf :=range _bdcc {_eedf :=_cgge [_edbf ];if len (_eedf )==0{continue ;};_cfcc ,_abdcc :=_bedae (_eedf ,_edbf );if len (_cfcc )==0&&len (_abdcc )==0{continue ;};if len (_cfcc )> 0{_cgdbd :=_cfcc [0];for _ ,_ebcg :=range _cfcc [1:]{if _ebcg .Urx >=_cgdbd .Urx {_cgdbd =_ebcg ;
};};for _ ,_dbacc :=range _cfcc {if _dbacc !=_cgdbd &&_dbacc .Urx > _cgdbd .Llx {_cgdbd =nil ;break ;};};if _cgdbd !=nil &&_cffd (_edbf .PdfRectangle ,_cgdbd .PdfRectangle ){_edbf ._bbff =_cgdbd ;};};if len (_abdcc )> 0{_eecc :=_abdcc [0];for _ ,_eddfc :=range _abdcc [1:]{if _eddfc .Llx <=_eecc .Llx {_eecc =_eddfc ;
};};for _ ,_dgfa :=range _abdcc {if _dgfa !=_eecc &&_dgfa .Llx < _eecc .Urx {_eecc =nil ;break ;};};if _eecc !=nil &&_cffd (_edbf .PdfRectangle ,_eecc .PdfRectangle ){_edbf ._cacae =_eecc ;};};};
// Above and below neighbours.
_cgge =_bdcc .xNeighbours (_bce );for _ ,_cbaab :=range _bdcc {_cbdb :=_cgge [_cbaab ];
if len (_cbdb )==0{continue ;};_efebe ,_bdbg :=_cbgg (_cbdb ,_cbaab );if len (_efebe )==0&&len (_bdbg )==0{continue ;};if len (_bdbg )> 0{_cegdg :=_bdbg [0];for _ ,_acba :=range _bdbg [1:]{if _acba .Ury >=_cegdg .Ury {_cegdg =_acba ;};};for _ ,_cdfe :=range _bdbg {if _cdfe !=_cegdg &&_cdfe .Ury > _cegdg .Lly {_cegdg =nil ;
break ;};};if _cegdg !=nil &&_dege (_cbaab .PdfRectangle ,_cegdg .PdfRectangle ){_cbaab ._fdec =_cegdg ;};};if len (_efebe )> 0{_acdd :=_efebe [0];for _ ,_gfge :=range _efebe [1:]{if _gfge .Lly <=_acdd .Lly {_acdd =_gfge ;};};for _ ,_feaae :=range _efebe {if _feaae !=_acdd &&_feaae .Lly < _acdd .Ury {_acdd =nil ;
break ;};};if _acdd !=nil &&_dege (_cbaab .PdfRectangle ,_acdd .PdfRectangle ){_cbaab ._decgf =_acdd ;};};};
// Drop links that the other paragraph does not return.
for _ ,_dcfd :=range _bdcc {if _dcfd ._bbff !=nil &&_dcfd ._bbff ._cacae !=_dcfd {_dcfd ._bbff =nil ;};if _dcfd ._decgf !=nil &&_dcfd ._decgf ._fdec !=_dcfd {_dcfd ._decgf =nil ;
};if _dcfd ._cacae !=nil &&_dcfd ._cacae ._bbff !=_dcfd {_dcfd ._cacae =nil ;};if _dcfd ._fdec !=nil &&_dcfd ._fdec ._decgf !=_dcfd {_dcfd ._fdec =nil ;};};};
// setFont stores `_gfc` in the text state field `_ggda` and then looks up the
// font named `_bbe` with getFont. On a lookup error the error is returned and
// the state font is left unchanged (`_ggda` has already been updated). It is
// a no-op on a nil text object.
func (_aggc *textObject )setFont (_bbe string ,_gfc float64 )error {if _aggc ==nil {return nil ;
};_aggc ._ecb ._ggda =_gfc ;_fgba ,_adb :=_aggc .getFont (_bbe );if _adb !=nil {return _adb ;};_aggc ._ecb ._fea =_fgba ;return nil ;};
// absorb pulls every word of `_abab` into the receiver and then applies the
// collected removals to `_abab`.
func (_ecae *wordBag )absorb (_abab *wordBag ){_eaaa :=_abab .makeRemovals ();for _aebe ,_fggc :=range _abab ._aceg {for _ ,_bfgd :=range _fggc {_ecae .pullWord (_bfgd ,_aebe ,_eaaa );
};};_abab .applyRemovals (_eaaa );};
// _adeeb converts stroked path sections into rulings: every pair of
// consecutive points of every subpath is passed to `_gdag`, and the rulings it
// accepts are collected. Subpaths with fewer than two points are skipped.
func _adeeb (_gdcg []pathSection )rulingList {_bcbac (_gdcg );if _dgac {_ff .Log .Info ("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073",len (_gdcg ));
};var _bggbf rulingList ;for _ ,_ecaf :=range _gdcg {for _ ,_dacfb :=range _ecaf ._fbdc {if len (_dacfb ._eaeg )< 2{continue ;};_abdb :=_dacfb ._eaeg [0];for _ ,_eebb :=range _dacfb ._eaeg [1:]{if _affb ,_geca :=_gdag (_abdb ,_eebb ,_ecaf .Color );_geca {_bggbf =append (_bggbf ,_affb );
};_abdb =_eebb ;};};};if _dgac {_ff .Log .Info ("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073",_bggbf );};return _bggbf ;};
// newTextMark builds a textMark for the text `_efeba`.
//
// `_gfafc` is the rendering matrix at the start of the text and `_bacg` the end
// point; the remaining arguments are stored in the mark as given. The matrix
// angle is reduced with `_fgdg`, and the bounding box is built from the start
// and end points and extended by the scaling factor in the direction given by
// that angle. When the media box has a positive width the box is intersected
// with it via `_ccad`. A second, rotation-corrected box is stored as the
// PdfRectangle of the mark.
//
// The boolean result is false when `_ccad` reports that the mark does not
// intersect the media box.
func (_bcbb *textObject )newTextMark (_efeba string ,_gfafc _de .Matrix ,_bacg _de .Point ,_ggca float64 ,_egc *_ee .PdfFont ,_faaa float64 ,_aecdf ,_fdff _ga .Color )(textMark ,bool ){_ddag :=_gfafc .Angle ();
_dcgg :=_fgdg (_ddag ,_eacf );var _dcaa float64 ;if _dcgg %180!=90{_dcaa =_gfafc .ScalingFactorY ();}else {_dcaa =_gfafc .ScalingFactorX ();};_aedd :=_bcd (_gfafc );_fabb :=_ee .PdfRectangle {Llx :_aedd .X ,Lly :_aedd .Y ,Urx :_bacg .X ,Ury :_bacg .Y };
switch _dcgg %360{case 90:_fabb .Urx -=_dcaa ;case 180:_fabb .Ury -=_dcaa ;case 270:_fabb .Urx +=_dcaa ;case 0:_fabb .Ury +=_dcaa ;default:_dcgg =0;_fabb .Ury +=_dcaa ;};if _fabb .Llx > _fabb .Urx {_fabb .Llx ,_fabb .Urx =_fabb .Urx ,_fabb .Llx ;};if _fabb .Lly > _fabb .Ury {_fabb .Lly ,_fabb .Ury =_fabb .Ury ,_fabb .Lly ;
};_ffade :=true ;if _bcbb ._decb ._eab .Width ()> 0{_bddfd ,_deebe :=_ccad (_fabb ,_bcbb ._decb ._eab );if !_deebe {_ffade =false ;_ff .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_fabb ,_bcbb ._decb ._eab ,_efeba );
};_fabb =_bddfd ;};_cga :=_fabb ;_gccg :=_bcbb ._decb ._eab ;switch _dcgg %360{case 90:_gccg .Urx ,_gccg .Ury =_gccg .Ury ,_gccg .Urx ;_cga =_ee .PdfRectangle {Llx :_gccg .Urx -_fabb .Ury ,Urx :_gccg .Urx -_fabb .Lly ,Lly :_fabb .Llx ,Ury :_fabb .Urx };
case 180:_cga =_ee .PdfRectangle {Llx :_gccg .Urx -_fabb .Llx ,Urx :_gccg .Urx -_fabb .Urx ,Lly :_gccg .Ury -_fabb .Lly ,Ury :_gccg .Ury -_fabb .Ury };case 270:_gccg .Urx ,_gccg .Ury =_gccg .Ury ,_gccg .Urx ;_cga =_ee .PdfRectangle {Llx :_fabb .Ury ,Urx :_fabb .Lly ,Lly :_gccg .Ury -_fabb .Llx ,Ury :_gccg .Ury -_fabb .Urx };
};if _cga .Llx > _cga .Urx {_cga .Llx ,_cga .Urx =_cga .Urx ,_cga .Llx ;};if _cga .Lly > _cga .Ury {_cga .Lly ,_cga .Ury =_cga .Ury ,_cga .Lly ;};_gddf :=textMark {_fdbb :_efeba ,PdfRectangle :_cga ,_ebdd :_fabb ,_ggfc :_egc ,_ceba :_dcaa ,_eggg :_faaa ,_abd :_gfafc ,_ddf :_bacg ,_bdaa :_dcgg ,_eaee :_aecdf ,_fca :_fdff };
if _ggfb {_ff .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_aedd ,_bacg ,_gddf .String ());};return _gddf ,_ffade ;
};
// ruling is a straight line segment with a kind (`_cgac`), the kind of mark it
// came from (`_bggf`), a colour and four coordinates.
type ruling struct{_cgac rulingKind ;_bggf markKind ;_ga .Color ;_facf float64 ;_fgbfa float64 ;_ebeb float64 ;_gaeb float64 ;};
// makeRectRuling tries to convert the first four points of the subpath into a
// ruling with colour `_ggdag`.
//
// Each of the four sides is classified with `_bfbe`. The path is accepted when
// it has two horizontal and two vertical sides, or two sides of one kind and
// none of the other whose points pass `_gcgg`/`_ggef`. The bounding rectangle
// of the accepted path is then converted with rectRuling.asRuling.
// The boolean result is false when the path is rejected; the ruling returned
// with it must not be used.
// NOTE(review): `_eaeg[:4]` assumes the subpath has at least four points -
// confirm that callers guarantee this.
func (_cdad *subpath )makeRectRuling (_ggdag _ga .Color )(*ruling ,bool ){if _fbgb {_ff .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_cdad );
};_bdfa :=_cdad ._eaeg [:4];_dagb :=make (map[int ]rulingKind ,len (_bdfa ));for _cfdg ,_effe :=range _bdfa {_ebfd :=_cdad ._eaeg [(_cfdg +1)%4];_dagb [_cfdg ]=_bfbe (_effe ,_ebfd );if _fbgb {_be .Printf ("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066",_cfdg ,_dagb [_cfdg ],_effe ,_ebfd );
};};if _fbgb {_be .Printf ("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_dagb );};var _gdad ,_dbfa []int ;for _cddca ,_dfcfe :=range _dagb {switch _dfcfe {case _dfbe :_dbfa =append (_dbfa ,_cddca );case _fafbf :_gdad =append (_gdad ,_cddca );
};};if _fbgb {_be .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_dbfa ),_dbfa );_be .Printf ("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_gdad ),_gdad );
};_acbb :=(len (_dbfa )==2&&len (_gdad )==2)||(len (_dbfa )==2&&len (_gdad )==0&&_gcgg (_bdfa [_dbfa [0]],_bdfa [_dbfa [1]]))||(len (_gdad )==2&&len (_dbfa )==0&&_ggef (_bdfa [_gdad [0]],_bdfa [_gdad [1]]));if _fbgb {_be .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_dbfa ),len (_gdad ),_acbb );
};if !_acbb {if _fbgb {_ff .Log .Error ("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v",_cdad );_be .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_dbfa ),len (_gdad ),_acbb );
};return &ruling {},false ;};if len (_gdad )==0{for _bcgc ,_cgbc :=range _dagb {if _cgbc !=_dfbe {_gdad =append (_gdad ,_bcgc );};};};if len (_dbfa )==0{for _gbba ,_begb :=range _dagb {if _begb !=_fafbf {_dbfa =append (_dbfa ,_gbba );};};};if _fbgb {_ff .Log .Info ("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_dbfa ),len (_gdad ),len (_bdfa ),_dbfa ,_gdad ,_bdfa );
};var _bbfba ,_gbeab ,_febcc ,_abdce _de .Point ;if _bdfa [_dbfa [0]].Y > _bdfa [_dbfa [1]].Y {_febcc ,_abdce =_bdfa [_dbfa [0]],_bdfa [_dbfa [1]];}else {_febcc ,_abdce =_bdfa [_dbfa [1]],_bdfa [_dbfa [0]];};if _bdfa [_gdad [0]].X > _bdfa [_gdad [1]].X {_bbfba ,_gbeab =_bdfa [_gdad [0]],_bdfa [_gdad [1]];
}else {_bbfba ,_gbeab =_bdfa [_gdad [1]],_bdfa [_gdad [0]];};_gdbea :=_ee .PdfRectangle {Llx :_bbfba .X ,Urx :_gbeab .X ,Lly :_abdce .Y ,Ury :_febcc .Y };if _gdbea .Llx > _gdbea .Urx {_gdbea .Llx ,_gdbea .Urx =_gdbea .Urx ,_gdbea .Llx ;};if _gdbea .Lly > _gdbea .Ury {_gdbea .Lly ,_gdbea .Ury =_gdbea .Ury ,_gdbea .Lly ;
};_aaggb :=rectRuling {PdfRectangle :_gdbea ,_bbbf :_ccdd (_gdbea ),Color :_ggdag };if _aaggb ._bbbf ==_fbdff {if _fbgb {_ff .Log .Error ("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c");
};return nil ,false ;};_cddb ,_ddgfa :=_aaggb .asRuling ();if !_ddgfa {if _fbgb {_ff .Log .Error ("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg");};return nil ,false ;};if _dgac {_be .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_cddb .String ());
};return _cddb ,true ;};
// _adff returns a 2x2 table with `_dffg` at (0,0), `_bege` at (1,0), `_efdga`
// at (0,1) and `_fcfg` at (1,1).
func _adff (_dffg ,_bege ,_efdga ,_fcfg *textPara )*textTable {_aefg :=&textTable {_aage :2,_eabcaa :2,_dbfba :make (map[uint64 ]*textPara ,4)};_aefg .put (0,0,_dffg );_aefg .put (1,0,_bege );_aefg .put (0,1,_efdga );_aefg .put (1,1,_fcfg );
return _aefg ;};
// String returns a description of `l`: its `_decg` value, bounding box, the
// value logged as "fontsize" and its quoted text.
func (_ecba *textLine) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _be.Sprintf(layout, _ecba._decg, _ecba.PdfRectangle, _ecba._caccd, _ecba.text())
}

// getComposite returns the paragraphs and bounding box of the composite cell
// at (`_ffac`, `_aedf`). It returns nil and an empty rectangle when the table
// has no composite cell at that position.
func (_cdcdd *textTable) getComposite(_ffac, _aedf int) (paraList, _ee.PdfRectangle) {
	cell, found := _cdcdd._ebgbb[_cgccd(_ffac, _aedf)]
	if _bcag {
		_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a", _ffac, _aedf, cell.String())
	}
	if found {
		return cell.parasBBox()
	}
	return nil, _ee.PdfRectangle{}
}

// _eabc returns a textState with `_cfa` set to 100, the fill render mode and
// `_age` stored in `_gbc`.
func _eabc(_age _ee.PdfRectangle) textState {
	var state textState
	state._cfa = 100
	state._ccc = RenderModeFill
	state._gbc = _age
	return state
}
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
//
// `start` and `end` are clamped to the offsets covered by the marks. An error
// is returned when `ma` is nil, when `end` < `start`, when `start` lies beyond
// the last mark or when the clamped range selects no mark. An empty array is
// returned unchanged.
func (_adgd *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if _adgd == nil {
		return nil, _c.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _be.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}
	marks := _adgd._beaa
	n := len(marks)
	if n == 0 {
		return _adgd, nil
	}
	if start < marks[0].Offset {
		start = marks[0].Offset
	}
	if end > marks[n-1].Offset+1 {
		end = marks[n-1].Offset + 1
	}

	// First mark whose last character is at or after `start`.
	iStart := _ef.Search(n, func(i int) bool { return marks[i].Offset+len(marks[i].Text)-1 >= start })
	if !(0 <= iStart && iStart < n) {
		return nil, _be.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, iStart, n, marks[0], marks[n-1])
	}

	// First mark that starts at or after `end`. sort.Search returns n when there
	// is no such mark, which simply means the range runs through the last
	// mark. The original rejected iEnd == n as "out of range", so a range that
	// reached the end of the text could never be retrieved.
	iEnd := _ef.Search(n, func(i int) bool { return marks[i].Offset > end-1 })
	if iEnd <= iStart {
		return nil, _be.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_beaa: marks[iStart:iEnd]}, nil
}

// sort sorts the rulings in place using the `comp` ordering.
func (_fdbd rulingList) sort() {
	_ef.Slice(_fdbd, _fdbd.comp)
}

// computeText returns the concatenated text of the marks of the word.
func (_agcd *textWord) computeText() string {
	texts := make([]string, len(_agcd._gceff))
	for i, mark := range _agcd._gceff {
		texts[i] = mark._fdbb
	}
	return _df.Join(texts, "")
}
// String returns a string describing the tile: its bounding box followed by
// one character for each of the flags `_gdge` ("L"), `_geaa` ("R"), `_gaaf`
// ("B") and `_efab` ("T"). A flag that is not set is shown as "_".
func (_cagad gridTile) String() string {
	flag := func(set bool, label string) string {
		if !set {
			return "\u005f"
		}
		return label
	}
	left := flag(_cagad._gdge, "\u004c")
	right := flag(_cagad._geaa, "\u0052")
	bottom := flag(_cagad._gaaf, "\u0042")
	top := flag(_cagad._efab, "\u0054")
	return _be.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", _cagad.PdfRectangle, left, right, bottom, top)
}
// String returns a string describing `pt`: a summary line prefixed with "-",
// one line per element and the summary line again prefixed with "+".
func (_dgd PageText) String() string {
	summary := _be.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_dgd._fcb))
	rows := make([]string, 0, len(_dgd._fcb)+2)
	rows = append(rows, "\u002d"+summary)
	for _, mark := range _dgd._fcb {
		rows = append(rows, mark.String())
	}
	rows = append(rows, "\u002b"+summary)
	return _df.Join(rows, "\u000a")
}

// snapToGroups splits the rulings with vertsHorzs, applies
// snapToGroupsDirection to each non-empty half and returns the first half
// followed by the second.
func (_fegd rulingList) snapToGroups() rulingList {
	first, second := _fegd.vertsHorzs()
	if len(first) > 0 {
		first = first.snapToGroupsDirection()
	}
	if len(second) > 0 {
		second = second.snapToGroupsDirection()
	}
	snapped := append(first, second...)
	snapped.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return snapped
}
// New returns an Extractor instance for extracting content from the input PDF page.
// It fails when the content streams or the media box of the page cannot be
// read. A media box with reversed X or Y coordinates is normalized.
func New(page *_ee.PdfPage) (*Extractor, error) {
	const usageKey = "\u0065\u0078\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077"

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}
	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _be.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	ext := &Extractor{
		_gc:  contents,
		_ea:  page.Resources,
		_eab: *mediaBox,
		_bg:  map[string]fontEntry{},
		_bb:  map[string]textResult{},
	}
	if ext._eab.Llx > ext._eab.Urx {
		_ff.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", ext._eab)
		ext._eab.Llx, ext._eab.Urx = ext._eab.Urx, ext._eab.Llx
	}
	if ext._eab.Lly > ext._eab.Ury {
		_ff.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", ext._eab)
		ext._eab.Lly, ext._eab.Ury = ext._eab.Ury, ext._eab.Lly
	}

	_dg.TrackUse(usageKey)
	return ext, nil
}

// String returns a description of the cell: its bounding box, the number of
// paragraphs and the merged paragraph text passed through `_ggece` with the
// argument 50.
func (_eabaf compositeCell) String() string {
	var summary string
	if len(_eabaf.paraList) > 0 {
		summary = _ggece(_eabaf.paraList.merge().text(), 50)
	}
	return _be.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071", _eabaf.PdfRectangle, len(_eabaf.paraList), summary)
}

// stroke appends the current subpaths, together with the current stroke
// colour, to `*_cddc` as a new path section.
func (_edef *shapesState) stroke(_cddc *[]pathSection) {
	section := pathSection{_fbdc: _edef._deff, Color: _edef._agbc.getStrokeColor()}
	*_cddc = append(*_cddc, section)
	if !_dgac {
		return
	}
	_be.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*_cddc), _edef, _edef._agbc.getStrokeColor(), section.bbox())
	if !_acg {
		return
	}
	// Debug output lists the subpaths up to and including index 10.
	for idx, sub := range _edef._deff {
		_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", idx, sub)
		if idx == 10 {
			break
		}
	}
}

// newTablePara returns a paragraph that wraps the table. Both rectangles of
// the paragraph are set to the bounding box computed by computeBbox.
func (_gbcc *textTable) newTablePara() *textPara {
	bbox := _gbcc.computeBbox()
	para := &textPara{PdfRectangle: bbox, _dbfe: bbox, _cegd: _gbcc}
	if _bcag {
		_ff.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// sort sorts the words of every bin of the bag in place, using the ordering
// defined by `_efa`.
func (_gebg *wordBag) sort() {
	for _, words := range _gebg._aceg {
		bin := words
		before := func(a, b int) bool { return _efa(bin[a], bin[b]) < 0 }
		_ef.Slice(bin, before)
	}
}

// _bfaa returns, for each index i in [0, `_cdaa`), the running total of
// len(`_egeb`[j])+1 over all j < i, together with the total over all indexes.
func _bfaa(_cdaa int, _egeb map[int][]float64) ([]int, int) {
	starts := make([]int, _cdaa)
	total := 0
	for i := range starts {
		starts[i] = total
		total += len(_egeb[i]) + 1
	}
	return starts, total
}
// Font represents the font properties on a PDF page.
type Font struct {
	// PdfFont is the underlying font object from the model package.
	PdfFont *_ee.PdfFont

	// FontName is the font name taken from the font properties.
	FontName string

	// FontType is the Subtype entry of the font dictionary inside the page resources.
	// Examples: Type0, Type1, MMType1, Type3, TrueType, CIDFont.
	FontType string

	// ToUnicode reports whether the font provides a `ToUnicode` mapping.
	ToUnicode bool

	// IsCID reports whether the underlying font is a composite font, i.e. a font
	// dictionary whose Subtype is `Type0`.
	IsCID bool

	// IsSimple reports whether the font is a simple font.
	// A simple font is limited to 8-bit character codes.
	IsSimple bool

	// FontData is the raw data of the embedded font file.
	// Its format can be TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CFF).
	// The value is taken from `FontFile`, `FontFile2` or `FontFile3` inside the Font Descriptor;
	// at most one of those entries supplies FontData.
	FontData []byte

	// FontFileName is a file name for the font, built as
	// (Font Name) + (Font Type Extension), for example: helvetica.ttf.
	FontFileName string

	// FontDescriptor holds the metrics and other attributes found in the
	// Font Descriptor of the PDF structure.
	FontDescriptor *_ee.PdfFontDescriptor
}

// _bdaf returns true when either paragraph has its `_gbdd` flag set; otherwise
// it returns what `_dcfga` reports for the difference between the two depths.
// The depths are only computed when neither flag is set.
func _bdaf(_bddaf, _cdfc *textPara) bool {
	return _bddaf._gbdd || _cdfc._gbdd || _dcfga(_bddaf.depth()-_cdfc.depth())
}
// String returns a string describing `tm`. The font description is cut to 50
// bytes (followed by "...") and a marker is appended when `Meta` is set.
func (_beae TextMark) String() string {
	fontDesc := ""
	if font := _beae.Font; font != nil {
		fontDesc = font.String()
		if len(fontDesc) > 50 {
			fontDesc = fontDesc[:50] + "\u002e\u002e\u002e"
		}
	}

	metaTag := ""
	if _beae.Meta {
		metaTag = "\u0020\u002a\u004d\u002a"
	}

	box := _beae.BBox
	return _be.Sprintf("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",
		_beae.Offset, _beae.Text, []rune(_beae.Text), box.Llx, box.Lly, box.Urx, box.Ury, fontDesc, metaTag)
}

// drawRectangle appends a closed rectangular subpath whose corner is
// (`_fbcd`, `_dadc`) and whose extent is `_debc` by `_cege`.
// When the package flag `_gcga` is set, the device-space rectangle is logged first.
func (_ddcef *shapesState) drawRectangle(_fbcd, _dadc, _debc, _cege float64) {
	xFar, yFar := _fbcd+_debc, _dadc+_cege

	if _gcga {
		near := _ddcef.devicePoint(_fbcd, _dadc)
		far := _ddcef.devicePoint(xFar, yFar)
		deviceRect := _ee.PdfRectangle{Llx: near.X, Lly: near.Y, Urx: far.X, Ury: far.Y}
		_ff.Log.Info("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066", deviceRect)
	}

	_ddcef.newSubPath()
	_ddcef.moveTo(_fbcd, _dadc)
	_ddcef.lineTo(xFar, _dadc)
	_ddcef.lineTo(xFar, yFar)
	_ddcef.lineTo(_fbcd, yFar)
	_ddcef.closePath()
}

// moveTo sets the `_ccef` flag and stores the device-space position of
// (`_baef`, `_eeb`) in `_ddcc`. The move is logged when `_gcga` is set.
func (_caaa *shapesState) moveTo(_baef, _eeb float64) {
	_caaa._ccef = true
	_caaa._ddcc = _caaa.devicePoint(_baef, _eeb)
	if !_gcga {
		return
	}
	_ff.Log.Info("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066", _baef, _eeb, _caaa._ddcc)
}
// String returns a human readable description of `ss`.
func (_adgc *shapesState) String() string {
	return _be.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", len(_adgc._deff), _adgc._ccef)
}

// llyRange returns the sub-slice of `_egf` that refers to paragraphs whose Lly
// lies in the closed interval [`_dgcb`, `_gafdb`].
//
// `_egf` holds indexes into the receiver. The binary searches assume those
// indexes visit the paragraphs in ascending Lly order — TODO confirm at the call sites.
// An empty receiver yields nil (previously this indexed `_egf[0]` and panicked).
func (_fdba paraList) llyRange(_egf []int, _dgcb, _gafdb float64) []int {
	n := len(_fdba)
	if n == 0 {
		return nil
	}
	// The requested interval lies entirely below the first or above the last Lly.
	if _gafdb < _fdba[_egf[0]].Lly || _dgcb > _fdba[_egf[n-1]].Lly {
		return nil
	}
	first := _ef.Search(n, func(i int) bool { return _fdba[_egf[i]].Lly >= _dgcb })
	end := _ef.Search(n, func(i int) bool { return _fdba[_egf[i]].Lly > _gafdb })
	return _egf[first:end]
}

// comp reports whether the ruling at index `_ecaa` orders before the ruling at
// index `_ddcefd`.
//
// Rulings of different kinds are ordered by descending kind value, and two
// rulings of kind `_fbdff` never order before each other. Otherwise the fields
// `_facf`, `_fgbfa` and `_ebeb` are compared in that sequence; each comparison
// is used as written for kind `_dfbe` and negated for any other kind.
func (_ddb rulingList) comp(_ecaa, _ddcefd int) bool {
	a, b := _ddb[_ecaa], _ddb[_ddcefd]
	kind := a._cgac
	if kind != b._cgac {
		return kind > b._cgac
	}
	if kind == _fbdff {
		return false
	}
	orient := func(less bool) bool {
		if kind == _dfbe {
			return less
		}
		return !less
	}
	if a._facf != b._facf {
		return orient(a._facf > b._facf)
	}
	if a._fgbfa != b._fgbfa {
		return orient(a._fgbfa < b._fgbfa)
	}
	return orient(a._ebeb < b._ebeb)
}

// split partitions the composite cell into a table.
//
// `_gdgf` and `_fdfe` are the row and column corridors (they are logged under
// those labels). Both are passed through `_fbde` together with the cell's own
// bounds, and each pair of consecutive values then delimits one grid rectangle.
// Every line of the cell's paragraphs goes to the first grid rectangle for
// which `_efdf` returns true. Each non-empty rectangle becomes a paragraph
// built by `_dafa` and stored in the table.
//
// Note that the receiver's paragraph slice is sorted in place (by Lly, then Llx).
func (_agba compositeCell) split(_gdgf, _fdfe []float64) *textTable {
	numRows := len(_gdgf) + 1
	numCols := len(_fdfe) + 1
	if _bcag {
		_ff.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066", numCols, numRows, _agba, _gdgf, _fdfe)
		_be.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a", len(_agba.paraList))
		for i, para := range _agba.paraList {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para.String())
		}
		_be.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", len(_agba.lines()))
		for i, line := range _agba.lines() {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, line)
		}
	}

	_gdgf = _fbde(_gdgf, _agba.Ury, _agba.Lly)
	_fdfe = _fbde(_fdfe, _agba.Llx, _agba.Urx)

	cells := make(map[uint64]*textPara, numCols*numRows)
	table := textTable{_aage: numCols, _eabcaa: numRows, _dbfba: cells}

	paras := _agba.paraList
	_ef.Slice(paras, func(i, j int) bool {
		a, b := paras[i], paras[j]
		if a.Lly != b.Lly {
			return a.Lly < b.Lly
		}
		return a.Llx < b.Llx
	})

	// One rectangle per (column, row) pair, keyed by `_cgccd`.
	rects := make(map[uint64]_ee.PdfRectangle, numCols*numRows)
	for row, lly := range _gdgf[1:] {
		ury := _gdgf[row]
		for col, urx := range _fdfe[1:] {
			llx := _fdfe[col]
			rects[_cgccd(col, row)] = _ee.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
		}
	}

	if _bcag {
		_ff.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073")
		_be.Printf("\u0020\u0020\u0020\u0020")
		for col := 0; col < numCols; col++ {
			_be.Printf("\u0025\u0033\u0030\u0064\u002c\u0020", col)
		}
		_be.Println()
		for row := 0; row < numRows; row++ {
			_be.Printf("\u0020\u0020\u0025\u0032\u0064\u003a", row)
			for col := 0; col < numCols; col++ {
				_be.Printf("\u00256\u002e\u0032\u0066\u002c\u0020", rects[_cgccd(col, row)])
			}
			_be.Println()
		}
	}

	// findCell returns the (column, row) of the first rectangle accepted by
	// `_efdf` for the line, or (-1, -1) when there is none.
	findCell := func(line *textLine) (int, int) {
		for row := 0; row < numRows; row++ {
			for col := 0; col < numCols; col++ {
				if _efdf(rects[_cgccd(col, row)], line.PdfRectangle) {
					return col, row
				}
			}
		}
		return -1, -1
	}

	cellLines := make(map[uint64][]*textLine, numCols*numRows)
	for _, line := range paras.lines() {
		col, row := findCell(line)
		if col < 0 {
			continue
		}
		cellLines[_cgccd(col, row)] = append(cellLines[_cgccd(col, row)], line)
	}

	for row := 0; row < len(_gdgf)-1; row++ {
		ury := _gdgf[row]
		lly := _gdgf[row+1]
		for col := 0; col < len(_fdfe)-1; col++ {
			llx := _fdfe[col]
			urx := _fdfe[col+1]
			rect := _ee.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			lines := cellLines[_cgccd(col, row)]
			if len(lines) == 0 {
				continue
			}
			para := _dafa(rect, lines)
			table.put(col, row, para)
		}
	}
	return &table
}

// _aef returns `_efa(_efcf, _caee)` unless `_dcfga` accepts that value, in
// which case it returns `_ccag(_efcf, _caee)` instead.
func _aef(_efcf, _caee bounded) float64 {
	primary := _efa(_efcf, _caee)
	if !_dcfga(primary) {
		return primary
	}
	return _ccag(_efcf, _caee)
}

// xNeighbours builds two events per paragraph, one at its Llx and one at its
// Urx, and returns the result of `eventNeighbours` on them.
// When `_aegfg` is non-zero, each paragraph's interval is widened on both sides
// by `_aegfg` times its font size.
func (_cgaag paraList) xNeighbours(_aegfg float64) map[*textPara][]int {
	events := make([]event, 2*len(_cgaag))
	if _aegfg == 0 {
		for i, para := range _cgaag {
			events[2*i] = event{para.Llx, true, i}
			events[2*i+1] = event{para.Urx, false, i}
		}
	} else {
		for i, para := range _cgaag {
			events[2*i] = event{para.Llx - _aegfg*para.fontsize(), true, i}
			events[2*i+1] = event{para.Urx + _aegfg*para.fontsize(), false, i}
		}
	}
	return _cgaag.eventNeighbours(events)
}

// rulingList is a list of rulings.
type rulingList []*ruling

// absorb grows the receiver's rectangle with `_bgcf` to take in the rectangle
// of `_bbacf`, and appends the `_gceff` entries of `_bbacf` to the receiver's.
func (_dcdde *textWord) absorb(_bbacf *textWord) {
	_dcdde.PdfRectangle = _bgcf(_dcdde.PdfRectangle, _bbacf.PdfRectangle)
	_dcdde._gceff = append(_dcdde._gceff, _bbacf._gceff...)
}

// findPrimSec returns the first ruling whose `_facf` matches `_ceacc`
// according to `_dcfga` and whose [`_fgbfa`, `_ebeb`] interval, widened by
// `_cbfg` at both ends, contains `_eaeda`. It returns nil when none matches.
func (_aggca rulingList) findPrimSec(_ceacc, _eaeda float64) *ruling {
	for _, r := range _aggca {
		if _dcfga(r._facf-_ceacc) && r._fgbfa-_cbfg <= _eaeda && _eaeda <= r._ebeb+_cbfg {
			return r
		}
	}
	return nil
}

// subpath is a sequence of points (`_eaeg`) together with a boolean flag (`_feeg`).
type subpath struct {
	_eaeg []_de.Point
	_feeg bool
}

// vertsHorzs splits the list by kind, returning the rulings of kind `_fafbf`
// first and those of kind `_dfbe` second. Rulings of any other kind are dropped.
func (_baea rulingList) vertsHorzs() (rulingList, rulingList) {
	var verts, horzs rulingList
	for _, r := range _baea {
		switch r._cgac {
		case _fafbf:
			verts = append(verts, r)
		case _dfbe:
			horzs = append(horzs, r)
		}
	}
	return verts, horzs
}

// event is one end of an interval: a coordinate (`_ebgdd`), a flag (`_ddfe`)
// that xNeighbours sets to true for the lower end and false for the upper end,
// and the index (`_egca`) of the element the interval belongs to.
type event struct {
	_ebgdd float64
	_ddfe  bool
	_egca  int
}
// String returns a description of `t`: its `_aage` and `_eabcaa` dimensions
// followed by the boolean `_efea`.
func (_fbgbbb *textTable) String() string {
	cols, rows, flag := _fbgbbb._aage, _fbgbbb._eabcaa, _fbgbbb._efea
	return _be.Sprintf("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074", cols, rows, flag)
}

// _ebb returns the translation matrix whose offset is the point `_adg`.
func _ebb(_adg _de.Point) _de.Matrix {
	dx, dy := _adg.X, _adg.Y
	return _de.TranslationMatrix(dx, dy)
}
// _ccad returns the rectangle common to `_dcd` and `_dff` and true.
// When `_gddc` rejects the pair it returns the zero rectangle and false.
func _ccad(_dcd, _dff _ee.PdfRectangle) (_ee.PdfRectangle, bool) {
	var common _ee.PdfRectangle
	if !_gddc(_dcd, _dff) {
		return common, false
	}
	common.Llx = _f.Max(_dcd.Llx, _dff.Llx)
	common.Urx = _f.Min(_dcd.Urx, _dff.Urx)
	common.Lly = _f.Max(_dcd.Lly, _dff.Lly)
	common.Ury = _f.Min(_dcd.Ury, _dff.Ury)
	return common, true
}

// moveText forwards the offsets (`_dedd`, `_ecg`) to moveLP.
func (_cba *textObject) moveText(_dedd, _ecg float64) {
	_cba.moveLP(_dedd, _ecg)
}
// ExtractFonts returns the font information of the page handled by the
// extractor: font name, font type, the raw data of the embedded font file
// (if embedded), the font descriptor and more.
//
// `previousPageFonts` supports building a font catalog across several pages or
// a whole document. Its entries are appended to the page's fonts unless `_af`
// reports their FontName as already present, so the result holds no duplicates.
//
// NOTE: If previousPageFonts is nil, only the fonts of the page are returned.
func (_eaba *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	var pageFonts PageFonts
	if err := pageFonts.extractPageResourcesToFont(_eaba._ea); err != nil {
		return nil, err
	}
	if previousPageFonts == nil {
		return &PageFonts{Fonts: pageFonts.Fonts}, nil
	}
	for _, prev := range previousPageFonts.Fonts {
		if _af(pageFonts.Fonts, prev.FontName) {
			continue
		}
		pageFonts.Fonts = append(pageFonts.Fonts, prev)
	}
	return &PageFonts{Fonts: pageFonts.Fonts}, nil
}

// firstReadingIndex starts from depth index `_bged` and scans the depth band
// beginning at float64(`_bged`+1)*`_fdbf` and extending `_fdab` times the
// `_eedb` of the first word at `_bged`. It returns the index in that band whose
// first word compares lowest under `_efa`, or `_bged` if none compares lower.
func (_abc *wordBag) firstReadingIndex(_bged int) int {
	size := _abc.firstWord(_bged)._eedb
	bandLo := float64(_bged+1) * _fdbf
	bandHi := bandLo + _fdab*size

	best := _bged
	for _, idx := range _abc.depthBand(bandLo, bandHi) {
		if _efa(_abc.firstWord(idx), _abc.firstWord(best)) >= 0 {
			continue
		}
		best = idx
	}
	return best
}

// tidied removes duplicates from the list, snaps the remainder to groups and
// sorts the result. It returns nil when snapToGroups returns nil. `_dcgga` is
// only used to label the debug log line written when `_dgac` is set.
func (_ccagb rulingList) tidied(_dcgga string) rulingList {
	uniques := _ccagb.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()
	if _dgac {
		_ff.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", _dcgga, len(_ccagb), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// _cbae returns `_ccag(_afc, _eabcb)` unless `_dcfga` accepts that value, in
// which case it returns `_efa(_afc, _eabcb)` instead.
func _cbae(_afc, _eabcb bounded) float64 {
	if primary := _ccag(_afc, _eabcb); !_dcfga(primary) {
		return primary
	}
	return _efa(_afc, _eabcb)
}

// intersects reports whether the two rulings cross. One must be of kind
// `_fafbf` and the other of kind `_dfbe`, and each ruling's `_facf` must lie
// within the other's [`_fgbfa`, `_ebeb`] interval widened by `_cbfg`.
// The individual checks are printed when `_dgac` is set.
func (_agbac *ruling) intersects(_acfd *ruling) bool {
	orthogonal := (_agbac._cgac == _fafbf && _acfd._cgac == _dfbe) ||
		(_acfd._cgac == _fafbf && _agbac._cgac == _dfbe)

	spans := func(outer, inner *ruling) bool {
		return outer._fgbfa-_cbfg <= inner._facf && inner._facf <= outer._ebeb+_cbfg
	}
	o1 := spans(_agbac, _acfd)
	o2 := spans(_acfd, _agbac)

	crossing := orthogonal && o1 && o2
	if _dgac {
		_be.Printf("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a", orthogonal, o1, o2, crossing, _agbac, _acfd)
	}
	return crossing
}

// alignsSec reports whether the [`_fgbfa`, `_ebeb`] intervals of the two
// rulings overlap or come within `_fbfc`+1.0 of each other.
func (_dgbfd *ruling) alignsSec(_ddbg *ruling) bool {
	const tolerance = _fbfc + 1.0
	return _dgbfd._fgbfa-tolerance <= _ddbg._ebeb && _ddbg._fgbfa-tolerance <= _dgbfd._ebeb
}

// _gddc reports whether both `_dege` and `_cffd` hold for the two rectangles.
// `_cffd` is only evaluated when `_dege` holds.
func _gddc(_bgd, _ggbe _ee.PdfRectangle) bool {
	if !_dege(_bgd, _ggbe) {
		return false
	}
	return _cffd(_bgd, _ggbe)
}
// ToTextMark returns the public view of `tm`. Only the fields assigned below
// are populated; the rest of the TextMark keeps its zero value.
func (_abec *textMark) ToTextMark() TextMark {
	var public TextMark
	public.Text = _abec._fdbb
	public.Original = _abec._aad
	public.BBox = _abec._ebdd
	public.Font = _abec._ggfc
	public.FontSize = _abec._ceba
	public.FillColor = _abec._eaee
	public.StrokeColor = _abec._fca
	public.Orientation = _abec._bdaa
	return public
}

// writeText writes the text of every paragraph whose `_gbdd` flag is unset to
// `_efgb`. After each written paragraph except the last element of the list it
// writes a single space when `_bdaf` holds for that paragraph and the next
// element, and two newlines otherwise. Two newlines are always written at the end.
// Errors returned by the writer are not checked.
func (_aada paraList) writeText(_efgb _d.Writer) {
	emit := func(s string) { _efgb.Write([]byte(s)) }

	lastIdx := len(_aada) - 1
	for i, para := range _aada {
		if para._gbdd {
			continue
		}
		para.writeText(_efgb)
		if i == lastIdx {
			continue
		}
		switch {
		case _bdaf(para, _aada[i+1]):
			emit("\u0020")
		default:
			emit("\u000a")
			emit("\u000a")
		}
	}
	emit("\u000a")
	emit("\u000a")
}

// textObject bundles an extractor, page resources, a graphics state, a text
// state with its stack, two matrices, the collected text marks and a flag.
type textObject struct {
	_decb *Extractor
	_bca  *_ee.PdfPageResources
	_bcgf _bd.GraphicsState
	_ecb  *textState
	_accf *stateStack
	_geff _de.Matrix
	_abg  _de.Matrix
	_fbf  []*textMark
	_ecc  bool
}

// establishSubpath returns the last subpath of the state, or nil if there is none.
// Before that, a new subpath built by `_edbd` from the returned point is
// appended when lastpointEstablished reports false. The `_ccef` flag is cleared
// whenever a subpath is returned.
func (_bcfg *shapesState) establishSubpath() *subpath {
	if start, established := _bcfg.lastpointEstablished(); !established {
		_bcfg._deff = append(_bcfg._deff, _edbd(start))
	}
	count := len(_bcfg._deff)
	if count == 0 {
		return nil
	}
	_bcfg._ccef = false
	return _bcfg._deff[count-1]
}

// bbox returns the rectangle of the word.
func (_fdbdg *textWord) bbox() _ee.PdfRectangle {
	return _fdbdg.PdfRectangle
}
// String returns a description of `b`: its rectangle, its `_adbbf` value, and
// the number and texts of its words in depth-index order.
func (_dgb *wordBag) String() string {
	var texts []string
	for _, depthIdx := range _dgb.depthIndexes() {
		for _, word := range _dgb._aceg[depthIdx] {
			texts = append(texts, word._bfdfd)
		}
	}
	return _be.Sprintf("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071", _dgb.PdfRectangle, _dgb._adbbf, len(texts), texts)
}

// getDepthIdx converts `_edg` with `_gfb` and clamps the result to the range
// spanned by the first and last entries of depthIndexes.
func (_gaed *wordBag) getDepthIdx(_edg float64) int {
	indexes := _gaed.depthIndexes()
	idx := _gfb(_edg)
	lo, hi := indexes[0], indexes[len(indexes)-1]
	switch {
	case idx < lo:
		return lo
	case idx > hi:
		return hi
	}
	return idx
}

// quadraticTo adds only the end point (`_befe`, `_dbd`) to the path;
// the control point (`_bdbf`, `_dfce`) is not used.
func (_feda *shapesState) quadraticTo(_bdbf, _dfce, _befe, _dbd float64) {
	if _gcga {
		_ff.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	_feda.addPoint(_befe, _dbd)
}

// isAtom follows the receiver's `_cacae` and `_fdec` links. It returns nil if
// either linked paragraph is taken, or if the paragraph reached through
// `_cacae` then `_fdec` is taken or differs from the one reached through
// `_fdec` then `_cacae`. Otherwise it returns the table built by `_adff` from
// the four paragraphs.
func (_acade *textPara) isAtom() *textTable {
	p1, p2 := _acade._cacae, _acade._fdec
	if p1.taken() || p2.taken() {
		return nil
	}
	shared := p1._fdec
	if shared.taken() || shared != p2._cacae {
		return nil
	}
	return _adff(_acade, p1, p2, shared)
}

// bbox returns the rectangle of the mark.
func (_efgd *textMark) bbox() _ee.PdfRectangle {
	return _efgd.PdfRectangle
}

// computeEBBoxes recomputes the `_dbfe` rectangle of every paragraph.
//
// Each `_dbfe` starts as the paragraph's own rectangle. For each paragraph, its
// yNeighbours give the nearest neighbour edge on the left and on the right.
// The rectangle is then widened towards any other paragraph lying at or below
// its bottom edge (Ury <= Lly) whose edge falls between that neighbour edge and
// the rectangle's current edge. When `_aecd` is set, the results are copied
// back into each paragraph's PdfRectangle.
func (_efda paraList) computeEBBoxes() {
	if _afdb {
		_ff.Log.Info("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a")
	}

	for _, para := range _efda {
		para._dbfe = para.PdfRectangle
	}
	neighbours := _efda.yNeighbours(0)

	for i, para := range _efda {
		ebox := para._dbfe

		// Nearest neighbour edges strictly left and right of the paragraph.
		leftLimit, rightLimit := -1.0e9, +1.0e9
		for _, k := range neighbours[para] {
			other := _efda[k]._dbfe
			switch {
			case other.Urx < ebox.Llx:
				leftLimit = _f.Max(leftLimit, other.Urx)
			case ebox.Urx < other.Llx:
				rightLimit = _f.Min(rightLimit, other.Llx)
			}
		}

		for j, candidate := range _efda {
			other := candidate._dbfe
			if i == j || other.Ury > ebox.Lly {
				continue
			}
			switch {
			case leftLimit <= other.Llx && other.Llx < ebox.Llx:
				ebox.Llx = other.Llx
			case other.Urx <= rightLimit && ebox.Urx < other.Urx:
				ebox.Urx = other.Urx
			}
		}

		if _afdb {
			_be.Printf("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a", i, para._dbfe, ebox, _ggece(para.text(), 50))
		}
		para._dbfe = ebox
	}

	if !_aecd {
		return
	}
	for _, para := range _efda {
		para.PdfRectangle = para._dbfe
	}
}
// String returns a human readable description of `vecs`: the number of
// `_fafbf` and `_dfbe` rulings and, when both kinds are present, a rectangle
// built from the `_facf` values of the first and last ruling of each kind.
func (_egbc rulingList) String() string {
	if len(_egbc) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := _egbc.vertsHorzs()
	nv, nh := len(verts), len(horzs)
	if nv > 0 && nh > 0 {
		extent := _ee.PdfRectangle{
			Llx: verts[0]._facf,
			Urx: verts[nv-1]._facf,
			Lly: horzs[nh-1]._facf,
			Ury: horzs[0]._facf,
		}
		return _be.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", nv, nh, extent)
	}
	return _be.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", nv, nh)
}

// _bddd maps each ruling kind to its display name
// ("none", "horizontal" and "vertical" respectively).
var _bddd = map[rulingKind]string{
	_fbdff: "\u006e\u006f\u006e\u0065",
	_dfbe:  "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",
	_fafbf: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",
}

// bbox returns the rectangle of the paragraph.
func (_bdfe *textPara) bbox() _ee.PdfRectangle {
	return _bdfe.PdfRectangle
}
// _cgdg returns a new word bag holding only `_ceff`, filed under the depth
// index `_gfb` gives for the word's `_aagef`. The bag takes the word's
// rectangle and `_eedb`, and stores `_fdbe`, `_egga` and `_ddgga` unchanged.
func _cgdg(_ceff *textWord, _fdbe float64, _egga, _ddgga rulingList) *wordBag {
	depthIdx := _gfb(_ceff._aagef)
	words := []*textWord{_ceff}
	bag := wordBag{
		_aceg:        map[int][]*textWord{depthIdx: words},
		PdfRectangle: _ceff.PdfRectangle,
		_adbbf:       _ceff._eedb,
		_fec:         _fdbe,
		_degg:        _egga,
		_aegf:        _ddgga,
	}
	return &bag
}

// log prints the list under the title `_dceee` when the package flag `_fgga`
// is set. Nil entries are skipped, and paragraphs that carry a table are shown
// with the table's dimensions.
func (_gbgea paraList) log(_dceee string) {
	if !_fgga {
		return
	}
	_ff.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _dceee, len(_gbgea))
	for i, para := range _gbgea {
		if para == nil {
			continue
		}
		text := para.text()
		tableTag := "\u0020\u0020"
		if para._cegd != nil {
			tableTag = _be.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", para._cegd._aage, para._cegd._eabcaa)
		}
		_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", i, para.PdfRectangle, tableTag, _ggece(text, 50))
	}
}

// snapToGroupsDirection groups rulings whose `_facf` values lie within `_fbfc`
// of the group's first ruling, sets every member's `_facf` to the value
// mergePrimary returns for its group, and then replaces each group with the
// merged pieces produced by splitSec and merge. A merged piece that aligns
// with the previously emitted one (alignsPrimary and alignsSec) is logged and
// dropped. The result is returned after sortStrict.
//
// Changes from the previous implementation:
//   - An empty list returns nil; previously `_ddage[0]` was indexed and panicked.
//   - Groups are processed in the order they were created. Previously they were
//     visited by ranging over a map, whose order Go leaves unspecified, so the
//     duplicate check compared against an arbitrary predecessor.
func (_ddage rulingList) snapToGroupsDirection() rulingList {
	_ddage.sortStrict()
	if len(_ddage) == 0 {
		return nil
	}

	groups := make(map[*ruling]rulingList, len(_ddage))
	order := make([]*ruling, 0, len(_ddage)) // group keys in creation order
	current := _ddage[0]
	startGroup := func(r *ruling) {
		current = r
		groups[current] = rulingList{r}
		order = append(order, r)
	}
	startGroup(_ddage[0])

	for _, r := range _ddage[1:] {
		if r._facf < current._facf-_bgcb {
			_ff.Log.Error("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073", current, r)
		}
		if r._facf > current._facf+_fbfc {
			startGroup(r)
		} else {
			groups[current] = append(groups[current], r)
		}
	}

	// Merged `_facf` per group, and the owning group of every ruling.
	merged := make(map[*ruling]float64, len(groups))
	owner := make(map[*ruling]*ruling, len(_ddage))
	for _, key := range order {
		members := groups[key]
		merged[key] = members.mergePrimary()
		for _, member := range members {
			owner[member] = key
		}
	}
	for _, r := range _ddage {
		r._facf = merged[owner[r]]
	}

	snapped := make(rulingList, 0, len(_ddage))
	for _, key := range order {
		pieces := groups[key].splitSec()
		for i, piece := range pieces {
			candidate := piece.merge()
			if len(snapped) > 0 {
				prev := snapped[len(snapped)-1]
				if prev.alignsPrimary(candidate) && prev.alignsSec(candidate) {
					_ff.Log.Error("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073", i, prev, candidate)
					continue
				}
			}
			snapped = append(snapped, candidate)
		}
	}
	snapped.sortStrict()
	return snapped
}

// toTextMarks returns the text marks of the paragraph, using `_ecaeg` as the
// running offset handed to the helpers. A paragraph without a table (`_cegd`
// is nil) is converted with toCellTextMarks. For a table, the cells are
// visited with the `_eabcaa` index in the outer loop and the `_aage` index in
// the inner loop. An absent cell contributes a tab, every cell is followed by
// a space, and each outer iteration except the last is followed by a newline.
func (_bdfbbg *textPara) toTextMarks(_ecaeg *int) []TextMark {
	if _bdfbbg._cegd == nil {
		return _bdfbbg.toCellTextMarks(_ecaeg)
	}
	var marks []TextMark
	for y := 0; y < _bdfbbg._cegd._eabcaa; y++ {
		for x := 0; x < _bdfbbg._cegd._aage; x++ {
			cell := _bdfbbg._cegd.get(x, y)
			if cell == nil {
				marks = _gcag(marks, _ecaeg, "\u0009")
			} else {
				cellMarks := cell.toCellTextMarks(_ecaeg)
				marks = append(marks, cellMarks...)
			}
			marks = _gcag(marks, _ecaeg, "\u0020")
		}
		if y < _bdfbbg._cegd._eabcaa-1 {
			marks = _gcag(marks, _ecaeg, "\u000a")
		}
	}
	return marks
}

// intersections returns, for each ruling index, the set of indexes of the
// rulings it intersects. Only `_fafbf`/`_dfbe` pairs are tested. It returns
// nil when there are fewer than `_abed`+1 rulings of kind `_fafbf` or fewer
// than `_gcad`+1 of kind `_dfbe`, or when the two counts together exceed `_ffbcf`.
func (_aedcb rulingList) intersections() map[int]intSet {
	var verts, horzs []int
	for i, r := range _aedcb {
		switch r._cgac {
		case _fafbf:
			verts = append(verts, i)
		case _dfbe:
			horzs = append(horzs, i)
		}
	}
	if len(verts) < _abed+1 || len(horzs) < _gcad+1 {
		return nil
	}
	if len(verts)+len(horzs) > _ffbcf {
		_ff.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064", len(_aedcb), len(verts), len(horzs))
		return nil
	}

	crossings := make(map[int]intSet, len(verts)+len(horzs))
	for _, v := range verts {
		for _, h := range horzs {
			if _aedcb[v].intersects(_aedcb[h]) {
				if _, ok := crossings[v]; !ok {
					crossings[v] = make(intSet)
				}
				if _, ok := crossings[h]; !ok {
					crossings[h] = make(intSet)
				}
				crossings[v].add(h)
				crossings[h].add(v)
			}
		}
	}
	return crossings
}
// ExtractTextWithStats works like ExtractText but also returns the number of
// characters in the output (`numChars`) and the number of characters that were
// not decoded (`numMisses`). On error the text is empty and the counts from
// ExtractPageText are passed through unchanged.
func (_eae *Extractor) ExtractTextWithStats() (_cad string, _cgc int, _dbb int, _eegf error) {
	pageText, _cgc, _dbb, _eegf := _eae.ExtractPageText()
	if _eegf == nil {
		return pageText.Text(), _cgc, _dbb, nil
	}
	return "", _cgc, _dbb, _eegf
}
2021-12-14 01:08:28 +00:00
2022-04-27 00:10:33 +00:00
// Marks returns the TextMark collection for a page. It represents all the text on the page.
2022-06-06 22:48:24 +00:00
func (_fcda PageText )Marks ()*TextMarkArray {return &TextMarkArray {_beaa :_fcda ._gdbg }};func (_ddfd rulingList )toTilings ()(rulingList ,[]gridTiling ){_ddfd .log ("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s");if len (_ddfd )==0{return nil ,nil ;
};_ddfd =_ddfd .tidied ("\u0061\u006c\u006c");_ddfd .log ("\u0074\u0069\u0064\u0069\u0065\u0064");_badfg :=_ddfd .toGrids ();_edfga :=make ([]gridTiling ,len (_badfg ));for _ecdc ,_abbg :=range _badfg {_edfga [_ecdc ]=_abbg .asTiling ();};return _ddfd ,_edfga ;
2022-03-13 12:41:53 +00:00
};
2021-12-14 01:08:28 +00:00
2022-06-06 22:48:24 +00:00
// Text returns the extracted page text.
func (_ddab PageText) Text() string {
	return _ddab._daaf
}

// removeDuplicates drops every point that `_beafa` considers equal to the
// point kept immediately before it.
func (_gdgd *subpath) removeDuplicates() {
	if len(_gdgd._eaeg) == 0 {
		return
	}
	last := _gdgd._eaeg[0]
	kept := []_de.Point{last}
	for _, pt := range _gdgd._eaeg[1:] {
		if _beafa(pt, last) {
			continue
		}
		kept = append(kept, pt)
		last = pt
	}
	_gdgd._eaeg = kept
}

// closePath closes the last subpath. If `_ccef` is set, a subpath built by
// `_edbd` from `_ddcc` is appended first and the flag is cleared. If the flag
// is unset and there are no subpaths, the call does nothing apart from an
// optional debug log.
func (_fegg *shapesState) closePath() {
	switch {
	case _fegg._ccef:
		_fegg._deff = append(_fegg._deff, _edbd(_fegg._ddcc))
		_fegg._ccef = false
	case len(_fegg._deff) == 0:
		if _gcga {
			_ff.Log.Debug("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068")
		}
		_fegg._ccef = false
		return
	}
	_fegg._deff[len(_fegg._deff)-1].close()
	if _gcga {
		_ff.Log.Info("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073", _fegg)
	}
}

// _ccdd classifies a rectangle by its longer side. It returns `_dfbe` when the
// width exceeds the height and is at least `_acaa`, `_fafbf` when the width
// does not exceed the height and the height is at least `_acaa`, and `_fbdff`
// otherwise.
func _ccdd(_bgec _ee.PdfRectangle) rulingKind {
	width := _bgec.Width()
	height := _bgec.Height()
	switch wider := width > height; {
	case wider && width >= _acaa:
		return _dfbe
	case !wider && height >= _acaa:
		return _fafbf
	}
	return _fbdff
}

// text returns the `_bfdfd` texts of all words in the bag, joined by single spaces.
func (_ddaf *wordBag) text() string {
	words := _ddaf.allWords()
	texts := make([]string, 0, len(words))
	for _, word := range words {
		texts = append(texts, word._bfdfd)
	}
	return _df.Join(texts, "\u0020")
}

// _abcdc groups the marks `_acca` into words. `_fbba` is passed unchanged to
// the helpers that build words and append marks.
//
// A mark whose text `_efgf` accepts ends the current word and is not added to
// any word. Any other mark starts a new word, joins the current one, or, when
// `_aagc` is set and the diacritic tests pass, is folded into the current word
// as a diacritic. A mark joins the current word only when its gap (`_fcfe`) and
// depth difference, both divided by the word's `_eedb`, stay within the limits
// `_acff`, `_dagf` and `_dgfg`. Finished words whose computed text `_efgf`
// accepts are discarded.
func _abcdc(_acca []*textMark, _fbba _ee.PdfRectangle) []*textWord {
	var words []*textWord
	var current *textWord

	if _ggfb {
		_ff.Log.Info("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073", len(_acca))
	}

	// flush finishes the current word (if any) and resets it.
	flush := func() {
		if current == nil {
			return
		}
		text := current.computeText()
		if !_efgf(text) {
			current._bfdfd = text
			words = append(words, current)
			if _ggfb {
				_ff.Log.Info("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", len(words)-1, current.String())
				for i, mark := range current._gceff {
					_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, mark.String())
				}
			}
		}
		current = nil
	}

	for _, mark := range _acca {
		if _aagc && current != nil && len(current._gceff) > 0 {
			prev := current._gceff[len(current._gceff)-1]
			markDia, markIsDia := _faab(mark._fdbb)
			prevDia, prevIsDia := _faab(prev._fdbb)
			if markIsDia && !prevIsDia && prev.inDiacriticArea(mark) {
				current.addDiacritic(markDia)
				continue
			}
			if prevIsDia && !markIsDia && mark.inDiacriticArea(prev) {
				// Swap: the base mark replaces the diacritic, which is re-added as one.
				current._gceff = current._gceff[:len(current._gceff)-1]
				current.appendMark(mark, _fbba)
				current.addDiacritic(prevDia)
				continue
			}
		}

		if _efgf(mark._fdbb) {
			flush()
			continue
		}
		if current == nil {
			current = _bdaae([]*textMark{mark}, _fbba)
			continue
		}

		size := current._eedb
		depthGap := _f.Abs(_fcg(_fbba, mark)-current._aagef) / size
		readingGap := _fcfe(mark, current) / size
		if readingGap >= _acff || !(-_dagf <= readingGap && depthGap <= _dgfg) {
			flush()
			current = _bdaae([]*textMark{mark}, _fbba)
			continue
		}
		current.appendMark(mark, _fbba)
	}
	flush()
	return words
}

// lineRuling is a coloured segment between the points `_aagg` and `_aafd`,
// tagged with a ruling kind and a mark kind.
type lineRuling struct {
	_fbga rulingKind
	_bcbg markKind
	_ga.Color
	_aagg, _aafd _de.Point
}

// bbox returns the rectangle of the table.
func (_decbc *textTable) bbox() _ee.PdfRectangle {
	return _decbc.PdfRectangle
}

// add appends the given points to the subpath.
func (_gcb *subpath) add(_cdeb ..._de.Point) {
	_gcb._eaeg = append(_gcb._eaeg, _cdeb...)
}

// intSet is a set of ints.
type intSet map[int]struct{}
// _gcgg returns the result of `_abac` applied to the absolute Y difference and
// the absolute X difference of the two points, in that order.
func _gcgg(_gegb, _abega _de.Point) bool {
	dx := _f.Abs(_gegb.X - _abega.X)
	dy := _f.Abs(_gegb.Y - _abega.Y)
	return _abac(dy, dx)
}

// asTiling converts a grid of rulings into a gridTiling.
//
// The primaries of the `_fafbf` and `_dfbe` rulings give the x and y
// coordinates of a w x h lattice of elementary tiles. Tiles are first merged
// along each row until a tile with `_geaa` set is reached, then merged
// downwards column-wise until a tile with `_gaaf` set is reached. Only merged
// tiles for which complete() holds are kept.
//
// Changes from the previous implementation:
//   - An empty list returns the zero gridTiling; previously `_bfcg[1:]` panicked.
//   - The size check is `<= 0`. Previously it was `== 0`, so an empty primaries
//     list (len-1 == -1) passed the check and the following `[0]` index panicked.
func (_bfcg rulingList) asTiling() gridTiling {
	if _gfdb {
		_ff.Log.Info("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(_bfcg))
	}
	if len(_bfcg) == 0 {
		return gridTiling{}
	}

	// Report (but keep) consecutive rulings that align with each other.
	for i, next := range _bfcg[1:] {
		prev := _bfcg[i]
		if prev.alignsPrimary(next) && prev.alignsSec(next) {
			_ff.Log.Error("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073", next, prev)
		}
	}
	_bfcg.sortStrict()
	_bfcg.log("\u0073n\u0061\u0070\u0070\u0065\u0064")

	verts, horzs := _bfcg.vertsHorzs()
	xs := verts.primaries()
	ys := horzs.primaries()
	w := len(xs) - 1
	h := len(ys) - 1
	if w <= 0 || h <= 0 {
		return gridTiling{}
	}
	bounds := _ee.PdfRectangle{Llx: xs[0], Urx: xs[w], Lly: ys[0], Ury: ys[h]}

	if _gfdb {
		_ff.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064", len(verts))
		for i, v := range verts {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, v)
		}
		_ff.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064", len(horzs))
		for i, hz := range horzs {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, hz)
		}
		_ff.Log.Info("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f", w, h, xs, ys)
	}

	// Elementary tiles, stored row-major: tiles[y*w+x].
	tiles := make([]gridTile, w*h)
	for y := h - 1; y >= 0; y-- {
		lly := ys[y]
		ury := ys[y+1]
		for x := 0; x < w; x++ {
			llx := xs[x]
			urx := xs[x+1]
			left := verts.findPrimSec(llx, lly)
			right := verts.findPrimSec(urx, lly)
			bottom := horzs.findPrimSec(lly, llx)
			top := horzs.findPrimSec(ury, llx)
			cell := _ee.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			tile := _dbgg(cell, left, right, bottom, top)
			tiles[y*w+x] = tile
			if _gfdb {
				_be.Printf("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a", x, y, tile.String(), tile.Width(), tile.Height())
			}
		}
	}

	if _gfdb {
		_ff.Log.Info("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", bounds)
	}

	// Horizontal pass: per row, merged tiles keyed by their Llx.
	rowTiles := make([]map[float64]gridTile, h)
	for y := h - 1; y >= 0; y-- {
		if _gfdb {
			_be.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", y)
		}
		rowTiles[y] = make(map[float64]gridTile, w)
		for x := 0; x < w; x++ {
			tile := tiles[y*w+x]
			if _gfdb {
				_be.Printf("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", x, tile)
			}
			if !tile._gdge {
				continue
			}
			last := x
			for x2 := x + 1; !tile._geaa && x2 < w; x2++ {
				next := tiles[y*w+x2]
				tile.Urx = next.Urx
				tile._efab = tile._efab || next._efab
				tile._gaaf = tile._gaaf || next._gaaf
				tile._geaa = next._geaa
				if _gfdb {
					_be.Printf("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a", x2, next, tile)
				}
				last = x2
			}
			if _gfdb {
				_be.Printf(" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n", x, last, tile)
			}
			// Skip the tiles that were just merged into this one.
			x = last
			rowTiles[y][tile.Llx] = tile
		}
	}

	// merged holds the final tiles and absorbed marks tiles already merged
	// into a tile above them; both are keyed by Lly, then Llx.
	merged := make(map[float64]map[float64]gridTile, h)
	absorbed := make(map[float64]map[float64]struct{}, h)
	for y := h - 1; y >= 0; y-- {
		lly := tiles[y*w].Lly
		merged[lly] = make(map[float64]gridTile, w)
		absorbed[lly] = make(map[float64]struct{}, w)
	}

	if _gfdb {
		_ff.Log.Info("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", bounds)
	}

	// Vertical pass: extend each tile downwards through the rows below it.
	for y := h - 1; y >= 0; y-- {
		lly := tiles[y*w].Lly
		row := rowTiles[y]
		if _gfdb {
			_be.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", y)
		}
		for _, llx := range _ebbgg(row) {
			if _, done := absorbed[lly][llx]; done {
				continue
			}
			tile := row[llx]
			if _gfdb {
				_be.Printf(" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a", tile.String())
			}
			for y2 := y - 1; y2 >= 0; y2-- {
				if tile._gaaf {
					break
				}
				lower, ok := rowTiles[y2][llx]
				if !ok {
					break
				}
				if lower.Urx != tile.Urx {
					break
				}
				tile._gaaf = lower._gaaf
				tile.Lly = lower.Lly
				if _gfdb {
					_be.Printf("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a", lower.String(), tile.String())
				}
				absorbed[lower.Lly][lower.Llx] = struct{}{}
			}
			if y == 0 {
				tile._gaaf = true
			}
			if tile.complete() {
				merged[lly][llx] = tile
			}
		}
	}

	tiling := gridTiling{PdfRectangle: bounds, _dgcdc: _bagc(merged), _dgbfg: _bebb(merged), _dcaac: merged}
	tiling.log("\u0043r\u0065\u0061\u0074\u0065\u0064")
	return tiling
}

// size returns the number of entries on the stack.
func (_babg *stateStack) size() int {
	return len(*_babg)
}

// applyRemovals removes from each bin of the bag the words listed for that bin
// index in `_abaf`. A bin left with no words is deleted from `_aceg`.
// The new bin size is computed as len(bin) - len(removal set), so every listed
// word is assumed to be present in its bin — TODO confirm against the callers.
func (_bgfe *wordBag) applyRemovals(_abaf map[int]map[*textWord]struct{}) {
	for depthIdx, removals := range _abaf {
		if len(removals) == 0 {
			continue
		}
		bin := _bgfe._aceg[depthIdx]
		remaining := len(bin) - len(removals)
		if remaining == 0 {
			delete(_bgfe._aceg, depthIdx)
			continue
		}
		kept := make([]*textWord, remaining)
		n := 0
		for _, word := range bin {
			if _, removed := removals[word]; !removed {
				kept[n] = word
				n++
			}
		}
		_bgfe._aceg[depthIdx] = kept
	}
}

// cubicTo adds only the end point (`_gbab`, `_cfe`) to the path;
// the two control points are not used.
func (_cacf *shapesState) cubicTo(_baefc, _fcbc, _fad, _ecgd, _gbab, _cfe float64) {
	if _gcga {
		_ff.Log.Info("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a")
	}
	_cacf.addPoint(_gbab, _cfe)
}

// appendWord adds `_bbea` to the line, grows the line's rectangle with `_bgcf`
// to include the word, and raises the line's `_caccd` and `_decg` to the
// word's `_eedb` and `_aagef` when those are larger.
func (_beace *textLine) appendWord(_bbea *textWord) {
	_beace._eaab = append(_beace._eaab, _bbea)
	_beace.PdfRectangle = _bgcf(_beace.PdfRectangle, _bbea.PdfRectangle)
	if _bbea._eedb > _beace._caccd {
		_beace._caccd = _bbea._eedb
	}
	if _bbea._aagef > _beace._decg {
		_beace._decg = _bbea._aagef
	}
}

// paraList is a list of paragraphs.
type paraList []*textPara

// lastpointEstablished returns a start point and a boolean. The boolean is
// false, and the point is meaningful, in two cases: when `_ccef` is set (the
// point is `_ddcc`), and when the last subpath has `_feeg` set (the point is
// that subpath's last point). Otherwise it returns the zero point and true.
func (_gcdb *shapesState) lastpointEstablished() (_de.Point, bool) {
	if _gcdb._ccef {
		return _gcdb._ddcc, false
	}
	count := len(_gcdb._deff)
	if count > 0 && _gcdb._deff[count-1]._feeg {
		return _gcdb._deff[count-1].last(), false
	}
	return _de.Point{}, true
}
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	IncludeInlineStencilMasks bool
}

// compositeCell is a rectangle together with the paragraphs associated with it.
type compositeCell struct {
	_ee.PdfRectangle
	paraList
}

// complete reports whether complete() holds for every tile in `_dcaac`.
func (_dacgb gridTiling) complete() bool {
	for _, row := range _dacgb._dcaac {
		for _, tile := range row {
			if tile.complete() {
				continue
			}
			return false
		}
	}
	return true
}

// _ddec returns the integers 0..`_cadc`-1 sorted with sort.Slice, using
// `_adde` as the less-function over the values themselves.
func _ddec(_cadc int, _adde func(int, int) bool) []int {
	order := make([]int, _cadc)
	for i := 0; i < _cadc; i++ {
		order[i] = i
	}
	less := func(a, b int) bool { return _adde(order[a], order[b]) }
	_ef.Slice(order, less)
	return order
}

// _dgced logs the number of grids under the title `_baeg` and prints each grid.
func _dgced(_baeg string, _fbea []rulingList) {
	_ff.Log.Info("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073", len(_fbea), _baeg)
	for i := range _fbea {
		_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, _fbea[i].String())
	}
}

// stateStack is a stack of text states.
type stateStack []*textState

// log prints the list under the title `_bgde` when the package flag `_dgac`
// is set, and does nothing otherwise.
func (_adabe rulingList) log(_bgde string) {
	if _dgac {
		_ff.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _bgde, _adabe.String())
		for i, r := range _adabe {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r.String())
		}
	}
}

// depth returns -1 when the `_gbdd` flag is set, the `_decg` of the first
// entry of `_cecgd` when that slice is non-empty, and otherwise the depth of
// the paragraph's table (`_cegd`).
func (_aaea *textPara) depth() float64 {
	switch {
	case _aaea._gbdd:
		return -1.0
	case len(_aaea._cecgd) > 0:
		return _aaea._cecgd[0]._decg
	default:
		return _aaea._cegd.depth()
	}
}

// rulingKind identifies the kind of a ruling.
type rulingKind int

// _dbac returns a new slice holding the elements of `_dbebf` in reverse order.
func _dbac(_dbebf []int) []int {
	reversed := make([]int, len(_dbebf))
	for src, dst := 0, len(_dbebf)-1; dst >= 0; src, dst = src+1, dst-1 {
		reversed[dst] = _dbebf[src]
	}
	return reversed
}

// setTextRise stores `_adcf` in the `_fefb` field of the text state.
// It is a no-op on a nil receiver.
func (_gfce *textObject) setTextRise(_adcf float64) {
	if _gfce != nil {
		_gfce._ecb._fefb = _adcf
	}
}