unipdf/extractor/extractor.go
2024-11-22 00:42:43 +00:00

1032 lines
233 KiB
Go
Raw Blame History

//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
package extractor ;import (_eg "bytes";_ad "errors";_bf "fmt";_e "github.com/unidoc/unipdf/v3/common";_gd "github.com/unidoc/unipdf/v3/contentstream";_ff "github.com/unidoc/unipdf/v3/core";_cge "github.com/unidoc/unipdf/v3/internal/license";_gac "github.com/unidoc/unipdf/v3/internal/textencoding";
_da "github.com/unidoc/unipdf/v3/internal/transform";_b "github.com/unidoc/unipdf/v3/model";_a "golang.org/x/image/draw";_c "golang.org/x/text/unicode/norm";_ga "image";_dd "image/color";_f "io";_ce "math";_dg "reflect";_df "regexp";_fd "sort";_bfb "strings";
_g "unicode";_cg "unicode/utf8";);

// extractInlineImage converts an inline content-stream image to RGB and records
// it on the extraction context as an ImageMark whose size, angle and position
// are read from the current transformation matrix of `_gcda`.
//
// A nil colorspace falls back to DeviceGray. Errors from decoding the image,
// resolving its colorspace or converting it to RGB are returned unchanged.
func (_fbe *imageExtractContext) extractInlineImage(_ddf *_gd.ContentStreamInlineImage, _gcda _gd.GraphicsState, _bgf *_b.PdfPageResources) error {
	img, err := _ddf.ToImage(_bgf)
	if err != nil {
		return err
	}

	cs, err := _ddf.GetColorSpace(_bgf)
	if err != nil {
		return err
	}
	if cs == nil {
		cs = _b.NewPdfColorspaceDeviceGray()
	}

	rgb, err := cs.ImageToRGB(*img)
	if err != nil {
		return err
	}

	mark := ImageMark{
		Image:  &rgb,
		Width:  _gcda.CTM.ScalingFactorX(),
		Height: _gcda.CTM.ScalingFactorY(),
		Angle:  _gcda.CTM.Angle(),
	}
	mark.X, mark.Y = _gcda.CTM.Translation()

	_fbe._bcf = append(_fbe._bcf, mark)
	_fbe._abd++
	return nil
}
// TableCell is a cell in a TextTable.
type TableCell struct {
	_b.PdfRectangle
	// Text is the extracted text.
	Text string
	// Marks holds the TextMarks corresponding to the text in Text.
	Marks TextMarkArray
}

// getFontDirect looks up the font dictionary named `_fegf` and builds a PdfFont
// from it.
//
// A lookup failure is returned immediately with a nil font. A failure while
// building the font is logged at debug level and returned together with
// whatever value NewPdfFontFromPdfObject produced.
func (_fcdf *textObject) getFontDirect(_fegf string) (*_b.PdfFont, error) {
	fontObj, err := _fcdf.getFontDict(_fegf)
	if err != nil {
		return nil, err
	}

	font, err := _b.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		// Decodes to: getFontDirect: NewPdfFontFromPdfObject failed. name=%#q err=%v
		_e.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _fegf, err)
	}
	return font, err
}
// String returns a description of the text mark built from its bounding
// rectangle, its `_dgfec` value (labelled "fontsize" in the output) and its
// `_gddf` text.
func (_cddf *textMark) String() string {
	// Decodes to: %.2f fontsize=%.2f "%s"
	const layout = "\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022"
	return _bf.Sprintf(layout, _cddf.PdfRectangle, _cddf._dgfec, _cddf._gddf)
}

// _dfda is a fixed list of ten one-rune strings.
// NOTE(review): presumably the glyphs treated as list bullets — confirm at the use site.
var _dfda = []string{
	"\u2756",
	"\u27a2",
	"\u2713",
	"\u2022",
	"\uf0a7",
	"\u25a1",
	"\u2212",
	"\u25a0",
	"\u25aa",
	"\u006f",
}

// event groups a float64 value, a flag and an integer.
// NOTE(review): how it is used is not visible in this part of the file.
type event struct {
	_caacf float64
	_bdec  bool
	_cfgdd int
}

// compositeCell is a rectangle together with a list of paragraphs.
type compositeCell struct {
	_b.PdfRectangle
	paraList
}

// _gfeee reports whether the two values differ by no more than `_baaf`.
func _gfeee(_ddfb, _eebd float64) bool {
	gap := _ce.Abs(_ddfb - _eebd)
	return gap <= _baaf
}

// Numeric tuning constants used across the package. The values are untyped, so
// whether a literal is written with a decimal point matters and is preserved.
const (
	_beeg  = 1.0e-6
	_cegb  = 1.0e-4
	_adgg  = 10
	_adeg  = 6
	_dacc  = 0.5
	_dgab  = 0.12
	_gbcc  = 0.19
	_ebagb = 0.04
	_bdeb  = 0.04
	_ffbc  = 1.0
	_fcga  = 0.04
	_fbee  = 12
	_efce  = 0.4
	_eaac  = 0.7
	_gbeb  = 1.0
	_cbb   = 0.1
	_aaca  = 1.4
	_ddfg  = 0.46
	_gebb  = 0.02
	_abdd  = 0.2
	_bcgc  = 0.5
	_ggcab = 4
	_aeaf  = 4.0
	_dbebd = 6
	_degb  = 0.3
	_ecde  = 0.01
	_bdcb  = 0.02
	_dbee  = 2
	_fbedf = 2
	_bgfd  = 500
	_cef   = 4.0
	_ggbb  = 4.0
	_adda  = 0.05
	_acgde = 0.1
	_baaf  = 2.0
	_acgdd = 2.0
	_aae   = 1.5
	_cgcb  = 3.0
	_dgga  = 0.25
)

// _afbc reports whether rectangle `_eea` lies entirely within `_gfggge`
// (edges may coincide).
func _afbc(_gfggge, _eea _b.PdfRectangle) bool {
	withinX := _gfggge.Llx <= _eea.Llx && _eea.Urx <= _gfggge.Urx
	withinY := _gfggge.Lly <= _eea.Lly && _eea.Ury <= _gfggge.Ury
	return withinX && withinY
}

// log dumps the paragraph list for debugging. It does nothing unless the
// `_ddcfg` switch is on. The header goes through the logger; one line per
// non-nil paragraph is printed to standard output with its index, rectangle,
// table dimensions (if it has a table) and the text passed through
// `_ggcea(text, 50)`.
func (_bcbfg paraList) log(_bfeda string) {
	if !_ddcfg {
		return
	}
	_e.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _bfeda, len(_bcbfg))
	for idx, para := range _bcbfg {
		if para == nil {
			continue
		}
		body := para.text()
		// Two spaces by default; "[<_addag>x<_cffff>]" when the paragraph carries a table.
		tableTag := "\u0020\u0020"
		if tbl := para._cegg; tbl != nil {
			tableTag = _bf.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", tbl._addag, tbl._cffff)
		}
		_bf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", idx, para.PdfRectangle, tableTag, _ggcea(body, 50))
	}
}

// _gbba binds the float64 argument `_gfbb` to `_fedcb` and returns the
// resulting two-argument predicate.
func _gbba(_fedcb func(*wordBag, *textWord, float64) bool, _gfbb float64) func(*wordBag, *textWord) bool {
	bound := func(bag *wordBag, word *textWord) bool {
		return _fedcb(bag, word, _gfbb)
	}
	return bound
}

// blocks reports whether any ruling in the list lies between `_gbeg` and
// `_cbce` (within `_acgdd` on the `_efadf` axis) while also overlapping the
// part of the [`_efbba`, `_daba`] range the two rulings share. It returns false
// straight away when the two rulings share no such range.
func (_dgccf rulingList) blocks(_gbeg, _cbce *ruling) bool {
	if _gbeg._efbba > _cbce._daba || _cbce._efbba > _gbeg._daba {
		return false
	}
	sharedLo := _ce.Max(_gbeg._efbba, _cbce._efbba)
	sharedHi := _ce.Min(_gbeg._daba, _cbce._daba)

	// Order the pair so that `near` has the smaller `_efadf`.
	near, far := _gbeg, _cbce
	if near._efadf > far._efadf {
		near, far = far, near
	}

	for _, r := range _dgccf {
		between := near._efadf <= r._efadf+_acgdd && r._efadf <= far._efadf+_acgdd
		overlaps := r._efbba <= sharedHi && sharedLo <= r._daba
		if between && overlaps {
			return true
		}
	}
	return false
}
// ToText returns the page text as a single string. It forwards to Text().
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_gefb PageText )ToText ()string {return _gefb .Text ()};
// _fccd reports whether both `_fbad` and `_cbdd` hold for the two rectangles.
// `_fbad` (defined further down in this file) tests overlap of the X extents.
// NOTE(review): `_cbdd` is not visible here; presumably the Y-extent counterpart — confirm.
func _fccd (_afgb ,_fdege _b .PdfRectangle )bool {return _fbad (_afgb ,_fdege )&&_cbdd (_afgb ,_fdege )};
// growTable repeatedly extends the table using the paragraph lists returned by
// getDown and getRight, and stops on the first pass that adds nothing.
//
// Two local helpers do the work:
//   - `_gfaf` increments `_cffff` and stores the first `_addag` entries of the
//     given list in the new last line (second coordinate `_cffff-1`);
//   - `_edcc` increments `_addag` and stores the first `_cffff` entries of the
//     given list in the new last line of the other axis (first coordinate `_addag-1`).
//
// When both lists exist and end in the same, not yet taken, paragraph, the
// "down" list is added first, getRight is queried again, and the shared
// paragraph is stored at (`_addag-1`, `_cffff-1`). Otherwise "down" is
// preferred over "right".
func (_afdg *textTable )growTable (){_gfaf :=func (_fbceb paraList ){_afdg ._cffff ++;
for _gdcgg :=0;_gdcgg < _afdg ._addag ;_gdcgg ++{_fcee :=_fbceb [_gdcgg ];_afdg .put (_gdcgg ,_afdg ._cffff -1,_fcee );};};_edcc :=func (_acfdb paraList ){_afdg ._addag ++;for _fccgf :=0;_fccgf < _afdg ._cffff ;_fccgf ++{_dfdad :=_acfdb [_fccgf ];_afdg .put (_afdg ._addag -1,_fccgf ,_dfdad );
// Debug tracing below is gated on `_cfdc`; the loop has no bound other than the final `break`.
};};if _cfdc {_afdg .log ("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce");};for _fead :=0;;_fead ++{_bagb :=false ;_aadcd :=_afdg .getDown ();_fdabc :=_afdg .getRight ();if _cfdc {_bf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fead ,_afdg );
_bf .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a",_aadcd );_bf .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a",_fdabc );};if _aadcd !=nil &&_fdabc !=nil {_dcdb :=_aadcd [len (_aadcd )-1];
// `_dcdb` is the last entry of the "down" list; it must also be the last entry of the "right" list.
if !_dcdb .taken ()&&_dcdb ==_fdabc [len (_fdabc )-1]{_gfaf (_aadcd );if _fdabc =_afdg .getRight ();_fdabc !=nil {_edcc (_fdabc );_afdg .put (_afdg ._addag -1,_afdg ._cffff -1,_dcdb );};_bagb =true ;};};if !_bagb &&_aadcd !=nil {_gfaf (_aadcd );_bagb =true ;
};if !_bagb &&_fdabc !=nil {_edcc (_fdabc );_bagb =true ;};if !_bagb {break ;};};};
// _cfea passes the absolute X difference and the absolute Y difference of the
// two points, in that order, to `_fgdc` and returns its verdict.
// NOTE(review): `_fgdc` is not visible here; `_dcag` makes the same call with
// the arguments swapped, so the two presumably test perpendicular orientations.
func _cfea (_cede ,_bbgde _da .Point )bool {_ccce :=_ce .Abs (_cede .X -_bbgde .X );_dafcd :=_ce .Abs (_cede .Y -_bbgde .Y );return _fgdc (_ccce ,_dafcd );};
// extractFormImages extracts the images contained in the form XObject named
// `_gga`. A missing form (nil without error) is not an error. The form's own
// resources are used when present, otherwise the resources passed in. On
// success the `_dagc` counter is incremented.
// The graphics state parameter `_gdd` is not used by this function.
func (_ccf *imageExtractContext )extractFormImages (_gga *_ff .PdfObjectName ,_gdd _gd .GraphicsState ,_bbca *_b .PdfPageResources )error {_faf ,_abed :=_bbca .GetXObjectFormByName (*_gga );
if _abed !=nil {return _abed ;};if _faf ==nil {return nil ;};_cgef ,_abed :=_faf .GetContentStream ();if _abed !=nil {return _abed ;};_cdc :=_faf .Resources ;if _cdc ==nil {_cdc =_bbca ;};_abed =_ccf .extractContentStreamImages (string (_cgef ),_cdc );
if _abed !=nil {return _abed ;};_ccf ._dagc ++;return nil ;};
// _cafae looks up a one-rune string in the `_dagcb` table and returns the
// mapped string. It returns ("", false) when the input is not exactly one rune;
// otherwise the boolean reports whether the rune was found.
func _cafae (_bcfe string )(string ,bool ){_faebe :=[]rune (_bcfe );if len (_faebe )!=1{return "",false ;};_fdea ,_dabda :=_dagcb [_faebe [0]];return _fdea ,_dabda ;};
// text returns the `_deccc` strings of all words in the bag joined by single spaces.
func (_ggcac *wordBag )text ()string {_ggad :=_ggcac .allWords ();
_cabb :=make ([]string ,len (_ggad ));for _fccc ,_dagb :=range _ggad {_cabb [_fccc ]=_dagb ._deccc ;};return _bfb .Join (_cabb ,"\u0020");};
// moveTo sets the `_cbeb` flag and stores the device-space image of (x, y) in
// `_dfca`. When `_gegd` is on the move is also logged.
func (_adac *shapesState )moveTo (_bdca ,_dbdf float64 ){_adac ._cbeb =true ;_adac ._dfca =_adac .devicePoint (_bdca ,_dbdf );
if _gegd {_e .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_bdca ,_dbdf ,_adac ._dfca );};};
// _aged returns the left edge of `_acdf` minus the right edge of `_bece`, i.e.
// the horizontal gap from `_bece` to `_acdf` (negative when they overlap in X).
func _aged (_acdf ,_bece bounded )float64 {return _acdf .bbox ().Llx -_bece .bbox ().Urx };
// _fdd maps a value to an integer bin of width `_adeg`. Non-negative values
// use the truncated quotient; every other value uses the truncated quotient
// minus one.
func _fdd(_fbeag float64) int {
	if _fbeag >= 0 {
		return int(_fbeag / _adeg)
	}
	return int(_fbeag/_adeg) - 1
}

// putComposite stores the paragraphs `_fdfb` with rectangle `_dcbf` as the
// composite cell at (`_bfecb`, `_gbgc`). An empty paragraph list is logged as
// an error and ignored. The cell's updateBBox method runs before the cell is
// stored.
func (_dafd *textTable) putComposite(_bfecb, _gbgc int, _fdfb paraList, _dcbf _b.PdfRectangle) {
	if len(_fdfb) == 0 {
		_e.Log.Error("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073")
		return
	}

	cell := compositeCell{PdfRectangle: _dcbf, paraList: _fdfb}
	if _aada {
		// Debug trace of the cell as given, before its bounding box is updated.
		_bf.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a", _bfecb, _gbgc, cell.String())
	}
	cell.updateBBox()

	key := _fbfgf(_bfecb, _gbgc)
	_dafd._bdfcg[key] = cell
}

// subpath is a sequence of points plus a flag.
// NOTE(review): the flag presumably marks the subpath as closed — confirm.
type subpath struct {
	_gfb  []_da.Point
	_dffd bool
}
// BBox returns the smallest axis-aligned rectangle that encloses all the
// TextMarks in the array, ignoring marks flagged as Meta and marks whose text
// satisfies `_gbcg`. The boolean is false when no mark contributed, in which
// case the rectangle is the zero value.
func (_fec *TextMarkArray) BBox() (_b.PdfRectangle, bool) {
	var (
		box   _b.PdfRectangle
		found bool
	)
	for _, mark := range _fec._gaac {
		if mark.Meta || _gbcg(mark.Text) {
			continue
		}
		if !found {
			// First contributing mark seeds the rectangle.
			box, found = mark.BBox, true
			continue
		}
		box = _badbb(box, mark.BBox)
	}
	return box, found
}
// NewWithOptions creates an Extractor instance for extracting content from the input PDF page with options.
//
// The page's content streams and media box are required; failure to read
// either is returned as an error. A missing structure tree root is only logged.
// A media box whose X or Y coordinates are reversed is normalised (and logged).
// When `options` is non-nil and IncludeAnnotations is set, the page annotations
// are loaded; a failure there is logged at debug level and does not fail the call.
func NewWithOptions (page *_b .PdfPage ,options *Options )(*Extractor ,error ){const _gcfe ="\u0065x\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077W\u0069\u0074\u0068\u004f\u0070\u0074\u0069\u006f\u006e\u0073";_cbe ,_fca :=page .GetAllContentStreams ();
if _fca !=nil {return nil ,_fca ;};_gdb ,_ddb :=page .GetStructTreeRoot ();if !_ddb {_e .Log .Debug ("T\u0068\u0065\u0020\u0070\u0064\u0066\u0020\u0064\u006f\u0063\u0075\u006d\u0065\u006e\u0074\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020\u0074\u0061\u0067g\u0065d\u002e\u0020\u0053\u0074r\u0075\u0063t\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e\u0027\u0074\u0020\u0065\u0078\u0069\u0073\u0074\u002e");
};_gad :=page .GetContainingPdfObject ();_ab ,_fca :=page .GetMediaBox ();if _fca !=nil {return nil ,_bf .Errorf ("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076",_fca );
// The media box is copied by value into the extractor, so the fix-ups below do not touch the page.
};_abf :=&Extractor {_ddgf :_cbe ,_eba :page .Resources ,_fg :*_ab ,_fce :page .CropBox ,_cbg :map[string ]fontEntry {},_cdgf :map[string ]textResult {},_dfd :map[string ]textResult {},_ddde :options ,_fcdd :_gdb ,_fdgc :_gad };if _abf ._fg .Llx > _abf ._fg .Urx {_e .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_abf ._fg );
_abf ._fg .Llx ,_abf ._fg .Urx =_abf ._fg .Urx ,_abf ._fg .Llx ;};if _abf ._fg .Lly > _abf ._fg .Ury {_e .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_abf ._fg );
_abf ._fg .Lly ,_abf ._fg .Ury =_abf ._fg .Ury ,_abf ._fg .Lly ;};if _abf ._ddde !=nil {if _abf ._ddde .IncludeAnnotations {_abf ._bge ,_fca =page .GetAnnotations ();if _fca !=nil {_e .Log .Debug ("\u0045\u0072r\u006f\u0072\u0020\u0067\u0065\u0074\u0074\u0069\u006e\u0067\u0020\u0061\u006e\u006e\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0073: \u0025\u0076",_fca );
// Usage is tracked under the name "extractor.NewWithOptions" (the decoded value of `_gcfe`).
};};};_cge .TrackUse (_gcfe );return _abf ,nil ;};
// _dcag passes the absolute Y difference and the absolute X difference of the
// two points, in that order, to `_fgdc` and returns its verdict.
// NOTE(review): `_fgdc` is not visible here; `_cfea` makes the same call with the arguments swapped.
func _dcag (_gcca ,_bege _da .Point )bool {_eddb :=_ce .Abs (_gcca .X -_bege .X );_aacf :=_ce .Abs (_gcca .Y -_bege .Y );return _fgdc (_aacf ,_eddb );};
// _bdgd removes the word `_ebgge` (matched by pointer identity) from `_dgecg`
// by delegating to `_ecbbg` with the index of the first match. If the word is
// not present an error is logged and nil is returned, NOT the input slice.
func _bdgd (_dgecg []*textWord ,_ebgge *textWord )[]*textWord {for _dbage ,_bgedc :=range _dgecg {if _bgedc ==_ebgge {return _ecbbg (_dgecg ,_dbage );
};};_e .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_ebgge );
return nil ;};
// _badbb returns the smallest rectangle that contains both arguments.
func _badbb (_cfaed ,_ccdd _b .PdfRectangle )_b .PdfRectangle {return _b .PdfRectangle {Llx :_ce .Min (_cfaed .Llx ,_ccdd .Llx ),Lly :_ce .Min (_cfaed .Lly ,_ccdd .Lly ),Urx :_ce .Max (_cfaed .Urx ,_ccdd .Urx ),Ury :_ce .Max (_cfaed .Ury ,_ccdd .Ury )};
};
// fill appends a pathSection made of the state's current `_fgdb` and the fill
// color reported by `_aaag` to `*_fge`. With `_ccffg` on, a summary is printed;
// with `_dad` also on, the section's entries are printed too, stopping after
// the entry with index 10.
func (_afb *shapesState )fill (_fge *[]pathSection ){_eegf :=pathSection {_cdad :_afb ._fgdb ,Color :_afb ._aaag .getFillColor ()};*_fge =append (*_fge ,_eegf );if _ccffg {_cccg :=_eegf .bbox ();_bf .Printf ("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",len (*_fge ),len (_eegf ._cdad ),_afb ,_eegf .Color ,_cccg ,_cccg .Width (),_cccg .Height ());
if _dad {for _dcge ,_fcdfa :=range _eegf ._cdad {_bf .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dcge ,_fcdfa );if _dcge ==10{break ;};};};};};
// Compile-time switches and limits.
// NOTE(review): only `_cgaa` is referenced in the visible part of the file
// (it enables hyphen handling in writeCellText); the rest are used elsewhere.
const (_cgaa =true ;_bbdc =true ;_bgcff =true ;_egef =false ;_degdd =false ;_debd =6;_feff =3.0;
_fecg =200;_fcbff =true ;_cabf =true ;_febc =true ;_bfce =true ;_dbcdd =false ;);
// GetContentStreamOps returns the content stream operations stored on the
// PageText (its `_cfcc` field).
func (_badc *PageText) GetContentStreamOps() *_gd.ContentStreamOperations {
	ops := _badc._cfcc
	return ops
}
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_fff *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	ctx := &imageExtractContext{_fcaf: options}
	if err := ctx.extractContentStreamImages(_fff._ddgf, _fff._eba); err != nil {
		return nil, err
	}
	result := &PageImages{Images: ctx._bcf}
	return result, nil
}

// addDiacritic appends `_egfc` to the text of the word's last mark and
// replaces that text with its NFKC-normalised form. The word is expected to
// hold at least one mark.
func (_bgbca *textWord) addDiacritic(_egfc string) {
	marks := _bgbca._bggbb
	last := marks[len(marks)-1]
	last._gddf += _egfc
	last._gddf = _c.NFKC.String(last._gddf)
}
// Text gets the extracted text contained in the list. The text is assembled by
// `_aaee`, which writes into a string builder starting from an empty prefix.
func (_bdfe *list) Text() string {
	var sb _bfb.Builder
	prefix := ""
	_aaee(_bdfe, &sb, &prefix)
	return sb.String()
}

// isAtom tries to form a table from this paragraph and three of its
// neighbours: `_abbb`, `_dabf` and the `_dabf` neighbour of `_abbb`. It returns
// nil if any of the three is taken, or if the third is not also the `_abbb`
// neighbour of this paragraph's `_dabf`. Otherwise it returns what `_ccbed`
// builds from the four paragraphs.
func (_abad *textPara) isAtom() *textTable {
	first, second := _abad._abbb, _abad._dabf
	if first.taken() || second.taken() {
		return nil
	}
	corner := first._dabf
	if corner.taken() || corner != second._abbb {
		return nil
	}
	return _ccbed(_abad, first, second, corner)
}

// moveTextSetLeading stores the negated `_bgaa` in the text state's `_efda`
// field and then calls moveLP with both arguments.
func (_ccff *textObject) moveTextSetLeading(_acgd, _bgaa float64) {
	_ccff._eef._efda = -_bgaa
	_ccff.moveLP(_acgd, _bgaa)
}

// numBorders returns how many of the tile's four border flags are set.
func (_cfbbb gridTile) numBorders() int {
	n := 0
	if _cfbbb._cfgg {
		n++
	}
	if _cfbbb._bbed {
		n++
	}
	if _cfbbb._cega {
		n++
	}
	if _cfbbb._fdde {
		n++
	}
	return n
}

// list is a node in a tree of lists: it holds text lines, two strings and
// child lists.
type list struct {
	_efgb  []*textLine
	_eaag  string
	_ddfgc []*list
	_cceae string
}

// top returns the last element of the stack, or nil when the stack is empty.
func (_dcfd *stateStack) top() *textState {
	if _dcfd.empty() {
		return nil
	}
	stack := *_dcfd
	return stack[_dcfd.size()-1]
}

// drawRectangle starts a new subpath and traces the rectangle with corner
// (`_ebcff`, `_cgeg`), width `_cgdff` and height `_babf`, then closes the path.
// With `_gegd` on, the rectangle is logged in device coordinates first.
func (_ffcg *shapesState) drawRectangle(_ebcff, _cgeg, _cgdff, _babf float64) {
	farX, farY := _ebcff+_cgdff, _cgeg+_babf

	if _gegd {
		ll := _ffcg.devicePoint(_ebcff, _cgeg)
		ur := _ffcg.devicePoint(farX, farY)
		rect := _b.PdfRectangle{Llx: ll.X, Lly: ll.Y, Urx: ur.X, Ury: ur.Y}
		_e.Log.Info("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066", rect)
	}

	_ffcg.newSubPath()
	_ffcg.moveTo(_ebcff, _cgeg)
	_ffcg.lineTo(farX, _cgeg)
	_ffcg.lineTo(farX, farY)
	_ffcg.lineTo(_ebcff, farY)
	_ffcg.closePath()
}

// depth returns -1 when the `_cdcca` flag is set, otherwise the `_dce` value of
// the first line when there are lines, otherwise the depth of the attached table.
func (_eecca *textPara) depth() float64 {
	switch {
	case _eecca._cdcca:
		return -1.0
	case len(_eecca._bbgab) > 0:
		return _eecca._bbgab[0]._dce
	default:
		return _eecca._cegg.depth()
	}
}

// highestWord returns the first word stored under index `_bdagf` whose
// `_bagdd` value lies in [`_ebfgf`, `_eaa`], or nil when there is none.
func (_aafa *wordBag) highestWord(_bdagf int, _ebfgf, _eaa float64) *textWord {
	words := _aafa._ecfd[_bdagf]
	for i := range words {
		w := words[i]
		if _ebfgf <= w._bagdd && w._bagdd <= _eaa {
			return w
		}
	}
	return nil
}
// PageTextOptions holds various options available in extraction process.
type PageTextOptions struct {
	_bfgf bool
	_gabb bool
}

// toTilings tidies the rulings, groups them into grids and converts each grid
// to a tiling. It returns the tidied rulings together with the tilings, or
// (nil, nil) when the list is empty. The list is logged before and after tidying.
func (_caaf rulingList) toTilings() (rulingList, []gridTiling) {
	_caaf.log("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s")
	if len(_caaf) == 0 {
		return nil, nil
	}

	tidy := _caaf.tidied("\u0061\u006c\u006c")
	tidy.log("\u0074\u0069\u0064\u0069\u0065\u0064")

	grids := tidy.toGrids()
	tilings := make([]gridTiling, len(grids))
	for i := range grids {
		tilings[i] = grids[i].asTiling()
	}
	return tidy, tilings
}
// Options holds the extractor options.
type Options struct {
	// DisableDocumentTags specifies whether to use the document tags during list extraction.
	DisableDocumentTags bool

	// ApplyCropBox will extract page text based on page cropbox if set to `true`.
	ApplyCropBox bool

	// UseSimplerExtractionProcess will skip topological text ordering and table processing.
	//
	// NOTE: While normally the extra processing is beneficial, it can also lead to problems when it does not work.
	// Thus it is a flag to allow the user to control this process.
	//
	// Skipping some extraction processes would also lead to the reduced processing time.
	UseSimplerExtractionProcess bool

	// IncludeAnnotations specifies whether to include annotations in the extraction process, default value is `false`.
	IncludeAnnotations bool
}

// tables collects, in list order, the exported form of every paragraph table
// that is non-nil and reports itself as exportable. The result is nil when
// there is none.
func (_gegdd paraList) tables() []TextTable {
	var found []TextTable
	if _aada {
		_e.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range _gegdd {
		tbl := para._cegg
		if tbl == nil || !tbl.isExportable() {
			continue
		}
		found = append(found, tbl.toTextTable())
	}
	return found
}

// _fbfgf packs two cell coordinates into a single map key: the first
// coordinate is multiplied by 0x1000000 and the second is added to it.
func _fbfgf(_ddeae, _aeba int) uint64 {
	const stride = 0x1000000
	hi := uint64(_ddeae) * stride
	lo := uint64(_aeba)
	return hi + lo
}

// sort orders every word slice held in `_ecfd` in place, using `_deedf` as the
// three-way comparison.
func (_baf *wordBag) sort() {
	for _, words := range _baf._ecfd {
		less := func(i, j int) bool {
			return _deedf(words[i], words[j]) < 0
		}
		_fd.Slice(words, less)
	}
}
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
type TextTable struct {
	_b.PdfRectangle
	W, H  int
	Cells [][]TableCell
}

// _edbf gives the display name of each rulingKind: "none", "horizontal" and
// "vertical".
var _edbf = map[rulingKind]string{
	_aagc:  "\u006e\u006f\u006e\u0065",
	_geggc: "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",
	_eeeae: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",
}
// _dbfa concatenates the text of `_ggda`, writing `_bafd` before each line.
// After a line it writes a newline when the next line's `_dce` value differs
// from the current one, and a space when it is equal. The last line is compared
// against 0.
func _dbfa(_ggda []*textLine, _bafd string) string {
	var out _bfb.Builder
	last := len(_ggda) - 1
	for i, line := range _ggda {
		content := line.text()
		current := line._dce

		following := 0.0
		if i < last {
			following = _ggda[i+1]._dce
		}

		out.WriteString(_bafd)
		out.WriteString(content)

		separator := "\u0020"
		if following != current {
			separator = "\u000a"
		}
		out.WriteString(separator)
	}
	return out.String()
}

// String returns a debug description of the cell: its rectangle, the number of
// paragraphs and, when there are any, the merged paragraph text passed through
// `_ggcea(text, 50)`.
func (_fegda compositeCell) String() string {
	preview := ""
	if len(_fegda.paraList) > 0 {
		merged := _fegda.paraList.merge()
		preview = _ggcea(merged.text(), 50)
	}
	// Decodes to: %6.2f %d paras %q
	const layout = "\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071"
	return _bf.Sprintf(layout, _fegda.PdfRectangle, len(_fegda.paraList), preview)
}
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct{Fonts []Font ;};
// emptyCompositeRow reports whether no composite cell with second coordinate
// `_agcc` holds any paragraphs. First coordinates 0..`_addag`-1 are checked.
func (_bbadf *textTable )emptyCompositeRow (_agcc int )bool {for _bbcgd :=0;_bbcgd < _bbadf ._addag ;_bbcgd ++{if _gbfdd ,_ccbee :=_bbadf ._bdfcg [_fbfgf (_bbcgd ,_agcc )];_ccbee {if len (_gbfdd .paraList )> 0{return false ;
};};};return true ;};
// llyRange returns the sub-slice of the index permutation `_decb` whose
// paragraphs have Lly within [`_gfde`, `_degca`], or nil when the range misses
// the first/last indexed paragraph entirely.
// NOTE(review): relies on `_decb` ordering the paragraphs by ascending Lly
// (required by the binary searches) and on len(`_decb`) == len(list) > 0 — confirm at callers.
func (_cccd paraList )llyRange (_decb []int ,_gfde ,_degca float64 )[]int {_gbab :=len (_cccd );if _degca < _cccd [_decb [0]].Lly ||_gfde > _cccd [_decb [_gbab -1]].Lly {return nil ;};_bffaf :=_fd .Search (_gbab ,func (_baefc int )bool {return _cccd [_decb [_baefc ]].Lly >=_gfde });
_gaefa :=_fd .Search (_gbab ,func (_ddga int )bool {return _cccd [_decb [_ddga ]].Lly > _degca });return _decb [_bffaf :_gaefa ];};
// _bc builds a BidiText from `_dfa` with a direction tag: "ttb" when `_gb` is
// set, otherwise "rtl" when `_cf` is false, otherwise "ltr".
func _bc (_dfa string ,_cf bool ,_gb bool )BidiText {_fdg :="\u006c\u0074\u0072";if _gb {_fdg ="\u0074\u0074\u0062";}else if !_cf {_fdg ="\u0072\u0074\u006c";
};return BidiText {_db :_dfa ,_bg :_fdg };};
// _bcdc reports whether the word's X extent overlaps the bag's X extent once
// the bag is widened by `_dfebf` on both sides (strict comparison).
func _bcdc (_dgdg *wordBag ,_cgebg *textWord ,_dfebf float64 )bool {return _cgebg .Llx < _dgdg .Urx +_dfebf &&_dgdg .Llx -_dfebf < _cgebg .Urx ;};
// getCurrentFont returns the font stored in the text state, or the model's
// default font (with a debug log entry) when none is set.
func (_adff *textObject )getCurrentFont ()*_b .PdfFont {_bab :=_adff ._eef ._ggec ;
if _bab ==nil {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e");return _b .DefaultFont ();
};return _bab ;};
// _gaacb classifies the segment between the two points by passing the absolute
// X difference, the absolute Y difference and the `_adda` constant to `_bdfee`.
func _gaacb (_dcfee ,_bgdc _da .Point )rulingKind {_eaae :=_ce .Abs (_dcfee .X -_bgdc .X );_fgec :=_ce .Abs (_dcfee .Y -_bgdc .Y );return _bdfee (_eaae ,_fgec ,_adda );};
// writeCellText writes the paragraph's lines to `_cbfac`. A line that ends in
// a hyphen and is not the last one is passed through `_ffag` first and is
// followed by no separator. Every other non-final line is followed by the
// separator `_dadac` derives from the `_dce` values of the line and its
// successor. Errors from Write are ignored.
func (_ccfe *textPara )writeCellText (_cbfac _f .Writer ){for _dgbc ,_geaf :=range _ccfe ._bbgab {_fcdge :=_geaf .text ();
_bfba :=_cgaa &&_geaf .endsInHyphen ()&&_dgbc !=len (_ccfe ._bbgab )-1;if _bfba {_fcdge =_ffag (_fcdge );};_cbfac .Write ([]byte (_fcdge ));if !(_bfba ||_dgbc ==len (_ccfe ._bbgab )-1){_cbfac .Write ([]byte (_dadac (_geaf ._dce ,_ccfe ._bbgab [_dgbc +1]._dce )));
};};};
// setTextRise stores `_cff` in the text state's `_gcfd` field. It is a no-op on a nil receiver.
func (_gaf *textObject )setTextRise (_cff float64 ){if _gaf ==nil {return ;};_gaf ._eef ._gcfd =_cff ;};
// _fgcg is the scale factor 1/1000.
const _fgcg =1.0/1000.0;
// toTextMarks flattens the paragraphs into one TextMark slice, skipping those
// with the `_cdcca` flag set. Between consecutive paragraphs `_bacbe` appends a
// single space when `_egad` holds for the pair and two newlines otherwise; two
// newlines are always appended at the end. `_fefdg` is the running offset
// shared by all the calls.
func (_aggcb paraList )toTextMarks ()[]TextMark {_fefdg :=0;var _adbg []TextMark ;for _gbdb ,_gadb :=range _aggcb {if _gadb ._cdcca {continue ;
};_fccf :=_gadb .toTextMarks (&_fefdg );_adbg =append (_adbg ,_fccf ...);if _gbdb !=len (_aggcb )-1{if _egad (_gadb ,_aggcb [_gbdb +1]){_adbg =_bacbe (_adbg ,&_fefdg ,"\u0020");}else {_adbg =_bacbe (_adbg ,&_fefdg ,"\u000a");_adbg =_bacbe (_adbg ,&_fefdg ,"\u000a");
};};};_adbg =_bacbe (_adbg ,&_fefdg ,"\u000a");_adbg =_bacbe (_adbg ,&_fefdg ,"\u000a");return _adbg ;};
// Font-related error messages. Decoded, in order:
//   _gge: "ERROR: Can't convert font object, invalid type"
//   _gfa: "ERROR: Can't get font properties, font not found"
//   _dbb: "ERROR: Can't get font stream, invalid type"
//   _agd: "ERROR: No font found"
const (_gge ="\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065";
_gfa ="\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064";
_dbb ="\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065";_agd ="E\u0052R\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066o\u006e\u0074\u0020\u0066ou\u006e\u0064";
);
// paraList is a slice of paragraph pointers.
type paraList []*textPara ;
// _babc returns a new slice that starts with `_eebb`, ends with `_fffa`, and
// in between holds the values of `_ggbbe` lying strictly between
// min(`_eebb`,`_fffa`) and max(`_eebb`,`_fffa`). Scanning stops at the first
// value at or above the upper bound.
// NOTE(review): this assumes `_ggbbe` is sorted in ascending order — confirm.
func _babc (_ggbbe []float64 ,_eebb ,_fffa float64 )[]float64 {_feece ,_dbcda :=_eebb ,_fffa ;if _dbcda < _feece {_feece ,_dbcda =_dbcda ,_feece ;};_cfabd :=make ([]float64 ,0,len (_ggbbe )+2);_cfabd =append (_cfabd ,_eebb );
for _ ,_faebd :=range _ggbbe {if _faebd <=_feece {continue ;}else if _faebd >=_dbcda {break ;};_cfabd =append (_cfabd ,_faebd );};_cfabd =append (_cfabd ,_fffa );return _cfabd ;};
// empty reports whether the stack holds no elements.
func (_dddb *stateStack )empty ()bool {return len (*_dddb )==0};
// _cbc returns the translation component of the matrix as a point.
func _cbc (_fbbd _da .Matrix )_da .Point {_dbae ,_bfc :=_fbbd .Translation ();
return _da .Point {X :_dbae ,Y :_bfc };};
// extractXObjectImage records the image XObject named `_abe` as an ImageMark
// positioned by the current transformation matrix and increments `_begg`.
//
// Decoded images are cached in `_ba`, keyed by the XObject returned by
// GetXObjectByName. A name that resolves to no XObject, or to no image, is
// silently skipped. If the image has an explicit mask (`Mask`, read via
// `_gfdc`) or otherwise a soft mask (`SMask`, read via `_cecfa`), the mask is
// combined with the image via `_fcagb` before caching; failure to read a mask
// is only logged.
func (_cfc *imageExtractContext )extractXObjectImage (_abe *_ff .PdfObjectName ,_dfac _gd .GraphicsState ,_ffc *_b .PdfPageResources )error {_afg ,_ :=_ffc .GetXObjectByName (*_abe );if _afg ==nil {return nil ;};
// Cache miss: decode the image and, if present, its mask.
_edda ,_fee :=_cfc ._ba [_afg ];if !_fee {_acde ,_bcga :=_ffc .GetXObjectImageByName (*_abe );if _bcga !=nil {return _bcga ;};if _acde ==nil {return nil ;};_cga ,_bcga :=_acde .ToImage ();if _bcga !=nil {return _bcga ;};var _dge _ga .Image ;if _acde .Mask !=nil {if _dge ,_bcga =_gfdc (_acde .Mask ,_dd .Opaque );
_bcga !=nil {_e .Log .Debug ("\u0057\u0041\u0052\u004e\u003a \u0063\u006f\u0075\u006c\u0064 \u006eo\u0074\u0020\u0067\u0065\u0074\u0020\u0065\u0078\u0070\u006c\u0069\u0063\u0069\u0074\u0020\u0069\u006d\u0061\u0067e\u0020\u006d\u0061\u0073\u006b\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e");
};}else if _acde .SMask !=nil {_dge ,_bcga =_cecfa (_acde .SMask ,_dd .Opaque );if _bcga !=nil {_e .Log .Debug ("W\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0067\u0065\u0074\u0020\u0073\u006f\u0066\u0074\u0020\u0069\u006da\u0067e\u0020\u006d\u0061\u0073k\u002e\u0020O\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063\u0074\u002e");
// With a mask available, the masked result replaces `_cga`: as a gray image when the
// colorspace string is "DeviceGray" or "Indexed", as a generic image otherwise.
};};if _dge !=nil {_dbf ,_dac :=_cga .ToGoImage ();if _dac !=nil {return _dac ;};_dbf =_fcagb (_dbf ,_dge );switch _acde .ColorSpace .String (){case "\u0044\u0065\u0076\u0069\u0063\u0065\u0047\u0072\u0061\u0079","\u0049n\u0064\u0065\u0078\u0065\u0064":_cga ,_dac =_b .ImageHandling .NewGrayImageFromGoImage (_dbf );
if _dac !=nil {return _dac ;};default:_cga ,_dac =_b .ImageHandling .NewImageFromGoImage (_dbf );if _dac !=nil {return _dac ;};};};_edda =&cachedImage {_cfa :_cga ,_gfe :_acde .ColorSpace };_cfc ._ba [_afg ]=_edda ;};_ccb :=_edda ._cfa ;_fegc :=_edda ._gfe ;
// The RGB conversion runs on every call, including cache hits.
_edf ,_agda :=_fegc .ImageToRGB (*_ccb );if _agda !=nil {return _agda ;};_e .Log .Debug ("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_dfac .CTM .String ());_cdb :=ImageMark {Image :&_edf ,Width :_dfac .CTM .ScalingFactorX (),Height :_dfac .CTM .ScalingFactorY (),Angle :_dfac .CTM .Angle ()};
_cdb .X ,_cdb .Y =_dfac .CTM .Translation ();_cfc ._bcf =append (_cfc ._bcf ,_cdb );_cfc ._begg ++;return nil ;};
// NOTE(review): the purpose of `_ddc` is not visible in this part of the file.
const _ddc =20;
// bbox returns the paragraph's bounding rectangle.
func (_gdgb *textPara )bbox ()_b .PdfRectangle {return _gdgb .PdfRectangle };
// bounded is implemented by anything that can report a bounding rectangle.
type bounded interface{bbox ()_b .PdfRectangle };
// endsInHyphen reports whether the line ends in a hyphen that passes the
// `_cdca` check. The last rune of the last entry of `_edge` must be in the
// Unicode Hyphen class. If that entry has its `_gcccd` flag set and its own
// text passes `_cdca`, the answer is true; otherwise `_cdca` is applied to the
// whole line text. The line is expected to have at least one entry in `_edge`.
func (_cecc *textLine )endsInHyphen ()bool {_eadgb :=_cecc ._edge [len (_cecc ._edge )-1];_bcee :=_eadgb ._deccc ;_dfefa ,_febd :=_cg .DecodeLastRuneInString (_bcee );if _febd <=0||!_g .Is (_g .Hyphen ,_dfefa ){return false ;};if _eadgb ._gcccd &&_cdca (_bcee ){return true ;
};return _cdca (_cecc .text ());};
// _dgc reverses, in place, the elements of `_ddg` in the half-open index range [`_bee`, `_fb`).
func _dgc (_ddg []string ,_bee int ,_fb int ){for _fc ,_ge :=_bee ,_fb -1;_fc < _ge ;_fc ,_ge =_fc +1,_ge -1{_adg :=_ddg [_fc ];_ddg [_fc ]=_ddg [_ge ];_ddg [_ge ]=_adg ;};};
// sortStrict sorts the rulings in place by: kind (`_facdf`) descending; then
// `_efadf` ascending, unless `_dbfgf` accepts the difference; then `_efbba`
// ascending; then `_daba` ascending.
// NOTE(review): `_dbfgf` is not visible here; presumably an "is approximately zero" test.
func (_eaagd rulingList )sortStrict (){_fd .Slice (_eaagd ,func (_adbc ,_dgcad int )bool {_ceabe ,_bfbf :=_eaagd [_adbc ],_eaagd [_dgcad ];
_bddgf ,_fgff :=_ceabe ._facdf ,_bfbf ._facdf ;if _bddgf !=_fgff {return _bddgf > _fgff ;};_bagc ,_dgfg :=_ceabe ._efadf ,_bfbf ._efadf ;if !_dbfgf (_bagc -_dgfg ){return _bagc < _dgfg ;};_bagc ,_dgfg =_ceabe ._efbba ,_bfbf ._efbba ;if _bagc !=_dgfg {return _bagc < _dgfg ;
};return _ceabe ._daba < _bfbf ._daba ;});};
// snapToGroups splits the rulings with vertsHorzs, applies
// snapToGroupsDirection to each non-empty half, and returns the two halves
// concatenated (first result of vertsHorzs first). The result is logged.
func (_ecgad rulingList )snapToGroups ()rulingList {_fedgcb ,_afce :=_ecgad .vertsHorzs ();if len (_fedgcb )> 0{_fedgcb =_fedgcb .snapToGroupsDirection ();};if len (_afce )> 0{_afce =_afce .snapToGroupsDirection ();
};_gdefe :=append (_fedgcb ,_afce ...);_gdefe .log ("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073");return _gdefe ;};
// _cgcbb formats the map as "{key: values, ...}", taking the key order from
// `_bcef` and printing the values with two decimals.
func _cgcbb (_cedcg map[int ][]float64 )string {_gecb :=_bcef (_cedcg );_gbffg :=make ([]string ,len (_cedcg ));
for _fbdf ,_gafe :=range _gecb {_gbffg [_fbdf ]=_bf .Sprintf ("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066",_gafe ,_cedcg [_gafe ]);};return _bf .Sprintf ("\u007b\u0025\u0073\u007d",_bfb .Join (_gbffg ,"\u002c\u0020"));};
// contains reports whether the rectangle fits inside the tile. A tile with
// fewer than three borders contains nothing. For each border that is present
// (`_cfgg` left, `_bbed` right, `_cega` bottom, `_fdde` top) the rectangle may
// extend past the tile edge by at most `_aae`; sides without a border are not
// checked.
func (_becc gridTile )contains (_gedc _b .PdfRectangle )bool {if _becc .numBorders ()< 3{return false ;
};if _becc ._cfgg &&_gedc .Llx < _becc .Llx -_aae {return false ;};if _becc ._bbed &&_gedc .Urx > _becc .Urx +_aae {return false ;};if _becc ._cega &&_gedc .Lly < _becc .Lly -_aae {return false ;};if _becc ._fdde &&_gedc .Ury > _becc .Ury +_aae {return false ;
};return true ;};
// textMark is the package-internal record for one piece of extracted text: its
// bounding rectangle, text (`_gddf`), font, size value (`_dgfec`), matrices,
// colors and related bookkeeping.
// NOTE(review): the meaning of most fields is not visible in this part of the file.
type textMark struct{_b .PdfRectangle ;_egba int ;_gddf string ;_fcfce string ;_facf *_b .PdfFont ;_dgfec float64 ;_dggd float64 ;_eebef _da .Matrix ;_degf _da .Point ;_badbe _b .PdfRectangle ;_ebgb _dd .Color ;_eage _dd .Color ;_facd _ff .PdfObject ;
_ccffd []string ;Tw float64 ;Th float64 ;_faad int ;_bafde int ;};
// _gdcb returns a ruling of kind `_geggc` (named "horizontal" in `_edbf`) along
// the bottom edge of the rectangle: positioned at Lly and spanning Llx..Urx.
func _gdcb (_bdagb _b .PdfRectangle )*ruling {return &ruling {_facdf :_geggc ,_efadf :_bdagb .Lly ,_efbba :_bdagb .Llx ,_daba :_bdagb .Urx };};
// asRuling converts the line to a ruling. For kind `_eeeae` the ruling sits at
// the line's mean X and spans its Y range; for kind `_geggc` it sits at the
// mean Y and spans the X range. Any other kind is logged as an error and
// yields (nil, false).
func (_dcfeb lineRuling )asRuling ()(*ruling ,bool ){_aadfe :=ruling {_facdf :_dcfeb ._becb ,Color :_dcfeb .Color ,_bagdc :_eede };
switch _dcfeb ._becb {case _eeeae :_aadfe ._efadf =_dcfeb .xMean ();_aadfe ._efbba =_ce .Min (_dcfeb ._eefea .Y ,_dcfeb ._edcb .Y );_aadfe ._daba =_ce .Max (_dcfeb ._eefea .Y ,_dcfeb ._edcb .Y );case _geggc :_aadfe ._efadf =_dcfeb .yMean ();_aadfe ._efbba =_ce .Min (_dcfeb ._eefea .X ,_dcfeb ._edcb .X );
_aadfe ._daba =_ce .Max (_dcfeb ._eefea .X ,_dcfeb ._edcb .X );default:_e .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_dcfeb ._becb );return nil ,false ;};return &_aadfe ,true ;
};
// _dagcb maps stand-alone (spacing) accent and modifier runes to the string
// holding the corresponding combining mark in the U+0300..U+0359 range.
var (_dagcb =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};
);
// fontEntry pairs a font with an int64 value.
// NOTE(review): the int64 is presumably an access counter or timestamp for cache eviction — confirm.
type fontEntry struct{_eeecc *_b .PdfFont ;_dcgd int64 ;};
// _fbad reports whether the X extents of the two rectangles overlap (touching counts).
func _fbad (_gfcc ,_egcc _b .PdfRectangle )bool {return _egcc .Llx <=_gfcc .Urx &&_gfcc .Llx <=_egcc .Urx ;};
// String returns a string describing the TextMark: its offset, its text both
// quoted and as hexadecimal rune values, the four bounding-box coordinates,
// the font description (cut to 50 bytes plus "..." when longer) and a " *M*"
// suffix when the mark is flagged as Meta.
func (_efbb TextMark )String ()string {_cfae :=_efbb .BBox ;var _fbff string ;if _efbb .Font !=nil {_fbff =_efbb .Font .String ();if len (_fbff )> 50{_fbff =_fbff [:50]+"\u002e\u002e\u002e";};};var _bbdg string ;if _efbb .Meta {_bbdg ="\u0020\u002a\u004d\u002a";
};return _bf .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_efbb .Offset ,_efbb .Text ,[]rune (_efbb .Text ),_cfae .Llx ,_cfae .Lly ,_cfae .Urx ,_cfae .Ury ,_fbff ,_bbdg );
};
// _efbf returns the indices 0..`_gefa`-1 ordered by the comparison `_aeafa`,
// which is called with the original indices (not positions in the result).
func _efbf (_gefa int ,_aeafa func (int ,int )bool )[]int {_bbfg :=make ([]int ,_gefa );for _cgfba :=range _bbfg {_bbfg [_cgfba ]=_cgfba ;};_fd .Slice (_bbfg ,func (_ddfff ,_cdde int )bool {return _aeafa (_bbfg [_ddfff ],_bbfg [_cdde ])});return _bbfg ;
};type markKind int ;func (_cegf *textObject )newTextMark (_bdee string ,_fbag _da .Matrix ,_ccad _da .Point ,_bfdb float64 ,_febe *_b .PdfFont ,_agea float64 ,_bfed ,_cbcg _dd .Color ,_febf _ff .PdfObject ,_bbbe []string ,_cceb int ,_gbfeb int )(textMark ,bool ){_bcab :=_fbag .Angle ();
_geeef :=_abdbd (_bcab ,_adgg );var _bdab float64 ;if _geeef %180!=90{_bdab =_fbag .ScalingFactorY ();}else {_bdab =_fbag .ScalingFactorX ();};_gfd :=_cbc (_fbag );_cgfe :=_b .PdfRectangle {Llx :_gfd .X ,Lly :_gfd .Y ,Urx :_ccad .X ,Ury :_ccad .Y };switch _geeef %360{case 90:_cgfe .Urx -=_bdab ;
case 180:_cgfe .Ury -=_bdab ;case 270:_cgfe .Urx +=_bdab ;case 0:_cgfe .Ury +=_bdab ;default:_geeef =0;_cgfe .Ury +=_bdab ;};if _cgfe .Llx > _cgfe .Urx {_cgfe .Llx ,_cgfe .Urx =_cgfe .Urx ,_cgfe .Llx ;};if _cgfe .Lly > _cgfe .Ury {_cgfe .Lly ,_cgfe .Ury =_cgfe .Ury ,_cgfe .Lly ;
};_edc :=true ;if _cegf ._beec ._fg .Width ()> 0{_becf ,_cgga :=_agdbc (_cgfe ,_cegf ._beec ._fg );if !_cgga {_edc =false ;_e .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_cgfe ,_cegf ._beec ._fg ,_bdee );
};_cgfe =_becf ;};_ggadb :=_cgfe ;_afda :=_cegf ._beec ._fg ;switch _geeef %360{case 90:_afda .Urx ,_afda .Ury =_afda .Ury ,_afda .Urx ;_ggadb =_b .PdfRectangle {Llx :_afda .Urx -_cgfe .Ury ,Urx :_afda .Urx -_cgfe .Lly ,Lly :_cgfe .Llx ,Ury :_cgfe .Urx };
case 180:_ggadb =_b .PdfRectangle {Llx :_afda .Urx -_cgfe .Llx ,Urx :_afda .Urx -_cgfe .Urx ,Lly :_afda .Ury -_cgfe .Lly ,Ury :_afda .Ury -_cgfe .Ury };case 270:_afda .Urx ,_afda .Ury =_afda .Ury ,_afda .Urx ;_ggadb =_b .PdfRectangle {Llx :_cgfe .Ury ,Urx :_cgfe .Lly ,Lly :_afda .Ury -_cgfe .Llx ,Ury :_afda .Ury -_cgfe .Urx };
};if _ggadb .Llx > _ggadb .Urx {_ggadb .Llx ,_ggadb .Urx =_ggadb .Urx ,_ggadb .Llx ;};if _ggadb .Lly > _ggadb .Ury {_ggadb .Lly ,_ggadb .Ury =_ggadb .Ury ,_ggadb .Lly ;};_cfda :=textMark {_gddf :_bdee ,PdfRectangle :_ggadb ,_badbe :_cgfe ,_facf :_febe ,_dgfec :_bdab ,_dggd :_agea ,_eebef :_fbag ,_degf :_ccad ,_egba :_geeef ,_ebgb :_bfed ,_eage :_cbcg ,_facd :_febf ,_ccffd :_bbbe ,Th :_cegf ._eef ._ddcc ,Tw :_cegf ._eef ._fagd ,_faad :_gbfeb ,_bafde :_cceb };
if _ecfc {_e .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_gfd ,_ccad ,_cfda .String ());};return _cfda ,_edc ;
};
// String returns a human readable description of the shapes state: the number
// of subpaths it holds and its `fresh` flag.
func (_ggaf *shapesState) String() string {
	subpathCount := len(_ggaf._fgdb)
	fresh := _ggaf._cbeb
	// Decoded format: "{%d subpaths fresh=%t}".
	return _bf.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", subpathCount, fresh)
}

// _adfb compares `_adbb` and `_ecgg` with `_ddabf` and returns that result
// unless `_dbfgf` flags it, in which case the result of `_deedf` is returned
// instead.
func _adfb(_adbb, _ecgg bounded) float64 {
	if primary := _ddabf(_adbb, _ecgg); !_dbfgf(primary) {
		return primary
	}
	return _deedf(_adbb, _ecgg)
}
// ImageMark represents an image drawn on a page and its position in device coordinates.
// For inline images the size, position and angle are all read from the current
// transformation matrix (CTM) that is in effect when the image is drawn.
type ImageMark struct{Image *_b .Image ;
// Width and Height are the dimensions of the image as displayed in the PDF
// (the X and Y scaling factors of the CTM).
Width float64 ;Height float64 ;
// X and Y are the position of the image's lower left corner
// (the translation component of the CTM).
X float64 ;Y float64 ;
// Angle is the rotation of the image in degrees, if rotated (the angle of the CTM).
Angle float64 ;};
// Text returns the text of every list in the collection, concatenated in
// order with no separator added between them.
func (_gadf *lists) Text() string {
	var sb _bfb.Builder
	for _, item := range *_gadf {
		sb.WriteString(item.Text())
	}
	return sb.String()
}

// rectRuling is a coloured rectangle tagged with a ruling kind and a mark kind.
type rectRuling struct {
	_dbcg  rulingKind
	_adgeg markKind
	_dd.Color
	_b.PdfRectangle
}

// bbox returns the rectangle embedded in the text mark.
func (_aggd *textMark) bbox() _b.PdfRectangle {
	rect := _aggd.PdfRectangle
	return rect
}
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	Images []ImageMark
}

// _cfegg builds a textWord from the marks in `_deeda`, which must not be empty.
// The word's rectangle is the marks' rectangles combined with `_badbb`, its
// depth is measured down from the top (`Ury`) of `_gadfa`, and its size is the
// largest `_dgfec` among the marks.
func _cfegg(_deeda []*textMark, _gadfa _b.PdfRectangle) *textWord {
	bounds := _deeda[0].PdfRectangle
	maxSize := _deeda[0]._dgfec
	for _, mark := range _deeda[1:] {
		bounds = _badbb(bounds, mark.PdfRectangle)
		if mark._dgfec > maxSize {
			maxSize = mark._dgfec
		}
	}
	return &textWord{
		PdfRectangle: bounds,
		_bggbb:       _deeda,
		_bagdd:       _gadfa.Ury - bounds.Lly,
		_ecdce:       maxSize,
	}
}

// exists reports whether the array already holds a mark with the same text,
// DirectObject and BBox as `_fgdf`.
func (_ggcaf *TextMarkArray) exists(_fgdf TextMark) bool {
	for _, mark := range _ggcaf.Elements() {
		// Cheap string comparison first; the reflective comparisons only run
		// for marks whose text already matches.
		if mark.Text != _fgdf.Text {
			continue
		}
		if _dg.DeepEqual(_fgdf.DirectObject, mark.DirectObject) && _dg.DeepEqual(_fgdf.BBox, mark.BBox) {
			return true
		}
	}
	return false
}

// stroke appends the current subpaths, tagged with the current stroke colour,
// to `*_dgfe` as one path section. When the debug flags are set it also prints
// a summary and up to the first eleven subpaths.
func (_ddea *shapesState) stroke(_dgfe *[]pathSection) {
	section := pathSection{_cdad: _ddea._fgdb, Color: _ddea._aaag.getStrokeColor()}
	*_dgfe = append(*_dgfe, section)
	if !_ccffg {
		return
	}
	// Decoded format: "    STROKE: %d strokes ss=%s color=%+v %6.2f\n".
	_bf.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*_dgfe), _ddea, _ddea._aaag.getStrokeColor(), section.bbox())
	if !_dad {
		return
	}
	for i, sp := range _ddea._fgdb {
		_bf.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sp)
		if i == 10 {
			break
		}
	}
}

// structElement is one node of a structure tree: a string label, its child
// elements, an int64 value and the PDF object it refers to.
type structElement struct {
	_bgec string
	_fefd []structElement
	_cabe int64
	_fbfd _ff.PdfObject
}

// _bdfee classifies an extent of `_fceaa` by `_abef` as a ruling.
// It returns `_geggc` when `_fceaa` is at least `_deegb` and `_fgdc(_abef, _fceaa)`
// holds, `_eeeae` for the mirrored condition, and `_aagc` otherwise.
func _bdfee(_fceaa, _abef, _deegb float64) rulingKind {
	switch {
	case _fceaa >= _deegb && _fgdc(_abef, _fceaa):
		return _geggc
	case _abef >= _deegb && _fgdc(_fceaa, _abef):
		return _eeeae
	default:
		return _aagc
	}
}

// applyTables returns a new list holding one paragraph per table in `_aeda`
// (from newTablePara) followed by every paragraph of the receiver whose
// `_cdaad` flag is not set. The result is nil when nothing is added.
func (_facdfd paraList) applyTables(_aeda []*textTable) paraList {
	var out paraList
	for _, table := range _aeda {
		out = append(out, table.newTablePara())
	}
	for _, para := range _facdfd {
		if !para._cdaad {
			out = append(out, para)
		}
	}
	return out
}

var _beg = false

// _cbdc turns text marks into plain text, one "\n"-terminated line per run of
// words. Words come from `_gabdf`. A new line is started when a word's depth
// exceeds the previous word's depth by more than `_dacc` times its size
// (capped at `_fbee`), or lies more than ten sizes above it. Words on the
// same line are separated by a single space and every finished line is passed
// through `_dc` before being emitted.
func _cbdc(_faec []*textMark, _efdd _b.PdfRectangle) string {
	// Decoded format: "makeSimpleText: %d elements pageSize=%.2f".
	_e.Log.Trace("\u006d\u0061\u006b\u0065\u0053i\u006d\u0070\u006c\u0065\u0054\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020e\u006c\u0065\u006d\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066", len(_faec), _efdd)
	if len(_faec) == 0 {
		return ""
	}
	words := _gabdf(_faec, _efdd)
	if len(words) == 0 {
		return ""
	}

	// The page text is accumulated in a builder instead of repeated string
	// concatenation, which re-copies the whole text for every line.
	var page _bfb.Builder
	prevDepth := 0.0
	lineStart := true
	line := ""
	for _, word := range words {
		size := word._ecdce
		if size > _fbee {
			size = _fbee
		}
		movedDown := word._bagdd-prevDepth > _dacc*size && prevDepth != 0.0
		movedUp := prevDepth-word._bagdd > size*10
		if movedDown || movedUp {
			page.WriteString(_dc([]rune(line))._db)
			page.WriteString("\u000a")
			line = ""
		} else if !lineStart {
			line += "\u0020"
		}
		line += word._deccc
		lineStart = false
		prevDepth = word._bagdd
	}
	if line != "" {
		page.WriteString(_dc([]rune(line))._db)
		page.WriteString("\u000a")
	}
	return page.String()
}

// _gbdcf sorts `_gdcc` in place by `_dce` and groups the lines by the rounded
// value of `_bbfb(line)`.
func _gbdcf(_gdcc []*textLine) map[float64][]*textLine {
	_fd.Slice(_gdcc, func(a, b int) bool {
		return _gdcc[a]._dce < _gdcc[b]._dce
	})
	groups := map[float64][]*textLine{}
	for _, line := range _gdcc {
		key := _ce.Round(_bbfb(line))
		groups[key] = append(groups[key], line)
	}
	return groups
}

// eventNeighbours sweeps over `_ceecbdd` (sorted in place) and returns, for
// every paragraph that has an "enter" event, the indexes of the paragraphs that
// were open at the same time. Paragraphs without neighbours map to nil. The
// order of the indexes in each slice is unspecified.
func (_ccgcf paraList) eventNeighbours(_ceecbdd []event) map[*textPara][]int {
	_fd.Slice(_ceecbdd, func(i, j int) bool {
		a, b := _ceecbdd[i], _ceecbdd[j]
		if a._caacf != b._caacf {
			return a._caacf < b._caacf
		}
		// At the same coordinate "enter" events sort before "leave" events.
		if a._bdec != b._bdec {
			return a._bdec
		}
		// Break remaining ties on the paragraph index. Comparing the slice
		// positions i and j here would not be an ordering of the elements,
		// because positions change while sort.Slice swaps them.
		return a._cfgdd < b._cfgdd
	})

	neighbours := make(map[int]intSet)
	open := make(intSet)
	for _, ev := range _ceecbdd {
		if !ev._bdec {
			open.del(ev._cfgdd)
			continue
		}
		neighbours[ev._cfgdd] = make(intSet)
		for other := range open {
			if other != ev._cfgdd {
				neighbours[ev._cfgdd].add(other)
				neighbours[other].add(ev._cfgdd)
			}
		}
		open.add(ev._cfgdd)
	}

	result := map[*textPara][]int{}
	for idx, set := range neighbours {
		para := _ccgcf[idx]
		if len(set) == 0 {
			result[para] = nil
			continue
		}
		members := make([]int, 0, len(set))
		for m := range set {
			members = append(members, m)
		}
		result[para] = members
	}
	return result
}

// bbox returns the rectangle spanned by the rulings. Which of the primary and
// secondary ranges supplies the x extent depends on the kind of the first
// ruling. An empty list is logged as an error and gives the zero rectangle.
func (_cdcd rulingList) bbox() _b.PdfRectangle {
	if len(_cdcd) == 0 {
		// Decoded message: "rulingList.bbox: no rulings".
		_e.Log.Error("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073")
		return _b.PdfRectangle{}
	}
	var box _b.PdfRectangle
	if _cdcd[0]._facdf == _geggc {
		box.Llx, box.Urx = _cdcd.secMinMax()
		box.Lly, box.Ury = _cdcd.primMinMax()
	} else {
		box.Llx, box.Urx = _cdcd.primMinMax()
		box.Lly, box.Ury = _cdcd.secMinMax()
	}
	return box
}
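// String returns a human readable description of the subpath: its number of points followed by the points themselves (only the first two and the last one when there are more than five).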
func (_ade *subpath )String ()string {_cdeg :=_ade ._gfb ;_beggc :=len (_cdeg );if _beggc <=5{return _bf .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_beggc ,_cdeg );};return _bf .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_beggc ,_cdeg [0],_cdeg [1],_cdeg [_beggc -1]);
};type structTreeRoot struct{_addd []structElement ;_fedg string ;};func _fdab (_ecag _b .PdfRectangle )*ruling {return &ruling {_facdf :_eeeae ,_efadf :_ecag .Urx ,_efbba :_ecag .Lly ,_daba :_ecag .Ury };};func (_daca *wordBag )removeWord (_cddc *textWord ,_dfae int ){_dgfa :=_daca ._ecfd [_dfae ];
_dgfa =_bdgd (_dgfa ,_cddc );if len (_dgfa )==0{delete (_daca ._ecfd ,_dfae );}else {_daca ._ecfd [_dfae ]=_dgfa ;};};func _fagdd (_dgdd *textWord ,_gecea float64 ,_dbdg ,_badd rulingList )*wordBag {_bdfd :=_fdd (_dgdd ._bagdd );_bded :=[]*textWord {_dgdd };
_dab :=wordBag {_ecfd :map[int ][]*textWord {_bdfd :_bded },PdfRectangle :_dgdd .PdfRectangle ,_dbfbe :_dgdd ._ecdce ,_gdgea :_gecea ,_dbgbc :_dbdg ,_edfe :_badd };return &_dab ;};type gridTiling struct{_b .PdfRectangle ;_aabgb []float64 ;_fdgd []float64 ;
_caabf map[float64 ]map[float64 ]gridTile ;};func _gc (_ed []string ,_aa int ,_gcd string )int {_dff :=_aa ;for ;_dff < len (_ed );_dff ++{if _ed [_dff ]!=_gcd {return _dff ;};};return _dff ;};func (_cbgg paraList )llyOrdering ()[]int {_eafe :=make ([]int ,len (_cbgg ));
for _fefg :=range _cbgg {_eafe [_fefg ]=_fefg ;};_fd .SliceStable (_eafe ,func (_gbdf ,_gdgg int )bool {_fedcc ,_bebe :=_eafe [_gbdf ],_eafe [_gdgg ];return _cbgg [_fedcc ].Lly < _cbgg [_bebe ].Lly ;});return _eafe ;};func _geec (_fcbd *_b .Image ,_debda _dd .Color )_ga .Image {_edbd ,_egbfg :=int (_fcbd .Width ),int (_fcbd .Height );
_afbgd :=_ga .NewRGBA (_ga .Rect (0,0,_edbd ,_egbfg ));for _ebggg :=0;_ebggg < _egbfg ;_ebggg ++{for _dcagf :=0;_dcagf < _edbd ;_dcagf ++{_acbb ,_aegdf :=_fcbd .ColorAt (_dcagf ,_ebggg );if _aegdf !=nil {_e .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e",_dcagf ,_ebggg );
continue ;};_cddd ,_bfbae ,_dbccf ,_ :=_acbb .RGBA ();var _agff _dd .Color ;if _cddd +_bfbae +_dbccf ==0{_agff =_dd .Transparent ;}else {_agff =_debda ;};_afbgd .Set (_dcagf ,_ebggg ,_agff );};};return _afbgd ;};func (_gcdcb *TextMarkArray )getTextMarkAtOffset (_afa int )*TextMark {for _ ,_ccdc :=range _gcdcb ._gaac {if _ccdc .Offset ==_afa {return &_ccdc ;
};};return nil ;};func (_fbae paraList )findTextTables ()[]*textTable {var _gbeef []*textTable ;for _ ,_faee :=range _fbae {if _faee .taken ()||_faee .Width ()==0{continue ;};_gfffbf :=_faee .isAtom ();if _gfffbf ==nil {continue ;};_gfffbf .growTable ();
if _gfffbf ._addag *_gfffbf ._cffff < _dbebd {continue ;};_gfffbf .markCells ();_gfffbf .log ("\u0067\u0072\u006fw\u006e");_gbeef =append (_gbeef ,_gfffbf );};return _gbeef ;};func (_baef *textLine )text ()string {var _adgec []string ;for _ ,_bdcg :=range _baef ._edge {if _bdcg ._gcccd {_adgec =append (_adgec ,"\u0020");
};_adgec =append (_adgec ,_bdcg ._deccc );};_bdbc :=_bfb .Join (_adgec ,"");_bbce :=_dc ([]rune (_bdbc ));return _bbce ._db ;};func _dea (_gbd *_gd .ContentStreamOperation )(float64 ,error ){if len (_gbd .Params )!=1{_fgae :=_ad .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");
_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_gbd .Operand ,1,len (_gbd .Params ),_gbd .Params );
return 0.0,_fgae ;};return _ff .GetNumberAsFloat (_gbd .Params [0]);};var _be =[]string {"\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0053","\u0042","\u0053","\u0057\u0053","\u0042","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042","\u0042","\u0042","\u0053","\u0057\u0053","\u004f\u004e","\u004f\u004e","\u0045\u0054","\u0045\u0054","\u0045\u0054","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u0045\u0053","\u0043\u0053","\u0045\u0053","\u0043\u0053","\u0043\u0053","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0043\u0053","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004
e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0042\u004e","\u0043\u0053","\u004f\u004e","\u0045\u0054","\u0045\u0054","\u0045\u0054","\u0045\u0054","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004c","\u004f\u004e","\u004f\u004e","\u0042\u004e","\u004f\u004e","\u004f\u004e","\u0045\u0054","\u0045\u0054","\u0045\u004e","\u0045\u004e","\u004f\u004e","\u004c","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u0045\u004e","\u004c","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004f\u004e","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004f\u004e","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004f\u004e","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c","\u004c"};
// _caaae returns the keys of `_bdgfa` in ascending order. The result is
// never nil.
func _caaae(_bdgfa map[int]intSet) []int {
	keys := make([]int, len(_bdgfa))
	next := 0
	for key := range _bdgfa {
		keys[next] = key
		next++
	}
	_fd.Ints(keys)
	return keys
}

// close closes the subpath: the first point is appended again unless `_dcba`
// reports that it already equals the last point, the closed flag is set and
// duplicate points are removed. The subpath must hold at least one point.
func (_ffab *subpath) close() {
	first := _ffab._gfb[0]
	if !_dcba(first, _ffab.last()) {
		_ffab.add(first)
	}
	_ffab._dffd = true
	_ffab.removeDuplicates()
}
// List returns all the list objects detected on the page.
//
// Every bullet point list found on the page is returned as a tree: each list
// carries the text it contains and the sub lists found under it as children.
// Page content that is not part of a bullet point list is ignored.
//
// Lists are extracted in one of two ways:
//  1. If the document is tagged, the lists are built from the document tags.
//  2. Otherwise they are extracted from the raw text using regex matching.
//
// Document tags are used by default when available. This can be turned off
// with `DisableDocumentTags` in the `Options` object, which sometimes gives a
// better result for documents that were tagged incorrectly.
//
//	options := &Options{
//		DisableDocumentTags: false, // this means use document tag if available
//	}
//	ex, err := NewWithOptions(page, options)
//	// handle error
//	pageText, _, _, err := ex.ExtractPageText()
//	// handle error
//	lists := pageText.List()
//	txt := lists.Text()
func (_ffbb PageText) List() lists {
	// NOTE(review): _bfgf is presumably Options.DisableDocumentTags — confirm.
	tagsEnabled := !_ffbb._ddfd._bfgf
	paras := _ffbb.getParagraphs()
	hasStructTree := _ffbb._cagg != nil && *_ffbb._cagg != nil

	// The text based result is always computed; it is also the fallback.
	result := paras.list()
	if !hasStructTree || !tagsEnabled {
		return result
	}

	lookup := _baag(&paras)
	root := &structTreeRoot{}
	root.parseStructTreeRoot(*_ffbb._cagg)
	if root._addd == nil {
		// Decoded message: "List: structTreeRoot doesn't have any content,
		// using text matching method instead."
		_e.Log.Debug("\u004c\u0069\u0073\u0074\u003a\u0020\u0073t\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e'\u0074\u0020\u0068\u0061\u0076e\u0020\u0061\u006e\u0079\u0020\u0063\u006f\u006e\u0074e\u006e\u0074\u002c\u0020\u0075\u0073\u0069\u006e\u0067\u0020\u0074\u0065\u0078\u0074\u0020\u006d\u0061\u0074\u0063\u0068\u0069\u006e\u0067\u0020\u006d\u0065\u0074\u0068\u006f\u0064\u0020\u0069\u006e\u0073\u0074\u0065\u0061\u0064\u002e")
		return result
	}
	return root.buildList(lookup, _ffbb._cfec)
}

// _fgefe builds the grid tile for rectangle `_fgga`. Each of the four flags is
// set when the corresponding ruling is non-nil and encloses the tile's extent:
// `_dabc` and `_edbce` are tested against the tile's y range, `_aacdd` and
// `_dafg` against its x range.
func _fgefe(_fgga _b.PdfRectangle, _dabc, _edbce, _aacdd, _dafg *ruling) gridTile {
	covers := func(r *ruling, lo, hi float64) bool {
		return r != nil && r.encloses(lo, hi)
	}
	x0, x1 := _fgga.Llx, _fgga.Urx
	y0, y1 := _fgga.Lly, _fgga.Ury
	return gridTile{
		PdfRectangle: _fgga,
		_cfgg:        covers(_dabc, y0, y1),
		_bbed:        covers(_edbce, y0, y1),
		_cega:        covers(_aacdd, x0, x1),
		_fdde:        covers(_dafg, x0, x1),
	}
}

// xNeighbours returns, for each paragraph, the indexes of the paragraphs whose
// x ranges overlap its own. With a non-zero `_dbega` each paragraph's range is
// first widened on both sides by `_dbega` times the paragraph's font size.
func (_dcfad paraList) xNeighbours(_dbega float64) map[*textPara][]int {
	events := make([]event, 0, 2*len(_dcfad))
	if _dbega == 0 {
		for idx, para := range _dcfad {
			events = append(events,
				event{para.Llx, true, idx},
				event{para.Urx, false, idx})
		}
		return _dcfad.eventNeighbours(events)
	}
	for idx, para := range _dcfad {
		enter := event{para.Llx - _dbega*para.fontsize(), true, idx}
		leave := event{para.Urx + _dbega*para.fontsize(), false, idx}
		events = append(events, enter, leave)
	}
	return _dcfad.eventNeighbours(events)
}
// depthIndexes returns the keys of the bag's depth-bin map in ascending
// order, or nil when the bag has no bins.
func (_gegg *wordBag) depthIndexes() []int {
	count := len(_gegg._ecfd)
	if count == 0 {
		return nil
	}
	indexes := make([]int, 0, count)
	for depthIdx := range _gegg._ecfd {
		indexes = append(indexes, depthIdx)
	}
	_fd.Ints(indexes)
	return indexes
}

// _bfcg selects, from `_fffd`, lines related to `_bfab` whose `_dce` is at
// least `_dfbg`. The result is never nil.
//
// With an upper bound (`_fcfe` != -1) lines below that bound are taken if their
// text differs from `_bfab`'s, and the scan stops at the first such line that
// starts (rounded Llx) to the left of `_bfab`.
// Without a bound (`_fcfe` == -1) lines at the same `_dce` as `_bfab` are taken
// if their text differs, and other lines are taken when their `_dce` does not
// exceed the limit `_adad` returns, provided that limit is not -1.
func _bfcg(_bfab *textLine, _fffd []*textLine, _gbef []float64, _dfbg, _fcfe float64) []*textLine {
	picked := []*textLine{}
	for _, cand := range _fffd {
		if !(cand._dce >= _dfbg) {
			continue
		}

		if _fcfe != -1 && cand._dce < _fcfe {
			if cand.text() == _bfab.text() {
				continue
			}
			if _ce.Round(cand.Llx) < _ce.Round(_bfab.Llx) {
				return picked
			}
			picked = append(picked, cand)
			continue
		}
		if _fcfe != -1 {
			// Bounded search and the line is not below the bound.
			continue
		}

		if cand._dce == _bfab._dce {
			if cand.text() != _bfab.text() {
				picked = append(picked, cand)
			}
			continue
		}
		if limit := _adad(_bfab, _fffd, _gbef); limit != -1 && cand._dce <= limit {
			picked = append(picked, cand)
		}
	}
	return picked
}

// computeViews fills in the derived views of the page text. When the `_gabb`
// option is set only the text is computed, via getText. Otherwise the
// paragraphs are computed once and the text, the text marks and the tables are
// all derived from them.
func (_caab *PageText) computeViews() {
	if _caab._ddfd._gabb {
		_caab._ceef = _caab.getText()
		return
	}

	paras := _caab.getParagraphs()
	var buf _eg.Buffer
	paras.writeText(&buf)
	_caab._ceef = buf.String()
	_caab._ebdc = paras.toTextMarks()
	_caab._eeg = paras.tables()

	if _aada {
		// Decoded format: "computeViews: tables=%d".
		_e.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(_caab._eeg))
	}
}
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	_ddgf string
	_eba  *_b.PdfPageResources
	// _fg is the page box that text marks are checked against when they are
	// created (reported as "mediaBox" in the debug log).
	_fg   _b.PdfRectangle
	_fce  *_b.PdfRectangle
	_cbg  map[string]fontEntry
	_cdgf map[string]textResult
	_dfd  map[string]textResult
	_ddd  int64
	_bbc  int
	_ddde *Options
	_fcdd *_ff.PdfObject
	_fdgc _ff.PdfObject
	_bge  []*_b.PdfAnnotation
}

// firstReadingIndex returns the depth index of the word that comes first
// according to `_deedf` among the first words of the depth bins in the band
// that starts at depth (`_edff`+1)*`_adeg` and extends `_aeaf` font sizes
// further. `_edff` is returned when no bin in the band compares before it.
func (_fegd *wordBag) firstReadingIndex(_edff int) int {
	fontsize := _fegd.firstWord(_edff)._ecdce
	minDepth := float64(_edff+1) * _adeg
	maxDepth := minDepth + _aeaf*fontsize
	best := _edff
	for _, depthIdx := range _fegd.depthBand(minDepth, maxDepth) {
		if _deedf(_fegd.firstWord(depthIdx), _fegd.firstWord(best)) < 0 {
			best = depthIdx
		}
	}
	return best
}

// log prints the table's dimensions and every non-nil cell, prefixed with
// `_gbbc`. It does nothing unless the `_aada` debug flag is set.
func (_bbdd *textTable) log(_gbbc string) {
	if !_aada {
		return
	}
	// Decoded format: "~~~ %s: %d x %d grid=%t\n      %6.2f".
	_e.Log.Info("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066", _gbbc, _bbdd._addag, _bbdd._cffff, _bbdd._cbbdd, _bbdd.PdfRectangle)
	for row := 0; row < _bbdd._cffff; row++ {
		for col := 0; col < _bbdd._addag; col++ {
			cell := _bbdd.get(col, row)
			if cell == nil {
				continue
			}
			// The cell text is computed once and used for both the preview
			// and the rune count.
			text := cell.text()
			// Decoded format: "%4d %2d: %6.2f %q %d\n".
			_bf.Printf("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a", col, row, cell.PdfRectangle, _ggcea(text, 50), _cg.RuneCountInString(text))
		}
	}
}

// _cbdd reports whether the y ranges of the two rectangles overlap (touching
// edges count as overlapping).
func _cbdd(_efcb, _ecfg _b.PdfRectangle) bool {
	return _efcb.Lly <= _ecfg.Ury && _ecfg.Lly <= _efcb.Ury
}

// checkOp validates the parameter count of content stream operation `_fcdg`.
//
// A nil receiver (an operator seen outside a text object) is only logged.
// When `_efg` is non-negative the operation must have exactly `_efg`
// parameters; otherwise false is returned, together with an error if `_dbgb`
// is set and a nil error if it is not.
func (_ecg *textObject) checkOp(_fcdg *_gd.ContentStreamOperation, _efg int, _dbgb bool) (_cdeb bool, _adb error) {
	if _ecg == nil {
		var shown []_ff.PdfObject
		if _efg > 0 {
			shown = _fcdg.Params
			if len(shown) > _efg {
				shown = shown[:_efg]
			}
		}
		// Decoded format: "%#q operand outside text. params=%+v".
		_e.Log.Debug("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076", _fcdg.Operand, shown)
	}
	if _efg < 0 || len(_fcdg.Params) == _efg {
		return true, nil
	}
	if _dbgb {
		// Decoded message: "incorrect parameter count".
		_adb = _ad.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	}
	// Decoded format: "ERROR: %#q should have %d input params, got %d %+v".
	_e.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _fcdg.Operand, _efg, len(_fcdg.Params), _fcdg.Params)
	return false, _adb
}

// merge sorts the paragraphs into reading order (in place) and combines them
// into a single paragraph whose rectangle is all rectangles combined with
// `_badbb` and whose lines are all lines in order. It returns nil for an empty
// list.
func (_gffa paraList) merge() *textPara {
	_e.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_gffa))
	if len(_gffa) == 0 {
		return nil
	}
	_gffa.sortReadingOrder()

	first := _gffa[0]
	bounds := first.PdfRectangle
	// Start from a detached copy of the first paragraph's lines. Appending to
	// the original slice could write into spare capacity of its backing array
	// and overwrite data seen through any other slice sharing that array.
	lines := append(first._bbgab[:0:0], first._bbgab...)
	for _, para := range _gffa[1:] {
		bounds = _badbb(bounds, para.PdfRectangle)
		lines = append(lines, para._bbgab...)
	}
	return _gfbeg(bounds, lines)
}

// markCells sets the `_cdaad` flag on every non-nil cell of the table.
func (_gdbf *textTable) markCells() {
	for row := 0; row < _gdbf._cffff; row++ {
		for col := 0; col < _gdbf._addag; col++ {
			if cell := _gdbf.get(col, row); cell != nil {
				cell._cdaad = true
			}
		}
	}
}

// taken reports whether the paragraph is nil or has its `_cdaad` flag set.
func (_gedeb *textPara) taken() bool {
	return _gedeb == nil || _gedeb._cdaad
}
var _bb =[]string {"\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u004f\u004e","\u004f\u004e","\u0041\u004c","\u0045\u0054","\u0045\u0054","\u0041\u004c","\u0043\u0053","\u0041\u004c","\u004f\u004e","\u004f\u004e","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u0041\u004c","\u0041\u004c","","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0041\u004e","\u0045\u0054","\u0041\u004e","\u0041\u004e","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u004e\u0053\u004d","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u0
04c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u0041\u004e","\u004f\u004e","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u0041\u004c","\u0041\u004c","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004f\u004e","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u004e\u0053\u004d","\u0041\u004c","\u0041\u004c","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u004
5\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0045\u004e","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c","\u0041\u004c"};
// subdivide splits every composite cell of the table along its row and column
// corridors and returns the resulting finer-grained table. The receiver itself is
// returned when either no row corridors or no column corridors are found.
func (_gdede *textTable) subdivide() *textTable {
	_gdede.logComposite("subdivide")

	rowCorr := _gdede.compositeRowCorridors()
	colCorr := _gdede.compositeColCorridors()
	if _aada {
		_e.Log.Info("subdivide:\n\trowCorridors=%s\n\tcolCorridors=%s",
			_cgcbb(rowCorr), _cgcbb(colCorr))
	}
	if len(rowCorr) == 0 || len(colCorr) == 0 {
		return _gdede
	}

	_gfeg(rowCorr)
	_gfeg(colCorr)
	if _aada {
		_e.Log.Info("subdivide fixed:\n\trowCorridors=%s\n\tcolCorridors=%s",
			_cgcbb(rowCorr), _cgcbb(colCorr))
	}

	// Offsets of each composite row/column in the subdivided table, plus its total size.
	yOffsets, h := _edbe(_gdede._cffff, rowCorr)
	xOffsets, w := _edbe(_gdede._addag, colCorr)

	out := &textTable{
		PdfRectangle: _gdede.PdfRectangle,
		_cbbdd:       _gdede._cbbdd,
		_cffff:       h,
		_addag:       w,
		_egbgg:       make(map[uint64]*textPara, w*h),
	}
	if _aada {
		_e.Log.Info("subdivide: composite = %d x %d cells= %d x %d\n"+
			"\trowCorridors=%s\n"+
			"\tcolCorridors=%s\n"+
			"\tyOffsets=%+v\n"+
			"\txOffsets=%+v",
			_gdede._addag, _gdede._cffff, w, h, _cgcbb(rowCorr), _cgcbb(colCorr), yOffsets, xOffsets)
	}

	for y := 0; y < _gdede._cffff; y++ {
		y0 := yOffsets[y]
		for x := 0; x < _gdede._addag; x++ {
			x0 := xOffsets[x]
			if _aada {
				_bf.Printf("%6d, %2d: x0=%d y0=%d\n", x, y, x0, y0)
			}
			composite, found := _gdede._bdfcg[_fbfgf(x, y)]
			if !found {
				continue
			}
			// Copy the cells of the split composite into their place in the output table.
			sub := composite.split(rowCorr[y], colCorr[x])
			for dy := 0; dy < sub._cffff; dy++ {
				for dx := 0; dx < sub._addag; dx++ {
					para := sub.get(dx, dy)
					out.put(x0+dx, y0+dy, para)
					if _aada {
						_bf.Printf("%8d, %2d: %s\n", x0+dx, y0+dy, para)
					}
				}
			}
		}
	}
	return out
}

// compositeRowCorridors returns, keyed by row index, the value computed by _aafaa
// over the composite cells present in that row. Rows with no composite cells get
// no entry. Iteration starts at row 1, so row 0 never gets an entry.
func (_bedad *textTable) compositeRowCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, _bedad._cffff)
	if _aada {
		_e.Log.Info("compositeRowCorridors: h=%d", _bedad._cffff)
	}
	for y := 1; y < _bedad._cffff; y++ {
		var cells []compositeCell
		for x := 0; x < _bedad._addag; x++ {
			cell, found := _bedad._bdfcg[_fbfgf(x, y)]
			if found {
				cells = append(cells, cell)
			}
		}
		if len(cells) == 0 {
			continue
		}
		rowCorr := _aafaa(cells)
		corridors[y] = rowCorr
		if _aada {
			_bf.Printf("   %2d: %6.2f\n", y, rowCorr)
		}
	}
	return corridors
}

// _cdca reports whether `_cfb` has at least _ggcab runes, ends in a rune from the
// unicode.Hyphen range table, and has a non-space rune immediately before it.
func _cdca(_cfb string) bool {
	if _cg.RuneCountInString(_cfb) < _ggcab {
		return false
	}
	last, size := _cg.DecodeLastRuneInString(_cfb)
	if size <= 0 || !_g.Is(_g.Hyphen, last) {
		return false
	}
	prev, prevSize := _cg.DecodeLastRuneInString(_cfb[:len(_cfb)-size])
	return prevSize > 0 && !_g.IsSpace(prev)
}

// markKind values. asRuling in this file tags rulings built from rectangles with _befdb.
const (
	_ggafb markKind = iota
	_eede
	_befdb
	_ecdge
)

// _ddabf returns _ecdg(_dabd) - _ecdg(_agdb).
func _ddabf(_dabd, _agdb bounded) float64 {
	return _ecdg(_dabd) - _ecdg(_agdb)
}

// _beba flattens the children of `_aeabe` into a slice of list items.
//   - An "LI" child becomes one "bullet" item whose children are built recursively.
//   - An "LBody" child short-circuits: the result is that child's items only.
//   - An "L" child appends its items and ends the scan.
func _beba(_aeabe *list) []*list {
	var items []*list
	for _, child := range _aeabe._ddfgc {
		switch child._eaag {
		case "LI":
			childLines := _bdge(child)
			nested := _beba(child)
			item := _affeb(childLines, "bullet", nested)
			item._cceae = _dbfa(childLines, "")
			items = append(items, item)
		case "LBody":
			return _beba(child)
		case "L":
			return append(items, _beba(child)...)
		}
	}
	return items
}

// _fbefd rounds `_bbegg` to the nearest multiple of _cegb.
func _fbefd(_bbegg float64) float64 {
	return _cegb * _ce.Round(_bbegg/_cegb)
}

// wordBag is a rectangular region holding text words in buckets keyed by an int index.
type wordBag struct {
	_b.PdfRectangle
	_dbfbe        float64
	_dbgbc, _edfe rulingList
	_gdgea        float64
	_ecfd         map[int][]*textWord
}

// _caabc returns the text of `_cgagb` with every non-empty line prefixed by
// `*_cage` and terminated by a newline. Empty lines are dropped.
func _caabc(_cgagb *list, _cage *string) string {
	var sb _bfb.Builder
	for _, ln := range _bfb.Split(_cgagb._cceae, "\n") {
		if ln == "" {
			continue
		}
		sb.WriteString(*_cage)
		sb.WriteString(ln)
		sb.WriteString("\n")
	}
	return sb.String()
}

// removeDuplicates drops every point that _dcba reports as equal to the
// previously kept point, so runs of repeated points collapse to one.
func (_gcfc *subpath) removeDuplicates() {
	if len(_gcfc._gfb) == 0 {
		return
	}
	kept := []_da.Point{_gcfc._gfb[0]}
	for _, pt := range _gcfc._gfb[1:] {
		if _dcba(pt, kept[len(kept)-1]) {
			continue
		}
		kept = append(kept, pt)
	}
	_gcfc._gfb = kept
}

// _aaba returns the lines of `_cdcab` whose text is matched by _agge.
// The result is never nil.
func _aaba(_cdcab []*textLine) []*textLine {
	matched := []*textLine{}
	for _, ln := range _cdcab {
		if _agge.Find([]byte(ln.text())) != nil {
			matched = append(matched, ln)
		}
	}
	return matched
}

// _ecbbg removes the element at index `_dega` from `_dbegab` in place and
// returns the shortened slice, which shares the original backing array.
func _ecbbg(_dbegab []*textWord, _dega int) []*textWord {
	return append(_dbegab[:_dega], _dbegab[_dega+1:]...)
}

// devicePoint transforms (`_gca`, `_dgce`) by the product _cdgfg.Mult(_gefd)
// and returns the result as a point.
func (_ddee *shapesState) devicePoint(_gca, _dgce float64) _da.Point {
	m := _ddee._cdgfg.Mult(_ddee._gefd)
	dx, dy := m.Transform(_gca, _dgce)
	return _da.NewPoint(dx, dy)
}

// push appends a copy of `*_efd` to the stack, so later changes to the
// caller's state do not affect the saved entry.
func (_bfaa *stateStack) push(_efd *textState) {
	saved := *_efd
	*_bfaa = append(*_bfaa, &saved)
}

// rulingKind classifies a ruling (see asRuling for the vertical/horizontal cases).
type rulingKind int

// sortReadingOrder sorts the paragraphs in place using the _adfb comparator.
// Lists of zero or one paragraphs are left untouched.
func (_ddba paraList) sortReadingOrder() {
	_e.Log.Trace("sortReadingOrder: paras=%d ===========x=============", len(_ddba))
	if len(_ddba) <= 1 {
		return
	}
	_ddba.computeEBBoxes()
	// sort.Slice needs a strict "less": equal elements must compare false both ways.
	_fd.Slice(_ddba, func(i, j int) bool { return _adfb(_ddba[i], _ddba[j]) < 0 })
}

// extractTables finds the tables among the paragraphs and returns the list
// produced by applyTables. Lists shorter than _dbebd are returned unchanged.
func (_gcfea paraList) extractTables(_cbadg []gridTiling) paraList {
	if _aada {
		_e.Log.Debug("extractTables=%d ===========x=============", len(_gcfea))
	}
	if len(_gcfea) < _dbebd {
		return _gcfea
	}
	tables := _gcfea.findTables(_cbadg)
	if _aada {
		_e.Log.Info("combined tables %d ================", len(tables))
		for i, tbl := range tables {
			tbl.log(_bf.Sprintf("combined %d", i))
		}
	}
	return _gcfea.applyTables(tables)
}

// quadraticTo handles a quadratic curve segment by adding only its end point
// (`_add`, `_cfaf`); the control point (`_ffgda`, `_fbg`) is ignored.
func (_cgdc *shapesState) quadraticTo(_ffgda, _fbg, _add, _cfaf float64) {
	if _gegd {
		_e.Log.Info("quadraticTo:")
	}
	_cgdc.addPoint(_add, _cfaf)
}

// _ffag returns `_addda` without its last rune. An empty string is returned unchanged.
func _ffag(_addda string) string {
	runes := []rune(_addda)
	if len(runes) == 0 {
		// Nothing to strip; slicing runes[:-1] would panic.
		return _addda
	}
	return string(runes[:len(runes)-1])
}

// asRuling converts the rectangle to a ruling running along its centre line.
// It returns (nil, false) when checkWidth rejects the rectangle's thickness or
// when the rectangle's kind is neither vertical (_eeeae) nor horizontal (_geggc).
func (_gaaa rectRuling) asRuling() (*ruling, bool) {
	out := ruling{_facdf: _gaaa._dbcg, Color: _gaaa.Color, _bagdc: _befdb}

	switch _gaaa._dbcg {
	case _eeeae:
		out._efadf = 0.5 * (_gaaa.Llx + _gaaa.Urx)
		out._efbba = _gaaa.Lly
		out._daba = _gaaa.Ury
		width, ok := _gaaa.checkWidth(_gaaa.Llx, _gaaa.Urx)
		if !ok {
			if _dcbb {
				_e.Log.Error("rectRuling.asRuling: rulingVert !checkWidth v=%+v", _gaaa)
			}
			return nil, false
		}
		out._geeb = width
	case _geggc:
		out._efadf = 0.5 * (_gaaa.Lly + _gaaa.Ury)
		out._efbba = _gaaa.Llx
		out._daba = _gaaa.Urx
		width, ok := _gaaa.checkWidth(_gaaa.Lly, _gaaa.Ury)
		if !ok {
			if _dcbb {
				_e.Log.Error("rectRuling.asRuling: rulingHorz !checkWidth v=%+v", _gaaa)
			}
			return nil, false
		}
		out._geeb = width
	default:
		_e.Log.Error("bad primary kind=%d", _gaaa._dbcg)
		return nil, false
	}
	return &out, true
}

// split divides the composite cell into a (len(_decgd)+1) x (len(_eada)+1) table
// using row corridors `_eada` and column corridors `_decgd`. Each line of the cell
// goes to the first grid rectangle for which _afbc returns true; lines that match
// no rectangle are dropped. Grid cells that receive no lines stay empty.
// The cell's paraList is sorted in place (by Lly, then Llx) as a side effect.
func (_dedg compositeCell) split(_eada, _decgd []float64) *textTable {
	h := len(_eada) + 1
	w := len(_decgd) + 1
	if _aada {
		_e.Log.Info("compositeCell.split: %d x %d\n\tcomposite=%s\n"+
			"\trowCorridors=%6.2f\n\tcolCorridors=%6.2f",
			w, h, _dedg, _eada, _decgd)
		_bf.Printf("    %d paras\n", len(_dedg.paraList))
		for i, para := range _dedg.paraList {
			_bf.Printf("%4d: %s\n", i, para.String())
		}
		_bf.Printf("    %d lines\n", len(_dedg.lines()))
		for i, ln := range _dedg.lines() {
			_bf.Printf("%4d: %s\n", i, ln)
		}
	}

	// Extend the corridors with the cell's own edges to obtain the grid boundaries.
	_eada = _babc(_eada, _dedg.Ury, _dedg.Lly)
	_decgd = _babc(_decgd, _dedg.Llx, _dedg.Urx)

	result := textTable{
		_addag: w,
		_cffff: h,
		_egbgg: make(map[uint64]*textPara, w*h),
	}

	paras := _dedg.paraList
	_fd.Slice(paras, func(i, j int) bool {
		a, b := paras[i], paras[j]
		if a.Lly != b.Lly {
			return a.Lly < b.Lly
		}
		return a.Llx < b.Llx
	})

	// Rectangle of every grid cell, keyed by _fbfgf(x, y).
	rects := make(map[uint64]_b.PdfRectangle, w*h)
	for y, lly := range _eada[1:] {
		ury := _eada[y]
		for x, urx := range _decgd[1:] {
			llx := _decgd[x]
			rects[_fbfgf(x, y)] = _b.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
		}
	}
	if _aada {
		_e.Log.Info("compositeCell.split: rects")
		_bf.Printf("    ")
		for x := 0; x < w; x++ {
			_bf.Printf("%30d, ", x)
		}
		_bf.Println()
		for y := 0; y < h; y++ {
			_bf.Printf("  %2d:", y)
			for x := 0; x < w; x++ {
				_bf.Printf("%6.2f, ", rects[_fbfgf(x, y)])
			}
			_bf.Println()
		}
	}

	// locate returns the grid coordinates of the first matching rectangle, or (-1, -1).
	locate := func(ln *textLine) (int, int) {
		for y := 0; y < h; y++ {
			for x := 0; x < w; x++ {
				if _afbc(rects[_fbfgf(x, y)], ln.PdfRectangle) {
					return x, y
				}
			}
		}
		return -1, -1
	}

	cellLines := make(map[uint64][]*textLine, w*h)
	for _, ln := range paras.lines() {
		x, y := locate(ln)
		if x < 0 {
			continue
		}
		key := _fbfgf(x, y)
		cellLines[key] = append(cellLines[key], ln)
	}

	for y := 0; y < len(_eada)-1; y++ {
		ury, lly := _eada[y], _eada[y+1]
		for x := 0; x < len(_decgd)-1; x++ {
			llx, urx := _decgd[x], _decgd[x+1]
			bbox := _b.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			lines := cellLines[_fbfgf(x, y)]
			if len(lines) == 0 {
				continue
			}
			result.put(x, y, _gfbeg(bbox, lines))
		}
	}
	return &result
}

// _cdcg classifies the segment between `_abdg` and `_acfd` by passing its
// absolute x and y extents, together with _cef, to _bdfee.
func _cdcg(_abdg, _acfd _da.Point) rulingKind {
	dx := _ce.Abs(_abdg.X - _acfd.X)
	dy := _ce.Abs(_abdg.Y - _acfd.Y)
	return _bdfee(dx, dy, _cef)
}
// Tables returns the tables extracted from the page.
func (_cfcfg PageText) Tables() []TextTable {
	tables := _cfcfg._eeg
	if _aada {
		_e.Log.Info("Tables: %d", len(tables))
	}
	return tables
}

// arrangeText groups the words of the bag into text lines and returns them as
// a single paragraph built by _gfbeg, or nil when no lines were produced.
// Words are pulled out of the bag as they are assigned to a line.
func (_bbcaa *wordBag) arrangeText() *textPara {
	_bbcaa.sort()
	if _bbdc {
		_bbcaa.removeDuplicates()
	}

	var lines []*textLine
	for _, depthIdx := range _bbcaa.depthIndexes() {
		for !_bbcaa.empty(depthIdx) {
			// Start a new line at the first word in reading order.
			startIdx := _bbcaa.firstReadingIndex(depthIdx)
			seed := _bbcaa.firstWord(startIdx)
			current := _gdged(_bbcaa, startIdx)

			size := seed._ecdce
			if size < _fbee {
				size = _fbee
			}
			minDepth := seed._bagdd - _dacc*size
			maxDepth := seed._bagdd + _dacc*size
			maxGap := _aaca * size
			maxOverlap := _ddfg * size

		grow:
			for {
				// Pick the best next word among the candidates in the depth band.
				var next *textWord
				nextIdx := 0
				for _, idx := range _bbcaa.depthBand(minDepth, maxDepth) {
					cand := _bbcaa.highestWord(idx, minDepth, maxDepth)
					if cand == nil {
						continue
					}
					gap := _aged(cand, current._edge[len(current._edge)-1])
					if gap < -maxOverlap {
						// A candidate this far behind the last word ends the line.
						break grow
					}
					if gap > maxGap {
						continue
					}
					if next != nil && _deedf(cand, next) >= 0 {
						continue
					}
					next, nextIdx = cand, idx
				}
				if next == nil {
					break
				}
				current.pullWord(_bbcaa, next, nextIdx)
			}

			current.markWordBoundaries()
			lines = append(lines, current)
		}
	}
	if len(lines) == 0 {
		return nil
	}

	_fd.Slice(lines, func(i, j int) bool { return _adfb(lines[i], lines[j]) < 0 })
	para := _gfbeg(_bbcaa.PdfRectangle, lines)

	if _abfb {
		_e.Log.Info("arrangeText !!! para=%s", para.String())
		if _fbed {
			for i, ln := range para._bbgab {
				_bf.Printf("%4d: %s\n", i, ln.String())
				if !_gecf {
					continue
				}
				for j, wd := range ln._edge {
					_bf.Printf("%8d: %s\n", j, wd.String())
					for k, mk := range wd._bggbb {
						_bf.Printf("%12d: %s\n", k, mk.String())
					}
				}
			}
		}
	}
	return para
}

// xMean returns the mean of the x coordinates of the ruling's two end points.
func (_ecbc lineRuling) xMean() float64 {
	return 0.5 * (_ecbc._eefea.X + _ecbc._edcb.X)
}

// sort orders the rulings in place using the list's comp method.
func (_bgcg rulingList) sort() {
	_fd.Slice(_bgcg, _bgcg.comp)
}

// _dadac returns the separator to put between two items: a space when _dbfgf
// reports true for the difference `_dgcc - _egge`, otherwise a newline.
func _dadac(_dgcc, _egge float64) string {
	if _dbfgf(_dgcc - _egge) {
		return " "
	}
	return "\n"
}

// sortTopoOrder reorders the paragraphs in place into the order given by topoOrder.
func (_edgdc paraList) sortTopoOrder() {
	_edgdc.reorder(_edgdc.topoOrder())
}
// ApplyArea processes the page text only within the specified area `bbox`.
// Each call replaces the text stored in the receiver; unless the `_gabb` option is
// set, the text marks and tables are replaced as well.
// It can be called multiple times in a row with different bounding boxes.
func (_bgcf *PageText) ApplyArea(bbox _b.PdfRectangle) {
	// Keep only the marks selected by _fccd for `bbox`.
	inArea := make([]*textMark, 0, len(_bgcf._eca))
	for _, mark := range _bgcf._eca {
		if _fccd(mark.bbox(), bbox) {
			inArea = append(inArea, mark)
		}
	}

	var paras paraList
	simpleText := ""
	remaining := len(inArea)
	// Handle the marks one orientation (0, 90, 180, 270) at a time.
	for rot := 0; rot < 360 && remaining > 0; rot += 90 {
		// No orientation can hold more marks than are still unprocessed.
		marks := make([]*textMark, 0, remaining)
		for _, mark := range inArea {
			if mark._egba == rot {
				marks = append(marks, mark)
			}
		}
		if len(marks) == 0 {
			continue
		}
		if _bgcf._ddfd._gabb {
			simpleText += _cbdc(marks, _bgcf._bfaf)
		} else {
			paras = append(paras, _fabd(marks, _bgcf._bfaf, nil, nil)...)
		}
		remaining -= len(marks)
	}

	if _bgcf._ddfd._gabb {
		_bgcf._ceef = simpleText
		return
	}
	buf := new(_eg.Buffer)
	paras.writeText(buf)
	_bgcf._ceef = buf.String()
	_bgcf._ebdc = paras.toTextMarks()
	_bgcf._eeg = paras.tables()
}

// nextLine moves the line position by (0, -_efda), where _efda is read from the text state.
func (_eec *textObject) nextLine() {
	_eec.moveLP(0, -_eec._eef._efda)
}

// showTextAdjusted processes the elements of `_gdge` in order: a number shifts the
// text matrix by -number*0.001*_gec along x, and a string is passed to renderText
// together with `_aaf` and `_gcc`. It returns an error for a number that cannot be
// read as a float, and core.ErrTypeError for a bad string or any other element type.
func (_bbbg *textObject) showTextAdjusted(_gdge *_ff.PdfObjectArray, _aaf int, _gcc string) error {
	// The displacement is always applied horizontally; the swap below is never taken.
	vertical := false
	for _, elem := range _gdge.Elements() {
		switch elem.(type) {
		case *_ff.PdfObjectFloat, *_ff.PdfObjectInteger:
			adjust, err := _ff.GetNumberAsFloat(elem)
			if err != nil {
				_e.Log.Debug("ERROR: showTextAdjusted. Bad numerical arg. o=%s args=%+v", elem, _gdge)
				return err
			}
			dx, dy := -adjust*0.001*_bbbg._eef._gec, 0.0
			if vertical {
				dy, dx = dx, dy
			}
			shift := _cadc(_da.Point{X: dx, Y: dy})
			_bbbg._cdffg.Concat(shift)
		case *_ff.PdfObjectString:
			direct := _ff.TraceToDirectObject(elem)
			data, ok := _ff.GetStringBytes(direct)
			if !ok {
				_e.Log.Trace("showTextAdjusted: Bad string arg. o=%s args=%+v", elem, _gdge)
				return _ff.ErrTypeError
			}
			_bbbg.renderText(direct, data, _aaf, _gcc)
		default:
			_e.Log.Debug("ERROR: showTextAdjusted. Unexpected type (%T) args=%+v", elem, _gdge)
			return _ff.ErrTypeError
		}
	}
	return nil
}

// _defa builds rulings from filled paths: every subpath that is a quadrilateral
// and that makeRectRuling accepts contributes one ruling in the section's color.
func _defa(_accc []pathSection) rulingList {
	_dadaf(_accc)
	if _ccffg {
		_e.Log.Info("makeFillRulings: %d fills", len(_accc))
	}
	var rulings rulingList
	for _, section := range _accc {
		for _, path := range section._cdad {
			if !path.isQuadrilateral() {
				if _ccffg {
					_e.Log.Error("!isQuadrilateral: %s", path)
				}
				continue
			}
			r, ok := path.makeRectRuling(section.Color)
			if ok {
				rulings = append(rulings, r)
				continue
			}
			if _dcbb {
				_e.Log.Error("!makeRectRuling: %s", path)
			}
		}
	}
	if _ccffg {
		_e.Log.Info("makeFillRulings: %s", rulings.String())
	}
	return rulings
}

// _abdbd rounds `_fgdg` to the nearest multiple of `_dagbd`.
// A `_dagbd` of 0 is treated as 1.
func _abdbd(_fgdg float64, _dagbd int) int {
	if _dagbd == 0 {
		_dagbd = 1
	}
	step := float64(_dagbd)
	return int(_ce.Round(_fgdg/step) * step)
}

// _ebde builds one "bullet" list item for each line stored under the first key
// returned by _dceb(`_dcef`). Each item collects its own line plus the lines
// returned by _bfcg, and takes the nested items returned by _dbead as children.
// The result is never nil.
func _ebde(_gaef []*textLine, _dcef map[float64][]*textLine) []*list {
	keys := _dceb(_dcef)
	items := []*list{}
	if len(keys) == 0 {
		return items
	}

	const nextLevel = 1
	topLines := _dcef[keys[0]]
	for i, head := range topLines {
		startY := head._dce
		// -1 marks "no following line at this level".
		nextY := -1.0
		if i < len(topLines)-1 {
			nextY = topLines[i+1]._dce
		}

		children := []*list{}
		if nextLevel < len(keys) {
			children = _dbead(_gaef, _dcef, keys, nextLevel, startY, nextY)
		}

		// The item's own text stops at its first child line, when there is one.
		endY := nextY
		if len(children) > 0 {
			if first := children[0]; len(first._efgb) > 0 {
				endY = first._efgb[0]._dce
			}
		}

		itemLines := []*textLine{head}
		itemLines = append(itemLines, _bfcg(head, _gaef, keys, startY, endY)...)
		item := _affeb(itemLines, "bullet", children)
		item._cceae = _dbfa(itemLines, "")
		items = append(items, item)
	}
	return items
}

// getStrokeColor returns the color _edggf derives from the stroking colorspace and color.
func (_acdg *textObject) getStrokeColor() _dd.Color {
	return _edggf(_acdg._fcbe.ColorspaceStroking, _acdg._fcbe.ColorStroking)
}

// _edbe returns the starting offset of each of the `_eefg` entries and the total
// size, where entry i occupies len(_ebafd[i])+1 slots.
func _edbe(_eefg int, _ebafd map[int][]float64) ([]int, int) {
	offsets := make([]int, _eefg)
	total := 0
	for i := range offsets {
		offsets[i] = total
		total += len(_ebafd[i]) + 1
	}
	return offsets, total
}

// topoOrder returns paragraph indexes ordered by a depth-first traversal of the
// readBefore relation. The post-order visit sequence is passed through _gddd.
func (_fdcf paraList) topoOrder() []int {
	if _ddcfg {
		_e.Log.Info("topoOrder:")
	}
	n := len(_fdcf)
	visited := make([]bool, n)
	order := make([]int, 0, n)
	llyOrder := _fdcf.llyOrdering()

	var visit func(int)
	visit = func(from int) {
		visited[from] = true
		for to := 0; to < n; to++ {
			if !visited[to] && _fdcf.readBefore(llyOrder, from, to) {
				visit(to)
			}
		}
		order = append(order, from)
	}
	for i := 0; i < n; i++ {
		if !visited[i] {
			visit(i)
		}
	}
	return _gddd(order)
}

// _agdbc returns the rectangle common to `_eeea` and `_bdbd`.
// The bool is false, and the rectangle zero, when _fccd rejects the pair.
func _agdbc(_eeea, _bdbd _b.PdfRectangle) (_b.PdfRectangle, bool) {
	if !_fccd(_eeea, _bdbd) {
		return _b.PdfRectangle{}, false
	}
	common := _b.PdfRectangle{
		Llx: _ce.Max(_eeea.Llx, _bdbd.Llx),
		Urx: _ce.Min(_eeea.Urx, _bdbd.Urx),
		Lly: _ce.Max(_eeea.Lly, _bdbd.Lly),
		Ury: _ce.Min(_eeea.Ury, _bdbd.Ury),
	}
	return common, true
}

// logComposite prints two debug grids for the table when _aada is set: the
// number of paragraphs in each composite cell, then the first 12 characters
// (as cut by _ggcea) of each cell's merged text. `_cbfdc` labels both grids.
func (_dfecd *textTable) logComposite(_cbfdc string) {
	if !_aada {
		return
	}
	w, h := _dfecd._addag, _dfecd._cffff

	_e.Log.Info("~~~Para %d x %d %s", w, h, _cbfdc)
	_bf.Printf("%5s |", "")
	for x := 0; x < w; x++ {
		_bf.Printf("%3d |", x)
	}
	_bf.Println("")
	_bf.Printf("%5s +", "")
	for x := 0; x < w; x++ {
		_bf.Printf("%3s +", "---")
	}
	_bf.Println("")
	for y := 0; y < h; y++ {
		_bf.Printf("%5d |", y)
		for x := 0; x < w; x++ {
			paras, _ := _dfecd._bdfcg[_fbfgf(x, y)].parasBBox()
			_bf.Printf("%3d |", len(paras))
		}
		_bf.Println("")
	}

	_e.Log.Info("~~~Text %d x %d %s", w, h, _cbfdc)
	_bf.Printf("%5s |", "")
	for x := 0; x < w; x++ {
		_bf.Printf("%12d |", x)
	}
	_bf.Println("")
	_bf.Printf("%5s +", "")
	for x := 0; x < w; x++ {
		_bf.Print("-------------+")
	}
	_bf.Println("")
	for y := 0; y < h; y++ {
		_bf.Printf("%5d |", y)
		for x := 0; x < w; x++ {
			paras, _ := _dfecd._bdfcg[_fbfgf(x, y)].parasBBox()
			text := ""
			if merged := paras.merge(); merged != nil {
				text = merged.text()
			}
			// Quote to escape control characters, then strip the surrounding quotes.
			quoted := _bf.Sprintf("%q", _ggcea(text, 12))
			_bf.Printf("%12s |", quoted[1:len(quoted)-1])
		}
		_bf.Println("")
	}
}

// _agfc returns a new subpath containing only the point `_ccae`.
func _agfc(_ccae _da.Point) *subpath {
	return &subpath{_gfb: []_da.Point{_ccae}}
}

// lineRuling is a colored ruling described by a kind, a mark kind and two end points.
type lineRuling struct {
	_becb rulingKind
	_cafe markKind
	_dd.Color
	_eefea, _edcb _da.Point
}

// isQuadrilateral reports whether the subpath has exactly 4 points, or 5 points
// with the last one equal to the first.
func (_egfgf *subpath) isQuadrilateral() bool {
	pts := _egfgf._gfb
	switch len(pts) {
	case 4:
		return true
	case 5:
		first, last := pts[0], pts[4]
		return first.X == last.X && first.Y == last.Y
	default:
		return false
	}
}

// snapToGroupsDirection groups rulings whose primary coordinates lie within
// _acgdd of the group's first ruling, gives each group's rulings the value from
// mergePrimary, then merges every part returned by splitSec into one ruling.
// The receiver's rulings are modified in place. The result is sorted with
// sortStrict; an empty receiver yields an empty result.
func (_aeae rulingList) snapToGroupsDirection() rulingList {
	if len(_aeae) == 0 {
		// Nothing to group; indexing _aeae[0] below would panic.
		return rulingList{}
	}
	_aeae.sortStrict()

	groups := make(map[*ruling]rulingList, len(_aeae))
	head := _aeae[0]
	startGroup := func(v *ruling) {
		head = v
		groups[head] = rulingList{v}
	}
	startGroup(_aeae[0])
	for _, v := range _aeae[1:] {
		if v._efadf < head._efadf-_beeg {
			_e.Log.Error("snapToGroupsDirection: Wrong primary order.\n\tv0=%s\n\t v=%s", head, v)
		}
		if v._efadf > head._efadf+_acgdd {
			startGroup(v)
		} else {
			groups[head] = append(groups[head], v)
		}
	}

	// Snap every ruling to the merged primary coordinate of its group.
	merged := make(map[*ruling]float64, len(groups))
	headOf := make(map[*ruling]*ruling, len(_aeae))
	for key, group := range groups {
		merged[key] = group.mergePrimary()
		for _, v := range group {
			headOf[v] = key
		}
	}
	for _, v := range _aeae {
		v._efadf = merged[headOf[v]]
	}

	// NOTE(review): groups are visited in map order, so the "duplicate" check below
	// compares against whichever ruling was appended last — confirm this is intended.
	snapped := make(rulingList, 0, len(_aeae))
	for _, group := range groups {
		for i, part := range group.splitSec() {
			v := part.merge()
			if len(snapped) > 0 {
				prev := snapped[len(snapped)-1]
				if prev.alignsPrimary(v) && prev.alignsSec(v) {
					_e.Log.Error("snapToGroupsDirection: Duplicate i=%d\n\tw=%s\n\tv=%s", i, prev, v)
					continue
				}
			}
			snapped = append(snapped, v)
		}
	}
	snapped.sortStrict()
	return snapped
}

// getFillColor returns the color _edggf derives from the non-stroking colorspace and color.
func (_dcd *textObject) getFillColor() _dd.Color {
	return _edggf(_dcd._fcbe.ColorspaceNonStroking, _dcd._fcbe.ColorNonStroking)
}

// reorder rearranges the paragraphs in place so that position i holds the
// paragraph that was at index `_cegd[i]`.
func (_cgfg paraList) reorder(_cegd []int) {
	sorted := make(paraList, len(_cgfg))
	for dst, src := range _cegd {
		sorted[dst] = _cgfg[src]
	}
	copy(_cgfg, sorted)
}

// shapesState holds the state used while building subpaths. devicePoint
// transforms points by _cdgfg.Mult(_gefd).
type shapesState struct {
	_gefd  _da.Matrix
	_cdgfg _da.Matrix
	_fgdb  []*subpath
	_cbeb  bool
	_dfca  _da.Point
	_aaag  *textObject
}

// _feabd renders the image mask `_cfcg` as an RGBA image: pixels whose red, green
// and blue components sum to zero are painted with `_gggf`, all others are made
// transparent. Pixels whose color cannot be read are logged and left unset.
func _feabd(_cfcg *_b.Image, _gggf _dd.Color) _ga.Image {
	w, h := int(_cfcg.Width), int(_cfcg.Height)
	out := _ga.NewRGBA(_ga.Rect(0, 0, w, h))
	for y := 0; y < h; y++ {
		for x := 0; x < w; x++ {
			px, err := _cfcg.ColorAt(x, y)
			if err != nil {
				_e.Log.Debug("WARN: could not retrieve image mask value at (%d, %d). Output may be incorrect.", x, y)
				continue
			}
			r, g, b, _ := px.RGBA()
			var fill _dd.Color = _dd.Transparent
			if r+g+b == 0 {
				fill = _gggf
			}
			out.Set(x, y, fill)
		}
	}
	return out
}
// Font represents the properties of a font used on a PDF page.
type Font struct{PdfFont *_b .PdfFont ;
// FontName is the font name taken from the font properties.
FontName string ;
// FontType is the Subtype entry of the font dictionary inside the page resources.
// Examples: Type0, Type1, MMType1, Type3, TrueType, CIDFont.
FontType string ;
// ToUnicode is true if the font provides a `ToUnicode` mapping.
ToUnicode bool ;
// IsCID is true if the underlying font is a composite font.
// A composite font is represented by a font dictionary whose Subtype is `Type0`.
IsCID bool ;
// IsSimple is true if the font is a simple font.
// A simple font is limited to 8-bit character codes (at most 256 distinct codes).
IsSimple bool ;
// FontData is the raw data of the embedded font file.
// Its format can be TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CFF).
// The data comes from the `FontFile`, `FontFile2` or `FontFile3` entry of the font descriptor.
// At most one of `FontFile`, `FontFile2` or `FontFile3` supplies the FontData value.
FontData []byte ;
// FontFileName is a file name representing the font. It has the format:
// (Font Name) + (Font Type Extension), for example: helvetica.ttf.
FontFileName string ;
// FontDescriptor holds the metrics and other attributes of the font, taken from
// the font descriptor in the PDF structure.
FontDescriptor *_b .PdfFontDescriptor ;};
// PageText represents the layout of text on a device page.
type PageText struct {
	// _eca holds the text marks that ApplyArea filters by bounding box.
	_eca []*textMark
	// _ceef is the extracted text; ApplyArea overwrites it.
	_ceef string
	// _ebdc holds the text marks produced by ApplyArea via toTextMarks.
	_ebdc []TextMark
	// _eeg holds the tables returned by Tables.
	_eeg []TextTable
	// _bfaf is the rectangle ApplyArea hands to the text builders.
	_bfaf _b.PdfRectangle
	_eff  []pathSection
	_caa  []pathSection
	_cagg *_ff.PdfObject
	_cfec _ff.PdfObject
	_cfcc *_gd.ContentStreamOperations
	// _ddfd holds the extraction options consulted by ApplyArea.
	_ddfd PageTextOptions
}

// _cacef compares `_gcfa` with `_dcae`: it returns _deedf(_gcfa, _dcae) unless
// _dbfgf reports true for that value, in which case it falls back to
// _ddabf(_gcfa, _dcae).
// NOTE(review): _dbfgf presumably tests for (near) zero — confirm.
func _cacef(_gcfa, _dcae bounded) float64 {
	primary := _deedf(_gcfa, _dcae)
	if _dbfgf(primary) {
		return _ddabf(_gcfa, _dcae)
	}
	return primary
}
// String returns the name registered for the mark kind in _eafg, or a
// "Not a mark: <n>" description when the value has no entry.
func (_cabdd markKind) String() string {
	if name, known := _eafg[_cabdd]; known {
		return name
	}
	return _bf.Sprintf("Not a mark: %d", _cabdd)
}
// String returns a string describing the current state of the textState stack:
// a header with the stack depth followed by one line per entry.
func (_gbf *stateStack) String() string {
	var sb _bfb.Builder
	_bf.Fprintf(&sb, "---- font stack: %d", len(*_gbf))
	for i, state := range *_gbf {
		desc := "<nil>"
		if state != nil {
			desc = state.String()
		}
		_bf.Fprintf(&sb, "\n\t%2d: %s", i, desc)
	}
	return sb.String()
}

// pop removes the top entry from the stack and returns a pointer to a copy of
// it. It returns nil when the stack is empty.
func (_aeee *stateStack) pop() *textState {
	if _aeee.empty() {
		return nil
	}
	last := len(*_aeee) - 1
	top := *(*_aeee)[last]
	*_aeee = (*_aeee)[:last]
	return &top
}

// makeRectRuling tries to interpret the first four points of the subpath as an
// axis-aligned rectangle and to convert that rectangle into a ruling of color
// `_dafcc`. The bool is false when the sides do not form such a rectangle, when
// _bcbfb classifies the rectangle as _aagc, or when asRuling rejects it.
// Subpaths with fewer than four points are rejected rather than causing a panic.
func (_geeba *subpath) makeRectRuling(_dafcc _dd.Color) (*ruling, bool) {
	if _dcbb {
		_e.Log.Info("makeRectRuling: path=%v", _geeba)
	}
	if len(_geeba._gfb) < 4 {
		// Slicing _gfb[:4] below needs at least four points.
		return nil, false
	}

	pts := _geeba._gfb[:4]
	// kinds[i] is the kind of the side running from pts[i] to pts[(i+1)%4].
	kinds := make(map[int]rulingKind, len(pts))
	for i, from := range pts {
		to := _geeba._gfb[(i+1)%4]
		kinds[i] = _gaacb(from, to)
		if _dcbb {
			_bf.Printf("%4d: %s = %6.2f - %6.2f", i, kinds[i], from, to)
		}
	}
	if _dcbb {
		_bf.Printf("   kinds=%+v\n", kinds)
	}

	var verts, horzs []int
	for i, kind := range kinds {
		switch kind {
		case _geggc:
			horzs = append(horzs, i)
		case _eeeae:
			verts = append(verts, i)
		}
	}
	if _dcbb {
		_bf.Printf("   horzs=%d %+v\n", len(horzs), horzs)
		_bf.Printf("   verts=%d %+v\n", len(verts), verts)
	}

	// Accept two horizontal plus two vertical sides, or a pair of sides of one
	// kind (and none of the other) that pass the _dcag / _cfea check.
	var ok bool
	switch {
	case len(horzs) == 2 && len(verts) == 2:
		ok = true
	case len(horzs) == 2 && len(verts) == 0:
		ok = _dcag(pts[horzs[0]], pts[horzs[1]])
	case len(verts) == 2 && len(horzs) == 0:
		ok = _cfea(pts[verts[0]], pts[verts[1]])
	}
	if _dcbb {
		_bf.Printf("   horzs=%d verts=%d ok=%t\n", len(horzs), len(verts), ok)
	}
	if !ok {
		if _dcbb {
			_e.Log.Error("!!makeRectRuling: path=%v", _geeba)
			_bf.Printf("   horzs=%d verts=%d ok=%t\n", len(horzs), len(verts), ok)
		}
		// This path returns an empty, non-nil ruling; kept as-is for existing callers.
		return &ruling{}, false
	}

	// When one kind is missing, treat the remaining sides as being of that kind.
	if len(verts) == 0 {
		for i, kind := range kinds {
			if kind != _geggc {
				verts = append(verts, i)
			}
		}
	}
	if len(horzs) == 0 {
		for i, kind := range kinds {
			if kind != _eeeae {
				horzs = append(horzs, i)
			}
		}
	}
	if _dcbb {
		_e.Log.Info("makeRectRuling: horzs=%d verts=%d points=%d\n"+
			"\t horzs=%+v\n"+
			"\t verts=%+v\n"+
			"\tpoints=%+v",
			len(horzs), len(verts), len(pts), horzs, verts, pts)
	}

	var xa, xb, top, bottom _da.Point
	if pts[horzs[0]].Y > pts[horzs[1]].Y {
		top, bottom = pts[horzs[0]], pts[horzs[1]]
	} else {
		top, bottom = pts[horzs[1]], pts[horzs[0]]
	}
	if pts[verts[0]].X > pts[verts[1]].X {
		xa, xb = pts[verts[0]], pts[verts[1]]
	} else {
		xa, xb = pts[verts[1]], pts[verts[0]]
	}

	// Build the bounding box, then normalize so that Llx <= Urx and Lly <= Ury.
	bbox := _b.PdfRectangle{Llx: xa.X, Urx: xb.X, Lly: bottom.Y, Ury: top.Y}
	if bbox.Llx > bbox.Urx {
		bbox.Llx, bbox.Urx = bbox.Urx, bbox.Llx
	}
	if bbox.Lly > bbox.Ury {
		bbox.Lly, bbox.Ury = bbox.Ury, bbox.Lly
	}

	rect := rectRuling{PdfRectangle: bbox, _dbcg: _bcbfb(bbox), Color: _dafcc}
	if rect._dbcg == _aagc {
		if _dcbb {
			_e.Log.Error("makeRectRuling: kind=nil")
		}
		return nil, false
	}
	result, isRuling := rect.asRuling()
	if !isRuling {
		if _dcbb {
			_e.Log.Error("makeRectRuling: !isRuling")
		}
		return nil, false
	}
	if _ccffg {
		_bf.Printf("   r=%s\n", result.String())
	}
	return result, true
}

// _gdc returns the elements tagged "L" found exactly three levels below
// `_bfceb`. The result is never nil.
func _gdc(_bfceb structElement) []structElement {
	found := []structElement{}
	for _, level1 := range _bfceb._fefd {
		for _, level2 := range level1._fefd {
			for _, level3 := range level2._fefd {
				if level3._bgec != "L" {
					continue
				}
				found = append(found, level3)
			}
		}
	}
	return found
}

// _cac reports whether `_acc` contains a font whose FontName equals `_fdc`.
func _cac(_acc []Font, _fdc string) bool {
	for i := range _acc {
		if _acc[i].FontName == _fdc {
			return true
		}
	}
	return false
}

// primaries returns the distinct primary coordinates of the rulings in
// increasing order.
func (_abecf rulingList) primaries() []float64 {
	seen := make(map[float64]struct{}, len(_abecf))
	for _, r := range _abecf {
		seen[r._efadf] = struct{}{}
	}
	values := make([]float64, 0, len(seen))
	for v := range seen {
		values = append(values, v)
	}
	_fd.Float64s(values)
	return values
}
// ImageExtractOptions holds the settings that control how images are
// extracted from PDF pages.
type ImageExtractOptions struct {
	// IncludeInlineStencilMasks controls whether inline images whose
	// ImageMask entry is true are extracted; processOperand skips such
	// images while this is false.
	IncludeInlineStencilMasks bool
}

// buildList gathers the structure elements of kind "L" found under the
// top-level structure element (directly, or through _gdc for children of kind
// "Table"), converts them with _egbf and flattens every result with _beba.
//
// The top-level element is the single child of the root when that child is a
// "Document", "Sect", "Part", "Div" or "Art" element. When the root does not
// have exactly one child, a synthetic element wrapping all of the root's
// children is used instead. nil is returned when the receiver is nil or when
// no top-level element could be selected.
func (root *structTreeRoot) buildList(lineMap map[int][]*textLine, obj _ff.PdfObject) []*list {
	if root == nil {
		_e.Log.Debug("\u0062\u0075\u0069\u006c\u0064\u004c\u0069\u0073\u0074\u003a\u0020t\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0069\u0073 \u006e\u0069\u006c") // "buildList: treeRoot is nil"
		return nil
	}

	var top *structElement
	if len(root._addd) != 1 {
		top = &structElement{_fefd: root._addd, _bgec: root._fedg}
	} else {
		switch root._addd[0]._bgec {
		case "\u0044\u006f\u0063\u0075\u006d\u0065\u006e\u0074", // "Document"
			"\u0053\u0065\u0063\u0074", // "Sect"
			"\u0050\u0061\u0072\u0074", // "Part"
			"\u0044\u0069\u0076",       // "Div"
			"\u0041\u0072\u0074":       // "Art"
			top = &root._addd[0]
		}
	}
	if top == nil {
		_e.Log.Debug("\u0062\u0075\u0069\u006cd\u004c\u0069\u0073\u0074\u003a\u0020\u0074\u006f\u0070\u0045l\u0065m\u0065\u006e\u0074\u0020\u0069\u0073\u0020n\u0069\u006c") // "buildList: topElement is nil"
		return nil
	}

	listElems := []structElement{}
	for _, child := range top._fefd {
		switch child._bgec {
		case "\u004c": // "L"
			listElems = append(listElems, child)
		case "\u0054\u0061\u0062l\u0065": // "Table"
			listElems = append(listElems, _gdc(child)...)
		}
	}

	var out []*list
	for _, item := range _egbf(listElems, lineMap, obj) {
		out = append(out, _beba(item)...)
	}
	return out
}
// String returns a human readable summary of the ruling list. An empty list
// yields "{ EMPTY }". Otherwise the list is split with vertsHorzs; if either
// part is empty only the two counts are printed, else the counts are followed
// by a rectangle built from the _efadf values of the first and last ruling of
// each part.
func (rl rulingList) String() string {
	if len(rl) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}" // "{ EMPTY }"
	}
	first, second := rl.vertsHorzs()
	nFirst, nSecond := len(first), len(second)
	if nFirst == 0 || nSecond == 0 {
		return _bf.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", nFirst, nSecond) // "{%d x %d}"
	}
	bounds := _b.PdfRectangle{
		Llx: first[0]._efadf,
		Urx: first[nFirst-1]._efadf,
		Lly: second[nSecond-1]._efadf,
		Ury: second[0]._efadf,
	}
	return _bf.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", nFirst, nSecond, bounds) // "{%d x %d: %6.2f}"
}

// removeDuplicates sorts the receiver in place and returns a new list in
// which every ruling that equals its predecessor has been dropped. It returns
// nil for an empty list.
func (rl rulingList) removeDuplicates() rulingList {
	if len(rl) == 0 {
		return nil
	}
	rl.sort()
	unique := rulingList{rl[0]}
	for i := 1; i < len(rl); i++ {
		if last := unique[len(unique)-1]; !rl[i].equals(last) {
			unique = append(unique, rl[i])
		}
	}
	return unique
}

// textResult bundles a PageText value with two integer values.
// NOTE(review): the meaning of the two counters is not visible here; confirm
// against the code that fills them in.
type textResult struct {
	_bbe  PageText
	_dcg  int
	_ebbe int
}

// computeBbox returns the union (via _badbb) of the rectangles of all non-nil
// cells of the table. The zero rectangle is returned when every cell is nil.
func (t *textTable) computeBbox() _b.PdfRectangle {
	var bbox _b.PdfRectangle
	seeded := false
	for row := 0; row < t._cffff; row++ {
		for col := 0; col < t._addag; col++ {
			cell := t.get(col, row)
			switch {
			case cell == nil:
				// Nothing stored at this position.
			case seeded:
				bbox = _badbb(bbox, cell.PdfRectangle)
			default:
				// The first cell found seeds the box.
				bbox, seeded = cell.PdfRectangle, true
			}
		}
	}
	return bbox
}

// yMean returns the average of the Y coordinates of the ruling's two points.
func (lr lineRuling) yMean() float64 {
	sum := lr._eefea.Y + lr._edcb.Y
	return 0.5 * sum
}
// textWord embeds a PdfRectangle and carries a string, the textMark pointers
// associated with it, two float64 values and a boolean flag.
// NOTE(review): field names are obfuscated; presumably the rectangle is the
// word's bounding box and the string its text — confirm against the code that
// builds words.
type textWord struct {
	_b.PdfRectangle
	_bagdd float64
	_deccc string
	_bggbb []*textMark
	_ecdce float64
	_gcccd bool
}
// String returns a short description of the array: "EMPTY" when it holds no
// marks, otherwise the number of marks followed by the first and last mark.
func (ma TextMarkArray) String() string {
	count := len(ma._gaac)
	if count == 0 {
		return "\u0045\u004d\u0050T\u0059" // "EMPTY"
	}
	head, tail := ma._gaac[0], ma._gaac[count-1]
	return _bf.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", count, head, tail)
}

// lines returns the concatenation of the _bbgab line slices of every
// paragraph in the list, in list order.
func (paras paraList) lines() []*textLine {
	var all []*textLine
	for i := range paras {
		all = append(all, paras[i]._bbgab...)
	}
	return all
}

// showText forwards its arguments unchanged to renderText and returns its
// result.
func (to *textObject) showText(obj _ff.PdfObject, data []byte, mcid int, actualText string) error {
	return to.renderText(obj, data, mcid, actualText)
}

// _bbfb returns the Llx of the first word of the line. It panics when the
// line has no words.
func _bbfb(line *textLine) float64 {
	first := line._edge[0]
	return first.Llx
}

// allWords returns every word stored in the bag's _ecfd map as one slice.
// The order follows Go's map iteration order and is therefore unspecified.
func (bag *wordBag) allWords() []*textWord {
	var words []*textWord
	for _, group := range bag._ecfd {
		words = append(words, group...)
	}
	return words
}

// findGridTables calls findTableGrid for each tiling and returns the non-nil
// tables it produced, in tiling order. Each returned table is logged and has
// markCells called on it; every key of the second findTableGrid result gets
// its _cdaad flag set.
func (paras paraList) findGridTables(tilings []gridTiling) []*textTable {
	if _aada {
		_e.Log.Info("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073", len(paras))
		for i, para := range paras {
			_bf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
	}

	var tables []*textTable
	for i, tiling := range tilings {
		table, used := paras.findTableGrid(tiling)
		if table != nil {
			table.log(_bf.Sprintf("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064", i))
			tables = append(tables, table)
			table.markCells()
		}
		for para := range used {
			para._cdaad = true
		}
	}

	if _aada {
		_e.Log.Info("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s", len(tables))
	}
	return tables
}

// isExportable reports whether the table should be exported. It is true when
// the _cbbdd flag is set, or when some row has a first-column cell that is
// nil or whose text is longer than one rune and does not match _aeccb.
func (t *textTable) isExportable() bool {
	if t._cbbdd {
		return true
	}
	for row := 0; row < t._cffff; row++ {
		cell := t.get(0, row)
		if cell == nil {
			return true
		}
		content := cell.text()
		runes := _cg.RuneCountInString(content)
		matched := _aeccb.MatchString(content)
		if runes > 1 && !matched {
			return true
		}
	}
	return false
}

// ruling holds a ruling kind, a mark kind, an embedded color and four
// float64 values.
// NOTE(review): from _acaf, _efadf looks like the coordinate on the ruling's
// own axis and _efbba/_daba its extent along the other axis — confirm.
type ruling struct {
	_facdf rulingKind
	_bagdc markKind
	_dd.Color
	_efadf float64
	_efbba float64
	_daba  float64
	_geeb  float64
}
// String returns the name registered for the kind in _edbf, or a
// "Not a ruling" message carrying the numeric value when there is none.
func (k rulingKind) String() string {
	if name, found := _edbf[k]; found {
		return name
	}
	return _bf.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", k) // "Not a ruling: %d"
}

// add inserts v into the set.
func (s intSet) add(v int) {
	s[v] = struct{}{}
}
// NewFromContents creates a new extractor from a content stream string and
// the page resources it refers to. The returned error is always nil.
func NewFromContents(contents string, resources *_b.PdfPageResources) (*Extractor, error) {
	const usageKey = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s" // "extractor.NewFromContents"

	ex := &Extractor{
		_ddgf: contents,
		_eba:  resources,
		_cbg:  make(map[string]fontEntry),
		_cdgf: make(map[string]textResult),
	}
	_cge.TrackUse(usageKey)
	return ex, nil
}
// BidiText represents a bidi text organized in its visual order
// with base direction of the text.
type BidiText struct {
	_db string
	_bg string
}

// Ruling kinds. _aagc is the zero value and is what _bcbfb returns for a
// rectangle that qualifies as neither of the other two; _bcbfb returns _geggc
// for rectangles that are wider than tall and _eeeae otherwise.
const (
	_aagc rulingKind = iota
	_geggc
	_eeeae
)

// pullWord appends word to the line and then removes it from bag, passing
// depthIdx through to removeWord.
func (line *textLine) pullWord(bag *wordBag, word *textWord, depthIdx int) {
	line.appendWord(word)
	bag.removeWord(word, depthIdx)
}

// _egbf converts each structure element into a list value built by _affeb,
// recursing into the element's children first.
//
// An element receives the text lines stored in lineMap under its _cabe key
// only when that key is not -1, the element's _fbfd is an object reference,
// obj is an indirect object, and the two references are deeply equal.
// Otherwise it is built with an empty line slice.
func _egbf(elems []structElement, lineMap map[int][]*textLine, obj _ff.PdfObject) []*list {
	result := []*list{}
	for _, elem := range elems {
		key := int(elem._cabe)
		ownLines := []*textLine{}
		children := []*list{}

		ref, isRef := elem._fbfd.(*_ff.PdfObjectReference)
		if !isRef {
			_e.Log.Debug("\u0066\u0061\u0069l\u0065\u0064\u0020\u006f\u0074\u0020\u0063\u0061\u0073\u0074\u0020\u0074\u006f\u0020\u002a\u0063\u006f\u0072\u0065\u002e\u0050\u0064\u0066\u004f\u0062\u006a\u0065\u0063\u0074R\u0065\u0066\u0065\u0072\u0065\u006e\u0063\u0065")
		}

		if key != -1 && ref != nil {
			if stored, present := lineMap[key]; present {
				if indirect, isIndirect := obj.(*_ff.PdfIndirectObject); isIndirect {
					if _dg.DeepEqual(*ref, indirect.PdfObjectReference) {
						ownLines = stored
					}
				}
			}
		}

		if kids := elem._fefd; kids != nil {
			children = _egbf(kids, lineMap, obj)
		}

		result = append(result, _affeb(ownLines, elem._bgec, children))
	}
	return result
}

// put stores para in the table under the key _fbfgf(x, y).
func (t *textTable) put(x, y int, para *textPara) {
	key := _fbfgf(x, y)
	t._egbgg[key] = para
}

// _bfecf reports whether b equals the first byte of any entry of _dfda.
func _bfecf(b byte) bool {
	for _, entry := range _dfda {
		first := []byte(entry)[0]
		if first == b {
			return true
		}
	}
	return false
}

// getDown collects, for every column, the _dabf neighbour of the cell in the
// table's last row. It returns nil if any of them reports taken() or if the
// collected paragraphs are not chained left to right through _abbb.
func (t *textTable) getDown() paraList {
	below := make(paraList, t._addag)
	for col := 0; col < t._addag; col++ {
		next := t.get(col, t._cffff-1)._dabf
		if next.taken() {
			return nil
		}
		below[col] = next
	}
	for col := 1; col < t._addag; col++ {
		if below[col-1]._abbb != below[col] {
			return nil
		}
	}
	return below
}

// del removes v from the set.
func (s intSet) del(v int) {
	delete(s, v)
}

// firstWord returns the first word stored under depthIdx. It panics when no
// word is stored there.
func (bag *wordBag) firstWord(depthIdx int) *textWord {
	group := bag._ecfd[depthIdx]
	return group[0]
}
// processOperand is the content stream handler used for image extraction.
//
//   - "BI" with one parameter: extracts the inline image, unless it is a
//     stencil mask and IncludeInlineStencilMasks is false.
//   - "Do" with one parameter: extracts the named XObject when it is an image
//     or a form.
//   - "scn"/"SCN" with one parameter while the _eag flag is set: looks up the
//     named pattern and, for tiling patterns, extracts images from the
//     pattern's own content stream.
//   - "cs"/"CS": records in _eag whether the first parameter is "Pattern".
//
// Every other operation is ignored and nil is returned.
func (c *imageExtractContext) processOperand(op *_gd.ContentStreamOperation, gs _gd.GraphicsState, resources *_b.PdfPageResources) error {
	operand, nParams := op.Operand, len(op.Params)

	switch {
	case operand == "\u0042\u0049" && nParams == 1: // "BI"
		inline, isInline := op.Params[0].(*_gd.ContentStreamInlineImage)
		if !isInline {
			return nil
		}
		if isMask, known := _ff.GetBoolVal(inline.ImageMask); known && isMask && !c._fcaf.IncludeInlineStencilMasks {
			return nil
		}
		return c.extractInlineImage(inline, gs, resources)

	case operand == "\u0044\u006f" && nParams == 1: // "Do"
		name, isName := _ff.GetName(op.Params[0])
		if !isName {
			_e.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065") // "ERROR: Type"
			return _eddg
		}
		_, xtype := resources.GetXObjectByName(*name)
		if xtype == _b.XObjectTypeImage {
			return c.extractXObjectImage(name, gs, resources)
		}
		if xtype == _b.XObjectTypeForm {
			return c.extractFormImages(name, gs, resources)
		}

	case c._eag && (operand == "\u0073\u0063\u006e" || operand == "\u0053\u0043\u004e") && nParams == 1: // "scn" / "SCN"
		name, isName := _ff.GetName(op.Params[0])
		if !isName {
			_e.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065") // "ERROR: Type"
			return _eddg
		}
		pattern, found := resources.GetPatternByName(*name)
		if !found {
			_e.Log.Debug("\u0045R\u0052\u004f\u0052\u003a\u0020\u0050\u0061\u0074\u0074\u0065\u0072n\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075\u006e\u0064") // "ERROR: Pattern not found"
			return nil
		}
		if !pattern.IsTiling() {
			return nil
		}
		tiling := pattern.GetAsTilingPattern()
		stream, err := tiling.GetContentStream()
		if err != nil {
			return err
		}
		return c.extractContentStreamImages(string(stream), tiling.Resources)

	case (operand == "\u0063\u0073" || operand == "\u0043\u0053") && nParams >= 1: // "cs" / "CS"
		c._eag = op.Params[0].String() == "\u0050a\u0074\u0074\u0065\u0072\u006e" // "Pattern"
	}
	return nil
}

// writeText writes every paragraph whose _cdcca flag is unset to w. After
// each written paragraph except the last element of the list it emits a
// single space when _egad reports true for the paragraph and its successor,
// and two newlines otherwise. Two newlines are always written at the end.
// Errors returned by w are ignored.
func (paras paraList) writeText(w _f.Writer) {
	lastIdx := len(paras) - 1
	for i, para := range paras {
		if para._cdcca {
			continue
		}
		para.writeText(w)
		if i == lastIdx {
			continue
		}
		if _egad(para, paras[i+1]) {
			w.Write([]byte("\u0020"))
			continue
		}
		w.Write([]byte("\u000a"))
		w.Write([]byte("\u000a"))
	}
	w.Write([]byte("\u000a"))
	w.Write([]byte("\u000a"))
}

// connections returns the set of ruling indexes reachable from start through
// links: an index i is added when links[i] contains an index that has already
// been visited, and every added index is visited in turn.
func (rl rulingList) connections(links map[int]intSet, start int) intSet {
	found := make(intSet)
	visited := make(intSet)

	var visit func(int)
	visit = func(idx int) {
		if visited.has(idx) {
			return
		}
		visited.add(idx)
		for i := range rl {
			if links[i].has(idx) {
				found.add(i)
			}
		}
		for i := range rl {
			if found.has(i) {
				visit(i)
			}
		}
	}

	visit(start)
	return found
}

// setTextRenderMode stores mode, converted to RenderMode, in the text state.
// It does nothing on a nil receiver.
func (to *textObject) setTextRenderMode(mode int) {
	if to != nil {
		to._eef._ddgg = RenderMode(mode)
	}
}
// String returns a description of the paragraph: its rectangle, the size of
// the table attached through _cegg (if any), the number of lines and the
// text shortened by _ggcea. Paragraphs with the _cdcca flag set are reported
// as "[EMPTY]".
func (p *textPara) String() string {
	if p._cdcca {
		return _bf.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", p.PdfRectangle) // "%6.2f [EMPTY]"
	}
	tableInfo := ""
	if tbl := p._cegg; tbl != nil {
		tableInfo = _bf.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", tbl._addag, tbl._cffff) // "[%dx%d] "
	}
	return _bf.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", p.PdfRectangle, tableInfo, len(p._bbgab), _ggcea(p.text(), 50))
}

// getComposite returns the result of parasBBox for the composite cell stored
// under _fbfgf(x, y), or nil and the zero rectangle when there is none.
func (t *textTable) getComposite(x, y int) (paraList, _b.PdfRectangle) {
	composite, found := t._bdfcg[_fbfgf(x, y)]
	if _aada {
		_bf.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a", x, y, composite.String())
	}
	if !found {
		return nil, _b.PdfRectangle{}
	}
	return composite.parasBBox()
}

// extractContentStreamImages parses contents and runs processOperand on each
// of its operations. The image cache and the options are created on first
// use when they are still nil.
func (c *imageExtractContext) extractContentStreamImages(contents string, resources *_b.PdfPageResources) error {
	ops, err := _gd.NewContentStreamParser(contents).Parse()
	if err != nil {
		return err
	}
	if c._ba == nil {
		c._ba = map[*_ff.PdfObjectStream]*cachedImage{}
	}
	if c._fcaf == nil {
		c._fcaf = &ImageExtractOptions{}
	}
	processor := _gd.NewContentStreamProcessor(*ops)
	processor.AddHandler(_gd.HandlerConditionEnumAllOperands, "", c.processOperand)
	return processor.Process(resources)
}

// _gbcg reports whether s consists only of Unicode white space. It is true
// for the empty string.
func _gbcg(s string) bool {
	for _, r := range s {
		if !_g.IsSpace(r) {
			return false
		}
	}
	return true
}

// _deabc returns the smaller of a and b.
func _deabc(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// _dceb returns the keys of m in increasing order. The result is never nil.
func _dceb(m map[float64][]*textLine) []float64 {
	keys := make([]float64, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	_fd.Float64s(keys)
	return keys
}

// add appends the given points to the subpath.
func (sp *subpath) add(points ..._da.Point) {
	sp._gfb = append(sp._gfb, points...)
}

// toTextTable converts the table into its exported TextTable form. Cells are
// indexed [row][column]; positions for which get returns nil are left as zero
// TableCell values.
func (t *textTable) toTextTable() TextTable {
	if _aada {
		_e.Log.Info("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064", t._addag, t._cffff) // "toTextTable: %d x %d"
	}
	cells := make([][]TableCell, t._cffff)
	for row := 0; row < t._cffff; row++ {
		cells[row] = make([]TableCell, t._addag)
		for col := 0; col < t._addag; col++ {
			para := t.get(col, row)
			if para == nil {
				continue
			}
			_bagcg(para._bbgab)
			if _aada {
				_bf.Printf("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", col, row, para)
			}
			target := &cells[row][col]
			target.Text = para.text()
			offset := 0
			target.Marks._gaac = para.toTextMarks(&offset)
		}
	}
	out := TextTable{W: t._addag, H: t._cffff, Cells: cells}
	out.PdfRectangle = t.bbox()
	return out
}

// aligned reports whether enough rulings of the list line up with each other.
//
// The rulings are visited in order. A ruling that grid-intersects an earlier
// representative credits that representative; a ruling that matches none
// becomes a new representative with zero credits. The list counts as aligned
// when the share of representatives left without credit, relative to the
// list length, is at most 1.0-_dgga. Lists with fewer than two rulings are
// never aligned.
//
// Representatives are tried in insertion order. Ranging over the credit map
// directly would make the result depend on Go's unspecified map iteration
// order whenever a ruling intersects more than one representative.
func (rl rulingList) aligned() bool {
	if len(rl) < 2 {
		return false
	}
	credits := make(map[*ruling]int)
	reps := []*ruling{rl[0]}
	credits[rl[0]] = 0
	for _, r := range rl[1:] {
		matched := false
		for _, rep := range reps {
			if r.gridIntersecting(rep) {
				credits[rep]++
				matched = true
				break
			}
		}
		if matched {
			continue
		}
		if _, seen := credits[r]; !seen {
			reps = append(reps, r)
		}
		credits[r] = 0
	}

	unmatched := 0
	for _, n := range credits {
		if n == 0 {
			unmatched++
		}
	}
	fraction := float64(unmatched) / float64(len(rl))
	isAligned := fraction <= 1.0-_dgga
	if _ccffg {
		_e.Log.Info("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", isAligned, fraction, unmatched, len(rl), rl.String())
	}
	return isAligned
}

// setFont records size in the text state and then looks up the font called
// name with getFont. When the lookup fails its error is returned and the
// previously selected font is kept; the size has already been updated at
// that point. It does nothing on a nil receiver.
func (to *textObject) setFont(name string, size float64) error {
	if to == nil {
		return nil
	}
	to._eef._gec = size
	font, err := to.getFont(name)
	if err != nil {
		return err
	}
	to._eef._ggec = font
	return nil
}

// textObject holds the state used while processing text operators: the
// owning extractor, the page resources, a graphics state, the text state and
// its stack, two matrices, the marks produced so far and a boolean flag.
type textObject struct {
	_beec  *Extractor
	_cbgda *_b.PdfPageResources
	_fcbe  _gd.GraphicsState
	_eef   *textState
	_cgdf  *stateStack
	_cdffg _da.Matrix
	_fea   _da.Matrix
	_daeb  []*textMark
	_bdfc  bool
}

// _adad scans candidates in order and returns the _dce value of the last
// line seen whose _dce exceeds that of ref, stopping at the first such line
// whose rounded Llx is smaller than ref's rounded Llx. It returns -1 when no
// line qualified. The third parameter is unused.
func _adad(ref *textLine, candidates []*textLine, _ []float64) float64 {
	result := float64(-1)
	refLeft := _ce.Round(ref.Llx)
	for _, cand := range candidates {
		if !(cand._dce > ref._dce) {
			continue
		}
		if _ce.Round(cand.Llx) < refLeft {
			break
		}
		result = cand._dce
	}
	return result
}

// emptyCompositeColumn reports whether no composite cell in column x holds
// any paragraph.
func (t *textTable) emptyCompositeColumn(x int) bool {
	for y := 0; y < t._cffff; y++ {
		composite, found := t._bdfcg[_fbfgf(x, y)]
		if found && len(composite.paraList) > 0 {
			return false
		}
	}
	return true
}

// _cdge reports whether word starts at or after the right edge of bag and
// less than gap beyond it.
func _cdge(bag *wordBag, word *textWord, gap float64) bool {
	left := word.Llx
	return bag.Urx <= left && left < bag.Urx+gap
}

// alignsPrimary reports whether both rulings have the same kind and their
// _efadf values differ by less than half of _acgdd.
func (r *ruling) alignsPrimary(other *ruling) bool {
	if r._facdf != other._facdf {
		return false
	}
	return _ce.Abs(r._efadf-other._efadf) < _acgdd*0.5
}

// _gaec passes sections to _dadaf and then turns every pair of consecutive
// points of every subpath into a ruling with _abag, keeping those for which
// _abag reports success. Subpaths with fewer than two points are skipped.
func _gaec(sections []pathSection) rulingList {
	_dadaf(sections)
	if _ccffg {
		_e.Log.Info("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073", len(sections)) // "makeStrokeRulings: %d strokes"
	}
	var rulings rulingList
	for _, section := range sections {
		for _, path := range section._cdad {
			points := path._gfb
			if len(points) < 2 {
				continue
			}
			for i := 1; i < len(points); i++ {
				if r, ok := _abag(points[i-1], points[i], section.Color); ok {
					rulings = append(rulings, r)
				}
			}
		}
	}
	if _ccffg {
		_e.Log.Info("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073", rulings) // "makeStrokeRulings: %s"
	}
	return rulings
}

// textLine embeds a PdfRectangle and carries the words of the line together
// with two float64 values.
type textLine struct {
	_b.PdfRectangle
	_dce  float64
	_edge []*textWord
	_efde float64
}

// _ebgg returns the lines whose _dce lies strictly between lower and upper.
// An upper value of -1 disables the upper bound.
func _ebgg(lines []*textLine, upper, lower float64) []*textLine {
	var picked []*textLine
	for _, line := range lines {
		if line._dce > lower && (upper == -1 || line._dce < upper) {
			picked = append(picked, line)
		}
	}
	return picked
}

// last returns the final point of the subpath. It panics on an empty subpath.
func (sp *subpath) last() _da.Point {
	points := sp._gfb
	return points[len(points)-1]
}

// text returns everything writeText emits for the paragraph, as a string.
func (p *textPara) text() string {
	var buf _eg.Buffer
	p.writeText(&buf)
	return buf.String()
}

// moveText forwards the offsets tx and ty to moveLP.
func (to *textObject) moveText(tx, ty float64) {
	to.moveLP(tx, ty)
}
// _gddd returns a new slice holding the elements of src in reverse order.
// src itself is left untouched.
func _gddd(src []int) []int {
	n := len(src)
	reversed := make([]int, n)
	for i := 0; i < n; i++ {
		reversed[n-1-i] = src[i]
	}
	return reversed
}

// _cadc returns the matrix that translates by the coordinates of p.
func _cadc(p _da.Point) _da.Matrix {
	return _da.TranslationMatrix(p.X, p.Y)
}
// _acaf returns a ruling of kind _geggc placed at the top edge (Ury) of rect
// and spanning from rect.Llx to rect.Urx.
func _acaf(rect _b.PdfRectangle) *ruling {
	r := &ruling{_facdf: _geggc}
	r._efadf = rect.Ury
	r._efbba = rect.Llx
	r._daba = rect.Urx
	return r
}

// isActualGrid augments the list with augmentGrid and checks that the two
// resulting lists form a grid. It fails when either list is shorter than its
// minimum (_dbee+1 and _fbedf+1). Depending on _dbcdd it then either requires
// the outermost rulings of both lists to meet (difference accepted by _aebg)
// or requires both lists to be aligned(). On success the two lists are
// returned concatenated.
func (rl rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := rl.augmentGrid()
	if len(verts) < _dbee+1 || len(horzs) < _fbedf+1 {
		if _ccffg {
			_e.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(verts), len(horzs), _dbee+1, _fbedf+1)
		}
		return nil, false
	}

	if _ccffg {
		_e.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", rl, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for i, r := range rl {
			_bf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", i, r)
		}
	}

	if _dbcdd {
		v0, v1 := verts[0], verts[len(verts)-1]
		h0, h1 := horzs[0], horzs[len(horzs)-1]
		cornersMeet := _aebg(v0._efadf-h0._efbba) &&
			_aebg(v1._efadf-h0._daba) &&
			_aebg(h0._efadf-v0._daba) &&
			_aebg(h1._efadf-v0._efbba)
		if !cornersMeet {
			if _ccffg {
				_e.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", v0, v1, h0, h1)
			}
			return nil, false
		}
		return append(verts, horzs...), true
	}

	if !verts.aligned() {
		if _dcbc {
			_e.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(verts))
		}
		return nil, false
	}
	if !horzs.aligned() {
		if _ccffg {
			_e.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(horzs))
		}
		return nil, false
	}
	return append(verts, horzs...), true
}

// _dbead builds one "bullet" list entry for every line stored in groups under
// keys[level] whose _dce lies strictly between lower and upper (see _ebgg),
// recursing into the next level for the range between that line and the
// following one. Each entry consists of the line itself plus the lines
// returned by _bfcg; its _cceae field is set from _dbfa.
func _dbead(allLines []*textLine, groups map[float64][]*textLine, keys []float64, level int, lower, upper float64) []*list {
	result := []*list{}
	nextLevel := level + 1
	heads := _ebgg(groups[keys[level]], upper, lower)

	for i, head := range heads {
		children := []*list{}
		start := head._dce

		// The range of this entry ends at the next head, or at upper for the
		// last one.
		limit := upper
		if i < len(heads)-1 {
			limit = heads[i+1]._dce
		}
		if nextLevel < len(keys) {
			children = _dbead(allLines, groups, keys, nextLevel, start, limit)
		}

		// The entry's own lines stop where its first child starts, if any.
		stop := limit
		if len(children) > 0 {
			if firstChild := children[0]; len(firstChild._efgb) > 0 {
				stop = firstChild._efgb[0]._dce
			}
		}

		body := []*textLine{head}
		body = append(body, _bfcg(head, allLines, keys, start, stop)...)

		entry := _affeb(body, "\u0062\u0075\u006c\u006c\u0065\u0074", children) // "bullet"
		entry._cceae = _dbfa(body, "")
		result = append(result, entry)
	}
	return result
}

// _bcbfb classifies rect by its dimensions: _geggc when it is wider than tall
// and its width is at least _cef, _eeeae when it is not wider than tall and
// its height is at least _cef, and _aagc otherwise.
func _bcbfb(rect _b.PdfRectangle) rulingKind {
	width, height := rect.Width(), rect.Height()
	if width > height {
		if width >= _cef {
			return _geggc
		}
		return _aagc
	}
	if height >= _cef {
		return _eeeae
	}
	return _aagc
}
// New returns an Extractor instance for extracting content from the input PDF page.
func New (page *_b .PdfPage )(*Extractor ,error ){return NewWithOptions (page ,nil )};func (_bde *PageText )getText ()string {_ffgd :="";_gbb :=len (_bde ._eca );for _cgec :=0;_cgec < 360&&_gbb > 0;_cgec +=90{_ace :=make ([]*textMark ,0,len (_bde ._eca )-_gbb );
for _ ,_afc :=range _bde ._eca {if _afc ._egba ==_cgec {_ace =append (_ace ,_afc );};};if len (_ace )> 0{_ffgd +=_cbdc (_ace ,_bde ._bfaf );_gbb -=len (_ace );};};return _ffgd ;};func (_gcdfc *wordBag )absorb (_fggb *wordBag ){_aded :=_fggb .makeRemovals ();
for _cdebe ,_dgee :=range _fggb ._ecfd {for _ ,_dcaf :=range _dgee {_gcdfc .pullWord (_dcaf ,_cdebe ,_aded );};};_fggb .applyRemovals (_aded );};type lists []*list ;func (_cgcbc gridTiling )complete ()bool {for _ ,_fadb :=range _cgcbc ._caabf {for _ ,_aefc :=range _fadb {if !_aefc .complete (){return false ;
};};};return true ;};func (_bdd *Extractor )extractPageText (_aacc string ,_ede *_b .PdfPageResources ,_bggd _da .Matrix ,_dffg int ,_dgf bool )(*PageText ,int ,int ,error ){_e .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_dffg );
_bgbe :=&PageText {_bfaf :_bdd ._fg ,_cagg :_bdd ._fcdd ,_cfec :_bdd ._fdgc };_ffg :=_aagd (_bdd ._fg );var _dgca stateStack ;_cdbg :=_gaad (_bdd ,_ede ,_gd .GraphicsState {},&_ffg ,&_dgca );_aag :=shapesState {_cdgfg :_bggd ,_gefd :_da .IdentityMatrix (),_aaag :_cdbg };
var _daea bool ;_afe :=-1;_abec :="";if _dffg > _ddc {_gfea :=_ad .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_dffg ,_gfea );
return _bgbe ,_ffg ._gedec ,_ffg ._ffed ,_gfea ;};_gfad :=_gd .NewContentStreamParser (_aacc );_efc ,_bae :=_gfad .Parse ();if _bae !=nil {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bae );
return _bgbe ,_ffg ._gedec ,_ffg ._ffed ,_bae ;};_bgbe ._cfcc =_efc ;_bccf :=_gd .NewContentStreamProcessor (*_efc );_bccf .AddHandler (_gd .HandlerConditionEnumAllOperands ,"",func (_ead *_gd .ContentStreamOperation ,_gceb _gd .GraphicsState ,_fceb *_b .PdfPageResources )error {_cde :=_ead .Operand ;
if _gfbd {_e .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_ead );};switch _cde {case "\u0071":if _gegd {_e .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_aag ._gefd );};_dgca .push (&_ffg );case "\u0051":if !_dgca .empty (){_ffg =*_dgca .pop ();
};_aag ._gefd =_gceb .CTM ;if _gegd {_e .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_aag ._gefd );};case "\u0042\u0044\u0043":_dfcf ,_ggc :=_ff .GetDict (_ead .Params [1]);if !_ggc {_e .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0042D\u0043\u0020\u006f\u0070\u003d\u0025\u0073 \u0047\u0065\u0074\u0044\u0069\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_ead );
return _bae ;};_ceaf :=_dfcf .Get ("\u004d\u0043\u0049\u0044");if _ceaf !=nil {_dfe ,_dacf :=_ff .GetIntVal (_ceaf );if !_dacf {_e .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0042\u0044C\u0020\u006f\u0070=\u0025\u0073\u002e\u0020\u0042\u0061\u0064\u0020\u006eum\u0065\u0072\u0069c\u0061\u006c \u006f\u0062\u006a\u0065\u0063\u0074.\u0020\u006f=\u0025\u0073",_ead ,_ceaf );
};_afe =_dfe ;}else {_afe =-1;};_gfff :=_dfcf .Get ("\u0041\u0063\u0074\u0075\u0061\u006c\u0054\u0065\u0078\u0074");if _gfff !=nil {_abec =_gfff .String ();};case "\u0045\u004d\u0043":_afe =-1;_abec ="";case "\u0042\u0054":if _daea {_e .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
_bgbe ._eca =append (_bgbe ._eca ,_cdbg ._daeb ...);};_daea =true ;_bbab :=_gceb ;if _dgf {_bbab =_gd .GraphicsState {};_bbab .CTM =_aag ._gefd ;};_bbab .CTM =_bggd .Mult (_bbab .CTM );_cdbg =_gaad (_bdd ,_fceb ,_bbab ,&_ffg ,&_dgca );_aag ._aaag =_cdbg ;
case "\u0045\u0054":if !_daea {_e .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");};_daea =false ;_bgbe ._eca =append (_bgbe ._eca ,_cdbg ._daeb ...);
_cdbg .reset ();case "\u0054\u002a":_cdbg .nextLine ();case "\u0054\u0064":if _abdb ,_dbdd :=_cdbg .checkOp (_ead ,2,true );!_abdb {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dbdd );return _dbdd ;};
_gda ,_cgf ,_fbd :=_fgfb (_ead .Params );if _fbd !=nil {return _fbd ;};_cdbg .moveText (_gda ,_cgf );case "\u0054\u0044":if _gaa ,_fbc :=_cdbg .checkOp (_ead ,2,true );!_gaa {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fbc );
return _fbc ;};_bfg ,_gcdg ,_gbg :=_fgfb (_ead .Params );if _gbg !=nil {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gbg );return _gbg ;};_cdbg .moveTextSetLeading (_bfg ,_gcdg );case "\u0054\u006a":if _fag ,_gdeb :=_cdbg .checkOp (_ead ,1,true );
!_fag {_e .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_ead ,_gdeb );return _gdeb ;};_bff :=_ff .TraceToDirectObject (_ead .Params [0]);_fed ,_eac :=_ff .GetStringBytes (_bff );
if !_eac {_e .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_ead );return _ff .ErrTypeError ;
};return _cdbg .showText (_bff ,_fed ,_afe ,_abec );case "\u0054\u004a":if _gfcb ,_aff :=_cdbg .checkOp (_ead ,1,true );!_gfcb {_e .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aff );return _aff ;
};_cace ,_dee :=_ff .GetArray (_ead .Params [0]);if !_dee {_e .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_ead );
return _bae ;};return _cdbg .showTextAdjusted (_cace ,_afe ,_abec );case "\u0027":if _bgbeb ,_aeg :=_cdbg .checkOp (_ead ,1,true );!_bgbeb {_e .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aeg );return _aeg ;
};_dbfg :=_ff .TraceToDirectObject (_ead .Params [0]);_bed ,_bbf :=_ff .GetStringBytes (_dbfg );if !_bbf {_e .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_ead );
return _ff .ErrTypeError ;};_cdbg .nextLine ();return _cdbg .showText (_dbfg ,_bed ,_afe ,_abec );case "\u0022":if _fdf ,_cbdb :=_cdbg .checkOp (_ead ,3,true );!_fdf {_e .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cbdb );
return _cbdb ;};_affe ,_fbfb ,_ggcg :=_fgfb (_ead .Params [:2]);if _ggcg !=nil {return _ggcg ;};_bcbg :=_ff .TraceToDirectObject (_ead .Params [2]);_ggea ,_cdce :=_ff .GetStringBytes (_bcbg );if !_cdce {_e .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_ead );
return _ff .ErrTypeError ;};_cdbg .setCharSpacing (_affe );_cdbg .setWordSpacing (_fbfb );_cdbg .nextLine ();return _cdbg .showText (_bcbg ,_ggea ,_afe ,_abec );case "\u0054\u004c":_eabfg ,_fcea :=_dea (_ead );if _fcea !=nil {_e .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fcea );
return _fcea ;};_cdbg .setTextLeading (_eabfg );case "\u0054\u0063":_gdbd ,_afdd :=_dea (_ead );if _afdd !=nil {_e .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_afdd );return _afdd ;};_cdbg .setCharSpacing (_gdbd );
case "\u0054\u0066":if _ebc ,_fdeg :=_cdbg .checkOp (_ead ,2,true );!_ebc {_e .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fdeg );return _fdeg ;};_geg ,_cedc :=_ff .GetNameVal (_ead .Params [0]);
if !_cedc {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_ead );return _ff .ErrTypeError ;};_gcb ,_gcga :=_ff .GetNumberAsFloat (_ead .Params [1]);
if !_cedc {_e .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ead ,_gcga );
return _gcga ;};_gcga =_cdbg .setFont (_geg ,_gcb );_cdbg ._bdfc =_ad .Is (_gcga ,_ff .ErrNotSupported );if _gcga !=nil &&!_cdbg ._bdfc {return _gcga ;};case "\u0054\u006d":if _fbb ,_fcec :=_cdbg .checkOp (_ead ,6,true );!_fbb {_e .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fcec );
return _fcec ;};_ece ,_afgf :=_ff .GetNumbersAsFloat (_ead .Params );if _afgf !=nil {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_afgf );return _afgf ;};_cdbg .setTextMatrix (_ece );case "\u0054\u0072":if _aee ,_cdf :=_cdbg .checkOp (_ead ,1,true );
!_aee {_e .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cdf );return _cdf ;};_cbea ,_fcag :=_ff .GetIntVal (_ead .Params [0]);if !_fcag {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_ead );
return _ff .ErrTypeError ;};_cdbg .setTextRenderMode (_cbea );case "\u0054\u0073":if _cdff ,_fgg :=_cdbg .checkOp (_ead ,1,true );!_cdff {_e .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fgg );return _fgg ;
};_gcdda ,_dda :=_ff .GetNumberAsFloat (_ead .Params [0]);if _dda !=nil {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dda );return _dda ;};_cdbg .setTextRise (_gcdda );case "\u0054\u0077":if _cbgd ,_ggb :=_cdbg .checkOp (_ead ,1,true );
!_cbgd {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ggb );return _ggb ;};_bec ,_aga :=_ff .GetNumberAsFloat (_ead .Params [0]);if _aga !=nil {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aga );
return _aga ;};_cdbg .setWordSpacing (_bec );case "\u0054\u007a":if _eagd ,_dbdc :=_cdbg .checkOp (_ead ,1,true );!_eagd {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dbdc );return _dbdc ;};_cdcf ,_ebf :=_ff .GetNumberAsFloat (_ead .Params [0]);
if _ebf !=nil {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ebf );return _ebf ;};_cdbg .setHorizScaling (_cdcf );case "\u0063\u006d":if !_dgf {_aag ._gefd =_gceb .CTM ;};if _aag ._gefd .Singular (){_fcf :=_da .IdentityMatrix ().Translate (_aag ._gefd .Translation ());
_e .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_aag ._gefd ,_fcf );_aag ._gefd =_fcf ;};if _gegd {_e .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_aag ._gefd );};case "\u006d":if len (_ead .Params )!=2{_e .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_gcg );
return nil ;};_dbeb ,_dbfb :=_ff .GetNumbersAsFloat (_ead .Params );if _dbfb !=nil {return _dbfb ;};_aag .moveTo (_dbeb [0],_dbeb [1]);case "\u006c":if len (_ead .Params )!=2{_e .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_gcg );
return nil ;};_dbc ,_agb :=_ff .GetNumbersAsFloat (_ead .Params );if _agb !=nil {return _agb ;};_aag .lineTo (_dbc [0],_dbc [1]);case "\u0063":if len (_ead .Params )!=6{return _gcg ;};_edg ,_cfg :=_ff .GetNumbersAsFloat (_ead .Params );if _cfg !=nil {return _cfg ;
};_e .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_edg );_aag .cubicTo (_edg [0],_edg [1],_edg [2],_edg [3],_edg [4],_edg [5]);case "\u0076","\u0079":if len (_ead .Params )!=4{return _gcg ;
};_degd ,_fbbg :=_ff .GetNumbersAsFloat (_ead .Params );if _fbbg !=nil {return _fbbg ;};_e .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_degd );_aag .quadraticTo (_degd [0],_degd [1],_degd [2],_degd [3]);
case "\u0068":_aag .closePath ();case "\u0072\u0065":if len (_ead .Params )!=4{return _gcg ;};_eee ,_fbbf :=_ff .GetNumbersAsFloat (_ead .Params );if _fbbf !=nil {return _fbbf ;};_aag .drawRectangle (_eee [0],_eee [1],_eee [2],_eee [3]);_aag .closePath ();
case "\u0053":_aag .stroke (&_bgbe ._eff );_aag .clearPath ();case "\u0073":_aag .closePath ();_aag .stroke (&_bgbe ._eff );_aag .clearPath ();case "\u0046":_aag .fill (&_bgbe ._caa );_aag .clearPath ();case "\u0066","\u0066\u002a":_aag .closePath ();_aag .fill (&_bgbe ._caa );
_aag .clearPath ();case "\u0042","\u0042\u002a":_aag .fill (&_bgbe ._caa );_aag .stroke (&_bgbe ._eff );_aag .clearPath ();case "\u0062","\u0062\u002a":_aag .closePath ();_aag .fill (&_bgbe ._caa );_aag .stroke (&_bgbe ._eff );_aag .clearPath ();case "\u006e":_aag .clearPath ();
case "\u0044\u006f":if len (_ead .Params )==0{_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_ead .Params );
return _ff .ErrRangeError ;};_gbea ,_aea :=_ff .GetName (_ead .Params [0]);if !_aea {_e .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_ead .Params [0]);
return _ff .ErrTypeError ;};_ ,_aeb :=_fceb .GetXObjectByName (*_gbea );if _aeb !=_b .XObjectTypeForm {break ;};_fbcb ,_aea :=_bdd ._cdgf [_gbea .String ()];if !_aea {_adfc ,_cbfc :=_fceb .GetXObjectFormByName (*_gbea );if _cbfc !=nil {_e .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_cbfc );
return _cbfc ;};_bggg ,_cbfc :=_adfc .GetContentStream ();if _cbfc !=nil {_e .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_cbfc );return _cbfc ;};_ebfg :=_adfc .Resources ;if _ebfg ==nil {_ebfg =_fceb ;};_gaba :=_gceb .CTM ;if _ege ,_eeb :=_ff .GetArray (_adfc .Matrix );
_eeb {_aec ,_bac :=_ege .GetAsFloat64Slice ();if _bac !=nil {return _bac ;};if len (_aec )!=6{return _gcg ;};_aebb :=_da .NewMatrix (_aec [0],_aec [1],_aec [2],_aec [3],_aec [4],_aec [5]);_gaba =_gceb .CTM .Mult (_aebb );};_ecc ,_egac ,_dbbc ,_cbfc :=_bdd .extractPageText (string (_bggg ),_ebfg ,_bggd .Mult (_gaba ),_dffg +1,false );
if _cbfc !=nil {_e .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_cbfc );return _cbfc ;};_fbcb =textResult {*_ecc ,_egac ,_dbbc };_bdd ._cdgf [_gbea .String ()]=_fbcb ;};_aag ._gefd =_gceb .CTM ;if _gegd {_e .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_aag ._gefd );
};_bgbe ._eca =append (_bgbe ._eca ,_fbcb ._bbe ._eca ...);_bgbe ._eff =append (_bgbe ._eff ,_fbcb ._bbe ._eff ...);_bgbe ._caa =append (_bgbe ._caa ,_fbcb ._bbe ._caa ...);_ffg ._gedec +=_fbcb ._dcg ;_ffg ._ffed +=_fbcb ._ebbe ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_cdbg ._fcbe .ColorspaceNonStroking =_gceb .ColorspaceNonStroking ;
_cdbg ._fcbe .ColorNonStroking =_gceb .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_cdbg ._fcbe .ColorspaceStroking =_gceb .ColorspaceStroking ;_cdbg ._fcbe .ColorStroking =_gceb .ColorStroking ;
};return nil ;});_bae =_bccf .Process (_ede );if _bdd ._ddde !=nil &&_bdd ._ddde .IncludeAnnotations &&!_dgf {for _ ,_gba :=range _bdd ._bge {_agba ,_fffg :=_ff .GetDict (_gba .AP );if !_fffg {continue ;};_bag ,_fffg :=_agba .Get ("\u004e").(*_ff .PdfObjectStream );
if !_fffg {continue ;};_dgbb ,_fac :=_ff .DecodeStream (_bag );if _fac !=nil {_e .Log .Debug ("\u0045\u0072\u0072\u006f\u0072\u0020\u006f\u006e\u0020\u0064\u0065c\u006f\u0064\u0065\u0020\u0073\u0074\u0072\u0065\u0061\u006d:\u0020\u0025\u0076",_fac );continue ;
};_ebd :=_bag .PdfObjectDictionary .Get ("\u0052e\u0073\u006f\u0075\u0072\u0063\u0065s");_gedg ,_fac :=_b .NewPdfPageResourcesFromDict (_ebd .(*_ff .PdfObjectDictionary ));if _fac !=nil {_e .Log .Debug ("\u0045\u0072\u0072\u006f\u0072 \u006f\u006e\u0020\u0067\u0065\u0074\u0074\u0069\u006e\u0067\u0020\u0061\u006en\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0020\u0072\u0065\u0073\u006f\u0075\u0072\u0063\u0065\u0073\u003a\u0020\u0025\u0076",_fac );
continue ;};_abb :=_da .IdentityMatrix ();_cgag ,_fffg :=_bag .PdfObjectDictionary .Get ("\u004d\u0061\u0074\u0072\u0069\u0078").(*_ff .PdfObjectArray );if _fffg {_ffa ,_feb :=_cgag .GetAsFloat64Slice ();if _feb !=nil {_e .Log .Debug ("\u0045\u0072\u0072or\u0020\u006f\u006e\u0020\u0067\u0065\u0074\u0074\u0069n\u0067 \u0066l\u006fa\u0074\u0036\u0034\u0020\u0073\u006c\u0069\u0063\u0065\u003a\u0020\u0025\u0076",_feb );
continue ;};if len (_ffa )!=6{_e .Log .Debug ("I\u006e\u0076\u0061\u006c\u0069\u0064 \u006d\u0061\u0074\u0072\u0069\u0078\u0020\u0073\u006ci\u0063\u0065\u0020l\u0065n\u0067\u0074\u0068");continue ;};_abb =_da .NewMatrix (_ffa [0],_ffa [1],_ffa [2],_ffa [3],_ffa [4],_ffa [5]);
};_fba ,_fffg :=_bdd ._dfd [_bag .String ()];if !_fffg {_gfae ,_fgac ,_cad ,_caf :=_bdd .extractPageText (string (_dgbb ),_gedg ,_abb ,_dffg +1,true );if _caf !=nil {_e .Log .Debug ("\u0045\u0052R\u004f\u0052\u0020\u0065x\u0074\u0072a\u0063\u0074\u0069\u006e\u0067\u0020\u0061\u006en\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0020\u0074\u0065\u0078\u0074s\u003a\u0020\u0025\u0076",_caf );
continue ;};_fba =textResult {*_gfae ,_fgac ,_cad };_bdd ._dfd [_bag .String ()]=_fba ;};_bgbe ._eca =append (_bgbe ._eca ,_fba ._bbe ._eca ...);_bgbe ._eff =append (_bgbe ._eff ,_fba ._bbe ._eff ...);_bgbe ._caa =append (_bgbe ._caa ,_fba ._bbe ._caa ...);
_ffg ._gedec +=_fba ._dcg ;_ffg ._ffed +=_fba ._ebbe ;};};return _bgbe ,_ffg ._gedec ,_ffg ._ffed ,_bae ;};
// _gfeg walks the keys of `_agbd` in the order returned by _bcef and, for every non-nil entry,
// truncates the entry's last element to a later entry's first element when the former is larger.
// Its debug output labels it "fixCells". Maps with one entry or fewer are left untouched.
//
// NOTE(review): `_fgddg` and `_dcgge` are both read from `_agbd[_agbaa]`, so the comparison below
// tests a slice against itself and then stores that slice under the previously visited key
// `_eedab`. The debug labels "k0"/"k1" suggest `_dcgge` was meant to be `_agbd[_eedab]` - confirm
// before relying on this function to clip neighbouring entries.
func _gfeg (_agbd map[int ][]float64 ){if len (_agbd )<=1{return ;};_cfeff :=_bcef (_agbd );if _aada {_e .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_cfeff );
};
// Find the first key whose entry is non-nil; `_adabe` is its position in `_cfeff`.
var _adabe ,_eedab int ;for _adabe ,_eedab =range _cfeff {if _agbd [_eedab ]!=nil {break ;};};for _faeb ,_agbaa :=range _cfeff [_adabe :]{_fgddg :=_agbd [_agbaa ];if _fgddg ==nil {continue ;};if _aada {_bf .Printf ("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_adabe +_faeb ,_eedab ,_agbaa );
};_dcgge :=_agbd [_agbaa ];if _dcgge [len (_dcgge )-1]> _fgddg [0]{_dcgge [len (_dcgge )-1]=_fgddg [0];_agbd [_eedab ]=_dcgge ;};_eedab =_agbaa ;};};
// _gced returns the upper y of `_fbce` minus the lower y of the bounding box of `_ebfbb`.
func _gced (_fbce _b .PdfRectangle ,_ebfbb bounded )float64 {return _fbce .Ury -_ebfbb .bbox ().Lly };
// Render mode bit flags. Each constant is a distinct power of two (1, 2, 4) so they can be OR-ed.
const (
// RenderModeStroke is the flag with value 1.
RenderModeStroke RenderMode =1<<iota ;
// RenderModeFill is the flag with value 2.
RenderModeFill ;
// RenderModeClip is the flag with value 4.
RenderModeClip ;);
// size returns the number of entries currently on the stack.
func (_aad *stateStack )size ()int {return len (*_aad )};
// fontsize returns the `_efde` value of the paragraph's first line.
// It indexes `_bbgab[0]`, so the paragraph must contain at least one line.
func (_fdec *textPara )fontsize ()float64 {return _fdec ._bbgab [0]._efde };
// getListLines returns the lines of the paragraph whose first element `_edge[0]._deccc[0]` is
// accepted by _bfecf, followed by the lines returned by _aaba for the whole paragraph.
func (_dfacc *textPara )getListLines ()[]*textLine {var _aedgf []*textLine ;_eegd :=_aaba (_dfacc ._bbgab );
for _ ,_cggc :=range _dfacc ._bbgab {_bfcd :=_cggc ._edge [0]._deccc [0];if _bfecf (_bfcd ){_aedgf =append (_aedgf ,_cggc );};};_aedgf =append (_aedgf ,_eegd ...);return _aedgf ;};
// _dcgdf builds a ruling of kind `_eeeae` positioned at the left edge (Llx) of `_cagb` and
// spanning from its Lly to its Ury.
func _dcgdf (_cagb _b .PdfRectangle )*ruling {return &ruling {_facdf :_eeeae ,_efadf :_cagb .Llx ,_efbba :_cagb .Lly ,_daba :_cagb .Ury };
};
// textState holds the text state parameters used while rendering text.
type textState struct{
// _bggb is the character spacing (assigned by setCharSpacing, logged as "tc" in renderText).
_bggb float64 ;
// _fagd is the word spacing (assigned by setWordSpacing, logged as "tw" in renderText).
_fagd float64 ;
// _ddcc is divided by 100 in renderText and logged there as "th".
_ddcc float64 ;_efda float64 ;
// _gec is logged as the font size ("tfs"/"fontSize") in renderText.
_gec float64 ;_ddgg RenderMode ;
// _gcfd is used by renderText as the vertical translation of the per-glyph matrix.
_gcfd float64 ;_ggec *_b .PdfFont ;_dgbba _b .PdfRectangle ;
// _gedec and _ffed accumulate the character and miss counts reported by renderText.
_gedec int ;_ffed int ;};
// addPoint converts (`_ccab`, `_afbe`) with devicePoint and appends it to the subpath returned by
// establishSubpath. When no subpath is returned the point is remembered in `_dfca` and `_cbeb`
// is set instead.
func (_eded *shapesState )addPoint (_ccab ,_afbe float64 ){_ffae :=_eded .establishSubpath ();
_dgg :=_eded .devicePoint (_ccab ,_afbe );if _ffae ==nil {_eded ._cbeb =true ;_eded ._dfca =_dgg ;}else {_ffae .add (_dgg );};};
// blocked reports whether the bag's rulings separate `_ffga` from the bag.
// The x test is only made when the word lies entirely left or right of the bag (using
// `_dbgbc`), and the y test only when it lies entirely below or above it (using `_edfe`).
func (_deedb *wordBag )blocked (_ffga *textWord )bool {if _ffga .Urx < _deedb .Llx {_geee :=_fdab (_ffga .PdfRectangle );_bgd :=_dcgdf (_deedb .PdfRectangle );
if _deedb ._dbgbc .blocks (_geee ,_bgd ){if _aadc {_e .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_ffga ,_deedb );};return true ;};}else if _deedb .Urx < _ffga .Llx {_fedb :=_fdab (_deedb .PdfRectangle );
_fab :=_dcgdf (_ffga .PdfRectangle );if _deedb ._dbgbc .blocks (_fedb ,_fab ){if _aadc {_e .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s",_ffga ,_deedb );};return true ;};};if _ffga .Ury < _deedb .Lly {_eafbf :=_acaf (_ffga .PdfRectangle );
_efa :=_gdcb (_deedb .PdfRectangle );if _deedb ._edfe .blocks (_eafbf ,_efa ){if _aadc {_e .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_ffga ,_deedb );};return true ;};}else if _deedb .Ury < _ffga .Lly {_acba :=_acaf (_deedb .PdfRectangle );
_efae :=_gdcb (_ffga .PdfRectangle );if _deedb ._edfe .blocks (_acba ,_efae ){if _aadc {_e .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s",_ffga ,_deedb );};return true ;};};return false ;};
// alignsSec reports whether the [`_efbba`, `_daba`] ranges of the two rulings overlap, allowing
// a tolerance of `_acgdd`+1.0 at the lower end of each range.
func (_eece *ruling )alignsSec (_dffeg *ruling )bool {const _dcbbe =_acgdd +1.0;
return _eece ._efbba -_dcbbe <=_dffeg ._daba &&_dffeg ._efbba -_dcbbe <=_eece ._daba ;};
// splitSec sorts the list in place by `_efbba` then `_daba` and partitions it into groups. A
// ruling joins a group when alignsSec holds between it and any ruling already in that group.
// It indexes element 0 of the list, so the list must not be empty.
func (_fafg rulingList )splitSec ()[]rulingList {_fd .Slice (_fafg ,func (_acgdg ,_fedbb int )bool {_dede ,_bdgf :=_fafg [_acgdg ],_fafg [_fedbb ];if _dede ._efbba !=_bdgf ._efbba {return _dede ._efbba < _bdgf ._efbba ;
};return _dede ._daba < _bdgf ._daba ;});
// `_cgdfe` records the rulings that have already been assigned to a group.
_cgdfe :=make (map[*ruling ]struct{},len (_fafg ));_babgd :=func (_ffff *ruling )rulingList {_bgccf :=rulingList {_ffff };_cgdfe [_ffff ]=struct{}{};for _ ,_bgdcb :=range _fafg {if _ ,_eebdf :=_cgdfe [_bgdcb ];_eebdf {continue ;
};for _ ,_degef :=range _bgccf {if _bgdcb .alignsSec (_degef ){_bgccf =append (_bgccf ,_bgdcb );_cgdfe [_bgdcb ]=struct{}{};break ;};};};return _bgccf ;};_dccd :=[]rulingList {_babgd (_fafg [0])};for _ ,_dcgb :=range _fafg [1:]{if _ ,_bcgd :=_cgdfe [_dcgb ];
_bcgd {continue ;};_dccd =append (_dccd ,_babgd (_dcgb ));};return _dccd ;};
// toTextMarks converts every mark in the word with ToTextMark and accumulates the results
// through _fcef, passing `_gedea` along to it.
func (_ffcd *textWord )toTextMarks (_gedea *int )[]TextMark {var _fdcgg []TextMark ;for _ ,_ccga :=range _ffcd ._bggbb {_fdcgg =_fcef (_fdcgg ,_gedea ,_ccga .ToTextMark ());};return _fdcgg ;
};
// newSubPath clears the current path and, when `_gegd` is set, logs the state.
func (_acca *shapesState )newSubPath (){_acca .clearPath ();if _gegd {_e .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_acca );};};
// _bagcg sorts `_affb` in place by ascending `_dce`.
func _bagcg (_affb []*textLine ){_fd .Slice (_affb ,func (_befaa ,_aadb int )bool {_agad ,_ffbfb :=_affb [_befaa ],_affb [_aadb ];
return _agad ._dce < _ffbfb ._dce ;});};
// _fgfb converts exactly two operands to floats and returns them in order.
// It returns an error when `_cgee` does not hold two elements or a conversion fails.
func _fgfb (_cgee []_ff .PdfObject )(_gdggc ,_cbde float64 ,_gbcfc error ){if len (_cgee )!=2{return 0,0,_bf .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_cgee ));
};_gfdb ,_gbcfc :=_ff .GetNumbersAsFloat (_cgee );if _gbcfc !=nil {return 0,0,_gbcfc ;};return _gfdb [0],_gfdb [1],nil ;};
// comp is a less function over two indexes of the list. Rulings are ordered by kind (larger kind
// first); rulings of kind `_aagc` never compare less. Within a kind they are ordered by
// `_efadf`, then `_efbba`, then `_daba`; every one of these comparisons is inverted for kinds
// other than `_geggc`.
func (_gefc rulingList )comp (_dbff ,_dabbe int )bool {_gdaa ,_dgef :=_gefc [_dbff ],_gefc [_dabbe ];_bggae ,_ccaf :=_gdaa ._facdf ,_dgef ._facdf ;
if _bggae !=_ccaf {return _bggae > _ccaf ;};if _bggae ==_aagc {return false ;};_egadc :=func (_ccgd bool )bool {if _bggae ==_geggc {return _ccgd ;};return !_ccgd ;};_adbe ,_begd :=_gdaa ._efadf ,_dgef ._efadf ;if _adbe !=_begd {return _egadc (_adbe > _begd );
};_adbe ,_begd =_gdaa ._efbba ,_dgef ._efbba ;if _adbe !=_begd {return _egadc (_adbe < _begd );};return _egadc (_gdaa ._daba < _dgef ._daba );};
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct{
// Text is the extracted text.
Text string ;
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
// BBox is the bounding box of the text.
BBox _b .PdfRectangle ;
// Font is the font the text was drawn with.
Font *_b .PdfFont ;
// FontSize is the font size the text was drawn with.
FontSize float64 ;
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
FillColor _dd .Color ;
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
StrokeColor _dd .Color ;
// Orientation is the text orientation
Orientation int ;
// DirectObject is the underlying PdfObject (Text Object) that represents the visible texts. This is introduced to get
// a simple access to the TextObject in case editing or replacement of some text is needed. E.g during redaction.
DirectObject _ff .PdfObject ;
// ObjString is a decoded string operand of a text-showing operator. It has the same value as `Text` attribute except
// when many glyphs are represented with the same Text Object that contains multiple length string operand in which case
// ObjString spans more than one character string that falls in different TextMark objects.
ObjString []string ;
// NOTE(review): Tw, Th, Tc and Index are not populated anywhere in this part of the file.
// Presumably Tw/Th/Tc are the word spacing, horizontal scaling and character spacing in effect
// and Index is the position of the mark within ObjString - confirm against newTextMark.
Tw float64 ;Th float64 ;Tc float64 ;Index int ;_gdfc bool ;_baeg *TextTable ;};
// renderText turns the bytes of one text-showing string operand into text marks appended to
// `_daeb`, advancing the text matrix `_cdffg` after every emitted glyph.
//
// `_bged` is passed through to newTextMark, `_dbfc` holds the raw operand bytes, `_dcb` is passed
// through to newTextMark as its last argument and `_cdgg` is forwarded to CharcodesToStrings.
//
// It returns nil without doing anything when the `_bdfc` flag is set, and an error when the
// font has no metrics for one of the character codes.
func (_efbe *textObject )renderText (_bged _ff .PdfObject ,_dbfc []byte ,_dcb int ,_cdgg string )error {if _efbe ._bdfc {_e .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");
return nil ;};
// Map the operand bytes to character codes and the codes to strings. `_adfa` and `_cdcb` are
// the counts logged below as numChars and numMisses.
_abg :=_efbe .getCurrentFont ();_dbbg :=_abg .BytesToCharcodes (_dbfc );_acff ,_adfa ,_cdcb :=_abg .CharcodesToStrings (_dbbg ,_cdgg );if _cdcb > 0{_e .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_adfa ,_cdcb );
};_efbe ._eef ._gedec +=_adfa ;_efbe ._eef ._ffed +=_cdcb ;_dface :=_efbe ._eef ;_dbge :=_dface ._gec ;_bcad :=_dface ._ddcc /100.0;
// `_ffede` scales glyph metrics; Type3 fonts use a scale of 1 instead of `_fgcg`.
_ffede :=_fgcg ;if _abg .Subtype ()=="\u0054\u0079\u0070e\u0033"{_ffede =1;};
// Width of a space: rune metrics first, then char code 32, then the default font.
_gaca ,_cgaff :=_abg .GetRuneMetrics (' ');
if !_cgaff {_gaca ,_cgaff =_abg .GetCharMetrics (32);};if !_cgaff {_gaca ,_ =_b .DefaultFont ().GetRuneMetrics (' ');};_edgf :=_gaca .Wx *_ffede ;_e .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_edgf ,_acff ,_abg ,_dbge );
// `_cacbf` scales by font size (x additionally by `_bcad`) and translates vertically by `_gcfd`.
_cacbf :=_da .NewMatrix (_dbge *_bcad ,0,0,_dbge ,0,_dface ._gcfd );if _dgfb {_e .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_dbbg ),_dbbg ,_acff );
};_e .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_dbbg ),_dbbg ,len (_acff ));_fcddg :=_efbe .getFillColor ();
_ebag :=_efbe .getStrokeColor ();
// One iteration per decoded string; a string consisting of the single rune 0 is skipped.
for _egb ,_ffbg :=range _acff {_deb :=[]rune (_ffbg );if len (_deb )==1&&_deb [0]=='\x00'{continue ;};_gfgg :=_dbbg [_egb ];_aeac :=_efbe ._fcbe .CTM .Mult (_efbe ._cdffg ).Mult (_cacbf );
// Word spacing `_fagd` is only applied to a string that is exactly one space (rune 32).
_cgdb :=0.0;if len (_deb )==1&&_deb [0]==32{_cgdb =_dface ._fagd ;
};_dddee ,_ffde :=_abg .GetCharMetrics (_gfgg );if !_ffde {_e .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_gfgg ,_deb ,_deb ,_abg );
return _bf .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_abg .String (),_gfgg );};_gedf :=_da .Point {X :_dddee .Wx *_ffede ,Y :_dddee .Wy *_ffede };
// `_fcbg` is the horizontal advance without character spacing `_bggb`; `_ffaf` includes it.
_fcbg :=_da .Point {X :(_gedf .X *_dbge +_cgdb )*_bcad };_ffaf :=_da .Point {X :(_gedf .X *_dbge +_dface ._bggb +_cgdb )*_bcad };if _dgfb {_e .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_dbge ,_dface ._bggb ,_dface ._fagd ,_bcad );
_e .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_gedf ,_fcbg ,_ffaf );};_eaf :=_cadc (_fcbg );_egd :=_cadc (_ffaf );_ebcf :=_efbe ._fcbe .CTM .Mult (_efbe ._cdffg ).Mult (_eaf );
if _gade {_e .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_efbe ._fcbe .CTM ,_efbe ._cdffg ,_egd ,_cbc (_efbe ._fcbe .CTM .Mult (_efbe ._cdffg ).Mult (_egd )),_eaf ,_ebcf ,_cbc (_ebcf ));
};
// A mark for which newTextMark reports false is dropped; note the text matrix is then not
// advanced for this glyph because of the `continue`.
_cadg ,_gfee :=_efbe .newTextMark (_gac .ExpandLigatures (_deb ),_aeac ,_cbc (_ebcf ),_ce .Abs (_edgf *_aeac .ScalingFactorX ()),_abg ,_efbe ._eef ._bggb ,_fcddg ,_ebag ,_bged ,_acff ,_egb ,_dcb );if !_gfee {_e .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");
continue ;};
// NOTE(review): `_abg` has already been dereferenced above, so the nil branch here cannot be
// reached with a nil font.
if _abg ==nil {_e .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _abg .Encoder ()==nil {_e .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_abg );
}else {if _dfdd ,_agg :=_abg .Encoder ().CharcodeToRune (_gfgg );_agg {_cadg ._fcfce =string (_dfdd );};};_e .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_egb ,_gfgg ,_cadg ,_aeac );
_efbe ._daeb =append (_efbe ._daeb ,&_cadg );_efbe ._cdffg .Concat (_egd );};return nil ;};
// inDiacriticArea reports whether `_fbfab` is close to the receiver: the sum of the differences
// of their left and right edges and the difference of their lower edges must each be smaller, in
// absolute value, than the receiver's width and height respectively, scaled by `_bcgc`.
func (_egbfd *textMark )inDiacriticArea (_fbfab *textMark )bool {_eabe :=_egbfd .Llx -_fbfab .Llx ;_fbdgg :=_egbfd .Urx -_fbfab .Urx ;_agcd :=_egbfd .Lly -_fbfab .Lly ;
return _ce .Abs (_eabe +_fbdgg )< _egbfd .Width ()*_bcgc &&_ce .Abs (_agcd )< _egbfd .Height ()*_bcgc ;};
// appendWord adds `_bgbf` to the line, combines the line's rectangle with the word's through
// _badbb, and raises the line's `_efde` and `_dce` to the word's `_ecdce` and `_bagdd` when the
// word's values are larger.
func (_baegg *textLine )appendWord (_bgbf *textWord ){_baegg ._edge =append (_baegg ._edge ,_bgbf );_baegg .PdfRectangle =_badbb (_baegg .PdfRectangle ,_bgbf .PdfRectangle );
if _bgbf ._ecdce > _baegg ._efde {_baegg ._efde =_bgbf ._ecdce ;};if _bgbf ._bagdd > _baegg ._dce {_baegg ._dce =_bgbf ._bagdd ;};};
// textTable is a grid of paragraphs with a bounding rectangle.
// `_addag` is the number of columns (logged as "w" by compositeColCorridors) and `_cffff` the
// number of rows, as iterated by textPara.writeText. `_egbgg` holds the cell paragraphs.
type textTable struct{_b .PdfRectangle ;_addag ,_cffff int ;_cbbdd bool ;_egbgg map[uint64 ]*textPara ;_bdfcg map[uint64 ]compositeCell ;
};
// _aaee writes the text produced by _caabc for `_dfce` to `_cgdce`, then recurses into each
// element of `_ddfgc` with the prefix `*_bcdf` extended by three spaces.
func _aaee (_dfce *list ,_cgdce *_bfb .Builder ,_bcdf *string ){_ded :=_caabc (_dfce ,_bcdf );_cgdce .WriteString (_ded );for _ ,_cggf :=range _dfce ._ddfgc {_aaeg :=*_bcdf +"\u0020\u0020\u0020";_aaee (_cggf ,_cgdce ,&_aaeg );};};
// getParagraphs builds the paragraphs of the page. Rulings are gathered from `_eff` (when
// `_febc` is set) and from `_caa` (when `_bfce` is set) and passed through toTilings. The marks
// in `_eca` are then processed in four passes, one for each `_egba` value 0, 90, 180 and 270,
// stopping early once every mark has been consumed.
func (_bacb *PageText )getParagraphs ()paraList {var _ggce rulingList ;
if _febc {_abbd :=_gaec (_bacb ._eff );_ggce =append (_ggce ,_abbd ...);};if _bfce {_bgef :=_defa (_bacb ._caa );_ggce =append (_ggce ,_bgef ...);};_ggce ,_egdb :=_ggce .toTilings ();var _fbbfc paraList ;_gfgd :=len (_bacb ._eca );for _eedg :=0;_eedg < 360&&_gfgd > 0;
_eedg +=90{_fdge :=make ([]*textMark ,0,len (_bacb ._eca )-_gfgd );for _ ,_fbeae :=range _bacb ._eca {if _fbeae ._egba ==_eedg {_fdge =append (_fdge ,_fbeae );};};if len (_fdge )> 0{_dgcg :=_fabd (_fdge ,_bacb ._bfaf ,_ggce ,_egdb );_fbbfc =append (_fbbfc ,_dgcg ...);
_gfgd -=len (_fdge );};};return _fbbfc ;};
// _eafg maps each markKind to its display name: "stroke", "fill" or "augment".
var _eafg =map[markKind ]string {_eede :"\u0073\u0074\u0072\u006f\u006b\u0065",_befdb :"\u0066\u0069\u006c\u006c",_ecdge :"\u0061u\u0067\u006d\u0065\u006e\u0074"};
// _baag groups the lines of every paragraph, and of every table cell paragraph, by the `_faad`
// value of the first mark of the line's first word. Lines rejected by _gbdg are logged and
// skipped.
func _baag (_aadf *paraList )map[int ][]*textLine {_daa :=map[int ][]*textLine {};
for _ ,_beed :=range *_aadf {for _ ,_faa :=range _beed ._bbgab {if !_gbdg (_faa ){_e .Log .Debug ("g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e");
continue ;};_faba :=_faa ._edge [0]._bggbb [0]._faad ;_daa [_faba ]=append (_daa [_faba ],_faa );};if _beed ._cegg !=nil {_dadab :=_beed ._cegg ._egbgg ;for _ ,_gdegf :=range _dadab {for _ ,_dccb :=range _gdegf ._bbgab {if !_gbdg (_dccb ){_e .Log .Debug ("g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e");
continue ;};_ebg :=_dccb ._edge [0]._bggbb [0]._faad ;_daa [_ebg ]=append (_daa [_ebg ],_dccb );};};};};return _daa ;};
// _deedf returns the left edge of `_dfba`'s bounding box minus the left edge of `_bfcb`'s.
func _deedf (_dfba ,_bfcb bounded )float64 {return _dfba .bbox ().Llx -_bfcb .bbox ().Llx };
// String returns a human-readable description of the ruling: its kind, its position on the
// primary axis, its extent and length on the secondary axis, and its width when non-zero.
func (_dacfb *ruling) String() string {
	kind := _dacfb._facdf
	if kind == _aagc {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}

	// Axis labels are swapped for rulings of kind _geggc.
	primaryAxis, secondaryAxis := "\u0078", "\u0079"
	if kind == _geggc {
		primaryAxis, secondaryAxis = secondaryAxis, primaryAxis
	}

	var widthNote string
	if _dacfb._geeb != 0.0 {
		widthNote = _bf.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _dacfb._geeb)
	}

	return _bf.Sprintf(
		"\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",
		_dacfb._facdf,
		primaryAxis, _dacfb._efadf,
		secondaryAxis, _dacfb._efbba, _dacfb._daba,
		_dacfb._daba-_dacfb._efbba,
		_dacfb._bagdc,
		_dacfb.Color,
		widthNote,
	)
}

// compositeColCorridors returns a map with one nil entry per table column, keyed 0.._addag-1.
func (_fcgaf *textTable) compositeColCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, _fcgaf._addag)
	if _aada {
		_e.Log.Info("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020", _fcgaf._addag)
	}
	for col := 0; col < _fcgaf._addag; col++ {
		corridors[col] = nil
	}
	return corridors
}

// log prints the list under the heading `_fdgfa`, one ruling per line, when _ccffg is enabled.
func (_egdga rulingList) log(_fdgfa string) {
	if _ccffg {
		_e.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _fdgfa, _egdga.String())
		for idx, entry := range _egdga {
			_bf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, entry.String())
		}
	}
}

// _abag builds a ruling from the segment `_ccbg`-`_abeb` with colour `_dggg`.
// It returns (nil, false) when _cdcg classifies the segment as kind _aagc.
func _abag(_ccbg, _abeb _da.Point, _dggg _dd.Color) (*ruling, bool) {
	kind := _cdcg(_ccbg, _abeb)
	if kind == _aagc {
		return nil, false
	}
	segment := lineRuling{_eefea: _ccbg, _edcb: _abeb, _becb: kind, Color: _dggg}
	return segment.asRuling()
}

// setWordSpacing stores `_badb` as the word spacing of the text state. A nil receiver is a no-op.
func (_bbd *textObject) setWordSpacing(_badb float64) {
	if _bbd != nil {
		_bbd._eef._fagd = _badb
	}
}

// setCharSpacing stores `_cda` as the character spacing of the text state and logs the new
// state when _dgfb is enabled. A nil receiver is a no-op.
func (_dgba *textObject) setCharSpacing(_cda float64) {
	if _dgba != nil {
		_dgba._eef._bggb = _cda
		if _dgfb {
			_e.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _cda, _dgba._eef.String())
		}
	}
}

// gridTile is a rectangle carrying four boolean flags.
type gridTile struct {
	_b.PdfRectangle
	_fdde, _cfgg, _cega, _bbed bool
}

// readBefore reports whether paragraph `_dfbgb` is read before paragraph `_agfe`.
// It is true when _cfeb accepts the pair and the first paragraph is higher on the page, or when
// the first paragraph's `_cdcbd` lies entirely to the left of the second's and no other
// paragraph in the shared Lly range (looked up through `_bdafg`) overlaps both horizontally.
func (_fggef paraList) readBefore(_bdafg []int, _dfbgb, _agfe int) bool {
	first, second := _fggef[_dfbgb], _fggef[_agfe]
	if _cfeb(first, second) && first.Lly > second.Lly {
		return true
	}
	if !(first._cdcbd.Urx < second._cdcbd.Llx) {
		return false
	}

	// Order the two lower edges so that bottom <= top.
	bottom, top := first.Lly, second.Lly
	if bottom > top {
		bottom, top = top, bottom
	}
	leftBound := _ce.Max(first._cdcbd.Llx, second._cdcbd.Llx)
	rightBound := _ce.Min(first._cdcbd.Urx, second._cdcbd.Urx)

	for _, idx := range _fggef.llyRange(_bdafg, bottom, top) {
		if idx == _dfbgb || idx == _agfe {
			continue
		}
		other := _fggef[idx]
		if other._cdcbd.Llx <= rightBound && leftBound <= other._cdcbd.Urx {
			return false
		}
	}
	return true
}
// Append adds `mark` to the end of the mark array.
func (_deed *TextMarkArray) Append(mark TextMark) {
	_deed._gaac = append(_deed._gaac, mark)
}

// intSet is a set of ints stored as map keys.
type intSet map[int]struct{}

// bbox returns the smallest rectangle containing every point of every subpath in the section.
// It reads the first point of the first subpath unconditionally, so both must exist.
func (_cae pathSection) bbox() _b.PdfRectangle {
	seed := _cae._cdad[0]._gfb[0]
	box := _b.PdfRectangle{Llx: seed.X, Lly: seed.Y, Urx: seed.X, Ury: seed.Y}
	for i, sub := range _cae._cdad {
		points := sub._gfb
		if i == 0 {
			// The seed point is already part of the box.
			points = points[1:]
		}
		for _, pt := range points {
			switch {
			case pt.X < box.Llx:
				box.Llx = pt.X
			case pt.X > box.Urx:
				box.Urx = pt.X
			}
			switch {
			case pt.Y < box.Lly:
				box.Lly = pt.Y
			case pt.Y > box.Ury:
				box.Ury = pt.Y
			}
		}
	}
	return box
}

// depthRange returns, in ascending order, the keys of `_ecfd` within [`_aeeed`, `_eagf`].
// It returns nil when no key falls in that range.
func (_ceea *wordBag) depthRange(_aeeed, _eagf int) []int {
	var selected []int
	for depth := range _ceea._ecfd {
		if depth < _aeeed || depth > _eagf {
			continue
		}
		selected = append(selected, depth)
	}
	if len(selected) == 0 {
		return nil
	}
	_fd.Ints(selected)
	return selected
}

// vertsHorzs splits the list into the rulings of kind _eeeae (first result) and the rulings of
// kind _geggc (second result). Rulings of any other kind are dropped.
func (_eefeag rulingList) vertsHorzs() (rulingList, rulingList) {
	var first, second rulingList
	for _, r := range _eefeag {
		if r._facdf == _eeeae {
			first = append(first, r)
		} else if r._facdf == _geggc {
			second = append(second, r)
		}
	}
	return first, second
}

// _dcba reports whether the two points have identical coordinates.
func _dcba(_cedd, _fgegd _da.Point) bool {
	if _cedd.X != _fgegd.X {
		return false
	}
	return _cedd.Y == _fgegd.Y
}

// lastpointEstablished returns the remembered point `_dfca` when `_cbeb` is set, otherwise the
// last point of the final subpath when that subpath has `_dffd` set. In both of those cases the
// boolean result is false; it is true only when no point is available and a zero Point is
// returned.
func (_acbg *shapesState) lastpointEstablished() (_da.Point, bool) {
	if _acbg._cbeb {
		return _acbg._dfca, false
	}
	if count := len(_acbg._fgdb); count > 0 {
		if _acbg._fgdb[count-1]._dffd {
			return _acbg._fgdb[count-1].last(), false
		}
	}
	return _da.Point{}, true
}

// _agge matches either of the patterns _abcd and _ebcdf.
var _agge = _df.MustCompile(_abcd + "\u007c" + _ebcdf)

// _bacbe appends a copy of the template mark _caaa, with its Text set to `_fafd`, to `_efcg`
// through _fcef.
func _bacbe(_efcg []TextMark, _fdbg *int, _fafd string) []TextMark {
	filler := _caaa
	filler.Text = _fafd
	return _fcef(_efcg, _fdbg, filler)
}

// _dag sets every element of `_gf` with index in [`_gab`, `_ec`) to `_dde`.
func _dag(_gf []string, _gab int, _ec int, _dde string) {
	pos := _gab
	for pos < _ec {
		_gf[pos] = _dde
		pos++
	}
}

// writeText writes the paragraph's text to `_afcgb`. A paragraph without a table is written as
// a single cell. A table is written row by row: each cell is followed by a space, an empty cell
// is written as a tab, and rows are separated by a newline.
func (_ceca *textPara) writeText(_afcgb _f.Writer) {
	table := _ceca._cegg
	if table == nil {
		_ceca.writeCellText(_afcgb)
		return
	}
	for row := 0; row < table._cffff; row++ {
		for col := 0; col < table._addag; col++ {
			if cell := table.get(col, row); cell != nil {
				_bagcg(cell._bbgab)
				cell.writeCellText(_afcgb)
			} else {
				_afcgb.Write([]byte("\u0009"))
			}
			_afcgb.Write([]byte("\u0020"))
		}
		if row < table._cffff-1 {
			_afcgb.Write([]byte("\u000a"))
		}
	}
}

// toTextMarks returns the paragraph's text as TextMarks, laid out as in writeText: a tab for an
// empty cell, a space after every cell and a newline between rows. When the table is
// exportable, the marks are additionally passed through _egfd together with its TextTable form.
func (_cbbg *textPara) toTextMarks(_eaaf *int) []TextMark {
	table := _cbbg._cegg
	if table == nil {
		return _cbbg.toCellTextMarks(_eaaf)
	}
	var marks []TextMark
	for row := 0; row < table._cffff; row++ {
		for col := 0; col < table._addag; col++ {
			if cell := table.get(col, row); cell != nil {
				marks = append(marks, cell.toCellTextMarks(_eaaf)...)
			} else {
				marks = _bacbe(marks, _eaaf, "\u0009")
			}
			marks = _bacbe(marks, _eaaf, "\u0020")
		}
		if row < table._cffff-1 {
			marks = _bacbe(marks, _eaaf, "\u000a")
		}
	}
	if table.isExportable() {
		exported := table.toTextTable()
		marks = _egfd(marks, &exported)
	}
	return marks
}
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// The two int results are passed through unchanged from the internal extractPageText call.
// A model.ErrColorOutOfRange error from that call is tolerated; any other error aborts.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
//
// Replace with a function like Extract() (*PageText, error)
func (_cgbc *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pageText, numChars, numMisses, err := _cgbc.extractPageText(_cgbc._ddgf, _cgbc._eba, _da.IdentityMatrix(), 0, false)
	switch err {
	case nil, _b.ErrColorOutOfRange:
		// Carry on with whatever was extracted.
	default:
		return nil, 0, 0, err
	}

	// The extraction-process option has to be in place before the views are computed.
	if _cgbc._ddde != nil {
		pageText._ddfd._gabb = _cgbc._ddde.UseSimplerExtractionProcess
	}
	pageText.computeViews()

	if err = _bbeea(pageText); err != nil {
		return nil, 0, 0, err
	}

	if _cgbc._ddde != nil {
		// Cropping is applied only when requested and when an area is known.
		if _cgbc._ddde.ApplyCropBox && _cgbc._fce != nil {
			pageText.ApplyArea(*_cgbc._fce)
		}
		pageText._ddfd._bfgf = _cgbc._ddde.DisableDocumentTags
	}
	return pageText, numChars, numMisses, nil
}
// ExtractFonts returns all font information from the page extractor, including
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
//
// `previousPageFonts` is used when building a font catalog across several pages or a whole document:
// its entries are appended to the result unless a font with the same FontName is already present
// for this page, so the result holds no duplicates.
//
// NOTE: If previousPageFonts is nil, only the fonts of this page are returned.
func (_fe *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	var pageFonts PageFonts
	if err := pageFonts.extractPageResourcesToFont(_fe._eba); err != nil {
		return nil, err
	}
	if previousPageFonts != nil {
		for _, font := range previousPageFonts.Fonts {
			if _cac(pageFonts.Fonts, font.FontName) {
				continue
			}
			pageFonts.Fonts = append(pageFonts.Fonts, font)
		}
	}
	return &PageFonts{Fonts: pageFonts.Fonts}, nil
}

// getDepthIdx converts the depth `_bfec` to a depth index with _fdd and clamps it to the
// range spanned by the first and last entries of depthIndexes().
func (_babe *wordBag) getDepthIdx(_bfec float64) int {
	indexes := _babe.depthIndexes()
	idx := _fdd(_bfec)
	lowest, highest := indexes[0], indexes[len(indexes)-1]
	switch {
	case idx < lowest:
		return lowest
	case idx > highest:
		return highest
	}
	return idx
}

// tidied returns the rulings after removeDuplicates, snapToGroups and sort have been applied,
// or nil if snapToGroups yields nil. `_cfdca` is only used as a label in the debug log.
func (_bdbb rulingList) tidied(_cfdca string) rulingList {
	uniques := _bdbb.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()

	if _ccffg {
		_e.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", _cfdca, len(_bdbb), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// findTables sorts the paragraphs in place (ordering given by _cacef) after linking neighbours,
// then collects the tables found by findGridTables and findTextTables, each search being
// gated by its own package-level switch (_fcbff and _cabf respectively).
func (_agbcg paraList) findTables(_gagbf []gridTiling) []*textTable {
	_agbcg.addNeighbours()
	_fd.Slice(_agbcg, func(i, j int) bool {
		return _cacef(_agbcg[i], _agbcg[j]) < 0
	})

	var tables []*textTable
	if _fcbff {
		tables = append(tables, _agbcg.findGridTables(_gagbf)...)
	}
	if _cabf {
		tables = append(tables, _agbcg.findTextTables()...)
	}
	return tables
}

// yNeighbours builds two events per paragraph (one at its lower y, one at its upper y) and
// hands them to eventNeighbours. When `_bbef` is non-zero each paragraph's y extent is widened
// on both sides by `_bbef` times the paragraph's fontsize().
func (_afceg paraList) yNeighbours(_bbef float64) map[*textPara][]int {
	events := make([]event, 2*len(_afceg))
	for i, para := range _afceg {
		lo, hi := para.Lly, para.Ury
		if !(_bbef == 0) {
			margin := _bbef * para.fontsize()
			lo -= margin
			hi += margin
		}
		events[2*i] = event{lo, true, i}
		events[2*i+1] = event{hi, false, i}
	}
	return _afceg.eventNeighbours(events)
}
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
//
// `start` and `end` are clamped to the offsets covered by `ma`. An error is returned when `ma`
// is nil, when end < start, when `start` lies beyond the last mark, or when no mark overlaps
// the range. An empty `ma` is returned as is. The result shares its backing array with `ma`.
func (_bbbdf *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if _bbbdf == nil {
		return nil, _ad.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _bf.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}

	marks := _bbbdf._gaac
	n := len(marks)
	if n == 0 {
		return _bbbdf, nil
	}

	// Clamp the requested range to the marks we hold.
	if start < marks[0].Offset {
		start = marks[0].Offset
	}
	if end > marks[n-1].Offset+1 {
		end = marks[n-1].Offset + 1
	}

	// First mark whose last byte is at or after `start`.
	iStart := _fd.Search(n, func(i int) bool {
		return marks[i].Offset+len(marks[i].Text)-1 >= start
	})
	if iStart >= n {
		return nil, _bf.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, iStart, n, marks[0], marks[n-1])
	}

	// First mark that starts at or after `end`. sort.Search returns n when there is no such
	// mark, which simply means the range runs through the final mark; that is a valid
	// exclusive upper bound for the slice below and must not be treated as an error.
	iEnd := _fd.Search(n, func(i int) bool {
		return marks[i].Offset > end-1
	})
	if iEnd <= iStart {
		return nil, _bf.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_gaac: marks[iStart:iEnd]}, nil
}

// setTextLeading stores `_abfd` as the leading value of the text state attached to the text
// object. It does nothing when the receiver is nil.
func (_ddge *textObject) setTextLeading(_abfd float64) {
	if _ddge == nil {
		return
	}
	_ddge._eef._efda = _abfd
}

// NOTE(review): the purpose of this constant is not visible in this part of the file.
const _bbg = 10
// TableInfo gets table information of the textmark `tm`.
// It returns (nil, nil) when the mark is not flagged as belonging to a table; otherwise it
// returns the table attached to the mark together with the result of the table's getCellInfo
// for this mark.
func (_ffecg *TextMark) TableInfo() (*TextTable, [][]int) {
	if !_ffecg._gdfc {
		return nil, nil
	}
	table := _ffecg._baeg
	cellInfo := table.getCellInfo(*_ffecg)
	return table, cellInfo
}

// _fcagb draws the image `_fafe` through the mask `_fcbfc` onto a new RGBA canvas and returns it.
// The canvas is as wide as the wider input and as tall as the taller one; whichever input does
// not already have that size is first rescaled to it with bilinear interpolation.
func _fcagb(_fafe, _fcbfc _ga.Image) _ga.Image {
	maskSize, imgSize := _fcbfc.Bounds().Size(), _fafe.Bounds().Size()

	width, height := maskSize.X, maskSize.Y
	if imgSize.X > width {
		width = imgSize.X
	}
	if imgSize.Y > height {
		height = imgSize.Y
	}
	canvas := _ga.Rect(0, 0, width, height)

	if maskSize.X != width || maskSize.Y != height {
		// Rescale the mask itself. Scaling the image here (as was done before) replaced the
		// mask with image pixels sampled through the mask's bounds.
		scaledMask := _ga.NewRGBA(canvas)
		_a.BiLinear.Scale(scaledMask, canvas, _fcbfc, _fcbfc.Bounds(), _a.Over, nil)
		_fcbfc = scaledMask
	}
	if imgSize.X != width || imgSize.Y != height {
		scaledImg := _ga.NewRGBA(canvas)
		_a.BiLinear.Scale(scaledImg, canvas, _fafe, _fafe.Bounds(), _a.Over, nil)
		_fafe = scaledImg
	}

	out := _ga.NewRGBA(canvas)
	_a.DrawMask(out, canvas, _fafe, _ga.Point{}, _fcbfc, _ga.Point{}, _a.Over)
	return out
}

// inTile returns the paragraphs whose rectangle is reported by `_accd`.contains as being
// inside the tile, in their original order. With the _aada debug switch on, the selection is
// also printed to standard output.
func (_adeb paraList) inTile(_accd gridTile) paraList {
	var inside paraList
	for _, para := range _adeb {
		if _accd.contains(para.PdfRectangle) {
			inside = append(inside, para)
		}
	}
	if _aada {
		_bf.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _accd, len(inside))
		for i, para := range inside {
			_bf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
		_bf.Println("")
	}
	return inside
}
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
func (_dbec *Extractor )ExtractText ()(string ,error ){_ffec ,_ ,_ ,_ccc :=_dbec .ExtractTextWithStats ();return _ffec ,_ccc ;};func _fcef (_fgcc []TextMark ,_edba *int ,_cgfb TextMark )[]TextMark {_cgfb .Offset =*_edba ;_fgcc =append (_fgcc ,_cgfb );*_edba +=len (_cgfb .Text );
return _fgcc ;};
// ToTextMark returns the public view of `tm`: a TextMark whose exported fields are copied
// from the corresponding fields of the internal mark. Fields not listed here keep their
// zero values.
func (_afbed *textMark) ToTextMark() TextMark {
	var public TextMark
	public.Text = _afbed._gddf
	public.Original = _afbed._fcfce
	public.BBox = _afbed._badbe
	public.Font = _afbed._facf
	public.FontSize = _afbed._dgfec
	public.FillColor = _afbed._ebgb
	public.StrokeColor = _afbed._eage
	public.Orientation = _afbed._egba
	public.DirectObject = _afbed._facd
	public.ObjString = _afbed._ccffd
	public.Tw = _afbed.Tw
	public.Th = _afbed.Th
	public.Tc = _afbed._dggd
	public.Index = _afbed._bafde
	return public
}

// empty reports whether the bag has no entry at all for depth index `_aegd`.
// An entry that exists but holds no words still counts as present.
func (_edfd *wordBag) empty(_aegd int) bool {
	if _, present := _edfd._ecfd[_aegd]; present {
		return false
	}
	return true
}

// log dumps the tiling when the _gacae debug switch is on: a summary through the common
// logger, then the two coordinate lists and every tile that can be looked up by
// (y key, x key) on standard output. `_febfe` is a label for the summary line.
func (_dfefag gridTiling) log(_febfe string) {
	if !_gacae {
		return
	}
	_e.Log.Info("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071", len(_dfefag._aabgb), len(_dfefag._fdgd), _febfe)
	_bf.Printf("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a", _dfefag._aabgb)
	_bf.Printf("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a", _dfefag._fdgd)

	for rowIdx, yKey := range _dfefag._fdgd {
		row, haveRow := _dfefag._caabf[yKey]
		if haveRow {
			_bf.Printf("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a", rowIdx, yKey)
			for colIdx, xKey := range _dfefag._aabgb {
				if tile, haveTile := row[xKey]; haveTile {
					_bf.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", colIdx, tile.String())
				}
			}
		}
	}
}

// _egfd returns a copy of `_eafbfd` in which every mark is flagged as belonging to a table
// and points at `_bbcee`. The input slice is left untouched; an empty input gives nil.
func _egfd(_eafbfd []TextMark, _bbcee *TextTable) []TextMark {
	var tagged []TextMark
	for i := range _eafbfd {
		mark := _eafbfd[i]
		mark._gdfc, mark._baeg = true, _bbcee
		tagged = append(tagged, mark)
	}
	return tagged
}
// Elements returns the TextMarks in `ma`.
// A nil `ma` yields nil, in line with Len, which also accepts a nil receiver.
func (_aafb *TextMarkArray) Elements() []TextMark {
	if _aafb == nil {
		return nil
	}
	return _aafb._gaac
}

// augmentGrid splits the rulings with vertsHorzs and, where the bounding box of one
// orientation sticks out beyond the bounding box of the other by more than _baaf, adds a
// synthetic ruling (kind _ecdge) on that side so the two sets span the same rectangle.
// If either orientation is empty the split lists are returned as they are.
func (_bfdbg rulingList) augmentGrid() (rulingList, rulingList) {
	verts, horzs := _bfdbg.vertsHorzs()
	if len(verts) == 0 || len(horzs) == 0 {
		return verts, horzs
	}
	origVerts, origHorzs := verts, horzs

	bboxV := verts.bbox()
	bboxH := horzs.bbox()
	if _ccffg {
		_e.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066", bboxV)
		_e.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066", bboxH)
	}

	// Horizontals reach further left/right than the verticals: add a vertical there.
	if bboxH.Llx < bboxV.Llx-_baaf {
		left := &ruling{_bagdc: _ecdge, _facdf: _eeeae, _efadf: bboxH.Llx, _efbba: bboxV.Lly, _daba: bboxV.Ury}
		verts = append(rulingList{left}, verts...)
	}
	if bboxH.Urx > bboxV.Urx+_baaf {
		right := &ruling{_bagdc: _ecdge, _facdf: _eeeae, _efadf: bboxH.Urx, _efbba: bboxV.Lly, _daba: bboxV.Ury}
		verts = append(verts, right)
	}
	// Verticals reach further down/up than the horizontals: add a horizontal there.
	if bboxV.Lly < bboxH.Lly-_baaf {
		bottom := &ruling{_bagdc: _ecdge, _facdf: _geggc, _efadf: bboxV.Lly, _efbba: bboxH.Llx, _daba: bboxH.Urx}
		horzs = append(rulingList{bottom}, horzs...)
	}
	if bboxV.Ury > bboxH.Ury+_baaf {
		top := &ruling{_bagdc: _ecdge, _facdf: _geggc, _efadf: bboxV.Ury, _efbba: bboxH.Llx, _daba: bboxH.Urx}
		horzs = append(horzs, top)
	}

	if len(verts)+len(horzs) == len(_bfdbg) {
		return origVerts, origHorzs
	}
	augmented := append(verts, horzs...)
	_bfdbg.log("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064")
	augmented.log("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d")
	return verts, horzs
}

// _egad reports true when either paragraph has its _cdcca flag set, and otherwise whether the
// difference of the two paragraphs' depth() values passes _dbfgf.
func _egad(_cgbca, _dgdgb *textPara) bool {
	if _cgbca._cdcca || _dgdgb._cdcca {
		return true
	}
	return _dbfgf(_cgbca.depth() - _dgdgb.depth())
}

// _cecfa decodes `_ddcea` as an XObject image stream and returns the result of passing the
// decoded image and `_afeb` to _geec. It returns (nil, nil) when `_ddcea` is not a stream.
func _cecfa(_ddcea _ff.PdfObject, _afeb _dd.Color) (_ga.Image, error) {
	stream, isStream := _ff.GetStream(_ddcea)
	if !isStream {
		return nil, nil
	}
	xobj, err := _b.NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}
	img, err := xobj.ToImage()
	if err != nil {
		return nil, err
	}
	return _geec(img, _afeb), nil
}

// _gaad creates a textObject bound to the given extractor, page resources, graphics state,
// text state and state stack. Both of its matrices start out as the identity.
func _gaad(_dfeb *Extractor, _aedc *_b.PdfPageResources, _gafc _gd.GraphicsState, _efb *textState, _ebdg *stateStack) *textObject {
	return &textObject{
		_beec:  _dfeb,
		_cbgda: _aedc,
		_fcbe:  _gafc,
		_cgdf:  _ebdg,
		_eef:   _efb,
		_cdffg: _da.IdentityMatrix(),
		_fea:   _da.IdentityMatrix(),
	}
}

// _gfdc decodes `_eedgb` as an XObject image stream and returns the result of passing the
// decoded image and `_agfg` to _feabd. It returns (nil, nil) when `_eedgb` is not a stream.
func _gfdc(_eedgb _ff.PdfObject, _agfg _dd.Color) (_ga.Image, error) {
	stream, isStream := _ff.GetStream(_eedgb)
	if !isStream {
		return nil, nil
	}
	xobj, err := _b.NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}
	img, err := xobj.ToImage()
	if err != nil {
		return nil, err
	}
	return _feabd(img, _agfg), nil
}

// _ccbed builds a 2x2 textTable holding `_cfbgc` at (0,0), `_ffaa` at (1,0), `_ffcbf` at (0,1)
// and `_gaegf` at (1,1).
func _ccbed(_cfbgc, _ffaa, _ffcbf, _gaegf *textPara) *textTable {
	table := &textTable{_addag: 2, _cffff: 2, _egbgg: make(map[uint64]*textPara, 4)}
	table.put(0, 0, _cfbgc)
	table.put(1, 0, _ffaa)
	table.put(0, 1, _ffcbf)
	table.put(1, 1, _gaegf)
	return table
}

// toTextMarks returns the marks of every word on the line in order. A single-space mark is
// inserted ahead of each word whose _gcccd flag is set. `_dbadc` is the running text offset
// and is advanced by the helpers that create the marks.
func (_cgae *textLine) toTextMarks(_dbadc *int) []TextMark {
	var marks []TextMark
	for _, word := range _cgae._edge {
		if word._gcccd {
			marks = _bacbe(marks, _dbadc, "\u0020")
		}
		wordMarks := word.toTextMarks(_dbadc)
		marks = append(marks, wordMarks...)
	}
	return marks
}

// rulingList is a list of rulings.
type rulingList []*ruling

// _dbfgf reports whether the magnitude of `_gfabf` is below the tolerance _beeg.
func _dbfgf(_gfabf float64) bool {
	return _ce.Abs(_gfabf) < _beeg
}

// _eadb removes the last rune from the last mark in `_dcec` and moves the running offset
// `*_dfde` back accordingly. A mark that consists of that single rune is dropped altogether.
// An empty `_dcec` is returned unchanged.
func _eadb(_dcec []TextMark, _dfde *int) []TextMark {
	if len(_dcec) == 0 {
		return _dcec
	}
	lastIdx := len(_dcec) - 1
	last := &_dcec[lastIdx]

	if len([]rune(last.Text)) == 1 {
		if lastIdx == 0 {
			// Nothing precedes the dropped mark: rewind to where that mark began.
			*_dfde = last.Offset
		} else {
			prev := _dcec[lastIdx-1]
			*_dfde = prev.Offset + len(prev.Text)
		}
		return _dcec[:lastIdx]
	}

	trimmed := _ffag(last.Text)
	*_dfde += len(trimmed) - len(last.Text)
	// Write through the pointer: assigning to a copy of the element would leave the mark's
	// text untouched while the offset has already been rewound.
	last.Text = trimmed
	return _dcec
}

// _fddag returns the outer keys of `_fgeba` sorted in descending order.
func _fddag(_fgeba map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_fgeba))
	for key := range _fgeba {
		keys = append(keys, key)
	}
	_fd.Float64s(keys)
	// Reverse the ascending sort in place.
	for lo, hi := 0, len(keys)-1; lo < hi; lo, hi = lo+1, hi-1 {
		keys[lo], keys[hi] = keys[hi], keys[lo]
	}
	return keys
}

// applyRemovals drops from the bag every word recorded in `_babg`, which maps a depth index
// to the set of words to remove at that index. A depth index left with no words is deleted
// from the bag.
func (_dbad *wordBag) applyRemovals(_babg map[int]map[*textWord]struct{}) {
	for depthIdx, removed := range _babg {
		if len(removed) == 0 {
			continue
		}
		words := _dbad._ecfd[depthIdx]
		remaining := len(words) - len(removed)
		if remaining == 0 {
			delete(_dbad._ecfd, depthIdx)
			continue
		}
		kept := make([]*textWord, remaining)
		next := 0
		for _, word := range words {
			if _, gone := removed[word]; !gone {
				kept[next] = word
				next++
			}
		}
		_dbad._ecfd[depthIdx] = kept
	}
}

// minDepth returns _gdgea - (Ury - _dbfbe) for the bag.
func (_fafc *wordBag) minDepth() float64 {
	return _fafc._gdgea - (_fafc.Ury - _fafc._dbfbe)
}

// makeRemovals returns an empty removal set for every depth index currently in the bag, in
// the shape applyRemovals expects.
func (_adabb *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	removals := make(map[int]map[*textWord]struct{}, len(_adabb._ecfd))
	for depthIdx := range _adabb._ecfd {
		removals[depthIdx] = make(map[*textWord]struct{})
	}
	return removals
}

// checkWidth returns `_bdbfe` - `_effd` and whether that width is at most _acgdd.
func (_baff rectRuling) checkWidth(_effd, _bdbfe float64) (float64, bool) {
	width := _bdbfe - _effd
	return width, width <= _acgdd
}

// _cgb reports whether `_cd` is even.
func _cgb(_cd int) bool {
	return (_cd & 1) == 0
}
// toCellTextMarks returns the marks of all lines of the paragraph, joined line to line.
// When the _cgaa switch is on and a line other than the last ends in a hyphen, the final
// rune of that line's marks is removed and no separator follows it. Every other line except
// the last is followed by a separator mark whose text comes from _dadac, given the depths of
// the line and the one after it. `_ecab` is the running text offset.
func (_bgce *textPara) toCellTextMarks(_ecab *int) []TextMark {
	var marks []TextMark
	lastIdx := len(_bgce._bbgab) - 1
	for i, line := range _bgce._bbgab {
		lineMarks := line.toTextMarks(_ecab)
		isLast := i == lastIdx
		dehyphenate := _cgaa && line.endsInHyphen() && !isLast
		if dehyphenate {
			lineMarks = _eadb(lineMarks, _ecab)
		}
		marks = append(marks, lineMarks...)
		if dehyphenate || isLast {
			continue
		}
		separator := _dadac(line._dce, _bgce._bbgab[i+1]._dce)
		marks = _bacbe(marks, _ecab, separator)
	}
	return marks
}

// merge collapses the rulings into a single one. Kind, orientation and colour are taken from
// the first ruling; the primary coordinate is the mean over all rulings; the secondary extent
// runs from the smallest start to the largest end. The list must not be empty.
func (_aade rulingList) merge() *ruling {
	head := _aade[0]
	primarySum, lo, hi := head._efadf, head._efbba, head._daba
	for _, r := range _aade[1:] {
		primarySum += r._efadf
		if r._efbba < lo {
			lo = r._efbba
		}
		if r._daba > hi {
			hi = r._daba
		}
	}
	merged := &ruling{
		_facdf: head._facdf,
		_bagdc: head._bagdc,
		Color:  head.Color,
		_efadf: primarySum / float64(len(_aade)),
		_efbba: lo,
		_daba:  hi,
	}
	if _dcbc {
		_e.Log.Info("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073", len(_aade), merged)
		for i, r := range _aade {
			_bf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r)
		}
	}
	return merged
}

// getFontDict looks up the font named `_begc` in the text object's page resources.
// It returns (nil, nil) when the text object has no resources, and an error when the
// resources do not contain the font.
func (_ecbg *textObject) getFontDict(_begc string) (_fdcb _ff.PdfObject, _bdc error) {
	resources := _ecbg._cbgda
	if resources == nil {
		_e.Log.Debug("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071", _begc)
		return nil, nil
	}
	fontObj, found := resources.GetFontByName(_ff.PdfObjectName(_begc))
	if found {
		return fontObj, nil
	}
	_e.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071", _begc)
	return nil, _ad.New("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073")
}

// Sentinel errors.
var (
	// _eddg carries the message "type check error".
	_eddg = _ad.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")
	// _gcg carries the message "range check error".
	_gcg = _ad.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)
// Len returns the number of TextMarks in `ma`. A nil `ma` has length 0.
func (_ccbc *TextMarkArray) Len() int {
	if _ccbc != nil {
		return len(_ccbc._gaac)
	}
	return 0
}

// intersections maps the index of each ruling to the set of indexes of the rulings of the
// other orientation that it intersects. Rulings without any intersection get no entry.
// It returns nil when there are fewer than _dbee+1 rulings of kind _eeeae or fewer than
// _fbedf+1 of kind _geggc, and also when the two kinds together number more than _bgfd.
func (_fgad rulingList) intersections() map[int]intSet {
	var verts, horzs []int
	for idx, r := range _fgad {
		if r._facdf == _eeeae {
			verts = append(verts, idx)
		} else if r._facdf == _geggc {
			horzs = append(horzs, idx)
		}
	}
	if len(verts) < _dbee+1 || len(horzs) < _fbedf+1 {
		return nil
	}
	if len(verts)+len(horzs) > _bgfd {
		_e.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064", len(_fgad), len(verts), len(horzs))
		return nil
	}

	crossings := make(map[int]intSet, len(verts)+len(horzs))
	for _, v := range verts {
		for _, h := range horzs {
			if !_fgad[v].intersects(_fgad[h]) {
				continue
			}
			// Create the sets lazily so that only intersecting rulings get an entry.
			if _, ok := crossings[v]; !ok {
				crossings[v] = make(intSet)
			}
			if _, ok := crossings[h]; !ok {
				crossings[h] = make(intSet)
			}
			crossings[v].add(h)
			crossings[h].add(v)
		}
	}
	return crossings
}

// _ccdb returns the distinct inner keys found across all inner maps of `_agfd`, sorted in
// ascending order.
func _ccdb(_agfd map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_agfd))
	seen := make(map[float64]struct{}, len(_agfd))
	for _, inner := range _agfd {
		for key := range inner {
			if _, dup := seen[key]; !dup {
				keys = append(keys, key)
				seen[key] = struct{}{}
			}
		}
	}
	_fd.Float64s(keys)
	return keys
}

// _ebcdf is a regular expression source. Unescaped it reads
//
//	^[a-zA-Z](\)|\.)|^[\d]+(\)|\.)|^\([a-zA-Z]\)|^\([\d]+\)
//
// i.e. a leading letter or number followed by ")" or ".", or enclosed in parentheses.
var _ebcdf string = "\u005e\u005b\u0061\u002d\u007a\u0041\u002dZ\u005d\u0028\u005c)\u007c\u005c\u002e)\u007c\u005e[\u005c\u0064\u005d\u002b\u0028\u005c)\u007c\\.\u0029\u007c\u005e\u005c\u0028\u005b\u0061\u002d\u007a\u0041\u002d\u005a\u005d\u005c\u0029\u007c\u005e\u005c\u0028\u005b\u005c\u0064\u005d\u002b\u005c\u0029"
// scanBand walks the words of the receiver whose _bagdd value lies in the band
// [`_cec` - margin, `_dfeae` + margin], where margin is _dacc times the _dbfbe of `_dcda` as
// read on entry, and counts those that qualify for `_dcda`. The count is returned.
//
// A word qualifies when
//   - `_gcgb`(`_dcda`, word) accepts it,
//   - `_deab` is not positive, or the smaller of the two font-size mismatch measures computed
//     below does not exceed `_deab`, and
//   - `_dcda`.blocked(word) is false.
//
// `_baa` selects a detect-only mode: nothing is moved, and the scan of a depth index stops at
// its first qualifying word. Otherwise each qualifying word is pulled into `_dcda` and is
// removed from the receiver once the scan is over.
// Unless `_gae` is set, the band is widened to take in every qualifying word; the list of
// depth indexes to visit is fixed before the first word is examined.
// `_dagf` is not used by the function body.
func (_gcdb *wordBag) scanBand(_dagf string, _dcda *wordBag, _gcgb func(_acfa *wordBag, _dcdd *textWord) bool, _cec, _dfeae, _deab float64, _baa, _gae bool) int {
	entryFontsize := _dcda._dbfbe

	var removals map[int]map[*textWord]struct{}
	if !_baa {
		removals = _gcdb.makeRemovals()
	}

	margin := _dacc * entryFontsize
	matched := 0
	for _, depthIdx := range _gcdb.depthBand(_cec-margin, _dfeae+margin) {
		words := _gcdb._ecfd[depthIdx]
		if len(words) == 0 {
			continue
		}
		for _, word := range words {
			inBand := _cec-margin <= word._bagdd && word._bagdd <= _dfeae+margin
			if !inBand {
				continue
			}
			if !_gcgb(_dcda, word) {
				continue
			}

			// Two mismatch measures against the current _dbfbe of the target bag; the smaller
			// one is compared with the limit.
			relDiff := 2.0 * _ce.Abs(word._ecdce-_dcda._dbfbe) / (word._ecdce + _dcda._dbfbe)
			ratio := _ce.Max(word._ecdce/_dcda._dbfbe, _dcda._dbfbe/word._ecdce)
			mismatch := _ce.Min(relDiff, ratio)
			if _deab > 0 && mismatch > _deab {
				continue
			}
			if _dcda.blocked(word) {
				continue
			}

			if !_baa {
				_dcda.pullWord(word, depthIdx, removals)
			}
			matched++

			if !_gae {
				if word._bagdd < _cec {
					_cec = word._bagdd
				}
				if word._bagdd > _dfeae {
					_dfeae = word._bagdd
				}
			}
			if _baa {
				// Leaves only the loop over this depth index.
				break
			}
		}
	}

	if !_baa {
		_gcdb.applyRemovals(removals)
	}
	return matched
}
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
func (_bbbd *Extractor )ExtractTextWithStats ()(_cgc string ,_bdaf int ,_gff int ,_ega error ){_dfc ,_bdaf ,_gff ,_ega :=_bbbd .ExtractPageText ();if _ega !=nil {return "",_bdaf ,_gff ,_ega ;};return _dfc .Text (),_bdaf ,_gff ,nil ;};func (_bdfeeb *ruling )intersects (_bfdbf *ruling )bool {_efbbf :=(_bdfeeb ._facdf ==_eeeae &&_bfdbf ._facdf ==_geggc )||(_bfdbf ._facdf ==_eeeae &&_bdfeeb ._facdf ==_geggc );
_bgdcd :=func (_beae ,_acdc *ruling )bool {return _beae ._efbba -_baaf <=_acdc ._efadf &&_acdc ._efadf <=_beae ._daba +_baaf ;};_aaegd :=_bgdcd (_bdfeeb ,_bfdbf );_befag :=_bgdcd (_bfdbf ,_bdfeeb );if _ccffg {_bf .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_efbbf ,_aaegd ,_befag ,_efbbf &&_aaegd &&_befag ,_bdfeeb ,_bfdbf );
};return _efbbf &&_aaegd &&_befag ;};func (_bbdae rulingList )asTiling ()gridTiling {if _gacae {_e .Log .Info ("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_bbdae ));
};for _dfdb ,_fabcc :=range _bbdae [1:]{_fbdccd :=_bbdae [_dfdb ];if _fbdccd .alignsPrimary (_fabcc )&&_fbdccd .alignsSec (_fabcc ){_e .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_fabcc ,_fbdccd );
};};_bbdae .sortStrict ();_bbdae .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_accag ,_gabg :=_bbdae .vertsHorzs ();_bcge :=_accag .primaries ();_dded :=_gabg .primaries ();_debga :=len (_bcge )-1;_geeeg :=len (_dded )-1;if _debga ==0||_geeeg ==0{return gridTiling {};
};_agbge :=_b .PdfRectangle {Llx :_bcge [0],Urx :_bcge [_debga ],Lly :_dded [0],Ury :_dded [_geeeg ]};if _gacae {_e .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_accag ));
for _gaegb ,_debbc :=range _accag {_bf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gaegb ,_debbc );};_e .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_gabg ));
for _bcaed ,_gdec :=range _gabg {_bf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bcaed ,_gdec );};_e .Log .Info ("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_debga ,_geeeg ,_bcge ,_dded );
};_deea :=make ([]gridTile ,_debga *_geeeg );for _deda :=_geeeg -1;_deda >=0;_deda --{_aecc :=_dded [_deda ];_eacfc :=_dded [_deda +1];for _ecbf :=0;_ecbf < _debga ;_ecbf ++{_dcaff :=_bcge [_ecbf ];_edceb :=_bcge [_ecbf +1];_abddg :=_accag .findPrimSec (_dcaff ,_aecc );
_ddccd :=_accag .findPrimSec (_edceb ,_aecc );_cfeda :=_gabg .findPrimSec (_aecc ,_dcaff );_fafde :=_gabg .findPrimSec (_eacfc ,_dcaff );_bcbfe :=_b .PdfRectangle {Llx :_dcaff ,Urx :_edceb ,Lly :_aecc ,Ury :_eacfc };_adfef :=_fgefe (_bcbfe ,_abddg ,_ddccd ,_cfeda ,_fafde );
_deea [_deda *_debga +_ecbf ]=_adfef ;if _gacae {_bf .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_ecbf ,_deda ,_adfef .String (),_adfef .Width (),_adfef .Height ());
};};};if _gacae {_e .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_agbge );
};_gfbbg :=make ([]map[float64 ]gridTile ,_geeeg );for _geda :=_geeeg -1;_geda >=0;_geda --{if _gacae {_bf .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_geda );};_gfbbg [_geda ]=make (map[float64 ]gridTile ,_debga );for _dbecg :=0;_dbecg < _debga ;
_dbecg ++{_fgaeb :=_deea [_geda *_debga +_dbecg ];if _gacae {_bf .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dbecg ,_fgaeb );};if !_fgaeb ._cfgg {continue ;};_cafa :=_dbecg ;for _dbef :=_dbecg +1;!_fgaeb ._bbed &&_dbef < _debga ;
_dbef ++{_egabf :=_deea [_geda *_debga +_dbef ];_fgaeb .Urx =_egabf .Urx ;_fgaeb ._fdde =_fgaeb ._fdde ||_egabf ._fdde ;_fgaeb ._cega =_fgaeb ._cega ||_egabf ._cega ;_fgaeb ._bbed =_egabf ._bbed ;if _gacae {_bf .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_dbef ,_egabf ,_fgaeb );
};_cafa =_dbef ;};if _gacae {_bf .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_dbecg ,_cafa ,_fgaeb );};_dbecg =_cafa ;_gfbbg [_geda ][_fgaeb .Llx ]=_fgaeb ;};};_cecfgc :=make (map[float64 ]map[float64 ]gridTile ,_geeeg );
_ebafg :=make (map[float64 ]map[float64 ]struct{},_geeeg );for _ceegb :=_geeeg -1;_ceegb >=0;_ceegb --{_ccgde :=_deea [_ceegb *_debga ].Lly ;_cecfgc [_ccgde ]=make (map[float64 ]gridTile ,_debga );_ebafg [_ccgde ]=make (map[float64 ]struct{},_debga );};
if _gacae {_e .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_agbge );
};for _cffaf :=_geeeg -1;_cffaf >=0;_cffaf --{_aegda :=_deea [_cffaf *_debga ].Lly ;_dgac :=_gfbbg [_cffaf ];if _gacae {_bf .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_cffaf );};for _ ,_bcafc :=range _gdbe (_dgac ){if _ ,_ecfgd :=_ebafg [_aegda ][_bcafc ];
_ecfgd {continue ;};_cdefb :=_dgac [_bcafc ];if _gacae {_bf .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_cdefb .String ());};for _dgcf :=_cffaf -1;_dgcf >=0;_dgcf --{if _cdefb ._cega {break ;};_ggcee :=_gfbbg [_dgcf ];_ggdd ,_bcdd :=_ggcee [_bcafc ];
if !_bcdd {break ;};if _ggdd .Urx !=_cdefb .Urx {break ;};_cdefb ._cega =_ggdd ._cega ;_cdefb .Lly =_ggdd .Lly ;if _gacae {_bf .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_ggdd .String (),_cdefb .String ());
};_ebafg [_ggdd .Lly ][_ggdd .Llx ]=struct{}{};};if _cffaf ==0{_cdefb ._cega =true ;};if _cdefb .complete (){_cecfgc [_aegda ][_bcafc ]=_cdefb ;};};};_fcdfb :=gridTiling {PdfRectangle :_agbge ,_aabgb :_ccdb (_cecfgc ),_fdgd :_fddag (_cecfgc ),_caabf :_cecfgc };
_fcdfb .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");return _fcdfb ;};func _ecdg (_abgg bounded )float64 {return -_abgg .bbox ().Lly };func (_eacfe rulingList )mergePrimary ()float64 {_cadb :=_eacfe [0]._efadf ;for _ ,_ageaab :=range _eacfe [1:]{_cadb +=_ageaab ._efadf ;
};return _cadb /float64 (len (_eacfe ));};func (_gcde TextTable )getCellInfo (_dbgg TextMark )[][]int {for _ddccg ,_bbac :=range _gcde .Cells {for _aebfe :=range _bbac {_fgcf :=&_bbac [_aebfe ].Marks ;if _fgcf .exists (_dbgg ){return [][]int {{_ddccg },{_aebfe }};
};};};return nil ;};func (_gabge intSet )has (_gdcee int )bool {_ ,_ggagce :=_gabge [_gdcee ];return _ggagce };func _dc (_ged []rune )BidiText {_ffe :=-1;_dga :=false ;_dgb :=true ;_fcc :=len (_ged );_geb :=make ([]string ,_fcc );_adf :=make ([]string ,_fcc );
if _fcc ==0||_dga {return _bc (string (_ged ),_dgb ,_dga );};_gce :=0;for _bgg ,_gcf :=range _ged {_geb [_bgg ]=string (_gcf );_gde :="\u004c";if _gcf <=0x00ff{_gde =_be [_gcf ];}else if 0x0590<=_gcf &&_gcf <=0x05f4{_gde ="\u0052";}else if 0x0600<=_gcf &&_gcf <=0x06ff{_ea :=_gcf &0xff;
if int (_ea )>=len (_bb ){_e .Log .Debug ("\u0042\u0069\u0064\u0069\u003a\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0055n\u0069c\u006f\u0064\u0065\u0020\u0063\u0068\u0061\u0072\u0061\u0063\u0074\u0065\u0072\u0020"+string (_gcf ));};_gde =_bb [_gcf &0xff];
}else if (0x0700<=_gcf &&_gcf <=0x08ac)||(0xfb50<=_gcf &&_gcf <=0xfdff)||(0xfe70<=_gcf &&_gcf <=0xfeff){_gde ="\u0041\u004c";};if _gde =="\u0052"||_gde =="\u0041\u004c"||_gde =="\u0041\u004e"{_gce ++;};_adf [_bgg ]=_gde ;};if _gce ==0{_dgb =true ;return _bc (string (_ged ),_dgb ,false );
};if _ffe ==-1{if float64 (_gce )/float64 (_fcc )< 0.3&&_fcc > 4{_dgb =true ;_ffe =0;}else {_dgb =false ;_ffe =1;};};var _bbb []int ;for range _ged {_bbb =append (_bbb ,_ffe );};_ae :="\u004c";if _gg (_ffe ){_ae ="\u0052";};_ada :=_ae ;_dae :=_ada ;_fdb :=_ada ;
for _fcd :=range _ged {if _adf [_fcd ]=="\u004e\u0053\u004d"{_adf [_fcd ]=_fdb ;}else {_fdb =_adf [_fcd ];};};_fdb =_ada ;var _eae string ;for _de :=range _ged {_eae =_adf [_de ];if _eae =="\u0045\u004e"{if _fdb =="\u0041\u004c"{_adf [_de ]="\u0041\u004e";
}else {_adf [_de ]="\u0045\u004e";};}else if _eae =="\u0052"||_eae =="\u004c"||_eae =="\u0041\u004c"{_fdb =_eae ;};};for _ceb :=range _ged {_dfg :=_adf [_ceb ];if _dfg =="\u0041\u004c"{_adf [_ceb ]="\u0052";};};for _cc :=1;_cc < (len (_ged )-1);_cc ++{if _adf [_cc ]=="\u0045\u0053"&&_adf [_cc -1]=="\u0045\u004e"&&_adf [_cc +1]=="\u0045\u004e"{_adf [_cc ]="\u0045\u004e";
};if _adf [_cc ]=="\u0043\u0053"&&(_adf [_cc -1]=="\u0045\u004e"||_adf [_cc -1]=="\u0041\u004e")&&_adf [_cc +1]==_adf [_cc -1]{_adf [_cc ]=_adf [_cc -1];};};for _dbd :=range _ged {if _adf [_dbd ]=="\u0045\u004e"{for _adfg :=_dbd -1;_adfg >=0;_adfg --{if _adf [_adfg ]!="\u0045\u0054"{break ;
};_adf [_adfg ]="\u0045\u004e";};for _egc :=_dbd +1;_egc < _fcc ;_egc ++{if _adf [_egc ]!="\u0045\u0054"{break ;};_adf [_egc ]="\u0045\u004e";};};};for _deg :=range _ged {_ccd :=_adf [_deg ];if _ccd =="\u0057\u0053"||_ccd =="\u0045\u0053"||_ccd =="\u0045\u0054"||_ccd =="\u0043\u0053"{_adf [_deg ]="\u004f\u004e";
};};_fdb ="\u0073\u006f\u0072";for _cb :=range _ged {_cdd :=_adf [_cb ];if _cdd =="\u0045\u004e"{if _fdb =="\u004c"{_adf [_cb ]="\u004c";}else {_adf [_cb ]="\u0045\u004e";};}else if _cdd =="\u0052"||_cdd =="\u004c"{_fdb =_cdd ;};};for _dba :=0;_dba < len (_ged );
_dba ++{if _adf [_dba ]=="\u004f\u004e"{_cdg :=_gc (_adf ,_dba +1,"\u004f\u004e");_ee :=_dae ;if _dba > 0{_ee =_adf [_dba -1];};_cgg :=_dae ;if _cdg +1< _fcc {_cgg =_adf [_cdg +1];};if _ee !="\u004c"{_ee ="\u0052";};if _cgg !="\u004c"{_cgg ="\u0052";};
if _ee ==_cgg {_dag (_adf ,_dba ,_cdg ,_ee );};_dba =_cdg -1;};};for _eab :=range _ged {if _adf [_eab ]=="\u004f\u004e"{_adf [_eab ]=_ae ;};};for _gee :=range _ged {_bd :=_adf [_gee ];if _cgb (_bbb [_gee ]){if _bd =="\u0052"{_bbb [_gee ]++;}else if _bd =="\u0041\u004e"||_bd =="\u0045\u004e"{_bbb [_gee ]+=2;
};}else if _bd =="\u004c"||_bd =="\u0041\u004e"||_bd =="\u0045\u004e"{_bbb [_gee ]++;};};_ac :=-1;_acf :=99;var _eb int ;for _ag :=0;_ag < len (_bbb );_ag ++{_eb =_bbb [_ag ];if _ac < _eb {_ac =_eb ;};if _acf > _eb &&_gg (_eb ){_acf =_eb ;};};for _fbf :=_ac ;
_fbf >=_acf ;_fbf --{_gea :=-1;for _dcf :=0;_dcf < len (_bbb );_dcf ++{if _bbb [_dcf ]< _fbf {if _gea >=0{_dgc (_geb ,_gea ,_dcf );_gea =-1;};}else if _gea < 0{_gea =_dcf ;};};if _gea >=0{_dgc (_geb ,_gea ,len (_bbb ));};};for _bdf :=0;_bdf < len (_geb );
_bdf ++{_cbf :=_geb [_bdf ];if _cbf =="\u003c"||_cbf =="\u003e"{_geb [_bdf ]="";};};return _bc (_bfb .Join (_geb ,""),_dgb ,false );};

// _aafaa returns the y-coordinates of the horizontal gaps ("row corridors" in the debug
// output) that separate the text lines of the cells in `_eeaffd` into groups, or nil
// when there are no lines, no gaps, or the gaps are not usable.
//
// The lines of all cells are collected and sorted by their `_dce` value; when `_dbfgf`
// reports true for the difference of two `_dce` values the lines are ordered by Llx
// instead. The sorted lines are then scanned: each time a line's Ury is below the lowest
// Lly seen so far, the midpoint between the two is recorded as a corridor and a new
// group of lines is started. The corridors are discarded unless every cell reports
// `hasLines` for every group.
func _aafaa(_eeaffd []compositeCell) []float64 {
	var _cfeea []*textLine
	_egeb := 0
	for _, _faecb := range _eeaffd {
		_egeb += len(_faecb.paraList)
		_cfeea = append(_cfeea, _faecb.lines()...)
	}
	_fd.Slice(_cfeea, func(_bffc, _afad int) bool {
		_fced, _dagd := _cfeea[_bffc], _cfeea[_afad]
		_dgaa, _gegf := _fced._dce, _dagd._dce
		if !_dbfgf(_dgaa - _gegf) {
			return _dgaa < _gegf
		}
		return _fced.Llx < _dagd.Llx
	})
	if _aada {
		_bf.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", _egeb, len(_cfeea))
		for _bgab, _gaab := range _cfeea {
			_bf.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", _bgab, _gaab)
		}
	}

	// Without any lines there is nothing to separate. Returning here avoids the
	// out-of-range panic that indexing `_cfeea[0]` below would otherwise cause.
	if len(_cfeea) == 0 {
		return nil
	}

	var _eefc []float64
	// `_aggda` tracks the line with the lowest Lly seen so far.
	_aggda := _cfeea[0]
	var _gcbb [][]*textLine
	_acadb := []*textLine{_aggda}
	for _fbcbgc, _dbbcb := range _cfeea[1:] {
		if _dbbcb.Ury < _aggda.Lly {
			// There is a vertical gap between this line and everything before it.
			_cccc := 0.5 * (_dbbcb.Ury + _aggda.Lly)
			if _aada {
				_bf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a", _fbcbgc, _dbbcb.Ury, _aggda.Lly, _cccc, _aggda, _dbbcb)
			}
			_eefc = append(_eefc, _cccc)
			_gcbb = append(_gcbb, _acadb)
			_acadb = nil
		}
		_acadb = append(_acadb, _dbbcb)
		if _dbbcb.Lly < _aggda.Lly {
			_aggda = _dbbcb
		}
	}
	if len(_acadb) > 0 {
		_gcbb = append(_gcbb, _acadb)
	}
	if _aada {
		_bf.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", _eefc)
	}
	if _aada {
		_e.Log.Info("\u0072\u006f\u0077\u003d\u0025\u0064", len(_eeaffd))
		for _cefbc, _dbebf := range _eeaffd {
			_bf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _cefbc, _dbebf)
		}
		_e.Log.Info("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d", len(_gcbb))
		for _egeg, _fgaeg := range _gcbb {
			_bf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a", _egeg, len(_fgaeg))
			for _aafbg, _dfdag := range _fgaeg {
				_bf.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", _aafbg, _dfdag)
			}
		}
	}

	// The corridors are only kept if every cell has lines in every group.
	_fcdaf := true
	for _cedcf, _agca := range _gcbb {
		_feec := true
		for _dcbed, _fgdcg := range _eeaffd {
			if _aada {
				_bf.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", _cedcf, len(_gcbb), _dcbed, len(_eeaffd), _fgdcg)
			}
			if !_fgdcg.hasLines(_agca) {
				if _aada {
					_bf.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", _cedcf, len(_gcbb), _dcbed, len(_eeaffd))
				}
				_feec = false
				break
			}
		}
		if !_feec {
			_fcdaf = false
			break
		}
	}
	if !_fcdaf {
		if _aada {
			_e.Log.Info("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg")
		}
		_eefc = nil
	}
	if _aada && _eefc != nil {
		_bf.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", _eefc)
	}
	return _eefc
}
// Text returns the extracted page text.
func (_agf PageText) Text() string { return _agf._ceef }

// parseStructTreeRoot fills the receiver from the structure tree root object `_dced`.
//
// It reads the "Type" entry into `_fedg` and the "K" entry into `_addd`: when "K" is an
// array each element is parsed with parseStructElement, and when it is an indirect
// reference it is treated as a one-element array. Any other kind of "K" yields an empty
// element list.
//
// The receiver is left untouched when `_dced` is nil or does not resolve to a
// dictionary.
func (_baca *structTreeRoot) parseStructTreeRoot(_dced _ff.PdfObject) {
	if _dced == nil {
		return
	}
	_bfff, _degea := _ff.GetDict(_dced)
	if !_degea {
		_e.Log.Debug("\u0070\u0061\u0072s\u0065\u0053\u0074\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u003a\u0020\u0064\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006eo\u0074\u0020\u0066\u006f\u0075\u006e\u0064\u002e")
		// `_bfff` is nil here; reading entries from it would dereference a nil dictionary.
		return
	}
	K := _bfff.Get("\u004b")

	// "Type" may be absent, in which case Get returns a nil interface and calling
	// String() on it would panic. Mirrors the nil check parseStructElement does for "S".
	_acgf := ""
	if _dgty := _bfff.Get("\u0054\u0079\u0070\u0065"); _dgty != nil {
		_acgf = _dgty.String()
	}

	var _dgfeg *_ff.PdfObjectArray
	switch _gfce := K.(type) {
	case *_ff.PdfObjectArray:
		_dgfeg = _gfce
	case *_ff.PdfObjectReference:
		_dgfeg = _ff.MakeArray(K)
	}
	_dafc := []structElement{}
	for _, _dacfa := range _dgfeg.Elements() {
		_fefc := &structElement{}
		_fefc.parseStructElement(_dacfa)
		_dafc = append(_dafc, *_fefc)
	}
	_baca._addd = _dafc
	_baca._fedg = _acgf
}

// complete reports whether numBorders() is 4 for the tile.
func (_cgca gridTile) complete() bool { return _cgca.numBorders() == 4 }
// absorb merges `_ggbe` into the receiver: the marks of `_ggbe` are appended to the
// receiver's marks and the receiver's rectangle becomes `_badbb` of the two rectangles.
func (_egbbb *textWord) absorb(_ggbe *textWord) {
	_egbbb._bggbb = append(_egbbb._bggbb, _ggbe._bggbb...)
	_egbbb.PdfRectangle = _badbb(_egbbb.PdfRectangle, _ggbe.PdfRectangle)
}

// _caaa is a meta TextMark with text "[X]", original text " " and white fill and
// stroke colors.
var _caaa = TextMark{
	Text:        "\u005b\u0058\u005d",
	Original:    "\u0020",
	Meta:        true,
	FillColor:   _dd.White,
	StrokeColor: _dd.White,
}
// String returns a string describing the grid tile.
func (_fggd gridTile) String() string {
	// flag yields `label` when the corresponding field is set and "_" otherwise.
	flag := func(set bool, label string) string {
		if !set {
			return "\u005f"
		}
		return label
	}
	sideL := flag(_fggd._cfgg, "\u004c")
	sideR := flag(_fggd._bbed, "\u0052")
	sideB := flag(_fggd._cega, "\u0042")
	sideT := flag(_fggd._fdde, "\u0054")
	return _bf.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", _fggd.PdfRectangle, sideL, sideR, sideB, sideT)
}

// closePath closes the most recent subpath in `_fgdb`.
//
// If the `_cbeb` flag is set, a new subpath created from `_dfca` is appended first and
// the flag is cleared. If the flag is not set and there are no subpaths, the call only
// clears the flag and returns.
func (_beecg *shapesState) closePath() {
	switch {
	case _beecg._cbeb:
		_beecg._fgdb = append(_beecg._fgdb, _agfc(_beecg._dfca))
		_beecg._cbeb = false
	case len(_beecg._fgdb) == 0:
		if _gegd {
			_e.Log.Debug("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068")
		}
		_beecg._cbeb = false
		return
	}
	last := len(_beecg._fgdb) - 1
	_beecg._fgdb[last].close()
	if _gegd {
		_e.Log.Info("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073", _beecg)
	}
}

// _aeccb matches a whole string made of optional whitespace, then either digits with an
// optional trailing '.' or a run of the letters 'I', 'i' and 'v', then optional
// whitespace and an optional ')'.
var _aeccb = _df.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")
// depth returns the smallest depth() of the cells returned by get(x, 0) for x in
// [0, `_addag`). Missing cells and cells with `_cdcca` set are ignored. If no cell
// qualifies the result is 1e10.
func (_aabd *textTable) depth() float64 {
	smallest := 1e10
	for x := 0; x < _aabd._addag; x++ {
		cell := _aabd.get(x, 0)
		if cell != nil && !cell._cdcca {
			smallest = _ce.Min(smallest, cell.depth())
		}
	}
	return smallest
}
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int

// maxDepth returns `_gdgea` minus the bag's Lly.
func (_gbfd *wordBag) maxDepth() float64 {
	return _gbfd._gdgea - _gbfd.Lly
}

// textPara is a rectangular region of text together with links to other paras.
// NOTE(review): the meaning of most fields is not visible in this part of the file;
// only the uses noted below are.
type textPara struct {
	_b.PdfRectangle
	_cdcbd _b.PdfRectangle
	// _bbgab holds the text lines of the para.
	_bbgab []*textLine
	_cegg  *textTable
	_cdaad bool
	// _cdcca makes textTable.depth skip the para when set.
	_cdcca bool
	_ebff  *textPara
	// _abbb is the para that textTable.getRight collects for each cell of the last column.
	_abbb *textPara
	_cfbb *textPara
	// _dabf is compared with the following cell in textTable.getRight.
	_dabf  *textPara
	_aaeee []list
}

// establishSubpath returns the last subpath in `_fgdb`, first appending a new subpath
// built from the point returned by lastpointEstablished when that call reports false.
// It returns nil when there is no subpath; otherwise it also clears `_cbeb`.
func (_abce *shapesState) establishSubpath() *subpath {
	if pt, established := _abce.lastpointEstablished(); !established {
		_abce._fgdb = append(_abce._fgdb, _agfc(pt))
	}
	count := len(_abce._fgdb)
	if count == 0 {
		return nil
	}
	_abce._cbeb = false
	return _abce._fgdb[count-1]
}

// _gdbe returns the keys of `_cfbf` in increasing order.
func _gdbe(_cfbf map[float64]gridTile) []float64 {
	keys := make([]float64, len(_cfbf))
	next := 0
	for key := range _cfbf {
		keys[next] = key
		next++
	}
	_fd.Float64s(keys)
	return keys
}

// _gg reports whether `_bfa` is odd.
func _gg(_bfa int) bool {
	return _bfa&1 == 1
}

// depthBand returns depthRange applied to the depth indexes of `_bef` and `_egbe`, or
// nil when the bag's `_ecfd` is empty.
func (_ddab *wordBag) depthBand(_bef, _egbe float64) []int {
	if len(_ddab._ecfd) == 0 {
		return nil
	}
	first := _ddab.getDepthIdx(_bef)
	last := _ddab.getDepthIdx(_egbe)
	return _ddab.depthRange(first, last)
}

// _affeb returns a new list with lines `_gcbc`, tag `_aedg` and children `_aacg`.
func _affeb(_gcbc []*textLine, _aedg string, _aacg []*list) *list {
	created := new(list)
	created._efgb = _gcbc
	created._eaag = _aedg
	created._ddfgc = _aacg
	return created
}

// list builds the lists of the paras: the list lines of every para are passed to
// `_gbdcf`, and its result is passed to `_ebde` together with all lines of all paras.
func (_gffc paraList) list() []*list {
	var listLines, allLines []*textLine
	for _, para := range _gffc {
		listLines = append(listLines, para.getListLines()...)
		allLines = append(allLines, para._bbgab...)
	}
	return _ebde(allLines, _gbdcf(listLines))
}

// _aebg reports whether the absolute value of `_cbgf` is below `_acgdd`.
func _aebg(_cbgf float64) bool {
	return _acgdd > _ce.Abs(_cbgf)
}

// parasBBox returns the cell's paras and its rectangle.
func (_dcbd compositeCell) parasBBox() (paraList, _b.PdfRectangle) {
	paras, bbox := _dcbd.paraList, _dcbd.PdfRectangle
	return paras, bbox
}

// cubicTo adds the point (`_fgbb`, `_bcde`) to the current path. The first four
// arguments are not used.
func (_ggab *shapesState) cubicTo(_cfdg, _deac, _eagg, _dcdg, _fgbb, _bcde float64) {
	if _gegd {
		_e.Log.Info("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a")
	}
	_ggab.addPoint(_fgbb, _bcde)
}

// bbox returns the table's rectangle.
func (_eeecf *textTable) bbox() _b.PdfRectangle {
	return _eeecf.PdfRectangle
}

// primMinMax returns the smallest and largest `_efadf` of the rulings in the list.
// The list must not be empty.
func (_gfbef rulingList) primMinMax() (float64, float64) {
	first := _gfbef[0]._efadf
	lowest, highest := first, first
	for i := 1; i < len(_gfbef); i++ {
		switch value := _gfbef[i]._efadf; {
		case value < lowest:
			lowest = value
		case value > highest:
			highest = value
		}
	}
	return lowest, highest
}
// String returns a description of `t`.
func (_gagf *textTable )String ()string {return _bf .Sprintf ("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074",_gagf ._addag ,_gagf ._cffff ,_gagf ._cbbdd );};

// computeText returns the concatenation of the `_gddf` strings of the word's marks
// (`_bggbb`), in order.
func (_bfggb *textWord )computeText ()string {_facc :=make ([]string ,len (_bfggb ._bggbb ));
for _defg ,_gddc :=range _bfggb ._bggbb {_facc [_defg ]=_gddc ._gddf ;};return _bfb .Join (_facc ,"");};

// _efgd splits the words of `_cecfg` into a list of word bags and returns that list.
// `_cecfg` is consumed: words are removed from it as they are assigned to a bag.
//
// For every depth index of `_cecfg`, and for as long as that index still has words, a
// new bag is created with `_fagdd` from the first word in reading order. The bag is then
// grown by repeated calls to scanBand until a pass adds nothing:
//   - a "vertical" scan over the bag's depth range widened by `_ffbc` times the bag's
//     `_dbfbe`, and a "horizontal" scan over the bag's own depth range; if either
//     returns a positive count the pass is repeated;
//   - otherwise a scan with the second-to-last argument set to true is made, and
//     depending on its count a final "other" scan with the last argument set to true.
//
// `_cdac`, `_fgab` and `_aadaf` are only passed on to `_fagdd`.
// NOTE(review): scanBand is assumed to return the number of words it matched or moved,
// and its second-to-last argument looks like a "count only" switch — confirm against
// scanBand.
func _efgd (_cecfg *wordBag ,_cdac float64 ,_fgab ,_aadaf rulingList )[]*wordBag {var _fgaf []*wordBag ;for _ ,_fbfaf :=range _cecfg .depthIndexes (){_ebdd :=false ;
for !_cecfg .empty (_fbfaf ){_caef :=_cecfg .firstReadingIndex (_fbfaf );_bdcd :=_cecfg .firstWord (_caef );_edbbb :=_fagdd (_bdcd ,_cdac ,_fgab ,_aadaf );_cecfg .removeWord (_bdcd ,_caef );if _gecc {_e .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_bdcd .String ());
// The loop below runs at least once and repeats while the previous pass set `_ebdd`.
// The three gap limits are recomputed each pass from the bag's current `_dbfbe`.
};for _gfceg :=true ;_gfceg ;_gfceg =_ebdd {_ebdd =false ;_bcabc :=_gbeb *_edbbb ._dbfbe ;_bcgbf :=_efce *_edbbb ._dbfbe ;_deag :=_ffbc *_edbbb ._dbfbe ;if _gecc {_e .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_edbbb .minDepth (),_edbbb .maxDepth (),_deag ,_bcgbf );
};if _cecfg .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_edbbb ,_gbba (_bcdc ,0),_edbbb .minDepth ()-_deag ,_edbbb .maxDepth ()+_deag ,_fcga ,false ,false )> 0{_ebdd =true ;};if _cecfg .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_edbbb ,_gbba (_bcdc ,_bcgbf ),_edbbb .minDepth (),_edbbb .maxDepth (),_eaac ,false ,false )> 0{_ebdd =true ;
// The final "other" scan is only attempted when the probe count `_dgdgg` is at most 10,
// or is above 1 and above 0.3 times the bag's depth extent divided by `_dbfbe`.
};if _ebdd {continue ;};_dgdgg :=_cecfg .scanBand ("",_edbbb ,_gbba (_cdge ,_bcabc ),_edbbb .minDepth (),_edbbb .maxDepth (),_cbb ,true ,false );if _dgdgg > 0{_ecfe :=(_edbbb .maxDepth ()-_edbbb .minDepth ())/_edbbb ._dbfbe ;if (_dgdgg > 1&&float64 (_dgdgg )> 0.3*_ecfe )||_dgdgg <=10{if _cecfg .scanBand ("\u006f\u0074\u0068e\u0072",_edbbb ,_gbba (_cdge ,_bcabc ),_edbbb .minDepth (),_edbbb .maxDepth (),_cbb ,false ,true )> 0{_ebdd =true ;
};};};};_fgaf =append (_fgaf ,_edbbb );};};return _fgaf ;};

// _adef logs the number of ruling lists in `_cbggf` together with the label `_ecaa`,
// then prints each list with its index to standard output.
func _adef (_ecaa string ,_cbggf []rulingList ){_e .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_cbggf ),_ecaa );for _fbgc ,_acga :=range _cbggf {_bf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fbgc ,_acga .String ());
};};

// getRight collects the `_abbb` para of every cell in the table's last column
// (x = `_addag`-1, y in [0, `_cffff`)). It returns nil if any of them reports taken(),
// or if the `_dabf` link of a collected para is not the next collected para.
// NOTE(review): get() is dereferenced without a nil check — presumably every cell of
// the last column exists when this is called; confirm.
func (_dfecb *textTable )getRight ()paraList {_aafe :=make (paraList ,_dfecb ._cffff );for _affa :=0;_affa < _dfecb ._cffff ;_affa ++{_daead :=_dfecb .get (_dfecb ._addag -1,_affa )._abbb ;if _daead .taken (){return nil ;};_aafe [_affa ]=_daead ;};
for _ebga :=0;_ebga < _dfecb ._cffff -1;_ebga ++{if _aafe [_ebga ]._dabf !=_aafe [_ebga +1]{return nil ;};};return _aafe ;};

// toGrids partitions the rulings into groups of connected rulings and returns the
// groups that isActualGrid accepts, each passed through snapToGroups.
//
// Steps:
//  1. intersections() and connections() give, for each ruling index, the set of ruling
//     indexes it is connected to.
//  2. The indexes are ordered with `_efbf`: more connections first, ties by comp().
//  3. Going through that order, an index joins the first existing group that contains
//     a ruling connected to it; otherwise it starts a new group.
//  4. Groups are sorted by decreasing size (stable) and each group by comp().
//
// NOTE(review): `_aacb[0]` is read unconditionally, so this presumably must not be
// called on an empty list — confirm against the callers.
func (_edgaa rulingList )toGrids ()[]rulingList {if _ccffg {_e .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_edgaa );
};_cccdg :=_edgaa .intersections ();if _ccffg {_e .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_edgaa ),len (_cccdg ));
for _ ,_daaaa :=range _caaae (_cccdg ){_bf .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_daaaa ,_cccdg [_daaaa ]);};};_dbebc :=make (map[int ]intSet ,len (_edgaa ));for _afcgc :=range _edgaa {_eebg :=_edgaa .connections (_cccdg ,_afcgc );if len (_eebg )> 0{_dbebc [_afcgc ]=_eebg ;
};};if _ccffg {_e .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_dbebc ));for _ ,_dbde :=range _caaae (_dbebc ){_bf .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_dbde ,_dbebc [_dbde ]);
};};_aacb :=_efbf (len (_edgaa ),func (_aacd ,_dggb int )bool {_fcad ,_geab :=len (_dbebc [_aacd ]),len (_dbebc [_dggb ]);if _fcad !=_geab {return _fcad > _geab ;};return _edgaa .comp (_aacd ,_dggb );});if _ccffg {_e .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_aacb );
// `_cffea` holds the groups as slices of ruling indexes; the first group starts with
// the first index of the ordering.
};_cffea :=[][]int {{_aacb [0]}};_dcac :for _ ,_eceea :=range _aacb [1:]{for _gegc ,_fffb :=range _cffea {for _ ,_ecgbf :=range _fffb {if _dbebc [_ecgbf ].has (_eceea ){_cffea [_gegc ]=append (_fffb ,_eceea );continue _dcac ;};};};_cffea =append (_cffea ,[]int {_eceea });
};if _ccffg {_e .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_cffea );};_fd .SliceStable (_cffea ,func (_acgfe ,_aebe int )bool {return len (_cffea [_acgfe ])> len (_cffea [_aebe ])});for _ ,_ffcf :=range _cffea {_fd .Slice (_ffcf ,func (_dfdgc ,_acace int )bool {return _edgaa .comp (_ffcf [_dfdgc ],_ffcf [_acace ])});
// Convert the index groups into ruling lists.
};_daab :=make ([]rulingList ,len (_cffea ));for _fggce ,_ecggd :=range _cffea {_eebga :=make (rulingList ,len (_ecggd ));for _aeaa ,_ccde :=range _ecggd {_eebga [_aeaa ]=_edgaa [_ccde ];};_daab [_fggce ]=_eebga ;};if _ccffg {_e .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_daab );
// Keep only the groups accepted by isActualGrid, using the list it returns.
};var _fdbeb []rulingList ;for _ ,_dgbce :=range _daab {if _gbadf ,_gdgbf :=_dgbce .isActualGrid ();_gdgbf {_dgbce =_gbadf ;_dgbce =_dgbce .snapToGroups ();_fdbeb =append (_fdbeb ,_dgbce );};};if _ccffg {_adef ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_fdbeb );
_e .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_daab ),len (_fdbeb ));};return _fdbeb ;};
// String returns a description of `w`.
func (_afaf *textWord) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _bf.Sprintf(layout, _afaf._bagdd, _afaf.PdfRectangle, _afaf._ecdce, _afaf._deccc)
}

// appendMark adds the mark `_cada` to the word. The word's rectangle becomes `_badbb`
// of the word's and the mark's rectangles, `_ecdce` is raised to the mark's `_dgfec` if
// that is larger, and `_bagdd` is set to `_dgdbg`.Ury minus the word's updated Lly.
func (_dfdba *textWord) appendMark(_cada *textMark, _dgdbg _b.PdfRectangle) {
	_dfdba._bggbb = append(_dfdba._bggbb, _cada)

	merged := _badbb(_dfdba.PdfRectangle, _cada.PdfRectangle)
	_dfdba.PdfRectangle = merged

	if size := _cada._dgfec; size > _dfdba._ecdce {
		_dfdba._ecdce = size
	}
	_dfdba._bagdd = _dgdbg.Ury - _dfdba.PdfRectangle.Lly
}

// pathSection is a list of subpaths together with a color.
type pathSection struct {
	_cdad []*subpath
	_dd.Color
}
// String returns a description of `state`.
func (_bfd *textState) String() string {
	var fontName string
	if _bfd._ggec == nil {
		fontName = "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	} else {
		fontName = _bfd._ggec.BaseFont()
	}
	const layout = "\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071"
	return _bf.Sprintf(layout, _bfd._bggb, _bfd._fagd, _bfd._gec, fontName)
}

// _abcd is the source of a case-insensitive regular expression that matches, at the
// start of a string, a Roman numeral (built from M, D, C, L, X, V and I) that is
// either followed by ')' or '.', or enclosed in parentheses.
var _abcd string = "\u0028\u003f\u0069\u0029\u005e\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028?\u003a\u0044\u007cM\u0029\u007c\u0044\u003f\u0043{\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028\u003f\u003a\u004c\u007c\u0043\u0029\u007cL\u003f\u0058\u007b\u0030\u002c\u0033}\u0029\u0028\u0049\u0028\u003f\u003a\u0056\u007c\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u005c\u0029\u007c\u005c\u002e\u0029\u007c\u005e\u005c\u0028\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028\u003f\u003aD\u007cM\u0029\u007c\u0044\u003f\u0043\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028?\u003a\u004c\u007c\u0043\u0029\u007c\u004c?\u0058\u007b0\u002c\u0033\u007d\u0029(\u0049\u0028\u003f\u003a\u0056|\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u005c\u0029"
// clearPath drops all subpaths and clears the `_cbeb` flag.
func (_ceec *shapesState) clearPath() {
	_ceec._cbeb = false
	_ceec._fgdb = nil
	if _gegd {
		_e.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _ceec)
	}
}

// _dadaf replaces, in place, both coordinates of every point of every subpath in
// `_eeceb` by `_fbefd` of that coordinate. It does nothing when `_cegb` is negative.
func _dadaf(_eeceb []pathSection) {
	if _cegb < 0.0 {
		return
	}
	if _ccffg {
		_e.Log.Info("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073", len(_eeceb))
	}
	for sectionIdx, section := range _eeceb {
		for pathIdx, path := range section._cdad {
			for pointIdx, before := range path._gfb {
				path._gfb[pointIdx] = _da.Point{X: _fbefd(before.X), Y: _fbefd(before.Y)}
				if !_ccffg {
					continue
				}
				// Debug output only: report the points that were moved.
				after := path._gfb[pointIdx]
				if _dcba(before, after) {
					continue
				}
				shift := _da.Point{X: after.X - before.X, Y: after.Y - before.Y}
				_bf.Printf("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a", sectionIdx, pathIdx, pointIdx, before, after, shift)
			}
		}
	}
}

// _fgdc reports whether `_eeaff` divided by the larger of `_acgde` and `_fdcgf` is
// below `_adda`.
func _fgdc(_eeaff, _fdcgf float64) bool {
	divisor := _ce.Max(_acgde, _fdcgf)
	return _eeaff/divisor < _adda
}
// secMinMax returns the smallest `_efbba` and the largest `_daba` of the rulings in the
// list. The list must not be empty.
func (_edgae rulingList) secMinMax() (float64, float64) {
	lowest, highest := _edgae[0]._efbba, _edgae[0]._daba
	for i := 1; i < len(_edgae); i++ {
		current := _edgae[i]
		if current._efbba < lowest {
			lowest = current._efbba
		}
		if current._daba > highest {
			highest = current._daba
		}
	}
	return lowest, highest
}

// findPrimSec returns the first ruling for which `_dbfgf` accepts the difference
// between its `_efadf` and `_bfdd`, and whose range [`_efbba`, `_daba`], widened by
// `_baaf` on both sides, contains `_cefg`. It returns nil if there is none.
func (_ffcgd rulingList) findPrimSec(_bfdd, _cefg float64) *ruling {
	for _, candidate := range _ffcgd {
		if !_dbfgf(candidate._efadf - _bfdd) {
			continue
		}
		if candidate._efbba-_baaf <= _cefg && _cefg <= candidate._daba+_baaf {
			return candidate
		}
	}
	return nil
}

// _fabd builds the paras of a page from the text marks `_dbdcd`.
//
// The marks are turned into words (`_gabdf`), the words into one bag (`_adee`), the
// bag into several bags (`_efgd`, then `_bbea`), and each bag into a para with
// arrangeText. If there are at least `_dbebd` paras, extractTables is applied with
// `_daceg`. The paras are finally sorted with sortReadingOrder and sortTopoOrder.
// It returns nil when there are no marks or no words.
func _fabd(_dbdcd []*textMark, _bagg _b.PdfRectangle, _bdacd rulingList, _daceg []gridTiling) paraList {
	_e.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_dbdcd), _bagg)
	if len(_dbdcd) == 0 {
		return nil
	}
	words := _gabdf(_dbdcd, _bagg)
	if len(words) == 0 {
		return nil
	}
	_bdacd.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := _bdacd.vertsHorzs()
	pageBag := _adee(words, _bagg.Ury, verts, horzs)
	bags := _bbea(_efgd(pageBag, _bagg.Ury, verts, horzs))

	paras := make(paraList, 0, len(bags))
	for _, bag := range bags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}
	if len(paras) >= _dbebd {
		paras = paras.extractTables(_daceg)
	}
	paras.sortReadingOrder()
	paras.sortTopoOrder()
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}

// _gdged creates a text line from the first word of `_cbeec` at depth index `_dfefd`
// and pulls that word out of the bag into the line.
func _gdged(_cbeec *wordBag, _dfefd int) *textLine {
	first := _cbeec.firstWord(_dfefd)
	line := &textLine{
		PdfRectangle: first.PdfRectangle,
		_efde:        first._ecdce,
		_dce:         first._bagdd,
	}
	line.pullWord(_cbeec, first, _dfefd)
	return line
}
// String returns a description of `l`.
func (_cfff *textLine) String() string {
	return _bf.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022", _cfff._dce, _cfff.PdfRectangle, _cfff._efde, _cfff.text())
}

// _bbeea returns nil when a license key is present and licensed, or when `_beg` is
// set. Otherwise it prints an "Unlicensed copy" notice to standard output and returns
// an error. The `_cdacaf` argument is not used.
func _bbeea(_cdacaf *PageText) error {
	_cbdee := _cge.GetLicenseKey()
	if _cbdee != nil && _cbdee.IsLicensed() || _beg {
		return nil
	}
	_bf.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_bf.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _ad.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}

// _ffagf returns the larger of `_eacb` and `_fgdeb`.
func _ffagf(_eacb, _fgdeb int) int {
	if _eacb > _fgdeb {
		return _eacb
	}
	return _fgdeb
}

// setTextMatrix sets both `_cdffg` and `_fea` to the matrix built from the six values
// in `_gebc`. A slice of any other length is logged and ignored.
func (_cab *textObject) setTextMatrix(_gebc []float64) {
	if len(_gebc) != 6 {
		_e.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", len(_gebc))
		return
	}
	_cfag, _aed, _bca, _gfg, _bdb, _bad := _gebc[0], _gebc[1], _gebc[2], _gebc[3], _gebc[4], _gebc[5]
	_cab._cdffg = _da.NewMatrix(_cfag, _aed, _bca, _gfg, _bdb, _bad)
	_cab._fea = _cab._cdffg
}

// _adee puts all words of `_fege` into one word bag. The bag is created by `_fagdd`
// from the first word; every other word is appended to the bin selected by `_fdd` of
// its `_bagdd` and the bag's rectangle is extended to include it. The bag is sorted
// before it is returned. `_fege` must not be empty.
func _adee(_fege []*textWord, _cbcc float64, _bbabb, _efcd rulingList) *wordBag {
	_dfgcc := _fagdd(_fege[0], _cbcc, _bbabb, _efcd)
	for _, _gecd := range _fege[1:] {
		_agga := _fdd(_gecd._bagdd)
		_dfgcc._ecfd[_agga] = append(_dfgcc._ecfd[_agga], _gecd)
		_dfgcc.PdfRectangle = _badbb(_dfgcc.PdfRectangle, _gecd.PdfRectangle)
	}
	_dfgcc.sort()
	return _dfgcc
}

// removeDuplicates removes words that duplicate another word in the same bin.
//
// Two distinct words in a bin are duplicates when their `_deccc` strings are equal and
// all four rectangle coordinates differ by less than a tolerance, which is `_abdd`
// times the `_ecdce` of the first word of the depth index being processed. Of a pair,
// the word that comes later in the bin is removed. Bins that become empty are deleted.
func (_accf *wordBag) removeDuplicates() {
	if _ecff {
		_e.Log.Info("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071", _accf.text())
	}
	for _, _ffdb := range _accf.depthIndexes() {
		if len(_accf._ecfd[_ffdb]) == 0 {
			continue
		}
		_cgaad := _accf._ecfd[_ffdb][0]
		_debb := _abdd * _cgaad._ecdce
		_fdebf := _cgaad._bagdd
		for _, _edede := range _accf.depthBand(_fdebf, _fdebf+_debb) {
			// `_dbbe` is the set of words in this bin that are to be removed.
			_dbbe := map[*textWord]struct{}{}
			_afgd := _accf._ecfd[_edede]
			for _, _dcafg := range _afgd {
				if _, _ddccb := _dbbe[_dcafg]; _ddccb {
					continue
				}
				for _, _edggb := range _afgd {
					if _, _bdbfd := _dbbe[_edggb]; _bdbfd {
						continue
					}
					if _edggb != _dcafg && _edggb._deccc == _dcafg._deccc && _ce.Abs(_edggb.Llx-_dcafg.Llx) < _debb && _ce.Abs(_edggb.Urx-_dcafg.Urx) < _debb && _ce.Abs(_edggb.Lly-_dcafg.Lly) < _debb && _ce.Abs(_edggb.Ury-_dcafg.Ury) < _debb {
						_dbbe[_edggb] = struct{}{}
					}
				}
			}
			if len(_dbbe) > 0 {
				// Compact the kept words to the front of the bin, then cut off the tail.
				_aeca := 0
				for _, _bdbg := range _afgd {
					if _, _gfab := _dbbe[_bdbg]; !_gfab {
						_afgd[_aeca] = _bdbg
						_aeca++
					}
				}
				_accf._ecfd[_edede] = _afgd[:len(_afgd)-len(_dbbe)]
				if len(_accf._ecfd[_edede]) == 0 {
					delete(_accf._ecfd, _edede)
				}
			}
		}
	}
}

// Switches for the debug output guarded by them throughout the package. All of them
// evaluate to false.
const (
	_bdg   = false
	_ecfc  = false
	_gfbd  = false
	_gade  = false
	_gegd  = false
	_dgfb  = false
	_gecc  = false
	_ddcfg = false
	_abfb  = false
	_fbed  = _abfb && true
	_gecf  = _fbed && false
	_ecff  = _abfb && true
	_aada  = false
	_cfdc  = _aada && false
	_ggdf  = _aada && true
	_ccffg = false
	_dad   = _ccffg && false
	_dcbc  = _ccffg && false
	_gacae = _ccffg && true
	_dcbb  = _ccffg && false
	_aadc  = _ccffg && false
)

// _bbea merges word bags that are contained in other word bags and returns the
// remaining bags. `_ebfc` is reordered in place.
//
// The bags are sorted by decreasing area (Width x Height), then by decreasing height.
// Each bag that has not been absorbed then absorbs every later, not yet absorbed bag
// whose rectangle `_afbc` accepts against the absorbing bag's rectangle with Llx
// lowered by its `_dbfbe`.
func _bbea(_ebfc []*wordBag) []*wordBag {
	if len(_ebfc) <= 1 {
		return _ebfc
	}
	if _abfb {
		_e.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	_fd.Slice(_ebfc, func(_dfgcd, _bcbe int) bool {
		_gcef, _fdcg := _ebfc[_dfgcd], _ebfc[_bcbe]
		_agbae := _gcef.Width() * _gcef.Height()
		_cdbd := _fdcg.Width() * _fdcg.Height()
		if _agbae != _cdbd {
			return _agbae > _cdbd
		}
		if _gcef.Height() != _fdcg.Height() {
			return _gcef.Height() > _fdcg.Height()
		}
		// NOTE(review): the indexes passed to a sort.Slice less function are positions in
		// the partly sorted slice, so this tie-break does not preserve the input order.
		// Left unchanged to keep the resulting order as it was.
		return _dfgcd < _bcbe
	})
	var _bgdd []*wordBag
	// `_efgg` is the set of indexes of bags that have been absorbed by another bag.
	_efgg := make(intSet)
	for _ebeb := 0; _ebeb < len(_ebfc); _ebeb++ {
		if _efgg.has(_ebeb) {
			continue
		}
		_abgdb := _ebfc[_ebeb]
		for _fad := _ebeb + 1; _fad < len(_ebfc); _fad++ {
			// Skip bags that an earlier bag has already absorbed. This used to test
			// `_ebeb`, which can never be in the set at this point, so the check was
			// dead and absorbed bags were offered for absorption again.
			if _efgg.has(_fad) {
				continue
			}
			_ebdgc := _ebfc[_fad]
			_gfcf := _abgdb.PdfRectangle
			_gfcf.Llx -= _abgdb._dbfbe
			if _afbc(_gfcf, _ebdgc.PdfRectangle) {
				_abgdb.absorb(_ebdgc)
				_efgg.add(_fad)
			}
		}
		_bgdd = append(_bgdd, _abgdb)
	}
	// Sanity check: every input bag is either kept or absorbed.
	if len(_ebfc) != len(_bgdd)+len(_efgg) {
		_e.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_ebfc), len(_bgdd), len(_efgg))
	}
	return _bgdd
}

// _edggf converts the color `_fceed` of colorspace `_ddded` to an opaque color.NRGBA.
// It returns color.Black when either argument is nil, when the conversion to RGB
// fails, or when the converted color is not a *PdfColorDeviceRGB.
func _edggf(_ddded _b.PdfColorspace, _fceed _b.PdfColor) _dd.Color {
	if _ddded == nil || _fceed == nil {
		return _dd.Black
	}
	_fbeeb, _gffdf := _ddded.ColorToRGB(_fceed)
	if _gffdf != nil {
		_e.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", _fceed, _ddded, _gffdf)
		return _dd.Black
	}
	_gaee, _dcfef := _fbeeb.(*_b.PdfColorDeviceRGB)
	if !_dcfef {
		_e.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", _fbeeb)
		return _dd.Black
	}
	return _dd.NRGBA{R: uint8(_gaee.R() * 255), G: uint8(_gaee.G() * 255), B: uint8(_gaee.B() * 255), A: uint8(255)}
}
// String returns a human readable description of `s`.
func (_dbbcbd intSet) String() string {
	var members []int
	for candidate := range _dbbcbd {
		if !_dbbcbd.has(candidate) {
			continue
		}
		members = append(members, candidate)
	}
	_fd.Ints(members)
	return _bf.Sprintf("\u0025\u002b\u0076", members)
}

// _cfeb returns `_fbad` applied to the `_cdcbd` rectangles of the two paras.
func _cfeb(_fdeb, _deff *textPara) bool {
	first, second := _fdeb._cdcbd, _deff._cdcbd
	return _fbad(first, second)
}
// _ggcea returns `_ebca` cut to its first `_aaead` bytes, or `_ebca` unchanged when it
// is shorter than that.
func _ggcea(_ebca string, _aaead int) string {
	if _aaead <= len(_ebca) {
		return _ebca[:_aaead]
	}
	return _ebca
}

// updateBBox extends the cell's rectangle with the rectangle of each of its paras,
// using `_badbb`.
func (_efac *compositeCell) updateBBox() {
	for i := range _efac.paraList {
		_efac.PdfRectangle = _badbb(_efac.PdfRectangle, _efac.paraList[i].PdfRectangle)
	}
}

// imageExtractContext holds the state used while extracting images.
type imageExtractContext struct {
	// _bcf collects the extracted image marks.
	_bcf  []ImageMark
	_abd  int
	_begg int
	_dagc int
	_ba   map[*_ff.PdfObjectStream]*cachedImage
	_fcaf *ImageExtractOptions
	_eag  bool
}

// reduce returns a table without the rows and columns for which emptyCompositeRow and
// emptyCompositeColumn report true. When there are none, the receiver itself is
// returned; otherwise a new table is built and the remaining composite cells are copied
// into it at their new positions.
func (_agefd *textTable) reduce() *textTable {
	keptRows := make([]int, 0, _agefd._cffff)
	keptCols := make([]int, 0, _agefd._addag)
	for row := 0; row < _agefd._cffff; row++ {
		if _agefd.emptyCompositeRow(row) {
			continue
		}
		keptRows = append(keptRows, row)
	}
	for col := 0; col < _agefd._addag; col++ {
		if _agefd.emptyCompositeColumn(col) {
			continue
		}
		keptCols = append(keptCols, col)
	}
	if len(keptRows) == _agefd._cffff && len(keptCols) == _agefd._addag {
		return _agefd
	}

	reduced := &textTable{
		_cbbdd: _agefd._cbbdd,
		_addag: len(keptCols),
		_cffff: len(keptRows),
		_egbgg: make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _aada {
		_e.Log.Info("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064", _agefd._addag, _agefd._cffff, len(keptCols), len(keptRows))
		_e.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_e.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}
	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			cell, extra := _agefd.getComposite(oldCol, oldRow)
			if cell == nil {
				continue
			}
			if _aada {
				_bf.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newCol, newRow, oldCol, oldRow, _ggcea(cell.merge().text(), 50))
			}
			reduced.putComposite(newCol, newRow, cell, extra)
		}
	}
	return reduced
}
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	_gaac []TextMark
}

// _bdge looks through the children of `_cceac` and returns the lines of the first child
// tagged "LBody", "Span" or "InlineShape". For an "LBody" child that has no lines of
// its own, the search continues recursively inside that child. It returns nil when no
// such child exists.
func _bdge(_cceac *list) []*textLine {
	for _, child := range _cceac._ddfgc {
		switch child._eaag {
		case "\u004c\u0042\u006fd\u0079":
			if len(child._efgb) == 0 {
				return _bdge(child)
			}
			return child._efgb
		case "\u0053\u0070\u0061\u006e", "I\u006e\u006c\u0069\u006e\u0065\u0053\u0068\u0061\u0070\u0065":
			return child._efgb
		}
	}
	return nil
}

// encloses reports whether the interval [`_agbgg`, `_caceg`] starts no lower than the
// ruling's `_efbba` and ends no higher than its `_daba`, allowing a tolerance of `_baaf`
// at each end.
func (_cgagf *ruling) encloses(_agbgg, _caceg float64) bool {
	lower := _cgagf._efbba - _baaf
	if !(lower <= _agbgg) {
		return false
	}
	return _caceg <= _cgagf._daba+_baaf
}
// String returns a description of the word bag: its bounding rectangle, its `_dbfbe`
// value (printed as "fontsize"), the number of words it holds and the quoted texts of
// those words, visited in the order given by depthIndexes().
func (_bfga *wordBag) String() string {
	var texts []string
	for _, depthIdx := range _bfga.depthIndexes() {
		for _, word := range _bfga._ecfd[depthIdx] {
			texts = append(texts, word._deccc)
		}
	}
	// The format decodes to "%.2f fontsize=%.2f %d %q".
	return _bf.Sprintf("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071", _bfga.PdfRectangle, _bfga._dbfbe, len(texts), texts)
}

// parseStructElement fills the receiver from the dictionary behind `_dada`.
//
// It stores the string form of the "S" entry in `_bgec` (empty when absent) and the
// "Pg" entry in `_fbfd`, then interprets the "K" entry:
//   - an integer is stored in `_cabe`;
//   - a reference is wrapped in a one-element array and handled like an array;
//   - an array sets `_cabe` to -1, then either stores its single integer element in
//     `_cabe`, or walks all elements: integers overwrite `_cabe`, everything else is
//     parsed recursively into a child element collected in `_fefd`.
//
// If `_dada` does not resolve to a dictionary a debug message is logged and the
// receiver is left untouched.
func (_cccgf *structElement) parseStructElement(_dada _ff.PdfObject) {
	dict, isDict := _ff.GetDict(_dada)
	if !isDict {
		_e.Log.Debug("\u0070\u0061\u0072\u0073\u0065\u0053\u0074\u0072u\u0063\u0074\u0045le\u006d\u0065\u006e\u0074\u003a\u0020d\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006f\u0062\u006a\u0065\u0063t\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075n\u0064\u002e")
		return
	}

	// Keys decode to "S", "Pg" and "K" respectively.
	typeObj := dict.Get("\u0053")
	pageObj := dict.Get("\u0050\u0067")
	typeName := ""
	if typeObj != nil {
		typeName = typeObj.String()
	}
	kidsObj := dict.Get("\u004b")

	_cccgf._bgec = typeName
	_cccgf._fbfd = pageObj

	// parseKids handles an array-valued "K" entry as described in the doc comment.
	parseKids := func(kids *_ff.PdfObjectArray) {
		_cccgf._cabe = int64(-1)
		if kids.Len() == 1 {
			// A lone integer is stored directly; no child list is built.
			if num, isInt := kids.Elements()[0].(*_ff.PdfObjectInteger); isInt {
				_cccgf._cabe = int64(*num)
				return
			}
		}
		children := []structElement{}
		for _, kid := range kids.Elements() {
			if num, isInt := kid.(*_ff.PdfObjectInteger); isInt {
				// The last integer seen wins.
				_cccgf._cabe = int64(*num)
				continue
			}
			child := &structElement{}
			child.parseStructElement(kid)
			children = append(children, *child)
		}
		_cccgf._fefd = children
	}

	switch kids := kidsObj.(type) {
	case *_ff.PdfObjectInteger:
		_cccgf._cabe = int64(*kids)
	case *_ff.PdfObjectReference:
		parseKids(_ff.MakeArray(kids))
	case *_ff.PdfObjectArray:
		parseKids(kids)
	}
}

// lineTo adds the point (`_ceab`, `_cbee`) to the shapes state via addPoint. When the
// `_gegd` debug flag is set, the point and its devicePoint() value are logged first.
func (_adc *shapesState) lineTo(_ceab, _cbee float64) {
	if _gegd {
		_e.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _ceab, _cbee, _adc.devicePoint(_ceab, _cbee))
	}
	_adc.addPoint(_ceab, _cbee)
}

// stateStack is a slice of text state pointers.
type stateStack []*textState

// bbox returns the word's bounding rectangle.
func (_faac *textWord) bbox() _b.PdfRectangle {
	return _faac.PdfRectangle
}

// newTablePara returns a textPara that wraps the table: both its rectangle and its
// `_cdcbd` rectangle are set to computeBbox(), and `_cegg` points back at the table.
func (_dggdc *textTable) newTablePara() *textPara {
	bounds := _dggdc.computeBbox()
	para := &textPara{
		PdfRectangle: bounds,
		_cdcbd:       bounds,
		_cegg:        _dggdc,
	}
	if _aada {
		_e.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// computeEBBoxes recomputes the `_cdcbd` rectangle of every para in the list.
//
// Each `_cdcbd` starts as a copy of the para's own rectangle. Then, para by para, the
// box is widened horizontally towards other paras that end at or below its bottom edge,
// but never past the nearest y-neighbour (as reported by yNeighbours(0)) lying entirely
// to its left or right. Paras are processed in list order and later paras see the boxes
// already updated for earlier ones. When `_egef` is set the resulting boxes are copied
// back into the paras' own rectangles.
func (_eafbc paraList) computeEBBoxes() {
	if _bdg {
		_e.Log.Info("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a")
	}

	for _, para := range _eafbc {
		para._cdcbd = para.PdfRectangle
	}

	neighbours := _eafbc.yNeighbours(0)
	for idx, para := range _eafbc {
		box := para._cdcbd

		// Horizontal limits imposed by neighbours strictly left / right of the box.
		leftLimit, rightLimit := -1.0e9, +1.0e9
		for _, n := range neighbours[para] {
			other := _eafbc[n]._cdcbd
			switch {
			case other.Urx < box.Llx:
				leftLimit = _ce.Max(leftLimit, other.Urx)
			case box.Urx < other.Llx:
				rightLimit = _ce.Min(rightLimit, other.Llx)
			}
		}

		for otherIdx, otherPara := range _eafbc {
			other := otherPara._cdcbd
			if idx == otherIdx || other.Ury > box.Lly {
				continue
			}
			switch {
			case leftLimit <= other.Llx && other.Llx < box.Llx:
				box.Llx = other.Llx
			case other.Urx <= rightLimit && box.Urx < other.Urx:
				box.Urx = other.Urx
			}
		}

		if _bdg {
			_bf.Printf("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a", idx, para._cdcbd, box, _ggcea(para.text(), 50))
		}
		para._cdcbd = box
	}

	if !_egef {
		return
	}
	for _, para := range _eafbc {
		para.PdfRectangle = para._cdcbd
	}
}

// reduceTiling returns a copy of the table without the rows and columns that are both
// empty and closer than `_cdaf` to an adjacent grid coordinate of `_acccb`.
//
// A row is dropped when it is not the first row, its `_fdgd` coordinate differs from
// the previous row's by less than `_cdaf`, and emptyCompositeRow reports it empty. A
// column is dropped when it is not the last column, its `_aabgb` coordinate differs
// from the next column's by less than `_cdaf`, and emptyCompositeColumn reports it
// empty. If nothing is dropped the receiver itself is returned.
func (_gffec *textTable) reduceTiling(_acccb gridTiling, _cdaf float64) *textTable {
	keptRows := make([]int, 0, _gffec._cffff)
	keptCols := make([]int, 0, _gffec._addag)
	colCoords := _acccb._aabgb
	rowCoords := _acccb._fdgd

	for y := 0; y < _gffec._cffff; y++ {
		if y > 0 && _ce.Abs(rowCoords[y-1]-rowCoords[y]) < _cdaf && _gffec.emptyCompositeRow(y) {
			continue
		}
		keptRows = append(keptRows, y)
	}
	for x := 0; x < _gffec._addag; x++ {
		if x < _gffec._addag-1 && _ce.Abs(colCoords[x+1]-colCoords[x]) < _cdaf && _gffec.emptyCompositeColumn(x) {
			continue
		}
		keptCols = append(keptCols, x)
	}

	if len(keptRows) == _gffec._cffff && len(keptCols) == _gffec._addag {
		return _gffec
	}

	reduced := textTable{
		_cbbdd: _gffec._cbbdd,
		_addag: len(keptCols),
		_cffff: len(keptRows),
		_bdfcg: make(map[uint64]compositeCell, len(keptCols)*len(keptRows)),
	}
	if _aada {
		_e.Log.Info("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064", _gffec._addag, _gffec._cffff, len(keptCols), len(keptRows))
		_e.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_e.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}

	// Copy every non-empty composite cell to its new (column, row) position.
	for newY, oldY := range keptRows {
		for newX, oldX := range keptCols {
			paras, rect := _gffec.getComposite(oldX, oldY)
			if len(paras) == 0 {
				continue
			}
			if _aada {
				_bf.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newX, newY, oldX, oldY, _ggcea(paras.merge().text(), 50))
			}
			reduced.putComposite(newX, newY, paras, rect)
		}
	}
	return &reduced
}

// setHorizScaling stores `_eed` in the text state's `_ddcc` field. It is a no-op on a
// nil receiver.
func (_eeec *textObject) setHorizScaling(_eed float64) {
	if _eeec != nil {
		_eeec._eef._ddcc = _eed
	}
}

// bbox returns the line's bounding rectangle.
func (_dgfd *textLine) bbox() _b.PdfRectangle {
	return _dgfd.PdfRectangle
}
// String returns a string describing the page text: a "-PageText: N elements" header,
// one line per element of `_eca`, and a matching "+PageText: N elements" footer.
func (_ddcf PageText) String() string {
	summary := _bf.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_ddcf._eca))
	lines := make([]string, 0, len(_ddcf._eca)+2)
	lines = append(lines, "\u002d"+summary)
	for _, elem := range _ddcf._eca {
		lines = append(lines, elem.String())
	}
	lines = append(lines, "\u002b"+summary)
	return _bfb.Join(lines, "\u000a")
}

// getFont returns the font named `_dcce`, going through the font cache `_beec._cbg`
// when one is configured.
//
// Cache entries are keyed by the string form of the object returned by getFontDict and
// carry the value of the access counter `_beec._ddd` at their last use. On a hit the
// entry's stamp is refreshed; on a miss the font is loaded with getFontDirect and
// inserted, after evicting the entry with the smallest stamp if the cache already
// holds `_bbg` or more entries.
//
// Errors from getFontDict and getFontDirect are returned unchanged with a nil font.
func (_dfeef *textObject) getFont(_dcce string) (*_b.PdfFont, error) {
	cache := _dfeef._beec._cbg
	if cache == nil {
		// No cache: check the name resolves, then load the font directly.
		if _, err := _dfeef.getFontDict(_dcce); err != nil {
			return nil, err
		}
		font, err := _dfeef.getFontDirect(_dcce)
		if err != nil {
			return nil, err
		}
		return font, nil
	}

	fontObj, err := _dfeef.getFontDict(_dcce)
	if err != nil {
		_e.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073", _dcce, err.Error())
		return nil, err
	}
	// The key is computed once and reused for both the lookup and the insertion.
	key := fontObj.String()

	_dfeef._beec._ddd++
	if entry, hit := cache[key]; hit {
		// `entry` is a copy of the map value, so the refreshed stamp must be written
		// back; otherwise hits never update it and eviction ignores recent use.
		entry._dcgd = _dfeef._beec._ddd
		cache[key] = entry
		return entry._eeecc, nil
	}

	font, err := _dfeef.getFontDirect(_dcce)
	if err != nil {
		return nil, err
	}

	if len(cache) >= _bbg {
		// Evict the entry with the smallest access stamp (single pass, no sort).
		var (
			victim string
			oldest fontEntry
			found  bool
		)
		for name, entry := range cache {
			if !found || entry._dcgd < oldest._dcgd {
				victim, oldest, found = name, entry, true
			}
		}
		if found {
			delete(cache, victim)
		}
	}
	cache[key] = fontEntry{font, _dfeef._beec._ddd}
	return font, nil
}

// hasLines reports whether `_fccd` returns true for the cell's rectangle paired with
// the rectangle of at least one line in `_cfffe`. Lines are tested in order and the
// search stops at the first match. Details are printed when `_aada` is set.
func (_aagb compositeCell) hasLines(_cfffe []*textLine) bool {
	for i, line := range _cfffe {
		hit := _fccd(_aagb.PdfRectangle, line.PdfRectangle)
		if _aada {
			_bf.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", hit, i, len(_cfffe))
			_bf.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", _aagb)
			_bf.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if hit {
			return true
		}
	}
	return false
}

// gridIntersecting reports whether both ends of the two rulings match, i.e. `_gfeee`
// holds for their `_efbba` values and for their `_daba` values.
func (_ffbf *ruling) gridIntersecting(_fgaa *ruling) bool {
	if !_gfeee(_ffbf._efbba, _fgaa._efbba) {
		return false
	}
	return _gfeee(_ffbf._daba, _fgaa._daba)
}

// findTableGrid tries to build a table from the paras in the list that fall inside the
// tiles of `_efbeb`.
//
// It walks every tile present in `_efbeb._caabf` (rows indexed by `_fdgd`, columns by
// `_aabgb`), stores the paras found by inTile as a composite cell, and records them in
// the returned set. It gives up and returns (nil, nil) when
//   - the number of empty tiles exceeds int((1.0-_degb) * columns * rows), or
//   - no cell in row 0 is either nil or lacking the `_cdcca` flag.
//
// Otherwise the table is passed through reduceTiling(`_efbeb`, `_cgcb`) and subdivide()
// and returned together with the set of paras it consumed.
func (_eafgg paraList) findTableGrid(_efbeb gridTiling) (*textTable, map[*textPara]struct{}) {
	numCols := len(_efbeb._aabgb)
	numRows := len(_efbeb._fdgd)
	table := textTable{
		_cbbdd: true,
		_addag: numCols,
		_cffff: numRows,
		_egbgg: make(map[uint64]*textPara, numCols*numRows),
		_bdfcg: make(map[uint64]compositeCell, numCols*numRows),
	}
	table.PdfRectangle = _efbeb.PdfRectangle

	used := make(map[*textPara]struct{})
	maxEmpty := int((1.0 - _degb) * float64(numCols*numRows))
	numEmpty := 0
	if _gacae {
		_e.Log.Info("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064", numCols, numRows)
	}

	for y, rowKey := range _efbeb._fdgd {
		row, hasRow := _efbeb._caabf[rowKey]
		if !hasRow {
			continue
		}
		for x, colKey := range _efbeb._aabgb {
			tile, hasTile := row[colKey]
			if !hasTile {
				continue
			}
			paras := _eafgg.inTile(tile)
			if len(paras) == 0 {
				numEmpty++
				if numEmpty > maxEmpty {
					if _gacae {
						_e.Log.Info("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064", numEmpty)
					}
					return nil, nil
				}
				continue
			}
			table.putComposite(x, y, paras, tile.PdfRectangle)
			for _, para := range paras {
				used[para] = struct{}{}
			}
		}
	}

	// Count row-0 cells that are missing or do not have `_cdcca` set.
	numHeader := 0
	for x := 0; x < numCols; x++ {
		cell := table.get(x, 0)
		if cell == nil || !cell._cdcca {
			numHeader++
		}
	}
	if numHeader == 0 {
		if _gacae {
			_e.Log.Info("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030")
		}
		return nil, nil
	}

	result := table.reduceTiling(_efbeb, _cgcb)
	result = result.subdivide()
	return result, used
}

// markWordBoundaries sets `_gcccd` on every word of the line, except the first, whose
// `_aged` distance to the preceding word is at least `_gebb` times the line's `_efde`.
// A line with fewer than two words is left unchanged (an empty line no longer panics).
func (_befd *textLine) markWordBoundaries() {
	threshold := _gebb * _befd._efde
	words := _befd._edge
	for i := 1; i < len(words); i++ {
		if _aged(words[i], words[i-1]) >= threshold {
			words[i]._gcccd = true
		}
	}
}

// equals reports whether the two rulings have the same `_facdf` and whether `_gfeee`
// holds for each of their `_efadf`, `_efbba` and `_daba` values.
func (_geea *ruling) equals(_eaaga *ruling) bool {
	if _geea._facdf != _eaaga._facdf {
		return false
	}
	return _gfeee(_geea._efadf, _eaaga._efadf) &&
		_gfeee(_geea._efbba, _eaaga._efbba) &&
		_gfeee(_geea._daba, _eaaga._daba)
}

// get returns the para stored at column `_efga`, row `_eafd`, or nil if there is none.
func (_aacfa *textTable) get(_efga, _eafd int) *textPara {
	key := _fbfgf(_efga, _eafd)
	return _aacfa._egbgg[key]
}

// _gabdf groups the marks in `_dfed` into words and returns them in order.
//
// Marks are consumed sequentially into a current word. A mark whose text satisfies
// `_gbcg` ends the current word and is itself discarded. A mark also starts a new word
// when its `_aged` gap to the current word, relative to the word's `_ecdce`, is at
// least `_dgab` or below -`_gbcc`, or when its `_gced` offset from the word's `_bagdd`,
// relative to `_ecdce`, exceeds `_ebagb`. Finished words whose computed text satisfies
// `_gbcg` are dropped.
//
// When `_bgcff` is set, a diacritic mark (as classified by `_cafae`) lying in the
// diacritic area of its neighbour is merged into the current word through addDiacritic
// rather than being appended as a separate mark.
func _gabdf(_dfed []*textMark, _ddffd _b.PdfRectangle) []*textWord {
	var words []*textWord
	var current *textWord
	if _ecfc {
		_e.Log.Info("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073", len(_dfed))
	}

	// flush finalises the current word, if any, and clears it.
	flush := func() {
		if current == nil {
			return
		}
		text := current.computeText()
		if !_gbcg(text) {
			current._deccc = text
			words = append(words, current)
			if _ecfc {
				_e.Log.Info("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", len(words)-1, current.String())
				for i, mark := range current._bggbb {
					_bf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, mark.String())
				}
			}
		}
		current = nil
	}

	for _, mark := range _dfed {
		if _bgcff && current != nil && len(current._bggbb) > 0 {
			last := current._bggbb[len(current._bggbb)-1]
			markAccent, markIsAccent := _cafae(mark._gddf)
			lastAccent, lastIsAccent := _cafae(last._gddf)
			if markIsAccent && !lastIsAccent && last.inDiacriticArea(mark) {
				current.addDiacritic(markAccent)
				continue
			}
			if lastIsAccent && !markIsAccent && mark.inDiacriticArea(last) {
				// The diacritic came first: replace it with the base mark, then re-add it.
				current._bggbb = current._bggbb[:len(current._bggbb)-1]
				current.appendMark(mark, _ddffd)
				current.addDiacritic(lastAccent)
				continue
			}
		}

		if _gbcg(mark._gddf) {
			flush()
			continue
		}
		if current == nil {
			current = _cfegg([]*textMark{mark}, _ddffd)
			continue
		}

		scale := current._ecdce
		relOffset := _ce.Abs(_gced(_ddffd, mark)-current._bagdd) / scale
		relGap := _aged(mark, current) / scale
		if relGap >= _dgab || !(-_gbcc <= relGap && relOffset <= _ebagb) {
			flush()
			current = _cfegg([]*textMark{mark}, _ddffd)
			continue
		}
		current.appendMark(mark, _ddffd)
	}
	flush()
	return words
}

// cachedImage pairs an image with a color space.
type cachedImage struct {
	_cfa *_b.Image
	_gfe _b.PdfColorspace
}

// reset sets both the `_cdffg` and `_fea` matrices to identity and clears `_daeb`.
func (_cfcf *textObject) reset() {
	_cfcf._cdffg = _da.IdentityMatrix()
	_cfcf._fea = _da.IdentityMatrix()
	_cfcf._daeb = nil
}

// moveLP concatenates a translation by (`_bdag`, `_gfggg`) onto the `_fea` matrix and
// then copies the result into `_cdffg`.
func (_fcg *textObject) moveLP(_bdag, _gfggg float64) {
	shift := _da.NewMatrix(1, 0, 0, 1, _bdag, _gfggg)
	_fcg._fea.Concat(shift)
	_fcg._cdffg = _fcg._fea
}
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_gcbf PageText) Marks() *TextMarkArray {
	marks := TextMarkArray{_gaac: _gcbf._ebdc}
	return &marks
}

// _gfbeg returns a new textPara with rectangle `_gfbbf` holding the lines `_eeff`.
func _gfbeg(_gfbbf _b.PdfRectangle, _eeff []*textLine) *textPara {
	para := new(textPara)
	para.PdfRectangle = _gfbbf
	para._bbgab = _eeff
	return para
}

// _gbdg reports whether all marks on the line carry the same `_faad` value. The first
// value seen becomes the reference; -1 serves as the "no reference yet" sentinel. A
// line without marks yields true.
func _gbdg(_faae *textLine) bool {
	uniform := true
	reference := -1
	for _, word := range _faae._edge {
		for _, mark := range word._bggbb {
			value := mark._faad
			if reference == -1 {
				reference = value
				continue
			}
			if reference != value {
				uniform = false
				// Leaves only the inner loop; the remaining words are still visited.
				break
			}
		}
	}
	return uniform
}

// clear resets the subpath to its zero value.
func (_eagb *subpath) clear() {
	var empty subpath
	*_eagb = empty
}
// _bcef returns the keys of `_cfega` sorted in increasing order.
func _bcef(_cfega map[int][]float64) []int {
	keys := make([]int, 0, len(_cfega))
	for key := range _cfega {
		keys = append(keys, key)
	}
	_fd.Ints(keys)
	return keys
}

// _aagd returns a text state with `_ddcc` set to 100, `_ddgg` set to RenderModeFill and
// `_dgbba` set to `_aebf`.
func _aagd(_aebf _b.PdfRectangle) textState {
	var state textState
	state._ddcc = 100
	state._ddgg = RenderModeFill
	state._dgbba = _aebf
	return state
}

// extractPageResourcesToFont appends one Font entry to `_dbe.Fonts` for every font in
// the page resources `_bba` whose descriptor font name is not already present
// according to `_cac`.
//
// For each font the embedded font program is taken from the first non-nil of the
// descriptor's FontFile, FontFile2 and FontFile3 entries; if that entry is a stream it
// is decoded into FontData and FontFileName becomes the font name plus ".pfb", ".ttf"
// or ".cff" respectively. A font without a file name only triggers a debug message.
//
// It returns an error when the resources have no Font entry, when that entry is not a
// dictionary, when a listed font cannot be looked up or constructed, or when decoding
// a font stream fails.
func (_dbe *PageFonts) extractPageResourcesToFont(_bba *_b.PdfPageResources) error {
	if _bba.Font == nil {
		return _ad.New(_agd)
	}
	fontDict, isDict := _ff.GetDict(_bba.Font)
	if !isDict {
		return _ad.New(_gge)
	}

	for _, key := range fontDict.Keys() {
		fontObj, found := _bba.GetFontByName(key)
		if !found {
			return _ad.New(_gfa)
		}
		font, err := _b.NewPdfFontFromPdfObject(fontObj)
		if err != nil {
			return err
		}

		descriptor := font.FontDescriptor()
		fontName := font.FontDescriptor().FontName.String()
		subtype := font.Subtype()
		if _cac(_dbe.Fonts, fontName) {
			continue
		}
		hasToUnicode := len(font.ToUnicode()) != 0

		// Only the first non-nil font file entry is considered.
		var (
			fileObj   _ff.PdfObject
			extension string
		)
		switch {
		case descriptor.FontFile != nil:
			fileObj, extension = descriptor.FontFile, "\u002e\u0070\u0066\u0062"
		case descriptor.FontFile2 != nil:
			fileObj, extension = descriptor.FontFile2, "\u002e\u0074\u0074\u0066"
		case descriptor.FontFile3 != nil:
			fileObj, extension = descriptor.FontFile3, "\u002e\u0063\u0066\u0066"
		}

		var (
			fontData []byte
			fileName string
		)
		if extension != "" {
			if stream, isStream := _ff.GetStream(fileObj); isStream {
				fontData, err = _ff.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + extension
			}
		}
		if len(fileName) < 1 {
			_e.Log.Debug(_dbb)
		}

		_dbe.Fonts = append(_dbe.Fonts, Font{
			FontName:       fontName,
			PdfFont:        font,
			IsCID:          font.IsCID(),
			IsSimple:       font.IsSimple(),
			ToUnicode:      hasToUnicode,
			FontType:       subtype,
			FontData:       fontData,
			FontFileName:   fileName,
			FontDescriptor: descriptor,
		})
	}
	return nil
}

// _ddae concatenates all ruling lists in `_adde` and returns the result of vertsHorzs()
// on the combined list.
func _ddae(_adde []rulingList) (rulingList, rulingList) {
	var combined rulingList
	for _, rulings := range _adde {
		combined = append(combined, rulings...)
	}
	return combined.vertsHorzs()
}

// pullWord adds `_ceg` to the bag under index `_bbgc`: the bag's rectangle becomes
// `_badbb` of the old rectangle and the word's, `_dbfbe` is raised to the word's
// `_ecdce` if that is larger, and the word is recorded in `_fcbeb[_bbgc]`.
func (_cecf *wordBag) pullWord(_ceg *textWord, _bbgc int, _fcbeb map[int]map[*textWord]struct{}) {
	_cecf.PdfRectangle = _badbb(_cecf.PdfRectangle, _ceg.PdfRectangle)
	if size := _ceg._ecdce; size > _cecf._dbfbe {
		_cecf._dbfbe = size
	}
	bucket := append(_cecf._ecfd[_bbgc], _ceg)
	_cecf._ecfd[_bbgc] = bucket
	_fcbeb[_bbgc][_ceg] = struct{}{}
}

// addNeighbours links each para in the list to at most one neighbour on each of its
// four sides through the fields `_ebff` (lower x), `_abbb` (higher x), `_dabf` (lower y)
// and `_cfbb` (higher y).
//
// Horizontal candidates come from yNeighbours(`_bdcb`) and must pass `_cbdd`; vertical
// candidates come from xNeighbours(`_ecde`) and must pass `_fbad`. On each side the
// nearest candidate is chosen and is rejected if any other candidate on that side
// overlaps it along the search axis. A final pass removes every link that is not
// reciprocated by the para it points to.
func (_gadc paraList) addNeighbours() {
	// splitX partitions the indexed paras into those ending at or before para's left
	// edge and those starting at or after its right edge.
	splitX := func(indexes []int, para *textPara) ([]*textPara, []*textPara) {
		lowX := make([]*textPara, 0, len(indexes)-1)
		highX := make([]*textPara, 0, len(indexes)-1)
		for _, idx := range indexes {
			other := _gadc[idx]
			switch {
			case other.Urx <= para.Llx:
				lowX = append(lowX, other)
			case other.Llx >= para.Urx:
				highX = append(highX, other)
			}
		}
		return lowX, highX
	}
	// splitY partitions the indexed paras into those starting at or above para's top
	// edge (first result) and those ending at or below its bottom edge (second result).
	splitY := func(indexes []int, para *textPara) ([]*textPara, []*textPara) {
		highY := make([]*textPara, 0, len(indexes)-1)
		lowY := make([]*textPara, 0, len(indexes)-1)
		for _, idx := range indexes {
			other := _gadc[idx]
			switch {
			case other.Ury <= para.Lly:
				lowY = append(lowY, other)
			case other.Lly >= para.Ury:
				highY = append(highY, other)
			}
		}
		return highY, lowY
	}

	neighbours := _gadc.yNeighbours(_bdcb)
	for _, para := range _gadc {
		indexes := neighbours[para]
		if len(indexes) == 0 {
			continue
		}
		lowX, highX := splitX(indexes, para)
		if len(lowX) == 0 && len(highX) == 0 {
			continue
		}
		if len(lowX) > 0 {
			// Nearest on the low-x side: the candidate with the largest Urx.
			best := lowX[0]
			for _, cand := range lowX[1:] {
				if cand.Urx >= best.Urx {
					best = cand
				}
			}
			for _, cand := range lowX {
				if cand != best && cand.Urx > best.Llx {
					best = nil
					break
				}
			}
			if best != nil && _cbdd(para.PdfRectangle, best.PdfRectangle) {
				para._ebff = best
			}
		}
		if len(highX) > 0 {
			// Nearest on the high-x side: the candidate with the smallest Llx.
			best := highX[0]
			for _, cand := range highX[1:] {
				if cand.Llx <= best.Llx {
					best = cand
				}
			}
			for _, cand := range highX {
				if cand != best && cand.Llx < best.Urx {
					best = nil
					break
				}
			}
			if best != nil && _cbdd(para.PdfRectangle, best.PdfRectangle) {
				para._abbb = best
			}
		}
	}

	neighbours = _gadc.xNeighbours(_ecde)
	for _, para := range _gadc {
		indexes := neighbours[para]
		if len(indexes) == 0 {
			continue
		}
		highY, lowY := splitY(indexes, para)
		if len(highY) == 0 && len(lowY) == 0 {
			continue
		}
		if len(lowY) > 0 {
			// Nearest on the low-y side: the candidate with the largest Ury.
			best := lowY[0]
			for _, cand := range lowY[1:] {
				if cand.Ury >= best.Ury {
					best = cand
				}
			}
			for _, cand := range lowY {
				if cand != best && cand.Ury > best.Lly {
					best = nil
					break
				}
			}
			if best != nil && _fbad(para.PdfRectangle, best.PdfRectangle) {
				para._dabf = best
			}
		}
		if len(highY) > 0 {
			// Nearest on the high-y side: the candidate with the smallest Lly.
			best := highY[0]
			for _, cand := range highY[1:] {
				if cand.Lly <= best.Lly {
					best = cand
				}
			}
			for _, cand := range highY {
				if cand != best && cand.Lly < best.Ury {
					best = nil
					break
				}
			}
			if best != nil && _fbad(para.PdfRectangle, best.PdfRectangle) {
				para._cfbb = best
			}
		}
	}

	// Drop one-sided links. The order of these checks matters: clearing a link on one
	// para changes what later checks on other paras observe.
	for _, para := range _gadc {
		if para._ebff != nil && para._ebff._abbb != para {
			para._ebff = nil
		}
		if para._cfbb != nil && para._cfbb._dabf != para {
			para._cfbb = nil
		}
		if para._abbb != nil && para._abbb._ebff != para {
			para._abbb = nil
		}
		if para._dabf != nil && para._dabf._cfbb != para {
			para._dabf = nil
		}
	}
}