unipdf/extractor/extractor.go
2022-12-15 21:59:56 +00:00

952 lines
208 KiB
Go
Raw Blame History

//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
package extractor ;import (_cg "bytes";_b "errors";_bb "fmt";_ed "github.com/unidoc/unipdf/v3/common";_bc "github.com/unidoc/unipdf/v3/contentstream";_feb "github.com/unidoc/unipdf/v3/core";_fe "github.com/unidoc/unipdf/v3/internal/license";_bg "github.com/unidoc/unipdf/v3/internal/textencoding";
_cfa "github.com/unidoc/unipdf/v3/internal/transform";_cfb "github.com/unidoc/unipdf/v3/model";_d "golang.org/x/text/unicode/norm";_bdc "golang.org/x/xerrors";_a "image/color";_g "io";_e "math";_c "reflect";_ga "regexp";_gf "sort";_bd "strings";_cf "unicode";
_ba "unicode/utf8";);
// pop removes the last element from the stack and returns a pointer to a copy
// of the textState it pointed to. It returns nil when empty() reports true.
func (_caaf *stateStack )pop ()*textState {if _caaf .empty (){return nil ;};_fef :=*(*_caaf )[len (*_caaf )-1];*_caaf =(*_caaf )[:len (*_caaf )-1];return &_fef ;};
// BBox returns the union of the bounding boxes of the TextMarks in `ma`,
// ignoring marks that are flagged Meta and marks whose text `_aaag` accepts.
// The boolean result is false when no mark contributed to the rectangle.
func (_fgab *TextMarkArray) BBox() (_cfb.PdfRectangle, bool) {
	var (
		union _cfb.PdfRectangle
		found bool
	)
	for _, mark := range _fgab._fdda {
		switch {
		case mark.Meta || _aaag(mark.Text):
			// Skipped marks do not contribute to the box.
		case !found:
			union, found = mark.BBox, true
		default:
			union = _bfad(union, mark.BBox)
		}
	}
	return union, found
}

// _bfdbg returns true when either paragraph has its `_cea` flag set; otherwise
// it returns `_cbafb` applied to the difference between the two depths.
func _bfdbg(_gcgd, _bfag *textPara) bool {
	if !_gcgd._cea && !_bfag._cea {
		return _cbafb(_gcgd.depth() - _bfag.depth())
	}
	return true
}
// ApplyArea processes the page text only within the specified area `bbox`.
// Each call replaces the text, the text marks and the tables stored in `pt`
// with the ones computed for that area, so it can be called multiple times in
// a row with different bounding boxes.
func (_aed *PageText) ApplyArea(bbox _cfb.PdfRectangle) {
	// Keep only the marks that `_efgb` selects for the requested area.
	inArea := make([]*textMark, 0, len(_aed._bebf))
	for _, mark := range _aed._bebf {
		if _efgb(mark.bbox(), bbox) {
			inArea = append(inArea, mark)
		}
	}

	// Paragraphs are built separately for each `_dgfd` value 0, 90, 180, 270.
	var paras paraList
	remaining := len(inArea)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		// At most `remaining` marks can still match, so that is the right
		// capacity hint (the previous hint was the number already consumed,
		// i.e. zero on the first pass).
		group := make([]*textMark, 0, remaining)
		for _, mark := range inArea {
			if mark._dgfd == orient {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _egdb(group, _aed._bfcb, nil, nil)...)
		remaining -= len(group)
	}

	buf := new(_cg.Buffer)
	paras.writeText(buf)
	_aed._bggg = buf.String()
	_aed._dbgf = paras.toTextMarks()
	_aed._aegg = paras.tables()
}

// extractXObjectImage records an ImageMark for the XObject named `_fdc`.
// It returns nil without recording anything when the XObject or its image
// cannot be found in `_dbe`. Converted images are cached in `_edb`, keyed by
// the XObject stream, so each stream is converted at most once. The mark's
// size, angle and position are taken from the CTM of `_ccf`.
func (_dfd *imageExtractContext) extractXObjectImage(_fdc *_feb.PdfObjectName, _ccf _bc.GraphicsState, _dbe *_cfb.PdfPageResources) error {
	stream, _ := _dbe.GetXObjectByName(*_fdc)
	if stream == nil {
		return nil
	}

	cached, ok := _dfd._edb[stream]
	if !ok {
		ximg, err := _dbe.GetXObjectImageByName(*_fdc)
		if err != nil {
			return err
		}
		if ximg == nil {
			return nil
		}
		img, err := ximg.ToImage()
		if err != nil {
			return err
		}
		cached = &cachedImage{_gea: img, _dfg: ximg.ColorSpace}
		_dfd._edb[stream] = cached
	}

	rgb, err := cached._dfg.ImageToRGB(*cached._gea)
	if err != nil {
		return err
	}
	_ed.Log.Debug("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073", _ccf.CTM.String())

	mark := ImageMark{
		Image:  &rgb,
		Width:  _ccf.CTM.ScalingFactorX(),
		Height: _ccf.CTM.ScalingFactorY(),
		Angle:  _ccf.CTM.Angle(),
	}
	mark.X, mark.Y = _ccf.CTM.Translation()
	_dfd._eca = append(_dfd._eca, mark)
	_dfd._dcc++
	return nil
}
// String returns a string describing `tm`.
// The description contains the mark's Offset, its Text (quoted and as hex),
// the lower-left and upper-right corners of its BBox, the Font description
// (cut to its first 50 bytes followed by "..." when longer) and a " *M*"
// suffix when Meta is set.
func (_bfcf TextMark )String ()string {_bfe :=_bfcf .BBox ;var _cabc string ;if _bfcf .Font !=nil {_cabc =_bfcf .Font .String ();if len (_cabc )> 50{_cabc =_cabc [:50]+"\u002e\u002e\u002e";};};var _eef string ;if _bfcf .Meta {_eef ="\u0020\u002a\u004d\u002a";
};return _bb .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_bfcf .Offset ,_bfcf .Text ,[]rune (_bfcf .Text ),_bfe .Llx ,_bfe .Lly ,_bfe .Urx ,_bfe .Ury ,_cabc ,_eef );
};
// _ggdcg builds a ruling of kind `_dcfc` from the left edge of `_cgbf`:
// `_gged` is set to Llx, `_abcc` to Lly and `_dfad` to Ury.
func _ggdcg (_cgbf _cfb .PdfRectangle )*ruling {return &ruling {_efdg :_dcfc ,_gged :_cgbf .Llx ,_abcc :_cgbf .Lly ,_dfad :_cgbf .Ury };};const _agd =10;
// rulingKind identifies the kind of a ruling; `_ccbc` holds the printable names.
type rulingKind int ;
// _effb maps each markKind to its printable name ("stroke", "fill", "augment").
var _effb =map[markKind ]string {_eadd :"\u0073\u0074\u0072\u006f\u006b\u0065",_efcd :"\u0066\u0069\u006c\u006c",_baddd :"\u0061u\u0067\u006d\u0065\u006e\u0074"};
// _cfce is a Meta TextMark with Text "[X]", Original " " and white fill and stroke colors.
var _cfce =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_a .White ,StrokeColor :_a .White };
// _egbbg returns `_geag` applied to the absolute X and Y distances between the two points.
func _egbbg (_edaa ,_ggbgc _cfa .Point )bool {_bdgd :=_e .Abs (_edaa .X -_ggbgc .X );_fgggg :=_e .Abs (_edaa .Y -_ggbgc .Y );return _geag (_bdgd ,_fgggg );
};
// closePath closes the most recent subpath in `_ccac`.
// When a pending point is flagged (`_fab`), a new single-point subpath is first
// started from `_ggb` and the flag is cleared. When there is no pending point
// and no subpath exists, nothing is closed (a debug message is logged if
// `_bfcbef` is set).
func (_beef *shapesState )closePath (){if _beef ._fab {_beef ._ccac =append (_beef ._ccac ,_bcdg (_beef ._ggb ));_beef ._fab =false ;}else if len (_beef ._ccac )==0{if _bfcbef {_ed .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");
};_beef ._fab =false ;return ;};_beef ._ccac [len (_beef ._ccac )-1].close ();if _bfcbef {_ed .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_beef );};};
// computeBbox returns the union (via `_bfad`) of the rectangles of all non-nil
// cells returned by get(x, y) for x in [0, _dedd) and y in [0, _gggac).
// The zero rectangle is returned when every cell is nil.
func (_ddbfe *textTable )computeBbox ()_cfb .PdfRectangle {var _ggffd _cfb .PdfRectangle ;
_cedd :=false ;for _gadbb :=0;_gadbb < _ddbfe ._gggac ;_gadbb ++{for _eceaa :=0;_eceaa < _ddbfe ._dedd ;_eceaa ++{_edddg :=_ddbfe .get (_eceaa ,_gadbb );if _edddg ==nil {continue ;};if !_cedd {_ggffd =_edddg .PdfRectangle ;_cedd =true ;}else {_ggffd =_bfad (_ggffd ,_edddg .PdfRectangle );
};};};return _ggffd ;};
// augmentGrid splits the list with vertsHorzs and adds rulings of mark kind
// `_baddd` where the bounding box of one group extends more than `_dbgbb`
// beyond the bounding box of the other:
//   - a `_dcfc` ruling at the second group's Llx (prepended) or Urx (appended),
//     spanning the first group's Lly..Ury;
//   - an `_ecbd` ruling at the first group's Lly (prepended) or Ury (appended),
//     spanning the second group's Llx..Urx.
// The two lists from vertsHorzs are returned unchanged when either is empty or
// when the combined length after augmentation equals len of the receiver;
// otherwise the augmented lists are returned.
func (_aebce rulingList )augmentGrid ()(rulingList ,rulingList ){_bgga ,_cbfca :=_aebce .vertsHorzs ();if len (_bgga )==0||len (_cbfca )==0{return _bgga ,_cbfca ;};_cddda ,_aceg :=_bgga ,_cbfca ;_ebgg :=_bgga .bbox ();_eecf :=_cbfca .bbox ();
if _dgab {_ed .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_ebgg );_ed .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_eecf );
};var _dffgf ,_gecf ,_agaab ,_dddge *ruling ;if _eecf .Llx < _ebgg .Llx -_dbgbb {_dffgf =&ruling {_edebb :_baddd ,_efdg :_dcfc ,_gged :_eecf .Llx ,_abcc :_ebgg .Lly ,_dfad :_ebgg .Ury };_bgga =append (rulingList {_dffgf },_bgga ...);};if _eecf .Urx > _ebgg .Urx +_dbgbb {_gecf =&ruling {_edebb :_baddd ,_efdg :_dcfc ,_gged :_eecf .Urx ,_abcc :_ebgg .Lly ,_dfad :_ebgg .Ury };
_bgga =append (_bgga ,_gecf );};if _ebgg .Lly < _eecf .Lly -_dbgbb {_agaab =&ruling {_edebb :_baddd ,_efdg :_ecbd ,_gged :_ebgg .Lly ,_abcc :_eecf .Llx ,_dfad :_eecf .Urx };_cbfca =append (rulingList {_agaab },_cbfca ...);};if _ebgg .Ury > _eecf .Ury +_dbgbb {_dddge =&ruling {_edebb :_baddd ,_efdg :_ecbd ,_gged :_ebgg .Ury ,_abcc :_eecf .Llx ,_dfad :_eecf .Urx };
_cbfca =append (_cbfca ,_dddge );};if len (_bgga )+len (_cbfca )==len (_aebce ){return _cddda ,_aceg ;};_gbae :=append (_bgga ,_cbfca ...);_aebce .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_gbae .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");
return _bgga ,_cbfca ;};
// NewFromContents creates a new extractor from a content stream string and the
// page resources it refers to. The extractor starts with empty fontEntry and
// textResult maps, and the call is reported to the license tracker.
// The returned error is always nil.
func NewFromContents(contents string, resources *_cfb.PdfPageResources) (*Extractor, error) {
	const _gef = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"

	extractor := new(Extractor)
	extractor._ede = contents
	extractor._cd = resources
	extractor._af = make(map[string]fontEntry)
	extractor._fb = make(map[string]textResult)

	_fe.TrackUse(_gef)
	return extractor, nil
}
// String returns a description of `tm`: its bounding rectangle, the value
// labelled "fontsize" (`_gegdd`) and its quoted text (`_gbag`).
func (_gace *textMark) String() string {
	return _bb.Sprintf("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022", _gace.PdfRectangle, _gace._gegdd, _gace._gbag)
}

// _acee merges word bags: after ordering `_ebga` in place by decreasing area
// and then decreasing height, every bag absorbs each later bag whose rectangle
// `_bfcgf` accepts against the absorbing bag's rectangle widened to the left
// by its `_agg`. The bags that were not absorbed are returned.
// Slices with at most one element are returned as they are.
func _acee(_ebga []*wordBag) []*wordBag {
	if len(_ebga) <= 1 {
		return _ebga
	}
	if _bfbe {
		_ed.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}

	// A stable sort keeps bags of equal area and height in their incoming
	// order. Comparing the indexes inside an unstable sort cannot do that,
	// because the indexes refer to positions that change while sorting.
	_gf.SliceStable(_ebga, func(i, j int) bool {
		a, b := _ebga[i], _ebga[j]
		areaA := a.Width() * a.Height()
		areaB := b.Width() * b.Height()
		if areaA != areaB {
			return areaA > areaB
		}
		if a.Height() != b.Height() {
			return a.Height() > b.Height()
		}
		return false
	})

	var merged []*wordBag
	absorbed := make(intSet)
	for i := 0; i < len(_ebga); i++ {
		if absorbed.has(i) {
			continue
		}
		outer := _ebga[i]
		for j := i + 1; j < len(_ebga); j++ {
			// Skip bags that an earlier bag already took over; testing the
			// outer index here would let the same bag be absorbed twice.
			if absorbed.has(j) {
				continue
			}
			inner := _ebga[j]
			// Re-read the rectangle on every pass because absorb may grow it.
			area := outer.PdfRectangle
			area.Llx -= outer._agg
			if _bfcgf(area, inner.PdfRectangle) {
				outer.absorb(inner)
				absorbed.add(j)
			}
		}
		merged = append(merged, outer)
	}

	if len(_ebga) != len(merged)+len(absorbed) {
		_ed.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_ebga), len(merged), len(absorbed))
	}
	return merged
}

// getRight collects, for every row y in [0, _gggac), the `_bdbeb` neighbour of
// the cell in the last column. It returns nil when any neighbour is taken or
// when consecutive neighbours are not linked through `_ggfa`.
func (_dgdg *textTable) getRight() paraList {
	cells := make(paraList, _dgdg._gggac)
	for y := range cells {
		neighbour := _dgdg.get(_dgdg._dedd-1, y)._bdbeb
		if neighbour.taken() {
			return nil
		}
		cells[y] = neighbour
	}
	for y := 0; y < _dgdg._gggac-1; y++ {
		if cells[y]._ggfa != cells[y+1] {
			return nil
		}
	}
	return cells
}

// _ceed returns the lines of `_cfee` whose `_feff` is greater than `_adgf`
// and, unless `_edgg` is -1, also less than `_edgg`.
func _ceed(_cfee []*textLine, _edgg, _adgf float64) []*textLine {
	var kept []*textLine
	for _, line := range _cfee {
		if !(line._feff > _adgf) {
			continue
		}
		if _edgg == -1 || line._feff < _edgg {
			kept = append(kept, line)
		}
	}
	return kept
}

// newTablePara returns a textPara that wraps the table: both of its rectangles
// are set to the table's computed bounding box and `_cgabf` points at the table.
func (_afbga *textTable) newTablePara() *textPara {
	box := _afbga.computeBbox()
	para := &textPara{PdfRectangle: box, _bbgce: box, _cgabf: _afbga}
	if _ddbc {
		_ed.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// RenderMode bit flags: RenderModeStroke is 1, RenderModeFill is 2 and
// RenderModeClip is 4.
const (
	RenderModeStroke RenderMode = 1 << iota
	RenderModeFill
	RenderModeClip
)
// List returns all the list objects detected on the page.
// It detects all the bullet point Lists from a given pdf page and builds a slice of bullet list objects.
// A given bullet list object has a tree structure.
// Each bullet point list is extracted with the text content it contains and all the sub lists found under it as children in the tree.
// The remaining content of the PDF is ignored; only the text inside bullet point lists is extracted.
// The list extraction is done in two ways.
// 1. If the document is tagged then the lists are extracted using the tags provided in the document.
// 2. Otherwise the bullet lists are extracted from the raw text using regex matching.
// By default the document tag is used if available.
// However this can be disabled using `DisableDocumentTags` in the `Options` object.
// Sometimes disabling document tags option might give a better bullet list extraction if the document was tagged incorrectly.
// options := &Options{
// DisableDocumentTags: false, // this means use document tag if available
// }
// ex, err := NewWithOptions(page, options)
// // handle error
// pageText, _, _, err := ex.ExtractPageText()
// // handle error
// lists := pageText.List()
// txt := lists.Text()
func (_deeee PageText) List() lists {
	paras := _deeee.getParagraphs()
	// The text-matching result is the fallback for every path below.
	matched := paras.list()

	hasTree := _deeee._ecfa != nil && *_deeee._ecfa != nil
	if !hasTree || _deeee._fgcd {
		return matched
	}

	lines := _bbdea(&paras)
	root := &structTreeRoot{}
	root.parseStructTreeRoot(*_deeee._ecfa)
	if root._acbec == nil {
		_ed.Log.Debug("\u004c\u0069\u0073\u0074\u003a\u0020\u0073t\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e'\u0074\u0020\u0068\u0061\u0076e\u0020\u0061\u006e\u0079\u0020\u0063\u006f\u006e\u0074e\u006e\u0074\u002c\u0020\u0075\u0073\u0069\u006e\u0067\u0020\u0074\u0065\u0078\u0074\u0020\u006d\u0061\u0074\u0063\u0068\u0069\u006e\u0067\u0020\u006d\u0065\u0074\u0068\u006f\u0064\u0020\u0069\u006e\u0073\u0074\u0065\u0061\u0064\u002e")
		return matched
	}
	return root.buildList(lines, _deeee._gcf)
}

var _ge = false

// setTextRise stores `_dbf` in the `_baea` field of the text state.
// A nil receiver is ignored.
func (_aeb *textObject) setTextRise(_dbf float64) {
	if _aeb != nil {
		_aeb._fgfa._baea = _dbf
	}
}

// bbox returns the smallest rectangle containing every point of every subpath
// in `_cafa`. It indexes the first point of the first subpath unconditionally.
func (_adg pathSection) bbox() _cfb.PdfRectangle {
	origin := _adg._cafa[0]._edc[0]
	box := _cfb.PdfRectangle{Llx: origin.X, Urx: origin.X, Lly: origin.Y, Ury: origin.Y}
	for i, sub := range _adg._cafa {
		points := sub._edc
		if i == 0 {
			// The first point already seeded the box.
			points = points[1:]
		}
		for _, p := range points {
			switch {
			case p.X < box.Llx:
				box.Llx = p.X
			case p.X > box.Urx:
				box.Urx = p.X
			}
			switch {
			case p.Y < box.Lly:
				box.Lly = p.Y
			case p.Y > box.Ury:
				box.Ury = p.Y
			}
		}
	}
	return box
}

// text returns the `_ecgg` strings of all words in the bag joined by single spaces.
func (_adb *wordBag) text() string {
	words := _adb.allWords()
	parts := make([]string, 0, len(words))
	for _, word := range words {
		parts = append(parts, word._ecgg)
	}
	return _bd.Join(parts, "\u0020")
}
// String returns a human readable description of `s`: its members in
// increasing order, formatted with %+v.
func (_fcgdf intSet) String() string {
	var members []int
	for key := range _fcgdf {
		if !_fcgdf.has(key) {
			continue
		}
		members = append(members, key)
	}
	_gf.Sort(_gf.IntSlice(members))
	return _bb.Sprintf("\u0025\u002b\u0076", members)
}
// String returns a description of `w`: its `_cefef` value, its bounding
// rectangle, the value labelled "fontsize" (`_feabg`) and its quoted text (`_ecgg`).
func (_edff *textWord )String ()string {return _bb .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_edff ._cefef ,_edff .PdfRectangle ,_edff ._feabg ,_edff ._ecgg );
};
// sort orders the words of every entry of `_agad` in place by increasing Llx
// of their bounding boxes (the comparison is `_aedd`).
func (_bffb *wordBag )sort (){for _ ,_fafcc :=range _bffb ._agad {_gf .Slice (_fafcc ,func (_bgaf ,_ecab int )bool {return _aedd (_fafcc [_bgaf ],_fafcc [_ecab ])< 0});};};
// _aegb reports whether `_faga` has at least `_bgff` runes, ends with a rune in
// unicode.Hyphen, and the rune before that one is not white space.
func _aegb (_faga string )bool {if _ba .RuneCountInString (_faga )< _bgff {return false ;
};_fdbb ,_gbdg :=_ba .DecodeLastRuneInString (_faga );if _gbdg <=0||!_cf .Is (_cf .Hyphen ,_fdbb ){return false ;};_fdbb ,_gbdg =_ba .DecodeLastRuneInString (_faga [:len (_faga )-_gbdg ]);return _gbdg > 0&&!_cf .IsSpace (_fdbb );};
// equals reports whether both rulings have the same `_efdg` and their `_gged`,
// `_abcc` and `_dfad` values each satisfy `_bdaf`.
func (_gbggf *ruling )equals (_aegeb *ruling )bool {return _gbggf ._efdg ==_aegeb ._efdg &&_bdaf (_gbggf ._gged ,_aegeb ._gged )&&_bdaf (_gbggf ._abcc ,_aegeb ._abcc )&&_bdaf (_gbggf ._dfad ,_aegeb ._dfad );
};
// _bcdg returns a new subpath whose only point is `_dffg`.
func _bcdg (_dffg _cfa .Point )*subpath {return &subpath {_edc :[]_cfa .Point {_dffg }}};
// getDown collects, for every column x in [0, _dedd), the `_ggfa` neighbour of
// the cell in the last row. It returns nil when any neighbour is taken or when
// consecutive neighbours are not linked through `_bdbeb`.
func (_eefc *textTable )getDown ()paraList {_cfbdee :=make (paraList ,_eefc ._dedd );for _defc :=0;_defc < _eefc ._dedd ;_defc ++{_gcfee :=_eefc .get (_defc ,_eefc ._gggac -1)._ggfa ;
if _gcfee .taken (){return nil ;};_cfbdee [_defc ]=_gcfee ;};for _bdgdd :=0;_bdgdd < _eefc ._dedd -1;_bdgdd ++{if _cfbdee [_bdgdd ]._bdbeb !=_cfbdee [_bdgdd +1]{return nil ;};};return _cfbdee ;};
// getListLines returns the lines of `_ecfaf` whose first word starts with a
// byte accepted by `_eabd`, followed by the lines returned by `_dab`.
// NOTE(review): `_eded [0]._ecgg [0]` is indexed without length checks, so this
// assumes every line has a non-empty first word - confirm against callers.
func (_fceb *textPara )getListLines ()[]*textLine {var _dfcb []*textLine ;
_agga :=_dab (_fceb ._ecfaf );for _ ,_dcdb :=range _fceb ._ecfaf {_gcaf :=_dcdb ._eded [0]._ecgg [0];if _eabd (_gcaf ){_dfcb =append (_dfcb ,_dcdb );};};_dfcb =append (_dfcb ,_agga ...);return _dfcb ;};
// depthIndexes returns the keys of `_agad` in increasing order, or nil when
// the map is empty.
func (_ccb *wordBag )depthIndexes ()[]int {if len (_ccb ._agad )==0{return nil ;
};_dadb :=make ([]int ,len (_ccb ._agad ));_dcgg :=0;for _fbfb :=range _ccb ._agad {_dadb [_dcgg ]=_fbfb ;_dcgg ++;};_gf .Ints (_dadb );return _dadb ;};
// _ffeee returns the elements three `_adae` levels below `_egab` whose `_aggf`
// equals "L".
func _ffeee (_egab structElement )[]structElement {_affe :=[]structElement {};for _ ,_bbgb :=range _egab ._adae {for _ ,_bece :=range _bbgb ._adae {for _ ,_cfad :=range _bece ._adae {if _cfad ._aggf =="\u004c"{_affe =append (_affe ,_cfad );
};};};};return _affe ;};
// gridTiling embeds a rectangle and holds two float64 slices plus a two-level
// map of gridTile values keyed by float64 pairs.
type gridTiling struct{_cfb .PdfRectangle ;_cgbc []float64 ;_feca []float64 ;_cabf map[float64 ]map[float64 ]gridTile ;};
// _eae reports whether any font in `_ee` has FontName equal to `_ff`.
func _eae (_ee []Font ,_ff string )bool {for _ ,_gfb :=range _ee {if _gfb .FontName ==_ff {return true ;};};
return false ;};
// bbox returns the mark's embedded PdfRectangle.
func (_gecc *textMark )bbox ()_cfb .PdfRectangle {return _gecc .PdfRectangle };
// _bdcef builds one list (via `_gagf`) per struct element in `_dgfc`,
// recursing into each element's `_adae` children for the sub-lists.
// An element receives the lines stored under its `_cfbf` key in `_cdfd` only
// when that key is not -1, its `_cccb` is a *PdfObjectReference, and `_fbaf`
// is a *PdfIndirectObject whose reference deep-equals it; otherwise its line
// slice stays empty.
func _bdcef (_dgfc []structElement ,_cdfd map[int ][]*textLine ,_fbaf _feb .PdfObject )[]*list {_ggbd :=[]*list {};for _ ,_cceg :=range _dgfc {_bdd :=_cceg ._adae ;
_decg :=int (_cceg ._cfbf );_eaab :=_cceg ._aggf ;_gfda :=[]*textLine {};_ccgff :=[]*list {};_bgfgc :=_cceg ._cccb ;_agee ,_febe :=(_bgfgc .(*_feb .PdfObjectReference ));if !_febe {_ed .Log .Debug ("\u0066\u0061\u0069l\u0065\u0064\u0020\u006f\u0074\u0020\u0063\u0061\u0073\u0074\u0020\u0074\u006f\u0020\u002a\u0063\u006f\u0072\u0065\u002e\u0050\u0064\u0066\u004f\u0062\u006a\u0065\u0063\u0074R\u0065\u0066\u0065\u0072\u0065\u006e\u0063\u0065");
};if _decg !=-1&&_agee !=nil {if _agdc ,_agcf :=_cdfd [_decg ];_agcf {if _cgdb ,_abad :=_fbaf .(*_feb .PdfIndirectObject );_abad {_gafd :=_cgdb .PdfObjectReference ;if _c .DeepEqual (*_agee ,_gafd ){_gfda =_agdc ;};};};};if _bdd !=nil {_ccgff =_bdcef (_bdd ,_cdfd ,_fbaf );
};_gaba :=_gagf (_gfda ,_eaab ,_ccgff );_ggbd =append (_ggbd ,_gaba );};return _ggbd ;};
// addPoint converts (`_edfe`, `_bgdc`) with devicePoint and adds it to the
// subpath returned by establishSubpath. When that returns nil, the point is
// stored in `_ggb` as a pending point and `_fab` is set instead.
func (_edebad *shapesState )addPoint (_edfe ,_bgdc float64 ){_egbb :=_edebad .establishSubpath ();_afbc :=_edebad .devicePoint (_edfe ,_bgdc );if _egbb ==nil {_edebad ._fab =true ;
_edebad ._ggb =_afbc ;}else {_egbb .add (_afbc );};};
// emptyCompositeRow reports whether no entry of `_gacgd` for row `_agbea`
// (columns 0 to _dedd-1) has a non-empty paraList.
func (_affb *textTable )emptyCompositeRow (_agbea int )bool {for _ffgdb :=0;_ffgdb < _affb ._dedd ;_ffgdb ++{if _ebbf ,_efca :=_affb ._gacgd [_fedd (_ffgdb ,_agbea )];_efca {if len (_ebbf .paraList )> 0{return false ;
};};};return true ;};
// _bddbb walks the keys of `_ecfec` in the order returned by `_efec`, starting
// at the first key whose slice is non-nil. For each non-nil slice, when its
// last value exceeds its first value the last value is overwritten with the
// first and the slice is also stored under the previously visited key.
// Maps with at most one entry are left untouched.
// NOTE(review): `_ebae` and `_eccaa` are both read from `_ecfec [_abde ]`; one
// of them was presumably meant to come from the previous key `_bbeae` -
// confirm the intended behaviour before changing it.
func _bddbb (_ecfec map[int ][]float64 ){if len (_ecfec )<=1{return ;};_caacf :=_efec (_ecfec );if _ddbc {_ed .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_caacf );};var _abfd ,_bbeae int ;
for _abfd ,_bbeae =range _caacf {if _ecfec [_bbeae ]!=nil {break ;};};for _acec ,_abde :=range _caacf [_abfd :]{_ebae :=_ecfec [_abde ];if _ebae ==nil {continue ;};if _ddbc {_bb .Printf ("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_abfd +_acec ,_bbeae ,_abde );
};_eccaa :=_ecfec [_abde ];if _eccaa [len (_eccaa )-1]> _ebae [0]{_eccaa [len (_eccaa )-1]=_ebae [0];_ecfec [_bbeae ]=_eccaa ;};_bbeae =_abde ;};};
// setHorizScaling stores `_cgac` in the `_ddef` field of the text state.
// A nil receiver is ignored.
func (_acgf *textObject )setHorizScaling (_cgac float64 ){if _acgf ==nil {return ;};_acgf ._fgfa ._ddef =_cgac ;
};
// _ddecc returns the keys of `_agab` in increasing order.
func _ddecc (_agab map[float64 ][]*textLine )[]float64 {_bbgg :=[]float64 {};for _ebbba :=range _agab {_bbgg =append (_bbgg ,_ebbba );};_gf .Float64s (_bbgg );return _bbgg ;};
// quadraticTo adds the point (`_dga`, `_fbce`) to the current path.
// The first two parameters (`_cfga`, `_bdb`) are not used.
func (_cfef *shapesState )quadraticTo (_cfga ,_bdb ,_dga ,_fbce float64 ){if _bfcbef {_ed .Log .Info ("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a");
};_cfef .addPoint (_dga ,_fbce );};
// _aedd returns the Llx of `_cdce`'s bounding box minus the Llx of `_bgfgd`'s.
func _aedd (_cdce ,_bgfgd bounded )float64 {return _cdce .bbox ().Llx -_bgfgd .bbox ().Llx };
// setFont stores `_bbdb` in the text state's `_ggc` field and then looks up
// the font named `_ebfc` with getFont, storing it in `_fdac`.
// The `_ggc` field is written before the lookup, so it is changed even when
// getFont fails and its error is returned. A nil receiver returns nil.
func (_cfgf *textObject )setFont (_ebfc string ,_bbdb float64 )error {if _cfgf ==nil {return nil ;};_cfgf ._fgfa ._ggc =_bbdb ;
_gge ,_cdfg :=_cfgf .getFont (_ebfc );if _cdfg !=nil {return _cdfg ;};_cfgf ._fgfa ._fdac =_gge ;return nil ;};
// _ccbc maps each rulingKind to its printable name ("none", "horizontal", "vertical").
var _ccbc =map[rulingKind ]string {_abbb :"\u006e\u006f\u006e\u0065",_ecbd :"\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_dcfc :"\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"};
// complete reports whether every tile stored in `_cabf` is complete.
func (_fgcbb gridTiling) complete() bool {
	for _, column := range _fgcbb._cabf {
		for _, tile := range column {
			if tile.complete() {
				continue
			}
			return false
		}
	}
	return true
}

// _gad returns `_cdbd` of the first argument minus `_cdbd` of the second.
func _gad(_bdbf, _fgaf bounded) float64 {
	first := _cdbd(_bdbf)
	second := _cdbd(_fgaf)
	return first - second
}

// sort orders the list in place using its comp method.
func (_fgac rulingList) sort() {
	_gf.Slice(_fgac, _fgac.comp)
}
// toTextMarks converts every mark in `_dece` with ToTextMark and accumulates
// the results through `_aeece`, which also receives `_ggged`.
func (_eega *textWord) toTextMarks(_ggged *int) []TextMark {
	var marks []TextMark
	for i := range _eega._dece {
		marks = _aeece(marks, _ggged, _eega._dece[i].ToTextMark())
	}
	return marks
}
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
func (_cbd *Extractor )ExtractText ()(string ,error ){_cgf ,_ ,_ ,_gff :=_cbd .ExtractTextWithStats ();return _cgf ,_gff ;};
// PageText represents the layout of text on a device page.
type PageText struct {
	_bebf []*textMark                 // marks that ApplyArea filters by area
	_bggg string                      // text written by ApplyArea
	_dbgf []TextMark                  // text marks produced by ApplyArea
	_aegg []TextTable                 // tables produced by ApplyArea
	_bfcb _cfb.PdfRectangle           // rectangle passed to `_egdb` by ApplyArea
	_bce  []pathSection
	_caac []pathSection
	_ecfa *_feb.PdfObject             // parsed as the struct tree root by List
	_gcf  _feb.PdfObject              // passed to buildList by List
	_fgcd bool                        // when true, List does not use `_ecfa`
	_dbbd *_bc.ContentStreamOperations
}

// findGridTables tries findTableGrid for every tiling in `_cabcb` and returns
// the tables found. Each table has markCells called on it, and every paragraph
// in the second result of findTableGrid gets its `_cacc` flag set.
func (_gfcca paraList) findGridTables(_cabcb []gridTiling) []*textTable {
	if _ddbc {
		_ed.Log.Info("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073", len(_gfcca))
		for i, para := range _gfcca {
			_bb.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
	}

	var tables []*textTable
	for i, tiling := range _cabcb {
		table, used := _gfcca.findTableGrid(tiling)
		if table != nil {
			table.log(_bb.Sprintf("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064", i))
			tables = append(tables, table)
			table.markCells()
		}
		for para := range used {
			para._cacc = true
		}
	}

	if _ddbc {
		_ed.Log.Info("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s", len(tables))
	}
	return tables
}

// pullWord adds `_gdg` to the bag under key `_degg`: the bag's rectangle is
// extended with the word's, `_agg` is raised to the word's `_feabg` when that
// is larger, and the word is recorded in `_ddfe[_degg]`.
// The inner map `_ddfe[_degg]` must already exist.
func (_gedea *wordBag) pullWord(_gdg *textWord, _degg int, _ddfe map[int]map[*textWord]struct{}) {
	_gedea.PdfRectangle = _bfad(_gedea.PdfRectangle, _gdg.PdfRectangle)
	if size := _gdg._feabg; size > _gedea._agg {
		_gedea._agg = size
	}
	words := _gedea._agad[_degg]
	_gedea._agad[_degg] = append(words, _gdg)
	_ddfe[_degg][_gdg] = struct{}{}
}
// String returns a string describing `pt`: one line per mark in `_bebf`,
// framed by a summary line prefixed with "-" at the top and "+" at the bottom.
func (_fead PageText) String() string {
	summary := _bb.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_fead._bebf))
	lines := make([]string, 0, len(_fead._bebf)+2)
	lines = append(lines, "\u002d"+summary)
	for _, mark := range _fead._bebf {
		lines = append(lines, mark.String())
	}
	lines = append(lines, "\u002b"+summary)
	return _bd.Join(lines, "\u000a")
}

// isQuadrilateral reports whether the subpath has exactly four points, or five
// points of which the first and the last have identical coordinates.
func (_gafb *subpath) isQuadrilateral() bool {
	switch len(_gafb._edc) {
	case 4:
		return true
	case 5:
		first, last := _gafb._edc[0], _gafb._edc[4]
		return first.X == last.X && first.Y == last.Y
	default:
		return false
	}
}

// close appends the first point again unless `_bebcb` accepts the first and
// last points, marks the subpath as closed (`_gdee`) and removes duplicates.
func (_aege *subpath) close() {
	start := _aege._edc[0]
	if !_bebcb(start, _aege.last()) {
		_aege.add(start)
	}
	_aege._gdee = true
	_aege.removeDuplicates()
}

// _ebeb is a regular expression source with four alternatives anchored at the
// start of the text: a letter or a run of digits followed by ")" or ".", or a
// letter or a run of digits enclosed in parentheses.
var _ebeb string = "\u005e\u005b\u0061\u002d\u007a\u0041\u002dZ\u005d\u0028\u005c)\u007c\u005c\u002e)\u007c\u005e[\u005c\u0064\u005d\u002b\u0028\u005c)\u007c\\.\u0029\u007c\u005e\u005c\u0028\u005b\u0061\u002d\u007a\u0041\u002d\u005a\u005d\u005c\u0029\u007c\u005e\u005c\u0028\u005b\u005c\u0064\u005d\u002b\u005c\u0029"
// textMark is the package-internal record of a mark; it embeds the mark's
// bounding rectangle. Its String method prints `_gegdd` as the font size and
// `_gbag` as the text; ApplyArea groups marks by `_dgfd`.
type textMark struct{_cfb .PdfRectangle ;_dgfd int ;_gbag string ;_gaead string ;_eggf *_cfb .PdfFont ;_gegdd float64 ;_afff float64 ;_gfbab _cfa .Matrix ;_ecdc _cfa .Point ;_ecca _cfb .PdfRectangle ;_adcd _a .Color ;_abfg _a .Color ;_ecad _feb .PdfObject ;
_gedf []string ;Tw float64 ;Th float64 ;_bfdf int ;_dfac int ;};
// arrangeText turns the words of the bag into a paragraph of text lines.
// The bag is sorted first (and de-duplicated when `_cfgad` is set). Then, for
// every depth index and until that index is empty, a new line is started at
// the first word in reading order (`_cdaeg`) and grown as follows:
//   - candidate words are the highest word of each depth index within
//     `_ffbb` times the first word's `_feabg` of its `_cefef`;
//   - `_gfdc` is measured from the last word of the line to the candidate; a
//     value below -`_ecbfg`*`_feabg` ends the line, a value above
//     `_ebbc`*`_feabg` skips the candidate;
//   - of the remaining candidates the one with the smallest Llx is pulled
//     into the line; the line ends when no candidate is left.
// The finished lines are ordered with `_fdfd` and wrapped with `_gfgfd`.
// Returns nil when no line was produced.
func (_acgd *wordBag )arrangeText ()*textPara {_acgd .sort ();if _cfgad {_acgd .removeDuplicates ();};var _eefb []*textLine ;for _ ,_eeebc :=range _acgd .depthIndexes (){for !_acgd .empty (_eeebc ){_gggd :=_acgd .firstReadingIndex (_eeebc );
_eaag :=_acgd .firstWord (_gggd );_cbda :=_cdaeg (_acgd ,_gggd );_gcfa :=_eaag ._feabg ;_ebcb :=_eaag ._cefef -_ffbb *_gcfa ;_gbgbc :=_eaag ._cefef +_ffbb *_gcfa ;_eaef :=_ebbc *_gcfa ;_abcd :=_ecbfg *_gcfa ;_ebgbf :for {var _dgeg *textWord ;_bfef :=0;
for _ ,_eceeg :=range _acgd .depthBand (_ebcb ,_gbgbc ){_bbed :=_acgd .highestWord (_eceeg ,_ebcb ,_gbgbc );if _bbed ==nil {continue ;};_efea :=_gfdc (_bbed ,_cbda ._eded [len (_cbda ._eded )-1]);if _efea < -_abcd {break _ebgbf ;};if _efea > _eaef {continue ;
};if _dgeg !=nil &&_aedd (_bbed ,_dgeg )>=0{continue ;};_dgeg =_bbed ;_bfef =_eceeg ;};if _dgeg ==nil {break ;};_cbda .pullWord (_acgd ,_dgeg ,_bfef );};_cbda .markWordBoundaries ();_eefb =append (_eefb ,_cbda );};};if len (_eefb )==0{return nil ;};_gf .Slice (_eefb ,func (_gadd ,_afab int )bool {return _fdfd (_eefb [_gadd ],_eefb [_afab ])< 0});
_efb :=_gfgfd (_acgd .PdfRectangle ,_eefb );if _bfbe {_ed .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_efb .String ());if _debg {for _eecd ,_cgcd :=range _efb ._ecfaf {_bb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_eecd ,_cgcd .String ());
if _faba {for _aeeb ,_fddcb :=range _cgcd ._eded {_bb .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_aeeb ,_fddcb .String ());for _feg ,_bgcg :=range _fddcb ._dece {_bb .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_feg ,_bgcg .String ());
};};};};};};return _efb ;};
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
type TextTable struct {
	W, H  int
	Cells [][]TableCell
}
// Elements returns the TextMarks in `ma`.
func (_ggcg *TextMarkArray) Elements() []TextMark {
	return _ggcg._fdda
}

// moveTextSetLeading stores -`_fge` in the text state's `_bdf` field and then
// calls moveLP(`_dcdd`, `_fge`).
// A nil receiver is ignored, as in setTextRise, setHorizScaling and setFont,
// instead of panicking on the field access.
func (_cca *textObject) moveTextSetLeading(_dcdd, _fge float64) {
	if _cca == nil {
		return
	}
	_cca._fgfa._bdf = -_fge
	_cca.moveLP(_dcdd, _fge)
}
// Font represents the font properties on a PDF page.
type Font struct {
	PdfFont *_cfb.PdfFont

	// FontName represents the Font Name from the font properties.
	FontName string

	// FontType represents the Font Subtype entry in the font dictionary inside page resources.
	// Examples: type0, Type1, MMType1, Type3, TrueType, CIDFont.
	FontType string

	// ToUnicode is true if the font provides a `ToUnicode` mapping.
	ToUnicode bool

	// IsCID is true if the underlying font is a composite font.
	// A composite font is represented by a font dictionary whose Subtype is `Type0`.
	IsCID bool

	// IsSimple is true if the font is a simple font.
	// A simple font is limited to only 8 bit (255) character codes.
	IsSimple bool

	// FontData represents the raw data of the embedded font file.
	// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CFF).
	// The FontData value is taken from `FontFile`, `FontFile2` or `FontFile3` inside the Font Descriptor.
	// At most one of `FontFile`, `FontFile2` or `FontFile3` provides the FontData value.
	FontData []byte

	// FontFileName is a name representing the font. It has the format:
	// (Font Name) + (Font Type Extension), example: helvetica.ttf.
	FontFileName string

	// FontDescriptor represents metrics and other attributes inside font properties from the PDF Structure (Font Descriptor).
	FontDescriptor *_cfb.PdfFontDescriptor
}

// blocks reports whether some ruling of the list lies between `_eaceb` and
// `_gecg`. It returns false when the [`_abcc`, `_dfad`] ranges of the two do
// not overlap. Otherwise a ruling matches when its `_gged` is between the two
// `_gged` values (with tolerance `_dffa`) and its own range intersects the
// overlap of the two ranges.
func (_efcb rulingList) blocks(_eaceb, _gecg *ruling) bool {
	if _eaceb._abcc > _gecg._dfad || _gecg._abcc > _eaceb._dfad {
		return false
	}
	overlapLo := _e.Max(_eaceb._abcc, _gecg._abcc)
	overlapHi := _e.Min(_eaceb._dfad, _gecg._dfad)

	near, far := _eaceb, _gecg
	if near._gged > far._gged {
		near, far = far, near
	}
	for _, candidate := range _efcb {
		between := near._gged <= candidate._gged+_dffa && candidate._gged <= far._gged+_dffa
		intersects := candidate._abcc <= overlapHi && overlapLo <= candidate._dfad
		if between && intersects {
			return true
		}
	}
	return false
}

// splitSec sorts the list in place by (`_abcc`, `_dfad`) and partitions it into
// groups. Each group is seeded with the first ruling not yet grouped and takes,
// in list order, every ungrouped ruling for which alignsSec holds with a ruling
// already in the group. The list must not be empty.
func (_fadge rulingList) splitSec() []rulingList {
	_gf.Slice(_fadge, func(i, j int) bool {
		a, b := _fadge[i], _fadge[j]
		if a._abcc == b._abcc {
			return a._dfad < b._dfad
		}
		return a._abcc < b._abcc
	})

	grouped := make(map[*ruling]struct{}, len(_fadge))
	alignsAny := func(group rulingList, candidate *ruling) bool {
		for _, member := range group {
			if candidate.alignsSec(member) {
				return true
			}
		}
		return false
	}
	grow := func(seed *ruling) rulingList {
		group := rulingList{seed}
		grouped[seed] = struct{}{}
		for _, candidate := range _fadge {
			if _, done := grouped[candidate]; done {
				continue
			}
			if alignsAny(group, candidate) {
				group = append(group, candidate)
				grouped[candidate] = struct{}{}
			}
		}
		return group
	}

	groups := []rulingList{grow(_fadge[0])}
	for _, seed := range _fadge[1:] {
		if _, done := grouped[seed]; !done {
			groups = append(groups, grow(seed))
		}
	}
	return groups
}

// findTextTables grows a table from every paragraph that is not taken, has a
// non-zero width and for which isAtom returns a table. Tables whose
// `_dedd`*`_gggac` is below `_gggg` are dropped; the others have markCells
// called on them and are returned.
func (_fcadfc paraList) findTextTables() []*textTable {
	var tables []*textTable
	for _, para := range _fcadfc {
		if para.taken() || para.Width() == 0 {
			continue
		}
		table := para.isAtom()
		if table == nil {
			continue
		}
		table.growTable()
		if cells := table._dedd * table._gggac; cells < _gggg {
			continue
		}
		table.markCells()
		table.log("\u0067\u0072\u006fw\u006e")
		tables = append(tables, table)
	}
	return tables
}

// sortStrict orders the list in place by decreasing `_efdg`, then by
// increasing `_gged` (values whose difference `_cbafb` accepts are treated as
// equal), then by increasing `_abcc` and finally by increasing `_dfad`.
func (_abaa rulingList) sortStrict() {
	_gf.Slice(_abaa, func(i, j int) bool {
		a, b := _abaa[i], _abaa[j]
		switch {
		case a._efdg != b._efdg:
			return a._efdg > b._efdg
		case !_cbafb(a._gged - b._gged):
			return a._gged < b._gged
		case a._abcc != b._abcc:
			return a._abcc < b._abcc
		default:
			return a._dfad < b._dfad
		}
	})
}

// empty reports whether `_agad` has no entry for key `_ggee`.
func (_baac *wordBag) empty(_ggee int) bool {
	if _, present := _baac._agad[_ggee]; present {
		return false
	}
	return true
}
// String returns a string describing the tile: its rectangle followed by one
// character per flag (`_afce` "L", `_gfbbcf` "R", `_ggaf` "B", `_cddec` "T"),
// where "_" stands for a flag that is not set.
func (_adge gridTile) String() string {
	mark := func(set bool, label string) string {
		if !set {
			return "\u005f"
		}
		return label
	}
	left := mark(_adge._afce, "\u004c")
	right := mark(_adge._gfbbcf, "\u0052")
	bottom := mark(_adge._ggaf, "\u0042")
	top := mark(_adge._cddec, "\u0054")
	return _bb.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", _adge.PdfRectangle, left, right, bottom, top)
}

// removeWord removes `_ffga` (via `_gfeca`) from the words stored under key
// `_dbae` and deletes the key when no words are left.
func (_efgf *wordBag) removeWord(_ffga *textWord, _dbae int) {
	words := _gfeca(_efgf._agad[_dbae], _ffga)
	if len(words) == 0 {
		delete(_efgf._agad, _dbae)
		return
	}
	_efgf._agad[_dbae] = words
}

// _cadf logs the number of lists in `_adab` under the title `_gfecf` and then
// prints every list on its own numbered line.
func _cadf(_gfecf string, _adab []rulingList) {
	_ed.Log.Info("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073", len(_adab), _gfecf)
	for i := range _adab {
		_bb.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, _adab[i].String())
	}
}

// imageExtractContext holds the state of an image extraction run: the
// collected image marks (`_eca`), three counters, the per-stream cache of
// converted images (`_edb`) and the extraction options.
type imageExtractContext struct {
	_eca []ImageMark
	_fad int
	_dcc int
	_aaf int
	_edb map[*_feb.PdfObjectStream]*cachedImage
	_ead *ImageExtractOptions
}
// String returns a description of `l`.
func (l *textLine) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _bb.Sprintf(layout, l._feff, l.PdfRectangle, l._daag, l.text())
}

// _beab returns the rectangle common to `_defb` and `_dbbg`. The boolean is
// false, and the rectangle is the zero value, when _efgb reports that the two
// rectangles do not intersect.
func _beab(_defb, _dbbg _cfb.PdfRectangle) (_cfb.PdfRectangle, bool) {
	var common _cfb.PdfRectangle
	if !_efgb(_defb, _dbbg) {
		return common, false
	}
	common.Llx = _e.Max(_defb.Llx, _dbbg.Llx)
	common.Urx = _e.Min(_defb.Urx, _dbbg.Urx)
	common.Lly = _e.Max(_defb.Lly, _dbbg.Lly)
	common.Ury = _e.Min(_defb.Ury, _dbbg.Ury)
	return common, true
}

// fontEntry pairs a font with an int64 bookkeeping value.
type fontEntry struct {
	_acbf *_cfb.PdfFont
	_gafa int64
}

// removeDuplicates sorts the receiver in place and returns a new list in which
// every run of consecutive rulings that compare equal (per `equals`) is reduced
// to its first element. It returns nil for an empty list.
func (rl rulingList) removeDuplicates() rulingList {
	if len(rl) == 0 {
		return nil
	}
	rl.sort()
	uniq := rulingList{rl[0]}
	for k := 1; k < len(rl); k++ {
		r := rl[k]
		if !r.equals(uniq[len(uniq)-1]) {
			uniq = append(uniq, r)
		}
	}
	return uniq
}

// hasLines reports whether the cell rectangle intersects the rectangle of at
// least one of the lines in `_acbfg`. When the _ddbc debug flag is set, every
// comparison is printed to standard output.
func (c compositeCell) hasLines(_acbfg []*textLine) bool {
	total := len(_acbfg)
	for idx, line := range _acbfg {
		hit := _efgb(c.PdfRectangle, line.PdfRectangle)
		if _ddbc {
			_bb.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", hit, idx, total)
			_bb.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", c)
			_bb.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if hit {
			return true
		}
	}
	return false
}

// _badg creates a word bag holding the single word `_eebd`, filed under the
// key that _gaea derives from the word. The bag takes the word's rectangle and
// the word's _feabg value, and records `_fgffd`, `_dcfb` and `_dag` unchanged.
func _badg(_eebd *textWord, _fgffd float64, _dcfb, _dag rulingList) *wordBag {
	key := _gaea(_eebd._cefef)
	return &wordBag{
		_agad:        map[int][]*textWord{key: {_eebd}},
		PdfRectangle: _eebd.PdfRectangle,
		_agg:         _eebd._feabg,
		_cdee:        _fgffd,
		_afbb:        _dcfb,
		_gacc:        _dag,
	}
}

// _efaf maps stand-alone accent and modifier characters to the corresponding
// combining mark from the Unicode Combining Diacritical Marks block.
var _efaf = map[rune]string{
	0x0060: "\u0300",
	0x02CB: "\u0300",
	0x0027: "\u0301",
	0x00B4: "\u0301",
	0x02B9: "\u0301",
	0x02CA: "\u0301",
	0x005E: "\u0302",
	0x02C6: "\u0302",
	0x007E: "\u0303",
	0x02DC: "\u0303",
	0x00AF: "\u0304",
	0x02C9: "\u0304",
	0x02D8: "\u0306",
	0x02D9: "\u0307",
	0x00A8: "\u0308",
	0x00B0: "\u030a",
	0x02DA: "\u030a",
	0x02BA: "\u030b",
	0x02DD: "\u030b",
	0x02C7: "\u030c",
	0x02C8: "\u030d",
	0x0022: "\u030e",
	0x02BB: "\u0312",
	0x02BC: "\u0313",
	0x0486: "\u0313",
	0x055A: "\u0313",
	0x02BD: "\u0314",
	0x0485: "\u0314",
	0x0559: "\u0314",
	0x02D4: "\u031d",
	0x02D5: "\u031e",
	0x02D6: "\u031f",
	0x02D7: "\u0320",
	0x02B2: "\u0321",
	0x00B8: "\u0327",
	0x02CC: "\u0329",
	0x02B7: "\u032b",
	0x02CD: "\u0331",
	0x005F: "\u0332",
	0x204E: "\u0359",
}

// bbox returns the rectangle embedded in the table.
func (t *textTable) bbox() _cfb.PdfRectangle {
	box := t.PdfRectangle
	return box
}
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_aebc PageText) ToText() string { return _aebc.Text() }

// llyRange returns the sub-slice of `_beag` whose paragraphs have an Lly value
// in the closed interval [`_dbde`, `_aaad`].
//
// `_beag` holds indexes into the receiver. The binary searches below are only
// meaningful when those indexes are ordered by ascending Lly, so that ordering
// is assumed (NOTE(review): confirm against the code that builds `_beag`).
//
// It returns nil when the receiver is empty or when the interval lies entirely
// outside the Lly values covered by the receiver.
func (_acgfd paraList) llyRange(_beag []int, _dbde, _aaad float64) []int {
	n := len(_acgfd)
	// Guard the _beag[0] / _beag[n-1] accesses below, which would otherwise
	// panic for an empty paragraph list.
	if n == 0 {
		return nil
	}
	if _aaad < _acgfd[_beag[0]].Lly || _dbde > _acgfd[_beag[n-1]].Lly {
		return nil
	}
	// First position whose Lly is at least the lower bound.
	lo := _gf.Search(n, func(k int) bool { return _acgfd[_beag[k]].Lly >= _dbde })
	// First position whose Lly exceeds the upper bound (exclusive end).
	hi := _gf.Search(n, func(k int) bool { return _acgfd[_beag[k]].Lly > _aaad })
	return _beag[lo:hi]
}
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
func (_ddd *Extractor) ExtractTextWithStats() (_cdd string, _ebd int, _gga int, _cbdf error) {
	pageText, numChars, numMisses, err := _ddd.ExtractPageText()
	if err == nil {
		return pageText.Text(), numChars, numMisses, nil
	}
	return "", numChars, numMisses, err
}

// stroke appends a path section built from the current subpaths and the
// current stroke colour to `*_gdec`. With the _dgab debug flag set, the new
// section is printed; with _afecb also set, the subpaths with indexes 0 to 10
// are printed as well.
func (ss *shapesState) stroke(_gdec *[]pathSection) {
	section := pathSection{_cafa: ss._ccac, Color: ss._abac.getStrokeColor()}
	*_gdec = append(*_gdec, section)
	if !_dgab {
		return
	}
	_bb.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*_gdec), ss, ss._abac.getStrokeColor(), section.bbox())
	if !_afecb {
		return
	}
	for idx, sp := range ss._ccac {
		_bb.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", idx, sp)
		if idx == 10 {
			break
		}
	}
}

// _bbdea groups text lines by the _bfdf value of the first mark of their first
// word. It covers the lines held directly by each paragraph in `*_baeaf` and,
// for paragraphs that carry a table (_cgabf), the lines of every paragraph
// stored in that table. Lines rejected by _cdad are logged at debug level and
// left out.
func _bbdea(_baeaf *paraList) map[int][]*textLine {
	groups := map[int][]*textLine{}
	// collect files one line under its key, or logs and skips it.
	collect := func(line *textLine) {
		if !_cdad(line) {
			_ed.Log.Debug("g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e")
			return
		}
		key := line._eded[0]._dece[0]._bfdf
		groups[key] = append(groups[key], line)
	}
	for _, para := range *_baeaf {
		for _, line := range para._ecfaf {
			collect(line)
		}
		if para._cgabf == nil {
			continue
		}
		for _, cell := range para._cgabf._dadef {
			for _, line := range cell._ecfaf {
				collect(line)
			}
		}
	}
	return groups
}
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	// _ede is the page content as returned by PdfPage.GetAllContentStreams.
	_ede string
	// _cd is the page's resource dictionary.
	_cd *_cfb.PdfPageResources
	// _bf is the page media box; NewWithOptions normalises reversed coordinates.
	_bf _cfb.PdfRectangle
	// _ab is the page crop box.
	_ab *_cfb.PdfRectangle
	// _af holds font entries by name.
	_af map[string]fontEntry
	// _fb holds text results by name.
	_fb  map[string]textResult
	_gd  int64
	_fbd int
	// _fc holds the options passed to NewWithOptions (nil when created by New).
	_fc *Options
	// _cdf is the value returned by PdfPage.GetStructTreeRoot.
	_cdf *_feb.PdfObject
	// _bcd is the value returned by PdfPage.GetContainingPdfObject.
	_bcd _feb.PdfObject
}

// size returns the number of entries on the stack.
func (s *stateStack) size() int {
	entries := *s
	return len(entries)
}
// snapToGroups splits the list with vertsHorzs, runs snapToGroupsDirection on
// each non-empty part, and returns the two parts concatenated (first result of
// vertsHorzs first). The combined list is passed to log before being returned.
func (rl rulingList) snapToGroups() rulingList {
	verts, horzs := rl.vertsHorzs()
	if len(verts) != 0 {
		verts = verts.snapToGroupsDirection()
	}
	if len(horzs) != 0 {
		horzs = horzs.snapToGroupsDirection()
	}
	snapped := append(verts, horzs...)
	snapped.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return snapped
}

// reduceTiling drops rows and columns of the table that are both empty and
// closer than `_ebgdd` to a neighbouring grid position of `_aeee`.
//
// A row is dropped when it is not the first row, its _feca position is within
// `_ebgdd` of the previous row's, and emptyCompositeRow reports true. A column
// is dropped when it is not the last column, its _cgbc position is within
// `_ebgdd` of the next column's, and emptyCompositeColumn reports true.
//
// The receiver itself is returned when nothing is dropped. Otherwise a new
// table is returned holding the non-empty composite cells of the kept rows
// and columns, renumbered from zero.
func (t *textTable) reduceTiling(_aeee gridTiling, _ebgdd float64) *textTable {
	numRows, numCols := t._gggac, t._dedd
	keptRows := make([]int, 0, numRows)
	keptCols := make([]int, 0, numCols)
	colPos, rowPos := _aeee._cgbc, _aeee._feca

	for y := 0; y < numRows; y++ {
		if y > 0 && _e.Abs(rowPos[y-1]-rowPos[y]) < _ebgdd && t.emptyCompositeRow(y) {
			continue
		}
		keptRows = append(keptRows, y)
	}
	for x := 0; x < numCols; x++ {
		if x < numCols-1 && _e.Abs(colPos[x+1]-colPos[x]) < _ebgdd && t.emptyCompositeColumn(x) {
			continue
		}
		keptCols = append(keptCols, x)
	}
	if len(keptRows) == numRows && len(keptCols) == numCols {
		return t
	}

	reduced := &textTable{
		_agde:  t._agde,
		_dedd:  len(keptCols),
		_gggac: len(keptRows),
		_gacgd: make(map[uint64]compositeCell, len(keptCols)*len(keptRows)),
	}
	if _ddbc {
		_ed.Log.Info("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064", t._dedd, t._gggac, len(keptCols), len(keptRows))
		_ed.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_ed.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}
	for newY, oldY := range keptRows {
		for newX, oldX := range keptCols {
			paras, lines := t.getComposite(oldX, oldY)
			if len(paras) == 0 {
				continue
			}
			if _ddbc {
				_bb.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newX, newY, oldX, oldY, _adcgc(paras.merge().text(), 50))
			}
			reduced.putComposite(newX, newY, paras, lines)
		}
	}
	return reduced
}
// Tables returns the tables extracted from the page.
func (_begeb PageText) Tables() []TextTable {
	tables := _begeb._aegg
	if _ddbc {
		_ed.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}

// _fefe returns the text lines of the first child of `_eaabb` whose tag (_egc)
// is "LBody", "Span" or "InlineShape". For an "LBody" child without lines of
// its own, the search recurses into that child. It returns nil when no child
// carries one of these tags.
func _fefe(_eaabb *list) []*textLine {
	for _, child := range _eaabb._bdbe {
		tag := child._egc
		if tag == "\u004c\u0042\u006fd\u0079" {
			if len(child._bcbgc) == 0 {
				return _fefe(child)
			}
			return child._bcbgc
		}
		if tag == "\u0053\u0070\u0061\u006e" || tag == "I\u006e\u006c\u0069\u006e\u0065\u0053\u0068\u0061\u0070\u0065" {
			return child._bcbgc
		}
	}
	return nil
}

// moveText forwards the offsets `_bbd` and `_gfga` to moveLP.
func (to *textObject) moveText(_bbd, _gfga float64) {
	to.moveLP(_bbd, _gfga)
}
// topoOrder returns the paragraph indexes in the order produced by a
// depth-first traversal of the readBefore relation. Each index is emitted
// after every index reachable from it, and the resulting sequence is reversed
// by _agaf before it is returned.
func (_dbged paraList) topoOrder() []int {
	if _cdcd {
		_ed.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	n := len(_dbged)
	visited := make([]bool, n)
	order := make([]int, 0, n)
	llyOrder := _dbged.llyOrdering()

	var visit func(idx int)
	visit = func(idx int) {
		visited[idx] = true
		for next := 0; next < n; next++ {
			if !visited[next] {
				if _dbged.readBefore(llyOrder, idx, next) {
					visit(next)
				}
			}
		}
		// Post-order: idx is recorded once everything read after it is recorded.
		order = append(order, idx)
	}
	for idx := 0; idx < n; idx++ {
		if !visited[idx] {
			visit(idx)
		}
	}
	return _agaf(order)
}

// list is a node in a tree of tagged list elements.
type list struct {
	// _bcbgc holds the text lines attached to this node.
	_bcbgc []*textLine
	// _egc is the node tag; the code in this file compares it against "L",
	// "LI", "LBody", "Span" and "InlineShape".
	_egc string
	// _bdbe holds the child nodes.
	_bdbe []*list
	_cgee string
}

// _eacf removes the last rune of the text held by `_afge` and updates the
// running text offset `*_ddaf` to match.
//
// When the last mark holds exactly one rune, the whole mark is dropped and the
// offset is set to the end of the mark before it (or to the start of the
// dropped mark when it was the only one). Otherwise the last mark's Text is
// replaced by the result of _gegdg and the offset shrinks by the number of
// bytes removed.
//
// An empty `_afge` is returned unchanged.
func _eacf(_afge []TextMark, _ddaf *int) []TextMark {
	if len(_afge) == 0 {
		// Nothing to remove; indexing the last element would panic.
		return _afge
	}
	last := len(_afge) - 1
	mark := _afge[last]
	if len([]rune(mark.Text)) == 1 {
		_afge = _afge[:last]
		if last == 0 {
			// No preceding mark to derive the offset from: fall back to where
			// the removed mark started.
			*_ddaf = mark.Offset
			return _afge
		}
		prev := _afge[last-1]
		*_ddaf = prev.Offset + len(prev.Text)
		return _afge
	}
	trimmed := _gegdg(mark.Text)
	*_ddaf += len(trimmed) - len(mark.Text)
	// Store the shortened text in the slice element itself. Assigning to the
	// local copy `mark` would leave the returned marks out of sync with the
	// adjusted offset.
	_afge[last].Text = trimmed
	return _afge
}

// _degc is a fixed list of single-character strings.
// NOTE(review): these look like list bullet glyphs — confirm against the users of _degc.
var _degc = []string{"\u2756", "\u27a2", "\u2713", "\u2022", "\uf0a7", "\u25a1", "\u2212", "\u25a0", "\u25aa", "\u006f"}
// Len returns the number of TextMarks in `ma`.
func (ma *TextMarkArray) Len() int {
	if ma != nil {
		return len(ma._fdda)
	}
	return 0
}

// _efec returns the keys of `_gebe` in ascending order.
func _efec(_gebe map[int][]float64) []int {
	keys := make([]int, 0, len(_gebe))
	for key := range _gebe {
		keys = append(keys, key)
	}
	_gf.Ints(keys)
	return keys
}

// _cdbd returns the negated Lly of the bounding box of `_ccgf`.
func _cdbd(_ccgf bounded) float64 {
	box := _ccgf.bbox()
	return -box.Lly
}
// String returns a description of `b`.
func (b *wordBag) String() string {
	var texts []string
	for _, depthIdx := range b.depthIndexes() {
		for _, word := range b._agad[depthIdx] {
			texts = append(texts, word._ecgg)
		}
	}
	return _bb.Sprintf("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071", b.PdfRectangle, b._agg, len(texts), texts)
}

// markKind is an integer-valued kind.
type markKind int

// _ega builds a textObject from the given extractor, resources, graphics
// state, text state and state stack. Both matrices of the new object start
// out as the identity matrix.
func _ega(_agb *Extractor, _cdg *_cfb.PdfPageResources, _cgb _bc.GraphicsState, _fbac *textState, _cbfb *stateStack) *textObject {
	to := new(textObject)
	to._fdgf = _agb
	to._cgd = _cdg
	to._dfgec = _cgb
	to._cafe = _cbfb
	to._fgfa = _fbac
	to._bbde = _cfa.IdentityMatrix()
	to._gcgb = _cfa.IdentityMatrix()
	return to
}

// makeRemovals returns a map with one empty word set for every key currently
// present in the bag.
func (b *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	removals := make(map[int]map[*textWord]struct{}, len(b._agad))
	for key := range b._agad {
		removals[key] = map[*textWord]struct{}{}
	}
	return removals
}

// addDiacritic appends `_eccc` to the _gbag text of the word's last mark and
// normalises the result with NFKC.
func (w *textWord) addDiacritic(_eccc string) {
	last := w._dece[len(w._dece)-1]
	last._gbag = _d.NFKC.String(last._gbag + _eccc)
}

// depthBand returns the result of depthRange for the depth indexes that
// getDepthIdx derives from `_fbg` and `_eceg`. It returns nil for an empty bag.
func (b *wordBag) depthBand(_fbg, _eceg float64) []int {
	if len(b._agad) == 0 {
		return nil
	}
	from := b.getDepthIdx(_fbg)
	to := b.getDepthIdx(_eceg)
	return b.depthRange(from, to)
}

// tidied returns the rulings with duplicates removed, snapped to groups and
// sorted. `_gadb` only labels the debug output. It returns nil when
// snapToGroups yields nil.
func (rl rulingList) tidied(_gadb string) rulingList {
	uniques := rl.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")
	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()
	if _dgab {
		_ed.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", _gadb, len(rl), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// _agaf returns a new slice holding the elements of `_ceebe` in reverse order.
func _agaf(_ceebe []int) []int {
	n := len(_ceebe)
	reversed := make([]int, n)
	for src, dst := 0, n-1; src < n; src, dst = src+1, dst-1 {
		reversed[dst] = _ceebe[src]
	}
	return reversed
}

// applyRemovals removes the words listed in `_bgab` from the bag. `_bgab` maps
// a bag key to the set of words to drop under that key. A key whose word count
// equals the size of its removal set is deleted from the bag.
func (b *wordBag) applyRemovals(_bgab map[int]map[*textWord]struct{}) {
	for key, dropped := range _bgab {
		if len(dropped) == 0 {
			continue
		}
		words := b._agad[key]
		numKept := len(words) - len(dropped)
		if numKept == 0 {
			delete(b._agad, key)
			continue
		}
		kept := make([]*textWord, numKept)
		next := 0
		for _, word := range words {
			if _, drop := dropped[word]; drop {
				continue
			}
			kept[next] = word
			next++
		}
		b._agad[key] = kept
	}
}

// _dgaf converts the children of `_facc` into list nodes.
//
// An "LI" child becomes a node built by _gagf from the child's lines (found
// with _fefe), the tag "bullet" and the child's own converted children; its
// _cgee field is set from _gbeca. The first "LBody" child ends the scan and
// its converted children are returned alone. The first "L" child ends the scan
// and its converted children are appended to what was collected so far.
func _dgaf(_facc *list) []*list {
	var nodes []*list
	for _, child := range _facc._bdbe {
		tag := child._egc
		if tag == "\u004c\u0049" {
			lines := _fefe(child)
			nested := _dgaf(child)
			item := _gagf(lines, "\u0062\u0075\u006c\u006c\u0065\u0074", nested)
			item._cgee = _gbeca(lines, "")
			nodes = append(nodes, item)
			continue
		}
		if tag == "\u004c\u0042\u006fd\u0079" {
			return _dgaf(child)
		}
		if tag == "\u004c" {
			return append(nodes, _dgaf(child)...)
		}
	}
	return nodes
}
// NewWithOptions returns an Extractor instance for extracting content from the input PDF page with options.
func NewWithOptions(page *_cfb.PdfPage, options *Options) (*Extractor, error) {
	const trackID = "\u0065x\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077W\u0069\u0074\u0068\u004f\u0070\u0074\u0069\u006f\u006e\u0073"

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}
	structTreeRoot, tagged := page.GetStructTreeRoot()
	if !tagged {
		_ed.Log.Info("T\u0068\u0065\u0020\u0070\u0064\u0066\u0020\u0064\u006f\u0063\u0075\u006d\u0065\u006e\u0074\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020\u0074\u0061\u0067g\u0065d\u002e\u0020\u0053\u0074r\u0075\u0063t\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e\u0027\u0074\u0020\u0065\u0078\u0069\u0073\u0074\u002e")
	}
	container := page.GetContainingPdfObject()
	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _bb.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	ex := &Extractor{
		_ede: contents,
		_cd:  page.Resources,
		_bf:  *mediaBox,
		_ab:  page.CropBox,
		_af:  map[string]fontEntry{},
		_fb:  map[string]textResult{},
		_fc:  options,
		_cdf: structTreeRoot,
		_bcd: container,
	}

	// Normalise a media box whose corners are given in reverse order.
	box := &ex._bf
	if box.Llx > box.Urx {
		_ed.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", *box)
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		_ed.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", *box)
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	_fe.TrackUse(trackID)
	return ex, nil
}

// _gbfda returns the keys of `_cgec` in ascending order.
func _gbfda(_cgec map[int]intSet) []int {
	keys := make([]int, len(_cgec))
	n := 0
	for key := range _cgec {
		keys[n] = key
		n++
	}
	_gf.Ints(keys)
	return keys
}

// _aeece appends `_dbebg` to `_degb` with its Offset set to the current value
// of `*_dgafb`, then advances `*_dgafb` by the byte length of the mark's Text.
func _aeece(_degb []TextMark, _dgafb *int, _dbebg TextMark) []TextMark {
	_dbebg.Offset = *_dgafb
	grown := append(_degb, _dbebg)
	*_dgafb += len(_dbebg.Text)
	return grown
}
// String returns a description of `v`.
func (v *ruling) String() string {
	if v._efdg == _abbb {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}
	// Axis labels for the primary and secondary coordinate; swapped for _ecbd rulings.
	axes := [2]string{"\u0078", "\u0079"}
	if v._efdg == _ecbd {
		axes[0], axes[1] = axes[1], axes[0]
	}
	var width string
	if v._bbgd != 0.0 {
		width = _bb.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", v._bbgd)
	}
	return _bb.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073", v._efdg, axes[0], v._gged, axes[1], v._abcc, v._dfad, v._dfad-v._abcc, v._edebb, v.Color, width)
}

// isAtom checks whether the paragraph and three of its neighbours form a
// closed square: the neighbours reached through _bdbeb and _ggfa, and the
// paragraph reached through _bdbeb then _ggfa, which must equal the one
// reached through _ggfa then _bdbeb. When the three neighbours are untaken and
// the square closes, the four paragraphs are handed to _eege and its table is
// returned; otherwise the result is nil.
func (p *textPara) isAtom() *textTable {
	n1, n2 := p._bdbeb, p._ggfa
	if n1.taken() {
		return nil
	}
	if n2.taken() {
		return nil
	}
	diag := n1._ggfa
	if diag.taken() {
		return nil
	}
	if diag != n2._bdbeb {
		return nil
	}
	return _eege(p, n1, n2, diag)
}

// _cbbda looks up the entry of _efaf for `_efba`. The lookup is only attempted
// when `_efba` consists of exactly one rune; otherwise it returns "" and false.
func _cbbda(_efba string) (string, bool) {
	runes := []rune(_efba)
	if len(runes) == 1 {
		mark, found := _efaf[runes[0]]
		return mark, found
	}
	return "", false
}

// allWords returns every word in the bag in a single slice. The words follow
// map iteration order, so the order is unspecified.
func (b *wordBag) allWords() []*textWord {
	var words []*textWord
	for key := range b._agad {
		words = append(words, b._agad[key]...)
	}
	return words
}

// connections returns the set of ruling indexes reachable from `_fdcf`. An
// index i is linked to k when `_addd[i]` contains k. The search follows such
// links transitively, starting at `_fdcf`; the start index is included only if
// it is reached through a link.
func (rl rulingList) connections(_addd map[int]intSet, _fdcf int) intSet {
	reached := make(intSet)
	expanded := make(intSet)
	var expand func(int)
	expand = func(from int) {
		if expanded.has(from) {
			return
		}
		expanded.add(from)
		for idx := range rl {
			if _addd[idx].has(from) {
				reached.add(idx)
			}
		}
		for idx := range rl {
			if reached.has(idx) {
				expand(idx)
			}
		}
	}
	expand(_fdcf)
	return reached
}
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	// _fdda holds the marks in the collection.
	_fdda []TextMark
}

// firstReadingIndex returns the depth index whose first word sorts earliest
// under _aedd. The candidates are `_gdd` itself and the indexes that depthBand
// returns for the band starting at (`_gdd`+1)*_eacg and extending by _gdfg
// times the _feabg value of the first word at `_gdd`.
func (b *wordBag) firstReadingIndex(_gdd int) int {
	scale := b.firstWord(_gdd)._feabg
	bandStart := float64(_gdd+1) * _eacg
	bandEnd := bandStart + _gdfg*scale
	best := _gdd
	for _, candidate := range b.depthBand(bandStart, bandEnd) {
		if _aedd(b.firstWord(candidate), b.firstWord(best)) >= 0 {
			continue
		}
		best = candidate
	}
	return best
}
// New returns an Extractor instance for extracting content from the input PDF page.
func New(page *_cfb.PdfPage) (*Extractor, error) {
	var noOptions *Options
	return NewWithOptions(page, noOptions)
}

// gridTile is a rectangle with four boolean flags. gridTile.String renders the
// flags with the letters noted on each field.
type gridTile struct {
	_cfb.PdfRectangle
	_cddec, // rendered as "T"
	_afce, // rendered as "L"
	_ggaf, // rendered as "B"
	_gfbbcf bool // rendered as "R"
}

// removeDuplicates replaces the point list of the subpath with one in which
// every point that _bebcb matches against the previously kept point is dropped.
func (sp *subpath) removeDuplicates() {
	points := sp._edc
	if len(points) == 0 {
		return
	}
	kept := []_cfa.Point{points[0]}
	for k := 1; k < len(points); k++ {
		if _bebcb(points[k], kept[len(kept)-1]) {
			continue
		}
		kept = append(kept, points[k])
	}
	sp._edc = kept
}

// findTables links the paragraphs with addNeighbours, sorts the receiver in
// place with _gacg, and collects the tables found by findGridTables (when
// _cefe is set) followed by those found by findTextTables (when _abfb is set).
func (pl paraList) findTables(_bggfb []gridTiling) []*textTable {
	pl.addNeighbours()
	less := func(a, b int) bool { return _gacg(pl[a], pl[b]) < 0 }
	_gf.Slice(pl, less)

	var tables []*textTable
	if _cefe {
		tables = append(tables, pl.findGridTables(_bggfb)...)
	}
	if _abfb {
		tables = append(tables, pl.findTextTables()...)
	}
	return tables
}

// toTextMarks returns the marks of all words on the line, in order. Before the
// marks of a word whose _ggge flag is set, the result of _dfbba with a single
// space is used as the running slice. `_fggbd` is the running text offset
// shared with the callees.
func (l *textLine) toTextMarks(_fggbd *int) []TextMark {
	var marks []TextMark
	for _, word := range l._eded {
		if word._ggge {
			marks = _dfbba(marks, _fggbd, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_fggbd)...)
	}
	return marks
}

// paraList is a sequence of text paragraphs.
type paraList []*textPara
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct{
// Text is the extracted text.
Text string ;
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
// BBox is the bounding box of the text.
BBox _cfb .PdfRectangle ;
// Font is the font the text was drawn with.
Font *_cfb .PdfFont ;
// FontSize is the font size the text was drawn with.
FontSize float64 ;
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
FillColor _a .Color ;
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
StrokeColor _a .Color ;
// Orientation is the text orientation
Orientation int ;
// DirectObject is the underlying PdfObject (Text Object) that represents the visible texts. This is introduced to get
// simple access to the TextObject in case editing or replacement of some text is needed, e.g. during redaction.
DirectObject _feb .PdfObject ;
// ObjString is a decoded string operand of a text-showing operator. It has the same value as `Text` attribute except
// when many glyphs are represented with the same Text Object that contains multiple length string operand in which case
// ObjString spans more than one character string that falls in different TextMark objects.
ObjString []string ;Tw float64 ;Th float64 ;Tc float64 ;Index int ;};func (_gafg *textObject )nextLine (){_gafg .moveLP (0,-_gafg ._fgfa ._bdf )};func (_cgfd *textPara )taken ()bool {return _cgfd ==nil ||_cgfd ._cacc };func (_fdf *imageExtractContext )extractInlineImage (_fea *_bc .ContentStreamInlineImage ,_gfg _bc .GraphicsState ,_afd *_cfb .PdfPageResources )error {_fg ,_deg :=_fea .ToImage (_afd );
if _deg !=nil {return _deg ;};_bac ,_deg :=_fea .GetColorSpace (_afd );if _deg !=nil {return _deg ;};if _bac ==nil {_bac =_cfb .NewPdfColorspaceDeviceGray ();};_cec ,_deg :=_bac .ImageToRGB (*_fg );if _deg !=nil {return _deg ;};_cba :=ImageMark {Image :&_cec ,Width :_gfg .CTM .ScalingFactorX (),Height :_gfg .CTM .ScalingFactorY (),Angle :_gfg .CTM .Angle ()};
_cba .X ,_cba .Y =_gfg .CTM .Translation ();_fdf ._eca =append (_fdf ._eca ,_cba );_fdf ._fad ++;return nil ;};func _fgbea (_cead []pathSection ){if _ffaf < 0.0{return ;};if _dgab {_ed .Log .Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_cead ));
};for _aeegg ,_egcg :=range _cead {for _dafc ,_dgdac :=range _egcg ._cafa {for _gacd ,_abfa :=range _dgdac ._edc {_dgdac ._edc [_gacd ]=_cfa .Point {X :_cgaba (_abfa .X ),Y :_cgaba (_abfa .Y )};if _dgab {_edbc :=_dgdac ._edc [_gacd ];if !_bebcb (_abfa ,_edbc ){_aead :=_cfa .Point {X :_edbc .X -_abfa .X ,Y :_edbc .Y -_abfa .Y };
_bb .Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_aeegg ,_dafc ,_gacd ,_abfa ,_edbc ,_aead );};};};};};};type event struct{_abgeg float64 ;
_abcf bool ;_efbcf int ;};

// _geag reports whether `_baeafg` divided by the larger of _bgeb and `_effg`
// is below the threshold _dfc.
func _geag(_baeafg, _effg float64) bool {
	return _baeafg/_e.Max(_bgeb, _effg) < _dfc
}

// extractPageText parses the content stream `_baa` and runs every operator in
// it through a single handler that maintains the text object, the graphics
// state stack and the path state, collecting the results in a PageText.
//
// Parameters:
//   - _baa:  the content stream to parse.
//   - _afg:  the resources used to process the stream.
//   - _acd:  a matrix that is multiplied with the current CTM when a text
//     object is started and when a form XObject is entered.
//   - _cfdd: the form XObject nesting level; exceeding _fgc aborts with a
//     "form stack overflow" error.
//
// Returns the PageText, the two counters kept in the text state (`_aecc` and
// `_egg`), and the first error raised by parsing or processing. The PageText
// and counters are returned even when the error is non-nil.
//
// Form XObjects drawn with `Do` are extracted recursively and cached in
// `_acg._fb` under the XObject name.
func (_acg *Extractor) extractPageText(_baa string, _afg *_cfb.PdfPageResources, _acd _cfa.Matrix, _cfdd int) (*PageText, int, int, error) {
	_ed.Log.Trace("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d", _cfdd)
	_ecg := &PageText{_bfcb: _acg._bf, _ecfa: _acg._cdf, _gcf: _acg._bcd}
	_ccc := _ecd(_acg._bf)
	var _gbg stateStack
	_abe := _ega(_acg, _afg, _bc.GraphicsState{}, &_ccc, &_gbg)
	_gec := shapesState{_bbgfd: _acd, _fefd: _cfa.IdentityMatrix(), _abac: _abe}

	// _fgf is true between BT and ET; _fgcg is the current marked-content id,
	// with -1 meaning "none".
	var _fgf bool
	_fgcg := -1

	if _cfdd > _fgc {
		_acb := _b.New("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077")
		_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076", _cfdd, _acb)
		return _ecg, _ccc._aecc, _ccc._egg, _acb
	}

	_faf := _bc.NewContentStreamParser(_baa)
	_fbdc, _bbfg := _faf.Parse()
	if _bbfg != nil {
		_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _bbfg)
		return _ecg, _ccc._aecc, _ccc._egg, _bbfg
	}
	_ecg._dbbd = _fbdc

	_egd := _bc.NewContentStreamProcessor(*_fbdc)
	_egd.AddHandler(_bc.HandlerConditionEnumAllOperands, "", func(_defd *_bc.ContentStreamOperation, _dcb _bc.GraphicsState, _feab *_cfb.PdfPageResources) error {
		_afec := _defd.Operand
		if _eccb {
			_ed.Log.Info("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s", _defd)
		}
		switch _afec {
		case "\u0071": // q: save the text state.
			if _bfcbef {
				_ed.Log.Info("\u0063\u0074\u006d\u003d\u0025\u0073", _gec._fefd)
			}
			_gbg.push(&_ccc)
		case "\u0051": // Q: restore the text state, if one was saved.
			if !_gbg.empty() {
				_ccc = *_gbg.pop()
			}
			_gec._fefd = _dcb.CTM
			if _bfcbef {
				_ed.Log.Info("\u0063\u0074\u006d\u003d\u0025\u0073", _gec._fefd)
			}
		case "\u0042\u0044\u0043": // BDC: pick up the MCID from the property dictionary.
			// Guard the operand count: indexing Params[1] on a short operand
			// list would panic. Skip the operator instead, as the GetDict
			// failure path below does.
			if len(_defd.Params) < 2 {
				_ed.Log.Debug("ERROR: BDC op=%s expected 2 operands", _defd)
				return nil
			}
			_dde, _cgg := _feb.GetDict(_defd.Params[1])
			if !_cgg {
				_ed.Log.Debug("\u0045\u0052\u0052O\u0052\u003a\u0020\u0042D\u0043\u0020\u006f\u0070\u003d\u0025\u0073 \u0047\u0065\u0074\u0044\u0069\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064", _defd)
				// _bbfg is the captured parse error, which is nil once parsing
				// has succeeded, so this skips the operator without aborting.
				return _bbfg
			}
			_dgc := _dde.Get("\u004d\u0043\u0049\u0044")
			if _dgc != nil {
				_bcg, _aba := _feb.GetIntVal(_dgc)
				if !_aba {
					_ed.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u0042\u0044C\u0020\u006f\u0070=\u0025\u0073\u002e\u0020\u0042\u0061\u0064\u0020\u006eum\u0065\u0072\u0069c\u0061\u006c \u006f\u0062\u006a\u0065\u0063\u0074.\u0020\u006f=\u0025\u0073", _defd, _dgc)
				}
				_fgcg = _bcg
			} else {
				_fgcg = -1
			}
		case "\u0045\u004d\u0043": // EMC
			_fgcg = -1
		case "\u0042\u0054": // BT: start a new text object.
			if _fgf {
				_ed.Log.Debug("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074")
				// Keep the marks of the unterminated text object.
				_ecg._bebf = append(_ecg._bebf, _abe._geca...)
			}
			_fgf = true
			_aafa := _dcb
			_aafa.CTM = _acd.Mult(_aafa.CTM)
			_abe = _ega(_acg, _feab, _aafa, &_ccc, &_gbg)
			_gec._abac = _abe
		case "\u0045\u0054": // ET: close the text object and collect its marks.
			if !_fgf {
				_ed.Log.Debug("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074")
			}
			_fgf = false
			_ecg._bebf = append(_ecg._bebf, _abe._geca...)
			_abe.reset()
		case "\u0054\u002a": // T*
			_abe.nextLine()
		case "\u0054\u0064": // Td
			if _cfbb, _ebf := _abe.checkOp(_defd, 2, true); !_cfbb {
				_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _ebf)
				return _ebf
			}
			_eeb, _acab, _fda := _gaec(_defd.Params)
			if _fda != nil {
				return _fda
			}
			_abe.moveText(_eeb, _acab)
		case "\u0054\u0044": // TD
			if _gbb, _ggg := _abe.checkOp(_defd, 2, true); !_gbb {
				_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _ggg)
				return _ggg
			}
			_cae, _fdcg, _cga := _gaec(_defd.Params)
			if _cga != nil {
				_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _cga)
				return _cga
			}
			_abe.moveTextSetLeading(_cae, _fdcg)
		case "\u0054\u006a": // Tj
			if _gaee, _ffd := _abe.checkOp(_defd, 1, true); !_gaee {
				_ed.Log.Debug("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076", _defd, _ffd)
				return _ffd
			}
			_dbg := _feb.TraceToDirectObject(_defd.Params[0])
			_gcg, _ege := _feb.GetStringBytes(_dbg)
			if !_ege {
				_ed.Log.Debug("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064", _defd)
				return _feb.ErrTypeError
			}
			return _abe.showText(_dbg, _gcg, _fgcg)
		case "\u0054\u004a": // TJ
			if _fdcc, _dae := _abe.checkOp(_defd, 1, true); !_fdcc {
				_ed.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _dae)
				return _dae
			}
			_bgg, _abec := _feb.GetArray(_defd.Params[0])
			if !_abec {
				_ed.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064", _defd)
				// As in the BDC case, _bbfg is nil here: the operator is skipped.
				return _bbfg
			}
			return _abe.showTextAdjusted(_bgg, _fgcg)
		case "\u0027": // ': move to the next line and show text.
			if _gfbb, _cccc := _abe.checkOp(_defd, 1, true); !_gfbb {
				_ed.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _cccc)
				return _cccc
			}
			_dbd := _feb.TraceToDirectObject(_defd.Params[0])
			_bebg, _bcb := _feb.GetStringBytes(_dbd)
			if !_bcb {
				_ed.Log.Debug("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064", _defd)
				return _feb.ErrTypeError
			}
			_abe.nextLine()
			return _abe.showText(_dbd, _bebg, _fgcg)
		case "\u0022": // ": set char and word spacing, move to the next line, show text.
			if _bda, _acc := _abe.checkOp(_defd, 3, true); !_bda {
				_ed.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _acc)
				return _acc
			}
			_egb, _bfc, _cdff := _gaec(_defd.Params[:2])
			if _cdff != nil {
				return _cdff
			}
			_gdb := _feb.TraceToDirectObject(_defd.Params[2])
			_bdg, _aac := _feb.GetStringBytes(_gdb)
			if !_aac {
				_ed.Log.Debug("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064", _defd)
				return _feb.ErrTypeError
			}
			_abe.setCharSpacing(_egb)
			_abe.setWordSpacing(_bfc)
			_abe.nextLine()
			return _abe.showText(_gdb, _bdg, _fgcg)
		case "\u0054\u004c": // TL
			_gcb, _gde := _geeg(_defd)
			if _gde != nil {
				_ed.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _gde)
				return _gde
			}
			_abe.setTextLeading(_gcb)
		case "\u0054\u0063": // Tc
			_eaa, _eeea := _geeg(_defd)
			if _eeea != nil {
				_ed.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _eeea)
				return _eeea
			}
			_abe.setCharSpacing(_eaa)
		case "\u0054\u0066": // Tf: select font and size.
			if _edeba, _fae := _abe.checkOp(_defd, 2, true); !_edeba {
				_ed.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _fae)
				return _fae
			}
			_ecee, _bgf := _feb.GetNameVal(_defd.Params[0])
			if !_bgf {
				_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064", _defd)
				return _feb.ErrTypeError
			}
			_ffe, _bege := _feb.GetNumberAsFloat(_defd.Params[1])
			// Test the error returned for the size operand. The previous code
			// re-tested the name flag, so a bad size was never reported.
			if _bege != nil {
				_ed.Log.Debug("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _defd, _bege)
				return _bege
			}
			_bege = _abe.setFont(_ecee, _ffe)
			// An ErrNotSupported font is recorded on the text object rather
			// than aborting; any other error is returned.
			_abe._acde = _bdc.Is(_bege, _feb.ErrNotSupported)
			if _bege != nil && !_abe._acde {
				return _bege
			}
		case "\u0054\u006d": // Tm
			if _bgca, _bbc := _abe.checkOp(_defd, 6, true); !_bgca {
				_ed.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _bbc)
				return _bbc
			}
			_abg, _ddf := _feb.GetNumbersAsFloat(_defd.Params)
			if _ddf != nil {
				_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _ddf)
				return _ddf
			}
			_abe.setTextMatrix(_abg)
		case "\u0054\u0072": // Tr
			if _gfa, _fdg := _abe.checkOp(_defd, 1, true); !_gfa {
				_ed.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _fdg)
				return _fdg
			}
			_ecgf, _abea := _feb.GetIntVal(_defd.Params[0])
			if !_abea {
				_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064", _defd)
				return _feb.ErrTypeError
			}
			_abe.setTextRenderMode(_ecgf)
		case "\u0054\u0073": // Ts
			if _fec, _ebdf := _abe.checkOp(_defd, 1, true); !_fec {
				_ed.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _ebdf)
				return _ebdf
			}
			_gdcb, _ccdc := _feb.GetNumberAsFloat(_defd.Params[0])
			if _ccdc != nil {
				_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _ccdc)
				return _ccdc
			}
			_abe.setTextRise(_gdcb)
		case "\u0054\u0077": // Tw
			if _ag, _dgca := _abe.checkOp(_defd, 1, true); !_ag {
				_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _dgca)
				return _dgca
			}
			_fdb, _ebdfa := _feb.GetNumberAsFloat(_defd.Params[0])
			if _ebdfa != nil {
				_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _ebdfa)
				return _ebdfa
			}
			_abe.setWordSpacing(_fdb)
		case "\u0054\u007a": // Tz
			if _cfbc, _afga := _abe.checkOp(_defd, 1, true); !_cfbc {
				_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _afga)
				return _afga
			}
			_dbea, _deb := _feb.GetNumberAsFloat(_defd.Params[0])
			if _deb != nil {
				_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _deb)
				return _deb
			}
			_abe.setHorizScaling(_dbea)
		case "\u0063\u006d": // cm: track the CTM for path construction.
			_gec._fefd = _dcb.CTM
			if _gec._fefd.Singular() {
				// Replace a singular CTM by a pure translation to the same offset.
				_dcd := _cfa.IdentityMatrix().Translate(_gec._fefd.Translation())
				_ed.Log.Debug("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s", _gec._fefd, _dcd)
				_gec._fefd = _dcd
			}
			if _bfcbef {
				_ed.Log.Info("\u0063\u0074\u006d\u003d\u0025\u0073", _gec._fefd)
			}
		case "\u006d": // m
			if len(_defd.Params) != 2 {
				_ed.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e", _bgd)
				return nil
			}
			_gefe, _cfc := _feb.GetNumbersAsFloat(_defd.Params)
			if _cfc != nil {
				return _cfc
			}
			_gec.moveTo(_gefe[0], _gefe[1])
		case "\u006c": // l
			if len(_defd.Params) != 2 {
				_ed.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e", _bgd)
				return nil
			}
			_gce, _agc := _feb.GetNumbersAsFloat(_defd.Params)
			if _agc != nil {
				return _agc
			}
			_gec.lineTo(_gce[0], _gce[1])
		case "\u0063": // c
			if len(_defd.Params) != 6 {
				return _bgd
			}
			_afecd, _edbb := _feb.GetNumbersAsFloat(_defd.Params)
			if _edbb != nil {
				return _edbb
			}
			_ed.Log.Debug("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f", _afecd)
			_gec.cubicTo(_afecd[0], _afecd[1], _afecd[2], _afecd[3], _afecd[4], _afecd[5])
		case "\u0076", "\u0079": // v, y
			if len(_defd.Params) != 4 {
				return _bgd
			}
			_dac, _adc := _feb.GetNumbersAsFloat(_defd.Params)
			if _adc != nil {
				return _adc
			}
			_ed.Log.Debug("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f", _dac)
			_gec.quadraticTo(_dac[0], _dac[1], _dac[2], _dac[3])
		case "\u0068": // h
			_gec.closePath()
		case "\u0072\u0065": // re
			if len(_defd.Params) != 4 {
				return _bgd
			}
			_abdc, _gfc := _feb.GetNumbersAsFloat(_defd.Params)
			if _gfc != nil {
				return _gfc
			}
			_gec.drawRectangle(_abdc[0], _abdc[1], _abdc[2], _abdc[3])
			_gec.closePath()
		case "\u0053": // S
			_gec.stroke(&_ecg._bce)
			_gec.clearPath()
		case "\u0073": // s
			_gec.closePath()
			_gec.stroke(&_ecg._bce)
			_gec.clearPath()
		case "\u0046": // F
			_gec.fill(&_ecg._caac)
			_gec.clearPath()
		case "\u0066", "\u0066\u002a": // f, f*
			_gec.closePath()
			_gec.fill(&_ecg._caac)
			_gec.clearPath()
		case "\u0042", "\u0042\u002a": // B, B*
			_gec.fill(&_ecg._caac)
			_gec.stroke(&_ecg._bce)
			_gec.clearPath()
		case "\u0062", "\u0062\u002a": // b, b*
			_gec.closePath()
			_gec.fill(&_ecg._caac)
			_gec.stroke(&_ecg._bce)
			_gec.clearPath()
		case "\u006e": // n
			_gec.clearPath()
		case "\u0044\u006f": // Do: recurse into form XObjects.
			if len(_defd.Params) == 0 {
				_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e", _defd.Params)
				return _feb.ErrRangeError
			}
			_afa, _daf := _feb.GetName(_defd.Params[0])
			if !_daf {
				_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e", _defd.Params[0])
				return _feb.ErrTypeError
			}
			// Only form XObjects are processed; anything else is ignored here.
			_, _ggd := _feab.GetXObjectByName(*_afa)
			if _ggd != _cfb.XObjectTypeForm {
				break
			}
			_begc, _daf := _acg._fb[_afa.String()]
			if !_daf {
				_gag, _gfe := _feab.GetXObjectFormByName(*_afa)
				if _gfe != nil {
					_ed.Log.Debug("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v", _gfe)
					return _gfe
				}
				_bea, _gfe := _gag.GetContentStream()
				if _gfe != nil {
					_ed.Log.Debug("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v", _gfe)
					return _gfe
				}
				// A form without its own resources uses the current ones.
				_gfce := _gag.Resources
				if _gfce == nil {
					_gfce = _feab
				}
				// Combine the CTM with the form's Matrix entry when it has one.
				_dgb := _dcb.CTM
				if _caag, _ccdce := _feb.GetArray(_gag.Matrix); _ccdce {
					_beed, _gaa := _caag.GetAsFloat64Slice()
					if _gaa != nil {
						return _gaa
					}
					if len(_beed) != 6 {
						return _bgd
					}
					_fdgb := _cfa.NewMatrix(_beed[0], _beed[1], _beed[2], _beed[3], _beed[4], _beed[5])
					_dgb = _dcb.CTM.Mult(_fdgb)
				}
				_age, _fada, _cdeb, _gfe := _acg.extractPageText(string(_bea), _gfce, _acd.Mult(_dgb), _cfdd+1)
				if _gfe != nil {
					_ed.Log.Debug("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v", _gfe)
					return _gfe
				}
				_begc = textResult{*_age, _fada, _cdeb}
				_acg._fb[_afa.String()] = _begc
			}
			_gec._fefd = _dcb.CTM
			if _bfcbef {
				_ed.Log.Info("\u0063\u0074\u006d\u003d\u0025\u0073", _gec._fefd)
			}
			// Merge the form's marks, paths and counters into this page.
			_ecg._bebf = append(_ecg._bebf, _begc._fdce._bebf...)
			_ecg._bce = append(_ecg._bce, _begc._fdce._bce...)
			_ecg._caac = append(_ecg._caac, _begc._fdce._caac...)
			_ccc._aecc += _begc._ebg
			_ccc._egg += _begc._gcdg
		case "\u0072\u0067", "\u0067", "\u006b", "\u0063\u0073", "\u0073\u0063", "\u0073\u0063\u006e": // rg, g, k, cs, sc, scn
			_abe._dfgec.ColorspaceNonStroking = _dcb.ColorspaceNonStroking
			_abe._dfgec.ColorNonStroking = _dcb.ColorNonStroking
		case "\u0052\u0047", "\u0047", "\u004b", "\u0043\u0053", "\u0053\u0043", "\u0053\u0043\u004e": // RG, G, K, CS, SC, SCN
			_abe._dfgec.ColorspaceStroking = _dcb.ColorspaceStroking
			_abe._dfgec.ColorStroking = _dcb.ColorStroking
		}
		return nil
	})
	_bbfg = _egd.Process(_afg)
	return _ecg, _ccc._aecc, _ccc._egg, _bbfg
}

// put stores `_cfdbc` in the cell map under the key _fedd(_dgdab, _eecbd).
func (_ceggc *textTable) put(_dgdab, _eecbd int, _cfdbc *textPara) {
	_ceggc._dadef[_fedd(_dgdab, _eecbd)] = _cfdbc
}
func _eege (_eaec ,_cbgf ,_fefc ,_afbcg *textPara )*textTable {_adgdg :=&textTable {_dedd :2,_gggac :2,_dadef :make (map[uint64 ]*textPara ,4)};
_adgdg .put (0,0,_eaec );_adgdg .put (1,0,_cbgf );_adgdg .put (0,1,_fefc );_adgdg .put (1,1,_afbcg );return _adgdg ;};

// cubicTo adds only the last coordinate pair (`_geg`, `_cbdg`) to the path;
// the remaining four arguments are not used.
func (_ffcc *shapesState) cubicTo(_cgbd, _ddb, _bbga, _bdeb, _geg, _cbdg float64) {
	if _bfcbef {
		_ed.Log.Info("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a")
	}
	_ffcc.addPoint(_geg, _cbdg)
}

// wordBag is a bounding rectangle plus rulings and words; the words are held
// in lists keyed by an int.
type wordBag struct {
	_cfb.PdfRectangle
	_agg         float64
	_afbb, _gacc rulingList
	_cdee        float64
	_agad        map[int][]*textWord
}

// Numeric thresholds used throughout the extractor.
const (
	_egddf = 1.0e-6
	_ffaf  = 1.0e-4
	_gbgg  = 10
	_eacg  = 6
	_ffbb  = 0.5
	_ebbe  = 0.12
	_daae  = 0.19
	_acae  = 0.04
	_aeed  = 0.04
	_aeca  = 1.0
	_cdcea = 0.04
	_ceeb  = 0.4
	_edcc  = 0.7
	_abdb  = 1.0
	_babf  = 0.1
	_ebbc  = 1.4
	_ecbfg = 0.46
	_gbdd  = 0.02
	_bgee  = 0.2
	_gfec  = 0.5
	_bgff  = 4
	_gdfg  = 4.0
	_gggg  = 6
	_edg   = 0.3
	_eceb  = 0.01
	_faec  = 0.02
	_gabg  = 2
	_edca  = 2
	_gdgf  = 500
	_edd   = 4.0
	_agff  = 4.0
	_dfc   = 0.05
	_bgeb  = 0.1
	_dbgbb = 2.0
	_dffa  = 2.0
	_dafb  = 1.5
	_gebbb = 3.0
	_ddec  = 0.25
)

// has reports whether `_bdfc` is a member of the set.
func (_eccae intSet) has(_bdfc int) bool {
	_, present := _eccae[_bdfc]
	return present
}

// eventNeighbours sorts `_gecae` in place and sweeps through it. An event
// whose `_abcf` flag is set opens the index `_efbcf`; an event with the flag
// clear closes it. Two indexes are neighbours when one is opened while the
// other is still open. The result maps each opened paragraph of `_ceda` to its
// neighbour indexes (nil when it has none), in no particular order.
func (_ceda paraList) eventNeighbours(_gecae []event) map[*textPara][]int {
	// Order by position, then flagged events first, then by index.
	_gf.Slice(_gecae, func(i, j int) bool {
		a, b := _gecae[i], _gecae[j]
		if a._abgeg != b._abgeg {
			return a._abgeg < b._abgeg
		}
		if a._abcf != b._abcf {
			return a._abcf
		}
		return i < j
	})

	neighbours := make(map[int]intSet)
	open := make(intSet)
	for _, ev := range _gecae {
		if !ev._abcf {
			open.del(ev._efbcf)
			continue
		}
		neighbours[ev._efbcf] = make(intSet)
		for other := range open {
			if other == ev._efbcf {
				continue
			}
			neighbours[ev._efbcf].add(other)
			neighbours[other].add(ev._efbcf)
		}
		open.add(ev._efbcf)
	}

	result := map[*textPara][]int{}
	for idx, set := range neighbours {
		para := _ceda[idx]
		if len(set) == 0 {
			result[para] = nil
			continue
		}
		ids := make([]int, len(set))
		pos := 0
		for id := range set {
			ids[pos] = id
			pos++
		}
		result[para] = ids
	}
	return result
}

// setWordSpacing stores `_dgd` in the text state. A nil receiver is a no-op.
func (_ccdf *textObject) setWordSpacing(_dgd float64) {
	if _ccdf != nil {
		_ccdf._fgfa._dfea = _dgd
	}
}

// structElement is a tree node with a string, a list of child elements, an
// int64 and a PDF object.
type structElement struct {
	_aggf string
	_adae []structElement
	_cfbf int64
	_cccb _feb.PdfObject
}

// _gdbe rounds `_eeab` to the nearest multiple of `_gedg` and returns it as an
// int. A zero `_gedg` is treated as 1.
func _gdbe(_eeab float64, _gedg int) int {
	step := _gedg
	if step == 0 {
		step = 1
	}
	unit := float64(step)
	return int(_e.Round(_eeab/unit) * unit)
}

// compositeCell is a list of paragraphs together with a bounding rectangle.
type compositeCell struct {
	_cfb.PdfRectangle
	paraList
}

// absorb grows the receiver's rectangle with `_ddede`'s (via _bfad) and
// appends `_ddede`'s `_dece` entries to its own.
func (_gaag *textWord) absorb(_ddede *textWord) {
	_gaag.PdfRectangle = _bfad(_gaag.PdfRectangle, _ddede.PdfRectangle)
	_gaag._dece = append(_gaag._dece, _ddede._dece...)
}
// String returns a human readable description of `ss`: the number of subpaths
// and the value of the fresh flag.
func (_dggf *shapesState) String() string {
	count := len(_dggf._ccac)
	return _bb.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", count, _dggf._fab)
}

// extractContentStreamImages parses the content stream `_daa` and feeds every
// operation to processOperand, using `_gae` as the resources. The image cache
// and the extraction options are created here if they are still nil.
func (_gac *imageExtractContext) extractContentStreamImages(_daa string, _gae *_cfb.PdfPageResources) error {
	operations, err := _bc.NewContentStreamParser(_daa).Parse()
	if err != nil {
		return err
	}
	if _gac._edb == nil {
		_gac._edb = map[*_feb.PdfObjectStream]*cachedImage{}
	}
	if _gac._ead == nil {
		_gac._ead = &ImageExtractOptions{}
	}
	processor := _bc.NewContentStreamProcessor(*operations)
	processor.AddHandler(_bc.HandlerConditionEnumAllOperands, "", _gac.processOperand)
	return processor.Process(_gae)
}

// intersects reports whether the two rulings cross. They must be of the two
// different kinds _dcfc and _ecbd, and each one's `_gged` value must fall
// inside the other's [`_abcc`, `_dfad`] range widened by _dbgbb at both ends.
func (_agea *ruling) intersects(_ggfdg *ruling) bool {
	orthogonal := (_agea._efdg == _dcfc && _ggfdg._efdg == _ecbd) ||
		(_ggfdg._efdg == _dcfc && _agea._efdg == _ecbd)
	within := func(span, other *ruling) bool {
		return span._abcc-_dbgbb <= other._gged && other._gged <= span._dfad+_dbgbb
	}
	first := within(_agea, _ggfdg)
	second := within(_ggfdg, _agea)
	if _dgab {
		_bb.Printf("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a", orthogonal, first, second, orthogonal && first && second, _agea, _ggfdg)
	}
	return orthogonal && first && second
}

// lines returns the lines of all paragraphs in the list, in list order.
func (_dbfg paraList) lines() []*textLine {
	var all []*textLine
	for _, para := range _dbfg {
		all = append(all, para._ecfaf...)
	}
	return all
}

// maxDepth returns `_cdee` minus the bag's lower y coordinate.
func (_bggc *wordBag) maxDepth() float64 {
	return _bggc._cdee - _bggc.Lly
}
// markWordBoundaries sets the `_ggge` flag on every word of the line, other
// than the first, whose gap to the preceding word (as measured by _gfdc) is at
// least _gbdd times the line's `_daag` value.
//
// The words are walked by index rather than by slicing `_eded[1:]`, so a line
// with no words is a no-op instead of a slice-bounds panic.
func (_dgbe *textLine) markWordBoundaries() {
	threshold := _gbdd * _dgbe._daag
	for i := 1; i < len(_dgbe._eded); i++ {
		word := _dgbe._eded[i]
		if _gfdc(word, _dgbe._eded[i-1]) >= threshold {
			word._ggge = true
		}
	}
}

// _cfdda reports whether the magnitude of `_fagf` is below _dffa.
func _cfdda(_fagf float64) bool {
	return _e.Abs(_fagf) < _dffa
}
// Boolean switches and limits for the extractor.
const (
	_gfgf  = true
	_cfgad = true
	_fgdf  = true
	_bca   = false
	_aeec  = false
	_cgfa  = 6
	_agag  = 3.0
	_eccf  = 200
	_cefe  = true
	_abfb  = true
	_fgbf  = true
	_dfgd  = true
	_ecge  = false
)

// xMean returns the mean of the x coordinates of the ruling's two end points.
func (_cdac lineRuling) xMean() float64 {
	sum := _cdac._egad.X + _cdac._bggga.X
	return 0.5 * (sum)
}
// add appends the given points to the subpath.
func (_ffdf *subpath) add(_gefed ..._cfa.Point) {
	_ffdf._edc = append(_ffdf._edc, _gefed...)
}

// setTextMatrix builds a matrix from the six values in `_cbg` and stores it in
// both `_bbde` and `_gcgb`. A slice of any other length is logged and ignored.
func (_cdb *textObject) setTextMatrix(_cbg []float64) {
	if len(_cbg) != 6 {
		_ed.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", len(_cbg))
		return
	}
	matrix := _cfa.NewMatrix(_cbg[0], _cbg[1], _cbg[2], _cbg[3], _cbg[4], _cbg[5])
	_cdb._bbde = matrix
	_cdb._gcgb = _cdb._bbde
}

// _fde returns the translation matrix that moves by (`_efcc.X`, `_efcc.Y`).
func _fde(_efcc _cfa.Point) _cfa.Matrix {
	return _cfa.TranslationMatrix(_efcc.X, _efcc.Y)
}
// _ebgdb applies _geag to the absolute y distance and the absolute x distance
// between the two points, in that order.
func _ebgdb(_efeaf, _ffdcg _cfa.Point) bool {
	dx := _e.Abs(_efeaf.X - _ffdcg.X)
	dy := _e.Abs(_efeaf.Y - _ffdcg.Y)
	return _geag(dy, dx)
}

// newTextMark builds a textMark for the text `_afbe` drawn with the matrix
// `_dbce` and ending at `_cdgc`.
//
// The angle of `_dbce` is rounded to a multiple of _gbgg, and the matrix
// scaling factor for that orientation is used as the mark height. The bounding
// box spans from the translation of `_dbce` to `_cdgc`, extended by the height
// in the direction chosen by the angle, then clipped to the `_bf` rectangle
// when that has a positive width. A second rectangle, remapped according to
// the angle, is stored as the mark's PdfRectangle.
//
// The boolean result is false when _beab reports that the box and `_bf` do not
// overlap.
func (_gfcad *textObject) newTextMark(_afbe string, _dbce _cfa.Matrix, _cdgc _cfa.Point, _bfecf float64, _ggf *_cfb.PdfFont, _abbc float64, _fefec, _cag _a.Color, _dbge _feb.PdfObject, _edgd []string, _edbg int, _bbea int) (textMark, bool) {
	// sortCorners swaps coordinates so that Llx <= Urx and Lly <= Ury.
	sortCorners := func(r *_cfb.PdfRectangle) {
		if r.Llx > r.Urx {
			r.Llx, r.Urx = r.Urx, r.Llx
		}
		if r.Lly > r.Ury {
			r.Lly, r.Ury = r.Ury, r.Lly
		}
	}

	orient := _gdbe(_dbce.Angle(), _gbgg)

	var height float64
	if orient%180 == 90 {
		height = _dbce.ScalingFactorX()
	} else {
		height = _dbce.ScalingFactorY()
	}

	start := _fgee(_dbce)
	box := _cfb.PdfRectangle{Llx: start.X, Lly: start.Y, Urx: _cdgc.X, Ury: _cdgc.Y}
	switch orient % 360 {
	case 90:
		box.Urx -= height
	case 180:
		box.Ury -= height
	case 270:
		box.Urx += height
	case 0:
		box.Ury += height
	default:
		// Any other angle is handled as 0.
		orient = 0
		box.Ury += height
	}
	sortCorners(&box)

	onPage := true
	if _gfcad._fdgf._bf.Width() > 0 {
		clipped, overlaps := _beab(box, _gfcad._fdgf._bf)
		if !overlaps {
			onPage = false
			_ed.Log.Debug("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q", box, _gfcad._fdgf._bf, _afbe)
		}
		box = clipped
	}

	rotated := box
	page := _gfcad._fdgf._bf
	switch orient % 360 {
	case 90:
		page.Urx, page.Ury = page.Ury, page.Urx
		rotated = _cfb.PdfRectangle{
			Llx: page.Urx - box.Ury,
			Urx: page.Urx - box.Lly,
			Lly: box.Llx,
			Ury: box.Urx,
		}
	case 180:
		rotated = _cfb.PdfRectangle{
			Llx: page.Urx - box.Llx,
			Urx: page.Urx - box.Urx,
			Lly: page.Ury - box.Lly,
			Ury: page.Ury - box.Ury,
		}
	case 270:
		page.Urx, page.Ury = page.Ury, page.Urx
		rotated = _cfb.PdfRectangle{
			Llx: box.Ury,
			Urx: box.Lly,
			Lly: page.Ury - box.Llx,
			Ury: page.Ury - box.Urx,
		}
	}
	sortCorners(&rotated)

	mark := textMark{
		_gbag:        _afbe,
		PdfRectangle: rotated,
		_ecca:        box,
		_eggf:        _ggf,
		_gegdd:       height,
		_afff:        _abbc,
		_gfbab:       _dbce,
		_ecdc:        _cdgc,
		_dgfd:        orient,
		_adcd:        _fefec,
		_abfg:        _cag,
		_ecad:        _dbge,
		_gedf:        _edgd,
		Th:           _gfcad._fgfa._ddef,
		Tw:           _gfcad._fgfa._dfea,
		_bfdf:        _bbea,
		_dfac:        _edbg,
	}
	if _ebac {
		_ed.Log.Info("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073", start, _cdgc, mark.String())
	}
	return mark, onPage
}

// textTable holds a grid of paragraphs, and a grid of composite cells, each
// keyed by _fedd of two ints.
type textTable struct {
	_cfb.PdfRectangle
	_dedd, _gggac int
	_agde         bool
	_dadef        map[uint64]*textPara
	_gacgd        map[uint64]compositeCell
}

// _cdaeg starts a new line from the first word of `_gfac` at `_gdbb`, taking
// its rectangle and two of its fields from that word, and pulls the word out
// of the bag into the line.
func _cdaeg(_gfac *wordBag, _gdbb int) *textLine {
	first := _gfac.firstWord(_gdbb)
	line := textLine{PdfRectangle: first.PdfRectangle, _daag: first._feabg, _feff: first._cefef}
	line.pullWord(_gfac, first, _gdbb)
	return &line
}

// bounded is implemented by anything that can report a bounding box.
type bounded interface {
	bbox() _cfb.PdfRectangle
}

// _gagf returns a new list made of the given lines, string and child lists.
func _gagf(_gdegc []*textLine, _ebbdf string, _bbgab []*list) *list {
	return &list{_bcbgc: _gdegc, _egc: _ebbdf, _bdbe: _bbgab}
}

// highestWord returns the first word stored under `_cded` whose `_cefef` value
// lies in [`_ebea`, `_bacg`], or nil if there is none.
func (_ceg *wordBag) highestWord(_cded int, _ebea, _bacg float64) *textWord {
	for _, word := range _ceg._agad[_cded] {
		if word._cefef < _ebea || word._cefef > _bacg {
			continue
		}
		return word
	}
	return nil
}

// sortReadingOrder reorders the list in place: it computes the extended
// bounding boxes, sorts with _fdfd as the comparison, and then applies the
// order returned by topoOrder. Lists of at most one element are left alone.
func (_cgda paraList) sortReadingOrder() {
	_ed.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_cgda))
	if len(_cgda) <= 1 {
		return
	}
	_cgda.computeEBBoxes()
	_gf.Slice(_cgda, func(i, j int) bool {
		return _fdfd(_cgda[i], _cgda[j]) <= 0
	})
	order := _cgda.topoOrder()
	_cgda.reorder(order)
}

// updateBBox grows the cell's rectangle to include every paragraph in it.
func (_agbd *compositeCell) updateBBox() {
	for _, para := range _agbd.paraList {
		_agbd.PdfRectangle = _bfad(_agbd.PdfRectangle, para.PdfRectangle)
	}
}

// Sentinel errors for type and range check failures.
var (
	_dg  = _b.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")
	_bgd = _b.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)

// getComposite returns the paragraphs and bounding box of the composite cell
// at (`_bgbbc`, `_decgd`), or nil and an empty rectangle if there is no cell.
func (_bcfba *textTable) getComposite(_bgbbc, _decgd int) (paraList, _cfb.PdfRectangle) {
	cell, found := _bcfba._gacgd[_fedd(_bgbbc, _decgd)]
	if _ddbc {
		_bb.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a", _bgbbc, _decgd, cell.String())
	}
	if found {
		return cell.parasBBox()
	}
	return nil, _cfb.PdfRectangle{}
}

// compositeColCorridors returns a map with a nil entry for every index from 0
// up to, but not including, `_dedd`.
func (_egdg *textTable) compositeColCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, _egdg._dedd)
	if _ddbc {
		_ed.Log.Info("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020", _egdg._dedd)
	}
	for col := 0; col < _egdg._dedd; col++ {
		corridors[col] = nil
	}
	return corridors
}

// The rulingKind values.
const (
	_abbb rulingKind = iota
	_ecbd
	_dcfc
)
// TableCell is a cell in a TextTable.
type TableCell struct {
	// Text is the extracted text.
	Text string
	// Marks returns the TextMarks corresponding to the text in Text.
	Marks TextMarkArray
}

// _eegc builds a ruling from the line between `_gdbfe` and `_cgbb` with colour
// `_bcbgcb`. It returns (nil, false) when _fccg classifies the line as _abbb;
// otherwise it returns the result of asRuling.
func _eegc(_gdbfe, _cgbb _cfa.Point, _bcbgcb _a.Color) (*ruling, bool) {
	line := lineRuling{_egad: _gdbfe, _bggga: _cgbb, _fbcc: _fccg(_gdbfe, _cgbb), Color: _bcbgcb}
	if line._fbcc != _abbb {
		return line.asRuling()
	}
	return nil, false
}

// extractTables finds the tables among the paragraphs using `_addgg` and
// returns the list produced by applyTables. Lists shorter than _gggg are
// returned unchanged.
func (_deabc paraList) extractTables(_addgg []gridTiling) paraList {
	if _ddbc {
		_ed.Log.Debug("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_deabc))
	}
	if len(_deabc) < _gggg {
		return _deabc
	}
	found := _deabc.findTables(_addgg)
	if _ddbc {
		_ed.Log.Info("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(found))
		for n, table := range found {
			table.log(_bb.Sprintf("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064", n))
		}
	}
	return _deabc.applyTables(found)
}

// The markKind values.
const (
	_bafdf markKind = iota
	_eadd
	_efcd
	_baddd
)

// clearPath drops all subpaths and clears the fresh flag.
func (_bfbg *shapesState) clearPath() {
	_bfbg._ccac = nil
	_bfbg._fab = false
	if _bfcbef {
		_ed.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _bfbg)
	}
}

// minDepth returns `_cdee` minus (the bag's upper y coordinate minus `_agg`).
func (_gdgd *wordBag) minDepth() float64 {
	top := _gdgd.Ury - _gdgd._agg
	return _gdgd._cdee - top
}

// _gedfg returns the integers 0..`_cdgbf`-1 sorted with `_dfbd` as the "less"
// comparison between two of those integers.
func _gedfg(_cdgbf int, _dfbd func(int, int) bool) []int {
	order := make([]int, _cdgbf)
	for i := range order {
		order[i] = i
	}
	_gf.Slice(order, func(a, b int) bool {
		return _dfbd(order[a], order[b])
	})
	return order
}

// empty reports whether the stack holds no states.
func (_fdga *stateStack) empty() bool {
	return len(*_fdga) == 0
}

// checkWidth returns `_dagg` minus `_cddd`, and whether that difference is at
// most _dffa.
func (_daed rectRuling) checkWidth(_cddd, _dagg float64) (float64, bool) {
	width := _dagg - _cddd
	return width, width <= _dffa
}

// blocked reports whether a ruling of the bag separates `_cdde` from the bag.
// When the word lies wholly to one side horizontally, the `_afbb` rulings are
// tested; when it lies wholly above or below, the `_gacc` rulings are tested.
func (_deee *wordBag) blocked(_cdde *textWord) bool {
	switch {
	case _cdde.Urx < _deee.Llx:
		from := _cddgd(_cdde.PdfRectangle)
		to := _ggdcg(_deee.PdfRectangle)
		if _deee._afbb.blocks(from, to) {
			if _fgcf {
				_ed.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _cdde, _deee)
			}
			return true
		}
	case _deee.Urx < _cdde.Llx:
		from := _cddgd(_deee.PdfRectangle)
		to := _ggdcg(_cdde.PdfRectangle)
		if _deee._afbb.blocks(from, to) {
			if _fgcf {
				_ed.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s", _cdde, _deee)
			}
			return true
		}
	}

	switch {
	case _cdde.Ury < _deee.Lly:
		from := _fgfd(_cdde.PdfRectangle)
		to := _eebc(_deee.PdfRectangle)
		if _deee._gacc.blocks(from, to) {
			if _fgcf {
				_ed.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _cdde, _deee)
			}
			return true
		}
	case _deee.Ury < _cdde.Lly:
		from := _fgfd(_deee.PdfRectangle)
		to := _eebc(_cdde.PdfRectangle)
		if _deee._gacc.blocks(from, to) {
			if _fgcf {
				_ed.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s", _cdde, _deee)
			}
			return true
		}
	}
	return false
}

// logComposite prints two grids for the composite cells when the _ddbc debug
// flag is set: first the number of paragraphs in each cell, then the quoted
// text of each cell's merged paragraphs passed through _adcgc with 12.
// `_ecff` is a label added to both headers.
func (_defa *textTable) logComposite(_ecff string) {
	if !_ddbc {
		return
	}

	// Grid of paragraph counts.
	_ed.Log.Info("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _defa._dedd, _defa._gggac, _ecff)
	_bb.Printf("\u0025\u0035\u0073 \u007c", "")
	for col := 0; col < _defa._dedd; col++ {
		_bb.Printf("\u0025\u0033\u0064 \u007c", col)
	}
	_bb.Println("")
	_bb.Printf("\u0025\u0035\u0073 \u002b", "")
	for col := 0; col < _defa._dedd; col++ {
		_bb.Printf("\u0025\u0033\u0073 \u002b", "\u002d\u002d\u002d")
	}
	_bb.Println("")
	for row := 0; row < _defa._gggac; row++ {
		_bb.Printf("\u0025\u0035\u0064 \u007c", row)
		for col := 0; col < _defa._dedd; col++ {
			paras, _ := _defa._gacgd[_fedd(col, row)].parasBBox()
			_bb.Printf("\u0025\u0033\u0064 \u007c", len(paras))
		}
		_bb.Println("")
	}

	// Grid of cell texts.
	_ed.Log.Info("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _defa._dedd, _defa._gggac, _ecff)
	_bb.Printf("\u0025\u0035\u0073 \u007c", "")
	for col := 0; col < _defa._dedd; col++ {
		_bb.Printf("\u0025\u0031\u0032\u0064\u0020\u007c", col)
	}
	_bb.Println("")
	_bb.Printf("\u0025\u0035\u0073 \u002b", "")
	for col := 0; col < _defa._dedd; col++ {
		_bb.Print("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b")
	}
	_bb.Println("")
	for row := 0; row < _defa._gggac; row++ {
		_bb.Printf("\u0025\u0035\u0064 \u007c", row)
		for col := 0; col < _defa._dedd; col++ {
			paras, _ := _defa._gacgd[_fedd(col, row)].parasBBox()
			text := ""
			merged := paras.merge()
			if merged != nil {
				text = merged.text()
			}
			text = _bb.Sprintf("\u0025\u0071", _adcgc(text, 12))
			// Strip the quotes that %q adds around the text.
			text = text[1 : len(text)-1]
			_bb.Printf("\u0025\u0031\u0032\u0073\u0020\u007c", text)
		}
		_bb.Println("")
	}
}

// getCurrentFont returns the font held in the text state, or the default font
// (with a debug message) when none has been set.
func (_acfa *textObject) getCurrentFont() *_cfb.PdfFont {
	if font := _acfa._fgfa._fdac; font != nil {
		return font
	}
	_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e")
	return _cfb.DefaultFont()
}
// String returns a string describing `ma`: the number of marks followed by the
// first and last mark, or a fixed placeholder when `ma` holds no marks.
func (_afc TextMarkArray) String() string {
	count := len(_afc._fdda)
	if count == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	first, last := _afc._fdda[0], _afc._fdda[count-1]
	return _bb.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", count, first, last)
}

// tables collects the exported form of every table attached to a paragraph of
// the list. Paragraphs with no table, or whose table reports that it is not
// exportable, contribute nothing.
func (_geege paraList) tables() []TextTable {
	var exported []TextTable
	if _ddbc {
		_ed.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range _geege {
		tbl := para._cgabf
		if tbl == nil || !tbl.isExportable() {
			continue
		}
		exported = append(exported, tbl.toTextTable())
	}
	return exported
}

// setCharSpacing stores `_gaae` in the text state of the object. It is a no-op
// on a nil receiver.
func (_fga *textObject) setCharSpacing(_gaae float64) {
	if _fga == nil {
		return
	}
	_fga._fgfa._bacd = _gaae
	if !_gfcb {
		return
	}
	_ed.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _gaae, _fga._fgfa.String())
}

// _ecd builds the initial text state for the rectangle `_cffg`: the `_ddef`
// field starts at 100 and the render mode at RenderModeFill.
func _ecd(_cffg _cfb.PdfRectangle) textState {
	var initial textState
	initial._ddef = 100
	initial._cbbc = RenderModeFill
	initial._fce = _cffg
	return initial
}

// _fgee returns the translation component of matrix `_eec` as a point.
func _fgee(_eec _cfa.Matrix) _cfa.Point {
	var pt _cfa.Point
	pt.X, pt.Y = _eec.Translation()
	return pt
}

// toTextMarks flattens the paragraphs into one TextMark slice. Paragraphs with
// the `_cea` flag set are skipped entirely. Between a paragraph and its
// successor either a single space mark or two newline marks are inserted,
// depending on `_bfdbg`; two newline marks always terminate the result.
func (_faac paraList) toTextMarks() []TextMark {
	offset := 0
	var marks []TextMark
	lastIdx := len(_faac) - 1
	for i, para := range _faac {
		if para._cea {
			continue
		}
		marks = append(marks, para.toTextMarks(&offset)...)
		if i == lastIdx {
			continue
		}
		if _bfdbg(para, _faac[i+1]) {
			marks = _dfbba(marks, &offset, "\u0020")
			continue
		}
		marks = _dfbba(marks, &offset, "\u000a")
		marks = _dfbba(marks, &offset, "\u000a")
	}
	marks = _dfbba(marks, &offset, "\u000a")
	marks = _dfbba(marks, &offset, "\u000a")
	return marks
}

// _gacg returns `_aedd(a, b)` unless that value is within the `_cbafb`
// tolerance of zero, in which case `_gad(a, b)` is returned instead.
func _gacg(_daaa, _gebb bounded) float64 {
	if primary := _aedd(_daaa, _gebb); !_cbafb(primary) {
		return primary
	}
	return _gad(_daaa, _gebb)
}

// writeText writes the text of every paragraph to `_acdc`, using the same
// separator rules as toTextMarks. Errors returned by the writer are ignored.
func (_dfdd paraList) writeText(_acdc _g.Writer) {
	lastIdx := len(_dfdd) - 1
	for i, para := range _dfdd {
		if para._cea {
			continue
		}
		para.writeText(_acdc)
		if i == lastIdx {
			continue
		}
		if _bfdbg(para, _dfdd[i+1]) {
			_acdc.Write([]byte("\u0020"))
			continue
		}
		_acdc.Write([]byte("\u000a"))
		_acdc.Write([]byte("\u000a"))
	}
	_acdc.Write([]byte("\u000a"))
	_acdc.Write([]byte("\u000a"))
}

// _efgb reports whether both `_ecef` and `_fcadf` hold for the two rectangles.
func _efgb(_afde, _bbca _cfb.PdfRectangle) bool {
	if !_ecef(_afde, _bbca) {
		return false
	}
	return _fcadf(_afde, _bbca)
}
// Text returns the extracted text contained in `l`, as accumulated by the
// helper `_dcbc` into a string builder.
func (_ceea *list) Text() string {
	var (
		sb     _bd.Builder
		prefix string
	)
	_dcbc(_ceea, &sb, &prefix)
	return sb.String()
}
// Marks returns the TextMark collection for a page. It represents all the text
// on the page. The returned array wraps the marks already stored in `pt`.
func (_egdd PageText) Marks() *TextMarkArray {
	arr := new(TextMarkArray)
	arr._fdda = _egdd._dbgf
	return arr
}
// String returns a description of `state`: three numeric fields and the base
// font name, or a placeholder when no font is set.
func (_cggf *textState) String() string {
	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	if font := _cggf._fdac; font != nil {
		fontName = font.BaseFont()
	}
	return _bb.Sprintf("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071", _cggf._bacd, _cggf._dfea, _cggf._ggc, fontName)
}

// toCellTextMarks returns the marks of all lines of the paragraph, for use as
// table cell content. When `_gfgf` is set and a non-final line ends in a
// hyphen, its marks are passed through `_eacf` and no separator is added;
// otherwise a separator chosen by `_aeac` is appended between consecutive
// lines.
func (_bccd *textPara) toCellTextMarks(_fcdfc *int) []TextMark {
	var marks []TextMark
	lastIdx := len(_bccd._ecfaf) - 1
	for i, line := range _bccd._ecfaf {
		lineMarks := line.toTextMarks(_fcdfc)
		joinHyphen := _gfgf && line.endsInHyphen() && i != lastIdx
		if joinHyphen {
			lineMarks = _eacf(lineMarks, _fcdfc)
		}
		marks = append(marks, lineMarks...)
		if joinHyphen || i == lastIdx {
			continue
		}
		marks = _dfbba(marks, _fcdfc, _aeac(line._feff, _bccd._ecfaf[i+1]._feff))
	}
	return marks
}
// String returns a description of `t`: its two dimensions and a boolean flag.
func (_caab *textTable) String() string {
	return _bb.Sprintf("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074", _caab._dedd, _caab._gggac, _caab._agde)
}

// add inserts `_edfg` into the set.
func (_dedg intSet) add(_edfg int) {
	_dedg[_edfg] = struct{}{}
}

// _dgfa drains the words of `_cabg` into a slice of new word bags.
//
// For every depth index, while words remain at that index, the first word in
// reading order seeds a new bag (built by `_badg`) and is removed from
// `_cabg`. The bag is then grown by repeated `scanBand` passes until a full
// pass adds nothing:
//   - a "vertical" and a "horizontal" scan run first; if either adds words the
//     pass restarts;
//   - otherwise a counting scan runs, and depending on its count an "other"
//     scan may add further words.
func _dgfa(_cabg *wordBag, _gbgb float64, _agfc, _fdfb rulingList) []*wordBag {
	var bags []*wordBag
	for _, depthIdx := range _cabg.depthIndexes() {
		grew := false
		for !_cabg.empty(depthIdx) {
			readingIdx := _cabg.firstReadingIndex(depthIdx)
			seed := _cabg.firstWord(readingIdx)
			bag := _badg(seed, _gbgb, _agfc, _fdfb)
			_cabg.removeWord(seed, readingIdx)
			if _feac {
				_ed.Log.Info("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073", seed.String())
			}
			// Run at least one pass, then repeat for as long as a pass added words.
			for again := true; again; again = grew {
				grew = false
				otherGap := _abdb * bag._agg
				readingGap := _ceeb * bag._agg
				depthGap := _aeca * bag._agg
				if _feac {
					_ed.Log.Info("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066", bag.minDepth(), bag.maxDepth(), depthGap, readingGap)
				}
				if _cabg.scanBand("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c", bag, _cgef(_gbedc, 0), bag.minDepth()-depthGap, bag.maxDepth()+depthGap, _cdcea, false, false) > 0 {
					grew = true
				}
				if _cabg.scanBand("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c", bag, _cgef(_gbedc, readingGap), bag.minDepth(), bag.maxDepth(), _edcc, false, false) > 0 {
					grew = true
				}
				if grew {
					continue
				}
				candidates := _cabg.scanBand("", bag, _cgef(_abfca, otherGap), bag.minDepth(), bag.maxDepth(), _babf, true, false)
				if candidates > 0 {
					depthSpan := (bag.maxDepth() - bag.minDepth()) / bag._agg
					if (candidates > 1 && float64(candidates) > 0.3*depthSpan) || candidates <= 10 {
						if _cabg.scanBand("\u006f\u0074\u0068e\u0072", bag, _cgef(_abfca, otherGap), bag.minDepth(), bag.maxDepth(), _babf, false, true) > 0 {
							grew = true
						}
					}
				}
			}
			bags = append(bags, bag)
		}
	}
	return bags
}

// yMean returns the mean of the Y coordinates of the ruling's two end points.
func (_aeacg lineRuling) yMean() float64 {
	ySum := _aeacg._egad.Y + _aeacg._bggga.Y
	return 0.5 * ySum
}

// _bfb is the factor 1/1000 applied to font metric values in this file.
const _bfb = 1.0 / 1000.0

// _abfca reports whether the left edge of word `_afea` lies at or to the right
// of the right edge of `_fbge`, and less than `_egddd` beyond it.
func _abfca(_fbge *wordBag, _afea *textWord, _egddd float64) bool {
	if !(_fbge.Urx <= _afea.Llx) {
		return false
	}
	return _afea.Llx < _fbge.Urx+_egddd
}
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// An ErrColorOutOfRange result from the underlying extraction is tolerated; any
// other error, or a failed licence check, aborts with that error.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
func (_ace *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pageText, numChars, numMisses, err := _ace.extractPageText(_ace._ede, _ace._cd, _cfa.IdentityMatrix(), 0)
	if err != nil && err != _cfb.ErrColorOutOfRange {
		return nil, 0, 0, err
	}
	pageText.computeViews()
	if err = _eabde(pageText); err != nil {
		return nil, 0, 0, err
	}
	if opts := _ace._fc; opts != nil {
		if opts.ApplyCropBox && _ace._ab != nil {
			pageText.ApplyArea(*_ace._ab)
		}
		pageText._fgcd = opts.DisableDocumentTags
	}
	return pageText, numChars, numMisses, nil
}

// _dfbba appends to `_fbcdb` a copy of the template mark `_cfce` whose text is
// `_egca`, delegating the append and offset handling to `_aeece`.
func _dfbba(_fbcdb []TextMark, _afba *int, _egca string) []TextMark {
	filler := _cfce
	filler.Text = _egca
	return _aeece(_fbcdb, _afba, filler)
}

// _cbafb reports whether the magnitude of `_dcaaf` is below the tolerance `_egddf`.
func _cbafb(_dcaaf float64) bool {
	magnitude := _e.Abs(_dcaaf)
	return magnitude < _egddf
}

// _eabde performs the licence check. It returns nil when a licence key is
// present and licensed, or when `_ge` is set; otherwise it prints a notice to
// standard output and returns an error. The argument is not used.
func _eabde(_eafcc *PageText) error {
	key := _fe.GetLicenseKey()
	if key != nil && key.IsLicensed() || _ge {
		return nil
	}
	_bb.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_bb.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _b.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}
// ToTextMark returns the public view of `tm`.
func (_fbgb *textMark) ToTextMark() TextMark {
	return TextMark{
		Text:         _fbgb._gbag,
		Original:     _fbgb._gaead,
		BBox:         _fbgb._ecca,
		Font:         _fbgb._eggf,
		FontSize:     _fbgb._gegdd,
		FillColor:    _fbgb._adcd,
		StrokeColor:  _fbgb._abfg,
		Orientation:  _fbgb._dgfd,
		DirectObject: _fbgb._ecad,
		ObjString:    _fbgb._gedf,
		Tw:           _fbgb.Tw,
		Th:           _fbgb.Th,
		Tc:           _fbgb._afff,
		Index:        _fbgb._dfac,
	}
}

// _bdffcd formats the map as "{key: values, ...}", visiting keys in the order
// returned by `_efec` (presumably sorted; confirm against `_efec`).
func _bdffcd(_fbacd map[int][]float64) string {
	keys := _efec(_fbacd)
	parts := make([]string, len(_fbacd))
	for i, key := range keys {
		parts[i] = _bb.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", key, _fbacd[key])
	}
	return _bb.Sprintf("\u007b\u0025\u0073\u007d", _bd.Join(parts, "\u002c\u0020"))
}

// ruling describes one ruling: its kind, the kind of mark it came from, its
// colour and four float64 values (the meaning of each field is defined by its
// users elsewhere in the package).
type ruling struct {
	_efdg  rulingKind
	_edebb markKind
	_a.Color
	_gged float64
	_abcc float64
	_dfad float64
	_bbgd float64
}

// gridIntersecting reports whether both the `_abcc` and the `_dfad` values of
// the two rulings match according to `_bdaf`.
func (_fbdcc *ruling) gridIntersecting(_fbed *ruling) bool {
	return _bdaf(_fbdcc._abcc, _fbed._abcc) && _bdaf(_fbdcc._dfad, _fbed._dfad)
}

// drawRectangle adds a closed four-sided sub-path with corner (`_agf`, `_egf`),
// width `_fbff` and height `_afb` to the shapes state.
func (_bdab *shapesState) drawRectangle(_agf, _egf, _fbff, _afb float64) {
	if _bfcbef {
		ll := _bdab.devicePoint(_agf, _egf)
		ur := _bdab.devicePoint(_agf+_fbff, _egf+_afb)
		rect := _cfb.PdfRectangle{Llx: ll.X, Lly: ll.Y, Urx: ur.X, Ury: ur.Y}
		_ed.Log.Info("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066", rect)
	}
	_bdab.newSubPath()
	_bdab.moveTo(_agf, _egf)
	_bdab.lineTo(_agf+_fbff, _egf)
	_bdab.lineTo(_agf+_fbff, _egf+_afb)
	_bdab.lineTo(_agf, _egf+_afb)
	_bdab.closePath()
}

// _gegdg returns `_cfea` without its last rune. It indexes past the start of
// the rune slice (and so panics) on an empty string, so callers must pass a
// non-empty string.
func _gegdg(_cfea string) string {
	runes := []rune(_cfea)
	return string(runes[:len(runes)-1])
}

// _gcee returns the distance from the top of `_gdecf` down to the bottom of the
// bounding box of `_bge`.
func _gcee(_gdecf _cfb.PdfRectangle, _bge bounded) float64 {
	return _gdecf.Ury - _bge.bbox().Lly
}

// parseStructTreeRoot fills the receiver from the structure tree root object
// `_ffcf`. A nil object is ignored.
//
// If `_ffcf` does not resolve to a dictionary, the problem is logged and the
// receiver is left untouched; previously execution carried on and dereferenced
// the nil dictionary. The "Type" entry is read only when present, so a root
// without it yields an empty type string instead of a nil-interface method
// call. The "K" entry is accepted either as an array or as a single reference,
// which is wrapped in a one-element array.
func (_dadc *structTreeRoot) parseStructTreeRoot(_ffcf _feb.PdfObject) {
	if _ffcf == nil {
		return
	}
	dict, isDict := _feb.GetDict(_ffcf)
	if !isDict {
		_ed.Log.Debug("\u0070\u0061\u0072s\u0065\u0053\u0074\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u003a\u0020\u0064\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006eo\u0074\u0020\u0066\u006f\u0075\u006e\u0064\u002e")
		return
	}
	kidsObj := dict.Get("\u004b")
	var typeName string
	if typeObj := dict.Get("\u0054\u0079\u0070\u0065"); typeObj != nil {
		typeName = typeObj.String()
	}
	var kids *_feb.PdfObjectArray
	switch v := kidsObj.(type) {
	case *_feb.PdfObjectArray:
		kids = v
	case *_feb.PdfObjectReference:
		kids = _feb.MakeArray(kidsObj)
	}
	// As in the original, `kids` may still be nil here (K missing or of another
	// kind); Elements is relied on to tolerate that and yield no elements.
	elements := []structElement{}
	for _, kid := range kids.Elements() {
		elem := &structElement{}
		elem.parseStructElement(kid)
		elements = append(elements, *elem)
	}
	_dadc._acbec = elements
	_dadc._efdb = typeName
}

// complete reports whether the tile has all four borders.
func (_fbaef gridTile) complete() bool {
	return _fbaef.numBorders() == 4
}
// moveTo marks a current point as established and records the device-space
// position of (`_gdeg`, `_fgffa`) as that point.
func (_gcgg *shapesState) moveTo(_gdeg, _fgffa float64) {
	_gcgg._fab = true
	_gcgg._ggb = _gcgg.devicePoint(_gdeg, _fgffa)
	if !_bfcbef {
		return
	}
	_ed.Log.Info("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066", _gdeg, _fgffa, _gcgg._ggb)
}

// _fdfd returns `_gad(a, b)` unless that value is within the `_cbafb`
// tolerance of zero, in which case `_aedd(a, b)` is returned instead.
func _fdfd(_bebgb, _ecdb bounded) float64 {
	if primary := _gad(_bebgb, _ecdb); !_cbafb(primary) {
		return primary
	}
	return _aedd(_bebgb, _ecdb)
}

// _dfbad builds the grid tile for rectangle `_ebcf`. Each of the four border
// flags is set when the corresponding ruling is non-nil and encloses the
// rectangle's extent: the first two rulings are tested against the vertical
// extent (Lly..Ury), the last two against the horizontal extent (Llx..Urx).
func _dfbad(_ebcf _cfb.PdfRectangle, _edge, _ddcg, _geaga, _eeeg *ruling) gridTile {
	x0, x1 := _ebcf.Llx, _ebcf.Urx
	y0, y1 := _ebcf.Lly, _ebcf.Ury
	return gridTile{
		PdfRectangle: _ebcf,
		_afce:        _edge != nil && _edge.encloses(y0, y1),
		_gfbbcf:      _ddcg != nil && _ddcg.encloses(y0, y1),
		_ggaf:        _geaga != nil && _geaga.encloses(x0, x1),
		_cddec:       _eeeg != nil && _eeeg.encloses(x0, x1),
	}
}

// text returns the text of the line: the text of each word in order, preceded
// by a single space for every word whose `_ggge` flag is set.
func (_fgce *textLine) text() string {
	var sb _bd.Builder
	for _, word := range _fgce._eded {
		if word._ggge {
			sb.WriteString("\u0020")
		}
		sb.WriteString(word._ecgg)
	}
	return sb.String()
}

// lastpointEstablished returns the current point and false when one is
// available (either established directly, or the last point of the most recent
// sub-path when that sub-path's `_gdee` flag is set). Otherwise it returns the
// zero point and true.
// NOTE(review): the boolean is true when NO point is available; confirm this
// is what callers expect.
func (_gbed *shapesState) lastpointEstablished() (_cfa.Point, bool) {
	if _gbed._fab {
		return _gbed._ggb, false
	}
	if n := len(_gbed._ccac); n > 0 && _gbed._ccac[n-1]._gdee {
		return _gbed._ccac[n-1].last(), false
	}
	return _cfa.Point{}, true
}

// split divides the composite cell into a table of sub-cells along the given
// corridors (`_cfgfc` first, `_gfab` second; the debug output labels them row
// and column corridors respectively).
//
// The corridor slices are first passed through `_cefeg` together with the
// cell's own bounds, and the cell's paragraphs are sorted by Lly, then Llx
// (in place). Every line of those paragraphs is assigned to the first sub-cell
// rectangle for which `_bfcgf` holds; non-empty sub-cells are turned into
// paragraphs by `_gfgfd` and stored in the returned table.
func (_cfadb compositeCell) split(_cfgfc, _gfab []float64) *textTable {
	numRows := len(_cfgfc) + 1
	numCols := len(_gfab) + 1
	if _ddbc {
		_ed.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066", numCols, numRows, _cfadb, _cfgfc, _gfab)
		_bb.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a", len(_cfadb.paraList))
		for i, para := range _cfadb.paraList {
			_bb.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para.String())
		}
		_bb.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", len(_cfadb.lines()))
		for i, ln := range _cfadb.lines() {
			_bb.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, ln)
		}
	}

	_cfgfc = _cefeg(_cfgfc, _cfadb.Ury, _cfadb.Lly)
	_gfab = _cefeg(_gfab, _cfadb.Llx, _cfadb.Urx)

	cells := make(map[uint64]*textPara, numCols*numRows)
	table := textTable{_dedd: numCols, _gggac: numRows, _dadef: cells}

	paras := _cfadb.paraList
	_gf.Slice(paras, func(i, j int) bool {
		pi, pj := paras[i], paras[j]
		if yi, yj := pi.Lly, pj.Lly; yi != yj {
			return yi < yj
		}
		return pi.Llx < pj.Llx
	})

	// Rectangle of every sub-cell, keyed by `_fedd(column, row)`.
	rects := make(map[uint64]_cfb.PdfRectangle, numCols*numRows)
	for y, lly := range _cfgfc[1:] {
		ury := _cfgfc[y]
		for x, urx := range _gfab[1:] {
			llx := _gfab[x]
			rects[_fedd(x, y)] = _cfb.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
		}
	}
	if _ddbc {
		_ed.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073")
		_bb.Printf("\u0020\u0020\u0020\u0020")
		for x := 0; x < numCols; x++ {
			_bb.Printf("\u0025\u0033\u0030\u0064\u002c\u0020", x)
		}
		_bb.Println()
		for y := 0; y < numRows; y++ {
			_bb.Printf("\u0020\u0020\u0025\u0032\u0064\u003a", y)
			for x := 0; x < numCols; x++ {
				_bb.Printf("\u00256\u002e\u0032\u0066\u002c\u0020", rects[_fedd(x, y)])
			}
			_bb.Println()
		}
	}

	// locate returns the (column, row) of the first sub-cell accepted by
	// `_bfcgf` for the line, or (-1, -1) when there is none.
	locate := func(ln *textLine) (int, int) {
		for y := 0; y < numRows; y++ {
			for x := 0; x < numCols; x++ {
				if _bfcgf(rects[_fedd(x, y)], ln.PdfRectangle) {
					return x, y
				}
			}
		}
		return -1, -1
	}

	cellLines := make(map[uint64][]*textLine, numCols*numRows)
	for _, ln := range paras.lines() {
		x, y := locate(ln)
		if x < 0 {
			continue
		}
		cellLines[_fedd(x, y)] = append(cellLines[_fedd(x, y)], ln)
	}

	for y := 0; y < len(_cfgfc)-1; y++ {
		ury := _cfgfc[y]
		lly := _cfgfc[y+1]
		for x := 0; x < len(_gfab)-1; x++ {
			llx := _gfab[x]
			urx := _gfab[x+1]
			rect := _cfb.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			lines := cellLines[_fedd(x, y)]
			if len(lines) == 0 {
				continue
			}
			para := _gfgfd(rect, lines)
			table.put(x, y, para)
		}
	}
	return &table
}
// GetContentStreamOps returns the contentStreamOps field of `pt`.
func (_aaa *PageText) GetContentStreamOps() *_bc.ContentStreamOperations {
	ops := _aaa._dbbd
	return ops
}

// fontsize returns the `_daag` value of the paragraph's first line. The
// paragraph must contain at least one line.
func (_gbfdg *textPara) fontsize() float64 {
	firstLine := _gbfdg._ecfaf[0]
	return firstLine._daag
}

// _eabd reports whether `_bfbb` equals the first byte of any entry of `_degc`.
func _eabd(_bfbb byte) bool {
	for _, entry := range _degc {
		if first := []byte(entry)[0]; first == _bfbb {
			return true
		}
	}
	return false
}

// reorder permutes the list in place so that position i afterwards holds the
// paragraph that was at position `_dfba[i]`.
func (_fgad paraList) reorder(_dfba []int) {
	permuted := make(paraList, len(_fgad))
	for dst := range _dfba {
		permuted[dst] = _fgad[_dfba[dst]]
	}
	copy(_fgad, permuted)
}
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct {
	Fonts []Font
}

// primMinMax returns the smallest and largest `_gged` value among the rulings.
// The list must not be empty.
func (_bbcg rulingList) primMinMax() (float64, float64) {
	lo := _bbcg[0]._gged
	hi := lo
	for _, r := range _bbcg[1:] {
		switch v := r._gged; {
		case v < lo:
			lo = v
		case v > hi:
			hi = v
		}
	}
	return lo, hi
}

// readBefore reports whether paragraph `_befa` is read before paragraph
// `_bebfa`. It is true when `_cfbe` holds for the pair and the first is higher
// on the page (greater Lly). Otherwise the first must lie wholly to the left
// of the second (by their `_bbgce` boxes), and no other paragraph returned by
// `llyRange` for the span between their Lly values may overlap, horizontally,
// the interval from the larger Llx to the smaller Urx of the pair.
func (_faeca paraList) readBefore(_ddaae []int, _befa, _bebfa int) bool {
	a, b := _faeca[_befa], _faeca[_bebfa]
	if _cfbe(a, b) && a.Lly > b.Lly {
		return true
	}
	if !(a._bbgce.Urx < b._bbgce.Llx) {
		return false
	}
	lo, hi := a.Lly, b.Lly
	if lo > hi {
		lo, hi = hi, lo
	}
	left := _e.Max(a._bbgce.Llx, b._bbgce.Llx)
	right := _e.Min(a._bbgce.Urx, b._bbgce.Urx)
	for _, idx := range _faeca.llyRange(_ddaae, lo, hi) {
		if idx == _befa || idx == _bebfa {
			continue
		}
		other := _faeca[idx]
		if other._bbgce.Llx <= right && left <= other._bbgce.Urx {
			return false
		}
	}
	return true
}

// cachedImage pairs an image with a colorspace.
type cachedImage struct {
	_gea *_cfb.Image
	_dfg _cfb.PdfColorspace
}

// _dagc converts `_dgfad`, interpreted in colorspace `_beaec`, to an opaque
// NRGBA colour. It returns black when either argument is nil, when the
// conversion to RGB fails, or when the converted value is not a device RGB
// colour.
func _dagc(_beaec _cfb.PdfColorspace, _dgfad _cfb.PdfColor) _a.Color {
	if _beaec == nil || _dgfad == nil {
		return _a.Black
	}
	converted, err := _beaec.ColorToRGB(_dgfad)
	if err != nil {
		_ed.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", _dgfad, _beaec, err)
		return _a.Black
	}
	rgb, isRGB := converted.(*_cfb.PdfColorDeviceRGB)
	if !isRGB {
		_ed.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", converted)
		return _a.Black
	}
	return _a.NRGBA{
		R: uint8(rgb.R() * 255),
		G: uint8(rgb.G() * 255),
		B: uint8(rgb.B() * 255),
		A: uint8(255),
	}
}
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	IncludeInlineStencilMasks bool
}

// _bed builds a word bag from `_ebaf`: the first word seeds the bag through
// `_badg`, every further word is filed under its `_gaea` index and merged into
// the bag's bounding box, and the bag is sorted before being returned. The
// slice must not be empty.
func _bed(_ebaf []*textWord, _egeae float64, _efg, _cfca rulingList) *wordBag {
	bag := _badg(_ebaf[0], _egeae, _efg, _cfca)
	for _, word := range _ebaf[1:] {
		idx := _gaea(word._cefef)
		bag._agad[idx] = append(bag._agad[idx], word)
		bag.PdfRectangle = _bfad(bag.PdfRectangle, word.PdfRectangle)
	}
	bag.sort()
	return bag
}

// depth returns the smallest depth among the cells fetched with `get(x, 0)`
// for every x below `_dedd`, ignoring missing cells and cells whose `_cea`
// flag is set. It returns 1e10 when no cell qualifies.
func (_cdced *textTable) depth() float64 {
	minDepth := 1e10
	for x := 0; x < _cdced._dedd; x++ {
		cell := _cdced.get(x, 0)
		if cell == nil || cell._cea {
			continue
		}
		minDepth = _e.Min(minDepth, cell.depth())
	}
	return minDepth
}

// log prints the list summary and each ruling when the `_dgab` debug flag is
// set; otherwise it does nothing.
func (_dfded rulingList) log(_fcbba string) {
	if !_dgab {
		return
	}
	_ed.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _fcbba, _dfded.String())
	for i, r := range _dfded {
		_bb.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r.String())
	}
}

// processOperand handles one content stream operation during image extraction.
// Only single-parameter "BI" (inline image) and "Do" (XObject) operations are
// acted on; everything else returns nil. Inline images flagged as image masks
// are skipped unless IncludeInlineStencilMasks is set. A "Do" whose parameter
// is not a name returns `_dg`.
func (_bde *imageExtractContext) processOperand(_ca *_bc.ContentStreamOperation, _gaf _bc.GraphicsState, _ada *_cfb.PdfPageResources) error {
	if len(_ca.Params) != 1 {
		return nil
	}
	switch _ca.Operand {
	case "\u0042\u0049":
		inline, isInline := _ca.Params[0].(*_bc.ContentStreamInlineImage)
		if !isInline {
			return nil
		}
		if isMask, hasMask := _feb.GetBoolVal(inline.ImageMask); hasMask {
			if isMask && !_bde._ead.IncludeInlineStencilMasks {
				return nil
			}
		}
		return _bde.extractInlineImage(inline, _gaf, _ada)
	case "\u0044\u006f":
		name, isName := _feb.GetName(_ca.Params[0])
		if !isName {
			_ed.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065")
			return _dg
		}
		_, xobjType := _ada.GetXObjectByName(*name)
		switch xobjType {
		case _cfb.XObjectTypeImage:
			return _bde.extractXObjectImage(name, _gaf, _ada)
		case _cfb.XObjectTypeForm:
			return _bde.extractFormImages(name, _gaf, _ada)
		}
	}
	return nil
}

// bbox returns the bounding rectangle of the paragraph.
func (_ffffe *textPara) bbox() _cfb.PdfRectangle {
	rect := _ffffe.PdfRectangle
	return rect
}

// _acead returns the larger of the two integers.
func _acead(_dbdb, _fddb int) int {
	larger := _fddb
	if _dbdb > larger {
		larger = _dbdb
	}
	return larger
}

// top returns the last element of the stack without removing it, or nil when
// the stack is empty.
func (_cfe *stateStack) top() *textState {
	if !_cfe.empty() {
		return (*_cfe)[_cfe.size()-1]
	}
	return nil
}
// String returns a description of `k`: its name from the `_effb` table, or a
// fallback message containing the numeric value when it has no entry.
func (_ceacb markKind) String() string {
	if name, known := _effb[_ceacb]; known {
		return name
	}
	return _bb.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _ceacb)
}

// writeCellText writes the text of the paragraph's lines to `_fdfe`, for use as
// table cell content. When `_gfgf` is set and a non-final line ends in a
// hyphen, its last rune is dropped and no separator follows; otherwise a
// separator chosen by `_aeac` is written between consecutive lines. Errors
// returned by the writer are ignored.
func (_addg *textPara) writeCellText(_fdfe _g.Writer) {
	lastIdx := len(_addg._ecfaf) - 1
	for i, line := range _addg._ecfaf {
		lineText := line.text()
		joinHyphen := _gfgf && line.endsInHyphen() && i != lastIdx
		if joinHyphen {
			lineText = _gegdg(lineText)
		}
		_fdfe.Write([]byte(lineText))
		if joinHyphen || i == lastIdx {
			continue
		}
		_fdfe.Write([]byte(_aeac(line._feff, _addg._ecfaf[i+1]._feff)))
	}
}

// _cdad reports whether the `_bfdf` values of the marks of all words on the
// line agree. The value -1 is treated as "nothing seen yet", so a mark
// carrying -1 never causes a mismatch by itself.
func _cdad(_ecbg *textLine) bool {
	seen := -1
	for _, word := range _ecbg._eded {
		for _, mark := range word._dece {
			switch value := mark._bfdf; {
			case seen == -1:
				seen = value
			case seen != value:
				// The first mismatch settles the answer; the remaining marks
				// cannot change it.
				return false
			}
		}
	}
	return true
}

// setTextLeading stores `_aec` in the `_bdf` field of the text state. It is a
// no-op on a nil receiver.
func (_dgbd *textObject) setTextLeading(_aec float64) {
	if _dgbd != nil {
		_dgbd._fgfa._bdf = _aec
	}
}

// _gegf concatenates all the ruling lists and returns the result of
// `vertsHorzs` on the combined list.
func _gegf(_agafd []rulingList) (rulingList, rulingList) {
	var combined rulingList
	for i := range _agafd {
		combined = append(combined, _agafd[i]...)
	}
	return combined.vertsHorzs()
}

// endsInHyphen reports whether the line ends in a hyphen that passes the
// `_aegb` check. The last rune of the last word must be a Unicode hyphen; the
// check is then applied to that word alone when its `_ggge` flag is set, and
// otherwise (or if that fails) to the text of the whole line. The line must
// contain at least one word.
func (_bcdf *textLine) endsInHyphen() bool {
	lastWord := _bcdf._eded[len(_bcdf._eded)-1]
	wordText := lastWord._ecgg
	r, size := _ba.DecodeLastRuneInString(wordText)
	if size <= 0 || !_cf.Is(_cf.Hyphen, r) {
		return false
	}
	if lastWord._ggge && _aegb(wordText) {
		return true
	}
	return _aegb(_bcdf.text())
}

// _fcadf reports whether the vertical extents of the two rectangles overlap
// (touching edges count as overlapping).
func _fcadf(_adcf, _fccd _cfb.PdfRectangle) bool {
	if !(_adcf.Lly <= _fccd.Ury) {
		return false
	}
	return _fccd.Lly <= _adcf.Ury
}

// _geeg returns the single numeric parameter of the operation as a float64. It
// logs and returns an error when the operation does not have exactly one
// parameter.
func _geeg(_dcf *_bc.ContentStreamOperation) (float64, error) {
	if len(_dcf.Params) == 1 {
		return _feb.GetNumberAsFloat(_dcf.Params[0])
	}
	err := _b.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _dcf.Operand, 1, len(_dcf.Params), _dcf.Params)
	return 0.0, err
}
// String returns a human readable description of `vecs`: the number of
// vertical and horizontal rulings and, when both kinds are present, the
// rectangle spanned by the first and last ruling of each kind.
func (_efccg rulingList) String() string {
	if len(_efccg) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := _efccg.vertsHorzs()
	numVerts, numHorzs := len(verts), len(horzs)
	if numVerts == 0 || numHorzs == 0 {
		return _bb.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", numVerts, numHorzs)
	}
	span := _cfb.PdfRectangle{
		Llx: verts[0]._gged,
		Urx: verts[numVerts-1]._gged,
		Lly: horzs[numHorzs-1]._gged,
		Ury: horzs[0]._gged,
	}
	return _bb.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", numVerts, numHorzs, span)
}

// reset sets both matrices of the text object to the identity and discards the
// collected marks.
func (_ded *textObject) reset() {
	_ded._bbde = _cfa.IdentityMatrix()
	_ded._gcgb = _cfa.IdentityMatrix()
	_ded._geca = nil
}

// text returns the text of the paragraph as produced by writeText.
func (_egfe *textPara) text() string {
	var buf _cg.Buffer
	_egfe.writeText(&buf)
	return buf.String()
}

// computeViews derives the three cached views of the page from its
// paragraphs: the plain text, the text marks and the tables.
func (_cdfac *PageText) computeViews() {
	paras := _cdfac.getParagraphs()
	var buf _cg.Buffer
	paras.writeText(&buf)
	_cdfac._bggg = buf.String()
	_cdfac._dbgf = paras.toTextMarks()
	_cdfac._aegg = paras.tables()
	if _ddbc {
		_ed.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(_cdfac._aegg))
	}
}

// putComposite stores a composite cell made of `_efeb` and `_fagebe` at
// position (`_bbad`, `_fafe`) after updating its bounding box. An empty
// paragraph list is logged as an error and ignored.
func (_cffgf *textTable) putComposite(_bbad, _fafe int, _efeb paraList, _fagebe _cfb.PdfRectangle) {
	if len(_efeb) == 0 {
		_ed.Log.Error("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073")
		return
	}
	cell := compositeCell{PdfRectangle: _fagebe, paraList: _efeb}
	if _ddbc {
		_bb.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a", _bbad, _fafe, cell.String())
	}
	cell.updateBBox()
	_cffgf._gacgd[_fedd(_bbad, _fafe)] = cell
}

// lineRuling is a ruling given by its two end points, together with its kind,
// the kind of mark it came from and its colour.
type lineRuling struct {
	_fbcc rulingKind
	_ddc  markKind
	_a.Color
	_egad, _bggga _cfa.Point
}

// appendWord adds `_acgff` to the line, grows the line's bounding box to
// include it, and raises the line's `_daag` and `_feff` values to the word's
// `_feabg` and `_cefef` values when those are larger.
func (_baeb *textLine) appendWord(_acgff *textWord) {
	_baeb._eded = append(_baeb._eded, _acgff)
	_baeb.PdfRectangle = _bfad(_baeb.PdfRectangle, _acgff.PdfRectangle)
	if _baeb._daag < _acgff._feabg {
		_baeb._daag = _acgff._feabg
	}
	if _baeb._feff < _acgff._cefef {
		_baeb._feff = _acgff._cefef
	}
}

// _gbedc reports whether the horizontal extent of `_bcgf` overlaps that of
// `_gfcg` after widening the latter by `_egac` on both sides.
func _gbedc(_gfcg *wordBag, _bcgf *textWord, _egac float64) bool {
	if !(_bcgf.Llx < _gfcg.Urx+_egac) {
		return false
	}
	return _gfcg.Llx-_egac < _bcgf.Urx
}

// _bfae returns the distinct inner keys of the nested map in ascending order.
func _bfae(_bebgd map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_bebgd))
	seen := make(map[float64]struct{}, len(_bebgd))
	for _, inner := range _bebgd {
		for key := range inner {
			if _, dup := seen[key]; dup {
				continue
			}
			keys = append(keys, key)
			seen[key] = struct{}{}
		}
	}
	_gf.Float64s(keys)
	return keys
}

// yNeighbours builds two events per paragraph from its Lly and Ury values
// (the first flagged true, the second false) and returns the result of
// eventNeighbours for them. A non-zero `_bfgb` widens each interval by `_bfgb`
// times the paragraph's font size on both ends; with zero the font size is not
// consulted at all.
func (_dgccd paraList) yNeighbours(_bfgb float64) map[*textPara][]int {
	events := make([]event, 2*len(_dgccd))
	if _bfgb == 0 {
		for i, para := range _dgccd {
			events[2*i] = event{para.Lly, true, i}
			events[2*i+1] = event{para.Ury, false, i}
		}
	} else {
		for i, para := range _dgccd {
			events[2*i] = event{para.Lly - _bfgb*para.fontsize(), true, i}
			events[2*i+1] = event{para.Ury + _bfgb*para.fontsize(), false, i}
		}
	}
	return _dgccd.eventNeighbours(events)
}

// merge combines the rulings into a new one: `_gged` is the mean of all
// `_gged` values, `_abcc` the minimum `_abcc`, `_dfad` the maximum `_dfad`;
// kind, mark kind and colour come from the first ruling. The list must not be
// empty.
func (_ddge rulingList) merge() *ruling {
	first := _ddge[0]
	sum := first._gged
	lo := first._abcc
	hi := first._dfad
	for _, r := range _ddge[1:] {
		sum += r._gged
		if r._abcc < lo {
			lo = r._abcc
		}
		if r._dfad > hi {
			hi = r._dfad
		}
	}
	merged := &ruling{
		_efdg:  _ddge[0]._efdg,
		_edebb: _ddge[0]._edebb,
		Color:  _ddge[0].Color,
		_gged:  sum / float64(len(_ddge)),
		_abcc:  lo,
		_dfad:  hi,
	}
	if _dgbbc {
		_ed.Log.Info("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073", len(_ddge), merged)
		for i, r := range _ddge {
			_bb.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r)
		}
	}
	return merged
}

// renderText converts the bytes `_egbc` of a text-showing operation into text
// marks appended to the text object, advancing the object's `_bbde` matrix
// after every emitted glyph.
//
// Nothing is done when the `_acde` flag is set. Otherwise the bytes are decoded
// to character codes and strings with the current font, and the hit and miss
// counts are added to the text state. For each decoded string: a lone NUL is
// skipped; the glyph metrics are looked up (a missing metric aborts with an
// error); two displacements are computed, one without and one with the
// `_bacd` state value; a mark is created through newTextMark and, unless it
// falls outside the page, stored; finally `_bbde` is advanced by the
// displacement that includes `_bacd`. A lone space additionally adds the
// `_dfea` state value to both displacements.
func (_gbbb *textObject) renderText(_bfg _feb.PdfObject, _egbc []byte, _fbf int) error {
	if _gbbb._acde {
		_ed.Log.Debug("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e")
		return nil
	}

	font := _gbbb.getCurrentFont()
	charcodes := font.BytesToCharcodes(_egbc)
	texts, numChars, numMisses := font.CharcodesToStrings(charcodes)
	if numMisses > 0 {
		_ed.Log.Debug("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064", numChars, numMisses)
	}
	_gbbb._fgfa._aecc += numChars
	_gbbb._fgfa._egg += numMisses

	state := _gbbb._fgfa
	tfs := state._ggc
	th := state._ddef / 100.0
	// Metrics are scaled by 1/1000, except for Type3 fonts, which use them as is.
	glyphScale := _bfb
	if font.Subtype() == "\u0054\u0079\u0070e\u0033" {
		glyphScale = 1
	}

	// Width of a space: by rune, then by char code 32, then from the default font.
	spaceMetrics, haveSpace := font.GetRuneMetrics(' ')
	if !haveSpace {
		spaceMetrics, haveSpace = font.GetCharMetrics(32)
	}
	if !haveSpace {
		spaceMetrics, _ = _cfb.DefaultFont().GetRuneMetrics(' ')
	}
	spaceWidth := spaceMetrics.Wx * glyphScale
	_ed.Log.Trace("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066", spaceWidth, texts, font, tfs)

	stateMatrix := _cfa.NewMatrix(tfs*th, 0, 0, tfs, 0, state._baea)
	if _gfcb {
		_ed.Log.Info("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071", len(charcodes), charcodes, texts)
	}
	_ed.Log.Trace("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071", len(charcodes), charcodes, len(texts))

	fillColor := _gbbb.getFillColor()
	strokeColor := _gbbb.getStrokeColor()

	for i, glyphText := range texts {
		runes := []rune(glyphText)
		if len(runes) == 1 && runes[0] == '\x00' {
			continue
		}
		code := charcodes[i]
		trm := _gbbb._dfgec.CTM.Mult(_gbbb._bbde).Mult(stateMatrix)

		// The extra spacing applies to a lone space (code point 32) only.
		extraSpace := 0.0
		if len(runes) == 1 && runes[0] == 32 {
			extraSpace = state._dfea
		}

		metrics, haveMetrics := font.GetCharMetrics(code)
		if !haveMetrics {
			_ed.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073", code, runes, runes, font)
			return _bb.Errorf("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064", font.String(), code)
		}

		glyphDisp := _cfa.Point{X: metrics.Wx * glyphScale, Y: metrics.Wy * glyphScale}
		// t0 excludes the `_bacd` spacing, t includes it.
		t0 := _cfa.Point{X: (glyphDisp.X*tfs + extraSpace) * th}
		t := _cfa.Point{X: (glyphDisp.X*tfs + state._bacd + extraSpace) * th}
		if _gfcb {
			_ed.Log.Info("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066", tfs, state._bacd, state._dfea, th)
			_ed.Log.Info("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f", glyphDisp, t0, t)
		}
		td0 := _fde(t0)
		td := _fde(t)
		endMatrix := _gbbb._dfgec.CTM.Mult(_gbbb._bbde).Mult(td0)
		if _cggg {
			_ed.Log.Info("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073", _gbbb._dfgec.CTM, _gbbb._bbde, td, _fgee(_gbbb._dfgec.CTM.Mult(_gbbb._bbde).Mult(td)), td0, endMatrix, _fgee(endMatrix))
		}

		mark, onPage := _gbbb.newTextMark(_bg.ExpandLigatures(runes), trm, _fgee(endMatrix), _e.Abs(spaceWidth*trm.ScalingFactorX()), font, _gbbb._fgfa._bacd, fillColor, strokeColor, _bfg, texts, i, _fbf)
		if !onPage {
			_ed.Log.Debug("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067")
			continue
		}

		// Record the undecorated rune for the code when the font's encoder has one.
		if font == nil {
			_ed.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e")
		} else if font.Encoder() == nil {
			_ed.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073", font)
		} else {
			if original, decoded := font.Encoder().CharcodeToRune(code); decoded {
				mark._gaead = string(original)
			}
		}
		_ed.Log.Trace("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073", i, code, mark, trm)
		_gbbb._geca = append(_gbbb._geca, &mark)
		_gbbb._bbde.Concat(td)
	}
	return nil
}
// PageImages is the set of images extracted from one PDF page. According to the
// original documentation each entry carries spatial information: display
// position and size.
type PageImages struct {
	Images []ImageMark
}

// toTilings logs the receiver, tidies it (option "all"), groups the tidied
// rulings with toGrids and converts every group with asTiling.
// It returns the tidied rulings together with one gridTiling per group, or
// (nil, nil) when the receiver is empty.
func (_aegaf rulingList) toTilings() (rulingList, []gridTiling) {
	_aegaf.log("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s")
	if len(_aegaf) == 0 {
		return nil, nil
	}
	_aegaf = _aegaf.tidied("\u0061\u006c\u006c")
	_aegaf.log("\u0074\u0069\u0064\u0069\u0065\u0064")

	grids := _aegaf.toGrids()
	tilings := make([]gridTiling, 0, len(grids))
	for _, grid := range grids {
		tilings = append(tilings, grid.asTiling())
	}
	return _aegaf, tilings
}

// aligned reports whether the fraction of rulings that were never matched by a
// later ruling (via gridIntersecting) is at most 1.0-_ddec.
// Lists with fewer than two rulings are never aligned.
func (_ggbe rulingList) aligned() bool {
	if len(_ggbe) < 2 {
		return false
	}

	// hits maps a representative ruling to the number of later rulings that
	// intersect it. A ruling that intersects no existing representative
	// becomes a representative itself.
	hits := map[*ruling]int{_ggbe[0]: 0}
	for _, candidate := range _ggbe[1:] {
		matched := false
		for representative := range hits {
			if candidate.gridIntersecting(representative) {
				hits[representative]++
				matched = true
				break
			}
		}
		if !matched {
			hits[candidate] = 0
		}
	}

	unmatched := 0
	for _, count := range hits {
		if count == 0 {
			unmatched++
		}
	}

	fraction := float64(unmatched) / float64(len(_ggbe))
	result := fraction <= 1.0-_ddec
	if _dgab {
		_ed.Log.Info("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", result, fraction, unmatched, len(_ggbe), _ggbe.String())
	}
	return result
}

// _gbeca concatenates the text of `_cecc`, writing `_fadb` before every line.
// A line is followed by a newline when the `_feff` value of the next line
// differs from its own (the last line is compared against 0.0), and by a single
// space otherwise.
func _gbeca(_cecc []*textLine, _fadb string) string {
	var out _bd.Builder
	lastIdx := len(_cecc) - 1
	for i, line := range _cecc {
		content := line.text()
		current := line._feff

		next := 0.0
		if i < lastIdx {
			next = _cecc[i+1]._feff
		}

		out.WriteString(_fadb)
		out.WriteString(content)

		separator := "\u0020"
		if next != current {
			separator = "\u000a"
		}
		out.WriteString(separator)
	}
	return out.String()
}

// devicePoint transforms (`_dec`, `_deab`) by the product `_bbgfd.Mult(_fefd)`
// of the state's two matrices and returns the result as a point.
func (_gcbd *shapesState) devicePoint(_dec, _deab float64) _cfa.Point {
	combined := _gcbd._bbgfd.Mult(_gcbd._fefd)
	x, y := combined.Transform(_dec, _deab)
	return _cfa.NewPoint(x, y)
}

// checkOp validates the parameter count of operation `_daea`.
//
// When the receiver is nil the operation is logged as being outside a text
// object (showing at most `_dbed` parameters); validation still continues.
// A negative `_dbed` disables the count check. On a count mismatch the
// function logs and returns false; the error is non-nil only when `_ddfa` is
// true.
func (_fgcb *textObject) checkOp(_daea *_bc.ContentStreamOperation, _dbed int, _ddfa bool) (_fggb bool, _fgb error) {
	if _fgcb == nil {
		var shown []_feb.PdfObject
		if _dbed > 0 {
			shown = _daea.Params
			if len(shown) > _dbed {
				shown = shown[:_dbed]
			}
		}
		_ed.Log.Debug("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076", _daea.Operand, shown)
	}

	if _dbed < 0 || len(_daea.Params) == _dbed {
		return true, nil
	}

	if _ddfa {
		_fgb = _b.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	}
	_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _daea.Operand, _dbed, len(_daea.Params), _daea.Params)
	return false, _fgb
}

// comp is an ordering predicate over the rulings at indexes `_dgda` and `_dfca`.
//
// Rulings of different kinds are ordered by descending kind; two rulings of
// kind `_abbb` are never ordered. Otherwise the fields `_gged`, `_abcc` and
// `_dfad` are compared in that order; the comparison result is used as is for
// kind `_ecbd` and inverted for any other kind.
func (_gddcc rulingList) comp(_dgda, _dfca int) bool {
	left, right := _gddcc[_dgda], _gddcc[_dfca]
	kind, otherKind := left._efdg, right._efdg
	switch {
	case kind != otherKind:
		return kind > otherKind
	case kind == _abbb:
		return false
	}

	// direct is true when comparison results are returned unchanged.
	direct := kind == _ecbd
	if left._gged != right._gged {
		return (left._gged > right._gged) == direct
	}
	if left._abcc != right._abcc {
		return (left._abcc < right._abcc) == direct
	}
	return (left._dfad < right._dfad) == direct
}

const (
	_fgc = 20

	// _be decodes to "ERROR: Can't convert font object, invalid type".
	_be = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
	// _fbc decodes to "ERROR: Can't get font properties, font not found".
	_fbc = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"
	// _bbf decodes to "ERROR: Can't get font stream, invalid type".
	_bbf = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
)
// getStrokeColor converts the stroking colorspace/color pair of the current
// graphics state (`_dfgec`) with `_dagc` and returns the result.
func (_agef *textObject) getStrokeColor() _a.Color {
	colorspace := _agef._dfgec.ColorspaceStroking
	value := _agef._dfgec.ColorStroking
	return _dagc(colorspace, value)
}

// getDepthIdx maps `_dfde` to an index with `_gaea` and clamps it to the range
// spanned by the first and last entries of depthIndexes().
// It panics when depthIndexes() is empty.
func (_fgcge *wordBag) getDepthIdx(_dfde float64) int {
	indexes := _fgcge.depthIndexes()
	idx := _gaea(_dfde)
	lowest, highest := indexes[0], indexes[len(indexes)-1]
	switch {
	case idx < lowest:
		return lowest
	case idx > highest:
		return highest
	}
	return idx
}

// _cgaba rounds `_gfdbg` to the nearest multiple of `_ffaf`.
func _cgaba(_gfdbg float64) float64 {
	steps := _e.Round(_gfdbg / _ffaf)
	return _ffaf * steps
}

// establishSubpath returns the last subpath in `_ccac` and clears the `_fab`
// flag. If lastpointEstablished reports false, a new subpath built from the
// returned point (via `_bcdg`) is appended first. It returns nil, leaving `_fab`
// untouched, when there is no subpath at all.
func (_dbff *shapesState) establishSubpath() *subpath {
	if point, established := _dbff.lastpointEstablished(); !established {
		_dbff._ccac = append(_dbff._ccac, _bcdg(point))
	}
	count := len(_dbff._ccac)
	if count == 0 {
		return nil
	}
	_dbff._fab = false
	return _dbff._ccac[count-1]
}

// _cgef binds `_bbcf` as the third argument of `_dafg`, yielding a two-argument
// predicate over (*wordBag, *textWord).
func _cgef(_dafg func(*wordBag, *textWord, float64) bool, _bbcf float64) func(*wordBag, *textWord) bool {
	bound := func(bag *wordBag, word *textWord) bool {
		return _dafg(bag, word, _bbcf)
	}
	return bound
}
// Text returns the concatenation of Text() of every element of the receiver,
// in order.
func (_bdce *lists) Text() string {
	var out _bd.Builder
	for _, item := range *_bdce {
		out.WriteString(item.Text())
	}
	return out.String()
}

// buildList builds the lists described by the structure tree.
//
// The top element is the single root child when its type is one of Document,
// Sect, Part, Div or Art; when the root has zero or several children a
// synthetic element wrapping all of them is used instead. Direct children of
// type "L" are collected as they are, children of type "Table" are expanded
// with `_ffeee`. The collected elements are passed to `_bdcef` together with
// `_edag` and `_dege`, and every result is flattened with `_dgaf`.
// It returns nil when the receiver is nil or no top element can be chosen.
func (_dfbfe *structTreeRoot) buildList(_edag map[int][]*textLine, _dege _feb.PdfObject) []*list {
	if _dfbfe == nil {
		_ed.Log.Debug("\u0062\u0075\u0069\u006c\u0064\u004c\u0069\u0073\u0074\u003a\u0020t\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0069\u0073 \u006e\u0069\u006c")
		return nil
	}

	var top *structElement
	if len(_dfbfe._acbec) != 1 {
		top = &structElement{_adae: _dfbfe._acbec, _aggf: _dfbfe._efdb}
	} else {
		switch _dfbfe._acbec[0]._aggf {
		case "\u0044\u006f\u0063\u0075\u006d\u0065\u006e\u0074",
			"\u0053\u0065\u0063\u0074",
			"\u0050\u0061\u0072\u0074",
			"\u0044\u0069\u0076",
			"\u0041\u0072\u0074":
			top = &_dfbfe._acbec[0]
		}
	}
	if top == nil {
		_ed.Log.Debug("\u0062\u0075\u0069\u006cd\u004c\u0069\u0073\u0074\u003a\u0020\u0074\u006f\u0070\u0045l\u0065m\u0065\u006e\u0074\u0020\u0069\u0073\u0020n\u0069\u006c")
		return nil
	}

	// Kept as a non-nil empty slice, exactly like the original, because it is
	// handed to `_bdcef` even when nothing is collected.
	candidates := []structElement{}
	for _, child := range top._adae {
		switch child._aggf {
		case "\u004c":
			candidates = append(candidates, child)
		case "\u0054\u0061\u0062l\u0065":
			candidates = append(candidates, _ffeee(child)...)
		}
	}

	var result []*list
	for _, built := range _bdcef(candidates, _edag, _dege) {
		result = append(result, _dgaf(built)...)
	}
	return result
}
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are the marks tm with `start` < tm.Offset + len(tm.Text) && tm.Offset < `end`, where
// `start` and `end` are offsets in the extracted text.
// `start` and `end` are first clamped to [first.Offset, last.Offset+1].
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
//
// An error is returned when the receiver is nil, when end < start, when `start` lies beyond
// the last mark, or when the clamped range selects no mark. An empty receiver is returned as is.
func (_cbad *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if _cbad == nil {
		return nil, _b.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _bb.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}

	marks := _cbad._fdda
	count := len(marks)
	if count == 0 {
		return _cbad, nil
	}

	if lowest := marks[0].Offset; start < lowest {
		start = lowest
	}
	if highest := marks[count-1].Offset + 1; end > highest {
		end = highest
	}

	// Index of the first mark whose last byte is at or after `start`.
	first := _gf.Search(count, func(i int) bool {
		return marks[i].Offset+len(marks[i].Text)-1 >= start
	})
	if first >= count {
		return nil, _bb.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, first, count, marks[0], marks[count-1])
	}

	// Index one past the last mark that starts before `end`. sort.Search
	// returns `count` when no mark starts at or after `end`, which is exactly
	// the case where the range extends through the final mark. The previous
	// implementation rejected that value as "out of range", so the final mark
	// could never be returned (and a one-element array always failed).
	last := _gf.Search(count, func(i int) bool {
		return marks[i].Offset > end-1
	})
	if last <= first {
		return nil, _bb.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, first, last)
	}
	return &TextMarkArray{_fdda: marks[first:last]}, nil
}

// del removes `_aacde` from the set.
func (_befe intSet) del(_aacde int) {
	delete(_befe, _aacde)
}

// fill appends a pathSection made of the current subpaths (`_ccac`) and the
// current fill color to `*_bfac`. The remainder is debug output guarded by the
// `_dgab` and `_afecb` switches.
func (_cfec *shapesState) fill(_bfac *[]pathSection) {
	section := pathSection{_cafa: _cfec._ccac, Color: _cfec._abac.getFillColor()}
	*_bfac = append(*_bfac, section)
	if !_dgab {
		return
	}

	box := section.bbox()
	_bb.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*_bfac), len(section._cafa), _cfec, section.Color, box, box.Width(), box.Height())
	if !_afecb {
		return
	}
	// Print the subpaths with indexes 0 through 10 at most.
	for i, sub := range section._cafa {
		_bb.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sub)
		if i == 10 {
			break
		}
	}
}

// log dumps the tiling when the `_dgcg` debug switch is on: a summary line
// tagged with `_gfed`, the two coordinate lists, then every tile that is
// present in `_cabf`.
func (_bdgc gridTiling) log(_gfed string) {
	if !_dgcg {
		return
	}
	_ed.Log.Info("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071", len(_bdgc._cgbc), len(_bdgc._feca), _gfed)
	_bb.Printf("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a", _bdgc._cgbc)
	_bb.Printf("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a", _bdgc._feca)
	for rowIdx, rowKey := range _bdgc._feca {
		row, found := _bdgc._cabf[rowKey]
		if !found {
			continue
		}
		_bb.Printf("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a", rowIdx, rowKey)
		for colIdx, colKey := range _bdgc._cgbc {
			if tile, present := row[colKey]; present {
				_bb.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", colIdx, tile.String())
			}
		}
	}
}

// _bfad returns the smallest rectangle that encloses both `_becf` and `_gegd`.
func _bfad(_becf, _gegd _cfb.PdfRectangle) _cfb.PdfRectangle {
	var union _cfb.PdfRectangle
	union.Llx = _e.Min(_becf.Llx, _gegd.Llx)
	union.Lly = _e.Min(_becf.Lly, _gegd.Lly)
	union.Urx = _e.Max(_becf.Urx, _gegd.Urx)
	union.Ury = _e.Max(_becf.Ury, _gegd.Ury)
	return union
}

// clear resets the subpath to its zero value.
func (_dcbe *subpath) clear() {
	*_dcbe = subpath{}
}

// _gaec converts exactly two PDF number objects to a pair of float64 values.
// It fails when `_gfcag` does not hold two elements or when
// GetNumbersAsFloat rejects them.
func _gaec(_gfcag []_feb.PdfObject) (_acfdad, _cbdef float64, _gcgc error) {
	if count := len(_gfcag); count != 2 {
		return 0, 0, _bb.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", count)
	}
	values, err := _feb.GetNumbersAsFloat(_gfcag)
	if err != nil {
		return 0, 0, err
	}
	return values[0], values[1], nil
}

// secMinMax returns the smallest `_abcc` and the largest `_dfad` over all
// rulings. It panics on an empty list.
func (_cfdde rulingList) secMinMax() (float64, float64) {
	lowest, highest := _cfdde[0]._abcc, _cfdde[0]._dfad
	for i := 1; i < len(_cfdde); i++ {
		current := _cfdde[i]
		if current._abcc < lowest {
			lowest = current._abcc
		}
		if current._dfad > highest {
			highest = current._dfad
		}
	}
	return lowest, highest
}

// absorb pulls every word of `_dage` into the receiver, recording the moves in
// a removals set obtained from `_dage`, and finally applies that set to `_dage`.
func (_bffe *wordBag) absorb(_dage *wordBag) {
	removals := _dage.makeRemovals()
	for depthIdx, words := range _dage._agad {
		for _, word := range words {
			_bffe.pullWord(word, depthIdx, removals)
		}
	}
	_dage.applyRemovals(removals)
}

// _gfdc returns the left edge of `_dfdab` minus the right edge of `_fbad`.
func _gfdc(_dfdab, _fbad bounded) float64 {
	left := _dfdab.bbox().Llx
	right := _fbad.bbox().Urx
	return left - right
}

type textResult struct {
	_fdce PageText
	_ebg  int
	_gcdg int
}

// llyOrdering returns the indexes of the paragraphs stably sorted by ascending Lly.
func (_adf paraList) llyOrdering() []int {
	order := make([]int, 0, len(_adf))
	for i := range _adf {
		order = append(order, i)
	}
	_gf.SliceStable(order, func(a, b int) bool {
		return _adf[order[a]].Lly < _adf[order[b]].Lly
	})
	return order
}
// Append adds `mark` to the end of the mark array.
func (_ceb *TextMarkArray) Append(mark TextMark) {
	_ceb._fdda = append(_ceb._fdda, mark)
}

// bbox returns the embedded rectangle of the word.
func (_gfeeb *textWord) bbox() _cfb.PdfRectangle {
	return _gfeeb.PdfRectangle
}

// log dumps the table when the `_ddbc` debug switch is on: a header line tagged
// with `_gdad`, then one line per non-nil cell in row-major order.
func (_babgc *textTable) log(_gdad string) {
	if !_ddbc {
		return
	}
	_ed.Log.Info("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066", _gdad, _babgc._dedd, _babgc._gggac, _babgc._agde, _babgc.PdfRectangle)
	for row := 0; row < _babgc._gggac; row++ {
		for col := 0; col < _babgc._dedd; col++ {
			if cell := _babgc.get(col, row); cell != nil {
				_bb.Printf("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a", col, row, cell.PdfRectangle, _adcgc(cell.text(), 50), _ba.RuneCountInString(cell.text()))
			}
		}
	}
}

// _cged returns the keys of `_agddd` in ascending order.
func _cged(_agddd map[float64]gridTile) []float64 {
	keys := make([]float64, len(_agddd))
	next := 0
	for key := range _agddd {
		keys[next] = key
		next++
	}
	_gf.Float64s(keys)
	return keys
}

// isExportable reports whether the table is worth exporting.
//
// A table with the `_agde` flag set always is. Otherwise the table is exportable
// as soon as one cell of column 0 is missing, or holds more than one rune of
// text that does not match `_dbgad`.
func (_ceaad *textTable) isExportable() bool {
	if _ceaad._agde {
		return true
	}
	for row := 0; row < _ceaad._gggac; row++ {
		cell := _ceaad.get(0, row)
		if cell == nil {
			return true
		}
		content := cell.text()
		runes := _ba.RuneCountInString(content)
		matches := _dbgad.MatchString(content)
		if runes > 1 && !matches {
			return true
		}
	}
	return false
}

// vertsHorzs splits the rulings by kind: the first result holds those of kind
// `_dcfc`, the second those of kind `_ecbd`. Rulings of any other kind are dropped.
func (_cfced rulingList) vertsHorzs() (rulingList, rulingList) {
	var first, second rulingList
	for _, item := range _cfced {
		if item._efdg == _dcfc {
			first = append(first, item)
		} else if item._efdg == _ecbd {
			second = append(second, item)
		}
	}
	return first, second
}

// _eabc classifies a rectangle by its longer side: `_ecbd` when the width is
// the larger dimension and at least `_edd`, `_dcfc` when the width is not larger
// than the height and the height is at least `_edd`, and `_abbb` otherwise.
func _eabc(_daca _cfb.PdfRectangle) rulingKind {
	width := _daca.Width()
	height := _daca.Height()
	switch {
	case width > height:
		if width >= _edd {
			return _ecbd
		}
	default:
		if height >= _edd {
			return _dcfc
		}
	}
	return _abbb
}

// _ecef reports whether the horizontal extents of the two rectangles overlap
// (touching edges count as overlapping).
func _ecef(_aeba, _fcdf _cfb.PdfRectangle) bool {
	if _fcdf.Llx <= _aeba.Urx {
		return _aeba.Llx <= _fcdf.Urx
	}
	return false
}

// writeText writes the paragraph text to `_fcbd`.
//
// Without a table (`_cgabf` is nil) the paragraph's own cell text is written.
// With a table, cells are written row by row: a missing cell is written as a
// tab, every cell is followed by a space, and rows are separated by newlines.
// Errors returned by Write are ignored, as in the original implementation.
func (_cdef *textPara) writeText(_fcbd _g.Writer) {
	table := _cdef._cgabf
	if table == nil {
		_cdef.writeCellText(_fcbd)
		return
	}
	for row := 0; row < table._gggac; row++ {
		for col := 0; col < table._dedd; col++ {
			if cell := table.get(col, row); cell != nil {
				cell.writeCellText(_fcbd)
			} else {
				_fcbd.Write([]byte("\u0009"))
			}
			_fcbd.Write([]byte("\u0020"))
		}
		if row < table._gggac-1 {
			_fcbd.Write([]byte("\u000a"))
		}
	}
}
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int ;func (_ffbg paraList )findTableGrid (_fffb gridTiling )(*textTable ,map[*textPara ]struct{}){_fede :=len (_fffb ._cgbc );_bbeab :=len (_fffb ._feca );_aebee :=textTable {_agde :true ,_dedd :_fede ,_gggac :_bbeab ,_dadef :make (map[uint64 ]*textPara ,_fede *_bbeab ),_gacgd :make (map[uint64 ]compositeCell ,_fede *_bbeab )};
_fdfcc :=make (map[*textPara ]struct{});_ccee :=int ((1.0-_edg )*float64 (_fede *_bbeab ));_bbagf :=0;if _dgcg {_ed .Log .Info ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064",_fede ,_bbeab );
};for _bbfb ,_gagea :=range _fffb ._feca {_bgfb ,_ffeda :=_fffb ._cabf [_gagea ];if !_ffeda {continue ;};for _gefd ,_feede :=range _fffb ._cgbc {_bbab ,_efedb :=_bgfb [_feede ];if !_efedb {continue ;};_fecaf :=_ffbg .inTile (_bbab );if len (_fecaf )==0{_bbagf ++;
if _bbagf > _ccee {if _dgcg {_ed .Log .Info ("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064",_bbagf );};return nil ,nil ;};}else {_aebee .putComposite (_gefd ,_bbfb ,_fecaf ,_bbab .PdfRectangle );for _ ,_cfed :=range _fecaf {_fdfcc [_cfed ]=struct{}{};
};};};};_bacc :=0;for _ffbd :=0;_ffbd < _fede ;_ffbd ++{_caedc :=_aebee .get (_ffbd ,0);if _caedc ==nil ||!_caedc ._cea {_bacc ++;};};if _bacc ==0{if _dgcg {_ed .Log .Info ("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030");};return nil ,nil ;
};_fgefg :=_aebee .reduceTiling (_fffb ,_gebbb );_fgefg =_fgefg .subdivide ();return _fgefg ,_fdfcc ;};func (_gbbbb paraList )merge ()*textPara {_ed .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_gbbbb ));
if len (_gbbbb )==0{return nil ;};_gbbbb .sortReadingOrder ();_deacb :=_gbbbb [0].PdfRectangle ;_bccc :=_gbbbb [0]._ecfaf ;for _ ,_bgcde :=range _gbbbb [1:]{_deacb =_bfad (_deacb ,_bgcde .PdfRectangle );_bccc =append (_bccc ,_bgcde ._ecfaf ...);};return _gfgfd (_deacb ,_bccc );
};const (_cbeab =false ;_ebac =false ;_eccb =false ;_cggg =false ;_bfcbef =false ;_gfcb =false ;_feac =false ;_cdcd =false ;_bfbe =false ;_debg =_bfbe &&true ;_faba =_debg &&false ;_daab =_bfbe &&true ;_ddbc =false ;_dbfdf =_ddbc &&false ;_caeac =_ddbc &&true ;
_dgab =false ;_afecb =_dgab &&false ;_dgbbc =_dgab &&false ;_dgcg =_dgab &&true ;_fee =_dgab &&false ;_fgcf =_dgab &&false ;);func (_gafc *textObject )getFont (_edba string )(*_cfb .PdfFont ,error ){if _gafc ._fdgf ._af !=nil {_bbff ,_efa :=_gafc .getFontDict (_edba );
if _efa !=nil {_ed .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073",_edba ,_efa .Error ());return nil ,_efa ;};
_gafc ._fdgf ._gd ++;_ebde ,_bada :=_gafc ._fdgf ._af [_bbff .String ()];if _bada {_ebde ._gafa =_gafc ._fdgf ._gd ;return _ebde ._acbf ,nil ;};};_bebb ,_eedd :=_gafc .getFontDict (_edba );if _eedd !=nil {return nil ,_eedd ;};_babd ,_eedd :=_gafc .getFontDirect (_edba );
if _eedd !=nil {return nil ,_eedd ;};if _gafc ._fdgf ._af !=nil {_gece :=fontEntry {_babd ,_gafc ._fdgf ._gd };if len (_gafc ._fdgf ._af )>=_agd {var _dfga []string ;for _bgcd :=range _gafc ._fdgf ._af {_dfga =append (_dfga ,_bgcd );};_gf .Slice (_dfga ,func (_ebbd ,_gfcd int )bool {return _gafc ._fdgf ._af [_dfga [_ebbd ]]._gafa < _gafc ._fdgf ._af [_dfga [_gfcd ]]._gafa ;
});delete (_gafc ._fdgf ._af ,_dfga [0]);};_gafc ._fdgf ._af [_bebb .String ()]=_gece ;};return _babd ,nil ;};func _eebc (_fbfccb _cfb .PdfRectangle )*ruling {return &ruling {_efdg :_ecbd ,_gged :_fbfccb .Lly ,_abcc :_fbfccb .Llx ,_dfad :_fbfccb .Urx };
};func (_fbdcg paraList )inTile (_bfbgfe gridTile )paraList {var _bfcbb paraList ;for _ ,_cfceg :=range _fbdcg {if _bfbgfe .contains (_cfceg .PdfRectangle ){_bfcbb =append (_bfcbb ,_cfceg );};};if _ddbc {_bb .Printf ("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n",_bfbgfe ,len (_bfcbb ));
for _dcaf ,_dfaa :=range _bfcbb {_bb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dcaf ,_dfaa );};_bb .Println ("");};return _bfcbb ;};func _dcbc (_gefc *list ,_bfaf *_bd .Builder ,_edfd *string ){_eecc :=_dcbce (_gefc ,_edfd );_bfaf .WriteString (_eecc );
for _ ,_cbcfg :=range _gefc ._bdbe {_fbe :=*_edfd +"\u0020\u0020\u0020";_dcbc (_cbcfg ,_bfaf ,&_fbe );};};func _eecbc (_eacgd ,_badgb _cfa .Point )rulingKind {_gbbbf :=_e .Abs (_eacgd .X -_badgb .X );_cgdc :=_e .Abs (_eacgd .Y -_badgb .Y );return _ddedg (_gbbbf ,_cgdc ,_dfc );
};func _egdb (_gfbeb []*textMark ,_fddc _cfb .PdfRectangle ,_cfda rulingList ,_bfdbe []gridTiling )paraList {_ed .Log .Trace ("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066",len (_gfbeb ),_fddc );
if len (_gfbeb )==0{return nil ;};_fcgd :=_eebdd (_gfbeb ,_fddc );if len (_fcgd )==0{return nil ;};_cfda .log ("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065");_abfcg ,_aeaf :=_cfda .vertsHorzs ();_agbe :=_bed (_fcgd ,_fddc .Ury ,_abfcg ,_aeaf );
_cgffa :=_dgfa (_agbe ,_fddc .Ury ,_abfcg ,_aeaf );_cgffa =_acee (_cgffa );_fbfcc :=make (paraList ,0,len (_cgffa ));for _ ,_fccb :=range _cgffa {_aaea :=_fccb .arrangeText ();if _aaea !=nil {_fbfcc =append (_fbfcc ,_aaea );};};if len (_fbfcc )>=_gggg {_fbfcc =_fbfcc .extractTables (_bfdbe );
};_fbfcc .sortReadingOrder ();_fbfcc .log ("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072");return _fbfcc ;};func _fgfd (_eabg _cfb .PdfRectangle )*ruling {return &ruling {_efdg :_ecbd ,_gged :_eabg .Ury ,_abcc :_eabg .Llx ,_dfad :_eabg .Urx };
};func _aggb (_cgbca ,_aaaf int )int {if _cgbca < _aaaf {return _cgbca ;};return _aaaf ;};func (_eff *textObject )getFontDict (_cdge string )(_dgbb _feb .PdfObject ,_feba error ){_bdcb :=_eff ._cgd ;if _bdcb ==nil {_ed .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_cdge );
return nil ,nil ;};_dgbb ,_gdca :=_bdcb .GetFontByName (_feb .PdfObjectName (_cdge ));if !_gdca {_ed .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_cdge );
return nil ,_b .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _dgbb ,nil ;};func (_fgfc *ruling )alignsPrimary (_gbbd *ruling )bool {return _fgfc ._efdg ==_gbbd ._efdg &&_e .Abs (_fgfc ._gged -_gbbd ._gged )< _dffa *0.5;
};func _dgdd (_abb []*textLine )map[float64 ][]*textLine {_gf .Slice (_abb ,func (_bbdf ,_bgeg int )bool {return _abb [_bbdf ]._feff < _abb [_bgeg ]._feff });_efed :=map[float64 ][]*textLine {};for _ ,_fcg :=range _abb {_abgb :=_gafad (_fcg );_abgb =_e .Round (_abgb );
_efed [_abgb ]=append (_efed [_abgb ],_fcg );};return _efed ;};func (_abdg *wordBag )removeDuplicates (){if _daab {_ed .Log .Info ("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_abdg .text ());};for _ ,_cefa :=range _abdg .depthIndexes (){if len (_abdg ._agad [_cefa ])==0{continue ;
};_fbgg :=_abdg ._agad [_cefa ][0];_cega :=_bgee *_fbgg ._feabg ;_abc :=_fbgg ._cefef ;for _ ,_bgbf :=range _abdg .depthBand (_abc ,_abc +_cega ){_fdad :=map[*textWord ]struct{}{};_bcbf :=_abdg ._agad [_bgbf ];for _ ,_gbgc :=range _bcbf {if _ ,_dfbgd :=_fdad [_gbgc ];
_dfbgd {continue ;};for _ ,_dgbag :=range _bcbf {if _ ,_efac :=_fdad [_dgbag ];_efac {continue ;};if _dgbag !=_gbgc &&_dgbag ._ecgg ==_gbgc ._ecgg &&_e .Abs (_dgbag .Llx -_gbgc .Llx )< _cega &&_e .Abs (_dgbag .Urx -_gbgc .Urx )< _cega &&_e .Abs (_dgbag .Lly -_gbgc .Lly )< _cega &&_e .Abs (_dgbag .Ury -_gbgc .Ury )< _cega {_fdad [_dgbag ]=struct{}{};
};};};if len (_fdad )> 0{_bgdcd :=0;for _ ,_adee :=range _bcbf {if _ ,_aabbd :=_fdad [_adee ];!_aabbd {_bcbf [_bgdcd ]=_adee ;_bgdcd ++;};};_abdg ._agad [_bgbf ]=_bcbf [:len (_bcbf )-len (_fdad )];if len (_abdg ._agad [_bgbf ])==0{delete (_abdg ._agad ,_bgbf );
};};};};};func _fcfb (_gafga *textLine ,_eacb []*textLine ,_dbfec []float64 ,_ebbbe ,_bgfc float64 )[]*textLine {_gbega :=[]*textLine {};for _ ,_babee :=range _eacb {if _babee ._feff >=_ebbbe {if _bgfc !=-1&&_babee ._feff < _bgfc {if _babee .text ()!=_gafga .text (){if _e .Round (_babee .Llx )< _e .Round (_gafga .Llx ){break ;
};_gbega =append (_gbega ,_babee );};}else if _bgfc ==-1{if _babee ._feff ==_gafga ._feff {if _babee .text ()!=_gafga .text (){_gbega =append (_gbega ,_babee );};continue ;};_gbfd :=_abecb (_gafga ,_eacb ,_dbfec );if _gbfd !=-1&&_babee ._feff <=_gbfd {_gbega =append (_gbega ,_babee );
};};};};return _gbega ;};func _baaf (_fbfc []*textLine ,_gaccd map[float64 ][]*textLine )[]*list {_fdfg :=_ddecc (_gaccd );_bgbg :=[]*list {};if len (_fdfg )==0{return _bgbg ;};_cbdgg :=_fdfg [0];_eaeg :=1;_dbeb :=_gaccd [_cbdgg ];for _aged ,_edbf :=range _dbeb {var _bacf float64 ;
_fdcge :=[]*list {};_agffe :=_edbf ._feff ;_cafba :=-1.0;if _aged < len (_dbeb )-1{_cafba =_dbeb [_aged +1]._feff ;};if _eaeg < len (_fdfg ){_fdcge =_ebdff (_fbfc ,_gaccd ,_fdfg ,_eaeg ,_agffe ,_cafba );};_bacf =_cafba ;if len (_fdcge )> 0{_agcc :=_fdcge [0];
if len (_agcc ._bcbgc )> 0{_bacf =_agcc ._bcbgc [0]._feff ;};};_bdcd :=[]*textLine {_edbf };_gdcgg :=_fcfb (_edbf ,_fbfc ,_fdfg ,_agffe ,_bacf );_bdcd =append (_bdcd ,_gdcgg ...);_eebg :=_gagf (_bdcd ,"\u0062\u0075\u006c\u006c\u0065\u0074",_fdcge );_eebg ._cgee =_gbeca (_bdcd ,"");
_bgbg =append (_bgbg ,_eebg );};return _bgbg ;};
// String returns a multi-line description of the stack: a header line with the
// number of entries followed by one indexed line per entry. Nil entries are
// shown as "<nil>".
func (_bad *stateStack) String() string {
	entries := *_bad
	lines := make([]string, 0, len(entries)+1)
	lines = append(lines, _bb.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(entries)))
	for i := range entries {
		description := "\u003c\u006e\u0069l\u003e"
		if entry := entries[i]; entry != nil {
			description = entry.String()
		}
		lines = append(lines, _bb.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", i, description))
	}
	return _bd.Join(lines, "\u000a")
}

// _bebcb reports whether the two points have exactly equal X and Y coordinates.
func _bebcb(_ecgfc, _fbcg _cfa.Point) bool {
	if _ecgfc.X != _fbcg.X {
		return false
	}
	return _ecgfc.Y == _fbcg.Y
}
func _gcbde (_bfedb []pathSection )rulingList {_fgbea (_bfedb );if _dgab {_ed .Log .Info ("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073",len (_bfedb ));
};var _cadg rulingList ;for _ ,_afgca :=range _bfedb {for _ ,_gefgg :=range _afgca ._cafa {if len (_gefgg ._edc )< 2{continue ;};_ccgfc :=_gefgg ._edc [0];for _ ,_eccg :=range _gefgg ._edc [1:]{if _fcga ,_gcbg :=_eegc (_ccgfc ,_eccg ,_afgca .Color );_gcbg {_cadg =append (_cadg ,_fcga );
};_ccgfc =_eccg ;};};};if _dgab {_ed .Log .Info ("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073",_cadg );};return _cadg ;};func _cefeg (_gcagc []float64 ,_dcag ,_gdefg float64 )[]float64 {_cfcd ,_egcd :=_dcag ,_gdefg ;
if _egcd < _cfcd {_cfcd ,_egcd =_egcd ,_cfcd ;};_ebfef :=make ([]float64 ,0,len (_gcagc )+2);_ebfef =append (_ebfef ,_dcag );for _ ,_bccf :=range _gcagc {if _bccf <=_cfcd {continue ;}else if _bccf >=_egcd {break ;};_ebfef =append (_ebfef ,_bccf );};_ebfef =append (_ebfef ,_gdefg );
return _ebfef ;};var _afbg *_ga .Regexp =_ga .MustCompile (_dddd +"\u007c"+_ebeb );type textState struct{_bacd float64 ;_dfea float64 ;_ddef float64 ;_bdf float64 ;_ggc float64 ;_cbbc RenderMode ;_baea float64 ;_fdac *_cfb .PdfFont ;_fce _cfb .PdfRectangle ;
_aecc int ;_egg int ;};func (_fgedf *subpath )makeRectRuling (_cbga _a .Color )(*ruling ,bool ){if _fee {_ed .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_fgedf );
};_fed :=_fgedf ._edc [:4];_ecbc :=make (map[int ]rulingKind ,len (_fed ));for _dgcde ,_bbge :=range _fed {_bgcad :=_fgedf ._edc [(_dgcde +1)%4];_ecbc [_dgcde ]=_eecbc (_bbge ,_bgcad );if _fee {_bb .Printf ("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066",_dgcde ,_ecbc [_dgcde ],_bbge ,_bgcad );
};};if _fee {_bb .Printf ("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_ecbc );};var _ecea ,_ccfca []int ;for _befd ,_gbagg :=range _ecbc {switch _gbagg {case _ecbd :_ccfca =append (_ccfca ,_befd );case _dcfc :_ecea =append (_ecea ,_befd );
};};if _fee {_bb .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_ccfca ),_ccfca );_bb .Printf ("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_ecea ),_ecea );
};_efgfa :=(len (_ccfca )==2&&len (_ecea )==2)||(len (_ccfca )==2&&len (_ecea )==0&&_ebgdb (_fed [_ccfca [0]],_fed [_ccfca [1]]))||(len (_ecea )==2&&len (_ccfca )==0&&_egbbg (_fed [_ecea [0]],_fed [_ecea [1]]));if _fee {_bb .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_ccfca ),len (_ecea ),_efgfa );
};if !_efgfa {if _fee {_ed .Log .Error ("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v",_fgedf );_bb .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_ccfca ),len (_ecea ),_efgfa );
};return &ruling {},false ;};if len (_ecea )==0{for _cbbg ,_adde :=range _ecbc {if _adde !=_ecbd {_ecea =append (_ecea ,_cbbg );};};};if len (_ccfca )==0{for _cdga ,_ddcb :=range _ecbc {if _ddcb !=_dcfc {_ccfca =append (_ccfca ,_cdga );};};};if _fee {_ed .Log .Info ("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_ccfca ),len (_ecea ),len (_fed ),_ccfca ,_ecea ,_fed );
};var _feadg ,_abdaa ,_gggc ,_geea _cfa .Point ;if _fed [_ccfca [0]].Y > _fed [_ccfca [1]].Y {_gggc ,_geea =_fed [_ccfca [0]],_fed [_ccfca [1]];}else {_gggc ,_geea =_fed [_ccfca [1]],_fed [_ccfca [0]];};if _fed [_ecea [0]].X > _fed [_ecea [1]].X {_feadg ,_abdaa =_fed [_ecea [0]],_fed [_ecea [1]];
}else {_feadg ,_abdaa =_fed [_ecea [1]],_fed [_ecea [0]];};_adfc :=_cfb .PdfRectangle {Llx :_feadg .X ,Urx :_abdaa .X ,Lly :_geea .Y ,Ury :_gggc .Y };if _adfc .Llx > _adfc .Urx {_adfc .Llx ,_adfc .Urx =_adfc .Urx ,_adfc .Llx ;};if _adfc .Lly > _adfc .Ury {_adfc .Lly ,_adfc .Ury =_adfc .Ury ,_adfc .Lly ;
};_agcg :=rectRuling {PdfRectangle :_adfc ,_gfbc :_eabc (_adfc ),Color :_cbga };if _agcg ._gfbc ==_abbb {if _fee {_ed .Log .Error ("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c");
};return nil ,false ;};_dcdg ,_dcge :=_agcg .asRuling ();if !_dcge {if _fee {_ed .Log .Error ("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg");};return nil ,false ;};if _dgab {_bb .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_dcdg .String ());
};return _dcdg ,true ;};func (_ebggg *textTable )subdivide ()*textTable {_ebggg .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");_dcfbg :=_ebggg .compositeRowCorridors ();_dbafe :=_ebggg .compositeColCorridors ();if _ddbc {_ed .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_bdffcd (_dcfbg ),_bdffcd (_dbafe ));
};if len (_dcfbg )==0||len (_dbafe )==0{return _ebggg ;};_bddbb (_dcfbg );_bddbb (_dbafe );if _ddbc {_ed .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_bdffcd (_dcfbg ),_bdffcd (_dbafe ));
};_bbeb ,_gfcga :=_eeabb (_ebggg ._gggac ,_dcfbg );_gfgc ,_ddce :=_eeabb (_ebggg ._dedd ,_dbafe );_ddfba :=make (map[uint64 ]*textPara ,_ddce *_gfcga );_fddcf :=&textTable {PdfRectangle :_ebggg .PdfRectangle ,_agde :_ebggg ._agde ,_gggac :_gfcga ,_dedd :_ddce ,_dadef :_ddfba };
if _ddbc {_ed .Log .Info ("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_ebggg ._dedd ,_ebggg ._gggac ,_ddce ,_gfcga ,_bdffcd (_dcfbg ),_bdffcd (_dbafe ),_bbeb ,_gfgc );
};for _accdg :=0;_accdg < _ebggg ._gggac ;_accdg ++{_ffecf :=_bbeb [_accdg ];for _ccdfc :=0;_ccdfc < _ebggg ._dedd ;_ccdfc ++{_gdcd :=_gfgc [_ccdfc ];if _ddbc {_bb .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_ccdfc ,_accdg ,_gdcd ,_ffecf );
};_dcfde ,_bcef :=_ebggg ._gacgd [_fedd (_ccdfc ,_accdg )];if !_bcef {continue ;};_dgcfg :=_dcfde .split (_dcfbg [_accdg ],_dbafe [_ccdfc ]);for _fecfa :=0;_fecfa < _dgcfg ._gggac ;_fecfa ++{for _egfad :=0;_egfad < _dgcfg ._dedd ;_egfad ++{_gfaa :=_dgcfg .get (_egfad ,_fecfa );
_fddcf .put (_gdcd +_egfad ,_ffecf +_fecfa ,_gfaa );if _ddbc {_bb .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_gdcd +_egfad ,_ffecf +_fecfa ,_gfaa );};};};};};return _fddcf ;};
// String returns a human readable description of `path`: the number of points followed by
// all points when there are at most 5 of them, otherwise the first two and the last point.
func (_ccaf *subpath) String() string {
	points := _ccaf._edc
	count := len(points)
	if count > 5 {
		return _bb.Sprintf("%d: %6.2f %6.2f ... %6.2f", count, points[0], points[1], points[count-1])
	}
	return _bb.Sprintf("%d: %6.2f", count, points)
}

// getFontDirect creates the font named `_fafc` from the object returned by getFontDict.
// A failure in NewPdfFontFromPdfObject is logged at debug level and returned to the caller
// together with whatever font value the constructor produced.
func (_bgad *textObject) getFontDirect(_fafc string) (*_cfb.PdfFont, error) {
	fontObj, err := _bgad.getFontDict(_fafc)
	if err != nil {
		return nil, err
	}
	font, err := _cfb.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		_ed.Log.Debug("getFontDirect: NewPdfFontFromPdfObject failed. name=%#q err=%v", _fafc, err)
	}
	return font, err
}

// asTiling builds a gridTiling from the rulings in the list.
//
// The steps are:
//  1. Report adjacent rulings that align on both axes (duplicates), then sort the list.
//  2. Split the rulings into verticals and horizontals and take their primary coordinates
//     as the x and y grid lines.
//  3. Create one elementary tile per grid cell, recording which of its borders are rulings.
//  4. Coalesce tiles horizontally: starting at a tile with a left border, absorb tiles to
//     the right until one with a right border is reached.
//  5. Coalesce the resulting tiles vertically, working from the highest y index down, while
//     the tile below starts at the same x, has the same right edge and no bottom border
//     has been reached yet.
//
// An empty gridTiling is returned when the list does not provide at least two x and two y
// grid lines. (The original code only handled the "exactly one grid line" case and
// panicked on an empty list or on a list with no rulings in one direction.)
func (_fagc rulingList) asTiling() gridTiling {
	if _dgcg {
		_ed.Log.Info("rulingList.asTiling: vecs=%d ==================+++ ==================", len(_fagc))
	}
	// Index loop instead of ranging over _fagc[1:], which panics for an empty list.
	for i := 1; i < len(_fagc); i++ {
		prev, cur := _fagc[i-1], _fagc[i]
		if prev.alignsPrimary(cur) && prev.alignsSec(cur) {
			_ed.Log.Error("asTiling: Duplicate rulings.\n\tv=%s\n\tw=%s", cur, prev)
		}
	}
	_fagc.sortStrict()
	_fagc.log("snapped")

	verts, horzs := _fagc.vertsHorzs()
	xs := verts.primaries()
	ys := horzs.primaries()
	numX := len(xs) - 1
	numY := len(ys) - 1
	// `<=` rather than `==`: with no primaries the count is -1, which would otherwise lead
	// to indexing an empty slice or to a negative size passed to make.
	if numX <= 0 || numY <= 0 {
		return gridTiling{}
	}
	bounds := _cfb.PdfRectangle{Llx: xs[0], Urx: xs[numX], Lly: ys[0], Ury: ys[numY]}
	if _dgcg {
		_ed.Log.Info("rulingList.asTiling: verts=%d", len(verts))
		for i, v := range verts {
			_bb.Printf("%4d: %s\n", i, v)
		}
		_ed.Log.Info("rulingList.asTiling: horzs=%d", len(horzs))
		for i, h := range horzs {
			_bb.Printf("%4d: %s\n", i, h)
		}
		_ed.Log.Info("rulingList.asTiling:  wxh=%dx%d\n\tllx=%.2f\n\tlly=%.2f", numX, numY, xs, ys)
	}

	// Elementary tiles, stored row-major: index = y*numX + x.
	tiles := make([]gridTile, numX*numY)
	for y := numY - 1; y >= 0; y-- {
		lly := ys[y]
		ury := ys[y+1]
		for x := 0; x < numX; x++ {
			llx := xs[x]
			urx := xs[x+1]
			left := verts.findPrimSec(llx, lly)
			right := verts.findPrimSec(urx, lly)
			bottom := horzs.findPrimSec(lly, llx)
			top := horzs.findPrimSec(ury, llx)
			rect := _cfb.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			tile := _dfbad(rect, left, right, bottom, top)
			tiles[y*numX+x] = tile
			if _dgcg {
				_bb.Printf("  x=%2d y=%2d: %s %6.2f x %6.2f\n", x, y, tile.String(), tile.Width(), tile.Height())
			}
		}
	}

	if _dgcg {
		_ed.Log.Info("rulingList.asTiling: coalesce horizontal. %6.2f", bounds)
	}
	// rowTiles[y] maps the Llx of each horizontally coalesced tile in row y to that tile.
	rowTiles := make([]map[float64]gridTile, numY)
	for y := numY - 1; y >= 0; y-- {
		if _dgcg {
			_bb.Printf("  y=%2d\n", y)
		}
		rowTiles[y] = make(map[float64]gridTile, numX)
		for x := 0; x < numX; x++ {
			merged := tiles[y*numX+x]
			if _dgcg {
				_bb.Printf("  %4d: %s\n", x, merged)
			}
			// Only tiles with a left border start a coalesced tile.
			if !merged._afce {
				continue
			}
			last := x
			for next := x + 1; !merged._gfbbcf && next < numX; next++ {
				other := tiles[y*numX+next]
				merged.Urx = other.Urx
				merged._cddec = merged._cddec || other._cddec
				merged._ggaf = merged._ggaf || other._ggaf
				merged._gfbbcf = other._gfbbcf
				if _dgcg {
					_bb.Printf("   %4d: %s \u2192 %s\n", next, other, merged)
				}
				last = next
			}
			if _dgcg {
				_bb.Printf("   %2d - %2d \u2192 %s\n", x, last, merged)
			}
			// Skip the tiles that were absorbed.
			x = last
			rowTiles[y][merged.Llx] = merged
		}
	}

	// cells[lly][llx] holds the final tiles; absorbed[lly][llx] marks row tiles that were
	// merged into a tile that starts in a row processed earlier.
	cells := make(map[float64]map[float64]gridTile, numY)
	absorbed := make(map[float64]map[float64]struct{}, numY)
	for y := numY - 1; y >= 0; y-- {
		lly := tiles[y*numX].Lly
		cells[lly] = make(map[float64]gridTile, numX)
		absorbed[lly] = make(map[float64]struct{}, numX)
	}

	if _dgcg {
		_ed.Log.Info("rulingList.asTiling: coalesce vertical. %6.2f", bounds)
	}
	for y := numY - 1; y >= 0; y-- {
		lly := tiles[y*numX].Lly
		row := rowTiles[y]
		if _dgcg {
			_bb.Printf("  y=%2d\n", y)
		}
		for _, llx := range _cged(row) {
			if _, done := absorbed[lly][llx]; done {
				continue
			}
			merged := row[llx]
			if _dgcg {
				_bb.Printf("     v0=%s\n", merged.String())
			}
			for below := y - 1; below >= 0; below-- {
				if merged._ggaf {
					break
				}
				other, ok := rowTiles[below][llx]
				if !ok {
					break
				}
				if other.Urx != merged.Urx {
					break
				}
				merged._ggaf = other._ggaf
				merged.Lly = other.Lly
				if _dgcg {
					_bb.Printf("       v=%s v0=%s\n", other.String(), merged.String())
				}
				absorbed[other.Lly][other.Llx] = struct{}{}
			}
			// Tiles that start in row 0 are always given a bottom border.
			if y == 0 {
				merged._ggaf = true
			}
			if merged.complete() {
				cells[lly][llx] = merged
			}
		}
	}

	tiling := gridTiling{PdfRectangle: bounds, _cgbc: _bfae(cells), _feca: _aaedg(cells), _cabf: cells}
	tiling.log("Created")
	return tiling
}

// markCells sets the `_cacc` flag on every non-nil cell of the table.
func (_efcf *textTable) markCells() {
	for y := 0; y < _efcf._gggac; y++ {
		for x := 0; x < _efcf._dedd; x++ {
			if cell := _efcf.get(x, y); cell != nil {
				cell._cacc = true
			}
		}
	}
}

// depthRange returns the keys of the bag's `_agad` map that lie in [`_cfgg`, `_geb`], in
// increasing order. It returns nil if there are none.
func (_bcdc *wordBag) depthRange(_cfgg, _geb int) []int {
	var keys []int
	for key := range _bcdc._agad {
		if _cfgg <= key && key <= _geb {
			keys = append(keys, key)
		}
	}
	if len(keys) == 0 {
		return nil
	}
	_gf.Ints(keys)
	return keys
}

// alignsSec returns true if the [`_abcc`, `_dfad`] ranges of the two rulings overlap or are
// within `_dffa`+1 of each other.
func (_eebdb *ruling) alignsSec(_eaac *ruling) bool {
	const tol = _dffa + 1.0
	return _eebdb._abcc-tol <= _eaac._dfad && _eaac._abcc-tol <= _eebdb._dfad
}

// _dab returns the lines of `_gbeb` whose text is matched by the `_afbg` pattern.
// The result is never nil.
func _dab(_gbeb []*textLine) []*textLine {
	matched := []*textLine{}
	for _, line := range _gbeb {
		if _afbg.Find([]byte(line.text())) != nil {
			matched = append(matched, line)
		}
	}
	return matched
}

// subpath holds a sequence of points (`_edc`) and a boolean flag (`_gdee`).
type subpath struct {
	_edc  []_cfa.Point
	_gdee bool
}

// String returns a description of the cell: its rectangle, its number of paragraphs and up
// to the first 50 bytes of its merged text.
func (_abdac compositeCell) String() string {
	text := ""
	if len(_abdac.paraList) > 0 {
		text = _adcgc(_abdac.paraList.merge().text(), 50)
	}
	return _bb.Sprintf("%6.2f %d paras %q", _abdac.PdfRectangle, len(_abdac.paraList), text)
}

// toTextMarks returns the TextMarks of the paragraph. `_abdad` is passed through to
// toCellTextMarks and `_dfbba`.
//
// For a paragraph without a table this is just toCellTextMarks. For a table paragraph the
// cells are visited row by row: an empty cell contributes "\t", every cell is followed by
// " " and every row except the last is followed by "\n".
func (_fcgb *textPara) toTextMarks(_abdad *int) []TextMark {
	table := _fcgb._cgabf
	if table == nil {
		return _fcgb.toCellTextMarks(_abdad)
	}
	var marks []TextMark
	for y := 0; y < table._gggac; y++ {
		for x := 0; x < table._dedd; x++ {
			if cell := table.get(x, y); cell != nil {
				marks = append(marks, cell.toCellTextMarks(_abdad)...)
			} else {
				marks = _dfbba(marks, _abdad, "\t")
			}
			marks = _dfbba(marks, _abdad, " ")
		}
		if y < table._gggac-1 {
			marks = _dfbba(marks, _abdad, "\n")
		}
	}
	return marks
}

// get returns the cell stored under the key `_fedd`(`_bcea`, `_bbdaa`), or nil if there is
// none.
func (_effbf *textTable) get(_bcea, _bbdaa int) *textPara {
	return _effbf._dadef[_fedd(_bcea, _bbdaa)]
}

// firstWord returns the first word stored under key `_acbd`. It panics if there is none.
func (_caea *wordBag) firstWord(_acbd int) *textWord {
	return _caea._agad[_acbd][0]
}

// _cgddg creates a textWord from the marks `_cdedc`, which must not be empty. The word's
// rectangle is the union of the marks' rectangles, `_feabg` is the largest `_gegdd` of the
// marks and `_cefef` is the distance from the top of `_bagag` to the bottom of the word.
func _cgddg(_cdedc []*textMark, _bagag _cfb.PdfRectangle) *textWord {
	bbox := _cdedc[0].PdfRectangle
	largest := _cdedc[0]._gegdd
	for _, mark := range _cdedc[1:] {
		bbox = _bfad(bbox, mark.PdfRectangle)
		if mark._gegdd > largest {
			largest = mark._gegdd
		}
	}
	return &textWord{
		PdfRectangle: bbox,
		_dece:        _cdedc,
		_cefef:       _bagag.Ury - bbox.Lly,
		_feabg:       largest,
	}
}

// lists is a slice of list pointers.
type lists []*list

// appendMark adds `_beccfg` to the word, grows the word's rectangle to include it, raises
// `_feabg` to the mark's `_gegdd` if that is larger, and recomputes `_cefef` relative to
// the top of `_dbfa`.
func (_ebeed *textWord) appendMark(_beccfg *textMark, _dbfa _cfb.PdfRectangle) {
	_ebeed._dece = append(_ebeed._dece, _beccfg)
	_ebeed.PdfRectangle = _bfad(_ebeed.PdfRectangle, _beccfg.PdfRectangle)
	if _beccfg._gegdd > _ebeed._feabg {
		_ebeed._feabg = _beccfg._gegdd
	}
	_ebeed._cefef = _dbfa.Ury - _ebeed.PdfRectangle.Lly
}

// textLine is a rectangle with a depth value (`_feff`) and a slice of words (`_eded`).
type textLine struct {
	_cfb.PdfRectangle
	_feff float64
	_eded []*textWord
	_daag float64
}

// structTreeRoot holds a slice of structElements and a string.
type structTreeRoot struct {
	_acbec []structElement
	_efdb  string
}

// _aaedg returns the keys of `_afabd` sorted in decreasing order.
func _aaedg(_afabd map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_afabd))
	for key := range _afabd {
		keys = append(keys, key)
	}
	_gf.Float64s(keys)
	// Reverse the ascending order in place.
	for lo, hi := 0, len(keys)-1; lo < hi; lo, hi = lo+1, hi-1 {
		keys[lo], keys[hi] = keys[hi], keys[lo]
	}
	return keys
}

// newSubPath clears the current path of the state.
func (_dbfd *shapesState) newSubPath() {
	_dbfd.clearPath()
	if _bfcbef {
		_ed.Log.Info("newSubPath: %s", _dbfd)
	}
}

// push appends a copy of `_dcbg` to the stack, so later changes to `_dcbg` do not affect
// the stacked state.
func (_cab *stateStack) push(_dcbg *textState) {
	saved := *_dcbg
	*_cab = append(*_cab, &saved)
}

// depth returns -1 if the `_cea` flag is set, the depth of the first line if the paragraph
// has lines, and otherwise the depth of the paragraph's table.
func (_cbcag *textPara) depth() float64 {
	switch {
	case _cbcag._cea:
		return -1.0
	case len(_cbcag._ecfaf) > 0:
		return _cbcag._ecfaf[0]._feff
	default:
		return _cbcag._cgabf.depth()
	}
}

// shapesState holds two matrices, a list of subpaths, a flag, a point and the text object
// it belongs to.
type shapesState struct {
	_fefd  _cfa.Matrix
	_bbgfd _cfa.Matrix
	_ccac  []*subpath
	_fab   bool
	_ggb   _cfa.Point
	_abac  *textObject
}

// setTextRenderMode stores `_aag` as the RenderMode of the text object's state.
// It does nothing when called on a nil text object.
func (_bdcf *textObject) setTextRenderMode(_aag int) {
	if _bdcf != nil {
		_bdcf._fgfa._cbbc = RenderMode(_aag)
	}
}

// _ddedg classifies a pair of extents. It returns `_ecbd` if `_fbgc` is at least `_eeccd`
// and `_geag`(`_cefac`, `_fbgc`) holds, `_dcfc` if `_cefac` is at least `_eeccd` and
// `_geag`(`_fbgc`, `_cefac`) holds, and `_abbb` otherwise.
func _ddedg(_fbgc, _cefac, _eeccd float64) rulingKind {
	switch {
	case _fbgc >= _eeccd && _geag(_cefac, _fbgc):
		return _ecbd
	case _cefac >= _eeccd && _geag(_fbgc, _cefac):
		return _dcfc
	default:
		return _abbb
	}
}

// growTable repeatedly extends the table with the paragraphs returned by getDown (a new
// row) and getRight (a new column) until neither returns anything.
//
// When both are available, the last paragraph of the row is not taken and is also the last
// paragraph of the column, the row is added first, getRight is re-evaluated, and if it
// still yields a column that column is added with the shared paragraph in the new corner
// cell. Otherwise a row is preferred over a column.
func (_cgedd *textTable) growTable() {
	addRow := func(_edcd paraList) {
		_cgedd._gggac++
		y := _cgedd._gggac - 1
		for x := 0; x < _cgedd._dedd; x++ {
			_cgedd.put(x, y, _edcd[x])
		}
	}
	addColumn := func(_ceee paraList) {
		_cgedd._dedd++
		x := _cgedd._dedd - 1
		for y := 0; y < _cgedd._gggac; y++ {
			_cgedd.put(x, y, _ceee[y])
		}
	}
	if _dbfdf {
		_cgedd.log("growTable")
	}
	for step := 0; ; step++ {
		down := _cgedd.getDown()
		right := _cgedd.getRight()
		if _dbfdf {
			_bb.Printf("%4d: %s\n", step, _cgedd)
			_bb.Printf("        down=%s\n", down)
			_bb.Printf("       right=%s\n", right)
		}
		grown := false
		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				addRow(down)
				// The column must be recomputed now that the table has one more row.
				if right = _cgedd.getRight(); right != nil {
					addColumn(right)
					_cgedd.put(_cgedd._dedd-1, _cgedd._gggac-1, corner)
				}
				grown = true
			}
		}
		switch {
		case grown:
		case down != nil:
			addRow(down)
		case right != nil:
			addColumn(right)
		default:
			return
		}
	}
}

// xNeighbours passes a pair of events for every paragraph to eventNeighbours and returns
// its result. The events are placed at the paragraph's Llx and Urx, each moved outwards by
// `_eabf` times the paragraph's font size when `_eabf` is not 0.
func (_bacca paraList) xNeighbours(_eabf float64) map[*textPara][]int {
	events := make([]event, 2*len(_bacca))
	if _eabf == 0 {
		for i, para := range _bacca {
			events[2*i] = event{para.Llx, true, i}
			events[2*i+1] = event{para.Urx, false, i}
		}
	} else {
		for i, para := range _bacca {
			events[2*i] = event{para.Llx - _eabf*para.fontsize(), true, i}
			events[2*i+1] = event{para.Urx + _eabf*para.fontsize(), false, i}
		}
	}
	return _bacca.eventNeighbours(events)
}
// ImageMark represents an image drawn on a page together with its placement.
type ImageMark struct {
	// Image is the image that was drawn.
	Image *_cfb.Image

	// Width and Height are the dimensions of the image as displayed in the PDF.
	Width  float64
	Height float64

	// X and Y give the position of the lower left corner of the image.
	X float64
	Y float64

	// Angle is the rotation of the image in degrees, if it is rotated.
	Angle float64
}
// String returns a description of `p`: its rectangle, the table dimensions if it holds a
// table, its number of lines and up to the first 50 bytes of its text. A paragraph with the
// `_cea` flag set is described as "[EMPTY]".
func (_ggag *textPara) String() string {
	if _ggag._cea {
		return _bb.Sprintf("%6.2f [EMPTY]", _ggag.PdfRectangle)
	}
	var tableDesc string
	if table := _ggag._cgabf; table != nil {
		tableDesc = _bb.Sprintf("[%dx%d] ", table._dedd, table._gggac)
	}
	return _bb.Sprintf("%6.2f %s%d lines %q",
		_ggag.PdfRectangle, tableDesc, len(_ggag._ecfaf), _adcgc(_ggag.text(), 50))
}

// _cddgd returns a ruling of kind `_dcfc` positioned at the right edge (Urx) of `_fegb` and
// spanning from its Lly to its Ury.
func _cddgd(_fegb _cfb.PdfRectangle) *ruling {
	edge := ruling{_efdg: _dcfc}
	edge._gged = _fegb.Urx
	edge._abcc = _fegb.Lly
	edge._dfad = _fegb.Ury
	return &edge
}
// ExtractFonts returns all font information from the page extractor, including
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
//
// The argument `previousPageFonts` is used when building a font catalog for several pages or
// a whole document. Its entries are added to the result unless a font with the same name was
// already found on this page, so the result contains no duplicates.
//
// NOTE: If previousPageFonts is nil, only the fonts of the page are returned.
func (_cc *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	var pageFonts PageFonts
	if err := pageFonts.extractPageResourcesToFont(_cc._cd); err != nil {
		return nil, err
	}
	if previousPageFonts != nil {
		for _, font := range previousPageFonts.Fonts {
			if _eae(pageFonts.Fonts, font.FontName) {
				continue
			}
			pageFonts.Fonts = append(pageFonts.Fonts, font)
		}
	}
	return &PageFonts{Fonts: pageFonts.Fonts}, nil
}

// toTextTable converts the table to a TextTable. Each non-nil cell supplies the Text and the
// Marks of the corresponding TableCell; nil cells are left as zero values.
func (_aecd *textTable) toTextTable() TextTable {
	if _ddbc {
		_ed.Log.Info("toTextTable: %d x %d", _aecd._dedd, _aecd._gggac)
	}
	rows := make([][]TableCell, _aecd._gggac)
	for y := 0; y < _aecd._gggac; y++ {
		rows[y] = make([]TableCell, _aecd._dedd)
		for x := 0; x < _aecd._dedd; x++ {
			cell := _aecd.get(x, y)
			if cell == nil {
				continue
			}
			if _ddbc {
				_bb.Printf("%4d %2d: %s\n", x, y, cell)
			}
			rows[y][x].Text = cell.text()
			// The counter handed to toTextMarks starts from 0 for every cell.
			offset := 0
			rows[y][x].Marks._fdda = cell.toTextMarks(&offset)
		}
	}
	return TextTable{W: _aecd._dedd, H: _aecd._gggac, Cells: rows}
}

// asRuling converts the line to a ruling. For kind `_dcfc` the primary coordinate is the
// mean x and the range is the y extent of the end points; for kind `_ecbd` it is the mean y
// and the x extent. Any other kind is logged as an error and (nil, false) is returned.
func (_ceac lineRuling) asRuling() (*ruling, bool) {
	result := ruling{_efdg: _ceac._fbcc, Color: _ceac.Color, _edebb: _eadd}
	p1, p2 := _ceac._egad, _ceac._bggga
	switch _ceac._fbcc {
	case _dcfc:
		result._gged = _ceac.xMean()
		result._abcc = _e.Min(p1.Y, p2.Y)
		result._dfad = _e.Max(p1.Y, p2.Y)
	case _ecbd:
		result._gged = _ceac.yMean()
		result._abcc = _e.Min(p1.X, p2.X)
		result._dfad = _e.Max(p1.X, p2.X)
	default:
		_ed.Log.Error("bad primary kind=%d", _ceac._fbcc)
		return nil, false
	}
	return &result, true
}

// _abgg returns the rulings that can be made from the quadrilateral subpaths of the path
// sections `_efad`. `_fgbea` is applied to `_efad` first. Subpaths that are not
// quadrilaterals, or for which makeRectRuling fails, are skipped.
func _abgg(_efad []pathSection) rulingList {
	_fgbea(_efad)
	if _dgab {
		_ed.Log.Info("makeFillRulings: %d fills", len(_efad))
	}
	var rulings rulingList
	for _, section := range _efad {
		for _, path := range section._cafa {
			if !path.isQuadrilateral() {
				if _dgab {
					_ed.Log.Error("!isQuadrilateral: %s", path)
				}
				continue
			}
			r, ok := path.makeRectRuling(section.Color)
			if ok {
				rulings = append(rulings, r)
				continue
			}
			if _fee {
				_ed.Log.Error("!makeRectRuling: %s", path)
			}
		}
	}
	if _dgab {
		_ed.Log.Info("makeFillRulings: %s", rulings.String())
	}
	return rulings
}

// pathSection is a list of subpaths that share a color.
type pathSection struct {
	_cafa []*subpath
	_a.Color
}

// rectRuling is a colored rectangle with a ruling kind and a mark kind.
type rectRuling struct {
	_gfbc rulingKind
	_daaf markKind
	_a.Color
	_cfb.PdfRectangle
}

// parseStructElement fills the element from the dictionary that `_bbcb` resolves to.
// If `_bbcb` is not a dictionary this is logged at debug level and nothing is changed.
//
// The "S" entry (as a string, "" if absent) is stored in `_aggf` and the "Pg" entry in
// `_cccb`. The "K" entry determines the rest:
//   - an integer is stored in `_cfbf`;
//   - a reference is treated as an array that contains only that reference;
//   - for an array `_cfbf` starts as -1. An array with a single integer stores that integer
//     in `_cfbf` and leaves `_adae` untouched. Otherwise every integer element overwrites
//     `_cfbf`, every other element is parsed recursively, and the parsed elements are
//     stored in `_adae`.
func (_bfce *structElement) parseStructElement(_bbcb _feb.PdfObject) {
	dict, ok := _feb.GetDict(_bbcb)
	if !ok {
		_ed.Log.Debug("parseStructElement: dictionary object not found.")
		return
	}
	typeName := ""
	if s := dict.Get("S"); s != nil {
		typeName = s.String()
	}
	page := dict.Get("Pg")
	kids := dict.Get("K")
	_bfce._aggf = typeName
	_bfce._cccb = page

	// parseKids handles the array form of "K".
	parseKids := func(arr *_feb.PdfObjectArray) {
		_bfce._cfbf = int64(-1)
		if arr.Len() == 1 {
			if id, isInt := arr.Elements()[0].(*_feb.PdfObjectInteger); isInt {
				_bfce._cfbf = int64(*id)
				return
			}
		}
		children := []structElement{}
		for _, kid := range arr.Elements() {
			if id, isInt := kid.(*_feb.PdfObjectInteger); isInt {
				_bfce._cfbf = int64(*id)
				continue
			}
			child := &structElement{}
			child.parseStructElement(kid)
			children = append(children, *child)
		}
		_bfce._adae = children
	}

	switch k := kids.(type) {
	case *_feb.PdfObjectInteger:
		_bfce._cfbf = int64(*k)
	case *_feb.PdfObjectReference:
		parseKids(_feb.MakeArray(k))
	case *_feb.PdfObjectArray:
		parseKids(k)
	}
}

// showText forwards its arguments to renderText and returns its error.
func (_acf *textObject) showText(_bgge _feb.PdfObject, _baeg []byte, _aab int) error {
	return _acf.renderText(_bgge, _baeg, _aab)
}

// _fccg classifies the segment between the two points by passing its absolute x and y
// extents and the threshold `_edd` to `_ddedg`.
func _fccg(_gaeade, _gafaa _cfa.Point) rulingKind {
	dx := _e.Abs(_gaeade.X - _gafaa.X)
	dy := _e.Abs(_gaeade.Y - _gafaa.Y)
	return _ddedg(dx, dy, _edd)
}

// getParagraphs returns the paragraphs of the page text.
//
// Rulings are collected from `_bce` (when `_fgbf` is set) and from `_caac` (when `_dfgd` is
// set) and converted with toTilings. The text marks are then grouped by their `_dgfd` value
// for 0, 90, 180 and 270, and each non-empty group is converted to paragraphs by `_egdb`.
// The loop stops early once every mark has been handled.
func (_bafb *PageText) getParagraphs() paraList {
	var rulings rulingList
	if _fgbf {
		rulings = append(rulings, _gcbde(_bafb._bce)...)
	}
	if _dfgd {
		rulings = append(rulings, _abgg(_bafb._caac)...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(_bafb._bebf)
	for angle := 0; angle < 360 && remaining > 0; angle += 90 {
		group := make([]*textMark, 0, len(_bafb._bebf)-remaining)
		for _, mark := range _bafb._bebf {
			if mark._dgfd == angle {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _egdb(group, _bafb._bfcb, rulings, tilings)...)
		remaining -= len(group)
	}
	return paras
}

// _eeabb returns, for each index i in [0, `_fgegg`), the sum of len(`_dcaa`[j])+1 over all
// j < i, together with the sum over all `_fgegg` indexes.
func _eeabb(_fgegg int, _dcaa map[int][]float64) ([]int, int) {
	offsets := make([]int, _fgegg)
	total := 0
	for i := range offsets {
		offsets[i] = total
		total += len(_dcaa[i]) + 1
	}
	return offsets, total
}

// encloses returns true if [`_eaga`, `_acca`] lies within the [`_abcc`, `_dfad`] range of
// the ruling, allowing a tolerance of `_dbgbb` at both ends.
func (_bgdca *ruling) encloses(_eaga, _acca float64) bool {
	lo := _bgdca._abcc - _dbgbb
	hi := _bgdca._dfad + _dbgbb
	return lo <= _eaga && _acca <= hi
}

// _adcgc returns `_gcaaa` cut to its first `_eecdd` bytes. Strings shorter than `_eecdd`
// bytes are returned unchanged.
func _adcgc(_gcaaa string, _eecdd int) string {
	if len(_gcaaa) >= _eecdd {
		return _gcaaa[:_eecdd]
	}
	return _gcaaa
}

// intSet is a set of ints.
type intSet map[int]struct{}

// isActualGrid returns the rulings of the augmented grid (the first result of augmentGrid
// followed by the second) and true if they form a grid. Otherwise it returns (nil, false).
//
// The first list returned by augmentGrid needs at least `_gabg`+1 rulings and the second
// at least `_edca`+1. In addition, when `_ecge` is set the outermost rulings of both lists
// must meet at the corners; otherwise both lists must pass aligned().
func (_acad rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := _acad.augmentGrid()
	if len(verts) < _gabg+1 || len(horzs) < _edca+1 {
		if _dgab {
			_ed.Log.Info("isActualGrid: Not aligned. %d x %d < %d x %d",
				len(verts), len(horzs), _gabg+1, _edca+1)
		}
		return nil, false
	}
	if _dgab {
		_ed.Log.Info("isActualGrid: %s : %t & %t \u2192 %t", _acad,
			len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for i, r := range _acad {
			_bb.Printf("%4d: %v\n", i, r)
		}
	}
	if _ecge {
		v0, v1 := verts[0], verts[len(verts)-1]
		h0, h1 := horzs[0], horzs[len(horzs)-1]
		cornersMeet := _cfdda(v0._gged-h0._abcc) &&
			_cfdda(v1._gged-h0._dfad) &&
			_cfdda(h0._gged-v0._dfad) &&
			_cfdda(h1._gged-v0._abcc)
		if !cornersMeet {
			if _dgab {
				_ed.Log.Info("isActualGrid:  Not aligned.\n\tv0=%s\n\tv1=%s\n\th0=%s\n\th1=%s", v0, v1, h0, h1)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			if _dgbbc {
				_ed.Log.Info("isActualGrid: Not aligned verts. %d", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _dgab {
				_ed.Log.Info("isActualGrid: Not aligned horzs. %d", len(horzs))
			}
			return nil, false
		}
	}
	return append(verts, horzs...), true
}

// reduce returns a table without the rows and columns for which emptyCompositeRow or
// emptyCompositeColumn reports true. The receiver itself is returned when nothing needs to
// be removed.
func (_acfda *textTable) reduce() *textTable {
	keptRows := make([]int, 0, _acfda._gggac)
	for y := 0; y < _acfda._gggac; y++ {
		if !_acfda.emptyCompositeRow(y) {
			keptRows = append(keptRows, y)
		}
	}
	keptCols := make([]int, 0, _acfda._dedd)
	for x := 0; x < _acfda._dedd; x++ {
		if !_acfda.emptyCompositeColumn(x) {
			keptCols = append(keptCols, x)
		}
	}
	if len(keptRows) == _acfda._gggac && len(keptCols) == _acfda._dedd {
		return _acfda
	}

	reduced := textTable{
		_agde:  _acfda._agde,
		_dedd:  len(keptCols),
		_gggac: len(keptRows),
		_dadef: make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _ddbc {
		_ed.Log.Info("reduce: %dx%d -> %dx%d", _acfda._dedd, _acfda._gggac, len(keptCols), len(keptRows))
		_ed.Log.Info("reducedCols: %+v", keptCols)
		_ed.Log.Info("reducedRows: %+v", keptRows)
	}
	for newY, oldY := range keptRows {
		for newX, oldX := range keptCols {
			paras, extra := _acfda.getComposite(oldX, oldY)
			if paras == nil {
				continue
			}
			if _ddbc {
				_bb.Printf("  %2d, %2d (%2d, %2d) %q\n", newX, newY, oldX, oldY, _adcgc(paras.merge().text(), 50))
			}
			reduced.putComposite(newX, newY, paras, extra)
		}
	}
	return &reduced
}
// String returns a description of `k`: its entry in `_ccbc`, or "Not a ruling: <n>" if it
// has none.
func (_accf rulingKind) String() string {
	if name, ok := _ccbc[_accf]; ok {
		return name
	}
	return _bb.Sprintf("Not a ruling: %d", _accf)
}

// contains returns true if the tile has at least 3 borders and `_daege` does not extend
// more than `_dafb` beyond any side of the tile that has a border. Sides without a border
// are not checked.
func (_dbcd gridTile) contains(_daege _cfb.PdfRectangle) bool {
	switch {
	case _dbcd.numBorders() < 3:
		return false
	case _dbcd._afce && _daege.Llx < _dbcd.Llx-_dafb:
		return false
	case _dbcd._gfbbcf && _daege.Urx > _dbcd.Urx+_dafb:
		return false
	case _dbcd._ggaf && _daege.Lly < _dbcd.Lly-_dafb:
		return false
	case _dbcd._cddec && _daege.Ury > _dbcd.Ury+_dafb:
		return false
	}
	return true
}

// computeEBBoxes computes the `_bbgce` rectangle of every paragraph in the list.
//
// Each `_bbgce` starts as the paragraph's own rectangle. For every paragraph the nearest
// edges of its yNeighbours(0) to the left and right bound how far it may grow. Its Llx and
// Urx are then extended to the edges of paragraphs that do not reach above its bottom and
// that lie within those bounds. When `_bca` is set, the results also replace the
// paragraphs' rectangles.
func (_gdcaf paraList) computeEBBoxes() {
	if _cbeab {
		_ed.Log.Info("computeEBBoxes:")
	}
	for _, para := range _gdcaf {
		para._bbgce = para.PdfRectangle
	}
	neighbours := _gdcaf.yNeighbours(0)

	for i, para := range _gdcaf {
		box := para._bbgce
		// Limits set by the neighbours that are entirely to the left or right of `para`.
		leftLimit, rightLimit := -1.0e9, +1.0e9
		for _, k := range neighbours[para] {
			other := _gdcaf[k]._bbgce
			if other.Urx < box.Llx {
				leftLimit = _e.Max(leftLimit, other.Urx)
			} else if box.Urx < other.Llx {
				rightLimit = _e.Min(rightLimit, other.Llx)
			}
		}
		for j, candidate := range _gdcaf {
			other := candidate._bbgce
			if i == j || other.Ury > box.Lly {
				continue
			}
			if leftLimit <= other.Llx && other.Llx < box.Llx {
				box.Llx = other.Llx
			} else if other.Urx <= rightLimit && box.Urx < other.Urx {
				box.Urx = other.Urx
			}
		}
		if _cbeab {
			_bb.Printf("%4d: %6.2f\u2192%6.2f %q\n", i, para._bbgce, box, _adcgc(para.text(), 50))
		}
		para._bbgce = box
	}

	if _bca {
		for _, para := range _gdcaf {
			para.PdfRectangle = para._bbgce
		}
	}
}

// emptyCompositeColumn returns true if no composite cell in column `_caecg` contains any
// paragraphs.
func (_ccec *textTable) emptyCompositeColumn(_caecg int) bool {
	for y := 0; y < _ccec._gggac; y++ {
		cell, ok := _ccec._gacgd[_fedd(_caecg, y)]
		if ok && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// _bdaf returns true if `_fgeg` and `_daef` differ by no more than `_dbgbb`.
func _bdaf(_fgeg, _daef float64) bool {
	diff := _e.Abs(_fgeg - _daef)
	return diff <= _dbgbb
}
// _bfcgf returns true if rectangle `_beff` lies within rectangle `_bgbb` (edges may touch).
func _bfcgf(_bgbb, _beff _cfb.PdfRectangle) bool {
	insideX := _bgbb.Llx <= _beff.Llx && _beff.Urx <= _bgbb.Urx
	if !insideX {
		return false
	}
	return _bgbb.Lly <= _beff.Lly && _beff.Ury <= _bgbb.Ury
}

// _acbc removes the element at index `_fdcdf` from `_degea` by shifting the following
// elements down in place. The returned slice shares its backing array with `_degea`.
func _acbc(_degea []*textWord, _fdcdf int) []*textWord {
	shortened := len(_degea) - 1
	copy(_degea[_fdcdf:], _degea[_fdcdf+1:])
	return _degea[:shortened]
}

// last returns the last point of the subpath. It panics if the subpath has no points.
func (_bcff *subpath) last() _cfa.Point {
	points := _bcff._edc
	return points[len(points)-1]
}

// computeText returns the concatenation of the `_gbag` strings of the word's marks.
func (_fgefe *textWord) computeText() string {
	var text _bd.Builder
	for _, mark := range _fgefe._dece {
		text.WriteString(mark._gbag)
	}
	return text.String()
}

// _ebdff builds the "bullet" lists for level `_ccgb` of `_eggd` and, recursively, for all
// deeper levels.
//
// The lines of the level are `_beacc`[`_eggd`[`_ccgb`]] filtered by `_ceed` with the bounds
// `_edfbg` and `_cfcef`. For each such line the nested lists are built from the next level,
// bounded by the depth of this line and the depth of the following line (or `_edfbg` for
// the last one). The line's continuation lines are gathered by `_fcfb` up to the depth of
// the first line of the first nested list, if there is one.
func _ebdff(_fcbb []*textLine, _beacc map[float64][]*textLine, _eggd []float64, _ccgb int, _cfcef, _edfbg float64) []*list {
	result := []*list{}
	levelKey := _eggd[_ccgb]
	nextLevel := _ccgb + 1
	markers := _ceed(_beacc[levelKey], _edfbg, _cfcef)
	for i, marker := range markers {
		startDepth := marker._feff
		endDepth := _edfbg
		if i < len(markers)-1 {
			endDepth = markers[i+1]._feff
		}

		nested := []*list{}
		if nextLevel < len(_eggd) {
			nested = _ebdff(_fcbb, _beacc, _eggd, nextLevel, startDepth, endDepth)
		}

		// The item's own lines stop where the first nested list starts.
		itemEnd := endDepth
		if len(nested) > 0 {
			if first := nested[0]; len(first._bcbgc) > 0 {
				itemEnd = first._bcbgc[0]._feff
			}
		}

		itemLines := []*textLine{marker}
		itemLines = append(itemLines, _fcfb(marker, _fcbb, _eggd, startDepth, itemEnd)...)
		item := _gagf(itemLines, "bullet", nested)
		item._cgee = _gbeca(itemLines, "")
		result = append(result, item)
	}
	return result
}

// bbox returns the rectangle spanned by the rulings. The kind of the first ruling decides
// which of primMinMax and secMinMax supplies the x range and which the y range. For an
// empty list an error is logged and the zero rectangle is returned.
func (_cegf rulingList) bbox() _cfb.PdfRectangle {
	if len(_cegf) == 0 {
		_ed.Log.Error("rulingList.bbox: no rulings")
		return _cfb.PdfRectangle{}
	}
	var box _cfb.PdfRectangle
	if _cegf[0]._efdg == _ecbd {
		box.Llx, box.Urx = _cegf.secMinMax()
		box.Lly, box.Ury = _cegf.primMinMax()
		return box
	}
	box.Llx, box.Urx = _cegf.primMinMax()
	box.Lly, box.Ury = _cegf.secMinMax()
	return box
}

// textWord is a rectangle with the text marks (`_dece`) that make up a word.
type textWord struct {
	_cfb.PdfRectangle
	_cefef float64
	_ecgg  string
	_dece  []*textMark
	_feabg float64
	_ggge  bool
}

// bbox returns the rectangle of the line.
func (_fecf *textLine) bbox() _cfb.PdfRectangle {
	return _fecf.PdfRectangle
}

// rulingList is a slice of ruling pointers.
type rulingList []*ruling

// extractFormImages extracts the images from the content stream of the form XObject named
// `_ebb` in `_abd`, using the form's own resources if it has any and `_abd` otherwise. The
// `_aaf` counter is incremented on success. Nothing is done if no form is returned for the
// name.
func (_abf *imageExtractContext) extractFormImages(_ebb *_feb.PdfObjectName, _cbb _bc.GraphicsState, _abd *_cfb.PdfPageResources) error {
	form, err := _abd.GetXObjectFormByName(*_ebb)
	if err != nil {
		return err
	}
	if form == nil {
		return nil
	}
	contents, err := form.GetContentStream()
	if err != nil {
		return err
	}
	resources := form.Resources
	if resources == nil {
		resources = _abd
	}
	if err = _abf.extractContentStreamImages(string(contents), resources); err != nil {
		return err
	}
	_abf._aaf++
	return nil
}

// asRuling converts the rectangle to a ruling along its center line. For kind `_dcfc` the
// primary coordinate is the mean of Llx and Urx and the range is [Lly, Ury]; for kind
// `_ecbd` it is the mean of Lly and Ury and the range is [Llx, Urx]. The value returned by
// checkWidth is stored in `_bbgd`. (nil, false) is returned if checkWidth fails or the kind
// is neither of the two.
func (_edfeg rectRuling) asRuling() (*ruling, bool) {
	result := ruling{_efdg: _edfeg._gfbc, Color: _edfeg.Color, _edebb: _efcd}
	switch _edfeg._gfbc {
	case _dcfc:
		result._gged = 0.5 * (_edfeg.Llx + _edfeg.Urx)
		result._abcc = _edfeg.Lly
		result._dfad = _edfeg.Ury
		width, ok := _edfeg.checkWidth(_edfeg.Llx, _edfeg.Urx)
		if !ok {
			if _fee {
				_ed.Log.Error("rectRuling.asRuling: rulingVert !checkWidth v=%+v", _edfeg)
			}
			return nil, false
		}
		result._bbgd = width
	case _ecbd:
		result._gged = 0.5 * (_edfeg.Lly + _edfeg.Ury)
		result._abcc = _edfeg.Llx
		result._dfad = _edfeg.Urx
		width, ok := _edfeg.checkWidth(_edfeg.Lly, _edfeg.Ury)
		if !ok {
			if _fee {
				_ed.Log.Error("rectRuling.asRuling: rulingHorz !checkWidth v=%+v", _edfeg)
			}
			return nil, false
		}
		result._bbgd = width
	default:
		_ed.Log.Error("bad primary kind=%d", _edfeg._gfbc)
		return nil, false
	}
	return &result, true
}

// applyTables returns one table paragraph for each table in `_ccbe`, followed by the
// paragraphs of the list whose `_cacc` flag is not set.
func (_bcbb paraList) applyTables(_ccbe []*textTable) paraList {
	var result paraList
	for _, table := range _ccbe {
		result = append(result, table.newTablePara())
	}
	for _, para := range _bcbb {
		if !para._cacc {
			result = append(result, para)
		}
	}
	return result
}
// Options holds the settings that control how an extractor processes a page.
type Options struct {
	// DisableDocumentTags specifies whether to use the document tags during list extraction.
	DisableDocumentTags bool

	// ApplyCropBox will extract page text based on page cropbox if set to `true`.
	ApplyCropBox bool
}

// _abecb walks `_bgea` in order and returns the `_feff` value of the last line that lies
// deeper than `_aceef` (larger `_feff`) and whose rounded Llx is not left of the rounded Llx
// of `_aceef`. The walk stops at the first deeper line that starts further left.
// It returns -1 when no line qualifies. `_gcga` is not used.
func _abecb(_aceef *textLine, _bgea []*textLine, _gcga []float64) float64 {
	found := float64(-1)
	refLeft := _e.Round(_aceef.Llx)
	for _, candidate := range _bgea {
		// Written as negated comparisons so that NaN values behave as in the `>`/`>=` forms.
		if !(candidate._feff > _aceef._feff) {
			continue
		}
		if !(_e.Round(candidate.Llx) >= refLeft) {
			break
		}
		found = candidate._feff
	}
	return found
}

// numBorders returns how many of the four border flags of the tile are set.
func (_egfa gridTile) numBorders() int {
	var borders int
	if _egfa._afce {
		borders++
	}
	if _egfa._gfbbcf {
		borders++
	}
	if _egfa._ggaf {
		borders++
	}
	if _egfa._cddec {
		borders++
	}
	return borders
}

// moveLP concatenates a translation by (`_gdbg`, `_gefg`) onto the matrix `_gcgb` and then
// copies the result into `_bbde`.
func (_gfde *textObject) moveLP(_gdbg, _gefg float64) {
	translation := _cfa.NewMatrix(1, 0, 0, 1, _gdbg, _gefg)
	_gfde._gcgb.Concat(translation)
	_gfde._bbde = _gfde._gcgb
}

// findPrimSec returns the first ruling whose primary coordinate matches `_fdgbg` (according to
// `_cbafb`) and whose secondary extent, widened by `_dbgbb` on both ends, contains `_bbag`.
// It returns nil when there is no such ruling.
func (_agaee rulingList) findPrimSec(_fdgbg, _bbag float64) *ruling {
	for _, candidate := range _agaee {
		if !_cbafb(candidate._gged - _fdgbg) {
			continue
		}
		if candidate._abcc-_dbgbb <= _bbag && _bbag <= candidate._dfad+_dbgbb {
			return candidate
		}
	}
	return nil
}

// _fedd packs two ints into a single uint64 key: `_ggbc` is scaled by 0x1000000 and
// `_eegbd` is added to it.
func _fedd(_ggbc, _eegbd int) uint64 {
	const scale = 0x1000000
	high := uint64(_ggbc) * scale
	return high + uint64(_eegbd)
}

// list gathers the list lines (getListLines) and all lines (`_ecfaf`) of every paragraph,
// groups the list lines with `_dgdd` and builds the resulting lists with `_baaf`.
func (_cbfd paraList) list() []*list {
	var (
		listLines []*textLine
		allLines  []*textLine
	)
	for _, para := range _cbfd {
		listLines = append(listLines, para.getListLines()...)
		allLines = append(allLines, para._ecfaf...)
	}
	grouped := _dgdd(listLines)
	return _baaf(allLines, grouped)
}

// _dddd is the source of a regular expression. Decoded, it is a case-insensitive pattern for a
// Roman numeral at the start of the text that is either followed by ")" or "." or is enclosed
// in parentheses.
var _dddd string = "\u0028\u003f\u0069\u0029\u005e\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028?\u003a\u0044\u007cM\u0029\u007c\u0044\u003f\u0043{\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028\u003f\u003a\u004c\u007c\u0043\u0029\u007cL\u003f\u0058\u007b\u0030\u002c\u0033}\u0029\u0028\u0049\u0028\u003f\u003a\u0056\u007c\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u005c\u0029\u007c\u005c\u002e\u0029\u007c\u005e\u005c\u0028\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028\u003f\u003aD\u007cM\u0029\u007c\u0044\u003f\u0043\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028?\u003a\u004c\u007c\u0043\u0029\u007c\u004c?\u0058\u007b0\u002c\u0033\u007d\u0029(\u0049\u0028\u003f\u003a\u0056|\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u005c\u0029"
// scanBand scans the words of the receiver whose depth lies in the band
// [`_dgac`-margin, `_fgge`+margin], where margin is `_ffbb` times the font size `_agg` that
// `_fcb` had on entry, and returns the number of words that were accepted.
//
// A word is accepted when `_ebgb(_fcb, word)` is true, when its font size is close enough to
// that of `_fcb` (only checked if `_faed` > 0) and when `_fcb.blocked(word)` is false.
//
//   - `_beae` is not used by the function body.
//   - `_aagfc`: when true, nothing is moved and the scan of a depth bin stops at its first
//     accepted word; when false, accepted words are pulled into `_fcb` and the removals are
//     applied to the receiver at the end.
//   - `_afag`: when false, `_dgac` and `_fgge` are widened to include the depth of each
//     accepted word.
func (_fgae *wordBag) scanBand(_beae string, _fcb *wordBag, _ebgb func(_cebb *wordBag, _deac *textWord) bool, _dgac, _fgge, _faed float64, _aagfc, _afag bool) int {
	_egee := _fcb._agg
	var _ccg map[int]map[*textWord]struct{}
	if !_aagfc {
		_ccg = _fgae.makeRemovals()
	}
	_gca := _ffbb * _egee
	_dbgb := 0
	for _, _gaff := range _fgae.depthBand(_dgac-_gca, _fgge+_gca) {
		if len(_fgae._agad[_gaff]) == 0 {
			continue
		}
		for _, _feae := range _fgae._agad[_gaff] {
			if !(_dgac-_gca <= _feae._cefef && _feae._cefef <= _fgge+_gca) {
				continue
			}
			if !_ebgb(_fcb, _feae) {
				continue
			}
			// Two measures of font size mismatch; the smaller one is compared with `_faed`.
			_cbea := 2.0 * _e.Abs(_feae._feabg-_fcb._agg) / (_feae._feabg + _fcb._agg)
			_eegb := _e.Max(_feae._feabg/_fcb._agg, _fcb._agg/_feae._feabg)
			_gcc := _e.Min(_cbea, _eegb)
			if _faed > 0 && _gcc > _faed {
				continue
			}
			if _fcb.blocked(_feae) {
				continue
			}
			if !_aagfc {
				_fcb.pullWord(_feae, _gaff, _ccg)
			}
			_dbgb++
			if !_afag {
				if _feae._cefef < _dgac {
					_dgac = _feae._cefef
				}
				if _feae._cefef > _fgge {
					_fgge = _feae._cefef
				}
			}
			if _aagfc {
				break
			}
		}
	}
	if !_aagfc {
		_fgae.applyRemovals(_ccg)
	}
	return _dbgb
}

// lineTo adds the point (`_bfec`, `_bebbf`) to the shapes state via addPoint.
// When the debug flag `_bfcbef` is set the point and its device coordinates are logged first.
func (_gdf *shapesState) lineTo(_bfec, _bebbf float64) {
	if _bfcbef {
		_ed.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _bfec, _bebbf, _gdf.devicePoint(_bfec, _bebbf))
	}
	_gdf.addPoint(_bfec, _bebbf)
}

// stateStack is a stack of text states.
type stateStack []*textState

// _gafad returns the Llx of the first element of `_eded` of the line.
// The line must have at least one element in `_eded`.
func _gafad(_gbab *textLine) float64 { return _gbab._eded[0].Llx }

// showTextAdjusted processes the elements of the array `_caf` in order.
// A numeric element moves the matrix `_bbde` by -number*0.001*`_fgfa._ggc` along x.
// A string element is passed to renderText together with `_ecb`.
// It returns the conversion error for a bad number and core.ErrTypeError for a bad string or
// for an element of any other type.
func (_egde *textObject) showTextAdjusted(_caf *_feb.PdfObjectArray, _ecb int) error {
	// `_dgg` is never set in this function, so the x/y swap below is currently not taken.
	_dgg := false
	for _, _gaeef := range _caf.Elements() {
		switch _gaeef.(type) {
		case *_feb.PdfObjectFloat, *_feb.PdfObjectInteger:
			_dfe, _dad := _feb.GetNumberAsFloat(_gaeef)
			if _dad != nil {
				_ed.Log.Debug("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", _gaeef, _caf)
				return _dad
			}
			_abge, _cdfa := -_dfe*0.001*_egde._fgfa._ggc, 0.0
			if _dgg {
				_cdfa, _abge = _abge, _cdfa
			}
			_fcc := _fde(_cfa.Point{X: _abge, Y: _cdfa})
			_egde._bbde.Concat(_fcc)
		case *_feb.PdfObjectString:
			_fbae := _feb.TraceToDirectObject(_gaeef)
			_cffe, _ae := _feb.GetStringBytes(_fbae)
			if !_ae {
				_ed.Log.Trace("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", _gaeef, _caf)
				return _feb.ErrTypeError
			}
			_egde.renderText(_fbae, _cffe, _ecb)
		default:
			_ed.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", _gaeef, _caf)
			return _feb.ErrTypeError
		}
	}
	return nil
}

// _dbgad is a regular expression compiled once at package initialisation.
var _dbgad = _ga.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")

// textPara is a paragraph of text lines with its bounding box and links to related paragraphs.
type textPara struct {
	_cfb.PdfRectangle                   // Bounding box of the paragraph.
	_bbgce            _cfb.PdfRectangle // Second rectangle; compared by `_cfbe`.
	_ecfaf            []*textLine       // The lines of the paragraph.
	_cgabf            *textTable        // Table of the paragraph, if any (log prints its size).
	_cacc             bool              // Paragraphs with this flag are dropped by applyTables.
	_cea              bool
	_eccbf            *textPara // Left neighbour, set by addNeighbours.
	_bdbeb            *textPara // Right neighbour, set by addNeighbours.
	_adgg             *textPara // Neighbour above, set by addNeighbours.
	_ggfa             *textPara // Neighbour below, set by addNeighbours.
	_agffg            []list
}

// _gaea returns the index of the bin of size `_eacg` for the value `_cdgf`: the truncated
// quotient for non-negative values and the truncated quotient minus one for negative values.
func _gaea(_cdgf float64) int {
	var _aef int
	if _cdgf >= 0 {
		_aef = int(_cdgf / _eacg)
	} else {
		_aef = int(_cdgf/_eacg) - 1
	}
	return _aef
}

// pullWord appends the word `_bbfce` to the line and removes it from bin `_cfdf` of the
// word bag `_cfac`.
func (_bfge *textLine) pullWord(_cfac *wordBag, _bbfce *textWord, _cfdf int) {
	_bfge.appendWord(_bbfce)
	_cfac.removeWord(_bbfce, _cfdf)
}

// textObject holds the state used while processing a text object of a content stream.
type textObject struct {
	_fdgf  *Extractor
	_cgd   *_cfb.PdfPageResources
	_dfgec _bc.GraphicsState // Graphics state; getFillColor reads the non-stroking colour from it.
	_fgfa  *textState
	_cafe  *stateStack
	_bbde  _cfa.Matrix // Matrix advanced by showTextAdjusted and reset by moveLP.
	_gcgb  _cfa.Matrix // Matrix translated by moveLP.
	_geca  []*textMark
	_acde  bool
}

// toGrids partitions the rulings into groups of connected rulings and returns the groups that
// isActualGrid accepts, each one snapped with snapToGroups.
// It returns nil when the receiver is empty or when no group is an actual grid.
// When the debug flag `_dgab` is set the intermediate results are logged.
func (_egce rulingList) toGrids() []rulingList {
	if _dgab {
		_ed.Log.Info("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073", _egce)
	}
	_ceeg := _egce.intersections()
	if _dgab {
		_ed.Log.Info("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020", len(_egce), len(_ceeg))
		for _, _fdbc := range _gbfda(_ceeg) {
			_bb.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", _fdbc, _ceeg[_fdbc])
		}
	}
	// Rulings that have at least one connection, keyed by their index in the receiver.
	_agaa := make(map[int]intSet, len(_egce))
	for _aggfe := range _egce {
		_cggec := _egce.connections(_ceeg, _aggfe)
		if len(_cggec) > 0 {
			_agaa[_aggfe] = _cggec
		}
	}
	if _dgab {
		_ed.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064", len(_agaa))
		for _, _debce := range _gbfda(_agaa) {
			_bb.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", _debce, _agaa[_debce])
		}
	}
	// Order the ruling indexes by decreasing number of connections, ties broken by comp.
	_gced := _gedfg(len(_egce), func(_cdddg, _bbbe int) bool {
		_eaff, _gcggf := len(_agaa[_cdddg]), len(_agaa[_bbbe])
		if _eaff != _gcggf {
			return _eaff > _gcggf
		}
		return _egce.comp(_cdddg, _bbbe)
	})
	if _dgab {
		_ed.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076", _gced)
	}
	// Without this guard an empty ordering would panic on `_gced[0]` below.
	if len(_gced) == 0 {
		return nil
	}
	// Add each index to the first group that has a member connected to it, else start a new group.
	_gadbg := [][]int{{_gced[0]}}
_bebe:
	for _, _geeaa := range _gced[1:] {
		for _bbbg, _dcac := range _gadbg {
			for _, _ecbb := range _dcac {
				if _agaa[_ecbb].has(_geeaa) {
					_gadbg[_bbbg] = append(_dcac, _geeaa)
					continue _bebe
				}
			}
		}
		_gadbg = append(_gadbg, []int{_geeaa})
	}
	if _dgab {
		_ed.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076", _gadbg)
	}
	// Largest groups first; within a group the indexes are ordered by comp.
	_gf.SliceStable(_gadbg, func(_egegc, _bedd int) bool { return len(_gadbg[_egegc]) > len(_gadbg[_bedd]) })
	for _, _fage := range _gadbg {
		_gf.Slice(_fage, func(_ggfd, _geaf int) bool { return _egce.comp(_fage[_ggfd], _fage[_geaf]) })
	}
	_dcfbd := make([]rulingList, len(_gadbg))
	for _aeag, _bbeda := range _gadbg {
		_cbcfgc := make(rulingList, len(_bbeda))
		for _ddcd, _abab := range _bbeda {
			_cbcfgc[_ddcd] = _egce[_abab]
		}
		_dcfbd[_aeag] = _cbcfgc
	}
	if _dgab {
		_ed.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076", _dcfbd)
	}
	var _dbebf []rulingList
	for _, _fffa := range _dcfbd {
		if _edae, _acddd := _fffa.isActualGrid(); _acddd {
			_fffa = _edae
			_fffa = _fffa.snapToGroups()
			_dbebf = append(_dbebf, _fffa)
		}
	}
	if _dgab {
		_cadf("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073", _dbebf)
		_ed.Log.Info("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064", len(_dcfbd), len(_dbebf))
	}
	return _dbebf
}

// parasBBox returns the paragraphs and the bounding rectangle of the composite cell.
func (_cbge compositeCell) parasBBox() (paraList, _cfb.PdfRectangle) {
	return _cbge.paraList, _cbge.PdfRectangle
}

// getFillColor returns the colour `_dagc` derives from the non-stroking colorspace and
// non-stroking colour of the graphics state.
func (_cdc *textObject) getFillColor() _a.Color {
	return _dagc(_cdc._dfgec.ColorspaceNonStroking, _cdc._dfgec.ColorNonStroking)
}

// _beacf returns the y coordinates of the gaps between groups of lines of the cells `_dfggc`.
//
// The lines of all cells are sorted by `_feff` and then by Llx. Walking the sorted lines, a
// gap is recorded (as the midpoint) whenever the Ury of a line is below the lowest Lly seen so
// far, and a new group of lines is started at that point.
// The gaps are only returned if hasLines is true for every cell and every group; otherwise the
// result is nil. The result is also nil when the cells contain no lines at all.
// When the debug flag `_ddbc` is set the intermediate results are printed.
func _beacf(_dfggc []compositeCell) []float64 {
	var _cbcd []*textLine
	_accdb := 0
	for _, _gaeab := range _dfggc {
		_accdb += len(_gaeab.paraList)
		_cbcd = append(_cbcd, _gaeab.lines()...)
	}
	_gf.Slice(_cbcd, func(_bfefe, _fbbb int) bool {
		_feed, _edfdb := _cbcd[_bfefe], _cbcd[_fbbb]
		_ebbee, _dccc := _feed._feff, _edfdb._feff
		if !_cbafb(_ebbee - _dccc) {
			return _ebbee < _dccc
		}
		return _feed.Llx < _edfdb.Llx
	})
	if _ddbc {
		_bb.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", _accdb, len(_cbcd))
		for _gfbdg, _bafde := range _cbcd {
			_bb.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", _gfbdg, _bafde)
		}
	}
	// Without this guard cells that hold no lines would panic on `_cbcd[0]` below.
	if len(_cbcd) == 0 {
		return nil
	}
	var _cbeb []float64
	_bagd := _cbcd[0] // The line with the lowest Lly seen so far.
	var _cgbdb [][]*textLine
	_gdae := []*textLine{_bagd}
	for _bcedd, _dcebb := range _cbcd[1:] {
		if _dcebb.Ury < _bagd.Lly {
			_adad := 0.5 * (_dcebb.Ury + _bagd.Lly)
			if _ddbc {
				_bb.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a", _bcedd, _dcebb.Ury, _bagd.Lly, _adad, _bagd, _dcebb)
			}
			_cbeb = append(_cbeb, _adad)
			_cgbdb = append(_cgbdb, _gdae)
			_gdae = nil
		}
		_gdae = append(_gdae, _dcebb)
		if _dcebb.Lly < _bagd.Lly {
			_bagd = _dcebb
		}
	}
	if len(_gdae) > 0 {
		_cgbdb = append(_cgbdb, _gdae)
	}
	if _ddbc {
		_bb.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", _cbeb)
	}
	if _ddbc {
		_ed.Log.Info("\u0072\u006f\u0077\u003d\u0025\u0064", len(_dfggc))
		for _eecca, _cebge := range _dfggc {
			_bb.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _eecca, _cebge)
		}
		_ed.Log.Info("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d", len(_cgbdb))
		for _ffae, _gdfggb := range _cgbdb {
			_bb.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a", _ffae, len(_gdfggb))
			for _eggaf, _dcab := range _gdfggb {
				_bb.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", _eggaf, _dcab)
			}
		}
	}
	// Every cell must report hasLines for every group, otherwise the gaps are discarded.
	_cfde := true
	for _dbad, _cbfa := range _cgbdb {
		_gaed := true
		for _gabgd, _feabcg := range _dfggc {
			if _ddbc {
				_bb.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", _dbad, len(_cgbdb), _gabgd, len(_dfggc), _feabcg)
			}
			if !_feabcg.hasLines(_cbfa) {
				if _ddbc {
					_bb.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", _dbad, len(_cgbdb), _gabgd, len(_dfggc))
				}
				_gaed = false
				break
			}
		}
		if !_gaed {
			_cfde = false
			break
		}
	}
	if !_cfde {
		if _ddbc {
			_ed.Log.Info("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg")
		}
		_cbeb = nil
	}
	if _ddbc && _cbeb != nil {
		_bb.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", _cbeb)
	}
	return _cbeb
}
// _gfeca returns `_ecgaad` with the word `_feda` removed (via `_acbc`).
// If the slice does not contain the word, an error is logged and the slice is returned
// unchanged, so that a failed lookup does not discard the remaining words of the caller.
func _gfeca(_ecgaad []*textWord, _feda *textWord) []*textWord {
	for _bfecd, _deae := range _ecgaad {
		if _deae == _feda {
			return _acbc(_ecgaad, _bfecd)
		}
	}
	_ed.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", _feda)
	return _ecgaad
}

// _eebdd combines the text marks `_daffa` into words and returns the words in order.
//
// A mark whose text is only whitespace ends the current word. A mark starts a new word when
// its gap to the current word (relative to the font size of the word) is at least `_ebbe` or
// lies outside the accepted range, or when its depth differs too much. All other marks are
// appended to the current word. When `_fgdf` is set, a diacritic (as reported by `_cbbda`)
// that lies in the diacritic area of its neighbouring mark is merged into that mark's word.
// Words whose computed text is only whitespace are dropped. `_dgdbe` is passed through to the
// helpers that create words and append marks.
func _eebdd(_daffa []*textMark, _dgdbe _cfb.PdfRectangle) []*textWord {
	var _eacdf []*textWord
	var _cgde *textWord
	if _ebac {
		_ed.Log.Info("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073", len(_daffa))
	}
	// _gaaaa finishes the current word: it is stored if its text is not blank, then cleared.
	_gaaaa := func() {
		if _cgde != nil {
			_bcbgbd := _cgde.computeText()
			if !_aaag(_bcbgbd) {
				_cgde._ecgg = _bcbgbd
				_eacdf = append(_eacdf, _cgde)
				if _ebac {
					_ed.Log.Info("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", len(_eacdf)-1, _cgde.String())
					for _cfefe, _baeea := range _cgde._dece {
						_bb.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _cfefe, _baeea.String())
					}
				}
			}
			_cgde = nil
		}
	}
	for _, _cegga := range _daffa {
		if _fgdf && _cgde != nil && len(_cgde._dece) > 0 {
			_ggac := _cgde._dece[len(_cgde._dece)-1]
			_aeegd, _caecc := _cbbda(_cegga._gbag)
			_gcad, _adgeg := _cbbda(_ggac._gbag)
			// The new mark is a diacritic for the last mark of the word.
			if _caecc && !_adgeg && _ggac.inDiacriticArea(_cegga) {
				_cgde.addDiacritic(_aeegd)
				continue
			}
			// The last mark of the word is a diacritic for the new mark: swap them.
			if _adgeg && !_caecc && _cegga.inDiacriticArea(_ggac) {
				_cgde._dece = _cgde._dece[:len(_cgde._dece)-1]
				_cgde.appendMark(_cegga, _dgdbe)
				_cgde.addDiacritic(_gcad)
				continue
			}
		}
		_ceeaf := _aaag(_cegga._gbag)
		if _ceeaf {
			_gaaaa()
			continue
		}
		if _cgde == nil && !_ceeaf {
			_cgde = _cgddg([]*textMark{_cegga}, _dgdbe)
			continue
		}
		_defcb := _cgde._feabg
		_bgbda := _e.Abs(_gcee(_dgdbe, _cegga)-_cgde._cefef) / _defcb
		_agefa := _gfdc(_cegga, _cgde) / _defcb
		if _agefa >= _ebbe || !(-_daae <= _agefa && _bgbda <= _acae) {
			_gaaaa()
			_cgde = _cgddg([]*textMark{_cegga}, _dgdbe)
			continue
		}
		_cgde.appendMark(_cegga, _dgdbe)
	}
	_gaaaa()
	return _eacdf
}

// _gfgfd returns a new paragraph with bounding box `_cdcee` and lines `_dcad`.
func _gfgfd(_cdcee _cfb.PdfRectangle, _dcad []*textLine) *textPara {
	return &textPara{PdfRectangle: _cdcee, _ecfaf: _dcad}
}

// _cfbe reports the result of `_ecef` for the `_bbgce` rectangles of the two paragraphs.
func _cfbe(_bcbc, _gfee *textPara) bool { return _ecef(_bcbc._bbgce, _gfee._bbgce) }

// addNeighbours sets the left (`_eccbf`), right (`_bdbeb`), above (`_adgg`) and below
// (`_ggfa`) neighbour of every paragraph in the list.
//
// Candidates come from yNeighbours (left/right) and xNeighbours (above/below). In each
// direction the closest candidate is chosen; it is rejected if another candidate overlaps it
// in that direction or if the rectangle test (`_fcadf` or `_ecef`) fails. Finally every link
// that is not mutual is cleared.
func (_fdbfc paraList) addNeighbours() {
	// _dfbag splits the candidates into those left of and those right of the paragraph.
	_dfbag := func(_gaeed []int, _ccgbb *textPara) ([]*textPara, []*textPara) {
		_cdgdb := make([]*textPara, 0, len(_gaeed)-1)
		_eggfe := make([]*textPara, 0, len(_gaeed)-1)
		for _, _fega := range _gaeed {
			_geda := _fdbfc[_fega]
			if _geda.Urx <= _ccgbb.Llx {
				_cdgdb = append(_cdgdb, _geda)
			} else if _geda.Llx >= _ccgbb.Urx {
				_eggfe = append(_eggfe, _geda)
			}
		}
		return _cdgdb, _eggfe
	}
	// _gfffb splits the candidates into those above and those below the paragraph.
	_gfffb := func(_efggd []int, _ffbf *textPara) ([]*textPara, []*textPara) {
		_begff := make([]*textPara, 0, len(_efggd)-1)
		_dbgc := make([]*textPara, 0, len(_efggd)-1)
		for _, _afdc := range _efggd {
			_cbdgc := _fdbfc[_afdc]
			if _cbdgc.Ury <= _ffbf.Lly {
				_dbgc = append(_dbgc, _cbdgc)
			} else if _cbdgc.Lly >= _ffbf.Ury {
				_begff = append(_begff, _cbdgc)
			}
		}
		return _begff, _dbgc
	}
	_gedaf := _fdbfc.yNeighbours(_faec)
	for _, _agdf := range _fdbfc {
		_dfaf := _gedaf[_agdf]
		if len(_dfaf) == 0 {
			continue
		}
		_bgdb, _fegc := _dfbag(_dfaf, _agdf)
		if len(_bgdb) == 0 && len(_fegc) == 0 {
			continue
		}
		if len(_bgdb) > 0 {
			// Left side: the candidate with the largest Urx.
			_gggfe := _bgdb[0]
			for _, _gdfbd := range _bgdb[1:] {
				if _gdfbd.Urx >= _gggfe.Urx {
					_gggfe = _gdfbd
				}
			}
			for _, _fggbe := range _bgdb {
				if _fggbe != _gggfe && _fggbe.Urx > _gggfe.Llx {
					_gggfe = nil
					break
				}
			}
			if _gggfe != nil && _fcadf(_agdf.PdfRectangle, _gggfe.PdfRectangle) {
				_agdf._eccbf = _gggfe
			}
		}
		if len(_fegc) > 0 {
			// Right side: the candidate with the smallest Llx.
			_gbfe := _fegc[0]
			for _, _agcdg := range _fegc[1:] {
				if _agcdg.Llx <= _gbfe.Llx {
					_gbfe = _agcdg
				}
			}
			for _, _efbd := range _fegc {
				if _efbd != _gbfe && _efbd.Llx < _gbfe.Urx {
					_gbfe = nil
					break
				}
			}
			if _gbfe != nil && _fcadf(_agdf.PdfRectangle, _gbfe.PdfRectangle) {
				_agdf._bdbeb = _gbfe
			}
		}
	}
	_gedaf = _fdbfc.xNeighbours(_eceb)
	for _, _adegg := range _fdbfc {
		_bebge := _gedaf[_adegg]
		if len(_bebge) == 0 {
			continue
		}
		_eebe, _dadbb := _gfffb(_bebge, _adegg)
		if len(_eebe) == 0 && len(_dadbb) == 0 {
			continue
		}
		if len(_dadbb) > 0 {
			// Below: the candidate with the largest Ury.
			_gfbef := _dadbb[0]
			for _, _agdg := range _dadbb[1:] {
				if _agdg.Ury >= _gfbef.Ury {
					_gfbef = _agdg
				}
			}
			for _, _aadb := range _dadbb {
				if _aadb != _gfbef && _aadb.Ury > _gfbef.Lly {
					_gfbef = nil
					break
				}
			}
			if _gfbef != nil && _ecef(_adegg.PdfRectangle, _gfbef.PdfRectangle) {
				_adegg._ggfa = _gfbef
			}
		}
		if len(_eebe) > 0 {
			// Above: the candidate with the smallest Lly.
			_efaa := _eebe[0]
			for _, _dadd := range _eebe[1:] {
				if _dadd.Lly <= _efaa.Lly {
					_efaa = _dadd
				}
			}
			for _, _fcdb := range _eebe {
				if _fcdb != _efaa && _fcdb.Lly < _efaa.Ury {
					_efaa = nil
					break
				}
			}
			if _efaa != nil && _ecef(_adegg.PdfRectangle, _efaa.PdfRectangle) {
				_adegg._adgg = _efaa
			}
		}
	}
	// Keep only the links that point back at the paragraph from the other side.
	for _, _dcbgg := range _fdbfc {
		if _dcbgg._eccbf != nil && _dcbgg._eccbf._bdbeb != _dcbgg {
			_dcbgg._eccbf = nil
		}
		if _dcbgg._adgg != nil && _dcbgg._adgg._ggfa != _dcbgg {
			_dcbgg._adgg = nil
		}
		if _dcbgg._bdbeb != nil && _dcbgg._bdbeb._eccbf != _dcbgg {
			_dcbgg._bdbeb = nil
		}
		if _dcbgg._ggfa != nil && _dcbgg._ggfa._adgg != _dcbgg {
			_dcbgg._ggfa = nil
		}
	}
}

// intersections returns, for each ruling index, the set of indexes of the rulings of the
// other kind that it intersects. Only rulings of kind `_dcfc` and `_ecbd` are considered.
// It returns nil when there are fewer than `_gabg`+1 rulings of kind `_dcfc` or fewer than
// `_edca`+1 rulings of kind `_ecbd`, or when their total exceeds `_gdgf`.
func (_cgeff rulingList) intersections() map[int]intSet {
	var _afcg, _dcce []int
	for _bcbe, _dgbg := range _cgeff {
		switch _dgbg._efdg {
		case _dcfc:
			_afcg = append(_afcg, _bcbe)
		case _ecbd:
			_dcce = append(_dcce, _bcbe)
		}
	}
	if len(_afcg) < _gabg+1 || len(_dcce) < _edca+1 {
		return nil
	}
	if len(_afcg)+len(_dcce) > _gdgf {
		_ed.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064", len(_cgeff), len(_afcg), len(_dcce))
		return nil
	}
	_bcba := make(map[int]intSet, len(_afcg)+len(_dcce))
	for _, _dbbc := range _afcg {
		for _, _fccgg := range _dcce {
			if _cgeff[_dbbc].intersects(_cgeff[_fccgg]) {
				if _, _dgdf := _bcba[_dbbc]; !_dgdf {
					_bcba[_dbbc] = make(intSet)
				}
				if _, _gggad := _bcba[_fccgg]; !_gggad {
					_bcba[_fccgg] = make(intSet)
				}
				_bcba[_dbbc].add(_fccgg)
				_bcba[_fccgg].add(_dbbc)
			}
		}
	}
	return _bcba
}

// _aaag reports whether `_dede` consists only of Unicode whitespace.
// The empty string yields true.
func _aaag(_dede string) bool {
	for _, _dgbbcb := range _dede {
		if !_cf.IsSpace(_dgbbcb) {
			return false
		}
	}
	return true
}

// mergePrimary returns the mean of the primary coordinates (`_gged`) of the rulings.
// The list must not be empty.
func (_cgcg rulingList) mergePrimary() float64 {
	_dadfa := _cgcg[0]._gged
	for _, _fdfcb := range _cgcg[1:] {
		_dadfa += _fdfcb._gged
	}
	return _dadfa / float64(len(_cgcg))
}
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_eee *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	_efe := &imageExtractContext{_ead: options}
	_ec := _efe.extractContentStreamImages(_eee._ede, _eee._cd)
	if _ec != nil {
		return nil, _ec
	}
	return &PageImages{Images: _efe._eca}, nil
}

// snapToGroupsDirection groups rulings whose primary coordinates are close together, moves
// every ruling of a group onto the mean primary coordinate of the group and returns the
// merged rulings in strict order.
//
// The receiver is sorted in place and the `_gged` field of its rulings is overwritten.
// A ruling starts a new group when its primary coordinate exceeds that of the first ruling of
// the current group by more than `_dffa`. Each group is split with splitSec and every part is
// merged into a single ruling; a merged ruling that aligns with the previously added one in
// both directions is reported and skipped.
// It returns nil for an empty receiver.
func (_bbda rulingList) snapToGroupsDirection() rulingList {
	_bbda.sortStrict()
	// Without this guard an empty list would panic on `_bbda[0]` below.
	if len(_bbda) == 0 {
		return nil
	}
	_bddbc := make(map[*ruling]rulingList, len(_bbda))
	_cfae := _bbda[0]
	// _dcebc starts a new group that is keyed by its first ruling.
	_dcebc := func(_fcdde *ruling) {
		_cfae = _fcdde
		_bddbc[_cfae] = rulingList{_fcdde}
	}
	_dcebc(_bbda[0])
	for _, _ccae := range _bbda[1:] {
		if _ccae._gged < _cfae._gged-_egddf {
			_ed.Log.Error("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073", _cfae, _ccae)
		}
		if _ccae._gged > _cfae._gged+_dffa {
			_dcebc(_ccae)
		} else {
			_bddbc[_cfae] = append(_bddbc[_cfae], _ccae)
		}
	}
	_effbc := make(map[*ruling]float64, len(_bddbc)) // Group key -> mean primary coordinate.
	_dffdg := make(map[*ruling]*ruling, len(_bbda))  // Ruling -> key of its group.
	for _bfdd, _dgcf := range _bddbc {
		_effbc[_bfdd] = _dgcf.mergePrimary()
		for _, _egga := range _dgcf {
			_dffdg[_egga] = _bfdd
		}
	}
	for _, _gdgga := range _bbda {
		_gdgga._gged = _effbc[_dffdg[_gdgga]]
	}
	_abaad := make(rulingList, 0, len(_bbda))
	for _, _dada := range _bddbc {
		_cgeg := _dada.splitSec()
		for _bbbd, _fdfa := range _cgeg {
			_dedf := _fdfa.merge()
			if len(_abaad) > 0 {
				_abccf := _abaad[len(_abaad)-1]
				if _abccf.alignsPrimary(_dedf) && _abccf.alignsSec(_dedf) {
					_ed.Log.Error("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073", _bbbd, _abccf, _dedf)
					continue
				}
			}
			_abaad = append(_abaad, _dedf)
		}
	}
	_abaad.sortStrict()
	return _abaad
}

// _dcbce returns the text `_cgee` of the list with `*_edfa` written in front of every
// non-empty line. Every emitted line is terminated by a newline; empty lines are dropped.
// `_edfa` must not be nil.
func _dcbce(_aceee *list, _edfa *string) string {
	_gbec := _bd.Split(_aceee._cgee, "\u000a")
	_ffeb := &_bd.Builder{}
	for _, _babe := range _gbec {
		if _babe != "" {
			_ffeb.WriteString(*_edfa)
			_ffeb.WriteString(_babe)
			_ffeb.WriteString("\u000a")
		}
	}
	return _ffeb.String()
}

// compositeRowCorridors returns, for every row index from 1 to `_gggac`-1 that has at least
// one cell in `_gacgd`, the result of `_beacf` for the cells of that row. Cells are looked up
// with the key `_fedd(column, row)` for columns 0 to `_dedd`-1.
func (_bcfb *textTable) compositeRowCorridors() map[int][]float64 {
	_bgcb := make(map[int][]float64, _bcfb._gggac)
	if _ddbc {
		_ed.Log.Info("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064", _bcfb._gggac)
	}
	for _ebfg := 1; _ebfg < _bcfb._gggac; _ebfg++ {
		var _aafg []compositeCell
		for _dbec := 0; _dbec < _bcfb._dedd; _dbec++ {
			if _acegc, _bfbc := _bcfb._gacgd[_fedd(_dbec, _ebfg)]; _bfbc {
				_aafg = append(_aafg, _acegc)
			}
		}
		if len(_aafg) == 0 {
			continue
		}
		_gcfd := _beacf(_aafg)
		_bgcb[_ebfg] = _gcfd
		if _ddbc {
			_bb.Printf("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a", _ebfg, _gcfd)
		}
	}
	return _bgcb
}

// extractPageResourcesToFont appends one Font entry to `_bee.Fonts` for every font of the
// resources `_ad` whose font name is not already present (as decided by `_eae`).
//
// The embedded font program is decoded from the first of FontFile, FontFile2 and FontFile3
// that is set, and the file name is the font name plus ".pfb", ".ttf" or ".cff" respectively.
// It returns an error when the resources are nil or have no font dictionary, when a font
// cannot be looked up or loaded, or when a font stream cannot be decoded.
func (_bee *PageFonts) extractPageResourcesToFont(_ad *_cfb.PdfPageResources) error {
	// Nil resources cannot carry a font dictionary; report that instead of panicking.
	if _ad == nil {
		return _b.New(_be)
	}
	_cbc, _ef := _feb.GetDict(_ad.Font)
	if !_ef {
		return _b.New(_be)
	}
	for _, _ce := range _cbc.Keys() {
		var (
			_bbe = true
			_fca []byte
			_beb string
		)
		_bgc, _ebc := _ad.GetFontByName(_ce)
		if !_ebc {
			return _b.New(_fbc)
		}
		_df, _gc := _cfb.NewPdfFontFromPdfObject(_bgc)
		if _gc != nil {
			return _gc
		}
		// NOTE(review): the descriptor is dereferenced without a nil check; confirm that
		// FontDescriptor cannot return nil for the fonts that reach this point.
		_dff := _df.FontDescriptor()
		_afe := _df.FontDescriptor().FontName.String()
		_gg := _df.Subtype()
		if _eae(_bee.Fonts, _afe) {
			continue
		}
		if len(_df.ToUnicode()) == 0 {
			_bbe = false
		}
		if _dff.FontFile != nil {
			if _fd, _dd := _feb.GetStream(_dff.FontFile); _dd {
				_fca, _gc = _feb.DecodeStream(_fd)
				if _gc != nil {
					return _gc
				}
				_beb = _afe + "\u002e\u0070\u0066\u0062"
			}
		} else if _dff.FontFile2 != nil {
			if _ea, _aa := _feb.GetStream(_dff.FontFile2); _aa {
				_fca, _gc = _feb.DecodeStream(_ea)
				if _gc != nil {
					return _gc
				}
				_beb = _afe + "\u002e\u0074\u0074\u0066"
			}
		} else if _dff.FontFile3 != nil {
			if _cbf, _ccd := _feb.GetStream(_dff.FontFile3); _ccd {
				_fca, _gc = _feb.DecodeStream(_cbf)
				if _gc != nil {
					return _gc
				}
				_beb = _afe + "\u002e\u0063\u0066\u0066"
			}
		}
		// No file name means that no font program was decoded.
		if len(_beb) < 1 {
			_ed.Log.Debug(_bbf)
		}
		_baf := Font{FontName: _afe, PdfFont: _df, IsCID: _df.IsCID(), IsSimple: _df.IsSimple(), ToUnicode: _bbe, FontType: _gg, FontData: _fca, FontFileName: _beb, FontDescriptor: _dff}
		_bee.Fonts = append(_bee.Fonts, _baf)
	}
	return nil
}
// Text returns the text that was extracted from the page.
func (_cfaf PageText) Text() string {
	return _cfaf._bggg
}

// log prints the paragraphs of the list for debugging, one line per non-nil paragraph.
// `_afccd` is the title that is put in the header line.
// Nothing is printed unless the debug flag `_cdcd` is set.
func (_gbc paraList) log(_afccd string) {
	if _cdcd {
		_ed.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _afccd, len(_gbc))
		for index, para := range _gbc {
			if para != nil {
				content := para.text()
				// Paragraphs that belong to a table are tagged with the table size.
				tag := "\u0020\u0020"
				if table := para._cgabf; table != nil {
					tag = _bb.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", table._dedd, table._gggac)
				}
				_bb.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", index, para.PdfRectangle, tag, _adcgc(content, 50))
			}
		}
	}
}

// primaries returns the distinct primary coordinates (`_gged`) of the rulings in
// increasing order.
func (_fggg rulingList) primaries() []float64 {
	distinct := make(map[float64]struct{}, len(_fggg))
	for _, r := range _fggg {
		distinct[r._gged] = struct{}{}
	}
	values := make([]float64, 0, len(distinct))
	for value := range distinct {
		values = append(values, value)
	}
	_gf.Float64s(values)
	return values
}

// _aeac returns a space when the difference of the two values passes `_cbafb` and a
// newline otherwise.
func _aeac(_egdf, _fdfff float64) string {
	if _cbafb(_egdf - _fdfff) {
		return "\u0020"
	}
	return "\u000a"
}

// inDiacriticArea reports whether `_gade` is positioned like a diacritic of the receiver:
// the sum of the offsets of the left and right edges must be below the receiver's width times
// `_gfec`, and the offset of the bottom edges must be below its height times `_gfec`.
func (_daabg *textMark) inDiacriticArea(_gade *textMark) bool {
	leftOffset := _daabg.Llx - _gade.Llx
	rightOffset := _daabg.Urx - _gade.Urx
	bottomOffset := _daabg.Lly - _gade.Lly
	if !(_e.Abs(leftOffset+rightOffset) < _daabg.Width()*_gfec) {
		return false
	}
	return _e.Abs(bottomOffset) < _daabg.Height()*_gfec
}