unipdf/extractor/extractor.go

984 lines
214 KiB
Go
Raw Normal View History

2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2023-07-28 12:14:31 +00:00
package extractor ;import (_dfe "bytes";_d "errors";_ce "fmt";_b "github.com/unidoc/unipdf/v3/common";_fb "github.com/unidoc/unipdf/v3/contentstream";_ea "github.com/unidoc/unipdf/v3/core";_gb "github.com/unidoc/unipdf/v3/internal/license";_aa "github.com/unidoc/unipdf/v3/internal/textencoding";
_gab "github.com/unidoc/unipdf/v3/internal/transform";_bg "github.com/unidoc/unipdf/v3/model";_ec "golang.org/x/image/draw";_dg "golang.org/x/text/unicode/norm";_ae "golang.org/x/xerrors";_fa "image";_ag "image/color";_ga "io";_ef "math";_e "reflect";_gg "regexp";
_df "sort";_c "strings";_f "unicode";_a "unicode/utf8";);

// getDepthIdx converts the depth `_eebg` to a depth index with _ebfc and clamps the
// result to the interval bounded by the first and last entries of depthIndexes().
// NOTE(review): indexes depthIndexes()[0] unconditionally and treats the first/last
// entries as min/max, so it presumably needs a non-empty, ascending list — confirm.
func (_fedg *wordBag )getDepthIdx (_eebg float64 )int {_aaca :=_fedg .depthIndexes ();_edf :=_ebfc (_eebg );if _edf < _aaca [0]{return _aaca [0];};if _edf > _aaca [len (_aaca )-1]{return _aaca [len (_aaca )-1];
};return _edf ;};

// _cffd returns the smaller of its two int arguments.
func _cffd (_bfedd ,_acdbg int )int {if _bfedd < _acdbg {return _bfedd ;};return _acdbg ;};

// last returns the final point stored in the subpath.
// It indexes len-1 directly, so calling it on a subpath without points panics.
func (_fabee *subpath )last ()_gab .Point {return _fabee ._fbcgf [len (_fabee ._fbcgf )-1]};

// _daed maps each markKind to its printable name ("stroke", "fill", "augment").
var _daed =map[markKind ]string {_bafga :"\u0073\u0074\u0072\u006f\u006b\u0065",_ceag :"\u0066\u0069\u006c\u006c",_abgg :"\u0061u\u0067\u006d\u0065\u006e\u0074"};
2023-02-07 17:17:49 +00:00
2023-04-06 19:57:40 +00:00
2023-07-28 12:14:31 +00:00
// String returns a human readable description of the subpath: the number of
// points followed by the points themselves. Subpaths with more than five points
// are abbreviated to their first two points and their last point.
func (_fcg *subpath) String() string {
	points := _fcg._fbcgf
	count := len(points)
	if count > 5 {
		return _ce.Sprintf("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f", count, points[0], points[1], points[count-1])
	}
	return _ce.Sprintf("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f", count, points)
}

// scanBand walks the words of the receiver whose depth lies in the band
// [`_cgcb`, `_bfdf`] (widened on both sides by _gdab times the size of `_daec`)
// and counts those that qualify for `_daec`.
//
// A word qualifies when the predicate `_cff` accepts it, its size mismatch with
// `_daec` does not exceed `_cgce` (a non-positive `_cgce` disables the size test)
// and `_daec.blocked` does not reject it.
//
//   - `_bcgb` is not used by the body.
//   - `_ccece`: when true nothing is moved and each depth bin is abandoned after
//     its first qualifying word; when false qualifying words are pulled into
//     `_daec` and removed from the receiver once the scan is finished.
//   - `_bbfd`: when false the band limits grow to include every qualifying word.
//
// The return value is the number of qualifying words.
func (_fedc *wordBag) scanBand(_bcgb string, _daec *wordBag, _cff func(_bdbd *wordBag, _gaage *textWord) bool, _cgcb, _bfdf, _cgce float64, _ccece, _bbfd bool) int {
	bagSize := _daec._ecdf

	// Removals are only tracked when words are really going to be moved.
	var removals map[int]map[*textWord]struct{}
	if !_ccece {
		removals = _fedc.makeRemovals()
	}

	margin := _gdab * bagSize
	matched := 0
	for _, depthIdx := range _fedc.depthBand(_cgcb-margin, _bfdf+margin) {
		words := _fedc._cgdg[depthIdx]
		if len(words) == 0 {
			continue
		}
		for _, word := range words {
			// The limits may have moved since depthBand was evaluated, so re-check.
			if !(_cgcb-margin <= word._baebb && word._baebb <= _bfdf+margin) {
				continue
			}
			if !_cff(_daec, word) {
				continue
			}

			// Size mismatch: the smaller of the relative difference and the ratio.
			relDiff := 2.0 * _ef.Abs(word._ebgb-_daec._ecdf) / (word._ebgb + _daec._ecdf)
			ratio := _ef.Max(word._ebgb/_daec._ecdf, _daec._ecdf/word._ebgb)
			mismatch := _ef.Min(relDiff, ratio)
			if _cgce > 0 && mismatch > _cgce {
				continue
			}
			if _daec.blocked(word) {
				continue
			}

			if !_ccece {
				_daec.pullWord(word, depthIdx, removals)
			}
			matched++

			if !_bbfd {
				if word._baebb < _cgcb {
					_cgcb = word._baebb
				}
				if word._baebb > _bfdf {
					_bfdf = word._baebb
				}
			}
			if _ccece {
				break
			}
		}
	}

	if !_ccece {
		_fedc.applyRemovals(removals)
	}
	return matched
}

// _aeef builds the gridTile for rectangle `_bgcf`. Each of the four flags is set
// when the corresponding ruling is non-nil and encloses the rectangle's extent:
// `_fdce` and `_bdaecd` are tested against the y extent (Lly..Ury), `_ffge` and
// `_ggdc` against the x extent (Llx..Urx).
func _aeef(_bgcf _bg.PdfRectangle, _fdce, _bdaecd, _ffge, _ggdc *ruling) gridTile {
	xLo, xHi := _bgcf.Llx, _bgcf.Urx
	yLo, yHi := _bgcf.Lly, _bgcf.Ury

	tile := gridTile{PdfRectangle: _bgcf}
	tile._afdge = _fdce != nil && _fdce.encloses(yLo, yHi)
	tile._bfecb = _bdaecd != nil && _bdaecd.encloses(yLo, yHi)
	tile._eaed = _ffge != nil && _ffge.encloses(xLo, xHi)
	tile._fdbd = _ggdc != nil && _ggdc.encloses(xLo, xHi)
	return tile
}

// structElement is one node of a parsed structure tree: a name, its child
// elements, an integer value and the PDF object it refers to.
// NOTE(review): the exact meaning of each field is not visible here — confirm
// against parseStructElement.
type structElement struct {
	_bfeg  string
	_efce  []structElement
	_aeae  int64
	_fgagg _ea.PdfObject
}
2023-04-06 19:57:40 +00:00
2023-07-28 12:14:31 +00:00
// String returns a string describing `ma`: "EMPTY" when it holds no marks,
// otherwise the number of marks together with the first and the last mark.
func (_eafg TextMarkArray) String() string {
	marks := _eafg._bca
	if len(marks) == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	return _ce.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", len(marks), marks[0], marks[len(marks)-1])
}

// reset restores both matrices of the text object to the identity matrix and
// clears its `_fecd` field.
func (_gbb *textObject) reset() {
	_gbb._fda = _gab.IdentityMatrix()
	_gbb._cfec = _gab.IdentityMatrix()
	_gbb._fecd = nil
}

// _dgdfb maps each rulingKind to its printable name ("none", "horizontal",
// "vertical").
var _dgdfb = map[rulingKind]string{
	_bgbdg: "\u006e\u006f\u006e\u0065",
	_cefaa: "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",
	_acgee: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",
}
// depthRange returns, in ascending order, the depth indexes present in the bag
// that lie in [`_dgab`, `_ggab`]. It returns nil when there are none.
func (_ccfe *wordBag) depthRange(_dgab, _ggab int) []int {
	var found []int
	for idx := range _ccfe._cgdg {
		if idx < _dgab || idx > _ggab {
			continue
		}
		found = append(found, idx)
	}
	if len(found) == 0 {
		return nil
	}
	_df.Ints(found)
	return found
}

// lineRuling is a ruling candidate described by its ruling kind, the kind of
// mark it came from, its colour and its two end points.
type lineRuling struct {
	_cbfb  rulingKind
	_abeac markKind
	_ag.Color
	_egaf, _eaebf _gab.Point
}

// yNeighbours builds one opening event (at Lly) and one closing event (at Ury)
// per paragraph and hands them to eventNeighbours. When `_daab` is non-zero each
// interval is first extended at both ends by `_daab` times the paragraph's
// fontsize().
func (_gedeb paraList) yNeighbours(_daab float64) map[*textPara][]int {
	events := make([]event, 2*len(_gedeb))
	for idx, para := range _gedeb {
		lo, hi := para.Lly, para.Ury
		if _daab != 0 {
			lo -= _daab * para.fontsize()
			hi += _daab * para.fontsize()
		}
		events[2*idx] = event{lo, true, idx}
		events[2*idx+1] = event{hi, false, idx}
	}
	return _gedeb.eventNeighbours(events)
}

// aligned reports whether enough of the rulings line up with one another.
// Every ruling either bumps the counter of an already recorded ruling it
// gridIntersects, or is recorded itself with a zero counter. The share of
// recorded rulings whose counter stayed at zero must not exceed 1-_eadb.
// Lists with fewer than two rulings are never aligned.
func (_fdcdg rulingList) aligned() bool {
	if len(_fdcdg) < 2 {
		return false
	}

	hits := make(map[*ruling]int)
	hits[_fdcdg[0]] = 0
	for _, cand := range _fdcdg[1:] {
		matched := false
		for seen := range hits {
			if cand.gridIntersecting(seen) {
				hits[seen]++
				matched = true
				break
			}
		}
		if !matched {
			hits[cand] = 0
		}
	}

	unmatched := 0
	for _, n := range hits {
		if n == 0 {
			unmatched++
		}
	}

	fraction := float64(unmatched) / float64(len(_fdcdg))
	ok := fraction <= 1.0-_eadb
	if _bccgb {
		_b.Log.Info("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", ok, fraction, unmatched, len(_fdcdg), _fdcdg.String())
	}
	return ok
}

// subpath is a sequence of points plus a flag that close() sets.
type subpath struct {
	_fbcgf []_gab.Point
	_bbdg  bool
}

// computeText concatenates the `_ebgd` strings of the word's marks, in order.
func (_feea *textWord) computeText() string {
	var sb _c.Builder
	for _, mark := range _feea._dggf {
		sb.WriteString(mark._ebgd)
	}
	return sb.String()
}

// getStrokeColor converts the stroking colour of the current graphics state
// with _badcf, using the stroking colourspace.
func (_fefd *textObject) getStrokeColor() _ag.Color {
	space := _fefd._agbf.ColorspaceStroking
	colour := _fefd._agbf.ColorStroking
	return _badcf(space, colour)
}

// _fbdd is a meta TextMark with text "[X]", a single space as original text
// and white fill and stroke colours.
var _fbdd = TextMark{
	Text:        "\u005b\u0058\u005d",
	Original:    "\u0020",
	Meta:        true,
	FillColor:   _ag.White,
	StrokeColor: _ag.White,
}
// log dumps the paragraphs for debugging. It does nothing unless _ecdg is set.
// A header line goes to the logger; each non-nil paragraph is then printed to
// standard output with its index, bounding box, table size (if it has a table)
// and its text passed through _dfcggd with a limit of 50.
func (_fcee paraList) log(_dcaf string) {
	if !_ecdg {
		return
	}
	_b.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _dcaf, len(_fcee))
	for idx, para := range _fcee {
		if para == nil {
			continue
		}
		text := para.text()
		tableDesc := "\u0020\u0020"
		if para._bgba != nil {
			tableDesc = _ce.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", para._bgba._ddfc, para._bgba._gcbge)
		}
		_ce.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", idx, para.PdfRectangle, tableDesc, _dfcggd(text, 50))
	}
}

// parseStructTreeRoot fills the receiver from the structure tree root object
// `_dfgd`: the "Type" entry becomes `_baeb` and every kid found under "K" is
// parsed into a structElement stored in `_cegf`.
//
// A nil `_dfgd` is ignored. When `_dfgd` is not a dictionary the problem is
// logged and the receiver is left untouched. Previously execution continued
// after the log call and used the nil dictionary. A missing "Type" entry yields
// an empty type string instead of a call on a nil object, and a "K" entry that
// is neither an array nor a reference yields an empty element list.
func (_eeffg *structTreeRoot) parseStructTreeRoot(_dfgd _ea.PdfObject) {
	if _dfgd == nil {
		return
	}
	dict, ok := _ea.GetDict(_dfgd)
	if !ok {
		_b.Log.Debug("\u0070\u0061\u0072s\u0065\u0053\u0074\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u003a\u0020\u0064\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006eo\u0074\u0020\u0066\u006f\u0075\u006e\u0064\u002e")
		return
	}

	K := dict.Get("\u004b")

	var typeName string
	if typeObj := dict.Get("\u0054\u0079\u0070\u0065"); typeObj != nil {
		typeName = typeObj.String()
	}

	// A single referenced kid is treated like a one-element array.
	var kids *_ea.PdfObjectArray
	switch kid := K.(type) {
	case *_ea.PdfObjectArray:
		kids = kid
	case *_ea.PdfObjectReference:
		kids = _ea.MakeArray(K)
	}

	elements := []structElement{}
	if kids != nil {
		for _, kidObj := range kids.Elements() {
			elem := &structElement{}
			elem.parseStructElement(kidObj)
			elements = append(elements, *elem)
		}
	}
	_eeffg._cegf = elements
	_eeffg._baeb = typeName
}

// _fgad ("makeTextPage" in its log messages) turns the text marks of a page into
// a list of paragraphs.
//
// The marks are grouped by _gbeaf, split and merged by _gaaff, _bccb and _bacb
// using the vertical and horizontal rulings from `_eeda`, and every resulting
// bag is arranged into a paragraph. Unless `_cgba` is set, tables are extracted
// when at least _abda paragraphs exist and the result is additionally put in
// topological order. It returns nil when there are no marks or no groups.
func _fgad(_dgcb []*textMark, _eedeg _bg.PdfRectangle, _eeda rulingList, _ccfa []gridTiling, _cgba bool) paraList {
	_b.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_dgcb), _eedeg)
	if len(_dgcb) == 0 {
		return nil
	}
	groups := _gbeaf(_dgcb, _eedeg)
	if len(groups) == 0 {
		return nil
	}

	_eeda.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := _eeda.vertsHorzs()
	pageBag := _gaaff(groups, _eedeg.Ury, verts, horzs)
	bags := _bccb(pageBag, _eedeg.Ury, verts, horzs)
	bags = _bacb(bags)

	paras := make(paraList, 0, len(bags))
	for _, bag := range bags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}

	if !_cgba && len(paras) >= _abda {
		paras = paras.extractTables(_ccfa)
	}
	paras.sortReadingOrder()
	if !_cgba {
		paras.sortTopoOrder()
	}
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}

// highestWord returns the first word stored under depth index `_cecb` whose
// depth lies in [`_efc`, `_cdbb`], or nil when there is none.
// NOTE(review): "highest" presumably relies on the bin being sorted — confirm.
func (_befe *wordBag) highestWord(_cecb int, _efc, _cdbb float64) *textWord {
	for _, word := range _befe._cgdg[_cecb] {
		if _efc <= word._baebb && word._baebb <= _cdbb {
			return word
		}
	}
	return nil
}

// _afbd returns a textState initialised with `_dgc` = 100, the fill render mode
// and `_bfac` stored in `_fagg`.
func _afbd(_bfac _bg.PdfRectangle) textState {
	return textState{_dgc: 100, _gd: RenderModeFill, _fagg: _bfac}
}

// getListLines returns the lines of the paragraph for which _dbae accepts the
// first element of the first word's `_ggaef`, followed by the lines that _dgga
// returns for the paragraph.
// NOTE(review): every line is assumed to have at least one word with a
// non-empty `_ggaef`; otherwise the indexing panics — confirm with callers.
func (_cbda *textPara) getListLines() []*textLine {
	var lines []*textLine
	extra := _dgga(_cbda._gfbb)
	for _, line := range _cbda._gfbb {
		lead := line._aafd[0]._ggaef[0]
		if _dbae(lead) {
			lines = append(lines, line)
		}
	}
	lines = append(lines, extra...)
	return lines
}

// extractXObjectImage records an ImageMark for the image XObject named `_dcg`.
//
// The decoded image is cached per XObject stream in `_agf`. On a cache miss the
// image is decoded and, when a soft mask is present and could be read, blended
// with that mask before being cached. The cached image is converted to RGB and
// appended to `_dae` with size, angle and position taken from the CTM of `_egd`.
//
// It returns nil without recording anything when the XObject or image cannot be
// found, and an error when decoding or conversion fails. A soft mask that
// cannot be read is only logged.
func (_eee *imageExtractContext) extractXObjectImage(_dcg *_ea.PdfObjectName, _egd _fb.GraphicsState, _bfa *_bg.PdfPageResources) error {
	// Only the stream is needed here; the XObject type is not used.
	stream, _ := _bfa.GetXObjectByName(*_dcg)
	if stream == nil {
		return nil
	}

	cached, found := _eee._agf[stream]
	if !found {
		ximg, err := _bfa.GetXObjectImageByName(*_dcg)
		if err != nil {
			return err
		}
		if ximg == nil {
			return nil
		}
		img, err := ximg.ToImage()
		if err != nil {
			return err
		}

		var mask _fa.Image
		if ximg.SMask != nil {
			mask, err = _acgbed(ximg.SMask, _ag.Opaque)
			if err != nil {
				// Best effort: carry on without the mask.
				_b.Log.Debug("W\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0067\u0065\u0074\u0020\u0073\u006f\u0066\u0074\u0020\u0069\u006da\u0067e\u0020\u006d\u0061\u0073k\u002e\u0020O\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063\u0074\u002e")
			}
		}

		if mask != nil {
			goImg, err := img.ToGoImage()
			if err != nil {
				return err
			}
			goImg = _aaeg(goImg, mask)
			switch ximg.ColorSpace.String() {
			case "\u0044\u0065\u0076\u0069\u0063\u0065\u0047\u0072\u0061\u0079", "\u0049n\u0064\u0065\u0078\u0065\u0064":
				img, err = _bg.ImageHandling.NewGrayImageFromGoImage(goImg)
				if err != nil {
					return err
				}
			default:
				img, err = _bg.ImageHandling.NewImageFromGoImage(goImg)
				if err != nil {
					return err
				}
			}
		}

		cached = &cachedImage{_dba: img, _gca: ximg.ColorSpace}
		_eee._agf[stream] = cached
	}

	source := cached._dba
	space := cached._gca
	rgb, err := space.ImageToRGB(*source)
	if err != nil {
		return err
	}

	_b.Log.Debug("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073", _egd.CTM.String())
	mark := ImageMark{
		Image:  &rgb,
		Width:  _egd.CTM.ScalingFactorX(),
		Height: _egd.CTM.ScalingFactorY(),
		Angle:  _egd.CTM.Angle(),
	}
	mark.X, mark.Y = _egd.CTM.Translation()
	_eee._dae = append(_eee._dae, mark)
	_eee._edc++
	return nil
}

// rulingKind identifies the kind of a ruling (see _dgdfb for the names).
type rulingKind int

// _ggfa concatenates all ruling lists in `_fdbb` and returns the result of
// vertsHorzs() on the combined list.
func _ggfa(_fdbb []rulingList) (rulingList, rulingList) {
	var combined rulingList
	for _, rulings := range _fdbb {
		combined = append(combined, rulings...)
	}
	return combined.vertsHorzs()
}
2023-04-06 19:57:40 +00:00
2023-06-30 13:19:48 +00:00
// String returns a description of `state`.
2023-07-28 12:14:31 +00:00
// String returns a description of the text state: the values of `_fdf`,
// `_cagc` and `_fgca` and the base font name, or "[NOT SET]" when no font is set.
func (_bccg *textState) String() string {
	var fontName string
	if _bccg._dgdf == nil {
		fontName = "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	} else {
		fontName = _bccg._dgdf.BaseFont()
	}
	return _ce.Sprintf("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071", _bccg._fdf, _bccg._cagc, _bccg._fgca, fontName)
}

// findTableGrid tries to build a table from the tiling `_dgggf`.
//
// Each tile of the tiling is filled with the paragraphs that inTile() reports
// for it. The attempt is abandoned (nil, nil) as soon as the number of empty
// tiles exceeds (1-_dfecd) of all tiles, or when no cell of row 0 is nil or has
// its `_cfga` flag unset. On success it returns the table after reduceTiling and
// subdivide, together with the set of paragraphs that were placed in it.
func (_dfgcaa paraList) findTableGrid(_dgggf gridTiling) (*textTable, map[*textPara]struct{}) {
	width := len(_dgggf._eaafd)
	height := len(_dgggf._dade)
	cellCount := width * height

	table := textTable{
		_edgac: true,
		_ddfc:  width,
		_gcbge: height,
		_efeac: make(map[uint64]*textPara, cellCount),
		_dadcc: make(map[uint64]compositeCell, cellCount),
	}
	table.PdfRectangle = _dgggf.PdfRectangle

	used := make(map[*textPara]struct{})
	maxEmpty := int((1.0 - _dfecd) * float64(cellCount))
	numEmpty := 0
	if _agd {
		_b.Log.Info("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064", width, height)
	}

	for y, rowKey := range _dgggf._dade {
		row, haveRow := _dgggf._cbec[rowKey]
		if !haveRow {
			continue
		}
		for x, colKey := range _dgggf._eaafd {
			tile, haveTile := row[colKey]
			if !haveTile {
				continue
			}
			paras := _dfgcaa.inTile(tile)
			if len(paras) > 0 {
				table.putComposite(x, y, paras, tile.PdfRectangle)
				for _, para := range paras {
					used[para] = struct{}{}
				}
				continue
			}
			numEmpty++
			if numEmpty > maxEmpty {
				if _agd {
					_b.Log.Info("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064", numEmpty)
				}
				return nil, nil
			}
		}
	}

	numHeader := 0
	for x := 0; x < width; x++ {
		cell := table.get(x, 0)
		if cell == nil || !cell._cfga {
			numHeader++
		}
	}
	if numHeader == 0 {
		if _agd {
			_b.Log.Info("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030")
		}
		return nil, nil
	}

	reduced := table.reduceTiling(_dgggf, _feaa)
	reduced = reduced.subdivide()
	return reduced, used
}

// _ggegd ("makeFillRulings" in its log messages) converts filled paths into
// rulings. The sections are first passed to _bbcaa; every quadrilateral subpath
// is then offered to makeRectRuling with the colour of its section, and the
// rulings that call produces are collected.
func _ggegd(_adff []pathSection) rulingList {
	_bbcaa(_adff)
	if _bccgb {
		_b.Log.Info("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs", len(_adff))
	}

	var rulings rulingList
	for _, section := range _adff {
		for _, path := range section._dgfc {
			if !path.isQuadrilateral() {
				if _bccgb {
					_b.Log.Error("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073", path)
				}
				continue
			}
			if made, ok := path.makeRectRuling(section.Color); ok {
				rulings = append(rulings, made)
			} else if _cfde {
				_b.Log.Error("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073", path)
			}
		}
	}

	if _bccgb {
		_b.Log.Info("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073", rulings.String())
	}
	return rulings
}

// depth returns the smallest depth() among the cells of row 0 that exist and
// do not have `_cfga` set, or 1e10 when there is no such cell.
func (_bdag *textTable) depth() float64 {
	lowest := 1e10
	for x := 0; x < _bdag._ddfc; x++ {
		cell := _bdag.get(x, 0)
		if cell != nil && !cell._cfga {
			lowest = _ef.Min(lowest, cell.depth())
		}
	}
	return lowest
}

// _ggbg builds a 2x2 table with `_gfbbb` at (0,0), `_daecbe` at (1,0),
// `_dbdg` at (0,1) and `_fcfge` at (1,1).
func _ggbg(_gfbbb, _daecbe, _dbdg, _fcfge *textPara) *textTable {
	table := &textTable{
		_ddfc:  2,
		_gcbge: 2,
		_efeac: make(map[uint64]*textPara, 4),
	}
	table.put(0, 0, _gfbbb)
	table.put(1, 0, _daecbe)
	table.put(0, 1, _dbdg)
	table.put(1, 1, _fcfge)
	return table
}

// showTextAdjusted processes the operand array `_bccc` of a text-showing
// operator whose elements are either numbers or strings.
//
// A number n moves the text matrix `_fda` by -n*0.001*`_fgca` along x. A string
// is rendered with renderText, to which `_bcg` is passed through. Any other
// element type, a number that cannot be read or a string whose bytes cannot be
// obtained ends processing with an error.
func (_gec *textObject) showTextAdjusted(_bccc *_ea.PdfObjectArray, _bcg int) error {
	// Never set in this function, so the swap below is currently dead code.
	vertical := false

	for _, elem := range _bccc.Elements() {
		switch elem.(type) {
		case *_ea.PdfObjectFloat, *_ea.PdfObjectInteger:
			adjust, err := _ea.GetNumberAsFloat(elem)
			if err != nil {
				_b.Log.Debug("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _bccc)
				return err
			}
			dx, dy := -adjust*0.001*_gec._gacd._fgca, 0.0
			if vertical {
				dy, dx = dx, dy
			}
			shift := _ebba(_gab.Point{X: dx, Y: dy})
			_gec._fda.Concat(shift)

		case *_ea.PdfObjectString:
			direct := _ea.TraceToDirectObject(elem)
			data, ok := _ea.GetStringBytes(direct)
			if !ok {
				_b.Log.Trace("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _bccc)
				return _ea.ErrTypeError
			}
			_gec.renderText(direct, data, _bcg)

		default:
			_b.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _bccc)
			return _ea.ErrTypeError
		}
	}
	return nil
}

// getFontDict looks up the font object registered under `_bdeac` in the
// resources of the text object. It returns (nil, nil) when the text object has
// no resources and an error when the resources do not contain the font.
func (_bbccf *textObject) getFontDict(_bdeac string) (_abec _ea.PdfObject, _gcae error) {
	resources := _bbccf._edef
	if resources == nil {
		_b.Log.Debug("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071", _bdeac)
		return nil, nil
	}
	_abec, found := resources.GetFontByName(_ea.PdfObjectName(_bdeac))
	if found {
		return _abec, nil
	}
	_b.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071", _bdeac)
	return nil, _d.New("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073")
}

// removeDuplicates drops every point that _eaca reports as equal to the point
// kept immediately before it. An empty subpath is left untouched.
func (_cbbb *subpath) removeDuplicates() {
	points := _cbbb._fbcgf
	if len(points) == 0 {
		return
	}
	kept := []_gab.Point{points[0]}
	for _, pt := range points[1:] {
		if _eaca(pt, kept[len(kept)-1]) {
			continue
		}
		kept = append(kept, pt)
	}
	_cbbb._fbcgf = kept
}
2023-06-30 13:19:48 +00:00
// String returns a description of `k`.
2023-07-28 12:14:31 +00:00
// String returns the name registered for `k` in _dgdfb, or "Not a ruling: <n>"
// for a value that has no entry there.
func (_ccgb rulingKind) String() string {
	if name, known := _dgdfb[_ccgb]; known {
		return name
	}
	return _ce.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _ccgb)
}

// textResult bundles a PageText with two integer counters.
// NOTE(review): the meaning of the counters is not visible here — confirm at
// the places that fill them in.
type textResult struct {
	_ddd  PageText
	_aede int
	_bega int
}

// _egbg returns a new subpath whose only point is `_dedg`.
func _egbg(_dedg _gab.Point) *subpath {
	points := []_gab.Point{_dedg}
	return &subpath{_fbcgf: points}
}

// intersects reports whether the two rulings cross: one must be vertical and
// the other horizontal, and the `_befee` coordinate of each must lie within the
// `_agbc`..`_gffgd` extent of the other, widened by the tolerance _cfgg.
func (_afaa *ruling) intersects(_cdgca *ruling) bool {
	orthogonal := (_afaa._eabdg == _acgee && _cdgca._eabdg == _cefaa) ||
		(_cdgca._eabdg == _acgee && _afaa._eabdg == _cefaa)

	// spans reports whether span's extent covers the coordinate of other.
	spans := func(span, other *ruling) bool {
		return span._agbc-_cfgg <= other._befee && other._befee <= span._gffgd+_cfgg
	}
	first := spans(_afaa, _cdgca)
	second := spans(_cdgca, _afaa)
	crossing := orthogonal && first && second

	if _bccgb {
		_ce.Printf("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a", orthogonal, first, second, crossing, _afaa, _cdgca)
	}
	return crossing
}

// applyRemovals deletes from the bag the words listed in `_gcea`, which maps a
// depth index to the set of words to drop from that bin. A bin that loses all
// of its words is removed from the bag.
func (_eagee *wordBag) applyRemovals(_gcea map[int]map[*textWord]struct{}) {
	for depthIdx, doomed := range _gcea {
		if len(doomed) == 0 {
			continue
		}
		current := _eagee._cgdg[depthIdx]
		remaining := len(current) - len(doomed)
		if remaining == 0 {
			delete(_eagee._cgdg, depthIdx)
			continue
		}

		// The new bin is sized exactly for the survivors.
		survivors := make([]*textWord, remaining)
		next := 0
		for _, word := range current {
			if _, drop := doomed[word]; drop {
				continue
			}
			survivors[next] = word
			next++
		}
		_eagee._cgdg[depthIdx] = survivors
	}
}

// _cgdcf appends `_cgdag` to `_gada` after setting its Offset to the running
// offset `*_fage`, then advances the running offset by the byte length of the
// mark's Text.
func _cgdcf(_gada []TextMark, _fage *int, _cgdag TextMark) []TextMark {
	_cgdag.Offset = *_fage
	*_fage += len(_cgdag.Text)
	return append(_gada, _cgdag)
}

// _aee reports whether `_ba` contains a font whose FontName equals `_abb`.
func _aee(_ba []Font, _abb string) bool {
	for idx := range _ba {
		if _ba[idx].FontName == _abb {
			return true
		}
	}
	return false
}

// _dcdgd joins the text of `_cfdg`. Every line is prefixed with `_fbfgg` and is
// followed by a space when the next line has the same `_cbbd` value, otherwise
// by a newline. The last line is compared against 0.
func _dcdgd(_cfdg []*textLine, _fbfgg string) string {
	var sb _c.Builder
	lastIdx := len(_cfdg) - 1
	for idx, line := range _cfdg {
		text := line.text()
		here := line._cbbd
		following := 0.0
		if idx < lastIdx {
			following = _cfdg[idx+1]._cbbd
		}

		sb.WriteString(_fbfgg)
		sb.WriteString(text)
		if following == here {
			sb.WriteString("\u0020")
		} else {
			sb.WriteString("\u000a")
		}
	}
	return sb.String()
}
2023-02-07 17:17:49 +00:00
2023-07-28 12:14:31 +00:00
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
// Marks with Meta set and marks whose Text is rejected by _dfcc are ignored. The boolean
// result is false when no mark contributed, in which case the rectangle is the zero value.
func (_gfae *TextMarkArray )BBox ()(_bg .PdfRectangle ,bool ){var _dbef _bg .PdfRectangle ;_eef :=false ;for _ ,_degg :=range _gfae ._bca {if _degg .Meta ||_dfcc (_degg .Text ){continue ;};if _eef {_dbef =_egbga (_dbef ,_degg .BBox );}else {_dbef =_degg .BBox ;
_eef =true ;};};return _dbef ,_eef ;};

// _ddda classifies the segment between the two points: it passes the absolute x and y
// differences and the tolerance _gbca to _bffg and returns the resulting rulingKind.
func _ddda (_ddbg ,_dbac _gab .Point )rulingKind {_dgdd :=_ef .Abs (_ddbg .X -_dbac .X );_fgadf :=_ef .Abs (_ddbg .Y -_dbac .Y );return _bffg (_dgdd ,_fgadf ,_gbca );};

// _gdgf recursively builds "bullet" list items for level `_aecg`.
//
// `_gbee[_aecg]` selects the lines of this level from `_fcge`; _ddba narrows them using
// `_dabd` and `_dgbc`. For each remaining line the function recurses into level `_aecg`+1,
// bounded by this line's `_cbbd` and the next line's `_cbbd` (or `_dabd` for the last line),
// collects continuation lines with _cbcbe, and creates the item with _facb, storing the
// joined text in `_bfcg`.
// NOTE(review): `_gbee` presumably holds the sorted keys of `_fcge`, and `_dgbc`/`_dabd`
// presumably delimit a vertical range — confirm against the caller and _ddba.
func _gdgf (_dcef []*textLine ,_fcge map[float64 ][]*textLine ,_gbee []float64 ,_aecg int ,_dgbc ,_dabd float64 )[]*list {_gage :=[]*list {};
_bgaa :=_aecg ;_aecg =_aecg +1;_egcfe :=_gbee [_bgaa ];_bbab :=_fcge [_egcfe ];_afeac :=_ddba (_bbab ,_dabd ,_dgbc );for _cgef ,_gfcb :=range _afeac {var _gfce float64 ;_adfd :=[]*list {};_abbg :=_gfcb ._cbbd ;_caad :=_dabd ;if _cgef < len (_afeac )-1{_caad =_afeac [_cgef +1]._cbbd ;
};if _aecg < len (_gbee ){_adfd =_gdgf (_dcef ,_fcge ,_gbee ,_aecg ,_abbg ,_caad );};_gfce =_caad ;if len (_adfd )> 0{_agc :=_adfd [0];if len (_agc ._ecdee )> 0{_gfce =_agc ._ecdee [0]._cbbd ;};};_agfc :=[]*textLine {_gfcb };_caegf :=_cbcbe (_gfcb ,_dcef ,_gbee ,_abbg ,_gfce );
_agfc =append (_agfc ,_caegf ...);_edac :=_facb (_agfc ,"\u0062\u0075\u006c\u006c\u0065\u0074",_adfd );_edac ._bfcg =_dcdgd (_agfc ,"");_gage =append (_gage ,_edac );};return _gage ;};

// arrangeText consumes the words of the bag and arranges them into the lines of a new
// paragraph. It returns nil when no line could be formed.
//
// The bag is sorted first and, when _aaade is set, de-duplicated. For every depth index, as
// long as that bin is not empty, a new line is started with _ffff at the bin's first reading
// index. Words are then pulled into the line one at a time: among the highestWord of each
// bin in the depth band around the first word (its `_baebb` ± _gdab × its `_ebgb`), the
// candidate preferred by _aea is taken, skipping candidates whose gap to the line's last
// word (_efbc) exceeds _dcdc × `_ebgb`. The line is finished when no candidate is left or
// when a candidate's gap is below -_efdcb × `_ebgb`. The finished lines are sorted with
// _dag and turned into a paragraph by _gdae; debug flags control extra dumps.
// NOTE(review): `_ebgb` is presumably the font size and `_baebb` the depth of a word —
// confirm against textWord.
func (_degc *wordBag )arrangeText ()*textPara {_degc .sort ();if _aaade {_degc .removeDuplicates ();
};var _addf []*textLine ;for _ ,_bdbda :=range _degc .depthIndexes (){for !_degc .empty (_bdbda ){_bdac :=_degc .firstReadingIndex (_bdbda );_gdgd :=_degc .firstWord (_bdac );_acbb :=_ffff (_degc ,_bdac );_cbed :=_gdgd ._ebgb ;_eggg :=_gdgd ._baebb -_gdab *_cbed ;
_eded :=_gdgd ._baebb +_gdab *_cbed ;_gegd :=_dcdc *_cbed ;_egcce :=_efdcb *_cbed ;_febe :for {var _fedeg *textWord ;_aecf :=0;for _ ,_acbdf :=range _degc .depthBand (_eggg ,_eded ){_efbb :=_degc .highestWord (_acbdf ,_eggg ,_eded );if _efbb ==nil {continue ;
// `break _febe` below leaves the word-pulling loop entirely, not just the scan of the band.
};_eebdd :=_efbc (_efbb ,_acbb ._aafd [len (_acbb ._aafd )-1]);if _eebdd < -_egcce {break _febe ;};if _eebdd > _gegd {continue ;};if _fedeg !=nil &&_aea (_efbb ,_fedeg )>=0{continue ;};_fedeg =_efbb ;_aecf =_acbdf ;};if _fedeg ==nil {break ;};_acbb .pullWord (_degc ,_fedeg ,_aecf );
};_acbb .markWordBoundaries ();_addf =append (_addf ,_acbb );};};if len (_addf )==0{return nil ;};_df .Slice (_addf ,func (_fgfc ,_gagbg int )bool {return _dag (_addf [_fgfc ],_addf [_gagbg ])< 0});_cgbd :=_gdae (_degc .PdfRectangle ,_addf );if _gde {_b .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_cgbd .String ());
if _cdbf {for _babc ,_gdbdb :=range _cgbd ._gfbb {_ce .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_babc ,_gdbdb .String ());if _baeef {for _feae ,_abfg :=range _gdbdb ._aafd {_ce .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_feae ,_abfg .String ());
for _dbed ,_deff :=range _abfg ._dggf {_ce .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_dbed ,_deff .String ());};};};};};};return _cgbd ;};

// get returns the cell stored for position (`_cfcff`, `_egcgd`), keyed by _fgcce, or nil
// when the table has no entry for it.
func (_eaad *textTable )get (_cfcff ,_egcgd int )*textPara {return _eaad ._efeac [_fgcce (_cfcff ,_egcgd )];
};

// merge combines all paragraphs of the list into a single paragraph. The list is put in
// reading order, the bounding boxes are unioned with _egbga and the lines are concatenated
// before _gdae builds the result. It returns nil for an empty list.
// NOTE(review): the lines are appended onto the first paragraph's own `_gfbb` slice, so that
// slice's backing array may be written to if it has spare capacity — confirm this is harmless.
func (_egea paraList )merge ()*textPara {_b .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_egea ));
if len (_egea )==0{return nil ;};_egea .sortReadingOrder ();_gbbc :=_egea [0].PdfRectangle ;_aebe :=_egea [0]._gfbb ;for _ ,_cdeee :=range _egea [1:]{_gbbc =_egbga (_gbbc ,_cdeee .PdfRectangle );_aebe =append (_aebe ,_cdeee ._gfbb ...);};return _gdae (_gbbc ,_aebe );
};

// textPara is a paragraph: a bounding box with the text lines it contains (`_gfbb`), an
// optional table (`_bgba`), two flags, four links to other paragraphs and a slice of lists.
// NOTE(review): the roles of `_gfbgd`, the flags and the four paragraph links are not
// visible in this part of the file — confirm before relying on them.
type textPara struct{_bg .PdfRectangle ;_gfbgd _bg .PdfRectangle ;_gfbb []*textLine ;_bgba *textTable ;_abeg bool ;_cfga bool ;_dbaed *textPara ;_eabac *textPara ;_ffeg *textPara ;_fgdg *textPara ;_gbff []list ;};

// _ggdg reports whether `_gacb` divided by the larger of _gfad and `_eagab` is below _ceecf.
func _ggdg (_gacb ,_eagab float64 )bool {return _gacb /_ef .Max (_gfad ,_eagab )< _ceecf };
// bounded is implemented by anything that can report its bounding box.
type bounded interface {
	bbox() _bg.PdfRectangle
}

// findPrimSec returns the first ruling whose `_befee` coordinate matches
// `_eaacd` (as judged by _acbc on the difference) and whose `_agbc`..`_gffgd`
// extent, widened by _cfgg, contains `_afcdb`. It returns nil if none does.
func (_cbebd rulingList) findPrimSec(_eaacd, _afcdb float64) *ruling {
	for _, candidate := range _cbebd {
		if !_acbc(candidate._befee - _eaacd) {
			continue
		}
		if candidate._agbc-_cfgg <= _afcdb && _afcdb <= candidate._gffgd+_cfgg {
			return candidate
		}
	}
	return nil
}

// comp is the ordering function for the list: it reports whether the ruling at
// index `_egcg` sorts before the ruling at index `_baga`.
//
// Rulings are ordered by kind first (larger kind values first). Rulings of kind
// "none" are never ordered among themselves. Within a kind the first differing
// value of `_befee` (descending), `_agbc` (ascending) and `_gffgd` (ascending)
// decides for horizontal rulings; the result is negated for the other kind.
func (_gabg rulingList) comp(_egcg, _baga int) bool {
	left, right := _gabg[_egcg], _gabg[_baga]
	kind := left._eabdg
	if kind != right._eabdg {
		return kind > right._eabdg
	}
	if kind == _bgbdg {
		return false
	}

	var before bool
	switch {
	case left._befee != right._befee:
		before = left._befee > right._befee
	case left._agbc != right._agbc:
		before = left._agbc < right._agbc
	default:
		before = left._gffgd < right._gffgd
	}
	if kind == _cefaa {
		return before
	}
	return !before
}

// firstWord returns the first word stored under depth index `_fcb`. The bin
// must exist and be non-empty, otherwise the indexing panics.
func (_fdfd *wordBag) firstWord(_fcb int) *textWord {
	bin := _fdfd._cgdg[_fcb]
	return bin[0]
}

// close closes the subpath: the first point is appended again unless _eaca
// reports that the last point already equals it, the closed flag is set and
// duplicate points are removed. The subpath must contain at least one point.
func (_cdd *subpath) close() {
	start := _cdd._fbcgf[0]
	if !_eaca(start, _cdd.last()) {
		_cdd.add(_cdd._fbcgf[0])
	}
	_cdd._bbdg = true
	_cdd.removeDuplicates()
}

// toTilings tidies the rulings, groups them into grids with toGrids and
// converts every grid into a gridTiling. It returns the tidied rulings together
// with the tilings, or (nil, nil) for an empty list.
func (_aeaab rulingList) toTilings() (rulingList, []gridTiling) {
	_aeaab.log("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s")
	if len(_aeaab) == 0 {
		return nil, nil
	}
	_aeaab = _aeaab.tidied("\u0061\u006c\u006c")
	_aeaab.log("\u0074\u0069\u0064\u0069\u0065\u0064")

	grids := _aeaab.toGrids()
	tilings := make([]gridTiling, len(grids))
	for idx := range grids {
		tilings[idx] = grids[idx].asTiling()
	}
	return _aeaab, tilings
}

// Messages used when font information cannot be obtained.
const (
	// _deg: "ERROR: Can't convert font object, invalid type"
	_deg = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
	// _egf: "ERROR: Can't get font properties, font not found"
	_egf = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"
	// _cf: "ERROR: Can't get font stream, invalid type"
	_cf = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
)
// _dgbe writes the text that _ggcc produces for `_eged` to `_dfdf` and then
// does the same, depth first, for each child in `_cdfc`. Every child gets its
// own copy of the prefix `*_cgfea` extended by three spaces.
func _dgbe(_eged *list, _dfdf *_c.Builder, _cgfea *string) {
	_dfdf.WriteString(_ggcc(_eged, _cgfea))
	for idx := range _eged._cdfc {
		// A fresh variable per child, since its address is handed down.
		childPrefix := *_cgfea + "\u0020\u0020\u0020"
		_dgbe(_eged._cdfc[idx], _dfdf, &childPrefix)
	}
}

// empty reports whether the stack holds no entries.
func (_edb *stateStack) empty() bool {
	depth := len(*_edb)
	return depth == 0
}
// checkOp validates the operand count of `_cagf`. When `_gaac` >= 0 and the operator does not
// carry exactly `_gaac` params, the mismatch is logged and (false, err) is returned, where err
// is non-nil only if `_cac` is true. Otherwise it returns (true, nil).
// A nil receiver is tolerated: the operator is logged as being outside a text object (showing
// at most `_gaac` of its params) and the count check still runs.
func (_befc *textObject )checkOp (_cagf *_fb .ContentStreamOperation ,_gaac int ,_cac bool )(_ada bool ,_ffce error ){if _befc ==nil {var _cae []_ea .PdfObject ;if _gaac > 0{_cae =_cagf .Params ;if len (_cae )> _gaac {_cae =_cae [:_gaac ];};};_b .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_cagf .Operand ,_cae );
};if _gaac >=0{if len (_cagf .Params )!=_gaac {if _cac {_ffce =_d .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");};_b .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_cagf .Operand ,_gaac ,len (_cagf .Params ),_cagf .Params );
// (checkOp ends on the next line.)
// secMinMax returns the smallest `_agbc` and the largest `_gffgd` over the list. It reads
// element 0 unconditionally, so the list must be non-empty.
return false ,_ffce ;};};return true ,nil ;};func (_fbeeb rulingList )secMinMax ()(float64 ,float64 ){_cfedd ,_dfecf :=_fbeeb [0]._agbc ,_fbeeb [0]._gffgd ;for _ ,_geae :=range _fbeeb [1:]{if _geae ._agbc < _cfedd {_cfedd =_geae ._agbc ;};if _geae ._gffgd > _dfecf {_dfecf =_geae ._gffgd ;
// addDiacritic appends `_cdcb` to the text of the last entry of `_dggf` and NFKC-normalizes
// the result. NOTE(review): the change is made through the local `_ebafb`, so it only reaches
// the word if `_dggf` holds pointers — confirm. It indexes len-1, so `_dggf` must be non-empty.
//
// newTextMark builds a textMark for the text `_baagd`. The mark's box runs from the
// translation of `_cdbd` to `_bbae` and is extended by `_eabe` (the Y scaling factor of `_cdbd`,
// or the X scaling factor when the orientation modulo 180 is 90) in a direction chosen by the
// orientation `_cdca`. The returned bool is false when `_fgdf` reports the box as not
// overlapping the media box `_dcdg._de`; that check runs only if the media box has a positive
// width.
};};return _cfedd ,_dfecf ;};func (_dcfgf *textWord )addDiacritic (_cdcb string ){_ebafb :=_dcfgf ._dggf [len (_dcfgf ._dggf )-1];_ebafb ._ebgd +=_cdcb ;_ebafb ._ebgd =_dg .NFKC .String (_ebafb ._ebgd );};func (_dfdb *textObject )newTextMark (_baagd string ,_cdbd _gab .Matrix ,_bbae _gab .Point ,_gdbd float64 ,_bfef *_bg .PdfFont ,_dgcf float64 ,_cdcd ,_gecb _ag .Color ,_edfg _ea .PdfObject ,_cgecc []string ,_fbcca int ,_gafc int )(textMark ,bool ){_fdge :=_cdbd .Angle ();
_cdca :=_fdfb (_fdge ,_fcbd );var _eabe float64 ;if _cdca %180!=90{_eabe =_cdbd .ScalingFactorY ();}else {_eabe =_cdbd .ScalingFactorX ();};_bcbgcd :=_bbccc (_cdbd );_eega :=_bg .PdfRectangle {Llx :_bcbgcd .X ,Lly :_bcbgcd .Y ,Urx :_bbae .X ,Ury :_bbae .Y };
// Any orientation other than 0/90/180/270 is reset to 0 by the default case below. The box
// corners are then swapped where needed so that Llx <= Urx and Lly <= Ury.
switch _cdca %360{case 90:_eega .Urx -=_eabe ;case 180:_eega .Ury -=_eabe ;case 270:_eega .Urx +=_eabe ;case 0:_eega .Ury +=_eabe ;default:_cdca =0;_eega .Ury +=_eabe ;};if _eega .Llx > _eega .Urx {_eega .Llx ,_eega .Urx =_eega .Urx ,_eega .Llx ;};if _eega .Lly > _eega .Ury {_eega .Lly ,_eega .Ury =_eega .Ury ,_eega .Lly ;
};_gegb :=true ;if _dfdb ._dcdg ._de .Width ()> 0{_gaad ,_ebefe :=_fgdf (_eega ,_dfdb ._dcdg ._de );if !_ebefe {_gegb =false ;_b .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_eega ,_dfdb ._dcdg ._de ,_baagd );
// `_ffd` is a second box derived from `_eega` and the media box according to the orientation;
// it becomes the mark's embedded PdfRectangle while `_eega` is kept in `_bcfd`.
};_eega =_gaad ;};_ffd :=_eega ;_ggeac :=_dfdb ._dcdg ._de ;switch _cdca %360{case 90:_ggeac .Urx ,_ggeac .Ury =_ggeac .Ury ,_ggeac .Urx ;_ffd =_bg .PdfRectangle {Llx :_ggeac .Urx -_eega .Ury ,Urx :_ggeac .Urx -_eega .Lly ,Lly :_eega .Llx ,Ury :_eega .Urx };
case 180:_ffd =_bg .PdfRectangle {Llx :_ggeac .Urx -_eega .Llx ,Urx :_ggeac .Urx -_eega .Urx ,Lly :_ggeac .Ury -_eega .Lly ,Ury :_ggeac .Ury -_eega .Ury };case 270:_ggeac .Urx ,_ggeac .Ury =_ggeac .Ury ,_ggeac .Urx ;_ffd =_bg .PdfRectangle {Llx :_eega .Ury ,Urx :_eega .Lly ,Lly :_ggeac .Ury -_eega .Llx ,Ury :_ggeac .Ury -_eega .Urx };
};if _ffd .Llx > _ffd .Urx {_ffd .Llx ,_ffd .Urx =_ffd .Urx ,_ffd .Llx ;};if _ffd .Lly > _ffd .Ury {_ffd .Lly ,_ffd .Ury =_ffd .Ury ,_ffd .Lly ;};_bdbg :=textMark {_ebgd :_baagd ,PdfRectangle :_ffd ,_bcfd :_eega ,_ecbeg :_bfef ,_gceb :_eabe ,_abac :_dgcf ,_acddd :_cdbd ,_efgg :_bbae ,_acec :_cdca ,_bdaff :_cdcd ,_bfdb :_gecb ,_dcbd :_edfg ,_babd :_cgecc ,Th :_dfdb ._gacd ._dgc ,Tw :_dfdb ._gacd ._cagc ,_adbb :_gafc ,_fbcc :_fbcca };
if _efe {_b .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_bcbgcd ,_bbae ,_bdbg .String ());};return _bdbg ,_gegb ;
// textWord.bbox returns the word's embedded PdfRectangle.
//
// closePath closes the last subpath in `_cbfc`. If `_afge` is set, a new subpath created by
// `_egbg` from `_bfd` is appended first and the flag is cleared. If the flag is not set and
// there are no subpaths, it returns without doing anything else.
};func (_fecgd *textWord )bbox ()_bg .PdfRectangle {return _fecgd .PdfRectangle };func (_ecad *shapesState )closePath (){if _ecad ._afge {_ecad ._cbfc =append (_ecad ._cbfc ,_egbg (_ecad ._bfd ));_ecad ._afge =false ;}else if len (_ecad ._cbfc )==0{if _bdaae {_b .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");
// _gaf (defined on the next line, after the end of closePath) reports whether the horizontal
// extents [Llx, Urx] of `_egbe` and `_cece` overlap once each side is widened by `_caae`.
};_ecad ._afge =false ;return ;};_ecad ._cbfc [len (_ecad ._cbfc )-1].close ();if _bdaae {_b .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_ecad );};};func _gaf (_cece *wordBag ,_egbe *textWord ,_caae float64 )bool {return _egbe .Llx < _cece .Urx +_caae &&_cece .Llx -_caae < _egbe .Urx ;
// setTextRenderMode stores `_bda` as the RenderMode in `_gacd._gd`; it is a no-op on a nil
// receiver.
//
// appendWord adds `_bgf` to the line's words `_aafd`, replaces the line's rectangle with the
// result of `_egbga` on the line and word rectangles, and raises `_bfbb` and `_cbbd` to the
// word's `_ebgb` and `_baebb` when those are larger.
};func (_baab *textObject )setTextRenderMode (_bda int ){if _baab ==nil {return ;};_baab ._gacd ._gd =RenderMode (_bda );};func (_efee *textLine )appendWord (_bgf *textWord ){_efee ._aafd =append (_efee ._aafd ,_bgf );_efee .PdfRectangle =_egbga (_efee .PdfRectangle ,_bgf .PdfRectangle );
// text (defined on the next line, after the end of appendWord) concatenates the `_ggaef` of
// every word in the line, inserting a single space before each word whose `_gagaf` is set.
if _bgf ._ebgb > _efee ._bfbb {_efee ._bfbb =_bgf ._ebgb ;};if _bgf ._baebb > _efee ._cbbd {_efee ._cbbd =_bgf ._baebb ;};};func (_efdd *textLine )text ()string {var _afde []string ;for _ ,_eaaf :=range _efdd ._aafd {if _eaaf ._gagaf {_afde =append (_afde ,"\u0020");
// `_gbf` and `_gc` (declared on the next line) are package-level error values.
};_afde =append (_afde ,_eaaf ._ggaef );};return _c .Join (_afde ,"");};var (_gbf =_d .New ("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072");_gc =_d .New ("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072");
// _cbcbe selects lines from `_gffg` whose `_cbbd` is at least `_eaegb`. It always returns a
// non-nil slice.
//   - When `_gceg` != -1, only lines with `_cbbd` < `_gceg` qualify. Lines whose text equals
//     that of `_bab` are skipped, and the scan stops at the first differing line whose rounded
//     Llx is less than the rounded Llx of `_bab`.
//   - When `_gceg` == -1, lines with the same `_cbbd` as `_bab` are kept if their text differs
//     from `_bab`'s. Any other line is kept if `_cfad(_bab, _gffg, _cfacd)` is not -1 and the
//     line's `_cbbd` does not exceed it.
);func _cbcbe (_bab *textLine ,_gffg []*textLine ,_cfacd []float64 ,_eaegb ,_gceg float64 )[]*textLine {_bggb :=[]*textLine {};for _ ,_cbca :=range _gffg {if _cbca ._cbbd >=_eaegb {if _gceg !=-1&&_cbca ._cbbd < _gceg {if _cbca .text ()!=_bab .text (){if _ef .Round (_cbca .Llx )< _ef .Round (_bab .Llx ){break ;
};_bggb =append (_bggb ,_cbca );};}else if _gceg ==-1{if _cbca ._cbbd ==_bab ._cbbd {if _cbca .text ()!=_bab .text (){_bggb =append (_bggb ,_cbca );};continue ;};_ecge :=_cfad (_bab ,_gffg ,_cfacd );if _ecge !=-1&&_cbca ._cbbd <=_ecge {_bggb =append (_bggb ,_cbca );
// isAtom (defined on the next line, after the end of _cbcbe) returns the table that `_ggbg`
// builds from the receiver, its `_eabac` and `_fgdg` links and the `_fgdg` link of `_eabac`.
// It returns nil if any of the three linked paras reports taken(), or if the `_fgdg` link of
// `_eabac` is not the same para as the `_eabac` link of `_fgdg`.
};};};};return _bggb ;};func (_bedgb *textPara )isAtom ()*textTable {_eagcg :=_bedgb ;_dfgda :=_bedgb ._eabac ;_acdc :=_bedgb ._fgdg ;if _dfgda .taken ()||_acdc .taken (){return nil ;};_ffecc :=_dfgda ._fgdg ;if _ffecc .taken ()||_ffecc !=_acdc ._eabac {return nil ;
// gridTiling embeds a PdfRectangle and holds two float64 slices and a two-level map of
// gridTile values keyed by float64. cachedImage pairs an image with a colorspace.
//
// _gdbc (starting on the next line) appends a copy of the package-level `_fbdd`, with Text set
// to `_bebd`, to `_fbcfc` by way of `_cgdcf`.
};return _ggbg (_eagcg ,_dfgda ,_acdc ,_ffecc );};type gridTiling struct{_bg .PdfRectangle ;_eaafd []float64 ;_dade []float64 ;_cbec map[float64 ]map[float64 ]gridTile ;};type cachedImage struct{_dba *_bg .Image ;_gca _bg .PdfColorspace ;};func _gdbc (_fbcfc []TextMark ,_cecf *int ,_bebd string )[]TextMark {_eced :=_fbdd ;
// putComposite stores a compositeCell holding `_gdff` and `_bcef` in `_dadcc` under the key
// `_fgcce(_gecc, _aegc)`, after calling updateBBox on the cell. An empty `_gdff` is logged as
// an error and nothing is stored.
_eced .Text =_bebd ;return _cgdcf (_fbcfc ,_cecf ,_eced );};func (_gggaa *textTable )putComposite (_gecc ,_aegc int ,_gdff paraList ,_bcef _bg .PdfRectangle ){if len (_gdff )==0{_b .Log .Error ("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073");
return ;};_egefg :=compositeCell {PdfRectangle :_bcef ,paraList :_gdff };if _cgafg {_ce .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a",_gecc ,_aegc ,_egefg .String ());
// removeDuplicates (defined on the next line, after the end of putComposite) sorts the
// receiver in place and returns a new list in which an entry is dropped when it equals() the
// previously kept entry. It returns nil for an empty list.
};_egefg .updateBBox ();_gggaa ._dadcc [_fgcce (_gecc ,_aegc )]=_egefg ;};func (_gbaa rulingList )removeDuplicates ()rulingList {if len (_gbaa )==0{return nil ;};_gbaa .sort ();_eeee :=rulingList {_gbaa [0]};for _ ,_agec :=range _gbaa [1:]{if _agec .equals (_eeee [len (_eeee )-1]){continue ;
// isQuadrilateral (defined on the next line) reports whether the subpath has exactly 4 points,
// or 5 points of which the first and last have identical X and Y.
};_eeee =append (_eeee ,_agec );};return _eeee ;};func (_gedd *subpath )isQuadrilateral ()bool {if len (_gedd ._fbcgf )< 4||len (_gedd ._fbcgf )> 5{return false ;};if len (_gedd ._fbcgf )==5{_abgc :=_gedd ._fbcgf [0];_abaa :=_gedd ._fbcgf [4];if _abgc .X !=_abaa .X ||_abgc .Y !=_abaa .Y {return false ;
// computeViews (defined on the next line) fills `_bdf`, `_fccf` and `_ecege` with the text,
// the text marks and the tables of the paragraphs returned by getParagraphs().
};};return true ;};func (_bdaf *PageText )computeViews (){_fbga :=_bdaf .getParagraphs ();_bbda :=new (_dfe .Buffer );_fbga .writeText (_bbda );_bdaf ._bdf =_bbda .String ();_bdaf ._fccf =_fbga .toTextMarks ();_bdaf ._ecege =_fbga .tables ();if _cgafg {_b .Log .Info ("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064",len (_bdaf ._ecege ));
// _egagc rounds `_abga` to the nearest multiple of `_dgeb`.
//
// _ebfc maps `_cagb` to an integer index: int(_cagb/_fefe) for non-negative values and
// int(_cagb/_fefe)-1 for negative ones.
//
// _dddaa visits the keys of `_gddc` in the order returned by `_ecdb`, starting at the first
// key whose slice is non-nil, and skips nil slices. For each visited slice whose last element
// exceeds its first element, the last element is set to the first and the slice is also stored
// under the previously visited key. Maps with at most one entry are left untouched.
};};func _egagc (_abga float64 )float64 {return _dgeb *_ef .Round (_abga /_dgeb )};func _ebfc (_cagb float64 )int {var _eace int ;if _cagb >=0{_eace =int (_cagb /_fefe );}else {_eace =int (_cagb /_fefe )-1;};return _eace ;};func _dddaa (_gddc map[int ][]float64 ){if len (_gddc )<=1{return ;
};_cdggb :=_ecdb (_gddc );if _cgafg {_b .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_cdggb );};var _edead ,_ebbg int ;for _edead ,_ebbg =range _cdggb {if _gddc [_ebbg ]!=nil {break ;};};for _ecfe ,_fegb :=range _cdggb [_edead :]{_bdeb :=_gddc [_fegb ];
// NOTE(review): `_gbccg` below is read from `_gddc[_fegb]`, the same key as `_bdeb`, so the
// comparison is between the last and first elements of one slice. Reading `_gddc[_ebbg]` (the
// previous key) may have been intended — confirm before relying on this.
if _bdeb ==nil {continue ;};if _cgafg {_ce .Printf ("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_edead +_ecfe ,_ebbg ,_fegb );};_gbccg :=_gddc [_fegb ];if _gbccg [len (_gbccg )-1]> _bdeb [0]{_gbccg [len (_gbccg )-1]=_bdeb [0];
_gddc [_ebbg ]=_gbccg ;};_ebbg =_fegb ;};};
2023-05-29 17:26:33 +00:00
2023-07-28 12:14:31 +00:00
// String returns a string describing `tm`: its offset, its text (quoted and as hex runes),
// the corners of its bounding box, the font description truncated to 50 bytes, and a
// " *M*" suffix when the mark is a Meta mark.
func (tm TextMark) String() string {
	bbox := tm.BBox
	var font string
	if tm.Font != nil {
		font = tm.Font.String()
		if len(font) > 50 {
			font = font[:50] + "\u002e\u002e\u002e"
		}
	}
	var meta string
	if tm.Meta {
		meta = "\u0020\u002a\u004d\u002a"
	}
	return _ce.Sprintf("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",
		tm.Offset, tm.Text, []rune(tm.Text), bbox.Llx, bbox.Lly, bbox.Urx, bbox.Ury, font, meta)
}

// moveTo records that a new subpath is pending (`_afge`) and stores the device-space
// position of (`_becea`, `_aad`) in `_bfd`.
func (ss *shapesState) moveTo(_becea, _aad float64) {
	ss._afge = true
	ss._bfd = ss.devicePoint(_becea, _aad)
	if _bdaae {
		_b.Log.Info("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066", _becea, _aad, ss._bfd)
	}
}

// _eafb converts each element of `_aagf` into a *list built by `_facb`, recursing into the
// element's children (`_efce`).
//
// An element receives the lines stored in `_ffae` under int(`_aeae`) only when that value is
// not -1, the element's `_fgagg` is a *PdfObjectReference, `_bgeg` is a *PdfIndirectObject,
// and the two references are deeply equal. Otherwise it receives an empty line slice.
// The result is always non-nil.
func _eafb(_aagf []structElement, _ffae map[int][]*textLine, _bgeg _ea.PdfObject) []*list {
	// `_bgeg` is the same for every element, so resolve its concrete type once.
	indirect, isIndirect := _bgeg.(*_ea.PdfIndirectObject)

	result := []*list{}
	for _, elem := range _aagf {
		lines := []*textLine{}
		children := []*list{}

		ref, isRef := elem._fgagg.(*_ea.PdfObjectReference)
		if !isRef {
			_b.Log.Debug("\u0066\u0061\u0069l\u0065\u0064\u0020\u0074\u006f\u0020\u0063\u0061\u0073\u0074\u0020\u0074\u006f\u0020\u002a\u0063\u006f\u0072\u0065\u002e\u0050\u0064\u0066\u004f\u0062\u006a\u0065\u0063\u0074R\u0065\u0066\u0065\u0072\u0065\u006e\u0063\u0065")
		}

		key := int(elem._aeae)
		if key != -1 && ref != nil && isIndirect {
			if found, has := _ffae[key]; has && _e.DeepEqual(*ref, indirect.PdfObjectReference) {
				lines = found
			}
		}

		if elem._efce != nil {
			children = _eafb(elem._efce, _ffae, _bgeg)
		}
		result = append(result, _facb(lines, elem._bfeg, children))
	}
	return result
}
2023-05-29 17:26:33 +00:00
2023-07-28 12:14:31 +00:00
// Options extractor options.
type Options struct{
2023-02-07 17:17:49 +00:00
2023-07-28 12:14:31 +00:00
// DisableDocumentTags specifies whether to use the document tags during list extraction.
DisableDocumentTags bool ;
2023-05-29 17:26:33 +00:00
2023-07-28 12:14:31 +00:00
// ApplyCropBox will extract page text based on page cropbox if set to `true`.
ApplyCropBox bool ;
2023-02-07 17:17:49 +00:00
2023-07-28 12:14:31 +00:00
// UseSimplerExtractionProcess will skip topological text ordering and table processing.
//
// NOTE: While normally the extra processing is beneficial, it can also lead to problems when it does not work.
// Thus it is a flag to allow the user to control this process.
//
// Skipping some extraction processes would also lead to the reduced processing time.
UseSimplerExtractionProcess bool ;};func (_bebge *textTable )reduce ()*textTable {_agbbg :=make ([]int ,0,_bebge ._gcbge );_gbae :=make ([]int ,0,_bebge ._ddfc );for _begcf :=0;_begcf < _bebge ._gcbge ;_begcf ++{if !_bebge .emptyCompositeRow (_begcf ){_agbbg =append (_agbbg ,_begcf );
};};for _cgafa :=0;_cgafa < _bebge ._ddfc ;_cgafa ++{if !_bebge .emptyCompositeColumn (_cgafa ){_gbae =append (_gbae ,_cgafa );};};if len (_agbbg )==_bebge ._gcbge &&len (_gbae )==_bebge ._ddfc {return _bebge ;};_edbdg :=textTable {_edgac :_bebge ._edgac ,_ddfc :len (_gbae ),_gcbge :len (_agbbg ),_efeac :make (map[uint64 ]*textPara ,len (_gbae )*len (_agbbg ))};
if _cgafg {_b .Log .Info ("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064",_bebge ._ddfc ,_bebge ._gcbge ,len (_gbae ),len (_agbbg ));_b .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_gbae );
_b .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_agbbg );};for _bgdgd ,_beggc :=range _agbbg {for _ffde ,_ccbg :=range _gbae {_dcgfb ,_efdcf :=_bebge .getComposite (_ccbg ,_beggc );if _dcgfb ==nil {continue ;
};if _cgafg {_ce .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_ffde ,_bgdgd ,_ccbg ,_beggc ,_dfcggd (_dcgfb .merge ().text (),50));};_edbdg .putComposite (_ffde ,_bgdgd ,_dcgfb ,_efdcf );
};};return &_edbdg ;};func (_fdcf paraList )extractTables (_ffgge []gridTiling )paraList {if _cgafg {_b .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_fdcf ));
};if len (_fdcf )< _abda {return _fdcf ;};_dbdd :=_fdcf .findTables (_ffgge );if _cgafg {_b .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_dbdd ));
for _faff ,_egbgae :=range _dbdd {_egbgae .log (_ce .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_faff ));};};return _fdcf .applyTables (_dbdd );};type wordBag struct{_bg .PdfRectangle ;_ecdf float64 ;_debag ,_gfe rulingList ;
_dgec float64 ;_cgdg map[int ][]*textWord ;};func _fffd (_dbbc ,_gbdf *textPara )bool {return _gefa (_dbbc ._gfbgd ,_gbdf ._gfbgd )};func (_gcfb *textObject )setWordSpacing (_bdbc float64 ){if _gcfb ==nil {return ;};_gcfb ._gacd ._cagc =_bdbc ;};func _fecc (_aacad ,_adcf _gab .Point )rulingKind {_geaf :=_ef .Abs (_aacad .X -_adcf .X );
_fbeba :=_ef .Abs (_aacad .Y -_adcf .Y );return _bffg (_geaf ,_fbeba ,_ceecf );};func (_cdgd *textPara )writeText (_gcbd _ga .Writer ){if _cdgd ._bgba ==nil {_cdgd .writeCellText (_gcbd );return ;};for _accd :=0;_accd < _cdgd ._bgba ._gcbge ;_accd ++{for _faadc :=0;
_faadc < _cdgd ._bgba ._ddfc ;_faadc ++{_eagg :=_cdgd ._bgba .get (_faadc ,_accd );if _eagg ==nil {_gcbd .Write ([]byte ("\u0009"));}else {_eagg .writeCellText (_gcbd );};_gcbd .Write ([]byte ("\u0020"));};if _accd < _cdgd ._bgba ._gcbge -1{_gcbd .Write ([]byte ("\u000a"));
};};};func _ddcc (_adcdg bounded )float64 {return -_adcdg .bbox ().Lly };func (_gaga *TextMarkArray )exists (_becg TextMark )bool {for _ ,_cbef :=range _gaga .Elements (){if _e .DeepEqual (_becg .DirectObject ,_cbef .DirectObject )&&_e .DeepEqual (_becg .BBox ,_cbef .BBox )&&_cbef .Text ==_becg .Text {return true ;
};};return false ;};func (_agae *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_ggaf :=make (map[int ]map[*textWord ]struct{},len (_agae ._cgdg ));for _fdba :=range _agae ._cgdg {_ggaf [_fdba ]=make (map[*textWord ]struct{});};return _ggaf ;
};func _gfea (_cbdea ,_cddab _bg .PdfRectangle )bool {return _cbdea .Llx <=_cddab .Llx &&_cddab .Urx <=_cbdea .Urx &&_cbdea .Lly <=_cddab .Lly &&_cddab .Ury <=_cbdea .Ury ;};func (_ggbe rulingList )sort (){_df .Slice (_ggbe ,_ggbe .comp )};
2023-02-07 17:17:49 +00:00
2023-07-28 12:14:31 +00:00
// String returns a description of `l`: its `_cbbd` value, bounding box, font size and text.
func (l *textLine) String() string {
	return _ce.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",
		l._cbbd, l.PdfRectangle, l._bfbb, l.text())
}

// asRuling converts the rectangle to a ruling with mark kind `_ceag`.
// For kind `_acgee` the ruling sits at the mean of Llx and Urx and spans Lly..Ury; for kind
// `_cefaa` it sits at the mean of Lly and Ury and spans Llx..Urx. The ruling's `_fadae` is the
// value returned by checkWidth for the rectangle's extent across the ruling.
// It returns (nil, false) if checkWidth rejects the rectangle or the kind is neither of the two.
func (r rectRuling) asRuling() (*ruling, bool) {
	result := ruling{_eabdg: r._fbad, Color: r.Color, _gggfe: _ceag}

	var (
		width float64
		ok    bool
	)
	switch r._fbad {
	case _acgee:
		result._befee = 0.5 * (r.Llx + r.Urx)
		result._agbc, result._gffgd = r.Lly, r.Ury
		if width, ok = r.checkWidth(r.Llx, r.Urx); !ok {
			if _cfde {
				_b.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", r)
			}
			return nil, false
		}
	case _cefaa:
		result._befee = 0.5 * (r.Lly + r.Ury)
		result._agbc, result._gffgd = r.Llx, r.Urx
		if width, ok = r.checkWidth(r.Lly, r.Ury); !ok {
			if _cfde {
				_b.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", r)
			}
			return nil, false
		}
	default:
		_b.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", r._fbad)
		return nil, false
	}
	result._fadae = width
	return &result, true
}

// _egbga returns the smallest rectangle that contains both `_gafe` and `_cebb`.
func _egbga(_gafe, _cebb _bg.PdfRectangle) _bg.PdfRectangle {
	var union _bg.PdfRectangle
	union.Llx = _ef.Min(_gafe.Llx, _cebb.Llx)
	union.Lly = _ef.Min(_gafe.Lly, _cebb.Lly)
	union.Urx = _ef.Max(_gafe.Urx, _cebb.Urx)
	union.Ury = _ef.Max(_gafe.Ury, _cebb.Ury)
	return union
}

// llyOrdering returns the indexes of the paras ordered by increasing Lly. The sort is stable,
// so paras with equal Lly keep their relative order.
func (paras paraList) llyOrdering() []int {
	order := make([]int, len(paras))
	for i := range order {
		order[i] = i
	}
	byLly := func(a, b int) bool {
		return paras[order[a]].Lly < paras[order[b]].Lly
	}
	_df.SliceStable(order, byLly)
	return order
}

// _bgbdd looks like a regular expression for list markers at the start of a string, such as
// "a)", "a.", "1)", "1.", "(a)" and "(1)" — its use is not visible here, confirm at the
// call site.
var _bgbdd string = "\u005e\u005b\u0061\u002d\u007a\u0041\u002dZ\u005d\u0028\u005c)\u007c\u005c\u002e)\u007c\u005e[\u005c\u0064\u005d\u002b\u0028\u005c)\u007c\\.\u0029\u007c\u005e\u005c\u0028\u005b\u0061\u002d\u007a\u0041\u002d\u005a\u005d\u005c\u0029\u007c\u005e\u005c\u0028\u005b\u005c\u0064\u005d\u002b\u005c\u0029"
// getTextMarkAtOffset returns the first mark in `_bca` whose Offset equals `_gcda`, or nil if
// there is none. The returned pointer refers to a copy of the stored mark, so changes made
// through it do not alter the array.
func (ma *TextMarkArray) getTextMarkAtOffset(_gcda int) *TextMark {
	for _, mark := range ma._bca {
		if mark.Offset != _gcda {
			continue
		}
		found := mark
		return &found
	}
	return nil
}

var _cd = false
2023-03-01 18:45:57 +00:00
2023-07-28 12:14:31 +00:00
// Text gets the extracted text contained in `l`, including the text of nested entries as
// written by _dgbe, starting with an empty indent.
func (l *list) Text() string {
	var sb _c.Builder
	indent := ""
	_dgbe(l, &sb, &indent)
	return sb.String()
}

// _gcgc builds a ruling for the segment from `_gcfe` to `_dbec` with colour `_bfedg`.
// It returns (nil, false) when `_ddda` classifies the segment as `_bgbdg`; otherwise it returns
// the result of lineRuling.asRuling.
func _gcgc(_gcfe, _dbec _gab.Point, _bfedg _ag.Color) (*ruling, bool) {
	kind := _ddda(_gcfe, _dbec)
	if kind == _bgbdg {
		return nil, false
	}
	line := lineRuling{_egaf: _gcfe, _eaebf: _dbec, _cbfb: kind, Color: _bfedg}
	return line.asRuling()
}
2023-03-01 18:45:57 +00:00
2023-07-28 12:14:31 +00:00
// String returns a string describing the tile: its rectangle followed by four one-character
// flags, "L", "R", "B" and "T" for `_afdge`, `_bfecb`, `_eaed` and `_fdbd` respectively, with
// "_" in place of any flag that is not set.
func (tile gridTile) String() string {
	flags := [4]string{"\u005f", "\u005f", "\u005f", "\u005f"}
	if tile._afdge {
		flags[0] = "\u004c"
	}
	if tile._bfecb {
		flags[1] = "\u0052"
	}
	if tile._eaed {
		flags[2] = "\u0042"
	}
	if tile._fdbd {
		flags[3] = "\u0054"
	}
	return _ce.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073",
		tile.PdfRectangle, flags[0], flags[1], flags[2], flags[3])
}

// equals reports whether the two rulings have the same kind and whether `_ffcaf` accepts each
// pair of their `_befee`, `_agbc` and `_gffgd` values.
func (r *ruling) equals(other *ruling) bool {
	if r._eabdg != other._eabdg {
		return false
	}
	if !_ffcaf(r._befee, other._befee) {
		return false
	}
	if !_ffcaf(r._agbc, other._agbc) {
		return false
	}
	return _ffcaf(r._gffgd, other._gffgd)
}
2023-03-01 18:45:57 +00:00
2023-07-28 12:14:31 +00:00
// String returns a description of `t` in the form "<_ddfc> x <_gcbge> <_edgac>".
func (t *textTable) String() string {
	cols, rows := t._ddfc, t._gcbge
	return _ce.Sprintf("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074", cols, rows, t._edgac)
}

// Compile-time switches that gate diagnostic logging (for example `if _cgafg { ... }`).
// Every one of them evaluates to false here; the derived switches are only true when their
// base switch is.
const (
	_eebec = false
	_efe   = false
	_cfab  = false
	_adbf  = false
	_bdaae = false
	_beff  = false
	_fcea  = false
	_ecdg  = false

	_gde   = false
	_cdbf  = _gde && true
	_baeef = _cdbf && false
	_aebb  = _gde && true

	_cgafg = false
	_efed  = _cgafg && false
	_eecc  = _cgafg && true

	_bccgb = false
	_fegd  = _bccgb && false
	_dbdb  = _bccgb && false
	_agd   = _bccgb && true
	_cfde  = _bccgb && false
	_geg   = _bccgb && false
)
2023-03-01 18:45:57 +00:00
2023-07-28 12:14:31 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
func (_fea *Extractor )ExtractText ()(string ,error ){_dgd ,_ ,_ ,_eca :=_fea .ExtractTextWithStats ();return _dgd ,_eca ;};
2023-05-29 17:26:33 +00:00
2023-07-28 12:14:31 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	_bc   string
	_gga  *_bg.PdfPageResources
	_de   _bg.PdfRectangle
	_fc   *_bg.PdfRectangle
	_eg   map[string]fontEntry
	_gbfd map[string]textResult
	_ge   int64
	_ead  int
	_ad   *Options
	_fbb  *_ea.PdfObject
	_ca   _ea.PdfObject
}

// depth returns -1 when `_cfga` is set, otherwise the `_cbbd` of the para's first line, or the
// depth of the para's table when the para has no lines.
func (p *textPara) depth() float64 {
	switch {
	case p._cfga:
		return -1.0
	case len(p._gfbb) > 0:
		return p._gfbb[0]._cbbd
	default:
		return p._bgba.depth()
	}
}

// fontEntry pairs a font with an int64 value.
type fontEntry struct {
	_bcae *_bg.PdfFont
	_edcg int64
}

// Numeric thresholds and limits used by the extractor. Only those whose use is visible
// nearby are annotated.
const (
	_efea  = 1.0e-6
	_dgeb  = 1.0e-4 // Rounding grain of _egagc.
	_fcbd  = 10     // Passed to _fdfb with the text angle in newTextMark.
	_fefe  = 6      // Divisor that maps a depth to an index in _ebfc.
	_gdab  = 0.5
	_ffef  = 0.12
	_ccfg  = 0.19
	_ccfb  = 0.04
	_gbcf  = 0.04
	_dega  = 1.0
	_fbbf  = 0.04
	_bcbge = 0.4
	_abg   = 0.7
	_begg  = 1.0
	_dcdf  = 0.1
	_dcdc  = 1.4
	_efdcb = 0.46
	_cfbb  = 0.02
	_ccee  = 0.2
	_cfdf  = 0.5 // Fraction of a mark's width/height used by inDiacriticArea.
	_ecg   = 4   // Minimum rune count in _ggff.
	_dggb  = 4.0
	_abda  = 6 // Minimum number of paras for extractTables to search for tables.
	_dfecd = 0.3
	_bbe   = 0.01
	_baeeb = 0.02
	_gfda  = 2
	_cafad = 2
	_cfae  = 500
	_gbca  = 4.0
	_gfdaf = 4.0
	_ceecf = 0.05 // Passed to _bffg by _fecc.
	_gfad  = 0.1
	_cfgg  = 2.0 // Tolerance used by ruling.encloses and augmentGrid.
	_bddeb = 2.0 // Bound used by _eabga.
	_cddf  = 1.5
	_feaa  = 3.0
	_eadb  = 0.25
)

// _gdae returns a new textPara with bounding box `_cdag` and lines `_bgdf`.
func _gdae(_cdag _bg.PdfRectangle, _bgdf []*textLine) *textPara {
	para := &textPara{PdfRectangle: _cdag, _gfbb: _bgdf}
	return para
}

// inDiacriticArea reports whether `_fcfe` is positioned close enough to the receiver: the sum
// of the left-edge and right-edge offsets must be smaller in magnitude than `_cfdf` times the
// receiver's width, and the bottom-edge offset smaller than `_cfdf` times its height.
func (tm *textMark) inDiacriticArea(_fcfe *textMark) bool {
	dLeft := tm.Llx - _fcfe.Llx
	dRight := tm.Urx - _fcfe.Urx
	dBottom := tm.Lly - _fcfe.Lly
	if !(_ef.Abs(dLeft+dRight) < tm.Width()*_cfdf) {
		return false
	}
	return _ef.Abs(dBottom) < tm.Height()*_cfdf
}

// list gathers the list lines (getListLines) and all lines (`_gfbb`) of every para, and
// returns the result of `_ggeb` applied to all lines and to `_dceg` of the list lines.
func (paras paraList) list() []*list {
	var listLines, allLines []*textLine
	for _, para := range paras {
		listLines = append(listLines, para.getListLines()...)
		allLines = append(allLines, para._gfbb...)
	}
	return _ggeb(allLines, _dceg(listLines))
}

// _ebba returns the translation matrix for the offset (`_bebga`.X, `_bebga`.Y).
func _ebba(_bebga _gab.Point) _gab.Matrix {
	dx, dy := _bebga.X, _bebga.Y
	return _gab.TranslationMatrix(dx, dy)
}
2023-05-29 17:26:33 +00:00
2023-07-28 12:14:31 +00:00
// Tables returns the tables extracted from the page.
func (pt PageText) Tables() []TextTable {
	tables := pt._ecege
	if _cgafg {
		_b.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}

// clearPath discards all subpaths and clears the pending-subpath flag `_afge`.
func (ss *shapesState) clearPath() {
	ss._cbfc, ss._afge = nil, false
	if _bdaae {
		_b.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", ss)
	}
}

// ruling has a kind, a mark kind, a colour and four float64 values. asRuling fills `_befee`
// with the ruling's position, `_agbc` and `_gffgd` with the ends of its span, and `_fadae`
// with the value returned by checkWidth.
type ruling struct {
	_eabdg rulingKind
	_gggfe markKind
	_ag.Color
	_befee float64
	_agbc  float64
	_gffgd float64
	_fadae float64
}

// depthBand returns depthRange for the depth indexes of `_eeff` and `_bafg`, or nil if the
// bag holds no words.
func (bag *wordBag) depthBand(_eeff, _bafg float64) []int {
	if len(bag._cgdg) == 0 {
		return nil
	}
	lo := bag.getDepthIdx(_eeff)
	hi := bag.getDepthIdx(_bafg)
	return bag.depthRange(lo, hi)
}

// _egfd turns every pair of consecutive points in the subpaths of `_fabc` into a ruling using
// _gcgc, keeping those that _gcgc accepts. `_bbcaa` is applied to `_fabc` first. Subpaths with
// fewer than two points are ignored.
func _egfd(_fabc []pathSection) rulingList {
	_bbcaa(_fabc)
	if _bccgb {
		_b.Log.Info("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073", len(_fabc))
	}
	var rulings rulingList
	for _, section := range _fabc {
		for _, path := range section._dgfc {
			points := path._fbcgf
			if len(points) < 2 {
				continue
			}
			for i := 1; i < len(points); i++ {
				r, ok := _gcgc(points[i-1], points[i], section.Color)
				if ok {
					rulings = append(rulings, r)
				}
			}
		}
	}
	if _bccgb {
		_b.Log.Info("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073", rulings)
	}
	return rulings
}

// _bbccc returns the translation component of `_eage` as a point.
func _bbccc(_eage _gab.Matrix) _gab.Point {
	var pt _gab.Point
	pt.X, pt.Y = _eage.Translation()
	return pt
}

// _ebef is a list of single-character strings, mostly bullet-like symbols (plus "o").
var _ebef = []string{"\u2756", "\u27a2", "\u2713", "\u2022", "\uf0a7", "\u25a1", "\u2212", "\u25a0", "\u25aa", "\u006f"}

// _eabga reports whether the magnitude of `_dgeeg` is below `_bddeb`.
func _eabga(_dgeeg float64) bool {
	magnitude := _ef.Abs(_dgeeg)
	return magnitude < _bddeb
}

// newTablePara returns a para that wraps the table, with both of its rectangles set to the
// table's computed bounding box.
func (t *textTable) newTablePara() *textPara {
	box := t.computeBbox()
	para := &textPara{_bgba: t, PdfRectangle: box, _gfbgd: box}
	if _cgafg {
		_b.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// depthIndexes returns the keys of `_cgdg` in increasing order, or nil if there are none.
func (bag *wordBag) depthIndexes() []int {
	if len(bag._cgdg) == 0 {
		return nil
	}
	indexes := make([]int, 0, len(bag._cgdg))
	for idx := range bag._cgdg {
		indexes = append(indexes, idx)
	}
	_df.Ints(indexes)
	return indexes
}

// lines returns the lines of all the paras, in para order.
func (paras paraList) lines() []*textLine {
	var all []*textLine
	for _, para := range paras {
		all = append(all, para._gfbb...)
	}
	return all
}

// _dbcdg returns the keys of `_acgbd` in increasing order.
func _dbcdg(_acgbd map[int]intSet) []int {
	keys := make([]int, len(_acgbd))
	next := 0
	for key := range _acgbd {
		keys[next] = key
		next++
	}
	_df.Ints(keys)
	return keys
}

// encloses reports whether the interval [`_efcf`, `_bebc`] fits inside the ruling's span
// [`_agbc`, `_gffgd`] widened by `_cfgg` at each end.
func (r *ruling) encloses(_efcf, _bebc float64) bool {
	lowOK := r._agbc-_cfgg <= _efcf
	highOK := _bebc <= r._gffgd+_cfgg
	return lowOK && highOK
}

// _ggff reports whether `_bedg` has at least `_ecg` runes, ends in a rune from the Unicode
// Hyphen property and has a non-space rune immediately before it.
func _ggff(_bedg string) bool {
	if _a.RuneCountInString(_bedg) < _ecg {
		return false
	}
	last, size := _a.DecodeLastRuneInString(_bedg)
	if size <= 0 {
		return false
	}
	if !_f.Is(_f.Hyphen, last) {
		return false
	}
	prev, prevSize := _a.DecodeLastRuneInString(_bedg[:len(_bedg)-size])
	if prevSize <= 0 {
		return false
	}
	return !_f.IsSpace(prev)
}

// augmentGrid splits the list into the two lists returned by vertsHorzs and, where the
// bounding box of one list extends more than `_cfgg` beyond the bounding box of the other,
// adds a ruling of mark kind `_abgg` at that edge: kind `_acgee` rulings are added to the first
// list and kind `_cefaa` rulings to the second.
// If either list is empty, or the lists together have as many entries as the receiver after
// the additions, the lists from vertsHorzs are returned unchanged.
func (rl rulingList) augmentGrid() (rulingList, rulingList) {
	verts, horzs := rl.vertsHorzs()
	if len(verts) == 0 || len(horzs) == 0 {
		return verts, horzs
	}
	origVerts, origHorzs := verts, horzs
	vBox := verts.bbox()
	hBox := horzs.bbox()
	if _bccgb {
		_b.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066", vBox)
		_b.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066", hBox)
	}

	// The added rulings span the full extent of the list they join.
	newVert := func(pos float64) *ruling {
		return &ruling{_gggfe: _abgg, _eabdg: _acgee, _befee: pos, _agbc: vBox.Lly, _gffgd: vBox.Ury}
	}
	newHorz := func(pos float64) *ruling {
		return &ruling{_gggfe: _abgg, _eabdg: _cefaa, _befee: pos, _agbc: hBox.Llx, _gffgd: hBox.Urx}
	}

	if hBox.Llx < vBox.Llx-_cfgg {
		verts = append(rulingList{newVert(hBox.Llx)}, verts...)
	}
	if hBox.Urx > vBox.Urx+_cfgg {
		verts = append(verts, newVert(hBox.Urx))
	}
	if vBox.Lly < hBox.Lly-_cfgg {
		horzs = append(rulingList{newHorz(vBox.Lly)}, horzs...)
	}
	if vBox.Ury > hBox.Ury+_cfgg {
		horzs = append(horzs, newHorz(vBox.Ury))
	}
	if len(verts)+len(horzs) == len(rl) {
		return origVerts, origHorzs
	}

	combined := append(verts, horzs...)
	rl.log("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064")
	combined.log("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d")
	return verts, horzs
}

// getFillColor returns the colour `_badcf` derives from the non-stroking colorspace and
// non-stroking colour in `_agbf`.
func (to *textObject) getFillColor() _ag.Color {
	state := to._agbf
	return _badcf(state.ColorspaceNonStroking, state.ColorNonStroking)
}

// _gfdaa returns a new slice holding the elements of `_eaaa` in reverse order.
func _gfdaa(_eaaa []int) []int {
	reversed := make([]int, len(_eaaa))
	for src, dst := 0, len(_eaaa)-1; src < len(_eaaa); src, dst = src+1, dst-1 {
		reversed[dst] = _eaaa[src]
	}
	return reversed
}

// _ggcc returns the text `_bfcg` of `_egcd` with empty lines removed and every remaining line
// prefixed with `*_abee` and terminated by a newline.
func _ggcc(_egcd *list, _abee *string) string {
	var sb _c.Builder
	for _, line := range _c.Split(_egcd._bfcg, "\u000a") {
		if line == "" {
			continue
		}
		sb.WriteString(*_abee)
		sb.WriteString(line)
		sb.WriteString("\u000a")
	}
	return sb.String()
}
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
2023-01-08 22:34:27 +00:00
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
2022-07-13 21:28:43 +00:00
//
2023-01-08 22:34:27 +00:00
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
2022-07-13 21:28:43 +00:00
type TextMark struct{
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Text is the extracted text.
Text string ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// BBox is the bounding box of the text.
2023-07-28 12:14:31 +00:00
BBox _bg .PdfRectangle ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Font is the font the text was drawn with.
2023-07-28 12:14:31 +00:00
Font *_bg .PdfFont ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2023-07-28 12:14:31 +00:00
FillColor _ag .Color ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2023-07-28 12:14:31 +00:00
StrokeColor _ag .Color ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Orientation is the text orientation.
Orientation int ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// DirectObject is the underlying PdfObject (Text Object) that represents the visible text. It is exposed to give
// simple access to the Text Object in case editing or replacement of some text is needed, e.g. during redaction.
2023-07-28 12:14:31 +00:00
DirectObject _ea .PdfObject ;
2022-07-13 21:28:43 +00:00
// ObjString is the decoded string operand of a text-showing operator. It has the same value as the `Text` attribute,
// except when many glyphs are represented by one Text Object whose string operand holds several characters; in that
// case ObjString spans more than one character string and those strings fall in different TextMark objects.
2023-07-28 12:14:31 +00:00
ObjString []string ;Tw float64 ;Th float64 ;Tc float64 ;Index int ;_fgfd bool ;_ggea *TextTable ;};
// _begbe converts a slice of exactly two PDF operands into a pair of floats.
// It returns an "invalid number of params: %d" error when the slice does not
// hold exactly two operands, and otherwise propagates any error reported by
// GetNumbersAsFloat.
func _begbe (_eaeeg []_ea .PdfObject )(_cggde ,_bfda float64 ,_bfaf error ){if len (_eaeeg )!=2{return 0,0,_ce .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_eaeeg ));
};_degcc ,_bfaf :=_ea .GetNumbersAsFloat (_eaeeg );if _bfaf !=nil {return 0,0,_bfaf ;};return _degcc [0],_degcc [1],nil ;};
2023-02-07 17:17:49 +00:00
2023-07-28 12:14:31 +00:00
// String returns a string describing `pt`.
// The result is a header line prefixed with "-", one line per element of the
// page text, and the same header prefixed with "+", all joined by newlines.
func (_cfcb PageText) String() string {
	header := _ce.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_cfcb._ccf))

	lines := make([]string, 0, len(_cfcb._ccf)+2)
	lines = append(lines, "\u002d"+header)
	for _, mark := range _cfcb._ccf {
		lines = append(lines, mark.String())
	}
	lines = append(lines, "\u002b"+header)

	return _c.Join(lines, "\u000a")
}

// setTextMatrix sets both matrices held by the text object to the matrix built
// from the six values in _cebf. A slice of any other length is logged at debug
// level and ignored.
func (_bef *textObject) setTextMatrix(_cebf []float64) {
	if len(_cebf) == 6 {
		matrix := _gab.NewMatrix(_cebf[0], _cebf[1], _cebf[2], _cebf[3], _cebf[4], _cebf[5])
		_bef._fda = matrix
		_bef._cfec = matrix
		return
	}
	_b.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", len(_cebf))
}

// bbox returns the rectangle embedded in the paragraph.
func (_babe *textPara) bbox() _bg.PdfRectangle {
	return _babe.PdfRectangle
}
// buildList collects the list ("L") structure elements, plus whatever _eabd
// extracts from "Table" elements, found directly under the top structure
// element, and converts them to *list values via _eafb and _bgabc.
//
// The top element is the single root child when it is one of Document, Sect,
// Part, Div or Art; when the root has zero or several children a synthetic
// element wrapping them is used. It returns nil when the receiver is nil or no
// top element can be determined.
func (_aef *structTreeRoot) buildList(_egdgb map[int][]*textLine, _ggc _ea.PdfObject) []*list {
	if _aef == nil {
		_b.Log.Debug("\u0062\u0075\u0069\u006c\u0064\u004c\u0069\u0073\u0074\u003a\u0020t\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0069\u0073 \u006e\u0069\u006c")
		return nil
	}
	var _gdeb *structElement
	_cddg := []structElement{}
	if len(_aef._cegf) == 1 {
		_cbgd := _aef._cegf[0]._bfeg
		if _cbgd == "\u0044\u006f\u0063\u0075\u006d\u0065\u006e\u0074" || _cbgd == "\u0053\u0065\u0063\u0074" || _cbgd == "\u0050\u0061\u0072\u0074" || _cbgd == "\u0044\u0069\u0076" || _cbgd == "\u0041\u0072\u0074" {
			_gdeb = &_aef._cegf[0]
		}
	} else {
		_gdeb = &structElement{_efce: _aef._cegf, _bfeg: _aef._baeb}
	}
	if _gdeb == nil {
		_b.Log.Debug("\u0062\u0075\u0069\u006cd\u004c\u0069\u0073\u0074\u003a\u0020\u0074\u006f\u0070\u0045l\u0065m\u0065\u006e\u0074\u0020\u0069\u0073\u0020n\u0069\u006c")
		return nil
	}
	for _, _bcbgc := range _gdeb._efce {
		if _bcbgc._bfeg == "\u004c" { // "L"
			_cddg = append(_cddg, _bcbgc)
		} else if _bcbgc._bfeg == "\u0054\u0061\u0062l\u0065" { // "Table"
			_ccdc := _eabd(_bcbgc)
			_cddg = append(_cddg, _ccdc...)
		}
	}
	_bdgb := _eafb(_cddg, _egdgb, _ggc)
	var _dadfb []*list
	for _, _gfadc := range _bdgb {
		_cbaf := _bgabc(_gfadc)
		_dadfb = append(_dadfb, _cbaf...)
	}
	return _dadfb
}

// _fdfb rounds _afed to the nearest multiple of _fdec and returns it as an int.
// A zero _fdec is treated as 1 so the division is always defined.
func _fdfb(_afed float64, _fdec int) int {
	if _fdec == 0 {
		_fdec = 1
	}
	_gfba := float64(_fdec)
	return int(_ef.Round(_afed/_gfba) * _gfba)
}

// _fgdf returns the intersection of the two rectangles and true when _geec
// reports that they overlap; otherwise it returns the zero rectangle and false.
func _fgdf(_faebb, _faagd _bg.PdfRectangle) (_bg.PdfRectangle, bool) {
	if !_geec(_faebb, _faagd) {
		return _bg.PdfRectangle{}, false
	}
	return _bg.PdfRectangle{
		Llx: _ef.Max(_faebb.Llx, _faagd.Llx),
		Urx: _ef.Min(_faebb.Urx, _faagd.Urx),
		Lly: _ef.Max(_faebb.Lly, _faagd.Lly),
		Ury: _ef.Min(_faebb.Ury, _faagd.Ury),
	}, true
}

// extractPageText parses the content stream _aga and walks its operators,
// collecting text marks and stroked/filled paths into a PageText.
//
// _cge are the resources used to process the stream, _fac is the matrix
// pre-multiplied onto the CTM, and _ece is the form XObject recursion level;
// exceeding the package limit _bb aborts with a "form stack overflow" error.
// Form XObjects invoked by `Do` are processed recursively and cached on the
// extractor by XObject name.
//
// Besides the PageText and error it returns two counters taken from the text
// state (NOTE(review): presumably the number of characters processed and the
// number that could not be decoded; confirm against the textState definition).
func (_ffe *Extractor) extractPageText(_aga string, _cge *_bg.PdfPageResources, _fac _gab.Matrix, _ece int) (*PageText, int, int, error) {
	// Log formats shared by several operator handlers below.
	const (
		errFmt   = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" // "ERROR: err=%v"
		ctmFmt   = "\u0063\u0074\u006d\u003d\u0025\u0073"                                           // "ctm=%s"
		errorFmt = "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v"                                   // "ERROR: %v"
	)

	_b.Log.Trace("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d", _ece)
	_dbc := &PageText{_ebbd: _ffe._de, _cbc: _ffe._fbb, _fabg: _ffe._ca}
	_gccg := _afbd(_ffe._de)
	var _afeg stateStack
	_cab := _aedc(_ffe, _cge, _fb.GraphicsState{}, &_gccg, &_afeg)
	_cgd := shapesState{_afee: _fac, _dfgb: _gab.IdentityMatrix(), _eeadb: _cab}
	var _abe bool // true while inside a BT ... ET text object
	_bec := -1    // MCID of the enclosing marked-content sequence, -1 when none
	if _ece > _bb {
		_gcg := _d.New("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077")
		_b.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076", _ece, _gcg)
		return _dbc, _gccg._feab, _gccg._fefg, _gcg
	}
	_ddb := _fb.NewContentStreamParser(_aga)
	_cec, _bcbg := _ddb.Parse()
	if _bcbg != nil {
		_b.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _bcbg)
		return _dbc, _gccg._feab, _gccg._fefg, _bcbg
	}
	_dbc._gagg = _cec
	_dbbf := _fb.NewContentStreamProcessor(*_cec)
	_dbbf.AddHandler(_fb.HandlerConditionEnumAllOperands, "", func(_cbf *_fb.ContentStreamOperation, _eaec _fb.GraphicsState, _acb *_bg.PdfPageResources) error {
		_dgdb := _cbf.Operand
		if _cfab {
			_b.Log.Info("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s", _cbf)
		}
		switch _dgdb {
		case "\u0071": // q: save the text state.
			if _bdaae {
				_b.Log.Info(ctmFmt, _cgd._dfgb)
			}
			_afeg.push(&_gccg)
		case "\u0051": // Q: restore the text state.
			if !_afeg.empty() {
				_gccg = *_afeg.pop()
			}
			_cgd._dfgb = _eaec.CTM
			if _bdaae {
				_b.Log.Info(ctmFmt, _cgd._dfgb)
			}
		case "\u0042\u0044\u0043": // BDC: begin marked content with a property list.
			// Guard the operand count; a malformed stream would otherwise
			// panic on Params[1]. Like a non-dictionary property list, the
			// operator is skipped rather than failing the extraction.
			if len(_cbf.Params) < 2 {
				_b.Log.Debug("ERROR: BDC op=%s. Expected 2 operands, got %d", _cbf, len(_cbf.Params))
				return nil
			}
			_ecea, _eec := _ea.GetDict(_cbf.Params[1])
			if !_eec {
				_b.Log.Debug("\u0045\u0052\u0052O\u0052\u003a\u0020\u0042D\u0043\u0020\u006f\u0070\u003d\u0025\u0073 \u0047\u0065\u0074\u0044\u0069\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064", _cbf)
				return nil
			}
			_ega := _ecea.Get("\u004d\u0043\u0049\u0044") // "MCID"
			if _ega != nil {
				_bgcg, _aba := _ea.GetIntVal(_ega)
				if !_aba {
					_b.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u0042\u0044C\u0020\u006f\u0070=\u0025\u0073\u002e\u0020\u0042\u0061\u0064\u0020\u006eum\u0065\u0072\u0069c\u0061\u006c \u006f\u0062\u006a\u0065\u0063\u0074.\u0020\u006f=\u0025\u0073", _cbf, _ega)
					// A non-integer MCID must not be mistaken for MCID 0.
					_bgcg = -1
				}
				_bec = _bgcg
			} else {
				_bec = -1
			}
		case "\u0045\u004d\u0043": // EMC: end marked content.
			_bec = -1
		case "\u0042\u0054": // BT: begin text object.
			if _abe {
				_b.Log.Debug("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074")
				// Keep the marks of the unterminated text object.
				_dbc._ccf = append(_dbc._ccf, _cab._fecd...)
			}
			_abe = true
			_ffa := _eaec
			_ffa.CTM = _fac.Mult(_ffa.CTM)
			_cab = _aedc(_ffe, _acb, _ffa, &_gccg, &_afeg)
			_cgd._eeadb = _cab
		case "\u0045\u0054": // ET: end text object.
			if !_abe {
				_b.Log.Debug("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074")
			}
			_abe = false
			_dbc._ccf = append(_dbc._ccf, _cab._fecd...)
			_cab.reset()
		case "\u0054\u002a": // T*: move to the start of the next line.
			_cab.nextLine()
		case "\u0054\u0064": // Td: move text position.
			if _dcf, _def := _cab.checkOp(_cbf, 2, true); !_dcf {
				_b.Log.Debug(errFmt, _def)
				return _def
			}
			_bbd, _daeb, _ecfg := _begbe(_cbf.Params)
			if _ecfg != nil {
				return _ecfg
			}
			_cab.moveText(_bbd, _daeb)
		case "\u0054\u0044": // TD: move text position and set leading.
			if _egb, _gcac := _cab.checkOp(_cbf, 2, true); !_egb {
				_b.Log.Debug(errFmt, _gcac)
				return _gcac
			}
			_eaee, _cfe, _eed := _begbe(_cbf.Params)
			if _eed != nil {
				_b.Log.Debug(errFmt, _eed)
				return _eed
			}
			_cab.moveTextSetLeading(_eaee, _cfe)
		case "\u0054\u006a": // Tj: show text.
			if _afg, _beb := _cab.checkOp(_cbf, 1, true); !_afg {
				_b.Log.Debug("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076", _cbf, _beb)
				return _beb
			}
			_aeg := _ea.TraceToDirectObject(_cbf.Params[0])
			_decc, _dea := _ea.GetStringBytes(_aeg)
			if !_dea {
				_b.Log.Debug("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064", _cbf)
				return _ea.ErrTypeError
			}
			return _cab.showText(_aeg, _decc, _bec)
		case "\u0054\u004a": // TJ: show text with individual glyph positioning.
			if _eeb, _eafe := _cab.checkOp(_cbf, 1, true); !_eeb {
				_b.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _eafe)
				return _eafe
			}
			_abbd, _aab := _ea.GetArray(_cbf.Params[0])
			if !_aab {
				_b.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064", _cbf)
				// Best effort: a TJ with a non-array operand is skipped.
				return nil
			}
			return _cab.showTextAdjusted(_abbd, _bec)
		case "\u0027": // ': move to next line and show text.
			if _cdc, _cefa := _cab.checkOp(_cbf, 1, true); !_cdc {
				_b.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _cefa)
				return _cefa
			}
			_cfd := _ea.TraceToDirectObject(_cbf.Params[0])
			_eea, _efd := _ea.GetStringBytes(_cfd)
			if !_efd {
				_b.Log.Debug("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064", _cbf)
				return _ea.ErrTypeError
			}
			_cab.nextLine()
			return _cab.showText(_cfd, _eea, _bec)
		case "\u0022": // ": set word and character spacing, move to next line, show text.
			if _fbg, _bdc := _cab.checkOp(_cbf, 3, true); !_fbg {
				_b.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _bdc)
				return _bdc
			}
			// Operand order is `aw ac string "`: word spacing first, then
			// character spacing.
			_cebe, _fbf, _aabc := _begbe(_cbf.Params[:2])
			if _aabc != nil {
				return _aabc
			}
			_cfeb := _ea.TraceToDirectObject(_cbf.Params[2])
			_fec, _cebd := _ea.GetStringBytes(_cfeb)
			if !_cebd {
				_b.Log.Debug("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064", _cbf)
				return _ea.ErrTypeError
			}
			_cab.setWordSpacing(_cebe)
			_cab.setCharSpacing(_fbf)
			_cab.nextLine()
			return _cab.showText(_cfeb, _fec, _bec)
		case "\u0054\u004c": // TL: set text leading.
			_gge, _dfg := _cdb(_cbf)
			if _dfg != nil {
				_b.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _dfg)
				return _dfg
			}
			_cab.setTextLeading(_gge)
		case "\u0054\u0063": // Tc: set character spacing.
			_aeeg, _bebg := _cdb(_cbf)
			if _bebg != nil {
				_b.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _bebg)
				return _bebg
			}
			_cab.setCharSpacing(_aeeg)
		case "\u0054\u0066": // Tf: set font and size.
			if _face, _fgb := _cab.checkOp(_cbf, 2, true); !_face {
				_b.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _fgb)
				return _fgb
			}
			_fbd, _eaeb := _ea.GetNameVal(_cbf.Params[0])
			if !_eaeb {
				_b.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064", _cbf)
				return _ea.ErrTypeError
			}
			_ecfa, _dff := _ea.GetNumberAsFloat(_cbf.Params[1])
			// Check the conversion error itself, not the stale name flag.
			if _dff != nil {
				_b.Log.Debug("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _cbf, _dff)
				return _dff
			}
			_dff = _cab.setFont(_fbd, _ecfa)
			// An unsupported font is remembered on the text object instead of
			// aborting the extraction.
			_cab._cbde = _ae.Is(_dff, _ea.ErrNotSupported)
			if _dff != nil && !_cab._cbde {
				return _dff
			}
		case "\u0054\u006d": // Tm: set text matrix.
			if _aae, _cag := _cab.checkOp(_cbf, 6, true); !_aae {
				_b.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _cag)
				return _cag
			}
			_ebdf, _cgdc := _ea.GetNumbersAsFloat(_cbf.Params)
			if _cgdc != nil {
				_b.Log.Debug(errFmt, _cgdc)
				return _cgdc
			}
			_cab.setTextMatrix(_ebdf)
		case "\u0054\u0072": // Tr: set text rendering mode.
			if _fge, _eab := _cab.checkOp(_cbf, 1, true); !_fge {
				_b.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _eab)
				return _eab
			}
			_add, _eecf := _ea.GetIntVal(_cbf.Params[0])
			if !_eecf {
				_b.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064", _cbf)
				return _ea.ErrTypeError
			}
			_cab.setTextRenderMode(_add)
		case "\u0054\u0073": // Ts: set text rise.
			if _fef, _cfc := _cab.checkOp(_cbf, 1, true); !_fef {
				_b.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _cfc)
				return _cfc
			}
			_dad, _gfc := _ea.GetNumberAsFloat(_cbf.Params[0])
			if _gfc != nil {
				_b.Log.Debug(errFmt, _gfc)
				return _gfc
			}
			_cab.setTextRise(_dad)
		case "\u0054\u0077": // Tw: set word spacing.
			if _dce, _bfe := _cab.checkOp(_cbf, 1, true); !_dce {
				_b.Log.Debug(errFmt, _bfe)
				return _bfe
			}
			_aedg, _bfb := _ea.GetNumberAsFloat(_cbf.Params[0])
			if _bfb != nil {
				_b.Log.Debug(errFmt, _bfb)
				return _bfb
			}
			_cab.setWordSpacing(_aedg)
		case "\u0054\u007a": // Tz: set horizontal scaling.
			if _adf, _ggd := _cab.checkOp(_cbf, 1, true); !_adf {
				_b.Log.Debug(errFmt, _ggd)
				return _ggd
			}
			_ggeg, _aac := _ea.GetNumberAsFloat(_cbf.Params[0])
			if _aac != nil {
				_b.Log.Debug(errFmt, _aac)
				return _aac
			}
			_cab.setHorizScaling(_ggeg)
		case "\u0063\u006d": // cm: concatenate matrix to the CTM.
			_cgd._dfgb = _eaec.CTM
			if _cgd._dfgb.Singular() {
				// Fall back to a pure translation when the CTM is singular.
				_egcf := _gab.IdentityMatrix().Translate(_cgd._dfgb.Translation())
				_b.Log.Debug("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s", _cgd._dfgb, _egcf)
				_cgd._dfgb = _egcf
			}
			if _bdaae {
				_b.Log.Info(ctmFmt, _cgd._dfgb)
			}
		case "\u006d": // m: begin a new subpath.
			if len(_cbf.Params) != 2 {
				_b.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e", _gc)
				return nil
			}
			_cgc, _gaaf := _ea.GetNumbersAsFloat(_cbf.Params)
			if _gaaf != nil {
				return _gaaf
			}
			_cgd.moveTo(_cgc[0], _cgc[1])
		case "\u006c": // l: append a straight line segment.
			if len(_cbf.Params) != 2 {
				_b.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e", _gc)
				return nil
			}
			_bdea, _eag := _ea.GetNumbersAsFloat(_cbf.Params)
			if _eag != nil {
				return _eag
			}
			_cgd.lineTo(_bdea[0], _bdea[1])
		case "\u0063": // c: append a cubic Bezier curve.
			if len(_cbf.Params) != 6 {
				return _gc
			}
			_ecd, _gabd := _ea.GetNumbersAsFloat(_cbf.Params)
			if _gabd != nil {
				return _gabd
			}
			_b.Log.Debug("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f", _ecd)
			_cgd.cubicTo(_ecd[0], _ecd[1], _ecd[2], _ecd[3], _ecd[4], _ecd[5])
		case "\u0076", "\u0079": // v, y: Bezier curves with four operands.
			if len(_cbf.Params) != 4 {
				return _gc
			}
			_cgca, _fde := _ea.GetNumbersAsFloat(_cbf.Params)
			if _fde != nil {
				return _fde
			}
			_b.Log.Debug("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f", _cgca)
			_cgd.quadraticTo(_cgca[0], _cgca[1], _cgca[2], _cgca[3])
		case "\u0068": // h: close the current subpath.
			_cgd.closePath()
		case "\u0072\u0065": // re: append a rectangle.
			if len(_cbf.Params) != 4 {
				return _gc
			}
			_deba, _cga := _ea.GetNumbersAsFloat(_cbf.Params)
			if _cga != nil {
				return _cga
			}
			_cgd.drawRectangle(_deba[0], _deba[1], _deba[2], _deba[3])
			_cgd.closePath()
		case "\u0053": // S: stroke the path.
			_cgd.stroke(&_dbc._bgac)
			_cgd.clearPath()
		case "\u0073": // s: close and stroke the path.
			_cgd.closePath()
			_cgd.stroke(&_dbc._bgac)
			_cgd.clearPath()
		case "\u0046": // F: fill the path.
			_cgd.fill(&_dbc._ffb)
			_cgd.clearPath()
		case "\u0066", "\u0066\u002a": // f, f*: close and fill the path.
			_cgd.closePath()
			_cgd.fill(&_dbc._ffb)
			_cgd.clearPath()
		case "\u0042", "\u0042\u002a": // B, B*: fill and stroke the path.
			_cgd.fill(&_dbc._ffb)
			_cgd.stroke(&_dbc._bgac)
			_cgd.clearPath()
		case "\u0062", "\u0062\u002a": // b, b*: close, fill and stroke the path.
			_cgd.closePath()
			_cgd.fill(&_dbc._ffb)
			_cgd.stroke(&_dbc._bgac)
			_cgd.clearPath()
		case "\u006e": // n: end the path without painting.
			_cgd.clearPath()
		case "\u0044\u006f": // Do: invoke a named XObject.
			if len(_cbf.Params) == 0 {
				_b.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e", _cbf.Params)
				return _ea.ErrRangeError
			}
			_dgf, _bece := _ea.GetName(_cbf.Params[0])
			if !_bece {
				_b.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e", _cbf.Params[0])
				return _ea.ErrTypeError
			}
			// Only form XObjects carry text; anything else is ignored.
			_, _agb := _acb.GetXObjectByName(*_dgf)
			if _agb != _bg.XObjectTypeForm {
				break
			}
			_beca, _bece := _ffe._gbfd[_dgf.String()]
			if !_bece {
				// Not cached yet: extract the form's text recursively.
				_cfgf, _ade := _acb.GetXObjectFormByName(*_dgf)
				if _ade != nil {
					_b.Log.Debug(errorFmt, _ade)
					return _ade
				}
				_cde, _ade := _cfgf.GetContentStream()
				if _ade != nil {
					_b.Log.Debug(errorFmt, _ade)
					return _ade
				}
				// A form without its own resources inherits the caller's.
				_fbbb := _cfgf.Resources
				if _fbbb == nil {
					_fbbb = _acb
				}
				_ggg := _eaec.CTM
				if _ceg, _cea := _ea.GetArray(_cfgf.Matrix); _cea {
					_dge, _dde := _ceg.GetAsFloat64Slice()
					if _dde != nil {
						return _dde
					}
					if len(_dge) != 6 {
						return _gc
					}
					_bgab := _gab.NewMatrix(_dge[0], _dge[1], _dge[2], _dge[3], _dge[4], _dge[5])
					_ggg = _eaec.CTM.Mult(_bgab)
				}
				_bgdd, _gbcc, _cecc, _ade := _ffe.extractPageText(string(_cde), _fbbb, _fac.Mult(_ggg), _ece+1)
				if _ade != nil {
					_b.Log.Debug(errorFmt, _ade)
					return _ade
				}
				_beca = textResult{*_bgdd, _gbcc, _cecc}
				_ffe._gbfd[_dgf.String()] = _beca
			}
			_cgd._dfgb = _eaec.CTM
			if _bdaae {
				_b.Log.Info(ctmFmt, _cgd._dfgb)
			}
			_dbc._ccf = append(_dbc._ccf, _beca._ddd._ccf...)
			_dbc._bgac = append(_dbc._bgac, _beca._ddd._bgac...)
			_dbc._ffb = append(_dbc._ffb, _beca._ddd._ffb...)
			_gccg._feab += _beca._aede
			_gccg._fefg += _beca._bega
		case "\u0072\u0067", "\u0067", "\u006b", "\u0063\u0073", "\u0073\u0063", "\u0073\u0063\u006e": // rg, g, k, cs, sc, scn: non-stroking colour.
			_cab._agbf.ColorspaceNonStroking = _eaec.ColorspaceNonStroking
			_cab._agbf.ColorNonStroking = _eaec.ColorNonStroking
		case "\u0052\u0047", "\u0047", "\u004b", "\u0043\u0053", "\u0053\u0043", "\u0053\u0043\u004e": // RG, G, K, CS, SC, SCN: stroking colour.
			_cab._agbf.ColorspaceStroking = _eaec.ColorspaceStroking
			_cab._agbf.ColorStroking = _eaec.ColorStroking
		}
		return nil
	})
	_bcbg = _dbbf.Process(_cge)
	return _dbc, _gccg._feab, _gccg._fefg, _bcbg
}

// _cbba returns the Llx of the first element of the line's _aafd slice.
// The slice must not be empty.
func _cbba(_dcag *textLine) float64 {
	return _dcag._aafd[0].Llx
}

// _effad returns the distinct inner keys of a two-level map in ascending order.
func _effad(_efad map[float64]map[float64]gridTile) []float64 {
	_feee := make([]float64, 0, len(_efad))
	_fbbefg := make(map[float64]struct{}, len(_efad))
	for _, _becee := range _efad {
		for _aadc := range _becee {
			if _, _gdac := _fbbefg[_aadc]; _gdac {
				continue
			}
			_feee = append(_feee, _aadc)
			_fbbefg[_aadc] = struct{}{}
		}
	}
	_df.Float64s(_feee)
	return _feee
}

// newSubPath clears the current path and, when path tracing is enabled, logs
// the resulting state.
func (_gabb *shapesState) newSubPath() {
	_gabb.clearPath()
	if _bdaae {
		_b.Log.Info("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073", _gabb)
	}
}
2023-05-29 17:26:33 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
2023-07-28 12:14:31 +00:00
type ImageMark struct{Image *_bg .Image ;
2023-05-29 17:26:33 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ;Height float64 ;
// Position of the image in PDF coordinates (lower left corner).
X float64 ;Y float64 ;
// Angle in degrees, if rotated.
2023-07-28 12:14:31 +00:00
Angle float64 ;};
// _bffg classifies a pair of extents into a rulingKind. It returns _cefaa when
// _dcce is at least _gggc and _ggdg(_bbea, _dcce) holds, _acgee when _bbea is
// at least _gggc and _ggdg(_dcce, _bbea) holds, and _bgbdg otherwise.
// NOTE(review): presumably _dcce/_bbea are the x/y extents of a segment and
// _gggc a minimum length; confirm against the callers.
func _bffg (_dcce ,_bbea ,_gggc float64 )rulingKind {if _dcce >=_gggc &&_ggdg (_bbea ,_dcce ){return _cefaa ;};if _bbea >=_gggc &&_ggdg (_dcce ,_bbea ){return _acgee ;};return _bgbdg ;};
// _befef looks up _gbfdb in _edfbc by pointer identity and returns the result
// of _dddc(_edfbc, index) for the first match. If the word is not present it
// logs "removeWord: words doesn't contain word=%s" and returns nil.
func _befef (_edfbc []*textWord ,_gbfdb *textWord )[]*textWord {for _cdaa ,_fbdb :=range _edfbc {if _fbdb ==_gbfdb {return _dddc (_edfbc ,_cdaa );
};};_b .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_gbfdb );
return nil ;};
// _ggeb builds one "bullet" list entry for every line stored in _edeb under
// the first key returned by _cecd(_edeb). For each such line it gathers nested
// lists via _gdgf (when further keys exist) and continuation lines via _cbcbe,
// bounded by the _cbbd value of the next line at the same key (-1.0 for the
// last line) or of the first nested item when there is one.
// It returns an empty, non-nil slice when _cecd yields no keys.
// NOTE(review): the keys of _edeb look like indentation positions and _cbbd
// like a vertical position; confirm against _cecd and textLine.
func _ggeb (_bfca []*textLine ,_edeb map[float64 ][]*textLine )[]*list {_cdce :=_cecd (_edeb );_bfgd :=[]*list {};if len (_cdce )==0{return _bfgd ;};_baag :=_cdce [0];_ccge :=1;_eggfa :=_edeb [_baag ];for _aaccf ,_gff :=range _eggfa {var _cfeg float64 ;
_cgdgb :=[]*list {};_bgge :=_gff ._cbbd ;_gagb :=-1.0;if _aaccf < len (_eggfa )-1{_gagb =_eggfa [_aaccf +1]._cbbd ;};if _ccge < len (_cdce ){_cgdgb =_gdgf (_bfca ,_edeb ,_cdce ,_ccge ,_bgge ,_gagb );};_cfeg =_gagb ;if len (_cgdgb )> 0{_agdg :=_cgdgb [0];
if len (_agdg ._ecdee )> 0{_cfeg =_agdg ._ecdee [0]._cbbd ;};};_ccb :=[]*textLine {_gff };_efec :=_cbcbe (_gff ,_bfca ,_cdce ,_bgge ,_cfeg );_ccb =append (_ccb ,_efec ...);_dcba :=_facb (_ccb ,"\u0062\u0075\u006c\u006c\u0065\u0074",_cgdgb );_dcba ._bfcg =_dcdgd (_ccb ,"");
_bfgd =append (_bfgd ,_dcba );};return _bfgd ;};
2022-07-13 21:28:43 +00:00
2023-06-30 13:19:48 +00:00
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
2023-07-28 12:14:31 +00:00
type TextTable struct {
	_bg.PdfRectangle
	W, H  int
	Cells [][]TableCell
}

// add appends the given points to the subpath.
func (_fbff *subpath) add(_aaba ..._gab.Point) {
	_fbff._fbcgf = append(_fbff._fbcgf, _aaba...)
}

// vertsHorzs splits the rulings by kind: the first result holds the rulings of
// kind _acgee, the second those of kind _cefaa. Rulings of any other kind are
// dropped.
func (_baba rulingList) vertsHorzs() (rulingList, rulingList) {
	var first, second rulingList
	for _, r := range _baba {
		if r._eabdg == _acgee {
			first = append(first, r)
		} else if r._eabdg == _cefaa {
			second = append(second, r)
		}
	}
	return first, second
}

const (
	_fgab markKind = iota
	_bafga
	_ceag
	_abgg
)

// _dfcggd returns _fdcbd cut down to its first _agfb bytes; strings shorter
// than _agfb bytes are returned unchanged.
func _dfcggd(_fdcbd string, _agfb int) string {
	if len(_fdcbd) >= _agfb {
		return _fdcbd[:_agfb]
	}
	return _fdcbd
}

// _ceac returns the keys of the map in ascending order.
func _ceac(_beaeg map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_beaeg))
	for k := range _beaeg {
		keys = append(keys, k)
	}
	_df.Float64s(keys)
	return keys
}

// extractFormImages extracts the images drawn by the form XObject named _cbg,
// using the form's own resources when it has them and _eeed otherwise. The
// context's form counter is incremented on success. A form that resolves to
// nil without an error is silently skipped.
func (_fee *imageExtractContext) extractFormImages(_cbg *_ea.PdfObjectName, _cef _fb.GraphicsState, _eeed *_bg.PdfPageResources) error {
	form, err := _eeed.GetXObjectFormByName(*_cbg)
	if err != nil {
		return err
	}
	if form == nil {
		return nil
	}

	stream, err := form.GetContentStream()
	if err != nil {
		return err
	}

	resources := form.Resources
	if resources == nil {
		resources = _eeed
	}

	if err = _fee.extractContentStreamImages(string(stream), resources); err != nil {
		return err
	}
	_fee._ddc++
	return nil
}

// _gceee returns nil when a licensed key is installed or the package flag _cd
// is set; otherwise it prints a notice to standard output and returns a
// "license code required" error.
func _gceee(_efcb *PageText) error {
	key := _gb.GetLicenseKey()
	licensed := key != nil && key.IsLicensed()
	if licensed || _cd {
		return nil
	}
	_ce.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_ce.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _d.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}

// isActualGrid reports whether the rulings form a grid. The rulings are first
// passed through augmentGrid; the two resulting lists must each contain more
// than the package minimums (_gfda and _cafad) and must be aligned. On success
// the two lists are returned concatenated.
func (_bacbb rulingList) isActualGrid() (rulingList, bool) {
	vs, hs := _bacbb.augmentGrid()

	if len(vs) < _gfda+1 || len(hs) < _cafad+1 {
		if _bccgb {
			_b.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(vs), len(hs), _gfda+1, _cafad+1)
		}
		return nil, false
	}

	if _bccgb {
		_b.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", _bacbb, len(vs) >= 2, len(hs) >= 2, len(vs) >= 2 && len(hs) >= 2)
		for i, r := range _bacbb {
			_ce.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", i, r)
		}
	}

	if _caec {
		// Compare the outermost rulings of each list against each other.
		v0, v1 := vs[0], vs[len(vs)-1]
		h0, h1 := hs[0], hs[len(hs)-1]
		if !_eabga(v0._befee-h0._agbc) || !_eabga(v1._befee-h0._gffgd) || !_eabga(h0._befee-v0._gffgd) || !_eabga(h1._befee-v0._agbc) {
			if _bccgb {
				_b.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", v0, v1, h0, h1)
			}
			return nil, false
		}
	} else {
		if !vs.aligned() {
			if _dbdb {
				_b.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(vs))
			}
			return nil, false
		}
		if !hs.aligned() {
			if _bccgb {
				_b.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(hs))
			}
			return nil, false
		}
	}

	grid := append(vs, hs...)
	return grid, true
}
2023-01-08 22:34:27 +00:00
2023-07-28 12:14:31 +00:00
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct {
	Fonts []Font
}

// lineTo adds the point (_dfecb, _dcde) to the current path, logging it with
// its device-space position when path tracing is enabled.
func (_faec *shapesState) lineTo(_dfecb, _dcde float64) {
	if _bdaae {
		_b.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _dfecb, _dcde, _faec.devicePoint(_dfecb, _dcde))
	}
	_faec.addPoint(_dfecb, _dcde)
}

// reduceTiling drops composite rows and columns that are empty and lie closer
// than _bbdfc to a neighbouring grid line of _cdcg (the previous line for
// rows, the next line for columns). The receiver is returned unchanged when
// nothing is dropped; otherwise a new table holding only the surviving cells
// is returned.
func (_dcfdb *textTable) reduceTiling(_cdcg gridTiling, _bbdfc float64) *textTable {
	keptRows := make([]int, 0, _dcfdb._gcbge)
	keptCols := make([]int, 0, _dcfdb._ddfc)
	colLines := _cdcg._eaafd
	rowLines := _cdcg._dade

	for row := 0; row < _dcfdb._gcbge; row++ {
		if row > 0 && _ef.Abs(rowLines[row-1]-rowLines[row]) < _bbdfc && _dcfdb.emptyCompositeRow(row) {
			continue
		}
		keptRows = append(keptRows, row)
	}
	for col := 0; col < _dcfdb._ddfc; col++ {
		if col < _dcfdb._ddfc-1 && _ef.Abs(colLines[col+1]-colLines[col]) < _bbdfc && _dcfdb.emptyCompositeColumn(col) {
			continue
		}
		keptCols = append(keptCols, col)
	}

	if len(keptRows) == _dcfdb._gcbge && len(keptCols) == _dcfdb._ddfc {
		return _dcfdb
	}

	reduced := textTable{
		_edgac: _dcfdb._edgac,
		_ddfc:  len(keptCols),
		_gcbge: len(keptRows),
		_dadcc: make(map[uint64]compositeCell, len(keptCols)*len(keptRows)),
	}
	if _cgafg {
		_b.Log.Info("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064", _dcfdb._ddfc, _dcfdb._gcbge, len(keptCols), len(keptRows))
		_b.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_b.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}

	// Copy every non-empty composite cell to its new (column, row) slot.
	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			cell, extra := _dcfdb.getComposite(oldCol, oldRow)
			if len(cell) == 0 {
				continue
			}
			if _cgafg {
				_ce.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newCol, newRow, oldCol, oldRow, _dfcggd(cell.merge().text(), 50))
			}
			reduced.putComposite(newCol, newRow, cell, extra)
		}
	}
	return &reduced
}

// _fbabg builds a textWord from a non-empty slice of marks. The word's
// rectangle is the combination (via _egbga) of the mark rectangles, its _ebgb
// is the largest _gceb among the marks, and its _baebb is the distance from
// the top of _fggc down to the bottom of the word.
func _fbabg(_ebdb []*textMark, _fggc _bg.PdfRectangle) *textWord {
	bounds := _ebdb[0].PdfRectangle
	largest := _ebdb[0]._gceb
	for i := 1; i < len(_ebdb); i++ {
		mark := _ebdb[i]
		bounds = _egbga(bounds, mark.PdfRectangle)
		if mark._gceb > largest {
			largest = mark._gceb
		}
	}
	return &textWord{
		PdfRectangle: bounds,
		_dggf:        _ebdb,
		_baebb:       _fggc.Ury - bounds.Lly,
		_ebgb:        largest,
	}
}

// growTable repeatedly extends the table by one row (from getDown) and/or one
// column (from getRight) until neither is available. When both are available
// and share an untaken last element, the row is added first, the column is
// re-queried, and the shared element is placed in the new corner cell.
func (_acegag *textTable) growTable() {
	appendRow := func(row paraList) {
		_acegag._gcbge++
		y := _acegag._gcbge - 1
		for x := 0; x < _acegag._ddfc; x++ {
			_acegag.put(x, y, row[x])
		}
	}
	appendCol := func(col paraList) {
		_acegag._ddfc++
		x := _acegag._ddfc - 1
		for y := 0; y < _acegag._gcbge; y++ {
			_acegag.put(x, y, col[y])
		}
	}

	if _efed {
		_acegag.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}

	for pass := 0; ; pass++ {
		down := _acegag.getDown()
		right := _acegag.getRight()
		if _efed {
			_ce.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", pass, _acegag)
			_ce.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", down)
			_ce.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", right)
		}

		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				appendRow(down)
				if right = _acegag.getRight(); right != nil {
					appendCol(right)
					_acegag.put(_acegag._ddfc-1, _acegag._gcbge-1, corner)
				}
				continue
			}
		}

		switch {
		case down != nil:
			appendRow(down)
		case right != nil:
			appendCol(right)
		default:
			return
		}
	}
}

// _dgga returns the lines whose text is matched by the package-level pattern
// _bgaf. The result is never nil.
func _dgga(_cdbge []*textLine) []*textLine {
	matched := []*textLine{}
	for _, line := range _cdbge {
		if _bgaf.Find([]byte(line.text())) != nil {
			matched = append(matched, line)
		}
	}
	return matched
}

// toTextMarks returns the TextMarks of all elements of the line in order,
// inserting a single-space mark (via _gdbc) before every element whose _gagaf
// flag is set. _agbfc is the running offset shared with the callees.
func (_aaed *textLine) toTextMarks(_agbfc *int) []TextMark {
	var marks []TextMark
	for _, word := range _aaed._aafd {
		if word._gagaf {
			marks = _gdbc(marks, _agbfc, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_agbfc)...)
	}
	return marks
}

// llyRange returns the sub-slice of _bbgg whose paragraphs have Lly in
// [_eaeca, _beae], or nil when that interval lies entirely outside the Lly
// values of the first and last ordered paragraphs.
// NOTE(review): the binary searches assume _bbgg orders the paragraphs by
// increasing Lly and has the same length as the receiver; confirm at callers.
func (_ageg paraList) llyRange(_bbgg []int, _eaeca, _beae float64) []int {
	count := len(_ageg)
	if _beae < _ageg[_bbgg[0]].Lly || _eaeca > _ageg[_bbgg[count-1]].Lly {
		return nil
	}
	lo := _df.Search(count, func(i int) bool {
		return _ageg[_bbgg[i]].Lly >= _eaeca
	})
	hi := _df.Search(count, func(i int) bool {
		return _ageg[_bbgg[i]].Lly > _beae
	})
	return _bbgg[lo:hi]
}

// minDepth returns the bag's _dgec reduced by the gap between its Ury and its
// _ecdf.
func (_aacg *wordBag) minDepth() float64 {
	gap := _aacg.Ury - _aacg._ecdf
	return _aacg._dgec - gap
}
// asRuling converts the line ruling into a *ruling.
// The result copies the kind (`_cbfb`) and Color of the receiver and always
// carries the mark kind `_bafga`. For kind `_acgee` the primary coordinate is
// xMean() and the secondary extent is the Y range of the two end points; for
// kind `_cefaa` it is yMean() and the X range. Any other kind is logged as an
// error and (nil, false) is returned.
func (lr lineRuling) asRuling() (*ruling, bool) {
	kind := lr._cbfb
	from, to := lr._egaf, lr._eaebf
	out := &ruling{_eabdg: kind, Color: lr.Color, _gggfe: _bafga}
	switch kind {
	case _acgee:
		out._befee = lr.xMean()
		out._agbc = _ef.Min(from.Y, to.Y)
		out._gffgd = _ef.Max(from.Y, to.Y)
	case _cefaa:
		out._befee = lr.yMean()
		out._agbc = _ef.Min(from.X, to.X)
		out._gffgd = _ef.Max(from.X, to.X)
	default:
		_b.Log.Error("bad primary kind=%d", kind)
		return nil, false
	}
	return out, true
}

// firstReadingIndex returns a depth index chosen among `_ggde` and the indexes
// returned by depthBand for the band that starts at float64(_ggde+1)*_fefe and
// extends by `_dggb` times the `_ebgb` value of the first word at `_ggde`.
// The chosen index is the one whose first word compares lowest under `_aea`;
// `_ggde` itself wins ties.
// NOTE(review): `_aea` presumably compares words in reading order — confirm.
func (bag *wordBag) firstReadingIndex(_ggde int) int {
	size := bag.firstWord(_ggde)._ebgb
	bandStart := float64(_ggde+1) * _fefe
	bandEnd := bandStart + _dggb*size

	best := _ggde
	for _, idx := range bag.depthBand(bandStart, bandEnd) {
		if _aea(bag.firstWord(idx), bag.firstWord(best)) >= 0 {
			continue
		}
		best = idx
	}
	return best
}
2023-01-08 22:34:27 +00:00
2023-07-28 12:14:31 +00:00
// ExtractFonts returns the font information found in the page resources of
// the extractor: font name, font type, the raw data of the embedded font file
// (if embedded), the font descriptor and more.
//
// `previousPageFonts` supports building a font catalog across several pages
// or a whole document: each of its entries whose FontName is not already
// present among this page's fonts is appended to the result, so the returned
// list contains no duplicate names.
//
// When `previousPageFonts` is nil only the fonts of this page are returned.
// An error from reading the page resources is returned unchanged, together
// with a nil result.
func (_gbe *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	var pageFonts PageFonts
	if err := pageFonts.extractPageResourcesToFont(_gbe._gga); err != nil {
		return nil, err
	}

	merged := pageFonts.Fonts
	if previousPageFonts != nil {
		for _, prev := range previousPageFonts.Fonts {
			if _aee(merged, prev.FontName) {
				continue
			}
			merged = append(merged, prev)
		}
	}
	return &PageFonts{Fonts: merged}, nil
}
2023-01-08 22:34:27 +00:00
2023-07-28 12:14:31 +00:00
// TableInfo gets table information of the textmark `tm`.
// It returns (nil, nil) when the mark's `_fgfd` flag is not set. Otherwise it
// returns the table stored on the mark together with the result of that
// table's getCellInfo for this mark.
// NOTE(review): the layout of the [][]int result is defined by getCellInfo,
// which is not visible here.
func (tm *TextMark) TableInfo() (*TextTable, [][]int) {
	if tm._fgfd {
		table := tm._ggea
		return table, table.getCellInfo(*tm)
	}
	return nil, nil
}
2023-01-08 22:34:27 +00:00
2023-07-28 12:14:31 +00:00
// PageText represents the layout of text on a device page.
type PageText struct {
	// _ccf holds the text marks; getParagraphs groups them by their `_acec`
	// value (0, 90, 180, 270).
	_ccf []*textMark
	// NOTE(review): presumably the extracted text view — confirm.
	_bdf string
	// NOTE(review): presumably the public TextMark view — confirm.
	_fccf []TextMark
	// NOTE(review): presumably the extracted tables — confirm.
	_ecege []TextTable
	// _ebbd is passed to `_fgad` by getParagraphs along with each mark group.
	_ebbd _bg.PdfRectangle
	// _bgac is converted to rulings via `_egfd` in getParagraphs.
	_bgac []pathSection
	// _ffb is converted to rulings via `_ggegd` in getParagraphs.
	_ffb []pathSection
	// _cbc is parsed as the structure tree root by List; nil means untagged.
	_cbc *_ea.PdfObject
	// _fabg is handed to structTreeRoot.buildList by List.
	_fabg _ea.PdfObject
	// _gagg is the value returned by GetContentStreamOps.
	_gagg *_fb.ContentStreamOperations
	// _fca holds the options consulted by getParagraphs and List.
	_fca PageTextOptions
}
2023-05-29 17:26:33 +00:00
2023-06-30 13:19:48 +00:00
// TableCell is a cell in a TextTable.
2023-07-28 12:14:31 +00:00
type TableCell struct{_bg .PdfRectangle ;
2023-05-29 17:26:33 +00:00
2023-06-30 13:19:48 +00:00
// Text is the extracted text.
Text string ;
2023-01-08 22:34:27 +00:00
2023-06-30 13:19:48 +00:00
// Marks returns the TextMarks corresponding to the text in Text.
2023-07-28 12:14:31 +00:00
Marks TextMarkArray ;};func _acfc (_becd func (*wordBag ,*textWord ,float64 )bool ,_aagb float64 )func (*wordBag ,*textWord )bool {return func (_cadd *wordBag ,_ddccb *textWord )bool {return _becd (_cadd ,_ddccb ,_aagb )};};type lists []*list ;func (_gfdc *PageText )getParagraphs ()paraList {var _ccc rulingList ;
if _bbdab {_adg :=_egfd (_gfdc ._bgac );_ccc =append (_ccc ,_adg ...);};if _bcac {_bgad :=_ggegd (_gfdc ._ffb );_ccc =append (_ccc ,_bgad ...);};_ccc ,_ged :=_ccc .toTilings ();var _ddf paraList ;_fedf :=len (_gfdc ._ccf );for _adbg :=0;_adbg < 360&&_fedf > 0;
_adbg +=90{_bce :=make ([]*textMark ,0,len (_gfdc ._ccf )-_fedf );for _ ,_aagd :=range _gfdc ._ccf {if _aagd ._acec ==_adbg {_bce =append (_bce ,_aagd );};};if len (_bce )> 0{_fgde :=_fgad (_bce ,_gfdc ._ebbd ,_ccc ,_ged ,_gfdc ._fca ._bbg );_ddf =append (_ddf ,_fgde ...);
_fedf -=len (_bce );};};return _ddf ;};
2022-12-15 21:59:56 +00:00
2023-07-28 12:14:31 +00:00
// GetContentStreamOps returns the contentStreamOps field of `pt`.
func (_acc *PageText) GetContentStreamOps() *_fb.ContentStreamOperations {
	return _acc._gagg
}

// parseStructElement fills the receiver from the structure element dictionary
// `_gcgb`.
// The string form of the "S" entry is stored in `_bfeg` (empty when absent)
// and the "Pg" entry in `_fgagg`. The "K" entry is handled by type:
//   - integer: stored in `_aeae`;
//   - reference: treated as a one-element array holding that reference;
//   - array: `_aeae` is reset to -1; a sole integer element is stored in
//     `_aeae` and parsing stops; otherwise each integer element overwrites
//     `_aeae` and every other element is parsed recursively into `_efce`.
//
// Any other "K" value (including nil) leaves `_aeae` and `_efce` untouched.
// When `_gcgb` is not a dictionary a debug message is logged and nothing is
// changed.
func (_dfac *structElement) parseStructElement(_gcgb _ea.PdfObject) {
	dict, isDict := _ea.GetDict(_gcgb)
	if !isDict {
		_b.Log.Debug("parseStructElement: dictionary object not found.")
		return
	}

	structType := ""
	if s := dict.Get("S"); s != nil {
		structType = s.String()
	}
	_dfac._bfeg = structType
	_dfac._fgagg = dict.Get("Pg")

	// parseKids handles an array-valued "K" entry. The reference and array
	// cases share this logic.
	parseKids := func(kids *_ea.PdfObjectArray) {
		_dfac._aeae = -1
		if kids.Len() == 1 {
			if id, isInt := kids.Elements()[0].(*_ea.PdfObjectInteger); isInt {
				// Early return: `_efce` is deliberately left unset here.
				_dfac._aeae = int64(*id)
				return
			}
		}
		children := []structElement{}
		for _, kid := range kids.Elements() {
			if id, isInt := kid.(*_ea.PdfObjectInteger); isInt {
				_dfac._aeae = int64(*id)
				continue
			}
			child := &structElement{}
			child.parseStructElement(kid)
			children = append(children, *child)
		}
		_dfac._efce = children
	}

	switch k := dict.Get("K").(type) {
	case *_ea.PdfObjectInteger:
		_dfac._aeae = int64(*k)
	case *_ea.PdfObjectReference:
		parseKids(_ea.MakeArray(k))
	case *_ea.PdfObjectArray:
		parseKids(k)
	}
}

// extractPageResourcesToFont appends one Font entry to `_ed.Fonts` for every
// key of the font dictionary in the page resources `_af`, skipping names that
// are already present in `_ed.Fonts`.
//
// For each font the embedded font program is taken from the first non-nil
// entry among FontFile, FontFile2 and FontFile3 of the descriptor; the decoded
// stream becomes FontData and FontFileName is the font name plus ".pfb",
// ".ttf" or ".cff" respectively. When no file name could be derived the
// message `_cf` is logged at debug level and the entry is still added.
//
// It returns an error when the resources are nil or contain no font
// dictionary (`_deg`), when a listed font cannot be fetched by name (`_egf`),
// or when loading the font or decoding its stream fails.
func (_ed *PageFonts) extractPageResourcesToFont(_af *_bg.PdfPageResources) error {
	// A page without resources has no font dictionary. Report that instead of
	// dereferencing a nil pointer below.
	if _af == nil {
		return _d.New(_deg)
	}
	fontDict, hasDict := _ea.GetDict(_af.Font)
	if !hasDict {
		return _d.New(_deg)
	}

	for _, key := range fontDict.Keys() {
		fontObj, found := _af.GetFontByName(key)
		if !found {
			return _d.New(_egf)
		}
		pdfFont, err := _bg.NewPdfFontFromPdfObject(fontObj)
		if err != nil {
			return err
		}
		// NOTE(review): a nil descriptor or nil FontName would panic on the
		// next lines — confirm the font loader always supplies both.
		desc := pdfFont.FontDescriptor()
		name := desc.FontName.String()
		subtype := pdfFont.Subtype()
		if _aee(_ed.Fonts, name) {
			continue
		}
		hasToUnicode := len(pdfFont.ToUnicode()) != 0

		var (
			data     []byte
			fileName string
		)
		switch {
		case desc.FontFile != nil:
			if stream, isStream := _ea.GetStream(desc.FontFile); isStream {
				if data, err = _ea.DecodeStream(stream); err != nil {
					return err
				}
				fileName = name + ".pfb"
			}
		case desc.FontFile2 != nil:
			if stream, isStream := _ea.GetStream(desc.FontFile2); isStream {
				if data, err = _ea.DecodeStream(stream); err != nil {
					return err
				}
				fileName = name + ".ttf"
			}
		case desc.FontFile3 != nil:
			if stream, isStream := _ea.GetStream(desc.FontFile3); isStream {
				if data, err = _ea.DecodeStream(stream); err != nil {
					return err
				}
				fileName = name + ".cff"
			}
		}
		if len(fileName) < 1 {
			_b.Log.Debug(_cf)
		}

		_ed.Fonts = append(_ed.Fonts, Font{
			FontName:       name,
			PdfFont:        pdfFont,
			IsCID:          pdfFont.IsCID(),
			IsSimple:       pdfFont.IsSimple(),
			ToUnicode:      hasToUnicode,
			FontType:       subtype,
			FontData:       data,
			FontFileName:   fileName,
			FontDescriptor: desc,
		})
	}
	return nil
}

// _ffff starts a new textLine from the first word of `_aaef` at depth index
// `_dcbb`: the line takes that word's rectangle, `_ebgb` and `_baebb` values,
// and the word is then moved into the line with pullWord.
func _ffff(_aaef *wordBag, _dcbb int) *textLine {
	first := _aaef.firstWord(_dcbb)
	line := &textLine{
		PdfRectangle: first.PdfRectangle,
		_bfbb:        first._ebgb,
		_cbbd:        first._baebb,
	}
	line.pullWord(_aaef, first, _dcbb)
	return line
}

// primMinMax returns the smallest and largest `_befee` value in the list.
// The list must not be empty.
func (_aecgb rulingList) primMinMax() (float64, float64) {
	lo, hi := _aecgb[0]._befee, _aecgb[0]._befee
	for _, r := range _aecgb[1:] {
		switch {
		case r._befee < lo:
			lo = r._befee
		case r._befee > hi:
			hi = r._befee
		}
	}
	return lo, hi
}

// snapToGroupsDirection groups rulings whose `_befee` values lie close
// together, gives every member of a group the same `_befee`, and merges the
// members of each group.
//
// The list is first sorted with sortStrict. A new group is started whenever a
// ruling's `_befee` exceeds the current group leader's by more than `_bddeb`.
// Each group's shared value comes from mergePrimary; each group is then split
// with splitSec and every part merged. A merged ruling that aligns (both
// alignsPrimary and alignsSec) with the previously appended one is logged as
// a duplicate and dropped. The result is sorted with sortStrict.
// The list must not be empty. The rulings in the receiver are modified.
func (_cadgg rulingList) snapToGroupsDirection() rulingList {
	_cadgg.sortStrict()

	groups := make(map[*ruling]rulingList, len(_cadgg))
	leader := _cadgg[0]
	startGroup := func(r *ruling) {
		leader = r
		groups[leader] = rulingList{r}
	}
	startGroup(_cadgg[0])
	for _, r := range _cadgg[1:] {
		if r._befee < leader._befee-_efea {
			_b.Log.Error("snapToGroupsDirection: Wrong primary order.\n\tv0=%s\n\t v=%s", leader, r)
		}
		if r._befee > leader._befee+_bddeb {
			startGroup(r)
		} else {
			groups[leader] = append(groups[leader], r)
		}
	}

	// Snap every ruling to the merged primary value of its group.
	snapped := make(map[*ruling]float64, len(groups))
	leaderOf := make(map[*ruling]*ruling, len(_cadgg))
	for head, members := range groups {
		snapped[head] = members.mergePrimary()
		for _, member := range members {
			leaderOf[member] = head
		}
	}
	for _, r := range _cadgg {
		r._befee = snapped[leaderOf[r]]
	}

	merged := make(rulingList, 0, len(_cadgg))
	for _, members := range groups {
		for i, part := range members.splitSec() {
			candidate := part.merge()
			if len(merged) > 0 {
				prev := merged[len(merged)-1]
				if prev.alignsPrimary(candidate) && prev.alignsSec(candidate) {
					_b.Log.Error("snapToGroupsDirection: Duplicate i=%d\n\tw=%s\n\tv=%s", i, prev, candidate)
					continue
				}
			}
			merged = append(merged, candidate)
		}
	}
	merged.sortStrict()
	return merged
}
2022-10-27 19:04:58 +00:00
2023-07-28 12:14:31 +00:00
// Elements returns the TextMarks in `ma`.
// The returned slice is the array's own backing slice, not a copy.
func (ma *TextMarkArray) Elements() []TextMark {
	marks := ma._bca
	return marks
}
2023-05-29 17:26:33 +00:00
2023-07-28 12:14:31 +00:00
// NewWithOptions returns an Extractor instance for extracting content from the
// input PDF page, configured with `options`.
//
// It fails when the page's content streams or its media box cannot be read.
// A missing structure tree root is only logged. A media box whose lower and
// upper coordinates are reversed is normalized (and logged) before use.
func NewWithOptions(page *_bg.PdfPage, options *Options) (*Extractor, error) {
	const usageKey = "extractor.NewWithOptions"

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}

	structRoot, tagged := page.GetStructTreeRoot()
	if !tagged {
		_b.Log.Info("The pdf document is not tagged. StructTreeRoot doesn't exist.")
	}
	container := page.GetContainingPdfObject()

	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _ce.Errorf("extractor requires mediaBox. %v", err)
	}

	ex := &Extractor{
		_bc:   contents,
		_gga:  page.Resources,
		_de:   *mediaBox,
		_fc:   page.CropBox,
		_eg:   map[string]fontEntry{},
		_gbfd: map[string]textResult{},
		_ad:   options,
		_fbb:  structRoot,
		_ca:   container,
	}

	box := &ex._de
	if box.Llx > box.Urx {
		_b.Log.Info("MediaBox has X coordinates reversed. %.2f Fixing.", ex._de)
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		_b.Log.Info("MediaBox has Y coordinates reversed. %.2f Fixing.", ex._de)
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	_gb.TrackUse(usageKey)
	return ex, nil
}

// textObject holds the state used while rendering the text of a content
// stream.
type textObject struct {
	_dcdg *Extractor
	_edef *_bg.PdfPageResources
	// _agbf is the graphics state; renderText reads its CTM.
	_agbf _fb.GraphicsState
	// _gacd is the current text state.
	_gacd *textState
	_bffa *stateStack
	// _fda is the matrix renderText multiplies with the CTM and advances after
	// each glyph.
	_fda  _gab.Matrix
	_cfec _gab.Matrix
	// _fecd collects the text marks produced by renderText.
	_fecd []*textMark
	// _cbde makes renderText skip all processing ("Invalid font").
	_cbde bool
}
2023-06-30 13:19:48 +00:00
2023-07-28 12:14:31 +00:00
// String returns a string describing the current state of the textState stack.
// The first line gives the stack size; each following line is a tab, the
// entry's index and its String() form, or "<nil>" for a nil entry. Lines are
// joined with "\n" without a trailing newline.
func (ss *stateStack) String() string {
	lines := make([]string, 0, len(*ss)+1)
	lines = append(lines, _ce.Sprintf("---- font stack: %d", len(*ss)))
	for i, st := range *ss {
		desc := "<nil>"
		if st != nil {
			desc = st.String()
		}
		lines = append(lines, _ce.Sprintf("\t%2d: %s", i, desc))
	}
	return _c.Join(lines, "\n")
}
2023-05-29 17:26:33 +00:00
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
2023-07-28 12:14:31 +00:00
func (_ggf *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_dgaa :=&imageExtractContext {_fae :options };_bcc :=_dgaa .extractContentStreamImages (_ggf ._bc ,_ggf ._gga );if _bcc !=nil {return nil ,_bcc ;};return &PageImages {Images :_dgaa ._dae },nil ;
};func (_ddeg *textObject )renderText (_dbg _ea .PdfObject ,_cfb []byte ,_afd int )error {if _ddeg ._cbde {_b .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");
return nil ;};_bccce :=_ddeg .getCurrentFont ();_cbfa :=_bccce .BytesToCharcodes (_cfb );_ffcac ,_dbe ,_faga :=_bccce .CharcodesToStrings (_cbfa );if _faga > 0{_b .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_dbe ,_faga );
};_ddeg ._gacd ._feab +=_dbe ;_ddeg ._gacd ._fefg +=_faga ;_ggae :=_ddeg ._gacd ;_bgbe :=_ggae ._fgca ;_bdcd :=_ggae ._dgc /100.0;_ded :=_aceg ;if _bccce .Subtype ()=="\u0054\u0079\u0070e\u0033"{_ded =1;};_cagfg ,_eceg :=_bccce .GetRuneMetrics (' ');if !_eceg {_cagfg ,_eceg =_bccce .GetCharMetrics (32);
};if !_eceg {_cagfg ,_ =_bg .DefaultFont ().GetRuneMetrics (' ');};_efb :=_cagfg .Wx *_ded ;_b .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_efb ,_ffcac ,_bccce ,_bgbe );
_eecfe :=_gab .NewMatrix (_bgbe *_bdcd ,0,0,_bgbe ,0,_ggae ._fdea );if _beff {_b .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_cbfa ),_cbfa ,_ffcac );
};_b .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_cbfa ),_cbfa ,len (_ffcac ));_bdde :=_ddeg .getFillColor ();
_bbcc :=_ddeg .getStrokeColor ();for _ceca ,_dbf :=range _ffcac {_eaag :=[]rune (_dbf );if len (_eaag )==1&&_eaag [0]=='\x00'{continue ;};_dfda :=_cbfa [_ceca ];_dgag :=_ddeg ._agbf .CTM .Mult (_ddeg ._fda ).Mult (_eecfe );_adcg :=0.0;if len (_eaag )==1&&_eaag [0]==32{_adcg =_ggae ._cagc ;
};_ege ,_cefd :=_bccce .GetCharMetrics (_dfda );if !_cefd {_b .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_dfda ,_eaag ,_eaag ,_bccce );
return _ce .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_bccce .String (),_dfda );};_cgg :=_gab .Point {X :_ege .Wx *_ded ,Y :_ege .Wy *_ded };
_aafg :=_gab .Point {X :(_cgg .X *_bgbe +_adcg )*_bdcd };_ggfda :=_gab .Point {X :(_cgg .X *_bgbe +_ggae ._fdf +_adcg )*_bdcd };if _beff {_b .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_bgbe ,_ggae ._fdf ,_ggae ._cagc ,_bdcd );
_b .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_cgg ,_aafg ,_ggfda );};_aaa :=_ebba (_aafg );_fabeb :=_ebba (_ggfda );_ebf :=_ddeg ._agbf .CTM .Mult (_ddeg ._fda ).Mult (_aaa );
if _adbf {_b .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_ddeg ._agbf .CTM ,_ddeg ._fda ,_fabeb ,_bbccc (_ddeg ._agbf .CTM .Mult (_ddeg ._fda ).Mult (_fabeb )),_aaa ,_ebf ,_bbccc (_ebf ));
};_fbcg ,_bede :=_ddeg .newTextMark (_aa .ExpandLigatures (_eaag ),_dgag ,_bbccc (_ebf ),_ef .Abs (_efb *_dgag .ScalingFactorX ()),_bccce ,_ddeg ._gacd ._fdf ,_bdde ,_bbcc ,_dbg ,_ffcac ,_ceca ,_afd );if !_bede {_b .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");
continue ;};if _bccce ==nil {_b .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _bccce .Encoder ()==nil {_b .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_bccce );
}else {if _bgabg ,_adb :=_bccce .Encoder ().CharcodeToRune (_dfda );_adb {_fbcg ._ffbg =string (_bgabg );};};_b .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_ceca ,_dfda ,_fbcg ,_dgag );
_ddeg ._fecd =append (_ddeg ._fecd ,&_fbcg );_ddeg ._fda .Concat (_fabeb );};return nil ;};func _cecd (_aeaa map[float64 ][]*textLine )[]float64 {_faefa :=[]float64 {};for _bbce :=range _aeaa {_faefa =append (_faefa ,_bbce );};_df .Float64s (_faefa );return _faefa ;
};func (_cgfg *textTable )computeBbox ()_bg .PdfRectangle {var _fdcec _bg .PdfRectangle ;_faeff :=false ;for _gebg :=0;_gebg < _cgfg ._gcbge ;_gebg ++{for _eafea :=0;_eafea < _cgfg ._ddfc ;_eafea ++{_becgb :=_cgfg .get (_eafea ,_gebg );if _becgb ==nil {continue ;
};if !_faeff {_fdcec =_becgb .PdfRectangle ;_faeff =true ;}else {_fdcec =_egbga (_fdcec ,_becgb .PdfRectangle );};};};return _fdcec ;};func (_dgdc *textObject )setHorizScaling (_egfb float64 ){if _dgdc ==nil {return ;};_dgdc ._gacd ._dgc =_egfb ;};type markKind int ;
// _cdb returns the single parameter of the operation `_ffec` as a float64.
// When the operation does not have exactly one parameter, the mismatch is
// logged at debug level and an "incorrect parameter count" error is returned.
func _cdb(_ffec *_fb.ContentStreamOperation) (float64, error) {
	if got := len(_ffec.Params); got != 1 {
		err := _d.New("incorrect parameter count")
		_b.Log.Debug("ERROR: %#q should have %d input params, got %d %+v", _ffec.Operand, 1, got, _ffec.Params)
		return 0.0, err
	}
	return _ea.GetNumberAsFloat(_ffec.Params[0])
}

// Behaviour switches and tuning constants.
// Uses visible in this file: `_fgcg` enables hyphen removal in
// toCellTextMarks, `_gbcd` makes computeEBBoxes overwrite each paragraph's
// rectangle, and `_bbdab` / `_bcac` enable the two ruling sources in
// getParagraphs.
const (
	_fgcg  = true
	_aaade = true
	_ccdf  = true
	_gbcd  = false
	_cafa  = false
	_fddc  = 6
	_fddb  = 3.0
	_ceec  = 200
	_ceed  = true
	_adfg  = true
	_bbdab = true
	_bcac  = true
	_caec  = false
)

// computeEBBoxes computes the `_gfbgd` rectangle of every paragraph.
//
// Each `_gfbgd` starts as the paragraph's own rectangle. For paragraph i the
// closest right edge to its left and the closest left edge to its right are
// taken from its yNeighbours(0). The rectangle is then widened horizontally to
// the edges of any other paragraph whose `_gfbgd` top is not above i's bottom
// and whose edge falls between i's current edge and the neighbour limit.
// Results are written back paragraph by paragraph, so later paragraphs see the
// already widened rectangles of earlier ones.
// When `_gbcd` is set the paragraph rectangles are replaced by the result.
func (_ggfb paraList) computeEBBoxes() {
	if _eebec {
		_b.Log.Info("computeEBBoxes:")
	}
	for _, para := range _ggfb {
		para._gfbgd = para.PdfRectangle
	}

	neighbours := _ggfb.yNeighbours(0)
	for i, para := range _ggfb {
		box := para._gfbgd

		// Limits imposed by paragraphs that are beside this one.
		leftLimit, rightLimit := -1.0e9, +1.0e9
		for _, j := range neighbours[para] {
			other := _ggfb[j]._gfbgd
			if other.Urx < box.Llx {
				leftLimit = _ef.Max(leftLimit, other.Urx)
			} else if box.Urx < other.Llx {
				rightLimit = _ef.Min(rightLimit, other.Llx)
			}
		}

		for j, candidate := range _ggfb {
			other := candidate._gfbgd
			if i == j || other.Ury > box.Lly {
				continue
			}
			if leftLimit <= other.Llx && other.Llx < box.Llx {
				box.Llx = other.Llx
			} else if other.Urx <= rightLimit && box.Urx < other.Urx {
				box.Urx = other.Urx
			}
		}

		if _eebec {
			_ce.Printf("%4d: %6.2f\u2192%6.2f %q\n", i, para._gfbgd, box, _dfcggd(para.text(), 50))
		}
		para._gfbgd = box
	}

	if _gbcd {
		for _, para := range _ggfb {
			para.PdfRectangle = para._gfbgd
		}
	}
}

// textTable is a grid of paragraphs.
type textTable struct {
	_bg.PdfRectangle
	// _ddfc and _gcbge bound the first and second argument of get/put.
	// NOTE(review): presumably width and height in cells — confirm.
	_ddfc, _gcbge int
	// _edgac forces isExportable to report true.
	_edgac bool
	_efeac map[uint64]*textPara
	_dadcc map[uint64]compositeCell
}

// text returns the `_ggaef` values of allWords() joined by single spaces.
func (_bcaf *wordBag) text() string {
	words := _bcaf.allWords()
	parts := make([]string, 0, len(words))
	for _, w := range words {
		parts = append(parts, w._ggaef)
	}
	return _c.Join(parts, " ")
}

// xNeighbours builds two events per paragraph from its Llx and Urx and
// returns the result of eventNeighbours on them. When `_beac` is non-zero the
// interval of each paragraph is widened on both sides by `_beac` times the
// paragraph's fontsize().
func (_gccgdc paraList) xNeighbours(_beac float64) map[*textPara][]int {
	evts := make([]event, 2*len(_gccgdc))
	for i, para := range _gccgdc {
		lo, hi := para.Llx, para.Urx
		if _beac != 0 {
			pad := _beac * para.fontsize()
			lo, hi = lo-pad, hi+pad
		}
		evts[2*i] = event{lo, true, i}
		evts[2*i+1] = event{hi, false, i}
	}
	return _gccgdc.eventNeighbours(evts)
}

// _bccb partitions the words of `_adbff` into word bags.
//
// Depth indexes are visited in the order given by depthIndexes(). While an
// index still has words, a new bag is seeded (via `_fcfg`) with the word at
// firstReadingIndex and that word is removed from `_adbff`. The bag is then
// grown by repeated scanBand passes until a pass adds nothing:
//   - a "vertical" pass over the bag's depth range extended by `_dega` times
//     the bag's `_ecdf`;
//   - a "horizontal" pass with a reading gap of `_bcbge` times `_ecdf`;
//   - if neither added words, a counting pass with `_bbdf`, followed by an
//     "other" pass when the count satisfies the threshold below.
//
// `_adbff` is consumed in the process.
func _bccb(_adbff *wordBag, _dadb float64, _gcfg, _gfbaf rulingList) []*wordBag {
	var bags []*wordBag
	for _, depthIdx := range _adbff.depthIndexes() {
		grew := false
		for !_adbff.empty(depthIdx) {
			seedIdx := _adbff.firstReadingIndex(depthIdx)
			seed := _adbff.firstWord(seedIdx)
			bag := _fcfg(seed, _dadb, _gcfg, _gfbaf)
			_adbff.removeWord(seed, seedIdx)
			if _fcea {
				_b.Log.Info("firstWord ^^^^ %s", seed.String())
			}

			// Runs at least once; repeats while the previous pass added words.
			for again := true; again; again = grew {
				grew = false
				readingGapOther := _begg * bag._ecdf
				readingGap := _bcbge * bag._ecdf
				depthGap := _dega * bag._ecdf
				if _fcea {
					_b.Log.Info("paraWords depth %.2f - %.2f maxIntraDepthGap=%.2f maxIntraReadingGap=%.2f",
						bag.minDepth(), bag.maxDepth(), depthGap, readingGap)
				}

				if _adbff.scanBand("vertical", bag, _acfc(_gaf, 0),
					bag.minDepth()-depthGap, bag.maxDepth()+depthGap, _fbbf, false, false) > 0 {
					grew = true
				}
				if _adbff.scanBand("horizontal", bag, _acfc(_gaf, readingGap),
					bag.minDepth(), bag.maxDepth(), _abg, false, false) > 0 {
					grew = true
				}
				if grew {
					continue
				}

				count := _adbff.scanBand("", bag, _acfc(_bbdf, readingGapOther),
					bag.minDepth(), bag.maxDepth(), _dcdf, true, false)
				if count > 0 {
					depthSpan := (bag.maxDepth() - bag.minDepth()) / bag._ecdf
					if (count > 1 && float64(count) > 0.3*depthSpan) || count <= 10 {
						if _adbff.scanBand("other", bag, _acfc(_bbdf, readingGapOther),
							bag.minDepth(), bag.maxDepth(), _dcdf, false, true) > 0 {
							grew = true
						}
					}
				}
			}
			bags = append(bags, bag)
		}
	}
	return bags
}

// _geec reports whether both `_gefa` and `_cfda` hold for the two rectangles.
func _geec(_ffac, _bbgf _bg.PdfRectangle) bool {
	if !_gefa(_ffac, _bbgf) {
		return false
	}
	return _cfda(_ffac, _bbgf)
}

// textState is the text state used while rendering text.
type textState struct {
	// _fdf is logged as "tc" by renderText and added to each glyph advance.
	_fdf float64
	// _cagc is logged as "tw" by renderText and added for single spaces.
	_cagc float64
	// _dgc is the horizontal scaling; renderText divides it by 100.
	_dgc float64
	_ced float64
	// _fgca is logged as "tfs"/fontSize by renderText.
	_fgca float64
	_gd   RenderMode
	// _fdea is the vertical translation of the matrix built in renderText.
	_fdea float64
	_dgdf *_bg.PdfFont
	_fagg _bg.PdfRectangle
	// _feab and _fefg accumulate the character and miss counts of renderText.
	_feab int
	_fefg int
}

// _cfda reports whether the Y ranges [Lly, Ury] of the two rectangles overlap
// (touching counts as overlap).
func _cfda(_bgca, _bffb _bg.PdfRectangle) bool {
	if !(_bgca.Lly <= _bffb.Ury) {
		return false
	}
	return _bffb.Lly <= _bgca.Ury
}

// _ddba returns the lines whose `_cbbd` value is strictly greater than `_cgda`
// and, unless `_cgdcc` is -1, strictly less than `_cgdcc`. Input order is
// kept; the result is nil when nothing matches.
func _ddba(_bgeb []*textLine, _cgdcc, _cgda float64) []*textLine {
	var kept []*textLine
	unbounded := _cgdcc == -1
	for _, line := range _bgeb {
		if line._cbbd > _cgda && (unbounded || line._cbbd < _cgdcc) {
			kept = append(kept, line)
		}
	}
	return kept
}
2023-06-30 13:19:48 +00:00
// List returns all the list objects detected on the page.
// It detects all the bullet point Lists from a given pdf page and builds a slice of bullet list objects.
// A given bullet list object has a tree structure.
// Each bullet point list is extracted with the text content it contains and all the sub lists found under it as children in the tree.
// The rest content of the pdf is ignored and only text in the bullet point lists are extracted.
// The list extraction is done in two ways.
// 1. If the document is tagged then the lists are extracted using the tags provided in the document.
// 2. Otherwise the bullet lists are extracted from the raw text using regex matching.
// By default the document tag is used if available.
// However this can be disabled using `DisableDocumentTags` in the `Options` object.
// Sometimes disabling document tags option might give a better bullet list extraction if the document was tagged incorrectly.
// options := &Options{
// DisableDocumentTags: false, // this means use document tag if available
// }
// ex, err := NewWithOptions(page, options)
// // handle error
// pageText, _, _, err := ex.ExtractPageText()
// // handle error
// lists := pageText.List()
// txt := lists.Text()
2023-07-28 12:14:31 +00:00
func (_bgdg PageText )List ()lists {_egfbg :=!_bgdg ._fca ._cbff ;_dfcg :=_bgdg .getParagraphs ();_ggee :=true ;if _bgdg ._cbc ==nil ||*_bgdg ._cbc ==nil {_ggee =false ;};_bedgf :=_dfcg .list ();if _ggee &&_egfbg {_cfgc :=_dfba (&_dfcg );_acega :=&structTreeRoot {};
_acega .parseStructTreeRoot (*_bgdg ._cbc );if _acega ._cegf ==nil {_b .Log .Debug ("\u004c\u0069\u0073\u0074\u003a\u0020\u0073t\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e'\u0074\u0020\u0068\u0061\u0076e\u0020\u0061\u006e\u0079\u0020\u0063\u006f\u006e\u0074e\u006e\u0074\u002c\u0020\u0075\u0073\u0069\u006e\u0067\u0020\u0074\u0065\u0078\u0074\u0020\u006d\u0061\u0074\u0063\u0068\u0069\u006e\u0067\u0020\u006d\u0065\u0074\u0068\u006f\u0064\u0020\u0069\u006e\u0073\u0074\u0065\u0061\u0064\u002e");
return _bedgf ;};_bedgf =_acega .buildList (_cfgc ,_bgdg ._fabg );};return _bedgf ;};func (_fgee rulingList )primaries ()[]float64 {_fbfga :=make (map[float64 ]struct{},len (_fgee ));for _ ,_bbdaa :=range _fgee {_fbfga [_bbdaa ._befee ]=struct{}{};};_dged :=make ([]float64 ,len (_fbfga ));
_eaeeb :=0;for _dcadf :=range _fbfga {_dged [_eaeeb ]=_dcadf ;_eaeeb ++;};_df .Float64s (_dged );return _dged ;};func (_gbbf *shapesState )addPoint (_gcab ,_feb float64 ){_afa :=_gbbf .establishSubpath ();_agaf :=_gbbf .devicePoint (_gcab ,_feb );if _afa ==nil {_gbbf ._afge =true ;
_gbbf ._bfd =_agaf ;}else {_afa .add (_agaf );};};func (_cddec *textTable )isExportable ()bool {if _cddec ._edgac {return true ;};_adbbc :=func (_eaece int )bool {_geed :=_cddec .get (0,_eaece );if _geed ==nil {return false ;};_bgcb :=_geed .text ();_egff :=_a .RuneCountInString (_bgcb );
_dbee :=_gefeb .MatchString (_bgcb );return _egff <=1||_dbee ;};for _begdb :=0;_begdb < _cddec ._gcbge ;_begdb ++{if !_adbbc (_begdb ){return true ;};};return false ;};
// Font represents the font properties on a PDF page.
type Font struct {
	// PdfFont is the underlying font object.
	PdfFont *_bg.PdfFont

	// FontName is the font name taken from the font descriptor.
	FontName string

	// FontType is the Subtype entry of the font dictionary inside the page
	// resources. Examples: Type0, Type1, MMType1, Type3, TrueType, CIDFont.
	FontType string

	// ToUnicode is true if the font provides a `ToUnicode` mapping.
	ToUnicode bool

	// IsCID is true if the underlying font is a composite font.
	// A composite font is represented by a font dictionary whose Subtype is
	// `Type0`.
	IsCID bool

	// IsSimple is true if the font is a simple font.
	// A simple font is limited to 8 bit character codes.
	IsSimple bool

	// FontData is the raw data of the embedded font file.
	// Its format is PostScript Type 1 (PFB), TrueType (TTF) or Compact Font
	// Format (CFF), depending on whether it was read from `FontFile`,
	// `FontFile2` or `FontFile3` of the font descriptor. At most one of those
	// entries supplies FontData; it is empty when no font file is embedded.
	FontData []byte

	// FontFileName is a file name for the font data, formed as
	// (font name) + (font type extension), for example: helvetica.ttf.
	FontFileName string

	// FontDescriptor holds the metrics and other attributes of the font as
	// found in the PDF structure (Font Descriptor).
	FontDescriptor *_bg.PdfFontDescriptor
}
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	// IncludeInlineStencilMasks controls whether inline stencil masks are
	// extracted; they are skipped by default.
	IncludeInlineStencilMasks bool
}

// _bbdf reports whether the left edge of `_dcc` lies at or to the right of the
// right edge of `_ecee`, and less than `_eaae` beyond it.
func _bbdf(_ecee *wordBag, _dcc *textWord, _eaae float64) bool {
	bagRight, wordLeft := _ecee.Urx, _dcc.Llx
	if !(bagRight <= wordLeft) {
		return false
	}
	return wordLeft < bagRight+_eaae
}
// String returns a description of `p`.
// An optional "[WxH] " prefix is included when the paragraph has a table attached.
func (p *textPara) String() string {
	if p._cfga {
		return _ce.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", p.PdfRectangle)
	}
	tableDesc := ""
	if p._bgba != nil {
		tableDesc = _ce.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", p._bgba._ddfc, p._bgba._gcbge)
	}
	return _ce.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", p.PdfRectangle, tableDesc, len(p._gfbb), _dfcggd(p.text(), 50))
}

// _bdab removes the last rune from the text of the last element of `marks`
// and adjusts `*offset` to match the shortened text.
//
// If the last mark holds at most one rune, the whole mark is dropped and
// `*offset` is reset to the end of the new last mark (or to the dropped mark's
// own Offset when no marks remain). Otherwise the mark's Text is shortened by
// one rune and `*offset` is moved back by the number of bytes removed.
//
// The shortened text is written back into the slice element. Previously it
// was assigned to a local copy of the mark and silently lost, while `*offset`
// was still adjusted. Empty `marks` is returned unchanged instead of panicking.
func _bdab(marks []TextMark, offset *int) []TextMark {
	if len(marks) == 0 {
		return marks
	}
	lastIdx := len(marks) - 1
	last := marks[lastIdx]
	runes := []rune(last.Text)
	if len(runes) <= 1 {
		marks = marks[:lastIdx]
		if len(marks) == 0 {
			// Nothing precedes the removed mark; text now ends where it began.
			*offset = last.Offset
			return marks
		}
		prev := marks[len(marks)-1]
		*offset = prev.Offset + len(prev.Text)
		return marks
	}
	trimmed := _cdbbf(last.Text)
	*offset += len(trimmed) - len(last.Text)
	// Index the slice: `last` is a copy, so assigning to it would have no effect.
	marks[lastIdx].Text = trimmed
	return marks
}

// contains reports whether `r` fits inside tile `t`. A tile with fewer than 3
// borders never contains anything. For each border flag that is set, the
// corresponding edge of `r` may not extend past the tile's edge by more than
// the tolerance `_cddf`.
func (t gridTile) contains(r _bg.PdfRectangle) bool {
	switch {
	case t.numBorders() < 3:
		return false
	case t._afdge && r.Llx < t.Llx-_cddf:
		return false
	case t._bfecb && r.Urx > t.Urx+_cddf:
		return false
	case t._eaed && r.Lly < t.Lly-_cddf:
		return false
	case t._fdbd && r.Ury > t.Ury+_cddf:
		return false
	}
	return true
}

// toCellTextMarks returns the TextMarks of all lines in `p`, updating
// `*offset` as it goes. When `_fgcg` is set and a non-final line ends in a
// hyphen, the trailing rune is removed and no separator is inserted; otherwise
// lines are joined with the separator chosen by `_dgbg`.
func (p *textPara) toCellTextMarks(offset *int) []TextMark {
	var marks []TextMark
	lastIdx := len(p._gfbb) - 1
	for i, line := range p._gfbb {
		lineMarks := line.toTextMarks(offset)
		dehyphenate := _fgcg && line.endsInHyphen() && i != lastIdx
		if dehyphenate {
			lineMarks = _bdab(lineMarks, offset)
		}
		marks = append(marks, lineMarks...)
		if dehyphenate || i == lastIdx {
			continue
		}
		marks = _gdbc(marks, offset, _dgbg(line._cbbd, p._gfbb[i+1]._cbbd))
	}
	return marks
}

// _dfcc reports whether every rune in `text` is a Unicode space. It returns
// true for the empty string.
func _dfcc(text string) bool {
	for _, r := range text {
		if !_f.IsSpace(r) {
			return false
		}
	}
	return true
}

// drawRectangle appends a closed rectangular subpath with corner (x, y),
// width `w` and height `h` to `ss`. When `_bdaae` is set the rectangle is
// also logged in device coordinates.
func (ss *shapesState) drawRectangle(x, y, w, h float64) {
	if _bdaae {
		ll := ss.devicePoint(x, y)
		ur := ss.devicePoint(x+w, y+h)
		rect := _bg.PdfRectangle{Llx: ll.X, Lly: ll.Y, Urx: ur.X, Ury: ur.Y}
		_b.Log.Info("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066", rect)
	}
	ss.newSubPath()
	ss.moveTo(x, y)
	ss.lineTo(x+w, y)
	ss.lineTo(x+w, y+h)
	ss.lineTo(x, y+h)
	ss.closePath()
}

// findTables links neighbouring paragraphs, sorts `paras` in place using
// `_fcd`, and returns the tables found by the grid-based finder (if `_ceed`)
// followed by those found by the text-based finder (if `_adfg`).
func (paras paraList) findTables(tilings []gridTiling) []*textTable {
	paras.addNeighbours()
	_df.Slice(paras, func(i, j int) bool { return _fcd(paras[i], paras[j]) < 0 })

	var tables []*textTable
	if _ceed {
		tables = append(tables, paras.findGridTables(tilings)...)
	}
	if _adfg {
		tables = append(tables, paras.findTextTables()...)
	}
	return tables
}

// _ccab computes the y-coordinates of the horizontal corridors that separate
// groups of text lines in `cells`. A corridor is placed midway between a line
// and the lowest line seen so far whenever the line's top is below that
// lowest bottom. The corridors are returned only if every cell reports
// `hasLines` for every group; otherwise nil is returned.
//
// Returns nil when the cells contain no lines (previously this panicked).
func _ccab(cells []compositeCell) []float64 {
	var lines []*textLine
	numParas := 0
	for _, cell := range cells {
		numParas += len(cell.paraList)
		lines = append(lines, cell.lines()...)
	}
	_df.Slice(lines, func(i, j int) bool {
		a, b := lines[i], lines[j]
		depthA, depthB := a._cbbd, b._cbbd
		if !_acbc(depthA - depthB) {
			return depthA < depthB
		}
		return a.Llx < b.Llx
	})
	if _cgafg {
		_ce.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", numParas, len(lines))
		for i, l := range lines {
			_ce.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, l)
		}
	}
	if len(lines) == 0 {
		return nil
	}

	var corridors []float64
	lowest := lines[0]
	var groups [][]*textLine
	group := []*textLine{lowest}
	for i, l := range lines[1:] {
		if l.Ury < lowest.Lly {
			border := 0.5 * (l.Ury + lowest.Lly)
			if _cgafg {
				_ce.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a", i, l.Ury, lowest.Lly, border, lowest, l)
			}
			corridors = append(corridors, border)
			groups = append(groups, group)
			group = nil
		}
		group = append(group, l)
		if l.Lly < lowest.Lly {
			lowest = l
		}
	}
	if len(group) > 0 {
		groups = append(groups, group)
	}
	if _cgafg {
		_ce.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", corridors)
	}
	if _cgafg {
		_b.Log.Info("\u0072\u006f\u0077\u003d\u0025\u0064", len(cells))
		for i, cell := range cells {
			_ce.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, cell)
		}
		_b.Log.Info("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d", len(groups))
		for i, g := range groups {
			_ce.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a", i, len(g))
			for j, l := range g {
				_ce.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", j, l)
			}
		}
	}

	// Every group must be present in every cell; stop at the first miss.
	spansAll := true
groupLoop:
	for gi, g := range groups {
		for ci, cell := range cells {
			if _cgafg {
				_ce.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", gi, len(groups), ci, len(cells), cell)
			}
			if !cell.hasLines(g) {
				if _cgafg {
					_ce.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", gi, len(groups), ci, len(cells))
				}
				spansAll = false
				break groupLoop
			}
		}
	}
	if !spansAll {
		if _cgafg {
			_b.Log.Info("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg")
		}
		corridors = nil
	}
	if _cgafg && corridors != nil {
		_ce.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", corridors)
	}
	return corridors
}

// paraList is a sequence of text paragraphs.
type paraList []*textPara

// checkWidth returns `hi - lo` and whether that width is no greater than the
// tolerance `_bddeb`. The receiver is not used.
func (r rectRuling) checkWidth(lo, hi float64) (float64, bool) {
	width := hi - lo
	return width, width <= _bddeb
}

// intersections returns, for each ruling index in `rl` that intersects at
// least one ruling of the other kind, the set of indexes it intersects. Only
// pairs of one `_acgee` ruling and one `_cefaa` ruling are tested. It returns
// nil when there are too few rulings of either kind, or more than `_cfae` in
// total.
func (rl rulingList) intersections() map[int]intSet {
	var kindA, kindB []int
	for i, r := range rl {
		switch r._eabdg {
		case _acgee:
			kindA = append(kindA, i)
		case _cefaa:
			kindB = append(kindB, i)
		}
	}
	if len(kindA) < _gfda+1 || len(kindB) < _cafad+1 {
		return nil
	}
	if len(kindA)+len(kindB) > _cfae {
		_b.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064", len(rl), len(kindA), len(kindB))
		return nil
	}

	crossings := make(map[int]intSet, len(kindA)+len(kindB))
	for _, a := range kindA {
		for _, b := range kindB {
			if !rl[a].intersects(rl[b]) {
				continue
			}
			if _, ok := crossings[a]; !ok {
				crossings[a] = make(intSet)
			}
			if _, ok := crossings[b]; !ok {
				crossings[b] = make(intSet)
			}
			crossings[a].add(b)
			crossings[b].add(a)
		}
	}
	return crossings
}

// devicePoint transforms (x, y) by the product `ss._afee.Mult(ss._dfgb)` and
// returns the result as a point.
func (ss *shapesState) devicePoint(x, y float64) _gab.Point {
	m := ss._afee.Mult(ss._dfgb)
	x, y = m.Transform(x, y)
	return _gab.NewPoint(x, y)
}

// setFont records `size` and the font looked up by `name` in the text state
// of `to`. A nil receiver is a no-op. The size is stored even if the font
// lookup fails, in which case the lookup error is returned.
func (to *textObject) setFont(name string, size float64) error {
	if to == nil {
		return nil
	}
	to._gacd._fgca = size
	font, err := to.getFont(name)
	if err != nil {
		return err
	}
	to._gacd._dgdf = font
	return nil
}

// _cadc returns a copy of `marks` in which every mark has `_fgfd` set to true
// and `_ggea` set to `table`. The input slice is not modified. The result is
// nil when `marks` is empty.
func _cadc(marks []TextMark, table *TextTable) []TextMark {
	var tagged []TextMark
	for _, m := range marks {
		m._fgfd = true
		m._ggea = table
		tagged = append(tagged, m)
	}
	return tagged
}

// Kinds of ruling. `_cefaa` is the kind given to rulings built from a
// rectangle's top edge by `_aage`.
// NOTE(review): `_bgbdg` (the zero value) presumably means "no kind" and
// `_acgee` the perpendicular orientation — confirm.
const (
	_bgbdg rulingKind = iota
	_cefaa
	_acgee
)

// gridTile is a rectangle together with four border flags. As used by
// `contains`, `_afdge` guards Llx, `_bfecb` guards Urx, `_eaed` guards Lly and
// `_fdbd` guards Ury.
type gridTile struct {
	_bg.PdfRectangle
	_fdbd, _afdge, _eaed, _bfecb bool
}

// getDown returns, for each column of the last row of `t`, the paragraph
// linked through that cell's `_fgdg` field. It returns nil if any of those
// paragraphs is taken (or nil), or if consecutive entries are not linked to
// each other through `_eabac`.
func (t *textTable) getDown() paraList {
	below := make(paraList, t._ddfc)
	for x := 0; x < t._ddfc; x++ {
		para := t.get(x, t._gcbge-1)._fgdg
		if para.taken() {
			return nil
		}
		below[x] = para
	}
	for x := 0; x < t._ddfc-1; x++ {
		if below[x]._eabac != below[x+1] {
			return nil
		}
	}
	return below
}

// toTextTable converts `t` to the exported TextTable form. Cells for which
// `get` returns nil are left as zero-valued TableCells. Mark offsets restart
// at 0 for every cell.
func (t *textTable) toTextTable() TextTable {
	if _cgafg {
		_b.Log.Info("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064", t._ddfc, t._gcbge)
	}
	cells := make([][]TableCell, t._gcbge)
	for y := 0; y < t._gcbge; y++ {
		cells[y] = make([]TableCell, t._ddfc)
		for x := 0; x < t._ddfc; x++ {
			para := t.get(x, y)
			if para == nil {
				continue
			}
			if _cgafg {
				_ce.Printf("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", x, y, para)
			}
			cells[y][x].Text = para.text()
			offset := 0
			cells[y][x].Marks._bca = para.toTextMarks(&offset)
		}
	}
	table := TextTable{W: t._ddfc, H: t._gcbge, Cells: cells}
	table.PdfRectangle = t.bbox()
	return table
}
// PageTextOptions holds various options available in extraction process.
// NOTE(review): the two flags appear to mirror the extractor options
// DisableDocumentTags (`_cbff`) and UseSimplerExtractionProcess (`_bbg`) —
// confirm against ExtractPageText.
type PageTextOptions struct {
	_cbff bool
	_bbg  bool
}

// _gefa reports whether the x-extents of `a` and `b` overlap (touching edges
// count as overlapping).
func _gefa(a, b _bg.PdfRectangle) bool {
	bStartsBeforeAEnds := b.Llx <= a.Urx
	aStartsBeforeBEnds := a.Llx <= b.Urx
	return bStartsBeforeAEnds && aStartsBeforeBEnds
}

// _ecdb returns the keys of `m` in increasing order.
func _ecdb(m map[int][]float64) []int {
	keys := make([]int, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	_df.Ints(keys)
	return keys
}

// _aage returns a ruling of kind `_cefaa` built from the top edge of `r`: its
// primary coordinate is r.Ury and it spans from r.Llx to r.Urx.
func _aage(r _bg.PdfRectangle) *ruling {
	top := ruling{_eabdg: _cefaa, _befee: r.Ury, _agbc: r.Llx, _gffgd: r.Urx}
	return &top
}

// bbox returns the embedded PdfRectangle of the mark.
func (tm *textMark) bbox() _bg.PdfRectangle {
	return tm.PdfRectangle
}
// log writes `rl` to the log, one ruling per line, prefixed by `title`.
// It does nothing unless `_bccgb` is set.
func (rl rulingList) log(title string) {
	if !_bccgb {
		return
	}
	_b.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", title, rl.String())
	for i, r := range rl {
		_ce.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r.String())
	}
}

// _fegfa maps runes to single-rune strings in the U+0300..U+0359 range.
// NOTE(review): looks like a table from spacing diacritics to their combining
// forms — confirm at the use site.
var (
	_fegfa = map[rune]string{
		0x0060: "\u0300",
		0x02CB: "\u0300",
		0x0027: "\u0301",
		0x00B4: "\u0301",
		0x02B9: "\u0301",
		0x02CA: "\u0301",
		0x005E: "\u0302",
		0x02C6: "\u0302",
		0x007E: "\u0303",
		0x02DC: "\u0303",
		0x00AF: "\u0304",
		0x02C9: "\u0304",
		0x02D8: "\u0306",
		0x02D9: "\u0307",
		0x00A8: "\u0308",
		0x00B0: "\u030a",
		0x02DA: "\u030a",
		0x02BA: "\u030b",
		0x02DD: "\u030b",
		0x02C7: "\u030c",
		0x02C8: "\u030d",
		0x0022: "\u030e",
		0x02BB: "\u0312",
		0x02BC: "\u0313",
		0x0486: "\u0313",
		0x055A: "\u0313",
		0x02BD: "\u0314",
		0x0485: "\u0314",
		0x0559: "\u0314",
		0x02D4: "\u031d",
		0x02D5: "\u031e",
		0x02D6: "\u031f",
		0x02D7: "\u0320",
		0x02B2: "\u0321",
		0x00B8: "\u0327",
		0x02CC: "\u0329",
		0x02B7: "\u032b",
		0x02CD: "\u0331",
		0x005F: "\u0332",
		0x204E: "\u0359",
	}
)

// topoOrder returns an ordering of the indexes of `paras` derived from a
// depth-first traversal over the `readBefore` relation. The traversal appends
// each index after all indexes it reaches, and the resulting sequence is
// passed through `_gfdaa` before being returned.
func (paras paraList) topoOrder() []int {
	if _ecdg {
		_b.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	n := len(paras)
	visited := make([]bool, n)
	postOrder := make([]int, 0, n)
	llyOrder := paras.llyOrdering()

	var visit func(idx int)
	visit = func(idx int) {
		visited[idx] = true
		for next := 0; next < n; next++ {
			if visited[next] {
				continue
			}
			if paras.readBefore(llyOrder, idx, next) {
				visit(next)
			}
		}
		postOrder = append(postOrder, idx)
	}

	for idx := 0; idx < n; idx++ {
		if !visited[idx] {
			visit(idx)
		}
	}
	return _gfdaa(postOrder)
}

// _fgcce packs the pair (x, y) into a single key: x*0x1000000 + y.
func _fgcce(x, y int) uint64 {
	return uint64(x)*0x1000000 + uint64(y)
}

// _dfba groups the text lines of `paras` (including the lines of the cells of
// any table attached to a paragraph) by the `_adbb` value of the first mark
// of the first word of each line. Lines for which `_gegg` returns false are
// logged and skipped.
func _dfba(paras *paraList) map[int][]*textLine {
	groups := map[int][]*textLine{}
	addLine := func(line *textLine) {
		if !_gegg(line) {
			_b.Log.Debug("g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e")
			return
		}
		key := line._aafd[0]._dggf[0]._adbb
		groups[key] = append(groups[key], line)
	}

	for _, para := range *paras {
		for _, line := range para._gfbb {
			addLine(line)
		}
		if para._bgba == nil {
			continue
		}
		for _, cellPara := range para._bgba._efeac {
			for _, line := range cellPara._gfbb {
				addLine(line)
			}
		}
	}
	return groups
}

// compositeRowCorridors returns, for each row index from 1 up to the table
// height that has at least one composite cell, the corridors computed by
// `_ccab` for that row's composite cells. Row 0 is never examined.
func (t *textTable) compositeRowCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, t._gcbge)
	if _cgafg {
		_b.Log.Info("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064", t._gcbge)
	}
	for y := 1; y < t._gcbge; y++ {
		var rowCells []compositeCell
		for x := 0; x < t._ddfc; x++ {
			cell, ok := t._dadcc[_fgcce(x, y)]
			if ok {
				rowCells = append(rowCells, cell)
			}
		}
		if len(rowCells) == 0 {
			continue
		}
		rowCorridors := _ccab(rowCells)
		corridors[y] = rowCorridors
		if _cgafg {
			_ce.Printf("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a", y, rowCorridors)
		}
	}
	return corridors
}

// _dabgf is a regular-expression source string. Decoded, it is a
// case-insensitive pattern for a Roman numeral followed by ")" or ".", or a
// Roman numeral wrapped in parentheses.
// NOTE(review): presumably used to recognise list item labels — confirm where it is compiled.
var _dabgf string = "\u0028\u003f\u0069\u0029\u005e\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028?\u003a\u0044\u007cM\u0029\u007c\u0044\u003f\u0043{\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028\u003f\u003a\u004c\u007c\u0043\u0029\u007cL\u003f\u0058\u007b\u0030\u002c\u0033}\u0029\u0028\u0049\u0028\u003f\u003a\u0056\u007c\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u005c\u0029\u007c\u005c\u002e\u0029\u007c\u005e\u005c\u0028\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028\u003f\u003aD\u007cM\u0029\u007c\u0044\u003f\u0043\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028?\u003a\u004c\u007c\u0043\u0029\u007c\u004c?\u0058\u007b0\u002c\u0033\u007d\u0029(\u0049\u0028\u003f\u003a\u0056|\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u005c\u0029"
// nextLine moves the line position by (0, -to._gacd._ced) using moveLP.
func (to *textObject) nextLine() {
	to.moveLP(0, -to._gacd._ced)
}

// bbox returns the embedded PdfRectangle of the line.
func (l *textLine) bbox() _bg.PdfRectangle {
	return l.PdfRectangle
}

// numBorders returns how many of the tile's four border flags are set.
func (t gridTile) numBorders() int {
	count := 0
	for _, present := range [...]bool{t._afdge, t._bfecb, t._eaed, t._fdbd} {
		if present {
			count++
		}
	}
	return count
}

// textWord is a rectangle with an associated string and a list of text marks.
// NOTE(review): field roles inferred from use elsewhere in the file and not
// verified here: `_ggaef` looks like the word text, `_dggf` its marks,
// `_baebb` a depth value and `_ebgb` a size used for tolerances — confirm.
type textWord struct {
	_bg.PdfRectangle
	_baebb float64
	_ggaef string
	_dggf  []*textMark
	_ebgb  float64
	_gagaf bool
}

// blocks reports whether some ruling in `rl` lies between `a` and `b`.
// It returns false when the [_agbc, _gffgd] ranges of `a` and `b` do not
// overlap. Otherwise a ruling counts if its `_befee` is between those of `a`
// and `b` (within tolerance `_bddeb`) and its own range overlaps the range
// common to `a` and `b`.
func (rl rulingList) blocks(a, b *ruling) bool {
	if a._agbc > b._gffgd || b._agbc > a._gffgd {
		return false
	}
	commonLo := _ef.Max(a._agbc, b._agbc)
	commonHi := _ef.Min(a._gffgd, b._gffgd)
	// Order the pair so that `a` has the smaller primary coordinate.
	if a._befee > b._befee {
		a, b = b, a
	}
	for _, r := range rl {
		between := a._befee <= r._befee+_bddeb && r._befee <= b._befee+_bddeb
		overlaps := r._agbc <= commonHi && commonLo <= r._gffgd
		if between && overlaps {
			return true
		}
	}
	return false
}

// _fgeb returns a slice of `n` starting offsets and the total size, where
// entry i occupies len(corridors[i])+1 slots and offsets are cumulative.
func _fgeb(n int, corridors map[int][]float64) ([]int, int) {
	offsets := make([]int, n)
	total := 0
	for i := range offsets {
		offsets[i] = total
		total += len(corridors[i]) + 1
	}
	return offsets, total
}
// _dgbg returns the separator to insert between two values `a` and `b`:
// "\n" when `_acbc(a-b)` is false, otherwise " ".
func _dgbg(a, b float64) string {
	if !_acbc(a - b) {
		return "\u000a"
	}
	return "\u0020"
}

// _eabd returns the structure elements exactly three levels below `root`
// whose `_bfeg` field equals "L". The result is never nil.
func _eabd(root structElement) []structElement {
	found := []structElement{}
	for _, child := range root._efce {
		for _, grandchild := range child._efce {
			for _, elem := range grandchild._efce {
				if elem._bfeg == "\u004c" {
					found = append(found, elem)
				}
			}
		}
	}
	return found
}

// _cdbbf returns `text` with its last rune removed. The empty string is
// returned unchanged (previously this panicked with a slice-bounds error).
func _cdbbf(text string) string {
	runes := []rune(text)
	if len(runes) == 0 {
		return ""
	}
	return string(runes[:len(runes)-1])
}

// log writes the table dimensions, grid flag, bounding box and every non-nil
// cell of `t` to the log, prefixed by `title`. It does nothing unless
// `_cgafg` is set.
func (t *textTable) log(title string) {
	if !_cgafg {
		return
	}
	_b.Log.Info("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066", title, t._ddfc, t._gcbge, t._edgac, t.PdfRectangle)
	for y := 0; y < t._gcbge; y++ {
		for x := 0; x < t._ddfc; x++ {
			cell := t.get(x, y)
			if cell == nil {
				continue
			}
			_ce.Printf("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a", x, y, cell.PdfRectangle, _dfcggd(cell.text(), 50), _a.RuneCountInString(cell.text()))
		}
	}
}

// sort orders the words in each entry of `b._cgdg` using the comparison
// function `_aea`.
func (b *wordBag) sort() {
	for _, words := range b._cgdg {
		_df.Slice(words, func(i, j int) bool { return _aea(words[i], words[j]) < 0 })
	}
}
// Len returns the number of TextMarks in `ma`.
// A nil receiver has length 0.
func (ma *TextMarkArray) Len() int {
	if ma != nil {
		return len(ma._bca)
	}
	return 0
}

// setTextRise stores `rise` in the text state of `to`. A nil receiver is a no-op.
func (to *textObject) setTextRise(rise float64) {
	if to != nil {
		to._gacd._fdea = rise
	}
}

// emptyCompositeColumn reports whether no composite cell in column `x` of `t`
// holds any paragraphs.
func (t *textTable) emptyCompositeColumn(x int) bool {
	for y := 0; y < t._gcbge; y++ {
		if cell, ok := t._dadcc[_fgcce(x, y)]; ok && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// markWordBoundaries sets the `_gagaf` flag on every word after the first
// whose `_efbc` value relative to the preceding word is at least
// `_cfbb * l._bfbb`.
func (l *textLine) markWordBoundaries() {
	threshold := _cfbb * l._bfbb
	// The range index i refers to the word immediately before `word`,
	// because the range starts at element 1.
	for i, word := range l._aafd[1:] {
		prev := l._aafd[i]
		if _efbc(word, prev) >= threshold {
			word._gagaf = true
		}
	}
}
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// The two int results are the statistics returned by extractPageText, passed
// through unchanged. An `ErrColorOutOfRange` error from extractPageText is
// tolerated; any other error aborts the extraction.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
//
// Replace with a function like Extract() (*PageText, error)
func (e *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pageText, stat1, stat2, err := e.extractPageText(e._bc, e._gga, _gab.IdentityMatrix(), 0)
	if err != nil && err != _bg.ErrColorOutOfRange {
		return nil, 0, 0, err
	}
	if e._ad != nil {
		pageText._fca._bbg = e._ad.UseSimplerExtractionProcess
	}
	pageText.computeViews()
	if err = _gceee(pageText); err != nil {
		return nil, 0, 0, err
	}
	if e._ad != nil {
		if e._ad.ApplyCropBox && e._fc != nil {
			pageText.ApplyArea(*e._fc)
		}
		pageText._fca._cbff = e._ad.DisableDocumentTags
	}
	return pageText, stat1, stat2, nil
}

// addNeighbours fills in the four neighbour links of every paragraph in
// `paras`: `_dbaed` (left), `_eabac` (right), `_ffeg` (above) and `_fgdg`
// (below). A neighbour is linked only if it is the unique closest candidate
// on that side and passes the overlap test (`_cfda` for left/right, `_gefa`
// for above/below). Finally, any link that is not reciprocated by the
// neighbour is cleared.
func (paras paraList) addNeighbours() {
	// splitX partitions the candidate indexes into paragraphs entirely to the
	// left of `para` and entirely to the right of it.
	splitX := func(candidates []int, para *textPara) ([]*textPara, []*textPara) {
		lefts := make([]*textPara, 0, len(candidates)-1)
		rights := make([]*textPara, 0, len(candidates)-1)
		for _, idx := range candidates {
			c := paras[idx]
			if c.Urx <= para.Llx {
				lefts = append(lefts, c)
			} else if c.Llx >= para.Urx {
				rights = append(rights, c)
			}
		}
		return lefts, rights
	}
	// splitY partitions the candidate indexes into paragraphs entirely above
	// `para` and entirely below it.
	splitY := func(candidates []int, para *textPara) ([]*textPara, []*textPara) {
		aboves := make([]*textPara, 0, len(candidates)-1)
		belows := make([]*textPara, 0, len(candidates)-1)
		for _, idx := range candidates {
			c := paras[idx]
			if c.Ury <= para.Lly {
				belows = append(belows, c)
			} else if c.Lly >= para.Ury {
				aboves = append(aboves, c)
			}
		}
		return aboves, belows
	}
	// closest picks the best element of the non-empty `cands` according to
	// `better`, and returns nil if any other candidate clashes with it.
	closest := func(cands []*textPara, better, clash func(c, best *textPara) bool) *textPara {
		best := cands[0]
		for _, c := range cands[1:] {
			if better(c, best) {
				best = c
			}
		}
		for _, c := range cands {
			if c != best && clash(c, best) {
				return nil
			}
		}
		return best
	}

	neighbours := paras.yNeighbours(_baeeb)
	for _, para := range paras {
		candidates := neighbours[para]
		if len(candidates) == 0 {
			continue
		}
		lefts, rights := splitX(candidates, para)
		if len(lefts) == 0 && len(rights) == 0 {
			continue
		}
		if len(lefts) > 0 {
			best := closest(lefts,
				func(c, best *textPara) bool { return c.Urx >= best.Urx },
				func(c, best *textPara) bool { return c.Urx > best.Llx })
			if best != nil && _cfda(para.PdfRectangle, best.PdfRectangle) {
				para._dbaed = best
			}
		}
		if len(rights) > 0 {
			best := closest(rights,
				func(c, best *textPara) bool { return c.Llx <= best.Llx },
				func(c, best *textPara) bool { return c.Llx < best.Urx })
			if best != nil && _cfda(para.PdfRectangle, best.PdfRectangle) {
				para._eabac = best
			}
		}
	}

	neighbours = paras.xNeighbours(_bbe)
	for _, para := range paras {
		candidates := neighbours[para]
		if len(candidates) == 0 {
			continue
		}
		aboves, belows := splitY(candidates, para)
		if len(aboves) == 0 && len(belows) == 0 {
			continue
		}
		if len(belows) > 0 {
			best := closest(belows,
				func(c, best *textPara) bool { return c.Ury >= best.Ury },
				func(c, best *textPara) bool { return c.Ury > best.Lly })
			if best != nil && _gefa(para.PdfRectangle, best.PdfRectangle) {
				para._fgdg = best
			}
		}
		if len(aboves) > 0 {
			best := closest(aboves,
				func(c, best *textPara) bool { return c.Lly <= best.Lly },
				func(c, best *textPara) bool { return c.Lly < best.Ury })
			if best != nil && _gefa(para.PdfRectangle, best.PdfRectangle) {
				para._ffeg = best
			}
		}
	}

	// Drop links that the neighbour does not return.
	for _, para := range paras {
		if para._dbaed != nil && para._dbaed._eabac != para {
			para._dbaed = nil
		}
		if para._ffeg != nil && para._ffeg._fgdg != para {
			para._ffeg = nil
		}
		if para._eabac != nil && para._eabac._dbaed != para {
			para._eabac = nil
		}
		if para._fgdg != nil && para._fgdg._ffeg != para {
			para._fgdg = nil
		}
	}
}

// _acgbed decodes the image stream `obj` and converts it with `_ffda` using
// `fill`. It returns (nil, nil) when `obj` is not a stream.
func _acgbed(obj _ea.PdfObject, fill _ag.Color) (_fa.Image, error) {
	stream, ok := _ea.GetStream(obj)
	if !ok {
		return nil, nil
	}
	xobj, err := _bg.NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}
	img, err := xobj.ToImage()
	if err != nil {
		return nil, err
	}
	return _ffda(img, fill), nil
}

// RenderMode bit flags. The values are 1, 2 and 4 and can be combined.
const (
	// RenderModeStroke is the stroke flag (1).
	RenderModeStroke RenderMode = 1 << iota
	// RenderModeFill is the fill flag (2).
	RenderModeFill
	// RenderModeClip is the clip flag (4).
	RenderModeClip
)

// _dadbd returns the indexes 0..n-1 sorted so that index a precedes index b
// when `less(a, b)` is true.
func _dadbd(n int, less func(int, int) bool) []int {
	order := make([]int, n)
	for i := range order {
		order[i] = i
	}
	_df.Slice(order, func(i, j int) bool { return less(order[i], order[j]) })
	return order
}

// add inserts `x` into the set.
func (s intSet) add(x int) {
	s[x] = struct{}{}
}

// inTile returns the paragraphs of `paras` whose rectangles are contained in
// `tile`, in their original order.
func (paras paraList) inTile(tile gridTile) paraList {
	var inside paraList
	for _, para := range paras {
		if tile.contains(para.PdfRectangle) {
			inside = append(inside, para)
		}
	}
	if _cgafg {
		_ce.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", tile, len(inside))
		for i, para := range inside {
			_ce.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
		_ce.Println("")
	}
	return inside
}

// _ffda renders `img` as an RGBA image of the same size in which every pixel
// whose R+G+B components sum to zero becomes transparent and every other
// pixel is set to `fill`. Pixels whose colour cannot be read are logged and
// left at the zero value.
func _ffda(img *_bg.Image, fill _ag.Color) _fa.Image {
	width, height := int(img.Width), int(img.Height)
	out := _fa.NewRGBA(_fa.Rect(0, 0, width, height))
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			c, err := img.ColorAt(x, y)
			if err != nil {
				_b.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e", x, y)
				continue
			}
			r, g, b, _ := c.RGBA()
			var pixel _ag.Color
			if r+g+b == 0 {
				pixel = _ag.Transparent
			} else {
				pixel = fill
			}
			out.Set(x, y, pixel)
		}
	}
	return out
}

// eventNeighbours sorts `events` in place (by `_faccf`, then entering events
// first, then by position) and sweeps through them. Two paragraphs are
// neighbours if one is entered while the other is still active. The result
// maps each entered paragraph to the indexes of its neighbours (nil if none).
func (paras paraList) eventNeighbours(events []event) map[*textPara][]int {
	_df.Slice(events, func(i, j int) bool {
		a, b := events[i], events[j]
		posA, posB := a._faccf, b._faccf
		if posA != posB {
			return posA < posB
		}
		if a._ecacd != b._ecacd {
			return a._ecacd
		}
		return i < j
	})

	overlaps := make(map[int]intSet)
	active := make(intSet)
	for _, ev := range events {
		if ev._ecacd {
			overlaps[ev._gbfff] = make(intSet)
			for other := range active {
				if other != ev._gbfff {
					overlaps[ev._gbfff].add(other)
					overlaps[other].add(ev._gbfff)
				}
			}
			active.add(ev._gbfff)
		} else {
			active.del(ev._gbfff)
		}
	}

	result := map[*textPara][]int{}
	for idx, set := range overlaps {
		para := paras[idx]
		if len(set) == 0 {
			result[para] = nil
			continue
		}
		list := make([]int, len(set))
		k := 0
		for other := range set {
			list[k] = other
			k++
		}
		result[para] = list
	}
	return result
}

// removeDuplicates removes from `b` words that have the same text as another
// word in the same depth bin and whose four rectangle coordinates all differ
// from it by less than a tolerance (`_ccee` times the `_ebgb` of the first
// word at the current depth index). Bins that become empty are deleted.
func (b *wordBag) removeDuplicates() {
	if _aebb {
		_b.Log.Info("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071", b.text())
	}
	for _, depthIdx := range b.depthIndexes() {
		if len(b._cgdg[depthIdx]) == 0 {
			continue
		}
		first := b._cgdg[depthIdx][0]
		tol := _ccee * first._ebgb
		depth := first._baebb
		for _, bandIdx := range b.depthBand(depth, depth+tol) {
			dups := map[*textWord]struct{}{}
			words := b._cgdg[bandIdx]
			for _, w := range words {
				if _, isDup := dups[w]; isDup {
					continue
				}
				for _, other := range words {
					if _, isDup := dups[other]; isDup {
						continue
					}
					if other != w && other._ggaef == w._ggaef &&
						_ef.Abs(other.Llx-w.Llx) < tol &&
						_ef.Abs(other.Urx-w.Urx) < tol &&
						_ef.Abs(other.Lly-w.Lly) < tol &&
						_ef.Abs(other.Ury-w.Ury) < tol {
						dups[other] = struct{}{}
					}
				}
			}
			if len(dups) == 0 {
				continue
			}
			// Compact the surviving words to the front of the bin.
			kept := 0
			for _, w := range words {
				if _, isDup := dups[w]; !isDup {
					words[kept] = w
					kept++
				}
			}
			b._cgdg[bandIdx] = words[:len(words)-len(dups)]
			if len(b._cgdg[bandIdx]) == 0 {
				delete(b._cgdg, bandIdx)
			}
		}
	}
}

// _gdfa returns a description of `corridors` of the form "{k: v, k: v}" with
// the keys in increasing order.
func _gdfa(corridors map[int][]float64) string {
	keys := _ecdb(corridors)
	parts := make([]string, len(corridors))
	for i, k := range keys {
		parts[i] = _ce.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", k, corridors[k])
	}
	return _ce.Sprintf("\u007b\u0025\u0073\u007d", _c.Join(parts, "\u002c\u0020"))
}

// sortTopoOrder reorders `paras` in place according to topoOrder.
func (paras paraList) sortTopoOrder() {
	order := paras.topoOrder()
	paras.reorder(order)
}

// tidied returns `rl` with duplicates removed, snapped to groups and sorted.
// It returns nil if snapToGroups returns nil. `title` is used only in log output.
func (rl rulingList) tidied(title string) rulingList {
	uniques := rl.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")
	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()
	if _bccgb {
		_b.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", title, len(rl), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// showText forwards its arguments to renderText and returns its result.
func (to *textObject) showText(obj _ea.PdfObject, data []byte, n int) error {
	return to.renderText(obj, data, n)
}

// _aceg is the constant 1/1000.
const _aceg = 1.0 / 1000.0

// structTreeRoot holds a list of structure elements and a string.
// NOTE(review): presumably the root of a document's structure tree, with
// `_cegf` its children and `_baeb` its type — confirm.
type structTreeRoot struct {
	_cegf []structElement
	_baeb string
}

// _dag compares `a` and `b`: it returns `_fdcc(a, b)` unless `_acbc` accepts
// that value, in which case it falls back to `_aea(a, b)`.
func _dag(a, b bounded) float64 {
	primary := _fdcc(a, b)
	if !_acbc(primary) {
		return primary
	}
	return _aea(a, b)
}

// _bacb merges word bags that intersect. `bags` is sorted in place by
// decreasing area (then decreasing height). Each bag that has not already
// been absorbed then absorbs every later, not-yet-absorbed bag whose
// rectangle intersects its own rectangle extended to the left by the bag's
// `_ecdf`. The surviving bags are returned.
//
// The inner loop now skips bags that were already absorbed. Previously it
// re-tested the outer index, which is never absorbed at that point, so a bag
// could be absorbed by two different bags and its words duplicated.
func _bacb(bags []*wordBag) []*wordBag {
	if len(bags) <= 1 {
		return bags
	}
	if _gde {
		_b.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	_df.Slice(bags, func(i, j int) bool {
		a, b := bags[i], bags[j]
		areaA := a.Width() * a.Height()
		areaB := b.Width() * b.Height()
		if areaA != areaB {
			return areaA > areaB
		}
		if a.Height() != b.Height() {
			return a.Height() > b.Height()
		}
		return i < j
	})

	var merged []*wordBag
	absorbed := make(intSet)
	for i := 0; i < len(bags); i++ {
		if absorbed.has(i) {
			continue
		}
		bag := bags[i]
		for j := i + 1; j < len(bags); j++ {
			if absorbed.has(j) {
				continue
			}
			other := bags[j]
			reach := bag.PdfRectangle
			reach.Llx -= bag._ecdf
			if _gfea(reach, other.PdfRectangle) {
				bag.absorb(other)
				absorbed.add(j)
			}
		}
		merged = append(merged, bag)
	}
	if len(bags) != len(merged)+len(absorbed) {
		_b.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(bags), len(merged), len(absorbed))
	}
	return merged
}

// toTextMarks returns the TextMarks of the word's marks, appended with
// `_cgdcf`, which also receives `offset`.
func (w *textWord) toTextMarks(offset *int) []TextMark {
	var marks []TextMark
	for _, tm := range w._dggf {
		marks = _cgdcf(marks, offset, tm.ToTextMark())
	}
	return marks
}

// _gfeg returns `start`, followed by the elements of `values` lying strictly
// between `start` and `end`, followed by `end`. Scanning stops at the first
// element that reaches the upper bound, so `values` is expected to be in
// increasing order.
func _gfeg(values []float64, start, end float64) []float64 {
	lo, hi := start, end
	if hi < lo {
		lo, hi = hi, lo
	}
	out := make([]float64, 0, len(values)+2)
	out = append(out, start)
	for _, v := range values {
		if v <= lo {
			continue
		}
		if v >= hi {
			break
		}
		out = append(out, v)
	}
	out = append(out, end)
	return out
}

// taken reports whether `p` is nil or has its `_abeg` flag set.
func (p *textPara) taken() bool {
	return p == nil || p._abeg
}

// _badcf converts `col` in colorspace `cs` to an opaque NRGBA colour. It
// returns black when either argument is nil, when the conversion to RGB
// fails, or when the converted colour is not a device RGB colour.
func _badcf(cs _bg.PdfColorspace, col _bg.PdfColor) _ag.Color {
	if cs == nil || col == nil {
		return _ag.Black
	}
	converted, err := cs.ColorToRGB(col)
	if err != nil {
		_b.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", col, cs, err)
		return _ag.Black
	}
	rgb, ok := converted.(*_bg.PdfColorDeviceRGB)
	if !ok {
		_b.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", converted)
		return _ag.Black
	}
	return _ag.NRGBA{R: uint8(rgb.R() * 255), G: uint8(rgb.G() * 255), B: uint8(rgb.B() * 255), A: uint8(255)}
}

// getComposite returns the paragraphs and bounding box of the composite cell
// at (x, y), or (nil, zero rectangle) if there is no such cell.
func (t *textTable) getComposite(x, y int) (paraList, _bg.PdfRectangle) {
	cell, ok := t._dadcc[_fgcce(x, y)]
	if _cgafg {
		_ce.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a", x, y, cell.String())
	}
	if !ok {
		return nil, _bg.PdfRectangle{}
	}
	return cell.parasBBox()
}

// absorb merges `other` into `w`: the rectangle becomes `_egbga` of the two
// rectangles and the marks of `other` are appended to those of `w`.
func (w *textWord) absorb(other *textWord) {
	w.PdfRectangle = _egbga(w.PdfRectangle, other.PdfRectangle)
	w._dggf = append(w._dggf, other._dggf...)
}

// extractContentStreamImages parses `contents` and processes it with
// `ctx.processOperand` registered as the handler for all operands. The image
// cache and the options of `ctx` are initialised first if unset. Parse and
// processing errors are returned.
func (ctx *imageExtractContext) extractContentStreamImages(contents string, resources *_bg.PdfPageResources) error {
	parser := _fb.NewContentStreamParser(contents)
	ops, err := parser.Parse()
	if err != nil {
		return err
	}
	if ctx._agf == nil {
		ctx._agf = map[*_ea.PdfObjectStream]*cachedImage{}
	}
	if ctx._fae == nil {
		ctx._fae = &ImageExtractOptions{}
	}
	processor := _fb.NewContentStreamProcessor(*ops)
	processor.AddHandler(_fb.HandlerConditionEnumAllOperands, "", ctx.processOperand)
	return processor.Process(resources)
}

// _bbcaa replaces the coordinates of every point of every subpath in
// `sections` by their `_egagc` values, in place. It does nothing when
// `_dgeb` is negative.
func _bbcaa(sections []pathSection) {
	if _dgeb < 0.0 {
		return
	}
	if _bccgb {
		_b.Log.Info("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073", len(sections))
	}
	for si, section := range sections {
		for pi, path := range section._dgfc {
			for ki, orig := range path._fbcgf {
				path._fbcgf[ki] = _gab.Point{X: _egagc(orig.X), Y: _egagc(orig.Y)}
				if !_bccgb {
					continue
				}
				snapped := path._fbcgf[ki]
				if !_eaca(orig, snapped) {
					delta := _gab.Point{X: snapped.X - orig.X, Y: snapped.Y - orig.Y}
					_ce.Printf("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a", si, pi, ki, orig, snapped, delta)
				}
			}
		}
	}
}

// _gaaff builds a word bag from `words`, which must be non-empty. The bag is
// created from the first word by `_fcfg`; each further word is added to the
// bin given by `_ebfc` of its `_baebb` and the bag rectangle is grown to
// include it. The bag is sorted before being returned.
func _gaaff(words []*textWord, pageHeight float64, rulingsA, rulingsB rulingList) *wordBag {
	bag := _fcfg(words[0], pageHeight, rulingsA, rulingsB)
	for _, w := range words[1:] {
		depthIdx := _ebfc(w._baebb)
		bag._cgdg[depthIdx] = append(bag._cgdg[depthIdx], w)
		bag.PdfRectangle = _egbga(bag.PdfRectangle, w.PdfRectangle)
	}
	bag.sort()
	return bag
}

// _eba returns the top of `r` minus the bottom of the bounding box of `b`.
func _eba(r _bg.PdfRectangle, b bounded) float64 {
	return r.Ury - b.bbox().Lly
}
// _gegg reports whether every text mark in every word of `line` carries the
// same `_adbb` value. A line without any marks yields true.
func _gegg(line *textLine) bool {
	ref := -1 // -1 means no reference value has been captured yet.
	for _, word := range line._aafd {
		for _, mark := range word._dggf {
			switch {
			case ref == -1:
				ref = mark._adbb
			case mark._adbb != ref:
				// A mismatch can never be undone by later marks.
				return false
			}
		}
	}
	return true
}
// pullWord adds `word` to `bag` under depth index `depthIdx`, growing the bag's
// bounding box and `_ecdf` maximum as needed, and records the word in
// `removals[depthIdx]`. `removals[depthIdx]` must already be a non-nil map.
func (bag *wordBag) pullWord(word *textWord, depthIdx int, removals map[int]map[*textWord]struct{}) {
	bag.PdfRectangle = _egbga(bag.PdfRectangle, word.PdfRectangle)
	if bag._ecdf < word._ebgb {
		bag._ecdf = word._ebgb
	}
	bag._cgdg[depthIdx] = append(bag._cgdg[depthIdx], word)
	removals[depthIdx][word] = struct{}{}
}

// writeCellText writes the text of every line of `para` to `w`.
// When `_fgcg` is set and a non-final line ends in a hyphen, its text is passed
// through `_cdbbf` and no separator is written after it; otherwise the
// separator returned by `_dgbg` for the two lines' `_cbbd` values is written
// between consecutive lines. Write errors are ignored.
func (para *textPara) writeCellText(w _ga.Writer) {
	lastIdx := len(para._gfbb) - 1
	for i, line := range para._gfbb {
		isLast := i == lastIdx
		text := line.text()
		joinHyphen := _fgcg && line.endsInHyphen() && !isLast
		if joinHyphen {
			text = _cdbbf(text)
		}
		w.Write([]byte(text))
		if joinHyphen || isLast {
			continue
		}
		next := para._gfbb[i+1]
		w.Write([]byte(_dgbg(line._cbbd, next._cbbd)))
	}
}

// moveText forwards the offsets (`tx`, `ty`) to moveLP.
func (to *textObject) moveText(tx, ty float64) {
	to.moveLP(tx, ty)
}

// bbox returns the bounding rectangle of `t`.
func (t *textTable) bbox() _bg.PdfRectangle {
	return t.PdfRectangle
}
// _efdc is the size limit checked by textObject.getFont: once the extractor's
// font cache holds this many entries, one entry is evicted before a new font
// is stored.
const _efdc =10;
// String returns a human readable description of `ss`.
func (ss *shapesState) String() string {
	// Reports the number of subpaths and the `_afge` flag.
	subpathCount := len(ss._cbfc)
	return _ce.Sprintf("{%d subpaths fresh=%t}", subpathCount, ss._afge)
}
// String returns a description of `tm`: bounding box, font size and text.
func (tm *textMark) String() string {
	return _ce.Sprintf("%.2f fontsize=%.2f \"%s\"", tm.PdfRectangle, tm._gceb, tm._ebgd)
}

// _afega returns a ruling of kind `_acgee` whose primary coordinate is the
// left edge of `rect` and whose secondary extent is rect.Lly..rect.Ury.
func _afega(rect _bg.PdfRectangle) *ruling {
	return &ruling{_eabdg: _acgee, _befee: rect.Llx, _agbc: rect.Lly, _gffgd: rect.Ury}
}

// pullWord appends `word` to `line` and removes it from `bag` at `depthIdx`.
func (line *textLine) pullWord(bag *wordBag, word *textWord, depthIdx int) {
	line.appendWord(word)
	bag.removeWord(word, depthIdx)
}

// sortStrict sorts `vecs` in place: by kind (descending), then by primary
// coordinate `_befee` unless `_acbc` treats the difference as zero, then by
// `_agbc`, and finally by `_gffgd`.
func (vecs rulingList) sortStrict() {
	_df.Slice(vecs, func(i, j int) bool {
		a, b := vecs[i], vecs[j]
		if a._eabdg != b._eabdg {
			return a._eabdg > b._eabdg
		}
		if !_acbc(a._befee - b._befee) {
			return a._befee < b._befee
		}
		if a._agbc != b._agbc {
			return a._agbc < b._agbc
		}
		return a._gffgd < b._gffgd
	})
}

// getFont returns the font named `name`, going through the extractor's font
// cache when one is configured (non-nil `_eg`).
//
// The cache is keyed by the string form of the font dictionary and holds at
// most `_efdc` entries; when full, the entry with the smallest access stamp is
// evicted. Errors from getFontDict and getFontDirect are returned unchanged.
func (to *textObject) getFont(name string) (*_bg.PdfFont, error) {
	extractor := to._dcdg
	caching := extractor._eg != nil

	// Resolve the font dictionary once; it is needed for the cache key.
	fontDict, err := to.getFontDict(name)
	if err != nil {
		if caching {
			_b.Log.Debug("ERROR: getFont: name=%s, error: %s", name, err.Error())
		}
		return nil, err
	}

	var key string
	if caching {
		extractor._ge++
		key = fontDict.String()
		if entry, ok := extractor._eg[key]; ok {
			// The map stores fontEntry by value, so the refreshed access
			// stamp must be written back or the update is lost and eviction
			// stops being least-recently-used.
			entry._edcg = extractor._ge
			extractor._eg[key] = entry
			return entry._bcae, nil
		}
	}

	font, err := to.getFontDirect(name)
	if err != nil {
		return nil, err
	}

	if caching {
		if len(extractor._eg) >= _efdc {
			// Evict the entry with the smallest access stamp.
			victim, found := "", false
			oldest := extractor._ge
			for k, entry := range extractor._eg {
				if !found || entry._edcg < oldest {
					victim, oldest, found = k, entry._edcg, true
				}
			}
			delete(extractor._eg, victim)
		}
		extractor._eg[key] = fontEntry{_bcae: font, _edcg: extractor._ge}
	}
	return font, nil
}

// maxDepth returns `_dgec` minus the lower edge of the bag's bounding box.
func (bag *wordBag) maxDepth() float64 {
	return bag._dgec - bag.Lly
}

// _dbae reports whether `b` equals the first byte of any element of `_ebef`.
func _dbae(b byte) bool {
	for _, s := range _ebef {
		if []byte(s)[0] == b {
			return true
		}
	}
	return false
}

// text returns the text of `para` as produced by writeText.
func (para *textPara) text() string {
	buf := new(_dfe.Buffer)
	para.writeText(buf)
	return buf.String()
}

// parasBBox returns the paragraphs of the cell and its bounding rectangle.
func (cell compositeCell) parasBBox() (paraList, _bg.PdfRectangle) {
	return cell.paraList, cell.PdfRectangle
}

// _ffcaf reports whether `a` and `b` differ by at most `_cfgg`.
func _ffcaf(a, b float64) bool {
	return _ef.Abs(a-b) <= _cfgg
}

// _cfad scans `candidates` in order, considering only lines whose `_cbbd` is
// greater than that of `line`. It remembers the `_cbbd` of each such line
// whose rounded Llx is not less than the rounded Llx of `line`, and stops at
// the first one that fails that test. It returns the last remembered value,
// or -1 when there is none. The third parameter is unused.
func _cfad(line *textLine, candidates []*textLine, _ []float64) float64 {
	result := -1.0
	for _, other := range candidates {
		if !(other._cbbd > line._cbbd) {
			continue
		}
		if !(_ef.Round(other.Llx) >= _ef.Round(line.Llx)) {
			break
		}
		result = other._cbbd
	}
	return result
}
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	// Images holds one mark per extracted image.
	Images []ImageMark
}

// NOTE(review): the use of _bb is not visible in this part of the file.
const _bb = 20

// del removes `key` from the set; deleting an absent key is a no-op.
func (set intSet) del(key int) {
	delete(set, key)
}
// NewFromContents creates a new extractor from contents and page resources.
// The extractor starts with empty font and text-result caches, and the call is
// reported to the license tracker. The returned error is always nil.
func NewFromContents(contents string, resources *_bg.PdfPageResources) (*Extractor, error) {
	const usageKey = "extractor.NewFromContents"
	extractor := &Extractor{
		_bc:   contents,
		_gga:  resources,
		_eg:   map[string]fontEntry{},
		_gbfd: map[string]textResult{},
	}
	_gb.TrackUse(usageKey)
	return extractor, nil
}
// String returns a description of `b`: bounding box, `_ecdf` font size, the
// number of words and the word texts listed in depthIndexes order.
func (b *wordBag) String() string {
	var texts []string
	for _, depthIdx := range b.depthIndexes() {
		for _, word := range b._cgdg[depthIdx] {
			texts = append(texts, word._ggaef)
		}
	}
	return _ce.Sprintf("%.2f fontsize=%.2f %d %q", b.PdfRectangle, b._ecdf, len(texts), texts)
}
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
//
// `start` and `end` are clamped to the offsets covered by `ma`. An error is
// returned when `ma` is nil, when end < start, when `start` lies beyond the
// last mark, or when the selected range is empty. A range that reaches the
// last mark is valid and includes that mark.
func (ma *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if ma == nil {
		return nil, _d.New("ma==nil")
	}
	if end < start {
		return nil, _ce.Errorf("end < start. RangeOffset not defined. start=%d end=%d ", start, end)
	}
	marks := ma._bca
	n := len(marks)
	if n == 0 {
		return ma, nil
	}
	first, last := marks[0], marks[n-1]
	if start < first.Offset {
		start = first.Offset
	}
	if end > last.Offset+1 {
		end = last.Offset + 1
	}

	// Index of the first mark whose final byte is at or after `start`.
	iStart := _df.Search(n, func(i int) bool {
		return marks[i].Offset+len(marks[i].Text)-1 >= start
	})
	if iStart >= n {
		return nil, _ce.Errorf("Out of range. start=%d iStart=%d len=%d\n\tfirst=%v\n\t last=%v",
			start, iStart, n, first, last)
	}

	// Index of the first mark that begins at or after `end`. sort.Search
	// yields n when no mark does, which is a valid exclusive upper bound:
	// it means the range extends through the last mark.
	iEnd := _df.Search(n, func(i int) bool { return marks[i].Offset > end-1 })
	if iEnd <= iStart {
		return nil, _ce.Errorf("iEnd <= iStart: start=%d end=%d iStart=%d iEnd=%d", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_bca: marks[iStart:iEnd]}, nil
}

// rectRuling describes a rectangle considered as a ruling: its ruling kind,
// the kind of mark it came from, and its colour and geometry.
type rectRuling struct {
	_fbad rulingKind
	_gadb markKind
	_ag.Color
	_bg.PdfRectangle
}

// _bgaf matches either of the patterns `_dabgf` or `_bgbdd`.
var _bgaf *_gg.Regexp = _gg.MustCompile(_dabgf + "|" + _bgbdd)

// subdivide splits the composite cells of `t` along the row and column
// corridors and returns a new table made of the resulting cells. `t` itself
// is returned when either corridor map is empty.
func (t *textTable) subdivide() *textTable {
	t.logComposite("subdivide")

	rowCorridors := t.compositeRowCorridors()
	colCorridors := t.compositeColCorridors()
	if _cgafg {
		_b.Log.Info("subdivide:\n\trowCorridors=%s\n\tcolCorridors=%s", _gdfa(rowCorridors), _gdfa(colCorridors))
	}
	if len(rowCorridors) == 0 || len(colCorridors) == 0 {
		return t
	}
	_dddaa(rowCorridors)
	_dddaa(colCorridors)
	if _cgafg {
		_b.Log.Info("subdivide fixed:\n\trowCorridors=%s\n\tcolCorridors=%s", _gdfa(rowCorridors), _gdfa(colCorridors))
	}

	yOffsets, height := _fgeb(t._gcbge, rowCorridors)
	xOffsets, width := _fgeb(t._ddfc, colCorridors)
	cells := make(map[uint64]*textPara, width*height)
	result := &textTable{
		PdfRectangle: t.PdfRectangle,
		_edgac:       t._edgac,
		_gcbge:       height,
		_ddfc:        width,
		_efeac:       cells,
	}
	if _cgafg {
		_b.Log.Info("subdivide: composite = %d x %d cells= %d x %d\n"+
			"\trowCorridors=%s\n"+
			"\tcolCorridors=%s\n"+
			"\tyOffsets=%+v\n"+
			"\txOffsets=%+v",
			t._ddfc, t._gcbge, width, height, _gdfa(rowCorridors), _gdfa(colCorridors), yOffsets, xOffsets)
	}

	for y := 0; y < t._gcbge; y++ {
		y0 := yOffsets[y]
		for x := 0; x < t._ddfc; x++ {
			x0 := xOffsets[x]
			if _cgafg {
				_ce.Printf("%6d, %2d: x0=%d y0=%d\n", x, y, x0, y0)
			}
			composite, ok := t._dadcc[_fgcce(x, y)]
			if !ok {
				continue
			}
			// Copy every cell of the split composite to its offset position.
			sub := composite.split(rowCorridors[y], colCorridors[x])
			for subY := 0; subY < sub._gcbge; subY++ {
				for subX := 0; subX < sub._ddfc; subX++ {
					para := sub.get(subX, subY)
					result.put(x0+subX, y0+subY, para)
					if _cgafg {
						_ce.Printf("%8d, %2d: %s\n", x0+subX, y0+subY, para)
					}
				}
			}
		}
	}
	return result
}

// log prints the tiling under the heading `title`. It does nothing unless
// the `_agd` flag is set.
func (g gridTiling) log(title string) {
	if !_agd {
		return
	}
	_b.Log.Info("tiling: %d x %d %q", len(g._eaafd), len(g._dade), title)
	_ce.Printf("\u0020\u0020\u0020llx=%.2f\n", g._eaafd)
	_ce.Printf("\u0020\u0020\u0020lly=%.2f\n", g._dade)
	for i, lly := range g._dade {
		row, ok := g._cbec[lly]
		if !ok {
			continue
		}
		_ce.Printf("%4d: %6.2f\n", i, lly)
		for j, llx := range g._eaafd {
			cell, present := row[llx]
			if !present {
				continue
			}
			_ce.Printf("%8d: %s\n", j, cell.String())
		}
	}
}
// New returns an Extractor instance for extracting content from the input PDF page.
// It is shorthand for NewWithOptions(page, nil).
func New(page *_bg.PdfPage) (*Extractor, error) {
	extractor, err := NewWithOptions(page, nil)
	return extractor, err
}
// String returns a human readable description of `vecs`.
func (vecs rulingList) String() string {
	if len(vecs) == 0 {
		return "{ EMPTY }"
	}
	verts, horzs := vecs.vertsHorzs()
	numVerts, numHorzs := len(verts), len(horzs)
	if numVerts == 0 || numHorzs == 0 {
		return _ce.Sprintf("{%d x %d}", numVerts, numHorzs)
	}
	// Box spanned by the first/last primary coordinates of each direction.
	box := _bg.PdfRectangle{
		Llx: verts[0]._befee,
		Urx: verts[numVerts-1]._befee,
		Lly: horzs[numHorzs-1]._befee,
		Ury: horzs[0]._befee,
	}
	return _ce.Sprintf("{%d x %d: %6.2f}", numVerts, numHorzs, box)
}

// updateBBox grows the cell's bounding box to cover all of its paragraphs.
func (cell *compositeCell) updateBBox() {
	for _, para := range cell.paraList {
		cell.PdfRectangle = _egbga(cell.PdfRectangle, para.PdfRectangle)
	}
}

// _dceg sorts `lines` in place by `_cbbd` and groups them by the rounded
// value that `_cbba` returns for each line.
func _dceg(lines []*textLine) map[float64][]*textLine {
	_df.Slice(lines, func(i, j int) bool { return lines[i]._cbbd < lines[j]._cbbd })
	groups := map[float64][]*textLine{}
	for _, line := range lines {
		key := _ef.Round(_cbba(line))
		groups[key] = append(groups[key], line)
	}
	return groups
}

// split divides the composite cell into a table of (len(colCorridors)+1) x
// (len(rowCorridors)+1) cells. Each line of the cell is assigned to the first
// grid rectangle for which `_gfea` returns true; lines matching no rectangle
// are dropped. Grid cells that receive no lines are left unset.
// The cell's paragraph slice is sorted in place by (Lly, Llx) as a side effect.
func (cell compositeCell) split(rowCorridors, colCorridors []float64) *textTable {
	numRows := len(rowCorridors) + 1
	numCols := len(colCorridors) + 1
	if _cgafg {
		_b.Log.Info("compositeCell.split: %d x %d\n\tcomposite=%s\n"+
			"\trowCorridors=%6.2f\n\tcolCorridors=%6.2f",
			numCols, numRows, cell, rowCorridors, colCorridors)
		_ce.Printf("\u0020\u0020\u0020\u0020%d paras\n", len(cell.paraList))
		for i, para := range cell.paraList {
			_ce.Printf("%4d: %s\n", i, para.String())
		}
		_ce.Printf("\u0020\u0020\u0020\u0020%d lines\n", len(cell.lines()))
		for i, line := range cell.lines() {
			_ce.Printf("%4d: %s\n", i, line)
		}
	}

	// Rebuild the corridor lists via `_gfeg` with the cell's own edges.
	rowCorridors = _gfeg(rowCorridors, cell.Ury, cell.Lly)
	colCorridors = _gfeg(colCorridors, cell.Llx, cell.Urx)

	cells := make(map[uint64]*textPara, numCols*numRows)
	table := textTable{_ddfc: numCols, _gcbge: numRows, _efeac: cells}

	paras := cell.paraList
	_df.Slice(paras, func(i, j int) bool {
		a, b := paras[i], paras[j]
		if a.Lly != b.Lly {
			return a.Lly < b.Lly
		}
		return a.Llx < b.Llx
	})

	// Rectangle of every grid cell, keyed by `_fgcce(x, y)`.
	rects := make(map[uint64]_bg.PdfRectangle, numCols*numRows)
	for y, lly := range rowCorridors[1:] {
		ury := rowCorridors[y]
		for x, urx := range colCorridors[1:] {
			llx := colCorridors[x]
			rects[_fgcce(x, y)] = _bg.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
		}
	}
	if _cgafg {
		_b.Log.Info("compositeCell.split: rects")
		_ce.Printf("\u0020\u0020\u0020\u0020")
		for x := 0; x < numCols; x++ {
			_ce.Printf("%30d, ", x)
		}
		_ce.Println()
		for y := 0; y < numRows; y++ {
			_ce.Printf("\u0020\u0020%2d:", y)
			for x := 0; x < numCols; x++ {
				_ce.Printf("%6.2f, ", rects[_fgcce(x, y)])
			}
			_ce.Println()
		}
	}

	// locate returns the grid position of `line`, or (-1, -1) when no
	// rectangle matches.
	locate := func(line *textLine) (int, int) {
		for y := 0; y < numRows; y++ {
			for x := 0; x < numCols; x++ {
				if _gfea(rects[_fgcce(x, y)], line.PdfRectangle) {
					return x, y
				}
			}
		}
		return -1, -1
	}

	linesByCell := make(map[uint64][]*textLine, numCols*numRows)
	for _, line := range paras.lines() {
		x, y := locate(line)
		if x < 0 {
			continue
		}
		key := _fgcce(x, y)
		linesByCell[key] = append(linesByCell[key], line)
	}

	for y := 0; y < len(rowCorridors)-1; y++ {
		ury, lly := rowCorridors[y], rowCorridors[y+1]
		for x := 0; x < len(colCorridors)-1; x++ {
			llx, urx := colCorridors[x], colCorridors[x+1]
			rect := _bg.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			cellLines := linesByCell[_fgcce(x, y)]
			if len(cellLines) == 0 {
				continue
			}
			table.put(x, y, _gdae(rect, cellLines))
		}
	}
	return &table
}

// absorb moves every word of `other` into `bag` and then applies the
// collected removals to `other`.
func (bag *wordBag) absorb(other *wordBag) {
	removals := other.makeRemovals()
	for depthIdx, words := range other._cgdg {
		for _, word := range words {
			bag.pullWord(word, depthIdx, removals)
		}
	}
	other.applyRemovals(removals)
}

// writeText writes the text of every paragraph not flagged `_cfga` to `w`.
// Consecutive paragraphs are separated by a single space when `_bbgff`
// returns true for the pair, otherwise by two newlines; the output always
// ends with two newlines. Write errors are ignored.
func (paras paraList) writeText(w _ga.Writer) {
	space, newline := []byte(" "), []byte("\n")
	for i, para := range paras {
		if para._cfga {
			continue
		}
		para.writeText(w)
		if i == len(paras)-1 {
			continue
		}
		if _bbgff(para, paras[i+1]) {
			w.Write(space)
		} else {
			w.Write(newline)
			w.Write(newline)
		}
	}
	w.Write(newline)
	w.Write(newline)
}

// removeWord removes `word` from the words stored at `depthIdx` and drops the
// map entry when no words are left there.
func (bag *wordBag) removeWord(word *textWord, depthIdx int) {
	remaining := _befef(bag._cgdg[depthIdx], word)
	if len(remaining) == 0 {
		delete(bag._cgdg, depthIdx)
		return
	}
	bag._cgdg[depthIdx] = remaining
}

// empty reports whether the bag has no entry for `depthIdx`.
func (bag *wordBag) empty(depthIdx int) bool {
	_, present := bag._cgdg[depthIdx]
	return !present
}

// cubicTo adds only the final point (x3, y3) of the curve to the path;
// the first four arguments are ignored.
func (ss *shapesState) cubicTo(x1, y1, x2, y2, x3, y3 float64) {
	if _bdaae {
		_b.Log.Info("cubicTo:")
	}
	ss.addPoint(x3, y3)
}

// establishSubpath returns the last subpath, first appending a new one built
// from the point returned by lastpointEstablished when that call reports
// false. It clears the `_afge` flag, and returns nil when there is no subpath.
func (ss *shapesState) establishSubpath() *subpath {
	point, established := ss.lastpointEstablished()
	if !established {
		ss._cbfc = append(ss._cbfc, _egbg(point))
	}
	if len(ss._cbfc) == 0 {
		return nil
	}
	ss._afge = false
	return ss._cbfc[len(ss._cbfc)-1]
}

// push appends a copy of `state`, so later changes to `state` do not affect
// the stacked value.
func (stack *stateStack) push(state *textState) {
	saved := *state
	*stack = append(*stack, &saved)
}

// _aaeg draws `img` through `mask` and returns the result as a new RGBA
// image. When the two inputs differ in size, each one that is smaller than
// the common canvas (the larger width and height of the two) is first scaled
// up to it with bilinear interpolation.
func _aaeg(img, mask _fa.Image) _fa.Image {
	maskSize, imgSize := mask.Bounds().Size(), img.Bounds().Size()

	width, height := maskSize.X, maskSize.Y
	if imgSize.X > width {
		width = imgSize.X
	}
	if imgSize.Y > height {
		height = imgSize.Y
	}
	canvas := _fa.Rect(0, 0, width, height)

	if maskSize.X != width || maskSize.Y != height {
		// Scale the mask itself. Scaling the image here would replace the
		// mask's content with image pixels.
		scaledMask := _fa.NewRGBA(canvas)
		_ec.BiLinear.Scale(scaledMask, canvas, mask, mask.Bounds(), _ec.Over, nil)
		mask = scaledMask
	}
	if imgSize.X != width || imgSize.Y != height {
		scaledImg := _fa.NewRGBA(canvas)
		_ec.BiLinear.Scale(scaledImg, canvas, img, img.Bounds(), _ec.Over, nil)
		img = scaledImg
	}

	masked := _fa.NewRGBA(canvas)
	_ec.DrawMask(masked, canvas, img, _fa.Point{}, mask, _fa.Point{}, _ec.Over)
	return masked
}

// _cgae returns `_ggdg(|Δy|, |Δx|)` for the two points.
func _cgae(a, b _gab.Point) bool {
	dx := _ef.Abs(a.X - b.X)
	dy := _ef.Abs(a.Y - b.Y)
	return _ggdg(dy, dx)
}
// Append appends `mark` to the mark array.
func (ma *TextMarkArray) Append(mark TextMark) {
	ma._bca = append(ma._bca, mark)
}

// _fcfg returns a word bag that contains only `word`, filed under the depth
// index of `word._baebb`. `depthRef` is stored as `_dgec`; the two ruling
// lists are stored as `_debag` and `_gfe`.
func _fcfg(word *textWord, depthRef float64, rulingsA, rulingsB rulingList) *wordBag {
	depthIdx := _ebfc(word._baebb)
	return &wordBag{
		_cgdg:        map[int][]*textWord{depthIdx: {word}},
		PdfRectangle: word.PdfRectangle,
		_ecdf:        word._ebgb,
		_dgec:        depthRef,
		_debag:       rulingsA,
		_gfe:         rulingsB,
	}
}

// _eeg returns the text lines of the first child of `node` tagged "LBody",
// "Span" or "InlineShape". For an "LBody" child without lines of its own the
// search recurses into that child. It returns nil when no such child exists.
func _eeg(node *list) []*textLine {
	for _, child := range node._cdfc {
		tag := child._ebed
		if tag == "LBody" {
			if len(child._ecdee) != 0 {
				return child._ecdee
			}
			return _eeg(child)
		}
		if tag == "Span" || tag == "InlineShape" {
			return child._ecdee
		}
	}
	return nil
}

// String returns the cell's bounding box, paragraph count and the merged
// paragraph text after it is passed through `_dfcggd` with argument 50.
func (cell compositeCell) String() string {
	var preview string
	if len(cell.paraList) > 0 {
		preview = _dfcggd(cell.paraList.merge().text(), 50)
	}
	return _ce.Sprintf("%6.2f %d paras %q", cell.PdfRectangle, len(cell.paraList), preview)
}

// splitSec sorts `vecs` in place by (`_agbc`, `_gffgd`) and partitions them
// into groups: a ruling joins the group under construction when alignsSec
// holds against any ruling already in it. `vecs` must not be empty.
func (vecs rulingList) splitSec() []rulingList {
	_df.Slice(vecs, func(i, j int) bool {
		a, b := vecs[i], vecs[j]
		if a._agbc == b._agbc {
			return a._gffgd < b._gffgd
		}
		return a._agbc < b._agbc
	})

	assigned := make(map[*ruling]struct{}, len(vecs))
	// grow builds the group seeded by `seed` from the still unassigned rulings.
	grow := func(seed *ruling) rulingList {
		group := rulingList{seed}
		assigned[seed] = struct{}{}
		for _, candidate := range vecs {
			if _, done := assigned[candidate]; done {
				continue
			}
			for _, member := range group {
				if candidate.alignsSec(member) {
					group = append(group, candidate)
					assigned[candidate] = struct{}{}
					break
				}
			}
		}
		return group
	}

	groups := []rulingList{grow(vecs[0])}
	for _, seed := range vecs[1:] {
		if _, done := assigned[seed]; !done {
			groups = append(groups, grow(seed))
		}
	}
	return groups
}

// moveTextSetLeading stores -ty in the text state's `_ced` field and then
// forwards (tx, ty) to moveLP.
func (to *textObject) moveTextSetLeading(tx, ty float64) {
	to._gacd._ced = -ty
	to.moveLP(tx, ty)
}

// applyTables returns a list holding one table paragraph per entry of
// `tables`, followed by the paragraphs of `paras` not flagged `_abeg`.
func (paras paraList) applyTables(tables []*textTable) paraList {
	var result paraList
	for _, table := range tables {
		result = append(result, table.newTablePara())
	}
	for _, para := range paras {
		if !para._abeg {
			result = append(result, para)
		}
	}
	return result
}

// _caed returns a ruling of kind `_acgee` whose primary coordinate is the
// right edge of `rect` and whose secondary extent is rect.Lly..rect.Ury.
func _caed(rect _bg.PdfRectangle) *ruling {
	return &ruling{_eabdg: _acgee, _befee: rect.Urx, _agbc: rect.Lly, _gffgd: rect.Ury}
}

// list is a node of a tree of tagged list content.
type list struct {
	_ecdee []*textLine // text lines attached to this node
	_ebed  string      // tag compared against "L", "LI", "LBody", "Span", "InlineShape"
	_cdfc  []*list     // child nodes
	_bfcg  string
}

// imageExtractContext holds state used while extracting images from a
// content stream.
type imageExtractContext struct {
	_dae  []ImageMark
	_egfe int
	_edc  int
	_ddc  int
	_agf  map[*_ea.PdfObjectStream]*cachedImage // image cache keyed by stream object
	_fae  *ImageExtractOptions
	_gea  bool
}

// stateStack is a stack of text states.
type stateStack []*textState
// ToTextMark returns the public view of `tm`.
func (_fcfa *textMark )ToTextMark ()TextMark {return TextMark {Text :_fcfa ._ebgd ,Original :_fcfa ._ffbg ,BBox :_fcfa ._bcfd ,Font :_fcfa ._ecbeg ,FontSize :_fcfa ._gceb ,FillColor :_fcfa ._bdaff ,StrokeColor :_fcfa ._bfdb ,Orientation :_fcfa ._acec ,DirectObject :_fcfa ._dcbd ,ObjString :_fcfa ._babd ,Tw :_fcfa .Tw ,Th :_fcfa .Th ,Tc :_fcfa ._abac ,Index :_fcfa ._fbcc };
2023-06-30 13:19:48 +00:00
};
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
func (e *Extractor) ExtractTextWithStats() (_ace string, _dec int, _ffc int, _afb error) {
	// The counts from ExtractPageText are passed through even on error.
	pageText, _dec, _ffc, _afb := e.ExtractPageText()
	if _afb == nil {
		return pageText.Text(), _dec, _ffc, nil
	}
	return "", _dec, _ffc, _afb
}

// _bgabc converts the children of `node` into list items.
// Each "LI" child becomes an item built by `_facb` from its lines and its
// nested items, with `_bfcg` set from `_dcdgd`. An "LBody" child makes the
// function return the items of that child alone. An "L" child's items are
// appended and the result is returned immediately.
func _bgabc(node *list) []*list {
	var items []*list
	for _, child := range node._cdfc {
		switch child._ebed {
		case "LI":
			lines := _eeg(child)
			nested := _bgabc(child)
			item := _facb(lines, "bullet", nested)
			item._bfcg = _dcdgd(lines, "")
			items = append(items, item)
		case "LBody":
			return _bgabc(child)
		case "L":
			return append(items, _bgabc(child)...)
		}
	}
	return items
}

// _fgdeg returns `_ggdg(|Δx|, |Δy|)` for the two points.
func _fgdeg(a, b _gab.Point) bool {
	dx := _ef.Abs(a.X - b.X)
	dy := _ef.Abs(a.Y - b.Y)
	return _ggdg(dx, dy)
}

// _aedc creates a text object bound to the given extractor, resources,
// graphics state, text state and state stack. Its `_fda` and `_cfec` matrices
// start as the identity.
func _aedc(e *Extractor, resources *_bg.PdfPageResources, gs _fb.GraphicsState, state *textState, stack *stateStack) *textObject {
	to := &textObject{
		_dcdg: e,
		_edef: resources,
		_agbf: gs,
		_bffa: stack,
		_gacd: state,
	}
	to._fda = _gab.IdentityMatrix()
	to._cfec = _gab.IdentityMatrix()
	return to
}

// _gdc logs the number of ruling lists in `grids` under `title` and prints
// each list on its own line.
func _gdc(title string, grids []rulingList) {
	_b.Log.Info("$$ %d grids - %s", len(grids), title)
	for i := range grids {
		_ce.Printf("%4d: %s\n", i, grids[i].String())
	}
}

// mergePrimary returns the mean of the `_befee` values of `vecs`, which must
// not be empty.
func (vecs rulingList) mergePrimary() float64 {
	total := vecs[0]._befee
	for i := 1; i < len(vecs); i++ {
		total += vecs[i]._befee
	}
	return total / float64(len(vecs))
}
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
//
// Marks are selected with `_geec` against `bbox`, grouped by orientation
// (0, 90, 180, 270) and converted to paragraphs per group; the text, marks
// and tables of `pt` are then replaced by those of the resulting paragraphs.
func (pt *PageText) ApplyArea(bbox _bg.PdfRectangle) {
	selected := make([]*textMark, 0, len(pt._ccf))
	for _, mark := range pt._ccf {
		if _geec(mark.bbox(), bbox) {
			selected = append(selected, mark)
		}
	}

	var paras paraList
	remaining := len(selected)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		group := make([]*textMark, 0, len(selected)-remaining)
		for _, mark := range selected {
			if mark._acec == orient {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _fgad(group, pt._ebbd, nil, nil, pt._fca._bbg)...)
		remaining -= len(group)
	}

	buf := new(_dfe.Buffer)
	paras.writeText(buf)
	pt._bdf = buf.String()
	pt._fccf = paras.toTextMarks()
	pt._ecege = paras.tables()
}

// tables returns the exportable tables attached to the paragraphs of `paras`.
func (paras paraList) tables() []TextTable {
	var result []TextTable
	if _cgafg {
		_b.Log.Info("paras.tables:")
	}
	for _, para := range paras {
		table := para._bgba
		if table == nil || !table.isExportable() {
			continue
		}
		result = append(result, table.toTextTable())
	}
	return result
}

// compositeColCorridors returns a map with a nil corridor slice for every
// column index in 0.._ddfc-1.
func (t *textTable) compositeColCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, t._ddfc)
	if _cgafg {
		_b.Log.Info("compositeColCorridors: w=%d ", t._ddfc)
	}
	for x := 0; x < t._ddfc; x++ {
		corridors[x] = nil
	}
	return corridors
}

// _acbc reports whether the magnitude of `x` is below `_efea`.
func _acbc(x float64) bool {
	return _ef.Abs(x) < _efea
}
// Text returns the extracted page text.
func (pt PageText) Text() string {
	return pt._bdf
}

// getCurrentFont returns the font of the current text state, or the default
// font (with a debug log message) when none is set.
func (to *textObject) getCurrentFont() *_bg.PdfFont {
	if font := to._gacd._dgdf; font != nil {
		return font
	}
	_b.Log.Debug("ERROR: No font defined. Using default.")
	return _bg.DefaultFont()
}

// logComposite prints two grids for the table: the number of paragraphs in
// each composite cell, and an excerpt of each cell's merged text.
// It does nothing unless `_cgafg` is set.
func (t *textTable) logComposite(title string) {
	if !_cgafg {
		return
	}
	width, height := t._ddfc, t._gcbge

	_b.Log.Info("~~~Para %d x %d %s", width, height, title)
	_ce.Printf("%5s |", "")
	for x := 0; x < width; x++ {
		_ce.Printf("%3d |", x)
	}
	_ce.Println("")
	_ce.Printf("%5s +", "")
	for x := 0; x < width; x++ {
		_ce.Printf("%3s +", "---")
	}
	_ce.Println("")
	for y := 0; y < height; y++ {
		_ce.Printf("%5d |", y)
		for x := 0; x < width; x++ {
			paras, _ := t._dadcc[_fgcce(x, y)].parasBBox()
			_ce.Printf("%3d |", len(paras))
		}
		_ce.Println("")
	}

	_b.Log.Info("~~~Text %d x %d %s", width, height, title)
	_ce.Printf("%5s |", "")
	for x := 0; x < width; x++ {
		_ce.Printf("%12d |", x)
	}
	_ce.Println("")
	_ce.Printf("%5s +", "")
	for x := 0; x < width; x++ {
		_ce.Print("-------------+")
	}
	_ce.Println("")
	for y := 0; y < height; y++ {
		_ce.Printf("%5d |", y)
		for x := 0; x < width; x++ {
			paras, _ := t._dadcc[_fgcce(x, y)].parasBBox()
			text := ""
			if merged := paras.merge(); merged != nil {
				text = merged.text()
			}
			// Quote the excerpt, then strip the surrounding quote characters.
			quoted := _ce.Sprintf("%q", _dfcggd(text, 12))
			_ce.Printf("%12s |", quoted[1:len(quoted)-1])
		}
		_ce.Println("")
	}
}

// readBefore reports whether paragraph `i` is read before paragraph `j`.
// That holds when `_fffd` is true for the pair and `i` has the greater Lly,
// or when `i` lies entirely to the left of `j` (by their `_gfbgd` boxes) and
// no other paragraph selected by llyRange between their Lly values overlaps
// the horizontal interval spanning the two boxes.
func (paras paraList) readBefore(ordering []int, i, j int) bool {
	a, b := paras[i], paras[j]
	if _fffd(a, b) && a.Lly > b.Lly {
		return true
	}
	if !(a._gfbgd.Urx < b._gfbgd.Llx) {
		return false
	}

	low, high := a.Lly, b.Lly
	if low > high {
		low, high = high, low
	}
	left := _ef.Max(a._gfbgd.Llx, b._gfbgd.Llx)
	right := _ef.Min(a._gfbgd.Urx, b._gfbgd.Urx)

	for _, k := range paras.llyRange(ordering, low, high) {
		if k == i || k == j {
			continue
		}
		box := paras[k]._gfbgd
		if box.Llx <= right && left <= box.Urx {
			return false
		}
	}
	return true
}

// getCellInfo returns {{row}, {column}} for the first cell whose marks
// contain `mark`, or nil when no cell does.
func (table TextTable) getCellInfo(mark TextMark) [][]int {
	for row, cells := range table.Cells {
		for col, cell := range cells {
			if (&cell.Marks).exists(mark) {
				return [][]int{{row}, {col}}
			}
		}
	}
	return nil
}

// setTextLeading stores `leading` in the text state's `_ced` field.
// It is a no-op on a nil receiver.
func (to *textObject) setTextLeading(leading float64) {
	if to != nil {
		to._gacd._ced = leading
	}
}

// fontsize returns the `_bfbb` value of the paragraph's first line.
func (para *textPara) fontsize() float64 {
	first := para._gfbb[0]
	return first._bfbb
}
// _aefc classifies `rect` as a ruling: `_cefaa` when it is wider than tall
// and at least `_gbca` wide, `_acgee` when it is not wider than tall and at
// least `_gbca` high, and `_bgbdg` otherwise.
func _aefc(rect _bg.PdfRectangle) rulingKind {
	width, height := rect.Width(), rect.Height()
	wider := width > height
	switch {
	case wider && width >= _gbca:
		return _cefaa
	case !wider && height >= _gbca:
		return _acgee
	}
	return _bgbdg
}

// alignsSec reports whether the secondary extents of the two rulings overlap
// or come within `_bddeb`+1 of each other.
func (r *ruling) alignsSec(other *ruling) bool {
	const tolerance = _bddeb + 1.0
	if r._agbc-tolerance > other._gffgd {
		return false
	}
	return other._agbc-tolerance <= r._gffgd
}

// complete reports whether every cell in the tiling is complete.
func (g gridTiling) complete() bool {
	for _, row := range g._cbec {
		for _, cell := range row {
			if cell.complete() {
				continue
			}
			return false
		}
	}
	return true
}

// size returns the number of states on the stack.
func (stack *stateStack) size() int {
	return len(*stack)
}

// bbox returns the smallest rectangle containing every point of every
// subpath. The first subpath must contain at least one point.
func (section pathSection) bbox() _bg.PdfRectangle {
	start := section._dgfc[0]._fbcgf[0]
	box := _bg.PdfRectangle{Llx: start.X, Urx: start.X, Lly: start.Y, Ury: start.Y}

	include := func(p _gab.Point) {
		if p.X < box.Llx {
			box.Llx = p.X
		} else if p.X > box.Urx {
			box.Urx = p.X
		}
		if p.Y < box.Lly {
			box.Lly = p.Y
		} else if p.Y > box.Ury {
			box.Ury = p.Y
		}
	}

	for i, sub := range section._dgfc {
		points := sub._fbcgf
		if i == 0 {
			points = points[1:] // the first point seeded the box
		}
		for _, p := range points {
			include(p)
		}
	}
	return box
}

// _gefeb matches a string consisting of digits with an optional trailing dot,
// or of the letters I, i and v, optionally followed by ")" and surrounded by
// optional whitespace.
var _gefeb = _gg.MustCompile(`^\s*(\d+\.?|[Iiv]+)\s*\)?$`)

// xMean returns the mean of the x coordinates of the two end points.
func (line lineRuling) xMean() float64 {
	return 0.5 * (line._egaf.X + line._eaebf.X)
}
// shapesState holds the state used while building paths.
type shapesState struct {
	_dfgb  _gab.Matrix
	_afee  _gab.Matrix
	_cbfc  []*subpath // subpaths collected so far
	_afge  bool       // printed as "fresh" by String
	_bfd   _gab.Point
	_eeadb *textObject // source of the stroke colour
}

// _eaca reports whether the two points have identical coordinates.
func _eaca(a, b _gab.Point) bool {
	return a.X == b.X && a.Y == b.Y
}

// blocked reports whether a ruling separates `word` from the bag.
// When the two boxes are disjoint horizontally, the `_debag` rulings are
// tested between the facing vertical edges; when they are disjoint
// vertically, the `_gfe` rulings are tested between the edges built by
// `_aage` and `_cfaf`.
func (bag *wordBag) blocked(word *textWord) bool {
	switch {
	case word.Urx < bag.Llx:
		from, to := _caed(word.PdfRectangle), _afega(bag.PdfRectangle)
		if bag._debag.blocks(from, to) {
			if _geg {
				_b.Log.Info("blocked \u2190x: %s %s", word, bag)
			}
			return true
		}
	case bag.Urx < word.Llx:
		from, to := _caed(bag.PdfRectangle), _afega(word.PdfRectangle)
		if bag._debag.blocks(from, to) {
			if _geg {
				_b.Log.Info("blocked x\u2192 : %s %s", word, bag)
			}
			return true
		}
	}

	switch {
	case word.Ury < bag.Lly:
		from, to := _aage(word.PdfRectangle), _cfaf(bag.PdfRectangle)
		if bag._gfe.blocks(from, to) {
			if _geg {
				_b.Log.Info("blocked \u2190y: %s %s", word, bag)
			}
			return true
		}
	case bag.Ury < word.Lly:
		from, to := _aage(bag.PdfRectangle), _cfaf(word.PdfRectangle)
		if bag._gfe.blocks(from, to) {
			if _geg {
				_b.Log.Info("blocked y\u2192 : %s %s", word, bag)
			}
			return true
		}
	}
	return false
}

// _fdcc returns `_ddcc(a)` minus `_ddcc(b)`.
func _fdcc(a, b bounded) float64 {
	return _ddcc(a) - _ddcc(b)
}
func (_dcdcc *ruling )alignsPrimary (_gdbg *ruling )bool {return _dcdcc ._eabdg ==_gdbg ._eabdg &&_ef .Abs (_dcdcc ._befee -_gdbg ._befee )< _bddeb *0.5;};
2023-06-30 13:19:48 +00:00
2023-07-28 12:14:31 +00:00
// String returns a description of the word: its `_baebb` value, bounding box, font size and text.
func (_efdb *textWord) String() string {
	// "%.2f %6.2f fontsize=%.2f \"%s\""
	return _ce.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",
		_efdb._baebb, _efdb.PdfRectangle, _efdb._ebgb, _efdb._ggaef)
}

// endsInHyphen reports whether the line ends in a hyphen that passes the _ggff check.
//
// The cheap tests run first: the last rune of the last word must be in unicode.Hyphen. If the
// last word has its `_gagaf` flag set and its own text passes _ggff the answer is true without
// building the full line text; otherwise _ggff is applied to the text of the whole line.
// A line without words returns false.
func (_begb *textLine) endsInHyphen() bool {
	// Guard against an empty line; indexing the last word would panic otherwise.
	if len(_begb._aafd) == 0 {
		return false
	}
	lastWord := _begb._aafd[len(_begb._aafd)-1]
	wordText := lastWord._ggaef
	lastRune, size := _a.DecodeLastRuneInString(wordText)
	if size <= 0 || !_f.Is(_f.Hyphen, lastRune) {
		return false
	}
	if lastWord._gagaf && _ggff(wordText) {
		return true
	}
	return _ggff(_begb.text())
}

// snapToGroups splits the rulings with vertsHorzs, applies snapToGroupsDirection to each
// non-empty part, and returns the first part followed by the second.
func (_gebc rulingList) snapToGroups() rulingList {
	first, second := _gebc.vertsHorzs()
	if len(first) > 0 {
		first = first.snapToGroupsDirection()
	}
	if len(second) > 0 {
		second = second.snapToGroupsDirection()
	}
	combined := append(first, second...)
	// "snapToGroups"
	combined.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return combined
}

// stroke appends a pathSection built from the current subpaths and the current stroke color to
// `*_cabe`. The remaining code only prints debug output when `_bccgb` (and `_fegd`) are enabled.
func (_age *shapesState) stroke(_cabe *[]pathSection) {
	section := pathSection{_dgfc: _age._cbfc, Color: _age._eeadb.getStrokeColor()}
	*_cabe = append(*_cabe, section)
	if !_bccgb {
		return
	}
	// "    STROKE: %d strokes ss=%s color=%+v %6.2f\n"
	_ce.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",
		len(*_cabe), _age, _age._eeadb.getStrokeColor(), section.bbox())
	if !_fegd {
		return
	}
	for i, sp := range _age._cbfc {
		// "%8d: %s\n"
		_ce.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sp)
		// Output is cut off after the subpath with index 10.
		if i == 10 {
			break
		}
	}
}
2023-06-30 13:19:48 +00:00
2023-07-28 12:14:31 +00:00
// String returns the name registered for the mark kind in `_daed`, or a "Not a mark" message
// containing the numeric value when the kind is not registered.
func (_fedcg markKind) String() string {
	if name, known := _daed[_fedcg]; known {
		return name
	}
	// "Not a mark: %d"
	return _ce.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _fedcg)
}

// put stores `_eacg` in the table's `_efeac` map under the key that _fgcce derives from the two
// indexes.
func (_dabf *textTable) put(_aaagb, _bcafa int, _eacg *textPara) {
	key := _fgcce(_aaagb, _bcafa)
	_dabf._efeac[key] = _eacg
}
2023-06-30 13:19:48 +00:00
// String returns a description of `v`.
2023-07-28 12:14:31 +00:00
// String returns a one-line description of the ruling: kind, primary coordinate, secondary
// range (with its length), the `_gggfe` field, color and, when non-zero, the width.
// A ruling of kind `_bgbdg` is described as "NOT RULING".
func (_gabdb *ruling) String() string {
	if _gabdb._eabdg == _bgbdg {
		// "NOT RULING"
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}
	// Axis labels: primary "x" / secondary "y", swapped for kind _cefaa.
	primary, secondary := "\u0078", "\u0079"
	if _gabdb._eabdg == _cefaa {
		primary, secondary = "\u0079", "\u0078"
	}
	width := ""
	if _gabdb._fadae != 0.0 {
		// " width=%.2f"
		width = _ce.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _gabdb._fadae)
	}
	// "%10s %s=%6.2f %s=%6.2f - %6.2f (%6.2f) %s %v%s"
	return _ce.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",
		_gabdb._eabdg, primary, _gabdb._befee, secondary, _gabdb._agbc, _gabdb._gffgd,
		_gabdb._gffgd-_gabdb._agbc, _gabdb._gggfe, _gabdb.Color, width)
}

// lastpointEstablished returns the current last point.
//
// Note the meaning of the boolean: it is false whenever a point is returned (either `_bfd` when
// `_afge` is set, or the last point of the final subpath when that subpath has `_bbdg` set) and
// true only when no point is available, in which case the zero Point is returned.
func (_gbfe *shapesState) lastpointEstablished() (_gab.Point, bool) {
	if _gbfe._afge {
		return _gbfe._bfd, false
	}
	if n := len(_gbfe._cbfc); n > 0 {
		if _gbfe._cbfc[n-1]._bbdg {
			return _gbfe._cbfc[n-1].last(), false
		}
	}
	return _gab.Point{}, true
}

// _cfaf returns a ruling of kind `_cefaa` located at the rectangle's Lly and spanning Llx..Urx.
func _cfaf(_dbede _bg.PdfRectangle) *ruling {
	r := ruling{_eabdg: _cefaa}
	r._befee = _dbede.Lly
	r._agbc = _dbede.Llx
	r._gffgd = _dbede.Urx
	return &r
}

// makeRectRuling tries to convert the first four points of the subpath into a ruling with color
// `_fgbd`.
//
// The four edges are classified with _fecc. The path is accepted when there are two edges of
// each of the kinds `_cefaa` and `_acgee`, or two edges of one kind and none of the other
// provided the two corresponding points pass _cgae / _fgdeg. The bounding rectangle of the
// accepted path is then turned into a ruling via rectRuling.asRuling. The boolean result is
// false when any step fails. It requires at least four points in the subpath.
func (_begbd *subpath) makeRectRuling(_fgbd _ag.Color) (*ruling, bool) {
	if _cfde {
		// "makeRectRuling: path=%v"
		_b.Log.Info("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076", _begbd)
	}

	// Classify the edge that starts at each of the four points.
	corners := _begbd._fbcgf[:4]
	edgeKinds := make(map[int]rulingKind, len(corners))
	for i, from := range corners {
		to := _begbd._fbcgf[(i+1)%4]
		edgeKinds[i] = _fecc(from, to)
		if _cfde {
			// "%4d: %s = %6.2f - %6.2f"
			_ce.Printf("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066", i, edgeKinds[i], from, to)
		}
	}
	if _cfde {
		// "   kinds=%+v\n"
		_ce.Printf("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a", edgeKinds)
	}

	// Collect the indexes of the edges of each kind.
	var acgeeIdx, cefaaIdx []int
	for i, kind := range edgeKinds {
		switch kind {
		case _cefaa:
			cefaaIdx = append(cefaaIdx, i)
		case _acgee:
			acgeeIdx = append(acgeeIdx, i)
		}
	}
	if _cfde {
		// "   horzs=%d %+v\n"
		_ce.Printf("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(cefaaIdx), cefaaIdx)
		// "   verts=%d %+v\n"
		_ce.Printf("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(acgeeIdx), acgeeIdx)
	}

	// Decide whether the edge pattern is acceptable.
	var accepted bool
	switch {
	case len(cefaaIdx) == 2 && len(acgeeIdx) == 2:
		accepted = true
	case len(cefaaIdx) == 2 && len(acgeeIdx) == 0:
		accepted = _cgae(corners[cefaaIdx[0]], corners[cefaaIdx[1]])
	case len(acgeeIdx) == 2 && len(cefaaIdx) == 0:
		accepted = _fgdeg(corners[acgeeIdx[0]], corners[acgeeIdx[1]])
	}
	if _cfde {
		// "   horzs=%d verts=%d ok=%t\n"
		_ce.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(cefaaIdx), len(acgeeIdx), accepted)
	}
	if !accepted {
		if _cfde {
			// "!!makeRectRuling: path=%v"
			_b.Log.Error("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v", _begbd)
			// "   horzs=%d verts=%d ok=%t\n"
			_ce.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(cefaaIdx), len(acgeeIdx), accepted)
		}
		return &ruling{}, false
	}

	// When one kind is missing, use every edge that is not of the other kind in its place.
	if len(acgeeIdx) == 0 {
		for i, kind := range edgeKinds {
			if kind != _cefaa {
				acgeeIdx = append(acgeeIdx, i)
			}
		}
	}
	if len(cefaaIdx) == 0 {
		for i, kind := range edgeKinds {
			if kind != _acgee {
				cefaaIdx = append(cefaaIdx, i)
			}
		}
	}
	if _cfde {
		// "makeRectRuling: horzs=%d verts=%d points=%d\n\t horzs=%+v\n\t verts=%+v\n\tpoints=%+v"
		_b.Log.Info("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",
			len(cefaaIdx), len(acgeeIdx), len(corners), cefaaIdx, acgeeIdx, corners)
	}

	// Pick the points that give the y extent (from the _cefaa edges) and the x extent (from the
	// _acgee edges). The larger coordinate is placed first in each pair.
	var xFirst, xSecond, yFirst, ySecond _gab.Point
	if a, b := corners[cefaaIdx[0]], corners[cefaaIdx[1]]; a.Y > b.Y {
		yFirst, ySecond = a, b
	} else {
		yFirst, ySecond = b, a
	}
	if a, b := corners[acgeeIdx[0]], corners[acgeeIdx[1]]; a.X > b.X {
		xFirst, xSecond = a, b
	} else {
		xFirst, xSecond = b, a
	}

	// Build the rectangle and normalize it so that Llx <= Urx and Lly <= Ury.
	rect := _bg.PdfRectangle{Llx: xFirst.X, Urx: xSecond.X, Lly: ySecond.Y, Ury: yFirst.Y}
	if rect.Llx > rect.Urx {
		rect.Llx, rect.Urx = rect.Urx, rect.Llx
	}
	if rect.Lly > rect.Ury {
		rect.Lly, rect.Ury = rect.Ury, rect.Lly
	}

	candidate := rectRuling{PdfRectangle: rect, _fbad: _aefc(rect), Color: _fgbd}
	if candidate._fbad == _bgbdg {
		if _cfde {
			// "makeRectRuling: kind=nil"
			_b.Log.Error("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c")
		}
		return nil, false
	}
	result, isRuling := candidate.asRuling()
	if !isRuling {
		if _cfde {
			// "makeRectRuling: !isRuling"
			_b.Log.Error("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg")
		}
		return nil, false
	}
	if _bccgb {
		// "   r=%s\n"
		_ce.Printf("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a", result.String())
	}
	return result, true
}

// complete reports whether the tile has all four borders.
func (_ecaad gridTile) complete() bool {
	borders := _ecaad.numBorders()
	return borders == 4
}

// textMark is the internal record for a single text mark.
// NOTE(review): only the fields whose use is visible nearby are described here.
type textMark struct {
	_bg.PdfRectangle
	_acec   int
	_ebgd   string // text of the mark; checked by the diacritic and space tests when words are built
	_ffbg   string
	_ecbeg  *_bg.PdfFont
	_gceb   float64 // compared with a word's font size in textWord.appendMark
	_abac   float64
	_acddd  _gab.Matrix
	_efgg   _gab.Point
	_bcfd   _bg.PdfRectangle
	_bdaff  _ag.Color
	_bfdb   _ag.Color
	_dcbd   _ea.PdfObject
	_babd   []string
	Tw      float64
	Th      float64
	_adbb   int
	_fbcc   int
}

// _aea returns the difference between the Llx of the two bounding boxes (first minus second).
func _aea(_ecde, _bgbef bounded) float64 {
	first := _ecde.bbox().Llx
	second := _bgbef.bbox().Llx
	return first - second
}

// toTextMarks returns the TextMarks of the paragraph; `_gecg` is passed on to every helper that
// adds marks.
//
// A paragraph without a table (`_bgba` is nil) is converted with toCellTextMarks. Otherwise the
// table cells are visited row by row: a missing cell contributes a tab, every cell is followed
// by a space and rows are separated by a newline. If the table is exportable, its text table is
// attached to the marks through _cadc.
func (_aecd *textPara) toTextMarks(_gecg *int) []TextMark {
	if _aecd._bgba == nil {
		return _aecd.toCellTextMarks(_gecg)
	}
	var marks []TextMark
	for row := 0; row < _aecd._bgba._gcbge; row++ {
		for col := 0; col < _aecd._bgba._ddfc; col++ {
			if cell := _aecd._bgba.get(col, row); cell != nil {
				marks = append(marks, cell.toCellTextMarks(_gecg)...)
			} else {
				marks = _gdbc(marks, _gecg, "\u0009")
			}
			marks = _gdbc(marks, _gecg, "\u0020")
		}
		if row < _aecd._bgba._gcbge-1 {
			marks = _gdbc(marks, _gecg, "\u000a")
		}
	}
	if table := _aecd._bgba; table.isExportable() {
		exported := table.toTextTable()
		marks = _cadc(marks, &exported)
	}
	return marks
}
2023-06-30 13:19:48 +00:00
2023-07-28 12:14:31 +00:00
// String returns the members of the set in ascending order, formatted with "%+v".
func (_acdda intSet) String() string {
	members := make([]int, 0, len(_acdda))
	for member := range _acdda {
		members = append(members, member)
	}
	_df.Ints(members)
	return _ce.Sprintf("\u0025\u002b\u0076", members)
}

// intSet is a set of ints.
type intSet map[int]struct{}

// bbox returns the bounding box of the rulings. The kind of the first ruling decides which of
// the primary/secondary ranges is used for x and which for y: for kind `_cefaa` the secondary
// range gives x and the primary range gives y, otherwise the other way round.
// An empty list logs an error and returns the zero rectangle.
func (_fggg rulingList) bbox() _bg.PdfRectangle {
	if len(_fggg) == 0 {
		// "rulingList.bbox: no rulings"
		_b.Log.Error("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073")
		return _bg.PdfRectangle{}
	}
	var box _bg.PdfRectangle
	switch _fggg[0]._eabdg {
	case _cefaa:
		box.Llx, box.Urx = _fggg.secMinMax()
		box.Lly, box.Ury = _fggg.primMinMax()
	default:
		box.Llx, box.Urx = _fggg.primMinMax()
		box.Lly, box.Ury = _fggg.secMinMax()
	}
	return box
}

// getFontDirect looks up the font dictionary named `_gcaa` and builds a PdfFont from it.
// A lookup failure returns (nil, err). A construction failure is logged at debug level and
// returned together with whatever NewPdfFontFromPdfObject produced.
func (_fdff *textObject) getFontDirect(_gcaa string) (*_bg.PdfFont, error) {
	fontObj, lookupErr := _fdff.getFontDict(_gcaa)
	if lookupErr != nil {
		return nil, lookupErr
	}
	font, buildErr := _bg.NewPdfFontFromPdfObject(fontObj)
	if buildErr != nil {
		// "getFontDirect: NewPdfFontFromPdfObject failed. name=%#q err=%v"
		_b.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _gcaa, buildErr)
	}
	return font, buildErr
}
2023-06-30 13:19:48 +00:00
2023-07-28 12:14:31 +00:00
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int

// _bbbdbd returns the larger of the two ints.
func _bbbdbd(_ggfde, _becb int) int {
	if _ggfde > _becb {
		return _ggfde
	}
	return _becb
}

// merge combines the rulings into a single ruling. Kind, `_gggfe` and Color are taken from the
// first ruling, the primary coordinate is the mean of all primaries and the secondary range is
// the union of all ranges. The list must not be empty.
func (_baff rulingList) merge() *ruling {
	head := _baff[0]
	primarySum := head._befee
	lo := head._agbc
	hi := head._gffgd
	for _, r := range _baff[1:] {
		primarySum += r._befee
		if r._agbc < lo {
			lo = r._agbc
		}
		if r._gffgd > hi {
			hi = r._gffgd
		}
	}
	merged := &ruling{
		_eabdg: _baff[0]._eabdg,
		_gggfe: _baff[0]._gggfe,
		Color:  _baff[0].Color,
		_befee: primarySum / float64(len(_baff)),
		_agbc:  lo,
		_gffgd: hi,
	}
	if _dbdb {
		// "merge: %2d vecs %s"
		_b.Log.Info("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073", len(_baff), merged)
		for i, r := range _baff {
			// "%4d: %s\n"
			_ce.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r)
		}
	}
	return merged
}

// appendMark adds `_edfe` to the word: the mark is appended to `_dggf`, the word's bounding box
// is combined with the mark's via _egbga, the word's font size is raised to the mark's if that is
// larger, and `_baebb` is recomputed as the distance from `_bdcfbg.Ury` down to the word's Lly.
func (_cdbdf *textWord) appendMark(_edfe *textMark, _bdcfbg _bg.PdfRectangle) {
	_cdbdf._dggf = append(_cdbdf._dggf, _edfe)
	_cdbdf.PdfRectangle = _egbga(_cdbdf.PdfRectangle, _edfe.PdfRectangle)
	if _edfe._gceb > _cdbdf._ebgb {
		_cdbdf._ebgb = _edfe._gceb
	}
	_cdbdf._baebb = _bdcfbg.Ury - _cdbdf.PdfRectangle.Lly
}

// findGridTables returns one table for every tiling in `_gdaae` for which findTableGrid finds
// one. Cells of each found table are marked with markCells, and every paragraph in the second
// result of findTableGrid gets its `_abeg` flag set, whether or not a table was found.
func (_cgac paraList) findGridTables(_gdaae []gridTiling) []*textTable {
	if _cgafg {
		// "findGridTables: %d paras"
		_b.Log.Info("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073", len(_cgac))
		for i, para := range _cgac {
			// "%4d: %s\n"
			_ce.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
	}
	var tables []*textTable
	for i, tiling := range _gdaae {
		table, usedParas := _cgac.findTableGrid(tiling)
		if table != nil {
			// "findTableWithGrids: %d"
			table.log(_ce.Sprintf("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064", i))
			tables = append(tables, table)
			table.markCells()
		}
		for para := range usedParas {
			para._abeg = true
		}
	}
	if _cgafg {
		// "findGridTables: %d tables"
		_b.Log.Info("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s", len(_gacfcLen(tables)))
	}
	return tables
}

// _gacfcLen returns its argument unchanged; it exists only so that the table count is computed
// from the same slice that is returned.
func _gacfcLen(tables []*textTable) []*textTable { return tables }

// allWords returns all the words held in the bag's `_cgdg` collection, concatenated.
func (_egag *wordBag) allWords() []*textWord {
	var all []*textWord
	for _, words := range _egag._cgdg {
		all = append(all, words...)
	}
	return all
}

// pop removes the top state from the stack and returns a pointer to a copy of it.
// It returns nil when the stack is empty.
func (_fed *stateStack) pop() *textState {
	if _fed.empty() {
		return nil
	}
	last := len(*_fed) - 1
	state := *(*_fed)[last]
	*_fed = (*_fed)[:last]
	return &state
}

// sortReadingOrder sorts the paragraphs in place using _dag as the comparison, after computing
// their extended bounding boxes with computeEBBoxes. Lists of at most one paragraph are left
// untouched.
func (_gbfc paraList) sortReadingOrder() {
	// "sortReadingOrder: paras=%d ===========x============="
	_b.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_gbfc))
	if len(_gbfc) <= 1 {
		return
	}
	_gbfc.computeEBBoxes()
	// The less function must be a strict ordering: an element is never "less" than an element it
	// compares equal to, so the test is `< 0` rather than `<= 0`.
	_df.Slice(_gbfc, func(_bbee, _addc int) bool { return _dag(_gbfc[_bbee], _gbfc[_addc]) < 0 })
}

// emptyCompositeRow reports whether no composite cell in row `_aeabg` contains any paragraph.
// Columns without an entry in `_dadcc` count as empty.
func (_badeca *textTable) emptyCompositeRow(_aeabg int) bool {
	for col := 0; col < _badeca._ddfc; col++ {
		cell, present := _badeca._dadcc[_fgcce(col, _aeabg)]
		if present && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}
// Text returns the concatenated text of every list in the collection.
func (_afgb *lists) Text() string {
	var out _c.Builder
	for _, item := range *_afgb {
		out.WriteString(item.Text())
	}
	return out.String()
}

// clear resets the subpath to its zero value.
func (_gbg *subpath) clear() {
	var empty subpath
	*_gbg = empty
}

// quadraticTo adds the point (`_dgg`, `_ddef`) to the current path. The first two arguments are
// not used.
func (_cgec *shapesState) quadraticTo(_bagc, _eaebd, _dgg, _ddef float64) {
	if _bdaae {
		// "quadraticTo:"
		_b.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	_cgec.addPoint(_dgg, _ddef)
}

// has reports whether `_dfdd` is a member of the set.
func (_efgad intSet) has(_dfdd int) bool {
	_, present := _efgad[_dfdd]
	return present
}

// _gbeaf groups the marks in `_aabcc` into words. `_aagde` is passed to the word constructor and
// to appendMark.
//
// Marks are consumed in order. A mark whose text satisfies _dfcc ends the current word. A mark
// starts a new word when there is no current word, or when its gaps to the current word (both
// divided by the word's font size) fall outside the package thresholds `_ffef`, `_ccfg` and
// `_ccfb`. When `_ccdf` is enabled, a diacritic mark next to a non-diacritic mark is merged into
// the word with addDiacritic instead of being added as a separate mark. Words whose computed
// text satisfies _dfcc are dropped.
func _gbeaf(_aabcc []*textMark, _aagde _bg.PdfRectangle) []*textWord {
	var words []*textWord
	var current *textWord
	if _efe {
		// "makeTextWords: %d marks"
		_b.Log.Info("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073", len(_aabcc))
	}

	// finish stores the current word (unless its text is rejected by _dfcc) and clears it.
	finish := func() {
		if current == nil {
			return
		}
		text := current.computeText()
		if !_dfcc(text) {
			current._ggaef = text
			words = append(words, current)
			if _efe {
				// "addNewWord: %d: word=%s"
				_b.Log.Info("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", len(words)-1, current.String())
				for i, m := range current._dggf {
					// "%4d: %s\n"
					_ce.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, m.String())
				}
			}
		}
		current = nil
	}

	for _, mark := range _aabcc {
		if _ccdf && current != nil && len(current._dggf) > 0 {
			previous := current._dggf[len(current._dggf)-1]
			markCombining, markIsDiacritic := _egeag(mark._ebgd)
			prevCombining, prevIsDiacritic := _egeag(previous._ebgd)
			// Diacritic follows its base mark.
			if markIsDiacritic && !prevIsDiacritic && previous.inDiacriticArea(mark) {
				current.addDiacritic(markCombining)
				continue
			}
			// Diacritic precedes its base mark: replace it by the base, then add the diacritic.
			if prevIsDiacritic && !markIsDiacritic && mark.inDiacriticArea(previous) {
				current._dggf = current._dggf[:len(current._dggf)-1]
				current.appendMark(mark, _aagde)
				current.addDiacritic(prevCombining)
				continue
			}
		}

		if _dfcc(mark._ebgd) {
			finish()
			continue
		}
		if current == nil {
			current = _fbabg([]*textMark{mark}, _aagde)
			continue
		}

		fontSize := current._ebgb
		depthGap := _ef.Abs(_eba(_aagde, mark)-current._baebb) / fontSize
		readingGap := _efbc(mark, current) / fontSize
		sameWord := -_ccfg <= readingGap && depthGap <= _ccfb
		if readingGap >= _ffef || !sameWord {
			finish()
			current = _fbabg([]*textMark{mark}, _aagde)
			continue
		}
		current.appendMark(mark, _aagde)
	}
	finish()
	return words
}

// getRight returns, for every row, the `_eabac` neighbour of the cell in the last column.
// It returns nil if any of those neighbours is taken, or if the `_fgdg` link of one neighbour is
// not the neighbour of the following row.
func (_fbafe *textTable) getRight() paraList {
	neighbours := make(paraList, _fbafe._gcbge)
	for row := 0; row < _fbafe._gcbge; row++ {
		neighbour := _fbafe.get(_fbafe._ddfc-1, row)._eabac
		if neighbour.taken() {
			return nil
		}
		neighbours[row] = neighbour
	}
	for row := 0; row < _fbafe._gcbge-1; row++ {
		if neighbours[row]._fgdg != neighbours[row+1] {
			return nil
		}
	}
	return neighbours
}

// _efbc returns the Llx of the first bounding box minus the Urx of the second.
func _efbc(_gcgfb, _bebf bounded) float64 {
	left := _gcgfb.bbox().Llx
	right := _bebf.bbox().Urx
	return left - right
}
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_baee PageText) Marks() *TextMarkArray {
	return &TextMarkArray{_bca: _baee._fccf}
}

// toTextMarks returns the TextMarks of all paragraphs that do not have `_cfga` set.
//
// Consecutive paragraphs are separated by a single space when _bbgff holds for the pair and by
// two newlines otherwise; two newlines are always appended at the end.
func (_gbcg paraList) toTextMarks() []TextMark {
	offset := 0
	var marks []TextMark
	lastIdx := len(_gbcg) - 1
	for i, para := range _gbcg {
		if para._cfga {
			continue
		}
		marks = append(marks, para.toTextMarks(&offset)...)
		if i == lastIdx {
			continue
		}
		if _bbgff(para, _gbcg[i+1]) {
			marks = _gdbc(marks, &offset, "\u0020")
		} else {
			marks = _gdbc(marks, &offset, "\u000a")
			marks = _gdbc(marks, &offset, "\u000a")
		}
	}
	marks = _gdbc(marks, &offset, "\u000a")
	marks = _gdbc(marks, &offset, "\u000a")
	return marks
}

// toGrids partitions the rulings into groups of connected rulings and returns those groups that
// isActualGrid accepts, each passed through snapToGroups.
//
// Rulings are visited in the order produced by _dadbd (more connections first, ties broken by
// comp). A ruling joins the first existing group that contains a ruling connected to it,
// otherwise it starts a new group. Groups are then ordered by decreasing size and each group is
// sorted with comp. An empty ruling list yields nil.
func (_cdeg rulingList) toGrids() []rulingList {
	if _bccgb {
		// "toGrids: %s"
		_b.Log.Info("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073", _cdeg)
	}
	intersects := _cdeg.intersections()
	if _bccgb {
		// "toGrids: vecs=%d intersects=%d "
		_b.Log.Info("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020", len(_cdeg), len(intersects))
		for _, key := range _dbcdg(intersects) {
			// "%4d: %+v\n"
			_ce.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", key, intersects[key])
		}
	}

	// connects[i] is the set of rulings connected to ruling i; rulings without connections have
	// no entry.
	connects := make(map[int]intSet, len(_cdeg))
	for i := range _cdeg {
		if set := _cdeg.connections(intersects, i); len(set) > 0 {
			connects[i] = set
		}
	}
	if _bccgb {
		// "toGrids: connects=%d"
		_b.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064", len(connects))
		for _, key := range _dbcdg(connects) {
			// "%4d: %+v\n"
			_ce.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", key, connects[key])
		}
	}

	ordering := _dadbd(len(_cdeg), func(_aaab, _dbaf int) bool {
		na, nb := len(connects[_aaab]), len(connects[_dbaf])
		if na != nb {
			return na > nb
		}
		return _cdeg.comp(_aaab, _dbaf)
	})
	if _bccgb {
		// "toGrids: ordering=%v"
		_b.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076", ordering)
	}
	// Nothing to group; indexing ordering[0] below would panic.
	if len(ordering) == 0 {
		return nil
	}

	// Group the ruling indexes.
	groups := [][]int{{ordering[0]}}
nextRuling:
	for _, idx := range ordering[1:] {
		for g, members := range groups {
			for _, member := range members {
				if connects[member].has(idx) {
					groups[g] = append(members, idx)
					continue nextRuling
				}
			}
		}
		groups = append(groups, []int{idx})
	}
	if _bccgb {
		// "toGrids: igrids=%v"
		_b.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076", groups)
	}

	_df.SliceStable(groups, func(_gbaaa, _gccc int) bool { return len(groups[_gbaaa]) > len(groups[_gccc]) })
	for _, members := range groups {
		_df.Slice(members, func(_beeg, _gaab int) bool { return _cdeg.comp(members[_beeg], members[_gaab]) })
	}

	// Replace the indexes by the rulings themselves.
	grids := make([]rulingList, len(groups))
	for g, members := range groups {
		grid := make(rulingList, len(members))
		for k, idx := range members {
			grid[k] = _cdeg[idx]
		}
		grids[g] = grid
	}
	if _bccgb {
		// "toGrids: grids=%+v"
		_b.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076", grids)
	}

	var actualGrids []rulingList
	for _, grid := range grids {
		if actual, ok := grid.isActualGrid(); ok {
			grid = actual
			grid = grid.snapToGroups()
			actualGrids = append(actualGrids, grid)
		}
	}
	if _bccgb {
		// "toGrids: actualGrids"
		_gdc("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073", actualGrids)
		// "toGrids: grids=%d actualGrids=%d"
		_b.Log.Info("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064", len(grids), len(actualGrids))
	}
	return actualGrids
}

// gridIntersecting reports whether both ends of the secondary ranges of the two rulings match
// according to _ffcaf.
func (_bcbc *ruling) gridIntersecting(_gedbc *ruling) bool {
	return _ffcaf(_bcbc._agbc, _gedbc._agbc) && _ffcaf(_bcbc._gffgd, _gedbc._gffgd)
}

// top returns the state on top of the stack without removing it, or nil if the stack is empty.
func (_adfe *stateStack) top() *textState {
	if _adfe.empty() {
		return nil
	}
	return (*_adfe)[_adfe.size()-1]
}

// setCharSpacing stores `_aega` in the `_fdf` field of the text object's state.
// It does nothing when the receiver is nil.
func (_afce *textObject) setCharSpacing(_aega float64) {
	if _afce == nil {
		return
	}
	_afce._gacd._fdf = _aega
	if _beff {
		// "setCharSpacing: %.2f state=%s"
		_b.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _aega, _afce._gacd.String())
	}
}

// markCells sets the `_abeg` flag on every non-nil cell of the table.
func (_gbgeb *textTable) markCells() {
	for row := 0; row < _gbgeb._gcbge; row++ {
		for col := 0; col < _gbgeb._ddfc; col++ {
			if cell := _gbgeb.get(col, row); cell != nil {
				cell._abeg = true
			}
		}
	}
}

// extractInlineImage converts the inline image to RGB and appends an ImageMark for it to the
// context. Size, angle and position of the mark are taken from the CTM of `_fabe`. When the image
// has no color space, DeviceGray is used. The first error encountered is returned.
func (_ecf *imageExtractContext) extractInlineImage(_abc *_fb.ContentStreamInlineImage, _fabe _fb.GraphicsState, _gbd *_bg.PdfPageResources) error {
	img, err := _abc.ToImage(_gbd)
	if err != nil {
		return err
	}
	colorSpace, err := _abc.GetColorSpace(_gbd)
	if err != nil {
		return err
	}
	if colorSpace == nil {
		colorSpace = _bg.NewPdfColorspaceDeviceGray()
	}
	rgbImg, err := colorSpace.ImageToRGB(*img)
	if err != nil {
		return err
	}
	mark := ImageMark{
		Image:  &rgbImg,
		Width:  _fabe.CTM.ScalingFactorX(),
		Height: _fabe.CTM.ScalingFactorY(),
		Angle:  _fabe.CTM.Angle(),
	}
	mark.X, mark.Y = _fabe.CTM.Translation()
	_ecf._dae = append(_ecf._dae, mark)
	_ecf._egfe++
	return nil
}

// _fcd returns the _aea difference of the two items unless _acbc accepts it, in which case the
// _fdcc difference is returned instead.
func _fcd(_bfed, _fbbbc bounded) float64 {
	if diff := _aea(_bfed, _fbbbc); !_acbc(diff) {
		return diff
	}
	return _fdcc(_bfed, _fbbbc)
}

// rulingList is a list of rulings.
type rulingList []*ruling

// moveLP concatenates a translation by (`_ebg`, `_gag`) onto the `_cfec` matrix and then copies
// `_cfec` to `_fda`.
func (_dgaf *textObject) moveLP(_ebg, _gag float64) {
	translation := _gab.NewMatrix(1, 0, 0, 1, _ebg, _gag)
	_dgaf._cfec.Concat(translation)
	_dgaf._fda = _dgaf._cfec
}

// _bbgff reports whether either paragraph has `_cfga` set or, failing that, whether _acbc
// accepts the difference of their depths.
func _bbgff(_dabe, _ecfc *textPara) bool {
	if _dabe._cfga || _ecfc._cfga {
		return true
	}
	return _acbc(_dabe.depth() - _ecfc.depth())
}

// _facb returns a new list with the given lines, `_ebed` string and child lists.
func _facb(_fgaa []*textLine, _gcag string, _afef []*list) *list {
	return &list{_ecdee: _fgaa, _ebed: _gcag, _cdfc: _afef}
}

// _bdee returns the keys of the map sorted in descending order.
func _bdee(_gfbc map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_gfbc))
	for key := range _gfbc {
		keys = append(keys, key)
	}
	_df.Float64s(keys)
	// Reverse the ascending order in place.
	for lo, hi := 0, len(keys)-1; lo < hi; lo, hi = lo+1, hi-1 {
		keys[lo], keys[hi] = keys[hi], keys[lo]
	}
	return keys
}
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	_bca []TextMark // the marks held by the array
}

// _egeag looks up the single rune of `_baedb` in the `_fegfa` table.
// It returns the table entry and true when `_baedb` consists of exactly one rune that has an
// entry; otherwise the boolean is false.
func _egeag(_baedb string) (string, bool) {
	runes := []rune(_baedb)
	if len(runes) != 1 {
		return "", false
	}
	mapped, found := _fegfa[runes[0]]
	return mapped, found
}
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_feeb PageText )ToText ()string {return _feeb .Text ()};
// hasLines reports whether _geec returns true for the cell's rectangle and the rectangle of at
// least one line in `_ggac` (the debug output labels that result "intersects"). It returns on the
// first such line. When the package flag _cgafg is set, each comparison is printed with fmt.Printf.
func (_cgbg compositeCell )hasLines (_ggac []*textLine )bool {for _efae ,_gaddg :=range _ggac {_gfee :=_geec (_cgbg .PdfRectangle ,_gaddg .PdfRectangle );if _cgafg {_ce .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a",_gfee ,_efae ,len (_ggac ));
_ce .Printf ("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a",_cgbg );_ce .Printf ("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a",_gaddg );};if _gfee {return true ;
};};return false ;};
// compositeCell embeds a PdfRectangle together with a paraList.
type compositeCell struct{_bg .PdfRectangle ;paraList ;};
// reorder permutes the list in place so that element i becomes the element previously at index
// `_ddae[i]`. The permutation is assembled in a scratch list of the same length and copied back,
// so positions not covered by `_ddae` end up nil. An entry count larger than the list, or an
// index outside it, causes an index-out-of-range panic.
func (_gbec paraList )reorder (_ddae []int ){_bdaafg :=make (paraList ,len (_gbec ));for _ggddc ,_edbc :=range _ddae {_bdaafg [_ggddc ]=_gbec [_edbc ];};copy (_gbec ,_bdaafg );
};
// yMean returns the average of the Y coordinates of the ruling's _egaf and _eaebf points.
func (_fabf lineRuling )yMean ()float64 {return 0.5*(_fabf ._egaf .Y +_fabf ._eaebf .Y )};
// textLine embeds a PdfRectangle and holds a slice of words (_aafd) plus two float64 values.
type textLine struct{_bg .PdfRectangle ;_cbbd float64 ;_aafd []*textWord ;_bfbb float64 ;};
// _dddc removes the word at index `_fbdc` by shifting the following words one position to the
// left and returns the slice shortened by one. The backing array of `_gagc` is modified in place.
func _dddc (_gagc []*textWord ,_fbdc int )[]*textWord {_cfgfe :=len (_gagc );
copy (_gagc [_fbdc :],_gagc [_fbdc +1:]);return _gagc [:_cfgfe -1];};
// connections returns the set of ruling indexes reachable from index `_agbb` through `_cfddb`.
// Starting at `_agbb`, every index i of the list whose set `_cfddb[i]` contains the index being
// visited is added to the result, and the search then recurses into every index already in the
// result. A visited set (_baec) guarantees each index is expanded only once.
func (_ceaf rulingList )connections (_cfddb map[int ]intSet ,_agbb int )intSet {_adgbee :=make (intSet );_baec :=make (intSet );var _ccbf func (int );_ccbf =func (_fbaf int ){if !_baec .has (_fbaf ){_baec .add (_fbaf );
for _ffbc :=range _ceaf {if _cfddb [_ffbc ].has (_fbaf ){_adgbee .add (_ffbc );};};for _ggcg :=range _ceaf {if _adgbee .has (_ggcg ){_ccbf (_ggcg );};};};};_ccbf (_agbb );return _adgbee ;};
// event is a record of a float64, a bool and an int value.
type event struct{_faccf float64 ;_ecacd bool ;_gbfff int ;};
// processOperand inspects one content stream operation and extracts the images it refers to.
//
//   - "BI" with one parameter: if the parameter is an inline image it is passed to
//     extractInlineImage, unless its ImageMask is true and IncludeInlineStencilMasks is not set,
//     in which case it is skipped. A parameter of any other type is ignored.
//   - "Do" with one parameter: the named XObject is looked up in `_gcc`; images go to
//     extractXObjectImage, forms to extractFormImages, other XObject types are ignored.
//   - "scn"/"SCN" with one parameter, only while the _gea flag is set: the named pattern is
//     looked up, and if it is a tiling pattern its content stream is scanned with
//     extractContentStreamImages. A missing pattern is logged and ignored.
//   - "cs"/"CS" with at least one parameter: sets _gea to whether the first parameter's String()
//     equals "Pattern".
//
// It returns _gbf when the "Do" or "scn"/"SCN" parameter is not a name, any error returned by the
// extraction helpers, and nil otherwise (including for all other operands).
func (_ac *imageExtractContext )processOperand (_cgf *_fb .ContentStreamOperation ,_fbe _fb .GraphicsState ,_gcc *_bg .PdfPageResources )error {if _cgf .Operand =="\u0042\u0049"&&len (_cgf .Params )==1{_adc ,_afc :=_cgf .Params [0].(*_fb .ContentStreamInlineImage );
if !_afc {return nil ;};if _fe ,_dee :=_ea .GetBoolVal (_adc .ImageMask );_dee {if _fe &&!_ac ._fae .IncludeInlineStencilMasks {return nil ;};};return _ac .extractInlineImage (_adc ,_fbe ,_gcc );}else if _cgf .Operand =="\u0044\u006f"&&len (_cgf .Params )==1{_bgd ,_cba :=_ea .GetName (_cgf .Params [0]);
if !_cba {_b .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");return _gbf ;};_ ,_ebb :=_gcc .GetXObjectByName (*_bgd );switch _ebb {case _bg .XObjectTypeImage :return _ac .extractXObjectImage (_bgd ,_fbe ,_gcc );case _bg .XObjectTypeForm :return _ac .extractFormImages (_bgd ,_fbe ,_gcc );
};}else if _ac ._gea &&(_cgf .Operand =="\u0073\u0063\u006e"||_cgf .Operand =="\u0053\u0043\u004e")&&len (_cgf .Params )==1{_dfec ,_adcd :=_ea .GetName (_cgf .Params [0]);if !_adcd {_b .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");
return _gbf ;};_bcb ,_adcd :=_gcc .GetPatternByName (*_dfec );if !_adcd {_b .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0050\u0061\u0074\u0074\u0065\u0072n\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075\u006e\u0064");return nil ;};if _bcb .IsTiling (){_eda :=_bcb .GetAsTilingPattern ();
_efg ,_bed :=_eda .GetContentStream ();if _bed !=nil {return _bed ;};_bed =_ac .extractContentStreamImages (string (_efg ),_eda .Resources );if _bed !=nil {return _bed ;};};}else if (_cgf .Operand =="\u0063\u0073"||_cgf .Operand =="\u0043\u0053")&&len (_cgf .Params )>=1{_ac ._gea =_cgf .Params [0].String ()=="\u0050a\u0074\u0074\u0065\u0072\u006e";
};return nil ;};
// asTiling builds a gridTiling from the rulings in the list.
//
// Steps, in order:
//  1. Adjacent rulings that align on both primary and secondary are logged as duplicates (they
//     are not removed).
//  2. The list is sorted with sortStrict and split with vertsHorzs; the primaries of the two
//     halves (_ggfef and _efbe) define the grid lines. With _ddbag = len(_ggfef)-1 and
//     _eacc = len(_efbe)-1, an empty gridTiling is returned if either count is 0.
//  3. One gridTile is created per lattice cell via _aeef and stored at index y*_ddbag+x.
//  4. "Coalesce horizontal": within each row, tiles are merged to the right.
//  5. "Coalesce vertical": merged tiles are extended into the rows with lower index, and every
//     tile for which complete() is true is stored in the result map keyed by row Lly, then Llx.
//
// When the package flag _agd is set, progress is written with Log.Info and fmt.Printf.
//
// NOTE(review): `_bdeae[1:]` panics when the receiver is empty, and the `==0` guard does not
// cover the case where a primaries() result is empty (count -1), which would presumably panic
// on `[0]` — confirm that callers never pass such lists.
func (_bdeae rulingList )asTiling ()gridTiling {if _agd {_b .Log .Info ("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_bdeae ));
};for _gdce ,_dagc :=range _bdeae [1:]{_gfbd :=_bdeae [_gdce ];if _gfbd .alignsPrimary (_dagc )&&_gfbd .alignsSec (_dagc ){_b .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_dagc ,_gfbd );
};};_bdeae .sortStrict ();_bdeae .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_gdeff ,_cfegd :=_bdeae .vertsHorzs ();_ggfef :=_gdeff .primaries ();_efbe :=_cfegd .primaries ();_ddbag :=len (_ggfef )-1;_eacc :=len (_efbe )-1;if _ddbag ==0||_eacc ==0{return gridTiling {};
};_bgag :=_bg .PdfRectangle {Llx :_ggfef [0],Urx :_ggfef [_ddbag ],Lly :_efbe [0],Ury :_efbe [_eacc ]};if _agd {_b .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_gdeff ));
for _fgcab ,_gfcd :=range _gdeff {_ce .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fgcab ,_gfcd );};_b .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_cfegd ));
for _bdacf ,_dbedeg :=range _cfegd {_ce .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bdacf ,_dbedeg );};_b .Log .Info ("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_ddbag ,_eacc ,_ggfef ,_efbe );
};
// Build one gridTile per lattice cell. Each tile is made by _aeef from the cell rectangle and
// the four findPrimSec lookups for its sides, and stored row-major at index y*_ddbag+x.
_gecgg :=make ([]gridTile ,_ddbag *_eacc );for _cccc :=_eacc -1;_cccc >=0;_cccc --{_dcfbd :=_efbe [_cccc ];_gbab :=_efbe [_cccc +1];for _gcdga :=0;_gcdga < _ddbag ;_gcdga ++{_fagc :=_ggfef [_gcdga ];_dfgca :=_ggfef [_gcdga +1];_gefbf :=_gdeff .findPrimSec (_fagc ,_dcfbd );
_gfge :=_gdeff .findPrimSec (_dfgca ,_dcfbd );_ccef :=_cfegd .findPrimSec (_dcfbd ,_fagc );_afgc :=_cfegd .findPrimSec (_gbab ,_fagc );_fdfce :=_bg .PdfRectangle {Llx :_fagc ,Urx :_dfgca ,Lly :_dcfbd ,Ury :_gbab };_aeaf :=_aeef (_fdfce ,_gefbf ,_gfge ,_ccef ,_afgc );
_gecgg [_cccc *_ddbag +_gcdga ]=_aeaf ;if _agd {_ce .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_gcdga ,_cccc ,_aeaf .String (),_aeaf .Width (),_aeaf .Height ());
};};};if _agd {_b .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_bgag );
};
// Horizontal coalescing. In each row, a tile whose _afdge flag is set absorbs the tiles to its
// right while its _bfecb flag is false: Urx is extended, _fdbd and _eaed are OR-ed in, and
// _bfecb is taken from the absorbed tile. The column index is then advanced past the absorbed
// tiles and the merged tile is stored in the row's map under its Llx. Tiles without _afdge are
// skipped.
_bebff :=make ([]map[float64 ]gridTile ,_eacc );for _fgfcd :=_eacc -1;_fgfcd >=0;_fgfcd --{if _agd {_ce .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_fgfcd );};_bebff [_fgfcd ]=make (map[float64 ]gridTile ,_ddbag );for _aacec :=0;_aacec < _ddbag ;
_aacec ++{_cegd :=_gecgg [_fgfcd *_ddbag +_aacec ];if _agd {_ce .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_aacec ,_cegd );};if !_cegd ._afdge {continue ;};_acgef :=_aacec ;for _fgaga :=_aacec +1;!_cegd ._bfecb &&_fgaga < _ddbag ;
_fgaga ++{_ddbgf :=_gecgg [_fgfcd *_ddbag +_fgaga ];_cegd .Urx =_ddbgf .Urx ;_cegd ._fdbd =_cegd ._fdbd ||_ddbgf ._fdbd ;_cegd ._eaed =_cegd ._eaed ||_ddbgf ._eaed ;_cegd ._bfecb =_ddbgf ._bfecb ;if _agd {_ce .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_fgaga ,_ddbgf ,_cegd );
};_acgef =_fgaga ;};if _agd {_ce .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_aacec ,_acgef ,_cegd );};_aacec =_acgef ;_bebff [_fgfcd ][_cegd .Llx ]=_cegd ;};};
// Allocate the result map (_ddaf) and the set of already-absorbed tiles (_fagga). Both are keyed
// first by the Lly of a row (read from the first tile of that row) and then by Llx.
_ddaf :=make (map[float64 ]map[float64 ]gridTile ,_eacc );
_fagga :=make (map[float64 ]map[float64 ]struct{},_eacc );for _bcdb :=_eacc -1;_bcdb >=0;_bcdb --{_aeacd :=_gecgg [_bcdb *_ddbag ].Lly ;_ddaf [_aeacd ]=make (map[float64 ]gridTile ,_ddbag );_fagga [_aeacd ]=make (map[float64 ]struct{},_ddbag );};if _agd {_b .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_bgag );
};
// Vertical coalescing. Rows are visited from the highest index down. Each merged tile that has
// not been absorbed already is extended through the rows with lower index while its _eaed flag
// is false and the lower row holds a tile with the same Llx and the same Urx: it takes over that
// tile's _eaed and Lly, and the absorbed tile is recorded in _fagga. For row index 0, _eaed is
// forced to true. Only tiles for which complete() is true are stored in _ddaf.
for _aaga :=_eacc -1;_aaga >=0;_aaga --{_afdga :=_gecgg [_aaga *_ddbag ].Lly ;_dfbe :=_bebff [_aaga ];if _agd {_ce .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_aaga );};for _ ,_ddgc :=range _ceac (_dfbe ){if _ ,_bgace :=_fagga [_afdga ][_ddgc ];
_bgace {continue ;};_bffaf :=_dfbe [_ddgc ];if _agd {_ce .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_bffaf .String ());};for _eeaa :=_aaga -1;_eeaa >=0;_eeaa --{if _bffaf ._eaed {break ;};_caecc :=_bebff [_eeaa ];_dccf ,_acee :=_caecc [_ddgc ];
if !_acee {break ;};if _dccf .Urx !=_bffaf .Urx {break ;};_bffaf ._eaed =_dccf ._eaed ;_bffaf .Lly =_dccf .Lly ;if _agd {_ce .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_dccf .String (),_bffaf .String ());
};_fagga [_dccf .Lly ][_dccf .Llx ]=struct{}{};};if _aaga ==0{_bffaf ._eaed =true ;};if _bffaf .complete (){_ddaf [_afdga ][_ddgc ]=_bffaf ;};};};_cceeb :=gridTiling {PdfRectangle :_bgag ,_eaafd :_effad (_ddaf ),_dade :_bdee (_ddaf ),_cbec :_ddaf };_cceeb .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");
return _cceeb ;};
// findTextTables collects the text tables that can be grown from the paragraphs in the list.
// Paragraphs that are already taken or have zero width are skipped. For each remaining paragraph
// isAtom() is called; a non-nil result is expanded with growTable and discarded if the product
// _ddfc*_gcbge is below _abda. Surviving tables have their cells marked with markCells and are
// appended to the result in list order.
func (_ceada paraList )findTextTables ()[]*textTable {var _gcgaea []*textTable ;for _ ,_gcfea :=range _ceada {if _gcfea .taken ()||_gcfea .Width ()==0{continue ;};_abddf :=_gcfea .isAtom ();if _abddf ==nil {continue ;};_abddf .growTable ();
if _abddf ._ddfc *_abddf ._gcbge < _abda {continue ;};_abddf .markCells ();_abddf .log ("\u0067\u0072\u006fw\u006e");_gcgaea =append (_gcgaea ,_abddf );};return _gcgaea ;};
// fill appends to `*_dbfg` a pathSection made of the state's current subpaths (_cbfc) and the
// fill color returned by _eeadb.getFillColor(). When the package flag _bccgb is set a summary is
// printed, and if _fegd is also set the subpaths at indexes 0 through 10 are printed as well.
func (_geab *shapesState )fill (_dbfg *[]pathSection ){_bcdc :=pathSection {_dgfc :_geab ._cbfc ,Color :_geab ._eeadb .getFillColor ()};
*_dbfg =append (*_dbfg ,_bcdc );if _bccgb {_ceda :=_bcdc .bbox ();_ce .Printf ("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",len (*_dbfg ),len (_bcdc ._dgfc ),_geab ,_bcdc .Color ,_ceda ,_ceda .Width (),_ceda .Height ());
if _fegd {for _ddcb ,_afbg :=range _bcdc ._dgfc {_ce .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_ddcb ,_afbg );if _ddcb ==10{break ;};};};};};
// pathSection groups a list of subpaths with an embedded color.Color.
type pathSection struct{_dgfc []*subpath ;_ag .Color ;};