unipdf/extractor/extractor.go

795 lines
181 KiB
Go
Raw Normal View History

2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2022-02-05 21:34:53 +00:00
package extractor

import (
	_gfa "bytes"
	_cc "errors"
	_fg "fmt"
	_cb "github.com/unidoc/unipdf/v3/common"
	_aa "github.com/unidoc/unipdf/v3/contentstream"
	_dd "github.com/unidoc/unipdf/v3/core"
	_e "github.com/unidoc/unipdf/v3/internal/license"
	_fda "github.com/unidoc/unipdf/v3/internal/textencoding"
	_ce "github.com/unidoc/unipdf/v3/internal/transform"
	_ge "github.com/unidoc/unipdf/v3/model"
	_be "golang.org/x/text/unicode/norm"
	_gbd "golang.org/x/xerrors"
	_fd "image/color"
	_b "io"
	_gb "math"
	_a "regexp"
	_gf "sort"
	_g "strings"
	_f "unicode"
	_d "unicode/utf8"
)

// writeText writes the text of each paragraph in `paras` to `w`.
// Paragraphs whose `_caaad` flag is set are skipped entirely. Between a written paragraph and
// the next list element a single space is emitted when `_ebde` reports true for the pair,
// otherwise two newlines. Two newlines are always written at the end.
// Errors returned by `w.Write` are not checked.
func (paras paraList) writeText(w _b.Writer) {
	lastIdx := len(paras) - 1
	for i, para := range paras {
		if para._caaad {
			continue
		}
		para.writeText(w)
		switch {
		case i == lastIdx:
			// Nothing follows the final paragraph except the trailer below.
		case _ebde(para, paras[i+1]):
			w.Write([]byte(" "))
		default:
			w.Write([]byte("\n"))
			w.Write([]byte("\n"))
		}
	}
	w.Write([]byte("\n"))
	w.Write([]byte("\n"))
}

// pullWord appends `word` to `line` and then removes it from `bag`, passing `depthIdx` through
// to `removeWord`.
func (line *textLine) pullWord(bag *wordBag, word *textWord, depthIdx int) {
	line.appendWord(word)
	bag.removeWord(word, depthIdx)
}

// gridTiling is a rectangle together with two coordinate lists and a two-level map of tiles.
// findTableGrid indexes `_ecaee` first by a value from `_daba` and then by a value from `_gecb`.
type gridTiling struct {
	_ge.PdfRectangle
	_gecb  []float64
	_daba  []float64
	_ecaee map[float64]map[float64]gridTile
}
2021-09-23 22:37:42 +00:00
2022-02-05 21:34:53 +00:00
// String returns a description of the word bag: its rectangle, its `_adcc` value (labelled
// "fontsize" in the output), the number of words and the words' `_aebb` strings, visited in
// ascending depth-index order.
func (bag *wordBag) String() string {
	var texts []string
	for _, depthIdx := range bag.depthIndexes() {
		for _, word := range bag._ebae[depthIdx] {
			texts = append(texts, word._aebb)
		}
	}
	return _fg.Sprintf("%.2f fontsize=%.2f %d %q", bag.PdfRectangle, bag._adcc, len(texts), texts)
}

// setTextLeading stores `leading` in the `_dab` field of the text object's state.
// It is a no-op on a nil receiver.
func (to *textObject) setTextLeading(leading float64) {
	if to != nil {
		to._agff._dab = leading
	}
}

// close appends the first point to the subpath unless `_fcffc` reports it matches the last
// point, marks the subpath with `_agaa = true` and removes duplicate points.
// It indexes `_eeff[0]`, so the subpath must contain at least one point.
func (sp *subpath) close() {
	if first := sp._eeff[0]; !_fcffc(first, sp.last()) {
		sp.add(first)
	}
	sp._agaa = true
	sp.removeDuplicates()
}

// _fbgac returns nil when a license key is present and licensed, or when `_df` is true.
// Otherwise it prints an "unlicensed" notice to standard output and returns an error.
// The PageText argument is not used.
func _fbgac(_ *PageText) error {
	key := _e.GetLicenseKey()
	if key != nil && key.IsLicensed() {
		return nil
	}
	if _df {
		return nil
	}
	_fg.Printf("Unlicensed copy of UniPDF\n")
	_fg.Println("- Get a free trial license on https://unidoc.io")
	return _cc.New("unipdf license code required")
}

// readBefore reports whether paragraph `i` is read before paragraph `j`.
// It is true when `_dabg(a, b)` holds and `a` has the larger Lly. Otherwise `a._cacf` must lie
// strictly left of `b._cacf`, and no third paragraph whose Lly falls between the two (looked up
// through `ordering` with llyRange) may overlap, in x, the span tested below.
func (paras paraList) readBefore(ordering []int, i, j int) bool {
	a, b := paras[i], paras[j]
	if _dabg(a, b) && a.Lly > b.Lly {
		return true
	}
	if !(a._cacf.Urx < b._cacf.Llx) {
		return false
	}

	loY, hiY := a.Lly, b.Lly
	if loY > hiY {
		loY, hiY = hiY, loY
	}
	left := _gb.Max(a._cacf.Llx, b._cacf.Llx)
	right := _gb.Min(a._cacf.Urx, b._cacf.Urx)

	for _, k := range paras.llyRange(ordering, loY, hiY) {
		if k == i || k == j {
			continue
		}
		other := paras[k]
		if other._cacf.Llx <= right && left <= other._cacf.Urx {
			return false
		}
	}
	return true
}

// depthIndexes returns the keys of the bag's `_ebae` map in ascending order, or nil when the
// map is empty.
func (bag *wordBag) depthIndexes() []int {
	if len(bag._ebae) == 0 {
		return nil
	}
	keys := make([]int, 0, len(bag._ebae))
	for depthIdx := range bag._ebae {
		keys = append(keys, depthIdx)
	}
	_gf.Ints(keys)
	return keys
}
// findTableGrid tries to build a table from the tiles of `tiling`.
// For every tile present in `tiling._ecaee` it collects the paragraphs returned by inTile and
// stores them as a composite cell. It gives up (nil, nil) as soon as the number of empty tiles
// exceeds (1 - _ggfc) of all tiles, or when every cell of row 0 is non-nil with `_caaad` set.
// On success it returns the table after reduceTiling and subdivide, plus the set of paragraphs
// that were placed in some tile.
func (paras paraList) findTableGrid(tiling gridTiling) (*textTable, map[*textPara]struct{}) {
	width := len(tiling._gecb)
	height := len(tiling._daba)
	tbl := textTable{
		_bagb:  true,
		_ggda:  width,
		_accb:  height,
		_fedcd: make(map[uint64]*textPara, width*height),
		_gefg:  make(map[uint64]compositeCell, width*height),
	}
	used := make(map[*textPara]struct{})
	maxEmpty := int((1.0 - _ggfc) * float64(width*height))
	numEmpty := 0
	if _afab {
		_cb.Log.Info("findTableGrid: %d x %d", width, height)
	}

	for y, rowKey := range tiling._daba {
		row, found := tiling._ecaee[rowKey]
		if !found {
			continue
		}
		for x, colKey := range tiling._gecb {
			tile, present := row[colKey]
			if !present {
				continue
			}
			inside := paras.inTile(tile)
			if len(inside) > 0 {
				tbl.putComposite(x, y, inside, tile.PdfRectangle)
				for _, para := range inside {
					used[para] = struct{}{}
				}
				continue
			}
			numEmpty++
			if numEmpty > maxEmpty {
				if _afab {
					_cb.Log.Info("!numEmpty=%d", numEmpty)
				}
				return nil, nil
			}
		}
	}

	// Count the cells of row 0 that are missing or do not have `_caaad` set.
	numHeader := 0
	for x := 0; x < width; x++ {
		if cell := tbl.get(x, 0); cell == nil || !cell._caaad {
			numHeader++
		}
	}
	if numHeader == 0 {
		if _afab {
			_cb.Log.Info("!numHeader=0")
		}
		return nil, nil
	}

	result := tbl.reduceTiling(tiling, _fdbb)
	result = result.subdivide()
	return result, used
}
2021-10-22 10:53:20 +00:00
2022-02-05 21:34:53 +00:00
// String returns a one-line description of the text mark: offset, text (quoted and as hex
// runes), bounding box corners, the font description truncated to 50 bytes, and a " *M*"
// suffix when Meta is set.
func (tm TextMark) String() string {
	box := tm.BBox

	font := ""
	if tm.Font != nil {
		font = tm.Font.String()
		if len(font) > 50 {
			font = font[:50] + "..."
		}
	}

	meta := ""
	if tm.Meta {
		meta = " *M*"
	}

	return _fg.Sprintf("{TextMark: %d %q=%02x (%6.2f, %6.2f) (%6.2f, %6.2f) %s%s}",
		tm.Offset, tm.Text, []rune(tm.Text), box.Llx, box.Lly, box.Urx, box.Ury, font, meta)
}

// bbox returns the word's embedded rectangle.
func (w *textWord) bbox() _ge.PdfRectangle { return w.PdfRectangle }

// toTextTable converts the internal table to the public TextTable.
// Cells is indexed [row][column] with `_accb` rows and `_ggda` columns; positions with no
// paragraph keep the zero TableCell. Each cell's marks are numbered from offset 0.
func (t *textTable) toTextTable() TextTable {
	if _fcbb {
		_cb.Log.Info("toTextTable: %d x %d", t._ggda, t._accb)
	}
	rows := make([][]TableCell, t._accb)
	for y := range rows {
		rows[y] = make([]TableCell, t._ggda)
		for x := range rows[y] {
			para := t.get(x, y)
			if para == nil {
				continue
			}
			if _fcbb {
				_fg.Printf("%4d %2d: %s\n", x, y, para)
			}
			cell := &rows[y][x]
			cell.Text = para.text()
			offset := 0
			cell.Marks._agbde = para.toTextMarks(&offset)
		}
	}
	return TextTable{W: t._ggda, H: t._accb, Cells: rows}
}

// xNeighbours builds two events per paragraph, one at its left x edge (flag true) and one at
// its right x edge (flag false), and passes them to eventNeighbours. When `ratio` is non-zero
// each edge is pushed outwards by `ratio` times the paragraph's fontsize().
func (paras paraList) xNeighbours(ratio float64) map[*textPara][]int {
	events := make([]event, 2*len(paras))
	for i, para := range paras {
		left, right := para.Llx, para.Urx
		if !(ratio == 0) {
			left -= ratio * para.fontsize()
			right += ratio * para.fontsize()
		}
		events[2*i] = event{left, true, i}
		events[2*i+1] = event{right, false, i}
	}
	return paras.eventNeighbours(events)
}

// lastpointEstablished returns (`_afcbb`, false) when `_faec` is set, (last point of the final
// subpath, false) when that subpath has `_agaa` set, and (zero point, true) otherwise.
func (ss *shapesState) lastpointEstablished() (_ce.Point, bool) {
	if ss._faec {
		return ss._afcbb, false
	}
	if n := len(ss._becc); n > 0 && ss._becc[n-1]._agaa {
		return ss._becc[n-1].last(), false
	}
	return _ce.Point{}, true
}

// _bggd returns `_edafe(a) - _edafe(b)`.
func _bggd(a, b bounded) float64 {
	return _edafe(a) - _edafe(b)
}
2021-10-22 10:53:20 +00:00
2022-02-05 21:34:53 +00:00
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
//
// Marks are selected with `_ace(mark.bbox(), bbox)`, grouped by their `_dgcf` value
// (0, 90, 180, 270) and each non-empty group is turned into paragraphs with `_geeg`.
// The text, marks and tables of the page text are then rebuilt from those paragraphs.
func (pt *PageText) ApplyArea(bbox _ge.PdfRectangle) {
	selected := make([]*textMark, 0, len(pt._gfgg))
	for _, mark := range pt._gfgg {
		if _ace(mark.bbox(), bbox) {
			selected = append(selected, mark)
		}
	}

	var paras paraList
	remaining := len(selected)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		// At most `remaining` marks can still match, so use that as the capacity hint.
		// (The previous hint was the number of marks already consumed, i.e. 0 on the
		// first pass, which forced the slice to regrow repeatedly.)
		group := make([]*textMark, 0, remaining)
		for _, mark := range selected {
			if mark._dgcf == orient {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _geeg(group, pt._edbb, nil, nil)...)
		remaining -= len(group)
	}

	buf := new(_gfa.Buffer)
	paras.writeText(buf)
	pt._bfbe = buf.String()
	pt._cdd = paras.toTextMarks()
	pt._afga = paras.tables()
}

// sort sorts the rulings in place using the list's `comp` method as the less function.
func (rl rulingList) sort() { _gf.Slice(rl, rl.comp) }
2021-07-30 00:21:16 +00:00
2021-12-14 01:08:28 +00:00
// String returns a description of `k`.
2022-02-05 21:34:53 +00:00
func (kind markKind) String() string {
	if name, known := _fdcc[kind]; known {
		return name
	}
	return _fg.Sprintf("Not a mark: %d", kind)
}

// _bbad returns `_bggd(a, b)` unless `_cgcb` reports true for that value, in which case it
// falls back to `_cffbf(a, b)`.
func _bbad(a, b bounded) float64 {
	primary := _bggd(a, b)
	if _cgcb(primary) {
		return _cffbf(a, b)
	}
	return primary
}

// getComposite looks up the composite cell stored at (`x`, `y`) in `_gefg`.
// It returns (nil, zero rectangle) when no cell is stored there, otherwise the result of the
// cell's parasBBox().
func (t *textTable) getComposite(x, y int) (paraList, _ge.PdfRectangle) {
	cell, found := t._gefg[_dffgb(x, y)]
	if _fcbb {
		_fg.Printf("        getComposite(%d,%d)->%s\n", x, y, cell.String())
	}
	if found {
		return cell.parasBBox()
	}
	return nil, _ge.PdfRectangle{}
}

// markCells sets `_gcdf` on every non-nil paragraph stored in the table.
func (t *textTable) markCells() {
	for y := 0; y < t._accb; y++ {
		for x := 0; x < t._ggda; x++ {
			if para := t.get(x, y); para != nil {
				para._gcdf = true
			}
		}
	}
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// Append appends `mark` to the mark array.
func (ma *TextMarkArray) Append(mark TextMark) {
	ma._agbde = append(ma._agbde, mark)
}

// equals reports whether the two rulings have the same `_bgcf` value and whether `_eegb`
// holds for each of their `_acabca`, `_dbef` and `_efdg` values.
func (r *ruling) equals(other *ruling) bool {
	if r._bgcf != other._bgcf {
		return false
	}
	if !_eegb(r._acabca, other._acabca) {
		return false
	}
	if !_eegb(r._dbef, other._dbef) {
		return false
	}
	return _eegb(r._efdg, other._efdg)
}

// stateStack is a slice of text state pointers; `top` returns its last element.
type stateStack []*textState
2021-07-30 00:21:16 +00:00
2022-02-05 21:34:53 +00:00
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (pt PageText) ToText() string {
	return pt.Text()
}

// toTextMarks returns the text marks of all paragraphs that do not have `_caaad` set.
// After each such paragraph except the last list element, `_dgde` is used to add either one
// " " mark (when `_ebde` reports true for the pair) or two "\n" marks. Two "\n" marks are
// always added at the end. A single running offset is threaded through all calls.
func (paras paraList) toTextMarks() []TextMark {
	offset := 0
	var marks []TextMark
	lastIdx := len(paras) - 1
	for i, para := range paras {
		if para._caaad {
			continue
		}
		marks = append(marks, para.toTextMarks(&offset)...)
		switch {
		case i == lastIdx:
			// No separator after the final paragraph.
		case _ebde(para, paras[i+1]):
			marks = _dgde(marks, &offset, " ")
		default:
			marks = _dgde(marks, &offset, "\n")
			marks = _dgde(marks, &offset, "\n")
		}
	}
	marks = _dgde(marks, &offset, "\n")
	marks = _dgde(marks, &offset, "\n")
	return marks
}

// bbox returns the line's embedded rectangle.
func (l *textLine) bbox() _ge.PdfRectangle { return l.PdfRectangle }

// sortStrict sorts the rulings in place: by `_bgcf` descending, then by `_acabca` ascending
// (unless `_cgcb` reports true for the difference), then by `_dbef` and finally `_efdg`,
// both ascending.
func (rl rulingList) sortStrict() {
	_gf.Slice(rl, func(i, j int) bool {
		a, b := rl[i], rl[j]
		switch {
		case a._bgcf != b._bgcf:
			return a._bgcf > b._bgcf
		case !_cgcb(a._acabca - b._acabca):
			return a._acabca < b._acabca
		case a._dbef != b._dbef:
			return a._dbef < b._dbef
		default:
			return a._efdg < b._efdg
		}
	})
}

// setFont records `size` in the text state and then resolves the font called `name`.
// The size is stored even when the lookup fails; the font itself is only stored on success.
// It returns nil on a nil receiver.
func (to *textObject) setFont(name string, size float64) error {
	if to == nil {
		return nil
	}
	to._agff._ccfg = size
	font, err := to.getFont(name)
	if err != nil {
		return err
	}
	to._agff._dgdd = font
	return nil
}

// _bcgfb returns a ruling of kind `_agdda` at x = r.Llx spanning r.Lly to r.Ury.
func _bcgfb(r _ge.PdfRectangle) *ruling {
	return &ruling{
		_bgcf:   _agdda,
		_acabca: r.Llx,
		_dbef:   r.Lly,
		_efdg:   r.Ury,
	}
}

// sort sorts each word slice held in `_ebae` in place, ordering by `_cffbf(a, b) < 0`.
func (bag *wordBag) sort() {
	for _, words := range bag._ebae {
		less := func(i, j int) bool { return _cffbf(words[i], words[j]) < 0 }
		_gf.Slice(words, less)
	}
}
2021-10-22 10:53:20 +00:00
2022-02-05 21:34:53 +00:00
// String returns a human readable description of the ruling list: "{ EMPTY }", the counts of
// the two groups returned by vertsHorzs(), or the counts plus the rectangle spanned by the
// first and last `_acabca` of each group.
func (rl rulingList) String() string {
	if len(rl) == 0 {
		return "{ EMPTY }"
	}
	verts, horzs := rl.vertsHorzs()
	nVert, nHorz := len(verts), len(horzs)
	if nVert == 0 || nHorz == 0 {
		return _fg.Sprintf("{%d x %d}", nVert, nHorz)
	}
	box := _ge.PdfRectangle{
		Llx: verts[0]._acabca,
		Urx: verts[nVert-1]._acabca,
		Lly: horzs[nHorz-1]._acabca,
		Ury: horzs[0]._acabca,
	}
	return _fg.Sprintf("{%d x %d: %6.2f}", nVert, nHorz, box)
}

// mergePrimary returns the mean of the `_acabca` values of the rulings.
// It indexes element 0, so it panics on an empty list.
func (rl rulingList) mergePrimary() float64 {
	sum := rl[0]._acabca
	for _, r := range rl[1:] {
		sum += r._acabca
	}
	return sum / float64(len(rl))
}

// _bdga returns a text state with `_bbbc` = 100, `_aba` = RenderModeFill and `_fccd` = `rect`.
func _bdga(rect _ge.PdfRectangle) textState {
	return textState{_bbbc: 100, _aba: RenderModeFill, _fccd: rect}
}

// cubicTo adds only the end point (`x3`, `y3`) to the path; the four control-point
// coordinates are ignored.
func (ss *shapesState) cubicTo(x1, y1, x2, y2, x3, y3 float64) {
	if _dfce {
		_cb.Log.Info("cubicTo:")
	}
	ss.addPoint(x3, y3)
}

// _acabf merges word bags that are contained in other word bags.
// The input slice is sorted in place by area (largest first), then by height (tallest first).
// Each bag not yet absorbed then absorbs every later, not-yet-absorbed bag whose rectangle is
// contained (per `_bgad`) in its own rectangle widened to the left by its `_adcc`.
// The surviving bags are returned; slices of length 0 or 1 are returned unchanged.
func _acabf(bags []*wordBag) []*wordBag {
	if len(bags) <= 1 {
		return bags
	}
	if _bcfc {
		_cb.Log.Info("mergeWordBags:")
	}
	_gf.Slice(bags, func(i, j int) bool {
		a, b := bags[i], bags[j]
		areaA := a.Width() * a.Height()
		areaB := b.Width() * b.Height()
		if areaA != areaB {
			return areaA > areaB
		}
		if a.Height() != b.Height() {
			return a.Height() > b.Height()
		}
		return i < j
	})

	var merged []*wordBag
	absorbed := make(intSet)
	for i := 0; i < len(bags); i++ {
		if absorbed.has(i) {
			continue
		}
		outer := bags[i]
		for j := i + 1; j < len(bags); j++ {
			// Skip bags that an earlier bag already absorbed. The previous code tested
			// the outer index here, which is never absorbed at this point, so a bag
			// could be absorbed a second time into a later bag.
			if absorbed.has(j) {
				continue
			}
			inner := bags[j]
			// Recomputed every iteration because absorb may change the outer rectangle.
			reach := outer.PdfRectangle
			reach.Llx -= outer._adcc
			if _bgad(reach, inner.PdfRectangle) {
				outer.absorb(inner)
				absorbed.add(j)
			}
		}
		merged = append(merged, outer)
	}

	// Sanity check: every input bag is either kept or absorbed.
	if len(bags) != len(merged)+len(absorbed) {
		_cb.Log.Error("mergeWordBags: %d\u2192%d absorbed=%d", len(bags), len(merged), len(absorbed))
	}
	return merged
}

// get returns the paragraph stored at (`x`, `y`), or nil when there is none.
func (t *textTable) get(x, y int) *textPara {
	return t._fedcd[_dffgb(x, y)]
}

// _gbbg returns the keys of `tiles` sorted with sort.Float64s and then reversed.
func _gbbg(tiles map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(tiles))
	for k := range tiles {
		keys = append(keys, k)
	}
	_gf.Float64s(keys)
	for lo, hi := 0, len(keys)-1; lo < hi; lo, hi = lo+1, hi-1 {
		keys[lo], keys[hi] = keys[hi], keys[lo]
	}
	return keys
}

// aligned groups the rulings: each ruling after the first either bumps the counter of one
// existing group representative it gridIntersects, or becomes a new representative with count 0.
// It reports whether the fraction of representatives left at count 0 is at most 1 - _edcce.
// Lists with fewer than two rulings are never aligned.
func (rl rulingList) aligned() bool {
	if len(rl) < 2 {
		return false
	}
	matches := make(map[*ruling]int)
	matches[rl[0]] = 0
	for _, r := range rl[1:] {
		matched := false
		for rep := range matches {
			if r.gridIntersecting(rep) {
				matches[rep]++
				matched = true
				break
			}
		}
		if !matched {
			matches[r] = 0
		}
	}

	unmatched := 0
	for _, count := range matches {
		if count == 0 {
			unmatched++
		}
	}
	fraction := float64(unmatched) / float64(len(rl))
	ok := fraction <= 1.0-_edcce
	if _dcdc {
		_cb.Log.Info("aligned=%t unmatched=%.2f=%d/%d vecs=%s", ok, fraction, unmatched, len(rl), rl.String())
	}
	return ok
}

// top returns the last element of the stack, or nil when the stack is empty.
func (s *stateStack) top() *textState {
	if s.empty() {
		return nil
	}
	return (*s)[s.size()-1]
}

// textPara is a paragraph: a rectangle, a second rectangle `_cacf`, its text lines, an
// optional table, two flags and four links to other paragraphs.
type textPara struct {
	_ge.PdfRectangle
	_cacf  _ge.PdfRectangle
	_ffcb  []*textLine
	_defe  *textTable
	_gcdf  bool // set by textTable.markCells and paraList.findGridTables
	_caaad bool // paragraphs with this flag are skipped by writeText and toTextMarks
	_cbfa  *textPara
	_cegg  *textPara
	_bdaa  *textPara
	_gbff  *textPara
}

// clearPath discards all subpaths and resets the `_faec` flag.
func (ss *shapesState) clearPath() {
	ss._becc = nil
	ss._faec = false
	if _dfce {
		_cb.Log.Info("CLEAR: ss=%s", ss)
	}
}
// Compile-time debug switches. Every one of them currently evaluates to false, so the
// logging they guard is compiled out. Derived switches are tied to a parent with `&&`.
const (
	_gbgg  = false
	_fdcf  = false
	_ecde  = false
	_cbgdf = false
	_dfce  = false
	_bcea  = false
	_dagb  = false
	_ffed  = false

	_bcfc  = false
	_ccgb  = _bcfc && true
	_dbgg  = _ccgb && false
	_cccfa = _bcfc && true

	_fcbb = false
	_aggb = _fcbb && false
	_egeb = _fcbb && true

	_dcdc  = false
	_gedc  = _dcdc && false
	_cabea = _dcdc && false
	_afab  = _dcdc && true
	_gddf  = _dcdc && false
	_dbdf  = _dcdc && false
)

// isQuadrilateral reports whether the subpath has exactly 4 points, or exactly 5 points
// whose first and last points have identical X and Y.
func (sp *subpath) isQuadrilateral() bool {
	switch len(sp._eeff) {
	case 4:
		return true
	case 5:
		first, last := sp._eeff[0], sp._eeff[4]
		return first.X == last.X && first.Y == last.Y
	default:
		return false
	}
}

// _abg returns the translation matrix for the offset (`p.X`, `p.Y`).
func _abg(p _ce.Point) _ce.Matrix {
	return _ce.TranslationMatrix(p.X, p.Y)
}

// llyOrdering returns the indexes of `paras` stably sorted by ascending Lly.
func (paras paraList) llyOrdering() []int {
	order := make([]int, 0, len(paras))
	for i := range paras {
		order = append(order, i)
	}
	_gf.SliceStable(order, func(a, b int) bool {
		return paras[order[a]].Lly < paras[order[b]].Lly
	})
	return order
}

// getCurrentFont returns the font held in the text state. When none is set it logs a debug
// message and returns the model's default font instead.
func (to *textObject) getCurrentFont() *_ge.PdfFont {
	if font := to._agff._dgdd; font != nil {
		return font
	}
	_cb.Log.Debug("ERROR: No font defined. Using default.")
	return _ge.DefaultFont()
}

// _ggf binds `arg` as the third argument of `fn` and returns the resulting two-argument
// function.
func _ggf(fn func(*wordBag, *textWord, float64) bool, arg float64) func(*wordBag, *textWord) bool {
	return func(bag *wordBag, word *textWord) bool {
		return fn(bag, word, arg)
	}
}

// cachedImage pairs an image with a colorspace.
type cachedImage struct {
	_fc *_ge.Image
	_eg _ge.PdfColorspace
}
2021-01-07 14:20:10 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct{
2020-11-23 22:15:56 +00:00
2020-12-06 13:03:03 +00:00
// Text is the extracted text.
Text string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// BBox is the bounding box of the text.
2022-02-05 21:34:53 +00:00
BBox _ge .PdfRectangle ;
2021-01-07 14:20:10 +00:00
// Font is the font the text was drawn with.
2022-02-05 21:34:53 +00:00
Font *_ge .PdfFont ;
2021-01-07 14:20:10 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-02-05 21:34:53 +00:00
FillColor _fd .Color ;
2021-01-07 14:20:10 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-02-05 21:34:53 +00:00
StrokeColor _fd .Color ;
2021-01-07 14:20:10 +00:00
// Orientation is the text orientation
2022-02-05 21:34:53 +00:00
Orientation int ;};func (_gcgc *wordBag )blocked (_abab *textWord )bool {if _abab .Urx < _gcgc .Llx {_afdac :=_eebfd (_abab .PdfRectangle );_bfgd :=_bcgfb (_gcgc .PdfRectangle );if _gcgc ._aec .blocks (_afdac ,_bfgd ){if _dbdf {_cb .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_abab ,_gcgc );
};return true ;};}else if _gcgc .Urx < _abab .Llx {_badb :=_eebfd (_gcgc .PdfRectangle );_gfd :=_bcgfb (_abab .PdfRectangle );if _gcgc ._aec .blocks (_badb ,_gfd ){if _dbdf {_cb .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s",_abab ,_gcgc );
};return true ;};};if _abab .Ury < _gcgc .Lly {_baa :=_edff (_abab .PdfRectangle );_effb :=_bbga (_gcgc .PdfRectangle );if _gcgc ._bbgc .blocks (_baa ,_effb ){if _dbdf {_cb .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_abab ,_gcgc );
};return true ;};}else if _gcgc .Ury < _abab .Lly {_ece :=_edff (_gcgc .PdfRectangle );_bfc :=_bbga (_abab .PdfRectangle );if _gcgc ._bbgc .blocks (_ece ,_bfc ){if _dbdf {_cb .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s",_abab ,_gcgc );
};return true ;};};return false ;};func (_acega *textTable )reduce ()*textTable {_cded :=make ([]int ,0,_acega ._accb );_eace :=make ([]int ,0,_acega ._ggda );for _dcbg :=0;_dcbg < _acega ._accb ;_dcbg ++{if !_acega .emptyCompositeRow (_dcbg ){_cded =append (_cded ,_dcbg );
};};for _dffea :=0;_dffea < _acega ._ggda ;_dffea ++{if !_acega .emptyCompositeColumn (_dffea ){_eace =append (_eace ,_dffea );};};if len (_cded )==_acega ._accb &&len (_eace )==_acega ._ggda {return _acega ;};_afafc :=textTable {_bagb :_acega ._bagb ,_ggda :len (_eace ),_accb :len (_cded ),_fedcd :make (map[uint64 ]*textPara ,len (_eace )*len (_cded ))};
if _fcbb {_cb .Log .Info ("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064",_acega ._ggda ,_acega ._accb ,len (_eace ),len (_cded ));_cb .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_eace );
_cb .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_cded );};for _gaeg ,_faeaf :=range _cded {for _ccbg ,_dffba :=range _eace {_bfae ,_cgecf :=_acega .getComposite (_dffba ,_faeaf );if _bfae ==nil {continue ;
};if _fcbb {_fg .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_ccbg ,_gaeg ,_dffba ,_faeaf ,_geabcf (_bfae .merge ().text (),50));};_afafc .putComposite (_ccbg ,_gaeg ,_bfae ,_cgecf );
};};return &_afafc ;};func (_de *imageExtractContext )processOperand (_beaf *_aa .ContentStreamOperation ,_fdab _aa .GraphicsState ,_cag *_ge .PdfPageResources )error {if _beaf .Operand =="\u0042\u0049"&&len (_beaf .Params )==1{_afg ,_dfg :=_beaf .Params [0].(*_aa .ContentStreamInlineImage );
if !_dfg {return nil ;};if _beg ,_cd :=_dd .GetBoolVal (_afg .ImageMask );_cd {if _beg &&!_de ._fec .IncludeInlineStencilMasks {return nil ;};};return _de .extractInlineImage (_afg ,_fdab ,_cag );}else if _beaf .Operand =="\u0044\u006f"&&len (_beaf .Params )==1{_ae ,_ga :=_dd .GetName (_beaf .Params [0]);
if !_ga {_cb .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");return _cg ;};_ ,_bbb :=_cag .GetXObjectByName (*_ae );switch _bbb {case _ge .XObjectTypeImage :return _de .extractXObjectImage (_ae ,_fdab ,_cag );case _ge .XObjectTypeForm :return _de .extractFormImages (_ae ,_fdab ,_cag );
};};return nil ;};func (_bec *textObject )showText (_bfff []byte )error {return _bec .renderText (_bfff )};func _edff (_dabf _ge .PdfRectangle )*ruling {return &ruling {_bgcf :_ggdg ,_acabca :_dabf .Ury ,_dbef :_dabf .Llx ,_efdg :_dabf .Urx };};func (_bddf *shapesState )stroke (_fbcd *[]pathSection ){_bged :=pathSection {_ccfga :_bddf ._becc ,Color :_bddf ._aac .getStrokeColor ()};
*_fbcd =append (*_fbcd ,_bged );if _dcdc {_fg .Printf ("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",len (*_fbcd ),_bddf ,_bddf ._aac .getStrokeColor (),_bged .bbox ());
if _gedc {for _dbce ,_ggbf :=range _bddf ._becc {_fg .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dbce ,_ggbf );if _dbce ==10{break ;};};};};};
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	// _agbde holds the marks in the order they were appended.
	_agbde []TextMark
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// String returns a description of the text state: the `_cabf`, `_egc` and `_ccfg` values
// (labelled tc, tw and tfs) and the base font name, or "[NOT SET]" when no font is set.
func (ts *textState) String() string {
	fontName := "[NOT SET]"
	if ts._dgdd != nil {
		fontName = ts._dgdd.BaseFont()
	}
	return _fg.Sprintf("tc=%.2f tw=%.2f tfs=%.2f font=%q", ts._cabf, ts._egc, ts._ccfg, fontName)
}

// scanBand scans the words of `bag` whose `_gaff` lies within [`minDepth`, `maxDepth`],
// widened on both sides by `_faad` times the initial `_adcc` of `target`, and returns how many
// words were selected.
//
// A word is selected when `accept(target, word)` is true, when the font measure computed below
// does not exceed `maxRatio` (only checked if `maxRatio` > 0) and when `target.blocked(word)`
// is false. Unless `detectOnly` is set, each selected word is pulled into `target` and the
// removals are applied to `bag` at the end; with `detectOnly` the inner loop stops at the first
// selected word of a depth index. Unless `freezeDepth` is set, the depth range grows to include
// each selected word. The first (string) argument is unused.
func (bag *wordBag) scanBand(_ string, target *wordBag, accept func(*wordBag, *textWord) bool, minDepth, maxDepth, maxRatio float64, detectOnly, freezeDepth bool) int {
	initialSize := target._adcc
	var removals map[int]map[*textWord]struct{}
	if !detectOnly {
		removals = bag.makeRemovals()
	}
	// The margin is fixed up front even though target._adcc may be re-read below.
	margin := _faad * initialSize
	count := 0
	for _, depthIdx := range bag.depthBand(minDepth-margin, maxDepth+margin) {
		if len(bag._ebae[depthIdx]) == 0 {
			continue
		}
		for _, word := range bag._ebae[depthIdx] {
			if !(minDepth-margin <= word._gaff && word._gaff <= maxDepth+margin) {
				continue
			}
			if !accept(target, word) {
				continue
			}
			relDiff := 2.0 * _gb.Abs(word._egdce-target._adcc) / (word._egdce + target._adcc)
			maxQuot := _gb.Max(word._egdce/target._adcc, target._adcc/word._egdce)
			measure := _gb.Min(relDiff, maxQuot)
			if maxRatio > 0 && measure > maxRatio {
				continue
			}
			if target.blocked(word) {
				continue
			}
			if !detectOnly {
				target.pullWord(word, depthIdx, removals)
			}
			count++
			if !freezeDepth {
				if word._gaff < minDepth {
					minDepth = word._gaff
				}
				if word._gaff > maxDepth {
					maxDepth = word._gaff
				}
			}
			if detectOnly {
				break
			}
		}
	}
	if !detectOnly {
		bag.applyRemovals(removals)
	}
	return count
}

// _eeaeg returns a new paragraph with rectangle `box` and lines `lines`.
func _eeaeg(box _ge.PdfRectangle, lines []*textLine) *textPara {
	para := textPara{PdfRectangle: box, _ffcb: lines}
	return &para
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	_bb   string
	_fe   *_ge.PdfPageResources
	_ca   _ge.PdfRectangle
	_dfd  map[string]fontEntry
	_af   map[string]textResult
	_fa   int64
	_gfaf int
}

// findGridTables runs findTableGrid for every tiling in `tilings` and returns the tables found.
// Each table found is logged, has markCells applied, and every paragraph reported as used by
// findTableGrid gets `_gcdf` set.
func (paras paraList) findGridTables(tilings []gridTiling) []*textTable {
	if _fcbb {
		_cb.Log.Info("findGridTables: %d paras", len(paras))
		for i, para := range paras {
			_fg.Printf("%4d: %s\n", i, para)
		}
	}

	var tables []*textTable
	for i, tiling := range tilings {
		tbl, used := paras.findTableGrid(tiling)
		if tbl != nil {
			tbl.log(_fg.Sprintf("findTableWithGrids: %d", i))
			tables = append(tables, tbl)
			tbl.markCells()
		}
		for para := range used {
			para._gcdf = true
		}
	}

	if _fcbb {
		_cb.Log.Info("findGridTables: %d tables", len(tables))
	}
	return tables
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	// IncludeInlineStencilMasks, when false, makes processOperand skip inline images whose
	// ImageMask entry is true.
	IncludeInlineStencilMasks bool
}

// split divides the composite cell into a table using the given corridor coordinates.
// `rowCorridors` is extended with the cell's Ury and Lly and `colCorridors` with its Llx and
// Urx via `_dabc`; consecutive pairs then define the sub-rectangles. Each text line of the cell
// is assigned to the first sub-rectangle that contains it (per `_bgad`), and every
// sub-rectangle that received lines becomes one paragraph of the returned table.
// The cell's paragraph list is sorted in place by (Lly, Llx) as a side effect.
func (cell compositeCell) split(rowCorridors, colCorridors []float64) *textTable {
	nRows := len(rowCorridors) + 1
	nCols := len(colCorridors) + 1
	if _fcbb {
		_cb.Log.Info("compositeCell.split: %d x %d\n\tcomposite=%s\n"+"\trowCorridors=%6.2f\n\tcolCorridors=%6.2f", nCols, nRows, cell, rowCorridors, colCorridors)
		_fg.Printf("    %d paras\n", len(cell.paraList))
		for i, para := range cell.paraList {
			_fg.Printf("%4d: %s\n", i, para.String())
		}
		_fg.Printf("    %d lines\n", len(cell.lines()))
		for i, line := range cell.lines() {
			_fg.Printf("%4d: %s\n", i, line)
		}
	}

	rowCorridors = _dabc(rowCorridors, cell.Ury, cell.Lly)
	colCorridors = _dabc(colCorridors, cell.Llx, cell.Urx)

	tbl := textTable{
		_ggda:  nCols,
		_accb:  nRows,
		_fedcd: make(map[uint64]*textPara, nCols*nRows),
	}

	paras := cell.paraList
	_gf.Slice(paras, func(i, j int) bool {
		a, b := paras[i], paras[j]
		if a.Lly != b.Lly {
			return a.Lly < b.Lly
		}
		return a.Llx < b.Llx
	})

	// Rectangle of every (column, row) position.
	rects := make(map[uint64]_ge.PdfRectangle, nCols*nRows)
	for y, bottom := range rowCorridors[1:] {
		top := rowCorridors[y]
		for x, right := range colCorridors[1:] {
			left := colCorridors[x]
			rects[_dffgb(x, y)] = _ge.PdfRectangle{Llx: left, Urx: right, Lly: bottom, Ury: top}
		}
	}
	if _fcbb {
		_cb.Log.Info("compositeCell.split: rects")
		_fg.Printf("    ")
		for x := 0; x < nCols; x++ {
			_fg.Printf("%30d, ", x)
		}
		_fg.Println()
		for y := 0; y < nRows; y++ {
			_fg.Printf("  %2d:", y)
			for x := 0; x < nCols; x++ {
				_fg.Printf("%6.2f, ", rects[_dffgb(x, y)])
			}
			_fg.Println()
		}
	}

	// locate returns the first position, scanning row by row, whose rectangle contains the line.
	locate := func(line *textLine) (int, int) {
		for y := 0; y < nRows; y++ {
			for x := 0; x < nCols; x++ {
				if _bgad(rects[_dffgb(x, y)], line.PdfRectangle) {
					return x, y
				}
			}
		}
		return -1, -1
	}

	linesAt := make(map[uint64][]*textLine, nCols*nRows)
	for _, line := range paras.lines() {
		x, y := locate(line)
		if x < 0 {
			continue
		}
		key := _dffgb(x, y)
		linesAt[key] = append(linesAt[key], line)
	}

	for y := 0; y < len(rowCorridors)-1; y++ {
		top, bottom := rowCorridors[y], rowCorridors[y+1]
		for x := 0; x < len(colCorridors)-1; x++ {
			left, right := colCorridors[x], colCorridors[x+1]
			box := _ge.PdfRectangle{Llx: left, Urx: right, Lly: bottom, Ury: top}
			lines := linesAt[_dffgb(x, y)]
			if len(lines) == 0 {
				continue
			}
			tbl.put(x, y, _eeaeg(box, lines))
		}
	}
	return &tbl
}

// bbox returns the paragraph's embedded rectangle.
func (p *textPara) bbox() _ge.PdfRectangle { return p.PdfRectangle }
// fill appends a new path section to `*_agfg`. The section holds the subpaths
// currently stored in the shapes state and the fill colour reported by the
// state's text object. When `_dcdc` is set the new section is printed, and when
// `_gedc` is also set its subpaths with indexes 0 to 10 are printed as well.
func (_afaa *shapesState) fill(_agfg *[]pathSection) {
	section := pathSection{_ccfga: _afaa._becc, Color: _afaa._aac.getFillColor()}
	*_agfg = append(*_agfg, section)
	if !_dcdc {
		return
	}
	bounds := section.bbox()
	_fg.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",
		len(*_agfg), len(section._ccfga), _afaa, section.Color, bounds, bounds.Width(), bounds.Height())
	if !_gedc {
		return
	}
	for pos, sp := range section._ccfga {
		_fg.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", pos, sp)
		if pos == 10 {
			break
		}
	}
}

// intersections returns a map from the index of each ruling in `_gggc` that
// intersects a ruling of the other orientation to the set of indexes of the
// rulings it intersects. It returns nil when there are fewer than `_aggeb`+1
// vertical rulings, fewer than `_deea`+1 horizontal rulings, or more than
// `_cffgc` vertical and horizontal rulings in total.
func (_gggc rulingList) intersections() map[int]intSet {
	var vertIdxs, horzIdxs []int
	for pos, r := range _gggc {
		if r._bgcf == _agdda {
			vertIdxs = append(vertIdxs, pos)
		} else if r._bgcf == _ggdg {
			horzIdxs = append(horzIdxs, pos)
		}
	}
	if len(vertIdxs) < _aggeb+1 {
		return nil
	}
	if len(horzIdxs) < _deea+1 {
		return nil
	}
	if len(vertIdxs)+len(horzIdxs) > _cffgc {
		_cb.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",
			len(_gggc), len(vertIdxs), len(horzIdxs))
		return nil
	}

	crossings := make(map[int]intSet, len(vertIdxs)+len(horzIdxs))
	// setFor returns the set stored under `key`, creating it on first use.
	setFor := func(key int) intSet {
		set, present := crossings[key]
		if !present {
			set = make(intSet)
			crossings[key] = set
		}
		return set
	}
	for _, v := range vertIdxs {
		for _, h := range horzIdxs {
			if !_gggc[v].intersects(_gggc[h]) {
				continue
			}
			// Record the crossing in both directions.
			vSet, hSet := setFor(v), setFor(h)
			vSet.add(h)
			hSet.add(v)
		}
	}
	return crossings
}
// String returns a string describing `i`: its rectangle followed by four
// one-character flags. Each flag is a letter ("L", "R", "B", "T") when the
// corresponding boolean of the tile is set and "_" otherwise.
func (_ddgcc gridTile) String() string {
	flag := func(set bool, letter string) string {
		if !set {
			return "\u005f"
		}
		return letter
	}
	l := flag(_ddgcc._bbeg, "\u004c")
	r := flag(_ddgcc._efcdf, "\u0052")
	b := flag(_ddgcc._gbbc, "\u0042")
	t := flag(_ddgcc._ecf, "\u0054")
	return _fg.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", _ddgcc.PdfRectangle, l, r, b, t)
}

// extractTables finds the tables in `_ebfca` using the grid tilings `_dgca`
// and returns the result of applying them to the paragraph list. A list with
// fewer than `_adbg` paragraphs is returned unchanged.
func (_ebfca paraList) extractTables(_dgca []gridTiling) paraList {
	if _fcbb {
		_cb.Log.Debug("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_ebfca))
	}
	if len(_ebfca) < _adbg {
		return _ebfca
	}

	found := _ebfca.findTables(_dgca)
	if _fcbb {
		_cb.Log.Info("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(found))
		for pos, tbl := range found {
			title := _fg.Sprintf("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064", pos)
			tbl.log(title)
		}
	}
	return _ebfca.applyTables(found)
}

// add appends the points `_cega` to the subpath.
func (_agac *subpath) add(_cega ..._ce.Point) {
	_agac._eeff = append(_agac._eeff, _cega...)
}
// _cggb builds the gridTile for rectangle `_edcbf`. Each of the four flags of
// the tile is set when the corresponding ruling is non-nil and its `encloses`
// method accepts the rectangle's extent: the y extent (Lly, Ury) for `_egee`
// and `_bbbef`, the x extent (Llx, Urx) for `_bbcbg` and `_fcga`.
func _cggb(_edcbf _ge.PdfRectangle, _egee, _bbbef, _bbcbg, _fcga *ruling) gridTile {
	covers := func(r *ruling, lo, hi float64) bool {
		return r != nil && r.encloses(lo, hi)
	}
	tile := gridTile{PdfRectangle: _edcbf}
	tile._bbeg = covers(_egee, _edcbf.Lly, _edcbf.Ury)
	tile._efcdf = covers(_bbbef, _edcbf.Lly, _edcbf.Ury)
	tile._gbbc = covers(_bbcbg, _edcbf.Llx, _edcbf.Urx)
	tile._ecf = covers(_fcga, _edcbf.Llx, _edcbf.Urx)
	return tile
}

// inDiacriticArea reports whether `_dgec` is positioned close enough to
// `_fecg`: the summed offsets of the left and right edges must be smaller in
// magnitude than the width of `_fecg` scaled by `_fcfba`, and the offset of the
// bottom edges must be smaller in magnitude than its height scaled by `_fcfba`.
func (_fecg *textMark) inDiacriticArea(_dgec *textMark) bool {
	leftGap := _fecg.Llx - _dgec.Llx
	rightGap := _fecg.Urx - _dgec.Urx
	bottomGap := _fecg.Lly - _dgec.Lly
	if !(_gb.Abs(leftGap+rightGap) < _fecg.Width()*_fcfba) {
		return false
	}
	return _gb.Abs(bottomGap) < _fecg.Height()*_fcfba
}

// devicePoint transforms the point (`_faaa`, `_dac`) by the product of the two
// matrices held in the shapes state and returns the transformed point.
func (_fbef *shapesState) devicePoint(_faaa, _dac float64) _ce.Point {
	combined := _fbef._cage.Mult(_fbef._decc)
	devX, devY := combined.Transform(_faaa, _dac)
	return _ce.NewPoint(devX, devY)
}
// ToTextMark returns the public view of `tm`.
// Only the text, original text, bounding box, font, font size, fill and stroke
// colours and orientation are copied; every other TextMark field keeps its
// zero value.
func (_acec *textMark) ToTextMark() TextMark {
	var public TextMark
	public.Text = _acec._gbf
	public.Original = _acec._dcdbd
	public.BBox = _acec._gdfb
	public.Font = _acec._gggd
	public.FontSize = _acec._adcd
	public.FillColor = _acec._cdcf
	public.StrokeColor = _acec._ceee
	public.Orientation = _acec._dgcf
	return public
}
// reorder rearranges `_ebee` in place so that position i holds the element
// that was at index `_ccccc[i]` before the call.
func (_ebee paraList) reorder(_ccccc []int) {
	shuffled := make(paraList, len(_ebee))
	for dst := 0; dst < len(_ccccc); dst++ {
		shuffled[dst] = _ebee[_ccccc[dst]]
	}
	copy(_ebee, shuffled)
}

// textTable is a table of text paragraphs with a bounding rectangle.
// `_fedcd` holds paragraph cells and `_gefg` holds composite cells, both keyed
// by a uint64 computed from the cell coordinates.
// NOTE(review): `_ggda` and `_accb` look like the cell counts in x and y
// respectively - confirm against the code that constructs tables.
type textTable struct {
	_ge.PdfRectangle
	_ggda, _accb int
	_bagb        bool
	_fedcd       map[uint64]*textPara
	_gefg        map[uint64]compositeCell
}

// _fdgbgg removes the element at index `_dbfg` from `_fgege` by shifting the
// following elements down one place in the same backing array, and returns the
// slice shortened by one.
func _fdgbgg(_fgege []*textWord, _dbfg int) []*textWord {
	return append(_fgege[:_dbfg], _fgege[_dbfg+1:]...)
}

// text returns the text of the paragraph as produced by its writeText method.
func (_cgbe *textPara) text() string {
	var out _gfa.Buffer
	_cgbe.writeText(&out)
	return out.String()
}

// extractXObjectImage converts the XObject image named `_bfd` in resources
// `_gd` to RGB and appends an ImageMark for it, positioned and scaled by the
// CTM of graphics state `_agb`. Decoded images are cached per XObject stream.
// It returns nil without recording anything when the XObject or the XObject
// image cannot be found.
func (_aef *imageExtractContext) extractXObjectImage(_bfd *_dd.PdfObjectName, _agb _aa.GraphicsState, _gd *_ge.PdfPageResources) error {
	stream, _ := _gd.GetXObjectByName(*_bfd)
	if stream == nil {
		return nil
	}

	cached, hit := _aef._cgc[stream]
	if !hit {
		// First use of this stream: decode it and remember the result.
		ximg, err := _gd.GetXObjectImageByName(*_bfd)
		if err != nil {
			return err
		}
		if ximg == nil {
			return nil
		}
		decoded, err := ximg.ToImage()
		if err != nil {
			return err
		}
		cached = &cachedImage{_fc: decoded, _eg: ximg.ColorSpace}
		_aef._cgc[stream] = cached
	}

	rgb, err := cached._eg.ImageToRGB(*cached._fc)
	if err != nil {
		return err
	}
	_cb.Log.Debug("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073", _agb.CTM.String())

	placed := ImageMark{
		Image:  &rgb,
		Width:  _agb.CTM.ScalingFactorX(),
		Height: _agb.CTM.ScalingFactorY(),
		Angle:  _agb.CTM.Angle(),
	}
	placed.X, placed.Y = _agb.CTM.Translation()
	_aef._ac = append(_aef._ac, placed)
	_aef._da++
	return nil
}

var _df = false
// String returns a description of `l`: a leading number, the bounding
// rectangle, the font size and the quoted text of the line.
func (_dfbd *textLine) String() string {
	content := _dfbd.text()
	return _fg.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",
		_dfbd._dgce, _dfbd.PdfRectangle, _dfbd._dedb, content)
}

// add inserts `_egagg` into the set.
func (_fcad intSet) add(_egagg int) {
	_fcad[_egagg] = struct{}{}
}

// lineTo adds the point (`_adff`, `_cfcc`) to the shapes state via addPoint,
// logging the point and its device-space position first when `_dfce` is set.
func (_dade *shapesState) lineTo(_adff, _cfcc float64) {
	if _dfce {
		devPt := _dade.devicePoint(_adff, _cfcc)
		_cb.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _adff, _cfcc, devPt)
	}
	_dade.addPoint(_adff, _cfcc)
}

// bounded is implemented by anything that can report a bounding rectangle.
type bounded interface {
	bbox() _ge.PdfRectangle
}

// depthRange returns, in increasing order, the keys of the bag's word map that
// lie in the closed interval [`_bdb`, `_abac`], or nil when there are none.
func (_fggb *wordBag) depthRange(_bdb, _abac int) []int {
	var keys []int
	for key := range _fggb._ebae {
		if key < _bdb || key > _abac {
			continue
		}
		keys = append(keys, key)
	}
	if len(keys) == 0 {
		return nil
	}
	_gf.Ints(keys)
	return keys
}
// String returns a human readable description of `s`.
// The members are collected, sorted in increasing order and formatted with "%+v".
func (_bafd intSet )String ()string {var _beaa []int ;for _adca :=range _bafd {if _bafd .has (_adca ){_beaa =append (_beaa ,_adca );};};_gf .Ints (_beaa );return _fg .Sprintf ("\u0025\u002b\u0076",_beaa );};

// establishSubpath returns the last subpath of the shapes state.
// When lastpointEstablished reports false, a new subpath starting at the point
// it returned is appended first. nil is returned if the state still has no
// subpaths. The `_faec` flag is cleared before a subpath is returned.
func (_beac *shapesState )establishSubpath ()*subpath {_ceed ,_eec :=_beac .lastpointEstablished ();
if !_eec {_beac ._becc =append (_beac ._becc ,_dfb (_ceed ));};if len (_beac ._becc )==0{return nil ;};_beac ._faec =false ;return _beac ._becc [len (_beac ._becc )-1];};

// _gabe returns a new wordBag that contains only the word `_acb`.
// The word is stored under the key `_dagd(_acb._gaff)`, the bag takes the
// word's bounding rectangle and its `_egdce` value, and `_dbge`, `_aeca` and
// `_ddb` are stored in the bag unchanged.
func _gabe (_acb *textWord ,_dbge float64 ,_aeca ,_ddb rulingList )*wordBag {_abb :=_dagd (_acb ._gaff );
_cegc :=[]*textWord {_acb };_cccc :=wordBag {_ebae :map[int ][]*textWord {_abb :_cegc },PdfRectangle :_acb .PdfRectangle ,_adcc :_acb ._egdce ,_feaf :_dbge ,_aec :_aeca ,_bbgc :_ddb };return &_cccc ;};

// augmentGrid splits `_cdac` with vertsHorzs and returns the two lists,
// extended with synthetic rulings where one list's bounding box sticks out of
// the other's by more than `_abc`:
//   - a vertical ruling at the left/right edge of the horizontals' box, spanning
//     the y extent of the verticals' box, is prepended/appended to the verticals;
//   - a horizontal ruling at the bottom/top edge of the verticals' box, spanning
//     the x extent of the horizontals' box, is prepended/appended to the horizontals.
// If either list is empty, or no ruling was added, the lists from vertsHorzs
// are returned as they are.
func (_cdac rulingList )augmentGrid ()(rulingList ,rulingList ){_eebg ,_ccded :=_cdac .vertsHorzs ();
if len (_eebg )==0||len (_ccded )==0{return _eebg ,_ccded ;};_beca ,_cdaf :=_eebg ,_ccded ;_ceba :=_eebg .bbox ();_dfdf :=_ccded .bbox ();if _dcdc {_cb .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_ceba );
_cb .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_dfdf );};var _daef ,_faaae ,_fbaba ,_fedfd *ruling ;if _dfdf .Llx < _ceba .Llx -_abc {_daef =&ruling {_aagg :_fbbc ,_bgcf :_agdda ,_acabca :_dfdf .Llx ,_dbef :_ceba .Lly ,_efdg :_ceba .Ury };
_eebg =append (rulingList {_daef },_eebg ...);};if _dfdf .Urx > _ceba .Urx +_abc {_faaae =&ruling {_aagg :_fbbc ,_bgcf :_agdda ,_acabca :_dfdf .Urx ,_dbef :_ceba .Lly ,_efdg :_ceba .Ury };_eebg =append (_eebg ,_faaae );};if _ceba .Lly < _dfdf .Lly -_abc {_fbaba =&ruling {_aagg :_fbbc ,_bgcf :_ggdg ,_acabca :_ceba .Lly ,_dbef :_dfdf .Llx ,_efdg :_dfdf .Urx };
_ccded =append (rulingList {_fbaba },_ccded ...);};if _ceba .Ury > _dfdf .Ury +_abc {_fedfd =&ruling {_aagg :_fbbc ,_bgcf :_ggdg ,_acabca :_ceba .Ury ,_dbef :_dfdf .Llx ,_efdg :_dfdf .Urx };_ccded =append (_ccded ,_fedfd );};if len (_eebg )+len (_ccded )==len (_cdac ){return _beca ,_cdaf ;
// The combined list below is built only so that it can be logged.
};_dgafd :=append (_eebg ,_ccded ...);_cdac .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_dgafd .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");return _eebg ,_ccded ;};

// extractContentStreamImages parses the content stream `_ff` and processes it
// with `_bg.processOperand` registered as the handler for all operands, using
// `_dbb` as the resources. The image cache and the extraction options of `_bg`
// are created first if they are nil. Parse and processing errors are returned.
func (_bg *imageExtractContext )extractContentStreamImages (_ff string ,_dbb *_ge .PdfPageResources )error {_eba :=_aa .NewContentStreamParser (_ff );
_aad ,_ag :=_eba .Parse ();if _ag !=nil {return _ag ;};if _bg ._cgc ==nil {_bg ._cgc =map[*_dd .PdfObjectStream ]*cachedImage {};};if _bg ._fec ==nil {_bg ._fec =&ImageExtractOptions {};};_bfb :=_aa .NewContentStreamProcessor (*_aad );_bfb .AddHandler (_aa .HandlerConditionEnumAllOperands ,"",_bg .processOperand );
return _bfb .Process (_dbb );};

// _fbce returns the rulings that can be made from the filled path sections
// `_bgea`. The sections are first passed to `_gaaeg`, which may modify their
// points in place. Subpaths that are not quadrilaterals, or for which
// makeRectRuling reports failure, are skipped.
func _fbce (_bgea []pathSection )rulingList {_gaaeg (_bgea );if _dcdc {_cb .Log .Info ("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs",len (_bgea ));
};var _afbc rulingList ;for _ ,_bffb :=range _bgea {for _ ,_eedg :=range _bffb ._ccfga {if !_eedg .isQuadrilateral (){if _dcdc {_cb .Log .Error ("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073",_eedg );};
continue ;};if _aagb ,_fdgbg :=_eedg .makeRectRuling (_bffb .Color );_fdgbg {_afbc =append (_afbc ,_aagb );}else {if _gddf {_cb .Log .Error ("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073",_eedg );
};};};};if _dcdc {_cb .Log .Info ("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073",_afbc .String ());};return _afbc ;};

// yNeighbours builds two events per paragraph, one at its lower y edge (flag
// true) and one at its upper y edge (flag false), each carrying the
// paragraph's index, and returns the result of eventNeighbours on them.
// When `_eadd` is non-zero each edge is moved outwards by `_eadd` times the
// paragraph's font size.
func (_fgdca paraList )yNeighbours (_eadd float64 )map[*textPara ][]int {_abgd :=make ([]event ,2*len (_fgdca ));
if _eadd ==0{for _eaba ,_dgcag :=range _fgdca {_abgd [2*_eaba ]=event {_dgcag .Lly ,true ,_eaba };_abgd [2*_eaba +1]=event {_dgcag .Ury ,false ,_eaba };};}else {for _ddec ,_eabf :=range _fgdca {_abgd [2*_ddec ]=event {_eabf .Lly -_eadd *_eabf .fontsize (),true ,_ddec };
_abgd [2*_ddec +1]=event {_eabf .Ury +_eadd *_eabf .fontsize (),false ,_ddec };};};return _fgdca .eventNeighbours (_abgd );};

// comp is the ordering function for the rulings at indexes `_fddf` and `_fbbb`.
// Rulings of different kinds are ordered by decreasing kind value, and two
// rulings of kind `_ebdfe` never precede each other. Otherwise the rulings are
// compared on `_acabca` (greater first), then `_dbef` (smaller first), then
// `_efdg` (smaller first) for kind `_ggdg`; every comparison result is negated
// for the remaining kind.
// NOTE(review): for two non-`_ggdg` rulings with identical coordinates the
// final negated comparison yields true in both directions, so this is not a
// strict ordering - confirm that the sort callers tolerate that.
func (_agacf rulingList )comp (_fddf ,_fbbb int )bool {_abga ,_bebag :=_agacf [_fddf ],_agacf [_fbbb ];_gbafd ,_gegbb :=_abga ._bgcf ,_bebag ._bgcf ;
if _gbafd !=_gegbb {return _gbafd > _gegbb ;};if _gbafd ==_ebdfe {return false ;};_gaac :=func (_aedbc bool )bool {if _gbafd ==_ggdg {return _aedbc ;};return !_aedbc ;};_gggdc ,_cbec :=_abga ._acabca ,_bebag ._acabca ;if _gggdc !=_cbec {return _gaac (_gggdc > _cbec );
};_gggdc ,_cbec =_abga ._dbef ,_bebag ._dbef ;if _gggdc !=_cbec {return _gaac (_gggdc < _cbec );};return _gaac (_abga ._efdg < _bebag ._efdg );};

// depth returns the smallest depth() among the cells (x, 0) of the table for
// x in [0, `_ggda`), ignoring missing cells and cells whose `_caaad` flag is
// set. It returns 1e10 when no cell qualifies.
func (_cgbb *textTable )depth ()float64 {_fbbagg :=1e10;for _egcd :=0;_egcd < _cgbb ._ggda ;_egcd ++{_fgce :=_cgbb .get (_egcd ,0);
if _fgce ==nil ||_fgce ._caaad {continue ;};_fbbagg =_gb .Min (_fbbagg ,_fgce .depth ());};return _fbbagg ;};

// _cgcb reports whether the magnitude of `_bceb` is below `_ddfc`.
func _cgcb (_bceb float64 )bool {return _gb .Abs (_bceb )< _ddfc };

// depthBand returns the sorted keys of the bag's word map that lie between the
// indexes that getDepthIdx gives for `_fefg` and `_dge`, or nil when the bag
// holds no words.
func (_cbef *wordBag )depthBand (_fefg ,_dge float64 )[]int {if len (_cbef ._ebae )==0{return nil ;
};return _cbef .depthRange (_cbef .getDepthIdx (_fefg ),_cbef .getDepthIdx (_dge ));};

// putComposite stores a composite cell made of paragraphs `_cgdc` and
// rectangle `_daaa` at table position (`_ddeec`, `_eadcf`). updateBBox is
// called on the cell before it is stored. An empty `_cgdc` is logged as an
// error and nothing is stored.
func (_geddee *textTable )putComposite (_ddeec ,_eadcf int ,_cgdc paraList ,_daaa _ge .PdfRectangle ){if len (_cgdc )==0{_cb .Log .Error ("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073");
return ;};_cfaa :=compositeCell {PdfRectangle :_daaa ,paraList :_cgdc };if _fcbb {_fg .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a",_ddeec ,_eadcf ,_cfaa .String ());
};_cfaa .updateBBox ();_geddee ._gefg [_dffgb (_ddeec ,_eadcf )]=_cfaa ;};

// _dfb returns a new subpath whose only point is `_ddcg`.
func _dfb (_ddcg _ce .Point )*subpath {return &subpath {_eeff :[]_ce .Point {_ddcg }}};

// textWord is a word on a page: a bounding rectangle, the text `_aebb`, the
// text marks `_daggf` and the value `_egdce`, which the String method prints
// as the font size.
// NOTE(review): `_gaff` appears to be the word's depth on the page (it is the
// input to the bag key in `_gabe`); the meaning of `_agde` is not visible
// here - confirm both.
type textWord struct{_ge .PdfRectangle ;_gaff float64 ;_aebb string ;_daggf []*textMark ;_egdce float64 ;
_agde bool ;};
2021-12-14 01:08:28 +00:00
// String returns a description of `w`.
2022-02-05 21:34:53 +00:00
// The description contains a leading number, the bounding rectangle, the font
// size and the quoted text of the word.
func (_gfbec *textWord) String() string {
	return _fg.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",
		_gfbec._gaff, _gfbec.PdfRectangle, _gfbec._egdce, _gfbec._aebb)
}

// _dabc returns a new slice that starts with `_abde`, ends with `_egga` and
// contains in between the values of `_fgfce` that lie strictly inside the
// interval spanned by `_abde` and `_egga`. Scanning stops at the first value at
// or beyond the upper end of the interval, so `_fgfce` is expected to be in
// increasing order.
func _dabc(_fgfce []float64, _abde, _egga float64) []float64 {
	lo, hi := _abde, _egga
	if hi < lo {
		lo, hi = hi, lo
	}
	clipped := make([]float64, 0, len(_fgfce)+2)
	clipped = append(clipped, _abde)
	for _, v := range _fgfce {
		if v <= lo {
			continue
		}
		if v >= hi {
			break
		}
		clipped = append(clipped, v)
	}
	return append(clipped, _egga)
}

// _edda removes the last rune of the last mark in `_ffaee` and adjusts
// `*_cdab` to match.
//
// If the last mark holds a single rune the mark itself is dropped and
// `*_cdab` is set to the end of the new last mark. Otherwise the last rune is
// cut from the mark's text and `*_cdab` is reduced by the removed bytes.
//
// The truncated text is written back into the slice element. Previously it
// was assigned to a local copy of the mark, so the offset changed while the
// mark kept its old text. Three inputs that used to panic are handled: an
// empty `_ffaee` and a last mark with empty text are returned unchanged, and
// when the only mark is removed `*_cdab` is reduced by the length of its text.
func _edda(_ffaee []TextMark, _cdab *int) []TextMark {
	if len(_ffaee) == 0 {
		return _ffaee
	}
	lastIdx := len(_ffaee) - 1
	last := _ffaee[lastIdx]

	switch len([]rune(last.Text)) {
	case 0:
		// Nothing to remove.
		return _ffaee
	case 1:
		_ffaee = _ffaee[:lastIdx]
		if len(_ffaee) == 0 {
			// No preceding mark to take the offset from.
			*_cdab -= len(last.Text)
			return _ffaee
		}
		prev := _ffaee[len(_ffaee)-1]
		*_cdab = prev.Offset + len(prev.Text)
		return _ffaee
	}

	trimmed := _bbecf(last.Text)
	*_cdab += len(trimmed) - len(last.Text)
	// Assign through the slice: `last` is only a copy of the element.
	_ffaee[lastIdx].Text = trimmed
	return _ffaee
}

// _gdea returns a newline when `_fffc` and `_fedb` differ by at least the
// tolerance applied by `_cgcb`, and a space otherwise.
func _gdea(_fffc, _fedb float64) string {
	if _cgcb(_fffc - _fedb) {
		return "\u0020"
	}
	return "\u000a"
}

// extractFormImages extracts the images in the content stream of the form
// XObject named `_ffab` in resources `_bgf`. The form's own resources are used
// when it has any, otherwise `_bgf`. The form counter of `_ebag` is incremented
// on success. It returns nil without doing anything when the form is not
// found. `_aga` is not used.
func (_ebag *imageExtractContext) extractFormImages(_ffab *_dd.PdfObjectName, _aga _aa.GraphicsState, _bgf *_ge.PdfPageResources) error {
	form, err := _bgf.GetXObjectFormByName(*_ffab)
	if err != nil {
		return err
	}
	if form == nil {
		return nil
	}
	content, err := form.GetContentStream()
	if err != nil {
		return err
	}
	resources := form.Resources
	if resources == nil {
		resources = _bgf
	}
	if err = _ebag.extractContentStreamImages(string(content), resources); err != nil {
		return err
	}
	_ebag._eb++
	return nil
}
// Text returns the extracted page text.
func (_edg PageText) Text() string {
	return _edg._bfbe
}

// log prints the tiling under the title `_begeb` when `_afab` is set: the two
// coordinate lists, then every tile that exists, grouped by the values of
// `_daba` and, within each group, listed in the order of `_gecb`.
func (_cceg gridTiling) log(_begeb string) {
	if !_afab {
		return
	}
	_cb.Log.Info("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071", len(_cceg._gecb), len(_cceg._daba), _begeb)
	_fg.Printf("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a", _cceg._gecb)
	_fg.Printf("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a", _cceg._daba)
	for yPos, y := range _cceg._daba {
		if row, hasRow := _cceg._ecaee[y]; hasRow {
			_fg.Printf("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a", yPos, y)
			for xPos, x := range _cceg._gecb {
				if tile, hasTile := row[x]; hasTile {
					_fg.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", xPos, tile.String())
				}
			}
		}
	}
}

// _aedf returns the integers 0 to `_acbdc`-1 sorted with `_gged`, which
// receives the two values being compared and reports whether the first sorts
// before the second.
func _aedf(_acbdc int, _gged func(int, int) bool) []int {
	order := make([]int, 0, _acbdc)
	for v := 0; v < _acbdc; v++ {
		order = append(order, v)
	}
	_gf.Slice(order, func(a, b int) bool {
		return _gged(order[a], order[b])
	})
	return order
}

// merge sorts `_fcgb` into reading order and combines it into one paragraph
// whose rectangle is the union of the paragraphs' rectangles and whose lines
// are the paragraphs' lines concatenated in that order. It returns nil for an
// empty list.
func (_fcgb paraList) merge() *textPara {
	_cb.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_fcgb))
	if len(_fcgb) == 0 {
		return nil
	}
	_fcgb.sortReadingOrder()

	union := _fcgb[0].PdfRectangle
	merged := _fcgb[0]._ffcb
	for pos := 1; pos < len(_fcgb); pos++ {
		next := _fcgb[pos]
		union = _gfc(union, next.PdfRectangle)
		merged = append(merged, next._ffcb...)
	}
	return _eeaeg(union, merged)
}

// isAtom returns the table that `_fade` builds from four paragraphs: `_ebda`,
// the paragraph linked through its `_cegg` field, the one linked through its
// `_gbff` field, and the `_gbff` link of the former, which must also be the
// `_cegg` link of the latter. It returns nil when any of the three linked
// paragraphs is reported as taken or when the fourth does not match.
func (_ebda *textPara) isAtom() *textTable {
	viaCegg := _ebda._cegg
	viaGbff := _ebda._gbff
	if viaCegg.taken() {
		return nil
	}
	if viaGbff.taken() {
		return nil
	}
	corner := viaCegg._gbff
	if corner.taken() {
		return nil
	}
	if corner != viaGbff._cegg {
		return nil
	}
	return _fade(_ebda, viaCegg, viaGbff, corner)
}

// Bit flags of RenderMode. The values are 1, 2 and 4 so they can be combined.
const (
	RenderModeStroke RenderMode = 1 << iota
	RenderModeFill
	RenderModeClip
)

// textState holds the parameters of the current text state.
// NOTE(review): the meaning of the individual fields cannot be recovered from
// their obfuscated names; only their types are known here.
type textState struct {
	_cabf float64
	_egc  float64
	_bbbc float64
	_dab  float64
	_ccfg float64
	_aba  RenderMode
	_acaf float64
	_dgdd *_ge.PdfFont
	_fccd _ge.PdfRectangle
	_ffga int
	_fddb int
}

// _gaaeg replaces, in place, both coordinates of every point of every subpath
// in `_adaa` with the value `_cfgcfe` returns for it. It does nothing when
// `_afgcd` is negative. When `_dcdc` is set, every point for which `_fcffc`
// reports a difference is printed together with its replacement and the
// change.
func _gaaeg(_adaa []pathSection) {
	if _afgcd < 0.0 {
		return
	}
	if _dcdc {
		_cb.Log.Info("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073", len(_adaa))
	}
	for secPos, sec := range _adaa {
		for spPos, sp := range sec._ccfga {
			for ptPos, before := range sp._eeff {
				sp._eeff[ptPos] = _ce.Point{X: _cfgcfe(before.X), Y: _cfgcfe(before.Y)}
				if !_dcdc {
					continue
				}
				after := sp._eeff[ptPos]
				if _fcffc(before, after) {
					continue
				}
				shift := _ce.Point{X: after.X - before.X, Y: after.Y - before.Y}
				_fg.Printf("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a", secPos, spPos, ptPos, before, after, shift)
			}
		}
	}
}

// fontsize returns the font size of the first line of the paragraph.
// The paragraph must have at least one line.
func (_dacc *textPara) fontsize() float64 {
	first := _dacc._ffcb[0]
	return first._dedb
}
// quadraticTo adds the point (`_cabe`, `_cdcg`) to the shapes state via
// addPoint. The first two coordinates `_fgfb` and `_effd` are ignored.
func (_bbe *shapesState) quadraticTo(_fgfb, _effd, _cabe, _cdcg float64) {
	if _dfce {
		_cb.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	_bbe.addPoint(_cabe, _cdcg)
}

const _cee = 20

// _gdbd maps each ruling kind to its printable name.
var _gdbd = map[rulingKind]string{
	_ebdfe: "\u006e\u006f\u006e\u0065",
	_ggdg:  "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",
	_agdda: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",
}
// newSubPath clears the path held by the shapes state and, when `_dfce` is
// set, logs the state afterwards.
func (_ebcf *shapesState) newSubPath() {
	_ebcf.clearPath()
	if !_dfce {
		return
	}
	_cb.Log.Info("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073", _ebcf)
}
// Marks returns the TextMark collection for a page. It represents all the text on the page.
// The returned array wraps the marks stored in the PageText.
func (_cca PageText )Marks ()*TextMarkArray {return &TextMarkArray {_agbde :_cca ._cdd }};

// extractInlineImage converts the inline image `_dbbb` to RGB and appends an
// ImageMark for it, positioned and scaled by the CTM of graphics state `_cgb`.
// DeviceGray is used when the image reports no colour space. The inline image
// counter `_db` is incremented on success; conversion errors are returned.
func (_ef *imageExtractContext )extractInlineImage (_dbbb *_aa .ContentStreamInlineImage ,_cgb _aa .GraphicsState ,_cbg *_ge .PdfPageResources )error {_cde ,_cge :=_dbbb .ToImage (_cbg );
if _cge !=nil {return _cge ;};_ab ,_cge :=_dbbb .GetColorSpace (_cbg );if _cge !=nil {return _cge ;};if _ab ==nil {_ab =_ge .NewPdfColorspaceDeviceGray ();};_dae ,_cge :=_ab .ImageToRGB (*_cde );if _cge !=nil {return _cge ;};_cad :=ImageMark {Image :&_dae ,Width :_cgb .CTM .ScalingFactorX (),Height :_cgb .CTM .ScalingFactorY (),Angle :_cgb .CTM .Angle ()};
_cad .X ,_cad .Y =_cgb .CTM .Translation ();_ef ._ac =append (_ef ._ac ,_cad );_ef ._db ++;return nil ;};

// _bbga returns a ruling of kind `_ggdg` placed at the lower y edge of `_bege`
// and running from its Llx to its Urx.
func _bbga (_bege _ge .PdfRectangle )*ruling {return &ruling {_bgcf :_ggdg ,_acabca :_bege .Lly ,_dbef :_bege .Llx ,_efdg :_bege .Urx };};

// log prints the ruling list under the title `_dbgd`, one ruling per line.
// It does nothing unless `_dcdc` is set.
func (_dgda rulingList )log (_dbgd string ){if !_dcdc {return ;
};_cb .Log .Info ("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_dbgd ,_dgda .String ());for _bgbc ,_ebfcg :=range _dgda {_fg .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bgbc ,_ebfcg .String ());
};};

// addNeighbours fills in the four neighbour links of every paragraph in
// `_eccd`: `_cbfa` (a paragraph entirely to the left), `_cegg` (entirely to the
// right), `_gbff` (entirely below) and `_bdaa` (entirely above).
//
// Left/right candidates come from yNeighbours and above/below candidates from
// xNeighbours. On each side the candidate nearest to the paragraph is chosen
// (greatest Urx on the left, smallest Llx on the right, greatest Ury below,
// smallest Lly above). The choice is discarded if another candidate on that
// side overlaps it along that axis, or if the `_gabad` (left/right) or
// `_daeaa` (above/below) test on the two rectangles fails.
//
// Finally every link that is not reciprocated by the paragraph it points to is
// cleared.
func (_eccd paraList )addNeighbours (){_ffcee :=func (_eaeac []int ,_acee *textPara )([]*textPara ,[]*textPara ){_baba :=make ([]*textPara ,0,len (_eaeac )-1);_ccegd :=make ([]*textPara ,0,len (_eaeac )-1);for _ ,_abdfbg :=range _eaeac {_egaf :=_eccd [_abdfbg ];
if _egaf .Urx <=_acee .Llx {_baba =append (_baba ,_egaf );}else if _egaf .Llx >=_acee .Urx {_ccegd =append (_ccegd ,_egaf );};};return _baba ,_ccegd ;};_fdagc :=func (_edfg []int ,_ebdag *textPara )([]*textPara ,[]*textPara ){_aecc :=make ([]*textPara ,0,len (_edfg )-1);
_fgda :=make ([]*textPara ,0,len (_edfg )-1);for _ ,_bbfca :=range _edfg {_bbee :=_eccd [_bbfca ];if _bbee .Ury <=_ebdag .Lly {_fgda =append (_fgda ,_bbee );}else if _bbee .Lly >=_ebdag .Ury {_aecc =append (_aecc ,_bbee );};};return _aecc ,_fgda ;};_bgfbd :=_eccd .yNeighbours (_gfce );
// First pass: left (`_cbfa`) and right (`_cegg`) links.
for _ ,_cageg :=range _eccd {_gaeab :=_bgfbd [_cageg ];if len (_gaeab )==0{continue ;};_gdfbb ,_geebf :=_ffcee (_gaeab ,_cageg );if len (_gdfbb )==0&&len (_geebf )==0{continue ;};if len (_gdfbb )> 0{_baca :=_gdfbb [0];for _ ,_gdggf :=range _gdfbb [1:]{if _gdggf .Urx >=_baca .Urx {_baca =_gdggf ;
};};for _ ,_edbg :=range _gdfbb {if _edbg !=_baca &&_edbg .Urx > _baca .Llx {_baca =nil ;break ;};};if _baca !=nil &&_gabad (_cageg .PdfRectangle ,_baca .PdfRectangle ){_cageg ._cbfa =_baca ;};};if len (_geebf )> 0{_ffdfff :=_geebf [0];for _ ,_cdgfd :=range _geebf [1:]{if _cdgfd .Llx <=_ffdfff .Llx {_ffdfff =_cdgfd ;
};};for _ ,_bddfe :=range _geebf {if _bddfe !=_ffdfff &&_bddfe .Llx < _ffdfff .Urx {_ffdfff =nil ;break ;};};if _ffdfff !=nil &&_gabad (_cageg .PdfRectangle ,_ffdfff .PdfRectangle ){_cageg ._cegg =_ffdfff ;};};};_bgfbd =_eccd .xNeighbours (_becce );for _ ,_fedgb :=range _eccd {_dffdf :=_bgfbd [_fedgb ];
if len (_dffdf )==0{continue ;};_dccb ,_gfeb :=_fdagc (_dffdf ,_fedgb );if len (_dccb )==0&&len (_gfeb )==0{continue ;};if len (_gfeb )> 0{_eage :=_gfeb [0];for _ ,_adfe :=range _gfeb [1:]{if _adfe .Ury >=_eage .Ury {_eage =_adfe ;};};for _ ,_agfd :=range _gfeb {if _agfd !=_eage &&_agfd .Ury > _eage .Lly {_eage =nil ;
break ;};};if _eage !=nil &&_daeaa (_fedgb .PdfRectangle ,_eage .PdfRectangle ){_fedgb ._gbff =_eage ;};};if len (_dccb )> 0{_gccc :=_dccb [0];for _ ,_cbcfg :=range _dccb [1:]{if _cbcfg .Lly <=_gccc .Lly {_gccc =_cbcfg ;};};for _ ,_cbbfff :=range _dccb {if _cbbfff !=_gccc &&_cbbfff .Lly < _gccc .Ury {_gccc =nil ;
break ;};};if _gccc !=nil &&_daeaa (_fedgb .PdfRectangle ,_gccc .PdfRectangle ){_fedgb ._bdaa =_gccc ;};};};for _ ,_ccebb :=range _eccd {if _ccebb ._cbfa !=nil &&_ccebb ._cbfa ._cegg !=_ccebb {_ccebb ._cbfa =nil ;};if _ccebb ._bdaa !=nil &&_ccebb ._bdaa ._gbff !=_ccebb {_ccebb ._bdaa =nil ;
};if _ccebb ._cegg !=nil &&_ccebb ._cegg ._cbfa !=_ccebb {_ccebb ._cegg =nil ;};if _ccebb ._gbff !=nil &&_ccebb ._gbff ._bdaa !=_ccebb {_ccebb ._gbff =nil ;};};};
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
// Marks with Meta set, and marks whose text is rejected by `_dgcd`, are
// ignored. The boolean result is false when no mark contributed to the
// rectangle.
func (_ffeff *TextMarkArray )BBox ()(_ge .PdfRectangle ,bool ){var _ceff _ge .PdfRectangle ;_bad :=false ;for _ ,_aeaff :=range _ffeff ._agbde {if _aeaff .Meta ||_dgcd (_aeaff .Text ){continue ;};if _bad {_ceff =_gfc (_ceff ,_aeaff .BBox );}else {_ceff =_aeaff .BBox ;
_bad =true ;};};return _ceff ,_bad ;};

// event is a coordinate (`_fdfcg`) tagged with a flag (`_dgdbf`) and an index
// (`_ccga`). yNeighbours creates one event with the flag set at the lower edge
// of each paragraph and one with the flag clear at its upper edge, with the
// paragraph's position in the list as the index.
type event struct{_fdfcg float64 ;_dgdbf bool ;_ccga int ;};

// getFillColor returns the colour that `_egcg` derives from the non-stroking
// colour space and non-stroking colour of the text object's graphics state.
func (_fedf *textObject )getFillColor ()_fd .Color {return _egcg (_fedf ._afff .ColorspaceNonStroking ,_fedf ._afff .ColorNonStroking );};

// toGrids partitions the rulings in `_deeb` into groups of connected rulings
// and returns the groups that isActualGrid accepts, each replaced by the list
// isActualGrid returns for it and then passed through snapToGroups.
//
// The steps are: compute the intersections between rulings; compute each
// ruling's connections from them; order the ruling indexes by decreasing
// number of connections, breaking ties with comp; add each index in that
// order to the first existing group that has a member connected to it, or
// start a new group; sort the groups by decreasing size and each group's
// members with comp.
// NOTE(review): `_bggb[0]` is read unconditionally, so this looks like it
// panics when `_deeb` is empty - confirm that callers never pass an empty list.
func (_deeb rulingList )toGrids ()[]rulingList {if _dcdc {_cb .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_deeb );
};_afcc :=_deeb .intersections ();if _dcdc {_cb .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_deeb ),len (_afcc ));
for _ ,_cacb :=range _efbdd (_afcc ){_fg .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_cacb ,_afcc [_cacb ]);};};_eaef :=make (map[int ]intSet ,len (_deeb ));for _dcdcca :=range _deeb {_gdfae :=_deeb .connections (_afcc ,_dcdcca );if len (_gdfae )> 0{_eaef [_dcdcca ]=_gdfae ;
};};if _dcdc {_cb .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_eaef ));for _ ,_cecg :=range _efbdd (_eaef ){_fg .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_cecg ,_eaef [_cecg ]);
};};_bggb :=_aedf (len (_deeb ),func (_gade ,_defb int )bool {_aaae ,_gbdac :=len (_eaef [_gade ]),len (_eaef [_defb ]);if _aaae !=_gbdac {return _aaae > _gbdac ;};return _deeb .comp (_gade ,_defb );});if _dcdc {_cb .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_bggb );
};_efef :=[][]int {{_bggb [0]}};_fafg :for _ ,_caccc :=range _bggb [1:]{for _gce ,_fdef :=range _efef {for _ ,_afaf :=range _fdef {if _eaef [_afaf ].has (_caccc ){_efef [_gce ]=append (_fdef ,_caccc );continue _fafg ;};};};_efef =append (_efef ,[]int {_caccc });
};if _dcdc {_cb .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_efef );};_gf .SliceStable (_efef ,func (_daggbc ,_dage int )bool {return len (_efef [_daggbc ])> len (_efef [_dage ])});for _ ,_gefb :=range _efef {_gf .Slice (_gefb ,func (_gdef ,_ffdb int )bool {return _deeb .comp (_gefb [_gdef ],_gefb [_ffdb ])});
};_gbegf :=make ([]rulingList ,len (_efef ));for _gdfe ,_gggb :=range _efef {_dgdae :=make (rulingList ,len (_gggb ));for _cdcae ,_bgcc :=range _gggb {_dgdae [_cdcae ]=_deeb [_bgcc ];};_gbegf [_gdfe ]=_dgdae ;};if _dcdc {_cb .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_gbegf );
};var _bbbgb []rulingList ;for _ ,_bacc :=range _gbegf {if _gdbc ,_dfeg :=_bacc .isActualGrid ();_dfeg {_bacc =_gdbc ;_bacc =_bacc .snapToGroups ();_bbbgb =append (_bbbgb ,_bacc );};};if _dcdc {_adffc ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_bbbgb );
_cb .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_gbegf ),len (_bbbgb ));};return _bbbgb ;};

// _geea returns the smaller of `_dbbc` and `_dffec`.
func _geea (_dbbc ,_dffec int )int {if _dbbc < _dffec {return _dbbc ;
};return _dffec ;};

// subpath is a sequence of points (`_eeff`) together with a flag (`_agaa`).
// NOTE(review): `_agaa` presumably records whether the subpath is closed - confirm.
type subpath struct{_eeff []_ce .Point ;_agaa bool ;};

// imageExtractContext accumulates the images found while processing content
// streams: `_ac` holds the image marks, `_db`, `_da` and `_eb` count the
// inline images, XObject images and form XObjects processed, `_cgc` caches
// decoded images per XObject stream and `_fec` holds the extraction options.
type imageExtractContext struct{_ac []ImageMark ;_db int ;_da int ;_eb int ;_cgc map[*_dd .PdfObjectStream ]*cachedImage ;_fec *ImageExtractOptions ;};

// moveText forwards the offsets (`_dba`, `_bee`) to moveLP.
func (_cgac *textObject )moveText (_dba ,_bee float64 ){_cgac .moveLP (_dba ,_bee )};
2021-12-14 01:08:28 +00:00
// String returns a human readable description of `path`.
2022-02-05 21:34:53 +00:00
// The description is the number of points followed by the points themselves.
// When there are more than 5 points only the first, the second and the last
// point are shown.
func (_geaac *subpath )String ()string {_dfc :=_geaac ._eeff ;_afcdc :=len (_dfc );if _afcdc <=5{return _fg .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_afcdc ,_dfc );};return _fg .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_afcdc ,_dfc [0],_dfc [1],_dfc [_afcdc -1]);
};

// checkOp checks the parameter count of the content stream operation `_gdg`.
//
// When the receiver is nil a debug message is logged saying that the operand
// occurs outside text, showing at most `_edcc` of its parameters; the check
// then continues. When `_edcc` is not negative and differs from the number of
// parameters, the mismatch is logged and false is returned, together with an
// "incorrect parameter count" error if `_gcag` is set and a nil error
// otherwise. In all other cases (true, nil) is returned.
func (_agd *textObject )checkOp (_gdg *_aa .ContentStreamOperation ,_edcc int ,_gcag bool )(_ffd bool ,_gabc error ){if _agd ==nil {var _cbgc []_dd .PdfObject ;if _edcc > 0{_cbgc =_gdg .Params ;if len (_cbgc )> _edcc {_cbgc =_cbgc [:_edcc ];};};_cb .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_gdg .Operand ,_cbgc );
};if _edcc >=0{if len (_gdg .Params )!=_edcc {if _gcag {_gabc =_cc .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");};_cb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_gdg .Operand ,_edcc ,len (_gdg .Params ),_gdg .Params );
return false ,_gabc ;};};return true ,nil ;};

// blocks reports whether some ruling in `_bdec` lies between the rulings
// `_bebgg` and `_dfegd`. It returns false straight away when the
// [`_dbef`, `_efdg`] ranges of the two rulings do not overlap. Otherwise a
// ruling in the list qualifies when its `_acabca` value lies between those of
// the two rulings, within a tolerance of `_gfge`, and its own range overlaps
// the part that the ranges of the two rulings have in common.
func (_bdec rulingList )blocks (_bebgg ,_dfegd *ruling )bool {if _bebgg ._dbef > _dfegd ._efdg ||_dfegd ._dbef > _bebgg ._efdg {return false ;};_caaf :=_gb .Max (_bebgg ._dbef ,_dfegd ._dbef );_cgcc :=_gb .Min (_bebgg ._efdg ,_dfegd ._efdg );
if _bebgg ._acabca > _dfegd ._acabca {_bebgg ,_dfegd =_dfegd ,_bebgg ;};for _ ,_dfac :=range _bdec {if _bebgg ._acabca <=_dfac ._acabca +_gfge &&_dfac ._acabca <=_dfegd ._acabca +_gfge &&_dfac ._dbef <=_cgcc &&_caaf <=_dfac ._efdg {return true ;};};return false ;
};

// newTextMark builds the textMark for text `_fcec` rendered with matrix
// `_bebd` and ending at point `_eccb`. The second result is false when the
// mark's box lies outside the page box `_cgbd._ca`, as reported by `_cddb`.
//
// The orientation is derived from the angle of `_bebd` with `_bcee`. The
// height is the matrix's Y scaling factor, or its X scaling factor for
// orientations of 90 and 270 degrees. The box runs from the point that `_febf`
// gives for `_bebd` to `_eccb` and is extended by the height on the side that
// depends on the orientation; any other orientation is treated as 0. When the
// page box has a positive width the box is replaced by the rectangle that
// `_cddb` returns. A second rectangle, expressed relative to the page box
// according to the orientation, is stored as the mark's embedded rectangle,
// while the box itself is stored in `_gdfb`.
func (_ebec *textObject )newTextMark (_fcec string ,_bebd _ce .Matrix ,_eccb _ce .Point ,_ebagcd float64 ,_bfdc *_ge .PdfFont ,_deec float64 ,_ccgbf ,_bffe _fd .Color )(textMark ,bool ){_eede :=_bebd .Angle ();_cebe :=_bcee (_eede ,_febb );var _gabec float64 ;
if _cebe %180!=90{_gabec =_bebd .ScalingFactorY ();}else {_gabec =_bebd .ScalingFactorX ();};_caeg :=_febf (_bebd );_adfac :=_ge .PdfRectangle {Llx :_caeg .X ,Lly :_caeg .Y ,Urx :_eccb .X ,Ury :_eccb .Y };switch _cebe %360{case 90:_adfac .Urx -=_gabec ;
case 180:_adfac .Ury -=_gabec ;case 270:_adfac .Urx +=_gabec ;case 0:_adfac .Ury +=_gabec ;default:_cebe =0;_adfac .Ury +=_gabec ;};if _adfac .Llx > _adfac .Urx {_adfac .Llx ,_adfac .Urx =_adfac .Urx ,_adfac .Llx ;};if _adfac .Lly > _adfac .Ury {_adfac .Lly ,_adfac .Ury =_adfac .Ury ,_adfac .Lly ;
};_aab :=true ;if _ebec ._cgbd ._ca .Width ()> 0{_gddfa ,_bebf :=_cddb (_adfac ,_ebec ._cgbd ._ca );if !_bebf {_aab =false ;_cb .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_adfac ,_ebec ._cgbd ._ca ,_fcec );
};_adfac =_gddfa ;};_gee :=_adfac ;_eabg :=_ebec ._cgbd ._ca ;switch _cebe %360{case 90:_eabg .Urx ,_eabg .Ury =_eabg .Ury ,_eabg .Urx ;_gee =_ge .PdfRectangle {Llx :_eabg .Urx -_adfac .Ury ,Urx :_eabg .Urx -_adfac .Lly ,Lly :_adfac .Llx ,Ury :_adfac .Urx };
case 180:_gee =_ge .PdfRectangle {Llx :_eabg .Urx -_adfac .Llx ,Urx :_eabg .Urx -_adfac .Urx ,Lly :_eabg .Ury -_adfac .Lly ,Ury :_eabg .Ury -_adfac .Ury };case 270:_eabg .Urx ,_eabg .Ury =_eabg .Ury ,_eabg .Urx ;_gee =_ge .PdfRectangle {Llx :_adfac .Ury ,Urx :_adfac .Lly ,Lly :_eabg .Ury -_adfac .Llx ,Ury :_eabg .Ury -_adfac .Urx };
// Both switches can leave the corners swapped, so the rectangle is normalised again.
};if _gee .Llx > _gee .Urx {_gee .Llx ,_gee .Urx =_gee .Urx ,_gee .Llx ;};if _gee .Lly > _gee .Ury {_gee .Lly ,_gee .Ury =_gee .Ury ,_gee .Lly ;};_aacc :=textMark {_gbf :_fcec ,PdfRectangle :_gee ,_gdfb :_adfac ,_gggd :_bfdc ,_adcd :_gabec ,_cebf :_deec ,_cda :_bebd ,_egfb :_eccb ,_dgcf :_cebe ,_cdcf :_ccgbf ,_ceee :_bffe };
if _fdcf {_cb .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_caeg ,_eccb ,_aacc .String ());};return _aacc ,_aab ;
};
// String returns a description of `k`: its name from the `_gdbd` table, or a
// "Not a ruling" message with the numeric value when the kind is not in the
// table.
func (_gegd rulingKind) String() string {
	if name, known := _gdbd[_gegd]; known {
		return name
	}
	return _fg.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _gegd)
}

// _gffe starts a new text line from the first word that bag `_cffgb` holds at
// index `_cadc`. The line takes the word's rectangle, its `_egdce` value and
// its `_gaff` value, and the word is moved from the bag into the line.
func _gffe(_cffgb *wordBag, _cadc int) *textLine {
	first := _cffgb.firstWord(_cadc)
	started := &textLine{
		PdfRectangle: first.PdfRectangle,
		_dedb:        first._egdce,
		_dgce:        first._gaff,
	}
	started.pullWord(_cffgb, first, _cadc)
	return started
}

// _bbecf returns `_abd` without its last rune. `_abd` must not be empty.
func _bbecf(_abd string) string {
	runes := []rune(_abd)
	keep := len(runes) - 1
	return string(runes[:keep])
}

// _edafe returns the negated lower y coordinate of the bounding box of `_bbd`.
func _edafe(_bbd bounded) float64 {
	box := _bbd.bbox()
	return -box.Lly
}
// primaries returns the distinct `_acabca` values of the rulings in `_bcfd`, sorted in
// increasing order.
func (_bcfd rulingList) primaries() []float64 {
	distinct := make(map[float64]struct{}, len(_bcfd))
	for i := range _bcfd {
		distinct[_bcfd[i]._acabca] = struct{}{}
	}
	values := make([]float64, 0, len(distinct))
	for value := range distinct {
		values = append(values, value)
	}
	_gf.Float64s(values)
	return values
}

// findTextTables returns the tables that can be grown from the paragraphs in `_ecfd`.
// Paragraphs that are already taken or have zero width are skipped. A candidate returned by
// isAtom is grown with growTable and kept only if its `_ggda` x `_accb` product reaches `_adbg`.
func (_ecfd paraList) findTextTables() []*textTable {
	var tables []*textTable
	for _, para := range _ecfd {
		if para.taken() || para.Width() == 0 {
			continue
		}
		table := para.isAtom()
		if table == nil {
			continue
		}
		table.growTable()
		if table._ggda*table._accb >= _adbg {
			table.markCells()
			table.log("\u0067\u0072\u006fw\u006e")
			tables = append(tables, table)
		}
	}
	return tables
}

// arrangeText turns the words of `_dcbf` into a textPara made of text lines.
// The bag is sorted (and de-duplicated when `_fged` is set). Lines are then built one at a
// time: each line is seeded with the first word in reading order and extended with words from
// the depth band around the seed word until no acceptable word is left. The lines are sorted
// with `_bbad` before the paragraph is assembled. Returns nil when no line was produced.
func (_dcbf *wordBag) arrangeText() *textPara {
	_dcbf.sort()
	if _fged {
		_dcbf.removeDuplicates()
	}

	var lines []*textLine
	for _, depthIdx := range _dcbf.depthIndexes() {
		for !_dcbf.empty(depthIdx) {
			startIdx := _dcbf.firstReadingIndex(depthIdx)
			seed := _dcbf.firstWord(startIdx)
			line := _gffe(_dcbf, startIdx)

			// All tolerances are scaled by the seed word's `_egdce` value.
			scale := seed._egdce
			loDepth := seed._gaff - _faad*scale
			hiDepth := seed._gaff + _faad*scale
			maxGap := _cfgcf * scale
			maxOverlap := _cgfd * scale

		fill:
			for {
				var next *textWord
				nextIdx := 0
				for _, bandIdx := range _dcbf.depthBand(loDepth, hiDepth) {
					candidate := _dcbf.highestWord(bandIdx, loDepth, hiDepth)
					if candidate == nil {
						continue
					}
					gap := _egdc(candidate, line._badf[len(line._badf)-1])
					switch {
					case gap < -maxOverlap:
						// Candidate overlaps the line's last word too much: the line is finished.
						break fill
					case gap > maxGap:
						continue
					case next != nil && _cffbf(candidate, next) >= 0:
						continue
					}
					next, nextIdx = candidate, bandIdx
				}
				if next == nil {
					break
				}
				line.pullWord(_dcbf, next, nextIdx)
			}

			line.markWordBoundaries()
			lines = append(lines, line)
		}
	}
	if len(lines) == 0 {
		return nil
	}

	_gf.Slice(lines, func(a, b int) bool { return _bbad(lines[a], lines[b]) < 0 })
	para := _eeaeg(_dcbf.PdfRectangle, lines)

	// Optional debug dump of the paragraph, its lines, their words and the words' marks.
	if _bcfc {
		_cb.Log.Info("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073", para.String())
		if _ccgb {
			for lineIdx, ln := range para._ffcb {
				_fg.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", lineIdx, ln.String())
				if !_dbgg {
					continue
				}
				for wordIdx, word := range ln._badf {
					_fg.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", wordIdx, word.String())
					for markIdx, mark := range word._daggf {
						_fg.Printf("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n", markIdx, mark.String())
					}
				}
			}
		}
	}
	return para
}
// String returns a string describing `ma`.
// An array without marks is described as "EMPTY"; otherwise the description contains the
// number of marks followed by the first and the last mark.
func (_bca TextMarkArray) String() string {
	marks := _bca._agbde
	if len(marks) == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	return _fg.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", len(marks), marks[0], marks[len(marks)-1])
}

// allWords returns the words of every bin of `_gecf` in a single slice.
// The bins are stored in a map, so the order in which bins are visited is not deterministic.
func (_gecf *wordBag) allWords() []*textWord {
	var words []*textWord
	for _, bin := range _gecf._ebae {
		words = append(words, bin...)
	}
	return words
}

// toTilings tidies the rulings in `_cgcae`, groups them with toGrids and converts each group
// with asTiling. It returns the tidied rulings and the tilings, or (nil, nil) when `_cgcae` is
// empty.
func (_cgcae rulingList) toTilings() (rulingList, []gridTiling) {
	_cgcae.log("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s")
	if len(_cgcae) == 0 {
		return nil, nil
	}
	_cgcae = _cgcae.tidied("\u0061\u006c\u006c")
	_cgcae.log("\u0074\u0069\u0064\u0069\u0065\u0064")

	grids := _cgcae.toGrids()
	tilings := make([]gridTiling, len(grids))
	for i, grid := range grids {
		tilings[i] = grid.asTiling()
	}
	return _cgcae, tilings
}

// parasBBox returns the paragraphs and the bounding box of `_fbad`.
func (_fbad compositeCell) parasBBox() (paraList, _ge.PdfRectangle) {
	return _fbad.paraList, _fbad.PdfRectangle
}

// _cddb returns the intersection of rectangles `_eggf` and `_cbgcf` and true, or a zero
// rectangle and false when `_ace` reports that the two do not intersect.
func _cddb(_eggf, _cbgcf _ge.PdfRectangle) (_ge.PdfRectangle, bool) {
	if !_ace(_eggf, _cbgcf) {
		return _ge.PdfRectangle{}, false
	}
	return _ge.PdfRectangle{
		Llx: _gb.Max(_eggf.Llx, _cbgcf.Llx),
		Urx: _gb.Min(_eggf.Urx, _cbgcf.Urx),
		Lly: _gb.Max(_eggf.Lly, _cbgcf.Lly),
		Ury: _gb.Min(_eggf.Ury, _cbgcf.Ury),
	}, true
}

// _cfcgd computes a running offset for each index in [0, `_dfed`).
// Entry i of the returned slice is the sum over all j < i of len(`_gbgga`[j])+1; the second
// result is that sum over all `_dfed` indexes.
func _cfcgd(_dfed int, _gbgga map[int][]float64) ([]int, int) {
	offsets := make([]int, _dfed)
	total := 0
	for i := 0; i < _dfed; i++ {
		offsets[i] = total
		total += len(_gbgga[i]) + 1
	}
	return offsets, total
}

// complete reports whether `_aaadc` has all 4 borders.
func (_aaadc gridTile) complete() bool {
	return _aaadc.numBorders() == 4
}

// renderText converts the raw string bytes `_ebdb` to text marks using the current font and
// appends them to the `_cdca` list of `_faea`, advancing the matrix `_dddc` after each mark.
// Nothing is done when the `_gcaa` flag is set. An error is returned when the font has no
// metrics for one of the character codes.
func (_faea *textObject) renderText(_ebdb []byte) error {
	if _faea._gcaa {
		_cb.Log.Debug("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e")
		return nil
	}

	font := _faea.getCurrentFont()
	codes := font.BytesToCharcodes(_ebdb)
	texts, numChars, numMisses := font.CharcodesToStrings(codes)
	if numMisses > 0 {
		_cb.Log.Debug("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064", numChars, numMisses)
	}
	_faea._agff._ffga += numChars
	_faea._agff._fddb += numMisses

	state := _faea._agff
	fontSize := state._ccfg
	hScale := state._bbbc / 100.0
	glyphScale := _fdabd
	if font.Subtype() == "\u0054\u0079\u0070e\u0033" {
		glyphScale = 1
	}

	// Width of a space: rune metrics, then metrics of code 32, then the default font.
	spaceMetrics, found := font.GetRuneMetrics(' ')
	if !found {
		spaceMetrics, found = font.GetCharMetrics(32)
	}
	if !found {
		spaceMetrics, _ = _ge.DefaultFont().GetRuneMetrics(' ')
	}
	spaceWidth := spaceMetrics.Wx * glyphScale
	_cb.Log.Trace("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066", spaceWidth, texts, font, fontSize)

	stateMatrix := _ce.NewMatrix(fontSize*hScale, 0, 0, fontSize, 0, state._acaf)
	if _bcea {
		_cb.Log.Info("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071", len(codes), codes, texts)
	}
	// NOTE(review): the last argument is len(texts) although the verb is %q; possibly `texts`
	// was intended. Left unchanged.
	_cb.Log.Trace("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071", len(codes), codes, len(texts))

	fillColor := _faea.getFillColor()
	strokeColor := _faea.getStrokeColor()

	for i, text := range texts {
		runes := []rune(text)
		if len(runes) == 1 && runes[0] == '\x00' {
			continue
		}
		code := codes[i]
		trm := _faea._afff.CTM.Mult(_faea._dddc).Mult(stateMatrix)

		// The `_egc` value of the text state is applied only to a single space (code point 32).
		wordSpacing := 0.0
		if len(runes) == 1 && runes[0] == 32 {
			wordSpacing = state._egc
		}

		metrics, hasMetrics := font.GetCharMetrics(code)
		if !hasMetrics {
			_cb.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073", code, runes, runes, font)
			return _fg.Errorf("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064", font.String(), code)
		}

		glyphSize := _ce.Point{X: metrics.Wx * glyphScale, Y: metrics.Wy * glyphScale}
		// endShift excludes the `_cabf` value of the text state, advance includes it.
		endShift := _ce.Point{X: (glyphSize.X*fontSize + wordSpacing) * hScale}
		advance := _ce.Point{X: (glyphSize.X*fontSize + state._cabf + wordSpacing) * hScale}
		if _bcea {
			_cb.Log.Info("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066", fontSize, state._cabf, state._egc, hScale)
			_cb.Log.Info("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f", glyphSize, endShift, advance)
		}

		endMatrix := _abg(endShift)
		advanceMatrix := _abg(advance)
		endTrm := _faea._afff.CTM.Mult(_faea._dddc).Mult(endMatrix)
		if _cbgdf {
			_cb.Log.Info("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073", _faea._afff.CTM, _faea._dddc, advanceMatrix, _febf(_faea._afff.CTM.Mult(_faea._dddc).Mult(advanceMatrix)), endMatrix, endTrm, _febf(endTrm))
		}

		mark, onPage := _faea.newTextMark(_fda.ExpandLigatures(runes), trm, _febf(endTrm), _gb.Abs(spaceWidth*trm.ScalingFactorX()), font, _faea._agff._cabf, fillColor, strokeColor)
		if !onPage {
			// The mark is dropped and `_dddc` is not advanced for it.
			_cb.Log.Debug("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067")
			continue
		}

		// NOTE(review): `font` has already been dereferenced above, so the nil branch looks
		// unreachable; kept as in the original.
		if font == nil {
			_cb.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e")
		} else if font.Encoder() == nil {
			_cb.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073", font)
		} else {
			if r, ok := font.Encoder().CharcodeToRune(code); ok {
				mark._dcdbd = string(r)
			}
		}
		_cb.Log.Trace("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073", i, code, mark, trm)
		_faea._cdca = append(_faea._cdca, &mark)
		_faea._dddc.Concat(advanceMatrix)
	}
	return nil
}

// setWordSpacing stores `_dgg` in the `_egc` field of the text state. No-op on a nil receiver.
func (_geac *textObject) setWordSpacing(_dgg float64) {
	if _geac == nil {
		return
	}
	_geac._agff._egc = _dgg
}

// sortReadingOrder reorders the paragraphs of `_eeef` in place: they are first sorted with the
// comparator `_bbad` and then rearranged according to topoOrder. Lists with fewer than two
// paragraphs are left untouched.
func (_eeef paraList) sortReadingOrder() {
	_cb.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_eeef))
	if len(_eeef) <= 1 {
		return
	}
	_eeef.computeEBBoxes()
	// The less function must be strict: with `<=` two equal paragraphs would each be reported
	// as sorting before the other, which sort.Slice does not support.
	_gf.Slice(_eeef, func(a, b int) bool { return _bbad(_eeef[a], _eeef[b]) < 0 })
	order := _eeef.topoOrder()
	_eeef.reorder(order)
}

// endsInHyphen reports whether the line ends in a hyphen that `_dfebc` accepts.
// The last rune of the last word must be in the Unicode Hyphen range. If the last word has its
// `_agde` flag set and `_dfebc` accepts the word's text the result is true; otherwise the
// result is `_dfebc` applied to the text of the whole line.
func (_ebe *textLine) endsInHyphen() bool {
	lastWord := _ebe._badf[len(_ebe._badf)-1]
	wordText := lastWord._aebb
	lastRune, size := _d.DecodeLastRuneInString(wordText)
	if size <= 0 || !_f.Is(_f.Hyphen, lastRune) {
		return false
	}
	if lastWord._agde && _dfebc(wordText) {
		return true
	}
	return _dfebc(_ebe.text())
}

// appendWord adds `_bbdg` to the line, extends the line's bounding box with `_gfc` and raises
// the line's `_dedb` and `_dgce` values to the word's `_egdce` and `_gaff` when those are larger.
func (_badfg *textLine) appendWord(_bbdg *textWord) {
	_badfg._badf = append(_badfg._badf, _bbdg)
	_badfg.PdfRectangle = _gfc(_badfg.PdfRectangle, _bbdg.PdfRectangle)
	if _bbdg._egdce > _badfg._dedb {
		_badfg._dedb = _bbdg._egdce
	}
	if _bbdg._gaff > _badfg._dgce {
		_badfg._dgce = _bbdg._gaff
	}
}

// _ccdedg returns the keys of `_edabc` sorted in increasing order.
func _ccdedg(_edabc map[int][]float64) []int {
	keys := make([]int, len(_edabc))
	n := 0
	for key := range _edabc {
		keys[n] = key
		n++
	}
	_gf.Ints(keys)
	return keys
}

// _gge appends `_eagb` to `_bdfg` after setting its Offset to `*_gbde`, then advances `*_gbde`
// by the byte length of the mark's Text. The extended slice is returned.
func _gge(_bdfg []TextMark, _gbde *int, _eagb TextMark) []TextMark {
	_eagb.Offset = *_gbde
	_bdfg = append(_bdfg, _eagb)
	*_gbde += len(_eagb.Text)
	return _bdfg
}

// findTables returns the tables found among the paragraphs of `_dffb`.
// Neighbours are computed and the paragraphs are sorted with `_cefd` first. Tables from
// findGridTables are included when `_cfca` is set, tables from findTextTables when `_fgfe` is set.
func (_dffb paraList) findTables(_afbd []gridTiling) []*textTable {
	_dffb.addNeighbours()
	_gf.Slice(_dffb, func(a, b int) bool { return _cefd(_dffb[a], _dffb[b]) < 0 })

	var tables []*textTable
	if _cfca {
		gridTables := _dffb.findGridTables(_afbd)
		tables = append(tables, gridTables...)
	}
	if _fgfe {
		textTables := _dffb.findTextTables()
		tables = append(tables, textTables...)
	}
	return tables
}

// paraList is a sequence of text paragraphs.
type paraList []*textPara

// connections returns the set of ruling indexes reachable from index `_dceb`.
// Index i is added when `_gfac`[i] contains an index that has already been visited; the search
// then continues from every index collected so far.
func (_effg rulingList) connections(_gfac map[int]intSet, _dceb int) intSet {
	connected := make(intSet)
	visited := make(intSet)
	var visit func(int)
	visit = func(idx int) {
		if visited.has(idx) {
			return
		}
		visited.add(idx)
		for i := range _effg {
			if _gfac[i].has(idx) {
				connected.add(i)
			}
		}
		for i := range _effg {
			if connected.has(i) {
				visit(i)
			}
		}
	}
	visit(_dceb)
	return connected
}

// _fcg splits the words of `_bfgc` into separate word bags.
// Each new bag is created by `_gabe` from the first remaining word and then grown by repeated
// scanBand passes ("vertical", "horizontal", an unnamed counting pass and "other") until a full
// round adds nothing. `_dcga`, `_gfda` and `_gaae` are passed through to `_gabe`.
func _fcg(_bfgc *wordBag, _dcga float64, _gfda, _gaae rulingList) []*wordBag {
	var bags []*wordBag
	for _, depthIdx := range _bfgc.depthIndexes() {
		changed := false
		for !_bfgc.empty(depthIdx) {
			startIdx := _bfgc.firstReadingIndex(depthIdx)
			seed := _bfgc.firstWord(startIdx)
			bag := _gabe(seed, _dcga, _gfda, _gaae)
			_bfgc.removeWord(seed, startIdx)
			if _dagb {
				_cb.Log.Info("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073", seed.String())
			}

			// Runs at least once and repeats for as long as the previous round added words.
			for again := true; again; again = changed {
				changed = false
				// The gaps are recomputed each round from the bag's current `_adcc` value.
				otherGap := _afaaa * bag._adcc
				readingGap := _eecd * bag._adcc
				depthGap := _fcda * bag._adcc
				if _dagb {
					_cb.Log.Info("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066", bag.minDepth(), bag.maxDepth(), depthGap, readingGap)
				}
				if _bfgc.scanBand("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c", bag, _ggf(_fcac, 0), bag.minDepth()-depthGap, bag.maxDepth()+depthGap, _fffa, false, false) > 0 {
					changed = true
				}
				if _bfgc.scanBand("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c", bag, _ggf(_fcac, readingGap), bag.minDepth(), bag.maxDepth(), _fabg, false, false) > 0 {
					changed = true
				}
				if changed {
					continue
				}

				count := _bfgc.scanBand("", bag, _ggf(_acfe, otherGap), bag.minDepth(), bag.maxDepth(), _bgdb, true, false)
				if count > 0 {
					depthRatio := (bag.maxDepth() - bag.minDepth()) / bag._adcc
					if (count > 1 && float64(count) > 0.3*depthRatio) || count <= 10 {
						if _bfgc.scanBand("\u006f\u0074\u0068e\u0072", bag, _ggf(_acfe, otherGap), bag.minDepth(), bag.maxDepth(), _bgdb, false, true) > 0 {
							changed = true
						}
					}
				}
			}
			bags = append(bags, bag)
		}
	}
	return bags
}

// removeDuplicates removes words that duplicate another word of the same bin.
// Two distinct words of a bin are duplicates when their texts are equal and all four bounding
// box coordinates differ by less than `_dda` times the `_egdce` value of the first word of the
// depth index being processed. Bins that become empty are deleted.
func (_dgdf *wordBag) removeDuplicates() {
	if _cccfa {
		_cb.Log.Info("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071", _dgdf.text())
	}
	for _, depthIdx := range _dgdf.depthIndexes() {
		if len(_dgdf._ebae[depthIdx]) == 0 {
			continue
		}
		ref := _dgdf._ebae[depthIdx][0]
		tol := _dda * ref._egdce
		depth := ref._gaff
		for _, bandIdx := range _dgdf.depthBand(depth, depth+tol) {
			dups := map[*textWord]struct{}{}
			words := _dgdf._ebae[bandIdx]
			for _, keep := range words {
				if _, isDup := dups[keep]; isDup {
					continue
				}
				for _, other := range words {
					if _, isDup := dups[other]; isDup {
						continue
					}
					if other != keep && other._aebb == keep._aebb && _gb.Abs(other.Llx-keep.Llx) < tol && _gb.Abs(other.Urx-keep.Urx) < tol && _gb.Abs(other.Lly-keep.Lly) < tol && _gb.Abs(other.Ury-keep.Ury) < tol {
						dups[other] = struct{}{}
					}
				}
			}
			if len(dups) > 0 {
				// Compact the surviving words to the front of the bin, then truncate it.
				n := 0
				for _, word := range words {
					if _, isDup := dups[word]; !isDup {
						words[n] = word
						n++
					}
				}
				_dgdf._ebae[bandIdx] = words[:len(words)-len(dups)]
				if len(_dgdf._ebae[bandIdx]) == 0 {
					delete(_dgdf._ebae, bandIdx)
				}
			}
		}
	}
}

// moveTo sets the `_faec` flag and stores the device-space point of (`_efdf`, `_gbda`) in
// `_afcbb`.
func (_fgea *shapesState) moveTo(_efdf, _gbda float64) {
	_fgea._faec = true
	_fgea._afcbb = _fgea.devicePoint(_efdf, _gbda)
	if _dfce {
		_cb.Log.Info("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066", _efdf, _gbda, _fgea._afcbb)
	}
}

// _fade returns a 2 x 2 table whose cells (0,0), (1,0), (0,1) and (1,1) hold `_fffcd`,
// `_abbf`, `_daf` and `_aecae` respectively.
func _fade(_fffcd, _abbf, _daf, _aecae *textPara) *textTable {
	table := &textTable{_ggda: 2, _accb: 2, _fedcd: make(map[uint64]*textPara, 4)}
	table.put(0, 0, _fffcd)
	table.put(1, 0, _abbf)
	table.put(0, 1, _daf)
	table.put(1, 1, _aecae)
	return table
}

// _daeb formats `_acbf` as "{key: values, key: values, ...}" with the keys in increasing order.
func _daeb(_acbf map[int][]float64) string {
	keys := _ccdedg(_acbf)
	parts := make([]string, len(_acbf))
	for i, key := range keys {
		parts[i] = _fg.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", key, _acbf[key])
	}
	return _fg.Sprintf("\u007b\u0025\u0073\u007d", _g.Join(parts, "\u002c\u0020"))
}

// _cagd returns the single numeric parameter of operation `_feae` as a float64.
// An error is returned when the operation does not have exactly one parameter or when the
// parameter cannot be converted by GetNumberAsFloat.
func _cagd(_feae *_aa.ContentStreamOperation) (float64, error) {
	if len(_feae.Params) != 1 {
		err := _cc.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
		_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _feae.Operand, 1, len(_feae.Params), _feae.Params)
		return 0.0, err
	}
	return _dd.GetNumberAsFloat(_feae.Params[0])
}

// setCharSpacing stores `_gg` in the `_cabf` field of the text state. No-op on a nil receiver.
func (_efc *textObject) setCharSpacing(_gg float64) {
	if _efc == nil {
		return
	}
	_efc._agff._cabf = _gg
	if _bcea {
		_cb.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _gg, _efc._agff.String())
	}
}

// logComposite prints two debug grids for the composite cells in `_gefg` when `_fcbb` is set:
// first the number of paragraphs per cell, then the merged cell text shortened with `_geabcf`
// to 12. `_gfbf` is a title included in the log lines.
func (_bddc *textTable) logComposite(_gfbf string) {
	if !_fcbb {
		return
	}

	// Grid 1: paragraph counts.
	_cb.Log.Info("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _bddc._ggda, _bddc._accb, _gfbf)
	_fg.Printf("\u0025\u0035\u0073 \u007c", "")
	for x := 0; x < _bddc._ggda; x++ {
		_fg.Printf("\u0025\u0033\u0064 \u007c", x)
	}
	_fg.Println("")
	_fg.Printf("\u0025\u0035\u0073 \u002b", "")
	for x := 0; x < _bddc._ggda; x++ {
		_fg.Printf("\u0025\u0033\u0073 \u002b", "\u002d\u002d\u002d")
	}
	_fg.Println("")
	for y := 0; y < _bddc._accb; y++ {
		_fg.Printf("\u0025\u0035\u0064 \u007c", y)
		for x := 0; x < _bddc._ggda; x++ {
			paras, _ := _bddc._gefg[_dffgb(x, y)].parasBBox()
			_fg.Printf("\u0025\u0033\u0064 \u007c", len(paras))
		}
		_fg.Println("")
	}

	// Grid 2: cell texts.
	_cb.Log.Info("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _bddc._ggda, _bddc._accb, _gfbf)
	_fg.Printf("\u0025\u0035\u0073 \u007c", "")
	for x := 0; x < _bddc._ggda; x++ {
		_fg.Printf("\u0025\u0031\u0032\u0064\u0020\u007c", x)
	}
	_fg.Println("")
	_fg.Printf("\u0025\u0035\u0073 \u002b", "")
	for x := 0; x < _bddc._ggda; x++ {
		_fg.Print("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b")
	}
	_fg.Println("")
	for y := 0; y < _bddc._accb; y++ {
		_fg.Printf("\u0025\u0035\u0064 \u007c", y)
		for x := 0; x < _bddc._ggda; x++ {
			paras, _ := _bddc._gefg[_dffgb(x, y)].parasBBox()
			cellText := ""
			merged := paras.merge()
			if merged != nil {
				cellText = merged.text()
			}
			// Quote the text to escape it, then strip the surrounding quotes again.
			cellText = _fg.Sprintf("\u0025\u0071", _geabcf(cellText, 12))
			cellText = cellText[1 : len(cellText)-1]
			_fg.Printf("\u0025\u0031\u0032\u0073\u0020\u007c", cellText)
		}
		_fg.Println("")
	}
}

// _geeg builds the paragraphs of a page from the text marks `_add`.
// Marks are grouped into words (`_aged`), words into a bag (`_dde`), the bag is split (`_fcg`)
// and post-processed (`_acabf`), and each resulting bag is arranged into a paragraph. Tables are
// extracted using the tilings `_efdb` when at least `_adbg` paragraphs exist, and the result is
// put into reading order. `_efddf` is the page rectangle and `_edgb` the page's rulings. Returns
// nil when there are no marks or no words.
func _geeg(_add []*textMark, _efddf _ge.PdfRectangle, _edgb rulingList, _efdb []gridTiling) paraList {
	_cb.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_add), _efddf)
	if len(_add) == 0 {
		return nil
	}
	words := _aged(_add, _efddf)
	if len(words) == 0 {
		return nil
	}
	_edgb.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := _edgb.vertsHorzs()
	pageBag := _dde(words, _efddf.Ury, verts, horzs)
	bags := _fcg(pageBag, _efddf.Ury, verts, horzs)
	bags = _acabf(bags)

	paras := make(paraList, 0, len(bags))
	for _, bag := range bags {
		para := bag.arrangeText()
		if para != nil {
			paras = append(paras, para)
		}
	}
	if len(paras) >= _adbg {
		paras = paras.extractTables(_efdb)
	}
	paras.sortReadingOrder()
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}

// removeWord removes `_aaeb` from bin `_cggc` of `_fbba` and deletes the bin once it is empty.
func (_fbba *wordBag) removeWord(_aaeb *textWord, _cggc int) {
	bin := _fbba._ebae[_cggc]
	bin = _gfacg(bin, _aaeb)
	if len(bin) == 0 {
		delete(_fbba._ebae, _cggc)
	} else {
		_fbba._ebae[_cggc] = bin
	}
}

// String returns a description of the cell: its bounding box, the number of paragraphs and the
// merged paragraph text shortened with `_geabcf` to 50.
func (_bdea compositeCell) String() string {
	text := ""
	if len(_bdea.paraList) > 0 {
		text = _geabcf(_bdea.paraList.merge().text(), 50)
	}
	return _fg.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071", _bdea.PdfRectangle, len(_bdea.paraList), text)
}

// _cdba concatenates the ruling lists in `_abbd` and returns the result of vertsHorzs on the
// combined list.
func _cdba(_abbd []rulingList) (rulingList, rulingList) {
	var all rulingList
	for _, rulings := range _abbd {
		all = append(all, rulings...)
	}
	return all.vertsHorzs()
}

// _gdde returns a new slice holding the elements of `_eadg` in reverse order.
func _gdde(_eadg []int) []int {
	reversed := make([]int, len(_eadg))
	for i, v := range _eadg {
		reversed[len(_eadg)-1-i] = v
	}
	return reversed
}
// removeDuplicates sorts `_aagf` and returns a list in which every run of consecutive rulings
// that are `equals` to each other is reduced to its first element. Returns nil for an empty list.
func (_aagf rulingList) removeDuplicates() rulingList {
	if len(_aagf) == 0 {
		return nil
	}
	_aagf.sort()
	unique := rulingList{_aagf[0]}
	for i := 1; i < len(_aagf); i++ {
		if r := _aagf[i]; !r.equals(unique[len(unique)-1]) {
			unique = append(unique, r)
		}
	}
	return unique
}

// computeText returns the concatenation of the `_gbf` texts of the word's marks.
func (_bafgc *textWord) computeText() string {
	var sb _g.Builder
	for _, mark := range _bafgc._daggf {
		sb.WriteString(mark._gbf)
	}
	return sb.String()
}

// addDiacritic appends `_bggc` to the text of the word's last mark and replaces that text with
// its NFKC-normalized form.
func (_egdgf *textWord) addDiacritic(_bggc string) {
	last := _egdgf._daggf[len(_egdgf._daggf)-1]
	last._gbf = _be.NFKC.String(last._gbf + _bggc)
}

// getFontDirect looks up the font dictionary named `_bfg` and builds a PdfFont from it.
// A lookup error is returned with a nil font; a construction error is logged and returned
// together with whatever NewPdfFontFromPdfObject produced.
func (_aeg *textObject) getFontDirect(_bfg string) (*_ge.PdfFont, error) {
	fontObj, err := _aeg.getFontDict(_bfg)
	if err != nil {
		return nil, err
	}
	font, err := _ge.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		_cb.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _bfg, err)
	}
	return font, err
}

// _aged groups the text marks `_adag` into words.
// A whitespace-only mark ends the current word. Any other mark either starts a new word or is
// appended to the current one, depending on its gaps to the current word relative to the
// thresholds `_fddcb`, `_fbda` and `_fedg`. When `_fbfb` is set, a mark and the previous mark
// that `_afde` and inDiacriticArea identify as a diacritic pair are combined with addDiacritic.
// Words whose text is only whitespace are dropped. `_agbfb` is forwarded to `_fdcff`, `_daee`
// and appendMark.
func _aged(_adag []*textMark, _agbfb _ge.PdfRectangle) []*textWord {
	var (
		words   []*textWord
		current *textWord
	)
	if _fdcf {
		_cb.Log.Info("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073", len(_adag))
	}

	// flush stores the word under construction, unless it is blank, and clears it.
	flush := func() {
		if current == nil {
			return
		}
		text := current.computeText()
		if !_dgcd(text) {
			current._aebb = text
			words = append(words, current)
			if _fdcf {
				_cb.Log.Info("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", len(words)-1, current.String())
				for i, m := range current._daggf {
					_fg.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, m.String())
				}
			}
		}
		current = nil
	}

	for _, mark := range _adag {
		if _fbfb && current != nil && len(current._daggf) > 0 {
			prev := current._daggf[len(current._daggf)-1]
			markDia, markIsDia := _afde(mark._gbf)
			prevDia, prevIsDia := _afde(prev._gbf)
			if markIsDia && !prevIsDia && prev.inDiacriticArea(mark) {
				current.addDiacritic(markDia)
				continue
			}
			if prevIsDia && !markIsDia && mark.inDiacriticArea(prev) {
				// The diacritic came first: replace it by the base mark, then re-apply it.
				current._daggf = current._daggf[:len(current._daggf)-1]
				current.appendMark(mark, _agbfb)
				current.addDiacritic(prevDia)
				continue
			}
		}

		if _dgcd(mark._gbf) {
			flush()
			continue
		}
		if current == nil {
			current = _fdcff([]*textMark{mark}, _agbfb)
			continue
		}

		scale := current._egdce
		depthGap := _gb.Abs(_daee(_agbfb, mark)-current._gaff) / scale
		readingGap := _egdc(mark, current) / scale
		// The negated conjunction is kept as written so that NaN gaps still start a new word.
		if readingGap >= _fddcb || !(-_fbda <= readingGap && depthGap <= _fedg) {
			flush()
			current = _fdcff([]*textMark{mark}, _agbfb)
			continue
		}
		current.appendMark(mark, _agbfb)
	}
	flush()
	return words
}

// checkWidth returns the width `_gagaf` - `_fdfa` and whether it is at most `_gfge`.
func (_cfdc rectRuling) checkWidth(_fdfa, _gagaf float64) (float64, bool) {
	width := _gagaf - _fdfa
	return width, width <= _gfge
}
// Len returns the number of TextMarks in `ma`.
// A nil array has length 0.
func (_dbg *TextMarkArray) Len() int {
	if _dbg != nil {
		return len(_dbg._agbde)
	}
	return 0
}

// _cefd compares `_bfee` and `_ffb`: it returns the result of `_cffbf` unless `_cgcb` accepts
// that value, in which case the result of `_bggd` is returned instead.
func _cefd(_bfee, _ffb bounded) float64 {
	primary := _cffbf(_bfee, _ffb)
	if _cgcb(primary) {
		return _bggd(_bfee, _ffb)
	}
	return primary
}

// _dgcd reports whether `_eafc` contains only Unicode white space. The empty string counts as
// white space only.
func _dgcd(_eafc string) bool {
	for _, r := range _eafc {
		if _f.IsSpace(r) {
			continue
		}
		return false
	}
	return true
}

// push appends a copy of `_adfa` to the stack, so later changes to `_adfa` do not affect the
// saved state.
func (_eae *stateStack) push(_adfa *textState) {
	saved := new(textState)
	*saved = *_adfa
	*_eae = append(*_eae, saved)
}

// lineRuling is a ruling candidate described by a kind, a mark kind, a color and two points.
type lineRuling struct {
	_aaga rulingKind
	_ebef markKind
	_fd.Color
	_gfcd _ce.Point
	_gccg _ce.Point
}

// bbox returns the bounding box of the table.
func (_ebge *textTable) bbox() _ge.PdfRectangle {
	box := _ebge.PdfRectangle
	return box
}
// intSet is a set of ints.
type intSet map[int]struct{}

// textObject holds the state used while the text of a content stream is processed.
type textObject struct {
	_cgbd *Extractor
	_ddf  *_ge.PdfPageResources
	_afff _aa.GraphicsState // graphics state; its CTM is combined with `_dddc` when rendering
	_agff *textState
	_aaaa *stateStack
	_dddc _ce.Matrix // advanced with Concat after each rendered mark or numeric adjustment
	_cfgd _ce.Matrix
	_cdca []*textMark // marks collected by renderText
	_gcaa bool        // when set, renderText logs "Invalid font" and does nothing
}

// topoOrder returns the indexes of the paragraphs in `_edcf` in the reverse of the order in
// which a depth-first traversal over the readBefore relation finishes them.
func (_edcf paraList) topoOrder() []int {
	if _ffed {
		_cb.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	n := len(_edcf)
	visited := make([]bool, n)
	finished := make([]int, 0, n)
	llyOrder := _edcf.llyOrdering()

	var visit func(idx int)
	visit = func(idx int) {
		visited[idx] = true
		for next := 0; next < n; next++ {
			if !visited[next] && _edcf.readBefore(llyOrder, idx, next) {
				visit(next)
			}
		}
		finished = append(finished, idx)
	}
	for idx := 0; idx < n; idx++ {
		if !visited[idx] {
			visit(idx)
		}
	}
	return _gdde(finished)
}

// contains reports whether rectangle `_deda` fits inside the tile.
// The tile must have at least 3 borders. Only sides whose border flag is set are checked, and
// each check allows a tolerance of `_gcgcd`.
func (_gfff gridTile) contains(_deda _ge.PdfRectangle) bool {
	if _gfff.numBorders() < 3 {
		return false
	}
	switch {
	case _gfff._bbeg && _deda.Llx < _gfff.Llx-_gcgcd:
		return false
	case _gfff._efcdf && _deda.Urx > _gfff.Urx+_gcgcd:
		return false
	case _gfff._gbbc && _deda.Lly < _gfff.Lly-_gcgcd:
		return false
	case _gfff._ecf && _deda.Ury > _gfff.Ury+_gcgcd:
		return false
	}
	return true
}

// ruling is a ruling described by a kind, a mark kind, a color and four coordinates.
type ruling struct {
	_bgcf rulingKind
	_aagg markKind
	_fd.Color
	_acabca float64
	_dbef   float64
	_efdg   float64
	_efeeg  float64
}

// _dffgb packs the pair (`_fbefb`, `_acbfg`) into one uint64 as `_fbefb`*0x1000000 + `_acbfg`.
func _dffgb(_fbefb, _acbfg int) uint64 {
	return uint64(_fbefb)*0x1000000 + uint64(_acbfg)
}

// lines returns the text lines of all paragraphs in `_ffcf`, in paragraph order.
func (_ffcf paraList) lines() []*textLine {
	var all []*textLine
	for _, para := range _ffcf {
		all = append(all, para._ffcb...)
	}
	return all
}

// moveTextSetLeading sets the `_dab` field of the text state to -`_cacc` and then calls moveLP
// with (`_ged`, `_cacc`).
func (_cab *textObject) moveTextSetLeading(_ged, _cacc float64) {
	_cab._agff._dab = -_cacc
	_cab.moveLP(_ged, _cacc)
}

// isActualGrid checks whether the rulings form a grid.
// The lists returned by augmentGrid must hold at least `_aggeb`+1 and `_deea`+1 rulings and
// pass an alignment test: the first/last rulings of both lists are compared with `_cbcg` when
// `_bafa` is set, otherwise both lists must be `aligned`. On success the concatenation of the
// two lists and true are returned, otherwise nil and false.
func (_eefdg rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := _eefdg.augmentGrid()
	if len(verts) < _aggeb+1 || len(horzs) < _deea+1 {
		if _dcdc {
			_cb.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(verts), len(horzs), _aggeb+1, _deea+1)
		}
		return nil, false
	}
	if _dcdc {
		_cb.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", _eefdg, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for i, r := range _eefdg {
			_fg.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", i, r)
		}
	}

	if _bafa {
		firstVert, lastVert := verts[0], verts[len(verts)-1]
		firstHorz, lastHorz := horzs[0], horzs[len(horzs)-1]
		if !(_cbcg(firstVert._acabca-firstHorz._dbef) && _cbcg(lastVert._acabca-firstHorz._efdg) && _cbcg(firstHorz._acabca-firstVert._efdg) && _cbcg(lastHorz._acabca-firstVert._dbef)) {
			if _dcdc {
				_cb.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", firstVert, lastVert, firstHorz, lastHorz)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			// NOTE(review): this message is guarded by `_cabea` while the others use `_dcdc`.
			if _cabea {
				_cb.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _dcdc {
				_cb.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(horzs))
			}
			return nil, false
		}
	}
	grid := append(verts, horzs...)
	return grid, true
}

// showTextAdjusted processes the elements of the array `_bdcf`.
// A number n shifts the matrix `_dddc` horizontally by -n * 0.001 * `_ccfg` of the text state;
// a string is rendered with renderText. Any other element type, a bad number or a bad string
// ends processing with an error.
func (_bfdf *textObject) showTextAdjusted(_bdcf *_dd.PdfObjectArray) error {
	for _, elem := range _bdcf.Elements() {
		switch elem.(type) {
		case *_dd.PdfObjectFloat, *_dd.PdfObjectInteger:
			adjust, err := _dd.GetNumberAsFloat(elem)
			if err != nil {
				_cb.Log.Debug("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _bdcf)
				return err
			}
			// The original also carried an always-false flag that would have swapped the
			// x and y shifts; that dead branch has been dropped.
			dx, dy := -adjust*0.001*_bfdf._agff._ccfg, 0.0
			shift := _abg(_ce.Point{X: dx, Y: dy})
			_bfdf._dddc.Concat(shift)
		case *_dd.PdfObjectString:
			data, ok := _dd.GetStringBytes(elem)
			if !ok {
				_cb.Log.Trace("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _bdcf)
				return _dd.ErrTypeError
			}
			// NOTE(review): the error returned by renderText is discarded, so the remaining
			// elements are still processed after a failure. Confirm this is intended.
			_bfdf.renderText(data)
		default:
			_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _bdcf)
			return _dd.ErrTypeError
		}
	}
	return nil
}

// _egcg converts PDF color `_dabfa` in colorspace `_dbcb` to an opaque color.NRGBA.
// Black is returned when either argument is nil, when the conversion to RGB fails or when the
// converted color is not a *PdfColorDeviceRGB. Each component is scaled by 255 and clamped to
// [0, 255] before the conversion to uint8, because converting an out-of-range or NaN float to
// an integer type gives an implementation-dependent result in Go.
func _egcg(_dbcb _ge.PdfColorspace, _dabfa _ge.PdfColor) _fd.Color {
	if _dbcb == nil || _dabfa == nil {
		return _fd.Black
	}
	converted, err := _dbcb.ColorToRGB(_dabfa)
	if err != nil {
		_cb.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", _dabfa, _dbcb, err)
		return _fd.Black
	}
	rgb, ok := converted.(*_ge.PdfColorDeviceRGB)
	if !ok {
		_cb.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", converted)
		return _fd.Black
	}

	// toByte scales a component to the 0..255 range; in-range inputs give exactly the value
	// the plain uint8 conversion gave before.
	toByte := func(component float64) uint8 {
		scaled := component * 255
		switch {
		case !(scaled > 0): // negative, zero or NaN
			return 0
		case scaled >= 255:
			return 255
		}
		return uint8(scaled)
	}
	return _fd.NRGBA{R: toByte(rgb.R()), G: toByte(rgb.G()), B: toByte(rgb.B()), A: uint8(255)}
}

// _gfbd classifies the segment from `_cggf` to `_gfcf` by passing its absolute x and y extents
// and `_cbbff` to `_faff`.
func _gfbd(_cggf, _gfcf _ce.Point) rulingKind {
	dx := _gb.Abs(_cggf.X - _gfcf.X)
	dy := _gb.Abs(_cggf.Y - _gfcf.Y)
	return _faff(dx, dy, _cbbff)
}

// _dcefg returns the larger of `_fdae` and `_dbcg`.
func _dcefg(_fdae, _dbcg int) int {
	if _fdae > _dbcg {
		return _fdae
	}
	return _dbcg
}

// _afde looks up `_cgcdb` in the `_fbbab` table. The lookup is only attempted for strings of
// exactly one rune; all other strings yield ("", false).
func _afde(_cgcdb string) (string, bool) {
	runes := []rune(_cgcdb)
	if len(runes) != 1 {
		return "", false
	}
	mapped, found := _fbbab[runes[0]]
	return mapped, found
}

// makeRectRuling tries to interpret the first 4 points of the path as a rectangle and to
// convert that rectangle to a ruling with color `_fdec`.
// The 4 edges are classified with `_gfbd`. The path qualifies when it has 2 edges of kind
// `_ggdg` and 2 of kind `_agdda`, or 2 edges of one of these kinds, none of the other, and the
// matching check (`_bbaeeb` or `_ccfeg`) on their start points succeeds. The second result is
// false when the path does not qualify or the rectangle is not a ruling.
func (_cdcgf *subpath) makeRectRuling(_fdec _fd.Color) (*ruling, bool) {
	if _gddf {
		_cb.Log.Info("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076", _cdcgf)
	}
	points := _cdcgf._eeff[:4]
	kinds := make(map[int]rulingKind, len(points))
	for i, start := range points {
		end := _cdcgf._eeff[(i+1)%4]
		kinds[i] = _gfbd(start, end)
		if _gddf {
			_fg.Printf("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066", i, kinds[i], start, end)
		}
	}
	if _gddf {
		_fg.Printf("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a", kinds)
	}

	// Collect the edge indexes by kind. `kinds` is a map, so the order within each list is not
	// deterministic; the rectangle below is normalized, so the order does not matter for it.
	var verts, horzs []int
	for i, kind := range kinds {
		switch kind {
		case _ggdg:
			horzs = append(horzs, i)
		case _agdda:
			verts = append(verts, i)
		}
	}
	if _gddf {
		_fg.Printf("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(horzs), horzs)
		_fg.Printf("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(verts), verts)
	}

	ok := (len(horzs) == 2 && len(verts) == 2) || (len(horzs) == 2 && len(verts) == 0 && _bbaeeb(points[horzs[0]], points[horzs[1]])) || (len(verts) == 2 && len(horzs) == 0 && _ccfeg(points[verts[0]], points[verts[1]]))
	if _gddf {
		_fg.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
	}
	if !ok {
		if _gddf {
			_cb.Log.Error("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v", _cdcgf)
			_fg.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
		}
		// Unlike the later failure paths this one returns an empty ruling rather than nil.
		return &ruling{}, false
	}

	// When one kind is missing, every edge that is not of the other kind stands in for it.
	if len(verts) == 0 {
		for i, kind := range kinds {
			if kind != _ggdg {
				verts = append(verts, i)
			}
		}
	}
	if len(horzs) == 0 {
		for i, kind := range kinds {
			if kind != _agdda {
				horzs = append(horzs, i)
			}
		}
	}
	if _gddf {
		_cb.Log.Info("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076", len(horzs), len(verts), len(points), horzs, verts, points)
	}

	var xFirst, xSecond, yHigh, yLow _ce.Point
	if points[horzs[0]].Y > points[horzs[1]].Y {
		yHigh, yLow = points[horzs[0]], points[horzs[1]]
	} else {
		yHigh, yLow = points[horzs[1]], points[horzs[0]]
	}
	if points[verts[0]].X > points[verts[1]].X {
		xFirst, xSecond = points[verts[0]], points[verts[1]]
	} else {
		xFirst, xSecond = points[verts[1]], points[verts[0]]
	}

	rect := _ge.PdfRectangle{Llx: xFirst.X, Urx: xSecond.X, Lly: yLow.Y, Ury: yHigh.Y}
	// Normalize so that Llx <= Urx and Lly <= Ury.
	if rect.Llx > rect.Urx {
		rect.Llx, rect.Urx = rect.Urx, rect.Llx
	}
	if rect.Lly > rect.Ury {
		rect.Lly, rect.Ury = rect.Ury, rect.Lly
	}

	rr := rectRuling{PdfRectangle: rect, _eefa: _gfdea(rect), Color: _fdec}
	if rr._eefa == _ebdfe {
		if _gddf {
			_cb.Log.Error("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c")
		}
		return nil, false
	}
	result, isRuling := rr.asRuling()
	if !isRuling {
		if _gddf {
			_cb.Log.Error("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg")
		}
		return nil, false
	}
	if _dcdc {
		_fg.Printf("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a", result.String())
	}
	return result, true
}

// _fcac reports whether the x-ranges of `_egg` and `_edgf` overlap after the range of `_egg`
// has been widened by `_ffegg` on both sides.
func _fcac(_egg *wordBag, _edgf *textWord, _ffegg float64) bool {
	return _edgf.Llx < _egg.Urx+_ffegg && _egg.Llx-_ffegg < _edgf.Urx
}
// String returns a description of `t`: its two integer size fields followed by its
// boolean field.
func (t *textTable) String() string {
	// The format decodes to "%d x %d %t".
	const layout = "\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074"
	return _fg.Sprintf(layout, t._ggda, t._accb, t._bagb)
}

// hasLines reports whether the bounding box of at least one line in `_beeg` passes the
// `_ace` test against the bounding box of the cell.
// When the `_fcbb` debug flag is set, every comparison is printed to stdout.
func (cell compositeCell) hasLines(_beeg []*textLine) bool {
	total := len(_beeg)
	for idx := 0; idx < total; idx++ {
		line := _beeg[idx]
		hit := _ace(cell.PdfRectangle, line.PdfRectangle)
		if _fcbb {
			// Decoded: "      ^^^intersects=%t %d of %d\n", "     ^^^composite=%s\n"
			// and "       line=%s\n".
			_fg.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", hit, idx, total)
			_fg.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", cell)
			_fg.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if hit {
			return true
		}
	}
	return false
}
// Elements returns the TextMarks in `ma`.
// A nil `ma` yields a nil slice instead of panicking, matching the nil handling in
// RangeOffset.
func (ma *TextMarkArray) Elements() []TextMark {
	if ma == nil {
		return nil
	}
	return ma._agbde
}

// _ffdg is compiled once at package scope. The pattern decodes to
//
//	^\s*(\d+\.?|[Iiv]+)\s*\)?$
//
// i.e. a whole string made of optional whitespace, then either digits with an optional
// trailing period or one or more of the letters I, i, v, then optional whitespace and an
// optional closing parenthesis.
var _ffdg = _a.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")
// setTextRenderMode converts `_dag` to a RenderMode and stores it in the text state
// attached to the text object. It does nothing when the receiver is nil.
func (to *textObject) setTextRenderMode(_dag int) {
	if to != nil {
		to._agff._aba = RenderMode(_dag)
	}
}
// String returns a description of `p`.
// A para flagged as empty is rendered as its bounding box followed by "[EMPTY]".
// Otherwise the output is the bounding box, an optional "[WxH] " prefix when the para
// holds a table, the number of lines and the first 50 bytes of the para text (quoted).
func (p *textPara) String() string {
	if p._caaad {
		// Decoded: "%6.2f [EMPTY]".
		return _fg.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", p.PdfRectangle)
	}
	tableDesc := ""
	if p._defe != nil {
		// Decoded: "[%dx%d] ".
		tableDesc = _fg.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", p._defe._ggda, p._defe._accb)
	}
	// Decoded: "%6.2f %s%d lines %q".
	return _fg.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", p.PdfRectangle, tableDesc, len(p._ffcb), _geabcf(p.text(), 50))
}

// updateBBox grows the cell's bounding box by combining it (via `_gfc`) with the
// bounding box of every para the cell contains.
func (c *compositeCell) updateBBox() {
	for _, para := range c.paraList {
		c.PdfRectangle = _gfc(c.PdfRectangle, para.PdfRectangle)
	}
}

// getDepthIdx maps `_ffafb` to a depth index with `_dagd` and clamps the result to the
// range spanned by the first and last entries of depthIndexes().
// NOTE(review): this presumes depthIndexes() is sorted ascending - confirm.
//
// If depthIndexes() is empty there is nothing to clamp against, so the unclamped index is
// returned. Previously this case indexed an empty slice and panicked.
func (b *wordBag) getDepthIdx(_ffafb float64) int {
	occupied := b.depthIndexes()
	idx := _dagd(_ffafb)
	if len(occupied) == 0 {
		return idx
	}
	lowest, highest := occupied[0], occupied[len(occupied)-1]
	switch {
	case idx < lowest:
		return lowest
	case idx > highest:
		return highest
	}
	return idx
}

// writeText writes the text of `p` to `_ebab`.
// A para without a table is written with writeCellText. A para with a table is written
// row by row: each cell is followed by a space, a missing cell is written as a tab, and
// rows are separated by a newline (none after the last row).
// Write errors are deliberately ignored: the method has no error return and output is
// best effort.
func (p *textPara) writeText(_ebab _b.Writer) {
	table := p._defe
	if table == nil {
		p.writeCellText(_ebab)
		return
	}
	lastRow := table._accb - 1
	for y := 0; y < table._accb; y++ {
		for x := 0; x < table._ggda; x++ {
			if cell := table.get(x, y); cell != nil {
				cell.writeCellText(_ebab)
			} else {
				_ebab.Write([]byte("\u0009"))
			}
			_ebab.Write([]byte("\u0020"))
		}
		if y < lastRow {
			_ebab.Write([]byte("\u000a"))
		}
	}
}
// NewFromContents creates a new extractor from contents and page resources.
// The returned error is always nil.
func NewFromContents(contents string, resources *_ge.PdfPageResources) (*Extractor, error) {
	// Usage-tracking key; decodes to "extractor.NewFromContents".
	const _bea = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"
	ext := &Extractor{
		_bb:  contents,
		_fe:  resources,
		_dfd: map[string]fontEntry{},
		_af:  map[string]textResult{},
	}
	_e.TrackUse(_bea)
	return ext, nil
}

// computeEBBoxes computes the `_cacf` rectangle of every para in the list.
// Each `_cacf` starts as the para's own bounding box. It is then widened horizontally to
// the left/right edges of paras that lie entirely below it, as long as the widening does
// not cross the nearest neighbour (as reported by yNeighbours(0)) on that side.
// When the `_ecea` flag is set the widened rectangles also replace the paras' bounding
// boxes. Debug output is emitted when `_gbgg` is set.
func (paras paraList) computeEBBoxes() {
	if _gbgg {
		_cb.Log.Info("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a")
	}
	for _, para := range paras {
		para._cacf = para.PdfRectangle
	}
	neighbours := paras.yNeighbours(0)
	for i, para := range paras {
		box := para._cacf
		// Horizontal limits imposed by the neighbours to the left and to the right.
		leftLimit, rightLimit := -1.0e9, +1.0e9
		for _, j := range neighbours[para] {
			other := paras[j]._cacf
			if other.Urx < box.Llx {
				leftLimit = _gb.Max(leftLimit, other.Urx)
			} else if box.Urx < other.Llx {
				rightLimit = _gb.Min(rightLimit, other.Llx)
			}
		}
		for j, candidate := range paras {
			below := candidate._cacf
			// Only paras whose top is at or below the bottom of `box` can widen it.
			if i == j || below.Ury > box.Lly {
				continue
			}
			if leftLimit <= below.Llx && below.Llx < box.Llx {
				box.Llx = below.Llx
			} else if below.Urx <= rightLimit && box.Urx < below.Urx {
				box.Urx = below.Urx
			}
		}
		if _gbgg {
			// Decoded: "%4d: %6.2f→%6.2f %q\n".
			_fg.Printf("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a", i, para._cacf, box, _geabcf(para.text(), 50))
		}
		para._cacf = box
	}
	if _ecea {
		for _, para := range paras {
			para.PdfRectangle = para._cacf
		}
	}
}

// _dgdg converts the path sections in `_cdff` into rulings.
// `_gaaeg` is applied to the sections first. Then every pair of consecutive points of
// every subpath is offered to `_fedc` together with the section colour, and the rulings
// it accepts are collected. Subpaths with fewer than two points are skipped.
func _dgdg(_cdff []pathSection) rulingList {
	_gaaeg(_cdff)
	if _dcdc {
		// Decoded: "makeStrokeRulings: %d strokes".
		_cb.Log.Info("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073", len(_cdff))
	}
	var rulings rulingList
	for _, section := range _cdff {
		for _, sub := range section._ccfga {
			points := sub._eeff
			if len(points) < 2 {
				continue
			}
			prev := points[0]
			for _, pt := range points[1:] {
				if r, ok := _fedc(prev, pt, section.Color); ok {
					rulings = append(rulings, r)
				}
				prev = pt
			}
		}
	}
	if _dcdc {
		// Decoded: "makeStrokeRulings: %s".
		_cb.Log.Info("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073", rulings)
	}
	return rulings
}

// _dgde appends to `_ccdb`, through `_gge`, a copy of the package-level mark `_gbeb`
// whose Text is set to `_daggg`. `_ebbb` is passed to `_gge` unchanged.
func _dgde(_ccdb []TextMark, _ebbb *int, _daggg string) []TextMark {
	mark := _gbeb
	mark.Text = _daggg
	return _gge(_ccdb, _ebbb, mark)
}

// _geabcf returns `_agbbe` truncated to at most `_bgccb` bytes.
// Strings no longer than `_bgccb` bytes are returned unchanged. Longer strings are cut at
// the last rune boundary at or before byte `_bgccb`, so the result never ends in a
// partial UTF-8 sequence. The previous plain byte slice could split a multi-byte rune.
func _geabcf(_agbbe string, _bgccb int) string {
	if len(_agbbe) <= _bgccb {
		return _agbbe
	}
	cut := _bgccb
	// _agbbe[cut] is the first excluded byte; if it is a continuation byte the cut would
	// land inside a rune, so back up to the start of that rune.
	for cut > 0 && !_d.RuneStart(_agbbe[cut]) {
		cut--
	}
	return _agbbe[:cut]
}

// yMean returns the mean of the Y coordinates of the two points of the line ruling.
func (l lineRuling) yMean() float64 {
	return 0.5 * (l._gfcd.Y + l._gccg.Y)
}
// rectRuling is a rectangle that may represent a ruling: a kind, a mark kind, a colour
// and the rectangle itself.
type rectRuling struct {
	_eefa rulingKind
	_fgfc markKind
	_fd.Color
	_ge.PdfRectangle
}

// _egdc returns the left edge of `_gbcb`'s bounding box minus the right edge of
// `_edea`'s bounding box.
func _egdc(_gbcb, _edea bounded) float64 {
	left := _gbcb.bbox().Llx
	right := _edea.bbox().Urx
	return left - right
}

// _eebfd builds a ruling of kind `_agdda` whose primary coordinate is the right edge of
// `_egdg` and whose secondary range is the rectangle's [Lly, Ury].
func _eebfd(_egdg _ge.PdfRectangle) *ruling {
	r := ruling{_bgcf: _agdda}
	r._acabca = _egdg.Urx
	r._dbef = _egdg.Lly
	r._efdg = _egdg.Ury
	return &r
}

// _ccfeg applies `_baec` to the absolute X and Y distances between the two points.
func _ccfeg(_ggge, _eggfd _ce.Point) bool {
	return _baec(_gb.Abs(_ggge.X-_eggfd.X), _gb.Abs(_ggge.Y-_eggfd.Y))
}

// asRuling converts the rectangle to a ruling.
// For kind `_agdda` (logged as "rulingVert") the primary coordinate is the X midpoint and
// the secondary range is [Lly, Ury]; for kind `_ggdg` (logged as "rulingHorz") the
// primary coordinate is the Y midpoint and the secondary range is [Llx, Urx].
// The extent along the primary axis must pass checkWidth, whose first result becomes the
// ruling's `_efeeg`. Any other kind, or a failed width check, yields (nil, false).
func (r rectRuling) asRuling() (*ruling, bool) {
	var primLo, primHi, secLo, secHi float64
	vertical := false
	if r._eefa == _agdda {
		vertical = true
		primLo, primHi = r.Llx, r.Urx
		secLo, secHi = r.Lly, r.Ury
	} else if r._eefa == _ggdg {
		primLo, primHi = r.Lly, r.Ury
		secLo, secHi = r.Llx, r.Urx
	} else {
		// Decoded: "bad primary kind=%d".
		_cb.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", r._eefa)
		return nil, false
	}

	width, ok := r.checkWidth(primLo, primHi)
	if !ok {
		if _gddf {
			if vertical {
				// Decoded: "rectRuling.asRuling: rulingVert !checkWidth v=%+v".
				_cb.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", r)
			} else {
				// Decoded: "rectRuling.asRuling: rulingHorz !checkWidth v=%+v".
				_cb.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", r)
			}
		}
		return nil, false
	}

	return &ruling{
		_bgcf:   r._eefa,
		Color:   r.Color,
		_aagg:   _baeg,
		_acabca: 0.5 * (primLo + primHi),
		_dbef:   secLo,
		_efdg:   secHi,
		_efeeg:  width,
	}, true
}

// computeBbox returns the combination (via `_gfc`) of the bounding boxes of all non-nil
// cells of the table, or the zero rectangle when the table has no cells.
func (t *textTable) computeBbox() _ge.PdfRectangle {
	var box _ge.PdfRectangle
	seeded := false
	for y := 0; y < t._accb; y++ {
		for x := 0; x < t._ggda; x++ {
			cell := t.get(x, y)
			switch {
			case cell == nil:
				// No cell at this position.
			case !seeded:
				box, seeded = cell.PdfRectangle, true
			default:
				box = _gfc(box, cell.PdfRectangle)
			}
		}
	}
	return box
}

// xMean returns the mean of the X coordinates of the two points of the line ruling.
func (l lineRuling) xMean() float64 {
	return 0.5 * (l._gfcd.X + l._gccg.X)
}
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
//
// `start` is clamped up to the offset of the first mark and `end` is clamped down to one
// past the offset of the last mark. An error is returned when `ma` is nil, when
// `end` < `start`, when `start` lies beyond every mark, or when the selection is empty.
// An empty `ma` is returned as is.
//
// A range that reaches the final mark is valid and returns the marks up to and including
// that mark. Earlier code reported "Out of range" for it, because the binary search
// for the end position returns len(marks) when no mark starts at or after `end`.
func (ma *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if ma == nil {
		return nil, _cc.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _fg.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}
	marks := ma._agbde
	n := len(marks)
	if n == 0 {
		return ma, nil
	}
	if start < marks[0].Offset {
		start = marks[0].Offset
	}
	if end > marks[n-1].Offset+1 {
		end = marks[n-1].Offset + 1
	}

	// First mark whose last byte is at or after `start`.
	iStart := _gf.Search(n, func(i int) bool { return marks[i].Offset+len(marks[i].Text)-1 >= start })
	if iStart >= n {
		return nil, _fg.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, iStart, n, marks[0], marks[n-1])
	}

	// First mark that starts at or after `end`; n means the range runs to the final mark.
	iEnd := _gf.Search(n, func(i int) bool { return marks[i].Offset > end-1 })
	if iEnd <= iStart {
		return nil, _fg.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_agbde: marks[iStart:iEnd]}, nil
}

// pullWord adds `_aag` to the bag in the bin keyed by `_dgb` and records it in `_edee`.
// The bag's bounding box is combined with the word's box and the bag's `_adcc` is raised
// to the word's `_egdce` when that is larger.
// `_edee[_dgb]` must already hold a non-nil map; writing to a nil inner map panics.
func (b *wordBag) pullWord(_aag *textWord, _dgb int, _edee map[int]map[*textWord]struct{}) {
	b.PdfRectangle = _gfc(b.PdfRectangle, _aag.PdfRectangle)
	if b._adcc < _aag._egdce {
		b._adcc = _aag._egdce
	}
	bin := b._ebae[_dgb]
	b._ebae[_dgb] = append(bin, _aag)
	_edee[_dgb][_aag] = struct{}{}
}
// TableCell is a cell in a TextTable.
type TableCell struct {
	// Text is the extracted text.
	Text string
	// Marks holds the TextMarks corresponding to the text in Text.
	Marks TextMarkArray
}

// tidied returns the list with duplicates removed, snapped to groups and sorted.
// It returns nil when snapToGroups returns nil. `_fbgf` is only used as a label in the
// debug log emitted when `_dcdc` is set.
func (rulings rulingList) tidied(_fbgf string) rulingList {
	uniques := rulings.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()
	if _dcdc {
		// Decoded: "tidied: %q vecs=%d uniques=%d coalesced=%d".
		_cb.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", _fbgf, len(rulings), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// textResult bundles a PageText with two integer values.
type textResult struct {
	_cfa  PageText
	_gdda int
	_bed  int
}

// addPoint converts (`_cdf`, `_faee`) with devicePoint and either adds it to the subpath
// returned by establishSubpath or, when there is none, stores it as the pending point
// (`_afcbb`) and sets the `_faec` flag.
func (ss *shapesState) addPoint(_cdf, _faee float64) {
	sub := ss.establishSubpath()
	pt := ss.devicePoint(_cdf, _faee)
	if sub != nil {
		sub.add(pt)
		return
	}
	ss._faec = true
	ss._afcbb = pt
}

// _daeaa reports whether the X ranges [Llx, Urx] of the two rectangles overlap
// (touching edges count as overlapping).
func _daeaa(_fgdc, _cfcf _ge.PdfRectangle) bool {
	if !(_cfcf.Llx <= _fgdc.Urx) {
		return false
	}
	return _fgdc.Llx <= _cfcf.Urx
}

// toTextMarks returns the TextMarks for `p`, passing `_gacf` through to the helpers that
// build them.
// A para without a table delegates to toCellTextMarks. A para with a table is emitted
// row by row: each cell's marks are followed by a space mark, a missing cell contributes
// a tab mark, and rows are separated by a newline mark (none after the last row).
func (p *textPara) toTextMarks(_gacf *int) []TextMark {
	table := p._defe
	if table == nil {
		return p.toCellTextMarks(_gacf)
	}
	var marks []TextMark
	lastRow := table._accb - 1
	for y := 0; y < table._accb; y++ {
		for x := 0; x < table._ggda; x++ {
			if cell := table.get(x, y); cell != nil {
				marks = append(marks, cell.toCellTextMarks(_gacf)...)
			} else {
				marks = _dgde(marks, _gacf, "\u0009")
			}
			marks = _dgde(marks, _gacf, "\u0020")
		}
		if y < lastRow {
			marks = _dgde(marks, _gacf, "\u000a")
		}
	}
	return marks
}

// _gdeg classifies the segment between the two points by passing its absolute X and Y
// extents and the package value `_gbca` to `_faff`.
func _gdeg(_acfg, _fegfg _ce.Point) rulingKind {
	return _faff(_gb.Abs(_acfg.X-_fegfg.X), _gb.Abs(_acfg.Y-_fegfg.Y), _gbca)
}

// applyRemovals removes from the bag's bins the words listed in `_dce`, which maps a bin
// key to the set of words to drop from that bin. A bin whose size equals the size of its
// removal set is deleted outright.
// The surviving slice is sized as len(bin) - len(removals), so every word in a removal
// set is expected to be present in the corresponding bin.
func (b *wordBag) applyRemovals(_dce map[int]map[*textWord]struct{}) {
	for key, removals := range _dce {
		if len(removals) == 0 {
			continue
		}
		bin := b._ebae[key]
		remaining := len(bin) - len(removals)
		if remaining == 0 {
			delete(b._ebae, key)
			continue
		}
		kept := make([]*textWord, remaining)
		next := 0
		for _, word := range bin {
			if _, drop := removals[word]; drop {
				continue
			}
			kept[next] = word
			next++
		}
		b._ebae[key] = kept
	}
}

// size returns the number of entries on the stack.
func (s *stateStack) size() int {
	return len(*s)
}
// asTiling converts the ruling list to a gridTiling.
// Steps, in order: log consecutive rulings that align on both primary and secondary
// (reported as duplicates, not removed); strictly sort; split into `_gbcg`/`_fdbgg` with
// vertsHorzs; take the primaries of each as the grid coordinates; build one tile per grid
// cell; coalesce tiles along each row; coalesce the results across rows; keep the tiles
// for which complete() is true.
// NOTE(review): `_eabb[1:]` panics on an empty list - presumably callers never pass one; confirm.
func (_eabb rulingList )asTiling ()gridTiling {if _afab {_cb .Log .Info ("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_eabb ));
};for _dbgdf ,_dbggd :=range _eabb [1:]{_ecdec :=_eabb [_dbgdf ];if _ecdec .alignsPrimary (_dbggd )&&_ecdec .alignsSec (_dbggd ){_cb .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_dbggd ,_ecdec );
// `_cdfc` and `_geadc` below are the numbers of grid columns and rows (one less than the
// number of distinct primaries). An empty gridTiling is returned when either is zero.
// NOTE(review): if either primaries() result is empty the count is -1, which the `==0`
// test does not catch, and the `[0]` index that follows would panic - confirm this
// cannot happen.
};};_eabb .sortStrict ();_eabb .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_gbcg ,_fdbgg :=_eabb .vertsHorzs ();_gdad :=_gbcg .primaries ();_eebgf :=_fdbgg .primaries ();_cdfc :=len (_gdad )-1;_geadc :=len (_eebgf )-1;if _cdfc ==0||_geadc ==0{return gridTiling {};
};_aafgf :=_ge .PdfRectangle {Llx :_gdad [0],Urx :_gdad [_cdfc ],Lly :_eebgf [0],Ury :_eebgf [_geadc ]};if _afab {_cb .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_gbcg ));
for _eaeff ,_abeg :=range _gbcg {_fg .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_eaeff ,_abeg );};_cb .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_fdbgg ));
for _gdcb ,_bgef :=range _fdbgg {_fg .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gdcb ,_bgef );};_cb .Log .Info ("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_cdfc ,_geadc ,_gdad ,_eebgf );
// Build `_beed`, the row-major array of tiles (index = row*_cdfc + column). Each tile is
// made by `_cggb` from the cell rectangle and the rulings found with findPrimSec on its
// left, right, lower and upper edges (any of which may be nil).
};_beed :=make ([]gridTile ,_cdfc *_geadc );for _cbdde :=_geadc -1;_cbdde >=0;_cbdde --{_agbb :=_eebgf [_cbdde ];_abegb :=_eebgf [_cbdde +1];for _bfecb :=0;_bfecb < _cdfc ;_bfecb ++{_fgegc :=_gdad [_bfecb ];_gefab :=_gdad [_bfecb +1];_ddba :=_gbcg .findPrimSec (_fgegc ,_agbb );
_bffbg :=_gbcg .findPrimSec (_gefab ,_agbb );_fbfa :=_fdbgg .findPrimSec (_agbb ,_fgegc );_bbdd :=_fdbgg .findPrimSec (_abegb ,_fgegc );_abgg :=_ge .PdfRectangle {Llx :_fgegc ,Urx :_gefab ,Lly :_agbb ,Ury :_abegb };_cfde :=_cggb (_abgg ,_ddba ,_bffbg ,_fbfa ,_bbdd );
_beed [_cbdde *_cdfc +_bfecb ]=_cfde ;if _afab {_fg .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_bfecb ,_cbdde ,_cfde .String (),_cfde .Width (),_cfde .Height ());
};};};if _afab {_cb .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_aafgf );
// Horizontal coalescing: within each row, a tile whose `_bbeg` flag is set absorbs the
// tiles to its right (taking their Urx, OR-ing `_ecf` and `_gbbc`, copying `_efcdf`)
// until its `_efcdf` flag becomes true or the row ends. Results are stored per row in
// `_fgeb`, keyed by the tile's Llx; tiles without `_bbeg` are skipped.
};_fgeb :=make ([]map[float64 ]gridTile ,_geadc );for _adccc :=_geadc -1;_adccc >=0;_adccc --{if _afab {_fg .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_adccc );};_fgeb [_adccc ]=make (map[float64 ]gridTile ,_cdfc );for _dgcfd :=0;_dgcfd < _cdfc ;
_dgcfd ++{_ddde :=_beed [_adccc *_cdfc +_dgcfd ];if _afab {_fg .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dgcfd ,_ddde );};if !_ddde ._bbeg {continue ;};_fcdgf :=_dgcfd ;for _bcca :=_dgcfd +1;!_ddde ._efcdf &&_bcca < _cdfc ;
_bcca ++{_bfdd :=_beed [_adccc *_cdfc +_bcca ];_ddde .Urx =_bfdd .Urx ;_ddde ._ecf =_ddde ._ecf ||_bfdd ._ecf ;_ddde ._gbbc =_ddde ._gbbc ||_bfdd ._gbbc ;_ddde ._efcdf =_bfdd ._efcdf ;if _afab {_fg .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_bcca ,_bfdd ,_ddde );
};_fcdgf =_bcca ;};if _afab {_fg .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_dgcfd ,_fcdgf ,_ddde );};_dgcfd =_fcdgf ;_fgeb [_adccc ][_ddde .Llx ]=_ddde ;};};_aecf :=make (map[float64 ]map[float64 ]gridTile ,_geadc );
// Vertical coalescing: `_aecf` collects the final tiles keyed by row Lly then tile Llx;
// `_cbgf` marks (Lly, Llx) positions already absorbed into a tile from a higher row index.
_cbgf :=make (map[float64 ]map[float64 ]struct{},_geadc );for _dbeca :=_geadc -1;_dbeca >=0;_dbeca --{_fdeg :=_beed [_dbeca *_cdfc ].Lly ;_aecf [_fdeg ]=make (map[float64 ]gridTile ,_cdfc );_cbgf [_fdeg ]=make (map[float64 ]struct{},_cdfc );};if _afab {_cb .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_aafgf );
};for _dcde :=_geadc -1;_dcde >=0;_dcde --{_fcdf :=_beed [_dcde *_cdfc ].Lly ;_cbde :=_fgeb [_dcde ];if _afab {_fg .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_dcde );};for _ ,_afgbc :=range _ggff (_cbde ){if _ ,_ceae :=_cbgf [_fcdf ][_afgbc ];
// A tile keeps absorbing the tile with the same Llx in the next lower row index while
// its own `_gbbc` flag is false, such a tile exists and the two share the same Urx.
_ceae {continue ;};_gbabc :=_cbde [_afgbc ];if _afab {_fg .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_gbabc .String ());};for _abdf :=_dcde -1;_abdf >=0;_abdf --{if _gbabc ._gbbc {break ;};_bgccf :=_fgeb [_abdf ];_bcdc ,_bagf :=_bgccf [_afgbc ];
if !_bagf {break ;};if _bcdc .Urx !=_gbabc .Urx {break ;};_gbabc ._gbbc =_bcdc ._gbbc ;_gbabc .Lly =_bcdc .Lly ;if _afab {_fg .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_bcdc .String (),_gbabc .String ());
// Tiles in row index 0 always get `_gbbc` set; only tiles reporting complete() are kept.
};_cbgf [_bcdc .Lly ][_bcdc .Llx ]=struct{}{};};if _dcde ==0{_gbabc ._gbbc =true ;};if _gbabc .complete (){_aecf [_fcdf ][_afgbc ]=_gbabc ;};};};_geabc :=gridTiling {PdfRectangle :_aafgf ,_gecb :_ddff (_aecf ),_daba :_gbbg (_aecf ),_ecaee :_aecf };_geabc .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");
// reduceTiling (begins on the following line) returns a table without the rows and
// columns that are both empty and closer than `_eabdf` to a neighbouring grid coordinate.
// Row y (y > 0) is dropped when |_daba[y-1]-_daba[y]| < `_eabdf` and emptyCompositeRow(y)
// is true; column x (x < width-1) is dropped when |_gecb[x+1]-_gecb[x]| < `_eabdf` and
// emptyCompositeColumn(x) is true. If nothing is dropped the receiver itself is returned;
// otherwise a new table is built and the non-empty composites are copied to their
// renumbered positions with getComposite/putComposite.
return _geabc ;};func (_bfedg *textTable )reduceTiling (_bfgag gridTiling ,_eabdf float64 )*textTable {_dgdeg :=make ([]int ,0,_bfedg ._accb );_ffabe :=make ([]int ,0,_bfedg ._ggda );_dgcgg :=_bfgag ._gecb ;_ffcg :=_bfgag ._daba ;for _aeaa :=0;_aeaa < _bfedg ._accb ;
_aeaa ++{_eebfb :=_aeaa > 0&&_gb .Abs (_ffcg [_aeaa -1]-_ffcg [_aeaa ])< _eabdf &&_bfedg .emptyCompositeRow (_aeaa );if !_eebfb {_dgdeg =append (_dgdeg ,_aeaa );};};for _fadb :=0;_fadb < _bfedg ._ggda ;_fadb ++{_egadb :=_fadb < _bfedg ._ggda -1&&_gb .Abs (_dgcgg [_fadb +1]-_dgcgg [_fadb ])< _eabdf &&_bfedg .emptyCompositeColumn (_fadb );
if !_egadb {_ffabe =append (_ffabe ,_fadb );};};if len (_dgdeg )==_bfedg ._accb &&len (_ffabe )==_bfedg ._ggda {return _bfedg ;};_bcgg :=textTable {_bagb :_bfedg ._bagb ,_ggda :len (_ffabe ),_accb :len (_dgdeg ),_gefg :make (map[uint64 ]compositeCell ,len (_ffabe )*len (_dgdeg ))};
if _fcbb {_cb .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_bfedg ._ggda ,_bfedg ._accb ,len (_ffabe ),len (_dgdeg ));_cb .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_ffabe );
_cb .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_dgdeg );};for _acccg ,_cbaag :=range _dgdeg {for _fbga ,_fecge :=range _ffabe {_ebaf ,_efedg :=_bfedg .getComposite (_fecge ,_cbaag );if len (_ebaf )==0{continue ;
};if _fcbb {_fg .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_fbga ,_acccg ,_fecge ,_cbaag ,_geabcf (_ebaf .merge ().text (),50));};_bcgg .putComposite (_fbga ,_acccg ,_ebaf ,_efedg );
// intersects (begins on the following line) reports whether the two rulings cross: one
// must be of kind `_agdda` and the other of kind `_ggdg`, and the primary coordinate of
// each must lie within the other's secondary range [`_dbef`, `_efdg`] widened by `_abc`
// on both sides. The result is also printed when `_dcdc` is set.
};};return &_bcgg ;};func (_adbac *ruling )intersects (_egdaa *ruling )bool {_aebg :=(_adbac ._bgcf ==_agdda &&_egdaa ._bgcf ==_ggdg )||(_egdaa ._bgcf ==_agdda &&_adbac ._bgcf ==_ggdg );_gdgfe :=func (_debaa ,_cgbc *ruling )bool {return _debaa ._dbef -_abc <=_cgbc ._acabca &&_cgbc ._acabca <=_debaa ._efdg +_abc ;
};_bgcde :=_gdgfe (_adbac ,_egdaa );_fccc :=_gdgfe (_egdaa ,_adbac );if _dcdc {_fg .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_aebg ,_bgcde ,_fccc ,_aebg &&_bgcde &&_fccc ,_adbac ,_egdaa );
// secMinMax (begins on the following line) returns the smallest `_dbef` and the largest
// `_efdg` over all rulings in the list. It indexes element 0, so the list must not be empty.
};return _aebg &&_bgcde &&_fccc ;};func (_defeb rulingList )secMinMax ()(float64 ,float64 ){_ggbd ,_abee :=_defeb [0]._dbef ,_defeb [0]._efdg ;for _ ,_gebb :=range _defeb [1:]{if _gebb ._dbef < _ggbd {_ggbd =_gebb ._dbef ;};if _gebb ._efdg > _abee {_abee =_gebb ._efdg ;
// llyRange (begins on the following line) returns the part of `_cgcdd` (indexes into the
// para list) whose paras have Lly in [`_gcc`, `_aceb`], or nil when the range lies wholly
// outside the Lly values of the first and last indexed paras.
// NOTE(review): the binary searches assume `_cgcdd` orders the paras by increasing Lly
// and has the same length as the para list - confirm against the caller.
};};return _ggbd ,_abee ;};func (_aggec paraList )llyRange (_cgcdd []int ,_gcc ,_aceb float64 )[]int {_fbfg :=len (_aggec );if _aceb < _aggec [_cgcdd [0]].Lly ||_gcc > _aggec [_cgcdd [_fbfg -1]].Lly {return nil ;};_fegd :=_gf .Search (_fbfg ,func (_agaf int )bool {return _aggec [_cgcdd [_agaf ]].Lly >=_gcc });
// stateStack.empty reports whether the stack has no entries. findPrimSec returns the first
// ruling whose primary coordinate passes the `_cgcb` test against `_cggcd` and whose
// secondary range, widened by `_abc`, contains `_baee`; it returns nil when none matches.
_efcb :=_gf .Search (_fbfg ,func (_caef int )bool {return _aggec [_cgcdd [_caef ]].Lly > _aceb });return _cgcdd [_fegd :_efcb ];};func (_cdge *stateStack )empty ()bool {return len (*_cdge )==0};func (_afac rulingList )findPrimSec (_cggcd ,_baee float64 )*ruling {for _ ,_cdgf :=range _afac {if _cgcb (_cdgf ._acabca -_cggcd )&&_cdgf ._dbef -_abc <=_baee &&_baee <=_cdgf ._efdg +_abc {return _cdgf ;
};};return nil ;};
2021-09-23 22:37:42 +00:00
2021-10-22 10:53:20 +00:00
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
2022-02-05 21:34:53 +00:00
// RenderMode is an int-backed type; setTextRenderMode converts its int argument to it.
type RenderMode int ;
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// String returns a string describing `pt`: a "-PageText: N elements" header line, one
// line per element, and a matching "+PageText: N elements" footer line.
func (pt PageText) String() string {
	summary := _fg.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(pt._gfgg))
	lines := make([]string, 0, len(pt._gfgg)+2)
	lines = append(lines, "\u002d"+summary)
	for _, element := range pt._gfgg {
		lines = append(lines, element.String())
	}
	lines = append(lines, "\u002b"+summary)
	return _g.Join(lines, "\u000a")
}

// primMinMax returns the smallest and largest primary coordinate (`_acabca`) in the list.
// It indexes element 0, so the list must not be empty.
func (rulings rulingList) primMinMax() (float64, float64) {
	lo := rulings[0]._acabca
	hi := lo
	for _, r := range rulings[1:] {
		switch prim := r._acabca; {
		case prim < lo:
			lo = prim
		case prim > hi:
			hi = prim
		}
	}
	return lo, hi
}

// drawRectangle appends a closed rectangular subpath with corner (`_cege`, `_fdbg`),
// width `_bgeb` and height `_cfcg`: newSubPath, moveTo, three lineTo calls, closePath.
// When `_dfce` is set the rectangle is logged in device coordinates first.
func (ss *shapesState) drawRectangle(_cege, _fdbg, _bgeb, _cfcg float64) {
	farX, farY := _cege+_bgeb, _fdbg+_cfcg
	if _dfce {
		ll := ss.devicePoint(_cege, _fdbg)
		ur := ss.devicePoint(farX, farY)
		rect := _ge.PdfRectangle{Llx: ll.X, Lly: ll.Y, Urx: ur.X, Ury: ur.Y}
		// Decoded: "drawRectangle: %6.2f".
		_cb.Log.Info("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066", rect)
	}
	ss.newSubPath()
	ss.moveTo(_cege, _fdbg)
	ss.lineTo(farX, _fdbg)
	ss.lineTo(farX, farY)
	ss.lineTo(_cege, farY)
	ss.closePath()
}

// del removes `_caf` from the set.
func (set intSet) del(_caf int) {
	delete(set, _caf)
}

// _fbgb returns a new textObject wired to the given extractor, resources, graphics
// state, text state and state stack, with both of its matrices set to the identity.
func _fbgb(_cbfg *Extractor, _efbc *_ge.PdfPageResources, _cgg _aa.GraphicsState, _bbbfg *textState, _aeaf *stateStack) *textObject {
	obj := textObject{
		_cgbd: _cbfg,
		_ddf:  _efbc,
		_afff: _cgg,
		_aaaa: _aeaf,
		_agff: _bbbfg,
		_dddc: _ce.IdentityMatrix(),
		_cfgd: _ce.IdentityMatrix(),
	}
	return &obj
}

// _baec reports whether `_fgac` divided by the larger of `_ccea` and `_beaga` is below
// the threshold `_cbbff`.
func _baec(_fgac, _beaga float64) bool {
	denominator := _gb.Max(_ccea, _beaga)
	return _fgac/denominator < _cbbff
}

// _faff classifies a segment from its X extent `_ggag`, its Y extent `_dgdfa` and a
// minimum length `_egge`. It returns `_ggdg` when the X extent reaches the minimum and
// `_baec` accepts (Y, X); otherwise `_agdda` when the Y extent reaches the minimum and
// `_baec` accepts (X, Y); otherwise `_ebdfe`.
func _faff(_ggag, _dgdfa, _egge float64) rulingKind {
	switch {
	case _ggag >= _egge && _baec(_dgdfa, _ggag):
		return _ggdg
	case _dgdfa >= _egge && _baec(_ggag, _dgdfa):
		return _agdda
	default:
		return _ebdfe
	}
}

// compositeRowCorridors returns, for every row index from 1 upwards that has at least
// one composite cell, the result of `_eeda` on the composite cells of that row.
// Rows without composite cells have no entry in the returned map.
func (t *textTable) compositeRowCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, t._accb)
	if _fcbb {
		// Decoded: "compositeRowCorridors: h=%d".
		_cb.Log.Info("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064", t._accb)
	}
	for y := 1; y < t._accb; y++ {
		var rowCells []compositeCell
		for x := 0; x < t._ggda; x++ {
			cell, ok := t._gefg[_dffgb(x, y)]
			if !ok {
				continue
			}
			rowCells = append(rowCells, cell)
		}
		if len(rowCells) == 0 {
			continue
		}
		rowCorridors := _eeda(rowCells)
		corridors[y] = rowCorridors
		if _fcbb {
			// Decoded: "   %2d: %6.2f\n".
			_fg.Printf("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a", y, rowCorridors)
		}
	}
	return corridors
}

// encloses reports whether [`_ebdfa`, `_agcd`] lies within the ruling's secondary range
// [`_dbef`, `_efdg`] widened by `_abc` on both sides.
func (r *ruling) encloses(_ebdfa, _agcd float64) bool {
	lower := r._dbef - _abc
	upper := r._efdg + _abc
	return lower <= _ebdfa && _agcd <= upper
}

// _ace reports whether both `_daeaa` and `_gabad` hold for the two rectangles.
func _ace(_ecaa, _bebb _ge.PdfRectangle) bool {
	if !_daeaa(_ecaa, _bebb) {
		return false
	}
	return _gabad(_ecaa, _bebb)
}

// put stores `_fcae` in the table at the key `_dffgb` derives from (`_addg`, `_dggbb`).
func (t *textTable) put(_addg, _dggbb int, _fcae *textPara) {
	key := _dffgb(_addg, _dggbb)
	t._fedcd[key] = _fcae
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
func (_dbf *Extractor )ExtractText ()(string ,error ){_dg ,_ ,_ ,_feb :=_dbf .ExtractTextWithStats ();return _dg ,_feb ;};func (_ddgg *textTable )growTable (){_bffcb :=func (_gaedb paraList ){_ddgg ._accb ++;for _babf :=0;_babf < _ddgg ._ggda ;_babf ++{_bdcae :=_gaedb [_babf ];
_ddgg .put (_babf ,_ddgg ._accb -1,_bdcae );};};_ceda :=func (_ccgg paraList ){_ddgg ._ggda ++;for _cgefe :=0;_cgefe < _ddgg ._accb ;_cgefe ++{_edab :=_ccgg [_cgefe ];_ddgg .put (_ddgg ._ggda -1,_cgefe ,_edab );};};if _aggb {_ddgg .log ("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce");
};for _cgdg :=0;;_cgdg ++{_aaba :=false ;_ffdf :=_ddgg .getDown ();_ecaec :=_ddgg .getRight ();if _aggb {_fg .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cgdg ,_ddgg );_fg .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a",_ffdf );
_fg .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a",_ecaec );};if _ffdf !=nil &&_ecaec !=nil {_fdbcg :=_ffdf [len (_ffdf )-1];if !_fdbcg .taken ()&&_fdbcg ==_ecaec [len (_ecaec )-1]{_bffcb (_ffdf );if _ecaec =_ddgg .getRight ();
_ecaec !=nil {_ceda (_ecaec );_ddgg .put (_ddgg ._ggda -1,_ddgg ._accb -1,_fdbcg );};_aaba =true ;};};if !_aaba &&_ffdf !=nil {_bffcb (_ffdf );_aaba =true ;};if !_aaba &&_ecaec !=nil {_ceda (_ecaec );_aaba =true ;};if !_aaba {break ;};};};func (_deccg *shapesState )closePath (){if _deccg ._faec {_deccg ._becc =append (_deccg ._becc ,_dfb (_deccg ._afcbb ));
_deccg ._faec =false ;}else if len (_deccg ._becc )==0{if _dfce {_cb .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");};_deccg ._faec =false ;return ;};_deccg ._becc [len (_deccg ._becc )-1].close ();
if _dfce {_cb .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_deccg );};};func (_gebf *textMark )bbox ()_ge .PdfRectangle {return _gebf .PdfRectangle };func (_cadd *stateStack )pop ()*textState {if _cadd .empty (){return nil ;
};_ccc :=*(*_cadd )[len (*_cadd )-1];*_cadd =(*_cadd )[:len (*_cadd )-1];return &_ccc ;};
2021-12-14 01:08:28 +00:00
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
2022-02-05 21:34:53 +00:00
// ExtractPageText runs extractPageText on the extractor's contents (`_bb`) and resources
// (`_fe`) with the identity matrix and 0, then calls computeViews on the result and
// passes it to `_fbgac`. An error from extractPageText is returned unless it is
// ErrColorOutOfRange, which is tolerated; an error from `_fbgac` is returned as is.
// On success it returns the page text and the two integers produced by extractPageText.
// NOTE(review): when ErrColorOutOfRange is tolerated, `_ebg` is used without a nil check -
// presumably extractPageText still returns a usable value in that case; confirm.
func (_cfg *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_ebg ,_beag ,_ba ,_gdd :=_cfg .extractPageText (_cfg ._bb ,_cfg ._fe ,_ce .IdentityMatrix (),0);if _gdd !=nil &&_gdd !=_ge .ErrColorOutOfRange {return nil ,0,0,_gdd ;};_ebg .computeViews ();
// _eeda (begins on the following line) computes candidate row borders for the cells
// in `_fdgc`. It gathers the lines of all cells, sorts them by `_dgce` (ties within the
// `_cgcb` tolerance are broken by Llx), and walks them keeping the line with the lowest
// Lly seen so far. Whenever a line's Ury is below that Lly, the midpoint of the gap is
// recorded as a border and a new group of lines is started.
_gdd =_fbgac (_ebg );if _gdd !=nil {return nil ,0,0,_gdd ;};return _ebg ,_beag ,_ba ,nil ;};func _eeda (_fdgc []compositeCell )[]float64 {var _ddee []*textLine ;_gfea :=0;for _ ,_dgced :=range _fdgc {_gfea +=len (_dgced .paraList );_ddee =append (_ddee ,_dgced .lines ()...);
};_gf .Slice (_ddee ,func (_cbad ,_agfb int )bool {_bgdab ,_cfbga :=_ddee [_cbad ],_ddee [_agfb ];_addee ,_cead :=_bgdab ._dgce ,_cfbga ._dgce ;if !_cgcb (_addee -_cead ){return _addee < _cead ;};return _bgdab .Llx < _cfbga .Llx ;});if _fcbb {_fg .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_gfea ,len (_ddee ));
// NOTE(review): `_ddee [0]` on the next line panics if the cells contain no lines at
// all - presumably every composite cell has at least one line; confirm.
for _dfff ,_ebgeb :=range _ddee {_fg .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dfff ,_ebgeb );};};var _ebfa []float64 ;_fcfg :=_ddee [0];var _cccd [][]*textLine ;_bcgff :=[]*textLine {_fcfg };for _acaff ,_fccfg :=range _ddee [1:]{if _fccfg .Ury < _fcfg .Lly {_bbcf :=0.5*(_fccfg .Ury +_fcfg .Lly );
if _fcbb {_fg .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_acaff ,_fccfg .Ury ,_fcfg .Lly ,_bbcf ,_fcfg ,_fccfg );
};_ebfa =append (_ebfa ,_bbcf );_cccd =append (_cccd ,_bcgff );_bcgff =nil ;};_bcgff =append (_bcgff ,_fccfg );if _fccfg .Lly < _fcfg .Lly {_fcfg =_fccfg ;};};if len (_bcgff )> 0{_cccd =append (_cccd ,_bcgff );};if _fcbb {_fg .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_ebfa );
};if _fcbb {_cb .Log .Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len (_fdgc ));for _bgcec ,_cebfg :=range _fdgc {_fg .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bgcec ,_cebfg );};_cb .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_cccd ));
// Validation: the borders are only kept if every group of lines has, for every cell, at
// least one line for which the cell's hasLines is true. Otherwise nil is returned.
for _afbda ,_dbee :=range _cccd {_fg .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_afbda ,len (_dbee ));for _gefeg ,_ggdb :=range _dbee {_fg .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_gefeg ,_ggdb );};};};_eaebe :=true ;
for _cgfdg ,_cdacd :=range _cccd {_agfba :=true ;for _dfdc ,_cadcc :=range _fdgc {if _fcbb {_fg .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_cgfdg ,len (_cccd ),_dfdc ,len (_fdgc ),_cadcc );
};if !_cadcc .hasLines (_cdacd ){if _fcbb {_fg .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_cgfdg ,len (_cccd ),_dfdc ,len (_fdgc ));
};_agfba =false ;break ;};};if !_agfba {_eaebe =false ;break ;};};if !_eaebe {if _fcbb {_cb .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg");
};_ebfa =nil ;};if _fcbb &&_ebfa !=nil {_fg .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_ebfa );};return _ebfa ;};
// _cbcg reports whether the absolute value of `_aafg` is strictly below the
// package tolerance `_gfge`.
func _cbcg(_aafg float64) bool {
	magnitude := _gb.Abs(_aafg)
	return magnitude < _gfge
}

// snapToGroupsDirection groups the rulings in the list by their `_acabca`
// coordinate (the one compared by alignsPrimary), gives every member of a
// group the value returned by mergePrimary for that group, and then merges
// each group's splitSec partitions into single rulings.
//
// The receiver is sorted in place with sortStrict and its elements' `_acabca`
// fields are overwritten. The list must be non-empty: the first element is
// read unconditionally.
func (_eaea rulingList) snapToGroupsDirection() rulingList {
	_eaea.sortStrict()

	// Walk the sorted list. A new group is opened whenever a ruling lies more
	// than `_gfge` beyond the current group's first ruling.
	groups := make(map[*ruling]rulingList, len(_eaea))
	leader := _eaea[0]
	groups[leader] = rulingList{leader}
	for _, candidate := range _eaea[1:] {
		if candidate._acabca < leader._acabca-_ddfc {
			_cb.Log.Error("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073", leader, candidate)
		}
		if candidate._acabca > leader._acabca+_gfge {
			leader = candidate
			groups[leader] = rulingList{candidate}
			continue
		}
		groups[leader] = append(groups[leader], candidate)
	}

	// Compute every group's merged coordinate first, then write it back to all
	// rulings, so no group is measured after its members were modified.
	primaryOf := make(map[*ruling]float64, len(groups))
	leaderOf := make(map[*ruling]*ruling, len(_eaea))
	for head, members := range groups {
		primaryOf[head] = members.mergePrimary()
		for _, member := range members {
			leaderOf[member] = head
		}
	}
	for _, member := range _eaea {
		member._acabca = primaryOf[leaderOf[member]]
	}

	// Merge each secondary partition into one ruling, dropping a result that
	// aligns with the previously appended one on both axes.
	merged := make(rulingList, 0, len(_eaea))
	for _, members := range groups {
		for i, part := range members.splitSec() {
			combined := part.merge()
			if n := len(merged); n > 0 {
				previous := merged[n-1]
				if previous.alignsPrimary(combined) && previous.alignsSec(combined) {
					_cb.Log.Error("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073", i, previous, combined)
					continue
				}
			}
			merged = append(merged, combined)
		}
	}
	merged.sortStrict()
	return merged
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
// It calls ExtractPageText and returns the Text() of its result; if that call
// fails, the text is empty and the counts and the error are passed through.
func (_afd *Extractor) ExtractTextWithStats() (_bc string, _cac int, _ebagc int, _bcg error) {
	pageText, _cac, _ebagc, _bcg := _afd.ExtractPageText()
	if _bcg == nil {
		return pageText.Text(), _cac, _ebagc, nil
	}
	return "", _cac, _ebagc, _bcg
}

// reset sets both matrices of the text object (`_dddc` and `_cfgd`) to the
// identity matrix and clears `_cdca`.
func (_dbbf *textObject) reset() {
	_dbbf._cdca = nil
	_dbbf._cfgd = _ce.IdentityMatrix()
	_dbbf._dddc = _ce.IdentityMatrix()
}

// textLine is a rectangle together with a sequence of words.
type textLine struct {
	_ge.PdfRectangle
	// _dgce is a depth value; presumably the one textPara.depth reads from a
	// paragraph's first line (confirm against the type of textPara._ffcb).
	_dgce float64
	// _badf holds the words visited by markWordBoundaries and toTextMarks.
	_badf []*textWord
	// _dedb scales the gap threshold used by markWordBoundaries.
	_dedb float64
}

// Values of rulingKind. `_gfdea` returns `_ggdg` for a sufficiently wide
// rectangle, `_agdda` for a sufficiently tall one and `_ebdfe` otherwise.
const (
	_ebdfe rulingKind = iota
	_ggdg
	_agdda
)

// textMark is a rectangle with the text, font, matrix, point and colour
// attributes recorded for it.
type textMark struct {
	_ge.PdfRectangle
	_dgcf int
	// _gbf is the string printed in quotes by (*textMark).String.
	_gbf   string
	_dcdbd string
	_gggd  *_ge.PdfFont
	// _adcd is printed as "fontsize" by (*textMark).String.
	_adcd float64
	_cebf float64
	_cda  _ce.Matrix
	_egfb _ce.Point
	_gdfb _ge.PdfRectangle
	_cdcf _fd.Color
	_ceee _fd.Color
}

// markWordBoundaries sets `_agde` on every word after the first whose `_egdc`
// value relative to the preceding word is at least `_bbae * _dedb`.
func (_gfeg *textLine) markWordBoundaries() {
	threshold := _bbae * _gfeg._dedb
	followers := _gfeg._badf[1:]
	for i := range followers {
		// followers[i] is _badf[i+1], so _badf[i] is the word just before it.
		if gap := _egdc(followers[i], _gfeg._badf[i]); gap >= threshold {
			followers[i]._agde = true
		}
	}
}

// depth returns -1 when `_caaad` is set, otherwise the `_dgce` of the first
// element of `_ffcb` if there is one, otherwise the depth of `_defe`.
func (_beba *textPara) depth() float64 {
	switch {
	case _beba._caaad:
		return -1.0
	case len(_beba._ffcb) > 0:
		return _beba._ffcb[0]._dgce
	default:
		return _beba._defe.depth()
	}
}

// vertsHorzs splits the list by kind. The first result holds the rulings of
// kind `_agdda` and the second those of kind `_ggdg`; rulings of any other
// kind are dropped.
func (_ecec rulingList) vertsHorzs() (rulingList, rulingList) {
	var ofKindAgdda, ofKindGgdg rulingList
	for _, r := range _ecec {
		if r._bgcf == _agdda {
			ofKindAgdda = append(ofKindAgdda, r)
		} else if r._bgcf == _ggdg {
			ofKindGgdg = append(ofKindGgdg, r)
		}
	}
	return ofKindAgdda, ofKindGgdg
}
2021-09-23 22:37:42 +00:00
2022-02-05 21:34:53 +00:00
// PageText represents the layout of text on a device page.
type PageText struct {
	// _gfgg holds the internal text marks.
	_gfgg []*textMark
	_bfbe string
	// _cdd holds exported TextMark values.
	_cdd []TextMark
	// _afga holds exported TextTable values.
	_afga []TextTable
	_edbb _ge.PdfRectangle
	// _agfe and _gaga hold two separate lists of path sections.
	_agfe []pathSection
	_gaga []pathSection
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
type ImageMark struct{Image *_ge .Image ;
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ;Height float64 ;
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// Position of the image in PDF coordinates (lower left corner).
X float64 ;Y float64 ;
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// Angle in degrees, if rotated.
Angle float64 ;};func (_ddfeb gridTile )numBorders ()int {_eaegd :=0;if _ddfeb ._bbeg {_eaegd ++;};if _ddfeb ._efcdf {_eaegd ++;};if _ddfeb ._gbbc {_eaegd ++;};if _ddfeb ._ecf {_eaegd ++;};return _eaegd ;};var _gbeb =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_fd .White ,StrokeColor :_fd .White };
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_faf *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_bf :=&imageExtractContext {_fec :options };_cbcc :=_bf .extractContentStreamImages (_faf ._bb ,_faf ._fe );if _cbcc !=nil {return nil ,_cbcc ;};return &PageImages {Images :_bf ._ac },nil ;
};func _gfdea (_febfa _ge .PdfRectangle )rulingKind {_dbec :=_febfa .Width ();_ceceb :=_febfa .Height ();if _dbec > _ceceb {if _dbec >=_gbca {return _ggdg ;};}else {if _ceceb >=_gbca {return _agdda ;};};return _ebdfe ;};func (_beef paraList )inTile (_bbdaf gridTile )paraList {var _baccf paraList ;
for _ ,_gffg :=range _beef {if _bbdaf .contains (_gffg .PdfRectangle ){_baccf =append (_baccf ,_gffg );};};if _fcbb {_fg .Printf ("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n",_bbdaf ,len (_baccf ));
for _caga ,_edeb :=range _baccf {_fg .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_caga ,_edeb );};_fg .Println ("");};return _baccf ;};func (_dggb *textTable )newTablePara ()*textPara {_efgeg :=_dggb .computeBbox ();_ccdd :=&textPara {PdfRectangle :_efgeg ,_cacf :_efgeg ,_defe :_dggb };
if _fcbb {_cb .Log .Info ("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073",_ccdd );};return _ccdd ;};var (_cg =_cc .New ("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072");_gc =_cc .New ("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072");
2021-12-14 01:08:28 +00:00
);
2022-02-05 21:34:53 +00:00
// New returns an Extractor instance for extracting content from the input PDF page.
// It reads the page's content streams and media box, returning an error if
// either cannot be obtained. A media box whose lower-left coordinate exceeds
// its upper-right coordinate on an axis has that pair swapped, and the swap is
// logged.
func New(page *_ge.PdfPage) (*Extractor, error) {
	const usageKey = "\u0065\u0078\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077"

	streams, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}
	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _fg.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	extractor := &Extractor{
		_bb:  streams,
		_fe:  page.Resources,
		_ca:  *mediaBox,
		_dfd: map[string]fontEntry{},
		_af:  map[string]textResult{},
	}

	box := &extractor._ca
	if box.Llx > box.Urx {
		_cb.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", *box)
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		_cb.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", *box)
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	_e.TrackUse(usageKey)
	return extractor, nil
}

// _eegb reports whether the two values differ by at most `_abc`.
func _eegb(_ecgab, _egadd float64) bool {
	difference := _gb.Abs(_ecgab - _egadd)
	return difference <= _abc
}

// setTextRise stores `_adb` in the `_acaf` field of the object's `_agff`
// state. It does nothing on a nil receiver.
func (_fee *textObject) setTextRise(_adb float64) {
	if _fee != nil {
		_fee._agff._acaf = _adb
	}
}

// makeRemovals returns a map with one empty word set for every key of `_ebae`.
func (_bbbe *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	removals := make(map[int]map[*textWord]struct{}, len(_bbbe._ebae))
	for key := range _bbbe._ebae {
		removals[key] = map[*textWord]struct{}{}
	}
	return removals
}

// applyTables returns a list holding one newTablePara result per table in
// `_eecb`, followed by every paragraph of the receiver whose `_gcdf` flag is
// not set.
func (_ffaeed paraList) applyTables(_eecb []*textTable) paraList {
	var result paraList
	for _, table := range _eecb {
		result = append(result, table.newTablePara())
	}
	for _, para := range _ffaeed {
		if !para._gcdf {
			result = append(result, para)
		}
	}
	return result
}

// getDown collects, for every x in [0, `_ggda`), the `_gbff` neighbour of the
// cell at (x, `_accb`-1). It returns nil if any of those neighbours is taken,
// or if the `_cegg` link of one collected paragraph is not the next collected
// paragraph.
func (_ddabf *textTable) getDown() paraList {
	below := make(paraList, _ddabf._ggda)
	for x := range below {
		candidate := _ddabf.get(x, _ddabf._accb-1)._gbff
		if candidate.taken() {
			return nil
		}
		below[x] = candidate
	}
	for x := 1; x < len(below); x++ {
		if below[x-1]._cegg != below[x] {
			return nil
		}
	}
	return below
}

// alignsSec reports whether the [`_dbef`, `_efdg`] ranges of the two rulings
// overlap once each lower bound is relaxed by `_gfge + 1.0`.
func (_gabb *ruling) alignsSec(_dbbe *ruling) bool {
	const slack = _gfge + 1.0
	if _gabb._dbef-slack > _dbbe._efdg {
		return false
	}
	return _dbbe._dbef-slack <= _gabb._efdg
}

// _fedc builds a lineRuling from two points and a colour and converts it with
// asRuling. It returns (nil, false) when `_gdeg` classifies the segment as
// `_ebdfe`.
func _fedc(_cfgcb, _ebaef _ce.Point, _cgcga _fd.Color) (*ruling, bool) {
	kind := _gdeg(_cfgcb, _ebaef)
	if kind == _ebdfe {
		return nil, false
	}
	line := lineRuling{_gfcd: _cfgcb, _gccg: _ebaef, _aaga: kind, Color: _cgcga}
	return line.asRuling()
}
// minDepth returns `_feaf` reduced by the distance between the bag's `Ury`
// and its `_adcc` value.
func (_eeb *wordBag) minDepth() float64 {
	offset := _eeb.Ury - _eeb._adcc
	return _eeb._feaf - offset
}

// log writes a description of the table when the `_fcbb` flag is set: one
// summary line through the logger, then one printed line per non-nil cell with
// its position, rectangle, text shortened by `_geabcf(…, 50)`, and rune count.
func (_eaca *textTable) log(_ggfa string) {
	if !_fcbb {
		return
	}
	_cb.Log.Info("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066", _ggfa, _eaca._ggda, _eaca._accb, _eaca._bagb, _eaca.PdfRectangle)
	for y := 0; y < _eaca._accb; y++ {
		for x := 0; x < _eaca._ggda; x++ {
			if cell := _eaca.get(x, y); cell != nil {
				_fg.Printf("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a", x, y, cell.PdfRectangle, _geabcf(cell.text(), 50), _d.RuneCountInString(cell.text()))
			}
		}
	}
}

// rulingKind is the integer type of the ruling classification constants.
type rulingKind int

// _fcffc reports whether the two points have identical X and Y values.
func _fcffc(_ggefa, _cabbe _ce.Point) bool {
	if _ggefa.X != _cabbe.X {
		return false
	}
	return _ggefa.Y == _cabbe.Y
}

// appendMark adds the mark to the word, extends the word's rectangle with
// `_gfc` to cover the mark, raises `_egdce` to the mark's `_adcd` if that is
// larger, and recomputes `_gaff` as `_fcdbb.Ury` minus the word's `Lly`.
func (_fgcce *textWord) appendMark(_bcebb *textMark, _fcdbb _ge.PdfRectangle) {
	_fgcce._daggf = append(_fgcce._daggf, _bcebb)
	_fgcce.PdfRectangle = _gfc(_fgcce.PdfRectangle, _bcebb.PdfRectangle)
	if size := _bcebb._adcd; size > _fgcce._egdce {
		_fgcce._egdce = size
	}
	_fgcce._gaff = _fcdbb.Ury - _fgcce.PdfRectangle.Lly
}

// taken reports whether the paragraph is nil or has its `_gcdf` flag set.
func (_fgcf *textPara) taken() bool {
	if _fgcf == nil {
		return true
	}
	return _fgcf._gcdf
}
// toTextMarks returns the TextMarks of every word in the line, in order. A
// word whose `_agde` flag is set is preceded by a mark created by `_dgde` for
// a single space. `_fabgc` is passed through to the helpers.
func (_dgaa *textLine) toTextMarks(_fabgc *int) []TextMark {
	var marks []TextMark
	for _, word := range _dgaa._badf {
		if word._agde {
			marks = _dgde(marks, _fabgc, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_fabgc)...)
	}
	return marks
}

// merge combines the rulings of a non-empty list into one new ruling. Kind,
// `_aagg` and colour come from the first element, `_acabca` is the mean over
// all elements, `_dbef` is the minimum and `_efdg` the maximum.
func (_fcff rulingList) merge() *ruling {
	first := _fcff[0]
	sum, low, high := first._acabca, first._dbef, first._efdg
	for _, r := range _fcff[1:] {
		sum += r._acabca
		if r._dbef < low {
			low = r._dbef
		}
		if r._efdg > high {
			high = r._efdg
		}
	}
	merged := &ruling{
		_bgcf:   _fcff[0]._bgcf,
		_aagg:   _fcff[0]._aagg,
		Color:   _fcff[0].Color,
		_acabca: sum / float64(len(_fcff)),
		_dbef:   low,
		_efdg:   high,
	}
	if _cabea {
		_cb.Log.Info("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073", len(_fcff), merged)
		for i, r := range _fcff {
			_fg.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r)
		}
	}
	return merged
}

// _gabad reports whether the [Lly, Ury] ranges of the two rectangles overlap
// (touching counts as overlap).
func _gabad(_ggdd, _cfbb _ge.PdfRectangle) bool {
	if _ggdd.Lly > _cfbb.Ury {
		return false
	}
	return _cfbb.Lly <= _ggdd.Ury
}

// toTextMarks converts every mark of the word with ToTextMark and accumulates
// the results through `_gge`.
func (_fgdg *textWord) toTextMarks(_badc *int) []TextMark {
	var marks []TextMark
	for _, mark := range _fgdg._daggf {
		marks = _gge(marks, _badc, mark.ToTextMark())
	}
	return marks
}

// subdivide returns a table in which every composite cell stored in `_gefg`
// has been split along the row and column corridors. The receiver itself is
// returned when either corridor map is empty. Each sub-cell is placed at the
// offset `_cfcgd` computed for its composite cell plus its position inside
// the split result.
func (_efbe *textTable) subdivide() *textTable {
	_efbe.logComposite("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e")
	rowCorridors := _efbe.compositeRowCorridors()
	colCorridors := _efbe.compositeColCorridors()
	if _fcbb {
		_cb.Log.Info("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073", _daeb(rowCorridors), _daeb(colCorridors))
	}
	if len(rowCorridors) == 0 || len(colCorridors) == 0 {
		return _efbe
	}

	_gdgd(rowCorridors)
	_gdgd(colCorridors)
	if _fcbb {
		_cb.Log.Info("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073", _daeb(rowCorridors), _daeb(colCorridors))
	}

	yOffsets, height := _cfcgd(_efbe._accb, rowCorridors)
	xOffsets, width := _cfcgd(_efbe._ggda, colCorridors)
	result := &textTable{
		PdfRectangle: _efbe.PdfRectangle,
		_bagb:        _efbe._bagb,
		_accb:        height,
		_ggda:        width,
		_fedcd:       make(map[uint64]*textPara, width*height),
	}
	if _fcbb {
		_cb.Log.Info("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076", _efbe._ggda, _efbe._accb, width, height, _daeb(rowCorridors), _daeb(colCorridors), yOffsets, xOffsets)
	}

	for y := 0; y < _efbe._accb; y++ {
		y0 := yOffsets[y]
		for x := 0; x < _efbe._ggda; x++ {
			x0 := xOffsets[x]
			if _fcbb {
				_fg.Printf("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a", x, y, x0, y0)
			}
			composite, found := _efbe._gefg[_dffgb(x, y)]
			if !found {
				continue
			}
			pieces := composite.split(rowCorridors[y], colCorridors[x])
			for dy := 0; dy < pieces._accb; dy++ {
				for dx := 0; dx < pieces._ggda; dx++ {
					cell := pieces.get(dx, dy)
					result.put(x0+dx, y0+dy, cell)
					if _fcbb {
						_fg.Printf("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", x0+dx, y0+dy, cell)
					}
				}
			}
		}
	}
	return result
}

// _fdcff builds a textWord from a non-empty slice of marks. Its rectangle is
// the `_gfc` union of the marks' rectangles, `_egdce` is the largest `_adcd`
// among the marks, and `_gaff` is `_ccbcb.Ury` minus the union's `Lly`.
func _fdcff(_cagea []*textMark, _ccbcb _ge.PdfRectangle) *textWord {
	bounds, largest := _cagea[0].PdfRectangle, _cagea[0]._adcd
	for _, mark := range _cagea[1:] {
		bounds = _gfc(bounds, mark.PdfRectangle)
		if mark._adcd > largest {
			largest = mark._adcd
		}
	}
	return &textWord{
		PdfRectangle: bounds,
		_daggf:       _cagea,
		_gaff:        _ccbcb.Ury - bounds.Lly,
		_egdce:       largest,
	}
}

// The four markKind values, numbered from 0 upwards.
const (
	_afgaa markKind = iota
	_dggc
	_baeg
	_fbbc
)

// pathSection is a list of subpaths together with a colour.
type pathSection struct {
	_ccfga []*subpath
	_fd.Color
}

// gridTile is a rectangle with four boolean flags; numBorders counts how many
// of them are set.
type gridTile struct {
	_ge.PdfRectangle
	_ecf, _bbeg, _gbbc, _efcdf bool
}

// setHorizScaling stores `_bbg` in the `_bbbc` field of the object's `_agff`
// state. It does nothing on a nil receiver.
func (_gbeg *textObject) setHorizScaling(_bbg float64) {
	if _gbeg != nil {
		_gbeg._agff._bbbc = _bbg
	}
}

// _fdabd is the constant one thousandth.
const _fdabd = 1.0 / 1000.0

// splitSec sorts the non-empty list in place by (`_dbef`, `_efdg`) and
// partitions it into groups. A group starts from the first ruling not yet
// assigned and then absorbs, in list order, every unassigned ruling that
// alignsSec with a ruling already in the group.
func (_dccf rulingList) splitSec() []rulingList {
	_gf.Slice(_dccf, func(_bbbbeg, _acccf int) bool {
		left, right := _dccf[_bbbbeg], _dccf[_acccf]
		if left._dbef == right._dbef {
			return left._efdg < right._efdg
		}
		return left._dbef < right._dbef
	})

	assigned := make(map[*ruling]struct{}, len(_dccf))
	// collect builds the group seeded by `seed`, marking every member as assigned.
	collect := func(seed *ruling) rulingList {
		group := rulingList{seed}
		assigned[seed] = struct{}{}
		for _, candidate := range _dccf {
			if _, done := assigned[candidate]; done {
				continue
			}
			for _, member := range group {
				if candidate.alignsSec(member) {
					group = append(group, candidate)
					assigned[candidate] = struct{}{}
					break
				}
			}
		}
		return group
	}

	groups := []rulingList{collect(_dccf[0])}
	for _, r := range _dccf[1:] {
		if _, done := assigned[r]; !done {
			groups = append(groups, collect(r))
		}
	}
	return groups
}

// _fbbab maps a rune to a string holding one code point in the range
// U+0300..U+0359.
var (
	_fbbab = map[rune]string{
		0x0060: "\u0300",
		0x02CB: "\u0300",
		0x0027: "\u0301",
		0x00B4: "\u0301",
		0x02B9: "\u0301",
		0x02CA: "\u0301",
		0x005E: "\u0302",
		0x02C6: "\u0302",
		0x007E: "\u0303",
		0x02DC: "\u0303",
		0x00AF: "\u0304",
		0x02C9: "\u0304",
		0x02D8: "\u0306",
		0x02D9: "\u0307",
		0x00A8: "\u0308",
		0x00B0: "\u030a",
		0x02DA: "\u030a",
		0x02BA: "\u030b",
		0x02DD: "\u030b",
		0x02C7: "\u030c",
		0x02C8: "\u030d",
		0x0022: "\u030e",
		0x02BB: "\u0312",
		0x02BC: "\u0313",
		0x0486: "\u0313",
		0x055A: "\u0313",
		0x02BD: "\u0314",
		0x0485: "\u0314",
		0x0559: "\u0314",
		0x02D4: "\u031d",
		0x02D5: "\u031e",
		0x02D6: "\u031f",
		0x02D7: "\u0320",
		0x02B2: "\u0321",
		0x00B8: "\u0327",
		0x02CC: "\u0329",
		0x02B7: "\u032b",
		0x02CD: "\u0331",
		0x005F: "\u0332",
		0x204E: "\u0359",
	}
)

// _ddcd converts exactly two PDF objects to floats with GetNumbersAsFloat.
// It returns an error if the slice does not have two elements or if the
// conversion fails.
func _ddcd(_cfcfb []_dd.PdfObject) (_eaacg, _dcea float64, _cfaaf error) {
	if count := len(_cfcfb); count != 2 {
		return 0, 0, _fg.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", count)
	}
	values, _cfaaf := _dd.GetNumbersAsFloat(_cfcfb)
	if _cfaaf != nil {
		return 0, 0, _cfaaf
	}
	return values[0], values[1], nil
}

// _dde builds a wordBag from a non-empty slice of words. The bag is created
// by `_gabe` from the first word; every further word is appended to the
// `_ebae` entry selected by `_dagd(word._gaff)` and the bag's rectangle is
// grown to cover it. The bag is sorted before it is returned.
func _dde(_gbdd []*textWord, _afb float64, _caeb, _ffeg rulingList) *wordBag {
	bag := _gabe(_gbdd[0], _afb, _caeb, _ffeg)
	for _, word := range _gbdd[1:] {
		key := _dagd(word._gaff)
		bag._ebae[key] = append(bag._ebae[key], word)
		bag.PdfRectangle = _gfc(bag.PdfRectangle, word.PdfRectangle)
	}
	bag.sort()
	return bag
}

// _dfebc reports whether the string has at least `_gdec` runes, ends with a
// rune in unicode.Hyphen, and that rune is preceded by a non-space rune.
func _dfebc(_cadb string) bool {
	if _d.RuneCountInString(_cadb) < _gdec {
		return false
	}
	last, size := _d.DecodeLastRuneInString(_cadb)
	if size <= 0 || !_f.Is(_f.Hyphen, last) {
		return false
	}
	previous, previousSize := _d.DecodeLastRuneInString(_cadb[:len(_cadb)-size])
	if previousSize <= 0 {
		return false
	}
	return !_f.IsSpace(previous)
}

// moveLP concatenates a translation by (`_fbdb`, `_fced`) onto `_cfgd` and
// copies the result into `_dddc`.
func (_feef *textObject) moveLP(_fbdb, _fced float64) {
	translation := _ce.NewMatrix(1, 0, 0, 1, _fbdb, _fced)
	_feef._cfgd.Concat(translation)
	_feef._dddc = _feef._cfgd
}

// rulingList is a slice of ruling pointers.
type rulingList []*ruling

// has reports whether `_cdede` is a member of the set.
func (_baac intSet) has(_cdede int) bool {
	_, present := _baac[_cdede]
	return present
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// String returns a description of the text mark: its rectangle, its `_adcd`
// value (labelled "fontsize" in the output) and its `_gbf` string in quotes.
func (_acca *textMark) String() string {
	return _fg.Sprintf("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022", _acca.PdfRectangle, _acca._adcd, _acca._gbf)
}

// eventNeighbours sweeps over `_gfca` and returns, for every paragraph that
// has an event with `_dgdbf` set, the indexes of the paragraphs that were
// active at the same time. Paragraphs are identified by the `_ccga` index of
// an event, which indexes the receiver.
//
// `_gfca` is sorted in place by `_fdfcg`; at equal `_fdfcg`, events with
// `_dgdbf` set come first. The sort is stable, so events equal on both keys
// keep their input order. (The earlier implementation broke such ties by
// comparing slice indices inside sort.Slice, which does not order the
// elements themselves and does not make that sort stable.)
//
// A paragraph with no neighbours maps to a nil slice. The order of indexes
// inside each slice is unspecified because it follows map iteration.
func (_cgaef paraList) eventNeighbours(_gfca []event) map[*textPara][]int {
	_gf.SliceStable(_gfca, func(_baef, _acebd int) bool {
		left, right := _gfca[_baef], _gfca[_acebd]
		if left._fdfcg != right._fdfcg {
			return left._fdfcg < right._fdfcg
		}
		// Same position: an event with the flag set sorts before one without.
		return left._dgdbf && !right._dgdbf
	})

	neighbours := make(map[int]intSet)
	active := make(intSet)
	for _, ev := range _gfca {
		if !ev._dgdbf {
			active.del(ev._ccga)
			continue
		}
		neighbours[ev._ccga] = make(intSet)
		for other := range active {
			if other != ev._ccga {
				// The relation is recorded in both directions.
				neighbours[ev._ccga].add(other)
				neighbours[other].add(ev._ccga)
			}
		}
		active.add(ev._ccga)
	}

	result := make(map[*textPara][]int, len(neighbours))
	for index, set := range neighbours {
		para := _cgaef[index]
		if len(set) == 0 {
			result[para] = nil
			continue
		}
		indexes := make([]int, 0, len(set))
		for other := range set {
			indexes = append(indexes, other)
		}
		result[para] = indexes
	}
	return result
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// String returns a description of the ruling. A ruling of kind `_ebdfe` is
// described as "NOT RULING". Otherwise the output lists the kind, the
// `_acabca` coordinate, the `_dbef`–`_efdg` range and its length, `_aagg`, the
// colour and, when `_efeeg` is non-zero, a width. The axis labels are "y" then
// "x" for kind `_ggdg`, and "x" then "y" for any other kind.
func (_dggg *ruling) String() string {
	if _dggg._bgcf == _ebdfe {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}
	var primaryAxis, secondaryAxis string
	if _dggg._bgcf == _ggdg {
		primaryAxis, secondaryAxis = "\u0079", "\u0078"
	} else {
		primaryAxis, secondaryAxis = "\u0078", "\u0079"
	}
	var widthNote string
	if _dggg._efeeg != 0.0 {
		widthNote = _fg.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _dggg._efeeg)
	}
	length := _dggg._efdg - _dggg._dbef
	return _fg.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073", _dggg._bgcf, primaryAxis, _dggg._acabca, secondaryAxis, _dggg._dbef, _dggg._efdg, length, _dggg._aagg, _dggg.Color, widthNote)
}

// _dagd converts a value to an integer index: `_egb / _egdec` truncated
// toward zero, minus one when `_egb` is not >= 0.
func _dagd(_egb float64) int {
	index := int(_egb / _egdec)
	if !(_egb >= 0) {
		index--
	}
	return index
}

// nextLine calls moveLP with no x movement and a y movement of minus the
// `_dab` field of the object's `_agff` state.
func (_eff *textObject) nextLine() {
	_eff.moveLP(0, -_eff._agff._dab)
}

// _febf returns the translation components of the matrix as a point.
func _febf(_bgg _ce.Matrix) _ce.Point {
	x, y := _bgg.Translation()
	return _ce.Point{X: x, Y: y}
}

// _cfgcfe rounds `_gdefe` to the nearest multiple of `_afgcd`.
func _cfgcfe(_gdefe float64) float64 {
	steps := _gb.Round(_gdefe / _afgcd)
	return _afgcd * steps
}

// bbox returns the smallest rectangle containing every point of every subpath
// in the section. The section must contain at least one subpath, and the
// first subpath at least one point.
func (_bebgc pathSection) bbox() _ge.PdfRectangle {
	origin := _bebgc._ccfga[0]._eeff[0]
	box := _ge.PdfRectangle{Llx: origin.X, Urx: origin.X, Lly: origin.Y, Ury: origin.Y}
	// extend grows box so that it contains the given point.
	extend := func(_fgdf _ce.Point) {
		switch {
		case _fgdf.X < box.Llx:
			box.Llx = _fgdf.X
		case _fgdf.X > box.Urx:
			box.Urx = _fgdf.X
		}
		switch {
		case _fgdf.Y < box.Lly:
			box.Lly = _fgdf.Y
		case _fgdf.Y > box.Ury:
			box.Ury = _fgdf.Y
		}
	}
	for _, point := range _bebgc._ccfga[0]._eeff[1:] {
		extend(point)
	}
	for _, path := range _bebgc._ccfga[1:] {
		for _, point := range path._eeff {
			extend(point)
		}
	}
	return box
}

// _gfc returns the smallest rectangle that contains both arguments.
func _gfc(_adgg, _edac _ge.PdfRectangle) _ge.PdfRectangle {
	var union _ge.PdfRectangle
	union.Llx = _gb.Min(_adgg.Llx, _edac.Llx)
	union.Lly = _gb.Min(_adgg.Lly, _edac.Lly)
	union.Urx = _gb.Max(_adgg.Urx, _edac.Urx)
	union.Ury = _gb.Max(_adgg.Ury, _edac.Ury)
	return union
}

// _gdgd walks the entries of `_gece` in the key order given by `_ccdedg`,
// skipping nil entries. When the last element of the inspected slice exceeds
// its first, the last element is set to the first and the slice is stored
// under the previous non-nil key. Maps with at most one entry are left
// unchanged.
//
// NOTE(review): both slices compared below are read from `_gece[key]`, while
// the debug output labels the two keys "k0" and "k1". It looks as if the
// first slice was meant to come from the previous key; confirm before
// changing, since behaviour is preserved here.
func _gdgd(_gece map[int][]float64) {
	if len(_gece) <= 1 {
		return
	}
	keys := _ccdedg(_gece)
	if _fcbb {
		_cb.Log.Info("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076", keys)
	}
	// Find the first key with a non-nil entry. If there is none, the loop
	// leaves start and previousKey at the last key.
	var start, previousKey int
	for start, previousKey = range keys {
		if _gece[previousKey] != nil {
			break
		}
	}
	for offset, key := range keys[start:] {
		next := _gece[key]
		if next == nil {
			continue
		}
		if _fcbb {
			_fg.Printf("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a", start+offset, previousKey, key)
		}
		current := _gece[key]
		if last := len(current) - 1; current[last] > next[0] {
			current[last] = next[0]
			_gece[previousKey] = current
		}
		previousKey = key
	}
}

// _ddff returns the distinct inner keys of the nested map in ascending order.
func _ddff(_ffcbb map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_ffcbb))
	seen := make(map[float64]struct{}, len(_ffcbb))
	for _, inner := range _ffcbb {
		for key := range inner {
			if _, duplicate := seen[key]; !duplicate {
				keys = append(keys, key)
				seen[key] = struct{}{}
			}
		}
	}
	_gf.Float64s(keys)
	return keys
}

// _ggff returns the keys of the map in ascending order.
func _ggff(_ebff map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_ebff))
	for key := range _ebff {
		keys = append(keys, key)
	}
	_gf.Float64s(keys)
	return keys
}

// _bbaeeb passes the absolute Y distance and then the absolute X distance
// between the two points to `_baec` and returns its result.
func _bbaeeb(_fcecc, _fcdb _ce.Point) bool {
	dx := _gb.Abs(_fcecc.X - _fcdb.X)
	dy := _gb.Abs(_fcecc.Y - _fcdb.Y)
	return _baec(dy, dx)
}

// clear resets the subpath to its zero value.
func (_ccaa *subpath) clear() {
	*_ccaa = subpath{}
}

// emptyCompositeColumn reports whether no composite cell in column `_ceeb`
// (rows 0 to `_accb`-1 of `_gefg`) holds any paragraphs.
func (_faged *textTable) emptyCompositeColumn(_ceeb int) bool {
	for y := 0; y < _faged._accb; y++ {
		cell, found := _faged._gefg[_dffgb(_ceeb, y)]
		if found && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the cell in the (0-offset) x'th column of that row.
type TextTable struct {
	// W and H are presumably the number of columns and rows — confirm against
	// the code that fills Cells.
	W, H  int
	Cells [][]TableCell
}
// String returns a string describing the current state of the textState stack:
// a header line with the number of entries, followed by one tab-indented line
// per entry giving its index and its String() value ("<nil>" for a nil entry).
func (_cbgd *stateStack) String() string {
	lines := make([]string, 0, len(*_cbgd)+1)
	lines = append(lines, _fg.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(*_cbgd)))
	for i, state := range *_cbgd {
		description := "\u003c\u006e\u0069l\u003e"
		if state != nil {
			description = state.String()
		}
		lines = append(lines, _fg.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", i, description))
	}
	return _g.Join(lines, "\u000a")
}

// firstWord returns the first word stored under key `_dfeb` of `_ebae`. The
// entry must exist and be non-empty.
func (_cgaf *wordBag) firstWord(_dfeb int) *textWord {
	words := _cgaf._ebae[_dfeb]
	return words[0]
}
// _cffbf returns the horizontal distance from the left edge of `_cbbc`'s bounding box to the
// left edge of `_dgga`'s bounding box. The result is negative when `_dgga` starts further left.
func _cffbf(_dgga, _cbbc bounded) float64 {
	return _dgga.bbox().Llx - _cbbc.bbox().Llx
}

// empty reports whether the bag has no entry stored under index `_dbbd`.
func (_bcgf *wordBag) empty(_dbbd int) bool {
	_, present := _bcgf._ebae[_dbbd]
	return !present
}

// firstReadingIndex starts from index `_cedd` and scans the indexes returned by depthBand for
// the band [(_cedd+1)*_egdec, (_cedd+1)*_egdec + _ggaa*h], where h is the `_egdce` value of
// the first word at `_cedd`. It returns the index whose first word has the leftmost bounding
// box, or `_cedd` itself when none is further left.
func (_bafe *wordBag) firstReadingIndex(_cedd int) int {
	height := _bafe.firstWord(_cedd)._egdce
	bandStart := float64(_cedd+1) * _egdec
	bandEnd := bandStart + _ggaa*height
	best := _cedd
	for _, candidate := range _bafe.depthBand(bandStart, bandEnd) {
		if _cffbf(_bafe.firstWord(candidate), _bafe.firstWord(best)) < 0 {
			best = candidate
		}
	}
	return best
}

// emptyCompositeRow reports whether no cell stored in `_gefg` under the key
// `_dffgb(i, _cfbgf)`, for i in [0, _ggda), has a non-empty paraList. Missing cells count as empty.
func (_caefg *textTable) emptyCompositeRow(_cfbgf int) bool {
	for col := 0; col < _caefg._ggda; col++ {
		if cell, ok := _caefg._gefg[_dffgb(col, _cfbgf)]; ok {
			if len(cell.paraList) > 0 {
				return false
			}
		}
	}
	return true
}

// alignsPrimary reports whether both rulings have the same kind (`_bgcf`) and their `_acabca`
// coordinates differ by less than half of `_gfge`.
func (_beegd *ruling) alignsPrimary(_ffac *ruling) bool {
	return _beegd._bgcf == _ffac._bgcf && _gb.Abs(_beegd._acabca-_ffac._acabca) < _gfge*0.5
}

// log writes a debug listing of the paragraphs, one line per non-nil paragraph, prefixed by
// `_fdfe`. It does nothing unless the `_ffed` switch is enabled.
// The per-paragraph lines go to standard output, not to the package logger.
func (_cgaed paraList) log(_fdfe string) {
	if !_ffed {
		return
	}
	_cb.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _fdfe, len(_cgaed))
	for idx, para := range _cgaed {
		if para == nil {
			continue
		}
		text := para.text()
		tableDesc := "\u0020\u0020"
		if para._defe != nil {
			tableDesc = _fg.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", para._defe._ggda, para._defe._accb)
		}
		_fg.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", idx, para.PdfRectangle, tableDesc, _geabcf(text, 50))
	}
}

// snapToGroups splits the list with vertsHorzs, runs snapToGroupsDirection on each non-empty
// half, and returns the two halves concatenated (first half first).
func (_bcffe rulingList) snapToGroups() rulingList {
	first, second := _bcffe.vertsHorzs()
	if len(first) > 0 {
		first = first.snapToGroupsDirection()
	}
	if len(second) > 0 {
		second = second.snapToGroupsDirection()
	}
	snapped := append(first, second...)
	snapped.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return snapped
}

// tables returns the exportable tables attached to the paragraphs, in paragraph order.
// A paragraph contributes a table when its `_defe` is non-nil and isExportable() is true.
func (_dead paraList) tables() []TextTable {
	var result []TextTable
	if _fcbb {
		_cb.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range _dead {
		table := para._defe
		if table != nil && table.isExportable() {
			result = append(result, table.toTextTable())
		}
	}
	return result
}

// wordBag is a bounding rectangle plus a collection of words grouped under integer indexes.
// NOTE(review): the meaning of `_adcc`, `_aec`, `_bbgc` and `_feaf` is not visible in this
// part of the file.
type wordBag struct {
	_ge.PdfRectangle
	_adcc       float64
	_aec, _bbgc rulingList
	_feaf       float64
	// _ebae maps an integer index to the words stored under it (see firstWord and empty).
	_ebae map[int][]*textWord
}

// gridIntersecting reports whether `_eegb` holds for both the `_dbef` pair and the `_efdg`
// pair of the two rulings.
func (_bfde *ruling) gridIntersecting(_acbd *ruling) bool {
	return _eegb(_bfde._dbef, _acbd._dbef) && _eegb(_bfde._efdg, _acbd._efdg)
}

// _adffc logs the number of ruling lists under the title `_eddb` and prints each list's
// String() form to standard output.
func _adffc(_eddb string, _fbec []rulingList) {
	_cb.Log.Info("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073", len(_fbec), _eddb)
	for idx, grid := range _fbec {
		_fg.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, grid.String())
	}
}

// shapesState tracks the path being built while a content stream is processed.
type shapesState struct {
	// _decc is refreshed from the graphics state CTM by extractPageText.
	_decc _ce.Matrix
	// _cage is the matrix passed down from the caller of extractPageText.
	_cage _ce.Matrix
	// _becc is the list of subpaths collected so far.
	_becc []*subpath
	_faec bool
	_afcbb _ce.Point
	// _aac is the text object currently associated with the shapes.
	_aac *textObject
}

// computeViews derives the page text (`_bfbe`), text marks (`_cdd`) and tables (`_afga`) from
// the raw marks, stroke paths and fill paths gathered for the page.
//
// Rulings are built from the stroke paths (when `_dfcc` is set) and the fill paths (when
// `_daab` is set). The marks are then handled in four passes, one for each `_dgcf` value
// 0, 90, 180 and 270, stopping early once every mark has been consumed.
func (_gbdb *PageText) computeViews() {
	var rulings rulingList
	if _dfcc {
		rulings = append(rulings, _dgdg(_gbdb._agfe)...)
	}
	if _daab {
		rulings = append(rulings, _fbce(_gbdb._gaga)...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(_gbdb._gfgg)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		// At most `remaining` marks can still match, so that is the useful capacity bound.
		marks := make([]*textMark, 0, remaining)
		for _, mark := range _gbdb._gfgg {
			if mark._dgcf == orient {
				marks = append(marks, mark)
			}
		}
		if len(marks) > 0 {
			paras = append(paras, _geeg(marks, _gbdb._edbb, rulings, tilings)...)
			remaining -= len(marks)
		}
	}

	buf := new(_gfa.Buffer)
	paras.writeText(buf)
	_gbdb._bfbe = buf.String()
	_gbdb._cdd = paras.toTextMarks()
	_gbdb._afga = paras.tables()
	if _fcbb {
		_cb.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(_gbdb._afga))
	}
}

// _fdcc maps each markKind to its display name ("stroke", "fill", "augment").
var _fdcc = map[markKind]string{
	_dggc: "\u0073\u0074\u0072\u006f\u006b\u0065",
	_baeg: "\u0066\u0069\u006c\u006c",
	_fbbc: "\u0061u\u0067\u006d\u0065\u006e\u0074",
}

// _bgad reports whether rectangle `_daacf` lies entirely inside rectangle `_bgda`.
// Touching edges count as inside.
func _bgad(_bgda, _daacf _ge.PdfRectangle) bool {
	return _bgda.Llx <= _daacf.Llx && _daacf.Urx <= _bgda.Urx &&
		_bgda.Lly <= _daacf.Lly && _daacf.Ury <= _bgda.Ury
}

// absorb merges `_eaac` into the receiver: the bounding box becomes `_gfc` of both boxes and
// the `_daggf` entries of `_eaac` are appended to the receiver's.
func (_afccg *textWord) absorb(_eaac *textWord) {
	_afccg.PdfRectangle = _gfc(_afccg.PdfRectangle, _eaac.PdfRectangle)
	_afccg._daggf = append(_afccg._daggf, _eaac._daggf...)
}

// extractPageText parses the content stream `_gdb` and walks its operators, collecting text
// marks, stroke paths and fill paths into a PageText.
//
// `_bfa` supplies the resources for the stream, `_cbcf` is the matrix inherited from the
// caller, and `_fb` is the form-XObject nesting level. When `_fb` exceeds `_cee` the function
// returns a "form stack overflow" error instead of recursing further.
//
// It returns the PageText, the `_ffga` and `_fddb` counters of the text state, and the first
// error raised by parsing or by an operator handler. The PageText is returned even when the
// error is non-nil.
func (_fdc *Extractor) extractPageText(_gdb string, _bfa *_ge.PdfPageResources, _cbcf _ce.Matrix, _fb int) (*PageText, int, int, error) {
	_cb.Log.Trace("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d", _fb)
	pageText := &PageText{_edbb: _fdc._ca}
	state := _bdga(_fdc._ca)
	var savedStates stateStack
	to := _fbgb(_fdc, _bfa, _aa.GraphicsState{}, &state, &savedStates)
	shapes := shapesState{_cage: _cbcf, _decc: _ce.IdentityMatrix(), _aac: to}
	var inTextObj bool

	if _fb > _cee {
		err := _cc.New("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077")
		_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076", _fb, err)
		return pageText, state._ffga, state._fddb, err
	}

	parser := _aa.NewContentStreamParser(_gdb)
	operations, err := parser.Parse()
	if err != nil {
		_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
		return pageText, state._ffga, state._fddb, err
	}

	processor := _aa.NewContentStreamProcessor(*operations)
	processor.AddHandler(_aa.HandlerConditionEnumAllOperands, "", func(op *_aa.ContentStreamOperation, gs _aa.GraphicsState, res *_ge.PdfPageResources) error {
		operand := op.Operand
		if _ecde {
			_cb.Log.Info("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s", op)
		}
		switch operand {
		case "\u0071": // q: save the text state.
			if _dfce {
				_cb.Log.Info("\u0063\u0074\u006d\u003d\u0025\u0073", shapes._decc)
			}
			savedStates.push(&state)
		case "\u0051": // Q: restore the text state; an unbalanced Q is ignored.
			if !savedStates.empty() {
				state = *savedStates.pop()
			}
			shapes._decc = gs.CTM
			if _dfce {
				_cb.Log.Info("\u0063\u0074\u006d\u003d\u0025\u0073", shapes._decc)
			}
		case "\u0042\u0054": // BT: begin a text object.
			if inTextObj {
				// Nested BT: keep the marks of the unterminated text object.
				_cb.Log.Debug("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074")
				pageText._gfgg = append(pageText._gfgg, to._cdca...)
			}
			inTextObj = true
			textGS := gs
			textGS.CTM = _cbcf.Mult(textGS.CTM)
			to = _fbgb(_fdc, res, textGS, &state, &savedStates)
			shapes._aac = to
		case "\u0045\u0054": // ET: end the text object and collect its marks.
			if !inTextObj {
				_cb.Log.Debug("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074")
			}
			inTextObj = false
			pageText._gfgg = append(pageText._gfgg, to._cdca...)
			to.reset()
		case "\u0054\u002a": // T*
			to.nextLine()
		case "\u0054\u0064": // Td
			if ok, err := to.checkOp(op, 2, true); !ok {
				_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			tx, ty, err := _ddcd(op.Params)
			if err != nil {
				return err
			}
			to.moveText(tx, ty)
		case "\u0054\u0044": // TD
			if ok, err := to.checkOp(op, 2, true); !ok {
				_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			tx, ty, err := _ddcd(op.Params)
			if err != nil {
				_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			to.moveTextSetLeading(tx, ty)
		case "\u0054\u006a": // Tj
			if ok, err := to.checkOp(op, 1, true); !ok {
				_cb.Log.Debug("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076", op, err)
				return err
			}
			data, ok := _dd.GetStringBytes(op.Params[0])
			if !ok {
				_cb.Log.Debug("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064", op)
				return _dd.ErrTypeError
			}
			return to.showText(data)
		case "\u0054\u004a": // TJ
			if ok, err := to.checkOp(op, 1, true); !ok {
				_cb.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			args, ok := _dd.GetArray(op.Params[0])
			if !ok {
				_cb.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064", op)
				// A non-array operand is a type error, as in the other text-showing operators.
				return _dd.ErrTypeError
			}
			return to.showTextAdjusted(args)
		case "\u0027": // '
			if ok, err := to.checkOp(op, 1, true); !ok {
				_cb.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			data, ok := _dd.GetStringBytes(op.Params[0])
			if !ok {
				_cb.Log.Debug("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064", op)
				return _dd.ErrTypeError
			}
			to.nextLine()
			return to.showText(data)
		case "\u0022": // "
			if ok, err := to.checkOp(op, 3, true); !ok {
				_cb.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			// PDF operand order is `aw ac string`: word spacing first, then character spacing.
			wordSpacing, charSpacing, err := _ddcd(op.Params[:2])
			if err != nil {
				return err
			}
			data, ok := _dd.GetStringBytes(op.Params[2])
			if !ok {
				_cb.Log.Debug("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064", op)
				return _dd.ErrTypeError
			}
			to.setWordSpacing(wordSpacing)
			to.setCharSpacing(charSpacing)
			to.nextLine()
			return to.showText(data)
		case "\u0054\u004c": // TL
			leading, err := _cagd(op)
			if err != nil {
				_cb.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			to.setTextLeading(leading)
		case "\u0054\u0063": // Tc
			spacing, err := _cagd(op)
			if err != nil {
				_cb.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			to.setCharSpacing(spacing)
		case "\u0054\u0066": // Tf
			if ok, err := to.checkOp(op, 2, true); !ok {
				_cb.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			fontName, ok := _dd.GetNameVal(op.Params[0])
			if !ok {
				_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064", op)
				return _dd.ErrTypeError
			}
			fontSize, err := _dd.GetNumberAsFloat(op.Params[1])
			if err != nil {
				_cb.Log.Debug("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076", op, err)
				return err
			}
			err = to.setFont(fontName, fontSize)
			// An unsupported font is recorded on the text object rather than aborting.
			to._gcaa = _gbd.Is(err, _dd.ErrNotSupported)
			if err != nil && !to._gcaa {
				return err
			}
		case "\u0054\u006d": // Tm
			if ok, err := to.checkOp(op, 6, true); !ok {
				_cb.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			values, err := _dd.GetNumbersAsFloat(op.Params)
			if err != nil {
				_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			to.setTextMatrix(values)
		case "\u0054\u0072": // Tr
			if ok, err := to.checkOp(op, 1, true); !ok {
				_cb.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			mode, ok := _dd.GetIntVal(op.Params[0])
			if !ok {
				_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064", op)
				return _dd.ErrTypeError
			}
			to.setTextRenderMode(mode)
		case "\u0054\u0073": // Ts
			if ok, err := to.checkOp(op, 1, true); !ok {
				_cb.Log.Debug("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			rise, err := _dd.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			to.setTextRise(rise)
		case "\u0054\u0077": // Tw
			if ok, err := to.checkOp(op, 1, true); !ok {
				_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			spacing, err := _dd.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			to.setWordSpacing(spacing)
		case "\u0054\u007a": // Tz
			if ok, err := to.checkOp(op, 1, true); !ok {
				_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			scaling, err := _dd.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076", err)
				return err
			}
			to.setHorizScaling(scaling)
		case "\u0063\u006d": // cm
			shapes._decc = gs.CTM
			if shapes._decc.Singular() {
				// Replace a singular CTM by a pure translation with the same offset.
				fallback := _ce.IdentityMatrix().Translate(shapes._decc.Translation())
				_cb.Log.Debug("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s", shapes._decc, fallback)
				shapes._decc = fallback
			}
			if _dfce {
				_cb.Log.Info("\u0063\u0074\u006d\u003d\u0025\u0073", shapes._decc)
			}
		case "\u006d": // m
			if len(op.Params) != 2 {
				// Deliberately tolerant: a malformed move is logged and skipped.
				_cb.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e", _gc)
				return nil
			}
			coords, err := _dd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.moveTo(coords[0], coords[1])
		case "\u006c": // l
			if len(op.Params) != 2 {
				// Deliberately tolerant: a malformed line is logged and skipped.
				_cb.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e", _gc)
				return nil
			}
			coords, err := _dd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.lineTo(coords[0], coords[1])
		case "\u0063": // c
			if len(op.Params) != 6 {
				return _gc
			}
			coords, err := _dd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_cb.Log.Debug("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f", coords)
			shapes.cubicTo(coords[0], coords[1], coords[2], coords[3], coords[4], coords[5])
		case "\u0076", "\u0079": // v, y
			if len(op.Params) != 4 {
				return _gc
			}
			coords, err := _dd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_cb.Log.Debug("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f", coords)
			shapes.quadraticTo(coords[0], coords[1], coords[2], coords[3])
		case "\u0068": // h
			shapes.closePath()
		case "\u0072\u0065": // re
			if len(op.Params) != 4 {
				return _gc
			}
			coords, err := _dd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.drawRectangle(coords[0], coords[1], coords[2], coords[3])
			shapes.closePath()
		case "\u0053": // S
			shapes.stroke(&pageText._agfe)
			shapes.clearPath()
		case "\u0073": // s
			shapes.closePath()
			shapes.stroke(&pageText._agfe)
			shapes.clearPath()
		case "\u0046": // F
			shapes.fill(&pageText._gaga)
			shapes.clearPath()
		case "\u0066", "\u0066\u002a": // f, f*
			shapes.closePath()
			shapes.fill(&pageText._gaga)
			shapes.clearPath()
		case "\u0042", "\u0042\u002a": // B, B*
			shapes.fill(&pageText._gaga)
			shapes.stroke(&pageText._agfe)
			shapes.clearPath()
		case "\u0062", "\u0062\u002a": // b, b*
			shapes.closePath()
			shapes.fill(&pageText._gaga)
			shapes.stroke(&pageText._agfe)
			shapes.clearPath()
		case "\u006e": // n
			shapes.clearPath()
		case "\u0044\u006f": // Do: only form XObjects are processed.
			if len(op.Params) == 0 {
				_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e", op.Params)
				return _dd.ErrRangeError
			}
			name, ok := _dd.GetName(op.Params[0])
			if !ok {
				_cb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e", op.Params[0])
				return _dd.ErrTypeError
			}
			_, xobjType := res.GetXObjectByName(*name)
			if xobjType != _ge.XObjectTypeForm {
				break
			}
			// Form results are cached on the extractor, keyed by the XObject name.
			formResult, ok := _fdc._af[name.String()]
			if !ok {
				xform, err := res.GetXObjectFormByName(*name)
				if err != nil {
					_cb.Log.Debug("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v", err)
					return err
				}
				formContent, err := xform.GetContentStream()
				if err != nil {
					_cb.Log.Debug("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v", err)
					return err
				}
				formResources := xform.Resources
				if formResources == nil {
					// A form without its own resources uses those of the enclosing stream.
					formResources = res
				}
				formText, numChars, numMisses, err := _fdc.extractPageText(string(formContent), formResources, _cbcf.Mult(gs.CTM), _fb+1)
				if err != nil {
					_cb.Log.Debug("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v", err)
					return err
				}
				formResult = textResult{*formText, numChars, numMisses}
				_fdc._af[name.String()] = formResult
			}
			shapes._decc = gs.CTM
			if _dfce {
				_cb.Log.Info("\u0063\u0074\u006d\u003d\u0025\u0073", shapes._decc)
			}
			pageText._gfgg = append(pageText._gfgg, formResult._cfa._gfgg...)
			pageText._agfe = append(pageText._agfe, formResult._cfa._agfe...)
			pageText._gaga = append(pageText._gaga, formResult._cfa._gaga...)
			state._ffga += formResult._gdda
			state._fddb += formResult._bed
		case "\u0072\u0067", "\u0067", "\u006b", "\u0063\u0073", "\u0073\u0063", "\u0073\u0063\u006e": // rg, g, k, cs, sc, scn
			to._afff.ColorspaceNonStroking = gs.ColorspaceNonStroking
			to._afff.ColorNonStroking = gs.ColorNonStroking
		case "\u0052\u0047", "\u0047", "\u004b", "\u0043\u0053", "\u0053\u0043", "\u0053\u0043\u004e": // RG, G, K, CS, SC, SCN
			to._afff.ColorspaceStroking = gs.ColorspaceStroking
			to._afff.ColorStroking = gs.ColorStroking
		}
		return nil
	})

	err = processor.Process(_bfa)
	return pageText, state._ffga, state._fddb, err
}

// absorb moves every word of `_dcdb` into the receiver via pullWord, then applies the
// accumulated removals to `_dcdb`.
func (_eaegf *wordBag) absorb(_dcdb *wordBag) {
	removals := _dcdb.makeRemovals()
	for idx, words := range _dcdb._ebae {
		for _, word := range words {
			_eaegf.pullWord(word, idx, removals)
		}
	}
	_dcdb.applyRemovals(removals)
}

// Numeric tuning constants.
// NOTE(review): the names are obfuscated; only `_egdec` and `_ggaa` (band bounds in
// firstReadingIndex) and `_gfge` (tolerance in alignsPrimary) are used in this part of the
// file. The purpose of the others must be confirmed at their use sites.
const (
	_ddfc  = 1.0e-6
	_afgcd = 1.0e-4
	_febb  = 10
	_egdec = 6
	_faad  = 0.5
	_fddcb = 0.12
	_fbda  = 0.19
	_fedg  = 0.04
	_fdbc  = 0.04
	_fcda  = 1.0
	_fffa  = 0.04
	_eecd  = 0.4
	_fabg  = 0.7
	_afaaa = 1.0
	_bgdb  = 0.1
	_cfgcf = 1.4
	_cgfd  = 0.46
	_bbae  = 0.02
	_dda   = 0.2
	_fcfba = 0.5
	_gdec  = 4
	_ggaa  = 4.0
	_adbg  = 6
	_ggfc  = 0.3
	_becce = 0.01
	_gfce  = 0.02
	_aggeb = 2
	_deea  = 2
	_cffgc = 500
	_gbca  = 4.0
	_fgfbb = 4.0
	_cbbff = 0.05
	_ccea  = 0.1
	_abc   = 2.0
	_gfge  = 2.0
	_gcgcd = 1.5
	_fdbb  = 3.0
	_edcce = 0.25
)

// text returns the text of the line: the `_aebb` of each word in order, each preceded by a
// single space when the word's `_agde` flag is set.
func (_fgdd *textLine) text() string {
	var sb _g.Builder
	for _, word := range _fgdd._badf {
		if word._agde {
			sb.WriteString("\u0020")
		}
		sb.WriteString(word._aebb)
	}
	return sb.String()
}

// fontEntry is one slot of the extractor's font cache.
type fontEntry struct {
	// _bef is the cached font.
	_bef *_ge.PdfFont
	// _fcbe is the access stamp compared by getFont when choosing an entry to evict.
	_fcbe int64
}

// isExportable reports whether the table should be exported.
//
// A table with `_bagb` set is always exportable. Otherwise it is exportable when at least
// one of the cells get(0, i), for i in [0, _accb), is missing or holds text that is longer
// than one rune and does not match `_ffdg`.
func (_eddc *textTable) isExportable() bool {
	if _eddc._bagb {
		return true
	}
	// trivial reports whether the cell exists and its text is at most one rune long or
	// matches `_ffdg`.
	trivial := func(idx int) bool {
		cell := _eddc.get(0, idx)
		if cell == nil {
			return false
		}
		text := cell.text()
		runeCount := _d.RuneCountInString(text)
		matches := _ffdg.MatchString(text)
		return runeCount <= 1 || matches
	}
	for idx := 0; idx < _eddc._accb; idx++ {
		if !trivial(idx) {
			return true
		}
	}
	return false
}

// _efbdd returns the keys of `_dadg` sorted in increasing order.
func _efbdd(_dadg map[int]intSet) []int {
	keys := make([]int, 0, len(_dadg))
	for key := range _dadg {
		keys = append(keys, key)
	}
	_gf.Ints(keys)
	return keys
}

// compositeCell is a rectangle together with the paragraphs assigned to it.
type compositeCell struct {
	_ge.PdfRectangle
	paraList
}

// _dabg returns `_daeaa` applied to the `_cacf` fields of the two paragraphs.
func _dabg(_ggef, _agda *textPara) bool {
	return _daeaa(_ggef._cacf, _agda._cacf)
}

// writeCellText writes the text of the paragraph's lines to `_gdfc`.
//
// When `_gcggd` is set and a line other than the last ends in a hyphen, the line text is
// passed through `_bbecf` and no separator follows it. Every other line except the last is
// followed by the separator `_gdea` computes from the `_dgce` of the line and of its successor.
// Write errors are not reported.
func (_dcf *textPara) writeCellText(_gdfc _b.Writer) {
	lastIdx := len(_dcf._ffcb) - 1
	for idx, line := range _dcf._ffcb {
		text := line.text()
		joinHyphen := _gcggd && line.endsInHyphen() && idx != lastIdx
		if joinHyphen {
			text = _bbecf(text)
		}
		_gdfc.Write([]byte(text))
		if !(joinHyphen || idx == lastIdx) {
			_gdfc.Write([]byte(_gdea(line._dgce, _dcf._ffcb[idx+1]._dgce)))
		}
	}
}

// markKind identifies the kind of a mark; `_fdcc` gives the name of each kind.
type markKind int

// _bcee rounds `_cebb` to the nearest multiple of `_bddb`. A zero `_bddb` is treated as 1.
func _bcee(_cebb float64, _bddb int) int {
	if _bddb == 0 {
		_bddb = 1
	}
	step := float64(_bddb)
	return int(_gb.Round(_cebb/step) * step)
}

// _ebde reports true when either paragraph has `_caaad` set; otherwise it returns `_cgcb`
// applied to the difference of the two paragraph depths.
func _ebde(_daca, _fgdcd *textPara) bool {
	if _daca._caaad || _fgdcd._caaad {
		return true
	}
	return _cgcb(_daca.depth() - _fgdcd.depth())
}

// _dbed is the maximum number of entries getFont keeps in the font cache.
const _dbed = 10

// getFontDict looks up the font object named `_begb` in the text object's resources.
// It returns (nil, nil) when the text object has no resources, and an error when the
// resources do not contain the font.
func (_cdcd *textObject) getFontDict(_begb string) (_bagc _dd.PdfObject, _bgga error) {
	resources := _cdcd._ddf
	if resources == nil {
		_cb.Log.Debug("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071", _begb)
		return nil, nil
	}
	_bagc, found := resources.GetFontByName(_dd.PdfObjectName(_begb))
	if !found {
		_cb.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071", _begb)
		return nil, _cc.New("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073")
	}
	return _bagc, nil
}
// Tables returns the tables extracted from the page.
func (_gaba PageText) Tables() []TextTable {
	tables := _gaba._afga
	if _fcbb {
		_cb.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}

// last returns the final point of the subpath. It panics if the subpath has no points.
func (_acfb *subpath) last() _ce.Point {
	points := _acfb._eeff
	return points[len(points)-1]
}
// getStrokeColor converts the stroking colorspace and stroking color of the text object's
// graphics state with `_egcg` and returns the result.
func (_bde *textObject) getStrokeColor() _fd.Color {
	colorspace := _bde._afff.ColorspaceStroking
	strokeColor := _bde._afff.ColorStroking
	return _egcg(colorspace, strokeColor)
}

// _daee returns the distance from the bottom edge (Lly) of `_acfda`'s bounding box up to the
// top edge (Ury) of `_bbgd`.
func _daee(_bbgd _ge.PdfRectangle, _acfda bounded) float64 {
	box := _acfda.bbox()
	return _bbgd.Ury - box.Lly
}

// compositeColCorridors returns a map with one nil entry for every index in [0, _ggda).
func (_dcab *textTable) compositeColCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, _dcab._ggda)
	if _fcbb {
		_cb.Log.Info("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020", _dcab._ggda)
	}
	for col := 0; col < _dcab._ggda; col++ {
		corridors[col] = nil
	}
	return corridors
}

// getFont returns the font named `_aed`.
//
// When the owner's font cache (`_dfd`) is non-nil it is consulted first, and every call bumps
// the access counter `_fa`. On a hit the entry's access stamp is refreshed. On a miss the font
// is loaded with getFontDirect and stored; if the cache already holds `_dbed` or more entries,
// the entry with the smallest access stamp is evicted first.
func (_dabd *textObject) getFont(_aed string) (*_ge.PdfFont, error) {
	if _dabd._cgbd._dfd != nil {
		_dabd._cgbd._fa++
		if entry, ok := _dabd._cgbd._dfd[_aed]; ok {
			// Map values are copies, so the refreshed stamp must be stored back for the
			// eviction below to see it.
			entry._fcbe = _dabd._cgbd._fa
			_dabd._cgbd._dfd[_aed] = entry
			return entry._bef, nil
		}
	}

	font, err := _dabd.getFontDirect(_aed)
	if err != nil {
		return nil, err
	}

	if _dabd._cgbd._dfd != nil {
		fresh := fontEntry{font, _dabd._cgbd._fa}
		if len(_dabd._cgbd._dfd) >= _dbed {
			// Evict the entry with the smallest access stamp.
			var oldestName string
			var oldestStamp int64
			found := false
			for name, entry := range _dabd._cgbd._dfd {
				if !found || entry._fcbe < oldestStamp {
					oldestName, oldestStamp, found = name, entry._fcbe, true
				}
			}
			if found {
				delete(_dabd._cgbd._dfd, oldestName)
			}
		}
		_dabd._cgbd._dfd[_aed] = fresh
	}
	return font, nil
}
// String returns a short description of the shapes state: the number of subpaths and the
// value of the `_faec` flag (printed as "fresh").
func (_affe *shapesState) String() string {
	count, fresh := len(_affe._becc), _affe._faec
	return _fg.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", count, fresh)
}

// highestWord returns the first word stored under index `_ege` whose `_gaff` lies in the
// closed interval [_adac, _afef], or nil when there is none.
func (_cagc *wordBag) highestWord(_ege int, _adac, _afef float64) *textWord {
	words := _cagc._ebae[_ege]
	for idx := 0; idx < len(words); idx++ {
		word := words[idx]
		if _adac <= word._gaff && word._gaff <= _afef {
			return word
		}
	}
	return nil
}

// setTextMatrix builds a matrix from the six values in `_ebb` and stores it in both `_dddc`
// and `_cfgd`. A slice of any other length is logged and ignored.
func (_ec *textObject) setTextMatrix(_ebb []float64) {
	if count := len(_ebb); count != 6 {
		_cb.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", count)
		return
	}
	_ec._dddc = _ce.NewMatrix(_ebb[0], _ebb[1], _ebb[2], _ebb[3], _ebb[4], _ebb[5])
	_ec._cfgd = _ec._dddc
}

// _gfacg removes the first occurrence of `_bbgdc` (compared by pointer) from `_dfgcg` using
// `_fdgbgg` and returns the result. When the word is not present it logs an error and
// returns nil.
func _gfacg(_dfgcg []*textWord, _bbgdc *textWord) []*textWord {
	for idx := 0; idx < len(_dfgcg); idx++ {
		if _dfgcg[idx] == _bbgdc {
			return _fdgbgg(_dfgcg, idx)
		}
	}
	_cb.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", _bbgdc)
	return nil
}

// bbox returns the rectangle spanned by the rulings. The kind of the first ruling decides
// which of primMinMax / secMinMax supplies the X range and which the Y range. An empty list
// is logged as an error and yields the zero rectangle.
func (_gcff rulingList) bbox() _ge.PdfRectangle {
	if len(_gcff) == 0 {
		_cb.Log.Error("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073")
		return _ge.PdfRectangle{}
	}
	var box _ge.PdfRectangle
	if _gcff[0]._bgcf != _ggdg {
		box.Llx, box.Urx = _gcff.primMinMax()
		box.Lly, box.Ury = _gcff.secMinMax()
		return box
	}
	box.Llx, box.Urx = _gcff.secMinMax()
	box.Lly, box.Ury = _gcff.primMinMax()
	return box
}

// text returns the `_aebb` of every word from allWords(), joined by single spaces.
func (_afcg *wordBag) text() string {
	words := _afcg.allWords()
	parts := make([]string, 0, len(words))
	for _, word := range words {
		parts = append(parts, word._aebb)
	}
	return _g.Join(parts, "\u0020")
}

// toCellTextMarks returns the text marks of all lines of the paragraph, threading the
// running offset `_dgfb` through the helpers.
//
// When `_gcggd` is set and a line other than the last ends in a hyphen, its marks are passed
// through `_edda` and no separator follows. Every other line except the last is followed by
// the separator `_gdea` computes from the `_dgce` of the line and of its successor, appended
// with `_dgde`.
func (_dddg *textPara) toCellTextMarks(_dgfb *int) []TextMark {
	var marks []TextMark
	lastIdx := len(_dddg._ffcb) - 1
	for idx, line := range _dddg._ffcb {
		lineMarks := line.toTextMarks(_dgfb)
		joinHyphen := _gcggd && line.endsInHyphen() && idx != lastIdx
		if joinHyphen {
			lineMarks = _edda(lineMarks, _dgfb)
		}
		marks = append(marks, lineMarks...)
		if joinHyphen || idx == lastIdx {
			continue
		}
		marks = _dgde(marks, _dgfb, _gdea(line._dgce, _dddg._ffcb[idx+1]._dgce))
	}
	return marks
}

// removeDuplicates drops every point that `_fcffc` considers equal to the point kept just
// before it, preserving order. An empty subpath is left untouched.
func (_egde *subpath) removeDuplicates() {
	points := _egde._eeff
	if len(points) == 0 {
		return
	}
	kept := []_ce.Point{points[0]}
	for idx := 1; idx < len(points); idx++ {
		if _fcffc(points[idx], kept[len(kept)-1]) {
			continue
		}
		kept = append(kept, points[idx])
	}
	_egde._eeff = kept
}

// _acfe reports whether the left edge of `_adg` lies in the half-open interval
// [bag.Urx, bag.Urx + _ffefc), where bag is `_eadc`.
func _acfe(_eadc *wordBag, _adg *textWord, _ffefc float64) bool {
	left := _adg.Llx
	return _eadc.Urx <= left && left < _eadc.Urx+_ffefc
}

// complete reports whether every tile in the tiling is complete.
func (_agbgd gridTiling) complete() bool {
	for _, row := range _agbgd._ecaee {
		for _, tile := range row {
			if tile.complete() {
				continue
			}
			return false
		}
	}
	return true
}

// Package-level switches and limits.
// `_gcggd` gates the hyphen handling in writeCellText and toCellTextMarks; `_dfcc` and `_daab`
// gate the use of stroke and fill paths in computeViews.
// NOTE(review): the remaining names are obfuscated and are not used in this part of the
// file; confirm their purpose at their use sites.
const (
	_gcggd = true
	_fged  = true
	_fbfb  = true
	_ecea  = false
	_bfeg  = false
	_gbge  = 6
	_bfcd  = 3.0
	_fbbg  = 200
	_cfca  = true
	_fgfe  = true
	_dfcc  = true
	_daab  = true
	_bafa  = false
)
2021-12-14 01:08:28 +00:00
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
2022-02-05 21:34:53 +00:00
type PageImages struct {
	// Images lists the image marks found on the page.
	Images []ImageMark
}

// getRight collects, for every index i in [0, _accb), the `_cegg` neighbour of the cell
// get(_ggda-1, i). It returns nil as soon as one neighbour is taken(), or when the collected
// neighbours are not chained through `_gbff` (each one's `_gbff` must be the next one).
func (_bdabe *textTable) getRight() paraList {
	neighbours := make(paraList, _bdabe._accb)
	for idx := 0; idx < _bdabe._accb; idx++ {
		candidate := _bdabe.get(_bdabe._ggda-1, idx)._cegg
		if candidate.taken() {
			return nil
		}
		neighbours[idx] = candidate
	}
	for idx := 1; idx < _bdabe._accb; idx++ {
		if neighbours[idx-1]._gbff != neighbours[idx] {
			return nil
		}
	}
	return neighbours
}

// asRuling converts the line into a ruling of kind `_dggc`.
//
// For kind `_agdda` the primary coordinate is xMean() and the extent is the Y range of the
// two end points; for kind `_ggdg` the primary coordinate is yMean() and the extent is the X
// range. Any other kind is logged as an error and yields (nil, false).
func (_gaad lineRuling) asRuling() (*ruling, bool) {
	result := ruling{_bgcf: _gaad._aaga, Color: _gaad.Color, _aagg: _dggc}
	if _gaad._aaga == _agdda {
		result._acabca = _gaad.xMean()
		result._dbef = _gb.Min(_gaad._gfcd.Y, _gaad._gccg.Y)
		result._efdg = _gb.Max(_gaad._gfcd.Y, _gaad._gccg.Y)
		return &result, true
	}
	if _gaad._aaga == _ggdg {
		result._acabca = _gaad.yMean()
		result._dbef = _gb.Min(_gaad._gfcd.X, _gaad._gccg.X)
		result._efdg = _gb.Max(_gaad._gfcd.X, _gaad._gccg.X)
		return &result, true
	}
	_cb.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _gaad._aaga)
	return nil, false
}

// maxDepth returns `_feaf` minus the bottom edge (Lly) of the bag's rectangle.
func (_gbaa *wordBag) maxDepth() float64 {
	top := _gbaa._feaf
	return top - _gbaa.Lly
}