unipdf/extractor/extractor.go

992 lines
216 KiB
Go
Raw Normal View History

2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2023-12-17 13:54:01 +00:00
package extractor ;import (_df "bytes";_d "errors";_ge "fmt";_ec "github.com/unidoc/unipdf/v3/common";_ag "github.com/unidoc/unipdf/v3/contentstream";_gb "github.com/unidoc/unipdf/v3/core";_bd "github.com/unidoc/unipdf/v3/internal/license";_fd "github.com/unidoc/unipdf/v3/internal/textencoding";
_bc "github.com/unidoc/unipdf/v3/internal/transform";_ce "github.com/unidoc/unipdf/v3/model";_b "golang.org/x/image/draw";_ef "golang.org/x/text/unicode/norm";_dfc "golang.org/x/xerrors";_ded "image";_fg "image/color";_a "io";_aa "math";_gc "reflect";_e "regexp";
_c "sort";_gd "strings";_de "unicode";_g "unicode/utf8";);type lineRuling struct{_fbbag rulingKind ;_aagg markKind ;_fg .Color ;_ddgfc ,_eacg _bc .Point ;};func _ccacc (_effca map[float64 ]gridTile )[]float64 {_cabg :=make ([]float64 ,0,len (_effca ));
for _decbc :=range _effca {_cabg =append (_cabg ,_decbc );};_c .Float64s (_cabg );return _cabg ;};func (_gadb *textObject )showTextAdjusted (_dae *_gb .PdfObjectArray ,_fdbf int )error {_afc :=false ;for _ ,_gaa :=range _dae .Elements (){switch _gaa .(type ){case *_gb .PdfObjectFloat ,*_gb .PdfObjectInteger :_gcacc ,_dceg :=_gb .GetNumberAsFloat (_gaa );
if _dceg !=nil {_ec .Log .Debug ("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_gaa ,_dae );
return _dceg ;};_ceeg ,_bfcf :=-_gcacc *0.001*_gadb ._befa ._ccf ,0.0;if _afc {_bfcf ,_ceeg =_ceeg ,_bfcf ;};_cddc :=_dgfb (_bc .Point {X :_ceeg ,Y :_bfcf });_gadb ._eee .Concat (_cddc );case *_gb .PdfObjectString :_abgfd :=_gb .TraceToDirectObject (_gaa );
_caa ,_bfg :=_gb .GetStringBytes (_abgfd );if !_bfg {_ec .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_gaa ,_dae );
return _gb .ErrTypeError ;};_gadb .renderText (_abgfd ,_caa ,_fdbf );default:_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_gaa ,_dae );
return _gb .ErrTypeError ;};};return nil ;};func (_cdeg *textTable )getDown ()paraList {_abfe :=make (paraList ,_cdeg ._ecbf );for _efdbg :=0;_efdbg < _cdeg ._ecbf ;_efdbg ++{_gfffb :=_cdeg .get (_efdbg ,_cdeg ._dcfg -1)._fdgbd ;if _gfffb .taken (){return nil ;
};_abfe [_efdbg ]=_gfffb ;};for _egccf :=0;_egccf < _cdeg ._ecbf -1;_egccf ++{if _abfe [_egccf ]._gaca !=_abfe [_egccf +1]{return nil ;};};return _abfe ;};func (_ecbe *textTable )compositeRowCorridors ()map[int ][]float64 {_edadb :=make (map[int ][]float64 ,_ecbe ._dcfg );
if _afcg {_ec .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_ecbe ._dcfg );};for _fffe :=1;_fffe < _ecbe ._dcfg ;_fffe ++{var _abag []compositeCell ;
for _bbfba :=0;_bbfba < _ecbe ._ecbf ;_bbfba ++{if _cdcb ,_cdfcf :=_ecbe ._egfe [_aaca (_bbfba ,_fffe )];_cdfcf {_abag =append (_abag ,_cdcb );};};if len (_abag )==0{continue ;};_ebef :=_ccef (_abag );_edadb [_fffe ]=_ebef ;if _afcg {_ge .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_fffe ,_ebef );
};};return _edadb ;};func (_egcc pathSection )bbox ()_ce .PdfRectangle {_fgfa :=_egcc ._fbfe [0]._gfefe [0];_dabg :=_ce .PdfRectangle {Llx :_fgfa .X ,Urx :_fgfa .X ,Lly :_fgfa .Y ,Ury :_fgfa .Y };_cae :=func (_bgbe _bc .Point ){if _bgbe .X < _dabg .Llx {_dabg .Llx =_bgbe .X ;
}else if _bgbe .X > _dabg .Urx {_dabg .Urx =_bgbe .X ;};if _bgbe .Y < _dabg .Lly {_dabg .Lly =_bgbe .Y ;}else if _bgbe .Y > _dabg .Ury {_dabg .Ury =_bgbe .Y ;};};for _ ,_gcgb :=range _egcc ._fbfe [0]._gfefe [1:]{_cae (_gcgb );};for _ ,_addf :=range _egcc ._fbfe [1:]{for _ ,_gggf :=range _addf ._gfefe {_cae (_gggf );
};};return _dabg ;};func (_fdeea paraList )readBefore (_dddgd []int ,_cbgba ,_cbbc int )bool {_aggf ,_deed :=_fdeea [_cbgba ],_fdeea [_cbbc ];if _adcg (_aggf ,_deed )&&_aggf .Lly > _deed .Lly {return true ;};if !(_aggf ._ebcf .Urx < _deed ._ebcf .Llx ){return false ;
};_cgec ,_caaec :=_aggf .Lly ,_deed .Lly ;if _cgec > _caaec {_caaec ,_cgec =_cgec ,_caaec ;};_agef :=_aa .Max (_aggf ._ebcf .Llx ,_deed ._ebcf .Llx );_agbf :=_aa .Min (_aggf ._ebcf .Urx ,_deed ._ebcf .Urx );_ddgf :=_fdeea .llyRange (_dddgd ,_cgec ,_caaec );
for _ ,_egde :=range _ddgf {if _egde ==_cbgba ||_egde ==_cbbc {continue ;};_fgbg :=_fdeea [_egde ];if _fgbg ._ebcf .Llx <=_agbf &&_agef <=_fgbg ._ebcf .Urx {return false ;};};return true ;};func (_badb *textObject )nextLine (){_badb .moveLP (0,-_badb ._befa ._dcc )};
// quadraticTo adds only the end point (`_gaga`, `_dcba`) to the current path;
// the control point (`_aad`, `_dfdc`) is ignored.
func (_ggff *shapesState) quadraticTo(_aad, _dfdc, _gaga, _dcba float64) {
	if _cbag {
		_ec.Log.Info("quadraticTo:")
	}
	_ggff.addPoint(_gaga, _dcba)
}

// textLine is a bounding rectangle with a list of words and two float values.
type textLine struct {
	_ce.PdfRectangle
	_bcdg float64
	_aebc []*textWord
	_ecag float64
}

// setTextRenderMode stores `_cdfe` as the RenderMode of the text state. It is
// a no-op on a nil receiver.
func (_gecd *textObject) setTextRenderMode(_cdfe int) {
	if _gecd == nil {
		return
	}
	_gecd._befa._bbd = RenderMode(_cdfe)
}

// getParagraphs groups the page's text marks by their `_edge` value (0, 90,
// 180, 270) and passes each non-empty group to `_fbcc`, together with the
// rulings derived from the page's stroke and fill path sections.
//
// Stroke rulings are included only when `_gfag` is set and fill rulings only
// when `_dbbc` is set. The results are concatenated in increasing angle order.
func (_aeg *PageText) getParagraphs() paraList {
	var rulings rulingList
	if _gfag {
		fromStrokes := _beca(_aeg._geda)
		rulings = append(rulings, fromStrokes...)
	}
	if _dbbc {
		fromFills := _fdab(_aeg._gegc)
		rulings = append(rulings, fromFills...)
	}
	rulings, rest := rulings.toTilings()

	var paras paraList
	remaining := len(_aeg._fcag)
	for angle := 0; angle < 360 && remaining > 0; angle += 90 {
		// At most `remaining` marks can still match, so use that as capacity.
		group := make([]*textMark, 0, remaining)
		for _, mark := range _aeg._fcag {
			if mark._edge == angle {
				group = append(group, mark)
			}
		}
		if len(group) > 0 {
			found := _fbcc(group, _aeg._fbbg, rulings, rest, _aeg._ccg._fcgfa)
			paras = append(paras, found...)
			remaining -= len(group)
		}
	}
	return paras
}

// fill appends a pathSection built from the current subpaths and the fill
// colour to `*_bfaed`. When `_gfgc` is set it prints a diagnostic line, and
// when `_deaga` is also set it prints the subpaths up to index 10.
func (_efce *shapesState) fill(_bfaed *[]pathSection) {
	section := pathSection{_fbfe: _efce._abgb, Color: _efce._ecfc.getFillColor()}
	*_bfaed = append(*_bfaed, section)
	if !_gfgc {
		return
	}
	box := section.bbox()
	_ge.Printf("    FILL: %2d fills (%d new) ss=%s color=%3v %6.2f=%6.2fx%6.2f\n",
		len(*_bfaed), len(section._fbfe), _efce, section.Color, box, box.Width(), box.Height())
	if _deaga {
		for i, sub := range section._fbfe {
			_ge.Printf("%8d: %s\n", i, sub)
			if i == 10 {
				break
			}
		}
	}
}

// equals reports whether both rulings have the same `_bfbc` and whether
// `_gecae` holds for each of the `_abbgc`, `_cebe` and `_deee` pairs.
func (_dfbf *ruling) equals(_gafce *ruling) bool {
	return _dfbf._bfbc == _gafce._bfbc &&
		_gecae(_dfbf._abbgc, _gafce._abbgc) &&
		_gecae(_dfbf._cebe, _gafce._cebe) &&
		_gecae(_dfbf._deee, _gafce._deee)
}

// makeRemovals returns a map holding an empty word set for every key of the
// bag's `_faba` map.
func (_agge *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	removals := make(map[int]map[*textWord]struct{}, len(_agge._faba))
	for key := range _agge._faba {
		removals[key] = make(map[*textWord]struct{})
	}
	return removals
}

// moveTo sets the `_edee` flag and stores the device-space image of
// (`_eba`, `_cff`) in `_ebfb`.
func (_bffcd *shapesState) moveTo(_eba, _cff float64) {
	_bffcd._edee = true
	_bffcd._ebfb = _bffcd.devicePoint(_eba, _cff)
	if _cbag {
		_ec.Log.Info("moveTo: %.2f,%.2f device=%.2f", _eba, _cff, _bffcd._ebfb)
	}
}

// sort orders each word slice in `_faba` in place, using `_fabg` as the
// comparison (negative means "before").
func (_ecbb *wordBag) sort() {
	for _, words := range _ecbb._faba {
		_c.Slice(words, func(i, j int) bool { return _fabg(words[i], words[j]) < 0 })
	}
}

// primMinMax returns the smallest and largest `_abbgc` value in the list.
// The list must not be empty.
func (_bedb rulingList) primMinMax() (float64, float64) {
	low, high := _bedb[0]._abbgc, _bedb[0]._abbgc
	for _, r := range _bedb[1:] {
		if r._abbgc < low {
			low = r._abbgc
		} else if r._abbgc > high {
			high = r._abbgc
		}
	}
	return low, high
}

// _fcd reports whether the x-ranges [Llx, Urx] of the two rectangles intersect
// (touching counts).
func _fcd(_cdaf, _gdca _ce.PdfRectangle) bool {
	return _gdca.Llx <= _cdaf.Urx && _cdaf.Llx <= _gdca.Urx
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
type ImageMark struct{Image *_ce .Image ;
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ;Height float64 ;
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// Position of the image in PDF coordinates (lower left corner).
X float64 ;Y float64 ;
2023-04-06 19:57:40 +00:00
2023-12-17 13:54:01 +00:00
// Angle in degrees, if rotated.
Angle float64 ;};func (_gbca *wordBag )applyRemovals (_acee map[int ]map[*textWord ]struct{}){for _fffg ,_edgd :=range _acee {if len (_edgd )==0{continue ;};_eccg :=_gbca ._faba [_fffg ];_gfcf :=len (_eccg )-len (_edgd );if _gfcf ==0{delete (_gbca ._faba ,_fffg );
continue ;};_egea :=make ([]*textWord ,_gfcf );_gfgfg :=0;for _ ,_fecg :=range _eccg {if _ ,_edef :=_edgd [_fecg ];!_edef {_egea [_gfgfg ]=_fecg ;_gfgfg ++;};};_gbca ._faba [_fffg ]=_egea ;};};type textTable struct{_ce .PdfRectangle ;_ecbf ,_dcfg int ;
_beaeg bool ;_gcbga map[uint64 ]*textPara ;_egfe map[uint64 ]compositeCell ;};func (_bgf *textObject )setTextMatrix (_aaf []float64 ){if len (_aaf )!=6{_ec .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_aaf ));
return ;};_gcac ,_fbed ,_bbgf ,_dac ,_geg ,_cgda :=_aaf [0],_aaf [1],_aaf [2],_aaf [3],_aaf [4],_aaf [5];_bgf ._eee =_bc .NewMatrix (_gcac ,_fbed ,_bbgf ,_dac ,_geg ,_cgda );_bgf ._ecg =_bgf ._eee ;};func _cacf (_bbcg []TextMark ,_adce *int ,_gcbb string )[]TextMark {_dded :=_efe ;
_dded .Text =_gcbb ;return _cfaf (_bbcg ,_adce ,_dded );};func (_bbfe *textPara )bbox ()_ce .PdfRectangle {return _bbfe .PdfRectangle };
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// Text returns the text content of the `bulletLists`.
func (_aegg *lists) Text() string {
	var sb _gd.Builder
	for _, item := range *_aegg {
		sb.WriteString(item.Text())
	}
	return sb.String()
}

// list gathers the getListLines() output and the `_bdbcg` lines of every
// paragraph, then returns `_cbbae` applied to all the lines and to `_egbg` of
// the list lines.
func (_dcdd paraList) list() []*list {
	var (
		listLines []*textLine
		allLines  []*textLine
	)
	for _, para := range _dcdd {
		listLines = append(listLines, para.getListLines()...)
		allLines = append(allLines, para._bdbcg...)
	}
	grouped := _egbg(listLines)
	return _cbbae(allLines, grouped)
}

// getStrokeColor converts the stroking colour of the graphics state `_gbe`,
// with its stroking colourspace, through `_agefd`.
func (_bgfg *textObject) getStrokeColor() _fg.Color {
	gs := _bgfg._gbe
	return _agefd(gs.ColorspaceStroking, gs.ColorStroking)
}

// bbox returns the word's embedded rectangle.
func (_cfbbg *textWord) bbox() _ce.PdfRectangle {
	return _cfbbg.PdfRectangle
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// Options extractor options.
type Options struct{
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// DisableDocumentTags specifies whether to use the document tags during list extraction.
DisableDocumentTags bool ;
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// ApplyCropBox will extract page text based on page cropbox if set to `true`.
ApplyCropBox bool ;
2023-11-11 11:29:03 +00:00
2023-12-17 13:54:01 +00:00
// UseSimplerExtractionProcess will skip topological text ordering and table processing.
2023-11-11 11:29:03 +00:00
//
2023-12-17 13:54:01 +00:00
// NOTE: While normally the extra processing is beneficial, it can also lead to problems when it does not work.
// Thus it is a flag to allow the user to control this process.
2023-11-11 11:29:03 +00:00
//
2023-12-17 13:54:01 +00:00
// Skipping some extraction processes would also lead to the reduced processing time.
UseSimplerExtractionProcess bool ;};func _beca (_aceeg []pathSection )rulingList {_bdged (_aceeg );if _gfgc {_ec .Log .Info ("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073",len (_aceeg ));
};var _eabca rulingList ;for _ ,_fdga :=range _aceeg {for _ ,_eeed :=range _fdga ._fbfe {if len (_eeed ._gfefe )< 2{continue ;};_gceb :=_eeed ._gfefe [0];for _ ,_bfbcc :=range _eeed ._gfefe [1:]{if _gadbd ,_eabfd :=_gfbbf (_gceb ,_bfbcc ,_fdga .Color );
_eabfd {_eabca =append (_eabca ,_gadbd );};_gceb =_bfbcc ;};};};if _gfgc {_ec .Log .Info ("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073",_eabca );};return _eabca ;};func _bcdee (_deeef ,_bgae _bc .Point )rulingKind {_gcbce :=_aa .Abs (_deeef .X -_bgae .X );
_fcefg :=_aa .Abs (_deeef .Y -_bgae .Y );return _gbbf (_gcbce ,_fcefg ,_cffa );};func (_ddbg *textPara )depth ()float64 {if _ddbg ._bedf {return -1.0;};if len (_ddbg ._bdbcg )> 0{return _ddbg ._bdbcg [0]._bcdg ;};return _ddbg ._bddea .depth ();};
// String returns a string describing `tm`.
//
// The font description is cut to 50 bytes followed by "..." when longer, and
// " *M*" is appended for marks whose Meta flag is set.
func (_cfa TextMark) String() string {
	box := _cfa.BBox
	var font string
	if _cfa.Font != nil {
		font = _cfa.Font.String()
		if len(font) > 50 {
			font = font[:50] + "..."
		}
	}
	var meta string
	if _cfa.Meta {
		meta = " *M*"
	}
	return _ge.Sprintf("{TextMark: %d %q=%02x (%6.2f, %6.2f) (%6.2f, %6.2f) %s%s}",
		_cfa.Offset, _cfa.Text, []rune(_cfa.Text), box.Llx, box.Lly, box.Urx, box.Ury, font, meta)
}

// merge sorts the paragraphs into reading order and combines them into one
// paragraph via `_ffec`, using the union of their rectangles and the
// concatenation of their lines. It returns nil for an empty list.
func (_fffca paraList) merge() *textPara {
	_ec.Log.Trace("merge: paras=%d ===========x=============", len(_fffca))
	if len(_fffca) == 0 {
		return nil
	}
	_fffca.sortReadingOrder()

	total := 0
	for _, para := range _fffca {
		total += len(para._bdbcg)
	}
	// Collect into a fresh slice. Appending onto the first paragraph's own
	// slice could write into spare capacity of a backing array it shares.
	lines := make([]*textLine, 0, total)
	lines = append(lines, _fffca[0]._bdbcg...)
	box := _fffca[0].PdfRectangle
	for _, para := range _fffca[1:] {
		box = _cdggc(box, para.PdfRectangle)
		lines = append(lines, para._bdbcg...)
	}
	return _ffec(box, lines)
}
2023-11-11 11:29:03 +00:00
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
2023-12-17 13:54:01 +00:00
func (_gdg *PageText )ApplyArea (bbox _ce .PdfRectangle ){_gefe :=make ([]*textMark ,0,len (_gdg ._fcag ));for _ ,_edc :=range _gdg ._fcag {if _decgb (_edc .bbox (),bbox ){_gefe =append (_gefe ,_edc );};};var _fbcf paraList ;_eaae :=len (_gefe );for _cgac :=0;
_cgac < 360&&_eaae > 0;_cgac +=90{_dgee :=make ([]*textMark ,0,len (_gefe )-_eaae );for _ ,_ceff :=range _gefe {if _ceff ._edge ==_cgac {_dgee =append (_dgee ,_ceff );};};if len (_dgee )> 0{_gfgf :=_fbcc (_dgee ,_gdg ._fbbg ,nil ,nil ,_gdg ._ccg ._fcgfa );
_fbcf =append (_fbcf ,_gfgf ...);_eaae -=len (_dgee );};};_bgdf :=new (_df .Buffer );_fbcf .writeText (_bgdf );_gdg ._gcee =_bgdf .String ();_gdg ._ebfc =_fbcf .toTextMarks ();_gdg ._eadb =_fbcf .tables ();};func _bddac (_fgaac []*textMark ,_bbad _ce .PdfRectangle )*textWord {_gadac :=_fgaac [0].PdfRectangle ;
_bdbaa :=_fgaac [0]._bfaca ;for _ ,_bcgce :=range _fgaac [1:]{_gadac =_cdggc (_gadac ,_bcgce .PdfRectangle );if _bcgce ._bfaca > _bdbaa {_bdbaa =_bcgce ._bfaca ;};};return &textWord {PdfRectangle :_gadac ,_bgeaa :_fgaac ,_cffdg :_bbad .Ury -_gadac .Lly ,_ddgee :_bdbaa };
};func (_fcee rulingList )bbox ()_ce .PdfRectangle {var _eeba _ce .PdfRectangle ;if len (_fcee )==0{_ec .Log .Error ("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073");
return _ce .PdfRectangle {};};if _fcee [0]._bfbc ==_ecac {_eeba .Llx ,_eeba .Urx =_fcee .secMinMax ();_eeba .Lly ,_eeba .Ury =_fcee .primMinMax ();}else {_eeba .Llx ,_eeba .Urx =_fcee .primMinMax ();_eeba .Lly ,_eeba .Ury =_fcee .secMinMax ();};return _eeba ;
};func _aaca (_egcg ,_dbbbc int )uint64 {return uint64 (_egcg )*0x1000000+uint64 (_dbbbc )};func (_bgcd *shapesState )lastpointEstablished ()(_bc .Point ,bool ){if _bgcd ._edee {return _bgcd ._ebfb ,false ;};_gbbg :=len (_bgcd ._abgb );if _gbbg > 0&&_bgcd ._abgb [_gbbg -1]._cgde {return _bgcd ._abgb [_gbbg -1].last (),false ;
};return _bc .Point {},true ;};func (_ffcb *Extractor )extractPageText (_fdf string ,_gccf *_ce .PdfPageResources ,_dg _bc .Matrix ,_gccd int )(*PageText ,int ,int ,error ){_ec .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_gccd );
_eaa :=&PageText {_fbbg :_ffcb ._gda ,_bbb :_ffcb ._ba ,_fefc :_ffcb ._eg };_dee :=_cab (_ffcb ._gda );var _bfa stateStack ;_gfc :=_egf (_ffcb ,_gccf ,_ag .GraphicsState {},&_dee ,&_bfa );_ggc :=shapesState {_dag :_dg ,_aabgc :_bc .IdentityMatrix (),_ecfc :_gfc };
var _ccce bool ;_fdb :=-1;if _gccd > _adee {_fff :=_d .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_gccd ,_fff );
return _eaa ,_dee ._baf ,_dee ._afgb ,_fff ;};_abd :=_ag .NewContentStreamParser (_fdf );_fbf ,_bac :=_abd .Parse ();if _bac !=nil {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bac );
return _eaa ,_dee ._baf ,_dee ._afgb ,_bac ;};_eaa ._ecfe =_fbf ;_dfa :=_ag .NewContentStreamProcessor (*_fbf );_dfa .AddHandler (_ag .HandlerConditionEnumAllOperands ,"",func (_dce *_ag .ContentStreamOperation ,_gbge _ag .GraphicsState ,_bag *_ce .PdfPageResources )error {_cgd :=_dce .Operand ;
if _acab {_ec .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_dce );};switch _cgd {case "\u0071":if _cbag {_ec .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_ggc ._aabgc );};_bfa .push (&_dee );case "\u0051":if !_bfa .empty (){_dee =*_bfa .pop ();
};_ggc ._aabgc =_gbge .CTM ;if _cbag {_ec .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_ggc ._aabgc );};case "\u0042\u0044\u0043":_gce ,_bacc :=_gb .GetDict (_dce .Params [1]);if !_bacc {_ec .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0042D\u0043\u0020\u006f\u0070\u003d\u0025\u0073 \u0047\u0065\u0074\u0044\u0069\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_dce );
return _bac ;};_badc :=_gce .Get ("\u004d\u0043\u0049\u0044");if _badc !=nil {_fce ,_cdf :=_gb .GetIntVal (_badc );if !_cdf {_ec .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0042\u0044C\u0020\u006f\u0070=\u0025\u0073\u002e\u0020\u0042\u0061\u0064\u0020\u006eum\u0065\u0072\u0069c\u0061\u006c \u006f\u0062\u006a\u0065\u0063\u0074.\u0020\u006f=\u0025\u0073",_dce ,_badc );
};_fdb =_fce ;}else {_fdb =-1;};case "\u0045\u004d\u0043":_fdb =-1;case "\u0042\u0054":if _ccce {_ec .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
_eaa ._fcag =append (_eaa ._fcag ,_gfc ._cfb ...);};_ccce =true ;_ceac :=_gbge ;_ceac .CTM =_dg .Mult (_ceac .CTM );_gfc =_egf (_ffcb ,_bag ,_ceac ,&_dee ,&_bfa );_ggc ._ecfc =_gfc ;case "\u0045\u0054":if !_ccce {_ec .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
};_ccce =false ;_eaa ._fcag =append (_eaa ._fcag ,_gfc ._cfb ...);_gfc .reset ();case "\u0054\u002a":_gfc .nextLine ();case "\u0054\u0064":if _bfb ,_aec :=_gfc .checkOp (_dce ,2,true );!_bfb {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aec );
return _aec ;};_aecd ,_bacd ,_gba :=_fbda (_dce .Params );if _gba !=nil {return _gba ;};_gfc .moveText (_aecd ,_bacd );case "\u0054\u0044":if _dff ,_abg :=_gfc .checkOp (_dce ,2,true );!_dff {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_abg );
return _abg ;};_ece ,_af ,_fab :=_fbda (_dce .Params );if _fab !=nil {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fab );return _fab ;};_gfc .moveTextSetLeading (_ece ,_af );case "\u0054\u006a":if _ced ,_cdc :=_gfc .checkOp (_dce ,1,true );
!_ced {_ec .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_dce ,_cdc );return _cdc ;};_faad :=_gb .TraceToDirectObject (_dce .Params [0]);_badfg ,_caff :=_gb .GetStringBytes (_faad );
if !_caff {_ec .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_dce );return _gb .ErrTypeError ;
};return _gfc .showText (_faad ,_badfg ,_fdb );case "\u0054\u004a":if _cdce ,_cba :=_gfc .checkOp (_dce ,1,true );!_cdce {_ec .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cba );return _cba ;};_aefb ,_aed :=_gb .GetArray (_dce .Params [0]);
if !_aed {_ec .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_dce );return _bac ;};return _gfc .showTextAdjusted (_aefb ,_fdb );
case "\u0027":if _fcgf ,_bff :=_gfc .checkOp (_dce ,1,true );!_fcgf {_ec .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bff );return _bff ;};_cdd :=_gb .TraceToDirectObject (_dce .Params [0]);_bdd ,_fac :=_gb .GetStringBytes (_cdd );
if !_fac {_ec .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_dce );return _gb .ErrTypeError ;};_gfc .nextLine ();return _gfc .showText (_cdd ,_bdd ,_fdb );
case "\u0022":if _bbc ,_gge :=_gfc .checkOp (_dce ,3,true );!_bbc {_ec .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gge );return _gge ;};_gcd ,_gbaf ,_fbe :=_fbda (_dce .Params [:2]);if _fbe !=nil {return _fbe ;
};_cf :=_gb .TraceToDirectObject (_dce .Params [2]);_eda ,_fga :=_gb .GetStringBytes (_cf );if !_fga {_ec .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_dce );
return _gb .ErrTypeError ;};_gfc .setCharSpacing (_gcd );_gfc .setWordSpacing (_gbaf );_gfc .nextLine ();return _gfc .showText (_cf ,_eda ,_fdb );case "\u0054\u004c":_eaf ,_bgd :=_dgd (_dce );if _bgd !=nil {_ec .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bgd );
return _bgd ;};_gfc .setTextLeading (_eaf );case "\u0054\u0063":_cega ,_fef :=_dgd (_dce );if _fef !=nil {_ec .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fef );return _fef ;};_gfc .setCharSpacing (_cega );
case "\u0054\u0066":if _bfe ,_deag :=_gfc .checkOp (_dce ,2,true );!_bfe {_ec .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_deag );return _deag ;};_fcef ,_fca :=_gb .GetNameVal (_dce .Params [0]);
if !_fca {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_dce );return _gb .ErrTypeError ;};_egb ,_eade :=_gb .GetNumberAsFloat (_dce .Params [1]);
if !_fca {_ec .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dce ,_eade );
return _eade ;};_eade =_gfc .setFont (_fcef ,_egb );_gfc ._efbb =_dfc .Is (_eade ,_gb .ErrNotSupported );if _eade !=nil &&!_gfc ._efbb {return _eade ;};case "\u0054\u006d":if _dgf ,_dade :=_gfc .checkOp (_dce ,6,true );!_dgf {_ec .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dade );
return _dade ;};_eac ,_fee :=_gb .GetNumbersAsFloat (_dce .Params );if _fee !=nil {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fee );return _fee ;};_gfc .setTextMatrix (_eac );case "\u0054\u0072":if _cdb ,_fcff :=_gfc .checkOp (_dce ,1,true );
!_cdb {_ec .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fcff );return _fcff ;};_dgg ,_acag :=_gb .GetIntVal (_dce .Params [0]);if !_acag {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_dce );
return _gb .ErrTypeError ;};_gfc .setTextRenderMode (_dgg );case "\u0054\u0073":if _afg ,_bgc :=_gfc .checkOp (_dce ,1,true );!_afg {_ec .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bgc );return _bgc ;
};_afa ,_cfc :=_gb .GetNumberAsFloat (_dce .Params [0]);if _cfc !=nil {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cfc );return _cfc ;};_gfc .setTextRise (_afa );case "\u0054\u0077":if _bgbc ,_gbc :=_gfc .checkOp (_dce ,1,true );
!_bgbc {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gbc );return _gbc ;};_abgf ,_cbab :=_gb .GetNumberAsFloat (_dce .Params [0]);if _cbab !=nil {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cbab );
return _cbab ;};_gfc .setWordSpacing (_abgf );case "\u0054\u007a":if _aaa ,_bbg :=_gfc .checkOp (_dce ,1,true );!_aaa {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bbg );return _bbg ;};_dfb ,_bagg :=_gb .GetNumberAsFloat (_dce .Params [0]);
if _bagg !=nil {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bagg );return _bagg ;};_gfc .setHorizScaling (_dfb );case "\u0063\u006d":_ggc ._aabgc =_gbge .CTM ;if _ggc ._aabgc .Singular (){_add :=_bc .IdentityMatrix ().Translate (_ggc ._aabgc .Translation ());
_ec .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_ggc ._aabgc ,_add );_ggc ._aabgc =_add ;};if _cbag {_ec .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_ggc ._aabgc );};case "\u006d":if len (_dce .Params )!=2{_ec .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_fe );
return nil ;};_beb ,_efbd :=_gb .GetNumbersAsFloat (_dce .Params );if _efbd !=nil {return _efbd ;};_ggc .moveTo (_beb [0],_beb [1]);case "\u006c":if len (_dce .Params )!=2{_ec .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_fe );
return nil ;};_dada ,_bfdb :=_gb .GetNumbersAsFloat (_dce .Params );if _bfdb !=nil {return _bfdb ;};_ggc .lineTo (_dada [0],_dada [1]);case "\u0063":if len (_dce .Params )!=6{return _fe ;};_afgf ,_ecc :=_gb .GetNumbersAsFloat (_dce .Params );if _ecc !=nil {return _ecc ;
};_ec .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_afgf );_ggc .cubicTo (_afgf [0],_afgf [1],_afgf [2],_afgf [3],_afgf [4],_afgf [5]);case "\u0076","\u0079":if len (_dce .Params )!=4{return _fe ;
};_cdfc ,_edg :=_gb .GetNumbersAsFloat (_dce .Params );if _edg !=nil {return _edg ;};_ec .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_cdfc );_ggc .quadraticTo (_cdfc [0],_cdfc [1],_cdfc [2],_cdfc [3]);
case "\u0068":_ggc .closePath ();case "\u0072\u0065":if len (_dce .Params )!=4{return _fe ;};_gca ,_adf :=_gb .GetNumbersAsFloat (_dce .Params );if _adf !=nil {return _adf ;};_ggc .drawRectangle (_gca [0],_gca [1],_gca [2],_gca [3]);_ggc .closePath ();
case "\u0053":_ggc .stroke (&_eaa ._geda );_ggc .clearPath ();case "\u0073":_ggc .closePath ();_ggc .stroke (&_eaa ._geda );_ggc .clearPath ();case "\u0046":_ggc .fill (&_eaa ._gegc );_ggc .clearPath ();case "\u0066","\u0066\u002a":_ggc .closePath ();_ggc .fill (&_eaa ._gegc );
_ggc .clearPath ();case "\u0042","\u0042\u002a":_ggc .fill (&_eaa ._gegc );_ggc .stroke (&_eaa ._geda );_ggc .clearPath ();case "\u0062","\u0062\u002a":_ggc .closePath ();_ggc .fill (&_eaa ._gegc );_ggc .stroke (&_eaa ._geda );_ggc .clearPath ();case "\u006e":_ggc .clearPath ();
case "\u0044\u006f":if len (_dce .Params )==0{_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_dce .Params );
return _gb .ErrRangeError ;};_deda ,_eff :=_gb .GetName (_dce .Params [0]);if !_eff {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_dce .Params [0]);
return _gb .ErrTypeError ;};_ ,_geaf :=_bag .GetXObjectByName (*_deda );if _geaf !=_ce .XObjectTypeForm {break ;};_egbd ,_eff :=_ffcb ._fa [_deda .String ()];if !_eff {_agcd ,_ddf :=_bag .GetXObjectFormByName (*_deda );if _ddf !=nil {_ec .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_ddf );
return _ddf ;};_edf ,_ddf :=_agcd .GetContentStream ();if _ddf !=nil {_ec .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_ddf );return _ddf ;};_bdg :=_agcd .Resources ;if _bdg ==nil {_bdg =_bag ;};_gcdc :=_gbge .CTM ;if _ggg ,_ada :=_gb .GetArray (_agcd .Matrix );
_ada {_cgc ,_gced :=_ggg .GetAsFloat64Slice ();if _gced !=nil {return _gced ;};if len (_cgc )!=6{return _fe ;};_bcc :=_bc .NewMatrix (_cgc [0],_cgc [1],_cgc [2],_cgc [3],_cgc [4],_cgc [5]);_gcdc =_gbge .CTM .Mult (_bcc );};_gff ,_cdbg ,_edgg ,_ddf :=_ffcb .extractPageText (string (_edf ),_bdg ,_dg .Mult (_gcdc ),_gccd +1);
if _ddf !=nil {_ec .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_ddf );return _ddf ;};_egbd =textResult {*_gff ,_cdbg ,_edgg };_ffcb ._fa [_deda .String ()]=_egbd ;};_ggc ._aabgc =_gbge .CTM ;if _cbag {_ec .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_ggc ._aabgc );
};_eaa ._fcag =append (_eaa ._fcag ,_egbd ._ceee ._fcag ...);_eaa ._geda =append (_eaa ._geda ,_egbd ._ceee ._geda ...);_eaa ._gegc =append (_eaa ._gegc ,_egbd ._ceee ._gegc ...);_dee ._baf +=_egbd ._agcc ;_dee ._afgb +=_egbd ._dge ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_gfc ._gbe .ColorspaceNonStroking =_gbge .ColorspaceNonStroking ;
_gfc ._gbe .ColorNonStroking =_gbge .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_gfc ._gbe .ColorspaceStroking =_gbge .ColorspaceStroking ;_gfc ._gbe .ColorStroking =_gbge .ColorStroking ;
};return nil ;});_bac =_dfa .Process (_gccf );return _eaa ,_dee ._baf ,_dee ._afgb ,_bac ;};
2023-11-11 11:29:03 +00:00
2023-12-17 13:54:01 +00:00
// Tables returns the tables extracted from the page.
func (_gdfa PageText) Tables() []TextTable {
	tables := _gdfa._eadb
	if _afcg {
		_ec.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}

// _fddd decodes the image XObject held in the stream `_dcbdd` and hands the
// decoded image together with `_gdgcc` to _abgbe.
// It returns (nil, nil) when `_dcbdd` is not a stream object.
func _fddd(_dcbdd _gb.PdfObject, _gdgcc _fg.Color) (_ded.Image, error) {
	stream, isStream := _gb.GetStream(_dcbdd)
	if !isStream {
		return nil, nil
	}
	xobj, err := _ce.NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}
	img, err := xobj.ToImage()
	if err != nil {
		return nil, err
	}
	return _abgbe(img, _gdgcc), nil
}

// _ffef binds the float64 argument `_bdbc` into the three-argument predicate
// `_gfae`, returning a two-argument predicate over (*wordBag, *textWord).
func _ffef(_gfae func(*wordBag, *textWord, float64) bool, _bdbc float64) func(*wordBag, *textWord) bool {
	return func(bag *wordBag, word *textWord) bool {
		return _gfae(bag, word, _bdbc)
	}
}

// _deca recursively builds a tree of *list items.
// `_afacg[_bfbda]` selects the key into `_fffa` for this level; the lines found
// there are filtered by _bdbf using the bounds `_ebbd` and `_bead`. Each
// surviving line becomes one item, and the next level (`_bfbda`+1) is built
// between that line's `_bcdg` value and the `_bcdg` value of the following line
// (or `_ebbd` for the last one).
// NOTE(review): `_bcdg` looks like a vertical position and `_afacg` like sorted
// indentation levels — confirm against _bdbf/_dcbf.
func _deca(_fbgeb []*textLine, _fffa map[float64][]*textLine, _afacg []float64, _bfbda int, _bead, _ebbd float64) []*list {
	result := []*list{}
	levelKey := _afacg[_bfbda]
	nextLevel := _bfbda + 1
	heads := _bdbf(_fffa[levelKey], _ebbd, _bead)
	for i, head := range heads {
		from := head._bcdg
		to := _ebbd
		if i < len(heads)-1 {
			to = heads[i+1]._bcdg
		}

		children := []*list{}
		if nextLevel < len(_afacg) {
			children = _deca(_fbgeb, _fffa, _afacg, nextLevel, from, to)
		}

		// The item's own lines stop where its first child starts, if it has one.
		bodyEnd := to
		if len(children) > 0 {
			if first := children[0]; len(first._ggdb) > 0 {
				bodyEnd = first._ggdb[0]._bcdg
			}
		}

		lines := []*textLine{head}
		lines = append(lines, _dcbf(head, _fbgeb, _afacg, from, bodyEnd)...)
		item := _baba(lines, "\u0062\u0075\u006c\u006c\u0065\u0074", children)
		item._dage = _efaf(lines, "")
		result = append(result, item)
	}
	return result
}

// _eeggd is the compiled pattern `^\s*(\d+\.?|[Iiv]+)\s*\)?$`.
var _eeggd = _e.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")

// Messages used by the font extraction code.
const (
	// "ERROR: Can't convert font object, invalid type"
	_be = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
	// "ERROR: Can't get font properties, font not found"
	_cc = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"
	// "ERROR: Can't get font stream, invalid type"
	_cb = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
)

// growTable repeatedly extends the table by one row (from getDown) and/or one
// column (from getRight) until neither call yields anything.
// When both are available and share an untaken last element, the row is added
// first, getRight is re-queried for the enlarged table, the column is added,
// and the shared element is placed in the new corner.
func (_adea *textTable) growTable() {
	// appendRow increments the row count `_dcfg` and fills the new last row.
	appendRow := func(cells paraList) {
		_adea._dcfg++
		for x := 0; x < _adea._ecbf; x++ {
			cell := cells[x]
			_adea.put(x, _adea._dcfg-1, cell)
		}
	}
	// appendCol increments the column count `_ecbf` and fills the new last column.
	appendCol := func(cells paraList) {
		_adea._ecbf++
		for y := 0; y < _adea._dcfg; y++ {
			cell := cells[y]
			_adea.put(_adea._ecbf-1, y, cell)
		}
	}

	if _dcedc {
		_adea.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}

	for iter := 0; ; iter++ {
		down := _adea.getDown()
		right := _adea.getRight()
		if _dcedc {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", iter, _adea)
			_ge.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", down)
			_ge.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", right)
		}

		grown := false
		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				appendRow(down)
				if right = _adea.getRight(); right != nil {
					appendCol(right)
					_adea.put(_adea._ecbf-1, _adea._dcfg-1, corner)
				}
				grown = true
			}
		}
		if grown {
			continue
		}

		switch {
		case down != nil:
			appendRow(down)
		case right != nil:
			appendCol(right)
		default:
			// Nothing left to add in either direction.
			return
		}
	}
}

// _edeff returns a ruling of kind `_ecac` positioned at the rectangle's Ury and
// spanning from its Llx to its Urx.
func _edeff(_cbed _ce.PdfRectangle) *ruling {
	return &ruling{
		_bfbc:  _ecac,
		_abbgc: _cbed.Ury,
		_cebe:  _cbed.Llx,
		_deee:  _cbed.Urx,
	}
}

// computeText concatenates the `_ecaa` strings of the word's marks in order.
func (_abee *textWord) computeText() string {
	var sb _gd.Builder
	for _, mark := range _abee._bgeaa {
		sb.WriteString(mark._ecaa)
	}
	return sb.String()
}

// log dumps the tiling when the `_bea` debug flag is set: a summary line via
// the package logger, then the two coordinate slices and every tile that is
// present in `_bage`, printed to standard output.
func (_fddcf gridTiling) log(_cdcf string) {
	if !_bea {
		return
	}
	_ec.Log.Info("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071", len(_fddcf._cgecb), len(_fddcf._agbb), _cdcf)
	_ge.Printf("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a", _fddcf._cgecb)
	_ge.Printf("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a", _fddcf._agbb)
	for rowIdx, rowKey := range _fddcf._agbb {
		row, haveRow := _fddcf._bage[rowKey]
		if haveRow {
			_ge.Printf("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a", rowIdx, rowKey)
			for colIdx, colKey := range _fddcf._cgecb {
				if tile, haveTile := row[colKey]; haveTile {
					_ge.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", colIdx, tile.String())
				}
			}
		}
	}
}

// _eccb returns the lines of `_bdff` whose text is matched by `_ccfe`.
// The result is an empty, non-nil slice when nothing matches.
func _eccb(_bdff []*textLine) []*textLine {
	matched := []*textLine{}
	for _, line := range _bdff {
		if _ccfe.Find([]byte(line.text())) == nil {
			continue
		}
		matched = append(matched, line)
	}
	return matched
}
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct{
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Text is the extracted text.
Text string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// BBox is the bounding box of the text.
2023-12-17 13:54:01 +00:00
BBox _ce .PdfRectangle ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Font is the font the text was drawn with.
2023-12-17 13:54:01 +00:00
Font *_ce .PdfFont ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2023-12-17 13:54:01 +00:00
FillColor _fg .Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2023-12-17 13:54:01 +00:00
StrokeColor _fg .Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Orientation is the text orientation
Orientation int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// DirectObject is the underlying PdfObject (Text Object) that represents the visible texts. This is introduced to get
// a simple access to the TextObject in case editing or replacment of some text is needed. E.g during redaction.
2023-12-17 13:54:01 +00:00
DirectObject _gb .PdfObject ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// ObjString is a decoded string operand of a text-showing operator. It has the same value as `Text` attribute except
// when many glyphs are represented with the same Text Object that contains multiple length string operand in which case
// ObjString spans more than one character string that falls in different TextMark objects.
2023-12-17 13:54:01 +00:00
ObjString []string ;Tw float64 ;Th float64 ;Tc float64 ;Index int ;_ebd bool ;_afe *TextTable ;};
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// ToText returns the page text as a single string.
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_fbeda PageText) ToText() string {
	return _fbeda.Text()
}

// shapesState holds the state used while building paths from drawing
// operators: two matrices, the accumulated subpaths, a flag, a point and the
// current text object.
// NOTE(review): `_aabgc` is assigned the graphics-state CTM by the page text
// extraction code; the roles of the other fields are not visible here.
type shapesState struct {
	_aabgc _bc.Matrix
	_dag   _bc.Matrix
	_abgb  []*subpath
	_edee  bool
	_ebfb  _bc.Point
	_ecfc  *textObject
}

// eventNeighbours sweeps over `_cgcee` and returns, for every paragraph that
// has an entering event, the indexes of the paragraphs that were active at the
// same time.
//
// `_cgcee` is sorted in place by `_gcaf`; at equal `_gcaf`, events whose
// `_cgcbd` flag is set (entering) come before those where it is clear
// (leaving). Paragraphs without overlapping neighbours map to a nil slice. The
// order of indexes inside each slice follows map iteration and is unspecified.
func (_dbcc paraList) eventNeighbours(_cgcee []event) map[*textPara][]int {
	less := func(i, j int) bool {
		a, b := _cgcee[i], _cgcee[j]
		switch {
		case a._gcaf != b._gcaf:
			return a._gcaf < b._gcaf
		case a._cgcbd != b._cgcbd:
			return a._cgcbd
		default:
			// NOTE(review): tie-break on the current slice index, kept as in the
			// original; ties are events of the same kind at the same position.
			return i < j
		}
	}
	_c.Slice(_cgcee, less)

	overlaps := make(map[int]intSet)
	active := make(intSet)
	for _, ev := range _cgcee {
		idx := ev._bffcfb
		if !ev._cgcbd {
			active.del(idx)
			continue
		}
		overlaps[idx] = make(intSet)
		for other := range active {
			if other == idx {
				continue
			}
			overlaps[idx].add(other)
			overlaps[other].add(idx)
		}
		active.add(idx)
	}

	result := map[*textPara][]int{}
	for idx, set := range overlaps {
		para := _dbcc[idx]
		if len(set) == 0 {
			result[para] = nil
			continue
		}
		neighbours := make([]int, 0, len(set))
		for other := range set {
			neighbours = append(neighbours, other)
		}
		result[para] = neighbours
	}
	return result
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct {
	// Fonts lists the fonts extracted from the page.
	Fonts []Font
}

// log dumps the table when the `_afcg` debug flag is set: a header line with
// the caller-supplied title, the table dimensions, its `_beaeg` flag and its
// bounding box goes to the package logger, followed by one line per non-nil
// cell on standard output (cell text is passed through _adagc with limit 50).
func (_caed *textTable) log(_ecaga string) {
	if !_afcg {
		return
	}
	_ec.Log.Info("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066", _ecaga, _caed._ecbf, _caed._dcfg, _caed._beaeg, _caed.PdfRectangle)
	for row := 0; row < _caed._dcfg; row++ {
		for col := 0; col < _caed._ecbf; col++ {
			if cell := _caed.get(col, row); cell != nil {
				_ge.Printf("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a", col, row, cell.PdfRectangle, _adagc(cell.text(), 50), _g.RuneCountInString(cell.text()))
			}
		}
	}
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// List returns all the list objects detected on the page.
// It detects all the bullet point Lists from a given pdf page and builds a slice of bullet list objects.
// A given bullet list object has a tree structure.
// Each bullet point list is extracted with the text content it contains and all the sub lists found under it as children in the tree.
// The rest content of the pdf is ignored and only text in the bullet point lists are extracted.
// The list extraction is done in two ways.
// 1. If the document is tagged then the lists are extracted using the tags provided in the document.
// 2. Otherwise the bullet lists are extracted from the raw text using regex matching.
// By default the document tag is used if available.
// However this can be disabled using `DisableDocumentTags` in the `Options` object.
// Sometimes disabling document tags option might give a better bullet list extraction if the document was tagged incorrectly.
// options := &Options{
// DisableDocumentTags: false, // this means use document tag if available
// }
// ex, err := NewWithOptions(page, options)
// // handle error
// pageText, _, _, err := ex.ExtractPageText()
// // handle error
// lists := pageText.List()
// txt := lists.Text()
func (_eaeg PageText) List() lists {
	_edba := !_eaeg._ccg._efab
	_gggb := _eaeg.getParagraphs()
	_gaef := true
	if _eaeg._bbb == nil || *_eaeg._bbb == nil {
		_gaef = false
	}
	// Start from the lists built from the paragraphs; they are also the
	// fallback when the structure tree has no content.
	_gcbcf := _gggb.list()
	if _gaef && _edba {
		_bgg := _dafd(&_gggb)
		_ffbf := &structTreeRoot{}
		_ffbf.parseStructTreeRoot(*_eaeg._bbb)
		if _ffbf._dgbb == nil {
			_ec.Log.Debug("\u004c\u0069\u0073\u0074\u003a\u0020\u0073t\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e'\u0074\u0020\u0068\u0061\u0076e\u0020\u0061\u006e\u0079\u0020\u0063\u006f\u006e\u0074e\u006e\u0074\u002c\u0020\u0075\u0073\u0069\u006e\u0067\u0020\u0074\u0065\u0078\u0074\u0020\u006d\u0061\u0074\u0063\u0068\u0069\u006e\u0067\u0020\u006d\u0065\u0074\u0068\u006f\u0064\u0020\u0069\u006e\u0073\u0074\u0065\u0061\u0064\u002e")
			return _gcbcf
		}
		_gcbcf = _ffbf.buildList(_bgg, _eaeg._fefc)
	}
	return _gcbcf
}

// findTextTables tries to grow a table from every paragraph that is not yet
// taken and has non-zero width. Tables whose cell count (`_ecbf` * `_dcfg`) is
// below `_ddd` are discarded; the others have their cells marked and are
// returned.
func (_edca paraList) findTextTables() []*textTable {
	var _ffgdb []*textTable
	for _, _ggcc := range _edca {
		if _ggcc.taken() || _ggcc.Width() == 0 {
			continue
		}
		_fgccd := _ggcc.isAtom()
		if _fgccd == nil {
			continue
		}
		_fgccd.growTable()
		if _fgccd._ecbf*_fgccd._dcfg < _ddd {
			continue
		}
		_fgccd.markCells()
		_fgccd.log("\u0067\u0072\u006fw\u006e")
		_ffgdb = append(_ffgdb, _fgccd)
	}
	return _ffgdb
}

// textObject is the per-text-object extraction state: the owning extractor,
// the page resources, a graphics state, the text state and its stack, two
// matrices, the marks collected so far and a flag.
// NOTE(review): the roles of the individual matrices and the flag are not
// visible in this part of the file.
type textObject struct {
	_bgcb  *Extractor
	_bacad *_ce.PdfPageResources
	_gbe   _ag.GraphicsState
	_befa  *textState
	_ebf   *stateStack
	_eee   _bc.Matrix
	_ecg   _bc.Matrix
	_cfb   []*textMark
	_efbb  bool
}

// toTextMarks flattens the paragraphs into TextMarks with running offsets.
// Paragraphs with `_bedf` set are skipped. Between consecutive paragraphs a
// single space mark is inserted when _eeacg reports true for the pair,
// otherwise two line-feed marks; two line-feed marks are always appended at
// the end.
func (_deba paraList) toTextMarks() []TextMark {
	_afcgb := 0
	var _edff []TextMark
	for _eeacf, _cgbd := range _deba {
		if _cgbd._bedf {
			continue
		}
		_gacb := _cgbd.toTextMarks(&_afcgb)
		_edff = append(_edff, _gacb...)
		if _eeacf != len(_deba)-1 {
			if _eeacg(_cgbd, _deba[_eeacf+1]) {
				_edff = _cacf(_edff, &_afcgb, "\u0020")
			} else {
				_edff = _cacf(_edff, &_afcgb, "\u000a")
				_edff = _cacf(_edff, &_afcgb, "\u000a")
			}
		}
	}
	_edff = _cacf(_edff, &_afcgb, "\u000a")
	_edff = _cacf(_edff, &_afcgb, "\u000a")
	return _edff
}

// drawRectangle appends a closed rectangular subpath with corner (`_dga`,
// `_ebed`), width `_aada` and height `_age`. When the `_cbag` debug flag is set
// the rectangle is also logged in device coordinates.
func (_abab *shapesState) drawRectangle(_dga, _ebed, _aada, _age float64) {
	if _cbag {
		_bbfbd := _abab.devicePoint(_dga, _ebed)
		_aga := _abab.devicePoint(_dga+_aada, _ebed+_age)
		_bafe := _ce.PdfRectangle{Llx: _bbfbd.X, Lly: _bbfbd.Y, Urx: _aga.X, Ury: _aga.Y}
		_ec.Log.Info("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066", _bafe)
	}
	_abab.newSubPath()
	_abab.moveTo(_dga, _ebed)
	_abab.lineTo(_dga+_aada, _ebed)
	_abab.lineTo(_dga+_aada, _ebed+_age)
	_abab.lineTo(_dga, _ebed+_age)
	_abab.closePath()
}

// _dcgbg returns a line feed when _ffegg reports false for the difference
// `_ebdd` - `_badd`, and a space otherwise.
func _dcgbg(_ebdd, _badd float64) string {
	_fgcc := !_ffegg(_ebdd - _badd)
	if _fgcc {
		return "\u000a"
	}
	return "\u0020"
}

// extractPageResourcesToFont appends one Font entry to `_fec.Fonts` for every
// font in the resources' font dictionary whose name _efg does not report as
// already present. The embedded font program, if any, is decoded from
// FontFile, FontFile2 or FontFile3 (checked in that order) and the file name
// gets the extension ".pfb", ".ttf" or ".cff" respectively.
// It returns an error when the font dictionary or a font cannot be resolved,
// or when a font stream fails to decode.
func (_fec *PageFonts) extractPageResourcesToFont(_ccc *_ce.PdfPageResources) error {
	_ad, _cbg := _gb.GetDict(_ccc.Font)
	if !_cbg {
		return _d.New(_be)
	}
	for _, _bbf := range _ad.Keys() {
		var (
			_gef = true
			_bda []byte
			_gdb string
		)
		_bbe, _gec := _ccc.GetFontByName(_bbf)
		if !_gec {
			return _d.New(_cc)
		}
		_fcf, _efb := _ce.NewPdfFontFromPdfObject(_bbe)
		if _efb != nil {
			return _efb
		}
		// NOTE(review): the descriptor is dereferenced without a nil check —
		// confirm FontDescriptor() cannot return nil for fonts reaching here.
		_agd := _fcf.FontDescriptor()
		_acg := _fcf.FontDescriptor().FontName.String()
		_ea := _fcf.Subtype()
		if _efg(_fec.Fonts, _acg) {
			continue
		}
		if len(_fcf.ToUnicode()) == 0 {
			_gef = false
		}
		if _agd.FontFile != nil {
			if _gcf, _fda := _gb.GetStream(_agd.FontFile); _fda {
				_bda, _efb = _gb.DecodeStream(_gcf)
				if _efb != nil {
					return _efb
				}
				_gdb = _acg + "\u002e\u0070\u0066\u0062"
			}
		} else if _agd.FontFile2 != nil {
			if _faf, _bf := _gb.GetStream(_agd.FontFile2); _bf {
				_bda, _efb = _gb.DecodeStream(_faf)
				if _efb != nil {
					return _efb
				}
				_gdb = _acg + "\u002e\u0074\u0074\u0066"
			}
		} else if _agd.FontFile3 != nil {
			if _aae, _fea := _gb.GetStream(_agd.FontFile3); _fea {
				_bda, _efb = _gb.DecodeStream(_aae)
				if _efb != nil {
					return _efb
				}
				_gdb = _acg + "\u002e\u0063\u0066\u0066"
			}
		}
		// No file name means no embedded font stream was decoded; only logged.
		if len(_gdb) < 1 {
			_ec.Log.Debug(_cb)
		}
		_aab := Font{FontName: _acg, PdfFont: _fcf, IsCID: _fcf.IsCID(), IsSimple: _fcf.IsSimple(), ToUnicode: _gef, FontType: _ea, FontData: _bda, FontFileName: _gdb, FontDescriptor: _agd}
		_fec.Fonts = append(_fec.Fonts, _aab)
	}
	return nil
}

// depthRange returns, in ascending order, the keys of `_dbae._faba` that lie in
// the closed interval [`_efea`, `_fdcf`], or nil when there are none.
func (_dbae *wordBag) depthRange(_efea, _fdcf int) []int {
	var _gacf []int
	for _bgef := range _dbae._faba {
		if _efea <= _bgef && _bgef <= _fdcf {
			_gacf = append(_gacf, _bgef)
		}
	}
	if len(_gacf) == 0 {
		return nil
	}
	_c.Ints(_gacf)
	return _gacf
}

// textMark is the internal representation of a piece of extracted text. It
// embeds the mark's rectangle and carries its text (`_ecaa`), font, colors and
// the originating PDF object.
// NOTE(review): the meaning of the remaining numeric and matrix fields is not
// visible in this part of the file.
type textMark struct {
	_ce.PdfRectangle
	_edge  int
	_ecaa  string
	_gdcgd string
	_bcdb  *_ce.PdfFont
	_bfaca float64
	_cgdb  float64
	_dadd  _bc.Matrix
	_feea  _bc.Point
	_aefef _ce.PdfRectangle
	_abgd  _fg.Color
	_eedd  _fg.Color
	_bfade _gb.PdfObject
	_fcdc  []string
	Tw     float64
	Th     float64
	_fefe  int
	_beaa  int
}

// removeDuplicates drops every point that _dcbd reports as equal to the point
// kept immediately before it.
func (_egfb *subpath) removeDuplicates() {
	if len(_egfb._gfefe) == 0 {
		return
	}
	_faca := []_bc.Point{_egfb._gfefe[0]}
	for _, _dedg := range _egfb._gfefe[1:] {
		if !_dcbd(_dedg, _faca[len(_faca)-1]) {
			_faca = append(_faca, _dedg)
		}
	}
	_egfb._gfefe = _faca
}

// _gaagg removes the last rune of the last mark in `_faab` and rewinds the
// running offset `*_gbaa` accordingly.
//
// If the last mark holds exactly one rune the whole mark is dropped and the
// offset is set to the end of the new last mark (or to the dropped mark's own
// offset when no mark remains). Otherwise the mark's Text is shortened in
// place and the offset is reduced by the byte length of the removed rune.
// An empty `_faab` is returned unchanged.
func _gaagg(_faab []TextMark, _gbaa *int) []TextMark {
	if len(_faab) == 0 {
		return _faab
	}
	last := len(_faab) - 1
	lastText := _faab[last].Text
	if len([]rune(lastText)) == 1 {
		removedOffset := _faab[last].Offset
		_faab = _faab[:last]
		if len(_faab) == 0 {
			// Nothing precedes the removed mark: fall back to where it started.
			*_gbaa = removedOffset
			return _faab
		}
		prev := _faab[len(_faab)-1]
		*_gbaa = prev.Offset + len(prev.Text)
		return _faab
	}
	trimmed := _edcg(lastText)
	*_gbaa += len(trimmed) - len(lastText)
	// Write through the slice: assigning to a copy of the element would leave
	// the mark's Text out of step with the adjusted offset.
	_faab[last].Text = trimmed
	return _faab
}

// text returns the paragraph's text as produced by writeText.
func (_faeb *textPara) text() string {
	_agac := new(_df.Buffer)
	_faeb.writeText(_agac)
	return _agac.String()
}

// _dgd returns the single numeric parameter of `_cbabd` as a float64.
// It returns an error when the operation does not have exactly one parameter
// or when the parameter is not a number.
func _dgd(_cbabd *_ag.ContentStreamOperation) (float64, error) {
	if len(_cbabd.Params) != 1 {
		_fdbfe := _d.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
		_ec.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _cbabd.Operand, 1, len(_cbabd.Params), _cbabd.Params)
		return 0.0, _fdbfe
	}
	return _gb.GetNumberAsFloat(_cbabd.Params[0])
}

// _baba builds a list node from its lines, a kind string and its child lists.
func _baba(_bded []*textLine, _cafa string, _efgac []*list) *list {
	return &list{_ggdb: _bded, _ffcg: _cafa, _gbab: _efgac}
}

// extractFormImages extracts the images from the form XObject named `_gbgb`.
// The form's own resources are used when present, otherwise `_fbd`. A missing
// form is not an error. The `_agb` counter is incremented after a successful
// extraction. `_cefa` is not used.
func (_bbfa *imageExtractContext) extractFormImages(_gbgb *_gb.PdfObjectName, _cefa _ag.GraphicsState, _fbd *_ce.PdfPageResources) error {
	_gga, _bab := _fbd.GetXObjectFormByName(*_gbgb)
	if _bab != nil {
		return _bab
	}
	if _gga == nil {
		return nil
	}
	_dcb, _bab := _gga.GetContentStream()
	if _bab != nil {
		return _bab
	}
	_eea := _gga.Resources
	if _eea == nil {
		_eea = _fbd
	}
	_bab = _bbfa.extractContentStreamImages(string(_dcb), _eea)
	if _bab != nil {
		return _bab
	}
	_bbfa._agb++
	return nil
}

// imageExtractContext accumulates the state of one image extraction run: the
// image marks found, three counters (`_agb` counts processed form XObjects), a
// per-stream image cache, the extraction options and a flag.
// NOTE(review): the meanings of `_da`, `_fbc` and `_dedf` are not visible here.
type imageExtractContext struct {
	_gbf  []ImageMark
	_da   int
	_fbc  int
	_agb  int
	_aabe map[*_gb.PdfObjectStream]*cachedImage
	_cef  *ImageExtractOptions
	_dedf bool
}

// _edcg returns `_dgfbg` without its last rune. An empty string is returned
// unchanged instead of panicking on the slice bound.
func _edcg(_dgfbg string) string {
	_bcdca := []rune(_dgfbg)
	if len(_bcdca) == 0 {
		return _dgfbg
	}
	return string(_bcdca[:len(_bcdca)-1])
}

// _fbga decodes the image XObject held in the stream `_agfb` and hands the
// decoded image together with `_faaga` to _bcfde.
// It returns (nil, nil) when `_agfb` is not a stream object.
func _fbga(_agfb _gb.PdfObject, _faaga _fg.Color) (_ded.Image, error) {
	_edda, _bgdca := _gb.GetStream(_agfb)
	if !_bgdca {
		return nil, nil
	}
	_dabga, _abagc := _ce.NewXObjectImageFromStream(_edda)
	if _abagc != nil {
		return nil, _abagc
	}
	_bfda, _abagc := _dabga.ToImage()
	if _abagc != nil {
		return nil, _abagc
	}
	return _bcfde(_bfda, _faaga), nil
}
2023-11-11 11:29:03 +00:00
2023-12-17 13:54:01 +00:00
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
func (_bcg *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if _bcg == nil {
		return nil, _d.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _ge.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}
	_gab := len(_bcg._dec)
	if _gab == 0 {
		return _bcg, nil
	}
	// Clamp the requested range to the offsets covered by the marks.
	if start < _bcg._dec[0].Offset {
		start = _bcg._dec[0].Offset
	}
	if end > _bcg._dec[_gab-1].Offset+1 {
		end = _bcg._dec[_gab-1].Offset + 1
	}
	// First mark whose last byte is at or after `start`.
	_cgdca := _c.Search(_gab, func(_aafag int) bool {
		return _bcg._dec[_aafag].Offset+len(_bcg._dec[_aafag].Text)-1 >= start
	})
	if !(0 <= _cgdca && _cgdca < _gab) {
		_accc := _ge.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, _cgdca, _gab, _bcg._dec[0], _bcg._dec[_gab-1])
		return nil, _accc
	}
	// First mark that starts at or after `end`; this is the exclusive upper
	// bound of the result. sort.Search returns `_gab` when no such mark exists,
	// which simply means the range runs through the last mark, so it is a valid
	// bound and not an error.
	_bgcf := _c.Search(_gab, func(_bedge int) bool { return _bcg._dec[_bedge].Offset > end-1 })
	if _bgcf <= _cgdca {
		return nil, _ge.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, _cgdca, _bgcf)
	}
	return &TextMarkArray{_dec: _bcg._dec[_cgdca:_bgcf]}, nil
}

// NOTE(review): the purpose of this constant is not visible in this part of the file.
const _gdgb = 10

// String describes the cell as its rectangle, its paragraph count and the
// merged paragraph text passed through _adagc with limit 50 (empty when the
// cell has no paragraphs).
func (_effb compositeCell) String() string {
	_acdc := ""
	if len(_effb.paraList) > 0 {
		_acdc = _adagc(_effb.paraList.merge().text(), 50)
	}
	return _ge.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071", _effb.PdfRectangle, len(_effb.paraList), _acdc)
}

// _eabf returns the list's `_dage` text with `*_gddd` prepended to every
// non-empty line; each emitted line ends with a line feed and empty lines are
// dropped.
func _eabf(_cdfcb *list, _gddd *string) string {
	_dcde := _gd.Split(_cdfcb._dage, "\u000a")
	_debc := &_gd.Builder{}
	for _, _ccda := range _dcde {
		if _ccda != "" {
			_debc.WriteString(*_gddd)
			_debc.WriteString(_ccda)
			_debc.WriteString("\u000a")
		}
	}
	return _debc.String()
}

// vertsHorzs splits the rulings by kind: those of kind `_ebdaf` are returned
// first, those of kind `_ecac` second. Rulings of any other kind are dropped.
func (_bdbd rulingList) vertsHorzs() (rulingList, rulingList) {
	var _afabe, _gbac rulingList
	for _, _gcdg := range _bdbd {
		switch _gcdg._bfbc {
		case _ebdaf:
			_afabe = append(_afabe, _gcdg)
		case _ecac:
			_gbac = append(_gbac, _gcdg)
		}
	}
	return _afabe, _gbac
}
2023-11-11 11:29:03 +00:00
2023-12-17 13:54:01 +00:00
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
func (_ceef *Extractor) ExtractTextWithStats() (_ddg string, _cce int, _fed int, _dda error) {
	pageText, numChars, numMisses, err := _ceef.ExtractPageText()
	if err != nil {
		// The counters are still passed through alongside the error.
		return "", numChars, numMisses, err
	}
	return pageText.Text(), numChars, numMisses, nil
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// PageTextOptions holds various options available in extraction process.
type PageTextOptions struct {
	_efab  bool
	_fcgfa bool
}

// subdivide builds a new table in which every composite cell of the receiver has been
// split along the row and column corridors. When either corridor map is empty the
// receiver itself is returned unchanged.
func (_ddafg *textTable) subdivide() *textTable {
	_ddafg.logComposite("subdivide")
	rowCorridors := _ddafg.compositeRowCorridors()
	colCorridors := _ddafg.compositeColCorridors()
	if _afcg {
		_ec.Log.Info("subdivide:\n\trowCorridors=%s\n\tcolCorridors=%s",
			_bfbfd(rowCorridors), _bfbfd(colCorridors))
	}
	if len(rowCorridors) == 0 || len(colCorridors) == 0 {
		return _ddafg
	}
	_acgg(rowCorridors)
	_acgg(colCorridors)
	if _afcg {
		_ec.Log.Info("subdivide fixed:\n\trowCorridors=%s\n\tcolCorridors=%s",
			_bfbfd(rowCorridors), _bfbfd(colCorridors))
	}

	// Offsets of each composite row/column inside the subdivided table, plus its final size.
	yOffsets, numRows := _daeg(_ddafg._dcfg, rowCorridors)
	xOffsets, numCols := _daeg(_ddafg._ecbf, colCorridors)
	cells := make(map[uint64]*textPara, numCols*numRows)
	subdivided := &textTable{
		PdfRectangle: _ddafg.PdfRectangle,
		_beaeg:       _ddafg._beaeg,
		_dcfg:        numRows,
		_ecbf:        numCols,
		_gcbga:       cells,
	}
	if _afcg {
		_ec.Log.Info("subdivide: composite = %d x %d cells= %d x %d\n"+
			"\trowCorridors=%s\n"+
			"\tcolCorridors=%s\n"+
			"\tyOffsets=%+v\n"+
			"\txOffsets=%+v",
			_ddafg._ecbf, _ddafg._dcfg, numCols, numRows,
			_bfbfd(rowCorridors), _bfbfd(colCorridors), yOffsets, xOffsets)
	}

	for row := 0; row < _ddafg._dcfg; row++ {
		y0 := yOffsets[row]
		for col := 0; col < _ddafg._ecbf; col++ {
			x0 := xOffsets[col]
			if _afcg {
				_ge.Printf("%6d, %2d: x0=%d y0=%d\n", col, row, x0, y0)
			}
			composite, found := _ddafg._egfe[_aaca(col, row)]
			if !found {
				continue
			}
			// Copy the cells of the split composite into place at (x0, y0).
			piece := composite.split(rowCorridors[row], colCorridors[col])
			for dy := 0; dy < piece._dcfg; dy++ {
				for dx := 0; dx < piece._ecbf; dx++ {
					para := piece.get(dx, dy)
					subdivided.put(x0+dx, y0+dy, para)
					if _afcg {
						_ge.Printf("%8d, %2d: %s\n", x0+dx, y0+dy, para)
					}
				}
			}
		}
	}
	return subdivided
}

// _dca builds a word bag from the words in _ccga. The bag is created from the first
// word by _bbea, the remaining words are added under the depth index computed by _fece,
// and the bag is sorted before being returned. _ccga must not be empty.
func _dca(_ccga []*textWord, _aag float64, _dgge, _acd rulingList) *wordBag {
	bag := _bbea(_ccga[0], _aag, _dgge, _acd)
	for i := 1; i < len(_ccga); i++ {
		word := _ccga[i]
		depthIdx := _fece(word._cffdg)
		bag._faba[depthIdx] = append(bag._faba[depthIdx], word)
		bag.PdfRectangle = _cdggc(bag.PdfRectangle, word.PdfRectangle)
	}
	bag.sort()
	return bag
}

// tidied returns the rulings with duplicates removed, snapped to groups and sorted.
// It returns nil when snapping yields nil. _bcdfe is only used to label the log output.
func (_fbfca rulingList) tidied(_bcdfe string) rulingList {
	uniques := _fbfca.removeDuplicates()
	uniques.log("uniques")
	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()
	if _gfgc {
		_ec.Log.Info("tidied: %q vecs=%d uniques=%d coalesced=%d",
			_bcdfe, len(_fbfca), len(uniques), len(coalesced))
	}
	coalesced.log("coalesced")
	return coalesced
}

// makeRectRuling tries to interpret the first four points of the subpath as a rectangle
// and to turn that rectangle into a ruling of colour _bbaf.
//
// The boolean result is false when the four edges are not a plausible rectangle, when
// the rectangle has no ruling kind, or when it cannot be converted to a ruling.
// The subpath must hold at least four points.
func (_cdag *subpath) makeRectRuling(_bbaf _fg.Color) (*ruling, bool) {
	if _feceb {
		_ec.Log.Info("makeRectRuling: path=%v", _cdag)
	}
	points := _cdag._gfefe[:4]

	// Classify the edge that starts at each point.
	kinds := make(map[int]rulingKind, len(points))
	for i, from := range points {
		to := _cdag._gfefe[(i+1)%4]
		kinds[i] = _bcdee(from, to)
		if _feceb {
			_ge.Printf("%4d: %s = %6.2f - %6.2f", i, kinds[i], from, to)
		}
	}
	if _feceb {
		_ge.Printf("   kinds=%+v\n", kinds)
	}

	var verts, horzs []int
	for i, kind := range kinds {
		switch kind {
		case _ecac:
			horzs = append(horzs, i)
		case _ebdaf:
			verts = append(verts, i)
		}
	}
	if _feceb {
		_ge.Printf("   horzs=%d %+v\n", len(horzs), horzs)
		_ge.Printf("   verts=%d %+v\n", len(verts), verts)
	}

	// Accept two horizontal and two vertical edges, or a pair of one kind only
	// provided the pair passes the corresponding check.
	var ok bool
	switch {
	case len(horzs) == 2 && len(verts) == 2:
		ok = true
	case len(horzs) == 2 && len(verts) == 0:
		ok = _acdeg(points[horzs[0]], points[horzs[1]])
	case len(verts) == 2 && len(horzs) == 0:
		ok = _ddafc(points[verts[0]], points[verts[1]])
	}
	if _feceb {
		_ge.Printf("   horzs=%d verts=%d ok=%t\n", len(horzs), len(verts), ok)
	}
	if !ok {
		if _feceb {
			_ec.Log.Error("!!makeRectRuling: path=%v", _cdag)
			_ge.Printf("   horzs=%d verts=%d ok=%t\n", len(horzs), len(verts), ok)
		}
		return &ruling{}, false
	}

	// Fill in the missing pair with the edges that are not of the other kind.
	if len(verts) == 0 {
		for i, kind := range kinds {
			if kind != _ecac {
				verts = append(verts, i)
			}
		}
	}
	if len(horzs) == 0 {
		for i, kind := range kinds {
			if kind != _ebdaf {
				horzs = append(horzs, i)
			}
		}
	}
	if _feceb {
		_ec.Log.Info("makeRectRuling: horzs=%d verts=%d points=%d\n"+
			"\t horzs=%+v\n"+
			"\t verts=%+v\n"+
			"\tpoints=%+v",
			len(horzs), len(verts), len(points), horzs, verts, points)
	}

	var xFirst, xSecond, yHigh, yLow _bc.Point
	if points[horzs[0]].Y > points[horzs[1]].Y {
		yHigh, yLow = points[horzs[0]], points[horzs[1]]
	} else {
		yHigh, yLow = points[horzs[1]], points[horzs[0]]
	}
	if points[verts[0]].X > points[verts[1]].X {
		xFirst, xSecond = points[verts[0]], points[verts[1]]
	} else {
		xFirst, xSecond = points[verts[1]], points[verts[0]]
	}

	// Normalise so that the lower-left corner really is lower-left.
	rect := _ce.PdfRectangle{Llx: xFirst.X, Urx: xSecond.X, Lly: yLow.Y, Ury: yHigh.Y}
	if rect.Llx > rect.Urx {
		rect.Llx, rect.Urx = rect.Urx, rect.Llx
	}
	if rect.Lly > rect.Ury {
		rect.Lly, rect.Ury = rect.Ury, rect.Lly
	}

	candidate := rectRuling{PdfRectangle: rect, _dfec: _gcgf(rect), Color: _bbaf}
	if candidate._dfec == _gdcf {
		if _feceb {
			_ec.Log.Error("makeRectRuling: kind=nil")
		}
		return nil, false
	}
	result, isRuling := candidate.asRuling()
	if !isRuling {
		if _feceb {
			_ec.Log.Error("makeRectRuling: !isRuling")
		}
		return nil, false
	}
	if _gfgc {
		_ge.Printf("   r=%s\n", result.String())
	}
	return result, true
}

// get returns the cell stored for column _dcafe and row _dafcc, or nil if there is none.
func (_ecaab *textTable) get(_dcafe, _dafcc int) *textPara {
	key := _aaca(_dcafe, _dafcc)
	return _ecaab._gcbga[key]
}

// _eacf rounds _aebfc to the nearest multiple of _dgeec.
func _eacf(_aebfc float64) float64 {
	steps := _aa.Round(_aebfc / _dgeec)
	return _dgeec * steps
}

// writeText writes the text of every paragraph that is not flagged _bedf to _eafd.
// Consecutive paragraphs are separated by a single space when _eeacg reports true for
// the pair and by two newlines otherwise; two newlines are written at the end.
// Errors returned by the writer are ignored.
func (_aaed paraList) writeText(_eafd _a.Writer) {
	space, newline := []byte(" "), []byte("\n")
	last := len(_aaed) - 1
	for i, para := range _aaed {
		if para._bedf {
			continue
		}
		para.writeText(_eafd)
		if i == last {
			continue
		}
		if _eeacg(para, _aaed[i+1]) {
			_eafd.Write(space)
		} else {
			_eafd.Write(newline)
			_eafd.Write(newline)
		}
	}
	_eafd.Write(newline)
	_eafd.Write(newline)
}

// _eecad classifies the segment between the two points using the absolute X and Y
// distances and the threshold _cec.
func _eecad(_cbbcf, _eabef _bc.Point) rulingKind {
	dx := _aa.Abs(_cbbcf.X - _eabef.X)
	dy := _aa.Abs(_cbbcf.Y - _eabef.Y)
	return _gbbf(dx, dy, _cec)
}

// _gfbbf converts the line from _eaeb to _begg with colour _cgg into a ruling.
// It returns (nil, false) when the line has no ruling kind.
func _gfbbf(_eaeb, _begg _bc.Point, _cgg _fg.Color) (*ruling, bool) {
	line := lineRuling{
		_ddgfc: _eaeb,
		_eacg:  _begg,
		_fbbag: _eecad(_eaeb, _begg),
		Color:  _cgg,
	}
	if line._fbbag != _gdcf {
		return line.asRuling()
	}
	return nil, false
}

// pullWord adds _bfab to the bag under depth index _fceff, growing the bag's bounding
// box and raising _egfa if the word's _ddgee is larger. The word is also recorded in
// _dcbg[_fceff], which must already hold a non-nil set.
func (_cbae *wordBag) pullWord(_bfab *textWord, _fceff int, _dcbg map[int]map[*textWord]struct{}) {
	_cbae.PdfRectangle = _cdggc(_cbae.PdfRectangle, _bfab.PdfRectangle)
	if _bfab._ddgee > _cbae._egfa {
		_cbae._egfa = _bfab._ddgee
	}
	words := append(_cbae._faba[_fceff], _bfab)
	_cbae._faba[_fceff] = words
	_dcbg[_fceff][_bfab] = struct{}{}
}

// _dafd groups the text lines of every paragraph in *_bbde, including the lines of the
// cells of a paragraph's table, by the _fefe value of the first mark of the first word
// of each line. Lines rejected by _bcgc are logged and left out.
func _dafd(_bbde *paraList) map[int][]*textLine {
	const rejected = "groupLines: The text line contains more than one mcid number. It should be split."
	grouped := map[int][]*textLine{}
	collect := func(lines []*textLine) {
		for _, line := range lines {
			if !_bcgc(line) {
				_ec.Log.Debug(rejected)
				continue
			}
			key := line._aebc[0]._bgeaa[0]._fefe
			grouped[key] = append(grouped[key], line)
		}
	}
	for _, para := range *_bbde {
		collect(para._bdbcg)
		if para._bddea == nil {
			continue
		}
		for _, cellPara := range para._bddea._gcbga {
			collect(cellPara._bdbcg)
		}
	}
	return grouped
}

// establishSubpath returns the current (last) subpath, first appending a new one at the
// last point when lastpointEstablished reports false. It returns nil if there is no
// subpath. The _edee flag is cleared whenever a subpath is returned.
func (_fgcad *shapesState) establishSubpath() *subpath {
	if lastPoint, established := _fgcad.lastpointEstablished(); !established {
		_fgcad._abgb = append(_fgcad._abgb, _fae(lastPoint))
	}
	count := len(_fgcad._abgb)
	if count == 0 {
		return nil
	}
	_fgcad._edee = false
	return _fgcad._abgb[count-1]
}

// xMean returns the mean of the X coordinates of the two end points of the line.
func (_cafe lineRuling) xMean() float64 {
	xSum := _cafe._ddgfc.X + _cafe._eacg.X
	return 0.5 * xSum
}

// mergePrimary returns the mean of the _abbgc values of the rulings.
// The list must not be empty.
func (_egef rulingList) mergePrimary() float64 {
	total := _egef[0]._abbgc
	for i := 1; i < len(_egef); i++ {
		total += _egef[i]._abbgc
	}
	return total / float64(len(_egef))
}

// wordBag is a bounded collection of words bucketed by depth index.
type wordBag struct {
	_ce.PdfRectangle
	// _egfa is raised by pullWord to the largest _ddgee of the words pulled in.
	_egfa        float64
	_eeca, _gbef rulingList
	_cfg         float64
	// _faba maps a depth index to the words filed under it.
	_faba map[int][]*textWord
}

// devicePoint transforms (_ceba, _ecgc) by the product of the state's _dag and _aabgc
// matrices and returns the result as a point.
func (_faae *shapesState) devicePoint(_ceba, _ecgc float64) _bc.Point {
	combined := _faae._dag.Mult(_faae._aabgc)
	devX, devY := combined.Transform(_ceba, _ecgc)
	return _bc.NewPoint(devX, devY)
}

// has reports whether _gdcff is a member of the set.
func (_fegd intSet) has(_gdcff int) bool {
	_, present := _fegd[_gdcff]
	return present
}

// augmentGrid splits the rulings into verticals and horizontals and adds border rulings
// (marked _cgacb) wherever the bounding box of one group sticks out of the bounding box
// of the other by more than _ebbf. The verticals are returned first.
//
// If either group is empty, or nothing was added, the unmodified groups are returned.
func (_bbbc rulingList) augmentGrid() (rulingList, rulingList) {
	verts, horzs := _bbbc.vertsHorzs()
	if len(verts) == 0 || len(horzs) == 0 {
		return verts, horzs
	}
	origVerts, origHorzs := verts, horzs
	bboxV := verts.bbox()
	bboxH := horzs.bbox()
	if _gfgc {
		_ec.Log.Info("augmentGrid: bboxV=%6.2f", bboxV)
		_ec.Log.Info("augmentGrid: bboxH=%6.2f", bboxH)
	}

	// Horizontals reach further left/right than the outermost verticals.
	if bboxH.Llx < bboxV.Llx-_ebbf {
		left := &ruling{_bgaa: _cgacb, _bfbc: _ebdaf, _abbgc: bboxH.Llx, _cebe: bboxV.Lly, _deee: bboxV.Ury}
		verts = append(rulingList{left}, verts...)
	}
	if bboxH.Urx > bboxV.Urx+_ebbf {
		right := &ruling{_bgaa: _cgacb, _bfbc: _ebdaf, _abbgc: bboxH.Urx, _cebe: bboxV.Lly, _deee: bboxV.Ury}
		verts = append(verts, right)
	}
	// Verticals reach further down/up than the outermost horizontals.
	if bboxV.Lly < bboxH.Lly-_ebbf {
		bottom := &ruling{_bgaa: _cgacb, _bfbc: _ecac, _abbgc: bboxV.Lly, _cebe: bboxH.Llx, _deee: bboxH.Urx}
		horzs = append(rulingList{bottom}, horzs...)
	}
	if bboxV.Ury > bboxH.Ury+_ebbf {
		top := &ruling{_bgaa: _cgacb, _bfbc: _ecac, _abbgc: bboxV.Ury, _cebe: bboxH.Llx, _deee: bboxH.Urx}
		horzs = append(horzs, top)
	}

	if len(verts)+len(horzs) == len(_bbbc) {
		return origVerts, origHorzs
	}
	augmented := append(verts, horzs...)
	_bbbc.log("unaugmented")
	augmented.log("augmented")
	return verts, horzs
}

// Numeric thresholds and limits used by the extraction code.
const (
	_fdac  = 1.0e-6
	_dgeec = 1.0e-4 // rounding step used by _eacf
	_fafa  = 10
	_edbf  = 6
	_cdca  = 0.5
	_edd   = 0.12
	_dfac  = 0.19
	_bagb  = 0.04
	_fffgc = 0.04
	_cfebd = 1.0
	_bdgd  = 0.04
	_ccab  = 0.4
	_gfff  = 0.7
	_gceeb = 1.0
	_baae  = 0.1
	_ebfa  = 1.4
	_gecdb = 0.46
	_ddag  = 0.02
	_bada  = 0.2
	_geb   = 0.5
	_ggafb = 4
	_bafc  = 4.0
	_ddd   = 6
	_fbbd  = 0.3
	_gccfg = 0.01
	_gcga  = 0.02
	_cbbb  = 2
	_acgd  = 2
	_fcae  = 500
	_cec   = 4.0 // length threshold passed to _gbbf by _eecad
	_aace  = 4.0
	_cffa  = 0.05 // ratio limit used by _beeee
	_edeeb = 0.1  // lower bound of the divisor in _beeee
	_ebbf  = 2.0  // bounding box tolerance used by augmentGrid
	_gcef  = 2.0
	_fdgb  = 1.5
	_ffee  = 3.0
	_cfea  = 0.25
)

// newSubPath clears the current path.
func (_bgcg *shapesState) newSubPath() {
	_bgcg.clearPath()
	if !_cbag {
		return
	}
	_ec.Log.Info("newSubPath: %s", _bgcg)
}

// _adcg applies _fcd to the _ebcf rectangles of the two paragraphs.
func _adcg(_adfee, _fcbg *textPara) bool {
	first, second := _adfee._ebcf, _fcbg._ebcf
	return _fcd(first, second)
}

// complete reports whether the tile has exactly four borders.
func (_fcfa gridTile) complete() bool {
	borders := _fcfa.numBorders()
	return borders == 4
}

// structElement is a node of a tree; its children are held in _cagb.
type structElement struct {
	_bacdb string
	_cagb  []structElement
	_efcc  int64
	_geaa  _gb.PdfObject
}

var (
	_gdf = _d.New("type check error")
	_fe  = _d.New("range check error")
)

// _gbbf classifies a segment from its X extent _deafb and Y extent _ccdd.
// It is horizontal when the X extent is at least _cgddb and _beeee(_ccdd, _deafb) holds,
// vertical in the mirrored case, and _gdcf otherwise. The horizontal test is tried first.
func _gbbf(_deafb, _ccdd, _cgddb float64) rulingKind {
	switch {
	case _deafb >= _cgddb && _beeee(_ccdd, _deafb):
		return _ecac
	case _ccdd >= _cgddb && _beeee(_deafb, _ccdd):
		return _ebdaf
	default:
		return _gdcf
	}
}

// stroke appends the current subpaths, together with the current stroke colour, to
// *_gceed as a new path section.
func (_bae *shapesState) stroke(_gceed *[]pathSection) {
	section := pathSection{_fbfe: _bae._abgb, Color: _bae._ecfc.getStrokeColor()}
	*_gceed = append(*_gceed, section)
	if !_gfgc {
		return
	}
	_ge.Printf("    STROKE: %d strokes ss=%s color=%+v %6.2f\n",
		len(*_gceed), _bae, _bae._ecfc.getStrokeColor(), section.bbox())
	if !_deaga {
		return
	}
	// Dump the subpaths up to and including index 10.
	for i, sub := range _bae._abgb {
		_ge.Printf("%8d: %s\n", i, sub)
		if i == 10 {
			break
		}
	}
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	// IncludeInlineStencilMasks defaults to false.
	// NOTE(review): presumably enables extraction of inline stencil masks — confirm
	// against the code that reads it.
	IncludeInlineStencilMasks bool
}

// _fgabc rounds _cbec to the nearest multiple of _cbfe and returns it as an int.
// A step of 0 is treated as 1.
func _fgabc(_cbec float64, _cbfe int) int {
	step := 1.0
	if _cbfe != 0 {
		step = float64(_cbfe)
	}
	multiples := _aa.Round(_cbec / step)
	return int(multiples * step)
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	// _bcf is the text handed to the content stream parser by ExtractPageImages.
	_bcf string
	// _ab holds the page resources passed along with _bcf.
	_ab  *_ce.PdfPageResources
	_gda _ce.PdfRectangle
	_ed  *_ce.PdfRectangle
	// _cd and _fa are string-keyed lookups of font entries and text results.
	// NOTE(review): presumably caches — confirm against the code that fills them.
	_cd  map[string]fontEntry
	_fa  map[string]textResult
	_bg  int64
	_eb  int
	_fgb *Options
	_ba  *_gb.PdfObject
	_eg  _gb.PdfObject
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	_dec []TextMark
}

// _fafg reports whether rectangle _aafg lies inside (or on the border of) rectangle _gagb.
func _fafg(_gagb, _aafg _ce.PdfRectangle) bool {
	withinX := _gagb.Llx <= _aafg.Llx && _aafg.Urx <= _gagb.Urx
	withinY := _gagb.Lly <= _aafg.Lly && _aafg.Ury <= _gagb.Ury
	return withinX && withinY
}

// emptyCompositeColumn reports whether no composite cell in column _bdgc holds any
// paragraphs. Missing cells count as empty.
func (_cdddg *textTable) emptyCompositeColumn(_bdgc int) bool {
	for row := 0; row < _cdddg._dcfg; row++ {
		cell, found := _cdddg._egfe[_aaca(_bdgc, row)]
		if found && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// _bdcdcf returns the keys of _afbef sorted in descending order.
func _bdcdcf(_afbef map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_afbef))
	for key := range _afbef {
		keys = append(keys, key)
	}
	// Sort ascending, then reverse in place.
	_c.Float64s(keys)
	for lo, hi := 0, len(keys)-1; lo < hi; lo, hi = lo+1, hi-1 {
		keys[lo], keys[hi] = keys[hi], keys[lo]
	}
	return keys
}

// compositeColCorridors returns a map with one entry per table column (0.._ecbf-1),
// each mapped to a nil slice.
func (_baggf *textTable) compositeColCorridors() map[int][]float64 {
	width := _baggf._ecbf
	corridors := make(map[int][]float64, width)
	if _afcg {
		_ec.Log.Info("compositeColCorridors: w=%d ", width)
	}
	for col := 0; col < width; col++ {
		corridors[col] = nil
	}
	return corridors
}

// findGridTables looks for a table in each of the tilings in _bdbde and returns the
// tables found, in tiling order. Every table found has markCells called on it, and every
// paragraph in the second result of findTableGrid gets its _bfcd flag set.
func (_geaaf paraList) findGridTables(_bdbde []gridTiling) []*textTable {
	if _afcg {
		_ec.Log.Info("findGridTables: %d paras", len(_geaaf))
		for i, para := range _geaaf {
			_ge.Printf("%4d: %s\n", i, para)
		}
	}

	var tables []*textTable
	for i, tiling := range _bdbde {
		table, used := _geaaf.findTableGrid(tiling)
		if table != nil {
			table.log(_ge.Sprintf("findTableWithGrids: %d", i))
			tables = append(tables, table)
			table.markCells()
		}
		for para := range used {
			para._bfcd = true
		}
	}

	if _afcg {
		_ec.Log.Info("findGridTables: %d tables", len(tables))
	}
	return tables
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// Elements returns the TextMarks in `ma`.
func (_ecb *TextMarkArray )Elements ()[]TextMark {return _ecb ._dec };func (_gecdc lineRuling )yMean ()float64 {return 0.5*(_gecdc ._ddgfc .Y +_gecdc ._eacg .Y )};func (_bggbe rulingList )snapToGroups ()rulingList {_gecce ,_ffgda :=_bggbe .vertsHorzs ();
if len (_gecce )> 0{_gecce =_gecce .snapToGroupsDirection ();};if len (_ffgda )> 0{_ffgda =_ffgda .snapToGroupsDirection ();};_gfeb :=append (_gecce ,_ffgda ...);_gfeb .log ("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073");return _gfeb ;
2023-09-07 17:40:17 +00:00
};
2023-12-17 13:54:01 +00:00
// String returns a description of the ruling.
func (_faga *ruling) String() string {
	if _faga._bfbc == _gdcf {
		return "NOT RULING"
	}
	// For horizontal rulings the primary coordinate is y and the extent runs along x.
	primaryAxis, extentAxis := "x", "y"
	if _faga._bfbc == _ecac {
		primaryAxis, extentAxis = "y", "x"
	}
	var widthText string
	if _faga._bcgdf != 0.0 {
		widthText = _ge.Sprintf(" width=%.2f", _faga._bcgdf)
	}
	length := _faga._deee - _faga._cebe
	return _ge.Sprintf("%10s %s=%6.2f %s=%6.2f - %6.2f (%6.2f) %s %v%s",
		_faga._bfbc, primaryAxis, _faga._abbgc, extentAxis, _faga._cebe, _faga._deee,
		length, _faga._bgaa, _faga.Color, widthText)
}

// textPara is a bounded group of text lines, or a holder for a table.
type textPara struct {
	_ce.PdfRectangle
	_ebcf _ce.PdfRectangle
	// _bdbcg holds the text lines of the paragraph.
	_bdbcg []*textLine
	// _bddea is non-nil when the paragraph holds a table; see toTextMarks.
	_bddea *textTable
	// _bfcd is set by findGridTables on paragraphs reported by findTableGrid.
	_bfcd  bool
	_bedf  bool
	_egab  *textPara
	_gaca  *textPara
	_abdda *textPara
	_fdgbd *textPara
	_efafd []list
}

// Render mode flags. The values are distinct bits: 1, 2 and 4.
const (
	RenderModeStroke RenderMode = 1 << iota
	RenderModeFill
	RenderModeClip
)

// extractInlineImage converts the inline image _cgae to RGB and appends an ImageMark for
// it to the context. Size, angle and position of the mark are taken from the CTM of the
// graphics state _gfe. When the image has no colour space, DeviceGray is used.
func (_aee *imageExtractContext) extractInlineImage(_cgae *_ag.ContentStreamInlineImage, _gfe _ag.GraphicsState, _fcg *_ce.PdfPageResources) error {
	img, err := _cgae.ToImage(_fcg)
	if err != nil {
		return err
	}
	colorSpace, err := _cgae.GetColorSpace(_fcg)
	if err != nil {
		return err
	}
	if colorSpace == nil {
		colorSpace = _ce.NewPdfColorspaceDeviceGray()
	}
	rgbImg, err := colorSpace.ImageToRGB(*img)
	if err != nil {
		return err
	}

	mark := ImageMark{
		Image:  &rgbImg,
		Width:  _gfe.CTM.ScalingFactorX(),
		Height: _gfe.CTM.ScalingFactorY(),
		Angle:  _gfe.CTM.Angle(),
	}
	mark.X, mark.Y = _gfe.CTM.Translation()
	_aee._gbf = append(_aee._gbf, mark)
	_aee._da++
	return nil
}

// _bfbfd formats _ggeb as "{key: values, ...}" with the keys in the order returned by
// _fadg.
func _bfbfd(_ggeb map[int][]float64) string {
	keys := _fadg(_ggeb)
	parts := make([]string, len(_ggeb))
	for i, key := range keys {
		parts[i] = _ge.Sprintf("%d: %.2f", key, _ggeb[key])
	}
	joined := _gd.Join(parts, ", ")
	return _ge.Sprintf("{%s}", joined)
}

// _dgdc returns the distance from the bottom of _bcfb's bounding box up to the top of
// _fcgfe.
func _dgdc(_fcgfe _ce.PdfRectangle, _bcfb bounded) float64 {
	bottom := _bcfb.bbox().Lly
	return _fcgfe.Ury - bottom
}

// toTextMarks returns the text marks of the paragraph, with *_bbeb passed on to the
// helpers that build them.
//
// For a table paragraph the cells are visited row by row: a missing cell contributes a
// tab, every cell is followed by a space and every row but the last by a newline. If the
// table is exportable the marks are additionally passed through _ddedf with the exported
// table.
func (_adbb *textPara) toTextMarks(_bbeb *int) []TextMark {
	table := _adbb._bddea
	if table == nil {
		return _adbb.toCellTextMarks(_bbeb)
	}

	var marks []TextMark
	for row := 0; row < _adbb._bddea._dcfg; row++ {
		for col := 0; col < _adbb._bddea._ecbf; col++ {
			if cell := _adbb._bddea.get(col, row); cell != nil {
				marks = append(marks, cell.toCellTextMarks(_bbeb)...)
			} else {
				marks = _cacf(marks, _bbeb, "\t")
			}
			marks = _cacf(marks, _bbeb, " ")
		}
		if row < _adbb._bddea._dcfg-1 {
			marks = _cacf(marks, _bbeb, "\n")
		}
	}

	exportTable := _adbb._bddea
	if exportTable.isExportable() {
		exported := exportTable.toTextTable()
		marks = _ddedf(marks, &exported)
	}
	return marks
}

// depthBand returns the depth indexes between the indexes of depths _bfef and _efga,
// as computed by depthRange. It returns nil for an empty bag.
func (_ebag *wordBag) depthBand(_bfef, _efga float64) []int {
	if len(_ebag._faba) == 0 {
		return nil
	}
	from := _ebag.getDepthIdx(_bfef)
	to := _ebag.getDepthIdx(_efga)
	return _ebag.depthRange(from, to)
}
2023-08-03 17:30:04 +00:00
2023-12-17 13:54:01 +00:00
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_dea *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	extraction := &imageExtractContext{_cef: options}
	if err := extraction.extractContentStreamImages(_dea._bcf, _dea._ab); err != nil {
		return nil, err
	}
	images := &PageImages{Images: extraction._gbf}
	return images, nil
}
2023-08-03 17:30:04 +00:00
2023-12-17 13:54:01 +00:00
// String returns the name of the ruling kind, or "Not a ruling: <n>" for a value that
// has no entry in _baff.
func (_cgdbg rulingKind) String() string {
	if name, known := _baff[_cgdbg]; known {
		return name
	}
	return _ge.Sprintf("Not a ruling: %d", _cgdbg)
}

// tables returns the exportable tables of the paragraphs, in paragraph order.
func (_cfad paraList) tables() []TextTable {
	var exported []TextTable
	if _afcg {
		_ec.Log.Info("paras.tables:")
	}
	for _, para := range _cfad {
		table := para._bddea
		if table == nil || !table.isExportable() {
			continue
		}
		exported = append(exported, table.toTextTable())
	}
	return exported
}

// moveText forwards the offsets (_dfcf, _dfcfa) to moveLP.
func (_gad *textObject) moveText(_dfcf, _dfcfa float64) {
	_gad.moveLP(_dfcf, _dfcfa)
}

// rulingKind is the kind of a ruling: none, horizontal or vertical.
type rulingKind int

// _baff maps each ruling kind to its name.
var _baff = map[rulingKind]string{
	_gdcf:  "none",
	_ecac:  "horizontal",
	_ebdaf: "vertical",
}
// toGrids partitions the rulings into groups of connected rulings and returns those
// groups that isActualGrid accepts, each snapped with snapToGroups.
//
// Rulings are visited in an order that puts those with the most connections first
// (ties broken by comp). Each ruling joins the first existing group containing a ruling
// it is connected to, or starts a new group. Groups are then ordered largest first and
// their members sorted with comp.
//
// An empty ruling list yields nil instead of panicking on the first ordering element.
func (_ggfg rulingList) toGrids() []rulingList {
	if _gfgc {
		_ec.Log.Info("toGrids: %s", _ggfg)
	}
	intersects := _ggfg.intersections()
	if _gfgc {
		_ec.Log.Info("toGrids: vecs=%d intersects=%d ", len(_ggfg), len(intersects))
		for _, key := range _afaae(intersects) {
			_ge.Printf("%4d: %+v\n", key, intersects[key])
		}
	}

	// connects[i] is the non-empty set of rulings connected to ruling i.
	connects := make(map[int]intSet, len(_ggfg))
	for idx := range _ggfg {
		if linked := _ggfg.connections(intersects, idx); len(linked) > 0 {
			connects[idx] = linked
		}
	}
	if _gfgc {
		_ec.Log.Info("toGrids: connects=%d", len(connects))
		for _, key := range _afaae(connects) {
			_ge.Printf("%4d: %+v\n", key, connects[key])
		}
	}

	ordering := _debaa(len(_ggfg), func(_bffcf, _gagd int) bool {
		first, second := len(connects[_bffcf]), len(connects[_gagd])
		if first != second {
			return first > second
		}
		return _ggfg.comp(_bffcf, _gagd)
	})
	if _gfgc {
		_ec.Log.Info("toGrids: ordering=%v", ordering)
	}
	// Nothing to group: avoid indexing ordering[0] below.
	if len(ordering) == 0 {
		return nil
	}

	igrids := [][]int{{ordering[0]}}
nextRuling:
	for _, idx := range ordering[1:] {
		for g, members := range igrids {
			for _, member := range members {
				if connects[member].has(idx) {
					igrids[g] = append(members, idx)
					continue nextRuling
				}
			}
		}
		igrids = append(igrids, []int{idx})
	}
	if _gfgc {
		_ec.Log.Info("toGrids: igrids=%v", igrids)
	}

	_c.SliceStable(igrids, func(_fdgd, _edded int) bool {
		return len(igrids[_fdgd]) > len(igrids[_edded])
	})
	for _, members := range igrids {
		_c.Slice(members, func(_bacbg, _abcf int) bool {
			return _ggfg.comp(members[_bacbg], members[_abcf])
		})
	}

	// Translate index groups into ruling lists.
	grids := make([]rulingList, len(igrids))
	for g, members := range igrids {
		grid := make(rulingList, len(members))
		for k, idx := range members {
			grid[k] = _ggfg[idx]
		}
		grids[g] = grid
	}
	if _gfgc {
		_ec.Log.Info("toGrids: grids=%+v", grids)
	}

	var actualGrids []rulingList
	for _, grid := range grids {
		actual, ok := grid.isActualGrid()
		if !ok {
			continue
		}
		actualGrids = append(actualGrids, actual.snapToGroups())
	}
	if _gfgc {
		_fcegc("toGrids: actualGrids", actualGrids)
		_ec.Log.Info("toGrids: grids=%d actualGrids=%d", len(grids), len(actualGrids))
	}
	return actualGrids
}

// _beeee reports whether _fdgg is small relative to _gbcbf: the ratio of _fdgg to the
// larger of _gbcbf and _edeeb must be below _cffa.
func _beeee(_fdgg, _gbcbf float64) bool {
	divisor := _aa.Max(_edeeb, _gbcbf)
	return _fdgg/divisor < _cffa
}
// _ebebc scans _bdbbb in order for lines whose _bcdg is greater than that of _egad.
// It returns the _bcdg of the last such line seen before the first one whose rounded
// Llx is less than the rounded Llx of _egad, or -1 if there is none.
// _agaf is not used.
func _ebebc(_egad *textLine, _bdbbb []*textLine, _agaf []float64) float64 {
	var depth float64 = -1
	for _, other := range _bdbbb {
		if !(other._bcdg > _egad._bcdg) {
			continue
		}
		if !(_aa.Round(other.Llx) >= _aa.Round(_egad.Llx)) {
			break
		}
		depth = other._bcdg
	}
	return depth
}

// _egee starts a new text line from the first word of _dcd at depth index _edde and
// pulls that word out of the bag into the line.
func _egee(_dcd *wordBag, _edde int) *textLine {
	word := _dcd.firstWord(_edde)
	line := &textLine{
		PdfRectangle: word.PdfRectangle,
		_ecag:        word._ddgee,
		_bcdg:        word._cffdg,
	}
	line.pullWord(_dcd, word, _edde)
	return line
}

// topoOrder returns the paragraph indexes ordered by a depth-first traversal of the
// readBefore relation; the post-order sequence is passed through _ccba before it is
// returned.
func (_bcdf paraList) topoOrder() []int {
	if _eaba {
		_ec.Log.Info("topoOrder:")
	}
	count := len(_bcdf)
	visited := make([]bool, count)
	postOrder := make([]int, 0, count)
	llyOrder := _bcdf.llyOrdering()

	var visit func(_acge int)
	visit = func(_cddd int) {
		visited[_cddd] = true
		for next := 0; next < count; next++ {
			if !visited[next] && _bcdf.readBefore(llyOrder, _cddd, next) {
				visit(next)
			}
		}
		postOrder = append(postOrder, _cddd)
	}
	for start := 0; start < count; start++ {
		if !visited[start] {
			visit(start)
		}
	}
	return _ccba(postOrder)
}

// endsInHyphen reports whether the line ends in a hyphen that passes the _fccf check.
// The last rune of the last word must be a Unicode hyphen; then either the word is
// flagged _gfffc and its text passes _fccf, or the text of the whole line passes _fccf.
// The line must contain at least one word.
func (_gage *textLine) endsInHyphen() bool {
	lastWord := _gage._aebc[len(_gage._aebc)-1]
	wordText := lastWord._bbdb
	lastRune, size := _g.DecodeLastRuneInString(wordText)
	if size <= 0 || !_de.Is(_de.Hyphen, lastRune) {
		return false
	}
	return (lastWord._gfffc && _fccf(wordText)) || _fccf(_gage.text())
}

// _fae returns a new subpath whose only point is _bbdd.
func _fae(_bbdd _bc.Point) *subpath {
	points := []_bc.Point{_bbdd}
	return &subpath{_gfefe: points}
}

// intSet is a set of ints.
type intSet map[int]struct{}

// _acdeg reports whether the Y distance between the two points is small relative to
// their X distance, as judged by _beeee.
func _acdeg(_bggb, _caeg _bc.Point) bool {
	dx := _aa.Abs(_bggb.X - _caeg.X)
	dy := _aa.Abs(_bggb.Y - _caeg.Y)
	return _beeee(dy, dx)
}

// _beecb converts the children of _aggd into list items.
//
// An "LI" child becomes a "bullet" item whose text comes from _efaf and whose sub-items
// come from recursing into the child. An "LBody" child ends the scan and returns the
// result of recursing into it. An "L" child ends the scan after its items have been
// appended. Other children are ignored.
func _beecb(_aggd *list) []*list {
	var items []*list
	for _, child := range _aggd._gbab {
		tag := child._ffcg
		if tag == "LBody" {
			return _beecb(child)
		}
		if tag == "L" {
			return append(items, _beecb(child)...)
		}
		if tag != "LI" {
			continue
		}
		content := _fdecc(child)
		subItems := _beecb(child)
		item := _baba(content, "bullet", subItems)
		item._dage = _efaf(content, "")
		items = append(items, item)
	}
	return items
}

// _dcbf collects the lines of _gfee that belong with _ddfg. Only lines whose _bcdg is at
// least _dagdd are considered.
//
// With an upper limit (_cfcb != -1), lines below the limit whose text differs from that
// of _ddfg are collected until one starts left of _ddfg (rounded Llx), which ends the
// scan. Without a limit (_cfcb == -1), lines at the same _bcdg as _ddfg are collected
// when their text differs, and other lines are collected when they lie no deeper than
// the depth returned by _ebebc.
//
// The result is never nil.
func _dcbf(_ddfg *textLine, _gfee []*textLine, _agde []float64, _dagdd, _cfcb float64) []*textLine {
	matches := []*textLine{}

	// _ebebc depends only on the arguments, none of which change during the loop, so it
	// is evaluated at most once instead of once per candidate line.
	var (
		limit      float64
		limitKnown bool
	)

	for _, candidate := range _gfee {
		if !(candidate._bcdg >= _dagdd) {
			continue
		}
		switch {
		case _cfcb != -1 && candidate._bcdg < _cfcb:
			if candidate.text() != _ddfg.text() {
				if _aa.Round(candidate.Llx) < _aa.Round(_ddfg.Llx) {
					// A line starting further left ends the scan.
					return matches
				}
				matches = append(matches, candidate)
			}
		case _cfcb == -1:
			if candidate._bcdg == _ddfg._bcdg {
				if candidate.text() != _ddfg.text() {
					matches = append(matches, candidate)
				}
				continue
			}
			if !limitKnown {
				limit = _ebebc(_ddfg, _gfee, _agde)
				limitKnown = true
			}
			if limit != -1 && candidate._bcdg <= limit {
				matches = append(matches, candidate)
			}
		}
	}
	return matches
}

// empty reports whether the stack holds no elements.
func (_gaec *stateStack) empty() bool {
	depth := len(*_gaec)
	return depth == 0
}

// _agce returns _deage(_gbfb) minus _deage(_cgeea).
func _agce(_gbfb, _cgeea bounded) float64 {
	first, second := _deage(_gbfb), _deage(_cgeea)
	return first - second
}

// textResult bundles a PageText with two integer counters.
type textResult struct {
	_ceee PageText
	_agcc int
	_dge  int
}
2023-05-29 17:26:33 +00:00
2023-12-17 13:54:01 +00:00
// TableInfo returns the table the text mark belongs to together with the cell
// information reported by getCellInfo for the mark. Both results are nil when the mark
// is not flagged as being part of a table.
func (_bafd *TextMark) TableInfo() (*TextTable, [][]int) {
	if _bafd._ebd {
		table := _bafd._afe
		return table, table.getCellInfo(*_bafd)
	}
	return nil, nil
}

// _daeg computes, for each of the _dcabd composite rows (or columns), the index at which
// it starts once subdivided: entry i takes up len(_aeef[i])+1 slots. It returns the
// start offsets and the total number of slots.
func _daeg(_dcabd int, _aeef map[int][]float64) ([]int, int) {
	offsets := make([]int, _dcabd)
	total := 0
	for i := range offsets {
		offsets[i] = total
		total += len(_aeef[i]) + 1
	}
	return offsets, total
}

// _cfgd is a case-insensitive pattern for a Roman numeral at the start of a string that
// is either followed by ")" or "." or enclosed in parentheses. The numeral part may be
// empty.
var _cfgd string = `(?i)^(M{0,3})(C(?:D|M)|D?C{0,3})(X(?:L|C)|L?X{0,3})(I(?:V|X)|V?I{0,3})(\)|\.)|^\((M{0,3})(C(?:D|M)|D?C{0,3})(X(?:L|C)|L?X{0,3})(I(?:V|X)|V?I{0,3})\)`
2023-02-07 17:17:49 +00:00
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// Font represents the font properties on a PDF page.
type Font struct{PdfFont *_ce .PdfFont ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// FontName represents Font Name from font properties.
FontName string ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// FontType represents Font Subtype entry in the font dictionary inside page resources.
// Examples : type0, Type1, MMType1, Type3, TrueType, CIDFont.
FontType string ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// ToUnicode is true if font provides a `ToUnicode` mapping.
ToUnicode bool ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// IsCID is true if underlying font is a composite font.
// Composite font is represented by a font dictionary whose Subtype is `Type0`
IsCID bool ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// IsSimple is true if font is simple font.
// A simple font is limited to 8-bit character codes (0-255).
IsSimple bool ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// FontData represents the raw data of the embedded font file.
// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CCF).
// FontData value can be indicates from `FontFile`, `FontFile2` or `FontFile3` inside Font Descriptor.
// At most, only one of `FontFile`, `FontFile2` or `FontFile3` will be FontData value.
FontData []byte ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// FontFileName is a name representing the font. it has format:
// (Font Name) + (Font Type Extension), example: helvetica.ttf.
FontFileName string ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
FontDescriptor *_ce .PdfFontDescriptor ;};func (_fdeef *textPara )writeCellText (_cbfec _a .Writer ){for _abff ,_defb :=range _fdeef ._bdbcg {_efgg :=_defb .text ();_gaadf :=_dcgd &&_defb .endsInHyphen ()&&_abff !=len (_fdeef ._bdbcg )-1;if _gaadf {_efgg =_edcg (_efgg );
};_cbfec .Write ([]byte (_efgg ));if !(_gaadf ||_abff ==len (_fdeef ._bdbcg )-1){_cbfec .Write ([]byte (_dcgbg (_defb ._bcdg ,_fdeef ._bdbcg [_abff +1]._bcdg )));};};};func (_gefgf *textTable )toTextTable ()TextTable {if _afcg {_ec .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_gefgf ._ecbf ,_gefgf ._dcfg );
};_bgga :=make ([][]TableCell ,_gefgf ._dcfg );for _cdbe :=0;_cdbe < _gefgf ._dcfg ;_cdbe ++{_bgga [_cdbe ]=make ([]TableCell ,_gefgf ._ecbf );for _gcbe :=0;_gcbe < _gefgf ._ecbf ;_gcbe ++{_egdc :=_gefgf .get (_gcbe ,_cdbe );if _egdc ==nil {continue ;};
if _afcg {_ge .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_gcbe ,_cdbe ,_egdc );};_bgga [_cdbe ][_gcbe ].Text =_egdc .text ();_adbcf :=0;_bgga [_cdbe ][_gcbe ].Marks ._dec =_egdc .toTextMarks (&_adbcf );};};_gdaaf :=TextTable {W :_gefgf ._ecbf ,H :_gefgf ._dcfg ,Cells :_bgga };
_gdaaf .PdfRectangle =_gefgf .bbox ();return _gdaaf ;};var _fdgf =map[markKind ]string {_gbffb :"\u0073\u0074\u0072\u006f\u006b\u0065",_eecbc :"\u0066\u0069\u006c\u006c",_cgacb :"\u0061u\u0067\u006d\u0065\u006e\u0074"};func (_bfc *imageExtractContext )extractContentStreamImages (_ae string ,_fgf *_ce .PdfPageResources )error {_dfca :=_ag .NewContentStreamParser (_ae );
_cea ,_ga :=_dfca .Parse ();if _ga !=nil {return _ga ;};if _bfc ._aabe ==nil {_bfc ._aabe =map[*_gb .PdfObjectStream ]*cachedImage {};};if _bfc ._cef ==nil {_bfc ._cef =&ImageExtractOptions {};};_dbe :=_ag .NewContentStreamProcessor (*_cea );_dbe .AddHandler (_ag .HandlerConditionEnumAllOperands ,"",_bfc .processOperand );
return _dbe .Process (_fgf );};func _cbbae (_ffda []*textLine ,_cagca map[float64 ][]*textLine )[]*list {_acf :=_bcfe (_cagca );_ccgd :=[]*list {};if len (_acf )==0{return _ccgd ;};_facd :=_acf [0];_eebg :=1;_eeda :=_cagca [_facd ];for _gaad ,_dbgd :=range _eeda {var _bdcd float64 ;
_bafaa :=[]*list {};_caacf :=_dbgd ._bcdg ;_gcdd :=-1.0;if _gaad < len (_eeda )-1{_gcdd =_eeda [_gaad +1]._bcdg ;};if _eebg < len (_acf ){_bafaa =_deca (_ffda ,_cagca ,_acf ,_eebg ,_caacf ,_gcdd );};_bdcd =_gcdd ;if len (_bafaa )> 0{_eaff :=_bafaa [0];
if len (_eaff ._ggdb )> 0{_bdcd =_eaff ._ggdb [0]._bcdg ;};};_cbbe :=[]*textLine {_dbgd };_cadg :=_dcbf (_dbgd ,_ffda ,_acf ,_caacf ,_bdcd );_cbbe =append (_cbbe ,_cadg ...);_aeba :=_baba (_cbbe ,"\u0062\u0075\u006c\u006c\u0065\u0074",_bafaa );_aeba ._dage =_efaf (_cbbe ,"");
_ccgd =append (_ccgd ,_aeba );};return _ccgd ;};func (_geff *ruling )intersects (_dbfa *ruling )bool {_acbf :=(_geff ._bfbc ==_ebdaf &&_dbfa ._bfbc ==_ecac )||(_dbfa ._bfbc ==_ebdaf &&_geff ._bfbc ==_ecac );_dbce :=func (_fdgab ,_aceg *ruling )bool {return _fdgab ._cebe -_ebbf <=_aceg ._abbgc &&_aceg ._abbgc <=_fdgab ._deee +_ebbf ;
};_cfge :=_dbce (_geff ,_dbfa );_ggdd :=_dbce (_dbfa ,_geff );if _gfgc {_ge .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_acbf ,_cfge ,_ggdd ,_acbf &&_cfge &&_ggdd ,_geff ,_dbfa );
};return _acbf &&_cfge &&_ggdd ;};func (_bcfa *textObject )setTextRise (_dfga float64 ){if _bcfa ==nil {return ;};_bcfa ._befa ._gegg =_dfga ;};func (_baa *textObject )setFont (_dedfd string ,_gcbc float64 )error {if _baa ==nil {return nil ;};_baa ._befa ._ccf =_gcbc ;
_dffb ,_ggea :=_baa .getFont (_dedfd );if _ggea !=nil {return _ggea ;};_baa ._befa ._badfc =_dffb ;return nil ;};func (_bdec rulingList )intersections ()map[int ]intSet {var _eebc ,_bcab []int ;for _fabga ,_aebab :=range _bdec {switch _aebab ._bfbc {case _ebdaf :_eebc =append (_eebc ,_fabga );
case _ecac :_bcab =append (_bcab ,_fabga );};};if len (_eebc )< _cbbb +1||len (_bcab )< _acgd +1{return nil ;};if len (_eebc )+len (_bcab )> _fcae {_ec .Log .Debug ("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",len (_bdec ),len (_eebc ),len (_bcab ));
return nil ;};_geeb :=make (map[int ]intSet ,len (_eebc )+len (_bcab ));for _ ,_dbgg :=range _eebc {for _ ,_agdd :=range _bcab {if _bdec [_dbgg ].intersects (_bdec [_agdd ]){if _ ,_baac :=_geeb [_dbgg ];!_baac {_geeb [_dbgg ]=make (intSet );};if _ ,_fage :=_geeb [_agdd ];
!_fage {_geeb [_agdd ]=make (intSet );};_geeb [_dbgg ].add (_agdd );_geeb [_agdd ].add (_dbgg );};};};return _geeb ;};func (_dbff rulingList )aligned ()bool {if len (_dbff )< 2{return false ;};_edce :=make (map[*ruling ]int );_edce [_dbff [0]]=0;for _ ,_dddgg :=range _dbff [1:]{_fggg :=false ;
for _bfffe :=range _edce {if _dddgg .gridIntersecting (_bfffe ){_edce [_bfffe ]++;_fggg =true ;break ;};};if !_fggg {_edce [_dddgg ]=0;};};_bgbf :=0;for _ ,_aabb :=range _edce {if _aabb ==0{_bgbf ++;};};_gafdc :=float64 (_bgbf )/float64 (len (_dbff ));
_cbacg :=_gafdc <=1.0-_cfea ;if _gfgc {_ec .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_cbacg ,_gafdc ,_bgbf ,len (_dbff ),_dbff .String ());
};return _cbacg ;};func (_aafe rulingList )connections (_abfa map[int ]intSet ,_bacg int )intSet {_dbee :=make (intSet );_fdeca :=make (intSet );var _agdb func (int );_agdb =func (_fdaf int ){if !_fdeca .has (_fdaf ){_fdeca .add (_fdaf );for _eeabg :=range _aafe {if _abfa [_eeabg ].has (_fdaf ){_dbee .add (_eeabg );
};};for _bege :=range _aafe {if _dbee .has (_bege ){_agdb (_bege );};};};};_agdb (_bacg );return _dbee ;};func _fabgb (_cfbb ,_dcac bounded )float64 {_gfeg :=_fabg (_cfbb ,_dcac );if !_ffegg (_gfeg ){return _gfeg ;};return _agce (_cfbb ,_dcac );};func (_dccd *shapesState )lineTo (_cbc ,_faac float64 ){if _cbag {_ec .Log .Info ("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066",_cbc ,_faac ,_dccd .devicePoint (_cbc ,_faac ));
};_dccd .addPoint (_cbc ,_faac );};func (_dgdg *subpath )close (){if !_dcbd (_dgdg ._gfefe [0],_dgdg .last ()){_dgdg .add (_dgdg ._gfefe [0]);};_dgdg ._cgde =true ;_dgdg .removeDuplicates ();};
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// ExtractFonts returns all font information from the page extractor, including
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
//
// The argument `previousPageFonts` is used when building a font catalog across several pages or a whole document:
// its entries are appended to the result unless a font with the same FontName is already present for this page,
// so the result holds no duplicate entries.
//
// NOTE: If previousPageFonts is nil, only the fonts of this page are returned.
func (_cg *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	pageFonts := PageFonts{}
	if err := pageFonts.extractPageResourcesToFont(_cg._ab); err != nil {
		return nil, err
	}
	if previousPageFonts == nil {
		return &PageFonts{Fonts: pageFonts.Fonts}, nil
	}
	for _, prev := range previousPageFonts.Fonts {
		if _efg(pageFonts.Fonts, prev.FontName) {
			continue
		}
		pageFonts.Fonts = append(pageFonts.Fonts, prev)
	}
	return &PageFonts{Fonts: pageFonts.Fonts}, nil
}

// clearPath resets the current path (`_abgb`) to nil and clears the `_edee` flag.
func (_ggag *shapesState) clearPath() {
	_ggag._abgb, _ggag._edee = nil, false
	if _cbag {
		_ec.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _ggag)
	}
}

// _bdbf returns the lines of `_cgca` whose `_bcdg` value is strictly greater
// than `_ebab` and, unless `_bbgd` is -1 (no upper bound), strictly less than
// `_bbgd`. Input order is preserved; nil is returned when nothing matches.
func _bdbf(_cgca []*textLine, _bbgd, _ebab float64) []*textLine {
	var selected []*textLine
	for _, line := range _cgca {
		keep := line._bcdg > _ebab && (_bbgd == -1 || line._bcdg < _bbgd)
		if keep {
			selected = append(selected, line)
		}
	}
	return selected
}
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// String returns a human readable description of `path`.
func (_ccfa *subpath )String ()string {_dagd :=_ccfa ._gfefe ;_cged :=len (_dagd );if _cged <=5{return _ge .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_cged ,_dagd );};return _ge .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_cged ,_dagd [0],_dagd [1],_dagd [_cged -1]);
};func (_egccc *ruling )encloses (_cbaf ,_gefeb float64 )bool {return _egccc ._cebe -_ebbf <=_cbaf &&_gefeb <=_egccc ._deee +_ebbf ;};func (_eecf paraList )findTables (_bgfa []gridTiling )[]*textTable {_eecf .addNeighbours ();_c .Slice (_eecf ,func (_adbee ,_acda int )bool {return _fabgb (_eecf [_adbee ],_eecf [_acda ])< 0});
var _fedga []*textTable ;if _dacb {_abffb :=_eecf .findGridTables (_bgfa );_fedga =append (_fedga ,_abffb ...);};if _eccc {_cbfdf :=_eecf .findTextTables ();_fedga =append (_fedga ,_cbfdf ...);};return _fedga ;};func (_ggceb *textObject )getCurrentFont ()*_ce .PdfFont {_ccea :=_ggceb ._befa ._badfc ;
if _ccea ==nil {_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e");return _ce .DefaultFont ();
};return _ccea ;};func (_ababb *textPara )toCellTextMarks (_baaeb *int )[]TextMark {var _bcga []TextMark ;for _fcgfag ,_cgcff :=range _ababb ._bdbcg {_bfff :=_cgcff .toTextMarks (_baaeb );_debac :=_dcgd &&_cgcff .endsInHyphen ()&&_fcgfag !=len (_ababb ._bdbcg )-1;
if _debac {_bfff =_gaagg (_bfff ,_baaeb );};_bcga =append (_bcga ,_bfff ...);if !(_debac ||_fcgfag ==len (_ababb ._bdbcg )-1){_bcga =_cacf (_bcga ,_baaeb ,_dcgbg (_cgcff ._bcdg ,_ababb ._bdbcg [_fcgfag +1]._bcdg ));};};return _bcga ;};const (_fad =false ;
_cfgg =false ;_acab =false ;_fgba =false ;_cbag =false ;_dggee =false ;_becc =false ;_eaba =false ;_egd =false ;_eeg =_egd &&true ;_aacb =_eeg &&false ;_fbfc =_egd &&true ;_afcg =false ;_dcedc =_afcg &&false ;_fefa =_afcg &&true ;_gfgc =false ;_deaga =_gfgc &&false ;
_fcfe =_gfgc &&false ;_bea =_gfgc &&true ;_feceb =_gfgc &&false ;_aecc =_gfgc &&false ;);func _geedc (_aeda _ce .PdfRectangle ,_baed ,_deaab ,_afdbf ,_gdaac *ruling )gridTile {_bfgb :=_aeda .Llx ;_dfdgd :=_aeda .Urx ;_defe :=_aeda .Lly ;_facca :=_aeda .Ury ;
return gridTile {PdfRectangle :_aeda ,_ebdg :_baed !=nil &&_baed .encloses (_defe ,_facca ),_ebga :_deaab !=nil &&_deaab .encloses (_defe ,_facca ),_ddbaf :_afdbf !=nil &&_afdbf .encloses (_bfgb ,_dfdgd ),_fgde :_gdaac !=nil &&_gdaac .encloses (_bfgb ,_dfdgd )};
};func (_feca *ruling )gridIntersecting (_acfc *ruling )bool {return _gecae (_feca ._cebe ,_acfc ._cebe )&&_gecae (_feca ._deee ,_acfc ._deee );};func (_bagd gridTile )numBorders ()int {_egecc :=0;if _bagd ._ebdg {_egecc ++;};if _bagd ._ebga {_egecc ++;
};if _bagd ._ddbaf {_egecc ++;};if _bagd ._fgde {_egecc ++;};return _egecc ;};const (_dcgd =true ;_cafd =true ;_efbf =true ;_dbde =false ;_gaee =false ;_gafd =6;_aedg =3.0;_abgc =200;_dacb =true ;_eccc =true ;_gfag =true ;_dbbc =true ;_gdcg =false ;);func _cedg (_gbfd *wordBag ,_deagd *textWord ,_dbbb float64 )bool {return _gbfd .Urx <=_deagd .Llx &&_deagd .Llx < _gbfd .Urx +_dbbb ;
};func (_ccd *textObject )setHorizScaling (_ecd float64 ){if _ccd ==nil {return ;};_ccd ._befa ._gdc =_ecd ;};func _deage (_bfbb bounded )float64 {return -_bfbb .bbox ().Lly };func _bcfe (_fbba map[float64 ][]*textLine )[]float64 {_ecde :=[]float64 {};
for _aaaa :=range _fbba {_ecde =append (_ecde ,_aaaa );};_c .Float64s (_ecde );return _ecde ;};func (_babgb *textObject )newTextMark (_fagg string ,_aeea _bc .Matrix ,_edae _bc .Point ,_bfdfg float64 ,_dbbf *_ce .PdfFont ,_cccb float64 ,_dbbce ,_becca _fg .Color ,_cffd _gb .PdfObject ,_cefgg []string ,_fbae int ,_gcfe int )(textMark ,bool ){_gaeec :=_aeea .Angle ();
_ceefg :=_fgabc (_gaeec ,_fafa );var _caefe float64 ;if _ceefg %180!=90{_caefe =_aeea .ScalingFactorY ();}else {_caefe =_aeea .ScalingFactorX ();};_gbdaa :=_afb (_aeea );_fcdd :=_ce .PdfRectangle {Llx :_gbdaa .X ,Lly :_gbdaa .Y ,Urx :_edae .X ,Ury :_edae .Y };
switch _ceefg %360{case 90:_fcdd .Urx -=_caefe ;case 180:_fcdd .Ury -=_caefe ;case 270:_fcdd .Urx +=_caefe ;case 0:_fcdd .Ury +=_caefe ;default:_ceefg =0;_fcdd .Ury +=_caefe ;};if _fcdd .Llx > _fcdd .Urx {_fcdd .Llx ,_fcdd .Urx =_fcdd .Urx ,_fcdd .Llx ;
};if _fcdd .Lly > _fcdd .Ury {_fcdd .Lly ,_fcdd .Ury =_fcdd .Ury ,_fcdd .Lly ;};_adbec :=true ;if _babgb ._bgcb ._gda .Width ()> 0{_facc ,_gbfe :=_cad (_fcdd ,_babgb ._bgcb ._gda );if !_gbfe {_adbec =false ;_ec .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_fcdd ,_babgb ._bgcb ._gda ,_fagg );
};_fcdd =_facc ;};_fffda :=_fcdd ;_aacf :=_babgb ._bgcb ._gda ;switch _ceefg %360{case 90:_aacf .Urx ,_aacf .Ury =_aacf .Ury ,_aacf .Urx ;_fffda =_ce .PdfRectangle {Llx :_aacf .Urx -_fcdd .Ury ,Urx :_aacf .Urx -_fcdd .Lly ,Lly :_fcdd .Llx ,Ury :_fcdd .Urx };
case 180:_fffda =_ce .PdfRectangle {Llx :_aacf .Urx -_fcdd .Llx ,Urx :_aacf .Urx -_fcdd .Urx ,Lly :_aacf .Ury -_fcdd .Lly ,Ury :_aacf .Ury -_fcdd .Ury };case 270:_aacf .Urx ,_aacf .Ury =_aacf .Ury ,_aacf .Urx ;_fffda =_ce .PdfRectangle {Llx :_fcdd .Ury ,Urx :_fcdd .Lly ,Lly :_aacf .Ury -_fcdd .Llx ,Ury :_aacf .Ury -_fcdd .Urx };
};if _fffda .Llx > _fffda .Urx {_fffda .Llx ,_fffda .Urx =_fffda .Urx ,_fffda .Llx ;};if _fffda .Lly > _fffda .Ury {_fffda .Lly ,_fffda .Ury =_fffda .Ury ,_fffda .Lly ;};_ecdb :=textMark {_ecaa :_fagg ,PdfRectangle :_fffda ,_aefef :_fcdd ,_bcdb :_dbbf ,_bfaca :_caefe ,_cgdb :_cccb ,_dadd :_aeea ,_feea :_edae ,_edge :_ceefg ,_abgd :_dbbce ,_eedd :_becca ,_bfade :_cffd ,_fcdc :_cefgg ,Th :_babgb ._befa ._gdc ,Tw :_babgb ._befa ._eag ,_fefe :_gcfe ,_beaa :_fbae };
if _cfgg {_ec .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_gbdaa ,_edae ,_ecdb .String ());};return _ecdb ,_adbec ;
2023-10-07 13:58:01 +00:00
};
2022-07-13 21:28:43 +00:00
2023-12-17 13:54:01 +00:00
// String returns a string describing the current state of the textState stack:
// a header line with the stack size followed by one indexed line per entry
// ("<nil>" for nil entries).
func (_bcbg *stateStack) String() string {
	stack := *_bcbg
	parts := []string{_ge.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(stack))}
	for idx, state := range stack {
		desc := "\u003c\u006e\u0069l\u003e"
		if state != nil {
			desc = state.String()
		}
		parts = append(parts, _ge.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", idx, desc))
	}
	return _gd.Join(parts, "\u000a")
}

// setTextLeading stores `_dbdd` in the text state (`_dcc`). No-op on a nil receiver.
func (_ffb *textObject) setTextLeading(_dbdd float64) {
	if _ffb != nil {
		_ffb._befa._dcc = _dbdd
	}
}

// findTableGrid tries to build a table from the tiling `_bafeb`. Every tile is
// filled with the paragraphs that `inTile` returns for it. The attempt is
// abandoned (nil, nil) once the number of empty tiles exceeds the share allowed
// by `_fbbd`, or when every cell in row 0 is present with `_bedf` set. On
// success the table is passed through reduceTiling and subdivide and returned
// together with the set of paragraphs that were placed in it.
func (_beeb paraList) findTableGrid(_bafeb gridTiling) (*textTable, map[*textPara]struct{}) {
	width := len(_bafeb._cgecb)
	height := len(_bafeb._agbb)
	table := textTable{
		_beaeg: true,
		_ecbf:  width,
		_dcfg:  height,
		_gcbga: make(map[uint64]*textPara, width*height),
		_egfe:  make(map[uint64]compositeCell, width*height),
	}
	table.PdfRectangle = _bafeb.PdfRectangle
	used := make(map[*textPara]struct{})
	maxEmpty := int((1.0 - _fbbd) * float64(width*height))
	numEmpty := 0
	if _bea {
		_ec.Log.Info("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064", width, height)
	}
	for y, rowKey := range _bafeb._agbb {
		row, haveRow := _bafeb._bage[rowKey]
		if !haveRow {
			continue
		}
		for x, colKey := range _bafeb._cgecb {
			tile, haveTile := row[colKey]
			if !haveTile {
				continue
			}
			paras := _beeb.inTile(tile)
			if len(paras) != 0 {
				table.putComposite(x, y, paras, tile.PdfRectangle)
				for _, para := range paras {
					used[para] = struct{}{}
				}
				continue
			}
			numEmpty++
			if numEmpty > maxEmpty {
				if _bea {
					_ec.Log.Info("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064", numEmpty)
				}
				return nil, nil
			}
		}
	}
	numHeader := 0
	for x := 0; x < width; x++ {
		if cell := table.get(x, 0); cell == nil || !cell._bedf {
			numHeader++
		}
	}
	if numHeader == 0 {
		if _bea {
			_ec.Log.Info("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030")
		}
		return nil, nil
	}
	reduced := table.reduceTiling(_bafeb, _ffee)
	reduced = reduced.subdivide()
	return reduced, used
}

// isExportable reports whether the table should be exported. Tables flagged
// `_beaeg` always are. Otherwise the table is exportable when at least one row
// has a first-column cell that is missing, or whose text is longer than one
// rune and does not match `_eeggd`.
func (_faggb *textTable) isExportable() bool {
	if _faggb._beaeg {
		return true
	}
	trivialFirstCell := func(row int) bool {
		cell := _faggb.get(0, row)
		if cell == nil {
			return false
		}
		text := cell.text()
		runes := _g.RuneCountInString(text)
		matched := _eeggd.MatchString(text)
		return runes <= 1 || matched
	}
	for row := 0; row < _faggb._dcfg; row++ {
		if !trivialFirstCell(row) {
			return true
		}
	}
	return false
}

// _fece converts `_fbff` to an integer index in units of `_edbf`. Values that
// are not >= 0 get the truncated quotient minus one.
func _fece(_fbff float64) int {
	index := int(_fbff / _edbf)
	if !(_fbff >= 0) {
		index--
	}
	return index
}

// _fadg returns the keys of `_eaffb` in ascending order.
func _fadg(_eaffb map[int][]float64) []int {
	keys := make([]int, 0, len(_eaffb))
	for key := range _eaffb {
		keys = append(keys, key)
	}
	_c.Ints(keys)
	return keys
}

// rectRuling is a ruling candidate described by a rectangle, together with its
// ruling kind, mark kind and color.
type rectRuling struct {
	_dfec  rulingKind
	_effbc markKind
	_fg.Color
	_ce.PdfRectangle
}
// String returns a human readable description of the ruling list: "{ EMPTY }"
// for an empty list, the two counts returned by vertsHorzs when either is zero,
// and otherwise the counts plus a rectangle built from the `_abbgc` values of
// the first and last ruling of each group.
func (_dfcg rulingList) String() string {
	if len(_dfcg) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := _dfcg.vertsHorzs()
	numVerts, numHorzs := len(verts), len(horzs)
	if numVerts == 0 || numHorzs == 0 {
		return _ge.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", numVerts, numHorzs)
	}
	extent := _ce.PdfRectangle{
		Llx: verts[0]._abbgc,
		Urx: verts[numVerts-1]._abbgc,
		Lly: horzs[numHorzs-1]._abbgc,
		Ury: horzs[0]._abbgc,
	}
	return _ge.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", numVerts, numHorzs, extent)
}

// setCharSpacing stores `_cdfg` in the text state (`_ggf`). No-op on a nil receiver.
func (_gac *textObject) setCharSpacing(_cdfg float64) {
	if _gac == nil {
		return
	}
	_gac._befa._ggf = _cdfg
	if !_dggee {
		return
	}
	_ec.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _cdfg, _gac._befa.String())
}

// toTextMarks returns the text marks of all words of the line. A single space
// mark is inserted (via `_cacf`) before every word whose `_gfffc` flag is set.
// `_ccfg` is passed through as the running offset.
func (_bfad *textLine) toTextMarks(_ccfg *int) []TextMark {
	var marks []TextMark
	for _, word := range _bfad._aebc {
		if word._gfffc {
			marks = _cacf(marks, _ccfg, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_ccfg)...)
	}
	return marks
}
// String returns a string describing the tile: its rectangle followed by one
// character per border flag ("L", "R", "B", "T"), or "_" where the flag is unset.
func (_dcea gridTile )String ()string {_ffeb :=func (_eaga bool ,_dbfff string )string {if _eaga {return _dbfff ;};return "\u005f";};return _ge .Sprintf ("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073",_dcea .PdfRectangle ,_ffeb (_dcea ._ebdg ,"\u004c"),_ffeb (_dcea ._ebga ,"\u0052"),_ffeb (_dcea ._ddbaf ,"\u0042"),_ffeb (_dcea ._fgde ,"\u0054"));
};
// _fdecc returns the text lines of the first child of `_fdeg` whose type is
// "LBody", "Span" or "InlineShape". For an "LBody" child without lines of its
// own, the search recurses into that child. nil is returned when no child matches.
func _fdecc (_fdeg *list )[]*textLine {for _ ,_fbge :=range _fdeg ._gbab {switch _fbge ._ffcg {case "\u004c\u0042\u006fd\u0079":if len (_fbge ._ggdb )!=0{return _fbge ._ggdb ;};return _fdecc (_fbge );case "\u0053\u0070\u0061\u006e":return _fbge ._ggdb ;
case "I\u006e\u006c\u0069\u006e\u0065\u0053\u0068\u0061\u0070\u0065":return _fbge ._ggdb ;};};return nil ;};
// _gcgf classifies a rectangle as a ruling: `_ecac` when it is wider than tall
// and at least `_cec` wide, `_ebdaf` when it is at least as tall as wide and at
// least `_cec` tall, and `_gdcf` otherwise.
func _gcgf (_fdba _ce .PdfRectangle )rulingKind {_bcffb :=_fdba .Width ();_ddedfg :=_fdba .Height ();if _bcffb > _ddedfg {if _bcffb >=_cec {return _ecac ;
};}else {if _ddedfg >=_cec {return _ebdaf ;};};return _gdcf ;};
// renderText converts the string operand bytes `_bed` into text marks and
// appends them to the text object's mark list (`_cfb`).
//
// `_egg` is passed through to every mark unchanged and `_cdfd` is forwarded to
// newTextMark as its last argument. Nothing is processed (nil is returned) when
// the `_efbb` flag is set. An error is returned as soon as the current font has
// no metrics for a character code; marks appended before that point are kept.
func (_cbgb *textObject )renderText (_egg _gb .PdfObject ,_bed []byte ,_cdfd int )error {if _cbgb ._efbb {_ec .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");
// Decode the bytes to character codes and then to strings with the current font.
return nil ;};_febf :=_cbgb .getCurrentFont ();_gbff :=_febf .BytesToCharcodes (_bed );_deeb ,_ccbf ,_ffbd :=_febf .CharcodesToStrings (_gbff );if _ffbd > 0{_ec .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_ccbf ,_ffbd );
// Accumulate the character and miss counts in the text state. `_acgc` is the
// value stored by setFont, `_ecf` the value stored by setHorizScaling divided by
// 100. Glyph metrics are scaled by `_fgc`, except for "Type3" fonts (scale 1).
// The space metrics fall back from the rune ' ' to char code 32 to the default font.
};_cbgb ._befa ._baf +=_ccbf ;_cbgb ._befa ._afgb +=_ffbd ;_fcgd :=_cbgb ._befa ;_acgc :=_fcgd ._ccf ;_ecf :=_fcgd ._gdc /100.0;_cda :=_fgc ;if _febf .Subtype ()=="\u0054\u0079\u0070e\u0033"{_cda =1;};_becb ,_bedg :=_febf .GetRuneMetrics (' ');if !_bedg {_becb ,_bedg =_febf .GetCharMetrics (32);
};if !_bedg {_becb ,_ =_ce .DefaultFont ().GetRuneMetrics (' ');};_gffg :=_becb .Wx *_cda ;_ec .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_gffg ,_deeb ,_febf ,_acgc );
// `_eecb` scales by the font size (x additionally by `_ecf`) and translates
// vertically by the value stored by setTextRise.
_eecb :=_bc .NewMatrix (_acgc *_ecf ,0,0,_acgc ,0,_fcgd ._gegg );if _dggee {_ec .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_gbff ),_gbff ,_deeb );
};_ec .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_gbff ),_gbff ,len (_deeb ));_egc :=_cbgb .getFillColor ();
// One mark per decoded string. Strings consisting of a single NUL rune are
// skipped. The `_eag` spacing (exposed as Tw on the mark) is only added for a
// string that is a single space (rune 32).
_ddfb :=_cbgb .getStrokeColor ();for _fbb ,_aafa :=range _deeb {_ffe :=[]rune (_aafa );if len (_ffe )==1&&_ffe [0]=='\x00'{continue ;};_eed :=_gbff [_fbb ];_ffea :=_cbgb ._gbe .CTM .Mult (_cbgb ._eee ).Mult (_eecb );_efde :=0.0;if len (_ffe )==1&&_ffe [0]==32{_efde =_fcgd ._eag ;
};_ebe ,_fdc :=_febf .GetCharMetrics (_eed );if !_fdc {_ec .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_eed ,_ffe ,_ffe ,_febf );
return _ge .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_febf .String (),_eed );};_cdg :=_bc .Point {X :_ebe .Wx *_cda ,Y :_ebe .Wy *_cda };
// `_fbbf` is the glyph advance without the `_ggf` spacing and is used for the
// end point of the mark; `_cdcd` includes `_ggf` and is what the text matrix
// (`_eee`) is advanced by after the mark is emitted.
_fbbf :=_bc .Point {X :(_cdg .X *_acgc +_efde )*_ecf };_cdcd :=_bc .Point {X :(_cdg .X *_acgc +_fcgd ._ggf +_efde )*_ecf };if _dggee {_ec .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_acgc ,_fcgd ._ggf ,_fcgd ._eag ,_ecf );
_ec .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_cdg ,_fbbf ,_cdcd );};_ffd :=_dgfb (_fbbf );_abfb :=_dgfb (_cdcd );_babb :=_cbgb ._gbe .CTM .Mult (_cbgb ._eee ).Mult (_ffd );
if _fgba {_ec .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_cbgb ._gbe .CTM ,_cbgb ._eee ,_abfb ,_afb (_cbgb ._gbe .CTM .Mult (_cbgb ._eee ).Mult (_abfb )),_ffd ,_babb ,_afb (_babb ));
// Marks reported as outside the page are skipped; note that the text matrix is
// not advanced for them because the `continue` bypasses the Concat below.
};_efc ,_ffbe :=_cbgb .newTextMark (_fd .ExpandLigatures (_ffe ),_ffea ,_afb (_babb ),_aa .Abs (_gffg *_ffea .ScalingFactorX ()),_febf ,_cbgb ._befa ._ggf ,_egc ,_ddfb ,_egg ,_deeb ,_fbb ,_cdfd );if !_ffbe {_ec .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");
// NOTE(review): `_febf` has already been dereferenced above, so the nil branch
// below looks unreachable — confirm before relying on it.
continue ;};if _febf ==nil {_ec .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _febf .Encoder ()==nil {_ec .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_febf );
}else {if _ggce ,_gbbd :=_febf .Encoder ().CharcodeToRune (_eed );_gbbd {_efc ._gdcgd =string (_ggce );};};_ec .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_fbb ,_eed ,_efc ,_ffea );
_cbgb ._cfb =append (_cbgb ._cfb ,&_efc );_cbgb ._eee .Concat (_abfb );};return nil ;};
// asRuling converts the line to a ruling with mark kind `_gbffb`. For kind
// `_ebdaf` the primary coordinate is the mean x and the extent is the y range
// of the two end points; for `_ecac` it is the mean y and the x range. Any
// other kind is logged as an error and yields (nil, false).
func (_bcgaa lineRuling )asRuling ()(*ruling ,bool ){_cabcb :=ruling {_bfbc :_bcgaa ._fbbag ,Color :_bcgaa .Color ,_bgaa :_gbffb };switch _bcgaa ._fbbag {case _ebdaf :_cabcb ._abbgc =_bcgaa .xMean ();
_cabcb ._cebe =_aa .Min (_bcgaa ._ddgfc .Y ,_bcgaa ._eacg .Y );_cabcb ._deee =_aa .Max (_bcgaa ._ddgfc .Y ,_bcgaa ._eacg .Y );case _ecac :_cabcb ._abbgc =_bcgaa .yMean ();_cabcb ._cebe =_aa .Min (_bcgaa ._ddgfc .X ,_bcgaa ._eacg .X );_cabcb ._deee =_aa .Max (_bcgaa ._ddgfc .X ,_bcgaa ._eacg .X );
default:_ec .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_bcgaa ._fbbag );return nil ,false ;};return &_cabcb ,true ;};
// toTextMarks returns the public TextMark for every mark of the word, appended
// via `_cfaf` with `_dbadg` passed through as the running offset.
func (_ccgae *textWord )toTextMarks (_dbadg *int )[]TextMark {var _bfbab []TextMark ;
for _ ,_ecegf :=range _ccgae ._bgeaa {_bfbab =_cfaf (_bfbab ,_dbadg ,_ecegf .ToTextMark ());};return _bfbab ;};
// scanBand scans the words of `_fgff` whose `_cffdg` value lies in
// [`_bacdd`, `_ecgcc`] widened by `_cdca` times the `_egfa` value of `_bdde`,
// and returns how many of them qualify.
//
// A word qualifies when `_ffcf(_bdde, word)` is true, when its size mismatch
// with `_bdde` does not exceed `_cdfea` (checked only if `_cdfea` > 0) and when
// `_bdde.blocked(word)` is false. If `_eef` is false, qualifying words are
// pulled into `_bdde` and removed from `_fgff` once the scan ends; if `_eef` is
// true, nothing is moved and the scan of each depth bucket stops at its first
// qualifying word. Unless `_dffg` is set, the band grows to include every
// qualifying word. `_ceea` is not used by this function.
func (_fgff *wordBag )scanBand (_ceea string ,_bdde *wordBag ,_ffcf func (_dfdg *wordBag ,_ggd *textWord )bool ,_bacdd ,_ecgcc ,_cdfea float64 ,_eef ,_dffg bool )int {_cbe :=_bdde ._egfa ;
var _bbfbb map[int ]map[*textWord ]struct{};if !_eef {_bbfbb =_fgff .makeRemovals ();};_agda :=_cdca *_cbe ;_aaec :=0;for _ ,_deab :=range _fgff .depthBand (_bacdd -_agda ,_ecgcc +_agda ){if len (_fgff ._faba [_deab ])==0{continue ;};for _ ,_eagc :=range _fgff ._faba [_deab ]{if !(_bacdd -_agda <=_eagc ._cffdg &&_eagc ._cffdg <=_ecgcc +_agda ){continue ;
// The mismatch is the smaller of the relative difference and the ratio of the
// word's `_ddgee` value and the bag's `_egfa` value.
};if !_ffcf (_bdde ,_eagc ){continue ;};_gde :=2.0*_aa .Abs (_eagc ._ddgee -_bdde ._egfa )/(_eagc ._ddgee +_bdde ._egfa );_eabb :=_aa .Max (_eagc ._ddgee /_bdde ._egfa ,_bdde ._egfa /_eagc ._ddgee );_caef :=_aa .Min (_gde ,_eabb );if _cdfea > 0&&_caef > _cdfea {continue ;
};if _bdde .blocked (_eagc ){continue ;};if !_eef {_bdde .pullWord (_eagc ,_deab ,_bbfbb );};_aaec ++;if !_dffg {if _eagc ._cffdg < _bacdd {_bacdd =_eagc ._cffdg ;};if _eagc ._cffdg > _ecgcc {_ecgcc =_eagc ._cffdg ;};};if _eef {break ;};};};if !_eef {_fgff .applyRemovals (_bbfbb );
};return _aaec ;};
// _cab returns the initial text state for the rectangle `_eec`: `_gdc` is 100
// and the render mode is RenderModeFill.
func _cab (_eec _ce .PdfRectangle )textState {return textState {_gdc :100,_bbd :RenderModeFill ,_dab :_eec };};
// minDepth returns `_cfg` minus the difference between the bag's Ury and `_egfa`.
func (_fagc *wordBag )minDepth ()float64 {return _fagc ._cfg -(_fagc .Ury -_fagc ._egfa )};
// _ggabe reports whether the absolute value of `_deabe` is below `_gcef`.
func _ggabe (_deabe float64 )bool {return _aa .Abs (_deabe )< _gcef };
// xNeighbours builds two events per paragraph — one at its left edge (flagged
// true) and one at its right edge (flagged false), each tagged with the
// paragraph index — and returns the result of eventNeighbours for them.
// When `_gcdda` is non-zero, both edges are pushed outwards by `_gcdda` times
// the paragraph's font size.
func (_dbed paraList) xNeighbours(_gcdda float64) map[*textPara][]int {
	events := make([]event, 2*len(_dbed))
	for idx, para := range _dbed {
		enter, leave := 2*idx, 2*idx+1
		if _gcdda == 0 {
			events[enter] = event{para.Llx, true, idx}
			events[leave] = event{para.Urx, false, idx}
			continue
		}
		events[enter] = event{para.Llx - _gcdda*para.fontsize(), true, idx}
		events[leave] = event{para.Urx + _gcdda*para.fontsize(), false, idx}
	}
	return _dbed.eventNeighbours(events)
}
// String returns a description of the table: its width, its height and the
// `_beaeg` flag.
func (_bdgda *textTable) String() string {
	width, height := _bdgda._ecbf, _bdgda._dcfg
	return _ge.Sprintf("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074", width, height, _bdgda._beaeg)
}

// _afb returns the translation component of the matrix as a point.
func _afb(_cced _bc.Matrix) _bc.Point {
	tx, ty := _cced.Translation()
	return _bc.Point{X: tx, Y: ty}
}

// logComposite prints two debug grids for the table's composite cells when
// `_afcg` is enabled: first the number of paragraphs per cell, then the merged
// cell text truncated by `_adagc` to 12. `_bgeb` is a title for the log lines.
func (_fbfg *textTable) logComposite(_bgeb string) {
	if !_afcg {
		return
	}
	width, height := _fbfg._ecbf, _fbfg._dcfg

	// Grid 1: paragraph counts.
	_ec.Log.Info("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _fbfg._ecbf, _fbfg._dcfg, _bgeb)
	_ge.Printf("\u0025\u0035\u0073 \u007c", "")
	for col := 0; col < width; col++ {
		_ge.Printf("\u0025\u0033\u0064 \u007c", col)
	}
	_ge.Println("")
	_ge.Printf("\u0025\u0035\u0073 \u002b", "")
	for col := 0; col < width; col++ {
		_ge.Printf("\u0025\u0033\u0073 \u002b", "\u002d\u002d\u002d")
	}
	_ge.Println("")
	for row := 0; row < height; row++ {
		_ge.Printf("\u0025\u0035\u0064 \u007c", row)
		for col := 0; col < width; col++ {
			paras, _ := _fbfg._egfe[_aaca(col, row)].parasBBox()
			_ge.Printf("\u0025\u0033\u0064 \u007c", len(paras))
		}
		_ge.Println("")
	}

	// Grid 2: cell text.
	_ec.Log.Info("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _fbfg._ecbf, _fbfg._dcfg, _bgeb)
	_ge.Printf("\u0025\u0035\u0073 \u007c", "")
	for col := 0; col < width; col++ {
		_ge.Printf("\u0025\u0031\u0032\u0064\u0020\u007c", col)
	}
	_ge.Println("")
	_ge.Printf("\u0025\u0035\u0073 \u002b", "")
	for col := 0; col < width; col++ {
		_ge.Print("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b")
	}
	_ge.Println("")
	for row := 0; row < height; row++ {
		_ge.Printf("\u0025\u0035\u0064 \u007c", row)
		for col := 0; col < width; col++ {
			paras, _ := _fbfg._egfe[_aaca(col, row)].parasBBox()
			text := ""
			if merged := paras.merge(); merged != nil {
				text = merged.text()
			}
			// Quote the text to escape it, then strip the surrounding quotes.
			quoted := _ge.Sprintf("\u0025\u0071", _adagc(text, 12))
			quoted = quoted[1 : len(quoted)-1]
			_ge.Printf("\u0025\u0031\u0032\u0073\u0020\u007c", quoted)
		}
		_ge.Println("")
	}
}

// log prints one line per non-nil paragraph when `_eaba` is enabled: its index,
// rectangle, the dimensions of the table it belongs to (if `_bddea` is set) and
// its text truncated by `_adagc` to 50. `_eedee` is a title for the header line.
func (_ebabb paraList) log(_eedee string) {
	if !_eaba {
		return
	}
	_ec.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _eedee, len(_ebabb))
	for idx, para := range _ebabb {
		if para == nil {
			continue
		}
		text := para.text()
		tableInfo := "\u0020\u0020"
		if table := para._bddea; table != nil {
			tableInfo = _ge.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", table._ecbf, table._dcfg)
		}
		_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", idx, para.PdfRectangle, tableInfo, _adagc(text, 50))
	}
}
// ToTextMark returns the public view of `tm`.
func (tm *textMark) ToTextMark() TextMark {
	// Field-by-field copy of the internal mark into the exported type.
	return TextMark{
		Text:         tm._ecaa,
		Original:     tm._gdcgd,
		BBox:         tm._aefef,
		Font:         tm._bcdb,
		FontSize:     tm._bfaca,
		FillColor:    tm._abgd,
		StrokeColor:  tm._eedd,
		Orientation:  tm._edge,
		DirectObject: tm._bfade,
		ObjString:    tm._fcdc,
		Tw:           tm.Tw,
		Th:           tm.Th,
		Tc:           tm._cgdb,
		Index:        tm._beaa,
	}
}

// structTreeRoot stores the values that parseStructTreeRoot reads from a
// structure tree root dictionary.
type structTreeRoot struct {
	_dgbb []structElement // one entry per element found under the "K" key
	_fdec string          // String() of the "Type" entry, "" when absent
}

// parseStructTreeRoot fills the receiver from `obj`.
//
// A nil `obj` is ignored. If `obj` does not resolve to a dictionary the
// problem is logged and the receiver is left untouched; previously execution
// continued and dereferenced the nil dictionary. A missing "Type" entry now
// yields an empty type string instead of a nil-interface method call.
func (root *structTreeRoot) parseStructTreeRoot(obj _gb.PdfObject) {
	if obj == nil {
		return
	}
	dict, isDict := _gb.GetDict(obj)
	if !isDict {
		_ec.Log.Debug("\u0070\u0061\u0072s\u0065\u0053\u0074\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u003a\u0020\u0064\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006eo\u0074\u0020\u0066\u006f\u0075\u006e\u0064\u002e")
		return
	}

	kids := dict.Get("\u004b") // "K"
	typeName := ""
	if typeObj := dict.Get("\u0054\u0079\u0070\u0065"); typeObj != nil { // "Type"
		typeName = typeObj.String()
	}

	// "K" is used directly when it is an array; a reference is wrapped in a
	// one-element array. Any other kind leaves the array nil (no elements).
	var kidArray *_gb.PdfObjectArray
	switch v := kids.(type) {
	case *_gb.PdfObjectArray:
		kidArray = v
	case *_gb.PdfObjectReference:
		kidArray = _gb.MakeArray(kids)
	}

	elements := []structElement{}
	if kidArray != nil {
		for _, kid := range kidArray.Elements() {
			elem := &structElement{}
			elem.parseStructElement(kid)
			elements = append(elements, *elem)
		}
	}
	root._dgbb = elements
	root._fdec = typeName
}

// primaries returns the distinct _abbgc values of the rulings in ascending
// order.
func (_aefgf rulingList) primaries() []float64 {
	seen := make(map[float64]struct{}, len(_aefgf))
	for _, r := range _aefgf {
		seen[r._abbgc] = struct{}{}
	}
	values := make([]float64, 0, len(seen))
	for v := range seen {
		values = append(values, v)
	}
	_c.Float64s(values)
	return values
}

// findPrimSec returns the first ruling whose _abbgc passes the _ffegg
// tolerance test against `primary` and whose [_cebe, _deee] interval, widened
// by _ebbf at both ends, contains `secondary`. It returns nil if none match.
func (_fcbgb rulingList) findPrimSec(primary, secondary float64) *ruling {
	for _, r := range _fcbgb {
		if !_ffegg(r._abbgc - primary) {
			continue
		}
		if r._cebe-_ebbf <= secondary && secondary <= r._deee+_ebbf {
			return r
		}
	}
	return nil
}

const _fgc = 1.0 / 1000.0

// toTilings tidies the rulings, splits them with toGrids and converts each
// grid with asTiling. It returns the tidied rulings and one tiling per grid,
// or (nil, nil) for an empty list.
func (_begd rulingList) toTilings() (rulingList, []gridTiling) {
	_begd.log("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s")
	if len(_begd) == 0 {
		return nil, nil
	}
	_begd = _begd.tidied("\u0061\u006c\u006c")
	_begd.log("\u0074\u0069\u0064\u0069\u0065\u0064")

	grids := _begd.toGrids()
	tilings := make([]gridTiling, len(grids))
	for i := range grids {
		tilings[i] = grids[i].asTiling()
	}
	return _begd, tilings
}

// sortTopoOrder passes the ordering computed by topoOrder to reorder.
func (_bfgf paraList) sortTopoOrder() {
	_bfgf.reorder(_bfgf.topoOrder())
}

// _egf builds a textObject from the given extractor, resources, graphics
// state, text state and state stack, with both matrices set to identity.
func _egf(_bebb *Extractor, _bdb *_ce.PdfPageResources, _eab _ag.GraphicsState, _fgfe *textState, _ebc *stateStack) *textObject {
	return &textObject{
		_bgcb:  _bebb,
		_bacad: _bdb,
		_gbe:   _eab,
		_ebf:   _ebc,
		_befa:  _fgfe,
		_eee:   _bc.IdentityMatrix(),
		_ecg:   _bc.IdentityMatrix(),
	}
}

// isActualGrid splits the rulings with augmentGrid into the two lists that
// the log messages call verts and horzs, and checks that they form a grid.
// Both lists must have at least _cbbb+1 and _acgd+1 entries respectively.
// When _gdcg is set, the first and last entries of the two lists must meet
// according to _ggabe; otherwise each list must report aligned(). On success
// it returns the two lists concatenated (verts first) and true.
func (_aacg rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := _aacg.augmentGrid()
	if len(verts) < _cbbb+1 || len(horzs) < _acgd+1 {
		if _gfgc {
			_ec.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(verts), len(horzs), _cbbb+1, _acgd+1)
		}
		return nil, false
	}
	if _gfgc {
		_ec.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", _aacg, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for i, r := range _aacg {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", i, r)
		}
	}

	if _gdcg {
		v0, v1 := verts[0], verts[len(verts)-1]
		h0, h1 := horzs[0], horzs[len(horzs)-1]
		cornersMeet := _ggabe(v0._abbgc-h0._cebe) && _ggabe(v1._abbgc-h0._deee) && _ggabe(h0._abbgc-v0._deee) && _ggabe(h1._abbgc-v0._cebe)
		if !cornersMeet {
			if _gfgc {
				_ec.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", v0, v1, h0, h1)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			if _fcfe {
				_ec.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _gfgc {
				_ec.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(horzs))
			}
			return nil, false
		}
	}

	combined := append(verts, horzs...)
	return combined, true
}
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (pt PageText) Marks() *TextMarkArray {
	marks := &TextMarkArray{_dec: pt._ebfc}
	return marks
}

// put stores `para` in the table's cell map under the key _aaca(col, row).
func (t *textTable) put(col, row int, para *textPara) {
	key := _aaca(col, row)
	t._gcbga[key] = para
}

// _eggg builds a 2x2 table. The arguments fill cells (0,0), (1,0), (0,1) and
// (1,1) in that order, where each pair is the (first, second) argument to put.
func _eggg(_fdce, _eddee, _aafgf, _eddcc *textPara) *textTable {
	table := &textTable{
		_ecbf:  2,
		_dcfg:  2,
		_gcbga: make(map[uint64]*textPara, 4),
	}
	grid := [2][2]*textPara{
		{_fdce, _eddee},
		{_aafgf, _eddcc},
	}
	for row := 0; row < 2; row++ {
		for col := 0; col < 2; col++ {
			table.put(col, row, grid[row][col])
		}
	}
	return table
}

// getDepthIdx converts `depth` with _fece and clamps the result to the range
// spanned by the first and last entries of depthIndexes().
func (b *wordBag) getDepthIdx(depth float64) int {
	indexes := b.depthIndexes()
	idx := _fece(depth)
	lowest, highest := indexes[0], indexes[len(indexes)-1]
	switch {
	case idx < lowest:
		return lowest
	case idx > highest:
		return highest
	default:
		return idx
	}
}
// Text returns the extracted page text.
func (pt PageText) Text() string {
	text := pt._gcee
	return text
}

// getFontDict looks up the font called `name` in the text object's resources.
// With no resources it logs and returns (nil, nil). If the font is not found
// it logs and returns an error.
func (to *textObject) getFontDict(name string) (_aefg _gb.PdfObject, _aaee error) {
	if to._bacad == nil {
		_ec.Log.Debug("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071", name)
		return nil, nil
	}
	font, found := to._bacad.GetFontByName(_gb.PdfObjectName(name))
	if found {
		return font, nil
	}
	_ec.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071", name)
	return nil, _d.New("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073")
}

// _decgb reports whether both _fcd and _fdg hold for the two rectangles.
func _decgb(a, b _ce.PdfRectangle) bool {
	if !_fcd(a, b) {
		return false
	}
	return _fdg(a, b)
}

// last returns the final point of the subpath. The subpath must not be empty.
func (sp *subpath) last() _bc.Point {
	points := sp._gfefe
	return points[len(points)-1]
}
// markWordBoundaries sets _gfffc on every word whose gap to the preceding
// word, as measured by _gdgbc, is at least _ddag times the line's _ecag.
//
// A line with fewer than two words has no adjacent pairs and is left
// untouched; previously a line with no words panicked on the [1:] slice.
func (_fagb *textLine) markWordBoundaries() {
	words := _fagb._aebc
	if len(words) < 2 {
		return
	}
	threshold := _ddag * _fagb._ecag
	for i := 1; i < len(words); i++ {
		if _gdgbc(words[i], words[i-1]) >= threshold {
			words[i]._gfffc = true
		}
	}
}
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
type TextTable struct {
	_ce.PdfRectangle
	W, H  int
	Cells [][]TableCell
}

// add inserts `v` into the set.
func (set intSet) add(v int) {
	set[v] = struct{}{}
}

// _abgbe renders `img` as an RGBA image of the same size. Pixels whose red,
// green and blue components are all zero become `fill`; every other pixel
// becomes transparent. Pixels that cannot be read are logged and left unset.
func _abgbe(img *_ce.Image, fill _fg.Color) _ded.Image {
	width := int(img.Width)
	height := int(img.Height)
	out := _ded.NewRGBA(_ded.Rect(0, 0, width, height))
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			c, err := img.ColorAt(x, y)
			if err != nil {
				_ec.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e", x, y)
				continue
			}
			r, g, b, _ := c.RGBA()
			var pixel _fg.Color = _fg.Transparent
			if r+g+b == 0 {
				pixel = fill
			}
			out.Set(x, y, pixel)
		}
	}
	return out
}

// _cbda returns nil when a license key is present and licensed, or when _ff
// is set. Otherwise it prints an unlicensed notice and returns an error.
// The parameter is not used.
func _cbda(_bdaf *PageText) error {
	key := _bd.GetLicenseKey()
	licensed := key != nil && key.IsLicensed()
	if licensed || _ff {
		return nil
	}
	_ge.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_ge.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _d.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}

// checkWidth returns hi-lo and whether that difference is at most _gcef.
func (_beecg rectRuling) checkWidth(lo, hi float64) (float64, bool) {
	width := hi - lo
	return width, width <= _gcef
}

// cubicTo adds only the final pair of coordinates as a point; the first four
// arguments are ignored.
func (ss *shapesState) cubicTo(_agf, _dbg, _fgca, _cbaae, endX, endY float64) {
	if _cbag {
		_ec.Log.Info("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a")
	}
	ss.addPoint(endX, endY)
}

// _acgg walks the keys returned by _fadg, starting at the first key whose
// slice is non-nil, and clamps the last value of a slice to the first value
// of the current slice when it is larger. Maps with at most one entry are
// left unchanged.
func _acgg(cells map[int][]float64) {
	if len(cells) <= 1 {
		return
	}
	keys := _fadg(cells)
	if _afcg {
		_ec.Log.Info("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076", keys)
	}

	// Find the first key with a non-nil slice. If none exists the loop ends
	// on the last key.
	var start, prevKey int
	for start, prevKey = range keys {
		if cells[prevKey] != nil {
			break
		}
	}

	for offset, key := range keys[start:] {
		current := cells[key]
		if current == nil {
			continue
		}
		if _afcg {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a", start+offset, prevKey, key)
		}
		// NOTE(review): this reads the slice for the current key again, not
		// the one for prevKey, so it compares a slice's last value with its
		// own first value. Looks unintended; confirm before changing.
		target := cells[key]
		lastIdx := len(target) - 1
		if target[lastIdx] > current[0] {
			target[lastIdx] = current[0]
			cells[prevKey] = target
		}
		prevKey = key
	}
}

// _fbcc turns text marks into paragraphs. It builds words with _fecgf,
// divides them with _dca and _cgbf using the vertical and horizontal rulings,
// applies _bddf, and arranges each resulting bag into a paragraph. Unless
// `skipTables` is set, tables are extracted when at least _ddd paragraphs
// exist and the paragraphs are additionally put in topological order.
// It returns nil when there are no marks or no words.
func _fbcc(marks []*textMark, pageSize _ce.PdfRectangle, rulings rulingList, tilings []gridTiling, skipTables bool) paraList {
	_ec.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(marks), pageSize)
	if len(marks) == 0 {
		return nil
	}
	words := _fecgf(marks, pageSize)
	if len(words) == 0 {
		return nil
	}
	rulings.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := rulings.vertsHorzs()

	pageBag := _dca(words, pageSize.Ury, verts, horzs)
	bags := _cgbf(pageBag, pageSize.Ury, verts, horzs)
	bags = _bddf(bags)

	paras := make(paraList, 0, len(bags))
	for _, bag := range bags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}

	if !skipTables && len(paras) >= _ddd {
		paras = paras.extractTables(tilings)
	}
	paras.sortReadingOrder()
	if !skipTables {
		paras.sortTopoOrder()
	}
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}
// Append appends `mark` to the mark array.
func (ma *TextMarkArray) Append(mark TextMark) {
	ma._dec = append(ma._dec, mark)
}

// inTile returns the paragraphs whose rectangle `tile` reports as contained,
// in their original order. The result is nil when none match.
func (_gfdf paraList) inTile(tile gridTile) paraList {
	var inside paraList
	for _, para := range _gfdf {
		if !tile.contains(para.PdfRectangle) {
			continue
		}
		inside = append(inside, para)
	}
	if !_afcg {
		return inside
	}
	// Verbose dump of the selection.
	_ge.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", tile, len(inside))
	for i, para := range inside {
		_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
	}
	_ge.Println("")
	return inside
}
// String returns a description of `b`.
func (b *wordBag) String() string {
	var texts []string
	for _, depthIdx := range b.depthIndexes() {
		for _, word := range b._faba[depthIdx] {
			texts = append(texts, word._bbdb)
		}
	}
	return _ge.Sprintf("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071", b.PdfRectangle, b._egfa, len(texts), texts)
}

// add appends the given points to the subpath.
func (sp *subpath) add(points ..._bc.Point) {
	sp._gfefe = append(sp._gfefe, points...)
}

// arrangeText empties the bag into text lines and wraps them in a paragraph.
//
// For each depth index, while it still holds words, a line is seeded with
// _egee at firstReadingIndex and then grown: every pass scans the depth band
// around the seed word and pulls the candidate that _fabg orders first among
// those whose gap to the line's last word (per _gdgbc) is within the allowed
// range. A candidate whose gap is below the negative limit ends the line at
// once. Lines are sorted with _cbcf and passed to _ffec. It returns nil when
// no lines were produced.
func (_ebcg *wordBag) arrangeText() *textPara {
	_ebcg.sort()
	if _cafd {
		_ebcg.removeDuplicates()
	}

	var lines []*textLine
	for _, depthIdx := range _ebcg.depthIndexes() {
		for !_ebcg.empty(depthIdx) {
			startIdx := _ebcg.firstReadingIndex(depthIdx)
			seed := _ebcg.firstWord(startIdx)
			line := _egee(_ebcg, startIdx)

			// All limits scale with the seed word's _ddgee.
			size := seed._ddgee
			minDepth := seed._cffdg - _cdca*size
			maxDepth := seed._cffdg + _cdca*size
			maxGap := _ebfa * size
			maxOverlap := _gecdb * size

		grow:
			for {
				var best *textWord
				bestIdx := 0
				for _, idx := range _ebcg.depthBand(minDepth, maxDepth) {
					candidate := _ebcg.highestWord(idx, minDepth, maxDepth)
					if candidate == nil {
						continue
					}
					gap := _gdgbc(candidate, line._aebc[len(line._aebc)-1])
					if gap < -maxOverlap {
						break grow
					}
					if gap > maxGap {
						continue
					}
					if best != nil && _fabg(candidate, best) >= 0 {
						continue
					}
					best, bestIdx = candidate, idx
				}
				if best == nil {
					break
				}
				line.pullWord(_ebcg, best, bestIdx)
			}

			line.markWordBoundaries()
			lines = append(lines, line)
		}
	}
	if len(lines) == 0 {
		return nil
	}

	_c.Slice(lines, func(i, j int) bool {
		return _cbcf(lines[i], lines[j]) < 0
	})
	para := _ffec(_ebcg.PdfRectangle, lines)

	if _egd {
		_ec.Log.Info("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073", para.String())
		if _eeg {
			for i, ln := range para._bdbcg {
				_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, ln.String())
				if !_aacb {
					continue
				}
				for j, word := range ln._aebc {
					_ge.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", j, word.String())
					for k, mark := range word._bgeaa {
						_ge.Printf("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n", k, mark.String())
					}
				}
			}
		}
	}
	return para
}

// _fbcfg looks up `s` in _eeff when it is exactly one rune long. Any other
// length yields ("", false).
func _fbcfg(s string) (string, bool) {
	if runes := []rune(s); len(runes) == 1 {
		mapped, found := _eeff[runes[0]]
		return mapped, found
	}
	return "", false
}
2023-08-03 17:30:04 +00:00
2023-11-11 11:29:03 +00:00
// PageText represents the layout of text on a device page.
2023-12-17 13:54:01 +00:00
type PageText struct {
	_fcag []*textMark
	_gcee string     // returned by Text()
	_ebfc []TextMark // wrapped by Marks()
	_eadb []TextTable
	_fbbg _ce.PdfRectangle
	_geda []pathSection
	_gegc []pathSection
	_bbb  *_gb.PdfObject
	_fefc _gb.PdfObject
	_ecfe *_ag.ContentStreamOperations
	_ccg  PageTextOptions
}

// _bgeba returns the larger of the two ints.
func _bgeba(_ceabc, _abgg int) int {
	if _ceabc > _abgg {
		return _ceabc
	}
	return _abgg
}

// asTiling converts the rulings of one grid into a gridTiling.
//
// Steps:
//  1. Log adjacent rulings that align on both axes, then sortStrict.
//  2. Take the distinct primaries of the two lists from vertsHorzs (logged as
//     verts and horzs); they delimit the columns and rows.
//  3. Build one gridTile per (column, row) cell from the rulings found on its
//     four sides.
//  4. Coalesce tiles horizontally within each row, then vertically across
//     rows, and keep the tiles that report complete().
//
// An empty list, or a list with no rulings on one axis, returns an empty
// gridTiling. Previously only the "exactly one primary" case was caught and
// the other cases panicked on an out-of-range index.
func (_aagf rulingList) asTiling() gridTiling {
	if _bea {
		_ec.Log.Info("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(_aagf))
	}
	// Guard: the pairwise scan below slices [1:], which panics on an empty list.
	if len(_aagf) == 0 {
		return gridTiling{}
	}
	for _bfacf, _afged := range _aagf[1:] {
		_egada := _aagf[_bfacf]
		if _egada.alignsPrimary(_afged) && _egada.alignsSec(_afged) {
			_ec.Log.Error("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073", _afged, _egada)
		}
	}
	_aagf.sortStrict()
	_aagf.log("\u0073n\u0061\u0070\u0070\u0065\u0064")

	_cdge, _bacf := _aagf.vertsHorzs()
	_dbcg := _cdge.primaries() // ascending; used as the tiles' Llx/Urx values
	_geag := _bacf.primaries() // ascending; used as the tiles' Lly/Ury values
	_dfdb := len(_dbcg) - 1    // number of columns
	_ecacf := len(_geag) - 1   // number of rows
	// <= 0 rather than == 0: with no primaries on an axis the count is -1 and
	// the [0] index below would panic.
	if _dfdb <= 0 || _ecacf <= 0 {
		return gridTiling{}
	}
	_gaae := _ce.PdfRectangle{Llx: _dbcg[0], Urx: _dbcg[_dfdb], Lly: _geag[0], Ury: _geag[_ecacf]}
	if _bea {
		_ec.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064", len(_cdge))
		for _dgbe, _aabfb := range _cdge {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _dgbe, _aabfb)
		}
		_ec.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064", len(_bacf))
		for _eagcf, _fcaf := range _bacf {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _eagcf, _fcaf)
		}
		_ec.Log.Info("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f", _dfdb, _ecacf, _dbcg, _geag)
	}

	// One tile per cell, stored row-major at index row*_dfdb+column. Rows are
	// visited from the highest Lly downwards.
	_eedf := make([]gridTile, _dfdb*_ecacf)
	for _feffe := _ecacf - 1; _feffe >= 0; _feffe-- {
		_eabfg := _geag[_feffe]
		_bdffb := _geag[_feffe+1]
		for _gdeb := 0; _gdeb < _dfdb; _gdeb++ {
			_efad := _dbcg[_gdeb]
			_bfgca := _dbcg[_gdeb+1]
			// Rulings along the Llx, Urx, Lly and Ury sides of the cell (nil if absent).
			_dgaac := _cdge.findPrimSec(_efad, _eabfg)
			_efadd := _cdge.findPrimSec(_bfgca, _eabfg)
			_gabcb := _bacf.findPrimSec(_eabfg, _efad)
			_fefcb := _bacf.findPrimSec(_bdffb, _efad)
			_gecda := _ce.PdfRectangle{Llx: _efad, Urx: _bfgca, Lly: _eabfg, Ury: _bdffb}
			_dbdbf := _geedc(_gecda, _dgaac, _efadd, _gabcb, _fefcb)
			_eedf[_feffe*_dfdb+_gdeb] = _dbdbf
			if _bea {
				_ge.Printf("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a", _gdeb, _feffe, _dbdbf.String(), _dbdbf.Width(), _dbdbf.Height())
			}
		}
	}

	// Horizontal coalescing: within a row, a tile with _ebdg set absorbs its
	// right-hand neighbours until one with _ebga set has been merged.
	if _bea {
		_ec.Log.Info("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", _gaae)
	}
	_decbd := make([]map[float64]gridTile, _ecacf) // per row: Llx -> merged tile
	for _egfda := _ecacf - 1; _egfda >= 0; _egfda-- {
		if _bea {
			_ge.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", _egfda)
		}
		_decbd[_egfda] = make(map[float64]gridTile, _dfdb)
		for _eefac := 0; _eefac < _dfdb; _eefac++ {
			_dcaf := _eedf[_egfda*_dfdb+_eefac]
			if _bea {
				_ge.Printf("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _eefac, _dcaf)
			}
			if !_dcaf._ebdg {
				continue
			}
			_adga := _eefac
			for _dbfd := _eefac + 1; !_dcaf._ebga && _dbfd < _dfdb; _dbfd++ {
				_abbfd := _eedf[_egfda*_dfdb+_dbfd]
				_dcaf.Urx = _abbfd.Urx
				_dcaf._fgde = _dcaf._fgde || _abbfd._fgde
				_dcaf._ddbaf = _dcaf._ddbaf || _abbfd._ddbaf
				_dcaf._ebga = _abbfd._ebga
				if _bea {
					_ge.Printf("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a", _dbfd, _abbfd, _dcaf)
				}
				_adga = _dbfd
			}
			if _bea {
				_ge.Printf(" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n", _eefac, _adga, _dcaf)
			}
			// Skip the columns that were absorbed.
			_eefac = _adga
			_decbd[_egfda][_dcaf.Llx] = _dcaf
		}
	}

	// Vertical coalescing. _eeea collects the final tiles keyed by the Lly of
	// the row they started in, then by Llx. _ffgf records tiles already
	// absorbed into a tile from a higher row.
	_eeea := make(map[float64]map[float64]gridTile, _ecacf)
	_ffgf := make(map[float64]map[float64]struct{}, _ecacf)
	for _dcfbc := _ecacf - 1; _dcfbc >= 0; _dcfbc-- {
		_eaec := _eedf[_dcfbc*_dfdb].Lly
		_eeea[_eaec] = make(map[float64]gridTile, _dfdb)
		_ffgf[_eaec] = make(map[float64]struct{}, _dfdb)
	}
	if _bea {
		_ec.Log.Info("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", _gaae)
	}
	for _cfda := _ecacf - 1; _cfda >= 0; _cfda-- {
		_dfceb := _eedf[_cfda*_dfdb].Lly
		_ecagb := _decbd[_cfda]
		if _bea {
			_ge.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", _cfda)
		}
		for _, _aeca := range _ccacc(_ecagb) {
			if _, _afeb := _ffgf[_dfceb][_aeca]; _afeb {
				continue
			}
			_dbdac := _ecagb[_aeca]
			if _bea {
				_ge.Printf(" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a", _dbdac.String())
			}
			// Extend downwards while _ddbaf is unset and the row below holds
			// a tile with the same Llx and Urx.
			for _beea := _cfda - 1; _beea >= 0; _beea-- {
				if _dbdac._ddbaf {
					break
				}
				_adba := _decbd[_beea]
				_cgf, _gefa := _adba[_aeca]
				if !_gefa {
					break
				}
				if _cgf.Urx != _dbdac.Urx {
					break
				}
				_dbdac._ddbaf = _cgf._ddbaf
				_dbdac.Lly = _cgf.Lly
				if _bea {
					_ge.Printf("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a", _cgf.String(), _dbdac.String())
				}
				_ffgf[_cgf.Lly][_cgf.Llx] = struct{}{}
			}
			// The lowest row is always treated as having _ddbaf set.
			if _cfda == 0 {
				_dbdac._ddbaf = true
			}
			if _dbdac.complete() {
				_eeea[_dfceb][_aeca] = _dbdac
			}
		}
	}

	_gefaa := gridTiling{PdfRectangle: _gaae, _cgecb: _edege(_eeea), _agbb: _bdcdcf(_eeea), _bage: _eeea}
	_gefaa.log("\u0043r\u0065\u0061\u0074\u0065\u0064")
	return _gefaa
}

// firstReadingIndex returns the depth index, among `_febc` and the indexes in
// the band from (_febc+1)*_edbf to that value plus _bafc times the first
// word's _ddgee, whose first word _fabg orders earliest.
func (_gfb *wordBag) firstReadingIndex(_febc int) int {
	_aadb := _gfb.firstWord(_febc)._ddgee
	_def := float64(_febc+1) * _edbf
	_efgde := _def + _bafc*_aadb
	_gdgf := _febc
	for _, _gdab := range _gfb.depthBand(_def, _efgde) {
		if _fabg(_gfb.firstWord(_gdab), _gfb.firstWord(_gdgf)) < 0 {
			_gdgf = _gdab
		}
	}
	return _gdgf
}

// updateBBox folds the rectangle of every paragraph in the cell into the
// cell's own rectangle using _cdggc.
func (_gcdb *compositeCell) updateBBox() {
	for _, _bbbg := range _gcdb.paraList {
		_gcdb.PdfRectangle = _cdggc(_gcdb.PdfRectangle, _bbbg.PdfRectangle)
	}
}

// _bgec returns a ruling of kind _ebdaf at the rectangle's Llx, spanning its
// Lly to Ury.
func _bgec(_dagce _ce.PdfRectangle) *ruling {
	return &ruling{_bfbc: _ebdaf, _abbgc: _dagce.Llx, _cebe: _dagce.Lly, _deee: _dagce.Ury}
}

// _ccfe matches either of the patterns _cfgd or _befc.
var _ccfe *_e.Regexp = _e.MustCompile(_cfgd + "\u007c" + _befc)
2023-05-29 17:26:33 +00:00
2023-12-17 13:54:01 +00:00
// NewWithOptions creates an Extractor instance for extracting content from the input PDF page with options.
func NewWithOptions(page *_ce.PdfPage, options *Options) (*Extractor, error) {
	const usageKey = "\u0065x\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077W\u0069\u0074\u0068\u004f\u0070\u0074\u0069\u006f\u006e\u0073"

	streams, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}

	// A missing structure tree root is only reported; extraction continues.
	structRoot, tagged := page.GetStructTreeRoot()
	if !tagged {
		_ec.Log.Info("T\u0068\u0065\u0020\u0070\u0064\u0066\u0020\u0064\u006f\u0063\u0075\u006d\u0065\u006e\u0074\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020\u0074\u0061\u0067g\u0065d\u002e\u0020\u0053\u0074r\u0075\u0063t\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e\u0027\u0074\u0020\u0065\u0078\u0069\u0073\u0074\u002e")
	}
	container := page.GetContainingPdfObject()

	box, err := page.GetMediaBox()
	if err != nil {
		return nil, _ge.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	// Work on a copy and swap any reversed coordinate pair so that
	// Llx <= Urx and Lly <= Ury.
	mediaBox := *box
	if mediaBox.Llx > mediaBox.Urx {
		_ec.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", mediaBox)
		mediaBox.Llx, mediaBox.Urx = mediaBox.Urx, mediaBox.Llx
	}
	if mediaBox.Lly > mediaBox.Ury {
		_ec.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", mediaBox)
		mediaBox.Lly, mediaBox.Ury = mediaBox.Ury, mediaBox.Lly
	}

	extractor := &Extractor{
		_bcf: streams,
		_ab:  page.Resources,
		_gda: mediaBox,
		_ed:  page.CropBox,
		_cd:  map[string]fontEntry{},
		_fa:  map[string]textResult{},
		_fgb: options,
		_ba:  structRoot,
		_eg:  container,
	}
	_bd.TrackUse(usageKey)
	return extractor, nil
}
2023-01-08 22:34:27 +00:00
2023-12-17 13:54:01 +00:00
// String returns a human readable description of `ss`.
func (ss *shapesState) String() string {
	count := len(ss._abgb)
	return _ge.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", count, ss._edee)
}

// moveTextSetLeading stores -ty in the text state's _dcc field and then calls
// moveLP(tx, ty).
func (to *textObject) moveTextSetLeading(tx, ty float64) {
	to._befa._dcc = -ty
	to.moveLP(tx, ty)
}

// log prints the list and each of its rulings under `title`. It does nothing
// unless _gfgc is set.
func (_bbdec rulingList) log(title string) {
	if !_gfgc {
		return
	}
	_ec.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", title, _bbdec.String())
	for i := range _bbdec {
		_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, _bbdec[i].String())
	}
}

// pullWord appends `word` to the line and removes it from `bag` at `depthIdx`.
func (line *textLine) pullWord(bag *wordBag, word *textWord, depthIdx int) {
	line.appendWord(word)
	bag.removeWord(word, depthIdx)
}

// removeDuplicates sorts the receiver and returns a new list in which each
// ruling that equals() the previously kept one is dropped. It returns nil for
// an empty list.
func (_eecg rulingList) removeDuplicates() rulingList {
	if len(_eecg) == 0 {
		return nil
	}
	_eecg.sort()
	unique := rulingList{_eecg[0]}
	for _, r := range _eecg[1:] {
		if !r.equals(unique[len(unique)-1]) {
			unique = append(unique, r)
		}
	}
	return unique
}

// isQuadrilateral reports whether the subpath has four points, or five points
// of which the first and last coincide.
func (sp *subpath) isQuadrilateral() bool {
	points := sp._gfefe
	switch len(points) {
	case 4:
		return true
	case 5:
		first, last := points[0], points[4]
		return first.X == last.X && first.Y == last.Y
	default:
		return false
	}
}

// _cecd concatenates the lists and splits the result with vertsHorzs.
func _cecd(lists []rulingList) (rulingList, rulingList) {
	var all rulingList
	for i := range lists {
		all = append(all, lists[i]...)
	}
	return all.vertsHorzs()
}

// _dbga returns a ruling of kind _ebdaf at the rectangle's Urx, spanning its
// Lly to Ury.
func _dbga(rect _ce.PdfRectangle) *ruling {
	return &ruling{
		_bfbc:  _ebdaf,
		_abbgc: rect.Urx,
		_cebe:  rect.Lly,
		_deee:  rect.Ury,
	}
}

// _baafc returns the smaller of the two ints.
func _baafc(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// hasLines reports whether _decgb holds between the cell's rectangle and the
// rectangle of at least one of `lines`.
func (_bcdbf compositeCell) hasLines(lines []*textLine) bool {
	total := len(lines)
	for i, line := range lines {
		intersects := _decgb(_bcdbf.PdfRectangle, line.PdfRectangle)
		if _afcg {
			_ge.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", intersects, i, total)
			_ge.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", _bcdbf)
			_ge.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if intersects {
			return true
		}
	}
	return false
}

// markKind values, numbered from zero.
const (
	_beagd markKind = iota
	_gbffb
	_eecbc
	_cgacb
)

// _fccf reports whether `s` has at least _ggafb runes, ends with a rune in
// unicode.Hyphen, and the rune before that is not white space.
func _fccf(s string) bool {
	if _g.RuneCountInString(s) < _ggafb {
		return false
	}
	last, size := _g.DecodeLastRuneInString(s)
	if size <= 0 || !_de.Is(_de.Hyphen, last) {
		return false
	}
	prev, prevSize := _g.DecodeLastRuneInString(s[:len(s)-size])
	if prevSize <= 0 {
		return false
	}
	return !_de.IsSpace(prev)
}

// fontEntry pairs a font with an int64 value.
type fontEntry struct {
	_ddaa *_ce.PdfFont
	_addb int64
}

// _gecae reports whether the two values differ by at most _ebbf.
func _gecae(a, b float64) bool {
	diff := _aa.Abs(a - b)
	return diff <= _ebbf
}
// String returns a description of `w`.
func (w *textWord) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _ge.Sprintf(layout, w._cffdg, w.PdfRectangle, w._ddgee, w._bbdb)
}

// _ddedf returns copies of `marks` with _ebd set to true and _afe set to
// `table`. The input slice is not modified. The result is nil for no marks.
func _ddedf(marks []TextMark, table *TextTable) []TextMark {
	var tagged []TextMark
	for i := range marks {
		mark := marks[i]
		mark._ebd = true
		mark._afe = table
		tagged = append(tagged, mark)
	}
	return tagged
}

type markKind int

// reduce returns a table without the rows and columns that
// emptyCompositeRow / emptyCompositeColumn report as empty. If nothing is
// empty the receiver itself is returned.
func (t *textTable) reduce() *textTable {
	keptRows := make([]int, 0, t._dcfg)
	keptCols := make([]int, 0, t._ecbf)
	for row := 0; row < t._dcfg; row++ {
		if t.emptyCompositeRow(row) {
			continue
		}
		keptRows = append(keptRows, row)
	}
	for col := 0; col < t._ecbf; col++ {
		if t.emptyCompositeColumn(col) {
			continue
		}
		keptCols = append(keptCols, col)
	}
	if len(keptRows) == t._dcfg && len(keptCols) == t._ecbf {
		return t
	}

	reduced := textTable{
		_beaeg: t._beaeg,
		_ecbf:  len(keptCols),
		_dcfg:  len(keptRows),
		_gcbga: make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _afcg {
		_ec.Log.Info("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064", t._ecbf, t._dcfg, len(keptCols), len(keptRows))
		_ec.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_ec.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}
	// Copy every surviving composite cell to its new position.
	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			paras, bbox := t.getComposite(oldCol, oldRow)
			if paras == nil {
				continue
			}
			if _afcg {
				_ge.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newCol, newRow, oldCol, oldRow, _adagc(paras.merge().text(), 50))
			}
			reduced.putComposite(newCol, newRow, paras, bbox)
		}
	}
	return &reduced
}

// sortStrict sorts the rulings in place by, in order: _bfbc descending,
// _abbgc ascending (values that _ffegg treats as equal count as ties),
// _cebe ascending, _deee ascending.
func (_bcbgb rulingList) sortStrict() {
	less := func(i, j int) bool {
		a, b := _bcbgb[i], _bcbgb[j]
		if a._bfbc != b._bfbc {
			return a._bfbc > b._bfbc
		}
		if !_ffegg(a._abbgc - b._abbgc) {
			return a._abbgc < b._abbgc
		}
		if a._cebe != b._cebe {
			return a._cebe < b._cebe
		}
		return a._deee < b._deee
	}
	_c.Slice(_bcbgb, less)
}

// _ffegg reports whether the magnitude of `delta` is below _fdac.
func _ffegg(delta float64) bool {
	magnitude := _aa.Abs(delta)
	return magnitude < _fdac
}

// _bcfde renders `img` as an RGBA image of the same size. Pixels whose red,
// green and blue components are all zero become transparent; every other
// pixel becomes `fill`. Pixels that cannot be read are logged and left unset.
func _bcfde(img *_ce.Image, fill _fg.Color) _ded.Image {
	width := int(img.Width)
	height := int(img.Height)
	out := _ded.NewRGBA(_ded.Rect(0, 0, width, height))
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			c, err := img.ColorAt(x, y)
			if err != nil {
				_ec.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e", x, y)
				continue
			}
			r, g, b, _ := c.RGBA()
			pixel := fill
			if r+g+b == 0 {
				pixel = _fg.Transparent
			}
			out.Set(x, y, pixel)
		}
	}
	return out
}

// pathSection groups subpaths with a color.
type pathSection struct {
	_fbfe []*subpath
	_fg.Color
}

// setWordSpacing stores `spacing` in the text state. A nil receiver is ignored.
func (to *textObject) setWordSpacing(spacing float64) {
	if to != nil {
		to._befa._eag = spacing
	}
}

// size returns the number of entries on the stack.
func (stack *stateStack) size() int {
	return len(*stack)
}

// showText forwards its arguments unchanged to renderText.
func (to *textObject) showText(obj _gb.PdfObject, data []byte, mcid int) error {
	err := to.renderText(obj, data, mcid)
	return err
}

// _efaf joins the text of `lines`. Each line is written as `prefix` followed
// by its text, then a newline if the next line's _bcdg differs from this
// line's and a space otherwise. The last line is compared against 0.0.
func _efaf(lines []*textLine, prefix string) string {
	var sb _gd.Builder
	for i, line := range lines {
		text := line.text()
		current := line._bcdg
		next := 0.0
		if i+1 < len(lines) {
			next = lines[i+1]._bcdg
		}
		sb.WriteString(prefix)
		sb.WriteString(text)
		separator := "\u0020"
		if next != current {
			separator = "\u000a"
		}
		sb.WriteString(separator)
	}
	return sb.String()
}

var _ff = false

// _fcegc logs the number of grids under `title` and prints each grid.
func _fcegc(title string, grids []rulingList) {
	_ec.Log.Info("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073", len(grids), title)
	for i := range grids {
		_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, grids[i].String())
	}
}

// _bcgc reports whether all marks of all words in the line share one _fefe
// value. -1 is used as the "nothing seen yet" sentinel. A line without marks
// yields true.
func _bcgc(line *textLine) bool {
	seen := -1
	for _, word := range line._aebc {
		for _, mark := range word._bgeaa {
			value := mark._fefe
			if seen == -1 {
				seen = value
				continue
			}
			if value != seen {
				// One mismatch settles the answer; the remaining marks
				// cannot change it.
				return false
			}
		}
	}
	return true
}

// blocked reports whether a ruling separates `word` from the bag. When the
// two rectangles do not overlap on the x axis, the facing edges are tested
// against _eeca; likewise on the y axis against _gbef.
func (_gbd *wordBag) blocked(word *textWord) bool {
	switch {
	case word.Urx < _gbd.Llx:
		near := _dbga(word.PdfRectangle)
		far := _bgec(_gbd.PdfRectangle)
		if _gbd._eeca.blocks(near, far) {
			if _aecc {
				_ec.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073", word, _gbd)
			}
			return true
		}
	case _gbd.Urx < word.Llx:
		near := _dbga(_gbd.PdfRectangle)
		far := _bgec(word.PdfRectangle)
		if _gbd._eeca.blocks(near, far) {
			if _aecc {
				_ec.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s", word, _gbd)
			}
			return true
		}
	}

	switch {
	case word.Ury < _gbd.Lly:
		near := _edeff(word.PdfRectangle)
		far := _fgee(_gbd.PdfRectangle)
		if _gbd._gbef.blocks(near, far) {
			if _aecc {
				_ec.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073", word, _gbd)
			}
			return true
		}
	case _gbd.Ury < word.Lly:
		near := _edeff(_gbd.PdfRectangle)
		far := _fgee(word.PdfRectangle)
		if _gbd._gbef.blocks(near, far) {
			if _aecc {
				_ec.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s", word, _gbd)
			}
			return true
		}
	}
	return false
}

// maxDepth returns _cfg minus the bag's Lly.
func (b *wordBag) maxDepth() float64 {
	depth := b._cfg - b.Lly
	return depth
}
// _agefd converts the PDF color `_cadgd`, expressed in colorspace `_cfcag`, to a Go color.
// It returns color.Black when either argument is nil, when ColorToRGB fails, or when the
// converted color is not a *PdfColorDeviceRGB (the last two cases are logged at debug level).
// Otherwise each of R, G and B is multiplied by 255 and truncated to uint8; alpha is 255.
func _agefd (_cfcag _ce .PdfColorspace ,_cadgd _ce .PdfColor )_fg .Color {if _cfcag ==nil ||_cadgd ==nil {return _fg .Black ;};_cadb ,_acegb :=_cfcag .ColorToRGB (_cadgd );if _acegb !=nil {_ec .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_cadgd ,_cfcag ,_acegb );
return _fg .Black ;};_cbgf ,_fafab :=_cadb .(*_ce .PdfColorDeviceRGB );if !_fafab {_ec .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_cadb );
return _fg .Black ;};return _fg .NRGBA {R :uint8 (_cbgf .R ()*255),G :uint8 (_cbgf .G ()*255),B :uint8 (_cbgf .B ()*255),A :uint8 (255)};};
// yNeighbours builds two events per paragraph — one at its Lly flagged true and one at
// its Ury flagged false, both tagged with the paragraph's index — and hands them to
// eventNeighbours. When `_abfag` is non-zero, each interval is widened at both ends by
// `_abfag` times the paragraph's fontsize().
func (_faedf paraList )yNeighbours (_abfag float64 )map[*textPara ][]int {_agag :=make ([]event ,2*len (_faedf ));
if _abfag ==0{for _gdada ,_cggb :=range _faedf {_agag [2*_gdada ]=event {_cggb .Lly ,true ,_gdada };_agag [2*_gdada +1]=event {_cggb .Ury ,false ,_gdada };};}else {for _caad ,_eacfg :=range _faedf {_agag [2*_caad ]=event {_eacfg .Lly -_abfag *_eacfg .fontsize (),true ,_caad };
_agag [2*_caad +1]=event {_eacfg .Ury +_abfag *_eacfg .fontsize (),false ,_caad };};};return _faedf .eventNeighbours (_agag );};
// _cfaf stamps `_accfb` with the running offset `*_adag`, appends it to `_gbeb`, and then
// advances `*_adag` by the byte length of the mark's Text. The grown slice is returned.
func _cfaf (_gbeb []TextMark ,_adag *int ,_accfb TextMark )[]TextMark {_accfb .Offset =*_adag ;_gbeb =append (_gbeb ,_accfb );
*_adag +=len (_accfb .Text );return _gbeb ;};
// getRight collects, for every row index below `_dcfg`, the `_gaca` neighbour of the cell
// in the last column (`_ecbf`-1). It returns nil if any of those neighbours is taken(), or
// if an entry's `_fdgbd` neighbour is not the next entry in the collected list.
func (_fcgad *textTable )getRight ()paraList {_bgdc :=make (paraList ,_fcgad ._dcfg );for _faag :=0;_faag < _fcgad ._dcfg ;_faag ++{_adgaf :=_fcgad .get (_fcgad ._ecbf -1,_faag )._gaca ;if _adgaf .taken (){return nil ;
};_bgdc [_faag ]=_adgaf ;};for _eaac :=0;_eaac < _fcgad ._dcfg -1;_eaac ++{if _bgdc [_eaac ]._fdgbd !=_bgdc [_eaac +1]{return nil ;};};return _bgdc ;};
// isAtom tries to assemble a four-paragraph group from the receiver, its `_gaca`
// neighbour, its `_fdgbd` neighbour, and the `_fdgbd` neighbour of the `_gaca` neighbour.
// It returns nil if any of the three neighbours is taken(), or if the fourth paragraph is
// not also the `_gaca` neighbour of the receiver's `_fdgbd` neighbour. Otherwise it
// returns whatever `_eggg` builds from the four paragraphs.
func (_gaefc *textPara )isAtom ()*textTable {_fefeg :=_gaefc ;_eefg :=_gaefc ._gaca ;_daaa :=_gaefc ._fdgbd ;
if _eefg .taken ()||_daaa .taken (){return nil ;};_bdce :=_eefg ._fdgbd ;if _bdce .taken ()||_bdce !=_daaa ._gaca {return nil ;};return _eggg (_fefeg ,_eefg ,_daaa ,_bdce );};
// _ccef computes horizontal separators ("row corridors" in the debug output) for the
// composite cells `_bgcfe`.
// Steps:
//  1. Gather the lines of all cells and sort them by `_bcdg`, breaking ties (as decided by
//     `_ffegg` on the difference) by Llx.
//  2. Walk the sorted lines while tracking `_edbd`, the line with the lowest Lly seen so
//     far. Whenever a line's Ury is below `_edbd`.Lly, record the midpoint between the two
//     as a separator and close the current group of lines.
//  3. Require that every cell hasLines() for every group; if any cell fails for any group
//     the separators are discarded and nil is returned.
// Diagnostic output is produced only when the `_afcg` flag is set.
// NOTE(review): `_ffcfb[0]` is read unconditionally, so cells that yield no lines at all
// would panic here — confirm that callers guarantee at least one line.
func _ccef (_bgcfe []compositeCell )[]float64 {var _ffcfb []*textLine ;_ccbcc :=0;
for _ ,_baga :=range _bgcfe {_ccbcc +=len (_baga .paraList );_ffcfb =append (_ffcfb ,_baga .lines ()...);};_c .Slice (_ffcfb ,func (_dbac ,_ecdd int )bool {_egade ,_ecfce :=_ffcfb [_dbac ],_ffcfb [_ecdd ];_adeg ,_cgcea :=_egade ._bcdg ,_ecfce ._bcdg ;if !_ffegg (_adeg -_cgcea ){return _adeg < _cgcea ;
};return _egade .Llx < _ecfce .Llx ;});if _afcg {_ge .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_ccbcc ,len (_ffcfb ));
for _egff ,_cffad :=range _ffcfb {_ge .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_egff ,_cffad );};};var _bbcbd []float64 ;_edbd :=_ffcfb [0];var _cdaed [][]*textLine ;_ddgd :=[]*textLine {_edbd };for _bcec ,_deebg :=range _ffcfb [1:]{if _deebg .Ury < _edbd .Lly {_afebd :=0.5*(_deebg .Ury +_edbd .Lly );
if _afcg {_ge .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_bcec ,_deebg .Ury ,_edbd .Lly ,_afebd ,_edbd ,_deebg );
};_bbcbd =append (_bbcbd ,_afebd );_cdaed =append (_cdaed ,_ddgd );_ddgd =nil ;};_ddgd =append (_ddgd ,_deebg );if _deebg .Lly < _edbd .Lly {_edbd =_deebg ;};};if len (_ddgd )> 0{_cdaed =append (_cdaed ,_ddgd );};if _afcg {_ge .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_bbcbd );
};if _afcg {_ec .Log .Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len (_bgcfe ));for _fecge ,_dfdf :=range _bgcfe {_ge .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fecge ,_dfdf );};_ec .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_cdaed ));
for _ebcgc ,_fabge :=range _cdaed {_ge .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_ebcgc ,len (_fabge ));for _eacgf ,_ffga :=range _fabge {_ge .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_eacgf ,_ffga );};};};_ccaba :=true ;
for _fcgab ,_feeae :=range _cdaed {_agab :=true ;for _ecea ,_adbad :=range _bgcfe {if _afcg {_ge .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_fcgab ,len (_cdaed ),_ecea ,len (_bgcfe ),_adbad );
};if !_adbad .hasLines (_feeae ){if _afcg {_ge .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_fcgab ,len (_cdaed ),_ecea ,len (_bgcfe ));
};_agab =false ;break ;};};if !_agab {_ccaba =false ;break ;};};if !_ccaba {if _afcg {_ec .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg");
};_bbcbd =nil ;};if _afcg &&_bbcbd !=nil {_ge .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_bbcbd );};return _bbcbd ;
};
// getTextMarkAtOffset returns the first mark in `_aff` whose Offset equals `_cbaa`, or nil
// if there is none. The returned pointer refers to the range loop variable, i.e. to a copy
// of the element, so writes through it do not modify the array's own entry.
func (_aff *TextMarkArray )getTextMarkAtOffset (_cbaa int )*TextMark {for _ ,_cefb :=range _aff ._dec {if _cefb .Offset ==_cbaa {return &_cefb ;};};return nil ;};
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
2023-12-17 13:54:01 +00:00
func (_fgg *Extractor )ExtractText ()(string ,error ){_badf ,_ ,_ ,_gae :=_fgg .ExtractTextWithStats ();return _badf ,_gae ;};
// parasBBox returns the cell's paragraph list together with its bounding rectangle.
func (_fbag compositeCell )parasBBox ()(paraList ,_ce .PdfRectangle ){return _fbag .paraList ,_fbag .PdfRectangle ;};
// _ddafc passes the absolute X and Y distances between the two points to `_beeee` and
// returns its verdict.
// NOTE(review): `_beeee` is defined elsewhere; presumably a "both within tolerance" test.
func _ddafc (_bfffa ,_fadcg _bc .Point )bool {_adge :=_aa .Abs (_bfffa .X -_fadcg .X );
_cdeb :=_aa .Abs (_bfffa .Y -_fadcg .Y );return _beeee (_adge ,_cdeb );};
// writeText writes the paragraph's text to `_fecd`.
// A paragraph without a table (`_bddea` is nil) is written with writeCellText. For a table
// paragraph the cells are emitted row by row (outer loop over `_dcfg`, inner loop over
// `_ecbf`): a missing cell is written as a tab, every cell is followed by a space, and
// every row except the last is followed by a newline. Errors from Write are ignored.
func (_gcddc *textPara )writeText (_fecd _a .Writer ){if _gcddc ._bddea ==nil {_gcddc .writeCellText (_fecd );return ;};for _gcbf :=0;_gcbf < _gcddc ._bddea ._dcfg ;_gcbf ++{for _dadcc :=0;
_dadcc < _gcddc ._bddea ._ecbf ;_dadcc ++{_gfcg :=_gcddc ._bddea .get (_dadcc ,_gcbf );if _gfcg ==nil {_fecd .Write ([]byte ("\u0009"));}else {_gfcg .writeCellText (_fecd );};_fecd .Write ([]byte ("\u0020"));};if _gcbf < _gcddc ._bddea ._dcfg -1{_fecd .Write ([]byte ("\u000a"));
};};};
// sort orders the list in place using the list's comp method as the less function.
func (_dbbfc rulingList )sort (){_c .Slice (_dbbfc ,_dbbfc .comp )};
// _eeff maps a spacing accent or modifier rune (for example U+0060 or U+00B4) to a string
// holding the matching combining mark from the U+03xx block.
var (_eeff =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};
);
// emptyCompositeRow reports whether row `_acbg` has no composite cell containing
// paragraphs: it scans all `_ecbf` columns and returns false as soon as the `_egfe` entry
// for (column, row) exists and has a non-empty paraList.
func (_cgeba *textTable )emptyCompositeRow (_acbg int )bool {for _aegcb :=0;_aegcb < _cgeba ._ecbf ;_aegcb ++{if _bdfe ,_eaead :=_cgeba ._egfe [_aaca (_aegcb ,_acbg )];_eaead {if len (_bdfe .paraList )> 0{return false ;};};};return true ;};
// extractXObjectImage records an ImageMark for the image XObject named `_cee`.
//   - `_faa` supplies the CTM from which the mark's width, height, angle and position
//     are derived.
//   - `_fead` is the resource dictionary used to resolve the XObject.
// Decoded images are cached in `_dcg._aabe`, keyed by the XObject, so a repeated XObject is
// decoded only once. On a cache miss the image is decoded and, if it has a Mask or SMask
// that can be loaded, combined with that mask through `_eaecc`; a mask that fails to load
// is only logged and the unmasked image is used. After masking, DeviceGray and Indexed
// images are rebuilt as gray images and all others as generic images.
// Returns nil without recording anything when the XObject (or its image) is not found;
// returns the underlying error when image lookup, decoding or conversion fails.
// The error returned by GetXObjectByName is deliberately discarded; only a nil result is
// checked.
func (_dcg *imageExtractContext )extractXObjectImage (_cee *_gb .PdfObjectName ,_faa _ag .GraphicsState ,_fead *_ce .PdfPageResources )error {_ebb ,_ :=_fead .GetXObjectByName (*_cee );
if _ebb ==nil {return nil ;};_gdff ,_bad :=_dcg ._aabe [_ebb ];if !_bad {_ddb ,_gcb :=_fead .GetXObjectImageByName (*_cee );if _gcb !=nil {return _gcb ;};if _ddb ==nil {return nil ;};_gg ,_gcb :=_ddb .ToImage ();if _gcb !=nil {return _gcb ;};var _agg _ded .Image ;
if _ddb .Mask !=nil {if _agg ,_gcb =_fddd (_ddb .Mask ,_fg .Opaque );_gcb !=nil {_ec .Log .Debug ("\u0057\u0041\u0052\u004e\u003a \u0063\u006f\u0075\u006c\u0064 \u006eo\u0074\u0020\u0067\u0065\u0074\u0020\u0065\u0078\u0070\u006c\u0069\u0063\u0069\u0074\u0020\u0069\u006d\u0061\u0067e\u0020\u006d\u0061\u0073\u006b\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e");
};}else if _ddb .SMask !=nil {_agg ,_gcb =_fbga (_ddb .SMask ,_fg .Opaque );if _gcb !=nil {_ec .Log .Debug ("W\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0067\u0065\u0074\u0020\u0073\u006f\u0066\u0074\u0020\u0069\u006da\u0067e\u0020\u006d\u0061\u0073k\u002e\u0020O\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063\u0074\u002e");
};};if _agg !=nil {_dbb ,_cag :=_gg .ToGoImage ();if _cag !=nil {return _cag ;};_dbb =_eaecc (_dbb ,_agg );switch _ddb .ColorSpace .String (){case "\u0044\u0065\u0076\u0069\u0063\u0065\u0047\u0072\u0061\u0079","\u0049n\u0064\u0065\u0078\u0065\u0064":_gg ,_cag =_ce .ImageHandling .NewGrayImageFromGoImage (_dbb );
if _cag !=nil {return _cag ;};default:_gg ,_cag =_ce .ImageHandling .NewImageFromGoImage (_dbb );if _cag !=nil {return _cag ;};};};_gdff =&cachedImage {_aca :_gg ,_abb :_ddb .ColorSpace };_dcg ._aabe [_ebb ]=_gdff ;};_gag :=_gdff ._aca ;_agc :=_gdff ._abb ;
_ege ,_dbc :=_agc .ImageToRGB (*_gag );if _dbc !=nil {return _dbc ;};_ec .Log .Debug ("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_faa .CTM .String ());_gdbf :=ImageMark {Image :&_ege ,Width :_faa .CTM .ScalingFactorX (),Height :_faa .CTM .ScalingFactorY (),Angle :_faa .CTM .Angle ()};
_gdbf .X ,_gdbf .Y =_faa .CTM .Translation ();_dcg ._gbf =append (_dcg ._gbf ,_gdbf );_dcg ._fbc ++;return nil ;};
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// String returns a description of `tm`: its bounding rectangle, its `_bfaca` value
// (labelled "fontsize" in the output) and its text `_ecaa` in double quotes.
func (_ccgdg *textMark )String ()string {return _ge .Sprintf ("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022",_ccgdg .PdfRectangle ,_ccgdg ._bfaca ,_ccgdg ._ecaa );};
// _cgbe returns the elements exactly three levels below `_dfce` (children of children of
// children, following `_cagb`) whose `_bacdb` field equals "L". The result is a non-nil,
// possibly empty slice.
func _cgbe (_dfce structElement )[]structElement {_egbb :=[]structElement {};
for _ ,_edggc :=range _dfce ._cagb {for _ ,_fgbd :=range _edggc ._cagb {for _ ,_dffe :=range _fgbd ._cagb {if _dffe ._bacdb =="\u004c"{_egbb =append (_egbb ,_dffe );};};};};return _egbb ;};
// llyRange returns the sub-slice of the index list `_cde` whose paragraphs have an Lly in
// the closed range [`_cefga`, `_cabdb`], or nil if the range lies entirely outside the
// first and last indexed paragraphs.
// The binary searches assume `_cde` orders the paragraphs by increasing Lly, and
// len(`_abddc`) is used as the length of `_cde` — TODO confirm both hold at every call site.
func (_abddc paraList )llyRange (_cde []int ,_cefga ,_cabdb float64 )[]int {_beg :=len (_abddc );
if _cabdb < _abddc [_cde [0]].Lly ||_cefga > _abddc [_cde [_beg -1]].Lly {return nil ;};_dfaf :=_c .Search (_beg ,func (_bcaf int )bool {return _abddc [_cde [_bcaf ]].Lly >=_cefga });_edeef :=_c .Search (_beg ,func (_afag int )bool {return _abddc [_cde [_afag ]].Lly > _cabdb });
return _cde [_dfaf :_edeef ];};
// addDiacritic appends `_eccfb` to the text of the word's last mark and then replaces that
// text with its NFKC-normalised form. The word must contain at least one mark.
func (_edgce *textWord )addDiacritic (_eccfb string ){_ccdfb :=_edgce ._bgeaa [len (_edgce ._bgeaa )-1];_ccdfb ._ecaa +=_eccfb ;_ccdfb ._ecaa =_ef .NFKC .String (_ccdfb ._ecaa );};
// secMinMax returns the smallest `_cebe` and the largest `_deee` over all rulings in the
// list. The list must be non-empty.
func (_ffgdd rulingList )secMinMax ()(float64 ,float64 ){_face ,_cefag :=_ffgdd [0]._cebe ,_ffgdd [0]._deee ;
for _ ,_ebdf :=range _ffgdd [1:]{if _ebdf ._cebe < _face {_face =_ebdf ._cebe ;};if _ebdf ._deee > _cefag {_cefag =_ebdf ._deee ;};};return _face ,_cefag ;};
// addNeighbours fills in the four neighbour links of every paragraph in the list:
// `_egab` (left), `_gaca` (right), `_fdgbd` (below) and `_abdda` (above).
//
// Pass 1 uses yNeighbours(`_gcga`) to get, per paragraph, the candidates that overlap it
// vertically, splits them into those entirely to the left and entirely to the right, picks
// the nearest on each side (largest Urx on the left, smallest Llx on the right), drops the
// pick if another candidate on that side overlaps it horizontally, and finally accepts it
// only if `_fdg` approves the pair of rectangles.
// Pass 2 does the same along the other axis with xNeighbours(`_gccfg`) and `_fcd`.
// Pass 3 clears every link that is not reciprocated by the paragraph it points to.
func (_beggg paraList )addNeighbours (){_fcffc :=func (_agefc []int ,_ebbad *textPara )([]*textPara ,[]*textPara ){_gfbd :=make ([]*textPara ,0,len (_agefc )-1);
_acfg :=make ([]*textPara ,0,len (_agefc )-1);for _ ,_dgadg :=range _agefc {_bfcb :=_beggg [_dgadg ];if _bfcb .Urx <=_ebbad .Llx {_gfbd =append (_gfbd ,_bfcb );}else if _bfcb .Llx >=_ebbad .Urx {_acfg =append (_acfg ,_bfcb );};};return _gfbd ,_acfg ;};
_eccgb :=func (_cbfgbf []int ,_gcbeb *textPara )([]*textPara ,[]*textPara ){_febg :=make ([]*textPara ,0,len (_cbfgbf )-1);_aegcf :=make ([]*textPara ,0,len (_cbfgbf )-1);for _ ,_ccbaf :=range _cbfgbf {_beaga :=_beggg [_ccbaf ];if _beaga .Ury <=_gcbeb .Lly {_aegcf =append (_aegcf ,_beaga );
}else if _beaga .Lly >=_gcbeb .Ury {_febg =append (_febg ,_beaga );};};return _febg ,_aegcf ;};_eccf :=_beggg .yNeighbours (_gcga );for _ ,_afbg :=range _beggg {_cgaf :=_eccf [_afbg ];if len (_cgaf )==0{continue ;};_bffb ,_eaecb :=_fcffc (_cgaf ,_afbg );
if len (_bffb )==0&&len (_eaecb )==0{continue ;};if len (_bffb )> 0{_aafc :=_bffb [0];for _ ,_eccfa :=range _bffb [1:]{if _eccfa .Urx >=_aafc .Urx {_aafc =_eccfa ;};};for _ ,_cegab :=range _bffb {if _cegab !=_aafc &&_cegab .Urx > _aafc .Llx {_aafc =nil ;
break ;};};if _aafc !=nil &&_fdg (_afbg .PdfRectangle ,_aafc .PdfRectangle ){_afbg ._egab =_aafc ;};};if len (_eaecb )> 0{_dgc :=_eaecb [0];for _ ,_gbbfd :=range _eaecb [1:]{if _gbbfd .Llx <=_dgc .Llx {_dgc =_gbbfd ;};};for _ ,_bccaf :=range _eaecb {if _bccaf !=_dgc &&_bccaf .Llx < _dgc .Urx {_dgc =nil ;
break ;};};if _dgc !=nil &&_fdg (_afbg .PdfRectangle ,_dgc .PdfRectangle ){_afbg ._gaca =_dgc ;};};};_eccf =_beggg .xNeighbours (_gccfg );for _ ,_ffbgf :=range _beggg {_gcad :=_eccf [_ffbgf ];if len (_gcad )==0{continue ;};_edbde ,_adbae :=_eccgb (_gcad ,_ffbgf );
if len (_edbde )==0&&len (_adbae )==0{continue ;};if len (_adbae )> 0{_abeb :=_adbae [0];for _ ,_acbb :=range _adbae [1:]{if _acbb .Ury >=_abeb .Ury {_abeb =_acbb ;};};for _ ,_dcad :=range _adbae {if _dcad !=_abeb &&_dcad .Ury > _abeb .Lly {_abeb =nil ;
break ;};};if _abeb !=nil &&_fcd (_ffbgf .PdfRectangle ,_abeb .PdfRectangle ){_ffbgf ._fdgbd =_abeb ;};};if len (_edbde )> 0{_bgad :=_edbde [0];for _ ,_abfgd :=range _edbde [1:]{if _abfgd .Lly <=_bgad .Lly {_bgad =_abfgd ;};};for _ ,_cdbf :=range _edbde {if _cdbf !=_bgad &&_cdbf .Lly < _bgad .Ury {_bgad =nil ;
break ;};};if _bgad !=nil &&_fcd (_ffbgf .PdfRectangle ,_bgad .PdfRectangle ){_ffbgf ._abdda =_bgad ;};};};for _ ,_caaeb :=range _beggg {if _caaeb ._egab !=nil &&_caaeb ._egab ._gaca !=_caaeb {_caaeb ._egab =nil ;};if _caaeb ._abdda !=nil &&_caaeb ._abdda ._fdgbd !=_caaeb {_caaeb ._abdda =nil ;
};if _caaeb ._gaca !=nil &&_caaeb ._gaca ._egab !=_caaeb {_caaeb ._gaca =nil ;};if _caaeb ._fdgbd !=nil &&_caaeb ._fdgbd ._abdda !=_caaeb {_caaeb ._fdgbd =nil ;};};};
// depthIndexes returns the keys of the bag's `_faba` map in ascending order, or nil when
// the map is empty.
func (_fdfd *wordBag )depthIndexes ()[]int {if len (_fdfd ._faba )==0{return nil ;};_afeg :=make ([]int ,len (_fdfd ._faba ));
_fagd :=0;for _agfd :=range _fdfd ._faba {_afeg [_fagd ]=_agfd ;_fagd ++;};_c .Ints (_afeg );return _afeg ;};
// complete reports whether every tile in `_bage` is itself complete().
func (_ddaac gridTiling )complete ()bool {for _ ,_ddcff :=range _ddaac ._bage {for _ ,_edcec :=range _ddcff {if !_edcec .complete (){return false ;
};};};return true ;};
// checkOp validates the operand count of content-stream operation `_fdbff`.
//   - `_efd` is the expected number of parameters; a negative value disables the check.
//   - `_feb` selects whether a count mismatch is reported as an error ("incorrect
//     parameter count") or only logged.
// A nil receiver (operator used outside a text object) is logged at debug level but does
// not by itself cause a failure. Returns (true, nil) when the count is acceptable and
// (false, err) otherwise, where err is nil unless `_feb` is set.
func (_fdd *textObject )checkOp (_fdbff *_ag .ContentStreamOperation ,_efd int ,_feb bool )(_efdb bool ,_fdaa error ){if _fdd ==nil {var _eeab []_gb .PdfObject ;if _efd > 0{_eeab =_fdbff .Params ;if len (_eeab )> _efd {_eeab =_eeab [:_efd ];
};};_ec .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_fdbff .Operand ,_eeab );};if _efd >=0{if len (_fdbff .Params )!=_efd {if _feb {_fdaa =_d .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");
};_ec .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_fdbff .Operand ,_efd ,len (_fdbff .Params ),_fdbff .Params );
return false ,_fdaa ;};};return true ,nil ;};
// blocks reports whether some ruling in the list separates `_fgec` from `_dfbfe`.
// It returns false at once if the [`_cebe`, `_deee`] extents of the two rulings do not
// overlap. Otherwise the two are ordered by `_abbgc`, and a list entry blocks when its
// `_abbgc` lies between theirs (with tolerance `_gcef`) and its extent intersects the
// overlap of their extents.
func (_aegdb rulingList )blocks (_fgec ,_dfbfe *ruling )bool {if _fgec ._cebe > _dfbfe ._deee ||_dfbfe ._cebe > _fgec ._deee {return false ;};_dfbce :=_aa .Max (_fgec ._cebe ,_dfbfe ._cebe );_gdcfc :=_aa .Min (_fgec ._deee ,_dfbfe ._deee );
if _fgec ._abbgc > _dfbfe ._abbgc {_fgec ,_dfbfe =_dfbfe ,_fgec ;};for _ ,_ffaga :=range _aegdb {if _fgec ._abbgc <=_ffaga ._abbgc +_gcef &&_ffaga ._abbgc <=_dfbfe ._abbgc +_gcef &&_ffaga ._cebe <=_gdcfc &&_dfbce <=_ffaga ._deee {return true ;};};return false ;
};
// New creates an Extractor for the given PDF page using default options. It is shorthand
// for NewWithOptions(page, nil).
func New(page *_ce.PdfPage) (*Extractor, error) {
	return NewWithOptions(page, nil)
}

// inDiacriticArea reports whether the receiver sits close enough to `base` to be treated
// as a diacritic of it: the summed offsets of the left and right edges must be smaller
// than the receiver's width scaled by `_geb`, and the offset of the bottom edges must be
// smaller than the receiver's height scaled by `_geb`.
func (tm *textMark) inDiacriticArea(base *textMark) bool {
	xOffset := (tm.Llx - base.Llx) + (tm.Urx - base.Urx)
	if !(_aa.Abs(xOffset) < tm.Width()*_geb) {
		return false
	}
	yOffset := tm.Lly - base.Lly
	return _aa.Abs(yOffset) < tm.Height()*_geb
}

// getFillColor returns the Go color for the current non-stroking color of the graphics
// state, as converted by `_agefd`.
func (to *textObject) getFillColor() _fg.Color {
	return _agefd(
		to._gbe.ColorspaceNonStroking,
		to._gbe.ColorNonStroking,
	)
}

// highestWord returns the first word stored under depth index `depthIdx` whose `_cffdg`
// value lies in the closed range [lo, hi], or nil if no such word exists.
func (bag *wordBag) highestWord(depthIdx int, lo, hi float64) *textWord {
	candidates := bag._faba[depthIdx]
	for i := 0; i < len(candidates); i++ {
		w := candidates[i]
		if w._cffdg < lo || w._cffdg > hi {
			continue
		}
		return w
	}
	return nil
}

// list is a node in a tree of extracted lists.
type list struct {
	_ggdb []*textLine // text lines held by this node
	_ffcg string
	_gbab []*list // child nodes
	_dage string
}

// textWord is a run of text marks together with its bounding rectangle.
type textWord struct {
	_ce.PdfRectangle
	_cffdg float64     // value compared against depth ranges when selecting words
	_bbdb  string      // text of the word
	_bgeaa []*textMark // marks that make up the word
	_ddgee float64
	_gfffc bool
}

// gridTile is a rectangle with one boolean flag per side. In gridTile.contains the flags
// gate the checks on Ury (`_fgde`), Llx (`_ebdg`), Lly (`_ddbaf`) and Urx (`_ebga`).
type gridTile struct {
	_ce.PdfRectangle
	_fgde  bool
	_ebdg  bool
	_ddbaf bool
	_ebga  bool
}

// alignsPrimary reports whether the two rulings have the same `_bfbc` value and their
// `_abbgc` coordinates differ by less than half of `_gcef`.
func (r *ruling) alignsPrimary(other *ruling) bool {
	if r._bfbc != other._bfbc {
		return false
	}
	gap := _aa.Abs(r._abbgc - other._abbgc)
	return gap < _gcef*0.5
}
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
//
// Replace with a function like Extract() (*PageText, error)
//
// The two int results are passed through unchanged from extractPageText. An
// ErrColorOutOfRange error from extractPageText is tolerated; any other error aborts the
// extraction. When options (`_fgb`) are set they select the simpler extraction process,
// optionally crop the result to `_ed`, and copy the DisableDocumentTags setting.
func (_ead *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_gfef ,_adc ,_ebg ,_bgb :=_ead .extractPageText (_ead ._bcf ,_ead ._ab ,_bc .IdentityMatrix (),0);if _bgb !=nil &&_bgb !=_ce .ErrColorOutOfRange {return nil ,0,0,_bgb ;};if _ead ._fgb !=nil {_gfef ._ccg ._fcgfa =_ead ._fgb .UseSimplerExtractionProcess ;
};_gfef .computeViews ();_bgb =_cbda (_gfef );if _bgb !=nil {return nil ,0,0,_bgb ;};if _ead ._fgb !=nil {if _ead ._fgb .ApplyCropBox &&_ead ._ed !=nil {_gfef .ApplyArea (*_ead ._ed );};_gfef ._ccg ._efab =_ead ._fgb .DisableDocumentTags ;};return _gfef ,_adc ,_ebg ,nil ;
};
// getFont returns the font named `_bce`, consulting the optional font cache `_cd` first.
// With a cache present, the font dictionary is resolved and its String() used as the cache
// key; a hit refreshes the entry's access stamp (`_addb`) from the counter `_bg` and
// returns the cached font. On a miss the font is loaded with getFontDirect and stored;
// if the cache already holds `_gdgb` or more entries, the entry with the smallest access
// stamp is evicted first.
func (_ceda *textObject )getFont (_bce string )(*_ce .PdfFont ,error ){if _ceda ._bgcb ._cd !=nil {_bacb ,_bffc :=_ceda .getFontDict (_bce );if _bffc !=nil {_ec .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073",_bce ,_bffc .Error ());
return nil ,_bffc ;};_ceda ._bgcb ._bg ++;_ffa ,_caac :=_ceda ._bgcb ._cd [_bacb .String ()];if _caac {_ffa ._addb =_ceda ._bgcb ._bg ;return _ffa ._ddaa ,nil ;};};_ace ,_afea :=_ceda .getFontDict (_bce );if _afea !=nil {return nil ,_afea ;};_caab ,_afea :=_ceda .getFontDirect (_bce );
if _afea !=nil {return nil ,_afea ;};if _ceda ._bgcb ._cd !=nil {_bfgc :=fontEntry {_caab ,_ceda ._bgcb ._bg };if len (_ceda ._bgcb ._cd )>=_gdgb {var _fgcf []string ;for _fafb :=range _ceda ._bgcb ._cd {_fgcf =append (_fgcf ,_fafb );};_c .Slice (_fgcf ,func (_cdgd ,_gffb int )bool {return _ceda ._bgcb ._cd [_fgcf [_cdgd ]]._addb < _ceda ._bgcb ._cd [_fgcf [_gffb ]]._addb ;
});delete (_ceda ._bgcb ._cd ,_fgcf [0]);};_ceda ._bgcb ._cd [_ace .String ()]=_bfgc ;};return _caab ,nil ;};
// merge combines the rulings of a non-empty list into a single new ruling. Its `_abbgc`
// is the mean of the members' `_abbgc`, its extent runs from the smallest `_cebe` to the
// largest `_deee`, and `_bfbc`, `_bgaa` and Color are taken from the first member.
// The inputs and result are dumped when the `_fcfe` flag is set.
func (_bgbaa rulingList )merge ()*ruling {_gdfdb :=_bgbaa [0]._abbgc ;_bbbb :=_bgbaa [0]._cebe ;_bfba :=_bgbaa [0]._deee ;for _ ,_gbefc :=range _bgbaa [1:]{_gdfdb +=_gbefc ._abbgc ;
if _gbefc ._cebe < _bbbb {_bbbb =_gbefc ._cebe ;};if _gbefc ._deee > _bfba {_bfba =_gbefc ._deee ;};};_dggdg :=&ruling {_bfbc :_bgbaa [0]._bfbc ,_bgaa :_bgbaa [0]._bgaa ,Color :_bgbaa [0].Color ,_abbgc :_gdfdb /float64 (len (_bgbaa )),_cebe :_bbbb ,_deee :_bfba };
if _fcfe {_ec .Log .Info ("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073",len (_bgbaa ),_dggdg );for _fcad ,_geegc :=range _bgbaa {_ge .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fcad ,_geegc );
};};return _dggdg ;};
// taken reports whether the paragraph is unavailable: true for a nil receiver or when its
// `_bfcd` flag is set.
func (_acca *textPara )taken ()bool {return _acca ==nil ||_acca ._bfcd };
// splitSec partitions a non-empty list into groups of rulings whose extents chain together
// via alignsSec. The list is first sorted in place by `_cebe`, then `_deee`. Each group is
// grown from a seed in a single pass over the sorted list, adding every not-yet-used
// ruling that alignsSec with any ruling already in the group.
func (_fdfe rulingList )splitSec ()[]rulingList {_c .Slice (_fdfe ,func (_geffd ,_bgca int )bool {_egdg ,_bbge :=_fdfe [_geffd ],_fdfe [_bgca ];if _egdg ._cebe !=_bbge ._cebe {return _egdg ._cebe < _bbge ._cebe ;
};return _egdg ._deee < _bbge ._deee ;});_dffeb :=make (map[*ruling ]struct{},len (_fdfe ));_gabed :=func (_fbcfb *ruling )rulingList {_fgbbd :=rulingList {_fbcfb };_dffeb [_fbcfb ]=struct{}{};for _ ,_decc :=range _fdfe {if _ ,_debe :=_dffeb [_decc ];_debe {continue ;
};for _ ,_afgaa :=range _fgbbd {if _decc .alignsSec (_afgaa ){_fgbbd =append (_fgbbd ,_decc );_dffeb [_decc ]=struct{}{};break ;};};};return _fgbbd ;};_fdgff :=[]rulingList {_gabed (_fdfe [0])};for _ ,_feede :=range _fdfe [1:]{if _ ,_gead :=_dffeb [_feede ];
_gead {continue ;};_fdgff =append (_fdgff ,_gabed (_feede ));};return _fdgff ;};
// alignsSec reports whether the [`_cebe`, `_deee`] extents of the two rulings overlap or
// come within `_gcef`+1.0 of each other.
func (_agbd *ruling )alignsSec (_cgdae *ruling )bool {const _ecce =_gcef +1.0;return _agbd ._cebe -_ecce <=_cgdae ._deee &&_cgdae ._cebe -_ecce <=_agbd ._deee ;};
// removeWord removes `_cdaeb` from the words stored under depth index `_effe` (using
// `_beff`) and deletes the map entry altogether when no words remain.
func (_dfge *wordBag )removeWord (_cdaeb *textWord ,_effe int ){_eeac :=_dfge ._faba [_effe ];
_eeac =_beff (_eeac ,_cdaeb );if len (_eeac )==0{delete (_dfge ._faba ,_effe );}else {_dfge ._faba [_effe ]=_eeac ;};};
// reduceTiling drops degenerate rows and columns from the table.
// A row is dropped when it is not the first, its `_agbb` coordinate is within `_egabb` of
// the previous row's, and emptyCompositeRow holds for it. A column is dropped when it is
// not the last, its `_cgecb` coordinate is within `_egabb` of the next column's, and
// emptyCompositeColumn holds for it. If nothing is dropped the receiver itself is
// returned; otherwise a new table is built containing the surviving non-empty composite
// cells at their renumbered positions.
func (_fbgg *textTable )reduceTiling (_aadf gridTiling ,_egabb float64 )*textTable {_cbcg :=make ([]int ,0,_fbgg ._dcfg );_cdgee :=make ([]int ,0,_fbgg ._ecbf );
_dadfb :=_aadf ._cgecb ;_gbed :=_aadf ._agbb ;for _ecfg :=0;_ecfg < _fbgg ._dcfg ;_ecfg ++{_dgbbf :=_ecfg > 0&&_aa .Abs (_gbed [_ecfg -1]-_gbed [_ecfg ])< _egabb &&_fbgg .emptyCompositeRow (_ecfg );if !_dgbbf {_cbcg =append (_cbcg ,_ecfg );};};for _fcdfa :=0;
_fcdfa < _fbgg ._ecbf ;_fcdfa ++{_bace :=_fcdfa < _fbgg ._ecbf -1&&_aa .Abs (_dadfb [_fcdfa +1]-_dadfb [_fcdfa ])< _egabb &&_fbgg .emptyCompositeColumn (_fcdfa );if !_bace {_cdgee =append (_cdgee ,_fcdfa );};};if len (_cbcg )==_fbgg ._dcfg &&len (_cdgee )==_fbgg ._ecbf {return _fbgg ;
};_fefeb :=textTable {_beaeg :_fbgg ._beaeg ,_ecbf :len (_cdgee ),_dcfg :len (_cbcg ),_egfe :make (map[uint64 ]compositeCell ,len (_cdgee )*len (_cbcg ))};if _afcg {_ec .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_fbgg ._ecbf ,_fbgg ._dcfg ,len (_cdgee ),len (_cbcg ));
_ec .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_cdgee );_ec .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_cbcg );};for _egeca ,_aabgd :=range _cbcg {for _gceaa ,_ddgfb :=range _cdgee {_aafdd ,_fbcfa :=_fbgg .getComposite (_ddgfb ,_aabgd );
if len (_aafdd )==0{continue ;};if _afcg {_ge .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_gceaa ,_egeca ,_ddgfb ,_aabgd ,_adagc (_aafdd .merge ().text (),50));};_fefeb .putComposite (_gceaa ,_egeca ,_aafdd ,_fbcfa );
};};return &_fefeb ;};
// text returns the `_bbdb` text of every word from allWords(), joined by single spaces.
func (_dfcfg *wordBag )text ()string {_ccbfg :=_dfcfg .allWords ();_fbg :=make ([]string ,len (_ccbfg ));for _fcfg ,_fcge :=range _ccbfg {_fbg [_fcfg ]=_fcge ._bbdb ;};return _gd .Join (_fbg ,"\u0020");};
// _abdg reports whether the horizontal extents of the bag and the word overlap once the
// bag is widened by `_gbcaa` on both sides.
func _abdg (_dced *wordBag ,_cdfcc *textWord ,_gbcaa float64 )bool {return _cdfcc .Llx < _dced .Urx +_gbcaa &&_dced .Llx -_gbcaa < _cdfcc .Urx ;
};
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
// Marks flagged Meta and marks whose text is only whitespace are ignored. The boolean is
// false (and the rectangle is the zero value) when no mark contributed.
func (ma *TextMarkArray) BBox() (_ce.PdfRectangle, bool) {
	var (
		bounds _ce.PdfRectangle
		found  bool
	)
	for _, mark := range ma._dec {
		switch {
		case mark.Meta || _ecga(mark.Text):
			// Contributes nothing to the bounding box.
		case found:
			bounds = _cdggc(bounds, mark.BBox)
		default:
			bounds, found = mark.BBox, true
		}
	}
	return bounds, found
}

// _ccaa appends the text of `_bafg` (as rendered by `_eabf` with the current indent) to
// `_ddcf`, then recurses into each child with an indent extended by three spaces.
func _ccaa(_bafg *list, _ddcf *_gd.Builder, _babg *string) {
	_ddcf.WriteString(_eabf(_bafg, _babg))
	children := _bafg._gbab
	for i := range children {
		// Each child receives its own copy of the deeper indent.
		childIndent := *_babg + "\u0020\u0020\u0020"
		_ccaa(children[i], _ddcf, &childIndent)
	}
}

// _ecga reports whether `_bdadd` consists solely of Unicode whitespace. The empty string
// yields true.
func _ecga(_bdadd string) bool {
	for _, r := range _bdadd {
		if _de.IsSpace(r) {
			continue
		}
		return false
	}
	return true
}

// bbox returns the line's bounding rectangle.
func (line *textLine) bbox() _ce.PdfRectangle {
	return line.PdfRectangle
}

// _gdfab converts a slice of structure elements into a slice of list nodes, recursing
// into each element's children. The lines stored in `_ebagg` under the element's `_efcc`
// key are attached to a node only when that key is not -1, the element's `_geaa` is an
// object reference, and that reference deep-equals the reference of `_bgbce` (which must
// be a *PdfIndirectObject). The result is a non-nil, possibly empty slice.
func _gdfab(_bfcg []structElement, _ebagg map[int][]*textLine, _bgbce _gb.PdfObject) []*list {
	nodes := []*list{}
	for _, elem := range _bfcg {
		key := int(elem._efcc)
		nodeLines := []*textLine{}
		children := []*list{}

		ref, isRef := elem._geaa.(*_gb.PdfObjectReference)
		if !isRef {
			_ec.Log.Debug("\u0066\u0061\u0069l\u0065\u0064\u0020\u006f\u0074\u0020\u0063\u0061\u0073\u0074\u0020\u0074\u006f\u0020\u002a\u0063\u006f\u0072\u0065\u002e\u0050\u0064\u0066\u004f\u0062\u006a\u0065\u0063\u0074R\u0065\u0066\u0065\u0072\u0065\u006e\u0063\u0065")
		}

		if key != -1 && ref != nil {
			stored, haveLines := _ebagg[key]
			indirect, isIndirect := _bgbce.(*_gb.PdfIndirectObject)
			if haveLines && isIndirect && _gc.DeepEqual(*ref, indirect.PdfObjectReference) {
				nodeLines = stored
			}
		}

		if elem._cagb != nil {
			children = _gdfab(elem._cagb, _ebagg, _bgbce)
		}

		nodes = append(nodes, _baba(nodeLines, elem._bacdb, children))
	}
	return nodes
}
// Text gets the extracted text contained in `l`. The text is produced by `_ccaa`, starting
// from an empty indent.
func (l *list) Text() string {
	var (
		out    _gd.Builder
		indent string
	)
	_ccaa(l, &out, &indent)
	return out.String()
}

// event is one endpoint of an interval. paraList.yNeighbours creates an event with
// `_cgcbd` true at a paragraph's lower bound and one with `_cgcbd` false at its upper
// bound.
type event struct {
	_gcaf   float64 // coordinate of the endpoint
	_cgcbd  bool    // true for the lower bound of the interval, false for the upper bound
	_bffcfb int     // index of the paragraph the interval belongs to
}
// String returns a description of `p`. A paragraph with `_bedf` set is rendered as its
// rectangle followed by "[EMPTY]". Otherwise the output holds the rectangle, the
// dimensions of the attached table (if any), the number of lines, and the paragraph text
// after it has been passed through `_adagc` with the argument 50.
func (p *textPara) String() string {
	if p._bedf {
		return _ge.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", p.PdfRectangle)
	}

	var tableDims string
	if tbl := p._bddea; tbl != nil {
		tableDims = _ge.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", tbl._ecbf, tbl._dcfg)
	}

	lineCount := len(p._bdbcg)
	preview := _adagc(p.text(), 50)
	return _ge.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", p.PdfRectangle, tableDims, lineCount, preview)
}

// bbox returns the mark's bounding rectangle.
func (tm *textMark) bbox() _ce.PdfRectangle {
	return tm.PdfRectangle
}
// GetContentStreamOps returns the contentStreamOps field of `pt`.
func (pt *PageText) GetContentStreamOps() *_ag.ContentStreamOperations {
	ops := pt._ecfe
	return ops
}

// stateStack is a stack of text states.
type stateStack []*textState

// _dgfb returns the matrix that translates by the coordinates of `_dgda`.
func _dgfb(_dgda _bc.Point) _bc.Matrix {
	tx, ty := _dgda.X, _dgda.Y
	return _bc.TranslationMatrix(tx, ty)
}

// _efe is a meta TextMark with text "[X]", a single space as its original text, and white
// fill and stroke colors.
var _efe = TextMark{
	Text:        "\u005b\u0058\u005d",
	Original:    "\u0020",
	Meta:        true,
	FillColor:   _fg.White,
	StrokeColor: _fg.White,
}
// contains reports whether rectangle `_ecef` fits inside the tile. A tile with fewer than
// three borders (numBorders) never contains anything. For each side whose flag is set,
// the rectangle may exceed the tile's edge by at most `_fdgb`: `_ebdg` guards Llx,
// `_ebga` guards Urx, `_ddbaf` guards Lly and `_fgde` guards Ury. Sides whose flag is
// unset are not checked.
func (_eeaa gridTile )contains (_ecef _ce .PdfRectangle )bool {if _eeaa .numBorders ()< 3{return false ;};if _eeaa ._ebdg &&_ecef .Llx < _eeaa .Llx -_fdgb {return false ;};if _eeaa ._ebga &&_ecef .Urx > _eeaa .Urx +_fdgb {return false ;};if _eeaa ._ddbaf &&_ecef .Lly < _eeaa .Lly -_fdgb {return false ;
};if _eeaa ._fgde &&_ecef .Ury > _eeaa .Ury +_fdgb {return false ;};return true ;};
// removeDuplicates removes words that duplicate another word in the bag.
// For every depth index, the first word stored there fixes a tolerance (`_bada` times the
// word's `_ddgee`) and a depth band starting at the word's `_cffdg`. Within each index of
// that band, a word is marked as a duplicate when another, not-yet-marked word has the
// same text and all four rectangle coordinates within the tolerance. Marked words are
// then removed by compacting the slice in place, and an index left without words is
// deleted from the map.
// The length check at the top of the loop skips indexes emptied by an earlier iteration.
func (_bcbdg *wordBag )removeDuplicates (){if _fbfc {_ec .Log .Info ("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_bcbdg .text ());
};for _ ,_eagcb :=range _bcbdg .depthIndexes (){if len (_bcbdg ._faba [_eagcb ])==0{continue ;};_bgfcfe :=_bcbdg ._faba [_eagcb ][0];_dccdd :=_bada *_bgfcfe ._ddgee ;_ecfd :=_bgfcfe ._cffdg ;for _ ,_aaecb :=range _bcbdg .depthBand (_ecfd ,_ecfd +_dccdd ){_cddde :=map[*textWord ]struct{}{};
_cgbee :=_bcbdg ._faba [_aaecb ];for _ ,_aacd :=range _cgbee {if _ ,_gcae :=_cddde [_aacd ];_gcae {continue ;};for _ ,_daeac :=range _cgbee {if _ ,_fbedag :=_cddde [_daeac ];_fbedag {continue ;};if _daeac !=_aacd &&_daeac ._bbdb ==_aacd ._bbdb &&_aa .Abs (_daeac .Llx -_aacd .Llx )< _dccdd &&_aa .Abs (_daeac .Urx -_aacd .Urx )< _dccdd &&_aa .Abs (_daeac .Lly -_aacd .Lly )< _dccdd &&_aa .Abs (_daeac .Ury -_aacd .Ury )< _dccdd {_cddde [_daeac ]=struct{}{};
};};};if len (_cddde )> 0{_accfe :=0;for _ ,_gdbbg :=range _cgbee {if _ ,_gdge :=_cddde [_gdbbg ];!_gdge {_cgbee [_accfe ]=_gdbbg ;_accfe ++;};};_bcbdg ._faba [_aaecb ]=_cgbee [:len (_cgbee )-len (_cddde )];if len (_bcbdg ._faba [_aaecb ])==0{delete (_bcbdg ._faba ,_aaecb );
};};};};};
// moveLP concatenates a pure translation by (`_efgd`, `_badcf`) onto the matrix `_ecg` and
// then copies the result into `_eee`.
func (_ddge *textObject )moveLP (_efgd ,_badcf float64 ){_ddge ._ecg .Concat (_bc .NewMatrix (1,0,0,1,_efgd ,_badcf ));_ddge ._eee =_ddge ._ecg ;};
// String returns a description of `l`: its `_bcdg` value, its bounding rectangle, its
// `_ecag` value (labelled "fontsize" in the output) and its text in double quotes.
func (_gdcef *textLine )String ()string {return _ge .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_gdcef ._bcdg ,_gdcef .PdfRectangle ,_gdcef ._ecag ,_gdcef .text ());
};
// split divides the composite cell into a table using the row separators `_cbd` and the
// column separators `_ggffa` (named rowCorridors and colCorridors in the debug output).
// The table has len(`_cbd`)+1 rows and len(`_ggffa`)+1 columns.
// Steps:
//  1. Both separator lists are passed through `_ggba` together with the cell's own edges
//     (Ury/Lly for rows, Llx/Urx for columns).
//     NOTE(review): presumably this brackets the separators with the cell bounds — confirm.
//  2. The cell's paragraph list is sorted in place by Lly, then Llx.
//  3. A rectangle is computed for every (column, row) position from consecutive separators.
//  4. Each line is assigned to the first rectangle for which `_fafg` returns true; lines
//     that match no rectangle are dropped.
//  5. For every position that received lines, a paragraph is built with `_ffec` and put
//     into the table. Positions without lines are left unset.
func (_dfee compositeCell )split (_cbd ,_ggffa []float64 )*textTable {_fcgc :=len (_cbd )+1;_bffcg :=len (_ggffa )+1;if _afcg {_ec .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_bffcg ,_fcgc ,_dfee ,_cbd ,_ggffa );
_ge .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_dfee .paraList ));for _deff ,_beage :=range _dfee .paraList {_ge .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_deff ,_beage .String ());};
_ge .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_dfee .lines ()));for _egac ,_ddcd :=range _dfee .lines (){_ge .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_egac ,_ddcd );};};_cbd =_ggba (_cbd ,_dfee .Ury ,_dfee .Lly );
_ggffa =_ggba (_ggffa ,_dfee .Llx ,_dfee .Urx );_gbee :=make (map[uint64 ]*textPara ,_bffcg *_fcgc );_dfeb :=textTable {_ecbf :_bffcg ,_dcfg :_fcgc ,_gcbga :_gbee };_ddbb :=_dfee .paraList ;_c .Slice (_ddbb ,func (_cfdb ,_fafae int )bool {_dggg ,_adcga :=_ddbb [_cfdb ],_ddbb [_fafae ];
_ecgd ,_cgbdg :=_dggg .Lly ,_adcga .Lly ;if _ecgd !=_cgbdg {return _ecgd < _cgbdg ;};return _dggg .Llx < _adcga .Llx ;});_abbc :=make (map[uint64 ]_ce .PdfRectangle ,_bffcg *_fcgc );for _efaa ,_cccd :=range _cbd [1:]{_ccfgc :=_cbd [_efaa ];for _gagg ,_ffdd :=range _ggffa [1:]{_ggef :=_ggffa [_gagg ];
_abbc [_aaca (_gagg ,_efaa )]=_ce .PdfRectangle {Llx :_ggef ,Urx :_ffdd ,Lly :_cccd ,Ury :_ccfgc };};};if _afcg {_ec .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073");
_ge .Printf ("\u0020\u0020\u0020\u0020");for _fgbb :=0;_fgbb < _bffcg ;_fgbb ++{_ge .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_fgbb );};_ge .Println ();for _cdef :=0;_cdef < _fcgc ;_cdef ++{_ge .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_cdef );
for _ebbe :=0;_ebbe < _bffcg ;_ebbe ++{_ge .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_abbc [_aaca (_ebbe ,_cdef )]);};_ge .Println ();};};_dbf :=func (_aecfd *textLine )(int ,int ){for _ddgc :=0;_ddgc < _fcgc ;_ddgc ++{for _abdcd :=0;_abdcd < _bffcg ;
_abdcd ++{if _fafg (_abbc [_aaca (_abdcd ,_ddgc )],_aecfd .PdfRectangle ){return _abdcd ,_ddgc ;};};};return -1,-1;};_eadg :=make (map[uint64 ][]*textLine ,_bffcg *_fcgc );for _ ,_adfa :=range _ddbb .lines (){_cdgab ,_ccde :=_dbf (_adfa );if _cdgab < 0{continue ;
};_eadg [_aaca (_cdgab ,_ccde )]=append (_eadg [_aaca (_cdgab ,_ccde )],_adfa );};for _dgfg :=0;_dgfg < len (_cbd )-1;_dgfg ++{_gffa :=_cbd [_dgfg ];_adgg :=_cbd [_dgfg +1];for _fggc :=0;_fggc < len (_ggffa )-1;_fggc ++{_ebcc :=_ggffa [_fggc ];_dbdb :=_ggffa [_fggc +1];
_eggc :=_ce .PdfRectangle {Llx :_ebcc ,Urx :_dbdb ,Lly :_adgg ,Ury :_gffa };_afaa :=_eadg [_aaca (_fggc ,_dgfg )];if len (_afaa )==0{continue ;};_eaded :=_ffec (_eggc ,_afaa );_dfeb .put (_fggc ,_dgfg ,_eaded );};};return &_dfeb ;};
// _fbda reads exactly two numeric operands from `_aeeg` and returns them as float64
// values. It returns an error when the operand count is not two or when the operands
// cannot be converted by GetNumbersAsFloat.
func _fbda (_aeeg []_gb .PdfObject )(_ffgeg ,_fcbga float64 ,_acgec error ){if len (_aeeg )!=2{return 0,0,_ge .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_aeeg ));
};_fdecb ,_acgec :=_gb .GetNumbersAsFloat (_aeeg );if _acgec !=nil {return 0,0,_acgec ;};return _fdecb [0],_fdecb [1],nil ;};
// subpath is a sequence of points plus a boolean flag.
type subpath struct{_gfefe []_bc .Point ;_cgde bool ;};
// newTablePara wraps the table in a new paragraph whose rectangle and `_ebcf` field are
// both set to the table's computeBbox() result and whose `_bddea` points back to the table.
func (_bedc *textTable )newTablePara ()*textPara {_fbfeg :=_bedc .computeBbox ();
_cadab :=&textPara {PdfRectangle :_fbfeg ,_ebcf :_fbfeg ,_bddea :_bedc };if _afcg {_ec .Log .Info ("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073",_cadab );};return _cadab ;};
// fontsize returns the `_ecag` value of the paragraph's first line.
// NOTE(review): panics when `_bdbcg` is empty — confirm it is never called on a paragraph
// without lines.
func (_ebbc *textPara )fontsize ()float64 {return _ebbc ._bdbcg [0]._ecag };
// top returns the state on top of the stack without removing it, or nil when
// the stack is empty.
func (_adfe *stateStack) top() *textState {
	if _adfe.empty() {
		return nil
	}
	return (*_adfe)[_adfe.size()-1]
}

// pop removes the topmost state from the stack and returns a pointer to a copy
// of it, or nil when the stack is empty.
func (_cbbf *stateStack) pop() *textState {
	if _cbbf.empty() {
		return nil
	}
	last := len(*_cbbf) - 1
	popped := *(*_cbbf)[last]
	*_cbbf = (*_cbbf)[:last]
	return &popped
}

// _beff returns `_cafb` with the first element that is pointer-equal to
// `_fefbf` removed. If no such element exists an error is logged and nil is
// returned.
func _beff(_cafb []*textWord, _fefbf *textWord) []*textWord {
	for i, w := range _cafb {
		if w == _fefbf {
			return _ddbf(_cafb, i)
		}
	}
	_ec.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", _fefbf)
	return nil
}

const _adee = 20

// empty reports whether the bag has no entry for depth index `_ggaf`.
func (_dcee *wordBag) empty(_ggaf int) bool {
	_, present := _dcee._faba[_ggaf]
	return !present
}

// rulingList is a list of rulings.
type rulingList []*ruling

// applyTables returns a new list holding one table paragraph per table in
// `_gece`, followed by every paragraph of the receiver whose `_bfcd` flag is
// not set.
func (_decfg paraList) applyTables(_gece []*textTable) paraList {
	var out paraList
	for _, table := range _gece {
		out = append(out, table.newTablePara())
	}
	for _, para := range _decfg {
		if para._bfcd {
			continue
		}
		out = append(out, para)
	}
	return out
}

// _fabg returns the difference between the left edges (Llx) of the bounding
// boxes of `_fggd` and `_cbee`.
func _fabg(_fggd, _cbee bounded) float64 {
	return _fggd.bbox().Llx - _cbee.bbox().Llx
}

// _eaecc draws image `_gcggc` through mask `_dgea` onto a fresh RGBA canvas and
// returns the result. The canvas is as wide as the wider of the two inputs and
// as tall as the taller; whichever input does not already have that size is
// rescaled bilinearly to fit before compositing.
func _eaecc(_gcggc, _dgea _ded.Image) _ded.Image {
	maskSize, imgSize := _dgea.Bounds().Size(), _gcggc.Bounds().Size()
	width, height := maskSize.X, maskSize.Y
	if imgSize.X > width {
		width = imgSize.X
	}
	if imgSize.Y > height {
		height = imgSize.Y
	}
	canvas := _ded.Rect(0, 0, width, height)
	if maskSize.X != width || maskSize.Y != height {
		scaled := _ded.NewRGBA(canvas)
		// The source must be the mask itself. Previously the image was used as
		// the source here, so a rescaled mask contained image pixels rather
		// than mask coverage.
		_b.BiLinear.Scale(scaled, canvas, _dgea, _dgea.Bounds(), _b.Over, nil)
		_dgea = scaled
	}
	if imgSize.X != width || imgSize.Y != height {
		scaled := _ded.NewRGBA(canvas)
		_b.BiLinear.Scale(scaled, canvas, _gcggc, _gcggc.Bounds(), _b.Over, nil)
		_gcggc = scaled
	}
	out := _ded.NewRGBA(canvas)
	_b.DrawMask(out, canvas, _gcggc, _ded.Point{}, _dgea, _ded.Point{}, _b.Over)
	return out
}
// NewFromContents creates a new extractor from contents and page resources.
func NewFromContents(contents string, resources *_ce.PdfPageResources) (*Extractor, error) {
	const _abf = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"
	_ac := &Extractor{
		_bcf: contents,
		_ab:  resources,
		_cd:  map[string]fontEntry{},
		_fa:  map[string]textResult{},
	}
	_bd.TrackUse(_abf)
	return _ac, nil
}

// ruling describes a single ruling line: its kind, the mark kind it was built
// from, its color and four float64 measurements.
type ruling struct {
	_bfbc rulingKind
	_bgaa markKind
	_fg.Color
	_abbgc float64
	_cebe  float64
	_deee  float64
	_bcgdf float64
}

// reset restores both matrices of the text object to the identity matrix and
// clears `_cfb`.
func (_bgfc *textObject) reset() {
	_bgfc._eee = _bc.IdentityMatrix()
	_bgfc._ecg = _bc.IdentityMatrix()
	_bgfc._cfb = nil
}

// extractTables finds the tables in the receiver using the grid tilings
// `_bafge` and returns the paragraph list with those tables applied. Lists
// shorter than `_ddd` are returned unchanged.
func (_cebb paraList) extractTables(_bafge []gridTiling) paraList {
	if _afcg {
		_ec.Log.Debug("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_cebb))
	}
	if len(_cebb) < _ddd {
		return _cebb
	}
	tables := _cebb.findTables(_bafge)
	if _afcg {
		_ec.Log.Info("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(tables))
		for i, table := range tables {
			table.log(_ge.Sprintf("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064", i))
		}
	}
	return _cebb.applyTables(tables)
}

// _edege returns the distinct inner keys of the nested map `_eccbb` in
// ascending order.
func _edege(_eccbb map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_eccbb))
	seen := make(map[float64]struct{}, len(_eccbb))
	for _, inner := range _eccbb {
		for k := range inner {
			if _, dup := seen[k]; dup {
				continue
			}
			keys = append(keys, k)
			seen[k] = struct{}{}
		}
	}
	_c.Float64s(keys)
	return keys
}

// _dcbd reports whether the two points have exactly equal coordinates.
func _dcbd(_egaga, _fagcg _bc.Point) bool {
	return _egaga.X == _fagcg.X && _egaga.Y == _fagcg.Y
}

// _cad returns the intersection of the two rectangles and true, or a zero
// rectangle and false when `_decgb` reports that they do not overlap.
func _cad(_adef, _dcgg _ce.PdfRectangle) (_ce.PdfRectangle, bool) {
	if !_decgb(_adef, _dcgg) {
		return _ce.PdfRectangle{}, false
	}
	return _ce.PdfRectangle{
		Llx: _aa.Max(_adef.Llx, _dcgg.Llx),
		Urx: _aa.Min(_adef.Urx, _dcgg.Urx),
		Lly: _aa.Max(_adef.Lly, _dcgg.Lly),
		Ury: _aa.Min(_adef.Ury, _dcgg.Ury),
	}, true
}

// _fdg reports whether the vertical extents [Lly, Ury] of the two rectangles
// overlap (touching counts as overlapping).
func _fdg(_gfaf, _bdbb _ce.PdfRectangle) bool {
	return _gfaf.Lly <= _bdbb.Ury && _bdbb.Lly <= _gfaf.Ury
}

// getComposite returns the paragraphs and bounding box of the composite cell
// stored at (`_cgcba`, `_cdcbg`), or nil and a zero rectangle when no composite
// cell is stored there.
func (_fcea *textTable) getComposite(_cgcba, _cdcbg int) (paraList, _ce.PdfRectangle) {
	cell, found := _fcea._egfe[_aaca(_cgcba, _cdcbg)]
	if _afcg {
		_ge.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a", _cgcba, _cdcbg, cell.String())
	}
	if !found {
		return nil, _ce.PdfRectangle{}
	}
	return cell.parasBBox()
}

// markCells sets the `_bfcd` flag on every non-nil cell paragraph of the table.
func (_bcaa *textTable) markCells() {
	for y := 0; y < _bcaa._dcfg; y++ {
		for x := 0; x < _bcaa._ecbf; x++ {
			if cell := _bcaa.get(x, y); cell != nil {
				cell._bfcd = true
			}
		}
	}
}

// llyOrdering returns the indexes of the receiver ordered by ascending Lly.
// The sort is stable, so paragraphs with equal Lly keep their relative order.
func (_gcea paraList) llyOrdering() []int {
	order := make([]int, len(_gcea))
	for i := range _gcea {
		order[i] = i
	}
	_c.SliceStable(order, func(a, b int) bool {
		return _gcea[order[a]].Lly < _gcea[order[b]].Lly
	})
	return order
}

// depth returns the smallest depth() among the cells in row 0 of the table,
// ignoring nil cells and cells whose `_bedf` flag is set. It returns 1e10 when
// no cell qualifies.
func (_ebcfd *textTable) depth() float64 {
	least := 1e10
	for x := 0; x < _ebcfd._ecbf; x++ {
		cell := _ebcfd.get(x, 0)
		if cell == nil || cell._bedf {
			continue
		}
		least = _aa.Min(least, cell.depth())
	}
	return least
}

// _afaae returns the keys of `_cecfe` in ascending order.
func _afaae(_cecfe map[int]intSet) []int {
	keys := make([]int, 0, len(_cecfe))
	for k := range _cecfe {
		keys = append(keys, k)
	}
	_c.Ints(keys)
	return keys
}

// _bbea returns a new wordBag containing only `_gcbgg`. The bag takes its
// bounding box and `_egfa` value from the word and records `_dbeb`, `_cagf`
// and `_bbeg` for later use.
func _bbea(_gcbgg *textWord, _dbeb float64, _cagf, _bbeg rulingList) *wordBag {
	depthIdx := _fece(_gcbgg._cffdg)
	return &wordBag{
		_faba:        map[int][]*textWord{depthIdx: {_gcbgg}},
		PdfRectangle: _gcbgg.PdfRectangle,
		_egfa:        _gcbgg._ddgee,
		_cfg:         _dbeb,
		_eeca:        _cagf,
		_gbef:        _bbeg,
	}
}

// parseStructElement populates the receiver from the structure element
// dictionary `_eead`. It reads the /S, /Pg and /K entries: /S and /Pg are
// stored as-is, and /K is interpreted as an integer, a reference or an array.
// Integers are stored in `_efcc`; every other kid is parsed recursively and
// collected in `_cagb`. If `_eead` is not a dictionary, a debug message is
// logged and the receiver is left untouched.
func (_ccdge *structElement) parseStructElement(_eead _gb.PdfObject) {
	dict, isDict := _gb.GetDict(_eead)
	if !isDict {
		_ec.Log.Debug("\u0070\u0061\u0072\u0073\u0065\u0053\u0074\u0072u\u0063\u0074\u0045le\u006d\u0065\u006e\u0074\u003a\u0020d\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006f\u0062\u006a\u0065\u0063t\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075n\u0064\u002e")
		return
	}
	tagObj := dict.Get("\u0053")
	pageObj := dict.Get("\u0050\u0067")
	tag := ""
	if tagObj != nil {
		tag = tagObj.String()
	}
	kid := dict.Get("\u004b")
	_ccdge._bacdb = tag
	_ccdge._geaa = pageObj

	// parseKids handles a /K value in array form. The reference and array
	// cases used to duplicate this logic verbatim.
	parseKids := func(kids *_gb.PdfObjectArray) {
		_ccdge._efcc = -1
		if kids.Len() == 1 {
			if mcid, ok := kids.Elements()[0].(*_gb.PdfObjectInteger); ok {
				_ccdge._efcc = int64(*mcid)
				return
			}
		}
		children := []structElement{}
		for _, obj := range kids.Elements() {
			if mcid, ok := obj.(*_gb.PdfObjectInteger); ok {
				_ccdge._efcc = int64(*mcid)
				continue
			}
			child := &structElement{}
			child.parseStructElement(obj)
			children = append(children, *child)
		}
		_ccdge._cagb = children
	}

	switch k := kid.(type) {
	case *_gb.PdfObjectInteger:
		_ccdge._efcc = int64(*k)
	case *_gb.PdfObjectReference:
		parseKids(_gb.MakeArray(k))
	case *_gb.PdfObjectArray:
		parseKids(k)
	}
}

// buildList collects the "L" structure elements (and whatever `_cgbe` extracts
// from "Table" elements) directly under the top element of the structure tree
// and converts them into lists using `_gdfab` and `_beecb`. It returns nil when
// the receiver is nil or no top element can be determined.
func (_cdgbf *structTreeRoot) buildList(_cbfb map[int][]*textLine, _gcge _gb.PdfObject) []*list {
	if _cdgbf == nil {
		_ec.Log.Debug("\u0062\u0075\u0069\u006c\u0064\u004c\u0069\u0073\u0074\u003a\u0020t\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0069\u0073 \u006e\u0069\u006c")
		return nil
	}
	var top *structElement
	if len(_cdgbf._dgbb) == 1 {
		// A single root child is only accepted as the top element when it is
		// one of the grouping tags below.
		switch _cdgbf._dgbb[0]._bacdb {
		case "\u0044\u006f\u0063\u0075\u006d\u0065\u006e\u0074", "\u0053\u0065\u0063\u0074", "\u0050\u0061\u0072\u0074", "\u0044\u0069\u0076", "\u0041\u0072\u0074":
			top = &_cdgbf._dgbb[0]
		}
	} else {
		top = &structElement{_cagb: _cdgbf._dgbb, _bacdb: _cdgbf._fdec}
	}
	if top == nil {
		_ec.Log.Debug("\u0062\u0075\u0069\u006cd\u004c\u0069\u0073\u0074\u003a\u0020\u0074\u006f\u0070\u0045l\u0065m\u0065\u006e\u0074\u0020\u0069\u0073\u0020n\u0069\u006c")
		return nil
	}
	elems := []structElement{}
	for _, child := range top._cagb {
		switch child._bacdb {
		case "\u004c":
			elems = append(elems, child)
		case "\u0054\u0061\u0062l\u0065":
			elems = append(elems, _cgbe(child)...)
		}
	}
	var out []*list
	for _, item := range _gdfab(elems, _cbfb, _gcge) {
		out = append(out, _beecb(item)...)
	}
	return out
}

// getCellInfo returns {{row}, {column}} for the first cell whose Marks contain
// `_gcaa`, or nil when no cell contains it.
func (_adeea TextTable) getCellInfo(_gcaa TextMark) [][]int {
	for r, row := range _adeea.Cells {
		for c, cell := range row {
			marks := &cell.Marks
			if marks.exists(_gcaa) {
				return [][]int{{r}, {c}}
			}
		}
	}
	return nil
}

// lines returns the text lines of all paragraphs in the receiver, in order.
func (_bgba paraList) lines() []*textLine {
	var out []*textLine
	for _, para := range _bgba {
		out = append(out, para._bdbcg...)
	}
	return out
}

// closePath closes the most recent subpath. If a point is pending (`_edee`), a
// new subpath is first started from it. With no pending point and no subpaths
// there is nothing to close and the call returns early.
func (_bcef *shapesState) closePath() {
	if _bcef._edee {
		_bcef._abgb = append(_bcef._abgb, _fae(_bcef._ebfb))
		_bcef._edee = false
	} else if len(_bcef._abgb) == 0 {
		if _cbag {
			_ec.Log.Debug("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068")
		}
		_bcef._edee = false
		return
	}
	_bcef._abgb[len(_bcef._abgb)-1].close()
	if _cbag {
		_ec.Log.Info("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073", _bcef)
	}
}

// push appends a copy of `_adb` to the stack, so later changes to `_adb` do
// not affect the stacked state.
func (_dcge *stateStack) push(_adb *textState) {
	saved := *_adb
	*_dcge = append(*_dcge, &saved)
}

// _bddf merges word bags whose bounding boxes overlap. The bags are sorted by
// decreasing area (then decreasing height); each surviving bag then absorbs
// every later bag that overlaps its bounding box extended to the left by the
// bag's `_egfa`. The surviving bags are returned. `_efgea` is reordered in
// place.
func _bddf(_efgea []*wordBag) []*wordBag {
	if len(_efgea) <= 1 {
		return _efgea
	}
	if _egd {
		_ec.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	_c.Slice(_efgea, func(i, j int) bool {
		a, b := _efgea[i], _efgea[j]
		areaA := a.Width() * a.Height()
		areaB := b.Width() * b.Height()
		if areaA != areaB {
			return areaA > areaB
		}
		if a.Height() != b.Height() {
			return a.Height() > b.Height()
		}
		return i < j
	})
	var merged []*wordBag
	absorbed := make(intSet)
	for i := 0; i < len(_efgea); i++ {
		if absorbed.has(i) {
			continue
		}
		keeper := _efgea[i]
		for j := i + 1; j < len(_efgea); j++ {
			// Skip bags that were already absorbed. The original tested the
			// outer index here, which can never be set at this point, so
			// absorbed bags were needlessly re-examined.
			if absorbed.has(j) {
				continue
			}
			candidate := _efgea[j]
			reach := keeper.PdfRectangle
			reach.Llx -= keeper._egfa
			if _fafg(reach, candidate.PdfRectangle) {
				keeper.absorb(candidate)
				absorbed.add(j)
			}
		}
		merged = append(merged, keeper)
	}
	if len(_efgea) != len(merged)+len(absorbed) {
		_ec.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_efgea), len(merged), len(absorbed))
	}
	return merged
}

// getListLines returns the lines of the paragraph whose first word begins with
// a byte accepted by `_fdfa`, followed by the lines returned by `_eccb`.
// Lines without words, or whose first word has empty text, are skipped instead
// of causing an index-out-of-range panic.
func (_cceb *textPara) getListLines() []*textLine {
	var out []*textLine
	extra := _eccb(_cceb._bdbcg)
	for _, ln := range _cceb._bdbcg {
		if len(ln._aebc) == 0 || len(ln._aebc[0]._bbdb) == 0 {
			continue
		}
		if _fdfa(ln._aebc[0]._bbdb[0]) {
			out = append(out, ln)
		}
	}
	out = append(out, extra...)
	return out
}

// computeViews derives the text, text marks and tables of the page from its
// paragraphs and caches them on the receiver.
func (_abdd *PageText) computeViews() {
	paras := _abdd.getParagraphs()
	buf := new(_df.Buffer)
	paras.writeText(buf)
	_abdd._gcee = buf.String()
	_abdd._ebfc = paras.toTextMarks()
	_abdd._eadb = paras.tables()
	if _afcg {
		_ec.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(_abdd._eadb))
	}
}

// _bdged passes both coordinates of every point of every subpath in `_fcaed`
// through `_eacf`, in place. It does nothing when `_dgeec` is negative.
func _bdged(_fcaed []pathSection) {
	if _dgeec < 0.0 {
		return
	}
	if _gfgc {
		_ec.Log.Info("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073", len(_fcaed))
	}
	for si, section := range _fcaed {
		for pi, sub := range section._fbfe {
			for i, before := range sub._gfefe {
				sub._gfefe[i] = _bc.Point{X: _eacf(before.X), Y: _eacf(before.Y)}
				if !_gfgc {
					continue
				}
				after := sub._gfefe[i]
				if !_dcbd(before, after) {
					delta := _bc.Point{X: after.X - before.X, Y: after.Y - before.Y}
					_ge.Printf("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a", si, pi, i, before, after, delta)
				}
			}
		}
	}
}

// putComposite stores the paragraphs `_fabfg` with bounding box `_ggagg` as the
// composite cell at (`_fgggd`, `_egadc`). An empty paragraph list is rejected
// with an error log.
func (_cedaf *textTable) putComposite(_fgggd, _egadc int, _fabfg paraList, _ggagg _ce.PdfRectangle) {
	if len(_fabfg) == 0 {
		_ec.Log.Error("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073")
		return
	}
	cell := compositeCell{PdfRectangle: _ggagg, paraList: _fabfg}
	if _afcg {
		_ge.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a", _fgggd, _egadc, cell.String())
	}
	cell.updateBBox()
	_cedaf._egfe[_aaca(_fgggd, _egadc)] = cell
}

// addPoint adds the device-space equivalent of (`_cgee`, `_bcad`) to the
// current subpath. When establishSubpath returns nil, the point is instead
// remembered as the pending start point.
func (_eeb *shapesState) addPoint(_cgee, _bcad float64) {
	current := _eeb.establishSubpath()
	pt := _eeb.devicePoint(_cgee, _bcad)
	if current == nil {
		_eeb._edee = true
		_eeb._ebfb = pt
		return
	}
	current.add(pt)
}
2023-05-29 17:26:33 +00:00
2023-09-07 17:40:17 +00:00
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
2023-12-17 13:54:01 +00:00
type RenderMode int

// bounded is implemented by anything that can report a bounding box.
type bounded interface {
	bbox() _ce.PdfRectangle
}

// textState holds the text-related parameters tracked while processing a
// content stream.
type textState struct {
	_ggf   float64 // reported as "tc" by String
	_eag   float64 // reported as "tw" by String
	_gdc   float64
	_dcc   float64
	_ccf   float64 // reported as "tfs" by String
	_bbd   RenderMode
	_gegg  float64
	_badfc *_ce.PdfFont // reported as "font" by String; may be nil
	_dab   _ce.PdfRectangle
	_baf   int
	_afgb  int
}

// firstWord returns the first word stored under depth index `_bcdc`. It panics
// if no words are stored under that index.
func (_dgdd *wordBag) firstWord(_bcdc int) *textWord {
	words := _dgdd._faba[_bcdc]
	return words[0]
}

// _accf returns the Llx of the first word of `_ebca`. It panics if the line
// has no words.
func _accf(_ebca *textLine) float64 {
	first := _ebca._aebc[0]
	return first.Llx
}

// _gdgbc returns the horizontal distance from the right edge (Urx) of `_feff`
// to the left edge (Llx) of `_ccgb`; it is negative when the two overlap
// horizontally.
func _gdgbc(_ccgb, _feff bounded) float64 {
	left := _ccgb.bbox().Llx
	right := _feff.bbox().Urx
	return left - right
}
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// String returns a description of `state`: its "tc", "tw" and "tfs" values and
// the base font name, or "[NOT SET]" when no font has been selected.
func (_gcbg *textState) String() string {
	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	if font := _gcbg._badfc; font != nil {
		fontName = font.BaseFont()
	}
	return _ge.Sprintf("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071", _gcbg._ggf, _gcbg._eag, _gcbg._ccf, fontName)
}
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// PageImages represents the images extracted from a PDF page together with
// their spatial information (display position and size). The individual
// images are held in the Images field, one ImageMark per image.
type PageImages struct{Images []ImageMark ;};
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// String returns a human readable description of `s`: its members in
// ascending order.
func (_bfed intSet) String() string {
	var members []int
	for k := range _bfed {
		if !_bfed.has(k) {
			continue
		}
		members = append(members, k)
	}
	_c.Ints(members)
	return _ge.Sprintf("\u0025\u002b\u0076", members)
}

// text returns the text of the line: the text of each word in order, with a
// single space inserted before every word whose `_gfffc` flag is set.
func (_eeae *textLine) text() string {
	var sb _gd.Builder
	for _, w := range _eeae._aebc {
		if w._gfffc {
			sb.WriteString("\u0020")
		}
		sb.WriteString(w._bbdb)
	}
	return sb.String()
}

// _efg reports whether any font in `_cca` has the name `_db`.
func _efg(_cca []Font, _db string) bool {
	for i := range _cca {
		if _cca[i].FontName == _db {
			return true
		}
	}
	return false
}

// _fgee returns a ruling of kind `_ecac` placed at the bottom edge of
// `_gfdad` and spanning it from Llx to Urx.
func _fgee(_gfdad _ce.PdfRectangle) *ruling {
	r := ruling{_bfbc: _ecac}
	r._abbgc = _gfdad.Lly
	r._cebe = _gfdad.Llx
	r._deee = _gfdad.Urx
	return &r
}

// bbox returns the bounding box of the table.
func (_dgdcb *textTable) bbox() _ce.PdfRectangle {
	return _dgdcb.PdfRectangle
}

// comp is the ordering predicate for rulings `_aaeca` and `_fbgb` of the list.
// Rulings are ordered by kind first (larger kinds first). Rulings of kind
// `_gdcf` are never ordered before one another. Within a kind they are
// compared on `_abbgc`, then `_cebe`, then `_deee`; for kinds other than
// `_ecac` each of these comparisons is negated.
func (_dcbad rulingList) comp(_aaeca, _fbgb int) bool {
	a, b := _dcbad[_aaeca], _dcbad[_fbgb]
	kind := a._bfbc
	if other := b._bfbc; kind != other {
		return kind > other
	}
	if kind == _gdcf {
		return false
	}
	orient := func(less bool) bool {
		if kind == _ecac {
			return less
		}
		return !less
	}
	switch {
	case a._abbgc != b._abbgc:
		return orient(a._abbgc > b._abbgc)
	case a._cebe != b._cebe:
		return orient(a._cebe < b._cebe)
	default:
		return orient(a._deee < b._deee)
	}
}

// absorb merges `_bbbda` into the receiver: the bounding box becomes the union
// of both boxes and the marks of `_bbbda` are appended.
func (_cafda *textWord) absorb(_bbbda *textWord) {
	_cafda.PdfRectangle = _cdggc(_cafda.PdfRectangle, _bbbda.PdfRectangle)
	_cafda._bgeaa = append(_cafda._bgeaa, _bbbda._bgeaa...)
}

var _dbgc = []string{
	"\u2756",
	"\u27a2",
	"\u2713",
	"\u2022",
	"\uf0a7",
	"\u25a1",
	"\u2212",
	"\u25a0",
	"\u25aa",
	"\u006f",
}

// absorb pulls every word of `_eceg` into the receiver and then applies the
// accumulated removals to `_eceg`.
func (_decg *wordBag) absorb(_eceg *wordBag) {
	removals := _eceg.makeRemovals()
	for depthIdx, words := range _eceg._faba {
		for _, w := range words {
			_decg.pullWord(w, depthIdx, removals)
		}
	}
	_eceg.applyRemovals(removals)
}

// _adagc returns `_agaae` truncated to at most `_agacd` bytes. The cut is made
// on a byte boundary, not a rune boundary.
func _adagc(_agaae string, _agacd int) string {
	if len(_agaae) >= _agacd {
		return _agaae[:_agacd]
	}
	return _agaae
}

// asRuling converts the rectangle into a ruling running along its centre line.
// For kind `_ebdaf` the ruling sits at the mean of Llx and Urx and spans Lly to
// Ury; for kind `_ecac` it sits at the mean of Lly and Ury and spans Llx to
// Urx. It returns false when checkWidth rejects the rectangle's thickness or
// when the kind is neither of the two.
func (_ddbeb rectRuling) asRuling() (*ruling, bool) {
	out := ruling{_bfbc: _ddbeb._dfec, Color: _ddbeb.Color, _bgaa: _eecbc}
	switch _ddbeb._dfec {
	case _ebdaf:
		out._abbgc = 0.5 * (_ddbeb.Llx + _ddbeb.Urx)
		out._cebe = _ddbeb.Lly
		out._deee = _ddbeb.Ury
		width, ok := _ddbeb.checkWidth(_ddbeb.Llx, _ddbeb.Urx)
		if !ok {
			if _feceb {
				_ec.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _ddbeb)
			}
			return nil, false
		}
		out._bcgdf = width
	case _ecac:
		out._abbgc = 0.5 * (_ddbeb.Lly + _ddbeb.Ury)
		out._cebe = _ddbeb.Llx
		out._deee = _ddbeb.Urx
		width, ok := _ddbeb.checkWidth(_ddbeb.Lly, _ddbeb.Ury)
		if !ok {
			if _feceb {
				_ec.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _ddbeb)
			}
			return nil, false
		}
		out._bcgdf = width
	default:
		_ec.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _ddbeb._dfec)
		return nil, false
	}
	return &out, true
}

// exists reports whether the array contains a mark with the same DirectObject,
// BBox and Text as `_cagc`.
func (_fag *TextMarkArray) exists(_cagc TextMark) bool {
	for _, m := range _fag.Elements() {
		if !_gc.DeepEqual(_cagc.DirectObject, m.DirectObject) {
			continue
		}
		if !_gc.DeepEqual(_cagc.BBox, m.BBox) {
			continue
		}
		if m.Text == _cagc.Text {
			return true
		}
	}
	return false
}

var _befc string = "\u005e\u005b\u0061\u002d\u007a\u0041\u002dZ\u005d\u0028\u005c)\u007c\u005c\u002e)\u007c\u005e[\u005c\u0064\u005d\u002b\u0028\u005c)\u007c\\.\u0029\u007c\u005e\u005c\u0028\u005b\u0061\u002d\u007a\u0041\u002d\u005a\u005d\u005c\u0029\u007c\u005e\u005c\u0028\u005b\u005c\u0064\u005d\u002b\u005c\u0029"

// appendWord adds `_gaag` to the end of the line, grows the line's bounding box
// to include the word, and raises the line's `_ecag` and `_bcdg` to the word's
// `_ddgee` and `_cffdg` when those are larger.
func (_fffc *textLine) appendWord(_gaag *textWord) {
	_fffc._aebc = append(_fffc._aebc, _gaag)
	_fffc.PdfRectangle = _cdggc(_fffc.PdfRectangle, _gaag.PdfRectangle)
	if size := _gaag._ddgee; size > _fffc._ecag {
		_fffc._ecag = size
	}
	if depth := _gaag._cffdg; depth > _fffc._bcdg {
		_fffc._bcdg = depth
	}
}

// _fdab converts the subpaths of the sections `_afdb` into rulings. The
// sections are first passed through `_bdged`; subpaths that are not
// quadrilaterals, or for which makeRectRuling fails, are skipped.
func _fdab(_afdb []pathSection) rulingList {
	_bdged(_afdb)
	if _gfgc {
		_ec.Log.Info("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs", len(_afdb))
	}
	var out rulingList
	for _, section := range _afdb {
		for _, sub := range section._fbfe {
			if !sub.isQuadrilateral() {
				if _gfgc {
					_ec.Log.Error("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073", sub)
				}
				continue
			}
			r, ok := sub.makeRectRuling(section.Color)
			if ok {
				out = append(out, r)
				continue
			}
			if _feceb {
				_ec.Log.Error("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073", sub)
			}
		}
	}
	if _gfgc {
		_ec.Log.Info("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073", out.String())
	}
	return out
}

// _eeacg reports whether either paragraph has its `_bedf` flag set or, failing
// that, whether `_ffegg` accepts the difference of their depths.
func _eeacg(_adefa, _dccb *textPara) bool {
	if _adefa._bedf || _dccb._bedf {
		return true
	}
	delta := _adefa.depth() - _dccb.depth()
	return _ffegg(delta)
}

// _cbcf returns `_agce(_dbef, _ggcb)` unless `_ffegg` accepts that value, in
// which case it returns the difference of the left edges (`_fabg`) instead.
func _cbcf(_dbef, _ggcb bounded) float64 {
	primary := _agce(_dbef, _ggcb)
	if _ffegg(primary) {
		return _fabg(_dbef, _ggcb)
	}
	return primary
}
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// String returns a string describing `ma`: the number of elements plus the
// first and last element, or "EMPTY" when there are none.
func (_cbba TextMarkArray) String() string {
	count := len(_cbba._dec)
	if count == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	first, last := _cbba._dec[0], _cbba._dec[count-1]
	return _ge.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", count, first, last)
}

// _ddbf removes the element at index `_ababe` from `_bbag` in place and returns
// the shortened slice, which shares its backing array with the input.
func _ddbf(_bbag []*textWord, _ababe int) []*textWord {
	return append(_bbag[:_ababe], _bbag[_ababe+1:]...)
}

// cachedImage pairs an image with its colorspace.
type cachedImage struct {
	_aca *_ce.Image
	_abb _ce.PdfColorspace
}

// _cgbf partitions the words of `_eecd` into a list of word bags. Depth indexes
// are visited in the order given by depthIndexes. For each, the first word in
// reading order seeds a new bag, which is then grown by repeated scanBand calls
// until a full pass adds nothing. Words are removed from `_eecd` as they are
// consumed.
func _cgbf(_eecd *wordBag, _cddg float64, _cfebc, _dbbg rulingList) []*wordBag {
	var bags []*wordBag
	for _, depthIdx := range _eecd.depthIndexes() {
		grew := false
		for !_eecd.empty(depthIdx) {
			readIdx := _eecd.firstReadingIndex(depthIdx)
			seed := _eecd.firstWord(readIdx)
			bag := _bbea(seed, _cddg, _cfebc, _dbbg)
			_eecd.removeWord(seed, readIdx)
			if _becc {
				_ec.Log.Info("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073", seed.String())
			}
			// Keep scanning until a complete pass leaves the bag unchanged.
			// The post statement re-evaluates `grew`, so `continue` below
			// starts another pass.
			for again := true; again; again = grew {
				grew = false
				readingGap := _gceeb * bag._egfa
				intraReadingGap := _ccab * bag._egfa
				intraDepthGap := _cfebd * bag._egfa
				if _becc {
					_ec.Log.Info("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066", bag.minDepth(), bag.maxDepth(), intraDepthGap, intraReadingGap)
				}
				if _eecd.scanBand("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c", bag, _ffef(_abdg, 0), bag.minDepth()-intraDepthGap, bag.maxDepth()+intraDepthGap, _bdgd, false, false) > 0 {
					grew = true
				}
				if _eecd.scanBand("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c", bag, _ffef(_abdg, intraReadingGap), bag.minDepth(), bag.maxDepth(), _gfff, false, false) > 0 {
					grew = true
				}
				if grew {
					continue
				}
				found := _eecd.scanBand("", bag, _ffef(_cedg, readingGap), bag.minDepth(), bag.maxDepth(), _baae, true, false)
				if found <= 0 {
					continue
				}
				depthSpan := (bag.maxDepth() - bag.minDepth()) / bag._egfa
				if (found > 1 && float64(found) > 0.3*depthSpan) || found <= 10 {
					if _eecd.scanBand("\u006f\u0074\u0068e\u0072", bag, _ffef(_cedg, readingGap), bag.minDepth(), bag.maxDepth(), _baae, false, true) > 0 {
						grew = true
					}
				}
			}
			bags = append(bags, bag)
		}
	}
	return bags
}

// _ffec returns a paragraph with bounding box `_afcgbe` and lines `_ddea`.
func _ffec(_afcgbe _ce.PdfRectangle, _ddea []*textLine) *textPara {
	para := textPara{PdfRectangle: _afcgbe}
	para._bdbcg = _ddea
	return &para
}

// appendMark adds `_edaf` to the word, grows the word's bounding box to include
// it, raises `_ddgee` to the mark's `_bfaca` when that is larger, and
// recomputes `_cffdg` as the distance from the top of `_aecac` to the bottom of
// the word.
func (_edbc *textWord) appendMark(_edaf *textMark, _aecac _ce.PdfRectangle) {
	_edbc._bgeaa = append(_edbc._bgeaa, _edaf)
	_edbc.PdfRectangle = _cdggc(_edbc.PdfRectangle, _edaf.PdfRectangle)
	if size := _edaf._bfaca; size > _edbc._ddgee {
		_edbc._ddgee = size
	}
	_edbc._cffdg = _aecac.Ury - _edbc.PdfRectangle.Lly
}

// clear resets the subpath to its zero value.
func (_gfa *subpath) clear() {
	*_gfa = subpath{}
}

// _fdfa reports whether `_adgb` equals the first byte of any entry of `_dbgc`.
// NOTE(review): most entries of `_dbgc` are multi-byte in UTF-8, so this only
// matches their lead byte; confirm that this is intended.
func _fdfa(_adgb byte) bool {
	for i := range _dbgc {
		if _dbgc[i][0] == _adgb {
			return true
		}
	}
	return false
}

// _fecgf groups the marks `_cbca` into words. A word ends at a mark for which
// `_ecga` is true, or when the next mark is too far from the current word
// horizontally or in depth (relative to the word's `_ddgee`). When `_efbf` is
// set, a diacritic mark adjacent to a non-diacritic mark is folded into the
// current word via addDiacritic instead of being appended as a mark of its
// own. `_daef` is forwarded to `_bddac`, `_dgdc` and appendMark.
func _fecgf(_cbca []*textMark, _daef _ce.PdfRectangle) []*textWord {
	var words []*textWord
	var current *textWord
	if _cfgg {
		_ec.Log.Info("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073", len(_cbca))
	}
	// flush finishes the word under construction, keeping it only when
	// `_ecga` is false for its computed text.
	flush := func() {
		if current == nil {
			return
		}
		text := current.computeText()
		if !_ecga(text) {
			current._bbdb = text
			words = append(words, current)
			if _cfgg {
				_ec.Log.Info("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", len(words)-1, current.String())
				for i, m := range current._bgeaa {
					_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, m.String())
				}
			}
		}
		current = nil
	}
	for _, mark := range _cbca {
		if _efbf && current != nil && len(current._bgeaa) > 0 {
			prev := current._bgeaa[len(current._bgeaa)-1]
			markDia, markIsDia := _fbcfg(mark._ecaa)
			prevDia, prevIsDia := _fbcfg(prev._ecaa)
			if markIsDia && !prevIsDia && prev.inDiacriticArea(mark) {
				current.addDiacritic(markDia)
				continue
			}
			if prevIsDia && !markIsDia && mark.inDiacriticArea(prev) {
				// The previous mark was the diacritic: replace it with the
				// base mark and re-add it as a diacritic.
				current._bgeaa = current._bgeaa[:len(current._bgeaa)-1]
				current.appendMark(mark, _daef)
				current.addDiacritic(prevDia)
				continue
			}
		}
		if _ecga(mark._ecaa) {
			flush()
			continue
		}
		if current == nil {
			current = _bddac([]*textMark{mark}, _daef)
			continue
		}
		size := current._ddgee
		depthGap := _aa.Abs(_dgdc(_daef, mark)-current._cffdg) / size
		readingGap := _gdgbc(mark, current) / size
		if readingGap >= _edd || !(-_dfac <= readingGap && depthGap <= _bagb) {
			flush()
			current = _bddac([]*textMark{mark}, _daef)
			continue
		}
		current.appendMark(mark, _daef)
	}
	flush()
	return words
}

// computeBbox returns the union of the bounding boxes of all non-nil cells of
// the table, or a zero rectangle when the table has no cells.
func (_cedb *textTable) computeBbox() _ce.PdfRectangle {
	var box _ce.PdfRectangle
	seeded := false
	for y := 0; y < _cedb._dcfg; y++ {
		for x := 0; x < _cedb._ecbf; x++ {
			cell := _cedb.get(x, y)
			switch {
			case cell == nil:
			case !seeded:
				box = cell.PdfRectangle
				seeded = true
			default:
				box = _cdggc(box, cell.PdfRectangle)
			}
		}
	}
	return box
}
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// String returns a description of `k`: its name from `_fdgf`, or
// "Not a mark: <n>" for unknown values.
func (_dgaa markKind) String() string {
	if name, known := _fdgf[_dgaa]; known {
		return name
	}
	return _ge.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _dgaa)
}

// reorder rearranges the receiver in place so that position i holds the
// paragraph that was previously at index `_dfbd[i]`.
func (_ddaea paraList) reorder(_dfbd []int) {
	shuffled := make(paraList, len(_ddaea))
	for dst, src := range _dfbd {
		shuffled[dst] = _ddaea[src]
	}
	copy(_ddaea, shuffled)
}

// lists is a list of list pointers.
type lists []*list

// getFontDirect looks up the font dictionary named `_ddba` and builds a font
// from it. A failure to build the font is logged and returned together with
// whatever NewPdfFontFromPdfObject produced.
func (_cgb *textObject) getFontDirect(_ddba string) (*_ce.PdfFont, error) {
	fontObj, err := _cgb.getFontDict(_ddba)
	if err != nil {
		return nil, err
	}
	font, err := _ce.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		_ec.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _ddba, err)
	}
	return font, err
}

// _cdggc returns the smallest rectangle that contains both `_eaed` and `_bcff`.
func _cdggc(_eaed, _bcff _ce.PdfRectangle) _ce.PdfRectangle {
	var union _ce.PdfRectangle
	union.Llx = _aa.Min(_eaed.Llx, _bcff.Llx)
	union.Lly = _aa.Min(_eaed.Lly, _bcff.Lly)
	union.Urx = _aa.Max(_eaed.Urx, _bcff.Urx)
	union.Ury = _aa.Max(_eaed.Ury, _bcff.Ury)
	return union
}

// compositeCell is a list of paragraphs together with a bounding rectangle.
type compositeCell struct {
	_ce.PdfRectangle
	paraList
}

// sortReadingOrder sorts the paragraphs in place using `_cbcf` after
// recomputing their extended bounding boxes.
// NOTE(review): the comparator uses `<= 0`, which is not a strict ordering
// (it is true for equal elements); confirm whether `< 0` was intended.
func (_ffccf paraList) sortReadingOrder() {
	_ec.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_ffccf))
	if len(_ffccf) <= 1 {
		return
	}
	_ffccf.computeEBBoxes()
	before := func(i, j int) bool {
		return _cbcf(_ffccf[i], _ffccf[j]) <= 0
	}
	_c.Slice(_ffccf, before)
}
// String returns a string describing `pt`: a header line, one line per element
// of `_fcag`, and a footer line.
func (_gacc PageText) String() string {
	summary := _ge.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_gacc._fcag))
	parts := []string{"\u002d" + summary}
	for _, elem := range _gacc._fcag {
		parts = append(parts, elem.String())
	}
	parts = append(parts, "\u002b"+summary)
	return _gd.Join(parts, "\u000a")
}

// computeEBBoxes computes the extended bounding box `_ebcf` of every
// paragraph. Each box starts as the paragraph's own rectangle and is widened
// horizontally to line up with paragraphs lying entirely below it, without
// crossing the nearest vertically-overlapping neighbours on either side. When
// `_dbde` is set the paragraph rectangles are overwritten with the extended
// boxes.
func (_fgdc paraList) computeEBBoxes() {
	if _fad {
		_ec.Log.Info("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a")
	}
	for _, para := range _fgdc {
		para._ebcf = para.PdfRectangle
	}
	neighbours := _fgdc.yNeighbours(0)
	for i, para := range _fgdc {
		box := para._ebcf
		// leftLimit/rightLimit are the closest edges of neighbours lying
		// wholly to the left/right of this paragraph.
		leftLimit, rightLimit := -1.0e9, +1.0e9
		for _, idx := range neighbours[para] {
			other := _fgdc[idx]._ebcf
			if other.Urx < box.Llx {
				leftLimit = _aa.Max(leftLimit, other.Urx)
			} else if box.Urx < other.Llx {
				rightLimit = _aa.Min(rightLimit, other.Llx)
			}
		}
		for j, below := range _fgdc {
			other := below._ebcf
			if i == j || other.Ury > box.Lly {
				continue
			}
			if leftLimit <= other.Llx && other.Llx < box.Llx {
				box.Llx = other.Llx
			} else if other.Urx <= rightLimit && box.Urx < other.Urx {
				box.Urx = other.Urx
			}
		}
		if _fad {
			_ge.Printf("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a", i, para._ebcf, box, _adagc(para.text(), 50))
		}
		para._ebcf = box
	}
	if _dbde {
		for _, para := range _fgdc {
			para.PdfRectangle = para._ebcf
		}
	}
}

// allWords returns every word in the bag. The order across depth indexes
// follows map iteration and is therefore unspecified.
func (_gadg *wordBag) allWords() []*textWord {
	var out []*textWord
	for _, words := range _gadg._faba {
		out = append(out, words...)
	}
	return out
}

// gridTiling is a rectangle together with two coordinate lists and a
// two-level map of grid tiles.
type gridTiling struct {
	_ce.PdfRectangle
	_cgecb []float64
	_agbb  []float64
	_bage  map[float64]map[float64]gridTile
}

// processOperand handles one content stream operation for image extraction:
//   - "BI" with one parameter extracts an inline image (stencil masks are
//     skipped unless IncludeInlineStencilMasks is set);
//   - "Do" with one parameter extracts an image XObject or the images of a
//     form XObject;
//   - "scn"/"SCN" with one parameter, while a Pattern colorspace is active,
//     extracts the images of a tiling pattern's content stream;
//   - "cs"/"CS" records whether the selected colorspace is "Pattern".
//
// Every other operation is ignored.
func (_feab *imageExtractContext) processOperand(_ccb *_ag.ContentStreamOperation, _cbge _ag.GraphicsState, _gee *_ce.PdfPageResources) error {
	op, params := _ccb.Operand, _ccb.Params
	switch {
	case op == "\u0042\u0049" && len(params) == 1:
		inline, ok := params[0].(*_ag.ContentStreamInlineImage)
		if !ok {
			return nil
		}
		if isMask, ok := _gb.GetBoolVal(inline.ImageMask); ok {
			if isMask && !_feab._cef.IncludeInlineStencilMasks {
				return nil
			}
		}
		return _feab.extractInlineImage(inline, _cbge, _gee)
	case op == "\u0044\u006f" && len(params) == 1:
		name, ok := _gb.GetName(params[0])
		if !ok {
			_ec.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065")
			return _gdf
		}
		_, xtype := _gee.GetXObjectByName(*name)
		switch xtype {
		case _ce.XObjectTypeImage:
			return _feab.extractXObjectImage(name, _cbge, _gee)
		case _ce.XObjectTypeForm:
			return _feab.extractFormImages(name, _cbge, _gee)
		}
	case _feab._dedf && (op == "\u0073\u0063\u006e" || op == "\u0053\u0043\u004e") && len(params) == 1:
		name, ok := _gb.GetName(params[0])
		if !ok {
			_ec.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065")
			return _gdf
		}
		pattern, ok := _gee.GetPatternByName(*name)
		if !ok {
			_ec.Log.Debug("\u0045R\u0052\u004f\u0052\u003a\u0020\u0050\u0061\u0074\u0074\u0065\u0072n\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075\u006e\u0064")
			return nil
		}
		if pattern.IsTiling() {
			tiling := pattern.GetAsTilingPattern()
			content, err := tiling.GetContentStream()
			if err != nil {
				return err
			}
			if err = _feab.extractContentStreamImages(string(content), tiling.Resources); err != nil {
				return err
			}
		}
	case (op == "\u0063\u0073" || op == "\u0043\u0053") && len(params) >= 1:
		_feab._dedf = params[0].String() == "\u0050a\u0074\u0074\u0065\u0072\u006e"
	}
	return nil
}

// _ccba returns a new slice holding the elements of `_gcgeb` in reverse order.
func _ccba(_gcgeb []int) []int {
	n := len(_gcgeb)
	out := make([]int, n)
	for i, v := range _gcgeb {
		out[n-1-i] = v
	}
	return out
}

// snapToGroupsDirection groups rulings whose primary coordinate (`_abbgc`) lies
// within `_gcef` of the first ruling of the group, snaps every ruling of a
// group to the value returned by mergePrimary, then splits each group with
// splitSec and merges each part into one ruling. The merged rulings are
// returned in sortStrict order. The receiver's rulings are modified in place.
// An empty list is returned unchanged.
func (_baad rulingList) snapToGroupsDirection() rulingList {
	// Guard the `_baad[0]` accesses below, which would otherwise panic.
	if len(_baad) == 0 {
		return _baad
	}
	_baad.sortStrict()
	groups := make(map[*ruling]rulingList, len(_baad))
	head := _baad[0]
	startGroup := func(r *ruling) {
		head = r
		groups[head] = rulingList{r}
	}
	startGroup(_baad[0])
	for _, r := range _baad[1:] {
		if r._abbgc < head._abbgc-_fdac {
			_ec.Log.Error("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073", head, r)
		}
		if r._abbgc > head._abbgc+_gcef {
			startGroup(r)
		} else {
			groups[head] = append(groups[head], r)
		}
	}
	snapped := make(map[*ruling]float64, len(groups))
	headOf := make(map[*ruling]*ruling, len(_baad))
	for h, members := range groups {
		snapped[h] = members.mergePrimary()
		for _, m := range members {
			headOf[m] = h
		}
	}
	for _, r := range _baad {
		r._abbgc = snapped[headOf[r]]
	}
	out := make(rulingList, 0, len(_baad))
	for _, members := range groups {
		for i, part := range members.splitSec() {
			merged := part.merge()
			if len(out) > 0 {
				last := out[len(out)-1]
				if last.alignsPrimary(merged) && last.alignsSec(merged) {
					_ec.Log.Error("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073", i, last, merged)
					continue
				}
			}
			out = append(out, merged)
		}
	}
	out.sortStrict()
	return out
}
// Len returns the number of TextMarks in `ma`. A nil receiver has length 0.
func (_eggf *TextMarkArray) Len() int {
	if _eggf != nil {
		return len(_eggf._dec)
	}
	return 0
}

// _ggba returns `_efadb`, followed by the values of `_cbfca` lying strictly
// between `_efadb` and `_edac`, followed by `_edac`. Scanning stops at the
// first value at or beyond the larger bound, so `_cbfca` is presumably sorted
// in ascending order.
func _ggba(_cbfca []float64, _efadb, _edac float64) []float64 {
	lo, hi := _efadb, _edac
	if hi < lo {
		lo, hi = hi, lo
	}
	out := make([]float64, 0, len(_cbfca)+2)
	out = append(out, _efadb)
	for _, v := range _cbfca {
		if v <= lo {
			continue
		}
		if v >= hi {
			break
		}
		out = append(out, v)
	}
	return append(out, _edac)
}

// paraList is a list of paragraphs.
type paraList []*textPara

// _debaa returns the indexes 0..`_affa`-1 sorted according to `_abfg`, which
// compares two of those indexes.
func _debaa(_affa int, _abfg func(int, int) bool) []int {
	order := make([]int, _affa)
	for i := range order {
		order[i] = i
	}
	_c.Slice(order, func(a, b int) bool {
		return _abfg(order[a], order[b])
	})
	return order
}

// _egbg sorts `_dcgc` in place by ascending `_bcdg` and groups the lines by
// the rounded Llx of their first word.
func _egbg(_dcgc []*textLine) map[float64][]*textLine {
	_c.Slice(_dcgc, func(a, b int) bool {
		return _dcgc[a]._bcdg < _dcgc[b]._bcdg
	})
	groups := map[float64][]*textLine{}
	for _, ln := range _dcgc {
		key := _aa.Round(_accf(ln))
		groups[key] = append(groups[key], ln)
	}
	return groups
}

// del removes `_dafac` from the set.
func (_faggf intSet) del(_dafac int) {
	delete(_faggf, _dafac)
}
// TableCell is a cell in a TextTable.
type TableCell struct {
	_ce.PdfRectangle

	// Text is the extracted text.
	Text string

	// Marks holds the TextMarks corresponding to the text in Text.
	Marks TextMarkArray
}

// Ruling kinds. `_gdcf` is the zero value.
const (
	_gdcf rulingKind = iota
	_ecac
	_ebdaf
)