unipdf/extractor/extractor.go

846 lines
186 KiB
Go
Raw Normal View History

2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2022-03-13 12:41:53 +00:00
package extractor ;import (_e "bytes";_d "errors";_cag "fmt";_fb "github.com/unidoc/unipdf/v3/common";_ag "github.com/unidoc/unipdf/v3/contentstream";_gdd "github.com/unidoc/unipdf/v3/core";_fg "github.com/unidoc/unipdf/v3/internal/license";_dc "github.com/unidoc/unipdf/v3/internal/textencoding";
_afd "github.com/unidoc/unipdf/v3/internal/transform";_ac "github.com/unidoc/unipdf/v3/model";_dd "golang.org/x/text/unicode/norm";_cg "golang.org/x/xerrors";_ca "image/color";_c "io";_gc "math";_a "regexp";_af "sort";_cf "strings";_gd "unicode";_g "unicode/utf8";
);func (_decad *textTable )computeBbox ()_ac .PdfRectangle {var _dgadd _ac .PdfRectangle ;_gdgc :=false ;for _defc :=0;_defc < _decad ._dcbdf ;_defc ++{for _cbdd :=0;_cbdd < _decad ._agac ;_cbdd ++{_eefa :=_decad .get (_cbdd ,_defc );if _eefa ==nil {continue ;
};if !_gdgc {_dgadd =_eefa .PdfRectangle ;_gdgc =true ;}else {_dgadd =_gcae (_dgadd ,_eefa .PdfRectangle );};};};return _dgadd ;};func _acded (_fccb ,_cfcda int )int {if _fccb < _cfcda {return _fccb ;};return _cfcda ;};func (_cecg *textObject )setWordSpacing (_bfe float64 ){if _cecg ==nil {return ;
};_cecg ._fga ._eag =_bfe ;};
// New returns an Extractor instance for extracting content from the input PDF page.
//
// An error is returned if `page` is nil, if the page's content streams cannot be read or if the
// page's MediaBox cannot be obtained. A MediaBox whose corners are stored in reverse order is
// normalized so that Llx <= Urx and Lly <= Ury.
func New(page *_ac.PdfPage) (*Extractor, error) {
	const usageKey = "extractor.New"

	// A nil page would otherwise panic when `page.Resources` is read below.
	if page == nil {
		return nil, _d.New("extractor requires a page")
	}

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}
	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _cag.Errorf("extractor requires mediaBox. %v", err)
	}

	ex := &Extractor{
		_gg:  contents,
		_cb:  page.Resources,
		_fe:  *mediaBox,
		_gde: map[string]fontEntry{},
		_agb: map[string]textResult{},
	}
	if ex._fe.Llx > ex._fe.Urx {
		_fb.Log.Info("MediaBox has X coordinates reversed. %.2f Fixing.", ex._fe)
		ex._fe.Llx, ex._fe.Urx = ex._fe.Urx, ex._fe.Llx
	}
	if ex._fe.Lly > ex._fe.Ury {
		_fb.Log.Info("MediaBox has Y coordinates reversed. %.2f Fixing.", ex._fe)
		ex._fe.Lly, ex._fe.Ury = ex._fe.Ury, ex._fe.Lly
	}

	_fg.TrackUse(usageKey)
	return ex, nil
}

// putComposite stores the paragraphs `_fgcb` with rectangle `_cdbafb` as the composite cell
// keyed by (`_gafg`, `_bfgg`). An empty paragraph list is logged as an error and not stored.
func (t *textTable) putComposite(_gafg, _bfgg int, _fgcb paraList, _cdbafb _ac.PdfRectangle) {
	if len(_fgcb) == 0 {
		_fb.Log.Error("textTable) putComposite: empty paras")
		return
	}
	cell := compositeCell{PdfRectangle: _cdbafb, paraList: _fgcb}
	if _agede {
		_cag.Printf("        putComposite(%d,%d)<-%s\n", _gafg, _bfgg, cell.String())
	}
	cell.updateBBox()
	t._cead[_addg(_gafg, _bfgg)] = cell
}

// eventNeighbours sweeps over `_ggda` in order of the `_eefg` field and returns, for each
// paragraph that has an event with `_bgdfg` set, the indexes of the paragraphs that were active
// at the same time. A paragraph with no neighbours maps to nil.
//
// `_ggda` is sorted in place. At equal `_eefg`, events with `_bgdfg` set come first and the
// remaining ties keep their input order. Each returned index list is in increasing order.
func (paras paraList) eventNeighbours(_ggda []event) map[*textPara][]int {
	// A stable sort keeps ties in input order. Comparing the indexes passed to the comparator
	// does not achieve that: they are positions in the partially sorted slice.
	_af.SliceStable(_ggda, func(i, j int) bool {
		a, b := _ggda[i], _ggda[j]
		if a._eefg != b._eefg {
			return a._eefg < b._eefg
		}
		return a._bgdfg && !b._bgdfg
	})

	overlaps := make(map[int]intSet)
	active := make(intSet)
	for _, ev := range _ggda {
		if !ev._bgdfg {
			active.del(ev._deed)
			continue
		}
		overlaps[ev._deed] = make(intSet)
		for other := range active {
			if other != ev._deed {
				overlaps[ev._deed].add(other)
				overlaps[other].add(ev._deed)
			}
		}
		active.add(ev._deed)
	}

	neighbours := map[*textPara][]int{}
	for idx, set := range overlaps {
		para := paras[idx]
		if len(set) == 0 {
			neighbours[para] = nil
			continue
		}
		list := make([]int, 0, len(set))
		for other := range set {
			list = append(list, other)
		}
		// Map iteration order is random; sort so the result is deterministic.
		_af.Ints(list)
		neighbours[para] = list
	}
	return neighbours
}

// maxDepth returns the bag's `_egbb` value minus the lower edge (Lly) of its rectangle.
func (b *wordBag) maxDepth() float64 {
	return b._egbb - b.Lly
}
// setCharSpacing stores `_cddg` in the `_feba` field of the text state and, when the `_agcb`
// flag is on, logs the new value together with the state. It is a no-op on a nil receiver.
func (to *textObject) setCharSpacing(_cddg float64) {
	if to == nil {
		return
	}
	to._fga._feba = _cddg
	if !_agcb {
		return
	}
	_fb.Log.Info("setCharSpacing: %.2f state=%s", _cddg, to._fga.String())
}

// _dccd returns the distinct inner keys of the two-level map `_aaaf` in increasing order.
func _dccd(_aaaf map[float64]map[float64]gridTile) []float64 {
	seen := make(map[float64]struct{}, len(_aaaf))
	keys := make([]float64, 0, len(_aaaf))
	for _, inner := range _aaaf {
		for k := range inner {
			if _, dup := seen[k]; !dup {
				seen[k] = struct{}{}
				keys = append(keys, k)
			}
		}
	}
	_af.Float64s(keys)
	return keys
}

// NOTE(review): _bf is not used in this part of the file; its meaning must be confirmed at its
// points of use.
const _bf = 20

// _gegd removes the element at index `_bbbg` from `_abcde` by shifting the following elements
// down in place. The returned slice shares its backing array with the input and is one shorter.
func _gegd(_abcde []*textWord, _bbbg int) []*textWord {
	return append(_abcde[:_bbbg], _abcde[_bbbg+1:]...)
}
2021-09-23 22:37:42 +00:00
2022-02-05 21:34:53 +00:00
// String returns a description of the word bag.
2022-03-13 12:41:53 +00:00
func (b *wordBag) String() string {
	// Gather the `_gebf` string of every word, visiting the bins in depthIndexes order.
	var texts []string
	for _, depthIdx := range b.depthIndexes() {
		for _, word := range b._cfeeb[depthIdx] {
			texts = append(texts, word._gebf)
		}
	}
	// Output: rectangle, the `_aege` value (labelled fontsize), word count, quoted word texts.
	return _cag.Sprintf("%.2f fontsize=%.2f %d %q", b.PdfRectangle, b._aege, len(texts), texts)
}
2021-10-22 10:53:20 +00:00
2022-03-13 12:41:53 +00:00
// TableCell is a cell in a TextTable.
type TableCell struct{
2021-10-22 10:53:20 +00:00
2022-03-13 12:41:53 +00:00
// Text is the extracted text.
Text string ;
2021-07-30 00:21:16 +00:00
2022-03-13 12:41:53 +00:00
// Marks returns the TextMarks corresponding to the text in Text.
Marks TextMarkArray ;};func (_fdg *imageExtractContext )extractFormImages (_cfg *_gdd .PdfObjectName ,_ebg _ag .GraphicsState ,_bge *_ac .PdfPageResources )error {_aff ,_ea :=_bge .GetXObjectFormByName (*_cfg );if _ea !=nil {return _ea ;};if _aff ==nil {return nil ;
};_dec ,_ea :=_aff .GetContentStream ();if _ea !=nil {return _ea ;};_bgef :=_aff .Resources ;if _bgef ==nil {_bgef =_bge ;};_ea =_fdg .extractContentStreamImages (string (_dec ),_bgef );if _ea !=nil {return _ea ;};_fdg ._acg ++;return nil ;};func (_eab *textObject )renderText (_gdcd []byte )error {if _eab ._dcb {_fb .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");
return nil ;};_gda :=_eab .getCurrentFont ();_dfg :=_gda .BytesToCharcodes (_gdcd );_cafc ,_edce ,_cacf :=_gda .CharcodesToStrings (_dfg );if _cacf > 0{_fb .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_edce ,_cacf );
};_eab ._fga ._ededf +=_edce ;_eab ._fga ._eca +=_cacf ;_agf :=_eab ._fga ;_cffc :=_agf ._cef ;_acf :=_agf ._bgg /100.0;_afff :=_gef ;if _gda .Subtype ()=="\u0054\u0079\u0070e\u0033"{_afff =1;};_gcee ,_dcf :=_gda .GetRuneMetrics (' ');if !_dcf {_gcee ,_dcf =_gda .GetCharMetrics (32);
};if !_dcf {_gcee ,_ =_ac .DefaultFont ().GetRuneMetrics (' ');};_fefg :=_gcee .Wx *_afff ;_fb .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_fefg ,_cafc ,_gda ,_cffc );
_gbfa :=_afd .NewMatrix (_cffc *_acf ,0,0,_cffc ,0,_agf ._fec );if _agcb {_fb .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_dfg ),_dfg ,_cafc );
};_fb .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_dfg ),_dfg ,len (_cafc ));_aeb :=_eab .getFillColor ();
_aef :=_eab .getStrokeColor ();for _ffa ,_dad :=range _cafc {_afe :=[]rune (_dad );if len (_afe )==1&&_afe [0]=='\x00'{continue ;};_eaca :=_dfg [_ffa ];_aee :=_eab ._gbc .CTM .Mult (_eab ._bgc ).Mult (_gbfa );_aebb :=0.0;if len (_afe )==1&&_afe [0]==32{_aebb =_agf ._eag ;
};_afgc ,_egff :=_gda .GetCharMetrics (_eaca );if !_egff {_fb .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_eaca ,_afe ,_afe ,_gda );
return _cag .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_gda .String (),_eaca );};_ecfc :=_afd .Point {X :_afgc .Wx *_afff ,Y :_afgc .Wy *_afff };
_fcfd :=_afd .Point {X :(_ecfc .X *_cffc +_aebb )*_acf };_gbeg :=_afd .Point {X :(_ecfc .X *_cffc +_agf ._feba +_aebb )*_acf };if _agcb {_fb .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_cffc ,_agf ._feba ,_agf ._eag ,_acf );
_fb .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_ecfc ,_fcfd ,_gbeg );};_ddd :=_babf (_fcfd );_ebce :=_babf (_gbeg );_acff :=_eab ._gbc .CTM .Mult (_eab ._bgc ).Mult (_ddd );
if _ccc {_fb .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_eab ._gbc .CTM ,_eab ._bgc ,_ebce ,_fgfb (_eab ._gbc .CTM .Mult (_eab ._bgc ).Mult (_ebce )),_ddd ,_acff ,_fgfb (_acff ));
};_dbdg ,_efbd :=_eab .newTextMark (_dc .ExpandLigatures (_afe ),_aee ,_fgfb (_acff ),_gc .Abs (_fefg *_aee .ScalingFactorX ()),_gda ,_eab ._fga ._feba ,_aeb ,_aef );if !_efbd {_fb .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");
continue ;};if _gda ==nil {_fb .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _gda .Encoder ()==nil {_fb .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_gda );
}else {if _bgfdf ,_bgdc :=_gda .Encoder ().CharcodeToRune (_eaca );_bgdc {_dbdg ._bcfd =string (_bgfdf );};};_fb .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_ffa ,_eaca ,_dbdg ,_aee );
_eab ._bece =append (_eab ._bece ,&_dbdg );_eab ._bgc .Concat (_ebce );};return nil ;};const (_fgg ="\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065";
_eb ="\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064";
_fa ="\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065";);
// _gfaf reports whether rectangle `_dgded` lies entirely within rectangle `_gbcd`.
// Edges that coincide count as inside.
func _gfaf(_gbcd, _dgded _ac.PdfRectangle) bool {
	if _gbcd.Llx > _dgded.Llx || _dgded.Urx > _gbcd.Urx {
		return false
	}
	if _gbcd.Lly > _dgded.Lly || _dgded.Ury > _gbcd.Ury {
		return false
	}
	return true
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// String returns a string describing the current state of the textState stack: a header line
// with the number of entries followed by one line per entry ("<nil>" for nil entries).
func (stack *stateStack) String() string {
	lines := make([]string, 0, len(*stack)+1)
	lines = append(lines, _cag.Sprintf("---- font stack: %d", len(*stack)))
	for i, state := range *stack {
		desc := "<nil>"
		if state != nil {
			desc = state.String()
		}
		lines = append(lines, _cag.Sprintf("\t%2d: %s", i, desc))
	}
	return _cf.Join(lines, "\n")
}

// _dedc maps a rune to a string holding a single code point in the U+0300-U+0359 range.
// NOTE(review): this looks like a table from spacing diacritics to their combining forms -
// confirm at its points of use.
var (
	_dedc = map[rune]string{
		0x0060: "\u0300",
		0x02CB: "\u0300",
		0x0027: "\u0301",
		0x00B4: "\u0301",
		0x02B9: "\u0301",
		0x02CA: "\u0301",
		0x005E: "\u0302",
		0x02C6: "\u0302",
		0x007E: "\u0303",
		0x02DC: "\u0303",
		0x00AF: "\u0304",
		0x02C9: "\u0304",
		0x02D8: "\u0306",
		0x02D9: "\u0307",
		0x00A8: "\u0308",
		0x00B0: "\u030a",
		0x02DA: "\u030a",
		0x02BA: "\u030b",
		0x02DD: "\u030b",
		0x02C7: "\u030c",
		0x02C8: "\u030d",
		0x0022: "\u030e",
		0x02BB: "\u0312",
		0x02BC: "\u0313",
		0x0486: "\u0313",
		0x055A: "\u0313",
		0x02BD: "\u0314",
		0x0485: "\u0314",
		0x0559: "\u0314",
		0x02D4: "\u031d",
		0x02D5: "\u031e",
		0x02D6: "\u031f",
		0x02D7: "\u0320",
		0x02B2: "\u0321",
		0x00B8: "\u0327",
		0x02CC: "\u0329",
		0x02B7: "\u032b",
		0x02CD: "\u0331",
		0x005F: "\u0332",
		0x204E: "\u0359",
	}
)

// _dgfa is a Meta TextMark with text "[X]", original " " and white fill and stroke colors.
var _dgfa = TextMark{
	Text:        "[X]",
	Original:    " ",
	Meta:        true,
	FillColor:   _ca.White,
	StrokeColor: _ca.White,
}

// newTextMark builds a textMark for the text `_dag` drawn with font `_egfb`.
//
// `_ccge` is the matrix at the start of the text and `_egbf` the end point; together with the
// matrix scaling factor they define the mark's rectangle. The angle of `_ccge` is quantized with
// _cagd and the rectangle is then expressed for that orientation. `_adfe`, `_dcbd` and `_cgdb`
// are stored in the mark unchanged. `_gee` is not used.
//
// The boolean result is false when the page box `_fe` has a positive width and _aaga reports
// that the rectangle does not intersect it.
func (to *textObject) newTextMark(_dag string, _ccge _afd.Matrix, _egbf _afd.Point, _gee float64, _egfb *_ac.PdfFont, _adfe float64, _dcbd, _cgdb _ca.Color) (textMark, bool) {
	// normalize swaps corners so that Llx <= Urx and Lly <= Ury.
	normalize := func(r *_ac.PdfRectangle) {
		if r.Llx > r.Urx {
			r.Llx, r.Urx = r.Urx, r.Llx
		}
		if r.Lly > r.Ury {
			r.Lly, r.Ury = r.Ury, r.Lly
		}
	}

	angle := _ccge.Angle()
	orient := _cagd(angle, _ebgc)

	// The text height is the Y scaling factor, except at 90 (mod 180) where it is the X factor.
	var height float64
	if orient%180 == 90 {
		height = _ccge.ScalingFactorX()
	} else {
		height = _ccge.ScalingFactorY()
	}

	start := _fgfb(_ccge)
	bbox := _ac.PdfRectangle{Llx: start.X, Lly: start.Y, Urx: _egbf.X, Ury: _egbf.Y}
	switch orient % 360 {
	case 90:
		bbox.Urx -= height
	case 180:
		bbox.Ury -= height
	case 270:
		bbox.Urx += height
	case 0:
		bbox.Ury += height
	default:
		// Any other value is treated as orientation 0.
		orient = 0
		bbox.Ury += height
	}
	normalize(&bbox)

	onPage := true
	if to._defa._fe.Width() > 0 {
		clipped, intersects := _aaga(bbox, to._defa._fe)
		if !intersects {
			onPage = false
			_fb.Log.Debug("Text mark outside page. bbox=%g mediaBox=%g text=%q", bbox, to._defa._fe, _dag)
		}
		bbox = clipped
	}

	// Express the rectangle for the mark's orientation. 90 and 270 first swap the page box's
	// upper-right coordinates.
	oriented := bbox
	page := to._defa._fe
	switch orient % 360 {
	case 90:
		page.Urx, page.Ury = page.Ury, page.Urx
		oriented = _ac.PdfRectangle{
			Llx: page.Urx - bbox.Ury,
			Urx: page.Urx - bbox.Lly,
			Lly: bbox.Llx,
			Ury: bbox.Urx,
		}
	case 180:
		oriented = _ac.PdfRectangle{
			Llx: page.Urx - bbox.Llx,
			Urx: page.Urx - bbox.Urx,
			Lly: page.Ury - bbox.Lly,
			Ury: page.Ury - bbox.Ury,
		}
	case 270:
		page.Urx, page.Ury = page.Ury, page.Urx
		oriented = _ac.PdfRectangle{
			Llx: bbox.Ury,
			Urx: bbox.Lly,
			Lly: page.Ury - bbox.Llx,
			Ury: page.Ury - bbox.Urx,
		}
	}
	normalize(&oriented)

	mark := textMark{
		_cadaf:       _dag,
		PdfRectangle: oriented,
		_bagc:        bbox,
		_afac:        _egfb,
		_gba:         height,
		_dcabg:       _adfe,
		_ccbb:        _ccge,
		_cgfb:        _egbf,
		_adbf:        orient,
		_bfc:         _dcbd,
		_bdba:        _cgdb,
	}
	if _abb {
		_fb.Log.Info("newTextMark: start=%.2f end=%.2f %s", start, _egbf, mark.String())
	}
	return mark, onPage
}

// _fccc gives the display name of three of the markKind values.
var _fccc = map[markKind]string{
	_gagc:  "stroke",
	_ceaac: "fill",
	_adbg:  "augment",
}
2021-07-30 00:21:16 +00:00
2022-03-13 12:41:53 +00:00
// String returns a description of `state`: its `_feba`, `_eag` and `_cef` values (labelled tc,
// tw and tfs) and the base font name, or "[NOT SET]" when no font is set.
func (state *textState) String() string {
	fontName := "[NOT SET]"
	if state._eedd != nil {
		fontName = state._eedd.BaseFont()
	}
	return _cag.Sprintf("tc=%.2f tw=%.2f tfs=%.2f font=%q", state._feba, state._eag, state._cef, fontName)
}

// _cbcbf classifies a pair of extents against the minimum length `_ffecb`. It returns _eccgd
// when `_fegca` is long enough and _dgbd(`_deba`, `_fegca`) holds, _aafafg for the mirrored
// case and _dgdb otherwise. The first case wins when both hold.
func _cbcbf(_fegca, _deba, _ffecb float64) rulingKind {
	switch {
	case _fegca >= _ffecb && _dgbd(_deba, _fegca):
		return _eccgd
	case _deba >= _ffecb && _dgbd(_fegca, _deba):
		return _aafafg
	default:
		return _dgdb
	}
}

// _ade reports whether `_beg` contains a font whose FontName equals `_fc`.
func _ade(_beg []Font, _fc string) bool {
	for i := range _beg {
		if _beg[i].FontName == _fc {
			return true
		}
	}
	return false
}

// imageExtractContext holds the state used while extracting images.
// NOTE(review): only `_acg` is written in the visible code (incremented once per processed form
// XObject); the meaning of the other fields must be confirmed at their points of use.
type imageExtractContext struct {
	_caa  []ImageMark
	_ebc  int
	_cdge int
	_acg  int
	_gfb  map[*_gdd.PdfObjectStream]*cachedImage
	_ga   *ImageExtractOptions
}

// _ffcc returns a new slice holding the elements of `_accc` in reverse order.
func _ffcc(_accc []int) []int {
	n := len(_accc)
	reversed := make([]int, n)
	for i := range reversed {
		reversed[i] = _accc[n-1-i]
	}
	return reversed
}

// blocked reports whether something lies between the bag and `_faac`. When the two rectangles
// are separated along x, the `_fedd` collection is queried; when they are separated along y,
// the `_bba` collection is queried. The x test runs first.
func (b *wordBag) blocked(_faac *textWord) bool {
	switch {
	case _faac.Urx < b.Llx:
		// The word is entirely to the left of the bag.
		from, to := _fgbec(_faac.PdfRectangle), _eacc(b.PdfRectangle)
		if b._fedd.blocks(from, to) {
			if _ccfb {
				_fb.Log.Info("blocked \u2190x: %s %s", _faac, b)
			}
			return true
		}
	case b.Urx < _faac.Llx:
		// The word is entirely to the right of the bag.
		from, to := _fgbec(b.PdfRectangle), _eacc(_faac.PdfRectangle)
		if b._fedd.blocks(from, to) {
			if _ccfb {
				_fb.Log.Info("blocked x\u2192 : %s %s", _faac, b)
			}
			return true
		}
	}

	switch {
	case _faac.Ury < b.Lly:
		// The word is entirely below the bag.
		from, to := _ebgf(_faac.PdfRectangle), _dgba(b.PdfRectangle)
		if b._bba.blocks(from, to) {
			if _ccfb {
				_fb.Log.Info("blocked \u2190y: %s %s", _faac, b)
			}
			return true
		}
	case b.Ury < _faac.Lly:
		// The word is entirely above the bag.
		from, to := _ebgf(b.PdfRectangle), _dgba(_faac.PdfRectangle)
		if b._bba.blocks(from, to) {
			if _ccfb {
				_fb.Log.Info("blocked y\u2192 : %s %s", _faac, b)
			}
			return true
		}
	}
	return false
}

// setTextLeading stores `_cfcg` in the `_dgc` field of the text state.
// It is a no-op on a nil receiver.
func (to *textObject) setTextLeading(_cfcg float64) {
	if to != nil {
		to._fga._dgc = _cfcg
	}
}

// _aab returns a new subpath whose only point is `_bee`.
func _aab(_bee _afd.Point) *subpath {
	points := []_afd.Point{_bee}
	return &subpath{_egd: points}
}

// _gefb returns _cafa(`_abdd`, `_cbac`) unless _gcceb holds for that value, in which case it
// returns _bdga(`_abdd`, `_cbac`) instead.
func _gefb(_abdd, _cbac bounded) float64 {
	if primary := _cafa(_abdd, _cbac); !_gcceb(primary) {
		return primary
	}
	return _bdga(_abdd, _cbac)
}
2021-10-22 10:53:20 +00:00
2022-03-13 12:41:53 +00:00
// String returns a human readable description of `s`: its members in increasing order.
func (s intSet) String() string {
	var members []int
	for k := range s {
		if s.has(k) {
			members = append(members, k)
		}
	}
	_af.Ints(members)
	return _cag.Sprintf("%+v", members)
}

// _dfbc logs the number of ruling lists in `_cbed` under the title `_gfag` and prints each list.
func _dfbc(_gfag string, _cbed []rulingList) {
	_fb.Log.Info("$$ %d grids - %s", len(_cbed), _gfag)
	for i, grid := range _cbed {
		_cag.Printf("%4d: %s\n", i, grid.String())
	}
}

// getComposite returns the paragraphs and rectangle of the composite cell keyed by (`_adecb`,
// `_eggff`), as given by the cell's parasBBox. When no cell is stored it returns nil and the
// zero rectangle.
func (t *textTable) getComposite(_adecb, _eggff int) (paraList, _ac.PdfRectangle) {
	cell, ok := t._cead[_addg(_adecb, _eggff)]
	if _agede {
		_cag.Printf("        getComposite(%d,%d)->%s\n", _adecb, _eggff, cell.String())
	}
	if !ok {
		return nil, _ac.PdfRectangle{}
	}
	return cell.parasBBox()
}

// merge combines the rulings in `_egac` into one. `_agcgg`, `_aaec` and Color are taken from
// the first ruling, `_edcba` is the mean over all rulings, `_bfeag` the minimum and `_cbba`
// the maximum.
//
// It returns nil for an empty list instead of panicking on the first-element access.
func (_egac rulingList) merge() *ruling {
	if len(_egac) == 0 {
		return nil
	}
	first := _egac[0]
	sum := first._edcba
	lo := first._bfeag
	hi := first._cbba
	for _, r := range _egac[1:] {
		sum += r._edcba
		if r._bfeag < lo {
			lo = r._bfeag
		}
		if r._cbba > hi {
			hi = r._cbba
		}
	}
	merged := &ruling{
		_agcgg: first._agcgg,
		_aaec:  first._aaec,
		Color:  first.Color,
		_edcba: sum / float64(len(_egac)),
		_bfeag: lo,
		_cbba:  hi,
	}
	if _cecd {
		_fb.Log.Info("merge: %2d vecs %s", len(_egac), merged)
		for i, r := range _egac {
			_cag.Printf("%4d: %s\n", i, r)
		}
	}
	return merged
}

// tidied returns the rulings with duplicates removed, snapped to groups and sorted. It returns
// nil when snapToGroups yields nil. `_gdde` is only used as a title in the log output.
func (_gbgb rulingList) tidied(_gdde string) rulingList {
	uniques := _gbgb.removeDuplicates()
	uniques.log("uniques")
	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()
	if _adce {
		_fb.Log.Info("tidied: %q vecs=%d uniques=%d coalesced=%d", _gdde, len(_gbgb), len(uniques), len(coalesced))
	}
	coalesced.log("coalesced")
	return coalesced
}

// alignsPrimary reports whether the two rulings have the same `_agcgg` value and `_edcba`
// values that differ by less than half of _gbbd.
func (r *ruling) alignsPrimary(_ecab *ruling) bool {
	if r._agcgg != _ecab._agcgg {
		return false
	}
	return _gc.Abs(r._edcba-_ecab._edcba) < _gbbd*0.5
}

// findTableGrid tries to build a table from the paragraphs that fall in the tiles of `_fbabg`.
// It returns the table and the set of paragraphs placed in it.
//
// It returns (nil, nil) when the number of empty tiles exceeds the limit derived from _cfef,
// or when every position in row 0 holds a non-nil cell with `_affbf` set.
func (paras paraList) findTableGrid(_fbabg gridTiling) (*textTable, map[*textPara]struct{}) {
	width := len(_fbabg._egfga)
	height := len(_fbabg._ebbb)
	table := textTable{
		_ebabc: true,
		_agac:  width,
		_dcbdf: height,
		_abccf: make(map[uint64]*textPara, width*height),
		_cead:  make(map[uint64]compositeCell, width*height),
	}
	used := make(map[*textPara]struct{})
	maxEmpty := int((1.0 - _cfef) * float64(width*height))
	numEmpty := 0
	if _bdea {
		_fb.Log.Info("findTableGrid: %d x %d", width, height)
	}

	for y, rowKey := range _fbabg._ebbb {
		row, ok := _fbabg._cgdc[rowKey]
		if !ok {
			continue
		}
		for x, colKey := range _fbabg._egfga {
			tile, ok := row[colKey]
			if !ok {
				continue
			}
			cellParas := paras.inTile(tile)
			if len(cellParas) == 0 {
				numEmpty++
				if numEmpty > maxEmpty {
					if _bdea {
						_fb.Log.Info("!numEmpty=%d", numEmpty)
					}
					return nil, nil
				}
				continue
			}
			table.putComposite(x, y, cellParas, tile.PdfRectangle)
			for _, para := range cellParas {
				used[para] = struct{}{}
			}
		}
	}

	// Count the positions in row 0 that are empty or do not have `_affbf` set.
	numHeader := 0
	for x := 0; x < width; x++ {
		cell := table.get(x, 0)
		if cell == nil || !cell._affbf {
			numHeader++
		}
	}
	if numHeader == 0 {
		if _bdea {
			_fb.Log.Info("!numHeader=0")
		}
		return nil, nil
	}

	reduced := table.reduceTiling(_fbabg, _faf)
	reduced = reduced.subdivide()
	return reduced, used
}

// logComposite prints two grids for the table's composite cells when the _agede flag is on: the
// number of paragraphs per cell, then the merged cell text passed through _bgdca with argument
// 12. `_fceba` is a title included in both headings.
func (t *textTable) logComposite(_fceba string) {
	if !_agede {
		return
	}

	_fb.Log.Info("~~~Para %d x %d %s", t._agac, t._dcbdf, _fceba)
	_cag.Printf("%5s |", "")
	for x := 0; x < t._agac; x++ {
		_cag.Printf("%3d |", x)
	}
	_cag.Println("")
	_cag.Printf("%5s +", "")
	for x := 0; x < t._agac; x++ {
		_cag.Printf("%3s +", "---")
	}
	_cag.Println("")
	for y := 0; y < t._dcbdf; y++ {
		_cag.Printf("%5d |", y)
		for x := 0; x < t._agac; x++ {
			cellParas, _ := t._cead[_addg(x, y)].parasBBox()
			_cag.Printf("%3d |", len(cellParas))
		}
		_cag.Println("")
	}

	_fb.Log.Info("~~~Text %d x %d %s", t._agac, t._dcbdf, _fceba)
	_cag.Printf("%5s |", "")
	for x := 0; x < t._agac; x++ {
		_cag.Printf("%12d |", x)
	}
	_cag.Println("")
	_cag.Printf("%5s +", "")
	for x := 0; x < t._agac; x++ {
		_cag.Print("-------------+")
	}
	_cag.Println("")
	for y := 0; y < t._dcbdf; y++ {
		_cag.Printf("%5d |", y)
		for x := 0; x < t._agac; x++ {
			cellParas, _ := t._cead[_addg(x, y)].parasBBox()
			text := ""
			if merged := cellParas.merge(); merged != nil {
				text = merged.text()
			}
			// Quote the text to escape control characters, then strip the surrounding quotes.
			text = _cag.Sprintf("%q", _bgdca(text, 12))
			text = text[1 : len(text)-1]
			_cag.Printf("%12s |", text)
		}
		_cag.Println("")
	}
}
// String returns a description of `tm`: its rectangle, its `_gba` value (labelled fontsize) and
// its text in double quotes.
func (tm *textMark) String() string {
	const layout = "%.2f fontsize=%.2f \"%s\""
	return _cag.Sprintf(layout, tm.PdfRectangle, tm._gba, tm._cadaf)
}

// bbox returns the table's rectangle.
func (t *textTable) bbox() _ac.PdfRectangle {
	rect := t.PdfRectangle
	return rect
}
// _dgbd reports whether `_edccc` divided by the larger of _efff and `_dddf` is below _dda.
func _dgbd(_edccc, _dddf float64) bool {
	denominator := _gc.Max(_efff, _dddf)
	return _edccc/denominator < _dda
}

// writeText writes the paragraph's text to `_eece`. A paragraph without a table (`_ccec` nil)
// is written with writeCellText. Otherwise the table is written row by row: each cell is
// followed by a space, a missing cell is written as a tab, and rows are separated by newlines.
// Write errors are ignored.
func (p *textPara) writeText(_eece _c.Writer) {
	table := p._ccec
	if table == nil {
		p.writeCellText(_eece)
		return
	}
	for y := 0; y < table._dcbdf; y++ {
		for x := 0; x < table._agac; x++ {
			if cell := table.get(x, y); cell != nil {
				cell.writeCellText(_eece)
			} else {
				_eece.Write([]byte("\t"))
			}
			_eece.Write([]byte(" "))
		}
		// No newline after the last row.
		if y < table._dcbdf-1 {
			_eece.Write([]byte("\n"))
		}
	}
}

// size returns the number of entries on the stack.
func (stack *stateStack) size() int {
	entries := *stack
	return len(entries)
}

// textResult pairs a PageText with two integer values.
// NOTE(review): the meaning of `_febb` and `_gcf` is not visible in this part of the file.
type textResult struct {
	_gga  PageText
	_febb int
	_gcf  int
}

// _gef is the factor applied to glyph metrics: 1/1000.
const _gef = 1.0 / 1000.0

// stroke appends a pathSection built from the current subpaths (`_fegc`) and the current stroke
// color to `*_efage`. When the _adce flag is on it prints a summary, and when _gbbagd is also on
// it prints the subpaths up to and including index 10.
func (ss *shapesState) stroke(_efage *[]pathSection) {
	section := pathSection{_bbdc: ss._fegc, Color: ss._afbf.getStrokeColor()}
	*_efage = append(*_efage, section)
	if !_adce {
		return
	}
	_cag.Printf("    STROKE: %d strokes ss=%s color=%+v %6.2f\n",
		len(*_efage), ss, ss._afbf.getStrokeColor(), section.bbox())
	if !_gbbagd {
		return
	}
	for i, sub := range ss._fegc {
		_cag.Printf("%8d: %s\n", i, sub)
		if i == 10 {
			break
		}
	}
}

// log prints the rulings under the title `_decac` when the _adce flag is on.
func (_ccga rulingList) log(_decac string) {
	if !_adce {
		return
	}
	_fb.Log.Info("### %10s: vecs=%s", _decac, _ccga.String())
	for i, r := range _ccga {
		_cag.Printf("%4d: %s\n", i, r.String())
	}
}

// nextLine calls moveLP with a zero x offset and the negated `_dgc` value of the text state as
// the y offset.
func (to *textObject) nextLine() {
	to.moveLP(0, -to._fga._dgc)
}
2021-01-07 14:20:10 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
//	ex, _ := New(page)
//	// handle errors
//	pageText, _, _, err := ex.ExtractPageText()
//	// handle errors
//	text := pageText.Text()
//	textMarks := pageText.Marks()
//
//	start := strings.Index(text, term)
//	end := start + len(term)
//	spanMarks, err := textMarks.RangeOffset(start, end)
//	// handle errors
//	bbox, ok := spanMarks.BBox()
//	// handle errors
type TextMark struct{
2020-11-23 22:15:56 +00:00
2020-12-06 13:03:03 +00:00
// Text is the extracted text.
Text string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// BBox is the bounding box of the text.
2022-03-13 12:41:53 +00:00
BBox _ac .PdfRectangle ;
2021-01-07 14:20:10 +00:00
// Font is the font the text was drawn with.
2022-03-13 12:41:53 +00:00
Font *_ac .PdfFont ;
2021-01-07 14:20:10 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
//	text, textMarks := pageText.Text(), pageText.Marks()
//	marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-03-13 12:41:53 +00:00
FillColor _ca .Color ;
2021-01-07 14:20:10 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-03-13 12:41:53 +00:00
StrokeColor _ca .Color ;
2021-01-07 14:20:10 +00:00
// Orientation is the text orientation
2022-03-13 12:41:53 +00:00
Orientation int ;};func _bcbef (_gfaa ,_ecded _ac .PdfRectangle )bool {return _ecded .Llx <=_gfaa .Urx &&_gfaa .Llx <=_ecded .Urx ;};func (_gcffe *compositeCell )updateBBox (){for _ ,_edcd :=range _gcffe .paraList {_gcffe .PdfRectangle =_gcae (_gcffe .PdfRectangle ,_edcd .PdfRectangle );
};};func _eeb (_afad *wordBag ,_bbf *textWord ,_bdf float64 )bool {return _afad .Urx <=_bbf .Llx &&_bbf .Llx < _afad .Urx +_bdf ;};func (_gedba *textPara )toTextMarks (_ffcdf *int )[]TextMark {if _gedba ._ccec ==nil {return _gedba .toCellTextMarks (_ffcdf );
};var _feabg []TextMark ;for _dcdb :=0;_dcdb < _gedba ._ccec ._dcbdf ;_dcdb ++{for _faeg :=0;_faeg < _gedba ._ccec ._agac ;_faeg ++{_dbgc :=_gedba ._ccec .get (_faeg ,_dcdb );if _dbgc ==nil {_feabg =_bafe (_feabg ,_ffcdf ,"\u0009");}else {_acbb :=_dbgc .toCellTextMarks (_ffcdf );
_feabg =append (_feabg ,_acbb ...);};_feabg =_bafe (_feabg ,_ffcdf ,"\u0020");};if _dcdb < _gedba ._ccec ._dcbdf -1{_feabg =_bafe (_feabg ,_ffcdf ,"\u000a");};};return _feabg ;};func (_cdffa *textTable )emptyCompositeRow (_caabe int )bool {for _cccc :=0;
_cccc < _cdffa ._agac ;_cccc ++{if _bbcc ,_eddac :=_cdffa ._cead [_addg (_cccc ,_caabe )];_eddac {if len (_bbcc .paraList )> 0{return false ;};};};return true ;};const (_ebff markKind =iota ;_gagc ;_ceaac ;_adbg ;);func (_gcge *shapesState )newSubPath (){_gcge .clearPath ();
if _efca {_fb .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_gcge );};};func (_gcfa gridTiling )log (_dddb string ){if !_bdea {return ;};_fb .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_gcfa ._egfga ),len (_gcfa ._ebbb ),_dddb );
_cag .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_gcfa ._egfga );_cag .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_gcfa ._ebbb );for _cddbg ,_cfab :=range _gcfa ._ebbb {_acfa ,_fgbgc :=_gcfa ._cgdc [_cfab ];
if !_fgbgc {continue ;};_cag .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_cddbg ,_cfab );for _ccba ,_dgda :=range _gcfa ._egfga {_gdaa ,_gdbeb :=_acfa [_dgda ];if !_gdbeb {continue ;};_cag .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_ccba ,_gdaa .String ());
};};};func (_cdb *textObject )getFontDirect (_fgff string )(*_ac .PdfFont ,error ){_face ,_dddg :=_cdb .getFontDict (_fgff );if _dddg !=nil {return nil ,_dddg ;};_dbccc ,_dddg :=_ac .NewPdfFontFromPdfObject (_face );if _dddg !=nil {_fb .Log .Debug ("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fgff ,_dddg );
};return _dbccc ,_dddg ;};func (_ccd rulingList )aligned ()bool {if len (_ccd )< 2{return false ;};_dbee :=make (map[*ruling ]int );_dbee [_ccd [0]]=0;for _ ,_gabb :=range _ccd [1:]{_ggec :=false ;for _cbbge :=range _dbee {if _gabb .gridIntersecting (_cbbge ){_dbee [_cbbge ]++;
_ggec =true ;break ;};};if !_ggec {_dbee [_gabb ]=0;};};_adfbf :=0;for _ ,_aggde :=range _dbee {if _aggde ==0{_adfbf ++;};};_fddga :=float64 (_adfbf )/float64 (len (_ccd ));_ffga :=_fddga <=1.0-_adcf ;if _adce {_fb .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_ffga ,_fddga ,_adfbf ,len (_ccd ),_ccd .String ());
};return _ffga ;};type pathSection struct{_bbdc []*subpath ;_ca .Color ;};func (_cffd paraList )toTextMarks ()[]TextMark {_acea :=0;var _gcea []TextMark ;for _bbbed ,_cbdg :=range _cffd {if _cbdg ._affbf {continue ;};_gccc :=_cbdg .toTextMarks (&_acea );
_gcea =append (_gcea ,_gccc ...);if _bbbed !=len (_cffd )-1{if _abae (_cbdg ,_cffd [_bbbed +1]){_gcea =_bafe (_gcea ,&_acea ,"\u0020");}else {_gcea =_bafe (_gcea ,&_acea ,"\u000a");_gcea =_bafe (_gcea ,&_acea ,"\u000a");};};};_gcea =_bafe (_gcea ,&_acea ,"\u000a");
_gcea =_bafe (_gcea ,&_acea ,"\u000a");return _gcea ;};type rulingKind int ;func (_dbaa compositeCell )String ()string {_ebeb :="";if len (_dbaa .paraList )> 0{_ebeb =_bgdca (_dbaa .paraList .merge ().text (),50);};return _cag .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071",_dbaa .PdfRectangle ,len (_dbaa .paraList ),_ebeb );
};func (_dfad *wordBag )scanBand (_gdg string ,_dcea *wordBag ,_aggb func (_fedf *wordBag ,_dafb *textWord )bool ,_eddb ,_gdeb ,_gdgd float64 ,_ddgdg ,_afga bool )int {_eaf :=_dcea ._aege ;var _ebag map[int ]map[*textWord ]struct{};if !_ddgdg {_ebag =_dfad .makeRemovals ();
};_afaec :=_dbga *_eaf ;_efaf :=0;for _ ,_baba :=range _dfad .depthBand (_eddb -_afaec ,_gdeb +_afaec ){if len (_dfad ._cfeeb [_baba ])==0{continue ;};for _ ,_bdef :=range _dfad ._cfeeb [_baba ]{if !(_eddb -_afaec <=_bdef ._fgbda &&_bdef ._fgbda <=_gdeb +_afaec ){continue ;
};if !_aggb (_dcea ,_bdef ){continue ;};_babg :=2.0*_gc .Abs (_bdef ._abeg -_dcea ._aege )/(_bdef ._abeg +_dcea ._aege );_dbbb :=_gc .Max (_bdef ._abeg /_dcea ._aege ,_dcea ._aege /_bdef ._abeg );_dcab :=_gc .Min (_babg ,_dbbb );if _gdgd > 0&&_dcab > _gdgd {continue ;
};if _dcea .blocked (_bdef ){continue ;};if !_ddgdg {_dcea .pullWord (_bdef ,_baba ,_ebag );};_efaf ++;if !_afga {if _bdef ._fgbda < _eddb {_eddb =_bdef ._fgbda ;};if _bdef ._fgbda > _gdeb {_gdeb =_bdef ._fgbda ;};};if _ddgdg {break ;};};};if !_ddgdg {_dfad .applyRemovals (_ebag );
};return _efaf ;};func (_efc *imageExtractContext )extractInlineImage (_ecg *_ag .ContentStreamInlineImage ,_bgd _ag .GraphicsState ,_cba *_ac .PdfPageResources )error {_da ,_ggd :=_ecg .ToImage (_cba );if _ggd !=nil {return _ggd ;};_egg ,_ggd :=_ecg .GetColorSpace (_cba );
if _ggd !=nil {return _ggd ;};if _egg ==nil {_egg =_ac .NewPdfColorspaceDeviceGray ();};_cff ,_ggd :=_egg .ImageToRGB (*_da );if _ggd !=nil {return _ggd ;};_dca :=ImageMark {Image :&_cff ,Width :_bgd .CTM .ScalingFactorX (),Height :_bgd .CTM .ScalingFactorY (),Angle :_bgd .CTM .Angle ()};
_dca .X ,_dca .Y =_bgd .CTM .Translation ();_efc ._caa =append (_efc ._caa ,_dca );_efc ._ebc ++;return nil ;};var _gbfad =_a .MustCompile ("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024");
// yMean returns the mean of the Y coordinates of the ruling's two points, _gffd and _eeebc.
func (_eabg lineRuling) yMean() float64 {
	_y0, _y1 := _eabg._gffd.Y, _eabg._eeebc.Y
	return 0.5 * (_y0 + _y1)
}
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	// _gg and _cb are what ExtractPageImages hands to extractContentStreamImages
	// (presumably the page's content stream text and its resources).
	_gg string
	_cb *_ac.PdfPageResources

	// _fe is a page rectangle (presumably the media box — TODO confirm against New).
	_fe _ac.PdfRectangle

	// _gde and _agb are string-keyed caches of font entries and text results.
	_gde map[string]fontEntry
	_agb map[string]textResult

	_df int64
	_ae int
}

// Package-level sentinel errors: _agc is "type check error" and _ge is "range check error".
var (
	_agc = _d.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")
	_ge  = _d.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)
// String returns a string describing `tm`.
func (_bcca TextMark) String() string {
	// Font description, cut to its first 50 bytes plus an ellipsis when longer; empty without a font.
	_fontDesc := ""
	if _bcca.Font != nil {
		if _fontDesc = _bcca.Font.String(); len(_fontDesc) > 50 {
			_fontDesc = _fontDesc[:50] + "\u002e\u002e\u002e"
		}
	}

	// Marks that were inserted by the extractor (Meta) get a " *M*" suffix.
	_metaTag := ""
	if _bcca.Meta {
		_metaTag = "\u0020\u002a\u004d\u002a"
	}

	_box := _bcca.BBox
	return _cag.Sprintf(
		"\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",
		_bcca.Offset, _bcca.Text, []rune(_bcca.Text),
		_box.Llx, _box.Lly, _box.Urx, _box.Ury,
		_fontDesc, _metaTag,
	)
}
// Tables returns the tables extracted from the page.
func (_ggb PageText) Tables() []TextTable {
	// Optional debug trace of how many tables are handed back.
	if _agede {
		_tableCount := len(_ggb._dcgg)
		_fb.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", _tableCount)
	}
	return _ggb._dcgg
}

// _aaga returns the rectangle shared by `_acgf` and `_faca` together with true when _ecde
// reports that they intersect; otherwise it returns the zero rectangle and false.
func _aaga(_acgf, _faca _ac.PdfRectangle) (_ac.PdfRectangle, bool) {
	if _ecde(_acgf, _faca) {
		var _common _ac.PdfRectangle
		_common.Llx = _gc.Max(_acgf.Llx, _faca.Llx)
		_common.Urx = _gc.Min(_acgf.Urx, _faca.Urx)
		_common.Lly = _gc.Max(_acgf.Lly, _faca.Lly)
		_common.Ury = _gc.Min(_acgf.Ury, _faca.Ury)
		return _common, true
	}
	return _ac.PdfRectangle{}, false
}

// readBefore reports whether paragraph `_aedb` should be read before paragraph `_bddd`.
// It is true when _cbdb holds for the pair and the first paragraph has the larger Lly.
// Otherwise the first paragraph's _gbaa box must end left of the second's (Urx < Llx), and no
// third paragraph returned by llyRange for the pair's Lly interval may horizontally overlap the
// span between max(Llx) and min(Urx) of the two _gbaa boxes.
func (_gcab paraList) readBefore(_bege []int, _aedb, _bddd int) bool {
	_first, _second := _gcab[_aedb], _gcab[_bddd]
	if _cbdb(_first, _second) && _first.Lly > _second.Lly {
		return true
	}
	if !(_first._gbaa.Urx < _second._gbaa.Llx) {
		return false
	}

	// Order the two Lly values so that _lo <= _hi.
	_lo, _hi := _first.Lly, _second.Lly
	if _lo > _hi {
		_lo, _hi = _hi, _lo
	}
	_spanLeft := _gc.Max(_first._gbaa.Llx, _second._gbaa.Llx)
	_spanRight := _gc.Min(_first._gbaa.Urx, _second._gbaa.Urx)

	for _, _idx := range _gcab.llyRange(_bege, _lo, _hi) {
		if _idx == _aedb || _idx == _bddd {
			continue
		}
		_other := _gcab[_idx]
		if _other._gbaa.Llx <= _spanRight && _spanLeft <= _other._gbaa.Urx {
			return false
		}
	}
	return true
}

// showText renders the bytes `_bfd` by delegating to renderText.
func (_cfa *textObject) showText(_bfd []byte) error {
	_err := _cfa.renderText(_bfd)
	return _err
}

// complete reports whether every tile stored in _cgdc is itself complete.
func (_cebe gridTiling) complete() bool {
	for _, _row := range _cebe._cgdc {
		for _, _tile := range _row {
			if _tile.complete() {
				continue
			}
			return false
		}
	}
	return true
}

// _edde compares two bounded values: it returns _bdga(a, b) unless _gcceb reports true for that
// result, in which case it falls back to _cafa(a, b).
func _edde(_dab, _abdcb bounded) float64 {
	if _primary := _bdga(_dab, _abdcb); !_gcceb(_primary) {
		return _primary
	}
	return _cafa(_dab, _abdcb)
}

// _afbfd maps each rulingKind to its printable name ("none", "horizontal", "vertical").
var _afbfd = map[rulingKind]string{
	_dgdb:   "\u006e\u006f\u006e\u0065",
	_eccgd:  "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",
	_aafafg: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",
}
// Two const blocks begin on the next line. The first holds feature switches and small limits:
// _acdb gates duplicate removal in wordBag.arrangeText, while _ggfg and _aebe gate the grid-based
// and text-based table finders in paraList.findTables. The second holds numeric thresholds: _dbga,
// _ddbc and _fecd scale a word's _abeg value (presumably its font size) in arrangeText, _dcbf is
// the minimum extent used by _aaca and _efeb to classify a ruling, and _adcf is the tolerance
// used by rulingList.aligned.
const (_ecgaa =true ;_acdb =true ;_bfdb =true ;_bfae =false ;_ebfga =false ;_gfgc =6;_bdfg =3.0;_gfcfb =200;_ggfg =true ;_aebe =true ;_ffac =true ;_gcbg =true ;_efdb =false ;);const (_cbd =1.0e-6;_eada =1.0e-4;_ebgc =10;_eddf =6;_dbga =0.5;_aafe =0.12;
_bafd =0.19;_gdcf =0.04;_bffb =0.04;_cafad =1.0;_cegb =0.04;_effe =0.4;_fgeb =0.7;_ebba =1.0;_faae =0.1;_ddbc =1.4;_fecd =0.46;_gdf =0.02;_bbce =0.2;_ffcd =0.5;_dfed =4;_eaeg =4.0;_eecc =6;_cfef =0.3;_bbdee =0.01;_aaa =0.02;_fee =2;_edcf =2;_edaeb =500;
// _afbfb, defined after the const block on the next line, returns the outer keys of its map
// argument sorted in descending order: it sorts ascending and then reverses the slice in place.
_dcbf =4.0;_cefe =4.0;_dda =0.05;_efff =0.1;_bade =2.0;_gbbd =2.0;_dcbg =1.5;_faf =3.0;_adcf =0.25;);func _afbfb (_dbeb map[float64 ]map[float64 ]gridTile )[]float64 {_bdde :=make ([]float64 ,0,len (_dbeb ));for _eedf :=range _dbeb {_bdde =append (_bdde ,_eedf );
// wordBag.arrangeText begins on the next line. It sorts the bag, removes duplicates when _acdb
// is set, and then builds text lines: for every depth index it repeatedly starts a line with
// _eade at the first reading index and keeps pulling words into it. Candidates come from the
// depth band [depth-_dbga*size, depth+_dbga*size] around the line's first word. A candidate whose
// _dgfaa distance to the line's last word is below -_fecd*size ends the line, one above
// _ddbc*size is skipped, and among the rest the one that orders first under _cafa is pulled.
// The finished lines are sorted with _edde and combined by _efagec; nil is returned when no line
// was produced.
};_af .Float64s (_bdde );_cfede :=len (_bdde );for _babfe :=0;_babfe < _cfede /2;_babfe ++{_bdde [_babfe ],_bdde [_cfede -1-_babfe ]=_bdde [_cfede -1-_babfe ],_bdde [_babfe ];};return _bdde ;};func (_cgfae *wordBag )arrangeText ()*textPara {_cgfae .sort ();
if _acdb {_cgfae .removeDuplicates ();};var _caecb []*textLine ;for _ ,_gcege :=range _cgfae .depthIndexes (){for !_cgfae .empty (_gcege ){_cedb :=_cgfae .firstReadingIndex (_gcege );_cbga :=_cgfae .firstWord (_cedb );_ebab :=_eade (_cgfae ,_cedb );_abec :=_cbga ._abeg ;
_deeg :=_cbga ._fgbda -_dbga *_abec ;_adfeg :=_cbga ._fgbda +_dbga *_abec ;_fcdc :=_ddbc *_abec ;_gdcff :=_fecd *_abec ;_cgbb :for {var _gafce *textWord ;_gdff :=0;for _ ,_ecaff :=range _cgfae .depthBand (_deeg ,_adfeg ){_abbg :=_cgfae .highestWord (_ecaff ,_deeg ,_adfeg );
if _abbg ==nil {continue ;};_bagg :=_dgfaa (_abbg ,_ebab ._cadc [len (_ebab ._cadc )-1]);if _bagg < -_gdcff {break _cgbb ;};if _bagg > _fcdc {continue ;};if _gafce !=nil &&_cafa (_abbg ,_gafce )>=0{continue ;};_gafce =_abbg ;_gdff =_ecaff ;};if _gafce ==nil {break ;
};_ebab .pullWord (_cgfae ,_gafce ,_gdff );};_ebab .markWordBoundaries ();_caecb =append (_caecb ,_ebab );};};if len (_caecb )==0{return nil ;};_af .Slice (_caecb ,func (_abccc ,_dffb int )bool {return _edde (_caecb [_abccc ],_caecb [_dffb ])< 0});_ebge :=_efagec (_cgfae .PdfRectangle ,_caecb );
// The rest of arrangeText is debug output only (paragraph, then lines, words and marks,
// depending on _addb, _dfgd and _ceac).
if _addb {_fb .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_ebge .String ());if _dfgd {for _cgda ,_faaa :=range _ebge ._ddeb {_cag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cgda ,_faaa .String ());
if _ceac {for _cgdgd ,_cgeff :=range _faaa ._cadc {_cag .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_cgdgd ,_cgeff .String ());for _cceb ,_cfeg :=range _cgeff ._ggabc {_cag .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_cceb ,_cfeg .String ());
// paraList.findTables begins on the next line. It calls addNeighbours, sorts the receiver in
// place with _gefb, and returns the tables from findGridTables (when _ggfg is set) followed by
// those from findTextTables (when _aebe is set).
};};};};};};return _ebge ;};func (_cdacd paraList )findTables (_aabb []gridTiling )[]*textTable {_cdacd .addNeighbours ();_af .Slice (_cdacd ,func (_egbbd ,_accdc int )bool {return _gefb (_cdacd [_egbbd ],_cdacd [_accdc ])< 0});var _gaag []*textTable ;
// textWord.computeText begins on the next line. It concatenates the _cadaf string of every mark
// in the word's _ggabc list.
if _ggfg {_abffb :=_cdacd .findGridTables (_aabb );_gaag =append (_gaag ,_abffb ...);};if _aebe {_gfdce :=_cdacd .findTextTables ();_gaag =append (_gaag ,_gfdce ...);};return _gaag ;};func (_dbcbc *textWord )computeText ()string {_ebfgd :=make ([]string ,len (_dbcbc ._ggabc ));
// _aaca begins on the next line. It classifies a rectangle as a ruling: _eccgd when it is wider
// than tall and at least _dcbf wide, _aafafg when it is at least as tall as wide and at least
// _dcbf tall, and _dgdb otherwise.
for _afbc ,_gefeg :=range _dbcbc ._ggabc {_ebfgd [_afbc ]=_gefeg ._cadaf ;};return _cf .Join (_ebfgd ,"");};func _aaca (_bgde _ac .PdfRectangle )rulingKind {_eaabb :=_bgde .Width ();_aebbf :=_bgde .Height ();if _eaabb > _aebbf {if _eaabb >=_dcbf {return _eccgd ;
// Two definitions begin on the next line. _efeb classifies the segment between two points by
// passing its absolute X and Y extents and _dcbf to _cbcbf. wordBag.makeRemovals returns a map
// holding an empty word set for every key of the bag's _cfeeb map.
};}else {if _aebbf >=_dcbf {return _aafafg ;};};return _dgdb ;};func _efeb (_fcedg ,_bdbb _afd .Point )rulingKind {_gcacc :=_gc .Abs (_fcedg .X -_bdbb .X );_gefd :=_gc .Abs (_fcedg .Y -_bdbb .Y );return _cbcbf (_gcacc ,_gefd ,_dcbf );};func (_afae *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_gdae :=make (map[int ]map[*textWord ]struct{},len (_afae ._cfeeb ));
// Two definitions begin on the next line. textPara.fontsize returns the _eabc value of the
// paragraph's first line. NOTE(review): it indexes _ddeb[0] without a length check.
// textTable.getDown collects, for each of the _agac cells at index _dcbdf-1, the paragraph in
// that cell's _aegf field; it returns nil when any of them is taken or when consecutive entries
// are not linked through their _cegf field.
// NOTE(review): the result of get is dereferenced without a nil check.
for _edbg :=range _afae ._cfeeb {_gdae [_edbg ]=make (map[*textWord ]struct{});};return _gdae ;};func (_gbaf *textPara )fontsize ()float64 {return _gbaf ._ddeb [0]._eabc };func (_bdaag *textTable )getDown ()paraList {_ebdad :=make (paraList ,_bdaag ._agac );
for _ccefc :=0;_ccefc < _bdaag ._agac ;_ccefc ++{_baeb :=_bdaag .get (_ccefc ,_bdaag ._dcbdf -1)._aegf ;if _baeb .taken (){return nil ;};_ebdad [_ccefc ]=_baeb ;};for _ddad :=0;_ddad < _bdaag ._agac -1;_ddad ++{if _ebdad [_ddad ]._cegf !=_ebdad [_ddad +1]{return nil ;
// PageFonts.extractPageResourcesToFont begins on the next line. For every key of the resources'
// font dictionary it builds a PdfFont, skips fonts for which _ade(_ddc.Fonts, name) is true,
// decodes the first present stream among FontFile, FontFile2 and FontFile3 (file name suffixes
// ".pfb", ".ttf" and ".cff"), and appends a Font record. ToUnicode is recorded as false when the
// font's ToUnicode() is empty. It returns an error when the font dictionary or a named font is
// missing, or when font construction or stream decoding fails.
// NOTE(review): FontDescriptor() is dereferenced without a nil check.
};};return _ebdad ;};func (_ddc *PageFonts )extractPageResourcesToFont (_geg *_ac .PdfPageResources )error {_afg ,_ed :=_gdd .GetDict (_geg .Font );if !_ed {return _d .New (_fgg );};for _ ,_cab :=range _afg .Keys (){var (_gf =true ;_cae []byte ;_cd string ;
);_gb ,_fggc :=_geg .GetFontByName (_cab );if !_fggc {return _d .New (_eb );};_agg ,_ef :=_ac .NewPdfFontFromPdfObject (_gb );if _ef !=nil {return _ef ;};_ec :=_agg .FontDescriptor ();_ff :=_agg .FontDescriptor ().FontName .String ();_bc :=_agg .Subtype ();
if _ade (_ddc .Fonts ,_ff ){continue ;};if len (_agg .ToUnicode ())==0{_gf =false ;};if _ec .FontFile !=nil {if _cgb ,_edd :=_gdd .GetStream (_ec .FontFile );_edd {_cae ,_ef =_gdd .DecodeStream (_cgb );if _ef !=nil {return _ef ;};_cd =_ff +"\u002e\u0070\u0066\u0062";
};}else if _ec .FontFile2 !=nil {if _fd ,_efg :=_gdd .GetStream (_ec .FontFile2 );_efg {_cae ,_ef =_gdd .DecodeStream (_fd );if _ef !=nil {return _ef ;};_cd =_ff +"\u002e\u0074\u0074\u0066";};}else if _ec .FontFile3 !=nil {if _bcg ,_ebf :=_gdd .GetStream (_ec .FontFile3 );
_ebf {_cae ,_ef =_gdd .DecodeStream (_bcg );if _ef !=nil {return _ef ;};_cd =_ff +"\u002e\u0063\u0066\u0066";};};if len (_cd )< 1{_fb .Log .Debug (_fa );};_fbg :=Font {FontName :_ff ,PdfFont :_agg ,IsCID :_agg .IsCID (),IsSimple :_agg .IsSimple (),ToUnicode :_gf ,FontType :_bc ,FontData :_cae ,FontFileName :_cd ,FontDescriptor :_ec };
// rulingList.toGrids begins on the next line. It partitions the rulings into groups of connected
// rulings and returns the groups that isActualGrid accepts, each passed through snapToGroups.
// Steps: compute intersections, derive a connection set per ruling index, order the indexes by
// descending connection count (ties broken by comp), greedily attach each index to the first
// group containing an index it is connected to (otherwise start a new group), sort the groups
// by descending size and their members by comp, then filter.
// NOTE(review): _gbgad[0] is read without a length check, so this presumably expects a
// non-empty list.
_ddc .Fonts =append (_ddc .Fonts ,_fbg );};return nil ;};func (_bdbbd rulingList )toGrids ()[]rulingList {if _adce {_fb .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_bdbbd );};_dgfbb :=_bdbbd .intersections ();if _adce {_fb .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_bdbbd ),len (_dgfbb ));
for _ ,_cdagg :=range _cdgaf (_dgfbb ){_cag .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_cdagg ,_dgfbb [_cdagg ]);};};_bead :=make (map[int ]intSet ,len (_bdbbd ));for _bccdd :=range _bdbbd {_acfb :=_bdbbd .connections (_dgfbb ,_bccdd );if len (_acfb )> 0{_bead [_bccdd ]=_acfb ;
};};if _adce {_fb .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_bead ));for _ ,_efcg :=range _cdgaf (_bead ){_cag .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_efcg ,_bead [_efcg ]);
};};_gbgad :=_dffge (len (_bdbbd ),func (_gdda ,_acce int )bool {_bdbff ,_fcdcc :=len (_bead [_gdda ]),len (_bead [_acce ]);if _bdbff !=_fcdcc {return _bdbff > _fcdcc ;};return _bdbbd .comp (_gdda ,_acce );});if _adce {_fb .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_gbgad );
};_gadc :=[][]int {{_gbgad [0]}};_caceg :for _ ,_abbc :=range _gbgad [1:]{for _ebee ,_gaed :=range _gadc {for _ ,_gfceb :=range _gaed {if _bead [_gfceb ].has (_abbc ){_gadc [_ebee ]=append (_gaed ,_abbc );continue _caceg ;};};};_gadc =append (_gadc ,[]int {_abbc });
};if _adce {_fb .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_gadc );};_af .SliceStable (_gadc ,func (_eadf ,_bfcc int )bool {return len (_gadc [_eadf ])> len (_gadc [_bfcc ])});for _ ,_egga :=range _gadc {_af .Slice (_egga ,func (_gdcbg ,_abfb int )bool {return _bdbbd .comp (_egga [_gdcbg ],_egga [_abfb ])});
};_faff :=make ([]rulingList ,len (_gadc ));for _gece ,_fbc :=range _gadc {_dafg :=make (rulingList ,len (_fbc ));for _cegfc ,_egda :=range _fbc {_dafg [_cegfc ]=_bdbbd [_egda ];};_faff [_gece ]=_dafg ;};if _adce {_fb .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_faff );
};var _gddbb []rulingList ;for _ ,_ccecg :=range _faff {if _aggd ,_bacb :=_ccecg .isActualGrid ();_bacb {_ccecg =_aggd ;_ccecg =_ccecg .snapToGroups ();_gddbb =append (_gddbb ,_ccecg );};};if _adce {_dfbc ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_gddbb );
// _cagd begins on the next line. It rounds `_dadb` to the nearest multiple of `_defee`; a step
// of 0 is treated as 1.
_fb .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_faff ),len (_gddbb ));};return _gddbb ;};func _cagd (_dadb float64 ,_defee int )int {if _defee ==0{_defee =1;
// rulingList.vertsHorzs begins on the next line. It splits the list by kind and returns the
// _aafafg rulings first and the _eccgd rulings second; rulings of any other kind are dropped.
};_badd :=float64 (_defee );return int (_gc .Round (_dadb /_badd )*_badd );};func (_facc rulingList )vertsHorzs ()(rulingList ,rulingList ){var _cgbcg ,_cfafb rulingList ;for _ ,_bfee :=range _facc {switch _bfee ._agcgg {case _aafafg :_cgbcg =append (_cgbcg ,_bfee );
// textObject.getFontDict begins on the next line. It looks up the font object named `_fbgfe` in
// the text object's resources (_fgf) and returns an error when the name is not found.
// NOTE(review): when there are no resources it logs and returns (nil, nil), so callers receive
// a nil object without an error.
case _eccgd :_cfafb =append (_cfafb ,_bfee );};};return _cgbcg ,_cfafb ;};func (_aeg *textObject )getFontDict (_fbgfe string )(_bcbee _gdd .PdfObject ,_eacd error ){_ecd :=_aeg ._fgf ;if _ecd ==nil {_fb .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_fbgfe );
return nil ,nil ;};_bcbee ,_ggad :=_ecd .GetFontByName (_gdd .PdfObjectName (_fbgfe ));if !_ggad {_fb .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_fbgfe );
// Two definitions begin on the next line. textPara.bbox returns the paragraph's rectangle.
// rulingList.asTiling converts a list of rulings into a gridTiling. It logs an error for each
// adjacent pair that aligns on both primary and secondary coordinates, sorts the list with
// sortStrict, splits it with vertsHorzs and takes the primaries of each half as the grid's X and
// Y coordinates. An empty gridTiling is returned when either axis has exactly one coordinate.
// NOTE(review): _fggge[1:] panics on an empty list, and an empty primaries() result is not
// caught by the `== 0` test because the counts are len-1.
return nil ,_d .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _bcbee ,nil ;};func (_abgfd *textPara )bbox ()_ac .PdfRectangle {return _abgfd .PdfRectangle };func (_fggge rulingList )asTiling ()gridTiling {if _bdea {_fb .Log .Info ("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_fggge ));
};for _cacc ,_ddgce :=range _fggge [1:]{_fgcd :=_fggge [_cacc ];if _fgcd .alignsPrimary (_ddgce )&&_fgcd .alignsSec (_ddgce ){_fb .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_ddgce ,_fgcd );
};};_fggge .sortStrict ();_fggge .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_aeccd ,_cddgg :=_fggge .vertsHorzs ();_ggbaa :=_aeccd .primaries ();_gacf :=_cddgg .primaries ();_dbdc :=len (_ggbaa )-1;_ffdd :=len (_gacf )-1;if _dbdc ==0||_ffdd ==0{return gridTiling {};
};_fgfa :=_ac .PdfRectangle {Llx :_ggbaa [0],Urx :_ggbaa [_dbdc ],Lly :_gacf [0],Ury :_gacf [_ffdd ]};if _bdea {_fb .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_aeccd ));
for _acgaf ,_gafca :=range _aeccd {_cag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_acgaf ,_gafca );};_fb .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_cddgg ));
for _cbbff ,_edff :=range _cddgg {_cag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cbbff ,_edff );};_fb .Log .Info ("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_dbdc ,_ffdd ,_ggbaa ,_gacf );
// asTiling, step 1: build one gridTile per cell of the coordinate grid, stored at index
// y*_dbdc+x. The four rulings around each cell are looked up with findPrimSec and handed to
// _gcgf together with the cell rectangle.
};_gbde :=make ([]gridTile ,_dbdc *_ffdd );for _fgbgbe :=_ffdd -1;_fgbgbe >=0;_fgbgbe --{_gagcd :=_gacf [_fgbgbe ];_gdgg :=_gacf [_fgbgbe +1];for _bfed :=0;_bfed < _dbdc ;_bfed ++{_fedaa :=_ggbaa [_bfed ];_bfeef :=_ggbaa [_bfed +1];_gbgc :=_aeccd .findPrimSec (_fedaa ,_gagcd );
_fbec :=_aeccd .findPrimSec (_bfeef ,_gagcd );_geaa :=_cddgg .findPrimSec (_gagcd ,_fedaa );_cefg :=_cddgg .findPrimSec (_gdgg ,_fedaa );_gafad :=_ac .PdfRectangle {Llx :_fedaa ,Urx :_bfeef ,Lly :_gagcd ,Ury :_gdgg };_bgegd :=_gcgf (_gafad ,_gbgc ,_fbec ,_geaa ,_cefg );
_gbde [_fgbgbe *_dbdc +_bfed ]=_bgegd ;if _bdea {_cag .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_bfed ,_fgbgbe ,_bgegd .String (),_bgegd .Width (),_bgegd .Height ());
};};};if _bdea {_fb .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_fgfa );
// asTiling, step 2 (horizontal coalescing): within each row, a tile whose _fcgbfd flag is set
// absorbs the tiles to its right until one with _fgdd set has been merged, taking over their Urx
// and OR-ing the _gbac and _cccb flags. Merged tiles are stored per row, keyed by Llx.
};_accd :=make ([]map[float64 ]gridTile ,_ffdd );for _adec :=_ffdd -1;_adec >=0;_adec --{if _bdea {_cag .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_adec );};_accd [_adec ]=make (map[float64 ]gridTile ,_dbdc );for _edecd :=0;_edecd < _dbdc ;
_edecd ++{_ggff :=_gbde [_adec *_dbdc +_edecd ];if _bdea {_cag .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_edecd ,_ggff );};if !_ggff ._fcgbfd {continue ;};_fcbbg :=_edecd ;for _ebceb :=_edecd +1;!_ggff ._fgdd &&_ebceb < _dbdc ;
_ebceb ++{_cgff :=_gbde [_adec *_dbdc +_ebceb ];_ggff .Urx =_cgff .Urx ;_ggff ._gbac =_ggff ._gbac ||_cgff ._gbac ;_ggff ._cccb =_ggff ._cccb ||_cgff ._cccb ;_ggff ._fgdd =_cgff ._fgdd ;if _bdea {_cag .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_ebceb ,_cgff ,_ggff );
};_fcbbg =_ebceb ;};if _bdea {_cag .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_edecd ,_fcbbg ,_ggff );};_edecd =_fcbbg ;_accd [_adec ][_ggff .Llx ]=_ggff ;};};_afee :=make (map[float64 ]map[float64 ]gridTile ,_ffdd );
// asTiling, step 3 (vertical coalescing): going from the highest row index down, each tile not
// already absorbed extends downwards through rows that hold a tile with the same Llx and Urx,
// stopping once its _cccb flag is set. Absorbed tiles are recorded in _ggbcc so they are skipped
// later; only tiles for which complete() holds are kept in the result map _afee.
_ggbcc :=make (map[float64 ]map[float64 ]struct{},_ffdd );for _fbfge :=_ffdd -1;_fbfge >=0;_fbfge --{_abacf :=_gbde [_fbfge *_dbdc ].Lly ;_afee [_abacf ]=make (map[float64 ]gridTile ,_dbdc );_ggbcc [_abacf ]=make (map[float64 ]struct{},_dbdc );};if _bdea {_fb .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_fgfa );
};for _bffa :=_ffdd -1;_bffa >=0;_bffa --{_abace :=_gbde [_bffa *_dbdc ].Lly ;_agdag :=_accd [_bffa ];if _bdea {_cag .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_bffa );};for _ ,_bfag :=range _aggbbd (_agdag ){if _ ,_ccca :=_ggbcc [_abace ][_bfag ];
_ccca {continue ;};_fgabd :=_agdag [_bfag ];if _bdea {_cag .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_fgabd .String ());};for _ggfa :=_bffa -1;_ggfa >=0;_ggfa --{if _fgabd ._cccb {break ;};_ffdf :=_accd [_ggfa ];_dggc ,_decg :=_ffdf [_bfag ];
if !_decg {break ;};if _dggc .Urx !=_fgabd .Urx {break ;};_fgabd ._cccb =_dggc ._cccb ;_fgabd .Lly =_dggc .Lly ;if _bdea {_cag .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_dggc .String (),_fgabd .String ());
};_ggbcc [_dggc .Lly ][_dggc .Llx ]=struct{}{};};if _bffa ==0{_fgabd ._cccb =true ;};if _fgabd .complete (){_afee [_abace ][_bfag ]=_fgabd ;};};};_dcgf :=gridTiling {PdfRectangle :_fgfa ,_egfga :_dccd (_afee ),_ebbb :_afbfb (_afee ),_cgdc :_afee };_dcgf .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");
return _dcgf ;};
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
func (_gdab *PageText) ApplyArea(bbox _ac.PdfRectangle) {
	// Keep only the marks whose bounding box intersects the requested area.
	_inArea := make([]*textMark, 0, len(_gdab._aged))
	for _, _mark := range _gdab._aged {
		if !_ecde(_mark.bbox(), bbox) {
			continue
		}
		_inArea = append(_inArea, _mark)
	}

	// Build paragraphs separately for each _adbf value 0, 90, 180 and 270, stopping early once
	// every selected mark has been consumed.
	var _paras paraList
	_remaining := len(_inArea)
	for _angle := 0; _angle < 360 && _remaining > 0; _angle += 90 {
		_sameAngle := make([]*textMark, 0, len(_inArea)-_remaining)
		for _, _mark := range _inArea {
			if _mark._adbf == _angle {
				_sameAngle = append(_sameAngle, _mark)
			}
		}
		if len(_sameAngle) == 0 {
			continue
		}
		_paras = append(_paras, _dgg(_sameAngle, _gdab._abea, nil, nil)...)
		_remaining -= len(_sameAngle)
	}

	// Refresh the three cached views of the page: plain text, text marks and tables.
	_textBuf := new(_e.Buffer)
	_paras.writeText(_textBuf)
	_gdab._cged = _textBuf.String()
	_gdab._gccf = _paras.toTextMarks()
	_gdab._dcgg = _paras.tables()
}

// gridTile is a rectangle with four boolean flags.
type gridTile struct {
	_ac.PdfRectangle
	_gbac   bool
	_fcgbfd bool
	_cccb   bool
	_fgdd   bool
}

// primMinMax returns the smallest and largest _edcba value among the rulings.
// The list must not be empty: its first element is read unconditionally.
func (_afcc rulingList) primMinMax() (float64, float64) {
	_lowest := _afcc[0]._edcba
	_highest := _lowest
	for _i := 1; _i < len(_afcc); _i++ {
		switch _value := _afcc[_i]._edcba; {
		case _value < _lowest:
			_lowest = _value
		case _value > _highest:
			_highest = _value
		}
	}
	return _lowest, _highest
}
// String returns a human readable description of `ss`.
func (_bgb *shapesState) String() string {
	// Reports the number of entries in _fegc and the boolean _dcef ("{N subpaths fresh=B}").
	_count, _flag := len(_bgb._fegc), _bgb._dcef
	return _cag.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", _count, _flag)
}

// textWord is a rectangle together with the text marks (_ggabc) that make up one word.
type textWord struct {
	_ac.PdfRectangle
	// _fgbda is the value that wordBag compares against depth limits when selecting words.
	_fgbda float64
	_gebf  string
	// _ggabc holds the word's marks; computeText joins their strings.
	_ggabc []*textMark
	_abeg  float64
	_gcefe bool
}
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_bb *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	_ctx := &imageExtractContext{_ga: options}
	if _err := _ctx.extractContentStreamImages(_bb._gg, _bb._cb); _err != nil {
		return nil, _err
	}
	_result := &PageImages{Images: _ctx._caa}
	return _result, nil
}

// RenderModeStroke, RenderModeFill and RenderModeClip are single-bit RenderMode flags with the
// values 1, 2 and 4.
const (
	RenderModeStroke RenderMode = 1 << iota
	RenderModeFill
	RenderModeClip
)

// markKind is the integer type of the mark kind constants.
type markKind int

// _agad returns the rectangle's Ury minus the Lly of the bounding box of `_egaf`.
func _agad(_ddfb _ac.PdfRectangle, _egaf bounded) float64 {
	_box := _egaf.bbox()
	return _ddfb.Ury - _box.Lly
}

// _ecde reports whether two rectangles intersect, i.e. both _bcbef and _aecg hold for them.
func _ecde(_dacg, _gec _ac.PdfRectangle) bool {
	if !_bcbef(_dacg, _gec) {
		return false
	}
	return _aecg(_dacg, _gec)
}
// hasLines reports whether the cell's rectangle intersects the rectangle of at least one of the
// given lines. With _agede set, every test is also printed.
func (_abgb compositeCell) hasLines(_debb []*textLine) bool {
	_total := len(_debb)
	for _pos, _line := range _debb {
		_hit := _ecde(_abgb.PdfRectangle, _line.PdfRectangle)
		if _agede {
			_cag.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", _hit, _pos, _total)
			_cag.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", _abgb)
			_cag.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", _line)
		}
		if _hit {
			return true
		}
	}
	return false
}

// writeText writes the text of every paragraph to `_bfea`, skipping paragraphs whose _affbf flag
// is set. Consecutive paragraphs are separated by one space when _abae(para, next) holds and by
// two line feeds otherwise; two line feeds are always written at the end. Write errors are
// ignored.
func (_gag paraList) writeText(_bfea _c.Writer) {
	_last := len(_gag) - 1
	for _pos, _para := range _gag {
		if _para._affbf {
			continue
		}
		_para.writeText(_bfea)
		if _pos == _last {
			continue
		}
		if _abae(_para, _gag[_pos+1]) {
			_bfea.Write([]byte("\u0020"))
			continue
		}
		_bfea.Write([]byte("\u000a"))
		_bfea.Write([]byte("\u000a"))
	}
	_bfea.Write([]byte("\u000a"))
	_bfea.Write([]byte("\u000a"))
}

// taken reports whether the paragraph is nil or has its _cfeff flag set.
func (_cbca *textPara) taken() bool {
	if _cbca == nil {
		return true
	}
	return _cbca._cfeff
}

// isExportable reports whether the table should be exported. A table with _ebabc set always is.
// Otherwise it is exportable as soon as one of its _dcbdf first cells, get(0, i), is missing or
// holds text that is longer than one rune and does not match the list-marker pattern _gbfad.
func (_bedgb *textTable) isExportable() bool {
	if _bedgb._ebabc {
		return true
	}
	// _markerOnly reports whether the first cell at index i exists and holds only a marker.
	_markerOnly := func(_cbbfc int) bool {
		_cell := _bedgb.get(0, _cbbfc)
		if _cell == nil {
			return false
		}
		_text := _cell.text()
		if _g.RuneCountInString(_text) <= 1 {
			return true
		}
		return _gbfad.MatchString(_text)
	}
	for _i := 0; _i < _bedgb._dcbdf; _i++ {
		if _markerOnly(_i) {
			continue
		}
		return true
	}
	return false
}

// getDepthIdx converts the depth `_fbe` to a depth index with _agfb and clamps it to the range
// spanned by the first and last entries of depthIndexes().
func (_fge *wordBag) getDepthIdx(_fbe float64) int {
	_indexes := _fge.depthIndexes()
	_idx := _agfb(_fbe)
	_first, _last := _indexes[0], _indexes[len(_indexes)-1]
	switch {
	case _idx < _first:
		return _first
	case _idx > _last:
		return _last
	}
	return _idx
}

// _aggbbd returns the keys of `_bfdd` in ascending order.
func _aggbbd(_bfdd map[float64]gridTile) []float64 {
	_keys := make([]float64, 0, len(_bfdd))
	for _key := range _bfdd {
		_keys = append(_keys, _key)
	}
	_af.Float64s(_keys)
	return _keys
}

const _daca = 10

// quadraticTo adds only the point (_feff, _bcfe) to the path; the first two coordinates are not
// used. With _efca set the call is logged first.
func (_feda *shapesState) quadraticTo(_gbb, _feaf, _feff, _bcfe float64) {
	if _efca {
		_fb.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	_feda.addPoint(_feff, _bcfe)
}

// sort orders the words of every bucket in _cfeeb in place using _cafa.
func (_fdfg *wordBag) sort() {
	for _, _bucket := range _fdfg._cfeeb {
		_words := _bucket
		_af.Slice(_words, func(_ceaf, _gdecg int) bool {
			return _cafa(_words[_ceaf], _words[_gdecg]) < 0
		})
	}
}

// topoOrder returns paragraph indexes in an order derived from the readBefore relation. It runs
// a depth-first traversal in which a paragraph is emitted only after every not yet visited
// paragraph that it must be read before, and passes the emitted sequence through _ffcc.
func (_cgefa paraList) topoOrder() []int {
	if _dfgc {
		_fb.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	_count := len(_cgefa)
	_seen := make([]bool, _count)
	_emitted := make([]int, 0, _count)
	_byLly := _cgefa.llyOrdering()

	var _visit func(_aeae int)
	_visit = func(_bgcc int) {
		_seen[_bgcc] = true
		for _next := 0; _next < _count; _next++ {
			if !_seen[_next] && _cgefa.readBefore(_byLly, _bgcc, _next) {
				_visit(_next)
			}
		}
		_emitted = append(_emitted, _bgcc)
	}

	for _start := 0; _start < _count; _start++ {
		if _seen[_start] {
			continue
		}
		_visit(_start)
	}
	return _ffcc(_emitted)
}

// stateStack is a stack of text states.
type stateStack []*textState

// highestWord returns the first word in the bucket for depth index `_caacg` whose _fgbda value
// lies in [_gdbe, _bfef], or nil when there is none.
func (_eeaa *wordBag) highestWord(_caacg int, _gdbe, _bfef float64) *textWord {
	for _, _word := range _eeaa._cfeeb[_caacg] {
		_depth := _word._fgbda
		if _gdbe <= _depth && _depth <= _bfef {
			return _word
		}
	}
	return nil
}

// xMean returns the mean of the X coordinates of the ruling's two points, _gffd and _eeebc.
func (_dfaf lineRuling) xMean() float64 {
	_x0, _x1 := _dfaf._gffd.X, _dfaf._eeebc.X
	return 0.5 * (_x0 + _x1)
}
// String returns a description of `l`.
func (_aaff *textLine )String ()string {return _cag .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_aaff ._bcgf ,_aaff .PdfRectangle ,_aaff ._eabc ,_aaff .text ());
};func (_cebec rulingList )splitSec ()[]rulingList {_af .Slice (_cebec ,func (_gfdd ,_bbad int )bool {_daed ,_geaaa :=_cebec [_gfdd ],_cebec [_bbad ];if _daed ._bfeag !=_geaaa ._bfeag {return _daed ._bfeag < _geaaa ._bfeag ;};return _daed ._cbba < _geaaa ._cbba ;
});_affbb :=make (map[*ruling ]struct{},len (_cebec ));_fbbc :=func (_cbdgg *ruling )rulingList {_adaee :=rulingList {_cbdgg };_affbb [_cbdgg ]=struct{}{};for _ ,_ggefg :=range _cebec {if _ ,_cafae :=_affbb [_ggefg ];_cafae {continue ;};for _ ,_bfeac :=range _adaee {if _ggefg .alignsSec (_bfeac ){_adaee =append (_adaee ,_ggefg );
_affbb [_ggefg ]=struct{}{};break ;};};};return _adaee ;};_gfac :=[]rulingList {_fbbc (_cebec [0])};for _ ,_bgdcg :=range _cebec [1:]{if _ ,_cegbg :=_affbb [_bgdcg ];_cegbg {continue ;};_gfac =append (_gfac ,_fbbc (_bgdcg ));};return _gfac ;};func _bafc (_fgbg []TextMark ,_cbfa *int ,_agec TextMark )[]TextMark {_agec .Offset =*_cbfa ;
_fgbg =append (_fgbg ,_agec );*_cbfa +=len (_agec .Text );return _fgbg ;};func (_abff *subpath )clear (){*_abff =subpath {}};type textMark struct{_ac .PdfRectangle ;_adbf int ;_cadaf string ;_bcfd string ;_afac *_ac .PdfFont ;_gba float64 ;_dcabg float64 ;
_ccbb _afd .Matrix ;_cgfb _afd .Point ;_bagc _ac .PdfRectangle ;_bfc _ca .Color ;_bdba _ca .Color ;};func (_deb *wordBag )allWords ()[]*textWord {var _fabb []*textWord ;for _ ,_fbfb :=range _deb ._cfeeb {_fabb =append (_fabb ,_fbfb ...);};return _fabb ;
};func (_ffece *textWord )bbox ()_ac .PdfRectangle {return _ffece .PdfRectangle };func (_afdg *wordBag )minDepth ()float64 {return _afdg ._egbb -(_afdg .Ury -_afdg ._aege )};func (_aadb rulingList )secMinMax ()(float64 ,float64 ){_bcaa ,_ffb :=_aadb [0]._bfeag ,_aadb [0]._cbba ;
for _ ,_gdcc :=range _aadb [1:]{if _gdcc ._bfeag < _bcaa {_bcaa =_gdcc ._bfeag ;};if _gdcc ._cbba > _ffb {_ffb =_gdcc ._cbba ;};};return _bcaa ,_ffb ;};func (_edea *textObject )moveLP (_bedg ,_gbed float64 ){_edea ._bab .Concat (_afd .NewMatrix (1,0,0,1,_bedg ,_gbed ));
_edea ._bgc =_edea ._bab ;};func (_abbec rectRuling )checkWidth (_faaf ,_cgdgg float64 )(float64 ,bool ){_edad :=_cgdgg -_faaf ;_fegb :=_edad <=_gbbd ;return _edad ,_fegb ;};
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// Elements returns the TextMarks in `ma`.
func (_edbb *TextMarkArray) Elements() []TextMark {
	return _edbb._dgf
}

// pop removes the top entry of the stack and returns a pointer to a copy of the state it pointed
// to. It returns nil when the stack is empty.
func (_fcg *stateStack) pop() *textState {
	if _fcg.empty() {
		return nil
	}
	stack := *_fcg
	last := len(stack) - 1
	top := *stack[last] // Copy the state, not the pointer held by the stack.
	*_fcg = stack[:last]
	return &top
}

// _cggg computes the absolute x and y distances between the two points and returns
// `_dgbd(|dy|, |dx|)`.
func _cggg(_bcae, _aedbg _afd.Point) bool {
	dx := _gc.Abs(_bcae.X - _aedbg.X)
	dy := _gc.Abs(_bcae.Y - _aedbg.Y)
	return _dgbd(dy, dx)
}

// _abae returns true when either paragraph has `_affbf` set. Otherwise it returns the result of
// `_gcceb` applied to the difference of the two paragraph depths.
func _abae(_cdff, _gcgc *textPara) bool {
	return _cdff._affbf || _gcgc._affbf || _gcceb(_cdff.depth()-_gcgc.depth())
}

// toCellTextMarks returns the text marks of all lines of the paragraph, updating the running
// offset `*_cbbg`. When `_ecgaa` is set and a line other than the last ends in a hyphen, its
// marks are passed through `_fafc` and no separator follows. Every other line except the last is
// followed by a mark produced by `_bafe` with the text `_aaee` returns for the `_bcgf` values of
// the line and its successor.
func (_edcbda *textPara) toCellTextMarks(_cbbg *int) []TextMark {
	var marks []TextMark
	lastIdx := len(_edcbda._ddeb) - 1
	for i, line := range _edcbda._ddeb {
		isLast := i == lastIdx
		lineMarks := line.toTextMarks(_cbbg)
		hyphenated := _ecgaa && line.endsInHyphen() && !isLast
		if hyphenated {
			lineMarks = _fafc(lineMarks, _cbbg)
		}
		marks = append(marks, lineMarks...)
		if !hyphenated && !isLast {
			separator := _aaee(line._bcgf, _edcbda._ddeb[i+1]._bcgf)
			marks = _bafe(marks, _cbbg, separator)
		}
	}
	return marks
}

// inDiacriticArea reports whether `_ffge` lies close enough to the receiver: the sum of the
// differences of the left and right edges must be smaller in magnitude than `_ffcd` times the
// receiver's width, and the difference of the bottom edges smaller in magnitude than `_ffcd`
// times the receiver's height.
func (_agba *textMark) inDiacriticArea(_ffge *textMark) bool {
	dLeft := _agba.Llx - _ffge.Llx
	dRight := _agba.Urx - _ffge.Urx
	dBottom := _agba.Lly - _ffge.Lly
	if !(_gc.Abs(dLeft+dRight) < _agba.Width()*_ffcd) {
		return false
	}
	return _gc.Abs(dBottom) < _agba.Height()*_ffcd
}

// _aecc concatenates all the ruling lists in `_fcfe` and returns the result of vertsHorzs on the
// combined list.
func _aecc(_fcfe []rulingList) (rulingList, rulingList) {
	var combined rulingList
	for _, list := range _fcfe {
		combined = append(combined, list...)
	}
	return combined.vertsHorzs()
}

// toTextMarks returns the text marks of the words of the line, updating the running offset
// `*_bfgd`. A mark containing a single space is inserted (via `_bafe`) before every word that
// has `_gcefe` set.
func (_gafe *textLine) toTextMarks(_bfgd *int) []TextMark {
	var marks []TextMark
	for _, word := range _gafe._cadc {
		if word._gcefe {
			marks = _bafe(marks, _bfgd, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_bfgd)...)
	}
	return marks
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// ToTextMark returns the public view of `tm`.
// Note that BBox is taken from the `_bagc` field, not from the embedded PdfRectangle.
func (_dbgd *textMark) ToTextMark() TextMark {
	return TextMark{
		Text:        _dbgd._cadaf,
		Original:    _dbgd._bcfd,
		BBox:        _dbgd._bagc,
		Font:        _dbgd._afac,
		FontSize:    _dbgd._gba,
		FillColor:   _dbgd._bfc,
		StrokeColor: _dbgd._bdba,
		Orientation: _dbgd._adbf,
	}
}
// NOTE: this is generated, obfuscated code; each physical line packs several definitions.
// Definitions that start on the next two lines:
//   - _gff binds the float64 `_gbbag` as the third argument of the predicate `_adc` and returns
//     the resulting two-argument predicate.
//   - (*imageExtractContext).extractContentStreamImages parses the content stream `_ggg`
//     (returning the parse error, if any), lazily creates the `_gfb` cache map and the `_ga`
//     options, registers `processOperand` as the handler for all operands and returns the result
//     of processing the parsed operations with the resources `_ebaf`.
func _gff (_adc func (*wordBag ,*textWord ,float64 )bool ,_gbbag float64 )func (*wordBag ,*textWord )bool {return func (_ecdd *wordBag ,_dabf *textWord )bool {return _adc (_ecdd ,_dabf ,_gbbag )};};func (_cga *imageExtractContext )extractContentStreamImages (_ggg string ,_ebaf *_ac .PdfPageResources )error {_bd :=_ag .NewContentStreamParser (_ggg );
_ee ,_cfe :=_bd .Parse ();if _cfe !=nil {return _cfe ;};if _cga ._gfb ==nil {_cga ._gfb =map[*_gdd .PdfObjectStream ]*cachedImage {};};if _cga ._ga ==nil {_cga ._ga =&ImageExtractOptions {};};_aec :=_ag .NewContentStreamProcessor (*_ee );_aec .AddHandler (_ag .HandlerConditionEnumAllOperands ,"",_cga .processOperand );
// Definitions that start on the next two lines:
//   - intSet.add inserts `_eggbc` into the set.
//   - _cgfe reports whether the two floats differ by at most `_bade`.
//   - _ecdfc builds a 2x2 textTable: `_gcfcg` at (0,0), `_cdbb` at (1,0), `_bacg` at (0,1) and
//     `_caacf` at (1,1), where the first put argument is bounded by `_agac` and the second by
//     `_dcbdf`.
//   - gridTile.complete reports whether numBorders() is 4.
//   - _gfe returns a textState with `_bgg` 100, render mode RenderModeFill and `_beb` set to the
//     given rectangle.
return _aec .Process (_ebaf );};func (_agff intSet )add (_eggbc int ){_agff [_eggbc ]=struct{}{}};func _cgfe (_cgea ,_bbgg float64 )bool {return _gc .Abs (_cgea -_bbgg )<=_bade };func _ecdfc (_gcfcg ,_cdbb ,_bacg ,_caacf *textPara )*textTable {_affc :=&textTable {_agac :2,_dcbdf :2,_abccf :make (map[uint64 ]*textPara ,4)};
_affc .put (0,0,_gcfcg );_affc .put (1,0,_cdbb );_affc .put (0,1,_bacg );_affc .put (1,1,_caacf );return _affc ;};func (_caabc gridTile )complete ()bool {return _caabc .numBorders ()==4};func _gfe (_dcc _ac .PdfRectangle )textState {return textState {_bgg :100,_acgdg :RenderModeFill ,_beb :_dcc };
// rulingList.intersections (next four lines) splits the ruling indexes by kind (`_aafafg` and
// `_eccgd`). It returns nil when there are fewer than `_fee`+1 rulings of the first kind or fewer
// than `_edcf`+1 of the second, or (after a debug log) when their total exceeds `_edaeb`.
// Otherwise it returns, for every ruling index that takes part in at least one intersection, the
// set of indexes of the rulings of the other kind it intersects.
};func (_accbc rulingList )intersections ()map[int ]intSet {var _gffa ,_cgbeg []int ;for _decbg ,_gaec :=range _accbc {switch _gaec ._agcgg {case _aafafg :_gffa =append (_gffa ,_decbg );case _eccgd :_cgbeg =append (_cgbeg ,_decbg );};};if len (_gffa )< _fee +1||len (_cgbeg )< _edcf +1{return nil ;
};if len (_gffa )+len (_cgbeg )> _edaeb {_fb .Log .Debug ("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",len (_accbc ),len (_gffa ),len (_cgbeg ));
return nil ;};_bggdd :=make (map[int ]intSet ,len (_gffa )+len (_cgbeg ));for _ ,_cbad :=range _gffa {for _ ,_fceb :=range _cgbeg {if _accbc [_cbad ].intersects (_accbc [_fceb ]){if _ ,_cdbaf :=_bggdd [_cbad ];!_cdbaf {_bggdd [_cbad ]=make (intSet );};
// The next line also declares the subpath type (a point slice `_egd` and a flag `_ega`) and
// starts paraList.applyTables, which returns a new list made of one newTablePara() per table
// followed by every paragraph of the receiver whose `_cfeff` flag is not set.
if _ ,_gcd :=_bggdd [_fceb ];!_gcd {_bggdd [_fceb ]=make (intSet );};_bggdd [_cbad ].add (_fceb );_bggdd [_fceb ].add (_cbad );};};};return _bggdd ;};type subpath struct{_egd []_afd .Point ;_ega bool ;};func (_cgfc paraList )applyTables (_gfbd []*textTable )paraList {var _acede paraList ;
// wordBag.removeDuplicates (starting on the next line): for every non-empty depth bin it takes
// the first word, derives a tolerance `_bbce` * `_abeg` and scans the bins returned by
// depthBand(depth, depth+tolerance). Within a bin, a word is marked as a duplicate of another
// (not already marked) word when both have the same text `_gebf` and all four bounding box
// coordinates differ by less than the tolerance. Marked words are removed by compacting the bin
// in place; a bin that becomes empty is deleted from `_cfeeb`.
for _ ,_ddeg :=range _gfbd {_acede =append (_acede ,_ddeg .newTablePara ());};for _ ,_cfedf :=range _cgfc {if _cfedf ._cfeff {continue ;};_acede =append (_acede ,_cfedf );};return _acede ;};func (_bdc *wordBag )removeDuplicates (){if _ecfa {_fb .Log .Info ("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_bdc .text ());
};for _ ,_fbffd :=range _bdc .depthIndexes (){if len (_bdc ._cfeeb [_fbffd ])==0{continue ;};_adaf :=_bdc ._cfeeb [_fbffd ][0];_dbgae :=_bbce *_adaf ._abeg ;_addc :=_adaf ._fgbda ;for _ ,_cdad :=range _bdc .depthBand (_addc ,_addc +_dbgae ){_cffa :=map[*textWord ]struct{}{};
_fbda :=_bdc ._cfeeb [_cdad ];for _ ,_edccad :=range _fbda {if _ ,_gcac :=_cffa [_edccad ];_gcac {continue ;};for _ ,_ddba :=range _fbda {if _ ,_efba :=_cffa [_ddba ];_efba {continue ;};if _ddba !=_edccad &&_ddba ._gebf ==_edccad ._gebf &&_gc .Abs (_ddba .Llx -_edccad .Llx )< _dbgae &&_gc .Abs (_ddba .Urx -_edccad .Urx )< _dbgae &&_gc .Abs (_ddba .Lly -_edccad .Lly )< _dbgae &&_gc .Abs (_ddba .Ury -_edccad .Ury )< _dbgae {_cffa [_ddba ]=struct{}{};
};};};if len (_cffa )> 0{_bgeg :=0;for _ ,_ccbd :=range _fbda {if _ ,_gae :=_cffa [_ccbd ];!_gae {_fbda [_bgeg ]=_ccbd ;_bgeg ++;};};_bdc ._cfeeb [_cdad ]=_fbda [:len (_fbda )-len (_cffa )];if len (_bdc ._cfeeb [_cdad ])==0{delete (_bdc ._cfeeb ,_cdad );
// textTable.compositeRowCorridors (starting on the next line): for every second index from 1 to
// `_dcbdf`-1 it gathers the composite cells stored in `_cead` for that index and, when there is
// at least one, stores the result of `_dedd` on them under that index. Index 0 is never examined.
// When `_agede` is set it also logs and prints the values.
};};};};};func (_agfbe *textTable )compositeRowCorridors ()map[int ][]float64 {_bacd :=make (map[int ][]float64 ,_agfbe ._dcbdf );if _agede {_fb .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_agfbe ._dcbdf );
};for _dadbd :=1;_dadbd < _agfbe ._dcbdf ;_dadbd ++{var _efcb []compositeCell ;for _aagb :=0;_aagb < _agfbe ._agac ;_aagb ++{if _bgcb ,_dage :=_agfbe ._cead [_addg (_aagb ,_dadbd )];_dage {_efcb =append (_efcb ,_bgcb );};};if len (_efcb )==0{continue ;
// Definitions that start on the next two lines:
//   - textObject.getFillColor converts the non-stroking colorspace/color of the graphics state
//     `_gbc` with `_aecce`.
//   - _efae returns the negated Lly of the bounding box of its argument.
//   - _addg packs two ints into one uint64 key: first*0x1000000 + second.
//   - _dgba returns a ruling of kind `_eccgd` with `_edcba` = Lly, `_bfeag` = Llx and
//     `_cbba` = Urx of the rectangle.
};_aagfa :=_dedd (_efcb );_bacd [_dadbd ]=_aagfa ;if _agede {_cag .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_dadbd ,_aagfa );};};return _bacd ;};func (_cgfa *textObject )getFillColor ()_ca .Color {return _aecce (_cgfa ._gbc .ColorspaceNonStroking ,_cgfa ._gbc .ColorNonStroking );
};func _efae (_faab bounded )float64 {return -_faab .bbox ().Lly };func _addg (_cbag ,_cdeca int )uint64 {return uint64 (_cbag )*0x1000000+uint64 (_cdeca )};func _dgba (_dgfb _ac .PdfRectangle )*ruling {return &ruling {_agcgg :_eccgd ,_edcba :_dgfb .Lly ,_bfeag :_dgfb .Llx ,_cbba :_dgfb .Urx };
// Definitions that start on the next line:
//   - paraList.reorder permutes the list in place so that element i becomes the former element
//     `_cggd[i]`; `_cggd` is expected to have the same length as the list.
//   - shapesState.closePath: when `_dcef` is set, a new subpath created by `_aab(_dffc)` is
//     appended to `_fegc` and the flag is cleared; otherwise, when there are no subpaths, the
//     flag is cleared and the method returns. In the remaining cases the last subpath is closed.
//   - textObject.getStrokeColor converts the stroking colorspace/color of `_gbc` with `_aecce`.
};func (_egbg paraList )reorder (_cggd []int ){_gdfa :=make (paraList ,len (_egbg ));for _gcbe ,_afdgf :=range _cggd {_gdfa [_gcbe ]=_egbg [_afdgf ];};copy (_egbg ,_gdfa );};func (_dgbb *shapesState )closePath (){if _dgbb ._dcef {_dgbb ._fegc =append (_dgbb ._fegc ,_aab (_dgbb ._dffc ));
_dgbb ._dcef =false ;}else if len (_dgbb ._fegc )==0{if _efca {_fb .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");};_dgbb ._dcef =false ;return ;};_dgbb ._fegc [len (_dgbb ._fegc )-1].close ();
if _efca {_fb .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_dgbb );};};func (_fcaa *textObject )getStrokeColor ()_ca .Color {return _aecce (_fcaa ._gbc .ColorspaceStroking ,_fcaa ._gbc .ColorStroking );};
// lineRuling describes a ruling given by a line between the two points `_gffd` and `_eeebc`,
// together with its ruling kind, mark kind and colour.
type lineRuling struct {
	_fbab rulingKind
	_ebbe markKind
	_ca.Color
	_gffd, _eeebc _afd.Point
}

// moveTextSetLeading stores `-_bdb` in the `_dgc` field of the text state and then moves the
// line position by (`_aaf`, `_bdb`) via moveLP.
func (_dbc *textObject) moveTextSetLeading(_aaf, _bdb float64) {
	_dbc._fga._dgc = -_bdb
	_dbc.moveLP(_aaf, _bdb)
}

// firstReadingIndex starts from depth index `_cded` and examines the depth indexes returned by
// depthBand for the range that begins at (`_cded`+1) * `_eddf` and extends by `_eaeg` times the
// `_abeg` value of the first word at `_cded`. It returns the index whose first word compares
// lowest under `_cafa`, or `_cded` itself when no examined index compares lower.
func (_gdac *wordBag) firstReadingIndex(_cded int) int {
	size := _gdac.firstWord(_cded)._abeg
	lo := float64(_cded+1) * _eddf
	hi := lo + _eaeg*size

	best := _cded
	for _, idx := range _gdac.depthBand(lo, hi) {
		if _cafa(_gdac.firstWord(idx), _gdac.firstWord(best)) < 0 {
			best = idx
		}
	}
	return best
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// String returns a human readable description of `path`: the number of points followed by all
// the points when there are at most 5 of them, otherwise by the first two points, an ellipsis and
// the last point.
func (_befe *subpath )String ()string {_gfae :=_befe ._egd ;_bccf :=len (_gfae );if _bccf <=5{return _cag .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_bccf ,_gfae );};return _cag .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_bccf ,_gfae [0],_gfae [1],_gfae [_bccf -1]);
// compositeCell.split (starting on the next line) divides the composite cell into a textTable
// with len(`_fafa`)+1 columns (`_agac`) and len(`_bdfd`)+1 rows (`_dcbdf`).
//   - `_bdfd` and `_fafa` are first passed through `_gagd` together with the cell's Ury/Lly and
//     Llx/Urx respectively (presumably adding the outer cell edges to the corridors; confirm
//     against `_gagd`).
//   - The cell's paraList is sorted IN PLACE by Lly and then Llx.
//   - A rectangle is computed for each (column, row) from consecutive values of `_fafa` (Llx, Urx)
//     and `_bdfd` (Ury, Lly).
//   - Each line of the paragraphs is assigned to the first rectangle, scanning rows and then
//     columns, for which `_gfaf(rect, line.PdfRectangle)` holds; lines matching no rectangle are
//     dropped.
//   - For every (column, row) that received lines, `_efagec(rect, lines)` is stored in the table.
// Everything guarded by `_agede` is debug output only.
};func (_aacg compositeCell )split (_bdfd ,_fafa []float64 )*textTable {_geedd :=len (_bdfd )+1;_cbgae :=len (_fafa )+1;if _agede {_fb .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_cbgae ,_geedd ,_aacg ,_bdfd ,_fafa );
_cag .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_aacg .paraList ));for _fdgag ,_gbcg :=range _aacg .paraList {_cag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fdgag ,_gbcg .String ());
};_cag .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_aacg .lines ()));for _aafaf ,_cfd :=range _aacg .lines (){_cag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_aafaf ,_cfd );};};_bdfd =_gagd (_bdfd ,_aacg .Ury ,_aacg .Lly );
_fafa =_gagd (_fafa ,_aacg .Llx ,_aacg .Urx );_agfd :=make (map[uint64 ]*textPara ,_cbgae *_geedd );_dacf :=textTable {_agac :_cbgae ,_dcbdf :_geedd ,_abccf :_agfd };_acgfg :=_aacg .paraList ;_af .Slice (_acgfg ,func (_fefcg ,_adee int )bool {_gdbeg ,_ffgc :=_acgfg [_fefcg ],_acgfg [_adee ];
// `_edag` (built below) maps the `_addg(column, row)` key to the rectangle of that table cell.
_cbdc ,_adad :=_gdbeg .Lly ,_ffgc .Lly ;if _cbdc !=_adad {return _cbdc < _adad ;};return _gdbeg .Llx < _ffgc .Llx ;});_edag :=make (map[uint64 ]_ac .PdfRectangle ,_cbgae *_geedd );for _aafdf ,_abga :=range _bdfd [1:]{_abfc :=_bdfd [_aafdf ];for _bcfdf ,_gdca :=range _fafa [1:]{_fbag :=_fafa [_bcfdf ];
_edag [_addg (_bcfdf ,_aafdf )]=_ac .PdfRectangle {Llx :_fbag ,Urx :_gdca ,Lly :_abga ,Ury :_abfc };};};if _agede {_fb .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073");
_cag .Printf ("\u0020\u0020\u0020\u0020");for _eeae :=0;_eeae < _cbgae ;_eeae ++{_cag .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_eeae );};_cag .Println ();for _aagf :=0;_aagf < _geedd ;_aagf ++{_cag .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_aagf );
// `_cffaf` (defined below) returns the (column, row) of the first cell rectangle accepted by
// `_gfaf` for a line, or (-1, -1) when there is none.
for _agbca :=0;_agbca < _cbgae ;_agbca ++{_cag .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_edag [_addg (_agbca ,_aagf )]);};_cag .Println ();};};_cffaf :=func (_acgac *textLine )(int ,int ){for _edfa :=0;_edfa < _geedd ;_edfa ++{for _defeeb :=0;_defeeb < _cbgae ;
_defeeb ++{if _gfaf (_edag [_addg (_defeeb ,_edfa )],_acgac .PdfRectangle ){return _defeeb ,_edfa ;};};};return -1,-1;};_afec :=make (map[uint64 ][]*textLine ,_cbgae *_geedd );for _ ,_dbe :=range _acgfg .lines (){_fcbdc ,_ceec :=_cffaf (_dbe );if _fcbdc < 0{continue ;
// Final pass: build a paragraph for every cell that received at least one line.
};_afec [_addg (_fcbdc ,_ceec )]=append (_afec [_addg (_fcbdc ,_ceec )],_dbe );};for _agee :=0;_agee < len (_bdfd )-1;_agee ++{_ffed :=_bdfd [_agee ];_aafb :=_bdfd [_agee +1];for _cgcd :=0;_cgcd < len (_fafa )-1;_cgcd ++{_cabg :=_fafa [_cgcd ];_fdfe :=_fafa [_cgcd +1];
_eedb :=_ac .PdfRectangle {Llx :_cabg ,Urx :_fdfe ,Lly :_aafb ,Ury :_ffed };_cdfc :=_afec [_addg (_cgcd ,_agee )];if len (_cdfc )==0{continue ;};_bdfa :=_efagec (_eedb ,_cdfc );_dacf .put (_cgcd ,_agee ,_bdfa );};};return &_dacf ;};
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
type ImageMark struct{Image *_ac .Image ;
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ;Height float64 ;
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// Position of the image in PDF coordinates (lower left corner).
X float64 ;Y float64 ;
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// Angle in degrees, if rotated.
Angle float64 ;};func (_efgd *ruling )alignsSec (_ecfbg *ruling )bool {const _bdcc =_gbbd +1.0;return _efgd ._bfeag -_bdcc <=_ecfbg ._cbba &&_ecfbg ._bfeag -_bdcc <=_efgd ._cbba ;};func (_dada *subpath )removeDuplicates (){if len (_dada ._egd )==0{return ;
};_ggbc :=[]_afd .Point {_dada ._egd [0]};for _ ,_gced :=range _dada ._egd [1:]{if !_cdbg (_gced ,_ggbc [len (_ggbc )-1]){_ggbc =append (_ggbc ,_gced );};};_dada ._egd =_ggbc ;};
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// String returns a string describing `pt`: a header line "-PageText: N elements", one line per
// text mark in `_aged` and a matching footer line that starts with "+".
func (_gafc PageText )String ()string {_eabf :=_cag .Sprintf ("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073",len (_gafc ._aged ));_fcgb :=[]string {"\u002d"+_eabf };for _ ,_ddf :=range _gafc ._aged {_fcgb =append (_fcgb ,_ddf .String ());
// paraList.computeEBBoxes (starting on the next line) computes the `_gbaa` rectangle of every
// paragraph:
//   1. `_gbaa` is initialised to the paragraph's own PdfRectangle.
//   2. From the paragraphs returned by yNeighbours(0), a left limit (largest Urx of those lying
//      entirely to the left) and a right limit (smallest Llx of those lying entirely to the
//      right) are derived.
//   3. For every other paragraph whose Ury is not above this paragraph's Lly, Llx is extended to
//      that paragraph's Llx, or else Urx to its Urx, when that stays within the limits.
// When `_bfae` is set the results are also copied back into PdfRectangle.
};_fcgb =append (_fcgb ,"\u002b"+_eabf );return _cf .Join (_fcgb ,"\u000a");};func (_ceeb paraList )computeEBBoxes (){if _dccc {_fb .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a");};for _ ,_bccd :=range _ceeb {_bccd ._gbaa =_bccd .PdfRectangle ;
};_facec :=_ceeb .yNeighbours (0);for _cafe ,_gfff :=range _ceeb {_acde :=_gfff ._gbaa ;_gdgb ,_egad :=-1.0e9,+1.0e9;for _ ,_afdd :=range _facec [_gfff ]{_ggbe :=_ceeb [_afdd ]._gbaa ;if _ggbe .Urx < _acde .Llx {_gdgb =_gc .Max (_gdgb ,_ggbe .Urx );}else if _acde .Urx < _ggbe .Llx {_egad =_gc .Min (_egad ,_ggbe .Llx );
};};for _fgec ,_edcca :=range _ceeb {_dgad :=_edcca ._gbaa ;if _cafe ==_fgec ||_dgad .Ury > _acde .Lly {continue ;};if _gdgb <=_dgad .Llx &&_dgad .Llx < _acde .Llx {_acde .Llx =_dgad .Llx ;}else if _dgad .Urx <=_egad &&_acde .Urx < _dgad .Urx {_acde .Urx =_dgad .Urx ;
};};if _dccc {_cag .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_cafe ,_gfff ._gbaa ,_acde ,_bgdca (_gfff .text (),50));};_gfff ._gbaa =_acde ;};if _bfae {for _ ,_fdcc :=range _ceeb {_fdcc .PdfRectangle =_fdcc ._gbaa ;
// Definitions that start on the next three lines:
//   - _dgfaa returns Llx of the first argument's bounding box minus Urx of the second's.
//   - textPara: a paragraph with its bounding box, the extended box `_gbaa` written by
//     computeEBBoxes, its lines `_ddeb`, an optional table `_ccec`, two flags and four links to
//     other paragraphs.
//   - shapesState.addPoint converts (`_bff`, `_bbc`) with devicePoint and adds it to the subpath
//     returned by establishSubpath; when that is nil the point is stored in `_dffc` and `_dcef`
//     is set instead.
//   - textLine.text concatenates the `_gebf` text of the words, inserting a space before each
//     word that has `_gcefe` set.
};};};func _dgfaa (_abgf ,_gdgf bounded )float64 {return _abgf .bbox ().Llx -_gdgf .bbox ().Urx };type textPara struct{_ac .PdfRectangle ;_gbaa _ac .PdfRectangle ;_ddeb []*textLine ;_ccec *textTable ;_cfeff bool ;_affbf bool ;_bddfg *textPara ;_cegf *textPara ;
_dcaba *textPara ;_aegf *textPara ;};func (_eegb *shapesState )addPoint (_bff ,_bbc float64 ){_ebgg :=_eegb .establishSubpath ();_fcca :=_eegb .devicePoint (_bff ,_bbc );if _ebgg ==nil {_eegb ._dcef =true ;_eegb ._dffc =_fcca ;}else {_ebgg .add (_fcca );
// PageText.computeViews (starting at the end of the next line) derives the cached views of the
// page text:
//   - rulings are gathered from `_cagb` (when `_ffac` is set) and `_bfa` (when `_gcbg` is set)
//     and converted with toTilings;
//   - for each orientation 0, 90, 180 and 270, while unprocessed marks remain, the marks with
//     that `_adbf` value are turned into paragraphs by `_dgg`;
//   - the paragraphs are written out as text (`_cged`), text marks (`_gccf`) and tables (`_dcgg`).
};};func (_ebgd *textLine )text ()string {var _fcgbf []string ;for _ ,_gfdc :=range _ebgd ._cadc {if _gfdc ._gcefe {_fcgbf =append (_fcgbf ,"\u0020");};_fcgbf =append (_fcgbf ,_gfdc ._gebf );};return _cf .Join (_fcgbf ,"");};func (_baaa *PageText )computeViews (){var _bgcd rulingList ;
if _ffac {_cacg :=_fbgcd (_baaa ._cagb );_bgcd =append (_bgcd ,_cacg ...);};if _gcbg {_gcfb :=_bac (_baaa ._bfa );_bgcd =append (_bgcd ,_gcfb ...);};_bgcd ,_feac :=_bgcd .toTilings ();var _dafa paraList ;_gfce :=len (_baaa ._aged );for _bfga :=0;_bfga < 360&&_gfce > 0;
_bfga +=90{_ceb :=make ([]*textMark ,0,len (_baaa ._aged )-_gfce );for _ ,_fcfa :=range _baaa ._aged {if _fcfa ._adbf ==_bfga {_ceb =append (_ceb ,_fcfa );};};if len (_ceb )> 0{_fbgc :=_dgg (_ceb ,_baaa ._abea ,_bgcd ,_feac );_dafa =append (_dafa ,_fbgc ...);
_gfce -=len (_ceb );};};_abcd :=new (_e .Buffer );_dafa .writeText (_abcd );_baaa ._cged =_abcd .String ();_baaa ._gccf =_dafa .toTextMarks ();_baaa ._dcgg =_dafa .tables ();if _agede {_fb .Log .Info ("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064",len (_baaa ._dcgg ));
// rulingList.augmentGrid (starting on the next line) splits the rulings with vertsHorzs and,
// when both results are non-empty, adds up to four rulings: rulings of kind `_aafafg` at the
// Llx/Urx of the second list's bounding box when that box extends more than `_bade` beyond the
// first list's box, and rulings of kind `_eccgd` at the Lly/Ury of the first list's box when it
// extends more than `_bade` beyond the second's. It returns the (possibly extended) two lists.
};};func (_cagg rulingList )augmentGrid ()(rulingList ,rulingList ){_gfeb ,_agecb :=_cagg .vertsHorzs ();if len (_gfeb )==0||len (_agecb )==0{return _gfeb ,_agecb ;};_caeb ,_aaad :=_gfeb ,_agecb ;_adgd :=_gfeb .bbox ();_gcbb :=_agecb .bbox ();if _adce {_fb .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_adgd );
_fb .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_gcbb );};var _dfcdf ,_daga ,_fdea ,_eabgf *ruling ;if _gcbb .Llx < _adgd .Llx -_bade {_dfcdf =&ruling {_aaec :_adbg ,_agcgg :_aafafg ,_edcba :_gcbb .Llx ,_bfeag :_adgd .Lly ,_cbba :_adgd .Ury };
_gfeb =append (rulingList {_dfcdf },_gfeb ...);};if _gcbb .Urx > _adgd .Urx +_bade {_daga =&ruling {_aaec :_adbg ,_agcgg :_aafafg ,_edcba :_gcbb .Urx ,_bfeag :_adgd .Lly ,_cbba :_adgd .Ury };_gfeb =append (_gfeb ,_daga );};if _adgd .Lly < _gcbb .Lly -_bade {_fdea =&ruling {_aaec :_adbg ,_agcgg :_eccgd ,_edcba :_adgd .Lly ,_bfeag :_gcbb .Llx ,_cbba :_gcbb .Urx };
_agecb =append (rulingList {_fdea },_agecb ...);};if _adgd .Ury > _gcbb .Ury +_bade {_eabgf =&ruling {_aaec :_adbg ,_agcgg :_eccgd ,_edcba :_adgd .Ury ,_bfeag :_gcbb .Llx ,_cbba :_gcbb .Urx };_agecb =append (_agecb ,_eabgf );};if len (_gfeb )+len (_agecb )==len (_cagg ){return _caeb ,_aaad ;
// The next lines also declare textState (the text state parameters, among them the font `_eedd`
// and render mode `_acgdg`) and paraList.extractTables, which returns the list unchanged when it
// has fewer than `_eecc` paragraphs and otherwise applies the tables found by findTables.
};_cgbcf :=append (_gfeb ,_agecb ...);_cagg .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_cgbcf .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");return _gfeb ,_agecb ;};type textState struct{_feba float64 ;_eag float64 ;_bgg float64 ;
_dgc float64 ;_cef float64 ;_acgdg RenderMode ;_fec float64 ;_eedd *_ac .PdfFont ;_beb _ac .PdfRectangle ;_ededf int ;_eca int ;};func (_cbfgg paraList )extractTables (_cagbe []gridTiling )paraList {if _agede {_fb .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_cbfgg ));
};if len (_cbfgg )< _eecc {return _cbfgg ;};_aced :=_cbfgg .findTables (_cagbe );if _agede {_fb .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_aced ));
for _cfcdb ,_cccge :=range _aced {_cccge .log (_cag .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_cfcdb ));};};return _cbfgg .applyTables (_aced );};
// String returns a human readable description of `vecs`: the counts of the two lists returned
// by vertsHorzs and, when both are non-empty, the rectangle spanned by the `_edcba` values of
// their first and last elements.
func (_fbeg rulingList) String() string {
	if len(_fbeg) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}

	verts, horzs := _fbeg.vertsHorzs()
	nVerts, nHorzs := len(verts), len(horzs)
	if nVerts == 0 || nHorzs == 0 {
		return _cag.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", nVerts, nHorzs)
	}

	bounds := _ac.PdfRectangle{
		Llx: verts[0]._edcba,
		Urx: verts[nVerts-1]._edcba,
		Lly: horzs[nHorzs-1]._edcba,
		Ury: horzs[0]._edcba,
	}
	return _cag.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", nVerts, nHorzs, bounds)
}
2022-02-05 21:34:53 +00:00
// String returns a string describing `ma`.
2022-03-13 12:41:53 +00:00
func (_ddb TextMarkArray )String ()string {_aafd :=len (_ddb ._dgf );if _aafd ==0{return "\u0045\u004d\u0050T\u0059";};_ecc :=_ddb ._dgf [0];_fdc :=_ddb ._dgf [_aafd -1];return _cag .Sprintf ("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d",_aafd ,_ecc ,_fdc );
};func _cbdb (_gfcc ,_efgg *textPara )bool {return _bcbef (_gfcc ._gbaa ,_efgg ._gbaa )};
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// NewFromContents creates a new extractor from contents and page resources.
// The returned error is always nil.
func NewFromContents(contents string, resources *_ac.PdfPageResources) (*Extractor, error) {
	const _ba = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"
	extractor := &Extractor{
		_gg:  contents,
		_cb:  resources,
		_gde: map[string]fontEntry{},
		_agb: map[string]textResult{},
	}
	_fg.TrackUse(_ba)
	return extractor, nil
}

// applyRemovals removes from the bag the words listed in `_gabc`, which maps a depth index to
// the set of words to drop from that index's bin. A bin that would become empty is deleted from
// `_cfeeb`; otherwise it is replaced by a freshly allocated slice holding the surviving words in
// their original order. Every word in a removal set is expected to be present in its bin.
func (_dadg *wordBag) applyRemovals(_gabc map[int]map[*textWord]struct{}) {
	for depthIdx, doomed := range _gabc {
		if len(doomed) == 0 {
			continue
		}
		bin := _dadg._cfeeb[depthIdx]
		remaining := len(bin) - len(doomed)
		if remaining == 0 {
			delete(_dadg._cfeeb, depthIdx)
			continue
		}
		survivors := make([]*textWord, remaining)
		next := 0
		for _, word := range bin {
			if _, drop := doomed[word]; drop {
				continue
			}
			survivors[next] = word
			next++
		}
		_dadg._cfeeb[depthIdx] = survivors
	}
}

// _dffge returns the integers 0..`_aeea`-1 sorted with `_aeedg` as the "less" function, which is
// called with the values (not the positions) being compared.
func _dffge(_aeea int, _aeedg func(int, int) bool) []int {
	order := make([]int, _aeea)
	for i := range order {
		order[i] = i
	}
	less := func(a, b int) bool { return _aeedg(order[a], order[b]) }
	_af.Slice(order, less)
	return order
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// Text returns the extracted page text.
func (_bcce PageText) Text() string {
	return _bcce._cged
}

// _eadb merges word bags: a bag is absorbed into an earlier (larger) bag when `_gfaf` accepts
// the earlier bag's rectangle, widened on the left by its `_aege` value, and the bag's rectangle.
// `_fegdg` is sorted in place by decreasing area, then decreasing height. The returned slice
// holds the bags that were not absorbed.
func _eadb(_fegdg []*wordBag) []*wordBag {
	if len(_fegdg) <= 1 {
		return _fegdg
	}
	if _addb {
		_fb.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	_af.Slice(_fegdg, func(i, j int) bool {
		a, b := _fegdg[i], _fegdg[j]
		areaA := a.Width() * a.Height()
		areaB := b.Width() * b.Height()
		if areaA != areaB {
			return areaA > areaB
		}
		if a.Height() != b.Height() {
			return a.Height() > b.Height()
		}
		return i < j
	})

	var merged []*wordBag
	absorbed := make(intSet)
	for i := 0; i < len(_fegdg); i++ {
		if absorbed.has(i) {
			continue
		}
		host := _fegdg[i]
		for j := i + 1; j < len(_fegdg); j++ {
			// Skip bags that an earlier host has already absorbed. The original tested the
			// outer index here, which can never be in the set at this point, so an absorbed
			// bag could be absorbed a second time by a later host.
			if absorbed.has(j) {
				continue
			}
			guest := _fegdg[j]
			// Recomputed on every iteration because absorb may change the host's rectangle.
			reach := host.PdfRectangle
			reach.Llx -= host._aege
			if _gfaf(reach, guest.PdfRectangle) {
				host.absorb(guest)
				absorbed.add(j)
			}
		}
		merged = append(merged, host)
	}
	if len(_fegdg) != len(merged)+len(absorbed) {
		_fb.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_fegdg), len(merged), len(absorbed))
	}
	return merged
}

// _cdbg reports whether the two points have exactly equal coordinates.
func _cdbg(_ecfge, _gggfe _afd.Point) bool {
	return _ecfge.X == _gggfe.X && _ecfge.Y == _gggfe.Y
}

// depth returns the smallest depth() among the cells (x, 0) of the table, ignoring missing
// cells and cells with `_affbf` set. It returns 1e10 when no cell qualifies.
func (_deag *textTable) depth() float64 {
	minDepth := 1e10
	for x := 0; x < _deag._agac; x++ {
		cell := _deag.get(x, 0)
		if cell == nil || cell._affbf {
			continue
		}
		minDepth = _gc.Min(minDepth, cell.depth())
	}
	return minDepth
}

// get returns the cell stored under the key `_addg(_dcgae, _afefg)`, or nil if there is none.
func (_fgadb *textTable) get(_dcgae, _afefg int) *textPara {
	return _fgadb._abccf[_addg(_dcgae, _afefg)]
}

// getRight returns, for every y, the `_cegf` neighbour of the cell in the last column
// (`_agac`-1). It returns nil when any of those neighbours is taken(), or when the `_aegf` link
// of one of them is not the next one in the list.
func (_gdcac *textTable) getRight() paraList {
	cells := make(paraList, _gdcac._dcbdf)
	for y := 0; y < _gdcac._dcbdf; y++ {
		neighbour := _gdcac.get(_gdcac._agac-1, y)._cegf
		if neighbour.taken() {
			return nil
		}
		cells[y] = neighbour
	}
	for y := 0; y < _gdcac._dcbdf-1; y++ {
		if cells[y]._aegf != cells[y+1] {
			return nil
		}
	}
	return cells
}

// asRuling converts the line ruling into a ruling with `_aaec` set to `_gagc`. For kind
// `_aafafg` the primary coordinate is the mean x and the secondary range is the sorted y range
// of the end points; for kind `_eccgd` it is the mean y and the sorted x range. For any other
// kind an error is logged and (nil, false) is returned.
func (_fccg lineRuling) asRuling() (*ruling, bool) {
	result := ruling{_agcgg: _fccg._fbab, Color: _fccg.Color, _aaec: _gagc}
	p0, p1 := _fccg._gffd, _fccg._eeebc
	switch _fccg._fbab {
	case _aafafg:
		result._edcba = _fccg.xMean()
		result._bfeag = _gc.Min(p0.Y, p1.Y)
		result._cbba = _gc.Max(p0.Y, p1.Y)
	case _eccgd:
		result._edcba = _fccg.yMean()
		result._bfeag = _gc.Min(p0.X, p1.X)
		result._cbba = _gc.Max(p0.X, p1.X)
	default:
		_fb.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _fccg._fbab)
		return nil, false
	}
	return &result, true
}

// reset sets the matrices `_bgc` and `_bab` to the identity and clears `_bece`.
func (_aecf *textObject) reset() {
	_aecf._bgc = _afd.IdentityMatrix()
	_aecf._bab = _afd.IdentityMatrix()
	_aecf._bece = nil
}

// growTable repeatedly extends the table with the cells returned by getDown (adding one to
// `_dcbdf`) and getRight (adding one to `_agac`) until neither returns a list. When both are
// available and end in the same cell that is not taken(), the table grows in both directions in
// one step and that shared cell becomes the new corner.
func (_dcba *textTable) growTable() {
	// appendDown stores `cells` at the new highest second index.
	appendDown := func(cells paraList) {
		_dcba._dcbdf++
		for x := 0; x < _dcba._agac; x++ {
			_dcba.put(x, _dcba._dcbdf-1, cells[x])
		}
	}
	// appendRight stores `cells` at the new highest first index.
	appendRight := func(cells paraList) {
		_dcba._agac++
		for y := 0; y < _dcba._dcbdf; y++ {
			_dcba.put(_dcba._agac-1, y, cells[y])
		}
	}

	if _adga {
		_dcba.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}
	for round := 0; ; round++ {
		grown := false
		down := _dcba.getDown()
		right := _dcba.getRight()
		if _adga {
			_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", round, _dcba)
			_cag.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", down)
			_cag.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", right)
		}
		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				appendDown(down)
				// The table changed, so the right-hand neighbours must be fetched again.
				if right = _dcba.getRight(); right != nil {
					appendRight(right)
					_dcba.put(_dcba._agac-1, _dcba._dcbdf-1, corner)
				}
				grown = true
			}
		}
		switch {
		case grown:
			// Already extended in this round.
		case down != nil:
			appendDown(down)
		case right != nil:
			appendRight(right)
		default:
			return
		}
	}
}

// _acca returns a new wordBag that contains only `_ebfg`, stored in the bin whose index is
// `_agfb` of the word's `_fgbda`. The bag takes the word's rectangle and `_abeg` value, with
// `_egbb`, `_fedd` and `_bba` set from the remaining arguments.
func _acca(_ebfg *textWord, _dfcfe float64, _baf, _begf rulingList) *wordBag {
	depthIdx := _agfb(_ebfg._fgbda)
	bag := wordBag{
		_cfeeb:       map[int][]*textWord{depthIdx: {_ebfg}},
		PdfRectangle: _ebfg.PdfRectangle,
		_aege:        _ebfg._abeg,
		_egbb:        _dfcfe,
		_fedd:        _baf,
		_bba:         _begf,
	}
	return &bag
}

// endsInHyphen reports whether the line ends in a hyphen that `_bdbg` accepts. The last rune of
// the last word must be in the Unicode Hyphen range. The result is then true when that word has
// `_gcefe` set and `_bdbg` accepts the word's text, and otherwise the result of `_bdbg` on the
// text of the whole line. The line must contain at least one word.
func (_bfgae *textLine) endsInHyphen() bool {
	lastWord := _bfgae._cadc[len(_bfgae._cadc)-1]
	wordText := lastWord._gebf
	lastRune, size := _g.DecodeLastRuneInString(wordText)
	if size <= 0 || !_gd.Is(_gd.Hyphen, lastRune) {
		return false
	}
	if lastWord._gcefe && _bdbg(wordText) {
		return true
	}
	return _bdbg(_bfgae.text())
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// String returns a description of `w`.
// The output lists `w._fgbda`, the bounding rectangle, `w._abeg` (labelled
// "fontsize" in the output) and the quoted `w._gebf` text.
func (w *textWord) String() string {
	const layout = "%.2f %6.2f fontsize=%.2f \"%s\""
	return _cag.Sprintf(layout, w._fgbda, w.PdfRectangle, w._abeg, w._gebf)
}

// textLine is a bounding rectangle together with the words it contains.
type textLine struct {
	_ac.PdfRectangle
	// _bcgf is seeded from the first word's `_fgbda` when the line is built by _eade.
	_bcgf float64
	// _cadc holds the words of the line.
	_cadc []*textWord
	// _eabc is seeded from the first word's `_abeg` when the line is built by _eade.
	_eabc float64
}

// _agbg returns a new textObject that references the given extractor, page
// resources, graphics state, text state and state stack. Both matrices of the
// new object (`_bgc` and `_bab`) start out as the identity matrix.
func _agbg(e *Extractor, resources *_ac.PdfPageResources, gs _ag.GraphicsState, state *textState, stack *stateStack) *textObject {
	obj := textObject{
		_defa: e,
		_fgf:  resources,
		_gbc:  gs,
		_aece: stack,
		_fga:  state,
		_bgc:  _afd.IdentityMatrix(),
		_bab:  _afd.IdentityMatrix(),
	}
	return &obj
}

// _efag returns the single numeric parameter of `op` as a float64.
// An error is returned (and a debug message logged) when `op` does not have
// exactly one parameter; otherwise the result of the number conversion is
// returned unchanged.
func _efag(op *_ag.ContentStreamOperation) (float64, error) {
	if n := len(op.Params); n != 1 {
		err := _d.New("incorrect parameter count")
		_fb.Log.Debug("ERROR: %#q should have %d input params, got %d %+v", op.Operand, 1, n, op.Params)
		return 0.0, err
	}
	return _gdd.GetNumberAsFloat(op.Params[0])
}

// comp is the ordering predicate for the rulings at indexes `i` and `j`.
// Rulings of different kinds are ordered by kind (larger kind first). Rulings
// of kind `_dgdb` never compare as less. Otherwise the rulings are compared by
// `_edcba`, then `_bfeag`, then `_cbba`; each comparison result is used as-is
// for kind `_eccgd` and inverted for any other kind.
func (rl rulingList) comp(i, j int) bool {
	a, b := rl[i], rl[j]
	kindA, kindB := a._agcgg, b._agcgg
	if kindA != kindB {
		return kindA > kindB
	}
	if kindA == _dgdb {
		return false
	}
	// `(x) == asIs` yields x for `_eccgd` rulings and !x for the others.
	asIs := kindA == _eccgd
	switch {
	case a._edcba != b._edcba:
		return (a._edcba > b._edcba) == asIs
	case a._bfeag != b._bfeag:
		return (a._bfeag < b._bfeag) == asIs
	default:
		return (a._cbba < b._cbba) == asIs
	}
}

// reduce returns `t` with its empty composite rows and columns removed.
// `t` itself is returned when nothing is empty; otherwise a new table is built
// and the surviving composite cells are copied over with renumbered indexes.
//
// NOTE(review): the new table only allocates `_abccf`, whereas reduceTiling
// allocates `_cead`. Confirm that putComposite does not need `_cead` here.
func (t *textTable) reduce() *textTable {
	keptRows := make([]int, 0, t._dcbdf)
	keptCols := make([]int, 0, t._agac)
	for y := 0; y < t._dcbdf; y++ {
		if t.emptyCompositeRow(y) {
			continue
		}
		keptRows = append(keptRows, y)
	}
	for x := 0; x < t._agac; x++ {
		if t.emptyCompositeColumn(x) {
			continue
		}
		keptCols = append(keptCols, x)
	}
	if len(keptRows) == t._dcbdf && len(keptCols) == t._agac {
		// Nothing to drop.
		return t
	}

	reduced := textTable{
		_ebabc: t._ebabc,
		_agac:  len(keptCols),
		_dcbdf: len(keptRows),
		_abccf: make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _agede {
		_fb.Log.Info("reduce: %dx%d -> %dx%d", t._agac, t._dcbdf, len(keptCols), len(keptRows))
		_fb.Log.Info("reducedCols: %+v", keptCols)
		_fb.Log.Info("reducedRows: %+v", keptRows)
	}
	for newY, oldY := range keptRows {
		for newX, oldX := range keptCols {
			paras, bbox := t.getComposite(oldX, oldY)
			if paras == nil {
				continue
			}
			if _agede {
				_cag.Printf("  %2d, %2d (%2d, %2d) %q\n", newX, newY, oldX, oldY, _bgdca(paras.merge().text(), 50))
			}
			reduced.putComposite(newX, newY, paras, bbox)
		}
	}
	return &reduced
}

// extractXObjectImage records an ImageMark for the XObject called `name` in
// `resources`, positioned and scaled by the CTM of `gs`.
// Decoded images are cached in `c._gfb`, keyed by the XObject returned from the
// resources lookup. Nil is returned without recording anything when the
// XObject or its image cannot be found.
func (c *imageExtractContext) extractXObjectImage(name *_gdd.PdfObjectName, gs _ag.GraphicsState, resources *_ac.PdfPageResources) error {
	// The second return value of the lookup is not needed here.
	xobj, _ := resources.GetXObjectByName(*name)
	if xobj == nil {
		return nil
	}

	cached, found := c._gfb[xobj]
	if !found {
		ximg, err := resources.GetXObjectImageByName(*name)
		if err != nil {
			return err
		}
		if ximg == nil {
			return nil
		}
		img, err := ximg.ToImage()
		if err != nil {
			return err
		}
		cached = &cachedImage{_dfa: img, _ecf: ximg.ColorSpace}
		c._gfb[xobj] = cached
	}

	img := cached._dfa
	colorspace := cached._ecf
	rgb, err := colorspace.ImageToRGB(*img)
	if err != nil {
		return err
	}
	_fb.Log.Debug("@Do CTM: %s", gs.CTM.String())

	mark := ImageMark{
		Image:  &rgb,
		Width:  gs.CTM.ScalingFactorX(),
		Height: gs.CTM.ScalingFactorY(),
		Angle:  gs.CTM.Angle(),
	}
	mark.X, mark.Y = gs.CTM.Translation()
	c._caa = append(c._caa, mark)
	c._cdge++
	return nil
}

// writeCellText writes the text of the lines of `p` to `w`.
// When `_ecgaa` is set and a line other than the last ends in a hyphen, the
// line's final rune is removed via _gaaf and no separator is written after it.
// Every other non-final line is followed by the separator that _aaee returns
// for the `_bcgf` values of the line and its successor. Errors returned by
// `w.Write` are ignored.
func (p *textPara) writeCellText(w _c.Writer) {
	last := len(p._ddeb) - 1
	for i, ln := range p._ddeb {
		text := ln.text()
		isLast := i == last
		dehyphenate := _ecgaa && ln.endsInHyphen() && !isLast
		if dehyphenate {
			text = _gaaf(text)
		}
		w.Write([]byte(text))
		if !dehyphenate && !isLast {
			sep := _aaee(ln._bcgf, p._ddeb[i+1]._bcgf)
			w.Write([]byte(sep))
		}
	}
}

// toTextMarks returns the TextMarks for the marks in `w._ggabc`, built by
// passing each converted mark and `offset` to _bafc in turn.
func (w *textWord) toTextMarks(offset *int) []TextMark {
	var marks []TextMark
	for _, tm := range w._ggabc {
		marks = _bafc(marks, offset, tm.ToTextMark())
	}
	return marks
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// The two int results are the statistics reported by the internal page text
// extraction. A `model.ErrColorOutOfRange` error from that extraction is
// tolerated; any other error aborts the call.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
func (e *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pageText, stat1, stat2, err := e.extractPageText(e._gg, e._cb, _afd.IdentityMatrix(), 0)
	if err != nil && err != _ac.ErrColorOutOfRange {
		return nil, 0, 0, err
	}
	pageText.computeViews()
	if err = _degfb(pageText); err != nil {
		return nil, 0, 0, err
	}
	return pageText, stat1, stat2, nil
}

// llyOrdering returns the indexes of `paras` arranged by ascending Lly.
// The sort is stable, so paragraphs with equal Lly keep their relative order.
func (paras paraList) llyOrdering() []int {
	order := make([]int, len(paras))
	for i := range paras {
		order[i] = i
	}
	_af.SliceStable(order, func(a, b int) bool {
		return paras[order[a]].Lly < paras[order[b]].Lly
	})
	return order
}

// compositeCell is a rectangle together with the paragraphs assigned to it.
type compositeCell struct {
	_ac.PdfRectangle
	paraList
}

// reduceTiling returns `t` with some empty composite rows and columns removed.
// A row is dropped when it is not the first row, it is empty, and the `_ebbb`
// values of `tiling` for it and for the previous row differ by less than
// `tol`. A column is dropped when it is not the last column, it is empty, and
// the `_egfga` values for it and for the next column differ by less than
// `tol`. `t` itself is returned when nothing is dropped.
func (t *textTable) reduceTiling(tiling gridTiling, tol float64) *textTable {
	keptRows := make([]int, 0, t._dcbdf)
	keptCols := make([]int, 0, t._agac)
	colEdges := tiling._egfga
	rowEdges := tiling._ebbb

	for y := 0; y < t._dcbdf; y++ {
		drop := y > 0 && _gc.Abs(rowEdges[y-1]-rowEdges[y]) < tol && t.emptyCompositeRow(y)
		if !drop {
			keptRows = append(keptRows, y)
		}
	}
	for x := 0; x < t._agac; x++ {
		drop := x < t._agac-1 && _gc.Abs(colEdges[x+1]-colEdges[x]) < tol && t.emptyCompositeColumn(x)
		if !drop {
			keptCols = append(keptCols, x)
		}
	}
	if len(keptRows) == t._dcbdf && len(keptCols) == t._agac {
		return t
	}

	reduced := textTable{
		_ebabc: t._ebabc,
		_agac:  len(keptCols),
		_dcbdf: len(keptRows),
		_cead:  make(map[uint64]compositeCell, len(keptCols)*len(keptRows)),
	}
	if _agede {
		_fb.Log.Info("reduceTiling: %dx%d -> %dx%d", t._agac, t._dcbdf, len(keptCols), len(keptRows))
		_fb.Log.Info("reducedCols: %+v", keptCols)
		_fb.Log.Info("reducedRows: %+v", keptRows)
	}
	for newY, oldY := range keptRows {
		for newX, oldX := range keptCols {
			paras, bbox := t.getComposite(oldX, oldY)
			if len(paras) == 0 {
				continue
			}
			if _agede {
				_cag.Printf("  %2d, %2d (%2d, %2d) %q\n", newX, newY, oldX, oldY, _bgdca(paras.merge().text(), 50))
			}
			reduced.putComposite(newX, newY, paras, bbox)
		}
	}
	return &reduced
}

// _eade starts a new textLine from the first word that `bag` returns for
// `depthIdx`. The line takes its rectangle, `_eabc` and `_bcgf` from that word,
// and the word is moved out of `bag` into the line.
func _eade(bag *wordBag, depthIdx int) *textLine {
	first := bag.firstWord(depthIdx)
	ln := textLine{
		PdfRectangle: first.PdfRectangle,
		_eabc:        first._abeg,
		_bcgf:        first._fgbda,
	}
	ln.pullWord(bag, first, depthIdx)
	return &ln
}

// bbox returns the rectangle spanned by the rulings in `rl`.
// For rulings of kind `_eccgd` (judged by the first element) the secondary
// range supplies x and the primary range supplies y; for other kinds it is the
// other way round. An empty list logs an error and yields the zero rectangle.
func (rl rulingList) bbox() _ac.PdfRectangle {
	if len(rl) == 0 {
		_fb.Log.Error("rulingList.bbox: no rulings")
		return _ac.PdfRectangle{}
	}
	var box _ac.PdfRectangle
	if rl[0]._agcgg == _eccgd {
		box.Llx, box.Urx = rl.secMinMax()
		box.Lly, box.Ury = rl.primMinMax()
		return box
	}
	box.Llx, box.Urx = rl.primMinMax()
	box.Lly, box.Ury = rl.secMinMax()
	return box
}

// _dedd returns the y values of the "row corridors" of `cells`.
// All lines of all cells are sorted by `_bcgf` (then Llx), and a corridor is
// placed midway between a line and the lowest line seen so far whenever the
// new line lies entirely below it. The corridors split the lines into groups.
// Nil is returned unless every cell has lines in every group (checked with
// hasLines), or when the cells contain no lines at all.
func _dedd(cells []compositeCell) []float64 {
	var lines []*textLine
	numParas := 0
	for _, cell := range cells {
		numParas += len(cell.paraList)
		lines = append(lines, cell.lines()...)
	}
	_af.Slice(lines, func(i, j int) bool {
		a, b := lines[i], lines[j]
		da, db := a._bcgf, b._bcgf
		if !_gcceb(da - db) {
			return da < db
		}
		return a.Llx < b.Llx
	})
	if _agede {
		_cag.Printf("    rowBorders: %d paras %d lines\n", numParas, len(lines))
		for i, ln := range lines {
			_cag.Printf("%8d: %s\n", i, ln)
		}
	}
	// Without any lines there is nothing to separate. The original code indexed
	// lines[0] unconditionally and panicked here.
	if len(lines) == 0 {
		return nil
	}

	var corridors []float64
	lowest := lines[0]
	var groups [][]*textLine
	current := []*textLine{lowest}
	for i, ln := range lines[1:] {
		if ln.Ury < lowest.Lly {
			border := 0.5 * (ln.Ury + lowest.Lly)
			if _agede {
				_cag.Printf("%4d: %6.2f < %6.2f border=%6.2f\n"+"\t q=%s\n\t p=%s\n", i, ln.Ury, lowest.Lly, border, lowest, ln)
			}
			corridors = append(corridors, border)
			groups = append(groups, current)
			current = nil
		}
		current = append(current, ln)
		if ln.Lly < lowest.Lly {
			lowest = ln
		}
	}
	if len(current) > 0 {
		groups = append(groups, current)
	}

	if _agede {
		_cag.Printf("        rowCorridors=%6.2f\n", corridors)
	}
	if _agede {
		_fb.Log.Info("row=%d", len(cells))
		for i, cell := range cells {
			_cag.Printf("%4d: %s\n", i, cell)
		}
		_fb.Log.Info("groups=%d", len(groups))
		for i, group := range groups {
			_cag.Printf("%4d: %d\n", i, len(group))
			for j, ln := range group {
				_cag.Printf("%8d: %s\n", j, ln)
			}
		}
	}

	// The corridors are only usable if every cell has lines in every group.
	spansAll := true
scan:
	for gi, group := range groups {
		for ci, cell := range cells {
			if _agede {
				_cag.Printf("   ~~~group %d of %d cell %d of %d %s\n", gi, len(groups), ci, len(cells), cell)
			}
			if !cell.hasLines(group) {
				if _agede {
					_cag.Printf("   !!!group %d of %d cell %d of %d OUT\n", gi, len(groups), ci, len(cells))
				}
				spansAll = false
				break scan
			}
		}
	}
	if !spansAll {
		if _agede {
			_fb.Log.Info("row corridors don't span all cells in row. ignoring")
		}
		corridors = nil
	}
	if _agede && corridors != nil {
		_cag.Printf("        ***rowCorridors=%6.2f\n", corridors)
	}
	return corridors
}

// rectRuling is a coloured rectangle tagged with a ruling kind and a mark kind.
type rectRuling struct {
	_bbgc   rulingKind
	_gdecgd markKind
	_ca.Color
	_ac.PdfRectangle
}

// _fgbec returns a ruling of kind `_aafafg` along the right edge of `r`:
// its primary coordinate is `r.Urx` and it spans `r.Lly` to `r.Ury`.
func _fgbec(r _ac.PdfRectangle) *ruling {
	return &ruling{
		_agcgg: _aafafg,
		_edcba: r.Urx,
		_bfeag: r.Lly,
		_cbba:  r.Ury,
	}
}

// findTextTables returns the tables that can be grown from the paragraphs in
// `paras`. Paragraphs that are already taken or have zero width are skipped,
// as are paragraphs for which isAtom returns nil. Grown tables with fewer than
// `_eecc` cells (columns * rows) are discarded; the rest have their cells
// marked and are returned.
func (paras paraList) findTextTables() []*textTable {
	var tables []*textTable
	for _, p := range paras {
		if p.taken() || p.Width() == 0 {
			continue
		}
		tbl := p.isAtom()
		if tbl == nil {
			continue
		}
		tbl.growTable()
		if tbl._agac*tbl._dcbdf < _eecc {
			continue
		}
		tbl.markCells()
		tbl.log("grown")
		tables = append(tables, tbl)
	}
	return tables
}

// _aded reports whether the x-extent of `w` overlaps the x-extent of `bag`
// after the bag's extent has been widened by `pad` on both sides.
func _aded(bag *wordBag, w *textWord, pad float64) bool {
	return w.Llx < bag.Urx+pad && bag.Llx-pad < w.Urx
}
// Font represents the font properties on a PDF page.
type Font struct {
	PdfFont *_ac.PdfFont

	// FontName represents the font name from the font properties.
	FontName string

	// FontType represents the font Subtype entry in the font dictionary inside page resources.
	// Examples: Type0, Type1, MMType1, Type3, TrueType, CIDFont.
	FontType string

	// ToUnicode is true if the font provides a `ToUnicode` mapping.
	ToUnicode bool

	// IsCID is true if the underlying font is a composite font.
	// A composite font is represented by a font dictionary whose Subtype is `Type0`.
	IsCID bool

	// IsSimple is true if the font is a simple font.
	// A simple font is limited to 8-bit character codes (0-255).
	IsSimple bool

	// FontData represents the raw data of the embedded font file.
	// It can be in TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CFF) format.
	// The FontData value is taken from `FontFile`, `FontFile2` or `FontFile3` inside the font descriptor.
	// At most one of `FontFile`, `FontFile2` or `FontFile3` supplies the FontData value.
	FontData []byte

	// FontFileName is a name representing the font. It has the format:
	// (Font Name) + (Font Type Extension), for example: helvetica.ttf.
	FontFileName string

	// FontDescriptor represents metrics and other attributes inside the font properties from the
	// PDF structure (font descriptor).
	FontDescriptor *_ac.PdfFontDescriptor
}

// blocks reports whether some ruling in `rl` lies between `a` and `b`.
// It returns false when the secondary ranges (`_bfeag`..`_cbba`) of `a` and `b`
// do not overlap. Otherwise it returns true if any ruling in `rl` has a primary
// coordinate (`_edcba`) within the primaries of `a` and `b` (with `_gbbd`
// tolerance) and a secondary range that reaches the overlap of `a` and `b`.
func (rl rulingList) blocks(a, b *ruling) bool {
	if a._bfeag > b._cbba || b._bfeag > a._cbba {
		return false
	}
	overlapLo := _gc.Max(a._bfeag, b._bfeag)
	overlapHi := _gc.Min(a._cbba, b._cbba)

	// Order the pair so that `lo` has the smaller primary coordinate.
	lo, hi := a, b
	if lo._edcba > hi._edcba {
		lo, hi = hi, lo
	}
	for _, r := range rl {
		betweenPrimaries := lo._edcba <= r._edcba+_gbbd && r._edcba <= hi._edcba+_gbbd
		if betweenPrimaries && r._bfeag <= overlapHi && overlapLo <= r._cbba {
			return true
		}
	}
	return false
}
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	// NOTE(review): presumably enables extraction of inline stencil masks;
	// confirm against the image extraction code.
	IncludeInlineStencilMasks bool
}

// _ddcd returns the keys of `m` in ascending order.
func _ddcd(m map[int][]float64) []int {
	keys := make([]int, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	_af.Ints(keys)
	return keys
}

// yNeighbours returns the result of eventNeighbours for the vertical extents
// of the paragraphs in `paras`. Each paragraph contributes two events: one at
// its Lly (flagged true) and one at its Ury (flagged false). When `ratio` is
// non-zero, each extent is widened by `ratio` times the paragraph's font size
// at both ends.
func (paras paraList) yNeighbours(ratio float64) map[*textPara][]int {
	events := make([]event, 2*len(paras))
	if ratio == 0 {
		for i, p := range paras {
			events[2*i] = event{p.Lly, true, i}
			events[2*i+1] = event{p.Ury, false, i}
		}
		return paras.eventNeighbours(events)
	}
	for i, p := range paras {
		events[2*i] = event{p.Lly - ratio*p.fontsize(), true, i}
		events[2*i+1] = event{p.Ury + ratio*p.fontsize(), false, i}
	}
	return paras.eventNeighbours(events)
}

// put stores `cell` in the table at position (`x`, `y`), keyed by _addg(x, y).
func (t *textTable) put(x, y int, cell *textPara) {
	key := _addg(x, y)
	t._abccf[key] = cell
}

// wordBag is a rectangle together with words grouped under integer keys.
type wordBag struct {
	_ac.PdfRectangle
	// _aege is seeded from the first word's `_abeg` by _acca.
	_aege float64
	// _fedd and _bba are the two ruling lists passed to _acca.
	_fedd, _bba rulingList
	// _egbb is the float64 argument passed to _acca.
	_egbb float64
	// _cfeeb maps _agfb(word._fgbda) to the words with that key.
	_cfeeb map[int][]*textWord
}

// _agfb returns the integer bucket for `v`: int(v/_eddf) when `v` is
// non-negative and int(v/_eddf)-1 otherwise.
func _agfb(v float64) int {
	if v >= 0 {
		return int(v / _eddf)
	}
	return int(v/_eddf) - 1
}

// The ruling kinds. `_dgdb` is the zero value; ruling.String reports it as
// "NOT RULING".
const (
	_dgdb rulingKind = iota
	_eccgd
	_aafafg
)

// _gaaf returns `s` without its final rune.
// An empty string is returned unchanged; the original code panicked on it
// because it sliced to a negative length.
func _gaaf(s string) string {
	runes := []rune(s)
	if len(runes) == 0 {
		return s
	}
	return string(runes[:len(runes)-1])
}

// encloses reports whether the range `lo`..`hi` lies within the secondary
// range of `r` (`_bfeag`..`_cbba`), allowing a tolerance of `_bade` at each end.
func (r *ruling) encloses(lo, hi float64) bool {
	return r._bfeag-_bade <= lo && hi <= r._cbba+_bade
}

// push appends a copy of `state` to the stack, so later changes to `state`
// do not affect the saved entry.
func (stack *stateStack) push(state *textState) {
	saved := *state
	*stack = append(*stack, &saved)
}

// intSet is a set of ints.
type intSet map[int]struct{}

// text returns the text of `p` as produced by writeText.
func (p *textPara) text() string {
	var buf _e.Buffer
	p.writeText(&buf)
	return buf.String()
}

// setTextRise stores `rise` in the `_fec` field of the text state.
// It does nothing when `to` is nil.
func (to *textObject) setTextRise(rise float64) {
	if to == nil {
		return
	}
	to._fga._fec = rise
}

// gridIntersecting reports whether `r` and `other` have matching secondary
// ranges, i.e. _cgfe holds for both their `_bfeag` and their `_cbba` values.
func (r *ruling) gridIntersecting(other *ruling) bool {
	return _cgfe(r._bfeag, other._bfeag) && _cgfe(r._cbba, other._cbba)
}
// PageText represents the layout of text on a device page.
// All of its fields are unexported. By type, it holds two collections of text
// marks (`_aged`, `_gccf`), a string (`_cged`), a list of tables (`_dcgg`), a
// rectangle (`_abea`) and two lists of path sections (`_cagb`, `_bfa`).
// NOTE(review): the role of each individual field is not evident from this
// declaration alone; confirm against the code that populates them.
type PageText struct{_aged []*textMark ;_cged string ;_gccf []TextMark ;_dcgg []TextTable ;_abea _ac .PdfRectangle ;_cagb []pathSection ;_bfa []pathSection ;};
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
// Marks flagged as Meta and marks whose Text satisfies _feaba are ignored.
// The bool result is false when no mark contributed to the rectangle.
func (ma *TextMarkArray) BBox() (_ac.PdfRectangle, bool) {
	var (
		box   _ac.PdfRectangle
		found bool
	)
	for _, mark := range ma._dgf {
		if mark.Meta || _feaba(mark.Text) {
			continue
		}
		if !found {
			box, found = mark.BBox, true
			continue
		}
		box = _gcae(box, mark.BBox)
	}
	return box, found
}

// _eacc returns a ruling of kind `_aafafg` along the left edge of `r`:
// its primary coordinate is `r.Llx` and it spans `r.Lly` to `r.Ury`.
func _eacc(r _ac.PdfRectangle) *ruling {
	return &ruling{
		_agcgg: _aafafg,
		_edcba: r.Llx,
		_bfeag: r.Lly,
		_cbba:  r.Ury,
	}
}

// absorb merges `other` into `w`: the rectangle of `w` becomes the result of
// _gcae over both rectangles and the marks of `other` are appended to `w`.
func (w *textWord) absorb(other *textWord) {
	w.PdfRectangle = _gcae(w.PdfRectangle, other.PdfRectangle)
	w._ggabc = append(w._ggabc, other._ggabc...)
}

// toTilings tidies `rl`, groups the result into grids and converts each grid
// to a tiling. It returns the tidied rulings together with the tilings, or
// (nil, nil) when `rl` is empty.
func (rl rulingList) toTilings() (rulingList, []gridTiling) {
	rl.log("toTilings")
	if len(rl) == 0 {
		return nil, nil
	}
	rl = rl.tidied("all")
	rl.log("tidied")

	grids := rl.toGrids()
	tilings := make([]gridTiling, len(grids))
	for i := range grids {
		tilings[i] = grids[i].asTiling()
	}
	return rl, tilings
}

// markWordBoundaries sets the `_gcefe` flag on every word (after the first)
// whose _dgfaa value relative to the preceding word is at least `_gdf` times
// the line's `_eabc`.
func (ln *textLine) markWordBoundaries() {
	threshold := _gdf * ln._eabc
	for i, w := range ln._cadc[1:] {
		// `i` indexes the slice starting at the second word, so ln._cadc[i] is
		// the word immediately before `w`.
		prev := ln._cadc[i]
		if _dgfaa(w, prev) >= threshold {
			w._gcefe = true
		}
	}
}

// removeDuplicates sorts `rl` in place and returns a list in which each ruling
// that equals the previously kept one has been dropped. It returns nil for an
// empty list.
func (rl rulingList) removeDuplicates() rulingList {
	if len(rl) == 0 {
		return nil
	}
	rl.sort()
	unique := rulingList{rl[0]}
	for _, r := range rl[1:] {
		if !r.equals(unique[len(unique)-1]) {
			unique = append(unique, r)
		}
	}
	return unique
}

// setTextRenderMode stores `mode`, converted to a RenderMode, in the `_acgdg`
// field of the text state. It does nothing when `to` is nil.
func (to *textObject) setTextRenderMode(mode int) {
	if to == nil {
		return
	}
	to._fga._acgdg = RenderMode(mode)
}

// setFont stores `size` in the text state and then looks up the font called
// `name`. On success the font is stored in the text state; on failure the
// lookup error is returned (the size has already been stored by then).
// It does nothing when `to` is nil.
func (to *textObject) setFont(name string, size float64) error {
	if to == nil {
		return nil
	}
	to._fga._cef = size
	font, err := to.getFont(name)
	if err != nil {
		return err
	}
	to._fga._eedd = font
	return nil
}

// _bdbg reports whether `s` has at least `_dfed` runes, ends in a Unicode
// hyphen, and the rune before that hyphen exists and is not whitespace.
func _bdbg(s string) bool {
	if _g.RuneCountInString(s) < _dfed {
		return false
	}
	last, size := _g.DecodeLastRuneInString(s)
	if size <= 0 || !_gd.Is(_gd.Hyphen, last) {
		return false
	}
	// Inspect the rune that precedes the trailing hyphen.
	head := s[:len(s)-size]
	last, size = _g.DecodeLastRuneInString(head)
	return size > 0 && !_gd.IsSpace(last)
}

// primaries returns the distinct primary coordinates (`_edcba`) of the rulings
// in `rl`, in ascending order.
func (rl rulingList) primaries() []float64 {
	seen := make(map[float64]struct{}, len(rl))
	for _, r := range rl {
		seen[r._edcba] = struct{}{}
	}
	values := make([]float64, 0, len(seen))
	for v := range seen {
		values = append(values, v)
	}
	_af.Float64s(values)
	return values
}

// del removes `i` from the set.
func (s intSet) del(i int) {
	delete(s, i)
}
// _efagec returns a textPara with bounding rectangle `bbox` and lines `lines`.
func _efagec(bbox _ac.PdfRectangle, lines []*textLine) *textPara {
	para := textPara{PdfRectangle: bbox, _ddeb: lines}
	return &para
}

// _eeea builds a ruling for the coloured line segment from `p1` to `p2`.
// The segment's kind is computed by _efeb; (nil, false) is returned when that
// kind is `_dgdb`, otherwise the result of converting the line to a ruling.
func _eeea(p1, p2 _afd.Point, col _ca.Color) (*ruling, bool) {
	line := lineRuling{
		_gffd:  p1,
		_eeebc: p2,
		_fbab:  _efeb(p1, p2),
		Color:  col,
	}
	if line._fbab == _dgdb {
		return nil, false
	}
	return line.asRuling()
}

// isQuadrilateral reports whether the subpath has exactly four points, or five
// points of which the first and last coincide.
func (sp *subpath) isQuadrilateral() bool {
	points := sp._egd
	switch len(points) {
	case 4:
		return true
	case 5:
		first, last := points[0], points[4]
		return first.X == last.X && first.Y == last.Y
	default:
		return false
	}
}

// connections returns the set of ruling indexes reachable from index `start`.
// `adjacency` maps a ruling index to a set of indexes. An index j is collected
// when `adjacency[j]` contains an index that has been visited, and every
// collected index is visited in turn.
func (rl rulingList) connections(adjacency map[int]intSet, start int) intSet {
	connected := make(intSet)
	visited := make(intSet)

	var visit func(int)
	visit = func(i int) {
		if visited.has(i) {
			return
		}
		visited.add(i)
		for j := range rl {
			if adjacency[j].has(i) {
				connected.add(j)
			}
		}
		for j := range rl {
			if connected.has(j) {
				visit(j)
			}
		}
	}
	visit(start)
	return connected
}

// has reports whether `i` is in the set.
func (s intSet) has(i int) bool {
	_, present := s[i]
	return present
}
// String returns a description of `v`.
// Rulings of kind `_dgdb` are described as "NOT RULING". For other rulings the
// output gives the kind, the primary coordinate, the secondary range and its
// length, the `_aaec` field and the colour, followed by the width when it is
// non-zero. The primary axis is labelled "y" for kind `_eccgd` and "x" otherwise.
func (v *ruling) String() string {
	if v._agcgg == _dgdb {
		return "NOT RULING"
	}
	primary, secondary := "x", "y"
	if v._agcgg == _eccgd {
		primary, secondary = "y", "x"
	}
	var width string
	if v._geba != 0.0 {
		width = _cag.Sprintf(" width=%.2f", v._geba)
	}
	return _cag.Sprintf("%10s %s=%6.2f %s=%6.2f - %6.2f (%6.2f) %s %v%s",
		v._agcgg, primary, v._edcba, secondary, v._bfeag, v._cbba, v._cbba-v._bfeag, v._aaec, v.Color, width)
}

// newTablePara returns a textPara that wraps table `t`. The paragraph's
// rectangle and its `_gbaa` field are both set to the table's computed
// bounding box.
func (t *textTable) newTablePara() *textPara {
	bbox := t.computeBbox()
	para := &textPara{
		PdfRectangle: bbox,
		_gbaa:        bbox,
		_ccec:        t,
	}
	if _agede {
		_fb.Log.Info("newTablePara: %s", para)
	}
	return para
}

// _bafe appends a copy of the package-level mark `_dgfa`, with its Text set
// to `text`, to `marks` via _bafc and returns the result.
func _bafe(marks []TextMark, offset *int, text string) []TextMark {
	mark := _dgfa
	mark.Text = text
	return _bafc(marks, offset, mark)
}

// _ddgc returns the ruling kind that _cbcbf assigns to the segment from `p1`
// to `p2`, given its absolute x and y extents and the tolerance `_dda`.
func _ddgc(p1, p2 _afd.Point) rulingKind {
	dx := _gc.Abs(p1.X - p2.X)
	dy := _gc.Abs(p1.Y - p2.Y)
	return _cbcbf(dx, dy, _dda)
}

// bbox returns the rectangle of the mark.
func (tm *textMark) bbox() _ac.PdfRectangle {
	return tm.PdfRectangle
}

// pullWord appends `w` to the line and removes it from `bag` under key
// `depthIdx`.
func (ln *textLine) pullWord(bag *wordBag, w *textWord, depthIdx int) {
	ln.appendWord(w)
	bag.removeWord(w, depthIdx)
}

// textTable is a rectangular grid of cells.
type textTable struct {
	_ac.PdfRectangle
	// _agac is the number of columns and _dcbdf the number of rows.
	_agac, _dcbdf int
	_ebabc        bool
	// _abccf holds single-paragraph cells, keyed by _addg(x, y).
	_abccf map[uint64]*textPara
	// _cead holds composite cells, keyed by _addg(x, y).
	_cead map[uint64]compositeCell
}

var _ad = false

// bbox returns the smallest rectangle containing every point of every subpath
// in `ps`. It requires at least one subpath whose first entry has at least one
// point.
func (ps pathSection) bbox() _ac.PdfRectangle {
	origin := ps._bbdc[0]._egd[0]
	box := _ac.PdfRectangle{Llx: origin.X, Urx: origin.X, Lly: origin.Y, Ury: origin.Y}
	for _, sp := range ps._bbdc {
		// Revisiting `origin` is harmless: it never extends the box.
		for _, pt := range sp._egd {
			if pt.X < box.Llx {
				box.Llx = pt.X
			} else if pt.X > box.Urx {
				box.Urx = pt.X
			}
			if pt.Y < box.Lly {
				box.Lly = pt.Y
			} else if pt.Y > box.Ury {
				box.Ury = pt.Y
			}
		}
	}
	return box
}

// lastpointEstablished returns the last point known to `ss`.
// When `ss._dcef` is set, `ss._dffc` is returned with false. Otherwise, if
// the most recent subpath has its `_ega` flag set, that subpath's last point
// is returned with false. In every other case the zero point is returned with
// true.
// NOTE(review): the bool is false whenever a point is available; confirm
// callers expect that polarity.
func (ss *shapesState) lastpointEstablished() (_afd.Point, bool) {
	if ss._dcef {
		return ss._dffc, false
	}
	if n := len(ss._fegc); n > 0 && ss._fegc[n-1]._ega {
		return ss._fegc[n-1].last(), false
	}
	return _afd.Point{}, true
}

// _aecce converts `col`, interpreted in colorspace `cs`, to a color.Color.
// Black is returned when either argument is nil, when the conversion to RGB
// fails, or when the converted colour is not a *PdfColorDeviceRGB. Otherwise
// an opaque NRGBA colour is built from the RGB components scaled by 255.
func _aecce(cs _ac.PdfColorspace, col _ac.PdfColor) _ca.Color {
	if cs == nil || col == nil {
		return _ca.Black
	}
	converted, err := cs.ColorToRGB(col)
	if err != nil {
		_fb.Log.Debug("WARN: could not convert color %v (%v) to RGB: %s", col, cs, err)
		return _ca.Black
	}
	rgb, isRGB := converted.(*_ac.PdfColorDeviceRGB)
	if !isRGB {
		_fb.Log.Debug("WARN: converted color is not in the RGB colorspace: %v", converted)
		return _ca.Black
	}
	return _ca.NRGBA{
		R: uint8(rgb.R() * 255),
		G: uint8(rgb.G() * 255),
		B: uint8(rgb.B() * 255),
		A: uint8(255),
	}
}

// findGridTables returns the tables found in `paras` for each of the given
// tilings. Every table found is logged, collected and has its cells marked.
// Each paragraph key in the second result of findTableGrid has its `_cfeff`
// flag set, whether or not a table was found.
func (paras paraList) findGridTables(tilings []gridTiling) []*textTable {
	if _agede {
		_fb.Log.Info("findGridTables: %d paras", len(paras))
		for i, p := range paras {
			_cag.Printf("%4d: %s\n", i, p)
		}
	}
	var tables []*textTable
	for i, tiling := range tilings {
		tbl, used := paras.findTableGrid(tiling)
		if tbl != nil {
			tbl.log(_cag.Sprintf("findTableWithGrids: %d", i))
			tables = append(tables, tbl)
			tbl.markCells()
		}
		for p := range used {
			p._cfeff = true
		}
	}
	if _agede {
		_fb.Log.Info("findGridTables: %d tables", len(tables))
	}
	return tables
}

// numBorders returns how many of the four border flags of the tile are set.
func (tile gridTile) numBorders() int {
	count := 0
	for _, set := range [...]bool{tile._fcgbfd, tile._fgdd, tile._cccb, tile._gbac} {
		if set {
			count++
		}
	}
	return count
}

// depth returns -1 when the `_affbf` flag is set, otherwise the `_bcgf` of
// the first line when the paragraph has lines, otherwise the depth of the
// paragraph's table (`_ccec`).
func (p *textPara) depth() float64 {
	switch {
	case p._affbf:
		return -1.0
	case len(p._ddeb) > 0:
		return p._ddeb[0]._bcgf
	default:
		return p._ccec.depth()
	}
}

// _geabe rounds `v` to the nearest multiple of `_eada`.
func _geabe(v float64) float64 {
	return _eada * _gc.Round(v/_eada)
}
// String returns a description of `k`.
// The description is looked up in `_afbfd`; kinds that are not in that table
// are reported as "Not a ruling: <number>".
func (k rulingKind) String() string {
	if name, known := _afbfd[k]; known {
		return name
	}
	return _cag.Sprintf("Not a ruling: %d", k)
}

// snapToGroupsDirection groups the rulings in `rl` by primary coordinate,
// snaps every ruling to its group's merged primary value, and returns the
// merged rulings of each group, sorted.
//
// After a strict sort, a new group is started whenever a ruling's primary
// (`_edcba`) exceeds the primary of the current group's first ruling by more
// than `_gbbd`. Each group is then split by splitSec and every part is merged
// into one ruling. A merged ruling that aligns on both axes with the
// previously emitted one is logged as a duplicate and skipped.
//
// The rulings in `rl` are modified in place (their primaries are overwritten).
// Nil is returned for an empty list; the original code indexed `rl[0]`
// unconditionally and panicked in that case.
func (rl rulingList) snapToGroupsDirection() rulingList {
	if len(rl) == 0 {
		return nil
	}
	rl.sortStrict()

	// groups maps the first ruling of each group to all rulings in the group.
	groups := make(map[*ruling]rulingList, len(rl))
	head := rl[0]
	startGroup := func(r *ruling) {
		head = r
		groups[head] = rulingList{r}
	}
	startGroup(rl[0])
	for _, r := range rl[1:] {
		if r._edcba < head._edcba-_cbd {
			_fb.Log.Error("snapToGroupsDirection: Wrong primary order.\n\tv0=%s\n\t v=%s", head, r)
		}
		if r._edcba > head._edcba+_gbbd {
			startGroup(r)
		} else {
			groups[head] = append(groups[head], r)
		}
	}

	// Compute the merged primary of each group and remember which group each
	// ruling belongs to.
	mergedPrimary := make(map[*ruling]float64, len(groups))
	groupOf := make(map[*ruling]*ruling, len(rl))
	for key, members := range groups {
		mergedPrimary[key] = members.mergePrimary()
		for _, member := range members {
			groupOf[member] = key
		}
	}
	for _, r := range rl {
		r._edcba = mergedPrimary[groupOf[r]]
	}

	snapped := make(rulingList, 0, len(rl))
	for _, members := range groups {
		parts := members.splitSec()
		for i, part := range parts {
			merged := part.merge()
			if len(snapped) > 0 {
				prev := snapped[len(snapped)-1]
				if prev.alignsPrimary(merged) && prev.alignsSec(merged) {
					_fb.Log.Error("snapToGroupsDirection: Duplicate i=%d\n\tw=%s\n\tv=%s", i, prev, merged)
					continue
				}
			}
			snapped = append(snapped, merged)
		}
	}
	snapped.sortStrict()
	return snapped
}

// llyRange returns the run of `ordering` whose paragraphs have Lly in the
// closed range `minLly`..`maxLly`.
//
// `ordering` is presumably the Lly-ascending index order produced by
// llyOrdering (the binary searches below rely on that ordering). Nil is
// returned when the range lies entirely outside the paragraphs' Lly values, or
// when there are no paragraphs. The original code indexed `ordering[0]` and
// `ordering[n-1]` unconditionally and panicked on empty input.
func (paras paraList) llyRange(ordering []int, minLly, maxLly float64) []int {
	n := len(paras)
	if n == 0 || len(ordering) == 0 {
		return nil
	}
	if maxLly < paras[ordering[0]].Lly || minLly > paras[ordering[n-1]].Lly {
		return nil
	}
	// First position with Lly >= minLly.
	lo := _af.Search(n, func(i int) bool { return paras[ordering[i]].Lly >= minLly })
	// First position with Lly > maxLly.
	hi := _af.Search(n, func(i int) bool { return paras[ordering[i]].Lly > maxLly })
	return ordering[lo:hi]
}
// String returns a string describing `i`.
// The rectangle is followed by four one-character border markers, "L", "R",
// "B" and "T" for the flags `_fcgbfd`, `_fgdd`, `_cccb` and `_gbac`; a flag
// that is not set is shown as "_".
func (i gridTile) String() string {
	marker := func(set bool, label string) string {
		if !set {
			return "_"
		}
		return label
	}
	return _cag.Sprintf("%6.2f %1s%1s%1s%1s", i.PdfRectangle,
		marker(i._fcgbfd, "L"), marker(i._fgdd, "R"), marker(i._cccb, "B"), marker(i._gbac, "T"))
}

// fill appends a path section, made of the current subpaths of `ss` and the
// current fill colour, to `*fills`. With `_adce` set, a summary is printed;
// with `_gbbagd` also set, the first eleven subpaths are printed too.
func (ss *shapesState) fill(fills *[]pathSection) {
	section := pathSection{
		_bbdc: ss._fegc,
		Color: ss._afbf.getFillColor(),
	}
	*fills = append(*fills, section)
	if !_adce {
		return
	}
	box := section.bbox()
	_cag.Printf("    FILL: %2d fills (%d new) ss=%s color=%3v %6.2f=%6.2fx%6.2f\n",
		len(*fills), len(section._bbdc), ss, section.Color, box, box.Width(), box.Height())
	if !_gbbagd {
		return
	}
	for idx, sp := range section._bbdc {
		_cag.Printf("%8d: %s\n", idx, sp)
		if idx == 10 {
			break
		}
	}
}

// paraList is a list of paragraphs.
type paraList []*textPara

// checkOp checks the parameter count of content stream operation `op`.
//
// When `to` is nil a debug message is logged (showing at most `numParams`
// parameters) and the check still proceeds. A negative `numParams` disables
// the count check. If the number of parameters differs from `numParams`, the
// result is false, and the error is non-nil only when `hard` is true.
// Otherwise the result is (true, nil).
func (to *textObject) checkOp(op *_ag.ContentStreamOperation, numParams int, hard bool) (ok bool, err error) {
	if to == nil {
		var shown []_gdd.PdfObject
		if numParams > 0 {
			shown = op.Params
			if len(shown) > numParams {
				shown = shown[:numParams]
			}
		}
		_fb.Log.Debug("%#q operand outside text. params=%+v", op.Operand, shown)
	}
	if numParams < 0 || len(op.Params) == numParams {
		return true, nil
	}
	if hard {
		err = _d.New("incorrect parameter count")
	}
	_fb.Log.Debug("ERROR: %#q should have %d input params, got %d %+v", op.Operand, numParams, len(op.Params), op.Params)
	return false, err
}
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int

// emptyCompositeColumn reports whether column _cebgg holds no paragraphs in
// any row of the table's _cead cell map.
func (_acffc *textTable) emptyCompositeColumn(_cebgg int) bool {
	for row := 0; row < _acffc._dcbdf; row++ {
		cell, found := _acffc._cead[_addg(_cebgg, row)]
		if found && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// removeWord passes the word list stored under key _bgac through _eadbg
// together with _fad (presumably removing that word — confirm against
// _eadbg) and stores the result, deleting the key once the list is empty.
func (_cdgd *wordBag) removeWord(_fad *textWord, _bgac int) {
	remaining := _eadbg(_cdgd._cfeeb[_bgac], _fad)
	if len(remaining) > 0 {
		_cdgd._cfeeb[_bgac] = remaining
		return
	}
	delete(_cdgd._cfeeb, _bgac)
}

// _bac converts filled path sections into rulings (it logs itself as
// "makeFillRulings").
//
// The sections are first passed through _eefaa. Every quadrilateral subpath
// that makeRectRuling accepts contributes one ruling, coloured with the
// section's colour; all other subpaths are skipped.
func _bac(_aabd []pathSection) rulingList {
	_eefaa(_aabd)
	if _adce {
		_fb.Log.Info("makeFillRulings: %d fills", len(_aabd))
	}
	var rulings rulingList
	for _, section := range _aabd {
		for _, sp := range section._bbdc {
			if !sp.isQuadrilateral() {
				if _adce {
					_fb.Log.Error("!isQuadrilateral: %s", sp)
				}
				continue
			}
			r, ok := sp.makeRectRuling(section.Color)
			if ok {
				rulings = append(rulings, r)
			} else if _caea {
				_fb.Log.Error("!makeRectRuling: %s", sp)
			}
		}
	}
	if _adce {
		_fb.Log.Info("makeFillRulings: %s", rulings.String())
	}
	return rulings
}

// _fddf builds a word bag from _eeaf. The bag is created from the first word
// by _acca and every further word is filed under the key _agfb derives from
// its _fgbda value, growing the bag's rectangle with _gcae. The bag is sorted
// before it is returned. _eeaf must not be empty.
func _fddf(_eeaf []*textWord, _agbc float64, _abac, _deeb rulingList) *wordBag {
	bag := _acca(_eeaf[0], _agbc, _abac, _deeb)
	for _, word := range _eeaf[1:] {
		key := _agfb(word._fgbda)
		bag._cfeeb[key] = append(bag._cfeeb[key], word)
		bag.PdfRectangle = _gcae(bag.PdfRectangle, word.PdfRectangle)
	}
	bag.sort()
	return bag
}

// intersects reports whether the receiver and _fcff cross each other: one
// must be of kind _aafafg and the other of kind _eccgd, and each ruling's
// _edcba coordinate must lie within the other's [_bfeag, _cbba] range widened
// by _bade on both sides.
func (_abge *ruling) intersects(_fcff *ruling) bool {
	orthogonal := (_abge._agcgg == _aafafg && _fcff._agcgg == _eccgd) ||
		(_fcff._agcgg == _aafafg && _abge._agcgg == _eccgd)
	within := func(a, b *ruling) bool {
		return a._bfeag-_bade <= b._edcba && b._edcba <= a._cbba+_bade
	}
	o1 := within(_abge, _fcff)
	o2 := within(_fcff, _abge)
	result := orthogonal && o1 && o2
	if _adce {
		_cag.Printf("    intersects:  orthogonal=%t o1=%t o2=%t \u2192 %t\n"+
			"      v=%s\n"+
			"      w=%s\n",
			orthogonal, o1, o2, result, _abge, _fcff)
	}
	return result
}

// _aaee returns a single space when _gcceb accepts the difference of its two
// arguments and a newline otherwise.
func _aaee(_dcdbe, _abbe float64) string {
	if _gcceb(_dcdbe - _abbe) {
		return " "
	}
	return "\n"
}

// _feaba reports whether _cdbc consists solely of Unicode white space. The
// empty string counts as white space.
func _feaba(_cdbc string) bool {
	for _, r := range _cdbc {
		if _gd.IsSpace(r) {
			continue
		}
		return false
	}
	return true
}

// _fbgcd converts stroked path sections into rulings (it logs itself as
// "makeStrokeRulings").
//
// The sections are first passed through _eefaa. Each pair of consecutive
// points of a subpath is offered to _eeea; the pairs it accepts become
// rulings coloured with the section's colour.
func _fbgcd(_edeac []pathSection) rulingList {
	_eefaa(_edeac)
	if _adce {
		_fb.Log.Info("makeStrokeRulings: %d strokes", len(_edeac))
	}
	var rulings rulingList
	for _, section := range _edeac {
		for _, sp := range section._bbdc {
			points := sp._egd
			if len(points) < 2 {
				continue
			}
			for i := 1; i < len(points); i++ {
				if r, ok := _eeea(points[i-1], points[i], section.Color); ok {
					rulings = append(rulings, r)
				}
			}
		}
	}
	if _adce {
		_fb.Log.Info("makeStrokeRulings: %s", rulings)
	}
	return rulings
}

// empty reports whether the stack holds no states.
func (_dee *stateStack) empty() bool {
	return len(*_dee) == 0
}

// merge combines all paragraphs of the list into one paragraph.
//
// The list is put into reading order first. The result, built by _efagec,
// covers the rectangles of all paragraphs combined with _gcae and carries
// their _ddeb entries concatenated in that order. An empty list yields nil.
func (_ebfe paraList) merge() *textPara {
	_fb.Log.Trace("merge: paras=%d ===========x=============", len(_ebfe))
	if len(_ebfe) == 0 {
		return nil
	}
	_ebfe.sortReadingOrder()
	head := _ebfe[0]
	bounds := head.PdfRectangle
	lines := head._ddeb
	for _, para := range _ebfe[1:] {
		bounds = _gcae(bounds, para.PdfRectangle)
		lines = append(lines, para._ddeb...)
	}
	return _efagec(bounds, lines)
}

// _eggb adjusts the slices stored in _geede in place (it logs itself as
// "fixCells"). The keys are visited in the order returned by _ddcd, starting
// at the first key whose slice is non-nil; nil slices are skipped.
//
// NOTE(review): both slices compared below are read from the same key, so an
// entry is only ever compared with itself. Reading the first one from the
// previous key (prevKey) looks intended — confirm before changing.
func _eggb(_geede map[int][]float64) {
	if len(_geede) <= 1 {
		return
	}
	keys := _ddcd(_geede)
	if _agede {
		_fb.Log.Info("fixCells: keys=%+v", keys)
	}
	var startIdx, prevKey int
	for startIdx, prevKey = range keys {
		if _geede[prevKey] != nil {
			break
		}
	}
	for offset, key := range keys[startIdx:] {
		cur := _geede[key]
		if cur == nil {
			continue
		}
		if _agede {
			_cag.Printf("%4d: k0=%d k1=%d\n", startIdx+offset, prevKey, key)
		}
		prev := _geede[key]
		last := len(prev) - 1
		if prev[last] > cur[0] {
			prev[last] = cur[0]
			_geede[prevKey] = prev
		}
		prevKey = key
	}
}

// _fdbe passes the absolute X and Y distances between the two points to
// _dgbd and returns its verdict.
func _fdbe(_gafa, _dbdf _afd.Point) bool {
	dx := _gc.Abs(_gafa.X - _dbdf.X)
	dy := _gc.Abs(_gafa.Y - _dbdf.Y)
	return _dgbd(dx, dy)
}

// _gcgf builds the grid tile for rectangle _cgabb. Each of the four border
// flags is set when the corresponding ruling is non-nil and encloses the
// matching side of the rectangle: the first two rulings are tested against
// the vertical range [Lly, Ury], the last two against the horizontal range
// [Llx, Urx].
func _gcgf(_cgabb _ac.PdfRectangle, _agdfd, _aedfc, _dgbca, _ecgd *ruling) gridTile {
	llx, urx := _cgabb.Llx, _cgabb.Urx
	lly, ury := _cgabb.Lly, _cgabb.Ury
	tile := gridTile{PdfRectangle: _cgabb}
	tile._fcgbfd = _agdfd != nil && _agdfd.encloses(lly, ury)
	tile._fgdd = _aedfc != nil && _aedfc.encloses(lly, ury)
	tile._cccb = _dgbca != nil && _dgbca.encloses(llx, urx)
	tile._gbac = _ecgd != nil && _ecgd.encloses(llx, urx)
	return tile
}

// event bundles a float64 value, a boolean flag and an integer; its use is
// not visible in this part of the file.
type event struct {
	_eefg  float64
	_bgdfg bool
	_deed  int
}

// top returns the state on top of the stack without removing it, or nil when
// the stack is empty.
func (_ebac *stateStack) top() *textState {
	if _ebac.empty() {
		return nil
	}
	return (*_ebac)[_ebac.size()-1]
}

// absorb hands every word of _cegg to the receiver via pullWord and then
// applies the removals collected on the way to _cegg.
func (_efbc *wordBag) absorb(_cegg *wordBag) {
	removals := _cegg.makeRemovals()
	for key, words := range _cegg._cfeeb {
		for _, word := range words {
			_efbc.pullWord(word, key, removals)
		}
	}
	_cegg.applyRemovals(removals)
}

// getCurrentFont returns the font recorded in the text state. When none is
// set the condition is logged and the model's default font is returned.
func (_dbcb *textObject) getCurrentFont() *_ac.PdfFont {
	if font := _dbcb._fga._eedd; font != nil {
		return font
	}
	_fb.Log.Debug("ERROR: No font defined. Using default.")
	return _ac.DefaultFont()
}

// establishSubpath returns the subpath that should receive the next path
// segment. If lastpointEstablished reports false, a new subpath created from
// the returned point is appended first. The _dcef flag is cleared on success;
// nil is returned when there is no subpath at all.
func (_egcg *shapesState) establishSubpath() *subpath {
	point, established := _egcg.lastpointEstablished()
	if !established {
		_egcg._fegc = append(_egcg._fegc, _aab(point))
	}
	count := len(_egcg._fegc)
	if count == 0 {
		return nil
	}
	_egcg._dcef = false
	return _egcg._fegc[count-1]
}

// gridTiling is a rectangle together with two coordinate lists and a
// two-level map of grid tiles keyed by coordinates.
type gridTiling struct {
	_ac.PdfRectangle
	_egfga []float64
	_ebbb  []float64
	_cgdc  map[float64]map[float64]gridTile
}

// bbox returns the rectangle of the line.
func (_egag *textLine) bbox() _ac.PdfRectangle {
	return _egag.PdfRectangle
}
// makeRectRuling tries to interpret the first four points of the subpath as
// an axis-aligned rectangle and to turn that rectangle into a ruling coloured
// with _efdf.
//
// Each side (point i to point i+1, wrapping around) is classified by _ddgc.
// The shape is accepted when it has two sides of kind _eccgd ("horzs" in the
// debug output) and two of kind _aafafg ("verts"), or two sides of one kind,
// none of the other, and a positive verdict from _cggg or _fdbe on their
// start points. The boolean result is false whenever no ruling could be made.
// The subpath must hold at least four points.
func (_cgab *subpath) makeRectRuling(_efdf _ca.Color) (*ruling, bool) {
	if _caea {
		_fb.Log.Info("makeRectRuling: path=%v", _cgab)
	}
	points := _cgab._egd[:4]
	kinds := make(map[int]rulingKind, len(points))
	for i, from := range points {
		to := _cgab._egd[(i+1)%4]
		kinds[i] = _ddgc(from, to)
		if _caea {
			_cag.Printf("%4d: %s = %6.2f - %6.2f", i, kinds[i], from, to)
		}
	}
	if _caea {
		_cag.Printf("   kinds=%+v\n", kinds)
	}

	var verts, horzs []int
	for i, kind := range kinds {
		if kind == _eccgd {
			horzs = append(horzs, i)
		} else if kind == _aafafg {
			verts = append(verts, i)
		}
	}
	if _caea {
		_cag.Printf("   horzs=%d %+v\n", len(horzs), horzs)
		_cag.Printf("   verts=%d %+v\n", len(verts), verts)
	}

	ok := (len(horzs) == 2 && len(verts) == 2) ||
		(len(horzs) == 2 && len(verts) == 0 && _cggg(points[horzs[0]], points[horzs[1]])) ||
		(len(verts) == 2 && len(horzs) == 0 && _fdbe(points[verts[0]], points[verts[1]]))
	if _caea {
		_cag.Printf("   horzs=%d verts=%d ok=%t\n", len(horzs), len(verts), ok)
	}
	if !ok {
		if _caea {
			_fb.Log.Error("!!makeRectRuling: path=%v", _cgab)
			_cag.Printf("   horzs=%d verts=%d ok=%t\n", len(horzs), len(verts), ok)
		}
		return &ruling{}, false
	}

	// When one orientation is missing, use the sides that are not of the
	// other orientation in its place.
	if len(verts) == 0 {
		for i, kind := range kinds {
			if kind != _eccgd {
				verts = append(verts, i)
			}
		}
	}
	if len(horzs) == 0 {
		for i, kind := range kinds {
			if kind != _aafafg {
				horzs = append(horzs, i)
			}
		}
	}
	if _caea {
		_fb.Log.Info("makeRectRuling: horzs=%d verts=%d points=%d\n"+
			"\t horzs=%+v\n"+
			"\t verts=%+v\n"+
			"\tpoints=%+v",
			len(horzs), len(verts), len(points), horzs, verts, points)
	}

	// Order the two horizontal start points by Y and the two vertical start
	// points by X.
	top, bottom := points[horzs[1]], points[horzs[0]]
	if points[horzs[0]].Y > points[horzs[1]].Y {
		top, bottom = points[horzs[0]], points[horzs[1]]
	}
	xA, xB := points[verts[1]], points[verts[0]]
	if points[verts[0]].X > points[verts[1]].X {
		xA, xB = points[verts[0]], points[verts[1]]
	}

	rect := _ac.PdfRectangle{Llx: xA.X, Urx: xB.X, Lly: bottom.Y, Ury: top.Y}
	// Normalise so that the lower-left corner really is lower and further left.
	if rect.Llx > rect.Urx {
		rect.Llx, rect.Urx = rect.Urx, rect.Llx
	}
	if rect.Lly > rect.Ury {
		rect.Lly, rect.Ury = rect.Ury, rect.Lly
	}

	candidate := rectRuling{PdfRectangle: rect, _bbgc: _aaca(rect), Color: _efdf}
	if candidate._bbgc == _dgdb {
		if _caea {
			_fb.Log.Error("makeRectRuling: kind=nil")
		}
		return nil, false
	}
	result, isRuling := candidate.asRuling()
	if !isRuling {
		if _caea {
			_fb.Log.Error("makeRectRuling: !isRuling")
		}
		return nil, false
	}
	if _adce {
		_cag.Printf("   r=%s\n", result.String())
	}
	return result, true
}

// equals reports whether both rulings have the same kind and whether _cgfe
// accepts each pair of their _edcba, _bfeag and _cbba coordinates.
func (_gfeg *ruling) equals(_ccbe *ruling) bool {
	if _gfeg._agcgg != _ccbe._agcgg {
		return false
	}
	return _cgfe(_gfeg._edcba, _ccbe._edcba) &&
		_cgfe(_gfeg._bfeag, _ccbe._bfeag) &&
		_cgfe(_gfeg._cbba, _ccbe._cbba)
}

// toTextTable converts the internal table into the exported TextTable.
//
// Cells is indexed [row][column]. Grid positions without a cell keep the
// zero TableCell; all others receive the cell's text and its text marks,
// with the offset counter handed to toTextMarks starting at zero per cell.
func (_bbadf *textTable) toTextTable() TextTable {
	width, height := _bbadf._agac, _bbadf._dcbdf
	if _agede {
		_fb.Log.Info("toTextTable: %d x %d", width, height)
	}
	cells := make([][]TableCell, height)
	for row := range cells {
		cells[row] = make([]TableCell, width)
		for col := range cells[row] {
			para := _bbadf.get(col, row)
			if para == nil {
				continue
			}
			if _agede {
				_cag.Printf("%4d %2d: %s\n", col, row, para)
			}
			out := &cells[row][col]
			out.Text = para.text()
			offset := 0
			out.Marks._dgf = para.toTextMarks(&offset)
		}
	}
	return TextTable{W: width, H: height, Cells: cells}
}

// mergePrimary returns the arithmetic mean of the _edcba coordinates of all
// rulings in the list. The list must not be empty.
func (_gcdg rulingList) mergePrimary() float64 {
	total := _gcdg[0]._edcba
	for i := 1; i < len(_gcdg); i++ {
		total += _gcdg[i]._edcba
	}
	return total / float64(len(_gcdg))
}
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are the marks tm for which `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end`, where
// `start` and `end` are offsets in the extracted text.
// NOTE: A TextMark can contain multiple characters, e.g. "ffi" for the ffi ligature, so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
func (_bcbe *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if _bcbe == nil {
		return nil, _d.New("ma==nil")
	}
	if end < start {
		return nil, _cag.Errorf("end < start. RangeOffset not defined. start=%d end=%d ", start, end)
	}
	marks := _bcbe._dgf
	n := len(marks)
	if n == 0 {
		return _bcbe, nil
	}

	// Clamp the requested range to the offsets covered by the marks.
	if start < marks[0].Offset {
		start = marks[0].Offset
	}
	if end > marks[n-1].Offset+1 {
		end = marks[n-1].Offset + 1
	}

	// iStart is the first mark whose last character is at or after `start`.
	iStart := _af.Search(n, func(i int) bool {
		return marks[i].Offset+len(marks[i].Text)-1 >= start
	})
	if iStart >= n {
		return nil, _cag.Errorf("Out of range. start=%d iStart=%d len=%d\n\tfirst=%v\n\t last=%v",
			start, iStart, n, marks[0], marks[n-1])
	}

	// iEnd is the first mark that begins at or after `end`. sort.Search
	// returns n when there is no such mark, which simply means the range runs
	// up to and including the last mark. That used to be reported as
	// "Out of range", so the final mark could never be returned; n is a valid
	// exclusive upper bound for the slice below.
	iEnd := _af.Search(n, func(i int) bool {
		return marks[i].Offset > end-1
	})
	if iEnd <= iStart {
		return nil, _cag.Errorf("iEnd <= iStart: start=%d end=%d iStart=%d iEnd=%d", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_dgf: marks[iStart:iEnd]}, nil
}

// findPrimSec returns the first ruling in the list whose _edcba coordinate
// is accepted by _gcceb as matching _degb and whose [_bfeag, _cbba] range,
// widened by _bade on both sides, contains _adccd. It returns nil when no
// ruling qualifies.
func (_gbgae rulingList) findPrimSec(_degb, _adccd float64) *ruling {
	for _, r := range _gbgae {
		if !_gcceb(r._edcba - _degb) {
			continue
		}
		if r._bfeag-_bade <= _adccd && _adccd <= r._cbba+_bade {
			return r
		}
	}
	return nil
}

// sortStrict sorts the list in place: by kind in descending order, then by
// _edcba ascending (values whose difference _gcceb accepts count as equal),
// then by _bfeag and finally by _cbba, both ascending.
func (_ebcf rulingList) sortStrict() {
	_af.Slice(_ebcf, func(_ebcfb, _aeged int) bool {
		a, b := _ebcf[_ebcfb], _ebcf[_aeged]
		if a._agcgg != b._agcgg {
			return a._agcgg > b._agcgg
		}
		if !_gcceb(a._edcba - b._edcba) {
			return a._edcba < b._edcba
		}
		if a._bfeag != b._bfeag {
			return a._bfeag < b._bfeag
		}
		return a._cbba < b._cbba
	})
}

// moveText forwards the offsets (_fbbd, _cbbc) to moveLP.
func (_dbbc *textObject) moveText(_fbbd, _cbbc float64) {
	_dbbc.moveLP(_fbbd, _cbbc)
}
// String returns a short description of the table: its width, its height and its grid flag.
func (_bdcd *textTable) String() string {
	return _cag.Sprintf("%d x %d %t", _bdcd._agac, _bdcd._dcbdf, _bdcd._ebabc)
}

// shapesState tracks the path currently being constructed while a content
// stream is processed.
type shapesState struct {
	_eacf _afd.Matrix // Current transformation matrix, refreshed on cm, Q and Do.
	_bag  _afd.Matrix // Matrix handed to extractPageText by its caller.
	_fegc []*subpath  // Subpaths of the current path.
	_dcef bool        // Set by moveTo, cleared by establishSubpath.
	_dffc _afd.Point  // Device-space point recorded by the last moveTo.
	_afbf *textObject // Text object consulted for the fill colour.
}

// contains reports whether rectangle _gfed fits inside the tile. A tile with
// fewer than three borders contains nothing. For every border flag that is
// set, the matching side of _gfed may exceed the tile by at most _dcbg.
func (_agae gridTile) contains(_gfed _ac.PdfRectangle) bool {
	if _agae.numBorders() < 3 {
		return false
	}
	switch {
	case _agae._fcgbfd && _gfed.Llx < _agae.Llx-_dcbg:
		return false
	case _agae._fgdd && _gfed.Urx > _agae.Urx+_dcbg:
		return false
	case _agae._cccb && _gfed.Lly < _agae.Lly-_dcbg:
		return false
	case _agae._gbac && _gfed.Ury > _agae.Ury+_dcbg:
		return false
	}
	return true
}

// processOperand extracts images referenced by the operation _fae.
//
// Only BI (inline image) and Do (XObject) operations with exactly one operand
// are handled; everything else is ignored. Inline stencil masks are skipped
// unless IncludeInlineStencilMasks is set in the context options. For Do,
// image XObjects are extracted directly and form XObjects are searched for
// images.
func (_de *imageExtractContext) processOperand(_fae *_ag.ContentStreamOperation, _ab _ag.GraphicsState, _fggg *_ac.PdfPageResources) error {
	if len(_fae.Params) != 1 {
		return nil
	}
	switch _fae.Operand {
	case "BI":
		inline, ok := _fae.Params[0].(*_ag.ContentStreamInlineImage)
		if !ok {
			return nil
		}
		if isMask, has := _gdd.GetBoolVal(inline.ImageMask); has && isMask && !_de._ga.IncludeInlineStencilMasks {
			return nil
		}
		return _de.extractInlineImage(inline, _ab, _fggg)
	case "Do":
		name, ok := _gdd.GetName(_fae.Params[0])
		if !ok {
			_fb.Log.Debug("ERROR: Type")
			return _agc
		}
		_, xtype := _fggg.GetXObjectByName(*name)
		switch xtype {
		case _ac.XObjectTypeImage:
			return _de.extractXObjectImage(name, _ab, _fggg)
		case _ac.XObjectTypeForm:
			return _de.extractFormImages(name, _ab, _fggg)
		}
	}
	return nil
}

// extractPageText parses the content stream _eda and collects its text marks
// and its stroked and filled path sections into a PageText.
//
// _fggcd are the resources used to resolve fonts and XObjects, _afgf is the
// matrix applied on top of the stream's own CTM and _ebfd is the form XObject
// nesting level; exceeding _bf yields a "form stack overflow" error. Form
// XObjects met via Do are extracted recursively and their results cached on
// the extractor by XObject name.
//
// Besides the page text, the two counters kept in the text state (_ededf and
// _eca) are returned. The page text and counters gathered so far are returned
// even when an error is reported.
//
// Compared with earlier revisions three operand-handling defects are fixed:
//   - Tf now checks the error of the font size conversion instead of the
//     already verified name flag, so a malformed size is reported.
//   - TJ returns a type error for a non-array operand instead of the (nil)
//     parse error captured from the enclosing function.
//   - `"` takes its operands in the order defined by the PDF specification,
//     word spacing first and character spacing second.
func (_dbb *Extractor) extractPageText(_eda string, _fggcd *_ac.PdfPageResources, _afgf _afd.Matrix, _ebfd int) (*PageText, int, int, error) {
	_fb.Log.Trace("extractPageText: level=%d", _ebfd)
	pageText := &PageText{_abea: _dbb._fe}
	state := _gfe(_dbb._fe)
	var stack stateStack
	to := _agbg(_dbb, _fggcd, _ag.GraphicsState{}, &state, &stack)
	shapes := shapesState{_bag: _afgf, _eacf: _afd.IdentityMatrix(), _afbf: to}
	var inTextObj bool

	if _ebfd > _bf {
		overflow := _d.New("form stack overflow")
		_fb.Log.Debug("ERROR: extractPageText. recursion level=%d err=%v", _ebfd, overflow)
		return pageText, state._ededf, state._eca, overflow
	}

	parser := _ag.NewContentStreamParser(_eda)
	operations, err := parser.Parse()
	if err != nil {
		_fb.Log.Debug("ERROR: extractPageText parse failed. err=%v", err)
		return pageText, state._ededf, state._eca, err
	}

	logCTM := func() {
		if _efca {
			_fb.Log.Info("ctm=%s", shapes._eacf)
		}
	}

	processor := _ag.NewContentStreamProcessor(*operations)
	processor.AddHandler(_ag.HandlerConditionEnumAllOperands, "", func(op *_ag.ContentStreamOperation, gs _ag.GraphicsState, resources *_ac.PdfPageResources) error {
		if _dbcdf {
			_fb.Log.Info("&&& op=%s", op)
		}
		switch op.Operand {
		// Graphics state stack.
		case "q":
			logCTM()
			stack.push(&state)
		case "Q":
			if !stack.empty() {
				state = *stack.pop()
			}
			shapes._eacf = gs.CTM
			logCTM()

		// Text objects.
		case "BT":
			if inTextObj {
				_fb.Log.Debug("BT called while in a text object")
				// Keep the marks of the text object that was never closed.
				pageText._aged = append(pageText._aged, to._bece...)
			}
			inTextObj = true
			textGS := gs
			textGS.CTM = _afgf.Mult(textGS.CTM)
			to = _agbg(_dbb, resources, textGS, &state, &stack)
			shapes._afbf = to
		case "ET":
			if !inTextObj {
				_fb.Log.Debug("ET called outside of a text object")
			}
			inTextObj = false
			pageText._aged = append(pageText._aged, to._bece...)
			to.reset()

		// Text positioning.
		case "T*":
			to.nextLine()
		case "Td":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			tx, ty, err := _acfgd(op.Params)
			if err != nil {
				return err
			}
			to.moveText(tx, ty)
		case "TD":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			tx, ty, err := _acfgd(op.Params)
			if err != nil {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.moveTextSetLeading(tx, ty)

		// Text showing.
		case "Tj":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: Tj op=%s err=%v", op, err)
				return err
			}
			data, ok := _gdd.GetStringBytes(op.Params[0])
			if !ok {
				_fb.Log.Debug("ERROR: Tj op=%s GetStringBytes failed", op)
				return _gdd.ErrTypeError
			}
			return to.showText(data)
		case "TJ":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: TJ err=%v", err)
				return err
			}
			arr, ok := _gdd.GetArray(op.Params[0])
			if !ok {
				_fb.Log.Debug("ERROR: TJ op=%s GetArrayVal failed", op)
				// Report the failure like Tj does; the captured parse error
				// returned here before is always nil at this point.
				return _gdd.ErrTypeError
			}
			return to.showTextAdjusted(arr)
		case "'":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: ' err=%v", err)
				return err
			}
			data, ok := _gdd.GetStringBytes(op.Params[0])
			if !ok {
				_fb.Log.Debug("ERROR: ' op=%s GetStringBytes failed", op)
				return _gdd.ErrTypeError
			}
			to.nextLine()
			return to.showText(data)
		case "\"":
			if ok, err := to.checkOp(op, 3, true); !ok {
				_fb.Log.Debug("ERROR: \" err=%v", err)
				return err
			}
			// Operand order is `aw ac string`: word spacing, then character
			// spacing. _acfgd is taken to return the operands in order, as
			// its use for Td suggests.
			wordSpacing, charSpacing, err := _acfgd(op.Params[:2])
			if err != nil {
				return err
			}
			data, ok := _gdd.GetStringBytes(op.Params[2])
			if !ok {
				_fb.Log.Debug("ERROR: \" op=%s GetStringBytes failed", op)
				return _gdd.ErrTypeError
			}
			to.setCharSpacing(charSpacing)
			to.setWordSpacing(wordSpacing)
			to.nextLine()
			return to.showText(data)

		// Text state.
		case "TL":
			leading, err := _efag(op)
			if err != nil {
				_fb.Log.Debug("ERROR: TL err=%v", err)
				return err
			}
			to.setTextLeading(leading)
		case "Tc":
			spacing, err := _efag(op)
			if err != nil {
				_fb.Log.Debug("ERROR: Tc err=%v", err)
				return err
			}
			to.setCharSpacing(spacing)
		case "Tf":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_fb.Log.Debug("ERROR: Tf err=%v", err)
				return err
			}
			name, ok := _gdd.GetNameVal(op.Params[0])
			if !ok {
				_fb.Log.Debug("ERROR: Tf op=%s GetNameVal failed", op)
				return _gdd.ErrTypeError
			}
			size, err := _gdd.GetNumberAsFloat(op.Params[1])
			if err != nil {
				_fb.Log.Debug("ERROR: Tf op=%s GetFloatVal failed. err=%v", op, err)
				return err
			}
			err = to.setFont(name, size)
			// An unsupported font is remembered on the text object rather
			// than aborting the extraction.
			to._dcb = _cg.Is(err, _gdd.ErrNotSupported)
			if err != nil && !to._dcb {
				return err
			}
		case "Tm":
			if ok, err := to.checkOp(op, 6, true); !ok {
				_fb.Log.Debug("ERROR: Tm err=%v", err)
				return err
			}
			values, err := _gdd.GetNumbersAsFloat(op.Params)
			if err != nil {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setTextMatrix(values)
		case "Tr":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: Tr err=%v", err)
				return err
			}
			mode, ok := _gdd.GetIntVal(op.Params[0])
			if !ok {
				_fb.Log.Debug("ERROR: Tr op=%s GetIntVal failed", op)
				return _gdd.ErrTypeError
			}
			to.setTextRenderMode(mode)
		case "Ts":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: Ts err=%v", err)
				return err
			}
			rise, err := _gdd.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setTextRise(rise)
		case "Tw":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			spacing, err := _gdd.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setWordSpacing(spacing)
		case "Tz":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			scaling, err := _gdd.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_fb.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setHorizScaling(scaling)

		// Transformation matrix.
		case "cm":
			shapes._eacf = gs.CTM
			if shapes._eacf.Singular() {
				// Replace a singular matrix by a pure translation.
				replacement := _afd.IdentityMatrix().Translate(shapes._eacf.Translation())
				_fb.Log.Debug("Singular ctm=%s\u2192%s", shapes._eacf, replacement)
				shapes._eacf = replacement
			}
			logCTM()

		// Path construction.
		case "m":
			if len(op.Params) != 2 {
				_fb.Log.Debug("WARN: error while processing `m` operator: %v. Output may be incorrect.", _ge)
				return nil
			}
			xy, err := _gdd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.moveTo(xy[0], xy[1])
		case "l":
			if len(op.Params) != 2 {
				_fb.Log.Debug("WARN: error while processing `l` operator: %v. Output may be incorrect.", _ge)
				return nil
			}
			xy, err := _gdd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.lineTo(xy[0], xy[1])
		case "c":
			if len(op.Params) != 6 {
				return _ge
			}
			p, err := _gdd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_fb.Log.Debug("Cubic bezier params: %.2f", p)
			shapes.cubicTo(p[0], p[1], p[2], p[3], p[4], p[5])
		case "v", "y":
			if len(op.Params) != 4 {
				return _ge
			}
			p, err := _gdd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_fb.Log.Debug("Cubic bezier params: %.2f", p)
			shapes.quadraticTo(p[0], p[1], p[2], p[3])
		case "h":
			shapes.closePath()
		case "re":
			if len(op.Params) != 4 {
				return _ge
			}
			p, err := _gdd.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.drawRectangle(p[0], p[1], p[2], p[3])
			shapes.closePath()

		// Path painting.
		case "S":
			shapes.stroke(&pageText._cagb)
			shapes.clearPath()
		case "s":
			shapes.closePath()
			shapes.stroke(&pageText._cagb)
			shapes.clearPath()
		case "F":
			shapes.fill(&pageText._bfa)
			shapes.clearPath()
		case "f", "f*":
			shapes.closePath()
			shapes.fill(&pageText._bfa)
			shapes.clearPath()
		case "B", "B*":
			shapes.fill(&pageText._bfa)
			shapes.stroke(&pageText._cagb)
			shapes.clearPath()
		case "b", "b*":
			shapes.closePath()
			shapes.fill(&pageText._bfa)
			shapes.stroke(&pageText._cagb)
			shapes.clearPath()
		case "n":
			shapes.clearPath()

		// XObjects.
		case "Do":
			if len(op.Params) == 0 {
				_fb.Log.Debug("ERROR: expected XObject name operand for Do operator. Got %+v.", op.Params)
				return _gdd.ErrRangeError
			}
			name, ok := _gdd.GetName(op.Params[0])
			if !ok {
				_fb.Log.Debug("ERROR: invalid Do operator XObject name operand: %+v.", op.Params[0])
				return _gdd.ErrTypeError
			}
			_, xtype := resources.GetXObjectByName(*name)
			if xtype != _ac.XObjectTypeForm {
				// Only form XObjects can contribute text.
				break
			}
			formResult, cached := _dbb._agb[name.String()]
			if !cached {
				xform, err := resources.GetXObjectFormByName(*name)
				if err != nil {
					_fb.Log.Debug("ERROR: %v", err)
					return err
				}
				formContent, err := xform.GetContentStream()
				if err != nil {
					_fb.Log.Debug("ERROR: %v", err)
					return err
				}
				// A form without resources of its own uses the current ones.
				formResources := xform.Resources
				if formResources == nil {
					formResources = resources
				}
				formText, count1, count2, err := _dbb.extractPageText(string(formContent), formResources, _afgf.Mult(gs.CTM), _ebfd+1)
				if err != nil {
					_fb.Log.Debug("ERROR: %v", err)
					return err
				}
				formResult = textResult{*formText, count1, count2}
				_dbb._agb[name.String()] = formResult
			}
			shapes._eacf = gs.CTM
			logCTM()
			pageText._aged = append(pageText._aged, formResult._gga._aged...)
			pageText._cagb = append(pageText._cagb, formResult._gga._cagb...)
			pageText._bfa = append(pageText._bfa, formResult._gga._bfa...)
			state._ededf += formResult._febb
			state._eca += formResult._gcf

		// Colours: mirror the processor's graphics state on the text object.
		case "rg", "g", "k", "cs", "sc", "scn":
			to._gbc.ColorspaceNonStroking = gs.ColorspaceNonStroking
			to._gbc.ColorNonStroking = gs.ColorNonStroking
		case "RG", "G", "K", "CS", "SC", "SCN":
			to._gbc.ColorspaceStroking = gs.ColorspaceStroking
			to._gbc.ColorStroking = gs.ColorStroking
		}
		return nil
	})

	err = processor.Process(_fggcd)
	return pageText, state._ededf, state._eca, err
}

// setHorizScaling stores _affb as the horizontal scaling of the text state.
// A nil receiver is ignored.
func (_ccfg *textObject) setHorizScaling(_affb float64) {
	if _ccfg == nil {
		return
	}
	_ccfg._fga._bgg = _affb
}

// _eefaa replaces every point of every subpath in _bgfdc, in place, by the
// point obtained from applying _geabe to its X and Y coordinates (it logs
// itself as "granularize"). Nothing is done when _eada is negative.
func _eefaa(_bgfdc []pathSection) {
	if _eada < 0.0 {
		return
	}
	if _adce {
		_fb.Log.Info("granularize: %d subpath sections", len(_bgfdc))
	}
	for si, section := range _bgfdc {
		for pi, sp := range section._bbdc {
			for i, before := range sp._egd {
				after := _afd.Point{X: _geabe(before.X), Y: _geabe(before.Y)}
				sp._egd[i] = after
				if _adce && !_cdbg(before, after) {
					delta := _afd.Point{X: after.X - before.X, Y: after.Y - before.Y}
					_cag.Printf("%4d - %4d - %4d: %.2f \u2192 %.2f (%g)\n", si, pi, i, before, after, delta)
				}
			}
		}
	}
}

// log prints the table under the title _bggf: a header line with its
// dimensions, grid flag and rectangle, followed by one line per non-empty
// cell. It does nothing unless the debug flag _agede is set.
func (_cacgb *textTable) log(_bggf string) {
	if !_agede {
		return
	}
	_fb.Log.Info("~~~ %s: %d x %d grid=%t\n      %6.2f",
		_bggf, _cacgb._agac, _cacgb._dcbdf, _cacgb._ebabc, _cacgb.PdfRectangle)
	for row := 0; row < _cacgb._dcbdf; row++ {
		for col := 0; col < _cacgb._agac; col++ {
			cell := _cacgb.get(col, row)
			if cell == nil {
				continue
			}
			_cag.Printf("%4d %2d: %6.2f %q %d\n",
				col, row, cell.PdfRectangle, _bgdca(cell.text(), 50), _g.RuneCountInString(cell.text()))
		}
	}
}

// sort orders the list in place using the list's comp method.
func (_dgcf rulingList) sort() {
	_af.Slice(_dgcf, _dgcf.comp)
}

// _bdga returns the difference between the _efae values of its arguments.
func _bdga(_gedbb, _gfbae bounded) float64 {
	return _efae(_gedbb) - _efae(_gfbae)
}

// moveTo handles the start of a new subpath at user-space point
// (_cage, _cddb): it sets the _dcef flag and records the corresponding device
// point.
func (_cbfg *shapesState) moveTo(_cage, _cddb float64) {
	_cbfg._dcef = true
	_cbfg._dffc = _cbfg.devicePoint(_cage, _cddb)
	if _efca {
		_fb.Log.Info("moveTo: %.2f,%.2f device=%.2f", _cage, _cddb, _cbfg._dffc)
	}
}

// firstWord returns the first word stored under key _efbg. The key must be
// present with a non-empty word list.
func (_febf *wordBag) firstWord(_efbg int) *textWord {
	return _febf._cfeeb[_efbg][0]
}
// textObject carries the state used while a text object is processed: the
// owning Extractor (_defa), page resources (_fgf), graphics state (_gbc), text
// state (_fga), a state stack (_aece), two matrices (_bgc and _bab, which
// setTextMatrix sets to the same value), the collected text marks (_bece) and
// a boolean flag (_dcb).
type textObject struct{_defa *Extractor ;_fgf *_ac .PdfPageResources ;_gbc _ag .GraphicsState ;_fga *textState ;_aece *stateStack ;_bgc _afd .Matrix ;_bab _afd .Matrix ;_bece []*textMark ;_dcb bool ;};

// _fgfb returns the translation component of `_fbd` as a point.
func _fgfb (_fbd _afd .Matrix )_afd .Point {_bbbe ,_dacd :=_fbd .Translation ();
return _afd .Point {X :_bbbe ,Y :_dacd };};

// _cafa returns the difference between the left edges (Llx) of the bounding
// boxes of `_gfcf` and `_ebbc`.
func _cafa (_gfcf ,_ebbc bounded )float64 {return _gfcf .bbox ().Llx -_ebbc .bbox ().Llx };

// _ccbf builds a textWord from `_gbcb`. The word's rectangle is the union of
// the marks' rectangles, _abeg is the largest _gba among the marks and _fgbda
// is `_eaagc`.Ury minus the bottom of the word's rectangle.
// `_gbcb` must be non-empty: element 0 is read unconditionally.
func _ccbf (_gbcb []*textMark ,_eaagc _ac .PdfRectangle )*textWord {_cecgg :=_gbcb [0].PdfRectangle ;_egef :=_gbcb [0]._gba ;
for _ ,_eebgd :=range _gbcb [1:]{_cecgg =_gcae (_cecgg ,_eebgd .PdfRectangle );if _eebgd ._gba > _egef {_egef =_eebgd ._gba ;};};return &textWord {PdfRectangle :_cecgg ,_ggabc :_gbcb ,_fgbda :_eaagc .Ury -_cecgg .Lly ,_abeg :_egef };};

// getFont returns the font named `_gegg`.
//
// When the extractor has a font cache (_gde != nil) the font dictionary is
// looked up first; its String() form is the cache key. A hit refreshes the
// entry's access stamp and returns the cached font. On a miss (or with no
// cache) the font is loaded with getFontDirect and, if a cache exists, stored
// in it; when the cache already holds _daca or more entries, the entry with the
// smallest access stamp is evicted first.
//
// Errors from getFontDict and getFontDirect are returned unchanged.
func (_fcc *textObject) getFont(_gegg string) (*_ac.PdfFont, error) {
	if _fcc._defa._gde != nil {
		_cgce, _acffg := _fcc.getFontDict(_gegg)
		if _acffg != nil {
			_fb.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073", _gegg, _acffg.Error())
			return nil, _acffg
		}
		_fcc._defa._df++
		_ckey := _cgce.String()
		if _gbfaa, _cbbd := _fcc._defa._gde[_ckey]; _cbbd {
			_gbfaa._fabc = _fcc._defa._df
			// Entries are stored by value, so _gbfaa is a copy: store it back or
			// the refreshed access stamp is lost and eviction stops being
			// least-recently-used.
			_fcc._defa._gde[_ckey] = _gbfaa
			return _gbfaa._fab, nil
		}
	}
	_fcbg, _cddf := _fcc.getFontDict(_gegg)
	if _cddf != nil {
		return nil, _cddf
	}
	_cda, _cddf := _fcc.getFontDirect(_gegg)
	if _cddf != nil {
		return nil, _cddf
	}
	if _fcc._defa._gde != nil {
		_ffea := fontEntry{_cda, _fcc._defa._df}
		if len(_fcc._defa._gde) >= _daca {
			// Cache is full: evict the key with the oldest access stamp.
			var _dbg []string
			for _dbcc := range _fcc._defa._gde {
				_dbg = append(_dbg, _dbcc)
			}
			_af.Slice(_dbg, func(_fcbd, _dade int) bool {
				return _fcc._defa._gde[_dbg[_fcbd]]._fabc < _fcc._defa._gde[_dbg[_dade]]._fabc
			})
			delete(_fcc._defa._gde, _dbg[0])
		}
		_fcc._defa._gde[_fcbg.String()] = _ffea
	}
	return _cda, nil
}

// _degfb is the license gate. It returns nil when a license key is present and
// licensed, or when the package-level flag _ad is set; otherwise it prints a
// notice to stdout and returns an error. `_cddba` is not used.
func _degfb (_cddba *PageText )error {_adgb :=_fg .GetLicenseKey ();if _adgb !=nil &&_adgb .IsLicensed ()||_ad {return nil ;};_cag .Printf ("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a");
_cag .Println ("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f");
return _d .New ("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064");};

// showTextAdjusted walks the elements of `_fcf` in order.
// A number n translates the matrix _bgc by (-n*0.001*_fga._cef, 0); a string is
// passed to renderText; any other element type aborts with ErrTypeError.
// _gfc is never set to true in this function, so the x/y swap branch is
// currently unreachable.
// NOTE(review): any value returned by renderText is discarded here - confirm
// whether it reports errors that should be propagated.
func (_ageb *textObject )showTextAdjusted (_fcf *_gdd .PdfObjectArray )error {_gfc :=false ;
for _ ,_cdd :=range _fcf .Elements (){switch _cdd .(type ){case *_gdd .PdfObjectFloat ,*_gdd .PdfObjectInteger :_dfcf ,_abdb :=_gdd .GetNumberAsFloat (_cdd );if _abdb !=nil {_fb .Log .Debug ("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_cdd ,_fcf );
return _abdb ;};_fefc ,_bcc :=-_dfcf *0.001*_ageb ._fga ._cef ,0.0;if _gfc {_bcc ,_fefc =_fefc ,_bcc ;};_aggg :=_babf (_afd .Point {X :_fefc ,Y :_bcc });_ageb ._bgc .Concat (_aggg );case *_gdd .PdfObjectString :_cde ,_dafd :=_gdd .GetStringBytes (_cdd );
if !_dafd {_fb .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_cdd ,_fcf );
return _gdd .ErrTypeError ;};_ageb .renderText (_cde );default:_fb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_cdd ,_fcf );
return _gdd .ErrTypeError ;};};return nil ;};

// devicePoint transforms (`_ccg`, `_fcbdf`) by the product _bag.Mult(_eacf) and
// returns the result as a point.
func (_cfbb *shapesState )devicePoint (_ccg ,_fcbdf float64 )_afd .Point {_dgdc :=_cfbb ._bag .Mult (_cfbb ._eacf );_ccg ,_fcbdf =_dgdc .Transform (_ccg ,_fcbdf );return _afd .NewPoint (_ccg ,_fcbdf );};

// _adbc drains `_gddb` into a list of new word bags.
// For every depth index (ascending) it repeatedly takes the first word of the
// stratum, seeds a new bag with it via _acca, removes it from `_gddb`, and then
// grows the bag with scanBand passes until a full pass adds nothing:
// a "vertical" pass and a "horizontal" pass first (either one adding words
// restarts the loop), then a dry-run pass and, if its count passes the
// threshold test below, an "other" pass.
// NOTE(review): scanBand's result is treated as a count of matched words -
// confirm against its definition.
func _adbc (_gddb *wordBag ,_eddaa float64 ,_dfbd ,_cbcd rulingList )[]*wordBag {var _feaa []*wordBag ;
for _ ,_beaf :=range _gddb .depthIndexes (){_bbfe :=false ;for !_gddb .empty (_beaf ){_fce :=_gddb .firstReadingIndex (_beaf );_ddaa :=_gddb .firstWord (_fce );_dfceg :=_acca (_ddaa ,_eddaa ,_dfbd ,_cbcd );_gddb .removeWord (_ddaa ,_fce );if _gbfc {_fb .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_ddaa .String ());
};for _cebd :=true ;_cebd ;_cebd =_bbfe {_bbfe =false ;_cbcf :=_ebba *_dfceg ._aege ;_gabcd :=_effe *_dfceg ._aege ;_dfedc :=_cafad *_dfceg ._aege ;if _gbfc {_fb .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_dfceg .minDepth (),_dfceg .maxDepth (),_dfedc ,_gabcd );
};if _gddb .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_dfceg ,_gff (_aded ,0),_dfceg .minDepth ()-_dfedc ,_dfceg .maxDepth ()+_dfedc ,_cegb ,false ,false )> 0{_bbfe =true ;};if _gddb .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_dfceg ,_gff (_aded ,_gabcd ),_dfceg .minDepth (),_dfceg .maxDepth (),_fgeb ,false ,false )> 0{_bbfe =true ;
};if _bbfe {continue ;};_dbae :=_gddb .scanBand ("",_dfceg ,_gff (_eeb ,_cbcf ),_dfceg .minDepth (),_dfceg .maxDepth (),_faae ,true ,false );if _dbae > 0{_cca :=(_dfceg .maxDepth ()-_dfceg .minDepth ())/_dfceg ._aege ;if (_dbae > 1&&float64 (_dbae )> 0.3*_cca )||_dbae <=10{if _gddb .scanBand ("\u006f\u0074\u0068e\u0072",_dfceg ,_gff (_eeb ,_cbcf ),_dfceg .minDepth (),_dfceg .maxDepth (),_faae ,false ,true )> 0{_bbfe =true ;
};};};};_feaa =append (_feaa ,_dfceg );};};return _feaa ;};

// depthBand returns the depth indexes of the bag that lie between the indexes
// of depths `_deec` and `_bfdf`, or nil when the bag has no strata.
func (_fbff *wordBag )depthBand (_deec ,_bfdf float64 )[]int {if len (_fbff ._cfeeb )==0{return nil ;};return _fbff .depthRange (_fbff .getDepthIdx (_deec ),_fbff .getDepthIdx (_bfdf ));};

// cubicTo handles a cubic curve segment. Only the end point (`_geff`, `_bbde`)
// is added to the path; the four control-point coordinates are ignored.
func (_ggf *shapesState )cubicTo (_fgbe ,_eeged ,_bdd ,_gbdd ,_geff ,_bbde float64 ){if _efca {_fb .Log .Info ("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a");
};_ggf .addPoint (_geff ,_bbde );};

// setTextMatrix sets both _bgc and _bab to the matrix built from the six
// values in `_age`. A slice of any other length is logged and ignored.
func (_aag *textObject )setTextMatrix (_age []float64 ){if len (_age )!=6{_fb .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_age ));
return ;};_dbcd ,_ddgd ,_fbgf ,_ggca ,_bgfd ,_ddgda :=_age [0],_age [1],_age [2],_age [3],_age [4],_age [5];_aag ._bgc =_afd .NewMatrix (_dbcd ,_ddgd ,_fbgf ,_ggca ,_bgfd ,_ddgda );_aag ._bab =_aag ._bgc ;};

// _cdgaf returns the keys of `_fcgg` in ascending order.
func _cdgaf (_fcgg map[int ]intSet )[]int {_adfcg :=make ([]int ,0,len (_fcgg ));
for _gcbc :=range _fcgg {_adfcg =append (_adfcg ,_gcbc );};_af .Ints (_adfcg );return _adfcg ;};

// ruling describes one ruling: its kind (_agcgg), the kind of mark it came
// from (_aaec), its colour, a primary coordinate (_edcba), the two ends of its
// extent (_bfeag, _cbba) and a width (_geba).
type ruling struct{_agcgg rulingKind ;_aaec markKind ;_ca .Color ;_edcba float64 ;_bfeag float64 ;_cbba float64 ;_geba float64 ;};

// lines returns the _ddeb lines of every paragraph in `_fgbgb`, concatenated in
// paragraph order.
func (_fgbgb paraList )lines ()[]*textLine {var _fcfac []*textLine ;
for _ ,_gggg :=range _fgbgb {_fcfac =append (_fcfac ,_gggg ._ddeb ...);};return _fcfac ;};

// subdivide expands a table of composite cells into a finer table.
// Row and column corridors are computed and normalised with _eggb; if either
// set is empty the receiver is returned unchanged. Otherwise every composite
// cell is split along its corridors and the resulting sub-cells are placed in a
// new table at the offsets computed by _fbaf.
func (_fecbg *textTable )subdivide ()*textTable {_fecbg .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");_cgged :=_fecbg .compositeRowCorridors ();_gadg :=_fecbg .compositeColCorridors ();
if _agede {_fb .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_beba (_cgged ),_beba (_gadg ));
};if len (_cgged )==0||len (_gadg )==0{return _fecbg ;};_eggb (_cgged );_eggb (_gadg );if _agede {_fb .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_beba (_cgged ),_beba (_gadg ));
};_efaea ,_gcefd :=_fbaf (_fecbg ._dcbdf ,_cgged );_cccga ,_fbcdf :=_fbaf (_fecbg ._agac ,_gadg );_agfbb :=make (map[uint64 ]*textPara ,_fbcdf *_gcefd );_ebde :=&textTable {PdfRectangle :_fecbg .PdfRectangle ,_ebabc :_fecbg ._ebabc ,_dcbdf :_gcefd ,_agac :_fbcdf ,_abccf :_agfbb };
if _agede {_fb .Log .Info ("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_fecbg ._agac ,_fecbg ._dcbdf ,_fbcdf ,_gcefd ,_beba (_cgged ),_beba (_gadg ),_efaea ,_cccga );
};for _abeac :=0;_abeac < _fecbg ._dcbdf ;_abeac ++{_ffdb :=_efaea [_abeac ];for _egcbg :=0;_egcbg < _fecbg ._agac ;_egcbg ++{_deaff :=_cccga [_egcbg ];if _agede {_cag .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_egcbg ,_abeac ,_deaff ,_ffdb );
};_ecbe ,_eafe :=_fecbg ._cead [_addg (_egcbg ,_abeac )];if !_eafe {continue ;};_fgfac :=_ecbe .split (_cgged [_abeac ],_gadg [_egcbg ]);for _egbgd :=0;_egbgd < _fgfac ._dcbdf ;_egbgd ++{for _dbebg :=0;_dbebg < _fgfac ._agac ;_dbebg ++{_gdbae :=_fgfac .get (_dbebg ,_egbgd );
_ebde .put (_deaff +_dbebg ,_ffdb +_egbgd ,_gdbae );if _agede {_cag .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_deaff +_dbebg ,_ffdb +_egbgd ,_gdbae );};};};};};return _ebde ;};

// _bfeb groups `_feca` into words, in input order.
// The closure _gdfc finishes the word under construction: its text is computed
// and the word is kept unless _feaba reports true for that text.
// For each mark: when _bfdb is set, a diacritic lying in the diacritic area of
// its neighbour is merged into the current word; a mark whose text satisfies
// _feaba ends the current word; otherwise the mark either starts a new word or
// is appended, depending on its gap (_dgfaa) and depth difference relative to
// the current word, both scaled by the word's _abeg.
func _bfeb (_feca []*textMark ,_cgeac _ac .PdfRectangle )[]*textWord {var _egacd []*textWord ;
var _aeef *textWord ;if _abb {_fb .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_feca ));};_gdfc :=func (){if _aeef !=nil {_cgac :=_aeef .computeText ();
if !_feaba (_cgac ){_aeef ._gebf =_cgac ;_egacd =append (_egacd ,_aeef );if _abb {_fb .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_egacd )-1,_aeef .String ());
for _cece ,_dagb :=range _aeef ._ggabc {_cag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cece ,_dagb .String ());};};};_aeef =nil ;};};for _ ,_gacaf :=range _feca {if _bfdb &&_aeef !=nil &&len (_aeef ._ggabc )> 0{_daeg :=_aeef ._ggabc [len (_aeef ._ggabc )-1];
_cabgb ,_fbdc :=_ffdcd (_gacaf ._cadaf );_dcccbe ,_efdg :=_ffdcd (_daeg ._cadaf );if _fbdc &&!_efdg &&_daeg .inDiacriticArea (_gacaf ){_aeef .addDiacritic (_cabgb );continue ;};if _efdg &&!_fbdc &&_gacaf .inDiacriticArea (_daeg ){_aeef ._ggabc =_aeef ._ggabc [:len (_aeef ._ggabc )-1];
_aeef .appendMark (_gacaf ,_cgeac );_aeef .addDiacritic (_dcccbe );continue ;};};_dagd :=_feaba (_gacaf ._cadaf );if _dagd {_gdfc ();continue ;};if _aeef ==nil &&!_dagd {_aeef =_ccbf ([]*textMark {_gacaf },_cgeac );continue ;};_bgdb :=_aeef ._abeg ;_cfefa :=_gc .Abs (_agad (_cgeac ,_gacaf )-_aeef ._fgbda )/_bgdb ;
_fggce :=_dgfaa (_gacaf ,_aeef )/_bgdb ;if _fggce >=_aafe ||!(-_bafd <=_fggce &&_cfefa <=_gdcf ){_gdfc ();_aeef =_ccbf ([]*textMark {_gacaf },_cgeac );continue ;};_aeef .appendMark (_gacaf ,_cgeac );};_gdfc ();return _egacd ;};
2022-02-05 21:34:53 +00:00
2022-03-13 12:41:53 +00:00
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	_dgf []TextMark
}

// inTile returns the paragraphs of `_becd` whose rectangles `_gded` contains,
// in their original order.
func (_becd paraList) inTile(_gded gridTile) paraList {
	var inside paraList
	for _, para := range _becd {
		if !_gded.contains(para.PdfRectangle) {
			continue
		}
		inside = append(inside, para)
	}
	if _agede {
		_cag.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _gded, len(inside))
		for idx, para := range inside {
			_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, para)
		}
		_cag.Println("")
	}
	return inside
}

// xNeighbours builds one enter event (Llx) and one leave event (Urx) per
// paragraph and hands them to eventNeighbours. When `_ecdde` is non-zero each
// interval is widened on both sides by `_ecdde` times the paragraph's fontsize.
func (_dadbf paraList) xNeighbours(_ecdde float64) map[*textPara][]int {
	events := make([]event, 2*len(_dadbf))
	if _ecdde == 0 {
		for idx, para := range _dadbf {
			enter, leave := 2*idx, 2*idx+1
			events[enter] = event{para.Llx, true, idx}
			events[leave] = event{para.Urx, false, idx}
		}
	} else {
		for idx, para := range _dadbf {
			enter, leave := 2*idx, 2*idx+1
			events[enter] = event{para.Llx - _ecdde*para.fontsize(), true, idx}
			events[leave] = event{para.Urx + _ecdde*para.fontsize(), false, idx}
		}
	}
	return _dadbf.eventNeighbours(events)
}

// isActualGrid reports whether the rulings form a grid.
// augmentGrid supplies two ruling lists; there must be at least _fee+1 of the
// first and _edcf+1 of the second. Depending on _efdb, either the outermost
// rulings of the two lists must meet at their ends (within _gfdcf tolerance) or
// both lists must report aligned(). On success the first list followed by the
// second is returned.
func (_aagd rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := _aagd.augmentGrid()
	if len(verts) < _fee+1 || len(horzs) < _edcf+1 {
		if _adce {
			_fb.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(verts), len(horzs), _fee+1, _edcf+1)
		}
		return nil, false
	}
	if _adce {
		_fb.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", _aagd, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for idx, r := range _aagd {
			_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", idx, r)
		}
	}
	if _efdb {
		firstV, lastV := verts[0], verts[len(verts)-1]
		firstH, lastH := horzs[0], horzs[len(horzs)-1]
		cornersMeet := _gfdcf(firstV._edcba-firstH._bfeag) &&
			_gfdcf(lastV._edcba-firstH._cbba) &&
			_gfdcf(firstH._edcba-firstV._cbba) &&
			_gfdcf(lastH._edcba-firstV._bfeag)
		if !cornersMeet {
			if _adce {
				_fb.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", firstV, lastV, firstH, lastH)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			if _cecd {
				_fb.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _adce {
				_fb.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(horzs))
			}
			return nil, false
		}
	}
	grid := append(verts, horzs...)
	return grid, true
}

// tables converts the table (_ccec) of every paragraph that has an exportable
// one into a TextTable, in paragraph order.
func (_gecd paraList) tables() []TextTable {
	var exported []TextTable
	if _agede {
		_fb.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range _gecd {
		table := para._ccec
		if table == nil || !table.isExportable() {
			continue
		}
		exported = append(exported, table.toTextTable())
	}
	return exported
}

// empty reports whether the bag has no entry for depth index `_cgbe`.
func (_bbag *wordBag) empty(_cgbe int) bool {
	if _, present := _bbag._cfeeb[_cgbe]; present {
		return false
	}
	return true
}
// _fafc removes the last rune from the last mark of `_abgc` and adjusts
// `*_eccg` to match.
//
// If the last mark holds exactly one rune the mark itself is dropped and
// `*_eccg` becomes the end offset of the new last mark (or the dropped mark's
// own Offset when no marks remain). Otherwise the mark's Text is shortened
// with _gaaf, in place, and `*_eccg` is moved by the byte-length difference.
//
// An empty `_abgc` is returned unchanged.
func _fafc(_abgc []TextMark, _eccg *int) []TextMark {
	if len(_abgc) == 0 {
		return _abgc
	}
	// Take a pointer into the slice: a plain copy of the element would make the
	// Text assignment below a dead store and leave the mark unshortened.
	_adff := &_abgc[len(_abgc)-1]
	_fcde := []rune(_adff.Text)
	if len(_fcde) == 1 {
		_dropped := _adff.Offset
		_abgc = _abgc[:len(_abgc)-1]
		if len(_abgc) == 0 {
			// Nothing precedes the removed mark; the text now ends where it began.
			*_eccg = _dropped
			return _abgc
		}
		_ccfbb := _abgc[len(_abgc)-1]
		*_eccg = _ccfbb.Offset + len(_ccfbb.Text)
	} else {
		_bdee := _gaaf(_adff.Text)
		*_eccg += len(_bdee) - len(_adff.Text)
		_adff.Text = _bdee
	}
	return _abgc
}

// _fbaf computes, for each of `_aacf` consecutive indexes, the starting offset
// of that index when index i occupies len(`_adeg`[i])+1 slots. It returns the
// offsets and the total number of slots.
func _fbaf(_aacf int, _adeg map[int][]float64) ([]int, int) {
	_fgfc := make([]int, _aacf)
	_bcbeef := 0
	for _ecdee := range _fgfc {
		_fgfc[_ecdee] = _bcbeef
		_bcbeef += len(_adeg[_ecdee]) + 1
	}
	return _fgfc, _bcbeef
}

// _gcae returns the smallest rectangle containing both `_egafg` and `_bbef`.
func _gcae(_egafg, _bbef _ac.PdfRectangle) _ac.PdfRectangle {
	return _ac.PdfRectangle{
		Llx: _gc.Min(_egafg.Llx, _bbef.Llx),
		Lly: _gc.Min(_egafg.Lly, _bbef.Lly),
		Urx: _gc.Max(_egafg.Urx, _bbef.Urx),
		Ury: _gc.Max(_egafg.Ury, _bbef.Ury),
	}
}

// isAtom tries to form a table from the receiver, its _cegf neighbour, its
// _aegf neighbour and the paragraph reached through both of them. It returns
// nil if any of the three neighbours is taken() or if the two routes to the
// fourth paragraph disagree.
func (_feeg *textPara) isAtom() *textTable {
	_deaf, _fbaba := _feeg._cegf, _feeg._aegf
	if _deaf.taken() || _fbaba.taken() {
		return nil
	}
	_bfad := _deaf._aegf
	if _bfad.taken() || _bfad != _fbaba._cegf {
		return nil
	}
	return _ecdfc(_feeg, _deaf, _fbaba, _bfad)
}
2021-09-23 22:37:42 +00:00
2022-03-13 12:41:53 +00:00
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_ged PageText) Marks() *TextMarkArray {
	marks := TextMarkArray{_dgf: _ged._gccf}
	return &marks
}

// depthIndexes returns the depth indexes present in the bag in ascending
// order, or nil when the bag holds none.
func (_dffg *wordBag) depthIndexes() []int {
	count := len(_dffg._cfeeb)
	if count == 0 {
		return nil
	}
	indexes := make([]int, 0, count)
	for depthIdx := range _dffg._cfeeb {
		indexes = append(indexes, depthIdx)
	}
	_af.Ints(indexes)
	return indexes
}
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct {
	Fonts []Font
}

// sortReadingOrder reorders the paragraphs in place: they are first sorted
// with _edde as the comparison, then rearranged into the order returned by
// topoOrder. Lists of zero or one paragraph are left untouched.
func (_aafa paraList) sortReadingOrder() {
	_fb.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_aafa))
	if len(_aafa) <= 1 {
		return
	}
	_aafa.computeEBBoxes()
	// The less function must be a strict ordering: with "<= 0" two equal
	// paragraphs would each sort before the other, which sort.Slice does not
	// permit and which makes the result depend on the sort implementation.
	_af.Slice(_aafa, func(_eega, _bcad int) bool {
		return _edde(_aafa[_eega], _aafa[_bcad]) < 0
	})
	_cce := _aafa.topoOrder()
	_aafa.reorder(_cce)
}

// last returns the final point of the subpath. The subpath must hold at least
// one point.
func (_babfg *subpath) last() _afd.Point {
	return _babfg._egd[len(_babfg._egd)-1]
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
2022-03-13 12:41:53 +00:00
func (_gce *Extractor) ExtractText() (string, error) {
	text, _, _, err := _gce.ExtractTextWithStats()
	return text, err
}

// text returns the text of every word in the bag, in allWords order, joined by
// single spaces.
func (_ddca *wordBag) text() string {
	words := _ddca.allWords()
	parts := make([]string, 0, len(words))
	for _, word := range words {
		parts = append(parts, word._gebf)
	}
	return _cf.Join(parts, "\u0020")
}

// _babf returns the matrix that translates by `_dfbf`.
func _babf(_dfbf _afd.Point) _afd.Matrix {
	dx, dy := _dfbf.X, _dfbf.Y
	return _afd.TranslationMatrix(dx, dy)
}

// log dumps the paragraphs under the heading `_eede`. It does nothing unless
// the debug flag _dfgc is set. Nil paragraphs are skipped.
func (_bfgdgb paraList) log(_eede string) {
	if !_dfgc {
		return
	}
	_fb.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _eede, len(_bfgdgb))
	for idx, para := range _bfgdgb {
		if para == nil {
			continue
		}
		paraText := para.text()
		// Two spaces by default; table dimensions when the paragraph has a table.
		tableTag := "\u0020\u0020"
		if table := para._ccec; table != nil {
			tableTag = _cag.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", table._agac, table._dcbdf)
		}
		_cag.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", idx, para.PdfRectangle, tableTag, _bgdca(paraText, 50))
	}
}

// rulingList is a list of rulings.
type rulingList []*ruling

// bounded is implemented by anything that has a bounding box.
type bounded interface {
	bbox() _ac.PdfRectangle
}

// Compile-time debug switches. All of them evaluate to false; the derived
// flags are only enabled when their parent flag is.
const (
	_dccc   = false
	_abb    = false
	_dbcdf  = false
	_ccc    = false
	_efca   = false
	_agcb   = false
	_gbfc   = false
	_dfgc   = false
	_addb   = false
	_dfgd   = _addb && true
	_ceac   = _dfgd && false
	_ecfa   = _addb && true
	_agede  = false
	_adga   = _agede && false
	_effg   = _agede && true
	_adce   = false
	_gbbagd = _adce && false
	_cecd   = _adce && false
	_bdea   = _adce && true
	_caea   = _adce && false
	_ccfb   = _adce && false
)

// _ebgf returns a ruling of kind _eccgd placed at the top edge (Ury) of
// `_cfge` and spanning it from Llx to Urx.
func _ebgf(_cfge _ac.PdfRectangle) *ruling {
	top := ruling{
		_agcgg: _eccgd,
		_edcba: _cfge.Ury,
		_bfeag: _cfge.Llx,
		_cbba:  _cfge.Urx,
	}
	return &top
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
2022-03-13 12:41:53 +00:00
func (_cfc *Extractor) ExtractTextWithStats() (_fgb string, _fddg int, _feab int, _ebe error) {
	pageText, _fddg, _feab, _ebe := _cfc.ExtractPageText()
	if _ebe == nil {
		return pageText.Text(), _fddg, _feab, nil
	}
	return "", _fddg, _feab, _ebe
}

// parasBBox returns the cell's paragraph list and its rectangle.
func (_ggae compositeCell) parasBBox() (paraList, _ac.PdfRectangle) {
	paras, rect := _ggae.paraList, _ggae.PdfRectangle
	return paras, rect
}

// _beba formats `_dbgf` as "{key: values, ...}", taking the keys in the order
// returned by _ddcd.
func _beba(_dbgf map[int][]float64) string {
	keys := _ddcd(_dbgf)
	entries := make([]string, len(_dbgf))
	for pos, key := range keys {
		entries[pos] = _cag.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", key, _dbgf[key])
	}
	joined := _cf.Join(entries, "\u002c\u0020")
	return _cag.Sprintf("\u007b\u0025\u0073\u007d", joined)
}

// _acfgd converts exactly two numeric operands to floats. It fails when
// `_daae` does not hold two objects or when the conversion fails.
func _acfgd(_daae []_gdd.PdfObject) (_bcff, _bada float64, _efdbc error) {
	if count := len(_daae); count != 2 {
		return 0, 0, _cag.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", count)
	}
	values, _efdbc := _gdd.GetNumbersAsFloat(_daae)
	if _efdbc != nil {
		return 0, 0, _efdbc
	}
	return values[0], values[1], nil
}

// _eadbg returns `_cafaa` with the first occurrence of `_cfffa` removed (via
// _gegd). If the word is absent an error is logged and nil is returned.
func _eadbg(_cafaa []*textWord, _cfffa *textWord) []*textWord {
	for pos := range _cafaa {
		if _cafaa[pos] == _cfffa {
			return _gegd(_cafaa, pos)
		}
	}
	_fb.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", _cfffa)
	return nil
}

// pullWord adds `_ggef` to the bag under depth index `_becf`, growing the
// bag's rectangle and raising _aege to the word's _abeg if that is larger. The
// word is also recorded in `_cada`[`_becf`], which must already be a non-nil
// set.
func (_dffa *wordBag) pullWord(_ggef *textWord, _becf int, _cada map[int]map[*textWord]struct{}) {
	_dffa.PdfRectangle = _gcae(_dffa.PdfRectangle, _ggef.PdfRectangle)
	if size := _ggef._abeg; size > _dffa._aege {
		_dffa._aege = size
	}
	stratum := append(_dffa._cfeeb[_becf], _ggef)
	_dffa._cfeeb[_becf] = stratum
	_cada[_becf][_ggef] = struct{}{}
}

// depthRange returns, in ascending order, the depth indexes of the bag that
// lie in the closed range [`_aadg`, `_fbba`], or nil when there are none.
func (_ead *wordBag) depthRange(_aadg, _fbba int) []int {
	var inRange []int
	for depthIdx := range _ead._cfeeb {
		if depthIdx < _aadg || depthIdx > _fbba {
			continue
		}
		inRange = append(inRange, depthIdx)
	}
	if len(inRange) == 0 {
		return nil
	}
	_af.Ints(inRange)
	return inRange
}
// ExtractFonts returns the fonts used by the extractor's page: font name, font
// type, the raw data of the embedded font file (if embedded), font descriptor
// and more.
//
// `previousPageFonts` supports building a font catalog across several pages or
// a whole document: each of its entries is appended to the result unless the
// page already yielded a font with the same name, so no entry is duplicated.
//
// NOTE: If previousPageFonts is nil, only the fonts of this page are returned.
func (_acc *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	collected := PageFonts{}
	if err := collected.extractPageResourcesToFont(_acc._cb); err != nil {
		return nil, err
	}
	if previousPageFonts != nil {
		for _, earlier := range previousPageFonts.Fonts {
			if _ade(collected.Fonts, earlier.FontName) {
				continue
			}
			collected.Fonts = append(collected.Fonts, earlier)
		}
	}
	return &PageFonts{Fonts: collected.Fonts}, nil
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// Len returns the number of TextMarks in `ma`. A nil array has length 0.
func (_effb *TextMarkArray) Len() int {
	if _effb != nil {
		return len(_effb._dgf)
	}
	return 0
}

// _ffdcd looks up a single-rune string in the _dedc table. It returns the
// table entry and true when `_dcfa` is exactly one rune and that rune has an
// entry; strings of any other length yield ("", false).
func _ffdcd(_dcfa string) (string, bool) {
	runes := []rune(_dcfa)
	if len(runes) != 1 {
		return "", false
	}
	mapped, found := _dedc[runes[0]]
	return mapped, found
}

// _aecg reports whether the vertical extents of the two rectangles overlap or
// touch.
func _aecg(_dfeeg, _gaa _ac.PdfRectangle) bool {
	if _dfeeg.Lly > _gaa.Ury {
		return false
	}
	return _gaa.Lly <= _dfeeg.Ury
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// Append adds `mark` to the end of the mark array.
func (_eegf *TextMarkArray) Append(mark TextMark) {
	extended := append(_eegf._dgf, mark)
	_eegf._dgf = extended
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	Images []ImageMark
}

// markCells sets the _cfeff flag on every non-nil cell of the table.
func (_bged *textTable) markCells() {
	for row := 0; row < _bged._dcbdf; row++ {
		for col := 0; col < _bged._agac; col++ {
			cell := _bged.get(col, row)
			if cell == nil {
				continue
			}
			cell._cfeff = true
		}
	}
}

// lineTo adds the point (`_edga`, `_acga`) to the path, logging it first when
// the debug flag _efca is set.
func (_gca *shapesState) lineTo(_edga, _acga float64) {
	if _efca {
		devicePt := _gca.devicePoint(_edga, _acga)
		_fb.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _edga, _acga, devicePt)
	}
	_gca.addPoint(_edga, _acga)
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_faa PageText )ToText ()string {return _faa .Text ()};

// _bgdca returns `_cegfg` limited to at most `_cdecf` bytes.
// A string that already fits is returned unchanged. Otherwise the cut point is
// moved back to the nearest rune boundary so that a multi-byte UTF-8 sequence
// is never split, which means the result may be a few bytes shorter than
// `_cdecf`.
func _bgdca(_cegfg string, _cdecf int) string {
	if len(_cegfg) <= _cdecf {
		return _cegfg
	}
	// _cegfg[_cdecf] is the first byte that would be dropped; if it is a
	// continuation byte the cut would land inside a rune.
	for _cdecf > 0 && !_g.RuneStart(_cegfg[_cdecf]) {
		_cdecf--
	}
	return _cegfg[:_cdecf]
}

// appendWord adds `_becg` to the line, grows the line's rectangle to include
// the word and raises the line's _eabc and _bcgf to the word's _abeg and _fgbda
// when those are larger.
func (_dbcdd *textLine )appendWord (_becg *textWord ){_dbcdd ._cadc =append (_dbcdd ._cadc ,_becg );
_dbcdd .PdfRectangle =_gcae (_dbcdd .PdfRectangle ,_becg .PdfRectangle );if _becg ._abeg > _dbcdd ._eabc {_dbcdd ._eabc =_becg ._abeg ;};if _becg ._fgbda > _dbcdd ._bcgf {_dbcdd ._bcgf =_becg ._fgbda ;};};

// _gagd returns `_bceb`, then the values of `_dfbff` strictly between the
// smaller and larger of `_bceb` and `_cbdf`, then `_cbdf`.
// Scanning stops at the first value at or above the upper bound, so
// `_dfbff` is presumably sorted ascending - confirm against callers.
func _gagd (_dfbff []float64 ,_bceb ,_cbdf float64 )[]float64 {_dfea ,_eddg :=_bceb ,_cbdf ;
if _eddg < _dfea {_dfea ,_eddg =_eddg ,_dfea ;};_cedcf :=make ([]float64 ,0,len (_dfbff )+2);_cedcf =append (_cedcf ,_bceb );for _ ,_abfbd :=range _dfbff {if _abfbd <=_dfea {continue ;}else if _abfbd >=_eddg {break ;};_cedcf =append (_cedcf ,_abfbd );};
_cedcf =append (_cedcf ,_cbdf );return _cedcf ;};

// _gfdcf reports whether the magnitude of `_cfdd` is below the tolerance _gbbd.
func _gfdcf (_cfdd float64 )bool {return _gc .Abs (_cfdd )< _gbbd };

// addDiacritic appends `_edcfc` to the text of the word's last mark and
// NFKC-normalises the result. The word must contain at least one mark.
func (_fbbce *textWord )addDiacritic (_edcfc string ){_feee :=_fbbce ._ggabc [len (_fbbce ._ggabc )-1];_feee ._cadaf +=_edcfc ;_feee ._cadaf =_dd .NFKC .String (_feee ._cadaf );
};

// cachedImage pairs an image with a colorspace.
type cachedImage struct{_dfa *_ac .Image ;_ecf _ac .PdfColorspace ;};

// addNeighbours links each paragraph to at most one neighbour on each side.
//
// The closure _ecfd splits candidate indexes into paragraphs lying entirely to
// the left and entirely to the right of a paragraph; _gddf does the same for
// above and below.
// Horizontal pass (candidates from yNeighbours): the left link (_bddfg) is the
// left candidate with the largest Urx, the right link (_cegf) the right
// candidate with the smallest Llx. A link is dropped if another candidate
// overlaps the chosen one horizontally, and is set only if _aecg holds.
// Vertical pass (candidates from xNeighbours): the link _aegf is the below
// candidate with the largest Ury, the link _dcaba the above candidate with the
// smallest Lly, subject to the analogous overlap test and to _bcbef.
// Finally every link that is not reciprocated by the linked paragraph is
// cleared.
func (_eebe paraList )addNeighbours (){_ecfd :=func (_cgcfg []int ,_dbebc *textPara )([]*textPara ,[]*textPara ){_aefb :=make ([]*textPara ,0,len (_cgcfg )-1);_fefa :=make ([]*textPara ,0,len (_cgcfg )-1);
for _ ,_dcce :=range _cgcfg {_fdde :=_eebe [_dcce ];if _fdde .Urx <=_dbebc .Llx {_aefb =append (_aefb ,_fdde );}else if _fdde .Llx >=_dbebc .Urx {_fefa =append (_fefa ,_fdde );};};return _aefb ,_fefa ;};_gddf :=func (_agfe []int ,_afea *textPara )([]*textPara ,[]*textPara ){_bdgd :=make ([]*textPara ,0,len (_agfe )-1);
_cbaf :=make ([]*textPara ,0,len (_agfe )-1);for _ ,_egcgc :=range _agfe {_gcggd :=_eebe [_egcgc ];if _gcggd .Ury <=_afea .Lly {_cbaf =append (_cbaf ,_gcggd );}else if _gcggd .Lly >=_afea .Ury {_bdgd =append (_bdgd ,_gcggd );};};return _bdgd ,_cbaf ;};
_fdda :=_eebe .yNeighbours (_aaa );for _ ,_bfeg :=range _eebe {_fdec :=_fdda [_bfeg ];if len (_fdec )==0{continue ;};_bdff ,_cddfe :=_ecfd (_fdec ,_bfeg );if len (_bdff )==0&&len (_cddfe )==0{continue ;};if len (_bdff )> 0{_gdgca :=_bdff [0];for _ ,_egeb :=range _bdff [1:]{if _egeb .Urx >=_gdgca .Urx {_gdgca =_egeb ;
};};for _ ,_edab :=range _bdff {if _edab !=_gdgca &&_edab .Urx > _gdgca .Llx {_gdgca =nil ;break ;};};if _gdgca !=nil &&_aecg (_bfeg .PdfRectangle ,_gdgca .PdfRectangle ){_bfeg ._bddfg =_gdgca ;};};if len (_cddfe )> 0{_gfaec :=_cddfe [0];for _ ,_fcffc :=range _cddfe [1:]{if _fcffc .Llx <=_gfaec .Llx {_gfaec =_fcffc ;
};};for _ ,_aedbc :=range _cddfe {if _aedbc !=_gfaec &&_aedbc .Llx < _gfaec .Urx {_gfaec =nil ;break ;};};if _gfaec !=nil &&_aecg (_bfeg .PdfRectangle ,_gfaec .PdfRectangle ){_bfeg ._cegf =_gfaec ;};};};_fdda =_eebe .xNeighbours (_bbdee );for _ ,_gdgdf :=range _eebe {_gcbbc :=_fdda [_gdgdf ];
if len (_gcbbc )==0{continue ;};_bcec ,_dgdd :=_gddf (_gcbbc ,_gdgdf );if len (_bcec )==0&&len (_dgdd )==0{continue ;};if len (_dgdd )> 0{_dabde :=_dgdd [0];for _ ,_efgge :=range _dgdd [1:]{if _efgge .Ury >=_dabde .Ury {_dabde =_efgge ;};};for _ ,_eedge :=range _dgdd {if _eedge !=_dabde &&_eedge .Ury > _dabde .Lly {_dabde =nil ;
break ;};};if _dabde !=nil &&_bcbef (_gdgdf .PdfRectangle ,_dabde .PdfRectangle ){_gdgdf ._aegf =_dabde ;};};if len (_bcec )> 0{_ggfgg :=_bcec [0];for _ ,_gfdcg :=range _bcec [1:]{if _gfdcg .Lly <=_ggfgg .Lly {_ggfgg =_gfdcg ;};};for _ ,_bffag :=range _bcec {if _bffag !=_ggfgg &&_bffag .Lly < _ggfgg .Ury {_ggfgg =nil ;
break ;};};if _ggfgg !=nil &&_bcbef (_gdgdf .PdfRectangle ,_ggfgg .PdfRectangle ){_gdgdf ._dcaba =_ggfgg ;};};};for _ ,_eacff :=range _eebe {if _eacff ._bddfg !=nil &&_eacff ._bddfg ._cegf !=_eacff {_eacff ._bddfg =nil ;};if _eacff ._dcaba !=nil &&_eacff ._dcaba ._aegf !=_eacff {_eacff ._dcaba =nil ;
};if _eacff ._cegf !=nil &&_eacff ._cegf ._bddfg !=_eacff {_eacff ._cegf =nil ;};if _eacff ._aegf !=nil &&_eacff ._aegf ._dcaba !=_eacff {_eacff ._aegf =nil ;};};};

// asRuling converts the rectangle to a ruling of the same kind and colour.
// The ruling's primary coordinate is the rectangle's centre across its thin
// dimension, its extent is the rectangle's long dimension and its width comes
// from checkWidth. It returns (nil, false) when checkWidth rejects the
// rectangle or when the kind is neither _aafafg nor _eccgd.
func (_agcf rectRuling )asRuling ()(*ruling ,bool ){_bbdbf :=ruling {_agcgg :_agcf ._bbgc ,Color :_agcf .Color ,_aaec :_ceaac };
switch _agcf ._bbgc {case _aafafg :_bbdbf ._edcba =0.5*(_agcf .Llx +_agcf .Urx );_bbdbf ._bfeag =_agcf .Lly ;_bbdbf ._cbba =_agcf .Ury ;_dge ,_dgff :=_agcf .checkWidth (_agcf .Llx ,_agcf .Urx );if !_dgff {if _caea {_fb .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_agcf );
};return nil ,false ;};_bbdbf ._geba =_dge ;case _eccgd :_bbdbf ._edcba =0.5*(_agcf .Lly +_agcf .Ury );_bbdbf ._bfeag =_agcf .Llx ;_bbdbf ._cbba =_agcf .Urx ;_aadgc ,_abbfa :=_agcf .checkWidth (_agcf .Lly ,_agcf .Ury );if !_abbfa {if _caea {_fb .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_agcf );
};return nil ,false ;};_bbdbf ._geba =_aadgc ;default:_fb .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_agcf ._bbgc );return nil ,false ;};return &_bbdbf ,true ;};

// drawRectangle starts a new subpath and traces the rectangle with corner
// (`_ebb`, `_cfee`), width `_caed` and height `_ffc`, then closes the path.
func (_edf *shapesState )drawRectangle (_ebb ,_cfee ,_caed ,_ffc float64 ){if _efca {_cebg :=_edf .devicePoint (_ebb ,_cfee );
_ebga :=_edf .devicePoint (_ebb +_caed ,_cfee +_ffc );_facg :=_ac .PdfRectangle {Llx :_cebg .X ,Lly :_cebg .Y ,Urx :_ebga .X ,Ury :_ebga .Y };_fb .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_facg );
};_edf .newSubPath ();_edf .moveTo (_ebb ,_cfee );_edf .lineTo (_ebb +_caed ,_cfee );_edf .lineTo (_ebb +_caed ,_cfee +_ffc );_edf .lineTo (_ebb ,_cfee +_ffc );_edf .closePath ();};

// _dgg turns the text marks of a page into paragraphs in reading order.
// The marks are grouped into words (_bfeb), the words into a bag (_fddf), the
// bag into paragraph bags (_adbc, then _eadb), and each bag is arranged into a
// paragraph. When at least _eecc paragraphs result, tables are extracted using
// `_dafdc`. It returns nil when there are no marks or no words.
func _dgg (_bfdc []*textMark ,_eagf _ac .PdfRectangle ,_dece rulingList ,_dafdc []gridTiling )paraList {_fb .Log .Trace ("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066",len (_bfdc ),_eagf );
if len (_bfdc )==0{return nil ;};_ddbf :=_bfeb (_bfdc ,_eagf );if len (_ddbf )==0{return nil ;};_dece .log ("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065");_acec ,_cbbe :=_dece .vertsHorzs ();_cgbed :=_fddf (_ddbf ,_eagf .Ury ,_acec ,_cbbe );
_ecba :=_adbc (_cgbed ,_eagf .Ury ,_acec ,_cbbe );_ecba =_eadb (_ecba );_cdba :=make (paraList ,0,len (_ecba ));for _ ,_abcf :=range _ecba {_dbceb :=_abcf .arrangeText ();if _dbceb !=nil {_cdba =append (_cdba ,_dbceb );};};if len (_cdba )>=_eecc {_cdba =_cdba .extractTables (_dafdc );
};_cdba .sortReadingOrder ();_cdba .log ("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072");return _cdba ;};
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// String returns a one-line description of the paragraph for logging.
// A paragraph flagged via _affbf is reported as "<bbox> [EMPTY]". Otherwise
// the output is the bounding box, an optional "[WxH] " prefix when a table
// is attached, the number of lines, and the quoted result of passing the
// paragraph text and the value 50 to _bgdca.
func (_efaee *textPara) String() string {
	if _efaee._affbf {
		// Format string decodes to "%6.2f [EMPTY]".
		return _cag.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", _efaee.PdfRectangle)
	}

	var tableDesc string
	if table := _efaee._ccec; table != nil {
		// Format string decodes to "[%dx%d] ".
		tableDesc = _cag.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", table._agac, table._dcbdf)
	}

	// Format string decodes to "%6.2f %s%d lines %q".
	return _cag.Sprintf(
		"\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071",
		_efaee.PdfRectangle,
		tableDesc,
		len(_efaee._ddeb),
		_bgdca(_efaee.text(), 50),
	)
}

// compositeColCorridors returns a map holding one key per index in
// [0, _agac), each mapped to a nil slice. When the _agede debug flag is set
// the count is logged.
func (_adef *textTable) compositeColCorridors() map[int][]float64 {
	width := _adef._agac
	corridors := make(map[int][]float64, width)
	if _agede {
		// Format string decodes to "compositeColCorridors: w=%d ".
		_fb.Log.Info("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020", width)
	}
	for col := 0; col < width; col++ {
		corridors[col] = nil
	}
	return corridors
}

// clearPath resets the state's _fegc field to nil and its _dcef flag to
// false, then logs the state when the _efca debug flag is set.
func (_bdg *shapesState) clearPath() {
	_bdg._fegc, _bdg._dcef = nil, false
	if !_efca {
		return
	}
	// Format string decodes to "CLEAR: ss=%s".
	_fb.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _bdg)
}

// appendMark adds the mark to the word and refreshes the word's derived
// values: the bounding box is combined with the mark's box via _gcae (the
// helper computeBbox uses to accumulate a table's bounding box), _abeg is
// raised to the mark's _gba when that is larger, and _fgbda is set to the
// distance from the top (Ury) of the supplied rectangle down to the word's
// updated lower edge.
func (_effaf *textWord) appendMark(_fffa *textMark, _cgcfa _ac.PdfRectangle) {
	_effaf._ggabc = append(_effaf._ggabc, _fffa)

	if _effaf._abeg < _fffa._gba {
		_effaf._abeg = _fffa._gba
	}

	merged := _gcae(_effaf.PdfRectangle, _fffa.PdfRectangle)
	_effaf.PdfRectangle = merged
	_effaf._fgbda = _cgcfa.Ury - merged.Lly
}

// close marks the sub-path as closed. If _cdbg reports false for the first
// and last points, the first point is appended again; duplicates are then
// removed.
// NOTE(review): indexes the first point unconditionally, so this presumably
// relies on callers never closing an empty sub-path. Confirm.
func (_gfceg *subpath) close() {
	first := _gfceg._egd[0]
	if !_cdbg(first, _gfceg.last()) {
		_gfceg.add(first)
	}
	_gfceg._ega = true
	_gfceg.removeDuplicates()
}

// _gcceb reports whether the magnitude of the value is below the _cbd
// threshold.
func _gcceb(_gccfa float64) bool {
	magnitude := _gc.Abs(_gccfa)
	return magnitude < _cbd
}

// _beee returns the larger of its two arguments.
func _beee(_ffbg, _bcbf int) int {
	if _bcbf >= _ffbg {
		return _bcbf
	}
	return _ffbg
}
2021-12-14 01:08:28 +00:00
2022-02-05 21:34:53 +00:00
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the cell in the (0-offset) x'th column of row y.
2022-03-13 12:41:53 +00:00
type TextTable struct {
	// W and H are presumably the table's column and row counts; confirm
	// against the code that populates them.
	W int
	H int
	// Cells holds the table's cells, indexed as Cells[y][x].
	Cells [][]TableCell
}

// fontEntry pairs a PDF font with an int64 value. The role of the integer is
// not visible in this section of the file.
type fontEntry struct {
	_fab  *_ac.PdfFont
	_fabc int64
}

// add appends the given points, in order, to the sub-path.
func (_dacc *subpath) add(_ebae ..._afd.Point) {
	_dacc._egd = append(_dacc._egd, _ebae...)
}

// snapToGroups splits the rulings with vertsHorzs, applies
// snapToGroupsDirection to each non-empty half, and returns the first half
// followed by the second as a single list.
func (_abgff rulingList) snapToGroups() rulingList {
	verts, horzs := _abgff.vertsHorzs()
	if len(verts) != 0 {
		verts = verts.snapToGroupsDirection()
	}
	if len(horzs) != 0 {
		horzs = horzs.snapToGroupsDirection()
	}

	snapped := append(verts, horzs...)
	// Label decodes to "snapToGroups".
	snapped.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return snapped
}
2021-12-14 01:08:28 +00:00
2022-03-13 12:41:53 +00:00
// String returns the name registered for the mark kind in _fccc, or
// "Not a mark: <n>" (n being the numeric value) when no name is registered.
func (_eabcb markKind) String() string {
	if name, known := _fccc[_eabcb]; known {
		return name
	}
	// Format string decodes to "Not a mark: %d".
	return _cag.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _eabcb)
}