mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
788 lines
180 KiB
Go
788 lines
180 KiB
Go
//
|
||
// Copyright 2020 FoxyUtils ehf. All rights reserved.
|
||
//
|
||
// This is a commercial product and requires a license to operate.
|
||
// A trial license can be obtained at https://unidoc.io
|
||
//
|
||
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
|
||
//
|
||
// Use of this source code is governed by the UniDoc End User License Agreement
|
||
// terms that can be accessed at https://unidoc.io/eula/
|
||
|
||
//
|
||
// Package extractor is used for quickly extracting PDF content through a simple interface.
|
||
// Currently offers functionality for extracting textual content.
|
||
//
|
||
package extractor ;import (_ga "bytes";_f "errors";_bc "fmt";_bb "github.com/unidoc/unipdf/v3/common";_bg "github.com/unidoc/unipdf/v3/contentstream";_af "github.com/unidoc/unipdf/v3/core";_ebg "github.com/unidoc/unipdf/v3/internal/license";_gc "github.com/unidoc/unipdf/v3/internal/textencoding";
|
||
_gaa "github.com/unidoc/unipdf/v3/internal/transform";_eb "github.com/unidoc/unipdf/v3/model";_c "golang.org/x/text/unicode/norm";_fd "golang.org/x/xerrors";_eac "image/color";_ea "io";_be "math";_e "regexp";_b "sort";_a "strings";_fe "unicode";_gf "unicode/utf8";
|
||
)

// setTextLeading stores `_caf` in the `_cad` field of the text state attached
// to the text object. Calling it on a nil receiver does nothing.
func (_dfb *textObject) setTextLeading(_caf float64) {
	if _dfb != nil {
		_dfb._add._cad = _caf
	}
}
|
||
|
||
// String returns a description of `k`.
|
||
func (_fgd markKind )String ()string {_ecaeg ,_faab :=_ecgc [_fgd ];if !_faab {return _bc .Sprintf ("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064",_fgd );};return _ecaeg ;};func (_dcc *textObject )getStrokeColor ()_eac .Color {return _cbcfg (_dcc ._gfa .ColorspaceStroking ,_dcc ._gfa .ColorStroking );
|
||
};func _ecef (_gbbe ,_gbgg _gaa .Point ,_afbaf _eac .Color )(*ruling ,bool ){_cbea :=lineRuling {_cbdg :_gbbe ,_cbfb :_gbgg ,_cbgfa :_dcea (_gbbe ,_gbgg ),Color :_afbaf };if _cbea ._cbgfa ==_dcga {return nil ,false ;};return _cbea .asRuling ();};
|
||
|
||
// String returns a description of `t`.
|
||
func (_gcf *textTable )String ()string {return _bc .Sprintf ("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074",_gcf ._gfgdf ,_gcf ._ggca ,_gcf ._ggac );};func (_adda *ruling )alignsPrimary (_bbgb *ruling )bool {return _adda ._bdfg ==_bbgb ._bdfg &&_be .Abs (_adda ._cda -_bbgb ._cda )< _cdceg *0.5;
|
||
};func (_bge *imageExtractContext )extractXObjectImage (_afb *_af .PdfObjectName ,_eag _bg .GraphicsState ,_cdb *_eb .PdfPageResources )error {_db ,_ :=_cdb .GetXObjectByName (*_afb );if _db ==nil {return nil ;};_cf ,_dae :=_bge ._gaf [_db ];if !_dae {_afd ,_de :=_cdb .GetXObjectImageByName (*_afb );
|
||
if _de !=nil {return _de ;};if _afd ==nil {return nil ;};_fbf ,_de :=_afd .ToImage ();if _de !=nil {return _de ;};_cf =&cachedImage {_eaa :_fbf ,_adf :_afd .ColorSpace };_bge ._gaf [_db ]=_cf ;};_baf :=_cf ._eaa ;_afc :=_cf ._adf ;_gce ,_agd :=_afc .ImageToRGB (*_baf );
|
||
if _agd !=nil {return _agd ;};_bb .Log .Debug ("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_eag .CTM .String ());_bad :=ImageMark {Image :&_gce ,Width :_eag .CTM .ScalingFactorX (),Height :_eag .CTM .ScalingFactorY (),Angle :_eag .CTM .Angle ()};
|
||
_bad .X ,_bad .Y =_eag .CTM .Translation ();_bge ._abc =append (_bge ._abc ,_bad );_bge ._gg ++;return nil ;};func (_dbgg *textTable )computeBbox ()_eb .PdfRectangle {var _ceaae _eb .PdfRectangle ;_ggeg :=false ;for _caaac :=0;_caaac < _dbgg ._ggca ;_caaac ++{for _gbgb :=0;
|
||
_gbgb < _dbgg ._gfgdf ;_gbgb ++{_beeca :=_dbgg .get (_gbgb ,_caaac );if _beeca ==nil {continue ;};if !_ggeg {_ceaae =_beeca .PdfRectangle ;_ggeg =true ;}else {_ceaae =_agege (_ceaae ,_beeca .PdfRectangle );};};};return _ceaae ;};
|
||
|
||
// String returns a description of `p`.
|
||
func (_cfee *textPara )String ()string {if _cfee ._dgcce {return _bc .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d",_cfee .PdfRectangle );};_bcfae :="";if _cfee ._dcfac !=nil {_bcfae =_bc .Sprintf ("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020",_cfee ._dcfac ._gfgdf ,_cfee ._dcfac ._ggca );
|
||
};return _bc .Sprintf ("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071",_cfee .PdfRectangle ,_bcfae ,len (_cfee ._ccaa ),_geff (_cfee .text (),50));};func (_aega rulingList )aligned ()bool {if len (_aega )< 2{return false ;
|
||
};_feed :=make (map[*ruling ]int );_feed [_aega [0]]=0;for _ ,_agfa :=range _aega [1:]{_gdcde :=false ;for _dccg :=range _feed {if _agfa .gridIntersecting (_dccg ){_feed [_dccg ]++;_gdcde =true ;break ;};};if !_gdcde {_feed [_agfa ]=0;};};_bgeea :=0;for _ ,_ffdab :=range _feed {if _ffdab ==0{_bgeea ++;
|
||
};};_cccef :=float64 (_bgeea )/float64 (len (_aega ));_agab :=_cccef <=1.0-_bedf ;if _gfba {_bb .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_agab ,_cccef ,_bgeea ,len (_aega ),_aega .String ());
|
||
};return _agab ;};
|
||
|
||
// String returns a description of `l`.
|
||
func (_ffcb *textLine )String ()string {return _bc .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_ffcb ._eggg ,_ffcb .PdfRectangle ,_ffcb ._dfefd ,_ffcb .text ());
|
||
};type cachedImage struct{_eaa *_eb .Image ;_adf _eb .PdfColorspace ;};type paraList []*textPara ;func _gcae (_cbfg ,_adef *textPara )bool {if _cbfg ._dgcce ||_adef ._dgcce {return true ;};return _dede (_cbfg .depth ()-_adef .depth ());};
|
||
|
||
// String returns a description of `state`.
|
||
func (_cdgf *textState )String ()string {_ecc :="\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]";if _cdgf ._adgd !=nil {_ecc =_cdgf ._adgd .BaseFont ();};return _bc .Sprintf ("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071",_cdgf ._cdbb ,_cdgf ._fege ,_cdgf ._ccef ,_ecc );
|
||
};func (_gcag paraList )lines ()[]*textLine {var _ceffg []*textLine ;for _ ,_gccb :=range _gcag {_ceffg =append (_ceffg ,_gccb ._ccaa ...);};return _ceffg ;};func (_cecb *textTable )growTable (){_cacee :=func (_bgeaf paraList ){_cecb ._ggca ++;for _ccfad :=0;
|
||
_ccfad < _cecb ._gfgdf ;_ccfad ++{_gcea :=_bgeaf [_ccfad ];_cecb .put (_ccfad ,_cecb ._ggca -1,_gcea );};};_dgegb :=func (_cdaa paraList ){_cecb ._gfgdf ++;for _dfag :=0;_dfag < _cecb ._ggca ;_dfag ++{_bcdf :=_cdaa [_dfag ];_cecb .put (_cecb ._gfgdf -1,_dfag ,_bcdf );
|
||
};};if _aacd {_cecb .log ("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce");};for _acae :=0;;_acae ++{_efac :=false ;_afebb :=_cecb .getDown ();_geccb :=_cecb .getRight ();if _aacd {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_acae ,_cecb );
|
||
_bc .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a",_afebb );_bc .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a",_geccb );};if _afebb !=nil &&_geccb !=nil {_acdg :=_afebb [len (_afebb )-1];
|
||
if _acdg !=nil &&!_acdg ._dfbc &&_acdg ==_geccb [len (_geccb )-1]{_cacee (_afebb );if _geccb =_cecb .getRight ();_geccb !=nil {_dgegb (_geccb );_cecb .put (_cecb ._gfgdf -1,_cecb ._ggca -1,_acdg );};_efac =true ;};};if !_efac &&_afebb !=nil {_cacee (_afebb );
|
||
_efac =true ;};if !_efac &&_geccb !=nil {_dgegb (_geccb );_efac =true ;};if !_efac {break ;};};};func (_bfce *textObject )reset (){_bfce ._bbcf =_gaa .IdentityMatrix ();_bfce ._fbe =_gaa .IdentityMatrix ();_bfce ._fgc =nil ;};func (_bba *shapesState )stroke (_ggbc *[]pathSection ){_cegab :=pathSection {_gaac :_bba ._ege ,Color :_bba ._babd .getStrokeColor ()};
|
||
*_ggbc =append (*_ggbc ,_cegab );if _gfba {_bc .Printf ("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",len (*_ggbc ),_bba ,_bba ._babd .getStrokeColor (),_cegab .bbox ());
|
||
if _bgf {for _dcfb ,_abbf :=range _bba ._ege {_bc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dcfb ,_abbf );if _dcfb ==10{break ;};};};};};func (_dgfb *textObject )setTextRenderMode (_bffb int ){if _dgfb ==nil {return ;};_dgfb ._add ._dab =RenderMode (_bffb );
|
||
};func (_eebb paraList )findTables (_eccfb []gridTiling )[]*textTable {_eebb .addNeighbours ();_b .Slice (_eebb ,func (_bcfec ,_deebg int )bool {return _aaaa (_eebb [_bcfec ],_eebb [_deebg ])< 0});var _cgfg []*textTable ;if _bace {_gcdg :=_eebb .findGridTables (_eccfb );
|
||
_cgfg =append (_cgfg ,_gcdg ...);};if _fdfa {_abab :=_eebb .findTextTables ();_cgfg =append (_cgfg ,_abab ...);};return _cgfg ;};type textLine struct{_eb .PdfRectangle ;_eggg float64 ;_ffd []*textWord ;_dfefd float64 ;};type fontEntry struct{_dffg *_eb .PdfFont ;
|
||
_cgg int64 ;};func _fefba (_gdcdg string )bool {for _ ,_gacbe :=range _gdcdg {if !_fe .IsSpace (_gacbe ){return false ;};};return true ;};func _bfb (_ecd *Extractor ,_ffg *_eb .PdfPageResources ,_gggg _bg .GraphicsState ,_dfgg *textState ,_deeg *stateStack )*textObject {return &textObject {_gcb :_ecd ,_cdff :_ffg ,_gfa :_gggg ,_gbce :_deeg ,_add :_dfgg ,_bbcf :_gaa .IdentityMatrix (),_fbe :_gaa .IdentityMatrix ()};
|
||
};func (_fbcce *textObject )newTextMark (_gbag string ,_cdebe _gaa .Matrix ,_bcff _gaa .Point ,_bbca float64 ,_fbab *_eb .PdfFont ,_bgeg float64 ,_ecaa ,_deac _eac .Color )(textMark ,bool ){_geec :=_cdebe .Angle ();_gagfa :=_efff (_geec ,_deegc );var _gdba float64 ;
|
||
if _gagfa %180!=90{_gdba =_cdebe .ScalingFactorY ();}else {_gdba =_cdebe .ScalingFactorX ();};_cfbad :=_gdg (_cdebe );_dffb :=_eb .PdfRectangle {Llx :_cfbad .X ,Lly :_cfbad .Y ,Urx :_bcff .X ,Ury :_bcff .Y };switch _gagfa %360{case 90:_dffb .Urx -=_gdba ;
|
||
case 180:_dffb .Ury -=_gdba ;case 270:_dffb .Urx +=_gdba ;case 0:_dffb .Ury +=_gdba ;default:_gagfa =0;_dffb .Ury +=_gdba ;};if _dffb .Llx > _dffb .Urx {_dffb .Llx ,_dffb .Urx =_dffb .Urx ,_dffb .Llx ;};if _dffb .Lly > _dffb .Ury {_dffb .Lly ,_dffb .Ury =_dffb .Ury ,_dffb .Lly ;
|
||
};_agg ,_cbed :=_aeaf (_dffb ,_fbcce ._gcb ._fc );if !_cbed {_bb .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_dffb ,_fbcce ._gcb ._fc ,_gbag );
|
||
};_dffb =_agg ;_acda :=_dffb ;_agbfb :=_fbcce ._gcb ._fc ;switch _gagfa %360{case 90:_agbfb .Urx ,_agbfb .Ury =_agbfb .Ury ,_agbfb .Urx ;_acda =_eb .PdfRectangle {Llx :_agbfb .Urx -_dffb .Ury ,Urx :_agbfb .Urx -_dffb .Lly ,Lly :_dffb .Llx ,Ury :_dffb .Urx };
|
||
case 180:_acda =_eb .PdfRectangle {Llx :_agbfb .Urx -_dffb .Llx ,Urx :_agbfb .Urx -_dffb .Urx ,Lly :_agbfb .Ury -_dffb .Lly ,Ury :_agbfb .Ury -_dffb .Ury };case 270:_agbfb .Urx ,_agbfb .Ury =_agbfb .Ury ,_agbfb .Urx ;_acda =_eb .PdfRectangle {Llx :_dffb .Ury ,Urx :_dffb .Lly ,Lly :_agbfb .Ury -_dffb .Llx ,Ury :_agbfb .Ury -_dffb .Urx };
|
||
};if _acda .Llx > _acda .Urx {_acda .Llx ,_acda .Urx =_acda .Urx ,_acda .Llx ;};if _acda .Lly > _acda .Ury {_acda .Lly ,_acda .Ury =_acda .Ury ,_acda .Lly ;};_ecfg :=textMark {_gbgc :_gbag ,PdfRectangle :_acda ,_addfe :_dffb ,_gdcg :_fbab ,_deedb :_gdba ,_gdade :_bgeg ,_bef :_cdebe ,_ebcc :_bcff ,_fdag :_gagfa ,_febb :_ecaa ,_dgd :_deac };
|
||
if _cbab {_bb .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_cfbad ,_bcff ,_ecfg .String ());};return _ecfg ,_cbed ;
|
||
};func _fbdc (_faad []*wordBag )[]*wordBag {if len (_faad )<=1{return _faad ;};if _caafc {_bb .Log .Info ("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a");};_b .Slice (_faad ,func (_efc ,_fbcc int )bool {_bae ,_eccg :=_faad [_efc ],_faad [_fbcc ];
|
||
_dfa :=_bae .Width ()*_bae .Height ();_cgbd :=_eccg .Width ()*_eccg .Height ();if _dfa !=_cgbd {return _dfa > _cgbd ;};if _bae .Height ()!=_eccg .Height (){return _bae .Height ()> _eccg .Height ();};return _efc < _fbcc ;});var _gdc []*wordBag ;_egag :=make (intSet );
|
||
for _gefbd :=0;_gefbd < len (_faad );_gefbd ++{if _egag .has (_gefbd ){continue ;};_bfced :=_faad [_gefbd ];for _abcc :=_gefbd +1;_abcc < len (_faad );_abcc ++{if _egag .has (_gefbd ){continue ;};_bda :=_faad [_abcc ];_bcc :=_bfced .PdfRectangle ;_bcc .Llx -=_bfced ._cgad ;
|
||
if _feb (_bcc ,_bda .PdfRectangle ){_bfced .absorb (_bda );_egag .add (_abcc );};};_gdc =append (_gdc ,_bfced );};if len (_faad )!=len (_gdc )+len (_egag ){_bb .Log .Error ("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064",len (_faad ),len (_gdc ),len (_egag ));
|
||
};return _gdc ;};func (_dabe paraList )tables ()[]TextTable {var _cegc []TextTable ;if _cece {_bb .Log .Info ("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a");};for _ ,_badf :=range _dabe {_abg :=_badf ._dcfac ;if _abg !=nil &&_abg .isExportable (){_cegc =append (_cegc ,_abg .toTextTable ());
|
||
};};return _cegc ;};
|
||
|
||
// Elements returns the TextMarks in `ma`.
|
||
func (_fdbb *TextMarkArray )Elements ()[]TextMark {return _fdbb ._gddg };func _ffcba (_egcfd []*textMark ,_efed _eb .PdfRectangle )[]*textWord {var _adbg []*textWord ;var _ccfag *textWord ;if _cbab {_bb .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_egcfd ));
|
||
};_gfebg :=func (){if _ccfag !=nil {_gaebc :=_ccfag .computeText ();if !_fefba (_gaebc ){_ccfag ._caee =_gaebc ;_adbg =append (_adbg ,_ccfag );if _cbab {_bb .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_adbg )-1,_ccfag .String ());
|
||
for _dgdea ,_abge :=range _ccfag ._badb {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dgdea ,_abge .String ());};};};_ccfag =nil ;};};for _ ,_fbca :=range _egcfd {if _ffba &&_ccfag !=nil &&len (_ccfag ._badb )> 0{_ggge :=_ccfag ._badb [len (_ccfag ._badb )-1];
|
||
_cabg ,_cfgf :=_aeba (_fbca ._gbgc );_ggcb ,_ebfae :=_aeba (_ggge ._gbgc );if _cfgf &&!_ebfae &&_ggge .inDiacriticArea (_fbca ){_ccfag .addDiacritic (_cabg );continue ;};if _ebfae &&!_cfgf &&_fbca .inDiacriticArea (_ggge ){_ccfag ._badb =_ccfag ._badb [:len (_ccfag ._badb )-1];
|
||
_ccfag .appendMark (_fbca ,_efed );_ccfag .addDiacritic (_ggcb );continue ;};};_gfbce :=_fefba (_fbca ._gbgc );if _gfbce {_gfebg ();continue ;};if _ccfag ==nil &&!_gfbce {_ccfag =_geca ([]*textMark {_fbca },_efed );continue ;};_cbgdc :=_ccfag ._aggf ;_efffb :=_be .Abs (_bafc (_efed ,_fbca )-_ccfag ._efaba )/_cbgdc ;
|
||
_gcebb :=_dbde (_fbca ,_ccfag )/_cbgdc ;if _gcebb >=_fag ||!(-_gbec <=_gcebb &&_efffb <=_bbfa ){_gfebg ();_ccfag =_geca ([]*textMark {_fbca },_efed );continue ;};_ccfag .appendMark (_fbca ,_efed );};_gfebg ();return _adbg ;};func _ffgg (_aaad string )bool {if _gf .RuneCountInString (_aaad )< _gbd {return false ;
|
||
};_bfefg ,_ead :=_gf .DecodeLastRuneInString (_aaad );if _ead <=0||!_fe .Is (_fe .Hyphen ,_bfefg ){return false ;};_bfefg ,_ead =_gf .DecodeLastRuneInString (_aaad [:len (_aaad )-_ead ]);return _ead > 0&&!_fe .IsSpace (_bfefg );};func (_gacf *textPara )fontsize ()float64 {return _gacf ._ccaa [0]._dfefd };
|
||
func _gfbc (_ccbd int ,_gddgag map[int ][]float64 )([]int ,int ){_gdagd :=make ([]int ,_ccbd );_fbgd :=0;for _fdad :=0;_fdad < _ccbd ;_fdad ++{_gdagd [_fdad ]=_fbgd ;_fbgd +=len (_gddgag [_fdad ])+1;};return _gdagd ,_fbgd ;};func (_effdb *textTable )reduce ()*textTable {_ffggb :=make ([]int ,0,_effdb ._ggca );
|
||
_feba :=make ([]int ,0,_effdb ._gfgdf );for _bcfc :=0;_bcfc < _effdb ._ggca ;_bcfc ++{if !_effdb .emptyRow (_bcfc ){_ffggb =append (_ffggb ,_bcfc );};};for _dffe :=0;_dffe < _effdb ._gfgdf ;_dffe ++{if !_effdb .emptyColumn (_dffe ){_feba =append (_feba ,_dffe );
|
||
};};if len (_ffggb )==_effdb ._ggca &&len (_feba )==_effdb ._gfgdf {return _effdb ;};_fegc :=textTable {_ggac :_effdb ._ggac ,_gfgdf :len (_feba ),_ggca :len (_ffggb ),_ecac :make (map[uint64 ]*textPara ,len (_feba )*len (_ffggb ))};if _cece {_bb .Log .Info ("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064",_effdb ._gfgdf ,_effdb ._ggca ,len (_feba ),len (_ffggb ));
|
||
_bb .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_feba );_bb .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_ffggb );};for _ccdga ,_ebgb :=range _ffggb {for _baggb ,_ggbd :=range _feba {_bbfb :=_effdb .get (_ggbd ,_ebgb );
|
||
if _bbfb ==nil {continue ;};if _cece {_bc .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_baggb ,_ccdga ,_ggbd ,_ebgb ,_geff (_bbfb .text (),50));};_fegc .put (_baggb ,_ccdga ,_bbfb );
|
||
};};return &_fegc ;};func (_aeda rulingList )comp (_adfeb ,_ecfe int )bool {_gbff ,_efgd :=_aeda [_adfeb ],_aeda [_ecfe ];_bfedf ,_eecb :=_gbff ._bdfg ,_efgd ._bdfg ;if _bfedf !=_eecb {return _bfedf > _eecb ;};if _bfedf ==_dcga {return false ;};_begg :=func (_fbdge bool )bool {if _bfedf ==_afdgd {return _fbdge ;
|
||
};return !_fbdge ;};_dcgb ,_fcgde :=_gbff ._cda ,_efgd ._cda ;if _dcgb !=_fcgde {return _begg (_dcgb > _fcgde );};_dcgb ,_fcgde =_gbff ._dbce ,_efgd ._dbce ;if _dcgb !=_fcgde {return _begg (_dcgb < _fcgde );};return _begg (_gbff ._daafc < _efgd ._daafc );
|
||
};func _gabc (_fde []*textMark ,_eedb _eb .PdfRectangle ,_cffc rulingList ,_dfcd []gridTiling )paraList {_bb .Log .Trace ("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066",len (_fde ),_eedb );
|
||
if len (_fde )==0{return nil ;};_eede :=_ffcba (_fde ,_eedb );if len (_eede )==0{return nil ;};_cffc .log ("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065");_cgae ,_gbac :=_cffc .vertsHorzs ();_cbgea :=_gef (_eede ,_eedb .Ury ,_cgae ,_gbac );
|
||
_cdea :=_babf (_cbgea ,_eedb .Ury ,_cgae ,_gbac );_cdea =_fbdc (_cdea );_aadf :=make (paraList ,0,len (_cdea ));for _ ,_def :=range _cdea {_dbef :=_def .arrangeText ();if _dbef !=nil {_aadf =append (_aadf ,_dbef );};};if len (_aadf )>=_edee {_aadf =_aadf .extractTables (_dfcd );
|
||
};_aadf .sortReadingOrder ();_aadf .log ("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072");return _aadf ;};func _geca (_gdcfc []*textMark ,_eadg _eb .PdfRectangle )*textWord {_befgb :=_gdcfc [0].PdfRectangle ;
|
||
_cdffb :=_gdcfc [0]._deedb ;for _ ,_egafe :=range _gdcfc [1:]{_befgb =_agege (_befgb ,_egafe .PdfRectangle );if _egafe ._deedb > _cdffb {_cdffb =_egafe ._deedb ;};};return &textWord {PdfRectangle :_befgb ,_badb :_gdcfc ,_efaba :_eadg .Ury -_befgb .Lly ,_aggf :_cdffb };
|
||
};func (_daea paraList )writeText (_ccdf _ea .Writer ){for _egbe ,_ffgee :=range _daea {if _ffgee ._dgcce {continue ;};_ffgee .writeText (_ccdf );if _egbe !=len (_daea )-1{if _gcae (_ffgee ,_daea [_egbe +1]){_ccdf .Write ([]byte ("\u0020"));}else {_ccdf .Write ([]byte ("\u000a"));
|
||
_ccdf .Write ([]byte ("\u000a"));};};};_ccdf .Write ([]byte ("\u000a"));_ccdf .Write ([]byte ("\u000a"));};func (_ebee *stateStack )empty ()bool {return len (*_ebee )==0};func (_bgab rulingList )primMinMax ()(float64 ,float64 ){_cgcc ,_bdfd :=_bgab [0]._cda ,_bgab [0]._cda ;
|
||
for _ ,_begda :=range _bgab [1:]{if _begda ._cda < _cgcc {_cgcc =_begda ._cda ;}else if _begda ._cda > _bdfd {_bdfd =_begda ._cda ;};};return _cgcc ,_bdfd ;};func _ddd (_cefg ,_gaea bounded )float64 {return _cefg .bbox ().Llx -_gaea .bbox ().Llx };func _caafb (_gcbg ,_fdbde float64 )bool {return _be .Abs (_gcbg -_fdbde )<=_gffe };
|
||
const _eaf =20;func _bdgde (_eecbb []float64 ,_aaag ,_ecabfb float64 )[]float64 {_fedb ,_baga :=_aaag ,_ecabfb ;if _baga < _fedb {_fedb ,_baga =_baga ,_fedb ;};_daac :=make ([]float64 ,0,len (_eecbb )+2);_daac =append (_daac ,_aaag );for _ ,_adcbf :=range _eecbb {if _adcbf <=_fedb {continue ;
|
||
}else if _adcbf >=_baga {break ;};_daac =append (_daac ,_adcbf );};_daac =append (_daac ,_ecabfb );return _daac ;};func (_cga *textObject )getFontDirect (_ccffba string )(*_eb .PdfFont ,error ){_eec ,_ebgc :=_cga .getFontDict (_ccffba );if _ebgc !=nil {return nil ,_ebgc ;
|
||
};_edfg ,_ebgc :=_eb .NewPdfFontFromPdfObject (_eec );if _ebgc !=nil {_bb .Log .Debug ("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ccffba ,_ebgc );
|
||
};return _edfg ,_ebgc ;};
|
||
|
||
// String returns a string descibing `i`.
|
||
func (_gddcb gridTile )String ()string {_defbd :=func (_eecg bool ,_gcaba string )string {if _eecg {return _gcaba ;};return "\u005f";};return _bc .Sprintf ("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073",_gddcb .PdfRectangle ,_defbd (_gddcb ._gbde ,"\u004c"),_defbd (_gddcb ._cacc ,"\u0052"),_defbd (_gddcb ._aeafd ,"\u0042"),_defbd (_gddcb ._eegfc ,"\u0054"));
|
||
};func _ecce (_eabf ,_bgc _gaa .Point )rulingKind {_agda :=_be .Abs (_eabf .X -_bgc .X );_dfd :=_be .Abs (_eabf .Y -_bgc .Y );return _abbd (_agda ,_dfd ,_bafg );};func _ecbb (_ffdg []pathSection )rulingList {_cffd (_ffdg );if _gfba {_bb .Log .Info ("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs",len (_ffdg ));
|
||
};var _eabaf rulingList ;for _ ,_efdfd :=range _ffdg {for _ ,_bccf :=range _efdfd ._gaac {if !_bccf .isQuadrilateral (){if _gfba {_bb .Log .Error ("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073",_bccf );
|
||
};continue ;};if _gegc ,_bfdb :=_bccf .makeRectRuling (_efdfd .Color );_bfdb {_eabaf =append (_eabaf ,_gegc );}else {if _fdbg {_bb .Log .Error ("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073",_bccf );
|
||
};};};};if _gfba {_bb .Log .Info ("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073",_eabaf .String ());};return _eabaf ;};func (_aacb *wordBag )removeWord (_aafb *textWord ,_bbe int ){_cbdc :=_aacb ._eaba [_bbe ];
|
||
_cbdc =_bdee (_cbdc ,_aafb );if len (_cbdc )==0{delete (_aacb ._eaba ,_bbe );}else {_aacb ._eaba [_bbe ]=_cbdc ;};};func (_eab *imageExtractContext )extractInlineImage (_ca *_bg .ContentStreamInlineImage ,_abd _bg .GraphicsState ,_cd *_eb .PdfPageResources )error {_ba ,_cba :=_ca .ToImage (_cd );
|
||
if _cba !=nil {return _cba ;};_aeg ,_cba :=_ca .GetColorSpace (_cd );if _cba !=nil {return _cba ;};if _aeg ==nil {_aeg =_eb .NewPdfColorspaceDeviceGray ();};_cgb ,_cba :=_aeg .ImageToRGB (*_ba );if _cba !=nil {return _cba ;};_dg :=ImageMark {Image :&_cgb ,Width :_abd .CTM .ScalingFactorX (),Height :_abd .CTM .ScalingFactorY (),Angle :_abd .CTM .Angle ()};
|
||
_dg .X ,_dg .Y =_abd .CTM .Translation ();_eab ._abc =append (_eab ._abc ,_dg );_eab ._df ++;return nil ;};func (_ccage paraList )llyRange (_gbef []int ,_dgeg ,_gaaf float64 )[]int {_cdgd :=len (_ccage );if _gaaf < _ccage [_gbef [0]].Lly ||_dgeg > _ccage [_gbef [_cdgd -1]].Lly {return nil ;
|
||
};_fdae :=_b .Search (_cdgd ,func (_dcab int )bool {return _ccage [_gbef [_dcab ]].Lly >=_dgeg });_acff :=_b .Search (_cdgd ,func (_edda int )bool {return _ccage [_gbef [_edda ]].Lly > _gaaf });return _gbef [_fdae :_acff ];};const _bee =1.0/1000.0;
|
||
|
||
// ImageExtractOptions contains options for controlling image extraction from
|
||
// PDF pages.
|
||
type ImageExtractOptions struct{IncludeInlineStencilMasks bool ;};func (_eeg *subpath )last ()_gaa .Point {return _eeg ._edg [len (_eeg ._edg )-1]};func _fcbc (_cebfa ,_bdcd int )uint64 {return uint64 (_cebfa )*0x1000000+uint64 (_bdcd )};func _gef (_egac []*textWord ,_dfef float64 ,_efbd ,_edc rulingList )*wordBag {_bbcb :=_aecg (_egac [0],_dfef ,_efbd ,_edc );
|
||
for _ ,_fgbb :=range _egac [1:]{_bdbea :=_aag (_fgbb ._efaba );_bbcb ._eaba [_bdbea ]=append (_bbcb ._eaba [_bdbea ],_fgbb );_bbcb .PdfRectangle =_agege (_bbcb .PdfRectangle ,_fgbb .PdfRectangle );};_bbcb .sort ();return _bbcb ;};var (_dddgb =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};
|
||
);func (_fded *textTable )emptyColumn (_fgad int )bool {for _bgad :=0;_bgad < _fded ._ggca ;_bgad ++{_cfgge :=_fded .get (_fgad ,_bgad );if _cfgge !=nil &&_cfgge .text ()!=""{return false ;};};return true ;};func (_dagg *wordBag )maxDepth ()float64 {return _dagg ._decb -_dagg .Lly };
|
||
func _fecc (_edad []TextMark ,_cgaa *int )[]TextMark {_ccdg :=_edad [len (_edad )-1];_dffbe :=[]rune (_ccdg .Text );if len (_dffbe )==1{_edad =_edad [:len (_edad )-1];_gggfc :=_edad [len (_edad )-1];*_cgaa =_gggfc .Offset +len (_gggfc .Text );}else {_bgfb :=_eacb (_ccdg .Text );
|
||
*_cgaa +=len (_bgfb )-len (_ccdg .Text );_ccdg .Text =_bgfb ;};return _edad ;};type textWord struct{_eb .PdfRectangle ;_efaba float64 ;_caee string ;_badb []*textMark ;_aggf float64 ;_dffcf bool ;};func (_adba *textLine )toTextMarks (_fbgcc *int )[]TextMark {var _gdae []TextMark ;
|
||
for _ ,_cfgc :=range _adba ._ffd {if _cfgc ._dffcf {_gdae =_deef (_gdae ,_fbgcc ,"\u0020");};_gaace :=_cfgc .toTextMarks (_fbgcc );_gdae =append (_gdae ,_gaace ...);};return _gdae ;};func _agee (_ceadgd ,_geafg int )int {if _ceadgd < _geafg {return _ceadgd ;
|
||
};return _geafg ;};func (_cbgd paraList )eventNeighbours (_ceeb []event )map[*textPara ][]int {_b .Slice (_ceeb ,func (_fbbab ,_fcbcf int )bool {_debe ,_eacf :=_ceeb [_fbbab ],_ceeb [_fcbcf ];_ffgb ,_gdde :=_debe ._dfab ,_eacf ._dfab ;if _ffgb !=_gdde {return _ffgb < _gdde ;
|
||
};if _debe ._aefd !=_eacf ._aefd {return _debe ._aefd ;};return _fbbab < _fcbcf ;});_fcfaf :=make (map[int ]intSet );_bgbdee :=make (intSet );for _ ,_fdbe :=range _ceeb {if _fdbe ._aefd {_fcfaf [_fdbe ._dabd ]=make (intSet );for _gdgg :=range _bgbdee {if _gdgg !=_fdbe ._dabd {_fcfaf [_fdbe ._dabd ].add (_gdgg );
|
||
_fcfaf [_gdgg ].add (_fdbe ._dabd );};};_bgbdee .add (_fdbe ._dabd );}else {_bgbdee .del (_fdbe ._dabd );};};_gceec :=map[*textPara ][]int {};for _gfed ,_fbbad :=range _fcfaf {_ddabe :=_cbgd [_gfed ];if len (_fbbad )==0{_gceec [_ddabe ]=nil ;continue ;
|
||
};_dcae :=make ([]int ,len (_fbbad ));_agegc :=0;for _gfbg :=range _fbbad {_dcae [_agegc ]=_gfbg ;_agegc ++;};_gceec [_ddabe ]=_dcae ;};return _gceec ;};func (_daff *stateStack )size ()int {return len (*_daff )};func (_agagd paraList )xNeighbours (_cfdbe float64 )map[*textPara ][]int {_bafb :=make ([]event ,2*len (_agagd ));
|
||
if _cfdbe ==0{for _ffaf ,_afcbb :=range _agagd {_bafb [2*_ffaf ]=event {_afcbb .Llx ,true ,_ffaf };_bafb [2*_ffaf +1]=event {_afcbb .Urx ,false ,_ffaf };};}else {for _bdeb ,_adea :=range _agagd {_bafb [2*_bdeb ]=event {_adea .Llx -_cfdbe *_adea .fontsize (),true ,_bdeb };
|
||
_bafb [2*_bdeb +1]=event {_adea .Urx +_cfdbe *_adea .fontsize (),false ,_bdeb };};};return _agagd .eventNeighbours (_bafb );};type textMark struct{_eb .PdfRectangle ;_fdag int ;_gbgc string ;_gdda string ;_gdcg *_eb .PdfFont ;_deedb float64 ;_gdade float64 ;
|
||
_bef _gaa .Matrix ;_ebcc _gaa .Point ;_addfe _eb .PdfRectangle ;_febb _eac .Color ;_dgd _eac .Color ;};func (_aaebc *wordBag )depthBand (_ddf ,_eafc float64 )[]int {if len (_aaebc ._eaba )==0{return nil ;};return _aaebc .depthRange (_aaebc .getDepthIdx (_ddf ),_aaebc .getDepthIdx (_eafc ));
|
||
};func (_bacd *textTable )markCells (){for _aacdg :=0;_aacdg < _bacd ._ggca ;_aacdg ++{for _cgca :=0;_cgca < _bacd ._gfgdf ;_cgca ++{_fefg :=_bacd .get (_cgca ,_aacdg );if _fefg !=nil {_fefg ._dfbc =true ;};};};};func (_afbg *PageText )computeViews (){var _bcdg rulingList ;
|
||
if _dgbf {_ffad :=_bgag (_afbg ._cfec );_bcdg =append (_bcdg ,_ffad ...);};if _cdfb {_dage :=_ecbb (_afbg ._gde );_bcdg =append (_bcdg ,_dage ...);};_bcdg ,_addf :=_bcdg .toTilings ();var _egb paraList ;_fcb :=len (_afbg ._ecca );for _gbf :=0;_gbf < 360&&_fcb > 0;
|
||
_gbf +=90{_ecee :=make ([]*textMark ,0,len (_afbg ._ecca )-_fcb );for _ ,_edb :=range _afbg ._ecca {if _edb ._fdag ==_gbf {_ecee =append (_ecee ,_edb );};};if len (_ecee )> 0{_dcg :=_gabc (_ecee ,_afbg ._aefg ,_bcdg ,_addf );_egb =append (_egb ,_dcg ...);
|
||
_fcb -=len (_ecee );};};_bafa :=new (_ga .Buffer );_egb .writeText (_bafa );_afbg ._caa =_bafa .String ();_afbg ._ceb =_egb .toTextMarks ();_afbg ._cfbe =_egb .tables ();if _cece {_bb .Log .Info ("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064",len (_afbg ._cfbe ));
|
||
};};func (_bd *textObject )showTextAdjusted (_dbeg *_af .PdfObjectArray )error {_fcc :=false ;for _ ,_cec :=range _dbeg .Elements (){switch _cec .(type ){case *_af .PdfObjectFloat ,*_af .PdfObjectInteger :_cffef ,_fdb :=_af .GetNumberAsFloat (_cec );if _fdb !=nil {_bb .Log .Debug ("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_cec ,_dbeg );
|
||
return _fdb ;};_dedc ,_dee :=-_cffef *0.001*_bd ._add ._ccef ,0.0;if _fcc {_dee ,_dedc =_dedc ,_dee ;};_ggdd :=_gggab (_gaa .Point {X :_dedc ,Y :_dee });_bd ._bbcf .Concat (_ggdd );case *_af .PdfObjectString :_age ,_gcee :=_af .GetStringBytes (_cec );if !_gcee {_bb .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_cec ,_dbeg );
|
||
return _af .ErrTypeError ;};_bd .renderText (_age );default:_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_cec ,_dbeg );
|
||
return _af .ErrTypeError ;};};return nil ;};type markKind int ;func (_dfca *textLine )appendWord (_eed *textWord ){_dfca ._ffd =append (_dfca ._ffd ,_eed );_dfca .PdfRectangle =_agege (_dfca .PdfRectangle ,_eed .PdfRectangle );if _eed ._aggf > _dfca ._dfefd {_dfca ._dfefd =_eed ._aggf ;
|
||
};if _eed ._efaba > _dfca ._eggg {_dfca ._eggg =_eed ._efaba ;};};const (_fcca =true ;_agec =true ;_ffba =true ;_ddagf =false ;_gagf =false ;_fffe =6;_eece =3.0;_ceadd =200;_bace =true ;_fdfa =true ;_dgbf =true ;_cdfb =true ;_fgf =false ;);func _gadg (_ccae _gaa .Point )*subpath {return &subpath {_edg :[]_gaa .Point {_ccae }}};
|
||
func (_acfc paraList )merge ()*textPara {_bb .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_acfc ));
|
||
if len (_acfc )==0{return nil ;};_acfc .sortReadingOrder ();_dagd :=_acfc [0].PdfRectangle ;_bcabb :=_acfc [0]._ccaa ;for _ ,_fcggg :=range _acfc [1:]{_dagd =_agege (_dagd ,_fcggg .PdfRectangle );_bcabb =append (_bcabb ,_fcggg ._ccaa ...);};return _fefc (_dagd ,_bcabb );
|
||
};func _dcea (_cbaa ,_fdc _gaa .Point )rulingKind {_ggce :=_be .Abs (_cbaa .X -_fdc .X );_bbbf :=_be .Abs (_cbaa .Y -_fdc .Y );return _abbd (_ggce ,_bbbf ,_eefb );};func _deef (_agdf []TextMark ,_dcba *int ,_ebdb string )[]TextMark {_aegbd :=_gca ;_aegbd .Text =_ebdb ;
|
||
return _cbba (_agdf ,_dcba ,_aegbd );};func _fgccb (_dccgc float64 )float64 {return _cgf *_be .Round (_dccgc /_cgf )};func (_gadd *textObject )showText (_dff []byte )error {return _gadd .renderText (_dff )};type bounded interface{bbox ()_eb .PdfRectangle };
|
||
func _geff (_agcgg string ,_aaec int )string {if len (_agcgg )< _aaec {return _agcgg ;};return _agcgg [:_aaec ];};type gridTile struct{_eb .PdfRectangle ;_eegfc ,_gbde ,_aeafd ,_cacc bool ;};func (_ebf *imageExtractContext )processOperand (_ae *_bg .ContentStreamOperation ,_abf _bg .GraphicsState ,_fafd *_eb .PdfPageResources )error {if _ae .Operand =="\u0042\u0049"&&len (_ae .Params )==1{_aed ,_edd :=_ae .Params [0].(*_bg .ContentStreamInlineImage );
|
||
if !_edd {return nil ;};if _fef ,_ecb :=_af .GetBoolVal (_aed .ImageMask );_ecb {if _fef &&!_ebf ._bgb .IncludeInlineStencilMasks {return nil ;};};return _ebf .extractInlineImage (_aed ,_abf ,_fafd );}else if _ae .Operand =="\u0044\u006f"&&len (_ae .Params )==1{_ccc ,_ccf :=_af .GetName (_ae .Params [0]);
|
||
if !_ccf {_bb .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");return _cc ;};_ ,_ce :=_fafd .GetXObjectByName (*_ccc );switch _ce {case _eb .XObjectTypeImage :return _ebf .extractXObjectImage (_ccc ,_abf ,_fafd );case _eb .XObjectTypeForm :return _ebf .extractFormImages (_ccc ,_abf ,_fafd );
|
||
};};return nil ;};func _fefc (_badd _eb .PdfRectangle ,_cee []*textLine )*textPara {return &textPara {PdfRectangle :_badd ,_ccaa :_cee };};func (_bgea compositeCell )parasBBox ()(paraList ,_eb .PdfRectangle ){return _bgea .paraList ,_bgea .PdfRectangle ;
|
||
};type rulingList []*ruling ;func _cfccf (_ceed _eb .PdfRectangle )*ruling {return &ruling {_bdfg :_dbbb ,_cda :_ceed .Urx ,_dbce :_ceed .Lly ,_daafc :_ceed .Ury };};type textPara struct{_eb .PdfRectangle ;_ccac _eb .PdfRectangle ;_ccaa []*textLine ;_dcfac *textTable ;
|
||
_dfbc bool ;_dgcce bool ;_dcef *textPara ;_cggf *textPara ;_aggd *textPara ;_cebed *textPara ;};func (_cbedc *textPara )taken ()bool {return _cbedc ==nil ||_cbedc ._dfbc };func (_dcdf paraList )applyTables (_cgaec []*textTable )paraList {var _badcd paraList ;
|
||
for _ ,_daeg :=range _cgaec {_badcd =append (_badcd ,_daeg .newTablePara ());};for _ ,_fddf :=range _dcdf {if _fddf ._dfbc {continue ;};_badcd =append (_badcd ,_fddf );};return _badcd ;};func (_egcdc compositeCell )String ()string {_bagg :="";if len (_egcdc .paraList )> 0{_bagg =_geff (_egcdc .paraList .merge ().text (),50);
|
||
};return _bc .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071",_egcdc .PdfRectangle ,len (_egcdc .paraList ),_bagg );};func (_dcag intSet )has (_aead int )bool {_ ,_bgafg :=_dcag [_aead ];return _bgafg };
|
||
func (_fgea *textMark )bbox ()_eb .PdfRectangle {return _fgea .PdfRectangle };func (_bgg *textObject )getFillColor ()_eac .Color {return _cbcfg (_bgg ._gfa .ColorspaceNonStroking ,_bgg ._gfa .ColorNonStroking );};func (_dbcg rulingList )isActualGrid ()(rulingList ,bool ){_effb ,_dbdb :=_dbcg .augmentGrid ();
|
||
if !(len (_effb )>=_ddga +1&&len (_dbdb )>=_fcgdc +1){if _gfba {_bb .Log .Info ("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064",len (_effb ),len (_dbdb ),_ddga +1,_fcgdc +1);
|
||
};return nil ,false ;};if _gfba {_bb .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074",_dbcg ,len (_effb )>=2,len (_dbdb )>=2,len (_effb )>=2&&len (_dbdb )>=2);
|
||
for _adac ,_eaaab :=range _dbcg {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a",_adac ,_eaaab );};};if _fgf {_baffc ,_bgfa :=_effb [0],_effb [len (_effb )-1];_becb ,_ceadg :=_dbdb [0],_dbdb [len (_dbdb )-1];if !(_gcbaf (_baffc ._cda -_becb ._dbce )&&_gcbaf (_bgfa ._cda -_becb ._daafc )&&_gcbaf (_becb ._cda -_baffc ._daafc )&&_gcbaf (_ceadg ._cda -_baffc ._dbce )){if _gfba {_bb .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073",_baffc ,_bgfa ,_becb ,_ceadg );
|
||
};return nil ,false ;};}else {if !_effb .aligned (){if _gcbb {_bb .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064",len (_effb ));
|
||
};return nil ,false ;};if !_dbdb .aligned (){if _gfba {_bb .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064",len (_dbdb ));
|
||
};return nil ,false ;};};_addd :=append (_effb ,_dbdb ...);return _addd ,true ;};
|
||
|
||
// String returns a description of `k`.
|
||
func (_ffcc rulingKind )String ()string {_dfbg ,_cbcfe :=_dfbegc [_ffcc ];if !_cbcfe {return _bc .Sprintf ("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064",_ffcc );};return _dfbg ;};func (_fgga *textPara )writeCellText (_bbef _ea .Writer ){for _faed ,_gddga :=range _fgga ._ccaa {_ddbg :=_gddga .text ();
|
||
_egbc :=_fcca &&_gddga .endsInHyphen ()&&_faed !=len (_fgga ._ccaa )-1;if _egbc {_ddbg =_eacb (_ddbg );};_bbef .Write ([]byte (_ddbg ));if !(_egbc ||_faed ==len (_fgga ._ccaa )-1){_bbef .Write ([]byte (_abfb (_gddga ._eggg ,_fgga ._ccaa [_faed +1]._eggg )));
|
||
};};};func (_dabc *textTable )put (_babfg ,_agcd int ,_egbga *textPara ){_dabc ._ecac [_fcbc (_babfg ,_agcd )]=_egbga ;};func (_ade *shapesState )lastpointEstablished ()(_gaa .Point ,bool ){if _ade ._gdef {return _ade ._ebgfc ,false ;};_bbfc :=len (_ade ._ege );
|
||
if _bbfc > 0&&_ade ._ege [_bbfc -1]._edabe {return _ade ._ege [_bbfc -1].last (),false ;};return _gaa .Point {},true ;};func _caaf (_gba *wordBag ,_agbf *textWord ,_cdeb float64 )bool {return _gba .Urx <=_agbf .Llx &&_agbf .Llx < _gba .Urx +_cdeb ;};
|
||
|
||
// ExtractText processes and extracts all text data in content streams and returns as a string.
|
||
// It takes into account character encodings in the PDF file, which are decoded by
|
||
// CharcodeBytesToUnicode.
|
||
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
|
||
func (_gd *Extractor )ExtractText ()(string ,error ){_eda ,_ ,_ ,_ee :=_gd .ExtractTextWithStats ();return _eda ,_ee ;};type rulingKind int ;func (_ddad rulingList )intersections ()map[int ]intSet {var _aeee ,_gaafdf []int ;for _agcg ,_egab :=range _ddad {switch _egab ._bdfg {case _dbbb :_aeee =append (_aeee ,_agcg );
|
||
case _afdgd :_gaafdf =append (_gaafdf ,_agcg );};};if len (_aeee )< _ddga +1||len (_gaafdf )< _fcgdc +1{return nil ;};if len (_aeee )+len (_gaafdf )> _bcdgf {_bb .Log .Debug ("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",len (_ddad ),len (_aeee ),len (_gaafdf ));
|
||
return nil ;};_gggac :=make (map[int ]intSet ,len (_aeee )+len (_gaafdf ));for _ ,_aecac :=range _aeee {for _ ,_bgfc :=range _gaafdf {if _ddad [_aecac ].intersects (_ddad [_bgfc ]){if _ ,_dfcec :=_gggac [_aecac ];!_dfcec {_gggac [_aecac ]=make (intSet );
|
||
};if _ ,_gbgcd :=_gggac [_bgfc ];!_gbgcd {_gggac [_bgfc ]=make (intSet );};_gggac [_aecac ].add (_bgfc );_gggac [_bgfc ].add (_aecac );};};};return _gggac ;};func (_abe *imageExtractContext )extractFormImages (_bbg *_af .PdfObjectName ,_cea _bg .GraphicsState ,_bfa *_eb .PdfPageResources )error {_adb ,_cac :=_bfa .GetXObjectFormByName (*_bbg );
|
||
if _cac !=nil {return _cac ;};if _adb ==nil {return nil ;};_eg ,_cac :=_adb .GetContentStream ();if _cac !=nil {return _cac ;};_baff :=_adb .Resources ;if _baff ==nil {_baff =_bfa ;};_cac =_abe .extractContentStreamImages (string (_eg ),_baff );if _cac !=nil {return _cac ;
|
||
};_abe ._adg ++;return nil ;};type textResult struct{_gbb PageText ;_fcgc int ;_eagcc int ;};func _aeaf (_deea ,_aca _eb .PdfRectangle )(_eb .PdfRectangle ,bool ){if !_gcaa (_deea ,_aca ){return _eb .PdfRectangle {},false ;};return _eb .PdfRectangle {Llx :_be .Max (_deea .Llx ,_aca .Llx ),Urx :_be .Min (_deea .Urx ,_aca .Urx ),Lly :_be .Max (_deea .Lly ,_aca .Lly ),Ury :_be .Min (_deea .Ury ,_aca .Ury )},true ;
|
||
};func (_ggggd *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_gfc :=make (map[int ]map[*textWord ]struct{},len (_ggggd ._eaba ));for _afae :=range _ggggd ._eaba {_gfc [_afae ]=make (map[*textWord ]struct{});};return _gfc ;};func (_dcdd *textObject )getCurrentFont ()*_eb .PdfFont {_bged :=_dcdd ._add ._adgd ;
|
||
if _bged ==nil {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e");return _eb .DefaultFont ();
|
||
};return _bged ;};
|
||
|
||
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
|
||
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
|
||
func (_aa *Extractor )ExtractTextWithStats ()(_cfg string ,_cdg int ,_cead int ,_cbg error ){_eea ,_cdg ,_cead ,_cbg :=_aa .ExtractPageText ();if _cbg !=nil {return "",_cdg ,_cead ,_cbg ;};return _eea .Text (),_cdg ,_cead ,nil ;};func (_gecg *wordBag )pullWord (_ddgc *textWord ,_aaf int ,_cab map[int ]map[*textWord ]struct{}){_gecg .PdfRectangle =_agege (_gecg .PdfRectangle ,_ddgc .PdfRectangle );
|
||
if _ddgc ._aggf > _gecg ._cgad {_gecg ._cgad =_ddgc ._aggf ;};_gecg ._eaba [_aaf ]=append (_gecg ._eaba [_aaf ],_ddgc );_cab [_aaf ][_ddgc ]=struct{}{};};func _fdbac (_cbdce _eb .PdfRectangle )*ruling {return &ruling {_bdfg :_afdgd ,_cda :_cbdce .Ury ,_dbce :_cbdce .Llx ,_daafc :_cbdce .Urx };
|
||
};type shapesState struct{_agbc _gaa .Matrix ;_cbgb _gaa .Matrix ;_ege []*subpath ;_gdef bool ;_ebgfc _gaa .Point ;_babd *textObject ;};func (_egfg rulingList )findPrimSec (_gdfgg ,_bedd float64 )*ruling {for _ ,_geeg :=range _egfg {if _dede (_geeg ._cda -_gdfgg )&&_geeg ._dbce -_gffe <=_bedd &&_bedd <=_geeg ._daafc +_gffe {return _geeg ;
|
||
};};return nil ;};
|
||
|
||
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
|
||
func (_fdf *TextMarkArray )BBox ()(_eb .PdfRectangle ,bool ){var _cfba _eb .PdfRectangle ;_bea :=false ;for _ ,_efgf :=range _fdf ._gddg {if _efgf .Meta ||_fefba (_efgf .Text ){continue ;};if _bea {_cfba =_agege (_cfba ,_efgf .BBox );}else {_cfba =_efgf .BBox ;
|
||
_bea =true ;};};return _cfba ,_bea ;};func (_bcec rulingList )toTilings ()(rulingList ,[]gridTiling ){_bcec .log ("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s");if len (_bcec )==0{return nil ,nil ;};_bcec =_bcec .tidied ("\u0061\u006c\u006c");_bcec .log ("\u0074\u0069\u0064\u0069\u0065\u0064");
|
||
_gdadb :=_bcec .toGrids ();_bdcff :=make ([]gridTiling ,len (_gdadb ));for _gabb ,_ebedd :=range _gdadb {_bdcff [_gabb ]=_ebedd .asTiling ();};return _bcec ,_bdcff ;};func (_ffa *textObject )setTextRise (_bga float64 ){if _ffa ==nil {return ;};_ffa ._add ._dce =_bga ;
|
||
};func (_fgb *shapesState )devicePoint (_afcb ,_effg float64 )_gaa .Point {_cacg :=_fgb ._cbgb .Mult (_fgb ._agbc );_afcb ,_effg =_cacg .Transform (_afcb ,_effg );return _gaa .NewPoint (_afcb ,_effg );};func (_dgca *textPara )text ()string {_adgcg :=new (_ga .Buffer );
|
||
_dgca .writeText (_adgcg );return _adgcg .String ();};func (_dafg *shapesState )quadraticTo (_bcab ,_dfc ,_adcef ,_abfe float64 ){if _agbcg {_bb .Log .Info ("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a");};_dafg .addPoint (_adcef ,_abfe );
|
||
};func (_aeff rulingList )toGrids ()[]rulingList {if _gfba {_bb .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_aeff );};_eafa :=_aeff .intersections ();if _gfba {_bb .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_aeff ),len (_eafa ));
|
||
for _ ,_cgdd :=range _cfbc (_eafa ){_bc .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_cgdd ,_eafa [_cgdd ]);};};_dfce :=make (map[int ]intSet ,len (_aeff ));for _faefe :=range _aeff {_ecge :=_aeff .connections (_eafa ,_faefe );if len (_ecge )> 0{_dfce [_faefe ]=_ecge ;
|
||
};};if _gfba {_bb .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_dfce ));for _ ,_agdcc :=range _cfbc (_dfce ){_bc .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_agdcc ,_dfce [_agdcc ]);
|
||
};};_agaf :=_debc (len (_aeff ),func (_egee ,_beca int )bool {_gdge ,_cggb :=len (_dfce [_egee ]),len (_dfce [_beca ]);if _gdge !=_cggb {return _gdge > _cggb ;};return _aeff .comp (_egee ,_beca );});if _gfba {_bb .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_agaf );
|
||
};_ebeg :=[][]int {{_agaf [0]}};_fede :for _ ,_ccfed :=range _agaf [1:]{for _daeb ,_adae :=range _ebeg {for _ ,_bbbc :=range _adae {if _dfce [_bbbc ].has (_ccfed ){_ebeg [_daeb ]=append (_adae ,_ccfed );continue _fede ;};};};_ebeg =append (_ebeg ,[]int {_ccfed });
|
||
};if _gfba {_bb .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_ebeg );};_b .SliceStable (_ebeg ,func (_eada ,_febf int )bool {return len (_ebeg [_eada ])> len (_ebeg [_febf ])});for _ ,_fdec :=range _ebeg {_b .Slice (_fdec ,func (_agce ,_eddf int )bool {return _aeff .comp (_fdec [_agce ],_fdec [_eddf ])});
|
||
};_bafde :=make ([]rulingList ,len (_ebeg ));for _aebf ,_gacg :=range _ebeg {_bgaed :=make (rulingList ,len (_gacg ));for _dfggd ,_gbbae :=range _gacg {_bgaed [_dfggd ]=_aeff [_gbbae ];};_bafde [_aebf ]=_bgaed ;};if _gfba {_bb .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_bafde );
|
||
};var _ecgff []rulingList ;for _ ,_cffa :=range _bafde {if _bgga ,_dafe :=_cffa .isActualGrid ();_dafe {_cffa =_bgga ;_cffa =_cffa .snapToGroups ();_ecgff =append (_ecgff ,_cffa );};};if _gfba {_fdbce ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_ecgff );
|
||
_bb .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_bafde ),len (_ecgff ));};return _ecgff ;};func (_gcfb paraList )addNeighbours (){_bcaa :=func (_bceca []int ,_bcga *textPara )([]*textPara ,[]*textPara ){_aggg :=make ([]*textPara ,0,len (_bceca )-1);
|
||
_fccfd :=make ([]*textPara ,0,len (_bceca )-1);for _ ,_gbfa :=range _bceca {_gbefd :=_gcfb [_gbfa ];if _gbefd .Urx <=_bcga .Llx {_aggg =append (_aggg ,_gbefd );}else if _gbefd .Llx >=_bcga .Urx {_fccfd =append (_fccfd ,_gbefd );};};return _aggg ,_fccfd ;
|
||
};_gcdff :=func (_fgfg []int ,_ecgcg *textPara )([]*textPara ,[]*textPara ){_ecdb :=make ([]*textPara ,0,len (_fgfg )-1);_bcbe :=make ([]*textPara ,0,len (_fgfg )-1);for _ ,_cfdag :=range _fgfg {_agef :=_gcfb [_cfdag ];if _agef .Ury <=_ecgcg .Lly {_bcbe =append (_bcbe ,_agef );
|
||
}else if _agef .Lly >=_ecgcg .Ury {_ecdb =append (_ecdb ,_agef );};};return _ecdb ,_bcbe ;};_egfef :=_gcfb .yNeighbours (_ffac );for _ ,_cdbeb :=range _gcfb {_edgbd :=_egfef [_cdbeb ];if len (_edgbd )==0{continue ;};_acgf ,_efdd :=_bcaa (_edgbd ,_cdbeb );
|
||
if len (_acgf )==0&&len (_efdd )==0{continue ;};if len (_acgf )> 0{_dfbcg :=_acgf [0];for _ ,_abaeg :=range _acgf [1:]{if _abaeg .Urx >=_dfbcg .Urx {_dfbcg =_abaeg ;};};for _ ,_cbdbe :=range _acgf {if _cbdbe !=_dfbcg &&_cbdbe .Urx > _dfbcg .Llx {_dfbcg =nil ;
|
||
break ;};};if _dfbcg !=nil &&_gbgf (_cdbeb .PdfRectangle ,_dfbcg .PdfRectangle ){_cdbeb ._dcef =_dfbcg ;};};if len (_efdd )> 0{_gcff :=_efdd [0];for _ ,_gddfe :=range _efdd [1:]{if _gddfe .Llx <=_gcff .Llx {_gcff =_gddfe ;};};for _ ,_bfccc :=range _efdd {if _bfccc !=_gcff &&_bfccc .Llx < _gcff .Urx {_gcff =nil ;
|
||
break ;};};if _gcff !=nil &&_gbgf (_cdbeb .PdfRectangle ,_gcff .PdfRectangle ){_cdbeb ._cggf =_gcff ;};};};_egfef =_gcfb .xNeighbours (_dabb );for _ ,_adff :=range _gcfb {_fgda :=_egfef [_adff ];if len (_fgda )==0{continue ;};_edfda ,_dbga :=_gcdff (_fgda ,_adff );
|
||
if len (_edfda )==0&&len (_dbga )==0{continue ;};if len (_dbga )> 0{_bgfge :=_dbga [0];for _ ,_dfbbg :=range _dbga [1:]{if _dfbbg .Ury >=_bgfge .Ury {_bgfge =_dfbbg ;};};for _ ,_gffgb :=range _dbga {if _gffgb !=_bgfge &&_gffgb .Ury > _bgfge .Lly {_bgfge =nil ;
|
||
break ;};};if _bgfge !=nil &&_degf (_adff .PdfRectangle ,_bgfge .PdfRectangle ){_adff ._cebed =_bgfge ;};};if len (_edfda )> 0{_fccgba :=_edfda [0];for _ ,_aagd :=range _edfda [1:]{if _aagd .Lly <=_fccgba .Lly {_fccgba =_aagd ;};};for _ ,_fefgf :=range _edfda {if _fefgf !=_fccgba &&_fefgf .Lly < _fccgba .Ury {_fccgba =nil ;
|
||
break ;};};if _fccgba !=nil &&_degf (_adff .PdfRectangle ,_fccgba .PdfRectangle ){_adff ._aggd =_fccgba ;};};};for _ ,_bgde :=range _gcfb {if _bgde ._dcef !=nil &&_bgde ._dcef ._cggf !=_bgde {_bgde ._dcef =nil ;};if _bgde ._aggd !=nil &&_bgde ._aggd ._cebed !=_bgde {_bgde ._aggd =nil ;
|
||
};if _bgde ._cggf !=nil &&_bgde ._cggf ._dcef !=_bgde {_bgde ._cggf =nil ;};if _bgde ._cebed !=nil &&_bgde ._cebed ._aggd !=_bgde {_bgde ._cebed =nil ;};};};func (_edef paraList )computeEBBoxes (){if _adfc {_bb .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a");
|
||
};for _ ,_fage :=range _edef {_fage ._ccac =_fage .PdfRectangle ;};_cbbf :=_edef .yNeighbours (0);for _aadc ,_efdf :=range _edef {_bfg :=_efdf ._ccac ;_acfe ,_fcbg :=-1.0e9,+1.0e9;for _ ,_aecc :=range _cbbf [_efdf ]{_aaea :=_edef [_aecc ]._ccac ;if _aaea .Urx < _bfg .Llx {_acfe =_be .Max (_acfe ,_aaea .Urx );
|
||
}else if _bfg .Urx < _aaea .Llx {_fcbg =_be .Min (_fcbg ,_aaea .Llx );};};for _gcad ,_gdcf :=range _edef {_gfbe :=_gdcf ._ccac ;if _aadc ==_gcad ||_gfbe .Ury > _bfg .Lly {continue ;};if _acfe <=_gfbe .Llx &&_gfbe .Llx < _bfg .Llx {_bfg .Llx =_gfbe .Llx ;
|
||
}else if _gfbe .Urx <=_fcbg &&_bfg .Urx < _gfbe .Urx {_bfg .Urx =_gfbe .Urx ;};};if _adfc {_bc .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_aadc ,_efdf ._ccac ,_bfg ,_geff (_efdf .text (),50));
|
||
};_efdf ._ccac =_bfg ;};if _ddagf {for _ ,_cebg :=range _edef {_cebg .PdfRectangle =_cebg ._ccac ;};};};func _cffd (_gddcf []pathSection ){if _cgf < 0.0{return ;};if _gfba {_bb .Log .Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_gddcf ));
|
||
};for _cgbff ,_eafgb :=range _gddcf {for _dedcf ,_eccc :=range _eafgb ._gaac {for _acbff ,_beagc :=range _eccc ._edg {_eccc ._edg [_acbff ]=_gaa .Point {X :_fgccb (_beagc .X ),Y :_fgccb (_beagc .Y )};if _gfba {_aff :=_eccc ._edg [_acbff ];if !_bgdg (_beagc ,_aff ){_dgad :=_gaa .Point {X :_aff .X -_beagc .X ,Y :_aff .Y -_beagc .Y };
|
||
_bc .Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_cgbff ,_dedcf ,_acbff ,_beagc ,_aff ,_dgad );};};};};};};func _fdbce (_edce string ,_aagg []rulingList ){_bb .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_aagg ),_edce );
|
||
for _bage ,_eege :=range _aagg {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bage ,_eege .String ());};};
|
||
|
||
// String returns a string describing the current state of the textState stack.
|
||
func (_cgbf *stateStack )String ()string {_bbc :=[]string {_bc .Sprintf ("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064",len (*_cgbf ))};for _dag ,_bgae :=range *_cgbf {_gafb :="\u003c\u006e\u0069l\u003e";
|
||
if _bgae !=nil {_gafb =_bgae .String ();};_bbc =append (_bbc ,_bc .Sprintf ("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073",_dag ,_gafb ));};return _a .Join (_bbc ,"\u000a");};
|
||
|
||
// Text returns the extracted page text.
|
||
func (_aeag PageText )Text ()string {return _aeag ._caa };func (_egbcf *ruling )encloses (_ecfbf ,_ebaa float64 )bool {return _egbcf ._dbce -_gffe <=_ecfbf &&_ebaa <=_egbcf ._daafc +_gffe ;};func (_egge rulingList )snapToGroups ()rulingList {_dfbf ,_ffgec :=_egge .vertsHorzs ();
|
||
if len (_dfbf )> 0{_dfbf =_dfbf .snapToGroupsDirection ();};if len (_ffgec )> 0{_ffgec =_ffgec .snapToGroupsDirection ();};_dbdga :=append (_dfbf ,_ffgec ...);_dbdga .log ("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073");return _dbdga ;
|
||
};func (_ffgf paraList )findTextTables ()[]*textTable {var _dgdb []*textTable ;for _ ,_bgfg :=range _ffgf {if _bgfg .taken ()||_bgfg .Width ()==0{continue ;};_fecbe :=_bgfg .isAtom ();if _fecbe ==nil {continue ;};_fecbe .growTable ();if _fecbe ._gfgdf *_fecbe ._ggca < _edee {continue ;
|
||
};_fecbe .markCells ();_fecbe .log ("\u0067\u0072\u006fw\u006e");_dgdb =append (_dgdb ,_fecbe );};return _dgdb ;};type gridTiling struct{_eb .PdfRectangle ;_abdf []float64 ;_bgcf []float64 ;_bfbgd map[float64 ]map[float64 ]gridTile ;};
|
||
|
||
// String returns a description of `tm`.
|
||
func (_adfe *textMark )String ()string {return _bc .Sprintf ("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022",_adfe .PdfRectangle ,_adfe ._deedb ,_adfe ._gbgc );};
|
||
|
||
// String returns a description of `v`.
|
||
func (_fcgb *ruling )String ()string {if _fcgb ._bdfg ==_dcga {return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047";};_ggdf ,_dfeg :="\u0078","\u0079";if _fcgb ._bdfg ==_afdgd {_ggdf ,_dfeg ="\u0079","\u0078";};_dffc :="";if _fcgb ._gddcc !=0.0{_dffc =_bc .Sprintf (" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_fcgb ._gddcc );
|
||
};return _bc .Sprintf ("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",_fcgb ._bdfg ,_ggdf ,_fcgb ._cda ,_dfeg ,_fcgb ._dbce ,_fcgb ._daafc ,_fcgb ._daafc -_fcgb ._dbce ,_fcgb ._eabd ,_fcgb .Color ,_dffc );
|
||
};type compositeCell struct{_eb .PdfRectangle ;paraList ;};const _cgdg =10;func (_bfaf *textWord )computeText ()string {_cgbbg :=make ([]string ,len (_bfaf ._badb ));for _fggdd ,_cbabb :=range _bfaf ._badb {_cgbbg [_fggdd ]=_cbabb ._gbgc ;};return _a .Join (_cgbbg ,"");
|
||
};func _babf (_begba *wordBag ,_bced float64 ,_badc ,_gcaag rulingList )[]*wordBag {var _addfd []*wordBag ;for _ ,_bfcc :=range _begba .depthIndexes (){_becf :=false ;for !_begba .empty (_bfcc ){_bedg :=_begba .firstReadingIndex (_bfcc );_eaaa :=_begba .firstWord (_bedg );
|
||
_acfg :=_aecg (_eaaa ,_bced ,_badc ,_gcaag );_begba .removeWord (_eaaa ,_bedg );if _egeb {_bb .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_eaaa .String ());};for _fbdb :=true ;_fbdb ;_fbdb =_becf {_becf =false ;
|
||
_fad :=_bdcf *_acfg ._cgad ;_aced :=_eage *_acfg ._cgad ;_bcca :=_ccgd *_acfg ._cgad ;if _egeb {_bb .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_acfg .minDepth (),_acfg .maxDepth (),_bcca ,_aced );
|
||
};if _begba .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_acfg ,_addc (_aafc ,0),_acfg .minDepth ()-_bcca ,_acfg .maxDepth ()+_bcca ,_fgbg ,false ,false )> 0{_becf =true ;};if _begba .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_acfg ,_addc (_aafc ,_aced ),_acfg .minDepth (),_acfg .maxDepth (),_ecgf ,false ,false )> 0{_becf =true ;
|
||
};if _becf {continue ;};_dfcc :=_begba .scanBand ("",_acfg ,_addc (_caaf ,_fad ),_acfg .minDepth (),_acfg .maxDepth (),_cgdgg ,true ,false );if _dfcc > 0{_agdfg :=(_acfg .maxDepth ()-_acfg .minDepth ())/_acfg ._cgad ;if (_dfcc > 1&&float64 (_dfcc )> 0.3*_agdfg )||_dfcc <=10{if _begba .scanBand ("\u006f\u0074\u0068e\u0072",_acfg ,_addc (_caaf ,_fad ),_acfg .minDepth (),_acfg .maxDepth (),_cgdgg ,false ,true )> 0{_becf =true ;
|
||
};};};};_addfd =append (_addfd ,_acfg );};};return _addfd ;};type textState struct{_cdbb float64 ;_fege float64 ;_gbg float64 ;_cad float64 ;_ccef float64 ;_dab RenderMode ;_dce float64 ;_adgd *_eb .PdfFont ;_fggd _eb .PdfRectangle ;_cdbe int ;_gac int ;
|
||
};func (_fdbd *subpath )add (_adgg ..._gaa .Point ){_fdbd ._edg =append (_fdbd ._edg ,_adgg ...)};func _gcaa (_adgf ,_cabd _eb .PdfRectangle )bool {return _degf (_adgf ,_cabd )&&_gbgf (_adgf ,_cabd )};func (_aaed gridTile )complete ()bool {return _aaed .numBorders ()==4};
|
||
func (_dbfc *textTable )subdivide ()*textTable {_dbfc .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");_fagc :=_dbfc .compositeRowCorridors ();_gddf :=_dbfc .compositeColCorridors ();if _cece {_bb .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_efbdb (_fagc ),_efbdb (_gddf ));
|
||
};if len (_fagc )==0||len (_gddf )==0{return _dbfc ;};_cgbab (_fagc );_cgbab (_gddf );if _cece {_bb .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_efbdb (_fagc ),_efbdb (_gddf ));
|
||
};_dbed ,_gggd :=_gfbc (_dbfc ._ggca ,_fagc );_gdffa ,_eafe :=_gfbc (_dbfc ._gfgdf ,_gddf );_aeaffa :=make (map[uint64 ]*textPara ,_eafe *_gggd );_bebc :=&textTable {PdfRectangle :_dbfc .PdfRectangle ,_ggac :_dbfc ._ggac ,_ggca :_gggd ,_gfgdf :_eafe ,_ecac :_aeaffa };
|
||
if _cece {_bb .Log .Info ("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_dbfc ._gfgdf ,_dbfc ._ggca ,_eafe ,_gggd ,_efbdb (_fagc ),_efbdb (_gddf ),_dbed ,_gdffa );
|
||
};for _cceg :=0;_cceg < _dbfc ._ggca ;_cceg ++{_acdae :=_dbed [_cceg ];for _bfgc :=0;_bfgc < _dbfc ._gfgdf ;_bfgc ++{_cgbeg :=_gdffa [_bfgc ];if _cece {_bc .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_bfgc ,_cceg ,_cgbeg ,_acdae );
|
||
};_gcde ,_bbccc :=_dbfc ._bcdge [_fcbc (_bfgc ,_cceg )];if !_bbccc {continue ;};_cddb :=_gcde .split (_fagc [_cceg ],_gddf [_bfgc ]);for _cbadeb :=0;_cbadeb < _cddb ._ggca ;_cbadeb ++{for _fcfdb :=0;_fcfdb < _cddb ._gfgdf ;_fcfdb ++{_dacb :=_cddb .get (_fcfdb ,_cbadeb );
|
||
_bebc .put (_cgbeg +_fcfdb ,_acdae +_cbadeb ,_dacb );if _cece {_bc .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_cgbeg +_fcfdb ,_acdae +_cbadeb ,_dacb );};};};};};return _bebc ;};func (_dcfa *textMark )inDiacriticArea (_faec *textMark )bool {_dcff :=_dcfa .Llx -_faec .Llx ;
|
||
_gfgb :=_dcfa .Urx -_faec .Urx ;_gfbb :=_dcfa .Lly -_faec .Lly ;return _be .Abs (_dcff +_gfgb )< _dcfa .Width ()*_eaag &&_be .Abs (_gfbb )< _dcfa .Height ()*_eaag ;};func (_ggbgd rulingList )blocks (_faae ,_afcbc *ruling )bool {if _faae ._dbce > _afcbc ._daafc ||_afcbc ._dbce > _faae ._daafc {return false ;
|
||
};_ebag :=_be .Max (_faae ._dbce ,_afcbc ._dbce );_cbde :=_be .Min (_faae ._daafc ,_afcbc ._daafc );if _faae ._cda > _afcbc ._cda {_faae ,_afcbc =_afcbc ,_faae ;};for _ ,_aeagb :=range _ggbgd {if _faae ._cda <=_aeagb ._cda +_cdceg &&_aeagb ._cda <=_afcbc ._cda +_cdceg &&_aeagb ._dbce <=_cbde &&_ebag <=_aeagb ._daafc {return true ;
|
||
};};return false ;};func (_ebdg *textTable )toTextTable ()TextTable {if _cece {_bb .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_ebdg ._gfgdf ,_ebdg ._ggca );};_ccfd :=make ([][]TableCell ,_ebdg ._ggca );
|
||
for _dfed :=0;_dfed < _ebdg ._ggca ;_dfed ++{_ccfd [_dfed ]=make ([]TableCell ,_ebdg ._gfgdf );for _aaaba :=0;_aaaba < _ebdg ._gfgdf ;_aaaba ++{_caab :=_ebdg .get (_aaaba ,_dfed );if _caab ==nil {continue ;};if _cece {_bc .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_aaaba ,_dfed ,_caab );
|
||
};_ccfd [_dfed ][_aaaba ].Text =_caab .text ();_fagee :=0;_ccfd [_dfed ][_aaaba ].Marks ._gddg =_caab .toTextMarks (&_fagee );};};return TextTable {W :_ebdg ._gfgdf ,H :_ebdg ._ggca ,Cells :_ccfd };};func (_gebe *ruling )equals (_dcebc *ruling )bool {return _gebe ._bdfg ==_dcebc ._bdfg &&_caafb (_gebe ._cda ,_dcebc ._cda )&&_caafb (_gebe ._dbce ,_dcebc ._dbce )&&_caafb (_gebe ._daafc ,_dcebc ._daafc );
|
||
};func (_cgc rulingList )augmentGrid ()(rulingList ,rulingList ){_dedf ,_gcab :=_cgc .vertsHorzs ();if len (_dedf )==0||len (_gcab )==0{return _dedf ,_gcab ;};_cfce ,_ebda :=_dedf ,_gcab ;_efecf :=_dedf .bbox ();_fggeb :=_gcab .bbox ();if _gfba {_bb .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_efecf );
|
||
_bb .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_fggeb );};var _cgaeg ,_dfge ,_bdef ,_cecaa *ruling ;if _fggeb .Llx < _efecf .Llx -_gffe {_cgaeg =&ruling {_eabd :_dbagd ,_bdfg :_dbbb ,_cda :_fggeb .Llx ,_dbce :_efecf .Lly ,_daafc :_efecf .Ury };
|
||
_dedf =append (rulingList {_cgaeg },_dedf ...);};if _fggeb .Urx > _efecf .Urx +_gffe {_dfge =&ruling {_eabd :_dbagd ,_bdfg :_dbbb ,_cda :_fggeb .Urx ,_dbce :_efecf .Lly ,_daafc :_efecf .Ury };_dedf =append (_dedf ,_dfge );};if _efecf .Lly < _fggeb .Lly -_gffe {_bdef =&ruling {_eabd :_dbagd ,_bdfg :_afdgd ,_cda :_efecf .Lly ,_dbce :_fggeb .Llx ,_daafc :_fggeb .Urx };
|
||
_gcab =append (rulingList {_bdef },_gcab ...);};if _efecf .Ury > _fggeb .Ury +_gffe {_cecaa =&ruling {_eabd :_dbagd ,_bdfg :_afdgd ,_cda :_efecf .Ury ,_dbce :_fggeb .Llx ,_daafc :_fggeb .Urx };_gcab =append (_gcab ,_cecaa );};if len (_dedf )+len (_gcab )==len (_cgc ){return _cfce ,_ebda ;
|
||
};_aadb :=append (_dedf ,_gcab ...);_cgc .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_aadb .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");return _dedf ,_gcab ;};type intSet map[int ]struct{};type stateStack []*textState ;
|
||
type lineRuling struct{_cbgfa rulingKind ;_bggc markKind ;_eac .Color ;_cbdg ,_cbfb _gaa .Point ;};func (_eafg rulingList )mergePrimary ()float64 {_aeabb :=_eafg [0]._cda ;for _ ,_ccafd :=range _eafg [1:]{_aeabb +=_ccafd ._cda ;};return _aeabb /float64 (len (_eafg ));
|
||
};func (_aaedc intSet )add (_bcdb int ){_aaedc [_bcdb ]=struct{}{}};type rectRuling struct{_bcfe rulingKind ;_fcff markKind ;_eac .Color ;_eb .PdfRectangle ;};func (_bbdb *textTable )compositeRowCorridors ()map[int ][]float64 {_cbbg :=make (map[int ][]float64 ,_bbdb ._ggca );
|
||
if _cece {_bb .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_bbdb ._ggca );};for _beba :=1;_beba < _bbdb ._ggca ;_beba ++{var _agegg []compositeCell ;
|
||
for _cdeda :=0;_cdeda < _bbdb ._gfgdf ;_cdeda ++{if _aage ,_eacd :=_bbdb ._bcdge [_fcbc (_cdeda ,_beba )];_eacd {_agegg =append (_agegg ,_aage );};};if len (_agegg )==0{continue ;};_gecb :=_dfad (_agegg );_cbbg [_beba ]=_gecb ;if _cece {_bc .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_beba ,_gecb );
|
||
};};return _cbbg ;};func _afga (_cafb *wordBag ,_gbfb int )*textLine {_fgaa :=_cafb .firstWord (_gbfb );_agdee :=textLine {PdfRectangle :_fgaa .PdfRectangle ,_dfefd :_fgaa ._aggf ,_eggg :_fgaa ._efaba };_agdee .pullWord (_cafb ,_fgaa ,_gbfb );return &_agdee ;
|
||
};func (_dac *stateStack )pop ()*textState {if _dac .empty (){return nil ;};_dedb :=*(*_dac )[len (*_dac )-1];*_dac =(*_dac )[:len (*_dac )-1];return &_dedb ;};func (_eff *textObject )setTextMatrix (_ffef []float64 ){if len (_ffef )!=6{_bb .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_ffef ));
|
||
return ;};_aaef ,_ged ,_baae ,_ece ,_aee ,_efg :=_ffef [0],_ffef [1],_ffef [2],_ffef [3],_ffef [4],_ffef [5];_eff ._bbcf =_gaa .NewMatrix (_aaef ,_ged ,_baae ,_ece ,_aee ,_efg );_eff ._fbe =_eff ._bbcf ;};func _bdee (_efge []*textWord ,_dbdeg *textWord )[]*textWord {for _cgaef ,_gfea :=range _efge {if _gfea ==_dbdeg {return _fcbeg (_efge ,_cgaef );
|
||
};};_bb .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_dbdeg );
|
||
return nil ;};func _dgec (_agaga ,_gcdc ,_cgbbe ,_acea *textPara )*textTable {_degab :=&textTable {_gfgdf :2,_ggca :2,_ecac :make (map[uint64 ]*textPara ,4)};_degab .put (0,0,_agaga );_degab .put (1,0,_gcdc );_degab .put (0,1,_cgbbe );_degab .put (1,1,_acea );
|
||
return _degab ;};func _gdfa (_bbfaa []int )[]int {_fce :=make ([]int ,len (_bbfaa ));for _gdgb ,_cddd :=range _bbfaa {_fce [len (_bbfaa )-1-_gdgb ]=_cddd ;};return _fce ;};func (_ffga compositeCell )hasLines (_gbae []*textLine )bool {for _dcde ,_fcef :=range _gbae {_edeg :=_gcaa (_ffga .PdfRectangle ,_fcef .PdfRectangle );
|
||
if _cece {_bc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a",_edeg ,_dcde ,len (_gbae ));_bc .Printf ("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a",_ffga );
|
||
_bc .Printf ("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a",_fcef );};if _edeg {return true ;};};return false ;};var _gaeb =_e .MustCompile ("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024");
|
||
func (_ggdb paraList )inTile (_dbbf gridTile )paraList {var _bfdf paraList ;for _ ,_gffc :=range _ggdb {if _dbbf .contains (_gffc .PdfRectangle ){_bfdf =append (_bfdf ,_gffc );};};if _cece {_bc .Printf ("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n",_dbbf ,len (_bfdf ));
|
||
for _ccfb ,_egba :=range _bfdf {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ccfb ,_egba );};_bc .Println ("");};return _bfdf ;};
|
||
|
||
// String returns a string describing `pt`.
|
||
func (_geg PageText )String ()string {_acf :=_bc .Sprintf ("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073",len (_geg ._ecca ));_fca :=[]string {"\u002d"+_acf };for _ ,_dcd :=range _geg ._ecca {_fca =append (_fca ,_dcd .String ());
|
||
};_fca =append (_fca ,"\u002b"+_acf );return _a .Join (_fca ,"\u000a");};
|
||
|
||
// ToTextMark returns the public view of `tm`.
|
||
func (_dfcab *textMark )ToTextMark ()TextMark {return TextMark {Text :_dfcab ._gbgc ,Original :_dfcab ._gdda ,BBox :_dfcab ._addfe ,Font :_dfcab ._gdcg ,FontSize :_dfcab ._deedb ,FillColor :_dfcab ._febb ,StrokeColor :_dfcab ._dgd ,Orientation :_dfcab ._fdag };
|
||
};const (_adfc =false ;_cbab =false ;_aafa =false ;_cfcc =false ;_agbcg =false ;_dbfg =false ;_egeb =false ;_cfff =false ;_caafc =false ;_bbaf =_caafc &&true ;_daec =_bbaf &&false ;_dbaf =_caafc &&true ;_cece =false ;_aacd =_cece &&false ;_fggeg =_cece &&true ;
|
||
_gfba =false ;_bgf =_gfba &&false ;_gcbb =_gfba &&false ;_ecbd =_gfba &&true ;_fdbg =_gfba &&false ;_cceb =_gfba &&false ;);func _abbd (_eadf ,_ddbf ,_beaaa float64 )rulingKind {if _eadf >=_beaaa &&_fgae (_ddbf ,_eadf ){return _afdgd ;};if _ddbf >=_beaaa &&_fgae (_eadf ,_ddbf ){return _dbbb ;
|
||
};return _dcga ;};func _cdce (_gfde bounded )float64 {return -_gfde .bbox ().Lly };func (_faac rulingList )splitSec ()[]rulingList {_b .Slice (_faac ,func (_gbagd ,_afgaa int )bool {_eadac ,_gdeba :=_faac [_gbagd ],_faac [_afgaa ];if _eadac ._dbce !=_gdeba ._dbce {return _eadac ._dbce < _gdeba ._dbce ;
|
||
};return _eadac ._daafc < _gdeba ._daafc ;});_gbbaa :=make (map[*ruling ]struct{},len (_faac ));_cagd :=func (_cfca *ruling )rulingList {_ecgb :=rulingList {_cfca };_gbbaa [_cfca ]=struct{}{};for _ ,_ddab :=range _faac {if _ ,_faefg :=_gbbaa [_ddab ];_faefg {continue ;
|
||
};for _ ,_bdcfb :=range _ecgb {if _ddab .alignsSec (_bdcfb ){_ecgb =append (_ecgb ,_ddab );_gbbaa [_ddab ]=struct{}{};break ;};};};return _ecgb ;};_dgde :=[]rulingList {_cagd (_faac [0])};for _ ,_eccdg :=range _faac [1:]{if _ ,_agdd :=_gbbaa [_eccdg ];
|
||
_agdd {continue ;};_dgde =append (_dgde ,_cagd (_eccdg ));};return _dgde ;};func (_gaddd *wordBag )empty (_aegb int )bool {_ ,_dccc :=_gaddd ._eaba [_aegb ];return !_dccc };
|
||
|
||
// String returns a string describing `tm`.
|
||
func (_cafgf TextMark )String ()string {_gda :=_cafgf .BBox ;var _bfe string ;if _cafgf .Font !=nil {_bfe =_cafgf .Font .String ();if len (_bfe )> 50{_bfe =_bfe [:50]+"\u002e\u002e\u002e";};};var _ggf string ;if _cafgf .Meta {_ggf ="\u0020\u002a\u004d\u002a";
|
||
};return _bc .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_cafgf .Offset ,_cafgf .Text ,[]rune (_cafgf .Text ),_gda .Llx ,_gda .Lly ,_gda .Urx ,_gda .Ury ,_bfe ,_ggf );
|
||
};
|
||
|
||
// New returns an Extractor instance for extracting content from the input PDF page.
|
||
func New (page *_eb .PdfPage )(*Extractor ,error ){const _faf ="\u0065\u0078\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077";_cg ,_cb :=page .GetAllContentStreams ();if _cb !=nil {return nil ,_cb ;};_ad ,_cb :=page .GetMediaBox ();if _cb !=nil {return nil ,_bc .Errorf ("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076",_cb );
|
||
};_bcd :=&Extractor {_ag :_cg ,_bca :page .Resources ,_fc :*_ad ,_ab :map[string ]fontEntry {},_ebd :map[string ]textResult {}};if _bcd ._fc .Llx > _bcd ._fc .Urx {_bb .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_bcd ._fc );
|
||
_bcd ._fc .Llx ,_bcd ._fc .Urx =_bcd ._fc .Urx ,_bcd ._fc .Llx ;};if _bcd ._fc .Lly > _bcd ._fc .Ury {_bb .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_bcd ._fc );
|
||
_bcd ._fc .Lly ,_bcd ._fc .Ury =_bcd ._fc .Ury ,_bcd ._fc .Lly ;};_ebg .TrackUse (_faf );return _bcd ,nil ;};func _aeba (_dad string )(string ,bool ){_ebgfg :=[]rune (_dad );if len (_ebgfg )!=1{return "",false ;};_fbbfg ,_bgca :=_dddgb [_ebgfg [0]];return _fbbfg ,_bgca ;
|
||
};type subpath struct{_edg []_gaa .Point ;_edabe bool ;};const (_cbgg =1.0e-6;_cgf =1.0e-4;_deegc =10;_cccb =6;_gcgb =0.5;_fag =0.12;_gbec =0.19;_bbfa =0.04;_cebf =0.04;_ccgd =1.0;_fgbg =0.04;_eage =0.4;_ecgf =0.7;_bdcf =1.0;_cgdgg =0.1;_ebaf =1.4;_gcge =0.46;
|
||
_aecgf =0.02;_fgfe =0.2;_eaag =0.5;_gbd =4;_cbbb =4.0;_edee =6;_aabb =0.3;_dabb =0.01;_ffac =0.02;_ddga =2;_fcgdc =2;_bcdgf =500;_eefb =4.0;_eacc =4.0;_bafg =0.05;_gggc =0.1;_gffe =2.0;_cdceg =2.0;_adec =1.5;_gfaa =3.0;_bedf =0.25;);
|
||
|
||
// ImageMark represents an image drawn on a page and its position in device coordinates.
|
||
// All coordinates are in device coordinates.
|
||
type ImageMark struct{Image *_eb .Image ;
|
||
|
||
// Dimensions of the image as displayed in the PDF.
|
||
Width float64 ;Height float64 ;
|
||
|
||
// Position of the image in PDF coordinates (lower left corner).
|
||
X float64 ;Y float64 ;
|
||
|
||
// Angle in degrees, if rotated.
|
||
Angle float64 ;};func (_ffcg paraList )reorder (_gcaf []int ){_ecegc :=make (paraList ,len (_ffcg ));for _bead ,_cfad :=range _gcaf {_ecegc [_bead ]=_ffcg [_cfad ];};copy (_ffcg ,_ecegc );};func (_fccf rectRuling )checkWidth (_afebe ,_ecfd float64 )(float64 ,bool ){_dbbbe :=_ecfd -_afebe ;
|
||
_cafge :=_dbbbe <=_cdceg ;return _dbbbe ,_cafge ;};func (_ggbb *subpath )close (){if !_bgdg (_ggbb ._edg [0],_ggbb .last ()){_ggbb .add (_ggbb ._edg [0]);};_ggbb ._edabe =true ;_ggbb .removeDuplicates ();};func _eacb (_gade string )string {_debd :=[]rune (_gade );
|
||
return string (_debd [:len (_debd )-1])};func (_gdaed rulingList )bbox ()_eb .PdfRectangle {var _afbab _eb .PdfRectangle ;if len (_gdaed )==0{_bb .Log .Error ("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073");
|
||
return _eb .PdfRectangle {};};if _gdaed [0]._bdfg ==_afdgd {_afbab .Llx ,_afbab .Urx =_gdaed .secMinMax ();_afbab .Lly ,_afbab .Ury =_gdaed .primMinMax ();}else {_afbab .Llx ,_afbab .Urx =_gdaed .primMinMax ();_afbab .Lly ,_afbab .Ury =_gdaed .secMinMax ();
|
||
};return _afbab ;};const (_dcga rulingKind =iota ;_afdgd ;_dbbb ;);func (_fcf *stateStack )top ()*textState {if _fcf .empty (){return nil ;};return (*_fcf )[_fcf .size ()-1];};func _eceb (_ecbbe _eb .PdfRectangle )rulingKind {_fffbb :=_ecbbe .Width ();
|
||
_dgfbc :=_ecbbe .Height ();if _fffbb > _dgfbc {if _fffbb >=_eefb {return _afdgd ;};}else {if _dgfbc >=_eefb {return _dbbb ;};};return _dcga ;};func (_feff *wordBag )depthIndexes ()[]int {if len (_feff ._eaba )==0{return nil ;};_cgab :=make ([]int ,len (_feff ._eaba ));
|
||
_ggbca :=0;for _eaef :=range _feff ._eaba {_cgab [_ggbca ]=_eaef ;_ggbca ++;};_b .Ints (_cgab );return _cgab ;};func (_gdac *compositeCell )updateBBox (){for _ ,_fade :=range _gdac .paraList {_gdac .PdfRectangle =_agege (_gdac .PdfRectangle ,_fade .PdfRectangle );
|
||
};};func _bgdg (_deeda ,_gfacf _gaa .Point )bool {return _deeda .X ==_gfacf .X &&_deeda .Y ==_gfacf .Y };func (_gabg *textTable )getDown ()paraList {_accd :=make (paraList ,_gabg ._gfgdf );for _fcdbc :=0;_fcdbc < _gabg ._gfgdf ;_fcdbc ++{_caed :=_gabg .get (_fcdbc ,_gabg ._ggca -1)._cebed ;
|
||
if _caed ==nil ||_caed ._dfbc {return nil ;};_accd [_fcdbc ]=_caed ;};for _aacce :=0;_aacce < _gabg ._gfgdf -1;_aacce ++{if _accd [_aacce ]._cggf !=_accd [_aacce +1]{return nil ;};};return _accd ;};func (_eegfb intSet )del (_gegf int ){delete (_eegfb ,_gegf )};
|
||
|
||
|
||
// Len returns the number of TextMarks in `ma`.
|
||
func (_ggb *TextMarkArray )Len ()int {if _ggb ==nil {return 0;};return len (_ggb ._gddg );};func (_dgc pathSection )bbox ()_eb .PdfRectangle {_bgbc :=_dgc ._gaac [0]._edg [0];_fbbb :=_eb .PdfRectangle {Llx :_bgbc .X ,Urx :_bgbc .X ,Lly :_bgbc .Y ,Ury :_bgbc .Y };
|
||
_dbcf :=func (_ddb _gaa .Point ){if _ddb .X < _fbbb .Llx {_fbbb .Llx =_ddb .X ;}else if _ddb .X > _fbbb .Urx {_fbbb .Urx =_ddb .X ;};if _ddb .Y < _fbbb .Lly {_fbbb .Lly =_ddb .Y ;}else if _ddb .Y > _fbbb .Ury {_fbbb .Ury =_ddb .Y ;};};for _ ,_agc :=range _dgc ._gaac [0]._edg [1:]{_dbcf (_agc );
|
||
};for _ ,_eccac :=range _dgc ._gaac [1:]{for _ ,_fbc :=range _eccac ._edg {_dbcf (_fbc );};};return _fbbb ;};func (_facd rulingList )secMinMax ()(float64 ,float64 ){_eeabf ,_abff :=_facd [0]._dbce ,_facd [0]._daafc ;for _ ,_ffcgb :=range _facd [1:]{if _ffcgb ._dbce < _eeabf {_eeabf =_ffcgb ._dbce ;
|
||
};if _ffcgb ._daafc > _abff {_abff =_ffcgb ._daafc ;};};return _eeabf ,_abff ;};func (_feccb lineRuling )yMean ()float64 {return 0.5*(_feccb ._cbdg .Y +_feccb ._cbfb .Y )};var _fa =false ;func (_deeb compositeCell )split (_dagcf ,_acfd []float64 )*textTable {_egbcb :=len (_dagcf )+1;
|
||
_ccceg :=len (_acfd )+1;if _cece {_bb .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_ccceg ,_egbcb ,_deeb ,_dagcf ,_acfd );
|
||
_bc .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_deeb .paraList ));for _fafbg ,_gfdc :=range _deeb .paraList {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fafbg ,_gfdc .String ());};
|
||
_bc .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_deeb .lines ()));for _dafd ,_gagff :=range _deeb .lines (){_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dafd ,_gagff );};};_dagcf =_bdgde (_dagcf ,_deeb .Ury ,_deeb .Lly );
|
||
_acfd =_bdgde (_acfd ,_deeb .Llx ,_deeb .Urx );_deefg :=make (map[uint64 ]*textPara ,_ccceg *_egbcb );_cfeeb :=textTable {_gfgdf :_ccceg ,_ggca :_egbcb ,_ecac :_deefg };_abgb :=_deeb .paraList ;_b .Slice (_abgb ,func (_bgdc ,_fgcg int )bool {_eaab ,_adbcb :=_abgb [_bgdc ],_abgb [_fgcg ];
|
||
_aeaff ,_fgeg :=_eaab .Lly ,_adbcb .Lly ;if _aeaff !=_fgeg {return _aeaff < _fgeg ;};return _eaab .Llx < _adbcb .Llx ;});_bfbb :=make (map[uint64 ]_eb .PdfRectangle ,_ccceg *_egbcb );for _ecabf ,_dcfab :=range _dagcf [1:]{_fbdd :=_dagcf [_ecabf ];for _cfbef ,_geed :=range _acfd [1:]{_aeagc :=_acfd [_cfbef ];
|
||
_bfbb [_fcbc (_cfbef ,_ecabf )]=_eb .PdfRectangle {Llx :_aeagc ,Urx :_geed ,Lly :_dcfab ,Ury :_fbdd };};};if _cece {_bb .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073");
|
||
_bc .Printf ("\u0020\u0020\u0020\u0020");for _ecae :=0;_ecae < _ccceg ;_ecae ++{_bc .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_ecae );};_bc .Println ();for _fccgf :=0;_fccgf < _egbcb ;_fccgf ++{_bc .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_fccgf );
|
||
for _gcce :=0;_gcce < _ccceg ;_gcce ++{_bc .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_bfbb [_fcbc (_gcce ,_fccgf )]);};_bc .Println ();};};_gfcf :=func (_cbcd *textLine )(int ,int ){for _ebgd :=0;_ebgd < _egbcb ;_ebgd ++{for _dddg :=0;_dddg < _ccceg ;
|
||
_dddg ++{if _feb (_bfbb [_fcbc (_dddg ,_ebgd )],_cbcd .PdfRectangle ){return _dddg ,_ebgd ;};};};return -1,-1;};_bgee :=make (map[uint64 ][]*textLine ,_ccceg *_egbcb );for _ ,_ceaa :=range _abgb .lines (){_efcc ,_fbfe :=_gfcf (_ceaa );if _efcc < 0{continue ;
|
||
};_bgee [_fcbc (_efcc ,_fbfe )]=append (_bgee [_fcbc (_efcc ,_fbfe )],_ceaa );};for _egbb :=0;_egbb < len (_dagcf )-1;_egbb ++{_ceec :=_dagcf [_egbb ];_abda :=_dagcf [_egbb +1];for _ggcgc :=0;_ggcgc < len (_acfd )-1;_ggcgc ++{_fcbe :=_acfd [_ggcgc ];_gccba :=_acfd [_ggcgc +1];
|
||
_ecfc :=_eb .PdfRectangle {Llx :_fcbe ,Urx :_gccba ,Lly :_abda ,Ury :_ceec };_dcdde :=_bgee [_fcbc (_ggcgc ,_egbb )];if len (_dcdde )==0{continue ;};_acac :=_fefc (_ecfc ,_dcdde );_cfeeb .put (_ggcgc ,_egbb ,_acac );};};return &_cfeeb ;};func _dbde (_fegdd ,_cfgg bounded )float64 {return _fegdd .bbox ().Llx -_cfgg .bbox ().Urx };
|
||
// xMean returns the mean of the x coordinates of the two end points.
func (_aedg lineRuling) xMean() float64 {
	sum := _aedg._cbdg.X + _aedg._cbfb.X
	return 0.5 * sum
}
|
||
|
||
// Marks returns the TextMark collection for a page. It represents all the text on the page.
|
||
func (_egade PageText )Marks ()*TextMarkArray {return &TextMarkArray {_gddg :_egade ._ceb }};func (_bdbe *textObject )getFontDict (_ageg string )(_gdb _af .PdfObject ,_ccfg error ){_ddag :=_bdbe ._cdff ;if _ddag ==nil {_bb .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_ageg );
|
||
return nil ,nil ;};_gdb ,_acbd :=_ddag .GetFontByName (_af .PdfObjectName (_ageg ));if !_acbd {_bb .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_ageg );
|
||
return nil ,_f .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _gdb ,nil ;};func (_egd rulingList )connections (_fedg map[int ]intSet ,_gdea int )intSet {_fac :=make (intSet );_gaeeb :=make (intSet );
|
||
var _eafag func (int );_eafag =func (_cdgfa int ){if !_gaeeb .has (_cdgfa ){_gaeeb .add (_cdgfa );for _bafef :=range _egd {if _fedg [_bafef ].has (_cdgfa ){_fac .add (_bafef );};};for _ffea :=range _egd {if _fac .has (_ffea ){_eafag (_ffea );};};};};_eafag (_gdea );
|
||
return _fac ;};func (_gff *shapesState )newSubPath (){_gff .clearPath ();if _agbcg {_bb .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_gff );};};func (_ggff *textPara )isAtom ()*textTable {_effcc :=_ggff ;
|
||
_fgca :=_ggff ._cggf ;_bfaga :=_ggff ._cebed ;if !(_fgca !=nil &&!_fgca ._dfbc &&_bfaga !=nil &&!_bfaga ._dfbc ){return nil ;};_debba :=_fgca ._cebed ;if !(_debba !=nil &&!_debba ._dfbc &&_debba ==_bfaga ._cggf ){return nil ;};return _dgec (_effcc ,_fgca ,_bfaga ,_debba );
|
||
};func _efbdb (_bcfb map[int ][]float64 )string {_fdbfd :=_addcd (_bcfb );_ggcgd :=make ([]string ,len (_bcfb ));for _ceffb ,_cgfe :=range _fdbfd {_ggcgd [_ceffb ]=_bc .Sprintf ("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066",_cgfe ,_bcfb [_cgfe ]);
|
||
};return _bc .Sprintf ("\u007b\u0025\u0073\u007d",_a .Join (_ggcgd ,"\u002c\u0020"));};func _cbad (_fdba *_bg .ContentStreamOperation )(float64 ,error ){if len (_fdba .Params )!=1{_gae :=_f .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");
|
||
_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_fdba .Operand ,1,len (_fdba .Params ),_fdba .Params );
|
||
return 0.0,_gae ;};return _af .GetNumberAsFloat (_fdba .Params [0]);};const (_ccfgc markKind =iota ;_dece ;_gddcg ;_dbagd ;);func (_acbb *shapesState )clearPath (){_acbb ._ege =nil ;_acbb ._gdef =false ;if _agbcg {_bb .Log .Info ("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073",_acbb );
|
||
};};func (_cdbgb paraList )sortReadingOrder (){_bb .Log .Trace ("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_cdbgb ));
|
||
if len (_cdbgb )<=1{return ;};_cdbgb .computeEBBoxes ();_b .Slice (_cdbgb ,func (_bffe ,_degff int )bool {return _fgcc (_cdbgb [_bffe ],_cdbgb [_degff ])<=0});_egcd :=_cdbgb .topoOrder ();_cdbgb .reorder (_egcd );};func (_gebbb *textWord )bbox ()_eb .PdfRectangle {return _gebbb .PdfRectangle };
|
||
func (_gaafd rulingList )log (_fcea string ){if !_gfba {return ;};_bb .Log .Info ("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_fcea ,_gaafd .String ());for _bdde ,_bdcfc :=range _gaafd {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bdde ,_bdcfc .String ());
|
||
};};type textTable struct{_eb .PdfRectangle ;_gfgdf ,_ggca int ;_ggac bool ;_ecac map[uint64 ]*textPara ;_bcdge map[uint64 ]compositeCell ;};func (_bdbf paraList )extractTables (_gecc []gridTiling )paraList {if _cece {_bb .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_bdbf ));
|
||
};if len (_bdbf )< _edee {return _bdbf ;};_gefc :=_bdbf .findTables (_gecc );if _cece {_bb .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_gefc ));
|
||
for _daadg ,_agabf :=range _gefc {_agabf .log (_bc .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_daadg ));};};return _bdbf .applyTables (_gefc );};func (_gcef gridTiling )complete ()bool {for _ ,_cada :=range _gcef ._bfbgd {for _ ,_ecdee :=range _cada {if !_ecdee .complete (){return false ;
|
||
};};};return true ;};func (_ffda paraList )toTextMarks ()[]TextMark {_cfcd :=0;var _fcfe []TextMark ;for _bgbe ,_aaac :=range _ffda {if _aaac ._dgcce {continue ;};_gdee :=_aaac .toTextMarks (&_cfcd );_fcfe =append (_fcfe ,_gdee ...);if _bgbe !=len (_ffda )-1{if _gcae (_aaac ,_ffda [_bgbe +1]){_fcfe =_deef (_fcfe ,&_cfcd ,"\u0020");
|
||
}else {_fcfe =_deef (_fcfe ,&_cfcd ,"\u000a");_fcfe =_deef (_fcfe ,&_cfcd ,"\u000a");};};};_fcfe =_deef (_fcfe ,&_cfcd ,"\u000a");_fcfe =_deef (_fcfe ,&_cfcd ,"\u000a");return _fcfe ;};func _aeef (_cbade _eb .PdfRectangle )textState {return textState {_gbg :100,_dab :RenderModeFill ,_fggd :_cbade };
|
||
};func (_gbe *wordBag )absorb (_dbag *wordBag ){_gabe :=_dbag .makeRemovals ();for _gcbe ,_cgba :=range _dbag ._eaba {for _ ,_gccg :=range _cgba {_gbe .pullWord (_gccg ,_gcbe ,_gabe );};};_dbag .applyRemovals (_gabe );};func (_begd paraList )log (_becfc string ){if !_cfff {return ;
|
||
};_bb .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_becfc ,len (_begd ));for _bfgg ,_afdg :=range _begd {if _afdg ==nil {continue ;
|
||
};_ebbe :=_afdg .text ();_ggfg :="\u0020\u0020";if _afdg ._dcfac !=nil {_ggfg =_bc .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_afdg ._dcfac ._gfgdf ,_afdg ._dcfac ._ggca );};_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_bfgg ,_afdg .PdfRectangle ,_ggfg ,_geff (_ebbe ,50));
|
||
};};
|
||
|
||
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
|
||
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
|
||
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
|
||
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
|
||
type RenderMode int ;func (_bdgd *wordBag )minDepth ()float64 {return _bdgd ._decb -(_bdgd .Ury -_bdgd ._cgad )};func _cbcfg (_aafbe _eb .PdfColorspace ,_fbfg _eb .PdfColor )_eac .Color {if _aafbe ==nil ||_fbfg ==nil {return _eac .Black ;};_adgcgc ,_dacbg :=_aafbe .ColorToRGB (_fbfg );
|
||
if _dacbg !=nil {_bb .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_fbfg ,_aafbe ,_dacbg );
|
||
return _eac .Black ;};_egdf ,_ecea :=_adgcgc .(*_eb .PdfColorDeviceRGB );if !_ecea {_bb .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_adgcgc );
|
||
return _eac .Black ;};return _eac .NRGBA {R :uint8 (_egdf .R ()*255),G :uint8 (_egdf .G ()*255),B :uint8 (_egdf .B ()*255),A :uint8 (255)};};func (_deed *shapesState )drawRectangle (_fab ,_beb ,_dbf ,_ecg float64 ){if _agbcg {_dge :=_deed .devicePoint (_fab ,_beb );
|
||
_gdad :=_deed .devicePoint (_fab +_dbf ,_beb +_ecg );_ffff :=_eb .PdfRectangle {Llx :_dge .X ,Lly :_dge .Y ,Urx :_gdad .X ,Ury :_gdad .Y };_bb .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_ffff );
|
||
};_deed .newSubPath ();_deed .moveTo (_fab ,_beb );_deed .lineTo (_fab +_dbf ,_beb );_deed .lineTo (_fab +_dbf ,_beb +_ecg );_deed .lineTo (_fab ,_beb +_ecg );_deed .closePath ();};
|
||
|
||
// NewFromContents creates a new extractor from contents and page resources.
|
||
func NewFromContents (contents string ,resources *_eb .PdfPageResources )(*Extractor ,error ){const _bcac ="\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s";_beg :=&Extractor {_ag :contents ,_bca :resources ,_ab :map[string ]fontEntry {},_ebd :map[string ]textResult {}};
|
||
_ebg .TrackUse (_bcac );return _beg ,nil ;};func _feb (_eegf ,_geaf _eb .PdfRectangle )bool {return _eegf .Llx <=_geaf .Llx &&_geaf .Urx <=_eegf .Urx &&_eegf .Lly <=_geaf .Lly &&_geaf .Ury <=_eegf .Ury ;};func (_bfef *shapesState )lineTo (_dgfd ,_fdfg float64 ){if _agbcg {_bb .Log .Info ("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066",_dgfd ,_fdfg ,_bfef .devicePoint (_dgfd ,_fdfg ));
|
||
};_bfef .addPoint (_dgfd ,_fdfg );};
|
||
|
||
// TableCell is a cell in a TextTable.
|
||
type TableCell struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Marks returns the TextMarks corresponding to the text in Text.
|
||
Marks TextMarkArray ;};func _cbba (_cgbg []TextMark ,_eefbg *int ,_aga TextMark )[]TextMark {_aga .Offset =*_eefbg ;_cgbg =append (_cgbg ,_aga );*_eefbg +=len (_aga .Text );return _cgbg ;};func (_gbed *subpath )makeRectRuling (_bdaf _eac .Color )(*ruling ,bool ){if _fdbg {_bb .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_gbed );
|
||
};_eeba :=_gbed ._edg [:4];_daad :=make (map[int ]rulingKind ,len (_eeba ));for _gcba ,_acaca :=range _eeba {_gcaeg :=_gbed ._edg [(_gcba +1)%4];_daad [_gcba ]=_ecce (_acaca ,_gcaeg );if _fdbg {_bc .Printf ("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066",_gcba ,_daad [_gcba ],_acaca ,_gcaeg );
|
||
};};if _fdbg {_bc .Printf ("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_daad );};var _cade ,_fagef []int ;for _eddac ,_adcae :=range _daad {switch _adcae {case _afdgd :_fagef =append (_fagef ,_eddac );case _dbbb :_cade =append (_cade ,_eddac );
|
||
};};if _fdbg {_bc .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_fagef ),_fagef );_bc .Printf ("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_cade ),_cade );
|
||
};_gdaef :=(len (_fagef )==2&&len (_cade )==2)||(len (_fagef )==2&&len (_cade )==0&&_eegg (_eeba [_fagef [0]],_eeba [_fagef [1]]))||(len (_cade )==2&&len (_fagef )==0&&_fgcgb (_eeba [_cade [0]],_eeba [_cade [1]]));if _fdbg {_bc .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_fagef ),len (_cade ),_gdaef );
|
||
};if !_gdaef {if _fdbg {_bb .Log .Error ("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v",_gbed );_bc .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_fagef ),len (_cade ),_gdaef );
|
||
};return &ruling {},false ;};if len (_cade )==0{for _efaa ,_cdaf :=range _daad {if _cdaf !=_afdgd {_cade =append (_cade ,_efaa );};};};if len (_fagef )==0{for _ccfc ,_fcbgd :=range _daad {if _fcbgd !=_dbbb {_fagef =append (_fagef ,_ccfc );};};};if _fdbg {_bb .Log .Info ("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_fagef ),len (_cade ),len (_eeba ),_fagef ,_cade ,_eeba );
|
||
};var _effe ,_gcadc ,_bafe ,_aaefa _gaa .Point ;if _eeba [_fagef [0]].Y > _eeba [_fagef [1]].Y {_bafe ,_aaefa =_eeba [_fagef [0]],_eeba [_fagef [1]];}else {_bafe ,_aaefa =_eeba [_fagef [1]],_eeba [_fagef [0]];};if _eeba [_cade [0]].X > _eeba [_cade [1]].X {_effe ,_gcadc =_eeba [_cade [0]],_eeba [_cade [1]];
|
||
}else {_effe ,_gcadc =_eeba [_cade [1]],_eeba [_cade [0]];};_ebcgc :=_eb .PdfRectangle {Llx :_effe .X ,Urx :_gcadc .X ,Lly :_aaefa .Y ,Ury :_bafe .Y };if _ebcgc .Llx > _ebcgc .Urx {_ebcgc .Llx ,_ebcgc .Urx =_ebcgc .Urx ,_ebcgc .Llx ;};if _ebcgc .Lly > _ebcgc .Ury {_ebcgc .Lly ,_ebcgc .Ury =_ebcgc .Ury ,_ebcgc .Lly ;
|
||
};_dga :=rectRuling {PdfRectangle :_ebcgc ,_bcfe :_eceb (_ebcgc ),Color :_bdaf };if _dga ._bcfe ==_dcga {if _fdbg {_bb .Log .Error ("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c");
|
||
};return nil ,false ;};_fafa ,_gbbc :=_dga .asRuling ();if !_gbbc {if _fdbg {_bb .Log .Error ("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg");};return nil ,false ;};if _gfba {_bc .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_fafa .String ());
|
||
};return _fafa ,true ;};
|
||
|
||
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
|
||
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
|
||
// `start` and `end` are offsets in the extracted text.
|
||
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
|
||
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
|
||
func (_ffee *TextMarkArray )RangeOffset (start ,end int )(*TextMarkArray ,error ){if _ffee ==nil {return nil ,_f .New ("\u006da\u003d\u003d\u006e\u0069\u006c");};if end < start {return nil ,_bc .Errorf ("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020",start ,end );
|
||
};_cfa :=len (_ffee ._gddg );if _cfa ==0{return _ffee ,nil ;};if start < _ffee ._gddg [0].Offset {start =_ffee ._gddg [0].Offset ;};if end > _ffee ._gddg [_cfa -1].Offset +1{end =_ffee ._gddg [_cfa -1].Offset +1;};_afbdd :=_b .Search (_cfa ,func (_afdec int )bool {return _ffee ._gddg [_afdec ].Offset +len (_ffee ._gddg [_afdec ].Text )-1>=start });
|
||
if !(0<=_afbdd &&_afbdd < _cfa ){_ada :=_bc .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076",start ,_afbdd ,_cfa ,_ffee ._gddg [0],_ffee ._gddg [_cfa -1]);
|
||
return nil ,_ada ;};_bafd :=_b .Search (_cfa ,func (_cccc int )bool {return _ffee ._gddg [_cccc ].Offset > end -1});if !(0<=_bafd &&_bafd < _cfa ){_gea :=_bc .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076",end ,_bafd ,_cfa ,_ffee ._gddg [0],_ffee ._gddg [_cfa -1]);
|
||
return nil ,_gea ;};if _bafd <=_afbdd {return nil ,_bc .Errorf ("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064",start ,end ,_afbdd ,_bafd );
|
||
};return &TextMarkArray {_gddg :_ffee ._gddg [_afbdd :_bafd ]},nil ;};func (_dgfa *textObject )moveLP (_cafg ,_faff float64 ){_dgfa ._fbe .Concat (_gaa .NewMatrix (1,0,0,1,_cafg ,_faff ));_dgfa ._bbcf =_dgfa ._fbe ;};func (_cge *shapesState )moveTo (_acec ,_gdabe float64 ){_cge ._gdef =true ;
|
||
_cge ._ebgfc =_cge .devicePoint (_acec ,_gdabe );if _agbcg {_bb .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_acec ,_gdabe ,_cge ._ebgfc );
|
||
};};func (_ffdb *textTable )getComposite (_ggag ,_bffa int )(paraList ,_eb .PdfRectangle ){_gfcg ,_bgcd :=_ffdb ._bcdge [_fcbc (_ggag ,_bffa )];if _cece {_bc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a",_ggag ,_bffa ,_gfcg .String ());
|
||
};if !_bgcd {return nil ,_eb .PdfRectangle {};};return _gfcg .parasBBox ();};func (_fada *textWord )absorb (_efaf *textWord ){_fada .PdfRectangle =_agege (_fada .PdfRectangle ,_efaf .PdfRectangle );_fada ._badb =append (_fada ._badb ,_efaf ._badb ...);
|
||
};func (_ggcga *wordBag )applyRemovals (_gfgg map[int ]map[*textWord ]struct{}){for _bdgg ,_bcf :=range _gfgg {if len (_bcf )==0{continue ;};_cgag :=_ggcga ._eaba [_bdgg ];_dbg :=len (_cgag )-len (_bcf );if _dbg ==0{delete (_ggcga ._eaba ,_bdgg );continue ;
|
||
};_fdd :=make ([]*textWord ,_dbg );_edgg :=0;for _ ,_bafdc :=range _cgag {if _ ,_eca :=_bcf [_bafdc ];!_eca {_fdd [_edgg ]=_bafdc ;_edgg ++;};};_ggcga ._eaba [_bdgg ]=_fdd ;};};func (_decd *textLine )markWordBoundaries (){_cfbb :=_aecgf *_decd ._dfefd ;
|
||
for _dcaf ,_dbegd :=range _decd ._ffd [1:]{if _dbde (_dbegd ,_decd ._ffd [_dcaf ])>=_cfbb {_dbegd ._dffcf =true ;};};};func _gcbaf (_bdca float64 )bool {return _be .Abs (_bdca )< _cdceg };type imageExtractContext struct{_abc []ImageMark ;_df int ;_gg int ;
|
||
_adg int ;_gaf map[*_af .PdfObjectStream ]*cachedImage ;_bgb *ImageExtractOptions ;};
|
||
|
||
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
|
||
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
|
||
// Replace with a function like Extract() (*PageText, error)
|
||
func (_ded *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_adcb ,_gfe ,_bbf ,_ffe :=_ded .extractPageText (_ded ._ag ,_ded ._bca ,_gaa .IdentityMatrix (),0);if _ffe !=nil {return nil ,0,0,_ffe ;};_adcb .computeViews ();_ffe =_aeeg (_adcb );
|
||
if _ffe !=nil {return nil ,0,0,_ffe ;};return _adcb ,_gfe ,_bbf ,nil ;};func (_fbcca paraList )readBefore (_dbab []int ,_dgcc ,_fbcd int )bool {_acdd ,_fdbf :=_fbcca [_dgcc ],_fbcca [_fbcd ];if _fbdgg (_acdd ,_fdbf )&&_acdd .Lly > _fdbf .Lly {return true ;
|
||
};if !(_acdd ._ccac .Urx < _fdbf ._ccac .Llx ){return false ;};_ffgef ,_fecdf :=_acdd .Lly ,_fdbf .Lly ;if _ffgef > _fecdf {_fecdf ,_ffgef =_ffgef ,_fecdf ;};_fbda :=_be .Max (_acdd ._ccac .Llx ,_fdbf ._ccac .Llx );_eceg :=_be .Min (_acdd ._ccac .Urx ,_fdbf ._ccac .Urx );
|
||
_fddc :=_fbcca .llyRange (_dbab ,_ffgef ,_fecdf );for _ ,_aade :=range _fddc {if _aade ==_dgcc ||_aade ==_fbcd {continue ;};_edfa :=_fbcca [_aade ];if _edfa ._ccac .Llx <=_eceg &&_fbda <=_edfa ._ccac .Urx {return false ;};};return true ;};func (_gccgg *wordBag )text ()string {_adfb :=_gccgg .allWords ();
|
||
_gebg :=make ([]string ,len (_adfb ));for _dbdgf ,_baed :=range _adfb {_gebg [_dbdgf ]=_baed ._caee ;};return _a .Join (_gebg ,"\u0020");};func (_degb *textObject )setWordSpacing (_babg float64 ){if _degb ==nil {return ;};_degb ._add ._fege =_babg ;};
|
||
|
||
// String returns a description of `w`.
|
||
func (_daae *textWord )String ()string {return _bc .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_daae ._efaba ,_daae .PdfRectangle ,_daae ._aggf ,_daae ._caee );
|
||
};func (_agag rectRuling )asRuling ()(*ruling ,bool ){_fdg :=ruling {_bdfg :_agag ._bcfe ,Color :_agag .Color ,_eabd :_gddcg };switch _agag ._bcfe {case _dbbb :_fdg ._cda =0.5*(_agag .Llx +_agag .Urx );_fdg ._dbce =_agag .Lly ;_fdg ._daafc =_agag .Ury ;
|
||
_dagb ,_fgaag :=_agag .checkWidth (_agag .Llx ,_agag .Urx );if !_fgaag {if _fdbg {_bb .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_agag );
|
||
};return nil ,false ;};_fdg ._gddcc =_dagb ;case _afdgd :_fdg ._cda =0.5*(_agag .Lly +_agag .Ury );_fdg ._dbce =_agag .Llx ;_fdg ._daafc =_agag .Urx ;_fead ,_ffefb :=_agag .checkWidth (_agag .Lly ,_agag .Ury );if !_ffefb {if _fdbg {_bb .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_agag );
|
||
};return nil ,false ;};_fdg ._gddcc =_fead ;default:_bb .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_agag ._bcfe );return nil ,false ;};return &_fdg ,true ;};func (_aeac paraList )llyOrdering ()[]int {_gebc :=make ([]int ,len (_aeac ));
|
||
for _aedf :=range _aeac {_gebc [_aedf ]=_aedf ;};_b .SliceStable (_gebc ,func (_faffc ,_adeb int )bool {_cefa ,_feec :=_gebc [_faffc ],_gebc [_adeb ];return _aeac [_cefa ].Lly < _aeac [_feec ].Lly ;});return _gebc ;};type ruling struct{_bdfg rulingKind ;
|
||
_eabd markKind ;_eac .Color ;_cda float64 ;_dbce float64 ;_daafc float64 ;_gddcc float64 ;};
|
||
|
||
// TextTable represents a table.
|
||
// Cells are ordered top-to-bottom, left-to-right.
|
||
// Cells[y] is the (0-offset) y'th row in the table.
|
||
// Cells[y][x] is the (0-offset) x'th column in the table.
|
||
type TextTable struct{W ,H int ;Cells [][]TableCell ;};func _degf (_aegf ,_bdc _eb .PdfRectangle )bool {return _bdc .Llx <=_aegf .Urx &&_aegf .Llx <=_bdc .Urx };func (_acfcg rulingList )merge ()*ruling {_decf :=_acfcg [0]._cda ;_cbaae :=_acfcg [0]._dbce ;
|
||
_egcdd :=_acfcg [0]._daafc ;for _ ,_cgbge :=range _acfcg [1:]{_decf +=_cgbge ._cda ;if _cgbge ._dbce < _cbaae {_cbaae =_cgbge ._dbce ;};if _cgbge ._daafc > _egcdd {_egcdd =_cgbge ._daafc ;};};_cafc :=&ruling {_bdfg :_acfcg [0]._bdfg ,_eabd :_acfcg [0]._eabd ,Color :_acfcg [0].Color ,_cda :_decf /float64 (len (_acfcg )),_dbce :_cbaae ,_daafc :_egcdd };
|
||
if _gcbb {_bb .Log .Info ("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073",len (_acfcg ),_cafc );for _edeed ,_gbcd :=range _acfcg {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_edeed ,_gbcd );
|
||
};};return _cafc ;};func (_adfd *wordBag )sort (){for _ ,_cdda :=range _adfd ._eaba {_b .Slice (_cdda ,func (_bedc ,_deag int )bool {return _ddd (_cdda [_bedc ],_cdda [_deag ])< 0});};};func (_gedf *subpath )isQuadrilateral ()bool {if len (_gedf ._edg )< 4||len (_gedf ._edg )> 5{return false ;
|
||
};if len (_gedf ._edg )==5{_aacbe :=_gedf ._edg [0];_cbcg :=_gedf ._edg [4];if _aacbe .X !=_cbcg .X ||_aacbe .Y !=_cbcg .Y {return false ;};};return true ;};func (_abfd *Extractor )extractPageText (_ef string ,_eagc *_eb .PdfPageResources ,_dd _gaa .Matrix ,_ffb int )(*PageText ,int ,int ,error ){_bb .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_ffb );
|
||
_cfc :=&PageText {_aefg :_abfd ._fc };_gdd :=_aeef (_abfd ._fc );var _cca stateStack ;_efb :=_bfb (_abfd ,_eagc ,_bg .GraphicsState {},&_gdd ,&_cca );_ac :=shapesState {_cbgb :_dd ,_agbc :_gaa .IdentityMatrix (),_babd :_efb };var _afde bool ;if _ffb > _eaf {_agf :=_f .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");
|
||
_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_ffb ,_agf );
|
||
return _cfc ,_gdd ._cdbe ,_gdd ._gac ,_agf ;};_bab :=_bg .NewContentStreamParser (_ef );_fafb ,_bcda :=_bab .Parse ();if _bcda !=nil {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bcda );
|
||
return _cfc ,_gdd ._cdbe ,_gdd ._gac ,_bcda ;};_deda :=_bg .NewContentStreamProcessor (*_fafb );_deda .AddHandler (_bg .HandlerConditionEnumAllOperands ,"",func (_fda *_bg .ContentStreamOperation ,_acb _bg .GraphicsState ,_cbe *_eb .PdfPageResources )error {_cce :=_fda .Operand ;
|
||
if _aafa {_bb .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_fda );};switch _cce {case "\u0071":if _agbcg {_bb .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_ac ._agbc );};_cca .push (&_gdd );case "\u0051":if !_cca .empty (){_gdd =*_cca .pop ();
|
||
};_ac ._agbc =_acb .CTM ;if _agbcg {_bb .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_ac ._agbc );};case "\u0042\u0054":if _afde {_bb .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
|
||
_cfc ._ecca =append (_cfc ._ecca ,_efb ._fgc ...);};_afde =true ;_cbgf :=_acb ;_cbgf .CTM =_dd .Mult (_cbgf .CTM );_efb =_bfb (_abfd ,_cbe ,_cbgf ,&_gdd ,&_cca );_ac ._babd =_efb ;case "\u0045\u0054":if !_afde {_bb .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
|
||
};_afde =false ;_cfc ._ecca =append (_cfc ._ecca ,_efb ._fgc ...);_efb .reset ();case "\u0054\u002a":_efb .nextLine ();case "\u0054\u0064":if _afe ,_gab :=_efb .checkOp (_fda ,2,true );!_afe {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gab );
|
||
return _gab ;};_gag ,_ggc ,_cfe :=_faca (_fda .Params );if _cfe !=nil {return _cfe ;};_efb .moveText (_gag ,_ggc );case "\u0054\u0044":if _gad ,_fff :=_efb .checkOp (_fda ,2,true );!_gad {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fff );
|
||
return _fff ;};_edf ,_egg ,_fbb :=_faca (_fda .Params );if _fbb !=nil {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fbb );return _fbb ;};_efb .moveTextSetLeading (_edf ,_egg );case "\u0054\u006a":if _faa ,_ccfa :=_efb .checkOp (_fda ,1,true );
|
||
!_faa {_bb .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_fda ,_ccfa );return _ccfa ;};_efa ,_deg :=_af .GetStringBytes (_fda .Params [0]);if !_deg {_bb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_fda );
|
||
return _af .ErrTypeError ;};return _efb .showText (_efa );case "\u0054\u004a":if _bcacf ,_cdf :=_efb .checkOp (_fda ,1,true );!_bcacf {_bb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cdf );return _cdf ;
|
||
};_cfd ,_bfc :=_af .GetArray (_fda .Params [0]);if !_bfc {_bb .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_fda );
|
||
return _bcda ;};return _efb .showTextAdjusted (_cfd );case "\u0027":if _dfe ,_gfb :=_efb .checkOp (_fda ,1,true );!_dfe {_bb .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gfb );return _gfb ;};_ebgf ,_fcg :=_af .GetStringBytes (_fda .Params [0]);
|
||
if !_fcg {_bb .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_fda );return _af .ErrTypeError ;};_efb .nextLine ();return _efb .showText (_ebgf );
|
||
case "\u0022":if _dcbd ,_adbb :=_efb .checkOp (_fda ,3,true );!_dcbd {_bb .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_adbb );return _adbb ;};_bec ,_aaa ,_aae :=_faca (_fda .Params [:2]);if _aae !=nil {return _aae ;
|
||
};_baa ,_cff :=_af .GetStringBytes (_fda .Params [2]);if !_cff {_bb .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_fda );
|
||
return _af .ErrTypeError ;};_efb .setCharSpacing (_bec );_efb .setWordSpacing (_aaa );_efb .nextLine ();return _efb .showText (_baa );case "\u0054\u004c":_cfdb ,_dega :=_cbad (_fda );if _dega !=nil {_bb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dega );
|
||
return _dega ;};_efb .setTextLeading (_cfdb );case "\u0054\u0063":_dgf ,_acd :=_cbad (_fda );if _acd !=nil {_bb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_acd );return _acd ;};_efb .setCharSpacing (_dgf );
|
||
case "\u0054\u0066":if _ccff ,_ccffb :=_efb .checkOp (_fda ,2,true );!_ccff {_bb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ccffb );return _ccffb ;};_fea ,_gadb :=_af .GetNameVal (_fda .Params [0]);
|
||
if !_gadb {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_fda );return _af .ErrTypeError ;};_agb ,_adcbc :=_af .GetNumberAsFloat (_fda .Params [1]);
|
||
if !_gadb {_bb .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fda ,_adcbc );
|
||
return _adcbc ;};_adcbc =_efb .setFont (_fea ,_agb );_efb ._eae =_fd .Is (_adcbc ,_af .ErrNotSupported );if _adcbc !=nil &&!_efb ._eae {return _adcbc ;};case "\u0054\u006d":if _cfdf ,_aea :=_efb .checkOp (_fda ,6,true );!_cfdf {_bb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aea );
|
||
return _aea ;};_deb ,_cccg :=_af .GetNumbersAsFloat (_fda .Params );if _cccg !=nil {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cccg );return _cccg ;};_efb .setTextMatrix (_deb );case "\u0054\u0072":if _gee ,_ggg :=_efb .checkOp (_fda ,1,true );
|
||
!_gee {_bb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ggg );return _ggg ;};_aba ,_dbd :=_af .GetIntVal (_fda .Params [0]);if !_dbd {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_fda );
|
||
return _af .ErrTypeError ;};_efb .setTextRenderMode (_aba );case "\u0054\u0073":if _abaf ,_agde :=_efb .checkOp (_fda ,1,true );!_abaf {_bb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_agde );return _agde ;
|
||
};_cbeg ,_daa :=_af .GetNumberAsFloat (_fda .Params [0]);if _daa !=nil {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_daa );return _daa ;};_efb .setTextRise (_cbeg );case "\u0054\u0077":if _cffe ,_ggga :=_efb .checkOp (_fda ,1,true );
|
||
!_cffe {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ggga );return _ggga ;};_aab ,_gga :=_af .GetNumberAsFloat (_fda .Params [0]);if _gga !=nil {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gga );
|
||
return _gga ;};_efb .setWordSpacing (_aab );case "\u0054\u007a":if _eba ,_bbgc :=_efb .checkOp (_fda ,1,true );!_eba {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bbgc );return _bbgc ;};_fg ,_ebe :=_af .GetNumberAsFloat (_fda .Params [0]);
|
||
if _ebe !=nil {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ebe );return _ebe ;};_efb .setHorizScaling (_fg );case "\u0063\u006d":_ac ._agbc =_acb .CTM ;if _ac ._agbc .Singular (){_dfg :=_gaa .IdentityMatrix ().Translate (_ac ._agbc .Translation ());
|
||
_bb .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_ac ._agbc ,_dfg );_ac ._agbc =_dfg ;};if _agbcg {_bb .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_ac ._agbc );};case "\u006d":if len (_fda .Params )!=2{_bb .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_gb );
|
||
return nil ;};_fgg ,_fec :=_af .GetNumbersAsFloat (_fda .Params );if _fec !=nil {return _fec ;};_bb .Log .Debug ("\u004d\u006f\u0076\u0065\u0020\u0074\u006f\u003a\u0020\u0025\u002e\u0032\u0066",_fgg );_ac .moveTo (_fgg [0],_fgg [1]);case "\u006c":if len (_fda .Params )!=2{_bb .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_gb );
|
||
return nil ;};_dbc ,_dgfe :=_af .GetNumbersAsFloat (_fda .Params );if _dgfe !=nil {return _dgfe ;};_ac .lineTo (_dbc [0],_dbc [1]);case "\u0063":if len (_fda .Params )!=6{return _gb ;};_ggd ,_bff :=_af .GetNumbersAsFloat (_fda .Params );if _bff !=nil {return _bff ;
|
||
};_bb .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_ggd );_ac .cubicTo (_ggd [0],_ggd [1],_ggd [2],_ggd [3],_ggd [4],_ggd [5]);case "\u0076","\u0079":if len (_fda .Params )!=4{return _gb ;
|
||
};_dbe ,_dda :=_af .GetNumbersAsFloat (_fda .Params );if _dda !=nil {return _dda ;};_bb .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_dbe );_ac .quadraticTo (_dbe [0],_dbe [1],_dbe [2],_dbe [3]);
|
||
case "\u0068":_ac .closePath ();case "\u0072\u0065":if len (_fda .Params )!=4{return _gb ;};_ecf ,_ccaf :=_af .GetNumbersAsFloat (_fda .Params );if _ccaf !=nil {return _ccaf ;};_ac .drawRectangle (_ecf [0],_ecf [1],_ecf [2],_ecf [3]);_ac .closePath ();
|
||
case "\u0053":_ac .stroke (&_cfc ._cfec );_ac .clearPath ();case "\u0073":_ac .closePath ();_ac .stroke (&_cfc ._cfec );_ac .clearPath ();case "\u0046":_ac .fill (&_cfc ._gde );_ac .clearPath ();case "\u0066","\u0066\u002a":_ac .closePath ();_ac .fill (&_cfc ._gde );
|
||
_ac .clearPath ();case "\u0042","\u0042\u002a":_ac .fill (&_cfc ._gde );_ac .stroke (&_cfc ._cfec );_ac .clearPath ();case "\u0062","\u0062\u002a":_ac .closePath ();_ac .fill (&_cfc ._gde );_ac .stroke (&_cfc ._cfec );_ac .clearPath ();case "\u006e":_ac .clearPath ();
|
||
case "\u0044\u006f":if len (_fda .Params )==0{_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_fda .Params );
|
||
return _af .ErrRangeError ;};_ega ,_geb :=_af .GetName (_fda .Params [0]);if !_geb {_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_fda .Params [0]);
|
||
return _af .ErrTypeError ;};_ ,_afeb :=_cbe .GetXObjectByName (*_ega );if _afeb !=_eb .XObjectTypeForm {break ;};_cfda ,_geb :=_abfd ._ebd [_ega .String ()];if !_geb {_afbf ,_debf :=_cbe .GetXObjectFormByName (*_ega );if _debf !=nil {_bb .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_debf );
|
||
return _debf ;};_fae ,_debf :=_afbf .GetContentStream ();if _debf !=nil {_bb .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_debf );return _debf ;};_gbc :=_afbf .Resources ;if _gbc ==nil {_gbc =_cbe ;};_afa ,_egad ,_feg ,_debf :=_abfd .extractPageText (string (_fae ),_gbc ,_dd .Mult (_acb .CTM ),_ffb +1);
|
||
if _debf !=nil {_bb .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_debf );return _debf ;};_cfda =textResult {*_afa ,_egad ,_feg };_abfd ._ebd [_ega .String ()]=_cfda ;};_ac ._agbc =_acb .CTM ;if _agbcg {_bb .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_ac ._agbc );
|
||
};_cfc ._ecca =append (_cfc ._ecca ,_cfda ._gbb ._ecca ...);_cfc ._cfec =append (_cfc ._cfec ,_cfda ._gbb ._cfec ...);_cfc ._gde =append (_cfc ._gde ,_cfda ._gbb ._gde ...);_gdd ._cdbe +=_cfda ._fcgc ;_gdd ._gac +=_cfda ._eagcc ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_efb ._gfa .ColorspaceNonStroking =_acb .ColorspaceNonStroking ;
|
||
_efb ._gfa .ColorNonStroking =_acb .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_efb ._gfa .ColorspaceStroking =_acb .ColorspaceStroking ;_efb ._gfa .ColorStroking =_acb .ColorStroking ;};return nil ;
|
||
});_bcda =_deda .Process (_eagc );return _cfc ,_gdd ._cdbe ,_gdd ._gac ,_bcda ;};
|
||
|
||
// Extractor stores and offers functionality for extracting content from PDF pages.
|
||
type Extractor struct{_ag string ;_bca *_eb .PdfPageResources ;_fc _eb .PdfRectangle ;_ab map[string ]fontEntry ;_ebd map[string ]textResult ;_d int64 ;_ed int ;};func (_efdg *shapesState )addPoint (_bed ,_dec float64 ){_caca :=_efdg .establishSubpath ();
|
||
_bdgc :=_efdg .devicePoint (_bed ,_dec );if _caca ==nil {_efdg ._gdef =true ;_efdg ._ebgfc =_bdgc ;}else {_caca .add (_bdgc );};};func _addc (_dbdf func (*wordBag ,*textWord ,float64 )bool ,_bdd float64 )func (*wordBag ,*textWord )bool {return func (_bbac *wordBag ,_beaa *textWord )bool {return _dbdf (_bbac ,_beaa ,_bdd )};
|
||
};
|
||
|
||
// TextMark represents extracted text on a page with information regarding both textual content,
|
||
// formatting (font and size) and positioning.
|
||
// It is the smallest unit of text on a PDF page, typically a single character.
|
||
//
|
||
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
|
||
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
|
||
// `bbox` of substring `term` in `text`.
|
||
//
|
||
// ex, _ := New(page)
|
||
// // handle errors
|
||
// pageText, _, _, err := ex.ExtractPageText()
|
||
// // handle errors
|
||
// text := pageText.Text()
|
||
// textMarks := pageText.Marks()
|
||
//
|
||
// start := strings.Index(text, term)
|
||
// end := start + len(term)
|
||
// spanMarks, err := textMarks.RangeOffset(start, end)
|
||
// // handle errors
|
||
// bbox, ok := spanMarks.BBox()
|
||
// // handle errors
|
||
type TextMark struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Original is the text in the PDF. It has not been decoded like `Text`.
|
||
Original string ;
|
||
|
||
// BBox is the bounding box of the text.
|
||
BBox _eb .PdfRectangle ;
|
||
|
||
// Font is the font the text was drawn with.
|
||
Font *_eb .PdfFont ;
|
||
|
||
// FontSize is the font size the text was drawn with.
|
||
FontSize float64 ;
|
||
|
||
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
|
||
// text, textMarks := pageText.Text(), pageText.Marks()
|
||
// marks := textMarks.Elements()
|
||
// then marks[i].Offset is the offset of marks[i].Text in text.
|
||
Offset int ;
|
||
|
||
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
|
||
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
|
||
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
|
||
Meta bool ;
|
||
|
||
// FillColor is the fill color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
FillColor _eac .Color ;
|
||
|
||
// StrokeColor is the stroke color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
StrokeColor _eac .Color ;
|
||
|
||
// Orientation is the text orientation
|
||
Orientation int ;};func (_ccebbf *textTable )compositeColCorridors ()map[int ][]float64 {_edge :=make (map[int ][]float64 ,_ccebbf ._gfgdf );if _cece {_bb .Log .Info ("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020",_ccebbf ._gfgdf );
|
||
};for _aeae :=0;_aeae < _ccebbf ._gfgdf ;_aeae ++{_edge [_aeae ]=nil ;};return _edge ;};func (_cfef *textObject )setCharSpacing (_bcb float64 ){if _cfef ==nil {return ;};_cfef ._add ._cdbb =_bcb ;if _dbfg {_bb .Log .Info ("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073",_bcb ,_cfef ._add .String ());
|
||
};};
|
||
|
||
// ApplyArea processes the page text only within the specified area `bbox`.
|
||
// Each time ApplyArea is called, it updates the result set in `pt`.
|
||
// Can be called multiple times in a row with different bounding boxes.
|
||
func (_faag *PageText )ApplyArea (bbox _eb .PdfRectangle ){_aeb :=make ([]*textMark ,0,len (_faag ._ecca ));for _ ,_adfa :=range _faag ._ecca {if _gcaa (_adfa .bbox (),bbox ){_aeb =append (_aeb ,_adfa );};};var _bdf paraList ;_dagcd :=len (_aeb );for _fcgd :=0;
|
||
_fcgd < 360&&_dagcd > 0;_fcgd +=90{_egc :=make ([]*textMark ,0,len (_aeb )-_dagcd );for _ ,_eccf :=range _aeb {if _eccf ._fdag ==_fcgd {_egc =append (_egc ,_eccf );};};if len (_egc )> 0{_fcgca :=_gabc (_egc ,_faag ._aefg ,nil ,nil );_bdf =append (_bdf ,_fcgca ...);
|
||
_dagcd -=len (_egc );};};_fcad :=new (_ga .Buffer );_bdf .writeText (_fcad );_faag ._caa =_fcad .String ();_faag ._ceb =_bdf .toTextMarks ();_faag ._cfbe =_bdf .tables ();};type pathSection struct{_gaac []*subpath ;_eac .Color ;};func (_gcga gridTiling )log (_fdcd string ){if !_ecbd {return ;
|
||
};_bb .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_gcga ._abdf ),len (_gcga ._bgcf ),_fdcd );_bc .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_gcga ._abdf );
|
||
_bc .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_gcga ._bgcf );for _gcac ,_dbee :=range _gcga ._bgcf {_aedd ,_bbfdg :=_gcga ._bfbgd [_dbee ];if !_bbfdg {continue ;};_bc .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_gcac ,_dbee );
|
||
for _gcgad ,_ebae :=range _gcga ._abdf {_cgbb ,_ecfb :=_aedd [_ebae ];if !_ecfb {continue ;};_bc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_gcgad ,_cgbb .String ());};};};func (_beec *textTable )getRight ()paraList {_fgab :=make (paraList ,_beec ._ggca );
|
||
for _ceffd :=0;_ceffd < _beec ._ggca ;_ceffd ++{_bgeeb :=_beec .get (_beec ._gfgdf -1,_ceffd )._cggf ;if _bgeeb ==nil ||_bgeeb ._dfbc {return nil ;};_fgab [_ceffd ]=_bgeeb ;};for _efeaa :=0;_efeaa < _beec ._ggca -1;_efeaa ++{if _fgab [_efeaa ]._cebed !=_fgab [_efeaa +1]{return nil ;
|
||
};};return _fgab ;};var _ecgc =map[markKind ]string {_dece :"\u0073\u0074\u0072\u006f\u006b\u0065",_gddcg :"\u0066\u0069\u006c\u006c",_dbagd :"\u0061u\u0067\u006d\u0065\u006e\u0074"};func _agege (_abbfa ,_afdd _eb .PdfRectangle )_eb .PdfRectangle {return _eb .PdfRectangle {Llx :_be .Min (_abbfa .Llx ,_afdd .Llx ),Lly :_be .Min (_abbfa .Lly ,_afdd .Lly ),Urx :_be .Max (_abbfa .Urx ,_afdd .Urx ),Ury :_be .Max (_abbfa .Ury ,_afdd .Ury )};
|
||
};
|
||
|
||
// Tables returns the tables extracted from the page.
|
||
func (_ecdc PageText )Tables ()[]TextTable {if _cece {_bb .Log .Info ("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064",len (_ecdc ._cfbe ));};return _ecdc ._cfbe ;};func _fcbeg (_cddag []*textWord ,_dffbd int )[]*textWord {_dfgge :=len (_cddag );
|
||
copy (_cddag [_dffbd :],_cddag [_dffbd +1:]);return _cddag [:_dfgge -1];};type textObject struct{_gcb *Extractor ;_cdff *_eb .PdfPageResources ;_gfa _bg .GraphicsState ;_add *textState ;_gbce *stateStack ;_bbcf _gaa .Matrix ;_fbe _gaa .Matrix ;_fgc []*textMark ;
|
||
_eae bool ;};func _efff (_bcbba float64 ,_cfccg int )int {if _cfccg ==0{_cfccg =1;};_gdf :=float64 (_cfccg );return int (_be .Round (_bcbba /_gdf )*_gdf );};func _fbdgg (_bbcbg ,_cbc *textPara )bool {return _degf (_bbcbg ._ccac ,_cbc ._ccac )};func _dcac (_ccce ,_dcccf bounded )float64 {return _cdce (_ccce )-_cdce (_dcccf )};
|
||
func _aabf (_cffca map[float64 ]map[float64 ]gridTile )[]float64 {_efcaa :=make ([]float64 ,0,len (_cffca ));_bfdgd :=make (map[float64 ]struct{},len (_cffca ));for _ ,_gbab :=range _cffca {for _fcga :=range _gbab {if _ ,_agdcg :=_bfdgd [_fcga ];_agdcg {continue ;
|
||
};_efcaa =append (_efcaa ,_fcga );_bfdgd [_fcga ]=struct{}{};};};_b .Float64s (_efcaa );return _efcaa ;};func _gdbb (_fcee map[float64 ]gridTile )[]float64 {_efda :=make ([]float64 ,0,len (_fcee ));for _aafbf :=range _fcee {_efda =append (_efda ,_aafbf );
|
||
};_b .Float64s (_efda );return _efda ;};type wordBag struct{_eb .PdfRectangle ;_cgad float64 ;_baaf ,_eef rulingList ;_decb float64 ;_eaba map[int ][]*textWord ;};func (_acg *wordBag )firstWord (_ebed int )*textWord {return _acg ._eaba [_ebed ][0]};func (_cccca *ruling )intersects (_ffca *ruling )bool {_accc :=(_cccca ._bdfg ==_dbbb &&_ffca ._bdfg ==_afdgd )||(_ffca ._bdfg ==_dbbb &&_cccca ._bdfg ==_afdgd );
|
||
_ddbb :=func (_aedb ,_bfbg *ruling )bool {return _aedb ._dbce -_gffe <=_bfbg ._cda &&_bfbg ._cda <=_aedb ._daafc +_gffe ;};_bdec :=_ddbb (_cccca ,_ffca );_efaag :=_ddbb (_ffca ,_cccca );if _gfba {_bc .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_accc ,_bdec ,_efaag ,_accc &&_bdec &&_efaag ,_cccca ,_ffca );
|
||
};return _accc &&_bdec &&_efaag ;};func _eegg (_defg ,_ggee _gaa .Point )bool {_geag :=_be .Abs (_defg .X -_ggee .X );_geeb :=_be .Abs (_defg .Y -_ggee .Y );return _fgae (_geeb ,_geag );};func _bgcg (_acdf map[float64 ]map[float64 ]gridTile )[]float64 {_eddd :=make ([]float64 ,0,len (_acdf ));
|
||
for _afgc :=range _acdf {_eddd =append (_eddd ,_afgc );};_b .Float64s (_eddd );_bbgf :=len (_eddd );for _fecb :=0;_fecb < _bbgf /2;_fecb ++{_eddd [_fecb ],_eddd [_bbgf -1-_fecb ]=_eddd [_bbgf -1-_fecb ],_eddd [_fecb ];};return _eddd ;};func _fgae (_dged ,_dcdc float64 )bool {return _dged /_be .Max (_gggc ,_dcdc )< _bafg };
|
||
// alignsSec reports whether the secondary ranges [_dbce, _daafc] of `_afgd` and
// `_gfgd` overlap once each lower bound is relaxed by the tolerance
// `_cdceg + 1.0`.
//
// _cfbc, which begins on the same source line, collects the keys of `_bcfg` and
// (on the following code line) returns them sorted in increasing order.
func (_afgd *ruling )alignsSec (_gfgd *ruling )bool {const _befb =_cdceg +1.0;return _afgd ._dbce -_befb <=_gfgd ._daafc &&_gfgd ._dbce -_befb <=_afgd ._daafc ;};func _cfbc (_bcfg map[int ]intSet )[]int {_ccfgb :=make ([]int ,0,len (_bcfg ));for _agbd :=range _bcfg {_ccfgb =append (_ccfgb ,_agbd );
|
||
// _dfbegc (declared on the next code line) maps every rulingKind to its
// display name: "none", "horizontal" or "vertical".
//
// _ccba (same line) builds a vertical ruling (kind _dbbb) from the rectangle
// `_ffag`: primary coordinate Llx, secondary range [Lly, Ury].
};_b .Ints (_ccfgb );return _ccfgb ;};var _dfbegc =map[rulingKind ]string {_dcga :"\u006e\u006f\u006e\u0065",_afdgd :"\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_dbbb :"\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"};func _ccba (_ffag _eb .PdfRectangle )*ruling {return &ruling {_bdfg :_dbbb ,_cda :_ffag .Llx ,_dbce :_ffag .Lly ,_daafc :_ffag .Ury };
|
||
// _aaaa (next code line) returns _ddd(a, b) unless _dede reports true for that
// value, in which case it returns _dcac(a, b) instead.
// NOTE(review): _ddd, _dede and _dcac are defined outside this chunk; this
// looks like "primary difference with a secondary tie-break" — confirm.
//
// arrangeText (same line) groups the words of the bag into text lines and
// returns them wrapped in a paragraph, or nil if no line could be built.
// It starts by sorting the bag and, when `_agec` is set, removing duplicates.
};func _aaaa (_fdfb ,_ggfd bounded )float64 {_dceb :=_ddd (_fdfb ,_ggfd );if !_dede (_dceb ){return _dceb ;};return _dcac (_fdfb ,_ggfd );};func (_fbfb *wordBag )arrangeText ()*textPara {_fbfb .sort ();if _agec {_fbfb .removeDuplicates ();};var _gfdd []*textLine ;
|
||
// For every depth index, and for as long as that bin still holds words, a new
// line is started at the word selected by firstReadingIndex/firstWord. The
// font size `_aggf` of that word scales the depth window [_cbcf, _gafc] from
// which further words of the line may be taken.
for _ ,_bgd :=range _fbfb .depthIndexes (){for !_fbfb .empty (_bgd ){_dcec :=_fbfb .firstReadingIndex (_bgd );_accg :=_fbfb .firstWord (_dcec );_gbaa :=_afga (_fbfb ,_dcec );_dfaa :=_accg ._aggf ;_cbcf :=_accg ._efaba -_gcgb *_dfaa ;_gafc :=_accg ._efaba +_gcgb *_dfaa ;
|
||
// `_ecff` and `_cgbe` are gap limits scaled by the same font size. Each pass of
// the labelled loop selects at most one word: for every bin of the depth band
// the candidate is highestWord(...) and `_bfba` is _dbde(candidate, last word
// currently in the line).
_ecff :=_ebaf *_dfaa ;_cgbe :=_gcge *_dfaa ;_adbc :for {var _baeb *textWord ;_cfeg :=0;for _ ,_beda :=range _fbfb .depthBand (_cbcf ,_gafc ){_cbff :=_fbfb .highestWord (_beda ,_cbcf ,_gafc );if _cbff ==nil {continue ;};_bfba :=_dbde (_cbff ,_gbaa ._ffd [len (_gbaa ._ffd )-1]);
|
||
// A candidate whose gap is below -_cgbe terminates the whole line (labelled
// break); one whose gap exceeds _ecff is skipped. Of the remaining candidates
// the one with _ddd(candidate, best) < 0 is kept. The chosen word is moved from
// the bag into the line; if no word was chosen the line is complete, its word
// boundaries are marked and it is appended to `_gfdd`.
if _bfba < -_cgbe {break _adbc ;};if _bfba > _ecff {continue ;};if _baeb !=nil &&_ddd (_cbff ,_baeb )>=0{continue ;};_baeb =_cbff ;_cfeg =_beda ;};if _baeb ==nil {break ;};_gbaa .pullWord (_fbfb ,_baeb ,_cfeg );};_gbaa .markWordBoundaries ();_gfdd =append (_gfdd ,_gbaa );
|
||
// The lines are ordered with _fgcc and turned into a paragraph by _fefc using
// the bounding box of the bag. Everything after that, up to the return, is
// debug output controlled by `_caafc`, `_bbaf` and `_daec`.
};};if len (_gfdd )==0{return nil ;};_b .Slice (_gfdd ,func (_fcdg ,_egcf int )bool {return _fgcc (_gfdd [_fcdg ],_gfdd [_egcf ])< 0});_adaf :=_fefc (_fbfb .PdfRectangle ,_gfdd );if _caafc {_bb .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_adaf .String ());
|
||
if _bbaf {for _gge ,_dcbae :=range _adaf ._ccaa {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gge ,_dcbae .String ());if _daec {for _eabc ,_cgeg :=range _dcbae ._ffd {_bc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_eabc ,_cgeg .String ());
|
||
// textTable.log (begins on the next code line, after arrangeText returns)
// prints a description of the table when `_cece` is set: the title `_aefcg`,
// the dimensions `_gfgdf` x `_ggca`, the `_ggac` flag and the bounding box.
for _gdcfd ,_dacd :=range _cgeg ._badb {_bc .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_gdcfd ,_dacd .String ());};};};};};};return _adaf ;};func (_ccdb *textTable )log (_aefcg string ){if !_cece {return ;};_bb .Log .Info ("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066",_aefcg ,_ccdb ._gfgdf ,_ccdb ._ggca ,_ccdb ._ggac ,_ccdb .PdfRectangle );
|
||
// One line is printed per non-nil cell; the outer loop runs over `_ggca` and
// the inner loop over `_gfgdf`. The cell text is passed through _geff(..., 50)
// and its rune count is printed as well.
for _cefab :=0;_cefab < _ccdb ._ggca ;_cefab ++{for _ecfeg :=0;_ecfeg < _ccdb ._gfgdf ;_ecfeg ++{_cgfgb :=_ccdb .get (_ecfeg ,_cefab );if _cgfgb ==nil {continue ;};_bc .Printf ("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a",_ecfeg ,_cefab ,_cgfgb .PdfRectangle ,_geff (_cgfgb .text (),50),_gf .RuneCountInString (_cgfgb .text ()));
|
||
// _dfad (begins on the next code line) computes the y coordinates that
// separate vertically disjoint groups of text lines within the composite cells
// `_gfee` (called "rowCorridors" in the debug output). It returns nil when
// some cell does not satisfy hasLines for every group.
// First the lines of all cells are gathered and sorted by `_eggg`; values that
// _dede treats as equal are ordered by Llx.
};};};func _dfad (_gfee []compositeCell )[]float64 {var _dbda []*textLine ;_dagcce :=0;for _ ,_fcfdc :=range _gfee {_dagcce +=len (_fcfdc .paraList );_dbda =append (_dbda ,_fcfdc .lines ()...);};_b .Slice (_dbda ,func (_gfgee ,_eefd int )bool {_dgcb ,_aadfa :=_dbda [_gfgee ],_dbda [_eefd ];
|
||
_ecgd ,_beddd :=_dgcb ._eggg ,_aadfa ._eggg ;if !_dede (_ecgd -_beddd ){return _ecgd < _beddd ;};return _dgcb .Llx < _aadfa .Llx ;});if _cece {_bc .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_dagcce ,len (_dbda ));
|
||
// Sweep over the sorted lines. `_ffggc` tracks the line with the lowest Lly
// seen so far; when the next line's Ury lies below that Lly a border is placed
// half way between the two and the current group `_gcdf` is closed.
// NOTE(review): `_dbda[0]` is read without a length check, so this panics if
// the cells contribute no lines — confirm that callers guarantee at least one.
for _bbaa ,_bbccg :=range _dbda {_bc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_bbaa ,_bbccg );};};var _fedc []float64 ;_ffggc :=_dbda [0];var _gdebe [][]*textLine ;_gcdf :=[]*textLine {_ffggc };for _ccgg ,_ebfa :=range _dbda [1:]{if _ebfa .Ury < _ffggc .Lly {_eeff :=0.5*(_ebfa .Ury +_ffggc .Lly );
|
||
if _cece {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_ccgg ,_ebfa .Ury ,_ffggc .Lly ,_eeff ,_ffggc ,_ebfa );
|
||
// The border is recorded in `_fedc` and the finished group in `_gdebe`; the
// last (still open) group is appended after the loop.
};_fedc =append (_fedc ,_eeff );_gdebe =append (_gdebe ,_gcdf );_gcdf =nil ;};_gcdf =append (_gcdf ,_ebfa );if _ebfa .Lly < _ffggc .Lly {_ffggc =_ebfa ;};};if len (_gcdf )> 0{_gdebe =append (_gdebe ,_gcdf );};if _cece {_bc .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_fedc );
|
||
};if _cece {_bb .Log .Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len (_gfee ));for _edfd ,_fbcb :=range _gfee {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_edfd ,_fbcb );};_bb .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_gdebe ));
|
||
for _dfeef ,_bddb :=range _gdebe {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_dfeef ,len (_bddb ));for _accgd ,_fgdg :=range _bddb {_bc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_accgd ,_fgdg );};};};_bgbgb :=true ;
|
||
// Validation: `_bgbgb` stays true only if every cell reports hasLines(group)
// for every group. The first failing cell stops both loops.
for _dbbce ,_cebba :=range _gdebe {_dffbc :=true ;for _gfeb ,_addeg :=range _gfee {if _cece {_bc .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_dbbce ,len (_gdebe ),_gfeb ,len (_gfee ),_addeg );
|
||
};if !_addeg .hasLines (_cebba ){if _cece {_bc .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_dbbce ,len (_gdebe ),_gfeb ,len (_gfee ));
|
||
// When the validation failed the borders are discarded (nil is returned).
};_dffbc =false ;break ;};};if !_dffbc {_bgbgb =false ;break ;};};if !_bgbgb {if _cece {_bb .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg");
|
||
};_fedc =nil ;};if _cece &&_fedc !=nil {_bc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_fedc );};return _fedc ;};
|
||
func (_egf paraList )topoOrder ()[]int {if _cfff {_bb .Log .Info ("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a");};_bafad :=len (_egf );_bebg :=make ([]bool ,_bafad );_fcgg :=make ([]int ,0,_bafad );_cbfd :=_egf .llyOrdering ();var _efcg func (_eebd int );
|
||
_efcg =func (_eaca int ){_bebg [_eaca ]=true ;for _eead :=0;_eead < _bafad ;_eead ++{if !_bebg [_eead ]{if _egf .readBefore (_cbfd ,_eaca ,_eead ){_efcg (_eead );};};};_fcgg =append (_fcgg ,_eaca );};for _dbgb :=0;_dbgb < _bafad ;_dbgb ++{if !_bebg [_dbgb ]{_efcg (_dbgb );
|
||
};};return _gdfa (_fcgg );};func (_adbe *textTable )isExportable ()bool {if _adbe ._ggac {return true ;};_dfdc :=func (_feda int )bool {_feae :=_adbe .get (0,_feda );if _feae ==nil {return false ;};_ccbf :=_feae .text ();_bcgf :=_gf .RuneCountInString (_ccbf );
|
||
_egcg :=_gaeb .MatchString (_ccbf );return _bcgf <=1||_egcg ;};for _fagb :=0;_fagb < _adbe ._ggca ;_fagb ++{if !_dfdc (_fagb ){return true ;};};return false ;};func (_aef *textObject )checkOp (_dbb *_bg .ContentStreamOperation ,_cbb int ,_aad bool )(_dde bool ,_dbdg error ){if _aef ==nil {var _fcd []_af .PdfObject ;
|
||
if _cbb > 0{_fcd =_dbb .Params ;if len (_fcd )> _cbb {_fcd =_fcd [:_cbb ];};};_bb .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_dbb .Operand ,_fcd );
|
||
};if _cbb >=0{if len (_dbb .Params )!=_cbb {if _aad {_dbdg =_f .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");};_bb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_dbb .Operand ,_cbb ,len (_dbb .Params ),_dbb .Params );
|
||
return false ,_dbdg ;};};return true ,nil ;};func (_ffc *textObject )setFont (_cbae string ,_gfg float64 )error {if _ffc ==nil {return nil ;};_ffc ._add ._ccef =_gfg ;_afce ,_faeg :=_ffc .getFont (_cbae );if _faeg !=nil {return _faeg ;};_ffc ._add ._adgd =_afce ;
|
||
return nil ;};func (_efd *stateStack )push (_dgg *textState ){_fed :=*_dgg ;*_efd =append (*_efd ,&_fed )};
|
||
|
||
// PageImages represents extracted images on a PDF page with spatial information:
|
||
// display position and size.
|
||
type PageImages struct{Images []ImageMark ;};func (_bac *shapesState )establishSubpath ()*subpath {_dccf ,_fccg :=_bac .lastpointEstablished ();if !_fccg {_bac ._ege =append (_bac ._ege ,_gadg (_dccf ));};if len (_bac ._ege )==0{return nil ;};_bac ._gdef =false ;
|
||
return _bac ._ege [len (_bac ._ege )-1];};func (_afgcc paraList )findGridTables (_eadaa []gridTiling )[]*textTable {if _cece {_bb .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_afgcc ));
|
||
for _faadf ,_gege :=range _afgcc {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_faadf ,_gege );};};var _deaf []*textTable ;for _acbf ,_gefa :=range _eadaa {_abbc ,_gffa :=_afgcc .findTableGrid (_gefa );if _abbc !=nil {_abbc .log (_bc .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_acbf ));
|
||
_deaf =append (_deaf ,_abbc );_abbc .markCells ();};for _abbff :=range _gffa {_abbff ._dfbc =true ;};};if _cece {_bb .Log .Info ("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s",len (_deaf ));
|
||
};return _deaf ;};func (_cfb *textObject )nextLine (){_cfb .moveLP (0,-_cfb ._add ._cad )};func (_egef *subpath )clear (){*_egef =subpath {}};func (_fcaa *wordBag )blocked (_bacg *textWord )bool {if _bacg .Urx < _fcaa .Llx {_cdd :=_cfccf (_bacg .PdfRectangle );
|
||
_eded :=_ccba (_fcaa .PdfRectangle );if _fcaa ._baaf .blocks (_cdd ,_eded ){if _cceb {_bb .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_bacg ,_fcaa );};return true ;};}else if _fcaa .Urx < _bacg .Llx {_eeb :=_cfccf (_fcaa .PdfRectangle );
|
||
_fee :=_ccba (_bacg .PdfRectangle );if _fcaa ._baaf .blocks (_eeb ,_fee ){if _cceb {_bb .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s",_bacg ,_fcaa );};return true ;};};if _bacg .Ury < _fcaa .Lly {_ddg :=_fdbac (_bacg .PdfRectangle );
|
||
_eaeg :=_adca (_fcaa .PdfRectangle );if _fcaa ._eef .blocks (_ddg ,_eaeg ){if _cceb {_bb .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_bacg ,_fcaa );};return true ;};}else if _fcaa .Ury < _bacg .Lly {_cae :=_fdbac (_fcaa .PdfRectangle );
|
||
_faef :=_adca (_bacg .PdfRectangle );if _fcaa ._eef .blocks (_cae ,_faef ){if _cceb {_bb .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s",_bacg ,_fcaa );};return true ;};};return false ;};
|
||
|
||
// Append appends `mark` to the mark array.
|
||
// moveText (defined on the next code line after Append) forwards its two
// offsets unchanged to moveLP.
//
// renderText (same line) converts the bytes `_bbcc` of a text-showing operator
// into text marks, which are appended to `_fgc`, and advances the matrix
// `_bbcf` after every glyph that produced a mark. It returns an error only when
// a character code has no metrics in the current font.
// When the `_eae` flag is set nothing is processed and nil is returned.
func (_faegg *TextMarkArray )Append (mark TextMark ){_faegg ._gddg =append (_faegg ._gddg ,mark )};func (_ace *textObject )moveText (_ceg ,_cega float64 ){_ace .moveLP (_ceg ,_cega )};func (_daaa *textObject )renderText (_bbcc []byte )error {if _daaa ._eae {_bb .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");
|
||
// The bytes are mapped to character codes and those to strings by the current
// font. `_debg` and `_aeeb` are logged as "numChars" and "numMisses".
return nil ;};_cadc :=_daaa .getCurrentFont ();_aec :=_cadc .BytesToCharcodes (_bbcc );_dabf ,_debg ,_aeeb :=_cadc .CharcodesToStrings (_aec );if _aeeb > 0{_bb .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_debg ,_aeeb );
|
||
// Both counts are accumulated in the text state. `_afba` is the font size
// stored by setFont, `_dagc` is `_gbg` as a fraction of 100 (logged as "th"),
// and `_fbg` is the glyph scale: `_bee`, or 1 for fonts of subtype "Type3".
// The metrics of the space character are looked up by rune first, then by
// character code 32 ...
};_daaa ._add ._cdbe +=_debg ;_daaa ._add ._gac +=_aeeb ;_dacg :=_daaa ._add ;_afba :=_dacg ._ccef ;_dagc :=_dacg ._gbg /100.0;_fbg :=_bee ;if _cadc .Subtype ()=="\u0054\u0079\u0070e\u0033"{_fbg =1;};_abde ,_eeab :=_cadc .GetRuneMetrics (' ');if !_eeab {_abde ,_eeab =_cadc .GetCharMetrics (32);
|
||
// ... and finally taken from the default font. `_aeab` is the resulting space
// width.
};if !_eeab {_abde ,_ =_eb .DefaultFont ().GetRuneMetrics (' ');};_aeab :=_abde .Wx *_fbg ;_bb .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_aeab ,_dabf ,_cadc ,_afba );
|
||
// `_bdb` scales by (font size * `_dagc`) horizontally and by the font size
// vertically, with a vertical offset of `_dce`.
_bdb :=_gaa .NewMatrix (_afba *_dagc ,0,0,_afba ,0,_dacg ._dce );if _dbfg {_bb .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_aec ),_aec ,_dabf );
|
||
// NOTE(review): the last verb of the Trace format below is %q but the argument
// is len(_dabf), an int; the Info call above passes `_dabf` itself for the same
// position. Looks unintended — confirm.
};_bb .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_aec ),_aec ,len (_dabf ));_fefb :=_daaa .getFillColor ();
|
||
// Per decoded string: a single NUL rune is skipped. `_dfga` is
// CTM x `_bbcf` x `_bdb`. `_ede` is `_fege` (logged as "tw") and is only
// applied when the string is exactly one space (rune 32).
_efab :=_daaa .getStrokeColor ();for _bcg ,_gfd :=range _dabf {_eaeb :=[]rune (_gfd );if len (_eaeb )==1&&_eaeb [0]=='\x00'{continue ;};_dfgd :=_aec [_bcg ];_dfga :=_daaa ._gfa .CTM .Mult (_daaa ._bbcf ).Mult (_bdb );_ede :=0.0;if len (_eaeb )==1&&_eaeb [0]==32{_ede =_dacg ._fege ;
|
||
// The glyph metrics are looked up by character code; a missing entry aborts
// the whole call with an error.
};_adgdc ,_bdbd :=_cadc .GetCharMetrics (_dfgd );if !_bdbd {_bb .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_dfgd ,_eaeb ,_eaeb ,_cadc );
|
||
// `_dbbc` is the glyph displacement scaled by `_fbg`.
return _bc .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_cadc .String (),_dfgd );};_dbbc :=_gaa .Point {X :_adgdc .Wx *_fbg ,Y :_adgdc .Wy *_fbg };
|
||
// `_bdg` is the horizontal advance without `_cdbb` (logged as "tc") and is used
// for the end position of the mark; `_gcg` includes `_cdbb` and is used to
// advance `_bbcf`.
_bdg :=_gaa .Point {X :(_dbbc .X *_afba +_ede )*_dagc };_gcg :=_gaa .Point {X :(_dbbc .X *_afba +_dacg ._cdbb +_ede )*_dagc };if _dbfg {_bb .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_afba ,_dacg ._cdbb ,_dacg ._fege ,_dagc );
|
||
// _gggab turns each advance into a matrix (presumably a pure translation —
// defined outside this chunk). `_bgbd` is CTM x `_bbcf` x `_adcg`.
_bb .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_dbbc ,_bdg ,_gcg );};_adcg :=_gggab (_bdg );_cde :=_gggab (_gcg );_bgbd :=_daaa ._gfa .CTM .Mult (_daaa ._bbcf ).Mult (_adcg );
|
||
if _cfcc {_bb .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_daaa ._gfa .CTM ,_daaa ._bbcf ,_cde ,_gdg (_daaa ._gfa .CTM .Mult (_daaa ._bbcf ).Mult (_cde )),_adcg ,_bgbd ,_gdg (_bgbd ));
|
||
// The mark is built from the ligature-expanded runes, `_dfga`, the translation
// of `_bgbd`, the absolute scaled space width, the font, `_cdbb` and the two
// colours. When newTextMark reports false the glyph is skipped.
// NOTE(review): the skip is a `continue`, so `_bbcf` is not advanced for such
// glyphs — confirm this is intended.
};_gbba ,_bfag :=_daaa .newTextMark (_gc .ExpandLigatures (_eaeb ),_dfga ,_gdg (_bgbd ),_be .Abs (_aeab *_dfga .ScalingFactorX ()),_cadc ,_daaa ._add ._cdbb ,_fefb ,_efab );if !_bfag {_bb .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");
|
||
// NOTE(review): `_cadc` has already been dereferenced several times above, so
// the nil branch below cannot be reached without an earlier panic.
continue ;};if _cadc ==nil {_bb .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _cadc .Encoder ()==nil {_bb .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_cadc );
|
||
// When the encoder can map the character code to a rune, that rune is stored
// in the `_gdda` field of the mark.
}else {if _gaad ,_fecd :=_cadc .Encoder ().CharcodeToRune (_dfgd );_fecd {_gbba ._gdda =string (_gaad );};};_bb .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_bcg ,_dfgd ,_gbba ,_dfga );
|
||
// Store the mark and advance `_bbcf` by `_cde`.
_daaa ._fgc =append (_daaa ._fgc ,&_gbba );_daaa ._bbcf .Concat (_cde );};return nil ;};
|
||
|
||
// String returns a human readable description of `vecs`.
|
||
func (_becg rulingList )String ()string {if len (_becg )==0{return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}";};_agcc ,_debff :=_becg .vertsHorzs ();_bgaf :=len (_agcc );_edcd :=len (_debff );if _bgaf ==0||_edcd ==0{return _bc .Sprintf ("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}",_bgaf ,_edcd );
|
||
};_fcgcac :=_eb .PdfRectangle {Llx :_agcc [0]._cda ,Urx :_agcc [_bgaf -1]._cda ,Lly :_debff [_edcd -1]._cda ,Ury :_debff [0]._cda };return _bc .Sprintf ("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d",_bgaf ,_edcd ,_fcgcac );
|
||
};func (_cfbaa gridTile )numBorders ()int {_ecgg :=0;if _cfbaa ._gbde {_ecgg ++;};if _cfbaa ._cacc {_ecgg ++;};if _cfbaa ._aeafd {_ecgg ++;};if _cfbaa ._eegfc {_ecgg ++;};return _ecgg ;};func _debc (_bfdff int ,_acaa func (int ,int )bool )[]int {_fgba :=make ([]int ,_bfdff );
|
||
for _eeaa :=range _fgba {_fgba [_eeaa ]=_eeaa ;};_b .Slice (_fgba ,func (_ddgcc ,_adgb int )bool {return _acaa (_fgba [_ddgcc ],_fgba [_adgb ])});return _fgba ;};func (_aebg *textPara )bbox ()_eb .PdfRectangle {return _aebg .PdfRectangle };func (_afabf *textTable )depth ()float64 {_ccebb :=1e10;
|
||
for _eddag :=0;_eddag < _afabf ._gfgdf ;_eddag ++{_aecb :=_afabf .get (_eddag ,0);if _aecb ==nil ||_aecb ._dgcce {continue ;};_ccebb =_be .Min (_ccebb ,_aecb .depth ());};return _ccebb ;};func (_dfee gridTile )contains (_ebeef _eb .PdfRectangle )bool {if _dfee .numBorders ()< 3{return false ;
|
||
};if _dfee ._gbde &&_ebeef .Llx < _dfee .Llx -_adec {return false ;};if _dfee ._cacc &&_ebeef .Urx > _dfee .Urx +_adec {return false ;};if _dfee ._aeafd &&_ebeef .Lly < _dfee .Lly -_adec {return false ;};if _dfee ._eegfc &&_ebeef .Ury > _dfee .Ury +_adec {return false ;
|
||
};return true ;};var (_cc =_f .New ("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072");_gb =_f .New ("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072"););
|
||
|
||
// TextMarkArray is a collection of TextMarks.
|
||
type TextMarkArray struct{_gddg []TextMark };func (_dfcg *textLine )endsInHyphen ()bool {_gdcd :=_dfcg ._ffd [len (_dfcg ._ffd )-1];_cbbe :=_gdcd ._caee ;_ceff ,_gggf :=_gf .DecodeLastRuneInString (_cbbe );if _gggf <=0||!_fe .Is (_fe .Hyphen ,_ceff ){return false ;
|
||
};if _gdcd ._dffcf &&_ffgg (_cbbe ){return true ;};return _ffgg (_dfcg .text ());};func (_eaec *textPara )toTextMarks (_fabe *int )[]TextMark {if _eaec ._dcfac ==nil {return _eaec .toCellTextMarks (_fabe );};var _gbda []TextMark ;for _ccab :=0;_ccab < _eaec ._dcfac ._ggca ;
|
||
_ccab ++{for _abbg :=0;_abbg < _eaec ._dcfac ._gfgdf ;_abbg ++{_ffbaf :=_eaec ._dcfac .get (_abbg ,_ccab );if _ffbaf ==nil {_gbda =_deef (_gbda ,_fabe ,"\u0009");}else {_gfdb :=_ffbaf .toCellTextMarks (_fabe );_gbda =append (_gbda ,_gfdb ...);};_gbda =_deef (_gbda ,_fabe ,"\u0020");
|
||
};if _ccab < _eaec ._dcfac ._ggca -1{_gbda =_deef (_gbda ,_fabe ,"\u000a");};};return _gbda ;};func (_eddg *textTable )bbox ()_eb .PdfRectangle {return _eddg .PdfRectangle };func (_ccag *textLine )text ()string {var _aaab []string ;for _ ,_cdbg :=range _ccag ._ffd {if _cdbg ._dffcf {_aaab =append (_aaab ,"\u0020");
|
||
};_aaab =append (_aaab ,_cdbg ._caee );};return _a .Join (_aaab ,"");};func _abfb (_befa ,_ddef float64 )string {_deagf :=!_dede (_befa -_ddef );if _deagf {return "\u000a";};return "\u0020";};
|
||
|
||
// String returns a human readable description of `s`.
|
||
func (_ecgbf intSet )String ()string {var _bdcac []int ;for _adddf :=range _ecgbf {if _ecgbf .has (_adddf ){_bdcac =append (_bdcac ,_adddf );};};_b .Ints (_bdcac );return _bc .Sprintf ("\u0025\u002b\u0076",_bdcac );};func _bcbgg (_cbbc ,_fdgfa int )int {if _cbbc > _fdgfa {return _cbbc ;
|
||
};return _fdgfa ;};func (_egbg *wordBag )getDepthIdx (_ebce float64 )int {_cbec :=_egbg .depthIndexes ();_cfgb :=_aag (_ebce );if _cfgb < _cbec [0]{return _cbec [0];};if _cfgb > _cbec [len (_cbec )-1]{return _cbec [len (_cbec )-1];};return _cfgb ;};func _bgag (_ggbg []pathSection )rulingList {_cffd (_ggbg );
|
||
if _gfba {_bb .Log .Info ("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073",len (_ggbg ));};var _egec rulingList ;for _ ,_ecad :=range _ggbg {for _ ,_gdfae :=range _ecad ._gaac {if len (_gdfae ._edg )< 2{continue ;
|
||
};_ccaba :=_gdfae ._edg [0];for _ ,_gbbaf :=range _gdfae ._edg [1:]{if _bgfbc ,_ebcd :=_ecef (_ccaba ,_gbbaf ,_ecad .Color );_ebcd {_egec =append (_egec ,_bgfbc );};_ccaba =_gbbaf ;};};};if _gfba {_bb .Log .Info ("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073",_egec );
|
||
};return _egec ;};func (_gacb *ruling )gridIntersecting (_cbgec *ruling )bool {return _caafb (_gacb ._dbce ,_cbgec ._dbce )&&_caafb (_gacb ._daafc ,_cbgec ._daafc );};func (_bacef rulingList )vertsHorzs ()(rulingList ,rulingList ){var _faeed ,_agcb rulingList ;
|
||
for _ ,_ecdca :=range _bacef {switch _ecdca ._bdfg {case _dbbb :_faeed =append (_faeed ,_ecdca );case _afdgd :_agcb =append (_agcb ,_ecdca );};};return _faeed ,_agcb ;};func _cgbab (_deecc map[int ][]float64 ){if len (_deecc )<=1{return ;};_adfdf :=_addcd (_deecc );
|
||
if _cece {_bb .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_adfdf );};var _abgfg ,_decfg int ;for _abgfg ,_decfg =range _adfdf {if _deecc [_decfg ]!=nil {break ;};};for _dcaaf ,_fabc :=range _adfdf [_abgfg :]{_aedgc :=_deecc [_fabc ];
|
||
if _aedgc ==nil {continue ;};if _cece {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_abgfg +_dcaaf ,_decfg ,_fabc );};_dgae :=_deecc [_fabc ];if _dgae [len (_dgae )-1]> _aedgc [0]{_dgae [len (_dgae )-1]=_aedgc [0];
|
||
_deecc [_decfg ]=_dgae ;};_decfg =_fabc ;};};func (_egafa *textTable )putComposite (_dgfeg ,_eadag int ,_eaad paraList ,_eega _eb .PdfRectangle ){if len (_eaad )==0{_bb .Log .Error ("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073");
|
||
return ;};_gbdg :=compositeCell {_eega ,_eaad };if _cece {_bc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a",_dgfeg ,_eadag ,_gbdg .String ());
|
||
};_gbdg .updateBBox ();_egafa ._bcdge [_fcbc (_dgfeg ,_eadag )]=_gbdg ;};func (_ccg *wordBag )allWords ()[]*textWord {var _bade []*textWord ;for _ ,_effc :=range _ccg ._eaba {_bade =append (_bade ,_effc ...);};return _bade ;};var _gca =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_eac .White ,StrokeColor :_eac .White };
|
||
// _adca builds a horizontal ruling (kind _afdgd) from the rectangle `_bdbc`:
// primary coordinate Lly, secondary range [Llx, Urx].
//
// highestWord (begins on the same line) returns the first word stored under
// depth index `_ebcg` whose `_efaba` lies within [_feee, _cebe], or nil when
// there is none.
func _adca (_bdbc _eb .PdfRectangle )*ruling {return &ruling {_bdfg :_afdgd ,_cda :_bdbc .Lly ,_dbce :_bdbc .Llx ,_daafc :_bdbc .Urx };};func (_bcbb *wordBag )highestWord (_ebcg int ,_feee ,_cebe float64 )*textWord {for _ ,_ffaa :=range _bcbb ._eaba [_ebcg ]{if _feee <=_ffaa ._efaba &&_ffaa ._efaba <=_cebe {return _ffaa ;
|
||
// _aag (next code line) converts `_ffeb` into an integer bin index by dividing
// by `_cccb`; for negative input the truncated quotient is decremented by one.
//
// _aafc (same line) reports whether the x range of the word `_ceca` overlaps
// the x range of the bag `_fga` after widening the bag by `_ebfd` on each side.
};};return nil ;};func _aag (_ffeb float64 )int {var _adad int ;if _ffeb >=0{_adad =int (_ffeb /_cccb );}else {_adad =int (_ffeb /_cccb )-1;};return _adad ;};func _aafc (_fga *wordBag ,_ceca *textWord ,_ebfd float64 )bool {return _ceca .Llx < _fga .Urx +_ebfd &&_fga .Llx -_ebfd < _ceca .Urx ;
|
||
// scanBand (next code line) scans the words of `_cbf` whose `_efaba` lies in
// the band [_bbae-_dgb, _ffge+_dgb], where `_dgb` is `_gcgb` times the font
// size `_cgad` of `_gceg`, and returns the number of words that pass all tests.
//   _efbb  - not referenced in the body.
//   _bfd   - predicate that a word has to satisfy together with `_gceg`.
//   _bcacg - when > 0, upper limit for the font size mismatch computed below.
//   _caff  - detection only: no word is moved, and the scan of a bin stops
//            after its first accepted word.
//   _fbbf  - when false, [_bbae, _ffge] is widened to include accepted words.
// Unless `_caff` is set, accepted words are pulled into `_gceg` and finally
// removed from `_cbf` through the removals map.
};func (_cbf *wordBag )scanBand (_efbb string ,_gceg *wordBag ,_bfd func (_fgbc *wordBag ,_cccf *textWord )bool ,_bbae ,_ffge ,_bcacg float64 ,_caff ,_fbbf bool )int {_geac :=_gceg ._cgad ;var _aaeb map[int ]map[*textWord ]struct{};if !_caff {_aaeb =_cbf .makeRemovals ();
|
||
// The depth band is evaluated once, before the loop; later changes of
// `_bbae`/`_ffge` only affect the per-word range test.
};_dgb :=_gcgb *_geac ;_fge :=0;for _ ,_gcc :=range _cbf .depthBand (_bbae -_dgb ,_ffge +_dgb ){if len (_cbf ._eaba [_gcc ])==0{continue ;};for _ ,_ecde :=range _cbf ._eaba [_gcc ]{if !(_bbae -_dgb <=_ecde ._efaba &&_ecde ._efaba <=_ffge +_dgb ){continue ;
|
||
// `_dea` is the font size difference relative to the mean of both sizes, and
// `_ccb` the ratio of the larger to the smaller size. The smaller of the two
// values is compared with `_bcacg`.
};if !_bfd (_gceg ,_ecde ){continue ;};_dea :=2.0*_be .Abs (_ecde ._aggf -_gceg ._cgad )/(_ecde ._aggf +_gceg ._cgad );_ccb :=_be .Max (_ecde ._aggf /_gceg ._cgad ,_gceg ._cgad /_ecde ._aggf );_gaee :=_be .Min (_dea ,_ccb );if _bcacg > 0&&_gaee > _bcacg {continue ;
|
||
// Words separated from `_gceg` by a ruling (see blocked) are skipped.
};if _gceg .blocked (_ecde ){continue ;};if !_caff {_gceg .pullWord (_ecde ,_gcc ,_aaeb );};_fge ++;if !_fbbf {if _ecde ._efaba < _bbae {_bbae =_ecde ._efaba ;};if _ecde ._efaba > _ffge {_ffge =_ecde ._efaba ;};};if _caff {break ;};};};if !_caff {_cbf .applyRemovals (_aaeb );
|
||
// _addcd (next code line) returns the keys of `_addcc` in increasing order.
//
// tidied (same line) returns a cleaned-up copy of the rulings: duplicates are
// removed, the result is passed through snapToGroups and sorted. nil is
// returned when snapToGroups returns nil. `_geae` is only used as a label in
// the log output.
};return _fge ;};func _addcd (_addcc map[int ][]float64 )[]int {_dgbfe :=make ([]int ,len (_addcc ));_acee :=0;for _cdfbd :=range _addcc {_dgbfe [_acee ]=_cdfbd ;_acee ++;};_b .Ints (_dgbfe );return _dgbfe ;};func (_caaa rulingList )tidied (_geae string )rulingList {_cbca :=_caaa .removeDuplicates ();
|
||
_cbca .log ("\u0075n\u0069\u0071\u0075\u0065\u0073");_cbcb :=_cbca .snapToGroups ();if _cbcb ==nil {return nil ;};_cbcb .sort ();if _gfba {_bb .Log .Info ("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064",_geae ,len (_caaa ),len (_cbca ),len (_cbcb ));
|
||
// asRuling (next code line) converts the line ruling into a ruling with mark
// kind `_dece`. For a vertical line the primary coordinate is xMean() and the
// secondary range is the sorted pair of y values; for a horizontal line it is
// yMean() and the sorted pair of x values. Any other kind is logged as an
// error and (nil, false) is returned.
};_cbcb .log ("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d");return _cbcb ;};func (_aaacg lineRuling )asRuling ()(*ruling ,bool ){_aeca :=ruling {_bdfg :_aaacg ._cbgfa ,Color :_aaacg .Color ,_eabd :_dece };switch _aaacg ._cbgfa {case _dbbb :_aeca ._cda =_aaacg .xMean ();
|
||
_aeca ._dbce =_be .Min (_aaacg ._cbdg .Y ,_aaacg ._cbfb .Y );_aeca ._daafc =_be .Max (_aaacg ._cbdg .Y ,_aaacg ._cbfb .Y );case _afdgd :_aeca ._cda =_aaacg .yMean ();_aeca ._dbce =_be .Min (_aaacg ._cbdg .X ,_aaacg ._cbfb .X );_aeca ._daafc =_be .Max (_aaacg ._cbdg .X ,_aaacg ._cbfb .X );
|
||
// reduceTiling (begins on the next code line) returns a table without the
// empty rows and columns that lie closer than `_dcfc` to a neighbouring grid
// line of the tiling `_edcdf`. The receiver itself is returned when nothing
// has to be removed.
default:_bb .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_aaacg ._cbgfa );return nil ,false ;};return &_aeca ,true ;};func (_gdefc *textTable )reduceTiling (_edcdf gridTiling ,_dcfc float64 )*textTable {_faea :=make ([]int ,0,_gdefc ._ggca );
|
||
// Row i is dropped when i > 0, |_adcf[i-1] - _adcf[i]| < _dcfc and emptyRow(i)
// holds; all other row indexes are collected in `_faea`.
_cdba :=make ([]int ,0,_gdefc ._gfgdf );_fegge :=_edcdf ._abdf ;_adcf :=_edcdf ._bgcf ;for _badg :=0;_badg < _gdefc ._ggca ;_badg ++{_bcef :=_badg > 0&&_be .Abs (_adcf [_badg -1]-_adcf [_badg ])< _dcfc &&_gdefc .emptyRow (_badg );if !_bcef {_faea =append (_faea ,_badg );
|
||
// Column i is dropped when it is not the last column,
// |_fegge[i+1] - _fegge[i]| < _dcfc and emptyColumn(i) holds; all other column
// indexes are collected in `_cdba`.
};};for _ebcef :=0;_ebcef < _gdefc ._gfgdf ;_ebcef ++{_dfega :=_ebcef < _gdefc ._gfgdf -1&&_be .Abs (_fegge [_ebcef +1]-_fegge [_ebcef ])< _dcfc &&_gdefc .emptyColumn (_ebcef );if !_dfega {_cdba =append (_cdba ,_ebcef );};};if len (_faea )==_gdefc ._ggca &&len (_cdba )==_gdefc ._gfgdf {return _gdefc ;
|
||
// The reduced table keeps the `_ggac` flag of the receiver.
// NOTE(review): unlike findTableGrid, only `_bcdge` is initialised here and
// `_ecac` stays nil — confirm that nothing writes to `_ecac` of this table.
};_aecgfc :=textTable {_ggac :_gdefc ._ggac ,_gfgdf :len (_cdba ),_ggca :len (_faea ),_bcdge :make (map[uint64 ]compositeCell ,len (_cdba )*len (_faea ))};if _cece {_bb .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_gdefc ._gfgdf ,_gdefc ._ggca ,len (_cdba ),len (_faea ));
|
||
// Copy every non-empty composite cell from its old position (`_bdbg`, `_cddc`)
// to its new position (`_bccc`, `_eebe`).
_bb .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_cdba );_bb .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_faea );};for _eebe ,_cddc :=range _faea {for _bccc ,_bdbg :=range _cdba {_aacde ,_bbbe :=_gdefc .getComposite (_bdbg ,_cddc );
|
||
if len (_aacde )==0{continue ;};if _cece {_bc .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_bccc ,_eebe ,_bdbg ,_cddc ,_geff (_aacde .merge ().text (),50));};_aecgfc .putComposite (_bccc ,_eebe ,_aacde ,_bbbe );
|
||
// findTableGrid (begins on the next code line) tries to build a table from the
// paragraphs of `_ccffg` that fall into the tiles of `_ededd`. It returns the
// table together with the set of paragraphs placed in it, or (nil, nil) when
// the tiling is rejected. The table has one column per entry of `_abdf` and
// one row per entry of `_bgcf`.
};};return &_aecgfc ;};func (_ccffg paraList )findTableGrid (_ededd gridTiling )(*textTable ,map[*textPara ]struct{}){_cdga :=len (_ededd ._abdf );_fgcd :=len (_ededd ._bgcf );_begc :=textTable {_ggac :true ,_gfgdf :_cdga ,_ggca :_fgcd ,_ecac :make (map[uint64 ]*textPara ,_cdga *_fgcd ),_bcdge :make (map[uint64 ]compositeCell ,_cdga *_fgcd )};
|
||
// `_eeaba` is the number of empty tiles that is tolerated:
// (1 - _aabb) times the number of tiles, truncated.
_gegcd :=make (map[*textPara ]struct{});_eeaba :=int ((1.0-_aabb )*float64 (_cdga *_fgcd ));_cged :=0;if _ecbd {_bb .Log .Info ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064",_cdga ,_fgcd );
|
||
// Tiles missing from `_bfbgd` are skipped and do not count as empty. A tile
// that exists but holds no paragraphs (inTile) increments `_cged`.
};for _dfege ,_abccc :=range _ededd ._bgcf {_edgb ,_bebe :=_ededd ._bfbgd [_abccc ];if !_bebe {continue ;};for _edefe ,_egfgg :=range _ededd ._abdf {_bccad ,_fdga :=_edgb [_egfgg ];if !_fdga {continue ;};_ebcea :=_ccffg .inTile (_bccad );if len (_ebcea )==0{_cged ++;
|
||
// Too many empty tiles reject the tiling. Otherwise the paragraphs of the tile
// become a composite cell and are remembered in `_gegcd`.
if _cged > _eeaba {if _ecbd {_bb .Log .Info ("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064",_cged );};return nil ,nil ;};}else {_begc .putComposite (_edefe ,_dfege ,_ebcea ,_bccad .PdfRectangle );for _ ,_eddgb :=range _ebcea {_gegcd [_eddgb ]=struct{}{};
|
||
// `_defd` counts the columns whose cell (x, 0) is nil or has `_dgcce` unset;
// the tiling is rejected when that count is zero ("!numHeader=0" in the log).
};};};};_defd :=0;for _cbbff :=0;_cbbff < _cdga ;_cbbff ++{_cfefb :=_begc .get (_cbbff ,0);if _cfefb ==nil ||!_cfefb ._dgcce {_defd ++;};};if _defd ==0{if _ecbd {_bb .Log .Info ("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030");};return nil ,nil ;
|
||
// The table is reduced with the tolerance `_gfaa` and subdivided before it is
// returned.
//
// _gdg (same line) returns the translation of the matrix `_gcd` as a point.
//
// rulingList.sort (same line) sorts the list in place with the comp method as
// the "less" function.
};_fefag :=_begc .reduceTiling (_ededd ,_gfaa );_fefag =_fefag .subdivide ();return _fefag ,_gegcd ;};func _gdg (_gcd _gaa .Matrix )_gaa .Point {_ggcd ,_dfbe :=_gcd .Translation ();return _gaa .Point {X :_ggcd ,Y :_dfbe };};func (_cace rulingList )sort (){_b .Slice (_cace ,_cace .comp )};
|
||
func (_bgdcg rulingList )removeDuplicates ()rulingList {if len (_bgdcg )==0{return nil ;};_bgdcg .sort ();_fcdb :=rulingList {_bgdcg [0]};for _ ,_faee :=range _bgdcg [1:]{if _faee .equals (_fcdb [len (_fcdb )-1]){continue ;};_fcdb =append (_fcdb ,_faee );
|
||
};return _fcdb ;};func (_aeace *textPara )writeText (_bfca _ea .Writer ){if _aeace ._dcfac ==nil {_aeace .writeCellText (_bfca );return ;};for _egff :=0;_egff < _aeace ._dcfac ._ggca ;_egff ++{for _agbfd :=0;_agbfd < _aeace ._dcfac ._gfgdf ;_agbfd ++{_ddc :=_aeace ._dcfac .get (_agbfd ,_egff );
|
||
if _ddc ==nil {_bfca .Write ([]byte ("\u0009"));}else {_ddc .writeCellText (_bfca );};_bfca .Write ([]byte ("\u0020"));};if _egff < _aeace ._dcfac ._ggca -1{_bfca .Write ([]byte ("\u000a"));};};};func _gbgf (_fgge ,_gcgc _eb .PdfRectangle )bool {return _fgge .Lly <=_gcgc .Ury &&_gcgc .Lly <=_fgge .Ury ;
|
||
};func (_efcd *textTable )newTablePara ()*textPara {_agge :=_efcd .computeBbox ();_bacc :=&textPara {PdfRectangle :_agge ,_ccac :_agge ,_dcfac :_efcd };if _cece {_bb .Log .Info ("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073",_bacc );
|
||
};return _bacc ;};const (RenderModeStroke RenderMode =1<<iota ;RenderModeFill ;RenderModeClip ;);
|
||
|
||
// String returns a human readable description of `ss`.
|
||
func (_bcdae *shapesState )String ()string {return _bc .Sprintf ("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d",len (_bcdae ._ege ),_bcdae ._gdef );};
|
||
|
||
// String returns a string describing `ma`.
|
||
func (_fbgc TextMarkArray )String ()string {_abb :=len (_fbgc ._gddg );if _abb ==0{return "\u0045\u004d\u0050T\u0059";};_debb :=_fbgc ._gddg [0];_ebef :=_fbgc ._gddg [_abb -1];return _bc .Sprintf ("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d",_abb ,_debb ,_ebef );
|
||
};type event struct{_dfab float64 ;_aefd bool ;_dabd int ;};func (_fbd *shapesState )cubicTo (_cbge ,_dcdg ,_efef ,_gdeb ,_adce ,_edab float64 ){if _agbcg {_bb .Log .Info ("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a");};_fbd .addPoint (_adce ,_edab );
|
||
};func (_gddc *textObject )setHorizScaling (_degg float64 ){if _gddc ==nil {return ;};_gddc ._add ._gbg =_degg ;};func (_fbec *textObject )getFont (_efec string )(*_eb .PdfFont ,error ){if _fbec ._gcb ._ab !=nil {_fbec ._gcb ._d ++;_cded ,_faffg :=_fbec ._gcb ._ab [_efec ];
|
||
if _faffg {_cded ._cgg =_fbec ._gcb ._d ;return _cded ._dffg ,nil ;};};_baad ,_gdab :=_fbec .getFontDirect (_efec );if _gdab !=nil {return nil ,_gdab ;};if _fbec ._gcb ._ab !=nil {_edbc :=fontEntry {_baad ,_fbec ._gcb ._d };if len (_fbec ._gcb ._ab )>=_cgdg {var _daba []string ;
|
||
for _deba :=range _fbec ._gcb ._ab {_daba =append (_daba ,_deba );};_b .Slice (_daba ,func (_cfdfb ,_cef int )bool {return _fbec ._gcb ._ab [_daba [_cfdfb ]]._cgg < _fbec ._gcb ._ab [_daba [_cef ]]._cgg ;});delete (_fbec ._gcb ._ab ,_daba [0]);};_fbec ._gcb ._ab [_efec ]=_edbc ;
|
||
};return _baad ,nil ;};func _faca (_addbb []_af .PdfObject )(_gfbge ,_aebe float64 ,_ffbe error ){if len (_addbb )!=2{return 0,0,_bc .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_addbb ));
|
||
};_dbgc ,_ffbe :=_af .GetNumbersAsFloat (_addbb );if _ffbe !=nil {return 0,0,_ffbe ;};return _dbgc [0],_dbgc [1],nil ;};func _gggab (_dcf _gaa .Point )_gaa .Matrix {return _gaa .TranslationMatrix (_dcf .X ,_dcf .Y )};func (_fegec *wordBag )depthRange (_gefb ,_acba int )[]int {var _bcfa []int ;
|
||
for _cdc :=range _fegec ._eaba {if _gefb <=_cdc &&_cdc <=_acba {_bcfa =append (_bcfa ,_cdc );};};if len (_bcfa )==0{return nil ;};_b .Ints (_bcfa );return _bcfa ;};func (_addb *textWord )addDiacritic (_bgeba string ){_cbcc :=_addb ._badb [len (_addb ._badb )-1];
|
||
_cbcc ._gbgc +=_bgeba ;_cbcc ._gbgc =_c .NFKC .String (_cbcc ._gbgc );};func (_cegb *textLine )pullWord (_cfae *wordBag ,_ced *textWord ,_gggga int ){_cegb .appendWord (_ced );_cfae .removeWord (_ced ,_gggga );};
|
||
|
||
// String returns a human readable description of `path`.
|
||
func (_fegd *subpath )String ()string {_gec :=_fegd ._edg ;_ccfe :=len (_gec );if _ccfe <=5{return _bc .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_ccfe ,_gec );};return _bc .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_ccfe ,_gec [0],_gec [1],_gec [_ccfe -1]);
|
||
};func _bgeb (_cgfc []rulingList )(rulingList ,rulingList ){var _gdadf rulingList ;for _ ,_fbdgc :=range _cgfc {_gdadf =append (_gdadf ,_fbdgc ...);};return _gdadf .vertsHorzs ();};func (_dagdf rulingList )snapToGroupsDirection ()rulingList {_dagdf .sortStrict ();
|
||
_gcdb :=make (map[*ruling ]rulingList ,len (_dagdf ));_cefd :=_dagdf [0];_fbeg :=func (_fcfdg *ruling ){_cefd =_fcfdg ;_gcdb [_cefd ]=rulingList {_fcfdg }};_fbeg (_dagdf [0]);for _ ,_bgfbd :=range _dagdf [1:]{if _bgfbd ._cda < _cefd ._cda -_cbgg {_bb .Log .Error ("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073",_cefd ,_bgfbd );
|
||
};if _bgfbd ._cda > _cefd ._cda +_cdceg {_fbeg (_bgfbd );}else {_gcdb [_cefd ]=append (_gcdb [_cefd ],_bgfbd );};};_bece :=make (map[*ruling ]float64 ,len (_gcdb ));_aedfa :=make (map[*ruling ]*ruling ,len (_dagdf ));for _gfge ,_dcfd :=range _gcdb {_bece [_gfge ]=_dcfd .mergePrimary ();
|
||
for _ ,_gbeg :=range _dcfd {_aedfa [_gbeg ]=_gfge ;};};for _ ,_egbee :=range _dagdf {_egbee ._cda =_bece [_aedfa [_egbee ]];};_cbdb :=make (rulingList ,0,len (_dagdf ));for _ ,_ccbe :=range _gcdb {_aefc :=_ccbe .splitSec ();for _fdfgd ,_fdbgf :=range _aefc {_bgbde :=_fdbgf .merge ();
|
||
if len (_cbdb )> 0{_bbgd :=_cbdb [len (_cbdb )-1];if _bbgd .alignsPrimary (_bgbde )&&_bbgd .alignsSec (_bgbde ){_bb .Log .Error ("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073",_fdfgd ,_bbgd ,_bgbde );
|
||
continue ;};};_cbdb =append (_cbdb ,_bgbde );};};_cbdb .sortStrict ();return _cbdb ;};func (_eeac *wordBag )removeDuplicates (){if _dbaf {_bb .Log .Info ("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_eeac .text ());
|
||
};for _ ,_gdcga :=range _eeac .depthIndexes (){if len (_eeac ._eaba [_gdcga ])==0{continue ;};_fgfb :=_eeac ._eaba [_gdcga ][0];_acgb :=_fgfe *_fgfb ._aggf ;_bcbda :=_fgfb ._efaba ;for _ ,_dbac :=range _eeac .depthBand (_bcbda ,_bcbda +_acgb ){_edadf :=map[*textWord ]struct{}{};
|
||
_acc :=_eeac ._eaba [_dbac ];for _ ,_bgbcc :=range _acc {if _ ,_bfdg :=_edadf [_bgbcc ];_bfdg {continue ;};for _ ,_adde :=range _acc {if _ ,_ccbc :=_edadf [_adde ];_ccbc {continue ;};if _adde !=_bgbcc &&_adde ._caee ==_bgbcc ._caee &&_be .Abs (_adde .Llx -_bgbcc .Llx )< _acgb &&_be .Abs (_adde .Urx -_bgbcc .Urx )< _acgb &&_be .Abs (_adde .Lly -_bgbcc .Lly )< _acgb &&_be .Abs (_adde .Ury -_bgbcc .Ury )< _acgb {_edadf [_adde ]=struct{}{};
|
||
};};};if len (_edadf )> 0{_gfac :=0;for _ ,_gfbef :=range _acc {if _ ,_cdfg :=_edadf [_gfbef ];!_cdfg {_acc [_gfac ]=_gfbef ;_gfac ++;};};_eeac ._eaba [_dbac ]=_acc [:len (_acc )-len (_edadf )];if len (_eeac ._eaba [_dbac ])==0{delete (_eeac ._eaba ,_dbac );
|
||
};};};};};func (_daf *textObject )moveTextSetLeading (_gceb ,_gage float64 ){_daf ._add ._cad =-_gage ;_daf .moveLP (_gceb ,_gage );};func _fgcgb (_cgbgd ,_gebge _gaa .Point )bool {_fdfgf :=_be .Abs (_cgbgd .X -_gebge .X );_ceede :=_be .Abs (_cgbgd .Y -_gebge .Y );
|
||
return _fgae (_fdfgf ,_ceede );};
|
||
|
||
// PageText represents the layout of text on a device page.
|
||
type PageText struct{_ecca []*textMark ;_caa string ;_ceb []TextMark ;_cfbe []TextTable ;_aefg _eb .PdfRectangle ;_cfec []pathSection ;_gde []pathSection ;};
|
||
|
||
// ToText returns the page text as a single string.
|
||
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
|
||
// Text() instead.
|
||
func (_bde PageText )ToText ()string {return _bde .Text ()};func (_fafdb *shapesState )fill (_dba *[]pathSection ){_bfff :=pathSection {_gaac :_fafdb ._ege ,Color :_fafdb ._babd .getFillColor ()};*_dba =append (*_dba ,_bfff );if _gfba {_abbe :=_bfff .bbox ();
|
||
_bc .Printf ("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",len (*_dba ),len (_bfff ._gaac ),_fafdb ,_bfff .Color ,_abbe ,_abbe .Width (),_abbe .Height ());
|
||
if _bgf {for _bag ,_eace :=range _bfff ._gaac {_bc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_bag ,_eace );if _bag ==10{break ;};};};};};func (_ecgcgf *textWord )appendMark (_bfga *textMark ,_dccgd _eb .PdfRectangle ){_ecgcgf ._badb =append (_ecgcgf ._badb ,_bfga );
|
||
_ecgcgf .PdfRectangle =_agege (_ecgcgf .PdfRectangle ,_bfga .PdfRectangle );if _bfga ._deedb > _ecgcgf ._aggf {_ecgcgf ._aggf =_bfga ._deedb ;};_ecgcgf ._efaba =_dccgd .Ury -_ecgcgf .PdfRectangle .Lly ;};func _abae (_geegc _eb .PdfRectangle ,_gacgd ,_gaaag ,_cbfbf ,_fcbb *ruling )gridTile {_ebafd :=_geegc .Llx ;
|
||
_adgfd :=_geegc .Urx ;_bdcb :=_geegc .Lly ;_bdggf :=_geegc .Ury ;return gridTile {PdfRectangle :_geegc ,_gbde :_gacgd !=nil &&_gacgd .encloses (_bdcb ,_bdggf ),_cacc :_gaaag !=nil &&_gaaag .encloses (_bdcb ,_bdggf ),_aeafd :_cbfbf !=nil &&_cbfbf .encloses (_ebafd ,_adgfd ),_eegfc :_fcbb !=nil &&_fcbb .encloses (_ebafd ,_adgfd )};
|
||
};func (_deeab rulingList )sortStrict (){_b .Slice (_deeab ,func (_ecceb ,_fdeg int )bool {_ddgf ,_eecea :=_deeab [_ecceb ],_deeab [_fdeg ];_eee ,_ceab :=_ddgf ._bdfg ,_eecea ._bdfg ;if _eee !=_ceab {return _eee > _ceab ;};_edgc ,_fgbcf :=_ddgf ._cda ,_eecea ._cda ;
|
||
if !_dede (_edgc -_fgbcf ){return _edgc < _fgbcf ;};_edgc ,_fgbcf =_ddgf ._dbce ,_eecea ._dbce ;if _edgc !=_fgbcf {return _edgc < _fgbcf ;};return _ddgf ._daafc < _eecea ._daafc ;});};func (_dfda *textTable )emptyRow (_ccbca int )bool {for _caae :=0;_caae < _dfda ._gfgdf ;
|
||
_caae ++{_caad :=_dfda .get (_caae ,_ccbca );if _caad !=nil &&_caad .text ()!=""{return false ;};};return true ;};func (_egaf *textPara )depth ()float64 {if _egaf ._dgcce {return -1.0;};if len (_egaf ._ccaa )> 0{return _egaf ._ccaa [0]._eggg ;};return _egaf ._dcfac .depth ();
|
||
};func (_bce *wordBag )firstReadingIndex (_cggd int )int {_bbb :=_bce .firstWord (_cggd )._aggf ;_abce :=float64 (_cggd +1)*_cccb ;_eabg :=_abce +_cbbb *_bbb ;_agdc :=_cggd ;for _ ,_bggf :=range _bce .depthBand (_abce ,_eabg ){if _ddd (_bce .firstWord (_bggf ),_bce .firstWord (_agdc ))< 0{_agdc =_bggf ;
|
||
};};return _agdc ;};func _fgcc (_bgbg ,_cfge bounded )float64 {_beag :=_dcac (_bgbg ,_cfge );if !_dede (_beag ){return _beag ;};return _ddd (_bgbg ,_cfge );};func _bafc (_gdgc _eb .PdfRectangle ,_bcgg bounded )float64 {return _gdgc .Ury -_bcgg .bbox ().Lly };
|
||
func (_caba *textTable )logComposite (_gfcd string ){if !_cece {return ;};_bb .Log .Info ("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_caba ._gfgdf ,_caba ._ggca ,_gfcd );_bc .Printf ("\u0025\u0035\u0073 \u007c","");
|
||
for _cacf :=0;_cacf < _caba ._gfgdf ;_cacf ++{_bc .Printf ("\u0025\u0033\u0064 \u007c",_cacf );};_bc .Println ("");_bc .Printf ("\u0025\u0035\u0073 \u002b","");for _egacb :=0;_egacb < _caba ._gfgdf ;_egacb ++{_bc .Printf ("\u0025\u0033\u0073 \u002b","\u002d\u002d\u002d");
|
||
};_bc .Println ("");for _dacgb :=0;_dacgb < _caba ._ggca ;_dacgb ++{_bc .Printf ("\u0025\u0035\u0064 \u007c",_dacgb );for _cbaad :=0;_cbaad < _caba ._gfgdf ;_cbaad ++{_fdgf ,_ :=_caba ._bcdge [_fcbc (_cbaad ,_dacgb )].parasBBox ();_bc .Printf ("\u0025\u0033\u0064 \u007c",len (_fdgf ));
|
||
};_bc .Println ("");};_bb .Log .Info ("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_caba ._gfgdf ,_caba ._ggca ,_gfcd );_bc .Printf ("\u0025\u0035\u0073 \u007c","");for _ecfbg :=0;_ecfbg < _caba ._gfgdf ;_ecfbg ++{_bc .Printf ("\u0025\u0031\u0032\u0064\u0020\u007c",_ecfbg );
|
||
};_bc .Println ("");_bc .Printf ("\u0025\u0035\u0073 \u002b","");for _fgdge :=0;_fgdge < _caba ._gfgdf ;_fgdge ++{_bc .Print ("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b");};_bc .Println ("");for _dcafg :=0;_dcafg < _caba ._ggca ;
|
||
_dcafg ++{_bc .Printf ("\u0025\u0035\u0064 \u007c",_dcafg );for _fdfd :=0;_fdfd < _caba ._gfgdf ;_fdfd ++{_gafa ,_ :=_caba ._bcdge [_fcbc (_fdfd ,_dcafg )].parasBBox ();_abegg :="";_fcfa :=_gafa .merge ();if _fcfa !=nil {_abegg =_fcfa .text ();};_abegg =_bc .Sprintf ("\u0025\u0071",_geff (_abegg ,12));
|
||
_abegg =_abegg [1:len (_abegg )-1];_bc .Printf ("\u0025\u0031\u0032\u0073\u0020\u007c",_abegg );};_bc .Println ("");};};func (_dca *imageExtractContext )extractContentStreamImages (_afg string ,_bf *_eb .PdfPageResources )error {_ec :=_bg .NewContentStreamParser (_afg );
|
||
_bbd ,_cgd :=_ec .Parse ();if _cgd !=nil {return _cgd ;};if _dca ._gaf ==nil {_dca ._gaf =map[*_af .PdfObjectStream ]*cachedImage {};};if _dca ._bgb ==nil {_dca ._bgb =&ImageExtractOptions {};};_ff :=_bg .NewContentStreamProcessor (*_bbd );_ff .AddHandler (_bg .HandlerConditionEnumAllOperands ,"",_dca .processOperand );
|
||
return _ff .Process (_bf );};func _dede (_cbaac float64 )bool {return _be .Abs (_cbaac )< _cbgg };func (_afdc *shapesState )closePath (){if _afdc ._gdef {_afdc ._ege =append (_afdc ._ege ,_gadg (_afdc ._ebgfc ));_afdc ._gdef =false ;}else if len (_afdc ._ege )==0{if _agbcg {_bb .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");
|
||
};_afdc ._gdef =false ;return ;};_afdc ._ege [len (_afdc ._ege )-1].close ();if _agbcg {_bb .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_afdc );};};func _aeeg (_ebbd *PageText )error {_cffdf :=_ebg .GetLicenseKey ();
|
||
if _cffdf !=nil &&_cffdf .IsLicensed ()||_fa {return nil ;};_bc .Printf ("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a");_bc .Println ("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f");
|
||
return _f .New ("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064");};func (_faba *subpath )removeDuplicates (){if len (_faba ._edg )==0{return ;};_aac :=[]_gaa .Point {_faba ._edg [0]};
|
||
for _ ,_bcbd :=range _faba ._edg [1:]{if !_bgdg (_bcbd ,_aac [len (_aac )-1]){_aac =append (_aac ,_bcbd );};};_faba ._edg =_aac ;};func _aecg (_abdg *textWord ,_dfbb float64 ,_ggfb ,_fdbc rulingList )*wordBag {_dfbeg :=_aag (_abdg ._efaba );_ecab :=[]*textWord {_abdg };
|
||
_fbba :=wordBag {_eaba :map[int ][]*textWord {_dfbeg :_ecab },PdfRectangle :_abdg .PdfRectangle ,_cgad :_abdg ._aggf ,_decb :_dfbb ,_baaf :_ggfb ,_eef :_fdbc };return &_fbba ;};
|
||
|
||
// String returns a description of `b`.
|
||
func (_egea *wordBag )String ()string {var _fba []string ;for _ ,_cggc :=range _egea .depthIndexes (){_gbcc :=_egea ._eaba [_cggc ];for _ ,_begb :=range _gbcc {_fba =append (_fba ,_begb ._caee );};};return _bc .Sprintf ("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071",_egea .PdfRectangle ,_egea ._cgad ,len (_fba ),_fba );
|
||
};func (_dcbb rulingList )asTiling ()gridTiling {if _ecbd {_bb .Log .Info ("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_dcbb ));
|
||
};for _dcee ,_cccgg :=range _dcbb [1:]{_egefd :=_dcbb [_dcee ];if _egefd .alignsPrimary (_cccgg )&&_egefd .alignsSec (_cccgg ){_bb .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_cccgg ,_egefd );
|
||
};};_dcbb .sortStrict ();_dcbb .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_dddd ,_fgbge :=_dcbb .vertsHorzs ();_ddgg :=_dddd .primaries ();_bfbbb :=_fgbge .primaries ();_gaged :=len (_ddgg )-1;_gbbcg :=len (_bfbbb )-1;if _gaged ==0||_gbbcg ==0{return gridTiling {};
|
||
};_fdfba :=_eb .PdfRectangle {Llx :_ddgg [0],Urx :_ddgg [_gaged ],Lly :_bfbbb [0],Ury :_bfbbb [_gbbcg ]};if _ecbd {_bb .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_dddd ));
|
||
for _gcgbg ,_cgddf :=range _dddd {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gcgbg ,_cgddf );};_bb .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_fgbge ));
|
||
for _eggd ,_efca :=range _fgbge {_bc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_eggd ,_efca );};_bb .Log .Info ("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_gaged ,_gbbcg ,_ddgg ,_bfbbb );
|
||
};_gffdd :=make ([]gridTile ,_gaged *_gbbcg );for _cdgg :=_gbbcg -1;_cdgg >=0;_cdgg --{_bdba :=_bfbbb [_cdgg ];_efgg :=_bfbbb [_cdgg +1];for _acffe :=0;_acffe < _gaged ;_acffe ++{_gdff :=_ddgg [_acffe ];_eddc :=_ddgg [_acffe +1];_gggcg :=_dddd .findPrimSec (_gdff ,_bdba );
|
||
_abcf :=_dddd .findPrimSec (_eddc ,_bdba );_edgd :=_fgbge .findPrimSec (_bdba ,_gdff );_aaeg :=_fgbge .findPrimSec (_efgg ,_gdff );_fegg :=_eb .PdfRectangle {Llx :_gdff ,Urx :_eddc ,Lly :_bdba ,Ury :_efgg };_fefa :=_abae (_fegg ,_gggcg ,_abcf ,_edgd ,_aaeg );
|
||
_gffdd [_cdgg *_gaged +_acffe ]=_fefa ;if _ecbd {_bc .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_acffe ,_cdgg ,_fefa .String (),_fefa .Width (),_fefa .Height ());
|
||
};};};if _ecbd {_bb .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_fdfba );
|
||
};_geba :=make ([]map[float64 ]gridTile ,_gbbcg );for _edca :=_gbbcg -1;_edca >=0;_edca --{if _ecbd {_bc .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_edca );};_geba [_edca ]=make (map[float64 ]gridTile ,_gaged );for _fadg :=0;_fadg < _gaged ;
|
||
_fadg ++{_efea :=_gffdd [_edca *_gaged +_fadg ];if _ecbd {_bc .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fadg ,_efea );};if !_efea ._gbde {continue ;};_afbc :=_fadg ;for _fcfd :=_fadg +1;!_efea ._cacc &&_fcfd < _gaged ;_fcfd ++{_bdfe :=_gffdd [_edca *_gaged +_fcfd ];
|
||
_efea .Urx =_bdfe .Urx ;_efea ._eegfc =_efea ._eegfc ||_bdfe ._eegfc ;_efea ._aeafd =_efea ._aeafd ||_bdfe ._aeafd ;_efea ._cacc =_bdfe ._cacc ;if _ecbd {_bc .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_fcfd ,_bdfe ,_efea );
|
||
};_afbc =_fcfd ;};if _ecbd {_bc .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_fadg ,_afbc ,_efea );};_fadg =_afbc ;_geba [_edca ][_efea .Llx ]=_efea ;};};_egdg :=make (map[float64 ]map[float64 ]gridTile ,_gbbcg );
|
||
_agfd :=make (map[float64 ]map[float64 ]struct{},_gbbcg );for _fdaa :=_gbbcg -1;_fdaa >=0;_fdaa --{_bfcaf :=_gffdd [_fdaa *_gaged ].Lly ;_egdg [_bfcaf ]=make (map[float64 ]gridTile ,_gaged );_agfd [_bfcaf ]=make (map[float64 ]struct{},_gaged );};if _ecbd {_bb .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_fdfba );
|
||
};for _gbfbe :=_gbbcg -1;_gbfbe >=0;_gbfbe --{_egdd :=_gffdd [_gbfbe *_gaged ].Lly ;_eccd :=_geba [_gbfbe ];if _ecbd {_bc .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_gbfbe );};for _ ,_ebdc :=range _gdbb (_eccd ){if _ ,_abgf :=_agfd [_egdd ][_ebdc ];
|
||
_abgf {continue ;};_gaaa :=_eccd [_ebdc ];if _ecbd {_bc .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_gaaa .String ());};for _dcefg :=_gbfbe -1;_dcefg >=0;_dcefg --{if _gaaa ._aeafd {break ;};_bfdc :=_geba [_dcefg ];_abeg ,_dfdb :=_bfdc [_ebdc ];
|
||
if !_dfdb {break ;};if _abeg .Urx !=_gaaa .Urx {break ;};_gaaa ._aeafd =_abeg ._aeafd ;_gaaa .Lly =_abeg .Lly ;if _ecbd {_bc .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_abeg .String (),_gaaa .String ());
|
||
};_agfd [_abeg .Lly ][_abeg .Llx ]=struct{}{};};if _gbfbe ==0{_gaaa ._aeafd =true ;};if _gaaa .complete (){_egdg [_egdd ][_ebdc ]=_gaaa ;};};};_acge :=gridTiling {PdfRectangle :_fdfba ,_abdf :_aabf (_egdg ),_bgcf :_bgcg (_egdg ),_bfbgd :_egdg };_acge .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");
|
||
return _acge ;};func (_fbabc *textWord )toTextMarks (_gdbbe *int )[]TextMark {var _fgged []TextMark ;for _ ,_fdgc :=range _fbabc ._badb {_fgged =_cbba (_fgged ,_gdbbe ,_fdgc .ToTextMark ());};return _fgged ;};func (_eabb *textLine )bbox ()_eb .PdfRectangle {return _eabb .PdfRectangle };
|
||
func (_cbcbf *textTable )get (_feab ,_bcbg int )*textPara {return _cbcbf ._ecac [_fcbc (_feab ,_bcbg )]};func (_defa paraList )yNeighbours (_ffec float64 )map[*textPara ][]int {_aaff :=make ([]event ,2*len (_defa ));if _ffec ==0{for _daefa ,_fgbab :=range _defa {_aaff [2*_daefa ]=event {_fgbab .Lly ,true ,_daefa };
|
||
_aaff [2*_daefa +1]=event {_fgbab .Ury ,false ,_daefa };};}else {for _cbbd ,_becee :=range _defa {_aaff [2*_cbbd ]=event {_becee .Lly -_ffec *_becee .fontsize (),true ,_cbbd };_aaff [2*_cbbd +1]=event {_becee .Ury +_ffec *_becee .fontsize (),false ,_cbbd };
|
||
};};return _defa .eventNeighbours (_aaff );};func (_afcd *textPara )toCellTextMarks (_dgccg *int )[]TextMark {var _fged []TextMark ;for _gdfg ,_cdbga :=range _afcd ._ccaa {_decdb :=_cdbga .toTextMarks (_dgccg );_daed :=_fcca &&_cdbga .endsInHyphen ()&&_gdfg !=len (_afcd ._ccaa )-1;
|
||
if _daed {_decdb =_fecc (_decdb ,_dgccg );};_fged =append (_fged ,_decdb ...);if !(_daed ||_gdfg ==len (_afcd ._ccaa )-1){_fged =_deef (_fged ,_dgccg ,_abfb (_cdbga ._eggg ,_afcd ._ccaa [_gdfg +1]._eggg ));};};return _fged ;};func (_egdb rulingList )primaries ()[]float64 {_bfee :=make (map[float64 ]struct{},len (_egdb ));
|
||
for _ ,_eefg :=range _egdb {_bfee [_eefg ._cda ]=struct{}{};};_aacc :=make ([]float64 ,len (_bfee ));_edaf :=0;for _dgef :=range _bfee {_aacc [_edaf ]=_dgef ;_edaf ++;};_b .Float64s (_aacc );return _aacc ;};
|
||
|
||
// ExtractPageImages returns the image contents of the page extractor, including data
|
||
// and position, size information for each image.
|
||
// A set of options to control page image extraction can be passed in. The options
|
||
// parameter can be nil for the default options. By default, inline stencil masks
|
||
// are not extracted.
|
||
func (_fb *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_da :=&imageExtractContext {_bgb :options };_dc :=_da .extractContentStreamImages (_fb ._ag ,_fb ._bca );if _dc !=nil {return nil ,_dc ;};return &PageImages {Images :_da ._abc },nil ;
|
||
}; |