mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-26 13:48:55 +08:00
863 lines
188 KiB
Go
863 lines
188 KiB
Go
//
|
||
// Copyright 2020 FoxyUtils ehf. All rights reserved.
|
||
//
|
||
// This is a commercial product and requires a license to operate.
|
||
// A trial license can be obtained at https://unidoc.io
|
||
//
|
||
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
|
||
//
|
||
// Use of this source code is governed by the UniDoc End User License Agreement
|
||
// terms that can be accessed at https://unidoc.io/eula/
|
||
|
||
//
|
||
// Package extractor is used for quickly extracting PDF content through a simple interface.
|
||
// Currently offers functionality for extracting textual content.
|
||
//
|
||
package extractor ;import (_f "bytes";_g "errors";_caa "fmt";_d "github.com/unidoc/unipdf/v3/common";_gcf "github.com/unidoc/unipdf/v3/contentstream";_aa "github.com/unidoc/unipdf/v3/core";_gd "github.com/unidoc/unipdf/v3/internal/license";_ad "github.com/unidoc/unipdf/v3/internal/textencoding";
|
||
_bab "github.com/unidoc/unipdf/v3/internal/transform";_dc "github.com/unidoc/unipdf/v3/model";_ac "golang.org/x/text/unicode/norm";_af "golang.org/x/xerrors";_ba "image/color";_gc "io";_ca "math";_c "regexp";_e "sort";_ce "strings";_bb "unicode";_b "unicode/utf8";
|
||
);var _aefa =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_ba .White ,StrokeColor :_ba .White };func (_ecbf gridTile )numBorders ()int {_ecge :=0;if _ecbf ._gcfbe {_ecge ++;};if _ecbf ._ebbb {_ecge ++;};if _ecbf ._ecbgb {_ecge ++;
|
||
};if _ecbf ._faaa {_ecge ++;};return _ecge ;};func (_afcb rulingList )log (_fade string ){if !_cage {return ;};_d .Log .Info ("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_fade ,_afcb .String ());for _abdae ,_dgebc :=range _afcb {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_abdae ,_dgebc .String ());
|
||
};};func (_fbfe *textTable )computeBbox ()_dc .PdfRectangle {var _ebfa _dc .PdfRectangle ;_fbgb :=false ;for _dcaeb :=0;_dcaeb < _fbfe ._fcedd ;_dcaeb ++{for _dacfe :=0;_dacfe < _fbfe ._bgdee ;_dacfe ++{_gddad :=_fbfe .get (_dacfe ,_dcaeb );if _gddad ==nil {continue ;
|
||
};if !_fbgb {_ebfa =_gddad .PdfRectangle ;_fbgb =true ;}else {_ebfa =_abga (_ebfa ,_gddad .PdfRectangle );};};};return _ebfa ;};
|
||
|
||
// Extractor stores and offers functionality for extracting content from PDF pages.
|
||
type Extractor struct{_aaf string ;_gdc *_dc .PdfPageResources ;_cbb _dc .PdfRectangle ;_aae *_dc .PdfRectangle ;_ab map[string ]fontEntry ;_bf map[string ]textResult ;_abd int64 ;_ff int ;_da *Options ;};type markKind int ;func (_aca *textPara )depth ()float64 {if _aca ._bedda {return -1.0;
|
||
};if len (_aca ._ddaf )> 0{return _aca ._ddaf [0]._gddd ;};return _aca ._affa .depth ();};func (_acgf compositeCell )split (_gffbc ,_bgagd []float64 )*textTable {_eeeg :=len (_gffbc )+1;_eegdcf :=len (_bgagd )+1;if _eea {_d .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_eegdcf ,_eeeg ,_acgf ,_gffbc ,_bgagd );
|
||
_caa .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_acgf .paraList ));for _dagf ,_bbdg :=range _acgf .paraList {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dagf ,_bbdg .String ());};
|
||
_caa .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_acgf .lines ()));for _beaa ,_gded :=range _acgf .lines (){_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_beaa ,_gded );};};_gffbc =_aebga (_gffbc ,_acgf .Ury ,_acgf .Lly );
|
||
_bgagd =_aebga (_bgagd ,_acgf .Llx ,_acgf .Urx );_bfcb :=make (map[uint64 ]*textPara ,_eegdcf *_eeeg );_bede :=textTable {_bgdee :_eegdcf ,_fcedd :_eeeg ,_fddab :_bfcb };_acgc :=_acgf .paraList ;_e .Slice (_acgc ,func (_fbbbd ,_bcdgb int )bool {_abceg ,_eaeg :=_acgc [_fbbbd ],_acgc [_bcdgb ];
|
||
_bbfb ,_daaa :=_abceg .Lly ,_eaeg .Lly ;if _bbfb !=_daaa {return _bbfb < _daaa ;};return _abceg .Llx < _eaeg .Llx ;});_dfab :=make (map[uint64 ]_dc .PdfRectangle ,_eegdcf *_eeeg );for _fdff ,_afcda :=range _gffbc [1:]{_cddb :=_gffbc [_fdff ];for _faba ,_fggda :=range _bgagd [1:]{_eeef :=_bgagd [_faba ];
|
||
_dfab [_abdec (_faba ,_fdff )]=_dc .PdfRectangle {Llx :_eeef ,Urx :_fggda ,Lly :_afcda ,Ury :_cddb };};};if _eea {_d .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073");
|
||
_caa .Printf ("\u0020\u0020\u0020\u0020");for _cfed :=0;_cfed < _eegdcf ;_cfed ++{_caa .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_cfed );};_caa .Println ();for _ageag :=0;_ageag < _eeeg ;_ageag ++{_caa .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_ageag );
|
||
for _bfca :=0;_bfca < _eegdcf ;_bfca ++{_caa .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_dfab [_abdec (_bfca ,_ageag )]);};_caa .Println ();};};_afad :=func (_febaa *textLine )(int ,int ){for _ffde :=0;_ffde < _eeeg ;_ffde ++{for _efcff :=0;_efcff < _eegdcf ;
|
||
_efcff ++{if _gcdf (_dfab [_abdec (_efcff ,_ffde )],_febaa .PdfRectangle ){return _efcff ,_ffde ;};};};return -1,-1;};_ggdg :=make (map[uint64 ][]*textLine ,_eegdcf *_eeeg );for _ ,_ffeg :=range _acgc .lines (){_ggcc ,_aeed :=_afad (_ffeg );if _ggcc < 0{continue ;
|
||
};_ggdg [_abdec (_ggcc ,_aeed )]=append (_ggdg [_abdec (_ggcc ,_aeed )],_ffeg );};for _eadc :=0;_eadc < len (_gffbc )-1;_eadc ++{_ffee :=_gffbc [_eadc ];_fcaac :=_gffbc [_eadc +1];for _fffd :=0;_fffd < len (_bgagd )-1;_fffd ++{_adda :=_bgagd [_fffd ];_fbbbg :=_bgagd [_fffd +1];
|
||
_bgaaf :=_dc .PdfRectangle {Llx :_adda ,Urx :_fbbbg ,Lly :_fcaac ,Ury :_ffee };_dgbe :=_ggdg [_abdec (_fffd ,_eadc )];if len (_dgbe )==0{continue ;};_agad :=_dcfc (_bgaaf ,_dgbe );_bede .put (_fffd ,_eadc ,_agad );};};return &_bede ;};func (_agfc *textTable )logComposite (_ggaab string ){if !_eea {return ;
|
||
};_d .Log .Info ("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_agfc ._bgdee ,_agfc ._fcedd ,_ggaab );_caa .Printf ("\u0025\u0035\u0073 \u007c","");for _gdgg :=0;_gdgg < _agfc ._bgdee ;_gdgg ++{_caa .Printf ("\u0025\u0033\u0064 \u007c",_gdgg );
|
||
};_caa .Println ("");_caa .Printf ("\u0025\u0035\u0073 \u002b","");for _fdeg :=0;_fdeg < _agfc ._bgdee ;_fdeg ++{_caa .Printf ("\u0025\u0033\u0073 \u002b","\u002d\u002d\u002d");};_caa .Println ("");for _bcag :=0;_bcag < _agfc ._fcedd ;_bcag ++{_caa .Printf ("\u0025\u0035\u0064 \u007c",_bcag );
|
||
for _ffaa :=0;_ffaa < _agfc ._bgdee ;_ffaa ++{_eedf ,_ :=_agfc ._fgge [_abdec (_ffaa ,_bcag )].parasBBox ();_caa .Printf ("\u0025\u0033\u0064 \u007c",len (_eedf ));};_caa .Println ("");};_d .Log .Info ("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_agfc ._bgdee ,_agfc ._fcedd ,_ggaab );
|
||
_caa .Printf ("\u0025\u0035\u0073 \u007c","");for _eeaag :=0;_eeaag < _agfc ._bgdee ;_eeaag ++{_caa .Printf ("\u0025\u0031\u0032\u0064\u0020\u007c",_eeaag );};_caa .Println ("");_caa .Printf ("\u0025\u0035\u0073 \u002b","");for _ggaff :=0;_ggaff < _agfc ._bgdee ;
|
||
_ggaff ++{_caa .Print ("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b");};_caa .Println ("");for _adfde :=0;_adfde < _agfc ._fcedd ;_adfde ++{_caa .Printf ("\u0025\u0035\u0064 \u007c",_adfde );for _afgd :=0;_afgd < _agfc ._bgdee ;
|
||
_afgd ++{_ggfde ,_ :=_agfc ._fgge [_abdec (_afgd ,_adfde )].parasBBox ();_cafg :="";_edfed :=_ggfde .merge ();if _edfed !=nil {_cafg =_edfed .text ();};_cafg =_caa .Sprintf ("\u0025\u0071",_ebfce (_cafg ,12));_cafg =_cafg [1:len (_cafg )-1];_caa .Printf ("\u0025\u0031\u0032\u0073\u0020\u007c",_cafg );
|
||
};_caa .Println ("");};};func (_decb *shapesState )stroke (_cgcad *[]pathSection ){_cea :=pathSection {_bffc :_decb ._bfg ,Color :_decb ._degb .getStrokeColor ()};*_cgcad =append (*_cgcad ,_cea );if _cage {_caa .Printf ("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",len (*_cgcad ),_decb ,_decb ._degb .getStrokeColor (),_cea .bbox ());
|
||
if _eabg {for _ecc ,_cdge :=range _decb ._bfg {_caa .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_ecc ,_cdge );if _ecc ==10{break ;};};};};};func (_gbbe paraList )findTextTables ()[]*textTable {var _gfdg []*textTable ;for _ ,_deeeb :=range _gbbe {if _deeeb .taken ()||_deeeb .Width ()==0{continue ;
|
||
};_aaeag :=_deeeb .isAtom ();if _aaeag ==nil {continue ;};_aaeag .growTable ();if _aaeag ._bgdee *_aaeag ._fcedd < _baeag {continue ;};_aaeag .markCells ();_aaeag .log ("\u0067\u0072\u006fw\u006e");_gfdg =append (_gfdg ,_aaeag );};return _gfdg ;};func (_afe *shapesState )quadraticTo (_dbgf ,_bcb ,_fddb ,_edfc float64 ){if _fgeac {_d .Log .Info ("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a");
|
||
};_afe .addPoint (_fddb ,_edfc );};func (_dba *shapesState )clearPath (){_dba ._bfg =nil ;_dba ._dcgc =false ;if _fgeac {_d .Log .Info ("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073",_dba );};};func (_ggdd paraList )sortReadingOrder (){_d .Log .Trace ("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_ggdd ));
|
||
if len (_ggdd )<=1{return ;};_ggdd .computeEBBoxes ();_e .Slice (_ggdd ,func (_decf ,_bgeea int )bool {return _aecf (_ggdd [_decf ],_ggdd [_bgeea ])<=0});_geea :=_ggdd .topoOrder ();_ggdd .reorder (_geea );};func (_eddd *textObject )newTextMark (_dcaf string ,_fgdbg _bab .Matrix ,_bbcf _bab .Point ,_cecf float64 ,_abdd *_dc .PdfFont ,_fabe float64 ,_bcca ,_bggc _ba .Color ,_afggc _aa .PdfObject ,_egfe []string ,_ecbb int )(textMark ,bool ){_gedfb :=_fgdbg .Angle ();
|
||
_acec :=_cbbb (_gedfb ,_aaaa );var _edgf float64 ;if _acec %180!=90{_edgf =_fgdbg .ScalingFactorY ();}else {_edgf =_fgdbg .ScalingFactorX ();};_egcee :=_fdacf (_fgdbg );_cccf :=_dc .PdfRectangle {Llx :_egcee .X ,Lly :_egcee .Y ,Urx :_bbcf .X ,Ury :_bbcf .Y };
|
||
switch _acec %360{case 90:_cccf .Urx -=_edgf ;case 180:_cccf .Ury -=_edgf ;case 270:_cccf .Urx +=_edgf ;case 0:_cccf .Ury +=_edgf ;default:_acec =0;_cccf .Ury +=_edgf ;};if _cccf .Llx > _cccf .Urx {_cccf .Llx ,_cccf .Urx =_cccf .Urx ,_cccf .Llx ;};if _cccf .Lly > _cccf .Ury {_cccf .Lly ,_cccf .Ury =_cccf .Ury ,_cccf .Lly ;
|
||
};_bacbg :=true ;if _eddd ._ebe ._cbb .Width ()> 0{_gfabb ,_fabf :=_cedb (_cccf ,_eddd ._ebe ._cbb );if !_fabf {_bacbg =false ;_d .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_cccf ,_eddd ._ebe ._cbb ,_dcaf );
|
||
};_cccf =_gfabb ;};_ccab :=_cccf ;_adee :=_eddd ._ebe ._cbb ;switch _acec %360{case 90:_adee .Urx ,_adee .Ury =_adee .Ury ,_adee .Urx ;_ccab =_dc .PdfRectangle {Llx :_adee .Urx -_cccf .Ury ,Urx :_adee .Urx -_cccf .Lly ,Lly :_cccf .Llx ,Ury :_cccf .Urx };
|
||
case 180:_ccab =_dc .PdfRectangle {Llx :_adee .Urx -_cccf .Llx ,Urx :_adee .Urx -_cccf .Urx ,Lly :_adee .Ury -_cccf .Lly ,Ury :_adee .Ury -_cccf .Ury };case 270:_adee .Urx ,_adee .Ury =_adee .Ury ,_adee .Urx ;_ccab =_dc .PdfRectangle {Llx :_cccf .Ury ,Urx :_cccf .Lly ,Lly :_adee .Ury -_cccf .Llx ,Ury :_adee .Ury -_cccf .Urx };
|
||
};if _ccab .Llx > _ccab .Urx {_ccab .Llx ,_ccab .Urx =_ccab .Urx ,_ccab .Llx ;};if _ccab .Lly > _ccab .Ury {_ccab .Lly ,_ccab .Ury =_ccab .Ury ,_ccab .Lly ;};_edb :=textMark {_cbge :_dcaf ,PdfRectangle :_ccab ,_gde :_cccf ,_eead :_abdd ,_beaf :_edgf ,_bddca :_fabe ,_adcg :_fgdbg ,_ffb :_bbcf ,_gcce :_acec ,_fgeee :_bcca ,_cab :_bggc ,_abda :_afggc ,_bfga :_egfe ,Th :_eddd ._cgf ._abbd ,Tw :_eddd ._cgf ._fgd ,_gffe :_ecbb };
|
||
if _agb {_d .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_egcee ,_bbcf ,_edb .String ());};return _edb ,_bacbg ;
|
||
};func _fbag (_cdd *Extractor ,_agef *_dc .PdfPageResources ,_gab _gcf .GraphicsState ,_afga *textState ,_fceb *stateStack )*textObject {return &textObject {_ebe :_cdd ,_eff :_agef ,_fdf :_gab ,_cdbd :_fceb ,_cgf :_afga ,_aafd :_bab .IdentityMatrix (),_acd :_bab .IdentityMatrix ()};
|
||
};func _caca (_fcbf float64 )float64 {return _bacb *_ca .Round (_fcbf /_bacb )};func (_bbcb paraList )xNeighbours (_abeb float64 )map[*textPara ][]int {_afcee :=make ([]event ,2*len (_bbcb ));if _abeb ==0{for _aedd ,_eeca :=range _bbcb {_afcee [2*_aedd ]=event {_eeca .Llx ,true ,_aedd };
|
||
_afcee [2*_aedd +1]=event {_eeca .Urx ,false ,_aedd };};}else {for _fbdde ,_fbcgbd :=range _bbcb {_afcee [2*_fbdde ]=event {_fbcgbd .Llx -_abeb *_fbcgbd .fontsize (),true ,_fbdde };_afcee [2*_fbdde +1]=event {_fbcgbd .Urx +_abeb *_fbcgbd .fontsize (),false ,_fbdde };
|
||
};};return _bbcb .eventNeighbours (_afcee );};func (_babfc *wordBag )applyRemovals (_aega map[int ]map[*textWord ]struct{}){for _cacea ,_bcbf :=range _aega {if len (_bcbf )==0{continue ;};_abea :=_babfc ._adcb [_cacea ];_fab :=len (_abea )-len (_bcbf );
|
||
if _fab ==0{delete (_babfc ._adcb ,_cacea );continue ;};_aadd :=make ([]*textWord ,_fab );_bcbfg :=0;for _ ,_fdbcb :=range _abea {if _ ,_ebdg :=_bcbf [_fdbcb ];!_ebdg {_aadd [_bcbfg ]=_fdbcb ;_bcbfg ++;};};_babfc ._adcb [_cacea ]=_aadd ;};};func (_gafa *textTable )put (_ebbbc ,_adfg int ,_dadf *textPara ){_gafa ._fddab [_abdec (_ebbbc ,_adfg )]=_dadf ;
|
||
};func (_eegc *textWord )absorb (_gacf *textWord ){_eegc .PdfRectangle =_abga (_eegc .PdfRectangle ,_gacf .PdfRectangle );_eegc ._gbaed =append (_eegc ._gbaed ,_gacf ._gbaed ...);};func (_efb *textObject )setCharSpacing (_ddd float64 ){if _efb ==nil {return ;
|
||
};_efb ._cgf ._cace =_ddd ;if _gfaee {_d .Log .Info ("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073",_ddd ,_efb ._cgf .String ());};};func _ebfce (_fccff string ,_caeg int )string {if len (_fccff )< _caeg {return _fccff ;
|
||
};return _fccff [:_caeg ];};func (_acfg *compositeCell )updateBBox (){for _ ,_abddc :=range _acfg .paraList {_acfg .PdfRectangle =_abga (_acfg .PdfRectangle ,_abddc .PdfRectangle );};};func _fgbcc (_cdbf ,_gfbfa _bab .Point )rulingKind {_ffgf :=_ca .Abs (_cdbf .X -_gfbfa .X );
|
||
_ebdd :=_ca .Abs (_cdbf .Y -_gfbfa .Y );return _afgac (_ffgf ,_ebdd ,_dagaf );};type ruling struct{_fabfb rulingKind ;_fcgb markKind ;_ba .Color ;_efbdg float64 ;_becdd float64 ;_aggb float64 ;_gebag float64 ;};func _aecf (_cgfb ,_ffdg bounded )float64 {_fdge :=_aaed (_cgfb ,_ffdg );
|
||
if !_edeg (_fdge ){return _fdge ;};return _deba (_cgfb ,_ffdg );};func (_aba *PageFonts )extractPageResourcesToFont (_ge *_dc .PdfPageResources )error {_adg ,_dd :=_aa .GetDict (_ge .Font );if !_dd {return _g .New (_bbf );};for _ ,_eef :=range _adg .Keys (){var (_eac =true ;
|
||
_edfb []byte ;_bdg string ;);_baf ,_ada :=_ge .GetFontByName (_eef );if !_ada {return _g .New (_dac );};_gb ,_gf :=_dc .NewPdfFontFromPdfObject (_baf );if _gf !=nil {return _gf ;};_abag :=_gb .FontDescriptor ();_gg :=_gb .FontDescriptor ().FontName .String ();
|
||
_cd :=_gb .Subtype ();if _dca (_aba .Fonts ,_gg ){continue ;};if len (_gb .ToUnicode ())==0{_eac =false ;};if _abag .FontFile !=nil {if _de ,_agg :=_aa .GetStream (_abag .FontFile );_agg {_edfb ,_gf =_aa .DecodeStream (_de );if _gf !=nil {return _gf ;};
|
||
_bdg =_gg +"\u002e\u0070\u0066\u0062";};}else if _abag .FontFile2 !=nil {if _bac ,_fb :=_aa .GetStream (_abag .FontFile2 );_fb {_edfb ,_gf =_aa .DecodeStream (_bac );if _gf !=nil {return _gf ;};_bdg =_gg +"\u002e\u0074\u0074\u0066";};}else if _abag .FontFile3 !=nil {if _fc ,_fg :=_aa .GetStream (_abag .FontFile3 );
|
||
_fg {_edfb ,_gf =_aa .DecodeStream (_fc );if _gf !=nil {return _gf ;};_bdg =_gg +"\u002e\u0063\u0066\u0066";};};if len (_bdg )< 1{_d .Log .Debug (_eg );};_dda :=Font {FontName :_gg ,PdfFont :_gb ,IsCID :_gb .IsCID (),IsSimple :_gb .IsSimple (),ToUnicode :_eac ,FontType :_cd ,FontData :_edfb ,FontFileName :_bdg ,FontDescriptor :_abag };
|
||
_aba .Fonts =append (_aba .Fonts ,_dda );};return nil ;};func (_dgea *textLine )toTextMarks (_dafg *int )[]TextMark {var _eeff []TextMark ;for _ ,_gdcgf :=range _dgea ._ccfce {if _gdcgf ._gabe {_eeff =_egag (_eeff ,_dafg ,"\u0020");};_abdcf :=_gdcgf .toTextMarks (_dafg );
|
||
_eeff =append (_eeff ,_abdcf ...);};return _eeff ;};
|
||
|
||
// ToText returns the page text as a single string.
|
||
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
|
||
// Text() instead.
|
||
func (_dfca PageText )ToText ()string {return _dfca .Text ()};func (_dcdd *stateStack )size ()int {return len (*_dcdd )};func (_aafe rulingList )toTilings ()(rulingList ,[]gridTiling ){_aafe .log ("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s");if len (_aafe )==0{return nil ,nil ;
|
||
};_aafe =_aafe .tidied ("\u0061\u006c\u006c");_aafe .log ("\u0074\u0069\u0064\u0069\u0065\u0064");_acgfe :=_aafe .toGrids ();_fefb :=make ([]gridTiling ,len (_acgfe ));for _gbadg ,_eaba :=range _acgfe {_fefb [_gbadg ]=_eaba .asTiling ();};return _aafe ,_fefb ;
|
||
};func (_fbg *PageText )computeViews (){var _gefg rulingList ;if _efg {_aaa :=_gfge (_fbg ._ggeb );_gefg =append (_gefg ,_aaa ...);};if _abcf {_dbbc :=_cega (_fbg ._agc );_gefg =append (_gefg ,_dbbc ...);};_gefg ,_ecg :=_gefg .toTilings ();var _dbcf paraList ;
|
||
_cede :=len (_fbg ._gfc );for _dfge :=0;_dfge < 360&&_cede > 0;_dfge +=90{_dcfg :=make ([]*textMark ,0,len (_fbg ._gfc )-_cede );for _ ,_dgeb :=range _fbg ._gfc {if _dgeb ._gcce ==_dfge {_dcfg =append (_dcfg ,_dgeb );};};if len (_dcfg )> 0{_efba :=_fdef (_dcfg ,_fbg ._fdbe ,_gefg ,_ecg );
|
||
_dbcf =append (_dbcf ,_efba ...);_cede -=len (_dcfg );};};_cecgb :=new (_f .Buffer );_dbcf .writeText (_cecgb );_fbg ._dbdg =_cecgb .String ();_fbg ._efda =_dbcf .toTextMarks ();_fbg ._fdc =_dbcf .tables ();if _eea {_d .Log .Info ("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064",len (_fbg ._fdc ));
|
||
};};type textWord struct{_dc .PdfRectangle ;_cffg float64 ;_bgdg string ;_gbaed []*textMark ;_debab float64 ;_gabe bool ;};
|
||
|
||
// String returns a description of `t`.
|
||
func (_egdbe *textTable )String ()string {return _caa .Sprintf ("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074",_egdbe ._bgdee ,_egdbe ._fcedd ,_egdbe ._bebc );};func (_gcbc *shapesState )devicePoint (_fgee ,_gbgg float64 )_bab .Point {_dea :=_gcbc ._gcbe .Mult (_gcbc ._eabb );
|
||
_fgee ,_gbgg =_dea .Transform (_fgee ,_gbgg );return _bab .NewPoint (_fgee ,_gbgg );};func (_afdf *textObject )showTextAdjusted (_ddfc *_aa .PdfObjectArray )error {_cecg :=false ;for _ ,_baba :=range _ddfc .Elements (){switch _baba .(type ){case *_aa .PdfObjectFloat ,*_aa .PdfObjectInteger :_bff ,_daac :=_aa .GetNumberAsFloat (_baba );
|
||
if _daac !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_baba ,_ddfc );
|
||
return _daac ;};_cceb ,_fbfd :=-_bff *0.001*_afdf ._cgf ._fdd ,0.0;if _cecg {_fbfd ,_cceb =_cceb ,_fbfd ;};_fca :=_aec (_bab .Point {X :_cceb ,Y :_fbfd });_afdf ._aafd .Concat (_fca );case *_aa .PdfObjectString :_dbc :=_aa .TraceToDirectObject (_baba );
|
||
_eaa ,_befd :=_aa .GetStringBytes (_dbc );if !_befd {_d .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_baba ,_ddfc );
|
||
return _aa .ErrTypeError ;};_afdf .renderText (_dbc ,_eaa );default:_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_baba ,_ddfc );
|
||
return _aa .ErrTypeError ;};};return nil ;};func _badg (_bfbcc _bab .Point )*subpath {return &subpath {_bcbc :[]_bab .Point {_bfbcc }}};func _cabb (_bdcf ,_dgcab _bab .Point ,_bcaa _ba .Color )(*ruling ,bool ){_fcee :=lineRuling {_becgc :_bdcf ,_facd :_dgcab ,_gfdf :_fgbcc (_bdcf ,_dgcab ),Color :_bcaa };
|
||
if _fcee ._gfdf ==_aebb {return nil ,false ;};return _fcee .asRuling ();};func _gdfc (_bedf float64 )bool {return _ca .Abs (_bedf )< _geff };func _dbef (_gfcc _dc .PdfRectangle )*ruling {return &ruling {_fabfb :_bcaef ,_efbdg :_gfcc .Urx ,_becdd :_gfcc .Lly ,_aggb :_gfcc .Ury };
|
||
};func _gcdf (_cedf ,_ffcc _dc .PdfRectangle )bool {return _cedf .Llx <=_ffcc .Llx &&_ffcc .Urx <=_cedf .Urx &&_cedf .Lly <=_ffcc .Lly &&_ffcc .Ury <=_cedf .Ury ;};func (_cgdd *ruling )encloses (_abcfb ,_afgf float64 )bool {return _cgdd ._becdd -_aecg <=_abcfb &&_afgf <=_cgdd ._aggb +_aecg ;
|
||
};func (_fbbge rulingList )blocks (_egecg ,_ccgf *ruling )bool {if _egecg ._becdd > _ccgf ._aggb ||_ccgf ._becdd > _egecg ._aggb {return false ;};_cacef :=_ca .Max (_egecg ._becdd ,_ccgf ._becdd );_babaf :=_ca .Min (_egecg ._aggb ,_ccgf ._aggb );if _egecg ._efbdg > _ccgf ._efbdg {_egecg ,_ccgf =_ccgf ,_egecg ;
|
||
};for _ ,_gead :=range _fbbge {if _egecg ._efbdg <=_gead ._efbdg +_geff &&_gead ._efbdg <=_ccgf ._efbdg +_geff &&_gead ._becdd <=_babaf &&_cacef <=_gead ._aggb {return true ;};};return false ;};type stateStack []*textState ;func (_abb *stateStack )push (_dfb *textState ){_beg :=*_dfb ;
|
||
*_abb =append (*_abb ,&_beg )};func (_fddde gridTile )complete ()bool {return _fddde .numBorders ()==4};func (_bggff paraList )tables ()[]TextTable {var _ebbd []TextTable ;if _eea {_d .Log .Info ("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a");
|
||
};for _ ,_gdgf :=range _bggff {_gbeg :=_gdgf ._affa ;if _gbeg !=nil &&_gbeg .isExportable (){_ebbd =append (_ebbd ,_gbeg .toTextTable ());};};return _ebbd ;};func _cadb (_dedb ,_gdce ,_gbaaf ,_bdcag *textPara )*textTable {_fgdbdb :=&textTable {_bgdee :2,_fcedd :2,_fddab :make (map[uint64 ]*textPara ,4)};
|
||
_fgdbdb .put (0,0,_dedb );_fgdbdb .put (1,0,_gdce );_fgdbdb .put (0,1,_gbaaf );_fgdbdb .put (1,1,_bdcag );return _fgdbdb ;};func (_cbde *textObject )getFontDirect (_gbfd string )(*_dc .PdfFont ,error ){_ggdb ,_ebcg :=_cbde .getFontDict (_gbfd );if _ebcg !=nil {return nil ,_ebcg ;
|
||
};_fef ,_ebcg :=_dc .NewPdfFontFromPdfObject (_ggdb );if _ebcg !=nil {_d .Log .Debug ("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gbfd ,_ebcg );
|
||
};return _fef ,_ebcg ;};func (_dcba *textTable )reduce ()*textTable {_gfacb :=make ([]int ,0,_dcba ._fcedd );_eeadc :=make ([]int ,0,_dcba ._bgdee );for _dcbg :=0;_dcbg < _dcba ._fcedd ;_dcbg ++{if !_dcba .emptyCompositeRow (_dcbg ){_gfacb =append (_gfacb ,_dcbg );
|
||
};};for _gaecf :=0;_gaecf < _dcba ._bgdee ;_gaecf ++{if !_dcba .emptyCompositeColumn (_gaecf ){_eeadc =append (_eeadc ,_gaecf );};};if len (_gfacb )==_dcba ._fcedd &&len (_eeadc )==_dcba ._bgdee {return _dcba ;};_ddcag :=textTable {_bebc :_dcba ._bebc ,_bgdee :len (_eeadc ),_fcedd :len (_gfacb ),_fddab :make (map[uint64 ]*textPara ,len (_eeadc )*len (_gfacb ))};
|
||
if _eea {_d .Log .Info ("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064",_dcba ._bgdee ,_dcba ._fcedd ,len (_eeadc ),len (_gfacb ));_d .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_eeadc );
|
||
_d .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_gfacb );};for _ecdb ,_afdbc :=range _gfacb {for _baeee ,_fabg :=range _eeadc {_cccb ,_ecfga :=_dcba .getComposite (_fabg ,_afdbc );if _cccb ==nil {continue ;
|
||
};if _eea {_caa .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_baeee ,_ecdb ,_fabg ,_afdbc ,_ebfce (_cccb .merge ().text (),50));};_ddcag .putComposite (_baeee ,_ecdb ,_cccb ,_ecfga );
|
||
};};return &_ddcag ;};const _gdcag =1.0/1000.0;func (_abfb *textLine )endsInHyphen ()bool {_eegdc :=_abfb ._ccfce [len (_abfb ._ccfce )-1];_dbgb :=_eegdc ._bgdg ;_dfbdb ,_geba :=_b .DecodeLastRuneInString (_dbgb );if _geba <=0||!_bb .Is (_bb .Hyphen ,_dfbdb ){return false ;
|
||
};if _eegdc ._gabe &&_dgcf (_dbgb ){return true ;};return _dgcf (_abfb .text ());};func _dbcfe (_cgce []*textWord ,_abce float64 ,_gga ,_gcd rulingList )*wordBag {_eagc :=_afccc (_cgce [0],_abce ,_gga ,_gcd );for _ ,_dede :=range _cgce [1:]{_gcab :=_ebcf (_dede ._cffg );
|
||
_eagc ._adcb [_gcab ]=append (_eagc ._adcb [_gcab ],_dede );_eagc .PdfRectangle =_abga (_eagc .PdfRectangle ,_dede .PdfRectangle );};_eagc .sort ();return _eagc ;};
|
||
|
||
// TableCell is a cell in a TextTable.
|
||
type TableCell struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Marks returns the TextMarks corresponding to the text in Text.
|
||
Marks TextMarkArray ;};func (_defc intSet )has (_bbfdce int )bool {_ ,_ecgd :=_defc [_bbfdce ];return _ecgd };
|
||
|
||
// ExtractText processes and extracts all text data in content streams and returns as a string.
|
||
// It takes into account character encodings in the PDF file, which are decoded by
|
||
// CharcodeBytesToUnicode.
|
||
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
|
||
func (_bag *Extractor) ExtractText() (string, error) {
	// Only the text and the error of ExtractTextWithStats are of interest
	// here; its two middle results are dropped.
	text, _, _, err := _bag.ExtractTextWithStats()
	return text, err
}
|
||
|
||
// New returns an Extractor instance for extracting content from the input PDF page.
|
||
func New (page *_dc .PdfPage )(*Extractor ,error ){return NewWithOptions (page ,nil )};type gridTiling struct{_dc .PdfRectangle ;_gggb []float64 ;_gceb []float64 ;_begc map[float64 ]map[float64 ]gridTile ;};func (_acfab *textWord )appendMark (_egdgc *textMark ,_eeea _dc .PdfRectangle ){_acfab ._gbaed =append (_acfab ._gbaed ,_egdgc );
|
||
_acfab .PdfRectangle =_abga (_acfab .PdfRectangle ,_egdgc .PdfRectangle );if _egdgc ._beaf > _acfab ._debab {_acfab ._debab =_egdgc ._beaf ;};_acfab ._cffg =_eeea .Ury -_acfab .PdfRectangle .Lly ;};func _dee (_ffge ,_cgfc _dc .PdfRectangle )bool {return _cgfc .Llx <=_ffge .Urx &&_ffge .Llx <=_cgfc .Urx ;
|
||
};func _ebbda (_ddca map[float64 ]map[float64 ]gridTile )[]float64 {_bcgf :=make ([]float64 ,0,len (_ddca ));for _gaad :=range _ddca {_bcgf =append (_bcgf ,_gaad );};_e .Float64s (_bcgf );_caba :=len (_bcgf );for _baeb :=0;_baeb < _caba /2;_baeb ++{_bcgf [_baeb ],_bcgf [_caba -1-_baeb ]=_bcgf [_caba -1-_baeb ],_bcgf [_baeb ];
|
||
};return _bcgf ;};func (_aed *textObject )getFontDict (_babf string )(_bbff _aa .PdfObject ,_gad error ){_dae :=_aed ._eff ;if _dae ==nil {_d .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_babf );
|
||
return nil ,nil ;};_bbff ,_cbca :=_dae .GetFontByName (_aa .PdfObjectName (_babf ));if !_cbca {_d .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_babf );
|
||
return nil ,_g .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _bbff ,nil ;};func (_effb paraList )writeText (_dfda _gc .Writer ){for _afdd ,_bda :=range _effb {if _bda ._bedda {continue ;
|
||
};_bda .writeText (_dfda );if _afdd !=len (_effb )-1{if _adfb (_bda ,_effb [_afdd +1]){_dfda .Write ([]byte ("\u0020"));}else {_dfda .Write ([]byte ("\u000a"));_dfda .Write ([]byte ("\u000a"));};};};_dfda .Write ([]byte ("\u000a"));_dfda .Write ([]byte ("\u000a"));
|
||
};func (_cacb rulingList )snapToGroupsDirection ()rulingList {_cacb .sortStrict ();_cfffb :=make (map[*ruling ]rulingList ,len (_cacb ));_afdc :=_cacb [0];_ccfde :=func (_ffdc *ruling ){_afdc =_ffdc ;_cfffb [_afdc ]=rulingList {_ffdc }};_ccfde (_cacb [0]);
|
||
for _ ,_geada :=range _cacb [1:]{if _geada ._efbdg < _afdc ._efbdg -_agfe {_d .Log .Error ("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073",_afdc ,_geada );
|
||
};if _geada ._efbdg > _afdc ._efbdg +_geff {_ccfde (_geada );}else {_cfffb [_afdc ]=append (_cfffb [_afdc ],_geada );};};_edbb :=make (map[*ruling ]float64 ,len (_cfffb ));_becfg :=make (map[*ruling ]*ruling ,len (_cacb ));for _bged ,_fbddd :=range _cfffb {_edbb [_bged ]=_fbddd .mergePrimary ();
|
||
for _ ,_agae :=range _fbddd {_becfg [_agae ]=_bged ;};};for _ ,_fbdge :=range _cacb {_fbdge ._efbdg =_edbb [_becfg [_fbdge ]];};_cdgg :=make (rulingList ,0,len (_cacb ));for _ ,_gfbde :=range _cfffb {_bcce :=_gfbde .splitSec ();for _fdaa ,_fbfg :=range _bcce {_gffg :=_fbfg .merge ();
|
||
if len (_cdgg )> 0{_dbeae :=_cdgg [len (_cdgg )-1];if _dbeae .alignsPrimary (_gffg )&&_dbeae .alignsSec (_gffg ){_d .Log .Error ("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073",_fdaa ,_dbeae ,_gffg );
|
||
continue ;};};_cdgg =append (_cdgg ,_gffg );};};_cdgg .sortStrict ();return _cdgg ;};type paraList []*textPara ;func (_cce *imageExtractContext )extractContentStreamImages (_cga string ,_gge *_dc .PdfPageResources )error {_acf :=_gcf .NewContentStreamParser (_cga );
|
||
_ef ,_fgc :=_acf .Parse ();if _fgc !=nil {return _fgc ;};if _cce ._egc ==nil {_cce ._egc =map[*_aa .PdfObjectStream ]*cachedImage {};};if _cce ._cbc ==nil {_cce ._cbc =&ImageExtractOptions {};};_dcf :=_gcf .NewContentStreamProcessor (*_ef );_dcf .AddHandler (_gcf .HandlerConditionEnumAllOperands ,"",_cce .processOperand );
|
||
return _dcf .Process (_gge );};func (_ddf *textObject )moveText (_abdbd ,_acfc float64 ){_ddf .moveLP (_abdbd ,_acfc )};
|
||
|
||
// Font represents the font properties on a PDF page.
|
||
type Font struct{PdfFont *_dc .PdfFont ;
|
||
|
||
// FontName represents the font name taken from the font properties.
|
||
FontName string ;
|
||
|
||
// FontType represents the Subtype entry of the font dictionary inside the page resources.
|
||
// Examples: Type0, Type1, MMType1, Type3, TrueType, CIDFont.
|
||
FontType string ;
|
||
|
||
// ToUnicode is true if the font provides a `ToUnicode` mapping.
|
||
ToUnicode bool ;
|
||
|
||
// IsCID is true if the underlying font is a composite font.
|
||
// A composite font is represented by a font dictionary whose Subtype is `Type0`.
|
||
IsCID bool ;
|
||
|
||
// IsSimple is true if the font is a simple font.
|
||
// A simple font is limited to 8-bit character codes (values 0-255).
|
||
IsSimple bool ;
|
||
|
||
// FontData represents the raw data of the embedded font file.
|
||
// It can be in TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CFF) format.
|
||
// FontData is taken from the `FontFile`, `FontFile2` or `FontFile3` entry of the Font Descriptor.
|
||
// At most one of `FontFile`, `FontFile2` or `FontFile3` is used as the FontData value.
|
||
FontData []byte ;
|
||
|
||
// FontFileName is a name representing the font. It has the format:
|
||
// (Font Name) + (Font Type Extension), for example: helvetica.ttf.
|
||
FontFileName string ;
|
||
|
||
// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
|
||
FontDescriptor *_dc .PdfFontDescriptor ;};func (_bbfa *subpath )clear (){*_bbfa =subpath {}};func (_cbfe *textMark )bbox ()_dc .PdfRectangle {return _cbfe .PdfRectangle };const (_ggccf markKind =iota ;_bagd ;_gfefd ;_cefcf ;);func (_beca *subpath )makeRectRuling (_bddba _ba .Color )(*ruling ,bool ){if _agca {_d .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_beca );
// NOTE: the line above closes the Font struct and then defines, in order:
//   - (*subpath).clear, which resets the receiver to the zero subpath;
//   - (*textMark).bbox, which returns the embedded PdfRectangle;
//   - four consecutive markKind constants generated with iota;
//   - the start of (*subpath).makeRectRuling.
//
// makeRectRuling tries to interpret the first four points of the subpath as a
// rectangle and to convert it into a ruling carrying the colour argument.
// It returns the ruling and true on success. On failure the bool is false and
// the pointer is either an empty &ruling{} (shape test failed) or nil (the
// rectangle's kind is _aebb or asRuling rejected it).
// It slices the point list with [:4], so it panics when the capacity of that
// list is below four - presumably callers check this; verify.
|
||
// Each edge runs from point i to point (i+1)%4 and is classified by _gefd.
||
};_afba :=_beca ._bcbc [:4];_ccgd :=make (map[int ]rulingKind ,len (_afba ));for _cddcgc ,_gafff :=range _afba {_edef :=_beca ._bcbc [(_cddcgc +1)%4];_ccgd [_cddcgc ]=_gefd (_gafff ,_edef );if _agca {_caa .Printf ("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066",_cddcgc ,_ccgd [_cddcgc ],_gafff ,_edef );
|
||
// Edge indexes are collected per kind: _gegc edges go to the list logged as
// "horzs", _bcaef edges to the list logged as "verts". The kinds live in a map,
// so the order of indexes within each list is not deterministic.
};};if _agca {_caa .Printf ("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_ccgd );};var _cagge ,_dacgg []int ;for _fbcgb ,_cccd :=range _ccgd {switch _cccd {case _gegc :_dacgg =append (_dacgg ,_fbcgb );case _bcaef :_cagge =append (_cagge ,_fbcgb );
|
||
};};if _agca {_caa .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_dacgg ),_dacgg );_caa .Printf ("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_cagge ),_cagge );
|
||
// The path is accepted when it has two edges of each kind, or exactly two edges
// of one kind and none of the other, provided that pair passes _cacgd (for the
// "horzs" pair) or _addda (for the "verts" pair); both helpers are defined elsewhere.
};_aaeb :=(len (_dacgg )==2&&len (_cagge )==2)||(len (_dacgg )==2&&len (_cagge )==0&&_cacgd (_afba [_dacgg [0]],_afba [_dacgg [1]]))||(len (_cagge )==2&&len (_dacgg )==0&&_addda (_afba [_cagge [0]],_afba [_cagge [1]]));if _agca {_caa .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_dacgg ),len (_cagge ),_aaeb );
|
||
};if !_aaeb {if _agca {_d .Log .Error ("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v",_beca );_caa .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_dacgg ),len (_cagge ),_aaeb );
|
||
// When only one orientation was found, every edge that is not of that kind is
// treated as belonging to the other orientation.
};return &ruling {},false ;};if len (_cagge )==0{for _egdgg ,_gbbb :=range _ccgd {if _gbbb !=_gegc {_cagge =append (_cagge ,_egdgg );};};};if len (_dacgg )==0{for _cefa ,_bfea :=range _ccgd {if _bfea !=_bcaef {_dacgg =append (_dacgg ,_cefa );};};};if _agca {_d .Log .Info ("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_dacgg ),len (_cagge ),len (_afba ),_dacgg ,_cagge ,_afba );
|
||
// Pick the Y extremes from the start points of the two "horzs" edges and the X
// extremes from the start points of the two "verts" edges.
};var _dbabf ,_efgd ,_dgfe ,_bgde _bab .Point ;if _afba [_dacgg [0]].Y > _afba [_dacgg [1]].Y {_dgfe ,_bgde =_afba [_dacgg [0]],_afba [_dacgg [1]];}else {_dgfe ,_bgde =_afba [_dacgg [1]],_afba [_dacgg [0]];};if _afba [_cagge [0]].X > _afba [_cagge [1]].X {_dbabf ,_efgd =_afba [_cagge [0]],_afba [_cagge [1]];
|
||
// Build the rectangle, then normalise it so that Llx <= Urx and Lly <= Ury.
}else {_dbabf ,_efgd =_afba [_cagge [1]],_afba [_cagge [0]];};_aggba :=_dc .PdfRectangle {Llx :_dbabf .X ,Urx :_efgd .X ,Lly :_bgde .Y ,Ury :_dgfe .Y };if _aggba .Llx > _aggba .Urx {_aggba .Llx ,_aggba .Urx =_aggba .Urx ,_aggba .Llx ;};if _aggba .Lly > _aggba .Ury {_aggba .Lly ,_aggba .Ury =_aggba .Ury ,_aggba .Lly ;
|
||
// The rectangle's ruling kind comes from _bgeae; a kind of _aebb is rejected.
};_gfgf :=rectRuling {PdfRectangle :_aggba ,_fgdfae :_bgeae (_aggba ),Color :_bddba };if _gfgf ._fgdfae ==_aebb {if _agca {_d .Log .Error ("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c");
|
||
};return nil ,false ;};_bbec ,_edbd :=_gfgf .asRuling ();if !_edbd {if _agca {_d .Log .Error ("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg");};return nil ,false ;};if _cage {_caa .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_bbec .String ());
|
||
};return _bbec ,true ;};
|
||
|
||
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
|
||
func (_cdf *TextMarkArray )BBox ()(_dc .PdfRectangle ,bool ){var _ccfcd _dc .PdfRectangle ;_edab :=false ;for _ ,_ggb :=range _cdf ._bgbd {if _ggb .Meta ||_fcgba (_ggb .Text ){continue ;};if _edab {_ccfcd =_abga (_ccfcd ,_ggb .BBox );}else {_ccfcd =_ggb .BBox ;
|
||
_edab =true ;};};return _ccfcd ,_edab ;};func (_gafd *wordBag )sort (){for _ ,_bddb :=range _gafd ._adcb {_e .Slice (_bddb ,func (_afdbd ,_daad int )bool {return _deba (_bddb [_afdbd ],_bddb [_daad ])< 0});};};func _edeg (_cebfd float64 )bool {return _ca .Abs (_cebfd )< _agfe };
|
||
func (_feea *wordBag )allWords ()[]*textWord {var _dbdeg []*textWord ;for _ ,_ggaf :=range _feea ._adcb {_dbdeg =append (_dbdeg ,_ggaf ...);};return _dbdeg ;};func (_cdda *wordBag )firstWord (_bgaa int )*textWord {return _cdda ._adcb [_bgaa ][0]};func (_cgcg rulingList )splitSec ()[]rulingList {_e .Slice (_cgcg ,func (_efec ,_gbaeb int )bool {_bfcbf ,_eede :=_cgcg [_efec ],_cgcg [_gbaeb ];
|
||
if _bfcbf ._becdd !=_eede ._becdd {return _bfcbf ._becdd < _eede ._becdd ;};return _bfcbf ._aggb < _eede ._aggb ;});_aeef :=make (map[*ruling ]struct{},len (_cgcg ));_adgg :=func (_dcdge *ruling )rulingList {_fefge :=rulingList {_dcdge };_aeef [_dcdge ]=struct{}{};
|
||
for _ ,_ebbc :=range _cgcg {if _ ,_ecfa :=_aeef [_ebbc ];_ecfa {continue ;};for _ ,_afed :=range _fefge {if _ebbc .alignsSec (_afed ){_fefge =append (_fefge ,_ebbc );_aeef [_ebbc ]=struct{}{};break ;};};};return _fefge ;};_facg :=[]rulingList {_adgg (_cgcg [0])};
|
||
for _ ,_ddac :=range _cgcg [1:]{if _ ,_adffc :=_aeef [_ddac ];_adffc {continue ;};_facg =append (_facg ,_adgg (_ddac ));};return _facg ;};func (_gedc *wordBag )firstReadingIndex (_ceee int )int {_degg :=_gedc .firstWord (_ceee )._debab ;_eecb :=float64 (_ceee +1)*_caaee ;
|
||
_abceb :=_eecb +_ffed *_degg ;_agccd :=_ceee ;for _ ,_gefe :=range _gedc .depthBand (_eecb ,_abceb ){if _deba (_gedc .firstWord (_gefe ),_gedc .firstWord (_agccd ))< 0{_agccd =_gefe ;};};return _agccd ;};func (_ccaa rulingList )intersections ()map[int ]intSet {var _fbef ,_adef []int ;
|
||
for _aacd ,_deee :=range _ccaa {switch _deee ._fabfb {case _bcaef :_fbef =append (_fbef ,_aacd );case _gegc :_adef =append (_adef ,_aacd );};};if len (_fbef )< _cddcc +1||len (_adef )< _eadb +1{return nil ;};if len (_fbef )+len (_adef )> _gddb {_d .Log .Debug ("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",len (_ccaa ),len (_fbef ),len (_adef ));
|
||
return nil ;};_fedef :=make (map[int ]intSet ,len (_fbef )+len (_adef ));for _ ,_ccea :=range _fbef {for _ ,_geaf :=range _adef {if _ccaa [_ccea ].intersects (_ccaa [_geaf ]){if _ ,_dbcfa :=_fedef [_ccea ];!_dbcfa {_fedef [_ccea ]=make (intSet );};if _ ,_bcef :=_fedef [_geaf ];
|
||
!_bcef {_fedef [_geaf ]=make (intSet );};_fedef [_ccea ].add (_geaf );_fedef [_geaf ].add (_ccea );};};};return _fedef ;};type textLine struct{_dc .PdfRectangle ;_gddd float64 ;_ccfce []*textWord ;_bfcdd float64 ;};func (_gebfe paraList )reorder (_cad []int ){_bfbb :=make (paraList ,len (_gebfe ));
|
||
for _eagb ,_efcf :=range _cad {_bfbb [_eagb ]=_gebfe [_efcf ];};copy (_gebfe ,_bfbb );};
|
||
|
||
// String returns a description of `v`.
|
||
func (_edfd *ruling )String ()string {if _edfd ._fabfb ==_aebb {return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047";};_cdecg ,_cddafc :="\u0078","\u0079";if _edfd ._fabfb ==_gegc {_cdecg ,_cddafc ="\u0079","\u0078";};_gbff :="";if _edfd ._gebag !=0.0{_gbff =_caa .Sprintf (" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_edfd ._gebag );
|
||
};return _caa .Sprintf ("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",_edfd ._fabfb ,_cdecg ,_edfd ._efbdg ,_cddafc ,_edfd ._becdd ,_edfd ._aggb ,_edfd ._aggb -_edfd ._becdd ,_edfd ._fcgb ,_edfd .Color ,_gbff );
|
||
};func _ffffg (_feba []int )[]int {_dbeb :=make ([]int ,len (_feba ));for _fdee ,_bedd :=range _feba {_dbeb [len (_feba )-1-_fdee ]=_bedd ;};return _dbeb ;};func (_bddaa *textTable )emptyCompositeRow (_dfafc int )bool {for _bbcg :=0;_bbcg < _bddaa ._bgdee ;
|
||
_bbcg ++{if _cfca ,_dfdbd :=_bddaa ._fgge [_abdec (_bbcg ,_dfafc )];_dfdbd {if len (_cfca .paraList )> 0{return false ;};};};return true ;};func (_edga *textLine )markWordBoundaries (){_ebef :=_fde *_edga ._bfcdd ;for _cdbc ,_fabd :=range _edga ._ccfce [1:]{if _ggee (_fabd ,_edga ._ccfce [_cdbc ])>=_ebef {_fabd ._gabe =true ;
|
||
};};};
|
||
|
||
// String returns a multi-line description of the PageText: a header line
// giving the number of elements in `_gfc`, the String() of each element, and a
// matching footer line, all joined with newlines.
|
||
|
||
func (_dcfbg PageText )String ()string {_adbe :=_caa .Sprintf ("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073",len (_dcfbg ._gfc ));_acdd :=[]string {"\u002d"+_adbe };for _ ,_bba :=range _dcfbg ._gfc {_acdd =append (_acdd ,_bba .String ());
|
||
// The next line finishes PageText.String, declares cachedImage (an image
// paired with a colorspace) and begins (rulingList).augmentGrid.
//
// augmentGrid splits the list with vertsHorzs and, if both parts are
// non-empty, adds up to four synthetic rulings (created with _fcgb set to
// _cefcf) so that the two sets share a common outline:
//   - a _bcaef ruling at the left/right edge of the second set's bounding box
//     when that box extends beyond the first set's box by more than _aecg;
//   - a _gegc ruling at the bottom/top edge of the first set's bounding box
//     when that box extends beyond the second set's box by more than _aecg.
// It returns the two (possibly augmented) lists. When nothing was added the
// lists obtained from vertsHorzs are returned unchanged.
};_acdd =append (_acdd ,"\u002b"+_adbe );return _ce .Join (_acdd ,"\u000a");};type cachedImage struct{_dga *_dc .Image ;_fcc _dc .PdfColorspace ;};func (_gegbd rulingList )augmentGrid ()(rulingList ,rulingList ){_cageg ,_bbeb :=_gegbd .vertsHorzs ();if len (_cageg )==0||len (_bbeb )==0{return _cageg ,_bbeb ;
|
||
};_cada ,_bffed :=_cageg ,_bbeb ;_cgef :=_cageg .bbox ();_beb :=_bbeb .bbox ();if _cage {_d .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_cgef );_d .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_beb );
|
||
// Left and right edges: new rulings span the first set's Lly..Ury.
};var _dbea ,_fffdc ,_egdff ,_bedee *ruling ;if _beb .Llx < _cgef .Llx -_aecg {_dbea =&ruling {_fcgb :_cefcf ,_fabfb :_bcaef ,_efbdg :_beb .Llx ,_becdd :_cgef .Lly ,_aggb :_cgef .Ury };_cageg =append (rulingList {_dbea },_cageg ...);};if _beb .Urx > _cgef .Urx +_aecg {_fffdc =&ruling {_fcgb :_cefcf ,_fabfb :_bcaef ,_efbdg :_beb .Urx ,_becdd :_cgef .Lly ,_aggb :_cgef .Ury };
|
||
// Bottom and top edges: new rulings span the second set's Llx..Urx.
_cageg =append (_cageg ,_fffdc );};if _cgef .Lly < _beb .Lly -_aecg {_egdff =&ruling {_fcgb :_cefcf ,_fabfb :_gegc ,_efbdg :_cgef .Lly ,_becdd :_beb .Llx ,_aggb :_beb .Urx };_bbeb =append (rulingList {_egdff },_bbeb ...);};if _cgef .Ury > _beb .Ury +_aecg {_bedee =&ruling {_fcgb :_cefcf ,_fabfb :_gegc ,_efbdg :_cgef .Ury ,_becdd :_beb .Llx ,_aggb :_beb .Urx };
|
||
// An unchanged total count means nothing was added: return the saved originals.
_bbeb =append (_bbeb ,_bedee );};if len (_cageg )+len (_bbeb )==len (_gegbd ){return _cada ,_bffed ;};_gfgeb :=append (_cageg ,_bbeb ...);_gegbd .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_gfgeb .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");
|
||
// The next line finishes augmentGrid, defines (*textPara).bbox (returns the
// embedded PdfRectangle) and begins _daeba.
//
// _daeba does nothing when _bacb is negative. Otherwise it rewrites, in place,
// the X and Y of every point of every subpath in the given sections with the
// result of _caca, printing each changed point when _cage is set.
return _cageg ,_bbeb ;};func (_badb *textPara )bbox ()_dc .PdfRectangle {return _badb .PdfRectangle };func _daeba (_bgfec []pathSection ){if _bacb < 0.0{return ;};if _cage {_d .Log .Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_bgfec ));
|
||
};for _edff ,_bgdbe :=range _bgfec {for _fcag ,_dfbbd :=range _bgdbe ._bffc {for _cbdee ,_eecbf :=range _dfbbd ._bcbc {_dfbbd ._bcbc [_cbdee ]=_bab .Point {X :_caca (_eecbf .X ),Y :_caca (_eecbf .Y )};if _cage {_cfec :=_dfbbd ._bcbc [_cbdee ];if !_aada (_eecbf ,_cfec ){_cfac :=_bab .Point {X :_cfec .X -_eecbf .X ,Y :_cfec .Y -_eecbf .Y };
|
||
_caa .Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_edff ,_fcag ,_cbdee ,_eecbf ,_cfec ,_cfac );};};};};};};
|
||
|
||
// PageImages represents extracted images on a PDF page with spatial information:
|
||
// display position and size.
|
||
type PageImages struct{Images []ImageMark ;};func (_cae paraList )extractTables (_bfdd []gridTiling )paraList {if _eea {_d .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_cae ));
|
||
};if len (_cae )< _baeag {return _cae ;};_gcga :=_cae .findTables (_bfdd );if _eea {_d .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_gcga ));
|
||
for _afdag ,_efagd :=range _gcga {_efagd .log (_caa .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_afdag ));};};return _cae .applyTables (_gcga );};func _eeac (_degea string )string {_afcfg :=[]rune (_degea );return string (_afcfg [:len (_afcfg )-1])};
|
||
// _abga returns the smallest rectangle containing both arguments (minimum of
// the lower-left coordinates, maximum of the upper-right ones).
// The same line also begins (compositeCell).parasBBox, which returns the
// cell's paraList together with its PdfRectangle.
func _abga (_aff ,_agdg _dc .PdfRectangle )_dc .PdfRectangle {return _dc .PdfRectangle {Llx :_ca .Min (_aff .Llx ,_agdg .Llx ),Lly :_ca .Min (_aff .Lly ,_agdg .Lly ),Urx :_ca .Max (_aff .Urx ,_agdg .Urx ),Ury :_ca .Max (_aff .Ury ,_agdg .Ury )};};func (_cbfed compositeCell )parasBBox ()(paraList ,_dc .PdfRectangle ){return _cbfed .paraList ,_cbfed .PdfRectangle ;
|
||
// (*wordBag).removeDuplicates, which starts on the next line, removes words
// that duplicate another word of the same depth bin. For every depth index it
// takes the first word of the bin, derives a tolerance (_feae times that
// word's _debab) and scans the bins in the depth band [_cffg, _cffg+tolerance].
// Within a bin, a word is marked as a duplicate of another (distinct) word when
// both have the same _bgdg and all four rectangle coordinates differ by less
// than the tolerance. Marked words are removed by compacting the bin in place;
// a bin left empty is deleted from the map.
};func (_dab *wordBag )removeDuplicates (){if _feaa {_d .Log .Info ("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_dab .text ());};for _ ,_bace :=range _dab .depthIndexes (){if len (_dab ._adcb [_bace ])==0{continue ;
|
||
};_ggab :=_dab ._adcb [_bace ][0];_eaggd :=_feae *_ggab ._debab ;_aagb :=_ggab ._cffg ;for _ ,_gbbg :=range _dab .depthBand (_aagb ,_aagb +_eaggd ){_dfbc :=map[*textWord ]struct{}{};_afda :=_dab ._adcb [_gbbg ];for _ ,_bgdad :=range _afda {if _ ,_aefe :=_dfbc [_bgdad ];
|
||
_aefe {continue ;};for _ ,_cagd :=range _afda {if _ ,_gefff :=_dfbc [_cagd ];_gefff {continue ;};if _cagd !=_bgdad &&_cagd ._bgdg ==_bgdad ._bgdg &&_ca .Abs (_cagd .Llx -_bgdad .Llx )< _eaggd &&_ca .Abs (_cagd .Urx -_bgdad .Urx )< _eaggd &&_ca .Abs (_cagd .Lly -_bgdad .Lly )< _eaggd &&_ca .Abs (_cagd .Ury -_bgdad .Ury )< _eaggd {_dfbc [_cagd ]=struct{}{};
|
||
// Compact the bin: keep the words that were not marked, then truncate.
};};};if len (_dfbc )> 0{_ddeb :=0;for _ ,_eadg :=range _afda {if _ ,_beed :=_dfbc [_eadg ];!_beed {_afda [_ddeb ]=_eadg ;_ddeb ++;};};_dab ._adcb [_gbbg ]=_afda [:len (_afda )-len (_dfbc )];if len (_dab ._adcb [_gbbg ])==0{delete (_dab ._adcb ,_gbbg );
|
||
// textMark (declared on the next two lines) embeds a PdfRectangle and carries
// the text, font, matrices, colours and related attributes of one mark.
};};};};};type textMark struct{_dc .PdfRectangle ;_gcce int ;_cbge string ;_bcabg string ;_eead *_dc .PdfFont ;_beaf float64 ;_bddca float64 ;_adcg _bab .Matrix ;_ffb _bab .Point ;_gde _dc .PdfRectangle ;_fgeee _ba .Color ;_cab _ba .Color ;_abda _aa .PdfObject ;
|
||
// (paraList).findTables calls addNeighbours, sorts the list in place with
// _aeb, and returns the tables found by findGridTables (when _ccde is set)
// followed by those found by findTextTables (when _aeca is set).
_bfga []string ;Tw float64 ;Th float64 ;_gffe int ;};func (_daec paraList )findTables (_ecbe []gridTiling )[]*textTable {_daec .addNeighbours ();_e .Slice (_daec ,func (_dffb ,_cgfg int )bool {return _aeb (_daec [_dffb ],_daec [_cgfg ])< 0});var _adcab []*textTable ;
|
||
// intSet (end of the next line) is a set of ints implemented as a map.
if _ccde {_cgcabc :=_daec .findGridTables (_ecbe );_adcab =append (_adcab ,_cgcabc ...);};if _aeca {_agcb :=_daec .findTextTables ();_adcab =append (_adcab ,_agcb ...);};return _adcab ;};type intSet map[int ]struct{};
|
||
|
||
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
|
||
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
|
||
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
|
||
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
|
||
type RenderMode int ;func (_egcc rulingList )toGrids ()[]rulingList {if _cage {_d .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_egcc );};_aecfd :=_egcc .intersections ();if _cage {_d .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_egcc ),len (_aecfd ));
// (rulingList).toGrids, begun on the line above after the RenderMode
// declaration, partitions the rulings into candidate grids and returns those
// accepted by isActualGrid. Steps:
//  1. compute the intersections between rulings;
//  2. for each ruling, compute its set of connections (non-empty sets only);
//  3. order the ruling indexes with _gagd: more connections first, ties broken by comp;
//  4. greedily group the indexes: an index joins the first group that has a
//     member whose connection set contains it, otherwise it starts a new group;
//  5. stable-sort the groups by decreasing size and sort each group with comp;
//  6. keep the groups for which isActualGrid returns true, using the list it
//     returns after passing it through snapToGroups.
// NOTE(review): step 4 indexes element 0 of the ordering, so an empty receiver
// would presumably panic unless _gagd never returns an empty slice - confirm.
|
||
||
for _ ,_fffff :=range _dbed (_aecfd ){_caa .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_fffff ,_aecfd [_fffff ]);};};_cgdf :=make (map[int ]intSet ,len (_egcc ));for _bdbgf :=range _egcc {_bafa :=_egcc .connections (_aecfd ,_bdbgf );if len (_bafa )> 0{_cgdf [_bdbgf ]=_bafa ;
|
||
};};if _cage {_d .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_cgdf ));for _ ,_gbaa :=range _dbed (_cgdf ){_caa .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_gbaa ,_cgdf [_gbaa ]);
|
||
};};_effa :=_gagd (len (_egcc ),func (_dadg ,_effba int )bool {_egbc ,_edac :=len (_cgdf [_dadg ]),len (_cgdf [_effba ]);if _egbc !=_edac {return _egbc > _edac ;};return _egcc .comp (_dadg ,_effba );});if _cage {_d .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_effa );
|
||
// Greedy grouping; the labelled continue moves on to the next index as soon
// as the current one has been placed in a group.
};_faaf :=[][]int {{_effa [0]}};_gdeg :for _ ,_dbgfa :=range _effa [1:]{for _bfee ,_bade :=range _faaf {for _ ,_bece :=range _bade {if _cgdf [_bece ].has (_dbgfa ){_faaf [_bfee ]=append (_bade ,_dbgfa );continue _gdeg ;};};};_faaf =append (_faaf ,[]int {_dbgfa });
|
||
};if _cage {_d .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_faaf );};_e .SliceStable (_faaf ,func (_cebdb ,_edde int )bool {return len (_faaf [_cebdb ])> len (_faaf [_edde ])});for _ ,_gecf :=range _faaf {_e .Slice (_gecf ,func (_defb ,_bdcfd int )bool {return _egcc .comp (_gecf [_defb ],_gecf [_bdcfd ])});
|
||
// Convert the index groups into ruling lists.
};_aedbb :=make ([]rulingList ,len (_faaf ));for _eeaa ,_adaf :=range _faaf {_dcae :=make (rulingList ,len (_adaf ));for _bggfa ,_bgef :=range _adaf {_dcae [_bggfa ]=_egcc [_bgef ];};_aedbb [_eeaa ]=_dcae ;};if _cage {_d .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_aedbb );
|
||
};var _ggcgg []rulingList ;for _ ,_fcfbf :=range _aedbb {if _bcec ,_cccg :=_fcfbf .isActualGrid ();_cccg {_fcfbf =_bcec ;_fcfbf =_fcfbf .snapToGroups ();_ggcgg =append (_ggcgg ,_fcfbf );};};if _cage {_cccdb ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_ggcgg );
|
||
_d .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_aedbb ),len (_ggcgg ));};return _ggcgg ;};
|
||
|
||
// TextMarkArray is a collection of TextMarks.
|
||
type TextMarkArray struct{_bgbd []TextMark };func (_gafg *textObject )getCurrentFont ()*_dc .PdfFont {_dbbg :=_gafg ._cgf ._gcb ;if _dbbg ==nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e");
// (*textObject).getCurrentFont, begun on the line above, returns the font
// stored in the text state (_cgf._gcb), or model.DefaultFont() after a debug
// log when none is set.
//
// (rulingList).isActualGrid, begun on the next line, augments the list with
// augmentGrid and reports whether the result qualifies as a grid. It requires
// at least _cddcc+1 rulings in the first augmented list and _eadb+1 in the
// second. Then, when _gfef is set, the outermost rulings of both lists must
// meet according to _gdfc; otherwise both lists must pass aligned().
// On success it returns the two augmented lists concatenated, and true;
// on failure nil and false.
|
||
||
return _dc .DefaultFont ();};return _dbbg ;};func (_beafc rulingList )isActualGrid ()(rulingList ,bool ){_dbgfe ,_aecga :=_beafc .augmentGrid ();if !(len (_dbgfe )>=_cddcc +1&&len (_aecga )>=_eadb +1){if _cage {_d .Log .Info ("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064",len (_dbgfe ),len (_aecga ),_cddcc +1,_eadb +1);
|
||
};return nil ,false ;};if _cage {_d .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074",_beafc ,len (_dbgfe )>=2,len (_aecga )>=2,len (_dbgfe )>=2&&len (_aecga )>=2);
|
||
// Corner check (_gfef branch): compare the first and last rulings of each list.
for _dabb ,_cegd :=range _beafc {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a",_dabb ,_cegd );};};if _gfef {_fddca ,_bbae :=_dbgfe [0],_dbgfe [len (_dbgfe )-1];_ebdbb ,_gcccb :=_aecga [0],_aecga [len (_aecga )-1];if !(_gdfc (_fddca ._efbdg -_ebdbb ._becdd )&&_gdfc (_bbae ._efbdg -_ebdbb ._aggb )&&_gdfc (_ebdbb ._efbdg -_fddca ._aggb )&&_gdfc (_gcccb ._efbdg -_fddca ._becdd )){if _cage {_d .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073",_fddca ,_bbae ,_ebdbb ,_gcccb );
|
||
// NOTE(review): the log below is guarded by _baea while the sibling branch
// for the second list uses _cage - confirm this difference is intended.
};return nil ,false ;};}else {if !_dbgfe .aligned (){if _baea {_d .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064",len (_dbgfe ));
|
||
};return nil ,false ;};if !_aecga .aligned (){if _cage {_d .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064",len (_aecga ));
|
||
// (*textPara).writeCellText, begun on the next line, writes the text of each
// line of the paragraph to the writer. When _cggce is set and a line other
// than the last ends in a hyphen, the final rune is dropped (via _eeac) and no
// separator follows; otherwise the separator returned by _gfga is written
// between consecutive lines. Errors returned by Write are ignored.
};return nil ,false ;};};_addd :=append (_dbgfe ,_aecga ...);return _addd ,true ;};func (_egdf *textPara )writeCellText (_dccc _gc .Writer ){for _aede ,_gage :=range _egdf ._ddaf {_fbbb :=_gage .text ();_adbf :=_cggce &&_gage .endsInHyphen ()&&_aede !=len (_egdf ._ddaf )-1;
|
||
// The line below also declares the event struct and the rulingKind type.
if _adbf {_fbbb =_eeac (_fbbb );};_dccc .Write ([]byte (_fbbb ));if !(_adbf ||_aede ==len (_egdf ._ddaf )-1){_dccc .Write ([]byte (_gfga (_gage ._gddd ,_egdf ._ddaf [_aede +1]._gddd )));};};};type event struct{_dfec float64 ;_efga bool ;_agafc int ;};type rulingKind int ;
|
||
func _daadd (_gaga map[int ][]float64 )[]int {_bbgbe :=make ([]int ,len (_gaga ));_gbbba :=0;for _dbaab :=range _gaga {_bbgbe [_gbbba ]=_dbaab ;_gbbba ++;};_e .Ints (_bbgbe );return _bbgbe ;};func (_afa *shapesState )cubicTo (_fgdf ,_gebf ,_cbeg ,_eafc ,_ffg ,_acdcd float64 ){if _fgeac {_d .Log .Info ("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a");
|
||
};_afa .addPoint (_ffg ,_acdcd );};func (_gaeg *wordBag )removeWord (_agea *textWord ,_cfef int ){_abfe :=_gaeg ._adcb [_cfef ];_abfe =_cafea (_abfe ,_agea );if len (_abfe )==0{delete (_gaeg ._adcb ,_cfef );}else {_gaeg ._adcb [_cfef ]=_abfe ;};};func (_ceda paraList )merge ()*textPara {_d .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_ceda ));
|
||
if len (_ceda )==0{return nil ;};_ceda .sortReadingOrder ();_bfec :=_ceda [0].PdfRectangle ;_acfeg :=_ceda [0]._ddaf ;for _ ,_befa :=range _ceda [1:]{_bfec =_abga (_bfec ,_befa .PdfRectangle );_acfeg =append (_acfeg ,_befa ._ddaf ...);};return _dcfc (_bfec ,_acfeg );
|
||
};
|
||
|
||
// NewWithOptions returns an Extractor instance for extracting content from the
// input PDF page, configured with `options`.
// It returns an error if the page's content streams or media box cannot be
// obtained. A media box with reversed X or Y coordinates is normalised.
|
||
|
||
func NewWithOptions (page *_dc .PdfPage ,options *Options )(*Extractor ,error ){const _bg ="\u0065x\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077W\u0069\u0074\u0068\u004f\u0070\u0074\u0069\u006f\u006e\u0073";_dcb ,_ed :=page .GetAllContentStreams ();
|
||
if _ed !=nil {return nil ,_ed ;};_df ,_ed :=page .GetMediaBox ();if _ed !=nil {return nil ,_caa .Errorf ("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076",_ed );
|
||
// Build the extractor from the content streams, page resources, a copy of the
// media box, the crop box, empty font/text caches and the caller's options.
};_edf :=&Extractor {_aaf :_dcb ,_gdc :page .Resources ,_cbb :*_df ,_aae :page .CropBox ,_ab :map[string ]fontEntry {},_bf :map[string ]textResult {},_da :options };if _edf ._cbb .Llx > _edf ._cbb .Urx {_d .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_edf ._cbb );
|
||
_edf ._cbb .Llx ,_edf ._cbb .Urx =_edf ._cbb .Urx ,_edf ._cbb .Llx ;};if _edf ._cbb .Lly > _edf ._cbb .Ury {_d .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_edf ._cbb );
|
||
// The next line finishes NewWithOptions (recording the use via TrackUse) and
// defines (*wordBag).depthBand, which returns nil for an empty bag and
// otherwise the depth indexes lying between the indexes of the two depths.
_edf ._cbb .Lly ,_edf ._cbb .Ury =_edf ._cbb .Ury ,_edf ._cbb .Lly ;};_gd .TrackUse (_bg );return _edf ,nil ;};func (_cacf *wordBag )depthBand (_dedf ,_dddc float64 )[]int {if len (_cacf ._adcb )==0{return nil ;};return _cacf .depthRange (_cacf .getDepthIdx (_dedf ),_cacf .getDepthIdx (_dddc ));
|
||
// (*textTable).isExportable returns true when _bebc is set. Otherwise it
// returns true as soon as some row's first-column cell fails the test "cell is
// non-nil and its text has at most one rune or matches _fcfg"; if every row
// passes that test it returns false.
};func (_gbfa *textTable )isExportable ()bool {if _gbfa ._bebc {return true ;};_babb :=func (_bdbcg int )bool {_eabe :=_gbfa .get (0,_bdbcg );if _eabe ==nil {return false ;};_gcced :=_eabe .text ();_gaee :=_b .RuneCountInString (_gcced );_dbebb :=_fcfg .MatchString (_gcced );
|
||
// (*textTable).getComposite, begun on the next line, looks up the composite
// cell at the given coordinates and returns its paragraphs and rectangle, or
// nil and an empty rectangle when no cell is stored there.
return _gaee <=1||_dbebb ;};for _dbbe :=0;_dbbe < _gbfa ._fcedd ;_dbbe ++{if !_babb (_dbbe ){return true ;};};return false ;};func (_acfa *textTable )getComposite (_ffdea ,_dadcd int )(paraList ,_dc .PdfRectangle ){_dbfe ,_gfbbc :=_acfa ._fgge [_abdec (_ffdea ,_dadcd )];
|
||
if _eea {_caa .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a",_ffdea ,_dadcd ,_dbfe .String ());};if !_gfbbc {return nil ,_dc .PdfRectangle {};
|
||
// (*textTable).compositeColCorridors returns a map holding a nil entry for
// every column index from 0 to _bgdee-1.
};return _dbfe .parasBBox ();};func (_bgfee *textTable )compositeColCorridors ()map[int ][]float64 {_bgbf :=make (map[int ][]float64 ,_bgfee ._bgdee );if _eea {_d .Log .Info ("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020",_bgfee ._bgdee );
|
||
// The next line also declares shapesState and begins (*textObject).moveLP,
// which concatenates a translation by the two arguments onto the matrix _acd
// and then copies _acd into _aafd.
};for _agfd :=0;_agfd < _bgfee ._bgdee ;_agfd ++{_bgbf [_agfd ]=nil ;};return _bgbf ;};type shapesState struct{_eabb _bab .Matrix ;_gcbe _bab .Matrix ;_bfg []*subpath ;_dcgc bool ;_fcaf _bab .Point ;_degb *textObject ;};func (_dcfe *textObject )moveLP (_cffa ,_dfaf float64 ){_dcfe ._acd .Concat (_bab .NewMatrix (1,0,0,1,_cffa ,_dfaf ));
|
||
// After moveLP, the next line holds a block of package-level tuning constants
// and begins (*wordBag).getDepthIdx, which converts a depth to an index with
// _ebcf and clamps it to the range of the bag's existing depth indexes.
_dcfe ._aafd =_dcfe ._acd ;};const (_cggce =true ;_dadc =true ;_fdfd =true ;_gce =false ;_cfgc =false ;_gbad =6;_fcbe =3.0;_cdbdb =200;_ccde =true ;_aeca =true ;_efg =true ;_abcf =true ;_gfef =false ;);func (_gfbc *wordBag )getDepthIdx (_gafb float64 )int {_afgg :=_gfbc .depthIndexes ();
|
||
// The next line also declares the bounded interface (anything with a bbox
// method) and begins (*wordBag).arrangeText.
//
// arrangeText consumes the words of the bag and arranges them into lines,
// returning them as a paragraph (nil when no line was produced). It sorts the
// bag, removes duplicate words when _dadc is set, then repeatedly: picks a
// starting bin with firstReadingIndex, starts a line from it with _fcfb, and
// keeps pulling into the line the best remaining word found in the depth band
// around the starting word until none qualifies. Lines are finally sorted with
// _aecf and wrapped by _dcfc using the bag's rectangle.
_dgge :=_ebcf (_gafb );if _dgge < _afgg [0]{return _afgg [0];};if _dgge > _afgg [len (_afgg )-1]{return _afgg [len (_afgg )-1];};return _dgge ;};type bounded interface{bbox ()_dc .PdfRectangle };func (_fbec *wordBag )arrangeText ()*textPara {_fbec .sort ();
|
||
if _dadc {_fbec .removeDuplicates ();};var _ecab []*textLine ;for _ ,_efdc :=range _fbec .depthIndexes (){for !_fbec .empty (_efdc ){_dbfga :=_fbec .firstReadingIndex (_efdc );_ddfe :=_fbec .firstWord (_dbfga );_dfcg :=_fcfb (_fbec ,_dbfga );_cdbdf :=_ddfe ._debab ;
|
||
// The depth band and the two gap limits are all scaled by the starting word's _debab.
_agcde :=_ddfe ._cffg -_gdgc *_cdbdf ;_fgbcg :=_ddfe ._cffg +_gdgc *_cdbdf ;_dgcgb :=_bdbc *_cdbdf ;_ffccg :=_agdd *_cdbdf ;_babg :for {var _bgdb *textWord ;_fdfa :=0;for _ ,_bdef :=range _fbec .depthBand (_agcde ,_fgbcg ){_bcfd :=_fbec .highestWord (_bdef ,_agcde ,_fgbcg );
|
||
// A candidate whose gap to the line's last word is below -_ffccg ends the
// line; one whose gap exceeds _dgcgb is skipped; among the rest the word
// that sorts earliest under _deba is chosen.
if _bcfd ==nil {continue ;};_ecaba :=_ggee (_bcfd ,_dfcg ._ccfce [len (_dfcg ._ccfce )-1]);if _ecaba < -_ffccg {break _babg ;};if _ecaba > _dgcgb {continue ;};if _bgdb !=nil &&_deba (_bcfd ,_bgdb )>=0{continue ;};_bgdb =_bcfd ;_fdfa =_bdef ;};if _bgdb ==nil {break ;
|
||
};_dfcg .pullWord (_fbec ,_bgdb ,_fdfa );};_dfcg .markWordBoundaries ();_ecab =append (_ecab ,_dfcg );};};if len (_ecab )==0{return nil ;};_e .Slice (_ecab ,func (_fbba ,_dgd int )bool {return _aecf (_ecab [_fbba ],_ecab [_dgd ])< 0});_gebe :=_dcfc (_fbec .PdfRectangle ,_ecab );
|
||
// Optional debug dump of the paragraph, its lines, words and marks.
if _daag {_d .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_gebe .String ());if _gegf {for _abcfg ,_aaac :=range _gebe ._ddaf {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_abcfg ,_aaac .String ());
|
||
if _dbf {for _cbdd ,_ebdb :=range _aaac ._ccfce {_caa .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_cbdd ,_ebdb .String ());for _fgce ,_efce :=range _ebdb ._gbaed {_caa .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_fgce ,_efce .String ());
|
||
// _dggd, begun on the next line, builds a textWord from a non-empty slice of
// marks: its rectangle is the union of the marks' rectangles, _cffg is the
// given rectangle's Ury minus the word's Lly, and _debab is the largest _beaf
// among the marks.
};};};};};};return _gebe ;};func _dggd (_dgfca []*textMark ,_fbbgf _dc .PdfRectangle )*textWord {_aeab :=_dgfca [0].PdfRectangle ;_gfaa :=_dgfca [0]._beaf ;for _ ,_gafcf :=range _dgfca [1:]{_aeab =_abga (_aeab ,_gafcf .PdfRectangle );if _gafcf ._beaf > _gfaa {_gfaa =_gafcf ._beaf ;
|
||
// The next line finishes _dggd, defines (*textObject).showText (a thin wrapper
// around renderText) and begins (rulingList).connections, which returns the
// set of ruling indexes reachable from index `_dcgb` by repeatedly following
// the intersection sets in `_aeedg`.
};};return &textWord {PdfRectangle :_aeab ,_gbaed :_dgfca ,_cffg :_fbbgf .Ury -_aeab .Lly ,_debab :_gfaa };};func (_gbg *textObject )showText (_degd _aa .PdfObject ,_bad []byte )error {return _gbg .renderText (_degd ,_bad );};func (_agddd rulingList )connections (_aeedg map[int ]intSet ,_dcgb int )intSet {_degbe :=make (intSet );
|
||
// _feeg holds the indexes already expanded, so each index is visited once.
_feeg :=make (intSet );var _dbabb func (int );_dbabb =func (_efae int ){if !_feeg .has (_efae ){_feeg .add (_efae );for _gabb :=range _agddd {if _aeedg [_gabb ].has (_efae ){_degbe .add (_gabb );};};for _afab :=range _agddd {if _degbe .has (_afab ){_dbabb (_afab );
|
||
// The next line finishes connections and defines (*ruling).equals (same kind
// and _efca-equal coordinates) and the start of _cggf (maximum of two ints).
};};};};_dbabb (_dcgb );return _degbe ;};func (_ecae *ruling )equals (_cbddg *ruling )bool {return _ecae ._fabfb ==_cbddg ._fabfb &&_efca (_ecae ._efbdg ,_cbddg ._efbdg )&&_efca (_ecae ._becdd ,_cbddg ._becdd )&&_efca (_ecae ._aggb ,_cbddg ._aggb );};func _cggf (_fcff ,_agead int )int {if _fcff > _agead {return _fcff ;
|
||
// (*subpath).add, at the end of the next line, appends points to the subpath.
};return _agead ;};func (_gfae *subpath )add (_bdbg ..._bab .Point ){_gfae ._bcbc =append (_gfae ._bcbc ,_bdbg ...)};
|
||
|
||
// ImageExtractOptions contains options for controlling image extraction from
|
||
// PDF pages.
|
||
type ImageExtractOptions struct{IncludeInlineStencilMasks bool ;};func _bbbb (_ebfd int ,_fgead map[int ][]float64 )([]int ,int ){_ffcb :=make ([]int ,_ebfd );_fcfga :=0;for _aabd :=0;_aabd < _ebfd ;_aabd ++{_ffcb [_aabd ]=_fcfga ;_fcfga +=len (_fgead [_aabd ])+1;
|
||
};return _ffcb ,_fcfga ;};func (_cbdff *textTable )getDown ()paraList {_gefeb :=make (paraList ,_cbdff ._bgdee );for _cdgd :=0;_cdgd < _cbdff ._bgdee ;_cdgd ++{_acdg :=_cbdff .get (_cdgd ,_cbdff ._fcedd -1)._bgce ;if _acdg .taken (){return nil ;};_gefeb [_cdgd ]=_acdg ;
|
||
};for _ggbac :=0;_ggbac < _cbdff ._bgdee -1;_ggbac ++{if _gefeb [_ggbac ]._becf !=_gefeb [_ggbac +1]{return nil ;};};return _gefeb ;};func _dca (_eeb []Font ,_cg string )bool {for _ ,_bcg :=range _eeb {if _bcg .FontName ==_cg {return true ;};};return false ;
|
||
};func (_ebf *textObject )setTextLeading (_ecf float64 ){if _ebf ==nil {return ;};_ebf ._cgf ._add =_ecf ;};func (_ebfb *textObject )setFont (_bfe string ,_cbf float64 )error {if _ebfb ==nil {return nil ;};_ebfb ._cgf ._fdd =_cbf ;_gaf ,_gcc :=_ebfb .getFont (_bfe );
|
||
if _gcc !=nil {return _gcc ;};_ebfb ._cgf ._gcb =_gaf ;return nil ;};func (_agcae paraList )toTextMarks ()[]TextMark {_bdda :=0;var _bccf []TextMark ;for _ccdc ,_fddcc :=range _agcae {if _fddcc ._bedda {continue ;};_egdef :=_fddcc .toTextMarks (&_bdda );
|
||
_bccf =append (_bccf ,_egdef ...);if _ccdc !=len (_agcae )-1{if _adfb (_fddcc ,_agcae [_ccdc +1]){_bccf =_egag (_bccf ,&_bdda ,"\u0020");}else {_bccf =_egag (_bccf ,&_bdda ,"\u000a");_bccf =_egag (_bccf ,&_bdda ,"\u000a");};};};_bccf =_egag (_bccf ,&_bdda ,"\u000a");
|
||
_bccf =_egag (_bccf ,&_bdda ,"\u000a");return _bccf ;};func _cdce (_eced func (*wordBag ,*textWord ,float64 )bool ,_gebfd float64 )func (*wordBag ,*textWord )bool {return func (_aeag *wordBag ,_gccf *textWord )bool {return _eced (_aeag ,_gccf ,_gebfd )};
|
||
};func (_gfed *wordBag )depthRange (_ddfba ,_bead int )[]int {var _faeg []int ;for _bgdd :=range _gfed ._adcb {if _ddfba <=_bgdd &&_bgdd <=_bead {_faeg =append (_faeg ,_bgdd );};};if len (_faeg )==0{return nil ;};_e .Ints (_faeg );return _faeg ;};func (_dgbfb *wordBag )text ()string {_ffcf :=_dgbfb .allWords ();
|
||
_daadf :=make ([]string ,len (_ffcf ));for _fced ,_egeb :=range _ffcf {_daadf [_fced ]=_egeb ._bgdg ;};return _ce .Join (_daadf ,"\u0020");};func _cccdb (_fedc string ,_dfag []rulingList ){_d .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_dfag ),_fedc );
|
||
for _ccba ,_fcab :=range _dfag {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ccba ,_fcab .String ());};};func _fcgba (_baae string )bool {for _ ,_afbe :=range _baae {if !_bb .IsSpace (_afbe ){return false ;};};return true ;};func (_dfgb compositeCell )String ()string {_fdfga :="";
|
||
if len (_dfgb .paraList )> 0{_fdfga =_ebfce (_dfgb .paraList .merge ().text (),50);};return _caa .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071",_dfgb .PdfRectangle ,len (_dfgb .paraList ),_fdfga );
|
||
};func (_agab *textTable )toTextTable ()TextTable {if _eea {_d .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_agab ._bgdee ,_agab ._fcedd );};_ebad :=make ([][]TableCell ,_agab ._fcedd );for _dbfgd :=0;
|
||
_dbfgd < _agab ._fcedd ;_dbfgd ++{_ebad [_dbfgd ]=make ([]TableCell ,_agab ._bgdee );for _abada :=0;_abada < _agab ._bgdee ;_abada ++{_addac :=_agab .get (_abada ,_dbfgd );if _addac ==nil {continue ;};if _eea {_caa .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_abada ,_dbfgd ,_addac );
|
||
};_ebad [_dbfgd ][_abada ].Text =_addac .text ();_dcgf :=0;_ebad [_dbfgd ][_abada ].Marks ._bgbd =_addac .toTextMarks (&_dcgf );};};return TextTable {W :_agab ._bgdee ,H :_agab ._fcedd ,Cells :_ebad };};
|
||
|
||
// String returns a description of `k`.
|
||
func (_aaeaa markKind )String ()string {_badf ,_aafg :=_bbdf [_aaeaa ];if !_aafg {return _caa .Sprintf ("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064",_aaeaa );};return _badf ;};
|
||
|
||
// Elements returns the TextMarks in `ma`.
|
||
func (_fgab *TextMarkArray )Elements ()[]TextMark {return _fgab ._bgbd };func (_bdeac *textPara )isAtom ()*textTable {_abdcb :=_bdeac ;_gfgb :=_bdeac ._becf ;_fbccg :=_bdeac ._bgce ;if _gfgb .taken ()||_fbccg .taken (){return nil ;};_ccgc :=_gfgb ._bgce ;
|
||
if _ccgc .taken ()||_ccgc !=_fbccg ._becf {return nil ;};return _cadb (_abdcb ,_gfgb ,_fbccg ,_ccgc );};func (_debd rulingList )aligned ()bool {if len (_debd )< 2{return false ;};_becb :=make (map[*ruling ]int );_becb [_debd [0]]=0;for _ ,_agbd :=range _debd [1:]{_bacd :=false ;
|
||
for _deed :=range _becb {if _agbd .gridIntersecting (_deed ){_becb [_deed ]++;_bacd =true ;break ;};};if !_bacd {_becb [_agbd ]=0;};};_cffba :=0;for _ ,_cfab :=range _becb {if _cfab ==0{_cffba ++;};};_fddd :=float64 (_cffba )/float64 (len (_debd ));_ceaaa :=_fddd <=1.0-_ega ;
|
||
if _cage {_d .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_ceaaa ,_fddd ,_cffba ,len (_debd ),_debd .String ());
|
||
};return _ceaaa ;};type wordBag struct{_dc .PdfRectangle ;_gffd float64 ;_bbce ,_dce rulingList ;_deae float64 ;_adcb map[int ][]*textWord ;};func (_bgaeg *textWord )toTextMarks (_dfbac *int )[]TextMark {var _dged []TextMark ;for _ ,_agcf :=range _bgaeg ._gbaed {_dged =_fgbc (_dged ,_dfbac ,_agcf .ToTextMark ());
|
||
};return _dged ;};func _agag (_eggg []rulingList )(rulingList ,rulingList ){var _ggbg rulingList ;for _ ,_egcg :=range _eggg {_ggbg =append (_ggbg ,_egcg ...);};return _ggbg .vertsHorzs ();};func (_aga compositeCell )hasLines (_adfdd []*textLine )bool {for _fbbg ,_faege :=range _adfdd {_fefgf :=_dggg (_aga .PdfRectangle ,_faege .PdfRectangle );
|
||
if _eea {_caa .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a",_fefgf ,_fbbg ,len (_adfdd ));_caa .Printf ("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a",_aga );
|
||
_caa .Printf ("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a",_faege );};if _fefgf {return true ;};};return false ;};func (_ceg *shapesState )newSubPath (){_ceg .clearPath ();if _fgeac {_d .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_ceg );
|
||
};};func _bdb (_abf *_gcf .ContentStreamOperation )(float64 ,error ){if len (_abf .Params )!=1{_adb :=_g .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_abf .Operand ,1,len (_abf .Params ),_abf .Params );
|
||
return 0.0,_adb ;};return _aa .GetNumberAsFloat (_abf .Params [0]);};func _fgac (_aedeb []_aa .PdfObject )(_ggaba ,_ddafac float64 ,_fcbag error ){if len (_aedeb )!=2{return 0,0,_caa .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_aedeb ));
|
||
};_cceg ,_fcbag :=_aa .GetNumbersAsFloat (_aedeb );if _fcbag !=nil {return 0,0,_fcbag ;};return _cceg [0],_cceg [1],nil ;};func (_ddff *textTable )log (_dcaa string ){if !_eea {return ;};_d .Log .Info ("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066",_dcaa ,_ddff ._bgdee ,_ddff ._fcedd ,_ddff ._bebc ,_ddff .PdfRectangle );
|
||
for _dcafa :=0;_dcafa < _ddff ._fcedd ;_dcafa ++{for _gbccb :=0;_gbccb < _ddff ._bgdee ;_gbccb ++{_agaf :=_ddff .get (_gbccb ,_dcafa );if _agaf ==nil {continue ;};_caa .Printf ("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a",_gbccb ,_dcafa ,_agaf .PdfRectangle ,_ebfce (_agaf .text (),50),_b .RuneCountInString (_agaf .text ()));
|
||
};};};func (_cdab *wordBag )maxDepth ()float64 {return _cdab ._deae -_cdab .Lly };func (_cfbgg rulingList )bbox ()_dc .PdfRectangle {var _addg _dc .PdfRectangle ;if len (_cfbgg )==0{_d .Log .Error ("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073");
|
||
return _dc .PdfRectangle {};};if _cfbgg [0]._fabfb ==_gegc {_addg .Llx ,_addg .Urx =_cfbgg .secMinMax ();_addg .Lly ,_addg .Ury =_cfbgg .primMinMax ();}else {_addg .Llx ,_addg .Urx =_cfbgg .primMinMax ();_addg .Lly ,_addg .Ury =_cfbgg .secMinMax ();};return _addg ;
|
||
};func _gfga (_aggd ,_ddab float64 )string {_eabc :=!_edeg (_aggd -_ddab );if _eabc {return "\u000a";};return "\u0020";};func (_gfec lineRuling )xMean ()float64 {return 0.5*(_gfec ._becgc .X +_gfec ._facd .X )};
|
||
|
||
// String returns a human readable description of `vecs`.
|
||
func (_cbfb rulingList )String ()string {if len (_cbfb )==0{return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}";};_cbdf ,_bgbg :=_cbfb .vertsHorzs ();_bacdc :=len (_cbdf );_dgab :=len (_bgbg );if _bacdc ==0||_dgab ==0{return _caa .Sprintf ("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}",_bacdc ,_dgab );
|
||
};_bdgc :=_dc .PdfRectangle {Llx :_cbdf [0]._efbdg ,Urx :_cbdf [_bacdc -1]._efbdg ,Lly :_bgbg [_dgab -1]._efbdg ,Ury :_bgbg [0]._efbdg };return _caa .Sprintf ("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d",_bacdc ,_dgab ,_bdgc );
|
||
};type textState struct{_cace float64 ;_fgd float64 ;_abbd float64 ;_add float64 ;_fdd float64 ;_efbd RenderMode ;_bffe float64 ;_gcb *_dc .PdfFont ;_eegd _dc .PdfRectangle ;_efde int ;_cdc int ;};func _deba (_ebac ,_edae bounded )float64 {return _ebac .bbox ().Llx -_edae .bbox ().Llx };
|
||
|
||
|
||
// String returns a string descibing `i`.
|
||
func (_bacf gridTile )String ()string {_abddf :=func (_gdag bool ,_ceaac string )string {if _gdag {return _ceaac ;};return "\u005f";};return _caa .Sprintf ("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073",_bacf .PdfRectangle ,_abddf (_bacf ._gcfbe ,"\u004c"),_abddf (_bacf ._ebbb ,"\u0052"),_abddf (_bacf ._ecbgb ,"\u0042"),_abddf (_bacf ._faaa ,"\u0054"));
|
||
};
|
||
|
||
// String returns a description of `l`.
|
||
func (_gecc *textLine )String ()string {return _caa .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_gecc ._gddd ,_gecc .PdfRectangle ,_gecc ._bfcdd ,_gecc .text ());
|
||
};func (_egbb paraList )log (_dbcfd string ){if !_cfee {return ;};_d .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_dbcfd ,len (_egbb ));
|
||
for _cefc ,_dbab :=range _egbb {if _dbab ==nil {continue ;};_eefe :=_dbab .text ();_dbee :="\u0020\u0020";if _dbab ._affa !=nil {_dbee =_caa .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_dbab ._affa ._bgdee ,_dbab ._affa ._fcedd );};_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_cefc ,_dbab .PdfRectangle ,_dbee ,_ebfce (_eefe ,50));
|
||
};};func (_edg *textLine )appendWord (_cbfdf *textWord ){_edg ._ccfce =append (_edg ._ccfce ,_cbfdf );_edg .PdfRectangle =_abga (_edg .PdfRectangle ,_cbfdf .PdfRectangle );if _cbfdf ._debab > _edg ._bfcdd {_edg ._bfcdd =_cbfdf ._debab ;};if _cbfdf ._cffg > _edg ._gddd {_edg ._gddd =_cbfdf ._cffg ;
|
||
};};func (_feac *textWord )bbox ()_dc .PdfRectangle {return _feac .PdfRectangle };func _fdga (_gbcc ,_aeec _dc .PdfRectangle )bool {return _gbcc .Lly <=_aeec .Ury &&_aeec .Lly <=_gbcc .Ury ;};func (_fgged paraList )yNeighbours (_cfeff float64 )map[*textPara ][]int {_feead :=make ([]event ,2*len (_fgged ));
|
||
if _cfeff ==0{for _acdge ,_beef :=range _fgged {_feead [2*_acdge ]=event {_beef .Lly ,true ,_acdge };_feead [2*_acdge +1]=event {_beef .Ury ,false ,_acdge };};}else {for _gafffc ,_fdbbc :=range _fgged {_feead [2*_gafffc ]=event {_fdbbc .Lly -_cfeff *_fdbbc .fontsize (),true ,_gafffc };
|
||
_feead [2*_gafffc +1]=event {_fdbbc .Ury +_cfeff *_fdbbc .fontsize (),false ,_gafffc };};};return _fgged .eventNeighbours (_feead );};func (_eacg rulingList )primaries ()[]float64 {_bebd :=make (map[float64 ]struct{},len (_eacg ));for _ ,_agge :=range _eacg {_bebd [_agge ._efbdg ]=struct{}{};
|
||
};_bbead :=make ([]float64 ,len (_bebd ));_ceeg :=0;for _aebgg :=range _bebd {_bbead [_ceeg ]=_aebgg ;_ceeg ++;};_e .Float64s (_bbead );return _bbead ;};func _fcfb (_daca *wordBag ,_afb int )*textLine {_daee :=_daca .firstWord (_afb );_fddbb :=textLine {PdfRectangle :_daee .PdfRectangle ,_bfcdd :_daee ._debab ,_gddd :_daee ._cffg };
|
||
_fddbb .pullWord (_daca ,_daee ,_afb );return &_fddbb ;};func _aebga (_becad []float64 ,_fdeeb ,_dffc float64 )[]float64 {_beaaf ,_ddae :=_fdeeb ,_dffc ;if _ddae < _beaaf {_beaaf ,_ddae =_ddae ,_beaaf ;};_gaed :=make ([]float64 ,0,len (_becad )+2);_gaed =append (_gaed ,_fdeeb );
|
||
for _ ,_bfdfe :=range _becad {if _bfdfe <=_beaaf {continue ;}else if _bfdfe >=_ddae {break ;};_gaed =append (_gaed ,_bfdfe );};_gaed =append (_gaed ,_dffc );return _gaed ;};func (_gbde paraList )eventNeighbours (_adae []event )map[*textPara ][]int {_e .Slice (_adae ,func (_daeec ,_gcdeeb int )bool {_geca ,_fgeg :=_adae [_daeec ],_adae [_gcdeeb ];
|
||
_bbdag ,_egagc :=_geca ._dfec ,_fgeg ._dfec ;if _bbdag !=_egagc {return _bbdag < _egagc ;};if _geca ._efga !=_fgeg ._efga {return _geca ._efga ;};return _daeec < _gcdeeb ;});_ddbg :=make (map[int ]intSet );_efef :=make (intSet );for _ ,_eagbc :=range _adae {if _eagbc ._efga {_ddbg [_eagbc ._agafc ]=make (intSet );
|
||
for _gbccbd :=range _efef {if _gbccbd !=_eagbc ._agafc {_ddbg [_eagbc ._agafc ].add (_gbccbd );_ddbg [_gbccbd ].add (_eagbc ._agafc );};};_efef .add (_eagbc ._agafc );}else {_efef .del (_eagbc ._agafc );};};_gceba :=map[*textPara ][]int {};for _fbda ,_caggeb :=range _ddbg {_efcggg :=_gbde [_fbda ];
|
||
if len (_caggeb )==0{_gceba [_efcggg ]=nil ;continue ;};_eeaae :=make ([]int ,len (_caggeb ));_bbga :=0;for _gdcf :=range _caggeb {_eeaae [_bbga ]=_gdcf ;_bbga ++;};_gceba [_efcggg ]=_eeaae ;};return _gceba ;};func (_cgaee paraList )findGridTables (_cedd []gridTiling )[]*textTable {if _eea {_d .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_cgaee ));
|
||
for _dfdb ,_aece :=range _cgaee {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dfdb ,_aece );};};var _bega []*textTable ;for _agadd ,_eeagd :=range _cedd {_bagbc ,_dbag :=_cgaee .findTableGrid (_eeagd );if _bagbc !=nil {_bagbc .log (_caa .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_agadd ));
|
||
_bega =append (_bega ,_bagbc );_bagbc .markCells ();};for _adfff :=range _dbag {_adfff ._bgcb =true ;};};if _eea {_d .Log .Info ("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s",len (_bega ));
|
||
};return _bega ;};const (_aedg =false ;_agb =false ;_dccf =false ;_eece =false ;_fgeac =false ;_gfaee =false ;_aebe =false ;_cfee =false ;_daag =false ;_gegf =_daag &&true ;_dbf =_gegf &&false ;_feaa =_daag &&true ;_eea =false ;_agbf =_eea &&false ;_gggd =_eea &&true ;
|
||
_cage =false ;_eabg =_cage &&false ;_baea =_cage &&false ;_dgfd =_cage &&true ;_agca =_cage &&false ;_gcdd =_cage &&false ;);var (_cc =_g .New ("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072");_cb =_g .New ("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072");
|
||
);func (_ffcg paraList )lines ()[]*textLine {var _eega []*textLine ;for _ ,_ebee :=range _ffcg {_eega =append (_eega ,_ebee ._ddaf ...);};return _eega ;};
|
||
|
||
// Len returns the number of TextMarks in `ma`.
|
||
func (_fgea *TextMarkArray )Len ()int {if _fgea ==nil {return 0;};return len (_fgea ._bgbd );};type textPara struct{_dc .PdfRectangle ;_aecfg _dc .PdfRectangle ;_ddaf []*textLine ;_affa *textTable ;_bgcb bool ;_bedda bool ;_egdgfd *textPara ;_becf *textPara ;
|
||
_gffb *textPara ;_bgce *textPara ;};func (_bfdf *textObject )setTextMatrix (_cfcd []float64 ){if len (_cfcd )!=6{_d .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_cfcd ));
|
||
return ;};_ebg ,_cbe ,_febe ,_dfa ,_agff ,_cee :=_cfcd [0],_cfcd [1],_cfcd [2],_cfcd [3],_cfcd [4],_cfcd [5];_bfdf ._aafd =_bab .NewMatrix (_ebg ,_cbe ,_febe ,_dfa ,_agff ,_cee );_bfdf ._acd =_bfdf ._aafd ;};type textResult struct{_fbf PageText ;_eab int ;
|
||
_fadb int ;};func (_cba *imageExtractContext )extractXObjectImage (_eb *_aa .PdfObjectName ,_cf _gcf .GraphicsState ,_egd *_dc .PdfPageResources )error {_age ,_ :=_egd .GetXObjectByName (*_eb );if _age ==nil {return nil ;};_fag ,_dcd :=_cba ._egc [_age ];
|
||
if !_dcd {_gbe ,_bgg :=_egd .GetXObjectImageByName (*_eb );if _bgg !=nil {return _bgg ;};if _gbe ==nil {return nil ;};_bca ,_bgg :=_gbe .ToImage ();if _bgg !=nil {return _bgg ;};_fag =&cachedImage {_dga :_bca ,_fcc :_gbe .ColorSpace };_cba ._egc [_age ]=_fag ;
|
||
};_dge :=_fag ._dga ;_fae :=_fag ._fcc ;_fd ,_bbb :=_fae .ImageToRGB (*_dge );if _bbb !=nil {return _bbb ;};_d .Log .Debug ("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_cf .CTM .String ());_bgea :=ImageMark {Image :&_fd ,Width :_cf .CTM .ScalingFactorX (),Height :_cf .CTM .ScalingFactorY (),Angle :_cf .CTM .Angle ()};
|
||
_bgea .X ,_bgea .Y =_cf .CTM .Translation ();_cba ._ggf =append (_cba ._ggf ,_bgea );_cba ._gfa ++;return nil ;};func _ggce (_gefc _dc .PdfColorspace ,_bgfa _dc .PdfColor )_ba .Color {if _gefc ==nil ||_bgfa ==nil {return _ba .Black ;};_fgcd ,_fddbd :=_gefc .ColorToRGB (_bgfa );
|
||
if _fddbd !=nil {_d .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_bgfa ,_gefc ,_fddbd );
|
||
return _ba .Black ;};_cbfee ,_cgaf :=_fgcd .(*_dc .PdfColorDeviceRGB );if !_cgaf {_d .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_fgcd );
|
||
return _ba .Black ;};return _ba .NRGBA {R :uint8 (_cbfee .R ()*255),G :uint8 (_cbfee .G ()*255),B :uint8 (_cbfee .B ()*255),A :uint8 (255)};};func (_gbb *wordBag )blocked (_gecg *textWord )bool {if _gecg .Urx < _gbb .Llx {_dfbdd :=_dbef (_gecg .PdfRectangle );
|
||
_fdfb :=_dggfd (_gbb .PdfRectangle );if _gbb ._bbce .blocks (_dfbdd ,_fdfb ){if _gcdd {_d .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_gecg ,_gbb );};return true ;};}else if _gbb .Urx < _gecg .Llx {_bcac :=_dbef (_gbb .PdfRectangle );
|
||
_dfbb :=_dggfd (_gecg .PdfRectangle );if _gbb ._bbce .blocks (_bcac ,_dfbb ){if _gcdd {_d .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s",_gecg ,_gbb );};return true ;};};if _gecg .Ury < _gbb .Lly {_afac :=_adabd (_gecg .PdfRectangle );
|
||
_dedc :=_ggdge (_gbb .PdfRectangle );if _gbb ._dce .blocks (_afac ,_dedc ){if _gcdd {_d .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_gecg ,_gbb );};return true ;};}else if _gbb .Ury < _gecg .Lly {_afef :=_adabd (_gbb .PdfRectangle );
|
||
_fefe :=_ggdge (_gecg .PdfRectangle );if _gbb ._dce .blocks (_afef ,_fefe ){if _gcdd {_d .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s",_gecg ,_gbb );};return true ;};};return false ;};func _gagd (_caad int ,_cfgf func (int ,int )bool )[]int {_feaea :=make ([]int ,_caad );
|
||
for _fbccf :=range _feaea {_feaea [_fbccf ]=_fbccf ;};_e .Slice (_feaea ,func (_bffd ,_abefd int )bool {return _cfgf (_feaea [_bffd ],_feaea [_abefd ])});return _feaea ;};func (_feg *textObject )setWordSpacing (_gaff float64 ){if _feg ==nil {return ;};
|
||
_feg ._cgf ._fgd =_gaff ;};func (_acbe *textWord )addDiacritic (_faca string ){_efcdd :=_acbe ._gbaed [len (_acbe ._gbaed )-1];_efcdd ._cbge +=_faca ;_efcdd ._cbge =_ac .NFKC .String (_efcdd ._cbge );};
|
||
|
||
// TextTable represents a table.
|
||
// Cells are ordered top-to-bottom, left-to-right.
|
||
// Cells[y] is the (0-offset) y'th row in the table.
|
||
// Cells[y][x] is the (0-offset) x'th column in the table.
|
||
type TextTable struct{W ,H int ;Cells [][]TableCell ;};func _adabd (_eebe _dc .PdfRectangle )*ruling {return &ruling {_fabfb :_gegc ,_efbdg :_eebe .Ury ,_becdd :_eebe .Llx ,_aggb :_eebe .Urx };};func (_cdbdg paraList )addNeighbours (){_cafbg :=func (_gaged []int ,_adbgf *textPara )([]*textPara ,[]*textPara ){_fgcef :=make ([]*textPara ,0,len (_gaged )-1);
|
||
_gaffb :=make ([]*textPara ,0,len (_gaged )-1);for _ ,_gegbdd :=range _gaged {_eebf :=_cdbdg [_gegbdd ];if _eebf .Urx <=_adbgf .Llx {_fgcef =append (_fgcef ,_eebf );}else if _eebf .Llx >=_adbgf .Urx {_gaffb =append (_gaffb ,_eebf );};};return _fgcef ,_gaffb ;
|
||
};_cbec :=func (_bbgc []int ,_daeed *textPara )([]*textPara ,[]*textPara ){_gaaa :=make ([]*textPara ,0,len (_bbgc )-1);_dgad :=make ([]*textPara ,0,len (_bbgc )-1);for _ ,_gfefa :=range _bbgc {_dggeg :=_cdbdg [_gfefa ];if _dggeg .Ury <=_daeed .Lly {_dgad =append (_dgad ,_dggeg );
|
||
}else if _dggeg .Lly >=_daeed .Ury {_gaaa =append (_gaaa ,_dggeg );};};return _gaaa ,_dgad ;};_dagd :=_cdbdg .yNeighbours (_cddaf );for _ ,_daed :=range _cdbdg {_ddfcc :=_dagd [_daed ];if len (_ddfcc )==0{continue ;};_bfdg ,_dfagg :=_cafbg (_ddfcc ,_daed );
|
||
if len (_bfdg )==0&&len (_dfagg )==0{continue ;};if len (_bfdg )> 0{_bfdb :=_bfdg [0];for _ ,_fgbec :=range _bfdg [1:]{if _fgbec .Urx >=_bfdb .Urx {_bfdb =_fgbec ;};};for _ ,_gafgd :=range _bfdg {if _gafgd !=_bfdb &&_gafgd .Urx > _bfdb .Llx {_bfdb =nil ;
|
||
break ;};};if _bfdb !=nil &&_fdga (_daed .PdfRectangle ,_bfdb .PdfRectangle ){_daed ._egdgfd =_bfdb ;};};if len (_dfagg )> 0{_bfdde :=_dfagg [0];for _ ,_cefgc :=range _dfagg [1:]{if _cefgc .Llx <=_bfdde .Llx {_bfdde =_cefgc ;};};for _ ,_gagb :=range _dfagg {if _gagb !=_bfdde &&_gagb .Llx < _bfdde .Urx {_bfdde =nil ;
|
||
break ;};};if _bfdde !=nil &&_fdga (_daed .PdfRectangle ,_bfdde .PdfRectangle ){_daed ._becf =_bfdde ;};};};_dagd =_cdbdg .xNeighbours (_eaga );for _ ,_caag :=range _cdbdg {_abfc :=_dagd [_caag ];if len (_abfc )==0{continue ;};_befc ,_gbcb :=_cbec (_abfc ,_caag );
|
||
if len (_befc )==0&&len (_gbcb )==0{continue ;};if len (_gbcb )> 0{_bacc :=_gbcb [0];for _ ,_egdbd :=range _gbcb [1:]{if _egdbd .Ury >=_bacc .Ury {_bacc =_egdbd ;};};for _ ,_ebbce :=range _gbcb {if _ebbce !=_bacc &&_ebbce .Ury > _bacc .Lly {_bacc =nil ;
|
||
break ;};};if _bacc !=nil &&_dee (_caag .PdfRectangle ,_bacc .PdfRectangle ){_caag ._bgce =_bacc ;};};if len (_befc )> 0{_fdbca :=_befc [0];for _ ,_bdgg :=range _befc [1:]{if _bdgg .Lly <=_fdbca .Lly {_fdbca =_bdgg ;};};for _ ,_egegd :=range _befc {if _egegd !=_fdbca &&_egegd .Lly < _fdbca .Ury {_fdbca =nil ;
|
||
break ;};};if _fdbca !=nil &&_dee (_caag .PdfRectangle ,_fdbca .PdfRectangle ){_caag ._gffb =_fdbca ;};};};for _ ,_gfdef :=range _cdbdg {if _gfdef ._egdgfd !=nil &&_gfdef ._egdgfd ._becf !=_gfdef {_gfdef ._egdgfd =nil ;};if _gfdef ._gffb !=nil &&_gfdef ._gffb ._bgce !=_gfdef {_gfdef ._gffb =nil ;
|
||
};if _gfdef ._becf !=nil &&_gfdef ._becf ._egdgfd !=_gfdef {_gfdef ._becf =nil ;};if _gfdef ._bgce !=nil &&_gfdef ._bgce ._gffb !=_gfdef {_gfdef ._bgce =nil ;};};};func _ceebc (_cacgb []compositeCell )[]float64 {var _dafeda []*textLine ;_bagag :=0;for _ ,_beaag :=range _cacgb {_bagag +=len (_beaag .paraList );
|
||
_dafeda =append (_dafeda ,_beaag .lines ()...);};_e .Slice (_dafeda ,func (_dabc ,_efbf int )bool {_egggb ,_fefd :=_dafeda [_dabc ],_dafeda [_efbf ];_fagd ,_cddd :=_egggb ._gddd ,_fefd ._gddd ;if !_edeg (_fagd -_cddd ){return _fagd < _cddd ;};return _egggb .Llx < _fefd .Llx ;
|
||
});if _eea {_caa .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_bagag ,len (_dafeda ));for _bbbf ,_gbed :=range _dafeda {_caa .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_bbbf ,_gbed );
|
||
};};var _bgccd []float64 ;_afec :=_dafeda [0];var _bcea [][]*textLine ;_dagc :=[]*textLine {_afec };for _afeca ,_fcdbe :=range _dafeda [1:]{if _fcdbe .Ury < _afec .Lly {_gcccbb :=0.5*(_fcdbe .Ury +_afec .Lly );if _eea {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_afeca ,_fcdbe .Ury ,_afec .Lly ,_gcccbb ,_afec ,_fcdbe );
|
||
};_bgccd =append (_bgccd ,_gcccbb );_bcea =append (_bcea ,_dagc );_dagc =nil ;};_dagc =append (_dagc ,_fcdbe );if _fcdbe .Lly < _afec .Lly {_afec =_fcdbe ;};};if len (_dagc )> 0{_bcea =append (_bcea ,_dagc );};if _eea {_caa .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_bgccd );
|
||
};if _eea {_d .Log .Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len (_cacgb ));for _efcgg ,_dgabb :=range _cacgb {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_efcgg ,_dgabb );};_d .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_bcea ));
|
||
for _ddag ,_bgad :=range _bcea {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_ddag ,len (_bgad ));for _cade ,_ebcd :=range _bgad {_caa .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_cade ,_ebcd );};};};_gffbf :=true ;for _degbbb ,_efbfc :=range _bcea {_cefd :=true ;
|
||
for _degae ,_bgca :=range _cacgb {if _eea {_caa .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_degbbb ,len (_bcea ),_degae ,len (_cacgb ),_bgca );
|
||
};if !_bgca .hasLines (_efbfc ){if _eea {_caa .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_degbbb ,len (_bcea ),_degae ,len (_cacgb ));
|
||
};_cefd =false ;break ;};};if !_cefd {_gffbf =false ;break ;};};if !_gffbf {if _eea {_d .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg");
|
||
};_bgccd =nil ;};if _eea &&_bgccd !=nil {_caa .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_bgccd );};return _bgccd ;
|
||
};func (_bcfb *textTable )newTablePara ()*textPara {_egbdb :=_bcfb .computeBbox ();_fbccc :=&textPara {PdfRectangle :_egbdb ,_aecfg :_egbdb ,_affa :_bcfb };if _eea {_d .Log .Info ("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073",_fbccc );
|
||
};return _fbccc ;};
|
||
|
||
// String returns a description of `w`.
|
||
func (_cgdb *textWord )String ()string {return _caa .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_cgdb ._cffg ,_cgdb .PdfRectangle ,_cgdb ._debab ,_cgdb ._bgdg );
|
||
};type rectRuling struct{_fgdfae rulingKind ;_afdbf markKind ;_ba .Color ;_dc .PdfRectangle ;};
|
||
|
||
// String returns a string describing `tm`.
|
||
func (_bga TextMark )String ()string {_cgca :=_bga .BBox ;var _agcd string ;if _bga .Font !=nil {_agcd =_bga .Font .String ();if len (_agcd )> 50{_agcd =_agcd [:50]+"\u002e\u002e\u002e";};};var _gffc string ;if _bga .Meta {_gffc ="\u0020\u002a\u004d\u002a";
|
||
};return _caa .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_bga .Offset ,_bga .Text ,[]rune (_bga .Text ),_cgca .Llx ,_cgca .Lly ,_cgca .Urx ,_cgca .Ury ,_agcd ,_gffc );
|
||
};func (_bcdad *wordBag )highestWord (_gfgc int ,_acg ,_edfe float64 )*textWord {for _ ,_gdbb :=range _bcdad ._adcb [_gfgc ]{if _acg <=_gdbb ._cffg &&_gdbb ._cffg <=_edfe {return _gdbb ;};};return nil ;};type subpath struct{_bcbc []_bab .Point ;_adge bool ;
|
||
};func (_egce *textObject )renderText (_cagc _aa .PdfObject ,_adf []byte )error {if _egce ._dcg {_d .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");
|
||
return nil ;};_fge :=_egce .getCurrentFont ();_bcdd :=_fge .BytesToCharcodes (_adf );_beag ,_ggc ,_fede :=_fge .CharcodesToStrings (_bcdd );if _fede > 0{_d .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_ggc ,_fede );
|
||
};_egce ._cgf ._efde +=_ggc ;_egce ._cgf ._cdc +=_fede ;_dec :=_egce ._cgf ;_bdc :=_dec ._fdd ;_fbcg :=_dec ._abbd /100.0;_dggf :=_gdcag ;if _fge .Subtype ()=="\u0054\u0079\u0070e\u0033"{_dggf =1;};_fedg ,_bddc :=_fge .GetRuneMetrics (' ');if !_bddc {_fedg ,_bddc =_fge .GetCharMetrics (32);
|
||
};if !_bddc {_fedg ,_ =_dc .DefaultFont ().GetRuneMetrics (' ');};_bgee :=_fedg .Wx *_dggf ;_d .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_bgee ,_beag ,_fge ,_bdc );
|
||
_eda :=_bab .NewMatrix (_bdc *_fbcg ,0,0,_bdc ,0,_dec ._bffe );if _gfaee {_d .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_bcdd ),_bcdd ,_beag );
|
||
};_d .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_bcdd ),_bcdd ,len (_beag ));_efc :=_egce .getFillColor ();
|
||
_gda :=_egce .getStrokeColor ();for _geee ,_dfcb :=range _beag {_egee :=[]rune (_dfcb );if len (_egee )==1&&_egee [0]=='\x00'{continue ;};_bde :=_bcdd [_geee ];_fbdd :=_egce ._fdf .CTM .Mult (_egce ._aafd ).Mult (_eda );_bgd :=0.0;if len (_egee )==1&&_egee [0]==32{_bgd =_dec ._fgd ;
|
||
};_dcbe ,_cffd :=_fge .GetCharMetrics (_bde );if !_cffd {_d .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_bde ,_egee ,_egee ,_fge );
|
||
return _caa .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_fge .String (),_bde );};_daga :=_bab .Point {X :_dcbe .Wx *_dggf ,Y :_dcbe .Wy *_dggf };
|
||
_fdba :=_bab .Point {X :(_daga .X *_bdc +_bgd )*_fbcg };_gdge :=_bab .Point {X :(_daga .X *_bdc +_dec ._cace +_bgd )*_fbcg };if _gfaee {_d .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_bdc ,_dec ._cace ,_dec ._fgd ,_fbcg );
|
||
_d .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_daga ,_fdba ,_gdge );};_bfc :=_aec (_fdba );_fdbb :=_aec (_gdge );_acdc :=_egce ._fdf .CTM .Mult (_egce ._aafd ).Mult (_bfc );
|
||
if _eece {_d .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_egce ._fdf .CTM ,_egce ._aafd ,_fdbb ,_fdacf (_egce ._fdf .CTM .Mult (_egce ._aafd ).Mult (_fdbb )),_bfc ,_acdc ,_fdacf (_acdc ));
|
||
};_fadf ,_efe :=_egce .newTextMark (_ad .ExpandLigatures (_egee ),_fbdd ,_fdacf (_acdc ),_ca .Abs (_bgee *_fbdd .ScalingFactorX ()),_fge ,_egce ._cgf ._cace ,_efc ,_gda ,_cagc ,_beag ,_geee );if !_efe {_d .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");
|
||
continue ;};if _fge ==nil {_d .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _fge .Encoder ()==nil {_d .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_fge );
|
||
}else {if _dcga ,_deda :=_fge .Encoder ().CharcodeToRune (_bde );_deda {_fadf ._bcabg =string (_dcga );};};_d .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_geee ,_bde ,_fadf ,_fbdd );
|
||
_egce ._dcfd =append (_egce ._dcfd ,&_fadf );_egce ._aafd .Concat (_fdbb );};return nil ;};func _gefd (_bfbf ,_cafe _bab .Point )rulingKind {_fgbe :=_ca .Abs (_bfbf .X -_cafe .X );_cabg :=_ca .Abs (_bfbf .Y -_cafe .Y );return _afgac (_fgbe ,_cabg ,_adfd );
|
||
};type compositeCell struct{_dc .PdfRectangle ;paraList ;};func _cacgd (_cgcab ,_cadag _bab .Point )bool {_agbg :=_ca .Abs (_cgcab .X -_cadag .X );_feaed :=_ca .Abs (_cgcab .Y -_cadag .Y );return _aged (_feaed ,_agbg );};
|
||
|
||
// ApplyArea processes the page text only within the specified area `bbox`.
|
||
// Each time ApplyArea is called, it updates the result set in `pt`.
|
||
// Can be called multiple times in a row with different bounding boxes.
|
||
func (_gbc *PageText )ApplyArea (bbox _dc .PdfRectangle ){_dege :=make ([]*textMark ,0,len (_gbc ._gfc ));for _ ,_egca :=range _gbc ._gfc {if _dggg (_egca .bbox (),bbox ){_dege =append (_dege ,_egca );};};var _cgg paraList ;_bcda :=len (_dege );for _agcc :=0;
|
||
_agcc < 360&&_bcda > 0;_agcc +=90{_dgfa :=make ([]*textMark ,0,len (_dege )-_bcda );for _ ,_ebed :=range _dege {if _ebed ._gcce ==_agcc {_dgfa =append (_dgfa ,_ebed );};};if len (_dgfa )> 0{_bgff :=_fdef (_dgfa ,_gbc ._fdbe ,nil ,nil );_cgg =append (_cgg ,_bgff ...);
|
||
_bcda -=len (_dgfa );};};_gffa :=new (_f .Buffer );_cgg .writeText (_gffa );_gbc ._dbdg =_gffa .String ();_gbc ._efda =_cgg .toTextMarks ();_gbc ._fdc =_cgg .tables ();};func (_eeee lineRuling )yMean ()float64 {return 0.5*(_eeee ._becgc .Y +_eeee ._facd .Y )};
|
||
func _gbda (_gdf ,_eafcf *textPara )bool {return _dee (_gdf ._aecfg ,_eafcf ._aecfg )};func _aaed (_ggcg ,_afgb bounded )float64 {return _aab (_ggcg )-_aab (_afgb )};func (_gae *shapesState )closePath (){if _gae ._dcgc {_gae ._bfg =append (_gae ._bfg ,_badg (_gae ._fcaf ));
|
||
_gae ._dcgc =false ;}else if len (_gae ._bfg )==0{if _fgeac {_d .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");};_gae ._dcgc =false ;return ;};_gae ._bfg [len (_gae ._bfg )-1].close ();
|
||
if _fgeac {_d .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_gae );};};func _ggee (_egfg ,_bdcd bounded )float64 {return _egfg .bbox ().Llx -_bdcd .bbox ().Urx };func (_adbg *shapesState )fill (_faea *[]pathSection ){_gcae :=pathSection {_bffc :_adbg ._bfg ,Color :_adbg ._degb .getFillColor ()};
|
||
*_faea =append (*_faea ,_gcae );if _cage {_ebd :=_gcae .bbox ();_caa .Printf ("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",len (*_faea ),len (_gcae ._bffc ),_adbg ,_gcae .Color ,_ebd ,_ebd .Width (),_ebd .Height ());
|
||
if _eabg {for _abbc ,_gccc :=range _gcae ._bffc {_caa .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_abbc ,_gccc );if _abbc ==10{break ;};};};};};func (_dggge rulingList )mergePrimary ()float64 {_bedc :=_dggge [0]._efbdg ;for _ ,_gecd :=range _dggge [1:]{_bedc +=_gecd ._efbdg ;
|
||
};return _bedc /float64 (len (_dggge ));};func (_cfeg *textTable )growTable (){_gfaea :=func (_cbebe paraList ){_cfeg ._fcedd ++;for _gffbg :=0;_gffbg < _cfeg ._bgdee ;_gffbg ++{_acbc :=_cbebe [_gffbg ];_cfeg .put (_gffbg ,_cfeg ._fcedd -1,_acbc );};};
|
||
_afedg :=func (_ebfbg paraList ){_cfeg ._bgdee ++;for _gcbb :=0;_gcbb < _cfeg ._fcedd ;_gcbb ++{_fbcga :=_ebfbg [_gcbb ];_cfeg .put (_cfeg ._bgdee -1,_gcbb ,_fbcga );};};if _agbf {_cfeg .log ("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce");};for _bgdf :=0;
|
||
;_bgdf ++{_faegd :=false ;_adgf :=_cfeg .getDown ();_cacbf :=_cfeg .getRight ();if _agbf {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bgdf ,_cfeg );_caa .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a",_adgf );
|
||
_caa .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a",_cacbf );};if _adgf !=nil &&_cacbf !=nil {_ecccf :=_adgf [len (_adgf )-1];if !_ecccf .taken ()&&_ecccf ==_cacbf [len (_cacbf )-1]{_gfaea (_adgf );if _cacbf =_cfeg .getRight ();
|
||
_cacbf !=nil {_afedg (_cacbf );_cfeg .put (_cfeg ._bgdee -1,_cfeg ._fcedd -1,_ecccf );};_faegd =true ;};};if !_faegd &&_adgf !=nil {_gfaea (_adgf );_faegd =true ;};if !_faegd &&_cacbf !=nil {_afedg (_cacbf );_faegd =true ;};if !_faegd {break ;};};};func (_ffbfg *textTable )putComposite (_ffcfb ,_ffdd int ,_eage paraList ,_abagd _dc .PdfRectangle ){if len (_eage )==0{_d .Log .Error ("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073");
|
||
return ;};_gbag :=compositeCell {PdfRectangle :_abagd ,paraList :_eage };if _eea {_caa .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a",_ffcfb ,_ffdd ,_gbag .String ());
|
||
};_gbag .updateBBox ();_ffbfg ._fgge [_abdec (_ffcfb ,_ffdd )]=_gbag ;};func _afgac (_ccac ,_dfcgb ,_ebeb float64 )rulingKind {if _ccac >=_ebeb &&_aged (_dfcgb ,_ccac ){return _gegc ;};if _dfcgb >=_ebeb &&_aged (_ccac ,_dfcgb ){return _bcaef ;};return _aebb ;
|
||
};type lineRuling struct{_gfdf rulingKind ;_fgdc markKind ;_ba .Color ;_becgc ,_facd _bab .Point ;};func (_effd pathSection )bbox ()_dc .PdfRectangle {_ggde :=_effd ._bffc [0]._bcbc [0];_dbca :=_dc .PdfRectangle {Llx :_ggde .X ,Urx :_ggde .X ,Lly :_ggde .Y ,Ury :_ggde .Y };
|
||
_fgga :=func (_ffaec _bab .Point ){if _ffaec .X < _dbca .Llx {_dbca .Llx =_ffaec .X ;}else if _ffaec .X > _dbca .Urx {_dbca .Urx =_ffaec .X ;};if _ffaec .Y < _dbca .Lly {_dbca .Lly =_ffaec .Y ;}else if _ffaec .Y > _dbca .Ury {_dbca .Ury =_ffaec .Y ;};};
|
||
for _ ,_aadf :=range _effd ._bffc [0]._bcbc [1:]{_fgga (_aadf );};for _ ,_cggc :=range _effd ._bffc [1:]{for _ ,_bee :=range _cggc ._bcbc {_fgga (_bee );};};return _dbca ;};func _cedb (_cagb ,_ccc _dc .PdfRectangle )(_dc .PdfRectangle ,bool ){if !_dggg (_cagb ,_ccc ){return _dc .PdfRectangle {},false ;
|
||
};return _dc .PdfRectangle {Llx :_ca .Max (_cagb .Llx ,_ccc .Llx ),Urx :_ca .Min (_cagb .Urx ,_ccc .Urx ),Lly :_ca .Max (_cagb .Lly ,_ccc .Lly ),Ury :_ca .Min (_cagb .Ury ,_ccc .Ury )},true ;};func (_ecegd intSet )del (_afge int ){delete (_ecegd ,_afge )};
|
||
|
||
|
||
// String returns a string describing the current state of the textState stack.
|
||
func (_abec *stateStack )String ()string {_adc :=[]string {_caa .Sprintf ("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064",len (*_abec ))};for _fdac ,_dacg :=range *_abec {_eegg :="\u003c\u006e\u0069l\u003e";
|
||
if _dacg !=nil {_eegg =_dacg .String ();};_adc =append (_adc ,_caa .Sprintf ("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073",_fdac ,_eegg ));};return _ce .Join (_adc ,"\u000a");};func _ggdf (_fgeff *PageText )error {_eebfa :=_gd .GetLicenseKey ();if _eebfa !=nil &&_eebfa .IsLicensed ()||_ag {return nil ;
|
||
};_caa .Printf ("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a");_caa .Println ("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f");
|
||
return _g .New ("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064");};func (_aaad rulingList )vertsHorzs ()(rulingList ,rulingList ){var _gaecc ,_cbed rulingList ;
|
||
for _ ,_cbff :=range _aaad {switch _cbff ._fabfb {case _bcaef :_gaecc =append (_gaecc ,_cbff );case _gegc :_cbed =append (_cbed ,_cbff );};};return _gaecc ,_cbed ;};type textTable struct{_dc .PdfRectangle ;_bgdee ,_fcedd int ;_bebc bool ;_fddab map[uint64 ]*textPara ;
|
||
_fgge map[uint64 ]compositeCell ;};var _ebfc =map[rulingKind ]string {_aebb :"\u006e\u006f\u006e\u0065",_gegc :"\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_bcaef :"\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"};func (_faef *textObject )getStrokeColor ()_ba .Color {return _ggce (_faef ._fdf .ColorspaceStroking ,_faef ._fdf .ColorStroking );
|
||
};func (_gefb paraList )findTableGrid (_bgade gridTiling )(*textTable ,map[*textPara ]struct{}){_aagg :=len (_bgade ._gggb );_fabea :=len (_bgade ._gceb );_bggca :=textTable {_bebc :true ,_bgdee :_aagg ,_fcedd :_fabea ,_fddab :make (map[uint64 ]*textPara ,_aagg *_fabea ),_fgge :make (map[uint64 ]compositeCell ,_aagg *_fabea )};
|
||
_fbge :=make (map[*textPara ]struct{});_fgfgg :=int ((1.0-_cefb )*float64 (_aagg *_fabea ));_gfaed :=0;if _dgfd {_d .Log .Info ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064",_aagg ,_fabea );
|
||
};for _aedff ,_aaacb :=range _bgade ._gceb {_abba ,_dbcb :=_bgade ._begc [_aaacb ];if !_dbcb {continue ;};for _acaee ,_eeacc :=range _bgade ._gggb {_cfedb ,_gfdee :=_abba [_eeacc ];if !_gfdee {continue ;};_aaag :=_gefb .inTile (_cfedb );if len (_aaag )==0{_gfaed ++;
|
||
if _gfaed > _fgfgg {if _dgfd {_d .Log .Info ("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064",_gfaed );};return nil ,nil ;};}else {_bggca .putComposite (_acaee ,_aedff ,_aaag ,_cfedb .PdfRectangle );for _ ,_gfdge :=range _aaag {_fbge [_gfdge ]=struct{}{};
|
||
};};};};_fdae :=0;for _dbaf :=0;_dbaf < _aagg ;_dbaf ++{_fbffc :=_bggca .get (_dbaf ,0);if _fbffc ==nil ||!_fbffc ._bedda {_fdae ++;};};if _fdae ==0{if _dgfd {_d .Log .Info ("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030");};return nil ,nil ;
|
||
};_egda :=_bggca .reduceTiling (_bgade ,_dedfe );_egda =_egda .subdivide ();return _egda ,_fbge ;};func (_bgcfc rectRuling )checkWidth (_acfgf ,_abgc float64 )(float64 ,bool ){_dgde :=_abgc -_acfgf ;_dagg :=_dgde <=_geff ;return _dgde ,_dagg ;};func (_ecga *shapesState )establishSubpath ()*subpath {_baga ,_eddb :=_ecga .lastpointEstablished ();
|
||
if !_eddb {_ecga ._bfg =append (_ecga ._bfg ,_badg (_baga ));};if len (_ecga ._bfg )==0{return nil ;};_ecga ._dcgc =false ;return _ecga ._bfg [len (_ecga ._bfg )-1];};func (_afgc gridTiling )complete ()bool {for _ ,_cfff :=range _afgc ._begc {for _ ,_bgeg :=range _cfff {if !_bgeg .complete (){return false ;
|
||
};};};return true ;};func (_fbbf *textObject )getFont (_dfeb string )(*_dc .PdfFont ,error ){if _fbbf ._ebe ._ab !=nil {_ebcb ,_ggg :=_fbbf .getFontDict (_dfeb );if _ggg !=nil {_d .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073",_dfeb ,_ggg .Error ());
|
||
return nil ,_ggg ;};_fbbf ._ebe ._abd ++;_gec ,_abde :=_fbbf ._ebe ._ab [_ebcb .String ()];if _abde {_gec ._dcgaa =_fbbf ._ebe ._abd ;return _gec ._bcab ,nil ;};};_dddf ,_dacd :=_fbbf .getFontDict (_dfeb );if _dacd !=nil {return nil ,_dacd ;};_babab ,_dacd :=_fbbf .getFontDirect (_dfeb );
|
||
if _dacd !=nil {return nil ,_dacd ;};if _fbbf ._ebe ._ab !=nil {_fcb :=fontEntry {_babab ,_fbbf ._ebe ._abd };if len (_fbbf ._ebe ._ab )>=_adaa {var _fdbc []string ;for _bfcd :=range _fbbf ._ebe ._ab {_fdbc =append (_fdbc ,_bfcd );};_e .Slice (_fdbc ,func (_fbgf ,_dffe int )bool {return _fbbf ._ebe ._ab [_fdbc [_fbgf ]]._dcgaa < _fbbf ._ebe ._ab [_fdbc [_dffe ]]._dcgaa ;
|
||
});delete (_fbbf ._ebe ._ab ,_fdbc [0]);};_fbbf ._ebe ._ab [_dddf .String ()]=_fcb ;};return _babab ,nil ;};func _adfb (_bgdc ,_afggd *textPara )bool {if _bgdc ._bedda ||_afggd ._bedda {return true ;};return _edeg (_bgdc .depth ()-_afggd .depth ());};func _cega (_egdfg []pathSection )rulingList {_daeba (_egdfg );
|
||
if _cage {_d .Log .Info ("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs",len (_egdfg ));};var _fead rulingList ;for _ ,_adac :=range _egdfg {for _ ,_egge :=range _adac ._bffc {if !_egge .isQuadrilateral (){if _cage {_d .Log .Error ("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073",_egge );
|
||
};continue ;};if _febc ,_gdee :=_egge .makeRectRuling (_adac .Color );_gdee {_fead =append (_fead ,_febc );}else {if _agca {_d .Log .Error ("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073",_egge );
|
||
};};};};if _cage {_d .Log .Info ("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073",_fead .String ());};return _fead ;};func (_dbdf *shapesState )drawRectangle (_fdfg ,_ddb ,_fbcc ,_gegb float64 ){if _fgeac {_dgbf :=_dbdf .devicePoint (_fdfg ,_ddb );
|
||
_eae :=_dbdf .devicePoint (_fdfg +_fbcc ,_ddb +_gegb );_cfb :=_dc .PdfRectangle {Llx :_dgbf .X ,Lly :_dgbf .Y ,Urx :_eae .X ,Ury :_eae .Y };_d .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_cfb );
|
||
};_dbdf .newSubPath ();_dbdf .moveTo (_fdfg ,_ddb );_dbdf .lineTo (_fdfg +_fbcc ,_ddb );_dbdf .lineTo (_fdfg +_fbcc ,_ddb +_gegb );_dbdf .lineTo (_fdfg ,_ddb +_gegb );_dbdf .closePath ();};func _fgbc (_ccfd []TextMark ,_eeffc *int ,_fbga TextMark )[]TextMark {_fbga .Offset =*_eeffc ;
|
||
_ccfd =append (_ccfd ,_fbga );*_eeffc +=len (_fbga .Text );return _ccfd ;};func (_dffa *textObject )setHorizScaling (_bddd float64 ){if _dffa ==nil {return ;};_dffa ._cgf ._abbd =_bddd ;};func (_ebebb *textTable )emptyCompositeColumn (_dece int )bool {for _daeg :=0;
|
||
_daeg < _ebebb ._fcedd ;_daeg ++{if _bedcc ,_fdafd :=_ebebb ._fgge [_abdec (_dece ,_daeg )];_fdafd {if len (_bedcc .paraList )> 0{return false ;};};};return true ;};func (_fgbb *textPara )text ()string {_cfga :=new (_f .Buffer );_fgbb .writeText (_cfga );
|
||
return _cfga .String ();};func (_aedf rulingList )secMinMax ()(float64 ,float64 ){_cgcadg ,_cgeb :=_aedf [0]._becdd ,_aedf [0]._aggb ;for _ ,_bcdc :=range _aedf [1:]{if _bcdc ._becdd < _cgcadg {_cgcadg =_bcdc ._becdd ;};if _bcdc ._aggb > _cgeb {_cgeb =_bcdc ._aggb ;
|
||
};};return _cgcadg ,_cgeb ;};func (_afgcb *textWord )computeText ()string {_fafb :=make ([]string ,len (_afgcb ._gbaed ));for _aecfga ,_bdae :=range _afgcb ._gbaed {_fafb [_aecfga ]=_bdae ._cbge ;};return _ce .Join (_fafb ,"");};
|
||
|
||
// ExtractPageImages returns the image contents of the page extractor, including data
|
||
// and position, size information for each image.
|
||
// A set of options to control page image extraction can be passed in. The options
|
||
// parameter can be nil for the default options. By default, inline stencil masks
|
||
// are not extracted.
|
||
func (_be *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_cca :=&imageExtractContext {_cbc :options };_cgc :=_cca .extractContentStreamImages (_be ._aaf ,_be ._gdc );if _cgc !=nil {return nil ,_cgc ;};return &PageImages {Images :_cca ._ggf },nil ;
|
||
};
|
||
|
||
// String returns a description of `b`.
|
||
func (_dgfg *wordBag )String ()string {var _gdaa []string ;for _ ,_bgcf :=range _dgfg .depthIndexes (){_edee :=_dgfg ._adcb [_bgcf ];for _ ,_ffc :=range _edee {_gdaa =append (_gdaa ,_ffc ._bgdg );};};return _caa .Sprintf ("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071",_dgfg .PdfRectangle ,_dgfg ._gffd ,len (_gdaa ),_gdaa );
|
||
};func (_dbba *wordBag )empty (_fedb int )bool {_ ,_ecbg :=_dbba ._adcb [_fedb ];return !_ecbg };func _cbbb (_cedg float64 ,_cfba int )int {if _cfba ==0{_cfba =1;};_fcaa :=float64 (_cfba );return int (_ca .Round (_cedg /_fcaa )*_fcaa );};func (_fgca *textTable )get (_dbce ,_eeege int )*textPara {return _fgca ._fddab [_abdec (_dbce ,_eeege )]};
|
||
func (_acecg rulingList )snapToGroups ()rulingList {_gacc ,_adff :=_acecg .vertsHorzs ();if len (_gacc )> 0{_gacc =_gacc .snapToGroupsDirection ();};if len (_adff )> 0{_adff =_adff .snapToGroupsDirection ();};_faf :=append (_gacc ,_adff ...);_faf .log ("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073");
|
||
return _faf ;};const (_aebb rulingKind =iota ;_gegc ;_bcaef ;);const (_bbf ="\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065";
|
||
_dac ="\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064";
|
||
_eg ="\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065";);
|
||
func (_dbbae *textMark )inDiacriticArea (_edfa *textMark )bool {_cfbe :=_dbbae .Llx -_edfa .Llx ;_ecea :=_dbbae .Urx -_edfa .Urx ;_ddfa :=_dbbae .Lly -_edfa .Lly ;return _ca .Abs (_cfbe +_ecea )< _dbbae .Width ()*_cebd &&_ca .Abs (_ddfa )< _dbbae .Height ()*_cebd ;
|
||
};func (_bfeb *shapesState )lineTo (_cefg ,_fdbg float64 ){if _fgeac {_d .Log .Info ("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066",_cefg ,_fdbg ,_bfeb .devicePoint (_cefg ,_fdbg ));
|
||
};_bfeb .addPoint (_cefg ,_fdbg );};
|
||
|
||
// String returns a description of `k`.
|
||
func (_ceff rulingKind )String ()string {_cge ,_aecfe :=_ebfc [_ceff ];if !_aecfe {return _caa .Sprintf ("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064",_ceff );};return _cge ;};
|
||
|
||
// PageText represents the layout of text on a device page.
|
||
type PageText struct{_gfc []*textMark ;_dbdg string ;_efda []TextMark ;_fdc []TextTable ;_fdbe _dc .PdfRectangle ;_ggeb []pathSection ;_agc []pathSection ;_ced *_gcf .ContentStreamOperations ;};func (_fdb *textObject )nextLine (){_fdb .moveLP (0,-_fdb ._cgf ._add )};
|
||
// _ebcf maps `_efdg` to an integer index in units of `_caaee`. Non-negative
// values are truncated toward zero. Negative values are truncated and then
// decremented by one.
func _ebcf(_efdg float64) int {
	if _efdg >= 0 {
		return int(_efdg / _caaee)
	}
	return int(_efdg/_caaee) - 1
}

// _fcfg matches a string that consists only of digits with an optional
// trailing dot, or a run of the letters I, i, v, optionally followed by a
// closing parenthesis, with optional surrounding white space.
var _fcfg = _c.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")
|
||
var _bbdf =map[markKind ]string {_bagd :"\u0073\u0074\u0072\u006f\u006b\u0065",_gfefd :"\u0066\u0069\u006c\u006c",_cefcf :"\u0061u\u0067\u006d\u0065\u006e\u0074"};func (_cacfa rulingList )findPrimSec (_gfaf ,_bgeb float64 )*ruling {for _ ,_bebg :=range _cacfa {if _edeg (_bebg ._efbdg -_gfaf )&&_bebg ._becdd -_aecg <=_bgeb &&_bgeb <=_bebg ._aggb +_aecg {return _bebg ;
|
||
};};return nil ;};type imageExtractContext struct{_ggf []ImageMark ;_fe int ;_gfa int ;_dg int ;_egc map[*_aa .PdfObjectStream ]*cachedImage ;_cbc *ImageExtractOptions ;};func (_fgf *subpath )removeDuplicates (){if len (_fgf ._bcbc )==0{return ;};_fgdg :=[]_bab .Point {_fgf ._bcbc [0]};
|
||
for _ ,_cbfd :=range _fgf ._bcbc [1:]{if !_aada (_cbfd ,_fgdg [len (_fgdg )-1]){_fgdg =append (_fgdg ,_cbfd );};};_fgf ._bcbc =_fgdg ;};func _aab (_ceb bounded )float64 {return -_ceb .bbox ().Lly };func (_debb *textPara )toCellTextMarks (_bbca *int )[]TextMark {var _ddeg []TextMark ;
|
||
for _gcea ,_dcad :=range _debb ._ddaf {_decd :=_dcad .toTextMarks (_bbca );_ebefd :=_cggce &&_dcad .endsInHyphen ()&&_gcea !=len (_debb ._ddaf )-1;if _ebefd {_decd =_fgdbf (_decd ,_bbca );};_ddeg =append (_ddeg ,_decd ...);if !(_ebefd ||_gcea ==len (_debb ._ddaf )-1){_ddeg =_egag (_ddeg ,_bbca ,_gfga (_dcad ._gddd ,_debb ._ddaf [_gcea +1]._gddd ));
|
||
};};return _ddeg ;};func (_fagg *imageExtractContext )extractFormImages (_afc *_aa .PdfObjectName ,_fcf _gcf .GraphicsState ,_cag *_dc .PdfPageResources )error {_dfc ,_ged :=_cag .GetXObjectFormByName (*_afc );if _ged !=nil {return _ged ;};if _dfc ==nil {return nil ;
|
||
};_egf ,_ged :=_dfc .GetContentStream ();if _ged !=nil {return _ged ;};_gba :=_dfc .Resources ;if _gba ==nil {_gba =_cag ;};_ged =_fagg .extractContentStreamImages (string (_egf ),_gba );if _ged !=nil {return _ged ;};_fagg ._dg ++;return nil ;};func (_cbac *shapesState )moveTo (_adab ,_dfebb float64 ){_cbac ._dcgc =true ;
|
||
_cbac ._fcaf =_cbac .devicePoint (_adab ,_dfebb );if _fgeac {_d .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_adab ,_dfebb ,_cbac ._fcaf );
|
||
};};func (_gafc *textLine )bbox ()_dc .PdfRectangle {return _gafc .PdfRectangle };type pathSection struct{_bffc []*subpath ;_ba .Color ;};func (_abdf *stateStack )top ()*textState {if _abdf .empty (){return nil ;};return (*_abdf )[_abdf .size ()-1];};func (_deeg *textTable )reduceTiling (_agfa gridTiling ,_ccddd float64 )*textTable {_acac :=make ([]int ,0,_deeg ._fcedd );
|
||
_bggfg :=make ([]int ,0,_deeg ._bgdee );_cdad :=_agfa ._gggb ;_fbfeb :=_agfa ._gceb ;for _ffbf :=0;_ffbf < _deeg ._fcedd ;_ffbf ++{_fbfgb :=_ffbf > 0&&_ca .Abs (_fbfeb [_ffbf -1]-_fbfeb [_ffbf ])< _ccddd &&_deeg .emptyCompositeRow (_ffbf );if !_fbfgb {_acac =append (_acac ,_ffbf );
|
||
};};for _bebgc :=0;_bebgc < _deeg ._bgdee ;_bebgc ++{_ecgc :=_bebgc < _deeg ._bgdee -1&&_ca .Abs (_cdad [_bebgc +1]-_cdad [_bebgc ])< _ccddd &&_deeg .emptyCompositeColumn (_bebgc );if !_ecgc {_bggfg =append (_bggfg ,_bebgc );};};if len (_acac )==_deeg ._fcedd &&len (_bggfg )==_deeg ._bgdee {return _deeg ;
|
||
};_bacfa :=textTable {_bebc :_deeg ._bebc ,_bgdee :len (_bggfg ),_fcedd :len (_acac ),_fgge :make (map[uint64 ]compositeCell ,len (_bggfg )*len (_acac ))};if _eea {_d .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_deeg ._bgdee ,_deeg ._fcedd ,len (_bggfg ),len (_acac ));
|
||
_d .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_bggfg );_d .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_acac );};for _ebaee ,_aaead :=range _acac {for _cgfce ,_abadf :=range _bggfg {_ceeee ,_gdcb :=_deeg .getComposite (_abadf ,_aaead );
|
||
if len (_ceeee )==0{continue ;};if _eea {_caa .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_cgfce ,_ebaee ,_abadf ,_aaead ,_ebfce (_ceeee .merge ().text (),50));};_bacfa .putComposite (_cgfce ,_ebaee ,_ceeee ,_gdcb );
|
||
};};return &_bacfa ;};func _aec (_bgcg _bab .Point )_bab .Matrix {return _bab .TranslationMatrix (_bgcg .X ,_bgcg .Y )};func (_gaec *wordBag )absorb (_dgfaa *wordBag ){_cgff :=_dgfaa .makeRemovals ();for _bdec ,_cfa :=range _dgfaa ._adcb {for _ ,_acca :=range _cfa {_gaec .pullWord (_acca ,_bdec ,_cgff );
|
||
};};_dgfaa .applyRemovals (_cgff );};
|
||
|
||
// String returns a description of `p`.
|
||
func (_cbcc *textPara )String ()string {if _cbcc ._bedda {return _caa .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d",_cbcc .PdfRectangle );};_gcba :="";if _cbcc ._affa !=nil {_gcba =_caa .Sprintf ("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020",_cbcc ._affa ._bgdee ,_cbcc ._affa ._fcedd );
|
||
};return _caa .Sprintf ("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071",_cbcc .PdfRectangle ,_gcba ,len (_cbcc ._ddaf ),_ebfce (_cbcc .text (),50));};func (_ebadc *textTable )subdivide ()*textTable {_ebadc .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");
|
||
_cbdfe :=_ebadc .compositeRowCorridors ();_gdga :=_ebadc .compositeColCorridors ();if _eea {_d .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_eafa (_cbdfe ),_eafa (_gdga ));
|
||
};if len (_cbdfe )==0||len (_gdga )==0{return _ebadc ;};_cgfbe (_cbdfe );_cgfbe (_gdga );if _eea {_d .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_eafa (_cbdfe ),_eafa (_gdga ));
|
||
};_degec ,_eecec :=_bbbb (_ebadc ._fcedd ,_cbdfe );_dfaab ,_daade :=_bbbb (_ebadc ._bgdee ,_gdga );_ggag :=make (map[uint64 ]*textPara ,_daade *_eecec );_ecdcb :=&textTable {PdfRectangle :_ebadc .PdfRectangle ,_bebc :_ebadc ._bebc ,_fcedd :_eecec ,_bgdee :_daade ,_fddab :_ggag };
|
||
if _eea {_d .Log .Info ("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_ebadc ._bgdee ,_ebadc ._fcedd ,_daade ,_eecec ,_eafa (_cbdfe ),_eafa (_gdga ),_degec ,_dfaab );
|
||
};for _eeec :=0;_eeec < _ebadc ._fcedd ;_eeec ++{_daebb :=_degec [_eeec ];for _ggbf :=0;_ggbf < _ebadc ._bgdee ;_ggbf ++{_fdeec :=_dfaab [_ggbf ];if _eea {_caa .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_ggbf ,_eeec ,_fdeec ,_daebb );
|
||
};_daagc ,_dgbb :=_ebadc ._fgge [_abdec (_ggbf ,_eeec )];if !_dgbb {continue ;};_eddf :=_daagc .split (_cbdfe [_eeec ],_gdga [_ggbf ]);for _gaab :=0;_gaab < _eddf ._fcedd ;_gaab ++{for _cfcg :=0;_cfcg < _eddf ._bgdee ;_cfcg ++{_afaca :=_eddf .get (_cfcg ,_gaab );
|
||
_ecdcb .put (_fdeec +_cfcg ,_daebb +_gaab ,_afaca );if _eea {_caa .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_fdeec +_cfcg ,_daebb +_gaab ,_afaca );};};};};};return _ecdcb ;};
|
||
|
||
// GetContentStreamOps returns the contentStreamOps field of `pt`.
|
||
func (_efcd *PageText )GetContentStreamOps ()*_gcf .ContentStreamOperations {return _efcd ._ced };func (_ec *imageExtractContext )processOperand (_acc *_gcf .ContentStreamOperation ,_ae _gcf .GraphicsState ,_ggfb *_dc .PdfPageResources )error {if _acc .Operand =="\u0042\u0049"&&len (_acc .Params )==1{_cbg ,_ffa :=_acc .Params [0].(*_gcf .ContentStreamInlineImage );
|
||
if !_ffa {return nil ;};if _bec ,_fgg :=_aa .GetBoolVal (_cbg .ImageMask );_fgg {if _bec &&!_ec ._cbc .IncludeInlineStencilMasks {return nil ;};};return _ec .extractInlineImage (_cbg ,_ae ,_ggfb );}else if _acc .Operand =="\u0044\u006f"&&len (_acc .Params )==1{_fbc ,_fff :=_aa .GetName (_acc .Params [0]);
|
||
if !_fff {_d .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");return _cc ;};_ ,_edfg :=_ggfb .GetXObjectByName (*_fbc );switch _edfg {case _dc .XObjectTypeImage :return _ec .extractXObjectImage (_fbc ,_ae ,_ggfb );case _dc .XObjectTypeForm :return _ec .extractFormImages (_fbc ,_ae ,_ggfb );
|
||
};};return nil ;};func (_cffe *textPara )toTextMarks (_bcf *int )[]TextMark {if _cffe ._affa ==nil {return _cffe .toCellTextMarks (_bcf );};var _bcdg []TextMark ;for _bfgc :=0;_bfgc < _cffe ._affa ._fcedd ;_bfgc ++{for _ddad :=0;_ddad < _cffe ._affa ._bgdee ;
|
||
_ddad ++{_dcdda :=_cffe ._affa .get (_ddad ,_bfgc );if _dcdda ==nil {_bcdg =_egag (_bcdg ,_bcf ,"\u0009");}else {_degbb :=_dcdda .toCellTextMarks (_bcf );_bcdg =append (_bcdg ,_degbb ...);};_bcdg =_egag (_bcdg ,_bcf ,"\u0020");};if _bfgc < _cffe ._affa ._fcedd -1{_bcdg =_egag (_bcdg ,_bcf ,"\u000a");
|
||
};};return _bcdg ;};
|
||
|
||
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
|
||
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
|
||
// Replace with a function like Extract() (*PageText, error)
|
||
func (_gdcg *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_fdg ,_dfe ,_fbd ,_gfe :=_gdcg .extractPageText (_gdcg ._aaf ,_gdcg ._gdc ,_bab .IdentityMatrix (),0);if _gfe !=nil &&_gfe !=_dc .ErrColorOutOfRange {return nil ,0,0,_gfe ;};_fdg .computeViews ();
|
||
_gfe =_ggdf (_fdg );if _gfe !=nil {return nil ,0,0,_gfe ;};if _gdcg ._da !=nil {if _gdcg ._da .ApplyCropBox &&_gdcg ._aae !=nil {_fdg .ApplyArea (*_gdcg ._aae );};};return _fdg ,_dfe ,_fbd ,nil ;};func _dgcf (_eed string )bool {if _b .RuneCountInString (_eed )< _dgcga {return false ;
|
||
};_bafba ,_dcfef :=_b .DecodeLastRuneInString (_eed );if _dcfef <=0||!_bb .Is (_bb .Hyphen ,_bafba ){return false ;};_bafba ,_dcfef =_b .DecodeLastRuneInString (_eed [:len (_eed )-_dcfef ]);return _dcfef > 0&&!_bb .IsSpace (_bafba );};func (_dacef *textObject )moveTextSetLeading (_cbag ,_eeg float64 ){_dacef ._cgf ._add =-_eeg ;
|
||
_dacef .moveLP (_cbag ,_eeg );};func (_feaf rulingList )sortStrict (){_e .Slice (_feaf ,func (_gcde ,_gcgb int )bool {_dggc ,_fdefe :=_feaf [_gcde ],_feaf [_gcgb ];_aebee ,_fcfba :=_dggc ._fabfb ,_fdefe ._fabfb ;if _aebee !=_fcfba {return _aebee > _fcfba ;
|
||
};_cccfgb ,_bdeg :=_dggc ._efbdg ,_fdefe ._efbdg ;if !_edeg (_cccfgb -_bdeg ){return _cccfgb < _bdeg ;};_cccfgb ,_bdeg =_dggc ._becdd ,_fdefe ._becdd ;if _cccfgb !=_bdeg {return _cccfgb < _bdeg ;};return _dggc ._aggb < _fdefe ._aggb ;});};func (_decc *textLine )pullWord (_egcec *wordBag ,_fgb *textWord ,_feab int ){_decc .appendWord (_fgb );
|
||
_egcec .removeWord (_fgb ,_feab );};
|
||
|
||
// String returns a human readable description of `ss`.
|
||
func (_dfd *shapesState )String ()string {return _caa .Sprintf ("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d",len (_dfd ._bfg ),_dfd ._dcgc );};func (_geffff *textTable )compositeRowCorridors ()map[int ][]float64 {_edec :=make (map[int ][]float64 ,_geffff ._fcedd );
|
||
if _eea {_d .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_geffff ._fcedd );};for _accbe :=1;_accbe < _geffff ._fcedd ;_accbe ++{var _ggfg []compositeCell ;
|
||
for _cadab :=0;_cadab < _geffff ._bgdee ;_cadab ++{if _dedg ,_gdeb :=_geffff ._fgge [_abdec (_cadab ,_accbe )];_gdeb {_ggfg =append (_ggfg ,_dedg );};};if len (_ggfg )==0{continue ;};_abfg :=_ceebc (_ggfg );_edec [_accbe ]=_abfg ;if _eea {_caa .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_accbe ,_abfg );
|
||
};};return _edec ;};func (_debe *textTable )getRight ()paraList {_bbgb :=make (paraList ,_debe ._fcedd );for _bbad :=0;_bbad < _debe ._fcedd ;_bbad ++{_dbecf :=_debe .get (_debe ._bgdee -1,_bbad )._becf ;if _dbecf .taken (){return nil ;};_bbgb [_bbad ]=_dbecf ;
|
||
};for _efgg :=0;_efgg < _debe ._fcedd -1;_efgg ++{if _bbgb [_efgg ]._bgce !=_bbgb [_efgg +1]{return nil ;};};return _bbgb ;};func (_gcfb rulingList )comp (_cfbf ,_ddcd int )bool {_eeag ,_cead :=_gcfb [_cfbf ],_gcfb [_ddcd ];_bcacd ,_dfaff :=_eeag ._fabfb ,_cead ._fabfb ;
|
||
if _bcacd !=_dfaff {return _bcacd > _dfaff ;};if _bcacd ==_aebb {return false ;};_geeaf :=func (_fcba bool )bool {if _bcacd ==_gegc {return _fcba ;};return !_fcba ;};_feda ,_gfde :=_eeag ._efbdg ,_cead ._efbdg ;if _feda !=_gfde {return _geeaf (_feda > _gfde );
|
||
};_feda ,_gfde =_eeag ._becdd ,_cead ._becdd ;if _feda !=_gfde {return _geeaf (_feda < _gfde );};return _geeaf (_eeag ._aggb < _cead ._aggb );};func _ggba (_fece *wordBag ,_ebda float64 ,_edc ,_ggac rulingList )[]*wordBag {var _cbcd []*wordBag ;for _ ,_eccc :=range _fece .depthIndexes (){_fgfe :=false ;
|
||
for !_fece .empty (_eccc ){_cdaa :=_fece .firstReadingIndex (_eccc );_dbdc :=_fece .firstWord (_cdaa );_dffaa :=_afccc (_dbdc ,_ebda ,_edc ,_ggac );_fece .removeWord (_dbdc ,_cdaa );if _aebe {_d .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_dbdc .String ());
|
||
};for _ceaa :=true ;_ceaa ;_ceaa =_fgfe {_fgfe =false ;_egdgf :=_adbb *_dffaa ._gffd ;_dgcb :=_cgda *_dffaa ._gffd ;_efbc :=_egde *_dffaa ._gffd ;if _aebe {_d .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_dffaa .minDepth (),_dffaa .maxDepth (),_efbc ,_dgcb );
|
||
};if _fece .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_dffaa ,_cdce (_dad ,0),_dffaa .minDepth ()-_efbc ,_dffaa .maxDepth ()+_efbc ,_gdcga ,false ,false )> 0{_fgfe =true ;};if _fece .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_dffaa ,_cdce (_dad ,_dgcb ),_dffaa .minDepth (),_dffaa .maxDepth (),_acgg ,false ,false )> 0{_fgfe =true ;
|
||
};if _fgfe {continue ;};_afca :=_fece .scanBand ("",_dffaa ,_cdce (_dddb ,_egdgf ),_dffaa .minDepth (),_dffaa .maxDepth (),_bfa ,true ,false );if _afca > 0{_cbbbf :=(_dffaa .maxDepth ()-_dffaa .minDepth ())/_dffaa ._gffd ;if (_afca > 1&&float64 (_afca )> 0.3*_cbbbf )||_afca <=10{if _fece .scanBand ("\u006f\u0074\u0068e\u0072",_dffaa ,_cdce (_dddb ,_egdgf ),_dffaa .minDepth (),_dffaa .maxDepth (),_bfa ,false ,true )> 0{_fgfe =true ;
|
||
};};};};_cbcd =append (_cbcd ,_dffaa );};};return _cbcd ;};func (_deca *wordBag )pullWord (_bfba *textWord ,_faab int ,_cdeb map[int ]map[*textWord ]struct{}){_deca .PdfRectangle =_abga (_deca .PdfRectangle ,_bfba .PdfRectangle );if _bfba ._debab > _deca ._gffd {_deca ._gffd =_bfba ._debab ;
|
||
};_deca ._adcb [_faab ]=append (_deca ._adcb [_faab ],_bfba );_cdeb [_faab ][_bfba ]=struct{}{};};func (_fcca *imageExtractContext )extractInlineImage (_efa *_gcf .ContentStreamInlineImage ,_gdb _gcf .GraphicsState ,_dgc *_dc .PdfPageResources )error {_cde ,_bgb :=_efa .ToImage (_dgc );
|
||
if _bgb !=nil {return _bgb ;};_bge ,_bgb :=_efa .GetColorSpace (_dgc );if _bgb !=nil {return _bgb ;};if _bge ==nil {_bge =_dc .NewPdfColorspaceDeviceGray ();};_bcd ,_bgb :=_bge .ImageToRGB (*_cde );if _bgb !=nil {return _bgb ;};_fba :=ImageMark {Image :&_bcd ,Width :_gdb .CTM .ScalingFactorX (),Height :_gdb .CTM .ScalingFactorY (),Angle :_gdb .CTM .Angle ()};
|
||
_fba .X ,_fba .Y =_gdb .CTM .Translation ();_fcca ._ggf =append (_fcca ._ggf ,_fba );_fcca ._fe ++;return nil ;};const (_agfe =1.0e-6;_bacb =1.0e-4;_aaaa =10;_caaee =6;_gdgc =0.5;_gaac =0.12;_aabe =0.19;_gfbbb =0.04;_fggf =0.04;_egde =1.0;_gdcga =0.04;
|
||
_cgda =0.4;_acgg =0.7;_adbb =1.0;_bfa =0.1;_bdbc =1.4;_agdd =0.46;_fde =0.02;_feae =0.2;_cebd =0.5;_dgcga =4;_ffed =4.0;_baeag =6;_cefb =0.3;_eaga =0.01;_cddaf =0.02;_cddcc =2;_eadb =2;_gddb =500;_dagaf =4.0;_dbaa =4.0;_adfd =0.05;_bgffc =0.1;_aecg =2.0;
|
||
_geff =2.0;_geed =1.5;_dedfe =3.0;_ega =0.25;);func (_gceg paraList )inTile (_eaaeg gridTile )paraList {var _fagda paraList ;for _ ,_abaf :=range _gceg {if _eaaeg .contains (_abaf .PdfRectangle ){_fagda =append (_fagda ,_abaf );};};if _eea {_caa .Printf ("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n",_eaaeg ,len (_fagda ));
|
||
for _dage ,_eabef :=range _fagda {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dage ,_eabef );};_caa .Println ("");};return _fagda ;};func _gfbf (_cffb []*wordBag )[]*wordBag {if len (_cffb )<=1{return _cffb ;};if _daag {_d .Log .Info ("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a");
|
||
};_e .Slice (_cffb ,func (_aafda ,_eccb int )bool {_abgb ,_bae :=_cffb [_aafda ],_cffb [_eccb ];_aac :=_abgb .Width ()*_abgb .Height ();_dcdb :=_bae .Width ()*_bae .Height ();if _aac !=_dcdb {return _aac > _dcdb ;};if _abgb .Height ()!=_bae .Height (){return _abgb .Height ()> _bae .Height ();
|
||
};return _aafda < _eccb ;});var _cggb []*wordBag ;_gac :=make (intSet );for _ccdg :=0;_ccdg < len (_cffb );_ccdg ++{if _gac .has (_ccdg ){continue ;};_beee :=_cffb [_ccdg ];for _bce :=_ccdg +1;_bce < len (_cffb );_bce ++{if _gac .has (_ccdg ){continue ;
|
||
};_fcd :=_cffb [_bce ];_cddad :=_beee .PdfRectangle ;_cddad .Llx -=_beee ._gffd ;if _gcdf (_cddad ,_fcd .PdfRectangle ){_beee .absorb (_fcd );_gac .add (_bce );};};_cggb =append (_cggb ,_beee );};if len (_cffb )!=len (_cggb )+len (_gac ){_d .Log .Error ("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064",len (_cffb ),len (_cggb ),len (_gac ));
|
||
};return _cggb ;};func _fgdbf (_dafc []TextMark ,_ebdc *int )[]TextMark {_fdbge :=_dafc [len (_dafc )-1];_gcfe :=[]rune (_fdbge .Text );if len (_gcfe )==1{_dafc =_dafc [:len (_dafc )-1];_ggaa :=_dafc [len (_dafc )-1];*_ebdc =_ggaa .Offset +len (_ggaa .Text );
|
||
}else {_face :=_eeac (_fdbge .Text );*_ebdc +=len (_face )-len (_fdbge .Text );_fdbge .Text =_face ;};return _dafc ;};func (_ceeb *stateStack )empty ()bool {return len (*_ceeb )==0};func (_fecc *textPara )fontsize ()float64 {return _fecc ._ddaf [0]._bfcdd };
|
||
var _ag =false ;func _acda (_gadf string )(string ,bool ){_bgede :=[]rune (_gadf );if len (_bgede )!=1{return "",false ;};_gafga ,_aafb :=_adabg [_bgede [0]];return _gafga ,_aafb ;};func (_eaee rulingList )merge ()*ruling {_cedc :=_eaee [0]._efbdg ;_dceb :=_eaee [0]._becdd ;
|
||
_ecdd :=_eaee [0]._aggb ;for _ ,_gaag :=range _eaee [1:]{_cedc +=_gaag ._efbdg ;if _gaag ._becdd < _dceb {_dceb =_gaag ._becdd ;};if _gaag ._aggb > _ecdd {_ecdd =_gaag ._aggb ;};};_gedd :=&ruling {_fabfb :_eaee [0]._fabfb ,_fcgb :_eaee [0]._fcgb ,Color :_eaee [0].Color ,_efbdg :_cedc /float64 (len (_eaee )),_becdd :_dceb ,_aggb :_ecdd };
|
||
if _baea {_d .Log .Info ("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073",len (_eaee ),_gedd );for _dada ,_bgae :=range _eaee {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dada ,_bgae );};};
|
||
return _gedd ;};func (_dagaa intSet )add (_dbgbe int ){_dagaa [_dbgbe ]=struct{}{}};func (_ddcdd gridTile )contains (_gdda _dc .PdfRectangle )bool {if _ddcdd .numBorders ()< 3{return false ;};if _ddcdd ._gcfbe &&_gdda .Llx < _ddcdd .Llx -_geed {return false ;
|
||
};if _ddcdd ._ebbb &&_gdda .Urx > _ddcdd .Urx +_geed {return false ;};if _ddcdd ._ecbgb &&_gdda .Lly < _ddcdd .Lly -_geed {return false ;};if _ddcdd ._faaa &&_gdda .Ury > _ddcdd .Ury +_geed {return false ;};return true ;};func (_daacb *subpath )last ()_bab .Point {return _daacb ._bcbc [len (_daacb ._bcbc )-1]};
|
||
func (_bggcf paraList )llyOrdering ()[]int {_gdfe :=make ([]int ,len (_bggcf ));for _daeb :=range _bggcf {_gdfe [_daeb ]=_daeb ;};_e .SliceStable (_gdfe ,func (_badd ,_abad int )bool {_bdca ,_dfdg :=_gdfe [_badd ],_gdfe [_abad ];return _bggcf [_bdca ].Lly < _bggcf [_dfdg ].Lly ;
|
||
});return _gdfe ;};func (_agade *ruling )alignsSec (_gdfa *ruling )bool {const _adafd =_geff +1.0;return _agade ._becdd -_adafd <=_gdfa ._aggb &&_gdfa ._becdd -_adafd <=_agade ._aggb ;};
|
||
|
||
// TextMark represents extracted text on a page with information regarding both textual content,
|
||
// formatting (font and size) and positioning.
|
||
// It is the smallest unit of text on a PDF page, typically a single character.
|
||
//
|
||
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
|
||
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
|
||
// `bbox` of substring `term` in `text`.
|
||
//
|
||
// ex, _ := New(page)
|
||
// // handle errors
|
||
// pageText, _, _, err := ex.ExtractPageText()
|
||
// // handle errors
|
||
// text := pageText.Text()
|
||
// textMarks := pageText.Marks()
|
||
//
|
||
// start := strings.Index(text, term)
|
||
// end := start + len(term)
|
||
// spanMarks, err := textMarks.RangeOffset(start, end)
|
||
// // handle errors
|
||
// bbox, ok := spanMarks.BBox()
|
||
// // handle errors
|
||
type TextMark struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Original is the text in the PDF. It has not been decoded like `Text`.
|
||
Original string ;
|
||
|
||
// BBox is the bounding box of the text.
|
||
BBox _dc .PdfRectangle ;
|
||
|
||
// Font is the font the text was drawn with.
|
||
Font *_dc .PdfFont ;
|
||
|
||
// FontSize is the font size the text was drawn with.
|
||
FontSize float64 ;
|
||
|
||
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
|
||
// text, textMarks := pageText.Text(), pageText.Marks()
|
||
// marks := textMarks.Elements()
|
||
// then marks[i].Offset is the offset of marks[i].Text in text.
|
||
Offset int ;
|
||
|
||
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
|
||
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
|
||
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
|
||
Meta bool ;
|
||
|
||
// FillColor is the fill color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
FillColor _ba .Color ;
|
||
|
||
// StrokeColor is the stroke color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
StrokeColor _ba .Color ;
|
||
|
||
// Orientation is the text orientation
|
||
Orientation int ;
|
||
|
||
// DirectObject is the underlying PdfObject (Text Object) that represents the visible texts. This is introduced to get
|
||
// a simple access to the TextObject in case editing or replacment of some text is needed. E.g during redaction.
|
||
DirectObject _aa .PdfObject ;
|
||
|
||
// ObjString is a decoded string operand of a text-showing operator. It has the same value as `Text` attribute except
|
||
// when many glyphs are represented with the same Text Object that contains multiple length string operand in which case
|
||
// ObjString spans more than one character string that falls in different TextMark objects.
|
||
ObjString []string ;Tw float64 ;Th float64 ;Tc float64 ;Index int ;};func (_ggfd *stateStack )pop ()*textState {if _ggfd .empty (){return nil ;};_fbb :=*(*_ggfd )[len (*_ggfd )-1];*_ggfd =(*_ggfd )[:len (*_ggfd )-1];return &_fbb ;};func (_abgef rulingList )sort (){_e .Slice (_abgef ,_abgef .comp )};
|
||
func (_cddcg *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_aeaf :=make (map[int ]map[*textWord ]struct{},len (_cddcg ._adcb ));for _fdcc :=range _cddcg ._adcb {_aeaf [_fdcc ]=make (map[*textWord ]struct{});};return _aeaf ;};func (_dgef *textObject )setTextRenderMode (_ffad int ){if _dgef ==nil {return ;
|
||
};_dgef ._cgf ._efbd =RenderMode (_ffad );};func (_cgb paraList )applyTables (_daceg []*textTable )paraList {var _egcb paraList ;for _ ,_efcg :=range _daceg {_egcb =append (_egcb ,_efcg .newTablePara ());};for _ ,_bgbdg :=range _cgb {if _bgbdg ._bgcb {continue ;
|
||
};_egcb =append (_egcb ,_bgbdg );};return _egcb ;};func (_aedb *textLine )text ()string {var _adbbc []string ;for _ ,_abcb :=range _aedb ._ccfce {if _abcb ._gabe {_adbbc =append (_adbbc ,"\u0020");};_adbbc =append (_adbbc ,_abcb ._bgdg );};return _ce .Join (_adbbc ,"");
|
||
};const _adaa =10;
|
||
|
||
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
|
||
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
|
||
func (_cgcd *Extractor )ExtractTextWithStats ()(_gca string ,_fda int ,_def int ,_gdbe error ){_bbc ,_fda ,_def ,_gdbe :=_cgcd .ExtractPageText ();if _gdbe !=nil {return "",_fda ,_def ,_gdbe ;};return _bbc .Text (),_fda ,_def ,nil ;};func _bgfd (_ecgb []*textWord ,_fgdgb int )[]*textWord {_ageg :=len (_ecgb );
|
||
copy (_ecgb [_fgdgb :],_ecgb [_fgdgb +1:]);return _ecgb [:_ageg -1];};func (_faecb paraList )readBefore (_cgfd []int ,_egeca ,_cagg int )bool {_cdec ,_dfba :=_faecb [_egeca ],_faecb [_cagg ];if _gbda (_cdec ,_dfba )&&_cdec .Lly > _dfba .Lly {return true ;
|
||
};if !(_cdec ._aecfg .Urx < _dfba ._aecfg .Llx ){return false ;};_bbac ,_dgcd :=_cdec .Lly ,_dfba .Lly ;if _bbac > _dgcd {_dgcd ,_bbac =_bbac ,_dgcd ;};_gfefg :=_ca .Max (_cdec ._aecfg .Llx ,_dfba ._aecfg .Llx );_eaag :=_ca .Min (_cdec ._aecfg .Urx ,_dfba ._aecfg .Urx );
|
||
_acecb :=_faecb .llyRange (_cgfd ,_bbac ,_dgcd );for _ ,_aaaf :=range _acecb {if _aaaf ==_egeca ||_aaaf ==_cagg {continue ;};_gcfd :=_faecb [_aaaf ];if _gcfd ._aecfg .Llx <=_eaag &&_gfefg <=_gcfd ._aecfg .Urx {return false ;};};return true ;};func (_geb *textObject )setTextRise (_efab float64 ){if _geb ==nil {return ;
|
||
};_geb ._cgf ._bffe =_efab ;};func (_fgbca *ruling )gridIntersecting (_gdbbf *ruling )bool {return _efca (_fgbca ._becdd ,_gdbbf ._becdd )&&_efca (_fgbca ._aggb ,_gdbbf ._aggb );};func (_faec *wordBag )minDepth ()float64 {return _faec ._deae -(_faec .Ury -_faec ._gffd )};
|
||
// _dbed returns the keys of `_adggf` in increasing order.
func _dbed(_adggf map[int]intSet) []int {
	keys := make([]int, len(_adggf))
	next := 0
	for key := range _adggf {
		keys[next] = key
		next++
	}
	_e.Ints(keys)
	return keys
}

// _aged reports whether `_bdegd` is less than the fraction `_adfd` of `_eebd`, where `_eebd` is
// first raised to at least `_bgffc`.
func _aged(_bdegd, _eebd float64) bool {
	denominator := _ca.Max(_bgffc, _eebd)
	return _bdegd/denominator < _adfd
}
|
||
func (_gebae paraList )topoOrder ()[]int {if _cfee {_d .Log .Info ("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a");};_cgfcc :=len (_gebae );_bbd :=make ([]bool ,_cgfcc );_fcbd :=make ([]int ,0,_cgfcc );_bcdf :=_gebae .llyOrdering ();var _ecbgf func (_bgfe int );
|
||
_ecbgf =func (_egbe int ){_bbd [_egbe ]=true ;for _aebg :=0;_aebg < _cgfcc ;_aebg ++{if !_bbd [_aebg ]{if _gebae .readBefore (_bcdf ,_egbe ,_aebg ){_ecbgf (_aebg );};};};_fcbd =append (_fcbd ,_egbe );};for _cddf :=0;_cddf < _cgfcc ;_cddf ++{if !_bbd [_cddf ]{_ecbgf (_cddf );
|
||
};};return _ffffg (_fcbd );};
|
||
|
||
// String returns a human readable description of `path`.
|
||
func (_agd *subpath )String ()string {_egec :=_agd ._bcbc ;_dgca :=len (_egec );if _dgca <=5{return _caa .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_dgca ,_egec );};return _caa .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_dgca ,_egec [0],_egec [1],_egec [_dgca -1]);
|
||
};func _eafa (_caga map[int ][]float64 )string {_cadga :=_daadd (_caga );_edad :=make ([]string ,len (_caga ));for _fefbc ,_dcff :=range _cadga {_edad [_fefbc ]=_caa .Sprintf ("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066",_dcff ,_caga [_dcff ]);};
|
||
return _caa .Sprintf ("\u007b\u0025\u0073\u007d",_ce .Join (_edad ,"\u002c\u0020"));};func _bdgf (_ccbb map[float64 ]map[float64 ]gridTile )[]float64 {_egcf :=make ([]float64 ,0,len (_ccbb ));_ddafa :=make (map[float64 ]struct{},len (_ccbb ));for _ ,_dbec :=range _ccbb {for _ebaf :=range _dbec {if _ ,_dabbd :=_ddafa [_ebaf ];
|
||
_dabbd {continue ;};_egcf =append (_egcf ,_ebaf );_ddafa [_ebaf ]=struct{}{};};};_e .Float64s (_egcf );return _egcf ;};func _dggfd (_deaf _dc .PdfRectangle )*ruling {return &ruling {_fabfb :_bcaef ,_efbdg :_deaf .Llx ,_becdd :_deaf .Lly ,_aggb :_deaf .Ury };
|
||
};func (_bacfc *textTable )markCells (){for _cadgd :=0;_cadgd < _bacfc ._fcedd ;_cadgd ++{for _egfd :=0;_egfd < _bacfc ._bgdee ;_egfd ++{_aacc :=_bacfc .get (_egfd ,_cadgd );if _aacc !=nil {_aacc ._bgcb =true ;};};};};
|
||
|
||
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
|
||
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
|
||
// `start` and `end` are offsets in the extracted text.
|
||
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
|
||
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
|
||
func (_ddfd *TextMarkArray )RangeOffset (start ,end int )(*TextMarkArray ,error ){if _ddfd ==nil {return nil ,_g .New ("\u006da\u003d\u003d\u006e\u0069\u006c");};if end < start {return nil ,_caa .Errorf ("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020",start ,end );
|
||
};_afcd :=len (_ddfd ._bgbd );if _afcd ==0{return _ddfd ,nil ;};if start < _ddfd ._bgbd [0].Offset {start =_ddfd ._bgbd [0].Offset ;};if end > _ddfd ._bgbd [_afcd -1].Offset +1{end =_ddfd ._bgbd [_afcd -1].Offset +1;};_gfab :=_e .Search (_afcd ,func (_abef int )bool {return _ddfd ._bgbd [_abef ].Offset +len (_ddfd ._bgbd [_abef ].Text )-1>=start });
|
||
if !(0<=_gfab &&_gfab < _afcd ){_bgda :=_caa .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076",start ,_gfab ,_afcd ,_ddfd ._bgbd [0],_ddfd ._bgbd [_afcd -1]);
|
||
return nil ,_bgda ;};_egbd :=_e .Search (_afcd ,func (_gfbb int )bool {return _ddfd ._bgbd [_gfbb ].Offset > end -1});if !(0<=_egbd &&_egbd < _afcd ){_aeg :=_caa .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076",end ,_egbd ,_afcd ,_ddfd ._bgbd [0],_ddfd ._bgbd [_afcd -1]);
|
||
return nil ,_aeg ;};if _egbd <=_gfab {return nil ,_caa .Errorf ("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064",start ,end ,_gfab ,_egbd );
|
||
};return &TextMarkArray {_bgbd :_ddfd ._bgbd [_gfab :_egbd ]},nil ;};func (_faa *textObject )reset (){_faa ._aafd =_bab .IdentityMatrix ();_faa ._acd =_bab .IdentityMatrix ();_faa ._dcfd =nil ;};func _fdef (_efaf []*textMark ,_fefg _dc .PdfRectangle ,_agdc rulingList ,_edag []gridTiling )paraList {_d .Log .Trace ("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066",len (_efaf ),_fefg );
|
||
if len (_efaf )==0{return nil ;};_ffgea :=_gdgfd (_efaf ,_fefg );if len (_ffgea )==0{return nil ;};_agdc .log ("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065");_cfd ,_cbeb :=_agdc .vertsHorzs ();_dcdg :=_dbcfe (_ffgea ,_fefg .Ury ,_cfd ,_cbeb );
|
||
_ceea :=_ggba (_dcdg ,_fefg .Ury ,_cfd ,_cbeb );_ceea =_gfbf (_ceea );_ecfg :=make (paraList ,0,len (_ceea ));for _ ,_dfdd :=range _ceea {_dbe :=_dfdd .arrangeText ();if _dbe !=nil {_ecfg =append (_ecfg ,_dbe );};};if len (_ecfg )>=_baeag {_ecfg =_ecfg .extractTables (_edag );
|
||
};_ecfg .sortReadingOrder ();_ecfg .log ("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072");return _ecfg ;};func _gfge (_cebf []pathSection )rulingList {_daeba (_cebf );if _cage {_d .Log .Info ("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073",len (_cebf ));
|
||
};var _cacg rulingList ;for _ ,_fccf :=range _cebf {for _ ,_gbdb :=range _fccf ._bffc {if len (_gbdb ._bcbc )< 2{continue ;};_dfcad :=_gbdb ._bcbc [0];for _ ,_aagbb :=range _gbdb ._bcbc [1:]{if _befg ,_dfad :=_cabb (_dfcad ,_aagbb ,_fccf .Color );_dfad {_cacg =append (_cacg ,_befg );
|
||
};_dfcad =_aagbb ;};};};if _cage {_d .Log .Info ("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073",_cacg );};return _cacg ;};
|
||
|
||
// ImageMark represents an image drawn on a page and its position in device coordinates.
|
||
// All coordinates are in device coordinates.
|
||
type ImageMark struct{Image *_dc .Image ;
|
||
|
||
// Dimensions of the image as displayed in the PDF.
|
||
Width float64 ;Height float64 ;
|
||
|
||
// Position of the image in PDF coordinates (lower left corner).
|
||
X float64 ;Y float64 ;
|
||
|
||
// Angle in degrees, if rotated.
|
||
Angle float64 ;};func (_fecd *textTable )depth ()float64 {_dfgg :=1e10;for _decg :=0;_decg < _fecd ._bgdee ;_decg ++{_gbfdb :=_fecd .get (_decg ,0);if _gbfdb ==nil ||_gbfdb ._bedda {continue ;};_dfgg =_ca .Min (_dfgg ,_gbfdb .depth ());};return _dfgg ;
|
||
};
|
||
|
||
// ExtractFonts returns all font information from the page extractor, including
|
||
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
|
||
//
|
||
// The argument `previousPageFonts` is used when trying to build a complete font catalog for multiple pages or the entire document.
|
||
// The entries from `previousPageFonts` are added to the returned result unless already included in the page, i.e. no duplicate entries.
|
||
//
|
||
// NOTE: If previousPageFonts is nil, all fonts from the page will be returned. Use it when building up a full list of fonts for a document or page range.
|
||
func (_bd *Extractor )ExtractFonts (previousPageFonts *PageFonts )(*PageFonts ,error ){_fa :=PageFonts {};_ccf :=_fa .extractPageResourcesToFont (_bd ._gdc );if _ccf !=nil {return nil ,_ccf ;};if previousPageFonts !=nil {for _ ,_ea :=range previousPageFonts .Fonts {if !_dca (_fa .Fonts ,_ea .FontName ){_fa .Fonts =append (_fa .Fonts ,_ea );
|
||
};};};return &PageFonts {Fonts :_fa .Fonts },nil ;};func (_adca *ruling )alignsPrimary (_ecbbea *ruling )bool {return _adca ._fabfb ==_ecbbea ._fabfb &&_ca .Abs (_adca ._efbdg -_ecbbea ._efbdg )< _geff *0.5;};func (_geccc *subpath )isQuadrilateral ()bool {if len (_geccc ._bcbc )< 4||len (_geccc ._bcbc )> 5{return false ;
|
||
};if len (_geccc ._bcbc )==5{_ecfc :=_geccc ._bcbc [0];_edgff :=_geccc ._bcbc [4];if _ecfc .X !=_edgff .X ||_ecfc .Y !=_edgff .Y {return false ;};};return true ;};func _cgfbe (_cafb map[int ][]float64 ){if len (_cafb )<=1{return ;};_ffgff :=_daadd (_cafb );
|
||
if _eea {_d .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_ffgff );};var _begcc ,_fdffg int ;for _begcc ,_fdffg =range _ffgff {if _cafb [_fdffg ]!=nil {break ;};};for _dcgad ,_gcbac :=range _ffgff [_begcc :]{_cgcee :=_cafb [_gcbac ];
|
||
if _cgcee ==nil {continue ;};if _eea {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_begcc +_dcgad ,_fdffg ,_gcbac );};_gffee :=_cafb [_gcbac ];if _gffee [len (_gffee )-1]> _cgcee [0]{_gffee [len (_gffee )-1]=_cgcee [0];
|
||
_cafb [_fdffg ]=_gffee ;};_fdffg =_gcbac ;};};func (_bbea *subpath )close (){if !_aada (_bbea ._bcbc [0],_bbea .last ()){_bbea .add (_bbea ._bcbc [0]);};_bbea ._adge =true ;_bbea .removeDuplicates ();};
|
||
|
||
// String returns a string describing `ma`.
|
||
func (_dfbd TextMarkArray )String ()string {_abge :=len (_dfbd ._bgbd );if _abge ==0{return "\u0045\u004d\u0050T\u0059";};_fgdb :=_dfbd ._bgbd [0];_caf :=_dfbd ._bgbd [_abge -1];return _caa .Sprintf ("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d",_abge ,_fgdb ,_caf );
|
||
};const (RenderModeStroke RenderMode =1<<iota ;RenderModeFill ;RenderModeClip ;);
|
||
|
||
// Tables returns the tables extracted from the page.
|
||
func (_bgcc PageText )Tables ()[]TextTable {if _eea {_d .Log .Info ("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064",len (_bgcc ._fdc ));};return _bgcc ._fdc ;};func (_cdcc *wordBag )depthIndexes ()[]int {if len (_cdcc ._adcb )==0{return nil ;
|
||
};_cgae :=make ([]int ,len (_cdcc ._adcb ));_egdb :=0;for _efbe :=range _cdcc ._adcb {_cgae [_egdb ]=_efbe ;_egdb ++;};_e .Ints (_cgae );return _cgae ;};
|
||
|
||
// String returns a description of `tm`.
|
||
func (_bafe *textMark )String ()string {return _caa .Sprintf ("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022",_bafe .PdfRectangle ,_bafe ._beaf ,_bafe ._cbge );};
|
||
|
||
// String returns a human readable description of `s`.
|
||
func (_ggaad intSet )String ()string {var _eeba []int ;for _abadfb :=range _ggaad {if _ggaad .has (_abadfb ){_eeba =append (_eeba ,_abadfb );};};_e .Ints (_eeba );return _caa .Sprintf ("\u0025\u002b\u0076",_eeba );};func (_fbff rulingList )tidied (_gcg string )rulingList {_bgbda :=_fbff .removeDuplicates ();
|
||
_bgbda .log ("\u0075n\u0069\u0071\u0075\u0065\u0073");_fdbba :=_bgbda .snapToGroups ();if _fdbba ==nil {return nil ;};_fdbba .sort ();if _cage {_d .Log .Info ("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064",_gcg ,len (_fbff ),len (_bgbda ),len (_fdbba ));
|
||
};_fdbba .log ("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d");return _fdbba ;};func _dad (_gaegg *wordBag ,_bdbd *textWord ,_efag float64 )bool {return _bdbd .Llx < _gaegg .Urx +_efag &&_gaegg .Llx -_efag < _bdbd .Urx ;};
|
||
|
||
// Marks returns the TextMark collection for a page. It represents all the text on the page.
|
||
func (_gfd PageText )Marks ()*TextMarkArray {return &TextMarkArray {_bgbd :_gfd ._efda }};func _cafea (_gdfae []*textWord ,_effg *textWord )[]*textWord {for _fbccd ,_efee :=range _gdfae {if _efee ==_effg {return _bgfd (_gdfae ,_fbccd );};};_d .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_effg );
|
||
return nil ;};func _aada (_egeee ,_adfdcf _bab .Point )bool {return _egeee .X ==_adfdcf .X &&_egeee .Y ==_adfdcf .Y };
|
||
|
||
// Append appends `mark` to the mark array.
|
||
func (_bcgg *TextMarkArray )Append (mark TextMark ){_bcgg ._bgbd =append (_bcgg ._bgbd ,mark )};func (_cegf *shapesState )lastpointEstablished ()(_bab .Point ,bool ){if _cegf ._dcgc {return _cegf ._fcaf ,false ;};_dafe :=len (_cegf ._bfg );if _dafe > 0&&_cegf ._bfg [_dafe -1]._adge {return _cegf ._bfg [_dafe -1].last (),false ;
|
||
};return _bab .Point {},true ;};func (_acae *textTable )bbox ()_dc .PdfRectangle {return _acae .PdfRectangle };func _dgggee (_bbfdc ,_gffff int )int {if _bbfdc < _gffff {return _bbfdc ;};return _gffff ;};func (_gdd *shapesState )addPoint (_abc ,_gged float64 ){_fbcgf :=_gdd .establishSubpath ();
|
||
_cdeg :=_gdd .devicePoint (_abc ,_gged );if _fbcgf ==nil {_gdd ._dcgc =true ;_gdd ._fcaf =_cdeg ;}else {_fbcgf .add (_cdeg );};};func (_dgefd *textPara )writeText (_adfdc _gc .Writer ){if _dgefd ._affa ==nil {_dgefd .writeCellText (_adfdc );return ;};for _faed :=0;
|
||
_faed < _dgefd ._affa ._fcedd ;_faed ++{for _eaae :=0;_eaae < _dgefd ._affa ._bgdee ;_eaae ++{_eee :=_dgefd ._affa .get (_eaae ,_faed );if _eee ==nil {_adfdc .Write ([]byte ("\u0009"));}else {_eee .writeCellText (_adfdc );};_adfdc .Write ([]byte ("\u0020"));
|
||
};if _faed < _dgefd ._affa ._fcedd -1{_adfdc .Write ([]byte ("\u000a"));};};};func _fdacf (_efbg _bab .Matrix )_bab .Point {_cddc ,_abg :=_efbg .Translation ();return _bab .Point {X :_cddc ,Y :_abg };};func (_fbae *textObject )getFillColor ()_ba .Color {return _ggce (_fbae ._fdf .ColorspaceNonStroking ,_fbae ._fdf .ColorNonStroking );
|
||
};
|
||
|
||
// Options extractor options.
|
||
type Options struct{
|
||
|
||
// ApplyCropBox will extract page text based on page cropbox if set to `true`.
|
||
ApplyCropBox bool ;};const _ccfc =20;func _dddb (_bceg *wordBag ,_bcaf *textWord ,_fac float64 )bool {return _bceg .Urx <=_bcaf .Llx &&_bcaf .Llx < _bceg .Urx +_fac ;};func (_gefec rectRuling )asRuling ()(*ruling ,bool ){_bgdcf :=ruling {_fabfb :_gefec ._fgdfae ,Color :_gefec .Color ,_fcgb :_gfefd };
|
||
switch _gefec ._fgdfae {case _bcaef :_bgdcf ._efbdg =0.5*(_gefec .Llx +_gefec .Urx );_bgdcf ._becdd =_gefec .Lly ;_bgdcf ._aggb =_gefec .Ury ;_accb ,_ggbb :=_gefec .checkWidth (_gefec .Llx ,_gefec .Urx );if !_ggbb {if _agca {_d .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_gefec );
|
||
};return nil ,false ;};_bgdcf ._gebag =_accb ;case _gegc :_bgdcf ._efbdg =0.5*(_gefec .Lly +_gefec .Ury );_bgdcf ._becdd =_gefec .Llx ;_bgdcf ._aggb =_gefec .Urx ;_abdce ,_gdbg :=_gefec .checkWidth (_gefec .Lly ,_gefec .Ury );if !_gdbg {if _agca {_d .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_gefec );
|
||
};return nil ,false ;};_bgdcf ._gebag =_abdce ;default:_d .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_gefec ._fgdfae );return nil ,false ;};return &_bgdcf ,true ;};func (_dafed rulingList )primMinMax ()(float64 ,float64 ){_aggde ,_gfgg :=_dafed [0]._efbdg ,_dafed [0]._efbdg ;
|
||
for _ ,_edgd :=range _dafed [1:]{if _edgd ._efbdg < _aggde {_aggde =_edgd ._efbdg ;}else if _edgd ._efbdg > _gfgg {_gfgg =_edgd ._efbdg ;};};return _aggde ,_gfgg ;};func (_fgdfa *wordBag )scanBand (_gaa string ,_caae *wordBag ,_ccdd func (_dagb *wordBag ,_adcbd *textWord )bool ,_bgge ,_fcfc ,_cfea float64 ,_cfbg ,_abab bool )int {_ebde :=_caae ._gffd ;
|
||
var _dgcg map[int ]map[*textWord ]struct{};if !_cfbg {_dgcg =_fgdfa .makeRemovals ();};_ebb :=_gdgc *_ebde ;_egecc :=0;for _ ,_bcae :=range _fgdfa .depthBand (_bgge -_ebb ,_fcfc +_ebb ){if len (_fgdfa ._adcb [_bcae ])==0{continue ;};for _ ,_bbcd :=range _fgdfa ._adcb [_bcae ]{if !(_bgge -_ebb <=_bbcd ._cffg &&_bbcd ._cffg <=_fcfc +_ebb ){continue ;
|
||
};if !_ccdd (_caae ,_bbcd ){continue ;};_ddg :=2.0*_ca .Abs (_bbcd ._debab -_caae ._gffd )/(_bbcd ._debab +_caae ._gffd );_cda :=_ca .Max (_bbcd ._debab /_caae ._gffd ,_caae ._gffd /_bbcd ._debab );_afce :=_ca .Min (_ddg ,_cda );if _cfea > 0&&_afce > _cfea {continue ;
|
||
};if _caae .blocked (_bbcd ){continue ;};if !_cfbg {_caae .pullWord (_bbcd ,_bcae ,_dgcg );};_egecc ++;if !_abab {if _bbcd ._cffg < _bgge {_bgge =_bbcd ._cffg ;};if _bbcd ._cffg > _fcfc {_fcfc =_bbcd ._cffg ;};};if _cfbg {break ;};};};if !_cfbg {_fgdfa .applyRemovals (_dgcg );
|
||
};return _egecc ;};func _abdec (_dagba ,_cecd int )uint64 {return uint64 (_dagba )*0x1000000+uint64 (_cecd )};func _bgeae (_ggddg _dc .PdfRectangle )rulingKind {_fegd :=_ggddg .Width ();_cdbb :=_ggddg .Height ();if _fegd > _cdbb {if _fegd >=_dagaf {return _gegc ;
|
||
};}else {if _cdbb >=_dagaf {return _bcaef ;};};return _aebb ;};func _ggdge (_gcaea _dc .PdfRectangle )*ruling {return &ruling {_fabfb :_gegc ,_efbdg :_gcaea .Lly ,_becdd :_gcaea .Llx ,_aggb :_gcaea .Urx };};func _egag (_fffc []TextMark ,_aeeg *int ,_fddc string )[]TextMark {_fgef :=_aefa ;
|
||
_fgef .Text =_fddc ;return _fgbc (_fffc ,_aeeg ,_fgef );};func (_efcfg *ruling )intersects (_dega *ruling )bool {_fdbef :=(_efcfg ._fabfb ==_bcaef &&_dega ._fabfb ==_gegc )||(_dega ._fabfb ==_bcaef &&_efcfg ._fabfb ==_gegc );_adefa :=func (_baac ,_cddcce *ruling )bool {return _baac ._becdd -_aecg <=_cddcce ._efbdg &&_cddcce ._efbdg <=_baac ._aggb +_aecg ;
|
||
};_cccfg :=_adefa (_efcfg ,_dega );_gcgd :=_adefa (_dega ,_efcfg );if _cage {_caa .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_fdbef ,_cccfg ,_gcgd ,_fdbef &&_cccfg &&_gcgd ,_efcfg ,_dega );
|
||
};return _fdbef &&_cccfg &&_gcgd ;};func _fcce (_daaab map[float64 ]gridTile )[]float64 {_fbab :=make ([]float64 ,0,len (_daaab ));for _bgga :=range _daaab {_fbab =append (_fbab ,_bgga );};_e .Float64s (_fbab );return _fbab ;};func (_ffec lineRuling )asRuling ()(*ruling ,bool ){_fagf :=ruling {_fabfb :_ffec ._gfdf ,Color :_ffec .Color ,_fcgb :_bagd };
|
||
switch _ffec ._gfdf {case _bcaef :_fagf ._efbdg =_ffec .xMean ();_fagf ._becdd =_ca .Min (_ffec ._becgc .Y ,_ffec ._facd .Y );_fagf ._aggb =_ca .Max (_ffec ._becgc .Y ,_ffec ._facd .Y );case _gegc :_fagf ._efbdg =_ffec .yMean ();_fagf ._becdd =_ca .Min (_ffec ._becgc .X ,_ffec ._facd .X );
|
||
_fagf ._aggb =_ca .Max (_ffec ._becgc .X ,_ffec ._facd .X );default:_d .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_ffec ._gfdf );return nil ,false ;};return &_fagf ,true ;};
|
||
|
||
// NewFromContents creates a new extractor from contents and page resources.
|
||
func NewFromContents (contents string ,resources *_dc .PdfPageResources )(*Extractor ,error ){const _ee ="\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s";_aafa :=&Extractor {_aaf :contents ,_gdc :resources ,_ab :map[string ]fontEntry {},_bf :map[string ]textResult {}};
|
||
_gd .TrackUse (_ee );return _aafa ,nil ;};func _gdgfd (_gffdf []*textMark ,_afaf _dc .PdfRectangle )[]*textWord {var _bbece []*textWord ;var _fdag *textWord ;if _agb {_d .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_gffdf ));
|
||
};_eeaad :=func (){if _fdag !=nil {_fbed :=_fdag .computeText ();if !_fcgba (_fbed ){_fdag ._bgdg =_fbed ;_bbece =append (_bbece ,_fdag );if _agb {_d .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_bbece )-1,_fdag .String ());
|
||
for _bggb ,_cfdc :=range _fdag ._gbaed {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bggb ,_cfdc .String ());};};};_fdag =nil ;};};for _ ,_afgge :=range _gffdf {if _fdfd &&_fdag !=nil &&len (_fdag ._gbaed )> 0{_ecfb :=_fdag ._gbaed [len (_fdag ._gbaed )-1];
|
||
_cbgb ,_cadd :=_acda (_afgge ._cbge );_ebaec ,_ffbfd :=_acda (_ecfb ._cbge );if _cadd &&!_ffbfd &&_ecfb .inDiacriticArea (_afgge ){_fdag .addDiacritic (_cbgb );continue ;};if _ffbfd &&!_cadd &&_afgge .inDiacriticArea (_ecfb ){_fdag ._gbaed =_fdag ._gbaed [:len (_fdag ._gbaed )-1];
|
||
_fdag .appendMark (_afgge ,_afaf );_fdag .addDiacritic (_ebaec );continue ;};};_eaad :=_fcgba (_afgge ._cbge );if _eaad {_eeaad ();continue ;};if _fdag ==nil &&!_eaad {_fdag =_dggd ([]*textMark {_afgge },_afaf );continue ;};_ccda :=_fdag ._debab ;_bddcf :=_ca .Abs (_bgag (_afaf ,_afgge )-_fdag ._cffg )/_ccda ;
|
||
_cfdce :=_ggee (_afgge ,_fdag )/_ccda ;if _cfdce >=_gaac ||!(-_aabe <=_cfdce &&_bddcf <=_gfbbb ){_eeaad ();_fdag =_dggd ([]*textMark {_afgge },_afaf );continue ;};_fdag .appendMark (_afgge ,_afaf );};_eeaad ();return _bbece ;};var (_adabg =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};
|
||
);func (_gabf rulingList )asTiling ()gridTiling {if _dgfd {_d .Log .Info ("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_gabf ));
|
||
};for _ddba ,_eacf :=range _gabf [1:]{_bbba :=_gabf [_ddba ];if _bbba .alignsPrimary (_eacf )&&_bbba .alignsSec (_eacf ){_d .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_eacf ,_bbba );
|
||
};};_gabf .sortStrict ();_gabf .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_dcfbf ,_bcge :=_gabf .vertsHorzs ();_fbce :=_dcfbf .primaries ();_egacf :=_bcge .primaries ();_gebb :=len (_fbce )-1;_eebg :=len (_egacf )-1;if _gebb ==0||_eebg ==0{return gridTiling {};
|
||
};_fdca :=_dc .PdfRectangle {Llx :_fbce [0],Urx :_fbce [_gebb ],Lly :_egacf [0],Ury :_egacf [_eebg ]};if _dgfd {_d .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_dcfbf ));
|
||
for _gfbd ,_fagb :=range _dcfbf {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gfbd ,_fagb );};_d .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_bcge ));
|
||
for _cffbc ,_gcdee :=range _bcge {_caa .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cffbc ,_gcdee );};_d .Log .Info ("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_gebb ,_eebg ,_fbce ,_egacf );
|
||
};_ggdc :=make ([]gridTile ,_gebb *_eebg );for _ggff :=_eebg -1;_ggff >=0;_ggff --{_gdbd :=_egacf [_ggff ];_eaddg :=_egacf [_ggff +1];for _cagdb :=0;_cagdb < _gebb ;_cagdb ++{_edaa :=_fbce [_cagdb ];_abgf :=_fbce [_cagdb +1];_ddga :=_dcfbf .findPrimSec (_edaa ,_gdbd );
|
||
_fgcg :=_dcfbf .findPrimSec (_abgf ,_gdbd );_dgaa :=_bcge .findPrimSec (_gdbd ,_edaa );_gggbd :=_bcge .findPrimSec (_eaddg ,_edaa );_gacd :=_dc .PdfRectangle {Llx :_edaa ,Urx :_abgf ,Lly :_gdbd ,Ury :_eaddg };_baad :=_bceb (_gacd ,_ddga ,_fgcg ,_dgaa ,_gggbd );
|
||
_ggdc [_ggff *_gebb +_cagdb ]=_baad ;if _dgfd {_caa .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_cagdb ,_ggff ,_baad .String (),_baad .Width (),_baad .Height ());
|
||
};};};if _dgfd {_d .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_fdca );
|
||
};_bdf :=make ([]map[float64 ]gridTile ,_eebg );for _geffb :=_eebg -1;_geffb >=0;_geffb --{if _dgfd {_caa .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_geffb );};_bdf [_geffb ]=make (map[float64 ]gridTile ,_gebb );for _gddc :=0;_gddc < _gebb ;
|
||
_gddc ++{_eefeb :=_ggdc [_geffb *_gebb +_gddc ];if _dgfd {_caa .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gddc ,_eefeb );};if !_eefeb ._gcfbe {continue ;};_gbdc :=_gddc ;for _fccd :=_gddc +1;!_eefeb ._ebbb &&_fccd < _gebb ;
|
||
_fccd ++{_cadg :=_ggdc [_geffb *_gebb +_fccd ];_eefeb .Urx =_cadg .Urx ;_eefeb ._faaa =_eefeb ._faaa ||_cadg ._faaa ;_eefeb ._ecbgb =_eefeb ._ecbgb ||_cadg ._ecbgb ;_eefeb ._ebbb =_cadg ._ebbb ;if _dgfd {_caa .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_fccd ,_cadg ,_eefeb );
|
||
};_gbdc =_fccd ;};if _dgfd {_caa .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_gddc ,_gbdc ,_eefeb );};_gddc =_gbdc ;_bdf [_geffb ][_eefeb .Llx ]=_eefeb ;};};_ddebc :=make (map[float64 ]map[float64 ]gridTile ,_eebg );
|
||
_ffaca :=make (map[float64 ]map[float64 ]struct{},_eebg );for _gecccb :=_eebg -1;_gecccb >=0;_gecccb --{_adgbf :=_ggdc [_gecccb *_gebb ].Lly ;_ddebc [_adgbf ]=make (map[float64 ]gridTile ,_gebb );_ffaca [_adgbf ]=make (map[float64 ]struct{},_gebb );};if _dgfd {_d .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_fdca );
|
||
};for _ebec :=_eebg -1;_ebec >=0;_ebec --{_gcfdf :=_ggdc [_ebec *_gebb ].Lly ;_cgac :=_bdf [_ebec ];if _dgfd {_caa .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_ebec );};for _ ,_cacd :=range _fcce (_cgac ){if _ ,_cgec :=_ffaca [_gcfdf ][_cacd ];
|
||
_cgec {continue ;};_eabbd :=_cgac [_cacd ];if _dgfd {_caa .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_eabbd .String ());};for _dabe :=_ebec -1;_dabe >=0;_dabe --{if _eabbd ._ecbgb {break ;};_bgeag :=_bdf [_dabe ];_aeee ,_bbedb :=_bgeag [_cacd ];
|
||
if !_bbedb {break ;};if _aeee .Urx !=_eabbd .Urx {break ;};_eabbd ._ecbgb =_aeee ._ecbgb ;_eabbd .Lly =_aeee .Lly ;if _dgfd {_caa .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_aeee .String (),_eabbd .String ());
|
||
};_ffaca [_aeee .Lly ][_aeee .Llx ]=struct{}{};};if _ebec ==0{_eabbd ._ecbgb =true ;};if _eabbd .complete (){_ddebc [_gcfdf ][_cacd ]=_eabbd ;};};};_acdb :=gridTiling {PdfRectangle :_fdca ,_gggb :_bdgf (_ddebc ),_gceb :_ebbda (_ddebc ),_begc :_ddebc };
|
||
_acdb .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");return _acdb ;};func _aeb (_fgfg ,_becga bounded )float64 {_gdbba :=_deba (_fgfg ,_becga );if !_edeg (_gdbba ){return _gdbba ;};return _aaed (_fgfg ,_becga );};func _efca (_gebd ,_fabda float64 )bool {return _ca .Abs (_gebd -_fabda )<=_aecg };
|
||
type fontEntry struct{_bcab *_dc .PdfFont ;_dcgaa int64 ;};func (_bgf *Extractor )extractPageText (_bggf string ,_db *_dc .PdfPageResources ,_fec _bab .Matrix ,_gede int )(*PageText ,int ,int ,error ){_d .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_gede );
|
||
_dace :=&PageText {_fdbe :_bgf ._cbb };_abe :=_bea (_bgf ._cbb );var _dff stateStack ;_fcg :=_fbag (_bgf ,_db ,_gcf .GraphicsState {},&_abe ,&_dff );_bbfg :=shapesState {_gcbe :_fec ,_eabb :_bab .IdentityMatrix (),_degb :_fcg };var _fga bool ;if _gede > _ccfc {_dgg :=_g .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");
|
||
_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_gede ,_dgg );
|
||
return _dace ,_abe ._efde ,_abe ._cdc ,_dgg ;};_ccfb :=_gcf .NewContentStreamParser (_bggf );_fage ,_ead :=_ccfb .Parse ();if _ead !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ead );
|
||
return _dace ,_abe ._efde ,_abe ._cdc ,_ead ;};_dace ._ced =_fage ;_eba :=_gcf .NewContentStreamProcessor (*_fage );_eba .AddHandler (_gcf .HandlerConditionEnumAllOperands ,"",func (_bgfg *_gcf .ContentStreamOperation ,_ege _gcf .GraphicsState ,_aea *_dc .PdfPageResources )error {_edd :=_bgfg .Operand ;
|
||
if _dccf {_d .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_bgfg );};switch _edd {case "\u0071":if _fgeac {_d .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_bbfg ._eabb );};_dff .push (&_abe );case "\u0051":if !_dff .empty (){_abe =*_dff .pop ();
|
||
};_bbfg ._eabb =_ege .CTM ;if _fgeac {_d .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_bbfg ._eabb );};case "\u0042\u0054":if _fga {_d .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
|
||
_dace ._gfc =append (_dace ._gfc ,_fcg ._dcfd ...);};_fga =true ;_dcfb :=_ege ;_dcfb .CTM =_fec .Mult (_dcfb .CTM );_fcg =_fbag (_bgf ,_aea ,_dcfb ,&_abe ,&_dff );_bbfg ._degb =_fcg ;case "\u0045\u0054":if !_fga {_d .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
|
||
};_fga =false ;_dace ._gfc =append (_dace ._gfc ,_fcg ._dcfd ...);_fcg .reset ();case "\u0054\u002a":_fcg .nextLine ();case "\u0054\u0064":if _dgf ,_gee :=_fcg .checkOp (_bgfg ,2,true );!_dgf {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gee );
|
||
return _gee ;};_cfc ,_ade ,_bbe :=_fgac (_bgfg .Params );if _bbe !=nil {return _bbe ;};_fcg .moveText (_cfc ,_ade );case "\u0054\u0044":if _cac ,_ageb :=_fcg .checkOp (_bgfg ,2,true );!_cac {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ageb );
|
||
return _ageb ;};_egfb ,_efd ,_ffe :=_fgac (_bgfg .Params );if _ffe !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ffe );return _ffe ;};_fcg .moveTextSetLeading (_egfb ,_efd );case "\u0054\u006a":if _ffae ,_abdb :=_fcg .checkOp (_bgfg ,1,true );
|
||
!_ffae {_d .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_bgfg ,_abdb );return _abdb ;};_ede :=_aa .TraceToDirectObject (_bgfg .Params [0]);_cbd ,_dbd :=_aa .GetStringBytes (_ede );
|
||
if !_dbd {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_bgfg );return _aa .ErrTypeError ;
|
||
};return _fcg .showText (_ede ,_cbd );case "\u0054\u004a":if _dacf ,_bfd :=_fcg .checkOp (_bgfg ,1,true );!_dacf {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bfd );return _bfd ;};_aef ,_gfad :=_aa .GetArray (_bgfg .Params [0]);
|
||
if !_gfad {_d .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_bgfg );return _ead ;};return _fcg .showTextAdjusted (_aef );
|
||
case "\u0027":if _bfb ,_egeg :=_fcg .checkOp (_bgfg ,1,true );!_bfb {_d .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_egeg );return _egeg ;};_agf :=_aa .TraceToDirectObject (_bgfg .Params [0]);_cdb ,_egg :=_aa .GetStringBytes (_agf );
|
||
if !_egg {_d .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_bgfg );return _aa .ErrTypeError ;};_fcg .nextLine ();return _fcg .showText (_agf ,_cdb );
|
||
case "\u0022":if _dbb ,_ffd :=_fcg .checkOp (_bgfg ,3,true );!_dbb {_d .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ffd );return _ffd ;};_adgb ,_bcc ,_egdg :=_fgac (_bgfg .Params [:2]);if _egdg !=nil {return _egdg ;
|
||
};_feca :=_aa .TraceToDirectObject (_bgfg .Params [2]);_gff ,_egb :=_aa .GetStringBytes (_feca );if !_egb {_d .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_bgfg );
|
||
return _aa .ErrTypeError ;};_fcg .setCharSpacing (_adgb );_fcg .setWordSpacing (_bcc );_fcg .nextLine ();return _fcg .showText (_feca ,_gff );case "\u0054\u004c":_ggfc ,_afd :=_bdb (_bgfg );if _afd !=nil {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_afd );
|
||
return _afd ;};_fcg .setTextLeading (_ggfc );case "\u0054\u0063":_bbbc ,_gfb :=_bdb (_bgfg );if _gfb !=nil {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gfb );return _gfb ;};_fcg .setCharSpacing (_bbbc );
|
||
case "\u0054\u0066":if _fggd ,_ebag :=_fcg .checkOp (_bgfg ,2,true );!_fggd {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ebag );return _ebag ;};_cec ,_fageb :=_aa .GetNameVal (_bgfg .Params [0]);
|
||
if !_fageb {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_bgfg );return _aa .ErrTypeError ;};_ggd ,_ffff :=_aa .GetNumberAsFloat (_bgfg .Params [1]);
|
||
if !_fageb {_d .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bgfg ,_ffff );
|
||
return _ffff ;};_ffff =_fcg .setFont (_cec ,_ggd );_fcg ._dcg =_af .Is (_ffff ,_aa .ErrNotSupported );if _ffff !=nil &&!_fcg ._dcg {return _ffff ;};case "\u0054\u006d":if _dag ,_fed :=_fcg .checkOp (_bgfg ,6,true );!_dag {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fed );
|
||
return _fed ;};_aad ,_abdc :=_aa .GetNumbersAsFloat (_bgfg .Params );if _abdc !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_abdc );return _abdc ;};_fcg .setTextMatrix (_aad );case "\u0054\u0072":if _ecd ,_gfg :=_fcg .checkOp (_bgfg ,1,true );
|
||
!_ecd {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gfg );return _gfg ;};_afcc ,_baa :=_aa .GetIntVal (_bgfg .Params [0]);if !_baa {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_bgfg );
|
||
return _aa .ErrTypeError ;};_fcg .setTextRenderMode (_afcc );case "\u0054\u0073":if _feb ,_cfe :=_fcg .checkOp (_bgfg ,1,true );!_feb {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cfe );return _cfe ;
|
||
};_deg ,_ace :=_aa .GetNumberAsFloat (_bgfg .Params [0]);if _ace !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ace );return _ace ;};_fcg .setTextRise (_deg );case "\u0054\u0077":if _eaf ,_daa :=_fcg .checkOp (_bgfg ,1,true );
|
||
!_eaf {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_daa );return _daa ;};_afg ,_ebc :=_aa .GetNumberAsFloat (_bgfg .Params [0]);if _ebc !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ebc );
|
||
return _ebc ;};_fcg .setWordSpacing (_afg );case "\u0054\u007a":if _bbed ,_egea :=_fcg .checkOp (_bgfg ,1,true );!_bbed {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_egea );return _egea ;};_dbg ,_gcfa :=_aa .GetNumberAsFloat (_bgfg .Params [0]);
|
||
if _gcfa !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gcfa );return _gcfa ;};_fcg .setHorizScaling (_dbg );case "\u0063\u006d":_bbfg ._eabb =_ege .CTM ;if _bbfg ._eabb .Singular (){_ddc :=_bab .IdentityMatrix ().Translate (_bbfg ._eabb .Translation ());
|
||
_d .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_bbfg ._eabb ,_ddc );_bbfg ._eabb =_ddc ;};if _fgeac {_d .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_bbfg ._eabb );};case "\u006d":if len (_bgfg .Params )!=2{_d .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_cb );
|
||
return nil ;};_dgb ,_ebcc :=_aa .GetNumbersAsFloat (_bgfg .Params );if _ebcc !=nil {return _ebcc ;};_bbfg .moveTo (_dgb [0],_dgb [1]);case "\u006c":if len (_bgfg .Params )!=2{_d .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_cb );
|
||
return nil ;};_ecb ,_gbf :=_aa .GetNumbersAsFloat (_bgfg .Params );if _gbf !=nil {return _gbf ;};_bbfg .lineTo (_ecb [0],_ecb [1]);case "\u0063":if len (_bgfg .Params )!=6{return _cb ;};_ece ,_bafb :=_aa .GetNumbersAsFloat (_bgfg .Params );if _bafb !=nil {return _bafb ;
|
||
};_d .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_ece );_bbfg .cubicTo (_ece [0],_ece [1],_ece [2],_ece [3],_ece [4],_ece [5]);case "\u0076","\u0079":if len (_bgfg .Params )!=4{return _cb ;
|
||
};_cef ,_bef :=_aa .GetNumbersAsFloat (_bgfg .Params );if _bef !=nil {return _bef ;};_d .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_cef );_bbfg .quadraticTo (_cef [0],_cef [1],_cef [2],_cef [3]);
|
||
case "\u0068":_bbfg .closePath ();case "\u0072\u0065":if len (_bgfg .Params )!=4{return _cb ;};_eec ,_fbdg :=_aa .GetNumbersAsFloat (_bgfg .Params );if _fbdg !=nil {return _fbdg ;};_bbfg .drawRectangle (_eec [0],_eec [1],_eec [2],_eec [3]);_bbfg .closePath ();
|
||
case "\u0053":_bbfg .stroke (&_dace ._ggeb );_bbfg .clearPath ();case "\u0073":_bbfg .closePath ();_bbfg .stroke (&_dace ._ggeb );_bbfg .clearPath ();case "\u0046":_bbfg .fill (&_dace ._agc );_bbfg .clearPath ();case "\u0066","\u0066\u002a":_bbfg .closePath ();
|
||
_bbfg .fill (&_dace ._agc );_bbfg .clearPath ();case "\u0042","\u0042\u002a":_bbfg .fill (&_dace ._agc );_bbfg .stroke (&_dace ._ggeb );_bbfg .clearPath ();case "\u0062","\u0062\u002a":_bbfg .closePath ();_bbfg .fill (&_dace ._agc );_bbfg .stroke (&_dace ._ggeb );
|
||
_bbfg .clearPath ();case "\u006e":_bbfg .clearPath ();case "\u0044\u006f":if len (_bgfg .Params )==0{_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_bgfg .Params );
|
||
return _aa .ErrRangeError ;};_ecef ,_becg :=_aa .GetName (_bgfg .Params [0]);if !_becg {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_bgfg .Params [0]);
|
||
return _aa .ErrTypeError ;};_ ,_ga :=_aea .GetXObjectByName (*_ecef );if _ga !=_dc .XObjectTypeForm {break ;};_geg ,_becg :=_bgf ._bf [_ecef .String ()];if !_becg {_gdg ,_eca :=_aea .GetXObjectFormByName (*_ecef );if _eca !=nil {_d .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_eca );
|
||
return _eca ;};_ded ,_eca :=_gdg .GetContentStream ();if _eca !=nil {_d .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_eca );return _eca ;};_daaf :=_gdg .Resources ;if _daaf ==nil {_daaf =_aea ;};_fce ,_cff ,_fad ,_eca :=_bgf .extractPageText (string (_ded ),_daaf ,_fec .Mult (_ege .CTM ),_gede +1);
|
||
if _eca !=nil {_d .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_eca );return _eca ;};_geg =textResult {*_fce ,_cff ,_fad };_bgf ._bf [_ecef .String ()]=_geg ;};_bbfg ._eabb =_ege .CTM ;if _fgeac {_d .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_bbfg ._eabb );
|
||
};_dace ._gfc =append (_dace ._gfc ,_geg ._fbf ._gfc ...);_dace ._ggeb =append (_dace ._ggeb ,_geg ._fbf ._ggeb ...);_dace ._agc =append (_dace ._agc ,_geg ._fbf ._agc ...);_abe ._efde +=_geg ._eab ;_abe ._cdc +=_geg ._fadb ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_fcg ._fdf .ColorspaceNonStroking =_ege .ColorspaceNonStroking ;
|
||
_fcg ._fdf .ColorNonStroking =_ege .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_fcg ._fdf .ColorspaceStroking =_ege .ColorspaceStroking ;_fcg ._fdf .ColorStroking =_ege .ColorStroking ;};return nil ;
|
||
});_ead =_eba .Process (_db );return _dace ,_abe ._efde ,_abe ._cdc ,_ead ;};func _dggg (_bed ,_bfgb _dc .PdfRectangle )bool {return _dee (_bed ,_bfgb )&&_fdga (_bed ,_bfgb )};
|
||
|
||
// PageFonts represents extracted fonts on a PDF page.
|
||
type PageFonts struct{Fonts []Font ;};
|
||
|
||
// Text returns the extracted page text.
|
||
func (_abbb PageText )Text ()string {return _abbb ._dbdg };func _bea (_cgaa _dc .PdfRectangle )textState {return textState {_abbd :100,_efbd :RenderModeFill ,_eegd :_cgaa };};type gridTile struct{_dc .PdfRectangle ;_faaa ,_gcfbe ,_ecbgb ,_ebbb bool ;};
|
||
func _dcfc (_bgcfe _dc .PdfRectangle ,_gcfdg []*textLine )*textPara {return &textPara {PdfRectangle :_bgcfe ,_ddaf :_gcfdg };};type rulingList []*ruling ;func (_ccaf paraList )llyRange (_ebae []int ,_fccg ,_fcdb float64 )[]int {_aag :=len (_ccaf );if _fcdb < _ccaf [_ebae [0]].Lly ||_fccg > _ccaf [_ebae [_aag -1]].Lly {return nil ;
|
||
};_ecca :=_e .Search (_aag ,func (_dbcc int )bool {return _ccaf [_ebae [_dbcc ]].Lly >=_fccg });_bdgb :=_e .Search (_aag ,func (_aefc int )bool {return _ccaf [_ebae [_aefc ]].Lly > _fcdb });return _ebae [_ecca :_bdgb ];};func _afccc (_egcd *textWord ,_cgd float64 ,_abgg ,_fea rulingList )*wordBag {_abgd :=_ebcf (_egcd ._cffg );
|
||
_becd :=[]*textWord {_egcd };_ecdc :=wordBag {_adcb :map[int ][]*textWord {_abgd :_becd },PdfRectangle :_egcd .PdfRectangle ,_gffd :_egcd ._debab ,_deae :_cgd ,_bbce :_abgg ,_dce :_fea };return &_ecdc ;};func _addda (_fddaa ,_egac _bab .Point )bool {_fbfa :=_ca .Abs (_fddaa .X -_egac .X );
|
||
_degf :=_ca .Abs (_fddaa .Y -_egac .Y );return _aged (_fbfa ,_degf );};func (_adec rulingList )removeDuplicates ()rulingList {if len (_adec )==0{return nil ;};_adec .sort ();_bbda :=rulingList {_adec [0]};for _ ,_aefef :=range _adec [1:]{if _aefef .equals (_bbda [len (_bbda )-1]){continue ;
|
||
};_bbda =append (_bbda ,_aefef );};return _bbda ;};func (_dfae paraList )computeEBBoxes (){if _aedg {_d .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a");};for _ ,_cbae :=range _dfae {_cbae ._aecfg =_cbae .PdfRectangle ;
|
||
};_dcbb :=_dfae .yNeighbours (0);for _gfag ,_aegf :=range _dfae {_ccg :=_aegf ._aecfg ;_fbdga ,_aaea :=-1.0e9,+1.0e9;for _ ,_dbbge :=range _dcbb [_aegf ]{_adde :=_dfae [_dbbge ]._aecfg ;if _adde .Urx < _ccg .Llx {_fbdga =_ca .Max (_fbdga ,_adde .Urx );
|
||
}else if _ccg .Urx < _adde .Llx {_aaea =_ca .Min (_aaea ,_adde .Llx );};};for _dbfg ,_cbba :=range _dfae {_dgae :=_cbba ._aecfg ;if _gfag ==_dbfg ||_dgae .Ury > _ccg .Lly {continue ;};if _fbdga <=_dgae .Llx &&_dgae .Llx < _ccg .Llx {_ccg .Llx =_dgae .Llx ;
|
||
}else if _dgae .Urx <=_aaea &&_ccg .Urx < _dgae .Urx {_ccg .Urx =_dgae .Urx ;};};if _aedg {_caa .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_gfag ,_aegf ._aecfg ,_ccg ,_ebfce (_aegf .text (),50));
|
||
};_aegf ._aecfg =_ccg ;};if _gce {for _ ,_ffeae :=range _dfae {_ffeae .PdfRectangle =_ffeae ._aecfg ;};};};func (_addb *textPara )taken ()bool {return _addb ==nil ||_addb ._bgcb };func (_bbeaa gridTiling )log (_adbbce string ){if !_dgfd {return ;};_d .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_bbeaa ._gggb ),len (_bbeaa ._gceb ),_adbbce );
|
||
_caa .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_bbeaa ._gggb );_caa .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_bbeaa ._gceb );for _cece ,_fbdb :=range _bbeaa ._gceb {_abdeb ,_agde :=_bbeaa ._begc [_fbdb ];
|
||
if !_agde {continue ;};_caa .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_cece ,_fbdb );for _cbda ,_aaga :=range _bbeaa ._gggb {_fddf ,_gfbbg :=_abdeb [_aaga ];if !_gfbbg {continue ;};_caa .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_cbda ,_fddf .String ());
|
||
};};};func _bceb (_bfbe _dc .PdfRectangle ,_acff ,_cbdb ,_dfcge ,_aafga *ruling )gridTile {_ecbbe :=_bfbe .Llx ;_bdbf :=_bfbe .Urx ;_aaeae :=_bfbe .Lly ;_dcccf :=_bfbe .Ury ;return gridTile {PdfRectangle :_bfbe ,_gcfbe :_acff !=nil &&_acff .encloses (_aaeae ,_dcccf ),_ebbb :_cbdb !=nil &&_cbdb .encloses (_aaeae ,_dcccf ),_ecbgb :_dfcge !=nil &&_dfcge .encloses (_ecbbe ,_bdbf ),_faaa :_aafga !=nil &&_aafga .encloses (_ecbbe ,_bdbf )};
|
||
};func (_eadd *textObject )checkOp (_gef *_gcf .ContentStreamOperation ,_afdb int ,_eceg bool )(_bfbc bool ,_gfff error ){if _eadd ==nil {var _bgc []_aa .PdfObject ;if _afdb > 0{_bgc =_gef .Params ;if len (_bgc )> _afdb {_bgc =_bgc [:_afdb ];};};_d .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_gef .Operand ,_bgc );
|
||
};if _afdb >=0{if len (_gef .Params )!=_afdb {if _eceg {_gfff =_g .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");};_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_gef .Operand ,_afdb ,len (_gef .Params ),_gef .Params );
|
||
return false ,_gfff ;};};return true ,nil ;};type textObject struct{_ebe *Extractor ;_eff *_dc .PdfPageResources ;_fdf _gcf .GraphicsState ;_cgf *textState ;_cdbd *stateStack ;_aafd _bab .Matrix ;_acd _bab .Matrix ;_dcfd []*textMark ;_dcg bool ;};
|
||
|
||
// String returns a description of `state`.
|
||
func (_dfg *textState )String ()string {_bbfd :="\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]";if _dfg ._gcb !=nil {_bbfd =_dfg ._gcb .BaseFont ();};return _caa .Sprintf ("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071",_dfg ._cace ,_dfg ._fgd ,_dfg ._fdd ,_bbfd );
|
||
};
|
||
|
||
// ToTextMark returns the public view of `tm`.
|
||
func (_dafa *textMark )ToTextMark ()TextMark {return TextMark {Text :_dafa ._cbge ,Original :_dafa ._bcabg ,BBox :_dafa ._gde ,Font :_dafa ._eead ,FontSize :_dafa ._beaf ,FillColor :_dafa ._fgeee ,StrokeColor :_dafa ._cab ,Orientation :_dafa ._gcce ,DirectObject :_dafa ._abda ,ObjString :_dafa ._bfga ,Tw :_dafa .Tw ,Th :_dafa .Th ,Tc :_dafa ._bddca ,Index :_dafa ._gffe };
|
||
};func _bgag (_gfee _dc .PdfRectangle ,_degc bounded )float64 {return _gfee .Ury -_degc .bbox ().Lly }; |