2020-08-27 21:45:09 +00:00
|
|
|
|
//
|
|
|
|
|
// Copyright 2020 FoxyUtils ehf. All rights reserved.
|
|
|
|
|
//
|
|
|
|
|
// This is a commercial product and requires a license to operate.
|
|
|
|
|
// A trial license can be obtained at https://unidoc.io
|
|
|
|
|
//
|
|
|
|
|
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
|
|
|
|
|
//
|
|
|
|
|
// Use of this source code is governed by the UniDoc End User License Agreement
|
|
|
|
|
// terms that can be accessed at https://unidoc.io/eula/
|
2018-03-22 14:03:47 +00:00
|
|
|
|
|
2020-08-27 21:45:09 +00:00
|
|
|
|
//
|
|
|
|
|
// Package extractor is used for quickly extracting PDF content through a simple interface.
|
|
|
|
|
// Currently offers functionality for extracting textual content.
|
|
|
|
|
//
|
2021-04-06 22:35:37 +00:00
|
|
|
|
package extractor ;import (_ab "bytes";_g "errors";_ag "fmt";_dc "github.com/unidoc/unipdf/v3/common";_dg "github.com/unidoc/unipdf/v3/contentstream";_be "github.com/unidoc/unipdf/v3/core";_ec "github.com/unidoc/unipdf/v3/internal/license";_bg "github.com/unidoc/unipdf/v3/internal/textencoding";
|
|
|
|
|
_cc "github.com/unidoc/unipdf/v3/internal/transform";_ac "github.com/unidoc/unipdf/v3/model";_gf "golang.org/x/text/unicode/norm";_abf "golang.org/x/xerrors";_gca "image/color";_c "io";_gc "math";_gd "regexp";_e "sort";_a "strings";_b "unicode";_d "unicode/utf8";
|
|
|
|
|
);func (_fbcc *textTable )growTable (){_bfdf :=func (_fegf paraList ){_fbcc ._dege ++;for _eeded :=0;_eeded < _fbcc ._eaed ;_eeded ++{_ebgg :=_fegf [_eeded ];_fbcc .put (_eeded ,_fbcc ._dege -1,_ebgg );};};_gdabf :=func (_dcefcf paraList ){_fbcc ._eaed ++;
|
|
|
|
|
for _eebf :=0;_eebf < _fbcc ._dege ;_eebf ++{_bgbg :=_dcefcf [_eebf ];_fbcc .put (_fbcc ._eaed -1,_eebf ,_bgbg );};};if _edad {_fbcc .log ("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce");};for _eged :=0;;_eged ++{_ecabb :=false ;_eaaf :=_fbcc .getDown ();
|
|
|
|
|
_ccdde :=_fbcc .getRight ();if _edad {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_eged ,_fbcc );_ag .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a",_eaaf );_ag .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a",_ccdde );
|
|
|
|
|
};if _eaaf !=nil &&_ccdde !=nil {_cafc :=_eaaf [len (_eaaf )-1];if _cafc !=nil &&!_cafc ._baba &&_cafc ==_ccdde [len (_ccdde )-1]{_bfdf (_eaaf );if _ccdde =_fbcc .getRight ();_ccdde !=nil {_gdabf (_ccdde );_fbcc .put (_fbcc ._eaed -1,_fbcc ._dege -1,_cafc );
|
|
|
|
|
};_ecabb =true ;};};if !_ecabb &&_eaaf !=nil {_bfdf (_eaaf );_ecabb =true ;};if !_ecabb &&_ccdde !=nil {_gdabf (_ccdde );_ecabb =true ;};if !_ecabb {break ;};};};func (_caba lineRuling )asRuling ()(*ruling ,bool ){_afd :=ruling {_ddca :_caba ._eddd ,Color :_caba .Color ,_egff :_beaeb };
|
|
|
|
|
switch _caba ._eddd {case _eace :_afd ._dddg =_caba .xMean ();_afd ._daa =_gc .Min (_caba ._cda .Y ,_caba ._aeff .Y );_afd ._defb =_gc .Max (_caba ._cda .Y ,_caba ._aeff .Y );case _gega :_afd ._dddg =_caba .yMean ();_afd ._daa =_gc .Min (_caba ._cda .X ,_caba ._aeff .X );
|
|
|
|
|
_afd ._defb =_gc .Max (_caba ._cda .X ,_caba ._aeff .X );default:_dc .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_caba ._eddd );return nil ,false ;};return &_afd ,true ;};func (_bb *imageExtractContext )extractContentStreamImages (_egd string ,_cb *_ac .PdfPageResources )error {_bcd :=_dg .NewContentStreamParser (_egd );
|
|
|
|
|
_af ,_gb :=_bcd .Parse ();if _gb !=nil {return _gb ;};if _bb ._dge ==nil {_bb ._dge =map[*_be .PdfObjectStream ]*cachedImage {};};if _bb ._gab ==nil {_bb ._gab =&ImageExtractOptions {};};_ge :=_dg .NewContentStreamProcessor (*_af );_ge .AddHandler (_dg .HandlerConditionEnumAllOperands ,"",_bb .processOperand );
|
|
|
|
|
return _ge .Process (_cb );};func _cabga (_bcdf ,_dbdcc float64 )bool {return _bcdf /_gc .Max (_efdga ,_dbdcc )< _eccg };func _dbaa (_ccbg _cc .Point )_cc .Matrix {return _cc .TranslationMatrix (_ccbg .X ,_ccbg .Y )};func (_bffg *subpath )last ()_cc .Point {return _bffg ._effg [len (_bffg ._effg )-1]};
|
|
|
|
|
func (_ffbg paraList )topoOrder ()[]int {if _cdgb {_dc .Log .Info ("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a");};_gecc :=len (_ffbg );_bgdd :=make ([]bool ,_gecc );_ffac :=make ([]int ,0,_gecc );_gefcg :=_ffbg .llyOrdering ();var _fage func (_gdca int );
|
|
|
|
|
_fage =func (_edfa int ){_bgdd [_edfa ]=true ;for _ddbb :=0;_ddbb < _gecc ;_ddbb ++{if !_bgdd [_ddbb ]{if _ffbg .readBefore (_gefcg ,_edfa ,_ddbb ){_fage (_ddbb );};};};_ffac =append (_ffac ,_edfa );};for _facbc :=0;_facbc < _gecc ;_facbc ++{if !_bgdd [_facbc ]{_fage (_facbc );
|
|
|
|
|
};};return _dffd (_ffac );};func (_aafbe *textPara )writeText (_cggg _c .Writer ){if _aafbe ._eecb ==nil {_aafbe .writeCellText (_cggg );return ;};for _afca :=0;_afca < _aafbe ._eecb ._dege ;_afca ++{for _cedb :=0;_cedb < _aafbe ._eecb ._eaed ;_cedb ++{_eacgc :=_aafbe ._eecb .get (_cedb ,_afca );
|
|
|
|
|
if _eacgc ==nil {_cggg .Write ([]byte ("\u0009"));}else {_eacgc .writeCellText (_cggg );};_cggg .Write ([]byte ("\u0020"));};if _afca < _aafbe ._eecb ._dege -1{_cggg .Write ([]byte ("\u000a"));};};};func _cgda (_gbde map[int ]intSet )[]int {_dabde :=make ([]int ,0,len (_gbde ));
|
|
|
|
|
for _cggc :=range _gbde {_dabde =append (_dabde ,_cggc );};_e .Ints (_dabde );return _dabde ;};func _eabg (_cdbc *wordBag ,_bda *textWord ,_cdcd float64 )bool {return _cdbc .Urx <=_bda .Llx &&_bda .Llx < _cdbc .Urx +_cdcd ;};func (_cccfd rulingList )findPrimSec (_dfdc ,_eag float64 )*ruling {for _ ,_gffcg :=range _cccfd {if _dcbea (_gffcg ._dddg -_dfdc )&&_gffcg ._daa -_eccge <=_eag &&_eag <=_gffcg ._defb +_eccge {return _gffcg ;
|
|
|
|
|
};};return nil ;};func (_aggg *shapesState )moveTo (_fbg ,_egcd float64 ){_aggg ._gbag =true ;_aggg ._adac =_aggg .devicePoint (_fbg ,_egcd );if _dfgc {_dc .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_fbg ,_egcd ,_aggg ._adac );
|
|
|
|
|
};};var _cab =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_gca .White ,StrokeColor :_gca .White };func (_gaee pathSection )bbox ()_ac .PdfRectangle {_deee :=_gaee ._feaa [0]._effg [0];_bdc :=_ac .PdfRectangle {Llx :_deee .X ,Urx :_deee .X ,Lly :_deee .Y ,Ury :_deee .Y };
|
|
|
|
|
_bcg :=func (_bdeg _cc .Point ){if _bdeg .X < _bdc .Llx {_bdc .Llx =_bdeg .X ;}else if _bdeg .X > _bdc .Urx {_bdc .Urx =_bdeg .X ;};if _bdeg .Y < _bdc .Lly {_bdc .Lly =_bdeg .Y ;}else if _bdeg .Y > _bdc .Ury {_bdc .Ury =_bdeg .Y ;};};for _ ,_bbad :=range _gaee ._feaa [0]._effg [1:]{_bcg (_bbad );
|
|
|
|
|
};for _ ,_adbg :=range _gaee ._feaa [1:]{for _ ,_fcb :=range _adbg ._effg {_bcg (_fcb );};};return _bdc ;};func (_cdbd *wordBag )absorb (_fcg *wordBag ){_degf :=_fcg .makeRemovals ();for _fafd ,_bcab :=range _fcg ._bed {for _ ,_dde :=range _bcab {_cdbd .pullWord (_dde ,_fafd ,_degf );
|
|
|
|
|
};};_fcg .applyRemovals (_degf );};func (_eaef rulingList )tidied (_bcaea string )rulingList {_fdbg :=_eaef .removeDuplicates ();_fdbg .log ("\u0075n\u0069\u0071\u0075\u0065\u0073");_ddeg :=_fdbg .snapToGroups ();if _ddeg ==nil {return nil ;};_ddeg .sort ();
|
|
|
|
|
if _ebgeg {_dc .Log .Info ("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064",_bcaea ,len (_eaef ),len (_fdbg ),len (_ddeg ));
|
|
|
|
|
};_ddeg .log ("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d");return _ddeg ;};func (_bec *wordBag )firstReadingIndex (_efaca int )int {_dedc :=_bec .firstWord (_efaca )._fbdg ;_eafa :=float64 (_efaca +1)*_bgca ;_cadc :=_eafa +_baad *_dedc ;_fcd :=_efaca ;
|
|
|
|
|
for _ ,_bcaf :=range _bec .depthBand (_eafa ,_cadc ){if _dace (_bec .firstWord (_bcaf ),_bec .firstWord (_fcd ))< 0{_fcd =_bcaf ;};};return _fcd ;};func _cbbc (_bgcf []*textWord ,_bdcg float64 ,_dcbg ,_baff rulingList )*wordBag {_fedf :=_bgad (_bgcf [0],_bdcg ,_dcbg ,_baff );
|
|
|
|
|
for _ ,_bfdc :=range _bgcf [1:]{_gfaf :=_ceac (_bfdc ._cfba );_fedf ._bed [_gfaf ]=append (_fedf ._bed [_gfaf ],_bfdc );_fedf .PdfRectangle =_def (_fedf .PdfRectangle ,_bfdc .PdfRectangle );};_fedf .sort ();return _fedf ;};func (_debg *textObject )setTextMatrix (_gecg []float64 ){if len (_gecg )!=6{_dc .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_gecg ));
|
|
|
|
|
return ;};_gae ,_bga ,_dcgc ,_eggf ,_efce ,_acda :=_gecg [0],_gecg [1],_gecg [2],_gecg [3],_gecg [4],_gecg [5];_debg ._edfe =_cc .NewMatrix (_gae ,_bga ,_dcgc ,_eggf ,_efce ,_acda );_debg ._dca =_debg ._edfe ;};type intSet map[int ]struct{};func _gfde (_efgb ,_gbae _ac .PdfRectangle )bool {return _gbae .Llx <=_efgb .Urx &&_efgb .Llx <=_gbae .Urx ;
|
|
|
|
|
};func (_beb *textObject )getFillColor ()_gca .Color {return _cgcgg (_beb ._dcb .ColorspaceNonStroking ,_beb ._dcb .ColorNonStroking );};func (_cebe *textPara )isAtom ()*textTable {_eegb :=_cebe ;_fgbfg :=_cebe ._dfbb ;_geccf :=_cebe ._ffaf ;if !(_fgbfg !=nil &&!_fgbfg ._baba &&_geccf !=nil &&!_geccf ._baba ){return nil ;
|
|
|
|
|
};_gfbfc :=_fgbfg ._ffaf ;if !(_gfbfc !=nil &&!_gfbfc ._baba &&_gfbfc ==_geccf ._dfbb ){return nil ;};return _aaace (_eegb ,_fgbfg ,_geccf ,_gfbfc );};func (_acgg *wordBag )getDepthIdx (_accb float64 )int {_ggag :=_acgg .depthIndexes ();_befe :=_ceac (_accb );
|
|
|
|
|
if _befe < _ggag [0]{return _ggag [0];};if _befe > _ggag [len (_ggag )-1]{return _ggag [len (_ggag )-1];};return _befe ;};func (_gefc *stateStack )empty ()bool {return len (*_gefc )==0};func (_bacc rulingList )primaries ()[]float64 {_agde :=make (map[float64 ]struct{},len (_bacc ));
|
|
|
|
|
for _ ,_ecaa :=range _bacc {_agde [_ecaa ._dddg ]=struct{}{};};_dbfbb :=make ([]float64 ,len (_agde ));_befcg :=0;for _bgaec :=range _agde {_dbfbb [_befcg ]=_bgaec ;_befcg ++;};_e .Float64s (_dbfbb );return _dbfbb ;};const _caf =10;func _ddcf (_bbdg string )string {_cfde :=[]rune (_bbdg );
|
|
|
|
|
return string (_cfde [:len (_cfde )-1])};func (_bbfa rulingList )sortStrict (){_e .Slice (_bbfa ,func (_feagf ,_ggac int )bool {_edff ,_cfaf :=_bbfa [_feagf ],_bbfa [_ggac ];_dacd ,_afddd :=_edff ._ddca ,_cfaf ._ddca ;if _dacd !=_afddd {return _dacd > _afddd ;
|
|
|
|
|
};_cace ,_ffef :=_edff ._dddg ,_cfaf ._dddg ;if !_dcbea (_cace -_ffef ){return _cace < _ffef ;};_cace ,_ffef =_edff ._daa ,_cfaf ._daa ;if _cace !=_ffef {return _cace < _ffef ;};return _edff ._defb < _cfaf ._defb ;});};
|
2020-08-27 21:45:09 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// Text returns the extracted page text.
|
|
|
|
|
func (_aee PageText )Text ()string {return _aee ._ebgd };type wordBag struct{_ac .PdfRectangle ;_afb float64 ;_gffc ,_egb rulingList ;_gfgc float64 ;_bed map[int ][]*textWord ;};func (_cedad paraList )merge ()*textPara {_dc .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_cedad ));
|
|
|
|
|
if len (_cedad )==0{return nil ;};_cedad .sortReadingOrder ();_acfg :=_cedad [0].PdfRectangle ;_bcbe :=_cedad [0]._acee ;for _ ,_gbgf :=range _cedad [1:]{_acfg =_def (_acfg ,_gbgf .PdfRectangle );_bcbe =append (_bcbe ,_gbgf ._acee ...);};return _edea (_acfg ,_bcbe );
|
|
|
|
|
};func _geac (_gdafa string ,_eadc int )string {if len (_gdafa )< _eadc {return _gdafa ;};return _gdafa [:_eadc ];};func (_geba *textObject )setTextRise (_edb float64 ){if _geba ==nil {return ;};_geba ._cebc ._bgdc =_edb ;};func _ebfdc (_cfgg ,_gceg _cc .Point )rulingKind {_gbce :=_gc .Abs (_cfgg .X -_gceg .X );
|
|
|
|
|
_bad :=_gc .Abs (_cfgg .Y -_gceg .Y );return _cgee (_gbce ,_bad ,_adge );};func (_gccc paraList )computeEBBoxes (){if _bddg {_dc .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a");};for _ ,_bdde :=range _gccc {_bdde ._efgg =_bdde .PdfRectangle ;
|
|
|
|
|
};_fedg :=_gccc .yNeighbours (0);for _dafb ,_bacb :=range _gccc {_cdcg :=_bacb ._efgg ;_caabc ,_bafbb :=-1.0e9,+1.0e9;for _ ,_gggea :=range _fedg [_bacb ]{_facg :=_gccc [_gggea ]._efgg ;if _facg .Urx < _cdcg .Llx {_caabc =_gc .Max (_caabc ,_facg .Urx );
|
|
|
|
|
}else if _cdcg .Urx < _facg .Llx {_bafbb =_gc .Min (_bafbb ,_facg .Llx );};};for _acfbf ,_fgge :=range _gccc {_gagc :=_fgge ._efgg ;if _dafb ==_acfbf ||_gagc .Ury > _cdcg .Lly {continue ;};if _caabc <=_gagc .Llx &&_gagc .Llx < _cdcg .Llx {_cdcg .Llx =_gagc .Llx ;
|
|
|
|
|
}else if _gagc .Urx <=_bafbb &&_cdcg .Urx < _gagc .Urx {_cdcg .Urx =_gagc .Urx ;};};if _bddg {_ag .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_dafb ,_bacb ._efgg ,_cdcg ,_geac (_bacb .text (),50));
|
|
|
|
|
};_bacb ._efgg =_cdcg ;};if _bddb {for _ ,_eddfd :=range _gccc {_eddfd .PdfRectangle =_eddfd ._efgg ;};};};func (_fecf *textTable )getDown ()paraList {_adbgc :=make (paraList ,_fecf ._eaed );for _fcabc :=0;_fcabc < _fecf ._eaed ;_fcabc ++{_feabd :=_fecf .get (_fcabc ,_fecf ._dege -1)._ffaf ;
|
|
|
|
|
if _feabd ==nil ||_feabd ._baba {return nil ;};_adbgc [_fcabc ]=_feabd ;};for _cabb :=0;_cabb < _fecf ._eaed -1;_cabb ++{if _adbgc [_cabb ]._dfbb !=_adbgc [_cabb +1]{return nil ;};};return _adbgc ;};func (_cdec *textObject )setTextRenderMode (_dec int ){if _cdec ==nil {return ;
|
|
|
|
|
};_cdec ._cebc ._efac =RenderMode (_dec );};func (_aaeaf gridTiling )log (_bafff string ){if !_dgf {return ;};_dc .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_aaeaf ._bddgg ),len (_aaeaf ._dcaa ),_bafff );
|
|
|
|
|
_ag .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_aaeaf ._bddgg );_ag .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_aaeaf ._dcaa );for _dcba ,_ebgbad :=range _aaeaf ._dcaa {_gcbe ,_dafdc :=_aaeaf ._ecag [_ebgbad ];
|
|
|
|
|
if !_dafdc {continue ;};_ag .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_dcba ,_ebgbad );for _gece ,_fcgd :=range _aaeaf ._bddgg {_fbffd ,_efae :=_gcbe [_fcgd ];if !_efae {continue ;};_ag .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_gece ,_fbffd .String ());
|
|
|
|
|
};};};func (_ceeg *textPara )toCellTextMarks (_fdcg *int )[]TextMark {var _cdd []TextMark ;for _baaff ,_gfgf :=range _ceeg ._acee {_deafae :=_gfgf .toTextMarks (_fdcg );_edgb :=_dfgf &&_gfgf .endsInHyphen ()&&_baaff !=len (_ceeg ._acee )-1;if _edgb {_deafae =_bbg (_deafae ,_fdcg );
|
|
|
|
|
};_cdd =append (_cdd ,_deafae ...);if !(_edgb ||_baaff ==len (_ceeg ._acee )-1){_cdd =_dgfa (_cdd ,_fdcg ,_agae (_gfgf ._dffg ,_ceeg ._acee [_baaff +1]._dffg ));};};return _cdd ;};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// String returns a description of `w`.
|
|
|
|
|
func (_adfcc *textWord )String ()string {return _ag .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_adfcc ._cfba ,_adfcc .PdfRectangle ,_adfcc ._fbdg ,_adfcc ._ggbbf );
|
|
|
|
|
};func (_dbaag *shapesState )stroke (_cebd *[]pathSection ){_adf :=pathSection {_feaa :_dbaag ._aceg ,Color :_dbaag ._agfb .getStrokeColor ()};*_cebd =append (*_cebd ,_adf );if _ebgeg {_ag .Printf ("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",len (*_cebd ),_dbaag ,_dbaag ._agfb .getStrokeColor (),_adf .bbox ());
|
|
|
|
|
if _dbag {for _aed ,_ffdc :=range _dbaag ._aceg {_ag .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_aed ,_ffdc );if _aed ==10{break ;};};};};};func (_aeeb *shapesState )lastpointEstablished ()(_cc .Point ,bool ){if _aeeb ._gbag {return _aeeb ._adac ,false ;
|
|
|
|
|
};_dgbe :=len (_aeeb ._aceg );if _dgbe > 0&&_aeeb ._aceg [_dgbe -1]._gbdf {return _aeeb ._aceg [_dgbe -1].last (),false ;};return _cc .Point {},true ;};func _dffd (_cccbg []int )[]int {_cagcf :=make ([]int ,len (_cccbg ));for _eccee ,_ddccf :=range _cccbg {_cagcf [len (_cccbg )-1-_eccee ]=_ddccf ;
|
|
|
|
|
};return _cagcf ;};func _ececf (_fgbgb string )(string ,bool ){_gaea :=[]rune (_fgbgb );if len (_gaea )!=1{return "",false ;};_gadcg ,_dbgdc :=_gddcd [_gaea [0]];return _gadcg ,_dbgdc ;};func _fcfa (_cgad map[float64 ]gridTile )[]float64 {_bgdcd :=make ([]float64 ,0,len (_cgad ));
|
|
|
|
|
for _abgc :=range _cgad {_bgdcd =append (_bgdcd ,_abgc );};_e .Float64s (_bgdcd );return _bgdcd ;};func (_fced *shapesState )cubicTo (_gfd ,_cdfb ,_dfb ,_acgc ,_bcae ,_ceg float64 ){if _dfgc {_dc .Log .Info ("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a");
|
|
|
|
|
};_fced .addPoint (_bcae ,_ceg );};type rulingList []*ruling ;func (_ccgcf paraList )toTextMarks ()[]TextMark {_ffcc :=0;var _dbfe []TextMark ;for _cbde ,_ccbe :=range _ccgcf {if _ccbe ._gfcac {continue ;};_aeba :=_ccbe .toTextMarks (&_ffcc );_dbfe =append (_dbfe ,_aeba ...);
|
|
|
|
|
if _cbde !=len (_ccgcf )-1{if _bcgg (_ccbe ,_ccgcf [_cbde +1]){_dbfe =_dgfa (_dbfe ,&_ffcc ,"\u0020");}else {_dbfe =_dgfa (_dbfe ,&_ffcc ,"\u000a");_dbfe =_dgfa (_dbfe ,&_ffcc ,"\u000a");};};};_dbfe =_dgfa (_dbfe ,&_ffcc ,"\u000a");_dbfe =_dgfa (_dbfe ,&_ffcc ,"\u000a");
|
|
|
|
|
return _dbfe ;};func _bfcdc (_bbffg []*textMark ,_bcad _ac .PdfRectangle )[]*textWord {var _dfdag []*textWord ;var _bead *textWord ;if _decf {_dc .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_bbffg ));
|
|
|
|
|
};_dgdfe :=func (){if _bead !=nil {_fgec :=_bead .computeText ();if !_bfcca (_fgec ){_bead ._ggbbf =_fgec ;_dfdag =append (_dfdag ,_bead );if _decf {_dc .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_dfdag )-1,_bead .String ());
|
|
|
|
|
for _cabbe ,_fcce :=range _bead ._fddf {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cabbe ,_fcce .String ());};};};_bead =nil ;};};for _ ,_fgffg :=range _bbffg {if _gaae &&_bead !=nil &&len (_bead ._fddf )> 0{_dfca :=_bead ._fddf [len (_bead ._fddf )-1];
|
|
|
|
|
_bbeb ,_efgbc :=_ececf (_fgffg ._bfccc );_cbae ,_bbagd :=_ececf (_dfca ._bfccc );if _efgbc &&!_bbagd &&_dfca .inDiacriticArea (_fgffg ){_bead .addDiacritic (_bbeb );continue ;};if _bbagd &&!_efgbc &&_fgffg .inDiacriticArea (_dfca ){_bead ._fddf =_bead ._fddf [:len (_bead ._fddf )-1];
|
|
|
|
|
_bead .appendMark (_fgffg ,_bcad );_bead .addDiacritic (_cbae );continue ;};};_agef :=_bfcca (_fgffg ._bfccc );if _agef {_dgdfe ();continue ;};if _bead ==nil &&!_agef {_bead =_dcbfe ([]*textMark {_fgffg },_bcad );continue ;};_cfab :=_bead ._fbdg ;_fgda :=_gc .Abs (_gdef (_bcad ,_fgffg )-_bead ._cfba )/_cfab ;
|
|
|
|
|
_daed :=_cgec (_fgffg ,_bead )/_cfab ;if _daed >=_cdffc ||!(-_cccc <=_daed &&_fgda <=_gbda ){_dgdfe ();_bead =_dcbfe ([]*textMark {_fgffg },_bcad );continue ;};_bead .appendMark (_fgffg ,_bcad );};_dgdfe ();return _dfdag ;};func (_dag *textLine )endsInHyphen ()bool {_fgfc :=_dag ._acec [len (_dag ._acec )-1];
|
|
|
|
|
_dgff :=_fgfc ._ggbbf ;_fcdd ,_ccda :=_d .DecodeLastRuneInString (_dgff );if _ccda <=0||!_b .Is (_b .Hyphen ,_fcdd ){return false ;};if _fgfc ._bagec &&_fdcc (_dgff ){return true ;};return _fdcc (_dag .text ());};func (_eadf *textTable )reduceTiling (_afdcf gridTiling ,_cgbd float64 )*textTable {_faec :=make ([]int ,0,_eadf ._dege );
|
|
|
|
|
_affdc :=make ([]int ,0,_eadf ._eaed );_fbegc :=_afdcf ._bddgg ;_gfcc :=_afdcf ._dcaa ;for _cbgga :=0;_cbgga < _eadf ._dege ;_cbgga ++{_dgfc :=_cbgga > 0&&_gc .Abs (_gfcc [_cbgga -1]-_gfcc [_cbgga ])< _cgbd &&_eadf .emptyRow (_cbgga );if !_dgfc {_faec =append (_faec ,_cbgga );
|
|
|
|
|
};};for _ffea :=0;_ffea < _eadf ._eaed ;_ffea ++{_degff :=_ffea < _eadf ._eaed -1&&_gc .Abs (_fbegc [_ffea +1]-_fbegc [_ffea ])< _cgbd &&_eadf .emptyColumn (_ffea );if !_degff {_affdc =append (_affdc ,_ffea );};};if len (_faec )==_eadf ._dege &&len (_affdc )==_eadf ._eaed {return _eadf ;
|
|
|
|
|
};_bfdb :=textTable {_aebf :_eadf ._aebf ,_eaed :len (_affdc ),_dege :len (_faec ),_agbbc :make (map[uint64 ]compositeCell ,len (_affdc )*len (_faec ))};if _gaa {_dc .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_eadf ._eaed ,_eadf ._dege ,len (_affdc ),len (_faec ));
|
|
|
|
|
_dc .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_affdc );_dc .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_faec );};for _ddgc ,_fcadg :=range _faec {for _cgea ,_ffde :=range _affdc {_bgab ,_dbefg :=_eadf .getComposite (_ffde ,_fcadg );
|
|
|
|
|
if len (_bgab )==0{continue ;};if _gaa {_ag .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_cgea ,_ddgc ,_ffde ,_fcadg ,_geac (_bgab .merge ().text (),50));};_bfdb .putComposite (_cgea ,_ddgc ,_bgab ,_dbefg );
|
|
|
|
|
};};return &_bfdb ;};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// TextMarkArray is a collection of TextMarks.
|
|
|
|
|
type TextMarkArray struct{_bcda []TextMark };
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
|
|
|
|
// String returns a description of `k`.
|
2021-04-06 22:35:37 +00:00
|
|
|
|
func (k markKind) String() string {
	// Kinds found in the _eefa table use its name; anything else gets a fallback description.
	if name, known := _eefa[k]; known {
		return name
	}
	return _ag.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", k)
}
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
|
|
|
|
|
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
|
|
|
|
|
// `start` and `end` are offsets in the extracted text.
|
|
|
|
|
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
|
|
|
|
|
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
|
|
|
|
|
func (_afc *TextMarkArray )RangeOffset (start ,end int )(*TextMarkArray ,error ){if _afc ==nil {return nil ,_g .New ("\u006da\u003d\u003d\u006e\u0069\u006c");};if end < start {return nil ,_ag .Errorf ("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020",start ,end );
|
|
|
|
|
};_bebc :=len (_afc ._bcda );if _bebc ==0{return _afc ,nil ;};if start < _afc ._bcda [0].Offset {start =_afc ._bcda [0].Offset ;};if end > _afc ._bcda [_bebc -1].Offset +1{end =_afc ._bcda [_bebc -1].Offset +1;};_ddbff :=_e .Search (_bebc ,func (_gadeb int )bool {return _afc ._bcda [_gadeb ].Offset +len (_afc ._bcda [_gadeb ].Text )-1>=start });
|
|
|
|
|
if !(0<=_ddbff &&_ddbff < _bebc ){_cga :=_ag .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076",start ,_ddbff ,_bebc ,_afc ._bcda [0],_afc ._bcda [_bebc -1]);
|
|
|
|
|
return nil ,_cga ;};_baaf :=_e .Search (_bebc ,func (_eecf int )bool {return _afc ._bcda [_eecf ].Offset > end -1});if !(0<=_baaf &&_baaf < _bebc ){_cbda :=_ag .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076",end ,_baaf ,_bebc ,_afc ._bcda [0],_afc ._bcda [_bebc -1]);
|
|
|
|
|
return nil ,_cbda ;};if _baaf <=_ddbff {return nil ,_ag .Errorf ("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064",start ,end ,_ddbff ,_baaf );
|
|
|
|
|
};return &TextMarkArray {_bcda :_afc ._bcda [_ddbff :_baaf ]},nil ;};func (_dbfb *textPara )toTextMarks (_dbfd *int )[]TextMark {if _dbfb ._eecb ==nil {return _dbfb .toCellTextMarks (_dbfd );};var _ccfg []TextMark ;for _agcd :=0;_agcd < _dbfb ._eecb ._dege ;
|
|
|
|
|
_agcd ++{for _gbaab :=0;_gbaab < _dbfb ._eecb ._eaed ;_gbaab ++{_fbcg :=_dbfb ._eecb .get (_gbaab ,_agcd );if _fbcg ==nil {_ccfg =_dgfa (_ccfg ,_dbfd ,"\u0009");}else {_dacgc :=_fbcg .toCellTextMarks (_dbfd );_ccfg =append (_ccfg ,_dacgc ...);};_ccfg =_dgfa (_ccfg ,_dbfd ,"\u0020");
|
|
|
|
|
};if _agcd < _dbfb ._eecb ._dege -1{_ccfg =_dgfa (_ccfg ,_dbfd ,"\u000a");};};return _ccfg ;};func (_acfag *ruling )alignsSec (_gfgbc *ruling )bool {const _gbccd =_gfaff +1.0;return _acfag ._daa -_gbccd <=_gfgbc ._defb &&_gfgbc ._daa -_gbccd <=_acfag ._defb ;
|
|
|
|
|
};func _dcbea (_gdbg float64 )bool {return _gc .Abs (_gdbg )< _ceda };
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// New returns an Extractor instance for extracting content from the input PDF page.
|
|
|
|
|
func New (page *_ac .PdfPage )(*Extractor ,error ){const _gfc ="\u0065\u0078\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077";_dd ,_ege :=page .GetAllContentStreams ();if _ege !=nil {return nil ,_ege ;};_gda ,_ege :=page .GetMediaBox ();
|
|
|
|
|
if _ege !=nil {return nil ,_ag .Errorf ("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076",_ege );};_dga :=&Extractor {_eg :_dd ,_ff :page .Resources ,_cf :*_gda ,_cg :map[string ]fontEntry {},_ee :map[string ]textResult {}};
|
|
|
|
|
if _dga ._cf .Llx > _dga ._cf .Urx {_dc .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_dga ._cf );
|
|
|
|
|
_dga ._cf .Llx ,_dga ._cf .Urx =_dga ._cf .Urx ,_dga ._cf .Llx ;};if _dga ._cf .Lly > _dga ._cf .Ury {_dc .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_dga ._cf );
|
|
|
|
|
_dga ._cf .Lly ,_dga ._cf .Ury =_dga ._cf .Ury ,_dga ._cf .Lly ;};_ec .TrackUse (_gfc );return _dga ,nil ;};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// Tables returns the tables extracted from the page.
|
|
|
|
|
func (_acf PageText )Tables ()[]TextTable {if _gaa {_dc .Log .Info ("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064",len (_acf ._ffdb ));};return _acf ._ffdb ;};var _babg =_gd .MustCompile ("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024");
|
|
|
|
|
func _edge (_gbeb float64 )bool {return _gc .Abs (_gbeb )< _gfaff };type pathSection struct{_feaa []*subpath ;_gca .Color ;};func (_eddab *wordBag )arrangeText ()*textPara {_eddab .sort ();if _bce {_eddab .removeDuplicates ();};var _ccfd []*textLine ;for _ ,_fdebf :=range _eddab .depthIndexes (){for !_eddab .empty (_fdebf ){_bfff :=_eddab .firstReadingIndex (_fdebf );
|
|
|
|
|
_dfef :=_eddab .firstWord (_bfff );_fdfe :=_cfac (_eddab ,_bfff );_dbaaf :=_dfef ._fbdg ;_ecfc :=_dfef ._cfba -_dbgb *_dbaaf ;_fcfbc :=_dfef ._cfba +_dbgb *_dbaaf ;_ebffa :=_aece *_dbaaf ;_gcfa :=_gfgdd *_dbaaf ;_faae :for {var _gdgc *textWord ;_cgbb :=0;
|
|
|
|
|
for _ ,_dgaed :=range _eddab .depthBand (_ecfc ,_fcfbc ){_cbgg :=_eddab .highestWord (_dgaed ,_ecfc ,_fcfbc );if _cbgg ==nil {continue ;};_bcfb :=_cgec (_cbgg ,_fdfe ._acec [len (_fdfe ._acec )-1]);if _bcfb < -_gcfa {break _faae ;};if _bcfb > _ebffa {continue ;
|
|
|
|
|
};if _gdgc !=nil &&_dace (_cbgg ,_gdgc )>=0{continue ;};_gdgc =_cbgg ;_cgbb =_dgaed ;};if _gdgc ==nil {break ;};_fdfe .pullWord (_eddab ,_gdgc ,_cgbb );};_fdfe .markWordBoundaries ();_ccfd =append (_ccfd ,_fdfe );};};if len (_ccfd )==0{return nil ;};_e .Slice (_ccfd ,func (_bdceb ,_ffbb int )bool {return _eecg (_ccfd [_bdceb ],_ccfd [_ffbb ])< 0});
|
|
|
|
|
_ceff :=_edea (_eddab .PdfRectangle ,_ccfd );if _aacab {_dc .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_ceff .String ());if _ebdc {for _bbdf ,_dfdee :=range _ceff ._acee {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bbdf ,_dfdee .String ());
|
|
|
|
|
if _bafa {for _aabf ,_cedc :=range _dfdee ._acec {_ag .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_aabf ,_cedc .String ());for _aaga ,_cbcc :=range _cedc ._fddf {_ag .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_aaga ,_cbcc .String ());
|
|
|
|
|
};};};};};};return _ceff ;};type textState struct{_cge float64 ;_bccba float64 ;_bfd float64 ;_cecg float64 ;_gfg float64 ;_efac RenderMode ;_bgdc float64 ;_ecc *_ac .PdfFont ;_dfe _ac .PdfRectangle ;_acc int ;_dcge int ;};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
|
|
|
|
// ExtractText processes and extracts all text data in content streams and returns as a string.
|
|
|
|
|
// It takes into account character encodings in the PDF file, which are decoded by
|
|
|
|
|
// CharcodeBytesToUnicode.
|
|
|
|
|
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
|
2021-04-06 22:35:37 +00:00
|
|
|
|
func (_ed *Extractor )ExtractText ()(string ,error ){_fgf ,_ ,_ ,_db :=_ed .ExtractTextWithStats ();return _fgf ,_db ;};const (_cefd rulingKind =iota ;_gega ;_eace ;);func (_efbg *textObject )getFontDict (_afge string )(_feae _be .PdfObject ,_dfec error ){_adgf :=_efbg ._edf ;
|
|
|
|
|
if _adgf ==nil {_dc .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_afge );return nil ,nil ;};_feae ,_dadcd :=_adgf .GetFontByName (_be .PdfObjectName (_afge ));
|
|
|
|
|
if !_dadcd {_dc .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_afge );
|
|
|
|
|
return nil ,_g .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _feae ,nil ;};func (_adabb compositeCell )hasLines (_abcb []*textLine )bool {for _daee ,_dcgg :=range _abcb {_afbbf :=_abcd (_adabb .PdfRectangle ,_dcgg .PdfRectangle );
|
|
|
|
|
if _gaa {_ag .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a",_afbbf ,_daee ,len (_abcb ));_ag .Printf ("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a",_adabb );
|
|
|
|
|
_ag .Printf ("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a",_dcgg );};if _afbbf {return true ;};};return false ;};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// String returns a human readable description of `path`.
|
|
|
|
|
func (_ggggc *subpath )String ()string {_ccbb :=_ggggc ._effg ;_efff :=len (_ccbb );if _efff <=5{return _ag .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_efff ,_ccbb );};return _ag .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_efff ,_ccbb [0],_ccbb [1],_ccbb [_efff -1]);
|
|
|
|
|
};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// Append appends `mark` to the mark array.
|
|
|
|
|
func (_aaca *TextMarkArray )Append (mark TextMark ){_aaca ._bcda =append (_aaca ._bcda ,mark )};func _ffdca (_eeeb ,_gbdfd _cc .Point ,_gdgda _gca .Color )(*ruling ,bool ){_faee :=lineRuling {_cda :_eeeb ,_aeff :_gbdfd ,_eddd :_ebfdc (_eeeb ,_gbdfd ),Color :_gdgda };
|
|
|
|
|
if _faee ._eddd ==_cefd {return nil ,false ;};return _faee .asRuling ();};func (_ccfba *textPara )bbox ()_ac .PdfRectangle {return _ccfba .PdfRectangle };func (_feaad *textWord )absorb (_dcaf *textWord ){_feaad .PdfRectangle =_def (_feaad .PdfRectangle ,_dcaf .PdfRectangle );
|
|
|
|
|
_feaad ._fddf =append (_feaad ._fddf ,_dcaf ._fddf ...);};func (_cbea *textTable )newTablePara ()*textPara {_cdaa :=_cbea .computeBbox ();_bfbf :=&textPara {PdfRectangle :_cdaa ,_efgg :_cdaa ,_eecb :_cbea };if _gaa {_dc .Log .Info ("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073",_bfbf );
|
|
|
|
|
};return _bfbf ;};func (_defgf *subpath )isQuadrilateral ()bool {if len (_defgf ._effg )< 4||len (_defgf ._effg )> 5{return false ;};if len (_defgf ._effg )==5{_fceb :=_defgf ._effg [0];_gbdfg :=_defgf ._effg [4];if _fceb .X !=_gbdfg .X ||_fceb .Y !=_gbdfg .Y {return false ;
|
|
|
|
|
};};return true ;};func (_gabd *textTable )emptyRow (_gfdbd int )bool {for _afaf :=0;_afaf < _gabd ._eaed ;_afaf ++{_ccdgb :=_gabd .get (_afaf ,_gfdbd );if _ccdgb !=nil &&_ccdgb .text ()!=""{return false ;};};return true ;};type textMark struct{_ac .PdfRectangle ;
|
|
|
|
|
_adfa int ;_bfccc string ;_gdfbd string ;_begb *_ac .PdfFont ;_adca float64 ;_egda float64 ;_ggcd _cc .Matrix ;_fefb _cc .Point ;_bgcb _ac .PdfRectangle ;_bccf _gca .Color ;_fdf _gca .Color ;};
|
2021-02-22 02:29:48 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// String returns a string describing `tm`.
|
|
|
|
|
func (_gadd TextMark )String ()string {_acdb :=_gadd .BBox ;var _ccbga string ;if _gadd .Font !=nil {_ccbga =_gadd .Font .String ();if len (_ccbga )> 50{_ccbga =_ccbga [:50]+"\u002e\u002e\u002e";};};var _ddfc string ;if _gadd .Meta {_ddfc ="\u0020\u002a\u004d\u002a";
|
|
|
|
|
};return _ag .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_gadd .Offset ,_gadd .Text ,[]rune (_gadd .Text ),_acdb .Llx ,_acdb .Lly ,_acdb .Urx ,_acdb .Ury ,_ccbga ,_ddfc );
|
|
|
|
|
};type bounded interface{bbox ()_ac .PdfRectangle };const (_eeaee markKind =iota ;_beaeb ;_gbef ;_abgf ;);type textWord struct{_ac .PdfRectangle ;_cfba float64 ;_ggbbf string ;_fddf []*textMark ;_fbdg float64 ;_bagec bool ;};func (_dgaef *textLine )bbox ()_ac .PdfRectangle {return _dgaef .PdfRectangle };
|
|
|
|
|
func (_cbeb *ruling )intersects (_cceb *ruling )bool {_abea :=(_cbeb ._ddca ==_eace &&_cceb ._ddca ==_gega )||(_cceb ._ddca ==_eace &&_cbeb ._ddca ==_gega );_ccdg :=func (_agcge ,_cgba *ruling )bool {return _agcge ._daa -_eccge <=_cgba ._dddg &&_cgba ._dddg <=_agcge ._defb +_eccge ;
|
|
|
|
|
};_bfb :=_ccdg (_cbeb ,_cceb );_efdf :=_ccdg (_cceb ,_cbeb );if _ebgeg {_ag .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_abea ,_bfb ,_efdf ,_abea &&_bfb &&_efdf ,_cbeb ,_cceb );
|
|
|
|
|
};return _abea &&_bfb &&_efdf ;};func (_adea *textTable )subdivide ()*textTable {_adea .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");_aegb :=_adea .compositeRowCorridors ();_ffge :=_adea .compositeColCorridors ();if _gaa {_dc .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_gbfa (_aegb ),_gbfa (_ffge ));
|
|
|
|
|
};if len (_aegb )==0||len (_ffge )==0{return _adea ;};_fdaa (_aegb );_fdaa (_ffge );if _gaa {_dc .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_gbfa (_aegb ),_gbfa (_ffge ));
|
|
|
|
|
};_egbaa ,_bfgfed :=_ecdae (_adea ._dege ,_aegb );_dgga ,_aafee :=_ecdae (_adea ._eaed ,_ffge );_facea :=make (map[uint64 ]*textPara ,_aafee *_bfgfed );_dgdec :=&textTable {PdfRectangle :_adea .PdfRectangle ,_aebf :_adea ._aebf ,_dege :_bfgfed ,_eaed :_aafee ,_gebab :_facea };
|
|
|
|
|
if _gaa {_dc .Log .Info ("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_adea ._eaed ,_adea ._dege ,_aafee ,_bfgfed ,_gbfa (_aegb ),_gbfa (_ffge ),_egbaa ,_dgga );
|
|
|
|
|
};for _acgb :=0;_acgb < _adea ._dege ;_acgb ++{_gcda :=_egbaa [_acgb ];for _bbcd :=0;_bbcd < _adea ._eaed ;_bbcd ++{_abcaf :=_dgga [_bbcd ];if _gaa {_ag .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_bbcd ,_acgb ,_abcaf ,_gcda );
|
|
|
|
|
};_agffb ,_cfad :=_adea ._agbbc [_aeed (_bbcd ,_acgb )];if !_cfad {continue ;};_gbgdg :=_agffb .split (_aegb [_acgb ],_ffge [_bbcd ]);for _dbefd :=0;_dbefd < _gbgdg ._dege ;_dbefd ++{for _gacf :=0;_gacf < _gbgdg ._eaed ;_gacf ++{_fegd :=_gbgdg .get (_gacf ,_dbefd );
|
|
|
|
|
_dgdec .put (_abcaf +_gacf ,_gcda +_dbefd ,_fegd );if _gaa {_ag .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_abcaf +_gacf ,_gcda +_dbefd ,_fegd );};};};};};return _dgdec ;};
|
|
|
|
|
|
|
|
|
|
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
|
|
|
|
|
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
|
|
|
|
|
// Replace with a function like Extract() (*PageText, error)
|
|
|
|
|
func (_dfg *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_ead ,_efde ,_fed ,_aa :=_dfg .extractPageText (_dfg ._eg ,_dfg ._ff ,_cc .IdentityMatrix (),0);if _aa !=nil {return nil ,0,0,_aa ;};_ead .computeViews ();_aa =_eeafb (_ead );if _aa !=nil {return nil ,0,0,_aa ;
|
|
|
|
|
};return _ead ,_efde ,_fed ,nil ;};func (_acd *textObject )moveTextSetLeading (_cde ,_eed float64 ){_acd ._cebc ._cecg =-_eed ;_acd .moveLP (_cde ,_eed );};func (_dcbf *shapesState )lineTo (_aeb ,_bebf float64 ){if _dfgc {_dc .Log .Info ("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066",_aeb ,_bebf ,_dcbf .devicePoint (_aeb ,_bebf ));
|
|
|
|
|
};_dcbf .addPoint (_aeb ,_bebf );};
|
2020-12-06 13:03:03 +00:00
|
|
|
|
|
2021-03-23 23:12:52 +00:00
|
|
|
|
// TextTable represents a table.
|
|
|
|
|
// Cells are ordered top-to-bottom, left-to-right.
|
|
|
|
|
// Cells[y] is the (0-offset) y'th row in the table.
|
|
|
|
|
// Cells[y][x] is the (0-offset) x'th column in the table.
|
2021-04-06 22:35:37 +00:00
|
|
|
|
type TextTable struct{W ,H int ;Cells [][]TableCell ;};func (_gdee *textObject )getFontDirect (_cbb string )(*_ac .PdfFont ,error ){_gfgd ,_acef :=_gdee .getFontDict (_cbb );if _acef !=nil {return nil ,_acef ;};_adeg ,_acef :=_ac .NewPdfFontFromPdfObject (_gfgd );
|
|
|
|
|
if _acef !=nil {_dc .Log .Debug ("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cbb ,_acef );
|
|
|
|
|
};return _adeg ,_acef ;};type compositeCell struct{_ac .PdfRectangle ;paraList ;};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
|
|
|
|
// ToText returns the page text as a single string.
|
|
|
|
|
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
|
|
|
|
|
// Text() instead.
|
2021-04-06 22:35:37 +00:00
|
|
|
|
func (_ebe PageText )ToText ()string {return _ebe .Text ()};func (_fag *textObject )reset (){_fag ._edfe =_cc .IdentityMatrix ();_fag ._dca =_cc .IdentityMatrix ();_fag ._fef =nil ;};func (_edfg *wordBag )highestWord (_ceef int ,_eafd ,_bdfe float64 )*textWord {for _ ,_gdfb :=range _edfg ._bed [_ceef ]{if _eafd <=_gdfb ._cfba &&_gdfb ._cfba <=_bdfe {return _gdfb ;
|
|
|
|
|
};};return nil ;};type markKind int ;func _dace (_acefb ,_aeac bounded )float64 {return _acefb .bbox ().Llx -_aeac .bbox ().Llx };func (_eaab *textObject )newTextMark (_ggggce string ,_fffe _cc .Matrix ,_bagd _cc .Point ,_feff float64 ,_cagg *_ac .PdfFont ,_ggcf float64 ,_bcgc ,_bfcd _gca .Color )(textMark ,bool ){_eede :=_fffe .Angle ();
|
|
|
|
|
_accf :=_caeg (_eede ,_fcfc );var _ddab float64 ;if _accf %180!=90{_ddab =_fffe .ScalingFactorY ();}else {_ddab =_fffe .ScalingFactorX ();};_dbf :=_dgcc (_fffe );_gadc :=_ac .PdfRectangle {Llx :_dbf .X ,Lly :_dbf .Y ,Urx :_bagd .X ,Ury :_bagd .Y };switch _accf %360{case 90:_gadc .Urx -=_ddab ;
|
|
|
|
|
case 180:_gadc .Ury -=_ddab ;case 270:_gadc .Urx +=_ddab ;case 0:_gadc .Ury +=_ddab ;default:_accf =0;_gadc .Ury +=_ddab ;};if _gadc .Llx > _gadc .Urx {_gadc .Llx ,_gadc .Urx =_gadc .Urx ,_gadc .Llx ;};if _gadc .Lly > _gadc .Ury {_gadc .Lly ,_gadc .Ury =_gadc .Ury ,_gadc .Lly ;
|
|
|
|
|
};_cfc ,_ecdg :=_abgee (_gadc ,_eaab ._fffc ._cf );if !_ecdg {_dc .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_gadc ,_eaab ._fffc ._cf ,_ggggce );
|
|
|
|
|
};_gadc =_cfc ;_ddef :=_gadc ;_cccg :=_eaab ._fffc ._cf ;switch _accf %360{case 90:_cccg .Urx ,_cccg .Ury =_cccg .Ury ,_cccg .Urx ;_ddef =_ac .PdfRectangle {Llx :_cccg .Urx -_gadc .Ury ,Urx :_cccg .Urx -_gadc .Lly ,Lly :_gadc .Llx ,Ury :_gadc .Urx };case 180:_ddef =_ac .PdfRectangle {Llx :_cccg .Urx -_gadc .Llx ,Urx :_cccg .Urx -_gadc .Urx ,Lly :_cccg .Ury -_gadc .Lly ,Ury :_cccg .Ury -_gadc .Ury };
|
|
|
|
|
case 270:_cccg .Urx ,_cccg .Ury =_cccg .Ury ,_cccg .Urx ;_ddef =_ac .PdfRectangle {Llx :_gadc .Ury ,Urx :_gadc .Lly ,Lly :_cccg .Ury -_gadc .Llx ,Ury :_cccg .Ury -_gadc .Urx };};if _ddef .Llx > _ddef .Urx {_ddef .Llx ,_ddef .Urx =_ddef .Urx ,_ddef .Llx ;
|
|
|
|
|
};if _ddef .Lly > _ddef .Ury {_ddef .Lly ,_ddef .Ury =_ddef .Ury ,_ddef .Lly ;};_ggfe :=textMark {_bfccc :_ggggce ,PdfRectangle :_ddef ,_bgcb :_gadc ,_begb :_cagg ,_adca :_ddab ,_egda :_ggcf ,_ggcd :_fffe ,_fefb :_bagd ,_adfa :_accf ,_bccf :_bcgc ,_fdf :_bfcd };
|
|
|
|
|
if _decf {_dc .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_dbf ,_bagd ,_ggfe .String ());};return _ggfe ,_ecdg ;
|
|
|
|
|
};func _adcc (_bcfc *wordBag ,_eggd *textWord ,_adgd float64 )bool {return _eggd .Llx < _bcfc .Urx +_adgd &&_bcfc .Llx -_adgd < _eggd .Urx ;};func (_aaccb rulingList )removeDuplicates ()rulingList {if len (_aaccb )==0{return nil ;};_aaccb .sort ();_deba :=rulingList {_aaccb [0]};
|
|
|
|
|
for _ ,_bfce :=range _aaccb [1:]{if _bfce .equals (_deba [len (_deba )-1]){continue ;};_deba =append (_deba ,_bfce );};return _deba ;};func (_gcegf rulingList )bbox ()_ac .PdfRectangle {var _fgfg _ac .PdfRectangle ;if len (_gcegf )==0{_dc .Log .Error ("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073");
|
|
|
|
|
return _ac .PdfRectangle {};};if _gcegf [0]._ddca ==_gega {_fgfg .Llx ,_fgfg .Urx =_gcegf .secMinMax ();_fgfg .Lly ,_fgfg .Ury =_gcegf .primMinMax ();}else {_fgfg .Llx ,_fgfg .Urx =_gcegf .primMinMax ();_fgfg .Lly ,_fgfg .Ury =_gcegf .secMinMax ();};return _fgfg ;
|
|
|
|
|
};func (_agbb rulingList )log (_aeccc string ){if !_ebgeg {return ;};_dc .Log .Info ("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_aeccc ,_agbb .String ());for _bdegf ,_fcgb :=range _agbb {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bdegf ,_fcgb .String ());
|
|
|
|
|
};};func (_ebdg paraList )lines ()[]*textLine {var _gaaf []*textLine ;for _ ,_ccga :=range _ebdg {_gaaf =append (_gaaf ,_ccga ._acee ...);};return _gaaf ;};type textLine struct{_ac .PdfRectangle ;_dffg float64 ;_acec []*textWord ;_cfa float64 ;};func (_efag paraList )llyRange (_beae []int ,_eddb ,_fggc float64 )[]int {_gccd :=len (_efag );
|
|
|
|
|
if _fggc < _efag [_beae [0]].Lly ||_eddb > _efag [_beae [_gccd -1]].Lly {return nil ;};_bggcc :=_e .Search (_gccd ,func (_fbba int )bool {return _efag [_beae [_fbba ]].Lly >=_eddb });_edef :=_e .Search (_gccd ,func (_fggbf int )bool {return _efag [_beae [_fggbf ]].Lly > _fggc });
|
|
|
|
|
return _beae [_bggcc :_edef ];};func (_ebcc intSet )del (_abcc int ){delete (_ebcc ,_abcc )};func _cceef (_dcf ,_cffg *textPara )bool {return _gfde (_dcf ._efgg ,_cffg ._efgg )};func (_ebag intSet )add (_cgbaa int ){_ebag [_cgbaa ]=struct{}{}};type textObject struct{_fffc *Extractor ;
|
|
|
|
|
_edf *_ac .PdfPageResources ;_dcb _dg .GraphicsState ;_cebc *textState ;_ddbf *stateStack ;_edfe _cc .Matrix ;_dca _cc .Matrix ;_fef []*textMark ;_acg bool ;};func _gdfg (_cefa []_be .PdfObject )(_daff ,_fdee float64 ,_fbcce error ){if len (_cefa )!=2{return 0,0,_ag .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_cefa ));
|
|
|
|
|
};_dfag ,_fbcce :=_be .GetNumbersAsFloat (_cefa );if _fbcce !=nil {return 0,0,_fbcce ;};return _dfag [0],_dfag [1],nil ;};func _bbg (_gaef []TextMark ,_agbc *int )[]TextMark {_febbg :=_gaef [len (_gaef )-1];_cdgc :=[]rune (_febbg .Text );if len (_cdgc )==1{_gaef =_gaef [:len (_gaef )-1];
|
|
|
|
|
_feabc :=_gaef [len (_gaef )-1];*_agbc =_feabc .Offset +len (_feabc .Text );}else {_faba :=_ddcf (_febbg .Text );*_agbc +=len (_faba )-len (_febbg .Text );_febbg .Text =_faba ;};return _gaef ;};func _ceac (_abfdf float64 )int {var _dgec int ;if _abfdf >=0{_dgec =int (_abfdf /_bgca );
|
|
|
|
|
}else {_dgec =int (_abfdf /_bgca )-1;};return _dgec ;};
|
2021-01-07 14:20:10 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
|
|
|
|
|
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
|
|
|
|
|
func (_deb *Extractor )ExtractTextWithStats ()(_abbf string ,_ddb int ,_fbf int ,_gcf error ){_caa ,_ddb ,_fbf ,_gcf :=_deb .ExtractPageText ();if _gcf !=nil {return "",_ddb ,_fbf ,_gcf ;};return _caa .Text (),_ddb ,_fbf ,nil ;};func (_acede paraList )findTables (_ccfa []gridTiling )[]*textTable {_acede .addNeighbours ();
|
|
|
|
|
_e .Slice (_acede ,func (_defff ,_agge int )bool {return _dcbgd (_acede [_defff ],_acede [_agge ])< 0});var _fdcge []*textTable ;if _eaaa {_dggf :=_acede .findGridTables (_ccfa );_fdcge =append (_fdcge ,_dggf ...);};if _adggd {_efceb :=_acede .findTextTables ();
|
|
|
|
|
_fdcge =append (_fdcge ,_efceb ...);};return _fdcge ;};func (_fafc *shapesState )addPoint (_gcdg ,_adbf float64 ){_gcc :=_fafc .establishSubpath ();_cagb :=_fafc .devicePoint (_gcdg ,_adbf );if _gcc ==nil {_fafc ._gbag =true ;_fafc ._adac =_cagb ;}else {_gcc .add (_cagb );
|
|
|
|
|
};};func (_dagg rectRuling )asRuling ()(*ruling ,bool ){_dcefc :=ruling {_ddca :_dagg ._befg ,Color :_dagg .Color ,_egff :_gbef };switch _dagg ._befg {case _eace :_dcefc ._dddg =0.5*(_dagg .Llx +_dagg .Urx );_dcefc ._daa =_dagg .Lly ;_dcefc ._defb =_dagg .Ury ;
|
|
|
|
|
_ffdg ,_ecdgb :=_dagg .checkWidth (_dagg .Llx ,_dagg .Urx );if !_ecdgb {if _dfdeg {_dc .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_dagg );
|
|
|
|
|
};return nil ,false ;};_dcefc ._bbag =_ffdg ;case _gega :_dcefc ._dddg =0.5*(_dagg .Lly +_dagg .Ury );_dcefc ._daa =_dagg .Llx ;_dcefc ._defb =_dagg .Urx ;_ffdcc ,_fdgf :=_dagg .checkWidth (_dagg .Lly ,_dagg .Ury );if !_fdgf {if _dfdeg {_dc .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_dagg );
|
|
|
|
|
};return nil ,false ;};_dcefc ._bbag =_ffdcc ;default:_dc .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_dagg ._befg );return nil ,false ;};return &_dcefc ,true ;};func (_acfc rulingList )toGrids ()[]rulingList {if _ebgeg {_dc .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_acfc );
|
|
|
|
|
};_gfgcg :=_acfc .intersections ();if _ebgeg {_dc .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_acfc ),len (_gfgcg ));
|
|
|
|
|
for _ ,_dfcfb :=range _cgda (_gfgcg ){_ag .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_dfcfb ,_gfgcg [_dfcfb ]);};};_gcge :=make (map[int ]intSet ,len (_acfc ));for _egfe :=range _acfc {_cfgb :=_acfc .connections (_gfgcg ,_egfe );if len (_cfgb )> 0{_gcge [_egfe ]=_cfgb ;
|
|
|
|
|
};};if _ebgeg {_dc .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_gcge ));for _ ,_ecbe :=range _cgda (_gcge ){_ag .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_ecbe ,_gcge [_ecbe ]);
|
|
|
|
|
};};_cbdde :=_acfaa (len (_acfc ),func (_feeac ,_fded int )bool {_agga ,_acbc :=len (_gcge [_feeac ]),len (_gcge [_fded ]);if _agga !=_acbc {return _agga > _acbc ;};return _acfc .comp (_feeac ,_fded );});if _ebgeg {_dc .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_cbdde );
|
|
|
|
|
};_bffag :=[][]int {{_cbdde [0]}};_bdfg :for _ ,_agaf :=range _cbdde [1:]{for _dedcc ,_dfgg :=range _bffag {for _ ,_eaefe :=range _dfgg {if _gcge [_eaefe ].has (_agaf ){_bffag [_dedcc ]=append (_dfgg ,_agaf );continue _bdfg ;};};};_bffag =append (_bffag ,[]int {_agaf });
|
|
|
|
|
};if _ebgeg {_dc .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_bffag );};_e .SliceStable (_bffag ,func (_dgcb ,_bbbb int )bool {return len (_bffag [_dgcb ])> len (_bffag [_bbbb ])});for _ ,_aadee :=range _bffag {_e .Slice (_aadee ,func (_acfca ,_bdcc int )bool {return _acfc .comp (_aadee [_acfca ],_aadee [_bdcc ])});
|
|
|
|
|
};_cgcg :=make ([]rulingList ,len (_bffag ));for _gdgdb ,_eacc :=range _bffag {_gcdd :=make (rulingList ,len (_eacc ));for _gdbf ,_ebfdgc :=range _eacc {_gcdd [_gdbf ]=_acfc [_ebfdgc ];};_cgcg [_gdgdb ]=_gcdd ;};if _ebgeg {_dc .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_cgcg );
|
|
|
|
|
};var _face []rulingList ;for _ ,_bddbc :=range _cgcg {if _efda ,_gged :=_bddbc .isActualGrid ();_gged {_bddbc =_efda ;_bddbc =_bddbc .snapToGroups ();_face =append (_face ,_bddbc );};};if _ebgeg {_aecag ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_face );
|
|
|
|
|
_dc .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_cgcg ),len (_face ));};return _face ;};type lineRuling struct{_eddd rulingKind ;
|
|
|
|
|
_fdcgf markKind ;_gca .Color ;_cda ,_aeff _cc .Point ;};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// String returns a description of `p`.
|
|
|
|
|
func (_bffb *textPara )String ()string {if _bffb ._gfcac {return _ag .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d",_bffb .PdfRectangle );};_eeff :="";if _bffb ._eecb !=nil {_eeff =_ag .Sprintf ("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020",_bffb ._eecb ._eaed ,_bffb ._eecb ._dege );
|
|
|
|
|
};return _ag .Sprintf ("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071",_bffb .PdfRectangle ,_eeff ,len (_bffb ._acee ),_geac (_bffb .text (),50));};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// String returns a description of `l`.
|
|
|
|
|
func (_ggdf *textLine )String ()string {return _ag .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_ggdf ._dffg ,_ggdf .PdfRectangle ,_ggdf ._cfa ,_ggdf .text ());
|
|
|
|
|
};func _dgcc (_gge _cc .Matrix )_cc .Point {_bcf ,_dce :=_gge .Translation ();return _cc .Point {X :_bcf ,Y :_dce };};func _ebfdg (_deff ,_beed _cc .Point )rulingKind {_egba :=_gc .Abs (_deff .X -_beed .X );_ceegd :=_gc .Abs (_deff .Y -_beed .Y );return _cgee (_egba ,_ceegd ,_eccg );
|
|
|
|
|
};func (_cbee paraList )findTableGrid (_fgca gridTiling )(*textTable ,map[*textPara ]struct{}){_dfcca :=len (_fgca ._bddgg );_bbgcc :=len (_fgca ._dcaa );_bdcgg :=textTable {_aebf :true ,_eaed :_dfcca ,_dege :_bbgcc ,_gebab :make (map[uint64 ]*textPara ,_dfcca *_bbgcc ),_agbbc :make (map[uint64 ]compositeCell ,_dfcca *_bbgcc )};
|
|
|
|
|
_dgca :=make (map[*textPara ]struct{});_edab :=int ((1.0-_gafe )*float64 (_dfcca *_bbgcc ));_addeg :=0;if _dgf {_dc .Log .Info ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064",_dfcca ,_bbgcc );
|
|
|
|
|
};for _bbea ,_deceg :=range _fgca ._dcaa {_bbacd ,_ebfdf :=_fgca ._ecag [_deceg ];if !_ebfdf {continue ;};for _gdafg ,_abfddf :=range _fgca ._bddgg {_eebg ,_bgea :=_bbacd [_abfddf ];if !_bgea {continue ;};_dfda :=_cbee .inTile (_eebg );if len (_dfda )==0{_addeg ++;
|
|
|
|
|
if _addeg > _edab {if _dgf {_dc .Log .Info ("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064",_addeg );};return nil ,nil ;};}else {_bdcgg .putComposite (_gdafg ,_bbea ,_dfda ,_eebg .PdfRectangle );for _ ,_aabfe :=range _dfda {_dgca [_aabfe ]=struct{}{};
|
|
|
|
|
};};};};_fecc :=0;for _daab :=0;_daab < _dfcca ;_daab ++{_gdfe :=_bdcgg .get (_daab ,0);if _gdfe ==nil ||!_gdfe ._gfcac {_fecc ++;};};if _fecc ==0{if _dgf {_dc .Log .Info ("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030");};return nil ,nil ;
|
|
|
|
|
};_dafe :=_bdcgg .reduceTiling (_fgca ,_bdcb );_dafe =_dafe .subdivide ();return _dafe ,_dgca ;};const (_bddg =false ;_decf =false ;_fdc =false ;_dfde =false ;_dfgc =false ;_cgdb =false ;_ccef =false ;_cdgb =false ;_aacab =false ;_ebdc =_aacab &&true ;
|
|
|
|
|
_bafa =_ebdc &&false ;_bdgd =_aacab &&true ;_gaa =false ;_edad =_gaa &&false ;_cgf =_gaa &&true ;_ebgeg =false ;_dbag =_ebgeg &&false ;_efea =_ebgeg &&false ;_dgf =_ebgeg &&true ;_dfdeg =_ebgeg &&false ;_cbba =_ebgeg &&false ;);
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// String returns a string describing `ma`.
|
|
|
|
|
func (_ebcb TextMarkArray )String ()string {_adb :=len (_ebcb ._bcda );if _adb ==0{return "\u0045\u004d\u0050T\u0059";};_dbdb :=_ebcb ._bcda [0];_fba :=_ebcb ._bcda [_adb -1];return _ag .Sprintf ("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d",_adb ,_dbdb ,_fba );
|
|
|
|
|
};func _gfgcd (_agce []TextMark ,_cedd *int ,_fedfd TextMark )[]TextMark {_fedfd .Offset =*_cedd ;_agce =append (_agce ,_fedfd );*_cedd +=len (_fedfd .Text );return _agce ;};type subpath struct{_effg []_cc .Point ;_gbdf bool ;};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
|
|
|
|
|
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
|
|
|
|
|
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
|
|
|
|
|
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
|
|
|
|
|
type RenderMode int ;var (_gdg =_g .New ("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072");_ef =_g .New ("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072"););func _caeg (_dbdc float64 ,_dcd int )int {if _dcd ==0{_dcd =1;
|
|
|
|
|
};_fega :=float64 (_dcd );return int (_gc .Round (_dbdc /_fega )*_fega );};func (_aecfc rulingList )toTilings ()(rulingList ,[]gridTiling ){_aecfc .log ("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s");if len (_aecfc )==0{return nil ,nil ;};_aecfc =_aecfc .tidied ("\u0061\u006c\u006c");
|
|
|
|
|
_aecfc .log ("\u0074\u0069\u0064\u0069\u0065\u0064");_cccgg :=_aecfc .toGrids ();_cgcfe :=make ([]gridTiling ,len (_cccgg ));for _agaca ,_bbac :=range _cccgg {_cgcfe [_agaca ]=_bbac .asTiling ();};return _aecfc ,_cgcfe ;};func _eege (_agacc ,_ecbc bounded )float64 {return _bbec (_agacc )-_bbec (_ecbc )};
|
|
|
|
|
func (_egg *imageExtractContext )extractXObjectImage (_fa *_be .PdfObjectName ,_efd _dg .GraphicsState ,_abc *_ac .PdfPageResources )error {_bdf ,_ :=_abc .GetXObjectByName (*_fa );if _bdf ==nil {return nil ;};_fbe ,_gef :=_egg ._dge [_bdf ];if !_gef {_dgc ,_fee :=_abc .GetXObjectImageByName (*_fa );
|
|
|
|
|
if _fee !=nil {return _fee ;};if _dgc ==nil {return nil ;};_aba ,_fee :=_dgc .ToImage ();if _fee !=nil {return _fee ;};_fbe =&cachedImage {_ca :_aba ,_fg :_dgc .ColorSpace };_egg ._dge [_bdf ]=_fbe ;};_gg :=_fbe ._ca ;_agf :=_fbe ._fg ;_ebc ,_fae :=_agf .ImageToRGB (*_gg );
|
|
|
|
|
if _fae !=nil {return _fae ;};_dc .Log .Debug ("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_efd .CTM .String ());_dgde :=ImageMark {Image :&_ebc ,Width :_efd .CTM .ScalingFactorX (),Height :_efd .CTM .ScalingFactorY (),Angle :_efd .CTM .Angle ()};
|
|
|
|
|
_dgde .X ,_dgde .Y =_efd .CTM .Translation ();_egg ._ba =append (_egg ._ba ,_dgde );_egg ._age ++;return nil ;};func _def (_bggc ,_ebb _ac .PdfRectangle )_ac .PdfRectangle {return _ac .PdfRectangle {Llx :_gc .Min (_bggc .Llx ,_ebb .Llx ),Lly :_gc .Min (_bggc .Lly ,_ebb .Lly ),Urx :_gc .Max (_bggc .Urx ,_ebb .Urx ),Ury :_gc .Max (_bggc .Ury ,_ebb .Ury )};
|
|
|
|
|
};func _bgdb (_gccb ,_fbdb _cc .Point )bool {return _gccb .X ==_fbdb .X &&_gccb .Y ==_fbdb .Y };func (_egaec *textPara )fontsize ()float64 {return _egaec ._acee [0]._cfa };const (_dfgf =true ;_bce =true ;_gaae =true ;_bddb =false ;_bbdc =false ;_efadb =6;
|
|
|
|
|
_fgdg =3.0;_fabc =200;_eaaa =true ;_adggd =true ;_gada =true ;_defe =true ;_gbe =false ;);const (_ceda =1.0e-6;_ddee =1.0e-4;_fcfc =10;_bgca =6;_dbgb =0.5;_cdffc =0.12;_cccc =0.19;_gbda =0.04;_befff =0.04;_gdgd =1.0;_egdc =0.04;_eaea =0.4;_daca =0.7;_dccb =1.0;
|
|
|
|
|
_gdb =0.1;_aece =1.4;_gfgdd =0.46;_adbfe =0.02;_fdegb =0.2;_eddag =0.5;_cbe =4;_baad =4.0;_ecdb =6;_gafe =0.3;_ggge =0.01;_ebfbc =0.02;_cega =2;_aefba =2;_febab =500;_adge =4.0;_bgcc =4.0;_eccg =0.05;_efdga =0.1;_eccge =2.0;_gfaff =2.0;_egga =1.5;_bdcb =3.0;
|
|
|
|
|
_aecb =0.25;);func (_ddfe *ruling )alignsPrimary (_effc *ruling )bool {return _ddfe ._ddca ==_effc ._ddca &&_gc .Abs (_ddfe ._dddg -_effc ._dddg )< _gfaff *0.5;};func (_dggb *stateStack )top ()*textState {if _dggb .empty (){return nil ;};return (*_dggb )[_dggb .size ()-1];
|
|
|
|
|
};const _efc =20;func _dcbfe (_aggdcc []*textMark ,_bcfec _ac .PdfRectangle )*textWord {_cfaa :=_aggdcc [0].PdfRectangle ;_adbbg :=_aggdcc [0]._adca ;for _ ,_bgaf :=range _aggdcc [1:]{_cfaa =_def (_cfaa ,_bgaf .PdfRectangle );if _bgaf ._adca > _adbbg {_adbbg =_bgaf ._adca ;
|
|
|
|
|
};};return &textWord {PdfRectangle :_cfaa ,_fddf :_aggdcc ,_cfba :_bcfec .Ury -_cfaa .Lly ,_fbdg :_adbbg };};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// ExtractPageImages returns the image contents of the page extractor, including data
|
|
|
|
|
// and position, size information for each image.
|
|
|
|
|
// A set of options to control page image extraction can be passed in. The options
|
|
|
|
|
// parameter can be nil for the default options. By default, inline stencil masks
|
|
|
|
|
// are not extracted.
|
|
|
|
|
func (_ce *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_ffd :=&imageExtractContext {_gab :options };_cec :=_ffd .extractContentStreamImages (_ce ._eg ,_ce ._ff );if _cec !=nil {return nil ,_cec ;};return &PageImages {Images :_ffd ._ba },nil ;
|
|
|
|
|
};func _dbde (_aaf *Extractor ,_aafe *_ac .PdfPageResources ,_ecfb _dg .GraphicsState ,_deca *textState ,_feab *stateStack )*textObject {return &textObject {_fffc :_aaf ,_edf :_aafe ,_dcb :_ecfb ,_ddbf :_feab ,_cebc :_deca ,_edfe :_cc .IdentityMatrix (),_dca :_cc .IdentityMatrix ()};
|
|
|
|
|
};func (_gegg *subpath )makeRectRuling (_geaac _gca .Color )(*ruling ,bool ){if _dfdeg {_dc .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_gegg );};_ceacc :=_gegg ._effg [:4];
|
|
|
|
|
_cefc :=make (map[int ]rulingKind ,len (_ceacc ));for _cedadg ,_fabac :=range _ceacc {_aacg :=_gegg ._effg [(_cedadg +1)%4];_cefc [_cedadg ]=_ebfdg (_fabac ,_aacg );if _dfdeg {_ag .Printf ("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066",_cedadg ,_cefc [_cedadg ],_fabac ,_aacg );
|
|
|
|
|
};};if _dfdeg {_ag .Printf ("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_cefc );};var _cfgde ,_fbbd []int ;for _fagf ,_bdadg :=range _cefc {switch _bdadg {case _gega :_fbbd =append (_fbbd ,_fagf );case _eace :_cfgde =append (_cfgde ,_fagf );
|
|
|
|
|
};};if _dfdeg {_ag .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_fbbd ),_fbbd );_ag .Printf ("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_cfgde ),_cfgde );
|
|
|
|
|
};_gbaed :=(len (_fbbd )==2&&len (_cfgde )==2)||(len (_fbbd )==2&&len (_cfgde )==0&&_ebdaf (_ceacc [_fbbd [0]],_ceacc [_fbbd [1]]))||(len (_cfgde )==2&&len (_fbbd )==0&&_gbcea (_ceacc [_cfgde [0]],_ceacc [_cfgde [1]]));if _dfdeg {_ag .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_fbbd ),len (_cfgde ),_gbaed );
|
|
|
|
|
};if !_gbaed {if _dfdeg {_dc .Log .Error ("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v",_gegg );_ag .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_fbbd ),len (_cfgde ),_gbaed );
|
|
|
|
|
};return &ruling {},false ;};if len (_cfgde )==0{for _aecca ,_febc :=range _cefc {if _febc !=_gega {_cfgde =append (_cfgde ,_aecca );};};};if len (_fbbd )==0{for _dgcca ,_aafg :=range _cefc {if _aafg !=_eace {_fbbd =append (_fbbd ,_dgcca );};};};if _dfdeg {_dc .Log .Info ("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_fbbd ),len (_cfgde ),len (_ceacc ),_fbbd ,_cfgde ,_ceacc );
|
|
|
|
|
};var _bdbf ,_dcab ,_ccbgb ,_dfga _cc .Point ;if _ceacc [_fbbd [0]].Y > _ceacc [_fbbd [1]].Y {_ccbgb ,_dfga =_ceacc [_fbbd [0]],_ceacc [_fbbd [1]];}else {_ccbgb ,_dfga =_ceacc [_fbbd [1]],_ceacc [_fbbd [0]];};if _ceacc [_cfgde [0]].X > _ceacc [_cfgde [1]].X {_bdbf ,_dcab =_ceacc [_cfgde [0]],_ceacc [_cfgde [1]];
|
|
|
|
|
}else {_bdbf ,_dcab =_ceacc [_cfgde [1]],_ceacc [_cfgde [0]];};_cgff :=_ac .PdfRectangle {Llx :_bdbf .X ,Urx :_dcab .X ,Lly :_dfga .Y ,Ury :_ccbgb .Y };if _cgff .Llx > _cgff .Urx {_cgff .Llx ,_cgff .Urx =_cgff .Urx ,_cgff .Llx ;};if _cgff .Lly > _cgff .Ury {_cgff .Lly ,_cgff .Ury =_cgff .Ury ,_cgff .Lly ;
|
|
|
|
|
};_aceeg :=rectRuling {PdfRectangle :_cgff ,_befg :_fbce (_cgff ),Color :_geaac };if _aceeg ._befg ==_cefd {if _dfdeg {_dc .Log .Error ("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c");
|
|
|
|
|
};return nil ,false ;};_cfeg ,_bffa :=_aceeg .asRuling ();if !_bffa {if _dfdeg {_dc .Log .Error ("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg");};return nil ,false ;};if _ebgeg {_ag .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_cfeg .String ());
|
|
|
|
|
};return _cfeg ,true ;};func (_fbfe *textLine )text ()string {var _gdga []string ;for _ ,_ebeb :=range _fbfe ._acec {if _ebeb ._bagec {_gdga =append (_gdga ,"\u0020");};_gdga =append (_gdga ,_ebeb ._ggbbf );};return _a .Join (_gdga ,"");};type textTable struct{_ac .PdfRectangle ;
|
|
|
|
|
_eaed ,_dege int ;_aebf bool ;_gebab map[uint64 ]*textPara ;_agbbc map[uint64 ]compositeCell ;};func (_ggdc *textLine )appendWord (_aegg *textWord ){_ggdc ._acec =append (_ggdc ._acec ,_aegg );_ggdc .PdfRectangle =_def (_ggdc .PdfRectangle ,_aegg .PdfRectangle );
|
|
|
|
|
if _aegg ._fbdg > _ggdc ._cfa {_ggdc ._cfa =_aegg ._fbdg ;};if _aegg ._cfba > _ggdc ._dffg {_ggdc ._dffg =_aegg ._cfba ;};};func (_cbac compositeCell )parasBBox ()(paraList ,_ac .PdfRectangle ){return _cbac .paraList ,_cbac .PdfRectangle ;};func (_dfcfad *ruling )gridIntersecting (_bfea *ruling )bool {return _eeca (_dfcfad ._daa ,_bfea ._daa )&&_eeca (_dfcfad ._defb ,_bfea ._defb );
|
|
|
|
|
};func (_fcefe *textTable )isExportable ()bool {if _fcefe ._aebf {return true ;};_bcgbd :=func (_bdef int )bool {_adbaa :=_fcefe .get (0,_bdef );if _adbaa ==nil {return false ;};_gfba :=_adbaa .text ();_eabgd :=_d .RuneCountInString (_gfba );_gbea :=_babg .MatchString (_gfba );
|
|
|
|
|
return _eabgd <=1||_gbea ;};for _bafc :=0;_bafc < _fcefe ._dege ;_bafc ++{if !_bcgbd (_bafc ){return true ;};};return false ;};type gridTiling struct{_ac .PdfRectangle ;_bddgg []float64 ;_dcaa []float64 ;_ecag map[float64 ]map[float64 ]gridTile ;};func _bdbe (_fccf _ac .PdfRectangle )*ruling {return &ruling {_ddca :_gega ,_dddg :_fccf .Lly ,_daa :_fccf .Llx ,_defb :_fccf .Urx };
|
2021-03-23 23:12:52 +00:00
|
|
|
|
};
|
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// PageText represents the layout of text on a device page.
|
|
|
|
|
type PageText struct{_ccgd []*textMark ;_ebgd string ;_eabf []TextMark ;_ffdb []TextTable ;_ace _ac .PdfRectangle ;_dedbd []pathSection ;_fbfb []pathSection ;};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// String returns a description of `tm`.
|
|
|
|
|
func (_bfec *textMark )String ()string {return _ag .Sprintf ("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022",_bfec .PdfRectangle ,_bfec ._adca ,_bfec ._bfccc );};func (_edebb *subpath )clear (){*_edebb =subpath {}};
|
|
|
|
|
func (_deac paraList )extractTables (_cfce []gridTiling )paraList {if _gaa {_dc .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_deac ));
|
|
|
|
|
};if len (_deac )< _ecdb {return _deac ;};_abgeb :=_deac .findTables (_cfce );if _gaa {_dc .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_abgeb ));
|
|
|
|
|
for _ffebd ,_ffga :=range _abgeb {_ffga .log (_ag .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_ffebd ));};};return _deac .applyTables (_abgeb );};func _cgec (_dgbc ,_ffb bounded )float64 {return _dgbc .bbox ().Llx -_ffb .bbox ().Urx };
|
|
|
|
|
func _gbcf (_cdbee map[float64 ]map[float64 ]gridTile )[]float64 {_deed :=make ([]float64 ,0,len (_cdbee ));for _bcdbd :=range _cdbee {_deed =append (_deed ,_bcdbd );};_e .Float64s (_deed );_fgee :=len (_deed );for _bced :=0;_bced < _fgee /2;_bced ++{_deed [_bced ],_deed [_fgee -1-_bced ]=_deed [_fgee -1-_bced ],_deed [_bced ];
|
|
|
|
|
};return _deed ;};func (_ccdd *textTable )bbox ()_ac .PdfRectangle {return _ccdd .PdfRectangle };func _edbd (_gddg []pathSection )rulingList {_becg (_gddg );if _ebgeg {_dc .Log .Info ("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs",len (_gddg ));
|
|
|
|
|
};var _aeccg rulingList ;for _ ,_aeaed :=range _gddg {for _ ,_gfaa :=range _aeaed ._feaa {if !_gfaa .isQuadrilateral (){if _ebgeg {_dc .Log .Error ("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073",_gfaa );
|
|
|
|
|
};continue ;};if _aabb ,_gebd :=_gfaa .makeRectRuling (_aeaed .Color );_gebd {_aeccg =append (_aeccg ,_aabb );}else {if _dfdeg {_dc .Log .Error ("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073",_gfaa );
|
|
|
|
|
};};};};if _ebgeg {_dc .Log .Info ("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073",_aeccg .String ());};return _aeccg ;};func _fdaa (_adcg map[int ][]float64 ){if len (_adcg )<=1{return ;};_acbe :=_fbcb (_adcg );
|
|
|
|
|
if _gaa {_dc .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_acbe );};var _aebff ,_cgcd int ;for _aebff ,_cgcd =range _acbe {if _adcg [_cgcd ]!=nil {break ;};};for _ggee ,_ababc :=range _acbe [_aebff :]{_abfg :=_adcg [_ababc ];
|
|
|
|
|
if _abfg ==nil {continue ;};if _gaa {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_aebff +_ggee ,_cgcd ,_ababc );};_ggfbce :=_adcg [_ababc ];if _ggfbce [len (_ggfbce )-1]> _abfg [0]{_ggfbce [len (_ggfbce )-1]=_abfg [0];
|
|
|
|
|
_adcg [_cgcd ]=_ggfbce ;};_cgcd =_ababc ;};};func (_fge *textObject )getStrokeColor ()_gca .Color {return _cgcgg (_fge ._dcb .ColorspaceStroking ,_fge ._dcb .ColorStroking );};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// String returns a human readable description of `vecs`.
|
|
|
|
|
func (_bffagb rulingList )String ()string {if len (_bffagb )==0{return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}";};_efffg ,_cddg :=_bffagb .vertsHorzs ();_ggfcd :=len (_efffg );_badf :=len (_cddg );if _ggfcd ==0||_badf ==0{return _ag .Sprintf ("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}",_ggfcd ,_badf );
|
|
|
|
|
};_bcdc :=_ac .PdfRectangle {Llx :_efffg [0]._dddg ,Urx :_efffg [_ggfcd -1]._dddg ,Lly :_cddg [_badf -1]._dddg ,Ury :_cddg [0]._dddg };return _ag .Sprintf ("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d",_ggfcd ,_badf ,_bcdc );
|
|
|
|
|
};func (_dcfga *textWord )addDiacritic (_cbga string ){_gacbd :=_dcfga ._fddf [len (_dcfga ._fddf )-1];_gacbd ._bfccc +=_cbga ;_gacbd ._bfccc =_gf .NFKC .String (_gacbd ._bfccc );};func (_eab *textObject )setFont (_fff string ,_aeca float64 )error {if _eab ==nil {return nil ;
|
|
|
|
|
};_eab ._cebc ._gfg =_aeca ;_ded ,_abaf :=_eab .getFont (_fff );if _abaf !=nil {return _abaf ;};_eab ._cebc ._ecc =_ded ;if _eab ._ddbf .empty (){_eab ._ddbf .push (_eab ._cebc );}else {_eab ._ddbf .top ()._ecc =_eab ._cebc ._ecc ;};return nil ;};func _aecag (_cfff string ,_agfdg []rulingList ){_dc .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_agfdg ),_cfff );
|
|
|
|
|
for _ebgc ,_ffcf :=range _agfdg {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ebgc ,_ffcf .String ());};};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// String returns a description of `k`.
|
|
|
|
|
func (_degbf rulingKind )String ()string {_ddfd ,_gaca :=_adfd [_degbf ];if !_gaca {return _ag .Sprintf ("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064",_degbf );};return _ddfd ;};func (_efbge *textTable )markCells (){for _efdfb :=0;
|
|
|
|
|
_efdfb < _efbge ._dege ;_efdfb ++{for _bbade :=0;_bbade < _efbge ._eaed ;_bbade ++{_gdedc :=_efbge .get (_bbade ,_efdfb );if _gdedc !=nil {_gdedc ._baba =true ;};};};};func _fbce (_bbgd _ac .PdfRectangle )rulingKind {_fcef :=_bbgd .Width ();_acgcf :=_bbgd .Height ();
|
|
|
|
|
if _fcef > _acgcf {if _fcef >=_adge {return _gega ;};}else {if _acgcf >=_adge {return _eace ;};};return _cefd ;};func _fggbb (_gbdbc []*textWord ,_eefb *textWord )[]*textWord {for _daeed ,_effec :=range _gbdbc {if _effec ==_eefb {return _aacdg (_gbdbc ,_daeed );
|
|
|
|
|
};};_dc .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_eefb );
|
|
|
|
|
return nil ;};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// String returns a description of `state`.
|
|
|
|
|
func (_dggbe *textState )String ()string {_ece :="\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]";if _dggbe ._ecc !=nil {_ece =_dggbe ._ecc .BaseFont ();};return _ag .Sprintf ("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071",_dggbe ._cge ,_dggbe ._bccba ,_dggbe ._gfg ,_ece );
|
|
|
|
|
};func (_fdegc *textLine )pullWord (_debec *wordBag ,_ecda *textWord ,_adgef int ){_fdegc .appendWord (_ecda );_debec .removeWord (_ecda ,_adgef );};func (_edde *textObject )moveLP (_agca ,_efg float64 ){_edde ._dca .Concat (_cc .NewMatrix (1,0,0,1,_agca ,_efg ));
|
|
|
|
|
_edde ._edfe =_edde ._dca ;};func _cbd (_caab *_dg .ContentStreamOperation )(float64 ,error ){if len (_caab .Params )!=1{_aca :=_g .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");
|
|
|
|
|
_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_caab .Operand ,1,len (_caab .Params ),_caab .Params );
|
|
|
|
|
return 0.0,_aca ;};return _be .GetNumberAsFloat (_caab .Params [0]);};func (_gaf *subpath )close (){if !_bgdb (_gaf ._effg [0],_gaf .last ()){_gaf .add (_gaf ._effg [0]);};_gaf ._gbdf =true ;_gaf .removeDuplicates ();};func (_bfccb *textTable )compositeColCorridors ()map[int ][]float64 {_geagd :=make (map[int ][]float64 ,_bfccb ._eaed );
|
|
|
|
|
if _gaa {_dc .Log .Info ("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020",_bfccb ._eaed );};for _ddbg :=0;_ddbg < _bfccb ._eaed ;_ddbg ++{_geagd [_ddbg ]=nil ;
|
|
|
|
|
};return _geagd ;};func (_gdab *textObject )showText (_eddf []byte )error {return _gdab .renderText (_eddf )};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// String returns a string describing `pt`.
|
|
|
|
|
func (_acab PageText )String ()string {_ggfc :=_ag .Sprintf ("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073",len (_acab ._ccgd ));_gff :=[]string {"\u002d"+_ggfc };for _ ,_efga :=range _acab ._ccgd {_gff =append (_gff ,_efga .String ());
|
|
|
|
|
};_gff =append (_gff ,"\u002b"+_ggfc );return _a .Join (_gff ,"\u000a");};func (_dcbe *textObject )renderText (_bfcb []byte )error {if _dcbe ._acg {_dc .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");
|
|
|
|
|
return nil ;};_fged :=_dcbe .getCurrentFont ();_bfdg :=_fged .BytesToCharcodes (_bfcb );_fedb ,_dbdf ,_dabd :=_fged .CharcodesToStrings (_bfdg );if _dabd > 0{_dc .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_dbdf ,_dabd );
|
|
|
|
|
};_dcbe ._cebc ._acc +=_dbdf ;_dcbe ._cebc ._dcge +=_dabd ;_ebfd :=_dcbe ._cebc ;_gdf :=_ebfd ._gfg ;_cacb :=_ebfd ._bfd /100.0;_gbcc :=_fgb ;if _fged .Subtype ()=="\u0054\u0079\u0070e\u0033"{_gbcc =1;};_cfd ,_ebff :=_fged .GetRuneMetrics (' ');if !_ebff {_cfd ,_ebff =_fged .GetCharMetrics (32);
|
|
|
|
|
};if !_ebff {_cfd ,_ =_ac .DefaultFont ().GetRuneMetrics (' ');};_cebf :=_cfd .Wx *_gbcc ;_dc .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_cebf ,_fedb ,_fged ,_gdf );
|
|
|
|
|
_ccg :=_cc .NewMatrix (_gdf *_cacb ,0,0,_gdf ,0,_ebfd ._bgdc );if _cgdb {_dc .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_bfdg ),_bfdg ,_fedb );
|
|
|
|
|
};_dc .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_bfdg ),_bfdg ,len (_fedb ));_gbd :=_dcbe .getFillColor ();
|
|
|
|
|
_ecbb :=_dcbe .getStrokeColor ();for _deef ,_ccf :=range _fedb {_bagea :=[]rune (_ccf );if len (_bagea )==1&&_bagea [0]=='\x00'{continue ;};_aeae :=_bfdg [_deef ];_fbeg :=_dcbe ._dcb .CTM .Mult (_dcbe ._edfe ).Mult (_ccg );_ccd :=0.0;if len (_bagea )==1&&_bagea [0]==32{_ccd =_ebfd ._bccba ;
|
|
|
|
|
};_dgag ,_cgb :=_fged .GetCharMetrics (_aeae );if !_cgb {_dc .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_aeae ,_bagea ,_bagea ,_fged );
|
|
|
|
|
return _ag .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_fged .String (),_aeae );};_eac :=_cc .Point {X :_dgag .Wx *_gbcc ,Y :_dgag .Wy *_gbcc };
|
|
|
|
|
_ccca :=_cc .Point {X :(_eac .X *_gdf +_ccd )*_cacb };_bge :=_cc .Point {X :(_eac .X *_gdf +_ebfd ._cge +_ccd )*_cacb };if _cgdb {_dc .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_gdf ,_ebfd ._cge ,_ebfd ._bccba ,_cacb );
|
|
|
|
|
_dc .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_eac ,_ccca ,_bge );};_gacb :=_dbaa (_ccca );_efdg :=_dbaa (_bge );_cce :=_dcbe ._dcb .CTM .Mult (_dcbe ._edfe ).Mult (_gacb );
|
|
|
|
|
if _dfde {_dc .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_dcbe ._dcb .CTM ,_dcbe ._edfe ,_efdg ,_dgcc (_dcbe ._dcb .CTM .Mult (_dcbe ._edfe ).Mult (_efdg )),_gacb ,_cce ,_dgcc (_cce ));
|
|
|
|
|
};_ccee ,_gggg :=_dcbe .newTextMark (_bg .ExpandLigatures (_bagea ),_fbeg ,_dgcc (_cce ),_gc .Abs (_cebf *_fbeg .ScalingFactorX ()),_fged ,_dcbe ._cebc ._cge ,_gbd ,_ecbb );if !_gggg {_dc .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");
|
|
|
|
|
continue ;};if _fged ==nil {_dc .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _fged .Encoder ()==nil {_dc .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_fged );
|
|
|
|
|
}else {if _ebd ,_eef :=_fged .Encoder ().CharcodeToRune (_aeae );_eef {_ccee ._gdfbd =string (_ebd );};};_dc .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_deef ,_aeae ,_ccee ,_fbeg );
|
|
|
|
|
_dcbe ._fef =append (_dcbe ._fef ,&_ccee );_dcbe ._edfe .Concat (_efdg );};return nil ;};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// TableCell is a cell in a TextTable.
|
|
|
|
|
type TableCell struct{
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// Text is the extracted text.
|
|
|
|
|
Text string ;
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// Marks returns the TextMarks corresponding to the text in Text.
|
|
|
|
|
Marks TextMarkArray ;};func (_bbada *wordBag )removeWord (_fggb *textWord ,_cgdd int ){_cbcg :=_bbada ._bed [_cgdd ];_cbcg =_fggbb (_cbcg ,_fggb );if len (_cbcg )==0{delete (_bbada ._bed ,_cgdd );}else {_bbada ._bed [_cgdd ]=_cbcg ;};};func (_gfgg *wordBag )depthBand (_cdc ,_ddad float64 )[]int {if len (_gfgg ._bed )==0{return nil ;
|
|
|
|
|
};return _gfgg .depthRange (_gfgg .getDepthIdx (_cdc ),_gfgg .getDepthIdx (_ddad ));};func (_eabff lineRuling )yMean ()float64 {return 0.5*(_eabff ._cda .Y +_eabff ._aeff .Y )};func (_eddfb gridTile )complete ()bool {return _eddfb .numBorders ()==4};func (_deeg *textTable )compositeRowCorridors ()map[int ][]float64 {_fggcb :=make (map[int ][]float64 ,_deeg ._dege );
|
|
|
|
|
if _gaa {_dc .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_deeg ._dege );};for _ebfe :=1;_ebfe < _deeg ._dege ;_ebfe ++{var _gfdc []compositeCell ;
|
|
|
|
|
for _eadb :=0;_eadb < _deeg ._eaed ;_eadb ++{if _cbca ,_eba :=_deeg ._agbbc [_aeed (_eadb ,_ebfe )];_eba {_gfdc =append (_gfdc ,_cbca );};};if len (_gfdc )==0{continue ;};_gffg :=_fcfgg (_gfdc );_fggcb [_ebfe ]=_gffg ;if _gaa {_ag .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_ebfe ,_gffg );
|
|
|
|
|
};};return _fggcb ;};func (_cd *imageExtractContext )extractInlineImage (_fgg *_dg .ContentStreamInlineImage ,_abb _dg .GraphicsState ,_abbb *_ac .PdfPageResources )error {_fbb ,_bf :=_fgg .ToImage (_abbb );if _bf !=nil {return _bf ;};_cef ,_bf :=_fgg .GetColorSpace (_abbb );
|
|
|
|
|
if _bf !=nil {return _bf ;};if _cef ==nil {_cef =_ac .NewPdfColorspaceDeviceGray ();};_bbe ,_bf :=_cef .ImageToRGB (*_fbb );if _bf !=nil {return _bf ;};_bd :=ImageMark {Image :&_bbe ,Width :_abb .CTM .ScalingFactorX (),Height :_abb .CTM .ScalingFactorY (),Angle :_abb .CTM .Angle ()};
|
|
|
|
|
_bd .X ,_bd .Y =_abb .CTM .Translation ();_cd ._ba =append (_cd ._ba ,_bd );_cd ._eb ++;return nil ;};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// Extractor stores and offers functionality for extracting content from PDF pages.
|
|
|
|
|
type Extractor struct{_eg string ;_ff *_ac .PdfPageResources ;_cf _ac .PdfRectangle ;_cg map[string ]fontEntry ;_ee map[string ]textResult ;_fc int64 ;_bc int ;};
|
2021-03-23 23:12:52 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// ToTextMark returns the public view of `tm`.
|
|
|
|
|
func (_abff *textMark )ToTextMark ()TextMark {return TextMark {Text :_abff ._bfccc ,Original :_abff ._gdfbd ,BBox :_abff ._bgcb ,Font :_abff ._begb ,FontSize :_abff ._adca ,FillColor :_abff ._bccf ,StrokeColor :_abff ._fdf ,Orientation :_abff ._adfa };
|
2021-03-23 23:12:52 +00:00
|
|
|
|
};
|
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// PageImages represents extracted images on a PDF page with spatial information:
|
|
|
|
|
// display position and size.
|
|
|
|
|
type PageImages struct{Images []ImageMark ;};func (_debgc rulingList )aligned ()bool {if len (_debgc )< 2{return false ;};_bggb :=make (map[*ruling ]int );_bggb [_debgc [0]]=0;for _ ,_aecd :=range _debgc [1:]{_dcceg :=false ;for _abfb :=range _bggb {if _aecd .gridIntersecting (_abfb ){_bggb [_abfb ]++;
|
|
|
|
|
_dcceg =true ;break ;};};if !_dcceg {_bggb [_aecd ]=0;};};_gbdb :=0;for _ ,_ddcca :=range _bggb {if _ddcca ==0{_gbdb ++;};};_bfca :=float64 (_gbdb )/float64 (len (_debgc ));_bfcg :=_bfca <=1.0-_aecb ;if _ebgeg {_dc .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_bfcg ,_bfca ,_gbdb ,len (_debgc ),_debgc .String ());
|
|
|
|
|
};return _bfcg ;};func (_baca *wordBag )empty (_eecfg int )bool {_ ,_ddgd :=_baca ._bed [_eecfg ];return !_ddgd };func _dgdef (_aeffg _ac .PdfRectangle )*ruling {return &ruling {_ddca :_eace ,_dddg :_aeffg .Llx ,_daa :_aeffg .Lly ,_defb :_aeffg .Ury };
|
|
|
|
|
};func _dcbgd (_afgfc ,_bfef bounded )float64 {_eggc :=_dace (_afgfc ,_bfef );if !_dcbea (_eggc ){return _eggc ;};return _eege (_afgfc ,_bfef );};
|
2021-01-07 14:20:10 +00:00
|
|
|
|
|
|
|
|
|
// TextMark represents extracted text on a page with information regarding both textual content,
|
|
|
|
|
// formatting (font and size) and positioning.
|
|
|
|
|
// It is the smallest unit of text on a PDF page, typically a single character.
|
|
|
|
|
//
|
|
|
|
|
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
|
|
|
|
|
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
|
|
|
|
|
// `bbox` of substring `term` in `text`.
|
|
|
|
|
//
|
|
|
|
|
// ex, _ := New(page)
|
|
|
|
|
// // handle errors
|
|
|
|
|
// pageText, _, _, err := ex.ExtractPageText()
|
|
|
|
|
// // handle errors
|
|
|
|
|
// text := pageText.Text()
|
|
|
|
|
// textMarks := pageText.Marks()
|
|
|
|
|
//
|
|
|
|
|
// start := strings.Index(text, term)
|
|
|
|
|
// end := start + len(term)
|
|
|
|
|
// spanMarks, err := textMarks.RangeOffset(start, end)
|
|
|
|
|
// // handle errors
|
|
|
|
|
// bbox, ok := spanMarks.BBox()
|
|
|
|
|
// // handle errors
|
|
|
|
|
type TextMark struct{
|
2020-11-23 22:15:56 +00:00
|
|
|
|
|
2020-12-06 13:03:03 +00:00
|
|
|
|
// Text is the extracted text.
|
|
|
|
|
Text string ;
|
2020-11-23 22:15:56 +00:00
|
|
|
|
|
2021-01-07 14:20:10 +00:00
|
|
|
|
// Original is the text in the PDF. It has not been decoded like `Text`.
|
|
|
|
|
Original string ;
|
2020-11-23 22:15:56 +00:00
|
|
|
|
|
2021-01-07 14:20:10 +00:00
|
|
|
|
// BBox is the bounding box of the text.
|
2021-04-06 22:35:37 +00:00
|
|
|
|
BBox _ac .PdfRectangle ;
|
2021-01-07 14:20:10 +00:00
|
|
|
|
|
|
|
|
|
// Font is the font the text was drawn with.
|
2021-04-06 22:35:37 +00:00
|
|
|
|
Font *_ac .PdfFont ;
|
2021-01-07 14:20:10 +00:00
|
|
|
|
|
|
|
|
|
// FontSize is the font size the text was drawn with.
|
|
|
|
|
FontSize float64 ;
|
|
|
|
|
|
|
|
|
|
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
|
|
|
|
|
// text, textMarks := pageText.Text(), pageText.Marks()
|
|
|
|
|
// marks := textMarks.Elements()
|
|
|
|
|
// then marks[i].Offset is the offset of marks[i].Text in text.
|
|
|
|
|
Offset int ;
|
|
|
|
|
|
|
|
|
|
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
|
|
|
|
|
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
|
|
|
|
|
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
|
|
|
|
|
Meta bool ;
|
|
|
|
|
|
|
|
|
|
// FillColor is the fill color of the text.
|
|
|
|
|
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
2021-04-06 22:35:37 +00:00
|
|
|
|
FillColor _gca .Color ;
|
2021-01-07 14:20:10 +00:00
|
|
|
|
|
|
|
|
|
// StrokeColor is the stroke color of the text.
|
|
|
|
|
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
2021-04-06 22:35:37 +00:00
|
|
|
|
StrokeColor _gca .Color ;
|
2021-01-07 14:20:10 +00:00
|
|
|
|
|
|
|
|
|
// Orientation is the text orientation
|
2021-04-06 22:35:37 +00:00
|
|
|
|
Orientation int ;};func (_aebd *wordBag )firstWord (_edfee int )*textWord {return _aebd ._bed [_edfee ][0]};func (_eggad *textPara )depth ()float64 {if _eggad ._gfcac {return -1.0;};if len (_eggad ._acee )> 0{return _eggad ._acee [0]._dffg ;};return _eggad ._eecb .depth ();
|
|
|
|
|
};func (_acgfc rulingList )isActualGrid ()(rulingList ,bool ){_bbfb ,_cadg :=_acgfc .augmentGrid ();if !(len (_bbfb )>=_cega +1&&len (_cadg )>=_aefba +1){if _ebgeg {_dc .Log .Info ("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064",len (_bbfb ),len (_cadg ),_cega +1,_aefba +1);
|
|
|
|
|
};return nil ,false ;};if _ebgeg {_dc .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074",_acgfc ,len (_bbfb )>=2,len (_cadg )>=2,len (_bbfb )>=2&&len (_cadg )>=2);
|
|
|
|
|
for _efagd ,_gbec :=range _acgfc {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a",_efagd ,_gbec );};};if _gbe {_babcb ,_fade :=_bbfb [0],_bbfb [len (_bbfb )-1];_cacc ,_gfggd :=_cadg [0],_cadg [len (_cadg )-1];if !(_edge (_babcb ._dddg -_cacc ._daa )&&_edge (_fade ._dddg -_cacc ._defb )&&_edge (_cacc ._dddg -_babcb ._defb )&&_edge (_gfggd ._dddg -_babcb ._daa )){if _ebgeg {_dc .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073",_babcb ,_fade ,_cacc ,_gfggd );
|
|
|
|
|
};return nil ,false ;};}else {if !_bbfb .aligned (){if _efea {_dc .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064",len (_bbfb ));
|
|
|
|
|
};return nil ,false ;};if !_cadg .aligned (){if _ebgeg {_dc .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064",len (_cadg ));
|
|
|
|
|
};return nil ,false ;};};_beffa :=append (_bbfb ,_cadg ...);return _beffa ,true ;};func _abcd (_fadc ,_befc _ac .PdfRectangle )bool {return _gfde (_fadc ,_befc )&&_cfe (_fadc ,_befc )};func _eeafb (_gfdcf *PageText )error {_agade :=_ec .GetLicenseKey ();
|
|
|
|
|
if _agade !=nil &&_agade .IsLicensed ()||_ga {return nil ;};_ag .Printf ("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a");_ag .Println ("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f");
|
|
|
|
|
return _g .New ("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064");};func (_ae *imageExtractContext )processOperand (_bab *_dg .ContentStreamOperation ,_ea _dg .GraphicsState ,_dgd *_ac .PdfPageResources )error {if _bab .Operand =="\u0042\u0049"&&len (_bab .Params )==1{_dgg ,_fe :=_bab .Params [0].(*_dg .ContentStreamInlineImage );
|
|
|
|
|
if !_fe {return nil ;};if _de ,_cff :=_be .GetBoolVal (_dgg .ImageMask );_cff {if _de &&!_ae ._gab .IncludeInlineStencilMasks {return nil ;};};return _ae .extractInlineImage (_dgg ,_ea ,_dgd );}else if _bab .Operand =="\u0044\u006f"&&len (_bab .Params )==1{_dgae ,_df :=_be .GetName (_bab .Params [0]);
|
|
|
|
|
if !_df {_dc .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");return _gdg ;};_ ,_baa :=_dgd .GetXObjectByName (*_dgae );switch _baa {case _ac .XObjectTypeImage :return _ae .extractXObjectImage (_dgae ,_ea ,_dgd );case _ac .XObjectTypeForm :return _ae .extractFormImages (_dgae ,_ea ,_dgd );
|
|
|
|
|
};};return nil ;};func (_dbdec paraList )yNeighbours (_ffcgf float64 )map[*textPara ][]int {_bcdfg :=make ([]event ,2*len (_dbdec ));if _ffcgf ==0{for _gbeff ,_fcadgc :=range _dbdec {_bcdfg [2*_gbeff ]=event {_fcadgc .Lly ,true ,_gbeff };_bcdfg [2*_gbeff +1]=event {_fcadgc .Ury ,false ,_gbeff };
|
|
|
|
|
};}else {for _decfg ,_ebeaf :=range _dbdec {_bcdfg [2*_decfg ]=event {_ebeaf .Lly -_ffcgf *_ebeaf .fontsize (),true ,_decfg };_bcdfg [2*_decfg +1]=event {_ebeaf .Ury +_ffcgf *_ebeaf .fontsize (),false ,_decfg };};};return _dbdec .eventNeighbours (_bcdfg );
|
|
|
|
|
};func (_eabb rectRuling )checkWidth (_dcfg ,_bdad float64 )(float64 ,bool ){_cdcdb :=_bdad -_dcfg ;_bbadad :=_cdcdb <=_gfaff ;return _cdcdb ,_bbadad ;};func (_egef rulingList )intersections ()map[int ]intSet {var _dfcc ,_geda []int ;for _fcgf ,_gbbc :=range _egef {switch _gbbc ._ddca {case _eace :_dfcc =append (_dfcc ,_fcgf );
|
|
|
|
|
case _gega :_geda =append (_geda ,_fcgf );};};if len (_dfcc )< _cega +1||len (_geda )< _aefba +1{return nil ;};if len (_dfcc )+len (_geda )> _febab {_dc .Log .Debug ("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",len (_egef ),len (_dfcc ),len (_geda ));
|
|
|
|
|
return nil ;};_agfd :=make (map[int ]intSet ,len (_dfcc )+len (_geda ));for _ ,_gbdaa :=range _dfcc {for _ ,_bfdd :=range _geda {if _egef [_gbdaa ].intersects (_egef [_bfdd ]){if _ ,_agad :=_agfd [_gbdaa ];!_agad {_agfd [_gbdaa ]=make (intSet );};if _ ,_fagg :=_agfd [_bfdd ];
|
|
|
|
|
!_fagg {_agfd [_bfdd ]=make (intSet );};_agfd [_gbdaa ].add (_bfdd );_agfd [_bfdd ].add (_gbdaa );};};};return _agfd ;};func (_bbfe *textLine )markWordBoundaries (){_gdce :=_adbfe *_bbfe ._cfa ;for _geaa ,_eeae :=range _bbfe ._acec [1:]{if _cgec (_eeae ,_bbfe ._acec [_geaa ])>=_gdce {_eeae ._bagec =true ;
|
|
|
|
|
};};};func (_baefb gridTile )contains (_aacaaf _ac .PdfRectangle )bool {if _baefb .numBorders ()< 3{return false ;};if _baefb ._daeb &&_aacaaf .Llx < _baefb .Llx -_egga {return false ;};if _baefb ._gdabd &&_aacaaf .Urx > _baefb .Urx +_egga {return false ;
|
|
|
|
|
};if _baefb ._dcced &&_aacaaf .Lly < _baefb .Lly -_egga {return false ;};if _baefb ._gfdb &&_aacaaf .Ury > _baefb .Ury +_egga {return false ;};return true ;};func (_dbbg paraList )readBefore (_degb []int ,_febb ,_ffbc int )bool {_bafg ,_aaea :=_dbbg [_febb ],_dbbg [_ffbc ];
|
|
|
|
|
if _cceef (_bafg ,_aaea )&&_bafg .Lly > _aaea .Lly {return true ;};if !(_bafg ._efgg .Urx < _aaea ._efgg .Llx ){return false ;};_ccdf ,_cgcf :=_bafg .Lly ,_aaea .Lly ;if _ccdf > _cgcf {_cgcf ,_ccdf =_ccdf ,_cgcf ;};_eedd :=_gc .Max (_bafg ._efgg .Llx ,_aaea ._efgg .Llx );
|
|
|
|
|
_dgdd :=_gc .Min (_bafg ._efgg .Urx ,_aaea ._efgg .Urx );_cgga :=_dbbg .llyRange (_degb ,_ccdf ,_cgcf );for _ ,_fgdfd :=range _cgga {if _fgdfd ==_febb ||_fgdfd ==_ffbc {continue ;};_cdffd :=_dbbg [_fgdfd ];if _cdffd ._efgg .Llx <=_dgdd &&_eedd <=_cdffd ._efgg .Urx {return false ;
|
|
|
|
|
};};return true ;};
|
2021-01-26 01:31:56 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// String returns a description of `v`.
|
|
|
|
|
func (_dbgc *ruling )String ()string {if _dbgc ._ddca ==_cefd {return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047";};_bfece ,_bffaf :="\u0078","\u0079";if _dbgc ._ddca ==_gega {_bfece ,_bffaf ="\u0079","\u0078";};_gfafb :="";if _dbgc ._bbag !=0.0{_gfafb =_ag .Sprintf (" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_dbgc ._bbag );
|
|
|
|
|
};return _ag .Sprintf ("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",_dbgc ._ddca ,_bfece ,_dbgc ._dddg ,_bffaf ,_dbgc ._daa ,_dbgc ._defb ,_dbgc ._defb -_dbgc ._daa ,_dbgc ._egff ,_dbgc .Color ,_gfafb );
|
|
|
|
|
};func _fdcc (_egee string )bool {if _d .RuneCountInString (_egee )< _cbe {return false ;};_fcbd ,_aecba :=_d .DecodeLastRuneInString (_egee );if _aecba <=0||!_b .Is (_b .Hyphen ,_fcbd ){return false ;};_fcbd ,_aecba =_d .DecodeLastRuneInString (_egee [:len (_egee )-_aecba ]);
|
|
|
|
|
return _aecba > 0&&!_b .IsSpace (_fcbd );};func _bfcca (_cgbed string )bool {for _ ,_cgfbd :=range _cgbed {if !_b .IsSpace (_cgfbd ){return false ;};};return true ;};func (_efe *stateStack )push (_aef *textState ){_fgfd :=*_aef ;*_efe =append (*_efe ,&_fgfd )};
|
|
|
|
|
func (_dfgcg paraList )addNeighbours (){_beeg :=func (_ccbec []int ,_dafda *textPara )([]*textPara ,[]*textPara ){_fbgef :=make ([]*textPara ,0,len (_ccbec )-1);_cbged :=make ([]*textPara ,0,len (_ccbec )-1);for _ ,_daebd :=range _ccbec {_gecd :=_dfgcg [_daebd ];
|
|
|
|
|
if _gecd .Urx <=_dafda .Llx {_fbgef =append (_fbgef ,_gecd );}else if _gecd .Llx >=_dafda .Urx {_cbged =append (_cbged ,_gecd );};};return _fbgef ,_cbged ;};_ebged :=func (_dcgag []int ,_cacca *textPara )([]*textPara ,[]*textPara ){_affdd :=make ([]*textPara ,0,len (_dcgag )-1);
|
|
|
|
|
_ecfbe :=make ([]*textPara ,0,len (_dcgag )-1);for _ ,_fggbg :=range _dcgag {_ffaea :=_dfgcg [_fggbg ];if _ffaea .Ury <=_cacca .Lly {_ecfbe =append (_ecfbe ,_ffaea );}else if _ffaea .Lly >=_cacca .Ury {_affdd =append (_affdd ,_ffaea );};};return _affdd ,_ecfbe ;
|
|
|
|
|
};_aggdcf :=_dfgcg .yNeighbours (_ebfbc );for _ ,_ffccd :=range _dfgcg {_fgeb :=_aggdcf [_ffccd ];if len (_fgeb )==0{continue ;};_gfdg ,_abgd :=_beeg (_fgeb ,_ffccd );if len (_gfdg )==0&&len (_abgd )==0{continue ;};if len (_gfdg )> 0{_gdaff :=_gfdg [0];
|
|
|
|
|
for _ ,_ababcc :=range _gfdg [1:]{if _ababcc .Urx >=_gdaff .Urx {_gdaff =_ababcc ;};};for _ ,_adfafe :=range _gfdg {if _adfafe !=_gdaff &&_adfafe .Urx > _gdaff .Llx {_gdaff =nil ;break ;};};if _gdaff !=nil &&_cfe (_ffccd .PdfRectangle ,_gdaff .PdfRectangle ){_ffccd ._ageea =_gdaff ;
|
|
|
|
|
};};if len (_abgd )> 0{_befeb :=_abgd [0];for _ ,_bafbg :=range _abgd [1:]{if _bafbg .Llx <=_befeb .Llx {_befeb =_bafbg ;};};for _ ,_beac :=range _abgd {if _beac !=_befeb &&_beac .Llx < _befeb .Urx {_befeb =nil ;break ;};};if _befeb !=nil &&_cfe (_ffccd .PdfRectangle ,_befeb .PdfRectangle ){_ffccd ._dfbb =_befeb ;
|
|
|
|
|
};};};_aggdcf =_dfgcg .xNeighbours (_ggge );for _ ,_abgbd :=range _dfgcg {_dega :=_aggdcf [_abgbd ];if len (_dega )==0{continue ;};_cacbb ,_dffaa :=_ebged (_dega ,_abgbd );if len (_cacbb )==0&&len (_dffaa )==0{continue ;};if len (_dffaa )> 0{_cgcbg :=_dffaa [0];
|
|
|
|
|
for _ ,_ggcfg :=range _dffaa [1:]{if _ggcfg .Ury >=_cgcbg .Ury {_cgcbg =_ggcfg ;};};for _ ,_dcgdg :=range _dffaa {if _dcgdg !=_cgcbg &&_dcgdg .Ury > _cgcbg .Lly {_cgcbg =nil ;break ;};};if _cgcbg !=nil &&_gfde (_abgbd .PdfRectangle ,_cgcbg .PdfRectangle ){_abgbd ._ffaf =_cgcbg ;
|
|
|
|
|
};};if len (_cacbb )> 0{_fdba :=_cacbb [0];for _ ,_fcgc :=range _cacbb [1:]{if _fcgc .Lly <=_fdba .Lly {_fdba =_fcgc ;};};for _ ,_aaad :=range _cacbb {if _aaad !=_fdba &&_aaad .Lly < _fdba .Ury {_fdba =nil ;break ;};};if _fdba !=nil &&_gfde (_abgbd .PdfRectangle ,_fdba .PdfRectangle ){_abgbd ._dbcd =_fdba ;
|
|
|
|
|
};};};for _ ,_bcge :=range _dfgcg {if _bcge ._ageea !=nil &&_bcge ._ageea ._dfbb !=_bcge {_bcge ._ageea =nil ;};if _bcge ._dbcd !=nil &&_bcge ._dbcd ._ffaf !=_bcge {_bcge ._dbcd =nil ;};if _bcge ._dfbb !=nil &&_bcge ._dfbb ._ageea !=_bcge {_bcge ._dfbb =nil ;
|
|
|
|
|
};if _bcge ._ffaf !=nil &&_bcge ._ffaf ._dbcd !=_bcge {_bcge ._ffaf =nil ;};};};func _cfac (_gaedd *wordBag ,_dbb int )*textLine {_bbaa :=_gaedd .firstWord (_dbb );_dedcd :=textLine {PdfRectangle :_bbaa .PdfRectangle ,_cfa :_bbaa ._fbdg ,_dffg :_bbaa ._cfba };
|
|
|
|
|
_dedcd .pullWord (_gaedd ,_bbaa ,_dbb );return &_dedcd ;};func (_dee *textObject )moveText (_ffe ,_bae float64 ){_dee .moveLP (_ffe ,_bae )};func (_gfag *textTable )computeBbox ()_ac .PdfRectangle {var _aaeg _ac .PdfRectangle ;_deeb :=false ;for _acfaf :=0;
|
|
|
|
|
_acfaf < _gfag ._dege ;_acfaf ++{for _ggff :=0;_ggff < _gfag ._eaed ;_ggff ++{_bggcd :=_gfag .get (_ggff ,_acfaf );if _bggcd ==nil {continue ;};if !_deeb {_aaeg =_bggcd .PdfRectangle ;_deeb =true ;}else {_aaeg =_def (_aaeg ,_bggcd .PdfRectangle );};};};
|
|
|
|
|
return _aaeg ;};type rectRuling struct{_befg rulingKind ;_gcb markKind ;_gca .Color ;_ac .PdfRectangle ;};func (_effcg *textTable )logComposite (_cgdg string ){if !_gaa {return ;};_dc .Log .Info ("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_effcg ._eaed ,_effcg ._dege ,_cgdg );
|
|
|
|
|
_ag .Printf ("\u0025\u0035\u0073 \u007c","");for _efdd :=0;_efdd < _effcg ._eaed ;_efdd ++{_ag .Printf ("\u0025\u0033\u0064 \u007c",_efdd );};_ag .Println ("");_ag .Printf ("\u0025\u0035\u0073 \u002b","");for _addf :=0;_addf < _effcg ._eaed ;_addf ++{_ag .Printf ("\u0025\u0033\u0073 \u002b","\u002d\u002d\u002d");
|
|
|
|
|
};_ag .Println ("");for _gced :=0;_gced < _effcg ._dege ;_gced ++{_ag .Printf ("\u0025\u0035\u0064 \u007c",_gced );for _gcdb :=0;_gcdb < _effcg ._eaed ;_gcdb ++{_dgcbba ,_ :=_effcg ._agbbc [_aeed (_gcdb ,_gced )].parasBBox ();_ag .Printf ("\u0025\u0033\u0064 \u007c",len (_dgcbba ));
|
|
|
|
|
};_ag .Println ("");};_dc .Log .Info ("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_effcg ._eaed ,_effcg ._dege ,_cgdg );_ag .Printf ("\u0025\u0035\u0073 \u007c","");for _dgfcc :=0;_dgfcc < _effcg ._eaed ;_dgfcc ++{_ag .Printf ("\u0025\u0031\u0032\u0064\u0020\u007c",_dgfcc );
|
|
|
|
|
};_ag .Println ("");_ag .Printf ("\u0025\u0035\u0073 \u002b","");for _gcdc :=0;_gcdc < _effcg ._eaed ;_gcdc ++{_ag .Print ("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b");};_ag .Println ("");for _gcbf :=0;_gcbf < _effcg ._dege ;
|
|
|
|
|
_gcbf ++{_ag .Printf ("\u0025\u0035\u0064 \u007c",_gcbf );for _edec :=0;_edec < _effcg ._eaed ;_edec ++{_bfdcc ,_ :=_effcg ._agbbc [_aeed (_edec ,_gcbf )].parasBBox ();_efagc :="";_fffa :=_bfdcc .merge ();if _fffa !=nil {_efagc =_fffa .text ();};_efagc =_ag .Sprintf ("\u0025\u0071",_geac (_efagc ,12));
|
|
|
|
|
_efagc =_efagc [1:len (_efagc )-1];_ag .Printf ("\u0025\u0031\u0032\u0073\u0020\u007c",_efagc );};_ag .Println ("");};};const _fgb =1.0/1000.0;func (_cbdeg compositeCell )split (_ccbf ,_bgcd []float64 )*textTable {_aecc :=len (_ccbf )+1;_fbgcd :=len (_bgcd )+1;
|
|
|
|
|
if _gaa {_dc .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_fbgcd ,_aecc ,_cbdeg ,_ccbf ,_bgcd );
|
|
|
|
|
_ag .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_cbdeg .paraList ));for _fbbf ,_dafbf :=range _cbdeg .paraList {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fbbf ,_dafbf .String ());
|
|
|
|
|
};_ag .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_cbdeg .lines ()));for _bfgfe ,_afbe :=range _cbdeg .lines (){_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bfgfe ,_afbe );};};_ccbf =_aagb (_ccbf ,_cbdeg .Ury ,_cbdeg .Lly );
|
|
|
|
|
_bgcd =_aagb (_bgcd ,_cbdeg .Llx ,_cbdeg .Urx );_gagef :=make (map[uint64 ]*textPara ,_fbgcd *_aecc );_adfc :=textTable {_eaed :_fbgcd ,_dege :_aecc ,_gebab :_gagef };_ffff :=_cbdeg .paraList ;_e .Slice (_ffff ,func (_gfcf ,_dcdd int )bool {_cead ,_eggfg :=_ffff [_gfcf ],_ffff [_dcdd ];
|
|
|
|
|
_aaef ,_ddefa :=_cead .Lly ,_eggfg .Lly ;if _aaef !=_ddefa {return _aaef < _ddefa ;};return _cead .Llx < _eggfg .Llx ;});_faaed :=make (map[uint64 ]_ac .PdfRectangle ,_fbgcd *_aecc );for _cdfg ,_geag :=range _ccbf [1:]{_ebdca :=_ccbf [_cdfg ];for _ceab ,_bdag :=range _bgcd [1:]{_gddc :=_bgcd [_ceab ];
|
|
|
|
|
_faaed [_aeed (_ceab ,_cdfg )]=_ac .PdfRectangle {Llx :_gddc ,Urx :_bdag ,Lly :_geag ,Ury :_ebdca };};};if _gaa {_dc .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073");
|
|
|
|
|
_ag .Printf ("\u0020\u0020\u0020\u0020");for _abca :=0;_abca < _fbgcd ;_abca ++{_ag .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_abca );};_ag .Println ();for _adfaf :=0;_adfaf < _aecc ;_adfaf ++{_ag .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_adfaf );
|
|
|
|
|
for _gfeb :=0;_gfeb < _fbgcd ;_gfeb ++{_ag .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_faaed [_aeed (_gfeb ,_adfaf )]);};_ag .Println ();};};_abda :=func (_cfeb *textLine )(int ,int ){for _agcg :=0;_agcg < _aecc ;_agcg ++{for _dbga :=0;_dbga < _fbgcd ;
|
|
|
|
|
_dbga ++{if _acb (_faaed [_aeed (_dbga ,_agcg )],_cfeb .PdfRectangle ){return _dbga ,_agcg ;};};};return -1,-1;};_eafc :=make (map[uint64 ][]*textLine ,_fbgcd *_aecc );for _ ,_bgbc :=range _ffff .lines (){_cgbe ,_eccgb :=_abda (_bgbc );if _cgbe < 0{continue ;
|
|
|
|
|
};_eafc [_aeed (_cgbe ,_eccgb )]=append (_eafc [_aeed (_cgbe ,_eccgb )],_bgbc );};for _faeb :=0;_faeb < len (_ccbf )-1;_faeb ++{_efgga :=_ccbf [_faeb ];_gfga :=_ccbf [_faeb +1];for _eafcf :=0;_eafcf < len (_bgcd )-1;_eafcf ++{_baeb :=_bgcd [_eafcf ];_cafd :=_bgcd [_eafcf +1];
|
|
|
|
|
_abefg :=_ac .PdfRectangle {Llx :_baeb ,Urx :_cafd ,Lly :_gfga ,Ury :_efgga };_fdbc :=_eafc [_aeed (_eafcf ,_faeb )];if len (_fdbc )==0{continue ;};_gcfe :=_edea (_abefg ,_fdbc );_adfc .put (_eafcf ,_faeb ,_gcfe );};};return &_adfc ;};func _aeed (_eabbb ,_dcfac int )uint64 {return uint64 (_eabbb )*0x1000000+uint64 (_dcfac )};
|
|
|
|
|
func (_dff *shapesState )newSubPath (){_dff .clearPath ();if _dfgc {_dc .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_dff );};};type imageExtractContext struct{_ba []ImageMark ;_eb int ;_age int ;_fd int ;
|
|
|
|
|
_dge map[*_be .PdfObjectStream ]*cachedImage ;_gab *ImageExtractOptions ;};
|
2021-03-13 21:28:23 +00:00
|
|
|
|
|
2021-04-06 22:35:37 +00:00
|
|
|
|
// String returns a string descibing `i`.
|
|
|
|
|
func (_edfaa gridTile )String ()string {_cagbg :=func (_dfbc bool ,_cdggd string )string {if _dfbc {return _cdggd ;};return "\u005f";};return _ag .Sprintf ("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073",_edfaa .PdfRectangle ,_cagbg (_edfaa ._daeb ,"\u004c"),_cagbg (_edfaa ._gdabd ,"\u0052"),_cagbg (_edfaa ._dcced ,"\u0042"),_cagbg (_edfaa ._gfdb ,"\u0054"));
|
|
|
|
|
};type paraList []*textPara ;func _dfeb (_gacc _cc .Point )*subpath {return &subpath {_effg :[]_cc .Point {_gacc }}};func (_bfeb *textPara )text ()string {_dacf :=new (_ab .Buffer );_bfeb .writeText (_dacf );return _dacf .String ();};func (_dbgbg paraList )findTextTables ()[]*textTable {var _bgcff []*textTable ;
|
|
|
|
|
for _ ,_dbbec :=range _dbgbg {if _dbbec .taken ()||_dbbec .Width ()==0{continue ;};_edfc :=_dbbec .isAtom ();if _edfc ==nil {continue ;};_edfc .growTable ();if _edfc ._eaed *_edfc ._dege < _ecdb {continue ;};_edfc .markCells ();_edfc .log ("\u0067\u0072\u006fw\u006e");
|
|
|
|
|
_bgcff =append (_bgcff ,_edfc );};return _bgcff ;};func (_cebg paraList )sortReadingOrder (){_dc .Log .Trace ("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_cebg ));
|
|
|
|
|
if len (_cebg )<=1{return ;};_cebg .computeEBBoxes ();_e .Slice (_cebg ,func (_dedf ,_cffc int )bool {return _eecg (_cebg [_dedf ],_cebg [_cffc ])<=0});_ddga :=_cebg .topoOrder ();_cebg .reorder (_ddga );};
|
|
|
|
|
|
|
|
|
|
// ImageMark represents an image drawn on a page and its position in device coordinates.
|
|
|
|
|
// All coordinates are in device coordinates.
|
|
|
|
|
type ImageMark struct{Image *_ac .Image ;
|
|
|
|
|
|
|
|
|
|
// Dimensions of the image as displayed in the PDF.
|
|
|
|
|
Width float64 ;Height float64 ;
|
|
|
|
|
|
|
|
|
|
// Position of the image in PDF coordinates (lower left corner).
|
|
|
|
|
X float64 ;Y float64 ;
|
|
|
|
|
|
|
|
|
|
// Angle in degrees, if rotated.
|
|
|
|
|
Angle float64 ;};func _agae (_feea ,_abe float64 )string {_gbbff :=!_dcbea (_feea -_abe );if _gbbff {return "\u000a";};return "\u0020";};type ruling struct{_ddca rulingKind ;_egff markKind ;_gca .Color ;_dddg float64 ;_daa float64 ;_defb float64 ;_bbag float64 ;
|
|
|
|
|
};func (_abge *textObject )setCharSpacing (_fbff float64 ){if _abge ==nil {return ;};_abge ._cebc ._cge =_fbff ;if _cgdb {_dc .Log .Info ("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073",_fbff ,_abge ._cebc .String ());
|
|
|
|
|
};};func _fcfgg (_cggb []compositeCell )[]float64 {var _dgedb []*textLine ;_fbag :=0;for _ ,_bagc :=range _cggb {_fbag +=len (_bagc .paraList );_dgedb =append (_dgedb ,_bagc .lines ()...);};_e .Slice (_dgedb ,func (_ebca ,_cede int )bool {_caaf ,_bdegg :=_dgedb [_ebca ],_dgedb [_cede ];
|
|
|
|
|
_gdeb ,_ecec :=_caaf ._dffg ,_bdegg ._dffg ;if !_dcbea (_gdeb -_ecec ){return _gdeb < _ecec ;};return _caaf .Llx < _bdegg .Llx ;});if _gaa {_ag .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_fbag ,len (_dgedb ));
|
|
|
|
|
for _bbff ,_cfdg :=range _dgedb {_ag .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_bbff ,_cfdg );};};var _agda []float64 ;_efaa :=_dgedb [0];var _bfee [][]*textLine ;_fgdb :=[]*textLine {_efaa };for _bdecb ,_ggagb :=range _dgedb [1:]{if _ggagb .Ury < _efaa .Lly {_bcfcd :=0.5*(_ggagb .Ury +_efaa .Lly );
|
|
|
|
|
if _gaa {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_bdecb ,_ggagb .Ury ,_efaa .Lly ,_bcfcd ,_efaa ,_ggagb );
|
|
|
|
|
};_agda =append (_agda ,_bcfcd );_bfee =append (_bfee ,_fgdb );_fgdb =nil ;};_fgdb =append (_fgdb ,_ggagb );if _ggagb .Lly < _efaa .Lly {_efaa =_ggagb ;};};if len (_fgdb )> 0{_bfee =append (_bfee ,_fgdb );};if _gaa {_ag .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_agda );
|
|
|
|
|
};if _gaa {_dc .Log .Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len (_cggb ));for _bffe ,_fggd :=range _cggb {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bffe ,_fggd );};_dc .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_bfee ));
|
|
|
|
|
for _effgc ,_efef :=range _bfee {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_effgc ,len (_efef ));for _afbd ,_aebb :=range _efef {_ag .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_afbd ,_aebb );};};};_ddaf :=true ;for _bbecc ,_bdagd :=range _bfee {_dbaad :=true ;
|
|
|
|
|
for _bdab ,_dgddc :=range _cggb {if _gaa {_ag .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_bbecc ,len (_bfee ),_bdab ,len (_cggb ),_dgddc );
|
|
|
|
|
};if !_dgddc .hasLines (_bdagd ){if _gaa {_ag .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_bbecc ,len (_bfee ),_bdab ,len (_cggb ));
|
|
|
|
|
};_dbaad =false ;break ;};};if !_dbaad {_ddaf =false ;break ;};};if !_ddaf {if _gaa {_dc .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg");
|
|
|
|
|
};_agda =nil ;};if _gaa &&_agda !=nil {_ag .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_agda );};return _agda ;};func _aaace (_fcdfg ,_gddf ,_dfcfg ,_aafa *textPara )*textTable {_dcec :=&textTable {_eaed :2,_dege :2,_gebab :make (map[uint64 ]*textPara ,4)};
|
|
|
|
|
_dcec .put (0,0,_fcdfg );_dcec .put (1,0,_gddf );_dcec .put (0,1,_dfcfg );_dcec .put (1,1,_aafa );return _dcec ;};func _eecg (_dccf ,_cacd bounded )float64 {_feag :=_eege (_dccf ,_cacd );if !_dcbea (_feag ){return _feag ;};return _dace (_dccf ,_cacd );
|
|
|
|
|
};func (_adbae *textWord )bbox ()_ac .PdfRectangle {return _adbae .PdfRectangle };func (_ega *textObject )showTextAdjusted (_efa *_be .PdfObjectArray )error {_cgg :=false ;for _ ,_dacg :=range _efa .Elements (){switch _dacg .(type ){case *_be .PdfObjectFloat ,*_be .PdfObjectInteger :_gbab ,_bef :=_be .GetNumberAsFloat (_dacg );
|
|
|
|
|
if _bef !=nil {_dc .Log .Debug ("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_dacg ,_efa );
|
|
|
|
|
return _bef ;};_geg ,_fab :=-_gbab *0.001*_ega ._cebc ._gfg ,0.0;if _cgg {_fab ,_geg =_geg ,_fab ;};_ced :=_dbaa (_cc .Point {X :_geg ,Y :_fab });_ega ._edfe .Concat (_ced );case *_be .PdfObjectString :_aecf ,_bfc :=_be .GetStringBytes (_dacg );if !_bfc {_dc .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_dacg ,_efa );
|
|
|
|
|
return _be .ErrTypeError ;};_ega .renderText (_aecf );default:_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_dacg ,_efa );
|
|
|
|
|
return _be .ErrTypeError ;};};return nil ;};func (_deafg *wordBag )pullWord (_beff *textWord ,_cccb int ,_gbabe map[int ]map[*textWord ]struct{}){_deafg .PdfRectangle =_def (_deafg .PdfRectangle ,_beff .PdfRectangle );if _beff ._fbdg > _deafg ._afb {_deafg ._afb =_beff ._fbdg ;
|
|
|
|
|
};_deafg ._bed [_cccb ]=append (_deafg ._bed [_cccb ],_beff );_gbabe [_cccb ][_beff ]=struct{}{};};func (_dcc *textObject )nextLine (){_dcc .moveLP (0,-_dcc ._cebc ._cecg )};func (_aagc *shapesState )establishSubpath ()*subpath {_cdgd ,_dccg :=_aagc .lastpointEstablished ();
|
|
|
|
|
if !_dccg {_aagc ._aceg =append (_aagc ._aceg ,_dfeb (_cdgd ));};if len (_aagc ._aceg )==0{return nil ;};_aagc ._gbag =false ;return _aagc ._aceg [len (_aagc ._aceg )-1];};func (_cfffa paraList )findGridTables (_fbea []gridTiling )[]*textTable {if _gaa {_dc .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_cfffa ));
|
|
|
|
|
for _bada ,_gbfb :=range _cfffa {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bada ,_gbfb );};};var _ebcg []*textTable ;for _abeg ,_acgaf :=range _fbea {_dddd ,_fbge :=_cfffa .findTableGrid (_acgaf );if _dddd !=nil {_dddd .log (_ag .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_abeg ));
|
|
|
|
|
_ebcg =append (_ebcg ,_dddd );_dddd .markCells ();};for _fcfg :=range _fbge {_fcfg ._baba =true ;};};if _gaa {_dc .Log .Info ("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s",len (_ebcg ));
|
|
|
|
|
};return _ebcg ;};func (_fgc *shapesState )fill (_dfee *[]pathSection ){_fgad :=pathSection {_feaa :_fgc ._aceg ,Color :_fgc ._agfb .getFillColor ()};*_dfee =append (*_dfee ,_fgad );if _ebgeg {_cdeb :=_fgad .bbox ();_ag .Printf ("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",len (*_dfee ),len (_fgad ._feaa ),_fgc ,_fgad .Color ,_cdeb ,_cdeb .Width (),_cdeb .Height ());
|
|
|
|
|
if _dbag {for _ggb ,_gage :=range _fgad ._feaa {_ag .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_ggb ,_gage );if _ggb ==10{break ;};};};};};func _ecabe (_aab []*wordBag )[]*wordBag {if len (_aab )<=1{return _aab ;};if _aacab {_dc .Log .Info ("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a");
|
|
|
|
|
};_e .Slice (_aab ,func (_eae ,_dgdf int )bool {_fdab ,_dbg :=_aab [_eae ],_aab [_dgdf ];_agac :=_fdab .Width ()*_fdab .Height ();_fbda :=_dbg .Width ()*_dbg .Height ();if _agac !=_fbda {return _agac > _fbda ;};if _fdab .Height ()!=_dbg .Height (){return _fdab .Height ()> _dbg .Height ();
|
|
|
|
|
};return _eae < _dgdf ;});var _fec []*wordBag ;_ggdd :=make (intSet );for _edaf :=0;_edaf < len (_aab );_edaf ++{if _ggdd .has (_edaf ){continue ;};_cfda :=_aab [_edaf ];for _gcaf :=_edaf +1;_gcaf < len (_aab );_gcaf ++{if _ggdd .has (_edaf ){continue ;
|
|
|
|
|
};_aefaf :=_aab [_gcaf ];_ccgc :=_cfda .PdfRectangle ;_ccgc .Llx -=_cfda ._afb ;if _acb (_ccgc ,_aefaf .PdfRectangle ){_cfda .absorb (_aefaf );_ggdd .add (_gcaf );};};_fec =append (_fec ,_cfda );};if len (_aab )!=len (_fec )+len (_ggdd ){_dc .Log .Error ("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064",len (_aab ),len (_fec ),len (_ggdd ));
|
|
|
|
|
};return _fec ;};func (_bfgb *textTable )put (_dabc ,_acggb int ,_gbeae *textPara ){_bfgb ._gebab [_aeed (_dabc ,_acggb )]=_gbeae ;};func _edea (_cecc _ac .PdfRectangle ,_fbbe []*textLine )*textPara {return &textPara {PdfRectangle :_cecc ,_acee :_fbbe };
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// String returns a string describing the current state of the textState stack.
|
|
|
|
|
func (_agb *stateStack )String ()string {_cdecb :=[]string {_ag .Sprintf ("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064",len (*_agb ))};for _ddg ,_dged :=range *_agb {_ggf :="\u003c\u006e\u0069l\u003e";
|
|
|
|
|
if _dged !=nil {_ggf =_dged .String ();};_cdecb =append (_cdecb ,_ag .Sprintf ("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073",_ddg ,_ggf ));};return _a .Join (_cdecb ,"\u000a");};var _adfd =map[rulingKind ]string {_cefd :"\u006e\u006f\u006e\u0065",_gega :"\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_eace :"\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"};
|
|
|
|
|
func (_ceega *textTable )getRight ()paraList {_ccag :=make (paraList ,_ceega ._dege );for _eedc :=0;_eedc < _ceega ._dege ;_eedc ++{_dbecg :=_ceega .get (_ceega ._eaed -1,_eedc )._dfbb ;if _dbecg ==nil ||_dbecg ._baba {return nil ;};_ccag [_eedc ]=_dbecg ;
|
|
|
|
|
};for _becd :=0;_becd < _ceega ._dege -1;_becd ++{if _ccag [_becd ]._ffaf !=_ccag [_becd +1]{return nil ;};};return _ccag ;};func (_agdd rulingList )sort (){_e .Slice (_agdd ,_agdd .comp )};func (_fbbdd *textWord )appendMark (_cefe *textMark ,_gcdec _ac .PdfRectangle ){_fbbdd ._fddf =append (_fbbdd ._fddf ,_cefe );
|
|
|
|
|
_fbbdd .PdfRectangle =_def (_fbbdd .PdfRectangle ,_cefe .PdfRectangle );if _cefe ._adca > _fbbdd ._fbdg {_fbbdd ._fbdg =_cefe ._adca ;};_fbbdd ._cfba =_gcdec .Ury -_fbbdd .PdfRectangle .Lly ;};func _acb (_cegb ,_cdcf _ac .PdfRectangle )bool {return _cegb .Llx <=_cdcf .Llx &&_cdcf .Urx <=_cegb .Urx &&_cegb .Lly <=_cdcf .Lly &&_cdcf .Ury <=_cegb .Ury ;
|
|
|
|
|
};func (_fadcc paraList )xNeighbours (_afcc float64 )map[*textPara ][]int {_ceec :=make ([]event ,2*len (_fadcc ));if _afcc ==0{for _ecgg ,_bebb :=range _fadcc {_ceec [2*_ecgg ]=event {_bebb .Llx ,true ,_ecgg };_ceec [2*_ecgg +1]=event {_bebb .Urx ,false ,_ecgg };
|
|
|
|
|
};}else {for _eaaab ,_fege :=range _fadcc {_ceec [2*_eaaab ]=event {_fege .Llx -_afcc *_fege .fontsize (),true ,_eaaab };_ceec [2*_eaaab +1]=event {_fege .Urx +_afcc *_fege .fontsize (),false ,_eaaab };};};return _fadcc .eventNeighbours (_ceec );};func _eeca (_cccf ,_deae float64 )bool {return _gc .Abs (_cccf -_deae )<=_eccge };
|
|
|
|
|
func _dcad (_fgd _ac .PdfRectangle )textState {return textState {_bfd :100,_efac :RenderModeFill ,_dfe :_fgd };};func _aagb (_cdga []float64 ,_fffccg ,_bfbfg float64 )[]float64 {_acgce ,_cgecc :=_fffccg ,_bfbfg ;if _cgecc < _acgce {_acgce ,_cgecc =_cgecc ,_acgce ;
|
|
|
|
|
};_fdfa :=make ([]float64 ,0,len (_cdga )+2);_fdfa =append (_fdfa ,_fffccg );for _ ,_ffddc :=range _cdga {if _ffddc <=_acgce {continue ;}else if _ffddc >=_cgecc {break ;};_fdfa =append (_fdfa ,_ffddc );};_fdfa =append (_fdfa ,_bfbfg );return _fdfa ;};func (_cbgef *ruling )encloses (_gbbb ,_debga float64 )bool {return _cbgef ._daa -_eccge <=_gbbb &&_debga <=_cbgef ._defb +_eccge ;
|
|
|
|
|
};func (_agcf *wordBag )blocked (_gcce *textWord )bool {if _gcce .Urx < _agcf .Llx {_gga :=_ffcb (_gcce .PdfRectangle );_fbgd :=_dgdef (_agcf .PdfRectangle );if _agcf ._gffc .blocks (_gga ,_fbgd ){if _cbba {_dc .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_gcce ,_agcf );
|
|
|
|
|
};return true ;};}else if _agcf .Urx < _gcce .Llx {_ddd :=_ffcb (_agcf .PdfRectangle );_abbd :=_dgdef (_gcce .PdfRectangle );if _agcf ._gffc .blocks (_ddd ,_abbd ){if _cbba {_dc .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s",_gcce ,_agcf );
|
|
|
|
|
};return true ;};};if _gcce .Ury < _agcf .Lly {_bbd :=_ffgg (_gcce .PdfRectangle );_dfbd :=_bdbe (_agcf .PdfRectangle );if _agcf ._egb .blocks (_bbd ,_dfbd ){if _cbba {_dc .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_gcce ,_agcf );
|
|
|
|
|
};return true ;};}else if _agcf .Ury < _gcce .Lly {_dea :=_ffgg (_agcf .PdfRectangle );_egbg :=_bdbe (_gcce .PdfRectangle );if _agcf ._egb .blocks (_dea ,_egbg ){if _cbba {_dc .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s",_gcce ,_agcf );
|
|
|
|
|
};return true ;};};return false ;};func (_gbdg intSet )has (_gadg int )bool {_ ,_fdea :=_gbdg [_gadg ];return _fdea };func (_acfb *wordBag )depthIndexes ()[]int {if len (_acfb ._bed )==0{return nil ;};_aafb :=make ([]int ,len (_acfb ._bed ));_dadcb :=0;
|
|
|
|
|
for _ffec :=range _acfb ._bed {_aafb [_dadcb ]=_ffec ;_dadcb ++;};_e .Ints (_aafb );return _aafb ;};func _aacdg (_geed []*textWord ,_effb int )[]*textWord {_egag :=len (_geed );copy (_geed [_effb :],_geed [_effb +1:]);return _geed [:_egag -1];};func (_decgf paraList )eventNeighbours (_ecfg []event )map[*textPara ][]int {_e .Slice (_ecfg ,func (_dbeg ,_fcgdc int )bool {_fecfg ,_cfea :=_ecfg [_dbeg ],_ecfg [_fcgdc ];
|
|
|
|
|
_bgec ,_dage :=_fecfg ._gebbc ,_cfea ._gebbc ;if _bgec !=_dage {return _bgec < _dage ;};if _fecfg ._ebec !=_cfea ._ebec {return _fecfg ._ebec ;};return _dbeg < _fcgdc ;});_egab :=make (map[int ]intSet );_abbbb :=make (intSet );for _ ,_dfcdg :=range _ecfg {if _dfcdg ._ebec {_egab [_dfcdg ._dfcfab ]=make (intSet );
|
|
|
|
|
for _dgebd :=range _abbbb {if _dgebd !=_dfcdg ._dfcfab {_egab [_dfcdg ._dfcfab ].add (_dgebd );_egab [_dgebd ].add (_dfcdg ._dfcfab );};};_abbbb .add (_dfcdg ._dfcfab );}else {_abbbb .del (_dfcdg ._dfcfab );};};_ggea :=map[*textPara ][]int {};for _decb ,_cabc :=range _egab {_fagdf :=_decgf [_decb ];
|
|
|
|
|
if len (_cabc )==0{_ggea [_fagdf ]=nil ;continue ;};_cbggc :=make ([]int ,len (_cabc ));_cfga :=0;for _ageee :=range _cabc {_cbggc [_cfga ]=_ageee ;_cfga ++;};_ggea [_fagdf ]=_cbggc ;};return _ggea ;};func (_gcee *textTable )putComposite (_bdge ,_cdad int ,_dgeb paraList ,_eaefec _ac .PdfRectangle ){if len (_dgeb )==0{_dc .Log .Error ("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073");
|
|
|
|
|
return ;};_cebdd :=compositeCell {_eaefec ,_dgeb };if _gaa {_ag .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a",_bdge ,_cdad ,_cebdd .String ());
|
|
|
|
|
};_cebdd .updateBBox ();_gcee ._agbbc [_aeed (_bdge ,_cdad )]=_cebdd ;};func _cgee (_bcgbg ,_aaag ,_bedg float64 )rulingKind {if _bcgbg >=_bedg &&_cabga (_aaag ,_bcgbg ){return _gega ;};if _aaag >=_bedg &&_cabga (_bcgbg ,_aaag ){return _eace ;};return _cefd ;
|
|
|
|
|
};func (_bbdfb gridTile )numBorders ()int {_cggd :=0;if _bbdfb ._daeb {_cggd ++;};if _bbdfb ._gdabd {_cggd ++;};if _bbdfb ._dcced {_cggd ++;};if _bbdfb ._gfdb {_cggd ++;};return _cggd ;};func (_agggc *textTable )toTextTable ()TextTable {if _gaa {_dc .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_agggc ._eaed ,_agggc ._dege );
|
|
|
|
|
};_befb :=make ([][]TableCell ,_agggc ._dege );for _dffc :=0;_dffc < _agggc ._dege ;_dffc ++{_befb [_dffc ]=make ([]TableCell ,_agggc ._eaed );for _eebc :=0;_eebc < _agggc ._eaed ;_eebc ++{_beab :=_agggc .get (_eebc ,_dffc );if _beab ==nil {continue ;};
|
|
|
|
|
if _gaa {_ag .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_eebc ,_dffc ,_beab );};_befb [_dffc ][_eebc ].Text =_beab .text ();_edga :=0;_befb [_dffc ][_eebc ].Marks ._bcda =_beab .toTextMarks (&_edga );};};return TextTable {W :_agggc ._eaed ,H :_agggc ._dege ,Cells :_befb };
|
|
|
|
|
};func (_ccbge *subpath )add (_ddce ..._cc .Point ){_ccbge ._effg =append (_ccbge ._effg ,_ddce ...)};func (_gaed *shapesState )quadraticTo (_dgab ,_gded ,_adgg ,_bff float64 ){if _dfgc {_dc .Log .Info ("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a");
|
|
|
|
|
};_gaed .addPoint (_adgg ,_bff );};type textPara struct{_ac .PdfRectangle ;_efgg _ac .PdfRectangle ;_acee []*textLine ;_eecb *textTable ;_baba bool ;_gfcac bool ;_ageea *textPara ;_dfbb *textPara ;_dbcd *textPara ;_ffaf *textPara ;};func (_gede *textMark )inDiacriticArea (_agee *textMark )bool {_fadb :=_gede .Llx -_agee .Llx ;
|
|
|
|
|
_cagc :=_gede .Urx -_agee .Urx ;_fcbf :=_gede .Lly -_agee .Lly ;return _gc .Abs (_fadb +_cagc )< _gede .Width ()*_eddag &&_gc .Abs (_fcbf )< _gede .Height ()*_eddag ;};func _becg (_gcfd []pathSection ){if _ddee < 0.0{return ;};if _ebgeg {_dc .Log .Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_gcfd ));
|
|
|
|
|
};for _eadd ,_acgcb :=range _gcfd {for _edede ,_badg :=range _acgcb ._feaa {for _dffa ,_gcgb :=range _badg ._effg {_badg ._effg [_dffa ]=_cc .Point {X :_beaee (_gcgb .X ),Y :_beaee (_gcgb .Y )};if _ebgeg {_eggaad :=_badg ._effg [_dffa ];if !_bgdb (_gcgb ,_eggaad ){_cade :=_cc .Point {X :_eggaad .X -_gcgb .X ,Y :_eggaad .Y -_gcgb .Y };
|
|
|
|
|
_ag .Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_eadd ,_edede ,_dffa ,_gcgb ,_eggaad ,_cade );};};};};};};func (_bcaa gridTiling )complete ()bool {for _ ,_bfcf :=range _bcaa ._ecag {for _ ,_ggaf :=range _bfcf {if !_ggaf .complete (){return false ;
|
|
|
|
|
};};};return true ;};func (_dfa rulingList )connections (_fdcd map[int ]intSet ,_cbcce int )intSet {_cfdc :=make (intSet );_ccgb :=make (intSet );var _cbdf func (int );_cbdf =func (_bfaf int ){if !_ccgb .has (_bfaf ){_ccgb .add (_bfaf );for _gdbb :=range _dfa {if _fdcd [_gdbb ].has (_bfaf ){_cfdc .add (_gdbb );
|
|
|
|
|
};};for _dbecb :=range _dfa {if _cfdc .has (_dbecb ){_cbdf (_dbecb );};};};};_cbdf (_cbcce );return _cfdc ;};
|
|
|
|
|
|
|
|
|
|
// Len returns the number of TextMarks in `ma`.
|
|
|
|
|
func (_gbg *TextMarkArray )Len ()int {if _gbg ==nil {return 0;};return len (_gbg ._bcda );};func (_dddc paraList )log (_fdebc string ){if !_cdgb {return ;};_dc .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_fdebc ,len (_dddc ));
|
|
|
|
|
for _bbb ,_fafa :=range _dddc {if _fafa ==nil {continue ;};_bafe :=_fafa .text ();_fdb :="\u0020\u0020";if _fafa ._eecb !=nil {_fdb =_ag .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_fafa ._eecb ._eaed ,_fafa ._eecb ._dege );};_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_bbb ,_fafa .PdfRectangle ,_fdb ,_geac (_bafe ,50));
|
|
|
|
|
};};func (_daea *ruling )equals (_gbdag *ruling )bool {return _daea ._ddca ==_gbdag ._ddca &&_eeca (_daea ._dddg ,_gbdag ._dddg )&&_eeca (_daea ._daa ,_gbdag ._daa )&&_eeca (_daea ._defb ,_gbdag ._defb );};
|
2021-03-13 21:28:23 +00:00
|
|
|
|
|
|
|
|
|
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
|
2021-04-06 22:35:37 +00:00
|
|
|
|
func (_baf *TextMarkArray )BBox ()(_ac .PdfRectangle ,bool ){var _dfc _ac .PdfRectangle ;_fdeg :=false ;for _ ,_fcfb :=range _baf ._bcda {if _fcfb .Meta ||_bfcca (_fcfb .Text ){continue ;};if _fdeg {_dfc =_def (_dfc ,_fcfb .BBox );}else {_dfc =_fcfb .BBox ;
|
|
|
|
|
_fdeg =true ;};};return _dfc ,_fdeg ;};func (_egbgf *textTable )log (_edee string ){if !_gaa {return ;};_dc .Log .Info ("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066",_edee ,_egbgf ._eaed ,_egbgf ._dege ,_egbgf ._aebf ,_egbgf .PdfRectangle );
|
|
|
|
|
for _fcaa :=0;_fcaa < _egbgf ._dege ;_fcaa ++{for _eeadb :=0;_eeadb < _egbgf ._eaed ;_eeadb ++{_afdb :=_egbgf .get (_eeadb ,_fcaa );if _afdb ==nil {continue ;};_ag .Printf ("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a",_eeadb ,_fcaa ,_afdb .PdfRectangle ,_geac (_afdb .text (),50),_d .RuneCountInString (_afdb .text ()));
|
|
|
|
|
};};};func (_dedfb lineRuling )xMean ()float64 {return 0.5*(_dedfb ._cda .X +_dedfb ._aeff .X )};func (_dab *stateStack )pop ()*textState {if _dab .empty (){return nil ;};_afg :=*(*_dab )[len (*_dab )-1];*_dab =(*_dab )[:len (*_dab )-1];return &_afg ;};
|
|
|
|
|
func (_gbebd *textTable )getComposite (_feffg ,_egaed int )(paraList ,_ac .PdfRectangle ){_affb ,_dfeg :=_gbebd ._agbbc [_aeed (_feffg ,_egaed )];if _gaa {_ag .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a",_feffg ,_egaed ,_affb .String ());
|
|
|
|
|
};if !_dfeg {return nil ,_ac .PdfRectangle {};};return _affb .parasBBox ();};func (_beeed rulingList )mergePrimary ()float64 {_acga :=_beeed [0]._dddg ;for _ ,_ggggg :=range _beeed [1:]{_acga +=_ggggg ._dddg ;};return _acga /float64 (len (_beeed ));};func (_gdage paraList )writeText (_ebdcd _c .Writer ){for _adgc ,_adab :=range _gdage {if _adab ._gfcac {continue ;
|
|
|
|
|
};_adab .writeText (_ebdcd );if _adgc !=len (_gdage )-1{if _bcgg (_adab ,_gdage [_adgc +1]){_ebdcd .Write ([]byte ("\u0020"));}else {_ebdcd .Write ([]byte ("\u000a"));_ebdcd .Write ([]byte ("\u000a"));};};};_ebdcd .Write ([]byte ("\u000a"));_ebdcd .Write ([]byte ("\u000a"));
|
|
|
|
|
};func (_dagb rulingList )augmentGrid ()(rulingList ,rulingList ){_gefe ,_cdfge :=_dagb .vertsHorzs ();if len (_gefe )==0||len (_cdfge )==0{return _gefe ,_cdfge ;};_abgb ,_acgee :=_gefe ,_cdfge ;_faaf :=_gefe .bbox ();_gcba :=_cdfge .bbox ();if _ebgeg {_dc .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_faaf );
|
|
|
|
|
_dc .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_gcba );};var _gagd ,_gbgd ,_caeeg ,_gfgcc *ruling ;if _gcba .Llx < _faaf .Llx -_eccge {_gagd =&ruling {_egff :_abgf ,_ddca :_eace ,_dddg :_gcba .Llx ,_daa :_faaf .Lly ,_defb :_faaf .Ury };
|
|
|
|
|
_gefe =append (rulingList {_gagd },_gefe ...);};if _gcba .Urx > _faaf .Urx +_eccge {_gbgd =&ruling {_egff :_abgf ,_ddca :_eace ,_dddg :_gcba .Urx ,_daa :_faaf .Lly ,_defb :_faaf .Ury };_gefe =append (_gefe ,_gbgd );};if _faaf .Lly < _gcba .Lly -_eccge {_caeeg =&ruling {_egff :_abgf ,_ddca :_gega ,_dddg :_faaf .Lly ,_daa :_gcba .Llx ,_defb :_gcba .Urx };
|
|
|
|
|
_cdfge =append (rulingList {_caeeg },_cdfge ...);};if _faaf .Ury > _gcba .Ury +_eccge {_gfgcc =&ruling {_egff :_abgf ,_ddca :_gega ,_dddg :_faaf .Ury ,_daa :_gcba .Llx ,_defb :_gcba .Urx };_cdfge =append (_cdfge ,_gfgcc );};if len (_gefe )+len (_cdfge )==len (_dagb ){return _abgb ,_acgee ;
|
|
|
|
|
};_dabe :=append (_gefe ,_cdfge ...);_dagb .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_dabe .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");return _gefe ,_cdfge ;};func _ecdae (_beaab int ,_bfddf map[int ][]float64 )([]int ,int ){_eggaa :=make ([]int ,_beaab );
|
|
|
|
|
_edada :=0;for _daagf :=0;_daagf < _beaab ;_daagf ++{_eggaa [_daagf ]=_edada ;_edada +=len (_bfddf [_daagf ])+1;};return _eggaa ,_edada ;};func _bcgg (_ebfa ,_bfgf *textPara )bool {if _ebfa ._gfcac ||_bfgf ._gfcac {return true ;};return _dcbea (_ebfa .depth ()-_bfgf .depth ());
|
|
|
|
|
};func (_aff *textObject )setTextLeading (_gea float64 ){if _aff ==nil {return ;};_aff ._cebc ._cecg =_gea ;};func (_baea *textTable )depth ()float64 {_abeb :=1e10;for _gdaa :=0;_gdaa < _baea ._eaed ;_gdaa ++{_dadb :=_baea .get (_gdaa ,0);if _dadb ==nil ||_dadb ._gfcac {continue ;
|
|
|
|
|
};_abeb =_gc .Min (_abeb ,_dadb .depth ());};return _abeb ;};func (_effe *textPara )taken ()bool {return _effe ==nil ||_effe ._baba };func (_fgdf *textLine )toTextMarks (_deafa *int )[]TextMark {var _gbbf []TextMark ;for _ ,_bccg :=range _fgdf ._acec {if _bccg ._bagec {_gbbf =_dgfa (_gbbf ,_deafa ,"\u0020");
|
|
|
|
|
};_eeaf :=_bccg .toTextMarks (_deafa );_gbbf =append (_gbbf ,_eeaf ...);};return _gbbf ;};func (_cee *textObject )setWordSpacing (_bgd float64 ){if _cee ==nil {return ;};_cee ._cebc ._bccba =_bgd ;};func (_dcfgbc rulingList )snapToGroups ()rulingList {_bdcba ,_cbbb :=_dcfgbc .vertsHorzs ();
|
|
|
|
|
if len (_bdcba )> 0{_bdcba =_bdcba .snapToGroupsDirection ();};if len (_cbbb )> 0{_cbbb =_cbbb .snapToGroupsDirection ();};_fcbbc :=append (_bdcba ,_cbbb ...);_fcbbc .log ("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073");return _fcbbc ;
|
|
|
|
|
};func _fgab (_dbef _ac .PdfRectangle ,_fedc ,_ccfdb ,_cgfg ,_dcfgb *ruling )gridTile {_abag :=_dbef .Llx ;_ffcd :=_dbef .Urx ;_adfdg :=_dbef .Lly ;_ggae :=_dbef .Ury ;return gridTile {PdfRectangle :_dbef ,_daeb :_fedc !=nil &&_fedc .encloses (_adfdg ,_ggae ),_gdabd :_ccfdb !=nil &&_ccfdb .encloses (_adfdg ,_ggae ),_dcced :_cgfg !=nil &&_cgfg .encloses (_abag ,_ffcd ),_gfdb :_dcfgb !=nil &&_dcfgb .encloses (_abag ,_ffcd )};
|
|
|
|
|
};func _bgac (_fbfeg []pathSection )rulingList {_becg (_fbfeg );if _ebgeg {_dc .Log .Info ("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073",len (_fbfeg ));
|
|
|
|
|
};var _cefb rulingList ;for _ ,_daag :=range _fbfeg {for _ ,_gedeb :=range _daag ._feaa {if len (_gedeb ._effg )< 2{continue ;};_acac :=_gedeb ._effg [0];for _ ,_cfcb :=range _gedeb ._effg [1:]{if _gcdgd ,_eeb :=_ffdca (_acac ,_cfcb ,_daag .Color );_eeb {_cefb =append (_cefb ,_gcdgd );
|
|
|
|
|
};_acac =_cfcb ;};};};if _ebgeg {_dc .Log .Info ("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073",_cefb );};return _cefb ;};func (_fffd paraList )applyTables (_gdbd []*textTable )paraList {var _dbca paraList ;
|
|
|
|
|
for _ ,_ddeb :=range _gdbd {_dbca =append (_dbca ,_ddeb .newTablePara ());};for _ ,_bcdg :=range _fffd {if _bcdg ._baba {continue ;};_dbca =append (_dbca ,_bcdg );};return _dbca ;};func _gbfa (_efbb map[int ][]float64 )string {_aacgb :=_fbcb (_efbb );_ffca :=make ([]string ,len (_efbb ));
|
|
|
|
|
for _efgf ,_fggea :=range _aacgb {_ffca [_efgf ]=_ag .Sprintf ("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066",_fggea ,_efbb [_fggea ]);};return _ag .Sprintf ("\u007b\u0025\u0073\u007d",_a .Join (_ffca ,"\u002c\u0020"));};func (_adgdb *textWord )toTextMarks (_acacb *int )[]TextMark {var _dfab []TextMark ;
|
|
|
|
|
for _ ,_gbcdd :=range _adgdb ._fddf {_dfab =_gfgcd (_dfab ,_acacb ,_gbcdd .ToTextMark ());};return _dfab ;};func _gagda (_dbgd map[float64 ]map[float64 ]gridTile )[]float64 {_aacd :=make ([]float64 ,0,len (_dbgd ));_caffe :=make (map[float64 ]struct{},len (_dbgd ));
|
|
|
|
|
for _ ,_gaefa :=range _dbgd {for _fdcf :=range _gaefa {if _ ,_ecabec :=_caffe [_fdcf ];_ecabec {continue ;};_aacd =append (_aacd ,_fdcf );_caffe [_fdcf ]=struct{}{};};};_e .Float64s (_aacd );return _aacd ;};func (_fddc compositeCell )String ()string {_eabc :="";
|
|
|
|
|
if len (_fddc .paraList )> 0{_eabc =_geac (_fddc .paraList .merge ().text (),50);};return _ag .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071",_fddc .PdfRectangle ,len (_fddc .paraList ),_eabc );
|
|
|
|
|
};type gridTile struct{_ac .PdfRectangle ;_gfdb ,_daeb ,_dcced ,_gdabd bool ;};func (_cfbe paraList )llyOrdering ()[]int {_cdbe :=make ([]int ,len (_cfbe ));for _aebga :=range _cfbe {_cdbe [_aebga ]=_aebga ;};_e .SliceStable (_cdbe ,func (_bdb ,_gdgf int )bool {_cabf ,_acgf :=_cdbe [_bdb ],_cdbe [_gdgf ];
|
|
|
|
|
return _cfbe [_cabf ].Lly < _cfbe [_acgf ].Lly ;});return _cdbe ;};func (_aadea *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_ecae :=make (map[int ]map[*textWord ]struct{},len (_aadea ._bed ));for _bfab :=range _aadea ._bed {_ecae [_bfab ]=make (map[*textWord ]struct{});
|
|
|
|
|
};return _ecae ;};func (_babb *textMark )bbox ()_ac .PdfRectangle {return _babb .PdfRectangle };func (_ggad rulingList )comp (_fgae ,_bgf int )bool {_fgef ,_eefc :=_ggad [_fgae ],_ggad [_bgf ];_dgcbb ,_egce :=_fgef ._ddca ,_eefc ._ddca ;if _dgcbb !=_egce {return _dgcbb > _egce ;
|
|
|
|
|
};if _dgcbb ==_cefd {return false ;};_aecfa :=func (_gaac bool )bool {if _dgcbb ==_gega {return _gaac ;};return !_gaac ;};_fgcc ,_befea :=_fgef ._dddg ,_eefc ._dddg ;if _fgcc !=_befea {return _aecfa (_fgcc > _befea );};_fgcc ,_befea =_fgef ._daa ,_eefc ._daa ;
|
|
|
|
|
if _fgcc !=_befea {return _aecfa (_fgcc < _befea );};return _aecfa (_fgef ._defb < _eefc ._defb );};func (_deea *textObject )checkOp (_bgae *_dg .ContentStreamOperation ,_cbdd int ,_ecb bool )(_daf bool ,_eee error ){if _deea ==nil {var _cdb []_be .PdfObject ;
|
|
|
|
|
if _cbdd > 0{_cdb =_bgae .Params ;if len (_cdb )> _cbdd {_cdb =_cdb [:_cbdd ];};};_dc .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_bgae .Operand ,_cdb );
|
|
|
|
|
};if _cbdd >=0{if len (_bgae .Params )!=_cbdd {if _ecb {_eee =_g .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");};_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_bgae .Operand ,_cbdd ,len (_bgae .Params ),_bgae .Params );
|
|
|
|
|
return false ,_eee ;};};return true ,nil ;};func (_feba *shapesState )closePath (){if _feba ._gbag {_feba ._aceg =append (_feba ._aceg ,_dfeb (_feba ._adac ));_feba ._gbag =false ;}else if len (_feba ._aceg )==0{if _dfgc {_dc .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");
|
|
|
|
|
};_feba ._gbag =false ;return ;};_feba ._aceg [len (_feba ._aceg )-1].close ();if _dfgc {_dc .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_feba );};};const (RenderModeStroke RenderMode =1<<iota ;RenderModeFill ;
|
|
|
|
|
RenderModeClip ;);func _afbb (_ddag []*textMark ,_eded _ac .PdfRectangle ,_aecg rulingList ,_fbc []gridTiling )paraList {_dc .Log .Trace ("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066",len (_ddag ),_eded );
|
|
|
|
|
if len (_ddag )==0{return nil ;};_dggg :=_bfcdc (_ddag ,_eded );if len (_dggg )==0{return nil ;};_aecg .log ("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065");_affd ,_abfdd :=_aecg .vertsHorzs ();_fdde :=_cbbc (_dggg ,_eded .Ury ,_affd ,_abfdd );
|
|
|
|
|
_bcgb :=_cccd (_fdde ,_eded .Ury ,_affd ,_abfdd );_bcgb =_ecabe (_bcgb );_feffe :=make (paraList ,0,len (_bcgb ));for _ ,_gbafa :=range _bcgb {_bgdf :=_gbafa .arrangeText ();if _bgdf !=nil {_feffe =append (_feffe ,_bgdf );};};if len (_feffe )>=_ecdb {_feffe =_feffe .extractTables (_fbc );
|
|
|
|
|
};_feffe .sortReadingOrder ();_feffe .log ("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072");return _feffe ;};func (_agggf rulingList )blocks (_cgag ,_gcgc *ruling )bool {if _cgag ._daa > _gcgc ._defb ||_gcgc ._daa > _cgag ._defb {return false ;
|
|
|
|
|
};_defg :=_gc .Max (_cgag ._daa ,_gcgc ._daa );_afdd :=_gc .Min (_cgag ._defb ,_gcgc ._defb );if _cgag ._dddg > _gcgc ._dddg {_cgag ,_gcgc =_gcgc ,_cgag ;};for _ ,_gaga :=range _agggf {if _cgag ._dddg <=_gaga ._dddg +_gfaff &&_gaga ._dddg <=_gcgc ._dddg +_gfaff &&_gaga ._daa <=_afdd &&_defg <=_gaga ._defb {return true ;
|
|
|
|
|
};};return false ;};func (_acae *wordBag )scanBand (_bafb string ,_gcg *wordBag ,_fbad func (_afgf *wordBag ,_bbef *textWord )bool ,_bfcc ,_deeaf ,_dafd float64 ,_fbaf ,_eeg bool )int {_cbc :=_gcg ._afb ;var _ebffg map[int ]map[*textWord ]struct{};if !_fbaf {_ebffg =_acae .makeRemovals ();
|
|
|
|
|
};_gbga :=_dbgb *_cbc ;_ggfd :=0;for _ ,_debe :=range _acae .depthBand (_bfcc -_gbga ,_deeaf +_gbga ){if len (_acae ._bed [_debe ])==0{continue ;};for _ ,_bede :=range _acae ._bed [_debe ]{if !(_bfcc -_gbga <=_bede ._cfba &&_bede ._cfba <=_deeaf +_gbga ){continue ;
|
|
|
|
|
};if !_fbad (_gcg ,_bede ){continue ;};_aae :=2.0*_gc .Abs (_bede ._fbdg -_gcg ._afb )/(_bede ._fbdg +_gcg ._afb );_fggg :=_gc .Max (_bede ._fbdg /_gcg ._afb ,_gcg ._afb /_bede ._fbdg );_dfd :=_gc .Min (_aae ,_fggg );if _dafd > 0&&_dfd > _dafd {continue ;
|
|
|
|
|
};if _gcg .blocked (_bede ){continue ;};if !_fbaf {_gcg .pullWord (_bede ,_debe ,_ebffg );};_ggfd ++;if !_eeg {if _bede ._cfba < _bfcc {_bfcc =_bede ._cfba ;};if _bede ._cfba > _deeaf {_deeaf =_bede ._cfba ;};};if _fbaf {break ;};};};if !_fbaf {_acae .applyRemovals (_ebffg );
|
|
|
|
|
};return _ggfd ;};type stateStack []*textState ;
|
|
|
|
|
|
|
|
|
|
// Elements returns the TextMarks in `ma`.
|
|
|
|
|
func (_bde *TextMarkArray )Elements ()[]TextMark {return _bde ._bcda };func (_baed *wordBag )maxDepth ()float64 {return _baed ._gfgc -_baed .Lly };var _ga =false ;func _bgad (_caee *textWord ,_ebfb float64 ,_fgedc ,_adba rulingList )*wordBag {_dbdg :=_ceac (_caee ._cfba );
|
|
|
|
|
_ffaad :=[]*textWord {_caee };_dcef :=wordBag {_bed :map[int ][]*textWord {_dbdg :_ffaad },PdfRectangle :_caee .PdfRectangle ,_afb :_caee ._fbdg ,_gfgc :_ebfb ,_gffc :_fgedc ,_egb :_adba };return &_dcef ;};func _ebdaf (_cdac ,_ddgg _cc .Point )bool {_dcdf :=_gc .Abs (_cdac .X -_ddgg .X );
|
|
|
|
|
_gbee :=_gc .Abs (_cdac .Y -_ddgg .Y );return _cabga (_gbee ,_dcdf );};
|
|
|
|
|
|
|
|
|
|
// NewFromContents creates a new extractor from contents and page resources.
|
|
|
|
|
func NewFromContents (contents string ,resources *_ac .PdfPageResources )(*Extractor ,error ){const _da ="\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s";_fb :=&Extractor {_eg :contents ,_ff :resources ,_cg :map[string ]fontEntry {},_ee :map[string ]textResult {}};
|
|
|
|
|
_ec .TrackUse (_da );return _fb ,nil ;};func (_ccdfb rulingList )asTiling ()gridTiling {if _dgf {_dc .Log .Info ("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_ccdfb ));
|
|
|
|
|
};for _aabbf ,_deab :=range _ccdfb [1:]{_fddcb :=_ccdfb [_aabbf ];if _fddcb .alignsPrimary (_deab )&&_fddcb .alignsSec (_deab ){_dc .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_deab ,_fddcb );
|
|
|
|
|
};};_ccdfb .sortStrict ();_ccdfb .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_gebc ,_bdbb :=_ccdfb .vertsHorzs ();_dccgb :=_gebc .primaries ();_geeg :=_bdbb .primaries ();_aeef :=len (_dccgb )-1;_cdef :=len (_geeg )-1;if _aeef ==0||_cdef ==0{return gridTiling {};
|
|
|
|
|
};_gcag :=_ac .PdfRectangle {Llx :_dccgb [0],Urx :_dccgb [_aeef ],Lly :_geeg [0],Ury :_geeg [_cdef ]};if _dgf {_dc .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_gebc ));
|
|
|
|
|
for _bccgc ,_ebgf :=range _gebc {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bccgc ,_ebgf );};_dc .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_bdbb ));
|
|
|
|
|
for _ccgdb ,_bdaf :=range _bdbb {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ccgdb ,_bdaf );};_dc .Log .Info ("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_aeef ,_cdef ,_dccgb ,_geeg );
|
|
|
|
|
};_cafb :=make ([]gridTile ,_aeef *_cdef );for _ggede :=_cdef -1;_ggede >=0;_ggede --{_fbdc :=_geeg [_ggede ];_gdeee :=_geeg [_ggede +1];for _dgba :=0;_dgba < _aeef ;_dgba ++{_aege :=_dccgb [_dgba ];_dbfbe :=_dccgb [_dgba +1];_dcga :=_gebc .findPrimSec (_aege ,_fbdc );
|
|
|
|
|
_eggdg :=_gebc .findPrimSec (_dbfbe ,_fbdc );_ddac :=_bdbb .findPrimSec (_fbdc ,_aege );_debgf :=_bdbb .findPrimSec (_gdeee ,_aege );_dbbe :=_ac .PdfRectangle {Llx :_aege ,Urx :_dbfbe ,Lly :_fbdc ,Ury :_gdeee };_bbacc :=_fgab (_dbbe ,_dcga ,_eggdg ,_ddac ,_debgf );
|
|
|
|
|
_cafb [_ggede *_aeef +_dgba ]=_bbacc ;if _dgf {_ag .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_dgba ,_ggede ,_bbacc .String (),_bbacc .Width (),_bbacc .Height ());
|
|
|
|
|
};};};if _dgf {_dc .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_gcag );
|
|
|
|
|
};_afa :=make ([]map[float64 ]gridTile ,_cdef );for _fedgb :=_cdef -1;_fedgb >=0;_fedgb --{if _dgf {_ag .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_fedgb );};_afa [_fedgb ]=make (map[float64 ]gridTile ,_aeef );for _ebea :=0;_ebea < _aeef ;
|
|
|
|
|
_ebea ++{_bbce :=_cafb [_fedgb *_aeef +_ebea ];if _dgf {_ag .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ebea ,_bbce );};if !_bbce ._daeb {continue ;};_aggdc :=_ebea ;for _caef :=_ebea +1;!_bbce ._gdabd &&_caef < _aeef ;_caef ++{_acfe :=_cafb [_fedgb *_aeef +_caef ];
|
|
|
|
|
_bbce .Urx =_acfe .Urx ;_bbce ._gfdb =_bbce ._gfdb ||_acfe ._gfdb ;_bbce ._dcced =_bbce ._dcced ||_acfe ._dcced ;_bbce ._gdabd =_acfe ._gdabd ;if _dgf {_ag .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_caef ,_acfe ,_bbce );
|
|
|
|
|
};_aggdc =_caef ;};if _dgf {_ag .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_ebea ,_aggdc ,_bbce );};_ebea =_aggdc ;_afa [_fedgb ][_bbce .Llx ]=_bbce ;};};_fcad :=make (map[float64 ]map[float64 ]gridTile ,_cdef );
|
|
|
|
|
_beaa :=make (map[float64 ]map[float64 ]struct{},_cdef );for _abcbe :=_cdef -1;_abcbe >=0;_abcbe --{_agbg :=_cafb [_abcbe *_aeef ].Lly ;_fcad [_agbg ]=make (map[float64 ]gridTile ,_aeef );_beaa [_agbg ]=make (map[float64 ]struct{},_aeef );};if _dgf {_dc .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_gcag );
|
|
|
|
|
};for _facee :=_cdef -1;_facee >=0;_facee --{_bcfe :=_cafb [_facee *_aeef ].Lly ;_dgfaf :=_afa [_facee ];if _dgf {_ag .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_facee );};for _ ,_dfdec :=range _fcfa (_dgfaf ){if _ ,_bacce :=_beaa [_bcfe ][_dfdec ];
|
|
|
|
|
_bacce {continue ;};_fagdd :=_dgfaf [_dfdec ];if _dgf {_ag .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_fagdd .String ());};for _eeac :=_facee -1;_eeac >=0;_eeac --{if _fagdd ._dcced {break ;};_ggbb :=_afa [_eeac ];_beee ,_fcbef :=_ggbb [_dfdec ];
|
|
|
|
|
if !_fcbef {break ;};if _beee .Urx !=_fagdd .Urx {break ;};_fagdd ._dcced =_beee ._dcced ;_fagdd .Lly =_beee .Lly ;if _dgf {_ag .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_beee .String (),_fagdd .String ());
|
|
|
|
|
};_beaa [_beee .Lly ][_beee .Llx ]=struct{}{};};if _facee ==0{_fagdd ._dcced =true ;};if _fagdd .complete (){_fcad [_bcfe ][_dfdec ]=_fagdd ;};};};_gecca :=gridTiling {PdfRectangle :_gcag ,_bddgg :_gagda (_fcad ),_dcaa :_gbcf (_fcad ),_ecag :_fcad };_gecca .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");
|
|
|
|
|
return _gecca ;};func _cccd (_fbaa *wordBag ,_eacg float64 ,_adcb ,_ecabf rulingList )[]*wordBag {var _fgbg []*wordBag ;for _ ,_fagd :=range _fbaa .depthIndexes (){_bgbd :=false ;for !_fbaa .empty (_fagd ){_dggc :=_fbaa .firstReadingIndex (_fagd );_eeafd :=_fbaa .firstWord (_dggc );
|
|
|
|
|
_edbe :=_bgad (_eeafd ,_eacg ,_adcb ,_ecabf );_fbaa .removeWord (_eeafd ,_dggc );if _ccef {_dc .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_eeafd .String ());};for _gdag :=true ;_gdag ;
|
|
|
|
|
_gdag =_bgbd {_bgbd =false ;_edadd :=_dccb *_edbe ._afb ;_fcae :=_eaea *_edbe ._afb ;_bfae :=_gdgd *_edbe ._afb ;if _ccef {_dc .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_edbe .minDepth (),_edbe .maxDepth (),_bfae ,_fcae );
|
|
|
|
|
};if _fbaa .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_edbe ,_gbaf (_adcc ,0),_edbe .minDepth ()-_bfae ,_edbe .maxDepth ()+_bfae ,_egdc ,false ,false )> 0{_bgbd =true ;};if _fbaa .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_edbe ,_gbaf (_adcc ,_fcae ),_edbe .minDepth (),_edbe .maxDepth (),_daca ,false ,false )> 0{_bgbd =true ;
|
|
|
|
|
};if _bgbd {continue ;};_fgcd :=_fbaa .scanBand ("",_edbe ,_gbaf (_eabg ,_edadd ),_edbe .minDepth (),_edbe .maxDepth (),_gdb ,true ,false );if _fgcd > 0{_accff :=(_edbe .maxDepth ()-_edbe .minDepth ())/_edbe ._afb ;if (_fgcd > 1&&float64 (_fgcd )> 0.3*_accff )||_fgcd <=10{if _fbaa .scanBand ("\u006f\u0074\u0068e\u0072",_edbe ,_gbaf (_eabg ,_edadd ),_edbe .minDepth (),_edbe .maxDepth (),_gdb ,false ,true )> 0{_bgbd =true ;
|
|
|
|
|
};};};};_fgbg =append (_fgbg ,_edbe );};};return _fgbg ;};type rulingKind int ;func (_bdff *wordBag )sort (){for _ ,_dbc :=range _bdff ._bed {_e .Slice (_dbc ,func (_egea ,_ecce int )bool {return _dace (_dbc [_egea ],_dbc [_ecce ])< 0});};};func (_fdgg *compositeCell )updateBBox (){for _ ,_aeebd :=range _fdgg .paraList {_fdgg .PdfRectangle =_def (_fdgg .PdfRectangle ,_aeebd .PdfRectangle );
|
|
|
|
|
};};
|
|
|
|
|
|
|
|
|
|
// String returns a description of `t`.
|
|
|
|
|
func (_ccbc *textTable )String ()string {return _ag .Sprintf ("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074",_ccbc ._eaed ,_ccbc ._dege ,_ccbc ._aebf );};func (_bccgb paraList )reorder (_egae []int ){_acbd :=make (paraList ,len (_bccgb ));
|
|
|
|
|
for _faef ,_cgeg :=range _egae {_acbd [_faef ]=_bccgb [_cgeg ];};copy (_bccgb ,_acbd );};func (_bea *subpath )removeDuplicates (){if len (_bea ._effg )==0{return ;};_gacd :=[]_cc .Point {_bea ._effg [0]};for _ ,_bbc :=range _bea ._effg [1:]{if !_bgdb (_bbc ,_gacd [len (_gacd )-1]){_gacd =append (_gacd ,_bbc );
|
|
|
|
|
};};_bea ._effg =_gacd ;};type event struct{_gebbc float64 ;_ebec bool ;_dfcfab int ;};func (_egc *textObject )getCurrentFont ()*_ac .PdfFont {var _ged *_ac .PdfFont ;if !_egc ._ddbf .empty (){_ged =_egc ._ddbf .top ()._ecc ;};if _ged ==nil {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e");
|
|
|
|
|
return _ac .DefaultFont ();};return _ged ;};func (_gebb rulingList )splitSec ()[]rulingList {_e .Slice (_gebb ,func (_fcag ,_cebfb int )bool {_dbff ,_acedb :=_gebb [_fcag ],_gebb [_cebfb ];if _dbff ._daa !=_acedb ._daa {return _dbff ._daa < _acedb ._daa ;
|
|
|
|
|
};return _dbff ._defb < _acedb ._defb ;});_ffcfa :=make (map[*ruling ]struct{},len (_gebb ));_gbcd :=func (_abcf *ruling )rulingList {_ageg :=rulingList {_abcf };_ffcfa [_abcf ]=struct{}{};for _ ,_dfad :=range _gebb {if _ ,_dcgcg :=_ffcfa [_dfad ];_dcgcg {continue ;
|
|
|
|
|
};for _ ,_efeg :=range _ageg {if _dfad .alignsSec (_efeg ){_ageg =append (_ageg ,_dfad );_ffcfa [_dfad ]=struct{}{};break ;};};};return _ageg ;};_dbgaa :=[]rulingList {_gbcd (_gebb [0])};for _ ,_eafcfc :=range _gebb [1:]{if _ ,_cagf :=_ffcfa [_eafcfc ];
|
|
|
|
|
_cagf {continue ;};_dbgaa =append (_dbgaa ,_gbcd (_eafcfc ));};return _dbgaa ;};func _ffgg (_cbge _ac .PdfRectangle )*ruling {return &ruling {_ddca :_gega ,_dddg :_cbge .Ury ,_daa :_cbge .Llx ,_defb :_cbge .Urx };};func _cgcgg (_edfd _ac .PdfColorspace ,_cdebf _ac .PdfColor )_gca .Color {if _edfd ==nil ||_cdebf ==nil {return _gca .Black ;
|
|
|
|
|
};_ffag ,_dcefcfd :=_edfd .ColorToRGB (_cdebf );if _dcefcfd !=nil {_dc .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_cdebf ,_edfd ,_dcefcfd );
|
|
|
|
|
return _gca .Black ;};_fefc ,_gfbaa :=_ffag .(*_ac .PdfColorDeviceRGB );if !_gfbaa {_dc .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_ffag );
|
|
|
|
|
return _gca .Black ;};return _gca .NRGBA {R :uint8 (_fefc .R ()*255),G :uint8 (_fefc .G ()*255),B :uint8 (_fefc .B ()*255),A :uint8 (255)};};func (_gafg rulingList )merge ()*ruling {_gbff :=_gafg [0]._dddg ;_bged :=_gafg [0]._daa ;_dbae :=_gafg [0]._defb ;
|
|
|
|
|
for _ ,_acfagd :=range _gafg [1:]{_gbff +=_acfagd ._dddg ;if _acfagd ._daa < _bged {_bged =_acfagd ._daa ;};if _acfagd ._defb > _dbae {_dbae =_acfagd ._defb ;};};_gcad :=&ruling {_ddca :_gafg [0]._ddca ,_egff :_gafg [0]._egff ,Color :_gafg [0].Color ,_dddg :_gbff /float64 (len (_gafg )),_daa :_bged ,_defb :_dbae };
|
|
|
|
|
if _efea {_dc .Log .Info ("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073",len (_gafg ),_gcad );for _cefca ,_ffdd :=range _gafg {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cefca ,_ffdd );};
|
|
|
|
|
};return _gcad ;};func (_bdcd rulingList )secMinMax ()(float64 ,float64 ){_gagge ,_gbdc :=_bdcd [0]._daa ,_bdcd [0]._defb ;for _ ,_aadfg :=range _bdcd [1:]{if _aadfg ._daa < _gagge {_gagge =_aadfg ._daa ;};if _aadfg ._defb > _gbdc {_gbdc =_aadfg ._defb ;
|
|
|
|
|
};};return _gagge ,_gbdc ;};func (_fbgb paraList )inTile (_gbeag gridTile )paraList {var _ecaba paraList ;for _ ,_dgcce :=range _fbgb {if _gbeag .contains (_dgcce .PdfRectangle ){_ecaba =append (_ecaba ,_dgcce );};};if _gaa {_ag .Printf ("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n",_gbeag ,len (_ecaba ));
|
|
|
|
|
for _efbc ,_ebgbe :=range _ecaba {_ag .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_efbc ,_ebgbe );};_ag .Println ("");};return _ecaba ;};func _acace (_gcde ,_ecbcb int )int {if _gcde > _ecbcb {return _gcde ;};return _ecbcb ;};
|
|
|
|
|
|
|
|
|
|
// String returns a human readable description of `s`.
|
|
|
|
|
func (_bdecbd intSet )String ()string {var _ggedf []int ;for _ccefg :=range _bdecbd {if _bdecbd .has (_ccefg ){_ggedf =append (_ggedf ,_ccefg );};};_e .Ints (_ggedf );return _ag .Sprintf ("\u0025\u002b\u0076",_ggedf );};func (_gggd *textObject )setHorizScaling (_cdeca float64 ){if _gggd ==nil {return ;
|
|
|
|
|
};_gggd ._cebc ._bfd =_cdeca ;};func _gdef (_cgcba _ac .PdfRectangle ,_efad bounded )float64 {return _cgcba .Ury -_efad .bbox ().Lly };var (_gddcd =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Marks returns the TextMark collection for a page. It represents all the text on the page.
|
|
|
|
|
func (_bebg PageText )Marks ()*TextMarkArray {return &TextMarkArray {_bcda :_bebg ._eabf }};func (_bcb *wordBag )text ()string {_cdgg :=_bcb .allWords ();_cca :=make ([]string ,len (_cdgg ));for _bgba ,_gbgaa :=range _cdgg {_cca [_bgba ]=_gbgaa ._ggbbf ;
|
|
|
|
|
};return _a .Join (_cca ,"\u0020");};func (_febac *textWord )computeText ()string {_efagdb :=make ([]string ,len (_febac ._fddf ));for _fbfc ,_fgdbg :=range _febac ._fddf {_efagdb [_fbfc ]=_fgdbg ._bfccc ;};return _a .Join (_efagdb ,"");};func (_ggef *shapesState )drawRectangle (_fgbd ,_effd ,_ffae ,_fegc float64 ){if _dfgc {_ebge :=_ggef .devicePoint (_fgbd ,_effd );
|
|
|
|
|
_aag :=_ggef .devicePoint (_fgbd +_ffae ,_effd +_fegc );_ffc :=_ac .PdfRectangle {Llx :_ebge .X ,Lly :_ebge .Y ,Urx :_aag .X ,Ury :_aag .Y };_dc .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_ffc );
|
|
|
|
|
};_ggef .newSubPath ();_ggef .moveTo (_fgbd ,_effd );_ggef .lineTo (_fgbd +_ffae ,_effd );_ggef .lineTo (_fgbd +_ffae ,_effd +_fegc );_ggef .lineTo (_fgbd ,_effd +_fegc );_ggef .closePath ();};func (_bgag *wordBag )minDepth ()float64 {return _bgag ._gfgc -(_bgag .Ury -_bgag ._afb )};
|
|
|
|
|
// depthRange returns, in ascending order, the keys of the bag's `_bed` map
// that lie in the closed interval [`_ecab`, `_bgb`]. The result is nil when no
// key falls inside the interval.
func (_deaf *wordBag) depthRange(_ecab, _bgb int) []int {
	var inRange []int
	for idx := range _deaf._bed {
		if idx < _ecab || idx > _bgb {
			continue
		}
		inRange = append(inRange, idx)
	}
	// Sorting a nil slice is a no-op, so the empty case still returns nil.
	_e.Ints(inRange)
	return inRange
}
|
|
|
|
|
|
|
|
|
|
// String returns a human readable description of `ss`.
|
|
|
|
|
func (_ggd *shapesState )String ()string {return _ag .Sprintf ("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d",len (_ggd ._aceg ),_ggd ._gbag );};func (_ffgaa *textTable )get (_bdeca ,_dgcf int )*textPara {return _ffgaa ._gebab [_aeed (_bdeca ,_dgcf )]};
|
|
|
|
|
// fontEntry is one slot of the extractor's font cache: the cached font and the
// access counter value recorded for it (see textObject.getFont).
type fontEntry struct {
	_aaa *_ac.PdfFont
	_eaf int64
}

// _gbcea passes the absolute x and y distances between the two points to
// `_cabga` and returns its verdict.
func _gbcea(_gffb, _fcdf _cc.Point) bool {
	return _cabga(_gc.Abs(_gffb.X-_fcdf.X), _gc.Abs(_gffb.Y-_fcdf.Y))
}

// _beaee rounds the value to the nearest multiple of `_ddee`.
func _beaee(_edfef float64) float64 {
	steps := _gc.Round(_edfef / _ddee)
	return _ddee * steps
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// ApplyArea processes the page text only within the specified area `bbox`.
|
|
|
|
|
// Each time ApplyArea is called, it updates the result set in `pt`.
|
|
|
|
|
// Can be called multiple times in a row with different bounding boxes.
|
|
|
|
|
func (_ebda *PageText )ApplyArea (bbox _ac .PdfRectangle ){_ddf :=make ([]*textMark ,0,len (_ebda ._ccgd ));for _ ,_fda :=range _ebda ._ccgd {if _abcd (_fda .bbox (),bbox ){_ddf =append (_ddf ,_fda );};};var _gde paraList ;_fad :=len (_ddf );for _agd :=0;
|
|
|
|
|
_agd < 360&&_fad > 0;_agd +=90{_edc :=make ([]*textMark ,0,len (_ddf )-_fad );for _ ,_aeaec :=range _ddf {if _aeaec ._adfa ==_agd {_edc =append (_edc ,_aeaec );};};if len (_edc )> 0{_adg :=_afbb (_edc ,_ebda ._ace ,nil ,nil );_gde =append (_gde ,_adg ...);
|
|
|
|
|
_fad -=len (_edc );};};_aefa :=new (_ab .Buffer );_gde .writeText (_aefa );_ebda ._ebgd =_aefa .String ();_ebda ._eabf =_gde .toTextMarks ();_ebda ._ffdb =_gde .tables ();};func (_ccc *imageExtractContext )extractFormImages (_bba *_be .PdfObjectName ,_gec _dg .GraphicsState ,_gad *_ac .PdfPageResources )error {_bdg ,_aec :=_gad .GetXObjectFormByName (*_bba );
|
|
|
|
|
if _aec !=nil {return _aec ;};if _bdg ==nil {return nil ;};_aga ,_aec :=_bdg .GetContentStream ();if _aec !=nil {return _aec ;};_gbf :=_bdg .Resources ;if _gbf ==nil {_gbf =_gad ;};_aec =_ccc .extractContentStreamImages (string (_aga ),_gbf );if _aec !=nil {return _aec ;
|
|
|
|
|
};_ccc ._fd ++;return nil ;};func _acfaa (_acca int ,_bafce func (int ,int )bool )[]int {_dgad :=make ([]int ,_acca );for _dbgba :=range _dgad {_dgad [_dbgba ]=_dbgba ;};_e .Slice (_dgad ,func (_dggd ,_febag int )bool {return _bafce (_dgad [_dggd ],_dgad [_febag ])});
|
|
|
|
|
return _dgad ;};func (_gdc *wordBag )applyRemovals (_ccfe map[int ]map[*textWord ]struct{}){for _bagg ,_adga :=range _ccfe {if len (_adga )==0{continue ;};_aebg :=_gdc ._bed [_bagg ];_adcf :=len (_aebg )-len (_adga );if _adcf ==0{delete (_gdc ._bed ,_bagg );
|
|
|
|
|
continue ;};_afgc :=make ([]*textWord ,_adcf );_eeda :=0;for _ ,_bac :=range _aebg {if _ ,_cgcb :=_adga [_bac ];!_cgcb {_afgc [_eeda ]=_bac ;_eeda ++;};};_gdc ._bed [_bagg ]=_afgc ;};};func _gfaga (_bfafa ,_dbgf int )int {if _bfafa < _dbgf {return _bfafa ;
|
|
|
|
|
};return _dbgf ;};func (_fada *textObject )getFont (_baae string )(*_ac .PdfFont ,error ){if _fada ._fffc ._cg !=nil {_fada ._fffc ._fc ++;_bggf ,_baaeg :=_fada ._fffc ._cg [_baae ];if _baaeg {_bggf ._eaf =_fada ._fffc ._fc ;return _bggf ._aaa ,nil ;};
|
|
|
|
|
};_cabg ,_fdec :=_fada .getFontDirect (_baae );if _fdec !=nil {return nil ,_fdec ;};if _fada ._fffc ._cg !=nil {_gggge :=fontEntry {_cabg ,_fada ._fffc ._fc };if len (_fada ._fffc ._cg )>=_caf {var _dgb []string ;for _cdff :=range _fada ._fffc ._cg {_dgb =append (_dgb ,_cdff );
|
|
|
|
|
};_e .Slice (_dgb ,func (_aade ,_gfgb int )bool {return _fada ._fffc ._cg [_dgb [_aade ]]._eaf < _fada ._fffc ._cg [_dgb [_gfgb ]]._eaf ;});delete (_fada ._fffc ._cg ,_dgb [0]);};_fada ._fffc ._cg [_baae ]=_gggge ;};return _cabg ,nil ;};type shapesState struct{_aefb _cc .Matrix ;
|
|
|
|
|
_faa _cc .Matrix ;_aceg []*subpath ;_gbag bool ;_adac _cc .Point ;_agfb *textObject ;};func _ggfbc (_agfgd []rulingList )(rulingList ,rulingList ){var _ffgc rulingList ;for _ ,_eeffe :=range _agfgd {_ffgc =append (_ffgc ,_eeffe ...);};return _ffgc .vertsHorzs ();
|
|
|
|
|
};func _cfe (_bdec ,_cfb _ac .PdfRectangle )bool {return _bdec .Lly <=_cfb .Ury &&_cfb .Lly <=_bdec .Ury };type textResult struct{_eea PageText ;_fcf int ;_dbd int ;};func (_cfg *wordBag )allWords ()[]*textWord {var _gadee []*textWord ;for _ ,_eacf :=range _cfg ._bed {_gadee =append (_gadee ,_eacf ...);
|
|
|
|
|
};return _gadee ;};func (_ggga rulingList )vertsHorzs ()(rulingList ,rulingList ){var _fbee ,_fcbe rulingList ;for _ ,_aacaa :=range _ggga {switch _aacaa ._ddca {case _eace :_fbee =append (_fbee ,_aacaa );case _gega :_fcbe =append (_fcbe ,_aacaa );};};
|
|
|
|
|
return _fbee ,_fcbe ;};func (_dggce *textPara )writeCellText (_dedba _c .Writer ){for _fbgc ,_fegac :=range _dggce ._acee {_gcdf :=_fegac .text ();_bdce :=_dfgf &&_fegac .endsInHyphen ()&&_fbgc !=len (_dggce ._acee )-1;if _bdce {_gcdf =_ddcf (_gcdf );};
|
|
|
|
|
_dedba .Write ([]byte (_gcdf ));if !(_bdce ||_fbgc ==len (_dggce ._acee )-1){_dedba .Write ([]byte (_agae (_fegac ._dffg ,_dggce ._acee [_fbgc +1]._dffg )));};};};func (_gcbg *textTable )reduce ()*textTable {_bdcf :=make ([]int ,0,_gcbg ._dege );_bgeg :=make ([]int ,0,_gcbg ._eaed );
|
|
|
|
|
for _dbbb :=0;_dbbb < _gcbg ._dege ;_dbbb ++{if !_gcbg .emptyRow (_dbbb ){_bdcf =append (_bdcf ,_dbbb );};};for _fgffb :=0;_fgffb < _gcbg ._eaed ;_fgffb ++{if !_gcbg .emptyColumn (_fgffb ){_bgeg =append (_bgeg ,_fgffb );};};if len (_bdcf )==_gcbg ._dege &&len (_bgeg )==_gcbg ._eaed {return _gcbg ;
|
|
|
|
|
};_bcdfc :=textTable {_aebf :_gcbg ._aebf ,_eaed :len (_bgeg ),_dege :len (_bdcf ),_gebab :make (map[uint64 ]*textPara ,len (_bgeg )*len (_bdcf ))};if _gaa {_dc .Log .Info ("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064",_gcbg ._eaed ,_gcbg ._dege ,len (_bgeg ),len (_bdcf ));
|
|
|
|
|
_dc .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_bgeg );_dc .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_bdcf );};for _fcece ,_cebdb :=range _bdcf {for _ffgf ,_eeebb :=range _bgeg {_adccd :=_gcbg .get (_eeebb ,_cebdb );
|
|
|
|
|
if _adccd ==nil {continue ;};if _gaa {_ag .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_ffgf ,_fcece ,_eeebb ,_cebdb ,_geac (_adccd .text (),50));};_bcdfc .put (_ffgf ,_fcece ,_adccd );
|
|
|
|
|
};};return &_bcdfc ;};func (_beef *wordBag )removeDuplicates (){if _bdgd {_dc .Log .Info ("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_beef .text ());};for _ ,_fdg :=range _beef .depthIndexes (){if len (_beef ._bed [_fdg ])==0{continue ;
|
|
|
|
|
};_ggfb :=_beef ._bed [_fdg ][0];_gbfg :=_fdegb *_ggfb ._fbdg ;_acfa :=_ggfb ._cfba ;for _ ,_cfgd :=range _beef .depthBand (_acfa ,_acfa +_gbfg ){_gbbe :=map[*textWord ]struct{}{};_abbfc :=_beef ._bed [_cfgd ];for _ ,_gcea :=range _abbfc {if _ ,_cfee :=_gbbe [_gcea ];
|
|
|
|
|
_cfee {continue ;};for _ ,_fadf :=range _abbfc {if _ ,_ebeg :=_gbbe [_fadf ];_ebeg {continue ;};if _fadf !=_gcea &&_fadf ._ggbbf ==_gcea ._ggbbf &&_gc .Abs (_fadf .Llx -_gcea .Llx )< _gbfg &&_gc .Abs (_fadf .Urx -_gcea .Urx )< _gbfg &&_gc .Abs (_fadf .Lly -_gcea .Lly )< _gbfg &&_gc .Abs (_fadf .Ury -_gcea .Ury )< _gbfg {_gbbe [_fadf ]=struct{}{};
|
|
|
|
|
};};};if len (_gbbe )> 0{_gbafe :=0;for _ ,_gfbb :=range _abbfc {if _ ,_edefg :=_gbbe [_gfbb ];!_edefg {_abbfc [_gbafe ]=_gfbb ;_gbafe ++;};};_beef ._bed [_cfgd ]=_abbfc [:len (_abbfc )-len (_gbbe )];if len (_beef ._bed [_cfgd ])==0{delete (_beef ._bed ,_cfgd );
|
|
|
|
|
};};};};};func (_edeb *PageText )computeViews (){var _efb rulingList ;if _gada {_gfb :=_bgac (_edeb ._dedbd );_efb =append (_efb ,_gfb ...);};if _defe {_afgg :=_edbd (_edeb ._fbfb );_efb =append (_efb ,_afgg ...);};_efb ,_gbaa :=_efb .toTilings ();var _dadc paraList ;
|
|
|
|
|
_faf :=len (_edeb ._ccgd );for _ada :=0;_ada < 360&&_faf > 0;_ada +=90{_cgd :=make ([]*textMark ,0,len (_edeb ._ccgd )-_faf );for _ ,_acfd :=range _edeb ._ccgd {if _acfd ._adfa ==_ada {_cgd =append (_cgd ,_acfd );};};if len (_cgd )> 0{_bdgc :=_afbb (_cgd ,_edeb ._ace ,_efb ,_gbaa );
|
|
|
|
|
_dadc =append (_dadc ,_bdgc ...);_faf -=len (_cgd );};};_agfg :=new (_ab .Buffer );_dadc .writeText (_agfg );_edeb ._ebgd =_agfg .String ();_edeb ._eabf =_dadc .toTextMarks ();_edeb ._ffdb =_dadc .tables ();if _gaa {_dc .Log .Info ("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064",len (_edeb ._ffdb ));
|
|
|
|
|
};};
|
|
|
|
|
|
|
|
|
|
// ImageExtractOptions contains options for controlling image extraction from
|
|
|
|
|
// PDF pages.
|
|
|
|
|
type ImageExtractOptions struct{IncludeInlineStencilMasks bool ;};func _abgee (_fdd ,_ffcg _ac .PdfRectangle )(_ac .PdfRectangle ,bool ){if !_abcd (_fdd ,_ffcg ){return _ac .PdfRectangle {},false ;};return _ac .PdfRectangle {Llx :_gc .Max (_fdd .Llx ,_ffcg .Llx ),Urx :_gc .Min (_fdd .Urx ,_ffcg .Urx ),Lly :_gc .Max (_fdd .Lly ,_ffcg .Lly ),Ury :_gc .Min (_fdd .Ury ,_ffcg .Ury )},true ;
|
|
|
|
|
};func (_bgcae rulingList )primMinMax ()(float64 ,float64 ){_dead ,_gfab :=_bgcae [0]._dddg ,_bgcae [0]._dddg ;for _ ,_aggd :=range _bgcae [1:]{if _aggd ._dddg < _dead {_dead =_aggd ._dddg ;}else if _aggd ._dddg > _gfab {_gfab =_aggd ._dddg ;};};return _dead ,_gfab ;
|
|
|
|
|
};func (_eda *stateStack )size ()int {return len (*_eda )};var _eefa =map[markKind ]string {_beaeb :"\u0073\u0074\u0072\u006f\u006b\u0065",_gbef :"\u0066\u0069\u006c\u006c",_abgf :"\u0061u\u0067\u006d\u0065\u006e\u0074"};type cachedImage struct{_ca *_ac .Image ;
|
|
|
|
|
_fg _ac .PdfColorspace ;};func _gbaf (_ddcg func (*wordBag ,*textWord ,float64 )bool ,_babc float64 )func (*wordBag ,*textWord )bool {return func (_egf *wordBag ,_efca *textWord )bool {return _ddcg (_egf ,_efca ,_babc )};};func (_dcce *shapesState )devicePoint (_ccce ,_caff float64 )_cc .Point {_cafa :=_dcce ._faa .Mult (_dcce ._aefb );
|
|
|
|
|
_ccce ,_caff =_cafa .Transform (_ccce ,_caff );return _cc .NewPoint (_ccce ,_caff );};func (_dece *textTable )emptyColumn (_eabbc int )bool {for _ecac :=0;_ecac < _dece ._dege ;_ecac ++{_dfcd :=_dece .get (_eabbc ,_ecac );if _dfcd !=nil &&_dfcd .text ()!=""{return false ;
|
|
|
|
|
};};return true ;};func _ffcb (_fgfdb _ac .PdfRectangle )*ruling {return &ruling {_ddca :_eace ,_dddg :_fgfdb .Urx ,_daa :_fgfdb .Lly ,_defb :_fgfdb .Ury };};func _bbec (_ecdc bounded )float64 {return -_ecdc .bbox ().Lly };
|
|
|
|
|
|
|
|
|
|
// String returns a description of `b`.
|
|
|
|
|
func (_abba *wordBag )String ()string {var _beec []string ;for _ ,_cbabg :=range _abba .depthIndexes (){_fcc :=_abba ._bed [_cbabg ];for _ ,_gbb :=range _fcc {_beec =append (_beec ,_gbb ._ggbbf );};};return _ag .Sprintf ("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071",_abba .PdfRectangle ,_abba ._afb ,len (_beec ),_beec );
|
|
|
|
|
};func _dgfa (_gfbf []TextMark ,_cgdf *int ,_bcea string )[]TextMark {_efge :=_cab ;_efge .Text =_bcea ;return _gfgcd (_gfbf ,_cgdf ,_efge );};func (_aeee rulingList )snapToGroupsDirection ()rulingList {_aeee .sortStrict ();_afab :=make (map[*ruling ]rulingList ,len (_aeee ));
|
|
|
|
|
_dgdff :=_aeee [0];_bacbb :=func (_gggb *ruling ){_dgdff =_gggb ;_afab [_dgdff ]=rulingList {_gggb }};_bacbb (_aeee [0]);for _ ,_eccb :=range _aeee [1:]{if _eccb ._dddg < _dgdff ._dddg -_ceda {_dc .Log .Error ("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073",_dgdff ,_eccb );
|
|
|
|
|
};if _eccb ._dddg > _dgdff ._dddg +_gfaff {_bacbb (_eccb );}else {_afab [_dgdff ]=append (_afab [_dgdff ],_eccb );};};_cabe :=make (map[*ruling ]float64 ,len (_afab ));_bafba :=make (map[*ruling ]*ruling ,len (_aeee ));for _bafaa ,_ccgaf :=range _afab {_cabe [_bafaa ]=_ccgaf .mergePrimary ();
|
|
|
|
|
for _ ,_decg :=range _ccgaf {_bafba [_decg ]=_bafaa ;};};for _ ,_eacge :=range _aeee {_eacge ._dddg =_cabe [_bafba [_eacge ]];};_agdc :=make (rulingList ,0,len (_aeee ));for _ ,_agggb :=range _afab {_ccgg :=_agggb .splitSec ();for _aaefe ,_ddgdd :=range _ccgg {_ffeb :=_ddgdd .merge ();
|
|
|
|
|
if len (_agdc )> 0{_fffcc :=_agdc [len (_agdc )-1];if _fffcc .alignsPrimary (_ffeb )&&_fffcc .alignsSec (_ffeb ){_dc .Log .Error ("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073",_aaefe ,_fffcc ,_ffeb );
|
|
|
|
|
continue ;};};_agdc =append (_agdc ,_ffeb );};};_agdc .sortStrict ();return _agdc ;};func (_cbed paraList )tables ()[]TextTable {var _baef []TextTable ;if _gaa {_dc .Log .Info ("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a");
|
|
|
|
|
};for _ ,_abd :=range _cbed {_adag :=_abd ._eecb ;if _adag !=nil &&_adag .isExportable (){_baef =append (_baef ,_adag .toTextTable ());};};return _baef ;};func _fbcb (_gdeed map[int ][]float64 )[]int {_afdc :=make ([]int ,len (_gdeed ));_fgga :=0;for _dcefb :=range _gdeed {_afdc [_fgga ]=_dcefb ;
|
|
|
|
|
_fgga ++;};_e .Ints (_afdc );return _afdc ;};func (_fac *shapesState )clearPath (){_fac ._aceg =nil ;_fac ._gbag =false ;if _dfgc {_dc .Log .Info ("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073",_fac );};};func (_cac *Extractor )extractPageText (_bbeg string ,_gcfc *_ac .PdfPageResources ,_bfa _cc .Matrix ,_fce int )(*PageText ,int ,int ,error ){_dc .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_fce );
|
|
|
|
|
_cae :=&PageText {_ace :_cac ._cf };_ceb :=_dcad (_cac ._cf );_agc :=stateStack {&_ceb };_bfe :=_dbde (_cac ,_gcfc ,_dg .GraphicsState {},&_ceb ,&_agc );_cgc :=shapesState {_faa :_bfa ,_aefb :_cc .IdentityMatrix (),_agfb :_bfe };var _ad bool ;if _fce > _efc {_dba :=_g .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");
|
|
|
|
|
_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_fce ,_dba );
|
|
|
|
|
return _cae ,_ceb ._acc ,_ceb ._dcge ,_dba ;};_gfe :=_dg .NewContentStreamParser (_bbeg );_dac ,_bfg :=_gfe .Parse ();if _bfg !=nil {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bfg );
|
|
|
|
|
return _cae ,_ceb ._acc ,_ceb ._dcge ,_bfg ;};_gfa :=_dg .NewContentStreamProcessor (*_dac );_gfa .AddHandler (_dg .HandlerConditionEnumAllOperands ,"",func (_fbfa *_dg .ContentStreamOperation ,_cba _dg .GraphicsState ,_caae *_ac .PdfPageResources )error {_ffa :=_fbfa .Operand ;
|
|
|
|
|
if _fdc {_dc .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_fbfa );};switch _ffa {case "\u0071":if _dfgc {_dc .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_cgc ._aefb );};_agc .push (&_ceb );case "\u0051":if !_agc .empty (){if len (_agc )>=2{_agc .pop ();
|
|
|
|
|
};_ceb =*_agc .top ();};_cgc ._aefb =_cba .CTM ;if _dfgc {_dc .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_cgc ._aefb );};case "\u0042\u0054":if _ad {_dc .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
|
|
|
|
|
_cae ._ccgd =append (_cae ._ccgd ,_bfe ._fef ...);};_ad =true ;_dae :=_cba ;_dae .CTM =_bfa .Mult (_dae .CTM );_bfe =_dbde (_cac ,_caae ,_dae ,&_ceb ,&_agc );_cgc ._agfb =_bfe ;case "\u0045\u0054":if !_ad {_dc .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
|
|
|
|
|
};_ad =false ;_cae ._ccgd =append (_cae ._ccgd ,_bfe ._fef ...);_bfe .reset ();case "\u0054\u002a":_bfe .nextLine ();case "\u0054\u0064":if _dbe ,_gcd :=_bfe .checkOp (_fbfa ,2,true );!_dbe {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gcd );
|
|
|
|
|
return _gcd ;};_feb ,_ggg ,_bcc :=_gdfg (_fbfa .Params );if _bcc !=nil {return _bcc ;};_bfe .moveText (_feb ,_ggg );case "\u0054\u0044":if _gdd ,_ecg :=_bfe .checkOp (_fbfa ,2,true );!_gdd {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ecg );
|
|
|
|
|
return _ecg ;};_fea ,_gbc ,_aeg :=_gdfg (_fbfa .Params );if _aeg !=nil {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aeg );return _aeg ;};_bfe .moveTextSetLeading (_fea ,_gbc );case "\u0054\u006a":if _aac ,_gade :=_bfe .checkOp (_fbfa ,1,true );
|
|
|
|
|
!_aac {_dc .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_fbfa ,_gade );return _gade ;};_aad ,_cdf :=_be .GetStringBytes (_fbfa .Params [0]);if !_cdf {_dc .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_fbfa );
|
|
|
|
|
return _be .ErrTypeError ;};return _bfe .showText (_aad );case "\u0054\u004a":if _fbd ,_bdd :=_bfe .checkOp (_fbfa ,1,true );!_fbd {_dc .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bdd );return _bdd ;
|
|
|
|
|
};_ffg ,_dcg :=_be .GetArray (_fbfa .Params [0]);if !_dcg {_dc .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_fbfa );
|
|
|
|
|
return _bfg ;};return _bfe .showTextAdjusted (_ffg );case "\u0027":if _cag ,_eff :=_bfe .checkOp (_fbfa ,1,true );!_cag {_dc .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_eff );return _eff ;};_bag ,_dad :=_be .GetStringBytes (_fbfa .Params [0]);
|
|
|
|
|
if !_dad {_dc .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_fbfa );return _be .ErrTypeError ;};_bfe .nextLine ();return _bfe .showText (_bag );
|
|
|
|
|
case "\u0022":if _edd ,_gba :=_bfe .checkOp (_fbfa ,3,true );!_edd {_dc .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gba );return _gba ;};_eaa ,_bee ,_ccb :=_gdfg (_fbfa .Params [:2]);if _ccb !=nil {return _ccb ;
|
|
|
|
|
};_bcdb ,_eddc :=_be .GetStringBytes (_fbfa .Params [2]);if !_eddc {_dc .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_fbfa );
|
|
|
|
|
return _be .ErrTypeError ;};_bfe .setCharSpacing (_eaa );_bfe .setWordSpacing (_bee );_bfe .nextLine ();return _bfe .showText (_bcdb );case "\u0054\u004c":_gce ,_agg :=_cbd (_fbfa );if _agg !=nil {_dc .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_agg );
|
|
|
|
|
return _agg ;};_bfe .setTextLeading (_gce );case "\u0054\u0063":_dda ,_gag :=_cbd (_fbfa );if _gag !=nil {_dc .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gag );return _gag ;};_bfe .setCharSpacing (_dda );
|
|
|
|
|
case "\u0054\u0066":if _ecgd ,_bage :=_bfe .checkOp (_fbfa ,2,true );!_ecgd {_dc .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bage );return _bage ;};_feeb ,_cea :=_be .GetNameVal (_fbfa .Params [0]);
|
|
|
|
|
if !_cea {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_fbfa );return _be .ErrTypeError ;};_ffaa ,_ecgb :=_be .GetNumberAsFloat (_fbfa .Params [1]);
|
|
|
|
|
if !_cea {_dc .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fbfa ,_ecgb );
|
|
|
|
|
return _ecgb ;};_ecgb =_bfe .setFont (_feeb ,_ffaa );_bfe ._acg =_abf .Is (_ecgb ,_be .ErrNotSupported );if _ecgb !=nil &&!_bfe ._acg {return _ecgb ;};case "\u0054\u006d":if _bgg ,_ecf :=_bfe .checkOp (_fbfa ,6,true );!_bgg {_dc .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ecf );
|
|
|
|
|
return _ecf ;};_abfd ,_bgc :=_be .GetNumbersAsFloat (_fbfa .Params );if _bgc !=nil {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bgc );return _bgc ;};_bfe .setTextMatrix (_abfd );case "\u0054\u0072":if _add ,_dcgd :=_bfe .checkOp (_fbfa ,1,true );
|
|
|
|
|
!_add {_dc .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dcgd );return _dcgd ;};_bfaa ,_ebg :=_be .GetIntVal (_fbfa .Params [0]);if !_ebg {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_fbfa );
|
|
|
|
|
return _be .ErrTypeError ;};_bfe .setTextRenderMode (_bfaa );case "\u0054\u0073":if _aacc ,_cbf :=_bfe .checkOp (_fbfa ,1,true );!_aacc {_dc .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cbf );return _cbf ;
|
|
|
|
|
};_ebgb ,_gdaf :=_be .GetNumberAsFloat (_fbfa .Params [0]);if _gdaf !=nil {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gdaf );return _gdaf ;};_bfe .setTextRise (_ebgb );case "\u0054\u0077":if _ebgba ,_eec :=_bfe .checkOp (_fbfa ,1,true );
|
|
|
|
|
!_ebgba {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_eec );return _eec ;};_dbec ,_fde :=_be .GetNumberAsFloat (_fbfa .Params [0]);if _fde !=nil {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fde );
|
|
|
|
|
return _fde ;};_bfe .setWordSpacing (_dbec );case "\u0054\u007a":if _dgaf ,_ade :=_bfe .checkOp (_fbfa ,1,true );!_dgaf {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ade );return _ade ;};_adde ,_gcff :=_be .GetNumberAsFloat (_fbfa .Params [0]);
|
|
|
|
|
if _gcff !=nil {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gcff );return _gcff ;};_bfe .setHorizScaling (_adde );case "\u0063\u006d":_cgc ._aefb =_cba .CTM ;if _cgc ._aefb .Singular (){_ceaa :=_cc .IdentityMatrix ().Translate (_cgc ._aefb .Translation ());
|
|
|
|
|
_dc .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_cgc ._aefb ,_ceaa );_cgc ._aefb =_ceaa ;};if _dfgc {_dc .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_cgc ._aefb );};case "\u006d":if len (_fbfa .Params )!=2{_dc .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_ef );
|
|
|
|
|
return nil ;};_cdg ,_eca :=_be .GetNumbersAsFloat (_fbfa .Params );if _eca !=nil {return _eca ;};_dc .Log .Debug ("\u004d\u006f\u0076\u0065\u0020\u0074\u006f\u003a\u0020\u0025\u002e\u0032\u0066",_cdg );_cgc .moveTo (_cdg [0],_cdg [1]);case "\u006c":if len (_fbfa .Params )!=2{_dc .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_ef );
|
|
|
|
|
return nil ;};_deg ,_bdfb :=_be .GetNumbersAsFloat (_fbfa .Params );if _bdfb !=nil {return _bdfb ;};_cgc .lineTo (_deg [0],_deg [1]);case "\u0063":if len (_fbfa .Params )!=6{return _ef ;};_geb ,_abg :=_be .GetNumbersAsFloat (_fbfa .Params );if _abg !=nil {return _abg ;
|
|
|
|
|
};_dc .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_geb );_cgc .cubicTo (_geb [0],_geb [1],_geb [2],_geb [3],_geb [4],_geb [5]);case "\u0076","\u0079":if len (_fbfa .Params )!=4{return _ef ;
|
|
|
|
|
};_cbg ,_fca :=_be .GetNumbersAsFloat (_fbfa .Params );if _fca !=nil {return _fca ;};_dc .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_cbg );_cgc .quadraticTo (_cbg [0],_cbg [1],_cbg [2],_cbg [3]);
|
|
|
|
|
case "\u0068":_cgc .closePath ();case "\u0072\u0065":if len (_fbfa .Params )!=4{return _ef ;};_edda ,_fbec :=_be .GetNumbersAsFloat (_fbfa .Params );if _fbec !=nil {return _fbec ;};_cgc .drawRectangle (_edda [0],_edda [1],_edda [2],_edda [3]);_cgc .closePath ();
|
|
|
|
|
case "\u0053":_cgc .stroke (&_cae ._dedbd );_cgc .clearPath ();case "\u0073":_cgc .closePath ();_cgc .stroke (&_cae ._dedbd );_cgc .clearPath ();case "\u0046":_cgc .fill (&_cae ._fbfb );_cgc .clearPath ();case "\u0066","\u0066\u002a":_cgc .closePath ();
|
|
|
|
|
_cgc .fill (&_cae ._fbfb );_cgc .clearPath ();case "\u0042","\u0042\u002a":_cgc .fill (&_cae ._fbfb );_cgc .stroke (&_cae ._dedbd );_cgc .clearPath ();case "\u0062","\u0062\u002a":_cgc .closePath ();_cgc .fill (&_cae ._fbfb );_cgc .stroke (&_cae ._dedbd );
|
|
|
|
|
_cgc .clearPath ();case "\u006e":_cgc .clearPath ();case "\u0044\u006f":if len (_fbfa .Params )==0{_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_fbfa .Params );
|
|
|
|
|
return _be .ErrRangeError ;};_gagf ,_gac :=_be .GetName (_fbfa .Params [0]);if !_gac {_dc .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_fbfa .Params [0]);
|
|
|
|
|
return _be .ErrTypeError ;};_ ,_ecfd :=_caae .GetXObjectByName (*_gagf );if _ecfd !=_ac .XObjectTypeForm {break ;};_bccb ,_gac :=_cac ._ee [_gagf .String ()];if !_gac {_ebf ,_beg :=_caae .GetXObjectFormByName (*_gagf );if _beg !=nil {_dc .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_beg );
|
|
|
|
|
return _beg ;};_gggf ,_beg :=_ebf .GetContentStream ();if _beg !=nil {_dc .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_beg );return _beg ;};_bca :=_ebf .Resources ;if _bca ==nil {_bca =_caae ;};_ede ,_aea ,_feg ,_beg :=_cac .extractPageText (string (_gggf ),_bca ,_bfa .Mult (_cba .CTM ),_fce +1);
|
|
|
|
|
if _beg !=nil {_dc .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_beg );return _beg ;};_bccb =textResult {*_ede ,_aea ,_feg };_cac ._ee [_gagf .String ()]=_bccb ;};_cgc ._aefb =_cba .CTM ;if _dfgc {_dc .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_cgc ._aefb );
|
|
|
|
|
};_cae ._ccgd =append (_cae ._ccgd ,_bccb ._eea ._ccgd ...);_cae ._dedbd =append (_cae ._dedbd ,_bccb ._eea ._dedbd ...);_cae ._fbfb =append (_cae ._fbfb ,_bccb ._eea ._fbfb ...);_ceb ._acc +=_bccb ._fcf ;_ceb ._dcge +=_bccb ._dbd ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_bfe ._dcb .ColorspaceNonStroking =_cba .ColorspaceNonStroking ;
|
|
|
|
|
_bfe ._dcb .ColorNonStroking =_cba .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_bfe ._dcb .ColorspaceStroking =_cba .ColorspaceStroking ;_bfe ._dcb .ColorStroking =_cba .ColorStroking ;};return nil ;
|
|
|
|
|
});_bfg =_gfa .Process (_gcfc );return _cae ,_ceb ._acc ,_ceb ._dcge ,_bfg ;};
|