mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
790 lines
181 KiB
Go
790 lines
181 KiB
Go
//
|
||
// Copyright 2020 FoxyUtils ehf. All rights reserved.
|
||
//
|
||
// This is a commercial product and requires a license to operate.
|
||
// A trial license can be obtained at https://unidoc.io
|
||
//
|
||
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
|
||
//
|
||
// Use of this source code is governed by the UniDoc End User License Agreement
|
||
// terms that can be accessed at https://unidoc.io/eula/
|
||
|
||
//
|
||
// Package extractor is used for quickly extracting PDF content through a simple interface.
|
||
// Currently offers functionality for extracting textual content.
|
||
//
|
||
package extractor ;import (_aga "bytes";_a "errors";_f "fmt";_ef "github.com/unidoc/unipdf/v3/common";_gef "github.com/unidoc/unipdf/v3/contentstream";_gc "github.com/unidoc/unipdf/v3/core";_ac "github.com/unidoc/unipdf/v3/internal/license";_eb "github.com/unidoc/unipdf/v3/internal/textencoding";
|
||
_cb "github.com/unidoc/unipdf/v3/internal/transform";_fb "github.com/unidoc/unipdf/v3/model";_ea "golang.org/x/text/unicode/norm";_aee "golang.org/x/xerrors";_b "image/color";_c "io";_d "math";_ae "regexp";_e "sort";_ag "strings";_cd "unicode";_ge "unicode/utf8";
|
||
)

// numBorders returns how many of the tile's four border flags are set.
func (_ddba gridTile) numBorders() int {
	count := 0
	if _ddba._adgdg {
		count++
	}
	if _ddba._bdgc {
		count++
	}
	if _ddba._cbee {
		count++
	}
	if _ddba._fga {
		count++
	}
	return count
}

// scanBand examines the words of `_baba` whose depth lies between `_bba` and `_dega`, each
// widened by `_acge` times the font size `_eeead` has on entry, and returns how many of them
// pass every filter.
//
// A word is counted when `_adgb(_eeead, word)` accepts it, when its font-size mismatch with
// `_eeead` does not exceed `_dddf` (only checked for `_dddf` > 0) and when `_eeead.blocked(word)`
// is false.
//   - `_gegc` false: every counted word is pulled into `_eeead` and afterwards removed from
//     `_baba`. `_gegc` true: nothing is moved and a depth bin is abandoned after its first
//     counted word.
//   - `_edef` false: the depth limits grow to include each counted word.
//
// `_bged` is not used by the body.
func (_baba *wordBag) scanBand(_bged string, _eeead *wordBag, _adgb func(_aea *wordBag, _bcg *textWord) bool, _bba, _dega, _dddf float64, _gegc, _edef bool) int {
	// The margin is derived once from the font size on entry; pulling words can raise
	// _eeead._fdb later, which must not widen the band.
	margin := _acge * _eeead._fdb

	var removals map[int]map[*textWord]struct{}
	if !_gegc {
		removals = _baba.makeRemovals()
	}

	matched := 0
	for _, depthIdx := range _baba.depthBand(_bba-margin, _dega+margin) {
		for _, word := range _baba._ecg[depthIdx] {
			if !(_bba-margin <= word._afcda && word._afcda <= _dega+margin) {
				continue
			}
			if !_adgb(_eeead, word) {
				continue
			}
			// Two measures of font-size mismatch; the smaller one is compared to the tolerance.
			relDiff := 2.0 * _d.Abs(word._bgfca-_eeead._fdb) / (word._bgfca + _eeead._fdb)
			maxRatio := _d.Max(word._bgfca/_eeead._fdb, _eeead._fdb/word._bgfca)
			if mismatch := _d.Min(relDiff, maxRatio); _dddf > 0 && mismatch > _dddf {
				continue
			}
			if _eeead.blocked(word) {
				continue
			}
			if !_gegc {
				_eeead.pullWord(word, depthIdx, removals)
			}
			matched++
			if !_edef {
				if word._afcda < _bba {
					_bba = word._afcda
				}
				if word._afcda > _dega {
					_dega = word._afcda
				}
			}
			if _gegc {
				break
			}
		}
	}

	if !_gegc {
		_baba.applyRemovals(removals)
	}
	return matched
}

// intersections returns, for every ruling index that takes part in a crossing, the set of
// indexes of the rulings it intersects. Only pairs made of one `_ggddf` ruling and one `_bgge`
// ruling are tested. nil is returned when there are fewer than `_gede`+1 rulings of the first
// kind or `_cfae`+1 of the second, or when there are more than `_edb` of them in total.
func (_aagf rulingList) intersections() map[int]intSet {
	var vertIdx, horzIdx []int
	for i, r := range _aagf {
		if r._fggg == _ggddf {
			vertIdx = append(vertIdx, i)
		} else if r._fggg == _bgge {
			horzIdx = append(horzIdx, i)
		}
	}
	if len(vertIdx) < _gede+1 || len(horzIdx) < _cfae+1 {
		return nil
	}
	if len(vertIdx)+len(horzIdx) > _edb {
		_ef.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064", len(_aagf), len(vertIdx), len(horzIdx))
		return nil
	}

	crossings := make(map[int]intSet, len(vertIdx)+len(horzIdx))
	for _, v := range vertIdx {
		for _, h := range horzIdx {
			if !_aagf[v].intersects(_aagf[h]) {
				continue
			}
			if _, ok := crossings[v]; !ok {
				crossings[v] = make(intSet)
			}
			if _, ok := crossings[h]; !ok {
				crossings[h] = make(intSet)
			}
			crossings[v].add(h)
			crossings[h].add(v)
		}
	}
	return crossings
}

// extractFormImages extracts the images found in the content stream of the form XObject named
// `_ccb`, looked up in `_cgd`. The form's own resources are used when it has any, otherwise
// `_cgd`. A missing form is not an error. `_fdg` is not used by the body.
func (_fcf *imageExtractContext) extractFormImages(_ccb *_gc.PdfObjectName, _fdg _gef.GraphicsState, _cgd *_fb.PdfPageResources) error {
	form, err := _cgd.GetXObjectFormByName(*_ccb)
	if err != nil {
		return err
	}
	if form == nil {
		return nil
	}
	contents, err := form.GetContentStream()
	if err != nil {
		return err
	}
	resources := form.Resources
	if resources == nil {
		resources = _cgd
	}
	if err = _fcf.extractContentStreamImages(string(contents), resources); err != nil {
		return err
	}
	_fcf._eff++ // one more form processed successfully
	return nil
}

// asRuling converts the rectangle into a ruling marked with kind `_ceeed`. The ruling's primary
// coordinate is the midpoint of the rectangle's short axis and its extent is the rectangle's
// long axis. It returns (nil, false) when checkWidth rejects the rectangle or when the
// rectangle's kind is neither `_ggddf` nor `_bgge`.
func (_bcaed rectRuling) asRuling() (*ruling, bool) {
	out := ruling{_fggg: _bcaed._dfgc, Color: _bcaed.Color, _fccd: _ceeed}

	// lo/hi are the two edges handed to checkWidth; failFormat is the matching log message.
	lo, hi := _bcaed.Llx, _bcaed.Urx
	failFormat := "\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076"

	switch _bcaed._dfgc {
	case _ggddf:
		out._gaeba = 0.5 * (_bcaed.Llx + _bcaed.Urx)
		out._gfce = _bcaed.Lly
		out._aegc = _bcaed.Ury
	case _bgge:
		out._gaeba = 0.5 * (_bcaed.Lly + _bcaed.Ury)
		out._gfce = _bcaed.Llx
		out._aegc = _bcaed.Urx
		lo, hi = _bcaed.Lly, _bcaed.Ury
		failFormat = "\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076"
	default:
		_ef.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _bcaed._dfgc)
		return nil, false
	}

	width, ok := _bcaed.checkWidth(lo, hi)
	if !ok {
		if _bfgd {
			_ef.Log.Error(failFormat, _bcaed)
		}
		return nil, false
	}
	out._eaea = width
	return &out, true
}

// _gdfce reports the result of `_bcba` applied to the `_gcag` rectangles of the two paragraphs.
func _gdfce(_gceag, _dggfd *textPara) bool {
	return _bcba(_gceag._gcag, _dggfd._gcag)
}
|
||
func _acfdc (_ffge _fb .PdfRectangle )rulingKind {_dabd :=_ffge .Width ();_fcgb :=_ffge .Height ();if _dabd > _fcgb {if _dabd >=_daac {return _bgge ;};}else {if _fcgb >=_daac {return _ggddf ;};};return _cgbd ;};func (_bbda *textLine )endsInHyphen ()bool {_cgag :=_bbda ._aacg [len (_bbda ._aacg )-1];
|
||
_fbd :=_cgag ._ecgc ;_cecc ,_caag :=_ge .DecodeLastRuneInString (_fbd );if _caag <=0||!_cd .Is (_cd .Hyphen ,_cecc ){return false ;};if _cgag ._fecff &&_fdbaf (_fbd ){return true ;};return _fdbaf (_bbda .text ());};func _bbdc (_cffg []int )[]int {_gacg :=make ([]int ,len (_cffg ));
|
||
for _ggcef ,_affb :=range _cffg {_gacg [len (_cffg )-1-_ggcef ]=_affb ;};return _gacg ;};
|
||
|
||
// String returns a description of `state`.
|
||
func (_aceb *textState )String ()string {_aecc :="\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]";if _aceb ._dec !=nil {_aecc =_aceb ._dec .BaseFont ();};return _f .Sprintf ("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071",_aceb ._dgae ,_aceb ._dgd ,_aceb ._gbgg ,_aecc );
|
||
};func (_ffeg *wordBag )depthBand (_addg ,_agege float64 )[]int {if len (_ffeg ._ecg )==0{return nil ;};return _ffeg .depthRange (_ffeg .getDepthIdx (_addg ),_ffeg .getDepthIdx (_agege ));};func (_ecf *stateStack )pop ()*textState {if _ecf .empty (){return nil ;
|
||
};_geeb :=*(*_ecf )[len (*_ecf )-1];*_ecf =(*_ecf )[:len (*_ecf )-1];return &_geeb ;};func _cddbb (_baab ,_afdae int )int {if _baab < _afdae {return _baab ;};return _afdae ;};func _bfaa (_bdcgb _fb .PdfRectangle )*ruling {return &ruling {_fggg :_bgge ,_gaeba :_bdcgb .Lly ,_gfce :_bdcgb .Llx ,_aegc :_bdcgb .Urx };
|
||
};
|
||
|
||
// Marks returns the TextMark collection for a page. It represents all the text on the page.
|
||
func (_agdaa PageText )Marks ()*TextMarkArray {return &TextMarkArray {_cbdg :_agdaa ._aebbg }};type pathSection struct{_acbfc []*subpath ;_b .Color ;};func (_aed *stateStack )push (_aaa *textState ){_agbf :=*_aaa ;*_aed =append (*_aed ,&_agbf )};func _ggdg (_bbbd []rulingList )(rulingList ,rulingList ){var _gagg rulingList ;
|
||
for _ ,_fbfab :=range _bbbd {_gagg =append (_gagg ,_fbfab ...);};return _gagg .vertsHorzs ();};func (_fdag *textPara )fontsize ()float64 {return _fdag ._eggb [0]._agbc };func (_bdcc rulingList )snapToGroupsDirection ()rulingList {_bdcc .sortStrict ();_adege :=make (map[*ruling ]rulingList ,len (_bdcc ));
|
||
_fffa :=_bdcc [0];_ebea :=func (_cdab *ruling ){_fffa =_cdab ;_adege [_fffa ]=rulingList {_cdab }};_ebea (_bdcc [0]);for _ ,_bfcb :=range _bdcc [1:]{if _bfcb ._gaeba < _fffa ._gaeba -_egac {_ef .Log .Error ("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073",_fffa ,_bfcb );
|
||
};if _bfcb ._gaeba > _fffa ._gaeba +_bfefd {_ebea (_bfcb );}else {_adege [_fffa ]=append (_adege [_fffa ],_bfcb );};};_eecgb :=make (map[*ruling ]float64 ,len (_adege ));_ccfb :=make (map[*ruling ]*ruling ,len (_bdcc ));for _gebg ,_ecfae :=range _adege {_eecgb [_gebg ]=_ecfae .mergePrimary ();
|
||
for _ ,_fefg :=range _ecfae {_ccfb [_fefg ]=_gebg ;};};for _ ,_gabd :=range _bdcc {_gabd ._gaeba =_eecgb [_ccfb [_gabd ]];};_gccga :=make (rulingList ,0,len (_bdcc ));for _ ,_bffg :=range _adege {_agfg :=_bffg .splitSec ();for _bgbe ,_edff :=range _agfg {_baec :=_edff .merge ();
|
||
if len (_gccga )> 0{_abdgd :=_gccga [len (_gccga )-1];if _abdgd .alignsPrimary (_baec )&&_abdgd .alignsSec (_baec ){_ef .Log .Error ("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073",_bgbe ,_abdgd ,_baec );
|
||
continue ;};};_gccga =append (_gccga ,_baec );};};_gccga .sortStrict ();return _gccga ;};func (_adbb *ruling )alignsPrimary (_gbdb *ruling )bool {return _adbb ._fggg ==_gbdb ._fggg &&_d .Abs (_adbb ._gaeba -_gbdb ._gaeba )< _bfefd *0.5;};func (_abc *textLine )bbox ()_fb .PdfRectangle {return _abc .PdfRectangle };
|
||
func (_dfcd rulingList )aligned ()bool {if len (_dfcd )< 2{return false ;};_abega :=make (map[*ruling ]int );_abega [_dfcd [0]]=0;for _ ,_gefad :=range _dfcd [1:]{_cfce :=false ;for _bdbbbd :=range _abega {if _gefad .gridIntersecting (_bdbbbd ){_abega [_bdbbbd ]++;
|
||
_cfce =true ;break ;};};if !_cfce {_abega [_gefad ]=0;};};_efcd :=0;for _ ,_dafge :=range _abega {if _dafge ==0{_efcd ++;};};_fadd :=float64 (_efcd )/float64 (len (_dfcd ));_bfddc :=_fadd <=1.0-_gdad ;if _fcffd {_ef .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_bfddc ,_fadd ,_efcd ,len (_dfcd ),_dfcd .String ());
|
||
};return _bfddc ;};func _cagc (_eabb []TextMark ,_eegg *int ,_degc string )[]TextMark {_ebba :=_adeg ;_ebba .Text =_degc ;return _baga (_eabb ,_eegg ,_ebba );};
|
||
|
||
// NewFromContents creates a new extractor from contents and page resources.
|
||
func NewFromContents (contents string ,resources *_fb .PdfPageResources )(*Extractor ,error ){const _af ="\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s";_ab :=&Extractor {_gd :contents ,_be :resources ,_aeb :map[string ]fontEntry {},_dc :map[string ]textResult {}};
|
||
_ac .TrackUse (_af );return _ab ,nil ;};
|
||
|
||
// Extractor stores and offers functionality for extracting content from PDF pages.
|
||
type Extractor struct{_gd string ;_be *_fb .PdfPageResources ;_ged _fb .PdfRectangle ;_aeb map[string ]fontEntry ;_dc map[string ]textResult ;_cf int64 ;_cg int ;};var (_bagd =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};
|
||
);func (_dcc *imageExtractContext )processOperand (_cace *_gef .ContentStreamOperation ,_bee _gef .GraphicsState ,_gcad *_fb .PdfPageResources )error {if _cace .Operand =="\u0042\u0049"&&len (_cace .Params )==1{_cbb ,_ffc :=_cace .Params [0].(*_gef .ContentStreamInlineImage );
|
||
if !_ffc {return nil ;};if _effe ,_abg :=_gc .GetBoolVal (_cbb .ImageMask );_abg {if _effe &&!_dcc ._fd .IncludeInlineStencilMasks {return nil ;};};return _dcc .extractInlineImage (_cbb ,_bee ,_gcad );}else if _cace .Operand =="\u0044\u006f"&&len (_cace .Params )==1{_eae ,_dg :=_gc .GetName (_cace .Params [0]);
|
||
if !_dg {_ef .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");return _bd ;};_ ,_ggd :=_gcad .GetXObjectByName (*_eae );switch _ggd {case _fb .XObjectTypeImage :return _dcc .extractXObjectImage (_eae ,_bee ,_gcad );case _fb .XObjectTypeForm :return _dcc .extractFormImages (_eae ,_bee ,_gcad );
|
||
};};return nil ;};func _eggg (_dbbcg ,_ecbfg int )uint64 {return uint64 (_dbbcg )*0x1000000+uint64 (_ecbfg )};func (_ccba *textObject )setTextMatrix (_gfde []float64 ){if len (_gfde )!=6{_ef .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_gfde ));
|
||
return ;};_fdf ,_edacc ,_abgd ,_ccbe ,_deb ,_dbe :=_gfde [0],_gfde [1],_gfde [2],_gfde [3],_gfde [4],_gfde [5];_ccba ._gfc =_cb .NewMatrix (_fdf ,_edacc ,_abgd ,_ccbe ,_deb ,_dbe );_ccba ._fbgc =_ccba ._gfc ;};func (_adga *subpath )close (){if !_badbe (_adga ._bdfbe [0],_adga .last ()){_adga .add (_adga ._bdfbe [0]);
|
||
};_adga ._fbbd =true ;_adga .removeDuplicates ();};func (_abde *wordBag )applyRemovals (_acea map[int ]map[*textWord ]struct{}){for _ccde ,_cgcc :=range _acea {if len (_cgcc )==0{continue ;};_bbcg :=_abde ._ecg [_ccde ];_dgbb :=len (_bbcg )-len (_cgcc );
|
||
if _dgbb ==0{delete (_abde ._ecg ,_ccde );continue ;};_gaf :=make ([]*textWord ,_dgbb );_gbcc :=0;for _ ,_efaf :=range _bbcg {if _ ,_cga :=_cgcc [_efaf ];!_cga {_gaf [_gbcc ]=_efaf ;_gbcc ++;};};_abde ._ecg [_ccde ]=_gaf ;};};const (_egac =1.0e-6;_ecaf =1.0e-4;
|
||
_egaa =10;_eeaf =6;_acge =0.5;_dfad =0.12;_bdge =0.19;_gdaf =0.04;_eecf =0.04;_fcce =1.0;_afb =0.04;_gaga =0.4;_fabg =0.7;_dcde =1.0;_cbfbf =0.1;_bbaf =1.4;_dgbg =0.46;_gffb =0.02;_fbf =0.2;_ddga =0.5;_dfff =4;_gegg =4.0;_eggc =6;_acdad =0.3;_ebcc =0.01;
|
||
_cgdf =0.02;_gede =2;_cfae =2;_edb =500;_daac =4.0;_gcec =4.0;_fecb =0.05;_caecd =0.1;_bdfd =2.0;_bfefd =2.0;_ccag =1.5;_dbdb =3.0;_gdad =0.25;);func (_gggg gridTile )contains (_afdd _fb .PdfRectangle )bool {if _gggg .numBorders ()< 3{return false ;};if _gggg ._adgdg &&_afdd .Llx < _gggg .Llx -_ccag {return false ;
|
||
};if _gggg ._bdgc &&_afdd .Urx > _gggg .Urx +_ccag {return false ;};if _gggg ._cbee &&_afdd .Lly < _gggg .Lly -_ccag {return false ;};if _gggg ._fga &&_afdd .Ury > _gggg .Ury +_ccag {return false ;};return true ;};func (_degfb *shapesState )drawRectangle (_dfcb ,_ebce ,_bcea ,_bcfc float64 ){if _fafc {_baef :=_degfb .devicePoint (_dfcb ,_ebce );
|
||
_dee :=_degfb .devicePoint (_dfcb +_bcea ,_ebce +_bcfc );_adb :=_fb .PdfRectangle {Llx :_baef .X ,Lly :_baef .Y ,Urx :_dee .X ,Ury :_dee .Y };_ef .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_adb );
|
||
};_degfb .newSubPath ();_degfb .moveTo (_dfcb ,_ebce );_degfb .lineTo (_dfcb +_bcea ,_ebce );_degfb .lineTo (_dfcb +_bcea ,_ebce +_bcfc );_degfb .lineTo (_dfcb ,_ebce +_bcfc );_degfb .closePath ();};
|
||
|
||
// ToText returns the page text as a single string.
|
||
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
|
||
// Text() instead.
|
||
func (_geag PageText )ToText ()string {return _geag .Text ()};type rulingList []*ruling ;func (_dfe *textObject )showTextAdjusted (_bffb *_gc .PdfObjectArray )error {_ffe :=false ;for _ ,_cbf :=range _bffb .Elements (){switch _cbf .(type ){case *_gc .PdfObjectFloat ,*_gc .PdfObjectInteger :_bef ,_dgc :=_gc .GetNumberAsFloat (_cbf );
|
||
if _dgc !=nil {_ef .Log .Debug ("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_cbf ,_bffb );
|
||
return _dgc ;};_afg ,_accg :=-_bef *0.001*_dfe ._ebag ._gbgg ,0.0;if _ffe {_accg ,_afg =_afg ,_accg ;};_bac :=_cgef (_cb .Point {X :_afg ,Y :_accg });_dfe ._gfc .Concat (_bac );case *_gc .PdfObjectString :_ggg ,_gcde :=_gc .GetStringBytes (_cbf );if !_gcde {_ef .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_cbf ,_bffb );
|
||
return _gc .ErrTypeError ;};_dfe .renderText (_ggg );default:_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_cbf ,_bffb );
|
||
return _gc .ErrTypeError ;};};return nil ;};func _bcgc (_acefb _fb .PdfRectangle ,_ccgd bounded )float64 {return _acefb .Ury -_ccgd .bbox ().Lly };func (_aadff rulingList )snapToGroups ()rulingList {_bfbbc ,_cdgg :=_aadff .vertsHorzs ();if len (_bfbbc )> 0{_bfbbc =_bfbbc .snapToGroupsDirection ();
|
||
};if len (_cdgg )> 0{_cdgg =_cdgg .snapToGroupsDirection ();};_gbcgg :=append (_bfbbc ,_cdgg ...);_gbcgg .log ("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073");return _gbcgg ;};func (_ebefc *textPara )bbox ()_fb .PdfRectangle {return _ebefc .PdfRectangle };
|
||
func _cdeea (_fggad ,_fdeb _cb .Point )bool {_aaeb :=_d .Abs (_fggad .X -_fdeb .X );_eeece :=_d .Abs (_fggad .Y -_fdeb .Y );return _dbef (_aaeb ,_eeece );};func (_ebda *wordBag )text ()string {_acdb :=_ebda .allWords ();_ecaa :=make ([]string ,len (_acdb ));
|
||
for _fafe ,_bagf :=range _acdb {_ecaa [_fafe ]=_bagf ._ecgc ;};return _ag .Join (_ecaa ,"\u0020");};func _bbbb (_aeeed float64 ,_bdfac int )int {if _bdfac ==0{_bdfac =1;};_bfaf :=float64 (_bdfac );return int (_d .Round (_aeeed /_bfaf )*_bfaf );};func _eecd (_gebd float64 )bool {return _d .Abs (_gebd )< _bfefd };
|
||
// findPrimSec returns the first ruling whose primary coordinate equals `_cbfgf` (to within the
// tolerance of `_gdga`) and whose extent, widened by `_bdfd` at both ends, contains `_fbec`.
// It returns nil when no ruling qualifies.
func (_dbgeg rulingList) findPrimSec(_cbfgf, _fbec float64) *ruling {
	for _, r := range _dbgeg {
		if !_gdga(r._gaeba - _cbfgf) {
			continue
		}
		if r._gfce-_bdfd <= _fbec && _fbec <= r._aegc+_bdfd {
			return r
		}
	}
	return nil
}

// _adbd reports whether the vertical extents [Lly, Ury] of the two rectangles overlap or touch.
func _adbd(_bbg, _fefb _fb.PdfRectangle) bool {
	return _bbg.Lly <= _fefb.Ury && _fefb.Lly <= _bbg.Ury
}
|
||
func (_eacb *wordBag )removeDuplicates (){if _beca {_ef .Log .Info ("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_eacb .text ());};for _ ,_bbdd :=range _eacb .depthIndexes (){if len (_eacb ._ecg [_bbdd ])==0{continue ;
|
||
};_efeeg :=_eacb ._ecg [_bbdd ][0];_edfe :=_fbf *_efeeg ._bgfca ;_aadd :=_efeeg ._afcda ;for _ ,_bgag :=range _eacb .depthBand (_aadd ,_aadd +_edfe ){_gafe :=map[*textWord ]struct{}{};_dcbb :=_eacb ._ecg [_bgag ];for _ ,_egcg :=range _dcbb {if _ ,_cffb :=_gafe [_egcg ];
|
||
_cffb {continue ;};for _ ,_abee :=range _dcbb {if _ ,_cagb :=_gafe [_abee ];_cagb {continue ;};if _abee !=_egcg &&_abee ._ecgc ==_egcg ._ecgc &&_d .Abs (_abee .Llx -_egcg .Llx )< _edfe &&_d .Abs (_abee .Urx -_egcg .Urx )< _edfe &&_d .Abs (_abee .Lly -_egcg .Lly )< _edfe &&_d .Abs (_abee .Ury -_egcg .Ury )< _edfe {_gafe [_abee ]=struct{}{};
|
||
};};};if len (_gafe )> 0{_egad :=0;for _ ,_eeeb :=range _dcbb {if _ ,_gdea :=_gafe [_eeeb ];!_gdea {_dcbb [_egad ]=_eeeb ;_egad ++;};};_eacb ._ecg [_bgag ]=_dcbb [:len (_dcbb )-len (_gafe )];if len (_eacb ._ecg [_bgag ])==0{delete (_eacb ._ecg ,_bgag );
|
||
};};};};};type gridTiling struct{_fb .PdfRectangle ;_acfggbg []float64 ;_agdf []float64 ;_efgf map[float64 ]map[float64 ]gridTile ;};type fontEntry struct{_acbg *_fb .PdfFont ;_fea int64 ;};type lineRuling struct{_bdde rulingKind ;_efea markKind ;_b .Color ;
|
||
_fbfg ,_cade _cb .Point ;};func (_gbad *shapesState )addPoint (_cbfg ,_cgdea float64 ){_bbfa :=_gbad .establishSubpath ();_egg :=_gbad .devicePoint (_cbfg ,_cgdea );if _bbfa ==nil {_gbad ._eef =true ;_gbad ._caad =_egg ;}else {_bbfa .add (_egg );};};func (_dbgf *textObject )getCurrentFont ()*_fb .PdfFont {var _feef *_fb .PdfFont ;
|
||
if !_dbgf ._gbaa .empty (){_feef =_dbgf ._gbaa .top ()._dec ;};if _feef ==nil {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e");
|
||
return _fb .DefaultFont ();};return _feef ;};func (_fcbg *textTable )markCells (){for _gbcde :=0;_gbcde < _fcbg ._ecbc ;_gbcde ++{for _fgbg :=0;_fgbg < _fcbg ._acebgf ;_fgbg ++{_afda :=_fcbg .get (_fgbg ,_gbcde );if _afda !=nil {_afda ._gebc =true ;};};
|
||
};};func (_cbcdd gridTile )complete ()bool {return _cbcdd .numBorders ()==4};
|
||
|
||
// String returns a description of `t`.
|
||
func (_ebacd *textTable )String ()string {return _f .Sprintf ("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074",_ebacd ._acebgf ,_ebacd ._ecbc ,_ebacd ._cbbaf );};type compositeCell struct{_fb .PdfRectangle ;paraList ;};type textLine struct{_fb .PdfRectangle ;
|
||
_fdfed float64 ;_aacg []*textWord ;_agbc float64 ;};func (_bgcf rulingList )connections (_eddf map[int ]intSet ,_ffba int )intSet {_aeaee :=make (intSet );_befgc :=make (intSet );var _dfdgc func (int );_dfdgc =func (_dccf int ){if !_befgc .has (_dccf ){_befgc .add (_dccf );
|
||
for _fcba :=range _bgcf {if _eddf [_fcba ].has (_dccf ){_aeaee .add (_fcba );};};for _cegaf :=range _bgcf {if _aeaee .has (_cegaf ){_dfdgc (_cegaf );};};};};_dfdgc (_ffba );return _aeaee ;};func _aega (_geeg []pathSection )rulingList {_edda (_geeg );if _fcffd {_ef .Log .Info ("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs",len (_geeg ));
|
||
};var _bffd rulingList ;for _ ,_edec :=range _geeg {for _ ,_gffd :=range _edec ._acbfc {if !_gffd .isQuadrilateral (){if _fcffd {_ef .Log .Error ("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073",_gffd );
|
||
};continue ;};if _gbbg ,_acefg :=_gffd .makeRectRuling (_edec .Color );_acefg {_bffd =append (_bffd ,_gbbg );}else {if _bfgd {_ef .Log .Error ("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073",_gffd );
|
||
};};};};if _fcffd {_ef .Log .Info ("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073",_bffd .String ());};return _bffd ;};func (_cgefb paraList )xNeighbours (_dcafd float64 )map[*textPara ][]int {_deee :=make ([]event ,2*len (_cgefb ));
|
||
if _dcafd ==0{for _bcbc ,_fegbdf :=range _cgefb {_deee [2*_bcbc ]=event {_fegbdf .Llx ,true ,_bcbc };_deee [2*_bcbc +1]=event {_fegbdf .Urx ,false ,_bcbc };};}else {for _eggd ,_fefbd :=range _cgefb {_deee [2*_eggd ]=event {_fefbd .Llx -_dcafd *_fefbd .fontsize (),true ,_eggd };
|
||
_deee [2*_eggd +1]=event {_fefbd .Urx +_dcafd *_fefbd .fontsize (),false ,_eggd };};};return _cgefb .eventNeighbours (_deee );};func (_aece *textTable )compositeRowCorridors ()map[int ][]float64 {_efdfd :=make (map[int ][]float64 ,_aece ._ecbc );if _dcdd {_ef .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_aece ._ecbc );
|
||
};for _ggggd :=1;_ggggd < _aece ._ecbc ;_ggggd ++{var _adfbb []compositeCell ;for _aaca :=0;_aaca < _aece ._acebgf ;_aaca ++{if _egebg ,_bfag :=_aece ._accd [_eggg (_aaca ,_ggggd )];_bfag {_adfbb =append (_adfbb ,_egebg );};};if len (_adfbb )==0{continue ;
|
||
};_dgac :=_egacc (_adfbb );_efdfd [_ggggd ]=_dgac ;if _dcdd {_f .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_ggggd ,_dgac );};};return _efdfd ;};func (_dddd *textTable )reduceTiling (_cgaf gridTiling ,_bgfc float64 )*textTable {_cacdg :=make ([]int ,0,_dddd ._ecbc );
|
||
_gffdc :=make ([]int ,0,_dddd ._acebgf );_abad :=_cgaf ._acfggbg ;_cced :=_cgaf ._agdf ;for _gbbdg :=0;_gbbdg < _dddd ._ecbc ;_gbbdg ++{_bdef :=_gbbdg > 0&&_d .Abs (_cced [_gbbdg -1]-_cced [_gbbdg ])< _bgfc &&_dddd .emptyRow (_gbbdg );if !_bdef {_cacdg =append (_cacdg ,_gbbdg );
|
||
};};for _dgfac :=0;_dgfac < _dddd ._acebgf ;_dgfac ++{_bcdbc :=_dgfac < _dddd ._acebgf -1&&_d .Abs (_abad [_dgfac +1]-_abad [_dgfac ])< _bgfc &&_dddd .emptyColumn (_dgfac );if !_bcdbc {_gffdc =append (_gffdc ,_dgfac );};};if len (_cacdg )==_dddd ._ecbc &&len (_gffdc )==_dddd ._acebgf {return _dddd ;
|
||
};_eaceb :=textTable {_cbbaf :_dddd ._cbbaf ,_acebgf :len (_gffdc ),_ecbc :len (_cacdg ),_accd :make (map[uint64 ]compositeCell ,len (_gffdc )*len (_cacdg ))};if _dcdd {_ef .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_dddd ._acebgf ,_dddd ._ecbc ,len (_gffdc ),len (_cacdg ));
|
||
_ef .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_gffdc );_ef .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_cacdg );};for _cfgd ,_eecdd :=range _cacdg {for _addfc ,_ddfd :=range _gffdc {_cbaa ,_aaea :=_dddd .getComposite (_ddfd ,_eecdd );
|
||
if len (_cbaa )==0{continue ;};if _dcdd {_f .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_addfc ,_cfgd ,_ddfd ,_eecdd ,_debb (_cbaa .merge ().text (),50));};_eaceb .putComposite (_addfc ,_cfgd ,_cbaa ,_aaea );
|
||
};};return &_eaceb ;};type textPara struct{_fb .PdfRectangle ;_gcag _fb .PdfRectangle ;_eggb []*textLine ;_ebdd *textTable ;_gebc bool ;_gbcbg bool ;_bddg *textPara ;_aecb *textPara ;_fgbaf *textPara ;_cggd *textPara ;};func _fadbe (_fcdg []*textMark ,_debae _fb .PdfRectangle )*textWord {_fafcg :=_fcdg [0].PdfRectangle ;
|
||
_daba :=_fcdg [0]._gaea ;for _ ,_abfb :=range _fcdg [1:]{_fafcg =_aeee (_fafcg ,_abfb .PdfRectangle );if _abfb ._gaea > _daba {_daba =_abfb ._gaea ;};};return &textWord {PdfRectangle :_fafcg ,_aegfa :_fcdg ,_afcda :_debae .Ury -_fafcg .Lly ,_bgfca :_daba };
|
||
};type textObject struct{_afgf *Extractor ;_bcb *_fb .PdfPageResources ;_gbf _gef .GraphicsState ;_ebag *textState ;_gbaa *stateStack ;_gfc _cb .Matrix ;_fbgc _cb .Matrix ;_faf []*textMark ;_cgde bool ;};
|
||
|
||
// String returns a description of `k`.
|
||
func (_aede markKind )String ()string {_bfab ,_cbfa :=_geec [_aede ];if !_cbfa {return _f .Sprintf ("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064",_aede );};return _bfab ;};func _aedec (_gedb []*textWord ,_fcdc int )[]*textWord {_ebddc :=len (_gedb );
|
||
copy (_gedb [_fcdc :],_gedb [_fcdc +1:]);return _gedb [:_ebddc -1];};
|
||
|
||
// TextMarkArray is a collection of TextMarks.
|
||
type TextMarkArray struct{_cbdg []TextMark };func (_geggd paraList )tables ()[]TextTable {var _cedc []TextTable ;if _dcdd {_ef .Log .Info ("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a");};for _ ,_gfgbf :=range _geggd {_gadb :=_gfgbf ._ebdd ;
|
||
if _gadb !=nil &&_gadb .isExportable (){_cedc =append (_cedc ,_gadb .toTextTable ());};};return _cedc ;};
|
||
|
||
// ImageExtractOptions contains options for controlling image extraction from
|
||
// PDF pages.
|
||
type ImageExtractOptions struct{IncludeInlineStencilMasks bool ;};func _fagce (_edbg ,_gcfbf _cb .Point )bool {_bcce :=_d .Abs (_edbg .X -_gcfbf .X );_fdac :=_d .Abs (_edbg .Y -_gcfbf .Y );return _dbef (_fdac ,_bcce );};func (_eeaab *wordBag )firstWord (_ecfa int )*textWord {return _eeaab ._ecg [_ecfa ][0]};
|
||
func _gdga (_ffgd float64 )bool {return _d .Abs (_ffgd )< _egac };var _geec =map[markKind ]string {_aaed :"\u0073\u0074\u0072\u006f\u006b\u0065",_ceeed :"\u0066\u0069\u006c\u006c",_acfgg :"\u0061u\u0067\u006d\u0065\u006e\u0074"};func (_ggeg *textPara )toCellTextMarks (_dcgc *int )[]TextMark {var _eeef []TextMark ;
|
||
for _aafg ,_cefc :=range _ggeg ._eggb {_cdcf :=_cefc .toTextMarks (_dcgc );_bfbg :=_cbef &&_cefc .endsInHyphen ()&&_aafg !=len (_ggeg ._eggb )-1;if _bfbg {_cdcf =_cbeab (_cdcf ,_dcgc );};_eeef =append (_eeef ,_cdcf ...);if !(_bfbg ||_aafg ==len (_ggeg ._eggb )-1){_eeef =_cagc (_eeef ,_dcgc ,_ddad (_cefc ._fdfed ,_ggeg ._eggb [_aafg +1]._fdfed ));
|
||
};};return _eeef ;};
|
||
|
||
// ToTextMark returns the public view of `tm`.
|
||
func (_cdgb *textMark )ToTextMark ()TextMark {return TextMark {Text :_cdgb ._dfed ,Original :_cdgb ._egga ,BBox :_cdgb ._dgece ,Font :_cdgb ._gcdg ,FontSize :_cdgb ._gaea ,FillColor :_cdgb ._bgec ,StrokeColor :_cdgb ._efee ,Orientation :_cdgb ._ddbf };
|
||
};func (_ceed *textPara )writeCellText (_gfedcc _c .Writer ){for _cafa ,_bbgg :=range _ceed ._eggb {_gbb :=_bbgg .text ();_fabe :=_cbef &&_bbgg .endsInHyphen ()&&_cafa !=len (_ceed ._eggb )-1;if _fabe {_gbb =_abga (_gbb );};_gfedcc .Write ([]byte (_gbb ));
|
||
if !(_fabe ||_cafa ==len (_ceed ._eggb )-1){_gfedcc .Write ([]byte (_ddad (_bbgg ._fdfed ,_ceed ._eggb [_cafa +1]._fdfed )));};};};func (_effa rulingList )log (_bdgee string ){if !_fcffd {return ;};_ef .Log .Info ("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_bdgee ,_effa .String ());
|
||
for _agca ,_agfa :=range _effa {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_agca ,_agfa .String ());};};type imageExtractContext struct{_ba []ImageMark ;_eg int ;_gfe int ;_eff int ;_cac map[*_gc .PdfObjectStream ]*cachedImage ;_fd *ImageExtractOptions ;
|
||
};func (_fdd *shapesState )devicePoint (_gebe ,_fef float64 )_cb .Point {_efbb :=_fdd ._gaba .Mult (_fdd ._dcd );_gebe ,_fef =_efbb .Transform (_gebe ,_fef );return _cb .NewPoint (_gebe ,_fef );};func (_deea *wordBag )pullWord (_agee *textWord ,_gbd int ,_gfed map[int ]map[*textWord ]struct{}){_deea .PdfRectangle =_aeee (_deea .PdfRectangle ,_agee .PdfRectangle );
|
||
if _agee ._bgfca > _deea ._fdb {_deea ._fdb =_agee ._bgfca ;};_deea ._ecg [_gbd ]=append (_deea ._ecg [_gbd ],_agee );_gfed [_gbd ][_agee ]=struct{}{};};func _dca (_cbg *wordBag ,_gbgf int )*textLine {_dbcc :=_cbg .firstWord (_gbgf );_adbdb :=textLine {PdfRectangle :_dbcc .PdfRectangle ,_agbc :_dbcc ._bgfca ,_fdfed :_dbcc ._afcda };
|
||
_adbdb .pullWord (_cbg ,_dbcc ,_gbgf );return &_adbdb ;};func (_gedeg *textLine )text ()string {var _dfgg []string ;for _ ,_geea :=range _gedeg ._aacg {if _geea ._fecff {_dfgg =append (_dfgg ,"\u0020");};_dfgg =append (_dfgg ,_geea ._ecgc );};return _ag .Join (_dfgg ,"");
|
||
};func _cgef (_gaa _cb .Point )_cb .Matrix {return _cb .TranslationMatrix (_gaa .X ,_gaa .Y )};func (_eacd *textTable )logComposite (_dbdfg string ){if !_dcdd {return ;};_ef .Log .Info ("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_eacd ._acebgf ,_eacd ._ecbc ,_dbdfg );
|
||
_f .Printf ("\u0025\u0035\u0073 \u007c","");for _gfedg :=0;_gfedg < _eacd ._acebgf ;_gfedg ++{_f .Printf ("\u0025\u0033\u0064 \u007c",_gfedg );};_f .Println ("");_f .Printf ("\u0025\u0035\u0073 \u002b","");for _dcca :=0;_dcca < _eacd ._acebgf ;_dcca ++{_f .Printf ("\u0025\u0033\u0073 \u002b","\u002d\u002d\u002d");
|
||
};_f .Println ("");for _cdbfg :=0;_cdbfg < _eacd ._ecbc ;_cdbfg ++{_f .Printf ("\u0025\u0035\u0064 \u007c",_cdbfg );for _cddb :=0;_cddb < _eacd ._acebgf ;_cddb ++{_adbaf ,_ :=_eacd ._accd [_eggg (_cddb ,_cdbfg )].parasBBox ();_f .Printf ("\u0025\u0033\u0064 \u007c",len (_adbaf ));
|
||
};_f .Println ("");};_ef .Log .Info ("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_eacd ._acebgf ,_eacd ._ecbc ,_dbdfg );_f .Printf ("\u0025\u0035\u0073 \u007c","");for _adaag :=0;_adaag < _eacd ._acebgf ;_adaag ++{_f .Printf ("\u0025\u0031\u0032\u0064\u0020\u007c",_adaag );
|
||
};_f .Println ("");_f .Printf ("\u0025\u0035\u0073 \u002b","");for _egff :=0;_egff < _eacd ._acebgf ;_egff ++{_f .Print ("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b");};_f .Println ("");for _bgfb :=0;_bgfb < _eacd ._ecbc ;
|
||
_bgfb ++{_f .Printf ("\u0025\u0035\u0064 \u007c",_bgfb );for _fadcc :=0;_fadcc < _eacd ._acebgf ;_fadcc ++{_gdafa ,_ :=_eacd ._accd [_eggg (_fadcc ,_bgfb )].parasBBox ();_bdbf :="";_ffce :=_gdafa .merge ();if _ffce !=nil {_bdbf =_ffce .text ();};_bdbf =_f .Sprintf ("\u0025\u0071",_debb (_bdbf ,12));
|
||
_bdbf =_bdbf [1:len (_bdbf )-1];_f .Printf ("\u0025\u0031\u0032\u0073\u0020\u007c",_bdbf );};_f .Println ("");};};func _ecff (_fdfcf []*textMark ,_dfbe _fb .PdfRectangle )[]*textWord {var _ccagg []*textWord ;var _ecedf *textWord ;if _ceee {_ef .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_fdfcf ));
|
||
};_fcge :=func (){if _ecedf !=nil {_adgea :=_ecedf .computeText ();if !_daaae (_adgea ){_ecedf ._ecgc =_adgea ;_ccagg =append (_ccagg ,_ecedf );if _ceee {_ef .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_ccagg )-1,_ecedf .String ());
|
||
for _agcad ,_gfedfg :=range _ecedf ._aegfa {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_agcad ,_gfedfg .String ());};};};_ecedf =nil ;};};for _ ,_dfgga :=range _fdfcf {if _eebg &&_ecedf !=nil &&len (_ecedf ._aegfa )> 0{_aedc :=_ecedf ._aegfa [len (_ecedf ._aegfa )-1];
|
||
_feff ,_cfcgd :=_eced (_dfgga ._dfed );_dagc ,_gfdb :=_eced (_aedc ._dfed );if _cfcgd &&!_gfdb &&_aedc .inDiacriticArea (_dfgga ){_ecedf .addDiacritic (_feff );continue ;};if _gfdb &&!_cfcgd &&_dfgga .inDiacriticArea (_aedc ){_ecedf ._aegfa =_ecedf ._aegfa [:len (_ecedf ._aegfa )-1];
|
||
_ecedf .appendMark (_dfgga ,_dfbe );_ecedf .addDiacritic (_dagc );continue ;};};_aaagc :=_daaae (_dfgga ._dfed );if _aaagc {_fcge ();continue ;};if _ecedf ==nil &&!_aaagc {_ecedf =_fadbe ([]*textMark {_dfgga },_dfbe );continue ;};_aacde :=_ecedf ._bgfca ;
|
||
_fddd :=_d .Abs (_bcgc (_dfbe ,_dfgga )-_ecedf ._afcda )/_aacde ;_gfedff :=_efdb (_dfgga ,_ecedf )/_aacde ;if _gfedff >=_dfad ||!(-_bdge <=_gfedff &&_fddd <=_gdaf ){_fcge ();_ecedf =_fadbe ([]*textMark {_dfgga },_dfbe );continue ;};_ecedf .appendMark (_dfgga ,_dfbe );
|
||
};_fcge ();return _ccagg ;};func (_ebfc rulingList )tidied (_dgaf string )rulingList {_ddcb :=_ebfc .removeDuplicates ();_ddcb .log ("\u0075n\u0069\u0071\u0075\u0065\u0073");_fefab :=_ddcb .snapToGroups ();if _fefab ==nil {return nil ;};_fefab .sort ();
|
||
if _fcffd {_ef .Log .Info ("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064",_dgaf ,len (_ebfc ),len (_ddcb ),len (_fefab ));
|
||
};_fefab .log ("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d");return _fefab ;};func _afga (_eebc ,_gded bounded )float64 {_adgeb :=_bfefa (_eebc ,_gded );if !_gdga (_adgeb ){return _adgeb ;};return _cfcg (_eebc ,_gded );};const (_cbad =false ;_ceee =false ;
|
||
_ddee =false ;_abgf =false ;_fafc =false ;_beag =false ;_bfde =false ;_cfeg =false ;_dgeb =false ;_ccbef =_dgeb &&true ;_dbfd =_ccbef &&false ;_beca =_dgeb &&true ;_dcdd =false ;_cggg =_dcdd &&false ;_fbbb =_dcdd &&true ;_fcffd =false ;_bdee =_fcffd &&false ;
|
||
_cff =_fcffd &&false ;_cgdd =_fcffd &&true ;_bfgd =_fcffd &&false ;_caf =_fcffd &&false ;);func _fdbaf (_ebeg string )bool {if _ge .RuneCountInString (_ebeg )< _dfff {return false ;};_cdcc ,_acad :=_ge .DecodeLastRuneInString (_ebeg );if _acad <=0||!_cd .Is (_cd .Hyphen ,_cdcc ){return false ;
|
||
};_cdcc ,_acad =_ge .DecodeLastRuneInString (_ebeg [:len (_ebeg )-_acad ]);return _acad > 0&&!_cd .IsSpace (_cdcc );};
|
||
|
||
// ExtractText processes and extracts all text data in content streams and returns as a string.
|
||
// It takes into account character encodings in the PDF file, which are decoded by
|
||
// CharcodeBytesToUnicode.
|
||
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
|
||
func (_ffa *Extractor )ExtractText ()(string ,error ){_ffg ,_ ,_ ,_fae :=_ffa .ExtractTextWithStats ();return _ffg ,_fae ;};func (_fdde *wordBag )maxDepth ()float64 {return _fdde ._cbea -_fdde .Lly };func (_dced paraList )yNeighbours (_bbbbb float64 )map[*textPara ][]int {_ccda :=make ([]event ,2*len (_dced ));
|
||
if _bbbbb ==0{for _cbffd ,_afccdb :=range _dced {_ccda [2*_cbffd ]=event {_afccdb .Lly ,true ,_cbffd };_ccda [2*_cbffd +1]=event {_afccdb .Ury ,false ,_cbffd };};}else {for _caac ,_bbdf :=range _dced {_ccda [2*_caac ]=event {_bbdf .Lly -_bbbbb *_bbdf .fontsize (),true ,_caac };
|
||
_ccda [2*_caac +1]=event {_bbdf .Ury +_bbbbb *_bbdf .fontsize (),false ,_caac };};};return _dced .eventNeighbours (_ccda );};func _dbef (_gafd ,_dag float64 )bool {return _gafd /_d .Max (_caecd ,_dag )< _fecb };func _fage (_efae ,_ega bounded )float64 {_cgbf :=_cfcg (_efae ,_ega );
|
||
if !_gdga (_cgbf ){return _cgbf ;};return _bfefa (_efae ,_ega );};func _cfcg (_eece ,_adfd bounded )float64 {return _fab (_eece )-_fab (_adfd )};var _adeg =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_b .White ,StrokeColor :_b .White };
|
||
func (_dbdf compositeCell )hasLines (_bfdb []*textLine )bool {for _abeg ,_cfaf :=range _bfdb {_efgge :=_fabf (_dbdf .PdfRectangle ,_cfaf .PdfRectangle );if _dcdd {_f .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a",_efgge ,_abeg ,len (_bfdb ));
|
||
_f .Printf ("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a",_dbdf );_f .Printf ("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a",_cfaf );};if _efgge {return true ;
|
||
};};return false ;};func (_ceb *textObject )moveTextSetLeading (_gcba ,_dfc float64 ){_ceb ._ebag ._deg =-_dfc ;_ceb .moveLP (_gcba ,_dfc );};
|
||
|
||
// Text returns the extracted page text.
|
||
func (_afa PageText )Text ()string {return _afa ._fbeg };const (RenderModeStroke RenderMode =1<<iota ;RenderModeFill ;RenderModeClip ;);
|
||
|
||
// ApplyArea processes the page text only within the specified area `bbox`.
|
||
// Each time ApplyArea is called, it updates the result set in `pt`.
|
||
// Can be called multiple times in a row with different bounding boxes.
|
||
func (_gcgc *PageText )ApplyArea (bbox _fb .PdfRectangle ){_aadc :=make ([]*textMark ,0,len (_gcgc ._bda ));for _ ,_gde :=range _gcgc ._bda {if _fabf (_gde .bbox (),bbox ){_aadc =append (_aadc ,_gde );};};var _gge paraList ;_gfdef :=len (_aadc );for _gcfb :=0;
|
||
_gcfb < 360&&_gfdef > 0;_gcfb +=90{_befg :=make ([]*textMark ,0,len (_aadc )-_gfdef );for _ ,_ebe :=range _aadc {if _ebe ._ddbf ==_gcfb {_befg =append (_befg ,_ebe );};};if len (_befg )> 0{_bgc :=_bbfc (_befg ,_gcgc ._acbf ,nil ,nil );_gge =append (_gge ,_bgc ...);
|
||
_gfdef -=len (_befg );};};_gcded :=new (_aga .Buffer );_gge .writeText (_gcded );_gcgc ._fbeg =_gcded .String ();_gcgc ._aebbg =_gge .toTextMarks ();_gcgc ._caed =_gge .tables ();};func _gfaa (_ccce string ,_dcda []rulingList ){_ef .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_dcda ),_ccce );
|
||
for _bgad ,_fdafb :=range _dcda {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bgad ,_fdafb .String ());};};func (_abea *subpath )removeDuplicates (){if len (_abea ._bdfbe )==0{return ;};_bedb :=[]_cb .Point {_abea ._bdfbe [0]};for _ ,_gfda :=range _abea ._bdfbe [1:]{if !_badbe (_gfda ,_bedb [len (_bedb )-1]){_bedb =append (_bedb ,_gfda );
|
||
};};_abea ._bdfbe =_bedb ;};type textMark struct{_fb .PdfRectangle ;_ddbf int ;_dfed string ;_egga string ;_gcdg *_fb .PdfFont ;_gaea float64 ;_dcaf float64 ;_ebafa _cb .Matrix ;_debce _cb .Point ;_dgece _fb .PdfRectangle ;_bgec _b .Color ;_efee _b .Color ;
|
||
};func (_ageec *textTable )growTable (){_adfbe :=func (_bcceb paraList ){_ageec ._ecbc ++;for _ceca :=0;_ceca < _ageec ._acebgf ;_ceca ++{_cbfbff :=_bcceb [_ceca ];_ageec .put (_ceca ,_ageec ._ecbc -1,_cbfbff );};};_adca :=func (_cafd paraList ){_ageec ._acebgf ++;
|
||
for _eaef :=0;_eaef < _ageec ._ecbc ;_eaef ++{_dacee :=_cafd [_eaef ];_ageec .put (_ageec ._acebgf -1,_eaef ,_dacee );};};if _cggg {_ageec .log ("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce");};for _eedea :=0;;_eedea ++{_gbcfb :=false ;_cbgg :=_ageec .getDown ();
|
||
_agga :=_ageec .getRight ();if _cggg {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_eedea ,_ageec );_f .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a",_cbgg );_f .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a",_agga );
|
||
};if _cbgg !=nil &&_agga !=nil {_bggf :=_cbgg [len (_cbgg )-1];if _bggf !=nil &&!_bggf ._gebc &&_bggf ==_agga [len (_agga )-1]{_adfbe (_cbgg );if _agga =_ageec .getRight ();_agga !=nil {_adca (_agga );_ageec .put (_ageec ._acebgf -1,_ageec ._ecbc -1,_bggf );
|
||
};_gbcfb =true ;};};if !_gbcfb &&_cbgg !=nil {_adfbe (_cbgg );_gbcfb =true ;};if !_gbcfb &&_agga !=nil {_adca (_agga );_gbcfb =true ;};if !_gbcfb {break ;};};};
|
||
|
||
// Elements returns the TextMarks in `ma`.
|
||
func (_dfcf *TextMarkArray )Elements ()[]TextMark {return _dfcf ._cbdg };func (_cbbf *Extractor )extractPageText (_beac string ,_df *_fb .PdfPageResources ,_eda _cb .Matrix ,_ebg int )(*PageText ,int ,int ,error ){_ef .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_ebg );
|
||
_bcf :=&PageText {_acbf :_cbbf ._ged };_ee :=_aebf (_cbbf ._ged );_gfg :=stateStack {&_ee };_acb :=_faea (_cbbf ,_df ,_gef .GraphicsState {},&_ee ,&_gfg );_faee :=shapesState {_gaba :_eda ,_dcd :_cb .IdentityMatrix (),_fgec :_acb };var _fbc bool ;if _ebg > _gcd {_eca :=_a .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");
|
||
_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_ebg ,_eca );
|
||
return _bcf ,_ee ._egcee ,_ee ._ecac ,_eca ;};_bae :=_gef .NewContentStreamParser (_beac );_dbd ,_agb :=_bae .Parse ();if _agb !=nil {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_agb );
|
||
return _bcf ,_ee ._egcee ,_ee ._ecac ,_agb ;};_ggb :=_gef .NewContentStreamProcessor (*_dbd );_ggb .AddHandler (_gef .HandlerConditionEnumAllOperands ,"",func (_fee *_gef .ContentStreamOperation ,_eea _gef .GraphicsState ,_gcadf *_fb .PdfPageResources )error {_gb :=_fee .Operand ;
|
||
if _ddee {_ef .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_fee );};switch _gb {case "\u0071":if _fafc {_ef .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_faee ._dcd );};_gfg .push (&_ee );case "\u0051":if !_gfg .empty (){if len (_gfg )>=2{_gfg .pop ();
|
||
};_ee =*_gfg .top ();};_faee ._dcd =_eea .CTM ;if _fafc {_ef .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_faee ._dcd );};case "\u0042\u0054":if _fbc {_ef .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
|
||
_bcf ._bda =append (_bcf ._bda ,_acb ._faf ...);};_fbc =true ;_bafe :=_eea ;_bafe .CTM =_eda .Mult (_bafe .CTM );_acb =_faea (_cbbf ,_gcadf ,_bafe ,&_ee ,&_gfg );_faee ._fgec =_acb ;case "\u0045\u0054":if !_fbc {_ef .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
|
||
};_fbc =false ;_bcf ._bda =append (_bcf ._bda ,_acb ._faf ...);_acb .reset ();case "\u0054\u002a":_acb .nextLine ();case "\u0054\u0064":if _fcb ,_bfb :=_acb .checkOp (_fee ,2,true );!_fcb {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bfb );
|
||
return _bfb ;};_adf ,_add ,_ceg :=_gcfdc (_fee .Params );if _ceg !=nil {return _ceg ;};_acb .moveText (_adf ,_add );case "\u0054\u0044":if _bff ,_adfb :=_acb .checkOp (_fee ,2,true );!_bff {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_adfb );
|
||
return _adfb ;};_ffd ,_agac ,_dgg :=_gcfdc (_fee .Params );if _dgg !=nil {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dgg );return _dgg ;};_acb .moveTextSetLeading (_ffd ,_agac );case "\u0054\u006a":if _dgf ,_agda :=_acb .checkOp (_fee ,1,true );
|
||
!_dgf {_ef .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_fee ,_agda );return _agda ;};_dda ,_ffcb :=_gc .GetStringBytes (_fee .Params [0]);if !_ffcb {_ef .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_fee );
|
||
return _gc .ErrTypeError ;};return _acb .showText (_dda );case "\u0054\u004a":if _cdc ,_dge :=_acb .checkOp (_fee ,1,true );!_cdc {_ef .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dge );return _dge ;
|
||
};_ace ,_eee :=_gc .GetArray (_fee .Params [0]);if !_eee {_ef .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_fee );
|
||
return _agb ;};return _acb .showTextAdjusted (_ace );case "\u0027":if _bg ,_cgb :=_acb .checkOp (_fee ,1,true );!_bg {_ef .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cgb );return _cgb ;};_dccc ,_gfa :=_gc .GetStringBytes (_fee .Params [0]);
|
||
if !_gfa {_ef .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_fee );return _gc .ErrTypeError ;};_acb .nextLine ();return _acb .showText (_dccc );
|
||
case "\u0022":if _ecd ,_gea :=_acb .checkOp (_fee ,3,true );!_ecd {_ef .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gea );return _gea ;};_bffa ,_dfb ,_acg :=_gcfdc (_fee .Params [:2]);if _acg !=nil {return _acg ;
|
||
};_dbg ,_dce :=_gc .GetStringBytes (_fee .Params [2]);if !_dce {_ef .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_fee );
|
||
return _gc .ErrTypeError ;};_acb .setCharSpacing (_bffa );_acb .setWordSpacing (_dfb );_acb .nextLine ();return _acb .showText (_dbg );case "\u0054\u004c":_fgb ,_ebb :=_faeg (_fee );if _ebb !=nil {_ef .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ebb );
|
||
return _ebb ;};_acb .setTextLeading (_fgb );case "\u0054\u0063":_edac ,_gfd :=_faeg (_fee );if _gfd !=nil {_ef .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gfd );return _gfd ;};_acb .setCharSpacing (_edac );
|
||
case "\u0054\u0066":if _gcaf ,_dac :=_acb .checkOp (_fee ,2,true );!_gcaf {_ef .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dac );return _dac ;};_cae ,_gbc :=_gc .GetNameVal (_fee .Params [0]);if !_gbc {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_fee );
|
||
return _gc .ErrTypeError ;};_abb ,_bgg :=_gc .GetNumberAsFloat (_fee .Params [1]);if !_gbc {_ef .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fee ,_bgg );
|
||
return _bgg ;};_bgg =_acb .setFont (_cae ,_abb );_acb ._cgde =_aee .Is (_bgg ,_gc .ErrNotSupported );if _bgg !=nil &&!_acb ._cgde {return _bgg ;};case "\u0054\u006d":if _egb ,_eed :=_acb .checkOp (_fee ,6,true );!_egb {_ef .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_eed );
|
||
return _eed ;};_aag ,_bcd :=_gc .GetNumbersAsFloat (_fee .Params );if _bcd !=nil {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bcd );return _bcd ;};_acb .setTextMatrix (_aag );case "\u0054\u0072":if _aec ,_feb :=_acb .checkOp (_fee ,1,true );
|
||
!_aec {_ef .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_feb );return _feb ;};_dbc ,_acef :=_gc .GetIntVal (_fee .Params [0]);if !_acef {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_fee );
|
||
return _gc .ErrTypeError ;};_acb .setTextRenderMode (_dbc );case "\u0054\u0073":if _aad ,_cbba :=_acb .checkOp (_fee ,1,true );!_aad {_ef .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cbba );return _cbba ;
|
||
};_dgec ,_dga :=_gc .GetNumberAsFloat (_fee .Params [0]);if _dga !=nil {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dga );return _dga ;};_acb .setTextRise (_dgec );case "\u0054\u0077":if _dea ,_eba :=_acb .checkOp (_fee ,1,true );
|
||
!_dea {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_eba );return _eba ;};_ggc ,_age :=_gc .GetNumberAsFloat (_fee .Params [0]);if _age !=nil {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_age );
|
||
return _age ;};_acb .setWordSpacing (_ggc );case "\u0054\u007a":if _ebaf ,_fac :=_acb .checkOp (_fee ,1,true );!_ebaf {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fac );return _fac ;};_fdga ,_cbd :=_gc .GetNumberAsFloat (_fee .Params [0]);
|
||
if _cbd !=nil {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cbd );return _cbd ;};_acb .setHorizScaling (_fdga );case "\u0063\u006d":_faee ._dcd =_eea .CTM ;if _faee ._dcd .Singular (){_cfd :=_cb .IdentityMatrix ().Translate (_faee ._dcd .Translation ());
|
||
_ef .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_faee ._dcd ,_cfd );_faee ._dcd =_cfd ;};if _fafc {_ef .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_faee ._dcd );};case "\u006d":if len (_fee .Params )!=2{_ef .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_ca );
|
||
return nil ;};_dfa ,_fcbd :=_gc .GetNumbersAsFloat (_fee .Params );if _fcbd !=nil {return _fcbd ;};_ef .Log .Debug ("\u004d\u006f\u0076\u0065\u0020\u0074\u006f\u003a\u0020\u0025\u002e\u0032\u0066",_dfa );_faee .moveTo (_dfa [0],_dfa [1]);case "\u006c":if len (_fee .Params )!=2{_ef .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_ca );
|
||
return nil ;};_gee ,_ccd :=_gc .GetNumbersAsFloat (_fee .Params );if _ccd !=nil {return _ccd ;};_faee .lineTo (_gee [0],_gee [1]);case "\u0063":if len (_fee .Params )!=6{return _ca ;};_eeaa ,_egce :=_gc .GetNumbersAsFloat (_fee .Params );if _egce !=nil {return _egce ;
|
||
};_ef .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_eeaa );_faee .cubicTo (_eeaa [0],_eeaa [1],_eeaa [2],_eeaa [3],_eeaa [4],_eeaa [5]);case "\u0076","\u0079":if len (_fee .Params )!=4{return _ca ;
|
||
};_bfd ,_agg :=_gc .GetNumbersAsFloat (_fee .Params );if _agg !=nil {return _agg ;};_ef .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_bfd );_faee .quadraticTo (_bfd [0],_bfd [1],_bfd [2],_bfd [3]);
|
||
case "\u0068":_faee .closePath ();case "\u0072\u0065":if len (_fee .Params )!=4{return _ca ;};_cef ,_geg :=_gc .GetNumbersAsFloat (_fee .Params );if _geg !=nil {return _geg ;};_faee .drawRectangle (_cef [0],_cef [1],_cef [2],_cef [3]);_faee .closePath ();
|
||
case "\u0053":_faee .stroke (&_bcf ._ebgd );_faee .clearPath ();case "\u0073":_faee .closePath ();_faee .stroke (&_bcf ._ebgd );_faee .clearPath ();case "\u0046":_faee .fill (&_bcf ._dfab );_faee .clearPath ();case "\u0066","\u0066\u002a":_faee .closePath ();
|
||
_faee .fill (&_bcf ._dfab );_faee .clearPath ();case "\u0042","\u0042\u002a":_faee .fill (&_bcf ._dfab );_faee .stroke (&_bcf ._ebgd );_faee .clearPath ();case "\u0062","\u0062\u002a":_faee .closePath ();_faee .fill (&_bcf ._dfab );_faee .stroke (&_bcf ._ebgd );
|
||
_faee .clearPath ();case "\u006e":_faee .clearPath ();case "\u0044\u006f":if len (_fee .Params )==0{_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_fee .Params );
|
||
return _gc .ErrRangeError ;};_cbe ,_fbe :=_gc .GetName (_fee .Params [0]);if !_fbe {_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_fee .Params [0]);
|
||
return _gc .ErrTypeError ;};_ ,_fdc :=_gcadf .GetXObjectByName (*_cbe );if _fdc !=_fb .XObjectTypeForm {break ;};_gbg ,_fbe :=_cbbf ._dc [_cbe .String ()];if !_fbe {_acc ,_bde :=_gcadf .GetXObjectFormByName (*_cbe );if _bde !=nil {_ef .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_bde );
|
||
return _bde ;};_cegg ,_bde :=_acc .GetContentStream ();if _bde !=nil {_ef .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_bde );return _bde ;};_eaf :=_acc .Resources ;if _eaf ==nil {_eaf =_gcadf ;};_egd ,_eaa ,_cge ,_bde :=_cbbf .extractPageText (string (_cegg ),_eaf ,_eda .Mult (_eea .CTM ),_ebg +1);
|
||
if _bde !=nil {_ef .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_bde );return _bde ;};_gbg =textResult {*_egd ,_eaa ,_cge };_cbbf ._dc [_cbe .String ()]=_gbg ;};_faee ._dcd =_eea .CTM ;if _fafc {_ef .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_faee ._dcd );
|
||
};_bcf ._bda =append (_bcf ._bda ,_gbg ._eag ._bda ...);_bcf ._ebgd =append (_bcf ._ebgd ,_gbg ._eag ._ebgd ...);_bcf ._dfab =append (_bcf ._dfab ,_gbg ._eag ._dfab ...);_ee ._egcee +=_gbg ._bed ;_ee ._ecac +=_gbg ._gab ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_acb ._gbf .ColorspaceNonStroking =_eea .ColorspaceNonStroking ;
|
||
_acb ._gbf .ColorNonStroking =_eea .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_acb ._gbf .ColorspaceStroking =_eea .ColorspaceStroking ;_acb ._gbf .ColorStroking =_eea .ColorStroking ;};return nil ;
|
||
});_agb =_ggb .Process (_df );return _bcf ,_ee ._egcee ,_ee ._ecac ,_agb ;};func (_efgg *wordBag )allWords ()[]*textWord {var _eedd []*textWord ;for _ ,_gbef :=range _efgg ._ecg {_eedd =append (_eedd ,_gbef ...);};return _eedd ;};func _fbbc (_egbg _cb .Point )*subpath {return &subpath {_bdfbe :[]_cb .Point {_egbg }}};
|
||
func _aebf (_cbdd _fb .PdfRectangle )textState {return textState {_fff :100,_ddg :RenderModeFill ,_ade :_cbdd };};func (_dgbcb *wordBag )empty (_bdaf int )bool {_ ,_aefe :=_dgbcb ._ecg [_bdaf ];return !_aefe };func (_bbacg *textTable )depth ()float64 {_bceed :=1e10;
|
||
for _cgbae :=0;_cgbae < _bbacg ._acebgf ;_cgbae ++{_eabg :=_bbacg .get (_cgbae ,0);if _eabg ==nil ||_eabg ._gbcbg {continue ;};_bceed =_d .Min (_bceed ,_eabg .depth ());};return _bceed ;};
|
||
|
||
// String returns a string describing `tm`.
|
||
func (_efeg TextMark )String ()string {_adge :=_efeg .BBox ;var _faab string ;if _efeg .Font !=nil {_faab =_efeg .Font .String ();if len (_faab )> 50{_faab =_faab [:50]+"\u002e\u002e\u002e";};};var _ebcg string ;if _efeg .Meta {_ebcg ="\u0020\u002a\u004d\u002a";
|
||
};return _f .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_efeg .Offset ,_efeg .Text ,[]rune (_efeg .Text ),_adge .Llx ,_adge .Lly ,_adge .Urx ,_adge .Ury ,_faab ,_ebcg );
|
||
};func (_bcga *wordBag )depthRange (_dgcc ,_deeg int )[]int {var _bfdd []int ;for _cdbe :=range _bcga ._ecg {if _dgcc <=_cdbe &&_cdbe <=_deeg {_bfdd =append (_bfdd ,_cdbe );};};if len (_bfdd )==0{return nil ;};_e .Ints (_bfdd );return _bfdd ;};func (_fbeb paraList )sortReadingOrder (){_ef .Log .Trace ("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_fbeb ));
|
||
if len (_fbeb )<=1{return ;};_fbeb .computeEBBoxes ();_e .Slice (_fbeb ,func (_agc ,_eaca int )bool {return _fage (_fbeb [_agc ],_fbeb [_eaca ])<=0});_cdda :=_fbeb .topoOrder ();_fbeb .reorder (_cdda );};func (_cfca *textWord )absorb (_gfbb *textWord ){_cfca .PdfRectangle =_aeee (_cfca .PdfRectangle ,_gfbb .PdfRectangle );
|
||
_cfca ._aegfa =append (_cfca ._aegfa ,_gfbb ._aegfa ...);};func (_aceg *textTable )reduce ()*textTable {_fegbd :=make ([]int ,0,_aceg ._ecbc );_cfef :=make ([]int ,0,_aceg ._acebgf );for _cagd :=0;_cagd < _aceg ._ecbc ;_cagd ++{if !_aceg .emptyRow (_cagd ){_fegbd =append (_fegbd ,_cagd );
|
||
};};for _fgdbc :=0;_fgdbc < _aceg ._acebgf ;_fgdbc ++{if !_aceg .emptyColumn (_fgdbc ){_cfef =append (_cfef ,_fgdbc );};};if len (_fegbd )==_aceg ._ecbc &&len (_cfef )==_aceg ._acebgf {return _aceg ;};_bgea :=textTable {_cbbaf :_aceg ._cbbaf ,_acebgf :len (_cfef ),_ecbc :len (_fegbd ),_cadf :make (map[uint64 ]*textPara ,len (_cfef )*len (_fegbd ))};
|
||
if _dcdd {_ef .Log .Info ("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064",_aceg ._acebgf ,_aceg ._ecbc ,len (_cfef ),len (_fegbd ));_ef .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_cfef );
|
||
_ef .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_fegbd );};for _badaa ,_cfee :=range _fegbd {for _ebgb ,_geaae :=range _cfef {_gbae :=_aceg .get (_geaae ,_cfee );if _gbae ==nil {continue ;};if _dcdd {_f .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_ebgb ,_badaa ,_geaae ,_cfee ,_debb (_gbae .text (),50));
|
||
};_bgea .put (_ebgb ,_badaa ,_gbae );};};return &_bgea ;};func (_ggfcg *textWord )appendMark (_gebddf *textMark ,_agcb _fb .PdfRectangle ){_ggfcg ._aegfa =append (_ggfcg ._aegfa ,_gebddf );_ggfcg .PdfRectangle =_aeee (_ggfcg .PdfRectangle ,_gebddf .PdfRectangle );
|
||
if _gebddf ._gaea > _ggfcg ._bgfca {_ggfcg ._bgfca =_gebddf ._gaea ;};_ggfcg ._afcda =_agcb .Ury -_ggfcg .PdfRectangle .Lly ;};func _cabc (_gdbc _fb .PdfColorspace ,_aebg _fb .PdfColor )_b .Color {if _gdbc ==nil ||_aebg ==nil {return _b .Black ;};_fgad ,_debd :=_gdbc .ColorToRGB (_aebg );
|
||
if _debd !=nil {_ef .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_aebg ,_gdbc ,_debd );
|
||
return _b .Black ;};_ebcge ,_beee :=_fgad .(*_fb .PdfColorDeviceRGB );if !_beee {_ef .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_fgad );
|
||
return _b .Black ;};return _b .NRGBA {R :uint8 (_ebcge .R ()*255),G :uint8 (_ebcge .G ()*255),B :uint8 (_ebcge .B ()*255),A :uint8 (255)};};func _faeg (_gga *_gef .ContentStreamOperation )(float64 ,error ){if len (_gga .Params )!=1{_dab :=_a .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");
|
||
_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_gga .Operand ,1,len (_gga .Params ),_gga .Params );
|
||
return 0.0,_dab ;};return _gc .GetNumberAsFloat (_gga .Params [0]);};func (_fa *imageExtractContext )extractContentStreamImages (_dcf string ,_bfg *_fb .PdfPageResources )error {_agd :=_gef .NewContentStreamParser (_dcf );_afc ,_dd :=_agd .Parse ();if _dd !=nil {return _dd ;
|
||
};if _fa ._cac ==nil {_fa ._cac =map[*_gc .PdfObjectStream ]*cachedImage {};};if _fa ._fd ==nil {_fa ._fd =&ImageExtractOptions {};};_bea :=_gef .NewContentStreamProcessor (*_afc );_bea .AddHandler (_gef .HandlerConditionEnumAllOperands ,"",_fa .processOperand );
|
||
return _bea .Process (_bfg );};type rectRuling struct{_dfgc rulingKind ;_ccfeb markKind ;_b .Color ;_fb .PdfRectangle ;};func (_caa *PageText )computeViews (){var _fcbba rulingList ;if _eagge {_dggd :=_gade (_caa ._ebgd );_fcbba =append (_fcbba ,_dggd ...);
|
||
};if _ceea {_bdb :=_aega (_caa ._dfab );_fcbba =append (_fcbba ,_bdb ...);};_fcbba ,_cefg :=_fcbba .toTilings ();var _fgd paraList ;_bacf :=len (_caa ._bda );for _aff :=0;_aff < 360&&_bacf > 0;_aff +=90{_cba :=make ([]*textMark ,0,len (_caa ._bda )-_bacf );
|
||
for _ ,_gdb :=range _caa ._bda {if _gdb ._ddbf ==_aff {_cba =append (_cba ,_gdb );};};if len (_cba )> 0{_eeea :=_bbfc (_cba ,_caa ._acbf ,_fcbba ,_cefg );_fgd =append (_fgd ,_eeea ...);_bacf -=len (_cba );};};_afd :=new (_aga .Buffer );_fgd .writeText (_afd );
|
||
_caa ._fbeg =_afd .String ();_caa ._aebbg =_fgd .toTextMarks ();_caa ._caed =_fgd .tables ();if _dcdd {_ef .Log .Info ("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064",len (_caa ._caed ));
|
||
};};func (_abgac *textTable )getComposite (_ccadg ,_fcbce int )(paraList ,_fb .PdfRectangle ){_bcadg ,_bdcb :=_abgac ._accd [_eggg (_ccadg ,_fcbce )];if _dcdd {_f .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a",_ccadg ,_fcbce ,_bcadg .String ());
|
||
};if !_bdcb {return nil ,_fb .PdfRectangle {};};return _bcadg .parasBBox ();};func (_edgb *shapesState )clearPath (){_edgb ._gceb =nil ;_edgb ._eef =false ;if _fafc {_ef .Log .Info ("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073",_edgb );
|
||
};};func (_fdaf *shapesState )closePath (){if _fdaf ._eef {_fdaf ._gceb =append (_fdaf ._gceb ,_fbbc (_fdaf ._caad ));_fdaf ._eef =false ;}else if len (_fdaf ._gceb )==0{if _fafc {_ef .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");
|
||
};_fdaf ._eef =false ;return ;};_fdaf ._gceb [len (_fdaf ._gceb )-1].close ();if _fafc {_ef .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_fdaf );};};func _febc (_egeb func (*wordBag ,*textWord ,float64 )bool ,_dbgfb float64 )func (*wordBag ,*textWord )bool {return func (_gbaad *wordBag ,_afgb *textWord )bool {return _egeb (_gbaad ,_afgb ,_dbgfb )};
|
||
};func (_ecfab *ruling )equals (_gfec *ruling )bool {return _ecfab ._fggg ==_gfec ._fggg &&_caaa (_ecfab ._gaeba ,_gfec ._gaeba )&&_caaa (_ecfab ._gfce ,_gfec ._gfce )&&_caaa (_ecfab ._aegc ,_gfec ._aegc );};
|
||
|
||
// PageImages represents extracted images on a PDF page with spatial information:
|
||
// display position and size.
|
||
type PageImages struct{Images []ImageMark ;};func (_dggb *textTable )isExportable ()bool {if _dggb ._cbbaf {return true ;};_bebb :=func (_cadgf int )bool {_ggfg :=_dggb .get (0,_cadgf );if _ggfg ==nil {return false ;};_bgaeg :=_ggfg .text ();_cdbff :=_ge .RuneCountInString (_bgaeg );
|
||
_fdae :=_dfbf .MatchString (_bgaeg );return _cdbff <=1||_fdae ;};for _dbgbc :=0;_dbgbc < _dggb ._ecbc ;_dbgbc ++{if !_bebb (_dbgbc ){return true ;};};return false ;};func _eaac (_ecdeb map[int ][]float64 ){if len (_ecdeb )<=1{return ;};_cacd :=_bfgb (_ecdeb );
|
||
if _dcdd {_ef .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_cacd );};var _cbga ,_adag int ;for _cbga ,_adag =range _cacd {if _ecdeb [_adag ]!=nil {break ;};};for _gfad ,_eaag :=range _cacd [_cbga :]{_bbfeg :=_ecdeb [_eaag ];
|
||
if _bbfeg ==nil {continue ;};if _dcdd {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_cbga +_gfad ,_adag ,_eaag );};_ddgd :=_ecdeb [_eaag ];if _ddgd [len (_ddgd )-1]> _bbfeg [0]{_ddgd [len (_ddgd )-1]=_bbfeg [0];
|
||
_ecdeb [_adag ]=_ddgd ;};_adag =_eaag ;};};func (_effgb *subpath )isQuadrilateral ()bool {if len (_effgb ._bdfbe )< 4||len (_effgb ._bdfbe )> 5{return false ;};if len (_effgb ._bdfbe )==5{_ebaga :=_effgb ._bdfbe [0];_bdfdc :=_effgb ._bdfbe [4];if _ebaga .X !=_bdfdc .X ||_ebaga .Y !=_bdfdc .Y {return false ;
|
||
};};return true ;};func _eced (_bddf string )(string ,bool ){_afge :=[]rune (_bddf );if len (_afge )!=1{return "",false ;};_cdge ,_bagc :=_bagd [_afge [0]];return _cdge ,_bagc ;};type ruling struct{_fggg rulingKind ;_fccd markKind ;_b .Color ;_gaeba float64 ;
|
||
_gfce float64 ;_aegc float64 ;_eaea float64 ;};func (_aggc rulingList )blocks (_gdbd ,_efdg *ruling )bool {if _gdbd ._gfce > _efdg ._aegc ||_efdg ._gfce > _gdbd ._aegc {return false ;};_feedb :=_d .Max (_gdbd ._gfce ,_efdg ._gfce );_cafc :=_d .Min (_gdbd ._aegc ,_efdg ._aegc );
|
||
if _gdbd ._gaeba > _efdg ._gaeba {_gdbd ,_efdg =_efdg ,_gdbd ;};for _ ,_begf :=range _aggc {if _gdbd ._gaeba <=_begf ._gaeba +_bfefd &&_begf ._gaeba <=_efdg ._gaeba +_bfefd &&_begf ._gfce <=_cafc &&_feedb <=_begf ._aegc {return true ;};};return false ;
|
||
};func (_facab rulingList )vertsHorzs ()(rulingList ,rulingList ){var _ccdc ,_dadc rulingList ;for _ ,_dbca :=range _facab {switch _dbca ._fggg {case _ggddf :_ccdc =append (_ccdc ,_dbca );case _bgge :_dadc =append (_dadc ,_dbca );};};return _ccdc ,_dadc ;
|
||
};func _bcba (_ced ,_bage _fb .PdfRectangle )bool {return _bage .Llx <=_ced .Urx &&_ced .Llx <=_bage .Urx };func _aeee (_gag ,_baeee _fb .PdfRectangle )_fb .PdfRectangle {return _fb .PdfRectangle {Llx :_d .Min (_gag .Llx ,_baeee .Llx ),Lly :_d .Min (_gag .Lly ,_baeee .Lly ),Urx :_d .Max (_gag .Urx ,_baeee .Urx ),Ury :_d .Max (_gag .Ury ,_baeee .Ury )};
|
||
};func (_bdgea *compositeCell )updateBBox (){for _ ,_gbcf :=range _bdgea .paraList {_bdgea .PdfRectangle =_aeee (_bdgea .PdfRectangle ,_gbcf .PdfRectangle );};};func _gade (_fcffa []pathSection )rulingList {_edda (_fcffa );if _fcffd {_ef .Log .Info ("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073",len (_fcffa ));
|
||
};var _bebd rulingList ;for _ ,_ebfg :=range _fcffa {for _ ,_fbcb :=range _ebfg ._acbfc {if len (_fbcb ._bdfbe )< 2{continue ;};_fgdc :=_fbcb ._bdfbe [0];for _ ,_bgd :=range _fbcb ._bdfbe [1:]{if _caedg ,_feed :=_befe (_fgdc ,_bgd ,_ebfg .Color );_feed {_bebd =append (_bebd ,_caedg );
|
||
};_fgdc =_bgd ;};};};if _fcffd {_ef .Log .Info ("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073",_bebd );};return _bebd ;};
|
||
|
||
// String returns a description of `b`.
|
||
func (_agadg *wordBag )String ()string {var _ebga []string ;for _ ,_cbccc :=range _agadg .depthIndexes (){_edge :=_agadg ._ecg [_cbccc ];for _ ,_aeg :=range _edge {_ebga =append (_ebga ,_aeg ._ecgc );};};return _f .Sprintf ("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071",_agadg .PdfRectangle ,_agadg ._fdb ,len (_ebga ),_ebga );
|
||
};func _cddd (_ffbe map[float64 ]map[float64 ]gridTile )[]float64 {_faeac :=make ([]float64 ,0,len (_ffbe ));_bbgf :=make (map[float64 ]struct{},len (_ffbe ));for _ ,_gdcda :=range _ffbe {for _bgae :=range _gdcda {if _ ,_efgad :=_bbgf [_bgae ];_efgad {continue ;
|
||
};_faeac =append (_faeac ,_bgae );_bbgf [_bgae ]=struct{}{};};};_e .Float64s (_faeac );return _faeac ;};func (_fbgg *textObject )setTextRise (_cbc float64 ){if _fbgg ==nil {return ;};_fbgg ._ebag ._eeg =_cbc ;};func (_adaa *textPara )text ()string {_addf :=new (_aga .Buffer );
|
||
_adaa .writeText (_addf );return _addf .String ();};const _eegb =1.0/1000.0;func (_adae *wordBag )sort (){for _ ,_dcfg :=range _adae ._ecg {_e .Slice (_dcfg ,func (_caab ,_ebef int )bool {return _bfefa (_dcfg [_caab ],_dcfg [_ebef ])< 0});};};func (_degf *textObject )moveLP (_fgc ,_bcde float64 ){_degf ._fbgc .Concat (_cb .NewMatrix (1,0,0,1,_fgc ,_bcde ));
|
||
_degf ._gfc =_degf ._fbgc ;};func (_ecbg rulingList )isActualGrid ()(rulingList ,bool ){_bbadd ,_bfefe :=_ecbg .augmentGrid ();if !(len (_bbadd )>=_gede +1&&len (_bfefe )>=_cfae +1){if _fcffd {_ef .Log .Info ("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064",len (_bbadd ),len (_bfefe ),_gede +1,_cfae +1);
|
||
};return nil ,false ;};if _fcffd {_ef .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074",_ecbg ,len (_bbadd )>=2,len (_bfefe )>=2,len (_bbadd )>=2&&len (_bfefe )>=2);
|
||
for _dbgb ,_ddff :=range _ecbg {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a",_dbgb ,_ddff );};};if _fba {_fddc ,_fbdf :=_bbadd [0],_bbadd [len (_bbadd )-1];_bfbb ,_badfe :=_bfefe [0],_bfefe [len (_bfefe )-1];if !(_eecd (_fddc ._gaeba -_bfbb ._gfce )&&_eecd (_fbdf ._gaeba -_bfbb ._aegc )&&_eecd (_bfbb ._gaeba -_fddc ._aegc )&&_eecd (_badfe ._gaeba -_fddc ._gfce )){if _fcffd {_ef .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073",_fddc ,_fbdf ,_bfbb ,_badfe );
|
||
};return nil ,false ;};}else {if !_bbadd .aligned (){if _cff {_ef .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064",len (_bbadd ));
|
||
};return nil ,false ;};if !_bfefe .aligned (){if _fcffd {_ef .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064",len (_bfefe ));
|
||
};return nil ,false ;};};_dcdegc :=append (_bbadd ,_bfefe ...);return _dcdegc ,true ;};func (_bdbe *textWord )bbox ()_fb .PdfRectangle {return _bdbe .PdfRectangle };func (_gdbf *textLine )markWordBoundaries (){_deec :=_gffb *_gdbf ._agbc ;for _gaeg ,_beadf :=range _gdbf ._aacg [1:]{if _efdb (_beadf ,_gdbf ._aacg [_gaeg ])>=_deec {_beadf ._fecff =true ;
|
||
};};};func (_ggf *wordBag )absorb (_cfa *wordBag ){_cgcf :=_cfa .makeRemovals ();for _gegae ,_gcfd :=range _cfa ._ecg {for _ ,_agadd :=range _gcfd {_ggf .pullWord (_agadd ,_gegae ,_cgcf );};};_cfa .applyRemovals (_cgcf );};func (_fefa *wordBag )firstReadingIndex (_aeba int )int {_cea :=_fefa .firstWord (_aeba )._bgfca ;
|
||
_ebf :=float64 (_aeba +1)*_eeaf ;_fcd :=_ebf +_gegg *_cea ;_baee :=_aeba ;for _ ,_ada :=range _fefa .depthBand (_ebf ,_fcd ){if _bfefa (_fefa .firstWord (_ada ),_fefa .firstWord (_baee ))< 0{_baee =_ada ;};};return _baee ;};func _afcd (_abca ,_edeg _cb .Point )rulingKind {_bcfe :=_d .Abs (_abca .X -_edeg .X );
|
||
_bdgf :=_d .Abs (_abca .Y -_edeg .Y );return _fecbg (_bcfe ,_bdgf ,_daac );};func _gaeb (_gbde _fb .PdfRectangle ,_bbbe []*textLine )*textPara {return &textPara {PdfRectangle :_gbde ,_eggb :_bbbe };};func (_gadc rulingList )removeDuplicates ()rulingList {if len (_gadc )==0{return nil ;
|
||
};_gadc .sort ();_fbbfb :=rulingList {_gadc [0]};for _ ,_edgd :=range _gadc [1:]{if _edgd .equals (_fbbfb [len (_fbbfb )-1]){continue ;};_fbbfb =append (_fbbfb ,_edgd );};return _fbbfb ;};func (_ccbb *textObject )setHorizScaling (_acda float64 ){if _ccbb ==nil {return ;
|
||
};_ccbb ._ebag ._fff =_acda ;};func (_dbgd *subpath )add (_dfdg ..._cb .Point ){_dbgd ._bdfbe =append (_dbgd ._bdfbe ,_dfdg ...)};
|
||
|
||
// String returns a string descibing `i`.
|
||
func (_ccad gridTile )String ()string {_ggdc :=func (_geae bool ,_ecbec string )string {if _geae {return _ecbec ;};return "\u005f";};return _f .Sprintf ("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073",_ccad .PdfRectangle ,_ggdc (_ccad ._adgdg ,"\u004c"),_ggdc (_ccad ._bdgc ,"\u0052"),_ggdc (_ccad ._cbee ,"\u0042"),_ggdc (_ccad ._fga ,"\u0054"));
|
||
};func (_eegd lineRuling )xMean ()float64 {return 0.5*(_eegd ._fbfg .X +_eegd ._cade .X )};func (_faga *textObject )nextLine (){_faga .moveLP (0,-_faga ._ebag ._deg )};func _caaa (_eebcf ,_ebgdd float64 )bool {return _d .Abs (_eebcf -_ebgdd )<=_bdfd };
|
||
func (_fcdb rulingList )merge ()*ruling {_aedef :=_fcdb [0]._gaeba ;_egcfb :=_fcdb [0]._gfce ;_gabe :=_fcdb [0]._aegc ;for _ ,_gged :=range _fcdb [1:]{_aedef +=_gged ._gaeba ;if _gged ._gfce < _egcfb {_egcfb =_gged ._gfce ;};if _gged ._aegc > _gabe {_gabe =_gged ._aegc ;
|
||
};};_febbf :=&ruling {_fggg :_fcdb [0]._fggg ,_fccd :_fcdb [0]._fccd ,Color :_fcdb [0].Color ,_gaeba :_aedef /float64 (len (_fcdb )),_gfce :_egcfb ,_aegc :_gabe };if _cff {_ef .Log .Info ("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073",len (_fcdb ),_febbf );
|
||
for _eddfa ,_bcbe :=range _fcdb {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_eddfa ,_bcbe );};};return _febbf ;};func (_bead *textObject )getFillColor ()_b .Color {return _cabc (_bead ._gbf .ColorspaceNonStroking ,_bead ._gbf .ColorNonStroking );
|
||
};func (_afeef paraList )findTextTables ()[]*textTable {var _egdd []*textTable ;for _ ,_beeb :=range _afeef {if _beeb .taken ()||_beeb .Width ()==0{continue ;};_dbaf :=_beeb .isAtom ();if _dbaf ==nil {continue ;};_dbaf .growTable ();if _dbaf ._acebgf *_dbaf ._ecbc < _eggc {continue ;
|
||
};_dbaf .markCells ();_dbaf .log ("\u0067\u0072\u006fw\u006e");_egdd =append (_egdd ,_dbaf );};return _egdd ;};func _abbe (_gbcg *wordBag ,_fec *textWord ,_ebfa float64 )bool {return _gbcg .Urx <=_fec .Llx &&_fec .Llx < _gbcg .Urx +_ebfa ;};func (_dcb *textObject )moveText (_eaee ,_bce float64 ){_dcb .moveLP (_eaee ,_bce )};
|
||
func _gdfc (_dcce ,_gabce *textPara )bool {if _dcce ._gbcbg ||_gabce ._gbcbg {return true ;};return _gdga (_dcce .depth ()-_gabce .depth ());};func (_fgg pathSection )bbox ()_fb .PdfRectangle {_fbb :=_fgg ._acbfc [0]._bdfbe [0];_eafe :=_fb .PdfRectangle {Llx :_fbb .X ,Urx :_fbb .X ,Lly :_fbb .Y ,Ury :_fbb .Y };
|
||
_bebg :=func (_cfgb _cb .Point ){if _cfgb .X < _eafe .Llx {_eafe .Llx =_cfgb .X ;}else if _cfgb .X > _eafe .Urx {_eafe .Urx =_cfgb .X ;};if _cfgb .Y < _eafe .Lly {_eafe .Lly =_cfgb .Y ;}else if _cfgb .Y > _eafe .Ury {_eafe .Ury =_cfgb .Y ;};};for _ ,_bcfa :=range _fgg ._acbfc [0]._bdfbe [1:]{_bebg (_bcfa );
|
||
};for _ ,_aaf :=range _fgg ._acbfc [1:]{for _ ,_ggcc :=range _aaf ._bdfbe {_bebg (_ggcc );};};return _eafe ;};func _fgce (_fbff int ,_afccd map[int ][]float64 )([]int ,int ){_gcgac :=make ([]int ,_fbff );_ggcag :=0;for _ddccb :=0;_ddccb < _fbff ;_ddccb ++{_gcgac [_ddccb ]=_ggcag ;
|
||
_ggcag +=len (_afccd [_ddccb ])+1;};return _gcgac ,_ggcag ;};func (_cdbf paraList )lines ()[]*textLine {var _dgad []*textLine ;for _ ,_adba :=range _cdbf {_dgad =append (_dgad ,_adba ._eggb ...);};return _dgad ;};func (_agag *textObject )renderText (_efa []byte )error {if _agag ._cgde {_ef .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");
|
||
return nil ;};_bedg :=_agag .getCurrentFont ();_bgf :=_bedg .BytesToCharcodes (_efa );_cfg ,_gdd ,_ecea :=_bedg .CharcodesToStrings (_bgf );if _ecea > 0{_ef .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_gdd ,_ecea );
|
||
};_agag ._ebag ._egcee +=_gdd ;_agag ._ebag ._ecac +=_ecea ;_caec :=_agag ._ebag ;_eeb :=_caec ._gbgg ;_cbfb :=_caec ._fff /100.0;_cbcc :=_eegb ;if _bedg .Subtype ()=="\u0054\u0079\u0070e\u0033"{_cbcc =1;};_gfdeb ,_ecda :=_bedg .GetRuneMetrics (' ');if !_ecda {_gfdeb ,_ecda =_bedg .GetCharMetrics (32);
|
||
};if !_ecda {_gfdeb ,_ =_fb .DefaultFont ().GetRuneMetrics (' ');};_beb :=_gfdeb .Wx *_cbcc ;_ef .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_beb ,_cfg ,_bedg ,_eeb );
|
||
_deba :=_cb .NewMatrix (_eeb *_cbfb ,0,0,_eeb ,0,_caec ._eeg );if _beag {_ef .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_bgf ),_bgf ,_cfg );
|
||
};_ef .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_bgf ),_bgf ,len (_cfg ));_ddgb :=_agag .getFillColor ();
|
||
_gcc :=_agag .getStrokeColor ();for _dacd ,_dccg :=range _cfg {_cdd :=[]rune (_dccg );if len (_cdd )==1&&_cdd [0]=='\x00'{continue ;};_cebd :=_bgf [_dacd ];_aeff :=_agag ._gbf .CTM .Mult (_agag ._gfc ).Mult (_deba );_fgea :=0.0;if len (_cdd )==1&&_cdd [0]==32{_fgea =_caec ._dgd ;
|
||
};_bcdb ,_fdfe :=_bedg .GetCharMetrics (_cebd );if !_fdfe {_ef .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_cebd ,_cdd ,_cdd ,_bedg );
|
||
return _f .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_bedg .String (),_cebd );};_badb :=_cb .Point {X :_bcdb .Wx *_cbcc ,Y :_bcdb .Wy *_cbcc };
|
||
_ffcbe :=_cb .Point {X :(_badb .X *_eeb +_fgea )*_cbfb };_ggce :=_cb .Point {X :(_badb .X *_eeb +_caec ._dgae +_fgea )*_cbfb };if _beag {_ef .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_eeb ,_caec ._dgae ,_caec ._dgd ,_cbfb );
|
||
_ef .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_badb ,_ffcbe ,_ggce );};_gcg :=_cgef (_ffcbe );_cce :=_cgef (_ggce );_efg :=_agag ._gbf .CTM .Mult (_agag ._gfc ).Mult (_gcg );
|
||
if _abgf {_ef .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_agag ._gbf .CTM ,_agag ._gfc ,_cce ,_accf (_agag ._gbf .CTM .Mult (_agag ._gfc ).Mult (_cce )),_gcg ,_efg ,_accf (_efg ));
|
||
};_ggac ,_dgee :=_agag .newTextMark (_eb .ExpandLigatures (_cdd ),_aeff ,_accf (_efg ),_d .Abs (_beb *_aeff .ScalingFactorX ()),_bedg ,_agag ._ebag ._dgae ,_ddgb ,_gcc );if !_dgee {_ef .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");
|
||
continue ;};if _bedg ==nil {_ef .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _bedg .Encoder ()==nil {_ef .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_bedg );
|
||
}else {if _feg ,_gcf :=_bedg .Encoder ().CharcodeToRune (_cebd );_gcf {_ggac ._egga =string (_feg );};};_ef .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_dacd ,_cebd ,_ggac ,_aeff );
|
||
_agag ._faf =append (_agag ._faf ,&_ggac );_agag ._gfc .Concat (_cce );};return nil ;};func (_agegd *textTable )subdivide ()*textTable {_agegd .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");_acefd :=_agegd .compositeRowCorridors ();_ebccf :=_agegd .compositeColCorridors ();
|
||
if _dcdd {_ef .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_bebba (_acefd ),_bebba (_ebccf ));
|
||
};if len (_acefd )==0||len (_ebccf )==0{return _agegd ;};_eaac (_acefd );_eaac (_ebccf );if _dcdd {_ef .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_bebba (_acefd ),_bebba (_ebccf ));
|
||
};_bbdad ,_aage :=_fgce (_agegd ._ecbc ,_acefd );_dgdg ,_gdab :=_fgce (_agegd ._acebgf ,_ebccf );_adfa :=make (map[uint64 ]*textPara ,_gdab *_aage );_ggca :=&textTable {PdfRectangle :_agegd .PdfRectangle ,_cbbaf :_agegd ._cbbaf ,_ecbc :_aage ,_acebgf :_gdab ,_cadf :_adfa };
|
||
if _dcdd {_ef .Log .Info ("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_agegd ._acebgf ,_agegd ._ecbc ,_gdab ,_aage ,_bebba (_acefd ),_bebba (_ebccf ),_bbdad ,_dgdg );
|
||
};for _gaed :=0;_gaed < _agegd ._ecbc ;_gaed ++{_cfcf :=_bbdad [_gaed ];for _edcfc :=0;_edcfc < _agegd ._acebgf ;_edcfc ++{_eggce :=_dgdg [_edcfc ];if _dcdd {_f .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_edcfc ,_gaed ,_eggce ,_cfcf );
|
||
};_aaaf ,_ffcfa :=_agegd ._accd [_eggg (_edcfc ,_gaed )];if !_ffcfa {continue ;};_bbfe :=_aaaf .split (_acefd [_gaed ],_ebccf [_edcfc ]);for _cbbd :=0;_cbbd < _bbfe ._ecbc ;_cbbd ++{for _ddef :=0;_ddef < _bbfe ._acebgf ;_ddef ++{_gccfd :=_bbfe .get (_ddef ,_cbbd );
|
||
_ggca .put (_eggce +_ddef ,_cfcf +_cbbd ,_gccfd );if _dcdd {_f .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_eggce +_ddef ,_cfcf +_cbbd ,_gccfd );};};};};};return _ggca ;};type stateStack []*textState ;var (_bd =_a .New ("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072");
|
||
_ca =_a .New ("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072"););func (_gebdd *textTable )newTablePara ()*textPara {_fedbb :=_gebdd .computeBbox ();_cacf :=&textPara {PdfRectangle :_fedbb ,_gcag :_fedbb ,_ebdd :_gebdd };
|
||
if _dcdd {_ef .Log .Info ("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073",_cacf );};return _cacf ;};
|
||
|
||
// String returns a description of `k`.
|
||
func (_edccf rulingKind )String ()string {_gdcd ,_gfddf :=_aggd [_edccf ];if !_gfddf {return _f .Sprintf ("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064",_edccf );};return _gdcd ;};func (_dgb *stateStack )size ()int {return len (*_dgb )};
|
||
func (_ddb *textObject )setWordSpacing (_ggba float64 ){if _ddb ==nil {return ;};_ddb ._ebag ._dgd =_ggba ;};func (_cbda gridTiling )complete ()bool {for _ ,_decge :=range _cbda ._efgf {for _ ,_gfef :=range _decge {if !_gfef .complete (){return false ;
|
||
};};};return true ;};func (_dgbe paraList )llyOrdering ()[]int {_faca :=make ([]int ,len (_dgbe ));for _badf :=range _dgbe {_faca [_badf ]=_badf ;};_e .SliceStable (_faca ,func (_fbcg ,_caga int )bool {_bece ,_aefg :=_faca [_fbcg ],_faca [_caga ];return _dgbe [_bece ].Lly < _dgbe [_aefg ].Lly ;
|
||
});return _faca ;};func (_dace paraList )topoOrder ()[]int {if _cfeg {_ef .Log .Info ("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a");};_eaaf :=len (_dace );_gabg :=make ([]bool ,_eaaf );_cdfd :=make ([]int ,0,_eaaf );_beeg :=_dace .llyOrdering ();
|
||
var _acfg func (_aefc int );_acfg =func (_gccf int ){_gabg [_gccf ]=true ;for _ddaa :=0;_ddaa < _eaaf ;_ddaa ++{if !_gabg [_ddaa ]{if _dace .readBefore (_beeg ,_gccf ,_ddaa ){_acfg (_ddaa );};};};_cdfd =append (_cdfd ,_gccf );};for _bdcg :=0;_bdcg < _eaaf ;
|
||
_bdcg ++{if !_gabg [_bdcg ]{_acfg (_bdcg );};};return _bbdc (_cdfd );};func _adaae (_eafg _fb .PdfRectangle )*ruling {return &ruling {_fggg :_ggddf ,_gaeba :_eafg .Llx ,_gfce :_eafg .Lly ,_aegc :_eafg .Ury };};type cachedImage struct{_bfa *_fb .Image ;
|
||
_cdb _fb .PdfColorspace ;};func (_gacf rulingList )sort (){_e .Slice (_gacf ,_gacf .comp )};func (_fgfc rulingList )comp (_dgfc ,_fcaa int )bool {_edcf ,_fceaf :=_fgfc [_dgfc ],_fgfc [_fcaa ];_bfbbd ,_gfdag :=_edcf ._fggg ,_fceaf ._fggg ;if _bfbbd !=_gfdag {return _bfbbd > _gfdag ;
|
||
};if _bfbbd ==_cgbd {return false ;};_ceeb :=func (_feafb bool )bool {if _bfbbd ==_bgge {return _feafb ;};return !_feafb ;};_cddc ,_dabe :=_edcf ._gaeba ,_fceaf ._gaeba ;if _cddc !=_dabe {return _ceeb (_cddc > _dabe );};_cddc ,_dabe =_edcf ._gfce ,_fceaf ._gfce ;
|
||
if _cddc !=_dabe {return _ceeb (_cddc < _dabe );};return _ceeb (_edcf ._aegc < _fceaf ._aegc );};func (_edacg *textMark )bbox ()_fb .PdfRectangle {return _edacg .PdfRectangle };func _fdfbf (_gcbf []*textWord ,_gffbb *textWord )[]*textWord {for _bbaea ,_eeae :=range _gcbf {if _eeae ==_gffbb {return _aedec (_gcbf ,_bbaea );
|
||
};};_ef .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_gffbb );
|
||
return nil ;};func _abga (_dbge string )string {_affbf :=[]rune (_dbge );return string (_affbf [:len (_affbf )-1])};func _debb (_bgedb string ,_dccfg int )string {if len (_bgedb )< _dccfg {return _bgedb ;};return _bgedb [:_dccfg ];};func _fabf (_decd ,_fagg _fb .PdfRectangle )bool {return _bcba (_decd ,_fagg )&&_adbd (_decd ,_fagg )};
|
||
|
||
|
||
// String returns a string describing the current state of the textState stack.
// The result starts with a "---- font stack: N" header line (N is the stack
// length), followed by one tab-indented "index: entry" line per element, where a
// nil entry is shown as "<nil>". Lines are joined with "\n".
|
||
func (_cec *stateStack )String ()string {_cgc :=[]string {_f .Sprintf ("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064",len (*_cec ))};for _ecee ,_bge :=range *_cec {_abe :="\u003c\u006e\u0069l\u003e";
|
||
if _bge !=nil {_abe =_bge .String ();};_cgc =append (_cgc ,_f .Sprintf ("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073",_ecee ,_abe ));};return _ag .Join (_cgc ,"\u000a");};
// asTiling converts the rulings in the receiver into a gridTiling.
//
// Steps, as implemented below:
//  1. Each ruling is compared with its predecessor; a pair that aligns in both
//     primary and secondary is reported as "Duplicate rulings" (log only).
//  2. The list is sorted with sortStrict() and split by vertsHorzs(); the
//     primaries() of each half give the grid coordinates.
//  3. One gridTile is built per grid cell with _fbac from the rulings found by
//     findPrimSec on the cell's four sides.
//  4. Tiles are coalesced horizontally within each row, then vertically across
//     rows; only tiles for which complete() is true are kept.
//
// An empty gridTiling is returned when either primaries list has exactly one entry.
// All Printf/Info output is gated on the package flag _cgdd.
//
// NOTE(review): `_caaf [1:]` panics when the receiver is empty, and the
// `==0` guard does not cover an empty primaries list (len-1 == -1), in which case
// the `[0]` indexing below would panic — presumably callers guarantee non-empty
// vertical and horizontal rulings; confirm against toGrids().
func (_caaf rulingList )asTiling ()gridTiling {if _cgdd {_ef .Log .Info ("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_caaf ));
|
||
};
// Compare every ruling with the one before it (index _febad addresses the
// predecessor because the range is over _caaf[1:]).
for _febad ,_gcage :=range _caaf [1:]{_babg :=_caaf [_febad ];if _babg .alignsPrimary (_gcage )&&_babg .alignsSec (_gcage ){_ef .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_gcage ,_babg );
|
||
};};
// Sort, split into vertical/horizontal rulings and take their primary
// coordinates; _ccaaf and _cedb are the number of grid columns and rows.
_caaf .sortStrict ();_caaf .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_bcaeb ,_bbdb :=_caaf .vertsHorzs ();_gaac :=_bcaeb .primaries ();_fgcgd :=_bbdb .primaries ();_ccaaf :=len (_gaac )-1;_cedb :=len (_fgcgd )-1;if _ccaaf ==0||_cedb ==0{return gridTiling {};
|
||
};
// Bounding rectangle of the whole grid, from the first and last primaries.
_fdab :=_fb .PdfRectangle {Llx :_gaac [0],Urx :_gaac [_ccaaf ],Lly :_fgcgd [0],Ury :_fgcgd [_cedb ]};if _cgdd {_ef .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_bcaeb ));
|
||
for _feadf ,_afea :=range _bcaeb {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_feadf ,_afea );};_ef .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_bbdb ));
|
||
for _bbeb ,_gabbc :=range _bbdb {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bbeb ,_gabbc );};_ef .Log .Info ("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_ccaaf ,_cedb ,_gaac ,_fgcgd );
|
||
};
// Build one tile per grid cell, stored row-major at index row*_ccaaf+column.
// Rows are visited from the highest index (largest y) down to 0.
_gfb :=make ([]gridTile ,_ccaaf *_cedb );for _edgee :=_cedb -1;_edgee >=0;_edgee --{_gaae :=_fgcgd [_edgee ];_febb :=_fgcgd [_edgee +1];for _gfaad :=0;_gfaad < _ccaaf ;_gfaad ++{_accc :=_gaac [_gfaad ];_acbe :=_gaac [_gfaad +1];_bebc :=_bcaeb .findPrimSec (_accc ,_gaae );
|
||
_abaa :=_bcaeb .findPrimSec (_acbe ,_gaae );_dbddcd :=_bbdb .findPrimSec (_gaae ,_accc );_aaef :=_bbdb .findPrimSec (_febb ,_accc );_fbaf :=_fb .PdfRectangle {Llx :_accc ,Urx :_acbe ,Lly :_gaae ,Ury :_febb };_edbeb :=_fbac (_fbaf ,_bebc ,_abaa ,_dbddcd ,_aaef );
|
||
_gfb [_edgee *_ccaaf +_gfaad ]=_edbeb ;if _cgdd {_f .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_gfaad ,_edgee ,_edbeb .String (),_edbeb .Width (),_edbeb .Height ());
|
||
};};};if _cgdd {_ef .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_fdab );
|
||
};
// Horizontal coalescing: in each row, a tile with _adgdg set absorbs the tiles
// to its right (taking their Urx, OR-ing _fga and _cbee, copying _bdgc) until
// _bdgc is set or the row ends. Tiles without _adgdg are skipped. The merged
// tile is stored in _ceda[row] keyed by its Llx, and the column index jumps past
// the absorbed tiles. (Presumably _adgdg/_bdgc are the left/right border flags
// and _cbee/_fga the bottom/top ones — TODO confirm against gridTile.)
_ceda :=make ([]map[float64 ]gridTile ,_cedb );for _ccgg :=_cedb -1;_ccgg >=0;_ccgg --{if _cgdd {_f .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_ccgg );};_ceda [_ccgg ]=make (map[float64 ]gridTile ,_ccaaf );for _bbfcb :=0;_bbfcb < _ccaaf ;
|
||
_bbfcb ++{_ebdaf :=_gfb [_ccgg *_ccaaf +_bbfcb ];if _cgdd {_f .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bbfcb ,_ebdaf );};if !_ebdaf ._adgdg {continue ;};_bdcf :=_bbfcb ;for _gbbd :=_bbfcb +1;!_ebdaf ._bdgc &&_gbbd < _ccaaf ;
|
||
_gbbd ++{_aagg :=_gfb [_ccgg *_ccaaf +_gbbd ];_ebdaf .Urx =_aagg .Urx ;_ebdaf ._fga =_ebdaf ._fga ||_aagg ._fga ;_ebdaf ._cbee =_ebdaf ._cbee ||_aagg ._cbee ;_ebdaf ._bdgc =_aagg ._bdgc ;if _cgdd {_f .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_gbbd ,_aagg ,_ebdaf );
|
||
};_bdcf =_gbbd ;};if _cgdd {_f .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_bbfcb ,_bdcf ,_ebdaf );};_bbfcb =_bdcf ;_ceda [_ccgg ][_ebdaf .Llx ]=_ebdaf ;};};
// Vertical coalescing: _fdcf collects the final tiles keyed by row Lly then
// tile Llx; _deacd records (Lly, Llx) of tiles already absorbed into a tile from
// a higher row so they are not emitted again.
_fdcf :=make (map[float64 ]map[float64 ]gridTile ,_cedb );
|
||
_deacd :=make (map[float64 ]map[float64 ]struct{},_cedb );for _decb :=_cedb -1;_decb >=0;_decb --{_fffg :=_gfb [_decb *_ccaaf ].Lly ;_fdcf [_fffg ]=make (map[float64 ]gridTile ,_ccaaf );_deacd [_fffg ]=make (map[float64 ]struct{},_ccaaf );};if _cgdd {_ef .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_fdab );
|
||
};for _bbde :=_cedb -1;_bbde >=0;_bbde --{_ffcbd :=_gfb [_bbde *_ccaaf ].Lly ;_bdbaf :=_ceda [_bbde ];if _cgdd {_f .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_bbde );};for _ ,_gbee :=range _gfca (_bdbaf ){if _ ,_gdaa :=_deacd [_ffcbd ][_gbee ];
|
||
// Each surviving tile extends into lower rows while _cbee is unset and the row
// below has a tile with the same Llx and Urx.
_gdaa {continue ;};_eecfc :=_bdbaf [_gbee ];if _cgdd {_f .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_eecfc .String ());};for _dbeb :=_bbde -1;_dbeb >=0;_dbeb --{if _eecfc ._cbee {break ;};_dcceb :=_ceda [_dbeb ];_eafd ,_ebca :=_dcceb [_gbee ];
|
||
if !_ebca {break ;};if _eafd .Urx !=_eecfc .Urx {break ;};_eecfc ._cbee =_eafd ._cbee ;_eecfc .Lly =_eafd .Lly ;if _cgdd {_f .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_eafd .String (),_eecfc .String ());
|
||
// Row 0 always gets _cbee forced to true; only complete() tiles are stored.
};_deacd [_eafd .Lly ][_eafd .Llx ]=struct{}{};};if _bbde ==0{_eecfc ._cbee =true ;};if _eecfc .complete (){_fdcf [_ffcbd ][_gbee ]=_eecfc ;};};};_dbcg :=gridTiling {PdfRectangle :_fdab ,_acfggbg :_cddd (_fdcf ),_agdf :_fdcaa (_fdcf ),_efgf :_fdcf };_dbcg .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");
|
||
return _dbcg ;};
// minDepth returns _cbea - (Ury - _fdb) for the bag.
func (_accb *wordBag )minDepth ()float64 {return _accb ._cbea -(_accb .Ury -_accb ._fdb )};
// findTableGrid tries to build a textTable from the paragraphs in the receiver
// that fall inside the tiles of _beacc.
//
// It returns the table (after reduceTiling and subdivide) together with the set
// of paragraphs that were placed in tiles. It returns (nil, nil) when the number
// of tiles containing no paragraphs exceeds int((1-_acdad)*width*height), or when
// every cell in row 0 is non-nil with _gbcbg set (logged as "!numHeader=0").
func (_acac paraList )findTableGrid (_beacc gridTiling )(*textTable ,map[*textPara ]struct{}){_fagaf :=len (_beacc ._acfggbg );_fdbaff :=len (_beacc ._agdf );
|
||
_abfa :=textTable {_cbbaf :true ,_acebgf :_fagaf ,_ecbc :_fdbaff ,_cadf :make (map[uint64 ]*textPara ,_fagaf *_fdbaff ),_accd :make (map[uint64 ]compositeCell ,_fagaf *_fdbaff )};_afgba :=make (map[*textPara ]struct{});_ggfc :=int ((1.0-_acdad )*float64 (_fagaf *_fdbaff ));
|
||
// _bgde counts empty tiles; _ggfc (above) is the largest count tolerated.
// Tiles are looked up as _efgf[row key from _agdf][column key from _acfggbg];
// missing rows or tiles are skipped without being counted as empty.
_bgde :=0;if _cgdd {_ef .Log .Info ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064",_fagaf ,_fdbaff );};for _aggfc ,_agaf :=range _beacc ._agdf {_aebdg ,_cfbfa :=_beacc ._efgf [_agaf ];
|
||
if !_cfbfa {continue ;};for _eade ,_dffgd :=range _beacc ._acfggbg {_agbg ,_fgeb :=_aebdg [_dffgd ];if !_fgeb {continue ;};_bdgfg :=_acac .inTile (_agbg );if len (_bdgfg )==0{_bgde ++;if _bgde > _ggfc {if _cgdd {_ef .Log .Info ("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064",_bgde );
|
||
};return nil ,nil ;};}else {_abfa .putComposite (_eade ,_aggfc ,_bdgfg ,_agbg .PdfRectangle );for _ ,_bceg :=range _bdgfg {_afgba [_bceg ]=struct{}{};};};};};
// Count the row-0 cells that are nil or do not have _gbcbg set; give up if
// there are none.
_adfaa :=0;for _bfge :=0;_bfge < _fagaf ;_bfge ++{_gbdbd :=_abfa .get (_bfge ,0);if _gbdbd ==nil ||!_gbdbd ._gbcbg {_adfaa ++;
|
||
};};if _adfaa ==0{if _cgdd {_ef .Log .Info ("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030");};return nil ,nil ;};_bfdac :=_abfa .reduceTiling (_beacc ,_dbdb );_bfdac =_bfdac .subdivide ();return _bfdac ,_afgba ;};
// textState holds the per-text-object state: six float64 values, a RenderMode,
// the current font, a rectangle and two int counters. Field _deg is the value
// written by textObject.setTextLeading.
// NOTE(review): the meaning of the other fields is not visible here — presumably
// the PDF text-state parameters (character/word spacing, scaling, font size,
// rise); confirm against the operators that set them.
type textState struct{_dgae float64 ;
|
||
_dgd float64 ;_fff float64 ;_deg float64 ;_gbgg float64 ;_ddg RenderMode ;_eeg float64 ;_dec *_fb .PdfFont ;_ade _fb .PdfRectangle ;_egcee int ;_ecac int ;};
|
||
|
||
// ExtractPageImages returns the image contents of the page extractor, including data
|
||
// and position, size information for each image.
|
||
// A set of options to control page image extraction can be passed in. The options
|
||
// parameter can be nil for the default options. By default, inline stencil masks
|
||
// are not extracted.
|
||
func (_bdf *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_bf :=&imageExtractContext {_fd :options };_gca :=_bf .extractContentStreamImages (_bdf ._gd ,_bdf ._be );if _gca !=nil {return nil ,_gca ;};return &PageImages {Images :_bf ._ba },nil ;
|
||
};func (_dbgdd *textPara )taken ()bool {return _dbgdd ==nil ||_dbgdd ._gebc };const (_cgbd rulingKind =iota ;_bgge ;_ggddf ;);type rulingKind int ;
|
||
|
||
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
|
||
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
|
||
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
|
||
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
|
||
type RenderMode int ;func _bebba (_egcag map[int ][]float64 )string {_gbede :=_bfgb (_egcag );_dcfd :=make ([]string ,len (_egcag ));for _gadg ,_adcg :=range _gbede {_dcfd [_gadg ]=_f .Sprintf ("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066",_adcg ,_egcag [_adcg ]);
|
||
};return _f .Sprintf ("\u007b\u0025\u0073\u007d",_ag .Join (_dcfd ,"\u002c\u0020"));};var _dfbf =_ae .MustCompile ("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024");
|
||
|
||
|
||
// String returns a human readable description of `ss`.
|
||
func (_cbdf *shapesState )String ()string {return _f .Sprintf ("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d",len (_cbdf ._gceb ),_cbdf ._eef );};func _bfgb (_fdbb map[int ][]float64 )[]int {_abagc :=make ([]int ,len (_fdbb ));
|
||
_egba :=0;for _fegfa :=range _fdbb {_abagc [_egba ]=_fegfa ;_egba ++;};_e .Ints (_abagc );return _abagc ;};const _gcd =20;func (_bdgb *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_fefd :=make (map[int ]map[*textWord ]struct{},len (_bdgb ._ecg ));
|
||
for _effg :=range _bdgb ._ecg {_fefd [_effg ]=make (map[*textWord ]struct{});};return _fefd ;};func _cage (_ggccg float64 )int {var _ebfe int ;if _ggccg >=0{_ebfe =int (_ggccg /_eeaf );}else {_ebfe =int (_ggccg /_eeaf )-1;};return _ebfe ;};func (_ecc compositeCell )String ()string {_eddd :="";
|
||
if len (_ecc .paraList )> 0{_eddd =_debb (_ecc .paraList .merge ().text (),50);};return _f .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071",_ecc .PdfRectangle ,len (_ecc .paraList ),_eddd );};
|
||
|
||
|
||
// String returns a string describing `ma`.
|
||
func (_aba TextMarkArray )String ()string {_gdg :=len (_aba ._cbdg );if _gdg ==0{return "\u0045\u004d\u0050T\u0059";};_gdcbg :=_aba ._cbdg [0];_bbce :=_aba ._cbdg [_gdg -1];return _f .Sprintf ("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d",_gdg ,_gdcbg ,_bbce );
|
||
};type intSet map[int ]struct{};
|
||
|
||
// String returns a human readable description of `s`.
|
||
func (_baea intSet )String ()string {var _edfba []int ;for _fdcgg :=range _baea {if _baea .has (_fdcgg ){_edfba =append (_edfba ,_fdcgg );};};_e .Ints (_edfba );return _f .Sprintf ("\u0025\u002b\u0076",_edfba );};func (_cabe *wordBag )removeWord (_aebfb *textWord ,_dfg int ){_gabc :=_cabe ._ecg [_dfg ];
|
||
_gabc =_fdfbf (_gabc ,_aebfb );if len (_gabc )==0{delete (_cabe ._ecg ,_dfg );}else {_cabe ._ecg [_dfg ]=_gabc ;};};func _edda (_dgfg []pathSection ){if _ecaf < 0.0{return ;};if _fcffd {_ef .Log .Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_dgfg ));
|
||
};for _cfgg ,_fbbfe :=range _dgfg {for _ffdc ,_aabd :=range _fbbfe ._acbfc {for _cgdfc ,_bfcf :=range _aabd ._bdfbe {_aabd ._bdfbe [_cgdfc ]=_cb .Point {X :_ddfaf (_bfcf .X ),Y :_ddfaf (_bfcf .Y )};if _fcffd {_aaded :=_aabd ._bdfbe [_cgdfc ];if !_badbe (_bfcf ,_aaded ){_abcc :=_cb .Point {X :_aaded .X -_bfcf .X ,Y :_aaded .Y -_bfcf .Y };
|
||
_f .Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_cfgg ,_ffdc ,_cgdfc ,_bfcf ,_aaded ,_abcc );};};};};};};type event struct{_fcdf float64 ;
|
||
_dded bool ;_fgeg int ;};func (_fdee paraList )readBefore (_bdd []int ,_aebe ,_gfedc int )bool {_acebg ,_dafa :=_fdee [_aebe ],_fdee [_gfedc ];if _gdfce (_acebg ,_dafa )&&_acebg .Lly > _dafa .Lly {return true ;};if !(_acebg ._gcag .Urx < _dafa ._gcag .Llx ){return false ;
|
||
};_gecg ,_cagec :=_acebg .Lly ,_dafa .Lly ;if _gecg > _cagec {_cagec ,_gecg =_gecg ,_cagec ;};_dggf :=_d .Max (_acebg ._gcag .Llx ,_dafa ._gcag .Llx );_aged :=_d .Min (_acebg ._gcag .Urx ,_dafa ._gcag .Urx );_eeec :=_fdee .llyRange (_bdd ,_gecg ,_cagec );
|
||
for _ ,_cadcb :=range _eeec {if _cadcb ==_aebe ||_cadcb ==_gfedc {continue ;};_ebbb :=_fdee [_cadcb ];if _ebbb ._gcag .Llx <=_aged &&_dggf <=_ebbb ._gcag .Urx {return false ;};};return true ;};func _agdg (_ceeae _fb .PdfRectangle )*ruling {return &ruling {_fggg :_ggddf ,_gaeba :_ceeae .Urx ,_gfce :_ceeae .Lly ,_aegc :_ceeae .Ury };
|
||
};func (_eded *textTable )getDown ()paraList {_gaab :=make (paraList ,_eded ._acebgf );for _gaccb :=0;_gaccb < _eded ._acebgf ;_gaccb ++{_bfcg :=_eded .get (_gaccb ,_eded ._ecbc -1)._cggd ;if _bfcg ==nil ||_bfcg ._gebc {return nil ;};_gaab [_gaccb ]=_bfcg ;
|
||
};for _fagcf :=0;_fagcf < _eded ._acebgf -1;_fagcf ++{if _gaab [_fagcf ]._aecb !=_gaab [_fagcf +1]{return nil ;};};return _gaab ;};func (_gcfgd rulingList )augmentGrid ()(rulingList ,rulingList ){_bdfde ,_bdgba :=_gcfgd .vertsHorzs ();if len (_bdfde )==0||len (_bdgba )==0{return _bdfde ,_bdgba ;
|
||
};_aecd ,_fagc :=_bdfde ,_bdgba ;_gdfbd :=_bdfde .bbox ();_edgg :=_bdgba .bbox ();if _fcffd {_ef .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_gdfbd );
|
||
_ef .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_edgg );};var _cagbe ,_dfef ,_ebfag ,_bdgfd *ruling ;if _edgg .Llx < _gdfbd .Llx -_bdfd {_cagbe =&ruling {_fccd :_acfgg ,_fggg :_ggddf ,_gaeba :_edgg .Llx ,_gfce :_gdfbd .Lly ,_aegc :_gdfbd .Ury };
|
||
_bdfde =append (rulingList {_cagbe },_bdfde ...);};if _edgg .Urx > _gdfbd .Urx +_bdfd {_dfef =&ruling {_fccd :_acfgg ,_fggg :_ggddf ,_gaeba :_edgg .Urx ,_gfce :_gdfbd .Lly ,_aegc :_gdfbd .Ury };_bdfde =append (_bdfde ,_dfef );};if _gdfbd .Lly < _edgg .Lly -_bdfd {_ebfag =&ruling {_fccd :_acfgg ,_fggg :_bgge ,_gaeba :_gdfbd .Lly ,_gfce :_edgg .Llx ,_aegc :_edgg .Urx };
|
||
_bdgba =append (rulingList {_ebfag },_bdgba ...);};if _gdfbd .Ury > _edgg .Ury +_bdfd {_bdgfd =&ruling {_fccd :_acfgg ,_fggg :_bgge ,_gaeba :_gdfbd .Ury ,_gfce :_edgg .Llx ,_aegc :_edgg .Urx };_bdgba =append (_bdgba ,_bdgfd );};if len (_bdfde )+len (_bdgba )==len (_gcfgd ){return _aecd ,_fagc ;
|
||
};_fedb :=append (_bdfde ,_bdgba ...);_gcfgd .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_fedb .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");return _bdfde ,_bdgba ;};
|
||
|
||
// TextMark represents extracted text on a page with information regarding both textual content,
|
||
// formatting (font and size) and positioning.
|
||
// It is the smallest unit of text on a PDF page, typically a single character.
|
||
//
|
||
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
|
||
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
|
||
// `bbox` of substring `term` in `text`.
|
||
//
|
||
// ex, _ := New(page)
|
||
// // handle errors
|
||
// pageText, _, _, err := ex.ExtractPageText()
|
||
// // handle errors
|
||
// text := pageText.Text()
|
||
// textMarks := pageText.Marks()
|
||
//
|
||
// start := strings.Index(text, term)
|
||
// end := start + len(term)
|
||
// spanMarks, err := textMarks.RangeOffset(start, end)
|
||
// // handle errors
|
||
// bbox, ok := spanMarks.BBox()
|
||
// // handle errors
|
||
type TextMark struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Original is the text in the PDF. It has not been decoded like `Text`.
|
||
Original string ;
|
||
|
||
// BBox is the bounding box of the text.
|
||
BBox _fb .PdfRectangle ;
|
||
|
||
// Font is the font the text was drawn with.
|
||
Font *_fb .PdfFont ;
|
||
|
||
// FontSize is the font size the text was drawn with.
|
||
FontSize float64 ;
|
||
|
||
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
|
||
// text, textMarks := pageText.Text(), pageText.Marks()
|
||
// marks := textMarks.Elements()
|
||
// then marks[i].Offset is the offset of marks[i].Text in text.
|
||
Offset int ;
|
||
|
||
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
|
||
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
|
||
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
|
||
Meta bool ;
|
||
|
||
// FillColor is the fill color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
FillColor _b .Color ;
|
||
|
||
// StrokeColor is the stroke color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
StrokeColor _b .Color ;
|
||
|
||
// Orientation is the text orientation
|
||
Orientation int ;};func (_dfbb *textObject )getStrokeColor ()_b .Color {return _cabc (_dfbb ._gbf .ColorspaceStroking ,_dfbb ._gbf .ColorStroking );};func (_fbda *textTable )get (_cbbfed ,_aggf int )*textPara {return _fbda ._cadf [_eggg (_cbbfed ,_aggf )]};
|
||
func (_bfca rulingList )mergePrimary ()float64 {_dege :=_bfca [0]._gaeba ;for _ ,_ead :=range _bfca [1:]{_dege +=_ead ._gaeba ;};return _dege /float64 (len (_bfca ));};func (_ecgd *wordBag )blocked (_afag *textWord )bool {if _afag .Urx < _ecgd .Llx {_cde :=_agdg (_afag .PdfRectangle );
|
||
_cgba :=_adaae (_ecgd .PdfRectangle );if _ecgd ._dgdb .blocks (_cde ,_cgba ){if _caf {_ef .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_afag ,_ecgd );};return true ;};}else if _ecgd .Urx < _afag .Llx {_fcff :=_agdg (_ecgd .PdfRectangle );
|
||
_ggbf :=_adaae (_afag .PdfRectangle );if _ecgd ._dgdb .blocks (_fcff ,_ggbf ){if _caf {_ef .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s",_afag ,_ecgd );};return true ;};};if _afag .Ury < _ecgd .Lly {_acee :=_gbeb (_afag .PdfRectangle );
|
||
_fcef :=_bfaa (_ecgd .PdfRectangle );if _ecgd ._daec .blocks (_acee ,_fcef ){if _caf {_ef .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_afag ,_ecgd );};return true ;};}else if _ecgd .Ury < _afag .Lly {_bec :=_gbeb (_ecgd .PdfRectangle );
|
||
_abbd :=_bfaa (_afag .PdfRectangle );if _ecgd ._daec .blocks (_bec ,_abbd ){if _caf {_ef .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s",_afag ,_ecgd );};return true ;};};return false ;};func _bfefa (_dbee ,_eagg bounded )float64 {return _dbee .bbox ().Llx -_eagg .bbox ().Llx };
|
||
func (_eaab compositeCell )split (_gbdc ,_bbccf []float64 )*textTable {_ggcg :=len (_gbdc )+1;_ddeb :=len (_bbccf )+1;if _dcdd {_ef .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_ddeb ,_ggcg ,_eaab ,_gbdc ,_bbccf );
|
||
_f .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_eaab .paraList ));for _ebgg ,_fbce :=range _eaab .paraList {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ebgg ,_fbce .String ());};_f .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_eaab .lines ()));
|
||
for _ecde ,_edcc :=range _eaab .lines (){_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ecde ,_edcc );};};_gbdc =_aedd (_gbdc ,_eaab .Ury ,_eaab .Lly );_bbccf =_aedd (_bbccf ,_eaab .Llx ,_eaab .Urx );_bfacd :=make (map[uint64 ]*textPara ,_ddeb *_ggcg );
|
||
_ebgaf :=textTable {_acebgf :_ddeb ,_ecbc :_ggcg ,_cadf :_bfacd };_caef :=_eaab .paraList ;_e .Slice (_caef ,func (_ageb ,_abcd int )bool {_dbb ,_gfdg :=_caef [_ageb ],_caef [_abcd ];_ebcd ,_eefg :=_dbb .Lly ,_gfdg .Lly ;if _ebcd !=_eefg {return _ebcd < _eefg ;
|
||
};return _dbb .Llx < _gfdg .Llx ;});_egcf :=make (map[uint64 ]_fb .PdfRectangle ,_ddeb *_ggcg );for _cadb ,_cdbg :=range _gbdc [1:]{_fabd :=_gbdc [_cadb ];for _gcbg ,_egdc :=range _bbccf [1:]{_bedc :=_bbccf [_gcbg ];_egcf [_eggg (_gcbg ,_cadb )]=_fb .PdfRectangle {Llx :_bedc ,Urx :_egdc ,Lly :_cdbg ,Ury :_fabd };
|
||
};};if _dcdd {_ef .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073");_f .Printf ("\u0020\u0020\u0020\u0020");for _fgfg :=0;_fgfg < _ddeb ;_fgfg ++{_f .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_fgfg );
|
||
};_f .Println ();for _acgg :=0;_acgg < _ggcg ;_acgg ++{_f .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_acgg );for _cgae :=0;_cgae < _ddeb ;_cgae ++{_f .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_egcf [_eggg (_cgae ,_acgg )]);};_f .Println ();
|
||
};};_gfag :=func (_ecbe *textLine )(int ,int ){for _ecfaa :=0;_ecfaa < _ggcg ;_ecfaa ++{for _gfee :=0;_gfee < _ddeb ;_gfee ++{if _gae (_egcf [_eggg (_gfee ,_ecfaa )],_ecbe .PdfRectangle ){return _gfee ,_ecfaa ;};};};return -1,-1;};_bfdf :=make (map[uint64 ][]*textLine ,_ddeb *_ggcg );
|
||
for _ ,_fgga :=range _caef .lines (){_agagg ,_fdbc :=_gfag (_fgga );if _agagg < 0{continue ;};_bfdf [_eggg (_agagg ,_fdbc )]=append (_bfdf [_eggg (_agagg ,_fdbc )],_fgga );};for _ccaf :=0;_ccaf < len (_gbdc )-1;_ccaf ++{_bbfcd :=_gbdc [_ccaf ];_fbfa :=_gbdc [_ccaf +1];
|
||
for _bbgb :=0;_bbgb < len (_bbccf )-1;_bbgb ++{_ffb :=_bbccf [_bbgb ];_egeg :=_bbccf [_bbgb +1];_aadf :=_fb .PdfRectangle {Llx :_ffb ,Urx :_egeg ,Lly :_fbfa ,Ury :_bbfcd };_dcdeg :=_bfdf [_eggg (_bbgb ,_ccaf )];if len (_dcdeg )==0{continue ;};_dbdgb :=_gaeb (_aadf ,_dcdeg );
|
||
_ebgaf .put (_bbgb ,_ccaf ,_dbdgb );};};return &_ebgaf ;};func (_dged *textWord )computeText ()string {_eeebd :=make ([]string ,len (_dged ._aegfa ));for _dgbd ,_eccg :=range _dged ._aegfa {_eeebd [_dgbd ]=_eccg ._dfed ;};return _ag .Join (_eeebd ,"");
|
||
};
|
||
|
||
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
|
||
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
|
||
// Replace with a function like Extract() (*PageText, error)
|
||
func (_gcdc *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_bag ,_edc ,_ggdb ,_db :=_gcdc .extractPageText (_gcdc ._gd ,_gcdc ._be ,_cb .IdentityMatrix (),0);if _db !=nil {return nil ,0,0,_db ;};_bag .computeViews ();_db =_bfcfb (_bag );if _db !=nil {return nil ,0,0,_db ;
|
||
};return _bag ,_edc ,_ggdb ,nil ;};func _dead (_ecfb map[int ]intSet )[]int {_bggdb :=make ([]int ,0,len (_ecfb ));for _afaa :=range _ecfb {_bggdb =append (_bggdb ,_afaa );};_e .Ints (_bggdb );return _bggdb ;};
|
||
|
||
// Len returns the number of TextMarks in `ma`.
|
||
func (_beg *TextMarkArray )Len ()int {if _beg ==nil {return 0;};return len (_beg ._cbdg );};func _ffgc (_afcae *wordBag ,_dfbc *textWord ,_gad float64 )bool {return _dfbc .Llx < _afcae .Urx +_gad &&_afcae .Llx -_gad < _dfbc .Urx ;};
|
||
|
||
// Append appends `mark` to the mark array.
|
||
func (_fgbd *TextMarkArray )Append (mark TextMark ){_fgbd ._cbdg =append (_fgbd ._cbdg ,mark )};func (_aebb *textObject )setTextLeading (_abf float64 ){if _aebb ==nil {return ;};_aebb ._ebag ._deg =_abf ;};func (_facb paraList )writeText (_cgdb _c .Writer ){for _ccee ,_fafca :=range _facb {if _fafca ._gbcbg {continue ;
|
||
};_fafca .writeText (_cgdb );if _ccee !=len (_facb )-1{if _gdfc (_fafca ,_facb [_ccee +1]){_cgdb .Write ([]byte ("\u0020"));}else {_cgdb .Write ([]byte ("\u000a"));_cgdb .Write ([]byte ("\u000a"));};};};_cgdb .Write ([]byte ("\u000a"));_cgdb .Write ([]byte ("\u000a"));
|
||
};var _aggd =map[rulingKind ]string {_cgbd :"\u006e\u006f\u006e\u0065",_bgge :"\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_ggddf :"\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"};func (_cdfdc intSet )del (_cadcd int ){delete (_cdfdc ,_cadcd )};
|
||
func (_beab intSet )has (_aecad int )bool {_ ,_edace :=_beab [_aecad ];return _edace };func (_eaaa paraList )findGridTables (_cgfec []gridTiling )[]*textTable {if _dcdd {_ef .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_eaaa ));
|
||
for _cgbfb ,_ggda :=range _eaaa {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cgbfb ,_ggda );};};var _fcaab []*textTable ;for _fcdac ,_aebd :=range _cgfec {_gceba ,_fcae :=_eaaa .findTableGrid (_aebd );if _gceba !=nil {_gceba .log (_f .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_fcdac ));
|
||
_fcaab =append (_fcaab ,_gceba );_gceba .markCells ();};for _dfgf :=range _fcae {_dfgf ._gebc =true ;};};if _dcdd {_ef .Log .Info ("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s",len (_fcaab ));
|
||
};return _fcaab ;};func (_gcdf rulingList )toTilings ()(rulingList ,[]gridTiling ){_gcdf .log ("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s");if len (_gcdf )==0{return nil ,nil ;};_gcdf =_gcdf .tidied ("\u0061\u006c\u006c");_gcdf .log ("\u0074\u0069\u0064\u0069\u0065\u0064");
|
||
_dcdg :=_gcdf .toGrids ();_gcab :=make ([]gridTiling ,len (_dcdg ));for _baddd ,_cabf :=range _dcdg {_gcab [_baddd ]=_cabf .asTiling ();};return _gcdf ,_gcab ;};func (_dacb *textPara )depth ()float64 {if _dacb ._gbcbg {return -1.0;};if len (_dacb ._eggb )> 0{return _dacb ._eggb [0]._fdfed ;
|
||
};return _dacb ._ebdd .depth ();};func (_ddgg *ruling )gridIntersecting (_ebac *ruling )bool {return _caaa (_ddgg ._gfce ,_ebac ._gfce )&&_caaa (_ddgg ._aegc ,_ebac ._aegc );};func (_agaac *textTable )put (_bcdga ,_gffgf int ,_bbab *textPara ){_agaac ._cadf [_eggg (_bcdga ,_gffgf )]=_bbab ;
|
||
};type textResult struct{_eag PageText ;_bed int ;_gab int ;};type wordBag struct{_fb .PdfRectangle ;_fdb float64 ;_dgdb ,_daec rulingList ;_cbea float64 ;_ecg map[int ][]*textWord ;};func _efcec (_adde ,_edeff int )int {if _adde > _edeff {return _adde ;
|
||
};return _edeff ;};func (_bcab *textTable )compositeColCorridors ()map[int ][]float64 {_gbgb :=make (map[int ][]float64 ,_bcab ._acebgf );if _dcdd {_ef .Log .Info ("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020",_bcab ._acebgf );
|
||
};for _gdbfd :=0;_gdbfd < _bcab ._acebgf ;_gdbfd ++{_gbgb [_gdbfd ]=nil ;};return _gbgb ;};func (_fddf rulingList )splitSec ()[]rulingList {_e .Slice (_fddf ,func (_aegf ,_daaa int )bool {_dbfde ,_eaeac :=_fddf [_aegf ],_fddf [_daaa ];if _dbfde ._gfce !=_eaeac ._gfce {return _dbfde ._gfce < _eaeac ._gfce ;
|
||
};return _dbfde ._aegc < _eaeac ._aegc ;});_degfbd :=make (map[*ruling ]struct{},len (_fddf ));_ffae :=func (_dgfa *ruling )rulingList {_agcec :=rulingList {_dgfa };_degfbd [_dgfa ]=struct{}{};for _ ,_fbfb :=range _fddf {if _ ,_dagg :=_degfbd [_fbfb ];
|
||
_dagg {continue ;};for _ ,_caedb :=range _agcec {if _fbfb .alignsSec (_caedb ){_agcec =append (_agcec ,_fbfb );_degfbd [_fbfb ]=struct{}{};break ;};};};return _agcec ;};_dbgec :=[]rulingList {_ffae (_fddf [0])};for _ ,_cdbd :=range _fddf [1:]{if _ ,_dacg :=_degfbd [_cdbd ];
|
||
_dacg {continue ;};_dbgec =append (_dbgec ,_ffae (_cdbd ));};return _dbgec ;};func _baga (_bgece []TextMark ,_adgbb *int ,_egef TextMark )[]TextMark {_egef .Offset =*_adgbb ;_bgece =append (_bgece ,_egef );*_adgbb +=len (_egef .Text );return _bgece ;};
|
||
func (_faec *shapesState )moveTo (_eege ,_aade float64 ){_faec ._eef =true ;_faec ._caad =_faec .devicePoint (_eege ,_aade );if _fafc {_ef .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_eege ,_aade ,_faec ._caad );
|
||
};};func (_cdcd *subpath )last ()_cb .Point {return _cdcd ._bdfbe [len (_cdcd ._bdfbe )-1]};func _efdb (_fdba ,_agagd bounded )float64 {return _fdba .bbox ().Llx -_agagd .bbox ().Urx };
|
||
|
||
// String returns a description of `l`.
|
||
func (_edd *textLine )String ()string {return _f .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_edd ._fdfed ,_edd .PdfRectangle ,_edd ._agbc ,_edd .text ());
|
||
};func (_gbe *wordBag )getDepthIdx (_dgfd float64 )int {_eec :=_gbe .depthIndexes ();_aaaa :=_cage (_dgfd );if _aaaa < _eec [0]{return _eec [0];};if _aaaa > _eec [len (_eec )-1]{return _eec [len (_eec )-1];};return _aaaa ;};func (_gggf *textLine )appendWord (_cgda *textWord ){_gggf ._aacg =append (_gggf ._aacg ,_cgda );
|
||
_gggf .PdfRectangle =_aeee (_gggf .PdfRectangle ,_cgda .PdfRectangle );if _cgda ._bgfca > _gggf ._agbc {_gggf ._agbc =_cgda ._bgfca ;};if _cgda ._afcda > _gggf ._fdfed {_gggf ._fdfed =_cgda ._afcda ;};};func (_bdae lineRuling )asRuling ()(*ruling ,bool ){_eeedf :=ruling {_fggg :_bdae ._bdde ,Color :_bdae .Color ,_fccd :_aaed };
|
||
switch _bdae ._bdde {case _ggddf :_eeedf ._gaeba =_bdae .xMean ();_eeedf ._gfce =_d .Min (_bdae ._fbfg .Y ,_bdae ._cade .Y );_eeedf ._aegc =_d .Max (_bdae ._fbfg .Y ,_bdae ._cade .Y );case _bgge :_eeedf ._gaeba =_bdae .yMean ();_eeedf ._gfce =_d .Min (_bdae ._fbfg .X ,_bdae ._cade .X );
|
||
_eeedf ._aegc =_d .Max (_bdae ._fbfg .X ,_bdae ._cade .X );default:_ef .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_bdae ._bdde );return nil ,false ;};return &_eeedf ,true ;};func _gbeb (_bdbbb _fb .PdfRectangle )*ruling {return &ruling {_fggg :_bgge ,_gaeba :_bdbbb .Ury ,_gfce :_bdbbb .Llx ,_aegc :_bdbbb .Urx };
|
||
};func (_eebd paraList )reorder (_cdcg []int ){_bfbf :=make (paraList ,len (_eebd ));for _dfaf ,_dfdb :=range _cdcg {_bfbf [_dfaf ]=_eebd [_dfdb ];};copy (_eebd ,_bfbf );};func (_edgeg rectRuling )checkWidth (_ebdb ,_bgdf float64 )(float64 ,bool ){_fgcg :=_bgdf -_ebdb ;
|
||
_dbbc :=_fgcg <=_bfefd ;return _fgcg ,_dbbc ;};func (_fgba *subpath )clear (){*_fgba =subpath {}};func (_dacf *wordBag )highestWord (_fcfb int ,_fdcg ,_debc float64 )*textWord {for _ ,_fbgd :=range _dacf ._ecg [_fcfb ]{if _fdcg <=_fbgd ._afcda &&_fbgd ._afcda <=_debc {return _fbgd ;
|
||
};};return nil ;};func _gfca (_caagd map[float64 ]gridTile )[]float64 {_ceafa :=make ([]float64 ,0,len (_caagd ));for _dfcdc :=range _caagd {_ceafa =append (_ceafa ,_dfcdc );};_e .Float64s (_ceafa );return _ceafa ;};
|
||
|
||
// TableCell is a cell in a TextTable.
|
||
type TableCell struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Marks returns the TextMarks corresponding to the text in Text.
|
||
Marks TextMarkArray ;};func _fecbg (_babf ,_afac ,_dadd float64 )rulingKind {if _babf >=_dadd &&_dbef (_afac ,_babf ){return _bgge ;};if _afac >=_dadd &&_dbef (_babf ,_afac ){return _ggddf ;};return _cgbd ;};func (_fbca paraList )eventNeighbours (_ceff []event )map[*textPara ][]int {_e .Slice (_ceff ,func (_ggbbc ,_cfbeg int )bool {_bebdg ,_fgbbg :=_ceff [_ggbbc ],_ceff [_cfbeg ];
|
||
_cbec ,_caedf :=_bebdg ._fcdf ,_fgbbg ._fcdf ;if _cbec !=_caedf {return _cbec < _caedf ;};if _bebdg ._dded !=_fgbbg ._dded {return _bebdg ._dded ;};return _ggbbc < _cfbeg ;});_dbfa :=make (map[int ]intSet );_gfgg :=make (intSet );for _ ,_dfbfg :=range _ceff {if _dfbfg ._dded {_dbfa [_dfbfg ._fgeg ]=make (intSet );
|
||
for _adgdc :=range _gfgg {if _adgdc !=_dfbfg ._fgeg {_dbfa [_dfbfg ._fgeg ].add (_adgdc );_dbfa [_adgdc ].add (_dfbfg ._fgeg );};};_gfgg .add (_dfbfg ._fgeg );}else {_gfgg .del (_dfbfg ._fgeg );};};_ddgac :=map[*textPara ][]int {};for _dggca ,_aebdgd :=range _dbfa {_defc :=_fbca [_dggca ];
|
||
if len (_aebdgd )==0{_ddgac [_defc ]=nil ;continue ;};_fbad :=make ([]int ,len (_aebdgd ));_bdbcb :=0;for _abccf :=range _aebdgd {_fbad [_bdbcb ]=_abccf ;_bdbcb ++;};_ddgac [_defc ]=_fbad ;};return _ddgac ;};func _cacef (_gega []*textWord ,_fede float64 ,_dbdd ,_gfdd rulingList )*wordBag {_badd :=_daa (_gega [0],_fede ,_dbdd ,_gfdd );
|
||
for _ ,_bgaa :=range _gega [1:]{_cab :=_cage (_bgaa ._afcda );_badd ._ecg [_cab ]=append (_badd ._ecg [_cab ],_bgaa );_badd .PdfRectangle =_aeee (_badd .PdfRectangle ,_bgaa .PdfRectangle );};_badd .sort ();return _badd ;};type textWord struct{_fb .PdfRectangle ;
|
||
_afcda float64 ;_ecgc string ;_aegfa []*textMark ;_bgfca float64 ;_fecff bool ;};type shapesState struct{_dcd _cb .Matrix ;_gaba _cb .Matrix ;_gceb []*subpath ;_eef bool ;_caad _cb .Point ;_fgec *textObject ;};func (_fadc *textTable )emptyRow (_cgab int )bool {for _gaggf :=0;
|
||
_gaggf < _fadc ._acebgf ;_gaggf ++{_egab :=_fadc .get (_gaggf ,_cgab );if _egab !=nil &&_egab .text ()!=""{return false ;};};return true ;};func (_efdc paraList )addNeighbours (){_eefge :=func (_aceaa []int ,_gebcc *textPara )([]*textPara ,[]*textPara ){_fdacb :=make ([]*textPara ,0,len (_aceaa )-1);
|
||
_fbggc :=make ([]*textPara ,0,len (_aceaa )-1);for _ ,_eebe :=range _aceaa {_bbccc :=_efdc [_eebe ];if _bbccc .Urx <=_gebcc .Llx {_fdacb =append (_fdacb ,_bbccc );}else if _bbccc .Llx >=_gebcc .Urx {_fbggc =append (_fbggc ,_bbccc );};};return _fdacb ,_fbggc ;
|
||
};_bdfc :=func (_feab []int ,_begg *textPara )([]*textPara ,[]*textPara ){_gbac :=make ([]*textPara ,0,len (_feab )-1);_ggdbe :=make ([]*textPara ,0,len (_feab )-1);for _ ,_dddg :=range _feab {_cbgc :=_efdc [_dddg ];if _cbgc .Ury <=_begg .Lly {_ggdbe =append (_ggdbe ,_cbgc );
|
||
}else if _cbgc .Lly >=_begg .Ury {_gbac =append (_gbac ,_cbgc );};};return _gbac ,_ggdbe ;};_fbdgg :=_efdc .yNeighbours (_cgdf );for _ ,_bbca :=range _efdc {_efgaa :=_fbdgg [_bbca ];if len (_efgaa )==0{continue ;};_dbgba ,_edfd :=_eefge (_efgaa ,_bbca );
|
||
if len (_dbgba )==0&&len (_edfd )==0{continue ;};if len (_dbgba )> 0{_bedeg :=_dbgba [0];for _ ,_cbgcg :=range _dbgba [1:]{if _cbgcg .Urx >=_bedeg .Urx {_bedeg =_cbgcg ;};};for _ ,_gdce :=range _dbgba {if _gdce !=_bedeg &&_gdce .Urx > _bedeg .Llx {_bedeg =nil ;
|
||
break ;};};if _bedeg !=nil &&_adbd (_bbca .PdfRectangle ,_bedeg .PdfRectangle ){_bbca ._bddg =_bedeg ;};};if len (_edfd )> 0{_dgafg :=_edfd [0];for _ ,_ebacc :=range _edfd [1:]{if _ebacc .Llx <=_dgafg .Llx {_dgafg =_ebacc ;};};for _ ,_eaeae :=range _edfd {if _eaeae !=_dgafg &&_eaeae .Llx < _dgafg .Urx {_dgafg =nil ;
|
||
break ;};};if _dgafg !=nil &&_adbd (_bbca .PdfRectangle ,_dgafg .PdfRectangle ){_bbca ._aecb =_dgafg ;};};};_fbdgg =_efdc .xNeighbours (_ebcc );for _ ,_aaedd :=range _efdc {_eeac :=_fbdgg [_aaedd ];if len (_eeac )==0{continue ;};_bbee ,_egfb :=_bdfc (_eeac ,_aaedd );
|
||
if len (_bbee )==0&&len (_egfb )==0{continue ;};if len (_egfb )> 0{_efcdc :=_egfb [0];for _ ,_beff :=range _egfb [1:]{if _beff .Ury >=_efcdc .Ury {_efcdc =_beff ;};};for _ ,_bcbab :=range _egfb {if _bcbab !=_efcdc &&_bcbab .Ury > _efcdc .Lly {_efcdc =nil ;
|
||
break ;};};if _efcdc !=nil &&_bcba (_aaedd .PdfRectangle ,_efcdc .PdfRectangle ){_aaedd ._cggd =_efcdc ;};};if len (_bbee )> 0{_cecae :=_bbee [0];for _ ,_fadcg :=range _bbee [1:]{if _fadcg .Lly <=_cecae .Lly {_cecae =_fadcg ;};};for _ ,_aacaa :=range _bbee {if _aacaa !=_cecae &&_aacaa .Lly < _cecae .Ury {_cecae =nil ;
|
||
break ;};};if _cecae !=nil &&_bcba (_aaedd .PdfRectangle ,_cecae .PdfRectangle ){_aaedd ._fgbaf =_cecae ;};};};for _ ,_addb :=range _efdc {if _addb ._bddg !=nil &&_addb ._bddg ._aecb !=_addb {_addb ._bddg =nil ;};if _addb ._fgbaf !=nil &&_addb ._fgbaf ._cggd !=_addb {_addb ._fgbaf =nil ;
|
||
};if _addb ._aecb !=nil &&_addb ._aecb ._bddg !=_addb {_addb ._aecb =nil ;};if _addb ._cggd !=nil &&_addb ._cggd ._fgbaf !=_addb {_addb ._cggd =nil ;};};};func (_egec paraList )inTile (_ccadd gridTile )paraList {var _bdbcf paraList ;for _ ,_ggeeb :=range _egec {if _ccadd .contains (_ggeeb .PdfRectangle ){_bdbcf =append (_bdbcf ,_ggeeb );
|
||
};};if _dcdd {_f .Printf ("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n",_ccadd ,len (_bdbcf ));for _ddfdf ,_dfac :=range _bdbcf {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ddfdf ,_dfac );
|
||
};_f .Println ("");};return _bdbcf ;};
|
||
|
||
// PageText represents the layout of text on a device page.
|
||
type PageText struct{_bda []*textMark ;_fbeg string ;_aebbg []TextMark ;_caed []TextTable ;_acbf _fb .PdfRectangle ;_ebgd []pathSection ;_dfab []pathSection ;};func (_bdggf *textObject )newTextMark (_ccae string ,_cgaa _cb .Matrix ,_edacf _cb .Point ,_ccdec float64 ,_becc *_fb .PdfFont ,_def float64 ,_efba ,_gbed _b .Color )(textMark ,bool ){_dggc :=_cgaa .Angle ();
|
||
_ecaca :=_bbbb (_dggc ,_egaa );var _eceb float64 ;if _ecaca %180!=90{_eceb =_cgaa .ScalingFactorY ();}else {_eceb =_cgaa .ScalingFactorX ();};_fad :=_accf (_cgaa );_ccdef :=_fb .PdfRectangle {Llx :_fad .X ,Lly :_fad .Y ,Urx :_edacf .X ,Ury :_edacf .Y };
|
||
switch _ecaca %360{case 90:_ccdef .Urx -=_eceb ;case 180:_ccdef .Ury -=_eceb ;case 270:_ccdef .Urx +=_eceb ;case 0:_ccdef .Ury +=_eceb ;default:_ecaca =0;_ccdef .Ury +=_eceb ;};if _ccdef .Llx > _ccdef .Urx {_ccdef .Llx ,_ccdef .Urx =_ccdef .Urx ,_ccdef .Llx ;
|
||
};if _ccdef .Lly > _ccdef .Ury {_ccdef .Lly ,_ccdef .Ury =_ccdef .Ury ,_ccdef .Lly ;};_affe ,_dcg :=_dgaea (_ccdef ,_bdggf ._afgf ._ged );if !_dcg {_ef .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_ccdef ,_bdggf ._afgf ._ged ,_ccae );
|
||
};_ccdef =_affe ;_cbadc :=_ccdef ;_dbdg :=_bdggf ._afgf ._ged ;switch _ecaca %360{case 90:_dbdg .Urx ,_dbdg .Ury =_dbdg .Ury ,_dbdg .Urx ;_cbadc =_fb .PdfRectangle {Llx :_dbdg .Urx -_ccdef .Ury ,Urx :_dbdg .Urx -_ccdef .Lly ,Lly :_ccdef .Llx ,Ury :_ccdef .Urx };
|
||
case 180:_cbadc =_fb .PdfRectangle {Llx :_dbdg .Urx -_ccdef .Llx ,Urx :_dbdg .Urx -_ccdef .Urx ,Lly :_dbdg .Ury -_ccdef .Lly ,Ury :_dbdg .Ury -_ccdef .Ury };case 270:_dbdg .Urx ,_dbdg .Ury =_dbdg .Ury ,_dbdg .Urx ;_cbadc =_fb .PdfRectangle {Llx :_ccdef .Ury ,Urx :_ccdef .Lly ,Lly :_dbdg .Ury -_ccdef .Llx ,Ury :_dbdg .Ury -_ccdef .Urx };
|
||
};if _cbadc .Llx > _cbadc .Urx {_cbadc .Llx ,_cbadc .Urx =_cbadc .Urx ,_cbadc .Llx ;};if _cbadc .Lly > _cbadc .Ury {_cbadc .Lly ,_cbadc .Ury =_cbadc .Ury ,_cbadc .Lly ;};_ddae :=textMark {_dfed :_ccae ,PdfRectangle :_cbadc ,_dgece :_ccdef ,_gcdg :_becc ,_gaea :_eceb ,_dcaf :_def ,_ebafa :_cgaa ,_debce :_edacf ,_ddbf :_ecaca ,_bgec :_efba ,_efee :_gbed };
|
||
if _ceee {_ef .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_fad ,_edacf ,_ddae .String ());};return _ddae ,_dcg ;
|
||
};func (_agaa *textObject )reset (){_agaa ._gfc =_cb .IdentityMatrix ();_agaa ._fbgc =_cb .IdentityMatrix ();_agaa ._faf =nil ;};func _fab (_cgg bounded )float64 {return -_cgg .bbox ().Lly };func _abdad (_ffbac int ,_fbgfg func (int ,int )bool )[]int {_eecb :=make ([]int ,_ffbac );
|
||
for _cggb :=range _eecb {_eecb [_cggb ]=_cggb ;};_e .Slice (_eecb ,func (_bebda ,_abccc int )bool {return _fbgfg (_eecb [_bebda ],_eecb [_abccc ])});return _eecb ;};func (_gbdf gridTiling )log (_dggcg string ){if !_cgdd {return ;};_ef .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_gbdf ._acfggbg ),len (_gbdf ._agdf ),_dggcg );
|
||
_f .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_gbdf ._acfggbg );_f .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_gbdf ._agdf );for _dgfe ,_bddb :=range _gbdf ._agdf {_bbed ,_efad :=_gbdf ._efgf [_bddb ];
|
||
if !_efad {continue ;};_f .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_dgfe ,_bddb );for _ecga ,_fcab :=range _gbdf ._acfggbg {_cgddf ,_cbab :=_bbed [_fcab ];if !_cbab {continue ;};_f .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_ecga ,_cgddf .String ());
|
||
};};};
|
||
|
||
// ImageMark represents an image drawn on a page and its position in device coordinates.
|
||
// All coordinates are in device coordinates.
|
||
type ImageMark struct{Image *_fb .Image ;
|
||
|
||
// Dimensions of the image as displayed in the PDF.
|
||
Width float64 ;Height float64 ;
|
||
|
||
// Position of the image in PDF coordinates (lower left corner).
|
||
X float64 ;Y float64 ;
|
||
|
||
// Angle in degrees, if rotated.
|
||
Angle float64 ;};func (_dgcf *textLine )pullWord (_fcdd *wordBag ,_bcae *textWord ,_bbga int ){_dgcf .appendWord (_bcae );_fcdd .removeWord (_bcae ,_bbga );};func (_ddd *shapesState )stroke (_gefa *[]pathSection ){_bbf :=pathSection {_acbfc :_ddd ._gceb ,Color :_ddd ._fgec .getStrokeColor ()};
|
||
*_gefa =append (*_gefa ,_bbf );if _fcffd {_f .Printf ("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",len (*_gefa ),_ddd ,_ddd ._fgec .getStrokeColor (),_bbf .bbox ());
|
||
if _bdee {for _eede ,_egdg :=range _ddd ._gceb {_f .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_eede ,_egdg );if _eede ==10{break ;};};};};};func _abbef (_aedfa ,_egbgd ,_ccge ,_gbbge *textPara )*textTable {_ddge :=&textTable {_acebgf :2,_ecbc :2,_cadf :make (map[uint64 ]*textPara ,4)};
|
||
_ddge .put (0,0,_aedfa );_ddge .put (1,0,_egbgd );_ddge .put (0,1,_ccge );_ddge .put (1,1,_gbbge );return _ddge ;};func _egacc (_agaddb []compositeCell )[]float64 {var _gffbc []*textLine ;_adfbc :=0;for _ ,_aegca :=range _agaddb {_adfbc +=len (_aegca .paraList );
|
||
_gffbc =append (_gffbc ,_aegca .lines ()...);};_e .Slice (_gffbc ,func (_degb ,_ggacc int )bool {_dfbd ,_debaa :=_gffbc [_degb ],_gffbc [_ggacc ];_caefc ,_cecg :=_dfbd ._fdfed ,_debaa ._fdfed ;if !_gdga (_caefc -_cecg ){return _caefc < _cecg ;};return _dfbd .Llx < _debaa .Llx ;
|
||
});if _dcdd {_f .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_adfbc ,len (_gffbc ));for _ccabg ,_eagfb :=range _gffbc {_f .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_ccabg ,_eagfb );
|
||
};};var _dgda []float64 ;_bbccg :=_gffbc [0];var _cfdb [][]*textLine ;_edgba :=[]*textLine {_bbccg };for _ddfb ,_fgge :=range _gffbc [1:]{if _fgge .Ury < _bbccg .Lly {_cbff :=0.5*(_fgge .Ury +_bbccg .Lly );if _dcdd {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_ddfb ,_fgge .Ury ,_bbccg .Lly ,_cbff ,_bbccg ,_fgge );
|
||
};_dgda =append (_dgda ,_cbff );_cfdb =append (_cfdb ,_edgba );_edgba =nil ;};_edgba =append (_edgba ,_fgge );if _fgge .Lly < _bbccg .Lly {_bbccg =_fgge ;};};if len (_edgba )> 0{_cfdb =append (_cfdb ,_edgba );};if _dcdd {_f .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_dgda );
|
||
};if _dcdd {_ef .Log .Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len (_agaddb ));for _gcfdd ,_effef :=range _agaddb {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gcfdd ,_effef );};_ef .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_cfdb ));
|
||
for _fgda ,_cacec :=range _cfdb {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_fgda ,len (_cacec ));for _cbbbc ,_ecgb :=range _cacec {_f .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_cbbbc ,_ecgb );};};};_ddgdb :=true ;
|
||
for _geee ,_agaae :=range _cfdb {_cgcd :=true ;for _bfabb ,_ffdg :=range _agaddb {if _dcdd {_f .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_geee ,len (_cfdb ),_bfabb ,len (_agaddb ),_ffdg );
|
||
};if !_ffdg .hasLines (_agaae ){if _dcdd {_f .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_geee ,len (_cfdb ),_bfabb ,len (_agaddb ));
|
||
};_cgcd =false ;break ;};};if !_cgcd {_ddgdb =false ;break ;};};if !_ddgdb {if _dcdd {_ef .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg");
|
||
};_dgda =nil ;};if _dcdd &&_dgda !=nil {_f .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_dgda );};return _dgda ;};func _gae (_gec ,_fcg _fb .PdfRectangle )bool {return _gec .Llx <=_fcg .Llx &&_fcg .Urx <=_gec .Urx &&_gec .Lly <=_fcg .Lly &&_fcg .Ury <=_gec .Ury ;
|
||
};
|
||
|
||
// String returns a description of `tm`.
|
||
func (_gbcd *textMark )String ()string {return _f .Sprintf ("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022",_gbcd .PdfRectangle ,_gbcd ._gaea ,_gbcd ._dfed );};func _gfdge (_abeb ,_fddg _cb .Point )rulingKind {_ffggd :=_d .Abs (_abeb .X -_fddg .X );
|
||
_ccaa :=_d .Abs (_abeb .Y -_fddg .Y );return _fecbg (_ffggd ,_ccaa ,_fecb );};
|
||
|
||
// String returns a description of `v`.
|
||
func (_ecbf *ruling )String ()string {if _ecbf ._fggg ==_cgbd {return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047";};_begb ,_ebgc :="\u0078","\u0079";if _ecbf ._fggg ==_bgge {_begb ,_ebgc ="\u0079","\u0078";};_adcf :="";if _ecbf ._eaea !=0.0{_adcf =_f .Sprintf (" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_ecbf ._eaea );
|
||
};return _f .Sprintf ("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",_ecbf ._fggg ,_begb ,_ecbf ._gaeba ,_ebgc ,_ecbf ._gfce ,_ecbf ._aegc ,_ecbf ._aegc -_ecbf ._gfce ,_ecbf ._fccd ,_ecbf .Color ,_adcf );
|
||
};func (_dba *stateStack )top ()*textState {if _dba .empty (){return nil ;};return (*_dba )[_dba .size ()-1];};func (_agge paraList )extractTables (_afeed []gridTiling )paraList {if _dcdd {_ef .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_agge ));
|
||
};if len (_agge )< _eggc {return _agge ;};_acag :=_agge .findTables (_afeed );if _dcdd {_ef .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_acag ));
|
||
for _edbb ,_fbcd :=range _acag {_fbcd .log (_f .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_edbb ));};};return _agge .applyTables (_acag );};func (_aabb rulingList )primaries ()[]float64 {_gdfd :=make (map[float64 ]struct{},len (_aabb ));
|
||
for _ ,_feaf :=range _aabb {_gdfd [_feaf ._gaeba ]=struct{}{};};_efff :=make ([]float64 ,len (_gdfd ));_dddcb :=0;for _afcc :=range _gdfd {_efff [_dddcb ]=_afcc ;_dddcb ++;};_e .Float64s (_efff );return _efff ;};func (_efce *ruling )intersects (_beedd *ruling )bool {_effd :=(_efce ._fggg ==_ggddf &&_beedd ._fggg ==_bgge )||(_beedd ._fggg ==_ggddf &&_efce ._fggg ==_bgge );
|
||
_cgfe :=func (_ebcdf ,_abeec *ruling )bool {return _ebcdf ._gfce -_bdfd <=_abeec ._gaeba &&_abeec ._gaeba <=_ebcdf ._aegc +_bdfd ;};_bgdd :=_cgfe (_efce ,_beedd );_fabb :=_cgfe (_beedd ,_efce );if _fcffd {_f .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_effd ,_bgdd ,_fabb ,_effd &&_bgdd &&_fabb ,_efce ,_beedd );
|
||
};return _effd &&_bgdd &&_fabb ;};func _cbeab (_gfcd []TextMark ,_cbag *int )[]TextMark {_edab :=_gfcd [len (_gfcd )-1];_efc :=[]rune (_edab .Text );if len (_efc )==1{_gfcd =_gfcd [:len (_gfcd )-1];_fegf :=_gfcd [len (_gfcd )-1];*_cbag =_fegf .Offset +len (_fegf .Text );
|
||
}else {_ebafc :=_abga (_edab .Text );*_cbag +=len (_ebafc )-len (_edab .Text );_edab .Text =_ebafc ;};return _gfcd ;};func (_bede *textObject )getFontDict (_decg string )(_gcfg _gc .PdfObject ,_fdgb error ){_ffcc :=_bede ._bcb ;if _ffcc ==nil {_ef .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_decg );
|
||
return nil ,nil ;};_gcfg ,_ffcf :=_ffcc .GetFontByName (_gc .PdfObjectName (_decg ));if !_ffcf {_ef .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_decg );
|
||
return nil ,_a .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _gcfg ,nil ;};
|
||
|
||
// String returns a human readable description of `path`.
|
||
func (_bca *subpath )String ()string {_cfc :=_bca ._bdfbe ;_gcga :=len (_cfc );if _gcga <=5{return _f .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_gcga ,_cfc );};return _f .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_gcga ,_cfc [0],_cfc [1],_cfc [_gcga -1]);
|
||
};func (_addc paraList )log (_eagf string ){if !_cfeg {return ;};_ef .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_eagf ,len (_addc ));
|
||
for _fdca ,_bbcf :=range _addc {if _bbcf ==nil {continue ;};_eebf :=_bbcf .text ();_gddc :="\u0020\u0020";if _bbcf ._ebdd !=nil {_gddc =_f .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_bbcf ._ebdd ._acebgf ,_bbcf ._ebdd ._ecbc );};_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_fdca ,_bbcf .PdfRectangle ,_gddc ,_debb (_eebf ,50));
|
||
};};func (_fgdb *textTable )toTextTable ()TextTable {if _dcdd {_ef .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_fgdb ._acebgf ,_fgdb ._ecbc );};_cefcc :=make ([][]TableCell ,_fgdb ._ecbc );
|
||
for _dbda :=0;_dbda < _fgdb ._ecbc ;_dbda ++{_cefcc [_dbda ]=make ([]TableCell ,_fgdb ._acebgf );for _efdf :=0;_efdf < _fgdb ._acebgf ;_efdf ++{_fagd :=_fgdb .get (_efdf ,_dbda );if _fagd ==nil {continue ;};if _dcdd {_f .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_efdf ,_dbda ,_fagd );
|
||
};_cefcc [_dbda ][_efdf ].Text =_fagd .text ();_gbeff :=0;_cefcc [_dbda ][_efdf ].Marks ._cbdg =_fagd .toTextMarks (&_gbeff );};};return TextTable {W :_fgdb ._acebgf ,H :_fgdb ._ecbc ,Cells :_cefcc };};
|
||
|
||
// String returns a description of `w`.
|
||
func (_adgc *textWord )String ()string {return _f .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_adgc ._afcda ,_adgc .PdfRectangle ,_adgc ._bgfca ,_adgc ._ecgc );
|
||
};func (_ceec rulingList )bbox ()_fb .PdfRectangle {var _adec _fb .PdfRectangle ;if len (_ceec )==0{_ef .Log .Error ("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073");
|
||
return _fb .PdfRectangle {};};if _ceec [0]._fggg ==_bgge {_adec .Llx ,_adec .Urx =_ceec .secMinMax ();_adec .Lly ,_adec .Ury =_ceec .primMinMax ();}else {_adec .Llx ,_adec .Urx =_ceec .primMinMax ();_adec .Lly ,_adec .Ury =_ceec .secMinMax ();};return _adec ;
|
||
};func (_ecacg *shapesState )lineTo (_efb ,_decf float64 ){if _fafc {_ef .Log .Info ("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066",_efb ,_decf ,_ecacg .devicePoint (_efb ,_decf ));
|
||
};_ecacg .addPoint (_efb ,_decf );};type textTable struct{_fb .PdfRectangle ;_acebgf ,_ecbc int ;_cbbaf bool ;_cadf map[uint64 ]*textPara ;_accd map[uint64 ]compositeCell ;};func (_faag paraList )applyTables (_bgfa []*textTable )paraList {var _gcfgdf paraList ;
|
||
for _ ,_fcdbc :=range _bgfa {_gcfgdf =append (_gcfgdf ,_fcdbc .newTablePara ());};for _ ,_ddaef :=range _faag {if _ddaef ._gebc {continue ;};_gcfgdf =append (_gcfgdf ,_ddaef );};return _gcfgdf ;};func (_fbcff *textTable )emptyColumn (_dbdc int )bool {for _fddfe :=0;
|
||
_fddfe < _fbcff ._ecbc ;_fddfe ++{_ffaa :=_fbcff .get (_dbdc ,_fddfe );if _ffaa !=nil &&_ffaa .text ()!=""{return false ;};};return true ;};func _daaae (_dbbcc string )bool {for _ ,_gacef :=range _dbbcc {if !_cd .IsSpace (_gacef ){return false ;};};return true ;
|
||
};
|
||
|
||
// New returns an Extractor instance for extracting content from the input PDF page.
|
||
func New (page *_fb .PdfPage )(*Extractor ,error ){const _bb ="\u0065\u0078\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077";_da ,_fc :=page .GetAllContentStreams ();if _fc !=nil {return nil ,_fc ;};_gf ,_fc :=page .GetMediaBox ();if _fc !=nil {return nil ,_f .Errorf ("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076",_fc );
|
||
};_gg :=&Extractor {_gd :_da ,_be :page .Resources ,_ged :*_gf ,_aeb :map[string ]fontEntry {},_dc :map[string ]textResult {}};if _gg ._ged .Llx > _gg ._ged .Urx {_ef .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_gg ._ged );
|
||
_gg ._ged .Llx ,_gg ._ged .Urx =_gg ._ged .Urx ,_gg ._ged .Llx ;};if _gg ._ged .Lly > _gg ._ged .Ury {_ef .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_gg ._ged );
|
||
_gg ._ged .Lly ,_gg ._ged .Ury =_gg ._ged .Ury ,_gg ._ged .Lly ;};_ac .TrackUse (_bb );return _gg ,nil ;};func _badbe (_fddbd ,_ddaf _cb .Point )bool {return _fddbd .X ==_ddaf .X &&_fddbd .Y ==_ddaf .Y };
|
||
|
||
// TextTable represents a table.
|
||
// Cells are ordered top-to-bottom, left-to-right.
|
||
// Cells[y] is the (0-offset) y'th row in the table.
|
||
// Cells[y][x] is the (0-offset) x'th column in the table.
|
||
type TextTable struct{W ,H int ;Cells [][]TableCell ;};func (_gff *shapesState )establishSubpath ()*subpath {_gcgf ,_edee :=_gff .lastpointEstablished ();if !_edee {_gff ._gceb =append (_gff ._gceb ,_fbbc (_gcgf ));};if len (_gff ._gceb )==0{return nil ;
|
||
};_gff ._eef =false ;return _gff ._gceb [len (_gff ._gceb )-1];};func (_fbg *textObject )setTextRenderMode (_edf int ){if _fbg ==nil {return ;};_fbg ._ebag ._ddg =RenderMode (_edf );};func _fdcaa (_faaff map[float64 ]map[float64 ]gridTile )[]float64 {_fegb :=make ([]float64 ,0,len (_faaff ));
|
||
for _dcgdc :=range _faaff {_fegb =append (_fegb ,_dcgdc );};_e .Float64s (_fegb );_fceac :=len (_fegb );for _fccb :=0;_fccb < _fceac /2;_fccb ++{_fegb [_fccb ],_fegb [_fceac -1-_fccb ]=_fegb [_fceac -1-_fccb ],_fegb [_fccb ];};return _fegb ;};
|
||
|
||
// Tables returns the tables extracted from the page (the _caed field).
// The number of tables is logged first when _dcdd is set.
|
||
func (_bdfb PageText )Tables ()[]TextTable {if _dcdd {_ef .Log .Info ("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064",len (_bdfb ._caed ));};return _bdfb ._caed ;};/* getFontDirect looks up the font object named _ffgg with getFontDict and converts it with NewPdfFontFromPdfObject. A lookup error is returned as is. A conversion error is logged at debug level and returned together with whatever font value the conversion produced. */func (_egf *textObject )getFontDirect (_ffgg string )(*_fb .PdfFont ,error ){_bfef ,_fda :=_egf .getFontDict (_ffgg );
|
||
if _fda !=nil {return nil ,_fda ;};_fed ,_fda :=_fb .NewPdfFontFromPdfObject (_bfef );if _fda !=nil {_ef .Log .Debug ("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ffgg ,_fda );
|
||
};return _fed ,_fda ;};/* findTables links neighbouring paragraphs with addNeighbours, sorts the receiver in place using _afga as comparator, and returns the tables found by findGridTables (when _cfb is set) followed by those found by findTextTables (when _ggcb is set). */func (_bfeg paraList )findTables (_fbed []gridTiling )[]*textTable {_bfeg .addNeighbours ();_e .Slice (_bfeg ,func (_edfb ,_fedda int )bool {return _afga (_bfeg [_edfb ],_bfeg [_fedda ])< 0});var _gacb []*textTable ;if _cfb {_afccb :=_bfeg .findGridTables (_fbed );
|
||
_gacb =append (_gacb ,_afccb ...);};if _ggcb {_gdgg :=_bfeg .findTextTables ();_gacb =append (_gacb ,_gdgg ...);};return _gacb ;};/* paraList is a list of paragraphs. */type paraList []*textPara ;/* _aedd returns a new slice that starts with _aadg, ends with _acgd and holds in between the values of _ecebd that lie strictly between the smaller and the larger of the two. Scanning stops at the first value at or above the larger bound, so _ecebd is presumably expected in ascending order (confirm against callers). */func _aedd (_ecebd []float64 ,_aadg ,_acgd float64 )[]float64 {_bfcd ,_acgcf :=_aadg ,_acgd ;if _acgcf < _bfcd {_bfcd ,_acgcf =_acgcf ,_bfcd ;
|
||
};_fbdg :=make ([]float64 ,0,len (_ecebd )+2);_fbdg =append (_fbdg ,_aadg );for _ ,_acce :=range _ecebd {if _acce <=_bfcd {continue ;}else if _acce >=_acgcf {break ;};_fbdg =append (_fbdg ,_acce );};_fbdg =append (_fbdg ,_acgd );return _fbdg ;};func (_fffec *textTable )putComposite (_efdgb ,_aceee int ,_ggdag paraList ,_edaac _fb .PdfRectangle ){if len (_ggdag )==0{_ef .Log .Error ("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073");
|
||
return ;};_cgee :=compositeCell {_edaac ,_ggdag };if _dcdd {_f .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a",_efdgb ,_aceee ,_cgee .String ());
|
||
};_cgee .updateBBox ();_fffec ._accd [_eggg (_efdgb ,_aceee )]=_cgee ;};func _adc (_ffca *wordBag ,_acgc float64 ,_agef ,_effec rulingList )[]*wordBag {var _ffef []*wordBag ;for _ ,_daf :=range _ffca .depthIndexes (){_aeef :=false ;for !_ffca .empty (_daf ){_gcecd :=_ffca .firstReadingIndex (_daf );
|
||
_cccd :=_ffca .firstWord (_gcecd );_cfbf :=_daa (_cccd ,_acgc ,_agef ,_effec );_ffca .removeWord (_cccd ,_gcecd );if _bfde {_ef .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_cccd .String ());
|
||
};for _bbae :=true ;_bbae ;_bbae =_aeef {_aeef =false ;_gabad :=_dcde *_cfbf ._fdb ;_gddd :=_gaga *_cfbf ._fdb ;_egebe :=_fcce *_cfbf ._fdb ;if _bfde {_ef .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_cfbf .minDepth (),_cfbf .maxDepth (),_egebe ,_gddd );
|
||
};if _ffca .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_cfbf ,_febc (_ffgc ,0),_cfbf .minDepth ()-_egebe ,_cfbf .maxDepth ()+_egebe ,_afb ,false ,false )> 0{_aeef =true ;};if _ffca .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_cfbf ,_febc (_ffgc ,_gddd ),_cfbf .minDepth (),_cfbf .maxDepth (),_fabg ,false ,false )> 0{_aeef =true ;
|
||
};if _aeef {continue ;};_gabca :=_ffca .scanBand ("",_cfbf ,_febc (_abbe ,_gabad ),_cfbf .minDepth (),_cfbf .maxDepth (),_cbfbf ,true ,false );if _gabca > 0{_ecef :=(_cfbf .maxDepth ()-_cfbf .minDepth ())/_cfbf ._fdb ;if (_gabca > 1&&float64 (_gabca )> 0.3*_ecef )||_gabca <=10{if _ffca .scanBand ("\u006f\u0074\u0068e\u0072",_cfbf ,_febc (_abbe ,_gabad ),_cfbf .minDepth (),_cfbf .maxDepth (),_cbfbf ,false ,true )> 0{_aeef =true ;
|
||
};};};};_ffef =append (_ffef ,_cfbf );};};return _ffef ;};func _bbfc (_eaga []*textMark ,_ggcf _fb .PdfRectangle ,_aeed rulingList ,_afcb []gridTiling )paraList {_ef .Log .Trace ("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066",len (_eaga ),_ggcf );
|
||
if len (_eaga )==0{return nil ;};_cfbe :=_ecff (_eaga ,_ggcf );if len (_cfbe )==0{return nil ;};_aeed .log ("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065");_bbbg ,_gfdc :=_aeed .vertsHorzs ();_bcff :=_cacef (_cfbe ,_ggcf .Ury ,_bbbg ,_gfdc );
|
||
_bgfg :=_adc (_bcff ,_ggcf .Ury ,_bbbg ,_gfdc );_bgfg =_ccfe (_bgfg );_egag :=make (paraList ,0,len (_bgfg ));for _ ,_bfc :=range _bgfg {_fggc :=_bfc .arrangeText ();if _fggc !=nil {_egag =append (_egag ,_fggc );};};if len (_egag )>=_eggc {_egag =_egag .extractTables (_afcb );
|
||
};_egag .sortReadingOrder ();_egag .log ("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072");return _egag ;};func _ccfe (_aggg []*wordBag )[]*wordBag {if len (_aggg )<=1{return _aggg ;
|
||
};if _dgeb {_ef .Log .Info ("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a");};_e .Slice (_aggg ,func (_fafd ,_effb int )bool {_efag ,_gcaa :=_aggg [_fafd ],_aggg [_effb ];_ddbg :=_efag .Width ()*_efag .Height ();_gccg :=_gcaa .Width ()*_gcaa .Height ();
|
||
if _ddbg !=_gccg {return _ddbg > _gccg ;};if _efag .Height ()!=_gcaa .Height (){return _efag .Height ()> _gcaa .Height ();};return _fafd < _effb ;});var _gccd []*wordBag ;_bcbd :=make (intSet );for _deef :=0;_deef < len (_aggg );_deef ++{if _bcbd .has (_deef ){continue ;
|
||
};_efga :=_aggg [_deef ];for _egca :=_deef +1;_egca < len (_aggg );_egca ++{if _bcbd .has (_deef ){continue ;};_eafb :=_aggg [_egca ];_dff :=_efga .PdfRectangle ;_dff .Llx -=_efga ._fdb ;if _gae (_dff ,_eafb .PdfRectangle ){_efga .absorb (_eafb );_bcbd .add (_egca );
|
||
};};_gccd =append (_gccd ,_efga );};if len (_aggg )!=len (_gccd )+len (_bcbd ){_ef .Log .Error ("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064",len (_aggg ),len (_gccd ),len (_bcbd ));
|
||
};return _gccd ;};func (_gba *textObject )setCharSpacing (_bbcc float64 ){if _gba ==nil {return ;};_gba ._ebag ._dgae =_bbcc ;if _beag {_ef .Log .Info ("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073",_bbcc ,_gba ._ebag .String ());
|
||
};};func _faea (_gdcb *Extractor ,_befb *_fb .PdfPageResources ,_abdg _gef .GraphicsState ,_ccf *textState ,_cgf *stateStack )*textObject {return &textObject {_afgf :_gdcb ,_bcb :_befb ,_gbf :_abdg ,_gbaa :_cgf ,_ebag :_ccf ,_gfc :_cb .IdentityMatrix (),_fbgc :_cb .IdentityMatrix ()};
|
||
};func (_ddfa *stateStack )empty ()bool {return len (*_ddfa )==0};type markKind int ;func (_efbg *shapesState )fill (_fce *[]pathSection ){_caecc :=pathSection {_acbfc :_efbg ._gceb ,Color :_efbg ._fgec .getFillColor ()};*_fce =append (*_fce ,_caecc );
|
||
if _fcffd {_bab :=_caecc .bbox ();_f .Printf ("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",len (*_fce ),len (_caecc ._acbfc ),_efbg ,_caecc .Color ,_bab ,_bab .Width (),_bab .Height ());
|
||
if _bdee {for _cag ,_cdf :=range _caecc ._acbfc {_f .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_cag ,_cdf );if _cag ==10{break ;};};};};};func _ddfaf (_cegf float64 )float64 {return _ecaf *_d .Round (_cegf /_ecaf )};const (_cbef =true ;
|
||
_gfgc =true ;_eebg =true ;_badc =false ;_ded =false ;_bcac =6;_ecaab =3.0;_gbag =200;_cfb =true ;_ggcb =true ;_eagge =true ;_ceea =true ;_fba =false ;);func (_eecg rulingList )toGrids ()[]rulingList {if _fcffd {_ef .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_eecg );
|
||
};_cbaf :=_eecg .intersections ();if _fcffd {_ef .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_eecg ),len (_cbaf ));
|
||
for _ ,_dgaec :=range _dead (_cbaf ){_f .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_dgaec ,_cbaf [_dgaec ]);};};_bcca :=make (map[int ]intSet ,len (_eecg ));for _gdfb :=range _eecg {_acfc :=_eecg .connections (_cbaf ,_gdfb );if len (_acfc )> 0{_bcca [_gdfb ]=_acfc ;
|
||
};};if _fcffd {_ef .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_bcca ));for _ ,_eacbd :=range _dead (_bcca ){_f .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_eacbd ,_bcca [_eacbd ]);
|
||
};};_bade :=_abdad (len (_eecg ),func (_dece ,_bbcce int )bool {_egdcd ,_afggb :=len (_bcca [_dece ]),len (_bcca [_bbcce ]);if _egdcd !=_afggb {return _egdcd > _afggb ;};return _eecg .comp (_dece ,_bbcce );});if _fcffd {_ef .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_bade );
|
||
};_fcgf :=[][]int {{_bade [0]}};_fcda :for _ ,_gbagb :=range _bade [1:]{for _dfce ,_bfeb :=range _fcgf {for _ ,_cgaad :=range _bfeb {if _bcca [_cgaad ].has (_gbagb ){_fcgf [_dfce ]=append (_bfeb ,_gbagb );continue _fcda ;};};};_fcgf =append (_fcgf ,[]int {_gbagb });
|
||
};if _fcffd {_ef .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_fcgf );};_e .SliceStable (_fcgf ,func (_fcgc ,_fdgc int )bool {return len (_fcgf [_fcgc ])> len (_fcgf [_fdgc ])});for _ ,_dcecc :=range _fcgf {_e .Slice (_dcecc ,func (_adgd ,_fgee int )bool {return _eecg .comp (_dcecc [_adgd ],_dcecc [_fgee ])});
|
||
};_ebgag :=make ([]rulingList ,len (_fcgf ));for _dcfa ,_ebfge :=range _fcgf {_agfc :=make (rulingList ,len (_ebfge ));for _baa ,_dafgg :=range _ebfge {_agfc [_baa ]=_eecg [_dafgg ];};_ebgag [_dcfa ]=_agfc ;};if _fcffd {_ef .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_ebgag );
|
||
};var _gabaac []rulingList ;for _ ,_fgbe :=range _ebgag {if _fcfg ,_gfddg :=_fgbe .isActualGrid ();_gfddg {_fgbe =_fcfg ;_fgbe =_fgbe .snapToGroups ();_gabaac =append (_gabaac ,_fgbe );};};if _fcffd {_gfaa ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_gabaac );
|
||
_ef .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_ebgag ),len (_gabaac ));};return _gabaac ;};func (_feedd *textTable )getRight ()paraList {_aacf :=make (paraList ,_feedd ._ecbc );
|
||
for _afab :=0;_afab < _feedd ._ecbc ;_afab ++{_adgee :=_feedd .get (_feedd ._acebgf -1,_afab )._aecb ;if _adgee ==nil ||_adgee ._gebc {return nil ;};_aacf [_afab ]=_adgee ;};for _fdcga :=0;_fdcga < _feedd ._ecbc -1;_fdcga ++{if _aacf [_fdcga ]._cggd !=_aacf [_fdcga +1]{return nil ;
|
||
};};return _aacf ;};func (_febg *textPara )writeText (_baddf _c .Writer ){if _febg ._ebdd ==nil {_febg .writeCellText (_baddf );return ;};for _adgag :=0;_adgag < _febg ._ebdd ._ecbc ;_adgag ++{for _dcab :=0;_dcab < _febg ._ebdd ._acebgf ;_dcab ++{_aedf :=_febg ._ebdd .get (_dcab ,_adgag );
|
||
if _aedf ==nil {_baddf .Write ([]byte ("\u0009"));}else {_aedf .writeCellText (_baddf );};_baddf .Write ([]byte ("\u0020"));};if _adgag < _febg ._ebdd ._ecbc -1{_baddf .Write ([]byte ("\u000a"));};};};type gridTile struct{_fb .PdfRectangle ;_fga ,_adgdg ,_cbee ,_bdgc bool ;
|
||
};func (_bface *shapesState )lastpointEstablished ()(_cb .Point ,bool ){if _bface ._eef {return _bface ._caad ,false ;};_feea :=len (_bface ._gceb );if _feea > 0&&_bface ._gceb [_feea -1]._fbbd {return _bface ._gceb [_feea -1].last (),false ;};return _cb .Point {},true ;
|
||
};func (_dafg compositeCell )parasBBox ()(paraList ,_fb .PdfRectangle ){return _dafg .paraList ,_dafg .PdfRectangle ;};func (_eac *textLine )toTextMarks (_fca *int )[]TextMark {var _ebbc []TextMark ;for _ ,_fgeab :=range _eac ._aacg {if _fgeab ._fecff {_ebbc =_cagc (_ebbc ,_fca ,"\u0020");
|
||
};_cadc :=_fgeab .toTextMarks (_fca );_ebbc =append (_ebbc ,_cadc ...);};return _ebbc ;};func (_ebdc *textObject )getFont (_ebeb string )(*_fb .PdfFont ,error ){if _ebdc ._afgf ._aeb !=nil {_ebdc ._afgf ._cf ++;_bdgg ,_dbf :=_ebdc ._afgf ._aeb [_ebeb ];
|
||
if _dbf {_bdgg ._fea =_ebdc ._afgf ._cf ;return _bdgg ._acbg ,nil ;};};_edg ,_cad :=_ebdc .getFontDirect (_ebeb );if _cad !=nil {return nil ,_cad ;};if _ebdc ._afgf ._aeb !=nil {_bbd :=fontEntry {_edg ,_ebdc ._afgf ._cf };if len (_ebdc ._afgf ._aeb )>=_fgf {var _agf []string ;
|
||
for _acfd :=range _ebdc ._afgf ._aeb {_agf =append (_agf ,_acfd );};_e .Slice (_agf ,func (_ecb ,_gbcb int )bool {return _ebdc ._afgf ._aeb [_agf [_ecb ]]._fea < _ebdc ._afgf ._aeb [_agf [_gbcb ]]._fea ;});delete (_ebdc ._afgf ._aeb ,_agf [0]);};_ebdc ._afgf ._aeb [_ebeb ]=_bbd ;
|
||
};return _edg ,nil ;};func _bfcfb (_gbgga *PageText )error {_fcec :=_ac .GetLicenseKey ();if _fcec !=nil &&_fcec .IsLicensed ()||_ff {return nil ;};_f .Printf ("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a");
|
||
_f .Println ("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f");
|
||
return _a .New ("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064");};func (_bbac paraList )llyRange (_fffe []int ,_faeef ,_affd float64 )[]int {_ccab :=len (_bbac );
|
||
if _affd < _bbac [_fffe [0]].Lly ||_faeef > _bbac [_fffe [_ccab -1]].Lly {return nil ;};_ggde :=_e .Search (_ccab ,func (_ccdf int )bool {return _bbac [_fffe [_ccdf ]].Lly >=_faeef });_agage :=_e .Search (_ccab ,func (_ggbc int )bool {return _bbac [_fffe [_ggbc ]].Lly > _affd });
|
||
return _fffe [_ggde :_agage ];};func _dgaea (_gagd ,_cfgc _fb .PdfRectangle )(_fb .PdfRectangle ,bool ){if !_fabf (_gagd ,_cfgc ){return _fb .PdfRectangle {},false ;};return _fb .PdfRectangle {Llx :_d .Max (_gagd .Llx ,_cfgc .Llx ),Urx :_d .Min (_gagd .Urx ,_cfgc .Urx ),Lly :_d .Max (_gagd .Lly ,_cfgc .Lly ),Ury :_d .Min (_gagd .Ury ,_cfgc .Ury )},true ;
|
||
};func _ddad (_bddgb ,_gebb float64 )string {_faed :=!_gdga (_bddgb -_gebb );if _faed {return "\u000a";};return "\u0020";};func (_gdf *wordBag )depthIndexes ()[]int {if len (_gdf ._ecg )==0{return nil ;};_decfg :=make ([]int ,len (_gdf ._ecg ));_bfed :=0;
|
||
for _fbee :=range _gdf ._ecg {_decfg [_bfed ]=_fbee ;_bfed ++;};_e .Ints (_decfg );return _decfg ;};func _befe (_fbbe ,_dcgd _cb .Point ,_aaagf _b .Color )(*ruling ,bool ){_dadg :=lineRuling {_fbfg :_fbbe ,_cade :_dcgd ,_bdde :_afcd (_fbbe ,_dcgd ),Color :_aaagf };
|
||
if _dadg ._bdde ==_cgbd {return nil ,false ;};return _dadg .asRuling ();};func (_fcad intSet )add (_edeed int ){_fcad [_edeed ]=struct{}{}};
|
||
|
||
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
|
||
func (_cfe *TextMarkArray )BBox ()(_fb .PdfRectangle ,bool ){var _dfbbb _fb .PdfRectangle ;_gdcg :=false ;for _ ,_fffb :=range _cfe ._cbdg {if _fffb .Meta ||_daaae (_fffb .Text ){continue ;};if _gdcg {_dfbbb =_aeee (_dfbbb ,_fffb .BBox );}else {_dfbbb =_fffb .BBox ;
|
||
_gdcg =true ;};};return _dfbbb ,_gdcg ;};func _gcfdc (_effga []_gc .PdfObject )(_ffeff ,_cbgff float64 ,_fgggc error ){if len (_effga )!=2{return 0,0,_f .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_effga ));
|
||
};_fddea ,_fgggc :=_gc .GetNumbersAsFloat (_effga );if _fgggc !=nil {return 0,0,_fgggc ;};return _fddea [0],_fddea [1],nil ;};
|
||
|
||
// String returns a human readable description of `vecs`.
|
||
func (_ffcfg rulingList )String ()string {if len (_ffcfg )==0{return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}";};_agebe ,_dfde :=_ffcfg .vertsHorzs ();_edcb :=len (_agebe );_fafgb :=len (_dfde );if _edcb ==0||_fafgb ==0{return _f .Sprintf ("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}",_edcb ,_fafgb );
|
||
};_adbf :=_fb .PdfRectangle {Llx :_agebe [0]._gaeba ,Urx :_agebe [_edcb -1]._gaeba ,Lly :_dfde [_fafgb -1]._gaeba ,Ury :_dfde [0]._gaeba };return _f .Sprintf ("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d",_edcb ,_fafgb ,_adbf );
|
||
};func (_bdbc paraList )toTextMarks ()[]TextMark {_cbeb :=0;var _bgee []TextMark ;for _ebege ,_eebb :=range _bdbc {if _eebb ._gbcbg {continue ;};_abgg :=_eebb .toTextMarks (&_cbeb );_bgee =append (_bgee ,_abgg ...);if _ebege !=len (_bdbc )-1{if _gdfc (_eebb ,_bdbc [_ebege +1]){_bgee =_cagc (_bgee ,&_cbeb ,"\u0020");
|
||
}else {_bgee =_cagc (_bgee ,&_cbeb ,"\u000a");_bgee =_cagc (_bgee ,&_cbeb ,"\u000a");};};};_bgee =_cagc (_bgee ,&_cbeb ,"\u000a");_bgee =_cagc (_bgee ,&_cbeb ,"\u000a");return _bgee ;};func (_fgbb *textObject )checkOp (_efd *_gef .ContentStreamOperation ,_bacg int ,_cda bool )(_aef bool ,_aagd error ){if _fgbb ==nil {var _fcbb []_gc .PdfObject ;
|
||
if _bacg > 0{_fcbb =_efd .Params ;if len (_fcbb )> _bacg {_fcbb =_fcbb [:_bacg ];};};_ef .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_efd .Operand ,_fcbb );
|
||
};if _bacg >=0{if len (_efd .Params )!=_bacg {if _cda {_aagd =_a .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");};_ef .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_efd .Operand ,_bacg ,len (_efd .Params ),_efd .Params );
|
||
return false ,_aagd ;};};return true ,nil ;};const _fgf =10;func (_bggd *textPara )toTextMarks (_gedd *int )[]TextMark {if _bggd ._ebdd ==nil {return _bggd .toCellTextMarks (_gedd );};var _dddc []TextMark ;for _bcad :=0;_bcad < _bggd ._ebdd ._ecbc ;_bcad ++{for _ddcc :=0;
|
||
_ddcc < _bggd ._ebdd ._acebgf ;_ddcc ++{_fcea :=_bggd ._ebdd .get (_ddcc ,_bcad );if _fcea ==nil {_dddc =_cagc (_dddc ,_gedd ,"\u0009");}else {_bada :=_fcea .toCellTextMarks (_gedd );_dddc =append (_dddc ,_bada ...);};_dddc =_cagc (_dddc ,_gedd ,"\u0020");
|
||
};if _bcad < _bggd ._ebdd ._ecbc -1{_dddc =_cagc (_dddc ,_gedd ,"\u000a");};};return _dddc ;};func (_fg *imageExtractContext )extractInlineImage (_fe *_gef .ContentStreamInlineImage ,_abd _gef .GraphicsState ,_efe *_fb .PdfPageResources )error {_acd ,_gcb :=_fe .ToImage (_efe );
|
||
if _gcb !=nil {return _gcb ;};_ga ,_gcb :=_fe .GetColorSpace (_efe );if _gcb !=nil {return _gcb ;};if _ga ==nil {_ga =_fb .NewPdfColorspaceDeviceGray ();};_fag ,_gcb :=_ga .ImageToRGB (*_acd );if _gcb !=nil {return _gcb ;};_ed :=ImageMark {Image :&_fag ,Width :_abd .CTM .ScalingFactorX (),Height :_abd .CTM .ScalingFactorY (),Angle :_abd .CTM .Angle ()};
|
||
_ed .X ,_ed .Y =_abd .CTM .Translation ();_fg ._ba =append (_fg ._ba ,_ed );_fg ._eg ++;return nil ;};func (_ddea *textPara )isAtom ()*textTable {_daab :=_ddea ;_gdegd :=_ddea ._aecb ;_cbcf :=_ddea ._cggd ;if !(_gdegd !=nil &&!_gdegd ._gebc &&_cbcf !=nil &&!_cbcf ._gebc ){return nil ;
|
||
};_egacf :=_gdegd ._cggd ;if !(_egacf !=nil &&!_egacf ._gebc &&_egacf ==_cbcf ._aecb ){return nil ;};return _abbef (_daab ,_gdegd ,_cbcf ,_egacf );};func (_dedf rulingList )primMinMax ()(float64 ,float64 ){_dcece ,_gacc :=_dedf [0]._gaeba ,_dedf [0]._gaeba ;
|
||
for _ ,_acfcb :=range _dedf [1:]{if _acfcb ._gaeba < _dcece {_dcece =_acfcb ._gaeba ;}else if _acfcb ._gaeba > _gacc {_gacc =_acfcb ._gaeba ;};};return _dcece ,_gacc ;};func (_ddfee lineRuling )yMean ()float64 {return 0.5*(_ddfee ._fbfg .Y +_ddfee ._cade .Y )};
|
||
|
||
|
||
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
|
||
// These are the marks tm with `start` < tm.Offset + len(tm.Text) && tm.Offset < `end`, where
|
||
// `start` and `end` are offsets in the extracted text.
|
||
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
|
||
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
|
||
func (_fgbf *TextMarkArray )RangeOffset (start ,end int )(*TextMarkArray ,error ){if _fgbf ==nil {return nil ,_a .New ("\u006da\u003d\u003d\u006e\u0069\u006c");};if end < start {return nil ,_f .Errorf ("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020",start ,end );
|
||
};_debf :=len (_fgbf ._cbdg );if _debf ==0{return _fgbf ,nil ;};if start < _fgbf ._cbdg [0].Offset {start =_fgbf ._cbdg [0].Offset ;};if end > _fgbf ._cbdg [_debf -1].Offset +1{end =_fgbf ._cbdg [_debf -1].Offset +1;};_cee :=_e .Search (_debf ,func (_acf int )bool {return _fgbf ._cbdg [_acf ].Offset +len (_fgbf ._cbdg [_acf ].Text )-1>=start });
|
||
if !(0<=_cee &&_cee < _debf ){_fccf :=_f .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076",start ,_cee ,_debf ,_fgbf ._cbdg [0],_fgbf ._cbdg [_debf -1]);
|
||
return nil ,_fccf ;};_bbe :=_e .Search (_debf ,func (_bga int )bool {return _fgbf ._cbdg [_bga ].Offset > end -1});if !(0<=_bbe &&_bbe < _debf ){_ege :=_f .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076",end ,_bbe ,_debf ,_fgbf ._cbdg [0],_fgbf ._cbdg [_debf -1]);
|
||
return nil ,_ege ;};if _bbe <=_cee {return nil ,_f .Errorf ("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064",start ,end ,_cee ,_bbe );
|
||
};return &TextMarkArray {_cbdg :_fgbf ._cbdg [_cee :_bbe ]},nil ;};func (_cfbc paraList )computeEBBoxes (){if _cbad {_ef .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a");};for _ ,_adaee :=range _cfbc {_adaee ._gcag =_adaee .PdfRectangle ;
|
||
};_fceff :=_cfbc .yNeighbours (0);for _deac ,_bbdac :=range _cfbc {_agdb :=_bbdac ._gcag ;_bgb ,_dgecb :=-1.0e9,+1.0e9;for _ ,_dfffc :=range _fceff [_bbdac ]{_ddc :=_cfbc [_dfffc ]._gcag ;if _ddc .Urx < _agdb .Llx {_bgb =_d .Max (_bgb ,_ddc .Urx );}else if _agdb .Urx < _ddc .Llx {_dgecb =_d .Min (_dgecb ,_ddc .Llx );
|
||
};};for _cbcd ,_baddg :=range _cfbc {_caee :=_baddg ._gcag ;if _deac ==_cbcd ||_caee .Ury > _agdb .Lly {continue ;};if _bgb <=_caee .Llx &&_caee .Llx < _agdb .Llx {_agdb .Llx =_caee .Llx ;}else if _caee .Urx <=_dgecb &&_agdb .Urx < _caee .Urx {_agdb .Urx =_caee .Urx ;
|
||
};};if _cbad {_f .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_deac ,_bbdac ._gcag ,_agdb ,_debb (_bbdac .text (),50));};_bbdac ._gcag =_agdb ;};if _badc {for _ ,_ggdd :=range _cfbc {_ggdd .PdfRectangle =_ggdd ._gcag ;
|
||
};};};func (_dgcd *shapesState )newSubPath (){_dgcd .clearPath ();if _fafc {_ef .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_dgcd );};};func _accf (_bcc _cb .Matrix )_cb .Point {_geebe ,_faa :=_bcc .Translation ();
|
||
return _cb .Point {X :_geebe ,Y :_faa };};func (_bcdcg *wordBag )arrangeText ()*textPara {_bcdcg .sort ();if _gfgc {_bcdcg .removeDuplicates ();};var _fbcf []*textLine ;for _ ,_fcac :=range _bcdcg .depthIndexes (){for !_bcdcg .empty (_fcac ){_bffbg :=_bcdcg .firstReadingIndex (_fcac );
|
||
_dcae :=_bcdcg .firstWord (_bffbg );_eeecc :=_dca (_bcdcg ,_bffbg );_ddbe :=_dcae ._bgfca ;_daff :=_dcae ._afcda -_acge *_ddbe ;_ffee :=_dcae ._afcda +_acge *_ddbe ;_fbbf :=_bbaf *_ddbe ;_cadg :=_dgbg *_ddbe ;_cdcfa :for {var _fbebe *textWord ;_aae :=0;
|
||
for _ ,_cdfb :=range _bcdcg .depthBand (_daff ,_ffee ){_fddb :=_bcdcg .highestWord (_cdfb ,_daff ,_ffee );if _fddb ==nil {continue ;};_bdbb :=_efdb (_fddb ,_eeecc ._aacg [len (_eeecc ._aacg )-1]);if _bdbb < -_cadg {break _cdcfa ;};if _bdbb > _fbbf {continue ;
|
||
};if _fbebe !=nil &&_bfefa (_fddb ,_fbebe )>=0{continue ;};_fbebe =_fddb ;_aae =_cdfb ;};if _fbebe ==nil {break ;};_eeecc .pullWord (_bcdcg ,_fbebe ,_aae );};_eeecc .markWordBoundaries ();_fbcf =append (_fbcf ,_eeecc );};};if len (_fbcf )==0{return nil ;
|
||
};_e .Slice (_fbcf ,func (_affeb ,_bfacf int )bool {return _fage (_fbcf [_affeb ],_fbcf [_bfacf ])< 0});_ecbd :=_gaeb (_bcdcg .PdfRectangle ,_fbcf );if _dgeb {_ef .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_ecbd .String ());
|
||
if _ccbef {for _adeb ,_faafg :=range _ecbd ._eggb {_f .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_adeb ,_faafg .String ());if _dbfd {for _aefcg ,_eaff :=range _faafg ._aacg {_f .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_aefcg ,_eaff .String ());
|
||
for _fecbf ,_ddgad :=range _eaff ._aegfa {_f .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_fecbf ,_ddgad .String ());};};};};};};return _ecbd ;};func _daa (_bdea *textWord ,_dde float64 ,_cbbb ,_agad rulingList )*wordBag {_fcbdf :=_cage (_bdea ._afcda );
|
||
_fdfee :=[]*textWord {_bdea };_gdeg :=wordBag {_ecg :map[int ][]*textWord {_fcbdf :_fdfee },PdfRectangle :_bdea .PdfRectangle ,_fdb :_bdea ._bgfca ,_cbea :_dde ,_dgdb :_cbbb ,_daec :_agad };return &_gdeg ;};func (_gfedf *ruling )encloses (_gbbe ,_aegg float64 )bool {return _gfedf ._gfce -_bdfd <=_gbbe &&_aegg <=_gfedf ._aegc +_bdfd ;
|
||
};func (_dfcc rulingList )sortStrict (){_e .Slice (_dfcc ,func (_bcbg ,_abag int )bool {_ceaf ,_abda :=_dfcc [_bcbg ],_dfcc [_abag ];_bfae ,_fdfea :=_ceaf ._fggg ,_abda ._fggg ;if _bfae !=_fdfea {return _bfae > _fdfea ;};_cgfg ,_bgedg :=_ceaf ._gaeba ,_abda ._gaeba ;
|
||
if !_gdga (_cgfg -_bgedg ){return _cgfg < _bgedg ;};_cgfg ,_bgedg =_ceaf ._gfce ,_abda ._gfce ;if _cgfg !=_bgedg {return _cgfg < _bgedg ;};return _ceaf ._aegc < _abda ._aegc ;});};const (_gcfe markKind =iota ;_aaed ;_ceeed ;_acfgg ;);func (_gbaf *textWord )toTextMarks (_ecbcf *int )[]TextMark {var _ebbe []TextMark ;
|
||
for _ ,_ccfef :=range _gbaf ._aegfa {_ebbe =_baga (_ebbe ,_ecbcf ,_ccfef .ToTextMark ());};return _ebbe ;};func _fbac (_bbgbg _fb .PdfRectangle ,_fdfb ,_acfe ,_ggee ,_ebegb *ruling )gridTile {_fece :=_bbgbg .Llx ;_gccb :=_bbgbg .Urx ;_cdccb :=_bbgbg .Lly ;
|
||
_bfda :=_bbgbg .Ury ;return gridTile {PdfRectangle :_bbgbg ,_adgdg :_fdfb !=nil &&_fdfb .encloses (_cdccb ,_bfda ),_bdgc :_acfe !=nil &&_acfe .encloses (_cdccb ,_bfda ),_cbee :_ggee !=nil &&_ggee .encloses (_fece ,_gccb ),_fga :_ebegb !=nil &&_ebegb .encloses (_fece ,_gccb )};
|
||
};func (_edcg paraList )merge ()*textPara {_ef .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_edcg ));
|
||
if len (_edcg )==0{return nil ;};_edcg .sortReadingOrder ();_cega :=_edcg [0].PdfRectangle ;_bcdf :=_edcg [0]._eggb ;for _ ,_eeed :=range _edcg [1:]{_cega =_aeee (_cega ,_eeed .PdfRectangle );_bcdf =append (_bcdf ,_eeed ._eggb ...);};return _gaeb (_cega ,_bcdf );
|
||
};func (_gffe *textTable )bbox ()_fb .PdfRectangle {return _gffe .PdfRectangle };func (_ad *imageExtractContext )extractXObjectImage (_baf *_gc .PdfObjectName ,_aa _gef .GraphicsState ,_cc *_fb .PdfPageResources )error {_gcea ,_ :=_cc .GetXObjectByName (*_baf );
|
||
if _gcea ==nil {return nil ;};_adg ,_bfe :=_ad ._cac [_gcea ];if !_bfe {_ce ,_egc :=_cc .GetXObjectImageByName (*_baf );if _egc !=nil {return _egc ;};if _ce ==nil {return nil ;};_eaed ,_egc :=_ce .ToImage ();if _egc !=nil {return _egc ;};_adg =&cachedImage {_bfa :_eaed ,_cdb :_ce .ColorSpace };
|
||
_ad ._cac [_gcea ]=_adg ;};_ec :=_adg ._bfa ;_ddf :=_adg ._cdb ;_cdg ,_ddfe :=_ddf .ImageToRGB (*_ec );if _ddfe !=nil {return _ddfe ;};_ef .Log .Debug ("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_aa .CTM .String ());_dad :=ImageMark {Image :&_cdg ,Width :_aa .CTM .ScalingFactorX (),Height :_aa .CTM .ScalingFactorY (),Angle :_aa .CTM .Angle ()};
|
||
_dad .X ,_dad .Y =_aa .CTM .Translation ();_ad ._ba =append (_ad ._ba ,_dad );_ad ._gfe ++;return nil ;};/* quadraticTo logs "quadraticTo:" when _fafc is set and then calls addPoint with its last two arguments; the first two arguments are not used. */func (_dgab *shapesState )quadraticTo (_faaf ,_feda ,_eaeeg ,_aca float64 ){if _fafc {_ef .Log .Info ("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a");
|
||
};_dgab .addPoint (_eaeeg ,_aca );};/* makeRectRuling classifies the four edges between the first four points of the subpath and, when the edge kinds are acceptable, builds a rectRuling with color _bccb and returns the result of its asRuling(). The bool result is false when the edge kinds are rejected, when _acfdc yields kind _cgbd, or when asRuling reports failure. */func (_fafg *subpath )makeRectRuling (_bccb _b .Color )(*ruling ,bool ){if _bfgd {_ef .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_fafg );
|
||
// Below: only the first four points are used (the [:4] slice expression panics if fewer are
// available) and each edge i -> (i+1)%4 is classified with _gfdge into the map _cceec.
};_ccga :=_fafg ._bdfbe [:4];_cceec :=make (map[int ]rulingKind ,len (_ccga ));for _cfda ,_gfeb :=range _ccga {_dbddc :=_fafg ._bdfbe [(_cfda +1)%4];_cceec [_cfda ]=_gfdge (_gfeb ,_dbddc );if _bfgd {_f .Printf ("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066",_cfda ,_cceec [_cfda ],_gfeb ,_dbddc );
|
||
// Edge indices are bucketed by kind; the debug output labels _fdbafd "horzs" and _adfdf "verts".
// _cceec is a map, so the order of the indices inside each bucket is not deterministic.
};};if _bfgd {_f .Printf ("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_cceec );};var _adfdf ,_fdbafd []int ;for _dafab ,_agfb :=range _cceec {switch _agfb {case _bgge :_fdbafd =append (_fdbafd ,_dafab );case _ggddf :_adfdf =append (_adfdf ,_dafab );
|
||
};};if _bfgd {_f .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_fdbafd ),_fdbafd );_f .Printf ("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_adfdf ),_adfdf );
|
||
// Accepted shapes: two "horz" plus two "vert" edges, or exactly two of one kind and none of the
// other provided the _fagce / _cdeea check on the two corresponding points passes.
};_ecdc :=(len (_fdbafd )==2&&len (_adfdf )==2)||(len (_fdbafd )==2&&len (_adfdf )==0&&_fagce (_ccga [_fdbafd [0]],_ccga [_fdbafd [1]]))||(len (_adfdf )==2&&len (_fdbafd )==0&&_cdeea (_ccga [_adfdf [0]],_ccga [_adfdf [1]]));if _bfgd {_f .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_fdbafd ),len (_adfdf ),_ecdc );
|
||
};if !_ecdc {if _bfgd {_ef .Log .Error ("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v",_fafg );_f .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_fdbafd ),len (_adfdf ),_ecdc );
|
||
// NOTE(review): this rejection path returns a non-nil empty ruling with false, while the later
// failure paths return nil — confirm callers only look at the bool.
// After that, an empty bucket is filled with every edge index that is not of the other kind.
};return &ruling {},false ;};if len (_adfdf )==0{for _aecbg ,_gbcbc :=range _cceec {if _gbcbc !=_bgge {_adfdf =append (_adfdf ,_aecbg );};};};if len (_fdbafd )==0{for _ebfed ,_ebfae :=range _cceec {if _ebfae !=_ggddf {_fdbafd =append (_fdbafd ,_ebfed );
|
||
};};};if _bfgd {_ef .Log .Info ("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_fdbafd ),len (_adfdf ),len (_ccga ),_fdbafd ,_adfdf ,_ccga );
|
||
// The points at the first two "horz" indices are ordered by Y and those at the first two "vert"
// indices by X; the rectangle built from them is then normalized so Llx <= Urx and Lly <= Ury.
};var _gffa ,_fabee ,_babd ,_edeca _cb .Point ;if _ccga [_fdbafd [0]].Y > _ccga [_fdbafd [1]].Y {_babd ,_edeca =_ccga [_fdbafd [0]],_ccga [_fdbafd [1]];}else {_babd ,_edeca =_ccga [_fdbafd [1]],_ccga [_fdbafd [0]];};if _ccga [_adfdf [0]].X > _ccga [_adfdf [1]].X {_gffa ,_fabee =_ccga [_adfdf [0]],_ccga [_adfdf [1]];
|
||
}else {_gffa ,_fabee =_ccga [_adfdf [1]],_ccga [_adfdf [0]];};_agab :=_fb .PdfRectangle {Llx :_gffa .X ,Urx :_fabee .X ,Lly :_edeca .Y ,Ury :_babd .Y };if _agab .Llx > _agab .Urx {_agab .Llx ,_agab .Urx =_agab .Urx ,_agab .Llx ;};if _agab .Lly > _agab .Ury {_agab .Lly ,_agab .Ury =_agab .Ury ,_agab .Lly ;
|
||
};_bbba :=rectRuling {PdfRectangle :_agab ,_dfgc :_acfdc (_agab ),Color :_bccb };if _bbba ._dfgc ==_cgbd {if _bfgd {_ef .Log .Error ("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c");
|
||
// NOTE(review): the final debug print below is gated on _fcffd, unlike the rest of this function
// which uses _bfgd — confirm this is intended.
};return nil ,false ;};_abbb ,_acde :=_bbba .asRuling ();if !_acde {if _bfgd {_ef .Log .Error ("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg");};return nil ,false ;};if _fcffd {_f .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_abbb .String ());
|
||
};return _abbb ,true ;};/* secMinMax returns the smallest _gfce and the largest _aegc found among the rulings in the list. It indexes element 0 unconditionally, so it panics on an empty list. */func (_eafbe rulingList )secMinMax ()(float64 ,float64 ){_fdbf ,_cbfae :=_eafbe [0]._gfce ,_eafbe [0]._aegc ;for _ ,_baca :=range _eafbe [1:]{if _baca ._gfce < _fdbf {_fdbf =_baca ._gfce ;};if _baca ._aegc > _cbfae {_cbfae =_baca ._aegc ;
|
||
};};return _fdbf ,_cbfae ;};/* cubicTo logs "cubicTo:" when _fafc is set and then calls addPoint with its last two arguments; the first four arguments are not used. */func (_befa *shapesState )cubicTo (_dgbc ,_bbb ,_gabaa ,_aeccc ,_bfac ,_edaa float64 ){if _fafc {_ef .Log .Info ("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a");};_befa .addPoint (_bfac ,_edaa );};/* setFont is a no-op returning nil on a nil receiver. Otherwise it stores _gdca in _ebag._gbgg (before the lookup, so the value is kept even when the lookup fails), resolves _agde with getFont and returns its error if any, stores the font in _ebag._dec, and then either pushes _ebag onto _gbaa when that stack is empty or updates the _dec of its top entry. */func (_ebd *textObject )setFont (_agde string ,_gdca float64 )error {if _ebd ==nil {return nil ;
|
||
};_ebd ._ebag ._gbgg =_gdca ;_dfae ,_fdfc :=_ebd .getFont (_agde );if _fdfc !=nil {return _fdfc ;};_ebd ._ebag ._dec =_dfae ;if _ebd ._gbaa .empty (){_ebd ._gbaa .push (_ebd ._ebag );}else {_ebd ._gbaa .top ()._dec =_ebd ._ebag ._dec ;};return nil ;};/* log is debug output and does nothing unless _dcdd is set. It logs a header ("~~~ %s: %d x %d grid=%t" plus the table rectangle) and then prints one line per non-nil cell with the two cell indices, the cell rectangle, the cell text passed through _debb(..., 50) and the rune count of the cell text. */func (_gffaf *textTable )log (_ddeg string ){if !_dcdd {return ;
|
||
};_ef .Log .Info ("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066",_ddeg ,_gffaf ._acebgf ,_gffaf ._ecbc ,_gffaf ._cbbaf ,_gffaf .PdfRectangle );
|
||
for _gace :=0;_gace < _gffaf ._ecbc ;_gace ++{for _gcff :=0;_gcff < _gffaf ._acebgf ;_gcff ++{_bgcd :=_gffaf .get (_gcff ,_gace );if _bgcd ==nil {continue ;};_f .Printf ("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a",_gcff ,_gace ,_bgcd .PdfRectangle ,_debb (_bgcd .text (),50),_ge .RuneCountInString (_bgcd .text ()));
|
||
};};};/* bounded is implemented by anything that can report a rectangle through bbox(). */type bounded interface{bbox ()_fb .PdfRectangle };/* subpath holds a slice of points (_bdfbe) and a bool flag (_fbbd); the flag's meaning is not visible in this chunk. */type subpath struct{_bdfbe []_cb .Point ;_fbbd bool ;};/* inDiacriticArea reports whether _ebad lies close to _fde: the absolute sum of the Llx and Urx differences must be below _fde.Width()*_ddga and the absolute Lly difference below _fde.Height()*_ddga. */func (_fde *textMark )inDiacriticArea (_ebad *textMark )bool {_dcec :=_fde .Llx -_ebad .Llx ;_dcga :=_fde .Urx -_ebad .Urx ;_edbe :=_fde .Lly -_ebad .Lly ;
|
||
return _d .Abs (_dcec +_dcga )< _fde .Width ()*_ddga &&_d .Abs (_edbe )< _fde .Height ()*_ddga ;};/* showText passes _ece straight to renderText and returns its error. */func (_ebbf *textObject )showText (_ece []byte )error {return _ebbf .renderText (_ece )};var _ff =false ;/* computeBbox folds the PdfRectangle of every non-nil cell into one rectangle using _aeee (presumably a rectangle union — confirm) and returns it; the zero rectangle is returned when the table has no non-nil cells. */func (_bbacd *textTable )computeBbox ()_fb .PdfRectangle {var _eedc _fb .PdfRectangle ;
|
||
_ebfec :=false ;for _gdcgd :=0;_gdcgd < _bbacd ._ecbc ;_gdcgd ++{for _fcfgf :=0;_fcfgf < _bbacd ._acebgf ;_fcfgf ++{_ffac :=_bbacd .get (_fcfgf ,_gdcgd );if _ffac ==nil {continue ;};if !_ebfec {_eedc =_ffac .PdfRectangle ;_ebfec =true ;}else {_eedc =_aeee (_eedc ,_ffac .PdfRectangle );
|
||
};};};return _eedc ;};/* alignsSec reports whether the [_gfce, _aegc] ranges of the two rulings overlap once each lower bound is relaxed by the tolerance _bfefd+1.0 (assumes _gfce <= _aegc for each ruling — confirm). */func (_fbgf *ruling )alignsSec (_dbbe *ruling )bool {const _addcg =_bfefd +1.0;return _fbgf ._gfce -_addcg <=_dbbe ._aegc &&_dbbe ._gfce -_addcg <=_fbgf ._aegc ;};
|
||
|
||
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
|
||
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
|
||
func (_geb *Extractor )ExtractTextWithStats ()(_de string ,_bbc int ,_dae int ,_bad error ){_beed ,_bbc ,_dae ,_bad :=_geb .ExtractPageText ();if _bad !=nil {return "",_bbc ,_dae ,_bad ;};return _beed .Text (),_bbc ,_dae ,nil ;};
|
||
|
||
// String returns a string describing `pt`.
|
||
func (_ggbb PageText )String ()string {_afcf :=_f .Sprintf ("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073",len (_ggbb ._bda ));_afe :=[]string {"\u002d"+_afcf };for _ ,_gda :=range _ggbb ._bda {_afe =append (_afe ,_gda .String ());
|
||
};_afe =append (_afe ,"\u002b"+_afcf );return _ag .Join (_afe ,"\u000a");};func (_abbg *textWord )addDiacritic (_beadff string ){_fafee :=_abbg ._aegfa [len (_abbg ._aegfa )-1];_fafee ._dfed +=_beadff ;_fafee ._dfed =_ea .NFKC .String (_fafee ._dfed );
|
||
};
|
||
|
||
// String returns a description of `p`.
|
||
func (_ecbb *textPara )String ()string {if _ecbb ._gbcbg {return _f .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d",_ecbb .PdfRectangle );};_dgdbd :="";if _ecbb ._ebdd !=nil {_dgdbd =_f .Sprintf ("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020",_ecbb ._ebdd ._acebgf ,_ecbb ._ebdd ._ecbc );
|
||
};return _f .Sprintf ("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071",_ecbb .PdfRectangle ,_dgdbd ,len (_ecbb ._eggb ),_debb (_ecbb .text (),50));}; |