mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
253 lines
180 KiB
Go
253 lines
180 KiB
Go
//
|
||
// Copyright 2020 FoxyUtils ehf. All rights reserved.
|
||
//
|
||
// This is a commercial product and requires a license to operate.
|
||
// A trial license can be obtained at https://unidoc.io
|
||
//
|
||
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
|
||
//
|
||
// Use of this source code is governed by the UniDoc End User License Agreement
|
||
// terms that can be accessed at https://unidoc.io/eula/
|
||
|
||
//
|
||
// Package extractor is used for quickly extracting PDF content through a simple interface.
|
||
// Currently offers functionality for extracting textual content.
|
||
//
|
||
package extractor ;import (_da "bytes";_d "errors";_gd "fmt";_adb "github.com/unidoc/unipdf/v3/common";_c "github.com/unidoc/unipdf/v3/common/license";_aee "github.com/unidoc/unipdf/v3/contentstream";_dg "github.com/unidoc/unipdf/v3/core";_ga "github.com/unidoc/unipdf/v3/internal/textencoding";_gg "github.com/unidoc/unipdf/v3/internal/transform";_gdc "github.com/unidoc/unipdf/v3/model";_cg "golang.org/x/text/unicode/norm";_ge "golang.org/x/xerrors";_dc "image/color";_b "io";_g "math";_dd "regexp";_ad "sort";_ef "strings";_ae "unicode";_a "unicode/utf8";);func (_cbac *textTable )log (_bebcb string ){if !_dfad {return ;};_adb .Log .Info ("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066",_bebcb ,_cbac ._ffbe ,_cbac ._egbe ,_cbac ._fgdd ,_cbac .PdfRectangle );for _dfbc :=0;_dfbc < _cbac ._egbe ;_dfbc ++{for _abbd :=0;_abbd < _cbac ._ffbe ;_abbd ++{_gfbg :=_cbac .get (_abbd ,_dfbc );if _gfbg ==nil {continue ;};_gd .Printf ("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a",_abbd ,_dfbc ,_gfbg .PdfRectangle ,_fgde (_gfbg .text (),50),_a .RuneCountInString (_gfbg .text ()));};};};func _fcba (_gdeea *wordBag ,_dede float64 ,_dbba ,_dbec rulingList )[]*wordBag {var _ddb []*wordBag ;for _ ,_fgcee :=range _gdeea .depthIndexes (){_cgde :=false ;for !_gdeea .empty (_fgcee ){_gaec :=_gdeea .firstReadingIndex (_fgcee );_bfbfg :=_gdeea .firstWord (_gaec );_gefg :=_dadg (_bfbfg ,_dede ,_dbba ,_dbec );_gdeea .removeWord (_bfbfg ,_gaec );if _bcef {_adb .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_bfbfg .String ());};for _aeedc :=true ;_aeedc ;_aeedc =_cgde {_cgde =false ;_ceff :=_dbdb *_gefg ._ecba ;_faac :=_cff *_gefg ._ecba ;_gdgd :=_aaaf *_gefg ._ecba ;if _bcef {_adb .Log .Info 
("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_gefg .minDepth (),_gefg .maxDepth (),_gdgd ,_faac );};if _gdeea .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_gefg ,_bdgf (_dfbf ,0),_gefg .minDepth ()-_gdgd ,_gefg .maxDepth ()+_gdgd ,_agge ,false ,false )> 0{_cgde =true ;};if _gdeea .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_gefg ,_bdgf (_dfbf ,_faac ),_gefg .minDepth (),_gefg .maxDepth (),_dfg ,false ,false )> 0{_cgde =true ;};if _cgde {continue ;};_ffdda :=_gdeea .scanBand ("",_gefg ,_bdgf (_edagd ,_ceff ),_gefg .minDepth (),_gefg .maxDepth (),_beefb ,true ,false );if _ffdda > 0{_fgdcb :=(_gefg .maxDepth ()-_gefg .minDepth ())/_gefg ._ecba ;if (_ffdda > 1&&float64 (_ffdda )> 0.3*_fgdcb )||_ffdda <=10{if _gdeea .scanBand ("\u006f\u0074\u0068e\u0072",_gefg ,_bdgf (_edagd ,_ceff ),_gefg .minDepth (),_gefg .maxDepth (),_beefb ,false ,true )> 0{_cgde =true ;};};};};_ddb =append (_ddb ,_gefg );};};return _ddb ;};func (_bgga *textObject )setTextRise (_bef float64 ){if _bgga ==nil {return ;};_bgga ._bgebc ._daa =_bef ;};type textObject struct{_dbda *Extractor ;_gec *_gdc .PdfPageResources ;_dca _aee .GraphicsState ;_bgebc *textState ;_fad *stateStack ;_aac _gg .Matrix ;_dgdc _gg .Matrix ;_fbad []*textMark ;_bdf bool ;};func _edagd (_efa *wordBag ,_bada *textWord ,_dbeg float64 )bool {return _efa .Urx <=_bada .Llx &&_bada .Llx < _efa .Urx +_dbeg ;};func (_feed *textTable )putComposite (_acgce ,_cgbfbg int ,_gede paraList ,_baga _gdc .PdfRectangle ){if len (_gede )==0{_adb .Log .Error 
("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073");return ;};_bfag :=compositeCell {_baga ,_gede };if _dfad {_gd .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a",_acgce ,_cgbfbg ,_bfag .String ());};_bfag .updateBBox ();_feed ._fgcc [_bgfea (_acgce ,_cgbfbg )]=_bfag ;};func _bgfc (_feeb *Extractor ,_bcd *_gdc .PdfPageResources ,_ebeg _aee .GraphicsState ,_fbb *textState ,_fcac *stateStack )*textObject {return &textObject {_dbda :_feeb ,_gec :_bcd ,_dca :_ebeg ,_fad :_fcac ,_bgebc :_fbb ,_aac :_gg .IdentityMatrix (),_dgdc :_gg .IdentityMatrix ()};};func (_bcg *imageExtractContext )extractFormImages (_bgg *_dg .PdfObjectName ,_efbf _aee .GraphicsState ,_ddc *_gdc .PdfPageResources )error {_cf ,_cgad :=_ddc .GetXObjectFormByName (*_bgg );if _cgad !=nil {return _cgad ;};if _cf ==nil {return nil ;};_bbg ,_cgad :=_cf .GetContentStream ();if _cgad !=nil {return _cgad ;};_ec :=_cf .Resources ;if _ec ==nil {_ec =_ddc ;};_cgad =_bcg .extractContentStreamImages (string (_bbg ),_ec );if _cgad !=nil {return _cgad ;};_bcg ._ag ++;return nil ;};type textLine struct{_gdc .PdfRectangle ;_febe float64 ;_cfgd []*textWord ;_aegc float64 ;};func (_cgf *textObject )setFont (_bgeb string ,_aag float64 )error {if _cgf ==nil {return nil ;};_cgf ._bgebc ._dgc =_aag ;_gcc ,_bfa :=_cgf .getFont (_bgeb );if _bfa !=nil {return _bfa ;};_cgf ._bgebc ._gdbe =_gcc ;if _cgf ._fad .empty (){_cgf ._fad .push (_cgf ._bgebc );}else {_cgf ._fad .top ()._gdbe =_cgf ._bgebc ._gdbe ;};return nil ;};func (_ccdg *textObject )showTextAdjusted (_bac *_dg .PdfObjectArray )error {_dgde :=false ;for _ ,_gfg :=range _bac .Elements (){switch _gfg .(type ){case *_dg .PdfObjectFloat ,*_dg .PdfObjectInteger :_agc ,_dbf :=_dg .GetNumberAsFloat (_gfg );if _dbf !=nil 
{_adb .Log .Debug ("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_gfg ,_bac );return _dbf ;};_ece ,_ecbc :=-_agc *0.001*_ccdg ._bgebc ._dgc ,0.0;if _dgde {_ecbc ,_ece =_ece ,_ecbc ;};_dcd :=_fafc (_gg .Point {X :_ece ,Y :_ecbc });_ccdg ._aac .Concat (_dcd );case *_dg .PdfObjectString :_adc ,_ebbe :=_dg .GetStringBytes (_gfg );if !_ebbe {_adb .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_gfg ,_bac );return _dg .ErrTypeError ;};_ccdg .renderText (_adc );default:_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_gfg ,_bac );return _dg .ErrTypeError ;};};return nil ;};func (_fafbf rulingList )primaries ()[]float64 {_cebe :=make (map[float64 ]struct{},len (_fafbf ));for _ ,_abcf :=range _fafbf {_cebe [_abcf ._bfc ]=struct{}{};};_abce :=make ([]float64 ,len (_cebe ));_fada :=0;for _cbaf :=range _cebe {_abce [_fada ]=_cbaf ;_fada ++;};_ad .Float64s (_abce );return _abce ;};func _ggca (_gcddf string )string {_cdaeg :=[]rune (_gcddf );return string (_cdaeg [:len (_cdaeg )-1])};func _bgccee (_bgae string )(string ,bool ){_edgc :=[]rune (_bgae );if len (_edgc )!=1{return "",false ;};_ddbc ,_defaf :=_efea [_edgc [0]];return _ddbc ,_defaf ;};func (_cccb *ruling )equals (_fbfba *ruling )bool {return _cccb ._fafed ==_fbfba 
._fafed &&_bceca (_cccb ._bfc ,_fbfba ._bfc )&&_bceca (_cccb ._edebg ,_fbfba ._edebg )&&_bceca (_cccb ._dbfe ,_fbfba ._dbfe );};func _dcad (_abbf ,_dfcb _gg .Point )bool {_bafd :=_g .Abs (_abbf .X -_dfcb .X );_cade :=_g .Abs (_abbf .Y -_dfcb .Y );return _fefec (_cade ,_bafd );};
|
||
|
||
// String returns a description of `l`.
|
||
func (_gbgga *textLine )String ()string {return _gd .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_gbgga ._febe ,_gbgga .PdfRectangle ,_gbgga ._aegc ,_gbgga .text ());};func (_gadg paraList )llyOrdering ()[]int {_fcad :=make ([]int ,len (_gadg ));for _fedc :=range _gadg {_fcad [_fedc ]=_fedc ;};_ad .SliceStable (_fcad ,func (_becc ,_dbbaf int )bool {_cffe ,_egaa :=_fcad [_becc ],_fcad [_dbbaf ];return _gadg [_cffe ].Lly < _gadg [_egaa ].Lly ;});return _fcad ;};const (RenderModeStroke RenderMode =1<<iota ;RenderModeFill ;RenderModeClip ;);func (_eebbc rulingList )isActualGrid ()(rulingList ,bool ){_bbda ,_bcgfa :=_eebbc .augmentGrid ();if !(len (_bbda )>=_bbgf +1&&len (_bcgfa )>=_dbdbd +1){if _cfag {_adb .Log .Info ("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064",len (_bbda ),len (_bcgfa ),_bbgf +1,_dbdbd +1);};return nil ,false ;};if _cfag {_adb .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074",_eebbc ,len (_bbda )>=2,len (_bcgfa )>=2,len (_bbda )>=2&&len (_bcgfa )>=2);for _ggdgf ,_ffcg :=range _eebbc {_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a",_ggdgf ,_ffcg );};};if _ffce {_bfaf ,_deea :=_bbda [0],_bbda [len (_bbda )-1];_ccadb ,_gcgbf :=_bcgfa [0],_bcgfa [len (_bcgfa )-1];if !(_cgcc (_bfaf ._bfc -_ccadb ._edebg )&&_cgcc (_deea ._bfc -_ccadb ._dbfe )&&_cgcc (_ccadb ._bfc -_bfaf ._dbfe )&&_cgcc (_gcgbf ._bfc -_bfaf ._edebg )){if _cfag {_adb .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 
\u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073",_bfaf ,_deea ,_ccadb ,_gcgbf );};return nil ,false ;};}else {if !_bbda .aligned (){if _aab {_adb .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064",len (_bbda ));};return nil ,false ;};if !_bcgfa .aligned (){if _cfag {_adb .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064",len (_bcgfa ));};return nil ,false ;};};_ccde :=append (_bbda ,_bcgfa ...);return _ccde ,true ;};func (_gcefb gridTiling )log (_aedb string ){if !_dceg {return ;};_adb .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_gcefb ._cgaf ),len (_gcefb ._aafd ),_aedb );_gd .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_gcefb ._cgaf );_gd .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_gcefb ._aafd );for _cbfe ,_dfdd :=range _gcefb ._aafd {_adee ,_adbaf :=_gcefb ._deae [_dfdd ];if !_adbaf {continue ;};_gd .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_cbfe ,_dfdd );for _dddc ,_dfff :=range _gcefb ._cgaf {_cabec ,_fdgd :=_adee [_dfff ];if !_fdgd {continue ;};_gd .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dddc ,_cabec .String ());};};};func (_bfd *textObject )showText (_fgef []byte )error {return _bfd .renderText (_fgef )};type rectRuling struct{_fbgfb rulingKind ;_fefe markKind ;_dc .Color ;_gdc .PdfRectangle ;};func (_faa *shapesState )clearPath (){_faa ._bcce =nil ;_faa ._bbb =false ;if _ebge {_adb .Log .Info 
("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073",_faa );};};func (_faf *textObject )setTextRenderMode (_dggf int ){if _faf ==nil {return ;};_faf ._bgebc ._gefc =RenderMode (_dggf );};func (_efcc *textObject )moveLP (_bcdd ,_cgeb float64 ){_efcc ._dgdc .Concat (_gg .NewMatrix (1,0,0,1,_bcdd ,_cgeb ));_efcc ._aac =_efcc ._dgdc ;};var (_efea =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};);func _ebea (_gbbf []*wordBag )[]*wordBag {if len (_gbbf )<=1{return _gbbf ;};if _cdfgc {_adb .Log .Info ("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a");};_ad .Slice (_gbbf ,func (_egcc ,_fafe int )bool {_fggf ,_daaa :=_gbbf [_egcc ],_gbbf [_fafe ];_ebcc :=_fggf .Width ()*_fggf .Height ();_gaee :=_daaa .Width ()*_daaa .Height ();if _ebcc !=_gaee {return _ebcc > _gaee ;};if _fggf .Height ()!=_daaa .Height (){return _fggf .Height ()> _daaa .Height ();};return _egcc < _fafe ;});var _cafe []*wordBag ;_dbag :=make (intSet );for _dabg :=0;_dabg < len (_gbbf );_dabg ++{if _dbag .has (_dabg ){continue ;};_aaae :=_gbbf [_dabg ];for _decd :=_dabg +1;_decd < len (_gbbf );_decd ++{if _dbag .has (_dabg ){continue ;};_bbba :=_gbbf [_decd ];_acac :=_aaae .PdfRectangle ;_acac .Llx -=_aaae ._ecba ;if _fgeff (_acac ,_bbba .PdfRectangle ){_aaae .absorb (_bbba );_dbag .add (_decd );};};_cafe =append (_cafe ,_aaae );};if len (_gbbf )!=len (_cafe 
)+len (_dbag ){_adb .Log .Error ("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064",len (_gbbf ),len (_cafe ),len (_dbag ));};return _cafe ;};func (_fgggdb *textTable )compositeRowCorridors ()map[int ][]float64 {_dacf :=make (map[int ][]float64 ,_fgggdb ._egbe );if _dfad {_adb .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_fgggdb ._egbe );};for _cgbda :=1;_cgbda < _fgggdb ._egbe ;_cgbda ++{var _ebaac []compositeCell ;for _dfcg :=0;_dfcg < _fgggdb ._ffbe ;_dfcg ++{if _debc ,_gbfb :=_fgggdb ._fgcc [_bgfea (_dfcg ,_cgbda )];_gbfb {_ebaac =append (_ebaac ,_debc );};};if len (_ebaac )==0{continue ;};_gebgc :=_cdga (_ebaac );_dacf [_cgbda ]=_gebgc ;if _dfad {_gd .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_cgbda ,_gebgc );};};return _dacf ;};func (_bcfc paraList )applyTables (_bbca []*textTable )paraList {_ccacg :=make (map[*textPara ]struct{});var _ffeeb paraList ;for _ ,_ceab :=range _bbca {for _ ,_gfff :=range _ceab ._gged {_ccacg [_gfff ]=struct{}{};};_ffeeb =append (_ffeeb ,_ceab .newTablePara ());};for _ ,_fbag :=range _bcfc {if _ ,_fgegg :=_ccacg [_fbag ];!_fgegg {_ffeeb =append (_ffeeb ,_fbag );};};return _ffeeb ;};func (_fcea *wordBag )blocked (_gece *textWord )bool {if _gece .Urx < _fcea .Llx {_cdeb :=_cggd (_gece .PdfRectangle );_bgdeb :=_gbdf (_fcea .PdfRectangle );if _fcea ._ccc .blocks (_cdeb ,_bgdeb ){if _bfe {_adb .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_gece ,_fcea );};return true ;};}else if _fcea .Urx < _gece .Llx {_bgaa :=_cggd (_fcea .PdfRectangle );_bfab :=_gbdf (_gece .PdfRectangle );if _fcea ._ccc .blocks (_bgaa ,_bfab ){if _bfe {_adb .Log .Info 
("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s",_gece ,_fcea );};return true ;};};if _gece .Ury < _fcea .Lly {_ebaa :=_fccfc (_gece .PdfRectangle );_ebad :=_daeb (_fcea .PdfRectangle );if _fcea ._bffgc .blocks (_ebaa ,_ebad ){if _bfe {_adb .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_gece ,_fcea );};return true ;};}else if _fcea .Ury < _gece .Lly {_fcag :=_fccfc (_fcea .PdfRectangle );_afe :=_daeb (_gece .PdfRectangle );if _fcea ._bffgc .blocks (_fcag ,_afe ){if _bfe {_adb .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s",_gece ,_fcea );};return true ;};};return false ;};func _beffc (_gbge bounded )float64 {return -_gbge .bbox ().Lly };func _eeccf (_abage ,_dcea _gg .Point ,_ebbedf _dc .Color )(*ruling ,bool ){_ebegb :=lineRuling {_dgfe :_abage ,_aaec :_dcea ,_febb :_cded (_abage ,_dcea ),Color :_ebbedf };if _ebegb ._febb ==_afac {return nil ,false ;};return _ebegb .asRuling ();};func (_ggdee *textTable )getComposite (_ebeaa ,_fcdfa int )(paraList ,_gdc .PdfRectangle ){_befe ,_affb :=_ggdee ._fgcc [_bgfea (_ebeaa ,_fcdfa )];if _dfad {_gd .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a",_ebeaa ,_fcdfa ,_befe .String ());};if !_affb {return nil ,_gdc .PdfRectangle {};};return _befe .parasBBox ();};func _ggfg (_dbagf ,_fbfb _gdc .PdfRectangle )bool {return _dbagf .Lly <=_fbfb .Ury &&_fbfb .Lly <=_dbagf .Ury ;};func (_fedge intSet )add (_abff int ){_fedge [_abff ]=struct{}{}};type textPara struct{_gdc .PdfRectangle ;_ffab _gdc .PdfRectangle ;_acgf []*textLine ;_cabb *textTable ;_ccbb bool ;_bgcce bool ;_fdeda *textPara ;_aagg *textPara ;_dabb *textPara ;_abaa *textPara ;};func (_bdgfc *wordBag )removeDuplicates (){if _edg {_adb .Log .Info 
("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_bdgfc .text ());};for _ ,_cce :=range _bdgfc .depthIndexes (){if len (_bdgfc ._fdebe [_cce ])==0{continue ;};_fcacb :=_bdgfc ._fdebe [_cce ][0];_gcgg :=_gagbe *_fcacb ._bgdgg ;_dfea :=_fcacb ._ggccd ;for _ ,_ggde :=range _bdgfc .depthBand (_dfea ,_dfea +_gcgg ){_baf :=map[*textWord ]struct{}{};_badad :=_bdgfc ._fdebe [_ggde ];for _ ,_gebd :=range _badad {if _ ,_feded :=_baf [_gebd ];_feded {continue ;};for _ ,_adg :=range _badad {if _ ,_aacd :=_baf [_adg ];_aacd {continue ;};if _adg !=_gebd &&_adg ._adad ==_gebd ._adad &&_g .Abs (_adg .Llx -_gebd .Llx )< _gcgg &&_g .Abs (_adg .Urx -_gebd .Urx )< _gcgg &&_g .Abs (_adg .Lly -_gebd .Lly )< _gcgg &&_g .Abs (_adg .Ury -_gebd .Ury )< _gcgg {_baf [_adg ]=struct{}{};};};};if len (_baf )> 0{_ebab :=0;for _ ,_fggb :=range _badad {if _ ,_fafdf :=_baf [_fggb ];!_fafdf {_badad [_ebab ]=_fggb ;_ebab ++;};};_bdgfc ._fdebe [_ggde ]=_badad [:len (_badad )-len (_baf )];if len (_bdgfc ._fdebe [_ggde ])==0{delete (_bdgfc ._fdebe ,_ggde );};};};};};func (_afbdc *ruling )alignsSec (_fbdb *ruling )bool {const _ebbf =_bgcb +1.0;return _afbdc ._edebg -_ebbf <=_fbdb ._dbfe &&_fbdb ._edebg -_ebbf <=_afbdc ._dbfe ;};type event struct{_edfcgb float64 ;_cbfb bool ;_beaec int ;};func _gecb (_aga ,_ccg _gdc .PdfRectangle )bool {return _cfb (_aga ,_ccg )&&_ggfg (_aga ,_ccg )};func _fefa (_dfddf ,_cdca int )int {if _dfddf < _cdca {return _dfddf ;};return _cdca ;};
|
||
|
||
// Tables returns the tables extracted from the page.
|
||
func (_fbc PageText )Tables ()[]TextTable {if _dfad {_adb .Log .Info ("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064",len (_fbc ._abfg ));};return _fbc ._abfg ;};
|
||
|
||
// ExtractPageImages returns the image contents of the page extractor, including data
|
||
// and position, size information for each image.
|
||
// A set of options to control page image extraction can be passed in. The options
|
||
// parameter can be nil for the default options. By default, inline stencil masks
|
||
// are not extracted.
|
||
func (_ade *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_aed :=&imageExtractContext {_efb :options };_dbb :=_aed .extractContentStreamImages (_ade ._adf ,_ade ._db );if _dbb !=nil {return nil ,_dbb ;};return &PageImages {Images :_aed ._bg },nil ;};func (_dfgf paraList )xNeighbours (_gebb float64 )map[*textPara ][]int {_faee :=make ([]event ,2*len (_dfgf ));if _gebb ==0{for _aebbb ,_adaf :=range _dfgf {_faee [2*_aebbb ]=event {_adaf .Llx ,true ,_aebbb };_faee [2*_aebbb +1]=event {_adaf .Urx ,false ,_aebbb };};}else {for _afggf ,_cefc :=range _dfgf {_faee [2*_afggf ]=event {_cefc .Llx -_gebb *_cefc .fontsize (),true ,_afggf };_faee [2*_afggf +1]=event {_cefc .Urx +_gebb *_cefc .fontsize (),false ,_afggf };};};return _dfgf .eventNeighbours (_faee );};func (_effa paraList )merge ()*textPara {_adb .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_effa ));if len (_effa )==0{return nil ;};_effa .sortReadingOrder ();_dcefd :=_effa [0].PdfRectangle ;_agabe :=_effa [0]._acgf ;for _ ,_begg :=range _effa [1:]{_dcefd =_cae (_dcefd ,_begg .PdfRectangle );_agabe =append (_agabe ,_begg ._acgf ...);};return _gccb (_dcefd ,_agabe );};func _bgbff (_dfgb []pathSection ){if _ddff < 0.0{return ;};if _cfag {_adb .Log .Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_dfgb ));};for _ceba ,_aggdd :=range _dfgb {for _gaefc ,_cddge :=range _aggdd ._baca {for _cbbf ,_aaeg :=range _cddge ._dee {_cddge ._dee [_cbbf ]=_gg .Point {X :_cece (_aaeg .X ),Y :_cece (_aaeg .Y )};if _cfag {_bebcf :=_cddge ._dee [_cbbf ];if !_fddb (_aaeg ,_bebcf ){_abfa :=_gg .Point {X :_bebcf .X -_aaeg .X ,Y :_bebcf .Y -_aaeg .Y };_gd 
.Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_ceba ,_gaefc ,_cbbf ,_aaeg ,_bebcf ,_abfa );};};};};};};
|
||
|
||
// String returns a string describing `ma`.
|
||
func (_cdd TextMarkArray )String ()string {_cfa :=len (_cdd ._fbbe );if _cfa ==0{return "\u0045\u004d\u0050T\u0059";};_bgce :=_cdd ._fbbe [0];_cbda :=_cdd ._fbbe [_cfa -1];return _gd .Sprintf ("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d",_cfa ,_bgce ,_cbda );};type textResult struct{_fgce PageText ;_efc int ;_deb int ;};func (_cda *textObject )moveText (_bde ,_efcg float64 ){_cda .moveLP (_bde ,_efcg )};func (_cffbf *textTable )getRight ()paraList {_edfb :=make (paraList ,_cffbf ._egbe );for _caabf :=0;_caabf < _cffbf ._egbe ;_caabf ++{_cfbag :=_cffbf .get (_cffbf ._ffbe -1,_caabf )._aagg ;if _cfbag ==nil ||_cfbag ._ccbb {return nil ;};_edfb [_caabf ]=_cfbag ;};for _efef :=0;_efef < _cffbf ._egbe -1;_efef ++{if _edfb [_efef ]._abaa !=_edfb [_efef +1]{return nil ;};};return _edfb ;};func (_eceab *shapesState )newSubPath (){_eceab .clearPath ();if _ebge {_adb .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_eceab );};};func (_baea paraList )lines ()[]*textLine {var _fdf []*textLine ;for _ ,_cadb :=range _baea {_fdf =append (_fdf ,_cadb ._acgf ...);};return _fdf ;};const (_gceg =1.0e-6;_ddff =1.0e-4;_fbaf =10;_bcgf =6;_gdee =0.5;_cfc =0.12;_adbe =0.19;_eace =0.04;_bfbe =0.04;_aaaf =1.0;_agge =0.04;_cff =0.4;_dfg =0.7;_dbdb =1.0;_beefb =0.1;_fcgf =1.4;_edgg =0.46;_gfce =0.02;_gagbe =0.2;_fbbg =0.5;_dgcd =4;_ecfe =4.0;_ebac =6;_egda =0.3;_edbb =0.01;_afff =0.02;_bbgf =2;_dbdbd =2;_feae =500;_ffcc =4.0;_bgfgg =4.0;_gffc =0.05;_efbg =0.1;_ffff =2.0;_bgcb =2.0;_aabc =1.5;_fgfaf =3.0;_egfa =0.25;);type subpath struct{_dee []_gg .Point ;_gfad bool ;};func _cedg (_bdce _gg .Point )*subpath {return &subpath {_dee :[]_gg .Point {_bdce }}};const (_cac =false ;_aagc =false ;_gfabd =false ;_ecdb =false ;_ebge =false ;_aaac 
=false ;_bcef =false ;_fegb =false ;_cdfgc =false ;_fgfa =_cdfgc &&true ;_eebd =_fgfa &&false ;_edg =_cdfgc &&true ;_dfad =false ;_eded =_dfad &&false ;_beab =_dfad &&true ;_cfag =false ;_gfgc =_cfag &&false ;_aab =_cfag &&false ;_dceg =_cfag &&true ;_beeb =_cfag &&false ;_bfe =_cfag &&false ;);func (_fgg *subpath )close (){if !_fddb (_fgg ._dee [0],_fgg .last ()){_fgg .add (_fgg ._dee [0]);};_fgg ._gfad =true ;_fgg .removeDuplicates ();};func _cbada (_efcge map[float64 ]map[float64 ]gridTile )[]float64 {_cada :=make ([]float64 ,0,len (_efcge ));_cbadc :=make (map[float64 ]struct{},len (_efcge ));for _ ,_edcda :=range _efcge {for _eegd :=range _edcda {if _ ,_aefdc :=_cbadc [_eegd ];_aefdc {continue ;};_cada =append (_cada ,_eegd );_cbadc [_eegd ]=struct{}{};};};_ad .Float64s (_cada );return _cada ;};type cachedImage struct{_fed *_gdc .Image ;_agf _gdc .PdfColorspace ;};type rulingList []*ruling ;
|
||
|
||
// String returns a description of `v`.
|
||
func (_bdgd *ruling )String ()string {if _bdgd ._fafed ==_afac {return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047";};_cafd ,_dgfg :="\u0078","\u0079";if _bdgd ._fafed ==_dbff {_cafd ,_dgfg ="\u0079","\u0078";};_bffe :="";if _bdgd ._fbab !=0.0{_bffe =_gd .Sprintf (" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_bdgd ._fbab );};return _gd .Sprintf ("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",_bdgd ._fafed ,_cafd ,_bdgd ._bfc ,_dgfg ,_bdgd ._edebg ,_bdgd ._dbfe ,_bdgd ._dbfe -_bdgd ._edebg ,_bdgd ._fffg ,_bdgd .Color ,_bffe );};type fontEntry struct{_gbgc *_gdc .PdfFont ;_eee int64 ;};
|
||
|
||
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
|
||
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
|
||
// Replace with a function like Extract() (*PageText, error)
|
||
func (_fde *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_dbe ,_eadc ,_ffd ,_dgg :=_fde .extractPageText (_fde ._adf ,_fde ._db ,_gg .IdentityMatrix (),0);if _dgg !=nil {return nil ,0,0,_dgg ;};_dbe .computeViews ();_dgg =_gbdfc (_dbe );if _dgg !=nil {return nil ,0,0,_dgg ;};return _dbe ,_eadc ,_ffd ,nil ;};func (_baa *textObject )setTextLeading (_caab float64 ){if _baa ==nil {return ;};_baa ._bgebc ._dage =_caab ;};func _egag (_bbgg []*textWord ,_fcg float64 ,_dbef ,_ddga rulingList )*wordBag {_aecf :=_dadg (_bbgg [0],_fcg ,_dbef ,_ddga );for _ ,_bee :=range _bbgg [1:]{_cbc :=_beb (_bee ._ggccd );_aecf ._fdebe [_cbc ]=append (_aecf ._fdebe [_cbc ],_bee );_aecf .PdfRectangle =_cae (_aecf .PdfRectangle ,_bee .PdfRectangle );};_aecf .sort ();return _aecf ;};type rulingKind int ;
|
||
|
||
// ToText returns the page text as a single string.
|
||
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
|
||
// Text() instead.
|
||
func (_aceb PageText )ToText ()string {return _aceb .Text ()};

// getFontDict looks up the font named `_ebbb` in the resources held in `_acdd._gec`.
// With no resources it logs at debug level and returns (nil, nil). When
// GetFontByName reports that the font is missing it logs at debug level and
// returns a nil object together with an error.
func (_acdd *textObject )getFontDict (_ebbb string )(_dbbf _dg .PdfObject ,_eac error ){_bfda :=_acdd ._gec ;if _bfda ==nil {_adb .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_ebbb );return nil ,nil ;};_dbbf ,_fcaf :=_bfda .GetFontByName (_dg .PdfObjectName (_ebbb ));if !_fcaf {_adb .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_ebbb );return nil ,_d .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _dbbf ,nil ;};

// _gbdf builds a ruling of kind `_gddg` from the rectangle `_fda`: `_bfc` is the
// rectangle's Llx, and `_edebg` / `_dbfe` are its Lly / Ury.
func _gbdf (_fda _gdc .PdfRectangle )*ruling {return &ruling {_fafed :_gddg ,_bfc :_fda .Llx ,_edebg :_fda .Lly ,_dbfe :_fda .Ury };};

// _gcf returns the left edge (Llx) of `_aeb`'s bounding box minus the right edge
// (Urx) of `_bbce`'s bounding box.
func _gcf (_aeb ,_bbce bounded )float64 {return _aeb .bbox ().Llx -_bbce .bbox ().Urx };

// makeRectRuling tries to turn the first four points of the subpath into a single
// ruling with colour `_deddb`. It returns the ruling and true on success. On
// failure the bool is false and the ruling is either an empty &ruling{} (edge
// pattern rejected) or nil (the later checks failed).
// The slice expression `_dee[:4]` panics if the subpath's capacity is below 4.
func (_bdcdb *subpath )makeRectRuling (_deddb _dc .Color )(*ruling ,bool ){if _beeb {_adb .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_bdcdb );};
// Classify each of the four edges (point i to point (i+1)%4) with `_decg`.
_faaa :=_bdcdb ._dee [:4];_edae :=make (map[int ]rulingKind ,len (_faaa ));for _eaab ,_eed :=range _faaa {_acbg :=_bdcdb ._dee [(_eaab +1)%4];_edae [_eaab ]=_decg (_eed ,_acbg );if _beeb {_gd .Printf ("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066",_eaab ,_edae [_eaab ],_eed ,_acbg );};};if _beeb {_gd .Printf ("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_edae );};
// Collect the indexes of the edges of kind `_dbff` (labelled "horzs" in the debug
// output) and of kind `_gddg` (labelled "verts"). The map is ranged over, so the
// order of the collected indexes is unspecified.
var _addf ,_bggde []int ;for _ffcee ,_cedc :=range _edae {switch _cedc {case _dbff :_bggde =append (_bggde ,_ffcee );case _gddg :_addf =append (_addf ,_ffcee );};};if _beeb {_gd .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_bggde ),_bggde );_gd .Printf ("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_addf ),_addf );};
// Accept the path when it has two edges of each kind, or exactly two edges of one
// kind, none of the other, and the pair passes `_dcad` / `_eafg` respectively.
_aaga :=(len (_bggde )==2&&len (_addf )==2)||(len (_bggde )==2&&len (_addf )==0&&_dcad (_faaa [_bggde [0]],_faaa [_bggde [1]]))||(len (_addf )==2&&len (_bggde )==0&&_eafg (_faaa [_addf [0]],_faaa [_addf [1]]));if _beeb {_gd .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_bggde ),len (_addf ),_aaga );};if !_aaga {if _beeb {_adb .Log .Error ("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v",_bdcdb );_gd .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_bggde ),len (_addf ),_aaga );};return &ruling {},false ;};
// When one kind is absent, stand in every edge that is not of the other kind.
if len (_addf )==0{for _fcfca ,_abfd :=range _edae {if _abfd !=_dbff {_addf =append (_addf ,_fcfca );};};};if len (_bggde )==0{for _fdec ,_caee :=range _edae {if _caee !=_gddg {_bggde =append (_bggde ,_fdec );};};};if _beeb {_adb .Log .Info ("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_bggde ),len (_addf ),len (_faaa ),_bggde ,_addf ,_faaa );};
// Of the first two "horzs" start points take the higher and lower Y; of the first
// two "verts" start points take the larger and smaller X.
var _fdbf ,_bbfg ,_adgg ,_ccbg _gg .Point ;if _faaa [_bggde [0]].Y > _faaa [_bggde [1]].Y {_adgg ,_ccbg =_faaa [_bggde [0]],_faaa [_bggde [1]];}else {_adgg ,_ccbg =_faaa [_bggde [1]],_faaa [_bggde [0]];};if _faaa [_addf [0]].X > _faaa [_addf [1]].X {_fdbf ,_bbfg =_faaa [_addf [0]],_faaa [_addf [1]];}else {_fdbf ,_bbfg =_faaa [_addf [1]],_faaa [_addf [0]];};
// Build the rectangle and swap coordinates where needed so that Llx <= Urx and
// Lly <= Ury.
_ecad :=_gdc .PdfRectangle {Llx :_fdbf .X ,Urx :_bbfg .X ,Lly :_ccbg .Y ,Ury :_adgg .Y };if _ecad .Llx > _ecad .Urx {_ecad .Llx ,_ecad .Urx =_ecad .Urx ,_ecad .Llx ;};if _ecad .Lly > _ecad .Ury {_ecad .Lly ,_ecad .Ury =_ecad .Ury ,_ecad .Lly ;};
// Wrap the rectangle in a rectRuling whose kind comes from `_afdfd`; give up when
// that kind is `_afac` or when asRuling() declines the conversion.
_egdac :=rectRuling {PdfRectangle :_ecad ,_fbgfb :_afdfd (_ecad ),Color :_deddb };if _egdac ._fbgfb ==_afac {if _beeb {_adb .Log .Error ("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c");};return nil ,false ;};_eebb ,_fcdc :=_egdac .asRuling ();if !_fcdc {if _beeb {_adb .Log .Error ("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg");};return nil ,false ;};if _cfag {_gd .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_eebb .String ());};return _eebb ,true ;};

// appendMark adds the mark `_ecgaf` to the word: the mark is appended to `_bbbafa`,
// the word's rectangle is grown to include the mark's rectangle, `_bgdgg` is
// raised to the mark's `_fcdad` when that is larger, and `_ggccd` is recomputed as
// `_gbgec.Ury` minus the word's Lly.
func (_addda *textWord )appendMark (_ecgaf *textMark ,_gbgec _gdc .PdfRectangle ){_addda ._bbbafa =append (_addda ._bbbafa ,_ecgaf );_addda .PdfRectangle =_cae (_addda .PdfRectangle ,_ecgaf .PdfRectangle );if _ecgaf ._fcdad > _addda ._bgdgg {_addda ._bgdgg =_ecgaf ._fcdad ;};_addda ._ggccd =_gbgec .Ury -_addda .PdfRectangle .Lly ;};

// topoOrder returns an ordering of the indexes of `_dagc` derived from the
// readBefore relation by a depth-first traversal: visiting index i first visits
// every unvisited j for which readBefore(ordering, i, j) holds and then records i.
// The recorded sequence is passed through `_afgc` before being returned.
// NOTE(review): `_afgc` presumably reverses the post-order — confirm.
func (_dagc paraList )topoOrder ()[]int {if _fegb {_adb .Log .Info ("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a");};_eecc :=len (_dagc );_dff :=make ([]bool ,_eecc );_cfbaa :=make ([]int ,0,_eecc );_cfcf :=_dagc .llyOrdering ();var _baaf func (_abfc int );_baaf =func (_dacag int ){_dff [_dacag ]=true ;for _adeb :=0;_adeb < _eecc ;_adeb ++{if !_dff [_adeb ]{if _dagc .readBefore (_cfcf ,_dacag ,_adeb ){_baaf (_adeb );};};};_cfbaa =append (_cfbaa ,_dacag );};for _deac :=0;_deac < _eecc ;_deac ++{if !_dff [_deac ]{_baaf (_deac );};};return _afgc (_cfbaa );};

// moveTo sets the `_bbb` flag and stores the device-space point for
// (`_gead`, `_gbba`), as computed by devicePoint, in `_ddcd`.
func (_bgfg *shapesState )moveTo (_gead ,_gbba float64 ){_bgfg ._bbb =true ;_bgfg ._ddcd =_bgfg .devicePoint (_gead ,_gbba );if _ebge {_adb .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_gead ,_gbba ,_bgfg ._ddcd );};};
|
||
|
||
// String returns a description of `p`.
|
||
func (_dgcg *textPara )String ()string {if _dgcg ._bgcce {return _gd .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d",_dgcg .PdfRectangle );};_cdag :="";if _dgcg ._cabb !=nil {_cdag =_gd .Sprintf ("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020",_dgcg ._cabb ._ffbe ,_dgcg ._cabb ._egbe );};return _gd .Sprintf ("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071",_dgcg .PdfRectangle ,_cdag ,len (_dgcg ._acgf ),_fgde (_dgcg .text (),50));};

// snapToGroups splits the list with vertsHorzs(), runs snapToGroupsDirection() on
// each non-empty half, and returns the first half followed by the second. The
// result is logged through the list's log method.
func (_bdggc rulingList )snapToGroups ()rulingList {_cafa ,_dgdg :=_bdggc .vertsHorzs ();if len (_cafa )> 0{_cafa =_cafa .snapToGroupsDirection ();};if len (_dgdg )> 0{_dgdg =_dgdg .snapToGroupsDirection ();};_fbgcb :=append (_cafa ,_dgdg ...);_fbgcb .log ("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073");return _fbgcb ;};

// _fgcb compares `_cdae` and `_fccf` by the difference of their left edges
// (`_aecde`); when `_gbee` accepts that difference, the result of `_bcf` is
// returned instead.
// NOTE(review): `_gbee` presumably tests for (near-)zero, making `_bcf` a
// tie-breaker — confirm.
func _fgcb (_cdae ,_fccf bounded )float64 {_fded :=_aecde (_cdae ,_fccf );if !_gbee (_fded ){return _fded ;};return _bcf (_cdae ,_fccf );};

// empty reports whether the bag's `_fdebe` map has no entry for key `_adec`.
func (_ged *wordBag )empty (_adec int )bool {_ ,_gffe :=_ged ._fdebe [_adec ];return !_gffe };

// addPoint converts (`_gdg`, `_adbg`) with devicePoint and adds the result to the
// subpath returned by establishSubpath(). When that subpath is nil, the point is
// stored in `_ddcd` and the `_bbb` flag is set instead.
func (_agd *shapesState )addPoint (_gdg ,_adbg float64 ){_cbea :=_agd .establishSubpath ();_fcd :=_agd .devicePoint (_gdg ,_adbg );if _cbea ==nil {_agd ._bbb =true ;_agd ._ddcd =_fcd ;}else {_cbea .add (_fcd );};};

// toGrids partitions the rulings in `_acgg` into groups of connected rulings and
// returns those groups that isActualGrid() accepts, each passed through
// snapToGroups().
// The list must be non-empty: the first element of the ordering is indexed
// unconditionally.
func (_acgg rulingList )toGrids ()[]rulingList {if _cfag {_adb .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_acgg );};
_gbgge :=_acgg .intersections ();if _cfag {_adb .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_acgg ),len (_gbgge ));for _ ,_beggb :=range _dege (_gbgge ){_gd .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_beggb ,_gbgge [_beggb ]);};};
// For every ruling index, record its non-empty set of connections.
_fggg :=make (map[int ]intSet ,len (_acgg ));for _cdagd :=range _acgg {_fdfc :=_acgg .connections (_gbgge ,_cdagd );if len (_fdfc )> 0{_fggg [_cdagd ]=_fdfc ;};};if _cfag {_adb .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_fggg ));for _ ,_bgdee :=range _dege (_fggg ){_gd .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_bgdee ,_fggg [_bgdee ]);};};
// Order the indexes by descending connection count, using comp() to break ties.
_efeb :=_gcda (len (_acgg ),func (_dacg ,_cbaae int )bool {_aebb ,_cfgc :=len (_fggg [_dacg ]),len (_fggg [_cbaae ]);if _aebb !=_cfgc {return _aebb > _cfgc ;};return _acgg .comp (_dacg ,_cbaae );});if _cfag {_adb .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_efeb );};
// Greedy grouping: each index joins the first existing group containing a member
// whose connection set has it; otherwise it starts a new group.
_adae :=[][]int {{_efeb [0]}};_abgf :for _ ,_ggfd :=range _efeb [1:]{for _acba ,_dfcd :=range _adae {for _ ,_gdgdb :=range _dfcd {if _fggg [_gdgdb ].has (_ggfd ){_adae [_acba ]=append (_dfcd ,_ggfd );continue _abgf ;};};};_adae =append (_adae ,[]int {_ggfd });};if _cfag {_adb .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_adae );};
// Largest groups first (stable); inside each group, order the indexes with comp().
_ad .SliceStable (_adae ,func (_fccg ,_dgdf int )bool {return len (_adae [_fccg ])> len (_adae [_dgdf ])});for _ ,_bbdg :=range _adae {_ad .Slice (_bbdg ,func (_fcadb ,_egbf int )bool {return _acgg .comp (_bbdg [_fcadb ],_bbdg [_egbf ])});};
// Translate the index groups into ruling lists.
_cgfd :=make ([]rulingList ,len (_adae ));for _ebba ,_aagbg :=range _adae {_ggad :=make (rulingList ,len (_aagbg ));for _fcaa ,_bgcf :=range _aagbg {_ggad [_fcaa ]=_acgg [_bgcf ];};_cgfd [_ebba ]=_ggad ;};if _cfag {_adb .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_cgfd );};
// Keep only the groups accepted by isActualGrid(), using the list it returns.
var _ccad []rulingList ;for _ ,_bgcee :=range _cgfd {if _gdfgc ,_bcaf :=_bgcee .isActualGrid ();_bcaf {_bgcee =_gdfgc ;_bgcee =_bgcee .snapToGroups ();_ccad =append (_ccad ,_bgcee );};};if _cfag {_dedf ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_ccad );_adb .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_cgfd ),len (_ccad ));};return _ccad ;};

// _cded classifies the segment between `_fcge` and `_fafdb` by passing the absolute
// X and Y differences, together with `_ffcc`, to `_bdbc`.
func _cded (_fcge ,_fafdb _gg .Point )rulingKind {_afef :=_g .Abs (_fcge .X -_fafdb .X );_bagcc :=_g .Abs (_fcge .Y -_fafdb .Y );return _bdbc (_afef ,_bagcc ,_ffcc );};

// hasLines reports whether `_gecb` holds between the cell's rectangle and the
// rectangle of at least one line in `_cffb` (the debug output labels the `_gecb`
// result "intersects").
func (_facc compositeCell )hasLines (_cffb []*textLine )bool {for _eggge ,_effg :=range _cffb {_bcgb :=_gecb (_facc .PdfRectangle ,_effg .PdfRectangle );if _dfad {_gd .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a",_bcgb ,_eggge ,len (_cffb ));_gd .Printf ("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a",_facc );_gd .Printf ("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a",_effg );};if _bcgb {return true ;};};return false ;};

// absorb merges `_bcede` into the word: the rectangle is grown to include
// `_bcede`'s rectangle and `_bcede`'s marks are appended to `_bbbafa`.
func (_cefg *textWord )absorb (_bcede *textWord ){_cefg .PdfRectangle =_cae (_cefg .PdfRectangle ,_bcede .PdfRectangle );_cefg ._bbbafa =append (_cefg ._bbbafa ,_bcede ._bbbafa ...);};

// pop removes the last element of the stack and returns a pointer to a copy of
// the textState it pointed to. It returns nil when the stack is empty.
func (_cdaf *stateStack )pop ()*textState {if _cdaf .empty (){return nil ;};_fdeb :=*(*_cdaf )[len (*_cdaf )-1];*_cdaf =(*_cdaf )[:len (*_cdaf )-1];return &_fdeb ;};

// findPrimSec returns the first ruling for which `_gbee(_bfc - _fage)` holds and
// whose `_edebg`..`_dbfe` range, widened by `_ffff` at both ends, contains `_ebgf`.
// It returns nil when no ruling qualifies.
func (_ffddc rulingList )findPrimSec (_fage ,_ebgf float64 )*ruling {for _ ,_bfbab :=range _ffddc {if _gbee (_bfbab ._bfc -_fage )&&_bfbab ._edebg -_ffff <=_ebgf &&_ebgf <=_bfbab ._dbfe +_ffff {return _bfbab ;};};return nil ;};

// _gccb returns a new textPara with rectangle `_eggg` and lines `_acec`.
func _gccb (_eggg _gdc .PdfRectangle ,_acec []*textLine )*textPara {return &textPara {PdfRectangle :_eggg ,_acgf :_acec };};

// _aecde returns the left edge (Llx) of `_fbcd`'s bounding box minus the left edge
// of `_daaf`'s bounding box.
func _aecde (_fbcd ,_daaf bounded )float64 {return _fbcd .bbox ().Llx -_daaf .bbox ().Llx };

// _dedf logs the number of ruling lists in `_cadfg` with the title `_bgddb` and
// then prints each list, with its index, to standard output.
func _dedf (_bgddb string ,_cadfg []rulingList ){_adb .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_cadfg ),_bgddb );for _ceeg ,_eddb :=range _cadfg {_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ceeg ,_eddb .String ());};};

// reduceTiling returns a table without the rows and columns that are both empty
// and closer than `_feabb` to a neighbouring tiling coordinate. A row y > 0 is
// dropped when |`_aafd`[y-1] - `_aafd`[y]| < `_feabb` and emptyRow(y) holds. A
// column x other than the last is dropped when |`_cgaf`[x+1] - `_cgaf`[x]| <
// `_feabb` and emptyColumn(x) holds. When nothing is dropped the receiver itself
// is returned; otherwise a new table is filled from the kept cells.
func (_gcaa *textTable )reduceTiling (_gcfa gridTiling ,_feabb float64 )*textTable {_edgb :=make ([]int ,0,_gcaa ._egbe );_gcdbb :=make ([]int ,0,_gcaa ._ffbe );_gcba :=_gcfa ._cgaf ;_gadc :=_gcfa ._aafd ;for _eecd :=0;_eecd < _gcaa ._egbe ;_eecd ++{_ecbfe :=_eecd > 0&&_g .Abs (_gadc [_eecd -1]-_gadc [_eecd ])< _feabb &&_gcaa .emptyRow (_eecd );if !_ecbfe {_edgb =append (_edgb ,_eecd );};};for _ebfe :=0;_ebfe < _gcaa ._ffbe ;_ebfe ++{_gafef :=_ebfe < _gcaa ._ffbe -1&&_g .Abs (_gcba [_ebfe +1]-_gcba [_ebfe ])< _feabb &&_gcaa .emptyColumn (_ebfe );if !_gafef {_gcdbb =append (_gcdbb ,_ebfe );};};if len (_edgb )==_gcaa ._egbe &&len (_gcdbb )==_gcaa ._ffbe {return _gcaa ;};_gbag :=textTable {_fgdd :_gcaa ._fgdd ,_ffbe :len (_gcdbb ),_egbe :len (_edgb ),_fgcc :make (map[uint64 ]compositeCell ,len (_gcdbb )*len (_edgb ))};if _dfad {_adb .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_gcaa ._ffbe ,_gcaa ._egbe ,len (_gcdbb ),len (_edgb ));_adb .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_gcdbb );_adb .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_edgb );};for _febg ,_aadb :=range _edgb {for _dddd ,_feffe :=range _gcdbb {_agccf ,_eced :=_gcaa .getComposite (_feffe ,_aadb );if len (_agccf )==0{continue ;};if _dfad {_gd .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_dddd ,_febg ,_feffe ,_aadb ,_fgde (_agccf .merge ().text (),50));};_gbag .putComposite (_dddd ,_febg ,_agccf ,_eced );};};return &_gbag ;};

// _aade converts path sections into rulings. After `_bgbff` has processed the
// sections, every pair of consecutive points in every subpath with at least two
// points is offered to `_eeccf` together with the section's colour; the rulings
// it accepts are collected and returned.
func _aade (_ddgea []pathSection )rulingList {_bgbff (_ddgea );if _cfag {_adb .Log .Info ("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073",len (_ddgea ));};var _adgf rulingList ;for _ ,_gffg :=range _ddgea {for _ ,_fgga :=range _gffg ._baca {if len (_fgga ._dee )< 2{continue ;};_dgee :=_fgga ._dee [0];for _ ,_gaef :=range _fgga ._dee [1:]{if _afcdc ,_gedb :=_eeccf (_dgee ,_gaef ,_gffg .Color );_gedb {_adgf =append (_adgf ,_afcdc );};_dgee =_gaef ;};};};if _cfag {_adb .Log .Info ("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073",_adgf );};return _adgf ;};

// _bgbc returns a 2x2 textTable with `_abfe` at (0,0), `_fbfaa` at (1,0),
// `_bbddf` at (0,1) and `_abgg` at (1,1).
func _bgbc (_abfe ,_fbfaa ,_bbddf ,_abgg *textPara )*textTable {_aeccg :=&textTable {_ffbe :2,_egbe :2,_gged :make (map[uint64 ]*textPara ,4)};_aeccg .put (0,0,_abfe );_aeccg .put (1,0,_fbfaa );_aeccg .put (0,1,_bbddf );_aeccg .put (1,1,_abgg );return _aeccg ;};

// taken reports whether the paragraph is nil or has its `_ccbb` flag set.
func (_gcde *textPara )taken ()bool {return _gcde ==nil ||_gcde ._ccbb };

// _gbdfc is the licence gate applied to extracted page text. It returns nil when
// a licence key is present and licensed, or when `_f` is true. Otherwise it prints
// a notice to standard output and returns an error. `_bdde` is not used.
func _gbdfc (_bdde *PageText )error {_cgag :=_c .GetLicenseKey ();if _cgag !=nil &&_cgag .IsLicensed ()||_f {return nil ;};_gd .Printf ("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a");_gd .Println ("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f");return _d .New ("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064");};

// extractContentStreamImages parses the content stream `_ebg` and processes it
// with `_bd` as resources, routing every operand to processOperand. The image
// cache `_ed` and the options `_efb` are initialised here if they are still nil.
// A parse error or a processing error is returned unchanged.
func (_dgd *imageExtractContext )extractContentStreamImages (_ebg string ,_bd *_gdc .PdfPageResources )error {_de :=_aee .NewContentStreamParser (_ebg );_edf ,_gef :=_de .Parse ();if _gef !=nil {return _gef ;};if _dgd ._ed ==nil {_dgd ._ed =map[*_dg .PdfObjectStream ]*cachedImage {};};if _dgd ._efb ==nil {_dgd ._efb =&ImageExtractOptions {};};_ead :=_aee .NewContentStreamProcessor (*_edf );_ead .AddHandler (_aee .HandlerConditionEnumAllOperands ,"",_dgd .processOperand );return _ead .Process (_bd );};

// snapToGroupsDirection groups rulings whose `_bfc` values lie close together,
// gives every member of a group the same `_bfc`, and merges the members of each
// group into a reduced, sorted list. The receiver must be non-empty (element 0 is
// indexed unconditionally). It also mutates the rulings in the receiver: their
// `_bfc` fields are overwritten.
func (_gdfdb rulingList )snapToGroupsDirection ()rulingList {_gdfdb .sortStrict ();
// Walk the sorted list; a ruling starts a new group when its `_bfc` exceeds the
// current group leader's by more than `_bgcb`, and otherwise joins that group.
_eecb :=make (map[*ruling ]rulingList ,len (_gdfdb ));_ddde :=_gdfdb [0];_ccgg :=func (_cbcg *ruling ){_ddde =_cbcg ;_eecb [_ddde ]=rulingList {_cbcg }};_ccgg (_gdfdb [0]);for _ ,_fegg :=range _gdfdb [1:]{if _fegg ._bfc < _ddde ._bfc -_gceg {_adb .Log .Error ("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073",_ddde ,_fegg );};if _fegg ._bfc > _ddde ._bfc +_bgcb {_ccgg (_fegg );}else {_eecb [_ddde ]=append (_eecb [_ddde ],_fegg );};};
// Compute one `_bfc` per group with mergePrimary() and write it to every member.
_daff :=make (map[*ruling ]float64 ,len (_eecb ));_daeg :=make (map[*ruling ]*ruling ,len (_gdfdb ));for _agcg ,_cceb :=range _eecb {_daff [_agcg ]=_cceb .mergePrimary ();for _ ,_bcdc :=range _cceb {_daeg [_bcdc ]=_agcg ;};};for _ ,_eacc :=range _gdfdb {_eacc ._bfc =_daff [_daeg [_eacc ]];};
// Split each group with splitSec() and merge every part into one ruling. A merged
// ruling that aligns on both axes with the previously appended one is reported
// and skipped. The groups are visited in map order, which is unspecified; the
// result is sorted before it is returned.
_bfde :=make (rulingList ,0,len (_gdfdb ));for _ ,_eefdf :=range _eecb {_fbcc :=_eefdf .splitSec ();for _babg ,_eceac :=range _fbcc {_bgdfa :=_eceac .merge ();if len (_bfde )> 0{_ecce :=_bfde [len (_bfde )-1];if _ecce .alignsPrimary (_bgdfa )&&_ecce .alignsSec (_bgdfa ){_adb .Log .Error ("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073",_babg ,_ecce ,_bgdfa );continue ;};};_bfde =append (_bfde ,_bgdfa );};};_bfde .sortStrict ();return _bfde ;};

// readBefore reports whether paragraph `_bcb` is to be read before paragraph
// `_dgab`. `_edbe` is the index ordering that llyRange expects.
// It returns true when `_gdeec` holds for the pair and `_bcb` has the larger Lly.
// Otherwise `_bcb`'s `_ffab` box must end (Urx) strictly left of where `_dgab`'s
// begins (Llx), and no third paragraph whose Lly lies between the two Lly values
// may have an `_ffab` x-range overlapping the interval [max of the two Llx, min of
// the two Urx].
func (_ebdg paraList )readBefore (_edbe []int ,_bcb ,_dgab int )bool {_aadd ,_bbcg :=_ebdg [_bcb ],_ebdg [_dgab ];if _gdeec (_aadd ,_bbcg )&&_aadd .Lly > _bbcg .Lly {return true ;};if !(_aadd ._ffab .Urx < _bbcg ._ffab .Llx ){return false ;};
// Order the two Lly values so that `_afcc` <= `_ggeg`.
_afcc ,_ggeg :=_aadd .Lly ,_bbcg .Lly ;if _afcc > _ggeg {_ggeg ,_afcc =_afcc ,_ggeg ;};_bbbc :=_g .Max (_aadd ._ffab .Llx ,_bbcg ._ffab .Llx );_bdbbb :=_g .Min (_aadd ._ffab .Urx ,_bbcg ._ffab .Urx );_fbba :=_ebdg .llyRange (_edbe ,_afcc ,_ggeg );for _ ,_geaf :=range _fbba {if _geaf ==_bcb ||_geaf ==_dgab {continue ;};_agde :=_ebdg [_geaf ];if _agde ._ffab .Llx <=_bdbbb &&_bbbc <=_agde ._ffab .Urx {return false ;};};return true ;};
|
||
|
||
// ImageExtractOptions contains options for controlling image extraction from
|
||
// PDF pages.
|
||
type ImageExtractOptions struct{IncludeInlineStencilMasks bool ;};

// emptyColumn reports whether every cell in column `_edac` (rows 0.._egbe-1) is
// either nil or has empty text.
func (_fbef *textTable )emptyColumn (_edac int )bool {for _ffec :=0;_ffec < _fbef ._egbe ;_ffec ++{_afea :=_fbef .get (_edac ,_ffec );if _afea !=nil &&_afea .text ()!=""{return false ;};};return true ;};

// shapesState holds two matrices, a list of subpaths, the flag `_bbb` with the
// point `_ddcd` (set together by moveTo and addPoint), and a *textObject.
// NOTE(review): the roles of the two matrices and of `_befd` are not visible in
// this part of the file.
type shapesState struct{_fdb _gg .Matrix ;_gge _gg .Matrix ;_bcce []*subpath ;_bbb bool ;_ddcd _gg .Point ;_befd *textObject ;};

// push appends a pointer to a copy of `*_bda` to the stack, so later changes to
// the caller's state do not affect the stored entry.
func (_ffac *stateStack )push (_bda *textState ){_ffgd :=*_bda ;*_ffac =append (*_ffac ,&_ffgd )};

// _bded is a TextMark with Meta set, Text "[X]", Original " " and white fill and
// stroke colours.
var _bded =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_dc .White ,StrokeColor :_dc .White };

// markKind is the integer type of lineRuling's `_ffee` field.
type markKind int ;

// applyRemovals deletes from the bag the words listed in `_ebfc`, which maps a
// `_fdebe` key to the set of words to remove under that key. A bucket that would
// become empty is deleted from the map; otherwise it is replaced by a new slice
// holding the surviving words in their original order.
// NOTE(review): the new length is computed as len(bucket) - len(set), so every
// word in the set is assumed to occur exactly once in the bucket — confirm.
func (_cgfa *wordBag )applyRemovals (_ebfc map[int ]map[*textWord ]struct{}){for _eaebg ,_fffa :=range _ebfc {if len (_fffa )==0{continue ;};_fbgf :=_cgfa ._fdebe [_eaebg ];_defb :=len (_fbgf )-len (_fffa );if _defb ==0{delete (_cgfa ._fdebe ,_eaebg );continue ;};_aedf :=make ([]*textWord ,_defb );_aae :=0;for _ ,_eaed :=range _fbgf {if _ ,_fadb :=_fffa [_eaed ];!_fadb {_aedf [_aae ]=_eaed ;_aae ++;};};_cgfa ._fdebe [_eaebg ]=_aedf ;};};

// textWord is a rectangle together with the marks (`_bbbafa`) it is made of.
// appendMark maintains `_ggccd` (the Ury of the rectangle passed to appendMark
// minus the word's Lly) and `_bgdgg` (the largest `_fcdad` among the appended
// marks).
// NOTE(review): the meaning of `_adad` and `_bcaaf` is not visible in this part
// of the file.
type textWord struct{_gdc .PdfRectangle ;_ggccd float64 ;_adad string ;_bbbafa []*textMark ;_bgdgg float64 ;_bcaaf bool ;};

// compositeCell is a paraList together with a rectangle.
type compositeCell struct{_gdc .PdfRectangle ;paraList ;};

// arrangeText consumes the words of the bag and arranges them into lines, returning
// a textPara that covers the bag's rectangle, or nil when no line was produced.
func (_eagf *wordBag )arrangeText ()*textPara {_eagf .sort ();if _aecc {_eagf .removeDuplicates ();};
// Build lines until every depth bucket has been emptied. Each line is started by
// `_ggdg` from the bucket's first reading index and then extended word by word.
var _gfdf []*textLine ;for _ ,_befcd :=range _eagf .depthIndexes (){for !_eagf .empty (_befcd ){_cebg :=_eagf .firstReadingIndex (_befcd );_cdeff :=_eagf .firstWord (_cebg );_egca :=_ggdg (_eagf ,_cebg );
// All tolerances scale with `_bgdgg` of the first word: the depth band is
// `_ggccd` +/- `_gdee`*size, `_ddd` is the largest accepted gap and `_edad` the
// largest accepted overlap.
_cfea :=_cdeff ._bgdgg ;_bbcb :=_cdeff ._ggccd -_gdee *_cfea ;_gcec :=_cdeff ._ggccd +_gdee *_cfea ;_ddd :=_fcgf *_cfea ;_edad :=_edgg *_cfea ;
// Repeatedly pick, among the candidates highestWord returns for the band, the
// one with the smallest Llx whose gap to the line's last word is at most `_ddd`.
// A candidate overlapping the last word by more than `_edad` ends the line.
_gccba :for {var _agff *textWord ;_agbd :=0;for _ ,_dcg :=range _eagf .depthBand (_bbcb ,_gcec ){_afggb :=_eagf .highestWord (_dcg ,_bbcb ,_gcec );if _afggb ==nil {continue ;};_gaca :=_gcf (_afggb ,_egca ._cfgd [len (_egca ._cfgd )-1]);if _gaca < -_edad {break _gccba ;};if _gaca > _ddd {continue ;};if _agff !=nil &&_aecde (_afggb ,_agff )>=0{continue ;};_agff =_afggb ;_agbd =_dcg ;};if _agff ==nil {break ;};_egca .pullWord (_eagf ,_agff ,_agbd );};_egca .markWordBoundaries ();_gfdf =append (_gfdf ,_egca );};};
// Order the lines with `_ccae` and wrap them in a paragraph.
if len (_gfdf )==0{return nil ;};_ad .Slice (_gfdf ,func (_fbgag ,_fgaf int )bool {return _ccae (_gfdf [_fbgag ],_gfdf [_fgaf ])< 0});_aede :=_gccb (_eagf .PdfRectangle ,_gfdf );if _cdfgc {_adb .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_aede .String ());if _fgfa {for _aggeb ,_abaec :=range _aede ._acgf {_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_aggeb ,_abaec .String ());if _eebd {for _gegdg ,_fbgg :=range _abaec ._cfgd {_gd .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_gegdg ,_fbgg .String ());for _gdfc ,_agdge :=range _fbgg ._bbbafa {_gd .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_gdfc ,_agdge .String ());};};};};};};return _aede ;};

// scanBand counts the words of the receiver that lie in the depth band
// [`_adba`, `_ceb`] (widened by `_gdee` times `_ccaf._ecba` on both sides), pass
// the filter `_afde` and are not blocked by `_ccaf`. Unless `_begd` is set, it
// also moves those words into `_ccaf`. The count is returned.
//
//   - `_bfdcd` is not used in the body.
//   - `_acb`, when positive, rejects words whose size mismatch with `_ccaf`
//     exceeds it. The mismatch is the smaller of the relative difference
//     2|a-b|/(a+b) and the ratio max(a/b, b/a) of the two `_bgdgg`/`_ecba` values.
//   - `_begd` (detect only): nothing is moved or removed, and scanning of a depth
//     bucket stops at its first accepted word.
//   - `_aaa`: when false, the band is widened to the depth of every accepted word.
func (_edce *wordBag )scanBand (_bfdcd string ,_ccaf *wordBag ,_afde func (_bfgg *wordBag ,_fcda *textWord )bool ,_adba ,_ceb ,_acb float64 ,_begd ,_aaa bool )int {_gff :=_ccaf ._ecba ;var _geagc map[int ]map[*textWord ]struct{};if !_begd {_geagc =_edce .makeRemovals ();};_ccdgb :=_gdee *_gff ;_ebed :=0;for _ ,_ggfa :=range _edce .depthBand (_adba -_ccdgb ,_ceb +_ccdgb ){if len (_edce ._fdebe [_ggfa ])==0{continue ;};for _ ,_gfga :=range _edce ._fdebe [_ggfa ]{if !(_adba -_ccdgb <=_gfga ._ggccd &&_gfga ._ggccd <=_ceb +_ccdgb ){continue ;};if !_afde (_ccaf ,_gfga ){continue ;};_gfdc :=2.0*_g .Abs (_gfga ._bgdgg -_ccaf ._ecba )/(_gfga ._bgdgg +_ccaf ._ecba );_bggg :=_g .Max (_gfga ._bgdgg /_ccaf ._ecba ,_ccaf ._ecba /_gfga ._bgdgg );_feeg :=_g .Min (_gfdc ,_bggg );if _acb > 0&&_feeg > _acb {continue ;};if _ccaf .blocked (_gfga ){continue ;};if !_begd {_ccaf .pullWord (_gfga ,_ggfa ,_geagc );};_ebed ++;if !_aaa {if _gfga ._ggccd < _adba {_adba =_gfga ._ggccd ;};if _gfga ._ggccd > _ceb {_ceb =_gfga ._ggccd ;};};if _begd {break ;};};};
// Removals are recorded during the scan and applied once at the end, so the
// buckets are not modified while they are being ranged over.
if !_begd {_edce .applyRemovals (_geagc );};return _ebed ;};
|
||
|
||
// String returns a description of `tm`.
|
||
func (_egge *textMark) String() string {
	// Layout: the mark's rectangle, its `_fcdad` value (labelled "fontsize" in the
	// output) and its `_bdgg` string wrapped in double quotes.
	const layout = "\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022"
	bbox, size, content := _egge.PdfRectangle, _egge._fcdad, _egge._bdgg
	return _gd.Sprintf(layout, bbox, size, content)
}
|
||
|
||
// TextMarkArray is a collection of TextMarks.
|
||
type TextMarkArray struct {
	// _fbbe holds the marks of the collection.
	_fbbe []TextMark
}

// updateBBox grows the cell's rectangle so that it also covers the rectangle of
// every paragraph in the cell's paraList.
func (_dfgg *compositeCell) updateBBox() {
	paras := _dfgg.paraList
	for i := range paras {
		_dfgg.PdfRectangle = _cae(_dfgg.PdfRectangle, paras[i].PdfRectangle)
	}
}

// lineRuling carries a ruling kind, a mark kind, a colour and two points.
// NOTE(review): the two points are presumably the end points of the line —
// confirm against the code that fills `_dgfe` and `_aaec`.
type lineRuling struct {
	_febb rulingKind
	_ffee markKind
	_dc.Color
	_dgfe, _aaec _gg.Point
}

// _fff is the constant one thousandth.
const _fff = 1.0 / 1000.0

// isExportable reports whether the table should be exported. A table with its
// `_fgdd` flag set always is. Any other table is exportable as soon as one of the
// `_egbe` cells in column 0 is nil, or has text longer than one rune that `_bdbd`
// does not match.
func (_dcbb *textTable) isExportable() bool {
	if _dcbb._fgdd {
		return true
	}
	for row := 0; row < _dcbb._egbe; row++ {
		cell := _dcbb.get(0, row)
		if cell == nil {
			return true
		}
		content := cell.text()
		if _a.RuneCountInString(content) > 1 && !_bdbd.MatchString(content) {
			return true
		}
	}
	return false
}

// depthBand returns depthRange() over the depth indexes of `_dbbfg` and `_acad`,
// or nil when the bag's `_fdebe` map is empty.
func (_ceec *wordBag) depthBand(_dbbfg, _acad float64) []int {
	if len(_ceec._fdebe) == 0 {
		return nil
	}
	from := _ceec.getDepthIdx(_dbbfg)
	to := _ceec.getDepthIdx(_acad)
	return _ceec.depthRange(from, to)
}

// _cae returns the smallest rectangle containing both `_bebc` and `_daac`: the
// minimum of the lower-left coordinates and the maximum of the upper-right ones.
func _cae(_bebc, _daac _gdc.PdfRectangle) _gdc.PdfRectangle {
	var union _gdc.PdfRectangle
	union.Llx = _g.Min(_bebc.Llx, _daac.Llx)
	union.Lly = _g.Min(_bebc.Lly, _daac.Lly)
	union.Urx = _g.Max(_bebc.Urx, _daac.Urx)
	union.Ury = _g.Max(_bebc.Ury, _daac.Ury)
	return union
}
|
||
|
||
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
|
||
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
|
||
// `start` and `end` are offsets in the extracted text.
|
||
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
|
||
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
|
||
func (_gdf *TextMarkArray )RangeOffset (start ,end int )(*TextMarkArray ,error ){if _gdf ==nil {return nil ,_d .New ("\u006da\u003d\u003d\u006e\u0069\u006c");};if end < start {return nil ,_gd .Errorf ("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020",start ,end );};_aef :=len (_gdf ._fbbe );if _aef ==0{return _gdf ,nil ;};if start < _gdf ._fbbe [0].Offset {start =_gdf ._fbbe [0].Offset ;};if end > _gdf ._fbbe [_aef -1].Offset +1{end =_gdf ._fbbe [_aef -1].Offset +1;};_aefe :=_ad .Search (_aef ,func (_eef int )bool {return _gdf ._fbbe [_eef ].Offset +len (_gdf ._fbbe [_eef ].Text )-1>=start });if !(0<=_aefe &&_aefe < _aef ){_gde :=_gd .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076",start ,_aefe ,_aef ,_gdf ._fbbe [0],_gdf ._fbbe [_aef -1]);return nil ,_gde ;};_ggga :=_ad .Search (_aef ,func (_gafe int )bool {return _gdf ._fbbe [_gafe ].Offset > end -1});if !(0<=_ggga &&_ggga < _aef ){_gaddg :=_gd .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076",end ,_ggga ,_aef ,_gdf ._fbbe [0],_gdf ._fbbe [_aef -1]);return nil ,_gaddg ;};if _ggga <=_aefe {return nil ,_gd .Errorf 
("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064",start ,end ,_aefe ,_ggga );};return &TextMarkArray {_fbbe :_gdf ._fbbe [_aefe :_ggga ]},nil ;};func (_efcbc *ruling )encloses (_ebfa ,_faga float64 )bool {return _efcbc ._edebg -_ffff <=_ebfa &&_faga <=_efcbc ._dbfe +_ffff ;};func _dfbf (_cdg *wordBag ,_caabc *textWord ,_gba float64 )bool {return _caabc .Llx < _cdg .Urx +_gba &&_cdg .Llx -_gba < _caabc .Urx ;};func (_ffag paraList )llyRange (_cgbdd []int ,_fefbg ,_ddec float64 )[]int {_bccc :=len (_ffag );if _ddec < _ffag [_cgbdd [0]].Lly ||_fefbg > _ffag [_cgbdd [_bccc -1]].Lly {return nil ;};_bdfb :=_ad .Search (_bccc ,func (_bffd int )bool {return _ffag [_cgbdd [_bffd ]].Lly >=_fefbg });_aabd :=_ad .Search (_bccc ,func (_ggaa int )bool {return _ffag [_cgbdd [_ggaa ]].Lly > _ddec });return _cgbdd [_bdfb :_aabd ];};func (_efbe *textTable )emptyRow (_feggc int )bool {for _ffabf :=0;_ffabf < _efbe ._ffbe ;_ffabf ++{_fbca :=_efbe .get (_ffabf ,_feggc );if _fbca !=nil &&_fbca .text ()!=""{return false ;};};return true ;};func (_bfggb rulingList )aligned ()bool {if len (_bfggb )< 2{return false ;};_dbce :=make (map[*ruling ]int );_dbce [_bfggb [0]]=0;for _ ,_gbgcg :=range _bfggb [1:]{_baee :=false ;for _cfee :=range _dbce {if _gbgcg .gridIntersecting (_cfee ){_dbce [_cfee ]++;_baee =true ;break ;};};if !_baee {_dbce [_gbgcg ]=0;};};_fafce :=0;for _ ,_ddee :=range _dbce {if _ddee ==0{_fafce ++;};};_bdcaa :=float64 (_fafce )/float64 (len (_bfggb ));_bbbd :=_bdcaa <=1.0-_egfa ;if _cfag {_adb .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_bbbd ,_bdcaa ,_fafce ,len 
(_bfggb ),_bfggb .String ());};return _bbbd ;};func (_bbgfc *textTable )getDown ()paraList {_fegbc :=make (paraList ,_bbgfc ._ffbe );for _cecd :=0;_cecd < _bbgfc ._ffbe ;_cecd ++{_gfcda :=_bbgfc .get (_cecd ,_bbgfc ._egbe -1)._abaa ;if _gfcda ==nil ||_gfcda ._ccbb {return nil ;};_fegbc [_cecd ]=_gfcda ;};for _fbfe :=0;_fbfe < _bbgfc ._ffbe -1;_fbfe ++{if _fegbc [_fbfe ]._aagg !=_fegbc [_fbfe +1]{return nil ;};};return _fegbc ;};func (_febfc rulingList )mergePrimary ()float64 {_ecfd :=_febfc [0]._bfc ;for _ ,_fbaab :=range _febfc [1:]{_ecfd +=_fbaab ._bfc ;};return _ecfd /float64 (len (_febfc ));};func (_ffeba *textTable )subdivide ()*textTable {_ffeba .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");_cedd :=_ffeba .compositeRowCorridors ();_gacga :=_ffeba .compositeColCorridors ();if _dfad {_adb .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_gbce (_cedd ),_gbce (_gacga ));};if len (_cedd )==0||len (_gacga )==0{return _ffeba ;};_cabeb (_cedd );_cabeb (_gacga );if _dfad {_adb .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_gbce (_cedd ),_gbce (_gacga ));};_bbfge ,_bcbb :=_ddae (_ffeba ._egbe ,_cedd );_ffabg ,_feaf :=_ddae (_ffeba ._ffbe ,_gacga );_gfagf :=make (map[uint64 ]*textPara ,_feaf *_bcbb );_fbaff :=&textTable {PdfRectangle :_ffeba .PdfRectangle ,_fgdd :_ffeba ._fgdd ,_egbe :_bcbb ,_ffbe :_feaf ,_gged :_gfagf };if _dfad {_adb .Log .Info 
("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_ffeba ._ffbe ,_ffeba ._egbe ,_feaf ,_bcbb ,_gbce (_cedd ),_gbce (_gacga ),_bbfge ,_ffabg );};for _ebag :=0;_ebag < _ffeba ._egbe ;_ebag ++{_dcba :=_bbfge [_ebag ];for _eccc :=0;_eccc < _ffeba ._ffbe ;_eccc ++{_cgcgb :=_ffabg [_eccc ];if _dfad {_gd .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_eccc ,_ebag ,_cgcgb ,_dcba );};_eggd ,_cced :=_ffeba ._fgcc [_bgfea (_eccc ,_ebag )];if !_cced {continue ;};_bbddd :=_eggd .split (_cedd [_ebag ],_gacga [_eccc ]);for _cfaa :=0;_cfaa < _bbddd ._egbe ;_cfaa ++{for _fabd :=0;_fabd < _bbddd ._ffbe ;_fabd ++{_bede :=_bbddd .get (_fabd ,_cfaa );_fbaff .put (_cgcgb +_fabd ,_dcba +_cfaa ,_bede );if _dfad {_gd .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_cgcgb +_fabd ,_dcba +_cfaa ,_bede );};};};};};return _fbaff ;};func _bceca (_gbab ,_fcafg float64 )bool {return _g .Abs (_gbab -_fcafg )<=_ffff };const (_afac rulingKind =iota ;_dbff ;_gddg ;);func (_cabc *shapesState )fill (_bffc *[]pathSection ){_cabd :=pathSection {_baca :_cabc ._bcce ,Color :_cabc ._befd .getFillColor ()};*_bffc =append (*_bffc ,_cabd );if _cfag {_gab :=_cabd .bbox ();_gd .Printf ("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a 
%\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",len (*_bffc ),len (_cabd ._baca ),_cabc ,_cabd .Color ,_gab ,_gab .Width (),_gab .Height ());if _gfgc {for _fbbd ,_ddcc :=range _cabd ._baca {_gd .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_fbbd ,_ddcc );if _fbbd ==10{break ;};};};};};func (_fafb rectRuling )asRuling ()(*ruling ,bool ){_ffca :=ruling {_fafed :_fafb ._fbgfb ,Color :_fafb .Color ,_fffg :_cdcc };switch _fafb ._fbgfb {case _gddg :_ffca ._bfc =0.5*(_fafb .Llx +_fafb .Urx );_ffca ._edebg =_fafb .Lly ;_ffca ._dbfe =_fafb .Ury ;_cbcae ,_eadf :=_fafb .checkWidth (_fafb .Llx ,_fafb .Urx );if !_eadf {if _beeb {_adb .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_fafb );};return nil ,false ;};_ffca ._fbab =_cbcae ;case _dbff :_ffca ._bfc =0.5*(_fafb .Lly +_fafb .Ury );_ffca ._edebg =_fafb .Llx ;_ffca ._dbfe =_fafb .Urx ;_gabfe ,_acbf :=_fafb .checkWidth (_fafb .Lly ,_fafb .Ury );if !_acbf {if _beeb {_adb .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_fafb );};return nil ,false ;};_ffca ._fbab =_gabfe ;default:_adb .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_fafb ._fbgfb );return nil ,false ;};return &_ffca ,true ;};func _fccfc (_aeff _gdc .PdfRectangle )*ruling 
{return &ruling {_fafed :_dbff ,_bfc :_aeff .Ury ,_edebg :_aeff .Llx ,_dbfe :_aeff .Urx };};func (_adde rulingList )secMinMax ()(float64 ,float64 ){_gdfcc ,_dabe :=_adde [0]._edebg ,_adde [0]._dbfe ;for _ ,_bced :=range _adde [1:]{if _bced ._edebg < _gdfcc {_gdfcc =_bced ._edebg ;};if _bced ._dbfe > _dabe {_dabe =_bced ._dbfe ;};};return _gdfcc ,_dabe ;};func (_gagbeg rulingList )tidied (_fbff string )rulingList {_gcaee :=_gagbeg .removeDuplicates ();_gcaee .log ("\u0075n\u0069\u0071\u0075\u0065\u0073");_cdafe :=_gcaee .snapToGroups ();if _cdafe ==nil {return nil ;};_cdafe .sort ();if _cfag {_adb .Log .Info ("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064",_fbff ,len (_gagbeg ),len (_gcaee ),len (_cdafe ));};_cdafe .log ("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d");return _cdafe ;};func (_gfca *textPara )fontsize ()float64 {return _gfca ._acgf [0]._aegc };
|
||
|
||
// String returns a human readable description of `path`.
|
||
func (_gcge *subpath )String ()string {_abc :=_gcge ._dee ;_ffde :=len (_abc );if _ffde <=5{return _gd .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_ffde ,_abc );};return _gd .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_ffde ,_abc [0],_abc [1],_abc [_ffde -1]);};func (_bfdc *shapesState )quadraticTo (_fcbd ,_egf ,_aece ,_aeg float64 ){if _ebge {_adb .Log .Info ("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a");};_bfdc .addPoint (_aece ,_aeg );};func _ggg (_dgdec _gg .Matrix )_gg .Point {_dbcg ,_dec :=_dgdec .Translation ();return _gg .Point {X :_dbcg ,Y :_dec };};func _dege (_cfeff map[int ]intSet )[]int {_bdfd :=make ([]int ,0,len (_cfeff ));for _ffae :=range _cfeff {_bdfd =append (_bdfd ,_ffae );};_ad .Ints (_bdfd );return _bdfd ;};var (_eb =_d .New ("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072");_fe =_d .New ("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072"););func (_aeeg paraList )findGridTables (_egdc []gridTiling )[]*textTable {if _dfad {_adb .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_aeeg ));for _dgec ,_abdd :=range _aeeg {_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dgec ,_abdd );};};var _dfac []*textTable ;for _beac ,_dccd :=range _egdc {_ddaa ,_fgeg :=_aeeg .findTableGrid (_dccd );if _ddaa !=nil {_ddaa .log (_gd .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_beac ));_dfac =append (_dfac ,_ddaa );_ddaa .markCells ();};for _febbc :=range _fgeg {_febbc ._ccbb =true ;};};if _dfad {_adb .Log .Info ("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s",len (_dfac 
));};return _dfac ;};
|
||
|
||
// String returns a human readable description of `ss`.
|
||
func (_abde *shapesState )String ()string {return _gd .Sprintf ("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d",len (_abde ._bcce ),_abde ._bbb );};func (_faca rulingList )log (_badd string ){if !_cfag {return ;};_adb .Log .Info ("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_badd ,_faca .String ());for _fdbe ,_gecg :=range _faca {_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fdbe ,_gecg .String ());};};func _gdeec (_ageg ,_bab *textPara )bool {return _cfb (_ageg ._ffab ,_bab ._ffab )};
|
||
|
||
// TableCell is a cell in a TextTable.
|
||
type TableCell struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Marks returns the TextMarks corresponding to the text in Text.
|
||
Marks TextMarkArray ;};func _decg (_egfcb ,_cbcc _gg .Point )rulingKind {_efcb :=_g .Abs (_egfcb .X -_cbcc .X );_cgdcb :=_g .Abs (_egfcb .Y -_cbcc .Y );return _bdbc (_efcb ,_cgdcb ,_gffc );};func (_gdba *textPara )isAtom ()*textTable {_aafc :=_gdba ;_fcafc :=_gdba ._aagg ;_ceca :=_gdba ._abaa ;if !(_fcafc !=nil &&!_fcafc ._ccbb &&_ceca !=nil &&!_ceca ._ccbb ){return nil ;};_fcdcb :=_fcafc ._abaa ;if !(_fcdcb !=nil &&!_fcdcb ._ccbb &&_fcdcb ==_ceca ._aagg ){return nil ;};return _bgbc (_aafc ,_fcafc ,_ceca ,_fcdcb );};func (_cddg *textTable )reduce ()*textTable {_dacb :=make ([]int ,0,_cddg ._egbe );_bedf :=make ([]int ,0,_cddg ._ffbe );for _fcgfd :=0;_fcgfd < _cddg ._egbe ;_fcgfd ++{if !_cddg .emptyRow (_fcgfd ){_dacb =append (_dacb ,_fcgfd );};};for _ecbfc :=0;_ecbfc < _cddg ._ffbe ;_ecbfc ++{if !_cddg .emptyColumn (_ecbfc ){_bedf =append (_bedf ,_ecbfc );};};if len (_dacb )==_cddg ._egbe &&len (_bedf )==_cddg ._ffbe {return _cddg ;};_defa :=textTable {_fgdd :_cddg ._fgdd ,_ffbe :len (_bedf ),_egbe :len (_dacb ),_gged :make (map[uint64 ]*textPara ,len (_bedf )*len (_dacb ))};if _dfad {_adb .Log .Info ("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064",_cddg ._ffbe ,_cddg ._egbe ,len (_bedf ),len (_dacb ));_adb .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_bedf );_adb .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_dacb );};for _eeaad ,_dggc :=range _dacb {for _egea ,_fbec :=range _bedf {_dacce :=_cddg .get (_fbec ,_dggc );if _dacce ==nil {continue ;};if _dfad {_gd .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_egea ,_eeaad ,_fbec ,_dggc ,_fgde (_dacce .text (),50));};_defa .put (_egea ,_eeaad ,_dacce );};};return &_defa ;};func _baec (_gafb map[float64 ]gridTile )[]float64 {_gefee :=make ([]float64 
,0,len (_gafb ));for _cffc :=range _gafb {_gefee =append (_gefee ,_cffc );};_ad .Float64s (_gefee );return _gefee ;};const (_ddfec =true ;_aecc =true ;_cgbd =true ;_bfdae =false ;_cfbd =false ;_bdfg =6;_eeeb =3.0;_edeb =200;_fede =true ;_fae =true ;_gdcfg =true ;_gaeg =true ;_ffce =false ;);func (_eeb *textObject )moveTextSetLeading (_egd ,_dega float64 ){_eeb ._bgebc ._dage =-_dega ;_eeb .moveLP (_egd ,_dega );};func (_cbdaa intSet )del (_dgca int ){delete (_cbdaa ,_dgca )};func _beccd (_gaag map[float64 ]map[float64 ]gridTile )[]float64 {_gedg :=make ([]float64 ,0,len (_gaag ));for _egefb :=range _gaag {_gedg =append (_gedg ,_egefb );};_ad .Float64s (_gedg );_bfga :=len (_gedg );for _dgaa :=0;_dgaa < _bfga /2;_dgaa ++{_gedg [_dgaa ],_gedg [_bfga -1-_dgaa ]=_gedg [_bfga -1-_dgaa ],_gedg [_dgaa ];};return _gedg ;};func _dbbc (_aegg string )bool {if _a .RuneCountInString (_aegg )< _dgcd {return false ;};_dcbg ,_edbbg :=_a .DecodeLastRuneInString (_aegg );if _edbbg <=0||!_ae .Is (_ae .Hyphen ,_dcbg ){return false ;};_dcbg ,_edbbg =_a .DecodeLastRuneInString (_aegg [:len (_aegg )-_edbbg ]);return _edbbg > 0&&!_ae .IsSpace (_dcbg );};func _gbggf (_fgdb ,_aecff *textPara )bool {if _fgdb ._bgcce ||_aecff ._bgcce {return true ;};return _gbee (_fgdb .depth ()-_aecff .depth ());};func _bdbc (_bebd ,_adcc ,_addb float64 )rulingKind {if _bebd >=_addb &&_fefec (_adcc ,_bebd ){return _dbff ;};if _adcc >=_addb &&_fefec (_bebd ,_adcc ){return _gddg ;};return _afac ;};
|
||
|
||
// Len returns the number of TextMarks in `ma`.
|
||
func (_fbge *TextMarkArray )Len ()int {if _fbge ==nil {return 0;};return len (_fbge ._fbbe );};func (_gfcea paraList )writeText (_cdee _b .Writer ){for _agdc ,_cdad :=range _gfcea {if _cdad ._bgcce {continue ;};_cdad .writeText (_cdee );if _agdc !=len (_gfcea )-1{if _gbggf (_cdad ,_gfcea [_agdc +1]){_cdee .Write ([]byte ("\u0020"));}else {_cdee .Write ([]byte ("\u000a"));_cdee .Write ([]byte ("\u000a"));};};};_cdee .Write ([]byte ("\u000a"));_cdee .Write ([]byte ("\u000a"));};func (_ecbcf *textPara )toCellTextMarks (_bacab *int )[]TextMark {var _feab []TextMark ;for _bbcd ,_badf :=range _ecbcf ._acgf {_gada :=_badf .toTextMarks (_bacab );_aebf :=_ddfec &&_badf .endsInHyphen ()&&_bbcd !=len (_ecbcf ._acgf )-1;if _aebf {_gada =_adedg (_gada ,_bacab );};_feab =append (_feab ,_gada ...);if !(_aebf ||_bbcd ==len (_ecbcf ._acgf )-1){_feab =_daga (_feab ,_bacab ,_ccfd (_badf ._febe ,_ecbcf ._acgf [_bbcd +1]._febe ));};};return _feab ;};type wordBag struct{_gdc .PdfRectangle ;_ecba float64 ;_ccc ,_bffgc rulingList ;_bfg float64 ;_fdebe map[int ][]*textWord ;};func _afdfd (_bafg _gdc .PdfRectangle )rulingKind {_dcdg :=_bafg .Width ();_cgdad :=_bafg .Height ();if _dcdg > _cgdad {if _dcdg >=_ffcc {return _dbff ;};}else {if _cgdad >=_ffcc {return _gddg ;};};return _afac ;};func (_egcb *textObject )reset (){_egcb ._aac =_gg .IdentityMatrix ();_egcb ._dgdc =_gg .IdentityMatrix ();_egcb ._fbad =nil ;};func (_gaaa *textLine )markWordBoundaries (){_gacf :=_gfce *_gaaa ._aegc ;for _afdec ,_fcceg :=range _gaaa ._cfgd [1:]{if _gcf (_fcceg ,_gaaa ._cfgd [_afdec ])>=_gacf {_fcceg ._bcaaf =true ;};};};func (_fbfc *textObject )getFontDirect (_eaef string )(*_gdc .PdfFont ,error ){_bce ,_age :=_fbfc .getFontDict (_eaef );if _age !=nil {return nil ,_age ;};_ebc ,_age :=_gdc .NewPdfFontFromPdfObject (_bce );if _age !=nil {_adb .Log .Debug 
("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_eaef ,_age );};return _ebc ,_age ;};func _ebgb (_fbcg _gdc .PdfRectangle ,_dace bounded )float64 {return _fbcg .Ury -_dace .bbox ().Lly };func _cdga (_cdgab []compositeCell )[]float64 {var _gccdd []*textLine ;_beefd :=0;for _ ,_bfcbe :=range _cdgab {_beefd +=len (_bfcbe .paraList );_gccdd =append (_gccdd ,_bfcbe .lines ()...);};_ad .Slice (_gccdd ,func (_bdee ,_cbfd int )bool {_gdcc ,_cagbg :=_gccdd [_bdee ],_gccdd [_cbfd ];_eeee ,_fgda :=_gdcc ._febe ,_cagbg ._febe ;if !_gbee (_eeee -_fgda ){return _eeee < _fgda ;};return _gdcc .Llx < _cagbg .Llx ;});if _dfad {_gd .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_beefd ,len (_gccdd ));for _eebbb ,_cdeg :=range _gccdd {_gd .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_eebbb ,_cdeg );};};var _dgdfa []float64 ;_feebb :=_gccdd [0];var _egff [][]*textLine ;_edeec :=[]*textLine {_feebb };for _ggdd ,_gebgg :=range _gccdd [1:]{if _gebgg .Ury < _feebb .Lly {_fbfbb :=0.5*(_gebgg .Ury +_feebb .Lly );if _dfad {_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_ggdd ,_gebgg .Ury ,_feebb .Lly ,_fbfbb ,_feebb ,_gebgg );};_dgdfa =append (_dgdfa ,_fbfbb );_egff =append (_egff ,_edeec );_edeec =nil ;};_edeec =append (_edeec ,_gebgg );if _gebgg .Lly < _feebb .Lly {_feebb =_gebgg ;};};if len (_edeec )> 0{_egff 
=append (_egff ,_edeec );};if _dfad {_gd .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_dgdfa );};if _dfad {_adb .Log .Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len (_cdgab ));for _fgced ,_cffee :=range _cdgab {_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fgced ,_cffee );};_adb .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_egff ));for _aafcf ,_dbdf :=range _egff {_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_aafcf ,len (_dbdf ));for _befdg ,_cggg :=range _dbdf {_gd .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_befdg ,_cggg );};};};_bfafg :=true ;for _cdccb ,_dcade :=range _egff {_fbbb :=true ;for _cdfb ,_egdf :=range _cdgab {if _dfad {_gd .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_cdccb ,len (_egff ),_cdfb ,len (_cdgab ),_egdf );};if !_egdf .hasLines (_dcade ){if _dfad {_gd .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_cdccb ,len (_egff ),_cdfb ,len (_cdgab ));};_fbbb =false ;break ;};};if !_fbbb {_bfafg =false ;break ;};};if !_bfafg {if _dfad {_adb .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg");};_dgdfa =nil ;};if _dfad &&_dgdfa !=nil {_gd .Printf 
("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_dgdfa );};return _dgdfa ;};func (_gggg *textTable )depth ()float64 {_gddf :=1e10;for _becd :=0;_becd < _gggg ._ffbe ;_becd ++{_gcefg :=_gggg .get (_becd ,0);if _gcefg ==nil ||_gcefg ._bgcce {continue ;};_gddf =_g .Min (_gddf ,_gcefg .depth ());};return _gddf ;};func _bgfea (_daae ,_agea int )uint64 {return uint64 (_daae )*0x1000000+uint64 (_agea )};var _debd =map[rulingKind ]string {_afac :"\u006e\u006f\u006e\u0065",_dbff :"\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_gddg :"\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"};func (_fcab *textObject )nextLine (){_fcab .moveLP (0,-_fcab ._bgebc ._dage )};type gridTiling struct{_gdc .PdfRectangle ;_cgaf []float64 ;_aafd []float64 ;_deae map[float64 ]map[float64 ]gridTile ;};func (_defbc rectRuling )checkWidth (_egdda ,_ffeag float64 )(float64 ,bool ){_fcdf :=_ffeag -_egdda ;_daed :=_fcdf <=_bgcb ;return _fcdf ,_daed ;};
|
||
|
||
// String returns a string describing `tm`.
|
||
func (_cab TextMark )String ()string {_acd :=_cab .BBox ;var _bae string ;if _cab .Font !=nil {_bae =_cab .Font .String ();if len (_bae )> 50{_bae =_bae [:50]+"\u002e\u002e\u002e";};};var _aecd string ;if _cab .Meta {_aecd ="\u0020\u002a\u004d\u002a";};return _gd .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_cab .Offset ,_cab .Text ,[]rune (_cab .Text ),_acd .Llx ,_acd .Lly ,_acd .Urx ,_acd .Ury ,_bae ,_aecd );};func (_gaad *textMark )bbox ()_gdc .PdfRectangle {return _gaad .PdfRectangle };var _becg =map[markKind ]string {_gcbb :"\u0073\u0074\u0072\u006f\u006b\u0065",_cdcc :"\u0066\u0069\u006c\u006c",_abb :"\u0061u\u0067\u006d\u0065\u006e\u0074"};func (_aaf *shapesState )cubicTo (_cdfc ,_cdfg ,_bbdb ,_gebf ,_beg ,_feg float64 ){if _ebge {_adb .Log .Info ("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a");};_aaf .addPoint (_beg ,_feg );};type textState struct{_ecbb float64 ;_gefe float64 ;_gacg float64 ;_dage float64 ;_dgc float64 ;_gefc RenderMode ;_daa float64 ;_gdbe *_gdc .PdfFont ;_bfaa _gdc .PdfRectangle ;_gfge int ;_dce int ;};func (_gfdg *textLine )text ()string {var _acgca []string ;for _ ,_fedg :=range _gfdg ._cfgd {if _fedg ._bcaaf {_acgca =append (_acgca ,"\u0020");};_acgca =append (_acgca ,_fedg ._adad );};return _ef .Join (_acgca ,"");};func (_ggff *Extractor )extractPageText (_cbf string ,_ebe *_gdc .PdfPageResources ,_gbg _gg .Matrix ,_fce int )(*PageText ,int ,int ,error ){_adb .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_fce );_gcg :=&PageText {_dacc :_ggff ._ea };_fgf :=_fafd (_ggff ._ea );_dac :=stateStack {&_fgf };_ca :=_bgfc (_ggff ,_ebe ,_aee .GraphicsState {},&_fgf 
,&_dac );_cdbg :=shapesState {_gge :_gbg ,_fdb :_gg .IdentityMatrix (),_befd :_ca };var _fba bool ;if _fce > _eca {_aad :=_d .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_fce ,_aad );return _gcg ,_fgf ._gfge ,_fgf ._dce ,_aad ;};_ebb :=_aee .NewContentStreamParser (_cbf );_bgf ,_cfe :=_ebb .Parse ();if _cfe !=nil {_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cfe );return _gcg ,_fgf ._gfge ,_fgf ._dce ,_cfe ;};_fa :=_aee .NewContentStreamProcessor (*_bgf );_fa .AddHandler (_aee .HandlerConditionEnumAllOperands ,"",func (_fcee *_aee .ContentStreamOperation ,_bgc _aee .GraphicsState ,_gac *_gdc .PdfPageResources )error {_cbe :=_fcee .Operand ;if _gfabd {_adb .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_fcee );};switch _cbe {case "\u0071":if _ebge {_adb .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_cdbg ._fdb );};_dac .push (&_fgf );case "\u0051":if !_dac .empty (){if len (_dac )>=2{_dac .pop ();};_fgf =*_dac .top ();};_cdbg ._fdb =_bgc .CTM ;if _ebge {_adb .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_cdbg ._fdb );};case "\u0042\u0054":if _fba {_adb .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");_gcg ._bdbb =append (_gcg ._bdbb ,_ca ._fbad ...);};_fba =true ;_bbc :=_bgc ;_bbc .CTM =_gbg .Mult (_bbc .CTM );_ca =_bgfc 
(_ggff ,_gac ,_bbc ,&_fgf ,&_dac );_cdbg ._befd =_ca ;case "\u0045\u0054":if !_fba {_adb .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");};_fba =false ;_gcg ._bdbb =append (_gcg ._bdbb ,_ca ._fbad ...);_ca .reset ();case "\u0054\u002a":_ca .nextLine ();case "\u0054\u0064":if _cge ,_cc :=_ca .checkOp (_fcee ,2,true );!_cge {_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cc );return _cc ;};_agb ,_dde ,_aff :=_cgdg (_fcee .Params );if _aff !=nil {return _aff ;};_ca .moveText (_agb ,_dde );case "\u0054\u0044":if _bf ,_ffb :=_ca .checkOp (_fcee ,2,true );!_bf {_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ffb );return _ffb ;};_ege ,_ddf ,_bbd :=_cgdg (_fcee .Params );if _bbd !=nil {_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bbd );return _bbd ;};_ca .moveTextSetLeading (_ege ,_ddf );case "\u0054\u006a":if _fag ,_bbga :=_ca .checkOp (_fcee ,1,true );!_fag {_adb .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_fcee ,_bbga );return _bbga ;};_fbd ,_ffg :=_dg .GetStringBytes (_fcee .Params [0]);if !_ffg {_adb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_fcee );return _dg .ErrTypeError ;};return _ca .showText (_fbd );case "\u0054\u004a":if _caa ,_dcc :=_ca .checkOp (_fcee ,1,true );!_caa {_adb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dcc );return _dcc ;};_ce ,_gfb :=_dg .GetArray (_fcee .Params [0]);if !_gfb {_adb .Log .Debug 
("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_fcee );return _cfe ;};return _ca .showTextAdjusted (_ce );case "\u0027":if _efd ,_cee :=_ca .checkOp (_fcee ,1,true );!_efd {_adb .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cee );return _cee ;};_gfc ,_ecc :=_dg .GetStringBytes (_fcee .Params [0]);if !_ecc {_adb .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_fcee );return _dg .ErrTypeError ;};_ca .nextLine ();return _ca .showText (_gfc );case "\u0022":if _gaf ,_dea :=_ca .checkOp (_fcee ,3,true );!_gaf {_adb .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dea );return _dea ;};_bba ,_cag ,_dacd :=_cgdg (_fcee .Params [:2]);if _dacd !=nil {return _dacd ;};_df ,_bad :=_dg .GetStringBytes (_fcee .Params [2]);if !_bad {_adb .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_fcee );return _dg .ErrTypeError ;};_ca .setCharSpacing (_bba );_ca .setWordSpacing (_cag );_ca .nextLine ();return _ca .showText (_df );case "\u0054\u004c":_gbd ,_gefa :=_dbc (_fcee );if _gefa !=nil {_adb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gefa );return _gefa ;};_ca .setTextLeading (_gbd );case "\u0054\u0063":_egc ,_geab :=_dbc (_fcee );if _geab !=nil {_adb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_geab );return _geab ;};_ca .setCharSpacing (_egc );case "\u0054\u0066":if _ecd ,_aca :=_ca .checkOp (_fcee ,2,true );!_ecd {_adb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a 
\u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aca );return _aca ;};_fdeg ,_feee :=_dg .GetNameVal (_fcee .Params [0]);if !_feee {_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_fcee );return _dg .ErrTypeError ;};_acgcb ,_fge :=_dg .GetNumberAsFloat (_fcee .Params [1]);if !_feee {_adb .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fcee ,_fge );return _fge ;};_fge =_ca .setFont (_fdeg ,_acgcb );_ca ._bdf =_ge .Is (_fge ,_dg .ErrNotSupported );if _fge !=nil &&!_ca ._bdf {return _fge ;};case "\u0054\u006d":if _edd ,_fca :=_ca .checkOp (_fcee ,6,true );!_edd {_adb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fca );return _fca ;};_ebee ,_ecg :=_dg .GetNumbersAsFloat (_fcee .Params );if _ecg !=nil {_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ecg );return _ecg ;};_ca .setTextMatrix (_ebee );case "\u0054\u0072":if _gfa ,_efg :=_ca .checkOp (_fcee ,1,true );!_gfa {_adb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_efg );return _efg ;};_cba ,_gdcf :=_dg .GetIntVal (_fcee .Params [0]);if !_gdcf {_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_fcee );return _dg .ErrTypeError ;};_ca .setTextRenderMode (_cba );case "\u0054\u0073":if _cca ,_deg :=_ca .checkOp (_fcee ,1,true );!_cca {_adb .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a 
\u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_deg );return _deg ;};_bca ,_ecde :=_dg .GetNumberAsFloat (_fcee .Params [0]);if _ecde !=nil {_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ecde );return _ecde ;};_ca .setTextRise (_bca );case "\u0054\u0077":if _gdb ,_eaf :=_ca .checkOp (_fcee ,1,true );!_gdb {_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_eaf );return _eaf ;};_dga ,_ffa :=_dg .GetNumberAsFloat (_fcee .Params [0]);if _ffa !=nil {_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ffa );return _ffa ;};_ca .setWordSpacing (_dga );case "\u0054\u007a":if _fef ,_cgb :=_ca .checkOp (_fcee ,1,true );!_fef {_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cgb );return _cgb ;};_dbd ,_dfb :=_dg .GetNumberAsFloat (_fcee .Params [0]);if _dfb !=nil {_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dfb );return _dfb ;};_ca .setHorizScaling (_dbd );case "\u0063\u006d":_cdbg ._fdb =_bgc .CTM ;if _cdbg ._fdb .Singular (){_gbe :=_gg .IdentityMatrix ().Translate (_cdbg ._fdb .Translation ());_adb .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_cdbg ._fdb ,_gbe );_cdbg ._fdb =_gbe ;};if _ebge {_adb .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_cdbg ._fdb );};case "\u006d":if len (_fcee .Params )!=2{_adb .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_fe );return nil ;};_eec ,_dge :=_dg 
.GetNumbersAsFloat (_fcee .Params );if _dge !=nil {return _dge ;};_adb .Log .Debug ("\u004d\u006f\u0076\u0065\u0020\u0074\u006f\u003a\u0020\u0025\u002e\u0032\u0066",_eec );_cdbg .moveTo (_eec [0],_eec [1]);case "\u006c":if len (_fcee .Params )!=2{_adb .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_fe );return nil ;};_def ,_gfcb :=_dg .GetNumbersAsFloat (_fcee .Params );if _gfcb !=nil {return _gfcb ;};_cdbg .lineTo (_def [0],_def [1]);case "\u0063":if len (_fcee .Params )!=6{return _fe ;};_abd ,_bff :=_dg .GetNumbersAsFloat (_fcee .Params );if _bff !=nil {return _bff ;};_adb .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_abd );_cdbg .cubicTo (_abd [0],_abd [1],_abd [2],_abd [3],_abd [4],_abd [5]);case "\u0076","\u0079":if len (_fcee .Params )!=4{return _fe ;};_bdc ,_bgde :=_dg .GetNumbersAsFloat (_fcee .Params );if _bgde !=nil {return _bgde ;};_adb .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_bdc );_cdbg .quadraticTo (_bdc [0],_bdc [1],_bdc [2],_bdc [3]);case "\u0068":_cdbg .closePath ();case "\u0072\u0065":if len (_fcee .Params )!=4{return _fe ;};_gegc ,_fbf :=_dg .GetNumbersAsFloat (_fcee .Params );if _fbf !=nil {return _fbf ;};_cdbg .drawRectangle (_gegc [0],_gegc [1],_gegc [2],_gegc [3]);_cdbg .closePath ();case "\u0053":_cdbg .stroke (&_gcg ._agcc );_cdbg .clearPath ();case "\u0073":_cdbg .closePath ();_cdbg .stroke (&_gcg ._agcc );_cdbg .clearPath ();case "\u0046":_cdbg .fill (&_gcg ._dbge );_cdbg .clearPath ();case 
"\u0066","\u0066\u002a":_cdbg .closePath ();_cdbg .fill (&_gcg ._dbge );_cdbg .clearPath ();case "\u0042","\u0042\u002a":_cdbg .fill (&_gcg ._dbge );_cdbg .stroke (&_gcg ._agcc );_cdbg .clearPath ();case "\u0062","\u0062\u002a":_cdbg .closePath ();_cdbg .fill (&_gcg ._dbge );_cdbg .stroke (&_gcg ._agcc );_cdbg .clearPath ();case "\u006e":_cdbg .clearPath ();case "\u0044\u006f":if len (_fcee .Params )==0{_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_fcee .Params );return _dg .ErrRangeError ;};_fgc ,_gebg :=_dg .GetName (_fcee .Params [0]);if !_gebg {_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_fcee .Params [0]);return _dg .ErrTypeError ;};_ ,_bgb :=_gac .GetXObjectByName (*_fgc );if _bgb !=_gdc .XObjectTypeForm {break ;};_caac ,_gebg :=_ggff ._gea [_fgc .String ()];if !_gebg {_gca ,_aea :=_gac .GetXObjectFormByName (*_fgc );if _aea !=nil {_adb .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_aea );return _aea ;};_efgg ,_aea :=_gca .GetContentStream ();if _aea !=nil {_adb .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_aea );return _aea ;};_fcc :=_gca .Resources ;if _fcc ==nil {_fcc =_gac ;};_afa ,_dag ,_ccd ,_aea :=_ggff .extractPageText (string (_efgg ),_fcc ,_gbg .Mult (_bgc .CTM ),_fce +1);if _aea !=nil {_adb .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_aea );return _aea ;};_caac =textResult {*_afa ,_dag ,_ccd };_ggff ._gea [_fgc 
.String ()]=_caac ;};_cdbg ._fdb =_bgc .CTM ;if _ebge {_adb .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_cdbg ._fdb );};_gcg ._bdbb =append (_gcg ._bdbb ,_caac ._fgce ._bdbb ...);_gcg ._agcc =append (_gcg ._agcc ,_caac ._fgce ._agcc ...);_gcg ._dbge =append (_gcg ._dbge ,_caac ._fgce ._dbge ...);_fgf ._gfge +=_caac ._efc ;_fgf ._dce +=_caac ._deb ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_ca ._dca .ColorspaceNonStroking =_bgc .ColorspaceNonStroking ;_ca ._dca .ColorNonStroking =_bgc .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_ca ._dca .ColorspaceStroking =_bgc .ColorspaceStroking ;_ca ._dca .ColorStroking =_bgc .ColorStroking ;};return nil ;});_cfe =_fa .Process (_ebe );return _gcg ,_fgf ._gfge ,_fgf ._dce ,_cfe ;};func (_bdaa *textPara )text ()string {_egcbf :=new (_da .Buffer );_bdaa .writeText (_egcbf );return _egcbf .String ();};func (_ccb *subpath )last ()_gg .Point {return _ccb ._dee [len (_ccb ._dee )-1]};func (_cbab *stateStack )top ()*textState {if _cbab .empty (){return nil ;};return (*_cbab )[_cbab .size ()-1];};func _cfdca (_fefcb []*textWord ,_gadda *textWord )[]*textWord {for _bfgga ,_dbdfa :=range _fefcb {if _dbdfa ==_gadda {return _afffg (_fefcb ,_bfgga );};};_adb .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_gadda );return nil ;};func (_aggd *textLine )pullWord (_dgda *wordBag ,_ffge *textWord ,_bacb int ){_aggd .appendWord (_ffge );_dgda .removeWord (_ffge ,_bacb );};func _bdgf (_gbc func (*wordBag ,*textWord ,float64 )bool ,_cgab float64 )func (*wordBag ,*textWord )bool {return func (_fcde *wordBag ,_gcab *textWord )bool {return _gbc (_fcde ,_gcab ,_cgab )};};func (_bgddc paraList )tables ()[]TextTable {var _dcbf []TextTable 
;if _dfad {_adb .Log .Info ("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a");};for _ ,_gcgb :=range _bgddc {_bfaae :=_gcgb ._cabb ;if _bfaae !=nil &&_bfaae .isExportable (){_dcbf =append (_dcbf ,_bfaae .toTextTable ());};};return _dcbf ;};type textTable struct{_gdc .PdfRectangle ;_ffbe ,_egbe int ;_fgdd bool ;_gged map[uint64 ]*textPara ;_fgcc map[uint64 ]compositeCell ;};func (_cecb *textMark )inDiacriticArea (_fbcf *textMark )bool {_ggeb :=_cecb .Llx -_fbcf .Llx ;_afcdd :=_cecb .Urx -_fbcf .Urx ;_egac :=_cecb .Lly -_fbcf .Lly ;return _g .Abs (_ggeb +_afcdd )< _cecb .Width ()*_fbbg &&_g .Abs (_egac )< _cecb .Height ()*_fbbg ;};func (_fea *imageExtractContext )extractXObjectImage (_gf *_dg .PdfObjectName ,_fg _aee .GraphicsState ,_cga *_gdc .PdfPageResources )error {_bga ,_ :=_cga .GetXObjectByName (*_gf );if _bga ==nil {return nil ;};_fbg ,_ac :=_fea ._ed [_bga ];if !_ac {_dbg ,_fdg :=_cga .GetXObjectImageByName (*_gf );if _fdg !=nil {return _fdg ;};if _dbg ==nil {return nil ;};_cdf ,_fdg :=_dbg .ToImage ();if _fdg !=nil {return _fdg ;};_fbg =&cachedImage {_fed :_cdf ,_agf :_dbg .ColorSpace };_fea ._ed [_bga ]=_fbg ;};_ba :=_fbg ._fed ;_ab :=_fbg ._agf ;_geag ,_ggf :=_ab .ImageToRGB (*_ba );if _ggf !=nil {return _ggf ;};_adb .Log .Debug ("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_fg .CTM .String ());_geb :=ImageMark {Image :&_geag ,Width :_fg .CTM .ScalingFactorX (),Height :_fg .CTM .ScalingFactorY (),Angle :_fg .CTM .Angle ()};_geb .X ,_geb .Y =_fg .CTM .Translation ();_fea ._bg =append (_fea ._bg ,_geb );_fea ._eag ++;return nil ;};func _fefec (_acbe ,_gdgdg float64 )bool {return _acbe /_g .Max (_efbg ,_gdgdg )< _gffc };func (_febf *wordBag )depthRange (_bdeb ,_ecgb int )[]int {var _cggf []int ;for _cafg :=range _febf ._fdebe {if _bdeb <=_cafg &&_cafg <=_ecgb {_cggf =append (_cggf ,_cafg );};};if len (_cggf )==0{return nil ;};_ad .Ints (_cggf );return _cggf ;};func (_gcb *textObject )getFont (_gcd string 
)(*_gdc .PdfFont ,error ){if _gcb ._dbda ._bc !=nil {_gcb ._dbda ._ggc ++;_dcde ,_egddb :=_gcb ._dbda ._bc [_gcd ];if _egddb {_dcde ._eee =_gcb ._dbda ._ggc ;return _dcde ._gbgc ,nil ;};};_gcdf ,_ecf :=_gcb .getFontDirect (_gcd );if _ecf !=nil {return nil ,_ecf ;};if _gcb ._dbda ._bc !=nil {_bea :=fontEntry {_gcdf ,_gcb ._dbda ._ggc };if len (_gcb ._dbda ._bc )>=_bdff {var _ggce []string ;for _dcb :=range _gcb ._dbda ._bc {_ggce =append (_ggce ,_dcb );};_ad .Slice (_ggce ,func (_gdd ,_fbga int )bool {return _gcb ._dbda ._bc [_ggce [_gdd ]]._eee < _gcb ._dbda ._bc [_ggce [_fbga ]]._eee ;});delete (_gcb ._dbda ._bc ,_ggce [0]);};_gcb ._dbda ._bc [_gcd ]=_bea ;};return _gcdf ,nil ;};func _eebg (_efbb ,_adfa _gdc .PdfRectangle )(_gdc .PdfRectangle ,bool ){if !_gecb (_efbb ,_adfa ){return _gdc .PdfRectangle {},false ;};return _gdc .PdfRectangle {Llx :_g .Max (_efbb .Llx ,_adfa .Llx ),Urx :_g .Min (_efbb .Urx ,_adfa .Urx ),Lly :_g .Max (_efbb .Lly ,_adfa .Lly ),Ury :_g .Min (_efbb .Ury ,_adfa .Ury )},true ;};func (_becb *subpath )clear (){*_becb =subpath {}};func (_dabc paraList )findTextTables ()[]*textTable {var _ccggg []*textTable ;for _ ,_ebae :=range _dabc {if _ebae .taken ()||_ebae .Width ()==0{continue ;};_eaff :=_ebae .isAtom ();if _eaff ==nil {continue ;};_eaff .growTable ();if _eaff ._ffbe *_eaff ._egbe < _ebac {continue ;};_eaff .markCells ();_eaff .log ("\u0067\u0072\u006fw\u006e");_ccggg =append (_ccggg ,_eaff );};return _ccggg ;};func (_dagd *textPara )writeText (_fcfc _b .Writer ){if _dagd ._cabb ==nil {_dagd .writeCellText (_fcfc );return ;};for _efe :=0;_efe < _dagd ._cabb ._egbe ;_efe ++{for _dfc :=0;_dfc < _dagd ._cabb ._ffbe ;_dfc ++{_egef :=_dagd ._cabb .get (_dfc ,_efe );if _egef ==nil {_fcfc .Write ([]byte ("\u0009"));}else {_egef .writeCellText (_fcfc );};_fcfc .Write ([]byte ("\u0020"));};if _efe < _dagd ._cabb ._egbe -1{_fcfc .Write ([]byte ("\u000a"));};};};func _eeda (_fcfcad _gdc .PdfColorspace ,_cdbdg _gdc .PdfColor )_dc .Color {if _fcfcad 
==nil ||_cdbdg ==nil {return _dc .Black ;};_gbbfb ,_gdaa :=_fcfcad .ColorToRGB (_cdbdg );if _gdaa !=nil {_adb .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_cdbdg ,_fcfcad ,_gdaa );return _dc .Black ;};_ffgg ,_gdbea :=_gbbfb .(*_gdc .PdfColorDeviceRGB );if !_gdbea {_adb .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_gbbfb );return _dc .Black ;};return _dc .NRGBA {R :uint8 (_ffgg .R ()*255),G :uint8 (_ffgg .G ()*255),B :uint8 (_ffgg .B ()*255),A :uint8 (255)};};func (_eeab *wordBag )maxDepth ()float64 {return _eeab ._bfg -_eeab .Lly };func (_acbc *textObject )newTextMark (_fgff string ,_adff _gg .Matrix ,_gggb _gg .Point ,_cfeb float64 ,_afb *_gdc .PdfFont ,_aedd float64 ,_gffeg ,_dbcf _dc .Color )(textMark ,bool ){_aegb :=_adff .Angle ();_acegf :=_dgeg (_aegb ,_fbaf );var _acdg float64 ;if _acegf %180!=90{_acdg =_adff .ScalingFactorY ();}else {_acdg =_adff .ScalingFactorX ();};_ecaca :=_ggg (_adff );_caea :=_gdc .PdfRectangle {Llx :_ecaca .X ,Lly :_ecaca .Y ,Urx :_gggb .X ,Ury :_gggb .Y };switch _acegf %360{case 90:_caea .Urx -=_acdg ;case 180:_caea .Ury -=_acdg ;case 270:_caea .Urx +=_acdg ;case 0:_caea .Ury +=_acdg ;default:_acegf =0;_caea .Ury +=_acdg ;};if _caea .Llx > _caea .Urx {_caea .Llx ,_caea .Urx =_caea .Urx ,_caea .Llx ;};if _caea .Lly > _caea .Ury {_caea .Lly ,_caea .Ury =_caea .Ury ,_caea .Lly ;};_ggfgd ,_bfgf :=_eebg (_caea ,_acbc ._dbda ._ea );if !_bfgf {_adb .Log .Debug 
("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_caea ,_acbc ._dbda ._ea ,_fgff );};_caea =_ggfgd ;_bcefe :=_caea ;_gbae :=_acbc ._dbda ._ea ;switch _acegf %360{case 90:_gbae .Urx ,_gbae .Ury =_gbae .Ury ,_gbae .Urx ;_bcefe =_gdc .PdfRectangle {Llx :_gbae .Urx -_caea .Ury ,Urx :_gbae .Urx -_caea .Lly ,Lly :_caea .Llx ,Ury :_caea .Urx };case 180:_bcefe =_gdc .PdfRectangle {Llx :_gbae .Urx -_caea .Llx ,Urx :_gbae .Urx -_caea .Urx ,Lly :_gbae .Ury -_caea .Lly ,Ury :_gbae .Ury -_caea .Ury };case 270:_gbae .Urx ,_gbae .Ury =_gbae .Ury ,_gbae .Urx ;_bcefe =_gdc .PdfRectangle {Llx :_caea .Ury ,Urx :_caea .Lly ,Lly :_gbae .Ury -_caea .Llx ,Ury :_gbae .Ury -_caea .Urx };};if _bcefe .Llx > _bcefe .Urx {_bcefe .Llx ,_bcefe .Urx =_bcefe .Urx ,_bcefe .Llx ;};if _bcefe .Lly > _bcefe .Ury {_bcefe .Lly ,_bcefe .Ury =_bcefe .Ury ,_bcefe .Lly ;};_bbfe :=textMark {_bdgg :_fgff ,PdfRectangle :_bcefe ,_fgad :_caea ,_agee :_afb ,_fcdad :_acdg ,_bfba :_aedd ,_eccg :_adff ,_aaee :_gggb ,_dfaf :_acegf ,_eggc :_gffeg ,_bgfb :_dbcf };if _aagc {_adb .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_ecaca ,_gggb ,_bbfe .String ());};return _bbfe ,_bfgf ;};func (_gfcag *textTable )logComposite (_bbee string ){if !_dfad {return ;};_adb .Log .Info ("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_gfcag ._ffbe ,_gfcag ._egbe ,_bbee );_gd .Printf ("\u0025\u0035\u0073 \u007c","");for _gcgaa :=0;_gcgaa < _gfcag ._ffbe ;_gcgaa ++{_gd .Printf ("\u0025\u0033\u0064 \u007c",_gcgaa );};_gd .Println ("");_gd .Printf ("\u0025\u0035\u0073 \u002b","");for _egbc :=0;_egbc < 
_gfcag ._ffbe ;_egbc ++{_gd .Printf ("\u0025\u0033\u0073 \u002b","\u002d\u002d\u002d");};_gd .Println ("");for _deed :=0;_deed < _gfcag ._egbe ;_deed ++{_gd .Printf ("\u0025\u0035\u0064 \u007c",_deed );for _ebgbg :=0;_ebgbg < _gfcag ._ffbe ;_ebgbg ++{_bgcd ,_ :=_gfcag ._fgcc [_bgfea (_ebgbg ,_deed )].parasBBox ();_gd .Printf ("\u0025\u0033\u0064 \u007c",len (_bgcd ));};_gd .Println ("");};_adb .Log .Info ("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_gfcag ._ffbe ,_gfcag ._egbe ,_bbee );_gd .Printf ("\u0025\u0035\u0073 \u007c","");for _gbde :=0;_gbde < _gfcag ._ffbe ;_gbde ++{_gd .Printf ("\u0025\u0031\u0032\u0064\u0020\u007c",_gbde );};_gd .Println ("");_gd .Printf ("\u0025\u0035\u0073 \u002b","");for _eaea :=0;_eaea < _gfcag ._ffbe ;_eaea ++{_gd .Print ("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b");};_gd .Println ("");for _eggbd :=0;_eggbd < _gfcag ._egbe ;_eggbd ++{_gd .Printf ("\u0025\u0035\u0064 \u007c",_eggbd );for _fcfb :=0;_fcfb < _gfcag ._ffbe ;_fcfb ++{_cbadag ,_ :=_gfcag ._fgcc [_bgfea (_fcfb ,_eggbd )].parasBBox ();_efbc :="";_ecfb :=_cbadag .merge ();if _ecfb !=nil {_efbc =_ecfb .text ();};_efbc =_gd .Sprintf ("\u0025\u0071",_fgde (_efbc ,12));_efbc =_efbc [1:len (_efbc )-1];_gd .Printf ("\u0025\u0031\u0032\u0073\u0020\u007c",_efbc );};_gd .Println ("");};};func _affd (_fedf []*textMark ,_feea _gdc .PdfRectangle ,_abad rulingList ,_fbeb []gridTiling )paraList {_adb .Log .Trace ("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066",len (_fedf ),_feea );if len (_fedf )==0{return nil ;};_cgeg :=_fgae (_fedf ,_feea );if len (_cgeg )==0{return nil ;};_abad .log ("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065");_fgab ,_ggda :=_abad .vertsHorzs ();_eff :=_egag (_cgeg ,_feea .Ury ,_fgab 
,_ggda );_decc :=_fcba (_eff ,_feea .Ury ,_fgab ,_ggda );_decc =_ebea (_decc );_agdg :=make (paraList ,0,len (_decc ));for _ ,_bedb :=range _decc {_bfbf :=_bedb .arrangeText ();if _bfbf !=nil {_agdg =append (_agdg ,_bfbf );};};if len (_agdg )>=_ebac {_agdg =_agdg .extractTables (_fbeb );};_agdg .sortReadingOrder ();_agdg .log ("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072");return _agdg ;};func (_adfag lineRuling )yMean ()float64 {return 0.5*(_adfag ._dgfe .Y +_adfag ._aaec .Y )};func (_beae *shapesState )establishSubpath ()*subpath {_fgb ,_fbfd :=_beae .lastpointEstablished ();if !_fbfd {_beae ._bcce =append (_beae ._bcce ,_cedg (_fgb ));};if len (_beae ._bcce )==0{return nil ;};_beae ._bbb =false ;return _beae ._bcce [len (_beae ._bcce )-1];};
|
||
|
||
// String returns a human-readable description of the set: its members in ascending order, formatted with %+v.
|
||
func (_eggdg intSet )String ()string {var _ffcff []int ;for _dbcbf :=range _eggdg {if _eggdg .has (_dbcbf ){_ffcff =append (_ffcff ,_dbcbf );};};_ad .Ints (_ffcff );return _gd .Sprintf ("\u0025\u002b\u0076",_ffcff );};type stateStack []*textState ;func (_dacab paraList )computeEBBoxes (){if _cac {_adb .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a");};for _ ,_bcddb :=range _dacab {_bcddb ._ffab =_bcddb .PdfRectangle ;};_caead :=_dacab .yNeighbours (0);for _gdge ,_dagcc :=range _dacab {_abcg :=_dagcc ._ffab ;_cgbad ,_eead :=-1.0e9,+1.0e9;for _ ,_gbed :=range _caead [_dagcc ]{_edcdd :=_dacab [_gbed ]._ffab ;if _edcdd .Urx < _abcg .Llx {_cgbad =_g .Max (_cgbad ,_edcdd .Urx );}else if _abcg .Urx < _edcdd .Llx {_eead =_g .Min (_eead ,_edcdd .Llx );};};for _baag ,_ffdg :=range _dacab {_feaec :=_ffdg ._ffab ;if _gdge ==_baag ||_feaec .Ury > _abcg .Lly {continue ;};if _cgbad <=_feaec .Llx &&_feaec .Llx < _abcg .Llx {_abcg .Llx =_feaec .Llx ;}else if _feaec .Urx <=_eead &&_abcg .Urx < _feaec .Urx {_abcg .Urx =_feaec .Urx ;};};if _cac {_gd .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_gdge ,_dagcc ._ffab ,_abcg ,_fgde (_dagcc .text (),50));};_dagcc ._ffab =_abcg ;};if _bfdae {for _ ,_abag :=range _dacab {_abag .PdfRectangle =_abag ._ffab ;};};};func _adedg (_bbaca []TextMark ,_ggaf *int )[]TextMark {_bdgb :=_bbaca [len (_bbaca )-1];_begb :=[]rune (_bdgb .Text );if len (_begb )==1{_bbaca =_bbaca [:len (_bbaca )-1];_acc :=_bbaca [len (_bbaca )-1];*_ggaf =_acc .Offset +len (_acc .Text );}else {_cgfe :=_ggca (_bdgb .Text );*_ggaf +=len (_cgfe )-len (_bdgb .Text );_bdgb .Text =_cgfe ;};return _bbaca ;};func (_ccfdf paraList )inTile (_gagf gridTile )paraList {var _cfdc paraList ;for _ ,_gfcgd :=range _ccfdf {if _gagf .contains (_gfcgd .PdfRectangle ){_cfdc =append (_cfdc ,_gfcgd );};};if _dfad {_gd .Printf ("\u0020 
\u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n",_gagf ,len (_cfdc ));for _gcgad ,_faccf :=range _cfdc {_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gcgad ,_faccf );};_gd .Println ("");};return _cfdc ;};func (_bfgb rulingList )primMinMax ()(float64 ,float64 ){_abgc ,_gagc :=_bfgb [0]._bfc ,_bfgb [0]._bfc ;for _ ,_dgbb :=range _bfgb [1:]{if _dgbb ._bfc < _abgc {_abgc =_dgbb ._bfc ;}else if _dgbb ._bfc > _gagc {_gagc =_dgbb ._bfc ;};};return _abgc ,_gagc ;};func (_accb *textTable )compositeColCorridors ()map[int ][]float64 {_cbadb :=make (map[int ][]float64 ,_accb ._ffbe );if _dfad {_adb .Log .Info ("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020",_accb ._ffbe );};for _aecgc :=0;_aecgc < _accb ._ffbe ;_aecgc ++{_cbadb [_aecgc ]=nil ;};return _cbadb ;};func (_egaf *textTable )toTextTable ()TextTable {if _dfad {_adb .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_egaf ._ffbe ,_egaf ._egbe );};_eeea :=make ([][]TableCell ,_egaf ._egbe );for _dgfd :=0;_dgfd < _egaf ._egbe ;_dgfd ++{_eeea [_dgfd ]=make ([]TableCell ,_egaf ._ffbe );for _ccffd :=0;_ccffd < _egaf ._ffbe ;_ccffd ++{_ebfg :=_egaf .get (_ccffd ,_dgfd );if _ebfg ==nil {continue ;};if _dfad {_gd .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_ccffd ,_dgfd ,_ebfg );};_eeea [_dgfd ][_ccffd ].Text =_ebfg .text ();_abbb :=0;_eeea [_dgfd ][_ccffd ].Marks ._fbbe =_ebfg .toTextMarks (&_abbb );};};return TextTable {W :_egaf ._ffbe ,H :_egaf ._egbe ,Cells :_eeea };};func (_acag *stateStack )empty ()bool {return len (*_acag )==0};func (_afba *textWord )addDiacritic (_cdcg string ){_bdeed :=_afba ._bbbafa [len (_afba ._bbbafa )-1];_bdeed ._bdgg +=_cdcg ;_bdeed ._bdgg =_cg .NFKC .String (_bdeed ._bdgg );};func (_gcga *wordBag )makeRemovals 
()map[int ]map[*textWord ]struct{}{_degac :=make (map[int ]map[*textWord ]struct{},len (_gcga ._fdebe ));for _cagg :=range _gcga ._fdebe {_degac [_cagg ]=make (map[*textWord ]struct{});};return _degac ;};
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int

// split divides the composite cell into a grid of len(_cfgb)+1 columns by
// len(_eegb)+1 rows and returns it as a textTable. `_eegb` holds the row
// corridors and `_cfgb` the column corridors (as labelled in the debug log).
// Each line of the cell is assigned to the first grid rectangle for which
// _fgeff(rect, line box) is true; lines matching no rectangle are dropped.
// Every non-empty grid position gets a para built by _gccb.
//
// The cell's paraList is sorted in place by (Lly, Llx) as a side effect.
func (_ggae compositeCell) split(_eegb, _cfgb []float64) *textTable {
	height := len(_eegb) + 1
	width := len(_cfgb) + 1
	if _dfad {
		_adb.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066", width, height, _ggae, _eegb, _cfgb)
		_gd.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a", len(_ggae.paraList))
		for i, para := range _ggae.paraList {
			_gd.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para.String())
		}
		_gd.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", len(_ggae.lines()))
		for i, line := range _ggae.lines() {
			_gd.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, line)
		}
	}

	// Bracket the corridors with the cell's own edges (top to bottom for rows,
	// left to right for columns).
	_eegb = _eadb(_eegb, _ggae.Ury, _ggae.Lly)
	_cfgb = _eadb(_cfgb, _ggae.Llx, _ggae.Urx)

	cellParas := make(map[uint64]*textPara, width*height)
	table := textTable{_ffbe: width, _egbe: height, _gged: cellParas}

	paras := _ggae.paraList
	_ad.Slice(paras, func(i, j int) bool {
		a, b := paras[i], paras[j]
		if a.Lly != b.Lly {
			return a.Lly < b.Lly
		}
		return a.Llx < b.Llx
	})

	// Rectangle of every grid position, keyed by _bgfea(x, y).
	rects := make(map[uint64]_gdc.PdfRectangle, width*height)
	for y, lly := range _eegb[1:] {
		ury := _eegb[y]
		for x, urx := range _cfgb[1:] {
			llx := _cfgb[x]
			rects[_bgfea(x, y)] = _gdc.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
		}
	}
	if _dfad {
		_adb.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073")
		_gd.Printf("\u0020\u0020\u0020\u0020")
		for x := 0; x < width; x++ {
			_gd.Printf("\u0025\u0033\u0030\u0064\u002c\u0020", x)
		}
		_gd.Println()
		for y := 0; y < height; y++ {
			_gd.Printf("\u0020\u0020\u0025\u0032\u0064\u003a", y)
			for x := 0; x < width; x++ {
				_gd.Printf("\u00256\u002e\u0032\u0066\u002c\u0020", rects[_bgfea(x, y)])
			}
			_gd.Println()
		}
	}

	// locate returns the (x, y) of the first grid rectangle accepted by _fgeff
	// for the line, scanning row by row, or (-1, -1) when none is.
	locate := func(line *textLine) (int, int) {
		for y := 0; y < height; y++ {
			for x := 0; x < width; x++ {
				if _fgeff(rects[_bgfea(x, y)], line.PdfRectangle) {
					return x, y
				}
			}
		}
		return -1, -1
	}

	cellLines := make(map[uint64][]*textLine, width*height)
	for _, line := range paras.lines() {
		x, y := locate(line)
		if x < 0 {
			continue
		}
		key := _bgfea(x, y)
		cellLines[key] = append(cellLines[key], line)
	}

	for y := 0; y+1 < len(_eegb); y++ {
		ury, lly := _eegb[y], _eegb[y+1]
		for x := 0; x+1 < len(_cfgb); x++ {
			llx, urx := _cfgb[x], _cfgb[x+1]
			lines := cellLines[_bgfea(x, y)]
			if len(lines) == 0 {
				continue
			}
			rect := _gdc.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			table.put(x, y, _gccb(rect, lines))
		}
	}
	return &table
}

const _bdff = 10

// _cgdg converts exactly two PDF objects to float64 values. It returns an
// error when the slice does not hold two objects or when
// GetNumbersAsFloat fails.
func _cgdg(_bbdab []_dg.PdfObject) (_dfbab, _dfdb float64, _cabef error) {
	if n := len(_bbdab); n != 2 {
		return 0, 0, _gd.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", n)
	}
	vals, err := _dg.GetNumbersAsFloat(_bbdab)
	if err != nil {
		return 0, 0, err
	}
	return vals[0], vals[1], nil
}

// textMark is the internal record for a piece of text placed on the page. It
// embeds the PdfRectangle of the mark.
type textMark struct {
	_gdc.PdfRectangle
	_dfaf  int    // compared against 0, 90, 180 and 270 when computeViews groups marks
	_bdgg  string // text of the mark; textWord.computeText concatenates these
	_debf  string
	_agee  *_gdc.PdfFont
	_fcdad float64
	_bfba  float64
	_eccg  _gg.Matrix
	_aaee  _gg.Point
	_fgad  _gdc.PdfRectangle
	_eggc  _dc.Color
	_bgfb  _dc.Color
}

// processOperand handles the two image-producing operators, each with exactly
// one parameter. For the inline-image operator the image is extracted, except
// that stencil masks are skipped when IncludeInlineStencilMasks is off. For the
// XObject operator, image and form XObjects are dispatched to their extractors.
// Anything else is ignored and nil is returned. A non-name parameter to the
// XObject operator yields the package error `_eb`.
func (_gad *imageExtractContext) processOperand(_af *_aee.ContentStreamOperation, _eg _aee.GraphicsState, _eaga *_gdc.PdfPageResources) error {
	if len(_af.Params) != 1 {
		return nil
	}
	switch _af.Operand {
	case "\u0042\u0049":
		inline, ok := _af.Params[0].(*_aee.ContentStreamInlineImage)
		if !ok {
			return nil
		}
		if isMask, found := _dg.GetBoolVal(inline.ImageMask); found {
			if isMask && !_gad._efb.IncludeInlineStencilMasks {
				return nil
			}
		}
		return _gad.extractInlineImage(inline, _eg, _eaga)
	case "\u0044\u006f":
		name, ok := _dg.GetName(_af.Params[0])
		if !ok {
			_adb.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065")
			return _eb
		}
		_, xtype := _eaga.GetXObjectByName(*name)
		switch xtype {
		case _gdc.XObjectTypeImage:
			return _gad.extractXObjectImage(name, _eg, _eaga)
		case _gdc.XObjectTypeForm:
			return _gad.extractFormImages(name, _eg, _eaga)
		}
	}
	return nil
}
// String returns a description of `k`.
func (_gcef rulingKind )String ()string {_afccb ,_gdca :=_debd [_gcef ];if !_gdca {return _gd .Sprintf ("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064",_gcef );};return _afccb ;};func (_bggb lineRuling )xMean ()float64 {return 0.5*(_bggb ._dgfe .X +_bggb ._aaec .X )};func _daeb (_cbca _gdc .PdfRectangle )*ruling {return &ruling {_fafed :_dbff ,_bfc :_cbca .Lly ,_edebg :_cbca .Llx ,_dbfe :_cbca .Urx };};func (_gae *wordBag )firstWord (_fgd int )*textWord {return _gae ._fdebe [_fgd ][0]};func _fafc (_edcd _gg .Point )_gg .Matrix {return _gg .TranslationMatrix (_edcd .X ,_edcd .Y )};func (_ebec paraList )extractTables (_gadae []gridTiling )paraList {if _dfad {_adb .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_ebec ));};if len (_ebec )< _ebac {return _ebec ;};_dbcb :=_ebec .findTables (_gadae );if _dfad {_adb .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_dbcb ));for _cgfb ,_ecegd :=range _dbcb {_ecegd .log (_gd .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_cgfb ));};};return _ebec .applyTables (_dbcb );};func (_bgfbb gridTiling )complete ()bool {for _ ,_geaga :=range _bgfbb ._deae {for _ ,_bade :=range _geaga {if !_bade .complete (){return false ;};};};return true ;};var _bdbd =_dd .MustCompile ("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024");func (_feb *subpath )add (_cbee ..._gg .Point ){_feb ._dee =append (_feb ._dee ,_cbee ...)};func (_dbbfga rulingList )sortStrict (){_ad .Slice (_dbbfga ,func (_aecg ,_fagg int )bool {_bbed 
,_fagb :=_dbbfga [_aecg ],_dbbfga [_fagg ];_fdef ,_adea :=_bbed ._fafed ,_fagb ._fafed ;if _fdef !=_adea {return _fdef > _adea ;};_eabg ,_bfdb :=_bbed ._bfc ,_fagb ._bfc ;if !_gbee (_eabg -_bfdb ){return _eabg < _bfdb ;};_eabg ,_bfdb =_bbed ._edebg ,_fagb ._edebg ;if _eabg !=_bfdb {return _eabg < _bfdb ;};return _bbed ._dbfe < _fagb ._dbfe ;});};func (_ecdg paraList )toTextMarks ()[]TextMark {_febfd :=0;var _deded []TextMark ;for _cgac ,_edfc :=range _ecdg {if _edfc ._bgcce {continue ;};_bdfgb :=_edfc .toTextMarks (&_febfd );_deded =append (_deded ,_bdfgb ...);if _cgac !=len (_ecdg )-1{if _gbggf (_edfc ,_ecdg [_cgac +1]){_deded =_daga (_deded ,&_febfd ,"\u0020");}else {_deded =_daga (_deded ,&_febfd ,"\u000a");_deded =_daga (_deded ,&_febfd ,"\u000a");};};};_deded =_daga (_deded ,&_febfd ,"\u000a");_deded =_daga (_deded ,&_febfd ,"\u000a");return _deded ;};func (_edfa *wordBag )minDepth ()float64 {return _edfa ._bfg -(_edfa .Ury -_edfa ._ecba )};func (_bbag *textObject )setWordSpacing (_gega float64 ){if _bbag ==nil {return ;};_bbag ._bgebc ._gefe =_gega ;};func _cabeb (_dcegf map[int ][]float64 ){if len (_dcegf )<=1{return ;};_ffcag :=_cbag (_dcegf );if _dfad {_adb .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_ffcag );};var _ecded ,_feabg int ;for _ecded ,_feabg =range _ffcag {if _dcegf [_feabg ]!=nil {break ;};};for _ggfaf ,_ebegf :=range _ffcag [_ecded :]{_bggf :=_dcegf [_ebegf ];if _bggf ==nil {continue ;};if _dfad {_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_ecded +_ggfaf ,_feabg ,_ebegf );};_dfda :=_dcegf [_ebegf ];if _dfda [len (_dfda )-1]> _bggf [0]{_dfda [len (_dfda )-1]=_bggf [0];_dcegf [_feabg ]=_dfda ;};_feabg =_ebegf ;};};func (_beeba *textLine )toTextMarks (_cagb *int )[]TextMark {var _eacf []TextMark ;for _ ,_aagb :=range _beeba ._cfgd {if _aagb ._bcaaf {_eacf =_daga (_eacf ,_cagb ,"\u0020");};_ffe :=_aagb .toTextMarks 
(_cagb );_eacf =append (_eacf ,_ffe ...);};return _eacf ;};func (_cfef paraList )log (_bageb string ){if !_fegb {return ;};_adb .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_bageb ,len (_cfef ));for _eefd ,_fadbc :=range _cfef {if _fadbc ==nil {continue ;};_fbfg :=_fadbc .text ();_cgbg :="\u0020\u0020";if _fadbc ._cabb !=nil {_cgbg =_gd .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_fadbc ._cabb ._ffbe ,_fadbc ._cabb ._egbe );};_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_eefd ,_fadbc .PdfRectangle ,_cgbg ,_fgde (_fbfg ,50));};};func _fcgcf (_deag string )bool {for _ ,_aebc :=range _deag {if !_ae .IsSpace (_aebc ){return false ;};};return true ;};func (_cdbge *subpath )removeDuplicates (){if len (_cdbge ._dee )==0{return ;};_ccac :=[]_gg .Point {_cdbge ._dee [0]};for _ ,_geef :=range _cdbge ._dee [1:]{if !_fddb (_geef ,_ccac [len (_ccac )-1]){_ccac =append (_ccac ,_geef );};};_cdbge ._dee =_ccac ;};func (_bdad rulingList )intersections ()map[int ]intSet {var _gbdfg ,_baagg []int ;for _ebbd ,_decce :=range _bdad {switch _decce ._fafed {case _gddg :_gbdfg =append (_gbdfg ,_ebbd );case _dbff :_baagg =append (_baagg ,_ebbd );};};if len (_gbdfg )< _bbgf +1||len (_baagg )< _dbdbd +1{return nil ;};if len (_gbdfg )+len (_baagg )> _feae {_adb .Log .Debug ("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",len (_bdad ),len (_gbdfg ),len (_baagg ));return nil ;};_gdgee :=make (map[int ]intSet ,len (_gbdfg )+len (_baagg ));for _ ,_bccb :=range _gbdfg {for _ ,_feac :=range _baagg {if _bdad [_bccb ].intersects (_bdad [_feac ]){if _ 
,_dggfd :=_gdgee [_bccb ];!_dggfd {_gdgee [_bccb ]=make (intSet );};if _ ,_eedf :=_gdgee [_feac ];!_eedf {_gdgee [_feac ]=make (intSet );};_gdgee [_bccb ].add (_feac );_gdgee [_feac ].add (_bccb );};};};return _gdgee ;};func (_gaa *PageText )computeViews (){var _ggcc rulingList ;if _gdcfg {_bfad :=_aade (_gaa ._agcc );_ggcc =append (_ggcc ,_bfad ...);};if _gaeg {_afd :=_cbef (_gaa ._dbge );_ggcc =append (_ggcc ,_afd ...);};_ggcc ,_efgc :=_ggcc .toTilings ();var _dcdd paraList ;_cbfg :=len (_gaa ._bdbb );for _caae :=0;_caae < 360&&_cbfg > 0;_caae +=90{_dbdab :=make ([]*textMark ,0,len (_gaa ._bdbb )-_cbfg );for _ ,_aeeb :=range _gaa ._bdbb {if _aeeb ._dfaf ==_caae {_dbdab =append (_dbdab ,_aeeb );};};if len (_dbdab )> 0{_eeg :=_affd (_dbdab ,_gaa ._dacc ,_ggcc ,_efgc );_dcdd =append (_dcdd ,_eeg ...);_cbfg -=len (_dbdab );};};_bggd :=new (_da .Buffer );_dcdd .writeText (_bggd );_gaa ._cfde =_bggd .String ();_gaa ._ccff =_dcdd .toTextMarks ();_gaa ._abfg =_dcdd .tables ();if _dfad {_adb .Log .Info ("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064",len (_gaa ._abfg ));};};func (_dbaf *wordBag )text ()string {_ffc :=_dbaf .allWords ();_cgba :=make ([]string ,len (_ffc ));for _bage ,_cgadd :=range _ffc {_cgba [_bage ]=_cgadd ._adad ;};return _ef .Join (_cgba ,"\u0020");};func (_fdc *textObject )checkOp (_dgb *_aee .ContentStreamOperation ,_bgdb int ,_abf bool )(_ccf bool ,_ega error ){if _fdc ==nil {var _bbf []_dg .PdfObject ;if _bgdb > 0{_bbf =_dgb .Params ;if len (_bbf )> _bgdb {_bbf =_bbf [:_bgdb ];};};_adb .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_dgb .Operand ,_bbf );};if _bgdb >=0{if len (_dgb .Params )!=_bgdb {if _abf {_ega =_d .New 
("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");};_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_dgb .Operand ,_bgdb ,len (_dgb .Params ),_dgb .Params );return false ,_ega ;};};return true ,nil ;};func _cbag (_gfdgd map[int ][]float64 )[]int {_cefb :=make ([]int ,len (_gfdgd ));_daee :=0;for _caca :=range _gfdgd {_cefb [_daee ]=_caca ;_daee ++;};_ad .Ints (_cefb );return _cefb ;};func (_gfeb *wordBag )pullWord (_fceea *textWord ,_bacd int ,_gdgf map[int ]map[*textWord ]struct{}){_gfeb .PdfRectangle =_cae (_gfeb .PdfRectangle ,_fceea .PdfRectangle );if _fceea ._bgdgg > _gfeb ._ecba {_gfeb ._ecba =_fceea ._bgdgg ;};_gfeb ._fdebe [_bacd ]=append (_gfeb ._fdebe [_bacd ],_fceea );_gdgf [_bacd ][_fceea ]=struct{}{};};func (_bacg gridTile )complete ()bool {return _bacg .numBorders ()==4};
// PageText represents the layout of text on a device page.
type PageText struct{_bdbb []*textMark ;_cfde string ;_ccff []TextMark ;_abfg []TextTable ;_dacc _gdc .PdfRectangle ;_agcc []pathSection ;_dbge []pathSection ;};func (_cgd *imageExtractContext )extractInlineImage (_bdg *_aee .ContentStreamInlineImage ,_fb _aee .GraphicsState ,_cbd *_gdc .PdfPageResources )error {_fee ,_cdb :=_bdg .ToImage (_cbd );if _cdb !=nil {return _cdb ;};_edb ,_cdb :=_bdg .GetColorSpace (_cbd );if _cdb !=nil {return _cdb ;};if _edb ==nil {_edb =_gdc .NewPdfColorspaceDeviceGray ();};_gga ,_cdb :=_edb .ImageToRGB (*_fee );if _cdb !=nil {return _cdb ;};_bb :=ImageMark {Image :&_gga ,Width :_fb .CTM .ScalingFactorX (),Height :_fb .CTM .ScalingFactorY (),Angle :_fb .CTM .Angle ()};_bb .X ,_bb .Y =_fb .CTM .Translation ();_cgd ._bg =append (_cgd ._bg ,_bb );_cgd ._gc ++;return nil ;};func (_ggag *ruling )intersects (_dbgcg *ruling )bool {_fgbb :=(_ggag ._fafed ==_gddg &&_dbgcg ._fafed ==_dbff )||(_dbgcg ._fafed ==_gddg &&_ggag ._fafed ==_dbff );_eefe :=func (_eabd ,_febeg *ruling )bool {return _eabd ._edebg -_ffff <=_febeg ._bfc &&_febeg ._bfc <=_eabd ._dbfe +_ffff ;};_faeb :=_eefe (_ggag ,_dbgcg );_beffg :=_eefe (_dbgcg ,_ggag );if _cfag {_gd .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_fgbb ,_faeb ,_beffg ,_fgbb &&_faeb &&_beffg ,_ggag ,_dbgcg );};return _fgbb &&_faeb &&_beffg ;};func (_gadd *textObject )getFillColor ()_dc .Color {return _eeda (_gadd ._dca .ColorspaceNonStroking ,_gadd ._dca .ColorNonStroking );};func (_aecfe *wordBag )absorb (_ebbc *wordBag ){_ded :=_ebbc .makeRemovals ();for _egee ,_add :=range _ebbc ._fdebe {for _ ,_cbdfb :=range _add {_aecfe .pullWord (_cbdfb 
,_egee ,_ded );};};_ebbc .applyRemovals (_ded );};type ruling struct{_fafed rulingKind ;_fffg markKind ;_dc .Color ;_bfc float64 ;_edebg float64 ;_dbfe float64 ;_fbab float64 ;};func (_bddb *shapesState )devicePoint (_ccda ,_bedc float64 )_gg .Point {_fbbf :=_bddb ._gge .Mult (_bddb ._fdb );_ccda ,_bedc =_fbbf .Transform (_ccda ,_bedc );return _gg .NewPoint (_ccda ,_bedc );};func (_efad rulingList )sort (){_ad .Slice (_efad ,_efad .comp )};func _ddae (_gcdef int ,_afdef map[int ][]float64 )([]int ,int ){_aeae :=make ([]int ,_gcdef );_gfgcd :=0;for _dcgg :=0;_dcgg < _gcdef ;_dcgg ++{_aeae [_dcgg ]=_gfgcd ;_gfgcd +=len (_afdef [_dcgg ])+1;};return _aeae ,_gfgcd ;};func _beb (_ecae float64 )int {var _fgcd int ;if _ecae >=0{_fgcd =int (_ecae /_bcgf );}else {_fgcd =int (_ecae /_bcgf )-1;};return _fgcd ;};func (_feff *textTable )newTablePara ()*textPara {_efbd :=_feff .computeBbox ();_gafa :=&textPara {PdfRectangle :_efbd ,_ffab :_efbd ,_cabb :_feff };if _dfad {_adb .Log .Info ("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073",_gafa );};return _gafa ;};func (_eccb *shapesState )closePath (){if _eccb ._bbb {_eccb ._bcce =append (_eccb ._bcce ,_cedg (_eccb ._ddcd ));_eccb ._bbb =false ;}else if len (_eccb ._bcce )==0{if _ebge {_adb .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");};_eccb ._bbb =false ;return ;};_eccb ._bcce [len (_eccb ._bcce )-1].close ();if _ebge {_adb .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_eccb );};};func (_fegf *textPara )bbox ()_gdc .PdfRectangle {return _fegf .PdfRectangle };func (_bdfa paraList )sortReadingOrder (){_adb .Log .Trace 
("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_bdfa ));if len (_bdfa )<=1{return ;};_bdfa .computeEBBoxes ();_ad .Slice (_bdfa ,func (_gaaf ,_fbfa int )bool {return _ccae (_bdfa [_gaaf ],_bdfa [_fbfa ])<=0});_degf :=_bdfa .topoOrder ();_bdfa .reorder (_degf );};
// NewFromContents creates a new extractor from contents and page resources.
func NewFromContents(contents string, resources *_gdc.PdfPageResources) (*Extractor, error) {
	// Both lookup maps start out empty; the returned error is always nil.
	e := Extractor{
		_adf: contents,
		_db:  resources,
		_bc:  map[string]fontEntry{},
		_gea: map[string]textResult{},
	}
	return &e, nil
}
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_egdd PageText) Marks() *TextMarkArray {
	// The returned array shares the page's mark slice; it is not copied.
	return &TextMarkArray{_fbbe: _egdd._ccff}
}

// bounded is implemented by anything that can report a bounding rectangle.
type bounded interface {
	bbox() _gdc.PdfRectangle
}

// getDepthIdx returns the bin index of depth `_bdcd` (see _beb), clamped to
// the range spanned by the first and last entries of depthIndexes. The bag
// must have at least one depth index.
func (_ecac *wordBag) getDepthIdx(_bdcd float64) int {
	indexes := _ecac.depthIndexes()
	lo, hi := indexes[0], indexes[len(indexes)-1]
	idx := _beb(_bdcd)
	switch {
	case idx < lo:
		return lo
	case idx > hi:
		return hi
	}
	return idx
}

// allWords returns every word in the bag. The order across keys follows map
// iteration and is therefore not fixed.
func (_ede *wordBag) allWords() []*textWord {
	var all []*textWord
	for _, words := range _ede._fdebe {
		all = append(all, words...)
	}
	return all
}

// getCurrentFont returns the font of the state on top of the `_fad` stack, or
// the model's default font when the stack is empty or that font is nil.
func (_fac *textObject) getCurrentFont() *_gdc.PdfFont {
	var font *_gdc.PdfFont
	if !_fac._fad.empty() {
		font = _fac._fad.top()._gdbe
	}
	if font != nil {
		return font
	}
	_adb.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e")
	return _gdc.DefaultFont()
}

// intSet is a set of ints.
type intSet map[int]struct{}

// _fgde returns `_acaf` cut down to at most `_dfbeb` bytes.
//
// The cut is moved back to the start of a rune when it would otherwise land
// inside a multi-byte UTF-8 sequence, so the result never ends in a partial
// character. A limit of zero or less gives the empty string once the string is
// at least that long (a negative limit previously caused a slice-bounds
// panic).
func _fgde(_acaf string, _dfbeb int) string {
	if len(_acaf) < _dfbeb {
		return _acaf
	}
	if _dfbeb <= 0 {
		return ""
	}
	cut := _dfbeb
	// cut == len(_acaf) keeps the whole string; otherwise back up while the
	// byte at the cut is a UTF-8 continuation byte.
	for cut > 0 && cut < len(_acaf) && !_a.RuneStart(_acaf[cut]) {
		cut--
	}
	return _acaf[:cut]
}

// _fddb reports whether two points have identical coordinates.
func _fddb(_bfeea, _agcf _gg.Point) bool {
	return _bfeea.X == _agcf.X && _bfeea.Y == _agcf.Y
}

// findTables returns the tables found among the paras: grid tables when
// `_fede` is enabled, followed by text tables when `_fae` is enabled. The
// list is sorted in place with _fgcb after neighbours have been added.
func (_dcefg paraList) findTables(_ebgbc []gridTiling) []*textTable {
	_dcefg.addNeighbours()
	_ad.Slice(_dcefg, func(i, j int) bool {
		return _fgcb(_dcefg[i], _dcefg[j]) < 0
	})
	var tables []*textTable
	if _fede {
		tables = append(tables, _dcefg.findGridTables(_ebgbc)...)
	}
	if _fae {
		tables = append(tables, _dcefg.findTextTables()...)
	}
	return tables
}

// numBorders counts how many of the tile's four border flags are set.
func (_dceb gridTile) numBorders() int {
	count := 0
	for _, set := range [...]bool{_dceb._aged, _dceb._gbgb, _dceb._cgcd, _dceb._aefa} {
		if set {
			count++
		}
	}
	return count
}

// toTextMarks converts the word's marks to TextMarks, threading the running
// offset `_gffa` through _cgcb.
func (_ffacea *textWord) toTextMarks(_gffa *int) []TextMark {
	var marks []TextMark
	for _, m := range _ffacea._bbbafa {
		marks = _cgcb(marks, _gffa, m.ToTextMark())
	}
	return marks
}

// reorder rearranges the list in place so that position i holds the para that
// was at position `_ecaf[i]`.
func (_dedd paraList) reorder(_ecaf []int) {
	sorted := make(paraList, len(_dedd))
	for to, from := range _ecaf {
		sorted[to] = _dedd[from]
	}
	copy(_dedd, sorted)
}

// firstReadingIndex starts from key `_gdfd` and scans the keys returned by
// depthBand for the band that begins at the bottom of bin `_gdfd` and extends
// by `_ecfe` times the first word's `_bgdgg`. It returns the key whose first
// word sorts earliest under _aecde.
func (_bgdbb *wordBag) firstReadingIndex(_gdfd int) int {
	size := _bgdbb.firstWord(_gdfd)._bgdgg
	minDepth := float64(_gdfd+1) * _bcgf
	maxDepth := minDepth + _ecfe*size
	best := _gdfd
	for _, idx := range _bgdbb.depthBand(minDepth, maxDepth) {
		if _aecde(_bgdbb.firstWord(idx), _bgdbb.firstWord(best)) < 0 {
			best = idx
		}
	}
	return best
}

// computeText returns the concatenation of the `_bdgg` text of the word's
// marks.
func (_dgaab *textWord) computeText() string {
	parts := make([]string, 0, len(_dgaab._bbbafa))
	for _, m := range _dgaab._bbbafa {
		parts = append(parts, m._bdgg)
	}
	return _ef.Join(parts, "")
}

// bbox returns the smallest rectangle containing every point of every subpath.
// The section must have a first subpath with at least one point.
func (_geec pathSection) bbox() _gdc.PdfRectangle {
	first := _geec._baca[0]._dee[0]
	box := _gdc.PdfRectangle{Llx: first.X, Urx: first.X, Lly: first.Y, Ury: first.Y}
	extend := func(p _gg.Point) {
		switch {
		case p.X < box.Llx:
			box.Llx = p.X
		case p.X > box.Urx:
			box.Urx = p.X
		}
		switch {
		case p.Y < box.Lly:
			box.Lly = p.Y
		case p.Y > box.Ury:
			box.Ury = p.Y
		}
	}
	for _, p := range _geec._baca[0]._dee[1:] {
		extend(p)
	}
	for _, sp := range _geec._baca[1:] {
		for _, p := range sp._dee {
			extend(p)
		}
	}
	return box
}

// stroke appends the current subpaths, tagged with the stroke color, to
// `*_ddg` as one path section.
func (_dad *shapesState) stroke(_ddg *[]pathSection) {
	section := pathSection{_baca: _dad._bcce, Color: _dad._befd.getStrokeColor()}
	*_ddg = append(*_ddg, section)
	if !_cfag {
		return
	}
	_gd.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*_ddg), _dad, _dad._befd.getStrokeColor(), section.bbox())
	if !_gfgc {
		return
	}
	// Verbose mode lists the subpaths with indexes 0 through 10 at most.
	for i, sp := range _dad._bcce {
		_gd.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sp)
		if i == 10 {
			break
		}
	}
}
// Append appends `mark` to the mark array.
func (_bfb *TextMarkArray )Append (mark TextMark ){_bfb ._fbbe =append (_bfb ._fbbe ,mark )};func (_egfdb *textWord )bbox ()_gdc .PdfRectangle {return _egfdb .PdfRectangle };func (_edea rulingList )asTiling ()gridTiling {if _dceg {_adb .Log .Info ("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_edea ));};for _dbfd ,_fegbf :=range _edea [1:]{_ccadg :=_edea [_dbfd ];if _ccadg .alignsPrimary (_fegbf )&&_ccadg .alignsSec (_fegbf ){_adb .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_fegbf ,_ccadg );};};_edea .sortStrict ();_edea .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_gfef ,_bdcg :=_edea .vertsHorzs ();_eedd :=_gfef .primaries ();_gdgg :=_bdcg .primaries ();_bfec :=len (_eedd )-1;_abbff :=len (_gdgg )-1;if _bfec ==0||_abbff ==0{return gridTiling {};};_debbb :=_gdc .PdfRectangle {Llx :_eedd [0],Urx :_eedd [_bfec ],Lly :_gdgg [0],Ury :_gdgg [_abbff ]};if _dceg {_adb .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_gfef ));for _aeccc ,_aagaa :=range _gfef {_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_aeccc ,_aagaa );};_adb .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_bdcg ));for _fedeg ,_bbbaf :=range _bdcg {_gd .Printf 
("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fedeg ,_bbbaf );};_adb .Log .Info ("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_bfec ,_abbff ,_eedd ,_gdgg );};_aadc :=make ([]gridTile ,_bfec *_abbff );for _abaf :=_abbff -1;_abaf >=0;_abaf --{_adfd :=_gdgg [_abaf ];_aafb :=_gdgg [_abaf +1];for _gfcg :=0;_gfcg < _bfec ;_gfcg ++{_dgbg :=_eedd [_gfcg ];_ffccfe :=_eedd [_gfcg +1];_agaef :=_gfef .findPrimSec (_dgbg ,_adfd );_fab :=_gfef .findPrimSec (_ffccfe ,_adfd );_dgbbb :=_bdcg .findPrimSec (_adfd ,_dgbg );_cfdd :=_bdcg .findPrimSec (_aafb ,_dgbg );_acbb :=_gdc .PdfRectangle {Llx :_dgbg ,Urx :_ffccfe ,Lly :_adfd ,Ury :_aafb };_gfbf :=_cbg (_acbb ,_agaef ,_fab ,_dgbbb ,_cfdd );_aadc [_abaf *_bfec +_gfcg ]=_gfbf ;if _dceg {_gd .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_gfcg ,_abaf ,_gfbf .String (),_gfbf .Width (),_gfbf .Height ());};};};if _dceg {_adb .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_debbb );};_gbcf :=make ([]map[float64 ]gridTile ,_abbff );for _egacd :=_abbff -1;_egacd >=0;_egacd --{if _dceg {_gd .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_egacd );};_gbcf [_egacd ]=make (map[float64 ]gridTile ,_bfec );for _bfcb :=0;_bfcb < _bfec ;_bfcb ++{_dgfc :=_aadc [_egacd *_bfec +_bfcb ];if _dceg {_gd .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bfcb ,_dgfc );};if !_dgfc ._aged {continue ;};_cgbgf :=_bfcb ;for 
_ggdab :=_bfcb +1;!_dgfc ._gbgb &&_ggdab < _bfec ;_ggdab ++{_cgbf :=_aadc [_egacd *_bfec +_ggdab ];_dgfc .Urx =_cgbf .Urx ;_dgfc ._aefa =_dgfc ._aefa ||_cgbf ._aefa ;_dgfc ._cgcd =_dgfc ._cgcd ||_cgbf ._cgcd ;_dgfc ._gbgb =_cgbf ._gbgb ;if _dceg {_gd .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_ggdab ,_cgbf ,_dgfc );};_cgbgf =_ggdab ;};if _dceg {_gd .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_bfcb ,_cgbgf ,_dgfc );};_bfcb =_cgbgf ;_gbcf [_egacd ][_dgfc .Llx ]=_dgfc ;};};_edfe :=make (map[float64 ]map[float64 ]gridTile ,_abbff );_gbca :=make (map[float64 ]map[float64 ]struct{},_abbff );for _fefc :=_abbff -1;_fefc >=0;_fefc --{_fcdd :=_aadc [_fefc *_bfec ].Lly ;_edfe [_fcdd ]=make (map[float64 ]gridTile ,_bfec );_gbca [_fcdd ]=make (map[float64 ]struct{},_bfec );};if _dceg {_adb .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_debbb );};for _aega :=_abbff -1;_aega >=0;_aega --{_beba :=_aadc [_aega *_bfec ].Lly ;_gdga :=_gbcf [_aega ];if _dceg {_gd .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_aega );};for _ ,_cbad :=range _baec (_gdga ){if _ ,_dgagd :=_gbca [_beba ][_cbad ];_dgagd {continue ;};_befca :=_gdga [_cbad ];if _dceg {_gd .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_befca .String ());};for _dcddd :=_aega -1;_dcddd >=0;_dcddd --{if _befca ._cgcd {break ;};_eega :=_gbcf [_dcddd ];_ccfc ,_bbbf :=_eega [_cbad ];if !_bbbf {break ;};if _ccfc .Urx !=_befca .Urx {break ;};_befca ._cgcd =_ccfc ._cgcd ;_befca .Lly =_ccfc .Lly ;if _dceg {_gd .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_ccfc .String (),_befca .String ());};_gbca [_ccfc .Lly ][_ccfc .Llx 
]=struct{}{};};if _aega ==0{_befca ._cgcd =true ;};if _befca .complete (){_edfe [_beba ][_cbad ]=_befca ;};};};_dbgg :=gridTiling {PdfRectangle :_debbb ,_cgaf :_cbada (_edfe ),_aafd :_beccd (_edfe ),_deae :_edfe };_dbgg .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");return _dbgg ;};func (_dcgc *textTable )put (_faace ,_ffgdb int ,_dacff *textPara ){_dcgc ._gged [_bgfea (_faace ,_ffgdb )]=_dacff ;};func _cgdeg (_bgbg []rulingList )(rulingList ,rulingList ){var _dfcdd rulingList ;for _ ,_cccd :=range _bgbg {_dfcdd =append (_dfcdd ,_cccd ...);};return _dfcdd .vertsHorzs ();};func _cggd (_cdage _gdc .PdfRectangle )*ruling {return &ruling {_fafed :_gddg ,_bfc :_cdage .Urx ,_edebg :_cdage .Lly ,_dbfe :_cdage .Ury };};func (_bged *textObject )setTextMatrix (_abe []float64 ){if len (_abe )!=6{_adb .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_abe ));return ;};_dfa ,_aec ,_eaag ,_gafg ,_gbgg ,_cde :=_abe [0],_abe [1],_abe [2],_abe [3],_abe [4],_abe [5];_bged ._aac =_gg .NewMatrix (_dfa ,_aec ,_eaag ,_gafg ,_gbgg ,_cde );_bged ._dgdc =_bged ._aac ;};func _bcf (_fefd ,_gabf bounded )float64 {return _beffc (_fefd )-_beffc (_gabf )};func (_adfaf rulingList )merge ()*ruling {_edafd :=_adfaf [0]._bfc ;_ggfda :=_adfaf [0]._edebg ;_aaeda :=_adfaf [0]._dbfe ;for _ ,_eggb :=range _adfaf [1:]{_edafd +=_eggb ._bfc ;if _eggb ._edebg < _ggfda {_ggfda =_eggb ._edebg ;};if _eggb ._dbfe > _aaeda {_aaeda =_eggb ._dbfe ;};};_ggfgb :=&ruling {_fafed :_adfaf [0]._fafed ,_fffg :_adfaf [0]._fffg ,Color :_adfaf [0].Color ,_bfc :_edafd /float64 (len (_adfaf )),_edebg :_ggfda ,_dbfe :_aaeda };if _aab {_adb .Log .Info ("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073",len (_adfaf ),_ggfgb );for _cdde ,_eacgc :=range _adfaf {_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cdde ,_eacgc );};};return _ggfgb ;};type imageExtractContext struct{_bg 
[]ImageMark ;_gc int ;_eag int ;_ag int ;_ed map[*_dg .PdfObjectStream ]*cachedImage ;_efb *ImageExtractOptions ;};func (_ggd *wordBag )sort (){for _ ,_gegaf :=range _ggd ._fdebe {_ad .Slice (_gegaf ,func (_gffec ,_dbbe int )bool {return _aecde (_gegaf [_gffec ],_gegaf [_dbbe ])< 0});};};var _f =false ;func (_bbac *textObject )setCharSpacing (_gfbc float64 ){if _bbac ==nil {return ;};_bbac ._bgebc ._ecbb =_gfbc ;if _aaac {_adb .Log .Info ("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073",_gfbc ,_bbac ._bgebc .String ());};};func _eadb (_dbdfb []float64 ,_fgffg ,_abcee float64 )[]float64 {_cgfff ,_ccgbf :=_fgffg ,_abcee ;if _ccgbf < _cgfff {_cgfff ,_ccgbf =_ccgbf ,_cgfff ;};_efebf :=make ([]float64 ,0,len (_dbdfb )+2);_efebf =append (_efebf ,_fgffg );for _ ,_defc :=range _dbdfb {if _defc <=_cgfff {continue ;}else if _defc >=_ccgbf {break ;};_efebf =append (_efebf ,_defc );};_efebf =append (_efebf ,_abcee );return _efebf ;};func (_bcfa *textTable )bbox ()_gdc .PdfRectangle {return _bcfa .PdfRectangle };func (_cacd paraList )eventNeighbours (_deedd []event )map[*textPara ][]int {_ad .Slice (_deedd ,func (_ageaa ,_abeb int )bool {_cfae ,_fcgb :=_deedd [_ageaa ],_deedd [_abeb ];_bcae ,_eaabf :=_cfae ._edfcgb ,_fcgb ._edfcgb ;if _bcae !=_eaabf {return _bcae < _eaabf ;};if _cfae ._cbfb !=_fcgb ._cbfb {return _cfae ._cbfb ;};return _ageaa < _abeb ;});_bbae :=make (map[int ]intSet );_gbfbf :=make (intSet );for _ ,_dedec :=range _deedd {if _dedec ._cbfb {_bbae [_dedec ._beaec ]=make (intSet );for _egfb :=range _gbfbf {if _egfb !=_dedec ._beaec {_bbae [_dedec ._beaec ].add (_egfb );_bbae [_egfb ].add (_dedec ._beaec );};};_gbfbf .add (_dedec ._beaec );}else {_gbfbf .del (_dedec ._beaec );};};_dfffb :=map[*textPara ][]int {};for _bfbd ,_fcef :=range _bbae {_dggb :=_cacd [_bfbd ];if len (_fcef )==0{_dfffb [_dggb ]=nil ;continue ;};_daafc :=make ([]int ,len (_fcef 
));_cfgf :=0;for _bfgc :=range _fcef {_daafc [_cfgf ]=_bfgc ;_cfgf ++;};_dfffb [_dggb ]=_daafc ;};return _dfffb ;};func _afffg (_ebdgd []*textWord ,_adgd int )[]*textWord {_ecgde :=len (_ebdgd );copy (_ebdgd [_adgd :],_ebdgd [_adgd +1:]);return _ebdgd [:_ecgde -1];};func (_aeca *wordBag )depthIndexes ()[]int {if len (_aeca ._fdebe )==0{return nil ;};_abae :=make ([]int ,len (_aeca ._fdebe ));_dfab :=0;for _aaed :=range _aeca ._fdebe {_abae [_dfab ]=_aaed ;_dfab ++;};_ad .Ints (_abae );return _abae ;};
|
||
|
||
// ApplyArea processes the page text only within the specified area `bbox`.
|
||
// Each time ApplyArea is called, it updates the result set in `pt`.
|
||
// Can be called multiple times in a row with different bounding boxes.
|
||
func (_dbcd *PageText )ApplyArea (bbox _gdc .PdfRectangle ){_caf :=make ([]*textMark ,0,len (_dbcd ._bdbb ));for _ ,_bbcc :=range _dbcd ._bdbb {if _gecb (_bbcc .bbox (),bbox ){_caf =append (_caf ,_bbcc );};};var _agg paraList ;_efdd :=len (_caf );for _fdcb :=0;_fdcb < 360&&_efdd > 0;_fdcb +=90{_abea :=make ([]*textMark ,0,len (_caf )-_efdd );for _ ,_bbgb :=range _caf {if _bbgb ._dfaf ==_fdcb {_abea =append (_abea ,_bbgb );};};if len (_abea )> 0{_gee :=_affd (_abea ,_dbcd ._dacc ,nil ,nil );_agg =append (_agg ,_gee ...);_efdd -=len (_abea );};};_decf :=new (_da .Buffer );_agg .writeText (_decf );_dbcd ._cfde =_decf .String ();_dbcd ._ccff =_agg .toTextMarks ();_dbcd ._abfg =_agg .tables ();};func (_dgba *textPara )writeCellText (_ccfb _b .Writer ){for _bffdc ,_gcdd :=range _dgba ._acgf {_bdcdc :=_gcdd .text ();_cabe :=_ddfec &&_gcdd .endsInHyphen ()&&_bffdc !=len (_dgba ._acgf )-1;if _cabe {_bdcdc =_ggca (_bdcdc );};_ccfb .Write ([]byte (_bdcdc ));if !(_cabe ||_bffdc ==len (_dgba ._acgf )-1){_ccfb .Write ([]byte (_ccfd (_gcdd ._febe ,_dgba ._acgf [_bffdc +1]._febe )));};};};const (_afge markKind =iota ;_gcbb ;_cdcc ;_abb ;);func (_bffg *shapesState )drawRectangle (_eafe ,_affa ,_gfe ,_cgc float64 ){if _ebge {_bdd :=_bffg .devicePoint (_eafe ,_affa );_bggdd :=_bffg .devicePoint (_eafe +_gfe ,_affa +_cgc );_befc :=_gdc .PdfRectangle {Llx :_bdd .X ,Lly :_bdd .Y ,Urx :_bggdd .X ,Ury :_bggdd .Y };_adb .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_befc );};_bffg .newSubPath ();_bffg .moveTo (_eafe ,_affa );_bffg .lineTo (_eafe +_gfe ,_affa );_bffg .lineTo (_eafe +_gfe ,_affa +_cgc );_bffg .lineTo (_eafe ,_affa +_cgc );_bffg .closePath ();};func (_cdead intSet )has (_beec int )bool {_ ,_fgea :=_cdead [_beec ];return _fgea };func (_bfca rulingList )blocks (_gcdb ,_dadgd *ruling )bool {if _gcdb ._edebg > _dadgd ._dbfe ||_dadgd ._edebg > _gcdb ._dbfe {return false ;};_aagd :=_g .Max (_gcdb ._edebg ,_dadgd 
._edebg );_fgbf :=_g .Min (_gcdb ._dbfe ,_dadgd ._dbfe );if _gcdb ._bfc > _dadgd ._bfc {_gcdb ,_dadgd =_dadgd ,_gcdb ;};for _ ,_egab :=range _bfca {if _gcdb ._bfc <=_egab ._bfc +_bgcb &&_egab ._bfc <=_dadgd ._bfc +_bgcb &&_egab ._edebg <=_fgbf &&_aagd <=_egab ._dbfe {return true ;};};return false ;};func (_bead *textLine )endsInHyphen ()bool {_bbaa :=_bead ._cfgd [len (_bead ._cfgd )-1];_dfba :=_bbaa ._adad ;_abgb ,_cgef :=_a .DecodeLastRuneInString (_dfba );if _cgef <=0||!_ae .Is (_ae .Hyphen ,_abgb ){return false ;};if _bbaa ._bcaaf &&_dbbc (_dfba ){return true ;};return _dbbc (_bead .text ());};func (_cgbgg lineRuling )asRuling ()(*ruling ,bool ){_feef :=ruling {_fafed :_cgbgg ._febb ,Color :_cgbgg .Color ,_fffg :_gcbb };switch _cgbgg ._febb {case _gddg :_feef ._bfc =_cgbgg .xMean ();_feef ._edebg =_g .Min (_cgbgg ._dgfe .Y ,_cgbgg ._aaec .Y );_feef ._dbfe =_g .Max (_cgbgg ._dgfe .Y ,_cgbgg ._aaec .Y );case _dbff :_feef ._bfc =_cgbgg .yMean ();_feef ._edebg =_g .Min (_cgbgg ._dgfe .X ,_cgbgg ._aaec .X );_feef ._dbfe =_g .Max (_cgbgg ._dgfe .X ,_cgbgg ._aaec .X );default:_adb .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_cgbgg ._febb );return nil ,false ;};return &_feef ,true ;};
|
||
|
||
// String returns a description of `state`.
|
||
func (_ace *textState )String ()string {_ced :="\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]";if _ace ._gdbe !=nil {_ced =_ace ._gdbe .BaseFont ();};return _gd .Sprintf ("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071",_ace ._ecbb ,_ace ._gefe ,_ace ._dgc ,_ced );};func (_acddg *ruling )alignsPrimary (_cgbeb *ruling )bool {return _acddg ._fafed ==_cgbeb ._fafed &&_g .Abs (_acddg ._bfc -_cgbeb ._bfc )< _bgcb *0.5;};
|
||
|
||
// Text returns the extracted page text.
|
||
func (_fbaa PageText )Text ()string {return _fbaa ._cfde };
|
||
|
||
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
|
||
func (_ecea *TextMarkArray )BBox ()(_gdc .PdfRectangle ,bool ){var _cef _gdc .PdfRectangle ;_degb :=false ;for _ ,_bbfc :=range _ecea ._fbbe {if _bbfc .Meta ||_fcgcf (_bbfc .Text ){continue ;};if _degb {_cef =_cae (_cef ,_bbfc .BBox );}else {_cef =_bbfc .BBox ;_degb =true ;};};return _cef ,_degb ;};func (_bfdf *textObject )setHorizScaling (_dba float64 ){if _bfdf ==nil {return ;};_bfdf ._bgebc ._gacg =_dba ;};
|
||
|
||
// TextMark represents extracted text on a page with information regarding both textual content,
|
||
// formatting (font and size) and positioning.
|
||
// It is the smallest unit of text on a PDF page, typically a single character.
|
||
//
|
||
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
|
||
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
|
||
// `bbox` of substring `term` in `text`.
|
||
//
|
||
// ex, _ := New(page)
|
||
// // handle errors
|
||
// pageText, _, _, err := ex.ExtractPageText()
|
||
// // handle errors
|
||
// text := pageText.Text()
|
||
// textMarks := pageText.Marks()
|
||
//
|
||
// start := strings.Index(text, term)
|
||
// end := start + len(term)
|
||
// spanMarks, err := textMarks.RangeOffset(start, end)
|
||
// // handle errors
|
||
// bbox, ok := spanMarks.BBox()
|
||
// // handle errors
|
||
type TextMark struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Original is the text in the PDF. It has not been decoded like `Text`.
|
||
Original string ;
|
||
|
||
// BBox is the bounding box of the text.
|
||
BBox _gdc .PdfRectangle ;
|
||
|
||
// Font is the font the text was drawn with.
|
||
Font *_gdc .PdfFont ;
|
||
|
||
// FontSize is the font size the text was drawn with.
|
||
FontSize float64 ;
|
||
|
||
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
|
||
// text, textMarks := pageText.Text(), pageText.Marks()
|
||
// marks := textMarks.Elements()
|
||
// then marks[i].Offset is the offset of marks[i].Text in text.
|
||
Offset int ;
|
||
|
||
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
|
||
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
|
||
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
|
||
Meta bool ;
|
||
|
||
// FillColor is the fill color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
FillColor _dc .Color ;
|
||
|
||
// StrokeColor is the stroke color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
StrokeColor _dc .Color ;
|
||
|
||
// Orientation is the text orientation
|
||
Orientation int ;};func _cgcc (_aecgf float64 )bool {return _g .Abs (_aecgf )< _bgcb };
|
||
|
||
// ToTextMark returns the public view of `tm`.
|
||
func (_fccb *textMark )ToTextMark ()TextMark {return TextMark {Text :_fccb ._bdgg ,Original :_fccb ._debf ,BBox :_fccb ._fgad ,Font :_fccb ._agee ,FontSize :_fccb ._fcdad ,FillColor :_fccb ._eggc ,StrokeColor :_fccb ._bgfb ,Orientation :_fccb ._dfaf };};type paraList []*textPara ;
|
||
|
||
// String returns a string describing `pt`.
|
||
func (_ecca PageText )String ()string {_cedf :=_gd .Sprintf ("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073",len (_ecca ._bdbb ));_ceef :=[]string {"\u002d"+_cedf };for _ ,_acf :=range _ecca ._bdbb {_ceef =append (_ceef ,_acf .String ());};_ceef =append (_ceef ,"\u002b"+_cedf );return _ef .Join (_ceef ,"\u000a");};
|
||
|
||
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
|
||
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
|
||
func (_gb *Extractor )ExtractTextWithStats ()(_bdb string ,_ebf int ,_acg int ,_fc error ){_acgc ,_ebf ,_acg ,_fc :=_gb .ExtractPageText ();if _fc !=nil {return "",_ebf ,_acg ,_fc ;};return _acgc .Text (),_ebf ,_acg ,nil ;};func (_gag *stateStack )size ()int {return len (*_gag )};
|
||
|
||
// String returns a description of `t`.
|
||
func (_fdebg *textTable )String ()string {return _gd .Sprintf ("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074",_fdebg ._ffbe ,_fdebg ._egbe ,_fdebg ._fgdd );};func _baaag (_eccf ,_dgggd int )int {if _eccf > _dgggd {return _eccf ;};return _dgggd ;};func _gbce (_aaea map[int ][]float64 )string {_acfc :=_cbag (_aaea );_cagac :=make ([]string ,len (_aaea ));for _faae ,_degc :=range _acfc {_cagac [_faae ]=_gd .Sprintf ("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066",_degc ,_aaea [_degc ]);};return _gd .Sprintf ("\u007b\u0025\u0073\u007d",_ef .Join (_cagac ,"\u002c\u0020"));};func _cgcb (_eab []TextMark ,_fccfa *int ,_bceb TextMark )[]TextMark {_bceb .Offset =*_fccfa ;_eab =append (_eab ,_bceb );*_fccfa +=len (_bceb .Text );return _eab ;};func _ccae (_dcfc ,_ecec bounded )float64 {_afcd :=_bcf (_dcfc ,_ecec );if !_gbee (_afcd ){return _afcd ;};return _aecde (_dcfc ,_ecec );};func (_cfba *textLine )appendWord (_bdfe *textWord ){_cfba ._cfgd =append (_cfba ._cfgd ,_bdfe );_cfba .PdfRectangle =_cae (_cfba .PdfRectangle ,_bdfe .PdfRectangle );if _bdfe ._bgdgg > _cfba ._aegc {_cfba ._aegc =_bdfe ._bgdgg ;};if _bdfe ._ggccd > _cfba ._febe {_cfba ._febe =_bdfe ._ggccd ;};};func (_geca rulingList )vertsHorzs ()(rulingList ,rulingList ){var _dcdeb ,_dda rulingList ;for _ ,_cafb :=range _geca {switch _cafb ._fafed {case _gddg :_dcdeb =append (_dcdeb ,_cafb );case _dbff :_dda =append (_dda ,_cafb );};};return _dcdeb ,_dda ;};func (_aage paraList )yNeighbours (_adgcc float64 )map[*textPara ][]int {_bbfa :=make ([]event ,2*len (_aage ));if _adgcc ==0{for _aafac ,_eege :=range _aage {_bbfa [2*_aafac ]=event {_eege .Lly ,true ,_aafac };_bbfa [2*_aafac +1]=event {_eege .Ury ,false ,_aafac };};}else {for _cegd ,_eegdb :=range _aage {_bbfa [2*_cegd ]=event {_eegdb .Lly -_adgcc *_eegdb .fontsize (),true ,_cegd };_bbfa [2*_cegd +1]=event {_eegdb .Ury +_adgcc *_eegdb .fontsize (),false ,_cegd };};};return _aage .eventNeighbours (_bbfa );};func (_gadf gridTile )contains 
(_ggbc _gdc .PdfRectangle )bool {if _gadf .numBorders ()< 3{return false ;};if _gadf ._aged &&_ggbc .Llx < _gadf .Llx -_aabc {return false ;};if _gadf ._gbgb &&_ggbc .Urx > _gadf .Urx +_aabc {return false ;};if _gadf ._cgcd &&_ggbc .Lly < _gadf .Lly -_aabc {return false ;};if _gadf ._aefa &&_ggbc .Ury > _gadf .Ury +_aabc {return false ;};return true ;};func (_ecdfd *textTable )computeBbox ()_gdc .PdfRectangle {var _gfbb _gdc .PdfRectangle ;_bgdeg :=false ;for _fdbd :=0;_fdbd < _ecdfd ._egbe ;_fdbd ++{for _ccgb :=0;_ccgb < _ecdfd ._ffbe ;_ccgb ++{_cdbf :=_ecdfd .get (_ccgb ,_fdbd );if _cdbf ==nil {continue ;};if !_bgdeg {_gfbb =_cdbf .PdfRectangle ;_bgdeg =true ;}else {_gfbb =_cae (_gfbb ,_cdbf .PdfRectangle );};};};return _gfbb ;};func (_beff *wordBag )highestWord (_eeca int ,_dfe ,_abcdc float64 )*textWord {for _ ,_efge :=range _beff ._fdebe [_eeca ]{if _dfe <=_efge ._ggccd &&_efge ._ggccd <=_abcdc {return _efge ;};};return nil ;};func (_bdea *textLine )bbox ()_gdc .PdfRectangle {return _bdea .PdfRectangle };func (_ebbed *shapesState )lineTo (_bagc ,_bbe float64 ){if _ebge {_adb .Log .Info ("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066",_bagc ,_bbe ,_ebbed .devicePoint (_bagc ,_bbe ));};_ebbed .addPoint (_bagc ,_bbe );};func (_agdf compositeCell )String ()string {_eceg :="";if len (_agdf .paraList )> 0{_eceg =_fgde (_agdf .paraList .merge ().text (),50);};return _gd .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071",_agdf .PdfRectangle ,len (_agdf .paraList ),_eceg );};func (_aedg *textTable )markCells (){for _acbef :=0;_acbef < _aedg ._egbe ;_acbef ++{for _cdbd :=0;_cdbd < _aedg ._ffbe ;_cdbd ++{_ceda :=_aedg .get (_cdbd ,_acbef );if _ceda !=nil {_ceda ._ccbb =true ;};};};};type gridTile struct{_gdc .PdfRectangle ;_aefa ,_aged ,_cgcd ,_gbgb bool ;};func _dcfcg (_gffga []*textMark ,_dgad _gdc .PdfRectangle )*textWord 
{_geaaa :=_gffga [0].PdfRectangle ;_gacb :=_gffga [0]._fcdad ;for _ ,_fdga :=range _gffga [1:]{_geaaa =_cae (_geaaa ,_fdga .PdfRectangle );if _fdga ._fcdad > _gacb {_gacb =_fdga ._fcdad ;};};return &textWord {PdfRectangle :_geaaa ,_bbbafa :_gffga ,_ggccd :_dgad .Ury -_geaaa .Lly ,_bgdgg :_gacb };};
|
||
|
||
// PageImages represents extracted images on a PDF page with spatial information:
|
||
// display position and size.
|
||
type PageImages struct{Images []ImageMark ;};
|
||
|
||
// String returns a string describing the current state of the textState stack.
|
||
func (_cgdc *stateStack )String ()string {_eage :=[]string {_gd .Sprintf ("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064",len (*_cgdc ))};for _cdbb ,_gegd :=range *_cgdc {_dcf :="\u003c\u006e\u0069l\u003e";if _gegd !=nil {_dcf =_gegd .String ();};_eage =append (_eage ,_gd .Sprintf ("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073",_cdbb ,_dcf ));};return _ef .Join (_eage ,"\u000a");};func (_gfgd compositeCell )parasBBox ()(paraList ,_gdc .PdfRectangle ){return _gfgd .paraList ,_gfgd .PdfRectangle ;};func (_bdcc *shapesState )lastpointEstablished ()(_gg .Point ,bool ){if _bdcc ._bbb {return _bdcc ._ddcd ,false ;};_ffbg :=len (_bdcc ._bcce );if _ffbg > 0&&_bdcc ._bcce [_ffbg -1]._gfad {return _bdcc ._bcce [_ffbg -1].last (),false ;};return _gg .Point {},true ;};
|
||
|
||
// String returns a human readable description of `vecs`.
|
||
func (_ecfc rulingList )String ()string {if len (_ecfc )==0{return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}";};_gfcc ,_ebdb :=_ecfc .vertsHorzs ();_gcff :=len (_gfcc );_eagc :=len (_ebdb );if _gcff ==0||_eagc ==0{return _gd .Sprintf ("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}",_gcff ,_eagc );};_edfd :=_gdc .PdfRectangle {Llx :_gfcc [0]._bfc ,Urx :_gfcc [_gcff -1]._bfc ,Lly :_ebdb [_eagc -1]._bfc ,Ury :_ebdb [0]._bfc };return _gd .Sprintf ("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d",_gcff ,_eagc ,_edfd );};
|
||
|
||
// String returns a description of `b`.
|
||
// The lines below hold several generated definitions back to back. In order:
//
//   - (*wordBag).String: gathers the `_adad` text of every word reachable via
//     depthIndexes() and prints it with the bag's rectangle, `_ecba` and the
//     word count.
//   - (*textObject).getStrokeColor: returns _eeda() applied to the graphics
//     state's stroking colorspace and stroking color.
//   - (*wordBag).removeWord: removes a word from bucket `_gffb` of `_fdebe`
//     using _cfdca, deleting the bucket once it becomes empty.
//   - _eafg: passes |dx| and |dy| of two points to _fefec.
//   - _fgae: walks the text marks in order and groups them into textWords
//     (the debug label is "makeTextWords"). A mark for which _fcgcf is true
//     ends the current word. A new word is started when the `_gcf` gap or the
//     depth difference, both scaled by the word's `_bgdgg`, fall outside the
//     thresholds `_cfc`, `_adbe` and `_eace`. When `_cgbd` is set, diacritic
//     marks detected by _bgccee are merged into the neighbouring mark instead.
//   - _cfb: reports whether the X ranges of two rectangles overlap (inclusive).
//   - _cbef: builds a rulingList from the quadrilateral subpaths of the given
//     path sections via makeRectRuling (debug label "makeFillRulings").
//   - (rulingList).augmentGrid: splits the list with vertsHorzs() and, where
//     one group's bounding box extends past the other's by more than `_ffff`,
//     adds a ruling of kind `_abb` on that side. Returns the two lists.
//   - _cece: rounds a value to the nearest multiple of `_ddff`.
//   - _gcda: returns the indexes 0..n-1 ordered by the supplied comparison.
//   - (*subpath).isQuadrilateral: true for 4 points, or for 5 points when the
//     first and last coincide.
//   - (*ruling).gridIntersecting: true when _bceca holds for both the `_edebg`
//     and the `_dbfe` values of the two rulings.
//   - (paraList).addNeighbours: fills the `_fdeda`, `_aagg`, `_abaa` and
//     `_dabb` neighbour links of each paragraph from the yNeighbours and
//     xNeighbours candidates, then clears any link that is not reciprocated.
//   - _cbg: builds a gridTile whose four flags record whether each supplied
//     ruling is non-nil and encloses the tile's extent on that side.
//   - pathSection: a list of subpaths plus an embedded color.
//   - (rulingList).toTilings: tidies the list, converts it with toGrids() and
//     returns the tidied list plus one asTiling() result per grid.
//   - _afgc: returns a reversed copy of an int slice.
func (_bfdg *wordBag )String ()string {var _gbbg []string ;for _ ,_fefb :=range _bfdg .depthIndexes (){_cgg :=_bfdg ._fdebe [_fefb ];for _ ,_aedfg :=range _cgg {_gbbg =append (_gbbg ,_aedfg ._adad );};};return _gd .Sprintf ("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071",_bfdg .PdfRectangle ,_bfdg ._ecba ,len (_gbbg ),_gbbg );};func (_fec *textObject )getStrokeColor ()_dc .Color {return _eeda (_fec ._dca .ColorspaceStroking ,_fec ._dca .ColorStroking );};func (_fagd *wordBag )removeWord (_bggef *textWord ,_gffb int ){_bagf :=_fagd ._fdebe [_gffb ];_bagf =_cfdca (_bagf ,_bggef );if len (_bagf )==0{delete (_fagd ._fdebe ,_gffb );}else {_fagd ._fdebe [_gffb ]=_bagf ;};};func _eafg (_gdcg ,_efce _gg .Point )bool {_ebgg :=_g .Abs (_gdcg .X -_efce .X );_fgee :=_g .Abs (_gdcg .Y -_efce .Y );return _fefec (_ebgg ,_fgee );};func _fgae (_abga []*textMark ,_aedaa _gdc .PdfRectangle )[]*textWord {var _babc []*textWord ;var _efgfe *textWord ;if _aagc {_adb .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_abga ));};_edeag :=func (){if _efgfe !=nil {_dada :=_efgfe .computeText ();if !_fcgcf (_dada ){_efgfe ._adad =_dada ;_babc =append (_babc ,_efgfe );if _aagc {_adb .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_babc )-1,_efgfe .String ());for _cgeba ,_bbdge :=range _efgfe ._bbbafa {_gd .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cgeba ,_bbdge .String ());};};};_efgfe =nil ;};};for _ ,_bcddbc :=range _abga {if _cgbd &&_efgfe !=nil &&len (_efgfe ._bbbafa )> 0{_cbfdc :=_efgfe ._bbbafa [len (_efgfe ._bbbafa )-1];_fgcbc ,_ddca :=_bgccee (_bcddbc ._bdgg );_fbbfa ,_afee :=_bgccee (_cbfdc ._bdgg );if _ddca &&!_afee &&_cbfdc .inDiacriticArea (_bcddbc ){_efgfe .addDiacritic (_fgcbc 
);continue ;};if _afee &&!_ddca &&_bcddbc .inDiacriticArea (_cbfdc ){_efgfe ._bbbafa =_efgfe ._bbbafa [:len (_efgfe ._bbbafa )-1];_efgfe .appendMark (_bcddbc ,_aedaa );_efgfe .addDiacritic (_fbbfa );continue ;};};_dageag :=_fcgcf (_bcddbc ._bdgg );if _dageag {_edeag ();continue ;};if _efgfe ==nil &&!_dageag {_efgfe =_dcfcg ([]*textMark {_bcddbc },_aedaa );continue ;};_geadb :=_efgfe ._bgdgg ;_gdfb :=_g .Abs (_ebgb (_aedaa ,_bcddbc )-_efgfe ._ggccd )/_geadb ;_bdfeb :=_gcf (_bcddbc ,_efgfe )/_geadb ;if _bdfeb >=_cfc ||!(-_adbe <=_bdfeb &&_gdfb <=_eace ){_edeag ();_efgfe =_dcfcg ([]*textMark {_bcddbc },_aedaa );continue ;};_efgfe .appendMark (_bcddbc ,_aedaa );};_edeag ();return _babc ;};func _cfb (_ceaa ,_gagb _gdc .PdfRectangle )bool {return _gagb .Llx <=_ceaa .Urx &&_ceaa .Llx <=_gagb .Urx ;};func _cbef (_ceea []pathSection )rulingList {_bgbff (_ceea );if _cfag {_adb .Log .Info ("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs",len (_ceea ));};var _bgea rulingList ;for _ ,_eegbg :=range _ceea {for _ ,_acga :=range _eegbg ._baca {if !_acga .isQuadrilateral (){if _cfag {_adb .Log .Error ("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073",_acga );};continue ;};if _cgdcg ,_aaacc :=_acga .makeRectRuling (_eegbg .Color );_aaacc {_bgea =append (_bgea ,_cgdcg );}else {if _beeb {_adb .Log .Error ("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073",_acga );};};};};if _cfag {_adb .Log .Info ("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073",_bgea .String ());};return _bgea ;};func (_dfbd rulingList )augmentGrid ()(rulingList ,rulingList ){_gcgf ,_aacc :=_dfbd .vertsHorzs ();if len (_gcgf )==0||len (_aacc )==0{return _gcgf ,_aacc ;};_edaf ,_fdfb :=_gcgf ,_aacc ;_addd :=_gcgf .bbox ();_dffc :=_aacc .bbox ();if _cfag {_adb .Log .Info 
("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_addd );_adb .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_dffc );};var _gffge ,_babf ,_adgc ,_ceecg *ruling ;if _dffc .Llx < _addd .Llx -_ffff {_gffge =&ruling {_fffg :_abb ,_fafed :_gddg ,_bfc :_dffc .Llx ,_edebg :_addd .Lly ,_dbfe :_addd .Ury };_gcgf =append (rulingList {_gffge },_gcgf ...);};if _dffc .Urx > _addd .Urx +_ffff {_babf =&ruling {_fffg :_abb ,_fafed :_gddg ,_bfc :_dffc .Urx ,_edebg :_addd .Lly ,_dbfe :_addd .Ury };_gcgf =append (_gcgf ,_babf );};if _addd .Lly < _dffc .Lly -_ffff {_adgc =&ruling {_fffg :_abb ,_fafed :_dbff ,_bfc :_addd .Lly ,_edebg :_dffc .Llx ,_dbfe :_dffc .Urx };_aacc =append (rulingList {_adgc },_aacc ...);};if _addd .Ury > _dffc .Ury +_ffff {_ceecg =&ruling {_fffg :_abb ,_fafed :_dbff ,_bfc :_addd .Ury ,_edebg :_dffc .Llx ,_dbfe :_dffc .Urx };_aacc =append (_aacc ,_ceecg );};if len (_gcgf )+len (_aacc )==len (_dfbd ){return _edaf ,_fdfb ;};_gdac :=append (_gcgf ,_aacc ...);_dfbd .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_gdac .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");return _gcgf ,_aacc ;};func _cece (_fgbd float64 )float64 {return _ddff *_g .Round (_fgbd /_ddff )};func _gcda (_gbfg int ,_aeccd func (int ,int )bool )[]int {_dddg :=make ([]int ,_gbfg );for _ggebf :=range _dddg {_dddg [_ggebf ]=_ggebf ;};_ad .Slice (_dddg ,func (_dfdcb ,_cadc int )bool {return _aeccd (_dddg [_dfdcb ],_dddg [_cadc ])});return _dddg ;};func (_cefe *subpath )isQuadrilateral ()bool {if len (_cefe ._dee )< 4||len (_cefe ._dee )> 5{return false ;};if len (_cefe ._dee )==5{_gagd :=_cefe ._dee [0];_gdfgf :=_cefe ._dee [4];if _gagd .X !=_gdfgf .X ||_gagd .Y !=_gdfgf .Y {return false ;};};return true ;};func (_bgbb *ruling )gridIntersecting (_eaefe *ruling )bool {return _bceca 
(_bgbb ._edebg ,_eaefe ._edebg )&&_bceca (_bgbb ._dbfe ,_eaefe ._dbfe );};func (_bbeg paraList )addNeighbours (){_abgcb :=func (_bagd []int ,_feda *textPara )([]*textPara ,[]*textPara ){_eade :=make ([]*textPara ,0,len (_bagd )-1);_dfcde :=make ([]*textPara ,0,len (_bagd )-1);for _ ,_gcffe :=range _bagd {_adcf :=_bbeg [_gcffe ];if _adcf .Urx <=_feda .Llx {_eade =append (_eade ,_adcf );}else if _adcf .Llx >=_feda .Urx {_dfcde =append (_dfcde ,_adcf );};};return _eade ,_dfcde ;};_bafb :=func (_gafaa []int ,_cacf *textPara )([]*textPara ,[]*textPara ){_acaa :=make ([]*textPara ,0,len (_gafaa )-1);_dfeb :=make ([]*textPara ,0,len (_gafaa )-1);for _ ,_gfgdg :=range _gafaa {_cbcab :=_bbeg [_gfgdg ];if _cbcab .Ury <=_cacf .Lly {_dfeb =append (_dfeb ,_cbcab );}else if _cbcab .Lly >=_cacf .Ury {_acaa =append (_acaa ,_cbcab );};};return _acaa ,_dfeb ;};_eagg :=_bbeg .yNeighbours (_afff );for _ ,_fbbaf :=range _bbeg {_fbdd :=_eagg [_fbbaf ];if len (_fbdd )==0{continue ;};_gbbb ,_fffee :=_abgcb (_fbdd ,_fbbaf );if len (_gbbb )==0&&len (_fffee )==0{continue ;};if len (_gbbb )> 0{_efaf :=_gbbb [0];for _ ,_dbbbg :=range _gbbb [1:]{if _dbbbg .Urx >=_efaf .Urx {_efaf =_dbbbg ;};};for _ ,_beeda :=range _gbbb {if _beeda !=_efaf &&_beeda .Urx > _efaf .Llx {_efaf =nil ;break ;};};if _efaf !=nil &&_ggfg (_fbbaf .PdfRectangle ,_efaf .PdfRectangle ){_fbbaf ._fdeda =_efaf ;};};if len (_fffee )> 0{_ccedf :=_fffee [0];for _ ,_fbdg :=range _fffee [1:]{if _fbdg .Llx <=_ccedf .Llx {_ccedf =_fbdg ;};};for _ ,_bcbf :=range _fffee {if _bcbf !=_ccedf &&_bcbf .Llx < _ccedf .Urx {_ccedf =nil ;break ;};};if _ccedf !=nil &&_ggfg (_fbbaf .PdfRectangle ,_ccedf .PdfRectangle ){_fbbaf ._aagg =_ccedf ;};};};_eagg =_bbeg .xNeighbours (_edbb );for _ ,_bdfbg :=range _bbeg {_abdg :=_eagg [_bdfbg ];if len (_abdg )==0{continue ;};_cbeac ,_feebd :=_bafb (_abdg ,_bdfbg );if len (_cbeac )==0&&len (_feebd )==0{continue ;};if len (_feebd )> 0{_gaefb :=_feebd [0];for _ ,_ddgf :=range _feebd [1:]{if _ddgf .Ury >=_gaefb 
.Ury {_gaefb =_ddgf ;};};for _ ,_fbcea :=range _feebd {if _fbcea !=_gaefb &&_fbcea .Ury > _gaefb .Lly {_gaefb =nil ;break ;};};if _gaefb !=nil &&_cfb (_bdfbg .PdfRectangle ,_gaefb .PdfRectangle ){_bdfbg ._abaa =_gaefb ;};};if len (_cbeac )> 0{_edfbc :=_cbeac [0];for _ ,_gdgae :=range _cbeac [1:]{if _gdgae .Lly <=_edfbc .Lly {_edfbc =_gdgae ;};};for _ ,_fcgff :=range _cbeac {if _fcgff !=_edfbc &&_fcgff .Lly < _edfbc .Ury {_edfbc =nil ;break ;};};if _edfbc !=nil &&_cfb (_bdfbg .PdfRectangle ,_edfbc .PdfRectangle ){_bdfbg ._dabb =_edfbc ;};};};for _ ,_gccc :=range _bbeg {if _gccc ._fdeda !=nil &&_gccc ._fdeda ._aagg !=_gccc {_gccc ._fdeda =nil ;};if _gccc ._dabb !=nil &&_gccc ._dabb ._abaa !=_gccc {_gccc ._dabb =nil ;};if _gccc ._aagg !=nil &&_gccc ._aagg ._fdeda !=_gccc {_gccc ._aagg =nil ;};if _gccc ._abaa !=nil &&_gccc ._abaa ._dabb !=_gccc {_gccc ._abaa =nil ;};};};func _cbg (_fggbd _gdc .PdfRectangle ,_bfgd ,_cgbfb ,_cgbe ,_bfaafc *ruling )gridTile {_aeda :=_fggbd .Llx ;_gfcd :=_fggbd .Urx ;_dfbe :=_fggbd .Lly ;_ddgg :=_fggbd .Ury ;return gridTile {PdfRectangle :_fggbd ,_aged :_bfgd !=nil &&_bfgd .encloses (_dfbe ,_ddgg ),_gbgb :_cgbfb !=nil &&_cgbfb .encloses (_dfbe ,_ddgg ),_cgcd :_cgbe !=nil &&_cgbe .encloses (_aeda ,_gfcd ),_aefa :_bfaafc !=nil &&_bfaafc .encloses (_aeda ,_gfcd )};};type pathSection struct{_baca []*subpath ;_dc .Color ;};func (_begba rulingList )toTilings ()(rulingList ,[]gridTiling ){_begba .log ("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s");if len (_begba )==0{return nil ,nil ;};_begba =_begba .tidied ("\u0061\u006c\u006c");_begba .log ("\u0074\u0069\u0064\u0069\u0065\u0064");_bbdd :=_begba .toGrids ();_agag :=make ([]gridTiling ,len (_bbdd ));for _bfee ,_dgbc :=range _bbdd {_agag [_bfee ]=_dgbc .asTiling ();};return _begba ,_agag ;};func _afgc (_cgcg []int )[]int {_cdgc :=make ([]int ,len (_cgcg ));for _adef ,_dbged :=range _cgcg {_cdgc [len (_cgcg )-1-_adef ]=_dbged ;};return _cdgc ;};
|
||
|
||
// String returns a string describing `i`.
|
||
// String prints the tile's rectangle followed by four one-character flags.
// Each flag shows its letter ("L", "R", "B", "T" for `_aged`, `_gbgb`,
// `_cgcd`, `_aefa` respectively) when set and "_" when clear.
func (_dfbaf gridTile) String() string {
	flag := func(set bool, letter string) string {
		if !set {
			return "\u005f"
		}
		return letter
	}
	left := flag(_dfbaf._aged, "\u004c")
	right := flag(_dfbaf._gbgb, "\u0052")
	bottom := flag(_dfbaf._cgcd, "\u0042")
	top := flag(_dfbaf._aefa, "\u0054")
	return _gd.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", _dfbaf.PdfRectangle, left, right, bottom, top)
}

// removeDuplicates sorts the receiver in place and returns a new list in which
// every element that equals() its immediate predecessor in the output has been
// dropped. An empty receiver yields nil.
func (_ecee rulingList) removeDuplicates() rulingList {
	if len(_ecee) == 0 {
		return nil
	}
	_ecee.sort()
	unique := rulingList{_ecee[0]}
	for _, candidate := range _ecee[1:] {
		if last := unique[len(unique)-1]; !candidate.equals(last) {
			unique = append(unique, candidate)
		}
	}
	return unique
}

// growTable repeatedly extends the table with the paragraphs returned by
// getDown() (a new row, incrementing `_egbe`) and getRight() (a new column,
// incrementing `_ffbe`) until neither yields anything.
//
// When both are available and share the same final paragraph (and that
// paragraph's `_ccbb` flag is clear), a row is added, getRight() is re-queried
// and, if it is non-nil, a column is added and the shared paragraph is placed
// in the new corner cell. Otherwise a row is preferred over a column.
func (_fdgg *textTable) growTable() {
	appendRow := func(row paraList) {
		_fdgg._egbe++
		for x := 0; x < _fdgg._ffbe; x++ {
			_fdgg.put(x, _fdgg._egbe-1, row[x])
		}
	}
	appendCol := func(col paraList) {
		_fdgg._ffbe++
		for y := 0; y < _fdgg._egbe; y++ {
			_fdgg.put(_fdgg._ffbe-1, y, col[y])
		}
	}

	if _eded {
		_fdgg.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}

	for pass := 0; ; pass++ {
		down := _fdgg.getDown()
		right := _fdgg.getRight()
		if _eded {
			_gd.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", pass, _fdgg)
			_gd.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", down)
			_gd.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", right)
		}

		// Diagonal growth: the row below and the column to the right end in
		// the same paragraph.
		grewCorner := false
		if down != nil && right != nil {
			corner := down[len(down)-1]
			if corner != nil && !corner._ccbb && corner == right[len(right)-1] {
				appendRow(down)
				if refreshed := _fdgg.getRight(); refreshed != nil {
					appendCol(refreshed)
					_fdgg.put(_fdgg._ffbe-1, _fdgg._egbe-1, corner)
				}
				grewCorner = true
			}
		}

		switch {
		case grewCorner:
			// Already extended this pass.
		case down != nil:
			appendRow(down)
		case right != nil:
			appendCol(right)
		default:
			return
		}
	}
}
|
||
|
||
// ImageMark represents an image drawn on a page and its position in device coordinates.
|
||
// All coordinates are in device coordinates.
|
||
type ImageMark struct{Image *_gdc .Image ;
|
||
|
||
// Dimensions of the image as displayed in the PDF.
|
||
Width float64 ;Height float64 ;
|
||
|
||
// Position of the image in PDF coordinates (lower left corner).
|
||
X float64 ;Y float64 ;
|
||
|
||
// Angle in degrees, if rotated.
|
||
Angle float64 ;};func (_gcae *textObject )renderText (_bgfe []byte )error {if _gcae ._bdf {_adb .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");return nil ;};_cec :=_gcae .getCurrentFont ();_gfab :=_cec .BytesToCharcodes (_bgfe );_bag ,_edag ,_edc :=_cec .CharcodesToStrings (_gfab );if _edc > 0{_adb .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_edag ,_edc );};_gcae ._bgebc ._gfge +=_edag ;_gcae ._bgebc ._dce +=_edc ;_gfaf :=_gcae ._bgebc ;_fbe :=_gfaf ._dgc ;_bed :=_gfaf ._gacg /100.0;_daca ,_gfd :=_cec .GetRuneMetrics (' ');if !_gfd {_daca ,_gfd =_cec .GetCharMetrics (32);};if !_gfd {_daca ,_ =_gdc .DefaultFont ().GetRuneMetrics (' ');};_dbdaa :=_daca .Wx *_fff ;_adb .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_dbdaa ,_bag ,_cec ,_fbe );_dcef :=_gg .NewMatrix (_fbe *_bed ,0,0,_fbe ,0,_gfaf ._daa );if _aaac {_adb .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_gfab ),_gfab ,_bag );};_adb .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_gfab ),_gfab ,len (_bag ));_eaeb :=_gcae .getFillColor ();_cea :=_gcae .getStrokeColor ();for _aceg ,_bfaaf :=range _bag {_aba :=[]rune (_bfaaf );if len (_aba )==1&&_aba 
[0]=='\x00'{continue ;};_agcb :=_gfab [_aceg ];_cfed :=_gcae ._dca .CTM .Mult (_gcae ._aac ).Mult (_dcef );_aeee :=0.0;if len (_aba )==1&&_aba [0]==32{_aeee =_gfaf ._gefe ;};_fcf ,_debb :=_cec .GetCharMetrics (_agcb );if !_debb {_adb .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_agcb ,_aba ,_aba ,_cec );return _gd .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_cec .String (),_agcb );};_cgda :=_gg .Point {X :_fcf .Wx *_fff ,Y :_fcf .Wy *_fff };_bgcc :=_gg .Point {X :(_cgda .X *_fbe +_aeee )*_bed };_ddfdf :=_gg .Point {X :(_cgda .X *_fbe +_gfaf ._ecbb +_aeee )*_bed };if _aaac {_adb .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_fbe ,_gfaf ._ecbb ,_gfaf ._gefe ,_bed );_adb .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_cgda ,_bgcc ,_ddfdf );};_gbf :=_fafc (_bgcc );_fga :=_fafc (_ddfdf );_gebe :=_gcae ._dca .CTM .Mult (_gcae ._aac ).Mult (_gbf );if _ecdb {_adb .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_gcae ._dca .CTM ,_gcae ._aac ,_fga ,_ggg (_gcae ._dca .CTM .Mult (_gcae ._aac ).Mult (_fga )),_gbf ,_gebe ,_ggg (_gebe ));};_egg ,_gbb :=_gcae .newTextMark (_ga .ExpandLigatures (_aba ),_cfed ,_ggg (_gebe ),_g .Abs (_dbdaa *_cfed 
.ScalingFactorX ()),_cec ,_gcae ._bgebc ._ecbb ,_eaeb ,_cea );if !_gbb {_adb .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");continue ;};if _cec ==nil {_adb .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _cec .Encoder ()==nil {_adb .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_cec );}else {if _ddfe ,_fcae :=_cec .Encoder ().CharcodeToRune (_agcb );_fcae {_egg ._debf =string (_ddfe );};};_adb .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_aceg ,_agcb ,_egg ,_cfed );_gcae ._fbad =append (_gcae ._fbad ,&_egg );_gcae ._aac .Concat (_fga );};return nil ;};func (_ceed rulingList )bbox ()_gdc .PdfRectangle {var _egefa _gdc .PdfRectangle ;if len (_ceed )==0{_adb .Log .Error ("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073");return _gdc .PdfRectangle {};};if _ceed [0]._fafed ==_dbff {_egefa .Llx ,_egefa .Urx =_ceed .secMinMax ();_egefa .Lly ,_egefa .Ury =_ceed .primMinMax ();}else {_egefa .Llx ,_egefa .Urx =_ceed .primMinMax ();_egefa .Lly ,_egefa .Ury =_ceed .secMinMax ();};return _egefa ;};
|
||
|
||
// Extractor stores and offers functionality for extracting content from PDF pages.
|
||
// Field notes, based on how New populates the struct: `_adf` receives the
// result of page.GetAllContentStreams(), `_db` the page's Resources and `_ea`
// the media box. `_bc` (fontEntry values) and `_gea` (textResult values) start
// as empty string-keyed maps. `_ggc` and `_eba` are left at zero by New.
type Extractor struct{_adf string ;_db *_gdc .PdfPageResources ;_ea _gdc .PdfRectangle ;_bc map[string ]fontEntry ;_gea map[string ]textResult ;_ggc int64 ;_eba int ;};
|
||
|
||
// Elements returns the TextMarks in `ma`.
|
||
// Elements hands back the array's underlying slice of TextMarks (`_fbbe`)
// without copying it.
func (_dcaf *TextMarkArray) Elements() []TextMark {
	marks := _dcaf._fbbe
	return marks
}
|
||
|
||
// String returns a description of `w`.
|
||
func (_eadg *textWord )String ()string {return _gd .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_eadg ._ggccd ,_eadg .PdfRectangle ,_eadg ._bgdgg ,_eadg ._adad );};func _daga (_eeaa []TextMark ,_bcab *int ,_ebdc string )[]TextMark {_ffdd :=_bded ;_ffdd .Text =_ebdc ;return _cgcb (_eeaa ,_bcab ,_ffdd );};const _eca =20;func (_eabc rulingList )connections (_ceafc map[int ]intSet ,_bdca int )intSet {_dbga :=make (intSet );_fgggd :=make (intSet );var _afab func (int );_afab =func (_gfdea int ){if !_fgggd .has (_gfdea ){_fgggd .add (_gfdea );for _dcdf :=range _eabc {if _ceafc [_dcdf ].has (_gfdea ){_dbga .add (_dcdf );};};for _agaa :=range _eabc {if _dbga .has (_agaa ){_afab (_agaa );};};};};_afab (_bdca );return _dbga ;};func _ccfd (_gabg ,_bgdf float64 )string {_fbce :=!_gbee (_gabg -_bgdf );if _fbce {return "\u000a";};return "\u0020";};func (_bfdba paraList )findTableGrid (_gegf gridTiling )(*textTable ,map[*textPara ]struct{}){_eedde :=len (_gegf ._cgaf );_dagea :=len (_gegf ._aafd );_geaa :=textTable {_fgdd :true ,_ffbe :_eedde ,_egbe :_dagea ,_gged :make (map[uint64 ]*textPara ,_eedde *_dagea ),_fgcc :make (map[uint64 ]compositeCell ,_eedde *_dagea )};_bfeed :=make (map[*textPara ]struct{});_dgcb :=int ((1.0-_egda )*float64 (_eedde *_dagea ));_efdf :=0;if _dceg {_adb .Log .Info ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064",_eedde ,_dagea );};for _edda ,_abfeg :=range _gegf ._aafd {_fegc ,_caed :=_gegf ._deae [_abfeg ];if !_caed {continue ;};for _aafa ,_afefe :=range _gegf ._cgaf {_dfbee ,_geeb :=_fegc [_afefe ];if !_geeb {continue ;};_eecae :=_bfdba .inTile (_dfbee );if len (_eecae )==0{_efdf ++;if _efdf > _dgcb {if _dceg {_adb .Log .Info ("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064",_efdf );};return nil ,nil ;};}else {_geaa .putComposite 
(_aafa ,_edda ,_eecae ,_dfbee .PdfRectangle );for _ ,_aabdf :=range _eecae {_bfeed [_aabdf ]=struct{}{};};};};};_fabf :=0;for _gaed :=0;_gaed < _eedde ;_gaed ++{_cgfcd :=_geaa .get (_gaed ,0);if _cgfcd ==nil ||!_cgfcd ._bgcce {_fabf ++;};};if _fabf ==0{if _dceg {_adb .Log .Info ("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030");};return nil ,nil ;};_aabca :=_geaa .reduceTiling (_gegf ,_fgfaf );_aabca =_aabca .subdivide ();return _aabca ,_bfeed ;};func _dgeg (_fbgc float64 ,_fcdec int )int {if _fcdec ==0{_fcdec =1;};_cdfd :=float64 (_fcdec );return int (_g .Round (_fbgc /_cdfd )*_cdfd );};
|
||
|
||
// New returns an Extractor instance for extracting content from the input PDF page.
|
||
// New returns an Extractor instance for extracting content from the input PDF
// page.
//
// The extractor is seeded with the page's combined content streams, its
// resources and its media box. A media box whose lower-left and upper-right
// coordinates are reversed on either axis is normalised (and logged).
//
// An error is returned when `page` is nil, when the content streams cannot be
// read, or when the page has no usable media box.
func New(page *_gdc.PdfPage) (*Extractor, error) {
	// Guard the exported entry point: a nil page would otherwise panic on the
	// first dereference below instead of reporting an error.
	if page == nil {
		return nil, _d.New("extractor: page is nil")
	}

	_cd, _ff := page.GetAllContentStreams()
	if _ff != nil {
		return nil, _ff
	}
	_cb, _ff := page.GetMediaBox()
	if _ff != nil {
		return nil, _gd.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", _ff)
	}

	_ada := &Extractor{
		_adf: _cd,
		_db:  page.Resources,
		_ea:  *_cb,
		_bc:  map[string]fontEntry{},
		_gea: map[string]textResult{},
	}

	// Normalise a media box whose corners are given in reverse order.
	if _ada._ea.Llx > _ada._ea.Urx {
		_adb.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", _ada._ea)
		_ada._ea.Llx, _ada._ea.Urx = _ada._ea.Urx, _ada._ea.Llx
	}
	if _ada._ea.Lly > _ada._ea.Ury {
		_adb.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", _ada._ea)
		_ada._ea.Lly, _ada._ea.Ury = _ada._ea.Ury, _ada._ea.Lly
	}
	return _ada, nil
}

// get looks up the paragraph stored in `_gged` under the key that _bgfea
// derives from the two indexes. A missing entry yields nil.
func (_edfcg *textTable) get(_cegc, _daccc int) *textPara {
	return _edfcg._gged[_bgfea(_cegc, _daccc)]
}
|
||
|
||
// String returns a description of `k`.
|
||
// String returns the name registered for the kind in `_becg`, or
// "Not a mark: <n>" when the kind has no entry.
func (_dded markKind) String() string {
	if name, known := _becg[_dded]; known {
		return name
	}
	return _gd.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _dded)
}

// _ggdg starts a new textLine from the first word of the bag at index `_aded`.
// The line takes that word's rectangle, `_bgdgg` and `_ggccd`, and the word is
// then moved into the line with pullWord.
func _ggdg(_egcca *wordBag, _aded int) *textLine {
	first := _egcca.firstWord(_aded)
	line := &textLine{
		PdfRectangle: first.PdfRectangle,
		_aegc:        first._bgdgg,
		_febe:        first._ggccd,
	}
	line.pullWord(_egcca, first, _aded)
	return line
}

// _fafd returns a textState with `_gacg` set to 100, `_gefc` set to
// RenderModeFill and `_bfaa` set to the given rectangle.
func _fafd(_ceg _gdc.PdfRectangle) textState {
	var state textState
	state._gacg = 100
	state._gefc = RenderModeFill
	state._bfaa = _ceg
	return state
}

// depth returns -1 when `_bgcce` is set. Otherwise it returns the `_febe` of
// the first line in `_acgf` if there is one, and failing that the depth() of
// `_cabb`.
func (_fadba *textPara) depth() float64 {
	switch {
	case _fadba._bgcce:
		return -1.0
	case len(_fadba._acgf) > 0:
		return _fadba._acgf[0]._febe
	default:
		return _fadba._cabb.depth()
	}
}

// _dadg creates a wordBag holding the single word `_fadf`, stored under the
// index that _beb derives from the word's `_ggccd`. The bag takes the word's
// rectangle and `_bgdgg`, and records `_bgdg` and the two ruling lists.
func _dadg(_fadf *textWord, _bgdg float64, _cccf, _fbfdc rulingList) *wordBag {
	bucket := _beb(_fadf._ggccd)
	words := map[int][]*textWord{bucket: {_fadf}}
	return &wordBag{
		_fdebe:       words,
		PdfRectangle: _fadf.PdfRectangle,
		_ecba:        _fadf._bgdgg,
		_bfg:         _bgdg,
		_ccc:         _cccf,
		_bffgc:       _fbfdc,
	}
}

// comp is a less-than comparison between the rulings at the two indexes.
//
// Rulings of different kind (`_fafed`) order by kind, greater first. Two
// rulings of kind `_afac` never compare less. Otherwise the comparison uses
// `_bfc` (greater first), then `_edebg`, then `_dbfe` (smaller first); for any
// kind other than `_dbff` each of those results is inverted.
func (_acagg rulingList) comp(_dcge, _gacfd int) bool {
	a, b := _acagg[_dcge], _acagg[_gacfd]
	kind := a._fafed
	if other := b._fafed; kind != other {
		return kind > other
	}
	if kind == _afac {
		return false
	}

	// Results are taken as-is for kind _dbff and negated for every other kind.
	keep := kind == _dbff
	switch {
	case a._bfc != b._bfc:
		return (a._bfc > b._bfc) == keep
	case a._edebg != b._edebg:
		return (a._edebg < b._edebg) == keep
	default:
		return (a._dbfe < b._dbfe) == keep
	}
}

// _fgeff reports whether rectangle `_ecdf` lies entirely within `_ceaac`
// (edges may touch).
func _fgeff(_ceaac, _ecdf _gdc.PdfRectangle) bool {
	insideX := _ceaac.Llx <= _ecdf.Llx && _ecdf.Urx <= _ceaac.Urx
	insideY := _ceaac.Lly <= _ecdf.Lly && _ecdf.Ury <= _ceaac.Ury
	return insideX && insideY
}
|
||
|
||
// TextTable represents a table.
|
||
// Cells are ordered top-to-bottom, left-to-right.
|
||
// Cells[y] is the (0-offset) y'th row in the table.
|
||
// Cells[y][x] is the cell in the (0-offset) x'th column of row y.
|
||
type TextTable struct {
	// W and H are presumably the table's width and height in cells; confirm
	// against the code that fills them.
	W, H int
	// Cells holds the table contents, indexed as Cells[y][x].
	Cells [][]TableCell
}

// toTextMarks flattens the paragraph into TextMarks, advancing the offset
// pointed to by `_befb`.
//
// A paragraph with no table (`_cabb` is nil) is emitted with toCellTextMarks.
// For a table paragraph the cells are visited row by row: a missing cell
// contributes a tab mark and a present cell its own marks. A space mark
// follows every cell, and a newline mark follows every row except the last.
func (_geecg *textPara) toTextMarks(_befb *int) []TextMark {
	table := _geecg._cabb
	if table == nil {
		return _geecg.toCellTextMarks(_befb)
	}

	var marks []TextMark
	for y := 0; y < table._egbe; y++ {
		for x := 0; x < table._ffbe; x++ {
			if cell := table.get(x, y); cell != nil {
				marks = append(marks, cell.toCellTextMarks(_befb)...)
			} else {
				marks = _daga(marks, _befb, "\u0009")
			}
			marks = _daga(marks, _befb, "\u0020")
		}
		if y < table._egbe-1 {
			marks = _daga(marks, _befb, "\u000a")
		}
	}
	return marks
}

// _gbee reports whether the magnitude of `_bcgg` is below the tolerance
// `_gceg`.
func _gbee(_bcgg float64) bool {
	magnitude := _g.Abs(_bcgg)
	return magnitude < _gceg
}
|
||
|
||
// ExtractText processes and extracts all text data in content streams and returns as a string.
|
||
// It takes into account character encodings in the PDF file, which are decoded by
|
||
// CharcodeBytesToUnicode.
|
||
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
|
||
func (_eea *Extractor )ExtractText ()(string ,error ){_cfd ,_ ,_ ,_bge :=_eea .ExtractTextWithStats ();return _cfd ,_bge ;};func (_dcgeb rulingList )splitSec ()[]rulingList {_ad .Slice (_dcgeb ,func (_aabe ,_gfba int )bool {_faaca ,_dfgd :=_dcgeb [_aabe ],_dcgeb [_gfba ];if _faaca ._edebg !=_dfgd ._edebg {return _faaca ._edebg < _dfgd ._edebg ;};return _faaca ._dbfe < _dfgd ._dbfe ;});_ebega :=make (map[*ruling ]struct{},len (_dcgeb ));_accg :=func (_bbef *ruling )rulingList {_bcfb :=rulingList {_bbef };_ebega [_bbef ]=struct{}{};for _ ,_bbaaa :=range _dcgeb {if _ ,_bfaab :=_ebega [_bbaaa ];_bfaab {continue ;};for _ ,_fbfgb :=range _bcfb {if _bbaaa .alignsSec (_fbfgb ){_bcfb =append (_bcfb ,_bbaaa );_ebega [_bbaaa ]=struct{}{};break ;};};};return _bcfb ;};_caggf :=[]rulingList {_accg (_dcgeb [0])};for _ ,_dcdde :=range _dcgeb [1:]{if _ ,_bdfab :=_ebega [_dcdde ];_bdfab {continue ;};_caggf =append (_caggf ,_accg (_dcdde ));};return _caggf ;};func _dbc (_afc *_aee .ContentStreamOperation )(float64 ,error ){if len (_afc .Params )!=1{_ddfd :=_d .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");_adb .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_afc .Operand ,1,len (_afc .Params ),_afc .Params );return 0.0,_ddfd ;};return _dg .GetNumberAsFloat (_afc .Params [0]);}; |