mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-26 13:48:55 +08:00
253 lines
180 KiB
Go
253 lines
180 KiB
Go
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/

//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
package extractor ;import (_ag "bytes";_be "errors";_gb "fmt";_f "github.com/unidoc/unipdf/v3/common";_dc "github.com/unidoc/unipdf/v3/contentstream";_ee "github.com/unidoc/unipdf/v3/core";_de "github.com/unidoc/unipdf/v3/internal/license";_ba "github.com/unidoc/unipdf/v3/internal/textencoding";_ab "github.com/unidoc/unipdf/v3/internal/transform";_ed "github.com/unidoc/unipdf/v3/model";_e "golang.org/x/text/unicode/norm";_d "golang.org/x/xerrors";_af "image/color";_g "io";_c "math";_a "regexp";_ae "sort";_bee "strings";_gf "unicode";_bc "unicode/utf8";);func (_aecbg compositeCell )split (_deaa ,_edfe []float64 )*textTable {_ceef :=len (_deaa )+1;_gfeb :=len (_edfe )+1;if _cece {_f .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_gfeb ,_ceef ,_aecbg ,_deaa ,_edfe );_gb .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_aecbg .paraList ));for _fcddg ,_cbfe :=range _aecbg .paraList {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fcddg ,_cbfe .String ());};_gb .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_aecbg .lines ()));for _daac ,_cafb :=range _aecbg .lines (){_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_daac ,_cafb );};};_deaa =_geee (_deaa ,_aecbg .Ury ,_aecbg .Lly );_edfe =_geee (_edfe ,_aecbg .Llx ,_aecbg .Urx );_dabea :=make (map[uint64 ]*textPara ,_gfeb *_ceef );_aefe :=textTable {_effgf :_gfeb ,_cecff :_ceef ,_afdde :_dabea };_bfag :=_aecbg .paraList ;_ae .Slice (_bfag ,func (_eafba ,_gfbf int )bool 
{_cbdcg ,_ggcf :=_bfag [_eafba ],_bfag [_gfbf ];_gade ,_ebcae :=_cbdcg .Lly ,_ggcf .Lly ;if _gade !=_ebcae {return _gade < _ebcae ;};return _cbdcg .Llx < _ggcf .Llx ;});_cgdf :=make (map[uint64 ]_ed .PdfRectangle ,_gfeb *_ceef );for _ffgg ,_abgb :=range _deaa [1:]{_eeaf :=_deaa [_ffgg ];for _aaae ,_dbge :=range _edfe [1:]{_acfd :=_edfe [_aaae ];_cgdf [_bacb (_aaae ,_ffgg )]=_ed .PdfRectangle {Llx :_acfd ,Urx :_dbge ,Lly :_abgb ,Ury :_eeaf };};};if _cece {_f .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073");_gb .Printf ("\u0020\u0020\u0020\u0020");for _afge :=0;_afge < _gfeb ;_afge ++{_gb .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_afge );};_gb .Println ();for _aafb :=0;_aafb < _ceef ;_aafb ++{_gb .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_aafb );for _fbcab :=0;_fbcab < _gfeb ;_fbcab ++{_gb .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_cgdf [_bacb (_fbcab ,_aafb )]);};_gb .Println ();};};_gabd :=func (_bbda *textLine )(int ,int ){for _gaaa :=0;_gaaa < _ceef ;_gaaa ++{for _aecf :=0;_aecf < _gfeb ;_aecf ++{if _daba (_cgdf [_bacb (_aecf ,_gaaa )],_bbda .PdfRectangle ){return _aecf ,_gaaa ;};};};return -1,-1;};_fgdc :=make (map[uint64 ][]*textLine ,_gfeb *_ceef );for _ ,_aafc :=range _bfag .lines (){_gcea ,_fadd :=_gabd (_aafc );if _gcea < 0{continue ;};_fgdc [_bacb (_gcea ,_fadd )]=append (_fgdc [_bacb (_gcea ,_fadd )],_aafc );};for _bcce :=0;_bcce < len (_deaa )-1;_bcce ++{_bfdfb :=_deaa [_bcce ];_efdcg :=_deaa [_bcce +1];for _baga :=0;_baga < len (_edfe )-1;_baga ++{_fdce :=_edfe [_baga ];_aebf :=_edfe [_baga +1];_eeda :=_ed .PdfRectangle {Llx :_fdce ,Urx :_aebf ,Lly :_efdcg ,Ury :_bfdfb };_egff :=_fgdc [_bacb (_baga ,_bcce )];if len (_egff )==0{continue ;};_gccgd :=_bcbc (_eeda ,_egff );_aefe .put (_baga ,_bcce ,_gccgd );};};return &_aefe ;};const (_bdcbg markKind =iota ;_gdac ;_gafbf ;_gagg ;);func (_cbgf *textPara )writeText (_adaf _g 
.Writer ){if _cbgf ._begg ==nil {_cbgf .writeCellText (_adaf );return ;};for _eggcd :=0;_eggcd < _cbgf ._begg ._cecff ;_eggcd ++{for _badg :=0;_badg < _cbgf ._begg ._effgf ;_badg ++{_eedf :=_cbgf ._begg .get (_badg ,_eggcd );if _eedf ==nil {_adaf .Write ([]byte ("\u0009"));}else {_eedf .writeCellText (_adaf );};_adaf .Write ([]byte ("\u0020"));};if _eggcd < _cbgf ._begg ._cecff -1{_adaf .Write ([]byte ("\u000a"));};};};func _gdadf (_ebeb ,_ebbd ,_gfge float64 )rulingKind {if _ebeb >=_gfge &&_cefa (_ebbd ,_ebeb ){return _eebcb ;};if _ebbd >=_gfge &&_cefa (_ebeb ,_ebbd ){return _caeda ;};return _eeae ;};func (_fae *textObject )setTextRenderMode (_gec int ){if _fae ==nil {return ;};_fae ._gdf ._dbfc =RenderMode (_gec );};var _bd =false ;

// NewFromContents creates a new extractor from contents and page resources.
func NewFromContents (contents string ,resources *_ed .PdfPageResources )(*Extractor ,error ){const _bfg ="\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s";_eb :=&Extractor {_fb :contents ,_eef :resources ,_cc :map[string ]fontEntry {},_dcc :map[string ]textResult {}};_de .TrackUse (_bfg );return _eb ,nil ;};func _abbc (_bbdd ,_gbgg _ab .Point )bool {return _bbdd .X ==_gbgg .X &&_bbdd .Y ==_gbgg .Y };func _ccbd (_ecfb string )string {_adfg :=[]rune (_ecfb );return string (_adfg [:len (_adfg )-1])};type stateStack []*textState ;func _fece (_gfff _ed .PdfRectangle )*ruling {return &ruling {_aeffb :_caeda ,_ggaeg :_gfff .Urx ,_gbcgc :_gfff .Lly ,_gbab :_gfff .Ury };};func (_ebge *Extractor )extractPageText (_dfa string ,_bgd *_ed .PdfPageResources ,_cgf _ab .Matrix ,_bdd int )(*PageText ,int ,int ,error ){_f .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_bdd );_ffb :=&PageText {_eaee :_ebge ._deg };_affd :=_ebb (_ebge ._deg );_gea :=stateStack {&_affd };_eed :=_gca (_ebge ,_bgd ,_dc .GraphicsState {},&_affd ,&_gea );_fbe :=shapesState {_ffef :_cgf ,_dbcg :_ab .IdentityMatrix (),_adb :_eed };var _gfd bool ;if _bdd > _fcc {_geb :=_be .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_bdd ,_geb );return _ffb ,_affd ._cecf ,_affd ._defa ,_geb ;};_bbe :=_dc .NewContentStreamParser (_dfa );_cecc ,_bba :=_bbe .Parse ();if _bba !=nil {_f .Log .Debug 
("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bba );return _ffb ,_affd ._cecf ,_affd ._defa ,_bba ;};_dge :=_dc .NewContentStreamProcessor (*_cecc );_dge .AddHandler (_dc .HandlerConditionEnumAllOperands ,"",func (_gbb *_dc .ContentStreamOperation ,_dcf _dc .GraphicsState ,_cge *_ed .PdfPageResources )error {_gbcg :=_gbb .Operand ;if _agdb {_f .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_gbb );};switch _gbcg {case "\u0071":if _dddgb {_f .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_fbe ._dbcg );};_gea .push (&_affd );case "\u0051":if !_gea .empty (){if len (_gea )>=2{_gea .pop ();};_affd =*_gea .top ();};_fbe ._dbcg =_dcf .CTM ;if _dddgb {_f .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_fbe ._dbcg );};case "\u0042\u0054":if _gfd {_f .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");_ffb ._cga =append (_ffb ._cga ,_eed ._cfdg ...);};_gfd =true ;_bcb :=_dcf ;_bcb .CTM =_cgf .Mult (_bcb .CTM );_eed =_gca (_ebge ,_cge ,_bcb ,&_affd ,&_gea );_fbe ._adb =_eed ;case "\u0045\u0054":if !_gfd {_f .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");};_gfd =false ;_ffb ._cga =append (_ffb ._cga ,_eed ._cfdg ...);_eed .reset ();case "\u0054\u002a":_eed .nextLine ();case "\u0054\u0064":if _ced ,_bfd :=_eed .checkOp (_gbb ,2,true );!_ced {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bfd );return _bfd ;};_dfc ,_bbeb ,_gdd :=_ffea (_gbb .Params );if _gdd !=nil {return _gdd ;};_eed .moveText (_dfc ,_bbeb );case 
"\u0054\u0044":if _gbcc ,_cbdc :=_eed .checkOp (_gbb ,2,true );!_gbcc {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cbdc );return _cbdc ;};_eaf ,_eafg ,_bfb :=_ffea (_gbb .Params );if _bfb !=nil {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bfb );return _bfb ;};_eed .moveTextSetLeading (_eaf ,_eafg );case "\u0054\u006a":if _fag ,_dgg :=_eed .checkOp (_gbb ,1,true );!_fag {_f .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_gbb ,_dgg );return _dgg ;};_gggd ,_fgb :=_ee .GetStringBytes (_gbb .Params [0]);if !_fgb {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_gbb );return _ee .ErrTypeError ;};return _eed .showText (_gggd );case "\u0054\u004a":if _dbgf ,_gaf :=_eed .checkOp (_gbb ,1,true );!_dbgf {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gaf );return _gaf ;};_cff ,_dca :=_ee .GetArray (_gbb .Params [0]);if !_dca {_f .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_gbb );return _bba ;};return _eed .showTextAdjusted (_cff );case "\u0027":if _gfa ,_ccc :=_eed .checkOp (_gbb ,1,true );!_gfa {_f .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ccc );return _ccc ;};_eab ,_bab :=_ee .GetStringBytes (_gbb .Params [0]);if !_bab {_f .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_gbb );return _ee .ErrTypeError ;};_eed .nextLine ();return _eed 
.showText (_eab );case "\u0022":if _dgb ,_gef :=_eed .checkOp (_gbb ,3,true );!_dgb {_f .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gef );return _gef ;};_aa ,_dbgg ,_dfd :=_ffea (_gbb .Params [:2]);if _dfd !=nil {return _dfd ;};_bbae ,_afe :=_ee .GetStringBytes (_gbb .Params [2]);if !_afe {_f .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_gbb );return _ee .ErrTypeError ;};_eed .setCharSpacing (_aa );_eed .setWordSpacing (_dbgg );_eed .nextLine ();return _eed .showText (_bbae );case "\u0054\u004c":_dfde ,_gbe :=_aad (_gbb );if _gbe !=nil {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gbe );return _gbe ;};_eed .setTextLeading (_dfde );case "\u0054\u0063":_abc ,_gfac :=_aad (_gbb );if _gfac !=nil {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gfac );return _gfac ;};_eed .setCharSpacing (_abc );case "\u0054\u0066":if _dgc ,_cac :=_eed .checkOp (_gbb ,2,true );!_dgc {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cac );return _cac ;};_efa ,_baf :=_ee .GetNameVal (_gbb .Params [0]);if !_baf {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_gbb );return _ee .ErrTypeError ;};_aecc ,_eae :=_ee .GetNumberAsFloat (_gbb .Params [1]);if !_baf {_f .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gbb ,_eae );return _eae ;};_eae =_eed .setFont (_efa ,_aecc );_eed 
._bdcd =_d .Is (_eae ,_ee .ErrNotSupported );if _eae !=nil &&!_eed ._bdcd {return _eae ;};case "\u0054\u006d":if _ggb ,_edb :=_eed .checkOp (_gbb ,6,true );!_ggb {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_edb );return _edb ;};_cbce ,_ccd :=_ee .GetNumbersAsFloat (_gbb .Params );if _ccd !=nil {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ccd );return _ccd ;};_eed .setTextMatrix (_cbce );case "\u0054\u0072":if _dff ,_bffc :=_eed .checkOp (_gbb ,1,true );!_dff {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bffc );return _bffc ;};_dec ,_bbc :=_ee .GetIntVal (_gbb .Params [0]);if !_bbc {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_gbb );return _ee .ErrTypeError ;};_eed .setTextRenderMode (_dec );case "\u0054\u0073":if _abdg ,_bda :=_eed .checkOp (_gbb ,1,true );!_abdg {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bda );return _bda ;};_fcg ,_aed :=_ee .GetNumberAsFloat (_gbb .Params [0]);if _aed !=nil {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aed );return _aed ;};_eed .setTextRise (_fcg );case "\u0054\u0077":if _fff ,_eff :=_eed .checkOp (_gbb ,1,true );!_fff {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_eff );return _eff ;};_bgdc ,_ad :=_ee .GetNumberAsFloat (_gbb .Params [0]);if _ad !=nil {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ad );return _ad ;};_eed .setWordSpacing (_bgdc );case "\u0054\u007a":if _gfec ,_agaf :=_eed .checkOp (_gbb ,1,true );!_gfec {_f .Log .Debug 
("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_agaf );return _agaf ;};_eebe ,_ebe :=_ee .GetNumberAsFloat (_gbb .Params [0]);if _ebe !=nil {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ebe );return _ebe ;};_eed .setHorizScaling (_eebe );case "\u0063\u006d":_fbe ._dbcg =_dcf .CTM ;if _fbe ._dbcg .Singular (){_dege :=_ab .IdentityMatrix ().Translate (_fbe ._dbcg .Translation ());_f .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_fbe ._dbcg ,_dege );_fbe ._dbcg =_dege ;};if _dddgb {_f .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_fbe ._dbcg );};case "\u006d":if len (_gbb .Params )!=2{_f .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_gc );return nil ;};_dfg ,_fga :=_ee .GetNumbersAsFloat (_gbb .Params );if _fga !=nil {return _fga ;};_f .Log .Debug ("\u004d\u006f\u0076\u0065\u0020\u0074\u006f\u003a\u0020\u0025\u002e\u0032\u0066",_dfg );_fbe .moveTo (_dfg [0],_dfg [1]);case "\u006c":if len (_gbb .Params )!=2{_f .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_gc );return nil ;};_eedb ,_ccf :=_ee .GetNumbersAsFloat (_gbb .Params );if _ccf !=nil {return _ccf ;};_fbe .lineTo (_eedb [0],_eedb [1]);case "\u0063":if len (_gbb 
.Params )!=6{return _gc ;};_gda ,_dbc :=_ee .GetNumbersAsFloat (_gbb .Params );if _dbc !=nil {return _dbc ;};_f .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_gda );_fbe .cubicTo (_gda [0],_gda [1],_gda [2],_gda [3],_gda [4],_gda [5]);case "\u0076","\u0079":if len (_gbb .Params )!=4{return _gc ;};_bffa ,_cag :=_ee .GetNumbersAsFloat (_gbb .Params );if _cag !=nil {return _cag ;};_f .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_bffa );_fbe .quadraticTo (_bffa [0],_bffa [1],_bffa [2],_bffa [3]);case "\u0068":_fbe .closePath ();case "\u0072\u0065":if len (_gbb .Params )!=4{return _gc ;};_aae ,_cdb :=_ee .GetNumbersAsFloat (_gbb .Params );if _cdb !=nil {return _cdb ;};_fbe .drawRectangle (_aae [0],_aae [1],_aae [2],_aae [3]);_fbe .closePath ();case "\u0053":_fbe .stroke (&_ffb ._cacf );_fbe .clearPath ();case "\u0073":_fbe .closePath ();_fbe .stroke (&_ffb ._cacf );_fbe .clearPath ();case "\u0046":_fbe .fill (&_ffb ._adfe );_fbe .clearPath ();case "\u0066","\u0066\u002a":_fbe .closePath ();_fbe .fill (&_ffb ._adfe );_fbe .clearPath ();case "\u0042","\u0042\u002a":_fbe .fill (&_ffb ._adfe );_fbe .stroke (&_ffb ._cacf );_fbe .clearPath ();case "\u0062","\u0062\u002a":_fbe .closePath ();_fbe .fill (&_ffb ._adfe );_fbe .stroke (&_ffb ._cacf );_fbe .clearPath ();case "\u006e":_fbe .clearPath ();case "\u0044\u006f":if len (_gbb .Params )==0{_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_gbb .Params );return _ee .ErrRangeError ;};_def ,_efd :=_ee .GetName (_gbb .Params [0]);if !_efd 
{_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_gbb .Params [0]);return _ee .ErrTypeError ;};_ ,_fcbe :=_cge .GetXObjectByName (*_def );if _fcbe !=_ed .XObjectTypeForm {break ;};_dbf ,_efd :=_ebge ._dcc [_def .String ()];if !_efd {_ffe ,_adf :=_cge .GetXObjectFormByName (*_def );if _adf !=nil {_f .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_adf );return _adf ;};_dffc ,_adf :=_ffe .GetContentStream ();if _adf !=nil {_f .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_adf );return _adf ;};_cea :=_ffe .Resources ;if _cea ==nil {_cea =_cge ;};_babb ,_ecge ,_cdd ,_adf :=_ebge .extractPageText (string (_dffc ),_cea ,_cgf .Mult (_dcf .CTM ),_bdd +1);if _adf !=nil {_f .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_adf );return _adf ;};_dbf =textResult {*_babb ,_ecge ,_cdd };_ebge ._dcc [_def .String ()]=_dbf ;};_fbe ._dbcg =_dcf .CTM ;if _dddgb {_f .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_fbe ._dbcg );};_ffb ._cga =append (_ffb ._cga ,_dbf ._dde ._cga ...);_ffb ._cacf =append (_ffb ._cacf ,_dbf ._dde ._cacf ...);_ffb ._adfe =append (_ffb ._adfe ,_dbf ._dde ._adfe ...);_affd ._cecf +=_dbf ._bed ;_affd ._defa +=_dbf ._bea ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_eed ._dgf .ColorspaceNonStroking =_dcf .ColorspaceNonStroking ;_eed ._dgf .ColorNonStroking =_dcf .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_eed ._dgf .ColorspaceStroking =_dcf .ColorspaceStroking ;_eed ._dgf .ColorStroking =_dcf .ColorStroking ;};return nil ;});_bba =_dge .Process (_bgd );return _ffb ,_affd ._cecf ,_affd ._defa ,_bba ;};func (_ade *stateStack )size ()int {return len 
(*_ade )};func (_egcg rulingList )isActualGrid ()(rulingList ,bool ){_afcgd ,_cagbf :=_egcg .augmentGrid ();if !(len (_afcgd )>=_debf +1&&len (_cagbf )>=_aeb +1){if _dgea {_f .Log .Info ("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064",len (_afcgd ),len (_cagbf ),_debf +1,_aeb +1);};return nil ,false ;};if _dgea {_f .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074",_egcg ,len (_afcgd )>=2,len (_cagbf )>=2,len (_afcgd )>=2&&len (_cagbf )>=2);for _gagb ,_cbcd :=range _egcg {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a",_gagb ,_cbcd );};};if _ddeb {_adcbe ,_ccga :=_afcgd [0],_afcgd [len (_afcgd )-1];_efdcf ,_dced :=_cagbf [0],_cagbf [len (_cagbf )-1];if !(_cdega (_adcbe ._ggaeg -_efdcf ._gbcgc )&&_cdega (_ccga ._ggaeg -_efdcf ._gbab )&&_cdega (_efdcf ._ggaeg -_adcbe ._gbab )&&_cdega (_dced ._ggaeg -_adcbe ._gbcgc )){if _dgea {_f .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073",_adcbe ,_ccga ,_efdcf ,_dced );};return nil ,false ;};}else {if !_afcgd .aligned (){if _cccf {_f .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064",len (_afcgd ));};return nil ,false ;};if !_cagbf .aligned (){if _dgea {_f .Log .Info 
("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064",len (_cagbf ));};return nil ,false ;};};_dabf :=append (_afcgd ,_cagbf ...);return _dabf ,true ;};func _bage (_cbcf int ,_eeaaf map[int ][]float64 )([]int ,int ){_fddge :=make ([]int ,_cbcf );_aeeb :=0;for _ceff :=0;_ceff < _cbcf ;_ceff ++{_fddge [_ceff ]=_aeeb ;_aeeb +=len (_eeaaf [_ceff ])+1;};return _fddge ,_aeeb ;};func (_dggd paraList )log (_debc string ){if !_feg {return ;};_f .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_debc ,len (_dggd ));for _agbd ,_dedc :=range _dggd {if _dedc ==nil {continue ;};_dffg :=_dedc .text ();_gcaag :="\u0020\u0020";if _dedc ._begg !=nil {_gcaag =_gb .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_dedc ._begg ._effgf ,_dedc ._begg ._cecff );};_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_agbd ,_dedc .PdfRectangle ,_gcaag ,_cgee (_dffg ,50));};};

// String returns a string describing the TextMarkArray: its element count and
// its first and last marks, or "EMPTY" when it holds no marks.
func (ma TextMarkArray) String() string {
	count := len(ma._cgdg)
	if count == 0 {
		return "EMPTY"
	}
	first, last := ma._cgdg[0], ma._cgdg[count-1]
	return _gb.Sprintf("{TEXTMARKARRAY: %d elements\n\tfirst=%s\n\t last=%s}", count, first, last)
}

// empty reports whether the stack holds no states.
func (s *stateStack) empty() bool {
	return len(*s) == 0
}

// compositeCell is a rectangle together with the paragraphs associated with it.
type compositeCell struct {
	_ed.PdfRectangle
	paraList
}

// subpath is a sequence of points plus a boolean flag.
type subpath struct {
	_cecb []_ab.Point
	_dfgc bool
}

// bbox returns the bounding box of the rulings. The kind of the first ruling
// decides the orientation: for kind _eebcb the x range comes from secMinMax and
// the y range from primMinMax, otherwise the other way round. An empty list is
// logged as an error and yields the zero rectangle.
func (rl rulingList) bbox() _ed.PdfRectangle {
	if len(rl) == 0 {
		_f.Log.Error("rulingList.bbox: no rulings")
		return _ed.PdfRectangle{}
	}
	var box _ed.PdfRectangle
	switch rl[0]._aeffb {
	case _eebcb:
		box.Llx, box.Urx = rl.secMinMax()
		box.Lly, box.Ury = rl.primMinMax()
	default:
		box.Llx, box.Urx = rl.primMinMax()
		box.Lly, box.Ury = rl.secMinMax()
	}
	return box
}

// textObject holds the state used while processing one text object of a
// content stream: the owning extractor, the resources, a graphics state, the
// current text state and saved-state stack, two matrices, the text marks
// collected so far (_cfdg) and a flag (_bdcd) that extractPageText sets when
// setFont reports an unsupported font.
type textObject struct {
	_geg  *Extractor
	_dae  *_ed.PdfPageResources
	_dgf  _dc.GraphicsState
	_gdf  *textState
	_gge  *stateStack
	_decf _ab.Matrix
	_egdf _ab.Matrix
	_cfdg []*textMark
	_bdcd bool
}

// maxDepth returns the difference between the bag's _cfe value and its lower
// y bound.
func (b *wordBag) maxDepth() float64 {
	return b._cfe - b.Lly
}

// getDepthIdx returns the depth index computed by _bfe for depth, clamped to
// the range spanned by the first and last entries of depthIndexes().
func (b *wordBag) getDepthIdx(depth float64) int {
	indexes := b.depthIndexes()
	idx := _bfe(depth)
	lowest, highest := indexes[0], indexes[len(indexes)-1]
	switch {
	case idx < lowest:
		return lowest
	case idx > highest:
		return highest
	default:
		return idx
	}
}

// extractXObjectImage records the image XObject called name as an ImageMark
// placed according to the CTM of gs.
//
// Decoded images are cached per XObject stream. If the name does not resolve to
// a stream, or to an image, nothing is recorded and nil is returned. The image
// is converted to RGB through its colorspace before being stored; the mark's
// size, angle and position are taken from the CTM.
func (ctx *imageExtractContext) extractXObjectImage(name *_ee.PdfObjectName, gs _dc.GraphicsState, resources *_ed.PdfPageResources) error {
	stream, _ := resources.GetXObjectByName(*name)
	if stream == nil {
		return nil
	}

	cached, found := ctx._gba[stream]
	if !found {
		ximg, err := resources.GetXObjectImageByName(*name)
		if err != nil {
			return err
		}
		if ximg == nil {
			return nil
		}
		decoded, err := ximg.ToImage()
		if err != nil {
			return err
		}
		cached = &cachedImage{_bff: decoded, _df: ximg.ColorSpace}
		ctx._gba[stream] = cached
	}

	rgb, err := cached._df.ImageToRGB(*cached._bff)
	if err != nil {
		return err
	}

	_f.Log.Debug("@Do CTM: %s", gs.CTM.String())
	mark := ImageMark{
		Image:  &rgb,
		Width:  gs.CTM.ScalingFactorX(),
		Height: gs.CTM.ScalingFactorY(),
		Angle:  gs.CTM.Angle(),
	}
	mark.X, mark.Y = gs.CTM.Translation()
	ctx._fcb = append(ctx._fcb, mark)
	ctx._ff++
	return nil
}

// String returns a multi-line string describing the PageText: a header line
// with the number of text marks, one line per mark, and a matching footer line.
func (_ddbd PageText )String ()string {_dgba :=_gb .Sprintf ("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073",len (_ddbd ._cga ));_fce :=[]string {"\u002d"+_dgba };for _ ,_efgd :=range _ddbd ._cga {_fce =append (_fce ,_efgd .String ());};_fce =append (_fce ,"\u002b"+_dgba );return _bee .Join (_fce ,"\u000a");};func (_dabb rulingList )removeDuplicates ()rulingList {if len (_dabb )==0{return nil ;};_dabb .sort ();_bdce :=rulingList {_dabb [0]};for _ ,_ggba :=range _dabb [1:]{if _ggba .equals (_bdce [len (_bdce )-1]){continue ;};_bdce =append (_bdce ,_ggba );};return _bdce ;};func (_gbebe *subpath )makeRectRuling (_gdgb _af .Color )(*ruling ,bool ){if _deba {_f .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_gbebe );};_ebdba :=_gbebe ._cecb [:4];_bcef :=make (map[int ]rulingKind ,len (_ebdba ));for _cddfb ,_cagff :=range _ebdba {_eccf :=_gbebe ._cecb [(_cddfb +1)%4];_bcef [_cddfb ]=_ddabb (_cagff ,_eccf );if _deba {_gb .Printf ("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066",_cddfb ,_bcef [_cddfb ],_cagff ,_eccf );};};if _deba {_gb .Printf ("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_bcef );};var _bdff ,_gebe []int ;for _beeg ,_fcec :=range _bcef {switch _fcec {case _eebcb :_gebe =append (_gebe ,_beeg );case _caeda :_bdff =append (_bdff ,_beeg );};};if _deba {_gb .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_gebe ),_gebe );_gb .Printf ("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_bdff ),_bdff );};_fgfa :=(len (_gebe )==2&&len (_bdff )==2)||(len (_gebe )==2&&len (_bdff )==0&&_ceae (_ebdba [_gebe [0]],_ebdba [_gebe [1]]))||(len (_bdff )==2&&len (_gebe )==0&&_aaec (_ebdba [_bdff [0]],_ebdba [_bdff 
[1]]));if _deba {_gb .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_gebe ),len (_bdff ),_fgfa );};if !_fgfa {if _deba {_f .Log .Error ("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v",_gbebe );_gb .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_gebe ),len (_bdff ),_fgfa );};return &ruling {},false ;};if len (_bdff )==0{for _cdbfb ,_bdab :=range _bcef {if _bdab !=_eebcb {_bdff =append (_bdff ,_cdbfb );};};};if len (_gebe )==0{for _gggda ,_cdcd :=range _bcef {if _cdcd !=_caeda {_gebe =append (_gebe ,_gggda );};};};if _deba {_f .Log .Info ("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_gebe ),len (_bdff ),len (_ebdba ),_gebe ,_bdff ,_ebdba );};var _dgacg ,_gfbb ,_beaf ,_gbce _ab .Point ;if _ebdba [_gebe [0]].Y > _ebdba [_gebe [1]].Y {_beaf ,_gbce =_ebdba [_gebe [0]],_ebdba [_gebe [1]];}else {_beaf ,_gbce =_ebdba [_gebe [1]],_ebdba [_gebe [0]];};if _ebdba [_bdff [0]].X > _ebdba [_bdff [1]].X {_dgacg ,_gfbb =_ebdba [_bdff [0]],_ebdba [_bdff [1]];}else {_dgacg ,_gfbb =_ebdba [_bdff [1]],_ebdba [_bdff [0]];};_aefa :=_ed .PdfRectangle {Llx :_dgacg .X ,Urx :_gfbb .X ,Lly :_gbce .Y ,Ury :_beaf .Y };if _aefa .Llx > _aefa .Urx {_aefa .Llx ,_aefa .Urx =_aefa .Urx ,_aefa .Llx ;};if _aefa .Lly > _aefa .Ury {_aefa .Lly ,_aefa .Ury =_aefa .Ury ,_aefa .Lly ;};_beefe :=rectRuling {PdfRectangle :_aefa ,_cebaa :_aagf (_aefa 
),Color :_gdgb };if _beefe ._cebaa ==_eeae {if _deba {_f .Log .Error ("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c");};return nil ,false ;};_eefa ,_ecae :=_beefe .asRuling ();if !_ecae {if _deba {_f .Log .Error ("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg");};return nil ,false ;};if _dgea {_gb .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_eefa .String ());};return _eefa ,true ;};func (_eeaa *textTable )depth ()float64 {_begba :=1e10;for _beabc :=0;_beabc < _eeaa ._effgf ;_beabc ++{_adba :=_eeaa .get (_beabc ,0);if _adba ==nil ||_adba ._egdb {continue ;};_begba =_c .Min (_begba ,_adba .depth ());};return _begba ;};func _bad (_cca []*textMark ,_edfg _ed .PdfRectangle ,_gbeba rulingList ,_aecd []gridTiling )paraList {_f .Log .Trace ("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066",len (_cca ),_edfg );if len (_cca )==0{return nil ;};_ddfg :=_gbfcd (_cca ,_edfg );if len (_ddfg )==0{return nil ;};_gbeba .log ("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065");_dgfe ,_feff :=_gbeba .vertsHorzs ();_dcgad :=_cfgb (_ddfg ,_edfg .Ury ,_dgfe ,_feff );_agca :=_dddb (_dcgad ,_edfg .Ury ,_dgfe ,_feff );_agca =_gefa (_agca );_feag :=make (paraList ,0,len (_agca ));for _ ,_acea :=range _agca {_fbcfc :=_acea .arrangeText ();if _fbcfc !=nil {_feag =append (_feag ,_fbcfc );};};if len (_feag )>=_agbc {_feag =_feag .extractTables (_aecd );};_feag .sortReadingOrder ();_feag .log ("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072");return _feag ;};func (_baea paraList )extractTables (_bfab []gridTiling )paraList {if _cece {_f .Log .Debug 
("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_baea ));};if len (_baea )< _agbc {return _baea ;};_ggbb :=_baea .findTables (_bfab );if _cece {_f .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_ggbb ));for _gffce ,_egged :=range _ggbb {_egged .log (_gb .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_gffce ));};};return _baea .applyTables (_ggbb );};func (_cggbf *textTable )emptyColumn (_afcgc int )bool {for _dfed :=0;_dfed < _cggbf ._cecff ;_dfed ++{_cdbec :=_cggbf .get (_afcgc ,_dfed );if _cdbec !=nil &&_cdbec .text ()!=""{return false ;};};return true ;};func (_acb *imageExtractContext )extractFormImages (_fec *_ee .PdfObjectName ,_cbc _dc .GraphicsState ,_bgf *_ed .PdfPageResources )error {_bb ,_gcfg :=_bgf .GetXObjectFormByName (*_fec );if _gcfg !=nil {return _gcfg ;};if _bb ==nil {return nil ;};_age ,_gcfg :=_bb .GetContentStream ();if _gcfg !=nil {return _gcfg ;};_eec :=_bb .Resources ;if _eec ==nil {_eec =_bgf ;};_gcfg =_acb .extractContentStreamImages (string (_age ),_eec );if _gcfg !=nil {return _gcfg ;};_acb ._baa ++;return nil ;};func _cgee (_edccg string ,_bggab int )string {if len (_edccg )< _bggab {return _edccg ;};return _edccg [:_bggab ];};func (_aacge *textTable )get (_dcde ,_caad int )*textPara {return _aacge ._afdde [_bacb (_dcde ,_caad )]};func (_ecaae *wordBag )minDepth ()float64 {return _ecaae ._cfe -(_ecaae .Ury -_ecaae ._fcggb )};func (_ceca *textObject )reset (){_ceca ._decf =_ab .IdentityMatrix ();_ceca ._egdf =_ab .IdentityMatrix ();_ceca ._cfdg =nil ;};func (_ggcc paraList )findTables (_aegb []gridTiling )[]*textTable {_ggcc .addNeighbours ();_ae .Slice 
(_ggcc ,func (_ggaab ,_cadb int )bool {return _egge (_ggcc [_ggaab ],_ggcc [_cadb ])< 0});var _fdceg []*textTable ;if _fedg {_gaefg :=_ggcc .findGridTables (_aegb );_fdceg =append (_fdceg ,_gaefg ...);};if _bbcae {_cdga :=_ggcc .findTextTables ();_fdceg =append (_fdceg ,_cdga ...);};return _fdceg ;};func _fbef (_fea ,_fagd bounded )float64 {_ddbb :=_cagb (_fea ,_fagd );if !_afbcee (_ddbb ){return _ddbb ;};return _eegd (_fea ,_fagd );};
// String returns a human readable description of the ruling list.
func (_fbcd rulingList )String ()string {if len (_fbcd )==0{return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}";};_fcca ,_bceg :=_fbcd .vertsHorzs ();_fgad :=len (_fcca );_aeace :=len (_bceg );if _fgad ==0||_aeace ==0{return _gb .Sprintf ("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}",_fgad ,_aeace );};_eaeg :=_ed .PdfRectangle {Llx :_fcca [0]._ggaeg ,Urx :_fcca [_fgad -1]._ggaeg ,Lly :_bceg [_aeace -1]._ggaeg ,Ury :_bceg [0]._ggaeg };return _gb .Sprintf ("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d",_fgad ,_aeace ,_eaeg );};func (_dafe *shapesState )moveTo (_fcef ,_efbg float64 ){_dafe ._ffbd =true ;_dafe ._fcbf =_dafe .devicePoint (_fcef ,_efbg );if _dddgb {_f .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_fcef ,_efbg ,_dafe ._fcbf );};};func (_ege *textObject )getCurrentFont ()*_ed .PdfFont {var _cgbd *_ed .PdfFont ;if !_ege ._gge .empty (){_cgbd =_ege ._gge .top ()._agcd ;};if _cgbd ==nil {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e");return _ed .DefaultFont ();};return _cgbd ;};func _cdeg (_ccgf ,_dbab _ed .PdfRectangle )bool {return _gfda (_ccgf ,_dbab )&&_bdag (_ccgf ,_dbab )};func _acfda (_daaa map[int ][]float64 ){if len (_daaa )<=1{return ;};_cfcf :=_gbef (_daaa );if _cece {_f .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_cfcf );};var _bfdg ,_dcbg int ;for _bfdg ,_dcbg =range _cfcf {if _daaa [_dcbg ]!=nil {break ;};};for _bafg ,_agbff :=range _cfcf [_bfdg :]{_agbgg :=_daaa [_agbff ];if _agbgg ==nil {continue ;};if _cece {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_bfdg +_bafg 
,_dcbg ,_agbff );};_dggdg :=_daaa [_agbff ];if _dggdg [len (_dggdg )-1]> _agbgg [0]{_dggdg [len (_dggdg )-1]=_agbgg [0];_daaa [_dcbg ]=_dggdg ;};_dcbg =_agbff ;};};func (_eedc *textObject )setCharSpacing (_eafgd float64 ){if _eedc ==nil {return ;};_eedc ._gdf ._geca =_eafgd ;if _agbf {_f .Log .Info ("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073",_eafgd ,_eedc ._gdf .String ());};};func _ddabb (_adaaga ,_aade _ab .Point )rulingKind {_gbdc :=_c .Abs (_adaaga .X -_aade .X );_gaded :=_c .Abs (_adaaga .Y -_aade .Y );return _gdadf (_gbdc ,_gaded ,_eacg );};func (_ceba *shapesState )lastpointEstablished ()(_ab .Point ,bool ){if _ceba ._ffbd {return _ceba ._fcbf ,false ;};_fbgb :=len (_ceba ._fbd );if _fbgb > 0&&_ceba ._fbd [_fbgb -1]._dfgc {return _ceba ._fbd [_fbgb -1].last (),false ;};return _ab .Point {},true ;};func _gfda (_gccg ,_gfb _ed .PdfRectangle )bool {return _gfb .Llx <=_gccg .Urx &&_gccg .Llx <=_gfb .Urx };
// RangeOffset returns the TextMarks in the array that overlap text[start:end] in the extracted text.
// These are the marks tm for which `start` < tm.Offset + len(tm.Text) && tm.Offset < `end`, where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters, e.g. "ffi" for the ffi ligature, so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
func (_dcab *TextMarkArray )RangeOffset (start ,end int )(*TextMarkArray ,error ){if _dcab ==nil {return nil ,_be .New ("\u006da\u003d\u003d\u006e\u0069\u006c");};if end < start {return nil ,_gb .Errorf ("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020",start ,end );};_cdec :=len (_dcab ._cgdg );if _cdec ==0{return _dcab ,nil ;};if start < _dcab ._cgdg [0].Offset {start =_dcab ._cgdg [0].Offset ;};if end > _dcab ._cgdg [_cdec -1].Offset +1{end =_dcab ._cgdg [_cdec -1].Offset +1;};_cdc :=_ae .Search (_cdec ,func (_dcb int )bool {return _dcab ._cgdg [_dcb ].Offset +len (_dcab ._cgdg [_dcb ].Text )-1>=start });if !(0<=_cdc &&_cdc < _cdec ){_add :=_gb .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076",start ,_cdc ,_cdec ,_dcab ._cgdg [0],_dcab ._cgdg [_cdec -1]);return nil ,_add ;};_gcfab :=_ae .Search (_cdec ,func (_daf int )bool {return _dcab ._cgdg [_daf ].Offset > end -1});if !(0<=_gcfab &&_gcfab < _cdec ){_bdcgg :=_gb .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076",end ,_gcfab ,_cdec ,_dcab ._cgdg [0],_dcab ._cgdg [_cdec -1]);return nil ,_bdcgg ;};if _gcfab <=_cdc {return nil ,_gb .Errorf 
("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064",start ,end ,_cdc ,_gcfab );};return &TextMarkArray {_cgdg :_dcab ._cgdg [_cdc :_gcfab ]},nil ;};func _daea (_bddgg *wordBag ,_cbga int )*textLine {_egbd :=_bddgg .firstWord (_cbga );_ebda :=textLine {PdfRectangle :_egbd .PdfRectangle ,_dag :_egbd ._cbdb ,_cfbc :_egbd ._eabdg };_ebda .pullWord (_bddgg ,_egbd ,_cbga );return &_ebda ;};
// Text returns the extracted page text.
func (_abfd PageText )Text ()string {return _abfd ._eda };type rulingKind int ;func _cdce (_bfef []rulingList )(rulingList ,rulingList ){var _gaab rulingList ;for _ ,_gdeg :=range _bfef {_gaab =append (_gaab ,_gdeg ...);};return _gaab .vertsHorzs ();};func (_dagd rulingList )toGrids ()[]rulingList {if _dgea {_f .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_dagd );};_faabc :=_dagd .intersections ();if _dgea {_f .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_dagd ),len (_faabc ));for _ ,_dacb :=range _aaeee (_faabc ){_gb .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_dacb ,_faabc [_dacb ]);};};_dgcfd :=make (map[int ]intSet ,len (_dagd ));for _abgd :=range _dagd {_ffbg :=_dagd .connections (_faabc ,_abgd );if len (_ffbg )> 0{_dgcfd [_abgd ]=_ffbg ;};};if _dgea {_f .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_dgcfd ));for _ ,_bfad :=range _aaeee (_dgcfd ){_gb .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_bfad ,_dgcfd [_bfad ]);};};_ffed :=_bgaf (len (_dagd ),func (_eede ,_cbccc int )bool {_bcga ,_gdacd :=len (_dgcfd [_eede ]),len (_dgcfd [_cbccc ]);if _bcga !=_gdacd {return _bcga > _gdacd ;};return _dagd .comp (_eede ,_cbccc );});if _dgea {_f .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_ffed );};_dgcd :=[][]int {{_ffed [0]}};_dgef :for _ ,_acaca :=range _ffed [1:]{for _eecf ,_ceaf :=range _dgcd {for _ ,_fecc :=range _ceaf {if _dgcfd [_fecc ].has (_acaca ){_dgcd [_eecf ]=append (_ceaf ,_acaca );continue _dgef ;};};};_dgcd =append (_dgcd ,[]int {_acaca });};if _dgea {_f .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_dgcd );};_ae .SliceStable (_dgcd ,func (_cced ,_feec int )bool 
{return len (_dgcd [_cced ])> len (_dgcd [_feec ])});for _ ,_deea :=range _dgcd {_ae .Slice (_deea ,func (_egga ,_cddb int )bool {return _dagd .comp (_deea [_egga ],_deea [_cddb ])});};_adfd :=make ([]rulingList ,len (_dgcd ));for _gbcf ,_gacf :=range _dgcd {_gadd :=make (rulingList ,len (_gacf ));for _afbc ,_gcaad :=range _gacf {_gadd [_afbc ]=_dagd [_gcaad ];};_adfd [_gbcf ]=_gadd ;};if _dgea {_f .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_adfd );};var _aeeg []rulingList ;for _ ,_abcfe :=range _adfd {if _bfca ,_aadgd :=_abcfe .isActualGrid ();_aadgd {_abcfe =_bfca ;_abcfe =_abcfe .snapToGroups ();_aeeg =append (_aeeg ,_abcfe );};};if _dgea {_ddgb ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_aeeg );_f .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_adfd ),len (_aeeg ));};return _aeeg ;};func (_gcdb rulingList )tidied (_ebdc string )rulingList {_adgg :=_gcdb .removeDuplicates ();_adgg .log ("\u0075n\u0069\u0071\u0075\u0065\u0073");_egadg :=_adgg .snapToGroups ();if _egadg ==nil {return nil ;};_egadg .sort ();if _dgea {_f .Log .Info ("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064",_ebdc ,len (_gcdb ),len (_adgg ),len (_egadg ));};_egadg .log ("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d");return _egadg ;};func (_bgfd *wordBag )highestWord (_eaac int ,_bbdf ,_ffgd float64 )*textWord {for _ ,_bebaa :=range _bgfd ._adee [_eaac ]{if _bbdf <=_bebaa ._eabdg &&_bebaa ._eabdg <=_ffgd {return _bebaa ;};};return nil ;};type intSet map[int ]struct{};func (_dcg *textObject )moveText (_dfe ,_deb float64 ){_dcg .moveLP 
(_dfe ,_deb )};
// String returns a human readable description of the shapes state.
func (_bbf *shapesState )String ()string {return _gb .Sprintf ("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d",len (_bbf ._fbd ),_bbf ._ffbd );};func (_cfcg *wordBag )applyRemovals (_ggae map[int ]map[*textWord ]struct{}){for _ddfc ,_cgbb :=range _ggae {if len (_cgbb )==0{continue ;};_efaa :=_cfcg ._adee [_ddfc ];_baffa :=len (_efaa )-len (_cgbb );if _baffa ==0{delete (_cfcg ._adee ,_ddfc );continue ;};_acbb :=make ([]*textWord ,_baffa );_bebd :=0;for _ ,_fafd :=range _efaa {if _ ,_cgce :=_cgbb [_fafd ];!_cgce {_acbb [_bebd ]=_fafd ;_bebd ++;};};_cfcg ._adee [_ddfc ]=_acbb ;};};func (_dcccc rulingList )sort (){_ae .Slice (_dcccc ,_dcccc .comp )};func (_gdae *textObject )moveLP (_ccb ,_cfb float64 ){_gdae ._egdf .Concat (_ab .NewMatrix (1,0,0,1,_ccb ,_cfb ));_gdae ._decf =_gdae ._egdf ;};func _dddb (_fcbcg *wordBag ,_cebdb float64 ,_aac ,_ebecd rulingList )[]*wordBag {var _dfaa []*wordBag ;for _ ,_cbdf :=range _fcbcg .depthIndexes (){_bggf :=false ;for !_fcbcg .empty (_cbdf ){_dadc :=_fcbcg .firstReadingIndex (_cbdf );_cagf :=_fcbcg .firstWord (_dadc );_dcaff :=_eeee (_cagf ,_cebdb ,_aac ,_ebecd );_fcbcg .removeWord (_cagf ,_dadc );if _cgac {_f .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_cagf .String ());};for _ggf :=true ;_ggf ;_ggf =_bggf {_bggf =false ;_babba :=_cdf *_dcaff ._fcggb ;_aafgc :=_egddg *_dcaff ._fcggb ;_fffa :=_gdgg *_dcaff ._fcggb ;if _cgac {_f .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_dcaff .minDepth (),_dcaff .maxDepth (),_fffa ,_aafgc 
);};if _fcbcg .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_dcaff ,_gfed (_abde ,0),_dcaff .minDepth ()-_fffa ,_dcaff .maxDepth ()+_fffa ,_agee ,false ,false )> 0{_bggf =true ;};if _fcbcg .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_dcaff ,_gfed (_abde ,_aafgc ),_dcaff .minDepth (),_dcaff .maxDepth (),_ebf ,false ,false )> 0{_bggf =true ;};if _bggf {continue ;};_dbde :=_fcbcg .scanBand ("",_dcaff ,_gfed (_abcc ,_babba ),_dcaff .minDepth (),_dcaff .maxDepth (),_egad ,true ,false );if _dbde > 0{_eafa :=(_dcaff .maxDepth ()-_dcaff .minDepth ())/_dcaff ._fcggb ;if (_dbde > 1&&float64 (_dbde )> 0.3*_eafa )||_dbde <=10{if _fcbcg .scanBand ("\u006f\u0074\u0068e\u0072",_dcaff ,_gfed (_abcc ,_babba ),_dcaff .minDepth (),_dcaff .maxDepth (),_egad ,false ,true )> 0{_bggf =true ;};};};};_dfaa =append (_dfaa ,_dcaff );};};return _dfaa ;};func (_ffffe gridTiling )log (_geed string ){if !_bcbb {return ;};_f .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_ffffe ._abfe ),len (_ffffe ._gabed ),_geed );_gb .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_ffffe ._abfe );_gb .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_ffffe ._gabed );for _bdgba ,_gaag :=range _ffffe ._gabed {_aecbec ,_cacaa :=_ffffe ._eege [_gaag ];if !_cacaa {continue ;};_gb .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_bdgba ,_gaag );for _aaega ,_dbe :=range _ffffe ._abfe {_debb ,_dececa :=_aecbec [_dbe ];if !_dececa {continue ;};_gb .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_aaega ,_debb .String ());};};};func (_bbaae *textWord )bbox ()_ed .PdfRectangle {return _bbaae .PdfRectangle };type textState struct{_geca float64 ;_eefb float64 ;_eggc float64 ;_feb float64 ;_ggge float64 ;_dbfc RenderMode ;_bfdf float64 ;_agcd *_ed .PdfFont ;_cfd _ed .PdfRectangle ;_cecf int ;_defa int ;};func (_aebg 
gridTiling )complete ()bool {for _ ,_dbaf :=range _aebg ._eege {for _ ,_efgc :=range _dbaf {if !_efgc .complete (){return false ;};};};return true ;};const (RenderModeStroke RenderMode =1<<iota ;RenderModeFill ;RenderModeClip ;);
// Tables returns the tables extracted from the page.
func (_bfbb PageText )Tables ()[]TextTable {if _cece {_f .Log .Info ("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064",len (_bfbb ._bag ));};return _bfbb ._bag ;};func (_cad *textMark )inDiacriticArea (_dbdg *textMark )bool {_ddee :=_cad .Llx -_dbdg .Llx ;_fega :=_cad .Urx -_dbdg .Urx ;_cbff :=_cad .Lly -_dbdg .Lly ;return _c .Abs (_ddee +_fega )< _cad .Width ()*_eccc &&_c .Abs (_cbff )< _cad .Height ()*_eccc ;};func (_efff *textPara )isAtom ()*textTable {_dbce :=_efff ;_cega :=_efff ._bdb ;_bgad :=_efff ._bbccb ;if !(_cega !=nil &&!_cega ._febd &&_bgad !=nil &&!_bgad ._febd ){return nil ;};_efbc :=_cega ._bbccb ;if !(_efbc !=nil &&!_efbc ._febd &&_efbc ==_bgad ._bdb ){return nil ;};return _ceebd (_dbce ,_cega ,_bgad ,_efbc );};type imageExtractContext struct{_fcb []ImageMark ;_aef int ;_ff int ;_baa int ;_gba map[*_ee .PdfObjectStream ]*cachedImage ;_fd *ImageExtractOptions ;};func _cagdb (_dbdgg []TextMark ,_eebcf *int ,_efdda string )[]TextMark {_fbge :=_afd ;_fbge .Text =_efdda ;return _cfee (_dbdgg ,_eebcf ,_fbge );};func _gbfcd (_fbgbd []*textMark ,_fgedd _ed .PdfRectangle )[]*textWord {var _gbbge []*textWord ;var _bcae *textWord ;if _babbf {_f .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_fbgbd ));};_gffb :=func (){if _bcae !=nil {_fafa :=_bcae .computeText ();if !_acef (_fafa ){_bcae ._feecg =_fafa ;_gbbge =append (_gbbge ,_bcae );if _babbf {_f .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_gbbge )-1,_bcae .String ());for _ggeg ,_afgcb :=range _bcae ._feedb {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ggeg ,_afgcb .String ());};};};_bcae =nil ;};};for _ ,_aege :=range _fbgbd {if _cbe &&_bcae !=nil &&len (_bcae ._feedb )> 0{_ecfag :=_bcae ._feedb [len (_bcae ._feedb )-1];_bbec ,_fceae :=_daga (_aege ._bcdc );_eade ,_decfc 
:=_daga (_ecfag ._bcdc );if _fceae &&!_decfc &&_ecfag .inDiacriticArea (_aege ){_bcae .addDiacritic (_bbec );continue ;};if _decfc &&!_fceae &&_aege .inDiacriticArea (_ecfag ){_bcae ._feedb =_bcae ._feedb [:len (_bcae ._feedb )-1];_bcae .appendMark (_aege ,_fgedd );_bcae .addDiacritic (_eade );continue ;};};_cadbf :=_acef (_aege ._bcdc );if _cadbf {_gffb ();continue ;};if _bcae ==nil &&!_cadbf {_bcae =_fdeb ([]*textMark {_aege },_fgedd );continue ;};_faec :=_bcae ._cbdb ;_aagfc :=_c .Abs (_adcc (_fgedd ,_aege )-_bcae ._eabdg )/_faec ;_bebdc :=_fbbd (_aege ,_bcae )/_faec ;if _bebdc >=_ggaf ||!(-_eag <=_bebdc &&_aagfc <=_aeff ){_gffb ();_bcae =_fdeb ([]*textMark {_aege },_fgedd );continue ;};_bcae .appendMark (_aege ,_fgedd );};_gffb ();return _gbbge ;};
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct{
// NOTE(review): presumably controls whether inline stencil-mask images are
// reported — the field is not read anywhere in this part of the file; confirm.
IncludeInlineStencilMasks bool ;};
// _egfe maps a spacing accent / modifier character to the combining character
// that is stored in its place (for example 0x0060 GRAVE ACCENT -> U+0300).
var (_egfe =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};);
// clearPath resets the path state: `_fbd` is set to nil and `_ffbd` to false.
// When the _dddgb debug constant is on, the state is logged ("CLEAR: ss=%s").
func (_bgea *shapesState )clearPath (){_bgea ._fbd =nil ;_bgea ._ffbd =false ;if _dddgb {_f .Log .Info ("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073",_bgea );};};
// getDown collects, for every column index in [0, _effgf), the `_bbccb`
// neighbour of the cell in the last row (_cecff-1). It returns nil if any of
// those neighbours is nil or has `_febd` set, or if an entry's `_bdb` neighbour
// is not the next entry in the list; otherwise it returns the collected list.
func (_fabbf *textTable )getDown ()paraList {_cede :=make (paraList ,_fabbf ._effgf );for _cedef :=0;_cedef < _fabbf ._effgf ;_cedef ++{_fcda :=_fabbf .get (_cedef ,_fabbf ._cecff -1)._bbccb ;if _fcda ==nil ||_fcda ._febd {return nil ;};_cede [_cedef ]=_fcda ;};for _gdge :=0;_gdge < _fabbf ._effgf -1;_gdge ++{if _cede [_gdge ]._bdb !=_cede [_gdge +1]{return nil ;};};return _cede ;};
// topoOrder returns paragraph indexes ordered by a depth-first traversal over
// the readBefore relation: visiting index i first recurses into every unvisited
// j for which readBefore(ordering, i, j) holds, then records i. The recorded
// sequence is passed through _eced before being returned (presumably a reversal
// of the post-order — confirm against _eced).
func (_cbeb paraList )topoOrder ()[]int {if _feg {_f .Log .Info ("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a");};_gggc :=len (_cbeb );_cdfb :=make ([]bool ,_gggc );_feca :=make ([]int ,0,_gggc );_geaf :=_cbeb .llyOrdering ();var _faab func (_eabf int );_faab =func (_ecbg int ){_cdfb [_ecbg ]=true ;for _aeac :=0;_aeac < _gggc ;_aeac ++{if !_cdfb [_aeac ]{if _cbeb .readBefore (_geaf ,_ecbg ,_aeac ){_faab (_aeac );};};};_feca =append (_feca ,_ecbg );};for _cdaa :=0;_cdaa < _gggc ;_cdaa ++{if !_cdfb [_cdaa ]{_faab (_cdaa );};};return _eced (_feca );};
// absorb merges `_ecedd` into `_abec`: the bounding box becomes the union of
// both boxes (via _afeae) and the marks of `_ecedd` are appended.
func (_abec *textWord )absorb (_ecedd *textWord ){_abec .PdfRectangle =_afeae (_abec .PdfRectangle ,_ecedd .PdfRectangle );_abec ._feedb =append (_abec ._feedb ,_ecedd ._feedb ...);};
// _ebb returns a textState with `_eggc` set to 100, `_dbfc` set to
// RenderModeFill and `_cfd` set to the given rectangle.
func _ebb (_eeff _ed .PdfRectangle )textState {return textState {_eggc :100,_dbfc :RenderModeFill ,_cfd :_eeff };};
// makeRemovals returns a map holding one empty word set for every key that
// currently exists in `_adee`.
func (_eba *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_ffad :=make (map[int ]map[*textWord ]struct{},len (_eba ._adee ));for _efdea :=range _eba ._adee {_ffad [_efdea ]=make (map[*textWord ]struct{});};return _ffad ;};
// del removes `_efbcg` from the set; deleting a missing key is a no-op.
func (_dcede intSet )del (_efbcg int ){delete (_dcede ,_efbcg )};
// scanBand looks through the words of `_bedf` whose depth lies within
// [_bga-pad, _eafgf+pad] (pad = _abcb times the font size `_fcggb` of `_cdgb`)
// and counts those that
//   - satisfy the predicate `_cebe(_cdgb, word)`,
//   - when `_acf` > 0, have a font-size mismatch measure no larger than `_acf`,
//   - are not rejected by `_cdgb.blocked`.
// The count is returned.
//
// `_adge` true: nothing is moved and at most one word per depth key is counted.
// `_adge` false: each counted word is pulled into `_cdgb` and the recorded
// removals are applied to `_bedf` at the end.
// `_dbaa` false: the range [_bga, _eafgf] is widened to include the depth of
// each counted word, which affects the range test for the words that follow.
// `_abg` is not used in the body.
func (_bedf *wordBag )scanBand (_abg string ,_cdgb *wordBag ,_cebe func (_dcbf *wordBag ,_cfeg *textWord )bool ,_bga ,_eafgf ,_acf float64 ,_adge ,_dbaa bool )int {_gcdf :=_cdgb ._fcggb ;var _efdd map[int ]map[*textWord ]struct{};if !_adge {_efdd =_bedf .makeRemovals ();};_cdcb :=_abcb *_gcdf ;_fged :=0;for _ ,_gfegg :=range _bedf .depthBand (_bga -_cdcb ,_eafgf +_cdcb ){if len (_bedf ._adee [_gfegg ])==0{continue ;};for _ ,_ebec :=range _bedf ._adee [_gfegg ]{if !(_bga -_cdcb <=_ebec ._eabdg &&_ebec ._eabdg <=_eafgf +_cdcb ){continue ;};if !_cebe (_cdgb ,_ebec ){continue ;};
// Mismatch measure: the smaller of the relative difference and the larger
// size ratio between the word's `_cbdb` and the bag's `_fcggb`.
_ecb :=2.0*_c .Abs (_ebec ._cbdb -_cdgb ._fcggb )/(_ebec ._cbdb +_cdgb ._fcggb );_afea :=_c .Max (_ebec ._cbdb /_cdgb ._fcggb ,_cdgb ._fcggb /_ebec ._cbdb );_bbcc :=_c .Min (_ecb ,_afea );if _acf > 0&&_bbcc > _acf {continue ;};if _cdgb .blocked (_ebec ){continue ;};if !_adge {_cdgb .pullWord (_ebec ,_gfegg ,_efdd );};_fged ++;if !_dbaa {if _ebec ._eabdg < _bga {_bga =_ebec ._eabdg ;};if _ebec ._eabdg > _eafgf {_eafgf =_ebec ._eabdg ;};};if _adge {break ;};};};if !_adge {_bedf .applyRemovals (_efdd );};return _fged ;};
// _abde reports whether the x extent of `_gbbe` overlaps the x extent of
// `_gcef` once the latter is widened by `_adbe` on both sides.
func _abde (_gcef *wordBag ,_gbbe *textWord ,_adbe float64 )bool {return _gbbe .Llx < _gcef .Urx +_adbe &&_gcef .Llx -_adbe < _gbbe .Urx ;};
// moveTextSetLeading stores the negated `_cdde` in the text state's `_feb`
// field and then calls moveLP with both offsets.
func (_fda *textObject )moveTextSetLeading (_cdg ,_cdde float64 ){_fda ._gdf ._feb =-_cdde ;_fda .moveLP (_cdg ,_cdde );};
// _ggbf renders the map as "{k: v, k: v}" using the key sequence returned by
// _gbef (presumably sorted keys — confirm); values use the "%.2f" verb.
func _ggbf (_fgbd map[int ][]float64 )string {_abffd :=_gbef (_fgbd );_ddef :=make ([]string ,len (_fgbd ));for _afbce ,_aeaab :=range _abffd {_ddef [_afbce ]=_gb .Sprintf ("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066",_aeaab ,_fgbd [_aeaab ]);};return _gb .Sprintf ("\u007b\u0025\u0073\u007d",_bee .Join (_ddef ,"\u002c\u0020"));};
// compositeColCorridors returns a map with one entry per index in
// [0, _effgf), each set to a nil slice. No corridor values are computed here.
func (_ggbed *textTable )compositeColCorridors ()map[int ][]float64 {_egfa :=make (map[int ][]float64 ,_ggbed ._effgf );if _cece {_f .Log .Info ("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020",_ggbed ._effgf );};for _ebdbc :=0;_ebdbc < _ggbed ._effgf ;_ebdbc ++{_egfa [_ebdbc ]=nil ;};return _egfa ;};
// _aad returns the single numeric parameter of a content stream operation as a
// float64. If the operation does not have exactly one parameter, the mismatch
// is logged at debug level and an "incorrect parameter count" error is
// returned with 0.0.
func _aad (_fdc *_dc .ContentStreamOperation )(float64 ,error ){if len (_fdc .Params )!=1{_cfa :=_be .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_fdc .Operand ,1,len (_fdc .Params ),_fdc .Params );return 0.0,_cfa ;};return _ee .GetNumberAsFloat (_fdc .Params [0]);};
// asRuling converts the rectangle into a ruling of kind `_cebaa`.
// For kind _caeda (logged as "rulingVert") the primary coordinate is the mean
// of Llx and Urx and the extent runs from Lly to Ury; for kind _eebcb (logged
// as "rulingHorz") the roles of x and y are swapped. The width returned by
// checkWidth is stored in `_gcfgb`. The method returns (nil, false) when
// checkWidth fails or the kind is neither of the two.
func (_ggde rectRuling )asRuling ()(*ruling ,bool ){_cgae :=ruling {_aeffb :_ggde ._cebaa ,Color :_ggde .Color ,_caba :_gafbf };switch _ggde ._cebaa {case _caeda :_cgae ._ggaeg =0.5*(_ggde .Llx +_ggde .Urx );_cgae ._gbcgc =_ggde .Lly ;_cgae ._gbab =_ggde .Ury ;_afafa ,_adef :=_ggde .checkWidth (_ggde .Llx ,_ggde .Urx );if !_adef {if _deba {_f .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_ggde );};return nil ,false ;};_cgae ._gcfgb =_afafa ;case _eebcb :_cgae ._ggaeg =0.5*(_ggde .Lly +_ggde .Ury );_cgae ._gbcgc =_ggde .Llx ;_cgae ._gbab =_ggde .Urx ;_feeb ,_gefb :=_ggde .checkWidth (_ggde .Lly ,_ggde .Ury );if !_gefb {if _deba {_f .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_ggde );};return nil ,false ;};_cgae ._gcfgb =_feeb ;default:_f .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_ggde ._cebaa );return nil ,false ;};return &_cgae ,true ;};
// String returns a string describing the TextMark.
func (_ace TextMark )String ()string {_baaef :=_ace .BBox ;var _deac string ;if _ace .Font !=nil {_deac =_ace .Font .String ();if len (_deac )> 50{_deac =_deac [:50]+"\u002e\u002e\u002e";};};var _ecca string ;if _ace .Meta {_ecca ="\u0020\u002a\u004d\u002a";};return _gb .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_ace .Offset ,_ace .Text ,[]rune (_ace .Text ),_baaef .Llx ,_baaef .Lly ,_baaef .Urx ,_baaef .Ury ,_deac ,_ecca );};func (_cedcd paraList )xNeighbours (_ffged float64 )map[*textPara ][]int {_ffgdc :=make ([]event ,2*len (_cedcd ));if _ffged ==0{for _gdbf ,_gfcda :=range _cedcd {_ffgdc [2*_gdbf ]=event {_gfcda .Llx ,true ,_gdbf };_ffgdc [2*_gdbf +1]=event {_gfcda .Urx ,false ,_gdbf };};}else {for _cgbe ,_badd :=range _cedcd {_ffgdc [2*_cgbe ]=event {_badd .Llx -_ffged *_badd .fontsize (),true ,_cgbe };_ffgdc [2*_cgbe +1]=event {_badd .Urx +_ffged *_badd .fontsize (),false ,_cgbe };};};return _cedcd .eventNeighbours (_ffgdc );};func _fbfa (_dbca _ab .Point )*subpath {return &subpath {_cecb :[]_ab .Point {_dbca }}};func (_bae *wordBag )depthRange (_afgda ,_acbbd int )[]int {var _gdad []int ;for _afaf :=range _bae ._adee {if _afgda <=_afaf &&_afaf <=_acbbd {_gdad =append (_gdad ,_afaf );};};if len (_gdad )==0{return nil ;};_ae .Ints (_gdad );return _gdad ;};func (_egec *textLine )text ()string {var _gccb []string ;for _ ,_geba :=range _egec ._gad {if _geba ._cdfbc {_gccb =append (_gccb ,"\u0020");};_gccb =append (_gccb ,_geba ._feecg );};return _bee .Join (_gccb ,"");};type textPara struct{_ed .PdfRectangle ;_eaba _ed .PdfRectangle ;_egddd []*textLine ;_begg *textTable ;_febd bool ;_egdb bool ;_begb *textPara ;_bdb *textPara ;_ebecb *textPara ;_bbccb *textPara ;};func _geee 
(_efgdg []float64 ,_dcfcc ,_gbacae float64 )[]float64 {_agcg ,_bcgc :=_dcfcc ,_gbacae ;if _bcgc < _agcg {_agcg ,_bcgc =_bcgc ,_agcg ;};_eefbd :=make ([]float64 ,0,len (_efgdg )+2);_eefbd =append (_eefbd ,_dcfcc );for _ ,_feda :=range _efgdg {if _feda <=_agcg {continue ;}else if _feda >=_bcgc {break ;};_eefbd =append (_eefbd ,_feda );};_eefbd =append (_eefbd ,_gbacae );return _eefbd ;};
// String returns a description of the textTable.
func (_cfaf *textTable) String() string {
	// Renders as "<_effgf> x <_cecff> <_gcedd>", the last value with the %t verb.
	const layout = "\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074"
	w, h, flag := _cfaf._effgf, _cfaf._cecff, _cfaf._gcedd
	return _gb.Sprintf(layout, w, h, flag)
}
// String returns a human readable description of the subpath.
func (_aab *subpath )String ()string {_dccc :=_aab ._cecb ;_ceac :=len (_dccc );if _ceac <=5{return _gb .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_ceac ,_dccc );};return _gb .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_ceac ,_dccc [0],_dccc [1],_dccc [_ceac -1]);};func _aagf (_cebea _ed .PdfRectangle )rulingKind {_ggfe :=_cebea .Width ();_efad :=_cebea .Height ();if _ggfe > _efad {if _ggfe >=_agdg {return _eebcb ;};}else {if _efad >=_agdg {return _caeda ;};};return _eeae ;};func _bfe (_fcac float64 )int {var _cab int ;if _fcac >=0{_cab =int (_fcac /_ecda );}else {_cab =int (_fcac /_ecda )-1;};return _cab ;};func (_bebg rulingList )primMinMax ()(float64 ,float64 ){_ebff ,_cceg :=_bebg [0]._ggaeg ,_bebg [0]._ggaeg ;for _ ,_afbf :=range _bebg [1:]{if _afbf ._ggaeg < _ebff {_ebff =_afbf ._ggaeg ;}else if _afbf ._ggaeg > _cceg {_cceg =_afbf ._ggaeg ;};};return _ebff ,_cceg ;};func (_fbad *textTable )growTable (){_abbd :=func (_cdcgc paraList ){_fbad ._cecff ++;for _aacd :=0;_aacd < _fbad ._effgf ;_aacd ++{_dfcbc :=_cdcgc [_aacd ];_fbad .put (_aacd ,_fbad ._cecff -1,_dfcbc );};};_beda :=func (_agagf paraList ){_fbad ._effgf ++;for _gdfc :=0;_gdfc < _fbad ._cecff ;_gdfc ++{_dgcg :=_agagf [_gdfc ];_fbad .put (_fbad ._effgf -1,_gdfc ,_dgcg );};};if _aedb {_fbad .log ("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce");};for _gcba :=0;;_gcba ++{_gggb :=false ;_deeee :=_fbad .getDown ();_agfb :=_fbad .getRight ();if _aedb {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gcba ,_fbad );_gb .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a",_deeee );_gb .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a",_agfb );};if _deeee !=nil &&_agfb !=nil {_fbec :=_deeee [len (_deeee )-1];if _fbec !=nil &&!_fbec ._febd &&_fbec ==_agfb [len (_agfb )-1]{_abbd (_deeee );if _agfb =_fbad 
.getRight ();_agfb !=nil {_beda (_agfb );_fbad .put (_fbad ._effgf -1,_fbad ._cecff -1,_fbec );};_gggb =true ;};};if !_gggb &&_deeee !=nil {_abbd (_deeee );_gggb =true ;};if !_gggb &&_agfb !=nil {_beda (_agfb );_gggb =true ;};if !_gggb {break ;};};};const _bcc =1.0/1000.0;const (_agcdd =false ;_babbf =false ;_agdb =false ;_ebbf =false ;_dddgb =false ;_agbf =false ;_cgac =false ;_feg =false ;_dcfd =false ;_dbd =_dcfd &&true ;_efdc =_dbd &&false ;_eaaa =_dcfd &&true ;_cece =false ;_aedb =_cece &&false ;_dfdd =_cece &&true ;_dgea =false ;_ebgf =_dgea &&false ;_cccf =_dgea &&false ;_bcbb =_dgea &&true ;_deba =_dgea &&false ;_fade =_dgea &&false ;);func _cefa (_ccgg ,_bffaf float64 )bool {return _ccgg /_c .Max (_ecefc ,_bffaf )< _eacg };type lineRuling struct{_cdgc rulingKind ;_efddc markKind ;_af .Color ;_cadf ,_abeeg _ab .Point ;};
// Elements returns the TextMarks in the TextMarkArray.
func (_afg *TextMarkArray )Elements ()[]TextMark {
// The internal slice is returned as-is; callers share its backing array.
return _afg ._cgdg };
// firstReadingIndex starts from depth key `_aecbe` and looks at the keys in the
// band from the lower edge of that key's bin, (_aecbe+1)*_ecda, to that edge
// plus _cfcb times the `_cbdb` of the first word at `_aecbe`. It returns the key
// whose first word compares lowest under _eegd, or `_aecbe` if none is lower.
func (_edae *wordBag )firstReadingIndex (_aecbe int )int {_afc :=_edae .firstWord (_aecbe )._cbdb ;_agdf :=float64 (_aecbe +1)*_ecda ;_bbaac :=_agdf +_cfcb *_afc ;_bgb :=_aecbe ;for _ ,_ggd :=range _edae .depthBand (_agdf ,_bbaac ){if _eegd (_edae .firstWord (_ggd ),_edae .firstWord (_bgb ))< 0{_bgb =_ggd ;};};return _bgb ;};
// asRuling converts the line into a ruling of kind `_cdgc`. For kind _caeda the
// primary coordinate is xMean() and the extent is the min/max of the end
// points' Y values; for kind _eebcb it is yMean() and the min/max of the X
// values. Any other kind is logged as an error and (nil, false) is returned.
func (_ccfd lineRuling )asRuling ()(*ruling ,bool ){_afdd :=ruling {_aeffb :_ccfd ._cdgc ,Color :_ccfd .Color ,_caba :_gdac };switch _ccfd ._cdgc {case _caeda :_afdd ._ggaeg =_ccfd .xMean ();_afdd ._gbcgc =_c .Min (_ccfd ._cadf .Y ,_ccfd ._abeeg .Y );_afdd ._gbab =_c .Max (_ccfd ._cadf .Y ,_ccfd ._abeeg .Y );case _eebcb :_afdd ._ggaeg =_ccfd .yMean ();_afdd ._gbcgc =_c .Min (_ccfd ._cadf .X ,_ccfd ._abeeg .X );_afdd ._gbab =_c .Max (_ccfd ._cadf .X ,_ccfd ._abeeg .X );default:_f .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_ccfd ._cdgc );return nil ,false ;};return &_afdd ,true ;};
// computeEBBoxes fills in the `_eaba` box of every paragraph. Each `_eaba`
// starts as a copy of the paragraph's own box and may then be widened in x.
func (_bca paraList )computeEBBoxes (){if _agcdd {_f .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a");};for _ ,_affe :=range _bca {_affe ._eaba =_affe .PdfRectangle ;};_dfff :=_bca .yNeighbours (0);
// For each paragraph: `_dgcb` / `_afbd` become the nearest right edge to its
// left and the nearest left edge to its right among its y-neighbours (starting
// from -1e9 / +1e9).
for _bfcc ,_gcbfd :=range _bca {_gcfb :=_gcbfd ._eaba ;_dgcb ,_afbd :=-1.0e9,+1.0e9;for _ ,_fgbg :=range _dfff [_gcbfd ]{_cbdd :=_bca [_fgbg ]._eaba ;if _cbdd .Urx < _gcfb .Llx {_dgcb =_c .Max (_dgcb ,_cbdd .Urx );}else if _gcfb .Urx < _cbdd .Llx {_afbd =_c .Min (_afbd ,_cbdd .Llx );};};
// Every other paragraph whose Ury is not above this one's Lly may pull this
// box's Llx left or its Urx right, as long as it stays within those limits.
// Boxes stored by earlier iterations are visible to later ones, so the result
// depends on the order of the list.
for _gae ,_abgf :=range _bca {_ddc :=_abgf ._eaba ;if _bfcc ==_gae ||_ddc .Ury > _gcfb .Lly {continue ;};if _dgcb <=_ddc .Llx &&_ddc .Llx < _gcfb .Llx {_gcfb .Llx =_ddc .Llx ;}else if _ddc .Urx <=_afbd &&_gcfb .Urx < _ddc .Urx {_gcfb .Urx =_ddc .Urx ;};};if _agcdd {_gb .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_bfcc ,_gcbfd ._eaba ,_gcfb ,_cgee (_gcbfd .text (),50));};_gcbfd ._eaba =_gcfb ;};
// With _agce set (it is false in the const block below) the widened boxes
// would also replace the paragraphs' own boxes.
if _agce {for _ ,_aadg :=range _bca {_aadg .PdfRectangle =_aadg ._eaba ;};};};
// Feature switches and limits read elsewhere in the package.
// NOTE(review): the meaning of the numeric values (_dcd, _effg, _gdag) is not
// visible in this part of the file — confirm at their points of use.
const (_beae =true ;_afad =true ;_cbe =true ;_agce =false ;_bffce =false ;_dcd =6;_effg =3.0;_gdag =200;_fedg =true ;_bbcae =true ;_aeab =true ;_ffdg =true ;_ddeb =false ;);
// newSubPath clears the current path state via clearPath and, when the _dddgb
// debug constant is on, logs the state ("newSubPath: %s").
func (_fbg *shapesState )newSubPath (){_fbg .clearPath ();if _dddgb {_f .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_fbg );};};
// yNeighbours builds one "enter" event (Lly) and one "leave" event (Ury) per
// paragraph and returns the result of eventNeighbours on them. A non-zero
// `_edbc` widens each extent on both sides by `_edbc` times the paragraph's
// font size.
func (_feea paraList )yNeighbours (_edbc float64 )map[*textPara ][]int {_cfea :=make ([]event ,2*len (_feea ));if _edbc ==0{for _fafc ,_afcebf :=range _feea {_cfea [2*_fafc ]=event {_afcebf .Lly ,true ,_fafc };_cfea [2*_fafc +1]=event {_afcebf .Ury ,false ,_fafc };};}else {for _fcbd ,_bfaf :=range _feea {_cfea [2*_fcbd ]=event {_bfaf .Lly -_edbc *_bfaf .fontsize (),true ,_fcbd };_cfea [2*_fcbd +1]=event {_bfaf .Ury +_edbc *_bfaf .fontsize (),false ,_fcbd };};};return _feea .eventNeighbours (_cfea );};
// depthBand returns the depth keys lying between the index of `_cgdd` and the
// index of `_dab` (both obtained from getDepthIdx), or nil for an empty bag.
func (_aeca *wordBag )depthBand (_cgdd ,_dab float64 )[]int {if len (_aeca ._adee )==0{return nil ;};return _aeca .depthRange (_aeca .getDepthIdx (_cgdd ),_aeca .getDepthIdx (_dab ));};
// _cagb returns _gbf(_edaea) minus _gbf(_abee); the sign tells which of the two
// has the larger _gbf value.
func _cagb (_edaea ,_abee bounded )float64 {return _gbf (_edaea )-_gbf (_abee )};
// drawRectangle starts a new subpath and traces the rectangle with corner
// (_fba, _fdfd), width `_cgbg` and height `_dcfe`: a moveTo, three lineTo calls
// and a closePath. With _dddgb on, the device-space rectangle is logged first.
func (_bec *shapesState )drawRectangle (_fba ,_fdfd ,_cgbg ,_dcfe float64 ){if _dddgb {_cba :=_bec .devicePoint (_fba ,_fdfd );_egcaa :=_bec .devicePoint (_fba +_cgbg ,_fdfd +_dcfe );_bef :=_ed .PdfRectangle {Llx :_cba .X ,Lly :_cba .Y ,Urx :_egcaa .X ,Ury :_egcaa .Y };_f .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_bef );};_bec .newSubPath ();_bec .moveTo (_fba ,_fdfd );_bec .lineTo (_fba +_cgbg ,_fdfd );_bec .lineTo (_fba +_cgbg ,_fdfd +_dcfe );_bec .lineTo (_fba ,_fdfd +_dcfe );_bec .closePath ();};
// _fdeb builds a textWord from `_aeadg`, which must contain at least one mark
// (element 0 is read unconditionally). The word's box is the union of the
// marks' boxes, `_cbdb` is the largest `_defd` among the marks, and `_eabdg`
// is `_bgcag.Ury` minus the word's Lly.
func _fdeb (_aeadg []*textMark ,_bgcag _ed .PdfRectangle )*textWord {_afeda :=_aeadg [0].PdfRectangle ;_bfdb :=_aeadg [0]._defd ;for _ ,_fbbb :=range _aeadg [1:]{_afeda =_afeae (_afeda ,_fbbb .PdfRectangle );if _fbbb ._defd > _bfdb {_bfdb =_fbbb ._defd ;};};return &textWord {PdfRectangle :_afeda ,_feedb :_aeadg ,_eabdg :_bgcag .Ury -_afeda .Lly ,_cbdb :_bfdb };};
// bbox returns the smallest rectangle containing every point of every subpath
// in the section. The first subpath and its first point are read
// unconditionally, so the section must contain at least one point there.
func (_fcgg pathSection )bbox ()_ed .PdfRectangle {_bcdg :=_fcgg ._fbf [0]._cecb [0];_baff :=_ed .PdfRectangle {Llx :_bcdg .X ,Urx :_bcdg .X ,Lly :_bcdg .Y ,Ury :_bcdg .Y };_gcce :=func (_deef _ab .Point ){if _deef .X < _baff .Llx {_baff .Llx =_deef .X ;}else if _deef .X > _baff .Urx {_baff .Urx =_deef .X ;};if _deef .Y < _baff .Lly {_baff .Lly =_deef .Y ;}else if _deef .Y > _baff .Ury {_baff .Ury =_deef .Y ;};};for _ ,_ged :=range _fcgg ._fbf [0]._cecb [1:]{_gcce (_ged );};for _ ,_baca :=range _fcgg ._fbf [1:]{for _ ,_ddf :=range _baca ._cecb {_gcce (_ddf );};};return _baff ;};
// Ruling kinds. _eeae (0) is what _aagf returns when neither side is long
// enough; _eebcb and _caeda are the two orientations handled by asRuling.
const (_eeae rulingKind =iota ;_eebcb ;_caeda ;);
// _dfaf reports whether `_gebc` has at least _agde runes, ends in a rune from
// the unicode.Hyphen table, and has a non-space rune directly before it.
func _dfaf (_gebc string )bool {if _bc .RuneCountInString (_gebc )< _agde {return false ;};_bacc ,_fbca :=_bc .DecodeLastRuneInString (_gebc );if _fbca <=0||!_gf .Is (_gf .Hyphen ,_bacc ){return false ;};_bacc ,_fbca =_bc .DecodeLastRuneInString (_gebc [:len (_gebc )-_fbca ]);return _fbca > 0&&!_gf .IsSpace (_bacc );};
// New returns an Extractor instance for extracting content from the input PDF page.
func New (page *_ed .PdfPage )(*Extractor ,error ){const _bf ="\u0065\u0078\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077";_fc ,_fa :=page .GetAllContentStreams ();if _fa !=nil {return nil ,_fa ;};_ce ,_fa :=page .GetMediaBox ();if _fa !=nil {return nil ,_gb .Errorf ("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076",_fa );};_bce :=&Extractor {_fb :_fc ,_eef :page .Resources ,_deg :*_ce ,_cc :map[string ]fontEntry {},_dcc :map[string ]textResult {}};if _bce ._deg .Llx > _bce ._deg .Urx {_f .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_bce ._deg );_bce ._deg .Llx ,_bce ._deg .Urx =_bce ._deg .Urx ,_bce ._deg .Llx ;};if _bce ._deg .Lly > _bce ._deg .Ury {_f .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_bce ._deg );_bce ._deg .Lly ,_bce ._deg .Ury =_bce ._deg .Ury ,_bce ._deg .Lly ;};_de .TrackUse (_bf );return _bce ,nil ;};func (_bfgd *textObject )setWordSpacing (_efde float64 ){if _bfgd ==nil {return ;};_bfgd ._gdf ._eefb =_efde ;};func _afeae (_baaec ,_dddg _ed .PdfRectangle )_ed .PdfRectangle {return _ed .PdfRectangle {Llx :_c .Min (_baaec .Llx ,_dddg .Llx ),Lly :_c .Min (_baaec .Lly ,_dddg .Lly ),Urx :_c .Max (_baaec .Urx ,_dddg .Urx ),Ury :_c .Max (_baaec .Ury ,_dddg .Ury )};};type rulingList []*ruling ;func _ceae (_adfgg ,_cdaee _ab .Point )bool {_fgfe :=_c .Abs (_adfgg .X -_cdaee .X );_agfc :=_c .Abs (_adfgg .Y -_cdaee .Y );return _cefa 
(_agfc ,_fgfe );};func _gdcd (_bdde ,_ffbf int )int {if _bdde < _ffbf {return _bdde ;};return _ffbf ;};func (_dfcb rulingList )augmentGrid ()(rulingList ,rulingList ){_ffecf ,_eacgb :=_dfcb .vertsHorzs ();if len (_ffecf )==0||len (_eacgb )==0{return _ffecf ,_eacgb ;};_efbe ,_aafae :=_ffecf ,_eacgb ;_edbd :=_ffecf .bbox ();_dbbb :=_eacgb .bbox ();if _dgea {_f .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_edbd );_f .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_dbbb );};var _ccda ,_gdb ,_abdgc ,_egcb *ruling ;if _dbbb .Llx < _edbd .Llx -_eea {_ccda =&ruling {_caba :_gagg ,_aeffb :_caeda ,_ggaeg :_dbbb .Llx ,_gbcgc :_edbd .Lly ,_gbab :_edbd .Ury };_ffecf =append (rulingList {_ccda },_ffecf ...);};if _dbbb .Urx > _edbd .Urx +_eea {_gdb =&ruling {_caba :_gagg ,_aeffb :_caeda ,_ggaeg :_dbbb .Urx ,_gbcgc :_edbd .Lly ,_gbab :_edbd .Ury };_ffecf =append (_ffecf ,_gdb );};if _edbd .Lly < _dbbb .Lly -_eea {_abdgc =&ruling {_caba :_gagg ,_aeffb :_eebcb ,_ggaeg :_edbd .Lly ,_gbcgc :_dbbb .Llx ,_gbab :_dbbb .Urx };_eacgb =append (rulingList {_abdgc },_eacgb ...);};if _edbd .Ury > _dbbb .Ury +_eea {_egcb =&ruling {_caba :_gagg ,_aeffb :_eebcb ,_ggaeg :_edbd .Ury ,_gbcgc :_dbbb .Llx ,_gbab :_dbbb .Urx };_eacgb =append (_eacgb ,_egcb );};if len (_ffecf )+len (_eacgb )==len (_dfcb ){return _efbe ,_aafae ;};_bgdg :=append (_ffecf ,_eacgb ...);_dfcb .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_bgdg .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");return _ffecf ,_eacgb ;};func (_gag *textLine )endsInHyphen ()bool {_fbac :=_gag ._gad [len (_gag ._gad )-1];_ecea :=_fbac ._feecg ;_dbba ,_ccea :=_bc .DecodeLastRuneInString (_ecea );if _ccea <=0||!_gf .Is (_gf .Hyphen ,_dbba ){return false ;};if _fbac ._cdfbc &&_dfaf (_ecea ){return true ;};return _dfaf 
(_gag .text ());};func (_fdefb *textPara )toTextMarks (_gbfa *int )[]TextMark {if _fdefb ._begg ==nil {return _fdefb .toCellTextMarks (_gbfa );};var _ebgb []TextMark ;for _dfcd :=0;_dfcd < _fdefb ._begg ._cecff ;_dfcd ++{for _dgcff :=0;_dgcff < _fdefb ._begg ._effgf ;_dgcff ++{_dcbab :=_fdefb ._begg .get (_dgcff ,_dfcd );if _dcbab ==nil {_ebgb =_cagdb (_ebgb ,_gbfa ,"\u0009");}else {_fddg :=_dcbab .toCellTextMarks (_gbfa );_ebgb =append (_ebgb ,_fddg ...);};_ebgb =_cagdb (_ebgb ,_gbfa ,"\u0020");};if _dfcd < _fdefb ._begg ._cecff -1{_ebgb =_cagdb (_ebgb ,_gbfa ,"\u000a");};};return _ebgb ;};func (_dfeac *textMark )bbox ()_ed .PdfRectangle {return _dfeac .PdfRectangle };func _bdagb (_abdgb []TextMark ,_dfee *int )[]TextMark {_bgcf :=_abdgb [len (_abdgb )-1];_fbbf :=[]rune (_bgcf .Text );if len (_fbbf )==1{_abdgb =_abdgb [:len (_abdgb )-1];_fdece :=_abdgb [len (_abdgb )-1];*_dfee =_fdece .Offset +len (_fdece .Text );}else {_agfd :=_ccbd (_bgcf .Text );*_dfee +=len (_agfd )-len (_bgcf .Text );_bgcf .Text =_agfd ;};return _abdgb ;};
// ToTextMark returns the public view of the textMark.
func (_gcg *textMark )ToTextMark ()TextMark {return TextMark {Text :_gcg ._bcdc ,Original :_gcg ._gacb ,BBox :_gcg ._cdfc ,Font :_gcg ._bgga ,FontSize :_gcg ._defd ,FillColor :_gcg ._adaag ,StrokeColor :_gcg ._ebac ,Orientation :_gcg ._gfeca };};func _bfdeb (_afac ,_faad *textPara )bool {return _gfda (_afac ._eaba ,_faad ._eaba )};func (_gcdfe *textTable )reduce ()*textTable {_fdcc :=make ([]int ,0,_gcdfe ._cecff );_gegec :=make ([]int ,0,_gcdfe ._effgf );for _eead :=0;_eead < _gcdfe ._cecff ;_eead ++{if !_gcdfe .emptyRow (_eead ){_fdcc =append (_fdcc ,_eead );};};for _ffdb :=0;_ffdb < _gcdfe ._effgf ;_ffdb ++{if !_gcdfe .emptyColumn (_ffdb ){_gegec =append (_gegec ,_ffdb );};};if len (_fdcc )==_gcdfe ._cecff &&len (_gegec )==_gcdfe ._effgf {return _gcdfe ;};_bcec :=textTable {_gcedd :_gcdfe ._gcedd ,_effgf :len (_gegec ),_cecff :len (_fdcc ),_afdde :make (map[uint64 ]*textPara ,len (_gegec )*len (_fdcc ))};if _cece {_f .Log .Info ("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064",_gcdfe ._effgf ,_gcdfe ._cecff ,len (_gegec ),len (_fdcc ));_f .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_gegec );_f .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_fdcc );};for _aedfg ,_defb :=range _fdcc {for _bgae ,_becc :=range _gegec {_ffge :=_gcdfe .get (_becc ,_defb );if _ffge ==nil {continue ;};if _cece {_gb .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_bgae ,_aedfg ,_becc ,_defb ,_cgee (_ffge .text (),50));};_bcec .put (_bgae ,_aedfg ,_ffge );};};return &_bcec ;};func (_eaef *wordBag )removeWord (_beab *textWord ,_ecdd int ){_gebd :=_eaef ._adee [_ecdd ];_gebd =_bedd (_gebd ,_beab );if len (_gebd )==0{delete (_eaef ._adee ,_ecdd );}else {_eaef ._adee [_ecdd ]=_gebd ;};};func _fdda (_agdfe _ed .PdfColorspace 
,_eedaa _ed .PdfColor )_af .Color {if _agdfe ==nil ||_eedaa ==nil {return _af .Black ;};_gcab ,_ccfg :=_agdfe .ColorToRGB (_eedaa );if _ccfg !=nil {_f .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_eedaa ,_agdfe ,_ccfg );return _af .Black ;};_ccaa ,_ecfg :=_gcab .(*_ed .PdfColorDeviceRGB );if !_ecfg {_f .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_gcab );return _af .Black ;};return _af .NRGBA {R :uint8 (_ccaa .R ()*255),G :uint8 (_ccaa .G ()*255),B :uint8 (_ccaa .B ()*255),A :uint8 (255)};};func (_acba *PageText )computeViews (){var _edf rulingList ;if _aeab {_efab :=_facba (_acba ._cacf );_edf =append (_edf ,_efab ...);};if _ffdg {_fafg :=_gaggc (_acba ._adfe );_edf =append (_edf ,_fafg ...);};_edf ,_bedb :=_edf .toTilings ();var _fac paraList ;_ggab :=len (_acba ._cga );for _gcc :=0;_gcc < 360&&_ggab > 0;_gcc +=90{_gcaa :=make ([]*textMark ,0,len (_acba ._cga )-_ggab );for _ ,_fdfb :=range _acba ._cga {if _fdfb ._gfeca ==_gcc {_gcaa =append (_gcaa ,_fdfb );};};if len (_gcaa )> 0{_fffg :=_bad (_gcaa ,_acba ._eaee ,_edf ,_bedb );_fac =append (_fac ,_fffg ...);_ggab -=len (_gcaa );};};_eaa :=new (_ag .Buffer );_fac .writeText (_eaa );_acba ._eda =_eaa .String ();_acba ._ffee =_fac .toTextMarks ();_acba ._bag =_fac .tables ();if _cece {_f .Log .Info ("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064",len (_acba ._bag ));};};func (_gdfb *wordBag )text ()string {_cfce :=_gdfb .allWords ();_agag 
:=make ([]string ,len (_cfce ));for _edef ,_edcg :=range _cfce {_agag [_edef ]=_edcg ._feecg ;};return _bee .Join (_agag ,"\u0020");};func _fbbd (_ggeeg ,_bffbf bounded )float64 {return _ggeeg .bbox ().Llx -_bffbf .bbox ().Urx };func (_efc *textObject )getFontDict (_dce string )(_egbc _ee .PdfObject ,_cce error ){_cdbf :=_efc ._dae ;if _cdbf ==nil {_f .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_dce );return nil ,nil ;};_egbc ,_eded :=_cdbf .GetFontByName (_ee .PdfObjectName (_dce ));if !_eded {_f .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_dce );return nil ,_be .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _egbc ,nil ;};func _agec (_dcac ,_ecfa _ed .PdfRectangle )(_ed .PdfRectangle ,bool ){if !_cdeg (_dcac ,_ecfa ){return _ed .PdfRectangle {},false ;};return _ed .PdfRectangle {Llx :_c .Max (_dcac .Llx ,_ecfa .Llx ),Urx :_c .Min (_dcac .Urx ,_ecfa .Urx ),Lly :_c .Max (_dcac .Lly ,_ecfa .Lly ),Ury :_c .Min (_dcac .Ury ,_ecfa .Ury )},true ;};var _afd =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_af .White ,StrokeColor :_af .White };type gridTile struct{_ed .PdfRectangle ;_dfccg ,_beed ,_bggc ,_faac bool ;};func (_cdeca *subpath )clear (){*_cdeca =subpath {}};func _cddf (_dgcc _ab .Matrix )_ab .Point {_fecd ,_dbgc :=_dgcc .Translation ();return _ab .Point {X :_fecd ,Y :_dbgc };};var (_cf =_be .New ("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072");_gc =_be .New ("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072"););func (_bgfa paraList 
)sortReadingOrder (){_f .Log .Trace ("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_bgfa ));if len (_bgfa )<=1{return ;};_bgfa .computeEBBoxes ();_ae .Slice (_bgfa ,func (_aag ,_dgab int )bool {return _fbef (_bgfa [_aag ],_bgfa [_dgab ])<=0});_faa :=_bgfa .topoOrder ();_bgfa .reorder (_faa );};func (_beabb *textPara )fontsize ()float64 {return _beabb ._egddd [0]._dag };type pathSection struct{_fbf []*subpath ;_af .Color ;};func (_dbec *textTable )markCells (){for _cgag :=0;_cgag < _dbec ._cecff ;_cgag ++{for _gcdd :=0;_gcdd < _dbec ._effgf ;_gcdd ++{_adfga :=_dbec .get (_gcdd ,_cgag );if _adfga !=nil {_adfga ._febd =true ;};};};};func (_eddc *shapesState )lineTo (_eeg ,_fde float64 ){if _dddgb {_f .Log .Info ("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066",_eeg ,_fde ,_eddc .devicePoint (_eeg ,_fde ));};_eddc .addPoint (_eeg ,_fde );};func (_feac rectRuling )checkWidth (_bbaf ,_ebgg float64 )(float64 ,bool ){_efbgc :=_ebgg -_bbaf ;_adeef :=_efbgc <=_bebb ;return _efbgc ,_adeef ;};func _bacb (_daeae ,_gafge int )uint64 {return uint64 (_daeae )*0x1000000+uint64 (_gafge )};func (_fbdg paraList )findTableGrid (_adecg gridTiling )(*textTable ,map[*textPara ]struct{}){_cbgg :=len (_adecg ._abfe );_efgg :=len (_adecg ._gabed );_ffag :=textTable {_gcedd :true ,_effgf :_cbgg ,_cecff :_efgg ,_afdde :make (map[uint64 ]*textPara ,_cbgg *_efgg ),_egaa :make (map[uint64 ]compositeCell ,_cbgg *_efgg )};_baefb :=make (map[*textPara ]struct{});_afba :=int ((1.0-_gaae )*float64 (_cbgg *_efgg ));_adbbb :=0;if _bcbb {_f .Log .Info 
("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064",_cbgg ,_efgg );};for _gbbbd ,_ecgd :=range _adecg ._gabed {_daaae ,_eecdd :=_adecg ._eege [_ecgd ];if !_eecdd {continue ;};for _gccbe ,_egbda :=range _adecg ._abfe {_bbaab ,_ggfba :=_daaae [_egbda ];if !_ggfba {continue ;};_fbgf :=_fbdg .inTile (_bbaab );if len (_fbgf )==0{_adbbb ++;if _adbbb > _afba {if _bcbb {_f .Log .Info ("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064",_adbbb );};return nil ,nil ;};}else {_ffag .putComposite (_gccbe ,_gbbbd ,_fbgf ,_bbaab .PdfRectangle );for _ ,_ddccc :=range _fbgf {_baefb [_ddccc ]=struct{}{};};};};};_aggg :=0;for _cbfbe :=0;_cbfbe < _cbgg ;_cbfbe ++{_eabc :=_ffag .get (_cbfbe ,0);if _eabc ==nil ||!_eabc ._egdb {_aggg ++;};};if _aggg ==0{if _bcbb {_f .Log .Info ("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030");};return nil ,nil ;};_acaf :=_ffag .reduceTiling (_adecg ,_fefg );_acaf =_acaf .subdivide ();return _acaf ,_baefb ;};func (_ede *textObject )setTextMatrix (_eabd []float64 ){if len (_eabd )!=6{_f .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_eabd ));return ;};_ffeg ,_ded ,_ebgac ,_ecc ,_ccff ,_dcgb :=_eabd [0],_eabd [1],_eabd [2],_eabd [3],_eabd [4],_eabd [5];_ede ._decf =_ab .NewMatrix (_ffeg ,_ded ,_ebgac ,_ecc ,_ccff ,_dcgb );_ede ._egdf =_ede ._decf ;};func (_dbcc rulingList )secMinMax ()(float64 ,float64 ){_aaee ,_abbea :=_dbcc [0]._gbcgc ,_dbcc [0]._gbab ;for _ ,_fcadc :=range _dbcc [1:]{if _fcadc ._gbcgc < _aaee {_aaee =_fcadc ._gbcgc ;};if _fcadc ._gbab > _abbea {_abbea =_fcadc ._gbab ;};};return _aaee ,_abbea ;};func (_ecdg *textLine )toTextMarks (_agab *int )[]TextMark {var _gcaf []TextMark ;for _ ,_gfdad :=range _ecdg ._gad {if _gfdad ._cdfbc {_gcaf =_cagdb (_gcaf ,_agab ,"\u0020");};_baaa :=_gfdad .toTextMarks (_agab );_gcaf =append (_gcaf 
,_baaa ...);};return _gcaf ;};
|
||
|
||
// ExtractText processes and extracts all text data in content streams and returns as a string.
|
||
// It takes into account character encodings in the PDF file, which are decoded by
|
||
// CharcodeBytesToUnicode.
|
||
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
|
||
func (_ec *Extractor) ExtractText() (string, error) {
	text, _, _, err := _ec.ExtractTextWithStats()
	return text, err
}

// _ccba matches a whole string consisting of optional whitespace, then either digits with an
// optional "." or a run of the letters I, i, v, then optional whitespace and an optional ")".
var _ccba = _a.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")

// _bcbc builds a textPara from a bounding box and its lines.
func _bcbc(_adcb _ed.PdfRectangle, _bedc []*textLine) *textPara {
	para := textPara{PdfRectangle: _adcb, _egddd: _bedc}
	return &para
}

// _abea returns " " when `_afbcee` accepts the difference of the two values and "\n"
// otherwise.
func _abea(_fdba, _efcc float64) string {
	if _afbcee(_fdba - _efcc) {
		return "\u0020"
	}
	return "\u000a"
}

// showText renders `_bfde` with renderText.
func (_aeda *textObject) showText(_bfde []byte) error {
	return _aeda.renderText(_bfde)
}

// _edba rounds `_efddad` to the nearest multiple of `_cabb`. A zero `_cabb` is treated as 1.
func _edba(_efddad float64, _cabb int) int {
	if _cabb == 0 {
		_cabb = 1
	}
	step := float64(_cabb)
	multiples := _c.Round(_efddad / step)
	return int(multiples * step)
}

// getFontDirect resolves the font named `_ffff` from the resources and constructs a PdfFont
// from it. The font returned by the constructor is passed through even when it reports an
// error.
func (_bbd *textObject) getFontDirect(_ffff string) (*_ed.PdfFont, error) {
	fontObj, err := _bbd.getFontDict(_ffff)
	if err != nil {
		return nil, err
	}
	font, err := _ed.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		_f.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _ffff, err)
	}
	return font, err
}

// bbox returns the embedded bounding rectangle of the paragraph.
func (_fcfe *textPara) bbox() _ed.PdfRectangle {
	return _fcfe.PdfRectangle
}

// setTextRise stores `_baae` in the text state's `_bfdf` field. A nil receiver is a no-op.
func (_abf *textObject) setTextRise(_baae float64) {
	if _abf != nil {
		_abf._gdf._bfdf = _baae
	}
}

// _aaddf returns nil when a licence key is present and licensed, or when `_bd` is set.
// Otherwise it prints an "unlicensed copy" notice to standard output and returns an error.
// The `_fbgbf` argument is not used.
func _aaddf(_fbgbf *PageText) error {
	key := _de.GetLicenseKey()
	licensed := key != nil && key.IsLicensed()
	if licensed || _bd {
		return nil
	}
	_gb.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_gb.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _be.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}

// cubicTo records only the end point (`_cgc`, `_dba`) of the curve; the four control-point
// arguments are ignored.
func (_edeb *shapesState) cubicTo(_fdd, _bbg, _egca, _bfaa, _cgc, _dba float64) {
	if _dddgb {
		_f.Log.Info("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a")
	}
	_edeb.addPoint(_cgc, _dba)
}

// allWords returns every word in the bag. The order follows map iteration over `_adee` and
// is therefore not deterministic across keys.
func (_edda *wordBag) allWords() []*textWord {
	var all []*textWord
	for _, bucket := range _edda._adee {
		all = append(all, bucket...)
	}
	return all
}

// findPrimSec returns the first ruling whose `_ggaeg` matches `_gedg` (per `_afbcee`) and
// whose [`_gbcgc`, `_gbab`] range, widened by `_eea`, contains `_abffc`. It returns nil when
// none matches.
func (_affc rulingList) findPrimSec(_gedg, _abffc float64) *ruling {
	for _, candidate := range _affc {
		if !_afbcee(candidate._ggaeg - _gedg) {
			continue
		}
		if candidate._gbcgc-_eea <= _abffc && _abffc <= candidate._gbab+_eea {
			return candidate
		}
	}
	return nil
}
|
||
|
||
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
|
||
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
|
||
func (_edc *Extractor) ExtractTextWithStats() (_egc string, _ecg int, _gga int, _cae error) {
	// The counts and error are assigned straight into the named results.
	pageText, _ecg, _gga, _cae := _edc.ExtractPageText()
	if _cae == nil {
		return pageText.Text(), _ecg, _gga, nil
	}
	return "", _ecg, _gga, _cae
}

// bbox returns the embedded bounding rectangle of the line.
func (_fgaf *textLine) bbox() _ed.PdfRectangle {
	return _fgaf.PdfRectangle
}

// text returns what writeText produces for the paragraph, as a string.
func (_bgde *textPara) text() string {
	var buf _ag.Buffer
	_bgde.writeText(&buf)
	return buf.String()
}

// _eegd returns the left edge of `_eddaa` minus the left edge of `_cagg`.
func _eegd(_eddaa, _cagg bounded) float64 {
	first := _eddaa.bbox()
	second := _cagg.bbox()
	return first.Llx - second.Llx
}
|
||
|
||
// String returns a description of `v`.
|
||
func (_abcd *ruling )String ()string {if _abcd ._aeffb ==_eeae {return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047";};_ebaec ,_dfeab :="\u0078","\u0079";if _abcd ._aeffb ==_eebcb {_ebaec ,_dfeab ="\u0079","\u0078";};_aeffg :="";if _abcd ._gcfgb !=0.0{_aeffg =_gb .Sprintf (" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_abcd ._gcfgb );};return _gb .Sprintf ("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",_abcd ._aeffb ,_ebaec ,_abcd ._ggaeg ,_dfeab ,_abcd ._gbcgc ,_abcd ._gbab ,_abcd ._gbab -_abcd ._gbcgc ,_abcd ._caba ,_abcd .Color ,_aeffg );};func _cfgb (_gced []*textWord ,_cbg float64 ,_ddfa ,_edebf rulingList )*wordBag {_fbeg :=_eeee (_gced [0],_cbg ,_ddfa ,_edebf );for _ ,_fcea :=range _gced [1:]{_fee :=_bfe (_fcea ._eabdg );_fbeg ._adee [_fee ]=append (_fbeg ._adee [_fee ],_fcea );_fbeg .PdfRectangle =_afeae (_fbeg .PdfRectangle ,_fcea .PdfRectangle );};_fbeg .sort ();return _fbeg ;};func (_cade rulingList )log (_fccga string ){if !_dgea {return ;};_f .Log .Info ("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_fccga ,_cade .String ());for _egef ,_geeg :=range _cade {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_egef ,_geeg .String ());};};func (_effb paraList )merge ()*textPara {_f .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_effb ));if len (_effb )==0{return nil ;};_effb .sortReadingOrder ();_gdab :=_effb [0].PdfRectangle ;_decfb :=_effb [0]._egddd ;for _ ,_edaf :=range _effb [1:]{_gdab =_afeae (_gdab ,_edaf .PdfRectangle 
);_decfb =append (_decfb ,_edaf ._egddd ...);};return _bcbc (_gdab ,_decfb );};func (_gdaaa *textTable )bbox ()_ed .PdfRectangle {return _gdaaa .PdfRectangle };func (_ccbg *textTable )subdivide ()*textTable {_ccbg .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");_edcb :=_ccbg .compositeRowCorridors ();_ddbe :=_ccbg .compositeColCorridors ();if _cece {_f .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_ggbf (_edcb ),_ggbf (_ddbe ));};if len (_edcb )==0||len (_ddbe )==0{return _ccbg ;};_acfda (_edcb );_acfda (_ddbe );if _cece {_f .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_ggbf (_edcb ),_ggbf (_ddbe ));};_eccd ,_adbd :=_bage (_ccbg ._cecff ,_edcb );_gbfb ,_cdecb :=_bage (_ccbg ._effgf ,_ddbe );_defe :=make (map[uint64 ]*textPara ,_cdecb *_adbd );_abdb :=&textTable {PdfRectangle :_ccbg .PdfRectangle ,_gcedd :_ccbg ._gcedd ,_cecff :_adbd ,_effgf :_cdecb ,_afdde :_defe };if _cece {_f .Log .Info ("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_ccbg 
._effgf ,_ccbg ._cecff ,_cdecb ,_adbd ,_ggbf (_edcb ),_ggbf (_ddbe ),_eccd ,_gbfb );};for _edg :=0;_edg < _ccbg ._cecff ;_edg ++{_bedba :=_eccd [_edg ];for _ddbeg :=0;_ddbeg < _ccbg ._effgf ;_ddbeg ++{_gedc :=_gbfb [_ddbeg ];if _cece {_gb .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_ddbeg ,_edg ,_gedc ,_bedba );};_bffg ,_dfgfa :=_ccbg ._egaa [_bacb (_ddbeg ,_edg )];if !_dfgfa {continue ;};_feefd :=_bffg .split (_edcb [_edg ],_ddbe [_ddbeg ]);for _baef :=0;_baef < _feefd ._cecff ;_baef ++{for _egbfg :=0;_egbfg < _feefd ._effgf ;_egbfg ++{_bafda :=_feefd .get (_egbfg ,_baef );_abdb .put (_gedc +_egbfg ,_bedba +_baef ,_bafda );if _cece {_gb .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_gedc +_egbfg ,_bedba +_baef ,_bafda );};};};};};return _abdb ;};func _bdfc (_agabc map[float64 ]map[float64 ]gridTile )[]float64 {_affee :=make ([]float64 ,0,len (_agabc ));for _acfa :=range _agabc {_affee =append (_affee ,_acfa );};_ae .Float64s (_affee );_cefdd :=len (_affee );for _eecd :=0;_eecd < _cefdd /2;_eecd ++{_affee [_eecd ],_affee [_cefdd -1-_eecd ]=_affee [_cefdd -1-_eecd ],_affee [_eecd ];};return _affee ;};
|
||
|
||
// String returns a string describing `i`.
|
||
func (_feceg gridTile) String() string {
	// flag renders a set border as its letter and an unset one as "_".
	flag := func(set bool, letter string) string {
		if !set {
			return "\u005f"
		}
		return letter
	}
	left := flag(_feceg._beed, "\u004c")
	right := flag(_feceg._faac, "\u0052")
	bottom := flag(_feceg._bggc, "\u0042")
	top := flag(_feceg._dfccg, "\u0054")
	return _gb.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", _feceg.PdfRectangle, left, right, bottom, top)
}

// closePath closes the most recent subpath. When the `_ffbd` flag is set, a new subpath is
// first created from `_fcbf` and the flag is cleared. When the flag is clear and there are
// no subpaths, the call does nothing.
func (_bbgg *shapesState) closePath() {
	switch {
	case _bbgg._ffbd:
		_bbgg._fbd = append(_bbgg._fbd, _fbfa(_bbgg._fcbf))
		_bbgg._ffbd = false
	case len(_bbgg._fbd) == 0:
		if _dddgb {
			_f.Log.Debug("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068")
		}
		_bbgg._ffbd = false
		return
	}
	lastIdx := len(_bbgg._fbd) - 1
	_bbgg._fbd[lastIdx].close()
	if _dddgb {
		_f.Log.Info("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073", _bbgg)
	}
}
|
||
|
||
// String returns a description of `w`.
|
||
func (_fecff *textWord )String ()string {return _gb .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_fecff ._eabdg ,_fecff .PdfRectangle ,_fecff ._cbdb ,_fecff ._feecg );};func (_aca *textObject )renderText (_dgfa []byte )error {if _aca ._bdcd {_f .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");return nil ;};_ggea :=_aca .getCurrentFont ();_afb :=_ggea .BytesToCharcodes (_dgfa );_cdbe ,_fbb ,_ecaf :=_ggea .CharcodesToStrings (_afb );if _ecaf > 0{_f .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_fbb ,_ecaf );};_aca ._gdf ._cecf +=_fbb ;_aca ._gdf ._defa +=_ecaf ;_affa :=_aca ._gdf ;_gafg :=_affa ._ggge ;_gde :=_affa ._eggc /100.0;_ffa ,_gcd :=_ggea .GetRuneMetrics (' ');if !_gcd {_ffa ,_gcd =_ggea .GetCharMetrics (32);};if !_gcd {_ffa ,_ =_ed .DefaultFont ().GetRuneMetrics (' ');};_abe :=_ffa .Wx *_bcc ;_f .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_abe ,_cdbe ,_ggea ,_gafg );_eebc :=_ab .NewMatrix (_gafg *_gde ,0,0,_gafg ,0,_affa ._bfdf );if _agbf {_f .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_afb ),_afb ,_cdbe );};_f .Log .Trace 
("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_afb ),_afb ,len (_cdbe ));_fgce :=_aca .getFillColor ();_gff :=_aca .getStrokeColor ();for _cebd ,_gegf :=range _cdbe {_ffd :=[]rune (_gegf );if len (_ffd )==1&&_ffd [0]=='\x00'{continue ;};_acdg :=_afb [_cebd ];_cgb :=_aca ._dgf .CTM .Mult (_aca ._decf ).Mult (_eebc );_eddf :=0.0;if len (_ffd )==1&&_ffd [0]==32{_eddf =_affa ._eefb ;};_fgag ,_gfecd :=_ggea .GetCharMetrics (_acdg );if !_gfecd {_f .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_acdg ,_ffd ,_ffd ,_ggea );return _gb .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_ggea .String (),_acdg );};_dece :=_ab .Point {X :_fgag .Wx *_bcc ,Y :_fgag .Wy *_bcc };_dccf :=_ab .Point {X :(_dece .X *_gafg +_eddf )*_gde };_ddb :=_ab .Point {X :(_dece .X *_gafg +_affa ._geca +_eddf )*_gde };if _agbf {_f .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_gafg ,_affa ._geca ,_affa ._eefb ,_gde );_f .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_dece ,_dccf ,_ddb );};_ada :=_ebd (_dccf );_bbce :=_ebd (_ddb );_dfag :=_aca ._dgf .CTM .Mult (_aca ._decf ).Mult (_ada );if _ebbf {_f .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 
\u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_aca ._dgf .CTM ,_aca ._decf ,_bbce ,_cddf (_aca ._dgf .CTM .Mult (_aca ._decf ).Mult (_bbce )),_ada ,_dfag ,_cddf (_dfag ));};_bddg ,_beb :=_aca .newTextMark (_ba .ExpandLigatures (_ffd ),_cgb ,_cddf (_dfag ),_c .Abs (_abe *_cgb .ScalingFactorX ()),_ggea ,_aca ._gdf ._geca ,_fgce ,_gff );if !_beb {_f .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");continue ;};if _ggea ==nil {_f .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _ggea .Encoder ()==nil {_f .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_ggea );}else {if _aaf ,_fcf :=_ggea .Encoder ().CharcodeToRune (_acdg );_fcf {_bddg ._gacb =string (_aaf );};};_f .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_cebd ,_acdg ,_bddg ,_cgb );_aca ._cfdg =append (_aca ._cfdg ,&_bddg );_aca ._decf .Concat (_bbce );};return nil ;};func (_agdd *ruling )encloses (_eccb ,_bafc float64 )bool {return _agdd ._gbcgc -_eea <=_eccb &&_bafc <=_agdd ._gbab +_eea ;};
|
||
|
||
// Len returns the number of TextMarks in `ma`.
|
||
func (_ffae *TextMarkArray )Len ()int {if _ffae ==nil {return 0;};return len (_ffae ._cgdg );};var _dfeg =map[rulingKind ]string {_eeae :"\u006e\u006f\u006e\u0065",_eebcb :"\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_caeda :"\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"};func (_fcdda rulingList )sortStrict (){_ae .Slice (_fcdda ,func (_gfgb ,_aedaed int )bool {_gcgc ,_gfce :=_fcdda [_gfgb ],_fcdda [_aedaed ];_eegc ,_fccgg :=_gcgc ._aeffb ,_gfce ._aeffb ;if _eegc !=_fccgg {return _eegc > _fccgg ;};_gggdf ,_dbbed :=_gcgc ._ggaeg ,_gfce ._ggaeg ;if !_afbcee (_gggdf -_dbbed ){return _gggdf < _dbbed ;};_gggdf ,_dbbed =_gcgc ._gbcgc ,_gfce ._gbcgc ;if _gggdf !=_dbbed {return _gggdf < _dbbed ;};return _gcgc ._gbab < _gfce ._gbab ;});};func (_eee *textObject )getFont (_cgg string )(*_ed .PdfFont ,error ){if _eee ._geg ._cc !=nil {_eee ._geg ._cb ++;_eafb ,_bdcf :=_eee ._geg ._cc [_cgg ];if _bdcf {_eafb ._aged =_eee ._geg ._cb ;return _eafb ._fdg ,nil ;};};_bced ,_gcec :=_eee .getFontDirect (_cgg );if _gcec !=nil {return nil ,_gcec ;};if _eee ._geg ._cc !=nil {_ccbb :=fontEntry {_bced ,_eee ._geg ._cb };if len (_eee ._geg ._cc )>=_dgeeb {var _bcd []string ;for _eebd :=range _eee ._geg ._cc {_bcd =append (_bcd ,_eebd );};_ae .Slice (_bcd ,func (_dgee ,_dda int )bool {return _eee ._geg ._cc [_bcd [_dgee ]]._aged < _eee ._geg ._cc [_bcd [_dda ]]._aged });delete (_eee ._geg ._cc ,_bcd [0]);};_eee ._geg ._cc [_cgg ]=_ccbb ;};return _bced ,nil ;};func _ddgb (_bacg string ,_ddcc []rulingList ){_f .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_ddcc ),_bacg );for _cbcgb ,_defc :=range _ddcc {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cbcgb ,_defc .String ());};};func _gede (_bead _ed .PdfRectangle )*ruling {return &ruling {_aeffb :_caeda ,_ggaeg :_bead .Llx ,_gbcgc :_bead .Lly ,_gbab :_bead .Ury };};func (_edff *wordBag )pullWord (_gdadc *textWord ,_cfaa int ,_dbgb map[int 
]map[*textWord ]struct{}){_edff .PdfRectangle =_afeae (_edff .PdfRectangle ,_gdadc .PdfRectangle );if _gdadc ._cbdb > _edff ._fcggb {_edff ._fcggb =_gdadc ._cbdb ;};_edff ._adee [_cfaa ]=append (_edff ._adee [_cfaa ],_gdadc );_dbgb [_cfaa ][_gdadc ]=struct{}{};};func _ffea (_gecb []_ee .PdfObject )(_cdfdb ,_egab float64 ,_gbabc error ){if len (_gecb )!=2{return 0,0,_gb .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_gecb ));};_cgcf ,_gbabc :=_ee .GetNumbersAsFloat (_gecb );if _gbabc !=nil {return 0,0,_gbabc ;};return _cgcf [0],_cgcf [1],nil ;};func (_abbg lineRuling )xMean ()float64 {return 0.5*(_abbg ._cadf .X +_abbg ._abeeg .X )};type textTable struct{_ed .PdfRectangle ;_effgf ,_cecff int ;_gcedd bool ;_afdde map[uint64 ]*textPara ;_egaa map[uint64 ]compositeCell ;};func _ffgf (_cfdd _ed .PdfRectangle )*ruling {return &ruling {_aeffb :_eebcb ,_ggaeg :_cfdd .Lly ,_gbcgc :_cfdd .Llx ,_gbab :_cfdd .Urx };};func (_gcege *textTable )getComposite (_afed ,_bffgf int )(paraList ,_ed .PdfRectangle ){_eaeb ,_gaggce :=_gcege ._egaa [_bacb (_afed ,_bffgf )];if _cece {_gb .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a",_afed ,_bffgf ,_eaeb .String ());};if !_gaggce {return nil ,_ed .PdfRectangle {};};return _eaeb .parasBBox ();};func _daga (_faca string )(string ,bool ){_fdeg :=[]rune (_faca );if len (_fdeg )!=1{return "",false ;};_cfddf ,_dcgee :=_egfe [_fdeg [0]];return _cfddf ,_dcgee ;};
|
||
|
||
// String returns a string describing the current state of the textState stack.
|
||
func (_egb *stateStack) String() string {
	lines := make([]string, 0, len(*_egb)+1)
	lines = append(lines, _gb.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(*_egb)))
	for i, state := range *_egb {
		desc := "\u003c\u006e\u0069l\u003e"
		if state != nil {
			desc = state.String()
		}
		lines = append(lines, _gb.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", i, desc))
	}
	return _bee.Join(lines, "\u000a")
}

// last returns the final point of the subpath.
// NOTE(review): indexes the last element of `_cecb`, so the subpath is assumed non-empty.
func (_gdeee *subpath) last() _ab.Point {
	points := _gdeee._cecb
	return points[len(points)-1]
}

// blocked reports whether a ruling separates `_deae` from the bag. When the word lies
// entirely to the left or right of the bag, the facing edges are tested against `_ccfc`;
// when it lies entirely below or above, against `_dffb`. The horizontal test runs first.
func (_bdgb *wordBag) blocked(_deae *textWord) bool {
	switch {
	case _deae.Urx < _bdgb.Llx:
		wordEdge := _fece(_deae.PdfRectangle)
		bagEdge := _gede(_bdgb.PdfRectangle)
		if _bdgb._ccfc.blocks(wordEdge, bagEdge) {
			if _fade {
				_f.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _deae, _bdgb)
			}
			return true
		}
	case _bdgb.Urx < _deae.Llx:
		bagEdge := _fece(_bdgb.PdfRectangle)
		wordEdge := _gede(_deae.PdfRectangle)
		if _bdgb._ccfc.blocks(bagEdge, wordEdge) {
			if _fade {
				_f.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s", _deae, _bdgb)
			}
			return true
		}
	}

	switch {
	case _deae.Ury < _bdgb.Lly:
		wordEdge := _cfcd(_deae.PdfRectangle)
		bagEdge := _ffgf(_bdgb.PdfRectangle)
		if _bdgb._dffb.blocks(wordEdge, bagEdge) {
			if _fade {
				_f.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _deae, _bdgb)
			}
			return true
		}
	case _bdgb.Ury < _deae.Lly:
		bagEdge := _cfcd(_bdgb.PdfRectangle)
		wordEdge := _ffgf(_deae.PdfRectangle)
		if _bdgb._dffb.blocks(bagEdge, wordEdge) {
			if _fade {
				_f.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s", _deae, _bdgb)
			}
			return true
		}
	}
	return false
}

// _cdab returns true when either paragraph has `_egdb` set; otherwise it reports whether
// `_afbcee` accepts the difference of their depths.
func _cdab(_bbfg, _afff *textPara) bool {
	if _bbfg._egdb || _afff._egdb {
		return true
	}
	depthDiff := _bbfg.depth() - _afff.depth()
	return _afbcee(depthDiff)
}

// lines returns the lines of all paragraphs in order.
func (_egeg paraList) lines() []*textLine {
	var all []*textLine
	for i := range _egeg {
		all = append(all, _egeg[i]._egddd...)
	}
	return all
}

// numBorders returns how many of the tile's four border flags are set.
func (_gceg gridTile) numBorders() int {
	count := 0
	for _, set := range [...]bool{_gceg._beed, _gceg._faac, _gceg._bggc, _gceg._dfccg} {
		if set {
			count++
		}
	}
	return count
}
|
||
|
||
// TableCell is a cell in a TextTable.
|
||
type TableCell struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Marks returns the TextMarks corresponding to the text in Text.
|
||
Marks TextMarkArray ;};
|
||
|
||
// String returns a description of `state`.
|
||
func (_dbga *textState) String() string {
	// A placeholder stands in for the font name while no font is set.
	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	if font := _dbga._agcd; font != nil {
		fontName = font.BaseFont()
	}
	return _gb.Sprintf("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071",
		_dbga._geca, _dbga._eefb, _dbga._ggge, fontName)
}

// emptyRow reports whether every cell in row `_ggffb` is either missing
// (nil) or has empty text.
func (_ceee *textTable) emptyRow(_ggffb int) bool {
	for x := 0; x < _ceee._effgf; x++ {
		cell := _ceee.get(x, _ggffb)
		if cell == nil {
			continue
		}
		if cell.text() != "" {
			return false
		}
	}
	return true
}

// nextLine calls moveLP with a zero x offset and the negated `_feb` value of
// the text state as the y offset.
func (_eafc *textObject) nextLine() {
	leading := _eafc._gdf._feb
	_eafc.moveLP(0, -leading)
}

// primaries returns the distinct `_ggaeg` values of the rulings in ascending
// order. The result is an empty, non-nil slice for an empty list.
func (_gaaae rulingList) primaries() []float64 {
	seen := make(map[float64]struct{}, len(_gaaae))
	for _, r := range _gaaae {
		seen[r._ggaeg] = struct{}{}
	}
	vals := make([]float64, 0, len(seen))
	for v := range seen {
		vals = append(vals, v)
	}
	_ae.Float64s(vals)
	return vals
}

// mergePrimary returns the arithmetic mean of the `_ggaeg` values of the
// rulings. The list must not be empty: element 0 is read unconditionally.
func (_gbge rulingList) mergePrimary() float64 {
	// Seed the sum with the first element (not zero) so the floating-point
	// accumulation order is unchanged.
	total := _gbge[0]._ggaeg
	for i := 1; i < len(_gbge); i++ {
		total += _gbge[i]._ggaeg
	}
	return total / float64(len(_gbge))
}
|
||
|
||
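// BBox returns the smallest axis-aligned rectangle that encloses the TextMarks in the array, skipping Meta marks and marks whose text is empty or only whitespace. The bool result is false when no mark qualifies.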
|
||
func (_caf *TextMarkArray) BBox() (_ed.PdfRectangle, bool) {
	var (
		union _ed.PdfRectangle
		found bool
	)
	for _, mark := range _caf._cgdg {
		switch {
		case mark.Meta || _acef(mark.Text):
			// Meta marks and marks rejected by _acef do not contribute.
		case !found:
			// The first contributing mark seeds the box.
			union, found = mark.BBox, true
		default:
			union = _afeae(union, mark.BBox)
		}
	}
	return union, found
}

// _facba converts stroked path sections into rulings. After passing the
// sections to `_gddg`, it walks each subpath with at least two points and
// offers every pair of consecutive points, together with the section's
// Color, to `_gggecd`; the rulings that `_gggecd` accepts are collected in
// order. The result is nil when nothing is accepted.
func _facba(_bgeaf []pathSection) rulingList {
	_gddg(_bgeaf)
	if _dgea {
		_f.Log.Info("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073", len(_bgeaf))
	}

	var rulings rulingList
	for _, section := range _bgeaf {
		for _, path := range section._fbf {
			pts := path._cecb
			if len(pts) < 2 {
				continue
			}
			for i := 1; i < len(pts); i++ {
				r, ok := _gggecd(pts[i-1], pts[i], section.Color)
				if ok {
					rulings = append(rulings, r)
				}
			}
		}
	}

	if _dgea {
		_f.Log.Info("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073", rulings)
	}
	return rulings
}
|
||
|
||
// TextMarkArray is a collection of TextMarks.
|
||
type TextMarkArray struct {
	// _cgdg is the underlying slice of marks.
	_cgdg []TextMark
}

// has reports whether `_gdcb` is a member of the set.
func (_gcbe intSet) has(_gdcb int) bool {
	if _, present := _gcbe[_gdcb]; present {
		return true
	}
	return false
}

// _gbf returns the negated lower y bound of the bounding box of `_gee`.
func _gbf(_gee bounded) float64 {
	box := _gee.bbox()
	return -box.Lly
}

// _acef reports whether every rune of `_aeaca` is Unicode white space.
// It returns true for the empty string.
func _acef(_aeaca string) bool {
	runes := []rune(_aeaca)
	for i := 0; i < len(runes); i++ {
		if !_gf.IsSpace(runes[i]) {
			return false
		}
	}
	return true
}

// putComposite stores a composite cell built from `_dbdd` and `_ecadg` in the
// table's `_egaa` map under the key `_bacb(_fgfad, _abfgd)`. An empty `_dbdd`
// is logged as an error and nothing is stored. The cell's bounding box is
// updated from its paras just before it is stored.
func (_eeea *textTable) putComposite(_fgfad, _abfgd int, _dbdd paraList, _ecadg _ed.PdfRectangle) {
	if len(_dbdd) == 0 {
		_f.Log.Error("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073")
		return
	}
	cell := compositeCell{PdfRectangle: _ecadg, paraList: _dbdd}
	if _cece {
		// Logged before updateBBox, so the description shows the box as passed in.
		_gb.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a", _fgfad, _abfgd, cell.String())
	}
	cell.updateBBox()
	_eeea._egaa[_bacb(_fgfad, _abfgd)] = cell
}

// computeBbox returns the union of the rectangles of all non-nil cells
// returned by get, visiting cells row by row. It returns the zero rectangle
// when the table has no non-nil cell.
func (_gegfa *textTable) computeBbox() _ed.PdfRectangle {
	var (
		bbox   _ed.PdfRectangle
		seeded bool
	)
	for y := 0; y < _gegfa._cecff; y++ {
		for x := 0; x < _gegfa._effgf; x++ {
			cell := _gegfa.get(x, y)
			switch {
			case cell == nil:
				// Missing cells do not contribute.
			case seeded:
				bbox = _afeae(bbox, cell.PdfRectangle)
			default:
				bbox, seeded = cell.PdfRectangle, true
			}
		}
	}
	return bbox
}

// _bgce maps each markKind constant to its printable name.
var _bgce = map[markKind]string{
	_gdac:  "\u0073\u0074\u0072\u006f\u006b\u0065",
	_gafbf: "\u0066\u0069\u006c\u006c",
	_gagg:  "\u0061u\u0067\u006d\u0065\u006e\u0074",
}
|
||
|
||
// String returns a description of the text line.
|
||
func (_afef *textLine) String() string {
	// Prints `_cfbc`, the bounding box, `_dag` (labelled "fontsize" by the
	// format string) and the quoted line text.
	return _gb.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",
		_afef._cfbc, _afef.PdfRectangle, _afef._dag, _afef.text())
}

// getRight returns, for every row of the table, the para linked through
// `_bdb` from the cell in the last column.
//
// It returns nil when any of the following holds:
//   - a last-column cell is missing (get returns nil),
//   - a last-column cell has no `_bdb` link, or the linked para has `_febd` set,
//   - the collected paras are not chained row to row through `_bbccb`.
func (_bbad *textTable) getRight() paraList {
	column := make(paraList, _bbad._cecff)
	for y := 0; y < _bbad._cecff; y++ {
		cell := _bbad.get(_bbad._effgf-1, y)
		if cell == nil {
			// get can return nil (emptyRow and computeBbox both check for
			// it); dereferencing it here would panic.
			return nil
		}
		right := cell._bdb
		if right == nil || right._febd {
			return nil
		}
		column[y] = right
	}
	for y := 0; y < _bbad._cecff-1; y++ {
		if column[y]._bbccb != column[y+1] {
			return nil
		}
	}
	return column
}

// setTextLeading stores `_gaa` in the `_feb` field of the text state.
// It is a no-op on a nil receiver.
func (_cgdc *textObject) setTextLeading(_gaa float64) {
	if _cgdc == nil {
		return
	}
	_cgdc._gdf._feb = _gaa
}

// devicePoint transforms (`_gbbf`, `_aafg`) by the matrix
// `_ffef.Mult(_dbcg)` and returns the result as a point.
func (_degg *shapesState) devicePoint(_gbbf, _aafg float64) _ab.Point {
	m := _degg._ffef.Mult(_degg._dbcg)
	_gbbf, _aafg = m.Transform(_gbbf, _aafg)
	return _ab.NewPoint(_gbbf, _aafg)
}

// splitSec partitions the rulings into groups. A ruling joins a group when
// alignsSec holds between it and a ruling already in that group. The
// receiver is sorted in place by (`_gbcgc`, `_gbab`) first. An empty list
// yields nil.
func (_egag rulingList) splitSec() []rulingList {
	if len(_egag) == 0 {
		// Nothing to group; also avoids indexing _egag[0] below.
		return nil
	}
	_ae.Slice(_egag, func(i, j int) bool {
		a, b := _egag[i], _egag[j]
		if a._gbcgc != b._gbcgc {
			return a._gbcgc < b._gbcgc
		}
		return a._gbab < b._gbab
	})

	grouped := make(map[*ruling]struct{}, len(_egag))
	// collect gathers, in one pass over the sorted list, every ungrouped
	// ruling that aligns with a ruling already in the group seeded by `seed`.
	collect := func(seed *ruling) rulingList {
		group := rulingList{seed}
		grouped[seed] = struct{}{}
		for _, cand := range _egag {
			if _, done := grouped[cand]; done {
				continue
			}
			for _, member := range group {
				if cand.alignsSec(member) {
					group = append(group, cand)
					grouped[cand] = struct{}{}
					break
				}
			}
		}
		return group
	}

	groups := []rulingList{collect(_egag[0])}
	for _, r := range _egag[1:] {
		if _, done := grouped[r]; done {
			continue
		}
		groups = append(groups, collect(r))
	}
	return groups
}
|
||
|
||
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
|
||
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
|
||
// Replace with a function like Extract() (*PageText, error)
|
||
func (_cbfb *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_bdc ,_ggg ,_gcfa ,_gbc :=_cbfb .extractPageText (_cbfb ._fb ,_cbfb ._eef ,_ab .IdentityMatrix (),0);if _gbc !=nil {return nil ,0,0,_gbc ;};_bdc .computeViews ();_gbc =_aaddf (_bdc );if _gbc !=nil {return nil ,0,0,_gbc ;};return _bdc ,_ggg ,_gcfa ,nil ;};func (_afafc paraList )findGridTables (_edbe []gridTiling )[]*textTable {if _cece {_f .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_afafc ));for _afffd ,_cbef :=range _afafc {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_afffd ,_cbef );};};var _eacgbf []*textTable ;for _babe ,_efabd :=range _edbe {_fecga ,_gfaa :=_afafc .findTableGrid (_efabd );if _fecga !=nil {_fecga .log (_gb .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_babe ));_eacgbf =append (_eacgbf ,_fecga );_fecga .markCells ();};for _edbf :=range _gfaa {_edbf ._febd =true ;};};if _cece {_f .Log .Info ("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s",len (_eacgbf ));};return _eacgbf ;};func (_fcafg paraList )addNeighbours (){_gbgb :=func (_eggb []int ,_bgge *textPara )([]*textPara ,[]*textPara ){_ecbf :=make ([]*textPara ,0,len (_eggb )-1);_eabg :=make ([]*textPara ,0,len (_eggb )-1);for _ ,_gafgb :=range _eggb {_fgef :=_fcafg [_gafgb ];if _fgef .Urx <=_bgge .Llx {_ecbf =append (_ecbf ,_fgef );}else if _fgef .Llx >=_bgge .Urx {_eabg =append (_eabg ,_fgef );};};return _ecbf ,_eabg ;};_bcgcg :=func (_cdgba []int ,_bbbg *textPara )([]*textPara ,[]*textPara ){_beaa :=make ([]*textPara ,0,len (_cdgba )-1);_aeaf :=make ([]*textPara ,0,len (_cdgba )-1);for _ ,_aafbd :=range _cdgba {_eeafg :=_fcafg [_aafbd ];if _eeafg .Ury <=_bbbg .Lly {_aeaf =append (_aeaf ,_eeafg );}else if _eeafg .Lly >=_bbbg .Ury {_beaa 
=append (_beaa ,_eeafg );};};return _beaa ,_aeaf ;};_fefde :=_fcafg .yNeighbours (_fcff );for _ ,_dgag :=range _fcafg {_gdefg :=_fefde [_dgag ];if len (_gdefg )==0{continue ;};_aace ,_edfb :=_gbgb (_gdefg ,_dgag );if len (_aace )==0&&len (_edfb )==0{continue ;};if len (_aace )> 0{_gfddd :=_aace [0];for _ ,_dabag :=range _aace [1:]{if _dabag .Urx >=_gfddd .Urx {_gfddd =_dabag ;};};for _ ,_bcecc :=range _aace {if _bcecc !=_gfddd &&_bcecc .Urx > _gfddd .Llx {_gfddd =nil ;break ;};};if _gfddd !=nil &&_bdag (_dgag .PdfRectangle ,_gfddd .PdfRectangle ){_dgag ._begb =_gfddd ;};};if len (_edfb )> 0{_ddda :=_edfb [0];for _ ,_cdag :=range _edfb [1:]{if _cdag .Llx <=_ddda .Llx {_ddda =_cdag ;};};for _ ,_bcedc :=range _edfb {if _bcedc !=_ddda &&_bcedc .Llx < _ddda .Urx {_ddda =nil ;break ;};};if _ddda !=nil &&_bdag (_dgag .PdfRectangle ,_ddda .PdfRectangle ){_dgag ._bdb =_ddda ;};};};_fefde =_fcafg .xNeighbours (_aadd );for _ ,_fbbe :=range _fcafg {_fecb :=_fefde [_fbbe ];if len (_fecb )==0{continue ;};_fgabc ,_adgd :=_bcgcg (_fecb ,_fbbe );if len (_fgabc )==0&&len (_adgd )==0{continue ;};if len (_adgd )> 0{_bedag :=_adgd [0];for _ ,_affgf :=range _adgd [1:]{if _affgf .Ury >=_bedag .Ury {_bedag =_affgf ;};};for _ ,_geafa :=range _adgd {if _geafa !=_bedag &&_geafa .Ury > _bedag .Lly {_bedag =nil ;break ;};};if _bedag !=nil &&_gfda (_fbbe .PdfRectangle ,_bedag .PdfRectangle ){_fbbe ._bbccb =_bedag ;};};if len (_fgabc )> 0{_gfedd :=_fgabc [0];for _ ,_gfdg :=range _fgabc [1:]{if _gfdg .Lly <=_gfedd .Lly {_gfedd =_gfdg ;};};for _ ,_dfae :=range _fgabc {if _dfae !=_gfedd &&_dfae .Lly < _gfedd .Ury {_gfedd =nil ;break ;};};if _gfedd !=nil &&_gfda (_fbbe .PdfRectangle ,_gfedd .PdfRectangle ){_fbbe ._ebecb =_gfedd ;};};};for _ ,_efgdb :=range _fcafg {if _efgdb ._begb !=nil &&_efgdb ._begb ._bdb !=_efgdb {_efgdb ._begb =nil ;};if _efgdb ._ebecb !=nil &&_efgdb ._ebecb ._bbccb !=_efgdb {_efgdb ._ebecb =nil ;};if _efgdb ._bdb !=nil &&_efgdb ._bdb ._begb !=_efgdb {_efgdb ._bdb =nil ;};if 
_efgdb ._bbccb !=nil &&_efgdb ._bbccb ._ebecb !=_efgdb {_efgdb ._bbccb =nil ;};};};type markKind int ;func (_ebdb *subpath )add (_dfdc ..._ab .Point ){_ebdb ._cecb =append (_ebdb ._cecb ,_dfdc ...)};func (_eagf rulingList )comp (_aggbg ,_addd int )bool {_ggfa ,_fbefa :=_eagf [_aggbg ],_eagf [_addd ];_abfc ,_gdbb :=_ggfa ._aeffb ,_fbefa ._aeffb ;if _abfc !=_gdbb {return _abfc > _gdbb ;};if _abfc ==_eeae {return false ;};_gggcc :=func (_fede bool )bool {if _abfc ==_eebcb {return _fede ;};return !_fede ;};_cafbe ,_agfa :=_ggfa ._ggaeg ,_fbefa ._ggaeg ;if _cafbe !=_agfa {return _gggcc (_cafbe > _agfa );};_cafbe ,_agfa =_ggfa ._gbcgc ,_fbefa ._gbcgc ;if _cafbe !=_agfa {return _gggcc (_cafbe < _agfa );};return _gggcc (_ggfa ._gbab < _fbefa ._gbab );};func _ebaca (_bcdf map[float64 ]map[float64 ]gridTile )[]float64 {_eebf :=make ([]float64 ,0,len (_bcdf ));_bfdfe :=make (map[float64 ]struct{},len (_bcdf ));for _ ,_gfgba :=range _bcdf {for _efec :=range _gfgba {if _ ,_cdcef :=_bfdfe [_efec ];_cdcef {continue ;};_eebf =append (_eebf ,_efec );_bfdfe [_efec ]=struct{}{};};};_ae .Float64s (_eebf );return _eebf ;};func (_fcbb *textTable )isExportable ()bool {if _fcbb ._gcedd {return true ;};_bgdff :=func (_cage int )bool {_egfg :=_fcbb .get (0,_cage );if _egfg ==nil {return false ;};_efgcd :=_egfg .text ();_cbfd :=_bc .RuneCountInString (_efgcd );_dcdg :=_ccba .MatchString (_efgcd );return _cbfd <=1||_dcdg ;};for _aacg :=0;_aacg < _fcbb ._cecff ;_aacg ++{if !_bgdff (_aacg ){return true ;};};return false ;};func (_aaef paraList )reorder (_cgcc []int ){_feage :=make (paraList ,len (_aaef ));for _eeef ,_aaab :=range _cgcc {_feage [_eeef ]=_aaef [_aaab ];};copy (_aaef ,_feage );};func (_cada rulingList )asTiling ()gridTiling {if _bcbb {_f .Log .Info 
("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_cada ));};for _gaee ,_fadef :=range _cada [1:]{_gdbbg :=_cada [_gaee ];if _gdbbg .alignsPrimary (_fadef )&&_gdbbg .alignsSec (_fadef ){_f .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_fadef ,_gdbbg );};};_cada .sortStrict ();_cada .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_efbb ,_gfbbg :=_cada .vertsHorzs ();_ccccf :=_efbb .primaries ();_aaff :=_gfbbg .primaries ();_gcac :=len (_ccccf )-1;_eafad :=len (_aaff )-1;if _gcac ==0||_eafad ==0{return gridTiling {};};_gcedc :=_ed .PdfRectangle {Llx :_ccccf [0],Urx :_ccccf [_gcac ],Lly :_aaff [0],Ury :_aaff [_eafad ]};if _bcbb {_f .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_efbb ));for _bgeg ,_gdbd :=range _efbb {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bgeg ,_gdbd );};_f .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_gfbbg ));for _ceggc ,_cabd :=range _gfbbg {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ceggc ,_cabd );};_f .Log .Info 
("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_gcac ,_eafad ,_ccccf ,_aaff );};_eega :=make ([]gridTile ,_gcac *_eafad );for _aagg :=_eafad -1;_aagg >=0;_aagg --{_dacf :=_aaff [_aagg ];_cadeg :=_aaff [_aagg +1];for _eeca :=0;_eeca < _gcac ;_eeca ++{_fagg :=_ccccf [_eeca ];_cdaab :=_ccccf [_eeca +1];_gcbg :=_efbb .findPrimSec (_fagg ,_dacf );_fdee :=_efbb .findPrimSec (_cdaab ,_dacf );_eace :=_gfbbg .findPrimSec (_dacf ,_fagg );_affac :=_gfbbg .findPrimSec (_cadeg ,_fagg );_gedef :=_ed .PdfRectangle {Llx :_fagg ,Urx :_cdaab ,Lly :_dacf ,Ury :_cadeg };_ddfd :=_bcaa (_gedef ,_gcbg ,_fdee ,_eace ,_affac );_eega [_aagg *_gcac +_eeca ]=_ddfd ;if _bcbb {_gb .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_eeca ,_aagg ,_ddfd .String (),_ddfd .Width (),_ddfd .Height ());};};};if _bcbb {_f .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_gcedc );};_eddb :=make ([]map[float64 ]gridTile ,_eafad );for _aabf :=_eafad -1;_aabf >=0;_aabf --{if _bcbb {_gb .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_aabf );};_eddb [_aabf ]=make (map[float64 ]gridTile ,_gcac );for _bfdfg :=0;_bfdfg < _gcac ;_bfdfg ++{_ecag :=_eega [_aabf *_gcac +_bfdfg ];if _bcbb {_gb .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bfdfg ,_ecag );};if !_ecag ._beed {continue ;};_agdc :=_bfdfg ;for _dfege :=_bfdfg +1;!_ecag ._faac &&_dfege < _gcac ;_dfege ++{_efcd :=_eega [_aabf 
*_gcac +_dfege ];_ecag .Urx =_efcd .Urx ;_ecag ._dfccg =_ecag ._dfccg ||_efcd ._dfccg ;_ecag ._bggc =_ecag ._bggc ||_efcd ._bggc ;_ecag ._faac =_efcd ._faac ;if _bcbb {_gb .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_dfege ,_efcd ,_ecag );};_agdc =_dfege ;};if _bcbb {_gb .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_bfdfg ,_agdc ,_ecag );};_bfdfg =_agdc ;_eddb [_aabf ][_ecag .Llx ]=_ecag ;};};_beedc :=make (map[float64 ]map[float64 ]gridTile ,_eafad );_gege :=make (map[float64 ]map[float64 ]struct{},_eafad );for _agdbd :=_eafad -1;_agdbd >=0;_agdbd --{_bfccd :=_eega [_agdbd *_gcac ].Lly ;_beedc [_bfccd ]=make (map[float64 ]gridTile ,_gcac );_gege [_bfccd ]=make (map[float64 ]struct{},_gcac );};if _bcbb {_f .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_gcedc );};for _dddd :=_eafad -1;_dddd >=0;_dddd --{_gdcg :=_eega [_dddd *_gcac ].Lly ;_ceefb :=_eddb [_dddd ];if _bcbb {_gb .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_dddd );};for _ ,_fgddf :=range _eaad (_ceefb ){if _ ,_ageb :=_gege [_gdcg ][_fgddf ];_ageb {continue ;};_cccb :=_ceefb [_fgddf ];if _bcbb {_gb .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_cccb .String ());};for _cgda :=_dddd -1;_cgda >=0;_cgda --{if _cccb ._bggc {break ;};_fbdc :=_eddb [_cgda ];_dafbg ,_deceg :=_fbdc [_fgddf ];if !_deceg {break ;};if _dafbg .Urx !=_cccb .Urx {break ;};_cccb ._bggc =_dafbg ._bggc ;_cccb .Lly =_dafbg .Lly ;if _bcbb {_gb .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_dafbg .String (),_cccb .String ());};_gege [_dafbg .Lly ][_dafbg .Llx ]=struct{}{};};if _dddd ==0{_cccb ._bggc =true ;};if _cccb .complete 
(){_beedc [_gdcg ][_fgddf ]=_cccb ;};};};_ggdg :=gridTiling {PdfRectangle :_gcedc ,_abfe :_ebaca (_beedc ),_gabed :_bdfc (_beedc ),_eege :_beedc };_ggdg .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");return _ggdg ;};func _gefa (_fabb []*wordBag )[]*wordBag {if len (_fabb )<=1{return _fabb ;};if _dcfd {_f .Log .Info ("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a");};_ae .Slice (_fabb ,func (_dcga ,_aaa int )bool {_feef ,_befd :=_fabb [_dcga ],_fabb [_aaa ];_bbca :=_feef .Width ()*_feef .Height ();_aeaa :=_befd .Width ()*_befd .Height ();if _bbca !=_aeaa {return _bbca > _aeaa ;};if _feef .Height ()!=_befd .Height (){return _feef .Height ()> _befd .Height ();};return _dcga < _aaa ;});var _cegc []*wordBag ;_ccg :=make (intSet );for _ceeg :=0;_ceeg < len (_fabb );_ceeg ++{if _ccg .has (_ceeg ){continue ;};_aada :=_fabb [_ceeg ];for _gbcd :=_ceeg +1;_gbcd < len (_fabb );_gbcd ++{if _ccg .has (_ceeg ){continue ;};_cegg :=_fabb [_gbcd ];_aedd :=_aada .PdfRectangle ;_aedd .Llx -=_aada ._fcggb ;if _daba (_aedd ,_cegg .PdfRectangle ){_aada .absorb (_cegg );_ccg .add (_gbcd );};};_cegc =append (_cegc ,_aada );};if len (_fabb )!=len (_cegc )+len (_ccg ){_f .Log .Error ("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064",len (_fabb ),len (_cegc ),len (_ccg ));};return _cegc ;};func (_beba *shapesState )stroke (_gcecg *[]pathSection ){_fafga :=pathSection {_fbf :_beba ._fbd ,Color :_beba ._adb .getStrokeColor ()};*_gcecg =append (*_gcecg ,_fafga );if _dgea {_gb .Printf ("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",len (*_gcecg ),_beba ,_beba ._adb .getStrokeColor (),_fafga .bbox ());if _ebgf {for _cbbg ,_facc :=range _beba ._fbd {_gb 
.Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_cbbg ,_facc );if _cbbg ==10{break ;};};};};};func (_cbde *stateStack )top ()*textState {if _cbde .empty (){return nil ;};return (*_cbde )[_cbde .size ()-1];};func (_ecfaa compositeCell )String ()string {_dfgf :="";if len (_ecfaa .paraList )> 0{_dfgf =_cgee (_ecfaa .paraList .merge ().text (),50);};return _gb .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071",_ecfaa .PdfRectangle ,len (_ecfaa .paraList ),_dfgf );};func (_cccc compositeCell )parasBBox ()(paraList ,_ed .PdfRectangle ){return _cccc .paraList ,_cccc .PdfRectangle ;};type bounded interface{bbox ()_ed .PdfRectangle };func _afbcee (_abag float64 )bool {return _c .Abs (_abag )< _eabe };func (_dedgf gridTile )complete ()bool {return _dedgf .numBorders ()==4};func (_dacfd paraList )inTile (_abad gridTile )paraList {var _efdbb paraList ;for _ ,_dcad :=range _dacfd {if _abad .contains (_dcad .PdfRectangle ){_efdbb =append (_efdbb ,_dcad );};};if _cece {_gb .Printf ("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n",_abad ,len (_efdbb ));for _eegdad ,_adfef :=range _efdbb {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_eegdad ,_adfef );};_gb .Println ("");};return _efdbb ;};type textLine struct{_ed .PdfRectangle ;_cfbc float64 ;_gad []*textWord ;_dag float64 ;};func (_ffbc paraList )toTextMarks ()[]TextMark {_cfbba :=0;var _abffb []TextMark ;for _dfb ,_ggbc :=range _ffbc {if _ggbc ._egdb {continue ;};_gbac :=_ggbc .toTextMarks (&_cfbba );_abffb =append (_abffb ,_gbac ...);if _dfb !=len (_ffbc )-1{if _cdab (_ggbc ,_ffbc [_dfb +1]){_abffb =_cagdb (_abffb ,&_cfbba ,"\u0020");}else {_abffb =_cagdb (_abffb ,&_cfbba ,"\u000a");_abffb =_cagdb (_abffb ,&_cfbba ,"\u000a");};};};_abffb =_cagdb (_abffb ,&_cfbba ,"\u000a");_abffb =_cagdb (_abffb ,&_cfbba ,"\u000a");return _abffb ;};func (_eefc rulingList )blocks 
(_gbfc ,_gdfa *ruling )bool {if _gbfc ._gbcgc > _gdfa ._gbab ||_gdfa ._gbcgc > _gbfc ._gbab {return false ;};_cdbd :=_c .Max (_gbfc ._gbcgc ,_gdfa ._gbcgc );_feed :=_c .Min (_gbfc ._gbab ,_gdfa ._gbab );if _gbfc ._ggaeg > _gdfa ._ggaeg {_gbfc ,_gdfa =_gdfa ,_gbfc ;};for _ ,_cbbga :=range _eefc {if _gbfc ._ggaeg <=_cbbga ._ggaeg +_bebb &&_cbbga ._ggaeg <=_gdfa ._ggaeg +_bebb &&_cbbga ._gbcgc <=_feed &&_cdbd <=_cbbga ._gbab {return true ;};};return false ;};func (_badc *compositeCell )updateBBox (){for _ ,_bgfgf :=range _badc .paraList {_badc .PdfRectangle =_afeae (_badc .PdfRectangle ,_bgfgf .PdfRectangle );};};func (_dgbg *textLine )appendWord (_fdef *textWord ){_dgbg ._gad =append (_dgbg ._gad ,_fdef );_dgbg .PdfRectangle =_afeae (_dgbg .PdfRectangle ,_fdef .PdfRectangle );if _fdef ._cbdb > _dgbg ._dag {_dgbg ._dag =_fdef ._cbdb ;};if _fdef ._eabdg > _dgbg ._cfbc {_dgbg ._cfbc =_fdef ._eabdg ;};};func _cfee (_eeffb []TextMark ,_bacab *int ,_bfc TextMark )[]TextMark {_bfc .Offset =*_bacab ;_eeffb =append (_eeffb ,_bfc );*_bacab +=len (_bfc .Text );return _eeffb ;};func (_gabf *textObject )setHorizScaling (_fgf float64 ){if _gabf ==nil {return ;};_gabf ._gdf ._eggc =_fgf ;};type cachedImage struct{_bff *_ed .Image ;_df _ed .PdfColorspace ;};func (_baed *textLine )markWordBoundaries (){_edcc :=_fbab *_baed ._dag ;for _cdbed ,_ddab :=range _baed ._gad [1:]{if _fbbd (_ddab ,_baed ._gad [_cdbed ])>=_edcc {_ddab ._cdfbc =true ;};};};func (_dg *imageExtractContext )extractInlineImage (_da *_dc .ContentStreamInlineImage ,_abd _dc .GraphicsState ,_dbg *_ed .PdfPageResources )error {_ceb ,_gcf :=_da .ToImage (_dbg );if _gcf !=nil {return _gcf ;};_bcg ,_gcf :=_da .GetColorSpace (_dbg );if _gcf !=nil {return _gcf ;};if _bcg ==nil {_bcg =_ed .NewPdfColorspaceDeviceGray ();};_fdf ,_gcf :=_bcg .ImageToRGB (*_ceb );if _gcf !=nil {return _gcf ;};_gg :=ImageMark {Image :&_fdf ,Width :_abd .CTM .ScalingFactorX (),Height :_abd .CTM .ScalingFactorY (),Angle :_abd .CTM .Angle ()};_gg .X 
,_gg .Y =_abd .CTM .Translation ();_dg ._fcb =append (_dg ._fcb ,_gg );_dg ._aef ++;return nil ;};func _gfed (_cagd func (*wordBag ,*textWord ,float64 )bool ,_eeffa float64 )func (*wordBag ,*textWord )bool {return func (_ega *wordBag ,_acacc *textWord )bool {return _cagd (_ega ,_acacc ,_eeffa )};};func _ebd (_geff _ab .Point )_ab .Matrix {return _ab .TranslationMatrix (_geff .X ,_geff .Y )};func (_cbgc *ruling )alignsSec (_aba *ruling )bool {const _fdaf =_bebb +1.0;return _cbgc ._gbcgc -_fdaf <=_aba ._gbab &&_aba ._gbcgc -_fdaf <=_cbgc ._gbab ;};func _abcc (_acfg *wordBag ,_abge *textWord ,_dafc float64 )bool {return _acfg .Urx <=_abge .Llx &&_abge .Llx < _acfg .Urx +_dafc ;};type event struct{_eeeb float64 ;_fdbdd bool ;_dcag int ;};func (_eeec *wordBag )depthIndexes ()[]int {if len (_eeec ._adee )==0{return nil ;};_gfde :=make ([]int ,len (_eeec ._adee ));_bfea :=0;for _ffdc :=range _eeec ._adee {_gfde [_bfea ]=_ffdc ;_bfea ++;};_ae .Ints (_gfde );return _gfde ;};func _fcee (_egdcd float64 )float64 {return _gbdd *_c .Round (_egdcd /_gbdd )};
|
||
|
||
// String returns a description of the text para.
|
||
func (_cecef *textPara )String ()string {if _cecef ._egdb {return _gb .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d",_cecef .PdfRectangle );};_dfcg :="";if _cecef ._begg !=nil {_dfcg =_gb .Sprintf ("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020",_cecef ._begg ._effgf ,_cecef ._begg ._cecff );};return _gb .Sprintf ("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071",_cecef .PdfRectangle ,_dfcg ,len (_cecef ._egddd ),_cgee (_cecef .text (),50));};func (_ggad *subpath )isQuadrilateral ()bool {if len (_ggad ._cecb )< 4||len (_ggad ._cecb )> 5{return false ;};if len (_ggad ._cecb )==5{_dfdec :=_ggad ._cecb [0];_cbdg :=_ggad ._cecb [4];if _dfdec .X !=_cbdg .X ||_dfdec .Y !=_cbdg .Y {return false ;};};return true ;};func _bgaf (_dcgc int ,_gacfd func (int ,int )bool )[]int {_fgbf :=make ([]int ,_dcgc );for _eeac :=range _fgbf {_fgbf [_eeac ]=_eeac ;};_ae .Slice (_fgbf ,func (_dgega ,_aafd int )bool {return _gacfd (_fgbf [_dgega ],_fgbf [_aafd ])});return _fgbf ;};func (_aegf *stateStack )push (_dfca *textState ){_adg :=*_dfca ;*_aegf =append (*_aegf ,&_adg )};func _bgbb (_ffc ,_fgde float64 )bool {return _c .Abs (_ffc -_fgde )<=_eea };func (_abcbf rulingList )merge ()*ruling {_fedcd :=_abcbf [0]._ggaeg ;_cggc :=_abcbf [0]._gbcgc ;_gfabf :=_abcbf [0]._gbab ;for _ ,_cddc :=range _abcbf [1:]{_fedcd +=_cddc ._ggaeg ;if _cddc ._gbcgc < _cggc {_cggc =_cddc ._gbcgc ;};if _cddc ._gbab > _gfabf {_gfabf =_cddc ._gbab ;};};_cgff :=&ruling {_aeffb :_abcbf [0]._aeffb ,_caba :_abcbf [0]._caba ,Color :_abcbf [0].Color ,_ggaeg :_fedcd /float64 (len (_abcbf )),_gbcgc :_cggc ,_gbab :_gfabf };if _cccf {_f .Log .Info ("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073",len (_abcbf ),_cgff );for _bebf ,_daab :=range _abcbf {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bebf ,_daab );};};return _cgff ;};func _egge (_cfgg ,_eebcg bounded 
)float64 {_dfad :=_eegd (_cfgg ,_eebcg );if !_afbcee (_dfad ){return _dfad ;};return _cagb (_cfgg ,_eebcg );};func (_dd *imageExtractContext )processOperand (_ge *_dc .ContentStreamOperation ,_gfe _dc .GraphicsState ,_fe *_ed .PdfPageResources )error {if _ge .Operand =="\u0042\u0049"&&len (_ge .Params )==1{_efe ,_ebg :=_ge .Params [0].(*_dc .ContentStreamInlineImage );if !_ebg {return nil ;};if _fge ,_ca :=_ee .GetBoolVal (_efe .ImageMask );_ca {if _fge &&!_dd ._fd .IncludeInlineStencilMasks {return nil ;};};return _dd .extractInlineImage (_efe ,_gfe ,_fe );}else if _ge .Operand =="\u0044\u006f"&&len (_ge .Params )==1{_eeb ,_faf :=_ee .GetName (_ge .Params [0]);if !_faf {_f .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");return _cf ;};_ ,_cd :=_fe .GetXObjectByName (*_eeb );switch _cd {case _ed .XObjectTypeImage :return _dd .extractXObjectImage (_eeb ,_gfe ,_fe );case _ed .XObjectTypeForm :return _dd .extractFormImages (_eeb ,_gfe ,_fe );};};return nil ;};func _daba (_ecef ,_ddga _ed .PdfRectangle )bool {return _ecef .Llx <=_ddga .Llx &&_ddga .Urx <=_ecef .Urx &&_ecef .Lly <=_ddga .Lly &&_ddga .Ury <=_ecef .Ury ;};func (_acdc paraList )writeText (_bbab _g .Writer ){for _febg ,_abbe :=range _acdc {if _abbe ._egdb {continue ;};_abbe .writeText (_bbab );if _febg !=len (_acdc )-1{if _cdab (_abbe ,_acdc [_febg +1]){_bbab .Write ([]byte ("\u0020"));}else {_bbab .Write ([]byte ("\u000a"));_bbab .Write ([]byte ("\u000a"));};};};_bbab .Write ([]byte ("\u000a"));_bbab .Write ([]byte ("\u000a"));};func (_gfab rulingList )snapToGroups ()rulingList {_gddd ,_bccb :=_gfab .vertsHorzs ();if len (_gddd )> 0{_gddd =_gddd .snapToGroupsDirection ();};if len (_bccb )> 0{_bccb =_bccb .snapToGroupsDirection ();};_bddf :=append (_gddd ,_bccb ...);_bddf .log ("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073");return _bddf ;};func _afga (_cabc []compositeCell )[]float64 {var _bcdgf []*textLine ;_fgcad :=0;for _ ,_fgdcd :=range _cabc {_fgcad 
+=len (_fgdcd .paraList );_bcdgf =append (_bcdgf ,_fgdcd .lines ()...);};_ae .Slice (_bcdgf ,func (_bdfa ,_cfbef int )bool {_cabdc ,_dfgg :=_bcdgf [_bdfa ],_bcdgf [_cfbef ];_gabc ,_ffbbd :=_cabdc ._cfbc ,_dfgg ._cfbc ;if !_afbcee (_gabc -_ffbbd ){return _gabc < _ffbbd ;};return _cabdc .Llx < _dfgg .Llx ;});if _cece {_gb .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_fgcad ,len (_bcdgf ));for _eadg ,_ecbc :=range _bcdgf {_gb .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_eadg ,_ecbc );};};var _eegdf []float64 ;_cfef :=_bcdgf [0];var _fggf [][]*textLine ;_bdceg :=[]*textLine {_cfef };for _cbda ,_dbae :=range _bcdgf [1:]{if _dbae .Ury < _cfef .Lly {_cagfgc :=0.5*(_dbae .Ury +_cfef .Lly );if _cece {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_cbda ,_dbae .Ury ,_cfef .Lly ,_cagfgc ,_cfef ,_dbae );};_eegdf =append (_eegdf ,_cagfgc );_fggf =append (_fggf ,_bdceg );_bdceg =nil ;};_bdceg =append (_bdceg ,_dbae );if _dbae .Lly < _cfef .Lly {_cfef =_dbae ;};};if len (_bdceg )> 0{_fggf =append (_fggf ,_bdceg );};if _cece {_gb .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_eegdf );};if _cece {_f .Log .Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len (_cabc ));for _eafbac ,_efbd :=range _cabc {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_eafbac ,_efbd );};_f .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_fggf ));for _bdgf ,_ggfb :=range _fggf {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_bdgf ,len (_ggfb ));for _gbddg ,_fgced :=range 
_ggfb {_gb .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_gbddg ,_fgced );};};};_eeeee :=true ;for _decg ,_bcca :=range _fggf {_cdcefd :=true ;for _cfafb ,_cbafe :=range _cabc {if _cece {_gb .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_decg ,len (_fggf ),_cfafb ,len (_cabc ),_cbafe );};if !_cbafe .hasLines (_bcca ){if _cece {_gb .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_decg ,len (_fggf ),_cfafb ,len (_cabc ));};_cdcefd =false ;break ;};};if !_cdcefd {_eeeee =false ;break ;};};if !_eeeee {if _cece {_f .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg");};_eegdf =nil ;};if _cece &&_eegdf !=nil {_gb .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_eegdf );};return _eegdf ;};func _ceebd (_ceaae ,_acfge ,_eefcc ,_eaefe *textPara )*textTable {_cdcc :=&textTable {_effgf :2,_cecff :2,_afdde :make (map[uint64 ]*textPara ,4)};_cdcc .put (0,0,_ceaae );_cdcc .put (1,0,_acfge );_cdcc .put (0,1,_eefcc );_cdcc .put (1,1,_eaefe );return _cdcc ;};func _aaec (_bfee ,_badgb _ab .Point )bool {_ggfc :=_c .Abs (_bfee .X -_badgb .X );_ddaf :=_c .Abs (_bfee .Y -_badgb .Y );return _cefa (_ggfc ,_ddaf );};type textResult struct{_dde PageText ;_bed int ;_bea int ;};func (_cgaa *textTable )compositeRowCorridors ()map[int ][]float64 
{_egccd :=make (map[int ][]float64 ,_cgaa ._cecff );if _cece {_f .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_cgaa ._cecff );};for _cacd :=1;_cacd < _cgaa ._cecff ;_cacd ++{var _dfgfe []compositeCell ;for _ededb :=0;_ededb < _cgaa ._effgf ;_ededb ++{if _gfcb ,_gdbdc :=_cgaa ._egaa [_bacb (_ededb ,_cacd )];_gdbdc {_dfgfe =append (_dfgfe ,_gfcb );};};if len (_dfgfe )==0{continue ;};_dgbbg :=_afga (_dfgfe );_egccd [_cacd ]=_dgbbg ;if _cece {_gb .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_cacd ,_dgbbg );};};return _egccd ;};const _fcc =20;func (_dgegd *textPara )taken ()bool {return _dgegd ==nil ||_dgegd ._febd };
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
// Marks are selected by testing their bounding boxes against `bbox` with
// _cdeg, then turned into paragraphs one orientation (0, 90, 180, 270) at a
// time. The text, text marks and tables stored in `pt` are replaced.
func (pt *PageText) ApplyArea(bbox _ed.PdfRectangle) {
	selected := make([]*textMark, 0, len(pt._cga))
	for _, mark := range pt._cga {
		if _cdeg(mark.bbox(), bbox) {
			selected = append(selected, mark)
		}
	}

	var paras paraList
	remaining := len(selected)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		// At most `remaining` marks can still carry this orientation.
		oriented := make([]*textMark, 0, remaining)
		for _, mark := range selected {
			if mark._gfeca == orient {
				oriented = append(oriented, mark)
			}
		}
		if len(oriented) == 0 {
			continue
		}
		paras = append(paras, _bad(oriented, pt._eaee, nil, nil)...)
		remaining -= len(oriented)
	}

	var buf _ag.Buffer
	paras.writeText(&buf)
	pt._eda = buf.String()
	pt._ffee = paras.toTextMarks()
	pt._bag = paras.tables()
}

// checkOp validates the parameter count of `op`. A negative `numParams`
// disables the check. On a mismatch it logs, and returns false together with
// an error if `hard` is set (nil error otherwise). When the receiver is nil
// the operation is logged as occurring outside a text object, but the count
// check still applies.
func (to *textObject) checkOp(op *_dc.ContentStreamOperation, numParams int, hard bool) (ok bool, err error) {
	if to == nil {
		var shown []_ee.PdfObject
		if numParams > 0 {
			shown = op.Params
			if len(shown) > numParams {
				shown = shown[:numParams]
			}
		}
		_f.Log.Debug("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076", op.Operand, shown)
	}
	if numParams < 0 || len(op.Params) == numParams {
		return true, nil
	}
	if hard {
		err = _be.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	}
	_f.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",
		op.Operand, numParams, len(op.Params), op.Params)
	return false, err
}

// addPoint converts (x, y) with devicePoint and appends it to the subpath
// returned by establishSubpath. If there is no subpath, the point is stored
// in _fcbf and the _ffbd flag is set instead.
func (ss *shapesState) addPoint(x, y float64) {
	sp := ss.establishSubpath()
	pt := ss.devicePoint(x, y)
	if sp != nil {
		sp.add(pt)
		return
	}
	ss._ffbd = true
	ss._fcbf = pt
}

// ruling describes a ruling by its kinds, colour and four coordinates.
type ruling struct {
	_aeffb rulingKind
	_caba  markKind
	_af.Color
	_ggaeg float64
	_gbcgc float64
	_gbab  float64
	_gcfgb float64
}

// yMean returns the mean of the Y coordinates of the two end points.
func (lr lineRuling) yMean() float64 {
	return 0.5 * (lr._cadf.Y + lr._abeeg.Y)
}

// intersections returns, for every ruling index that takes part in at least
// one intersection, the set of indexes of the rulings it intersects. Only
// pairs made of one _caeda ruling and one _eebcb ruling are tested. nil is
// returned when either kind has too few rulings (_debf / _aeb) or when there
// are more than _dfea rulings of the two kinds in total.
func (rl rulingList) intersections() map[int]intSet {
	// NOTE(review): vertsHorzs orders _caeda before _eebcb, so these are
	// presumably the vertical and horizontal rulings respectively.
	var vertIdx, horzIdx []int
	for i, r := range rl {
		switch r._aeffb {
		case _caeda:
			vertIdx = append(vertIdx, i)
		case _eebcb:
			horzIdx = append(horzIdx, i)
		}
	}
	if len(vertIdx) < _debf+1 || len(horzIdx) < _aeb+1 {
		return nil
	}
	if len(vertIdx)+len(horzIdx) > _dfea {
		_f.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",
			len(rl), len(vertIdx), len(horzIdx))
		return nil
	}

	crossings := make(map[int]intSet, len(vertIdx)+len(horzIdx))
	for _, v := range vertIdx {
		for _, h := range horzIdx {
			if !rl[v].intersects(rl[h]) {
				continue
			}
			if _, ok := crossings[v]; !ok {
				crossings[v] = make(intSet)
			}
			if _, ok := crossings[h]; !ok {
				crossings[h] = make(intSet)
			}
			crossings[v].add(h)
			crossings[h].add(v)
		}
	}
	return crossings
}

// removeDuplicates drops words that duplicate another word in the same depth
// bin: same _feecg value and all four bounding-box edges closer than a
// tolerance of _cggb times the _cbdb of the first word of the reference bin.
// Bins left empty are deleted from the bag.
func (bag *wordBag) removeDuplicates() {
	if _eaaa {
		_f.Log.Info("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071", bag.text())
	}
	for _, depthIdx := range bag.depthIndexes() {
		// Bins may have been emptied or deleted by an earlier iteration.
		bin := bag._adee[depthIdx]
		if len(bin) == 0 {
			continue
		}
		first := bin[0]
		tol := _cggb * first._cbdb
		minDepth := first._eabdg
		near := func(a, b float64) bool { return _c.Abs(a-b) < tol }

		for _, idx := range bag.depthBand(minDepth, minDepth+tol) {
			dups := map[*textWord]struct{}{}
			words := bag._adee[idx]
			for _, w := range words {
				if _, gone := dups[w]; gone {
					continue
				}
				for _, other := range words {
					if _, gone := dups[other]; gone {
						continue
					}
					if other == w || other._feecg != w._feecg {
						continue
					}
					if near(other.Llx, w.Llx) && near(other.Urx, w.Urx) &&
						near(other.Lly, w.Lly) && near(other.Ury, w.Ury) {
						dups[other] = struct{}{}
					}
				}
			}
			if len(dups) == 0 {
				continue
			}
			// Compact the survivors to the front of the bin, in place.
			kept := 0
			for _, w := range words {
				if _, gone := dups[w]; gone {
					continue
				}
				words[kept] = w
				kept++
			}
			bag._adee[idx] = words[:len(words)-len(dups)]
			if len(bag._adee[idx]) == 0 {
				delete(bag._adee, idx)
			}
		}
	}
}
// String returns a description of `k`.
// Kinds missing from the _dfeg name table are rendered as "Not a ruling: <n>".
func (k rulingKind) String() string {
	if name, known := _dfeg[k]; known {
		return name
	}
	return _gb.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", k)
}

// readBefore reports whether paragraph `i` should be read before paragraph
// `j`. That holds when _bfdeb accepts the pair and `i` has the greater Lly,
// or when the _eaba box of `i` ends left of the start of the _eaba box of `j`
// and no third paragraph returned by llyRange for the Lly span of the pair
// overlaps the horizontal gap between them. `order` is forwarded to llyRange.
func (paras paraList) readBefore(order []int, i, j int) bool {
	a, b := paras[i], paras[j]
	if _bfdeb(a, b) && a.Lly > b.Lly {
		return true
	}
	if !(a._eaba.Urx < b._eaba.Llx) {
		return false
	}

	lo, hi := a.Lly, b.Lly
	if lo > hi {
		lo, hi = hi, lo
	}
	left := _c.Max(a._eaba.Llx, b._eaba.Llx)
	right := _c.Min(a._eaba.Urx, b._eaba.Urx)

	for _, k := range paras.llyRange(order, lo, hi) {
		if k == i || k == j {
			continue
		}
		if between := paras[k]; between._eaba.Llx <= right && left <= between._eaba.Urx {
			return false
		}
	}
	return true
}

// equals reports whether two rulings have the same kind and whether their
// _ggaeg, _gbcgc and _gbab values each match according to _bgbb.
func (r *ruling) equals(other *ruling) bool {
	if r._aeffb != other._aeffb {
		return false
	}
	return _bgbb(r._ggaeg, other._ggaeg) &&
		_bgbb(r._gbcgc, other._gbcgc) &&
		_bgbb(r._gbab, other._gbab)
}

// sort orders the words of every bin in the bag using _eegd as comparator.
func (bag *wordBag) sort() {
	for _, bin := range bag._adee {
		words := bin
		_ae.Slice(words, func(i, j int) bool {
			return _eegd(words[i], words[j]) < 0
		})
	}
}
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_ffg PageText )Marks ()*TextMarkArray {return &TextMarkArray {_cgdg :_ffg ._ffee }};func (_fcebe paraList )eventNeighbours (_cdbb []event )map[*textPara ][]int {_ae .Slice (_cdbb ,func (_ggce ,_eggge int )bool {_aaaec ,_afbaa :=_cdbb [_ggce ],_cdbb [_eggge ];_dbggf ,_ceccae :=_aaaec ._eeeb ,_afbaa ._eeeb ;if _dbggf !=_ceccae {return _dbggf < _ceccae ;};if _aaaec ._fdbdd !=_afbaa ._fdbdd {return _aaaec ._fdbdd ;};return _ggce < _eggge ;});_fdbaa :=make (map[int ]intSet );_bedcg :=make (intSet );for _ ,_bebbe :=range _cdbb {if _bebbe ._fdbdd {_fdbaa [_bebbe ._dcag ]=make (intSet );for _geag :=range _bedcg {if _geag !=_bebbe ._dcag {_fdbaa [_bebbe ._dcag ].add (_geag );_fdbaa [_geag ].add (_bebbe ._dcag );};};_bedcg .add (_bebbe ._dcag );}else {_bedcg .del (_bebbe ._dcag );};};_gbaa :=map[*textPara ][]int {};for _gdgc ,_fgee :=range _fdbaa {_dfaff :=_fcebe [_gdgc ];if len (_fgee )==0{_gbaa [_dfaff ]=nil ;continue ;};_ebbac :=make ([]int ,len (_fgee ));_ddfge :=0;for _ccggd :=range _fgee {_ebbac [_ddfge ]=_ccggd ;_ddfge ++;};_gbaa [_dfaff ]=_ebbac ;};return _gbaa ;};func (_cbdce *shapesState )fill (_ecd *[]pathSection ){_dgbf :=pathSection {_fbf :_cbdce ._fbd ,Color :_cbdce ._adb .getFillColor ()};*_ecd =append (*_ecd ,_dgbf );if _dgea {_caed :=_dgbf .bbox ();_gb .Printf ("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",len (*_ecd ),len (_dgbf ._fbf ),_cbdce ,_dgbf .Color ,_caed ,_caed .Width (),_caed .Height ());if _ebgf {for _deee ,_aecb :=range _dgbf ._fbf {_gb .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_deee ,_aecb );if _deee ==10{break ;};};};};};func (_agb *imageExtractContext )extractContentStreamImages (_db string ,_fg *_ed .PdfPageResources )error {_afa :=_dc 
.NewContentStreamParser (_db );_cec ,_gd :=_afa .Parse ();if _gd !=nil {return _gd ;};if _agb ._gba ==nil {_agb ._gba =map[*_ee .PdfObjectStream ]*cachedImage {};};if _agb ._fd ==nil {_agb ._fd =&ImageExtractOptions {};};_ac :=_dc .NewContentStreamProcessor (*_cec );_ac .AddHandler (_dc .HandlerConditionEnumAllOperands ,"",_agb .processOperand );return _ac .Process (_fg );};
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
type ImageMark struct{Image *_ed .Image ;
|
||
|
||
// Dimensions of the image as displayed in the PDF.
|
||
Width float64 ;Height float64 ;
|
||
|
||
// Position of the image in PDF coordinates (lower left corner).
|
||
X float64 ;Y float64 ;
|
||
|
||
// Angle in degrees, if rotated.
|
||
Angle float64 ;};func (_ebba paraList )tables ()[]TextTable {var _fbfae []TextTable ;if _cece {_f .Log .Info ("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a");};for _ ,_bgaa :=range _ebba {_efed :=_bgaa ._begg ;if _efed !=nil &&_efed .isExportable (){_fbfae =append (_fbfae ,_efed .toTextTable ());};};return _fbfae ;};func (_adeeb *textTable )reduceTiling (_aaaf gridTiling ,_ggcad float64 )*textTable {_acfe :=make ([]int ,0,_adeeb ._cecff );_cdgf :=make ([]int ,0,_adeeb ._effgf );_becfe :=_aaaf ._abfe ;_facgf :=_aaaf ._gabed ;for _gfcg :=0;_gfcg < _adeeb ._cecff ;_gfcg ++{_cebab :=_gfcg > 0&&_c .Abs (_facgf [_gfcg -1]-_facgf [_gfcg ])< _ggcad &&_adeeb .emptyRow (_gfcg );if !_cebab {_acfe =append (_acfe ,_gfcg );};};for _ddfac :=0;_ddfac < _adeeb ._effgf ;_ddfac ++{_ecdag :=_ddfac < _adeeb ._effgf -1&&_c .Abs (_becfe [_ddfac +1]-_becfe [_ddfac ])< _ggcad &&_adeeb .emptyColumn (_ddfac );if !_ecdag {_cdgf =append (_cdgf ,_ddfac );};};if len (_acfe )==_adeeb ._cecff &&len (_cdgf )==_adeeb ._effgf {return _adeeb ;};_ccffd :=textTable {_gcedd :_adeeb ._gcedd ,_effgf :len (_cdgf ),_cecff :len (_acfe ),_egaa :make (map[uint64 ]compositeCell ,len (_cdgf )*len (_acfe ))};if _cece {_f .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_adeeb ._effgf ,_adeeb ._cecff ,len (_cdgf ),len (_acfe ));_f .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_cdgf );_f .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_acfe );};for _daaca ,_adbb :=range _acfe {for _gcge ,_bfdc :=range _cdgf {_cdgd ,_gbddf :=_adeeb .getComposite (_bfdc ,_adbb );if len (_cdgd )==0{continue ;};if _cece {_gb .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_gcge ,_daaca ,_bfdc ,_adbb 
,_cgee (_cdgd .merge ().text (),50));};_ccffd .putComposite (_gcge ,_daaca ,_cdgd ,_gbddf );};};return &_ccffd ;};func (_bagb compositeCell )hasLines (_ebcg []*textLine )bool {for _gfbe ,_adga :=range _ebcg {_bafd :=_cdeg (_bagb .PdfRectangle ,_adga .PdfRectangle );if _cece {_gb .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a",_bafd ,_gfbe ,len (_ebcg ));_gb .Printf ("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a",_bagb );_gb .Printf ("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a",_adga );};if _bafd {return true ;};};return false ;};func (_gfebf rulingList )vertsHorzs ()(rulingList ,rulingList ){var _gfgec ,_dbgba rulingList ;for _ ,_agafd :=range _gfebf {switch _agafd ._aeffb {case _caeda :_gfgec =append (_gfgec ,_agafd );case _eebcb :_dbgba =append (_dbgba ,_agafd );};};return _gfgec ,_dbgba ;};func (_ddfae *textWord )toTextMarks (_bfda *int )[]TextMark {var _dbbeg []TextMark ;for _ ,_fcga :=range _ddfae ._feedb {_dbbeg =_cfee (_dbbeg ,_bfda ,_fcga .ToTextMark ());};return _dbbeg ;};func _eced (_dcabc []int )[]int {_bffbb :=make ([]int ,len (_dcabc ));for _cfbe ,_ffgdf :=range _dcabc {_bffbb [len (_dcabc )-1-_cfbe ]=_ffgdf ;};return _bffbb ;};func (_ddd *wordBag )absorb (_eefd *wordBag ){_fage :=_eefd .makeRemovals ();for _egdd ,_gac :=range _eefd ._adee {for _ ,_eeceb :=range _gac {_ddd .pullWord (_eeceb ,_egdd ,_fage );};};_eefd .applyRemovals (_fage );};func _eaad (_adadc map[float64 ]gridTile )[]float64 {_fdbc :=make ([]float64 ,0,len (_adadc ));for _gdegf :=range _adadc {_fdbc =append (_fdbc ,_gdegf );};_ae .Float64s (_fdbc );return _fdbc ;};type textMark struct{_ed .PdfRectangle ;_gfeca int ;_bcdc string ;_gacb string ;_bgga *_ed .PdfFont ;_defd float64 ;_eebdd float64 ;_cdfd _ab .Matrix ;_dcaf _ab .Point 
;_cdfc _ed .PdfRectangle ;_adaag _af .Color ;_ebac _af .Color ;};func (_gdef paraList )findTextTables ()[]*textTable {var _dfeb []*textTable ;for _ ,_bdda :=range _gdef {if _bdda .taken ()||_bdda .Width ()==0{continue ;};_eccab :=_bdda .isAtom ();if _eccab ==nil {continue ;};_eccab .growTable ();if _eccab ._effgf *_eccab ._cecff < _agbc {continue ;};_eccab .markCells ();_eccab .log ("\u0067\u0072\u006fw\u006e");_dfeb =append (_dfeb ,_eccab );};return _dfeb ;};func (_egcd *ruling )intersects (_ccdgd *ruling )bool {_gcbfe :=(_egcd ._aeffb ==_caeda &&_ccdgd ._aeffb ==_eebcb )||(_ccdgd ._aeffb ==_caeda &&_egcd ._aeffb ==_eebcb );_egac :=func (_bcad ,_abdga *ruling )bool {return _bcad ._gbcgc -_eea <=_abdga ._ggaeg &&_abdga ._ggaeg <=_bcad ._gbab +_eea ;};_egcdc :=_egac (_egcd ,_ccdgd );_babbd :=_egac (_ccdgd ,_egcd );if _dgea {_gb .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_gcbfe ,_egcdc ,_babbd ,_gcbfe &&_egcdc &&_babbd ,_egcd ,_ccdgd );};return _gcbfe &&_egcdc &&_babbd ;};func (_bffaa *textObject )newTextMark (_cdac string ,_adccf _ab .Matrix ,_gbbc _ab .Point ,_adcd float64 ,_cecce *_ed .PdfFont ,_afec float64 ,_gdc ,_ggc _af .Color )(textMark ,bool ){_dgaf :=_adccf .Angle ();_gbeb :=_edba (_dgaf ,_edfc );var _aee float64 ;if _gbeb %180!=90{_aee =_adccf .ScalingFactorY ();}else {_aee =_adccf .ScalingFactorX ();};_gfecae :=_cddf (_adccf );_cef :=_ed .PdfRectangle {Llx :_gfecae .X ,Lly :_gfecae .Y ,Urx :_gbbc .X ,Ury :_gbbc .Y };switch _gbeb %360{case 90:_cef .Urx -=_aee ;case 180:_cef .Ury -=_aee ;case 270:_cef .Urx +=_aee ;case 0:_cef .Ury +=_aee ;default:_gbeb =0;_cef .Ury +=_aee ;};if _cef .Llx 
> _cef .Urx {_cef .Llx ,_cef .Urx =_cef .Urx ,_cef .Llx ;};if _cef .Lly > _cef .Ury {_cef .Lly ,_cef .Ury =_cef .Ury ,_cef .Lly ;};_bbcd ,_bbfd :=_agec (_cef ,_bffaa ._geg ._deg );if !_bbfd {_f .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_cef ,_bffaa ._geg ._deg ,_cdac );};_cef =_bbcd ;_caga :=_cef ;_cdbeb :=_bffaa ._geg ._deg ;switch _gbeb %360{case 90:_cdbeb .Urx ,_cdbeb .Ury =_cdbeb .Ury ,_cdbeb .Urx ;_caga =_ed .PdfRectangle {Llx :_cdbeb .Urx -_cef .Ury ,Urx :_cdbeb .Urx -_cef .Lly ,Lly :_cef .Llx ,Ury :_cef .Urx };case 180:_caga =_ed .PdfRectangle {Llx :_cdbeb .Urx -_cef .Llx ,Urx :_cdbeb .Urx -_cef .Urx ,Lly :_cdbeb .Ury -_cef .Lly ,Ury :_cdbeb .Ury -_cef .Ury };case 270:_cdbeb .Urx ,_cdbeb .Ury =_cdbeb .Ury ,_cdbeb .Urx ;_caga =_ed .PdfRectangle {Llx :_cef .Ury ,Urx :_cef .Lly ,Lly :_cdbeb .Ury -_cef .Llx ,Ury :_cdbeb .Ury -_cef .Urx };};if _caga .Llx > _caga .Urx {_caga .Llx ,_caga .Urx =_caga .Urx ,_caga .Llx ;};if _caga .Lly > _caga .Ury {_caga .Lly ,_caga .Ury =_caga .Ury ,_caga .Lly ;};_acag :=textMark {_bcdc :_cdac ,PdfRectangle :_caga ,_cdfc :_cef ,_bgga :_cecce ,_defd :_aee ,_eebdd :_afec ,_cdfd :_adccf ,_dcaf :_gbbc ,_gfeca :_gbeb ,_adaag :_gdc ,_ebac :_ggc };if _babbf {_f .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_gfecae ,_gbbc ,_acag .String ());};return _acag ,_bbfd ;};func (_eaeed *textTable )toTextTable ()TextTable {if _cece {_f .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_eaeed ._effgf ,_eaeed ._cecff );};_bcag :=make ([][]TableCell ,_eaeed ._cecff );for _acacd 
:=0;_acacd < _eaeed ._cecff ;_acacd ++{_bcag [_acacd ]=make ([]TableCell ,_eaeed ._effgf );for _cgfc :=0;_cgfc < _eaeed ._effgf ;_cgfc ++{_cecag :=_eaeed .get (_cgfc ,_acacd );if _cecag ==nil {continue ;};if _cece {_gb .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_cgfc ,_acacd ,_cecag );};_bcag [_acacd ][_cgfc ].Text =_cecag .text ();_bdffe :=0;_bcag [_acacd ][_cgfc ].Marks ._cgdg =_cecag .toTextMarks (&_bdffe );};};return TextTable {W :_eaeed ._effgf ,H :_eaeed ._cecff ,Cells :_bcag };};func _dggb (_fgdb ,_abcg _ab .Point )rulingKind {_dgcbc :=_c .Abs (_fgdb .X -_abcg .X );_afcg :=_c .Abs (_fgdb .Y -_abcg .Y );return _gdadf (_dgcbc ,_afcg ,_agdg );};func (_cedc *textTable )logComposite (_bbac string ){if !_cece {return ;};_f .Log .Info ("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_cedc ._effgf ,_cedc ._cecff ,_bbac );_gb .Printf ("\u0025\u0035\u0073 \u007c","");for _fefb :=0;_fefb < _cedc ._effgf ;_fefb ++{_gb .Printf ("\u0025\u0033\u0064 \u007c",_fefb );};_gb .Println ("");_gb .Printf ("\u0025\u0035\u0073 \u002b","");for _ffac :=0;_ffac < _cedc ._effgf ;_ffac ++{_gb .Printf ("\u0025\u0033\u0073 \u002b","\u002d\u002d\u002d");};_gb .Println ("");for _gggdg :=0;_gggdg < _cedc ._cecff ;_gggdg ++{_gb .Printf ("\u0025\u0035\u0064 \u007c",_gggdg );for _fffge :=0;_fffge < _cedc ._effgf ;_fffge ++{_fedge ,_ :=_cedc ._egaa [_bacb (_fffge ,_gggdg )].parasBBox ();_gb .Printf ("\u0025\u0033\u0064 \u007c",len (_fedge ));};_gb .Println ("");};_f .Log .Info ("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_cedc ._effgf ,_cedc ._cecff ,_bbac );_gb .Printf ("\u0025\u0035\u0073 \u007c","");for _febdg :=0;_febdg < _cedc ._effgf ;_febdg ++{_gb .Printf ("\u0025\u0031\u0032\u0064\u0020\u007c",_febdg );};_gb .Println ("");_gb .Printf ("\u0025\u0035\u0073 \u002b","");for _cagef :=0;_cagef < _cedc ._effgf ;_cagef ++{_gb .Print 
("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b");};_gb .Println ("");for _fddc :=0;_fddc < _cedc ._cecff ;_fddc ++{_gb .Printf ("\u0025\u0035\u0064 \u007c",_fddc );for _dfddd :=0;_dfddd < _cedc ._effgf ;_dfddd ++{_afedc ,_ :=_cedc ._egaa [_bacb (_dfddd ,_fddc )].parasBBox ();_cadc :="";_gdce :=_afedc .merge ();if _gdce !=nil {_cadc =_gdce .text ();};_cadc =_gb .Sprintf ("\u0025\u0071",_cgee (_cadc ,12));_cadc =_cadc [1:len (_cadc )-1];_gb .Printf ("\u0025\u0031\u0032\u0073\u0020\u007c",_cadc );};_gb .Println ("");};};func (_dafb *textPara )toCellTextMarks (_gggec *int )[]TextMark {var _eebcgd []TextMark ;for _efded ,_fcbcc :=range _dafb ._egddd {_caca :=_fcbcc .toTextMarks (_gggec );_adad :=_beae &&_fcbcc .endsInHyphen ()&&_efded !=len (_dafb ._egddd )-1;if _adad {_caca =_bdagb (_caca ,_gggec );};_eebcgd =append (_eebcgd ,_caca ...);if !(_adad ||_efded ==len (_dafb ._egddd )-1){_eebcgd =_cagdb (_eebcgd ,_gggec ,_abea (_fcbcc ._cfbc ,_dafb ._egddd [_efded +1]._cfbc ));};};return _eebcgd ;};func (_fecf *subpath )close (){if !_abbc (_fecf ._cecb [0],_fecf .last ()){_fecf .add (_fecf ._cecb [0]);};_fecf ._dfgc =true ;_fecf .removeDuplicates ();};func (_edee *shapesState )establishSubpath ()*subpath {_dcba ,_fbcf :=_edee .lastpointEstablished ();if !_fbcf {_edee ._fbd =append (_edee ._fbd ,_fbfa (_dcba ));};if len (_edee ._fbd )==0{return nil ;};_edee ._ffbd =false ;return _edee ._fbd [len (_edee ._fbd )-1];};
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_ea *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_ga :=&imageExtractContext {_fd :options };_ef :=_ga .extractContentStreamImages (_ea ._fb ,_ea ._eef );if _ef !=nil {return nil ,_ef ;};return &PageImages {Images :_ga ._fcb },nil ;};func (_ebebf *textTable )log (_fgaff string ){if !_cece {return ;};_f .Log .Info ("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066",_fgaff ,_ebebf ._effgf ,_ebebf ._cecff ,_ebebf ._gcedd ,_ebebf .PdfRectangle );for _bfgc :=0;_bfgc < _ebebf ._cecff ;_bfgc ++{for _bfeg :=0;_bfeg < _ebebf ._effgf ;_bfeg ++{_daae :=_ebebf .get (_bfeg ,_bfgc );if _daae ==nil {continue ;};_gb .Printf ("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a",_bfeg ,_bfgc ,_daae .PdfRectangle ,_cgee (_daae .text (),50),_bc .RuneCountInString (_daae .text ()));};};};func _bedd (_geea []*textWord ,_ggbab *textWord )[]*textWord {for _ggggf ,_eedaf :=range _geea {if _eedaf ==_ggbab {return _eagee (_geea ,_ggggf );};};_f .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_ggbab );return nil ;};func _gbef (_gbbb map[int ][]float64 )[]int {_eebdg :=make ([]int ,len (_gbbb ));_cebb :=0;for _daee :=range _gbbb {_eebdg [_cebb ]=_daee ;_cebb ++;};_ae .Ints (_eebdg );return _eebdg ;};func (_bdfd paraList )applyTables (_gfea []*textTable )paraList {var _cbgd paraList ;for _ ,_eebb :=range _gfea {_cbgd =append (_cbgd ,_eebb .newTablePara ());};for _ ,_agfdf :=range _bdfd {if _agfdf ._febd {continue ;};_cbgd =append (_cbgd ,_agfdf );};return _cbgd ;};func (_fcaf *textPara )depth ()float64 {if _fcaf ._egdb {return -1.0;};if len (_fcaf ._egddd )> 0{return 
_fcaf ._egddd [0]._cfbc ;};return _fcaf ._begg .depth ();};const _dgeeb =10;func (_gcbb *shapesState )quadraticTo (_ebc ,_ece ,_fdgb ,_eddfb float64 ){if _dddgb {_f .Log .Info ("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a");};_gcbb .addPoint (_fdgb ,_eddfb );};func _gddg (_gace []pathSection ){if _gbdd < 0.0{return ;};if _dgea {_f .Log .Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_gace ));};for _gafc ,_aecg :=range _gace {for _fbcc ,_fdbd :=range _aecg ._fbf {for _fbdgb ,_gdff :=range _fdbd ._cecb {_fdbd ._cecb [_fbdgb ]=_ab .Point {X :_fcee (_gdff .X ),Y :_fcee (_gdff .Y )};if _dgea {_edaaf :=_fdbd ._cecb [_fbdgb ];if !_abbc (_gdff ,_edaaf ){_fdca :=_ab .Point {X :_edaaf .X -_gdff .X ,Y :_edaaf .Y -_gdff .Y };_gb .Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_gafc ,_fbcc ,_fbdgb ,_gdff ,_edaaf ,_fdca );};};};};};};func (_agafa *stateStack )pop ()*textState {if _agafa .empty (){return nil ;};_gaaf :=*(*_agafa )[len (*_agafa )-1];*_agafa =(*_agafa )[:len (*_agafa )-1];return &_gaaf ;};type textWord struct{_ed .PdfRectangle ;_eabdg float64 ;_feecg string ;_feedb []*textMark ;_cbdb float64 ;_cdfbc bool ;};type wordBag struct{_ed .PdfRectangle ;_fcggb float64 ;_ccfc ,_dffb rulingList ;_cfe float64 ;_adee map[int ][]*textWord ;};func (_ebca paraList )llyOrdering ()[]int {_cdee :=make ([]int ,len (_ebca ));for _efdb :=range _ebca {_cdee [_efdb ]=_efdb ;};_ae .SliceStable (_cdee ,func (_fefd ,_afeg int )bool {_bcde ,_ccdg :=_cdee [_fefd ],_cdee [_afeg ];return _ebca [_bcde ].Lly < _ebca [_ccdg ].Lly ;});return _cdee ;};func (_cfbb *textLine )pullWord (_ebbe *wordBag ,_gffg *textWord ,_edad int ){_cfbb .appendWord (_gffg );_ebbe .removeWord (_gffg ,_edad );};func _aaeee (_babbe 
map[int ]intSet )[]int {_bgegc :=make ([]int ,0,len (_babbe ));for _ecfd :=range _babbe {_bgegc =append (_bgegc ,_ecfd );};_ae .Ints (_bgegc );return _bgegc ;};func _eagee (_agcab []*textWord ,_bede int )[]*textWord {_bgeb :=len (_agcab );copy (_agcab [_bede :],_agcab [_bede +1:]);return _agcab [:_bgeb -1];};const (_eabe =1.0e-6;_gbdd =1.0e-4;_edfc =10;_ecda =6;_abcb =0.5;_ggaf =0.12;_eag =0.19;_aeff =0.04;_eaacd =0.04;_gdgg =1.0;_agee =0.04;_egddg =0.4;_ebf =0.7;_cdf =1.0;_egad =0.1;_bbee =1.4;_ebgd =0.46;_fbab =0.02;_cggb =0.2;_eccc =0.5;_agde =4;_cfcb =4.0;_agbc =6;_gaae =0.3;_aadd =0.01;_fcff =0.02;_debf =2;_aeb =2;_dfea =500;_agdg =4.0;_cdff =4.0;_eacg =0.05;_ecefc =0.1;_eea =2.0;_bebb =2.0;_daa =1.5;_fefg =3.0;_dfcc =0.25;);
|
||
|
||
// String returns a human readable description of `s`.
|
||
func (s intSet) String() string {
	var members []int
	for k := range s {
		if !s.has(k) {
			continue
		}
		members = append(members, k)
	}
	_ae.Ints(members)
	return _gb.Sprintf("\u0025\u002b\u0076", members)
}

// rectRuling is a colored rectangle tagged with a ruling kind and a mark kind.
type rectRuling struct {
	_cebaa rulingKind
	_bcba  markKind
	_af.Color
	_ed.PdfRectangle
}

// writeCellText writes the text of each line of the paragraph to `w`.
// Between consecutive lines it writes the separator returned by _abea for the
// two lines' _cfbc values. When _beae is set and a non-final line ends in a
// hyphen, that line's text is first passed through _ccbd and no separator is
// written after it (presumably joining the hyphenated word with the next line;
// confirm against _ccbd). Errors from `w` are not checked.
func (p *textPara) writeCellText(w _g.Writer) {
	last := len(p._egddd) - 1
	for i, line := range p._egddd {
		text := line.text()
		hyphenated := _beae && line.endsInHyphen() && i != last
		if hyphenated {
			text = _ccbd(text)
		}
		w.Write([]byte(text))
		if hyphenated || i == last {
			continue
		}
		w.Write([]byte(_abea(line._cfbc, p._egddd[i+1]._cfbc)))
	}
}
|
||
|
||
// Extractor stores and offers functionality for extracting content from PDF pages.
|
||
type Extractor struct {
	// _fb and _eef are the two values ExtractPageImages hands to
	// extractContentStreamImages.
	_fb  string
	_eef *_ed.PdfPageResources
	// NOTE(review): the remaining fields are not used in this part of the file.
	_deg _ed.PdfRectangle
	_cc  map[string]fontEntry
	_dcc map[string]textResult
	_cb  int64
	_edd int
}

// removeDuplicates drops every point that _abbc reports as matching the point
// kept immediately before it. The first point is always kept; an empty
// subpath is left untouched.
func (sp *subpath) removeDuplicates() {
	if len(sp._cecb) == 0 {
		return
	}
	kept := []_ab.Point{sp._cecb[0]}
	for _, pt := range sp._cecb[1:] {
		if prev := kept[len(kept)-1]; !_abbc(pt, prev) {
			kept = append(kept, pt)
		}
	}
	sp._cecb = kept
}
|
||
|
||
// Append appends `mark` to the mark array.
|
||
func (ma *TextMarkArray) Append(mark TextMark) {
	ma._cgdg = append(ma._cgdg, mark)
}

// empty reports whether the bag has no entry stored under `depthIdx`.
// An entry that is present counts as non-empty even if its slice has length 0.
func (b *wordBag) empty(depthIdx int) bool {
	_, found := b._adee[depthIdx]
	return !found
}

// llyRange returns the sub-slice of `ordering` whose paragraphs have
// llyLo <= Lly <= llyHi.
//
// `ordering` must list the indexes of the receiver's paragraphs sorted by
// increasing Lly (as produced by llyOrdering); the binary searches below rely
// on that order. The result aliases `ordering`. nil is returned when the
// paragraph list or `ordering` is empty, or when the requested interval lies
// entirely outside the range of Lly values.
func (paras paraList) llyRange(ordering []int, llyLo, llyHi float64) []int {
	n := len(paras)
	// Guard against empty input: the range checks below index ordering[0]
	// and ordering[n-1] unconditionally.
	if n == 0 || len(ordering) == 0 {
		return nil
	}
	if llyHi < paras[ordering[0]].Lly || llyLo > paras[ordering[n-1]].Lly {
		return nil
	}
	// First position with Lly >= llyLo.
	lo := _ae.Search(n, func(i int) bool { return paras[ordering[i]].Lly >= llyLo })
	// First position with Lly > llyHi, i.e. one past the last match.
	hi := _ae.Search(n, func(i int) bool { return paras[ordering[i]].Lly > llyHi })
	return ordering[lo:hi]
}

// paraList is a sequence of text paragraphs.
type paraList []*textPara
|
||
|
||
// String returns a description of `k`.
|
||
func (k markKind) String() string {
	if name, ok := _bgce[k]; ok {
		return name
	}
	// Kinds missing from the _bgce name table are reported by number.
	return _gb.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", k)
}
|
||
|
||
// ToText returns the page text as a single string.
|
||
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
|
||
// Text() instead.
|
||
func (pt PageText) ToText() string {
	return pt.Text()
}

// firstWord returns the first word stored under `depthIdx`.
// It panics if no word is stored under that index.
func (b *wordBag) firstWord(depthIdx int) *textWord {
	words := b._adee[depthIdx]
	return words[0]
}

// shapesState holds two matrices, the subpaths collected so far, a flag, a
// point and the text object in use.
// NOTE(review): the roles of the individual fields are not evident from this
// part of the file.
type shapesState struct {
	_dbcg _ab.Matrix
	_ffef _ab.Matrix
	_fbd  []*subpath
	_ffbd bool
	_fcbf _ab.Point
	_adb  *textObject
}

// _cfcd builds a ruling of kind _eebcb positioned at the top edge (Ury) of
// `rect` and spanning from rect.Llx to rect.Urx.
func _cfcd(rect _ed.PdfRectangle) *ruling {
	return &ruling{
		_aeffb: _eebcb,
		_ggaeg: rect.Ury,
		_gbcgc: rect.Llx,
		_gbab:  rect.Urx,
	}
}

// _adcc returns the distance from the top of `pageSize` (Ury) down to the
// bottom (Lly) of the bounding box of `b`.
func _adcc(pageSize _ed.PdfRectangle, b bounded) float64 {
	box := b.bbox()
	return pageSize.Ury - box.Lly
}

// toTilings tidies the rulings, groups them with toGrids and converts each
// grid with asTiling. It returns the tidied rulings together with the tilings,
// or (nil, nil) when the list is empty.
func (rl rulingList) toTilings() (rulingList, []gridTiling) {
	rl.log("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s")
	if len(rl) == 0 {
		return nil, nil
	}
	rl = rl.tidied("\u0061\u006c\u006c")
	rl.log("\u0074\u0069\u0064\u0069\u0065\u0064")
	grids := rl.toGrids()
	tilings := make([]gridTiling, 0, len(grids))
	for _, g := range grids {
		tilings = append(tilings, g.asTiling())
	}
	return rl, tilings
}

// setFont stores `size` in the current text state, looks up the font `name`
// with getFont and stores it in the current state as well. The state is then
// pushed onto the state stack if the stack is empty; otherwise the font of the
// stack's top entry is updated. A nil receiver is a no-op.
//
// Note that `size` is recorded before the lookup, so it is kept even when
// getFont fails and its error is returned.
func (to *textObject) setFont(name string, size float64) error {
	if to == nil {
		return nil
	}
	to._gdf._ggge = size
	font, err := to.getFont(name)
	if err != nil {
		return err
	}
	to._gdf._agcd = font
	if !to._gge.empty() {
		to._gge.top()._agcd = to._gdf._agcd
		return nil
	}
	to._gge.push(to._gdf)
	return nil
}

// _gca returns a new textObject wired to the given extractor, resources,
// graphics state, text state and state stack, with both of its matrices set
// to the identity.
func _gca(ext *Extractor, resources *_ed.PdfPageResources, gs _dc.GraphicsState, state *textState, stack *stateStack) *textObject {
	return &textObject{
		_geg:  ext,
		_dae:  resources,
		_dgf:  gs,
		_gge:  stack,
		_gdf:  state,
		_decf: _ab.IdentityMatrix(),
		_egdf: _ab.IdentityMatrix(),
	}
}
|
||
|
||
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
|
||
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
|
||
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
|
||
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
|
||
type RenderMode int
|
||
|
||
// TextMark represents extracted text on a page with information regarding both textual content,
|
||
// formatting (font and size) and positioning.
|
||
// It is the smallest unit of text on a PDF page, typically a single character.
|
||
//
|
||
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
|
||
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
|
||
// `bbox` of substring `term` in `text`.
|
||
//
|
||
// ex, _ := New(page)
|
||
// // handle errors
|
||
// pageText, _, _, err := ex.ExtractPageText()
|
||
// // handle errors
|
||
// text := pageText.Text()
|
||
// textMarks := pageText.Marks()
|
||
//
|
||
// start := strings.Index(text, term)
|
||
// end := start + len(term)
|
||
// spanMarks, err := textMarks.RangeOffset(start, end)
|
||
// // handle errors
|
||
// bbox, ok := spanMarks.BBox()
|
||
// // handle errors
|
||
type TextMark struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Original is the text in the PDF. It has not been decoded like `Text`.
|
||
Original string ;
|
||
|
||
// BBox is the bounding box of the text.
|
||
BBox _ed .PdfRectangle ;
|
||
|
||
// Font is the font the text was drawn with.
|
||
Font *_ed .PdfFont ;
|
||
|
||
// FontSize is the font size the text was drawn with.
|
||
FontSize float64 ;
|
||
|
||
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
|
||
// text, textMarks := pageText.Text(), pageText.Marks()
|
||
// marks := textMarks.Elements()
|
||
// then marks[i].Offset is the offset of marks[i].Text in text.
|
||
Offset int ;
|
||
|
||
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
|
||
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
|
||
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
|
||
Meta bool ;
|
||
|
||
// FillColor is the fill color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
FillColor _af .Color ;
|
||
|
||
// StrokeColor is the stroke color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
StrokeColor _af .Color ;
|
||
|
||
// Orientation is the text orientation
|
||
Orientation int ;};func (_gcag rulingList )snapToGroupsDirection ()rulingList {_gcag .sortStrict ();_cecca :=make (map[*ruling ]rulingList ,len (_gcag ));_fgea :=_gcag [0];_gggg :=func (_cgdfd *ruling ){_fgea =_cgdfd ;_cecca [_fgea ]=rulingList {_cgdfd }};_gggg (_gcag [0]);for _ ,_egeaf :=range _gcag [1:]{if _egeaf ._ggaeg < _fgea ._ggaeg -_eabe {_f .Log .Error ("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073",_fgea ,_egeaf );};if _egeaf ._ggaeg > _fgea ._ggaeg +_bebb {_gggg (_egeaf );}else {_cecca [_fgea ]=append (_cecca [_fgea ],_egeaf );};};_dcbb :=make (map[*ruling ]float64 ,len (_cecca ));_dffee :=make (map[*ruling ]*ruling ,len (_gcag ));for _fcbcd ,_bgega :=range _cecca {_dcbb [_fcbcd ]=_bgega .mergePrimary ();for _ ,_ebfeb :=range _bgega {_dffee [_ebfeb ]=_fcbcd ;};};for _ ,_ebgfe :=range _gcag {_ebgfe ._ggaeg =_dcbb [_dffee [_ebgfe ]];};_cdcg :=make (rulingList ,0,len (_gcag ));for _ ,_daag :=range _cecca {_ggdcc :=_daag .splitSec ();for _aabg ,_gbbgg :=range _ggdcc {_gafd :=_gbbgg .merge ();if len (_cdcg )> 0{_agdgd :=_cdcg [len (_cdcg )-1];if _agdgd .alignsPrimary (_gafd )&&_agdgd .alignsSec (_gafd ){_f .Log .Error ("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073",_aabg ,_agdgd ,_gafd );continue ;};};_cdcg =append (_cdcg ,_gafd );};};_cdcg .sortStrict ();return _cdcg ;};
|
||
|
||
// PageText represents the layout of text on a device page.
|
||
type PageText struct {
	// NOTE(review): field roles are inferred from their types only; none of
	// them is accessed in this part of the file.
	_cga  []*textMark
	_eda  string
	_ffee []TextMark
	_bag  []TextTable
	_eaee _ed.PdfRectangle
	_cacf []pathSection
	_adfe []pathSection
}

// addDiacritic appends `diacritic` to the text of the word's last mark and
// replaces that text with its NFKC-normalized form.
// It panics if the word has no marks.
func (w *textWord) addDiacritic(diacritic string) {
	last := w._feedb[len(w._feedb)-1]
	last._bcdc = _e.NFKC.String(last._bcdc + diacritic)
}

// connections returns the set of ruling indexes reachable from index `start`
// through `links`: index j is added whenever links[j] contains an index that
// has already been visited, and every index in the result is visited in turn.
func (rl rulingList) connections(links map[int]intSet, start int) intSet {
	connected := make(intSet)
	visited := make(intSet)
	var walk func(int)
	walk = func(i int) {
		if visited.has(i) {
			return
		}
		visited.add(i)
		for j := range rl {
			if links[j].has(i) {
				connected.add(j)
			}
		}
		for j := range rl {
			if connected.has(j) {
				walk(j)
			}
		}
	}
	walk(start)
	return connected
}

// computeText returns the concatenation of the text of all of the word's marks.
func (w *textWord) computeText() string {
	var sb _bee.Builder
	for _, mark := range w._feedb {
		sb.WriteString(mark._bcdc)
	}
	return sb.String()
}

// _bcaa builds the grid tile covering `rect`. Each of the four border flags is
// set when the corresponding ruling is non-nil and encloses the tile's extent
// along that border: the first two rulings are tested against the vertical
// extent [Lly, Ury], the last two against the horizontal extent [Llx, Urx].
func _bcaa(rect _ed.PdfRectangle, left, right, bottom, top *ruling) gridTile {
	return gridTile{
		PdfRectangle: rect,
		_beed:        left != nil && left.encloses(rect.Lly, rect.Ury),
		_faac:        right != nil && right.encloses(rect.Lly, rect.Ury),
		_bggc:        bottom != nil && bottom.encloses(rect.Llx, rect.Urx),
		_dfccg:       top != nil && top.encloses(rect.Llx, rect.Urx),
	}
}
|
||
|
||
// TextTable represents a table.
|
||
// Cells are ordered top-to-bottom, left-to-right.
|
||
// Cells[y] is the (0-offset) y'th row in the table.
|
||
// Cells[y][x] is the cell in the (0-offset) x'th column of row y.
|
||
type TextTable struct {
	// W and H are the number of columns and rows.
	W, H int
	// Cells holds the table cells, indexed as Cells[row][column].
	Cells [][]TableCell
}

// newTablePara returns a paragraph that wraps the table: the paragraph's
// bounding box and its _eaba field are both set to the table's computed
// bounding box, and _begg refers back to the table.
func (t *textTable) newTablePara() *textPara {
	box := t.computeBbox()
	para := &textPara{
		PdfRectangle: box,
		_eaba:        box,
		_begg:        t,
	}
	if _cece {
		_f.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}
|
||
|
||
// String returns a description of `b`.
|
||
func (b *wordBag) String() string {
	var texts []string
	for _, depthIdx := range b.depthIndexes() {
		for _, w := range b._adee[depthIdx] {
			texts = append(texts, w._feecg)
		}
	}
	return _gb.Sprintf("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071", b.PdfRectangle, b._fcggb, len(texts), texts)
}

// contains reports whether rectangle `r` fits inside the tile. A tile with
// fewer than 3 borders contains nothing. For every border the tile has, `r`
// may extend past that edge by at most _daa; sides without a border are not
// checked.
func (tile gridTile) contains(r _ed.PdfRectangle) bool {
	switch {
	case tile.numBorders() < 3:
		return false
	case tile._beed && r.Llx < tile.Llx-_daa:
		return false
	case tile._faac && r.Urx > tile.Urx+_daa:
		return false
	case tile._bggc && r.Lly < tile.Lly-_daa:
		return false
	case tile._dfccg && r.Ury > tile.Ury+_daa:
		return false
	}
	return true
}

// _gaggc ("makeFillRulings" in its log messages) first snaps the points of
// `fills` with _gddg (modifying them in place), then converts every
// quadrilateral subpath into a rectangular ruling using the color of its path
// section. Subpaths that are not quadrilaterals, or for which makeRectRuling
// fails, are skipped.
func _gaggc(fills []pathSection) rulingList {
	_gddg(fills)
	if _dgea {
		_f.Log.Info("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs", len(fills))
	}
	var rulings rulingList
	for _, sec := range fills {
		for _, sp := range sec._fbf {
			if !sp.isQuadrilateral() {
				if _dgea {
					_f.Log.Error("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073", sp)
				}
				continue
			}
			r, ok := sp.makeRectRuling(sec.Color)
			if ok {
				rulings = append(rulings, r)
				continue
			}
			if _deba {
				_f.Log.Error("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073", sp)
			}
		}
	}
	if _dgea {
		_f.Log.Info("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073", rulings.String())
	}
	return rulings
}

// _dcge returns the larger of `a` and `b`.
func _dcge(a, b int) int {
	if b >= a {
		return b
	}
	return a
}

// getStrokeColor converts the graphics state's stroking color, interpreted in
// its stroking color space, with _fdda.
func (to *textObject) getStrokeColor() _af.Color {
	gs := to._dgf
	return _fdda(gs.ColorspaceStroking, gs.ColorStroking)
}
|
||
|
||
// PageImages represents extracted images on a PDF page with spatial information:
|
||
// display position and size.
|
||
type PageImages struct{Images []ImageMark ;};func (_bfa *textObject )getFillColor ()_af .Color {return _fdda (_bfa ._dgf .ColorspaceNonStroking ,_bfa ._dgf .ColorNonStroking );};func _bdag (_cbbge ,_ggdc _ed .PdfRectangle )bool {return _cbbge .Lly <=_ggdc .Ury &&_ggdc .Lly <=_cbbge .Ury ;};func _gggecd (_gfecg ,_cbca _ab .Point ,_agbe _af .Color )(*ruling ,bool ){_deggg :=lineRuling {_cadf :_gfecg ,_abeeg :_cbca ,_cdgc :_dggb (_gfecg ,_cbca ),Color :_agbe };if _deggg ._cdgc ==_eeae {return nil ,false ;};return _deggg .asRuling ();};func (_cabf *ruling )gridIntersecting (_facg *ruling )bool {return _bgbb (_cabf ._gbcgc ,_facg ._gbcgc )&&_bgbb (_cabf ._gbab ,_facg ._gbab );};type gridTiling struct{_ed .PdfRectangle ;_abfe []float64 ;_gabed []float64 ;_eege map[float64 ]map[float64 ]gridTile ;};func (_bgffc intSet )add (_cfage int ){_bgffc [_cfage ]=struct{}{}};func _eeee (_bbaa *textWord ,_feeg float64 ,_aede ,_fcd rulingList )*wordBag {_bcge :=_bfe (_bbaa ._eabdg );_gcff :=[]*textWord {_bbaa };_agbg :=wordBag {_adee :map[int ][]*textWord {_bcge :_gcff },PdfRectangle :_bbaa .PdfRectangle ,_fcggb :_bbaa ._cbdb ,_cfe :_feeg ,_ccfc :_aede ,_dffb :_fcd };return &_agbg ;};func (_cggd *ruling )alignsPrimary (_bgdf *ruling )bool {return _cggd ._aeffb ==_bgdf ._aeffb &&_c .Abs (_cggd ._ggaeg -_bgdf ._ggaeg )< _bebb *0.5;};func (_egf *wordBag )arrangeText ()*textPara {_egf .sort ();if _afad {_egf .removeDuplicates ();};var _cbdceb []*textLine ;for _ ,_gdfe :=range _egf .depthIndexes (){for !_egf .empty (_gdfe ){_edab :=_egf .firstReadingIndex (_gdfe );_fbfd :=_egf .firstWord (_edab );_bdaf :=_daea (_egf ,_edab );_ggca :=_fbfd ._cbdb ;_fcae :=_fbfd ._eabdg -_abcb *_ggca ;_abcf :=_fbfd ._eabdg +_abcb *_ggca ;_gbea :=_bbee *_ggca ;_ggffa :=_ebgd *_ggca ;_afaa :for {var _cacbf *textWord ;_cdae :=0;for _ ,_cgfg :=range _egf .depthBand (_fcae ,_abcf ){_dbfe :=_egf .highestWord (_cgfg ,_fcae ,_abcf );if _dbfe ==nil {continue ;};_ebdd :=_fbbd (_dbfe ,_bdaf ._gad [len (_bdaf ._gad 
)-1]);if _ebdd < -_ggffa {break _afaa ;};if _ebdd > _gbea {continue ;};if _cacbf !=nil &&_eegd (_dbfe ,_cacbf )>=0{continue ;};_cacbf =_dbfe ;_cdae =_cgfg ;};if _cacbf ==nil {break ;};_bdaf .pullWord (_egf ,_cacbf ,_cdae );};_bdaf .markWordBoundaries ();_cbdceb =append (_cbdceb ,_bdaf );};};if len (_cbdceb )==0{return nil ;};_ae .Slice (_cbdceb ,func (_efdec ,_febga int )bool {return _fbef (_cbdceb [_efdec ],_cbdceb [_febga ])< 0});_gece :=_bcbc (_egf .PdfRectangle ,_cbdceb );if _dcfd {_f .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_gece .String ());if _dbd {for _cdfda ,_ebdg :=range _gece ._egddd {_gb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cdfda ,_ebdg .String ());if _efdc {for _acfgg ,_gfdd :=range _ebdg ._gad {_gb .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_acfgg ,_gfdd .String ());for _ebae ,_adgf :=range _gfdd ._feedb {_gb .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_ebae ,_adgf .String ());};};};};};};return _gece ;};
|
||
|
||
// String returns a description of `tm`.
|
||
func (_cbdcd *textMark )String ()string {return _gb .Sprintf ("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022",_cbdcd .PdfRectangle ,_cbdcd ._defd ,_cbdcd ._bcdc );};func (_fabc *textTable )put (_cdfce ,_adgea int ,_egdddf *textPara ){_fabc ._afdde [_bacb (_cdfce ,_adgea )]=_egdddf ;};func (_edgg *textWord )appendMark (_acc *textMark ,_cbfa _ed .PdfRectangle ){_edgg ._feedb =append (_edgg ._feedb ,_acc );_edgg .PdfRectangle =_afeae (_edgg .PdfRectangle ,_acc .PdfRectangle );if _acc ._defd > _edgg ._cbdb {_edgg ._cbdb =_acc ._defd ;};_edgg ._eabdg =_cbfa .Ury -_edgg .PdfRectangle .Lly ;};type fontEntry struct{_fdg *_ed .PdfFont ;_aged int64 ;};func _cdega (_afde float64 )bool {return _c .Abs (_afde )< _bebb };func (_gfdf rulingList )aligned ()bool {if len (_gfdf )< 2{return false ;};_bada :=make (map[*ruling ]int );_bada [_gfdf [0]]=0;for _ ,_gaaaf :=range _gfdf [1:]{_edbac :=false ;for _dbbe :=range _bada {if _gaaaf .gridIntersecting (_dbbe ){_bada [_dbbe ]++;_edbac =true ;break ;};};if !_edbac {_bada [_gaaaf ]=0;};};_gaef :=0;for _ ,_efccg :=range _bada {if _efccg ==0{_gaef ++;};};_cbeg :=float64 (_gaef )/float64 (len (_gfdf ));_befdb :=_cbeg <=1.0-_dfcc ;if _dgea {_f .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_befdb ,_cbeg ,_gaef ,len (_gfdf ),_gfdf .String ());};return _befdb ;};func (_fgc *textObject )showTextAdjusted (_gbbg *_ee .PdfObjectArray )error {_fgd :=false ;for _ ,_bdg :=range _gbbg .Elements (){switch _bdg .(type ){case *_ee .PdfObjectFloat ,*_ee .PdfObjectInteger :_bac ,_dee :=_ee .GetNumberAsFloat (_bdg );if _dee !=nil {_f .Log .Debug 
("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_bdg ,_gbbg );return _dee ;};_bcf ,_dad :=-_bac *0.001*_fgc ._gdf ._ggge ,0.0;if _fgd {_dad ,_bcf =_bcf ,_dad ;};_bge :=_ebd (_ab .Point {X :_bcf ,Y :_dad });_fgc ._decf .Concat (_bge );case *_ee .PdfObjectString :_fccg ,_ffbb :=_ee .GetStringBytes (_bdg );if !_ffbb {_f .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_bdg ,_gbbg );return _ee .ErrTypeError ;};_fgc .renderText (_fccg );default:_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_bdg ,_gbbg );return _ee .ErrTypeError ;};};return nil ;}; |