mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
253 lines
180 KiB
Go
253 lines
180 KiB
Go
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/

//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
package extractor ;import (_fc "bytes";_ba "errors";_cf "fmt";_cd "github.com/unidoc/unipdf/v3/common";_gc "github.com/unidoc/unipdf/v3/contentstream";_ce "github.com/unidoc/unipdf/v3/core";_fe "github.com/unidoc/unipdf/v3/internal/license";_ca "github.com/unidoc/unipdf/v3/internal/textencoding";_cec "github.com/unidoc/unipdf/v3/internal/transform";_bf "github.com/unidoc/unipdf/v3/model";_dg "golang.org/x/text/unicode/norm";_bd "golang.org/x/xerrors";_g "image/color";_d "io";_c "math";_f "regexp";_ac "sort";_e "strings";_ag "unicode";_a "unicode/utf8";);
|
||
|
||
// ApplyArea processes the page text only within the specified area `bbox`.
|
||
// Each time ApplyArea is called, it updates the result set in `pt`.
|
||
// Can be called multiple times in a row with different bounding boxes.
|
||
// ApplyArea rebuilds the text views of the page from only those marks that the
// _ffgg predicate accepts for `bbox`. Every call overwrites the text, the text
// marks and the tables stored on the receiver, so it can be called repeatedly
// with different bounding boxes.
func (pt *PageText) ApplyArea(bbox _bf.PdfRectangle) {
	// Select the marks belonging to the requested area.
	selected := make([]*textMark, 0, len(pt._deda))
	for _, mark := range pt._deda {
		if _ffgg(mark.bbox(), bbox) {
			selected = append(selected, mark)
		}
	}

	// Lay the marks out one orientation (0, 90, 180, 270) at a time and stop
	// as soon as every selected mark has been consumed.
	var paras paraList
	remaining := len(selected)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		group := make([]*textMark, 0, len(selected)-remaining)
		for _, mark := range selected {
			if mark._bcaf == orient {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _baed(group, pt._fed, nil, nil)...)
		remaining -= len(group)
	}

	var buf _fc.Buffer
	paras.writeText(&buf)
	pt._dgfb = buf.String()
	pt._ebb = paras.toTextMarks()
	pt._edef = paras.tables()
}

// imageExtractContext holds the state used while extracting images.
type imageExtractContext struct {
	_ae  []ImageMark
	_gg  int
	_dcd int
	_ee  int
	_bbb map[*_ce.PdfObjectStream]*cachedImage
	// _dgf carries the extraction options (e.g. IncludeInlineStencilMasks).
	_dgf *ImageExtractOptions
}

// Ruling kinds. _acbba returns _fedc for rectangles that are neither wide nor
// tall enough, _aadg when the width dominates and _ffag otherwise.
const (
	_fedc rulingKind = iota
	_aadg
	_ffag
)

// size returns the number of entries on the stack.
func (stack *stateStack) size() int {
	return len(*stack)
}

// _fgce returns the distance from the top edge (Ury) of `pageSize` down to the
// bottom edge (Lly) of the bounding box of `b`.
func _fgce(pageSize _bf.PdfRectangle, b bounded) float64 {
	box := b.bbox()
	return pageSize.Ury - box.Lly
}

// text returns everything writeText emits for the paragraph, as a string.
func (para *textPara) text() string {
	var buf _fc.Buffer
	para.writeText(&buf)
	return buf.String()
}
|
||
|
||
// Marks returns the TextMark collection for a page. It represents all the text on the page.
|
||
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (pt PageText) Marks() *TextMarkArray {
	marks := TextMarkArray{_gef: pt._ebb}
	return &marks
}

// subpath is a sequence of points plus a flag that subpath.close sets.
type subpath struct {
	_gaaca []_cec.Point
	_fgg   bool
}

var _gf = false

// findTextTables tries to grow a table from every paragraph that is not
// already taken and has a non-zero width. Candidates whose cell count
// (_agcd * _bdfg) stays below _fdcff are dropped; the rest get their cells
// marked and are returned in paragraph order.
func (paras paraList) findTextTables() []*textTable {
	var tables []*textTable
	for _, para := range paras {
		if para.taken() || para.Width() == 0 {
			continue
		}
		table := para.isAtom()
		if table == nil {
			continue
		}
		table.growTable()
		if cells := table._agcd * table._bdfg; cells < _fdcff {
			continue
		}
		table.markCells()
		table.log("grown")
		tables = append(tables, table)
	}
	return tables
}
|
||
|
||
// New returns an Extractor instance for extracting content from the input PDF page.
|
||
func New (page *_bf .PdfPage )(*Extractor ,error ){const _ceg ="\u0065\u0078\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077";_da ,_gb :=page .GetAllContentStreams ();if _gb !=nil {return nil ,_gb ;};_dad ,_gb :=page .GetMediaBox ();if _gb !=nil {return nil ,_cf .Errorf ("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076",_gb );};_bee :=&Extractor {_dd :_da ,_bac :page .Resources ,_be :*_dad ,_dc :map[string ]fontEntry {},_af :map[string ]textResult {}};if _bee ._be .Llx > _bee ._be .Urx {_cd .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_bee ._be );_bee ._be .Llx ,_bee ._be .Urx =_bee ._be .Urx ,_bee ._be .Llx ;};if _bee ._be .Lly > _bee ._be .Ury {_cd .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_bee ._be );_bee ._be .Lly ,_bee ._be .Ury =_bee ._be .Ury ,_bee ._be .Lly ;};_fe .TrackUse (_ceg );return _bee ,nil ;};func (_gagg *textObject )setTextLeading (_ed float64 ){if _gagg ==nil {return ;};_gagg ._gffg ._dfd =_ed ;};func (_bgbb paraList )eventNeighbours (_cdde []event )map[*textPara ][]int {_ac .Slice (_cdde ,func (_beed ,_eaecf int )bool {_bdefb ,_afgad :=_cdde [_beed ],_cdde [_eaecf ];_fgfbg ,_bbgg :=_bdefb ._bebea ,_afgad ._bebea ;if _fgfbg !=_bbgg {return _fgfbg < _bbgg ;};if _bdefb ._bafa !=_afgad ._bafa {return _bdefb ._bafa ;};return _beed < _eaecf ;});_bdge :=make (map[int ]intSet );_abcc :=make (intSet );for _ ,_fdaee :=range _cdde {if 
_fdaee ._bafa {_bdge [_fdaee ._dedba ]=make (intSet );for _feaf :=range _abcc {if _feaf !=_fdaee ._dedba {_bdge [_fdaee ._dedba ].add (_feaf );_bdge [_feaf ].add (_fdaee ._dedba );};};_abcc .add (_fdaee ._dedba );}else {_abcc .del (_fdaee ._dedba );};};_fbeaf :=map[*textPara ][]int {};for _feed ,_fdegb :=range _bdge {_aedag :=_bgbb [_feed ];if len (_fdegb )==0{_fbeaf [_aedag ]=nil ;continue ;};_gdefd :=make ([]int ,len (_fdegb ));_bgdbc :=0;for _efggc :=range _fdegb {_gdefd [_bgdbc ]=_efggc ;_bgdbc ++;};_fbeaf [_aedag ]=_gdefd ;};return _fbeaf ;};func (_agfaa *textPara )fontsize ()float64 {return _agfaa ._ffgc [0]._gabbd };func (_bbbf *shapesState )drawRectangle (_geaa ,_feeb ,_gec ,_cfb float64 ){if _fgdd {_ebe :=_bbbf .devicePoint (_geaa ,_feeb );_gdfa :=_bbbf .devicePoint (_geaa +_gec ,_feeb +_cfb );_dcfg :=_bf .PdfRectangle {Llx :_ebe .X ,Lly :_ebe .Y ,Urx :_gdfa .X ,Ury :_gdfa .Y };_cd .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_dcfg );};_bbbf .newSubPath ();_bbbf .moveTo (_geaa ,_feeb );_bbbf .lineTo (_geaa +_gec ,_feeb );_bbbf .lineTo (_geaa +_gec ,_feeb +_cfb );_bbbf .lineTo (_geaa ,_feeb +_cfb );_bbbf .closePath ();};func _cgbc (_fadf _bf .PdfColorspace ,_egaf _bf .PdfColor )_g .Color {if _fadf ==nil ||_egaf ==nil {return _g .Black ;};_deedf ,_egccc :=_fadf .ColorToRGB (_egaf );if _egccc !=nil {_cd .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_egaf ,_fadf ,_egccc );return _g .Black ;};_faga ,_eeace :=_deedf .(*_bf .PdfColorDeviceRGB );if !_eeace {_cd .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 
\u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_deedf );return _g .Black ;};return _g .NRGBA {R :uint8 (_faga .R ()*255),G :uint8 (_faga .G ()*255),B :uint8 (_faga .B ()*255),A :uint8 (255)};};
|
||
|
||
// String returns a description of `w`.
|
||
// String returns a description of `w`: its _cddc value, bounding box, font
// size (_bcdga) and quoted text.
func (w *textWord) String() string {
	const layout = `%.2f %6.2f fontsize=%.2f "%s"`
	return _cf.Sprintf(layout, w._cddc, w.PdfRectangle, w._bcdga, w._gbdc)
}

const _fdde = 1.0 / 1000.0
|
||
|
||
// NewFromContents creates a new extractor from contents and page resources.
|
||
// NewFromContents creates a new extractor from contents and page resources.
// The returned error is always nil.
func NewFromContents(contents string, resources *_bf.PdfPageResources) (*Extractor, error) {
	const usageKey = "extractor.NewFromContents"

	ext := &Extractor{
		_dd:  contents,
		_bac: resources,
		_dc:  map[string]fontEntry{},
		_af:  map[string]textResult{},
	}
	_fe.TrackUse(usageKey)
	return ext, nil
}

// setHorizScaling stores `scaling` in the current text state. It is a no-op
// on a nil receiver.
func (to *textObject) setHorizScaling(scaling float64) {
	if to == nil {
		return
	}
	to._gffg._bcga = scaling
}

// merge combines the rulings of a non-empty list into a single ruling. Kind,
// _cfga and Color are taken from the first ruling, _adcb is the mean of all
// _adcb values, and the [_cdcc, _bdag] range is widened to cover every ruling.
func (rl rulingList) merge() *ruling {
	first := rl[0]
	sum := first._adcb
	lo, hi := first._cdcc, first._bdag
	for _, r := range rl[1:] {
		sum += r._adcb
		if r._cdcc < lo {
			lo = r._cdcc
		}
		if r._bdag > hi {
			hi = r._bdag
		}
	}

	merged := &ruling{
		_gcae: first._gcae,
		_cfga: first._cfga,
		Color: first.Color,
		_adcb: sum / float64(len(rl)),
		_cdcc: lo,
		_bdag: hi,
	}
	if _aggc {
		_cd.Log.Info("merge: %2d vecs %s", len(rl), merged)
		for i, r := range rl {
			_cf.Printf("%4d: %s\n", i, r)
		}
	}
	return merged
}
|
||
|
||
// String returns a description of `t`.
|
||
// String returns a description of `t` in the form "<_agcd> x <_bdfg> <_abad>".
func (t *textTable) String() string {
	return _cf.Sprintf("%d x %d %t", t._agcd, t._bdfg, t._abad)
}

// connections returns the set of ruling indexes transitively reachable from
// index `_fad` through the adjacency sets in `_egbd`.
func (rl rulingList) connections(_egbd map[int]intSet, _fad int) intSet {
	connected := make(intSet)
	visited := make(intSet)

	var visit func(int)
	visit = func(id int) {
		if visited.has(id) {
			return
		}
		visited.add(id)
		// Record every ruling whose adjacency set contains id ...
		for i := range rl {
			if _egbd[i].has(id) {
				connected.add(i)
			}
		}
		// ... then continue from everything connected so far.
		for i := range rl {
			if connected.has(i) {
				visit(i)
			}
		}
	}
	visit(_fad)
	return connected
}

// textMark is the internal representation of a text mark. ToTextMark exposes
// a subset of these fields through the public TextMark type.
type textMark struct {
	_bf.PdfRectangle
	_bcaf  int          // exposed as TextMark.Orientation
	_bgfa  string       // exposed as TextMark.Text
	_ffaa  string       // exposed as TextMark.Original
	_gcag  *_bf.PdfFont // exposed as TextMark.Font
	_ada   float64      // exposed as TextMark.FontSize
	_eebd  float64
	_dba   _cec.Matrix
	_cgcga _cec.Point
	_bagb  _bf.PdfRectangle // exposed as TextMark.BBox
	_faeg  _g.Color         // exposed as TextMark.FillColor
	_ebcfg _g.Color         // exposed as TextMark.StrokeColor
}

// setFont records the font size and looks up the font called `name`. The size
// is stored before the lookup, so it is updated even when the lookup fails and
// the error is returned. On success the font is stored in the current text
// state and on the state stack: the state is pushed when the stack is empty,
// otherwise only the font of the top entry is replaced. It is a no-op on a nil
// receiver.
func (to *textObject) setFont(name string, size float64) error {
	if to == nil {
		return nil
	}
	to._gffg._bdd = size

	font, err := to.getFont(name)
	if err != nil {
		return err
	}
	to._gffg._cgcf = font

	if !to._cgcd.empty() {
		to._cgcd.top()._cgcf = to._gffg._cgcf
		return nil
	}
	to._cgcd.push(to._gffg)
	return nil
}
|
||
|
||
// String returns a string describing `tm`.
|
||
// String returns a string describing `tm`: offset, quoted text with its runes
// in hex, the bounding box corners, a font description limited to about 50
// bytes and a " *M*" suffix for meta marks.
func (tm TextMark) String() string {
	const maxFontLen = 50

	box := tm.BBox
	var font string
	if tm.Font != nil {
		font = tm.Font.String()
		if len(font) > maxFontLen {
			// Cut on a rune boundary so a multi-byte character is never split
			// into invalid UTF-8. For ASCII descriptions the cut stays at
			// exactly maxFontLen bytes.
			cut := maxFontLen
			for cut > 0 && !_a.RuneStart(font[cut]) {
				cut--
			}
			font = font[:cut] + "..."
		}
	}

	var meta string
	if tm.Meta {
		meta = " *M*"
	}
	return _cf.Sprintf("{TextMark: %d %q=%02x (%6.2f, %6.2f) (%6.2f, %6.2f) %s%s}",
		tm.Offset, tm.Text, []rune(tm.Text),
		box.Llx, box.Lly, box.Urx, box.Ury,
		font, meta)
}

// shapesState is the path construction state.
type shapesState struct {
	_cbbe _cec.Matrix
	_ebd  _cec.Matrix
	_dda  []*subpath // subpaths built so far; closePath closes the last one
	_decf bool       // when set, closePath starts a new subpath at _fafa
	_fafa _cec.Point
	_cfc  *textObject
}
|
||
|
||
// PageImages represents extracted images on a PDF page with spatial information:
|
||
// display position and size.
|
||
type PageImages struct{Images []ImageMark ;};func (_abea *textObject )getFontDirect (_gdcb string )(*_bf .PdfFont ,error ){_ecec ,_gefb :=_abea .getFontDict (_gdcb );if _gefb !=nil {return nil ,_gefb ;};_dcaed ,_gefb :=_bf .NewPdfFontFromPdfObject (_ecec );if _gefb !=nil {_cd .Log .Debug ("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gdcb ,_gefb );};return _dcaed ,_gefb ;};func (_cddg *wordBag )maxDepth ()float64 {return _cddg ._afec -_cddg .Lly };func (_aeff *PageText )computeViews (){var _bff rulingList ;if _fbec {_bagg :=_cdcgb (_aeff ._cbb );_bff =append (_bff ,_bagg ...);};if _geeg {_caag :=_cfcdb (_aeff ._fgbc );_bff =append (_bff ,_caag ...);};_bff ,_feg :=_bff .toTilings ();var _adcf paraList ;_gfba :=len (_aeff ._deda );for _becd :=0;_becd < 360&&_gfba > 0;_becd +=90{_fbea :=make ([]*textMark ,0,len (_aeff ._deda )-_gfba );for _ ,_beeg :=range _aeff ._deda {if _beeg ._bcaf ==_becd {_fbea =append (_fbea ,_beeg );};};if len (_fbea )> 0{_ebg :=_baed (_fbea ,_aeff ._fed ,_bff ,_feg );_adcf =append (_adcf ,_ebg ...);_gfba -=len (_fbea );};};_dbee :=new (_fc .Buffer );_adcf .writeText (_dbee );_aeff ._dgfb =_dbee .String ();_aeff ._ebb =_adcf .toTextMarks ();_aeff ._edef =_adcf .tables ();if _ccae {_cd .Log .Info ("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064",len (_aeff ._edef ));};};func _ecedb (_cdae string ,_fbcga int )string {if len (_cdae )< _fbcga {return _cdae ;};return _cdae [:_fbcga ];};func (_fg *imageExtractContext )processOperand (_df *_gc .ContentStreamOperation ,_bec _gc .GraphicsState ,_bcc *_bf .PdfPageResources )error {if _df .Operand =="\u0042\u0049"&&len (_df 
.Params )==1{_acb ,_db :=_df .Params [0].(*_gc .ContentStreamInlineImage );if !_db {return nil ;};if _dce ,_feca :=_ce .GetBoolVal (_acb .ImageMask );_feca {if _dce &&!_fg ._dgf .IncludeInlineStencilMasks {return nil ;};};return _fg .extractInlineImage (_acb ,_bec ,_bcc );}else if _df .Operand =="\u0044\u006f"&&len (_df .Params )==1{_gff ,_bdb :=_ce .GetName (_df .Params [0]);if !_bdb {_cd .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");return _fa ;};_ ,_dbf :=_bcc .GetXObjectByName (*_gff );switch _dbf {case _bf .XObjectTypeImage :return _fg .extractXObjectImage (_gff ,_bec ,_bcc );case _bf .XObjectTypeForm :return _fg .extractFormImages (_gff ,_bec ,_bcc );};};return nil ;};func _cggfd (_gced []pathSection ){if _gfbac < 0.0{return ;};if _fgfe {_cd .Log .Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_gced ));};for _dggdg ,_ccdfa :=range _gced {for _gdcbd ,_gdegg :=range _ccdfa ._fgaff {for _gceda ,_gbeb :=range _gdegg ._gaaca {_gdegg ._gaaca [_gceda ]=_cec .Point {X :_gcaa (_gbeb .X ),Y :_gcaa (_gbeb .Y )};if _fgfe {_bafef :=_gdegg ._gaaca [_gceda ];if !_feee (_gbeb ,_bafef ){_beee :=_cec .Point {X :_bafef .X -_gbeb .X ,Y :_bafef .Y -_gbeb .Y };_cf .Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_dggdg ,_gdcbd ,_gceda ,_gbeb ,_bafef ,_beee );};};};};};};
|
||
|
||
// Len returns the number of TextMarks in `ma`.
|
||
func (_fgaf *TextMarkArray )Len ()int {if _fgaf ==nil {return 0;};return len (_fgaf ._gef );};func (_dbcff rulingList )sortStrict (){_ac .Slice (_dbcff ,func (_edaf ,_aaaf int )bool {_afegc ,_cbce :=_dbcff [_edaf ],_dbcff [_aaaf ];_aabc ,_bcfe :=_afegc ._gcae ,_cbce ._gcae ;if _aabc !=_bcfe {return _aabc > _bcfe ;};_gbgb ,_gdgc :=_afegc ._adcb ,_cbce ._adcb ;if !_gbgf (_gbgb -_gdgc ){return _gbgb < _gdgc ;};_gbgb ,_gdgc =_afegc ._cdcc ,_cbce ._cdcc ;if _gbgb !=_gdgc {return _gbgb < _gdgc ;};return _afegc ._bdag < _cbce ._bdag ;});};func (_gcdc paraList )topoOrder ()[]int {if _agd {_cd .Log .Info ("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a");};_daac :=len (_gcdc );_efaa :=make ([]bool ,_daac );_gggb :=make ([]int ,0,_daac );_gbee :=_gcdc .llyOrdering ();var _fbdb func (_agfba int );_fbdb =func (_aace int ){_efaa [_aace ]=true ;for _fcbff :=0;_fcbff < _daac ;_fcbff ++{if !_efaa [_fcbff ]{if _gcdc .readBefore (_gbee ,_aace ,_fcbff ){_fbdb (_fcbff );};};};_gggb =append (_gggb ,_aace );};for _eca :=0;_eca < _daac ;_eca ++{if !_efaa [_eca ]{_fbdb (_eca );};};return _befb (_gggb );};func (_egcbc paraList )findTables (_fgdg []gridTiling )[]*textTable {_egcbc .addNeighbours ();_ac .Slice (_egcbc ,func (_fbdbg ,_ccfgg int )bool {return _bbde (_egcbc [_fbdbg ],_egcbc [_ccfgg ])< 0});var _agbc []*textTable ;if _bggd {_gcabe :=_egcbc .findGridTables (_fgdg );_agbc =append (_agbc ,_gcabe ...);};if _gcc {_becc :=_egcbc .findTextTables ();_agbc =append (_agbc ,_becc ...);};return _agbc ;};func (_eacc *textTable )bbox ()_bf .PdfRectangle {return _eacc .PdfRectangle };func _aafe (_agdccd int ,_aecf func (int ,int )bool )[]int {_eaacb :=make ([]int ,_agdccd );for _abagb :=range _eaacb {_eaacb [_abagb ]=_abagb ;};_ac .Slice (_eaacb ,func (_fcce ,_cdebe int )bool {return _aecf (_eaacb [_fcce ],_eaacb [_cdebe ])});return _eaacb ;};func (_gbfef *textTable )isExportable ()bool {if _gbfef ._abad {return true ;};_aae :=func (_gcgf int )bool {_dgeb :=_gbfef .get (0,_gcgf 
);if _dgeb ==nil {return false ;};_agdae :=_dgeb .text ();_fdec :=_a .RuneCountInString (_agdae );_bbgcb :=_bbcc .MatchString (_agdae );return _fdec <=1||_bbgcb ;};for _gfdgb :=0;_gfdgb < _gbfef ._bdfg ;_gfdgb ++{if !_aae (_gfdgb ){return true ;};};return false ;};func (_adfg rectRuling )checkWidth (_cgeb ,_edcc float64 )(float64 ,bool ){_afga :=_edcc -_cgeb ;_ccfa :=_afga <=_ecba ;return _afga ,_ccfa ;};func _agad (_eggg *wordBag ,_fbcb *textWord ,_bcfac float64 )bool {return _fbcb .Llx < _eggg .Urx +_bcfac &&_eggg .Llx -_bcfac < _fbcb .Urx ;};type rulingKind int ;const (_bafc markKind =iota ;_gfaa ;_ggcec ;_cfgce ;);func _egae (_acbb ,_dagca float64 )string {_aegcc :=!_gbgf (_acbb -_dagca );if _aegcc {return "\u000a";};return "\u0020";};func (_ecgg *subpath )last ()_cec .Point {return _ecgg ._gaaca [len (_ecgg ._gaaca )-1]};func (_bcaef paraList )log (_egcb string ){if !_agd {return ;};_cd .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_egcb ,len (_bcaef ));for _eggga ,_agag :=range _bcaef {if _agag ==nil {continue ;};_dae :=_agag .text ();_cbbd :="\u0020\u0020";if _agag ._bfag !=nil {_cbbd =_cf .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_agag ._bfag ._agcd ,_agag ._bfag ._bdfg );};_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_eggga ,_agag .PdfRectangle ,_cbbd ,_ecedb (_dae ,50));};};func (_cecgd *wordBag )minDepth ()float64 {return _cecgd ._afec -(_cecgd .Ury -_cecgd ._dadc )};func (_cbe *subpath )clear (){*_cbe =subpath {}};var _bbcc =_f .MustCompile ("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024");func _geed (_fff _bf .PdfRectangle )*ruling {return &ruling {_gcae :_ffag ,_adcb :_fff .Llx ,_cdcc :_fff .Lly ,_bdag :_fff .Ury };};func 
(_gdgd *compositeCell )updateBBox (){for _ ,_caabg :=range _gdgd .paraList {_gdgd .PdfRectangle =_gffe (_gdgd .PdfRectangle ,_caabg .PdfRectangle );};};func (_gab *textObject )setWordSpacing (_dea float64 ){if _gab ==nil {return ;};_gab ._gffg ._bdf =_dea ;};func (_bca *textObject )getFontDict (_bbdaa string )(_bgab _ce .PdfObject ,_ggfg error ){_dbdb :=_bca ._dbe ;if _dbdb ==nil {_cd .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_bbdaa );return nil ,nil ;};_bgab ,_ccfe :=_dbdb .GetFontByName (_ce .PdfObjectName (_bbdaa ));if !_ccfe {_cd .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_bbdaa );return nil ,_ba .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _bgab ,nil ;};func (_fge *shapesState )closePath (){if _fge ._decf {_fge ._dda =append (_fge ._dda ,_ecf (_fge ._fafa ));_fge ._decf =false ;}else if len (_fge ._dda )==0{if _fgdd {_cd .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");};_fge ._decf =false ;return ;};_fge ._dda [len (_fge ._dda )-1].close ();if _fgdd {_cd .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_fge );};};type bounded interface{bbox ()_bf .PdfRectangle };
|
||
|
||
// String returns a description of `state`.
|
||
// String returns a description of `state`: the _cef, _bdf and _bdd values and
// the base font name, or "[NOT SET]" when no font is set.
func (state *textState) String() string {
	font := "[NOT SET]"
	if f := state._cgcf; f != nil {
		font = f.BaseFont()
	}
	return _cf.Sprintf("tc=%.2f tw=%.2f tfs=%.2f font=%q", state._cef, state._bdf, state._bdd, font)
}

// _aec binds `tol` as the third argument of `pred` and returns the resulting
// two-argument predicate.
func _aec(pred func(*wordBag, *textWord, float64) bool, tol float64) func(*wordBag, *textWord) bool {
	bound := func(bag *wordBag, word *textWord) bool {
		return pred(bag, word, tol)
	}
	return bound
}

// contains reports whether `rect` fits inside the tile. A tile with fewer
// than three borders contains nothing. Only sides that have a border are
// checked, each with a tolerance of _gcfge.
func (tile gridTile) contains(rect _bf.PdfRectangle) bool {
	if tile.numBorders() < 3 {
		return false
	}
	switch {
	case tile._cabgg && rect.Llx < tile.Llx-_gcfge: // sticks out on the left
		return false
	case tile._aded && rect.Urx > tile.Urx+_gcfge: // sticks out on the right
		return false
	case tile._agcf && rect.Lly < tile.Lly-_gcfge: // sticks out at the bottom
		return false
	case tile._gcca && rect.Ury > tile.Ury+_gcfge: // sticks out at the top
		return false
	}
	return true
}
|
||
|
||
// ToTextMark returns the public view of `tm`.
|
||
// ToTextMark returns the public view of `tm`.
func (tm *textMark) ToTextMark() TextMark {
	var public TextMark
	public.Text = tm._bgfa
	public.Original = tm._ffaa
	public.BBox = tm._bagb
	public.Font = tm._gcag
	public.FontSize = tm._ada
	public.FillColor = tm._faeg
	public.StrokeColor = tm._ebcfg
	public.Orientation = tm._bcaf
	return public
}

// _ecf returns a new subpath whose only point is `start`.
func _ecf(start _cec.Point) *subpath {
	sp := &subpath{}
	sp._gaaca = []_cec.Point{start}
	return sp
}
|
||
|
||
// String returns a description of `tm`.
|
||
func (_bfbf *textMark )String ()string {return _cf .Sprintf ("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022",_bfbf .PdfRectangle ,_bfbf ._ada ,_bfbf ._bgfa );};func (_gbff *textObject )getCurrentFont ()*_bf .PdfFont {var _efd *_bf .PdfFont ;if !_gbff ._cgcd .empty (){_efd =_gbff ._cgcd .top ()._cgcf ;};if _efd ==nil {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e");return _bf .DefaultFont ();};return _efd ;};type gridTile struct{_bf .PdfRectangle ;_gcca ,_cabgg ,_agcf ,_aded bool ;};func (_ccaef *subpath )makeRectRuling (_bfcd _g .Color )(*ruling ,bool ){if _caec {_cd .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_ccaef );};_gbfg :=_ccaef ._gaaca [:4];_acab :=make (map[int ]rulingKind ,len (_gbfg ));for _degd ,_fgaa :=range _gbfg {_dbeb :=_ccaef ._gaaca [(_degd +1)%4];_acab [_degd ]=_dgbf (_fgaa ,_dbeb );if _caec {_cf .Printf ("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066",_degd ,_acab [_degd ],_fgaa ,_dbeb );};};if _caec {_cf .Printf ("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_acab );};var _bcaee ,_fgfd []int ;for _fdbe ,_cabb :=range _acab {switch _cabb {case _aadg :_fgfd =append (_fgfd ,_fdbe );case _ffag :_bcaee =append (_bcaee ,_fdbe );};};if _caec {_cf .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_fgfd ),_fgfd );_cf .Printf ("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_bcaee ),_bcaee );};_cdbf :=(len (_fgfd )==2&&len (_bcaee )==2)||(len (_fgfd )==2&&len (_bcaee )==0&&_gfbab 
(_gbfg [_fgfd [0]],_gbfg [_fgfd [1]]))||(len (_bcaee )==2&&len (_fgfd )==0&&_gcebg (_gbfg [_bcaee [0]],_gbfg [_bcaee [1]]));if _caec {_cf .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_fgfd ),len (_bcaee ),_cdbf );};if !_cdbf {if _caec {_cd .Log .Error ("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v",_ccaef );_cf .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_fgfd ),len (_bcaee ),_cdbf );};return &ruling {},false ;};if len (_bcaee )==0{for _bfacf ,_fedcb :=range _acab {if _fedcb !=_aadg {_bcaee =append (_bcaee ,_bfacf );};};};if len (_fgfd )==0{for _bggee ,_bebeb :=range _acab {if _bebeb !=_ffag {_fgfd =append (_fgfd ,_bggee );};};};if _caec {_cd .Log .Info ("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_fgfd ),len (_bcaee ),len (_gbfg ),_fgfd ,_bcaee ,_gbfg );};var _gegb ,_gffgb ,_agdeg ,_ddgb _cec .Point ;if _gbfg [_fgfd [0]].Y > _gbfg [_fgfd [1]].Y {_agdeg ,_ddgb =_gbfg [_fgfd [0]],_gbfg [_fgfd [1]];}else {_agdeg ,_ddgb =_gbfg [_fgfd [1]],_gbfg [_fgfd [0]];};if _gbfg [_bcaee [0]].X > _gbfg [_bcaee [1]].X {_gegb ,_gffgb =_gbfg [_bcaee [0]],_gbfg [_bcaee [1]];}else {_gegb ,_gffgb =_gbfg [_bcaee [1]],_gbfg [_bcaee [0]];};_eadf :=_bf .PdfRectangle {Llx :_gegb .X ,Urx :_gffgb .X ,Lly :_ddgb .Y ,Ury :_agdeg .Y };if _eadf .Llx > _eadf .Urx {_eadf .Llx ,_eadf .Urx =_eadf .Urx ,_eadf .Llx ;};if _eadf .Lly > 
_eadf .Ury {_eadf .Lly ,_eadf .Ury =_eadf .Ury ,_eadf .Lly ;};_dcgf :=rectRuling {PdfRectangle :_eadf ,_ged :_acbba (_eadf ),Color :_bfcd };if _dcgf ._ged ==_fedc {if _caec {_cd .Log .Error ("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c");};return nil ,false ;};_ggba ,_ebed :=_dcgf .asRuling ();if !_ebed {if _caec {_cd .Log .Error ("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg");};return nil ,false ;};if _fgfe {_cf .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_ggba .String ());};return _ggba ,true ;};
|
||
|
||
// Extractor stores and offers functionality for extracting content from PDF pages.
|
||
type Extractor struct{_dd string ;_bac *_bf .PdfPageResources ;_be _bf .PdfRectangle ;_dc map[string ]fontEntry ;_af map[string ]textResult ;_bb int64 ;_agc int ;};func (_ddac paraList )readBefore (_aacec []int ,_gadge ,_cgaf int )bool {_bfbfa ,_acfe :=_ddac [_gadge ],_ddac [_cgaf ];if _addc (_bfbfa ,_acfe )&&_bfbfa .Lly > _acfe .Lly {return true ;};if !(_bfbfa ._bcda .Urx < _acfe ._bcda .Llx ){return false ;};_cced ,_acbf :=_bfbfa .Lly ,_acfe .Lly ;if _cced > _acbf {_acbf ,_cced =_cced ,_acbf ;};_ccde :=_c .Max (_bfbfa ._bcda .Llx ,_acfe ._bcda .Llx );_cace :=_c .Min (_bfbfa ._bcda .Urx ,_acfe ._bcda .Urx );_cffd :=_ddac .llyRange (_aacec ,_cced ,_acbf );for _ ,_acddb :=range _cffd {if _acddb ==_gadge ||_acddb ==_cgaf {continue ;};_babd :=_ddac [_acddb ];if _babd ._bcda .Llx <=_cace &&_ccde <=_babd ._bcda .Urx {return false ;};};return true ;};func _gbca (_gbea []*textMark ,_ebeff _bf .PdfRectangle )[]*textWord {var _begf []*textWord ;var _fbba *textWord ;if _bbac {_cd .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_gbea ));};_cdgd :=func (){if _fbba !=nil {_efef :=_fbba .computeText ();if !_bedeb (_efef ){_fbba ._gbdc =_efef ;_begf =append (_begf ,_fbba );if _bbac {_cd .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_begf )-1,_fbba .String ());for _eefcg ,_eaba :=range _fbba ._eacag {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_eefcg ,_eaba .String ());};};};_fbba =nil ;};};for _ ,_deaag :=range _gbea {if _abc &&_fbba !=nil &&len (_fbba ._eacag )> 0{_dgdc :=_fbba ._eacag [len (_fbba ._eacag )-1];_daeg ,_abbg :=_bdccf (_deaag ._bgfa );_fgfdc ,_aadfb :=_bdccf (_dgdc ._bgfa );if _abbg &&!_aadfb &&_dgdc .inDiacriticArea (_deaag ){_fbba .addDiacritic (_daeg );continue ;};if _aadfb &&!_abbg &&_deaag .inDiacriticArea (_dgdc ){_fbba ._eacag =_fbba 
._eacag [:len (_fbba ._eacag )-1];_fbba .appendMark (_deaag ,_ebeff );_fbba .addDiacritic (_fgfdc );continue ;};};_gdag :=_bedeb (_deaag ._bgfa );if _gdag {_cdgd ();continue ;};if _fbba ==nil &&!_gdag {_fbba =_eece ([]*textMark {_deaag },_ebeff );continue ;};_efbed :=_fbba ._bcdga ;_efbf :=_c .Abs (_fgce (_ebeff ,_deaag )-_fbba ._cddc )/_efbed ;_dgdb :=_gcgd (_deaag ,_fbba )/_efbed ;if _dgdb >=_fbff ||!(-_ebcf <=_dgdb &&_efbf <=_afg ){_cdgd ();_fbba =_eece ([]*textMark {_deaag },_ebeff );continue ;};_fbba .appendMark (_deaag ,_ebeff );};_cdgd ();return _begf ;};func _baed (_cdbd []*textMark ,_ggcg _bf .PdfRectangle ,_agab rulingList ,_ebcd []gridTiling )paraList {_cd .Log .Trace ("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066",len (_cdbd ),_ggcg );if len (_cdbd )==0{return nil ;};_fbdf :=_gbca (_cdbd ,_ggcg );if len (_fbdf )==0{return nil ;};_agab .log ("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065");_dace ,_aefc :=_agab .vertsHorzs ();_gfff :=_aeda (_fbdf ,_ggcg .Ury ,_dace ,_aefc );_aeef :=_aeaf (_gfff ,_ggcg .Ury ,_dace ,_aefc );_aeef =_gaag (_aeef );_acdd :=make (paraList ,0,len (_aeef ));for _ ,_fcbf :=range _aeef {_dcfa :=_fcbf .arrangeText ();if _dcfa !=nil {_acdd =append (_acdd ,_dcfa );};};if len (_acdd )>=_fdcff {_acdd =_acdd .extractTables (_ebcd );};_acdd .sortReadingOrder ();_acdd .log ("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072");return _acdd ;};func _feee (_cdca ,_agbe _cec .Point )bool {return _cdca .X ==_agbe .X &&_cdca .Y ==_agbe .Y };
|
||
|
||
// TextTable represents a table.
|
||
// Cells are ordered top-to-bottom, left-to-right.
|
||
// Cells[y] is the (0-offset) y'th row in the table.
|
||
// Cells[y][x] is the (0-offset) x'th column in the table.
|
||
type TextTable struct{W ,H int ;Cells [][]TableCell ;};func (_bede compositeCell )String ()string {_eaf :="";if len (_bede .paraList )> 0{_eaf =_ecedb (_bede .paraList .merge ().text (),50);};return _cf .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071",_bede .PdfRectangle ,len (_bede .paraList ),_eaf );};func _acbba (_deee _bf .PdfRectangle )rulingKind {_bbbe :=_deee .Width ();_bccd :=_deee .Height ();if _bbbe > _bccd {if _bbbe >=_ggde {return _aadg ;};}else {if _bccd >=_ggde {return _ffag ;};};return _fedc ;};func (_fdbce *textTable )subdivide ()*textTable {_fdbce .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");_fgba :=_fdbce .compositeRowCorridors ();_gbbcc :=_fdbce .compositeColCorridors ();if _ccae {_cd .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_fbagg (_fgba ),_fbagg (_gbbcc ));};if len (_fgba )==0||len (_gbbcc )==0{return _fdbce ;};_accc (_fgba );_accc (_gbbcc );if _ccae {_cd .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_fbagg (_fgba ),_fbagg (_gbbcc ));};_ddeb ,_fcea :=_babce (_fdbce ._bdfg ,_fgba );_gdfgf ,_dfgfb :=_babce (_fdbce ._agcd ,_gbbcc );_bcdf :=make (map[uint64 ]*textPara ,_dfgfb *_fcea );_gcdff :=&textTable {PdfRectangle :_fdbce .PdfRectangle ,_abad :_fdbce ._abad ,_bdfg :_fcea ,_agcd :_dfgfb ,_afbef :_bcdf };if _ccae {_cd .Log .Info 
("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_fdbce ._agcd ,_fdbce ._bdfg ,_dfgfb ,_fcea ,_fbagg (_fgba ),_fbagg (_gbbcc ),_ddeb ,_gdfgf );};for _addb :=0;_addb < _fdbce ._bdfg ;_addb ++{_ggedc :=_ddeb [_addb ];for _aafc :=0;_aafc < _fdbce ._agcd ;_aafc ++{_afbg :=_gdfgf [_aafc ];if _ccae {_cf .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_aafc ,_addb ,_afbg ,_ggedc );};_abdeb ,_cbbc :=_fdbce ._bfcbf [_bfddd (_aafc ,_addb )];if !_cbbc {continue ;};_cdagcf :=_abdeb .split (_fgba [_addb ],_gbbcc [_aafc ]);for _dbbd :=0;_dbbd < _cdagcf ._bdfg ;_dbbd ++{for _fgec :=0;_fgec < _cdagcf ._agcd ;_fgec ++{_geff :=_cdagcf .get (_fgec ,_dbbd );_gcdff .put (_afbg +_fgec ,_ggedc +_dbbd ,_geff );if _ccae {_cf .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_afbg +_fgec ,_ggedc +_dbbd ,_geff );};};};};};return _gcdff ;};func _cfdeg (_dcafd map[int ]intSet )[]int {_ecbd :=make ([]int ,0,len (_dcafd ));for _dbbdf :=range _dcafd {_ecbd =append (_ecbd ,_dbbdf );};_ac .Ints (_ecbd );return _ecbd ;};func (_cfeg *subpath )close (){if !_feee (_cfeg ._gaaca [0],_cfeg .last ()){_cfeg .add (_cfeg ._gaaca [0]);};_cfeg ._fgg =true ;_cfeg .removeDuplicates ();};func _adbc (_fgfc string )string {_bfaa :=[]rune (_fgfc );return string (_bfaa [:len (_bfaa )-1])};func (_bgcb *ruling )encloses 
(_bgfae ,_bdad float64 )bool {return _bgcb ._cdcc -_egdeb <=_bgfae &&_bdad <=_bgcb ._bdag +_egdeb ;};func _bbde (_dfcg ,_aeea bounded )float64 {_cbad :=_ddge (_dfcg ,_aeea );if !_gbgf (_cbad ){return _cbad ;};return _ege (_dfcg ,_aeea );};var (_bega =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};);func (_agg *imageExtractContext )extractInlineImage (_fd *_gc .ContentStreamInlineImage ,_bcg _gc .GraphicsState ,_gcb *_bf .PdfPageResources )error {_gd ,_bdg :=_fd .ToImage (_gcb );if _bdg !=nil {return _bdg ;};_bccf ,_bdg :=_fd .GetColorSpace (_gcb );if _bdg !=nil {return _bdg ;};if _bccf ==nil {_bccf =_bf .NewPdfColorspaceDeviceGray ();};_bcgc ,_bdg :=_bccf .ImageToRGB (*_gd );if _bdg !=nil {return _bdg ;};_eeb :=ImageMark {Image :&_bcgc ,Width :_bcg .CTM .ScalingFactorX (),Height :_bcg .CTM .ScalingFactorY (),Angle :_bcg .CTM .Angle ()};_eeb .X ,_eeb .Y =_bcg .CTM .Translation ();_agg ._ae =append (_agg ._ae ,_eeb );_agg ._gg ++;return nil ;};func (_bfddb lineRuling )xMean ()float64 {return 0.5*(_bfddb ._ebgf .X +_bfddb ._cgbg .X )};func (_gfbg paraList )merge ()*textPara {_cd .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_gfbg ));if len (_gfbg )==0{return 
nil ;};_gfbg .sortReadingOrder ();_addg :=_gfbg [0].PdfRectangle ;_abbef :=_gfbg [0]._ffgc ;for _ ,_faag :=range _gfbg [1:]{_addg =_gffe (_addg ,_faag .PdfRectangle );_abbef =append (_abbef ,_faag ._ffgc ...);};return _cbadc (_addg ,_abbef );};func (_cfde *textTable )newTablePara ()*textPara {_caafb :=_cfde .computeBbox ();_gebf :=&textPara {PdfRectangle :_caafb ,_bcda :_caafb ,_bfag :_cfde };if _ccae {_cd .Log .Info ("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073",_gebf );};return _gebf ;};type textResult struct{_dfad PageText ;_gaac int ;_bab int ;};type lineRuling struct{_cfa rulingKind ;_agde markKind ;_g .Color ;_ebgf ,_cgbg _cec .Point ;};var (_fa =_ba .New ("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072");_cfg =_ba .New ("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072"););
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct{IncludeInlineStencilMasks bool ;};func _ceeg (_ebfe ,_dccd _bf .PdfRectangle )bool {return _ebfe .Llx <=_dccd .Llx &&_dccd .Urx <=_ebfe .Urx &&_ebfe .Lly <=_dccd .Lly &&_dccd .Ury <=_ebfe .Ury ;};func (_acbfb paraList )llyOrdering ()[]int {_fdaaa :=make ([]int ,len (_acbfb ));for _bfce :=range _acbfb {_fdaaa [_bfce ]=_bfce ;};_ac .SliceStable (_fdaaa ,func (_dbad ,_bffc int )bool {_ggfa ,_gfae :=_fdaaa [_dbad ],_fdaaa [_bffc ];return _acbfb [_ggfa ].Lly < _acbfb [_gfae ].Lly ;});return _fdaaa ;};type ruling struct{_gcae rulingKind ;_cfga markKind ;_g .Color ;_adcb float64 ;_cdcc float64 ;_bdag float64 ;_dddb float64 ;};func (_fgde *textMark )bbox ()_bf .PdfRectangle {return _fgde .PdfRectangle };type stateStack []*textState ;func (_deed *shapesState )addPoint (_cdebd ,_gcdf float64 ){_gfdb :=_deed .establishSubpath ();_fbfd :=_deed .devicePoint (_cdebd ,_gcdf );if _gfdb ==nil {_deed ._decf =true ;_deed ._fafa =_fbfd ;}else {_gfdb .add (_fbfd );};};func (_ddd *Extractor )extractPageText (_fde string ,_efe *_bf .PdfPageResources ,_dfa _cec .Matrix ,_cg int )(*PageText ,int ,int ,error ){_cd .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_cg );_ffd :=&PageText {_fed :_ddd ._be };_faf :=_ccd (_ddd ._be );_dcf :=stateStack {&_faf };_fcc :=_fcd (_ddd ,_efe ,_gc .GraphicsState {},&_faf ,&_dcf );_fgbd :=shapesState {_ebd :_dfa ,_cbbe :_cec .IdentityMatrix (),_cfc :_fcc };var _eab bool ;if _cg > _efa {_bbca :=_ba .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_cg ,_bbca );return _ffd ,_faf ._bfbb 
,_faf ._bga ,_bbca ;};_dbd :=_gc .NewContentStreamParser (_fde );_gcg ,_bbe :=_dbd .Parse ();if _bbe !=nil {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bbe );return _ffd ,_faf ._bfbb ,_faf ._bga ,_bbe ;};_ced :=_gc .NewContentStreamProcessor (*_gcg );_ced .AddHandler (_gc .HandlerConditionEnumAllOperands ,"",func (_gbb *_gc .ContentStreamOperation ,_daa _gc .GraphicsState ,_gbf *_bf .PdfPageResources )error {_cfe :=_gbb .Operand ;if _fcaag {_cd .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_gbb );};switch _cfe {case "\u0071":if _fgdd {_cd .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_fgbd ._cbbe );};_dcf .push (&_faf );case "\u0051":if !_dcf .empty (){if len (_dcf )>=2{_dcf .pop ();};_faf =*_dcf .top ();};_fgbd ._cbbe =_daa .CTM ;if _fgdd {_cd .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_fgbd ._cbbe );};case "\u0042\u0054":if _eab {_cd .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");_ffd ._deda =append (_ffd ._deda ,_fcc ._eabd ...);};_eab =true ;_bgg :=_daa ;_bgg .CTM =_dfa .Mult (_bgg .CTM );_fcc =_fcd (_ddd ,_gbf ,_bgg ,&_faf ,&_dcf );_fgbd ._cfc =_fcc ;case "\u0045\u0054":if !_eab {_cd .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");};_eab =false ;_ffd ._deda =append (_ffd ._deda ,_fcc ._eabd ...);_fcc .reset ();case "\u0054\u002a":_fcc .nextLine ();case "\u0054\u0064":if _bage ,_daag :=_fcc .checkOp (_gbb ,2,true );!_bage {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_daag 
);return _daag ;};_fcb ,_bad ,_bgf :=_afed (_gbb .Params );if _bgf !=nil {return _bgf ;};_fcc .moveText (_fcb ,_bad );case "\u0054\u0044":if _dbb ,_gcgg :=_fcc .checkOp (_gbb ,2,true );!_dbb {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gcgg );return _gcgg ;};_bba ,_dcb ,_ccf :=_afed (_gbb .Params );if _ccf !=nil {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ccf );return _ccf ;};_fcc .moveTextSetLeading (_bba ,_dcb );case "\u0054\u006a":if _gfd ,_fdea :=_fcc .checkOp (_gbb ,1,true );!_gfd {_cd .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_gbb ,_fdea );return _fdea ;};_age ,_gba :=_ce .GetStringBytes (_gbb .Params [0]);if !_gba {_cd .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_gbb );return _ce .ErrTypeError ;};return _fcc .showText (_age );case "\u0054\u004a":if _ccg ,_fef :=_fcc .checkOp (_gbb ,1,true );!_ccg {_cd .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fef );return _fef ;};_fgc ,_fee :=_ce .GetArray (_gbb .Params [0]);if !_fee {_cd .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_gbb );return _bbe ;};return _fcc .showTextAdjusted (_fgc );case "\u0027":if _fdg ,_cca :=_fcc .checkOp (_gbb ,1,true );!_fdg {_cd .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cca );return _cca ;};_aed ,_bcf :=_ce .GetStringBytes (_gbb .Params [0]);if !_bcf {_cd .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s 
\u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_gbb );return _ce .ErrTypeError ;};_fcc .nextLine ();return _fcc .showText (_aed );case "\u0022":if _gbce ,_dbc :=_fcc .checkOp (_gbb ,3,true );!_gbce {_cd .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dbc );return _dbc ;};_ggb ,_caf ,_aeg :=_afed (_gbb .Params [:2]);if _aeg !=nil {return _aeg ;};_ceb ,_cecg :=_ce .GetStringBytes (_gbb .Params [2]);if !_cecg {_cd .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_gbb );return _ce .ErrTypeError ;};_fcc .setCharSpacing (_ggb );_fcc .setWordSpacing (_caf );_fcc .nextLine ();return _fcc .showText (_ceb );case "\u0054\u004c":_gda ,_ece :=_acc (_gbb );if _ece !=nil {_cd .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ece );return _ece ;};_fcc .setTextLeading (_gda );case "\u0054\u0063":_eeg ,_fdd :=_acc (_gbb );if _fdd !=nil {_cd .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fdd );return _fdd ;};_fcc .setCharSpacing (_eeg );case "\u0054\u0066":if _cfgc ,_cgc :=_fcc .checkOp (_gbb ,2,true );!_cfgc {_cd .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cgc );return _cgc ;};_dgd ,_bcd :=_ce .GetNameVal (_gbb .Params [0]);if !_bcd {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_gbb );return _ce .ErrTypeError ;};_fb ,_gag :=_ce .GetNumberAsFloat (_gbb .Params [1]);if !_bcd {_cd .Log .Debug 
("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gbb ,_gag );return _gag ;};_gag =_fcc .setFont (_dgd ,_fb );_fcc ._dfgf =_bd .Is (_gag ,_ce .ErrNotSupported );if _gag !=nil &&!_fcc ._dfgf {return _gag ;};case "\u0054\u006d":if _gcfe ,_ecd :=_fcc .checkOp (_gbb ,6,true );!_gcfe {_cd .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ecd );return _ecd ;};_baf ,_ddc :=_ce .GetNumbersAsFloat (_gbb .Params );if _ddc !=nil {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ddc );return _ddc ;};_fcc .setTextMatrix (_baf );case "\u0054\u0072":if _ggf ,_deg :=_fcc .checkOp (_gbb ,1,true );!_ggf {_cd .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_deg );return _deg ;};_dgaf ,_bdbc :=_ce .GetIntVal (_gbb .Params [0]);if !_bdbc {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_gbb );return _ce .ErrTypeError ;};_fcc .setTextRenderMode (_dgaf );case "\u0054\u0073":if _gaga ,_bdc :=_fcc .checkOp (_gbb ,1,true );!_gaga {_cd .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bdc );return _bdc ;};_ace ,_acg :=_ce .GetNumberAsFloat (_gbb .Params [0]);if _acg !=nil {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_acg );return _acg ;};_fcc .setTextRise (_ace );case "\u0054\u0077":if _gbcc ,_cbd :=_fcc .checkOp (_gbb ,1,true );!_gbcc {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cbd );return _cbd ;};_gcff ,_ecg :=_ce 
.GetNumberAsFloat (_gbb .Params [0]);if _ecg !=nil {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ecg );return _ecg ;};_fcc .setWordSpacing (_gcff );case "\u0054\u007a":if _bda ,_dgc :=_fcc .checkOp (_gbb ,1,true );!_bda {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dgc );return _dgc ;};_gea ,_agf :=_ce .GetNumberAsFloat (_gbb .Params [0]);if _agf !=nil {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_agf );return _agf ;};_fcc .setHorizScaling (_gea );case "\u0063\u006d":_fgbd ._cbbe =_daa .CTM ;if _fgbd ._cbbe .Singular (){_dcg :=_cec .IdentityMatrix ().Translate (_fgbd ._cbbe .Translation ());_cd .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_fgbd ._cbbe ,_dcg );_fgbd ._cbbe =_dcg ;};if _fgdd {_cd .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_fgbd ._cbbe );};case "\u006d":if len (_gbb .Params )!=2{_cd .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_cfg );return nil ;};_eaab ,_aceg :=_ce .GetNumbersAsFloat (_gbb .Params );if _aceg !=nil {return _aceg ;};_cd .Log .Debug ("\u004d\u006f\u0076\u0065\u0020\u0074\u006f\u003a\u0020\u0025\u002e\u0032\u0066",_eaab );_fgbd .moveTo (_eaab [0],_eaab [1]);case "\u006c":if len (_gbb .Params )!=2{_cd .Log .Debug 
("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_cfg );return nil ;};_cdb ,_gdd :=_ce .GetNumbersAsFloat (_gbb .Params );if _gdd !=nil {return _gdd ;};_fgbd .lineTo (_cdb [0],_cdb [1]);case "\u0063":if len (_gbb .Params )!=6{return _cfg ;};_ebc ,_aebg :=_ce .GetNumbersAsFloat (_gbb .Params );if _aebg !=nil {return _aebg ;};_cd .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_ebc );_fgbd .cubicTo (_ebc [0],_ebc [1],_ebc [2],_ebc [3],_ebc [4],_ebc [5]);case "\u0076","\u0079":if len (_gbb .Params )!=4{return _cfg ;};_eea ,_caff :=_ce .GetNumbersAsFloat (_gbb .Params );if _caff !=nil {return _caff ;};_cd .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_eea );_fgbd .quadraticTo (_eea [0],_eea [1],_eea [2],_eea [3]);case "\u0068":_fgbd .closePath ();case "\u0072\u0065":if len (_gbb .Params )!=4{return _cfg ;};_fcbe ,_ggg :=_ce .GetNumbersAsFloat (_gbb .Params );if _ggg !=nil {return _ggg ;};_fgbd .drawRectangle (_fcbe [0],_fcbe [1],_fcbe [2],_fcbe [3]);_fgbd .closePath ();case "\u0053":_fgbd .stroke (&_ffd ._cbb );_fgbd .clearPath ();case "\u0073":_fgbd .closePath ();_fgbd .stroke (&_ffd ._cbb );_fgbd .clearPath ();case "\u0046":_fgbd .fill (&_ffd ._fgbc );_fgbd .clearPath ();case "\u0066","\u0066\u002a":_fgbd .closePath ();_fgbd .fill (&_ffd ._fgbc );_fgbd .clearPath ();case "\u0042","\u0042\u002a":_fgbd .fill (&_ffd ._fgbc );_fgbd .stroke (&_ffd ._cbb );_fgbd .clearPath ();case "\u0062","\u0062\u002a":_fgbd .closePath ();_fgbd .fill (&_ffd ._fgbc );_fgbd 
.stroke (&_ffd ._cbb );_fgbd .clearPath ();case "\u006e":_fgbd .clearPath ();case "\u0044\u006f":if len (_gbb .Params )==0{_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_gbb .Params );return _ce .ErrRangeError ;};_fba ,_ad :=_ce .GetName (_gbb .Params [0]);if !_ad {_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_gbb .Params [0]);return _ce .ErrTypeError ;};_ ,_egd :=_gbf .GetXObjectByName (*_fba );if _egd !=_bf .XObjectTypeForm {break ;};_dee ,_ad :=_ddd ._af [_fba .String ()];if !_ad {_eaac ,_ded :=_gbf .GetXObjectFormByName (*_fba );if _ded !=nil {_cd .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_ded );return _ded ;};_fdb ,_ded :=_eaac .GetContentStream ();if _ded !=nil {_cd .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_ded );return _ded ;};_abe :=_eaac .Resources ;if _abe ==nil {_abe =_gbf ;};_gde ,_gaa ,_dfg ,_ded :=_ddd .extractPageText (string (_fdb ),_abe ,_dfa .Mult (_daa .CTM ),_cg +1);if _ded !=nil {_cd .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_ded );return _ded ;};_dee =textResult {*_gde ,_gaa ,_dfg };_ddd ._af [_fba .String ()]=_dee ;};_fgbd ._cbbe =_daa .CTM ;if _fgdd {_cd .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_fgbd ._cbbe );};_ffd ._deda =append (_ffd ._deda ,_dee ._dfad ._deda ...);_ffd ._cbb =append (_ffd ._cbb ,_dee ._dfad ._cbb ...);_ffd ._fgbc =append (_ffd ._fgbc ,_dee ._dfad ._fgbc ...);_faf ._bfbb 
+=_dee ._gaac ;_faf ._bga +=_dee ._bab ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_fcc ._bcfd .ColorspaceNonStroking =_daa .ColorspaceNonStroking ;_fcc ._bcfd .ColorNonStroking =_daa .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_fcc ._bcfd .ColorspaceStroking =_daa .ColorspaceStroking ;_fcc ._bcfd .ColorStroking =_daa .ColorStroking ;};return nil ;});_bbe =_ced .Process (_efe );return _ffd ,_faf ._bfbb ,_faf ._bga ,_bbe ;};func (_ecc *wordBag )pullWord (_fdge *textWord ,_aaf int ,_dac map[int ]map[*textWord ]struct{}){_ecc .PdfRectangle =_gffe (_ecc .PdfRectangle ,_fdge .PdfRectangle );if _fdge ._bcdga > _ecc ._dadc {_ecc ._dadc =_fdge ._bcdga ;};_ecc ._gfc [_aaf ]=append (_ecc ._gfc [_aaf ],_fdge );_dac [_aaf ][_fdge ]=struct{}{};};func _dgbf (_agdf ,_efgcc _cec .Point )rulingKind {_ccbf :=_c .Abs (_agdf .X -_efgcc .X );_bgac :=_c .Abs (_agdf .Y -_efgcc .Y );return _cadd (_ccbf ,_bgac ,_ffeg );};func _dabcc (_eaaf ,_eeag ,_dgcde ,_dffeg *textPara )*textTable {_cgdba :=&textTable {_agcd :2,_bdfg :2,_afbef :make (map[uint64 ]*textPara ,4)};_cgdba .put (0,0,_eaaf );_cgdba .put (1,0,_eeag );_cgdba .put (0,1,_dgcde );_cgdba .put (1,1,_dffeg );return _cgdba ;};func (_bdbb *wordBag )sort (){for _ ,_gfg :=range _bdbb ._gfc {_ac .Slice (_gfg ,func (_ebef ,_efea int )bool {return _ddge (_gfg [_ebef ],_gfg [_efea ])< 0});};};func _bcfc (_gbeca _bf .PdfRectangle )*ruling {return &ruling {_gcae :_ffag ,_adcb :_gbeca .Urx ,_cdcc :_gbeca .Lly ,_bdag :_gbeca .Ury };};
// PageText represents the layout of text on a device page.
type PageText struct {
	// Text marks collected by extractPageText. ApplyArea filters them by
	// bounding box.
	_deda []*textMark
	// Extracted text, marks and tables; all three are set by ApplyArea.
	_dgfb string
	_ebb  []TextMark
	_edef []TextTable
	// Page rectangle, copied from the Extractor.
	_fed _bf.PdfRectangle
	// Stroked and filled paths, in that order.
	_cbb  []pathSection
	_fgbc []pathSection
}

// getDown returns, for every column, the `_cedaf` value of the cell in the
// last row. It returns nil if any of them is missing or has `_gecbf` set, or
// if the `_agabc` link of a value is not the value in the next column.
func (t *textTable) getDown() paraList {
	lastRow := t._bdfg - 1
	below := make(paraList, t._agcd)

	for x := range below {
		next := t.get(x, lastRow)._cedaf
		if next == nil || next._gecbf {
			return nil
		}
		below[x] = next
	}

	for x := 1; x < t._agcd; x++ {
		if below[x-1]._agabc != below[x] {
			return nil
		}
	}
	return below
}

// _gcebg passes the absolute x and y differences of `p` and `q` to `_gcabb`
// and returns its verdict.
func _gcebg(p, q _cec.Point) bool {
	dx := _c.Abs(p.X - q.X)
	dy := _c.Abs(p.Y - q.Y)
	return _gcabb(dx, dy)
}

// Compile-time debug switches. All are off; the derived ones stay off until
// their parent switch is enabled.
const (
	_aagg  = false
	_bbac  = false
	_fcaag = false
	_fgf   = false
	_fgdd  = false
	_babc  = false
	_cbbf  = false
	_agd   = false
	_fbcd  = false
	_geae  = _fbcd && true
	_fdda  = _geae && false
	_cgb   = _fbcd && true
	_ccae  = false
	_gcbb  = _ccae && false
	_cagd  = _ccae && true
	_fgfe  = false
	_egde  = _fgfe && false
	_aggc  = _fgfe && false
	_adbd  = _fgfe && true
	_caec  = _fgfe && false
	_fcde  = _fgfe && false
)

// fill appends the current path (`_dda`), tagged with the current fill color,
// to `*fills`.
func (ss *shapesState) fill(fills *[]pathSection) {
	section := pathSection{
		_fgaff: ss._dda,
		Color:  ss._cfc.getFillColor(),
	}
	*fills = append(*fills, section)

	if !_fgfe {
		return
	}
	bbox := section.bbox()
	_cf.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",
		len(*fills), len(section._fgaff), ss, section.Color, bbox, bbox.Width(), bbox.Height())
	if !_egde {
		return
	}
	// Dump the first eleven subpaths at most.
	for i, sp := range section._fgaff {
		_cf.Printf("%8d: %s\n", i, sp)
		if i == 10 {
			break
		}
	}
}
// String returns a description of the mark kind.
func (_ebgb markKind )String ()string {_ecgd ,_eeaa :=_agfcc [_ebgb ];if !_eeaa {return _cf .Sprintf ("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064",_ebgb );};return _ecgd ;};func _daaba (_bdebb []rulingList )(rulingList ,rulingList ){var _dcfb rulingList ;for _ ,_fgafg :=range _bdebb {_dcfb =append (_dcfb ,_fgafg ...);};return _dcfb .vertsHorzs ();};func _ccd (_aged _bf .PdfRectangle )textState {return textState {_bcga :100,_fbcc :RenderModeFill ,_fdeg :_aged };};func (_ceafb *textObject )getFont (_fcbed string )(*_bf .PdfFont ,error ){if _ceafb ._cab ._dc !=nil {_ceafb ._cab ._bb ++;_bbaf ,_gfdc :=_ceafb ._cab ._dc [_fcbed ];if _gfdc {_bbaf ._dbbg =_ceafb ._cab ._bb ;return _bbaf ._bfgg ,nil ;};};_cddb ,_cff :=_ceafb .getFontDirect (_fcbed );if _cff !=nil {return nil ,_cff ;};if _ceafb ._cab ._dc !=nil {_aebb :=fontEntry {_cddb ,_ceafb ._cab ._bb };if len (_ceafb ._cab ._dc )>=_abd {var _gdb []string ;for _gbba :=range _ceafb ._cab ._dc {_gdb =append (_gdb ,_gbba );};_ac .Slice (_gdb ,func (_aceb ,_gbdd int )bool {return _ceafb ._cab ._dc [_gdb [_aceb ]]._dbbg < _ceafb ._cab ._dc [_gdb [_gbdd ]]._dbbg ;});delete (_ceafb ._cab ._dc ,_gdb [0]);};_ceafb ._cab ._dc [_fcbed ]=_aebb ;};return _cddb ,nil ;};func (_afb *wordBag )depthIndexes ()[]int {if len (_afb ._gfc )==0{return nil ;};_fdcc :=make ([]int ,len (_afb ._gfc ));_ccbe :=0;for _cdc :=range _afb ._gfc {_fdcc [_ccbe ]=_cdc ;_ccbe ++;};_ac .Ints (_fdcc );return _fdcc ;};func _cbadc (_gddb _bf .PdfRectangle ,_feggd []*textLine )*textPara {return &textPara {PdfRectangle :_gddb ,_ffgc :_feggd };};func _aeda (_bgbe []*textWord ,_cce float64 ,_acbd ,_gee rulingList )*wordBag {_fecag :=_acegc (_bgbe [0],_cce ,_acbd ,_gee );for _ ,_eebe :=range _bgbe [1:]{_gdbd :=_geb (_eebe ._cddc );_fecag ._gfc [_gdbd ]=append (_fecag ._gfc [_gdbd ],_eebe );_fecag .PdfRectangle =_gffe (_fecag .PdfRectangle ,_eebe .PdfRectangle );};_fecag .sort ();return _fecag ;};func (_bgfc *stateStack )top 
()*textState {if _bgfc .empty (){return nil ;};return (*_bgfc )[_bgfc .size ()-1];};func (_egc *wordBag )depthBand (_beegf ,_bcbb float64 )[]int {if len (_egc ._gfc )==0{return nil ;};return _egc .depthRange (_egc .getDepthIdx (_beegf ),_egc .getDepthIdx (_bcbb ));};func (_acf *textObject )nextLine (){_acf .moveLP (0,-_acf ._gffg ._dfd )};func (_bbbb intSet )del (_dafg int ){delete (_bbbb ,_dafg )};func _cae (_gbcf bounded )float64 {return -_gbcf .bbox ().Lly };func (_cfdf *textWord )appendMark (_ebagf *textMark ,_ceff _bf .PdfRectangle ){_cfdf ._eacag =append (_cfdf ._eacag ,_ebagf );_cfdf .PdfRectangle =_gffe (_cfdf .PdfRectangle ,_ebagf .PdfRectangle );if _ebagf ._ada > _cfdf ._bcdga {_cfdf ._bcdga =_ebagf ._ada ;};_cfdf ._cddc =_ceff .Ury -_cfdf .PdfRectangle .Lly ;};func _bggc (_cafe _cec .Point )_cec .Matrix {return _cec .TranslationMatrix (_cafe .X ,_cafe .Y )};var _agfcc =map[markKind ]string {_gfaa :"\u0073\u0074\u0072\u006f\u006b\u0065",_ggcec :"\u0066\u0069\u006c\u006c",_cfgce :"\u0061u\u0067\u006d\u0065\u006e\u0074"};
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_ddea PageText )ToText ()string {return _ddea .Text ()};var _acba =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_g .White ,StrokeColor :_g .White };func _gcaa (_bdfa float64 )float64 {return _gfbac *_c .Round (_bdfa /_gfbac )};func _aagf (_dced []compositeCell )[]float64 {var _eebee []*textLine ;_ggbfb :=0;for _ ,_afdaf :=range _dced {_ggbfb +=len (_afdaf .paraList );_eebee =append (_eebee ,_afdaf .lines ()...);};_ac .Slice (_eebee ,func (_fadd ,_dged int )bool {_faff ,_cdbaf :=_eebee [_fadd ],_eebee [_dged ];_cbcb ,_cffe :=_faff ._abbea ,_cdbaf ._abbea ;if !_gbgf (_cbcb -_cffe ){return _cbcb < _cffe ;};return _faff .Llx < _cdbaf .Llx ;});if _ccae {_cf .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_ggbfb ,len (_eebee ));for _ccad ,_daacd :=range _eebee {_cf .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_ccad ,_daacd );};};var _dcbe []float64 ;_gfab :=_eebee [0];var _bbea [][]*textLine ;_abba :=[]*textLine {_gfab };for _badaa ,_dfdeg :=range _eebee [1:]{if _dfdeg .Ury < _gfab .Lly {_efgae :=0.5*(_dfdeg .Ury +_gfab .Lly );if _ccae {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_badaa ,_dfdeg .Ury ,_gfab .Lly ,_efgae ,_gfab ,_dfdeg );};_dcbe =append (_dcbe ,_efgae );_bbea =append (_bbea ,_abba );_abba =nil ;};_abba =append (_abba ,_dfdeg );if _dfdeg .Lly < _gfab .Lly {_gfab =_dfdeg ;};};if len (_abba )> 0{_bbea =append (_bbea ,_abba );};if _ccae {_cf .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_dcbe );};if _ccae {_cd .Log .Info 
("\u0072\u006f\u0077\u003d\u0025\u0064",len (_dced ));for _ccbce ,_cddbb :=range _dced {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ccbce ,_cddbb );};_cd .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_bbea ));for _gbde ,_cbefb :=range _bbea {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_gbde ,len (_cbefb ));for _bafec ,_addgc :=range _cbefb {_cf .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_bafec ,_addgc );};};};_faeb :=true ;for _bgdf ,_fbbe :=range _bbea {_eefba :=true ;for _gdef ,_gafe :=range _dced {if _ccae {_cf .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_bgdf ,len (_bbea ),_gdef ,len (_dced ),_gafe );};if !_gafe .hasLines (_fbbe ){if _ccae {_cf .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_bgdf ,len (_bbea ),_gdef ,len (_dced ));};_eefba =false ;break ;};};if !_eefba {_faeb =false ;break ;};};if !_faeb {if _ccae {_cd .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg");};_dcbe =nil ;};if _ccae &&_dcbe !=nil {_cf .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_dcbe );};return _dcbe ;};func (_bgabd *textLine )appendWord (_caef *textWord ){_bgabd ._cddd =append (_bgabd ._cddd ,_caef );_bgabd .PdfRectangle =_gffe (_bgabd .PdfRectangle ,_caef 
.PdfRectangle );if _caef ._bcdga > _bgabd ._gabbd {_bgabd ._gabbd =_caef ._bcdga ;};if _caef ._cddc > _bgabd ._abbea {_bgabd ._abbea =_caef ._cddc ;};};func (_cegbb *textTable )growTable (){_cggbc :=func (_dbbgg paraList ){_cegbb ._bdfg ++;for _cffgb :=0;_cffgb < _cegbb ._agcd ;_cffgb ++{_gdfaa :=_dbbgg [_cffgb ];_cegbb .put (_cffgb ,_cegbb ._bdfg -1,_gdfaa );};};_dgcc :=func (_gbadc paraList ){_cegbb ._agcd ++;for _gece :=0;_gece < _cegbb ._bdfg ;_gece ++{_cggc :=_gbadc [_gece ];_cegbb .put (_cegbb ._agcd -1,_gece ,_cggc );};};if _gcbb {_cegbb .log ("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce");};for _fddg :=0;;_fddg ++{_gadbe :=false ;_becg :=_cegbb .getDown ();_gggc :=_cegbb .getRight ();if _gcbb {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fddg ,_cegbb );_cf .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a",_becg );_cf .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a",_gggc );};if _becg !=nil &&_gggc !=nil {_dcba :=_becg [len (_becg )-1];if _dcba !=nil &&!_dcba ._gecbf &&_dcba ==_gggc [len (_gggc )-1]{_cggbc (_becg );if _gggc =_cegbb .getRight ();_gggc !=nil {_dgcc (_gggc );_cegbb .put (_cegbb ._agcd -1,_cegbb ._bdfg -1,_dcba );};_gadbe =true ;};};if !_gadbe &&_becg !=nil {_cggbc (_becg );_gadbe =true ;};if !_gadbe &&_gggc !=nil {_dgcc (_gggc );_gadbe =true ;};if !_gadbe {break ;};};};type textWord struct{_bf .PdfRectangle ;_cddc float64 ;_gbdc string ;_eacag []*textMark ;_bcdga float64 ;_eeed bool ;};func (_beaf paraList )lines ()[]*textLine {var _abce []*textLine ;for _ ,_gbdbc :=range _beaf {_abce =append (_abce ,_gbdbc ._ffgc ...);};return _abce ;};func _gfbab (_egag ,_ffdfe _cec .Point )bool {_ecfc :=_c .Abs (_egag .X -_ffdfe .X );_dedb :=_c .Abs (_egag .Y -_ffdfe .Y );return _gcabb (_dedb ,_ecfc );};
|
||
|
||
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
|
||
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
|
||
// `start` and `end` are offsets in the extracted text.
|
||
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
|
||
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
|
||
func (_babf *TextMarkArray )RangeOffset (start ,end int )(*TextMarkArray ,error ){if _babf ==nil {return nil ,_ba .New ("\u006da\u003d\u003d\u006e\u0069\u006c");};if end < start {return nil ,_cf .Errorf ("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020",start ,end );};_eded :=len (_babf ._gef );if _eded ==0{return _babf ,nil ;};if start < _babf ._gef [0].Offset {start =_babf ._gef [0].Offset ;};if end > _babf ._gef [_eded -1].Offset +1{end =_babf ._gef [_eded -1].Offset +1;};_aff :=_ac .Search (_eded ,func (_bada int )bool {return _babf ._gef [_bada ].Offset +len (_babf ._gef [_bada ].Text )-1>=start });if !(0<=_aff &&_aff < _eded ){_gaad :=_cf .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076",start ,_aff ,_eded ,_babf ._gef [0],_babf ._gef [_eded -1]);return nil ,_gaad ;};_ebfd :=_ac .Search (_eded ,func (_acgd int )bool {return _babf ._gef [_acgd ].Offset > end -1});if !(0<=_ebfd &&_ebfd < _eded ){_fegg :=_cf .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076",end ,_ebfd ,_eded ,_babf ._gef [0],_babf ._gef [_eded -1]);return nil ,_fegg ;};if _ebfd <=_aff {return nil ,_cf .Errorf 
("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064",start ,end ,_aff ,_ebfd );};return &TextMarkArray {_gef :_babf ._gef [_aff :_ebfd ]},nil ;};func (_fgff lineRuling )yMean ()float64 {return 0.5*(_fgff ._ebgf .Y +_fgff ._cgbg .Y )};func _gbaf (_cba _cec .Matrix )_cec .Point {_decb ,_fccda :=_cba .Translation ();return _cec .Point {X :_decb ,Y :_fccda };};func _babce (_adfda int ,_efbcf map[int ][]float64 )([]int ,int ){_ggfcf :=make ([]int ,_adfda );_gfca :=0;for _aagd :=0;_aagd < _adfda ;_aagd ++{_ggfcf [_aagd ]=_gfca ;_gfca +=len (_efbcf [_aagd ])+1;};return _ggfcf ,_gfca ;};func (_gcabbd rulingList )snapToGroupsDirection ()rulingList {_gcabbd .sortStrict ();_gged :=make (map[*ruling ]rulingList ,len (_gcabbd ));_fbaga :=_gcabbd [0];_bafff :=func (_cffdf *ruling ){_fbaga =_cffdf ;_gged [_fbaga ]=rulingList {_cffdf }};_bafff (_gcabbd [0]);for _ ,_bgadg :=range _gcabbd [1:]{if _bgadg ._adcb < _fbaga ._adcb -_cggb {_cd .Log .Error ("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073",_fbaga ,_bgadg );};if _bgadg ._adcb > _fbaga ._adcb +_ecba {_bafff (_bgadg );}else {_gged [_fbaga ]=append (_gged [_fbaga ],_bgadg );};};_gfgeg :=make (map[*ruling ]float64 ,len (_gged ));_bgca :=make (map[*ruling ]*ruling ,len (_gcabbd ));for _abbde ,_effb :=range _gged {_gfgeg [_abbde ]=_effb .mergePrimary ();for _ ,_bfbc :=range _effb {_bgca [_bfbc ]=_abbde ;};};for _ ,_dfacd :=range _gcabbd {_dfacd ._adcb =_gfgeg [_bgca [_dfacd ]];};_gfee :=make (rulingList ,0,len (_gcabbd ));for _ ,_gcafb 
:=range _gged {_gcggee :=_gcafb .splitSec ();for _egcc ,_fefe :=range _gcggee {_edag :=_fefe .merge ();if len (_gfee )> 0{_agdfe :=_gfee [len (_gfee )-1];if _agdfe .alignsPrimary (_edag )&&_agdfe .alignsSec (_edag ){_cd .Log .Error ("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073",_egcc ,_agdfe ,_edag );continue ;};};_gfee =append (_gfee ,_edag );};};_gfee .sortStrict ();return _gfee ;};func (_cgcb *textPara )depth ()float64 {if _cgcb ._gccg {return -1.0;};if len (_cgcb ._ffgc )> 0{return _cgcb ._ffgc [0]._abbea ;};return _cgcb ._bfag .depth ();};const (_cggb =1.0e-6;_gfbac =1.0e-4;_edfd =10;_eff =6;_gaff =0.5;_fbff =0.12;_ebcf =0.19;_afg =0.04;_dadcg =0.04;_dbcf =1.0;_dffd =0.04;_cbdc =0.4;_afcf =0.7;_cdcg =1.0;_afcd =0.1;_bfcb =1.4;_gbecg =0.46;_bgeg =0.02;_gdeg =0.2;_abaeg =0.5;_dffdd =4;_geef =4.0;_fdcff =6;_caffc =0.3;_dfdd =0.01;_aaae =0.02;_def =2;_deec =2;_bgabf =500;_ggde =4.0;_afbb =4.0;_ffeg =0.05;_egeb =0.1;_egdeb =2.0;_ecba =2.0;_gcfge =1.5;_fcdc =3.0;_gfdg =0.25;);func _ddge (_bdcb ,_fae bounded )float64 {return _bdcb .bbox ().Llx -_fae .bbox ().Llx };func (_cedag *wordBag )highestWord (_aabe int ,_abbd ,_acge float64 )*textWord {for _ ,_gbfe :=range _cedag ._gfc [_aabe ]{if _abbd <=_gbfe ._cddc &&_gbfe ._cddc <=_acge {return _gbfe ;};};return nil ;};func (_ggfc *textObject )setTextRise (_addf float64 ){if _ggfc ==nil {return ;};_ggfc ._gffg ._ccb =_addf ;};func _gac (_edgbd _bf .PdfRectangle ,_fgfee ,_beab ,_ecea ,_dgca *ruling )gridTile {_cdagc :=_edgbd .Llx ;_edcdf :=_edgbd .Urx ;_dfgge :=_edgbd .Lly ;_cgbb :=_edgbd .Ury ;return gridTile {PdfRectangle :_edgbd ,_cabgg :_fgfee !=nil &&_fgfee .encloses (_dfgge ,_cgbb ),_aded :_beab !=nil &&_beab .encloses (_dfgge ,_cgbb ),_agcf :_ecea !=nil &&_ecea .encloses (_cdagc ,_edcdf 
),_gcca :_dgca !=nil &&_dgca .encloses (_cdagc ,_edcdf )};};
|
||
|
||
// TableCell is a cell in a TextTable.
|
||
type TableCell struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Marks returns the TextMarks corresponding to the text in Text.
|
||
Marks TextMarkArray ;};func (_effdg rulingList )snapToGroups ()rulingList {_fdfc ,_egac :=_effdg .vertsHorzs ();if len (_fdfc )> 0{_fdfc =_fdfc .snapToGroupsDirection ();};if len (_egac )> 0{_egac =_egac .snapToGroupsDirection ();};_daace :=append (_fdfc ,_egac ...);_daace .log ("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073");return _daace ;};func (_dgce *textTable )depth ()float64 {_daccd :=1e10;for _fcbd :=0;_fcbd < _dgce ._agcd ;_fcbd ++{_cdee :=_dgce .get (_fcbd ,0);if _cdee ==nil ||_cdee ._gccg {continue ;};_daccd =_c .Min (_daccd ,_cdee .depth ());};return _daccd ;};func (_gbd *textObject )moveLP (_cccd ,_fdc float64 ){_gbd ._ddef .Concat (_cec .NewMatrix (1,0,0,1,_cccd ,_fdc ));_gbd ._ceaf =_gbd ._ddef ;};func (_ceeda *ruling )alignsPrimary (_agea *ruling )bool {return _ceeda ._gcae ==_agea ._gcae &&_c .Abs (_ceeda ._adcb -_agea ._adcb )< _ecba *0.5;};func _befb (_fdad []int )[]int {_cege :=make ([]int ,len (_fdad ));for _aefcd ,_dagc :=range _fdad {_cege [len (_fdad )-1-_aefcd ]=_dagc ;};return _cege ;};func (_egca *textPara )bbox ()_bf .PdfRectangle {return _egca .PdfRectangle };func _afeg (_ffee ,_bef _bf .PdfRectangle )(_bf .PdfRectangle ,bool ){if !_ffgg (_ffee ,_bef ){return _bf .PdfRectangle {},false ;};return _bf .PdfRectangle {Llx :_c .Max (_ffee .Llx ,_bef .Llx ),Urx :_c .Min (_ffee .Urx ,_bef .Urx ),Lly :_c .Max (_ffee .Lly ,_bef .Lly ),Ury :_c .Min (_ffee .Ury ,_bef .Ury )},true ;};
|
||
|
||
// String returns a string describing the current state of the textState stack.
|
||
func (_bgb *stateStack )String ()string {_cfgb :=[]string {_cf .Sprintf ("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064",len (*_bgb ))};for _gegd ,_afef :=range *_bgb {_eacf :="\u003c\u006e\u0069l\u003e";if _afef !=nil {_eacf =_afef .String ();};_cfgb =append (_cfgb ,_cf .Sprintf ("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073",_gegd ,_eacf ));};return _e .Join (_cfgb ,"\u000a");};func (_abed paraList )reorder (_baff []int ){_gdaf :=make (paraList ,len (_abed ));for _gecf ,_gffc :=range _baff {_gdaf [_gecf ]=_abed [_gffc ];};copy (_abed ,_gdaf );};func _fcd (_eabde *Extractor ,_bfg *_bf .PdfPageResources ,_ead _gc .GraphicsState ,_aegg *textState ,_cfec *stateStack )*textObject {return &textObject {_cab :_eabde ,_dbe :_bfg ,_bcfd :_ead ,_cgcd :_cfec ,_gffg :_aegg ,_ceaf :_cec .IdentityMatrix (),_ddef :_cec .IdentityMatrix ()};};func _bedeb (_ecfa string )bool {for _ ,_fcgfg :=range _ecfa {if !_ag .IsSpace (_fcgfg ){return false ;};};return true ;};func _afad (_egfea _bf .PdfRectangle )*ruling {return &ruling {_gcae :_aadg ,_adcb :_egfea .Ury ,_cdcc :_egfea .Llx ,_bdag :_egfea .Urx };};func _ceed (_fege []TextMark ,_gbcg *int ,_ecfd string )[]TextMark {_cbac :=_acba ;_cbac .Text =_ecfd ;return _dcbd (_fege ,_gbcg ,_cbac );};func _addc (_aebd ,_edfa *textPara )bool {return _eceg (_aebd ._bcda ,_edfa ._bcda )};func _afbe (_dafb _bf .PdfRectangle )*ruling {return &ruling {_gcae :_aadg ,_adcb :_dafb .Lly ,_cdcc :_dafb .Llx ,_bdag :_dafb .Urx };};func (_efaac gridTile )numBorders ()int {_ecad :=0;if _efaac ._cabgg {_ecad ++;};if _efaac ._aded {_ecad ++;};if _efaac ._agcf {_ecad ++;};if _efaac ._gcca {_ecad ++;};return _ecad ;};func (_ebfdc *ruling )intersects (_febca *ruling )bool {_dbab :=(_ebfdc ._gcae ==_ffag &&_febca ._gcae ==_aadg )||(_febca ._gcae ==_ffag &&_ebfdc ._gcae ==_aadg );_eabbg :=func (_ddce ,_cged *ruling )bool {return _ddce ._cdcc -_egdeb <=_cged ._adcb &&_cged ._adcb <=_ddce ._bdag +_egdeb 
;};_fgcbg :=_eabbg (_ebfdc ,_febca );_bbfc :=_eabbg (_febca ,_ebfdc );if _fgfe {_cf .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_dbab ,_fgcbg ,_bbfc ,_dbab &&_fgcbg &&_bbfc ,_ebfdc ,_febca );};return _dbab &&_fgcbg &&_bbfc ;};func _acegc (_fcge *textWord ,_gfdbg float64 ,_fedd ,_cecf rulingList )*wordBag {_fac :=_geb (_fcge ._cddc );_bdac :=[]*textWord {_fcge };_gbad :=wordBag {_gfc :map[int ][]*textWord {_fac :_bdac },PdfRectangle :_fcge .PdfRectangle ,_dadc :_fcge ._bcdga ,_afec :_gfdbg ,_gfda :_fedd ,_caab :_cecf };return &_gbad ;};func (_gae *textTable )getRight ()paraList {_acgad :=make (paraList ,_gae ._bdfg );for _fdgf :=0;_fdgf < _gae ._bdfg ;_fdgf ++{_aadc :=_gae .get (_gae ._agcd -1,_fdgf )._agabc ;if _aadc ==nil ||_aadc ._gecbf {return nil ;};_acgad [_fdgf ]=_aadc ;};for _egee :=0;_egee < _gae ._bdfg -1;_egee ++{if _acgad [_egee ]._cedaf !=_acgad [_egee +1]{return nil ;};};return _acgad ;};func _eceg (_aebcd ,_gcfff _bf .PdfRectangle )bool {return _gcfff .Llx <=_aebcd .Urx &&_aebcd .Llx <=_gcfff .Urx ;};type rectRuling struct{_ged rulingKind ;_ccgba markKind ;_g .Color ;_bf .PdfRectangle ;};type markKind int ;func (_cbbg paraList )applyTables (_degc []*textTable )paraList {_efead :=make (map[*textPara ]struct{});var _dgdf paraList ;for _ ,_aafa :=range _degc {for _ ,_dbdgf :=range _aafa ._afbef {_efead [_dbdgf ]=struct{}{};};_dgdf =append (_dgdf ,_aafa .newTablePara ());};for _ ,_geeea :=range _cbbg {if _ ,_daaa :=_efead [_geeea ];!_daaa {_dgdf =append (_dgdf ,_geeea );};};return _dgdf ;};func _afed (_efgea []_ce .PdfObject )(_cdaa ,_dccc float64 ,_bgefb error ){if len (_efgea 
)!=2{return 0,0,_cf .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_efgea ));};_afff ,_bgefb :=_ce .GetNumbersAsFloat (_efgea );if _bgefb !=nil {return 0,0,_bgefb ;};return _afff [0],_afff [1],nil ;};func (_eegbf *textTable )toTextTable ()TextTable {if _ccae {_cd .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_eegbf ._agcd ,_eegbf ._bdfg );};_acfec :=make ([][]TableCell ,_eegbf ._bdfg );for _edgc :=0;_edgc < _eegbf ._bdfg ;_edgc ++{_acfec [_edgc ]=make ([]TableCell ,_eegbf ._agcd );for _ddbg :=0;_ddbg < _eegbf ._agcd ;_ddbg ++{_cgfa :=_eegbf .get (_ddbg ,_edgc );if _cgfa ==nil {continue ;};if _ccae {_cf .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_ddbg ,_edgc ,_cgfa );};_acfec [_edgc ][_ddbg ].Text =_cgfa .text ();_cbdec :=0;_acfec [_edgc ][_ddbg ].Marks ._gef =_cgfa .toTextMarks (&_cbdec );};};return TextTable {W :_eegbf ._agcd ,H :_eegbf ._bdfg ,Cells :_acfec };};func (_cdbe *textTable )compositeRowCorridors ()map[int ][]float64 {_eadb :=make (map[int ][]float64 ,_cdbe ._bdfg );if _ccae {_cd .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_cdbe ._bdfg );};for _efged :=1;_efged < _cdbe ._bdfg ;_efged ++{var _deac []compositeCell ;for _dcce :=0;_dcce < _cdbe ._agcd ;_dcce ++{if _afda ,_fegac :=_cdbe ._bfcbf [_bfddd (_dcce ,_efged )];_fegac {_deac =append (_deac ,_afda );};};if len (_deac )==0{continue ;};_daaf :=_aagf (_deac );_eadb [_efged ]=_daaf ;if _ccae {_cf .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_efged ,_daaf );};};return _eadb ;};const (_dagb =true ;_ccgb =true ;_abc =true ;_dfgg =false ;_bed =false ;_fbd =6;_ddbbg =3.0;_dfaa =200;_bggd =true ;_gcc =true ;_fbec =true ;_geeg =true ;_ffed =false 
;);func _cfcdb (_bafd []pathSection )rulingList {_cggfd (_bafd );if _fgfe {_cd .Log .Info ("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs",len (_bafd ));};var _fdbd rulingList ;for _ ,_cbgf :=range _bafd {for _ ,_edfb :=range _cbgf ._fgaff {if !_edfb .isQuadrilateral (){if _fgfe {_cd .Log .Error ("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073",_edfb );};continue ;};if _bbgc ,_gdfb :=_edfb .makeRectRuling (_cbgf .Color );_gdfb {_fdbd =append (_fdbd ,_bbgc );}else {if _caec {_cd .Log .Error ("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073",_edfb );};};};};if _fgfe {_cd .Log .Info ("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073",_fdbd .String ());};return _fdbd ;};func (_aadf rulingList )secMinMax ()(float64 ,float64 ){_cbge ,_bddf :=_aadf [0]._cdcc ,_aadf [0]._bdag ;for _ ,_gffb :=range _aadf [1:]{if _gffb ._cdcc < _cbge {_cbge =_gffb ._cdcc ;};if _gffb ._bdag > _bddf {_bddf =_gffb ._bdag ;};};return _cbge ,_bddf ;};
|
||
|
||
// ImageMark represents an image drawn on a page and its position in device coordinates.
|
||
// All coordinates are in device coordinates.
|
||
type ImageMark struct{Image *_bf .Image ;
|
||
|
||
// Dimensions of the image as displayed in the PDF.
|
||
Width float64 ;Height float64 ;
|
||
|
||
// Position of the image in PDF coordinates (lower left corner).
|
||
X float64 ;Y float64 ;
|
||
|
||
// Angle in degrees, if rotated.
|
||
Angle float64 ;};func (_cecfb paraList )findTableGrid (_fcgf gridTiling )(*textTable ,map[*textPara ]struct{}){_eacd :=len (_fcgf ._ccag );_fgea :=len (_fcgf ._fgfb );_aacd :=textTable {_abad :true ,_agcd :_eacd ,_bdfg :_fgea ,_afbef :make (map[uint64 ]*textPara ,_eacd *_fgea ),_bfcbf :make (map[uint64 ]compositeCell ,_eacd *_fgea )};_eceaf :=make (map[*textPara ]struct{});_bdcgb :=int ((1.0-_caffc )*float64 (_eacd *_fgea ));_egcff :=0;if _adbd {_cd .Log .Info ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064",_eacd ,_fgea );};for _ccafe ,_ccef :=range _fcgf ._fgfb {_dagbd ,_bbdb :=_fcgf ._edccb [_ccef ];if !_bbdb {continue ;};for _acebe ,_ddfa :=range _fcgf ._ccag {_fggab ,_dega :=_dagbd [_ddfa ];if !_dega {continue ;};_bfbe :=_cecfb .inTile (_fggab );if len (_bfbe )==0{_egcff ++;if _egcff > _bdcgb {if _adbd {_cd .Log .Info ("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064",_egcff );};return nil ,nil ;};}else {_aacd .putComposite (_acebe ,_ccafe ,_bfbe ,_fggab .PdfRectangle );for _ ,_gfdgbb :=range _bfbe {_eceaf [_gfdgbb ]=struct{}{};};};};};_babdf :=0;for _caffg :=0;_caffg < _eacd ;_caffg ++{_dfbe :=_aacd .get (_caffg ,0);if _dfbe ==nil ||!_dfbe ._gccg {_babdf ++;};};if _babdf ==0{if _adbd {_cd .Log .Info ("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030");};return nil ,nil ;};_cbfc :=_aacd .reduceTiling (_fcgf ,_fcdc );_cbfc =_cbfc .subdivide ();return _cbfc ,_eceaf ;};func (_cdbb *wordBag )empty (_adf int )bool {_ ,_fdfd :=_cdbb ._gfc [_adf ];return !_fdfd };
|
||
|
||
// String returns a description of `v`.
|
||
func (_cedg *ruling )String ()string {if _cedg ._gcae ==_fedc {return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047";};_deeg ,_cffdg :="\u0078","\u0079";if _cedg ._gcae ==_aadg {_deeg ,_cffdg ="\u0079","\u0078";};_ggfgf :="";if _cedg ._dddb !=0.0{_ggfgf =_cf .Sprintf (" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_cedg ._dddb );};return _cf .Sprintf ("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",_cedg ._gcae ,_deeg ,_cedg ._adcb ,_cffdg ,_cedg ._cdcc ,_cedg ._bdag ,_cedg ._bdag -_cedg ._cdcc ,_cedg ._cfga ,_cedg .Color ,_ggfgf );};func (_eae *wordBag )arrangeText ()*textPara {_eae .sort ();if _ccgb {_eae .removeDuplicates ();};var _cbfd []*textLine ;for _ ,_cegg :=range _eae .depthIndexes (){for !_eae .empty (_cegg ){_eccd :=_eae .firstReadingIndex (_cegg );_gecd :=_eae .firstWord (_eccd );_beag :=_dagf (_eae ,_eccd );_afdf :=_gecd ._bcdga ;_gbdb :=_gecd ._cddc -_gaff *_afdf ;_afae :=_gecd ._cddc +_gaff *_afdf ;_bgfb :=_bfcb *_afdf ;_baec :=_gbecg *_afdf ;_adff :for {var _bcef *textWord ;_gafc :=0;for _ ,_bcfae :=range _eae .depthBand (_gbdb ,_afae ){_cgecf :=_eae .highestWord (_bcfae ,_gbdb ,_afae );if _cgecf ==nil {continue ;};_eabe :=_gcgd (_cgecf ,_beag ._cddd [len (_beag ._cddd )-1]);if _eabe < -_baec {break _adff ;};if _eabe > _bgfb {continue ;};if _bcef !=nil &&_ddge (_cgecf ,_bcef )>=0{continue ;};_bcef =_cgecf ;_gafc =_bcfae ;};if _bcef ==nil {break ;};_beag .pullWord (_eae ,_bcef ,_gafc );};_beag .markWordBoundaries ();_cbfd =append (_cbfd ,_beag );};};if len (_cbfd )==0{return nil ;};_ac .Slice (_cbfd ,func (_bcad ,_ebegd int )bool {return _agge (_cbfd [_bcad ],_cbfd [_ebegd ])< 0});_bdacb :=_cbadc (_eae .PdfRectangle ,_cbfd );if _fbcd {_cd .Log .Info 
("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_bdacb .String ());if _geae {for _acegf ,_caaf :=range _bdacb ._ffgc {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_acegf ,_caaf .String ());if _fdda {for _fbfag ,_gbccd :=range _caaf ._cddd {_cf .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_fbfag ,_gbccd .String ());for _fdcb ,_fgbdb :=range _gbccd ._eacag {_cf .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_fdcb ,_fgbdb .String ());};};};};};};return _bdacb ;};
|
||
|
||
// String returns a human readable description of `s`.
|
||
func (_ggbb intSet )String ()string {var _cdbae []int ;for _fgdce :=range _ggbb {if _ggbb .has (_fgdce ){_cdbae =append (_cdbae ,_fgdce );};};_ac .Ints (_cdbae );return _cf .Sprintf ("\u0025\u002b\u0076",_cdbae );};func (_bbafd rulingList )toGrids ()[]rulingList {if _fgfe {_cd .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_bbafd );};_adacg :=_bbafd .intersections ();if _fgfe {_cd .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_bbafd ),len (_adacg ));for _ ,_deegd :=range _cfdeg (_adacg ){_cf .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_deegd ,_adacg [_deegd ]);};};_ceag :=make (map[int ]intSet ,len (_bbafd ));for _dgaa :=range _bbafd {_cdf :=_bbafd .connections (_adacg ,_dgaa );if len (_cdf )> 0{_ceag [_dgaa ]=_cdf ;};};if _fgfe {_cd .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_ceag ));for _ ,_bedd :=range _cfdeg (_ceag ){_cf .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_bedd ,_ceag [_bedd ]);};};_ddeda :=_aafe (len (_bbafd ),func (_edda ,_bgea int )bool {_afebb ,_fdeag :=len (_ceag [_edda ]),len (_ceag [_bgea ]);if _afebb !=_fdeag {return _afebb > _fdeag ;};return _bbafd .comp (_edda ,_bgea );});if _fgfe {_cd .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_ddeda );};_eaaba :=[][]int {{_ddeda [0]}};_gcab :for _ ,_gcgge :=range _ddeda [1:]{for _fcbb ,_bedb :=range _eaaba {for _ ,_bbga :=range _bedb {if _ceag [_bbga ].has (_gcgge ){_eaaba [_fcbb ]=append (_bedb ,_gcgge );continue _gcab ;};};};_eaaba =append (_eaaba ,[]int {_gcgge });};if _fgfe {_cd .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_eaaba );};_ac .SliceStable (_eaaba ,func (_efde ,_ceafc int )bool {return len (_eaaba 
[_efde ])> len (_eaaba [_ceafc ])});for _ ,_egcf :=range _eaaba {_ac .Slice (_egcf ,func (_egfeg ,_adda int )bool {return _bbafd .comp (_egcf [_egfeg ],_egcf [_adda ])});};_abfe :=make ([]rulingList ,len (_eaaba ));for _egdd ,_dgbba :=range _eaaba {_ccafd :=make (rulingList ,len (_dgbba ));for _gceb ,_bdde :=range _dgbba {_ccafd [_gceb ]=_bbafd [_bdde ];};_abfe [_egdd ]=_ccafd ;};if _fgfe {_cd .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_abfe );};var _dcgd []rulingList ;for _ ,_gbga :=range _abfe {if _bgagb ,_acad :=_gbga .isActualGrid ();_acad {_gbga =_bgagb ;_gbga =_gbga .snapToGroups ();_dcgd =append (_dcgd ,_gbga );};};if _fgfe {_ggbg ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_dcgd );_cd .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_abfe ),len (_dcgd ));};return _dcgd ;};func (_bgcag intSet )add (_bcade int ){_bgcag [_bcade ]=struct{}{}};func (_dfdg *ruling )equals (_dfae *ruling )bool {return _dfdg ._gcae ==_dfae ._gcae &&_aade (_dfdg ._adcb ,_dfae ._adcb )&&_aade (_dfdg ._cdcc ,_dfae ._cdcc )&&_aade (_dfdg ._bdag ,_dfae ._bdag );};func _acga (_bcbba map[float64 ]gridTile )[]float64 {_dcgcc :=make ([]float64 ,0,len (_bcbba ));for _cacc :=range _bcbba {_dcgcc =append (_dcgcc ,_cacc );};_ac .Float64s (_dcgcc );return _dcgcc ;};type pathSection struct{_fgaff []*subpath ;_g .Color ;};func (_abdac paraList )tables ()[]TextTable {var _cfgd []TextTable ;if _ccae {_cd .Log .Info ("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a");};for _ ,_aeee :=range _abdac {_ddedb :=_aeee ._bfag ;if _ddedb !=nil &&_ddedb .isExportable (){_cfgd =append (_cfgd ,_ddedb .toTextTable ());};};return _cfgd ;};func (_fbc *stateStack )pop ()*textState {if _fbc .empty (){return nil ;};_caa 
:=*(*_fbc )[len (*_fbc )-1];*_fbc =(*_fbc )[:len (*_fbc )-1];return &_caa ;};type textState struct{_cef float64 ;_bdf float64 ;_bcga float64 ;_dfd float64 ;_bdd float64 ;_fbcc RenderMode ;_ccb float64 ;_cgcf *_bf .PdfFont ;_fdeg _bf .PdfRectangle ;_bfbb int ;_bga int ;};
|
||
|
||
// String returns a description of `p`.
|
||
func (_fdged *textPara )String ()string {if _fdged ._gccg {return _cf .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d",_fdged .PdfRectangle );};_gbdfb :="";if _fdged ._bfag !=nil {_gbdfb =_cf .Sprintf ("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020",_fdged ._bfag ._agcd ,_fdged ._bfag ._bdfg );};return _cf .Sprintf ("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071",_fdged .PdfRectangle ,_gbdfb ,len (_fdged ._ffgc ),_ecedb (_fdged .text (),50));};func _cdbg (_dcea float64 ,_fcga int )int {if _fcga ==0{_fcga =1;};_fdfa :=float64 (_fcga );return int (_c .Round (_dcea /_fdfa )*_fdfa );};func _eaega (_cebba map[float64 ]map[float64 ]gridTile )[]float64 {_edbgc :=make ([]float64 ,0,len (_cebba ));for _beef :=range _cebba {_edbgc =append (_edbgc ,_beef );};_ac .Float64s (_edbgc );_caada :=len (_edbgc );for _fbeac :=0;_fbeac < _caada /2;_fbeac ++{_edbgc [_fbeac ],_edbgc [_caada -1-_fbeac ]=_edbgc [_caada -1-_fbeac ],_edbgc [_fbeac ];};return _edbgc ;};
|
||
|
||
// Append appends `mark` to the mark array.
|
||
func (_beg *TextMarkArray) Append(mark TextMark) {
	_beg._gef = append(_beg._gef, mark)
}

// sortReadingOrder reorders the paras in place. It computes the extended bounding
// boxes (computeEBBoxes), sorts the paras with `_agge` as the comparison, and then
// applies the order returned by topoOrder. Lists of zero or one para are left alone.
func (_baggg paraList) sortReadingOrder() {
	_cd.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_baggg))
	if len(_baggg) <= 1 {
		return
	}
	_baggg.computeEBBoxes()
	// The less function handed to sort.Slice must be a strict ordering: it has to
	// return false for elements that compare equal. The previous `<= 0` reported
	// tied paras as each sorting before the other, which violates that contract.
	_ac.Slice(_baggg, func(_dggeb, _bffg int) bool {
		return _agge(_baggg[_dggeb], _baggg[_bffg]) < 0
	})
	order := _baggg.topoOrder()
	_baggg.reorder(order)
}
// ExtractPageText returns the text contents of the Extractor's page as a PageText,
// together with two integer statistics.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error).
func (_bfb *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_cdec ,_eg ,_gcf ,_fgb :=_bfb .extractPageText (_bfb ._dd ,_bfb ._bac ,_cec .IdentityMatrix (),0);if _fgb !=nil {return nil ,0,0,_fgb ;};_cdec .computeViews ();_fgb =_eecf (_cdec );if _fgb !=nil {return nil ,0,0,_fgb ;};return _cdec ,_eg ,_gcf ,nil ;};func (_gecfb rulingList )bbox ()_bf .PdfRectangle {var _fgfa _bf .PdfRectangle ;if len (_gecfb )==0{_cd .Log .Error ("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073");return _bf .PdfRectangle {};};if _gecfb [0]._gcae ==_aadg {_fgfa .Llx ,_fgfa .Urx =_gecfb .secMinMax ();_fgfa .Lly ,_fgfa .Ury =_gecfb .primMinMax ();}else {_fgfa .Llx ,_fgfa .Urx =_gecfb .primMinMax ();_fgfa .Lly ,_fgfa .Ury =_gecfb .secMinMax ();};return _fgfa ;};func (_dfgb *ruling )alignsSec (_gbbgc *ruling )bool {const _fddf =_ecba +1.0;return _dfgb ._cdcc -_fddf <=_gbbgc ._bdag &&_gbbgc ._cdcc -_fddf <=_dfgb ._bdag ;};func _cdg (_edfc ,_fceff _bf .PdfRectangle )bool {return _edfc .Lly <=_fceff .Ury &&_fceff .Lly <=_edfc .Ury ;};func (_ffbg *textTable )putComposite (_gcdg ,_ebab int ,_fdfcg paraList ,_cabf _bf .PdfRectangle ){if len (_fdfcg )==0{_cd .Log .Error ("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073");return ;};_efgdb :=compositeCell {_cabf ,_fdfcg };if _ccae {_cf .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a",_gcdg ,_ebab ,_efgdb .String ());};_efgdb .updateBBox ();_ffbg ._bfcbf [_bfddd (_gcdg ,_ebab )]=_efgdb ;};func _cadd (_bbfg ,_ebdd ,_cdgb float64 )rulingKind {if _bbfg >=_cdgb &&_gcabb (_ebdd ,_bbfg ){return _aadg ;};if _ebdd >=_cdgb &&_gcabb (_bbfg ,_ebdd ){return _ffag ;};return _fedc ;};func (_dcfc *wordBag 
)allWords ()[]*textWord {var _gcfb []*textWord ;for _ ,_ecgcd :=range _dcfc ._gfc {_gcfb =append (_gcfb ,_ecgcd ...);};return _gcfb ;};func (_gbbg *textLine )text ()string {var _bffea []string ;for _ ,_dadf :=range _gbbg ._cddd {if _dadf ._eeed {_bffea =append (_bffea ,"\u0020");};_bffea =append (_bffea ,_dadf ._gbdc );};return _e .Join (_bffea ,"");};func _afbbg (_gded ,_gbcee _cec .Point )rulingKind {_afac :=_c .Abs (_gded .X -_gbcee .X );_ccgf :=_c .Abs (_gded .Y -_gbcee .Y );return _cadd (_afac ,_ccgf ,_ggde );};func (_dbbb gridTiling )complete ()bool {for _ ,_dfe :=range _dbbb ._edccb {for _ ,_agdb :=range _dfe {if !_agdb .complete (){return false ;};};};return true ;};func (_cabd *wordBag )depthRange (_ceaff ,_bgbf int )[]int {var _fccc []int ;for _dcbc :=range _cabd ._gfc {if _ceaff <=_dcbc &&_dcbc <=_bgbf {_fccc =append (_fccc ,_dcbc );};};if len (_fccc )==0{return nil ;};_ac .Ints (_fccc );return _fccc ;};func _eecf (_eccc *PageText )error {_abcca :=_fe .GetLicenseKey ();if _abcca !=nil &&_abcca .IsLicensed ()||_gf {return nil ;};_cf .Printf ("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a");_cf .Println ("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f");return _ba .New ("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064");};func _fbagg (_fgfed map[int ][]float64 )string {_dbdbc :=_bbacb (_fgfed );_fdfe :=make ([]string ,len (_fgfed ));for _adbf ,_abdd :=range _dbdbc {_fdfe [_adbf ]=_cf .Sprintf ("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066",_abdd ,_fgfed [_abdd ]);};return _cf .Sprintf ("\u007b\u0025\u0073\u007d",_e .Join (_fdfe ,"\u002c\u0020"));};func (_cgcdg 
*textMark )inDiacriticArea (_cegd *textMark )bool {_acde :=_cgcdg .Llx -_cegd .Llx ;_cffb :=_cgcdg .Urx -_cegd .Urx ;_bgdb :=_cgcdg .Lly -_cegd .Lly ;return _c .Abs (_acde +_cffb )< _cgcdg .Width ()*_abaeg &&_c .Abs (_bgdb )< _cgcdg .Height ()*_abaeg ;};func (_bfeb *wordBag )blocked (_eaabd *textWord )bool {if _eaabd .Urx < _bfeb .Llx {_cgae :=_bcfc (_eaabd .PdfRectangle );_gfdag :=_geed (_bfeb .PdfRectangle );if _bfeb ._gfda .blocks (_cgae ,_gfdag ){if _fcde {_cd .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_eaabd ,_bfeb );};return true ;};}else if _bfeb .Urx < _eaabd .Llx {_bfgf :=_bcfc (_bfeb .PdfRectangle );_abda :=_geed (_eaabd .PdfRectangle );if _bfeb ._gfda .blocks (_bfgf ,_abda ){if _fcde {_cd .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s",_eaabd ,_bfeb );};return true ;};};if _eaabd .Ury < _bfeb .Lly {_fdgc :=_afad (_eaabd .PdfRectangle );_gcfec :=_afbe (_bfeb .PdfRectangle );if _bfeb ._caab .blocks (_fdgc ,_gcfec ){if _fcde {_cd .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_eaabd ,_bfeb );};return true ;};}else if _bfeb .Ury < _eaabd .Lly {_abbe :=_afad (_bfeb .PdfRectangle );_edc :=_afbe (_eaabd .PdfRectangle );if _bfeb ._caab .blocks (_abbe ,_edc ){if _fcde {_cd .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s",_eaabd ,_bfeb );};return true ;};};return false ;};func (_geea rulingList )findPrimSec (_fgcb ,_baefc float64 )*ruling {for _ ,_fcae :=range _geea {if _gbgf (_fcae ._adcb -_fgcb )&&_fcae ._cdcc -_egdeb <=_baefc &&_baefc <=_fcae ._bdag +_egdeb {return _fcae ;};};return nil ;};func (_eega rectRuling )asRuling ()(*ruling ,bool ){_ebbg :=ruling {_gcae :_eega ._ged ,Color :_eega .Color ,_cfga :_ggcec };switch _eega ._ged {case _ffag :_ebbg ._adcb =0.5*(_eega .Llx +_eega .Urx );_ebbg ._cdcc =_eega .Lly ;_ebbg ._bdag =_eega .Ury 
;_cbaf ,_gccf :=_eega .checkWidth (_eega .Llx ,_eega .Urx );if !_gccf {if _caec {_cd .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_eega );};return nil ,false ;};_ebbg ._dddb =_cbaf ;case _aadg :_ebbg ._adcb =0.5*(_eega .Lly +_eega .Ury );_ebbg ._cdcc =_eega .Llx ;_ebbg ._bdag =_eega .Urx ;_bgaba ,_facb :=_eega .checkWidth (_eega .Lly ,_eega .Ury );if !_facb {if _caec {_cd .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_eega );};return nil ,false ;};_ebbg ._dddb =_bgaba ;default:_cd .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_eega ._ged );return nil ,false ;};return &_ebbg ,true ;};func _accc (_ddbc map[int ][]float64 ){if len (_ddbc )<=1{return ;};_bgee :=_bbacb (_ddbc );if _ccae {_cd .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_bgee );};var _acbe ,_gebe int ;for _acbe ,_gebe =range _bgee {if _ddbc [_gebe ]!=nil {break ;};};for _gdad ,_bebbf :=range _bgee [_acbe :]{_aaea :=_ddbc [_bebbf ];if _aaea ==nil {continue ;};if _ccae {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_acbe +_gdad ,_gebe ,_bebbf );};_egdc :=_ddbc [_bebbf ];if _egdc [len (_egdc )-1]> _aaea [0]{_egdc [len (_egdc )-1]=_aaea [0];_ddbc [_gebe ]=_egdc ;};_gebe =_bebbf ;};};
// String returns a human-readable description of the subpath.
func (_fdcd *subpath )String ()string {_ecgc :=_fdcd ._gaaca ;_bgcd :=len (_ecgc );if _bgcd <=5{return _cf .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_bgcd ,_ecgc );};return _cf .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_bgcd ,_ecgc [0],_ecgc [1],_ecgc [_bgcd -1]);};func (_aagge *wordBag )removeDuplicates (){if _cgb {_cd .Log .Info ("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_aagge .text ());};for _ ,_agec :=range _aagge .depthIndexes (){if len (_aagge ._gfc [_agec ])==0{continue ;};_afa :=_aagge ._gfc [_agec ][0];_agdd :=_gdeg *_afa ._bcdga ;_ddgc :=_afa ._cddc ;for _ ,_gefg :=range _aagge .depthBand (_ddgc ,_ddgc +_agdd ){_fgged :=map[*textWord ]struct{}{};_dgada :=_aagge ._gfc [_gefg ];for _ ,_efbce :=range _dgada {if _ ,_ceee :=_fgged [_efbce ];_ceee {continue ;};for _ ,_abeb :=range _dgada {if _ ,_aedac :=_fgged [_abeb ];_aedac {continue ;};if _abeb !=_efbce &&_abeb ._gbdc ==_efbce ._gbdc &&_c .Abs (_abeb .Llx -_efbce .Llx )< _agdd &&_c .Abs (_abeb .Urx -_efbce .Urx )< _agdd &&_c .Abs (_abeb .Lly -_efbce .Lly )< _agdd &&_c .Abs (_abeb .Ury -_efbce .Ury )< _agdd {_fgged [_abeb ]=struct{}{};};};};if len (_fgged )> 0{_ggge :=0;for _ ,_ccaf :=range _dgada {if _ ,_abbc :=_fgged [_ccaf ];!_abbc {_dgada [_ggge ]=_ccaf ;_ggge ++;};};_aagge ._gfc [_gefg ]=_dgada [:len (_dgada )-len (_fgged )];if len (_aagge ._gfc [_gefg ])==0{delete (_aagge ._gfc ,_gefg );};};};};};func _agge (_fcad ,_ecb bounded )float64 {_cfcg :=_ege (_fcad ,_ecb );if !_gbgf (_cfcg ){return _cfcg ;};return _ddge (_fcad ,_ecb );};func _eece (_bbdbe []*textMark ,_gafa _bf .PdfRectangle )*textWord {_gfgc :=_bbdbe [0].PdfRectangle ;_cgbec :=_bbdbe [0]._ada ;for _ ,_ebabg :=range _bbdbe [1:]{_gfgc =_gffe (_gfgc ,_ebabg .PdfRectangle );if _ebabg ._ada > _cgbec {_cgbec =_ebabg ._ada ;};};return &textWord {PdfRectangle :_gfgc ,_eacag :_bbdbe ,_cddc 
:_gafa .Ury -_gfgc .Lly ,_bcdga :_cgbec };};
// String returns a description of the wordBag.
func (_gca *wordBag )String ()string {var _gegdg []string ;for _ ,_ggc :=range _gca .depthIndexes (){_dafe :=_gca ._gfc [_ggc ];for _ ,_affg :=range _dafe {_gegdg =append (_gegdg ,_affg ._gbdc );};};return _cf .Sprintf ("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071",_gca .PdfRectangle ,_gca ._dadc ,len (_gegdg ),_gegdg );};

// extractContentStreamImages parses the content stream `_ec` and runs a content
// stream processor over the parsed operations with processOperand registered as the
// handler for all operands. The image cache (`_bbb`) and the options (`_dgf`) are
// created on first use. It returns the parse error or the processor's error.
func (_bc *imageExtractContext )extractContentStreamImages (_ec string ,_eee *_bf .PdfPageResources )error {_aeb :=_gc .NewContentStreamParser (_ec );_de ,_gfa :=_aeb .Parse ();if _gfa !=nil {return _gfa ;};if _bc ._bbb ==nil {_bc ._bbb =map[*_ce .PdfObjectStream ]*cachedImage {};};if _bc ._dgf ==nil {_bc ._dgf =&ImageExtractOptions {};};_bde :=_gc .NewContentStreamProcessor (*_de );_bde .AddHandler (_gc .HandlerConditionEnumAllOperands ,"",_bc .processOperand );return _bde .Process (_eee );};

// textObject bundles an Extractor, page resources, a graphics state, the current
// text state with its state stack, two matrices, the collected text marks and a
// bool flag. NOTE(review): the role of each obfuscated field is not visible here.
type textObject struct{_cab *Extractor ;_dbe *_bf .PdfPageResources ;_bcfd _gc .GraphicsState ;_gffg *textState ;_cgcd *stateStack ;_ceaf _cec .Matrix ;_ddef _cec .Matrix ;_eabd []*textMark ;_dfgf bool ;};

// get returns the para stored in `_afbef` under the key `_bfddd(_dafbd, _fbcbf)`.
func (_cadf *textTable )get (_dafbd ,_fbcbf int )*textPara {return _cadf ._afbef [_bfddd (_dafbd ,_fbcbf )];};

// _aeaf splits the words of `_efgd` into a list of wordBags. For every depth index
// it repeatedly seeds a new bag (via `_acegc`) from the first word in reading order,
// removes that word from `_efgd`, and then grows the bag with scanBand calls until a
// full pass adds nothing. NOTE(review): scanBand presumably moves the matching words
// from `_efgd` into the bag and returns how many it found - confirm against scanBand.
func _aeaf (_efgd *wordBag ,_agfb float64 ,_bcae ,_gadg rulingList )[]*wordBag {var _efge []*wordBag ;for _ ,_cfcd :=range _efgd .depthIndexes (){_fgdc :=false ;for !_efgd .empty (_cfcd ){_aad :=_efgd .firstReadingIndex (_cfcd );_dede :=_efgd .firstWord (_aad );_cfd :=_acegc (_dede ,_agfb ,_bcae ,_gadg );_efgd .removeWord (_dede ,_aad );if _cbbf {_cd .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_dede .String ());};
// Do-while loop: the body always runs once and repeats while the last pass grew the bag.
for _dccgc :=true ;_dccgc ;_dccgc =_fgdc {_fgdc =false ;
// The three gaps scale with the bag's current `_dadc`, so they are recomputed on every pass.
_bbcg :=_cdcg *_cfd ._dadc ;_eebg :=_cbdc *_cfd ._dadc ;_begeb :=_dbcf *_cfd ._dadc ;if _cbbf {_cd .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_cfd .minDepth (),_cfd .maxDepth (),_begeb ,_eebg );};if _efgd .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_cfd ,_aec (_agad ,0),_cfd .minDepth ()-_begeb ,_cfd .maxDepth ()+_begeb ,_dffd ,false ,false )> 0{_fgdc =true ;};if _efgd .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_cfd ,_aec (_agad ,_eebg ),_cfd .minDepth (),_cfd .maxDepth (),_afcf ,false ,false )> 0{_fgdc =true ;};
// If either scan above grew the bag, start the next pass before trying the third scan.
if _fgdc {continue ;};_effd :=_efgd .scanBand ("",_cfd ,_aec (_bebe ,_bbcg ),_cfd .minDepth (),_cfd .maxDepth (),_afcd ,true ,false );if _effd > 0{_dcgc :=(_cfd .maxDepth ()-_cfd .minDepth ())/_cfd ._dadc ;if (_effd > 1&&float64 (_effd )> 0.3*_dcgc )||_effd <=10{if _efgd .scanBand ("\u006f\u0074\u0068e\u0072",_cfd ,_aec (_bebe ,_bbcg ),_cfd .minDepth (),_cfd .maxDepth (),_afcd ,false ,true )> 0{_fgdc =true ;};};};};_efge =append (_efge ,_cfd );};};return _efge ;};

// tidied returns the rulings with duplicates removed, snapped to groups and sorted.
// It returns nil when snapToGroups returns nil. `_dccfg` is only used as a label in
// the log output.
func (_cffa rulingList )tidied (_dccfg string )rulingList {_ccab :=_cffa .removeDuplicates ();_ccab .log ("\u0075n\u0069\u0071\u0075\u0065\u0073");_fafc :=_ccab .snapToGroups ();if _fafc ==nil {return nil ;};_fafc .sort ();if _fgfe {_cd .Log .Info ("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064",_dccfg ,len (_cffa ),len (_ccab ),len (_fafc ));};_fafc .log ("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d");return _fafc ;};

// newSubPath clears the current path (clearPath) and optionally logs the state.
func (_efbd *shapesState )newSubPath (){_efbd .clearPath ();if _fgdd {_cd .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_efbd );};};

// computeEBBoxes computes the `_bcda` rectangle of every para. Each `_bcda` starts as
// the para's own PdfRectangle. Its Llx / Urx are then widened to those of any other
// para whose Ury does not exceed this para's Lly, as long as the widening stays
// within the x limits set by the para's yNeighbours(0) on either side. When `_dfgg`
// is set the paras' PdfRectangles are overwritten with the result.
func (_bfdcg paraList )computeEBBoxes (){if _aagg {_cd .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a");};for _ ,_dbgf :=range _bfdcg {_dbgf ._bcda =_dbgf .PdfRectangle ;};_adca :=_bfdcg .yNeighbours (0);for _gdbf ,_efdb :=range _bfdcg {_dfac :=_efdb ._bcda ;
// x limits: the nearest neighbour edge to the left and to the right of this para.
_dcfaa ,_bfa :=-1.0e9,+1.0e9;for _ ,_gagf :=range _adca [_efdb ]{_faed :=_bfdcg [_gagf ]._bcda ;if _faed .Urx < _dfac .Llx {_dcfaa =_c .Max (_dcfaa ,_faed .Urx );}else if _dfac .Urx < _faed .Llx {_bfa =_c .Min (_bfa ,_faed .Llx );};};for _aaga ,_bgda :=range _bfdcg {_gdfc :=_bgda ._bcda ;if _gdbf ==_aaga ||_gdfc .Ury > _dfac .Lly {continue ;};if _dcfaa <=_gdfc .Llx &&_gdfc .Llx < _dfac .Llx {_dfac .Llx =_gdfc .Llx ;}else if _gdfc .Urx <=_bfa &&_dfac .Urx < _gdfc .Urx {_dfac .Urx =_gdfc .Urx ;};};if _aagg {_cf .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_gdbf ,_efdb ._bcda ,_dfac ,_ecedb (_efdb .text (),50));};_efdb ._bcda =_dfac ;};if _dfgg {for _ ,_begeg :=range _bfdcg {_begeg .PdfRectangle =_begeg ._bcda ;};};};

// firstReadingIndex returns the depth index, among `_ddg` and the indexes in the
// depth band starting at (`_ddg`+1)*`_eff` and extending `_geef` times the `_bcdga`
// of the first word at `_ddg`, whose first word compares lowest under `_ddge`.
func (_gdacg *wordBag )firstReadingIndex (_ddg int )int {_eefc :=_gdacg .firstWord (_ddg )._bcdga ;_eagb :=float64 (_ddg +1)*_eff ;_abae :=_eagb +_geef *_eefc ;_dabc :=_ddg ;for _ ,_abf :=range _gdacg .depthBand (_eagb ,_abae ){if _ddge (_gdacg .firstWord (_abf ),_gdacg .firstWord (_dabc ))< 0{_dabc =_abf ;};};return _dabc ;};
// String returns a description of the textLine.
func (_fbb *textLine) String() string {
	lineText := _fbb.text()
	return _cf.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",
		_fbb._abbea, _fbb.PdfRectangle, _fbb._gabbd, lineText)
}

// primaries returns the distinct `_adcb` values of the rulings in ascending order.
func (_fdff rulingList) primaries() []float64 {
	seen := make(map[float64]struct{}, len(_fdff))
	for _, r := range _fdff {
		seen[r._adcb] = struct{}{}
	}
	values := make([]float64, 0, len(seen))
	for v := range seen {
		values = append(values, v)
	}
	_ac.Float64s(values)
	return values
}

// cachedImage pairs an image with a color space.
type cachedImage struct {
	_ge  *_bf.Image
	_bbd _bf.PdfColorspace
}

// gridTiling is a rectangle with two coordinate lists and a two-level map of
// gridTiles keyed by float64 coordinates.
type gridTiling struct {
	_bf.PdfRectangle
	_ccag  []float64
	_fgfb  []float64
	_edccb map[float64]map[float64]gridTile
}

// augmentGrid splits the rulings with vertsHorzs and returns the two lists, adding a
// ruling on any side where one list's bounding box extends beyond the other's by
// more than `_egdeb`. The lists are returned untouched when either is empty or when
// nothing was added.
func (_dbcd rulingList) augmentGrid() (rulingList, rulingList) {
	verts, horzs := _dbcd.vertsHorzs()
	if len(verts) == 0 || len(horzs) == 0 {
		return verts, horzs
	}
	origVerts, origHorzs := verts, horzs
	boxV := verts.bbox()
	boxH := horzs.bbox()
	if _fgfe {
		_cd.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066", boxV)
		_cd.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066", boxH)
	}

	// Missing `_ffag` rulings at the x extremes of boxH, spanning boxV's y range.
	if boxH.Llx < boxV.Llx-_egdeb {
		added := &ruling{_cfga: _cfgce, _gcae: _ffag, _adcb: boxH.Llx, _cdcc: boxV.Lly, _bdag: boxV.Ury}
		verts = append(rulingList{added}, verts...)
	}
	if boxH.Urx > boxV.Urx+_egdeb {
		added := &ruling{_cfga: _cfgce, _gcae: _ffag, _adcb: boxH.Urx, _cdcc: boxV.Lly, _bdag: boxV.Ury}
		verts = append(verts, added)
	}
	// Missing `_aadg` rulings at the y extremes of boxV, spanning boxH's x range.
	if boxV.Lly < boxH.Lly-_egdeb {
		added := &ruling{_cfga: _cfgce, _gcae: _aadg, _adcb: boxV.Lly, _cdcc: boxH.Llx, _bdag: boxH.Urx}
		horzs = append(rulingList{added}, horzs...)
	}
	if boxV.Ury > boxH.Ury+_egdeb {
		added := &ruling{_cfga: _cfgce, _gcae: _aadg, _adcb: boxV.Ury, _cdcc: boxH.Llx, _bdag: boxH.Urx}
		horzs = append(horzs, added)
	}

	if len(verts)+len(horzs) == len(_dbcd) {
		return origVerts, origHorzs
	}
	combined := append(verts, horzs...)
	_dbcd.log("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064")
	combined.log("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d")
	return verts, horzs
}

// compositeColCorridors returns a map with one nil entry for each index in
// [0, `_agcd`).
func (_baab *textTable) compositeColCorridors() map[int][]float64 {
	width := _baab._agcd
	corridors := make(map[int][]float64, width)
	if _ccae {
		_cd.Log.Info("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020", _baab._agcd)
	}
	for col := 0; col < _baab._agcd; col++ {
		corridors[col] = nil
	}
	return corridors
}

// toTextMarks converts the word's marks to TextMarks, accumulating them with `_dcbd`
// (which also receives `_bfeg`).
func (_fffb *textWord) toTextMarks(_bfeg *int) []TextMark {
	var marks []TextMark
	for i := range _fffb._eacag {
		marks = _dcbd(marks, _bfeg, _fffb._eacag[i].ToTextMark())
	}
	return marks
}

// blocks reports whether some ruling in the list lies between `_ecbe` and `_agcb` in
// the primary direction (within `_ecba`) and overlaps the secondary range the two
// have in common. It returns false when their secondary ranges do not overlap.
func (_fbdg rulingList) blocks(_ecbe, _agcb *ruling) bool {
	if _ecbe._cdcc > _agcb._bdag || _agcb._cdcc > _ecbe._bdag {
		return false
	}
	secLo := _c.Max(_ecbe._cdcc, _agcb._cdcc)
	secHi := _c.Min(_ecbe._bdag, _agcb._bdag)
	// Order the pair so that `_ecbe` has the smaller primary coordinate.
	if _ecbe._adcb > _agcb._adcb {
		_ecbe, _agcb = _agcb, _ecbe
	}
	for _, r := range _fbdg {
		betweenPrim := _ecbe._adcb <= r._adcb+_ecba && r._adcb <= _agcb._adcb+_ecba
		if betweenPrim && r._cdcc <= secHi && secLo <= r._bdag {
			return true
		}
	}
	return false
}

// quadraticTo adds only the end point (`_cadc`, `_fecb`) to the path; `_ddb` and
// `_acef` are not used.
func (_deab *shapesState) quadraticTo(_ddb, _acef, _cadc, _fecb float64) {
	if _fgdd {
		_cd.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	_deab.addPoint(_cadc, _fecb)
}

// has reports whether `_gcde` is in the set.
func (_cgbd intSet) has(_gcde int) bool {
	_, present := _cgbd[_gcde]
	return present
}
// String returns a string describing the gridTile.
func (_efee gridTile) String() string {
	// mark renders a set flag as its letter and an unset flag as an underscore.
	mark := func(set bool, letter string) string {
		if !set {
			return "\u005f"
		}
		return letter
	}
	l := mark(_efee._cabgg, "\u004c")
	r := mark(_efee._aded, "\u0052")
	b := mark(_efee._agcf, "\u0042")
	t := mark(_efee._gcca, "\u0054")
	return _cf.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", _efee.PdfRectangle, l, r, b, t)
}
// String returns a string describing the PageText.
func (_aef PageText )String ()string {_fbe :=_cf .Sprintf ("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073",len (_aef ._deda ));_febg :=[]string {"\u002d"+_fbe };for _ ,_ceda :=range _aef ._deda {_febg =append (_febg ,_ceda .String ());};_febg =append (_febg ,"\u002b"+_fbe );return _e .Join (_febg ,"\u000a");};func (_gge *textLine )markWordBoundaries (){_eabb :=_bgeg *_gge ._gabbd ;for _acaf ,_dcge :=range _gge ._cddd [1:]{if _gcgd (_dcge ,_gge ._cddd [_acaf ])>=_eabb {_dcge ._eeed =true ;};};};func _ege (_fabg ,_fdgd bounded )float64 {return _cae (_fabg )-_cae (_fdgd )};type compositeCell struct{_bf .PdfRectangle ;paraList ;};func (_bbff *wordBag )getDepthIdx (_gaggc float64 )int {_dgb :=_bbff .depthIndexes ();_fafe :=_geb (_gaggc );if _fafe < _dgb [0]{return _dgb [0];};if _fafe > _dgb [len (_dgb )-1]{return _dgb [len (_dgb )-1];};return _fafe ;};func (_ebae *subpath )add (_edb ..._cec .Point ){_ebae ._gaaca =append (_ebae ._gaaca ,_edb ...)};func _cdcgb (_faecc []pathSection )rulingList {_cggfd (_faecc );if _fgfe {_cd .Log .Info ("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073",len (_faecc ));};var _gddbc rulingList ;for _ ,_gagd :=range _faecc {for _ ,_cdab :=range _gagd ._fgaff {if len (_cdab ._gaaca )< 2{continue ;};_cfbfe :=_cdab ._gaaca [0];for _ ,_abeag :=range _cdab ._gaaca [1:]{if _aedd ,_dadca :=_egea (_cfbfe ,_abeag ,_gagd .Color );_dadca {_gddbc =append (_gddbc ,_aedd );};_cfbfe =_abeag ;};};};if _fgfe {_cd .Log .Info ("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073",_gddbc );};return _gddbc ;};func _acc (_bbec *_gc .ContentStreamOperation )(float64 ,error ){if len (_bbec .Params )!=1{_ggad :=_ba .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");_cd .Log 
.Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_bbec .Operand ,1,len (_bbec .Params ),_bbec .Params );return 0.0,_ggad ;};return _ce .GetNumberAsFloat (_bbec .Params [0]);};func (_ebedc rulingList )comp (_ebeb ,_bcada int )bool {_aabf ,_ccdd :=_ebedc [_ebeb ],_ebedc [_bcada ];_cafc ,_dcga :=_aabf ._gcae ,_ccdd ._gcae ;if _cafc !=_dcga {return _cafc > _dcga ;};if _cafc ==_fedc {return false ;};_bdec :=func (_egebb bool )bool {if _cafc ==_aadg {return _egebb ;};return !_egebb ;};_bbgcg ,_cfda :=_aabf ._adcb ,_ccdd ._adcb ;if _bbgcg !=_cfda {return _bdec (_bbgcg > _cfda );};_bbgcg ,_cfda =_aabf ._cdcc ,_ccdd ._cdcc ;if _bbgcg !=_cfda {return _bdec (_bbgcg < _cfda );};return _bdec (_aabf ._bdag < _ccdd ._bdag );};func (_dfacdc *textWord )bbox ()_bf .PdfRectangle {return _dfacdc .PdfRectangle };func (_abbf rulingList )removeDuplicates ()rulingList {if len (_abbf )==0{return nil ;};_abbf .sort ();_cdefa :=rulingList {_abbf [0]};for _ ,_eefb :=range _abbf [1:]{if _eefb .equals (_cdefa [len (_cdefa )-1]){continue ;};_cdefa =append (_cdefa ,_eefb );};return _cdefa ;};func (_edfbg gridTiling )log (_bfdg string ){if !_adbd {return ;};_cd .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_edfbg ._ccag ),len (_edfbg ._fgfb ),_bfdg );_cf .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_edfbg ._ccag );_cf .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_edfbg ._fgfb );for _cfba ,_baggc :=range _edfbg ._fgfb {_bcbg ,_fdadb :=_edfbg ._edccb [_baggc ];if !_fdadb {continue ;};_cf .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_cfba ,_baggc );for _agebc ,_cggf :=range _edfbg ._ccag {_aegb ,_cbef 
:=_bcbg [_cggf ];if !_cbef {continue ;};_cf .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_agebc ,_aegb .String ());};};};func (_dfabf gridTile )complete ()bool {return _dfabf .numBorders ()==4};func (_bfggd rulingList )primMinMax ()(float64 ,float64 ){_bcbc ,_eeddc :=_bfggd [0]._adcb ,_bfggd [0]._adcb ;for _ ,_cdeba :=range _bfggd [1:]{if _cdeba ._adcb < _bcbc {_bcbc =_cdeba ._adcb ;}else if _cdeba ._adcb > _eeddc {_eeddc =_cdeba ._adcb ;};};return _bcbc ,_eeddc ;};func _aeeeb (_fbce ,_gdba int )int {if _fbce < _gdba {return _fbce ;};return _gdba ;};func (_cbg *shapesState )stroke (_cegb *[]pathSection ){_bdeg :=pathSection {_fgaff :_cbg ._dda ,Color :_cbg ._cfc .getStrokeColor ()};*_cegb =append (*_cegb ,_bdeg );if _fgfe {_cf .Printf ("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",len (*_cegb ),_cbg ,_cbg ._cfc .getStrokeColor (),_bdeg .bbox ());if _egde {for _bfe ,_aee :=range _cbg ._dda {_cf .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_bfe ,_aee );if _bfe ==10{break ;};};};};};func (_fccac rulingList )asTiling ()gridTiling {if _adbd {_cd .Log .Info ("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_fccac ));};for _gabg ,_bebcb :=range _fccac [1:]{_caad :=_fccac [_gabg ];if _caad .alignsPrimary (_bebcb )&&_caad .alignsSec (_bebcb ){_cd .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 
\u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_bebcb ,_caad );};};_fccac .sortStrict ();_fccac .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_ggdeg ,_gbbfa :=_fccac .vertsHorzs ();_gada :=_ggdeg .primaries ();_efgdg :=_gbbfa .primaries ();_gecc :=len (_gada )-1;_gccab :=len (_efgdg )-1;if _gecc ==0||_gccab ==0{return gridTiling {};};_cebe :=_bf .PdfRectangle {Llx :_gada [0],Urx :_gada [_gecc ],Lly :_efgdg [0],Ury :_efgdg [_gccab ]};if _adbd {_cd .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_ggdeg ));for _bfad ,_gfcg :=range _ggdeg {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bfad ,_gfcg );};_cd .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_gbbfa ));for _ccgef ,_bfca :=range _gbbfa {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ccgef ,_bfca );};_cd .Log .Info ("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_gecc ,_gccab ,_gada ,_efgdg );};_cagc :=make ([]gridTile ,_gecc *_gccab );for _cbba :=_gccab -1;_cbba >=0;_cbba --{_dedbf :=_efgdg [_cbba ];_cfae :=_efgdg [_cbba +1];for _cgf :=0;_cgf < _gecc ;_cgf ++{_ggeg :=_gada [_cgf ];_adcfe :=_gada [_cgf +1];_dgbbb :=_ggdeg .findPrimSec (_ggeg ,_dedbf );_gccda :=_ggdeg .findPrimSec (_adcfe ,_dedbf );_feda :=_gbbfa .findPrimSec (_dedbf ,_ggeg );_fecbf :=_gbbfa .findPrimSec (_cfae ,_ggeg );_acege :=_bf .PdfRectangle {Llx :_ggeg ,Urx :_adcfe ,Lly :_dedbf ,Ury :_cfae };_faca :=_gac (_acege ,_dgbbb ,_gccda ,_feda ,_fecbf 
);_cagc [_cbba *_gecc +_cgf ]=_faca ;if _adbd {_cf .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_cgf ,_cbba ,_faca .String (),_faca .Width (),_faca .Height ());};};};if _adbd {_cd .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_cebe );};_eegfb :=make ([]map[float64 ]gridTile ,_gccab );for _fbaf :=_gccab -1;_fbaf >=0;_fbaf --{if _adbd {_cf .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_fbaf );};_eegfb [_fbaf ]=make (map[float64 ]gridTile ,_gecc );for _gbaa :=0;_gbaa < _gecc ;_gbaa ++{_abaf :=_cagc [_fbaf *_gecc +_gbaa ];if _adbd {_cf .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gbaa ,_abaf );};if !_abaf ._cabgg {continue ;};_afce :=_gbaa ;for _cdagg :=_gbaa +1;!_abaf ._aded &&_cdagg < _gecc ;_cdagg ++{_ebee :=_cagc [_fbaf *_gecc +_cdagg ];_abaf .Urx =_ebee .Urx ;_abaf ._gcca =_abaf ._gcca ||_ebee ._gcca ;_abaf ._agcf =_abaf ._agcf ||_ebee ._agcf ;_abaf ._aded =_ebee ._aded ;if _adbd {_cf .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_cdagg ,_ebee ,_abaf );};_afce =_cdagg ;};if _adbd {_cf .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_gbaa ,_afce ,_abaf );};_gbaa =_afce ;_eegfb [_fbaf ][_abaf .Llx ]=_abaf ;};};_efbdg :=make (map[float64 ]map[float64 ]gridTile ,_gccab );_fbcde :=make (map[float64 ]map[float64 ]struct{},_gccab );for _bfcf :=_gccab -1;_bfcf >=0;_bfcf --{_eefbd :=_cagc [_bfcf *_gecc ].Lly ;_efbdg [_eefbd ]=make (map[float64 ]gridTile ,_gecc );_fbcde [_eefbd ]=make (map[float64 ]struct{},_gecc );};if _adbd {_cd .Log .Info 
("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_cebe );};for _bfdcd :=_gccab -1;_bfdcd >=0;_bfdcd --{_adbe :=_cagc [_bfdcd *_gecc ].Lly ;_eegfbb :=_eegfb [_bfdcd ];if _adbd {_cf .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_bfdcd );};for _ ,_gfcf :=range _acga (_eegfbb ){if _ ,_abgac :=_fbcde [_adbe ][_gfcf ];_abgac {continue ;};_gdaed :=_eegfbb [_gfcf ];if _adbd {_cf .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_gdaed .String ());};for _fged :=_bfdcd -1;_fged >=0;_fged --{if _gdaed ._agcf {break ;};_dcbb :=_eegfb [_fged ];_eeaab ,_badc :=_dcbb [_gfcf ];if !_badc {break ;};if _eeaab .Urx !=_gdaed .Urx {break ;};_gdaed ._agcf =_eeaab ._agcf ;_gdaed .Lly =_eeaab .Lly ;if _adbd {_cf .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_eeaab .String (),_gdaed .String ());};_fbcde [_eeaab .Lly ][_eeaab .Llx ]=struct{}{};};if _bfdcd ==0{_gdaed ._agcf =true ;};if _gdaed .complete (){_efbdg [_adbe ][_gfcf ]=_gdaed ;};};};_bedf :=gridTiling {PdfRectangle :_cebe ,_ccag :_fcdcd (_efbdg ),_fgfb :_eaega (_efbdg ),_edccb :_efbdg };_bedf .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");return _bedf ;};func (_ageb *stateStack )empty ()bool {return len (*_ageb )==0};func (_cdbbd *textTable )reduce ()*textTable {_fcgge :=make ([]int ,0,_cdbbd ._bdfg );_efag :=make ([]int ,0,_cdbbd ._agcd );for _cbgg :=0;_cbgg < _cdbbd ._bdfg ;_cbgg ++{if !_cdbbd .emptyRow (_cbgg ){_fcgge =append (_fcgge ,_cbgg );};};for _cada :=0;_cada < _cdbbd ._agcd ;_cada ++{if !_cdbbd .emptyColumn (_cada ){_efag =append (_efag ,_cada );};};if len (_fcgge )==_cdbbd ._bdfg &&len (_efag )==_cdbbd ._agcd {return _cdbbd ;};_bcdg :=textTable {_abad :_cdbbd ._abad ,_agcd :len (_efag ),_bdfg :len (_fcgge ),_afbef :make 
(map[uint64 ]*textPara ,len (_efag )*len (_fcgge ))};if _ccae {_cd .Log .Info ("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064",_cdbbd ._agcd ,_cdbbd ._bdfg ,len (_efag ),len (_fcgge ));_cd .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_efag );_cd .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_fcgge );};for _eabba ,_dfcc :=range _fcgge {for _dcee ,_eagc :=range _efag {_dfcb :=_cdbbd .get (_eagc ,_dfcc );if _dfcb ==nil {continue ;};if _ccae {_cf .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_dcee ,_eabba ,_eagc ,_dfcc ,_ecedb (_dfcb .text (),50));};_bcdg .put (_dcee ,_eabba ,_dfcb );};};return &_bcdg ;};func _gffe (_efebf ,_bgag _bf .PdfRectangle )_bf .PdfRectangle {return _bf .PdfRectangle {Llx :_c .Min (_efebf .Llx ,_bgag .Llx ),Lly :_c .Min (_efebf .Lly ,_bgag .Lly ),Urx :_c .Max (_efebf .Urx ,_bgag .Urx ),Ury :_c .Max (_efebf .Ury ,_bgag .Ury )};};func (_acgde *textWord )addDiacritic (_aabg string ){_facg :=_acgde ._eacag [len (_acgde ._eacag )-1];_facg ._bgfa +=_aabg ;_facg ._bgfa =_dg .NFKC .String (_facg ._bgfa );};func (_cead paraList )llyRange (_ccfed []int ,_effc ,_gcaf float64 )[]int {_ggaa :=len (_cead );if _gcaf < _cead [_ccfed [0]].Lly ||_effc > _cead [_ccfed [_ggaa -1]].Lly {return nil ;};_ade :=_ac .Search (_ggaa ,func (_dffe int )bool {return _cead [_ccfed [_dffe ]].Lly >=_effc });_ggda :=_ac .Search (_ggaa ,func (_dgcb int )bool {return _cead [_ccfed [_dgcb ]].Lly > _gcaf });return _ccfed [_ade :_ggda ];};func (_aeed *wordBag )text ()string {_bcge :=_aeed .allWords ();_gaadb :=make ([]string ,len (_bcge ));for _bfcg ,_dag :=range _bcge {_gaadb [_bfcg ]=_dag ._gbdc ;};return _e .Join (_gaadb ,"\u0020");};func (_bdegb paraList )yNeighbours (_edbf float64 )map[*textPara ][]int {_ddagf 
:=make ([]event ,2*len (_bdegb ));if _edbf ==0{for _defa ,_abbdf :=range _bdegb {_ddagf [2*_defa ]=event {_abbdf .Lly ,true ,_defa };_ddagf [2*_defa +1]=event {_abbdf .Ury ,false ,_defa };};}else {for _ageab ,_dbec :=range _bdegb {_ddagf [2*_ageab ]=event {_dbec .Lly -_edbf *_dbec .fontsize (),true ,_ageab };_ddagf [2*_ageab +1]=event {_dbec .Ury +_edbf *_dbec .fontsize (),false ,_ageab };};};return _bdegb .eventNeighbours (_ddagf );};func _ffgg (_fefa ,_feea _bf .PdfRectangle )bool {return _eceg (_fefa ,_feea )&&_cdg (_fefa ,_feea )};func (_bgaa *wordBag )removeWord (_cbf *textWord ,_acae int ){_cag :=_bgaa ._gfc [_acae ];_cag =_fegbe (_cag ,_cbf );if len (_cag )==0{delete (_bgaa ._gfc ,_acae );}else {_bgaa ._gfc [_acae ]=_cag ;};};func (_acbbaa *textTable )logComposite (_cadbg string ){if !_ccae {return ;};_cd .Log .Info ("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_acbbaa ._agcd ,_acbbaa ._bdfg ,_cadbg );_cf .Printf ("\u0025\u0035\u0073 \u007c","");for _ffdee :=0;_ffdee < _acbbaa ._agcd ;_ffdee ++{_cf .Printf ("\u0025\u0033\u0064 \u007c",_ffdee );};_cf .Println ("");_cf .Printf ("\u0025\u0035\u0073 \u002b","");for _bggdb :=0;_bggdb < _acbbaa ._agcd ;_bggdb ++{_cf .Printf ("\u0025\u0033\u0073 \u002b","\u002d\u002d\u002d");};_cf .Println ("");for _ecbb :=0;_ecbb < _acbbaa ._bdfg ;_ecbb ++{_cf .Printf ("\u0025\u0035\u0064 \u007c",_ecbb );for _dgagf :=0;_dgagf < _acbbaa ._agcd ;_dgagf ++{_dafba ,_ :=_acbbaa ._bfcbf [_bfddd (_dgagf ,_ecbb )].parasBBox ();_cf .Printf ("\u0025\u0033\u0064 \u007c",len (_dafba ));};_cf .Println ("");};_cd .Log .Info ("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_acbbaa ._agcd ,_acbbaa ._bdfg ,_cadbg );_cf .Printf ("\u0025\u0035\u0073 \u007c","");for _eaec :=0;_eaec < _acbbaa ._agcd ;_eaec ++{_cf .Printf ("\u0025\u0031\u0032\u0064\u0020\u007c",_eaec );};_cf .Println ("");_cf .Printf ("\u0025\u0035\u0073 \u002b","");for _dedg :=0;_dedg < _acbbaa 
._agcd ;_dedg ++{_cf .Print ("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b");};_cf .Println ("");for _ffegb :=0;_ffegb < _acbbaa ._bdfg ;_ffegb ++{_cf .Printf ("\u0025\u0035\u0064 \u007c",_ffegb );for _gaee :=0;_gaee < _acbbaa ._agcd ;_gaee ++{_agece ,_ :=_acbbaa ._bfcbf [_bfddd (_gaee ,_ffegb )].parasBBox ();_cegc :="";_ddad :=_agece .merge ();if _ddad !=nil {_cegc =_ddad .text ();};_cegc =_cf .Sprintf ("\u0025\u0071",_ecedb (_cegc ,12));_cegc =_cegc [1:len (_cegc )-1];_cf .Printf ("\u0025\u0031\u0032\u0073\u0020\u007c",_cegc );};_cf .Println ("");};};func (_aggg paraList )xNeighbours (_bggcg float64 )map[*textPara ][]int {_cgda :=make ([]event ,2*len (_aggg ));if _bggcg ==0{for _dffea ,_dcfcb :=range _aggg {_cgda [2*_dffea ]=event {_dcfcb .Llx ,true ,_dffea };_cgda [2*_dffea +1]=event {_dcfcb .Urx ,false ,_dffea };};}else {for _gfacf ,_eacb :=range _aggg {_cgda [2*_gfacf ]=event {_eacb .Llx -_bggcg *_eacb .fontsize (),true ,_gfacf };_cgda [2*_gfacf +1]=event {_eacb .Urx +_bggcg *_eacb .fontsize (),false ,_gfacf };};};return _aggg .eventNeighbours (_cgda );};type event struct{_bebea float64 ;_bafa bool ;_dedba int ;};func (_fcca compositeCell )parasBBox ()(paraList ,_bf .PdfRectangle ){return _fcca .paraList ,_fcca .PdfRectangle ;};func (_dege *textObject )showTextAdjusted (_bdca *_ce .PdfObjectArray )error {_fce :=false ;for _ ,_fecd :=range _bdca .Elements (){switch _fecd .(type ){case *_ce .PdfObjectFloat ,*_ce .PdfObjectInteger :_dde ,_gcfg :=_ce .GetNumberAsFloat (_fecd );if _gcfg !=nil {_cd .Log .Debug ("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_fecd ,_bdca );return _gcfg ;};_gga ,_faa :=-_dde *0.001*_dege ._gffg ._bdd ,0.0;if _fce {_faa ,_gga 
=_gga ,_faa ;};_cea :=_bggc (_cec .Point {X :_gga ,Y :_faa });_dege ._ceaf .Concat (_cea );case *_ce .PdfObjectString :_adc ,_egb :=_ce .GetStringBytes (_fecd );if !_egb {_cd .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_fecd ,_bdca );return _ce .ErrTypeError ;};_dege .renderText (_adc );default:_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_fecd ,_bdca );return _ce .ErrTypeError ;};};return nil ;};func _bebe (_aea *wordBag ,_cfbfb *textWord ,_cac float64 )bool {return _aea .Urx <=_cfbfb .Llx &&_cfbfb .Llx < _aea .Urx +_cac ;};func (_gdac *shapesState )moveTo (_gbg ,_fca float64 ){_gdac ._decf =true ;_gdac ._fafa =_gdac .devicePoint (_gbg ,_fca );if _fgdd {_cd .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_gbg ,_fca ,_gdac ._fafa );};};func (_ebbc rulingList )mergePrimary ()float64 {_geaae :=_ebbc [0]._adcb ;for _ ,_ggceb :=range _ebbc [1:]{_geaae +=_ggceb ._adcb ;};return _geaae /float64 (len (_ebbc ));};func (_gdf *textObject )reset (){_gdf ._ceaf =_cec .IdentityMatrix ();_gdf ._ddef =_cec .IdentityMatrix ();_gdf ._eabd =nil ;};func (_dafa rulingList )isActualGrid ()(rulingList ,bool ){_ffede ,_bebc :=_dafa .augmentGrid ();if !(len (_ffede )>=_def +1&&len (_bebc )>=_deec +1){if _fgfe {_cd .Log .Info ("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 
\u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064",len (_ffede ),len (_bebc ),_def +1,_deec +1);};return nil ,false ;};if _fgfe {_cd .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074",_dafa ,len (_ffede )>=2,len (_bebc )>=2,len (_ffede )>=2&&len (_bebc )>=2);for _cfag ,_adad :=range _dafa {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a",_cfag ,_adad );};};if _ffed {_fddc ,_ffga :=_ffede [0],_ffede [len (_ffede )-1];_gedb ,_cbgfc :=_bebc [0],_bebc [len (_bebc )-1];if !(_ddec (_fddc ._adcb -_gedb ._cdcc )&&_ddec (_ffga ._adcb -_gedb ._bdag )&&_ddec (_gedb ._adcb -_fddc ._bdag )&&_ddec (_cbgfc ._adcb -_fddc ._cdcc )){if _fgfe {_cd .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073",_fddc ,_ffga ,_gedb ,_cbgfc );};return nil ,false ;};}else {if !_ffede .aligned (){if _aggc {_cd .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064",len (_ffede ));};return nil ,false ;};if !_bebc .aligned (){if _fgfe {_cd .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064",len (_bebc ));};return nil ,false ;};};_ebgbb :=append (_ffede ,_bebc ...);return _ebgbb ,true ;};func (_bddb rulingList )vertsHorzs ()(rulingList ,rulingList ){var _fbcg ,_egcg rulingList ;for _ ,_fffa :=range _bddb {switch _fffa ._gcae {case _ffag :_fbcg =append (_fbcg ,_fffa );case _aadg :_egcg =append (_egcg 
,_fffa );};};return _fbcg ,_egcg ;};func (_cbgb rulingList )aligned ()bool {if len (_cbgb )< 2{return false ;};_fagg :=make (map[*ruling ]int );_fagg [_cbgb [0]]=0;for _ ,_bcgec :=range _cbgb [1:]{_begea :=false ;for _gdfg :=range _fagg {if _bcgec .gridIntersecting (_gdfg ){_fagg [_gdfg ]++;_begea =true ;break ;};};if !_begea {_fagg [_bcgec ]=0;};};_ceafbb :=0;for _ ,_dfbb :=range _fagg {if _dfbb ==0{_ceafbb ++;};};_ebec :=float64 (_ceafbb )/float64 (len (_cbgb ));_ffgbc :=_ebec <=1.0-_gfdg ;if _fgfe {_cd .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_ffgbc ,_ebec ,_ceafbb ,len (_cbgb ),_cbgb .String ());};return _ffgbc ;};func _gaag (_eced []*wordBag )[]*wordBag {if len (_eced )<=1{return _eced ;};if _fbcd {_cd .Log .Info ("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a");};_ac .Slice (_eced ,func (_adcg ,_gfad int )bool {_ffec ,_gbdf :=_eced [_adcg ],_eced [_gfad ];_cfegd :=_ffec .Width ()*_ffec .Height ();_cebc :=_gbdf .Width ()*_gbdf .Height ();if _cfegd !=_cebc {return _cfegd > _cebc ;};if _ffec .Height ()!=_gbdf .Height (){return _ffec .Height ()> _gbdf .Height ();};return _adcg < _gfad ;});var _deeb []*wordBag ;_cge :=make (intSet );for _gbfd :=0;_gbfd < len (_eced );_gbfd ++{if _cge .has (_gbfd ){continue ;};_ccbc :=_eced [_gbfd ];for _eebc :=_gbfd +1;_eebc < len (_eced );_eebc ++{if _cge .has (_gbfd ){continue ;};_deaa :=_eced [_eebc ];_dgdd :=_ccbc .PdfRectangle ;_dgdd .Llx -=_ccbc ._dadc ;if _ceeg (_dgdd ,_deaa .PdfRectangle ){_ccbc .absorb (_deaa );_cge .add (_eebc );};};_deeb =append (_deeb ,_ccbc );};if len (_eced )!=len (_deeb )+len (_cge ){_cd .Log .Error ("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064",len (_eced 
),len (_deeb ),len (_cge ));};return _deeb ;};func (_dfca *shapesState )cubicTo (_fdegc ,_ggfce ,_bea ,_fdaa ,_eddf ,_ffgd float64 ){if _fgdd {_cd .Log .Info ("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a");};_dfca .addPoint (_eddf ,_ffgd );};func _fgead (_ebeca []*textWord ,_ebba int )[]*textWord {_abfc :=len (_ebeca );copy (_ebeca [_ebba :],_ebeca [_ebba +1:]);return _ebeca [:_abfc -1];};
|
||
|
||
// String returns a string describing `ma`.
|
||
func (_eadc TextMarkArray )String ()string {_fbebg :=len (_eadc ._gef );if _fbebg ==0{return "\u0045\u004d\u0050T\u0059";};_agcc :=_eadc ._gef [0];_febc :=_eadc ._gef [_fbebg -1];return _cf .Sprintf ("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d",_fbebg ,_agcc ,_febc );};func (_bdef *textTable )put (_fdfb ,_edbcf int ,_adgf *textPara ){_bdef ._afbef [_bfddd (_fdfb ,_edbcf )]=_adgf ;};func (_aaa *textObject )getStrokeColor ()_g .Color {return _cgbc (_aaa ._bcfd .ColorspaceStroking ,_aaa ._bcfd .ColorStroking );};func (_dfdc *textWord )computeText ()string {_dfeg :=make ([]string ,len (_dfdc ._eacag ));for _acfcg ,_acbee :=range _dfdc ._eacag {_dfeg [_acfcg ]=_acbee ._bgfa ;};return _e .Join (_dfeg ,"");};func (_ecedg paraList )extractTables (_bcgga []gridTiling )paraList {if _ccae {_cd .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_ecedg ));};if len (_ecedg )< _fdcff {return _ecedg ;};_fdae :=_ecedg .findTables (_bcgga );if _ccae {_cd .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_fdae ));for _ccdf ,_gegg :=range _fdae {_gegg .log (_cf .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_ccdf ));};};return _ecedg .applyTables (_fdae );};type intSet map[int ]struct{};func (_adaf rulingList )log (_dgfe string ){if !_fgfe {return ;};_cd .Log .Info ("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_dgfe ,_adaf .String 
());for _fgdeb ,_bcadb :=range _adaf {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fgdeb ,_bcadb .String ());};};
|
||
|
||
// String returns a human readable description of `vecs`.
|
||
func (_acbbf rulingList) String() string {
	if len(_acbbf) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	first, second := _acbbf.vertsHorzs()
	nFirst, nSecond := len(first), len(second)
	switch {
	case nFirst == 0, nSecond == 0:
		// Without rulings in both groups no rectangle can be formed; print the counts only.
		return _cf.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", nFirst, nSecond)
	default:
		// The rectangle is taken from the `_adcb` values of the first and last
		// ruling of each group.
		extent := _bf.PdfRectangle{
			Llx: first[0]._adcb,
			Urx: first[nFirst-1]._adcb,
			Lly: second[nSecond-1]._adcb,
			Ury: second[0]._adcb,
		}
		return _cf.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", nFirst, nSecond, extent)
	}
}
|
||
|
||
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
|
||
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
|
||
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
|
||
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
|
||
type RenderMode int

// hasLines reports whether `_ffgg` holds between the cell's rectangle and the
// rectangle of at least one of the lines in `_ebaea`.
func (_ebege compositeCell) hasLines(_ebaea []*textLine) bool {
	total := len(_ebaea)
	for idx, line := range _ebaea {
		hit := _ffgg(_ebege.PdfRectangle, line.PdfRectangle)
		if _ccae {
			_cf.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", hit, idx, total)
			_cf.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", _ebege)
			_cf.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if !hit {
			continue
		}
		// One matching line is enough.
		return true
	}
	return false
}
|
||
|
||
// String returns a description of `k`.
|
||
// The name is looked up in the _dfda table; a kind that is missing from the
// table is rendered as "Not a ruling: <number>".
func (_cbde rulingKind )String ()string {_bacd ,_fdaae :=_dfda [_cbde ];if !_fdaae {return _cf .Sprintf ("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064",_cbde );};return _bacd ;};
// split cuts the composite cell into a grid and returns it as a new textTable.
//
// `_bgeb` holds the y coordinates of the row corridors and `_gecg` the x
// coordinates of the column corridors. Both are passed through _bdcc, which
// brackets them with the cell's own edges (Ury..Lly for rows, Llx..Urx for
// columns) and drops values outside those edges. The table dimensions are
// taken from the corridor counts BEFORE that filtering (len+1 each).
//
// Every text line of the cell's paragraphs is assigned to the first grid
// rectangle (scanning rows, then columns) for which _ceeg(rect, line rect) is
// true; lines that match no rectangle are dropped. Each rectangle that
// received at least one line is turned into a paragraph by _cbadc and stored
// in the table with put.
//
// Side effect: the cell's paraList is sorted in place by Lly, then Llx.
// With the _ccae debug flag set, the inputs and the grid are logged/printed.
func (_fbbd compositeCell )split (_bgeb ,_gecg []float64 )*textTable {_daae :=len (_bgeb )+1;_eedd :=len (_gecg )+1;if _ccae {_cd .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_eedd ,_daae ,_fbbd ,_bgeb ,_gecg );_cf .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_fbbd .paraList ));for _cfcgf ,_ddede :=range _fbbd .paraList {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cfcgf ,_ddede .String ());};_cf .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_fbbd .lines ()));for _fagf ,_ebcg :=range _fbbd .lines (){_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fagf ,_ebcg );};};_bgeb =_bdcc (_bgeb ,_fbbd .Ury ,_fbbd .Lly );_gecg =_bdcc (_gecg ,_fbbd .Llx ,_fbbd .Urx );_gcea :=make (map[uint64 ]*textPara ,_eedd *_daae );_aacf :=textTable {_agcd :_eedd ,_bdfg :_daae ,_afbef :_gcea };_edcd :=_fbbd .paraList ;_ac .Slice (_edcd ,func (_abdb ,_cgab int )bool {_bfgga ,_dcaf :=_edcd [_abdb ],_edcd [_cgab ];_cagf ,_cfeee :=_bfgga .Lly ,_dcaf .Lly ;if _cagf !=_cfeee {return _cagf < _cfeee ;};return _bfgga .Llx < _dcaf .Llx ;});_dacc :=make (map[uint64 ]_bf .PdfRectangle ,_eedd *_daae );for _gdbg ,_bgad :=range _bgeb [1:]{_fggc :=_bgeb [_gdbg ];for _dbdg ,_fgcg :=range _gecg [1:]{_eegc :=_gecg [_dbdg ];_dacc 
[_bfddd (_dbdg ,_gdbg )]=_bf .PdfRectangle {Llx :_eegc ,Urx :_fgcg ,Lly :_bgad ,Ury :_fggc };};};if _ccae {_cd .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073");_cf .Printf ("\u0020\u0020\u0020\u0020");for _ffde :=0;_ffde < _eedd ;_ffde ++{_cf .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_ffde );};_cf .Println ();for _begb :=0;_begb < _daae ;_begb ++{_cf .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_begb );for _bdee :=0;_bdee < _eedd ;_bdee ++{_cf .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_dacc [_bfddd (_bdee ,_begb )]);};_cf .Println ();};};_acfd :=func (_feag *textLine )(int ,int ){for _dgagb :=0;_dgagb < _daae ;_dgagb ++{for _ecebe :=0;_ecebe < _eedd ;_ecebe ++{if _ceeg (_dacc [_bfddd (_ecebe ,_dgagb )],_feag .PdfRectangle ){return _ecebe ,_dgagb ;};};};return -1,-1;};_bfac :=make (map[uint64 ][]*textLine ,_eedd *_daae );for _ ,_gbfa :=range _edcd .lines (){_gfacd ,_afeb :=_acfd (_gbfa );if _gfacd < 0{continue ;};_bfac [_bfddd (_gfacd ,_afeb )]=append (_bfac [_bfddd (_gfacd ,_afeb )],_gbfa );};for _ddcd :=0;_ddcd < len (_bgeb )-1;_ddcd ++{_agaga :=_bgeb [_ddcd ];_fdce :=_bgeb [_ddcd +1];for _ceab :=0;_ceab < len (_gecg )-1;_ceab ++{_eagad :=_gecg [_ceab ];_dbda :=_gecg [_ceab +1];_aggec :=_bf .PdfRectangle {Llx :_eagad ,Urx :_dbda ,Lly :_fdce ,Ury :_agaga };_bbg :=_bfac [_bfddd (_ceab ,_ddcd )];if len (_bbg )==0{continue ;};_cdea :=_cbadc (_aggec ,_bbg );_aacf .put (_ceab ,_ddcd ,_cdea );};};return &_aacf ;};
// NOTE(review): the use of this constant is not visible in this part of the file.
const _efa =20;
// removeDuplicates drops every point that _feee reports as equal to the point
// kept immediately before it, so runs of repeated points collapse to one.
// An empty subpath is left untouched.
func (_acbad *subpath )removeDuplicates (){if len (_acbad ._gaaca )==0{return ;};_gagc :=[]_cec .Point {_acbad ._gaaca [0]};for _ ,_cfbf :=range _acbad ._gaaca [1:]{if !_feee (_cfbf ,_gagc [len (_gagc )-1]){_gagc =append (_gagc ,_cfbf );};};_acbad ._gaaca =_gagc ;};
// fontEntry pairs a font with an int64 value.
// NOTE(review): the meaning of the int64 field is not visible here — confirm at its users.
type fontEntry struct{_bfgg *_bf .PdfFont ;_dbbg int64 ;};
// rulingList is a slice of ruling pointers.
type rulingList []*ruling ;
// moveText forwards its two offsets unchanged to moveLP.
func (_ffdf *textObject )moveText (_fabb ,_bbda float64 ){_ffdf .moveLP 
(_fabb ,_bbda )};
// _bfddd packs a pair of cell indices into a single uint64 map key:
// first*0x1000000 + second.
func _bfddd (_abgfe ,_egdb int )uint64 {return uint64 (_abgfe )*0x1000000+uint64 (_egdb )};
// emptyColumn reports whether every cell in column `_bcfb` is either nil or
// has empty text.
func (_gbfgg *textTable )emptyColumn (_bcfb int )bool {for _aeabd :=0;_aeabd < _gbfgg ._bdfg ;_aeabd ++{_decfc :=_gbfgg .get (_bcfb ,_aeabd );if _decfc !=nil &&_decfc .text ()!=""{return false ;};};return true ;};
// intersections returns, for each ruling index, the set of indices of the
// rulings of the other kind that it intersects. Only rulings of kind _ffag and
// _aadg take part; each _ffag ruling is tested against each _aadg ruling.
// Indices with no intersection have no entry in the result.
//
// It returns nil when there are fewer than _def+1 rulings of kind _ffag or
// fewer than _deec+1 of kind _aadg, and also (with a debug log) when the two
// groups together exceed _bgabf rulings.
func (_ecda rulingList )intersections ()map[int ]intSet {var _ebge ,_cbdd []int ;for _dfab ,_eaca :=range _ecda {switch _eaca ._gcae {case _ffag :_ebge =append (_ebge ,_dfab );case _aadg :_cbdd =append (_cbdd ,_dfab );};};if len (_ebge )< _def +1||len (_cbdd )< _deec +1{return nil ;};if len (_ebge )+len (_cbdd )> _bgabf {_cd .Log .Debug ("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",len (_ecda ),len (_ebge ),len (_cbdd ));return nil ;};_bbad :=make (map[int ]intSet ,len (_ebge )+len (_cbdd ));for _ ,_befd :=range _ebge {for _ ,_fbcdf :=range _cbdd {if _ecda [_befd ].intersects (_ecda [_fbcdf ]){if _ ,_daef :=_bbad [_befd ];!_daef {_bbad [_befd ]=make (intSet );};if _ ,_cefcb :=_bbad [_fbcdf ];!_cefcb {_bbad [_fbcdf ]=make (intSet );};_bbad [_befd ].add (_fbcdf );_bbad [_fbcdf ].add (_befd );};};};return _bbad ;};
// showText forwards the string bytes unchanged to renderText.
func (_aggd *textObject )showText (_eac []byte )error {return _aggd .renderText (_eac )};
// _gcabb reports whether `_bddab` divided by max(_egeb, `_ggdbb`) is below
// _ffeg. Taking the max with _egeb puts a floor under the denominator.
func _gcabb (_bddab ,_ggdbb float64 )bool {return _bddab /_c .Max (_egeb ,_ggdbb )< _ffeg };
// Text returns the extracted page text.
// The text is the string held in the page's `_dgfb` field; it is returned as
// stored.
func (_ccbb PageText) Text() string {
	text := _ccbb._dgfb
	return text
}

// toTextMarks returns the TextMarks of all words on the line, in order. A
// single-space mark is emitted (through _ceed) in front of every word whose
// `_eeed` flag is set. `_acfb` is the running text offset; it is handed on to
// the helpers that produce the marks.
func (_adba *textLine) toTextMarks(_acfb *int) []TextMark {
	var marks []TextMark
	for _, word := range _adba._cddd {
		if word._eeed {
			marks = _ceed(marks, _acfb, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_acfb)...)
	}
	return marks
}

// _dcbd stamps `_fedg` with the current offset `*_cdag`, appends it to
// `_dagd`, and advances the offset by the byte length of the mark's text.
func _dcbd(_dagd []TextMark, _cdag *int, _fedg TextMark) []TextMark {
	_fedg.Offset = *_cdag
	*_cdag += len(_fedg.Text)
	return append(_dagd, _fedg)
}

// _egea builds a ruling from the segment `_dgbb`-`_efdab` with color
// `_gfbae`. The kind is determined by _afbbg; when that kind is _fedc no
// ruling is produced and (nil, false) is returned.
func _egea(_dgbb, _efdab _cec.Point, _gfbae _g.Color) (*ruling, bool) {
	kind := _afbbg(_dgbb, _efdab)
	if kind == _fedc {
		return nil, false
	}
	segment := lineRuling{_ebgf: _dgbb, _cgbg: _efdab, _cfa: kind, Color: _gfbae}
	return segment.asRuling()
}

// absorb merges `_bagda` into the receiver: the bounding box becomes the
// result of _gffe on both boxes and the other word's marks are appended.
func (_aeba *textWord) absorb(_bagda *textWord) {
	merged := _gffe(_aeba.PdfRectangle, _bagda.PdfRectangle)
	_aeba.PdfRectangle = merged
	_aeba._eacag = append(_aeba._eacag, _bagda._eacag...)
}

// _gcgd returns the left edge (Llx) of `_cgg` minus the right edge (Urx) of
// `_cee`.
func _gcgd(_cgg, _cee bounded) float64 {
	first := _cgg.bbox()
	second := _cee.bbox()
	return first.Llx - second.Urx
}
// TextMarkArray is a collection of TextMarks.
// The marks live in an unexported slice.
type TextMarkArray struct{_gef []TextMark };
// _bdcc returns a new slice that starts with `_ffcd`, ends with `_gbbaf`, and
// in between holds the values of `_abgf` that lie strictly between the smaller
// and the larger of those two bounds.
// NOTE(review): the loop stops at the first value >= the larger bound, so it
// presumably expects `_abgf` in ascending order — confirm against callers.
func _bdcc (_abgf []float64 ,_ffcd ,_gbbaf float64 )[]float64 {_bcgee ,_gcfa :=_ffcd ,_gbbaf ;if _gcfa < _bcgee {_bcgee ,_gcfa =_gcfa ,_bcgee ;};_fcgga :=make ([]float64 ,0,len (_abgf )+2);_fcgga =append (_fcgga ,_ffcd );for _ ,_gdec :=range _abgf {if _gdec <=_bcgee {continue ;}else if _gdec >=_gcfa {break ;};_fcgga =append (_fcgga ,_gdec );};_fcgga =append (_fcgga ,_gbbaf );return _fcgga ;};
// getComposite returns the paragraphs and bounding box of the composite cell
// stored at (`_efdae`, `_dfag`), or (nil, zero rectangle) when the table has
// no composite cell at that position.
func (_bddbc *textTable )getComposite (_efdae ,_dfag int )(paraList ,_bf .PdfRectangle ){_ffcf ,_aaba :=_bddbc ._bfcbf [_bfddd (_efdae ,_dfag )];if _ccae {_cf .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a",_efdae ,_dfag ,_ffcf .String ());};if !_aaba {return nil ,_bf .PdfRectangle {};};return _ffcf .parasBBox ();};
// splitSec partitions the rulings into groups. A ruling joins a group when
// alignsSec is true between it and any ruling already in that group; groups
// are seeded in sorted order from rulings not yet claimed.
//
// Side effect: the receiver is sorted in place by `_cdcc`, then `_bdag`.
// The list must not be empty: the first element is accessed unconditionally.
func (_bfbbd rulingList )splitSec ()[]rulingList {_ac .Slice (_bfbbd ,func (_ebfa ,_ebgg int )bool {_agda ,_ebbgd :=_bfbbd [_ebfa ],_bfbbd [_ebgg ];if _agda ._cdcc !=_ebbgd ._cdcc {return _agda ._cdcc < _ebbgd ._cdcc ;};return _agda ._bdag < _ebbgd ._bdag ;});_fega :=make (map[*ruling ]struct{},len (_bfbbd ));_gaage :=func (_dedeg *ruling )rulingList {_caabb :=rulingList {_dedeg };_fega [_dedeg ]=struct{}{};for _ ,_bgaab :=range _bfbbd {if _ ,_cgbe :=_fega [_bgaab ];_cgbe {continue ;};for _ ,_fadc :=range _caabb {if _bgaab .alignsSec (_fadc ){_caabb =append (_caabb ,_bgaab );_fega [_bgaab ]=struct{}{};break ;};};};return _caabb ;};_fdbc :=[]rulingList {_gaage (_bfbbd [0])};for _ ,_gadb :=range _bfbbd [1:]{if _ ,_dffde :=_fega [_gadb ];_dffde {continue ;};_fdbc =append (_fdbc ,_gaage (_gadb ));};return _fdbc ;};
// pullWord appends `_abga` to the line and removes it from bin `_ecdg` of the
// word bag `_fbfff`.
func (_edcb *textLine )pullWord (_fbfff *wordBag ,_abga *textWord ,_ecdg int ){_edcb .appendWord (_abga );_fbfff .removeWord (_abga ,_ecdg );};
// clearPath discards the accumulated subpaths (`_dda`) and resets the `_decf`
// flag. The new state is logged when the _fgdd debug flag is set.
func (_egdf *shapesState )clearPath (){_egdf ._dda =nil ;_egdf ._decf =false ;if _fgdd {_cd .Log .Info 
("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073",_egdf );};};
// _bafb returns the larger of its two arguments.
func _bafb (_gdbc ,_fbffc int )int {if _gdbc > _fbffc {return _gdbc ;};return _fbffc ;};
// findGridTables tries to build one table per tiling in `_geba` using
// findTableGrid. Each table found is collected and its cells are marked with
// markCells; in addition every paragraph in the second value returned by
// findTableGrid gets its `_gecbf` flag set, whether or not a table was found.
func (_cbca paraList )findGridTables (_geba []gridTiling )[]*textTable {if _ccae {_cd .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_cbca ));for _dadcag ,_gfcb :=range _cbca {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dadcag ,_gfcb );};};var _deaaf []*textTable ;for _eedg ,_fcgg :=range _geba {_adedg ,_cgbbf :=_cbca .findTableGrid (_fcgg );if _adedg !=nil {_adedg .log (_cf .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_eedg ));_deaaf =append (_deaaf ,_adedg );_adedg .markCells ();};for _gacd :=range _cgbbf {_gacd ._gecbf =true ;};};if _ccae {_cd .Log .Info ("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s",len (_deaaf ));};return _deaaf ;};
// extractXObjectImage records an ImageMark for the image XObject named
// `_bbf`. The decoded image and its colorspace are cached per XObject stream
// in `_bbb`, so an XObject that is drawn repeatedly is decoded only once; the
// RGB conversion is still performed on every call. Size, angle and position
// of the mark are taken from the CTM of `_gaf`.
//
// It returns nil without recording anything when the name does not resolve to
// an XObject or to an image, and returns the error of the lookup, decode or
// RGB conversion otherwise. The lookup error of GetXObjectByName is ignored;
// only its nil result is checked.
func (_ef *imageExtractContext )extractXObjectImage (_bbf *_ce .PdfObjectName ,_gaf _gc .GraphicsState ,_dca *_bf .PdfPageResources )error {_cde ,_ :=_dca .GetXObjectByName (*_bbf );if _cde ==nil {return nil ;};_eb ,_gfac :=_ef ._bbb [_cde ];if !_gfac {_ff ,_efb :=_dca .GetXObjectImageByName (*_bbf );if _efb !=nil {return _efb ;};if _ff ==nil {return nil ;};_bg ,_efb :=_ff .ToImage ();if _efb !=nil {return _efb ;};_eb =&cachedImage {_ge :_bg ,_bbd :_ff .ColorSpace };_ef ._bbb [_cde ]=_eb ;};_gbe :=_eb ._ge ;_ea :=_eb ._bbd ;_gfacg ,_gbc :=_ea .ImageToRGB (*_gbe );if _gbc !=nil {return _gbc ;};_cd .Log .Debug ("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_gaf .CTM .String ());_eaa :=ImageMark {Image :&_gfacg ,Width :_gaf .CTM .ScalingFactorX (),Height :_gaf .CTM .ScalingFactorY (),Angle :_gaf .CTM .Angle ()};_eaa .X ,_eaa .Y =_gaf .CTM .Translation ();_ef ._ae 
=append (_ef ._ae ,_eaa );_ef ._dcd ++;return nil ;};
// log prints the table under the heading `_eeff`: a summary via the logger,
// then one line per non-nil cell (position, rectangle, text shortened by
// _ecedb to 50, and rune count) on standard output. It does nothing unless
// the _ccae debug flag is set.
func (_fcfg *textTable )log (_eeff string ){if !_ccae {return ;};_cd .Log .Info ("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066",_eeff ,_fcfg ._agcd ,_fcfg ._bdfg ,_fcfg ._abad ,_fcfg .PdfRectangle );for _ddgae :=0;_ddgae < _fcfg ._bdfg ;_ddgae ++{for _bccb :=0;_bccb < _fcfg ._agcd ;_bccb ++{_eabdef :=_fcfg .get (_bccb ,_ddgae );if _eabdef ==nil {continue ;};_cf .Printf ("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a",_bccb ,_ddgae ,_eabdef .PdfRectangle ,_ecedb (_eabdef .text (),50),_a .RuneCountInString (_eabdef .text ()));};};};
// _geb converts a value to a bin index of width _eff: the truncated quotient
// for non-negative values, and the truncated quotient minus one for negative
// values.
// NOTE(review): for a negative exact multiple of _eff this yields one less
// than the mathematical floor — confirm that is intended.
func _geb (_fgag float64 )int {var _gfe int ;if _fgag >=0{_gfe =int (_fgag /_eff );}else {_gfe =int (_fgag /_eff )-1;};return _gfe ;};
// push appends a copy of `*_dcae` to the stack, so later changes to the
// caller's state do not affect the saved entry.
func (_bbcf *stateStack )push (_dcae *textState ){_ffg :=*_dcae ;*_bbcf =append (*_bbcf ,&_ffg )};
// asRuling converts the line segment into a ruling. For kind _ffag the primary
// coordinate `_adcb` is xMean() and the extent `_cdcc`..`_bdag` is the min/max
// of the end points' Y values; for kind _aadg it is yMean() and the min/max of
// the X values. Any other kind is logged as an error and yields (nil, false).
func (_cecd lineRuling )asRuling ()(*ruling ,bool ){_badf :=ruling {_gcae :_cecd ._cfa ,Color :_cecd .Color ,_cfga :_gfaa };switch _cecd ._cfa {case _ffag :_badf ._adcb =_cecd .xMean ();_badf ._cdcc =_c .Min (_cecd ._ebgf .Y ,_cecd ._cgbg .Y );_badf ._bdag =_c .Max (_cecd ._ebgf .Y ,_cecd ._cgbg .Y );case _aadg :_badf ._adcb =_cecd .yMean ();_badf ._cdcc =_c .Min (_cecd ._ebgf .X ,_cecd ._cgbg .X );_badf ._bdag =_c .Max (_cecd ._ebgf .X ,_cecd ._cgbg .X );default:_cd .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_cecd ._cfa );return nil ,false ;};return &_badf ,true ;};
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
// It delegates to ExtractPageText. On error the text is empty and the two
// counts and the error from ExtractPageText are passed through unchanged.
func (_dec *Extractor )ExtractTextWithStats ()(_ab string ,_cb int ,_dga int ,_bge error ){_eef ,_cb ,_dga ,_bge :=_dec .ExtractPageText ();if _bge !=nil {return "",_cb ,_dga ,_bge ;};return _eef .Text (),_cb ,_dga ,nil ;};
// addNeighbours links each paragraph to its nearest neighbour on each of its
// four sides, writing the links into the paragraph fields:
//
//	_efcbf  neighbour to the left   (candidate with the greatest Urx)
//	_agabc  neighbour to the right  (candidate with the smallest Llx)
//	_cedaf  neighbour below         (candidate with the greatest Ury)
//	_bbfd   neighbour above         (candidate with the smallest Lly)
//
// Left/right candidates come from yNeighbours(_aaae) and above/below
// candidates from xNeighbours(_dfdd); a candidate only counts if it lies
// entirely on that side of the paragraph. A side gets no link when another
// candidate on the same side overlaps the chosen one along the search axis,
// or when _cdg (left/right) or _eceg (above/below) rejects the pair.
//
// A final pass removes every link that is not reciprocated, e.g. a left link
// is kept only if the left paragraph's right link points back.
func (_adag paraList )addNeighbours (){_eada :=func (_bbffd []int ,_gcga *textPara )([]*textPara ,[]*textPara ){_edbd :=make ([]*textPara ,0,len (_bbffd )-1);_egba :=make ([]*textPara ,0,len (_bbffd )-1);for _ ,_fegb :=range _bbffd {_feeg :=_adag [_fegb ];if _feeg .Urx <=_gcga .Llx {_edbd =append (_edbd ,_feeg );}else if _feeg .Llx >=_gcga .Urx {_egba =append (_egba ,_feeg );};};return _edbd ,_egba ;};_bggeed :=func (_eegg []int ,_debc *textPara )([]*textPara ,[]*textPara ){_facba :=make ([]*textPara ,0,len (_eegg )-1);_eaad :=make ([]*textPara ,0,len (_eegg )-1);for _ ,_cffcg :=range _eegg {_cabbf :=_adag [_cffcg ];if _cabbf .Ury <=_debc .Lly {_eaad =append (_eaad ,_cabbf );}else if _cabbf .Lly >=_debc .Ury {_facba =append (_facba ,_cabbf );};};return _facba ,_eaad ;};_afgf :=_adag .yNeighbours (_aaae );for _ ,_egcgd :=range _adag {_egebe :=_afgf [_egcgd ];if len (_egebe )==0{continue ;};_gdbb ,_ggea :=_eada (_egebe ,_egcgd );if len (_gdbb )==0&&len (_ggea )==0{continue ;};if len (_gdbb )> 0{_fbgg :=_gdbb [0];for _ ,_bcee :=range _gdbb [1:]{if _bcee .Urx >=_fbgg .Urx {_fbgg =_bcee ;};};for _ ,_bgabab :=range _gdbb {if _bgabab !=_fbgg &&_bgabab .Urx > _fbgg .Llx {_fbgg =nil ;break ;};};if _fbgg !=nil &&_cdg (_egcgd .PdfRectangle ,_fbgg .PdfRectangle ){_egcgd ._efcbf =_fbgg ;};};if len (_ggea )> 0{_egff :=_ggea [0];for _ ,_egfb :=range _ggea [1:]{if _egfb .Llx <=_egff .Llx {_egff =_egfb ;};};for _ ,_beea :=range _ggea {if _beea !=_egff &&_beea .Llx < _egff .Urx {_egff =nil ;break ;};};if _egff !=nil &&_cdg (_egcgd .PdfRectangle ,_egff .PdfRectangle ){_egcgd ._agabc =_egff ;};};};_afgf =_adag .xNeighbours (_dfdd );for _ ,_aecg :=range _adag {_gafee :=_afgf [_aecg ];if len (_gafee )==0{continue ;};_ffea ,_bbae :=_bggeed (_gafee ,_aecg );if len 
(_ffea )==0&&len (_bbae )==0{continue ;};if len (_bbae )> 0{_aaee :=_bbae [0];for _ ,_ecdgd :=range _bbae [1:]{if _ecdgd .Ury >=_aaee .Ury {_aaee =_ecdgd ;};};for _ ,_aacdc :=range _bbae {if _aacdc !=_aaee &&_aacdc .Ury > _aaee .Lly {_aaee =nil ;break ;};};if _aaee !=nil &&_eceg (_aecg .PdfRectangle ,_aaee .PdfRectangle ){_aecg ._cedaf =_aaee ;};};if len (_ffea )> 0{_deege :=_ffea [0];for _ ,_dbge :=range _ffea [1:]{if _dbge .Lly <=_deege .Lly {_deege =_dbge ;};};for _ ,_aeccg :=range _ffea {if _aeccg !=_deege &&_aeccg .Lly < _deege .Ury {_deege =nil ;break ;};};if _deege !=nil &&_eceg (_aecg .PdfRectangle ,_deege .PdfRectangle ){_aecg ._bbfd =_deege ;};};};for _ ,_ededf :=range _adag {if _ededf ._efcbf !=nil &&_ededf ._efcbf ._agabc !=_ededf {_ededf ._efcbf =nil ;};if _ededf ._bbfd !=nil &&_ededf ._bbfd ._cedaf !=_ededf {_ededf ._bbfd =nil ;};if _ededf ._agabc !=nil &&_ededf ._agabc ._efcbf !=_ededf {_ededf ._agabc =nil ;};if _ededf ._cedaf !=nil &&_ededf ._cedaf ._bbfd !=_ededf {_ededf ._cedaf =nil ;};};};
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
// The work is done by a fresh imageExtractContext that processes the
// extractor's content stream with its resources; on error no PageImages value
// is returned.
func (_fag *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_afe :=&imageExtractContext {_dgf :options };_ga :=_afe .extractContentStreamImages (_fag ._dd ,_fag ._bac );if _ga !=nil {return nil ,_ga ;};return &PageImages {Images :_afe ._ae },nil ;};
// writeText writes the paragraph's text to `_cdad`. A paragraph without a
// table (`_bfag` nil) is written with writeCellText. For a table paragraph
// the cells are written row by row: a tab stands in for a nil cell, a space
// follows every cell, and a newline separates rows (none after the last).
// Errors returned by the writer are ignored.
func (_bfee *textPara )writeText (_cdad _d .Writer ){if _bfee ._bfag ==nil {_bfee .writeCellText (_cdad );return ;};for _aaag :=0;_aaag < _bfee ._bfag ._bdfg ;_aaag ++{for _cgec :=0;_cgec < _bfee ._bfag ._agcd ;_cgec ++{_cbag :=_bfee ._bfag .get (_cgec ,_aaag );if _cbag ==nil {_cdad .Write ([]byte ("\u0009"));}else {_cbag .writeCellText (_cdad );};_cdad .Write ([]byte ("\u0020"));};if _aaag < _bfee ._bfag ._bdfg -1{_cdad .Write ([]byte ("\u000a"));};};};
// firstWord returns the first word stored in bin `_abag`. The bin must exist
// and be non-empty; the element is accessed without a check.
func (_cafee *wordBag )firstWord (_abag int )*textWord {return _cafee ._gfc [_abag ][0]};
// checkOp validates the operand count of a content stream operation.
//
// `_edf` is the expected number of parameters; a negative value disables the
// check. On a mismatch it logs and returns false, together with an
// "incorrect parameter count" error only when `_edg` is true. Otherwise it
// returns (true, nil). A nil receiver is only logged (the operand is outside
// a text object); the count check still runs.
func (_becb *textObject )checkOp (_accf *_gc .ContentStreamOperation ,_edf int ,_edg bool )(_bgd bool ,_dcc error ){if _becb ==nil {var _gffa []_ce .PdfObject ;if _edf > 0{_gffa =_accf .Params ;if len (_gffa )> _edf {_gffa =_gffa [:_edf ];};};_cd .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_accf .Operand ,_gffa );};if _edf >=0{if len (_accf .Params )!=_edf {if _edg {_dcc =_ba .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");};_cd .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_accf .Operand ,_edf ,len (_accf .Params ),_accf .Params );return false ,_dcc ;};};return true ,nil ;};
// _fcdcd returns the distinct inner keys of the nested map, sorted ascending.
func _fcdcd (_gccgg map[float64 ]map[float64 ]gridTile )[]float64 {_efcd :=make ([]float64 ,0,len (_gccgg ));_bdcf 
:=make (map[float64 ]struct{},len (_gccgg ));for _ ,_bbfcg :=range _gccgg {for _ggcea :=range _bbfcg {if _ ,_bdcg :=_bdcf [_ggcea ];_bdcg {continue ;};_efcd =append (_efcd ,_ggcea );_bdcf [_ggcea ]=struct{}{};};};_ac .Float64s (_efcd );return _efcd ;};
// toTextMarks returns the TextMarks of all paragraphs whose `_gccg` flag is
// not set, with running offsets starting at 0. Between consecutive paragraphs
// it inserts a single space mark when _agcg is true for the pair and two
// newline marks otherwise; two newline marks are always appended at the end.
// The separators mirror the ones paraList.writeText writes.
func (_eegb paraList )toTextMarks ()[]TextMark {_geee :=0;var _ffgga []TextMark ;for _bgabc ,_edcg :=range _eegb {if _edcg ._gccg {continue ;};_aefb :=_edcg .toTextMarks (&_geee );_ffgga =append (_ffgga ,_aefb ...);if _bgabc !=len (_eegb )-1{if _agcg (_edcg ,_eegb [_bgabc +1]){_ffgga =_ceed (_ffgga ,&_geee ,"\u0020");}else {_ffgga =_ceed (_ffgga ,&_geee ,"\u000a");_ffgga =_ceed (_ffgga ,&_geee ,"\u000a");};};};_ffgga =_ceed (_ffgga ,&_geee ,"\u000a");_ffgga =_ceed (_ffgga ,&_geee ,"\u000a");return _ffgga ;};
// _dagf starts a new text line from the first word of bin `_gabbb`: the line
// takes that word's rectangle, `_bcdga` and `_cddc` values, and the word is
// moved from the bag into the line.
func _dagf (_ebag *wordBag ,_gabbb int )*textLine {_egf :=_ebag .firstWord (_gabbb );_bfdc :=textLine {PdfRectangle :_egf .PdfRectangle ,_gabbd :_egf ._bcdga ,_abbea :_egf ._cddc };_bfdc .pullWord (_ebag ,_egf ,_gabbb );return &_bfdc ;};
// sort orders the list in place using the list's comp method as the
// less-function.
func (_geab rulingList )sort (){_ac .Slice (_geab ,_geab .comp )};
// _dfda maps each ruling kind to its display name ("none", "horizontal",
// "vertical").
var _dfda =map[rulingKind ]string {_fedc :"\u006e\u006f\u006e\u0065",_aadg :"\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_ffag :"\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"};
// textPara is a paragraph of text: a bounding rectangle, its lines (`_ffgc`),
// an optional table (`_bfag`), two flags (`_gecbf`, `_gccg`) and links to the
// neighbouring paragraphs on its four sides (`_efcbf`, `_agabc`, `_bbfd`,
// `_cedaf`).
// NOTE(review): the role of the second rectangle `_bcda` is not visible here.
type textPara struct{_bf .PdfRectangle ;_bcda _bf .PdfRectangle ;_ffgc []*textLine ;_bfag *textTable ;_gecbf bool ;_gccg bool ;_efcbf *textPara ;_agabc *textPara ;_bbfd *textPara ;_cedaf *textPara ;};
// applyRemovals deletes from each bin the words listed for that bin in
// `_deca`. A bin is removed entirely when the number of words to remove
// equals its size.
// NOTE(review): the rebuilt slice is sized len(bin)-len(removals), so every
// word in a removal set is presumably expected to be present in its bin.
func (_fbg *wordBag )applyRemovals (_deca map[int ]map[*textWord ]struct{}){for _dgag ,_gfdd :=range _deca {if len (_gfdd )==0{continue ;};_dge :=_fbg ._gfc [_dgag ];_fcfc :=len (_dge )-len (_gfdd );if _fcfc ==0{delete (_fbg ._gfc ,_dgag );continue ;};_gbec :=make ([]*textWord ,_fcfc );_dff :=0;for _ ,_cfbd :=range _dge {if _ ,_aefa :=_gfdd [_cfbd ];!_aefa {_gbec [_dff ]=_cfbd ;_dff ++;};};_fbg ._gfc [_dgag ]=_gbec ;};};
// getFillColor converts the non-stroking colorspace/color pair of the
// graphics state into a color value by way of _cgbc.
func (_cgdc *textObject )getFillColor ()_g .Color {return _cgbc (_cgdc ._bcfd .ColorspaceNonStroking ,_cgdc 
._bcfd .ColorNonStroking );};
// bbox returns the line's bounding rectangle.
func (_edgb *textLine )bbox ()_bf .PdfRectangle {return _edgb .PdfRectangle };
// makeRemovals returns an empty removal set for every bin currently in the
// bag, ready to be filled and passed to applyRemovals.
func (_dfb *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_egdg :=make (map[int ]map[*textWord ]struct{},len (_dfb ._gfc ));for _efda :=range _dfb ._gfc {_egdg [_efda ]=make (map[*textWord ]struct{});};return _egdg ;};
// renderText processes the string operand `_afd` of a text-showing operator.
//
// The bytes are converted to character codes and then to strings with the
// current font; the character and miss counts are added to the text state.
// For every decoded string a text mark is created with newTextMark and
// appended to `_eabd`, and the matrix `_ceaf` is advanced by the glyph
// displacement (glyph width scaled by font size, plus character spacing, plus
// word spacing for a single space, all scaled by `_bcga`/100).
//
// Special cases:
//   - `_dfgf` set: logs "Invalid font" and returns nil without processing.
//   - a string that is a single NUL rune is skipped.
//   - no metrics for a character code: returns an error.
//   - newTextMark reports failure: the glyph is skipped; the `continue`
//     also skips the `_ceaf` advance for that glyph.
//
// NOTE(review): `_gcbd == nil` is tested only near the end of the loop body,
// after `_gcbd` has already been used for method calls.
// NOTE(review): the trace log after the matrix setup passes len(_eeac), an
// int, to the %q verb.
func (_cafb *textObject )renderText (_afd []byte )error {if _cafb ._dfgf {_cd .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");return nil ;};_gcbd :=_cafb .getCurrentFont ();_gfaf :=_gcbd .BytesToCharcodes (_afd );_eeac ,_efc ,_ebf :=_gcbd .CharcodesToStrings (_gfaf );if _ebf > 0{_cd .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_efc ,_ebf );};_cafb ._gffg ._bfbb +=_efc ;_cafb ._gffg ._bga +=_ebf ;_eed :=_cafb ._gffg ;_cdeb :=_eed ._bdd ;_gffgg :=_eed ._bcga /100.0;_fbad ,_efg :=_gcbd .GetRuneMetrics (' ');if !_efg {_fbad ,_efg =_gcbd .GetCharMetrics (32);};if !_efg {_fbad ,_ =_bf .DefaultFont ().GetRuneMetrics (' ');};_fdaf :=_fbad .Wx *_fdde ;_cd .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_fdaf ,_eeac ,_gcbd ,_cdeb );_dabf :=_cec .NewMatrix (_cdeb *_gffgg ,0,0,_cdeb ,0,_eed ._ccb );if _babc {_cd .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_gfaf ),_gfaf ,_eeac );};_cd .Log .Trace 
("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_gfaf ),_gfaf ,len (_eeac ));_dddd :=_cafb .getFillColor ();_faab :=_cafb .getStrokeColor ();for _edd ,_ede :=range _eeac {_gdc :=[]rune (_ede );if len (_gdc )==1&&_gdc [0]=='\x00'{continue ;};_ecga :=_gfaf [_edd ];_efgb :=_cafb ._bcfd .CTM .Mult (_cafb ._ceaf ).Mult (_dabf );_fccd :=0.0;if len (_gdc )==1&&_gdc [0]==32{_fccd =_eed ._bdf ;};_dfc ,_dgde :=_gcbd .GetCharMetrics (_ecga );if !_dgde {_cd .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_ecga ,_gdc ,_gdc ,_gcbd );return _cf .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_gcbd .String (),_ecga );};_efbe :=_cec .Point {X :_dfc .Wx *_fdde ,Y :_dfc .Wy *_fdde };_cbc :=_cec .Point {X :(_efbe .X *_cdeb +_fccd )*_gffgg };_eba :=_cec .Point {X :(_efbe .X *_cdeb +_eed ._cef +_fccd )*_gffgg };if _babc {_cd .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_cdeb ,_eed ._cef ,_eed ._bdf ,_gffgg );_cd .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_efbe ,_cbc ,_eba );};_efba :=_bggc (_cbc );_ecdb :=_bggc (_eba );_efgg :=_cafb ._bcfd .CTM .Mult (_cafb ._ceaf ).Mult (_efba );if _fgf {_cd .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 
\u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_cafb ._bcfd .CTM ,_cafb ._ceaf ,_ecdb ,_gbaf (_cafb ._bcfd .CTM .Mult (_cafb ._ceaf ).Mult (_ecdb )),_efba ,_efgg ,_gbaf (_efgg ));};_cgdb ,_eag :=_cafb .newTextMark (_ca .ExpandLigatures (_gdc ),_efgb ,_gbaf (_efgg ),_c .Abs (_fdaf *_efgb .ScalingFactorX ()),_gcbd ,_cafb ._gffg ._cef ,_dddd ,_faab );if !_eag {_cd .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");continue ;};if _gcbd ==nil {_cd .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _gcbd .Encoder ()==nil {_cd .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_gcbd );}else {if _gfbe ,_badb :=_gcbd .Encoder ().CharcodeToRune (_ecga );_badb {_cgdb ._ffaa =string (_gfbe );};};_cd .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_edd ,_ecga ,_cgdb ,_efgb );_cafb ._eabd =append (_cafb ._eabd ,&_cgdb );_cafb ._ceaf .Concat (_ecdb );};return nil ;};
// _bbacb returns the keys of the map, sorted ascending.
func _bbacb (_dbdbg map[int ][]float64 )[]int {_ccgfb :=make ([]int ,len (_dbdbg ));_acgc :=0;for _decfe :=range _dbdbg {_ccgfb [_acgc ]=_decfe ;_acgc ++;};_ac .Ints (_ccgfb );return _ccgfb ;};
// NOTE(review): the use of this constant is not visible in this part of the file.
const _abd =10;
// taken reports whether the paragraph is nil or has its `_gecbf` flag set.
func (_eggc *textPara )taken ()bool {return _eggc ==nil ||_eggc ._gecbf };
// writeCellText writes the text of the paragraph's lines to `_eede`.
// When _dagb is set and a line other than the last ends in a hyphen, its text
// is first passed through _adbc and no separator follows it. After every
// other non-final line the separator returned by _egae for the `_abbea`
// values of that line and the next one is written. Write errors are ignored.
func (_dcda *textPara )writeCellText (_eede _d .Writer ){for _bdcbc ,_efbc :=range _dcda ._ffgc {_abde :=_efbc .text ();_gadd :=_dagb &&_efbc .endsInHyphen ()&&_bdcbc !=len (_dcda ._ffgc )-1;if _gadd {_abde =_adbc (_abde );};_eede .Write ([]byte 
(_abde ));if !(_gadd ||_bdcbc ==len (_dcda ._ffgc )-1){_eede .Write ([]byte (_egae (_efbc ._abbea ,_dcda ._ffgc [_bdcbc +1]._abbea )));};};};
// setCharSpacing stores `_gaaf` as the `_cef` value of the text state.
// A nil receiver is a no-op.
func (_fcg *textObject )setCharSpacing (_gaaf float64 ){if _fcg ==nil {return ;};_fcg ._gffg ._cef =_gaaf ;if _babc {_cd .Log .Info ("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073",_gaaf ,_fcg ._gffg .String ());};};
// isQuadrilateral reports whether the subpath has exactly 4 points, or
// 5 points with the last one identical to the first.
func (_baecb *subpath )isQuadrilateral ()bool {if len (_baecb ._gaaca )< 4||len (_baecb ._gaaca )> 5{return false ;};if len (_baecb ._gaaca )==5{_deef :=_baecb ._gaaca [0];_ecdd :=_baecb ._gaaca [4];if _deef .X !=_ecdd .X ||_deef .Y !=_ecdd .Y {return false ;};};return true ;};
// _ddec reports whether the absolute value of `_bdcd` is below _ecba.
func _ddec (_bdcd float64 )bool {return _c .Abs (_bdcd )< _ecba };
// _aade reports whether the two values differ by at most _egdeb.
func _aade (_dbfeb ,_cccg float64 )bool {return _c .Abs (_dbfeb -_cccg )<=_egdeb };
// toTilings tidies the rulings (tidied("all")), groups them with toGrids and
// converts every grid with asTiling. It returns the tidied list together with
// the tilings, or (nil, nil) for an empty list.
func (_dfde rulingList )toTilings ()(rulingList ,[]gridTiling ){_dfde .log ("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s");if len (_dfde )==0{return nil ,nil ;};_dfde =_dfde .tidied ("\u0061\u006c\u006c");_dfde .log ("\u0074\u0069\u0064\u0069\u0065\u0064");_egdfc :=_dfde .toGrids ();_bgfba :=make ([]gridTiling ,len (_egdfc ));for _bbfgg ,_fbag :=range _egdfc {_bgfba [_bbfgg ]=_fbag .asTiling ();};return _dfde ,_bgfba ;};
// writeText writes the text of all paragraphs whose `_gccg` flag is not set.
// Between consecutive paragraphs it writes a single space when _agcg is true
// for the pair and two newlines otherwise; two newlines are always written at
// the end. Write errors are ignored.
func (_egfe paraList )writeText (_bbba _d .Writer ){for _gecb ,_cgef :=range _egfe {if _cgef ._gccg {continue ;};_cgef .writeText (_bbba );if _gecb !=len (_egfe )-1{if _agcg (_cgef ,_egfe [_gecb +1]){_bbba .Write ([]byte ("\u0020"));}else {_bbba .Write ([]byte ("\u000a"));_bbba .Write ([]byte ("\u000a"));};};};_bbba .Write ([]byte ("\u000a"));_bbba .Write ([]byte ("\u000a"));};
// _fbecg reports whether `_dbg` ends in a hyphen that directly follows a
// non-space rune. Strings with fewer than _dffdd runes are rejected; the last
// rune must belong to unicode.Hyphen.
func _fbecg (_dbg string )bool {if _a .RuneCountInString (_dbg )< _dffdd {return false ;};_edgg ,_ccge :=_a .DecodeLastRuneInString (_dbg );if _ccge <=0||!_ag .Is (_ag .Hyphen ,_edgg ){return false ;};_edgg ,_ccge =_a .DecodeLastRuneInString (_dbg [:len (_dbg )-_ccge ]);return _ccge > 0&&!_ag 
.IsSpace (_edgg );};
// absorb moves every word of `_fbaa` into the receiver. The words are pulled
// one by one and then removed from `_fbaa` in a single applyRemovals pass.
func (_fecf *wordBag )absorb (_fbaa *wordBag ){_aebc :=_fbaa .makeRemovals ();for _dceg ,_bae :=range _fbaa ._gfc {for _ ,_dded :=range _bae {_fecf .pullWord (_dded ,_dceg ,_aebc );};};_fbaa .applyRemovals (_aebc );};
// textTable is a table of paragraphs: `_agcd` columns by `_bdfg` rows inside
// a bounding rectangle. Cells are stored in `_afbef` and composite cells in
// `_bfcbf`, both keyed by the packed (column, row) value from _bfddd.
// `_abad` is printed as "grid" by the table's log method.
type textTable struct{_bf .PdfRectangle ;_agcd ,_bdfg int ;_abad bool ;_afbef map[uint64 ]*textPara ;_bfcbf map[uint64 ]compositeCell ;};
// markCells sets the `_gecbf` flag on every non-nil cell of the table.
func (_fdba *textTable )markCells (){for _ecfde :=0;_ecfde < _fdba ._bdfg ;_ecfde ++{for _fbge :=0;_fbge < _fdba ._agcd ;_fbge ++{_cafg :=_fdba .get (_fbge ,_ecfde );if _cafg !=nil {_cafg ._gecbf =true ;};};};};
// scanBand scans the receiver's words whose `_cddc` value lies in
// [`_eaga`-margin, `_fbfa`+margin], where margin is _gaff times `_bgef._dadc`,
// and returns how many of them qualify.
//
// A word qualifies when `_fcaa(_bgef, word)` is true, its size difference to
// `_bgef` does not exceed `_cdba` (checked only when `_cdba` > 0; the measure
// is the smaller of the relative difference and the larger size ratio of the
// word's `_bcdga` and `_bgef._dadc`), and `_bgef.blocked(word)` is false.
//
// With `_edbe` false each qualifying word is moved from the receiver into
// `_bgef`. With `_edbe` true nothing is moved and scanning of the current bin
// stops after its first qualifying word. Unless `_acfc` is set, the band is
// widened to include the `_cddc` of every qualifying word; this affects the
// per-word range test, not the list of bins, which is computed once up front.
// The `_gabb` argument is not used in the body.
func (_ffgb *wordBag )scanBand (_gabb string ,_bgef *wordBag ,_fcaa func (_aca *wordBag ,_efeb *textWord )bool ,_eaga ,_fbfa ,_cdba float64 ,_edbe ,_acfc bool )int {_agb :=_bgef ._dadc ;var _gbbb map[int ]map[*textWord ]struct{};if !_edbe {_gbbb =_ffgb .makeRemovals ();};_bggea :=_gaff *_agb ;_adb :=0;for _ ,_ggce :=range _ffgb .depthBand (_eaga -_bggea ,_fbfa +_bggea ){if len (_ffgb ._gfc [_ggce ])==0{continue ;};for _ ,_ebdb :=range _ffgb ._gfc [_ggce ]{if !(_eaga -_bggea <=_ebdb ._cddc &&_ebdb ._cddc <=_fbfa +_bggea ){continue ;};if !_fcaa (_bgef ,_ebdb ){continue ;};_bggg :=2.0*_c .Abs (_ebdb ._bcdga -_bgef ._dadc )/(_ebdb ._bcdga +_bgef ._dadc );_cefd :=_c .Max (_ebdb ._bcdga /_bgef ._dadc ,_bgef ._dadc /_ebdb ._bcdga );_bbbd :=_c .Min (_bggg ,_cefd );if _cdba > 0&&_bbbd > _cdba {continue ;};if _bgef .blocked (_ebdb ){continue ;};if !_edbe {_bgef .pullWord (_ebdb ,_ggce ,_gbbb );};_adb ++;if !_acfc {if _ebdb ._cddc < _eaga {_eaga =_ebdb ._cddc ;};if _ebdb ._cddc > _fbfa {_fbfa =_ebdb ._cddc ;};};if _edbe {break ;};};};if !_edbe {_ffgb .applyRemovals (_gbbb );};return _adb ;};
// String returns a human readable description of the shapes state.
// The description has the form "{N subpaths fresh=B}", where N is the number
// of accumulated subpaths and B the value of the `_decf` flag.
func (_daf *shapesState) String() string {
	subpathCount := len(_daf._dda)
	return _cf.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", subpathCount, _daf._decf)
}

// lineTo adds the point (`_cafef`, `_dccg`) to the current path via addPoint.
// With the _fgdd debug flag set, the point and its devicePoint are logged
// first.
func (_cebb *shapesState) lineTo(_cafef, _dccg float64) {
	if _fgdd {
		devicePt := _cebb.devicePoint(_cafef, _dccg)
		_cd.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _cafef, _dccg, devicePt)
	}
	_cebb.addPoint(_cafef, _dccg)
}

// extractFormImages extracts the images found in the content stream of the
// form XObject named `_ggd`. The form's own resources are used when present,
// otherwise the enclosing resources `_ccc`. A name that does not resolve to a
// form is silently ignored. The `_ee` counter is incremented once the form
// has been processed without error.
func (_aa *imageExtractContext) extractFormImages(_ggd *_ce.PdfObjectName, _fda _gc.GraphicsState, _ccc *_bf.PdfPageResources) error {
	form, err := _ccc.GetXObjectFormByName(*_ggd)
	switch {
	case err != nil:
		return err
	case form == nil:
		return nil
	}

	content, err := form.GetContentStream()
	if err != nil {
		return err
	}

	resources := form.Resources
	if resources == nil {
		resources = _ccc
	}

	if err = _aa.extractContentStreamImages(string(content), resources); err != nil {
		return err
	}
	_aa._ee++
	return nil
}

// bbox returns the smallest rectangle that contains every point of every
// subpath in the section. The section must contain at least one subpath with
// at least one point; the first point is accessed without a check.
func (_fea pathSection) bbox() _bf.PdfRectangle {
	origin := _fea._fgaff[0]._gaaca[0]
	box := _bf.PdfRectangle{Llx: origin.X, Urx: origin.X, Lly: origin.Y, Ury: origin.Y}

	for idx, sub := range _fea._fgaff {
		points := sub._gaaca
		if idx == 0 {
			// The first point seeded the box above.
			points = points[1:]
		}
		for _, pt := range points {
			switch {
			case pt.X < box.Llx:
				box.Llx = pt.X
			case pt.X > box.Urx:
				box.Urx = pt.X
			}
			switch {
			case pt.Y < box.Lly:
				box.Lly = pt.Y
			case pt.Y > box.Ury:
				box.Ury = pt.Y
			}
		}
	}
	return box
}
// Elements returns the TextMarks in the array.
func (_dcgb *TextMarkArray )Elements ()[]TextMark {return _dcgb ._gef };func (_edcde *textTable )reduceTiling (_fdcbe gridTiling ,_cfece float64 )*textTable {_afbd :=make ([]int ,0,_edcde ._bdfg );_bacde :=make ([]int ,0,_edcde ._agcd );_fbeccd :=_fdcbe ._ccag ;_fgbdd :=_fdcbe ._fgfb ;for _egfg :=0;_egfg < _edcde ._bdfg ;_egfg ++{_deba :=_egfg > 0&&_c .Abs (_fgbdd [_egfg -1]-_fgbdd [_egfg ])< _cfece &&_edcde .emptyRow (_egfg );if !_deba {_afbd =append (_afbd ,_egfg );};};for _ffbb :=0;_ffbb < _edcde ._agcd ;_ffbb ++{_fgdcf :=_ffbb < _edcde ._agcd -1&&_c .Abs (_fbeccd [_ffbb +1]-_fbeccd [_ffbb ])< _cfece &&_edcde .emptyColumn (_ffbb );if !_fgdcf {_bacde =append (_bacde ,_ffbb );};};if len (_afbd )==_edcde ._bdfg &&len (_bacde )==_edcde ._agcd {return _edcde ;};_bcag :=textTable {_abad :_edcde ._abad ,_agcd :len (_bacde ),_bdfg :len (_afbd ),_bfcbf :make (map[uint64 ]compositeCell ,len (_bacde )*len (_afbd ))};if _ccae {_cd .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_edcde ._agcd ,_edcde ._bdfg ,len (_bacde ),len (_afbd ));_cd .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_bacde );_cd .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_afbd );};for _addbe ,_fgcd :=range _afbd {for _ggfe ,_dfabc :=range _bacde {_abbbb ,_fedcd :=_edcde .getComposite (_dfabc ,_fgcd );if len (_abbbb )==0{continue ;};if _ccae {_cf .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_ggfe ,_addbe ,_dfabc ,_fgcd ,_ecedb (_abbbb .merge ().text (),50));};_bcag .putComposite (_ggfe ,_addbe ,_abbbb ,_fedcd );};};return &_bcag ;};func (_gffge *shapesState )lastpointEstablished ()(_cec .Point ,bool ){if _gffge ._decf {return _gffge ._fafa ,false ;};_bcfa :=len (_gffge ._dda );if _bcfa > 0&&_gffge 
._dda [_bcfa -1]._fgg {return _gffge ._dda [_bcfa -1].last (),false ;};return _cec .Point {},true ;};
|
||
|
||
// Tables returns the tables extracted from the page.
|
||
func (_gce PageText )Tables ()[]TextTable {if _ccae {_cd .Log .Info ("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064",len (_gce ._edef ));};return _gce ._edef ;};const (RenderModeStroke RenderMode =1<<iota ;RenderModeFill ;RenderModeClip ;);func (_eeeb *textPara )toTextMarks (_cabc *int )[]TextMark {if _eeeb ._bfag ==nil {return _eeeb .toCellTextMarks (_cabc );};var _acbc []TextMark ;for _dbfe :=0;_dbfe < _eeeb ._bfag ._bdfg ;_dbfe ++{for _ecegc :=0;_ecegc < _eeeb ._bfag ._agcd ;_ecegc ++{_cfgdf :=_eeeb ._bfag .get (_ecegc ,_dbfe );if _cfgdf ==nil {_acbc =_ceed (_acbc ,_cabc ,"\u0009");}else {_ega :=_cfgdf .toCellTextMarks (_cabc );_acbc =append (_acbc ,_ega ...);};_acbc =_ceed (_acbc ,_cabc ,"\u0020");};if _dbfe < _eeeb ._bfag ._bdfg -1{_acbc =_ceed (_acbc ,_cabc ,"\u000a");};};return _acbc ;};
|
||
|
||
// TextMark represents extracted text on a page with information regarding both textual content,
|
||
// formatting (font and size) and positioning.
|
||
// It is the smallest unit of text on a PDF page, typically a single character.
|
||
//
|
||
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
|
||
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
|
||
// `bbox` of substring `term` in `text`.
|
||
//
|
||
// ex, _ := New(page)
|
||
// // handle errors
|
||
// pageText, _, _, err := ex.ExtractPageText()
|
||
// // handle errors
|
||
// text := pageText.Text()
|
||
// textMarks := pageText.Marks()
|
||
//
|
||
// start := strings.Index(text, term)
|
||
// end := start + len(term)
|
||
// spanMarks, err := textMarks.RangeOffset(start, end)
|
||
// // handle errors
|
||
// bbox, ok := spanMarks.BBox()
|
||
// // handle errors
|
||
type TextMark struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Original is the text in the PDF. It has not been decoded like `Text`.
|
||
Original string ;
|
||
|
||
// BBox is the bounding box of the text.
|
||
BBox _bf .PdfRectangle ;
|
||
|
||
// Font is the font the text was drawn with.
|
||
Font *_bf .PdfFont ;
|
||
|
||
// FontSize is the font size the text was drawn with.
|
||
FontSize float64 ;
|
||
|
||
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
|
||
// text, textMarks := pageText.Text(), pageText.Marks()
|
||
// marks := textMarks.Elements()
|
||
// then marks[i].Offset is the offset of marks[i].Text in text.
|
||
Offset int ;
|
||
|
||
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
|
||
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
|
||
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
|
||
Meta bool ;
|
||
|
||
// FillColor is the fill color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
FillColor _g .Color ;
|
||
|
||
// StrokeColor is the stroke color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
StrokeColor _g .Color ;
|
||
|
||
// Orientation is the text orientation
|
||
Orientation int ;};func (_cdef *textObject )setTextRenderMode (_gfb int ){if _cdef ==nil {return ;};_cdef ._gffg ._fbcc =RenderMode (_gfb );};
|
||
|
||
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
|
||
func (_babfg *TextMarkArray )BBox ()(_bf .PdfRectangle ,bool ){var _fcef _bf .PdfRectangle ;_fdf :=false ;for _ ,_dccf :=range _babfg ._gef {if _dccf .Meta ||_bedeb (_dccf .Text ){continue ;};if _fdf {_fcef =_gffe (_fcef ,_dccf .BBox );}else {_fcef =_dccf .BBox ;_fdf =true ;};};return _fcef ,_fdf ;};func (_add *textObject )setTextMatrix (_abg []float64 ){if len (_abg )!=6{_cd .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_abg ));return ;};_aab ,_cgcg ,_aggf ,_daagg ,_agfc ,_ccfg :=_abg [0],_abg [1],_abg [2],_abg [3],_abg [4],_abg [5];_add ._ceaf =_cec .NewMatrix (_aab ,_cgcg ,_aggf ,_daagg ,_agfc ,_ccfg );_add ._ddef =_add ._ceaf ;};type paraList []*textPara ;func (_ggfcc *textPara )toCellTextMarks (_aaaa *int )[]TextMark {var _edee []TextMark ;for _ebeg ,_dbeeg :=range _ggfcc ._ffgc {_fbgb :=_dbeeg .toTextMarks (_aaaa );_eabf :=_dagb &&_dbeeg .endsInHyphen ()&&_ebeg !=len (_ggfcc ._ffgc )-1;if _eabf {_fbgb =_dcdc (_fbgb ,_aaaa );};_edee =append (_edee ,_fbgb ...);if !(_eabf ||_ebeg ==len (_ggfcc ._ffgc )-1){_edee =_ceed (_edee ,_aaaa ,_egae (_dbeeg ._abbea ,_ggfcc ._ffgc [_ebeg +1]._abbea ));};};return _edee ;};func (_efgga *shapesState )devicePoint (_cfbg ,_eceb float64 )_cec .Point {_fdaad :=_efgga ._ebd .Mult (_efgga ._cbbe );_cfbg ,_eceb =_fdaad .Transform (_cfbg ,_eceb );return _cec .NewPoint (_cfbg ,_eceb );};func (_bbbg *textObject )newTextMark (_addd string ,_cfgef _cec .Matrix ,_cabg _cec .Point ,_edbc float64 ,_dfff *_bf .PdfFont ,_adac float64 ,_bfde ,_dcdg _g .Color )(textMark ,bool ){_eeea :=_cfgef .Angle ();_bded :=_cdbg (_eeea ,_edfd );var _bead float64 ;if _bded %180!=90{_bead =_cfgef .ScalingFactorY ();}else {_bead =_cfgef .ScalingFactorX ();};_abbb :=_gbaf (_cfgef );_adfd :=_bf .PdfRectangle {Llx :_abbb .X ,Lly :_abbb .Y ,Urx :_cabg .X ,Ury :_cabg .Y };switch _bded %360{case 90:_adfd .Urx -=_bead ;case 180:_adfd .Ury -=_bead ;case 270:_adfd .Urx 
+=_bead ;case 0:_adfd .Ury +=_bead ;default:_bded =0;_adfd .Ury +=_bead ;};if _adfd .Llx > _adfd .Urx {_adfd .Llx ,_adfd .Urx =_adfd .Urx ,_adfd .Llx ;};if _adfd .Lly > _adfd .Ury {_adfd .Lly ,_adfd .Ury =_adfd .Ury ,_adfd .Lly ;};_fegc ,_fcdgb :=_afeg (_adfd ,_bbbg ._cab ._be );if !_fcdgb {_cd .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_adfd ,_bbbg ._cab ._be ,_addd );};_adfd =_fegc ;_faae :=_adfd ;_dacd :=_bbbg ._cab ._be ;switch _bded %360{case 90:_dacd .Urx ,_dacd .Ury =_dacd .Ury ,_dacd .Urx ;_faae =_bf .PdfRectangle {Llx :_dacd .Urx -_adfd .Ury ,Urx :_dacd .Urx -_adfd .Lly ,Lly :_adfd .Llx ,Ury :_adfd .Urx };case 180:_faae =_bf .PdfRectangle {Llx :_dacd .Urx -_adfd .Llx ,Urx :_dacd .Urx -_adfd .Urx ,Lly :_dacd .Ury -_adfd .Lly ,Ury :_dacd .Ury -_adfd .Ury };case 270:_dacd .Urx ,_dacd .Ury =_dacd .Ury ,_dacd .Urx ;_faae =_bf .PdfRectangle {Llx :_adfd .Ury ,Urx :_adfd .Lly ,Lly :_dacd .Ury -_adfd .Llx ,Ury :_dacd .Ury -_adfd .Urx };};if _faae .Llx > _faae .Urx {_faae .Llx ,_faae .Urx =_faae .Urx ,_faae .Llx ;};if _faae .Lly > _faae .Ury {_faae .Lly ,_faae .Ury =_faae .Ury ,_faae .Lly ;};_cfgcb :=textMark {_bgfa :_addd ,PdfRectangle :_faae ,_bagb :_adfd ,_gcag :_dfff ,_ada :_bead ,_eebd :_adac ,_dba :_cfgef ,_cgcga :_cabg ,_bcaf :_bded ,_faeg :_bfde ,_ebcfg :_dcdg };if _bbac {_cd .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_abbb ,_cabg ,_cfgcb .String ());};return _cfgcb ,_fcdgb ;};func _dcdc (_edbg []TextMark ,_fgge *int )[]TextMark {_cffg :=_edbg [len (_edbg )-1];_cadca :=[]rune (_cffg .Text );if len (_cadca )==1{_edbg =_edbg [:len (_edbg 
)-1];_afgd :=_edbg [len (_edbg )-1];*_fgge =_afgd .Offset +len (_afgd .Text );}else {_bfdd :=_adbc (_cffg .Text );*_fgge +=len (_bfdd )-len (_cffg .Text );_cffg .Text =_bfdd ;};return _edbg ;};func (_ddgbc paraList )inTile (_bggf gridTile )paraList {var _defe paraList ;for _ ,_dgeg :=range _ddgbc {if _bggf .contains (_dgeg .PdfRectangle ){_defe =append (_defe ,_dgeg );};};if _ccae {_cf .Printf ("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n",_bggf ,len (_defe ));for _gdeb ,_dgcg :=range _defe {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gdeb ,_dgcg );};_cf .Println ("");};return _defe ;};type textLine struct{_bf .PdfRectangle ;_abbea float64 ;_cddd []*textWord ;_gabbd float64 ;};func (_fbae *shapesState )establishSubpath ()*subpath {_fgeb ,_fagb :=_fbae .lastpointEstablished ();if !_fagb {_fbae ._dda =append (_fbae ._dda ,_ecf (_fgeb ));};if len (_fbae ._dda )==0{return nil ;};_fbae ._decf =false ;return _fbae ._dda [len (_fbae ._dda )-1];};func _bdccf (_bagd string )(string ,bool ){_gcee :=[]rune (_bagd );if len (_gcee )!=1{return "",false ;};_baeca ,_cdfc :=_bega [_gcee [0]];return _baeca ,_cdfc ;};func (_afc *textObject )moveTextSetLeading (_dab ,_dgg float64 ){_afc ._gffg ._dfd =-_dgg ;_afc .moveLP (_dab ,_dgg );};func (_gcec *textTable )emptyRow (_ddga int )bool {for _dada :=0;_dada < _gcec ._agcd ;_dada ++{_dgcaa :=_gcec .get (_dada ,_ddga );if _dgcaa !=nil &&_dgcaa .text ()!=""{return false ;};};return true ;};func (_gbfga *ruling )gridIntersecting (_dcafc *ruling )bool {return _aade (_gbfga ._cdcc ,_dcafc ._cdcc )&&_aade (_gbfga ._bdag ,_dcafc ._bdag );};type wordBag struct{_bf .PdfRectangle ;_dadc float64 ;_gfda ,_caab rulingList ;_afec float64 ;_gfc map[int ][]*textWord ;};func (_bgaf *textTable )computeBbox ()_bf .PdfRectangle {var _dcfe _bf .PdfRectangle ;_aeab :=false ;for _fada :=0;_fada < _bgaf ._bdfg ;_fada ++{for _ggac :=0;_ggac < _bgaf ._agcd ;_ggac 
++{_gccb :=_bgaf .get (_ggac ,_fada );if _gccb ==nil {continue ;};if !_aeab {_dcfe =_gccb .PdfRectangle ;_aeab =true ;}else {_dcfe =_gffe (_dcfe ,_gccb .PdfRectangle );};};};return _dcfe ;};func (_ggcc *textLine )endsInHyphen ()bool {_dage :=_ggcc ._cddd [len (_ggcc ._cddd )-1];_efcb :=_dage ._gbdc ;_eec ,_eda :=_a .DecodeLastRuneInString (_efcb );if _eda <=0||!_ag .Is (_ag .Hyphen ,_eec ){return false ;};if _dage ._eeed &&_fbecg (_efcb ){return true ;};return _fbecg (_ggcc .text ());};
|
||
|
||
// ExtractText processes and extracts all text data in content streams and returns as a string.
|
||
// It takes into account character encodings in the PDF file, which are decoded by
|
||
// CharcodeBytesToUnicode.
|
||
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
|
||
func (_dbfg *Extractor )ExtractText ()(string ,error ){_fecc ,_ ,_ ,_bfd :=_dbfg .ExtractTextWithStats ();return _fecc ,_bfd ;};func _gbgf (_cegde float64 )bool {return _c .Abs (_cegde )< _cggb };func _fegbe (_faad []*textWord ,_adcgf *textWord )[]*textWord {for _gffbb ,_bdfca :=range _faad {if _bdfca ==_adcgf {return _fgead (_faad ,_gffbb );};};_cd .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_adcgf );return nil ;};func (_gbef *textPara )isAtom ()*textTable {_cagdb :=_gbef ;_cdgg :=_gbef ._agabc ;_edcf :=_gbef ._cedaf ;if !(_cdgg !=nil &&!_cdgg ._gecbf &&_edcf !=nil &&!_edcf ._gecbf ){return nil ;};_gega :=_cdgg ._cedaf ;if !(_gega !=nil &&!_gega ._gecbf &&_gega ==_edcf ._agabc ){return nil ;};return _dabcc (_cagdb ,_cdgg ,_edcf ,_gega );};func _agcg (_ddba ,_bcdb *textPara )bool {if _ddba ._gccg ||_bcdb ._gccg {return true ;};return _gbgf (_ddba .depth ()-_bcdb .depth ());};func _ggbg (_agccd string ,_eaeg []rulingList ){_cd .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_eaeg ),_agccd );for _afdg ,_cacb :=range _eaeg {_cf .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_afdg ,_cacb .String ());};}; |