mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-26 13:48:55 +08:00
241 lines
128 KiB
Go
241 lines
128 KiB
Go
//
|
||
// Copyright 2020 FoxyUtils ehf. All rights reserved.
|
||
//
|
||
// This is a commercial product and requires a license to operate.
|
||
// A trial license can be obtained at https://unidoc.io
|
||
//
|
||
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
|
||
//
|
||
// Use of this source code is governed by the UniDoc End User License Agreement
|
||
// terms that can be accessed at https://unidoc.io/eula/
|
||
|
||
//
|
||
// Package extractor is used for quickly extracting PDF content through a simple interface.
|
||
// Currently offers functionality for extracting textual content.
|
||
//
|
||
package extractor ;import (_g "bytes";_d "errors";_bcb "fmt";_f "github.com/unidoc/unipdf/v3/common";_cc "github.com/unidoc/unipdf/v3/common/license";_bdb "github.com/unidoc/unipdf/v3/contentstream";_ea "github.com/unidoc/unipdf/v3/core";_cg "github.com/unidoc/unipdf/v3/internal/textencoding";_dd "github.com/unidoc/unipdf/v3/internal/transform";_ab "github.com/unidoc/unipdf/v3/model";_eg "golang.org/x/text/unicode/norm";_fc "golang.org/x/xerrors";_ca "image/color";_bd "io";_dg "math";_a "regexp";_de "sort";_bc "strings";_e "unicode";_c "unicode/utf8";);
|
||
|
||
// Marks returns the TextMark collection for a page. It represents all the text on the page.
|
||
func (_bcf PageText )Marks ()*TextMarkArray {return &TextMarkArray {_faf :_bcf ._bbg }};func _edc (_cdfb *Extractor ,_efd *_ab .PdfPageResources ,_bcgf _bdb .GraphicsState ,_eed *textState ,_ccfe *stateStack )*textObject {return &textObject {_beb :_cdfb ,_ffg :_efd ,_ffb :_bcgf ,_fce :_ccfe ,_geag :_eed ,_bfe :_dd .IdentityMatrix (),_eddf :_dd .IdentityMatrix ()};};func (_gbdc lineRuling )xDelta ()float64 {return _dg .Abs (_gbdc ._fbgf .X -_gbdc ._fbgf .X )};func _dgdg (_bcfc string )string {_edbede :=[]rune (_bcfc );return string (_edbede [:len (_edbede )-1])};func (_fegf *imageExtractContext )extractFormImages (_gdf *_ea .PdfObjectName ,_afg _bdb .GraphicsState ,_aea *_ab .PdfPageResources )error {_fag ,_dcb :=_aea .GetXObjectFormByName (*_gdf );if _dcb !=nil {return _dcb ;};if _fag ==nil {return nil ;};_ffd ,_dcb :=_fag .GetContentStream ();if _dcb !=nil {return _dcb ;};_dca :=_fag .Resources ;if _dca ==nil {_dca =_aea ;};_dcb =_fegf .extractContentStreamImages (string (_ffd ),_dca );if _dcb !=nil {return _dcb ;};_fegf ._fe ++;return nil ;};func (_bfce rulingList )toGrids ()[]rulingList {if len (_bfce )==0{return nil ;};if len (_bfce )> 200{_f .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0054\u004f\u0020\u004d\u0041\u004eY\u0020r\u0075\u006c\u0069\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064",len (_bfce ));return nil ;};_fcfa :=_bfce .intersections ();if _bfdg {_f .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064s\u003a\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0069\u006e\u0074e\u0072\u0073\u0065\u0063\u0074\u0073\u003d%\u0064\u0020\u0025\u0076",len (_bfce ),len (_fcfa ),_fcfa );};_addd :=make (map[int ]intSet ,len (_bfce ));for _dbeg :=range _bfce {_gdcd :=_bfce .connections (_fcfa ,_dbeg );if len (_gdcd )> 0{_addd [_dbeg ]=_gdcd ;};};if _bfdg {_f .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0076",_addd );};_dbce :=_gffea (len (_bfce ),func 
(_bcbd ,_cec int )bool {_fgaa ,_gfgb :=len (_addd [_bcbd ]),len (_addd [_cec ]);if _fgaa !=_gfgb {return _fgaa > _gfgb ;};return _bfce .comp (_bcbd ,_cec );});if _bfdg {_f .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_dbce );};_gfgeb :=[][]int {{_dbce [0]}};_fedfc :for _ ,_agaag :=range _dbce [1:]{for _cdaf ,_aace :=range _gfgeb {for _ ,_badf :=range _aace {if _addd [_badf ].has (_agaag ){_gfgeb [_cdaf ]=append (_aace ,_agaag );continue _fedfc ;};};};_gfgeb =append (_gfgeb ,[]int {_agaag });};if _bfdg {_f .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_gfgeb );};_de .SliceStable (_gfgeb ,func (_bcac ,_begg int )bool {return len (_gfgeb [_bcac ])> len (_gfgeb [_begg ])});for _ ,_abab :=range _gfgeb {_de .Slice (_abab ,func (_dcfaf ,_ddfff int )bool {return _bfce .comp (_abab [_dcfaf ],_abab [_ddfff ])});};_dcgfa :=make ([]rulingList ,len (_gfgeb ));for _fabb ,_dbd :=range _gfgeb {_eged :=make (rulingList ,len (_dbd ));for _fgcd ,_dfbab :=range _dbd {_eged [_fgcd ]=_bfce [_dfbab ];};_dcgfa [_fabb ]=_eged ;};if _bfdg {_f .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069d\u0073\u003d\u0025\u0076",_dcgfa );};var _dade []rulingList ;for _ ,_bgff :=range _dcgfa {if _bgff .isActualGrid (){_dade =append (_dade ,_bgff );};};if _bfdg {_f .Log .Info ("\u0074\u006f\u0047ri\u0064\u0073\u003a\u0020\u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0076",_dade );_f .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_dcgfa ),len (_dade ));};return _dade ;};func (_dbbf rulingList )cells ()(int ,int ,[]_ab .PdfRectangle ){_dbbf .sortStrict ();_fbdgc ,_bebc :=_dbbf .vertsHorzs ();_bacde :=len (_fbdgc )-1;_faae :=len (_bebc )-1;if _bfdg {_f .Log .Info 
("\u0072\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002ec\u0065\u006c\u006c\u0073\u003a\u0020\u0076\u0065\u0072\u0074s\u003d\u0025\u0064",len (_fbdgc ));for _ddec ,_bdbd :=range _fbdgc {_bcb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ddec ,_bdbd );};_f .Log .Info ("\u0072\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002ec\u0065\u006c\u006c\u0073\u003a\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064",len (_bebc ));for _feece ,_edfe :=range _bebc {_bcb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_feece ,_edfe );};_f .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0063\u0065\u006cl\u0073\u003a\u0020\u0076\u0065\u0063\u0073=\u0025\u0064\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078%\u0064",len (_dbbf ),_bacde ,_faae );};_dbga :=make ([]_ab .PdfRectangle ,_bacde *_faae );for _ggdgg :=0;_ggdgg < _faae ;_ggdgg ++{_bcda :=_bebc [_ggdgg ]._deeda ;_aega :=_bebc [_ggdgg +1]._deeda ;for _cgeb :=0;_cgeb < _bacde ;_cgeb ++{_bgeff :=_fbdgc [_cgeb ]._deeda ;_abaaf :=_fbdgc [_cgeb +1]._deeda ;_dbga [_ggdgg *_bacde +_cgeb ]=_ab .PdfRectangle {Llx :_bgeff ,Urx :_abaaf ,Lly :_bcda ,Ury :_aega };};};return _bacde ,_faae ,_dbga ;};func (_cfff *textPara )writeText (_efcf _bd .Writer ){if _cfff ._egbc ==nil {_cfff .writeCellText (_efcf );return ;};for _dffb :=0;_dffb < _cfff ._egbc ._bfcd ;_dffb ++{for _dbff :=0;_dbff < _cfff ._egbc ._agea ;_dbff ++{_dada :=_cfff ._egbc .get (_dbff ,_dffb );if _dada ==nil {_efcf .Write ([]byte ("\u0009"));}else {_dada .writeCellText (_efcf );};_efcf .Write ([]byte ("\u0020"));};if _dffb < _cfff ._egbc ._bfcd -1{_efcf .Write ([]byte ("\u000a"));};};};func _dacab (_cagf []int )[]int {_fddg :=make ([]int ,len (_cagf ));for _cdae ,_cacgc :=range _cagf {_fddg [len (_cagf )-1-_cdae ]=_cacgc ;};return _fddg ;};func (_edff paraList )applyTables (_fcff []*textTable )paraList {_cdga :=make (map[*textPara ]struct{});var _aabge paraList ;for _ ,_gfecg :=range _fcff {for _ ,_geafb 
:=range _gfecg ._beab {_cdga [_geafb ]=struct{}{};};_aabge =append (_aabge ,_gfecg .newTablePara ());};for _ ,_gceg :=range _edff {if _ ,_bdbfc :=_cdga [_gceg ];!_bdbfc {_aabge =append (_aabge ,_gceg );};};return _aabge ;};const (RenderModeStroke RenderMode =1<<iota ;RenderModeFill ;RenderModeClip ;);func (_dfd rectRuling )asRuling ()(*ruling ,bool ){_efce :=ruling {_aefag :_dfd ._dfbe };switch _dfd ._dfbe {case _geb :_efce ._deeda =0.5*(_dfd .Llx +_dfd .Urx );_efce ._efac =_dfd .Lly ;_efce ._bgae =_dfd .Ury ;case _ffad :_efce ._deeda =0.5*(_dfd .Lly +_dfd .Ury );_efce ._efac =_dfd .Llx ;_efce ._bgae =_dfd .Urx ;default:_f .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_dfd ._dfbe );return nil ,false ;};return &_efce ,true ;};type textTable struct{_ab .PdfRectangle ;_agea ,_bfcd int ;_gdbdc bool ;_beab map[uint64 ]*textPara ;};
|
||
|
||
// String returns a human readable description of `path`.
|
||
func (_gaaf *subpath )String ()string {_gaafc :=_gaaf ._ggdg ;_gbd :=len (_gaafc );if _gbd <=5{return _bcb .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_gbd ,_gaafc );};return _bcb .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_gbd ,_gaafc [0],_gaafc [1],_gaafc [_gbd -1]);};func (_acec paraList )reorder (_ggaf []int ){_abe :=make (paraList ,len (_acec ));for _dgcf ,_ebac :=range _ggaf {_abe [_dgcf ]=_acec [_ebac ];};copy (_acec ,_abe );};func (_acag *textObject )getCurrentFont ()*_ab .PdfFont {var _dge *_ab .PdfFont ;if !_acag ._fce .empty (){_dge =_acag ._fce .top ()._bdff ;};if _dge ==nil {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e");return _ab .DefaultFont ();};return _dge ;};func (_eafb *shapesState )newSubPath (){_eafb .clearPath ();if _facf {_f .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_eafb );};};func _dcg (_dda _ab .PdfRectangle ,_ffdf bounded )float64 {return _dda .Ury -_ffdf .bbox ().Lly };
|
||
|
||
// TableCell is a cell in a TextTable.
|
||
type TableCell struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Marks returns the TextMarks corresponding to the text in Text.
|
||
Marks TextMarkArray ;};func _dgaa (_edeaa ,_gacd float64 )rulingKind {if _edeaa >=_ecg &&_bedfc (_gacd ,_edeaa ){return _ffad ;};if _gacd >=_ecg &&_bedfc (_edeaa ,_gacd ){return _geb ;};return _bbag ;};func (_cbfa *subpath )removeDuplicates (){if len (_cbfa ._ggdg )==0{return ;};_gcdd :=[]_dd .Point {_cbfa ._ggdg [0]};for _ ,_fcca :=range _cbfa ._ggdg [1:]{if !_cacfa (_fcca ,_gcdd [len (_gcdd )-1]){_gcdd =append (_gcdd ,_fcca );};};_cbfa ._ggdg =_gcdd ;};func _cggf (_dfff []_ea .PdfObject )(_bfeg ,_beed float64 ,_edbg error ){if len (_dfff )!=2{return 0,0,_bcb .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_dfff ));};_ggcef ,_edbg :=_ea .GetNumbersAsFloat (_dfff );if _edbg !=nil {return 0,0,_edbg ;};return _ggcef [0],_ggcef [1],nil ;};func (_ebbb *textPara )bbox ()_ab .PdfRectangle {return _ebbb .PdfRectangle };func _eea (_aaee []TextMark ,_abae *int ,_becbg TextMark )[]TextMark {_becbg .Offset =*_abae ;_aaee =append (_aaee ,_becbg );*_abae +=len (_becbg .Text );return _aaee ;};func (_abfa *subpath )makeRectRuling ()(*ruling ,bool ){if _bfdg {_f .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_abfa );};_cbgd :=_abfa ._ggdg [:4];_cbea :=make (map[int ]rulingKind ,len (_cbgd ));for _gfea ,_efbce :=range _cbgd {_agcbe :=_abfa ._ggdg [(_gfea +1)%4];_cbea [_gfea ]=_bcdfe (_efbce ,_agcbe );};if _bfdg {_bcb .Printf ("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_cbea );};var _abeg ,_gafg []int ;for _cacf ,_ebfd :=range _cbea {switch _ebfd {case _ffad :_gafg =append (_gafg ,_cacf );case _geb :_abeg =append (_abeg ,_cacf );};};if _bfdg {_bcb .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_gafg ),_gafg );_bcb .Printf ("\u0020\u0020 
\u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_abeg ),_abeg );};_eead :=(len (_gafg )==2&&len (_abeg )==2)||(len (_gafg )==2&&len (_abeg )==0&&_faff (_cbgd [_gafg [0]],_cbgd [_gafg [1]]))||(len (_abeg )==2&&len (_gafg )==0&&_ggafe (_cbgd [_abeg [0]],_cbgd [_abeg [1]]));if _bfdg {_bcb .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_gafg ),len (_abeg ),_eead );};if !_eead {return &ruling {},false ;};if len (_abeg )==0{for _eaca ,_aabf :=range _cbea {if _aabf !=_ffad {_abeg =append (_abeg ,_eaca );};};};if len (_gafg )==0{for _fbbb ,_egdg :=range _cbea {if _egdg !=_geb {_gafg =append (_gafg ,_fbbb );};};};if _bfdg {_f .Log .Info ("\u0020\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025\u0064 \u0076\u0065\u0072\u0074\u0073\u003d\u0025d\u0020\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_gafg ),len (_abeg ),len (_cbgd ),_gafg ,_abeg ,_cbgd );};var _bffe ,_ecggd ,_aadd ,_ddcgb _dd .Point ;if _cbgd [_gafg [0]].Y > _cbgd [_gafg [1]].Y {_aadd ,_ddcgb =_cbgd [_gafg [0]],_cbgd [_gafg [1]];}else {_aadd ,_ddcgb =_cbgd [_gafg [1]],_cbgd [_gafg [0]];};if _cbgd [_abeg [0]].X > _cbgd [_abeg [1]].X {_bffe ,_ecggd =_cbgd [_abeg [0]],_cbgd [_abeg [1]];}else {_bffe ,_ecggd =_cbgd [_abeg [1]],_cbgd [_abeg [0]];};_dfcg :=_ab .PdfRectangle {Llx :_bffe .X ,Urx :_ecggd .X ,Lly :_ddcgb .Y ,Ury :_aadd .Y };if _dfcg .Llx > _dfcg .Urx {_dfcg .Llx ,_dfcg .Urx =_dfcg .Urx ,_dfcg .Llx ;};if _dfcg .Lly > _dfcg .Ury {_dfcg .Lly ,_dfcg .Ury =_dfcg .Ury ,_dfcg .Lly ;};_ffbee :=rectRuling {PdfRectangle :_dfcg ,_dfbe :_cbge (_dfcg )};if _ffbee ._dfbe ==_bbag {return nil ,false ;};_ffgf ,_ebbbc :=_ffbee .asRuling ();if !_ebbbc {return nil ,false 
;};if _bfdg {_bcb .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_ffgf .String ());};return _ffgf ,true ;};func _bfee (_cgea _dd .Point )*subpath {return &subpath {_ggdg :[]_dd .Point {_cgea }}};func _bedg (_ecef *textWord ,_geca float64 )*wordBag {_fgfca :=_gffd (_ecef ._dbed );_fafe :=[]*textWord {_ecef };_dged :=wordBag {_dgaf :map[int ][]*textWord {_fgfca :_fafe },PdfRectangle :_ecef .PdfRectangle ,_fde :_ecef ._fegfd ,_ddfe :_geca };return &_dged ;};
|
||
|
||
// String returns a string describing the current state of the textState stack.
|
||
func (_ccbc *stateStack )String ()string {_gdd :=[]string {_bcb .Sprintf ("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064",len (*_ccbc ))};for _gcb ,_eaba :=range *_ccbc {_fbad :="\u003c\u006e\u0069l\u003e";if _eaba !=nil {_fbad =_eaba .String ();};_gdd =append (_gdd ,_bcb .Sprintf ("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073",_gcb ,_fbad ));};return _bc .Join (_gdd ,"\u000a");};
|
||
|
||
// String returns a description of `tm`.
|
||
func (_gaef *textMark )String ()string {return _bcb .Sprintf ("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022",_gaef .PdfRectangle ,_gaef ._ecbb ,_gaef ._efab );};func (_fgdb *shapesState )quadraticTo (_daca ,_aeddg ,_agce ,_eaf float64 ){_fgdb .addPoint (_agce ,_eaf );};func _fffc (_gfgd []*subpath )[]rulingList {_beffb (_gfgd );var _efad rulingList ;for _ ,_bccf :=range _gfgd {if len (_bccf ._ggdg )< 2{continue ;};_fbfg :=_bccf ._ggdg [0];for _ ,_bcfff :=range _bccf ._ggdg [1:]{if _fgad ,_fbfbe :=_agcef (_fbfg ,_bcfff );_fbfbe {_efad =append (_efad ,_fgad );};_fbfg =_bcfff ;};};_efad =_efad .tidied ("\u0073t\u0072\u006f\u006b\u0065\u0073");return _efad .toGrids ();};func (_fggf *textObject )showTextAdjusted (_acc *_ea .PdfObjectArray )error {_bg :=false ;for _ ,_fagc :=range _acc .Elements (){switch _fagc .(type ){case *_ea .PdfObjectFloat ,*_ea .PdfObjectInteger :_gbfg ,_fbac :=_ea .GetNumberAsFloat (_fagc );if _fbac !=nil {_f .Log .Debug ("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_fagc ,_acc );return _fbac ;};_ebc ,_edbf :=-_gbfg *0.001*_fggf ._geag ._cdb ,0.0;if _bg {_edbf ,_ebc =_ebc ,_edbf ;};_ebbd :=_cfc (_dd .Point {X :_ebc ,Y :_edbf });_fggf ._bfe .Concat (_ebbd );case *_ea .PdfObjectString :_eggf ,_gfc :=_ea .GetStringBytes (_fagc );if !_gfc {_f .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_fagc ,_acc );return _ea .ErrTypeError ;};_fggf .renderText (_eggf );default:_f .Log .Debug 
("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_fagc ,_acc );return _ea .ErrTypeError ;};};return nil ;};func _gbagg (_dfcgc _ab .PdfColorspace ,_dddga _ab .PdfColor )_ca .Color {if _dfcgc ==nil ||_dddga ==nil {return _ca .Black ;};_dbaa ,_cagfb :=_dfcgc .ColorToRGB (_dddga );if _cagfb !=nil {_f .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_dddga ,_dfcgc ,_cagfb );return _ca .Black ;};_ggad ,_fbgb :=_dbaa .(*_ab .PdfColorDeviceRGB );if !_fbgb {_f .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_dbaa );return _ca .Black ;};return _ca .NRGBA {R :uint8 (_ggad .R ()*255),G :uint8 (_ggad .G ()*255),B :uint8 (_ggad .B ()*255),A :uint8 (255)};};func (_afac rulingList )vertsHorzs ()(rulingList ,rulingList ){var _afbf ,_gaeb rulingList ;for _ ,_bceb :=range _afac {switch _bceb ._aefag {case _geb :_afbf =append (_afbf ,_bceb );case _ffad :_gaeb =append (_gaeb ,_bceb );};};return _afbf ,_gaeb ;};type textState struct{_bge float64 ;_eba float64 ;_dcbc float64 ;_ebce float64 ;_cdb float64 ;_addf RenderMode ;_cfa float64 ;_bdff *_ab .PdfFont ;_feff _ab .PdfRectangle ;_eefb int ;_dag int ;};func (_cbeg *wordBag )allWords ()[]*textWord {var _cccb []*textWord ;for _ ,_gabd :=range _cbeg ._dgaf {_cccb =append (_cccb ,_gabd 
...);};return _cccb ;};func (_bfcc *wordBag )getDepthIdx (_ecfe float64 )int {_aabg :=_bfcc .depthIndexes ();_gdbf :=_gffd (_ecfe );if _gdbf < _aabg [0]{return _aabg [0];};if _gdbf > _aabg [len (_aabg )-1]{return _aabg [len (_aabg )-1];};return _gdbf ;};func (_gfcg lineRuling )yDelta ()float64 {return _dg .Abs (_gfcg ._fbgf .Y -_gfcg ._fbgf .Y )};func (_febf *wordBag )removeDuplicates (){if _gbed {_f .Log .Info ("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_febf .text ());};for _ ,_baba :=range _febf .depthIndexes (){if len (_febf ._dgaf [_baba ])==0{continue ;};_deee :=_febf ._dgaf [_baba ][0];_ebbg :=_aaff *_deee ._fegfd ;_eedb :=_deee ._dbed ;for _ ,_dcd :=range _febf .depthBand (_eedb ,_eedb +_ebbg ){_ffeaf :=map[*textWord ]struct{}{};_dddg :=_febf ._dgaf [_dcd ];for _ ,_dgg :=range _dddg {if _ ,_dfec :=_ffeaf [_dgg ];_dfec {continue ;};for _ ,_bdeec :=range _dddg {if _ ,_bgbd :=_ffeaf [_bdeec ];_bgbd {continue ;};if _bdeec !=_dgg &&_bdeec ._aeee ==_dgg ._aeee &&_dg .Abs (_bdeec .Llx -_dgg .Llx )< _ebbg &&_dg .Abs (_bdeec .Urx -_dgg .Urx )< _ebbg &&_dg .Abs (_bdeec .Lly -_dgg .Lly )< _ebbg &&_dg .Abs (_bdeec .Ury -_dgg .Ury )< _ebbg {_ffeaf [_bdeec ]=struct{}{};};};};if len (_ffeaf )> 0{_dgad :=0;for _ ,_eeac :=range _dddg {if _ ,_ggafg :=_ffeaf [_eeac ];!_ggafg {_dddg [_dgad ]=_eeac ;_dgad ++;};};_febf ._dgaf [_dcd ]=_dddg [:len (_dddg )-len (_ffeaf )];if len (_febf ._dgaf [_dcd ])==0{delete (_febf ._dgaf ,_dcd );};};};};};func (_gedae paraList )extractTables (_gcgeg []rulingList )paraList {if _gfeee {_f .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_gedae ));};if len (_gedae )< _edbfg {return _gedae ;};_bgda :=_gedae .findTables (_gcgeg );if _gfeee {_f .Log .Info 
("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_bgda ));for _gbaga ,_begc :=range _bgda {_begc .log (_bcb .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_gbaga ));};};return _gedae .applyTables (_bgda );};func (_caaea *textPara )isAtom ()*textTable {_ceabb :=_caaea ;_ggeg :=_caaea ._fcbf ;_fecb :=_caaea ._gcfc ;if !(_ggeg !=nil &&!_ggeg ._ffde &&_fecb !=nil &&!_fecb ._ffde ){return nil ;};_dcgca :=_ggeg ._gcfc ;if !(_dcgca !=nil &&!_dcgca ._ffde &&_dcgca ==_fecb ._fcbf ){return nil ;};return _edba (_ceabb ,_ggeg ,_fecb ,_dcgca );};type paraList []*textPara ;var _gbfa =map[rulingKind ]string {_bbag :"\u006e\u006f\u006e\u0065",_ffad :"\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_geb :"\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"};
|
||
|
||
// String returns a description of `l`.
|
||
func (_gbae *textLine )String ()string {return _bcb .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_gbae ._gcfb ,_gbae .PdfRectangle ,_gbae ._ddfd ,_gbae .text ());};func (_cbfcf *textLine )text ()string {var _acba []string ;for _ ,_dafg :=range _cbfcf ._cgab {if _dafg ._deff {_acba =append (_acba ,"\u0020");};_acba =append (_acba ,_dafg ._aeee );};return _bc .Join (_acba ,"");};func (_geba rulingList )intersections ()map[int ]intSet {var _accg ,_debd []int ;for _gcecb ,_adcb :=range _geba {switch _adcb ._aefag {case _geb :_accg =append (_accg ,_gcecb );case _ffad :_debd =append (_debd ,_gcecb );};};_bbea :=make (map[int ]intSet ,len (_accg )+len (_debd ));for _ ,_daeb :=range _accg {for _ ,_gag :=range _debd {if _geba [_daeb ].intersects (_geba [_gag ]){if _ ,_cade :=_bbea [_daeb ];!_cade {_bbea [_daeb ]=make (intSet );};if _ ,_eabb :=_bbea [_gag ];!_eabb {_bbea [_gag ]=make (intSet );};_bbea [_daeb ].add (_gag );_bbea [_gag ].add (_daeb );};};};return _bbea ;};
|
||
|
||
// String returns a description of `b`.
|
||
func (_bddc *wordBag )String ()string {var _bdbc []string ;for _ ,_afed :=range _bddc .depthIndexes (){_aae ,_ :=_bddc ._dgaf [_afed ];for _ ,_abca :=range _aae {_bdbc =append (_bdbc ,_abca ._aeee );};};return _bcb .Sprintf ("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071",_bddc .PdfRectangle ,_bddc ._fde ,len (_bdbc ),_bdbc );};
|
||
|
||
// ImageMark represents an image drawn on a page and its position in device coordinates.
|
||
// All coordinates are in device coordinates.
|
||
type ImageMark struct{Image *_ab .Image ;
|
||
|
||
// Dimensions of the image as displayed in the PDF.
|
||
Width float64 ;Height float64 ;
|
||
|
||
// Position of the image in PDF coordinates (lower left corner).
|
||
X float64 ;Y float64 ;
|
||
|
||
// Angle in degrees, if rotated.
|
||
Angle float64 ;};func (_gbbd *textObject )setTextRenderMode (_cgga int ){if _gbbd ==nil {return ;};_gbbd ._geag ._addf =RenderMode (_cgga );};func _bbaa (_efa bounded )float64 {return -_efa .bbox ().Lly };func (_bgbb paraList )computeEBBoxes (){if _feca {_f .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a");};for _ ,_fefe :=range _bgbb {_fefe ._aaef =_fefe .PdfRectangle ;};_ffcdd :=_bgbb .yNeighbours (0);for _fagb ,_fabge :=range _bgbb {_aeb :=_fabge ._aaef ;_aegd ,_ddefc :=-1.0e9,+1.0e9;for _ ,_efaf :=range _ffcdd [_fabge ]{_cgce :=_bgbb [_efaf ]._aaef ;if _cgce .Urx < _aeb .Llx {_aegd =_dg .Max (_aegd ,_cgce .Urx );}else if _aeb .Urx < _cgce .Llx {_ddefc =_dg .Min (_ddefc ,_cgce .Llx );};};for _fbfc ,_gdg :=range _bgbb {_dffd :=_gdg ._aaef ;if _fagb ==_fbfc ||_dffd .Ury > _aeb .Lly {continue ;};if _aegd <=_dffd .Llx &&_dffd .Llx < _aeb .Llx {_aeb .Llx =_dffd .Llx ;}else if _dffd .Urx <=_ddefc &&_aeb .Urx < _dffd .Urx {_aeb .Urx =_dffd .Urx ;};};if _feca {_bcb .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_fagb ,_fabge ._aaef ,_aeb ,_gefbc (_fabge .text (),50));};_fabge ._aaef =_aeb ;};if _feec {for _ ,_ebee :=range _bgbb {_ebee .PdfRectangle =_ebee ._aaef ;};};};func (_dcab *wordBag )pullWord (_adfa *wordBag ,_bede *textWord ,_dcecc int ){_dcab .PdfRectangle =_bfcg (_dcab .PdfRectangle ,_bede .PdfRectangle );if _bede ._fegfd > _dcab ._fde {_dcab ._fde =_bede ._fegfd ;};_dcab ._dgaf [_dcecc ]=append (_dcab ._dgaf [_dcecc ],_bede );_adfa .removeWord (_bede ,_dcecc );};
|
||
|
||
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
|
||
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
|
||
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
|
||
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
|
||
type RenderMode int ;func (_gbfe *shapesState )cubicTo (_bafe ,_gcd ,_ebd ,_cfe ,_cabf ,_dgd float64 ){_gbfe .addPoint (_cabf ,_dgd );};func (_gafc paraList )addNeighbours (){_ceegg :=func (_befd []int ,_efgcb *textPara )([]*textPara ,[]*textPara ){_ddbg :=make ([]*textPara ,0,len (_befd )-1);_ccdfc :=make ([]*textPara ,0,len (_befd )-1);for _ ,_ggga :=range _befd {_daaf :=_gafc [_ggga ];if _daaf .Urx <=_efgcb .Llx {_ddbg =append (_ddbg ,_daaf );}else if _daaf .Llx >=_efgcb .Urx {_ccdfc =append (_ccdfc ,_daaf );};};return _ddbg ,_ccdfc ;};_eabf :=func (_cfgba []int ,_faecc *textPara )([]*textPara ,[]*textPara ){_bebfg :=make ([]*textPara ,0,len (_cfgba )-1);_dcbaf :=make ([]*textPara ,0,len (_cfgba )-1);for _ ,_cbgc :=range _cfgba {_cbead :=_gafc [_cbgc ];if _cbead .Ury <=_faecc .Lly {_dcbaf =append (_dcbaf ,_cbead );}else if _cbead .Lly >=_faecc .Ury {_bebfg =append (_bebfg ,_cbead );};};return _bebfg ,_dcbaf ;};_ebag :=_gafc .yNeighbours (_egdc );for _ ,_ecgb :=range _gafc {_dgadd :=_ebag [_ecgb ];if len (_dgadd )==0{continue ;};_cbce ,_gcfbg :=_ceegg (_dgadd ,_ecgb );if len (_cbce )==0&&len (_gcfbg )==0{continue ;};if len (_cbce )> 0{_dgdbc :=_cbce [0];for _ ,_debc :=range _cbce [1:]{if _debc .Urx >=_dgdbc .Urx {_dgdbc =_debc ;};};for _ ,_aaag :=range _cbce {if _aaag !=_dgdbc &&_aaag .Urx > _dgdbc .Llx {_dgdbc =nil ;break ;};};if _dgdbc !=nil &&_ggfea (_ecgb .PdfRectangle ,_dgdbc .PdfRectangle ){_ecgb ._fged =_dgdbc ;};};if len (_gcfbg )> 0{_gdgf :=_gcfbg [0];for _ ,_faaf :=range _gcfbg [1:]{if _faaf .Llx <=_gdgf .Llx {_gdgf =_faaf ;};};for _ ,_dccg :=range _gcfbg {if _dccg !=_gdgf &&_dccg .Llx < _gdgf .Urx {_gdgf =nil ;break ;};};if _gdgf !=nil &&_ggfea (_ecgb .PdfRectangle ,_gdgf .PdfRectangle ){_ecgb ._fcbf =_gdgf ;};};};_ebag =_gafc .xNeighbours (_gda );for _ ,_adgef :=range _gafc {_abba :=_ebag [_adgef ];if len (_abba )==0{continue ;};_edaa ,_agba :=_eabf (_abba ,_adgef );if len (_edaa )==0&&len (_agba )==0{continue ;};if len (_agba )> 0{_ddefb :=_agba 
[0];for _ ,_bdec :=range _agba [1:]{if _bdec .Ury >=_ddefb .Ury {_ddefb =_bdec ;};};for _ ,_dbgfd :=range _agba {if _dbgfd !=_ddefb &&_dbgfd .Ury > _ddefb .Lly {_ddefb =nil ;break ;};};if _ddefb !=nil &&_dcfb (_adgef .PdfRectangle ,_ddefb .PdfRectangle ){_adgef ._gcfc =_ddefb ;};};if len (_edaa )> 0{_eadf :=_edaa [0];for _ ,_ecfad :=range _edaa [1:]{if _ecfad .Lly <=_eadf .Lly {_eadf =_ecfad ;};};for _ ,_aaaf :=range _edaa {if _aaaf !=_eadf &&_aaaf .Lly < _eadf .Ury {_eadf =nil ;break ;};};if _eadf !=nil &&_dcfb (_adgef .PdfRectangle ,_eadf .PdfRectangle ){_adgef ._fcba =_eadf ;};};};for _ ,_cgcec :=range _gafc {if _cgcec ._fged !=nil &&_cgcec ._fged ._fcbf !=_cgcec {_cgcec ._fged =nil ;};if _cgcec ._fcba !=nil &&_cgcec ._fcba ._gcfc !=_cgcec {_cgcec ._fcba =nil ;};if _cgcec ._fcbf !=nil &&_cgcec ._fcbf ._fged !=_cgcec {_cgcec ._fcbf =nil ;};if _cgcec ._gcfc !=nil &&_cgcec ._gcfc ._fcba !=_cgcec {_cgcec ._gcfc =nil ;};};};func (_bggf *wordBag )absorb (_ffda *wordBag ){for _bfcb ,_bcde :=range _ffda ._dgaf {for _ ,_gaf :=range _bcde {_bggf .pullWord (_ffda ,_gaf ,_bfcb );};};};func (_aded *wordBag )sort (){for _ ,_dba :=range _aded ._dgaf {_de .Slice (_dba ,func (_dgb ,_bfed int )bool {return _fad (_dba [_dgb ],_dba [_bfed ])< 0});};};func (_fbg *wordBag )maxDepth ()float64 {return _fbg ._ddfe -_fbg .Lly };func _fafcc (_eefea *PageText )error {_ccfee :=_cc .GetLicenseKey ();if _ccfee !=nil &&_ccfee .IsLicensed ()||_cd {return nil ;};_bcb .Printf ("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a");_bcb .Println ("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f");return _d .New 
("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064");};type intSet map[int ]struct{};func _fega (_abge float64 ,_febce int )int {if _febce ==0{_febce =1;};_ace :=float64 (_febce );return int (_dg .Round (_abge /_ace )*_ace );};var _bccfg =_a .MustCompile ("\u005c\u0064\u002b\u005c\u002e\u003f");func (_egb *wordBag )empty (_bffg int )bool {_ ,_bgcc :=_egb ._dgaf [_bffg ];return !_bgcc };func (_fbdf *textMark )inDiacriticArea (_fgag *textMark )bool {_abde :=_fbdf .Llx -_fgag .Llx ;_gdec :=_fbdf .Urx -_fgag .Urx ;_caaf :=_fbdf .Lly -_fgag .Lly ;return _dg .Abs (_abde +_gdec )< _fbdf .Width ()*_egaeg &&_dg .Abs (_caaf )< _fbdf .Height ()*_egaeg ;};func _dacd (_fbbc []TextMark ,_ddcga *int )[]TextMark {_fgfcg :=_fbbc [len (_fbbc )-1];_daa :=[]rune (_fgfcg .Text );if len (_daa )==1{_fbbc =_fbbc [:len (_fbbc )-1];_bafef :=_fbbc [len (_fbbc )-1];*_ddcga =_bafef .Offset +len (_bafef .Text );}else {_dcgf :=_dgdg (_fgfcg .Text );*_ddcga +=len (_dcgf )-len (_fgfcg .Text );_fgfcg .Text =_dcgf ;};return _fbbc ;};func (_eag *textObject )nextLine (){_eag .moveLP (0,-_eag ._geag ._ebce )};var _cd =false ;
|
||
|
||
// TextMarkArray is a collection of TextMarks.
|
||
type TextMarkArray struct{_faf []TextMark };
|
||
|
||
// Append appends `mark` to the mark array.
|
||
func (_aggc *TextMarkArray) Append(mark TextMark) {
	_aggc._faf = append(_aggc._faf, mark)
}

// _dffe is the constant 1/1000.
const _dffe = 1.0 / 1000.0

// _dbad builds the list of paragraphs for a page ("makeTextPage" in the trace
// output). The marks in `_dfcac` are turned into words, grouped into word
// bags, merged, arranged into paragraphs, optionally passed through table
// extraction using the rulings in `_bgdf`, and finally sorted into reading
// order. It returns nil when there are no marks or no words.
func _dbad(_dfcac []*textMark, _acfe _ab.PdfRectangle, _bgdf []rulingList) paraList {
	_f.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_dfcac), _acfe)
	if len(_dfcac) == 0 {
		return nil
	}

	words := _fedg(_dfcac, _acfe)
	if len(words) == 0 {
		return nil
	}

	// Same pipeline as before: _cbgg -> _fgfab -> _baab, each fed the previous result.
	bags := _baab(_fgfab(_cbgg(words, _acfe.Ury), _acfe.Ury))

	paras := make(paraList, 0, len(bags))
	for _, bag := range bags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}

	// Table extraction is only attempted once there are at least _edbfg paragraphs.
	if len(paras) >= _edbfg {
		paras = paras.extractTables(_bgdf)
	}
	paras.sortReadingOrder()
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}

// findTableGrid tries to fill every cell rectangle reported by `_gdbc.cells()`
// with exactly the paragraph that `inRect` finds for it. Rows are stored
// flipped (cell row r goes to table row height-1-r). It returns nil as soon as
// one cell has no matching paragraph.
func (_gcca paraList) findTableGrid(_gdbc rulingList) *textTable {
	width, height, cells := _gdbc.cells()
	table := &textTable{
		_agea:  width,
		_bfcd:  height,
		_gdbdc: true,
		_beab:  make(map[uint64]*textPara),
	}
	for row := 0; row < height; row++ {
		for col := 0; col < width; col++ {
			para := _gcca.inRect(cells[row*width+col])
			if para == nil {
				return nil
			}
			table.put(col, height-1-row, para)
		}
	}
	return table
}

// _eeda returns a new textPara with bounding box `_efcb` and lines `_dcba`.
func _eeda(_efcb _ab.PdfRectangle, _dcba []*textLine) *textPara {
	para := textPara{PdfRectangle: _efcb, _fafb: _dcba}
	return &para
}

// _gbcg rounds `_cbgee` to the nearest multiple of _adeb.
func _gbcg(_cbgee float64) float64 {
	steps := _dg.Round(_cbgee / _adeb)
	return _adeb * steps
}

// _fadg reports whether _eaga accepts the difference between the depths of
// the two paragraphs (presumably "same depth within tolerance" — confirm
// against _eaga).
func _fadg(_bgadf, _edfa *textPara) bool {
	delta := _bgadf.depth() - _edfa.depth()
	return _eaga(delta)
}
|
||
|
||
// String returns a human readable description of `ss`.
|
||
func (_aafb *shapesState )String ()string {return _bcb .Sprintf ("%\u0064 \u0073\u0075\u0062\u0070\u0061\u0074\u0068\u0073 \u0066\u0072\u0065\u0073h=\u0025\u0074",len (_aafb ._fcce ),_aafb ._dgcd );};type rulingList []*ruling ;func (_dae *textObject )getFontDirect (_bccg string )(*_ab .PdfFont ,error ){_aag ,_bdgc :=_dae .getFontDict (_bccg );if _bdgc !=nil {return nil ,_bdgc ;};_ggbf ,_bdgc :=_ab .NewPdfFontFromPdfObject (_aag );if _bdgc !=nil {_f .Log .Debug ("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bccg ,_bdgc );};return _ggbf ,_bdgc ;};func (_cgc *textObject )setFont (_bea string ,_eab float64 )error {if _cgc ==nil {return nil ;};_cgc ._geag ._cdb =_eab ;_cbc ,_aef :=_cgc .getFont (_bea );if _aef !=nil {return _aef ;};_cgc ._geag ._bdff =_cbc ;if _cgc ._fce .empty (){_cgc ._fce .push (_cgc ._geag );}else {_cgc ._fce .top ()._bdff =_cgc ._geag ._bdff ;};return nil ;};func (_dadbg *wordBag )arrangeText ()*textPara {_dadbg .sort ();if _bcge {_dadbg .removeDuplicates ();};var _cdbg []*textLine ;for _ ,_bdfb :=range _dadbg .depthIndexes (){for !_dadbg .empty (_bdfb ){_beeb :=_dadbg .firstReadingIndex (_bdfb );_agaa :=_dadbg .firstWord (_beeb );_cadb :=_afef (_dadbg ,_beeb );_fgc :=_agaa ._fegfd ;_fbda :=_agaa ._dbed -_fcef *_fgc ;_gfgg :=_agaa ._dbed +_fcef *_fgc ;_bbegde :=_egge *_fgc ;_caeaa :=_cgd *_fgc ;_dbcae :for {var _dgfg *textWord ;_bbbdc :=0;for _ ,_edag :=range _dadbg .depthBand (_fbda ,_gfgg ){_eafa :=_dadbg .highestWord (_edag ,_fbda ,_gfgg );if _eafa ==nil {continue ;};_edab :=_gca (_eafa ,_cadb ._cgab [len (_cadb ._cgab )-1]);if _edab < -_caeaa {break _dbcae ;};if _edab > _bbegde {continue ;};if _dgfg !=nil &&_fad (_eafa ,_dgfg )>=0{continue 
;};_dgfg =_eafa ;_bbbdc =_edag ;};if _dgfg ==nil {break ;};_cadb .pullWord (_dadbg ,_dgfg ,_bbbdc );};_cadb .markWordBoundaries ();_cdbg =append (_cdbg ,_cadb );};};if len (_cdbg )==0{return nil ;};_de .Slice (_cdbg ,func (_abgf ,_ffaf int )bool {return _addb (_cdbg [_abgf ],_cdbg [_ffaf ])< 0});_fffb :=_eeda (_dadbg .PdfRectangle ,_cdbg );if _dfbg {_f .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_fffb .String ());if _bdgb {for _ddgb ,_eacd :=range _fffb ._fafb {_bcb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ddgb ,_eacd .String ());if _gcab {for _bbgb ,_fegab :=range _eacd ._cgab {_bcb .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_bbgb ,_fegab .String ());for _adcg ,_ecda :=range _fegab ._bcce {_bcb .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_adcg ,_ecda .String ());};};};};};};return _fffb ;};func (_abcdb *textObject )getFontDict (_bccb string )(_aefa _ea .PdfObject ,_gfd error ){_gbbb :=_abcdb ._ffg ;if _gbbb ==nil {_f .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_bccb );return nil ,nil ;};_aefa ,_ceef :=_gbbb .GetFontByName (_ea .PdfObjectName (_bccb ));if !_ceef {_f .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_bccb );return nil ,_d .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _aefa ,nil ;};func _bedfc (_fcee ,_gbef float64 )bool {return _fcee /_dg .Max (1.0,_gbef )< _fdc };type textObject struct{_beb *Extractor ;_ffg *_ab .PdfPageResources ;_ffb _bdb .GraphicsState ;_geag *textState ;_fce *stateStack ;_bfe 
_dd .Matrix ;_eddf _dd .Matrix ;_gedc []*textMark ;_cce bool ;};func (_efge rulingList )connections (_fcaff map[int ]intSet ,_acfd int )intSet {_geaf :=make (intSet );_dead :=make (intSet );var _gdab func (int );_gdab =func (_dcag int ){if !_dead .has (_dcag ){_dead .add (_dcag );for _bgffd :=range _efge {if _fcaff [_bgffd ].has (_dcag ){_geaf .add (_bgffd );};};for _ddcdg :=range _efge {if _geaf .has (_ddcdg ){_gdab (_ddcdg );};};};};_gdab (_acfd );return _geaf ;};func _cgfb (_fbee ,_fdee _ab .PdfRectangle )bool {return _dcfb (_fbee ,_fdee )&&_ggfea (_fbee ,_fdee )};func (_gdeg paraList )tables ()[]TextTable {var _ebgd []TextTable ;for _ ,_adbc :=range _gdeg {_gbbba :=_adbc ._egbc ;if _gbbba !=nil &&_gbbba .isExportable (){_ebgd =append (_ebgd ,_gbbba .toTextTable ());};};return _ebgd ;};func (_gbag *ruling )equals (_aafc *ruling )bool {return _gbag ._aefag ==_aafc ._aefag &&_gbag ._deeda ==_aafc ._deeda &&_gbag ._efac ==_aafc ._efac &&_gbag ._bgae ==_aafc ._bgae ;};var _gddb =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_ca .White ,StrokeColor :_ca .White };func (_afea *wordBag )depthRange (_dgae ,_fdgg int )[]int {_dafb :=_afea .depthIndexes ();var _efgca []int ;for _ ,_fefb :=range _dafb {if _dgae <=_fefb &&_fefb <=_fdgg {_efgca =append (_efgca ,_fefb );};};return _efgca ;};func (_fdfe *textWord )toTextMarks (_dbadf *int )[]TextMark {var _gfca []TextMark ;for _ ,_dcdf :=range _fdfe ._bcce {_gfca =_eea (_gfca ,_dbadf ,_dcdf .ToTextMark ());};return _gfca ;};func (_gff *shapesState )lastpointEstablished ()(_dd .Point ,bool ){if _gff ._dgcd {return _gff ._cdc ,false ;};_cde :=len (_gff ._fcce );if _cde > 0&&_gff ._fcce [_cde -1]._dcfd {return _gff ._fcce [_cde -1].last (),false ;};return _dd .Point {},true ;};func _fad (_deca ,_feeg bounded )float64 {return _deca .bbox ().Llx -_feeg .bbox ().Llx };func (_dffg rulingList )aligned ()bool {if len (_dffg )< 2{return false ;};_fdebe :=_dffg [0];for _ ,_cffa :=range _dffg [1:]{if 
!(_eaga (_cffa ._efac -_fdebe ._efac )&&_eaga (_cffa ._bgae -_fdebe ._bgae )){return false ;};};return true ;};func (_acbag paraList )inRect (_cfcf _ab .PdfRectangle )*textPara {var _fcge paraList ;for _ ,_decde :=range _acbag {_fegeb :=_decde .PdfRectangle ;_fegeb .Lly +=_cacd ;_fegeb .Ury -=_cacd ;if _dgdb (_cfcf ,_fegeb ){_fcge =append (_fcge ,_decde );};};if len (_fcge )!=1{return nil ;};return _fcge [0];};func _dgcdf (_cgeae float64 )bool {return _dg .Abs (_cgeae )< _fege };type ruling struct{_aefag rulingKind ;_deeda float64 ;_efac float64 ;_bgae float64 ;};func _egfge (_dbae string )bool {for _ ,_gcdg :=range _dbae {if !_e .IsSpace (_gcdg ){return false ;};};return true ;};func (_ddeee *textPara )toTextMarks (_abgb *int )[]TextMark {if _ddeee ._egbc ==nil {return _ddeee .toCellTextMarks (_abgb );};var _gggg []TextMark ;for _gfdg :=0;_gfdg < _ddeee ._egbc ._bfcd ;_gfdg ++{for _edgd :=0;_edgd < _ddeee ._egbc ._agea ;_edgd ++{_baee :=_ddeee ._egbc .get (_edgd ,_gfdg );if _baee ==nil {_gggg =_aaab (_gggg ,_abgb ,"\u0009");}else {_ebdfb :=_baee .toCellTextMarks (_abgb );_gggg =append (_gggg ,_ebdfb ...);};_gggg =_aaab (_gggg ,_abgb ,"\u0020");};if _gfdg < _ddeee ._egbc ._bfcd -1{_gggg =_aaab (_gggg ,_abgb ,"\u000a");};};return _gggg ;};func (_ggbd *shapesState )stroke (_ddcd *[]*subpath ){*_ddcd =append (*_ddcd ,_ggbd ._fcce ...);if _bfdg {_f .Log .Info ("\u0053T\u0052\u004f\u004b\u0045\u003a\u0020\u0025\u0064\u0020\u0073\u0074r\u006f\u006b\u0065\u0073\u0020\u0073\u0073\u003d\u0025\u0073",len (*_ddcd ),_ggbd );for _ffce ,_dbggd :=range _ggbd ._fcce {_bcb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ffce ,_dbggd );if _ffce ==10{break ;};};};};func (_gedf *wordBag )minDepth ()float64 {return _gedf ._ddfe -(_gedf .Ury -_gedf ._fde )};type imageExtractContext struct{_dc []ImageMark ;_ga int ;_ag int ;_fe int ;_ge map[*_ea .PdfObjectStream ]*cachedImage ;_gf *ImageExtractOptions ;};func (_bcfa paraList )llyOrdering ()[]int {_beda :=make ([]int ,len 
(_bcfa ));for _cfaef :=range _bcfa {_beda [_cfaef ]=_cfaef ;};_de .SliceStable (_beda ,func (_fegd ,_ddcdf int )bool {_aeaab ,_faee :=_beda [_fegd ],_beda [_ddcdf ];return _bcfa [_aeaab ].Lly < _bcfa [_faee ].Lly ;});return _beda ;};func (_baea paraList )writeText (_cdcaf _bd .Writer ){for _bddd ,_bbegd :=range _baea {_bbegd .writeText (_cdcaf );if _bddd !=len (_baea )-1{if _fadg (_bbegd ,_baea [_bddd +1]){_cdcaf .Write ([]byte ("\u0020"));}else {_cdcaf .Write ([]byte ("\u000a"));_cdcaf .Write ([]byte ("\u000a"));};};};_cdcaf .Write ([]byte ("\u000a"));_cdcaf .Write ([]byte ("\u000a"));};func _agdf (_beae ,_acagc float64 )string {_cegb :=!_eaga (_beae -_acagc );if _cegb {return "\u000a";};return "\u0020";};const (_feca =false ;_eafg =false ;_ddb =false ;_facf =false ;_fcdac =false ;_ccbf =false ;_cfagg =false ;_dfbg =false ;_bdgb =_dfbg &&true ;_gcab =_bdgb &&false ;_gbed =_dfbg &&true ;_gfeee =false ;_cfae =_gfeee ||false ;_bfdg =false ;);func _bfef (_fdaa []*textMark ,_aacec _ab .PdfRectangle )*textWord {_baag :=_fdaa [0].PdfRectangle ;_dbea :=_fdaa [0]._ecbb ;for _ ,_ffeb :=range _fdaa [1:]{_baag =_bfcg (_baag ,_ffeb .PdfRectangle );if _ffeb ._ecbb > _dbea {_dbea =_ffeb ._ecbb ;};};return &textWord {PdfRectangle :_baag ,_bcce :_fdaa ,_dbed :_aacec .Ury -_baag .Lly ,_fegfd :_dbea };};func _bccbg (_gee ,_cdddf *textPara )bool {return _dcfb (_gee ._aaef ,_cdddf ._aaef )};type wordBag struct{_ab .PdfRectangle ;_fde float64 ;_ddfe float64 ;_dgaf map[int ][]*textWord ;};const _bdc =10;func (_ebe *shapesState )establishSubpath ()*subpath {_gbg ,_effg :=_ebe .lastpointEstablished ();if !_effg {_ebe ._fcce =append (_ebe ._fcce ,_bfee (_gbg ));};if len (_ebe ._fcce )==0{return nil ;};_ebe ._dgcd =false ;return _ebe ._fcce [len (_ebe ._fcce )-1];};func _gefbc (_eceeeb string ,_affd int )string {if len (_eceeeb )< _affd {return _eceeeb ;};return _eceeeb [:_affd ];};
|
||
|
||
// Elements returns the TextMarks in `ma`.
|
||
func (_bcbf *TextMarkArray) Elements() []TextMark {
	return _bcbf._faf
}

// lineRuling is a ruling kind together with two end points.
type lineRuling struct {
	_ddeb rulingKind
	_ddbb, _fbgf _dd.Point
}

// findTables links the paragraphs to their neighbours, sorts them with _caba
// and then collects grid-based tables (when _bdeed is set) followed by
// text-based tables (when _fdda is set).
func (_ggef paraList) findTables(_cegbc []rulingList) []*textTable {
	_ggef.addNeighbours()
	_de.Slice(_ggef, func(_fbgfb, _cedf int) bool {
		return _caba(_ggef[_fbgfb], _ggef[_cedf]) < 0
	})

	var found []*textTable
	if _bdeed {
		found = append(found, _ggef.findGridTables(_cegbc)...)
	}
	if _fdda {
		found = append(found, _ggef.findTextTables()...)
	}
	return found
}
|
||
|
||
// String returns a string describing `pt`.
|
||
func (_defc PageText) String() string {
	summary := _bcb.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_defc._cdg))

	// One line per mark, framed by the summary prefixed with "-" and "+".
	lines := make([]string, 0, len(_defc._cdg)+2)
	lines = append(lines, "\u002d"+summary)
	for _, tm := range _defc._cdg {
		lines = append(lines, tm.String())
	}
	lines = append(lines, "\u002b"+summary)
	return _bc.Join(lines, "\u000a")
}

// getDown collects, for every column, the `_gcfc` neighbour of the cell in
// the table's last row. It returns nil if any neighbour is missing or has
// `_ffde` set, or if consecutive neighbours are not chained through `_fcbf`.
func (_cbeb *textTable) getDown() paraList {
	below := make(paraList, _cbeb._agea)
	for col := range below {
		next := _cbeb.get(col, _cbeb._bfcd-1)._gcfc
		if next == nil || next._ffde {
			return nil
		}
		below[col] = next
	}
	for col := 1; col < len(below); col++ {
		if below[col-1]._fcbf != below[col] {
			return nil
		}
	}
	return below
}

// fontEntry pairs a font with an int64 value.
type fontEntry struct {
	_edaf *_ab.PdfFont
	_ggdb int64
}

// getStrokeColor converts the graphics state's stroking colorspace and color
// with _gbagg.
func (_cfag *textObject) getStrokeColor() _ca.Color {
	gs := &_cfag._ffb
	return _gbagg(gs.ColorspaceStroking, gs.ColorStroking)
}

// get returns the paragraph stored for cell (`_adfcf`, `_fbea`), or nil if
// none is stored.
func (_aafbd *textTable) get(_adfcf, _fbea int) *textPara {
	key := _fbaf(_adfcf, _fbea)
	return _aafbd._beab[key]
}

// bounded is implemented by anything that can report a bounding box.
type bounded interface {
	bbox() _ab.PdfRectangle
}
|
||
|
||
// PageImages represents extracted images on a PDF page with spatial information:
|
||
// display position and size.
|
||
type PageImages struct {
	// Images holds one mark per extracted image.
	Images []ImageMark
}
|
||
|
||
// Tables returns the tables extracted from the page.
|
||
func (_ddc PageText) Tables() []TextTable {
	return _ddc._cbb
}

// toTextTable converts the internal table into the public TextTable form.
// Cells with no paragraph are left as zero-valued TableCells; every other
// cell receives the paragraph text and its marks, with mark offsets starting
// at 0 for each cell.
func (_cadbb *textTable) toTextTable() TextTable {
	if _gfeee {
		_f.Log.Info("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064", _cadbb._agea, _cadbb._bfcd)
	}
	rows := make([][]TableCell, _cadbb._bfcd)
	for y := 0; y < _cadbb._bfcd; y++ {
		rows[y] = make([]TableCell, _cadbb._agea)
		for x := 0; x < _cadbb._agea; x++ {
			para := _cadbb.get(x, y)
			if _gfeee {
				_bcb.Printf("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", x, y, para)
			}
			if para == nil {
				continue
			}
			cell := &rows[y][x]
			cell.Text = para.text()
			offset := 0
			cell.Marks._faf = para.toTextMarks(&offset)
		}
	}
	return TextTable{W: _cadbb._agea, H: _cadbb._bfcd, Cells: rows}
}

// textLine is a bounding box, a list of words and two float64 values.
type textLine struct {
	_ab.PdfRectangle
	_gcfb float64
	_cgab []*textWord
	_ddfd float64
}

// getRight collects, for every row, the `_fcbf` neighbour of the cell in the
// table's last column. It returns nil if any neighbour is missing or has
// `_ffde` set, or if consecutive neighbours are not chained through `_gcfc`.
func (_cfed *textTable) getRight() paraList {
	beside := make(paraList, _cfed._bfcd)
	for row := range beside {
		next := _cfed.get(_cfed._agea-1, row)._fcbf
		if next == nil || next._ffde {
			return nil
		}
		beside[row] = next
	}
	for row := 1; row < len(beside); row++ {
		if beside[row-1]._gcfc != beside[row] {
			return nil
		}
	}
	return beside
}
|
||
|
||
// PageText represents the layout of text on a device page.
|
||
type PageText struct{_cdg []*textMark ;_dadb string ;_bbg []TextMark ;_cbb []TextTable ;_fae _ab .PdfRectangle ;_edf []*subpath ;_dafc []*subpath ;};func (_cadc *textTable )log (_gaebc string ){if !_gfeee {return ;};_f .Log .Info ("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066",_gaebc ,_cadc ._agea ,_cadc ._bfcd ,_cadc ._gdbdc ,_cadc .PdfRectangle );for _gega :=0;_gega < _cadc ._bfcd ;_gega ++{for _acdb :=0;_acdb < _cadc ._agea ;_acdb ++{_gaca :=_cadc .get (_acdb ,_gega );_bcb .Printf ("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a",_acdb ,_gega ,_gaca .PdfRectangle ,_gefbc (_gaca .text (),50),_c .RuneCountInString (_gaca .text ()));};};};func (_cggc *textObject )newTextMark (_eaab string ,_defd _dd .Matrix ,_dfca _dd .Point ,_beff float64 ,_bdgbf *_ab .PdfFont ,_dgfa float64 ,_caf ,_feagf _ca .Color )(textMark ,bool ){_edgad :=_defd .Angle ();_agee :=_fega (_edgad ,_gbdf );var _gcec float64 ;if _agee %180!=90{_gcec =_defd .ScalingFactorY ();}else {_gcec =_defd .ScalingFactorX ();};_dabg :=_bgea (_defd );_gddc :=_ab .PdfRectangle {Llx :_dabg .X ,Lly :_dabg .Y ,Urx :_dfca .X ,Ury :_dfca .Y };switch _agee %360{case 90:_gddc .Urx -=_gcec ;case 180:_gddc .Ury -=_gcec ;case 270:_gddc .Urx +=_gcec ;case 0:_gddc .Ury +=_gcec ;default:_agee =0;_gddc .Ury +=_gcec ;};if _gddc .Llx > _gddc .Urx {_gddc .Llx ,_gddc .Urx =_gddc .Urx ,_gddc .Llx ;};if _gddc .Lly > _gddc .Ury {_gddc .Lly ,_gddc .Ury =_gddc .Ury ,_gddc .Lly ;};_acg ,_dgcg :=_cgb (_gddc ,_cggc ._beb ._fcb );if !_dgcg {_f .Log .Debug 
("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_gddc ,_cggc ._beb ._fcb ,_eaab );};_gddc =_acg ;_ecgg :=_gddc ;_fabd :=_cggc ._beb ._fcb ;switch _agee %360{case 90:_fabd .Urx ,_fabd .Ury =_fabd .Ury ,_fabd .Urx ;_ecgg =_ab .PdfRectangle {Llx :_fabd .Urx -_gddc .Ury ,Urx :_fabd .Urx -_gddc .Lly ,Lly :_gddc .Llx ,Ury :_gddc .Urx };case 180:_ecgg =_ab .PdfRectangle {Llx :_fabd .Urx -_gddc .Llx ,Urx :_fabd .Urx -_gddc .Urx ,Lly :_fabd .Ury -_gddc .Lly ,Ury :_fabd .Ury -_gddc .Ury };case 270:_fabd .Urx ,_fabd .Ury =_fabd .Ury ,_fabd .Urx ;_ecgg =_ab .PdfRectangle {Llx :_gddc .Ury ,Urx :_gddc .Lly ,Lly :_fabd .Ury -_gddc .Llx ,Ury :_fabd .Ury -_gddc .Urx };};if _ecgg .Llx > _ecgg .Urx {_ecgg .Llx ,_ecgg .Urx =_ecgg .Urx ,_ecgg .Llx ;};if _ecgg .Lly > _ecgg .Ury {_ecgg .Lly ,_ecgg .Ury =_ecgg .Ury ,_ecgg .Lly ;};_afbb :=textMark {_efab :_eaab ,PdfRectangle :_ecgg ,_cgag :_gddc ,_fbfb :_bdgbf ,_ecbb :_gcec ,_dgcb :_dgfa ,_eegab :_defd ,_ddad :_dfca ,_eaeb :_agee ,_cacc :_caf ,_gaee :_feagf };if _ddb {_f .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_dabg ,_dfca ,_afbb .String ());};return _afbb ,_dgcg ;};type textPara struct{_ab .PdfRectangle ;_aaef _ab .PdfRectangle ;_fafb []*textLine ;_egbc *textTable ;_ffde bool ;_fged *textPara ;_fcbf *textPara ;_fcba *textPara ;_gcfc *textPara ;};func (_aaac *shapesState )addPoint (_faec ,_gcgf float64 ){_bbbd :=_aaac .establishSubpath ();_cefe :=_aaac .devicePoint (_faec ,_gcgf );if _bbbd ==nil {_aaac ._dgcd =true ;_aaac ._cdc =_cefe ;}else {_bbbd .add (_cefe );};};var (_af =_d .New 
("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072");_abf =_d .New ("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072"););func _baab (_eefe []*wordBag )[]*wordBag {if len (_eefe )<=1{return _eefe ;};if _dfbg {_f .Log .Info ("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a");};_de .Slice (_eefe ,func (_dfe ,_bgcf int )bool {_fdgb ,_gad :=_eefe [_dfe ],_eefe [_bgcf ];_edbed :=_fdgb .Width ()*_fdgb .Height ();_ddff :=_gad .Width ()*_gad .Height ();if _edbed !=_ddff {return _edbed > _ddff ;};if _fdgb .Height ()!=_gad .Height (){return _fdgb .Height ()> _gad .Height ();};return _dfe < _bgcf ;});var _abce []*wordBag ;_ade :=make (intSet );for _dea :=0;_dea < len (_eefe );_dea ++{if _ade .has (_dea ){continue ;};_dece :=_eefe [_dea ];for _dcedg :=_dea +1;_dcedg < len (_eefe );_dcedg ++{if _ade .has (_dea ){continue ;};_fddd :=_eefe [_dcedg ];_ccgd :=_dece .PdfRectangle ;_ccgd .Llx -=_dece ._fde ;if _dgdb (_ccgd ,_fddd .PdfRectangle ){if _dfbg {_bcb .Printf ("\u0009\u0020\u0025\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_fddd .PdfRectangle ,_gefbc (_fddd .text (),50));};_dece .absorb (_fddd );_ade .add (_dcedg );};};_abce =append (_abce ,_dece );};if len (_eefe )!=len (_abce )+len (_ade ){_f .Log .Error ("\u006d\u0065\u0072\u0067\u0065\u0057o\u0072\u0064\u0042\u0061\u0067\u0073\u003a\u0020\u0025\u0064\u002d\u003e\u0025d\u0020\u0061\u0062\u0073\u006f\u0072\u0062e\u0064\u003d\u0025\u0064",len (_eefe ),len (_abce ),len (_ade ));};return _abce ;};func _addb (_baada ,_bage bounded )float64 {_aeaf :=_cbad (_baada ,_bage );if !_eaga (_aeaf ){return _aeaf ;};return _fad (_baada ,_bage );};func (_fdgfe *ruling )intersects (_cbac *ruling )bool {_fdebf :=(_fdgfe ._aefag ==_geb &&_cbac ._aefag ==_ffad )||(_cbac ._aefag ==_geb &&_fdgfe ._aefag ==_ffad );_cbgga :=func (_agbb ,_gagc *ruling )bool {return _agbb ._efac <=_gagc ._deeda +_deec &&_gagc ._deeda -_deec <=_agbb 
._bgae ;};_egac :=_cbgga (_fdgfe ,_cbac );_bbgc :=_cbgga (_cbac ,_fdgfe );if _bfdg {_f .Log .Info ("\u0069\u006e\u0074\u0065r\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006ft\u0068\u006f\u0067\u006f\u006e\u0061\u006c\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u002d>\u0020\u0025\u0074\u000a\u0009\u0020\u0076\u003d\u0025\u0073\u000a\u0009\u0076\u0032\u003d\u0025\u0073",_fdebf ,_egac ,_bbgc ,_fdebf &&_egac &&_bbgc ,_fdgfe ,_cbac );};return _fdebf &&_egac &&_bbgc ;};func (_ggcf *textPara )fontsize ()float64 {return _ggcf ._fafb [0]._ddfd };func (_eef *textObject )setCharSpacing (_ccb float64 ){if _eef ==nil {return ;};_eef ._geag ._bge =_ccb ;if _fcdac {_f .Log .Info ("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073",_ccb ,_eef ._geag .String ());};};func (_dgec *wordBag )highestWord (_cfcg int ,_cfcb ,_cbd float64 )*textWord {for _ ,_fcbd :=range _dgec ._dgaf [_cfcg ]{if _cfcb <=_fcbd ._dbed &&_fcbd ._dbed <=_cbd {return _fcbd ;};};return nil ;};func _ffgb (_fcde []*subpath )[]rulingList {_beffb (_fcde );var _ecbd rulingList ;for _ ,_cdbgc :=range _fcde {if !_cdbgc .isQuadrilateral (){continue ;};if _daeec ,_acab :=_cdbgc .makeRectRuling ();_acab {_ecbd =append (_ecbd ,_daeec );};};_ecbd =_ecbd .tidied ("\u0066\u0069\u006cl\u0073");return _ecbd .toGrids ();};func (_eagg *textTable )put (_fddgc ,_fdbac int ,_ccgg *textPara ){_eagg ._beab [_fbaf (_fddgc ,_fdbac )]=_ccgg ;};var (_gcdb =map[rune ]string 
{0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};);func (_gacc *wordBag )depthIndexes ()[]int {if len (_gacc ._dgaf )==0{return nil ;};_bbf :=make ([]int ,len (_gacc ._dgaf ));_dcfa :=0;for _ebdb :=range _gacc ._dgaf {_bbf [_dcfa ]=_ebdb ;_dcfa ++;};_de .Ints (_bbf );return _bbf ;};type event struct{_ffgg float64 ;_adaf bool ;_dcaa int ;};func (_bcaf *wordBag )depthBand (_bdee ,_bgaa float64 )[]int {if len (_bcaf ._dgaf )==0{return nil ;};return _bcaf .depthRange (_bcaf .getDepthIdx (_bdee ),_bcaf .getDepthIdx (_bgaa ));};func (_edg *textObject )setTextLeading (_fff float64 ){if _edg ==nil {return ;};_edg ._geag ._ebce =_fff ;};
|
||
|
||
// String returns a description of `state`.
|
||
func (_ffcg *textState) String() string {
	// Fall back to a "[NOT SET]" placeholder when no font has been selected.
	var fontName string
	if _ffcg._bdff == nil {
		fontName = "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	} else {
		fontName = _ffcg._bdff.BaseFont()
	}
	return _bcb.Sprintf("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071", _ffcg._bge, _ffcg._eba, _ffcg._cdb, fontName)
}

// The three ruling kinds, numbered 0, 1 and 2 in declaration order.
const (
	_bbag rulingKind = iota
	_ffad
	_geb
)
|
||
|
||
// String returns a description of `k`.
|
||
func (_cabac rulingKind )String ()string {_fafg ,_fcgd :=_gbfa [_cabac ];if !_fcgd {return _bcb .Sprintf ("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064",_cabac );};return _fafg ;};func (_cbga intSet )has (_addcd int )bool {_ ,_ggeb :=_cbga [_addcd ];return _ggeb };func (_gaea rulingList )isActualGrid ()bool {_ebced ,_ccbfg :=_gaea .vertsHorzs ();if _bfdg {_f .Log .Info ("\u0069\u0073\u0041\u0063\u0074u\u0061\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u006e\u0075\u006d\u0056\u0065r\u0074\u003d\u0025\u0064\u0020\u006e\u0075\u006d\u0048\u006f\u0072\u007a\u003d\u0025\u0064\u0020\u003a\u0020\u0025\u0074\u0020\u0026\u0020\u0025\u0074\u0020\u002d\u003e\u0020\u0025t",len (_ebced ),len (_ccbfg ),len (_ebced )>=2,len (_ccbfg )>=2,len (_ebced )>=2&&len (_ccbfg )>=2);for _fafga ,_bgbbb :=range _gaea {_bcb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a",_fafga ,_bgbbb );};};if !(len (_ebced )>=_cff +1&&len (_ccbfg )>=_gafb +1){return false ;};if !(_ebced .aligned ()&&_ccbfg .aligned ()){return false ;};_bebg ,_eccb :=_ebced [0],_ebced [len (_ebced )-1];_aadf ,_dcae :=_ccbfg [0],_ccbfg [len (_ccbfg )-1];return _dgcdf (_bebg ._deeda -_aadf ._efac )&&_dgcdf (_eccb ._deeda -_aadf ._bgae )&&_dgcdf (_aadf ._deeda -_bebg ._bgae )&&_dgcdf (_dcae ._deeda -_bebg ._efac );};func _eeebd (_fbbcf []*textWord ,_dgac *textWord )[]*textWord {for _dgfag ,_ffac :=range _fbbcf {if _ffac ==_dgac {return _cdge (_fbbcf ,_dgfag );};};_f .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_dgac );return nil ;};func (_ffccd *textWord )addDiacritic (_babdb string ){_gdfef :=_ffccd ._bcce [len (_ffccd ._bcce )-1];_gdfef ._efab =_gdfef ._efab +_babdb ;_gdfef ._efab =_eg .NFKC .String (_gdfef ._efab );};func (_bec *imageExtractContext )extractInlineImage (_da 
*_bdb .ContentStreamInlineImage ,_baf _bdb .GraphicsState ,_cge *_ab .PdfPageResources )error {_ega ,_ada :=_da .ToImage (_cge );if _ada !=nil {return _ada ;};_bafc ,_ada :=_da .GetColorSpace (_cge );if _ada !=nil {return _ada ;};if _bafc ==nil {_bafc =_ab .NewPdfColorspaceDeviceGray ();};_aa ,_ada :=_bafc .ImageToRGB (*_ega );if _ada !=nil {return _ada ;};_feb :=ImageMark {Image :&_aa ,Width :_baf .CTM .ScalingFactorX (),Height :_baf .CTM .ScalingFactorY (),Angle :_baf .CTM .Angle ()};_feb .X ,_feb .Y =_baf .CTM .Translation ();_bec ._dc =append (_bec ._dc ,_feb );_bec ._ga ++;return nil ;};func (_ffcb *wordBag )scanBand (_agde string ,_bgg *wordBag ,_eceg func (_dab *wordBag ,_gdb *textWord )bool ,_ddgf ,_gace ,_afc float64 ,_cga ,_fafc bool )int {_dceee :=_bgg ._fde ;_egf :=_fcef *_dceee ;_beba :=0;_aaeb ,_ccdf :=_ddgf ,_gace ;var _bgga []*textWord ;for _ ,_gecg :=range _ffcb .depthBand (_ddgf -_egf ,_gace +_egf ){if len (_ffcb ._dgaf [_gecg ])==0{continue ;};for _ ,_defe :=range _ffcb ._dgaf [_gecg ]{if !(_ddgf -_egf <=_defe ._dbed &&_defe ._dbed <=_gace +_egf ){continue ;};if !_eceg (_bgg ,_defe ){continue ;};_cfad :=2.0*_dg .Abs (_defe ._fegfd -_bgg ._fde )/(_defe ._fegfd +_bgg ._fde );_fbacf :=_dg .Max (_defe ._fegfd /_bgg ._fde ,_bgg ._fde /_defe ._fegfd );_efc :=_dg .Min (_cfad ,_fbacf );if _afc > 0&&_efc > _afc {continue ;};if !_cga {_bgg .pullWord (_ffcb ,_defe ,_gecg );};_bgga =append (_bgga ,_defe );_beba ++;if !_fafc {if _defe ._dbed < _ddgf {_ddgf =_defe ._dbed ;};if _defe ._dbed > _gace {_gace =_defe ._dbed ;};};if _cga {break ;};};};if _ccbf &&len (_agde )> 0{_f .Log .Info ("\u0073\u0063a\u006e\u0042\u0061\u006e\u0064\u003a\u0020\u0025\u0073\u0020\u005b\u0025\u002e\u0032\u0066\u0020\u0025\u002e\u0032f\u005d\u2192\u005b\u0025\u002e\u0032f\u0020\u0025\u002e\u0032\u0066\u005d\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u002e2\u0066\u0020f\u006f\u006e\u0074\u0073i\u007a\u0065\u003d\u0025\u002e2\u0066\u000a\u0009\u0025\u0071",_agde ,_aaeb ,_ccdf ,_ddgf 
,_gace ,_bgg .PdfRectangle ,_bgg ._fde ,_gefbc (_bgg .text (),100));for _cfbe ,_afcd :=range _bgga {_bcb .Printf ("\u0020 \u0020\u0020\u0025\u0073\u000a",_afcd );if _cfbe >=5{break ;};};};return _beba ;};func _abcgc (_ecfd func (*wordBag ,*textWord ,float64 )bool ,_ffcd float64 )func (*wordBag ,*textWord )bool {return func (_fgdcc *wordBag ,_eaef *textWord )bool {return _ecfd (_fgdcc ,_eaef ,_ffcd )};};
|
||
|
||
// TextTable represents a table.
|
||
// Cells are ordered top-to-bottom, left-to-right.
|
||
// Cells[y] is the (0-offset) y'th row in the table.
|
||
// Cells[y][x] is the (0-offset) x'th column in the table.
|
||
type TextTable struct {
	W, H int
	Cells [][]TableCell
}

// _beffb replaces every point of every subpath with its coordinates passed
// through _gbcg. It does nothing when _adeb is negative. With debugging on,
// each point that _cacfa reports as changed is logged.
func _beffb(_addcc []*subpath) {
	if _adeb < 0.0 {
		return
	}
	for pathIdx, path := range _addcc {
		for ptIdx, before := range path._ggdg {
			after := _dd.Point{X: _gbcg(before.X), Y: _gbcg(before.Y)}
			path._ggdg[ptIdx] = after
			if _bfdg && !_cacfa(before, after) {
				shift := _dd.Point{X: after.X - before.X, Y: after.Y - before.Y}
				_f.Log.Info(" \u0025\u0064\u0020\u002d\u0020\u0025d\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u002d\u003e \u0025\u002e\u0032f\u0020(\u0025\u0067\u0029", pathIdx, ptIdx, before, after, shift)
			}
		}
	}
}
|
||
|
||
// ToTextMark returns the public view of `tm`.
|
||
func (_defa *textMark) ToTextMark() TextMark {
	var public TextMark
	public.Text = _defa._efab
	public.Original = _defa._gedgc
	public.BBox = _defa._cgag
	public.Font = _defa._fbfb
	public.FontSize = _defa._ecbb
	public.FillColor = _defa._cacc
	public.StrokeColor = _defa._gaee
	return public
}

// _agcef classifies the segment from `_gdfe` to `_effb` with _bcdfe and
// converts it to a ruling. It returns (nil, false) when the kind is _bbag.
func _agcef(_gdfe, _effb _dd.Point) (*ruling, bool) {
	kind := _bcdfe(_gdfe, _effb)
	if kind == _bbag {
		return nil, false
	}
	line := lineRuling{_ddeb: kind, _ddbb: _gdfe, _fbgf: _effb}
	return line.asRuling()
}

// Package-level switches and limits used elsewhere in the package.
const (
	_dgff = true
	_bcge = true
	_dgce = true
	_feec = false
	_edfg = false
	_bcafe = 6
	_gdc = 3.0
	_bdcf = 200
	_bdeed = true
	_fdda = true
	_dcca = true
	_adfc = true
)
|
||
|
||
// ExtractPageImages returns the image contents of the page extractor, including data
|
||
// and position, size information for each image.
|
||
// A set of options to control page image extraction can be passed in. The options
|
||
// parameter can be nil for the default options. By default, inline stencil masks
|
||
// are not extracted.
|
||
func (_ded *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_cb :=&imageExtractContext {_gf :options };_eac :=_cb .extractContentStreamImages (_ded ._aba ,_ded ._be );if _eac !=nil {return nil ,_eac ;};return &PageImages {Images :_cb ._dc },nil ;};func (_eccgc *textWord )computeText ()string {_ggebc :=make ([]string ,len (_eccgc ._bcce ));for _becg ,_ebbc :=range _eccgc ._bcce {_ggebc [_becg ]=_ebbc ._efab ;};return _bc .Join (_ggebc ,"");};func (_eee *shapesState )devicePoint (_gfa ,_age float64 )_dd .Point {_gbgg :=_eee ._efec .Mult (_eee ._eae );_gfa ,_age =_gbgg .Transform (_gfa ,_age );return _dd .NewPoint (_gfa ,_age );};func (_abbe *stateStack )pop ()*textState {if _abbe .empty (){return nil ;};_fcd :=*(*_abbe )[len (*_abbe )-1];*_abbe =(*_abbe )[:len (*_abbe )-1];return &_fcd ;};type subpath struct{_ggdg []_dd .Point ;_dcfd bool ;};func (_edgbb *shapesState )moveTo (_caae ,_bdfc float64 ){_edgbb ._dgcd =true ;_edgbb ._cdc =_edgbb .devicePoint (_caae ,_bdfc );if _facf {_f .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0063\u0075\u0072\u0072\u0065\u006e\u0074\u003d%.\u0032\u0066",_caae ,_bdfc ,_edgbb ._cdc );};};func (_bdefd paraList )yNeighbours (_gcfbd float64 )map[*textPara ][]int {_defdb :=make ([]event ,2*len (_bdefd ));if _gcfbd ==0{for _adff ,_beef :=range _bdefd {_defdb [2*_adff ]=event {_beef .Lly ,true ,_adff };_defdb [2*_adff +1]=event {_beef .Ury ,false ,_adff };};}else {for _cfbc ,_ebae :=range _bdefd {_defdb [2*_cfbc ]=event {_ebae .Lly -_gcfbd *_ebae .fontsize (),true ,_cfbc };_defdb [2*_cfbc +1]=event {_ebae .Ury +_gcfbd *_ebae .fontsize (),false ,_cfbc };};};return _bdefd .eventNeighbours (_defdb );};func (_bggc *textTable )growTable (){_bfbf :=func (_febe paraList ){_bggc ._bfcd ++;for _gdbe :=0;_gdbe < _bggc ._agea ;_gdbe ++{_cfde :=_febe [_gdbe ];_bggc .put (_gdbe ,_bggc ._bfcd -1,_cfde );};};_egab :=func (_bfccg paraList ){_bggc ._agea 
++;for _ecbfa :=0;_ecbfa < _bggc ._bfcd ;_ecbfa ++{_gfgdb :=_bfccg [_ecbfa ];_bggc .put (_bggc ._agea -1,_ecbfa ,_gfgdb );};};for {_efba :=false ;_gaga :=_bggc .getDown ();_fdcb :=_bggc .getRight ();if _gaga !=nil &&_fdcb !=nil {_baca :=_gaga [len (_gaga )-1];if _baca !=nil &&!_baca ._ffde &&_baca ==_fdcb [len (_fdcb )-1]{_bfbf (_gaga );if _fdcb =_bggc .getRight ();_fdcb !=nil {_egab (_fdcb );_bggc .put (_bggc ._agea -1,_bggc ._bfcd -1,_baca );};_efba =true ;};};if !_efba &&_gaga !=nil {_bfbf (_gaga );_efba =true ;};if !_efba &&_fdcb !=nil {_egab (_fdcb );_efba =true ;};if !_efba {break ;};};};func (_ebg *textObject )moveLP (_becd ,_fca float64 ){_ebg ._eddf .Concat (_dd .NewMatrix (1,0,0,1,_becd ,_fca ));_ebg ._bfe =_ebg ._eddf ;};type cachedImage struct{_fg *_ab .Image ;_egg _ab .PdfColorspace ;};func _gfbd (_ageb ,_aaefg int )int {if _ageb < _aaefg {return _ageb ;};return _aaefg ;};func _ggafe (_gaed ,_ecege _dd .Point )bool {_bafb :=_dg .Abs (_gaed .X -_ecege .X );_abgfa :=_dg .Abs (_gaed .Y -_ecege .Y );return _bedfc (_bafb ,_abgfa );};
|
||
|
||
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
|
||
func (_defcd *TextMarkArray )BBox ()(_ab .PdfRectangle ,bool ){var _babdf _ab .PdfRectangle ;_fea :=false ;for _ ,_gde :=range _defcd ._faf {if _gde .Meta ||_egfge (_gde .Text ){continue ;};if _fea {_babdf =_bfcg (_babdf ,_gde .BBox );}else {_babdf =_gde .BBox ;_fea =true ;};};return _babdf ,_fea ;};func (_ebdc *shapesState )drawRectangle (_efgc ,_eagd ,_edcd ,_edda float64 ){if _facf {_gbac :=_ebdc .devicePoint (_efgc ,_eagd );_geda :=_ebdc .devicePoint (_efgc +_edcd ,_eagd +_edda );_eedg :=_ab .PdfRectangle {Llx :_gbac .X ,Lly :_gbac .Y ,Urx :_geda .X ,Ury :_geda .Y };_f .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_eedg );};_ebdc .newSubPath ();_ebdc .moveTo (_efgc ,_eagd );_ebdc .lineTo (_efgc +_edcd ,_eagd );_ebdc .lineTo (_efgc +_edcd ,_eagd +_edda );_ebdc .lineTo (_efgc ,_eagd +_edda );_ebdc .closePath ();};func (_gbbca *subpath )isQuadrilateral ()bool {if len (_gbbca ._ggdg )< 4||len (_gbbca ._ggdg )> 5{return false ;};if len (_gbbca ._ggdg )==5{_bafed :=_gbbca ._ggdg [0];_dfbfc :=_gbbca ._ggdg [4];if _bafed .X !=_dfbfc .X ||_bafed .Y !=_dfbfc .Y {return false ;};};return true ;};func _afef (_fgga *wordBag ,_bcff int )*textLine {_ffea :=_fgga .firstWord (_bcff );_fdca :=textLine {PdfRectangle :_ffea .PdfRectangle ,_ddfd :_ffea ._fegfd ,_gcfb :_ffea ._dbed };_fdca .pullWord (_fgga ,_ffea ,_bcff );return &_fdca ;};
|
||
|
||
// NewFromContents creates a new extractor from contents and page resources.
|
||
func NewFromContents (contents string ,resources *_ab .PdfPageResources )(*Extractor ,error ){_fcf :=&Extractor {_aba :contents ,_be :resources ,_fa :map[string ]fontEntry {},_gc :map[string ]textResult {}};return _fcf ,nil ;};func _cdge (_cbed []*textWord ,_cgde int )[]*textWord {_ecade :=len (_cbed );copy (_cbed [_cgde :],_cbed [_cgde +1:]);return _cbed [:_ecade -1];};func (_addc rulingList )coalesce ()rulingList {if len (_addc )==0{return nil ;};_addc .sortStrict ();_bbfg :=_addc [0];var _badc rulingList ;for _ ,_cfbf :=range _addc [1:]{_bacc :=_bbfg ._aefag ==_cfbf ._aefag &&_bbfg ._deeda ==_cfbf ._deeda &&_cfbf ._efac <=_bbfg ._bgae +1.0;if _bacc {_gecgc :=*_bbfg ;_bbfg ._bgae =_cfbf ._bgae ;if _bbfg ._bgae < _bbfg ._efac {_f .Log .Error ("\u0076\u0030\u002ehi\u0020\u003c\u0020\u0076\u0030\u002e\u006c\u006f\u000a\t\u00760\u003d%\u0073\n\u0009\u0020\u0076\u003d\u0025\u0073\u000a\u0009\u0020\u002d\u003e\u0025\u0073",_gecgc .String (),_cfbf .String (),_bbfg .String ());return nil ;};}else {_badc =append (_badc ,_bbfg );_bbfg =_cfbf ;};};_badc =append (_badc ,_bbfg );return _badc ;};func (_eadc *textObject )setTextMatrix (_fab []float64 ){if len (_fab )!=6{_f .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_fab ));return ;};_abd ,_gaa ,_gfb ,_adbe ,_ecf ,_dcec :=_fab [0],_fab [1],_fab [2],_fab [3],_fab [4],_fab [5];_eadc ._bfe =_dd .NewMatrix (_abd ,_gaa ,_gfb ,_adbe ,_ecf ,_dcec );_eadc ._eddf =_eadc ._bfe ;};const _eb =20;func _edee (_dadg *wordBag ,_cceb *textWord ,_bef float64 )bool {return _cceb .Llx < _dadg .Urx +_bef &&_dadg .Llx -_bef < _cceb .Urx ;};func (_fgdc *subpath )close (){if !_cacfa (_fgdc ._ggdg [0],_fgdc .last ()){_fgdc .add (_fgdc ._ggdg [0]);};_fgdc ._dcfd =true ;_fgdc .removeDuplicates ();};func (_fdbd *stateStack )top ()*textState {if _fdbd .empty (){return nil ;};return (*_fdbd )[_fdbd .size ()-1];};
|
||
|
||
// String returns a description of `w`.
|
||
func (_agfa *textWord )String ()string {return _bcb .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_agfa ._dbed ,_agfa .PdfRectangle ,_agfa ._fegfd ,_agfa ._aeee );};
|
||
|
||
// String returns a description of `t`.
|
||
func (_fdcf *textTable )String ()string {return _bcb .Sprintf ("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074",_fdcf ._agea ,_fdcf ._bfcd ,_fdcf ._gdbdc );};func (_ce *imageExtractContext )processOperand (_cdd *_bdb .ContentStreamOperation ,_agg _bdb .GraphicsState ,_dgc *_ab .PdfPageResources )error {if _cdd .Operand =="\u0042\u0049"&&len (_cdd .Params )==1{_db ,_cbe :=_cdd .Params [0].(*_bdb .ContentStreamInlineImage );if !_cbe {return nil ;};if _ecc ,_ceb :=_ea .GetBoolVal (_db .ImageMask );_ceb {if _ecc &&!_ce ._gf .IncludeInlineStencilMasks {return nil ;};};return _ce .extractInlineImage (_db ,_agg ,_dgc );}else if _cdd .Operand =="\u0044\u006f"&&len (_cdd .Params )==1{_ba ,_bfd :=_ea .GetName (_cdd .Params [0]);if !_bfd {_f .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");return _af ;};_ ,_gcf :=_dgc .GetXObjectByName (*_ba );switch _gcf {case _ab .XObjectTypeImage :return _ce .extractXObjectImage (_ba ,_agg ,_dgc );case _ab .XObjectTypeForm :return _ce .extractFormImages (_ba ,_agg ,_dgc );};};return nil ;};func (_fgff *textPara )depth ()float64 {if len (_fgff ._fafb )> 0{return _fgff ._fafb [0]._gcfb ;};return _fgff ._egbc .get (0,0).depth ();};func (_eega *textLine )markWordBoundaries (){_egec :=_adbd *_eega ._ddfd ;for _ceab ,_gdbd :=range _eega ._cgab [1:]{if _gca (_gdbd ,_eega ._cgab [_ceab ])>=_egec {_gdbd ._deff =true ;};};};func (_fdd *textObject )checkOp (_gea *_bdb .ContentStreamOperation ,_cagc int ,_ggde bool )(_cdfg bool ,_add error ){if _fdd ==nil {var _eeg []_ea .PdfObject ;if _cagc > 0{_eeg =_gea .Params ;if len (_eeg )> _cagc {_eeg =_eeg [:_cagc ];};};_f .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_gea .Operand ,_eeg );};if _cagc >=0{if len (_gea .Params )!=_cagc {if _ggde {_add =_d .New 
("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");};_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_gea .Operand ,_cagc ,len (_gea .Params ),_gea .Params );return false ,_add ;};};return true ,nil ;};
|
||
|
||
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
|
||
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
|
||
// ExtractTextWithStats calls ExtractPageText and returns the page's Text()
// together with the two counters ExtractPageText reports. On error the text is
// "" and the counters and error are passed through unchanged.
func (_bcd *Extractor )ExtractTextWithStats ()(_aed string ,_dga int ,_cda int ,_ccf error ){
	// The named results live in the function's own scope, so this `:=` declares
	// only _cdf and assigns to the existing _dga, _cda and _ccf.
	_cdf ,_dga ,_cda ,_ccf :=_bcd .ExtractPageText ();
	if _ccf !=nil {
		return "",_dga ,_cda ,_ccf ;
	};
	return _cdf .Text (),_dga ,_cda ,nil ;
};

// llyRange returns the sub-slice of `_dcea` (indexes into the paragraph list)
// whose paragraphs have Lly within [`_cfgf`, `_eagf`], or nil when the range
// lies entirely outside the list.
// NOTE(review): the binary searches assume `_dcea` orders the paragraphs by
// ascending Lly and has the same length as the list; `_dcea[0]` also assumes
// the list is non-empty -- confirm against callers.
func (_geee paraList )llyRange (_dcea []int ,_cfgf ,_eagf float64 )[]int {
	_gbbbd :=len (_geee );
	if _eagf < _geee [_dcea [0]].Lly ||_cfgf > _geee [_dcea [_gbbbd -1]].Lly {
		return nil ;
	};
	// First position with Lly >= lower bound.
	_ccea :=_de .Search (_gbbbd ,func (_cdadc int )bool {return _geee [_dcea [_cdadc ]].Lly >=_cfgf });
	// First position with Lly > upper bound.
	_fdae :=_de .Search (_gbbbd ,func (_acgg int )bool {return _geee [_dcea [_acgg ]].Lly > _eagf });
	return _dcea [_ccea :_fdae ];
};

// del removes `_caaed` from the set.
func (_dgaga intSet )del (_caaed int ){delete (_dgaga ,_caaed )};

// bbox returns the table's embedded PdfRectangle.
func (_afae *textTable )bbox ()_ab .PdfRectangle {return _afae .PdfRectangle };

// tidied returns the rulings with duplicates removed, coalesced and sorted.
// It returns nil when coalesce returns nil. `_gdfeb` is only used as a label
// in the debug output ("tidied: %q vecs=%d uniques=%d coalesced=%d").
func (_ccce rulingList )tidied (_gdfeb string )rulingList {
	_dfbb :=_ccce .removeDuplicates ();
	_ecec :=_dfbb .coalesce ();
	if _ecec ==nil {
		return nil ;
	};
	_ecec .sort ();
	if _bfdg {
		_f .Log .Info ("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064",_gdfeb ,len (_ccce ),len (_dfbb ),len (_ecec ));
		for _bccd ,_aeacc :=range _ecec {
			_bcb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bccd ,_aeacc );
		};
	};
	return _ecec ;
};

// renderText decodes the bytes `_eacc` with the current font and appends one
// text mark per decoded string to the text object, advancing the text matrix
// (_bfe) after each mark.
//
// It returns nil without doing anything when the _cce flag is set (logged as
// "renderText: Invalid font. Not processing."), and an error
// ("no char metrics: font=%s code=%d") when the font has no metrics for a
// character code.
//
// Names below follow the labels used in the function's own log messages:
// tfs=_eaa, tc=_gcbf._bge, tw=_gcbf._eba, th=_abcg.
func (_egef *textObject )renderText (_eacc []byte )error {
	if _egef ._cce {
		_f .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");
		return nil ;
	};
	_eddg :=_egef .getCurrentFont ();
	_gfce :=_eddg .BytesToCharcodes (_eacc );
	// _fed: decoded strings, _aab: number of characters, _abdc: number of misses
	// (per the "numChars=%d numMisses=%d" log below).
	_fed ,_aab ,_abdc :=_eddg .CharcodesToStrings (_gfce );
	if _abdc > 0{
		_f .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_aab ,_abdc );
	};
	// Accumulate the counters on the text state.
	_egef ._geag ._eefb +=_aab ;
	_egef ._geag ._dag +=_abdc ;
	_gcbf :=_egef ._geag ;
	_eaa :=_gcbf ._cdb ;
	_abcg :=_gcbf ._dcbc /100.0;
	// Width of a space: try the rune, then char code 32, then the default font.
	_bdbf ,_dff :=_eddg .GetRuneMetrics (' ');
	if !_dff {
		_bdbf ,_dff =_eddg .GetCharMetrics (32);
	};
	if !_dff {
		_bdbf ,_ =_ab .DefaultFont ().GetRuneMetrics (' ');
	};
	_ebaf :=_bdbf .Wx *_dffe ;
	_f .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_ebaf ,_fed ,_eddg ,_eaa );
	// Matrix built from the text-state parameters: x scale tfs*th, y scale tfs,
	// y offset _gcbf._cfa.
	_bfb :=_dd .NewMatrix (_eaa *_abcg ,0,0,_eaa ,0,_gcbf ._cfa );
	if _fcdac {
		_f .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_gfce ),_gfce ,_fed );
	};
	// NOTE(review): the last verb of this format is %q but the argument is the
	// int len(_fed) -- confirm this is intended.
	_f .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_gfce ),_gfce ,len (_fed ));
	_edbe :=_egef .getFillColor ();
	_dfb :=_egef .getStrokeColor ();
	for _dedg ,_eff :=range _fed {
		_efdc :=[]rune (_eff );
		// A lone NUL rune is skipped entirely (the text matrix is not advanced).
		if len (_efdc )==1&&_efdc [0]=='\x00'{
			continue ;
		};
		_dedb :=_gfce [_dedg ];
		// Rendering matrix for this mark, logged as "trm" below.
		_gba :=_egef ._ffb .CTM .Mult (_egef ._bfe ).Mult (_bfb );
		// Word spacing (tw) is only applied to a single space character.
		_degf :=0.0;
		if len (_efdc )==1&&_efdc [0]==32{
			_degf =_gcbf ._eba ;
		};
		_ddfa ,_edbc :=_eddg .GetCharMetrics (_dedb );
		if !_edbc {
			_f .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_dedb ,_efdc ,_efdc ,_eddg );
			return _bcb .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_eddg .String (),_dedb );
		};
		// Glyph displacement scaled by _dffe.
		_ggg :=_dd .Point {X :_ddfa .Wx *_dffe ,Y :_ddfa .Wy *_dffe };
		// _baa: advance without character spacing (logged as t0);
		// _eddc: advance including character spacing tc (logged as t).
		_baa :=_dd .Point {X :(_ggg .X *_eaa +_degf )*_abcg };
		_eddc :=_dd .Point {X :(_ggg .X *_eaa +_gcbf ._bge +_degf )*_abcg };
		if _fcdac {
			_f .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_eaa ,_gcbf ._bge ,_gcbf ._eba ,_abcg );
			_f .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_ggg ,_baa ,_eddc );
		};
		_dbcf :=_cfc (_baa );
		_dbfe :=_cfc (_eddc );
		// Matrix at the end of the glyph (without character spacing); its
		// translation is passed to newTextMark.
		_fgd :=_egef ._ffb .CTM .Mult (_egef ._bfe ).Mult (_dbcf );
		if _ddb {
			_f .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_egef ._ffb .CTM ,_egef ._bfe ,_dbfe ,_bgea (_egef ._ffb .CTM .Mult (_egef ._bfe ).Mult (_dbfe )),_dbcf ,_fgd ,_bgea (_fgd ));
		};
		_bgee ,_cgec :=_egef .newTextMark (_cg .ExpandLigatures (_efdc ),_gba ,_bgea (_fgd ),_dg .Abs (_ebaf *_gba .ScalingFactorX ()),_eddg ,_egef ._geag ._bge ,_edbe ,_dfb );
		if !_cgec {
			// NOTE(review): this `continue` also skips the Concat at the bottom
			// of the loop, so the text matrix is not advanced past a mark that
			// is reported as outside the page -- confirm this is intended.
			_f .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");
			continue ;
		};
		// Record the rune the font's encoder maps the code to, when available.
		// NOTE(review): _eddg has already been dereferenced above, so the nil
		// branch cannot be reached with a nil font.
		if _eddg ==nil {
			_f .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");
		}else if _eddg .Encoder ()==nil {
			_f .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_eddg );
		}else {
			if _fbdg ,_fcgf :=_eddg .Encoder ().CharcodeToRune (_dedb );_fcgf {
				_bgee ._gedgc =string (_fbdg );
			};
		};
		_f .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_dedg ,_dedb ,_bgee ,_gba );
		_egef ._gedc =append (_egef ._gedc ,&_bgee );
		// Advance the text matrix by the full displacement (with char spacing).
		_egef ._bfe .Concat (_dbfe );
	};
	return nil ;
};
|
||
|
||
// String returns a human readable description of `s`.
|
||
func (_bdfa intSet )String ()string {var _gafcc []int ;for _gcgfa :=range _bdfa {if _bdfa .has (_gcgfa ){_gafcc =append (_gafcc ,_gcgfa );};};_de .Ints (_gafcc );return _bcb .Sprintf ("\u0025\u002b\u0076",_gafcc );};func (_gfg *imageExtractContext )extractContentStreamImages (_bf string ,_ad *_ab .PdfPageResources )error {_fee :=_bdb .NewContentStreamParser (_bf );_ef ,_gg :=_fee .Parse ();if _gg !=nil {return _gg ;};if _gfg ._ge ==nil {_gfg ._ge =map[*_ea .PdfObjectStream ]*cachedImage {};};if _gfg ._gf ==nil {_gfg ._gf =&ImageExtractOptions {};};_ae :=_bdb .NewContentStreamProcessor (*_ef );_ae .AddHandler (_bdb .HandlerConditionEnumAllOperands ,"",func (_gga *_bdb .ContentStreamOperation ,_afe _bdb .GraphicsState ,_bde *_ab .PdfPageResources )error {return _gfg .processOperand (_gga ,_afe ,_bde );});return _ae .Process (_ad );};func (_eccg paraList )sortReadingOrder (){_f .Log .Trace ("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_eccg ));if len (_eccg )<=1{return ;};_eccg .computeEBBoxes ();_de .Slice (_eccg ,func (_dgcdg ,_fedaa int )bool {return _addb (_eccg [_dgcdg ],_eccg [_fedaa ])<=0});_dceg :=_eccg .topoOrder ();_eccg .reorder (_dceg );};func (_ddge *textTable )computeBbox ()_ab .PdfRectangle {_fgfb :=_ddge .get (0,0).PdfRectangle ;for _cagg :=1;_cagg < _ddge ._agea ;_cagg ++{_fgfb =_bfcg (_fgfb ,_ddge .get (_cagg ,0).PdfRectangle );};for _cfba :=1;_cfba < _ddge ._bfcd ;_cfba ++{for _dbbg :=0;_dbbg < _ddge ._agea ;_dbbg ++{_bagd :=_ddge .get (_dbbg ,_cfba );if _bagd !=nil {_fgfb =_bfcg (_fgfb ,_bagd .PdfRectangle );};};};return _fgfb ;};func _bgea (_fbadd _dd .Matrix )_dd .Point {_edgb ,_gfec :=_fbadd .Translation ();return _dd .Point {X :_edgb ,Y :_gfec };};func (_dbgb *textMark )bbox ()_ab 
.PdfRectangle {return _dbgb .PdfRectangle };func (_deeee *textWord )absorb (_efae *textWord ){_deeee .PdfRectangle =_bfcg (_deeee .PdfRectangle ,_efae .PdfRectangle );_deeee ._bcce =append (_deeee ._bcce ,_efae ._bcce ...);};func (_bca *textObject )setWordSpacing (_cebc float64 ){if _bca ==nil {return ;};_bca ._geag ._eba =_cebc ;};type textMark struct{_ab .PdfRectangle ;_eaeb int ;_efab string ;_gedgc string ;_fbfb *_ab .PdfFont ;_ecbb float64 ;_dgcb float64 ;_eegab _dd .Matrix ;_ddad _dd .Point ;_cgag _ab .PdfRectangle ;_cacc _ca .Color ;_gaee _ca .Color ;};func (_fga *shapesState )closePath (){if _fga ._dgcd {_fga ._fcce =append (_fga ._fcce ,_bfee (_fga ._cdc ));_fga ._dgcd =false ;}else if len (_fga ._fcce )==0{_f .Log .Error ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");_fga ._dgcd =false ;return ;};_fga ._fcce [len (_fga ._fcce )-1].close ();if _facf {_f .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_fga );};};func _gffd (_cefc float64 )int {var _bdfcf int ;if _cefc >=0{_bdfcf =int (_cefc /_fdgf );}else {_bdfcf =int (_cefc /_fdgf )-1;};return _bdfcf ;};func (_bgc *textObject )reset (){_bgc ._bfe =_dd .IdentityMatrix ();_bgc ._eddf =_dd .IdentityMatrix ();_bgc ._gedc =nil ;};func (_egad *subpath )last ()_dd .Point {return _egad ._ggdg [len (_egad ._ggdg )-1]};func (_bgf *textLine )toTextMarks (_gcddd *int )[]TextMark {var _gbad []TextMark ;for _ ,_bcgfd :=range _bgf ._cgab {if _bcgfd ._deff {_gbad =_aaab (_gbad ,_gcddd ,"\u0020");};_fedc :=_bcgfd .toTextMarks (_gcddd );_gbad =append (_gbad ,_fedc ...);};return _gbad ;};func (_ggfg *textPara )text ()string {_caeb :=new (_g .Buffer );_ggfg .writeText (_caeb );return _caeb .String ();};func _ggfea (_bbbe ,_ebdf _ab .PdfRectangle )bool {return _bbbe .Lly <=_ebdf .Ury &&_ebdf .Lly <=_bbbe .Ury ;};func (_bfeb *subpath )add (_agb ..._dd .Point ){_bfeb ._ggdg =append (_bfeb ._ggdg ,_agb 
...)};func (_dgfc paraList )xNeighbours (_dfdd float64 )map[*textPara ][]int {_cbgcg :=make ([]event ,2*len (_dgfc ));if _dfdd ==0{for _bfda ,_bfea :=range _dgfc {_cbgcg [2*_bfda ]=event {_bfea .Llx ,true ,_bfda };_cbgcg [2*_bfda +1]=event {_bfea .Urx ,false ,_bfda };};}else {for _eaabg ,_aaaa :=range _dgfc {_cbgcg [2*_eaabg ]=event {_aaaa .Llx -_dfdd *_aaaa .fontsize (),true ,_eaabg };_cbgcg [2*_eaabg +1]=event {_aaaa .Urx +_dfdd *_aaaa .fontsize (),false ,_eaabg };};};return _dgfc .eventNeighbours (_cbgcg );};func (_cfbeb paraList )findTextTables ()[]*textTable {var _bbdd []*textTable ;for _ ,_cbegg :=range _cfbeb {if _cbegg .taken ()||_cbegg .Width ()==0{continue ;};_decd :=_cbegg .isAtom ();if _decd ==nil {continue ;};_decd .growTable ();if _decd ._agea *_decd ._bfcd < _edbfg {continue ;};_decd .markCells ();_decd .log ("\u0067\u0072\u006fw\u006e");_bbdd =append (_bbdd ,_decd );};return _bbdd ;};func (_fdba *stateStack )push (_bdd *textState ){_cacg :=*_bdd ;*_fdba =append (*_fdba ,&_cacg )};func (_cfg *shapesState )lineTo (_effc ,_cdabd float64 ){_cfg .addPoint (_effc ,_cdabd );if _facf {_f .Log .Info ("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066",_effc ,_cdabd ,_cfg .devicePoint (_effc ,_cdabd ));};};func _fbaf (_acbd ,_bcffff int )uint64 {return uint64 (_acbd )*0x1000000+uint64 (_bcffff )};func _bcg (_dafd _ab .PdfRectangle )textState {return textState {_dcbc :100,_addf :RenderModeFill ,_feff :_dafd };};
|
||
|
||
// Len returns the number of TextMarks in `ma`.
|
||
// Len returns the number of TextMarks in `ma`; a nil receiver has length 0.
func (_bace *TextMarkArray )Len ()int {
	if _bace ==nil {
		return 0;
	};
	return len (_bace ._faf );
};

// _bcdfe classifies the segment between the two points by passing its absolute
// x and y extents to _dgaa.
func _bcdfe (_cceg ,_adgc _dd .Point )rulingKind {
	_gfeg :=_dg .Abs (_cceg .X -_adgc .X );
	_adfg :=_dg .Abs (_cceg .Y -_adgc .Y );
	return _dgaa (_gfeg ,_adfg );
};

// _fgfab partitions the words of `_cbff` into a list of word bags, emptying
// `_cbff` stratum by stratum.
// Each new bag is seeded (via _bedg, with `_fabg`) from the first word in
// reading order and then grown by repeated scanBand calls until a full pass
// adds nothing.
// NOTE(review): the three per-bag limits are multiples of the bag's _fde
// field; the debug message labels two of them maxIntraDepthGap (_cabfg) and
// maxIntraReadingGap (_defcc). The exact semantics of scanBand's arguments are
// not visible in this chunk.
func _fgfab (_cbff *wordBag ,_fabg float64 )[]*wordBag {
	var _accb []*wordBag ;
	for _ ,_cbega :=range _cbff .depthIndexes (){
		_eeee :=false ;
		for !_cbff .empty (_cbega ){
			// Seed a new bag with the first word and remove it from the source.
			_gffg :=_cbff .firstReadingIndex (_cbega );
			_fbga :=_cbff .firstWord (_gffg );
			_gbbc :=_bedg (_fbga ,_fabg );
			_cbff .removeWord (_fbga ,_gffg );
			if _ccbf {
				_f .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_fbga .String ());
			};
			// Do-while: the body runs once, then again for as long as the
			// previous pass set _eeee (i.e. a scan added something).
			for _cgfe :=true ;_cgfe ;_cgfe =_eeee {
				_eeee =false ;
				_acd :=_cgbe *_gbbc ._fde ;
				_defcc :=_dbba *_gbbc ._fde ;
				_cabfg :=_eceee *_gbbc ._fde ;
				if _ccbf {
					_f .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_gbbc .minDepth (),_gbbc .maxDepth (),_cabfg ,_defcc );
				};
				// "vertical" scan: depth band widened by _cabfg on both sides.
				if _cbff .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_gbbc ,_abcgc (_edee ,0),_gbbc .minDepth ()-_cabfg ,_gbbc .maxDepth ()+_cabfg ,_ecdbd ,false ,false )> 0{
					_eeee =true ;
				};
				// "horizontal" scan: the bag's own depth band, x-overlap widened by _defcc.
				if _cbff .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_gbbc ,_abcgc (_edee ,_defcc ),_gbbc .minDepth (),_gbbc .maxDepth (),_eec ,false ,false )> 0{
					_eeee =true ;
				};
				// Something was added: rescan before trying the last step.
				if _eeee {
					continue ;
				};
				// First call with the penultimate flag true; its count decides
				// whether the second ("other") call, with the flags swapped, is made.
				_cggg :=_cbff .scanBand ("",_gbbc ,_abcgc (_dbb ,_acd ),_gbbc .minDepth (),_gbbc .maxDepth (),_bgd ,true ,false );
				if _cggg > 0{
					_afgg :=(_gbbc .maxDepth ()-_gbbc .minDepth ())/_gbbc ._fde ;
					if (_cggg > 1&&float64 (_cggg )> 0.3*_afgg )||_cggg <=10{
						if _cbff .scanBand ("\u006f\u0074\u0068e\u0072",_gbbc ,_abcgc (_dbb ,_acd ),_gbbc .minDepth (),_gbbc .maxDepth (),_bgd ,false ,true )> 0{
							_eeee =true ;
						};
					};
				};
			};
			_accb =append (_accb ,_gbbc );
		};
	};
	return _accb ;
};

// isExportable reports whether at least one row's first cell (column 0) has
// text that is longer than one rune and does not match the pattern _bccfg.
func (_defb *textTable )isExportable ()bool {
	// _fdaeb is true for a row whose first cell is at most one rune long or
	// matches _bccfg.
	_fdaeb :=func (_cgbg int )bool {
		_fbgaf :=_defb .get (0,_cgbg );
		_bfcdc :=_fbgaf .text ();
		_cgdd :=_c .RuneCountInString (_bfcdc );
		_ddcc :=_bccfg .MatchString (_bfcdc );
		return _cgdd <=1||_ddcc ;
	};
	for _cdbf :=0;_cdbf < _defb ._bfcd ;_cdbf ++{
		if !_fdaeb (_cdbf ){
			return true ;
		};
	};
	return false ;
};

// appendWord adds `_ffdac` to the end of the line, grows the line's rectangle
// to include the word, and raises the line's _ddfd and _gcfb values to the
// word's _fegfd and _dbed values when those are larger.
func (_cddd *textLine )appendWord (_ffdac *textWord ){
	_cddd ._cgab =append (_cddd ._cgab ,_ffdac );
	_cddd .PdfRectangle =_bfcg (_cddd .PdfRectangle ,_ffdac .PdfRectangle );
	if _ffdac ._fegfd > _cddd ._ddfd {
		_cddd ._ddfd =_ffdac ._fegfd ;
	};
	if _ffdac ._dbed > _cddd ._gcfb {
		_cddd ._gcfb =_ffdac ._dbed ;
	};
};

// _cbgg builds a word bag from `_ccc`, which must be non-empty: the bag is
// seeded with the first word (via _bedg) and each remaining word is appended
// to the bucket selected by _gffd of its _dbed value. The bag is sorted before
// it is returned.
func _cbgg (_ccc []*textWord ,_ceefd float64 )*wordBag {
	_dafa :=_bedg (_ccc [0],_ceefd );
	for _ ,_fbbg :=range _ccc [1:]{
		_bff :=_gffd (_fbbg ._dbed );
		_dafa ._dgaf [_bff ]=append (_dafa ._dgaf [_bff ],_fbbg );
	};
	_dafa .sort ();
	return _dafa ;
};

// _bfcg returns the smallest rectangle that contains both arguments.
func _bfcg (_gbca ,_egae _ab .PdfRectangle )_ab .PdfRectangle {
	return _ab .PdfRectangle {Llx :_dg .Min (_gbca .Llx ,_egae .Llx ),Lly :_dg .Min (_gbca .Lly ,_egae .Lly ),Urx :_dg .Max (_gbca .Urx ,_egae .Urx ),Ury :_dg .Max (_gbca .Ury ,_egae .Ury )};
};
|
||
|
||
// String returns a description of `v`.
|
||
func (_dgbf *ruling )String ()string {if _dgbf ._aefag ==_bbag {return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047";};_caga ,_dgagd :="\u0078","\u0079";if _dgbf ._aefag ==_ffad {_caga ,_dgagd ="\u0079","\u0078";};return _bcb .Sprintf ("\u0025\u0031\u0030\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d \u0025\u0036\u002e\u0032\u0066 \u0028\u00256\u002e\u0032\u0066\u0029",_dgbf ._aefag ,_caga ,_dgbf ._deeda ,_dgagd ,_dgbf ._efac ,_dgbf ._bgae ,_dgbf ._bgae -_dgbf ._efac );};func (_bdga *textLine )endsInHyphen ()bool {_ddaa :=_bdga ._cgab [len (_bdga ._cgab )-1];_fgfa :=[]rune (_ddaa ._aeee );if !_e .Is (_e .Hyphen ,_fgfa [len (_fgfa )-1]){return false ;};if _ddaa ._deff &&_ggfb (_fgfa ){return true ;};return _ggfb ([]rune (_bdga .text ()));};func (_bfdb lineRuling )asRuling ()(*ruling ,bool ){_adg :=ruling {_aefag :_bfdb ._ddeb };switch _bfdb ._ddeb {case _geb :_adg ._deeda =_bfdb .xMean ();_adg ._efac =_dg .Min (_bfdb ._ddbb .Y ,_bfdb ._fbgf .Y );_adg ._bgae =_dg .Max (_bfdb ._ddbb .Y ,_bfdb ._fbgf .Y );case _ffad :_adg ._deeda =_bfdb .yMean ();_adg ._efac =_dg .Min (_bfdb ._ddbb .X ,_bfdb ._fbgf .X );_adg ._bgae =_dg .Max (_bfdb ._ddbb .X ,_bfdb ._fbgf .X );default:_f .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_bfdb ._ddeb );return nil ,false ;};return &_adg ,true ;};
|
||
|
||
// ExtractText processes and extracts all text data in content streams and returns as a string.
|
||
// It takes into account character encodings in the PDF file, which are decoded by
|
||
// CharcodeBytesToUnicode.
|
||
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
|
||
func (_bce *Extractor )ExtractText ()(string ,error ){_bac ,_ ,_ ,_adf :=_bce .ExtractTextWithStats ();return _bac ,_adf ;};func (_ddcdgb paraList )findGridTables (_abbc []rulingList )[]*textTable {if _gfeee {_f .Log .Info ("\u0066\u0069\u006e\u0064T\u0061\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072i\u0064s\u003a\u0020\u0025\u0064\u0020\u0070\u0061r\u0061\u0073",len (_ddcdgb ));for _fcfgb ,_gfga :=range _ddcdgb {_bcb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fcfgb ,_gfga );};};var _fdcg []*textTable ;for _dcgc ,_cfd :=range _abbc {_ffbb :=_ddcdgb .findTableGrid (_cfd );if _ffbb !=nil {_ffbb .log (_bcb .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_dcgc ));_fdcg =append (_fdcg ,_ffbb );_ffbb .markCells ();};};return _fdcg ;};func _dcfb (_ddee ,_ced _ab .PdfRectangle )bool {return _ced .Llx <=_ddee .Urx &&_ddee .Llx <=_ced .Urx };func (_dfbae *wordBag )firstWord (_bbec int )*textWord {return _dfbae ._dgaf [_bbec ][0]};func (_ggb *PageText )computeViews (){_feda :=_fffc (_ggb ._edf );_ggfe :=_ffgb (_ggb ._dafc );var _cdad []rulingList ;if _dcca {_cdad =append (_cdad ,_feda ...);};if _adfc {_cdad =append (_cdad ,_ggfe ...);};if _bfdg {if len (_feda )> 0{_f .Log .Info ("S\u0074\u0072\u006f\u006b\u0065\u0073\u003a\u0020\u0025\u0064",len (_ggb ._edf ));_f .Log .Info ("\u0053\u0074r\u006f\u006b\u0065 \u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0064",len (_feda ));for _bafa ,_fcbg :=range _feda {_bcb .Printf ("\u0025\u0034d\u003a\u0020\u0025d\u0020\u0072\u0075\u006c\u0069\u006e\u0067\u0073\u000a",_bafa ,len (_fcbg ));for _bfc ,_eede :=range _fcbg {_bcb .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_bfc ,_eede );};};};if len (_ggfe )> 0{_f .Log .Info ("\u0046i\u006c\u006c\u0073\u003a\u0020\u0025d",len (_ggb ._dafc ));_f .Log .Info ("\u0046\u0069\u006c\u006c\u0020\u0047\u0072\u0069\u0064s\u003a\u0020\u0025\u0064",len (_ggfe ));for _cgcf ,_degd 
:=range _ggfe {_bcb .Printf ("\u0025\u0034d\u003a\u0020\u0025d\u0020\u0072\u0075\u006c\u0069\u006e\u0067\u0073\u000a",_cgcf ,len (_degd ));for _fge ,_ceg :=range _degd {_bcb .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_fge ,_ceg );};};};};var _dgag paraList ;_aafg :=len (_ggb ._cdg );for _gfge :=0;_gfge < 360&&_aafg > 0;_gfge +=90{_dcc :=make ([]*textMark ,0,len (_ggb ._cdg )-_aafg );for _ ,_cddc :=range _ggb ._cdg {if _cddc ._eaeb ==_gfge {_dcc =append (_dcc ,_cddc );};};if len (_dcc )> 0{_ggc :=_dbad (_dcc ,_ggb ._fae ,_cdad );_dgag =append (_dgag ,_ggc ...);_aafg -=len (_dcc );};};_baad :=new (_g .Buffer );_dgag .writeText (_baad );_ggb ._dadb =_baad .String ();_ggb ._bbg =_dgag .toTextMarks ();_ggb ._cbb =_dgag .tables ();};
|
||
|
||
// ToText returns the page text as a single string.
|
||
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
|
||
// Text() instead.
|
||
func (_aad PageText )ToText ()string {return _aad .Text ()};func _bdffa (_ddabf ,_eccga int )int {if _ddabf > _eccga {return _ddabf ;};return _eccga ;};func (_ece *stateStack )size ()int {return len (*_ece )};func _ggfb (_baef []rune )bool {return len (_baef )>=_ddce &&_e .Is (_e .Hyphen ,_baef [len (_baef )-1])&&!_e .IsSpace (_baef [len (_baef )-2]);};type rectRuling struct{_dfbe rulingKind ;_ab .PdfRectangle ;};func (_fdgaf *textObject )showText (_fbe []byte )error {return _fdgaf .renderText (_fbe )};func (_ggce *wordBag )removeWord (_ecbf *textWord ,_eacff int ){_cagd :=_eeebd (_ggce .stratum (_eacff ),_ecbf );if len (_cagd )==0{delete (_ggce ._dgaf ,_eacff );}else {_ggce ._dgaf [_eacff ]=_cagd ;};};func _gffea (_efda int ,_fcdc func (int ,int )bool )[]int {_fccd :=make ([]int ,_efda );for _ggafd :=range _fccd {_fccd [_ggafd ]=_ggafd ;};_de .Slice (_fccd ,func (_agec ,_dddc int )bool {return _fcdc (_fccd [_agec ],_fccd [_dddc ])});return _fccd ;};func (_gaae rulingList )sort (){_de .Slice (_gaae ,func (_faac ,_gecc int )bool {return _gaae .comp (_faac ,_gecc )});};func (_gbaee paraList )toTextMarks ()[]TextMark {_adee :=0;var _bagb []TextMark ;for _dbeb ,_cdde :=range _gbaee {_gbdd :=_cdde .toTextMarks (&_adee );_bagb =append (_bagb ,_gbdd ...);if _dbeb !=len (_gbaee )-1{if _fadg (_cdde ,_gbaee [_dbeb +1]){_bagb =_aaab (_bagb ,&_adee ,"\u0020");}else {_bagb =_aaab (_bagb ,&_adee ,"\u000a");_bagb =_aaab (_bagb ,&_adee ,"\u000a");};};};_bagb =_aaab (_bagb ,&_adee ,"\u000a");_bagb =_aaab (_bagb ,&_adee ,"\u000a");return _bagb ;};func (_cabc *textPara )taken ()bool {return _cabc ==nil ||_cabc ._ffde };type textWord struct{_ab .PdfRectangle ;_dbed float64 ;_aeee string ;_bcce []*textMark ;_fegfd float64 ;_deff bool ;};func _faff (_ggdgb ,_cgaf _dd .Point )bool {_ffgd :=_dg .Abs (_ggdgb .X -_cgaf .X );_gadg :=_dg .Abs (_ggdgb .Y -_cgaf .Y );return _bedfc (_gadg ,_ffgd );};func _dgdb (_gcad ,_eaed _ab .PdfRectangle )bool {return _gcad .Llx <=_eaed .Llx &&_eaed .Urx 
<=_gcad .Urx &&_gcad .Lly <=_eaed .Lly &&_eaed .Ury <=_gcad .Ury ;};func (_gcge *textObject )getFont (_bgcb string )(*_ab .PdfFont ,error ){if _gcge ._beb ._fa !=nil {_gcge ._beb ._gd ++;_cef ,_dbcg :=_gcge ._beb ._fa [_bgcb ];if _dbcg {_cef ._ggdb =_gcge ._beb ._gd ;return _cef ._edaf ,nil ;};};_gec ,_bba :=_gcge .getFontDirect (_bgcb );if _bba !=nil {return nil ,_bba ;};if _gcge ._beb ._fa !=nil {_fgfc :=fontEntry {_gec ,_gcge ._beb ._gd };if len (_gcge ._beb ._fa )>=_bdc {var _cbgb []string ;for _ccef :=range _gcge ._beb ._fa {_cbgb =append (_cbgb ,_ccef );};_de .Slice (_cbgb ,func (_faef ,_decb int )bool {return _gcge ._beb ._fa [_cbgb [_faef ]]._ggdb < _gcge ._beb ._fa [_cbgb [_decb ]]._ggdb ;});delete (_gcge ._beb ._fa ,_cbgb [0]);};_gcge ._beb ._fa [_bgcb ]=_fgfc ;};return _gec ,nil ;};func (_bdg *textObject )getFillColor ()_ca .Color {return _gbagg (_bdg ._ffb .ColorspaceNonStroking ,_bdg ._ffb .ColorNonStroking );};func _fedg (_aafe []*textMark ,_adcc _ab .PdfRectangle )[]*textWord {var _accf []*textWord ;var _bafd *textWord ;_gaff :=func (){if _bafd !=nil {_ggfad :=_bafd .computeText ();if !_egfge (_ggfad ){_bafd ._aeee =_ggfad ;_accf =append (_accf ,_bafd );if _feca {_f .Log .Info ("\u0077o\u0072\u0064\u003d\u0025\u0073",_bafd .String ());for _bfedf ,_debdg :=range _bafd ._bcce {_bcb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bfedf ,_debdg .String ());};};};_bafd =nil ;};};for _ ,_gbge :=range _aafe {if _dgce &&_bafd !=nil &&len (_bafd ._bcce )> 0{_fdea :=_bafd ._bcce [len (_bafd ._bcce )-1];_gfag ,_fcdcf :=_gfggb (_gbge ._efab );_eafd ,_dgdca :=_gfggb (_fdea ._efab );if _fcdcf &&!_dgdca &&_fdea .inDiacriticArea (_gbge ){_bafd .addDiacritic (_gfag );continue ;};if _dgdca &&!_fcdcf &&_gbge .inDiacriticArea (_fdea ){_bafd ._bcce =_bafd ._bcce [:len (_bafd ._bcce )-1];_bafd .appendMark (_gbge ,_adcc );_bafd .addDiacritic (_eafd );continue ;};};_bddb :=_egfge (_gbge ._efab );if _bddb {_gaff ();continue ;};if _bafd ==nil &&!_bddb {_bafd 
=_bfef ([]*textMark {_gbge },_adcc );continue ;};_cgabf :=_bafd ._fegfd ;_fecff :=_dg .Abs (_dcg (_adcc ,_gbge )-_bafd ._dbed )/_cgabf ;_acbc :=_gca (_gbge ,_bafd )/_cgabf ;if _acbc >=_bebf ||!(-_fecf <=_acbc &&_fecff <=_eegf ){_gaff ();_bafd =_bfef ([]*textMark {_gbge },_adcc );continue ;};_bafd .appendMark (_gbge ,_adcc );};_gaff ();return _accf ;};
|
||
|
||
// TextMark represents extracted text on a page with information regarding both textual content,
|
||
// formatting (font and size) and positioning.
|
||
// It is the smallest unit of text on a PDF page, typically a single character.
|
||
//
|
||
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
|
||
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
|
||
// `bbox` of substring `term` in `text`.
|
||
//
|
||
// ex, _ := New(page)
|
||
// // handle errors
|
||
// pageText, _, _, err := ex.ExtractPageText()
|
||
// // handle errors
|
||
// text := pageText.Text()
|
||
// textMarks := pageText.Marks()
|
||
//
|
||
// start := strings.Index(text, term)
|
||
// end := start + len(term)
|
||
// spanMarks, err := textMarks.RangeOffset(start, end)
|
||
// // handle errors
|
||
// bbox, ok := spanMarks.BBox()
|
||
// // handle errors
|
||
type TextMark struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Original is the text in the PDF. It has not been decoded like `Text`.
|
||
Original string ;
|
||
|
||
// BBox is the bounding box of the text.
|
||
BBox _ab .PdfRectangle ;
|
||
|
||
// Font is the font the text was drawn with.
|
||
Font *_ab .PdfFont ;
|
||
|
||
// FontSize is the font size the text was drawn with.
|
||
FontSize float64 ;
|
||
|
||
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
|
||
// text, textMarks := pageText.Text(), pageText.Marks()
|
||
// marks := textMarks.Elements()
|
||
// then marks[i].Offset is the offset of marks[i].Text in text.
|
||
Offset int ;
|
||
|
||
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
|
||
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
|
||
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
|
||
Meta bool ;
|
||
|
||
// FillColor is the fill color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
FillColor _ca .Color ;
|
||
|
||
// StrokeColor is the stroke color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
StrokeColor _ca .Color ;};func (_dde *textObject )moveTextSetLeading (_cfbg ,_adb float64 ){_dde ._geag ._ebce =-_adb ;_dde .moveLP (_cfbg ,_adb );};func (_cbbb *subpath )clear (){*_cbbb =subpath {}};func (_adae *imageExtractContext )extractXObjectImage (_deg *_ea .PdfObjectName ,_bed _bdb .GraphicsState ,_dec *_ab .PdfPageResources )error {_fb ,_ :=_dec .GetXObjectByName (*_deg );if _fb ==nil {return nil ;};_feg ,_ac :=_adae ._ge [_fb ];if !_ac {_ddg ,_fdb :=_dec .GetXObjectImageByName (*_deg );if _fdb !=nil {return _fdb ;};if _ddg ==nil {return nil ;};_cbg ,_fdb :=_ddg .ToImage ();if _fdb !=nil {return _fdb ;};_feg =&cachedImage {_fg :_cbg ,_egg :_ddg .ColorSpace };_adae ._ge [_fb ]=_feg ;};_abg :=_feg ._fg ;_fgb :=_feg ._egg ;_acf ,_dbg :=_fgb .ImageToRGB (*_abg );if _dbg !=nil {return _dbg ;};_f .Log .Debug ("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_bed .CTM .String ());_dac :=ImageMark {Image :&_acf ,Width :_bed .CTM .ScalingFactorX (),Height :_bed .CTM .ScalingFactorY (),Angle :_bed .CTM .Angle ()};_dac .X ,_dac .Y =_bed .CTM .Translation ();_adae ._dc =append (_adae ._dc ,_dac );_adae ._ag ++;return nil ;};func (_cgabd rulingList )removeDuplicates ()rulingList {if len (_cgabd )==0{return nil ;};_cgabd .sort ();_aac :=rulingList {_cgabd [0]};for _ ,_dfde :=range _cgabd [1:]{if _dfde .equals (_aac [len (_aac )-1]){continue ;};_aac =append (_aac ,_dfde );};return _aac ;};
|
||
|
||
// ImageExtractOptions contains options for controlling image extraction from
|
||
// PDF pages.
|
||
type ImageExtractOptions struct{IncludeInlineStencilMasks bool ;};func _cgb (_cbdf ,_dbec _ab .PdfRectangle )(_ab .PdfRectangle ,bool ){if !_cgfb (_cbdf ,_dbec ){return _ab .PdfRectangle {},false ;};return _ab .PdfRectangle {Llx :_dg .Max (_cbdf .Llx ,_dbec .Llx ),Urx :_dg .Min (_cbdf .Urx ,_dbec .Urx ),Lly :_dg .Max (_cbdf .Lly ,_dbec .Lly ),Ury :_dg .Min (_cbdf .Ury ,_dbec .Ury )},true ;};func _dbb (_ecdb *wordBag ,_degg *textWord ,_bgad float64 )bool {return _ecdb .Urx <=_degg .Llx &&_degg .Llx < _ecdb .Urx +_bgad ;};func (_ggdgd intSet )add (_fefg int ){_ggdgd [_fefg ]=struct{}{}};
|
||
|
||
// Text returns the extracted page text.
|
||
func (_efbc PageText )Text ()string {return _efbc ._dadb };
|
||
|
||
// Extractor stores and offers functionality for extracting content from PDF pages.
|
||
type Extractor struct{_aba string ;_be *_ab .PdfPageResources ;_fcb _ab .PdfRectangle ;_fa map[string ]fontEntry ;_gc map[string ]textResult ;_gd int64 ;_gce int ;};func _cfc (_eacf _dd .Point )_dd .Matrix {return _dd .TranslationMatrix (_eacf .X ,_eacf .Y )};func (_aeba lineRuling )xMean ()float64 {return 0.5*(_aeba ._ddbb .X +_aeba ._fbgf .X )};func (_dfae paraList )log (_fdeb string ){if !_cfagg {return ;};_f .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_fdeb ,len (_dfae ));for _acaa ,_ccde :=range _dfae {if _ccde ==nil {continue ;};_ecfa :=_ccde .text ();_eddgc :="\u0020\u0020";if _ccde ._egbc !=nil {_eddgc =_bcb .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_ccde ._egbc ._agea ,_ccde ._egbc ._bfcd );};_bcb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_acaa ,_ccde .PdfRectangle ,_eddgc ,_gefbc (_ecfa ,50));};};func _eaga (_fcfe float64 )bool {return _dg .Abs (_fcfe )< _fcaf };func (_edea *textLine )pullWord (_gbgd *wordBag ,_cdca *textWord ,_afgbe int ){_edea .appendWord (_cdca );_gbgd .removeWord (_cdca ,_afgbe );};func _gca (_aee ,_gbc bounded )float64 {return _aee .bbox ().Llx -_gbc .bbox ().Urx };func (_aeaa *Extractor )extractPageText (_bb string ,_ggd *_ab .PdfPageResources ,_ecb _dd .Matrix ,_efb int )(*PageText ,int ,int ,error ){_f .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_efb );_bab :=&PageText {_fae :_aeaa ._fcb };_fbd :=_bcg (_aeaa ._fcb );var _egd stateStack ;_cac :=_edc (_aeaa ,_ggd ,_bdb .GraphicsState {},&_fbd ,&_egd );_gef :=shapesState {_efec :_ecb };var _eace bool ;if _efb > _eb {_ebf :=_d .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");_f .Log .Debug 
("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_efb ,_ebf );return _bab ,_fbd ._eefb ,_fbd ._dag ,_ebf ;};_egaa :=_bdb .NewContentStreamParser (_bb );_gb ,_eca :=_egaa .Parse ();if _eca !=nil {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_eca );return _bab ,_fbd ._eefb ,_fbd ._dag ,_eca ;};_bbc :=_bdb .NewContentStreamProcessor (*_gb );_bbc .AddHandler (_bdb .HandlerConditionEnumAllOperands ,"",func (_ecd *_bdb .ContentStreamOperation ,_bbcb _bdb .GraphicsState ,_aaa *_ab .PdfPageResources )error {_efe :=_ecd .Operand ;if _eafg {_f .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_ecd );};switch _efe {case "\u0071":_egd .push (&_fbd );case "\u0051":if !_egd .empty (){_fbd =*_egd .top ();if len (_egd )>=2{_egd .pop ();};};_gef ._eae =_bbcb .CTM ;case "\u0042\u0054":if _eace {_f .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");_bab ._cdg =append (_bab ._cdg ,_cac ._gedc ...);};_eace =true ;_eeb :=_bbcb ;_eeb .CTM =_ecb .Mult (_eeb .CTM );_cac =_edc (_aeaa ,_aaa ,_eeb ,&_fbd ,&_egd );case "\u0045\u0054":if !_eace {_f .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");};_eace =false ;_bab ._cdg =append (_bab ._cdg ,_cac ._gedc ...);_cac .reset ();case "\u0054\u002a":_cac .nextLine ();case "\u0054\u0064":if _cag ,_gac :=_cac .checkOp 
(_ecd ,2,true );!_cag {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gac );return _gac ;};_gcc ,_ege ,_cf :=_cggf (_ecd .Params );if _cf !=nil {return _cf ;};_cac .moveText (_gcc ,_ege );case "\u0054\u0044":if _ffc ,_babc :=_cac .checkOp (_ecd ,2,true );!_ffc {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_babc );return _babc ;};_feef ,_gbb ,_fgf :=_cggf (_ecd .Params );if _fgf !=nil {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fgf );return _fgf ;};_cac .moveTextSetLeading (_feef ,_gbb );case "\u0054\u006a":if _abc ,_cea :=_cac .checkOp (_ecd ,1,true );!_abc {_f .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_ecd ,_cea );return _cea ;};_edd ,_dbgf :=_ea .GetStringBytes (_ecd .Params [0]);if !_dbgf {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_ecd );return _ea .ErrTypeError ;};return _cac .showText (_edd );case "\u0054\u004a":if _bced ,_gbf :=_cac .checkOp (_ecd ,1,true );!_bced {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gbf );return _gbf ;};_cgf ,_agcb :=_ea .GetArray (_ecd .Params [0]);if !_agcb {_f .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_ecd );return _eca ;};return _cac .showTextAdjusted (_cgf );case "\u0027":if _cad ,_cab :=_cac .checkOp (_ecd ,1,true );!_cad {_f .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cab );return _cab ;};_ggdc ,_dced :=_ea .GetStringBytes (_ecd .Params [0]);if !_dced 
{_f .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_ecd );return _ea .ErrTypeError ;};_cac .nextLine ();return _cac .showText (_ggdc );case "\u0022":if _bcc ,_daf :=_cac .checkOp (_ecd ,3,true );!_bcc {_f .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_daf );return _daf ;};_febc ,_acb ,_ede :=_cggf (_ecd .Params [:2]);if _ede !=nil {return _ede ;};_fba ,_bcdb :=_ea .GetStringBytes (_ecd .Params [2]);if !_bcdb {_f .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_ecd );return _ea .ErrTypeError ;};_cac .setCharSpacing (_febc );_cac .setWordSpacing (_acb );_cac .nextLine ();return _cac .showText (_fba );case "\u0054\u004c":_cdab ,_dbf :=_dbe (_ecd );if _dbf !=nil {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dbf );return _dbf ;};_cac .setTextLeading (_cdab );case "\u0054\u0063":_fda ,_gfe :=_dbe (_ecd );if _gfe !=nil {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gfe );return _gfe ;};_cac .setCharSpacing (_fda );case "\u0054\u0066":if _cbf ,_ceae :=_cac .checkOp (_ecd ,2,true );!_cbf {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ceae );return _ceae ;};_aedd ,_fgg :=_ea .GetNameVal (_ecd .Params [0]);if !_fgg {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_ecd );return _ea .ErrTypeError ;};_aeac ,_abcd :=_ea .GetNumberAsFloat (_ecd .Params [1]);if !_fgg {_f .Log .Debug 
("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ecd ,_abcd );return _abcd ;};_abcd =_cac .setFont (_aedd ,_aeac );_cac ._cce =_fc .Is (_abcd ,_ea .ErrNotSupported );if _abcd !=nil &&!_cac ._cce {return _abcd ;};case "\u0054\u006d":if _fbf ,_cfb :=_cac .checkOp (_ecd ,6,true );!_fbf {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cfb );return _cfb ;};_caa ,_babd :=_ea .GetNumbersAsFloat (_ecd .Params );if _babd !=nil {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_babd );return _babd ;};_cac .setTextMatrix (_caa );case "\u0054\u0072":if _aca ,_cbfc :=_cac .checkOp (_ecd ,1,true );!_aca {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cbfc );return _cbfc ;};_gae ,_ebb :=_ea .GetIntVal (_ecd .Params [0]);if !_ebb {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_ecd );return _ea .ErrTypeError ;};_cac .setTextRenderMode (_gae );case "\u0054\u0073":if _gefa ,_fdg :=_cac .checkOp (_ecd ,1,true );!_gefa {_f .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fdg );return _fdg ;};_adc ,_bbe :=_ea .GetNumberAsFloat (_ecd .Params [0]);if _bbe !=nil {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bbe );return _bbe ;};_cac .setTextRise (_adc );case "\u0054\u0077":if _ead ,_aaf :=_cac .checkOp (_ecd ,1,true );!_ead {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aaf );return _aaf ;};_bbb ,_bfa :=_ea .GetNumberAsFloat 
(_ecd .Params [0]);if _bfa !=nil {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bfa );return _bfa ;};_cac .setWordSpacing (_bbb );case "\u0054\u007a":if _fec ,_eccd :=_cac .checkOp (_ecd ,1,true );!_fec {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_eccd );return _eccd ;};_bfad ,_gfee :=_ea .GetNumberAsFloat (_ecd .Params [0]);if _gfee !=nil {_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gfee );return _gfee ;};_cac .setHorizScaling (_bfad );case "\u0063\u006d":_gef ._eae =_bbcb .CTM ;case "\u006d":if len (_ecd .Params )!=2{_f .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_abf );return nil ;};_gab ,_efg :=_ea .GetNumbersAsFloat (_ecd .Params );if _efg !=nil {return _efg ;};_f .Log .Debug ("\u004d\u006f\u0076\u0065\u0020\u0074\u006f\u003a\u0020\u0025\u002e\u0032\u0066",_gab );_gef .moveTo (_gab [0],_gab [1]);case "\u006c":if len (_ecd .Params )!=2{_f .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_abf );return nil ;};_cee ,_dcee :=_ea .GetNumbersAsFloat (_ecd .Params );if _dcee !=nil {return _dcee ;};_gef .lineTo (_cee [0],_cee [1]);case "\u0063":if len (_ecd .Params )!=6{return _abf ;};_fdga ,_ddfb :=_ea 
.GetNumbersAsFloat (_ecd .Params );if _ddfb !=nil {return _ddfb ;};_f .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_fdga );_gef .cubicTo (_fdga [0],_fdga [1],_fdga [2],_fdga [3],_fdga [4],_fdga [5]);case "\u0076","\u0079":if len (_ecd .Params )!=4{return _abf ;};_cgfa ,_fcg :=_ea .GetNumbersAsFloat (_ecd .Params );if _fcg !=nil {return _fcg ;};_f .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_cgfa );_gef .quadraticTo (_cgfa [0],_cgfa [1],_cgfa [2],_cgfa [3]);case "\u0068":_gef .closePath ();case "\u0072\u0065":if len (_ecd .Params )!=4{return _abf ;};_dee ,_faa :=_ea .GetNumbersAsFloat (_ecd .Params );if _faa !=nil {return _faa ;};_gef .drawRectangle (_dee [0],_dee [1],_dee [2],_dee [3]);_gef .closePath ();case "\u0053":_gef .stroke (&_bab ._edf );_gef .clearPath ();case "\u0073":_gef .closePath ();_gef .stroke (&_bab ._edf );_gef .clearPath ();case "\u0046":_gef .fill (&_bab ._dafc );_gef .clearPath ();case "\u0066","\u0066\u002a":_gef .closePath ();_gef .fill (&_bab ._dafc );_gef .clearPath ();case "\u0042","\u0042\u002a":_gef .fill (&_bab ._dafc );_gef .stroke (&_bab ._edf );_gef .clearPath ();case "\u0062","\u0062\u002a":_gef .closePath ();_gef .fill (&_bab ._dafc );_gef .stroke (&_bab ._edf );_gef .clearPath ();case "\u006e":_gef .clearPath ();case "\u0044\u006f":if len (_ecd .Params )==0{_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_ecd .Params );return _ea .ErrRangeError ;};_egga ,_fcc :=_ea .GetName (_ecd .Params [0]);if !_fcc {_f .Log .Debug 
("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_ecd .Params [0]);return _ea .ErrTypeError ;};_ ,_dad :=_aaa .GetXObjectByName (*_egga );if _dad !=_ab .XObjectTypeForm {break ;};_dbc ,_fcc :=_aeaa ._gc [_egga .String ()];if !_fcc {_fbb ,_deb :=_aaa .GetXObjectFormByName (*_egga );if _deb !=nil {_f .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_deb );return _deb ;};_cgg ,_deb :=_fbb .GetContentStream ();if _deb !=nil {_f .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_deb );return _deb ;};_def :=_fbb .Resources ;if _def ==nil {_def =_aaa ;};_adce ,_fef ,_dgf ,_deb :=_aeaa .extractPageText (string (_cgg ),_def ,_ecb .Mult (_bbcb .CTM ),_efb +1);if _deb !=nil {_f .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_deb );return _deb ;};_dbc =textResult {*_adce ,_fef ,_dgf };_aeaa ._gc [_egga .String ()]=_dbc ;};_gef ._eae =_bbcb .CTM ;_bab ._cdg =append (_bab ._cdg ,_dbc ._cadd ._cdg ...);_bab ._edf =append (_bab ._edf ,_dbc ._cadd ._edf ...);_bab ._dafc =append (_bab ._dafc ,_dbc ._cadd ._dafc ...);_fbd ._eefb +=_dbc ._edb ;_fbd ._dag +=_dbc ._fac ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_cac ._ffb .ColorspaceNonStroking =_bbcb .ColorspaceNonStroking ;_cac ._ffb .ColorNonStroking =_bbcb .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_cac ._ffb .ColorspaceStroking =_bbcb .ColorspaceStroking ;_cac ._ffb .ColorStroking =_bbcb .ColorStroking ;};return nil ;});_eca =_bbc .Process (_ggd );return _bab ,_fbd ._eefb ,_fbd ._dag ,_eca ;};
|
||
|
||
// New returns an Extractor instance for extracting content from the input PDF page.
|
||
func New (page *_ab .PdfPage )(*Extractor ,error ){_ec ,_ddf :=page .GetAllContentStreams ();if _ddf !=nil {return nil ,_ddf ;};_ff ,_ddf :=page .GetMediaBox ();if _ddf !=nil {return nil ,_bcb .Errorf ("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076",_ddf );};_fd :=&Extractor {_aba :_ec ,_be :page .Resources ,_fcb :*_ff ,_fa :map[string ]fontEntry {},_gc :map[string ]textResult {}};if _fd ._fcb .Llx > _fd ._fcb .Urx {_f .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_fd ._fcb );_fd ._fcb .Llx ,_fd ._fcb .Urx =_fd ._fcb .Urx ,_fd ._fcb .Llx ;};if _fd ._fcb .Lly > _fd ._fcb .Ury {_f .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_fd ._fcb );_fd ._fcb .Lly ,_fd ._fcb .Ury =_fd ._fcb .Ury ,_fd ._fcb .Lly ;};return _fd ,nil ;};func (_ccd *textObject )moveText (_bdf ,_dcf float64 ){_ccd .moveLP (_bdf ,_dcf )};func (_dfdc *textWord )bbox ()_ab .PdfRectangle {return _dfdc .PdfRectangle };func (_aagc *shapesState )fill (_gedg *[]*subpath ){*_gedg =append (*_gedg ,_aagc ._fcce ...);if _bfdg {_f .Log .Info ("\u0046\u0049L\u004c\u003a\u0020\u0025\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006e\u0065\u0077\u0029\u0020\u0073s=\u0025\u0073",len (*_gedg ),len (_aagc ._fcce ),_aagc );for _deed ,_fbbe :=range _aagc ._fcce {_bcb .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_deed ,_fbbe );if _deed ==10{break ;};};};};func (_eabg *textTable 
)newTablePara ()*textPara {_ddab :=_eabg .computeBbox ();return &textPara {PdfRectangle :_ddab ,_aaef :_ddab ,_egbc :_eabg };};type rulingKind int ;func (_fedd *wordBag )text ()string {_dcfe :=_fedd .allWords ();_ffbe :=make ([]string ,len (_dcfe ));for _ffcec ,_cbfdd :=range _dcfe {_ffbe [_ffcec ]=_cbfdd ._aeee ;};return _bc .Join (_ffbe ,"\u0020");};func (_edac rulingList )sortStrict (){_de .Slice (_edac ,func (_febac ,_fgbg int )bool {_efgb ,_acgc :=_edac [_febac ],_edac [_fgbg ];_cede ,_fgbf :=_efgb ._aefag ,_acgc ._aefag ;if _cede !=_fgbf {return _cede > _fgbf ;};_gged ,_aeeca :=_efgb ._deeda ,_acgc ._deeda ;if _gged !=_aeeca {return _gged < _aeeca ;};_gged ,_aeeca =_efgb ._efac ,_acgc ._efac ;if _gged !=_aeeca {return _gged < _aeeca ;};return _efgb ._bgae < _acgc ._bgae ;});};func (_gcg *stateStack )empty ()bool {return len (*_gcg )==0};type textResult struct{_cadd PageText ;_edb int ;_fac int ;};func (_cfgb *wordBag )stratum (_afa int )[]*textWord {_eggd :=_cfgb ._dgaf [_afa ];_fcbdg :=make ([]*textWord ,len (_eggd ));copy (_fcbdg ,_eggd );return _fcbdg ;};func _cbge (_aabb _ab .PdfRectangle )rulingKind {_febg :=_aabb .Width ();_cefg :=_aabb .Height ();return _dgaa (_febg ,_cefg );};func _dbe (_aga *_bdb .ContentStreamOperation )(float64 ,error ){if len (_aga .Params )!=1{_fbaa :=_d .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");_f .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_aga .Operand ,1,len (_aga .Params ),_aga .Params );return 0.0,_fbaa ;};return _ea .GetNumberAsFloat (_aga .Params [0]);};
|
||
|
||
// ApplyArea processes the page text only within the specified area `bbox`.
|
||
// Each time ApplyArea is called, it updates the result set in `pt`.
|
||
// Can be called multiple times in a row with different bounding boxes.
|
||
func (_bee *PageText )ApplyArea (bbox _ab .PdfRectangle ){_bae :=make ([]*textMark ,0,len (_bee ._cdg ));for _ ,_cbfd :=range _bee ._cdg {if _cgfb (_cbfd .bbox (),bbox ){_bae =append (_bae ,_cbfd );};};var _aeg paraList ;_becb :=len (_bae );for _agd :=0;_agd < 360&&_becb > 0;_agd +=90{_fcda :=make ([]*textMark ,0,len (_bae )-_becb );for _ ,_bbbg :=range _bae {if _bbbg ._eaeb ==_agd {_fcda =append (_fcda ,_bbbg );};};if len (_fcda )> 0{_cba :=_dbad (_fcda ,_bee ._fae ,nil );_aeg =append (_aeg ,_cba ...);_becb -=len (_fcda );};};_ecac :=new (_g .Buffer );_aeg .writeText (_ecac );_bee ._dadb =_ecac .String ();_bee ._bbg =_aeg .toTextMarks ();_bee ._cbb =_aeg .tables ();};
|
||
|
||
// String returns a string describing `tm`.
|
||
func (_bbd TextMark )String ()string {_fgee :=_bbd .BBox ;var _dfc string ;if _bbd .Font !=nil {_dfc =_bbd .Font .String ();if len (_dfc )> 50{_dfc =_dfc [:50]+"\u002e\u002e\u002e";};};var _fbeg string ;if _bbd .Meta {_fbeg ="\u0020\u002a\u004d\u002a";};return _bcb .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_bbd .Offset ,_bbd .Text ,[]rune (_bbd .Text ),_fgee .Llx ,_fgee .Lly ,_fgee .Urx ,_fgee .Ury ,_dfc ,_fbeg );};func (_cfee paraList )topoOrder ()[]int {if _cfagg {_f .Log .Info ("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a");};_gbcc :=len (_cfee );_cbfcg :=make ([]bool ,_gbcc );_bda :=make ([]int ,0,_gbcc );_bacd :=_cfee .llyOrdering ();var _bddaf func (_bbga int );_bddaf =func (_cafe int ){_cbfcg [_cafe ]=true ;for _aaffg :=0;_aaffg < _gbcc ;_aaffg ++{if !_cbfcg [_aaffg ]{if _cfee .readBefore (_bacd ,_cafe ,_aaffg ){_bddaf (_aaffg );};};};_bda =append (_bda ,_cafe );};for _ffbeg :=0;_ffbeg < _gbcc ;_ffbeg ++{if !_cbfcg [_ffbeg ]{_bddaf (_ffbeg );};};return _dacab (_bda );};func (_aeda *textTable )markCells (){for _dgba :=0;_dgba < _aeda ._bfcd ;_dgba ++{for _edbec :=0;_edbec < _aeda ._agea ;_edbec ++{_fedcb :=_aeda .get (_edbec ,_dgba );_fedcb ._ffde =true ;};};};
|
||
|
||
// String returns a description of `p`.
|
||
func (_ecegf *textPara )String ()string {_aggag :="";if _ecegf ._egbc !=nil {_aggag =_bcb .Sprintf ("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020",_ecegf ._egbc ._agea ,_ecegf ._egbc ._bfcd );};return _bcb .Sprintf ("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071",_ecegf .PdfRectangle ,_aggag ,len (_ecegf ._fafb ),_gefbc (_ecegf .text (),50));};type stateStack []*textState ;func (_dcfafa rulingList )comp (_afggd ,_cagb int )bool {_cgeg ,_eeff :=_dcfafa [_afggd ],_dcfafa [_cagb ];_gefg ,_bbac :=_cgeg ._aefag ,_eeff ._aefag ;if _gefg !=_bbac {return _gefg > _bbac ;};if _gefg ==_bbag {return false ;};_ddcb :=func (_fcfg bool )bool {if _gefg ==_ffad {return _fcfg ;};return !_fcfg ;};_beeeb ,_gabda :=_cgeg ._deeda ,_eeff ._deeda ;if _beeeb !=_gabda {return _ddcb (_beeeb > _gabda );};_beeeb ,_gabda =_cgeg ._efac ,_eeff ._efac ;if _beeeb !=_gabda {return _ddcb (_beeeb < _gabda );};return _ddcb (_cgeg ._bgae < _eeff ._bgae );};func (_gbe *shapesState )clearPath (){_gbe ._fcce =nil ;_gbe ._dgcd =false ;if _facf {_f .Log .Info ("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073",_gbe );};};func _cacfa (_cbacc ,_eggb _dd .Point )bool {return _cbacc .X ==_eggb .X &&_cbacc .Y ==_eggb .Y };
|
||
|
||
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
|
||
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
|
||
// `start` and `end` are offsets in the extracted text.
|
||
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
|
||
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
|
||
func (_dfbf *TextMarkArray )RangeOffset (start ,end int )(*TextMarkArray ,error ){if _dfbf ==nil {return nil ,_d .New ("\u006da\u003d\u003d\u006e\u0069\u006c");};if end < start {return nil ,_bcb .Errorf ("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020",start ,end );};_bfdf :=len (_dfbf ._faf );if _bfdf ==0{return _dfbf ,nil ;};if start < _dfbf ._faf [0].Offset {start =_dfbf ._faf [0].Offset ;};if end > _dfbf ._faf [_bfdf -1].Offset +1{end =_dfbf ._faf [_bfdf -1].Offset +1;};_cae :=_de .Search (_bfdf ,func (_edga int )bool {return _dfbf ._faf [_edga ].Offset +len (_dfbf ._faf [_edga ].Text )-1>=start });if !(0<=_cae &&_cae < _bfdf ){_bdgf :=_bcb .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076",start ,_cae ,_bfdf ,_dfbf ._faf [0],_dfbf ._faf [_bfdf -1]);return nil ,_bdgf ;};_ddcf :=_de .Search (_bfdf ,func (_ffe int )bool {return _dfbf ._faf [_ffe ].Offset > end -1});if !(0<=_ddcf &&_ddcf < _bfdf ){_dbca :=_bcb .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076",end ,_ddcf ,_bfdf ,_dfbf ._faf [0],_dfbf ._faf [_bfdf -1]);return nil ,_dbca ;};if _ddcf <=_cae {return nil ,_bcb .Errorf 
("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064",start ,end ,_cae ,_ddcf );};return &TextMarkArray {_faf :_dfbf ._faf [_cae :_ddcf ]},nil ;};const (_fcaf =1.0e-6;_adeb =1.0e-4;_gbdf =10;_fdgf =6;_fcef =0.5;_bebf =0.11;_fecf =0.19;_eegf =0.04;_fedf =0.04;_eceee =1.0;_ecdbd =0.04;_dbba =0.4;_eec =0.7;_cgbe =1.0;_bgd =0.1;_egge =1.4;_cgd =0.46;_adbd =0.02;_aaff =0.2;_egaeg =0.5;_ddce =4;_feag =4.0;_edbfg =6;_gda =0.01;_egdc =0.02;_cff =2;_gafb =2;_ecg =10.0;_fdc =0.05;_deec =0.3;_fege =1.0;_cacd =1.0;);func (_ged *textObject )setTextRise (_efgd float64 ){if _ged ==nil {return ;};_ged ._geag ._cfa =_efgd ;};func _edba (_agdfd ,_cefd ,_bdef ,_ccegd *textPara )*textTable {_fdf :=&textTable {_agea :2,_bfcd :2,_beab :make (map[uint64 ]*textPara ,4)};_fdf .put (0,0,_agdfd );_fdf .put (1,0,_cefd );_fdf .put (0,1,_bdef );_fdf .put (1,1,_ccegd );return _fdf ;};func _aaab (_efdb []TextMark ,_dfag *int ,_gdff string )[]TextMark {_bad :=_gddb ;_bad .Text =_gdff ;return _eea (_efdb ,_dfag ,_bad );};func (_bfae *wordBag )firstReadingIndex (_afgb int )int {_bgce :=_bfae .firstWord (_afgb )._fegfd ;_ddef :=float64 (_afgb +1)*_fdgf ;_fafd :=_ddef +_feag *_bgce ;_agga :=_afgb ;for _ ,_dfba :=range _bfae .depthBand (_ddef ,_fafd ){if _fad (_bfae .firstWord (_dfba ),_bfae .firstWord (_agga ))< 0{_agga =_dfba ;};};return _agga ;};func (_cgcb lineRuling )yMean ()float64 {return 0.5*(_cgcb ._ddbb .Y +_cgcb ._fbgf .Y )};func _cbad (_fdgge ,_bbde bounded )float64 {return _bbaa (_fdgge )-_bbaa (_bbde )};
|
||
|
||
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
|
||
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
|
||
// Replace with a function like Extract() (*PageText, error)
|
||
func (_dace *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_eda ,_dce ,_ee ,_gge :=_dace .extractPageText (_dace ._aba ,_dace ._be ,_dd .IdentityMatrix (),0);if _gge !=nil {return nil ,0,0,_gge ;};_eda .computeViews ();_gge =_fafcc (_eda );if _gge !=nil {return nil ,0,0,_gge ;};return _eda ,_dce ,_ee ,nil ;};func (_afab *textPara )toCellTextMarks (_bcdf *int )[]TextMark {var _ceefe []TextMark ;for _cddcd ,_beg :=range _afab ._fafb {_cgdb :=_beg .toTextMarks (_bcdf );_fcbe :=_dgff &&_beg .endsInHyphen ()&&_cddcd !=len (_afab ._fafb )-1;if _fcbe {_cgdb =_dacd (_cgdb ,_bcdf );};_ceefe =append (_ceefe ,_cgdb ...);if !(_fcbe ||_cddcd ==len (_afab ._fafb )-1){_ceefe =_aaab (_ceefe ,_bcdf ,_agdf (_beg ._gcfb ,_afab ._fafb [_cddcd +1]._gcfb ));};};return _ceefe ;};func (_bfag paraList )eventNeighbours (_dbdg []event )map[*textPara ][]int {_de .Slice (_dbdg ,func (_edaga ,_bgfa int )bool {_egfg ,_acfb :=_dbdg [_edaga ],_dbdg [_bgfa ];_bdeff ,_aeggd :=_egfg ._ffgg ,_acfb ._ffgg ;if _bdeff !=_aeggd {return _bdeff < _aeggd ;};if _egfg ._adaf !=_acfb ._adaf {return _egfg ._adaf ;};return _edaga < _bgfa ;});_abdd :=make (map[int ]intSet );_fdcaf :=make (intSet );for _ ,_bffee :=range _dbdg {if _bffee ._adaf {_abdd [_bffee ._dcaa ]=make (intSet );for _bdab :=range _fdcaf {if _bdab !=_bffee ._dcaa {_abdd [_bffee ._dcaa ].add (_bdab );_abdd [_bdab ].add (_bffee ._dcaa );};};_fdcaf .add (_bffee ._dcaa );}else {_fdcaf .del (_bffee ._dcaa );};};_cafee :=map[*textPara ][]int {};for _dccc ,_bbaaa :=range _abdd {_cgcc :=_bfag [_dccc ];if len (_bbaaa )==0{_cafee [_cgcc ]=nil ;continue ;};_gfda :=make ([]int ,len (_bbaaa ));_agf :=0;for _abfg :=range _bbaaa {_gfda [_agf ]=_abfg ;_agf ++;};_cafee [_cgcc ]=_gfda ;};return _cafee ;};func (_dcfeg *textPara )writeCellText (_fgdbb _bd .Writer ){for _abaa ,_cabe :=range _dcfeg ._fafb {_ecdg :=_cabe .text ();_beca :=_dgff &&_cabe .endsInHyphen ()&&_abaa !=len (_dcfeg ._fafb )-1;if _beca {_ecdg =_dgdg (_ecdg );};_fgdbb .Write ([]byte 
(_ecdg ));if !(_beca ||_abaa ==len (_dcfeg ._fafb )-1){_fgdbb .Write ([]byte (_agdf (_cabe ._gcfb ,_dcfeg ._fafb [_abaa +1]._gcfb )));};};};type shapesState struct{_eae _dd .Matrix ;_efec _dd .Matrix ;_fcce []*subpath ;_dgcd bool ;_cdc _dd .Point ;};func (_bdbcd *textLine )bbox ()_ab .PdfRectangle {return _bdbcd .PdfRectangle };func (_bgb paraList )readBefore (_eagc []int ,_aaea ,_afgd int )bool {_ecad ,_gadb :=_bgb [_aaea ],_bgb [_afgd ];if _bccbg (_ecad ,_gadb )&&_ecad .Lly > _gadb .Lly {return true ;};if !(_ecad ._aaef .Urx < _gadb ._aaef .Llx ){return false ;};_adeef ,_fgdcd :=_ecad .Lly ,_gadb .Lly ;if _adeef > _fgdcd {_fgdcd ,_adeef =_adeef ,_fgdcd ;};_bbfb :=_dg .Max (_ecad ._aaef .Llx ,_gadb ._aaef .Llx );_acdc :=_dg .Min (_ecad ._aaef .Urx ,_gadb ._aaef .Urx );_ffa :=_bgb .llyRange (_eagc ,_adeef ,_fgdcd );for _ ,_daee :=range _ffa {if _daee ==_aaea ||_daee ==_afgd {continue ;};_bdba :=_bgb [_daee ];if _bdba ._aaef .Llx <=_acdc &&_bbfb <=_bdba ._aaef .Urx {return false ;};};return true ;};func _caba (_gcfg ,_gaag bounded )float64 {_ecfb :=_fad (_gcfg ,_gaag );if !_eaga (_ecfb ){return _ecfb ;};return _cbad (_gcfg ,_gaag );};func (_aedb *textWord )appendMark (_gddgc *textMark ,_gbba _ab .PdfRectangle ){_aedb ._bcce =append (_aedb ._bcce ,_gddgc );_aedb .PdfRectangle =_bfcg (_aedb .PdfRectangle ,_gddgc .PdfRectangle );if _gddgc ._ecbb > _aedb ._fegfd {_aedb ._fegfd =_gddgc ._ecbb ;};_aedb ._dbed =_gbba .Ury -_aedb .PdfRectangle .Lly ;};func _gfggb (_cdcg string )(string ,bool ){_acfdd :=[]rune (_cdcg );if len (_acfdd )!=1{return "",false ;};_ggdcf ,_ggedb :=_gcdb [_acfdd [0]];return _ggdcf ,_ggedb ;};
|
||
|
||
// String returns a string describing `ma`.
|
||
func (_bbbgc TextMarkArray )String ()string {_fced :=len (_bbbgc ._faf );if _fced ==0{return "\u0045\u004d\u0050T\u0059";};_dbfb :=_bbbgc ._faf [0];_bdbe :=_bbbgc ._faf [_fced -1];return _bcb .Sprintf ("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d",_fced ,_dbfb ,_bdbe );};func (_ccg *textObject )setHorizScaling (_dfa float64 ){if _ccg ==nil {return ;};_ccg ._geag ._dcbc =_dfa ;}; |