// Mirror of https://github.com/unidoc/unipdf.git (synced 2025-04-29 13:48:54 +08:00).
// 253 lines, 180 KiB, Go.
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/

//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
package extractor ;import (_cf "bytes";_e "errors";_ec "fmt";_d "github.com/unidoc/unipdf/v3/common";_bge "github.com/unidoc/unipdf/v3/common/license";_f "github.com/unidoc/unipdf/v3/contentstream";_gf "github.com/unidoc/unipdf/v3/core";_ca "github.com/unidoc/unipdf/v3/internal/textencoding";_ag "github.com/unidoc/unipdf/v3/internal/transform";_ecc "github.com/unidoc/unipdf/v3/model";_df "golang.org/x/text/unicode/norm";_ce "golang.org/x/xerrors";_ac "image/color";_be "io";_c "math";_b "regexp";_ed "sort";_ae "strings";_bg "unicode";_g "unicode/utf8";);func (_ecdf *textObject )getFontDirect (_cebc string )(*_ecc .PdfFont ,error ){_bcd ,_daac :=_ecdf .getFontDict (_cebc );if _daac !=nil {return nil ,_daac ;};_edad ,_daac :=_ecc .NewPdfFontFromPdfObject (_bcd );if _daac !=nil {_d .Log .Debug ("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cebc ,_daac );};return _edad ,_daac ;};type rulingKind int ;func (_fcbg gridTiling )complete ()bool {for _ ,_ddgba :=range _fcbg ._aegg {for _ ,_gcaa :=range _ddgba {if !_gcaa .complete (){return false ;};};};return true ;};func _ffda (_gcdc *wordBag ,_adda *textWord ,_cacde float64 )bool {return _gcdc .Urx <=_adda .Llx &&_adda .Llx < _gcdc .Urx +_cacde ;};func (_edba paraList )sortReadingOrder (){_d .Log .Trace ("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_edba ));if len (_edba )<=1{return ;};_edba .computeEBBoxes ();_ed .Slice (_edba ,func (_cbbb ,_bgge int )bool {return _abcg 
(_edba [_cbbb ],_edba [_bgge ])<=0});_gadb :=_edba .topoOrder ();_edba .reorder (_gadb );};
|
||
|
||
// TextMark represents extracted text on a page with information regarding both textual content,
|
||
// formatting (font and size) and positioning.
|
||
// It is the smallest unit of text on a PDF page, typically a single character.
|
||
//
|
||
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
|
||
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
|
||
// `bbox` of substring `term` in `text`.
|
||
//
|
||
// ex, _ := New(page)
|
||
// // handle errors
|
||
// pageText, _, _, err := ex.ExtractPageText()
|
||
// // handle errors
|
||
// text := pageText.Text()
|
||
// textMarks := pageText.Marks()
|
||
//
|
||
// start := strings.Index(text, term)
|
||
// end := start + len(term)
|
||
// spanMarks, err := textMarks.RangeOffset(start, end)
|
||
// // handle errors
|
||
// bbox, ok := spanMarks.BBox()
|
||
// // handle errors
|
||
type TextMark struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Original is the text in the PDF. It has not been decoded like `Text`.
|
||
Original string ;
|
||
|
||
// BBox is the bounding box of the text.
|
||
BBox _ecc .PdfRectangle ;
|
||
|
||
// Font is the font the text was drawn with.
|
||
Font *_ecc .PdfFont ;
|
||
|
||
// FontSize is the font size the text was drawn with.
|
||
FontSize float64 ;
|
||
|
||
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
|
||
// text, textMarks := pageText.Text(), pageText.Marks()
|
||
// marks := textMarks.Elements()
|
||
// then marks[i].Offset is the offset of marks[i].Text in text.
|
||
Offset int ;
|
||
|
||
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
|
||
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
|
||
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
|
||
Meta bool ;
|
||
|
||
// FillColor is the fill color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
FillColor _ac .Color ;
|
||
|
||
// StrokeColor is the stroke color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
StrokeColor _ac .Color ;
|
||
|
||
// Orientation is the text orientation
|
||
Orientation int ;};func _aaeb (_beaf ,_afb _ecc .PdfRectangle )bool {return _ebd (_beaf ,_afb )&&_ecae (_beaf ,_afb )};func _gfcg (_cbba *wordBag ,_bdfff *textWord ,_fcgd float64 )bool {return _bdfff .Llx < _cbba .Urx +_fcgd &&_cbba .Llx -_fcgd < _bdfff .Urx ;};
// Tables returns the tables extracted from the page.
// The returned slice is the PageText's own table list, not a copy. When `_egfe` is set the
// number of tables is logged before returning.
func (_feg PageText) Tables() []TextTable {
	tables := _feg._ddd
	if _egfe {
		_d.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}
// String returns a human readable description of the shapesState.
// The description has the form "{N subpaths fresh=B}", where N is the number of entries in
// `_bece` and B is the `_fafb` flag.
func (_dcf *shapesState) String() string {
	subpathCount := len(_dcf._bece)
	return _ec.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", subpathCount, _dcf._fafb)
}

// rectRuling pairs an embedded rectangle and color with a rulingKind and a markKind.
type rectRuling struct {
	_dbafd rulingKind
	_gdbe  markKind
	_ac.Color
	_ecc.PdfRectangle
}

// addPoint converts (`_cebd`, `_fcaf`) with devicePoint and adds the result to the subpath
// returned by establishSubpath. When there is no such subpath, the point is stored in `_adaa`
// and the `_fafb` flag is set instead.
func (_afee *shapesState) addPoint(_cebd, _fcaf float64) {
	current := _afee.establishSubpath()
	pt := _afee.devicePoint(_cebd, _fcaf)
	if current != nil {
		current.add(pt)
		return
	}
	_afee._fafb = true
	_afee._adaa = pt
}

// _ffdf classifies a pair of extents against the minimum length `_fbgaf`:
//   - `_fdcbd` when `_febb` reaches `_fbgaf` and `_bgbf(_fdgf, _febb)` holds;
//   - `_ccgf` when `_fdgf` reaches `_fbgaf` and `_bgbf(_febb, _fdgf)` holds;
//   - `_afaa` otherwise.
//
// The first matching rule wins.
func _ffdf(_febb, _fdgf, _fbgaf float64) rulingKind {
	switch {
	case _febb >= _fbgaf && _bgbf(_fdgf, _febb):
		return _fdcbd
	case _fdgf >= _fbgaf && _bgbf(_febb, _fdgf):
		return _ccgf
	default:
		return _afaa
	}
}

// _adef packs two ints into a single uint64 key: `_fegb` scaled by 0x1000000 plus `_eebcg`.
func _adef(_fegb, _eebcg int) uint64 {
	const stride = 0x1000000
	high := uint64(_fegb) * stride
	return high + uint64(_eebcg)
}
// String returns a string describing the PageText.
// The result has one line per element of `_adg`, framed by a first line "-PageText: N elements"
// and a last line "+PageText: N elements".
func (_ffbf PageText) String() string {
	header := _ec.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_ffbf._adg))
	lines := make([]string, 0, len(_ffbf._adg)+2)
	lines = append(lines, "\u002d"+header)
	for _, element := range _ffbf._adg {
		lines = append(lines, element.String())
	}
	lines = append(lines, "\u002b"+header)
	return _ae.Join(lines, "\u000a")
}

// _dgdc maps `_babf` to an integer bucket of width `_addg`. Non-negative values are truncated
// toward zero; all other values get the truncated quotient minus one.
func _dgdc(_babf float64) int {
	if _babf >= 0 {
		return int(_babf / _addg)
	}
	return int(_babf/_addg) - 1
}

// toTextMarks converts every mark in `_bebcb` with ToTextMark and accumulates the results with
// `_efb`, passing the shared offset pointer `_abge` through to it.
func (_edgd *textWord) toTextMarks(_abge *int) []TextMark {
	var out []TextMark
	marks := _edgd._bebcb
	for i := range marks {
		out = _efb(out, _abge, marks[i].ToTextMark())
	}
	return out
}

// lineRuling pairs two points (`_aefd`, `_eaebg`) and an embedded color with a rulingKind and a
// markKind.
type lineRuling struct {
	_fgfd  rulingKind
	_gbgec markKind
	_ac.Color
	_aefd, _eaebg _ag.Point
}

// depth returns the smallest depth() among the cells get(x, 0) for x in [0, _eafbb), ignoring
// cells that are nil or have `_eddf` set. It returns 1e10 when no cell qualifies.
func (_gaefd *textTable) depth() float64 {
	shallowest := 1e10
	for x := 0; x < _gaefd._eafbb; x++ {
		cell := _gaefd.get(x, 0)
		if cell != nil && !cell._eddf {
			shallowest = _c.Min(shallowest, cell.depth())
		}
	}
	return shallowest
}

// _abcg compares two bounded values: it returns `_cfcc(a, b)` unless `_fggg` holds for that
// result, in which case it falls back to `_cgc(a, b)`.
func _abcg(_cgcc, _agef bounded) float64 {
	if primary := _cfcc(_cgcc, _agef); !_fggg(primary) {
		return primary
	}
	return _cgc(_cgcc, _agef)
}

// moveLP concatenates a translation by (`_dbeb`, `_ggae`) onto `_gbf` and then copies `_gbf`
// into `_aac`.
func (_ffad *textObject) moveLP(_dbeb, _ggae float64) {
	translation := _ag.NewMatrix(1, 0, 0, 1, _dbeb, _ggae)
	_ffad._gbf.Concat(translation)
	_ffad._aac = _ffad._gbf
}
// ExtractPageText returns the text contents of the receiver (an Extractor for a page) as a PageText.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
func (_eaf *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_ba ,_ddf ,_aca ,_ceb :=_eaf .extractPageText (_eaf ._gfg ,_eaf ._ge ,_ag .IdentityMatrix (),0);if _ceb !=nil {return nil ,0,0,_ceb ;};_ba .computeViews ();_ceb =_gbgfd (_ba );if _ceb !=nil {return nil ,0,0,_ceb ;};return _ba ,_ddf ,_aca ,nil ;};func (_cddc *textLine )appendWord (_bceb *textWord ){_cddc ._agae =append (_cddc ._agae ,_bceb );_cddc .PdfRectangle =_fgeb (_cddc .PdfRectangle ,_bceb .PdfRectangle );if _bceb ._agagbg > _cddc ._fbgf {_cddc ._fbgf =_bceb ._agagbg ;};if _bceb ._fedcd > _cddc ._eccbg {_cddc ._eccbg =_bceb ._fedcd ;};};type ruling struct{_cgbe rulingKind ;_fcce markKind ;_ac .Color ;_cbfd float64 ;_aef float64 ;_bcdag float64 ;_gfcb float64 ;};func _aceff (_agbb map[float64 ]gridTile )[]float64 {_dddbb :=make ([]float64 ,0,len (_agbb ));for _gdafe :=range _agbb {_dddbb =append (_dddbb ,_gdafe );};_ed .Float64s (_dddbb );return _dddbb ;};func (_bdcbb *textTable )logComposite (_fcbf string ){if !_egfe {return ;};_d .Log .Info ("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_bdcbb ._eafbb ,_bdcbb ._becb ,_fcbf );_ec .Printf ("\u0025\u0035\u0073 \u007c","");for _ecadf :=0;_ecadf < _bdcbb ._eafbb ;_ecadf ++{_ec .Printf ("\u0025\u0033\u0064 \u007c",_ecadf );};_ec .Println ("");_ec .Printf ("\u0025\u0035\u0073 \u002b","");for _egag :=0;_egag < _bdcbb ._eafbb ;_egag ++{_ec .Printf ("\u0025\u0033\u0073 \u002b","\u002d\u002d\u002d");};_ec .Println ("");for _dcgc :=0;_dcgc < _bdcbb ._becb ;_dcgc ++{_ec .Printf ("\u0025\u0035\u0064 \u007c",_dcgc );for _dadd :=0;_dadd < _bdcbb ._eafbb ;_dadd ++{_abde ,_ :=_bdcbb ._agcf [_adef (_dadd ,_dcgc )].parasBBox ();_ec .Printf ("\u0025\u0033\u0064 \u007c",len (_abde ));};_ec .Println ("");};_d .Log .Info ("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_bdcbb ._eafbb ,_bdcbb ._becb ,_fcbf );_ec .Printf ("\u0025\u0035\u0073 \u007c","");for _ababa :=0;_ababa < 
_bdcbb ._eafbb ;_ababa ++{_ec .Printf ("\u0025\u0031\u0032\u0064\u0020\u007c",_ababa );};_ec .Println ("");_ec .Printf ("\u0025\u0035\u0073 \u002b","");for _eded :=0;_eded < _bdcbb ._eafbb ;_eded ++{_ec .Print ("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b");};_ec .Println ("");for _acfeed :=0;_acfeed < _bdcbb ._becb ;_acfeed ++{_ec .Printf ("\u0025\u0035\u0064 \u007c",_acfeed );for _baea :=0;_baea < _bdcbb ._eafbb ;_baea ++{_dfcdc ,_ :=_bdcbb ._agcf [_adef (_baea ,_acfeed )].parasBBox ();_ccdbb :="";_bdag :=_dfcdc .merge ();if _bdag !=nil {_ccdbb =_bdag .text ();};_ccdbb =_ec .Sprintf ("\u0025\u0071",_cecc (_ccdbb ,12));_ccdbb =_ccdbb [1:len (_ccdbb )-1];_ec .Printf ("\u0025\u0031\u0032\u0073\u0020\u007c",_ccdbb );};_ec .Println ("");};};type shapesState struct{_baca _ag .Matrix ;_ffae _ag .Matrix ;_bece []*subpath ;_fafb bool ;_adaa _ag .Point ;_edadg *textObject ;};func (_bffc compositeCell )split (_efea ,_gaccg []float64 )*textTable {_efgg :=len (_efea )+1;_gagd :=len (_gaccg )+1;if _egfe {_d .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_gagd ,_efgg ,_bffc ,_efea ,_gaccg );_ec .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_bffc .paraList ));for _gcgg ,_eaec :=range _bffc .paraList {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gcgg ,_eaec .String ());};_ec .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_bffc .lines ()));for _gace ,_fbace :=range _bffc .lines (){_ec .Printf 
("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gace ,_fbace );};};_efea =_ebdee (_efea ,_bffc .Ury ,_bffc .Lly );_gaccg =_ebdee (_gaccg ,_bffc .Llx ,_bffc .Urx );_afabc :=make (map[uint64 ]*textPara ,_gagd *_efgg );_gdfd :=textTable {_eafbb :_gagd ,_becb :_efgg ,_cgfb :_afabc };_cdcb :=_bffc .paraList ;_ed .Slice (_cdcb ,func (_gacba ,_fgga int )bool {_ddgbf ,_eeadd :=_cdcb [_gacba ],_cdcb [_fgga ];_fffce ,_ggag :=_ddgbf .Lly ,_eeadd .Lly ;if _fffce !=_ggag {return _fffce < _ggag ;};return _ddgbf .Llx < _eeadd .Llx ;});_bda :=make (map[uint64 ]_ecc .PdfRectangle ,_gagd *_efgg );for _deaf ,_baag :=range _efea [1:]{_cbae :=_efea [_deaf ];for _aggbg ,_eedae :=range _gaccg [1:]{_bgcb :=_gaccg [_aggbg ];_bda [_adef (_aggbg ,_deaf )]=_ecc .PdfRectangle {Llx :_bgcb ,Urx :_eedae ,Lly :_baag ,Ury :_cbae };};};if _egfe {_d .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073");_ec .Printf ("\u0020\u0020\u0020\u0020");for _gafd :=0;_gafd < _gagd ;_gafd ++{_ec .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_gafd );};_ec .Println ();for _egfed :=0;_egfed < _efgg ;_egfed ++{_ec .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_egfed );for _gcfg :=0;_gcfg < _gagd ;_gcfg ++{_ec .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_bda [_adef (_gcfg ,_egfed )]);};_ec .Println ();};};_ceda :=func (_cdead *textLine )(int ,int ){for _eadc :=0;_eadc < _efgg ;_eadc ++{for _ccef :=0;_ccef < _gagd ;_ccef ++{if _aedc (_bda [_adef (_ccef ,_eadc )],_cdead .PdfRectangle ){return _ccef ,_eadc ;};};};return -1,-1;};_dbed :=make (map[uint64 ][]*textLine ,_gagd *_efgg );for _ ,_acgc :=range _cdcb .lines (){_ccdg ,_bcebe :=_ceda (_acgc );if _ccdg < 0{continue ;};_dbed [_adef (_ccdg ,_bcebe )]=append (_dbed [_adef (_ccdg ,_bcebe )],_acgc );};for _dgaf :=0;_dgaf < len (_efea )-1;_dgaf ++{_geff :=_efea [_dgaf ];_dbae :=_efea [_dgaf +1];for _cdcgg :=0;_cdcgg < len (_gaccg )-1;_cdcgg ++{_gcbaf 
:=_gaccg [_cdcgg ];_baegb :=_gaccg [_cdcgg +1];_fcbe :=_ecc .PdfRectangle {Llx :_gcbaf ,Urx :_baegb ,Lly :_dbae ,Ury :_geff };_egbb :=_dbed [_adef (_cdcgg ,_dgaf )];if len (_egbb )==0{continue ;};_dcccf :=_geba (_fcbe ,_egbb );_gdfd .put (_cdcgg ,_dgaf ,_dcccf );};};return &_gdfd ;};func _dgab (_bebag _ecc .PdfRectangle ,_eadcab ,_feagee ,_edgf ,_fcca *ruling )gridTile {_gfeda :=_bebag .Llx ;_dfcg :=_bebag .Urx ;_egbce :=_bebag .Lly ;_adeea :=_bebag .Ury ;return gridTile {PdfRectangle :_bebag ,_ffgb :_eadcab !=nil &&_eadcab .encloses (_egbce ,_adeea ),_bbaa :_feagee !=nil &&_feagee .encloses (_egbce ,_adeea ),_bade :_edgf !=nil &&_edgf .encloses (_gfeda ,_dfcg ),_bbb :_fcca !=nil &&_fcca .encloses (_gfeda ,_dfcg )};};func (_cffba *textLine )pullWord (_ddag *wordBag ,_badce *textWord ,_ccgb int ){_cffba .appendWord (_badce );_ddag .removeWord (_badce ,_ccgb );};var _bead =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_ac .White ,StrokeColor :_ac .White };func (_dffeg intSet )has (_eceeb int )bool {_ ,_befcc :=_dffeg [_eceeb ];return _befcc };func (_bbec *subpath )removeDuplicates (){if len (_bbec ._cada )==0{return ;};_fagf :=[]_ag .Point {_bbec ._cada [0]};for _ ,_egd :=range _bbec ._cada [1:]{if !_eebcc (_egd ,_fagf [len (_fagf )-1]){_fagf =append (_fagf ,_egd );};};_bbec ._cada =_fagf ;};func (_eacd *textLine )toTextMarks (_gcf *int )[]TextMark {var _cbbf []TextMark ;for _ ,_def :=range _eacd ._agae {if _def ._eeaa {_cbbf =_baaf (_cbbf ,_gcf ,"\u0020");};_ecgg :=_def .toTextMarks (_gcf );_cbbf =append (_cbbf ,_ecgg ...);};return _cbbf ;};func _bbfbf (_gaaea ,_cbaed _ag .Point )bool {_aecc :=_c .Abs (_gaaea .X -_cbaed .X );_acee :=_c .Abs (_gaaea .Y -_cbaed .Y );return _bgbf (_acee ,_aecc );};func _cecc (_ddbe string ,_efcf int )string {if len (_ddbe )< _efcf {return _ddbe ;};return _ddbe [:_efcf ];};func (_cef *textObject )setHorizScaling (_ddfe float64 ){if _cef ==nil {return ;};_cef ._faa ._acfg =_ddfe ;};func _bgad (_cdab 
,_bddc _ag .Point )rulingKind {_gfaa :=_c .Abs (_cdab .X -_bddc .X );_cgag :=_c .Abs (_cdab .Y -_bddc .Y );return _ffdf (_gfaa ,_cgag ,_gcag );};func _edgbda (_ccaa []*textMark ,_edbe _ecc .PdfRectangle )[]*textWord {var _ffce []*textWord ;var _degf *textWord ;if _cgccd {_d .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_ccaa ));};_bbfa :=func (){if _degf !=nil {_addge :=_degf .computeText ();if !_fbgdb (_addge ){_degf ._fede =_addge ;_ffce =append (_ffce ,_degf );if _cgccd {_d .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_ffce )-1,_degf .String ());for _bceda ,_gcfgb :=range _degf ._bebcb {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bceda ,_gcfgb .String ());};};};_degf =nil ;};};for _ ,_efdc :=range _ccaa {if _adfa &&_degf !=nil &&len (_degf ._bebcb )> 0{_cbdd :=_degf ._bebcb [len (_degf ._bebcb )-1];_fdcf ,_bgbc :=_befb (_efdc ._ebe );_ddce ,_gabf :=_befb (_cbdd ._ebe );if _bgbc &&!_gabf &&_cbdd .inDiacriticArea (_efdc ){_degf .addDiacritic (_fdcf );continue ;};if _gabf &&!_bgbc &&_efdc .inDiacriticArea (_cbdd ){_degf ._bebcb =_degf ._bebcb [:len (_degf ._bebcb )-1];_degf .appendMark (_efdc ,_edbe );_degf .addDiacritic (_ddce );continue ;};};_gbacf :=_fbgdb (_efdc ._ebe );if _gbacf {_bbfa ();continue ;};if _degf ==nil &&!_gbacf {_degf =_faac ([]*textMark {_efdc },_edbe );continue ;};_caacd :=_degf ._agagbg ;_eggg :=_c .Abs (_acfgf (_edbe ,_efdc )-_degf ._fedcd )/_caacd ;_cadbb :=_cgbg (_efdc ,_degf )/_caacd ;if _cadbb >=_febg ||!(-_ebad <=_cadbb &&_eggg <=_cged ){_bbfa ();_degf =_faac ([]*textMark {_efdc },_edbe );continue ;};_degf .appendMark (_efdc ,_edbe );};_bbfa ();return _ffce ;};func (_dece intSet )del (_ccba int ){delete (_dece ,_ccba )};type fontEntry struct{_bcef *_ecc .PdfFont ;_eaa int64 ;};func (_fcc *shapesState )cubicTo (_acdc 
,_egg ,_fece ,_caea ,_gafe ,_eebc float64 ){if _fbdd {_d .Log .Info ("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a");};_fcc .addPoint (_gafe ,_eebc );};func (_gbcg *textPara )text ()string {_abaec :=new (_cf .Buffer );_gbcg .writeText (_abaec );return _abaec .String ();};func (_gedd rulingList )asTiling ()gridTiling {if _eegg {_d .Log .Info ("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_gedd ));};for _feff ,_eabf :=range _gedd [1:]{_dbdce :=_gedd [_feff ];if _dbdce .alignsPrimary (_eabf )&&_dbdce .alignsSec (_eabf ){_d .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_eabf ,_dbdce );};};_gedd .sortStrict ();_gedd .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_fgacg ,_ffca :=_gedd .vertsHorzs ();_ffac :=_fgacg .primaries ();_bcebfc :=_ffca .primaries ();_gafa :=len (_ffac )-1;_cbeb :=len (_bcebfc )-1;if _gafa ==0||_cbeb ==0{return gridTiling {};};_adaec :=_ecc .PdfRectangle {Llx :_ffac [0],Urx :_ffac [_gafa ],Lly :_bcebfc [0],Ury :_bcebfc [_cbeb ]};if _eegg {_d .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_fgacg ));for _cggaa ,_eff :=range _fgacg {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cggaa ,_eff );};_d .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_ffca 
));for _bgab ,_eddfd :=range _ffca {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bgab ,_eddfd );};_d .Log .Info ("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_gafa ,_cbeb ,_ffac ,_bcebfc );};_gfed :=make ([]gridTile ,_gafa *_cbeb );for _eeba :=_cbeb -1;_eeba >=0;_eeba --{_feeg :=_bcebfc [_eeba ];_badeb :=_bcebfc [_eeba +1];for _agde :=0;_agde < _gafa ;_agde ++{_bcebfd :=_ffac [_agde ];_bagbd :=_ffac [_agde +1];_fdgb :=_fgacg .findPrimSec (_bcebfd ,_feeg );_bdec :=_fgacg .findPrimSec (_bagbd ,_feeg );_bgceg :=_ffca .findPrimSec (_feeg ,_bcebfd );_cdbb :=_ffca .findPrimSec (_badeb ,_bcebfd );_gcbe :=_ecc .PdfRectangle {Llx :_bcebfd ,Urx :_bagbd ,Lly :_feeg ,Ury :_badeb };_cefd :=_dgab (_gcbe ,_fdgb ,_bdec ,_bgceg ,_cdbb );_gfed [_eeba *_gafa +_agde ]=_cefd ;if _eegg {_ec .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_agde ,_eeba ,_cefd .String (),_cefd .Width (),_cefd .Height ());};};};if _eegg {_d .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_adaec );};_dede :=make ([]map[float64 ]gridTile ,_cbeb );for _bagd :=_cbeb -1;_bagd >=0;_bagd --{if _eegg {_ec .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_bagd );};_dede [_bagd ]=make (map[float64 ]gridTile ,_gafa );for _ccggf :=0;_ccggf < _gafa ;_ccggf ++{_ffcc :=_gfed [_bagd *_gafa +_ccggf ];if _eegg {_ec .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ccggf ,_ffcc 
);};if !_ffcc ._ffgb {continue ;};_ggdb :=_ccggf ;for _eefg :=_ccggf +1;!_ffcc ._bbaa &&_eefg < _gafa ;_eefg ++{_ffacf :=_gfed [_bagd *_gafa +_eefg ];_ffcc .Urx =_ffacf .Urx ;_ffcc ._bbb =_ffcc ._bbb ||_ffacf ._bbb ;_ffcc ._bade =_ffcc ._bade ||_ffacf ._bade ;_ffcc ._bbaa =_ffacf ._bbaa ;if _eegg {_ec .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_eefg ,_ffacf ,_ffcc );};_ggdb =_eefg ;};if _eegg {_ec .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_ccggf ,_ggdb ,_ffcc );};_ccggf =_ggdb ;_dede [_bagd ][_ffcc .Llx ]=_ffcc ;};};_fbaceb :=make (map[float64 ]map[float64 ]gridTile ,_cbeb );_dgeb :=make (map[float64 ]map[float64 ]struct{},_cbeb );for _ddcb :=_cbeb -1;_ddcb >=0;_ddcb --{_cafa :=_gfed [_ddcb *_gafa ].Lly ;_fbaceb [_cafa ]=make (map[float64 ]gridTile ,_gafa );_dgeb [_cafa ]=make (map[float64 ]struct{},_gafa );};if _eegg {_d .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_adaec );};for _ggcb :=_cbeb -1;_ggcb >=0;_ggcb --{_agga :=_gfed [_ggcb *_gafa ].Lly ;_dcbf :=_dede [_ggcb ];if _eegg {_ec .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_ggcb );};for _ ,_efgaf :=range _aceff (_dcbf ){if _ ,_gcfag :=_dgeb [_agga ][_efgaf ];_gcfag {continue ;};_fedf :=_dcbf [_efgaf ];if _eegg {_ec .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_fedf .String ());};for _caaf :=_ggcb -1;_caaf >=0;_caaf --{if _fedf ._bade {break ;};_cacbgc :=_dede [_caaf ];_ebdd ,_agab :=_cacbgc [_efgaf ];if !_agab {break ;};if _ebdd .Urx !=_fedf .Urx {break ;};_fedf ._bade =_ebdd ._bade ;_fedf .Lly =_ebdd .Lly ;if _eegg {_ec .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_ebdd .String (),_fedf 
.String ());};_dgeb [_ebdd .Lly ][_ebdd .Llx ]=struct{}{};};if _ggcb ==0{_fedf ._bade =true ;};if _fedf .complete (){_fbaceb [_agga ][_efgaf ]=_fedf ;};};};_edfc :=gridTiling {PdfRectangle :_adaec ,_degd :_bebg (_fbaceb ),_bcgf :_aaaggf (_fbaceb ),_aegg :_fbaceb };_edfc .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");return _edfc ;};func (_ffgf paraList )llyRange (_cbaf []int ,_gfdge ,_abce float64 )[]int {_ddeg :=len (_ffgf );if _abce < _ffgf [_cbaf [0]].Lly ||_gfdge > _ffgf [_cbaf [_ddeg -1]].Lly {return nil ;};_cadg :=_ed .Search (_ddeg ,func (_efbf int )bool {return _ffgf [_cbaf [_efbf ]].Lly >=_gfdge });_cafe :=_ed .Search (_ddeg ,func (_aggb int )bool {return _ffgf [_cbaf [_aggb ]].Lly > _abce });return _cbaf [_cadg :_cafe ];};func (_gdef rulingList )primMinMax ()(float64 ,float64 ){_egcae ,_ffcf :=_gdef [0]._cbfd ,_gdef [0]._cbfd ;for _ ,_eeca :=range _gdef [1:]{if _eeca ._cbfd < _egcae {_egcae =_eeca ._cbfd ;}else if _eeca ._cbfd > _ffcf {_ffcf =_eeca ._cbfd ;};};return _egcae ,_ffcf ;};type textLine struct{_ecc .PdfRectangle ;_eccbg float64 ;_agae []*textWord ;_fbgf float64 ;};type pathSection struct{_bafa []*subpath ;_ac .Color ;};func (_agaec *textMark )inDiacriticArea (_edd *textMark )bool {_fcdf :=_agaec .Llx -_edd .Llx ;_deee :=_agaec .Urx -_edd .Urx ;_bgfe :=_agaec .Lly -_edd .Lly ;return _c .Abs (_fcdf +_deee )< _agaec .Width ()*_acgb &&_c .Abs (_bgfe )< _agaec .Height ()*_acgb ;};func _aggbb (_cfage []pathSection ){if _cafc < 0.0{return ;};if _ded {_d .Log .Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_cfage ));};for _cdaf ,_fefe :=range _cfage {for _daea ,_aegd :=range _fefe ._bafa {for _beeeg ,_bbef :=range _aegd ._cada {_aegd ._cada [_beeeg ]=_ag .Point {X :_egdbb (_bbef .X ),Y :_egdbb (_bbef .Y )};if _ded {_afgbc :=_aegd ._cada [_beeeg ];if !_eebcc (_bbef ,_afgbc ){_agdeec :=_ag .Point {X :_afgbc 
.X -_bbef .X ,Y :_afgbc .Y -_bbef .Y };_ec .Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_cdaf ,_daea ,_beeeg ,_bbef ,_afgbc ,_agdeec );};};};};};};func _afgb (_aebe ,_ecad ,_dgda ,_egcag *textPara )*textTable {_ggac :=&textTable {_eafbb :2,_becb :2,_cgfb :make (map[uint64 ]*textPara ,4)};_ggac .put (0,0,_aebe );_ggac .put (1,0,_ecad );_ggac .put (0,1,_dgda );_ggac .put (1,1,_egcag );return _ggac ;};func (_cfcad lineRuling )yMean ()float64 {return 0.5*(_cfcad ._aefd .Y +_cfcad ._eaebg .Y )};func (_bdcfg *compositeCell )updateBBox (){for _ ,_gfbg :=range _bdcfg .paraList {_bdcfg .PdfRectangle =_fgeb (_bdcfg .PdfRectangle ,_gfbg .PdfRectangle );};};func (_ffbea *subpath )last ()_ag .Point {return _ffbea ._cada [len (_ffbea ._cada )-1]};
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in the TextMarkArray.
// Marks with Meta set, and marks whose Text satisfies `_fbgdb`, are ignored. The bool result is
// false when no mark contributed, in which case the rectangle is the zero value.
func (_dgff *TextMarkArray) BBox() (_ecc.PdfRectangle, bool) {
	var (
		box   _ecc.PdfRectangle
		found bool
	)
	for _, mark := range _dgff._bgea {
		if mark.Meta || _fbgdb(mark.Text) {
			continue
		}
		if !found {
			// The first contributing mark seeds the box.
			box, found = mark.BBox, true
			continue
		}
		box = _fgeb(box, mark.BBox)
	}
	return box, found
}
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	// Images lists the extracted image marks.
	Images []ImageMark
}

// textMark is the package-internal record for a piece of text, with an embedded bounding
// rectangle.
// NOTE(review): field meanings beyond those noted below are not visible from this part of the
// file — confirm against the code that populates them.
type textMark struct {
	_ecc.PdfRectangle
	_dbfb  int
	_ebe   string // the mark's text; textMark.String prints it in quotes
	_fgefc string
	_ffgag *_ecc.PdfFont
	_dac   float64 // printed by textMark.String under the label "fontsize"
	_accb  float64
	_beea  _ag.Matrix
	_gbab  _ag.Point
	_bfff  _ecc.PdfRectangle
	_ggee  _ac.Color
	_dec   _ac.Color
}
// String returns a description of the markKind.
// Names are looked up in `_bbfe`; a value with no entry is rendered as "Not a mark: N".
func (_egefd markKind) String() string {
	if name, known := _bbfe[_egefd]; known {
		return name
	}
	return _ec.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _egefd)
}
// String returns a description of the textMark.
func (_geabd *textMark )String ()string {return _ec .Sprintf ("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022",_geabd .PdfRectangle ,_geabd ._dac ,_geabd ._ebe );};func (_bega *subpath )makeRectRuling (_gcdfd _ac .Color )(*ruling ,bool ){if _aea {_d .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_bega );};_aegcc :=_bega ._cada [:4];_aebg :=make (map[int ]rulingKind ,len (_aegcc ));for _cdcd ,_dcfg :=range _aegcc {_abaff :=_bega ._cada [(_cdcd +1)%4];_aebg [_cdcd ]=_feaf (_dcfg ,_abaff );if _aea {_ec .Printf ("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066",_cdcd ,_aebg [_cdcd ],_dcfg ,_abaff );};};if _aea {_ec .Printf ("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_aebg );};var _fcede ,_bcc []int ;for _febc ,_cfca :=range _aebg {switch _cfca {case _fdcbd :_bcc =append (_bcc ,_febc );case _ccgf :_fcede =append (_fcede ,_febc );};};if _aea {_ec .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_bcc ),_bcc );_ec .Printf ("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_fcede ),_fcede );};_fbafb :=(len (_bcc )==2&&len (_fcede )==2)||(len (_bcc )==2&&len (_fcede )==0&&_bbfbf (_aegcc [_bcc [0]],_aegcc [_bcc [1]]))||(len (_fcede )==2&&len (_bcc )==0&&_dbba (_aegcc [_fcede [0]],_aegcc [_fcede [1]]));if _aea {_ec .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_bcc ),len (_fcede ),_fbafb );};if !_fbafb {if _aea {_d .Log .Error ("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v",_bega );_ec .Printf (" 
\u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_bcc ),len (_fcede ),_fbafb );};return &ruling {},false ;};if len (_fcede )==0{for _ebcb ,_aefc :=range _aebg {if _aefc !=_fdcbd {_fcede =append (_fcede ,_ebcb );};};};if len (_bcc )==0{for _gebad ,_eabbg :=range _aebg {if _eabbg !=_ccgf {_bcc =append (_bcc ,_gebad );};};};if _aea {_d .Log .Info ("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_bcc ),len (_fcede ),len (_aegcc ),_bcc ,_fcede ,_aegcc );};var _edae ,_agfff ,_dfdgb ,_ddfee _ag .Point ;if _aegcc [_bcc [0]].Y > _aegcc [_bcc [1]].Y {_dfdgb ,_ddfee =_aegcc [_bcc [0]],_aegcc [_bcc [1]];}else {_dfdgb ,_ddfee =_aegcc [_bcc [1]],_aegcc [_bcc [0]];};if _aegcc [_fcede [0]].X > _aegcc [_fcede [1]].X {_edae ,_agfff =_aegcc [_fcede [0]],_aegcc [_fcede [1]];}else {_edae ,_agfff =_aegcc [_fcede [1]],_aegcc [_fcede [0]];};_bdedf :=_ecc .PdfRectangle {Llx :_edae .X ,Urx :_agfff .X ,Lly :_ddfee .Y ,Ury :_dfdgb .Y };if _bdedf .Llx > _bdedf .Urx {_bdedf .Llx ,_bdedf .Urx =_bdedf .Urx ,_bdedf .Llx ;};if _bdedf .Lly > _bdedf .Ury {_bdedf .Lly ,_bdedf .Ury =_bdedf .Ury ,_bdedf .Lly ;};_dgfgb :=rectRuling {PdfRectangle :_bdedf ,_dbafd :_edcc (_bdedf ),Color :_gcdfd };if _dgfgb ._dbafd ==_afaa {if _aea {_d .Log .Error ("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c");};return nil ,false ;};_aedd ,_cdbf :=_dgfgb .asRuling ();if !_cdbf {if _aea {_d .Log .Error 
("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg");};return nil ,false ;};if _ded {_ec .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_aedd .String ());};return _aedd ,true ;};func (_gacbg paraList )llyOrdering ()[]int {_fedd :=make ([]int ,len (_gacbg ));for _bgec :=range _gacbg {_fedd [_bgec ]=_bgec ;};_ed .SliceStable (_fedd ,func (_aeca ,_cbf int )bool {_abaea ,_fbcf :=_fedd [_aeca ],_fedd [_cbf ];return _gacbg [_abaea ].Lly < _gacbg [_fbcf ].Lly ;});return _fedd ;};type textObject struct{_gcbf *Extractor ;_fbb *_ecc .PdfPageResources ;_deb _f .GraphicsState ;_faa *textState ;_dff *stateStack ;_aac _ag .Matrix ;_gbf _ag .Matrix ;_bdb []*textMark ;_affc bool ;};func _facb (_faba ,_dgffb _ag .Point ,_caef _ac .Color )(*ruling ,bool ){_eaab :=lineRuling {_aefd :_faba ,_eaebg :_dgffb ,_fgfd :_bgad (_faba ,_dgffb ),Color :_caef };if _eaab ._fgfd ==_afaa {return nil ,false ;};return _eaab .asRuling ();};func (_bfbd *textObject )getFontDict (_abfef string )(_bcg _gf .PdfObject ,_gbdg error ){_ecee :=_bfbd ._fbb ;if _ecee ==nil {_d .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_abfef );return nil ,nil ;};_bcg ,_gdd :=_ecee .GetFontByName (_gf .PdfObjectName (_abfef ));if !_gdd {_d .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_abfef );return nil ,_e .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _bcg ,nil ;};const _beae =1.0/1000.0;var (_af =_e .New ("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072");_db =_e .New 
("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072"););func _cccbf (_fbgb string ,_gfca []rulingList ){_d .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_gfca ),_fbgb );for _cffaa ,_fgba :=range _gfca {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cffaa ,_fgba .String ());};};func (_dbff *textPara )toTextMarks (_aaed *int )[]TextMark {if _dbff ._bbd ==nil {return _dbff .toCellTextMarks (_aaed );};var _faggd []TextMark ;for _gggb :=0;_gggb < _dbff ._bbd ._becb ;_gggb ++{for _cgae :=0;_cgae < _dbff ._bbd ._eafbb ;_cgae ++{_eadd :=_dbff ._bbd .get (_cgae ,_gggb );if _eadd ==nil {_faggd =_baaf (_faggd ,_aaed ,"\u0009");}else {_dfce :=_eadd .toCellTextMarks (_aaed );_faggd =append (_faggd ,_dfce ...);};_faggd =_baaf (_faggd ,_aaed ,"\u0020");};if _gggb < _dbff ._bbd ._becb -1{_faggd =_baaf (_faggd ,_aaed ,"\u000a");};};return _faggd ;};func (_defc lineRuling )asRuling ()(*ruling ,bool ){_cced :=ruling {_cgbe :_defc ._fgfd ,Color :_defc .Color ,_fcce :_baga };switch _defc ._fgfd {case _ccgf :_cced ._cbfd =_defc .xMean ();_cced ._aef =_c .Min (_defc ._aefd .Y ,_defc ._eaebg .Y );_cced ._bcdag =_c .Max (_defc ._aefd .Y ,_defc ._eaebg .Y );case _fdcbd :_cced ._cbfd =_defc .yMean ();_cced ._aef =_c .Min (_defc ._aefd .X ,_defc ._eaebg .X );_cced ._bcdag =_c .Max (_defc ._aefd .X ,_defc ._eaebg .X );default:_d .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_defc ._fgfd );return nil ,false ;};return &_cced ,true ;};
|
||
|
||
// NewFromContents creates a new extractor from contents and page resources.
|
||
func NewFromContents (contents string ,resources *_ecc .PdfPageResources )(*Extractor ,error ){_edg :=&Extractor {_gfg :contents ,_ge :resources ,_cbc :map[string ]fontEntry {},_eb :map[string ]textResult {}};return _edg ,nil ;};type textState struct{_gec float64 ;_cad float64 ;_acfg float64 ;_bbgf float64 ;_ffdb float64 ;_egbe RenderMode ;_age float64 ;_cec *_ecc .PdfFont ;_aceg _ecc .PdfRectangle ;_cdbc int ;_caa int ;};func (_dabb *textObject )getFont (_gabd string )(*_ecc .PdfFont ,error ){if _dabb ._gcbf ._cbc !=nil {_dabb ._gcbf ._ee ++;_fef ,_addd :=_dabb ._gcbf ._cbc [_gabd ];if _addd {_fef ._eaa =_dabb ._gcbf ._ee ;return _fef ._bcef ,nil ;};};_bddf ,_cggc :=_dabb .getFontDirect (_gabd );if _cggc !=nil {return nil ,_cggc ;};if _dabb ._gcbf ._cbc !=nil {_fdbf :=fontEntry {_bddf ,_dabb ._gcbf ._ee };if len (_dabb ._gcbf ._cbc )>=_fcff {var _gfc []string ;for _gffc :=range _dabb ._gcbf ._cbc {_gfc =append (_gfc ,_gffc );};_ed .Slice (_gfc ,func (_fgeg ,_cagb int )bool {return _dabb ._gcbf ._cbc [_gfc [_fgeg ]]._eaa < _dabb ._gcbf ._cbc [_gfc [_cagb ]]._eaa ;});delete (_dabb ._gcbf ._cbc ,_gfc [0]);};_dabb ._gcbf ._cbc [_gabd ]=_fdbf ;};return _bddf ,nil ;};
|
||
|
||
// ImageMark represents an image drawn on a page and its position in device coordinates.
|
||
// All coordinates are in device coordinates.
|
||
type ImageMark struct{Image *_ecc .Image ;
|
||
|
||
// Dimensions of the image as displayed in the PDF.
|
||
Width float64 ;Height float64 ;
|
||
|
||
// Position of the image in PDF coordinates (lower left corner).
|
||
X float64 ;Y float64 ;
|
||
|
||
// Angle in degrees, if rotated.
|
||
Angle float64 ;};func _dgeg (_gfbgg int ,_fcfe map[int ][]float64 )([]int ,int ){_eggb :=make ([]int ,_gfbgg );_fgaf :=0;for _affee :=0;_affee < _gfbgg ;_affee ++{_eggb [_affee ]=_fgaf ;_fgaf +=len (_fcfe [_affee ])+1;};return _eggb ,_fgaf ;};func (_gacd *textObject )setTextMatrix (_dbbg []float64 ){if len (_dbbg )!=6{_d .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_dbbg ));return ;};_aedf ,_gacc ,_bfe ,_faf ,_cbgg ,_fda :=_dbbg [0],_dbbg [1],_dbbg [2],_dbbg [3],_dbbg [4],_dbbg [5];_gacd ._aac =_ag .NewMatrix (_aedf ,_gacc ,_bfe ,_faf ,_cbgg ,_fda );_gacd ._gbf =_gacd ._aac ;};func (_ggedb *wordBag )text ()string {_eaed :=_ggedb .allWords ();_ceaf :=make ([]string ,len (_eaed ));for _gbef ,_eged :=range _eaed {_ceaf [_gbef ]=_eged ._fede ;};return _ae .Join (_ceaf ,"\u0020");};func (_cagc paraList )findTextTables ()[]*textTable {var _gegg []*textTable ;for _ ,_geaddf :=range _cagc {if _geaddf .taken ()||_geaddf .Width ()==0{continue ;};_adaae :=_geaddf .isAtom ();if _adaae ==nil {continue ;};_adaae .growTable ();if _adaae ._eafbb *_adaae ._becb < _adaf {continue ;};_adaae .markCells ();_adaae .log ("\u0067\u0072\u006fw\u006e");_gegg =append (_gegg ,_adaae );};return _gegg ;};type textPara struct{_ecc .PdfRectangle ;_decd _ecc .PdfRectangle ;_beedd []*textLine ;_bbd *textTable ;_bbcfc bool ;_eddf bool ;_eeff *textPara ;_feag *textPara ;_ffbb *textPara ;_ddfea *textPara ;};func _cgbg (_bdfe ,_fcbc bounded )float64 {return _bdfe .bbox ().Llx -_fcbc .bbox ().Urx };func _cae (_afaf _ag .Matrix )_ag .Point {_cace ,_efaf :=_afaf .Translation ();return _ag .Point {X :_cace ,Y :_efaf };};
|
||
|
||
// String returns a string describing the current state of the textState stack.
|
||
func (_bggf *stateStack )String ()string {_cbgge :=[]string {_ec .Sprintf ("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064",len (*_bggf ))};for _efa ,_cgf :=range *_bggf {_efag :="\u003c\u006e\u0069l\u003e";if _cgf !=nil {_efag =_cgf .String ();};_cbgge =append (_cbgge ,_ec .Sprintf ("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073",_efa ,_efag ));};return _ae .Join (_cbgge ,"\u000a");};
|
||
|
||
// Elements returns the TextMarks in `ma`.
|
||
func (_bce *TextMarkArray )Elements ()[]TextMark {return _bce ._bgea };func (_afg paraList )toTextMarks ()[]TextMark {_eeaf :=0;var _fgde []TextMark ;for _cdgf ,_eaff :=range _afg {if _eaff ._eddf {continue ;};_fbcd :=_eaff .toTextMarks (&_eeaf );_fgde =append (_fgde ,_fbcd ...);if _cdgf !=len (_afg )-1{if _ggdd (_eaff ,_afg [_cdgf +1]){_fgde =_baaf (_fgde ,&_eeaf ,"\u0020");}else {_fgde =_baaf (_fgde ,&_eeaf ,"\u000a");_fgde =_baaf (_fgde ,&_eeaf ,"\u000a");};};};_fgde =_baaf (_fgde ,&_eeaf ,"\u000a");_fgde =_baaf (_fgde ,&_eeaf ,"\u000a");return _fgde ;};func (_adfd rulingList )sort (){_ed .Slice (_adfd ,_adfd .comp )};func (_bccdf *textTable )subdivide ()*textTable {_bccdf .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");_ceggg :=_bccdf .compositeRowCorridors ();_adaca :=_bccdf .compositeColCorridors ();if _egfe {_d .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_abgf (_ceggg ),_abgf (_adaca ));};if len (_ceggg )==0||len (_adaca )==0{return _bccdf ;};_bgggf (_ceggg );_bgggf (_adaca );if _egfe {_d .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_abgf (_ceggg ),_abgf (_adaca ));};_bfef ,_dbcb :=_dgeg (_bccdf ._becb ,_ceggg );_efbg ,_cfda :=_dgeg (_bccdf ._eafbb ,_adaca );_adfe :=make (map[uint64 ]*textPara ,_cfda *_dbcb );_fdbfc :=&textTable {PdfRectangle :_bccdf .PdfRectangle ,_fgegf :_bccdf ._fgegf ,_becb :_dbcb ,_eafbb :_cfda ,_cgfb :_adfe };if _egfe {_d .Log .Info 
("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_bccdf ._eafbb ,_bccdf ._becb ,_cfda ,_dbcb ,_abgf (_ceggg ),_abgf (_adaca ),_bfef ,_efbg );};for _cgda :=0;_cgda < _bccdf ._becb ;_cgda ++{_adcf :=_bfef [_cgda ];for _ffcb :=0;_ffcb < _bccdf ._eafbb ;_ffcb ++{_ebgc :=_efbg [_ffcb ];if _egfe {_ec .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_ffcb ,_cgda ,_ebgc ,_adcf );};_cbff ,_gfaeg :=_bccdf ._agcf [_adef (_ffcb ,_cgda )];if !_gfaeg {continue ;};_fefb :=_cbff .split (_ceggg [_cgda ],_adaca [_ffcb ]);for _fdcg :=0;_fdcg < _fefb ._becb ;_fdcg ++{for _deef :=0;_deef < _fefb ._eafbb ;_deef ++{_cgdfa :=_fefb .get (_deef ,_fdcg );_fdbfc .put (_ebgc +_deef ,_adcf +_fdcg ,_cgdfa );if _egfe {_ec .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_ebgc +_deef ,_adcf +_fdcg ,_cgdfa );};};};};};return _fdbfc ;};func (_caae *textObject )getFillColor ()_ac .Color {return _eacfe (_caae ._deb .ColorspaceNonStroking ,_caae ._deb .ColorNonStroking );};func (_fgge paraList )eventNeighbours (_bedbe []event )map[*textPara ][]int {_ed .Slice (_bedbe ,func (_fabcc ,_cdce int )bool {_ffgfg ,_aggf :=_bedbe [_fabcc ],_bedbe [_cdce ];_edeaf ,_cegba :=_ffgfg ._fffced ,_aggf ._fffced ;if _edeaf !=_cegba {return _edeaf < _cegba ;};if _ffgfg ._gbgc !=_aggf ._gbgc {return _ffgfg ._gbgc ;};return 
_fabcc < _cdce ;});_dbaa :=make (map[int ]intSet );_bggef :=make (intSet );for _ ,_bageb :=range _bedbe {if _bageb ._gbgc {_dbaa [_bageb ._gddbg ]=make (intSet );for _caec :=range _bggef {if _caec !=_bageb ._gddbg {_dbaa [_bageb ._gddbg ].add (_caec );_dbaa [_caec ].add (_bageb ._gddbg );};};_bggef .add (_bageb ._gddbg );}else {_bggef .del (_bageb ._gddbg );};};_dbdca :=map[*textPara ][]int {};for _cgfbb ,_bfda :=range _dbaa {_dded :=_fgge [_cgfbb ];if len (_bfda )==0{_dbdca [_dded ]=nil ;continue ;};_ccb :=make ([]int ,len (_bfda ));_cabf :=0;for _cbdgg :=range _bfda {_ccb [_cabf ]=_cbdgg ;_cabf ++;};_dbdca [_dded ]=_ccb ;};return _dbdca ;};func (_bcdbf *textTable )getComposite (_ffff ,_cafdd int )(paraList ,_ecc .PdfRectangle ){_acgd ,_bgddfb :=_bcdbf ._agcf [_adef (_ffff ,_cafdd )];if _egfe {_ec .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a",_ffff ,_cafdd ,_acgd .String ());};if !_bgddfb {return nil ,_ecc .PdfRectangle {};};return _acgd .parasBBox ();};
|
||
|
||
// ExtractText processes and extracts all text data in content streams and returns as a string.
|
||
// It takes into account character encodings in the PDF file, which are decoded by
|
||
// CharcodeBytesToUnicode.
|
||
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
|
||
func (_abb *Extractor )ExtractText ()(string ,error ){_fec ,_ ,_ ,_cabg :=_abb .ExtractTextWithStats ();return _fec ,_cabg ;};func (_fbdc *ruling )alignsSec (_dege *ruling )bool {const _dgdg =_fbac +1.0;return _fbdc ._aef -_dgdg <=_dege ._bcdag &&_dege ._aef -_dgdg <=_fbdc ._bcdag ;};func (_bbc *imageExtractContext )extractContentStreamImages (_cfe string ,_acd *_ecc .PdfPageResources )error {_da :=_f .NewContentStreamParser (_cfe );_fdg ,_ebg :=_da .Parse ();if _ebg !=nil {return _ebg ;};if _bbc ._agf ==nil {_bbc ._agf =map[*_gf .PdfObjectStream ]*cachedImage {};};if _bbc ._ad ==nil {_bbc ._ad =&ImageExtractOptions {};};_bc :=_f .NewContentStreamProcessor (*_fdg );_bc .AddHandler (_f .HandlerConditionEnumAllOperands ,"",_bbc .processOperand );return _bc .Process (_acd );};func (_fgdga rulingList )toGrids ()[]rulingList {if _ded {_d .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_fgdga );};_gdfb :=_fgdga .intersections ();if _ded {_d .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_fgdga ),len (_gdfb ));for _ ,_abfeb :=range _abfcd (_gdfb ){_ec .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_abfeb ,_gdfb [_abfeb ]);};};_gabdf :=make (map[int ]intSet ,len (_fgdga ));for _dgfe :=range _fgdga {_adff :=_fgdga .connections (_gdfb ,_dgfe );if len (_adff )> 0{_gabdf [_dgfe ]=_adff ;};};if _ded {_d .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_gabdf ));for _ ,_caag :=range _abfcd (_gabdf ){_ec .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_caag ,_gabdf [_caag ]);};};_cdbd :=_bdfb (len (_fgdga ),func (_faggf ,_adee int )bool {_fbcbf ,_fdceb :=len (_gabdf [_faggf ]),len (_gabdf [_adee ]);if _fbcbf !=_fdceb {return _fbcbf > _fdceb ;};return _fgdga .comp (_faggf ,_adee );});if _ded {_d .Log .Info 
("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_cdbd );};_dbda :=[][]int {{_cdbd [0]}};_dfbbc :for _ ,_efcba :=range _cdbd [1:]{for _ceag ,_gfbd :=range _dbda {for _ ,_faggg :=range _gfbd {if _gabdf [_faggg ].has (_efcba ){_dbda [_ceag ]=append (_gfbd ,_efcba );continue _dfbbc ;};};};_dbda =append (_dbda ,[]int {_efcba });};if _ded {_d .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_dbda );};_ed .SliceStable (_dbda ,func (_eecf ,_bgfg int )bool {return len (_dbda [_eecf ])> len (_dbda [_bgfg ])});for _ ,_aaad :=range _dbda {_ed .Slice (_aaad ,func (_bgeb ,_aaagg int )bool {return _fgdga .comp (_aaad [_bgeb ],_aaad [_aaagg ])});};_egca :=make ([]rulingList ,len (_dbda ));for _afdb ,_caed :=range _dbda {_badcc :=make (rulingList ,len (_caed ));for _eecbf ,_geac :=range _caed {_badcc [_eecbf ]=_fgdga [_geac ];};_egca [_afdb ]=_badcc ;};if _ded {_d .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_egca );};var _edbg []rulingList ;for _ ,_ffee :=range _egca {if _eebdb ,_gfdgg :=_ffee .isActualGrid ();_gfdgg {_ffee =_eebdb ;_ffee =_ffee .snapToGroups ();_edbg =append (_edbg ,_ffee );};};if _ded {_cccbf ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_edbg );_d .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_egca ),len (_edbg ));};return _edbg ;};func (_ceff *textPara )writeCellText (_gdg _be .Writer ){for _acbf ,_ggbc :=range _ceff ._beedd {_gacg :=_ggbc .text ();_eaba :=_aee &&_ggbc .endsInHyphen ()&&_acbf !=len (_ceff ._beedd )-1;if _eaba {_gacg =_dddc (_gacg );};_gdg .Write ([]byte (_gacg ));if !(_eaba ||_acbf ==len (_ceff ._beedd )-1){_gdg .Write ([]byte (_fgfa (_ggbc ._eccbg ,_ceff ._beedd [_acbf 
+1]._eccbg )));};};};func (_cga *imageExtractContext )extractFormImages (_bgb *_gf .PdfObjectName ,_aed _f .GraphicsState ,_agg *_ecc .PdfPageResources )error {_fag ,_gac :=_agg .GetXObjectFormByName (*_bgb );if _gac !=nil {return _gac ;};if _fag ==nil {return nil ;};_bd ,_gac :=_fag .GetContentStream ();if _gac !=nil {return _gac ;};_deg :=_fag .Resources ;if _deg ==nil {_deg =_agg ;};_gac =_cga .extractContentStreamImages (string (_bd ),_deg );if _gac !=nil {return _gac ;};_cga ._cag ++;return nil ;};func _fdc (_dffcc []*wordBag )[]*wordBag {if len (_dffcc )<=1{return _dffcc ;};if _fgad {_d .Log .Info ("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a");};_ed .Slice (_dffcc ,func (_gdfa ,_efc int )bool {_cddd ,_bfec :=_dffcc [_gdfa ],_dffcc [_efc ];_fdcb :=_cddd .Width ()*_cddd .Height ();_cebf :=_bfec .Width ()*_bfec .Height ();if _fdcb !=_cebf {return _fdcb > _cebf ;};if _cddd .Height ()!=_bfec .Height (){return _cddd .Height ()> _bfec .Height ();};return _gdfa < _efc ;});var _gbfe []*wordBag ;_cdae :=make (intSet );for _dccg :=0;_dccg < len (_dffcc );_dccg ++{if _cdae .has (_dccg ){continue ;};_efg :=_dffcc [_dccg ];for _fddd :=_dccg +1;_fddd < len (_dffcc );_fddd ++{if _cdae .has (_dccg ){continue ;};_cfaag :=_dffcc [_fddd ];_cffa :=_efg .PdfRectangle ;_cffa .Llx -=_efg ._gcccc ;if _aedc (_cffa ,_cfaag .PdfRectangle ){_efg .absorb (_cfaag );_cdae .add (_fddd );};};_gbfe =append (_gbfe ,_efg );};if len (_dffcc )!=len (_gbfe )+len (_cdae ){_d .Log .Error ("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064",len (_dffcc ),len (_gbfe ),len (_cdae ));};return _gbfe ;};func _afdd (_eefe *wordBag ,_cgafb int )*textLine {_cbd :=_eefe .firstWord (_cgafb );_addad :=textLine {PdfRectangle :_cbd .PdfRectangle ,_fbgf :_cbd ._agagbg ,_eccbg :_cbd ._fedcd };_addad .pullWord (_eefe ,_cbd ,_cgafb );return &_addad ;};func (_daged 
*wordBag )blocked (_cdad *textWord )bool {if _cdad .Urx < _daged .Llx {_fbge :=_cgea (_cdad .PdfRectangle );_fedb :=_bdge (_daged .PdfRectangle );if _daged ._eafe .blocks (_fbge ,_fedb ){if _ddc {_d .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_cdad ,_daged );};return true ;};}else if _daged .Urx < _cdad .Llx {_eafg :=_cgea (_daged .PdfRectangle );_egdb :=_bdge (_cdad .PdfRectangle );if _daged ._eafe .blocks (_eafg ,_egdb ){if _ddc {_d .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s",_cdad ,_daged );};return true ;};};if _cdad .Ury < _daged .Lly {_ecca :=_ddda (_cdad .PdfRectangle );_ccga :=_defg (_daged .PdfRectangle );if _daged ._fbda .blocks (_ecca ,_ccga ){if _ddc {_d .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_cdad ,_daged );};return true ;};}else if _daged .Ury < _cdad .Lly {_gbcb :=_ddda (_daged .PdfRectangle );_dbebf :=_defg (_cdad .PdfRectangle );if _daged ._fbda .blocks (_gbcb ,_dbebf ){if _ddc {_d .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s",_cdad ,_daged );};return true ;};};return false ;};func _ebd (_edb ,_agbg _ecc .PdfRectangle )bool {return _agbg .Llx <=_edb .Urx &&_edb .Llx <=_agbg .Urx };func (_dgc *imageExtractContext )extractInlineImage (_ecf *_f .ContentStreamInlineImage ,_dd _f .GraphicsState ,_gd *_ecc .PdfPageResources )error {_fbc ,_ga :=_ecf .ToImage (_gd );if _ga !=nil {return _ga ;};_gb ,_ga :=_ecf .GetColorSpace (_gd );if _ga !=nil {return _ga ;};if _gb ==nil {_gb =_ecc .NewPdfColorspaceDeviceGray ();};_ace ,_ga :=_gb .ImageToRGB (*_fbc );if _ga !=nil {return _ga ;};_bgg :=ImageMark {Image :&_ace ,Width :_dd .CTM .ScalingFactorX (),Height :_dd .CTM .ScalingFactorY (),Angle :_dd .CTM .Angle ()};_bgg .X ,_bgg .Y =_dd .CTM .Translation ();_dgc ._bgeg =append (_dgc ._bgeg ,_bgg );_dgc ._eeb ++;return nil ;};func 
(_gfdf *shapesState )newSubPath (){_gfdf .clearPath ();if _fbdd {_d .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_gfdf );};};func (_gfdb *textObject )setWordSpacing (_cdf float64 ){if _gfdb ==nil {return ;};_gfdb ._faa ._cad =_cdf ;};
|
||
|
||
// Extractor stores and offers functionality for extracting content from PDF pages.
|
||
type Extractor struct{_gfg string ;_ge *_ecc .PdfPageResources ;_cb _ecc .PdfRectangle ;_cbc map[string ]fontEntry ;_eb map[string ]textResult ;_ee int64 ;_gc int ;};func (_gfefe rulingList )vertsHorzs ()(rulingList ,rulingList ){var _gede ,_fbbc rulingList ;for _ ,_efgea :=range _gfefe {switch _efgea ._cgbe {case _ccgf :_gede =append (_gede ,_efgea );case _fdcbd :_fbbc =append (_fbbc ,_efgea );};};return _gede ,_fbbc ;};func _eeed (_aaff string )bool {if _g .RuneCountInString (_aaff )< _afba {return false ;};_cgff ,_eebf :=_g .DecodeLastRuneInString (_aaff );if _eebf <=0||!_bg .Is (_bg .Hyphen ,_cgff ){return false ;};_cgff ,_eebf =_g .DecodeLastRuneInString (_aaff [:len (_aaff )-_eebf ]);return _eebf > 0&&!_bg .IsSpace (_cgff );};func _gae (_ebc []*textWord ,_dbcc float64 ,_gagcf ,_dabd rulingList )*wordBag {_egee :=_gfdc (_ebc [0],_dbcc ,_gagcf ,_dabd );for _ ,_aeb :=range _ebc [1:]{_fbcb :=_dgdc (_aeb ._fedcd );_egee ._cda [_fbcb ]=append (_egee ._cda [_fbcb ],_aeb );_egee .PdfRectangle =_fgeb (_egee .PdfRectangle ,_aeb .PdfRectangle );};_egee .sort ();return _egee ;};func (_dcdc *textTable )compositeRowCorridors ()map[int ][]float64 {_aadb :=make (map[int ][]float64 ,_dcdc ._becb );if _egfe {_d .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_dcdc ._becb );};for _bbcg :=1;_bbcg < _dcdc ._becb ;_bbcg ++{var _bebcf []compositeCell ;for _daeg :=0;_daeg < _dcdc ._eafbb ;_daeg ++{if _bbca ,_adeg :=_dcdc ._agcf [_adef (_daeg ,_bbcg )];_adeg {_bebcf =append (_bebcf ,_bbca );};};if len (_bebcf )==0{continue ;};_acaa :=_bbdf (_bebcf );_aadb [_bbcg ]=_acaa ;if _egfe {_ec .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_bbcg ,_acaa );};};return _aadb ;};func _ffgaf (_aaag []*textMark ,_gcdf _ecc .PdfRectangle ,_ccfd rulingList ,_ddcc []gridTiling )paraList {_d .Log .Trace 
("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066",len (_aaag ),_gcdf );if len (_aaag )==0{return nil ;};_eggc :=_edgbda (_aaag ,_gcdf );if len (_eggc )==0{return nil ;};_ccfd .log ("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065");_egef ,_fdde :=_ccfd .vertsHorzs ();_gceg :=_gae (_eggc ,_gcdf .Ury ,_egef ,_fdde );_ddca :=_cgfff (_gceg ,_gcdf .Ury ,_egef ,_fdde );_ddca =_fdc (_ddca );_baacb :=make (paraList ,0,len (_ddca ));for _ ,_aafg :=range _ddca {_ccdd :=_aafg .arrangeText ();if _ccdd !=nil {_baacb =append (_baacb ,_ccdd );};};if len (_baacb )>=_adaf {_baacb =_baacb .extractTables (_ddcc );};_baacb .sortReadingOrder ();_baacb .log ("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072");return _baacb ;};func (_eece *textWord )absorb (_dbacb *textWord ){_eece .PdfRectangle =_fgeb (_eece .PdfRectangle ,_dbacb .PdfRectangle );_eece ._bebcb =append (_eece ._bebcb ,_dbacb ._bebcb ...);};
|
||
|
||
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
|
||
// These are tm: `start` < tm.Offset + len(tm.Text) && tm.Offset < `end` where
|
||
// `start` and `end` are offsets in the extracted text.
|
||
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
|
||
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
|
||
func (_ggf *TextMarkArray )RangeOffset (start ,end int )(*TextMarkArray ,error ){if _ggf ==nil {return nil ,_e .New ("\u006da\u003d\u003d\u006e\u0069\u006c");};if end < start {return nil ,_ec .Errorf ("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020",start ,end );};_bcf :=len (_ggf ._bgea );if _bcf ==0{return _ggf ,nil ;};if start < _ggf ._bgea [0].Offset {start =_ggf ._bgea [0].Offset ;};if end > _ggf ._bgea [_bcf -1].Offset +1{end =_ggf ._bgea [_bcf -1].Offset +1;};_ageg :=_ed .Search (_bcf ,func (_cbgd int )bool {return _ggf ._bgea [_cbgd ].Offset +len (_ggf ._bgea [_cbgd ].Text )-1>=start });if !(0<=_ageg &&_ageg < _bcf ){_eag :=_ec .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076",start ,_ageg ,_bcf ,_ggf ._bgea [0],_ggf ._bgea [_bcf -1]);return nil ,_eag ;};_ceab :=_ed .Search (_bcf ,func (_fed int )bool {return _ggf ._bgea [_fed ].Offset > end -1});if !(0<=_ceab &&_ceab < _bcf ){_bgeaf :=_ec .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076",end ,_ceab ,_bcf ,_ggf ._bgea [0],_ggf ._bgea [_bcf -1]);return nil ,_bgeaf ;};if _ceab <=_ageg {return nil ,_ec .Errorf 
("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064",start ,end ,_ageg ,_ceab );};return &TextMarkArray {_bgea :_ggf ._bgea [_ageg :_ceab ]},nil ;};func (_dcda *wordBag )depthRange (_ffcd ,_baab int )[]int {var _ccgg []int ;for _bdgg :=range _dcda ._cda {if _ffcd <=_bdgg &&_bdgg <=_baab {_ccgg =append (_ccgg ,_bdgg );};};if len (_ccgg )==0{return nil ;};_ed .Ints (_ccgg );return _ccgg ;};const (_dfbb =1.0e-6;_cafc =1.0e-4;_dcdg =10;_addg =6;_fgdg =0.5;_febg =0.12;_ebad =0.19;_cged =0.04;_eaag =0.04;_afeea =1.0;_ddga =0.04;_cffb =0.4;_cbge =0.7;_ggc =1.0;_beeg =0.1;_eccb =1.4;_deab =0.46;_bffa =0.02;_gacb =0.2;_acgb =0.5;_afba =4;_dddb =4.0;_adaf =6;_daaa =0.3;_fadb =0.01;_dddfd =0.02;_ccd =2;_fafa =2;_cagf =500;_gcag =4.0;_fbab =4.0;_cfgdg =0.05;_geb =0.1;_gfdcf =2.0;_fbac =2.0;_ddgd =1.5;_fbff =3.0;_dgfg =0.25;);func (_fgef *Extractor )extractPageText (_eg string ,_dage *_ecc .PdfPageResources ,_cage _ag .Matrix ,_acfe int )(*PageText ,int ,int ,error ){_d .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_acfe );_bab :=&PageText {_bbcfe :_fgef ._cb };_bdd :=_dcdf (_fgef ._cb );_ffe :=stateStack {&_bdd };_bad :=_gagc (_fgef ,_dage ,_f .GraphicsState {},&_bdd ,&_ffe );_gad :=shapesState {_ffae :_cage ,_baca :_ag .IdentityMatrix (),_edadg :_bad };var _gfd bool ;if _acfe > _cab {_dba :=_e .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 
\u0065r\u0072\u003d\u0025\u0076",_acfe ,_dba );return _bab ,_bdd ._cdbc ,_bdd ._caa ,_dba ;};_ceaa :=_f .NewContentStreamParser (_eg );_cfg ,_gde :=_ceaa .Parse ();if _gde !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gde );return _bab ,_bdd ._cdbc ,_bdd ._caa ,_gde ;};_fdb :=_f .NewContentStreamProcessor (*_cfg );_fdb .AddHandler (_f .HandlerConditionEnumAllOperands ,"",func (_gea *_f .ContentStreamOperation ,_cfc _f .GraphicsState ,_gef *_ecc .PdfPageResources )error {_dcd :=_gea .Operand ;if _cffg {_d .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_gea );};switch _dcd {case "\u0071":if _fbdd {_d .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_gad ._baca );};_ffe .push (&_bdd );case "\u0051":if !_ffe .empty (){if len (_ffe )>=2{_ffe .pop ();};_bdd =*_ffe .top ();};_gad ._baca =_cfc .CTM ;if _fbdd {_d .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_gad ._baca );};case "\u0042\u0054":if _gfd {_d .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");_bab ._adg =append (_bab ._adg ,_bad ._bdb ...);};_gfd =true ;_gdb :=_cfc ;_gdb .CTM =_cage .Mult (_gdb .CTM );_bad =_gagc (_fgef ,_gef ,_gdb ,&_bdd ,&_ffe );_gad ._edadg =_bad ;case "\u0045\u0054":if !_gfd {_d .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");};_gfd =false ;_bab ._adg =append (_bab ._adg ,_bad ._bdb ...);_bad .reset ();case "\u0054\u002a":_bad .nextLine ();case "\u0054\u0064":if _dgf ,_adc :=_bad .checkOp (_gea ,2,true );!_dgf {_d .Log .Debug 
("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_adc );return _adc ;};_gdba ,_gcd ,_fbg :=_cbed (_gea .Params );if _fbg !=nil {return _fbg ;};_bad .moveText (_gdba ,_gcd );case "\u0054\u0044":if _bde ,_cdb :=_bad .checkOp (_gea ,2,true );!_bde {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cdb );return _cdb ;};_eeea ,_gbc ,_gg :=_cbed (_gea .Params );if _gg !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gg );return _gg ;};_bad .moveTextSetLeading (_eeea ,_gbc );case "\u0054\u006a":if _cc ,_gge :=_bad .checkOp (_gea ,1,true );!_cc {_d .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_gea ,_gge );return _gge ;};_afd ,_ddg :=_gf .GetStringBytes (_gea .Params [0]);if !_ddg {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_gea );return _gf .ErrTypeError ;};return _bad .showText (_afd );case "\u0054\u004a":if _bca ,_ecd :=_bad .checkOp (_gea ,1,true );!_bca {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ecd );return _ecd ;};_gacf ,_cbb :=_gf .GetArray (_gea .Params [0]);if !_cbb {_d .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_gea );return _gde ;};return _bad .showTextAdjusted (_gacf );case "\u0027":if _dbe ,_bec :=_bad .checkOp (_gea ,1,true );!_dbe {_d .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bec );return _bec ;};_abf ,_bdf :=_gf .GetStringBytes (_gea .Params [0]);if !_bdf {_d .Log .Debug 
("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_gea );return _gf .ErrTypeError ;};_bad .nextLine ();return _bad .showText (_abf );case "\u0022":if _acb ,_bae :=_bad .checkOp (_gea ,3,true );!_acb {_d .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bae );return _bae ;};_gcb ,_eacb ,_ggg :=_cbed (_gea .Params [:2]);if _ggg !=nil {return _ggg ;};_gead ,_gga :=_gf .GetStringBytes (_gea .Params [2]);if !_gga {_d .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_gea );return _gf .ErrTypeError ;};_bad .setCharSpacing (_gcb );_bad .setWordSpacing (_eacb );_bad .nextLine ();return _bad .showText (_gead );case "\u0054\u004c":_ecec ,_cfgf :=_cde (_gea );if _cfgf !=nil {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cfgf );return _cfgf ;};_bad .setTextLeading (_ecec );case "\u0054\u0063":_bbg ,_fea :=_cde (_gea );if _fea !=nil {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fea );return _fea ;};_bad .setCharSpacing (_bbg );case "\u0054\u0066":if _dcc ,_ega :=_bad .checkOp (_gea ,2,true );!_dcc {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ega );return _ega ;};_ffd ,_baf :=_gf .GetNameVal (_gea .Params [0]);if !_baf {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_gea );return _gf .ErrTypeError ;};_bbcb ,_egb :=_gf .GetNumberAsFloat (_gea .Params [1]);if !_baf {_d .Log .Debug 
("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gea ,_egb );return _egb ;};_egb =_bad .setFont (_ffd ,_bbcb );_bad ._affc =_ce .Is (_egb ,_gf .ErrNotSupported );if _egb !=nil &&!_bad ._affc {return _egb ;};case "\u0054\u006d":if _gfa ,_abc :=_bad .checkOp (_gea ,6,true );!_gfa {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_abc );return _abc ;};_bdc ,_cac :=_gf .GetNumbersAsFloat (_gea .Params );if _cac !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cac );return _cac ;};_bad .setTextMatrix (_bdc );case "\u0054\u0072":if _fgf ,_bac :=_bad .checkOp (_gea ,1,true );!_fgf {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bac );return _bac ;};_abba ,_eab :=_gf .GetIntVal (_gea .Params [0]);if !_eab {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_gea );return _gf .ErrTypeError ;};_bad .setTextRenderMode (_abba );case "\u0054\u0073":if _fcf ,_aff :=_bad .checkOp (_gea ,1,true );!_fcf {_d .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aff );return _aff ;};_eae ,_cfd :=_gf .GetNumberAsFloat (_gea .Params [0]);if _cfd !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cfd );return _cfd ;};_bad .setTextRise (_eae );case "\u0054\u0077":if _gag ,_abfa :=_bad .checkOp (_gea ,1,true );!_gag {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_abfa );return _abfa ;};_baa ,_eccg :=_gf .GetNumberAsFloat (_gea 
.Params [0]);if _eccg !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_eccg );return _eccg ;};_bad .setWordSpacing (_baa );case "\u0054\u007a":if _ccf ,_ef :=_bad .checkOp (_gea ,1,true );!_ccf {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ef );return _ef ;};_fad ,_agd :=_gf .GetNumberAsFloat (_gea .Params [0]);if _agd !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_agd );return _agd ;};_bad .setHorizScaling (_fad );case "\u0063\u006d":_gad ._baca =_cfc .CTM ;if _gad ._baca .Singular (){_bbe :=_ag .IdentityMatrix ().Translate (_gad ._baca .Translation ());_d .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_gad ._baca ,_bbe );_gad ._baca =_bbe ;};if _fbdd {_d .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_gad ._baca );};case "\u006d":if len (_gea .Params )!=2{_d .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_db );return nil ;};_agb ,_fff :=_gf .GetNumbersAsFloat (_gea .Params );if _fff !=nil {return _fff ;};_d .Log .Debug ("\u004d\u006f\u0076\u0065\u0020\u0074\u006f\u003a\u0020\u0025\u002e\u0032\u0066",_agb );_gad .moveTo (_agb [0],_agb [1]);case "\u006c":if len (_gea .Params )!=2{_d .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 
m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_db );return nil ;};_agc ,_gcc :=_gf .GetNumbersAsFloat (_gea .Params );if _gcc !=nil {return _gcc ;};_gad .lineTo (_agc [0],_agc [1]);case "\u0063":if len (_gea .Params )!=6{return _db ;};_fbgd ,_abcd :=_gf .GetNumbersAsFloat (_gea .Params );if _abcd !=nil {return _abcd ;};_d .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_fbgd );_gad .cubicTo (_fbgd [0],_fbgd [1],_fbgd [2],_fbgd [3],_fbgd [4],_fbgd [5]);case "\u0076","\u0079":if len (_gea .Params )!=4{return _db ;};_ffb ,_bfa :=_gf .GetNumbersAsFloat (_gea .Params );if _bfa !=nil {return _bfa ;};_d .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_ffb );_gad .quadraticTo (_ffb [0],_ffb [1],_ffb [2],_ffb [3]);case "\u0068":_gad .closePath ();case "\u0072\u0065":if len (_gea .Params )!=4{return _db ;};_abg ,_gada :=_gf .GetNumbersAsFloat (_gea .Params );if _gada !=nil {return _gada ;};_gad .drawRectangle (_abg [0],_abg [1],_abg [2],_abg [3]);_gad .closePath ();case "\u0053":_gad .stroke (&_bab ._bdde );_gad .clearPath ();case "\u0073":_gad .closePath ();_gad .stroke (&_bab ._bdde );_gad .clearPath ();case "\u0046":_gad .fill (&_bab ._dea );_gad .clearPath ();case "\u0066","\u0066\u002a":_gad .closePath ();_gad .fill (&_bab ._dea );_gad .clearPath ();case "\u0042","\u0042\u002a":_gad .fill (&_bab ._dea );_gad .stroke (&_bab ._bdde );_gad .clearPath ();case "\u0062","\u0062\u002a":_gad .closePath ();_gad .fill (&_bab ._dea );_gad .stroke (&_bab ._bdde );_gad .clearPath ();case "\u006e":_gad .clearPath ();case "\u0044\u006f":if len (_gea .Params )==0{_d .Log .Debug 
("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_gea .Params );return _gf .ErrRangeError ;};_bfg ,_dfg :=_gf .GetName (_gea .Params [0]);if !_dfg {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_gea .Params [0]);return _gf .ErrTypeError ;};_ ,_dda :=_gef .GetXObjectByName (*_bfg );if _dda !=_ecc .XObjectTypeForm {break ;};_efd ,_dfg :=_fgef ._eb [_bfg .String ()];if !_dfg {_cfa ,_dbb :=_gef .GetXObjectFormByName (*_bfg );if _dbb !=nil {_d .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_dbb );return _dbb ;};_afa ,_dbb :=_cfa .GetContentStream ();if _dbb !=nil {_d .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_dbb );return _dbb ;};_aga :=_cfa .Resources ;if _aga ==nil {_aga =_gef ;};_dcg ,_gagb ,_cega ,_dbb :=_fgef .extractPageText (string (_afa ),_aga ,_cage .Mult (_cfc .CTM ),_acfe +1);if _dbb !=nil {_d .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_dbb );return _dbb ;};_efd =textResult {*_dcg ,_gagb ,_cega };_fgef ._eb [_bfg .String ()]=_efd ;};_gad ._baca =_cfc .CTM ;if _fbdd {_d .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_gad ._baca );};_bab ._adg =append (_bab ._adg ,_efd ._ffg ._adg ...);_bab ._bdde =append (_bab ._bdde ,_efd ._ffg ._bdde ...);_bab ._dea =append (_bab ._dea ,_efd ._ffg ._dea ...);_bdd ._cdbc +=_efd ._fdgc ;_bdd ._caa +=_efd ._cfgfd ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_bad ._deb 
.ColorspaceNonStroking =_cfc .ColorspaceNonStroking ;_bad ._deb .ColorNonStroking =_cfc .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_bad ._deb .ColorspaceStroking =_cfc .ColorspaceStroking ;_bad ._deb .ColorStroking =_cfc .ColorStroking ;};return nil ;});_gde =_fdb .Process (_dage );return _bab ,_bdd ._cdbc ,_bdd ._caa ,_gde ;};func (_bdga paraList )extractTables (_aagdd []gridTiling )paraList {if _egfe {_d .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_bdga ));};if len (_bdga )< _adaf {return _bdga ;};_ddagg :=_bdga .findTables (_aagdd );if _egfe {_d .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_ddagg ));for _baba ,_becba :=range _ddagg {_becba .log (_ec .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_baba ));};};return _bdga .applyTables (_ddagg );};func (_becg *wordBag )firstReadingIndex (_bggcg int )int {_gfdg :=_becg .firstWord (_bggcg )._agagbg ;_debf :=float64 (_bggcg +1)*_addg ;_dfc :=_debf +_dddb *_gfdg ;_bcea :=_bggcg ;for _ ,_gdc :=range _becg .depthBand (_debf ,_dfc ){if _cgc (_becg .firstWord (_gdc ),_becg .firstWord (_bcea ))< 0{_bcea =_gdc ;};};return _bcea ;};func _bgggf (_cfab map[int ][]float64 ){if len (_cfab )<=1{return ;};_bbge :=_gabeea (_cfab );if _egfe {_d .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_bbge );};var _gfdff ,_ecbce int ;for _gfdff ,_ecbce =range _bbge {if _cfab [_ecbce ]!=nil {break ;};};for _edea ,_cbbdd :=range _bbge [_gfdff :]{_agabe :=_cfab [_cbbdd ];if _agabe ==nil {continue ;};if _egfe {_ec .Printf 
("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_gfdff +_edea ,_ecbce ,_cbbdd );};_aefga :=_cfab [_cbbdd ];if _aefga [len (_aefga )-1]> _agabe [0]{_aefga [len (_aefga )-1]=_agabe [0];_cfab [_ecbce ]=_aefga ;};_ecbce =_cbbdd ;};};func (_dbdf *textLine )endsInHyphen ()bool {_cbga :=_dbdf ._agae [len (_dbdf ._agae )-1];_efae :=_cbga ._fede ;_ccca ,_acec :=_g .DecodeLastRuneInString (_efae );if _acec <=0||!_bg .Is (_bg .Hyphen ,_ccca ){return false ;};if _cbga ._eeaa &&_eeed (_efae ){return true ;};return _eeed (_dbdf .text ());};func _dbba (_fbdg ,_dfgbb _ag .Point )bool {_gffa :=_c .Abs (_fbdg .X -_dfgbb .X );_ecab :=_c .Abs (_fbdg .Y -_dfgbb .Y );return _bgbf (_gffa ,_ecab );};func (_gaf *textObject )moveTextSetLeading (_fffc ,_fcg float64 ){_gaf ._faa ._bbgf =-_fcg ;_gaf .moveLP (_fffc ,_fcg );};func (_fbgff *wordBag )arrangeText ()*textPara {_fbgff .sort ();if _aaa {_fbgff .removeDuplicates ();};var _dadfc []*textLine ;for _ ,_dfgb :=range _fbgff .depthIndexes (){for !_fbgff .empty (_dfgb ){_acfbb :=_fbgff .firstReadingIndex (_dfgb );_gcgc :=_fbgff .firstWord (_acfbb );_ecbff :=_afdd (_fbgff ,_acfbb );_eada :=_gcgc ._agagbg ;_agdf :=_gcgc ._fedcd -_fgdg *_eada ;_ffdg :=_gcgc ._fedcd +_fgdg *_eada ;_fdce :=_eccb *_eada ;_dgdb :=_deab *_eada ;_eebd :for {var _dagg *textWord ;_ggdg :=0;for _ ,_dggd :=range _fbgff .depthBand (_agdf ,_ffdg ){_bgbdd :=_fbgff .highestWord (_dggd ,_agdf ,_ffdg );if _bgbdd ==nil {continue ;};_beced :=_cgbg (_bgbdd ,_ecbff ._agae [len (_ecbff ._agae )-1]);if _beced < -_dgdb {break _eebd ;};if _beced > _fdce {continue ;};if _dagg !=nil &&_cgc (_bgbdd ,_dagg )>=0{continue ;};_dagg =_bgbdd ;_ggdg =_dggd ;};if _dagg ==nil {break ;};_ecbff .pullWord (_fbgff ,_dagg ,_ggdg );};_ecbff .markWordBoundaries ();_dadfc =append (_dadfc ,_ecbff );};};if len (_dadfc )==0{return nil ;};_ed .Slice (_dadfc ,func (_ddgb ,_daab int )bool {return _abcg (_dadfc [_ddgb ],_dadfc [_daab ])< 0});_dfdd :=_geba 
(_fbgff .PdfRectangle ,_dadfc );if _fgad {_d .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_dfdd .String ());if _agff {for _gafea ,_eecb :=range _dfdd ._beedd {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gafea ,_eecb .String ());if _eedaf {for _dcfc ,_bba :=range _eecb ._agae {_ec .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dcfc ,_bba .String ());for _aegc ,_bedde :=range _bba ._bebcb {_ec .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_aegc ,_bedde .String ());};};};};};};return _dfdd ;};func _bgbf (_aaade ,_eaddg float64 )bool {return _aaade /_c .Max (_geb ,_eaddg )< _cfgdg };func (_ada *textObject )reset (){_ada ._aac =_ag .IdentityMatrix ();_ada ._gbf =_ag .IdentityMatrix ();_ada ._bdb =nil ;};func (_bccc gridTile )contains (_geffc _ecc .PdfRectangle )bool {if _bccc .numBorders ()< 3{return false ;};if _bccc ._ffgb &&_geffc .Llx < _bccc .Llx -_ddgd {return false ;};if _bccc ._bbaa &&_geffc .Urx > _bccc .Urx +_ddgd {return false ;};if _bccc ._bade &&_geffc .Lly < _bccc .Lly -_ddgd {return false ;};if _bccc ._bbb &&_geffc .Ury > _bccc .Ury +_ddgd {return false ;};return true ;};func _gfdc (_gcbfe *textWord ,_cba float64 ,_fba ,_aafa rulingList )*wordBag {_beed :=_dgdc (_gcbfe ._fedcd );_egf :=[]*textWord {_gcbfe };_ggfg :=wordBag {_cda :map[int ][]*textWord {_beed :_egf },PdfRectangle :_gcbfe .PdfRectangle ,_gcccc :_gcbfe ._agagbg ,_baeg :_cba ,_eafe :_fba ,_fbda :_aafa };return &_ggfg ;};
// String returns a description of `state`.
// String formats three numeric fields of the text state (_gec, _cad, _ffdb) together
// with the base font name, or a placeholder when no font is set.
func (_dad *textState) String() string {
	var fontName string
	if _dad._cec == nil {
		fontName = "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	} else {
		fontName = _dad._cec.BaseFont()
	}
	return _ec.Sprintf("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071", _dad._gec, _dad._cad, _dad._ffdb, fontName)
}

// comp is the less-function for sorting the list. Rulings are ordered by descending kind
// first. Rulings of kind _afaa are never less than each other. Otherwise the fields
// _cbfd (descending), _aef (ascending) and _bcdag (ascending) are compared in turn for
// kind _fdcbd, and with every comparison inverted for the other kinds.
func (_gdaf rulingList) comp(_fbdfb, _dfdb int) bool {
	a, b := _gdaf[_fbdfb], _gdaf[_dfdb]
	kind := a._cgbe
	if other := b._cgbe; kind != other {
		return kind > other
	}
	if kind == _afaa {
		return false
	}
	// For kind _fdcbd a comparison is used as is; for other kinds it is negated.
	// `cmp == asIs` expresses both cases.
	asIs := kind == _fdcbd
	if a._cbfd != b._cbfd {
		return (a._cbfd > b._cbfd) == asIs
	}
	if a._aef != b._aef {
		return (a._aef < b._aef) == asIs
	}
	return (a._bcdag < b._bcdag) == asIs
}

// allWords returns every word in the bag. The order follows map iteration and is
// therefore unspecified.
func (_abca *wordBag) allWords() []*textWord {
	var words []*textWord
	for key := range _abca._cda {
		words = append(words, _abca._cda[key]...)
	}
	return words
}

// blocks reports whether some ruling of the list separates `_dcdfg` and `_fdgaa`.
// The two must overlap in their [_aef, _bcdag] extents. A ruling separates them when its
// _cbfd lies between theirs (with tolerance _fbac) and its extent covers the overlap.
func (_ccddd rulingList) blocks(_dcdfg, _fdgaa *ruling) bool {
	if _dcdfg._aef > _fdgaa._bcdag || _fdgaa._aef > _dcdfg._bcdag {
		return false
	}
	overlapLo := _c.Max(_dcdfg._aef, _fdgaa._aef)
	overlapHi := _c.Min(_dcdfg._bcdag, _fdgaa._bcdag)

	// Order the pair by _cbfd.
	lower, upper := _dcdfg, _fdgaa
	if lower._cbfd > upper._cbfd {
		lower, upper = upper, lower
	}
	for _, r := range _ccddd {
		between := lower._cbfd <= r._cbfd+_fbac && r._cbfd <= upper._cbfd+_fbac
		covers := r._aef <= overlapHi && overlapLo <= r._bcdag
		if between && covers {
			return true
		}
	}
	return false
}

// getRight collects, for every row of the table, the _feag value of the cell in the last
// column. It returns nil when any of these is nil or already marked (_bbcfc), or when the
// collected paragraphs are not chained through their _ddfea fields.
func (_fggb *textTable) getRight() paraList {
	column := make(paraList, _fggb._becb)
	for y := range column {
		para := _fggb.get(_fggb._eafbb-1, y)._feag
		if para == nil || para._bbcfc {
			return nil
		}
		column[y] = para
	}
	for y := 1; y < len(column); y++ {
		if column[y-1]._ddfea != column[y] {
			return nil
		}
	}
	return column
}

var _dg = false

// markCells sets the _bbcfc flag on every non-nil cell of the table.
func (_bfbdc *textTable) markCells() {
	for y := 0; y < _bfbdc._becb; y++ {
		for x := 0; x < _bfbdc._eafbb; x++ {
			if cell := _bfbdc.get(x, y); cell != nil {
				cell._bbcfc = true
			}
		}
	}
}

// aligned reports whether enough rulings of the list intersect each other.
//
// Rulings are grouped greedily: each ruling after the first is tested with
// gridIntersecting against the rulings recorded so far; on the first hit that ruling's
// counter is incremented, otherwise the new ruling is recorded with a zero counter.
// The list is aligned when the number of recorded rulings that were never hit, divided
// by the list length, does not exceed 1 - _dgfg. Lists shorter than two are not aligned.
func (_cffc rulingList) aligned() bool {
	if len(_cffc) < 2 {
		return false
	}
	hits := make(map[*ruling]int)
	hits[_cffc[0]] = 0
	for _, r := range _cffc[1:] {
		matched := false
		for seen := range hits {
			if r.gridIntersecting(seen) {
				hits[seen]++
				matched = true
				break
			}
		}
		if !matched {
			hits[r] = 0
		}
	}
	unmatched := 0
	for _, n := range hits {
		if n == 0 {
			unmatched++
		}
	}
	fraction := float64(unmatched) / float64(len(_cffc))
	ok := fraction <= 1.0-_dgfg
	if _ded {
		_d.Log.Info("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", ok, fraction, unmatched, len(_cffc), _cffc.String())
	}
	return ok
}

// _efb appends `_gbb` to `_dbdfg` with its Offset set to *`_cgbca`, then advances
// *`_cgbca` by the byte length of the mark's text. It returns the extended slice.
func _efb(_dbdfg []TextMark, _cgbca *int, _gbb TextMark) []TextMark {
	offset := *_cgbca
	_gbb.Offset = offset
	*_cgbca = offset + len(_gbb.Text)
	return append(_dbdfg, _gbb)
}

// _defg makes a ruling of kind _fdcbd from the bottom edge of `_cbgf`: _cbfd is the
// rectangle's Lly and the extent runs from Llx to Urx.
func _defg(_cbgf _ecc.PdfRectangle) *ruling {
	r := ruling{_cgbe: _fdcbd}
	r._cbfd = _cbgf.Lly
	r._aef = _cbgf.Llx
	r._bcdag = _cbgf.Urx
	return &r
}

// textTable is a grid of paragraphs.
type textTable struct {
	_ecc.PdfRectangle
	_eafbb, _becb int                      // column count and row count, as used by the x/y loops over get
	_fgegf        bool                     // when set, isExportable returns true without inspecting cells
	_cgfb         map[uint64]*textPara     // NOTE(review): presumably the cells, keyed by a packed (x, y); confirm
	_agcf         map[uint64]compositeCell // composite cells, looked up with the key _adef(x, y)
}
// Text returns the extracted page text.
// Text returns the text stored in the PageText.
func (_dgbd PageText) Text() string {
	text := _dgbd._edc
	return text
}

// newTablePara wraps the table in a paragraph. The paragraph's rectangle and its _decd
// field are both set to the table's bounding box.
func (_afeef *textTable) newTablePara() *textPara {
	bbox := _afeef.computeBbox()
	para := &textPara{_bbd: _afeef}
	para.PdfRectangle = bbox
	para._decd = bbox
	if _egfe {
		_d.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// merge sorts the paragraphs into reading order and combines them into one paragraph
// that spans all their rectangles and holds all their lines. It returns nil for an
// empty list.
func (_bgdd paraList) merge() *textPara {
	_d.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_bgdd))
	if len(_bgdd) == 0 {
		return nil
	}
	_bgdd.sortReadingOrder()
	bbox, lines := _bgdd[0].PdfRectangle, _bgdd[0]._beedd
	for i := 1; i < len(_bgdd); i++ {
		para := _bgdd[i]
		bbox = _fgeb(bbox, para.PdfRectangle)
		lines = append(lines, para._beedd...)
	}
	return _geba(bbox, lines)
}
// String returns a description of `w`.
func (_dcee *textWord )String ()string {return _ec .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_dcee ._fedcd ,_dcee .PdfRectangle ,_dcee ._agagbg ,_dcee ._fede );};func _cdbe (_fgbg ,_ggbe bounded )float64 {_ecbg :=_cgc (_fgbg ,_ggbe );if !_fggg (_ecbg ){return _ecbg ;};return _cfcc (_fgbg ,_ggbe );};func (_fab *shapesState )lastpointEstablished ()(_ag .Point ,bool ){if _fab ._fafb {return _fab ._adaa ,false ;};_abff :=len (_fab ._bece );if _abff > 0&&_fab ._bece [_abff -1]._eagd {return _fab ._bece [_abff -1].last (),false ;};return _ag .Point {},true ;};func (_ccc *stateStack )top ()*textState {if _ccc .empty (){return nil ;};return (*_ccc )[_ccc .size ()-1];};func (_fbad paraList )reorder (_ebbb []int ){_dbga :=make (paraList ,len (_fbad ));for _befc ,_bcfga :=range _ebbb {_dbga [_befc ]=_fbad [_bcfga ];};copy (_fbad ,_dbga );};func (_fdgae *wordBag )empty (_bag int )bool {_ ,_dggf :=_fdgae ._cda [_bag ];return !_dggf };func (_fgd *textObject )getStrokeColor ()_ac .Color {return _eacfe (_fgd ._deb .ColorspaceStroking ,_fgd ._deb .ColorStroking );};func (_dbgg *textTable )isExportable ()bool {if _dbgg ._fgegf {return true ;};_geaa :=func (_bgddf int )bool {_dbac :=_dbgg .get (0,_bgddf );if _dbac ==nil {return false ;};_gfbgd :=_dbac .text ();_eecc :=_g .RuneCountInString (_gfbgd );_baega :=_bggca .MatchString (_gfbgd );return _eecc <=1||_baega ;};for _egcdb :=0;_egcdb < _dbgg ._becb ;_egcdb ++{if !_geaa (_egcdb ){return true ;};};return false ;};func _dcdf (_cgb _ecc .PdfRectangle )textState {return textState {_acfg :100,_egbe :RenderModeFill ,_aceg :_cgb };};func (_eace *textObject )moveText (_ddae ,_afe float64 ){_eace .moveLP (_ddae ,_afe )};func (_aba *textObject )checkOp (_bacb *_f .ContentStreamOperation ,_gbg int ,_gdeg bool )(_ffa bool ,_eed error ){if _aba ==nil {var _ggde []_gf .PdfObject ;if _gbg > 0{_ggde =_bacb .Params ;if 
len (_ggde )> _gbg {_ggde =_ggde [:_gbg ];};};_d .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_bacb .Operand ,_ggde );};if _gbg >=0{if len (_bacb .Params )!=_gbg {if _gdeg {_eed =_e .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");};_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_bacb .Operand ,_gbg ,len (_bacb .Params ),_bacb .Params );return false ,_eed ;};};return true ,nil ;};func (_cgaf *textObject )renderText (_dfa []byte )error {if _cgaf ._affc {_d .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");return nil ;};_dbeg :=_cgaf .getCurrentFont ();_dfd :=_dbeg .BytesToCharcodes (_dfa );_fceg ,_geadg ,_daa :=_dbeg .CharcodesToStrings (_dfd );if _daa > 0{_d .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_geadg ,_daa );};_cgaf ._faa ._cdbc +=_geadg ;_cgaf ._faa ._caa +=_daa ;_bcb :=_cgaf ._faa ;_ebf :=_bcb ._ffdb ;_eccd :=_bcb ._acfg /100.0;_aad ,_ebab :=_dbeg .GetRuneMetrics (' ');if !_ebab {_aad ,_ebab =_dbeg .GetCharMetrics (32);};if !_ebab {_aad ,_ =_ecc .DefaultFont ().GetRuneMetrics (' ');};_dfae :=_aad .Wx *_beae ;_d .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 
\u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_dfae ,_fceg ,_dbeg ,_ebf );_dade :=_ag .NewMatrix (_ebf *_eccd ,0,0,_ebf ,0,_bcb ._age );if _ede {_d .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_dfd ),_dfd ,_fceg );};_d .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_dfd ),_dfd ,len (_fceg ));_afag :=_cgaf .getFillColor ();_dee :=_cgaf .getStrokeColor ();for _bfb ,_fcd :=range _fceg {_fca :=[]rune (_fcd );if len (_fca )==1&&_fca [0]=='\x00'{continue ;};_bea :=_dfd [_bfb ];_fcac :=_cgaf ._deb .CTM .Mult (_cgaf ._aac ).Mult (_dade );_fcb :=0.0;if len (_fca )==1&&_fca [0]==32{_fcb =_bcb ._cad ;};_dce ,_aab :=_dbeg .GetCharMetrics (_bea );if !_aab {_d .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_bea ,_fca ,_fca ,_dbeg );return _ec .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_dbeg .String (),_bea );};_edge :=_ag .Point {X :_dce .Wx *_beae ,Y :_dce .Wy *_beae };_fdbb :=_ag .Point {X :(_edge .X *_ebf +_fcb )*_eccd };_gfbcb :=_ag .Point {X :(_edge .X *_ebf +_bcb ._gec +_fcb )*_eccd };if _ede {_d .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_ebf ,_bcb ._gec ,_bcb ._cad ,_eccd );_d .Log .Info 
("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_edge ,_fdbb ,_gfbcb );};_fbf :=_ccfb (_fdbb );_ecg :=_ccfb (_gfbcb );_dggb :=_cgaf ._deb .CTM .Mult (_cgaf ._aac ).Mult (_fbf );if _aaeg {_d .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_cgaf ._deb .CTM ,_cgaf ._aac ,_ecg ,_cae (_cgaf ._deb .CTM .Mult (_cgaf ._aac ).Mult (_ecg )),_fbf ,_dggb ,_cae (_dggb ));};_cee ,_acfd :=_cgaf .newTextMark (_ca .ExpandLigatures (_fca ),_fcac ,_cae (_dggb ),_c .Abs (_dfae *_fcac .ScalingFactorX ()),_dbeg ,_cgaf ._faa ._gec ,_afag ,_dee );if !_acfd {_d .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");continue ;};if _dbeg ==nil {_d .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _dbeg .Encoder ()==nil {_d .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_dbeg );}else {if _acfb ,_cge :=_dbeg .Encoder ().CharcodeToRune (_bea );_cge {_cee ._fgefc =string (_acfb );};};_d .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_bfb ,_bea ,_cee ,_fcac );_cgaf ._bdb =append (_cgaf ._bdb ,&_cee );_cgaf ._aac .Concat (_ecg );};return nil ;};type stateStack []*textState ;func (_deafg rulingList )augmentGrid ()(rulingList ,rulingList ){_edafb ,_gabee :=_deafg .vertsHorzs ();if len (_edafb )==0||len (_gabee )==0{return _edafb ,_gabee ;};_aaege 
,_beadb :=_edafb ,_gabee ;_ebde :=_edafb .bbox ();_fagbg :=_gabee .bbox ();if _ded {_d .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_ebde );_d .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_fagbg );};var _fccfe ,_gccb ,_geacf ,_ggfgf *ruling ;if _fagbg .Llx < _ebde .Llx -_gfdcf {_fccfe =&ruling {_fcce :_ggbcc ,_cgbe :_ccgf ,_cbfd :_fagbg .Llx ,_aef :_ebde .Lly ,_bcdag :_ebde .Ury };_edafb =append (rulingList {_fccfe },_edafb ...);};if _fagbg .Urx > _ebde .Urx +_gfdcf {_gccb =&ruling {_fcce :_ggbcc ,_cgbe :_ccgf ,_cbfd :_fagbg .Urx ,_aef :_ebde .Lly ,_bcdag :_ebde .Ury };_edafb =append (_edafb ,_gccb );};if _ebde .Lly < _fagbg .Lly -_gfdcf {_geacf =&ruling {_fcce :_ggbcc ,_cgbe :_fdcbd ,_cbfd :_ebde .Lly ,_aef :_fagbg .Llx ,_bcdag :_fagbg .Urx };_gabee =append (rulingList {_geacf },_gabee ...);};if _ebde .Ury > _fagbg .Ury +_gfdcf {_ggfgf =&ruling {_fcce :_ggbcc ,_cgbe :_fdcbd ,_cbfd :_ebde .Ury ,_aef :_fagbg .Llx ,_bcdag :_fagbg .Urx };_gabee =append (_gabee ,_ggfgf );};if len (_edafb )+len (_gabee )==len (_deafg ){return _aaege ,_beadb ;};_fcbeg :=append (_edafb ,_gabee ...);_deafg .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_fcbeg .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");return _edafb ,_gabee ;};func _fbee (_feb _ag .Point )*subpath {return &subpath {_cada :[]_ag .Point {_feb }}};func (_bdda paraList )inTile (_abcbc gridTile )paraList {var _cfag paraList ;for _ ,_bfgb :=range _bdda {if _abcbc .contains (_bfgb .PdfRectangle ){_cfag =append (_cfag ,_bfgb );};};if _egfe {_ec .Printf ("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n",_abcbc ,len (_cfag ));for _edfe ,_aeccb :=range _cfag {_ec .Printf 
("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_edfe ,_aeccb );};_ec .Println ("");};return _cfag ;};func (_dceb paraList )addNeighbours (){_abaeca :=func (_cagbc []int ,_cffbf *textPara )([]*textPara ,[]*textPara ){_cdgab :=make ([]*textPara ,0,len (_cagbc )-1);_efgf :=make ([]*textPara ,0,len (_cagbc )-1);for _ ,_efeb :=range _cagbc {_cdec :=_dceb [_efeb ];if _cdec .Urx <=_cffbf .Llx {_cdgab =append (_cdgab ,_cdec );}else if _cdec .Llx >=_cffbf .Urx {_efgf =append (_efgf ,_cdec );};};return _cdgab ,_efgf ;};_dbafa :=func (_ddfbc []int ,_eeaga *textPara )([]*textPara ,[]*textPara ){_cdba :=make ([]*textPara ,0,len (_ddfbc )-1);_ececa :=make ([]*textPara ,0,len (_ddfbc )-1);for _ ,_cdcgb :=range _ddfbc {_bbdac :=_dceb [_cdcgb ];if _bbdac .Ury <=_eeaga .Lly {_ececa =append (_ececa ,_bbdac );}else if _bbdac .Lly >=_eeaga .Ury {_cdba =append (_cdba ,_bbdac );};};return _cdba ,_ececa ;};_fggd :=_dceb .yNeighbours (_dddfd );for _ ,_febgd :=range _dceb {_dgafd :=_fggd [_febgd ];if len (_dgafd )==0{continue ;};_dbdbb ,_fadbc :=_abaeca (_dgafd ,_febgd );if len (_dbdbb )==0&&len (_fadbc )==0{continue ;};if len (_dbdbb )> 0{_eadcac :=_dbdbb [0];for _ ,_ddcce :=range _dbdbb [1:]{if _ddcce .Urx >=_eadcac .Urx {_eadcac =_ddcce ;};};for _ ,_bbdb :=range _dbdbb {if _bbdb !=_eadcac &&_bbdb .Urx > _eadcac .Llx {_eadcac =nil ;break ;};};if _eadcac !=nil &&_ecae (_febgd .PdfRectangle ,_eadcac .PdfRectangle ){_febgd ._eeff =_eadcac ;};};if len (_fadbc )> 0{_fadgb :=_fadbc [0];for _ ,_aeaa :=range _fadbc [1:]{if _aeaa .Llx <=_fadgb .Llx {_fadgb =_aeaa ;};};for _ ,_fgdgc :=range _fadbc {if _fgdgc !=_fadgb &&_fgdgc .Llx < _fadgb .Urx {_fadgb =nil ;break ;};};if _fadgb !=nil &&_ecae (_febgd .PdfRectangle ,_fadgb .PdfRectangle ){_febgd ._feag =_fadgb ;};};};_fggd =_dceb .xNeighbours (_fadb );for _ ,_ffcg :=range _dceb {_dcfa :=_fggd [_ffcg ];if len (_dcfa )==0{continue ;};_ebeg ,_gcadb :=_dbafa (_dcfa ,_ffcg );if len (_ebeg )==0&&len (_gcadb )==0{continue ;};if len (_gcadb )> 0{_gbad 
:=_gcadb [0];for _ ,_bbcag :=range _gcadb [1:]{if _bbcag .Ury >=_gbad .Ury {_gbad =_bbcag ;};};for _ ,_edeb :=range _gcadb {if _edeb !=_gbad &&_edeb .Ury > _gbad .Lly {_gbad =nil ;break ;};};if _gbad !=nil &&_ebd (_ffcg .PdfRectangle ,_gbad .PdfRectangle ){_ffcg ._ddfea =_gbad ;};};if len (_ebeg )> 0{_dbecg :=_ebeg [0];for _ ,_aggad :=range _ebeg [1:]{if _aggad .Lly <=_dbecg .Lly {_dbecg =_aggad ;};};for _ ,_edadged :=range _ebeg {if _edadged !=_dbecg &&_edadged .Lly < _dbecg .Ury {_dbecg =nil ;break ;};};if _dbecg !=nil &&_ebd (_ffcg .PdfRectangle ,_dbecg .PdfRectangle ){_ffcg ._ffbb =_dbecg ;};};};for _ ,_fdbe :=range _dceb {if _fdbe ._eeff !=nil &&_fdbe ._eeff ._feag !=_fdbe {_fdbe ._eeff =nil ;};if _fdbe ._ffbb !=nil &&_fdbe ._ffbb ._ddfea !=_fdbe {_fdbe ._ffbb =nil ;};if _fdbe ._feag !=nil &&_fdbe ._feag ._eeff !=_fdbe {_fdbe ._feag =nil ;};if _fdbe ._ddfea !=nil &&_fdbe ._ddfea ._ffbb !=_fdbe {_fdbe ._ddfea =nil ;};};};func (_caf *wordBag )absorb (_cbeg *wordBag ){_dfdf :=_cbeg .makeRemovals ();for _bfecb ,_caee :=range _cbeg ._cda {for _ ,_dbfd :=range _caee {_caf .pullWord (_dbfd ,_bfecb ,_dfdf );};};_cbeg .applyRemovals (_dfdf );};
// String returns a string describing the text mark.
func (_add TextMark )String ()string {_cgg :=_add .BBox ;var _fga string ;if _add .Font !=nil {_fga =_add .Font .String ();if len (_fga )> 50{_fga =_fga [:50]+"\u002e\u002e\u002e";};};var _debg string ;if _add .Meta {_debg ="\u0020\u002a\u004d\u002a";};return _ec .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_add .Offset ,_add .Text ,[]rune (_add .Text ),_cgg .Llx ,_cgg .Lly ,_cgg .Urx ,_cgg .Ury ,_fga ,_debg );};func _bdge (_egcd _ecc .PdfRectangle )*ruling {return &ruling {_cgbe :_ccgf ,_cbfd :_egcd .Llx ,_aef :_egcd .Lly ,_bcdag :_egcd .Ury };};type cachedImage struct{_cg *_ecc .Image ;_bb _ecc .PdfColorspace ;};func (_baggg rulingList )splitSec ()[]rulingList {_ed .Slice (_baggg ,func (_decb ,_cfbf int )bool {_fccb ,_fbdeb :=_baggg [_decb ],_baggg [_cfbf ];if _fccb ._aef !=_fbdeb ._aef {return _fccb ._aef < _fbdeb ._aef ;};return _fccb ._bcdag < _fbdeb ._bcdag ;});_gfbdg :=make (map[*ruling ]struct{},len (_baggg ));_cbca :=func (_cebdd *ruling )rulingList {_cfdd :=rulingList {_cebdd };_gfbdg [_cebdd ]=struct{}{};for _ ,_dafe :=range _baggg {if _ ,_fecd :=_gfbdg [_dafe ];_fecd {continue ;};for _ ,_agea :=range _cfdd {if _dafe .alignsSec (_agea ){_cfdd =append (_cfdd ,_dafe );_gfbdg [_dafe ]=struct{}{};break ;};};};return _cfdd ;};_bbgb :=[]rulingList {_cbca (_baggg [0])};for _ ,_daff :=range _baggg [1:]{if _ ,_agdee :=_gfbdg [_daff ];_agdee {continue ;};_bbgb =append (_bbgb ,_cbca (_daff ));};return _bbgb ;};func (_aafd *textTable )emptyColumn (_geada int )bool {for _ebfc :=0;_ebfc < _aafd ._becb ;_ebfc ++{_cebcd :=_aafd .get (_geada ,_ebfc );if _cebcd !=nil &&_cebcd .text ()!=""{return false ;};};return true ;};func (_cfaa *stateStack )size ()int {return len (*_cfaa )};func 
(_beda *textTable )reduce ()*textTable {_eefgb :=make ([]int ,0,_beda ._becb );_cdag :=make ([]int ,0,_beda ._eafbb );for _bbdcd :=0;_bbdcd < _beda ._becb ;_bbdcd ++{if !_beda .emptyRow (_bbdcd ){_eefgb =append (_eefgb ,_bbdcd );};};for _cgeg :=0;_cgeg < _beda ._eafbb ;_cgeg ++{if !_beda .emptyColumn (_cgeg ){_cdag =append (_cdag ,_cgeg );};};if len (_eefgb )==_beda ._becb &&len (_cdag )==_beda ._eafbb {return _beda ;};_baaaf :=textTable {_fgegf :_beda ._fgegf ,_eafbb :len (_cdag ),_becb :len (_eefgb ),_cgfb :make (map[uint64 ]*textPara ,len (_cdag )*len (_eefgb ))};if _egfe {_d .Log .Info ("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064",_beda ._eafbb ,_beda ._becb ,len (_cdag ),len (_eefgb ));_d .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_cdag );_d .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_eefgb );};for _bfdg ,_gbdba :=range _eefgb {for _eefd ,_ddcca :=range _cdag {_agega :=_beda .get (_ddcca ,_gbdba );if _agega ==nil {continue ;};if _egfe {_ec .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_eefd ,_bfdg ,_ddcca ,_gbdba ,_cecc (_agega .text (),50));};_baaaf .put (_eefd ,_bfdg ,_agega );};};return &_baaaf ;};const (_aee =true ;_aaa =true ;_adfa =true ;_edf =false ;_cgeb =false ;_dddf =6;_fabc =3.0;_dbdb =200;_bgbd =true ;_dagb =true ;_gcca =true ;_fbec =true ;_gee =false ;);func (_ff *imageExtractContext )extractXObjectImage (_fe *_gf .PdfObjectName ,_cd _f .GraphicsState ,_ceg *_ecc .PdfPageResources )error {_bbcf ,_ :=_ceg .GetXObjectByName (*_fe );if _bbcf ==nil {return nil ;};_fge ,_fdf :=_ff ._agf [_bbcf ];if !_fdf {_dgg ,_bga :=_ceg .GetXObjectImageByName (*_fe );if _bga !=nil {return _bga ;};if _dgg ==nil {return nil ;};_fdfa ,_bga :=_dgg .ToImage ();if _bga !=nil {return _bga ;};_fge 
=&cachedImage {_cg :_fdfa ,_bb :_dgg .ColorSpace };_ff ._agf [_bbcf ]=_fge ;};_aae :=_fge ._cg ;_bee :=_fge ._bb ;_fc ,_gff :=_bee .ImageToRGB (*_aae );if _gff !=nil {return _gff ;};_d .Log .Debug ("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_cd .CTM .String ());_bgfb :=ImageMark {Image :&_fc ,Width :_cd .CTM .ScalingFactorX (),Height :_cd .CTM .ScalingFactorY (),Angle :_cd .CTM .Angle ()};_bgfb .X ,_bgfb .Y =_cd .CTM .Translation ();_ff ._bgeg =append (_ff ._bgeg ,_bgfb );_ff ._fd ++;return nil ;};type gridTiling struct{_ecc .PdfRectangle ;_degd []float64 ;_bcgf []float64 ;_aegg map[float64 ]map[float64 ]gridTile ;};func _fggg (_edgbd float64 )bool {return _c .Abs (_edgbd )< _dfbb };func (_eecbc *textTable )put (_faef ,_gbfd int ,_gfaaf *textPara ){_eecbc ._cgfb [_adef (_faef ,_gbfd )]=_gfaaf ;};func (_acffb paraList )yNeighbours (_ddfbd float64 )map[*textPara ][]int {_cfgff :=make ([]event ,2*len (_acffb ));if _ddfbd ==0{for _gdfbe ,_cagd :=range _acffb {_cfgff [2*_gdfbe ]=event {_cagd .Lly ,true ,_gdfbe };_cfgff [2*_gdfbe +1]=event {_cagd .Ury ,false ,_gdfbe };};}else {for _bgfeb ,_adge :=range _acffb {_cfgff [2*_bgfeb ]=event {_adge .Lly -_ddfbd *_adge .fontsize (),true ,_bgfeb };_cfgff [2*_bgfeb +1]=event {_adge .Ury +_ddfbd *_adge .fontsize (),false ,_bgfeb };};};return _acffb .eventNeighbours (_cfgff );};
// Append appends `mark` to the mark array.
func (_eabc *TextMarkArray) Append(mark TextMark) {
	marks := append(_eabc._bgea, mark)
	_eabc._bgea = marks
}

// xMean returns the average of the X coordinates of the ruling's two end points.
func (_dbfgf lineRuling) xMean() float64 {
	sum := _dbfgf._aefd.X + _dbfgf._eaebg.X
	return 0.5 * sum
}

// _geba returns a new textPara with rectangle `_eefff` and lines `_debe`. The slice is stored
// as given, not copied.
func _geba(_eefff _ecc.PdfRectangle, _debe []*textLine) *textPara {
	para := textPara{PdfRectangle: _eefff, _beedd: _debe}
	return &para
}

// Compile-time switches, all of which evaluate to false. The ones referenced in this part of
// the file guard diagnostic output only. The derived ones are ANDed with their parent switch,
// so they cannot be on while the parent is off.
const (
	_cggcc = false
	_cgccd = false
	_cffg  = false
	_aaeg  = false
	_fbdd  = false
	_ede   = false
	_efcb  = false
	_gcbb  = false

	_fgad  = false
	_agff  = _fgad && true
	_eedaf = _agff && false
	_cgafe = _fgad && true

	_egfe = false
	_bcgc = _egfe && false
	_afde = _egfe && true

	_ded  = false
	_fde  = _ded && false
	_abbc = _ded && false
	_eegg = _ded && true
	_aea  = _ded && false
	_ddc  = _ded && false
)

// establishSubpath returns the subpath that following path segments should be added to.
// A new subpath is appended first when lastpointEstablished returns a false flag, starting at
// the point it returned. The method returns nil if there is still no subpath; otherwise it
// clears _fafb and returns the last subpath.
func (_cccf *shapesState) establishSubpath() *subpath {
	if start, flag := _cccf.lastpointEstablished(); !flag {
		_cccf._bece = append(_cccf._bece, _fbee(start))
	}
	count := len(_cccf._bece)
	if count == 0 {
		return nil
	}
	_cccf._fafb = false
	return _cccf._bece[count-1]
}

// showText renders the string operand `_gfgf` by delegating to renderText.
func (_geab *textObject) showText(_gfgf []byte) error {
	err := _geab.renderText(_gfgf)
	return err
}

// clearPath drops all subpaths and resets _fafb.
func (_fdgca *shapesState) clearPath() {
	_fdgca._bece, _fdgca._fafb = nil, false
	if !_fbdd {
		return
	}
	_d.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _fdgca)
}

// tidied returns the rulings with duplicates removed, snapped to groups and sorted, or nil if
// snapToGroups returns nil. `_gbbb` is used only as a label in diagnostic output.
func (_ddcae rulingList) tidied(_gbbb string) rulingList {
	uniques := _ddcae.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()

	if _ded {
		_d.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", _gbbb, len(_ddcae), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int

// _cacbg returns the result of _ebd applied to the _decd rectangles of the two paragraphs.
func _cacbg(_cceg, _fdgac *textPara) bool {
	first, second := _cceg._decd, _fdgac._decd
	return _ebd(first, second)
}

// _baaf appends a copy of the template mark _bead, with its Text replaced by `_efab`, to
// `_beag` through _efb, which also maintains the running offset `*_egfc`.
func _baaf(_beag []TextMark, _egfc *int, _efab string) []TextMark {
	mark := _bead
	mark.Text = _efab
	marks := _efb(_beag, _egfc, mark)
	return marks
}

// _edcc classifies a rectangle as a ruling by its longer side: _fdcbd when it is wider than it
// is tall, _ccgf otherwise, provided that side is at least _gcag long. Any other rectangle is
// classified as _afaa.
func _edcc(_gdaa _ecc.PdfRectangle) rulingKind {
	width := _gdaa.Width()
	height := _gdaa.Height()

	// Default to the "not wider than tall" case and switch over when width dominates.
	longest, kind := height, _ccgf
	if width > height {
		longest, kind = width, _fdcbd
	}
	if longest >= _gcag {
		return kind
	}
	return _afaa
}
// ToTextMark returns the public view of the text mark.
func (_fege *textMark) ToTextMark() TextMark {
	var public TextMark
	public.Text = _fege._ebe
	public.Original = _fege._fgefc
	public.BBox = _fege._bfff
	public.Font = _fege._ffgag
	public.FontSize = _fege._dac
	public.FillColor = _fege._ggee
	public.StrokeColor = _fege._dec
	public.Orientation = _fege._dbfb
	return public
}

// _bdfb returns the indexes 0.._gbgf-1 sorted with `_aegfe` as the "less" function, where
// `_aegfe` compares two of the original indexes. The sort is not stable (sort.Slice).
func _bdfb(_gbgf int, _aegfe func(int, int) bool) []int {
	order := make([]int, _gbgf)
	for i := 0; i < len(order); i++ {
		order[i] = i
	}
	less := func(_gbae, _baaae int) bool {
		return _aegfe(order[_gbae], order[_baaae])
	}
	_ed.Slice(order, less)
	return order
}

// sort orders the words under every key of the bag's _cda map so that _cgc(a, b) < 0 for a
// placed before b.
func (_ebfeb *wordBag) sort() {
	for key := range _ebfeb._cda {
		words := _ebfeb._cda[key]
		_ed.Slice(words, func(_ebb, _fbdf int) bool {
			return _cgc(words[_ebb], words[_fbdf]) < 0
		})
	}
}

// isAtom checks whether the paragraph is the first corner of a 2x2 block formed with its
// _feag neighbour, its _ddfea neighbour and the paragraph that is both the _ddfea neighbour of
// the former and the _feag neighbour of the latter. All three must exist and have _bbcfc
// unset. If so it returns the table built from the four paragraphs by _afgb, otherwise nil.
func (_gfaef *textPara) isAtom() *textTable {
	right := _gfaef._feag
	below := _gfaef._ddfea
	if right == nil || right._bbcfc {
		return nil
	}
	if below == nil || below._bbcfc {
		return nil
	}

	diagonal := right._ddfea
	if diagonal == nil || diagonal._bbcfc || diagonal != below._feag {
		return nil
	}
	return _afgb(_gfaef, right, below, diagonal)
}
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct{_bgea []TextMark };func _fbgdb (_gcaca string )bool {for _ ,_deca :=range _gcaca {if !_bg .IsSpace (_deca ){return false ;};};return true ;};func (_eeec *shapesState )stroke (_fffa *[]pathSection ){_bdff :=pathSection {_bafa :_eeec ._bece ,Color :_eeec ._edadg .getStrokeColor ()};*_fffa =append (*_fffa ,_bdff );if _ded {_ec .Printf ("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",len (*_fffa ),_eeec ,_eeec ._edadg .getStrokeColor (),_bdff .bbox ());if _fde {for _acae ,_gdege :=range _eeec ._bece {_ec .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_acae ,_gdege );if _acae ==10{break ;};};};};};func (_gcbfc *textLine )text ()string {var _edaf []string ;for _ ,_agagb :=range _gcbfc ._agae {if _agagb ._eeaa {_edaf =append (_edaf ,"\u0020");};_edaf =append (_edaf ,_agagb ._fede );};return _ae .Join (_edaf ,"");};var _bbfe =map[markKind ]string {_baga :"\u0073\u0074\u0072\u006f\u006b\u0065",_daag :"\u0066\u0069\u006c\u006c",_ggbcc :"\u0061u\u0067\u006d\u0065\u006e\u0074"};func (_dbdg rulingList )bbox ()_ecc .PdfRectangle {var _fcdb _ecc .PdfRectangle ;if len (_dbdg )==0{_d .Log .Error ("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073");return _ecc .PdfRectangle {};};if _dbdg [0]._cgbe ==_fdcbd {_fcdb .Llx ,_fcdb .Urx =_dbdg .secMinMax ();_fcdb .Lly ,_fcdb .Ury =_dbdg .primMinMax ();}else {_fcdb .Llx ,_fcdb .Urx =_dbdg .primMinMax ();_fcdb .Lly ,_fcdb .Ury =_dbdg .secMinMax ();};return _fcdb ;};func (_gfef *textPara )writeText (_eabga _be .Writer ){if _gfef ._bbd ==nil {_gfef .writeCellText (_eabga );return ;};for _ecbf :=0;_ecbf < _gfef ._bbd ._becb ;_ecbf ++{for _efge :=0;_efge < _gfef ._bbd ._eafbb ;_efge ++{_gbdce :=_gfef ._bbd .get (_efge ,_ecbf 
);if _gbdce ==nil {_eabga .Write ([]byte ("\u0009"));}else {_gbdce .writeCellText (_eabga );};_eabga .Write ([]byte ("\u0020"));};if _ecbf < _gfef ._bbd ._becb -1{_eabga .Write ([]byte ("\u000a"));};};};func (_fee *shapesState )quadraticTo (_cdg ,_fbe ,_ceabb ,_aeg float64 ){if _fbdd {_d .Log .Info ("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a");};_fee .addPoint (_ceabb ,_aeg );};func (_fbbe *shapesState )fill (_dffc *[]pathSection ){_cdeb :=pathSection {_bafa :_fbbe ._bece ,Color :_fbbe ._edadg .getFillColor ()};*_dffc =append (*_dffc ,_cdeb );if _ded {_dfe :=_cdeb .bbox ();_ec .Printf ("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",len (*_dffc ),len (_cdeb ._bafa ),_fbbe ,_cdeb .Color ,_dfe ,_dfe .Width (),_dfe .Height ());if _fde {for _debd ,_gagaa :=range _cdeb ._bafa {_ec .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_debd ,_gagaa );if _debd ==10{break ;};};};};};func _feaf (_ddgg ,_fcfc _ag .Point )rulingKind {_baefg :=_c .Abs (_ddgg .X -_fcfc .X );_cbcb :=_c .Abs (_ddgg .Y -_fcfc .Y );return _ffdf (_baefg ,_cbcb ,_cfgdg );};func (_bacg rulingList )removeDuplicates ()rulingList {if len (_bacg )==0{return nil ;};_bacg .sort ();_faag :=rulingList {_bacg [0]};for _ ,_fae :=range _bacg [1:]{if _fae .equals (_faag [len (_faag )-1]){continue ;};_faag =append (_faag ,_fae );};return _faag ;};func (_adcef rulingList )merge ()*ruling {_ggeg :=_adcef [0]._cbfd ;_deaag :=_adcef [0]._aef ;_cafcb :=_adcef [0]._bcdag ;for _ ,_bgcc :=range _adcef [1:]{_ggeg +=_bgcc ._cbfd ;if _bgcc ._aef < _deaag {_deaag =_bgcc ._aef ;};if _bgcc ._bcdag > _cafcb {_cafcb =_bgcc ._bcdag ;};};_ebaa :=&ruling {_cgbe :_adcef [0]._cgbe ,_fcce :_adcef [0]._fcce ,Color :_adcef 
[0].Color ,_cbfd :_ggeg /float64 (len (_adcef )),_aef :_deaag ,_bcdag :_cafcb };if _abbc {_d .Log .Info ("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073",len (_adcef ),_ebaa );for _gege ,_bcga :=range _adcef {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gege ,_bcga );};};return _ebaa ;};func (_eeece *textTable )toTextTable ()TextTable {if _egfe {_d .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_eeece ._eafbb ,_eeece ._becb );};_aaea :=make ([][]TableCell ,_eeece ._becb );for _abcbb :=0;_abcbb < _eeece ._becb ;_abcbb ++{_aaea [_abcbb ]=make ([]TableCell ,_eeece ._eafbb );for _bgbg :=0;_bgbg < _eeece ._eafbb ;_bgbg ++{_adcb :=_eeece .get (_bgbg ,_abcbb );if _adcb ==nil {continue ;};if _egfe {_ec .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_bgbg ,_abcbb ,_adcb );};_aaea [_abcbb ][_bgbg ].Text =_adcb .text ();_fddeg :=0;_aaea [_abcbb ][_bgbg ].Marks ._bgea =_adcb .toTextMarks (&_fddeg );};};return TextTable {W :_eeece ._eafbb ,H :_eeece ._becb ,Cells :_aaea };};func (_bbebb rulingList )mergePrimary ()float64 {_cdfg :=_bbebb [0]._cbfd ;for _ ,_bbda :=range _bbebb [1:]{_cdfg +=_bbda ._cbfd ;};return _cdfg /float64 (len (_bbebb ));};func (_cacdf *subpath )clear (){*_cacdf =subpath {}};func _befb (_ddddg string )(string ,bool ){_fcdd :=[]rune (_ddddg );if len (_fcdd )!=1{return "",false ;};_bbgfa ,_agca :=_ccgd [_fcdd [0]];return _bbgfa ,_agca ;};func _acef (_ade []TextMark ,_eagb *int )[]TextMark {_agegc :=_ade [len (_ade )-1];_dbgab :=[]rune (_agegc .Text );if len (_dbgab )==1{_ade =_ade [:len (_ade )-1];_feedf :=_ade [len (_ade )-1];*_eagb =_feedf .Offset +len (_feedf .Text );}else {_acfc :=_dddc (_agegc .Text );*_eagb +=len (_acfc )-len (_agegc .Text );_agegc .Text =_acfc ;};return _ade ;};const _cab =20;func _cgea (_cebb _ecc .PdfRectangle )*ruling {return &ruling {_cgbe :_ccgf ,_cbfd :_cebb .Urx ,_aef :_cebb 
.Lly ,_bcdag :_cebb .Ury };};func _cbggf (_ddfd float64 )bool {return _c .Abs (_ddfd )< _fbac };func (_ddeb compositeCell )String ()string {_feda :="";if len (_ddeb .paraList )> 0{_feda =_cecc (_ddeb .paraList .merge ().text (),50);};return _ec .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071",_ddeb .PdfRectangle ,len (_ddeb .paraList ),_feda );};func _ebgd (_gfaff []pathSection )rulingList {_aggbb (_gfaff );if _ded {_d .Log .Info ("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs",len (_gfaff ));};var _gcga rulingList ;for _ ,_beaba :=range _gfaff {for _ ,_cdeg :=range _beaba ._bafa {if !_cdeg .isQuadrilateral (){if _ded {_d .Log .Error ("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073",_cdeg );};continue ;};if _dbfbc ,_bfed :=_cdeg .makeRectRuling (_beaba .Color );_bfed {_gcga =append (_gcga ,_dbfbc );}else {if _aea {_d .Log .Error ("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073",_cdeg );};};};};if _ded {_d .Log .Info ("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073",_gcga .String ());};return _gcga ;};
// String returns a description of the word bag.
func (_baec *wordBag) String() string {
	// Collect the word texts in the order given by depthIndexes.
	var texts []string
	for _, key := range _baec.depthIndexes() {
		for _, word := range _baec._cda[key] {
			texts = append(texts, word._fede)
		}
	}
	count := len(texts)
	return _ec.Sprintf("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071", _baec.PdfRectangle, _baec._gcccc, count, texts)
}

// computeBbox returns the union (via _fgeb) of the rectangles of all cells present in the
// table, visiting them row by row. A table with no cells yields the zero rectangle.
func (_cgbcd *textTable) computeBbox() _ecc.PdfRectangle {
	var (
		union _ecc.PdfRectangle
		seen  bool
	)
	for row := 0; row < _cgbcd._becb; row++ {
		for col := 0; col < _cgbcd._eafbb; col++ {
			cell := _cgbcd.get(col, row)
			switch {
			case cell == nil:
				// No cell at this position.
			case seen:
				union = _fgeb(union, cell.PdfRectangle)
			default:
				union = cell.PdfRectangle
				seen = true
			}
		}
	}
	return union
}
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in the PageText.
// Can be called multiple times in a row with different bounding boxes.
func (_caeg *PageText )ApplyArea (bbox _ecc .PdfRectangle ){_gded :=make ([]*textMark ,0,len (_caeg ._adg ));for _ ,_gdf :=range _caeg ._adg {if _aaeb (_gdf .bbox (),bbox ){_gded =append (_gded ,_gdf );};};var _deaa paraList ;_bada :=len (_gded );for _bdef :=0;_bdef < 360&&_bada > 0;_bdef +=90{_ccg :=make ([]*textMark ,0,len (_gded )-_bada );for _ ,_dde :=range _gded {if _dde ._dbfb ==_bdef {_ccg =append (_ccg ,_dde );};};if len (_ccg )> 0{_cgbc :=_ffgaf (_ccg ,_caeg ._bbcfe ,nil ,nil );_deaa =append (_deaa ,_cgbc ...);_bada -=len (_ccg );};};_eea :=new (_cf .Buffer );_deaa .writeText (_eea );_caeg ._edc =_eea .String ();_caeg ._faff =_deaa .toTextMarks ();_caeg ._ddd =_deaa .tables ();};func (_afeeg paraList )applyTables (_ffadg []*textTable )paraList {_cbage :=make (map[*textPara ]struct{});var _acgbb paraList ;for _ ,_gfbdb :=range _ffadg {for _ ,_bbecb :=range _gfbdb ._cgfb {_cbage [_bbecb ]=struct{}{};};_acgbb =append (_acgbb ,_gfbdb .newTablePara ());};for _ ,_bccd :=range _afeeg {if _ ,_gbec :=_cbage [_bccd ];!_gbec {_acgbb =append (_acgbb ,_bccd );};};return _acgbb ;};func (_gaagf *textWord )appendMark (_bbgec *textMark ,_baae _ecc .PdfRectangle ){_gaagf ._bebcb =append (_gaagf ._bebcb ,_bbgec );_gaagf .PdfRectangle =_fgeb (_gaagf .PdfRectangle ,_bbgec .PdfRectangle );if _bbgec ._dac > _gaagf ._agagbg {_gaagf ._agagbg =_bbgec ._dac ;};_gaagf ._fedcd =_baae .Ury -_gaagf .PdfRectangle .Lly ;};func _eebcc (_cabc ,_gdfdc _ag .Point )bool {return _cabc .X ==_gdfdc .X &&_cabc .Y ==_gdfdc .Y };func (_fedc *textObject )getCurrentFont ()*_ecc .PdfFont {var _ggb *_ecc .PdfFont ;if !_fedc ._dff .empty (){_ggb =_fedc ._dff .top ()._cec ;};if _ggb ==nil {_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e");return _ecc .DefaultFont ();};return _ggb ;};func (_bfcb *wordBag )pullWord (_dbf 
*textWord ,_ecfe int ,_bed map[int ]map[*textWord ]struct{}){_bfcb .PdfRectangle =_fgeb (_bfcb .PdfRectangle ,_dbf .PdfRectangle );if _dbf ._agagbg > _bfcb ._gcccc {_bfcb ._gcccc =_dbf ._agagbg ;};_bfcb ._cda [_ecfe ]=append (_bfcb ._cda [_ecfe ],_dbf );_bed [_ecfe ][_dbf ]=struct{}{};};const (_cabe markKind =iota ;_baga ;_daag ;_ggbcc ;);func (_fgfe *textObject )showTextAdjusted (_gbd *_gf .PdfObjectArray )error {_cff :=false ;for _ ,_gdbc :=range _gbd .Elements (){switch _gdbc .(type ){case *_gf .PdfObjectFloat ,*_gf .PdfObjectInteger :_dbc ,_aag :=_gf .GetNumberAsFloat (_gdbc );if _aag !=nil {_d .Log .Debug ("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_gdbc ,_gbd );return _aag ;};_eafa ,_adb :=-_dbc *0.001*_fgfe ._faa ._ffdb ,0.0;if _cff {_adb ,_eafa =_eafa ,_adb ;};_agdd :=_ccfb (_ag .Point {X :_eafa ,Y :_adb });_fgfe ._aac .Concat (_agdd );case *_gf .PdfObjectString :_ffge ,_beb :=_gf .GetStringBytes (_gdbc );if !_beb {_d .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_gdbc ,_gbd );return _gf .ErrTypeError ;};_fgfe .renderText (_ffge );default:_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_gdbc ,_gbd );return _gf .ErrTypeError ;};};return nil ;};func (_gdfc rulingList )secMinMax ()(float64 ,float64 ){_egbc 
,_ebcdg :=_gdfc [0]._aef ,_gdfc [0]._bcdag ;for _ ,_adae :=range _gdfc [1:]{if _adae ._aef < _egbc {_egbc =_adae ._aef ;};if _adae ._bcdag > _ebcdg {_ebcdg =_adae ._bcdag ;};};return _egbc ,_ebcdg ;};func (_agec *subpath )close (){if !_eebcc (_agec ._cada [0],_agec .last ()){_agec .add (_agec ._cada [0]);};_agec ._eagd =true ;_agec .removeDuplicates ();};func (_fcaa *shapesState )lineTo (_cacd ,_fcfd float64 ){if _fbdd {_d .Log .Info ("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066",_cacd ,_fcfd ,_fcaa .devicePoint (_cacd ,_fcfd ));};_fcaa .addPoint (_cacd ,_fcfd );};func (_fbdfc *textObject )newTextMark (_babg string ,_dgad _ag .Matrix ,_dabbb _ag .Point ,_ffadd float64 ,_dabe *_ecc .PdfFont ,_bcbda float64 ,_acbc ,_adafg _ac .Color )(textMark ,bool ){_gecg :=_dgad .Angle ();_ecbc :=_ceac (_gecg ,_dcdg );var _agaa float64 ;if _ecbc %180!=90{_agaa =_dgad .ScalingFactorY ();}else {_agaa =_dgad .ScalingFactorX ();};_ddcg :=_cae (_dgad );_gfbe :=_ecc .PdfRectangle {Llx :_ddcg .X ,Lly :_ddcg .Y ,Urx :_dabbb .X ,Ury :_dabbb .Y };switch _ecbc %360{case 90:_gfbe .Urx -=_agaa ;case 180:_gfbe .Ury -=_agaa ;case 270:_gfbe .Urx +=_agaa ;case 0:_gfbe .Ury +=_agaa ;default:_ecbc =0;_gfbe .Ury +=_agaa ;};if _gfbe .Llx > _gfbe .Urx {_gfbe .Llx ,_gfbe .Urx =_gfbe .Urx ,_gfbe .Llx ;};if _gfbe .Lly > _gfbe .Ury {_gfbe .Lly ,_gfbe .Ury =_gfbe .Ury ,_gfbe .Lly ;};_gdag ,_cgcf :=_aafe (_gfbe ,_fbdfc ._gcbf ._cb );if !_cgcf {_d .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_gfbe ,_fbdfc ._gcbf ._cb ,_babg );};_gfbe =_gdag ;_feec :=_gfbe ;_ebgf :=_fbdfc ._gcbf ._cb ;switch _ecbc %360{case 90:_ebgf .Urx ,_ebgf .Ury =_ebgf .Ury ,_ebgf .Urx ;_feec =_ecc 
.PdfRectangle {Llx :_ebgf .Urx -_gfbe .Ury ,Urx :_ebgf .Urx -_gfbe .Lly ,Lly :_gfbe .Llx ,Ury :_gfbe .Urx };case 180:_feec =_ecc .PdfRectangle {Llx :_ebgf .Urx -_gfbe .Llx ,Urx :_ebgf .Urx -_gfbe .Urx ,Lly :_ebgf .Ury -_gfbe .Lly ,Ury :_ebgf .Ury -_gfbe .Ury };case 270:_ebgf .Urx ,_ebgf .Ury =_ebgf .Ury ,_ebgf .Urx ;_feec =_ecc .PdfRectangle {Llx :_gfbe .Ury ,Urx :_gfbe .Lly ,Lly :_ebgf .Ury -_gfbe .Llx ,Ury :_ebgf .Ury -_gfbe .Urx };};if _feec .Llx > _feec .Urx {_feec .Llx ,_feec .Urx =_feec .Urx ,_feec .Llx ;};if _feec .Lly > _feec .Ury {_feec .Lly ,_feec .Ury =_feec .Ury ,_feec .Lly ;};_feee :=textMark {_ebe :_babg ,PdfRectangle :_feec ,_bfff :_gfbe ,_ffgag :_dabe ,_dac :_agaa ,_accb :_bcbda ,_beea :_dgad ,_gbab :_dabbb ,_dbfb :_ecbc ,_ggee :_acbc ,_dec :_adafg };if _cgccd {_d .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_ddcg ,_dabbb ,_feee .String ());};return _feee ,_cgcf ;};func (_ggaf *wordBag )depthBand (_facg ,_bcfa float64 )[]int {if len (_ggaf ._cda )==0{return nil ;};return _ggaf .depthRange (_ggaf .getDepthIdx (_facg ),_ggaf .getDepthIdx (_bcfa ));};func (_fadd *wordBag )getDepthIdx (_adac float64 )int {_dcde :=_fadd .depthIndexes ();_ecff :=_dgdc (_adac );if _ecff < _dcde [0]{return _dcde [0];};if _ecff > _dcde [len (_dcde )-1]{return _dcde [len (_dcde )-1];};return _ecff ;};func (_agce *textTable )compositeColCorridors ()map[int ][]float64 {_effb :=make (map[int ][]float64 ,_agce ._eafbb );if _egfe {_d .Log .Info ("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020",_agce ._eafbb );};for _eaad :=0;_eaad < _agce ._eafbb ;_eaad ++{_effb [_eaad ]=nil ;};return _effb ;};type textResult struct{_ffg PageText ;_fdgc int ;_cfgfd int ;};var _bggca =_b .MustCompile 
("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024");
|
||
|
||
// ImageExtractOptions contains options for controlling image extraction from
|
||
// PDF pages.
|
||
type ImageExtractOptions struct{IncludeInlineStencilMasks bool ;};func _ageb (_fcacc []int )[]int {_ecba :=make ([]int ,len (_fcacc ));for _cffgc ,_abeb :=range _fcacc {_ecba [len (_fcacc )-1-_cffgc ]=_abeb ;};return _ecba ;};func (_gbdc *wordBag )depthIndexes ()[]int {if len (_gbdc ._cda )==0{return nil ;};_eedc :=make ([]int ,len (_gbdc ._cda ));_agag :=0;for _cfgd :=range _gbdc ._cda {_eedc [_agag ]=_cfgd ;_agag ++;};_ed .Ints (_eedc );return _eedc ;};func (_dfded rulingList )toTilings ()(rulingList ,[]gridTiling ){_dfded .log ("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s");if len (_dfded )==0{return nil ,nil ;};_dfded =_dfded .tidied ("\u0061\u006c\u006c");_dfded .log ("\u0074\u0069\u0064\u0069\u0065\u0064");_bddee :=_dfded .toGrids ();_dge :=make ([]gridTiling ,len (_bddee ));for _bcfaf ,_adbd :=range _bddee {_dge [_bcfaf ]=_adbd .asTiling ();};return _dfded ,_dge ;};func (_fgcg paraList )readBefore (_cbdg []int ,_adga ,_dfdg int )bool {_aagb ,_fbecc :=_fgcg [_adga ],_fgcg [_dfdg ];if _cacbg (_aagb ,_fbecc )&&_aagb .Lly > _fbecc .Lly {return true ;};if !(_aagb ._decd .Urx < _fbecc ._decd .Llx ){return false ;};_ccab ,_bdgf :=_aagb .Lly ,_fbecc .Lly ;if _ccab > _bdgf {_bdgf ,_ccab =_ccab ,_bdgf ;};_ffbfe :=_c .Max (_aagb ._decd .Llx ,_fbecc ._decd .Llx );_bagg :=_c .Min (_aagb ._decd .Urx ,_fbecc ._decd .Urx );_bafd :=_fgcg .llyRange (_cbdg ,_ccab ,_bdgf );for _ ,_gddb :=range _bafd {if _gddb ==_adga ||_gddb ==_dfdg {continue ;};_aadf :=_fgcg [_gddb ];if _aadf ._decd .Llx <=_bagg &&_ffbfe <=_aadf ._decd .Urx {return false ;};};return true ;};func _faac (_ccgba []*textMark ,_cgafa _ecc .PdfRectangle )*textWord {_cedad :=_ccgba [0].PdfRectangle ;_edgg :=_ccgba [0]._dac ;for _ ,_affec :=range _ccgba [1:]{_cedad =_fgeb (_cedad ,_affec .PdfRectangle );if _affec ._dac > _edgg {_edgg =_affec ._dac ;};};return &textWord {PdfRectangle :_cedad ,_bebcb :_ccgba ,_fedcd :_cgafa .Ury -_cedad .Lly ,_agagbg :_edgg };};func (_dega *subpath )add (_cfeb ..._ag .Point ){_dega 
._cada =append (_dega ._cada ,_cfeb ...)};
|
||
|
||
// String returns a string describing `ma`.
|
||
func (_fbgdf TextMarkArray) String() string {
	marks := _fbgdf._bgea
	if len(marks) == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	// Only the element count and the first and last marks are shown.
	first, last := marks[0], marks[len(marks)-1]
	return _ec.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", len(marks), first, last)
}

// findPrimSec returns the first ruling whose `_cbfd` matches `_dbad` according to `_fggg` and
// whose [`_aef`, `_bcdag`] range, widened by `_gfdcf` on both ends, contains `_bbcfd`.
// It returns nil when no ruling qualifies.
func (_ffeda rulingList) findPrimSec(_dbad, _bbcfd float64) *ruling {
	for _, candidate := range _ffeda {
		if !_fggg(candidate._cbfd - _dbad) {
			continue
		}
		if candidate._aef-_gfdcf <= _bbcfd && _bbcfd <= candidate._bcdag+_gfdcf {
			return candidate
		}
	}
	return nil
}

// pop removes the top element of the stack and returns a pointer to a copy of the state it
// pointed to, or nil when the stack is empty.
func (_bdg *stateStack) pop() *textState {
	if _bdg.empty() {
		return nil
	}
	topIdx := len(*_bdg) - 1
	state := *(*_bdg)[topIdx]
	*_bdg = (*_bdg)[:topIdx]
	return &state
}

// equals reports whether both rulings have the same kind (`_cgbe`) and their `_cbfd`, `_aef`
// and `_bcdag` values are all equal according to `_dggc`.
func (_cgdf *ruling) equals(_aeac *ruling) bool {
	if _cgdf._cgbe != _aeac._cgbe {
		return false
	}
	if !_dggc(_cgdf._cbfd, _aeac._cbfd) {
		return false
	}
	if !_dggc(_cgdf._aef, _aeac._aef) {
		return false
	}
	return _dggc(_cgdf._bcdag, _aeac._bcdag)
}

// _fgfa returns a space when the two values are equal according to `_fggg`, and a newline
// otherwise.
func _fgfa(_adgb, _ecdff float64) string {
	if _fggg(_adgb - _ecdff) {
		return "\u0020"
	}
	return "\u000a"
}
|
||
|
||
// PageText represents the layout of text on a device page.
|
||
type PageText struct {
	_adg   []*textMark       // Source marks; ApplyArea filters these by area.
	_edc   string            // Text produced by the last ApplyArea call.
	_faff  []TextMark        // Marks produced by the last ApplyArea call; exposed by Marks.
	_ddd   []TextTable       // Tables produced by the last ApplyArea call.
	_bbcfe _ecc.PdfRectangle // Rectangle handed to `_ffgaf` when building paragraphs.
	_bdde  []pathSection
	_dea   []pathSection
}

// compositeCell is a rectangle together with the paragraphs attached to it.
type compositeCell struct {
	_ecc.PdfRectangle
	paraList
}

// topoOrder returns the paragraph indexes ordered by a depth-first traversal of the
// readBefore relation: each index is emitted after every unvisited index it is read before,
// and the emitted sequence is then reversed with `_ageb`.
func (_dbef paraList) topoOrder() []int {
	if _gcbb {
		_d.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	total := len(_dbef)
	seen := make([]bool, total)
	postOrder := make([]int, 0, total)
	byLly := _dbef.llyOrdering()

	var visit func(node int)
	visit = func(node int) {
		seen[node] = true
		for next := 0; next < total; next++ {
			if seen[next] {
				continue
			}
			if _dbef.readBefore(byLly, node, next) {
				visit(next)
			}
		}
		postOrder = append(postOrder, node)
	}

	for root := 0; root < total; root++ {
		if seen[root] {
			continue
		}
		visit(root)
	}
	return _ageb(postOrder)
}

// setTextRenderMode stores `_dfb` as the RenderMode of the current state.
// It does nothing on a nil receiver.
func (_ffed *textObject) setTextRenderMode(_dfb int) {
	if _ffed != nil {
		_ffed._faa._egbe = RenderMode(_dfb)
	}
}

// addDiacritic appends `_dccf` to the text of the word's last mark and stores the NFKC
// normalisation of the result. The word must contain at least one mark.
func (_ccaaf *textWord) addDiacritic(_dccf string) {
	marks := _ccaaf._bebcb
	last := marks[len(marks)-1]
	last._ebe = _df.NFKC.String(last._ebe + _dccf)
}

// sortStrict sorts the rulings in place: by kind (`_cgbe`) descending, then by `_cbfd`
// ascending when the two values differ according to `_fggg`, then by `_aef` and finally by
// `_bcdag`, both ascending.
func (_bcab rulingList) sortStrict() {
	less := func(i, j int) bool {
		a, b := _bcab[i], _bcab[j]
		if a._cgbe != b._cgbe {
			return a._cgbe > b._cgbe
		}
		if !_fggg(a._cbfd - b._cbfd) {
			return a._cbfd < b._cbfd
		}
		if a._aef != b._aef {
			return a._aef < b._aef
		}
		return a._bcdag < b._bcdag
	}
	_ed.Slice(_bcab, less)
}
|
||
|
||
// String returns a human readable description of `vecs`.
|
||
func (_adce rulingList )String ()string {if len (_adce )==0{return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}";};_eabd ,_bdcb :=_adce .vertsHorzs ();_eebe :=len (_eabd );_acff :=len (_bdcb );if _eebe ==0||_acff ==0{return _ec .Sprintf ("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}",_eebe ,_acff );};_ddgbfd :=_ecc .PdfRectangle {Llx :_eabd [0]._cbfd ,Urx :_eabd [_eebe -1]._cbfd ,Lly :_bdcb [_acff -1]._cbfd ,Ury :_bdcb [0]._cbfd };return _ec .Sprintf ("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d",_eebe ,_acff ,_ddgbfd );};
// _cfcc returns the difference of the `_gce` values of the two bounded arguments.
func _cfcc (_fcdc ,_cgga bounded )float64 {return _gce (_fcdc )-_gce (_cgga )};
// _acfgf returns the distance from the top (Ury) of `_cbgdf` to the bottom (Lly) of the
// bounding box of `_cegb`.
func _acfgf (_cbgdf _ecc .PdfRectangle ,_cegb bounded )float64 {return _cbgdf .Ury -_cegb .bbox ().Lly };
// bounded is implemented by anything that can report its bounding rectangle.
type bounded interface{bbox ()_ecc .PdfRectangle };
// moveTo sets the `_fafb` flag and stores the device-space position of (`_eec`, `_gaga`) in
// `_adaa`. The point is logged when `_fbdd` is set.
func (_fcef *shapesState )moveTo (_eec ,_gaga float64 ){_fcef ._fafb =true ;_fcef ._adaa =_fcef .devicePoint (_eec ,_gaga );if _fbdd {_d .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_eec ,_gaga ,_fcef ._adaa );};};
// _abfcd returns the keys of `_ebbc` in ascending order.
func _abfcd (_ebbc map[int ]intSet )[]int {_dgee :=make ([]int ,0,len (_ebbc ));for _dbfe :=range _ebbc {_dgee =append (_dgee ,_dbfe );};_ed .Ints (_dgee );return _dgee ;};
// removeDuplicates removes words that duplicate another word in the same depth bin.
// For every depth index it takes the first word of the bin, derives a tolerance of `_gacb`
// times that word's `_agagbg`, and examines each bin returned by depthBand for the range from
// the word's `_fedcd` to `_fedcd` plus the tolerance. Within a bin, a word is marked as a
// duplicate of another unmarked word when both have the same `_fede` string and all four
// rectangle edges differ by less than the tolerance. Marked words are removed by compacting
// the slice in place, and a bin that becomes empty is deleted from `_cda`.
func (_becec *wordBag )removeDuplicates (){if _cgafe {_d .Log .Info ("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_becec .text ());};for _ ,_eafdc :=range _becec .depthIndexes (){if len (_becec ._cda [_eafdc ])==0{continue ;};_cadb :=_becec ._cda [_eafdc ][0];_bgff :=_gacb *_cadb ._agagbg ;_fgfg :=_cadb ._fedcd ;for _ ,_beaec :=range _becec .depthBand (_fgfg ,_fgfg +_bgff ){_aecb :=map[*textWord ]struct{}{};_dggfc :=_becec ._cda [_beaec ];for _ ,_fabb :=range _dggfc {if _ ,_acad :=_aecb [_fabb ];_acad {continue ;};for _ ,_gaaf :=range _dggfc {if _ ,_bbdc :=_aecb [_gaaf ];_bbdc {continue ;};if _gaaf !=_fabb &&_gaaf ._fede ==_fabb ._fede &&_c .Abs (_gaaf .Llx -_fabb .Llx )< _bgff &&_c .Abs (_gaaf .Urx -_fabb .Urx )< _bgff &&_c .Abs (_gaaf .Lly -_fabb .Lly )< _bgff &&_c .Abs (_gaaf .Ury -_fabb .Ury )< _bgff {_aecb [_gaaf ]=struct{}{};};};};if len (_aecb )> 0{_bffg :=0;for _ ,_acdeb :=range _dggfc {if _ ,_fbeb :=_aecb [_acdeb ];!_fbeb {_dggfc [_bffg ]=_acdeb ;_bffg ++;};};_becec ._cda [_beaec ]=_dggfc [:len (_dggfc )-len (_aecb )];if len (_becec ._cda [_beaec ])==0{delete (_becec ._cda ,_beaec );};};};};};
// _cgfff splits the words of `_gabe` into a list of new word bags and returns them.
// For each depth index, while that bin still holds words, it takes the first word at the
// first reading index, starts a new bag from it with `_gfdc` and removes it from `_gabe`.
// It then repeatedly calls scanBand on `_gabe` with thresholds scaled by the new bag's
// `_gcccc`: first the "vertical" and "horizontal" bands, restarting the loop whenever either
// returns a positive count, then a scan with an empty label whose positive count `n` leads to
// an "other" scan when (n > 1 and n exceeds 0.3 times the bag's depth extent divided by
// `_gcccc`) or n <= 10. The loop ends when a pass reports nothing, and the bag is appended
// to the result.
// NOTE(review): the meaning of scanBand's last two boolean arguments is not visible here -
// confirm against its definition before changing the call order.
func _cgfff (_gabe *wordBag ,_fecg float64 ,_dfde ,_cgdc rulingList )[]*wordBag {var _gbegf []*wordBag ;for _ ,_bedd :=range _gabe .depthIndexes (){_cfaf :=false ;for !_gabe .empty (_bedd ){_bgba :=_gabe .firstReadingIndex (_bedd );_efcg :=_gabe .firstWord (_bgba );_dbaf :=_gfdc (_efcg ,_fecg ,_dfde ,_cgdc );_gabe .removeWord (_efcg ,_bgba );if _efcb {_d .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_efcg .String ());};for _bfaa :=true ;_bfaa ;_bfaa =_cfaf {_cfaf =false ;_fbbea :=_ggc *_dbaf ._gcccc ;_ebca :=_cffb *_dbaf ._gcccc ;_fegf :=_afeea *_dbaf ._gcccc ;if _efcb {_d .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_dbaf .minDepth (),_dbaf .maxDepth (),_fegf ,_ebca );};if _gabe .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_dbaf ,_beef (_gfcg ,0),_dbaf .minDepth ()-_fegf ,_dbaf .maxDepth ()+_fegf ,_ddga ,false ,false )> 0{_cfaf =true ;};if _gabe .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_dbaf ,_beef (_gfcg ,_ebca ),_dbaf .minDepth (),_dbaf .maxDepth (),_cbge ,false ,false )> 0{_cfaf =true ;};if _cfaf {continue ;};_eacbc :=_gabe .scanBand ("",_dbaf ,_beef (_ffda ,_fbbea ),_dbaf .minDepth (),_dbaf .maxDepth (),_beeg ,true ,false );if _eacbc > 0{_eeef :=(_dbaf .maxDepth ()-_dbaf .minDepth ())/_dbaf ._gcccc ;if (_eacbc > 1&&float64 (_eacbc )> 0.3*_eeef )||_eacbc <=10{if _gabe .scanBand ("\u006f\u0074\u0068e\u0072",_dbaf ,_beef (_ffda ,_fbbea ),_dbaf .minDepth (),_dbaf .maxDepth (),_beeg ,false ,true )> 0{_cfaf =true ;};};};};_gbegf =append (_gbegf ,_dbaf );};};return _gbegf ;};
|
||
|
||
// String returns a description of `k`.
|
||
func (_aaaa rulingKind )String ()string {_bcgdg ,_gbdgc :=_ecbd [_aaaa ];if !_gbdgc {return _ec .Sprintf ("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064",_aaaa );};return _bcgdg ;};func _ceac (_aagd float64 ,_afff int )int {if _afff ==0{_afff =1;};_aeef :=float64 (_afff );return int (_c .Round (_aagd /_aeef )*_aeef );};func (_bdad *textTable )growTable (){_fdceg :=func (_egbd paraList ){_bdad ._becb ++;for _feebe :=0;_feebe < _bdad ._eafbb ;_feebe ++{_cfddg :=_egbd [_feebe ];_bdad .put (_feebe ,_bdad ._becb -1,_cfddg );};};_aegb :=func (_bcac paraList ){_bdad ._eafbb ++;for _bdgc :=0;_bdgc < _bdad ._becb ;_bdgc ++{_aaca :=_bcac [_bdgc ];_bdad .put (_bdad ._eafbb -1,_bdgc ,_aaca );};};if _bcgc {_bdad .log ("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce");};for _fffba :=0;;_fffba ++{_bbffe :=false ;_dafa :=_bdad .getDown ();_bage :=_bdad .getRight ();if _bcgc {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fffba ,_bdad );_ec .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a",_dafa );_ec .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a",_bage );};if _dafa !=nil &&_bage !=nil {_gdff :=_dafa [len (_dafa )-1];if _gdff !=nil &&!_gdff ._bbcfc &&_gdff ==_bage [len (_bage )-1]{_fdceg (_dafa );if _bage =_bdad .getRight ();_bage !=nil {_aegb (_bage );_bdad .put (_bdad ._eafbb -1,_bdad ._becb -1,_gdff );};_bbffe =true ;};};if !_bbffe &&_dafa !=nil {_fdceg (_dafa );_bbffe =true ;};if !_bbffe &&_bage !=nil {_aegb (_bage );_bbffe =true ;};if !_bbffe {break ;};};};func (_bgdde paraList )log (_cbda string ){if !_gcbb {return ;};_d .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_cbda ,len (_bgdde ));for _ccccf ,_babb :=range _bgdde {if _babb ==nil {continue ;};_faaa :=_babb .text 
();_dfbd :="\u0020\u0020";if _babb ._bbd !=nil {_dfbd =_ec .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_babb ._bbd ._eafbb ,_babb ._bbd ._becb );};_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_ccccf ,_babb .PdfRectangle ,_dfbd ,_cecc (_faaa ,50));};};func (_cccb *textPara )toCellTextMarks (_efe *int )[]TextMark {var _cbgb []TextMark ;for _aeab ,_caac :=range _cccb ._beedd {_gcfe :=_caac .toTextMarks (_efe );_fbde :=_aee &&_caac .endsInHyphen ()&&_aeab !=len (_cccb ._beedd )-1;if _fbde {_gcfe =_acef (_gcfe ,_efe );};_cbgb =append (_cbgb ,_gcfe ...);if !(_fbde ||_aeab ==len (_cccb ._beedd )-1){_cbgb =_baaf (_cbgb ,_efe ,_fgfa (_caac ._eccbg ,_cccb ._beedd [_aeab +1]._eccbg ));};};return _cbgb ;};type wordBag struct{_ecc .PdfRectangle ;_gcccc float64 ;_eafe ,_fbda rulingList ;_baeg float64 ;_cda map[int ][]*textWord ;};func (_cdfbc *textTable )get (_gceea ,_aafde int )*textPara {return _cdfbc ._cgfb [_adef (_gceea ,_aafde )];};func (_ffde *textObject )nextLine (){_ffde .moveLP (0,-_ffde ._faa ._bbgf )};
|
||
|
||
// String returns a string describing `i`.
|
||
func (_affe gridTile )String ()string {_ddab :=func (_ddfeeg bool ,_fega string )string {if _ddfeeg {return _fega ;};return "\u005f";};return _ec .Sprintf ("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073",_affe .PdfRectangle ,_ddab (_affe ._ffgb ,"\u004c"),_ddab (_affe ._bbaa ,"\u0052"),_ddab (_affe ._bade ,"\u0042"),_ddab (_affe ._bbb ,"\u0054"));};
// bbox returns the rectangle of the word.
func (_egea *textWord )bbox ()_ecc .PdfRectangle {return _egea .PdfRectangle };
// _eacfe converts the PDF color `_aeccf` in colorspace `_egfge` to a color.Color.
// It returns color.Black when either argument is nil, when ColorToRGB fails, or when the
// converted color is not a *PdfColorDeviceRGB. Otherwise it returns an opaque color.NRGBA
// whose channels are the R, G and B values multiplied by 255 and converted to uint8.
func _eacfe (_egfge _ecc .PdfColorspace ,_aeccf _ecc .PdfColor )_ac .Color {if _egfge ==nil ||_aeccf ==nil {return _ac .Black ;};_fdag ,_ebag :=_egfge .ColorToRGB (_aeccf );if _ebag !=nil {_d .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_aeccf ,_egfge ,_ebag );return _ac .Black ;};_decea ,_ebga :=_fdag .(*_ecc .PdfColorDeviceRGB );if !_ebga {_d .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_fdag );return _ac .Black ;};return _ac .NRGBA {R :uint8 (_decea .R ()*255),G :uint8 (_decea .G ()*255),B :uint8 (_decea .B ()*255),A :uint8 (255)};};
// markWordBoundaries sets `_eeaa` on every word after the first whose `_cgbg` value relative
// to the preceding word is at least `_bffa` times the line's `_fbgf`.
// The line must contain at least one word (`_agae[1:]` panics on an empty slice).
func (_ffbg *textLine )markWordBoundaries (){_dbbb :=_bffa *_ffbg ._fbgf ;for _afbb ,_fcffg :=range _ffbg ._agae [1:]{if _cgbg (_fcffg ,_ffbg ._agae [_afbb ])>=_dbbb {_fcffg ._eeaa =true ;};};};
// _dggc reports whether the two values differ by no more than `_gfdcf`.
func _dggc (_dffe ,_ceage float64 )bool {return _c .Abs (_dffe -_ceage )<=_gfdcf };
// alignsPrimary reports whether both rulings have the same kind (`_cgbe`) and their `_cbfd`
// values differ by less than half of `_fbac`.
func (_bcdf *ruling )alignsPrimary (_ffab *ruling )bool {return _bcdf ._cgbe ==_ffab ._cgbe &&_c .Abs (_bcdf ._cbfd -_ffab ._cbfd )< _fbac *0.5;};
// highestWord returns the first word in depth bin `_baef` whose `_fedcd` lies in the closed
// range [`_dbag`, `_cgfa`], or nil when there is none.
func (_dbd *wordBag )highestWord (_baef int ,_dbag ,_cgfa float64 )*textWord {for _ ,_bgac :=range _dbd ._cda [_baef ]{if _dbag <=_bgac ._fedcd &&_bgac ._fedcd <=_cgfa {return _bgac ;};};return nil ;};
// log prints the tiling when `_eegg` is set: its dimensions, the `_degd` and `_bcgf`
// coordinate lists and every tile found in `_aegg`, which is indexed first by a `_bcgf`
// value and then by a `_degd` value.
func (_ebaf gridTiling )log (_faagg string ){if !_eegg {return ;};_d .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_ebaf ._degd ),len (_ebaf ._bcgf ),_faagg );_ec .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_ebaf ._degd );_ec .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_ebaf ._bcgf );for _acab ,_caaa :=range _ebaf ._bcgf {_ebcc ,_cceff :=_ebaf ._aegg [_caaa ];if !_cceff {continue ;};_ec .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_acab ,_caaa );for _gafcb ,_caegf :=range _ebaf ._degd {_dged ,_fgdd :=_ebcc [_caegf ];if !_fgdd {continue ;};_ec .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_gafcb ,_dged .String ());};};};
// event holds a float64 value, a boolean flag and an integer index.
type event struct{_fffced float64 ;_gbgc bool ;_gddbg int ;};
// gridIntersecting reports whether both the `_aef` and the `_bcdag` values of the two rulings
// are equal according to `_dggc`.
func (_gdcg *ruling )gridIntersecting (_efbc *ruling )bool {return _dggc (_gdcg ._aef ,_efbc ._aef )&&_dggc (_gdcg ._bcdag ,_efbc ._bcdag );};
// paraList is a sequence of text paragraphs.
type paraList []*textPara ;
// log prints a summary of the list and one line per ruling when `_ded` is set.
func (_gebd rulingList )log (_cggd string ){if !_ded {return ;};_d .Log .Info ("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_cggd ,_gebd .String ());for _egcbg ,_ecgc :=range _gebd {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_egcbg ,_ecgc .String ());};};
// _ccgd maps a rune to a one-character string in the range U+0300 to U+0359.
// NOTE(review): the keys look like spacing accents and modifier letters and the values like
// their combining forms (e.g. U+0060 to U+0300) - confirm against the users of this table.
var (_ccgd =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};);
// computeEBBoxes computes the `_decd` rectangle of every paragraph.
// Each `_decd` starts as the paragraph's own rectangle. For each paragraph, the neighbours
// from yNeighbours(0) give a left limit (largest Urx of neighbours lying entirely to the left)
// and a right limit (smallest Llx of neighbours lying entirely to the right). The box is then
// widened using every other paragraph whose `_decd.Ury` is not above this paragraph's Lly:
// to the left when that paragraph's Llx lies between the left limit and the current Llx,
// otherwise to the right when its Urx lies between the current Urx and the right limit.
// Paragraphs are processed in order, so later ones see the already widened boxes of earlier
// ones. When `_edf` is set, each paragraph's rectangle is finally overwritten with its `_decd`.
func (_afbd paraList )computeEBBoxes (){if _cggcc {_d .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a");};for _ ,_eddd :=range _afbd {_eddd ._decd =_eddd .PdfRectangle ;};_gbed :=_afbd .yNeighbours (0);for _dccc ,_bcbe :=range _afbd {_cgfaa :=_bcbe ._decd ;_dcb ,_abcb :=-1.0e9,+1.0e9;for _ ,_aebf :=range _gbed [_bcbe ]{_bfd :=_afbd [_aebf ]._decd ;if _bfd .Urx < _cgfaa .Llx {_dcb =_c .Max (_dcb ,_bfd .Urx );}else if _cgfaa .Urx < _bfd .Llx {_abcb =_c .Min (_abcb ,_bfd .Llx );};};for _bcebf ,_agggb :=range _afbd {_feeb :=_agggb ._decd ;if _dccc ==_bcebf ||_feeb .Ury > _cgfaa .Lly {continue ;};if _dcb <=_feeb .Llx &&_feeb .Llx < _cgfaa .Llx {_cgfaa .Llx =_feeb .Llx ;}else if _feeb .Urx <=_abcb &&_cgfaa .Urx < _feeb .Urx {_cgfaa .Urx =_feeb .Urx ;};};if _cggcc {_ec .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_dccc ,_bcbe ._decd ,_cgfaa ,_cecc (_bcbe .text (),50));};_bcbe ._decd =_cgfaa ;};if _edf {for _ ,_bgd :=range _afbd {_bgd .PdfRectangle =_bgd ._decd ;};};};
// connections returns the set of ruling indexes reachable from `_gfba`.
// Starting at `_gfba`, each newly visited index adds to the result every list index whose
// entry in `_dbdc` contains it, and the search then recurses into every index currently in
// the result. Indexes already visited are not expanded again.
func (_deac rulingList )connections (_dbdc map[int ]intSet ,_gfba int )intSet {_dbec :=make (intSet );_egeg :=make (intSet );var _gcccf func (int );_gcccf =func (_efad int ){if !_egeg .has (_efad ){_egeg .add (_efad );for _aeegf :=range _deac {if _dbdc [_aeegf ].has (_efad ){_dbec .add (_aeegf );};};for _fcba :=range _deac {if _dbec .has (_fcba ){_gcccf (_fcba );};};};};_gcccf (_gfba );return _dbec ;};
// _ggdd reports whether either paragraph has `_eddf` set or the depths of the two paragraphs
// are equal according to `_fggg`.
func _ggdd (_gfae ,_eagea *textPara )bool {if _gfae ._eddf ||_eagea ._eddf {return true ;};return _fggg (_gfae .depth ()-_eagea .depth ());};
// _fgeb returns the smallest rectangle that contains both `_bdcf` and `_gda`.
func _fgeb (_bdcf ,_gda _ecc .PdfRectangle )_ecc .PdfRectangle {return _ecc .PdfRectangle {Llx :_c .Min (_bdcf .Llx ,_gda .Llx ),Lly :_c .Min (_bdcf .Lly ,_gda .Lly ),Urx :_c .Max (_bdcf .Urx ,_gda .Urx ),Ury :_c .Max (_bdcf .Ury ,_gda .Ury )};};
// isQuadrilateral reports whether the subpath has exactly 4 points, or exactly 5 points of
// which the first and the last are identical.
func (_cbad *subpath )isQuadrilateral ()bool {if len (_cbad ._cada )< 4||len (_cbad ._cada )> 5{return false ;};if len (_cbad ._cada )==5{_gdga :=_cbad ._cada [0];_fgfad :=_cbad ._cada [4];if _gdga .X !=_fgfad .X ||_gdga .Y !=_fgfad .Y {return false ;};};return true ;};
|
||
|
||
// String returns a human readable description of `s`.
|
||
func (_geeec intSet) String() string {
	// Gather the members, then sort them so the output does not depend on map iteration order.
	var members []int
	for key := range _geeec {
		if !_geeec.has(key) {
			continue
		}
		members = append(members, key)
	}
	_ed.Ints(members)
	return _ec.Sprintf("\u0025\u002b\u0076", members)
}
|
||
|
||
// Marks returns the TextMark collection for a page. It represents all the text on the page.
|
||
func (_abfae PageText) Marks() *TextMarkArray {
	marks := new(TextMarkArray)
	marks._bgea = _abfae._faff
	return marks
}

// log prints the table when `_egfe` is set: a header with the title `_adca`, the dimensions,
// the `_fgegf` flag and the rectangle, followed by one line per non-nil cell showing its
// column, row, rectangle, text passed through `_cecc(..., 50)` and rune count.
func (_ddfb *textTable) log(_adca string) {
	if !_egfe {
		return
	}
	_d.Log.Info("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066", _adca, _ddfb._eafbb, _ddfb._becb, _ddfb._fgegf, _ddfb.PdfRectangle)
	for row := 0; row < _ddfb._becb; row++ {
		for col := 0; col < _ddfb._eafbb; col++ {
			cell := _ddfb.get(col, row)
			if cell != nil {
				_ec.Printf("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a", col, row, cell.PdfRectangle, _cecc(cell.text(), 50), _g.RuneCountInString(cell.text()))
			}
		}
	}
}
|
||
|
||
// TextTable represents a table.
|
||
// Cells are ordered top-to-bottom, left-to-right.
|
||
// Cells[y] is the (0-offset) y'th row in the table.
|
||
// Cells[y][x] is the cell in the (0-offset) x'th column of row y.
|
||
type TextTable struct {
	W, H  int
	Cells [][]TableCell
}

// snapToGroups splits the rulings with vertsHorzs, applies snapToGroupsDirection to each
// non-empty group and returns the first group followed by the second.
func (_gdab rulingList) snapToGroups() rulingList {
	verts, horzs := _gdab.vertsHorzs()
	if len(verts) > 0 {
		verts = verts.snapToGroupsDirection()
	}
	if len(horzs) > 0 {
		horzs = horzs.snapToGroupsDirection()
	}
	snapped := append(verts, horzs...)
	snapped.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return snapped
}
|
||
|
||
// String returns a description of `t`.
|
||
func (_ebdb *textTable )String ()string {return _ec .Sprintf ("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074",_ebdb ._eafbb ,_ebdb ._becb ,_ebdb ._fgegf );};func (_gbdf *textObject )setCharSpacing (_gcba float64 ){if _gbdf ==nil {return ;};_gbdf ._faa ._gec =_gcba ;if _ede {_d .Log .Info ("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073",_gcba ,_gbdf ._faa .String ());};};func _fafe (_acbcc []pathSection )rulingList {_aggbb (_acbcc );if _ded {_d .Log .Info ("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073",len (_acbcc ));};var _gaad rulingList ;for _ ,_feage :=range _acbcc {for _ ,_gbgg :=range _feage ._bafa {if len (_gbgg ._cada )< 2{continue ;};_degb :=_gbgg ._cada [0];for _ ,_fbbb :=range _gbgg ._cada [1:]{if _afda ,_dfbda :=_facb (_degb ,_fbbb ,_feage .Color );_dfbda {_gaad =append (_gaad ,_afda );};_degb =_fbbb ;};};};if _ded {_d .Log .Info ("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073",_gaad );};return _gaad ;};func (_bafg paraList )tables ()[]TextTable {var _acfee []TextTable ;if _egfe {_d .Log .Info ("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a");};for _ ,_eaeb :=range _bafg {_abae :=_eaeb ._bbd ;if _abae !=nil &&_abae .isExportable (){_acfee =append (_acfee ,_abae .toTextTable ());};};return _acfee ;};func (_dbfg *textLine )bbox ()_ecc .PdfRectangle {return _dbfg .PdfRectangle };func (_agba *textObject )setTextRise (_abfe float64 ){if _agba ==nil {return ;};_agba ._faa ._age =_abfe ;};func (_bgc *wordBag )applyRemovals (_abccb map[int ]map[*textWord ]struct{}){for _cacac ,_bcfg :=range _abccb {if len (_bcfg )==0{continue ;};_egcb :=_bgc ._cda [_cacac ];_ced :=len (_egcb )-len (_bcfg );if _ced ==0{delete (_bgc ._cda ,_cacac 
);continue ;};_cca :=make ([]*textWord ,_ced );_adf :=0;for _ ,_faaf :=range _egcb {if _ ,_fced :=_bcfg [_faaf ];!_fced {_cca [_adf ]=_faaf ;_adf ++;};};_bgc ._cda [_cacac ]=_cca ;};};type textWord struct{_ecc .PdfRectangle ;_fedcd float64 ;_fede string ;_bebcb []*textMark ;_agagbg float64 ;_eeaa bool ;};func _aedc (_fefg ,_bbgc _ecc .PdfRectangle )bool {return _fefg .Llx <=_bbgc .Llx &&_bbgc .Urx <=_fefg .Urx &&_fefg .Lly <=_bbgc .Lly &&_bbgc .Ury <=_fefg .Ury ;};func _ecae (_ggga ,_eead _ecc .PdfRectangle )bool {return _ggga .Lly <=_eead .Ury &&_eead .Lly <=_ggga .Ury ;};func _bbdf (_dafb []compositeCell )[]float64 {var _dffg []*textLine ;_effe :=0;for _ ,_adde :=range _dafb {_effe +=len (_adde .paraList );_dffg =append (_dffg ,_adde .lines ()...);};_ed .Slice (_dffg ,func (_agage ,_defcc int )bool {_bfaea ,_ebdda :=_dffg [_agage ],_dffg [_defcc ];_efagc ,_gbac :=_bfaea ._eccbg ,_ebdda ._eccbg ;if !_fggg (_efagc -_gbac ){return _efagc < _gbac ;};return _bfaea .Llx < _ebdda .Llx ;});if _egfe {_ec .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_effe ,len (_dffg ));for _cadd ,_bcgdc :=range _dffg {_ec .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_cadd ,_bcgdc );};};var _fdaf []float64 ;_agcd :=_dffg [0];var _gefe [][]*textLine ;_babge :=[]*textLine {_agcd };for _gcec ,_caeeb :=range _dffg [1:]{if _caeeb .Ury < _agcd .Lly {_ddbce :=0.5*(_caeeb .Ury +_agcd .Lly );if _egfe {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_gcec ,_caeeb .Ury ,_agcd .Lly ,_ddbce ,_agcd ,_caeeb );};_fdaf =append (_fdaf ,_ddbce );_gefe =append (_gefe ,_babge );_babge =nil ;};_babge =append (_babge ,_caeeb );if 
_caeeb .Lly < _agcd .Lly {_agcd =_caeeb ;};};if len (_babge )> 0{_gefe =append (_gefe ,_babge );};if _egfe {_ec .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_fdaf );};if _egfe {_d .Log .Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len (_dafb ));for _cfcg ,_cabeb :=range _dafb {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cfcg ,_cabeb );};_d .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_gefe ));for _abdd ,_fbdb :=range _gefe {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_abdd ,len (_fbdb ));for _dfbc ,_adbg :=range _fbdb {_ec .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dfbc ,_adbg );};};};_bgbda :=true ;for _ffgee ,_becf :=range _gefe {_cbdf :=true ;for _eadcg ,_egfd :=range _dafb {if _egfe {_ec .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_ffgee ,len (_gefe ),_eadcg ,len (_dafb ),_egfd );};if !_egfd .hasLines (_becf ){if _egfe {_ec .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_ffgee ,len (_gefe ),_eadcg ,len (_dafb ));};_cbdf =false ;break ;};};if !_cbdf {_bgbda =false ;break ;};};if !_bgbda {if _egfe {_d .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg");};_fdaf =nil ;};if _egfe &&_fdaf !=nil {_ec .Printf 
("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_fdaf );};return _fdaf ;};func (_afgg *textPara )fontsize ()float64 {return _afgg ._beedd [0]._fbgf };func (_eeeaa *textWord )computeText ()string {_fcbdb :=make ([]string ,len (_eeeaa ._bebcb ));for _addac ,_abbe :=range _eeeaa ._bebcb {_fcbdb [_addac ]=_abbe ._ebe ;};return _ae .Join (_fcbdb ,"");};func (_beee *textPara )taken ()bool {return _beee ==nil ||_beee ._bbcfc };type gridTile struct{_ecc .PdfRectangle ;_bbb ,_ffgb ,_bade ,_bbaa bool ;};type markKind int ;func (_bbf *textObject )setFont (_eda string ,_gaa float64 )error {if _bbf ==nil {return nil ;};_bbf ._faa ._ffdb =_gaa ;_bbfb ,_ecb :=_bbf .getFont (_eda );if _ecb !=nil {return _ecb ;};_bbf ._faa ._cec =_bbfb ;if _bbf ._dff .empty (){_bbf ._dff .push (_bbf ._faa );}else {_bbf ._dff .top ()._cec =_bbf ._faa ._cec ;};return nil ;};
|
||
|
||
// Len returns the number of TextMarks in `ma`.
|
||
func (_abcc *TextMarkArray )Len ()int {if _abcc ==nil {return 0;};return len (_abcc ._bgea );};func (_eca *shapesState )closePath (){if _eca ._fafb {_eca ._bece =append (_eca ._bece ,_fbee (_eca ._adaa ));_eca ._fafb =false ;}else if len (_eca ._bece )==0{if _fbdd {_d .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");};_eca ._fafb =false ;return ;};_eca ._bece [len (_eca ._bece )-1].close ();if _fbdd {_d .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_eca );};};func (_dgac *textTable )reduceTiling (_gffb gridTiling ,_dcea float64 )*textTable {_ccda :=make ([]int ,0,_dgac ._becb );_afc :=make ([]int ,0,_dgac ._eafbb );_cafd :=_gffb ._degd ;_gaafe :=_gffb ._bcgf ;for _ggbef :=0;_ggbef < _dgac ._becb ;_ggbef ++{_agcc :=_ggbef > 0&&_c .Abs (_gaafe [_ggbef -1]-_gaafe [_ggbef ])< _dcea &&_dgac .emptyRow (_ggbef );if !_agcc {_ccda =append (_ccda ,_ggbef );};};for _eacbcb :=0;_eacbcb < _dgac ._eafbb ;_eacbcb ++{_cddf :=_eacbcb < _dgac ._eafbb -1&&_c .Abs (_cafd [_eacbcb +1]-_cafd [_eacbcb ])< _dcea &&_dgac .emptyColumn (_eacbcb );if !_cddf {_afc =append (_afc ,_eacbcb );};};if len (_ccda )==_dgac ._becb &&len (_afc )==_dgac ._eafbb {return _dgac ;};_aege :=textTable {_fgegf :_dgac ._fgegf ,_eafbb :len (_afc ),_becb :len (_ccda ),_agcf :make (map[uint64 ]compositeCell ,len (_afc )*len (_ccda ))};if _egfe {_d .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_dgac ._eafbb ,_dgac ._becb ,len (_afc ),len (_ccda ));_d .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_afc );_d .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_ccda );};for _aacg ,_effa :=range _ccda {for _eecag ,_addcg :=range _afc {_acdg ,_badf :=_dgac .getComposite 
(_addcg ,_effa );if len (_acdg )==0{continue ;};if _egfe {_ec .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_eecag ,_aacg ,_addcg ,_effa ,_cecc (_acdg .merge ().text (),50));};_aege .putComposite (_eecag ,_aacg ,_acdg ,_badf );};};return &_aege ;};func (_cdga *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_fac :=make (map[int ]map[*textWord ]struct{},len (_cdga ._cda ));for _baac :=range _cdga ._cda {_fac [_baac ]=make (map[*textWord ]struct{});};return _fac ;};func (_bgcd rulingList )primaries ()[]float64 {_ebed :=make (map[float64 ]struct{},len (_bgcd ));for _ ,_ecfd :=range _bgcd {_ebed [_ecfd ._cbfd ]=struct{}{};};_badda :=make ([]float64 ,len (_ebed ));_eafb :=0;for _cfeba :=range _ebed {_badda [_eafb ]=_cfeba ;_eafb ++;};_ed .Float64s (_badda );return _badda ;};func _cgc (_fbcbd ,_ceae bounded )float64 {return _fbcbd .bbox ().Llx -_ceae .bbox ().Llx };func (_ddcge gridTile )numBorders ()int {_aafgg :=0;if _ddcge ._ffgb {_aafgg ++;};if _ddcge ._bbaa {_aafgg ++;};if _ddcge ._bade {_aafgg ++;};if _ddcge ._bbb {_aafgg ++;};return _aafgg ;};func (_ecbge rulingList )isActualGrid ()(rulingList ,bool ){_bdca ,_aagc :=_ecbge .augmentGrid ();if !(len (_bdca )>=_ccd +1&&len (_aagc )>=_fafa +1){if _ded {_d .Log .Info ("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064",len (_bdca ),len (_aagc ),_ccd +1,_fafa +1);};return nil ,false ;};if _ded {_d .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074",_ecbge ,len (_bdca )>=2,len (_aagc )>=2,len (_bdca )>=2&&len (_aagc )>=2);for _ccce ,_bgbe :=range _ecbge {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a",_ccce ,_bgbe 
);};};if _gee {_gaef ,_gaaa :=_bdca [0],_bdca [len (_bdca )-1];_fedbe ,_beba :=_aagc [0],_aagc [len (_aagc )-1];if !(_cbggf (_gaef ._cbfd -_fedbe ._aef )&&_cbggf (_gaaa ._cbfd -_fedbe ._bcdag )&&_cbggf (_fedbe ._cbfd -_gaef ._bcdag )&&_cbggf (_beba ._cbfd -_gaef ._aef )){if _ded {_d .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073",_gaef ,_gaaa ,_fedbe ,_beba );};return nil ,false ;};}else {if !_bdca .aligned (){if _abbc {_d .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064",len (_bdca ));};return nil ,false ;};if !_aagc .aligned (){if _ded {_d .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064",len (_aagc ));};return nil ,false ;};};_gbca :=append (_bdca ,_aagc ...);return _gbca ,true ;};func _cabgg (_ccbaf []*textWord ,_ffcab *textWord )[]*textWord {for _efde ,_dafaa :=range _ccbaf {if _dafaa ==_ffcab {return _fdeg (_ccbaf ,_efde );};};_d .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_ffcab );return nil ;};
|
||
|
||
// String returns a description of `p`.
|
||
func (_cfb *textPara )String ()string {if _cfb ._eddf {return _ec .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d",_cfb .PdfRectangle );};_agdc :="";if _cfb ._bbd !=nil {_agdc =_ec .Sprintf ("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020",_cfb ._bbd ._eafbb ,_cfb ._bbd ._becb );};return _ec .Sprintf ("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071",_cfb .PdfRectangle ,_agdc ,len (_cfb ._beedd ),_cecc (_cfb .text (),50));};func (_eba *imageExtractContext )processOperand (_fgc *_f .ContentStreamOperation ,_aaf _f .GraphicsState ,_bf *_ecc .PdfPageResources )error {if _fgc .Operand =="\u0042\u0049"&&len (_fgc .Params )==1{_cea ,_eee :=_fgc .Params [0].(*_f .ContentStreamInlineImage );if !_eee {return nil ;};if _de ,_ab :=_gf .GetBoolVal (_cea .ImageMask );_ab {if _de &&!_eba ._ad .IncludeInlineStencilMasks {return nil ;};};return _eba .extractInlineImage (_cea ,_aaf ,_bf );}else if _fgc .Operand =="\u0044\u006f"&&len (_fgc .Params )==1{_fgcd ,_gfbc :=_gf .GetName (_fgc .Params [0]);if !_gfbc {_d .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");return _af ;};_ ,_acf :=_bf .GetXObjectByName (*_fgcd );switch _acf {case _ecc .XObjectTypeImage :return _eba .extractXObjectImage (_fgcd ,_aaf ,_bf );case _ecc .XObjectTypeForm :return _eba .extractFormImages (_fgcd ,_aaf ,_bf );};};return nil ;};func (_bebc paraList )findTables (_gaca []gridTiling )[]*textTable {_bebc .addNeighbours ();_ed .Slice (_bebc ,func (_cdgc ,_face int )bool {return _cdbe (_bebc [_cdgc ],_bebc [_face ])< 0});var _ggcg []*textTable ;if _bgbd {_egbg :=_bebc .findGridTables (_gaca );_ggcg =append (_ggcg ,_egbg ...);};if _dagb {_bcdb :=_bebc .findTextTables ();_ggcg =append (_ggcg ,_bcdb ...);};return _ggcg ;};func (_gcfa paraList )lines ()[]*textLine {var _dgae []*textLine ;for _ ,_bbeb :=range _gcfa {_dgae =append (_dgae ,_bbeb ._beedd ...);};return _dgae ;};func (_bddb 
*stateStack )push (_dab *textState ){_abab :=*_dab ;*_bddb =append (*_bddb ,&_abab )};func _ddda (_ebfa _ecc .PdfRectangle )*ruling {return &ruling {_cgbe :_fdcbd ,_cbfd :_ebfa .Ury ,_aef :_ebfa .Llx ,_bcdag :_ebfa .Urx };};func (_fccf *textPara )bbox ()_ecc .PdfRectangle {return _fccf .PdfRectangle };func (_dbca pathSection )bbox ()_ecc .PdfRectangle {_cfce :=_dbca ._bafa [0]._cada [0];_dga :=_ecc .PdfRectangle {Llx :_cfce .X ,Urx :_cfce .X ,Lly :_cfce .Y ,Ury :_cfce .Y };_gfaf :=func (_cfga _ag .Point ){if _cfga .X < _dga .Llx {_dga .Llx =_cfga .X ;}else if _cfga .X > _dga .Urx {_dga .Urx =_cfga .X ;};if _cfga .Y < _dga .Lly {_dga .Lly =_cfga .Y ;}else if _cfga .Y > _dga .Ury {_dga .Ury =_cfga .Y ;};};for _ ,_dggbc :=range _dbca ._bafa [0]._cada [1:]{_gfaf (_dggbc );};for _ ,_ebfd :=range _dbca ._bafa [1:]{for _ ,_ggab :=range _ebfd ._cada {_gfaf (_ggab );};};return _dga ;};func (_fcfgc intSet )add (_eacf int ){_fcfgc [_eacf ]=struct{}{}};func (_dcdcc *textTable )emptyRow (_abcbe int )bool {for _dddd :=0;_dddd < _dcdcc ._eafbb ;_dddd ++{_ddfg :=_dcdcc .get (_dddd ,_abcbe );if _ddfg !=nil &&_ddfg .text ()!=""{return false ;};};return true ;};
|
||
|
||
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
|
||
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
|
||
func (_abe *Extractor )ExtractTextWithStats ()(_cbg string ,_gbe int ,_eac int ,_dc error ){_acc ,_gbe ,_eac ,_dc :=_abe .ExtractPageText ();if _dc !=nil {return "",_gbe ,_eac ,_dc ;};return _acc .Text (),_gbe ,_eac ,nil ;};func _bege (_gfbf ,_cfggb int )int {if _gfbf < _cfggb {return _gfbf ;};return _cfggb ;};func (_fddg *shapesState )devicePoint (_ffbe ,_eafd float64 )_ag .Point {_gdbd :=_fddg ._ffae .Mult (_fddg ._baca );_ffbe ,_eafd =_gdbd .Transform (_ffbe ,_eafd );return _ag .NewPoint (_ffbe ,_eafd );};type intSet map[int ]struct{};func _gagc (_fdga *Extractor ,_eaea *_ecc .PdfPageResources ,_fce _f .GraphicsState ,_dgb *textState ,_gab *stateStack )*textObject {return &textObject {_gcbf :_fdga ,_fbb :_eaea ,_deb :_fce ,_dff :_gab ,_faa :_dgb ,_aac :_ag .IdentityMatrix (),_gbf :_ag .IdentityMatrix ()};};func _gbba (_ffdag []rulingList )(rulingList ,rulingList ){var _aegf rulingList ;for _ ,_beafc :=range _ffdag {_aegf =append (_aegf ,_beafc ...);};return _aegf .vertsHorzs ();};func (_gbdcb paraList )findTableGrid (_eccc gridTiling )(*textTable ,map[*textPara ]struct{}){_deacc :=len (_eccc ._degd );_geace :=len (_eccc ._bcgf );_daaf :=textTable {_fgegf :true ,_eafbb :_deacc ,_becb :_geace ,_cgfb :make (map[uint64 ]*textPara ,_deacc *_geace ),_agcf :make (map[uint64 ]compositeCell ,_deacc *_geace )};_fabac :=make (map[*textPara ]struct{});_edadge :=int ((1.0-_daaa )*float64 (_deacc *_geace ));_cgfcb :=0;if _eegg {_d .Log .Info ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064",_deacc ,_geace );};for _bbee ,_gbdbd :=range _eccc ._bcgf {_gafae ,_bedb :=_eccc ._aegg [_gbdbd ];if !_bedb {continue ;};for _gaage ,_affg :=range _eccc ._degd {_aged ,_bbde :=_gafae [_affg ];if !_bbde {continue ;};_gdbbb :=_gbdcb .inTile (_aged );if len (_gdbbb )==0{_cgfcb ++;if _cgfcb > _edadge {if _eegg {_d .Log .Info ("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064",_cgfcb );};return nil ,nil 
;};}else {_daaf .putComposite (_gaage ,_bbee ,_gdbbb ,_aged .PdfRectangle );for _ ,_fggf :=range _gdbbb {_fabac [_fggf ]=struct{}{};};};};};_dfec :=0;for _beceg :=0;_beceg < _deacc ;_beceg ++{_bgada :=_daaf .get (_beceg ,0);if _bgada ==nil ||!_bgada ._eddf {_dfec ++;};};if _dfec ==0{if _eegg {_d .Log .Info ("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030");};return nil ,nil ;};_gdgd :=_daaf .reduceTiling (_eccc ,_fbff );_gdgd =_gdgd .subdivide ();return _gdgd ,_fabac ;};func (_fadg *wordBag )removeWord (_fbed *textWord ,_dbg int ){_gfab :=_fadg ._cda [_dbg ];_gfab =_cabgg (_gfab ,_fbed );if len (_gfab )==0{delete (_fadg ._cda ,_dbg );}else {_fadg ._cda [_dbg ]=_gfab ;};};func _gbgfd (_eddde *PageText )error {_cfagd :=_bge .GetLicenseKey ();if _cfagd !=nil &&_cfagd .IsLicensed ()||_dg {return nil ;};_ec .Printf ("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a");_ec .Println ("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f");return _e .New ("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064");};func (_ebgg paraList )writeText (_fgcc _be .Writer ){for _efcc ,_acbg :=range _ebgg {if _acbg ._eddf {continue ;};_acbg .writeText (_fgcc );if _efcc !=len (_ebgg )-1{if _ggdd (_acbg ,_ebgg [_efcc +1]){_fgcc .Write ([]byte ("\u0020"));}else {_fgcc .Write ([]byte ("\u000a"));_fgcc .Write ([]byte ("\u000a"));};};};_fgcc .Write ([]byte ("\u000a"));_fgcc .Write ([]byte ("\u000a"));};func _ccfb (_bggg _ag .Point )_ag .Matrix {return _ag .TranslationMatrix (_bggg .X ,_bggg .Y )};func _beef (_cdea func (*wordBag ,*textWord ,float64 )bool ,_egfg float64 )func (*wordBag 
,*textWord )bool {return func (_ddgf *wordBag ,_eccdg *textWord )bool {return _cdea (_ddgf ,_eccdg ,_egfg )};};func _abgf (_bgcg map[int ][]float64 )string {_ccdb :=_gabeea (_bgcg );_aegccb :=make ([]string ,len (_bgcg ));for _edafc ,_aabcb :=range _ccdb {_aegccb [_edafc ]=_ec .Sprintf ("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066",_aabcb ,_bgcg [_aabcb ]);};return _ec .Sprintf ("\u007b\u0025\u0073\u007d",_ae .Join (_aegccb ,"\u002c\u0020"));};func _bebg (_eggf map[float64 ]map[float64 ]gridTile )[]float64 {_edfa :=make ([]float64 ,0,len (_eggf ));_faee :=make (map[float64 ]struct{},len (_eggf ));for _ ,_bfde :=range _eggf {for _bcdee :=range _bfde {if _ ,_agfb :=_faee [_bcdee ];_agfb {continue ;};_edfa =append (_edfa ,_bcdee );_faee [_bcdee ]=struct{}{};};};_ed .Float64s (_edfa );return _edfa ;};
|
||
|
||
// New returns an Extractor instance for extracting content from the input PDF page.
|
||
func New (page *_ecc .PdfPage )(*Extractor ,error ){_aa ,_bgf :=page .GetAllContentStreams ();if _bgf !=nil {return nil ,_bgf ;};_ece ,_bgf :=page .GetMediaBox ();if _bgf !=nil {return nil ,_ec .Errorf ("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076",_bgf );};_aec :=&Extractor {_gfg :_aa ,_ge :page .Resources ,_cb :*_ece ,_cbc :map[string ]fontEntry {},_eb :map[string ]textResult {}};if _aec ._cb .Llx > _aec ._cb .Urx {_d .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_aec ._cb );_aec ._cb .Llx ,_aec ._cb .Urx =_aec ._cb .Urx ,_aec ._cb .Llx ;};if _aec ._cb .Lly > _aec ._cb .Ury {_d .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_aec ._cb );_aec ._cb .Lly ,_aec ._cb .Ury =_aec ._cb .Ury ,_aec ._cb .Lly ;};return _aec ,nil ;};func (_gfde *wordBag )scanBand (_bcda string ,_adgg *wordBag ,_cfac func (_aecf *wordBag ,_bff *textWord )bool ,_fgb ,_aaec ,_gfcc float64 ,_beab ,_ebfe bool )int {_bcbd :=_adgg ._gcccc ;var _gfgc map[int ]map[*textWord ]struct{};if !_beab {_gfgc =_gfde .makeRemovals ();};_bggc :=_fgdg *_bcbd ;_bgce :=0;for _ ,_cgafc :=range _gfde .depthBand (_fgb -_bggc ,_aaec +_bggc ){if len (_gfde ._cda [_cgafc ])==0{continue ;};for _ ,_gged :=range _gfde ._cda [_cgafc ]{if !(_fgb -_bggc <=_gged ._fedcd &&_gged ._fedcd <=_aaec +_bggc ){continue ;};if !_cfac (_adgg ,_gged ){continue ;};_fgac :=2.0*_c .Abs (_gged ._agagbg -_adgg ._gcccc )/(_gged ._agagbg +_adgg 
._gcccc );_bcfgb :=_c .Max (_gged ._agagbg /_adgg ._gcccc ,_adgg ._gcccc /_gged ._agagbg );_cgd :=_c .Min (_fgac ,_bcfgb );if _gfcc > 0&&_cgd > _gfcc {continue ;};if _adgg .blocked (_gged ){continue ;};if !_beab {_adgg .pullWord (_gged ,_cgafc ,_gfgc );};_bgce ++;if !_ebfe {if _gged ._fedcd < _fgb {_fgb =_gged ._fedcd ;};if _gged ._fedcd > _aaec {_aaec =_gged ._fedcd ;};};if _beab {break ;};};};if !_beab {_gfde .applyRemovals (_gfgc );};return _bgce ;};func (_bdaac *textTable )putComposite (_gagcff ,_aggc int ,_fgfab paraList ,_bcedb _ecc .PdfRectangle ){if len (_fgfab )==0{_d .Log .Error ("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073");return ;};_begg :=compositeCell {_bcedb ,_fgfab };if _egfe {_ec .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a",_gagcff ,_aggc ,_begg .String ());};_begg .updateBBox ();_bdaac ._agcf [_adef (_gagcff ,_aggc )]=_begg ;};
|
||
|
||
// String returns a human readable description of `path`.
|
||
func (_aadg *subpath )String ()string {_aecg :=_aadg ._cada ;_geadd :=len (_aecg );if _geadd <=5{return _ec .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_geadd ,_aecg );};return _ec .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_geadd ,_aecg [0],_aecg [1],_aecg [_geadd -1]);};func (_cadag rectRuling )asRuling ()(*ruling ,bool ){_eceb :=ruling {_cgbe :_cadag ._dbafd ,Color :_cadag .Color ,_fcce :_daag };switch _cadag ._dbafd {case _ccgf :_eceb ._cbfd =0.5*(_cadag .Llx +_cadag .Urx );_eceb ._aef =_cadag .Lly ;_eceb ._bcdag =_cadag .Ury ;_ged ,_fbcda :=_cadag .checkWidth (_cadag .Llx ,_cadag .Urx );if !_fbcda {if _aea {_d .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_cadag );};return nil ,false ;};_eceb ._gfcb =_ged ;case _fdcbd :_eceb ._cbfd =0.5*(_cadag .Lly +_cadag .Ury );_eceb ._aef =_cadag .Llx ;_eceb ._bcdag =_cadag .Urx ;_bdbc ,_cbgba :=_cadag .checkWidth (_cadag .Lly ,_cadag .Ury );if !_cbgba {if _aea {_d .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_cadag );};return nil ,false ;};_eceb ._gfcb =_bdbc ;default:_d .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_cadag ._dbafd );return nil ,false ;};return &_eceb ,true ;};const (RenderModeStroke RenderMode =1<<iota ;RenderModeFill ;RenderModeClip ;);func (_cfgg *textTable )bbox ()_ecc .PdfRectangle {return _cfgg .PdfRectangle };func (_gdbb *textMark )bbox ()_ecc 
.PdfRectangle {return _gdbb .PdfRectangle };func (_bbae compositeCell )parasBBox ()(paraList ,_ecc .PdfRectangle ){return _bbae .paraList ,_bbae .PdfRectangle ;};func (_bbcba rectRuling )checkWidth (_dfgg ,_fbaf float64 )(float64 ,bool ){_aeeg :=_fbaf -_dfgg ;_ffaee :=_aeeg <=_fbac ;return _aeeg ,_ffaee ;};func _cbed (_dcded []_gf .PdfObject )(_aeaf ,_deec float64 ,_cbbbe error ){if len (_dcded )!=2{return 0,0,_ec .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_dcded ));};_bccb ,_cbbbe :=_gf .GetNumbersAsFloat (_dcded );if _cbbbe !=nil {return 0,0,_cbbbe ;};return _bccb [0],_bccb [1],nil ;};func _efdf (_gefb ,_bdeb int )int {if _gefb > _bdeb {return _gefb ;};return _bdeb ;};func (_dgdd *wordBag )minDepth ()float64 {return _dgdd ._baeg -(_dgdd .Ury -_dgdd ._gcccc )};func _egdbb (_ffadf float64 )float64 {return _cafc *_c .Round (_ffadf /_cafc )};type rulingList []*ruling ;func _ebdee (_effg []float64 ,_bced ,_adfb float64 )[]float64 {_fgbe ,_dbbe :=_bced ,_adfb ;if _dbbe < _fgbe {_fgbe ,_dbbe =_dbbe ,_fgbe ;};_acea :=make ([]float64 ,0,len (_effg )+2);_acea =append (_acea ,_bced );for _ ,_fcfeb :=range _effg {if _fcfeb <=_fgbe {continue ;}else if _fcfeb >=_dbbe {break ;};_acea =append (_acea ,_fcfeb );};_acea =append (_acea ,_adfb );return _acea ;};
|
||
|
||
// ExtractPageImages returns the image contents of the page extractor, including data
|
||
// and position, size information for each image.
|
||
// A set of options to control page image extraction can be passed in. The options
|
||
// parameter can be nil for the default options. By default, inline stencil masks
|
||
// are not extracted.
|
||
func (_fg *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_gfb :=&imageExtractContext {_ad :options };_fa :=_gfb .extractContentStreamImages (_fg ._gfg ,_fg ._ge );if _fa !=nil {return nil ,_fa ;};return &PageImages {Images :_gfb ._bgeg },nil ;};func _fdeg (_fffae []*textWord ,_bcgcd int )[]*textWord {_gegf :=len (_fffae );copy (_fffae [_bcgcd :],_fffae [_bcgcd +1:]);return _fffae [:_gegf -1];};func (_egde *ruling )intersects (_dagc *ruling )bool {_ffbc :=(_egde ._cgbe ==_ccgf &&_dagc ._cgbe ==_fdcbd )||(_dagc ._cgbe ==_ccgf &&_egde ._cgbe ==_fdcbd );_ebbd :=func (_dabga ,_gcde *ruling )bool {return _dabga ._aef -_gfdcf <=_gcde ._cbfd &&_gcde ._cbfd <=_dabga ._bcdag +_gfdcf ;};_bbff :=_ebbd (_egde ,_dagc );_fgbag :=_ebbd (_dagc ,_egde );if _ded {_ec .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_ffbc ,_bbff ,_fgbag ,_ffbc &&_bbff &&_fgbag ,_egde ,_dagc );};return _ffbc &&_bbff &&_fgbag ;};type imageExtractContext struct{_bgeg []ImageMark ;_eeb int ;_fd int ;_cag int ;_agf map[*_gf .PdfObjectStream ]*cachedImage ;_ad *ImageExtractOptions ;};func (_eaaf *ruling )encloses (_acga ,_gdfe float64 )bool {return _eaaf ._aef -_gfdcf <=_acga &&_gdfe <=_eaaf ._bcdag +_gfdcf ;};func (_eecfc rulingList )intersections ()map[int ]intSet {var _cfff ,_aabff []int ;for _gdeb ,_fbebe :=range _eecfc {switch _fbebe ._cgbe {case _ccgf :_cfff =append (_cfff ,_gdeb );case _fdcbd :_aabff =append (_aabff ,_gdeb );};};if len (_cfff )< _ccd +1||len (_aabff )< _fafa +1{return nil ;};if len (_cfff )+len (_aabff )> _cagf {_d .Log .Debug 
("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",len (_eecfc ),len (_cfff ),len (_aabff ));return nil ;};_eagf :=make (map[int ]intSet ,len (_cfff )+len (_aabff ));for _ ,_dcdb :=range _cfff {for _ ,_gccag :=range _aabff {if _eecfc [_dcdb ].intersects (_eecfc [_gccag ]){if _ ,_bddba :=_eagf [_dcdb ];!_bddba {_eagf [_dcdb ]=make (intSet );};if _ ,_deeba :=_eagf [_gccag ];!_deeba {_eagf [_gccag ]=make (intSet );};_eagf [_dcdb ].add (_gccag );_eagf [_gccag ].add (_dcdb );};};};return _eagf ;};func _gce (_bdfd bounded )float64 {return -_bdfd .bbox ().Lly };func _dddc (_acde string )string {_fafbc :=[]rune (_acde );return string (_fafbc [:len (_fafbc )-1])};
|
||
|
||
// String returns a description of `l`.
|
||
func (_badc *textLine )String ()string {return _ec .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_badc ._eccbg ,_badc .PdfRectangle ,_badc ._fbgf ,_badc .text ());};func (_ead *PageText )computeViews (){var _afafc rulingList ;if _gcca {_fffe :=_fafe (_ead ._bdde );_afafc =append (_afafc ,_fffe ...);};if _fbec {_cbe :=_ebgd (_ead ._dea );_afafc =append (_afafc ,_cbe ...);};_afafc ,_edga :=_afafc .toTilings ();var _daf paraList ;_deeb :=len (_ead ._adg );for _agfa :=0;_agfa < 360&&_deeb > 0;_agfa +=90{_eeda :=make ([]*textMark ,0,len (_ead ._adg )-_deeb );for _ ,_cdc :=range _ead ._adg {if _cdc ._dbfb ==_agfa {_eeda =append (_eeda ,_cdc );};};if len (_eeda )> 0{_ffc :=_ffgaf (_eeda ,_ead ._bbcfe ,_afafc ,_edga );_daf =append (_daf ,_ffc ...);_deeb -=len (_eeda );};};_gfgd :=new (_cf .Buffer );_daf .writeText (_gfgd );_ead ._edc =_gfgd .String ();_ead ._faff =_daf .toTextMarks ();_ead ._ddd =_daf .tables ();if _egfe {_d .Log .Info ("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064",len (_ead ._ddd ));};};
|
||
|
||
// String returns a description of `v`.
|
||
func (_aabc *ruling )String ()string {if _aabc ._cgbe ==_afaa {return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047";};_agfg ,_bggfb :="\u0078","\u0079";if _aabc ._cgbe ==_fdcbd {_agfg ,_bggfb ="\u0079","\u0078";};_ebcd :="";if _aabc ._gfcb !=0.0{_ebcd =_ec .Sprintf (" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_aabc ._gfcb );};return _ec .Sprintf ("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",_aabc ._cgbe ,_agfg ,_aabc ._cbfd ,_bggfb ,_aabc ._aef ,_aabc ._bcdag ,_aabc ._bcdag -_aabc ._aef ,_aabc ._fcce ,_aabc .Color ,_ebcd );};func (_gbfg *wordBag )firstWord (_fabf int )*textWord {return _gbfg ._cda [_fabf ][0]};func (_cgdfc gridTile )complete ()bool {return _cgdfc .numBorders ()==4};func (_fbgffb rulingList )snapToGroupsDirection ()rulingList {_fbgffb .sortStrict ();_eeeg :=make (map[*ruling ]rulingList ,len (_fbgffb ));_dcbb :=_fbgffb [0];_ccfbc :=func (_dcba *ruling ){_dcbb =_dcba ;_eeeg [_dcbb ]=rulingList {_dcba }};_ccfbc (_fbgffb [0]);for _ ,_gdfg :=range _fbgffb [1:]{if _gdfg ._cbfd < _dcbb ._cbfd -_dfbb {_d .Log .Error ("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073",_dcbb ,_gdfg );};if _gdfg ._cbfd > _dcbb ._cbfd +_fbac {_ccfbc (_gdfg );}else {_eeeg [_dcbb ]=append (_eeeg [_dcbb ],_gdfg );};};_aafff :=make (map[*ruling ]float64 ,len (_eeeg ));_acecb :=make (map[*ruling ]*ruling ,len (_fbgffb ));for _eacbg ,_befg :=range _eeeg {_aafff [_eacbg ]=_befg .mergePrimary ();for _ ,_bfbb :=range _befg {_acecb [_bfbb ]=_eacbg ;};};for 
_ ,_bagde :=range _fbgffb {_bagde ._cbfd =_aafff [_acecb [_bagde ]];};_babga :=make (rulingList ,0,len (_fbgffb ));for _ ,_ddgbb :=range _eeeg {_gbga :=_ddgbb .splitSec ();for _ceaaa ,_fabe :=range _gbga {_fgfb :=_fabe .merge ();if len (_babga )> 0{_bdcg :=_babga [len (_babga )-1];if _bdcg .alignsPrimary (_fgfb )&&_bdcg .alignsSec (_fgfb ){_d .Log .Error ("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073",_ceaaa ,_bdcg ,_fgfb );continue ;};};_babga =append (_babga ,_fgfb );};};_babga .sortStrict ();return _babga ;};func (_gaag compositeCell )hasLines (_fbaa []*textLine )bool {for _fgg ,_abdf :=range _fbaa {_dcga :=_aaeb (_gaag .PdfRectangle ,_abdf .PdfRectangle );if _egfe {_ec .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a",_dcga ,_fgg ,len (_fbaa ));_ec .Printf ("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a",_gaag );_ec .Printf ("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a",_abdf );};if _dcga {return true ;};};return false ;};type subpath struct{_cada []_ag .Point ;_eagd bool ;};func _gabeea (_aabfb map[int ][]float64 )[]int {_bbeg :=make ([]int ,len (_aabfb ));_cabd :=0;for _beecb :=range _aabfb {_bbeg [_cabd ]=_beecb ;_cabd ++;};_ed .Ints (_bbeg );return _bbeg ;};var _ecbd =map[rulingKind ]string {_afaa :"\u006e\u006f\u006e\u0065",_fdcbd :"\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_ccgf :"\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"};func (_abaf *stateStack )empty ()bool {return len (*_abaf )==0};
|
||
|
||
// TableCell is a cell in a TextTable.
|
||
type TableCell struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Marks returns the TextMarks corresponding to the text in Text.
|
||
Marks TextMarkArray ;};func (_dcce paraList )findGridTables (_ebda []gridTiling )[]*textTable {if _egfe {_d .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_dcce ));for _gdac ,_bcfd :=range _dcce {_ec .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gdac ,_bcfd );};};var _dacc []*textTable ;for _bafag ,_bffb :=range _ebda {_dageb ,_gcdef :=_dcce .findTableGrid (_bffb );if _dageb !=nil {_dageb .log (_ec .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_bafag ));_dacc =append (_dacc ,_dageb );_dageb .markCells ();};for _gcea :=range _gcdef {_gcea ._bbcfc =true ;};};if _egfe {_d .Log .Info ("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s",len (_dacc ));};return _dacc ;};func (_edgb *shapesState )drawRectangle (_fbga ,_eade ,_edce ,_fdbbc float64 ){if _fbdd {_ffga :=_edgb .devicePoint (_fbga ,_eade );_aggg :=_edgb .devicePoint (_fbga +_edce ,_eade +_fdbbc );_bfc :=_ecc .PdfRectangle {Llx :_ffga .X ,Lly :_ffga .Y ,Urx :_aggg .X ,Ury :_aggg .Y };_d .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_bfc );};_edgb .newSubPath ();_edgb .moveTo (_fbga ,_eade );_edgb .lineTo (_fbga +_edce ,_eade );_edgb .lineTo (_fbga +_edce ,_eade +_fdbbc );_edgb .lineTo (_fbga ,_eade +_fdbbc );_edgb .closePath ();};const _fcff =10;func _aafe (_dgga ,_cacb _ecc .PdfRectangle )(_ecc .PdfRectangle ,bool ){if !_aaeb (_dgga ,_cacb ){return _ecc .PdfRectangle {},false ;};return _ecc .PdfRectangle {Llx :_c .Max (_dgga .Llx ,_cacb .Llx ),Urx :_c .Min (_dgga .Urx ,_cacb .Urx ),Lly :_c .Max (_dgga .Lly ,_cacb .Lly ),Ury :_c .Min (_dgga .Ury ,_cacb .Ury )},true ;};func _cde (_ggd *_f .ContentStreamOperation )(float64 ,error ){if len (_ggd .Params )!=1{_fgcf :=_e .New 
("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");_d .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_ggd .Operand ,1,len (_ggd .Params ),_ggd .Params );return 0.0,_fgcf ;};return _gf .GetNumberAsFloat (_ggd .Params [0]);};func _aaaggf (_fcee map[float64 ]map[float64 ]gridTile )[]float64 {_dbge :=make ([]float64 ,0,len (_fcee ));for _ecea :=range _fcee {_dbge =append (_dbge ,_ecea );};_ed .Float64s (_dbge );_cegg :=len (_dbge );for _affcd :=0;_affcd < _cegg /2;_affcd ++{_dbge [_affcd ],_dbge [_cegg -1-_affcd ]=_dbge [_cegg -1-_affcd ],_dbge [_affcd ];};return _dbge ;};
|
||
|
||
// ToText returns the page text as a single string.
|
||
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
|
||
// Text() instead.
|
||
func (_acg PageText )ToText ()string {return _acg .Text ()};func (_eef *textObject )setTextLeading (_dae float64 ){if _eef ==nil {return ;};_eef ._faa ._bbgf =_dae ;};const (_afaa rulingKind =iota ;_fdcbd ;_ccgf ;);func (_dbbc *textPara )depth ()float64 {if _dbbc ._eddf {return -1.0;};if len (_dbbc ._beedd )> 0{return _dbbc ._beedd [0]._eccbg ;};return _dbbc ._bbd .depth ();};func (_eafeb paraList )xNeighbours (_fcbb float64 )map[*textPara ][]int {_agagea :=make ([]event ,2*len (_eafeb ));if _fcbb ==0{for _dcca ,_fbdee :=range _eafeb {_agagea [2*_dcca ]=event {_fbdee .Llx ,true ,_dcca };_agagea [2*_dcca +1]=event {_fbdee .Urx ,false ,_dcca };};}else {for _bebad ,_cadge :=range _eafeb {_agagea [2*_bebad ]=event {_cadge .Llx -_fcbb *_cadge .fontsize (),true ,_bebad };_agagea [2*_bebad +1]=event {_cadge .Urx +_fcbb *_cadge .fontsize (),false ,_bebad };};};return _eafeb .eventNeighbours (_agagea );};func (_gfbb *wordBag )maxDepth ()float64 {return _gfbb ._baeg -_gfbb .Lly };func (_gdbce *textTable )getDown ()paraList {_ffec :=make (paraList ,_gdbce ._eafbb );for _eeafg :=0;_eeafg < _gdbce ._eafbb ;_eeafg ++{_ggacb :=_gdbce .get (_eeafg ,_gdbce ._becb -1)._ddfea ;if _ggacb ==nil ||_ggacb ._bbcfc {return nil ;};_ffec [_eeafg ]=_ggacb ;};for _dafc :=0;_dafc < _gdbce ._eafbb -1;_dafc ++{if _ffec [_dafc ]._feag !=_ffec [_dafc +1]{return nil ;};};return _ffec ;}; |