unipdf/extractor/extractor.go

842 lines
186 KiB
Go
Raw Normal View History

2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2022-06-27 19:58:38 +00:00
package extractor ;import (_cde "bytes";_f "errors";_be "fmt";_eg "github.com/unidoc/unipdf/v3/common";_cce "github.com/unidoc/unipdf/v3/contentstream";_db "github.com/unidoc/unipdf/v3/core";_ba "github.com/unidoc/unipdf/v3/internal/license";_a "github.com/unidoc/unipdf/v3/internal/textencoding";
_g "github.com/unidoc/unipdf/v3/internal/transform";_bd "github.com/unidoc/unipdf/v3/model";_fd "golang.org/x/text/unicode/norm";_fg "golang.org/x/xerrors";_fb "image/color";_d "io";_bf "math";_e "regexp";_cc "sort";_dc "strings";_cd "unicode";_c "unicode/utf8";
);
// _bbbca deletes the element at index `_gefe` from `_gcade`: the later elements are shifted down
// one slot in place (the caller's backing array is modified) and the slice, one element shorter,
// is returned. An index outside the slice panics.
func _bbbca (_gcade []*textWord ,_gefe int )[]*textWord {_bdgec :=len (_gcade );copy (_gcade [_gefe :],_gcade [_gefe +1:]);return _gcade [:_bdgec -1];};
// empty reports whether the bag's `_fadg` map has no entry for key `_cbba`.
func (_edegf *wordBag )empty (_cbba int )bool {_ ,_gee :=_edegf ._fadg [_cbba ];return !_gee };
// _ccbff classifies a rectangle by its dimensions. When the width exceeds the height and is at
// least `_cafe` it returns `_ddga`; when the width does not exceed the height and the height is at
// least `_cafe` it returns `_gaba`; in every other case it returns `_eedb`.
func _ccbff (_fdgab _bd .PdfRectangle )rulingKind {_aaed :=_fdgab .Width ();
_cbdd :=_fdgab .Height ();if _aaed > _cbdd {if _aaed >=_cafe {return _ddga ;};}else {if _cbdd >=_cafe {return _gaba ;};};return _eedb ;};
// writeCellText writes the text of every line in `_gadg` to `_cceb`. When `_addg` is set and a
// line other than the last one ends in a hyphen, the line's text is passed through `_bfe` before
// being written and no separator follows it. Otherwise every line except the last is followed by
// the string `_begc` returns for the `_gddec` values of the line and its successor.
// The results of Write are discarded.
func (_bged *textPara )writeCellText (_cceb _d .Writer ){for _baacf ,_dbbe :=range _bged ._gadg {_gacfe :=_dbbe .text ();
_cgcg :=_addg &&_dbbe .endsInHyphen ()&&_baacf !=len (_bged ._gadg )-1;if _cgcg {_gacfe =_bfe (_gacfe );};_cceb .Write ([]byte (_gacfe ));if !(_cgcg ||_baacf ==len (_bged ._gadg )-1){_cceb .Write ([]byte (_begc (_dbbe ._gddec ,_bged ._gadg [_baacf +1]._gddec )));
};};};
// findTables calls addNeighbours on the list, sorts the list in place with `_fdcf` as the
// comparison, and then collects tables: the result of findGridTables(_fggc) when `_adbe` is set,
// followed by the result of findTextTables() when `_aaeaf` is set.
func (_afbfe paraList )findTables (_fggc []gridTiling )[]*textTable {_afbfe .addNeighbours ();_cc .Slice (_afbfe ,func (_cafec ,_bbbdff int )bool {return _fdcf (_afbfe [_cafec ],_afbfe [_bbbdff ])< 0});var _faded []*textTable ;if _adbe {_gcgd :=_afbfe .findGridTables (_fggc );
_faded =append (_faded ,_gcgd ...);};if _aaeaf {_cceg :=_afbfe .findTextTables ();_faded =append (_faded ,_cceg ...);};return _faded ;};
// setWordSpacing stores `_dad` in the text object's `_dff._fec` field. A nil receiver is a no-op.
func (_bab *textObject )setWordSpacing (_dad float64 ){if _bab ==nil {return ;};_bab ._dff ._fec =_dad ;};
// _gade returns a new textPara whose rectangle is `_bcba` and whose `_gadg` lines are `_efbfd`.
func _gade (_bcba _bd .PdfRectangle ,_efbfd []*textLine )*textPara {return &textPara {PdfRectangle :_bcba ,_gadg :_efbfd };
2022-04-27 00:10:33 +00:00
};
2022-06-27 19:58:38 +00:00
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct{Images []ImageMark ;};
// sort orders every word slice stored in the bag's `_fadg` map in place, using `_afdf` as the
// comparison (an element sorts first when `_afdf` returns a negative value).
func (_ecee *wordBag )sort (){for _ ,_dggb :=range _ecee ._fadg {_cc .Slice (_dggb ,func (_ddcg ,_afba int )bool {return _afdf (_dggb [_ddcg ],_dggb [_afba ])< 0});};};
// getComposite looks up the composite cell stored in `_agga` under the key `_bddbg(_gcegg, _ccgf)`.
// It returns nil and a zero rectangle when no cell is stored there, otherwise the result of the
// cell's parasBBox(). When `_eeca` is set the lookup is also printed to standard output.
func (_ddbad *textTable )getComposite (_gcegg ,_ccgf int )(paraList ,_bd .PdfRectangle ){_cbbb ,_fdaae :=_ddbad ._agga [_bddbg (_gcegg ,_ccgf )];
if _eeca {_be .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a",_gcegg ,_ccgf ,_cbbb .String ());};if !_fdaae {return nil ,_bd .PdfRectangle {};
};return _cbbb .parasBBox ();};
// ruling holds a rulingKind, a markKind, an embedded color and four float64 values.
// NOTE(review): the meaning of the individual float64 fields is not visible in this declaration;
// confirm against the functions that populate them.
type ruling struct{_gggf rulingKind ;_beaec markKind ;_fb .Color ;_ccb float64 ;_gaad float64 ;_gdaf float64 ;_cfbf float64 ;};
// _cecgg reports whether `_fefg` and `_bdcc` differ by no more than `_cebae`.
func _cecgg (_fefg ,_bdcc float64 )bool {return _bf .Abs (_fefg -_bdcc )<=_cebae };
// _fdcf compares two bounded values. It returns the result of `_afdf` unless `_bcaga` reports true
// for that result, in which case it returns the result of `_ebbf` instead.
func _fdcf (_gfbg ,_edag bounded )float64 {_gdag :=_afdf (_gfbg ,_edag );
if !_bcaga (_gdag ){return _gdag ;};return _ebbf (_gfbg ,_edag );};
// push appends a pointer to a copy of `*_cebd` to the stack, so later changes made through `_cebd`
// do not affect the stored entry.
func (_aadb *stateStack )push (_cebd *textState ){_cgfd :=*_cebd ;*_aadb =append (*_aadb ,&_cgfd )};
// split builds a textTable from the composite cell.
//
// `_adea` and `_cbece` are passed through `_abfgd` together with the cell's (Ury, Lly) and
// (Llx, Urx) respectively. Each pair of consecutive values in the results then bounds one
// sub-rectangle: `_adea` supplies Ury/Lly and `_cbece` supplies Llx/Urx. Every line of the cell's
// paragraphs is assigned to the first sub-rectangle that contains it according to `_gbae`; lines
// contained in no sub-rectangle are dropped. For each sub-rectangle that received at least one
// line a textPara is created with `_gade` and stored in the table with put.
//
// The table's `_bgcfb` is len(_cbece)+1 and its `_gccb` is len(_adea)+1, both measured before
// `_abfgd` is applied. The cell's paraList is sorted in place by Lly and then Llx.
// When `_eeca` is set, diagnostics are logged and printed to standard output.
func (_fbeg compositeCell )split (_adea ,_cbece []float64 )*textTable {_cdee :=len (_adea )+1;
_fedb :=len (_cbece )+1;if _eeca {_eg .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_fedb ,_cdee ,_fbeg ,_adea ,_cbece );
_be .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_fbeg .paraList ));for _edfcb ,_adad :=range _fbeg .paraList {_be .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_edfcb ,_adad .String ());};
_be .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_fbeg .lines ()));for _gcfd ,_eeec :=range _fbeg .lines (){_be .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gcfd ,_eeec );};};_adea =_abfgd (_adea ,_fbeg .Ury ,_fbeg .Lly );
_cbece =_abfgd (_cbece ,_fbeg .Llx ,_fbeg .Urx );_fdbd :=make (map[uint64 ]*textPara ,_fedb *_cdee );_ecce :=textTable {_bgcfb :_fedb ,_gccb :_cdee ,_deedc :_fdbd };_aaec :=_fbeg .paraList ;_cc .Slice (_aaec ,func (_bfeg ,_abfgg int )bool {_aggf ,_aagdg :=_aaec [_bfeg ],_aaec [_abfgg ];
_dbgg ,_dcdgc :=_aggf .Lly ,_aagdg .Lly ;if _dbgg !=_dcdgc {return _dbgg < _dcdgc ;};return _aggf .Llx < _aagdg .Llx ;});_ccfa :=make (map[uint64 ]_bd .PdfRectangle ,_fedb *_cdee );for _becfb ,_eccgf :=range _adea [1:]{_dgeg :=_adea [_becfb ];for _bgag ,_abaeg :=range _cbece [1:]{_dgade :=_cbece [_bgag ];
_ccfa [_bddbg (_bgag ,_becfb )]=_bd .PdfRectangle {Llx :_dgade ,Urx :_abaeg ,Lly :_eccgf ,Ury :_dgeg };};};if _eeca {_eg .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073");
_be .Printf ("\u0020\u0020\u0020\u0020");for _dfec :=0;_dfec < _fedb ;_dfec ++{_be .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_dfec );};_be .Println ();for _eaegae :=0;_eaegae < _cdee ;_eaegae ++{_be .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_eaegae );
for _caec :=0;_caec < _fedb ;_caec ++{_be .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_ccfa [_bddbg (_caec ,_eaegae )]);};_be .Println ();};};_gcdg :=func (_fefa *textLine )(int ,int ){for _cdcd :=0;_cdcd < _cdee ;_cdcd ++{for _caff :=0;_caff < _fedb ;
_caff ++{if _gbae (_ccfa [_bddbg (_caff ,_cdcd )],_fefa .PdfRectangle ){return _caff ,_cdcd ;};};};return -1,-1;};_egaa :=make (map[uint64 ][]*textLine ,_fedb *_cdee );for _ ,_bdbc :=range _aaec .lines (){_bdge ,_cegb :=_gcdg (_bdbc );if _bdge < 0{continue ;
};_egaa [_bddbg (_bdge ,_cegb )]=append (_egaa [_bddbg (_bdge ,_cegb )],_bdbc );};for _eaaf :=0;_eaaf < len (_adea )-1;_eaaf ++{_eeece :=_adea [_eaaf ];_gfdc :=_adea [_eaaf +1];for _bgfa :=0;_bgfa < len (_cbece )-1;_bgfa ++{_acef :=_cbece [_bgfa ];_beae :=_cbece [_bgfa +1];
_febg :=_bd .PdfRectangle {Llx :_acef ,Urx :_beae ,Lly :_gfdc ,Ury :_eeece };_ddde :=_egaa [_bddbg (_bgfa ,_eaaf )];if len (_ddde )==0{continue ;};_dceb :=_gade (_febg ,_ddde );_ecce .put (_bgfa ,_eaaf ,_dceb );};};return &_ecce ;};
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
2022-06-27 19:58:38 +00:00
// Extractor carries the state used while extracting content from a PDF page.
//
// NOTE(review): the field names are obfuscated. The roles noted below are guesses based on the
// field types alone and must be confirmed against the methods that read and write them.
type Extractor struct {
	_da  string                // presumably the page content stream text; TODO confirm
	_dbb *_bd.PdfPageResources // page resources
	_dd  _bd.PdfRectangle      // presumably the page's media box; TODO confirm
	_bg  map[string]fontEntry  // fontEntry values keyed by string (looks like a font cache)
	_dg  map[string]textResult // textResult values keyed by string (looks like cached results)
	_gb  int64                 // 64-bit counter; purpose not visible here
	_dbf int                   // counter; purpose not visible here
}
2022-04-27 00:10:33 +00:00
2022-06-27 19:58:38 +00:00
// String returns a description of `w`: its `_acag` value, its bounding rectangle, its `_efag`
// value and its text `_debad`.
func (w *textWord) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _be.Sprintf(layout, w._acag, w.PdfRectangle, w._efag, w._debad)
}

// _gcg is the constant one thousandth.
const _gcg = 1.0 / 1000.0

// depthIndexes returns the keys of the bag's `_fadg` map in increasing order, or nil when the
// map is empty.
func (bag *wordBag) depthIndexes() []int {
	count := len(bag._fadg)
	if count == 0 {
		return nil
	}
	keys := make([]int, 0, count)
	for key := range bag._fadg {
		keys = append(keys, key)
	}
	_cc.Ints(keys)
	return keys
}

// _aagde returns the keys of `_eegdd` in increasing order. The result is never nil.
func _aagde(_eegdd map[int]intSet) []int {
	keys := make([]int, len(_eegdd))
	next := 0
	for key := range _eegdd {
		keys[next] = key
		next++
	}
	_cc.Ints(keys)
	return keys
}

// setTextLeading stores `leading` in the text object's `_dff._dde` field. A nil receiver is a
// no-op.
func (to *textObject) setTextLeading(leading float64) {
	if to != nil {
		to._dff._dde = leading
	}
}

// _beage builds the gridTile for `rect`. Each of the four flags is true only when the matching
// ruling is non-nil and its encloses method accepts the rectangle's extent: the first two rulings
// are tested against (Lly, Ury), the last two against (Llx, Urx).
func _beage(rect _bd.PdfRectangle, first, second, third, fourth *ruling) gridTile {
	tile := gridTile{PdfRectangle: rect}
	if first != nil {
		tile._fgbc = first.encloses(rect.Lly, rect.Ury)
	}
	if second != nil {
		tile._eafc = second.encloses(rect.Lly, rect.Ury)
	}
	if third != nil {
		tile._ccadg = third.encloses(rect.Llx, rect.Urx)
	}
	if fourth != nil {
		tile._bfgeab = fourth.encloses(rect.Llx, rect.Urx)
	}
	return tile
}

// firstReadingIndex starts from index `start` and scans the indexes returned by depthBand for the
// band beginning at float64(start+1)*_abab and extending `_ggba` times the `_efag` value of the
// first word at `start`. It returns the index whose first word sorts earliest under `_afdf`,
// or `start` itself when no scanned index sorts before it.
func (bag *wordBag) firstReadingIndex(start int) int {
	size := bag.firstWord(start)._efag
	lo := float64(start+1) * _abab
	hi := lo + _ggba*size
	best := start
	for _, idx := range bag.depthBand(lo, hi) {
		if _afdf(bag.firstWord(idx), bag.firstWord(best)) >= 0 {
			continue
		}
		best = idx
	}
	return best
}

// absorb merges `other` into `w`: the rectangle becomes the result of `_gcff` applied to both
// rectangles and the other word's `_bbacg` entries are appended to this word's.
func (w *textWord) absorb(other *textWord) {
	w._bbacg = append(w._bbacg, other._bbacg...)
	w.PdfRectangle = _gcff(w.PdfRectangle, other.PdfRectangle)
}

// bbox returns the smallest rectangle containing every point of every subpath in the section.
// The section must hold at least one subpath with at least one point; otherwise indexing panics.
func (_ddgc pathSection) bbox() _bd.PdfRectangle {
	origin := _ddgc._aga[0]._gcbb[0]
	box := _bd.PdfRectangle{Llx: origin.X, Urx: origin.X, Lly: origin.Y, Ury: origin.Y}
	grow := func(pt _g.Point) {
		switch {
		case pt.X < box.Llx:
			box.Llx = pt.X
		case pt.X > box.Urx:
			box.Urx = pt.X
		}
		switch {
		case pt.Y < box.Lly:
			box.Lly = pt.Y
		case pt.Y > box.Ury:
			box.Ury = pt.Y
		}
	}
	for i, sub := range _ddgc._aga {
		pts := sub._gcbb
		if i == 0 {
			// The first point of the first subpath seeded the box.
			pts = pts[1:]
		}
		for _, pt := range pts {
			grow(pt)
		}
	}
	return box
}

// _eaab binds `_eddb` as the third argument of `_gbc` and returns the resulting two-argument
// function.
func _eaab(_gbc func(*wordBag, *textWord, float64) bool, _eddb float64) func(*wordBag, *textWord) bool {
	bound := func(bag *wordBag, word *textWord) bool {
		return _gbc(bag, word, _eddb)
	}
	return bound
}

// _gddeb returns the keys of `_aegdg` in increasing order. The result is never nil.
func _gddeb(_aegdg map[float64]gridTile) []float64 {
	keys := make([]float64, len(_aegdg))
	next := 0
	for key := range _aegdg {
		keys[next] = key
		next++
	}
	_cc.Float64s(keys)
	return keys
}

// _cbfbe returns the smaller of two ints.
func _cbfbe(a, b int) int {
	if b <= a {
		return b
	}
	return a
}

// addPoint converts (x, y) with devicePoint and either adds the point to the subpath returned by
// establishSubpath or, when that is nil, sets `_beg` and records the point in `_eee`.
func (ss *shapesState) addPoint(x, y float64) {
	current := ss.establishSubpath()
	pt := ss.devicePoint(x, y)
	if current != nil {
		current.add(pt)
		return
	}
	ss._beg = true
	ss._eee = pt
}

// _fafbc reports whether the two points have exactly equal coordinates.
func _fafbc(a, b _g.Point) bool {
	if a.X != b.X {
		return false
	}
	return a.Y == b.Y
}
// absorb moves every word of `src` into the receiver. Each word is handed to pullWord together
// with its key in `src._fadg` and a removals table created by `src.makeRemovals()`; once all
// words have been visited the table is applied to `src` with applyRemovals.
func (dst *wordBag) absorb(src *wordBag) {
	pending := src.makeRemovals()
	for depthIdx := range src._fadg {
		words := src._fadg[depthIdx]
		for i := range words {
			dst.pullWord(words[i], depthIdx, pending)
		}
	}
	src.applyRemovals(pending)
}
2022-04-27 00:10:33 +00:00
2022-06-27 19:58:38 +00:00
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	// _ggbg holds the marks of the collection in slice order.
	_ggbg []TextMark
}
2022-04-27 00:10:33 +00:00
// String returns a string describing `ma`.
2022-06-27 19:58:38 +00:00
// String returns a string describing `ma`: a fixed marker when the array holds no marks,
// otherwise the number of marks followed by the first and the last mark.
func (ma TextMarkArray) String() string {
	marks := ma._ggbg
	if len(marks) == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	head, tail := marks[0], marks[len(marks)-1]
	return _be.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", len(marks), head, tail)
}

// imageExtractContext is the working state of image extraction.
// NOTE(review): the roles of the int fields are not visible here; confirm against their users.
type imageExtractContext struct {
	_daa []ImageMark                            // image marks collected so far
	_ec  int                                    // counter; purpose not visible here
	_de  int                                    // counter; purpose not visible here
	_gf  int                                    // counter; purpose not visible here
	_ac  map[*_db.PdfObjectStream]*cachedImage // cachedImage values keyed by stream object
	_gfe *ImageExtractOptions                   // extraction options
}

// _gfac tries to build a ruling from the segment between `from` and `to` with color `col`.
// The kind is computed with `_ggcd`; when it equals `_eedb` the function returns (nil, false),
// otherwise it returns whatever the lineRuling's asRuling method yields.
func _gfac(from, to _g.Point, col _fb.Color) (*ruling, bool) {
	kind := _ggcd(from, to)
	if kind == _eedb {
		return nil, false
	}
	line := lineRuling{_eadee: from, _feab: to, _dgfg: kind, Color: col}
	return line.asRuling()
}

// last returns the final point of the subpath. It panics when the subpath has no points.
func (sp *subpath) last() _g.Point {
	pts := sp._gcbb
	return pts[len(pts)-1]
}
2021-01-07 14:20:10 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct{
2020-11-23 22:15:56 +00:00
2020-12-06 13:03:03 +00:00
// Text is the extracted text.
Text string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// BBox is the bounding box of the text.
2022-06-27 19:58:38 +00:00
BBox _bd .PdfRectangle ;
2021-01-07 14:20:10 +00:00
// Font is the font the text was drawn with.
2022-06-27 19:58:38 +00:00
Font *_bd .PdfFont ;
2021-01-07 14:20:10 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-06-27 19:58:38 +00:00
FillColor _fb .Color ;
2021-01-07 14:20:10 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-06-27 19:58:38 +00:00
StrokeColor _fb .Color ;
2021-01-07 14:20:10 +00:00
// Orientation is the text orientation.
// NOTE(review): the unit and the set of valid values are not visible here; confirm.
2022-06-27 19:58:38 +00:00
Orientation int ;};
// _efcg formats `_gaaf` for diagnostics: the keys are obtained from `_ggadbb`, each entry is
// rendered as "key: values" with two decimals, and the entries are joined with ", " inside braces.
func _efcg (_gaaf map[int ][]float64 )string {_bfcgc :=_ggadbb (_gaaf );_gdaeb :=make ([]string ,len (_gaaf ));for _acga ,_ecac :=range _bfcgc {_gdaeb [_acga ]=_be .Sprintf ("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066",_ecac ,_gaaf [_ecac ]);
};return _be .Sprintf ("\u007b\u0025\u0073\u007d",_dc .Join (_gdaeb ,"\u002c\u0020"));};
// compositeColCorridors returns a map that has a nil entry for every index from 0 up to, but not
// including, the table's `_bgcfb`. When `_eeca` is set the call is logged.
func (_adacb *textTable )compositeColCorridors ()map[int ][]float64 {_edad :=make (map[int ][]float64 ,_adacb ._bgcfb );if _eeca {_eg .Log .Info ("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020",_adacb ._bgcfb );
};for _dgga :=0;_dgga < _adacb ._bgcfb ;_dgga ++{_edad [_dgga ]=nil ;};return _edad ;};
// compositeRowCorridors computes one entry per index from 1 up to, but not including, the table's
// `_gccb`. For each such index it gathers the composite cells stored in `_agga` for every first
// index below `_bgcfb`; when at least one cell exists, the result of `_fdebfb` for those cells is
// stored under the index. Indexes without cells, and index 0, get no entry.
// When `_eeca` is set the call is logged and each computed entry is printed.
func (_dfae *textTable )compositeRowCorridors ()map[int ][]float64 {_afec :=make (map[int ][]float64 ,_dfae ._gccb );if _eeca {_eg .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_dfae ._gccb );
};for _edagab :=1;_edagab < _dfae ._gccb ;_edagab ++{var _aeed []compositeCell ;for _cgdd :=0;_cgdd < _dfae ._bgcfb ;_cgdd ++{if _fgafce ,_fbef :=_dfae ._agga [_bddbg (_cgdd ,_edagab )];_fbef {_aeed =append (_aeed ,_fgafce );};};if len (_aeed )==0{continue ;
};_caca :=_fdebfb (_aeed );_afec [_edagab ]=_caca ;if _eeca {_be .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_edagab ,_caca );};};return _afec ;};
// _aggc returns a ruling of kind `_gaba` whose `_ccb` is the rectangle's Urx, whose `_gaad` is its
// Lly and whose `_gdaf` is its Ury. All other fields keep their zero values.
func _aggc (_dgba _bd .PdfRectangle )*ruling {return &ruling {_gggf :_gaba ,_ccb :_dgba .Urx ,_gaad :_dgba .Lly ,_gdaf :_dgba .Ury };
};
// _aceb returns the result of `_ecag` applied to the `_bgca` values of the two paragraphs.
func _aceb (_dcff ,_aff *textPara )bool {return _ecag (_dcff ._bgca ,_aff ._bgca )};
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// String returns a string describing `tm`.
func (_eefe TextMark )String ()string {_cac :=_eefe .BBox ;var _ccf string ;if _eefe .Font !=nil {_ccf =_eefe .Font .String ();if len (_ccf )> 50{_ccf =_ccf [:50]+"\u002e\u002e\u002e";};};var _afbd string ;if _eefe .Meta {_afbd ="\u0020\u002a\u004d\u002a";
};return _be .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_eefe .Offset ,_eefe .Text ,[]rune (_eefe .Text ),_cac .Llx ,_cac .Lly ,_cac .Urx ,_cac .Ury ,_ccf ,_afbd );
};
// scanBand visits the words of the receiver whose `_acag` value lies in
// [_gbf - m, _edbeb + m], where m is `_bbcc` times `_egge._dbga`, and counts those that qualify.
// A word qualifies when `_bcce(_egge, word)` is true, when its `_efag` is close enough to
// `_egge._dbga` (the smaller of a relative-difference measure and a ratio measure must not
// exceed `_acbf`; the check is skipped when `_acbf` is not positive) and when `_egge.blocked`
// does not reject it.
//
// When `_dccg` is false each qualifying word is moved into `_egge` with pullWord and the
// removals are applied to the receiver at the end. When `_dccg` is true nothing is moved and
// the scan of the current depth index stops after the first qualifying word.
// Unless `_bdga` is true, the band [_gbf, _edbeb] is widened to include every qualifying word's
// `_acag`, which affects the words considered afterwards.
// The `_dbaa` argument is not used. The return value is the number of qualifying words.
func (_fdag *wordBag )scanBand (_dbaa string ,_egge *wordBag ,_bcce func (_dgbe *wordBag ,_bcfc *textWord )bool ,_gbf ,_edbeb ,_acbf float64 ,_dccg ,_bdga bool )int {_fdac :=_egge ._dbga ;var _bcde map[int ]map[*textWord ]struct{};if !_dccg {_bcde =_fdag .makeRemovals ();
};_bccd :=_bbcc *_fdac ;_ccdg :=0;for _ ,_feef :=range _fdag .depthBand (_gbf -_bccd ,_edbeb +_bccd ){if len (_fdag ._fadg [_feef ])==0{continue ;};for _ ,_cgbd :=range _fdag ._fadg [_feef ]{if !(_gbf -_bccd <=_cgbd ._acag &&_cgbd ._acag <=_edbeb +_bccd ){continue ;
};if !_bcce (_egge ,_cgbd ){continue ;};_bdae :=2.0*_bf .Abs (_cgbd ._efag -_egge ._dbga )/(_cgbd ._efag +_egge ._dbga );_cefd :=_bf .Max (_cgbd ._efag /_egge ._dbga ,_egge ._dbga /_cgbd ._efag );_gcce :=_bf .Min (_bdae ,_cefd );if _acbf > 0&&_gcce > _acbf {continue ;
};if _egge .blocked (_cgbd ){continue ;};if !_dccg {_egge .pullWord (_cgbd ,_feef ,_bcde );};_ccdg ++;if !_bdga {if _cgbd ._acag < _gbf {_gbf =_cgbd ._acag ;};if _cgbd ._acag > _edbeb {_edbeb =_cgbd ._acag ;};};if _dccg {break ;};};};if !_dccg {_fdag .applyRemovals (_bcde );
};return _ccdg ;};
// empty reports whether the stack holds no entries.
func (_dcd *stateStack )empty ()bool {return len (*_dcd )==0};
// text returns the concatenation of the `_debad` text of every word in `_ebge`, with a single
// space inserted before each word whose `_fgbg` flag is set.
func (_cgg *textLine )text ()string {var _ddae []string ;for _ ,_bdfg :=range _cgg ._ebge {if _bdfg ._fgbg {_ddae =append (_ddae ,"\u0020");};_ddae =append (_ddae ,_bdfg ._debad );
};return _dc .Join (_ddae ,"");};
2022-03-13 12:41:53 +00:00
2022-06-27 19:58:38 +00:00
// ToTextMark returns the public view of `tm`.
func (_gaed *textMark )ToTextMark ()TextMark {return TextMark {Text :_gaed ._aec ,Original :_gaed ._degeed ,BBox :_gaed ._gadc ,Font :_gaed ._bafgd ,FontSize :_gaed ._gaaaf ,FillColor :_gaed ._cefe ,StrokeColor :_gaed ._gbda ,Orientation :_gaed ._ddggc };
};
// textMark is the internal counterpart of TextMark. ToTextMark copies `_aec`, `_degeed`, `_gadc`,
// `_bafgd`, `_gaaaf`, `_cefe`, `_gbda` and `_ddggc` into the public struct and leaves the public
// Offset and Meta fields at their zero values.
// NOTE(review): the roles of `_cfae`, `_bddg` and `_debc` are not visible here; confirm.
type textMark struct{_bd .PdfRectangle ;_ddggc int ;_aec string ;_degeed string ;_bafgd *_bd .PdfFont ;_gaaaf float64 ;_cfae float64 ;_bddg _g .Matrix ;_debc _g .Point ;_gadc _bd .PdfRectangle ;_cefe _fb .Color ;_gbda _fb .Color ;};
// getStrokeColor returns the result of `_cgdc` applied to the stroking colorspace and stroking
// color held in `_cddag`.
func (_dfd *textObject )getStrokeColor ()_fb .Color {return _cgdc (_dfd ._cddag .ColorspaceStroking ,_dfd ._cddag .ColorStroking );
};
// _ecaa returns the integers 0.._afefe-1 ordered so that index a precedes index b whenever
// `_bdgg(a, b)` reports true.
func _ecaa (_afefe int ,_bdgg func (int ,int )bool )[]int {_cgage :=make ([]int ,_afefe );for _ggebf :=range _cgage {_cgage [_ggebf ]=_ggebf ;};_cc .Slice (_cgage ,func (_faed ,_egdg int )bool {return _bdgg (_cgage [_faed ],_cgage [_egdg ])});return _cgage ;
};
// getCurrentFont returns the font stored in `_dff._degee`. When that is nil a debug message is
// logged and the model package's default font is returned instead.
func (_deb *textObject )getCurrentFont ()*_bd .PdfFont {_cdce :=_deb ._dff ._degee ;if _cdce ==nil {_eg .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e");
return _bd .DefaultFont ();};return _cdce ;};
// writeText writes every paragraph whose `_gfce` flag is not set to `_cfceb`. After each written
// paragraph except the one at the last index it writes a single space when `_gbgg` reports true
// for the paragraph and its successor in the list, and two line feeds otherwise. Two line feeds
// are always written at the end. The results of Write are discarded.
func (_babf paraList )writeText (_cfceb _d .Writer ){for _fdbb ,_ceebg :=range _babf {if _ceebg ._gfce {continue ;};_ceebg .writeText (_cfceb );if _fdbb !=len (_babf )-1{if _gbgg (_ceebg ,_babf [_fdbb +1]){_cfceb .Write ([]byte ("\u0020"));
}else {_cfceb .Write ([]byte ("\u000a"));_cfceb .Write ([]byte ("\u000a"));};};};_cfceb .Write ([]byte ("\u000a"));_cfceb .Write ([]byte ("\u000a"));};
// splitSec sorts the list in place by `_gaad` and then `_gdaf`, and partitions it into groups.
// A group is seeded with the first ruling not yet placed and grows by one pass over the list,
// adding every unplaced ruling for which alignsSec reports true against a ruling already in
// the group.
// NOTE(review): the first element is accessed unconditionally, so an empty list panics; confirm
// that callers never pass one.
func (_bagce rulingList )splitSec ()[]rulingList {_cc .Slice (_bagce ,func (_caed ,_gegc int )bool {_efgga ,_fcffc :=_bagce [_caed ],_bagce [_gegc ];
if _efgga ._gaad !=_fcffc ._gaad {return _efgga ._gaad < _fcffc ._gaad ;};return _efgga ._gdaf < _fcffc ._gdaf ;});_accbc :=make (map[*ruling ]struct{},len (_bagce ));_bggb :=func (_dbdf *ruling )rulingList {_adca :=rulingList {_dbdf };_accbc [_dbdf ]=struct{}{};
for _ ,_eabc :=range _bagce {if _ ,_ebed :=_accbc [_eabc ];_ebed {continue ;};for _ ,_feefgc :=range _adca {if _eabc .alignsSec (_feefgc ){_adca =append (_adca ,_eabc );_accbc [_eabc ]=struct{}{};break ;};};};return _adca ;};_addc :=[]rulingList {_bggb (_bagce [0])};
for _ ,_dcdc :=range _bagce [1:]{if _ ,_gcbbg :=_accbc [_dcdc ];_gcbbg {continue ;};_addc =append (_addc ,_bggb (_dcdc ));};return _addc ;};
// connections starts at index `_ddff` and collects, recursively, every list index i for which
// `_ebafg[i]` contains an index that has already been visited. Each index is expanded at most
// once. The returned set holds the collected indexes.
func (_ddce rulingList )connections (_ebafg map[int ]intSet ,_ddff int )intSet {_fgce :=make (intSet );_fdgac :=make (intSet );
var _afacb func (int );_afacb =func (_dcce int ){if !_fdgac .has (_dcce ){_fdgac .add (_dcce );for _cgae :=range _ddce {if _ebafg [_cgae ].has (_dcce ){_fgce .add (_cgae );};};for _ebaa :=range _ddce {if _fgce .has (_ebaa ){_afacb (_ebaa );};};};};_afacb (_ddff );
return _fgce ;};
// emptyCompositeRow reports whether no composite cell stored in `_agga` under
// `_bddbg(i, _eaae)`, for i from 0 up to `_bgcfb`, has a non-empty paraList.
func (_efefd *textTable )emptyCompositeRow (_eaae int )bool {for _dcffga :=0;_dcffga < _efefd ._bgcfb ;_dcffga ++{if _feade ,_bage :=_efefd ._agga [_bddbg (_dcffga ,_eaae )];_bage {if len (_feade .paraList )> 0{return false ;};};};return true ;
};
// _gbae reports whether rectangle `_bcdgb` lies entirely inside rectangle `_eeae`; shared edges
// count as inside.
func _gbae (_eeae ,_bcdgb _bd .PdfRectangle )bool {return _eeae .Llx <=_bcdgb .Llx &&_bcdgb .Urx <=_eeae .Urx &&_eeae .Lly <=_bcdgb .Lly &&_bcdgb .Ury <=_eeae .Ury ;};
// _gagcd is the license gate. It returns nil when a license key is present and reports itself as
// licensed, or when `_fdd` is set. Otherwise it prints a notice to standard output and returns an
// error. The `_gbggg` argument is not used.
func _gagcd (_gbggg *PageText )error {_ffefc :=_ba .GetLicenseKey ();if _ffefc !=nil &&_ffefc .IsLicensed ()||_fdd {return nil ;
};_be .Printf ("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a");_be .Println ("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f");
return _f .New ("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064");};
2022-03-13 12:41:53 +00:00
2022-06-27 19:58:38 +00:00
// String returns a description of `k`: the name registered for it in `_ecgab`, or a
// "Not a mark" message carrying the numeric value when no name is registered.
func (k markKind) String() string {
	if name, known := _ecgab[k]; known {
		return name
	}
	return _be.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", k)
}

// toTextMarks returns the text marks of the paragraph. Without a table (`_dbfdg` is nil) it
// defers to toCellTextMarks. With a table it walks the cells row by row: a missing cell
// contributes a tab via `_efbc`, an existing cell contributes its own marks, every cell is
// followed by a space, and every row except the last is followed by a line feed.
func (p *textPara) toTextMarks(_aaee *int) []TextMark {
	table := p._dbfdg
	if table == nil {
		return p.toCellTextMarks(_aaee)
	}
	var marks []TextMark
	for row := 0; row < table._gccb; row++ {
		for col := 0; col < table._bgcfb; col++ {
			if cell := table.get(col, row); cell != nil {
				marks = append(marks, cell.toCellTextMarks(_aaee)...)
			} else {
				marks = _efbc(marks, _aaee, "\u0009")
			}
			marks = _efbc(marks, _aaee, "\u0020")
		}
		if row < table._gccb-1 {
			marks = _efbc(marks, _aaee, "\u000a")
		}
	}
	return marks
}

// getDown collects the `_egad` paragraph of every cell in the table's last row. It returns nil
// when any of them reports taken(), or when the `_bbbdd` link of one collected paragraph is not
// the paragraph collected for the next column.
func (t *textTable) getDown() paraList {
	width := t._bgcfb
	below := make(paraList, width)
	for col := range below {
		candidate := t.get(col, t._gccb-1)._egad
		if candidate.taken() {
			return nil
		}
		below[col] = candidate
	}
	for col := 0; col+1 < width; col++ {
		if below[col]._bbbdd != below[col+1] {
			return nil
		}
	}
	return below
}

// get returns the paragraph stored in `_deedc` under `_bddbg(x, y)`, or nil when none is stored.
func (t *textTable) get(x, y int) *textPara {
	key := _bddbg(x, y)
	return t._deedc[key]
}

// bbox returns the paragraph's rectangle.
func (p *textPara) bbox() _bd.PdfRectangle {
	return p.PdfRectangle
}

// Sentinel errors of the package.
var (
	_cg  = _f.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")
	_bdg = _f.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)

// _aead returns the larger of two ints.
func _aead(a, b int) int {
	if b >= a {
		return b
	}
	return a
}

// _fdeba converts `depth` to an integer index: the quotient depth/_abab truncated toward zero,
// reduced by one when `depth` is negative.
func _fdeba(depth float64) int {
	idx := int(depth / _abab)
	if depth < 0 {
		return idx - 1
	}
	return idx
}
2022-02-05 21:34:53 +00:00
2022-06-06 22:48:24 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
2022-06-27 19:58:38 +00:00
type ImageMark struct{Image *_bd .Image ;
2021-12-14 01:08:28 +00:00
2022-06-06 22:48:24 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ;Height float64 ;
// Position of the image in PDF coordinates (lower left corner).
X float64 ;Y float64 ;
// Angle in degrees, if rotated.
2022-06-27 19:58:38 +00:00
Angle float64 ;};
// _dbfdf returns the result of `_acbb` applied to the absolute Y difference and the absolute X
// difference of the two points, in that order.
func _dbfdf (_cgde ,_gcec _g .Point )bool {_geea :=_bf .Abs (_cgde .X -_gcec .X );_cbed :=_bf .Abs (_cgde .Y -_gcec .Y );return _acbb (_cbed ,_geea );};
// updateBBox folds the rectangle of every paragraph in the cell's paraList into the cell's own
// rectangle using `_gcff`.
func (_cacb *compositeCell )updateBBox (){for _ ,_aace :=range _cacb .paraList {_cacb .PdfRectangle =_gcff (_cacb .PdfRectangle ,_aace .PdfRectangle );
};};
// allWords returns every word held in the bag's `_fadg` map as one slice. The groups are visited
// in map iteration order, so the order of the result is not deterministic across groups.
func (_debf *wordBag )allWords ()[]*textWord {var _dbcc []*textWord ;for _ ,_fcbg :=range _debf ._fadg {_dbcc =append (_dbcc ,_fcbg ...);};return _dbcc ;};
// textResult pairs a PageText with two integer values.
// NOTE(review): the meaning of `_cfd` and `_acg` is not visible here; confirm against their users.
type textResult struct{_bdf PageText ;_cfd int ;_acg int ;};
// maxDepth returns the bag's `_adbg` value minus its Lly.
func (_dgff *wordBag )maxDepth ()float64 {return _dgff ._adbg -_dgff .Lly };
// _bfe returns `_dede` without its last rune. An empty string is returned unchanged; indexing
// the empty rune slice would otherwise panic with a slice-bounds error.
// The string is converted through []rune, so invalid UTF-8 bytes come back as U+FFFD.
func _bfe(_dede string) string {
	runes := []rune(_dede)
	if len(runes) == 0 {
		return _dede
	}
	return string(runes[:len(runes)-1])
}

// firstWord returns the first word stored in the bag's `_fadg` map under `_efcf`.
// It panics when no word is stored under that key; callers must only pass keys that hold words.
func (_cdec *wordBag) firstWord(_efcf int) *textWord {
	return _cdec._fadg[_efcf][0]
}
2021-12-14 01:08:28 +00:00
2022-06-27 19:58:38 +00:00
// String returns a human readable description of `ss`: the number of entries in `_gdbg` and the
// value of the `_beg` flag.
func (ss *shapesState) String() string {
	count, fresh := len(ss._gdbg), ss._beg
	return _be.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", count, fresh)
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// Text returns the extracted page text.
func (pt PageText) Text() string {
	return pt._ebce
}

// makeRemovals returns a removals table holding one empty word set for every key of the bag's
// `_fadg` map.
func (bag *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	table := make(map[int]map[*textWord]struct{}, len(bag._fadg))
	for key := range bag._fadg {
		table[key] = map[*textWord]struct{}{}
	}
	return table
}

// log writes a diagnostic listing of the paragraphs under the heading `_fggf`. It does nothing
// unless `_cbec` is set. Nil paragraphs are skipped; for the others the index, rectangle, table
// dimensions (when the paragraph has a table) and the text passed through `_dbec` with the
// argument 50 are printed.
func (paras paraList) log(_fggf string) {
	if !_cbec {
		return
	}
	_eg.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _fggf, len(paras))
	for idx := range paras {
		para := paras[idx]
		if para == nil {
			continue
		}
		body := para.text()
		dims := "\u0020\u0020"
		if table := para._dbfdg; table != nil {
			dims = _be.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", table._bgcfb, table._gccb)
		}
		_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", idx, para.PdfRectangle, dims, _dbec(body, 50))
	}
}

// xMean returns the mean of the X coordinates of the ruling's two end points.
func (line lineRuling) xMean() float64 {
	sum := line._eadee.X + line._feab.X
	return 0.5 * sum
}
2022-06-06 22:48:24 +00:00
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
2022-06-27 19:58:38 +00:00
// It returns an error when the receiver is nil, when `end` < `start`, or when either binary
// search lands outside the array. An empty array is returned as is. `start` is clamped up to the
// first mark's Offset and `end` down to the last mark's Offset+1 before searching.
// NOTE(review): assuming the marks are ordered by Offset, a range that reaches the last mark makes
// the second search return len(marks), which is reported as an "Out of range" error rather than
// returning the tail of the array; confirm that this is intended.
func (_cbc *TextMarkArray )RangeOffset (start ,end int )(*TextMarkArray ,error ){if _cbc ==nil {return nil ,_f .New ("\u006da\u003d\u003d\u006e\u0069\u006c");};if end < start {return nil ,_be .Errorf ("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020",start ,end );
};_aegc :=len (_cbc ._ggbg );if _aegc ==0{return _cbc ,nil ;};if start < _cbc ._ggbg [0].Offset {start =_cbc ._ggbg [0].Offset ;};if end > _cbc ._ggbg [_aegc -1].Offset +1{end =_cbc ._ggbg [_aegc -1].Offset +1;};_fbad :=_cc .Search (_aegc ,func (_bgfb int )bool {return _cbc ._ggbg [_bgfb ].Offset +len (_cbc ._ggbg [_bgfb ].Text )-1>=start });
if !(0<=_fbad &&_fbad < _aegc ){_aede :=_be .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076",start ,_fbad ,_aegc ,_cbc ._ggbg [0],_cbc ._ggbg [_aegc -1]);
return nil ,_aede ;};_gdea :=_cc .Search (_aegc ,func (_eeab int )bool {return _cbc ._ggbg [_eeab ].Offset > end -1});if !(0<=_gdea &&_gdea < _aegc ){_caaa :=_be .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076",end ,_gdea ,_aegc ,_cbc ._ggbg [0],_cbc ._ggbg [_aegc -1]);
return nil ,_caaa ;};if _gdea <=_fbad {return nil ,_be .Errorf ("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064",start ,end ,_fbad ,_gdea );
};return &TextMarkArray {_ggbg :_cbc ._ggbg [_fbad :_gdea ]},nil ;};
// _fdbbd replaces, in place, the X and Y of every point of every subpath in `_abfa` with the
// result of `_cfgb` applied to that coordinate. It does nothing when `_ebba` is negative.
// When `_daafa` is set the call is logged and every point that changed is printed.
func _fdbbd (_abfa []pathSection ){if _ebba < 0.0{return ;};if _daafa {_eg .Log .Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_abfa ));
};for _dgbeff ,_faeb :=range _abfa {for _eeag ,_abacd :=range _faeb ._aga {for _dabg ,_abdd :=range _abacd ._gcbb {_abacd ._gcbb [_dabg ]=_g .Point {X :_cfgb (_abdd .X ),Y :_cfgb (_abdd .Y )};if _daafa {_fcgg :=_abacd ._gcbb [_dabg ];if !_fafbc (_abdd ,_fcgg ){_cecd :=_g .Point {X :_fcgg .X -_abdd .X ,Y :_fcgg .Y -_abdd .Y };
_be .Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_dgbeff ,_eeag ,_dabg ,_abdd ,_fcgg ,_cecd );};};};};};};
// _fggaa groups the marks in `_edegb` into words, in the order given.
//
// A word in progress is finished (its text is computed, and it is kept unless `_gagac` reports
// true for that text) whenever a mark whose text satisfies `_gagac` is met, whenever the next
// mark cannot be joined to it, and at the end of the input. Marks whose text satisfies `_gagac`
// are never added to a word. A mark is joined to the word in progress with appendMark unless the
// value `_edagd(mark, word)` divided by the word's `_efag` is at least `_dadd` or below
// `-_afbbc`, or the difference between `_acec(_caeg, mark)` and the word's `_acag`, divided by
// the word's `_efag`, exceeds `_afeb`; in those cases a new word is started with `_bccf`.
//
// When `_dgeb` is set, a mark recognised by `_egfd` that lies in the diacritic area of the
// previous mark (or the reverse arrangement) is merged into the word with addDiacritic instead
// of being appended as an ordinary mark.
// When `_eeegf` is set, diagnostics are logged and printed.
func _fggaa (_edegb []*textMark ,_caeg _bd .PdfRectangle )[]*textWord {var _fgabd []*textWord ;
var _gcadc *textWord ;if _eeegf {_eg .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_edegb ));};_cade :=func (){if _gcadc !=nil {_deaad :=_gcadc .computeText ();
if !_gagac (_deaad ){_gcadc ._debad =_deaad ;_fgabd =append (_fgabd ,_gcadc );if _eeegf {_eg .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_fgabd )-1,_gcadc .String ());
for _dgdc ,_ebada :=range _gcadc ._bbacg {_be .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dgdc ,_ebada .String ());};};};_gcadc =nil ;};};for _ ,_aacf :=range _edegb {if _dgeb &&_gcadc !=nil &&len (_gcadc ._bbacg )> 0{_dceca :=_gcadc ._bbacg [len (_gcadc ._bbacg )-1];
_ggcbb ,_fdfca :=_egfd (_aacf ._aec );_gaedf ,_fcagf :=_egfd (_dceca ._aec );if _fdfca &&!_fcagf &&_dceca .inDiacriticArea (_aacf ){_gcadc .addDiacritic (_ggcbb );continue ;};if _fcagf &&!_fdfca &&_aacf .inDiacriticArea (_dceca ){_gcadc ._bbacg =_gcadc ._bbacg [:len (_gcadc ._bbacg )-1];
_gcadc .appendMark (_aacf ,_caeg );_gcadc .addDiacritic (_gaedf );continue ;};};_cdaae :=_gagac (_aacf ._aec );if _cdaae {_cade ();continue ;};if _gcadc ==nil &&!_cdaae {_gcadc =_bccf ([]*textMark {_aacf },_caeg );continue ;};_aefa :=_gcadc ._efag ;_cdfgg :=_bf .Abs (_acec (_caeg ,_aacf )-_gcadc ._acag )/_aefa ;
_deae :=_edagd (_aacf ,_gcadc )/_aefa ;if _deae >=_dadd ||!(-_afbbc <=_deae &&_cdfgg <=_afeb ){_cade ();_gcadc =_bccf ([]*textMark {_aacf },_caeg );continue ;};_gcadc .appendMark (_aacf ,_caeg );};_cade ();return _fgabd ;};
// reset sets the text object's `_cbff` and `_bdb` matrices to the identity matrix and clears
// `_dcaa`.
func (_ffad *textObject )reset (){_ffad ._cbff =_g .IdentityMatrix ();
_ffad ._bdb =_g .IdentityMatrix ();_ffad ._dcaa =nil ;};func (_eddad *textObject )newTextMark (_aeeg string ,_cdbb _g .Matrix ,_bbfaf _g .Point ,_cfce float64 ,_ebbd *_bd .PdfFont ,_eaeg float64 ,_acgg ,_cdece _fb .Color )(textMark ,bool ){_bfgc :=_cdbb .Angle ();
_eddac :=_ecage (_bfgc ,_gef );var _dfgae float64 ;if _eddac %180!=90{_dfgae =_cdbb .ScalingFactorY ();}else {_dfgae =_cdbb .ScalingFactorX ();};_cebbg :=_cecc (_cdbb );_gac :=_bd .PdfRectangle {Llx :_cebbg .X ,Lly :_cebbg .Y ,Urx :_bbfaf .X ,Ury :_bbfaf .Y };
switch _eddac %360{case 90:_gac .Urx -=_dfgae ;case 180:_gac .Ury -=_dfgae ;case 270:_gac .Urx +=_dfgae ;case 0:_gac .Ury +=_dfgae ;default:_eddac =0;_gac .Ury +=_dfgae ;};if _gac .Llx > _gac .Urx {_gac .Llx ,_gac .Urx =_gac .Urx ,_gac .Llx ;};if _gac .Lly > _gac .Ury {_gac .Lly ,_gac .Ury =_gac .Ury ,_gac .Lly ;
};_ebad :=true ;if _eddad ._fafb ._dd .Width ()> 0{_ebca ,_eded :=_dbaee (_gac ,_eddad ._fafb ._dd );if !_eded {_ebad =false ;_eg .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_gac ,_eddad ._fafb ._dd ,_aeeg );
};_gac =_ebca ;};_cbcc :=_gac ;_dgbf :=_eddad ._fafb ._dd ;switch _eddac %360{case 90:_dgbf .Urx ,_dgbf .Ury =_dgbf .Ury ,_dgbf .Urx ;_cbcc =_bd .PdfRectangle {Llx :_dgbf .Urx -_gac .Ury ,Urx :_dgbf .Urx -_gac .Lly ,Lly :_gac .Llx ,Ury :_gac .Urx };case 180:_cbcc =_bd .PdfRectangle {Llx :_dgbf .Urx -_gac .Llx ,Urx :_dgbf .Urx -_gac .Urx ,Lly :_dgbf .Ury -_gac .Lly ,Ury :_dgbf .Ury -_gac .Ury };
case 270:_dgbf .Urx ,_dgbf .Ury =_dgbf .Ury ,_dgbf .Urx ;_cbcc =_bd .PdfRectangle {Llx :_gac .Ury ,Urx :_gac .Lly ,Lly :_dgbf .Ury -_gac .Llx ,Ury :_dgbf .Ury -_gac .Urx };};if _cbcc .Llx > _cbcc .Urx {_cbcc .Llx ,_cbcc .Urx =_cbcc .Urx ,_cbcc .Llx ;};
if _cbcc .Lly > _cbcc .Ury {_cbcc .Lly ,_cbcc .Ury =_cbcc .Ury ,_cbcc .Lly ;};_bggcd :=textMark {_aec :_aeeg ,PdfRectangle :_cbcc ,_gadc :_gac ,_bafgd :_ebbd ,_gaaaf :_dfgae ,_cfae :_eaeg ,_bddg :_cdbb ,_debc :_bbfaf ,_ddggc :_eddac ,_cefe :_acgg ,_gbda :_cdece };
if _eeegf {_eg .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_cebbg ,_bbfaf ,_bggcd .String ());};
return _bggcd ,_ebad ;};type gridTile struct{_bd .PdfRectangle ;_bfgeab ,_fgbc ,_ccadg ,_eafc bool ;};type event struct{_bbcd float64 ;_cfgg bool ;_agecb int ;};func (_befcb *textWord )computeText ()string {_dcbb :=make ([]string ,len (_befcb ._bbacg ));
for _faba ,_feeee :=range _befcb ._bbacg {_dcbb [_faba ]=_feeee ._aec ;};return _dc .Join (_dcbb ,"");};func _ecage (_cbee float64 ,_gdfc int )int {if _gdfc ==0{_gdfc =1;};_cdfa :=float64 (_gdfc );return int (_bf .Round (_cbee /_cdfa )*_cdfa );};const _gcb =20;
// taken reports whether the paragraph is nil or already has its _deed flag set.
func (_gdcde *textPara) taken() bool {
	return _gdcde == nil || _gdcde._deed
}

// _dfe returns _ebbf(_bea, _aged) unless _bcaga accepts that value, in which
// case it falls back to _afdf(_bea, _aged).
func _dfe(_bea, _aged bounded) float64 {
	_acfa := _ebbf(_bea, _aged)
	if !_bcaga(_acfa) {
		return _acfa
	}
	return _afdf(_bea, _aged)
}

// toTextMarks flattens the paragraphs into a single TextMark slice.
//
// Paragraphs whose _gfce flag is set are skipped. Between a paragraph and its
// successor in the list either one space mark (when _gbgg accepts the pair) or
// two newline marks are inserted; two more newline marks terminate the result.
// The running offset handed to the helpers starts at 0.
func (_fgaff paraList) toTextMarks() []TextMark {
	_eggb := 0
	var _fbcb []TextMark
	for _edcb, _dcdg := range _fgaff {
		if _dcdg._gfce {
			continue
		}
		_agdd := _dcdg.toTextMarks(&_eggb)
		_fbcb = append(_fbcb, _agdd...)
		if _edcb != len(_fgaff)-1 {
			if _gbgg(_dcdg, _fgaff[_edcb+1]) {
				_fbcb = _efbc(_fbcb, &_eggb, "\u0020")
			} else {
				_fbcb = _efbc(_fbcb, &_eggb, "\u000a")
				_fbcb = _efbc(_fbcb, &_eggb, "\u000a")
			}
		}
	}
	_fbcb = _efbc(_fbcb, &_eggb, "\u000a")
	_fbcb = _efbc(_fbcb, &_eggb, "\u000a")
	return _fbcb
}

// asTiling converts the rulings into a gridTiling.
//
// Steps, in order:
//  1. Log an error for every pair of neighbouring rulings that align on both
//     their primary and secondary coordinates (duplicates).
//  2. Sort the list, split it into the two lists returned by vertsHorzs and
//     collect the distinct primary coordinates of each.
//  3. Build one gridTile per cell of the coordinate lattice via _beage.
//  4. Coalesce tiles along each row: starting from a tile with _fgbc set, keep
//     extending to the right until a tile with _eafc set is absorbed.
//  5. Coalesce the row results downwards while the tile below starts at the
//     same Llx and ends at the same Urx, and keep only tiles for which
//     complete() is true.
//
// An empty gridTiling is returned when either direction has fewer than two
// distinct primary coordinates. The receiver may be empty.
func (_ccfcg rulingList) asTiling() gridTiling {
	if _gdcf {
		_eg.Log.Info("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(_ccfcg))
	}
	// Index-based loop instead of ranging over _ccfcg[1:], which panics when
	// the list is empty.
	for i := 1; i < len(_ccfcg); i++ {
		_adade, _acabe := _ccfcg[i-1], _ccfcg[i]
		if _adade.alignsPrimary(_acabe) && _adade.alignsSec(_acabe) {
			_eg.Log.Error("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073", _acabe, _adade)
		}
	}
	_ccfcg.sortStrict()
	_ccfcg.log("\u0073n\u0061\u0070\u0070\u0065\u0064")
	_decc, _ebcb := _ccfcg.vertsHorzs()
	_cffd := _decc.primaries()
	_bgaa := _ebcb.primaries()
	_ebaeg := len(_cffd) - 1
	_ecafg := len(_bgaa) - 1
	// "<= 0" rather than "== 0": with no primaries at all the count is -1 and
	// the indexing below would panic.
	if _ebaeg <= 0 || _ecafg <= 0 {
		return gridTiling{}
	}
	_ebcce := _bd.PdfRectangle{Llx: _cffd[0], Urx: _cffd[_ebaeg], Lly: _bgaa[0], Ury: _bgaa[_ecafg]}
	if _gdcf {
		_eg.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064", len(_decc))
		for _bacd, _bgdb := range _decc {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _bacd, _bgdb)
		}
		_eg.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064", len(_ebcb))
		for _bfga, _gdbc := range _ebcb {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _bfga, _gdbc)
		}
		_eg.Log.Info("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f", _ebaeg, _ecafg, _cffd, _bgaa)
	}

	// One tile per lattice cell, stored row-major (row index * width + column).
	_eega := make([]gridTile, _ebaeg*_ecafg)
	for _bfgd := _ecafg - 1; _bfgd >= 0; _bfgd-- {
		_ebcca := _bgaa[_bfgd]
		_bffec := _bgaa[_bfgd+1]
		for _fccca := 0; _fccca < _ebaeg; _fccca++ {
			_bdfc := _cffd[_fccca]
			_eefc := _cffd[_fccca+1]
			_fadeb := _decc.findPrimSec(_bdfc, _ebcca)
			_fdgf := _decc.findPrimSec(_eefc, _ebcca)
			_aeafc := _ebcb.findPrimSec(_ebcca, _bdfc)
			_fgdc := _ebcb.findPrimSec(_bffec, _bdfc)
			_afag := _bd.PdfRectangle{Llx: _bdfc, Urx: _eefc, Lly: _ebcca, Ury: _bffec}
			_efceg := _beage(_afag, _fadeb, _fdgf, _aeafc, _fgdc)
			_eega[_bfgd*_ebaeg+_fccca] = _efceg
			if _gdcf {
				_be.Printf("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a", _fccca, _bfgd, _efceg.String(), _efceg.Width(), _efceg.Height())
			}
		}
	}
	if _gdcf {
		_eg.Log.Info("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", _ebcce)
	}

	// Row-wise coalescing; each row maps the merged tile's Llx to the tile.
	_aadf := make([]map[float64]gridTile, _ecafg)
	for _ffge := _ecafg - 1; _ffge >= 0; _ffge-- {
		if _gdcf {
			_be.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", _ffge)
		}
		_aadf[_ffge] = make(map[float64]gridTile, _ebaeg)
		for _cddea := 0; _cddea < _ebaeg; _cddea++ {
			_afab := _eega[_ffge*_ebaeg+_cddea]
			if _gdcf {
				_be.Printf("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _cddea, _afab)
			}
			if !_afab._fgbc {
				continue
			}
			_gcacg := _cddea
			for _becc := _cddea + 1; !_afab._eafc && _becc < _ebaeg; _becc++ {
				_gaeabe := _eega[_ffge*_ebaeg+_becc]
				_afab.Urx = _gaeabe.Urx
				_afab._bfgeab = _afab._bfgeab || _gaeabe._bfgeab
				_afab._ccadg = _afab._ccadg || _gaeabe._ccadg
				_afab._eafc = _gaeabe._eafc
				if _gdcf {
					_be.Printf("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a", _becc, _gaeabe, _afab)
				}
				_gcacg = _becc
			}
			if _gdcf {
				_be.Printf(" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n", _cddea, _gcacg, _afab)
			}
			// Resume after the last absorbed column.
			_cddea = _gcacg
			_aadf[_ffge][_afab.Llx] = _afab
		}
	}

	// _cbcea collects the final tiles keyed by Lly then Llx; _abfbe records
	// the (Lly, Llx) positions already absorbed into a tile from a row above.
	_cbcea := make(map[float64]map[float64]gridTile, _ecafg)
	_abfbe := make(map[float64]map[float64]struct{}, _ecafg)
	for _dafdg := _ecafg - 1; _dafdg >= 0; _dafdg-- {
		_bfdf := _eega[_dafdg*_ebaeg].Lly
		_cbcea[_bfdf] = make(map[float64]gridTile, _ebaeg)
		_abfbe[_bfdf] = make(map[float64]struct{}, _ebaeg)
	}
	if _gdcf {
		_eg.Log.Info("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", _ebcce)
	}
	for _gfeae := _ecafg - 1; _gfeae >= 0; _gfeae-- {
		_gdecc := _eega[_gfeae*_ebaeg].Lly
		_gfedd := _aadf[_gfeae]
		if _gdcf {
			_be.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", _gfeae)
		}
		for _, _eecb := range _gddeb(_gfedd) {
			if _, _fbba := _abfbe[_gdecc][_eecb]; _fbba {
				continue
			}
			_abgd := _gfedd[_eecb]
			if _gdcf {
				_be.Printf(" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a", _abgd.String())
			}
			for _gbcg := _gfeae - 1; _gbcg >= 0; _gbcg-- {
				if _abgd._ccadg {
					break
				}
				_aeaaa := _aadf[_gbcg]
				_aggcg, _gagfb := _aeaaa[_eecb]
				if !_gagfb {
					break
				}
				if _aggcg.Urx != _abgd.Urx {
					break
				}
				_abgd._ccadg = _aggcg._ccadg
				_abgd.Lly = _aggcg.Lly
				if _gdcf {
					_be.Printf("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a", _aggcg.String(), _abgd.String())
				}
				_abfbe[_aggcg.Lly][_aggcg.Llx] = struct{}{}
			}
			if _gfeae == 0 {
				_abgd._ccadg = true
			}
			if _abgd.complete() {
				_cbcea[_gdecc][_eecb] = _abgd
			}
		}
	}
	_dabae := gridTiling{PdfRectangle: _ebcce, _defd: _cbge(_cbcea), _eabe: _dagae(_cbcea), _bdgaf: _cbcea}
	_dabae.log("\u0043r\u0065\u0061\u0074\u0065\u0064")
	return _dabae
}

// encloses reports whether [_ffdf, _ddbe] lies within the ruling's
// [_gaad, _gdaf] extent widened by _cebae at both ends.
func (_baffd *ruling) encloses(_ffdf, _ddbe float64) bool {
	return _baffd._gaad-_cebae <= _ffdf && _ddbe <= _baffd._gdaf+_cebae
}

// _dbec returns _ffgb cut to at most _eefg bytes. The cut is byte-based.
func _dbec(_ffgb string, _eefg int) string {
	if len(_ffgb) < _eefg {
		return _ffgb
	}
	return _ffgb[:_eefg]
}

// moveLP concatenates a translation by (_gbea, _ddb) onto the matrix _bdb and
// then copies the result into _cbff.
func (_becd *textObject) moveLP(_gbea, _ddb float64) {
	_becd._bdb.Concat(_g.NewMatrix(1, 0, 0, 1, _gbea, _ddb))
	_becd._cbff = _becd._bdb
}

// textPara is a paragraph: a bounding rectangle plus its text lines (_gadg).
// _deed is the flag tested by taken(); paragraphs with _gfce set are skipped
// by paraList.toTextMarks.
type textPara struct {
	_bd.PdfRectangle
	_bgca  _bd.PdfRectangle
	_gadg  []*textLine
	_dbfdg *textTable
	_deed  bool
	_gfce  bool
	_ceabb *textPara
	_bbbdd *textPara
	_edaad *textPara
	_egad  *textPara
}

// _gfde passes the absolute X and Y differences of the two points to _acbb and
// returns its verdict.
func _gfde(_cagae, _fdfb _g.Point) bool {
	_cgbc := _bf.Abs(_cagae.X - _fdfb.X)
	_ggaa := _bf.Abs(_cagae.Y - _fdfb.Y)
	return _acbb(_cgbc, _ggaa)
}

// setHorizScaling stores _bba in the text state. A nil receiver is a no-op.
func (_eade *textObject) setHorizScaling(_bba float64) {
	if _eade == nil {
		return
	}
	_eade._dff._aee = _bba
}

var _fdd = false

// topoOrder returns paragraph indexes ordered by a depth-first traversal of
// the readBefore relation: a paragraph is appended only after every unvisited
// paragraph that it reads before has been appended, and the resulting list is
// passed through _gfec.
func (_edba paraList) topoOrder() []int {
	if _cbec {
		_eg.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	_afge := len(_edba)
	_dadc := make([]bool, _afge)
	_ebgd := make([]int, 0, _afge)
	_ffcc := _edba.llyOrdering()
	var _ceff func(_dafe int)
	_ceff = func(_gbaef int) {
		_dadc[_gbaef] = true
		for _eeabb := 0; _eeabb < _afge; _eeabb++ {
			if !_dadc[_eeabb] {
				if _edba.readBefore(_ffcc, _gbaef, _eeabb) {
					_ceff(_eeabb)
				}
			}
		}
		_ebgd = append(_ebgd, _gbaef)
	}
	for _efed := 0; _efed < _afge; _efed++ {
		if !_dadc[_efed] {
			_ceff(_efed)
		}
	}
	return _gfec(_ebgd)
}

// eventNeighbours sweeps the events and returns, per paragraph, the indexes of
// the paragraphs that were open at the same time.
//
// _bebbg is sorted in place by position (_bbcd), with entering events (_cfgg
// true) ahead of leaving ones at equal positions. Paragraphs without any
// neighbour map to nil.
func (_cgge paraList) eventNeighbours(_bebbg []event) map[*textPara][]int {
	_cc.Slice(_bebbg, func(_dabbe, _geca int) bool {
		_gafde, _dfff := _bebbg[_dabbe], _bebbg[_geca]
		_beefag, _gabb := _gafde._bbcd, _dfff._bbcd
		if _beefag != _gabb {
			return _beefag < _gabb
		}
		if _gafde._cfgg != _dfff._cfgg {
			return _gafde._cfgg
		}
		return _dabbe < _geca
	})
	_ffec := make(map[int]intSet)
	_cedfe := make(intSet) // indexes currently open in the sweep
	for _, _bagcef := range _bebbg {
		if _bagcef._cfgg {
			_ffec[_bagcef._agecb] = make(intSet)
			for _gecb := range _cedfe {
				if _gecb != _bagcef._agecb {
					_ffec[_bagcef._agecb].add(_gecb)
					_ffec[_gecb].add(_bagcef._agecb)
				}
			}
			_cedfe.add(_bagcef._agecb)
		} else {
			_cedfe.del(_bagcef._agecb)
		}
	}
	_dbea := map[*textPara][]int{}
	for _cgcef, _fgebd := range _ffec {
		_cfcd := _cgge[_cgcef]
		if len(_fgebd) == 0 {
			_dbea[_cfcd] = nil
			continue
		}
		_adec := make([]int, len(_fgebd))
		_gbgba := 0
		for _gdba := range _fgebd {
			_adec[_gbgba] = _gdba
			_gbgba++
		}
		_dbea[_cfcd] = _adec
	}
	return _dbea
}

// _dbee builds a 2x2 textTable and puts the four paragraphs at (0,0), (1,0),
// (0,1) and (1,1) in argument order.
func _dbee(_cgce, _fbfa, _bfgec, _eacac *textPara) *textTable {
	_eebe := &textTable{_bgcfb: 2, _gccb: 2, _deedc: make(map[uint64]*textPara, 4)}
	_eebe.put(0, 0, _cgce)
	_eebe.put(1, 0, _fbfa)
	_eebe.put(0, 1, _bfgec)
	_eebe.put(1, 1, _eacac)
	return _eebe
}

// has reports whether _dcfe is a member of the set.
func (_gdcff intSet) has(_dcfe int) bool {
	_, _fcaeb := _gdcff[_dcfe]
	return _fcaeb
}

// _efbc appends a copy of the template mark _bcfa, with Text set to _ggad, to
// _debae via _ecfc.
func _efbc(_debae []TextMark, _deff *int, _ggad string) []TextMark {
	_ebac := _bcfa
	_ebac.Text = _ggad
	return _ecfc(_debae, _deff, _ebac)
}

// extractPageResourcesToFont appends one Font entry to _egc.Fonts for every
// font named in the resources' Font dictionary, skipping names for which _edd
// reports an existing entry.
//
// The embedded font program is read from the first non-nil descriptor entry
// among FontFile, FontFile2 and FontFile3, and FontFileName gets the matching
// ".pfb", ".ttf" or ".cff" suffix. An error is returned when the Font entry is
// not a dictionary, a font cannot be looked up or parsed, or a font stream
// cannot be decoded.
func (_egc *PageFonts) extractPageResourcesToFont(_ad *_bd.PdfPageResources) error {
	_dga, _dce := _db.GetDict(_ad.Font)
	if !_dce {
		return _f.New(_ab)
	}
	for _, _gae := range _dga.Keys() {
		var (
			_ce  = true
			_bed []byte
			_gab string
		)
		_dag, _fae := _ad.GetFontByName(_gae)
		if !_fae {
			return _f.New(_bge)
		}
		_ddc, _gc := _bd.NewPdfFontFromPdfObject(_dag)
		if _gc != nil {
			return _gc
		}
		_ff := _ddc.FontDescriptor()
		_edb := _ddc.FontDescriptor().FontName.String()
		_fgc := _ddc.Subtype()
		if _edd(_egc.Fonts, _edb) {
			continue
		}
		if len(_ddc.ToUnicode()) == 0 {
			_ce = false
		}
		if _ff.FontFile != nil {
			if _ccd, _gcd := _db.GetStream(_ff.FontFile); _gcd {
				_bed, _gc = _db.DecodeStream(_ccd)
				if _gc != nil {
					return _gc
				}
				_gab = _edb + "\u002e\u0070\u0066\u0062"
			}
		} else if _ff.FontFile2 != nil {
			if _fe, _bb := _db.GetStream(_ff.FontFile2); _bb {
				_bed, _gc = _db.DecodeStream(_fe)
				if _gc != nil {
					return _gc
				}
				_gab = _edb + "\u002e\u0074\u0074\u0066"
			}
		} else if _ff.FontFile3 != nil {
			if _fed, _ced := _db.GetStream(_ff.FontFile3); _ced {
				_bed, _gc = _db.DecodeStream(_fed)
				if _gc != nil {
					return _gc
				}
				_gab = _edb + "\u002e\u0063\u0066\u0066"
			}
		}
		// No font program was found; the entry is still recorded.
		if len(_gab) < 1 {
			_eg.Log.Debug(_ag)
		}
		_dgb := Font{FontName: _edb, PdfFont: _ddc, IsCID: _ddc.IsCID(), IsSimple: _ddc.IsSimple(), ToUnicode: _ce, FontType: _fgc, FontData: _bed, FontFileName: _gab, FontDescriptor: _ff}
		_egc.Fonts = append(_egc.Fonts, _dgb)
	}
	return nil
}

// cachedImage pairs an image with a colorspace.
type cachedImage struct {
	_fdg *_bd.Image
	_daf _bd.PdfColorspace
}

// top returns the last element of the stack, or nil when the stack is empty.
func (_dfg *stateStack) top() *textState {
	if _dfg.empty() {
		return nil
	}
	return (*_dfg)[_dfg.size()-1]
}

// intersects reports whether one ruling is vertical and the other horizontal
// and each ruling's primary coordinate falls inside the other's
// [_gaad, _gdaf] extent widened by _cebae.
func (_cbbg *ruling) intersects(_fagbd *ruling) bool {
	_fcdc := (_cbbg._gggf == _gaba && _fagbd._gggf == _ddga) || (_fagbd._gggf == _gaba && _cbbg._gggf == _ddga)
	_eaegf := func(_bfbbe, _efdc *ruling) bool {
		return _bfbbe._gaad-_cebae <= _efdc._ccb && _efdc._ccb <= _bfbbe._gdaf+_cebae
	}
	_cccc := _eaegf(_cbbg, _fagbd)
	_gage := _eaegf(_fagbd, _cbbg)
	if _daafa {
		_be.Printf("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a", _fcdc, _cccc, _gage, _fcdc && _cccc && _gage, _cbbg, _fagbd)
	}
	return _fcdc && _cccc && _gage
}

// addDiacritic appends _dfef to the text of the word's last mark and replaces
// that text with its NFKC-normalized form. The word must hold at least one mark.
func (_dagfe *textWord) addDiacritic(_dfef string) {
	_ecgbe := _dagfe._bbacg[len(_dagfe._bbacg)-1]
	_ecgbe._aec += _dfef
	_ecgbe._aec = _fd.NFKC.String(_ecgbe._aec)
}

// setTextRise stores _gfaed in the text state. A nil receiver is a no-op.
func (_eeb *textObject) setTextRise(_gfaed float64) {
	if _eeb == nil {
		return
	}
	_eeb._dff._cabf = _gfaed
}

// isActualGrid decides whether the rulings form a grid.
//
// The list is split by augmentGrid; the first part needs at least _gafg+1
// entries and the second at least _ebgg+1. Depending on _beegc, either the
// outermost rulings of both parts must meet at their ends (checked with
// _cdgde) or both parts must satisfy aligned(). On success the two parts are
// returned concatenated together with true; otherwise nil and false.
func (_cbeaa rulingList) isActualGrid() (rulingList, bool) {
	_fgbb, _cbgg := _cbeaa.augmentGrid()
	if !(len(_fgbb) >= _gafg+1 && len(_cbgg) >= _ebgg+1) {
		if _daafa {
			_eg.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(_fgbb), len(_cbgg), _gafg+1, _ebgg+1)
		}
		return nil, false
	}
	if _daafa {
		_eg.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", _cbeaa, len(_fgbb) >= 2, len(_cbgg) >= 2, len(_fgbb) >= 2 && len(_cbgg) >= 2)
		for _dbfg, _fggab := range _cbeaa {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", _dbfg, _fggab)
		}
	}
	if _beegc {
		_feee, _cddbb := _fgbb[0], _fgbb[len(_fgbb)-1]
		_fedfg, _ecfca := _cbgg[0], _cbgg[len(_cbgg)-1]
		if !(_cdgde(_feee._ccb-_fedfg._gaad) && _cdgde(_cddbb._ccb-_fedfg._gdaf) && _cdgde(_fedfg._ccb-_feee._gdaf) && _cdgde(_ecfca._ccb-_feee._gaad)) {
			if _daafa {
				_eg.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", _feee, _cddbb, _fedfg, _ecfca)
			}
			return nil, false
		}
	} else {
		if !_fgbb.aligned() {
			if _cddf {
				_eg.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(_fgbb))
			}
			return nil, false
		}
		if !_cbgg.aligned() {
			if _daafa {
				_eg.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(_cbgg))
			}
			return nil, false
		}
	}
	_egb := append(_fgbb, _cbgg...)
	return _egb, true
}

// _bccf builds a textWord from a non-empty list of marks. The word's rectangle
// is the result of folding _gcff over the marks' rectangles, _efag is the
// largest _gaaaf among the marks and _acag is _ebggf.Ury minus the word's Lly.
func _bccf(_ccfcd []*textMark, _ebggf _bd.PdfRectangle) *textWord {
	_feec := _ccfcd[0].PdfRectangle
	_caegb := _ccfcd[0]._gaaaf
	for _, _gbce := range _ccfcd[1:] {
		_feec = _gcff(_feec, _gbce.PdfRectangle)
		if _gbce._gaaaf > _caegb {
			_caegb = _gbce._gaaaf
		}
	}
	return &textWord{PdfRectangle: _feec, _bbacg: _ccfcd, _acag: _ebggf.Ury - _feec.Lly, _efag: _caegb}
}

// _ccc maps each rulingKind to its display name.
var _ccc = map[rulingKind]string{_eedb: "\u006e\u006f\u006e\u0065", _ddga: "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c", _gaba: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"}
// _ccdde converts exactly two PDF number objects into a pair of floats.
// It fails when the operand count is not 2 or when the conversion fails.
func _ccdde(_cgggc []_db.PdfObject) (_gbeec, _cagc float64, _bggf error) {
	if count := len(_cgggc); count != 2 {
		return 0, 0, _be.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", count)
	}
	values, err := _db.GetNumbersAsFloat(_cgggc)
	if err != nil {
		return 0, 0, err
	}
	return values[0], values[1], nil
}

// compositeCell is a rectangle together with a list of paragraphs.
type compositeCell struct {
	_bd.PdfRectangle
	paraList
}

// drawRectangle starts a new subpath and traces the rectangle with corner
// (_beed, _dba), width _edeg and height _gfad, then closes the path. When
// _dded is set the rectangle is also logged in device coordinates.
func (_ega *shapesState) drawRectangle(_beed, _dba, _edeg, _gfad float64) {
	if _dded {
		lowerLeft := _ega.devicePoint(_beed, _dba)
		upperRight := _ega.devicePoint(_beed+_edeg, _dba+_gfad)
		deviceRect := _bd.PdfRectangle{
			Llx: lowerLeft.X,
			Lly: lowerLeft.Y,
			Urx: upperRight.X,
			Ury: upperRight.Y,
		}
		_eg.Log.Info("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066", deviceRect)
	}

	_ega.newSubPath()
	_ega.moveTo(_beed, _dba)
	_ega.lineTo(_beed+_edeg, _dba)
	_ega.lineTo(_beed+_edeg, _dba+_gfad)
	_ega.lineTo(_beed, _dba+_gfad)
	_ega.closePath()
}

// depthBand returns depthRange over the depth indexes of the two depths, or
// nil when the bag holds no entries.
func (_ecbe *wordBag) depthBand(_ffcg, _ggfb float64) []int {
	if len(_ecbe._fadg) == 0 {
		return nil
	}
	fromIdx := _ecbe.getDepthIdx(_ffcg)
	toIdx := _ecbe.getDepthIdx(_ggfb)
	return _ecbe.depthRange(fromIdx, toIdx)
}

// toTextMarks returns the marks of every word in the line in order, preceded
// by a space mark for each word whose _fgbg flag is set.
func (_gdcdd *textLine) toTextMarks(_dfbca *int) []TextMark {
	var marks []TextMark
	for _, word := range _gdcdd._ebge {
		if word._fgbg {
			marks = _efbc(marks, _dfbca, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_dfbca)...)
	}
	return marks
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// ExtractFonts returns the font information found in the page's resources:
// font name, font type, the raw bytes of the embedded font program (when one
// is embedded), the font descriptor and more.
//
// previousPageFonts lets callers accumulate a font catalog over several pages
// or a whole document: each of its entries whose FontName is not already
// present among this page's fonts is appended to the result, so the returned
// list holds no duplicate names.
//
// NOTE: When previousPageFonts is nil only the fonts of this page are returned.
func (_fa *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	pageFonts := PageFonts{}
	if err := pageFonts.extractPageResourcesToFont(_fa._dbb); err != nil {
		return nil, err
	}
	if previousPageFonts == nil {
		return &PageFonts{Fonts: pageFonts.Fonts}, nil
	}
	for _, earlier := range previousPageFonts.Fonts {
		if _edd(pageFonts.Fonts, earlier.FontName) {
			continue
		}
		pageFonts.Fonts = append(pageFonts.Fonts, earlier)
	}
	return &PageFonts{Fonts: pageFonts.Fonts}, nil
}

// _cecc returns the translation component of the matrix as a point.
func _cecc(_bcc _g.Matrix) _g.Point {
	tx, ty := _bcc.Translation()
	return _g.Point{X: tx, Y: ty}
}

// subpath is a sequence of points (_gcbb) plus the flag _bad, which
// subpath.close sets to true.
type subpath struct {
	_gcbb []_g.Point
	_bad  bool
}
2022-06-06 22:48:24 +00:00
// Font represents the font properties on a PDF page.
2022-06-27 19:58:38 +00:00
type Font struct{PdfFont *_bd .PdfFont ;
2022-06-06 22:48:24 +00:00
// FontName represents Font Name from font properties.
FontName string ;
// FontType represents Font Subtype entry in the font dictionary inside page resources.
// Examples : type0, Type1, MMType1, Type3, TrueType, CIDFont.
FontType string ;
// ToUnicode is true if font provides a `ToUnicode` mapping.
ToUnicode bool ;
// IsCID is true if underlying font is a composite font.
// Composite font is represented by a font dictionary whose Subtype is `Type0`
IsCID bool ;
// IsSimple is true if font is simple font.
// A simple font is limited to only 8 bit (255) character codes.
IsSimple bool ;
// FontData represents the raw data of the embedded font file.
// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CCF).
// FontData value can be indicates from `FontFile`, `FontFile2` or `FontFile3` inside Font Descriptor.
// At most, only one of `FontFile`, `FontFile2` or `FontFile3` will be FontData value.
FontData []byte ;
// FontFileName is a name representing the font. it has format:
// (Font Name) + (Font Type Extension), example: helvetica.ttf.
FontFileName string ;
// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
2022-06-27 19:58:38 +00:00
FontDescriptor *_bd .PdfFontDescriptor ;};func (_eefcf rulingList )merge ()*ruling {_gfgde :=_eefcf [0]._ccb ;_dgce :=_eefcf [0]._gaad ;_cgcf :=_eefcf [0]._gdaf ;for _ ,_fceg :=range _eefcf [1:]{_gfgde +=_fceg ._ccb ;if _fceg ._gaad < _dgce {_dgce =_fceg ._gaad ;
};if _fceg ._gdaf > _cgcf {_cgcf =_fceg ._gdaf ;};};_abcg :=&ruling {_gggf :_eefcf [0]._gggf ,_beaec :_eefcf [0]._beaec ,Color :_eefcf [0].Color ,_ccb :_gfgde /float64 (len (_eefcf )),_gaad :_dgce ,_gdaf :_cgcf };if _cddf {_eg .Log .Info ("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073",len (_eefcf ),_abcg );
for _efcfe ,_cffc :=range _eefcf {_be .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_efcfe ,_cffc );};};return _abcg ;};func (_fggff *ruling )alignsPrimary (_fbda *ruling )bool {return _fggff ._gggf ==_fbda ._gggf &&_bf .Abs (_fggff ._ccb -_fbda ._ccb )< _ffd *0.5;
};func (_cfcef intSet )del (_cgabg int ){delete (_cfcef ,_cgabg )};func (_gbe *textObject )setFont (_ade string ,_eea float64 )error {if _gbe ==nil {return nil ;};_gbe ._dff ._bcdg =_eea ;_aad ,_ecdd :=_gbe .getFont (_ade );if _ecdd !=nil {return _ecdd ;
};_gbe ._dff ._degee =_aad ;return nil ;};func (_ddfg rulingList )primaries ()[]float64 {_cdeef :=make (map[float64 ]struct{},len (_ddfg ));for _ ,_fcbgc :=range _ddfg {_cdeef [_fcbgc ._ccb ]=struct{}{};};_eeaab :=make ([]float64 ,len (_cdeef ));_dcfad :=0;
for _egab :=range _cdeef {_eeaab [_dcfad ]=_egab ;_dcfad ++;};_cc .Float64s (_eeaab );return _eeaab ;};
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// PageText represents the layout of text on a device page.
type PageText struct {
	_fgd  []*textMark
	_ebce string
	_dged []TextMark
	_bfbb []TextTable
	_fbd  _bd.PdfRectangle
	_egg  []pathSection
	_dbe  []pathSection
}

// close appends the first point again unless _fafbc already accepts the first
// and last points as a match, marks the subpath via _bad and finally removes
// duplicate points. The subpath must contain at least one point.
func (_aef *subpath) close() {
	alreadyClosed := _fafbc(_aef._gcbb[0], _aef.last())
	if !alreadyClosed {
		_aef.add(_aef._gcbb[0])
	}
	_aef._bad = true
	_aef.removeDuplicates()
}

// _cfgee drains the word bag _ddfe into a list of new word bags.
//
// For every depth index, while words remain at that index, the first word in
// reading order seeds a new bag (built by _gcab), and the bag is then grown by
// repeated scanBand passes until a pass adds nothing:
//   - a "vertical" pass over the bag's depth range widened by _beedf*_dbga,
//   - a "horizontal" pass over the bag's own depth range,
//   - if neither added anything, a counting pass followed, when the count
//     passes the threshold test below, by an "other" pass.
func _cfgee(_ddfe *wordBag, _ageg float64, _edcag, _deca rulingList) []*wordBag {
	var bags []*wordBag
	for _, depthIdx := range _ddfe.depthIndexes() {
		for !_ddfe.empty(depthIdx) {
			readingIdx := _ddfe.firstReadingIndex(depthIdx)
			seed := _ddfe.firstWord(readingIdx)
			bag := _gcab(seed, _ageg, _edcag, _deca)
			_ddfe.removeWord(seed, readingIdx)
			if _cba {
				_eg.Log.Info("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073", seed.String())
			}
			for {
				grew := false
				otherGap := _gbfd * bag._dbga
				readingGap := _gga * bag._dbga
				depthGap := _beedf * bag._dbga
				if _cba {
					_eg.Log.Info("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066", bag.minDepth(), bag.maxDepth(), depthGap, readingGap)
				}
				// Both passes always run; either one adding words restarts the loop.
				if _ddfe.scanBand("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c", bag, _eaab(_ccg, 0), bag.minDepth()-depthGap, bag.maxDepth()+depthGap, _bbga, false, false) > 0 {
					grew = true
				}
				if _ddfe.scanBand("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c", bag, _eaab(_ccg, readingGap), bag.minDepth(), bag.maxDepth(), _bbab, false, false) > 0 {
					grew = true
				}
				if grew {
					continue
				}
				candidates := _ddfe.scanBand("", bag, _eaab(_bgce, otherGap), bag.minDepth(), bag.maxDepth(), _gffb, true, false)
				if candidates > 0 {
					depthSpan := (bag.maxDepth() - bag.minDepth()) / bag._dbga
					if (candidates > 1 && float64(candidates) > 0.3*depthSpan) || candidates <= 10 {
						if _ddfe.scanBand("\u006f\u0074\u0068e\u0072", bag, _eaab(_bgce, otherGap), bag.minDepth(), bag.maxDepth(), _gffb, false, true) > 0 {
							grew = true
						}
					}
				}
				if !grew {
					break
				}
			}
			bags = append(bags, bag)
		}
	}
	return bags
}

// textWord is a rectangle plus the marks (_bbacg) that make up a word.
type textWord struct {
	_bd.PdfRectangle
	_acag  float64
	_debad string
	_bbacg []*textMark
	_efag  float64
	_fgbg  bool
}

// stateStack is a stack of text states.
type stateStack []*textState
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_bdbf PageText) Marks() *TextMarkArray {
	return &TextMarkArray{_ggbg: _bdbf._dged}
}

// inTile returns the members of `_dbed` whose PdfRectangle is accepted by
// `_cdeee.contains`. When the `_eeca` flag is set the matches are also printed
// to stdout.
func (_dbed paraList) inTile(_cdeee gridTile) paraList {
	var inside paraList
	for _, para := range _dbed {
		if _cdeee.contains(para.PdfRectangle) {
			inside = append(inside, para)
		}
	}
	if _eeca {
		_be.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _cdeee, len(inside))
		for i, para := range inside {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
		_be.Println("")
	}
	return inside
}

// _bddbg packs two indices into one map key: `_edfd`*0x1000000 + `_fgab`.
func _bddbg(_edfd, _fgab int) uint64 {
	return uint64(_edfd)*0x1000000 + uint64(_fgab)
}

// fontEntry pairs a font with an int64 value.
// NOTE(review): `_fegb` is presumably cache bookkeeping (access counter) - confirm against its users.
type fontEntry struct {
	_fadc *_bd.PdfFont
	_fegb int64
}

// reduce returns a copy of `_edbef` without the rows and columns that
// emptyCompositeRow / emptyCompositeColumn report as empty. If nothing is
// empty the receiver itself is returned.
func (_edbef *textTable) reduce() *textTable {
	keptRows := make([]int, 0, _edbef._gccb)
	keptCols := make([]int, 0, _edbef._bgcfb)
	for y := 0; y < _edbef._gccb; y++ {
		if !_edbef.emptyCompositeRow(y) {
			keptRows = append(keptRows, y)
		}
	}
	for x := 0; x < _edbef._bgcfb; x++ {
		if !_edbef.emptyCompositeColumn(x) {
			keptCols = append(keptCols, x)
		}
	}
	if len(keptRows) == _edbef._gccb && len(keptCols) == _edbef._bgcfb {
		return _edbef
	}
	size := len(keptCols) * len(keptRows)
	reduced := textTable{
		_cdgb:  _edbef._cdgb,
		_bgcfb: len(keptCols),
		_gccb:  len(keptRows),
		_deedc: make(map[uint64]*textPara, size),
		// putComposite stores into _agga; without this map the first
		// putComposite call below panics with a nil-map assignment.
		_agga: make(map[uint64]compositeCell, size),
	}
	if _eeca {
		_eg.Log.Info("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064", _edbef._bgcfb, _edbef._gccb, len(keptCols), len(keptRows))
		_eg.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_eg.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}
	for newY, oldY := range keptRows {
		for newX, oldX := range keptCols {
			paras, rect := _edbef.getComposite(oldX, oldY)
			if paras == nil {
				continue
			}
			if _eeca {
				_be.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newX, newY, oldX, oldY, _dbec(paras.merge().text(), 50))
			}
			reduced.putComposite(newX, newY, paras, rect)
		}
	}
	return &reduced
}

// clearPath resets the `_gdbg` and `_beg` fields of the shapes state.
func (_eeeg *shapesState) clearPath() {
	_eeeg._gdbg = nil
	_eeeg._beg = false
	if _dded {
		_eg.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _eeeg)
	}
}

// getRight collects, for every row, the `_bbbdd` neighbour of the cell in the
// last column. It returns nil if any neighbour is taken() or if consecutive
// neighbours are not linked through their `_egad` field.
func (_gaeec *textTable) getRight() paraList {
	neighbours := make(paraList, _gaeec._gccb)
	for y := 0; y < _gaeec._gccb; y++ {
		next := _gaeec.get(_gaeec._bgcfb-1, y)._bbbdd
		if next.taken() {
			return nil
		}
		neighbours[y] = next
	}
	for y := 0; y < _gaeec._gccb-1; y++ {
		if neighbours[y]._egad != neighbours[y+1] {
			return nil
		}
	}
	return neighbours
}

// _begc returns a newline when `_bcaga` rejects the difference of the two
// values, and a single space otherwise.
func _begc(_feed, _affa float64) string {
	if !_bcaga(_feed - _affa) {
		return "\u000a"
	}
	return "\u0020"
}

// _bdcd logs the number of ruling lists under the title `_dbda` and prints
// each list to stdout.
func _bdcd(_dbda string, _eafa []rulingList) {
	_eg.Log.Info("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073", len(_eafa), _dbda)
	for i, list := range _eafa {
		_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, list.String())
	}
}

// nextLine moves the line position by (0, -`_dde`) of the current text state.
func (_aae *textObject) nextLine() {
	_aae.moveLP(0, -_aae._dff._dde)
}

// _abfgd returns `_ageb`, followed by the values of `_cddc` lying strictly
// between the smaller and larger of the two bounds, followed by `_gagfe`.
// Scanning stops at the first value at or above the upper bound, so `_cddc`
// is presumably sorted ascending - confirm against callers.
func _abfgd(_cddc []float64, _ageb, _gagfe float64) []float64 {
	lo, hi := _ageb, _gagfe
	if hi < lo {
		lo, hi = hi, lo
	}
	out := make([]float64, 0, len(_cddc)+2)
	out = append(out, _ageb)
	for _, v := range _cddc {
		if v <= lo {
			continue
		}
		if v >= hi {
			break
		}
		out = append(out, v)
	}
	return append(out, _gagfe)
}

// makeRectRuling tries to interpret the first four points of the subpath as an
// axis-aligned rectangle and convert it to a ruling coloured `_bbdeg`.
// The bool result is false when the path does not qualify; callers must not
// use the returned pointer in that case.
func (_beef *subpath) makeRectRuling(_bbdeg _fb.Color) (*ruling, bool) {
	if _dddf {
		_eg.Log.Info("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076", _beef)
	}
	// Fewer than four points cannot describe a rectangle; the slice
	// expression below would otherwise panic or expose stale capacity.
	if len(_beef._gcbb) < 4 {
		return &ruling{}, false
	}
	points := _beef._gcbb[:4]
	kinds := make(map[int]rulingKind, len(points))
	for i, p := range points {
		next := _beef._gcbb[(i+1)%4]
		kinds[i] = _ebgee(p, next)
		if _dddf {
			_be.Printf("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066", i, kinds[i], p, next)
		}
	}
	if _dddf {
		_be.Printf("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a", kinds)
	}
	// Walk the sides in index order (not map order) so the result and the
	// debug output are deterministic.
	var verts, horzs []int
	for i := range points {
		switch kinds[i] {
		case _ddga:
			horzs = append(horzs, i)
		case _gaba:
			verts = append(verts, i)
		}
	}
	if _dddf {
		_be.Printf("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(horzs), horzs)
		_be.Printf("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(verts), verts)
	}
	ok := (len(horzs) == 2 && len(verts) == 2) ||
		(len(horzs) == 2 && len(verts) == 0 && _dbfdf(points[horzs[0]], points[horzs[1]])) ||
		(len(verts) == 2 && len(horzs) == 0 && _gfde(points[verts[0]], points[verts[1]]))
	if _dddf {
		_be.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
	}
	if !ok {
		if _dddf {
			_eg.Log.Error("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v", _beef)
			_be.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
		}
		return &ruling{}, false
	}
	// When only one orientation was detected, treat every other side as the
	// missing orientation.
	if len(verts) == 0 {
		for i := range points {
			if kinds[i] != _ddga {
				verts = append(verts, i)
			}
		}
	}
	if len(horzs) == 0 {
		for i := range points {
			if kinds[i] != _gaba {
				horzs = append(horzs, i)
			}
		}
	}
	if _dddf {
		_eg.Log.Info("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076", len(horzs), len(verts), len(points), horzs, verts, points)
	}
	var hiX, loX, hiY, loY _g.Point
	if points[horzs[0]].Y > points[horzs[1]].Y {
		hiY, loY = points[horzs[0]], points[horzs[1]]
	} else {
		hiY, loY = points[horzs[1]], points[horzs[0]]
	}
	if points[verts[0]].X > points[verts[1]].X {
		hiX, loX = points[verts[0]], points[verts[1]]
	} else {
		hiX, loX = points[verts[1]], points[verts[0]]
	}
	rect := _bd.PdfRectangle{Llx: hiX.X, Urx: loX.X, Lly: loY.Y, Ury: hiY.Y}
	// Normalise so that Llx <= Urx and Lly <= Ury.
	if rect.Llx > rect.Urx {
		rect.Llx, rect.Urx = rect.Urx, rect.Llx
	}
	if rect.Lly > rect.Ury {
		rect.Lly, rect.Ury = rect.Ury, rect.Lly
	}
	candidate := rectRuling{PdfRectangle: rect, _aegd: _ccbff(rect), Color: _bbdeg}
	if candidate._aegd == _eedb {
		if _dddf {
			_eg.Log.Error("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c")
		}
		return nil, false
	}
	result, isRuling := candidate.asRuling()
	if !isRuling {
		if _dddf {
			_eg.Log.Error("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg")
		}
		return nil, false
	}
	if _daafa {
		_be.Printf("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a", result.String())
	}
	return result, true
}

// intSet is a set of ints.
type intSet map[int]struct{}

// _dbaee returns the intersection of two rectangles and true, or a zero
// rectangle and false when `_bfgg` reports that they do not qualify.
func _dbaee(_afe, _eeff _bd.PdfRectangle) (_bd.PdfRectangle, bool) {
	if !_bfgg(_afe, _eeff) {
		return _bd.PdfRectangle{}, false
	}
	return _bd.PdfRectangle{
		Llx: _bf.Max(_afe.Llx, _eeff.Llx),
		Urx: _bf.Min(_afe.Urx, _eeff.Urx),
		Lly: _bf.Max(_afe.Lly, _eeff.Lly),
		Ury: _bf.Min(_afe.Ury, _eeff.Ury),
	}, true
}

// renderText converts the raw string bytes `_cfdd` to character codes and
// text with the current font, appends one text mark per code to `_dcaa`, and
// advances the `_cbff` matrix after each emitted mark.
// It returns an error when the font has no metrics for a character code.
func (_gbd *textObject) renderText(_cfdd []byte) error {
	if _gbd._gdcg {
		_eg.Log.Debug("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e")
		return nil
	}
	font := _gbd.getCurrentFont()
	charcodes := font.BytesToCharcodes(_cfdd)
	texts, numChars, numMisses := font.CharcodesToStrings(charcodes)
	if numMisses > 0 {
		_eg.Log.Debug("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064", numChars, numMisses)
	}
	_gbd._dff._fdfc += numChars
	_gbd._dff._dcef += numMisses
	state := _gbd._dff
	tfs := state._bcdg
	th := state._aee / 100.0
	glyphScale := _gcg
	if font.Subtype() == "\u0054\u0079\u0070e\u0033" {
		glyphScale = 1
	}
	// Space metrics: by rune, then by char code 32, then from the default font.
	spaceMetrics, found := font.GetRuneMetrics(' ')
	if !found {
		spaceMetrics, found = font.GetCharMetrics(32)
	}
	if !found {
		spaceMetrics, _ = _bd.DefaultFont().GetRuneMetrics(' ')
	}
	spaceWidth := spaceMetrics.Wx * glyphScale
	_eg.Log.Trace("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066", spaceWidth, texts, font, tfs)
	stateMatrix := _g.NewMatrix(tfs*th, 0, 0, tfs, 0, state._cabf)
	if _agbg {
		_eg.Log.Info("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071", len(charcodes), charcodes, texts)
	}
	// The last verb is %q: pass the texts themselves rather than their count,
	// which %q would render as a quoted character literal.
	_eg.Log.Trace("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071", len(charcodes), charcodes, texts)
	fillColor := _gbd.getFillColor()
	strokeColor := _gbd.getStrokeColor()
	for i, text := range texts {
		runes := []rune(text)
		if len(runes) == 1 && runes[0] == '\x00' {
			continue
		}
		code := charcodes[i]
		trm := _gbd._cddag.CTM.Mult(_gbd._cbff).Mult(stateMatrix)
		// `_fec` is added to the advance only for a single space (code point 32).
		extra := 0.0
		if len(runes) == 1 && runes[0] == 32 {
			extra = state._fec
		}
		metrics, hasMetrics := font.GetCharMetrics(code)
		if !hasMetrics {
			_eg.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073", code, runes, runes, font)
			return _be.Errorf("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064", font.String(), code)
		}
		glyph := _g.Point{X: metrics.Wx * glyphScale, Y: metrics.Wy * glyphScale}
		t0 := _g.Point{X: (glyph.X*tfs + extra) * th}
		t := _g.Point{X: (glyph.X*tfs + state._cab + extra) * th}
		if _agbg {
			_eg.Log.Info("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066", tfs, state._cab, state._fec, th)
			_eg.Log.Info("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f", glyph, t0, t)
		}
		td0 := _fdde(t0)
		td := _fdde(t)
		end := _gbd._cddag.CTM.Mult(_gbd._cbff).Mult(td0)
		if _cgaa {
			_eg.Log.Info("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073", _gbd._cddag.CTM, _gbd._cbff, td, _cecc(_gbd._cddag.CTM.Mult(_gbd._cbff).Mult(td)), td0, end, _cecc(end))
		}
		mark, onPage := _gbd.newTextMark(_a.ExpandLigatures(runes), trm, _cecc(end), _bf.Abs(spaceWidth*trm.ScalingFactorX()), font, _gbd._dff._cab, fillColor, strokeColor)
		if !onPage {
			// NOTE(review): skipping here also skips the Concat below, so the
			// matrix is not advanced for this glyph - confirm this is intended.
			_eg.Log.Debug("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067")
			continue
		}
		if font == nil {
			_eg.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e")
		} else if font.Encoder() == nil {
			_eg.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073", font)
		} else if r, decoded := font.Encoder().CharcodeToRune(code); decoded {
			mark._degeed = string(r)
		}
		_eg.Log.Trace("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073", i, code, mark, trm)
		_gbd._dcaa = append(_gbd._dcaa, &mark)
		_gbd._cbff.Concat(td)
	}
	return nil
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct {
	Fonts []Font
}

// appendWord adds `_debb` to the line, grows the line's bounding box to cover
// the word, and raises the line's `_aefd` and `_gddec` to the word's `_efag`
// and `_acag` when those are larger.
func (_daafd *textLine) appendWord(_debb *textWord) {
	_daafd._ebge = append(_daafd._ebge, _debb)
	_daafd.PdfRectangle = _gcff(_daafd.PdfRectangle, _debb.PdfRectangle)
	if _daafd._aefd < _debb._efag {
		_daafd._aefd = _debb._efag
	}
	if _daafd._gddec < _debb._acag {
		_daafd._gddec = _debb._acag
	}
}

// newSubPath clears the current path and, when `_dded` is set, logs the state.
func (_ggbf *shapesState) newSubPath() {
	_ggbf.clearPath()
	if !_dded {
		return
	}
	_eg.Log.Info("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073", _ggbf)
}

// Error message texts (stored as escaped literals by the obfuscator).
const (
	// "ERROR: Can't convert font object, invalid type"
	_ab = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
	// "ERROR: Can't get font properties, font not found"
	_bge = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"
	// "ERROR: Can't get font stream, invalid type"
	_ag = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
)
// sortStrict sorts the rulings in place. Keys, in order: `_gggf` descending,
// then `_ccb` ascending (values whose difference `_bcaga` accepts count as
// tied), then `_gaad` ascending, then `_gdaf` ascending.
func (_aeea rulingList) sortStrict() {
	less := func(_gbed, _bgcaa int) bool {
		a, b := _aeea[_gbed], _aeea[_bgcaa]
		switch {
		case a._gggf != b._gggf:
			return a._gggf > b._gggf
		case !_bcaga(a._ccb - b._ccb):
			return a._ccb < b._ccb
		case a._gaad != b._gaad:
			return a._gaad < b._gaad
		default:
			return a._gdaf < b._gdaf
		}
	}
	_cc.Slice(_aeea, less)
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// String returns a multi-line description of the page text: a header line,
// one line per element of `_fgd`, and a footer line.
func (_bcf PageText) String() string {
	title := _be.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_bcf._fgd))
	lines := make([]string, 0, len(_bcf._fgd)+2)
	lines = append(lines, "\u002d"+title)
	for _, element := range _bcf._fgd {
		lines = append(lines, element.String())
	}
	lines = append(lines, "\u002b"+title)
	return _dc.Join(lines, "\u000a")
}

// findTableGrid builds a table from the tiles of `_bccb`, filling each tile
// with the paragraphs of `_gabf` that inTile places in it. It returns the
// table (after reduceTiling and subdivide) together with the set of
// paragraphs that were placed, or (nil, nil) when too many tiles are empty
// or the first-row check below fails.
func (_gabf paraList) findTableGrid(_bccb gridTiling) (*textTable, map[*textPara]struct{}) {
	numCols := len(_bccb._defd)
	numRows := len(_bccb._eabe)
	grid := textTable{
		_cdgb:  true,
		_bgcfb: numCols,
		_gccb:  numRows,
		_deedc: make(map[uint64]*textPara, numCols*numRows),
		_agga:  make(map[uint64]compositeCell, numCols*numRows),
	}
	placed := make(map[*textPara]struct{})
	// Upper bound on empty tiles, derived from the `_fcbe` fraction.
	maxEmpty := int((1.0 - _fcbe) * float64(numCols*numRows))
	numEmpty := 0
	if _gdcf {
		_eg.Log.Info("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064", numCols, numRows)
	}
	for y, rowKey := range _bccb._eabe {
		rowTiles, haveRow := _bccb._bdgaf[rowKey]
		if !haveRow {
			continue
		}
		for x, colKey := range _bccb._defd {
			tile, haveTile := rowTiles[colKey]
			if !haveTile {
				continue
			}
			tileParas := _gabf.inTile(tile)
			if len(tileParas) != 0 {
				grid.putComposite(x, y, tileParas, tile.PdfRectangle)
				for _, para := range tileParas {
					placed[para] = struct{}{}
				}
				continue
			}
			numEmpty++
			if numEmpty > maxEmpty {
				if _gdcf {
					_eg.Log.Info("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064", numEmpty)
				}
				return nil, nil
			}
		}
	}
	// Count first-row cells that are missing or whose `_gfce` flag is unset.
	numHeader := 0
	for x := 0; x < numCols; x++ {
		if cell := grid.get(x, 0); cell == nil || !cell._gfce {
			numHeader++
		}
	}
	if numHeader == 0 {
		if _gdcf {
			_eg.Log.Info("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030")
		}
		return nil, nil
	}
	result := grid.reduceTiling(_bccb, _ceacc)
	result = result.subdivide()
	return result, placed
}

// complete reports whether the tile has all four borders.
func (_eafd gridTile) complete() bool {
	return _eafd.numBorders() == 4
}

// _bace concatenates the ruling lists in `_dfge` and splits the result with
// vertsHorzs.
func _bace(_dfge []rulingList) (rulingList, rulingList) {
	var all rulingList
	for i := range _dfge {
		all = append(all, _dfge[i]...)
	}
	return all.vertsHorzs()
}

// _bcfa is a Meta text mark with text "[X]", original " " and white fill and
// stroke colours.
var _bcfa = TextMark{
	Text:        "\u005b\u0058\u005d",
	Original:    "\u0020",
	Meta:        true,
	FillColor:   _fb.White,
	StrokeColor: _fb.White,
}

// _gaeb starts a new text line from the first word of `_eedg` at index
// `_acgc` and pulls that word out of the bag into the line.
func _gaeb(_eedg *wordBag, _acgc int) *textLine {
	first := _eedg.firstWord(_acgc)
	line := &textLine{PdfRectangle: first.PdfRectangle, _aefd: first._efag, _gddec: first._acag}
	line.pullWord(_eedg, first, _acgc)
	return line
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// Elements returns the TextMarks held by the array.
func (_dgge *TextMarkArray) Elements() []TextMark {
	return _dgge._ggbg
}

// putComposite stores the paragraphs `_ccead` with rectangle `_geeb` as the
// composite cell at (`_efeg`, `_ggde`). An empty paragraph list is logged as
// an error and ignored.
func (_affdd *textTable) putComposite(_efeg, _ggde int, _ccead paraList, _geeb _bd.PdfRectangle) {
	if len(_ccead) == 0 {
		_eg.Log.Error("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073")
		return
	}
	cell := compositeCell{PdfRectangle: _geeb, paraList: _ccead}
	if _eeca {
		// Printed before updateBBox, so this shows the rectangle as passed in.
		_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a", _efeg, _ggde, cell.String())
	}
	cell.updateBBox()
	key := _bddbg(_efeg, _ggde)
	_affdd._agga[key] = cell
}

// parasBBox returns the cell's paragraph list and its rectangle.
func (_fgcge compositeCell) parasBBox() (paraList, _bd.PdfRectangle) {
	paras, rect := _fgcge.paraList, _fgcge.PdfRectangle
	return paras, rect
}

// minDepth returns `_adbg` - (Ury - `_dbga`).
func (_gccec *wordBag) minDepth() float64 {
	top := _gccec.Ury - _gccec._dbga
	return _gccec._adbg - top
}
// merge sorts `_dbgag` into reading order (in place) and returns one textPara
// covering the union of the paragraphs' rectangles and holding all of their
// lines. It returns nil for an empty list.
func (_dbgag paraList) merge() *textPara {
	_eg.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_dbgag))
	if len(_dbgag) == 0 {
		return nil
	}
	_dbgag.sortReadingOrder()
	bbox := _dbgag[0].PdfRectangle
	// Cap the capacity so the appends below allocate a new backing array
	// instead of writing into the spare capacity of the first paragraph's
	// line slice.
	lines := _dbgag[0]._gadg
	lines = lines[:len(lines):len(lines)]
	for _, para := range _dbgag[1:] {
		bbox = _gcff(bbox, para.PdfRectangle)
		lines = append(lines, para._gadg...)
	}
	return _gade(bbox, lines)
}

// showTextAdjusted processes an array whose elements are either numbers or
// strings. A number shifts the `_cbff` matrix horizontally by
// -number*0.001*`_bcdg`; a string is rendered with renderText.
// It returns an error for a malformed element or when rendering fails.
func (_eac *textObject) showTextAdjusted(_ead *_db.PdfObjectArray) error {
	// Always false here; the swap below is kept for a vertical mode that this
	// function never enables.
	vertical := false
	for _, _addf := range _ead.Elements() {
		switch _addf.(type) {
		case *_db.PdfObjectFloat, *_db.PdfObjectInteger:
			adjust, err := _db.GetNumberAsFloat(_addf)
			if err != nil {
				_eg.Log.Debug("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", _addf, _ead)
				return err
			}
			dx, dy := -adjust*0.001*_eac._dff._bcdg, 0.0
			if vertical {
				dy, dx = dx, dy
			}
			_eac._cbff.Concat(_fdde(_g.Point{X: dx, Y: dy}))
		case *_db.PdfObjectString:
			raw, ok := _db.GetStringBytes(_addf)
			if !ok {
				_eg.Log.Trace("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", _addf, _ead)
				return _db.ErrTypeError
			}
			// Propagate rendering failures instead of dropping them, matching
			// showText, which returns renderText's error directly.
			if err := _eac.renderText(raw); err != nil {
				return err
			}
		default:
			_eg.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", _addf, _ead)
			return _db.ErrTypeError
		}
	}
	return nil
}

// bounded is implemented by anything that can report a bounding box.
type bounded interface {
	bbox() _bd.PdfRectangle
}

// getFontDirect looks up the font dictionary named `_abfg` and builds a
// PdfFont from it. A construction failure is logged and returned together
// with whatever NewPdfFontFromPdfObject produced.
func (_fea *textObject) getFontDirect(_abfg string) (*_bd.PdfFont, error) {
	fontObj, err := _fea.getFontDict(_abfg)
	if err != nil {
		return nil, err
	}
	font, err := _bd.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		_eg.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _abfg, err)
	}
	return font, err
}

// equals reports whether two rulings have the same `_gggf` and whether
// `_cecgg` accepts each pair of their `_ccb`, `_gaad` and `_gdaf` values.
func (_cdca *ruling) equals(_cdef *ruling) bool {
	return _cdca._gggf == _cdef._gggf &&
		_cecgg(_cdca._ccb, _cdef._ccb) &&
		_cecgg(_cdca._gaad, _cdef._gaad) &&
		_cecgg(_cdca._gdaf, _cdef._gdaf)
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// String returns a multi-line description of the textState stack: a header
// with the stack size followed by one indexed line per entry ("<nil>" for nil
// entries).
func (_ded *stateStack) String() string {
	stack := *_ded
	lines := make([]string, 0, len(stack)+1)
	lines = append(lines, _be.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(stack)))
	for i, entry := range stack {
		desc := "\u003c\u006e\u0069l\u003e"
		if entry != nil {
			desc = entry.String()
		}
		lines = append(lines, _be.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", i, desc))
	}
	return _dc.Join(lines, "\u000a")
}

// setTextRenderMode stores `_gdf` as the RenderMode of the current text
// state. It is a no-op on a nil receiver.
func (_bbgb *textObject) setTextRenderMode(_gdf int) {
	if _bbgb != nil {
		_bbgb._dff._ebb = RenderMode(_gdf)
	}
}

// yMean returns the mean of the Y coordinates of the line's two end points.
func (_dfbb lineRuling) yMean() float64 {
	sum := _dfbb._eadee.Y + _dfbb._feab.Y
	return 0.5 * sum
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// String returns the name registered for the ruling kind in `_ccc`, or a
// "Not a ruling" message with the numeric value when there is none.
func (_bfcgf rulingKind) String() string {
	if name, known := _ccc[_bfcgf]; known {
		return name
	}
	return _be.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _bfcgf)
}

// readBefore reports whether paragraph `_ggab` is read before paragraph
// `_degc`. `_gdff` is an ordering of paragraph indexes by Lly, as consumed by
// llyRange.
func (_adcec paraList) readBefore(_gdff []int, _ggab, _degc int) bool {
	first, second := _adcec[_ggab], _adcec[_degc]
	// Case 1: `_aceb` accepts the pair and `first` sits higher on the page.
	if _aceb(first, second) && first.Lly > second.Lly {
		return true
	}
	// Case 2 requires `first` to lie entirely to the left of `second`.
	if !(first._bgca.Urx < second._bgca.Llx) {
		return false
	}
	lowLly, highLly := first.Lly, second.Lly
	if lowLly > highLly {
		highLly, lowLly = lowLly, highLly
	}
	left := _bf.Max(first._bgca.Llx, second._bgca.Llx)
	right := _bf.Min(first._bgca.Urx, second._bgca.Urx)
	// Any other paragraph in the same Lly band that overlaps [left, right]
	// horizontally blocks the ordering.
	for _, idx := range _adcec.llyRange(_gdff, lowLly, highLly) {
		if idx == _ggab || idx == _degc {
			continue
		}
		other := _adcec[idx]
		if other._bgca.Llx <= right && left <= other._bgca.Urx {
			return false
		}
	}
	return true
}

// rectRuling is a rectangle with a ruling kind, a mark kind and a colour.
type rectRuling struct {
	_aegd  rulingKind
	_bfege markKind
	_fb.Color
	_bd.PdfRectangle
}

// _edagd returns the Llx of `_fdage`'s box minus the Urx of `_cae`'s box.
func _edagd(_fdage, _cae bounded) float64 {
	return _fdage.bbox().Llx - _cae.bbox().Urx
}

// toCellTextMarks returns the text marks of every line of the paragraph.
// Between lines a separator from `_begc` is inserted; when `_addg` is set and
// a non-final line ends in a hyphen, the line's marks are passed through
// `_gfbae` and no separator is added.
func (_aaba *textPara) toCellTextMarks(_bffe *int) []TextMark {
	var marks []TextMark
	last := len(_aaba._gadg) - 1
	for i, line := range _aaba._gadg {
		lineMarks := line.toTextMarks(_bffe)
		joinHyphen := _addg && line.endsInHyphen() && i != last
		if joinHyphen {
			lineMarks = _gfbae(lineMarks, _bffe)
		}
		marks = append(marks, lineMarks...)
		if joinHyphen || i == last {
			continue
		}
		marks = _efbc(marks, _bffe, _begc(line._gddec, _aaba._gadg[i+1]._gddec))
	}
	return marks
}

// llyOrdering returns the paragraph indexes stably sorted by ascending Lly.
func (_acfd paraList) llyOrdering() []int {
	order := make([]int, len(_acfd))
	for i := range order {
		order[i] = i
	}
	_cc.SliceStable(order, func(_fbe, _dfdc int) bool {
		return _acfd[order[_fbe]].Lly < _acfd[order[_dfdc]].Lly
	})
	return order
}

// _bgce reports whether the word's Llx lies in [bag.Urx, bag.Urx+`_aadc`).
func _bgce(_bcac *wordBag, _dec *textWord, _aadc float64) bool {
	if _dec.Llx < _bcac.Urx {
		return false
	}
	return _dec.Llx < _bcac.Urx+_aadc
}

// llyRange returns the portion of the ordering `_fbddb` whose paragraphs have
// Lly within [`_baaed`, `_feda`], or nil when the interval lies outside the
// Lly values at both ends of the ordering.
func (_bcbd paraList) llyRange(_fbddb []int, _baaed, _feda float64) []int {
	n := len(_bcbd)
	llyAt := func(i int) float64 { return _bcbd[_fbddb[i]].Lly }
	if _feda < llyAt(0) || _baaed > llyAt(n-1) {
		return nil
	}
	start := _cc.Search(n, func(i int) bool { return llyAt(i) >= _baaed })
	end := _cc.Search(n, func(i int) bool { return llyAt(i) > _feda })
	return _fbddb[start:end]
}

// snapToGroups applies snapToGroupsDirection separately to the two lists
// returned by vertsHorzs and returns their concatenation.
func (_edcg rulingList) snapToGroups() rulingList {
	verts, horzs := _edcg.vertsHorzs()
	if len(verts) > 0 {
		verts = verts.snapToGroupsDirection()
	}
	if len(horzs) > 0 {
		horzs = horzs.snapToGroupsDirection()
	}
	snapped := append(verts, horzs...)
	snapped.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return snapped
}

// augmentGrid splits the rulings with vertsHorzs and, where the bounding box
// of one list extends beyond that of the other by more than `_cebae`, adds a
// ruling on that side. It returns the (possibly extended) two lists.
func (_effb rulingList) augmentGrid() (rulingList, rulingList) {
	verts, horzs := _effb.vertsHorzs()
	if len(verts) == 0 || len(horzs) == 0 {
		return verts, horzs
	}
	origVerts, origHorzs := verts, horzs
	boxV := verts.bbox()
	boxH := horzs.bbox()
	if _daafa {
		_eg.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066", boxV)
		_eg.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066", boxH)
	}
	// Second list reaches further left: prepend a ruling at its Llx.
	if boxH.Llx < boxV.Llx-_cebae {
		left := &ruling{_beaec: _bcbda, _gggf: _gaba, _ccb: boxH.Llx, _gaad: boxV.Lly, _gdaf: boxV.Ury}
		verts = append(rulingList{left}, verts...)
	}
	// Second list reaches further right: append a ruling at its Urx.
	if boxH.Urx > boxV.Urx+_cebae {
		right := &ruling{_beaec: _bcbda, _gggf: _gaba, _ccb: boxH.Urx, _gaad: boxV.Lly, _gdaf: boxV.Ury}
		verts = append(verts, right)
	}
	// First list reaches further down: prepend a ruling at its Lly.
	if boxV.Lly < boxH.Lly-_cebae {
		bottom := &ruling{_beaec: _bcbda, _gggf: _ddga, _ccb: boxV.Lly, _gaad: boxH.Llx, _gdaf: boxH.Urx}
		horzs = append(rulingList{bottom}, horzs...)
	}
	// First list reaches further up: append a ruling at its Ury.
	if boxV.Ury > boxH.Ury+_cebae {
		top := &ruling{_beaec: _bcbda, _gggf: _ddga, _ccb: boxV.Ury, _gaad: boxH.Llx, _gdaf: boxH.Urx}
		horzs = append(horzs, top)
	}
	if len(verts)+len(horzs) == len(_effb) {
		// Nothing was added.
		return origVerts, origHorzs
	}
	combined := append(verts, horzs...)
	_effb.log("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064")
	combined.log("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d")
	return verts, horzs
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_cb *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	ctx := &imageExtractContext{_gfe: options}
	if err := ctx.extractContentStreamImages(_cb._da, _cb._dbb); err != nil {
		return nil, err
	}
	return &PageImages{Images: ctx._daa}, nil
}

// getDepthIdx converts `_dfdb` to a depth index with `_fdeba` and clamps it
// to the range spanned by the first and last entries of depthIndexes().
func (_cefa *wordBag) getDepthIdx(_dfdb float64) int {
	indexes := _cefa.depthIndexes()
	idx := _fdeba(_dfdb)
	lowest, highest := indexes[0], indexes[len(indexes)-1]
	switch {
	case idx < lowest:
		return lowest
	case idx > highest:
		return highest
	default:
		return idx
	}
}

// showText renders the string bytes `_ccde` via renderText and returns its error.
func (_bbc *textObject) showText(_ccde []byte) error {
	err := _bbc.renderText(_ccde)
	return err
}
// processOperand is a content stream handler that extracts images.
// It reacts to two operators, each only when it carries exactly one parameter:
//   - "BI": an inline image. Images whose ImageMask flag is true are skipped unless
//     the IncludeInlineStencilMasks option is set.
//   - "Do": an XObject invocation. Image XObjects and form XObjects are dispatched to
//     extractXObjectImage and extractFormImages respectively; other types are ignored.
//
// Every other operation returns nil.
func (_cf *imageExtractContext) processOperand(_fedf *_cce.ContentStreamOperation, _edc _cce.GraphicsState, _fad *_bd.PdfPageResources) error {
	switch {
	case _fedf.Operand == "BI" && len(_fedf.Params) == 1:
		inline, ok := _fedf.Params[0].(*_cce.ContentStreamInlineImage)
		if !ok {
			return nil
		}
		if isMask, ok := _db.GetBoolVal(inline.ImageMask); ok && isMask && !_cf._gfe.IncludeInlineStencilMasks {
			return nil
		}
		return _cf.extractInlineImage(inline, _edc, _fad)
	case _fedf.Operand == "Do" && len(_fedf.Params) == 1:
		name, ok := _db.GetName(_fedf.Params[0])
		if !ok {
			_eg.Log.Debug("ERROR: Type")
			return _cg
		}
		_, xtype := _fad.GetXObjectByName(*name)
		switch xtype {
		case _bd.XObjectTypeImage:
			return _cf.extractXObjectImage(name, _edc, _fad)
		case _bd.XObjectTypeForm:
			return _cf.extractFormImages(name, _edc, _fad)
		}
	}
	return nil
}

// checkOp validates the parameter count of operation `_bee`.
//   - _gcda: the expected number of parameters; a negative value disables the check.
//   - _bfb: when true a count mismatch also yields an "incorrect parameter count" error,
//     otherwise the mismatch is reported with a nil error.
//
// It returns true when the operation may be processed. A nil receiver is only logged
// ("operand outside text"); it does not by itself make the check fail.
func (_cfdc *textObject) checkOp(_bee *_cce.ContentStreamOperation, _gcda int, _bfb bool) (_aage bool, _beeg error) {
	if _cfdc == nil {
		var shown []_db.PdfObject
		if _gcda > 0 {
			shown = _bee.Params
			if len(shown) > _gcda {
				shown = shown[:_gcda]
			}
		}
		_eg.Log.Debug("%#q operand outside text. params=%+v", _bee.Operand, shown)
	}
	if _gcda >= 0 && len(_bee.Params) != _gcda {
		if _bfb {
			_beeg = _f.New("incorrect parameter count")
		}
		_eg.Log.Debug("ERROR: %#q should have %d input params, got %d %+v", _bee.Operand, _gcda, len(_bee.Params), _bee.Params)
		return false, _beeg
	}
	return true, nil
}

// secMinMax returns the smallest _gaad and the largest _gdaf over all rulings in the list.
// The list must not be empty.
func (_efce rulingList) secMinMax() (float64, float64) {
	lo, hi := _efce[0]._gaad, _efce[0]._gdaf
	for _, r := range _efce[1:] {
		if r._gaad < lo {
			lo = r._gaad
		}
		if r._gdaf > hi {
			hi = r._gdaf
		}
	}
	return lo, hi
}

// _abca builds rulings from filled paths ("makeFillRulings" in its log output).
// The sections are first passed through _fdbbd; every subpath that is a quadrilateral
// and for which makeRectRuling succeeds contributes one ruling.
func _abca(_cebbd []pathSection) rulingList {
	_fdbbd(_cebbd)
	if _daafa {
		_eg.Log.Info("makeFillRulings: %d fills", len(_cebbd))
	}
	var rulings rulingList
	for _, section := range _cebbd {
		for _, path := range section._aga {
			if !path.isQuadrilateral() {
				if _daafa {
					_eg.Log.Error("!isQuadrilateral: %s", path)
				}
				continue
			}
			if r, ok := path.makeRectRuling(section.Color); ok {
				rulings = append(rulings, r)
			} else if _dddf {
				_eg.Log.Error("!makeRectRuling: %s", path)
			}
		}
	}
	if _daafa {
		_eg.Log.Info("makeFillRulings: %s", rulings.String())
	}
	return rulings
}

// removeDuplicates drops every point that _fafbc reports as equal to the point kept
// immediately before it, so runs of repeated points collapse to a single point.
func (_gafe *subpath) removeDuplicates() {
	if len(_gafe._gcbb) == 0 {
		return
	}
	kept := []_g.Point{_gafe._gcbb[0]}
	for _, p := range _gafe._gcbb[1:] {
		if !_fafbc(p, kept[len(kept)-1]) {
			kept = append(kept, p)
		}
	}
	_gafe._gcbb = kept
}

// extractTables finds tables among the paragraphs (using the grid tilings `_egbcf`)
// and returns the result of applyTables for them. Lists with fewer than _baad
// paragraphs are returned unchanged.
func (_egbe paraList) extractTables(_egbcf []gridTiling) paraList {
	if _eeca {
		_eg.Log.Debug("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_egbe))
	}
	if len(_egbe) < _baad {
		return _egbe
	}
	tables := _egbe.findTables(_egbcf)
	if _eeca {
		_eg.Log.Info("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(tables))
		for i, table := range tables {
			table.log(_be.Sprintf("combined %d", i))
		}
	}
	return _egbe.applyTables(tables)
}

// gridTiling is a rectangle with two float64 coordinate lists and a two-level map of
// gridTile values keyed by float64.
// NOTE(review): the meaning of the individual fields is not visible in this part of the
// file - confirm against the code that builds the tiling.
type gridTiling struct {
	_bd.PdfRectangle
	_defd  []float64
	_eabe  []float64
	_bdgaf map[float64]map[float64]gridTile
}

// _ebgee classifies the segment between two points by passing its absolute X and Y
// extents, together with the tolerance _cged, to _bebd.
func _ebgee(_cggb, _aagb _g.Point) rulingKind {
	dx := _bf.Abs(_cggb.X - _aagb.X)
	dy := _bf.Abs(_cggb.Y - _aagb.Y)
	return _bebd(dx, dy, _cged)
}

// logComposite prints two debugging grids for the composite cells of the table to
// standard output: one with the number of paragraphs per cell and one with the
// (truncated, quoted) merged text per cell. It does nothing unless _eeca is set.
// `_egddg` is a title included in the log headers.
func (_dbdfg *textTable) logComposite(_egddg string) {
	if !_eeca {
		return
	}
	w, h := _dbdfg._bgcfb, _dbdfg._gccb

	// Grid 1: paragraph counts.
	_eg.Log.Info("~~~Para %d x %d %s", w, h, _egddg)
	_be.Printf("%5s |", "")
	for x := 0; x < w; x++ {
		_be.Printf("%3d |", x)
	}
	_be.Println("")
	_be.Printf("%5s +", "")
	for x := 0; x < w; x++ {
		_be.Printf("%3s +", "---")
	}
	_be.Println("")
	for y := 0; y < h; y++ {
		_be.Printf("%5d |", y)
		for x := 0; x < w; x++ {
			paras, _ := _dbdfg._agga[_bddbg(x, y)].parasBBox()
			_be.Printf("%3d |", len(paras))
		}
		_be.Println("")
	}

	// Grid 2: cell text.
	_eg.Log.Info("~~~Text %d x %d %s", w, h, _egddg)
	_be.Printf("%5s |", "")
	for x := 0; x < w; x++ {
		_be.Printf("%12d |", x)
	}
	_be.Println("")
	_be.Printf("%5s +", "")
	for x := 0; x < w; x++ {
		_be.Print("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b")
	}
	_be.Println("")
	for y := 0; y < h; y++ {
		_be.Printf("%5d |", y)
		for x := 0; x < w; x++ {
			paras, _ := _dbdfg._agga[_bddbg(x, y)].parasBBox()
			text := ""
			if merged := paras.merge(); merged != nil {
				text = merged.text()
			}
			// %q escapes the text; the surrounding quotes are then stripped again.
			text = _be.Sprintf("%q", _dbec(text, 12))
			text = text[1 : len(text)-1]
			_be.Printf("%12s |", text)
		}
		_be.Println("")
	}
}

// findPrimSec returns the first ruling whose _ccb matches `_egbc` (according to _bcaga)
// and whose [_gaad, _gdaf] interval, widened by _cebae on both sides, contains `_aaceg`.
// It returns nil when no ruling matches.
func (_ebgb rulingList) findPrimSec(_egbc, _aaceg float64) *ruling {
	for _, r := range _ebgb {
		if _bcaga(r._ccb-_egbc) && r._gaad-_cebae <= _aaceg && _aaceg <= r._gdaf+_cebae {
			return r
		}
	}
	return nil
}

// log writes a description of the table followed by one line per non-nil cell to the
// log / standard output. It does nothing unless _eeca is set. `_bacdc` is a title.
func (_efgc *textTable) log(_bacdc string) {
	if !_eeca {
		return
	}
	_eg.Log.Info("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066", _bacdc, _efgc._bgcfb, _efgc._gccb, _efgc._cdgb, _efgc.PdfRectangle)
	for y := 0; y < _efgc._gccb; y++ {
		for x := 0; x < _efgc._bgcfb; x++ {
			cell := _efgc.get(x, y)
			if cell == nil {
				continue
			}
			_be.Printf("%4d %2d: %6.2f %q %d\n", x, y, cell.PdfRectangle, _dbec(cell.text(), 50), _c.RuneCountInString(cell.text()))
		}
	}
}

// yNeighbours builds an enter event at the bottom (Lly) and a leave event at the top
// (Ury) of every paragraph and passes them to eventNeighbours.
// When `_cceaf` is non-zero each interval is widened on both sides by `_cceaf` times
// the paragraph's fontsize().
func (_fcdge paraList) yNeighbours(_cceaf float64) map[*textPara][]int {
	events := make([]event, 2*len(_fcdge))
	if _cceaf == 0 {
		for i, para := range _fcdge {
			events[2*i] = event{para.Lly, true, i}
			events[2*i+1] = event{para.Ury, false, i}
		}
	} else {
		for i, para := range _fcdge {
			events[2*i] = event{para.Lly - _cceaf*para.fontsize(), true, i}
			events[2*i+1] = event{para.Ury + _cceaf*para.fontsize(), false, i}
		}
	}
	return _fcdge.eventNeighbours(events)
}

// aligned reports whether a sufficient share of the rulings intersect one another.
// Each ruling is either counted against the first already-registered ruling it
// gridIntersects, or registered itself with a count of zero. The list is aligned when
// the fraction of registered rulings that stayed at zero is at most 1.0-_bfba.
// Lists with fewer than two rulings are never aligned.
// NOTE(review): the registered rulings are visited in map iteration order, which Go
// does not define, so the ruling that receives a match can differ between runs.
func (_cgagf rulingList) aligned() bool {
	if len(_cgagf) < 2 {
		return false
	}
	matches := make(map[*ruling]int)
	matches[_cgagf[0]] = 0
	for _, r := range _cgagf[1:] {
		found := false
		for key := range matches {
			if r.gridIntersecting(key) {
				matches[key]++
				found = true
				break
			}
		}
		if !found {
			matches[r] = 0
		}
	}
	unmatched := 0
	for _, n := range matches {
		if n == 0 {
			unmatched++
		}
	}
	frac := float64(unmatched) / float64(len(_cgagf))
	isAligned := frac <= 1.0-_bfba
	if _daafa {
		_eg.Log.Info("aligned=%t unmatched=%.2f=%d/%d vecs=%s", isAligned, frac, unmatched, len(_cgagf), _cgagf.String())
	}
	return isAligned
}

// _eaada builds rulings from stroked paths ("makeStrokeRulings" in its log output).
// The sections are first passed through _fdbbd; every pair of consecutive points of a
// subpath for which _gfac succeeds contributes one ruling.
func _eaada(_ggadb []pathSection) rulingList {
	_fdbbd(_ggadb)
	if _daafa {
		_eg.Log.Info("makeStrokeRulings: %d strokes", len(_ggadb))
	}
	var rulings rulingList
	for _, section := range _ggadb {
		for _, path := range section._aga {
			if len(path._gcbb) < 2 {
				continue
			}
			prev := path._gcbb[0]
			for _, next := range path._gcbb[1:] {
				if r, ok := _gfac(prev, next, section.Color); ok {
					rulings = append(rulings, r)
				}
				prev = next
			}
		}
	}
	if _daafa {
		_eg.Log.Info("makeStrokeRulings: %s", rulings)
	}
	return rulings
}

// cubicTo handles a cubic Bezier segment. Only the end point (`_edbee`, `_ffc`) is
// added to the current path; the control points are ignored.
func (_fdcg *shapesState) cubicTo(_fdba, _ccaa, _ffeb, _faea, _edbee, _ffc float64) {
	if _dded {
		_eg.Log.Info("cubicTo:")
	}
	_fdcg.addPoint(_edbee, _ffc)
}

// _gfbae removes the last rune from the last mark in `_afebf` and adjusts the running
// text offset `*_egae` accordingly.
//   - If the last mark holds a single rune the whole mark is dropped and the offset is
//     moved back to the end of the new last mark.
//   - Otherwise the mark's text is shortened with _bfe and the offset is reduced by the
//     number of bytes removed.
//
// Fixes over the previous version: the shortened text is now stored in the slice (it
// used to be written to a local copy and lost), and removing the only mark, or being
// called with no marks, no longer indexes past the start of the slice.
func _gfbae(_afebf []TextMark, _egae *int) []TextMark {
	if len(_afebf) == 0 {
		return _afebf
	}
	last := len(_afebf) - 1
	mark := _afebf[last]
	if _c.RuneCountInString(mark.Text) == 1 {
		_afebf = _afebf[:last]
		if last == 0 {
			// No mark is left to derive the offset from; rewind to where the removed
			// mark started.
			*_egae = mark.Offset
			return _afebf
		}
		prev := _afebf[last-1]
		*_egae = prev.Offset + len(prev.Text)
		return _afebf
	}
	shortened := _bfe(mark.Text)
	*_egae += len(shortened) - len(mark.Text)
	_afebf[last].Text = shortened
	return _afebf
}

// lastpointEstablished returns the current point of the path state.
// It yields (_eee, false) when _beg is set, otherwise (last point of the final
// subpath, false) when that subpath has its _bad flag set, otherwise (zero point, true).
// NOTE(review): the boolean is true only when NO point is available, which reads
// inverted relative to the method name - confirm against the callers.
func (_cdf *shapesState) lastpointEstablished() (_g.Point, bool) {
	if _cdf._beg {
		return _cdf._eee, false
	}
	n := len(_cdf._gdbg)
	if n > 0 && _cdf._gdbg[n-1]._bad {
		return _cdf._gdbg[n-1].last(), false
	}
	return _g.Point{}, true
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// String returns a description of `tm`: its bounding box, the value of _gaaaf
// (labelled "fontsize") and its quoted text.
func (_efbgc *textMark) String() string {
	return _be.Sprintf("%.2f fontsize=%.2f \"%s\"", _efbgc.PdfRectangle, _efbgc._gaaaf, _efbgc._aec)
}

// textLine is a bounding rectangle with a list of words and two float64 attributes.
// NOTE(review): _gddec is used as the primary sort key of lines in _fdebfb, so it is
// presumably the line's depth on the page - confirm.
type textLine struct {
	_bd.PdfRectangle
	_gddec float64
	_ebge  []*textWord
	_aefd  float64
}

// textTable is a bounding rectangle plus a grid of cells.
// _bgcfb and _gccb are the grid dimensions (the loops in log and logComposite iterate
// x < _bgcfb and y < _gccb); _cdgb is printed as "grid" by log.
// Cells are stored in two maps keyed by uint64: paragraphs and composite cells.
type textTable struct {
	_bd.PdfRectangle
	_bgcfb, _gccb int
	_cdgb         bool
	_deedc        map[uint64]*textPara
	_agga         map[uint64]compositeCell
}

// extractInlineImage converts the inline image `_ecc` to RGB and appends it to the
// extracted images as an ImageMark. Size, angle and position are taken from the CTM of
// graphics state `_adc`. DeviceGray is used when the image has no colorspace.
func (_ege *imageExtractContext) extractInlineImage(_ecc *_cce.ContentStreamInlineImage, _adc _cce.GraphicsState, _bfc *_bd.PdfPageResources) error {
	img, err := _ecc.ToImage(_bfc)
	if err != nil {
		return err
	}
	cs, err := _ecc.GetColorSpace(_bfc)
	if err != nil {
		return err
	}
	if cs == nil {
		cs = _bd.NewPdfColorspaceDeviceGray()
	}
	rgb, err := cs.ImageToRGB(*img)
	if err != nil {
		return err
	}
	mark := ImageMark{
		Image:  &rgb,
		Width:  _adc.CTM.ScalingFactorX(),
		Height: _adc.CTM.ScalingFactorY(),
		Angle:  _adc.CTM.Angle(),
	}
	mark.X, mark.Y = _adc.CTM.Translation()
	_ege._daa = append(_ege._daa, mark)
	_ege._ec++
	return nil
}

// growTable repeatedly extends the table with the paragraphs returned by getDown and
// getRight until neither yields anything.
// When both are available and they end in the same, not yet taken, paragraph, the table
// is extended in both directions in one step and that paragraph becomes the new corner
// cell. Otherwise getDown is preferred over getRight.
func (_bded *textTable) growTable() {
	// appendDown adds one entry along the second coordinate (incrementing _gccb).
	appendDown := func(paras paraList) {
		_bded._gccb++
		for x := 0; x < _bded._bgcfb; x++ {
			_bded.put(x, _bded._gccb-1, paras[x])
		}
	}
	// appendRight adds one entry along the first coordinate (incrementing _bgcfb).
	appendRight := func(paras paraList) {
		_bded._bgcfb++
		for y := 0; y < _bded._gccb; y++ {
			_bded.put(_bded._bgcfb-1, y, paras[y])
		}
	}
	if _cgbdc {
		_bded.log("growTable")
	}
	for step := 0; ; step++ {
		grown := false
		down := _bded.getDown()
		right := _bded.getRight()
		if _cgbdc {
			_be.Printf("%4d: %s\n", step, _bded)
			_be.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", down)
			_be.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", right)
		}
		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				appendDown(down)
				// getRight is re-evaluated because the table has just grown.
				if right = _bded.getRight(); right != nil {
					appendRight(right)
					_bded.put(_bded._bgcfb-1, _bded._gccb-1, corner)
				}
				grown = true
			}
		}
		if !grown && down != nil {
			appendDown(down)
			grown = true
		}
		if !grown && right != nil {
			appendRight(right)
			grown = true
		}
		if !grown {
			break
		}
	}
}

// _edd reports whether `_ceb` contains a font whose FontName equals `_eb`.
func _edd(_ceb []Font, _eb string) bool {
	for _, font := range _ceb {
		if font.FontName == _eb {
			return true
		}
	}
	return false
}

// _efd returns the initial text state for rectangle `_dbg`: _aee is 100 and the render
// mode is RenderModeFill.
func _efd(_dbg _bd.PdfRectangle) textState {
	return textState{_aee: 100, _ebb: RenderModeFill, _egdd: _dbg}
}

// arrangeText groups the words of the bag into lines and the lines into a paragraph.
// For every depth index a line is started at the first word in reading order and then
// extended with words from the surrounding depth band until none is close enough.
// All tolerances are multiples of the first word's _efag value.
// The resulting lines are sorted with _dfe. It returns nil when the bag yields no lines.
func (_gagb *wordBag) arrangeText() *textPara {
	_gagb.sort()
	if _cgffa {
		_gagb.removeDuplicates()
	}
	var lines []*textLine
	for _, depthIdx := range _gagb.depthIndexes() {
		for !_gagb.empty(depthIdx) {
			firstIdx := _gagb.firstReadingIndex(depthIdx)
			seed := _gagb.firstWord(firstIdx)
			line := _gaeb(_gagb, firstIdx)
			scale := seed._efag
			minDepth := seed._acag - _bbcc*scale
			maxDepth := seed._acag + _bbcc*scale
			maxGap := _bbfe * scale
			maxOverlap := _ffbe * scale
		growLine:
			for {
				var best *textWord
				bestIdx := 0
				for _, idx := range _gagb.depthBand(minDepth, maxDepth) {
					word := _gagb.highestWord(idx, minDepth, maxDepth)
					if word == nil {
						continue
					}
					gap := _edagd(word, line._ebge[len(line._ebge)-1])
					if gap < -maxOverlap {
						// The candidate lies too far before the end of the line: stop
						// extending this line altogether.
						break growLine
					}
					if gap > maxGap {
						continue
					}
					if best != nil && _afdf(word, best) >= 0 {
						continue
					}
					best = word
					bestIdx = idx
				}
				if best == nil {
					break
				}
				line.pullWord(_gagb, best, bestIdx)
			}
			line.markWordBoundaries()
			lines = append(lines, line)
		}
	}
	if len(lines) == 0 {
		return nil
	}
	_cc.Slice(lines, func(i, j int) bool { return _dfe(lines[i], lines[j]) < 0 })
	para := _gade(_gagb.PdfRectangle, lines)
	if _eafe {
		_eg.Log.Info("arrangeText !!! para=%s", para.String())
		if _dcaca {
			for i, ln := range para._gadg {
				_be.Printf("%4d: %s\n", i, ln.String())
				if _dega {
					for j, word := range ln._ebge {
						_be.Printf("%8d: %s\n", j, word.String())
						for k, mark := range word._bbacg {
							_be.Printf("%12d: %s\n", k, mark.String())
						}
					}
				}
			}
		}
	}
	return para
}

// extractContentStreamImages parses the content stream `_fge` and runs it through a
// processor whose handler (processOperand) extracts the images it draws.
// The image cache and the options are initialised to empty defaults when unset.
func (_bgc *imageExtractContext) extractContentStreamImages(_fge string, _fff *_bd.PdfPageResources) error {
	parser := _cce.NewContentStreamParser(_fge)
	ops, err := parser.Parse()
	if err != nil {
		return err
	}
	if _bgc._ac == nil {
		_bgc._ac = map[*_db.PdfObjectStream]*cachedImage{}
	}
	if _bgc._gfe == nil {
		_bgc._gfe = &ImageExtractOptions{}
	}
	processor := _cce.NewContentStreamProcessor(*ops)
	processor.AddHandler(_cce.HandlerConditionEnumAllOperands, "", _bgc.processOperand)
	return processor.Process(_fff)
}

// _fdebfb computes horizontal borders ("row corridors" in its debug output) that split
// the lines of the cells `_dbbaa` into groups.
// The lines of all cells are sorted by _gddec, then Llx. A border is placed halfway
// between a line's top (Ury) and the lowest bottom (Lly) seen so far whenever the line
// lies entirely below that bottom. The borders are only returned if every cell has
// lines in every resulting group (hasLines); otherwise, or when the cells contain no
// lines at all, the result is nil.
func _fdebfb(_dbbaa []compositeCell) []float64 {
	var lines []*textLine
	numParas := 0
	for _, cell := range _dbbaa {
		numParas += len(cell.paraList)
		lines = append(lines, cell.lines()...)
	}
	_cc.Slice(lines, func(i, j int) bool {
		a, b := lines[i], lines[j]
		da, db := a._gddec, b._gddec
		if !_bcaga(da - db) {
			return da < db
		}
		return a.Llx < b.Llx
	})
	if _eeca {
		_be.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", numParas, len(lines))
		for i, ln := range lines {
			_be.Printf("%8d: %s\n", i, ln)
		}
	}
	// Without any line there is nothing to separate. The previous version indexed
	// lines[0] unconditionally and panicked here.
	if len(lines) == 0 {
		return nil
	}

	var borders []float64
	lowest := lines[0]
	var groups [][]*textLine
	current := []*textLine{lowest}
	for i, ln := range lines[1:] {
		if ln.Ury < lowest.Lly {
			border := 0.5 * (ln.Ury + lowest.Lly)
			if _eeca {
				_be.Printf("%4d: %6.2f < %6.2f border=%6.2f\n"+"\t q=%s\n\t p=%s\n", i, ln.Ury, lowest.Lly, border, lowest, ln)
			}
			borders = append(borders, border)
			groups = append(groups, current)
			current = nil
		}
		current = append(current, ln)
		if ln.Lly < lowest.Lly {
			lowest = ln
		}
	}
	if len(current) > 0 {
		groups = append(groups, current)
	}
	if _eeca {
		_be.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", borders)
	}
	if _eeca {
		_eg.Log.Info("row=%d", len(_dbbaa))
		for i, cell := range _dbbaa {
			_be.Printf("%4d: %s\n", i, cell)
		}
		_eg.Log.Info("groups=%d", len(groups))
		for i, group := range groups {
			_be.Printf("%4d: %d\n", i, len(group))
			for j, ln := range group {
				_be.Printf("%8d: %s\n", j, ln)
			}
		}
	}

	// Every cell must contribute to every group, otherwise the borders are discarded.
	spansAll := true
	for gi, group := range groups {
		groupOK := true
		for ci, cell := range _dbbaa {
			if _eeca {
				_be.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", gi, len(groups), ci, len(_dbbaa), cell)
			}
			if !cell.hasLines(group) {
				if _eeca {
					_be.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", gi, len(groups), ci, len(_dbbaa))
				}
				groupOK = false
				break
			}
		}
		if !groupOK {
			spansAll = false
			break
		}
	}
	if !spansAll {
		if _eeca {
			_eg.Log.Info("row corridors don't span all cells in row. ignoring")
		}
		borders = nil
	}
	if _eeca && borders != nil {
		_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", borders)
	}
	return borders
}
// extractPageText parses the content stream `_gda` and collects its text marks and its
// stroked and filled paths into a PageText.
//
// Parameters:
//   - _gda: the content stream to parse.
//   - _gfg: the resources the content stream is processed with.
//   - _baf: matrix combined (via Mult) with the graphics-state CTM for text objects and
//     for nested form XObjects.
//   - _feg: nesting level of form XObjects; a level above _gcb aborts with a
//     "form stack overflow" error.
//
// It returns the PageText, the two counters kept in the text state (_fdfc and _dcef)
// and the error from parsing or processing. The PageText and the counters are returned
// even when the error is non-nil.
//
// Results of form XObjects are cached per XObject name in the extractor (_dg).
//
// Fixes over the previous version:
//   - "TJ" with a non-array operand returned the captured parse error, which is always
//     nil inside the handler, so the failure was silently swallowed. It now returns
//     ErrTypeError like the other text showing operators.
//   - "Tf" re-tested the result of the font name lookup instead of the error of the
//     font size conversion, so an invalid size was never rejected.
func (_deg *Extractor) extractPageText(_gda string, _gfg *_bd.PdfPageResources, _baf _g.Matrix, _feg int) (*PageText, int, int, error) {
	_eg.Log.Trace("extractPageText: level=%d", _feg)
	pageText := &PageText{_fbd: _deg._dd}
	state := _efd(_deg._dd)
	var savedStates stateStack
	to := _dge(_deg, _gfg, _cce.GraphicsState{}, &state, &savedStates)
	shapes := shapesState{_gaga: _baf, _eccf: _g.IdentityMatrix(), _baafg: to}
	var inTextObj bool

	if _feg > _gcb {
		overflowErr := _f.New("form stack overflow")
		_eg.Log.Debug("ERROR: extractPageText. recursion level=%d err=%v", _feg, overflowErr)
		return pageText, state._fdfc, state._dcef, overflowErr
	}

	parser := _cce.NewContentStreamParser(_gda)
	ops, parseErr := parser.Parse()
	if parseErr != nil {
		_eg.Log.Debug("ERROR: extractPageText parse failed. err=%v", parseErr)
		return pageText, state._fdfc, state._dcef, parseErr
	}

	processor := _cce.NewContentStreamProcessor(*ops)
	processor.AddHandler(_cce.HandlerConditionEnumAllOperands, "", func(op *_cce.ContentStreamOperation, gs _cce.GraphicsState, resources *_bd.PdfPageResources) error {
		operand := op.Operand
		if _befc {
			_eg.Log.Info("&&& op=%s", op)
		}
		switch operand {
		// ---- Graphics state stack ----
		case "q":
			if _dded {
				_eg.Log.Info("ctm=%s", shapes._eccf)
			}
			savedStates.push(&state)
		case "Q":
			if !savedStates.empty() {
				state = *savedStates.pop()
			}
			shapes._eccf = gs.CTM
			if _dded {
				_eg.Log.Info("ctm=%s", shapes._eccf)
			}

		// ---- Text objects ----
		case "BT":
			if inTextObj {
				// Nested BT: keep the marks collected so far before starting over.
				_eg.Log.Debug("BT called while in a text object")
				pageText._fgd = append(pageText._fgd, to._dcaa...)
			}
			inTextObj = true
			textGS := gs
			textGS.CTM = _baf.Mult(textGS.CTM)
			to = _dge(_deg, resources, textGS, &state, &savedStates)
			shapes._baafg = to
		case "ET":
			if !inTextObj {
				_eg.Log.Debug("ET called outside of a text object")
			}
			inTextObj = false
			pageText._fgd = append(pageText._fgd, to._dcaa...)
			to.reset()

		// ---- Text positioning ----
		case "T*":
			to.nextLine()
		case "Td":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_eg.Log.Debug("ERROR: err=%v", err)
				return err
			}
			x, y, err := _ccdde(op.Params)
			if err != nil {
				return err
			}
			to.moveText(x, y)
		case "TD":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_eg.Log.Debug("ERROR: err=%v", err)
				return err
			}
			x, y, err := _ccdde(op.Params)
			if err != nil {
				_eg.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.moveTextSetLeading(x, y)

		// ---- Text showing ----
		case "Tj":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_eg.Log.Debug("ERROR: Tj op=%s err=%v", op, err)
				return err
			}
			data, ok := _db.GetStringBytes(op.Params[0])
			if !ok {
				_eg.Log.Debug("ERROR: Tj op=%s GetStringBytes failed", op)
				return _db.ErrTypeError
			}
			return to.showText(data)
		case "TJ":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_eg.Log.Debug("ERROR: TJ err=%v", err)
				return err
			}
			args, ok := _db.GetArray(op.Params[0])
			if !ok {
				_eg.Log.Debug("ERROR: TJ op=%s GetArrayVal failed", op)
				// Previously the (nil) parse error was returned here.
				return _db.ErrTypeError
			}
			return to.showTextAdjusted(args)
		case "'":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_eg.Log.Debug("ERROR: ' err=%v", err)
				return err
			}
			data, ok := _db.GetStringBytes(op.Params[0])
			if !ok {
				_eg.Log.Debug("ERROR: ' op=%s GetStringBytes failed", op)
				return _db.ErrTypeError
			}
			to.nextLine()
			return to.showText(data)
		case "\"":
			if ok, err := to.checkOp(op, 3, true); !ok {
				_eg.Log.Debug("ERROR: \" err=%v", err)
				return err
			}
			// The first two operands feed setCharSpacing and setWordSpacing in the
			// order applied below; the third is the string to show.
			first, second, err := _ccdde(op.Params[:2])
			if err != nil {
				return err
			}
			data, ok := _db.GetStringBytes(op.Params[2])
			if !ok {
				_eg.Log.Debug("ERROR: \" op=%s GetStringBytes failed", op)
				return _db.ErrTypeError
			}
			to.setCharSpacing(first)
			to.setWordSpacing(second)
			to.nextLine()
			return to.showText(data)

		// ---- Text state ----
		case "TL":
			leading, err := _fgfa(op)
			if err != nil {
				_eg.Log.Debug("ERROR: TL err=%v", err)
				return err
			}
			to.setTextLeading(leading)
		case "Tc":
			spacing, err := _fgfa(op)
			if err != nil {
				_eg.Log.Debug("ERROR: Tc err=%v", err)
				return err
			}
			to.setCharSpacing(spacing)
		case "Tf":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_eg.Log.Debug("ERROR: Tf err=%v", err)
				return err
			}
			fontName, ok := _db.GetNameVal(op.Params[0])
			if !ok {
				_eg.Log.Debug("ERROR: Tf op=%s GetNameVal failed", op)
				return _db.ErrTypeError
			}
			size, err := _db.GetNumberAsFloat(op.Params[1])
			if err != nil {
				// Previously this tested the name lookup flag again and could never fire.
				_eg.Log.Debug("ERROR: Tf op=%s GetFloatVal failed. err=%v", op, err)
				return err
			}
			err = to.setFont(fontName, size)
			// An unsupported font is recorded on the text object instead of aborting.
			to._gdcg = _fg.Is(err, _db.ErrNotSupported)
			if err != nil && !to._gdcg {
				return err
			}
		case "Tm":
			if ok, err := to.checkOp(op, 6, true); !ok {
				_eg.Log.Debug("ERROR: Tm err=%v", err)
				return err
			}
			m, err := _db.GetNumbersAsFloat(op.Params)
			if err != nil {
				_eg.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setTextMatrix(m)
		case "Tr":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_eg.Log.Debug("ERROR: Tr err=%v", err)
				return err
			}
			mode, ok := _db.GetIntVal(op.Params[0])
			if !ok {
				_eg.Log.Debug("ERROR: Tr op=%s GetIntVal failed", op)
				return _db.ErrTypeError
			}
			to.setTextRenderMode(mode)
		case "Ts":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_eg.Log.Debug("ERROR: Ts err=%v", err)
				return err
			}
			rise, err := _db.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_eg.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setTextRise(rise)
		case "Tw":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_eg.Log.Debug("ERROR: err=%v", err)
				return err
			}
			spacing, err := _db.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_eg.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setWordSpacing(spacing)
		case "Tz":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_eg.Log.Debug("ERROR: err=%v", err)
				return err
			}
			scaling, err := _db.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_eg.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setHorizScaling(scaling)

		// ---- Transformation matrix ----
		case "cm":
			shapes._eccf = gs.CTM
			if shapes._eccf.Singular() {
				// A singular CTM is replaced by a pure translation.
				fixed := _g.IdentityMatrix().Translate(shapes._eccf.Translation())
				_eg.Log.Debug("Singular ctm=%s\u2192%s", shapes._eccf, fixed)
				shapes._eccf = fixed
			}
			if _dded {
				_eg.Log.Info("ctm=%s", shapes._eccf)
			}

		// ---- Path construction ----
		case "m":
			if len(op.Params) != 2 {
				// A malformed move is tolerated: it is logged and skipped.
				_eg.Log.Debug("WARN: error while processing `m` operator: %v. Output may be incorrect.", _bdg)
				return nil
			}
			xy, err := _db.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.moveTo(xy[0], xy[1])
		case "l":
			if len(op.Params) != 2 {
				// A malformed line is tolerated: it is logged and skipped.
				_eg.Log.Debug("WARN: error while processing `l` operator: %v. Output may be incorrect.", _bdg)
				return nil
			}
			xy, err := _db.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.lineTo(xy[0], xy[1])
		case "c":
			if len(op.Params) != 6 {
				return _bdg
			}
			p, err := _db.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_eg.Log.Debug("Cubic bezier params: %.2f", p)
			shapes.cubicTo(p[0], p[1], p[2], p[3], p[4], p[5])
		case "v", "y":
			if len(op.Params) != 4 {
				return _bdg
			}
			p, err := _db.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_eg.Log.Debug("Cubic bezier params: %.2f", p)
			shapes.quadraticTo(p[0], p[1], p[2], p[3])
		case "h":
			shapes.closePath()
		case "re":
			if len(op.Params) != 4 {
				return _bdg
			}
			r, err := _db.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.drawRectangle(r[0], r[1], r[2], r[3])
			shapes.closePath()

		// ---- Path painting: strokes go to _egg, fills to _dbe ----
		case "S":
			shapes.stroke(&pageText._egg)
			shapes.clearPath()
		case "s":
			shapes.closePath()
			shapes.stroke(&pageText._egg)
			shapes.clearPath()
		case "F":
			shapes.fill(&pageText._dbe)
			shapes.clearPath()
		case "f", "f*":
			shapes.closePath()
			shapes.fill(&pageText._dbe)
			shapes.clearPath()
		case "B", "B*":
			shapes.fill(&pageText._dbe)
			shapes.stroke(&pageText._egg)
			shapes.clearPath()
		case "b", "b*":
			shapes.closePath()
			shapes.fill(&pageText._dbe)
			shapes.stroke(&pageText._egg)
			shapes.clearPath()
		case "n":
			shapes.clearPath()

		// ---- XObjects ----
		case "Do":
			if len(op.Params) == 0 {
				_eg.Log.Debug("ERROR: expected XObject name operand for Do operator. Got %+v.", op.Params)
				return _db.ErrRangeError
			}
			name, ok := _db.GetName(op.Params[0])
			if !ok {
				_eg.Log.Debug("ERROR: invalid Do operator XObject name operand: %+v.", op.Params[0])
				return _db.ErrTypeError
			}
			// Only form XObjects contribute text.
			_, xtype := resources.GetXObjectByName(*name)
			if xtype != _bd.XObjectTypeForm {
				break
			}
			formResult, ok := _deg._dg[name.String()]
			if !ok {
				xform, err := resources.GetXObjectFormByName(*name)
				if err != nil {
					_eg.Log.Debug("ERROR: %v", err)
					return err
				}
				formContent, err := xform.GetContentStream()
				if err != nil {
					_eg.Log.Debug("ERROR: %v", err)
					return err
				}
				// A form without its own resources uses those of the current stream.
				formResources := xform.Resources
				if formResources == nil {
					formResources = resources
				}
				formText, cntA, cntB, err := _deg.extractPageText(string(formContent), formResources, _baf.Mult(gs.CTM), _feg+1)
				if err != nil {
					_eg.Log.Debug("ERROR: %v", err)
					return err
				}
				formResult = textResult{*formText, cntA, cntB}
				_deg._dg[name.String()] = formResult
			}
			shapes._eccf = gs.CTM
			if _dded {
				_eg.Log.Info("ctm=%s", shapes._eccf)
			}
			pageText._fgd = append(pageText._fgd, formResult._bdf._fgd...)
			pageText._egg = append(pageText._egg, formResult._bdf._egg...)
			pageText._dbe = append(pageText._dbe, formResult._bdf._dbe...)
			state._fdfc += formResult._cfd
			state._dcef += formResult._acg

		// ---- Colours: mirror the processor's graphics state into the text object ----
		case "rg", "g", "k", "cs", "sc", "scn":
			to._cddag.ColorspaceNonStroking = gs.ColorspaceNonStroking
			to._cddag.ColorNonStroking = gs.ColorNonStroking
		case "RG", "G", "K", "CS", "SC", "SCN":
			to._cddag.ColorspaceStroking = gs.ColorspaceStroking
			to._cddag.ColorStroking = gs.ColorStroking
		}
		return nil
	})

	procErr := processor.Process(_gfg)
	return pageText, state._fdfc, state._dcef, procErr
}

// lines returns the lines (_gadg) of all paragraphs in the list, in list order.
func (_decbb paraList) lines() []*textLine {
	var all []*textLine
	for _, para := range _decbb {
		all = append(all, para._gadg...)
	}
	return all
}
2021-12-14 01:08:28 +00:00
2022-06-27 19:58:38 +00:00
// String returns a human readable description of `path`.
func (_ddaf *subpath )String ()string {_cddb :=_ddaf ._gcbb ;_dfcb :=len (_cddb );if _dfcb <=5{return _be .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_dfcb ,_cddb );};return _be .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_dfcb ,_cddb [0],_cddb [1],_cddb [_dfcb -1]);
};func (_adde rulingList )primMinMax ()(float64 ,float64 ){_acecg ,_dbbc :=_adde [0]._ccb ,_adde [0]._ccb ;for _ ,_ecff :=range _adde [1:]{if _ecff ._ccb < _acecg {_acecg =_ecff ._ccb ;}else if _ecff ._ccb > _dbbc {_dbbc =_ecff ._ccb ;};};return _acecg ,_dbbc ;
2022-03-13 12:41:53 +00:00
};
2021-12-14 01:08:28 +00:00
2022-06-27 19:58:38 +00:00
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (pt PageText) ToText() string {
	text := pt.Text()
	return text
}

// _ecfc appends the mark `_aded` to `_fbbf` and returns the extended slice.
// The mark's Offset is set to the current value of `*_efgf`, and `*_efgf` is
// then advanced by the byte length of the mark's Text.
func _ecfc(_fbbf []TextMark, _efgf *int, _aded TextMark) []TextMark {
	start := *_efgf
	_aded.Offset = start
	*_efgf = start + len(_aded.Text)
	return append(_fbbf, _aded)
}
// New returns an Extractor instance for extracting content from the input PDF page.
//
// An error is returned when `page` is nil, when the page's content streams
// cannot be read, or when the page has no usable MediaBox.
// A MediaBox whose lower-left/upper-right coordinates are reversed on either
// axis is normalized (and the fact is logged) before it is stored.
func New(page *_bd.PdfPage) (*Extractor, error) {
	const _ee = "\u0065\u0078\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077"

	// Guard against a nil page: the method calls below would otherwise
	// dereference it.
	if page == nil {
		return nil, _f.New("extractor requires a non-nil page")
	}

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}
	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _be.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	ext := &Extractor{
		_da:  contents,
		_dbb: page.Resources,
		_dd:  *mediaBox,
		_bg:  map[string]fontEntry{},
		_dg:  map[string]textResult{},
	}

	// Normalize the stored copy of the MediaBox so that Llx <= Urx and Lly <= Ury.
	if ext._dd.Llx > ext._dd.Urx {
		_eg.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", ext._dd)
		ext._dd.Llx, ext._dd.Urx = ext._dd.Urx, ext._dd.Llx
	}
	if ext._dd.Lly > ext._dd.Ury {
		_eg.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", ext._dd)
		ext._dd.Lly, ext._dd.Ury = ext._dd.Ury, ext._dd.Lly
	}

	// Usage is tracked only once the extractor has been built successfully.
	_ba.TrackUse(_ee)
	return ext, nil
}
2022-06-06 22:48:24 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
2022-06-27 19:58:38 +00:00
type ImageExtractOptions struct{IncludeInlineStencilMasks bool ;};func (_abbde lineRuling )asRuling ()(*ruling ,bool ){_bgbb :=ruling {_gggf :_abbde ._dgfg ,Color :_abbde .Color ,_beaec :_eagf };switch _abbde ._dgfg {case _gaba :_bgbb ._ccb =_abbde .xMean ();
_bgbb ._gaad =_bf .Min (_abbde ._eadee .Y ,_abbde ._feab .Y );_bgbb ._gdaf =_bf .Max (_abbde ._eadee .Y ,_abbde ._feab .Y );case _ddga :_bgbb ._ccb =_abbde .yMean ();_bgbb ._gaad =_bf .Min (_abbde ._eadee .X ,_abbde ._feab .X );_bgbb ._gdaf =_bf .Max (_abbde ._eadee .X ,_abbde ._feab .X );
default:_eg .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_abbde ._dgfg );return nil ,false ;};return &_bgbb ,true ;};func (_ecadg gridTile )numBorders ()int {_fece :=0;if _ecadg ._fgbc {_fece ++;
};if _ecadg ._eafc {_fece ++;};if _ecadg ._ccadg {_fece ++;};if _ecadg ._bfgeab {_fece ++;};return _fece ;};type lineRuling struct{_dgfg rulingKind ;_egcg markKind ;_fb .Color ;_eadee ,_feab _g .Point ;};
// String returns a string describing the tile: its rectangle followed by one
// character per border flag ("L", "R", "B", "T"), with "_" standing in for a
// flag that is not set.
func (gt gridTile) String() string {
	mark := func(_gbff bool, _fdeg string) string {
		if !_gbff {
			return "\u005f"
		}
		return _fdeg
	}
	left := mark(gt._fgbc, "\u004c")
	right := mark(gt._eafc, "\u0052")
	bottom := mark(gt._ccadg, "\u0042")
	top := mark(gt._bfgeab, "\u0054")
	return _be.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", gt.PdfRectangle, left, right, bottom, top)
}

// computeEBBoxes computes the `_bgca` rectangle of every paragraph in the list.
// Each `_bgca` starts as the paragraph's own rectangle and is then widened
// horizontally to the edges of other paragraphs that do not extend above its
// bottom edge, limited by the nearest paragraphs reported by yNeighbours(0)
// on its left and right.
// When `_fddf` is set the paragraphs' rectangles are replaced by the result.
func (pl paraList) computeEBBoxes() {
	if _dafff {
		_eg.Log.Info("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a")
	}
	for _, p := range pl {
		p._bgca = p.PdfRectangle
	}

	neighbours := pl.yNeighbours(0)
	for i, para := range pl {
		ebox := para._bgca

		// Closest neighbour edges on either side bound how far the box may grow.
		leftLimit, rightLimit := -1.0e9, +1.0e9
		for _, k := range neighbours[para] {
			nb := pl[k]._bgca
			switch {
			case nb.Urx < ebox.Llx:
				leftLimit = _bf.Max(leftLimit, nb.Urx)
			case ebox.Urx < nb.Llx:
				rightLimit = _bf.Min(rightLimit, nb.Llx)
			}
		}

		for j, other := range pl {
			ob := other._bgca
			if i == j || ob.Ury > ebox.Lly {
				continue
			}
			switch {
			case leftLimit <= ob.Llx && ob.Llx < ebox.Llx:
				ebox.Llx = ob.Llx
			case ob.Urx <= rightLimit && ebox.Urx < ob.Urx:
				ebox.Urx = ob.Urx
			}
		}

		if _dafff {
			_be.Printf("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a", i, para._bgca, ebox, _dbec(para.text(), 50))
		}
		para._bgca = ebox
	}

	if !_fddf {
		return
	}
	for _, p := range pl {
		p.PdfRectangle = p._bgca
	}
}

// _cbge returns the distinct inner keys of the nested map `_dabb`, sorted in
// increasing order.
func _cbge(_dabb map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_dabb))
	seen := make(map[float64]struct{}, len(_dabb))
	for _, inner := range _dabb {
		for k := range inner {
			if _, dup := seen[k]; !dup {
				keys = append(keys, k)
				seen[k] = struct{}{}
			}
		}
	}
	_cc.Float64s(keys)
	return keys
}

// _cfgb rounds `_bebdg` to the nearest multiple of `_ebba`.
func _cfgb(_bebdg float64) float64 {
	steps := _bf.Round(_bebdg / _ebba)
	return _ebba * steps
}
// extractFormImages extracts the images from the form XObject named `_dfcd` in
// the resources `_ace`. The form's content stream is processed with the form's
// own resources, falling back to `_ace` when the form has none.
// A missing form (nil without an error) is silently ignored. The counter `_gf`
// is incremented after a form has been processed successfully.
// The graphics state argument `_eda` is not used.
func (ctx *imageExtractContext) extractFormImages(_dfcd *_db.PdfObjectName, _eda _cce.GraphicsState, _ace *_bd.PdfPageResources) error {
	form, err := _ace.GetXObjectFormByName(*_dfcd)
	if err != nil {
		return err
	}
	if form == nil {
		return nil
	}

	contents, err := form.GetContentStream()
	if err != nil {
		return err
	}

	resources := form.Resources
	if resources == nil {
		resources = _ace
	}

	if err = ctx.extractContentStreamImages(string(contents), resources); err != nil {
		return err
	}
	ctx._gf++
	return nil
}

// gridIntersecting reports whether both `_gaad` values and both `_gdaf` values
// of the two rulings satisfy `_cecgg`.
func (r *ruling) gridIntersecting(_fbgg *ruling) bool {
	if !_cecgg(r._gaad, _fbgg._gaad) {
		return false
	}
	return _cecgg(r._gdaf, _fbgg._gdaf)
}

// bbox returns the word's bounding rectangle.
func (w *textWord) bbox() _bd.PdfRectangle {
	return w.PdfRectangle
}
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
type TextTable struct {
	W, H  int
	Cells [][]TableCell
}

// _ccg reports whether the horizontal extent of the word `_dacgd` overlaps the
// horizontal extent of the bag `_ffcf` after the bag has been widened by
// `_cbfc` on both sides.
func _ccg(_ffcf *wordBag, _dacgd *textWord, _cbfc float64) bool {
	if !(_dacgd.Llx < _ffcf.Urx+_cbfc) {
		return false
	}
	return _ffcf.Llx-_cbfc < _dacgd.Urx
}
// Tables returns the tables extracted from the page.
func (pt PageText) Tables() []TextTable {
	tables := pt._bfbb
	if _eeca {
		_eg.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}

// lineTo adds the point (`_ceeb`, `_gcdb`) to the current path via addPoint,
// logging the point and its device coordinates first when `_dded` is set.
func (ss *shapesState) lineTo(_ceeb, _gcdb float64) {
	if _dded {
		_eg.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _ceeb, _gcdb, ss.devicePoint(_ceeb, _gcdb))
	}
	ss.addPoint(_ceeb, _gcdb)
}

// complete reports whether every tile stored in the tiling is complete.
func (gt gridTiling) complete() bool {
	for _, row := range gt._bdgaf {
		for _, tile := range row {
			if tile.complete() {
				continue
			}
			return false
		}
	}
	return true
}

// checkWidth returns the width `_cecg` - `_cbea` and whether that width is at
// most `_ffd`.
func (rectRuling) checkWidth(_cbea, _cecg float64) (float64, bool) {
	width := _cecg - _cbea
	return width, width <= _ffd
}

// inDiacriticArea reports whether the mark `_feeg` is positioned close enough
// to the receiver: the sum of the left-edge and right-edge differences must be
// smaller in magnitude than the receiver's width times `_edaa`, and the
// bottom-edge difference smaller in magnitude than its height times `_edaa`.
func (tm *textMark) inDiacriticArea(_feeg *textMark) bool {
	dLeft := tm.Llx - _feeg.Llx
	dRight := tm.Urx - _feeg.Urx
	dBottom := tm.Lly - _feeg.Lly
	if !(_bf.Abs(dLeft+dRight) < tm.Width()*_edaa) {
		return false
	}
	return _bf.Abs(dBottom) < tm.Height()*_edaa
}

// _gecc returns a ruling of kind `_ddga` running along the top edge of the
// rectangle `_cefg`, spanning from its Llx to its Urx.
func _gecc(_cefg _bd.PdfRectangle) *ruling {
	top := ruling{_gggf: _ddga, _ccb: _cefg.Ury, _gaad: _cefg.Llx, _gdaf: _cefg.Urx}
	return &top
}
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// The two int results are passed through unchanged from the internal extractPageText call
// (presumably character statistics — confirm against extractPageText).
// An ErrColorOutOfRange error from extraction is tolerated; any other error, or an error from
// the `_gagcd` post-processing step, yields (nil, 0, 0, err).
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
func (_abf *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_ggf ,_gdb ,_aca ,_gce :=_abf .extractPageText (_abf ._da ,_abf ._dbb ,_g .IdentityMatrix (),0);if _gce !=nil &&_gce !=_bd .ErrColorOutOfRange {return nil ,0,0,_gce ;};_ggf .computeViews ();
// textTable.isExportable (its definition begins on the next line, after ExtractPageText ends)
// returns true when `_cdgb` is set. Otherwise it scans the column-0 cell of every row and returns
// true as soon as one is nil, or has text longer than one rune that does not match `_abggb`;
// it returns false when no such row exists.
_gce =_gagcd (_ggf );if _gce !=nil {return nil ,0,0,_gce ;};return _ggf ,_gdb ,_aca ,nil ;};func (_agdda *textTable )isExportable ()bool {if _agdda ._cdgb {return true ;};_ggeb :=func (_afgd int )bool {_efcb :=_agdda .get (0,_afgd );if _efcb ==nil {return false ;
// rulingList.toTilings (begins on the next line) returns nil, nil for an empty list. Otherwise it
// tidies the rulings with tidied("all"), groups them with toGrids and converts every group with
// asTiling; it returns the tidied rulings together with the tilings.
};_egcdd :=_efcb .text ();_gbbdd :=_c .RuneCountInString (_egcdd );_bgea :=_abggb .MatchString (_egcdd );return _gbbdd <=1||_bgea ;};for _baag :=0;_baag < _agdda ._gccb ;_baag ++{if !_ggeb (_baag ){return true ;};};return false ;};func (_cdcec rulingList )toTilings ()(rulingList ,[]gridTiling ){_cdcec .log ("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s");
if len (_cdcec )==0{return nil ,nil ;};_cdcec =_cdcec .tidied ("\u0061\u006c\u006c");_cdcec .log ("\u0074\u0069\u0064\u0069\u0065\u0064");_fecd :=_cdcec .toGrids ();_egfc :=make ([]gridTiling ,len (_fecd ));for _ddgd ,_cgag :=range _fecd {_egfc [_ddgd ]=_cgag .asTiling ();
// textTable.reduceTiling (begins on the next line) drops rows and columns of the composite table
// that are empty (emptyCompositeRow / emptyCompositeColumn) and lie closer than `_ccea` to the
// neighbouring tiling coordinate: the previous `_eabe` entry for rows, the next `_defd` entry for
// columns. Row 0 and the last column are always kept. When nothing is dropped the receiver itself
// is returned; otherwise a new table holding the surviving composite cells is returned.
};return _cdcec ,_egfc ;};func (_bdfa *textTable )reduceTiling (_gefbf gridTiling ,_ccea float64 )*textTable {_gebb :=make ([]int ,0,_bdfa ._gccb );_bffaa :=make ([]int ,0,_bdfa ._bgcfb );_befec :=_gefbf ._defd ;_fdebfa :=_gefbf ._eabe ;for _badb :=0;_badb < _bdfa ._gccb ;
_badb ++{_eddgg :=_badb > 0&&_bf .Abs (_fdebfa [_badb -1]-_fdebfa [_badb ])< _ccea &&_bdfa .emptyCompositeRow (_badb );if !_eddgg {_gebb =append (_gebb ,_badb );};};for _bfab :=0;_bfab < _bdfa ._bgcfb ;_bfab ++{_ggga :=_bfab < _bdfa ._bgcfb -1&&_bf .Abs (_befec [_bfab +1]-_befec [_bfab ])< _ccea &&_bdfa .emptyCompositeColumn (_bfab );
if !_ggga {_bffaa =append (_bffaa ,_bfab );};};if len (_gebb )==_bdfa ._gccb &&len (_bffaa )==_bdfa ._bgcfb {return _bdfa ;};_abbe :=textTable {_cdgb :_bdfa ._cdgb ,_bgcfb :len (_bffaa ),_gccb :len (_gebb ),_agga :make (map[uint64 ]compositeCell ,len (_bffaa )*len (_gebb ))};
if _eeca {_eg .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_bdfa ._bgcfb ,_bdfa ._gccb ,len (_bffaa ),len (_gebb ));_eg .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_bffaa );
_eg .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_gebb );};for _ecab ,_dfcbe :=range _gebb {for _caffe ,_fadb :=range _bffaa {_gbddf ,_dgffg :=_bdfa .getComposite (_fadb ,_dfcbe );if len (_gbddf )==0{continue ;
};if _eeca {_be .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_caffe ,_ecab ,_fadb ,_dfcbe ,_dbec (_gbddf .merge ().text (),50));};_abbe .putComposite (_caffe ,_ecab ,_gbddf ,_dgffg );
// Definitions that begin on the next line:
//   - textPara.text returns the paragraph's text by writing it with writeText into a buffer.
//   - _bebd(a, b, minLen) returns `_ddga` when a >= minLen and _acbb(b, a) holds, `_gaba` when
//     b >= minLen and _acbb(a, b) holds, and `_eedb` otherwise.
};};return &_abbe ;};func (_dddc *textPara )text ()string {_cgec :=new (_cde .Buffer );_dddc .writeText (_cgec );return _cgec .String ();};func _bebd (_cccb ,_bfbag ,_aafb float64 )rulingKind {if _cccb >=_aafb &&_acbb (_bfbag ,_cccb ){return _ddga ;};if _bfbag >=_aafb &&_acbb (_cccb ,_bfbag ){return _gaba ;
// Definitions that begin on the next line:
//   - _gagac reports whether every rune of the string is a Unicode space (true for "").
//   - _fdde returns the translation matrix that moves by the point's X and Y.
//   - textObject.getFillColor returns the color computed by `_cgdc` from the non-stroking
//     colorspace and non-stroking color stored in `_cddag`.
};return _eedb ;};func _gagac (_fecg string )bool {for _ ,_cbgb :=range _fecg {if !_cd .IsSpace (_cbgb ){return false ;};};return true ;};func _fdde (_fgfae _g .Point )_g .Matrix {return _g .TranslationMatrix (_fgfae .X ,_fgfae .Y )};func (_ceg *textObject )getFillColor ()_fb .Color {return _cgdc (_ceg ._cddag .ColorspaceNonStroking ,_ceg ._cddag .ColorNonStroking );
// Definitions that begin on the next line:
//   - _abggb is the pattern `^\s*(\d+\.?|[Iiv]+)\s*\)?$`: digits with an optional trailing dot,
//     or a run of the letters I, i, v, optionally followed by ")" and surrounded by whitespace.
//   - textTable.computeBbox returns the union (via `_gcff`) of the rectangles of all non-nil
//     cells, or the zero rectangle when the table has no cells.
};var _abggb =_e .MustCompile ("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024");func (_fgeea *textTable )computeBbox ()_bd .PdfRectangle {var _fefc _bd .PdfRectangle ;
_cgfab :=false ;for _aedg :=0;_aedg < _fgeea ._gccb ;_aedg ++{for _dbag :=0;_dbag < _fgeea ._bgcfb ;_dbag ++{_afgab :=_fgeea .get (_dbag ,_aedg );if _afgab ==nil {continue ;};if !_cgfab {_fefc =_afgab .PdfRectangle ;_cgfab =true ;}else {_fefc =_gcff (_fefc ,_afgab .PdfRectangle );
// wordBag.removeDuplicates (begins on the next line) removes words that duplicate another word in
// the same depth bin: same `_debad` value and all four rectangle edges within a tolerance of
// `_abae` times the `_efag` of the first word of the bin being visited. Bins are visited in
// depthIndexes() order, each one examining the bins returned by depthBand; a bin that becomes
// empty is deleted from `_fadg`.
};};};return _fefc ;};func (_bgebb *wordBag )removeDuplicates (){if _decd {_eg .Log .Info ("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_bgebb .text ());};for _ ,_dfed :=range _bgebb .depthIndexes (){if len (_bgebb ._fadg [_dfed ])==0{continue ;
};_beceb :=_bgebb ._fadg [_dfed ][0];_fefe :=_abae *_beceb ._efag ;_cege :=_beceb ._acag ;for _ ,_eece :=range _bgebb .depthBand (_cege ,_cege +_fefe ){_fdfg :=map[*textWord ]struct{}{};_ggbd :=_bgebb ._fadg [_eece ];for _ ,_ebadg :=range _ggbd {if _ ,_fbaa :=_fdfg [_ebadg ];
_fbaa {continue ;};for _ ,_aabc :=range _ggbd {if _ ,_fged :=_fdfg [_aabc ];_fged {continue ;};if _aabc !=_ebadg &&_aabc ._debad ==_ebadg ._debad &&_bf .Abs (_aabc .Llx -_ebadg .Llx )< _fefe &&_bf .Abs (_aabc .Urx -_ebadg .Urx )< _fefe &&_bf .Abs (_aabc .Lly -_ebadg .Lly )< _fefe &&_bf .Abs (_aabc .Ury -_ebadg .Ury )< _fefe {_fdfg [_aabc ]=struct{}{};
};};};if len (_fdfg )> 0{_eebb :=0;for _ ,_abbcg :=range _ggbd {if _ ,_gcbc :=_fdfg [_abbcg ];!_gcbc {_ggbd [_eebb ]=_abbcg ;_eebb ++;};};_bgebb ._fadg [_eece ]=_ggbd [:len (_ggbd )-len (_fdfg )];if len (_bgebb ._fadg [_eece ])==0{delete (_bgebb ._fadg ,_eece );
// Definitions that begin on the next line:
//   - _gbcc(n, m) returns a slice of n offsets and their total: offset i is the sum of
//     len(m[j])+1 over all j < i, and the total is that sum over all j < n.
//   - textLine.endsInHyphen looks at the last word of the line (the line must have at least one
//     word). It returns false unless that word's text ends in a rune from unicode.Hyphen; then it
//     returns true if the word's `_fgbg` flag is set and `_bcfg` accepts the word text, and
//     otherwise returns `_bcfg` applied to the whole line text.
};};};};};func _gbcc (_dgbffb int ,_gbac map[int ][]float64 )([]int ,int ){_cccbe :=make ([]int ,_dgbffb );_dfgc :=0;for _gbefg :=0;_gbefg < _dgbffb ;_gbefg ++{_cccbe [_gbefg ]=_dfgc ;_dfgc +=len (_gbac [_gbefg ])+1;};return _cccbe ,_dfgc ;};func (_cagg *textLine )endsInHyphen ()bool {_fgcg :=_cagg ._ebge [len (_cagg ._ebge )-1];
// _acbb(a, b) (begins on the next line) reports whether a divided by max(`_accb`, b) is below
// `_cged`, i.e. whether a is small relative to b.
_fecbg :=_fgcg ._debad ;_aadbd ,_efae :=_c .DecodeLastRuneInString (_fecbg );if _efae <=0||!_cd .Is (_cd .Hyphen ,_aadbd ){return false ;};if _fgcg ._fgbg &&_bcfg (_fecbg ){return true ;};return _bcfg (_cagg .text ());};func _acbb (_addeg ,_begf float64 )bool {return _addeg /_bf .Max (_accb ,_begf )< _cged };
// stroke appends a pathSection made of the current subpaths (`_gdbg`) and the stroke color of
// `_baafg` to `*_ggbfc`. With `_daafa` set it prints a summary; with `_faca` also set it prints
// the subpaths with indexes 0 through 10.
func (_fcdg *shapesState )stroke (_ggbfc *[]pathSection ){_babg :=pathSection {_aga :_fcdg ._gdbg ,Color :_fcdg ._baafg .getStrokeColor ()};*_ggbfc =append (*_ggbfc ,_babg );if _daafa {_be .Printf ("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",len (*_ggbfc ),_fcdg ,_fcdg ._baafg .getStrokeColor (),_babg .bbox ());
// shapesState.closePath (begins on the next line) closes the last subpath. If the `_beg` flag is
// set, a new subpath starting at the point `_eee` is appended first and the flag is cleared.
// If the flag is not set and there are no subpaths, it only clears the flag and returns.
if _faca {for _ceac ,_bfa :=range _fcdg ._gdbg {_be .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_ceac ,_bfa );if _ceac ==10{break ;};};};};};func (_dafb *shapesState )closePath (){if _dafb ._beg {_dafb ._gdbg =append (_dafb ._gdbg ,_gfgdb (_dafb ._eee ));
_dafb ._beg =false ;}else if len (_dafb ._gdbg )==0{if _dded {_eg .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");};_dafb ._beg =false ;return ;};_dafb ._gdbg [len (_dafb ._gdbg )-1].close ();
// Definitions that begin on the next line:
//   - textPara.fontsize returns the `_aefd` value of the paragraph's first line (the paragraph
//     must have at least one line).
//   - rulingList.intersections splits the ruling indexes by kind (`_gaba` / `_ddga`). It returns
//     nil when there are fewer than `_gafg`+1 rulings of the first kind or fewer than `_ebgg`+1 of
//     the second, or when their combined count exceeds `_ffcfg`. Otherwise it returns a map from
//     ruling index to the set of indexes of the rulings of the other kind that it intersects;
//     every intersection is recorded in both directions.
if _dded {_eg .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_dafb );};};func (_ebd *textPara )fontsize ()float64 {return _ebd ._gadg [0]._aefd };func (_fgga rulingList )intersections ()map[int ]intSet {var _cdebb ,_effd []int ;
for _bfgea ,_fdbaa :=range _fgga {switch _fdbaa ._gggf {case _gaba :_cdebb =append (_cdebb ,_bfgea );case _ddga :_effd =append (_effd ,_bfgea );};};if len (_cdebb )< _gafg +1||len (_effd )< _ebgg +1{return nil ;};if len (_cdebb )+len (_effd )> _ffcfg {_eg .Log .Debug ("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",len (_fgga ),len (_cdebb ),len (_effd ));
return nil ;};_ecgd :=make (map[int ]intSet ,len (_cdebb )+len (_effd ));for _ ,_dcaf :=range _cdebb {for _ ,_gfcd :=range _effd {if _fgga [_dcaf ].intersects (_fgga [_gfcd ]){if _ ,_dgaa :=_ecgd [_dcaf ];!_dgaa {_ecgd [_dcaf ]=make (intSet );};if _ ,_gdfd :=_ecgd [_gfcd ];
// gridTiling.log (begins on the next line) is a debugging aid: it does nothing unless `_gdcf` is
// set, in which case it prints the tiling's `_defd` and `_eabe` coordinates and every tile found
// in `_bdgaf`, labelled with `_gbgca`.
!_gdfd {_ecgd [_gfcd ]=make (intSet );};_ecgd [_dcaf ].add (_gfcd );_ecgd [_gfcd ].add (_dcaf );};};};return _ecgd ;};func (_fdcc gridTiling )log (_gbgca string ){if !_gdcf {return ;};_eg .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_fdcc ._defd ),len (_fdcc ._eabe ),_gbgca );
_be .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_fdcc ._defd );_be .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_fdcc ._eabe );for _dbef ,_efaf :=range _fdcc ._eabe {_bafgc ,_aaeab :=_fdcc ._bdgaf [_efaf ];
if !_aaeab {continue ;};_be .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_dbef ,_efaf );for _faab ,_cbca :=range _fdcc ._defd {_cggf ,_fgea :=_bafgc [_cbca ];if !_fgea {continue ;};_be .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_faab ,_cggf .String ());
// compositeCell.hasLines (begins on the next line) reports whether the rectangle of at least one
// of the given lines intersects the cell's rectangle, as decided by `_bfgg`. With `_eeca` set it
// prints each comparison.
};};};func (_efcc compositeCell )hasLines (_dffde []*textLine )bool {for _cbeg ,_gcbd :=range _dffde {_fdebb :=_bfgg (_efcc .PdfRectangle ,_gcbd .PdfRectangle );if _eeca {_be .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a",_fdebb ,_cbeg ,len (_dffde ));
_be .Printf ("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a",_efcc );_be .Printf ("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a",_gcbd );};if _fdebb {return true ;
// Definitions that begin on the next line:
//   - _acec returns the Ury of the rectangle `_gaaac` minus the Lly of the bounding box of `_gfeg`.
//   - rulingList.sort sorts the list in place with sort.Slice, using the list's comp method.
//   - pathSection is a list of subpaths together with a color.
//   - _ecgab maps the mark kinds `_eagf`, `_edaga` and `_bcbda` to the names "stroke", "fill" and
//     "augment".
};};return false ;};func _acec (_gaaac _bd .PdfRectangle ,_gfeg bounded )float64 {return _gaaac .Ury -_gfeg .bbox ().Lly };func (_fbcf rulingList )sort (){_cc .Slice (_fbcf ,_fbcf .comp )};type pathSection struct{_aga []*subpath ;_fb .Color ;};var _ecgab =map[markKind ]string {_eagf :"\u0073\u0074\u0072\u006f\u006b\u0065",_edaga :"\u0066\u0069\u006c\u006c",_bcbda :"\u0061u\u0067\u006d\u0065\u006e\u0074"};
// _ggcd classifies the segment between the two points by passing its absolute
// X and Y extents, together with the minimum length `_cafe`, to `_bebd`.
func _ggcd(_egdea, _fgbe _g.Point) rulingKind {
	return _bebd(_bf.Abs(_egdea.X-_fgbe.X), _bf.Abs(_egdea.Y-_fgbe.Y), _cafe)
}

// _gcadg returns the negated Lly of the bounding box of `_geb`.
func _gcadg(_geb bounded) float64 {
	box := _geb.bbox()
	return -box.Lly
}

// isQuadrilateral reports whether the subpath has exactly 4 points, or exactly
// 5 points of which the first and the last coincide.
func (sp *subpath) isQuadrilateral() bool {
	switch len(sp._gcbb) {
	case 4:
		return true
	case 5:
		first, last := sp._gcbb[0], sp._gcbb[4]
		return first.X == last.X && first.Y == last.Y
	default:
		return false
	}
}

// establishSubpath returns the subpath that new points should be added to.
// When lastpointEstablished reports that the last point is not yet part of a
// subpath, a new subpath starting at that point is appended first.
// It returns nil if there is still no subpath; otherwise the `_beg` flag is
// cleared and the last subpath is returned.
func (ss *shapesState) establishSubpath() *subpath {
	if pt, established := ss.lastpointEstablished(); !established {
		ss._gdbg = append(ss._gdbg, _gfgdb(pt))
	}
	n := len(ss._gdbg)
	if n == 0 {
		return nil
	}
	ss._beg = false
	return ss._gdbg[n-1]
}

// _fegg reports whether the vertical extents of the two rectangles overlap
// (touching edges count as overlapping).
func _fegg(_acda, _efde _bd.PdfRectangle) bool {
	if _acda.Lly <= _efde.Ury {
		return _efde.Lly <= _acda.Ury
	}
	return false
}

// _ddag returns `_befcd` with the first occurrence of the word `_egfdd`
// removed (via `_bbbca`). When the word is not present an error is logged and
// nil is returned.
func _ddag(_befcd []*textWord, _egfdd *textWord) []*textWord {
	for i := range _befcd {
		if _befcd[i] == _egfdd {
			return _bbbca(_befcd, i)
		}
	}
	_eg.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", _egfdd)
	return nil
}

// _bfgg reports whether both `_ecag` and `_fegg` hold for the two rectangles.
func _bfgg(_cddg, _bafg _bd.PdfRectangle) bool {
	if !_ecag(_cddg, _bafg) {
		return false
	}
	return _fegg(_cddg, _bafg)
}

// log prints the list and then each of its rulings, labelled with `_ecec`.
// It does nothing unless `_daafa` is set.
func (rl rulingList) log(_ecec string) {
	if !_daafa {
		return
	}
	_eg.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _ecec, rl.String())
	for i, r := range rl {
		_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r.String())
	}
}

// tables returns the exportable tables attached to the paragraphs in the list,
// converted with toTextTable, in list order.
func (pl paraList) tables() []TextTable {
	var out []TextTable
	if _eeca {
		_eg.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range pl {
		tbl := para._dbfdg
		if tbl == nil || !tbl.isExportable() {
			continue
		}
		out = append(out, tbl.toTextTable())
	}
	return out
}

// sortReadingOrder reorders the list in place: it computes the `_bgca` boxes
// with computeEBBoxes, sorts with the `_dfe` comparison and finally applies
// the order returned by topoOrder. Lists with at most one element are left
// untouched.
func (pl paraList) sortReadingOrder() {
	_eg.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(pl))
	if len(pl) <= 1 {
		return
	}
	pl.computeEBBoxes()
	// NOTE(review): the comparison is non-strict (<= 0), so equal elements
	// report "less" in both directions; confirm this is intended.
	notAfter := func(_fgfd, _dffa int) bool {
		return _dfe(pl[_fgfd], pl[_dffa]) <= 0
	}
	_cc.Slice(pl, notAfter)
	pl.reorder(pl.topoOrder())
}

// _ebbf returns the difference `_gcadg(_bdgad) - _gcadg(_bbcb)`.
func _ebbf(_bdgad, _bbcb bounded) float64 {
	first := _gcadg(_bdgad)
	second := _gcadg(_bbcb)
	return first - second
}
// depthRange returns, in increasing order, the keys of `_fadg` that lie in the
// inclusive range [`_egaf`, `_eaef`]. It returns nil when there are none.
func (bag *wordBag) depthRange(_egaf, _eaef int) []int {
	var keys []int
	for k := range bag._fadg {
		if k < _egaf || k > _eaef {
			continue
		}
		keys = append(keys, k)
	}
	if len(keys) == 0 {
		return nil
	}
	_cc.Ints(keys)
	return keys
}

// shapesState holds the state used while building paths: two matrices, the
// subpaths collected so far (`_gdbg`), a flag and a point recorded by moveTo
// (`_beg`, `_eee`), and the text object whose colors are used (`_baafg`).
type shapesState struct {
	_eccf  _g.Matrix
	_gaga  _g.Matrix
	_gdbg  []*subpath
	_beg   bool
	_eee   _g.Point
	_baafg *textObject
}

// moveTo records the device-space position of (`_dffg`, `_fbgc`) in `_eee` and
// sets the `_beg` flag. No subpath is created here.
func (ss *shapesState) moveTo(_dffg, _fbgc float64) {
	ss._beg = true
	ss._eee = ss.devicePoint(_dffg, _fbgc)
	if !_dded {
		return
	}
	_eg.Log.Info("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066", _dffg, _fbgc, ss._eee)
}

// blocks reports whether some ruling in the list lies between the rulings
// `_debaf` and `_gedb` and overlaps the range they share.
// It returns false when the [`_gaad`, `_gdaf`] ranges of the two rulings do
// not overlap. Otherwise a list entry blocks when its `_ccb` is no less than
// the smaller `_ccb` of the pair minus `_ffd`, no greater than the larger one
// plus `_ffd`, and its own range overlaps the shared range.
func (rl rulingList) blocks(_debaf, _gedb *ruling) bool {
	if _debaf._gaad > _gedb._gdaf || _gedb._gaad > _debaf._gdaf {
		return false
	}
	sharedLo := _bf.Max(_debaf._gaad, _gedb._gaad)
	sharedHi := _bf.Min(_debaf._gdaf, _gedb._gdaf)

	// Order the pair so that `near` has the smaller `_ccb`.
	near, far := _debaf, _gedb
	if near._ccb > far._ccb {
		near, far = far, near
	}
	for _, r := range rl {
		if near._ccb <= r._ccb+_ffd && r._ccb <= far._ccb+_ffd && r._gaad <= sharedHi && sharedLo <= r._gdaf {
			return true
		}
	}
	return false
}

// The markKind values, numbered consecutively from zero.
const (
	_cece markKind = iota
	_eagf
	_edaga
	_bcbda
)

// _gfgdb returns a new subpath whose only point is `_fcga`.
func _gfgdb(_fcga _g.Point) *subpath {
	points := []_g.Point{_fcga}
	return &subpath{_gcbb: points}
}
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
func (_cbf *Extractor )ExtractText ()(string ,error ){_abb ,_ ,_ ,_faa :=_cbf .ExtractTextWithStats ();return _abb ,_faa ;};func (_aagf rulingList )bbox ()_bd .PdfRectangle {var _ffcfe _bd .PdfRectangle ;if len (_aagf )==0{_eg .Log .Error ("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073");
return _bd .PdfRectangle {};};if _aagf [0]._gggf ==_ddga {_ffcfe .Llx ,_ffcfe .Urx =_aagf .secMinMax ();_ffcfe .Lly ,_ffcfe .Ury =_aagf .primMinMax ();}else {_ffcfe .Llx ,_ffcfe .Urx =_aagf .primMinMax ();_ffcfe .Lly ,_ffcfe .Ury =_aagf .secMinMax ();};
return _ffcfe ;};func (_bffc *textTable )toTextTable ()TextTable {if _eeca {_eg .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_bffc ._bgcfb ,_bffc ._gccb );};_cdaa :=make ([][]TableCell ,_bffc ._gccb );
for _bbaa :=0;_bbaa < _bffc ._gccb ;_bbaa ++{_cdaa [_bbaa ]=make ([]TableCell ,_bffc ._bgcfb );for _agfc :=0;_agfc < _bffc ._bgcfb ;_agfc ++{_dgfa :=_bffc .get (_agfc ,_bbaa );if _dgfa ==nil {continue ;};if _eeca {_be .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_agfc ,_bbaa ,_dgfa );
};_cdaa [_bbaa ][_agfc ].Text =_dgfa .text ();_afef :=0;_cdaa [_bbaa ][_agfc ].Marks ._ggbg =_dgfa .toTextMarks (&_afef );};};return TextTable {W :_bffc ._bgcfb ,H :_bffc ._gccb ,Cells :_cdaa };};func _bcaga (_eaffa float64 )bool {return _bf .Abs (_eaffa )< _gdfe };
func (_feba *textWord )appendMark (_cgdfg *textMark ,_efbcc _bd .PdfRectangle ){_feba ._bbacg =append (_feba ._bbacg ,_cgdfg );_feba .PdfRectangle =_gcff (_feba .PdfRectangle ,_cgdfg .PdfRectangle );if _cgdfg ._gaaaf > _feba ._efag {_feba ._efag =_cgdfg ._gaaaf ;
};_feba ._acag =_efbcc .Ury -_feba .PdfRectangle .Lly ;};var (_baee =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};
);func (_cbdc paraList )findGridTables (_daad []gridTiling )[]*textTable {if _eeca {_eg .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_cbdc ));for _cbceb ,_fbca :=range _cbdc {_be .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cbceb ,_fbca );
};};var _bbggf []*textTable ;for _cefgf ,_bcff :=range _daad {_gcgc ,_cebg :=_cbdc .findTableGrid (_bcff );if _gcgc !=nil {_gcgc .log (_be .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_cefgf ));
_bbggf =append (_bbggf ,_gcgc );_gcgc .markCells ();};for _cebab :=range _cebg {_cebab ._deed =true ;};};if _eeca {_eg .Log .Info ("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s",len (_bbggf ));
};return _bbggf ;};func (_addfb *textPara )depth ()float64 {if _addfb ._gfce {return -1.0;};if len (_addfb ._gadg )> 0{return _addfb ._gadg [0]._gddec ;};return _addfb ._dbfdg .depth ();};func (_dbbg paraList )addNeighbours (){_ddee :=func (_ffbcaa []int ,_cffec *textPara )([]*textPara ,[]*textPara ){_fafg :=make ([]*textPara ,0,len (_ffbcaa )-1);
_ecgc :=make ([]*textPara ,0,len (_ffbcaa )-1);for _ ,_egfgf :=range _ffbcaa {_efdb :=_dbbg [_egfgf ];if _efdb .Urx <=_cffec .Llx {_fafg =append (_fafg ,_efdb );}else if _efdb .Llx >=_cffec .Urx {_ecgc =append (_ecgc ,_efdb );};};return _fafg ,_ecgc ;};
_daca :=func (_edadd []int ,_eecf *textPara )([]*textPara ,[]*textPara ){_fbbec :=make ([]*textPara ,0,len (_edadd )-1);_bfgda :=make ([]*textPara ,0,len (_edadd )-1);for _ ,_eegdf :=range _edadd {_aebb :=_dbbg [_eegdf ];if _aebb .Ury <=_eecf .Lly {_bfgda =append (_bfgda ,_aebb );
}else if _aebb .Lly >=_eecf .Ury {_fbbec =append (_fbbec ,_aebb );};};return _fbbec ,_bfgda ;};_ccae :=_dbbg .yNeighbours (_fgaa );for _ ,_bdabe :=range _dbbg {_adcf :=_ccae [_bdabe ];if len (_adcf )==0{continue ;};_adge ,_caeec :=_ddee (_adcf ,_bdabe );
if len (_adge )==0&&len (_caeec )==0{continue ;};if len (_adge )> 0{_adcg :=_adge [0];for _ ,_ecea :=range _adge [1:]{if _ecea .Urx >=_adcg .Urx {_adcg =_ecea ;};};for _ ,_aebe :=range _adge {if _aebe !=_adcg &&_aebe .Urx > _adcg .Llx {_adcg =nil ;break ;
};};if _adcg !=nil &&_fegg (_bdabe .PdfRectangle ,_adcg .PdfRectangle ){_bdabe ._ceabb =_adcg ;};};if len (_caeec )> 0{_faeca :=_caeec [0];for _ ,_bffcd :=range _caeec [1:]{if _bffcd .Llx <=_faeca .Llx {_faeca =_bffcd ;};};for _ ,_ffga :=range _caeec {if _ffga !=_faeca &&_ffga .Llx < _faeca .Urx {_faeca =nil ;
break ;};};if _faeca !=nil &&_fegg (_bdabe .PdfRectangle ,_faeca .PdfRectangle ){_bdabe ._bbbdd =_faeca ;};};};_ccae =_dbbg .xNeighbours (_gfge );for _ ,_ddafe :=range _dbbg {_gcef :=_ccae [_ddafe ];if len (_gcef )==0{continue ;};_dedeb ,_gdceg :=_daca (_gcef ,_ddafe );
if len (_dedeb )==0&&len (_gdceg )==0{continue ;};if len (_gdceg )> 0{_befd :=_gdceg [0];for _ ,_fbae :=range _gdceg [1:]{if _fbae .Ury >=_befd .Ury {_befd =_fbae ;};};for _ ,_fecdf :=range _gdceg {if _fecdf !=_befd &&_fecdf .Ury > _befd .Lly {_befd =nil ;
break ;};};if _befd !=nil &&_ecag (_ddafe .PdfRectangle ,_befd .PdfRectangle ){_ddafe ._egad =_befd ;};};if len (_dedeb )> 0{_gecga :=_dedeb [0];for _ ,_fbcba :=range _dedeb [1:]{if _fbcba .Lly <=_gecga .Lly {_gecga =_fbcba ;};};for _ ,_gbeg :=range _dedeb {if _gbeg !=_gecga &&_gbeg .Lly < _gecga .Ury {_gecga =nil ;
break ;};};if _gecga !=nil &&_ecag (_ddafe .PdfRectangle ,_gecga .PdfRectangle ){_ddafe ._edaad =_gecga ;};};};for _ ,_bcdca :=range _dbbg {if _bcdca ._ceabb !=nil &&_bcdca ._ceabb ._bbbdd !=_bcdca {_bcdca ._ceabb =nil ;};if _bcdca ._edaad !=nil &&_bcdca ._edaad ._egad !=_bcdca {_bcdca ._edaad =nil ;
};if _bcdca ._bbbdd !=nil &&_bcdca ._bbbdd ._ceabb !=_bcdca {_bcdca ._bbbdd =nil ;};if _bcdca ._egad !=nil &&_bcdca ._egad ._edaad !=_bcdca {_bcdca ._egad =nil ;};};};func (_becff *textTable )subdivide ()*textTable {_becff .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");
_eecgg :=_becff .compositeRowCorridors ();_ccbfg :=_becff .compositeColCorridors ();if _eeca {_eg .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_efcg (_eecgg ),_efcg (_ccbfg ));
};if len (_eecgg )==0||len (_ccbfg )==0{return _becff ;};_bffd (_eecgg );_bffd (_ccbfg );if _eeca {_eg .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_efcg (_eecgg ),_efcg (_ccbfg ));
};_eefbfd ,_ffdff :=_gbcc (_becff ._gccb ,_eecgg );_eagffb ,_gefb :=_gbcc (_becff ._bgcfb ,_ccbfg );_cbde :=make (map[uint64 ]*textPara ,_gefb *_ffdff );_deac :=&textTable {PdfRectangle :_becff .PdfRectangle ,_cdgb :_becff ._cdgb ,_gccb :_ffdff ,_bgcfb :_gefb ,_deedc :_cbde };
if _eeca {_eg .Log .Info ("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_becff ._bgcfb ,_becff ._gccb ,_gefb ,_ffdff ,_efcg (_eecgg ),_efcg (_ccbfg ),_eefbfd ,_eagffb );
};for _efgd :=0;_efgd < _becff ._gccb ;_efgd ++{_bfef :=_eefbfd [_efgd ];for _fecc :=0;_fecc < _becff ._bgcfb ;_fecc ++{_cedf :=_eagffb [_fecc ];if _eeca {_be .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_fecc ,_efgd ,_cedf ,_bfef );
};_gdae ,_cdbbc :=_becff ._agga [_bddbg (_fecc ,_efgd )];if !_cdbbc {continue ;};_ebcd :=_gdae .split (_eecgg [_efgd ],_ccbfg [_fecc ]);for _gdac :=0;_gdac < _ebcd ._gccb ;_gdac ++{for _afebe :=0;_afebe < _ebcd ._bgcfb ;_afebe ++{_fegf :=_ebcd .get (_afebe ,_gdac );
_deac .put (_cedf +_afebe ,_bfef +_gdac ,_fegf );if _eeca {_be .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_cedf +_afebe ,_bfef +_gdac ,_fegf );};};};};};return _deac ;};func (_gdgg rulingList )toGrids ()[]rulingList {if _daafa {_eg .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_gdgg );
};_bfdgd :=_gdgg .intersections ();if _daafa {_eg .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_gdgg ),len (_bfdgd ));
for _ ,_eagff :=range _aagde (_bfdgd ){_be .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_eagff ,_bfdgd [_eagff ]);};};_fbedd :=make (map[int ]intSet ,len (_gdgg ));for _dbggd :=range _gdgg {_cgbb :=_gdgg .connections (_bfdgd ,_dbggd );if len (_cgbb )> 0{_fbedd [_dbggd ]=_cgbb ;
};};if _daafa {_eg .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_fbedd ));for _ ,_dcfd :=range _aagde (_fbedd ){_be .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_dcfd ,_fbedd [_dcfd ]);
};};_ccbfe :=_ecaa (len (_gdgg ),func (_cfeda ,_fdfa int )bool {_ffaf ,_ecgbfa :=len (_fbedd [_cfeda ]),len (_fbedd [_fdfa ]);if _ffaf !=_ecgbfa {return _ffaf > _ecgbfa ;};return _gdgg .comp (_cfeda ,_fdfa );});if _daafa {_eg .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_ccbfe );
};_efacg :=[][]int {{_ccbfe [0]}};_acfb :for _ ,_cccg :=range _ccbfe [1:]{for _ffbca ,_caga :=range _efacg {for _ ,_aecc :=range _caga {if _fbedd [_aecc ].has (_cccg ){_efacg [_ffbca ]=append (_caga ,_cccg );continue _acfb ;};};};_efacg =append (_efacg ,[]int {_cccg });
};if _daafa {_eg .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_efacg );};_cc .SliceStable (_efacg ,func (_dcfg ,_dedfd int )bool {return len (_efacg [_dcfg ])> len (_efacg [_dedfd ])});for _ ,_bggg :=range _efacg {_cc .Slice (_bggg ,func (_gdec ,_bcae int )bool {return _gdgg .comp (_bggg [_gdec ],_bggg [_bcae ])});
};_dcad :=make ([]rulingList ,len (_efacg ));for _bcbg ,_ggfa :=range _efacg {_ddfd :=make (rulingList ,len (_ggfa ));for _efdee ,_abec :=range _ggfa {_ddfd [_efdee ]=_gdgg [_abec ];};_dcad [_bcbg ]=_ddfd ;};if _daafa {_eg .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_dcad );
};var _afbg []rulingList ;for _ ,_afgg :=range _dcad {if _ecge ,_fbbbd :=_afgg .isActualGrid ();_fbbbd {_afgg =_ecge ;_afgg =_afgg .snapToGroups ();_afbg =append (_afbg ,_afgg );};};if _daafa {_bdcd ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_afbg );
_eg .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_dcad ),len (_afbg ));};return _afbg ;};func (_abcfa rulingList )snapToGroupsDirection ()rulingList {_abcfa .sortStrict ();
_cfbb :=make (map[*ruling ]rulingList ,len (_abcfa ));_bbbcf :=_abcfa [0];_agge :=func (_ddggb *ruling ){_bbbcf =_ddggb ;_cfbb [_bbbcf ]=rulingList {_ddggb }};_agge (_abcfa [0]);for _ ,_bagc :=range _abcfa [1:]{if _bagc ._ccb < _bbbcf ._ccb -_gdfe {_eg .Log .Error ("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073",_bbbcf ,_bagc );
};if _bagc ._ccb > _bbbcf ._ccb +_ffd {_agge (_bagc );}else {_cfbb [_bbbcf ]=append (_cfbb [_bbbcf ],_bagc );};};_bfaa :=make (map[*ruling ]float64 ,len (_cfbb ));_eaba :=make (map[*ruling ]*ruling ,len (_abcfa ));for _fddfd ,_ebggc :=range _cfbb {_bfaa [_fddfd ]=_ebggc .mergePrimary ();
for _ ,_gfbb :=range _ebggc {_eaba [_gfbb ]=_fddfd ;};};for _ ,_cfgd :=range _abcfa {_cfgd ._ccb =_bfaa [_eaba [_cfgd ]];};_fbce :=make (rulingList ,0,len (_abcfa ));for _ ,_bgggf :=range _cfbb {_accg :=_bgggf .splitSec ();for _gfeaf ,_dbbef :=range _accg {_cfdgg :=_dbbef .merge ();
if len (_fbce )> 0{_dcec :=_fbce [len (_fbce )-1];if _dcec .alignsPrimary (_cfdgg )&&_dcec .alignsSec (_cfdgg ){_eg .Log .Error ("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073",_gfeaf ,_dcec ,_cfdgg );
continue ;};};_fbce =append (_fbce ,_cfdgg );};};_fbce .sortStrict ();return _fbce ;};func _dbceag (_feefg _bd .PdfRectangle )*ruling {return &ruling {_gggf :_gaba ,_ccb :_feefg .Llx ,_gaad :_feefg .Lly ,_gdaf :_feefg .Ury };};func _ecag (_cegg ,_aeaa _bd .PdfRectangle )bool {return _aeaa .Llx <=_cegg .Urx &&_cegg .Llx <=_aeaa .Urx ;
};
// String returns a description of `b`.
// The description contains the bounding box, the `_dbga` value (printed as "fontsize"), the
// number of words and the text of every word, visited in the order given by depthIndexes().
func (b *wordBag) String() string {
	var texts []string
	for _, depthIdx := range b.depthIndexes() {
		for _, w := range b._fadg[depthIdx] {
			texts = append(texts, w._debad)
		}
	}
	count := len(texts)
	return _be.Sprintf("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071", b.PdfRectangle, b._dbga, count, texts)
}

// moveTextSetLeading stores the negated second offset in the text state's `_dde` field and then
// forwards both offsets, unchanged, to moveLP.
func (to *textObject) moveTextSetLeading(_ffe, _deaa float64) {
	state := to._dff
	state._dde = -_deaa
	to.moveLP(_ffe, _deaa)
}
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
// The values come from ExtractPageText. When that call fails the returned text is empty and
// the two counts and the error are passed through unchanged.
func (_ef *Extractor) ExtractTextWithStats() (_bbg string, _dae int, _aaf int, _acee error) {
	pageText, _dae, _aaf, _acee := _ef.ExtractPageText()
	if _acee == nil {
		return pageText.Text(), _dae, _aaf, nil
	}
	return "", _dae, _aaf, _acee
}

// tidied returns a cleaned-up version of the receiver: duplicates are removed, the result is
// passed through snapToGroups() and then sorted. It returns nil when snapToGroups() returns nil.
// `_afac` is only used to label the debug log line.
func (vecs rulingList) tidied(_afac string) rulingList {
	uniques := vecs.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()

	if _daafa {
		_eg.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", _afac, len(vecs), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// _dge builds a textObject from the given extractor, resources, graphics state, text state and
// state stack. Both matrices of the new object start out as the identity matrix.
func _dge(_cca *Extractor, _ddgg *_bd.PdfPageResources, _dgf _cce.GraphicsState, _cff *textState, _edg *stateStack) *textObject {
	obj := textObject{
		_fafb:  _cca,
		_ggb:   _ddgg,
		_cddag: _dgf,
		_daga:  _edg,
		_dff:   _cff,
		_cbff:  _g.IdentityMatrix(),
		_bdb:   _g.IdentityMatrix(),
	}
	return &obj
}

// blocked reports whether one of the bag's rulings separates the bag from word `_acb`.
// The x direction is tested against the `_faad` rulings and the y direction against the `_acae`
// rulings; a direction is only tested when the word lies entirely on one side of the bag.
func (b *wordBag) blocked(_acb *textWord) bool {
	// Horizontal separation: word left of the bag, or bag left of the word.
	switch {
	case _acb.Urx < b.Llx:
		near := _aggc(_acb.PdfRectangle)
		far := _dbceag(b.PdfRectangle)
		if b._faad.blocks(near, far) {
			if _eegd {
				_eg.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _acb, b)
			}
			return true
		}
	case b.Urx < _acb.Llx:
		near := _aggc(b.PdfRectangle)
		far := _dbceag(_acb.PdfRectangle)
		if b._faad.blocks(near, far) {
			if _eegd {
				_eg.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s", _acb, b)
			}
			return true
		}
	}

	// Vertical separation: word below the bag, or bag below the word.
	switch {
	case _acb.Ury < b.Lly:
		near := _gecc(_acb.PdfRectangle)
		far := _ffbg(b.PdfRectangle)
		if b._acae.blocks(near, far) {
			if _eegd {
				_eg.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _acb, b)
			}
			return true
		}
	case b.Ury < _acb.Lly:
		near := _gecc(b.PdfRectangle)
		far := _ffbg(_acb.PdfRectangle)
		if b._acae.blocks(near, far) {
			if _eegd {
				_eg.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s", _acb, b)
			}
			return true
		}
	}
	return false
}
// Len returns the number of TextMarks in `ma`.
// A nil receiver has length 0.
func (ma *TextMarkArray) Len() int {
	if ma != nil {
		return len(ma._ggbg)
	}
	return 0
}

// paraList is a sequence of textPara pointers.
type paraList []*textPara

// isAtom follows the receiver's `_bbbdd` and `_egad` links and, when the four paras involved
// form a closed square of links with no para already taken, hands them to _dbee to build a
// table. It returns nil otherwise.
func (p *textPara) isAtom() *textTable {
	first := p
	second := p._bbbdd
	third := p._egad
	if second.taken() || third.taken() {
		return nil
	}
	// The fourth para must be reachable from both neighbours.
	fourth := second._egad
	if fourth.taken() || fourth != third._bbbdd {
		return nil
	}
	return _dbee(first, second, third, fourth)
}

// contains reports whether rectangle `_ebffc` fits inside the tile. Tiles with fewer than 3
// borders never contain anything. Only sides whose border flag is set are checked, each with
// a tolerance of `_bggc`.
func (tile gridTile) contains(_ebffc _bd.PdfRectangle) bool {
	switch {
	case tile.numBorders() < 3:
		return false
	case tile._fgbc && _ebffc.Llx < tile.Llx-_bggc:
		return false
	case tile._eafc && _ebffc.Urx > tile.Urx+_bggc:
		return false
	case tile._ccadg && _ebffc.Lly < tile.Lly-_bggc:
		return false
	case tile._bfgeab && _ebffc.Ury > tile.Ury+_bggc:
		return false
	}
	return true
}

// newTablePara returns a textPara that wraps the table. Both of the para's rectangles are set
// to the table's computed bounding box.
func (tbl *textTable) newTablePara() *textPara {
	box := tbl.computeBbox()
	para := &textPara{PdfRectangle: box, _bgca: box, _dbfdg: tbl}
	if _eeca {
		_eg.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// mergePrimary returns the arithmetic mean of the `_ccb` values of the rulings in the list.
// The list must not be empty.
func (vecs rulingList) mergePrimary() float64 {
	sum := vecs[0]._ccb
	for i := 1; i < len(vecs); i++ {
		sum += vecs[i]._ccb
	}
	return sum / float64(len(vecs))
}

// bbox returns the line's bounding rectangle.
func (l *textLine) bbox() _bd.PdfRectangle {
	return l.PdfRectangle
}

// quadraticTo adds the point (`_bdce`, `_ddf`) to the current path. The first two arguments
// are not used.
func (ss *shapesState) quadraticTo(_caf, _baef, _bdce, _ddf float64) {
	if _dded {
		_eg.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	ss.addPoint(_bdce, _ddf)
}

// applyRemovals deletes from the bag every word listed in `_fddc`, which maps a depth index to
// the set of words to drop from that index. A depth index left with no words is removed from
// the bag entirely.
func (b *wordBag) applyRemovals(_fddc map[int]map[*textWord]struct{}) {
	for depthIdx, removals := range _fddc {
		if len(removals) == 0 {
			continue
		}
		current := b._fadg[depthIdx]
		remaining := len(current) - len(removals)
		if remaining == 0 {
			delete(b._fadg, depthIdx)
			continue
		}
		kept := make([]*textWord, remaining)
		next := 0
		for _, w := range current {
			if _, drop := removals[w]; drop {
				continue
			}
			kept[next] = w
			next++
		}
		b._fadg[depthIdx] = kept
	}
}

// markCells sets the `_deed` flag on every non-nil cell of the table.
func (tbl *textTable) markCells() {
	for y := 0; y < tbl._gccb; y++ {
		for x := 0; x < tbl._bgcfb; x++ {
			if cell := tbl.get(x, y); cell != nil {
				cell._deed = true
			}
		}
	}
}

// findTextTables tries to grow a table from every para that is not yet taken and has a
// non-zero width. Tables with fewer than `_baad` cells are discarded; the cells of the kept
// tables are marked.
func (paras paraList) findTextTables() []*textTable {
	var tables []*textTable
	for _, para := range paras {
		if para.taken() || para.Width() == 0 {
			continue
		}
		tbl := para.isAtom()
		if tbl == nil {
			continue
		}
		tbl.growTable()
		if cells := tbl._bgcfb * tbl._gccb; cells < _baad {
			continue
		}
		tbl.markCells()
		tbl.log("\u0067\u0072\u006fw\u006e")
		tables = append(tables, tbl)
	}
	return tables
}

// Compile-time switches that enable debug logging in various parts of the package.
// All of them evaluate to false.
const (
	_dafff = false
	_eeegf = false
	_befc  = false
	_cgaa  = false
	_dded  = false
	_agbg  = false
	_cba   = false
	_cbec  = false
	_eafe  = false
	_dcaca = _eafe && true
	_dega  = _dcaca && false
	_decd  = _eafe && true
	_eeca  = false
	_cgbdc = _eeca && false
	_agdc  = _eeca && true
	_daafa = false
	_faca  = _daafa && false
	_cddf  = _daafa && false
	_gdcf  = _daafa && true
	_dddf  = _daafa && false
	_eegd  = _daafa && false
)
// NewFromContents creates a new extractor from contents and page resources.
// The returned error is always nil.
func NewFromContents(contents string, resources *_bd.PdfPageResources) (*Extractor, error) {
	const usageKey = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"
	ext := &Extractor{
		_da:  contents,
		_dbb: resources,
		_bg:  map[string]fontEntry{},
		_dg:  map[string]textResult{},
	}
	_ba.TrackUse(usageKey)
	return ext, nil
}

// devicePoint transforms (`_bbfb`, `_dbcg`) with the product of the state's `_gaga` and `_eccf`
// matrices and returns the result as a point.
func (ss *shapesState) devicePoint(_bbfb, _dbcg float64) _g.Point {
	combined := ss._gaga.Mult(ss._eccf)
	x, y := combined.Transform(_bbfb, _dbcg)
	return _g.NewPoint(x, y)
}

// toTextMarks converts the word's marks to TextMarks, accumulating them with _ecfc, which also
// receives the `_ffba` pointer.
func (w *textWord) toTextMarks(_ffba *int) []TextMark {
	var marks []TextMark
	for _, tm := range w._bbacg {
		marks = _ecfc(marks, _ffba, tm.ToTextMark())
	}
	return marks
}

// depth returns the smallest depth() of the non-nil cells in row 0 whose `_gfce` flag is unset.
// It returns 1e10 when there is no such cell.
func (tbl *textTable) depth() float64 {
	lowest := 1e10
	for x := 0; x < tbl._bgcfb; x++ {
		cell := tbl.get(x, 0)
		if cell != nil && !cell._gfce {
			lowest = _bf.Min(lowest, cell.depth())
		}
	}
	return lowest
}

// _bcfg reports whether `_geee` has at least `_eed` runes, ends in a rune from the Unicode Hyphen
// range table, and has a non-space rune immediately before that hyphen.
func _bcfg(_geee string) bool {
	if _c.RuneCountInString(_geee) < _eed {
		return false
	}
	last, size := _c.DecodeLastRuneInString(_geee)
	if size <= 0 {
		return false
	}
	if !_cd.Is(_cd.Hyphen, last) {
		return false
	}
	// Inspect the rune that precedes the trailing hyphen.
	before, beforeSize := _c.DecodeLastRuneInString(_geee[:len(_geee)-size])
	if beforeSize <= 0 {
		return false
	}
	return !_cd.IsSpace(before)
}

// _cdgde reports whether the magnitude of `_cggg` is below `_ffd`.
func _cdgde(_cggg float64) bool {
	magnitude := _bf.Abs(_cggg)
	return magnitude < _ffd
}
// String returns a description of `p`.
// Paras with the `_gfce` flag set are printed as "[EMPTY]". Paras that wrap a table get the
// table dimensions as a prefix. The para text is passed through _dbec with a limit of 50.
func (p *textPara) String() string {
	if p._gfce {
		return _be.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", p.PdfRectangle)
	}
	var tablePrefix string
	if tbl := p._dbfdg; tbl != nil {
		tablePrefix = _be.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", tbl._bgcfb, tbl._gccb)
	}
	return _be.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", p.PdfRectangle, tablePrefix, len(p._gadg), _dbec(p.text(), 50))
}

// comp is the ordering function for rulings: it reports whether the ruling at index `_dfbf`
// sorts before the ruling at index `_aceag`. Rulings are ordered by kind first (larger kind
// first), then by `_ccb`, `_gaad` and `_gdaf`. The direction of the last three comparisons is
// inverted for rulings that are not of kind `_ddga`. Two rulings of kind `_eedb` are never ordered.
func (vecs rulingList) comp(_dfbf, _aceag int) bool {
	a, b := vecs[_dfbf], vecs[_aceag]
	kind := a._gggf
	if otherKind := b._gggf; kind != otherKind {
		return kind > otherKind
	}
	if kind == _eedb {
		return false
	}
	// For `_ddga` rulings a comparison result is used as is, otherwise it is negated.
	asIs := kind == _ddga
	if a._ccb != b._ccb {
		return (a._ccb > b._ccb) == asIs
	}
	if a._gaad != b._gaad {
		return (a._gaad < b._gaad) == asIs
	}
	return (a._gdaf < b._gdaf) == asIs
}

// pullWord adds word `_eeef` to the bag at depth index `_afdg`, growing the bag's bounding box
// and raising its `_dbga` value if the word's `_efag` is larger. The word is also recorded in
// `_ecba[_afdg]`, which must already be a non-nil map.
func (b *wordBag) pullWord(_eeef *textWord, _afdg int, _ecba map[int]map[*textWord]struct{}) {
	b.PdfRectangle = _gcff(b.PdfRectangle, _eeef.PdfRectangle)
	if b._dbga < _eeef._efag {
		b._dbga = _eeef._efag
	}
	words := b._fadg[_afdg]
	b._fadg[_afdg] = append(words, _eeef)
	_ecba[_afdg][_eeef] = struct{}{}
}

// moveText forwards both offsets to moveLP.
func (to *textObject) moveText(_dfcc, _gad float64) {
	to.moveLP(_dfcc, _gad)
}

// pullWord appends word `_abcf` to the line and removes it from bag `_bgdd` at depth index
// `_acdad`.
func (l *textLine) pullWord(_bgdd *wordBag, _abcf *textWord, _acdad int) {
	l.appendWord(_abcf)
	_bgdd.removeWord(_abcf, _acdad)
}

// text returns the texts of all words in the bag, separated by single spaces.
func (b *wordBag) text() string {
	words := b.allWords()
	texts := make([]string, len(words))
	for i := range words {
		texts[i] = words[i]._debad
	}
	return _dc.Join(texts, "\u0020")
}
// Append appends `mark` to the mark array.
func (ma *TextMarkArray) Append(mark TextMark) {
	ma._ggbg = append(ma._ggbg, mark)
}

// markWordBoundaries sets the `_fgbg` flag on every word (after the first) whose gap to the
// preceding word, as measured by _edagd, is at least `_gfed` times the line's `_aefd` value.
func (l *textLine) markWordBoundaries() {
	threshold := _gfed * l._aefd
	// `i` indexes the slice that starts at the second word, so l._ebge[i] is the previous word.
	for i, w := range l._ebge[1:] {
		if _edagd(w, l._ebge[i]) >= threshold {
			w._fgbg = true
		}
	}
}

// vertsHorzs splits the list by kind. The first result holds the `_gaba` rulings and the second
// the `_ddga` rulings; rulings of any other kind are dropped.
func (vecs rulingList) vertsHorzs() (rulingList, rulingList) {
	var gabas, ddgas rulingList
	for _, r := range vecs {
		switch r._gggf {
		case _gaba:
			gabas = append(gabas, r)
		case _ddga:
			ddgas = append(ddgas, r)
		}
	}
	return gabas, ddgas
}

// _bgcc builds a single sorted wordBag that holds all of `_adaa`, which must not be empty.
// The remaining arguments are passed on to _gcab for the first word.
func _bgcc(_adaa []*textWord, _fgfc float64, _aeaf, _bca rulingList) *wordBag {
	bag := _gcab(_adaa[0], _fgfc, _aeaf, _bca)
	for _, w := range _adaa[1:] {
		depthIdx := _fdeba(w._acag)
		bag._fadg[depthIdx] = append(bag._fadg[depthIdx], w)
		bag.PdfRectangle = _gcff(bag.PdfRectangle, w.PdfRectangle)
	}
	bag.sort()
	return bag
}

// xNeighbours builds an enter event at the left edge and a leave event at the right edge of
// every para and passes them to eventNeighbours. When `_adbb` is non-zero, each para's edges
// are first widened by `_adbb` times the para's fontsize().
func (paras paraList) xNeighbours(_adbb float64) map[*textPara][]int {
	events := make([]event, 2*len(paras))
	if _adbb == 0 {
		for i, para := range paras {
			events[2*i] = event{para.Llx, true, i}
			events[2*i+1] = event{para.Urx, false, i}
		}
	} else {
		for i, para := range paras {
			events[2*i] = event{para.Llx - _adbb*para.fontsize(), true, i}
			events[2*i+1] = event{para.Urx + _adbb*para.fontsize(), false, i}
		}
	}
	return paras.eventNeighbours(events)
}

// The kinds of ruling. `_eedb` is the zero value.
const (
	_eedb rulingKind = iota
	_ddga
	_gaba
)

// _cbe merges overlapping word bags. The bags are sorted by decreasing area (ties broken by
// decreasing height); each bag that has not been absorbed then absorbs every later,
// not-yet-absorbed bag that _gbae reports as overlapping the bag's rectangle extended to the
// left by its `_dbga` value. The surviving bags are returned.
//
// NOTE: `_cdfg` is re-ordered in place.
func _cbe(_cdfg []*wordBag) []*wordBag {
	if len(_cdfg) <= 1 {
		return _cdfg
	}
	if _eafe {
		_eg.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	_cc.Slice(_cdfg, func(i, j int) bool {
		bi, bj := _cdfg[i], _cdfg[j]
		areaI := bi.Width() * bi.Height()
		areaJ := bj.Width() * bj.Height()
		if areaI != areaJ {
			return areaI > areaJ
		}
		if bi.Height() != bj.Height() {
			return bi.Height() > bj.Height()
		}
		return i < j
	})

	var merged []*wordBag
	absorbed := make(intSet)
	for i0 := 0; i0 < len(_cdfg); i0++ {
		if absorbed.has(i0) {
			continue
		}
		bag0 := _cdfg[i0]
		for i1 := i0 + 1; i1 < len(_cdfg); i1++ {
			// Skip bags that an earlier bag has already absorbed. The previous code tested
			// the outer index here, which is never absorbed at this point, so a bag could be
			// absorbed more than once.
			if absorbed.has(i1) {
				continue
			}
			bag1 := _cdfg[i1]
			extended := bag0.PdfRectangle
			extended.Llx -= bag0._dbga
			if _gbae(extended, bag1.PdfRectangle) {
				bag0.absorb(bag1)
				absorbed.add(i1)
			}
		}
		merged = append(merged, bag0)
	}

	// Sanity check: every input bag is either kept or absorbed.
	if len(_cdfg) != len(merged)+len(absorbed) {
		_eg.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_cdfg), len(merged), len(absorbed))
	}
	return merged
}

// bbox returns the table's bounding rectangle.
func (tbl *textTable) bbox() _bd.PdfRectangle {
	return tbl.PdfRectangle
}
// TableCell is a cell in a TextTable.
type TableCell struct{
// Text is the extracted text.
Text string ;
// Marks holds the TextMarks corresponding to the text in Text.
Marks TextMarkArray ;};
// String returns a description of `t`.
// The description has the form "<_bgcfb> x <_gccb> <_cdgb>", where the last value is a boolean.
func (t *textTable) String() string {
	cols, rows := t._bgcfb, t._gccb
	return _be.Sprintf("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074", cols, rows, t._cdgb)
}

// _afdf returns the signed horizontal distance between the left edges of the two bounding
// boxes: the Llx of `_gedf` minus the Llx of `_fgaf`.
func _afdf(_gedf, _fgaf bounded) float64 {
	left := _gedf.bbox().Llx
	otherLeft := _fgaf.bbox().Llx
	return left - otherLeft
}
// _bffd (logged as "fixCells") walks the non-nil slices of `_cffe` in increasing key order,
// starting at the first key with a non-nil slice. Maps with fewer than two entries are left
// untouched.
//
// NOTE(review): inside the loop the slice is looked up twice with the same key, so the
// comparison is between the last and the first element of one and the same slice, and the
// (possibly modified) slice is then stored under the previous key. This looks like the
// previous key's slice was meant to be adjusted; behaviour is kept as is, please confirm.
func _bffd(_cffe map[int][]float64) {
	if len(_cffe) <= 1 {
		return
	}
	keys := _ggadbb(_cffe)
	if _eeca {
		_eg.Log.Info("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076", keys)
	}

	// Find the first key that has a non-nil slice.
	var first, prevKey int
	for first, prevKey = range keys {
		if _cffe[prevKey] != nil {
			break
		}
	}

	for offset, key := range keys[first:] {
		current := _cffe[key]
		if current == nil {
			continue
		}
		if _eeca {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a", first+offset, prevKey, key)
		}
		again := _cffe[key]
		if tail := len(again) - 1; again[tail] > current[0] {
			again[tail] = current[0]
			_cffe[prevKey] = again
		}
		prevKey = key
	}
}

// Feature switches and tuning values.
const (
	_addg  = true
	_cgffa = true
	_dgeb  = true
	_fddf  = false
	_feebf = false
	_dfade = 6
	_dbce  = 3.0
	_ecddd = 200
	_adbe  = true
	_aaeaf = true
	_adba  = true
	_eccfg = true
	_beegc = false
)

// _gbgg returns true when either para has its `_gfce` flag set. Otherwise it returns the result
// of _bcaga applied to the difference of the two paras' depth() values.
func _gbgg(_eaeeb, _gfba *textPara) bool {
	if !_eaeeb._gfce && !_gfba._gfce {
		return _bcaga(_eaeeb.depth() - _gfba.depth())
	}
	return true
}

// _ebeb is the number of entries at which getFont starts evicting from the font cache.
const _ebeb = 10

// rulingList is a list of rulings.
type rulingList []*ruling

// Numeric thresholds and tolerances used throughout the package.
const (
	_gdfe  = 1.0e-6
	_ebba  = 1.0e-4
	_gef   = 10
	_abab  = 6
	_bbcc  = 0.5
	_dadd  = 0.12
	_afbbc = 0.19
	_afeb  = 0.04
	_bfbf  = 0.04
	_beedf = 1.0
	_bbga  = 0.04
	_gga   = 0.4
	_bbab  = 0.7
	_gbfd  = 1.0
	_gffb  = 0.1
	_bbfe  = 1.4
	_ffbe  = 0.46
	_gfed  = 0.02
	_abae  = 0.2
	_edaa  = 0.5
	_eed   = 4
	_ggba  = 4.0
	_baad  = 6
	_fcbe  = 0.3
	_gfge  = 0.01
	_fgaa  = 0.02
	_gafg  = 2
	_ebgg  = 2
	_ffcfg = 500
	_cafe  = 4.0
	_gbgbb = 4.0
	_cged  = 0.05
	_accb  = 0.1
	_cebae = 2.0
	_ffd   = 2.0
	_bggc  = 1.5
	_ceacc = 3.0
	_bfba  = 0.25
)

// setCharSpacing stores `_feb` in the text state's `_cab` field. It is a no-op on a nil receiver.
func (to *textObject) setCharSpacing(_feb float64) {
	if to == nil {
		return
	}
	state := to._dff
	state._cab = _feb
	if _agbg {
		_eg.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _feb, state.String())
	}
}

// textObject holds the state needed to process one text object of a content stream.
type textObject struct {
	_fafb  *Extractor            // owning extractor
	_ggb   *_bd.PdfPageResources // resources used for font lookup
	_cddag _cce.GraphicsState
	_dff   *textState
	_daga  *stateStack
	_cbff  _g.Matrix // set together with `_bdb` by setTextMatrix
	_bdb   _g.Matrix
	_dcaa  []*textMark
	_gdcg  bool
}

// textState is the text state that textObject operates on.
type textState struct {
	_cab   float64 // character spacing, see setCharSpacing
	_fec   float64
	_aee   float64
	_dde   float64
	_bcdg  float64
	_ebb   RenderMode
	_cabf  float64
	_degee *_bd.PdfFont
	_egdd  _bd.PdfRectangle
	_fdfc  int
	_dcef  int
}

// add appends the given points to the subpath.
func (sp *subpath) add(_bdeg ..._g.Point) {
	sp._gcbb = append(sp._gcbb, _bdeg...)
}

// alignsSec reports whether the [`_gaad`, `_gdaf`] ranges of the two rulings overlap, allowing a
// gap of up to `_ffd` + 1.0.
func (r *ruling) alignsSec(_dbceb *ruling) bool {
	const tolerance = _ffd + 1.0
	if r._gaad-tolerance > _dbceb._gdaf {
		return false
	}
	return _dbceb._gaad-tolerance <= r._gdaf
}

// _ggadbb returns the keys of `_acaec` in increasing order.
func _ggadbb(_acaec map[int][]float64) []int {
	keys := make([]int, 0, len(_acaec))
	for key := range _acaec {
		keys = append(keys, key)
	}
	_cc.Ints(keys)
	return keys
}

// put stores para `_acca` in the table under the key _bddbg(`_dgaaa`, `_ccgc`).
func (tbl *textTable) put(_dgaaa, _ccgc int, _acca *textPara) {
	key := _bddbg(_dgaaa, _ccgc)
	tbl._deedc[key] = _acca
}

// applyTables returns a new list consisting of one table para for each table in `_fdfbc`,
// followed by the paras of the receiver whose `_deed` flag is not set.
func (paras paraList) applyTables(_fdfbc []*textTable) paraList {
	var result paraList
	for _, tbl := range _fdfbc {
		result = append(result, tbl.newTablePara())
	}
	for _, para := range paras {
		if !para._deed {
			result = append(result, para)
		}
	}
	return result
}

// _egfd looks up the single rune of `_eddaaf` in the `_baee` table. It returns "", false when the
// string does not consist of exactly one rune or when the rune has no entry.
func _egfd(_eddaaf string) (string, bool) {
	runes := []rune(_eddaaf)
	if len(runes) != 1 {
		return "", false
	}
	replacement, found := _baee[runes[0]]
	return replacement, found
}

// removeDuplicates sorts the receiver in place and returns a new list in which each run of
// consecutive equal rulings (as determined by equals) is reduced to its first element.
// It returns nil for an empty list.
func (vecs rulingList) removeDuplicates() rulingList {
	if len(vecs) == 0 {
		return nil
	}
	vecs.sort()
	uniques := rulingList{vecs[0]}
	for _, r := range vecs[1:] {
		if !r.equals(uniques[len(uniques)-1]) {
			uniques = append(uniques, r)
		}
	}
	return uniques
}

// emptyCompositeColumn reports whether no composite cell in column `_fbddd` contains any para.
func (tbl *textTable) emptyCompositeColumn(_fbddd int) bool {
	for y := 0; y < tbl._gccb; y++ {
		cell, ok := tbl._agga[_bddbg(_fbddd, y)]
		if ok && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// rulingKind identifies the kind of a ruling.
type rulingKind int

// removeWord removes word `_cdfc` from the words stored at depth index `_edfc`. The depth index
// itself is deleted when no words remain.
func (b *wordBag) removeWord(_cdfc *textWord, _edfc int) {
	remaining := _ddag(b._fadg[_edfc], _cdfc)
	if len(remaining) > 0 {
		b._fadg[_edfc] = remaining
		return
	}
	delete(b._fadg, _edfc)
}
// String returns a description of `l`.
// The description consists of the line's `_gddec` value, its bounding box, its `_aefd` value
// (printed as "fontsize") and the line text in double quotes.
func (l *textLine) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _be.Sprintf(layout, l._gddec, l.PdfRectangle, l._aefd, l.text())
}
// String returns a human readable description of `s`.
// The members are listed in increasing order.
func (s intSet) String() string {
	var members []int
	for member := range s {
		if s.has(member) {
			members = append(members, member)
		}
	}
	_cc.Ints(members)
	return _be.Sprintf("\u0025\u002b\u0076", members)
}

// highestWord returns the first word stored at depth index `_aaea` whose `_acag` value lies in
// the closed interval [`_ged`, `_cffb`], or nil if there is none.
func (b *wordBag) highestWord(_aaea int, _ged, _cffb float64) *textWord {
	for _, w := range b._fadg[_aaea] {
		if _ged <= w._acag && w._acag <= _cffb {
			return w
		}
	}
	return nil
}

// setTextMatrix sets both matrices of the text object to the matrix described by the 6 values
// in `_dfad`. A slice of any other length is logged and ignored.
func (to *textObject) setTextMatrix(_dfad []float64) {
	if len(_dfad) != 6 {
		_eg.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", len(_dfad))
		return
	}
	a, b, c, d, tx, ty := _dfad[0], _dfad[1], _dfad[2], _dfad[3], _dfad[4], _dfad[5]
	to._cbff = _g.NewMatrix(a, b, c, d, tx, ty)
	to._bdb = to._cbff
}

// fill appends a pathSection made of the state's current `_gdbg` value and fill color to
// `*_gdg`.
func (ss *shapesState) fill(_gdg *[]pathSection) {
	section := pathSection{_aga: ss._gdbg, Color: ss._baafg.getFillColor()}
	*_gdg = append(*_gdg, section)
	if _daafa {
		box := section.bbox()
		_be.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*_gdg), len(section._aga), ss, section.Color, box, box.Width(), box.Height())
		if _faca {
			// Print at most the first 11 subpaths.
			for i, sp := range section._aga {
				_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sp)
				if i == 10 {
					break
				}
			}
		}
	}
}

// getFontDict looks up the font named `_ecg` in the text object's resources.
// It returns (nil, nil) when the text object has no resources, and an error when the
// resources do not contain the font.
func (to *textObject) getFontDict(_ecg string) (_afc _db.PdfObject, _acea error) {
	resources := to._ggb
	if resources == nil {
		_eg.Log.Debug("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071", _ecg)
		return nil, nil
	}
	_afc, found := resources.GetFontByName(_db.PdfObjectName(_ecg))
	if !found {
		_eg.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071", _ecg)
		return nil, _f.New("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073")
	}
	return _afc, nil
}

// Text rendering mode flags.
const (
	// RenderModeStroke has the value 1.
	RenderModeStroke RenderMode = 1 << iota
	// RenderModeFill has the value 2.
	RenderModeFill
	// RenderModeClip has the value 4.
	RenderModeClip
)

// _cgdc converts color `_bfcgd` in colorspace `_gcgb` to an opaque NRGBA color. It returns black
// when either argument is nil, when the conversion fails, or when the converted color is not
// a *PdfColorDeviceRGB.
func _cgdc(_gcgb _bd.PdfColorspace, _bfcgd _bd.PdfColor) _fb.Color {
	if _gcgb == nil || _bfcgd == nil {
		return _fb.Black
	}
	converted, err := _gcgb.ColorToRGB(_bfcgd)
	if err != nil {
		_eg.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", _bfcgd, _gcgb, err)
		return _fb.Black
	}
	rgb, ok := converted.(*_bd.PdfColorDeviceRGB)
	if !ok {
		_eg.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", converted)
		return _fb.Black
	}
	return _fb.NRGBA{R: uint8(rgb.R() * 255), G: uint8(rgb.G() * 255), B: uint8(rgb.B() * 255), A: uint8(255)}
}

// _dagae returns the keys of `_efgg` in decreasing order.
func _dagae(_efgg map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_efgg))
	for key := range _efgg {
		keys = append(keys, key)
	}
	_cc.Float64s(keys)
	// Reverse the ascending order in place.
	n := len(keys)
	for i := 0; i < n/2; i++ {
		keys[i], keys[n-1-i] = keys[n-1-i], keys[i]
	}
	return keys
}

// getFont returns the font named `_fdge`. When the extractor has a font cache, the cache is
// consulted first, keyed by the String() form of the font's dictionary. Fonts that are not
// cached are loaded with getFontDirect and added to the cache; once the cache holds `_ebeb`
// or more entries, the entry with the smallest access stamp is evicted first.
func (to *textObject) getFont(_fdge string) (*_bd.PdfFont, error) {
	ext := to._fafb
	if ext._bg != nil {
		fontDict, err := to.getFontDict(_fdge)
		if err != nil {
			_eg.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073", _fdge, err.Error())
			return nil, err
		}
		ext._gb++
		// getFontDict returns (nil, nil) when there are no resources; calling String() on
		// the nil interface would panic, so the cache is skipped in that case.
		if fontDict != nil {
			key := fontDict.String()
			if entry, ok := ext._bg[key]; ok {
				// The map holds fontEntry values, not pointers: the refreshed access stamp
				// must be written back, otherwise eviction never sees that the entry was used.
				entry._fegb = ext._gb
				ext._bg[key] = entry
				return entry._fadc, nil
			}
		}
	}

	fontDict, err := to.getFontDict(_fdge)
	if err != nil {
		return nil, err
	}
	font, err := to.getFontDirect(_fdge)
	if err != nil {
		return nil, err
	}

	if ext._bg != nil && fontDict != nil {
		entry := fontEntry{font, ext._gb}
		if len(ext._bg) >= _ebeb {
			// Evict the entry with the smallest access stamp.
			keys := make([]string, 0, len(ext._bg))
			for key := range ext._bg {
				keys = append(keys, key)
			}
			_cc.Slice(keys, func(i, j int) bool {
				return ext._bg[keys[i]]._fegb < ext._bg[keys[j]]._fegb
			})
			delete(ext._bg, keys[0])
		}
		ext._bg[fontDict.String()] = entry
	}
	return font, nil
}

// _ffbg returns a ruling of kind `_ddga` along the bottom edge of rectangle `_bfcg`.
func _ffbg(_bfcg _bd.PdfRectangle) *ruling {
	return &ruling{_gggf: _ddga, _ccb: _bfcg.Lly, _gaad: _bfcg.Llx, _gdaf: _bfcg.Urx}
}

// writeText writes the para's text to `_abde`. A para that wraps a table is written row by row:
// each cell is followed by a space, a nil cell is written as a tab, and rows are separated
// by newlines. Write errors are not checked.
func (p *textPara) writeText(_abde _d.Writer) {
	if p._dbfdg == nil {
		p.writeCellText(_abde)
		return
	}
	for y := 0; y < p._dbfdg._gccb; y++ {
		for x := 0; x < p._dbfdg._bgcfb; x++ {
			cell := p._dbfdg.get(x, y)
			if cell == nil {
				_abde.Write([]byte("\u0009"))
			} else {
				cell.writeCellText(_abde)
			}
			_abde.Write([]byte("\u0020"))
		}
		if y < p._dbfdg._gccb-1 {
			_abde.Write([]byte("\u000a"))
		}
	}
}

// _gcff returns the smallest rectangle that contains both `_befa` and `_adff`.
func _gcff(_befa, _adff _bd.PdfRectangle) _bd.PdfRectangle {
	return _bd.PdfRectangle{
		Llx: _bf.Min(_befa.Llx, _adff.Llx),
		Lly: _bf.Min(_befa.Lly, _adff.Lly),
		Urx: _bf.Max(_befa.Urx, _adff.Urx),
		Ury: _bf.Max(_befa.Ury, _adff.Ury),
	}
}

// _bdff (logged as "makeTextPage") turns text marks into paras in reading order: the marks are
// converted to words, the words are gathered into one bag, the bag is divided and the parts
// merged, and each part is arranged into a para. Tables are extracted when there are at least
// `_baad` paras. It returns nil when there are no marks or no words.
func _bdff(_fgbf []*textMark, _ccfc _bd.PdfRectangle, _bcded rulingList, _fedg []gridTiling) paraList {
	_eg.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_fgbf), _ccfc)
	if len(_fgbf) == 0 {
		return nil
	}
	words := _fggaa(_fgbf, _ccfc)
	if len(words) == 0 {
		return nil
	}
	_bcded.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	first, second := _bcded.vertsHorzs()
	pageBag := _bgcc(words, _ccfc.Ury, first, second)
	bags := _cfgee(pageBag, _ccfc.Ury, first, second)
	bags = _cbe(bags)

	paras := make(paraList, 0, len(bags))
	for _, bag := range bags {
		para := bag.arrangeText()
		if para != nil {
			paras = append(paras, para)
		}
	}
	if len(paras) >= _baad {
		paras = paras.extractTables(_fedg)
	}
	paras.sortReadingOrder()
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}

// _gfec returns a reversed copy of `_cgd`.
func _gfec(_cgd []int) []int {
	reversed := make([]int, len(_cgd))
	for i, v := range _cgd {
		reversed[len(_cgd)-1-i] = v
	}
	return reversed
}
// String returns a human readable description of `vecs`.
// An empty list is printed as "{ EMPTY }". Otherwise the counts of the two lists returned by
// vertsHorzs() are printed; when both are non-empty, the rectangle spanned by the `_ccb` values
// of their first and last elements is printed as well.
func (vecs rulingList) String() string {
	if len(vecs) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	first, second := vecs.vertsHorzs()
	nFirst, nSecond := len(first), len(second)
	if nFirst == 0 || nSecond == 0 {
		return _be.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", nFirst, nSecond)
	}
	extent := _bd.PdfRectangle{
		Llx: first[0]._ccb,
		Urx: first[nFirst-1]._ccb,
		Lly: second[nSecond-1]._ccb,
		Ury: second[0]._ccb,
	}
	return _be.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", nFirst, nSecond, extent)
}

// clear resets the subpath to its zero value.
func (sp *subpath) clear() {
	var empty subpath
	*sp = empty
}
// String returns a description of `state`.
// The font is shown by its BaseFont() name, or as "[NOT SET]" when the state has no font.
func (state *textState) String() string {
	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	if font := state._degee; font != nil {
		fontName = font.BaseFont()
	}
	return _be.Sprintf("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071", state._cab, state._fec, state._bcdg, fontName)
}

// wordBag is a bounded collection of words, grouped by depth index.
type wordBag struct {
	_bd.PdfRectangle                     // bounding box of all words in the bag
	_dbga            float64             // largest `_efag` value of the words pulled into the bag
	_faad, _acae     rulingList          // rulings consulted by blocked() for the x and y directions
	_adbg            float64             // value passed to _gcab when the bag was created
	_fadg            map[int][]*textWord // words keyed by depth index
}

// _gcab returns a new wordBag that contains only word `_ffce`. The bag takes its bounding box
// and `_dbga` value from the word; `_afg`, `_ecda` and `_dafd` are stored in the bag as given.
func _gcab(_ffce *textWord, _afg float64, _ecda, _dafd rulingList) *wordBag {
	depthIdx := _fdeba(_ffce._acag)
	bag := wordBag{
		_fadg:        map[int][]*textWord{depthIdx: {_ffce}},
		PdfRectangle: _ffce.PdfRectangle,
		_dbga:        _ffce._efag,
		_adbg:        _afg,
		_faad:        _ecda,
		_acae:        _dafd,
	}
	return &bag
}

// reorder rearranges the list in place so that position i holds the para that was previously
// at index `_bfge`[i].
func (paras paraList) reorder(_bfge []int) {
	reordered := make(paraList, len(paras))
	for dst, src := range _bfge {
		reordered[dst] = paras[src]
	}
	copy(paras, reordered)
}

// computeViews processes the page's text marks and fills in the page's text (`_ebce`), text
// marks (`_dged`) and tables (`_bfbb`). Marks are processed in four groups, selected by their
// `_ddggc` value (0, 90, 180, 270); the loop stops early once every mark has been handled.
func (pt *PageText) computeViews() {
	var rulings rulingList
	if _adba {
		rulings = append(rulings, _eaada(pt._egg)...)
	}
	if _eccfg {
		rulings = append(rulings, _abca(pt._dbe)...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(pt._fgd)
	for angle := 0; angle < 360 && remaining > 0; angle += 90 {
		group := make([]*textMark, 0, len(pt._fgd)-remaining)
		for _, tm := range pt._fgd {
			if tm._ddggc == angle {
				group = append(group, tm)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _bdff(group, pt._fbd, rulings, tilings)...)
		remaining -= len(group)
	}

	buf := new(_cde.Buffer)
	paras.writeText(buf)
	pt._ebce = buf.String()
	pt._dged = paras.toTextMarks()
	pt._bfbb = paras.tables()
	if _eeca {
		_eg.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(pt._bfbb))
	}
}

// _fgfa returns the single parameter of operation `_fagb` as a float64. It returns an error when
// the operation does not have exactly one parameter or when the parameter is not a number.
func _fgfa(_fagb *_cce.ContentStreamOperation) (float64, error) {
	if len(_fagb.Params) == 1 {
		return _db.GetNumberAsFloat(_fagb.Params[0])
	}
	err := _f.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	_eg.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _fagb.Operand, 1, len(_fagb.Params), _fagb.Params)
	return 0.0, err
}
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall
// cause glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the
// three. Stroking, filling, and clipping shall have the same effects for a text object as they do for a
// path object (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int

// asRuling converts the rectangle-based ruling `_eggcg` into a *ruling.
//
// The ruling's position (`_ccb`) is the midpoint of the rectangle measured across the ruling, its extent
// (`_gaad` to `_gdaf`) is the rectangle's span along the ruling, and its width (`_cfbf`) is the value
// checkWidth returns for the two across-the-ruling edges.
// It returns (nil, false) when checkWidth rejects the rectangle or when the kind is neither `_gaba` nor
// `_ddga`.
func (_eggcg rectRuling) asRuling() (*ruling, bool) {
	kind := _eggcg._aegd

	// Start from the layout used by `_gaba` (the log message calls it "rulingVert"): x is measured
	// across the ruling and y along it.
	crossLo, crossHi := _eggcg.Llx, _eggcg.Urx
	alongLo, alongHi := _eggcg.Lly, _eggcg.Ury
	// "rectRuling.asRuling: rulingVert !checkWidth v=%+v"
	rejectFormat := "\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076"

	switch kind {
	case _gaba:
		// The defaults above already describe this kind.
	case _ddga:
		// "rulingHorz" in the log message: the two axes trade roles.
		crossLo, crossHi, alongLo, alongHi = alongLo, alongHi, crossLo, crossHi
		// "rectRuling.asRuling: rulingHorz !checkWidth v=%+v"
		rejectFormat = "\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076"
	default:
		// "bad primary kind=%d"
		_eg.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", kind)
		return nil, false
	}

	width, ok := _eggcg.checkWidth(crossLo, crossHi)
	if !ok {
		if _dddf {
			_eg.Log.Error(rejectFormat, _eggcg)
		}
		return nil, false
	}

	return &ruling{
		_gggf:  kind,
		Color:  _eggcg.Color,
		_beaec: _edaga,
		_ccb:   0.5 * (crossLo + crossHi),
		_gaad:  alongLo,
		_gdaf:  alongHi,
		_cfbf:  width,
	}, true
}

// pop removes the last entry from the stack and returns a pointer to a copy of the textState it pointed
// to. It returns nil when the stack is empty.
func (_fcgb *stateStack) pop() *textState {
	if _fcgb.empty() {
		return nil
	}
	stack := *_fcgb
	last := len(stack) - 1
	// Copy the state so the returned pointer is independent of the stored one.
	top := *stack[last]
	*_fcgb = stack[:last]
	return &top
}

// String returns a description of the cell: its rectangle, the number of paras it holds and, when it
// holds any, the merged para text passed through `_dbec` with the argument 50.
func (_fab compositeCell) String() string {
	count := len(_fab.paraList)
	var preview string
	if count > 0 {
		preview = _dbec(_fab.paraList.merge().text(), 50)
	}
	// "%6.2f %d paras %q"
	return _be.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071", _fab.PdfRectangle, count, preview)
}
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in the array.
// Marks with Meta set, and marks whose Text `_gagac` reports true for, are ignored. The bool result is
// false (and the rectangle is the zero value) when no mark contributed to the box.
func (_ggd *TextMarkArray) BBox() (_bd.PdfRectangle, bool) {
	var (
		box   _bd.PdfRectangle
		found bool
	)
	for _, mark := range _ggd._ggbg {
		switch {
		case mark.Meta || _gagac(mark.Text):
			// Not part of the bounding box.
		case !found:
			// First contributing mark seeds the box.
			box, found = mark.BBox, true
		default:
			box = _gcff(box, mark.BBox)
		}
	}
	return box, found
}
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it replaces the text, text marks and tables stored on the PageText.
// Can be called multiple times in a row with different bounding boxes.
//
// Only the marks for which `_bfgg(mark.bbox(), bbox)` reports true are used. They are grouped by
// orientation (0, 90, 180, 270) and each non-empty group is passed to `_bdff` with nil rulings.
func (_afd *PageText) ApplyArea(bbox _bd.PdfRectangle) {
	selected := make([]*textMark, 0, len(_afd._fgd))
	for _, mark := range _afd._fgd {
		if _bfgg(mark.bbox(), bbox) {
			selected = append(selected, mark)
		}
	}

	var paras paraList
	// remaining counts the selected marks not yet assigned to an orientation group; the loop ends as
	// soon as it reaches zero.
	remaining := len(selected)
	for angle := 0; angle < 360 && remaining > 0; angle += 90 {
		// At most `remaining` marks can still match, so that is the capacity to reserve. The previous
		// hint, len(selected)-remaining, was 0 on the first pass.
		group := make([]*textMark, 0, remaining)
		for _, mark := range selected {
			if mark._ddggc == angle {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _bdff(group, _afd._fbd, nil, nil)...)
		remaining -= len(group)
	}

	buf := new(_cde.Buffer)
	paras.writeText(buf)
	_afd._ebce = buf.String()
	_afd._dged = paras.toTextMarks()
	_afd._bfbb = paras.tables()
}

// add records `_afgac` in the set by storing an empty struct under that key.
func (_egdec intSet) add(_afgac int) {
	_egdec[_afgac] = struct{}{}
}
// String returns a description of the ruling.
//
// A ruling whose kind is `_eedb` is described as "NOT RULING". Otherwise the description lists the
// kind, the position `_ccb`, the extent `_gaad` to `_gdaf` with its length, `_beaec`, the Color and,
// when `_cfbf` is non-zero, the width. The position is labelled "x" and the extent "y", or the other
// way round when the kind is `_ddga`.
func (_ccbf *ruling) String() string {
	if _ccbf._gggf == _eedb {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047" // "NOT RULING"
	}

	posAxis, spanAxis := "\u0078", "\u0079" // "x", "y"
	if _ccbf._gggf == _ddga {
		posAxis, spanAxis = spanAxis, posAxis
	}

	var widthNote string
	if _ccbf._cfbf != 0.0 {
		// " width=%.2f"
		widthNote = _be.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _ccbf._cfbf)
	}

	lo, hi := _ccbf._gaad, _ccbf._gdaf
	// "%10s %s=%6.2f %s=%6.2f - %6.2f (%6.2f) %s %v%s"
	return _be.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",
		_ccbf._gggf, posAxis, _ccbf._ccb, spanAxis, lo, hi, hi-lo, _ccbf._beaec, _ccbf.Color, widthNote)
}

// bbox returns the PdfRectangle embedded in the text mark.
func (_cacd *textMark) bbox() _bd.PdfRectangle {
	return _cacd.PdfRectangle
}

// size returns the number of entries on the stack.
func (_dbfd *stateStack) size() int {
	return len(*_dbfd)
}

// extractXObjectImage records an ImageMark for the XObject named `_fef` in the resources `_fdeb`, placed
// according to the CTM of the graphics state `_fgeb`.
//
// Decoded images are cached in `_cebb._ac`, keyed by the object returned from GetXObjectByName, so an
// XObject that is drawn repeatedly is converted with ToImage only once. The function returns nil without
// recording anything when the XObject or its image cannot be found, and returns the error from
// GetXObjectImageByName, ToImage or ImageToRGB when one of them fails. On success the mark is appended
// to `_cebb._daa` and the counter `_cebb._de` is incremented.
func (_cebb *imageExtractContext) extractXObjectImage(_fef *_db.PdfObjectName, _fgeb _cce.GraphicsState, _fdeb *_bd.PdfPageResources) error {
	stream, _ := _fdeb.GetXObjectByName(*_fef)
	if stream == nil {
		return nil
	}

	entry, cached := _cebb._ac[stream]
	if !cached {
		ximg, err := _fdeb.GetXObjectImageByName(*_fef)
		if err != nil {
			return err
		}
		if ximg == nil {
			return nil
		}
		decoded, err := ximg.ToImage()
		if err != nil {
			return err
		}
		entry = &cachedImage{_fdg: decoded, _daf: ximg.ColorSpace}
		_cebb._ac[stream] = entry
	}

	rgb, err := entry._daf.ImageToRGB(*entry._fdg)
	if err != nil {
		return err
	}

	ctm := _fgeb.CTM
	// "@Do CTM: %s"
	_eg.Log.Debug("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073", ctm.String())

	mark := ImageMark{
		Image:  &rgb,
		Width:  ctm.ScalingFactorX(),
		Height: ctm.ScalingFactorY(),
		Angle:  ctm.Angle(),
	}
	mark.X, mark.Y = ctm.Translation()

	_cebb._daa = append(_cebb._daa, mark)
	_cebb._de++
	return nil
}

// markKind is an int-based type; its values are not declared in this part of the file.
type markKind int