unipdf/extractor/extractor.go

253 lines
180 KiB
Go
Raw Normal View History

2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2020-11-23 22:15:56 +00:00
package extractor ;import (_ac "bytes";_f "errors";_dc "fmt";_ad "github.com/unidoc/unipdf/v3/common";_ag "github.com/unidoc/unipdf/v3/common/license";_ga "github.com/unidoc/unipdf/v3/contentstream";_gd "github.com/unidoc/unipdf/v3/core";_ce "github.com/unidoc/unipdf/v3/internal/textencoding";_cd "github.com/unidoc/unipdf/v3/internal/transform";_dd "github.com/unidoc/unipdf/v3/model";_cb "golang.org/x/text/unicode/norm";_af "golang.org/x/xerrors";_ed "image/color";_a "io";_ge "math";_e "regexp";_cf "sort";_d "strings";_b "unicode";_g "unicode/utf8";);func (_ggcg paraList )topoOrder ()[]int {if _gcdc {_ad .Log .Info ("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a");};_ccfe :=len (_ggcg );_ddb :=make ([]bool ,_ccfe );_dfbc :=make ([]int ,0,_ccfe );_fdgc :=_ggcg .llyOrdering ();var _gege func (_geacf int );_gege =func (_bgea int ){_ddb [_bgea ]=true ;for _bdbe :=0;_bdbe < _ccfe ;_bdbe ++{if !_ddb [_bdbe ]{if _ggcg .readBefore (_fdgc ,_bgea ,_bdbe ){_gege (_bdbe );};};};_dfbc =append (_dfbc ,_bgea );};for _efdaa :=0;_efdaa < _ccfe ;_efdaa ++{if !_ddb [_efdaa ]{_gege (_efdaa );};};return _gefb (_dfbc );};
2020-11-11 18:48:37 +00:00
2020-11-23 22:15:56 +00:00
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
func (_ddfd *PageText )ApplyArea (bbox _dd .PdfRectangle ){_cbga :=make ([]*textMark ,0,len (_ddfd ._gae ));for _ ,_edf :=range _ddfd ._gae {if _adc (_edf .bbox (),bbox ){_cbga =append (_cbga ,_edf );};};var _ccef paraList ;_fggd :=len (_cbga );for _cgb :=0;_cgb < 360&&_fggd > 0;_cgb +=90{_fcfe :=make ([]*textMark ,0,len (_cbga )-_fggd );for _ ,_gcca :=range _cbga {if _gcca ._deeg ==_cgb {_fcfe =append (_fcfe ,_gcca );};};if len (_fcfe )> 0{_gbdc :=_ddcd (_fcfe ,_ddfd ._bgc ,nil ,nil );_ccef =append (_ccef ,_gbdc ...);_fggd -=len (_fcfe );};};_fcdc :=new (_ac .Buffer );_ccef .writeText (_fcdc );_ddfd ._gdcd =_fcdc .String ();_ddfd ._gge =_ccef .toTextMarks ();_ddfd ._feb =_ccef .tables ();};func (_egdb *textMark )inDiacriticArea (_degc *textMark )bool {_dbcfb :=_egdb .Llx -_degc .Llx ;_cfaa :=_egdb .Urx -_degc .Urx ;_dfgg :=_egdb .Lly -_degc .Lly ;return _ge .Abs (_dbcfb +_cfaa )< _egdb .Width ()*_eggb &&_ge .Abs (_dfgg )< _egdb .Height ()*_eggb ;};func (_fcfb rectRuling )asRuling ()(*ruling ,bool ){_cbe :=ruling {_abgae :_fcfb ._dgdf ,Color :_fcfb .Color ,_aaff :_ffa };switch _fcfb ._dgdf {case _fdff :_cbe ._acaf =0.5*(_fcfb .Llx +_fcfb .Urx );_cbe ._fgdd =_fcfb .Lly ;_cbe ._cage =_fcfb .Ury ;_ecag ,_fbbdg :=_fcfb .checkWidth (_fcfb .Llx ,_fcfb .Urx );if !_fbbdg {if _aaf {_ad .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_fcfb );};return nil ,false ;};_cbe ._faff =_ecag ;case _feae :_cbe ._acaf =0.5*(_fcfb .Lly +_fcfb .Ury );_cbe ._fgdd =_fcfb .Llx ;_cbe ._cage =_fcfb .Urx ;_egbe ,_fdac :=_fcfb .checkWidth (_fcfb .Lly ,_fcfb .Ury );if !_fdac {if _aaf {_ad .Log .Error 
("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_fcfb );};return nil ,false ;};_cbe ._faff =_egbe ;default:_ad .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_fcfb ._dgdf );return nil ,false ;};return &_cbe ,true ;};func (_efed paraList )reorder (_bdce []int ){_cedd :=make (paraList ,len (_efed ));for _efca ,_efde :=range _bdce {_cedd [_efca ]=_efed [_efde ];};copy (_efed ,_cedd );};func (_bfc rulingList )secMinMax ()(float64 ,float64 ){_gaagg ,_gebde :=_bfc [0]._fgdd ,_bfc [0]._cage ;for _ ,_ecbc :=range _bfc [1:]{if _ecbc ._fgdd < _gaagg {_gaagg =_ecbc ._fgdd ;};if _ecbc ._cage > _gebde {_gebde =_ecbc ._cage ;};};return _gaagg ,_gebde ;};const _aefd =10;func _fbae (_ffcd float64 ,_degg int )int {if _degg ==0{_degg =1;};_afeb :=float64 (_degg );return int (_ge .Round (_ffcd /_afeb )*_afeb );};func _baac (_eaab ,_eacab float64 )string {_gcbba :=!_dgdb (_eaab -_eacab );if _gcbba {return "\u000a";};return "\u0020";};func _cgbgd (_gbba string ,_fgded int )string {if len (_gbba )< _fgded {return _gbba ;};return _gbba [:_fgded ];};
2020-11-11 18:48:37 +00:00
2020-11-23 22:15:56 +00:00
// String returns a one-line summary of the text state: the numeric fields
// _ade, _gcee and _dccf (labelled tc, tw and tfs in the output) followed by
// the quoted base font name, or a placeholder when no font is set.
func (_bbgc *textState) String() string {
	const layout = "\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071"

	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	if _bbgc._eccb != nil {
		fontName = _bbgc._eccb.BaseFont()
	}
	return _dc.Sprintf(layout, _bbgc._ade, _bbgc._gcee, _bbgc._dccf, fontName)
}
2020-11-11 18:48:37 +00:00
2020-11-23 22:15:56 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct{IncludeInlineStencilMasks bool ;};
// setWordSpacing stores `_gcfff` in the _gcee field of the text object's state.
// It is a no-op on a nil receiver.
func (_fcb *textObject )setWordSpacing (_gcfff float64 ){if _fcb ==nil {return ;};_fcb ._bfdgc ._gcee =_gcfff ;};
// log is a debugging aid that does nothing unless the _fggb flag is set. When
// enabled it logs a summary line tagged with `_gfdd` and then prints every
// ruling in the list, one per line, to standard output.
func (_facf rulingList )log (_gfdd string ){if !_fggb {return ;};_ad .Log .Info ("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_gfdd ,_facf .String ());for _degbf ,_bacf :=range _facf {_dc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_degbf ,_bacf .String ());};};
// arrangeText consumes the words of the bag and groups them into lines, which
// are then wrapped into a textPara.
//
// The bag is sorted first (and de-duplicated when _bbafc is set). For each
// depth index, while that index still holds words, a new line is seeded with
// _agc at the first reading index. Words are then pulled into the line one at
// a time: candidates come from highestWord over the depth band around the seed
// word (seed depth -/+ _fdae*fontsize), and are compared with the line's last
// word via _acgd. A candidate whose _acgd value is below -_abe*fontsize ends
// the line (labelled break); one above _abfd*fontsize is skipped; among the
// rest the one ordering first under _bedg is pulled. When no candidate remains
// the line is closed with markWordBoundaries.
//
// The lines are finally sorted with _gcd and passed to _cbac together with the
// bag's rectangle. Returns nil when no line was produced. The trailing section
// is debug output guarded by _fada / _ggcd / _gfbcc.
func (_cafa *wordBag )arrangeText ()*textPara {_cafa .sort ();if _bbafc {_cafa .removeDuplicates ();};var _bgedc []*textLine ;for _ ,_egdbe :=range _cafa .depthIndexes (){for !_cafa .empty (_egdbe ){_ecfe :=_cafa .firstReadingIndex (_egdbe );_dfcbe :=_cafa .firstWord (_ecfe );_eeab :=_agc (_cafa ,_ecfe );_cafg :=_dfcbe ._dafb ;_ddaed :=_dfcbe ._gceede -_fdae *_cafg ;_gagc :=_dfcbe ._gceede +_fdae *_cafg ;_aeeb :=_abfd *_cafg ;_ddca :=_abe *_cafg ;_cbdcg :for {var _aeace *textWord ;_cdce :=0;for _ ,_eefa :=range _cafa .depthBand (_ddaed ,_gagc ){_adce :=_cafa .highestWord (_eefa ,_ddaed ,_gagc );if _adce ==nil {continue ;};_gddb :=_acgd (_adce ,_eeab ._becbb [len (_eeab ._becbb )-1]);if _gddb < -_ddca {break _cbdcg ;};if _gddb > _aeeb {continue ;};if _aeace !=nil &&_bedg (_adce ,_aeace )>=0{continue ;};_aeace =_adce ;_cdce =_eefa ;};if _aeace ==nil {break ;};_eeab .pullWord (_cafa ,_aeace ,_cdce );};_eeab .markWordBoundaries ();_bgedc =append (_bgedc ,_eeab );};};if len (_bgedc )==0{return nil ;};_cf .Slice (_bgedc ,func (_faaed ,_gebg int )bool {return _gcd (_bgedc [_faaed ],_bgedc [_gebg ])< 0});_cccg :=_cbac (_cafa .PdfRectangle ,_bgedc );if _fada {_ad .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_cccg .String ());if _ggcd {for _cccf ,_fcaf :=range _cccg ._fcaad {_dc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cccf ,_fcaf .String ());if _gfbcc {for 
_eeaf ,_cgbdd :=range _fcaf ._becbb {_dc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_eeaf ,_cgbdd .String ());for _abbd ,_gdbgd :=range _cgbdd ._dgcbf {_dc .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_abbd ,_gdbgd .String ());};};};};};};return _cccg ;};
// extractFormImages looks up the form XObject named `_dg` in `_ggg`, reads its
// content stream and runs extractContentStreamImages over it, using the form's
// own Resources when present and falling back to `_ggg` otherwise. The _db
// counter is incremented after a successful extraction. A missing form (nil
// with no error) is not an error and leaves the counter unchanged. The
// graphics-state argument `_aff` is not used by this function.
func (_gdf *imageExtractContext )extractFormImages (_dg *_gd .PdfObjectName ,_aff _ga .GraphicsState ,_ggg *_dd .PdfPageResources )error {_cca ,_bcf :=_ggg .GetXObjectFormByName (*_dg );if _bcf !=nil {return _bcf ;};if _cca ==nil {return nil ;};_gda ,_bcf :=_cca .GetContentStream ();if _bcf !=nil {return _bcf ;};_gdff :=_cca .Resources ;if _gdff ==nil {_gdff =_ggg ;};_bcf =_gdf .extractContentStreamImages (string (_gda ),_gdff );if _bcf !=nil {return _bcf ;};_gdf ._db ++;return nil ;};
2020-11-11 18:48:37 +00:00
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
2020-11-23 22:15:56 +00:00
func (_bcg *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pageText, numChars, numMisses, err := _bcg.extractPageText(_bcg._add, _bcg._aa, _cd.IdentityMatrix(), 0)
	if err != nil {
		return nil, 0, 0, err
	}

	pageText.computeViews()

	// _afgfd runs a further check on the extracted text; any error it returns
	// aborts the extraction. NOTE(review): presumably license/usage tracking,
	// confirm against its definition.
	if err = _afgfd(pageText); err != nil {
		return nil, 0, 0, err
	}
	return pageText, numChars, numMisses, nil
}

// endsInHyphen reports whether the line ends in a hyphen that _fcgd accepts.
// The last rune of the last word must be in the unicode.Hyphen range table.
// The _fcgd check is then applied to the last word alone when its _fabdc flag
// is set, and to the text of the whole line as a fallback.
//
// A line without words never ends in a hyphen. Without this guard the lookup
// of the last word would index past the start of an empty slice and panic.
func (_fgeg *textLine) endsInHyphen() bool {
	if len(_fgeg._becbb) == 0 {
		return false
	}
	lastWord := _fgeg._becbb[len(_fgeg._becbb)-1]
	wordText := lastWord._ebed

	// Cheap filter first: computing the text of the whole line is only needed
	// when the final rune is actually a hyphen.
	lastRune, size := _g.DecodeLastRuneInString(wordText)
	if size <= 0 || !_b.Is(_b.Hyphen, lastRune) {
		return false
	}
	if lastWord._fabdc && _fcgd(wordText) {
		return true
	}
	return _fcgd(_fgeg.text())
}
2020-11-11 18:48:37 +00:00
// String returns a description of `p`.
2020-11-23 22:15:56 +00:00
func (_bece *textPara) String() string {
	if _bece._ebfg {
		return _dc.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", _bece.PdfRectangle)
	}

	// Prefix with the table dimensions when the paragraph carries a table.
	var tablePrefix string
	if table := _bece._cgf; table != nil {
		tablePrefix = _dc.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", table._gddg, table._adfe)
	}

	const layout = "\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071"
	return _dc.Sprintf(layout, _bece.PdfRectangle, tablePrefix, len(_bece._fcaad), _cgbgd(_bece.text(), 50))
}

// _affga builds a ruling of kind _fdff located at the right edge (Urx) of the
// rectangle and spanning its Lly..Ury range.
func _affga(_caaca _dd.PdfRectangle) *ruling {
	edge := ruling{_abgae: _fdff}
	edge._acaf = _caaca.Urx
	edge._fgdd = _caaca.Lly
	edge._cage = _caaca.Ury
	return &edge
}

// intersects reports whether the two rulings are of different kinds (one
// _fdff, one _feae) and each one's _acaf coordinate lies within the other's
// _fgdd.._cage span, widened by the _dad tolerance at both ends.
func (_cgfg *ruling) intersects(_fbce *ruling) bool {
	var orthogonal bool
	switch {
	case _cgfg._abgae == _fdff && _fbce._abgae == _feae:
		orthogonal = true
	case _fbce._abgae == _fdff && _cgfg._abgae == _feae:
		orthogonal = true
	}

	// spans reports whether `across`'s position falls inside `along`'s extent.
	spans := func(along, across *ruling) bool {
		if across._acaf < along._fgdd-_dad {
			return false
		}
		return across._acaf <= along._cage+_dad
	}
	overlap1 := spans(_cgfg, _fbce)
	overlap2 := spans(_fbce, _cgfg)
	result := orthogonal && overlap1 && overlap2

	if _fggb {
		_dc.Printf("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a", orthogonal, overlap1, overlap2, result, _cgfg, _fbce)
	}
	return result
}

// last returns the final point of the subpath. The subpath must hold at least
// one point.
func (_abb *subpath) last() _cd.Point {
	points := _abb._addd
	return points[len(points)-1]
}

// setTextRenderMode stores `_dbcf`, converted to RenderMode, in the _babc
// field of the text object's state. It is a no-op on a nil receiver.
func (_fec *textObject) setTextRenderMode(_dbcf int) {
	if _fec != nil {
		_fec._bfdgc._babc = RenderMode(_dbcf)
	}
}
2020-08-27 21:45:09 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct{
// Text is the extracted text.
Text string ;
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2020-08-27 21:45:09 +00:00
// BBox is the bounding box of the text.
2020-11-23 22:15:56 +00:00
BBox _dd .PdfRectangle ;
2020-08-27 21:45:09 +00:00
// Font is the font the text was drawn with.
2020-11-23 22:15:56 +00:00
Font *_dd .PdfFont ;
2020-08-27 21:45:09 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
2018-11-28 18:06:03 +11:00
2020-08-27 21:45:09 +00:00
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2020-11-23 22:15:56 +00:00
FillColor _ed .Color ;
2020-08-27 21:45:09 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2020-11-23 22:15:56 +00:00
StrokeColor _ed .Color ;
2020-10-05 19:28:24 +00:00
2020-11-11 18:48:37 +00:00
// Orientation is the text orientation
2020-11-23 22:15:56 +00:00
Orientation int ;};
2020-10-05 19:28:24 +00:00
2020-11-11 18:48:37 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
2020-11-23 22:15:56 +00:00
type ImageMark struct{Image *_dd .Image ;
2020-10-05 19:28:24 +00:00
2020-11-11 18:48:37 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ;Height float64 ;
2020-09-28 23:18:17 +00:00
2020-11-11 18:48:37 +00:00
// Position of the image in PDF coordinates (lower left corner).
X float64 ;Y float64 ;
2020-09-28 23:18:17 +00:00
2020-11-11 18:48:37 +00:00
// Angle in degrees, if rotated.
2020-11-23 22:15:56 +00:00
Angle float64 ;};func _dbgda (_gafaa []*textWord ,_ebbf *textWord )[]*textWord {for _dagd ,_cdadge :=range _gafaa {if _cdadge ==_ebbf {return _eebab (_gafaa ,_dagd );};};_ad .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_ebbf );return nil ;};func _affde (_bgced string )(string ,bool ){_abgfe :=[]rune (_bgced );if len (_abgfe )!=1{return "",false ;};_gadg ,_cgge :=_aecgf [_abgfe [0]];return _gadg ,_cgge ;};func (_eaebda paraList )extractTables (_ecgff []gridTiling )paraList {if _bedbb {_ad .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_eaebda ));};if len (_eaebda )< _ega {return _eaebda ;};_dggg :=_eaebda .findTables (_ecgff );if _bedbb {_ad .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_dggg ));for _cfeb ,_abfcd :=range _dggg {_abfcd .log (_dc .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_cfeb ));};};return _eaebda .applyTables (_dggg );};
2020-10-12 14:17:59 +00:00
2020-11-23 22:15:56 +00:00
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	// _dcf holds the marks in the order they were added.
	_dcf []TextMark
}
2020-10-05 19:28:24 +00:00
2020-11-23 22:15:56 +00:00
// String returns a string describing `tm`.
func (_eff TextMark )String ()string {_agec :=_eff .BBox ;var _ageb string ;if _eff .Font !=nil {_ageb =_eff .Font .String ();if len (_ageb )> 50{_ageb =_ageb [:50]+"\u002e\u002e\u002e";};};var _ddfb string ;if _eff .Meta {_ddfb ="\u0020\u002a\u004d\u002a";};return _dc .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_eff .Offset ,_eff .Text ,[]rune (_eff .Text ),_agec .Llx ,_agec .Lly ,_agec .Urx ,_agec .Ury ,_ageb ,_ddfb );};func (_gcada gridTiling )complete ()bool {for _ ,_aafg :=range _gcada ._ggac {for _ ,_dbdd :=range _aafg {if !_dbdd .complete (){return false ;};};};return true ;};func (_dagb *ruling )alignsSec (_gdecb *ruling )bool {const _cbgc =_gbg +1.0;return _dagb ._fgdd -_cbgc <=_gdecb ._cage &&_gdecb ._fgdd -_cbgc <=_dagb ._cage ;};func (_dged paraList )toTextMarks ()[]TextMark {_ggee :=0;var _cadd []TextMark ;for _gcdd ,_feaaf :=range _dged {if _feaaf ._ebfg {continue ;};_abged :=_feaaf .toTextMarks (&_ggee );_cadd =append (_cadd ,_abged ...);if _gcdd !=len (_dged )-1{if _dgfd (_feaaf ,_dged [_gcdd +1]){_cadd =_fabc (_cadd ,&_ggee ,"\u0020");}else {_cadd =_fabc (_cadd ,&_ggee ,"\u000a");_cadd =_fabc (_cadd ,&_ggee ,"\u000a");};};};_cadd =_fabc (_cadd ,&_ggee ,"\u000a");_cadd =_fabc (_cadd ,&_ggee ,"\u000a");return _cadd ;};func _dae (_aega *Extractor ,_fcda *_dd .PdfPageResources ,_afdb _ga .GraphicsState ,_fa *textState ,_eace *stateStack )*textObject {return &textObject {_bfe :_aega ,_eaae :_fcda ,_bgda :_afdb ,_fea :_eace ,_bfdgc :_fa ,_fddg :_cd .IdentityMatrix (),_cba :_cd .IdentityMatrix ()};};func _adc (_gbcd ,_dcgag _dd .PdfRectangle )bool {return _fafcf (_gbcd ,_dcgag )&&_befe (_gbcd ,_dcgag )};func (_ebfgd rulingList )tidied (_fgea string )rulingList {_fcag 
:=_ebfgd .removeDuplicates ();_fcag .log ("\u0075n\u0069\u0071\u0075\u0065\u0073");_defda :=_fcag .snapToGroups ();if _defda ==nil {return nil ;};_defda .sort ();if _fggb {_ad .Log .Info ("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064",_fgea ,len (_ebfgd ),len (_fcag ),len (_defda ));};_defda .log ("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d");return _defda ;};
2020-10-12 14:17:59 +00:00
2020-11-11 18:48:37 +00:00
// String returns a description of `w`.
2020-11-23 22:15:56 +00:00
func (_bedf *textWord )String ()string {return _dc .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_bedf ._gceede ,_bedf .PdfRectangle ,_bedf ._dafb ,_bedf ._ebed );};func (_dcff *textPara )text ()string {_acefd :=new (_ac .Buffer );_dcff .writeText (_acefd );return _acefd .String ();};func (_cgfe rulingList )bbox ()_dd .PdfRectangle {var _ageeea _dd .PdfRectangle ;if len (_cgfe )==0{_ad .Log .Error ("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073");return _dd .PdfRectangle {};};if _cgfe [0]._abgae ==_feae {_ageeea .Llx ,_ageeea .Urx =_cgfe .secMinMax ();_ageeea .Lly ,_ageeea .Ury =_cgfe .primMinMax ();}else {_ageeea .Llx ,_ageeea .Urx =_cgfe .primMinMax ();_ageeea .Lly ,_ageeea .Ury =_cgfe .secMinMax ();};return _ageeea ;};func _cgee (_fadc map[float64 ]gridTile )[]float64 {_aaeab :=make ([]float64 ,0,len (_fadc ));for _ccde :=range _fadc {_aaeab =append (_aaeab ,_ccde );};_cf .Float64s (_aaeab );return _aaeab ;};func _eddd (_eacf ,_fccdb int )uint64 {return uint64 (_eacf )*0x1000000+uint64 (_fccdb )};func _feccac (_bdcec []rulingList )(rulingList ,rulingList ){var _fbdd rulingList ;for _ ,_gcbfe :=range _bdcec {_fbdd =append (_fbdd ,_gcbfe ...);};return _fbdd .vertsHorzs ();};const (_cffb rulingKind =iota ;_feae ;_fdff ;);func (_ddfg *wordBag )sort (){for _ ,_fadd :=range _ddfg ._efeb {_cf .Slice (_fadd ,func (_feg ,_fbff int )bool {return _bedg (_fadd [_feg ],_fadd [_fbff ])< 0});};};const (_cbc =false ;_cga =false ;_dffd =false ;_eedd =false ;_ecef =false ;_bedbd =false ;_fbba =false ;_gcdc =false ;_fada =false ;_ggcd =_fada &&true ;_gfbcc =_ggcd &&false ;_dbae =_fada &&true ;_bedbb =false ;_dcfc =_bedbb &&false ;_dea =_bedbb &&true ;_fggb =false ;_caa =_fggb &&false ;_dgcc =_fggb &&false ;_dgdd =_fggb &&true ;_aaf =_fggb &&false ;_bgff =_fggb 
&&false ;);func (_fcae *textTable )put (_aabeb ,_fgge int ,_efcbe *textPara ){_fcae ._efbae [_eddd (_aabeb ,_fgge )]=_efcbe ;};func _aade (_dccc _cd .Matrix )_cd .Point {_eeea ,_gbc :=_dccc .Translation ();return _cd .Point {X :_eeea ,Y :_gbc };};
2020-10-05 19:28:24 +00:00
2020-11-23 22:15:56 +00:00
// String returns a string describing `ma`.
func (_gfd TextMarkArray) String() string {
	marks := _gfd._dcf
	if len(marks) == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	// Only the first and last marks are shown to keep the description short.
	first, last := marks[0], marks[len(marks)-1]
	return _dc.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", len(marks), first, last)
}

// compositeColCorridors returns a map with one entry per index in
// [0, _gddg), each mapped to a nil slice.
func (_efbg *textTable) compositeColCorridors() map[int][]float64 {
	width := _efbg._gddg
	corridors := make(map[int][]float64, width)
	if _bedbb {
		_ad.Log.Info("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020", _efbg._gddg)
	}
	for col := 0; col < _efbg._gddg; col++ {
		corridors[col] = nil
	}
	return corridors
}

// _gdbfg builds the gridTile for rectangle `_ebda`. Each of the four flags is
// set when the corresponding ruling is non-nil and encloses the matching side
// of the rectangle: the first two rulings are tested against the Lly..Ury
// range, the last two against the Llx..Urx range.
func _gdbfg(_ebda _dd.PdfRectangle, _aebdd, _abde, _acda, _deefe *ruling) gridTile {
	// covers is nil-safe: a missing ruling never encloses anything.
	covers := func(r *ruling, lo, hi float64) bool {
		if r == nil {
			return false
		}
		return r.encloses(lo, hi)
	}

	tile := gridTile{PdfRectangle: _ebda}
	tile._gfgca = covers(_aebdd, _ebda.Lly, _ebda.Ury)
	tile._ebce = covers(_abde, _ebda.Lly, _ebda.Ury)
	tile._bgfee = covers(_acda, _ebda.Llx, _ebda.Urx)
	tile._aabe = covers(_deefe, _ebda.Llx, _ebda.Urx)
	return tile
}
2020-10-19 10:58:10 +00:00
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
2020-11-23 22:15:56 +00:00
type TextTable struct{W ,H int ;Cells [][]TableCell ;};func (_dcfdg rulingList )mergePrimary ()float64 {_fege :=_dcfdg [0]._acaf ;for _ ,_acdag :=range _dcfdg [1:]{_fege +=_acdag ._acaf ;};return _fege /float64 (len (_dcfdg ));};func _dgdb (_dbda float64 )bool {return _ge .Abs (_dbda )< _cgea };func (_gfbe paraList )readBefore (_bbf []int ,_fbg ,_fbad int )bool {_bdbed ,_ffcb :=_gfbe [_fbg ],_gfbe [_fbad ];if _fgad (_bdbed ,_ffcb )&&_bdbed .Lly > _ffcb .Lly {return true ;};if !(_bdbed ._gaca .Urx < _ffcb ._gaca .Llx ){return false ;};_fcffa ,_eabf :=_bdbed .Lly ,_ffcb .Lly ;if _fcffa > _eabf {_eabf ,_fcffa =_fcffa ,_eabf ;};_acfd :=_ge .Max (_bdbed ._gaca .Llx ,_ffcb ._gaca .Llx );_gdfc :=_ge .Min (_bdbed ._gaca .Urx ,_ffcb ._gaca .Urx );_aabb :=_gfbe .llyRange (_bbf ,_fcffa ,_eabf );for _ ,_edac :=range _aabb {if _edac ==_fbg ||_edac ==_fbad {continue ;};_cbgfd :=_gfbe [_edac ];if _cbgfd ._gaca .Llx <=_gdfc &&_acfd <=_cbgfd ._gaca .Urx {return false ;};};return true ;};func _babdb (_dadc *wordBag ,_gfcc float64 ,_cgca ,_gdbg rulingList )[]*wordBag {var _edd []*wordBag ;for _ ,_abgg :=range _dadc .depthIndexes (){_abggf :=false ;for !_dadc .empty (_abgg ){_cdfc :=_dadc .firstReadingIndex (_abgg );_egba :=_dadc .firstWord (_cdfc );_ddfa :=_babd (_egba ,_gfcc ,_cgca ,_gdbg );_dadc .removeWord (_egba ,_cdfc );if _fbba {_ad .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_egba .String ());};for _fggg :=true ;_fggg ;_fggg =_abggf {_abggf =false ;_egbg :=_ebca *_ddfa ._ebge ;_edcd :=_gafbg *_ddfa ._ebge ;_dcdc :=_efbe *_ddfa ._ebge ;if _fbba {_ad .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 
\u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_ddfa .minDepth (),_ddfa .maxDepth (),_dcdc ,_edcd );};if _dadc .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_ddfa ,_fcaa (_ggad ,0),_ddfa .minDepth ()-_dcdc ,_ddfa .maxDepth ()+_dcdc ,_agfg ,false ,false )> 0{_abggf =true ;};if _dadc .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_ddfa ,_fcaa (_ggad ,_edcd ),_ddfa .minDepth (),_ddfa .maxDepth (),_ccfc ,false ,false )> 0{_abggf =true ;};if _abggf {continue ;};_abge :=_dadc .scanBand ("",_ddfa ,_fcaa (_cbdc ,_egbg ),_ddfa .minDepth (),_ddfa .maxDepth (),_ddda ,true ,false );if _abge > 0{_bfae :=(_ddfa .maxDepth ()-_ddfa .minDepth ())/_ddfa ._ebge ;if (_abge > 1&&float64 (_abge )> 0.3*_bfae )||_abge <=10{if _dadc .scanBand ("\u006f\u0074\u0068e\u0072",_ddfa ,_fcaa (_cbdc ,_egbg ),_ddfa .minDepth (),_ddfa .maxDepth (),_ddda ,false ,true )> 0{_abggf =true ;};};};};_edd =append (_edd ,_ddfa );};};return _edd ;};func (_efbc *textObject )renderText (_aaa []byte )error {if _efbc ._eeg {_ad .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");return nil ;};_cgd :=_efbc .getCurrentFont ();_ebega :=_cgd .BytesToCharcodes (_aaa );_gdb ,_fbb ,_eegf :=_cgd .CharcodesToStrings (_ebega );if _eegf > 0{_ad .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_fbb ,_eegf );};_efbc ._bfdgc ._gca +=_fbb ;_efbc ._bfdgc ._aae +=_eegf ;_ecd :=_efbc ._bfdgc ;_aeae :=_ecd ._dccf ;_cbgf 
:=_ecd ._cdda /100.0;_gafb ,_efg :=_cgd .GetRuneMetrics (' ');if !_efg {_gafb ,_efg =_cgd .GetCharMetrics (32);};if !_efg {_gafb ,_ =_dd .DefaultFont ().GetRuneMetrics (' ');};_bed :=_gafb .Wx *_ada ;_ad .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u0
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
func (_fcga *TextMarkArray) BBox() (_dd.PdfRectangle, bool) {
	// A nil array has no marks. Len on this type already tolerates a nil
	// receiver, so BBox does the same instead of panicking on the field access.
	if _fcga == nil {
		return _dd.PdfRectangle{}, false
	}

	var box _dd.PdfRectangle
	found := false
	for _, mark := range _fcga._dcf {
		// Inserted (Meta) marks and marks whose text _bbge rejects do not
		// contribute to the box.
		if mark.Meta || _bbge(mark.Text) {
			continue
		}
		if !found {
			box = mark.BBox
			found = true
			continue
		}
		box = _ceab(box, mark.BBox)
	}
	return box, found
}

// complete reports whether the tile has all four borders.
func (_aeab gridTile) complete() bool {
	return _aeab.numBorders() == 4
}

// connections collects indexes of rulings reachable from `_dcag`. Visiting an
// index adds every list index i for which _eceb[i] contains it, then recurses
// into every index currently in the result. Each index is visited once.
func (_dabd rulingList) connections(_eceb map[int]intSet, _dcag int) intSet {
	result := make(intSet)
	visited := make(intSet)

	var walk func(int)
	walk = func(cur int) {
		if visited.has(cur) {
			return
		}
		visited.add(cur)
		for i := range _dabd {
			if _eceb[i].has(cur) {
				result.add(i)
			}
		}
		for i := range _dabd {
			if result.has(i) {
				walk(i)
			}
		}
	}
	walk(_dcag)
	return result
}

// fontEntry pairs a font with an int64 value.
// NOTE(review): presumably a cache entry with an access counter or timestamp
// in _fdag; confirm at the use sites.
type fontEntry struct {
	_cagc *_dd.PdfFont
	_fdag int64
}

// toTextMarks returns the TextMarks of every word on the line in order,
// advancing the running offset `_gfef`. A space mark is inserted via _fabc
// before each word whose _fabdc flag is set.
func (_fbag *textLine) toTextMarks(_gfef *int) []TextMark {
	var marks []TextMark
	for _, word := range _fbag._becbb {
		if word._fabdc {
			marks = _fabc(marks, _gfef, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_gfef)...)
	}
	return marks
}

// closePath closes the most recent subpath. When a path start is pending
// (_dedf), a new subpath is first created from the _fecc point. When nothing
// is pending and no subpath exists, there is nothing to close and the call
// returns.
func (_gdfg *shapesState) closePath() {
	if _gdfg._dedf {
		_gdfg._cgbd = append(_gdfg._cgbd, _fafc(_gdfg._fecc))
		_gdfg._dedf = false
	} else if len(_gdfg._cgbd) == 0 {
		if _ecef {
			_ad.Log.Debug("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068")
		}
		_gdfg._dedf = false
		return
	}

	_gdfg._cgbd[len(_gdfg._cgbd)-1].close()
	if _ecef {
		_ad.Log.Info("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073", _gdfg)
	}
}

// clearPath discards all subpaths and any pending path start.
func (_afbc *shapesState) clearPath() {
	_afbc._cgbd = nil
	_afbc._dedf = false
	if _ecef {
		_ad.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _afbc)
	}
}
// String returns a one-line description of the line: its _dgfa value, its
// bounding rectangle, its _cdgd value (labelled fontsize in the output) and
// its text in double quotes.
func (_efdd *textLine) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _dc.Sprintf(
		layout,
		_efdd._dgfa,
		_efdd.PdfRectangle,
		_efdd._cdgd,
		_efdd.text(),
	)
}
// String returns a human readable description of the subpath: the number of
// points followed by the points themselves. Paths with more than five points
// are abbreviated to the first two points, an ellipsis and the last point.
func (_eeaa *subpath) String() string {
	points := _eeaa._addd
	count := len(points)
	if count > 5 {
		return _dc.Sprintf("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f", count, points[0], points[1], points[count-1])
	}
	return _dc.Sprintf("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f", count, points)
}

// _cbdc reports whether the word's left edge lies at or beyond the bag's right
// edge but less than `_cfb` past it.
func _cbdc(_fcff *wordBag, _cddd *textWord, _cfb float64) bool {
	if _cddd.Llx < _fcff.Urx {
		return false
	}
	return _cddd.Llx < _fcff.Urx+_cfb
}
// String returns a short description of the table: its _gddg and _adfe
// dimensions separated by " x ", followed by the boolean _eafc flag.
func (_baedg *textTable) String() string {
	const layout = "\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074"
	return _dc.Sprintf(layout, _baedg._gddg, _baedg._adfe, _baedg._eafc)
}
// Len returns the number of TextMarks in `ma`.
func (_dge *TextMarkArray )Len ()int {if _dge ==nil {return 0;};return len (_dge ._dcf );};func (_gba *textObject )showText (_daf []byte )error {return _gba .renderText (_daf )};func _bee (_dbgd string )string {_eacgc :=[]rune (_dbgd );return string (_eacgc [:len (_eacgc )-1])};func (_eec *shapesState )drawRectangle (_dgc ,_egf ,_eae ,_bdec float64 ){if _ecef {_ffc :=_eec .devicePoint (_dgc ,_egf );_bbaf :=_eec .devicePoint (_dgc +_eae ,_egf +_bdec );_ebf :=_dd .PdfRectangle {Llx :_ffc .X ,Lly :_ffc .Y ,Urx :_bbaf .X ,Ury :_bbaf .Y };_ad .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_ebf );};_eec .newSubPath ();_eec .moveTo (_dgc ,_egf );_eec .lineTo (_dgc +_eae ,_egf );_eec .lineTo (_dgc +_eae ,_egf +_bdec );_eec .lineTo (_dgc ,_egf +_bdec );_eec .closePath ();};type bounded interface{bbox ()_dd .PdfRectangle };func (_bcffa *textWord )addDiacritic (_cdec string ){_geea :=_bcffa ._dgcbf [len (_bcffa ._dgcbf )-1];_geea ._fgeb +=_cdec ;_geea ._fgeb =_cb .NFKC .String (_geea ._fgeb );};func (_cegg *textObject )setTextMatrix (_decd []float64 ){if len (_decd )!=6{_ad .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_decd ));return ;};_fbc ,_dbcd ,_ecff ,_gcfd ,_bdbb ,_ffe :=_decd [0],_decd [1],_decd [2],_decd [3],_decd [4],_decd [5];_cegg ._fddg =_cd .NewMatrix (_fbc ,_dbcd ,_ecff ,_gcfd ,_bdbb ,_ffe );_cegg ._cba =_cegg ._fddg ;};func (_fbe *stateStack )size ()int {return len (*_fbe )};func _accc (_fba []*textWord ,_eeeg float64 ,_fga ,_gfeb rulingList )*wordBag {_eagg :=_babd (_fba [0],_eeeg ,_fga ,_gfeb );for _ ,_effg :=range _fba [1:]{_bda :=_egc (_effg ._gceede );_eagg ._efeb [_bda ]=append (_eagg ._efeb [_bda ],_effg );_eagg .PdfRectangle =_ceab (_eagg .PdfRectangle ,_effg .PdfRectangle );};_eagg .sort ();return _eagg ;};func _ebbb (_ccdfa []pathSection ){if _aaabg < 0.0{return ;};if _fggb {_ad .Log 
.Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_ccdfa ));};for _ddddc ,_ageed :=range _ccdfa {for _dffb ,_bcebc :=range _ageed ._fgf {for _gfae ,_gdce :=range _bcebc ._addd {_bcebc ._addd [_gfae ]=_cd .Point {X :_dfgfe (_gdce .X ),Y :_dfgfe (_gdce .Y )};if _fggb {_bdgf :=_bcebc ._addd [_gfae ];if !_fefcb (_gdce ,_bdgf ){_abddg :=_cd .Point {X :_bdgf .X -_gdce .X ,Y :_bdgf .Y -_gdce .Y };_dc .Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_ddddc ,_dffb ,_gfae ,_gdce ,_bdgf ,_abddg );};};};};};};func _bdccd (_ddbgg ,_ebbe float64 )bool {return _ge .Abs (_ddbgg -_ebbe )<=_dad };func (_bbde rulingList )comp (_fega ,_fgdb int )bool {_egbad ,_ccgb :=_bbde [_fega ],_bbde [_fgdb ];_cccc ,_dbefe :=_egbad ._abgae ,_ccgb ._abgae ;if _cccc !=_dbefe {return _cccc > _dbefe ;};if _cccc ==_cffb {return false ;};_ebaa :=func (_cdab bool )bool {if _cccc ==_feae {return _cdab ;};return !_cdab ;};_ebcc ,_fdgb :=_egbad ._acaf ,_ccgb ._acaf ;if _ebcc !=_fdgb {return _ebaa (_ebcc > _fdgb );};_ebcc ,_fdgb =_egbad ._fgdd ,_ccgb ._fgdd ;if _ebcc !=_fdgb {return _ebaa (_ebcc < _fdgb );};return _ebaa (_egbad ._cage < _ccgb ._cage );};func (_adff *shapesState )cubicTo (_adb ,_aga ,_edbc ,_decac ,_bedb ,_ggea float64 ){if _ecef {_ad .Log .Info ("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a");};_adff .addPoint (_bedb ,_ggea );};func (_bcec *textObject )setCharSpacing (_acad float64 ){if _bcec ==nil {return ;};_bcec ._bfdgc ._ade =_acad ;if _bedbd {_ad .Log .Info ("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073",_acad ,_bcec ._bfdgc .String ());};};func _aage (_deefc _dd .PdfRectangle )*ruling {return &ruling {_abgae 
:_fdff ,_acaf :_deefc .Llx ,_fgdd :_deefc .Lly ,_cage :_deefc .Ury };};func _ggad (_feac *wordBag ,_c
// NewFromContents creates a new extractor from contents and page resources.
// The returned error is always nil.
func NewFromContents(contents string, resources *_dd.PdfPageResources) (*Extractor, error) {
	extractor := &Extractor{
		_add: contents,
		_aa:  resources,
		_dca: make(map[string]fontEntry),
		_gdg: make(map[string]textResult),
	}
	return extractor, nil
}

// getFontDirect looks up the font object called `name` with getFontDict and builds a PdfFont
// from it. A failure to build the font is logged at debug level and returned together with
// whatever NewPdfFontFromPdfObject produced.
func (_eefg *textObject) getFontDirect(name string) (*_dd.PdfFont, error) {
	fontObj, err := _eefg.getFontDict(name)
	if err != nil {
		return nil, err
	}
	font, err := _dd.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		_ad.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", name, err)
	}
	return font, err
}

// addPoint converts (x, y) with devicePoint and appends it to the subpath returned by
// establishSubpath. When there is no such subpath the point is stored as the pending start
// point instead.
func (_aac *shapesState) addPoint(x, y float64) {
	current := _aac.establishSubpath()
	devPt := _aac.devicePoint(x, y)
	if current != nil {
		current.add(devPt)
		return
	}
	_aac._dedf = true
	_aac._fecc = devPt
}

// The markKind values, numbered 0 to 3 in declaration order.
const (
	_efddc markKind = iota
	_dega
	_ffa
	_ffbde
)

// _dgga compares `a` and `b` with _bedg and returns that result unless _dgdb accepts it, in
// which case the result of _efda is returned instead.
// NOTE(review): _dgdb looks like an "is approximately zero" test, making _efda a tie-breaker; confirm.
func _dgga(a, b bounded) float64 {
	diff := _bedg(a, b)
	if _dgdb(diff) {
		return _efda(a, b)
	}
	return diff
}

// _cbac returns a new textPara with rectangle `bbox` holding `lines`.
func _cbac(bbox _dd.PdfRectangle, lines []*textLine) *textPara {
	para := textPara{PdfRectangle: bbox, _fcaad: lines}
	return &para
}
// String returns a human readable description of `ss`.
func (_bcd *shapesState )String ()string {return _dc .Sprintf ("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d",len (_bcd ._cgbd ),_bcd ._dedf );};const _ccc =20;func (_ageg *textObject )getFontDict (_aeac string )(_ggba _gd .PdfObject ,_fbbd error ){_gfaa :=_ageg ._eaae ;if _gfaa ==nil {_ad .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_aeac );return nil ,nil ;};_ggba ,_fdf :=_gfaa .GetFontByName (_gd .PdfObjectName (_aeac ));if !_fdf {_ad .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_aeac );return nil ,_f .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _ggba ,nil ;};func (_ggbb *PageText )computeViews (){var _cdb rulingList ;if _gbcf {_dcaf :=_abed (_ggbb ._eea );_cdb =append (_cdb ,_dcaf ...);};if _gbcfd {_baeg :=_aeaa (_ggbb ._abgcd );_cdb =append (_cdb ,_baeg ...);};_cdb ,_affgf :=_cdb .toTilings ();var _defd paraList ;_bged :=len (_ggbb ._gae );for _bdg :=0;_bdg < 360&&_bged > 0;_bdg +=90{_bdbd :=make ([]*textMark ,0,len (_ggbb ._gae )-_bged );for _ ,_dacg :=range _ggbb ._gae {if _dacg ._deeg ==_bdg {_bdbd =append (_bdbd ,_dacg );};};if len (_bdbd )> 0{_gcfb :=_ddcd (_bdbd ,_ggbb ._bgc ,_cdb ,_affgf );_defd =append (_defd ,_gcfb ...);_bged -=len (_bdbd );};};_fdgg :=new (_ac .Buffer );_defd .writeText (_fdgg );_ggbb ._gdcd =_fdgg .String ();_ggbb ._gge =_defd .toTextMarks ();_ggbb ._feb =_defd .tables ();if _bedbb {_ad .Log .Info ("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064",len (_ggbb ._feb 
));};};func (_dgcb *wordBag )pullWord (_gbb *textWord ,_eage int ,_efadd map[int ]map[*textWord ]struct{}){_dgcb .PdfRectangle =_ceab (_dgcb .PdfRectangle ,_gbb .PdfRectangle );if _gbb ._dafb > _dgcb ._ebge {_dgcb ._ebge =_gbb ._dafb ;};_dgcb ._efeb [_eage ]=append (_dgcb ._efeb [_eage ],_gbb );_efadd [_eage ][_gbb ]=struct{}{};};func _egfb (_eece []compositeCell )[]float64 {var _fffbf []*textLine ;_eced :=0;for _ ,_cdgaa :=range _eece {_eced +=len (_cdgaa .paraList );_fffbf =append (_fffbf ,_cdgaa .lines ()...);};_cf .Slice (_fffbf ,func (_bfggcc ,_gcddd int )bool {_ecbe ,_ddef :=_fffbf [_bfggcc ],_fffbf [_gcddd ];_gfag ,_fbcde :=_ecbe ._dgfa ,_ddef ._dgfa ;if !_dgdb (_gfag -_fbcde ){return _gfag < _fbcde ;};return _ecbe .Llx < _ddef .Llx ;});if _bedbb {_dc .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_eced ,len (_fffbf ));for _dadff ,_agbe :=range _fffbf {_dc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dadff ,_agbe );};};var _gbee []float64 ;_fbbag :=_fffbf [0];var _daag [][]*textLine ;_decec :=[]*textLine {_fbbag };for _bfeg ,_fcfed :=range _fffbf [1:]{if _fcfed .Ury < _fbbag .Lly {_caeb :=0.5*(_fcfed .Ury +_fbbag .Lly );if _bedbb {_dc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_bfeg ,_fcfed .Ury ,_fbbag .Lly ,_caeb ,_fbbag ,_fcfed );};_gbee =append (_gbee ,_caeb );_daag =append (_daag ,_decec );_decec =nil ;};_decec =append (_decec ,_fcfed );if _fcfed .Lly < _fbbag .Lly {_fbbag =_fcfed ;};};if len (_decec )> 0{_daag =append (_daag ,_decec );};if _bedbb {_dc .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 
\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_gbee );};if _bedbb {_ad .Log .Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	// _add is the content stream text and _aa the page resources (both set by NewFromContents).
	_add string
	_aa  *_dd.PdfPageResources
	_eda _dd.PdfRectangle
	// _dca and _gdg are string-keyed maps initialised empty by NewFromContents.
	_dca map[string]fontEntry
	_gdg map[string]textResult
	_aab int64
	_geb int
}

// tables returns the exportable tables of the paragraphs, converted to TextTable, in paragraph
// order. Paragraphs without a table, or with a non-exportable one, are skipped.
func (_gggd paraList) tables() []TextTable {
	var exported []TextTable
	if _bedbb {
		_ad.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range _gggd {
		table := para._cgf
		if table == nil || !table.isExportable() {
			continue
		}
		exported = append(exported, table.toTextTable())
	}
	return exported
}
// String returns a description of `b`.
func (_dabe *wordBag )String ()string {var _ebgd []string ;for _ ,_ffeg :=range _dabe .depthIndexes (){_aced :=_dabe ._efeb [_ffeg ];for _ ,_fade :=range _aced {_ebgd =append (_ebgd ,_fade ._ebed );};};return _dc .Sprintf ("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071",_dabe .PdfRectangle ,_dabe ._ebge ,len (_ebgd ),_ebgd );};func _fafc (_fff _cd .Point )*subpath {return &subpath {_addd :[]_cd .Point {_fff }}};func (_dfb *stateStack )top ()*textState {if _dfb .empty (){return nil ;};return (*_dfb )[_dfb .size ()-1];};type wordBag struct{_dd .PdfRectangle ;_ebge float64 ;_gcce ,_bbb rulingList ;_fgfd float64 ;_efeb map[int ][]*textWord ;};func _gacgc (_cded map[float64 ]map[float64 ]gridTile )[]float64 {_beac :=make ([]float64 ,0,len (_cded ));_eafe :=make (map[float64 ]struct{},len (_cded ));for _ ,_fddc :=range _cded {for _fddb :=range _fddc {if _ ,_accbd :=_eafe [_fddb ];_accbd {continue ;};_beac =append (_beac ,_fddb );_eafe [_fddb ]=struct{}{};};};_cf .Float64s (_beac );return _beac ;};func _daca (_dgfec []TextMark ,_dgcdc *int ,_fgef TextMark )[]TextMark {_fgef .Offset =*_dgcdc ;_dgfec =append (_dgfec ,_fgef );*_dgcdc +=len (_fgef .Text );return _dgfec ;};func _babd (_ege *textWord ,_fbf float64 ,_efad ,_bcdb rulingList )*wordBag {_bdbbc :=_egc (_ege ._gceede );_aeacb :=[]*textWord {_ege };_afee :=wordBag {_efeb :map[int ][]*textWord {_bdbbc :_aeacb },PdfRectangle :_ege .PdfRectangle ,_ebge :_ege ._dafb ,_fgfd :_fbf ,_gcce :_efad ,_bbb :_bcdb };return &_afee ;};type textPara struct{_dd .PdfRectangle ;_gaca _dd .PdfRectangle ;_fcaad []*textLine ;_cgf *textTable ;_ebaf bool ;_ebfg bool ;_aeaf *textPara ;_geged *textPara ;_baeda *textPara ;_gfaf *textPara ;};func (_cbgfg rulingList )vertsHorzs ()(rulingList ,rulingList ){var _acadb ,_bdcbc rulingList ;for _ ,_ccba :=range _cbgfg {switch _ccba ._abgae {case _fdff :_acadb =append (_acadb ,_ccba );case _feae :_bdcbc =append 
(_bdcbc ,_ccba );};};return _acadb ,_bdcbc ;};
2020-10-19 10:58:10 +00:00
2020-11-11 18:48:37 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
2020-11-23 22:15:56 +00:00
// ExtractText returns the text and error produced by ExtractTextWithStats, discarding the
// two counters.
func (_bge *Extractor) ExtractText() (string, error) {
	text, _, _, err := _bge.ExtractTextWithStats()
	return text, err
}

// cachedImage pairs a decoded image with its colorspace.
type cachedImage struct {
	_ef  *_dd.Image
	_efd _dd.PdfColorspace
}

// taken reports whether the paragraph is nil or has its _ebaf flag set.
func (_dcaa *textPara) taken() bool {
	if _dcaa == nil {
		return true
	}
	return _dcaa._ebaf
}

// getFillColor returns the color _ecdd derives from the non-stroking colorspace and color of
// the graphics state.
func (_dcga *textObject) getFillColor() _ed.Color {
	gs := _dcga._bgda
	return _ecdd(gs.ColorspaceNonStroking, gs.ColorNonStroking)
}

// rectRuling is a colored rectangle tagged with a ruling kind and a mark kind.
type rectRuling struct {
	_dgdf rulingKind
	_ggcc markKind
	_ed.Color
	_dd.PdfRectangle
}

// Package-level switches and limits. _gbcf and _gbcfd select which ruling sources
// PageText.computeViews uses; _beade selects the alignment test in rulingList.isActualGrid.
const (
	_cceg  = true
	_bbafc = true
	_egb   = true
	_bffb  = false
	_bdc   = false
	_cfc   = 6
	_fgfe  = 3.0
	_fcfef = 200
	_gfbcb = true
	_cabc  = true
	_gbcf  = true
	_gbcfd = true
	_beade = false
)

// bbox returns the rectangle of the mark.
func (_faga *textMark) bbox() _dd.PdfRectangle {
	return _faga.PdfRectangle
}

// extractInlineImage decodes the inline image `inline`, converts it to RGB and appends an
// ImageMark for it to the context. The mark's size, angle and position are taken from the CTM
// of `gs`. A missing colorspace is treated as DeviceGray. The _ace counter is incremented on
// success.
func (_cec *imageExtractContext) extractInlineImage(inline *_ga.ContentStreamInlineImage, gs _ga.GraphicsState, resources *_dd.PdfPageResources) error {
	img, err := inline.ToImage(resources)
	if err != nil {
		return err
	}
	colorspace, err := inline.GetColorSpace(resources)
	if err != nil {
		return err
	}
	if colorspace == nil {
		colorspace = _dd.NewPdfColorspaceDeviceGray()
	}
	rgb, err := colorspace.ImageToRGB(*img)
	if err != nil {
		return err
	}
	ctm := gs.CTM
	mark := ImageMark{
		Image:  &rgb,
		Width:  ctm.ScalingFactorX(),
		Height: ctm.ScalingFactorY(),
		Angle:  ctm.Angle(),
	}
	mark.X, mark.Y = ctm.Translation()
	_cec._da = append(_cec._da, mark)
	_cec._ace++
	return nil
}

// _aabab builds a line ruling from `p1` to `p2` with color `clr`. It returns (nil, false) when
// _bfgb classifies the segment as _cffb; otherwise it returns the result of asRuling.
func _aabab(p1, p2 _cd.Point, clr _ed.Color) (*ruling, bool) {
	line := lineRuling{
		_aeeac: p1,
		_daeb:  p2,
		_dag:   _bfgb(p1, p2),
		Color:  clr,
	}
	if line._dag != _cffb {
		return line.asRuling()
	}
	return nil, false
}
// String returns a description of `k`.
func (_eddf rulingKind )String ()string {_deee ,_fecgga :=_cagbe [_eddf ];if !_fecgga {return _dc .Sprintf ("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064",_eddf );};return _deee ;};func (_gadbb *textPara )bbox ()_dd .PdfRectangle {return _gadbb .PdfRectangle };type textMark struct{_dd .PdfRectangle ;_deeg int ;_fgeb string ;_dbffe string ;_dfdg *_dd .PdfFont ;_geafc float64 ;_ffd float64 ;_bcc _cd .Matrix ;_deag _cd .Point ;_eeag _dd .PdfRectangle ;_aaad _ed .Color ;_gadc _ed .Color ;};type shapesState struct{_eded _cd .Matrix ;_agb _cd .Matrix ;_cgbd []*subpath ;_dedf bool ;_fecc _cd .Point ;_fcac *textObject ;};func _fdb (_bcgb _cd .Point )_cd .Matrix {return _cd .TranslationMatrix (_bcgb .X ,_bcgb .Y )};func (_gbgcd *textTable )reduce ()*textTable {_egbd :=make ([]int ,0,_gbgcd ._adfe );_ebcag :=make ([]int ,0,_gbgcd ._gddg );for _ggadf :=0;_ggadf < _gbgcd ._adfe ;_ggadf ++{if !_gbgcd .emptyRow (_ggadf ){_egbd =append (_egbd ,_ggadf );};};for _ecad :=0;_ecad < _gbgcd ._gddg ;_ecad ++{if !_gbgcd .emptyColumn (_ecad ){_ebcag =append (_ebcag ,_ecad );};};if len (_egbd )==_gbgcd ._adfe &&len (_ebcag )==_gbgcd ._gddg {return _gbgcd ;};_bbabd :=textTable {_eafc :_gbgcd ._eafc ,_gddg :len (_ebcag ),_adfe :len (_egbd ),_efbae :make (map[uint64 ]*textPara ,len (_ebcag )*len (_egbd ))};if _bedbb {_ad .Log .Info ("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064",_gbgcd ._gddg ,_gbgcd ._adfe ,len (_ebcag ),len (_egbd ));_ad .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_ebcag );_ad .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_egbd );};for _dbaa ,_gfdf :=range _egbd {for _babdc ,_eagf :=range _ebcag {_cfggg :=_gbgcd .get (_eagf ,_gfdf );if _cfggg ==nil {continue ;};if _bedbb {_dc .Printf ("\u0020 \u0025\u0032\u0064\u002c 
\u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_babdc ,_dbaa ,_eagf ,_gfdf ,_cgbgd (_cfggg .text (),50));};_bbabd .put (_babdc ,_dbaa ,_cfggg );};};return &_bbabd ;};
// ExtractTextWithStats works like ExtractText but also returns the two counters reported by
// ExtractPageText: the number of characters in the output and the number of characters that
// were not decoded. On error the text is empty and the counters are passed through unchanged.
func (_fdg *Extractor) ExtractTextWithStats() (_ddg string, _gcb int, _bce int, _bgg error) {
	pageText, numChars, numMisses, err := _fdg.ExtractPageText()
	if err != nil {
		return "", numChars, numMisses, err
	}
	return pageText.Text(), numChars, numMisses, nil
}

// markWordBoundaries sets the _fabdc flag on every word but the first whose distance from the
// preceding word, as measured by _acgd, is at least _cgeg times the line's _cdgd value.
func (_dgec *textLine) markWordBoundaries() {
	threshold := _cgeg * _dgec._cdgd
	words := _dgec._becbb
	// `prev` indexes the word preceding `word`, because the range starts at the second word.
	for prev, word := range words[1:] {
		if _acgd(word, words[prev]) < threshold {
			continue
		}
		word._fabdc = true
	}
}
// String returns a description of the ruling: kind, primary coordinate, the two secondary
// coordinates and their difference, mark kind, color and, when non-zero, the width.
// Rulings of kind _cffb are described as "NOT RULING".
func (_cgeag *ruling) String() string {
	if _cgeag._abgae == _cffb {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}
	// Axis labels: "x" then "y", swapped for rulings of kind _feae.
	primaryAxis, secondaryAxis := "\u0078", "\u0079"
	if _cgeag._abgae == _feae {
		primaryAxis, secondaryAxis = "\u0079", "\u0078"
	}
	var widthText string
	if _cgeag._faff != 0.0 {
		widthText = _dc.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _cgeag._faff)
	}
	// Decodes to: %10s %s=%6.2f %s=%6.2f - %6.2f (%6.2f) %s %v%s
	const layout = "\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073"
	length := _cgeag._cage - _cgeag._fgdd
	return _dc.Sprintf(layout, _cgeag._abgae, primaryAxis, _cgeag._acaf, secondaryAxis, _cgeag._fgdd, _cgeag._cage, length, _cgeag._aaff, _cgeag.Color, widthText)
}

// removeDuplicates drops every point that _fefcb considers equal to the point kept just
// before it.
func (_aed *subpath) removeDuplicates() {
	points := _aed._addd
	if len(points) == 0 {
		return
	}
	kept := []_cd.Point{points[0]}
	for _, pt := range points[1:] {
		previous := kept[len(kept)-1]
		if _fefcb(pt, previous) {
			continue
		}
		kept = append(kept, pt)
	}
	_aed._addd = kept
}

// stroke appends a pathSection made of the current subpaths and the current stroke color to
// `*sections`.
func (_dgd *shapesState) stroke(sections *[]pathSection) {
	section := pathSection{_fgf: _dgd._cgbd, Color: _dgd._fcac.getStrokeColor()}
	*sections = append(*sections, section)
	if !_fggb {
		return
	}
	_dc.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*sections), _dgd, _dgd._fcac.getStrokeColor(), section.bbox())
	if !_caa {
		return
	}
	// Print subpaths 0 through 10 only.
	for i, sp := range _dgd._cgbd {
		_dc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sp)
		if i == 10 {
			break
		}
	}
}

// gridIntersecting reports whether both secondary coordinates (_fgdd and _cage) of the two
// rulings are within _dad of each other.
func (_dddb *ruling) gridIntersecting(other *ruling) bool {
	if !_bdccd(_dddb._fgdd, other._fgdd) {
		return false
	}
	return _bdccd(_dddb._cage, other._cage)
}

// _aadgg returns the result of _fcdf applied to the absolute y distance and the absolute
// x distance between `p1` and `p2`, in that order.
func _aadgg(p1, p2 _cd.Point) bool {
	dx := _ge.Abs(p1.X - p2.X)
	dy := _ge.Abs(p1.Y - p2.Y)
	return _fcdf(dy, dx)
}

// extractContentStreamImages parses `contents` and processes it with processOperand as the
// handler for all operands. The image cache and the options are created first if they are nil.
func (_agee *imageExtractContext) extractContentStreamImages(contents string, resources *_dd.PdfPageResources) error {
	parser := _ga.NewContentStreamParser(contents)
	operations, err := parser.Parse()
	if err != nil {
		return err
	}
	if _agee._fg == nil {
		_agee._fg = make(map[*_gd.PdfObjectStream]*cachedImage)
	}
	if _agee._dcg == nil {
		_agee._dcg = new(ImageExtractOptions)
	}
	processor := _ga.NewContentStreamProcessor(*operations)
	processor.AddHandler(_ga.HandlerConditionEnumAllOperands, "", _agee.processOperand)
	return processor.Process(resources)
}

// _eebab removes the element at index `idx` from `words` by shifting the tail left in place
// and returns the shortened slice, which shares the backing array of `words`.
func _eebab(words []*textWord, idx int) []*textWord {
	last := len(words) - 1
	copy(words[idx:], words[idx+1:])
	return words[:last]
}

// _ceab returns the smallest rectangle that contains both `a` and `b`.
func _ceab(a, b _dd.PdfRectangle) _dd.PdfRectangle {
	var union _dd.PdfRectangle
	union.Llx = _ge.Min(a.Llx, b.Llx)
	union.Lly = _ge.Min(a.Lly, b.Lly)
	union.Urx = _ge.Max(a.Urx, b.Urx)
	union.Ury = _ge.Max(a.Ury, b.Ury)
	return union
}
// Append appends `mark` to the mark array.
func (_cdac *TextMarkArray )Append (mark TextMark ){_cdac ._dcf =append (_cdac ._dcf ,mark )};func (_fgfeg *ruling )encloses (_dbcgf ,_dced float64 )bool {return _fgfeg ._fgdd -_dad <=_dbcgf &&_dced <=_fgfeg ._cage +_dad ;};func (_bbeea rulingList )snapToGroups ()rulingList {_cada ,_bgfec :=_bbeea .vertsHorzs ();if len (_cada )> 0{_cada =_cada .snapToGroupsDirection ();};if len (_bgfec )> 0{_bgfec =_bgfec .snapToGroupsDirection ();};_dfbf :=append (_cada ,_bgfec ...);_dfbf .log ("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073");return _dfbf ;};func (_gfbg paraList )addNeighbours (){_fbee :=func (_edgg []int ,_bbffd *textPara )([]*textPara ,[]*textPara ){_babga :=make ([]*textPara ,0,len (_edgg )-1);_cebd :=make ([]*textPara ,0,len (_edgg )-1);for _ ,_ccgac :=range _edgg {_eabff :=_gfbg [_ccgac ];if _eabff .Urx <=_bbffd .Llx {_babga =append (_babga ,_eabff );}else if _eabff .Llx >=_bbffd .Urx {_cebd =append (_cebd ,_eabff );};};return _babga ,_cebd ;};_dggb :=func (_eccc []int ,_acdg *textPara )([]*textPara ,[]*textPara ){_eccgc :=make ([]*textPara ,0,len (_eccc )-1);_fgfc :=make ([]*textPara ,0,len (_eccc )-1);for _ ,_fccec :=range _eccc {_ecfb :=_gfbg [_fccec ];if _ecfb .Ury <=_acdg .Lly {_fgfc =append (_fgfc ,_ecfb );}else if _ecfb .Lly >=_acdg .Ury {_eccgc =append (_eccgc ,_ecfb );};};return _eccgc ,_fgfc ;};_gdbd :=_gfbg .yNeighbours (_agde );for _ ,_cdadg :=range _gfbg {_ebdeg :=_gdbd [_cdadg ];if len (_ebdeg )==0{continue ;};_gbcfa ,_gfbd :=_fbee (_ebdeg ,_cdadg );if len (_gbcfa )==0&&len (_gfbd )==0{continue ;};if len (_gbcfa )> 0{_fegbc :=_gbcfa [0];for _ ,_efeff :=range _gbcfa [1:]{if _efeff .Urx >=_fegbc .Urx {_fegbc =_efeff ;};};for _ ,_dfcg :=range _gbcfa {if _dfcg !=_fegbc &&_dfcg .Urx > _fegbc .Llx {_fegbc =nil ;break ;};};if _fegbc !=nil &&_befe (_cdadg .PdfRectangle ,_fegbc .PdfRectangle ){_cdadg ._aeaf =_fegbc ;};};if len (_gfbd )> 0{_gefbb :=_gfbd [0];for _ ,_daagg :=range _gfbd [1:]{if _daagg .Llx <=_gefbb .Llx {_gefbb 
=_daagg ;};};for _ ,_bcdd :=range _gfbd {if _bcdd !=_gefbb &&_bcdd .Llx < _gefbb .Urx {_gefbb =nil ;break ;};};if _gefbb !=nil &&_befe (_cdadg .PdfRectangle ,_gefbb .PdfRectangle ){_cdadg ._geged =_gefbb ;};};};_gdbd =_gfbg .xNeighbours (_egfe );for _ ,_fbgfa :=range _gfbg {_afed :=_gdbd [_fbgfa ];if len (_afed )==0{continue ;};_cgdg ,_abcad :=_dggb (_afed ,_fbgfa );if len (_cgdg )==0&&len (_abcad )==0{continue ;};if len (_abcad )> 0{_cdea :=_abcad [0];for _ ,_afeda :=range _abcad [1:]{if _afeda .Ury >=_cdea .Ury {_cdea =_afeda ;};};for _ ,_accg :=range _abcad {if _accg !=_cdea &&_accg .Ury > _cdea .Lly {_cdea =nil ;break ;};};if _cdea !=nil &&_fafcf (_fbgfa .PdfRectangle ,_cdea .PdfRectangle ){_fbgfa ._gfaf =_cdea ;};};if len (_cgdg )> 0{_gddd :=_cgdg [0];for _ ,_eedfc :=range _cgdg [1:]{if _eedfc .Lly <=_gddd .Lly {_gddd =_eedfc ;};};for _ ,_bega :=range _cgdg {if _bega !=_gddd &&_bega .Lly < _gddd .Ury {_gddd =nil ;break ;};};if _gddd !=nil &&_fafcf (_fbgfa .PdfRectangle ,_gddd .PdfRectangle ){_fbgfa ._baeda =_gddd ;};};};for _ ,_decb :=range _gfbg {if _decb ._aeaf !=nil &&_decb ._aeaf ._geged !=_decb {_decb ._aeaf =nil ;};if _decb ._baeda !=nil &&_decb ._baeda ._gfaf !=_decb {_decb ._baeda =nil ;};if _decb ._geged !=nil &&_decb ._geged ._aeaf !=_decb {_decb ._geged =nil ;};if _decb ._gfaf !=nil &&_decb ._gfaf ._baeda !=_decb {_decb ._gfaf =nil ;};};};func _efda (_baag ,_egfg bounded )float64 {return _gebdb (_baag )-_gebdb (_egfg )};func (_ebbd *shapesState )devicePoint (_eebe ,_dgfc float64 )_cd .Point {_fece :=_ebbd ._agb .Mult (_ebbd ._eded );_eebe ,_dgfc =_fece .Transform (_eebe ,_dgfc );return _cd .NewPoint (_eebe ,_dgfc );};func (_gbgfd *subpath )makeRectRuling (_ddbb _ed .Color )(*ruling ,bool ){if _aaf {_ad .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_gbgfd );};_acga :=_gbgfd ._addd [:4];_ggbe :=make (map[int ]rulingKind ,len (_acga ));for _edcb ,_eacec 
:=range _acga {_ccdc :=_gbgfd ._addd [(_edcb +1)%4];_ggbe [_edcb ]=_afbe (_eacec ,_ccdc );if _aaf {_dc .Pr
// String returns a human readable description of the set: its members in increasing order,
// formatted with %+v.
func (_bdcca intSet) String() string {
	members := make([]int, 0, len(_bdcca))
	for key := range _bdcca {
		if !_bdcca.has(key) {
			continue
		}
		members = append(members, key)
	}
	_cf.Ints(members)
	return _dc.Sprintf("\u0025\u002b\u0076", members)
}
// String returns a description of `k`.
func (_gceb markKind )String ()string {_bgae ,_accfg :=_fac [_gceb ];if !_accfg {return _dc .Sprintf ("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064",_gceb );};return _bgae ;};func _cdba (_egag []TextMark ,_bagg *int )[]TextMark {_fefb :=_egag [len (_egag )-1];_gace :=[]rune (_fefb .Text );if len (_gace )==1{_egag =_egag [:len (_egag )-1];_ccga :=_egag [len (_egag )-1];*_bagg =_ccga .Offset +len (_ccga .Text );}else {_dadf :=_bee (_fefb .Text );*_bagg +=len (_dadf )-len (_fefb .Text );_fefb .Text =_dadf ;};return _egag ;};func (_bgec paraList )merge ()*textPara {_ad .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_bgec ));if len (_bgec )==0{return nil ;};_bgec .sortReadingOrder ();_dbef :=_bgec [0].PdfRectangle ;_cagca :=_bgec [0]._fcaad ;for _ ,_bgdb :=range _bgec [1:]{_dbef =_ceab (_dbef ,_bgdb .PdfRectangle );_cagca =append (_cagca ,_bgdb ._fcaad ...);};return _cbac (_dbef ,_cagca );};func (_cgg gridTile )contains (_edca _dd .PdfRectangle )bool {if _cgg .numBorders ()< 3{return false ;};if _cgg ._gfgca &&_edca .Llx < _cgg .Llx -_aedb {return false ;};if _cgg ._ebce &&_edca .Urx > _cgg .Urx +_aedb {return false ;};if _cgg ._bgfee &&_edca .Lly < _cgg .Lly -_aedb {return false ;};if _cgg ._aabe &&_edca .Ury > _cgg .Ury +_aedb {return false ;};return true ;};func (_bgeb rulingList )isActualGrid ()(rulingList ,bool ){_eaeab ,_adcf :=_bgeb .augmentGrid ();if !(len (_eaeab )>=_geda +1&&len (_adcf )>=_aaed +1){if _fggb {_ad .Log .Info ("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064",len (_eaeab ),len (_adcf ),_geda +1,_aaed +1);};return nil ,false ;};if _fggb {_ad 
.Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074",_bgeb ,len (_eaeab )>=2,len (_adcf )>=2,len (_eaeab )>=2&&len (_adcf )>=2);for _cfcf ,_gfeba :=range _bgeb {_dc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a",_cfcf ,_gfeba );};};if _beade {_eefc ,_bbdc :=_eaeab [0],_eaeab [len (_eaeab )-1];_ffde ,_gadf :=_adcf [0],_adcf [len (_adcf )-1];if !(_ccfb (_eefc ._acaf -_ffde ._fgdd )&&_ccfb (_bbdc ._acaf -_ffde ._cage )&&_ccfb (_ffde ._acaf -_eefc ._cage )&&_ccfb (_gadf ._acaf -_eefc ._fgdd )){if _fggb {_ad .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073",_eefc ,_bbdc ,_ffde ,_gadf );};return nil ,false ;};}else {if !_eaeab .aligned (){if _dgcc {_ad .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064",len (_eaeab ));};return nil ,false ;};if !_adcf .aligned (){if _fggb {_ad .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064",len (_adcf ));};return nil ,false ;};};_fdddb :=append (_eaeab ,_adcf ...);return _fdddb ,true ;};func (_egfc *wordBag )firstReadingIndex (_abcg int )int {_agbca :=_egfc .firstWord (_abcg )._dafb ;_dgcd :=float64 (_abcg +1)*_dfca ;_ece :=_dgcd +_ccac *_agbca ;_fecg :=_abcg ;for _ ,_bcff :=range _egfc .depthBand (_dgcd ,_ece ){if _bedg (_egfc .firstWord (_bcff ),_egfc .firstWord (_fecg ))< 0{_fecg =_bcff ;};};return _fecg ;};type ruling struct{_abgae 
rulingKind ;_aaff markKind ;_ed .Color ;_acaf float64 ;_fgdd float64 ;_cage float64 ;_faff float64 ;};
// String returns a description of `tm`.
func (_cbbcd *textMark )String ()string {return _dc .Sprintf ("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022",_cbbcd .PdfRectangle ,_cbbcd ._geafc ,_cbbcd ._fgeb );};func (_gfdb *wordBag )maxDepth ()float64 {return _gfdb ._fgfd -_gfdb .Lly };func (_ggga paraList )yNeighbours (_eccda float64 )map[*textPara ][]int {_gcgf :=make ([]event ,2*len (_ggga ));if _eccda ==0{for _ccee ,_cede :=range _ggga {_gcgf [2*_ccee ]=event {_cede .Lly ,true ,_ccee };_gcgf [2*_ccee +1]=event {_cede .Ury ,false ,_ccee };};}else {for _caecb ,_gfcg :=range _ggga {_gcgf [2*_caecb ]=event {_gfcg .Lly -_eccda *_gfcg .fontsize (),true ,_caecb };_gcgf [2*_caecb +1]=event {_gfcg .Ury +_eccda *_gfcg .fontsize (),false ,_caecb };};};return _ggga .eventNeighbours (_gcgf );};func _cbdf (_dbeb ,_bceae int )int {if _dbeb > _bceae {return _dbeb ;};return _bceae ;};func (_bbecf rulingList )primMinMax ()(float64 ,float64 ){_dcfd ,_edffc :=_bbecf [0]._acaf ,_bbecf [0]._acaf ;for _ ,_eccbcf :=range _bbecf [1:]{if _eccbcf ._acaf < _dcfd {_dcfd =_eccbcf ._acaf ;}else if _eccbcf ._acaf > _edffc {_edffc =_eccbcf ._acaf ;};};return _dcfd ,_edffc ;};func _faa (_geaf []*wordBag )[]*wordBag {if len (_geaf )<=1{return _geaf ;};if _fada {_ad .Log .Info ("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a");};_cf .Slice (_geaf ,func (_gga ,_acf int )bool {_baad ,_dcab :=_geaf [_gga ],_geaf [_acf ];_afceg :=_baad .Width ()*_baad .Height ();_gbcc :=_dcab .Width ()*_dcab .Height ();if _afceg !=_gbcc {return _afceg > _gbcc ;};if _baad .Height ()!=_dcab .Height (){return _baad .Height ()> _dcab .Height ();};return _gga < _acf ;});var _gefcf []*wordBag ;_ccg :=make (intSet );for _dgfe :=0;_dgfe < len (_geaf );_dgfe ++{if _ccg .has (_dgfe ){continue ;};_eagc :=_geaf [_dgfe ];for _gcea :=_dgfe +1;_gcea < len (_geaf );_gcea ++{if _ccg .has (_dgfe ){continue ;};_gfge :=_geaf [_gcea ];_aggd :=_eagc .PdfRectangle 
;_aggd .Llx -=_eagc ._ebge ;if _ggc (_aggd ,_gfge .PdfRectangle ){_eagc .absorb (_gfge );_ccg .add (_gcea );};};_gefcf =append (_gefcf ,_eagc );};if len (_geaf )!=len (_gefcf )+len (_ccg ){_ad .Log .Error ("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064",len (_geaf ),len (_gefcf ),len (_ccg ));};return _gefcf ;};func _fcaa (_fadec func (*wordBag ,*textWord ,float64 )bool ,_ceca float64 )func (*wordBag ,*textWord )bool {return func (_begf *wordBag ,_gdbf *textWord )bool {return _fadec (_begf ,_gdbf ,_ceca )};};func (_cfed paraList )llyRange (_dffc []int ,_ddbd ,_gceed float64 )[]int {_efbea :=len (_cfed );if _gceed < _cfed [_dffc [0]].Lly ||_ddbd > _cfed [_dffc [_efbea -1]].Lly {return nil ;};_dcae :=_cf .Search (_efbea ,func (_gcab int )bool {return _cfed [_dffc [_gcab ]].Lly >=_ddbd });_dfbg :=_cf .Search (_efbea ,func (_fbecc int )bool {return _cfed [_dffc [_fbecc ]].Lly > _gceed });return _dffc [_dcae :_dfbg ];};func (_ffbb *stateStack )push (_gag *textState ){_bagd :=*_gag ;*_ffbb =append (*_ffbb ,&_bagd )};func (_bb *imageExtractContext )extractXObjectImage (_edb *_gd .PdfObjectName ,_dbb _ga .GraphicsState ,_gbf *_dd .PdfPageResources )error {_ea ,_ :=_gbf .GetXObjectByName (*_edb );if _ea ==nil {return nil ;};_adf ,_eaa :=_bb ._fg [_ea ];if !_eaa {_gfb ,_ff :=_gbf .GetXObjectImageByName (*_edb );if _ff !=nil {return _ff ;};if _gfb ==nil {return nil ;};_gebd ,_ff :=_gfb .ToImage ();if _ff !=nil {return _ff ;};_adf =&cachedImage {_ef :_gebd ,_efd :_gfb .ColorSpace };_bb ._fg [_ea ]=_adf ;};_agf :=_adf ._ef ;_ae :=_adf ._efd ;_ecb ,_gff :=_ae .ImageToRGB (*_agf );if _gff !=nil {return _gff ;};_ad .Log .Debug ("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_dbb .CTM .String ());_be :=ImageMark {Image :&_ecb ,Width :_dbb .CTM .ScalingFactorX (),Height :_dbb .CTM .ScalingFactorY (),Angle :_dbb .CTM .Angle ()};_be .X ,_be .Y =_dbb 
.CTM .Translation ();_bb ._da =append (_bb ._da ,_be );_bb ._cea ++;return nil ;};
// TableCell is a cell in a TextTable.
type TableCell struct {
	// Text is the extracted text.
	Text string
	// Marks holds the TextMarks corresponding to the text in Text.
	Marks TextMarkArray
}

// moveTo sets the `_dedf` flag and stores the point (`_cbbc`, `_bcea`), as
// converted by devicePoint, in `_fecc`. When `_ecef` is on, the input and the
// converted point are logged.
func (_beg *shapesState) moveTo(_cbbc, _bcea float64) {
	_beg._dedf = true
	_beg._fecc = _beg.devicePoint(_cbbc, _bcea)
	if !_ecef {
		return
	}
	// Format decodes to "moveTo: %.2f,%.2f device=%.2f".
	_ad.Log.Info(
		"\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",
		_cbbc, _bcea, _beg._fecc,
	)
}

// _ddeeg renders `_dfge` as a single "{key: values, key: values}" string for
// diagnostics. Entries appear in the order of the keys returned by `_fgag`
// (presumably sorted -- confirm against `_fgag`).
func _ddeeg(_dfge map[int][]float64) string {
	orderedKeys := _fgag(_dfge)
	entries := make([]string, len(_dfge))
	for pos, key := range orderedKeys {
		// Format decodes to "%d: %.2f".
		entry := _dc.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", key, _dfge[key])
		entries[pos] = entry
	}
	body := _d.Join(entries, "\u002c\u0020")
	return _dc.Sprintf("\u007b\u0025\u0073\u007d", body)
}
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_ggbc PageText )Marks ()*TextMarkArray {return &TextMarkArray {_dcf :_ggbc ._gge }};func (_dfba *textTable )toTextTable ()TextTable {if _bedbb {_ad .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_dfba ._gddg ,_dfba ._adfe );};_degdc :=make ([][]TableCell ,_dfba ._adfe );for _ecga :=0;_ecga < _dfba ._adfe ;_ecga ++{_degdc [_ecga ]=make ([]TableCell ,_dfba ._gddg );for _aggb :=0;_aggb < _dfba ._gddg ;_aggb ++{_ggaa :=_dfba .get (_aggb ,_ecga );if _ggaa ==nil {continue ;};if _bedbb {_dc .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_aggb ,_ecga ,_ggaa );};_degdc [_ecga ][_aggb ].Text =_ggaa .text ();_ecfc :=0;_degdc [_ecga ][_aggb ].Marks ._dcf =_ggaa .toTextMarks (&_ecfc );};};return TextTable {W :_dfba ._gddg ,H :_dfba ._adfe ,Cells :_degdc };};func (_aea *textObject )getStrokeColor ()_ed .Color {return _ecdd (_aea ._bgda .ColorspaceStroking ,_aea ._bgda .ColorStroking );};func (_cabdb paraList )xNeighbours (_fbbf float64 )map[*textPara ][]int {_bdgd :=make ([]event ,2*len (_cabdb ));if _fbbf ==0{for _bgacb ,_cged :=range _cabdb {_bdgd [2*_bgacb ]=event {_cged .Llx ,true ,_bgacb };_bdgd [2*_bgacb +1]=event {_cged .Urx ,false ,_bgacb };};}else {for _edgf ,_beaca :=range _cabdb {_bdgd [2*_edgf ]=event {_beaca .Llx -_fbbf *_beaca .fontsize (),true ,_edgf };_bdgd [2*_edgf +1]=event {_beaca .Urx +_fbbf *_beaca .fontsize (),false ,_edgf };};};return _cabdb .eventNeighbours (_bdgd );};func (_bfg *textObject )moveText (_dde ,_dcb float64 ){_bfg .moveLP (_dde ,_dcb )};func (_egeg paraList )sortReadingOrder (){_ad .Log .Trace ("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_egeg ));if len (_egeg )<=1{return ;};_egeg 
.computeEBBoxes ();_cf .Slice (_egeg ,func (_egfa ,_accf int )bool {return _gcd (_egeg [_egfa ],_egeg [_accf ])<=0});_dfgd :=_egeg .topoOrder ();_egeg .reorder (_dfgd );};func (_bfgce paraList )eventNeighbours (_ggbac []event )map[*textPara ][]int {_cf .Slice (_ggbac ,func (_cccfg ,_bccbe int )bool {_cefbe ,_affa :=_ggbac [_cccfg ],_ggbac [_bccbe ];_abbf ,_aedba :=_cefbe ._fafa ,_affa ._fafa ;if _abbf !=_aedba {return _abbf < _aedba ;};if _cefbe ._gead !=_affa ._gead {return _cefbe ._gead ;};return _cccfg < _bccbe ;});_daace :=make (map[int ]intSet );_bcdce :=make (intSet );for _ ,_ddcaea :=range _ggbac {if _ddcaea ._gead {_daace [_ddcaea ._bdd ]=make (intSet );for _bfac :=range _bcdce {if _bfac !=_ddcaea ._bdd {_daace [_ddcaea ._bdd ].add (_bfac );_daace [_bfac ].add (_ddcaea ._bdd );};};_bcdce .add (_ddcaea ._bdd );}else {_bcdce .del (_ddcaea ._bdd );};};_afgf :=map[*textPara ][]int {};for _caeca ,_dccd :=range _daace {_gegde :=_bfgce [_caeca ];if len (_dccd )==0{_afgf [_gegde ]=nil ;continue ;};_ddcgf :=make ([]int ,len (_dccd ));_gfbb :=0;for _aggac :=range _dccd {_ddcgf [_gfbb ]=_aggac ;_gfbb ++;};_afgf [_gegde ]=_ddcgf ;};return _afgf ;};func _gfgcb (_fbggd map[float64 ]map[float64 ]gridTile )[]float64 {_aegd :=make ([]float64 ,0,len (_fbggd ));for _gegd :=range _fbggd {_aegd =append (_aegd ,_gegd );};_cf .Float64s (_aegd );_efeag :=len (_aegd );for _abea :=0;_abea < _efeag /2;_abea ++{_aegd [_abea ],_aegd [_efeag -1-_abea ]=_aegd [_efeag -1-_abea ],_aegd [_abea ];};return _aegd ;};func (_dda *textObject )nextLine (){_dda .moveLP (0,-_dda ._bfdgc ._dbac )};func (_adec *subpath )clear (){*_adec =subpath {}};func (_eafda paraList )findGridTables (_aface []gridTiling )[]*textTable {if _bedbb {_ad .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_eafda ));for _faea ,_aagf :=range _eafda {_dc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_faea 
,_aagf );};};var _acadf []*textTable ;for _fdaeg ,_fbcd :=range _aface {_afbbg ,_gfce :=_eafda .findTableG
// Elements returns the TextMarks in `ma`.
func (_ggf *TextMarkArray )Elements ()[]TextMark {return _ggf ._dcf };func (_accbe rulingList )merge ()*ruling {_babg :=_accbe [0]._acaf ;_dgbd :=_accbe [0]._fgdd ;_defge :=_accbe [0]._cage ;for _ ,_egac :=range _accbe [1:]{_babg +=_egac ._acaf ;if _egac ._fgdd < _dgbd {_dgbd =_egac ._fgdd ;};if _egac ._cage > _defge {_defge =_egac ._cage ;};};_aefba :=&ruling {_abgae :_accbe [0]._abgae ,_aaff :_accbe [0]._aaff ,Color :_accbe [0].Color ,_acaf :_babg /float64 (len (_accbe )),_fgdd :_dgbd ,_cage :_defge };if _dgcc {_ad .Log .Info ("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073",len (_accbe ),_aefba );for _aadgge ,_bgbcf :=range _accbe {_dc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_aadgge ,_bgbcf );};};return _aefba ;};func (_cceb *shapesState )quadraticTo (_dcd ,_fab ,_efea ,_fddf float64 ){if _ecef {_ad .Log .Info ("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a");};_cceb .addPoint (_efea ,_fddf );};var _cagbe =map[rulingKind ]string {_cffb :"\u006e\u006f\u006e\u0065",_feae :"\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_fdff :"\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"};func (_bgca *textLine )pullWord (_gdga *wordBag ,_fabd *textWord ,_begd int ){_bgca .appendWord (_fabd );_gdga .removeWord (_fabd ,_begd );};func (_gcfc *shapesState )lastpointEstablished ()(_cd .Point ,bool ){if _gcfc ._dedf {return _gcfc ._fecc ,false ;};_fbbdd :=len (_gcfc ._cgbd );if _fbbdd > 0&&_gcfc ._cgbd [_fbbdd -1]._eade {return _gcfc ._cgbd [_fbbdd -1].last (),false ;};return _cd .Point {},true ;};func (_cfe *textLine )appendWord (_abca *textWord ){_cfe ._becbb =append (_cfe ._becbb ,_abca );_cfe .PdfRectangle =_ceab (_cfe .PdfRectangle ,_abca .PdfRectangle );if _abca ._dafb > _cfe ._cdgd {_cfe ._cdgd =_abca ._dafb ;};if _abca ._gceede > _cfe ._dgfa {_cfe ._dgfa =_abca ._gceede ;};};type textTable struct{_dd .PdfRectangle ;_gddg ,_adfe int ;_eafc bool ;_efbae map[uint64 ]*textPara 
;_baba map[uint64 ]compositeCell ;};func (_gebbe intSet )del (_fcec int ){delete (_gebbe ,_fcec )};func (_bba *textObject )setTextRise (_cdd float64 ){if _bba ==nil {return ;};_bba ._bfdgc ._gfee =_cdd ;};func (_fag *textLine )text ()string {var _bfab []string ;for _ ,_gcbe :=range _fag ._becbb {if _gcbe ._fabdc {_bfab =append (_bfab ,"\u0020");};_bfab =append (_bfab ,_gcbe ._ebed );};return _d .Join (_bfab ,"");};type textResult struct{_ead PageText ;_dba int ;_abc int ;};
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	Images []ImageMark
}

// String describes the cell: its rectangle, its paragraph count and, when it
// has paragraphs, their merged text passed through `_cgbgd(..., 50)`.
func (_dbfe compositeCell) String() string {
	var summary string
	if len(_dbfe.paraList) > 0 {
		merged := _dbfe.paraList.merge()
		summary = _cgbgd(merged.text(), 50)
	}
	// Format decodes to "%6.2f %d paras %q".
	return _dc.Sprintf(
		"\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071",
		_dbfe.PdfRectangle, len(_dbfe.paraList), summary,
	)
}

// textLine is a sequence of words together with their bounding rectangle.
type textLine struct {
	_dd.PdfRectangle
	// _dgfa is the largest `_gceede` of the words appended so far.
	_dgfa float64
	// _becbb are the words in the line, in append order.
	_becbb []*textWord
	// _cdgd is the largest `_dafb` of the words appended so far.
	_cdgd float64
}

// depth returns the smallest depth() among the cells of row 0, ignoring
// missing cells and cells with `_ebfg` set. It returns 1e10 when no cell
// qualifies.
func (_bfgbg *textTable) depth() float64 {
	smallest := 1e10
	for col := 0; col < _bfgbg._gddg; col++ {
		cell := _bfgbg.get(col, 0)
		if cell != nil && !cell._ebfg {
			smallest = _ge.Min(smallest, cell.depth())
		}
	}
	return smallest
}

// checkWidth returns `_cabf - _agcd` and whether that difference is at most `_gbg`.
func (_agebe rectRuling) checkWidth(_agcd, _cabf float64) (float64, bool) {
	width := _cabf - _agcd
	return width, width <= _gbg
}

const _ada = 1.0 / 1000.0
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_beb PageText) ToText() string {
	return _beb.Text()
}

// markCells sets `_ebaf` on every non-nil cell of the table.
func (_gdfb *textTable) markCells() {
	for row := 0; row < _gdfb._adfe; row++ {
		for col := 0; col < _gdfb._gddg; col++ {
			cell := _gdfb.get(col, row)
			if cell == nil {
				continue
			}
			cell._ebaf = true
		}
	}
}

// removeDuplicates sorts the list in place and returns a new list in which
// each run of consecutive rulings that are equals() to one another is reduced
// to its first member. It returns nil for an empty list.
func (_gedd rulingList) removeDuplicates() rulingList {
	if len(_gedd) == 0 {
		return nil
	}
	_gedd.sort()
	kept := rulingList{_gedd[0]}
	for i := 1; i < len(_gedd); i++ {
		candidate := _gedd[i]
		if !candidate.equals(kept[len(kept)-1]) {
			kept = append(kept, candidate)
		}
	}
	return kept
}

// updateBBox folds the rectangle of every paragraph in the cell into the
// cell's own rectangle using `_ceab`.
func (_gdgd *compositeCell) updateBBox() {
	for i := range _gdgd.paraList {
		_gdgd.PdfRectangle = _ceab(_gdgd.PdfRectangle, _gdgd.paraList[i].PdfRectangle)
	}
}

type stateStack []*textState

// removeDuplicates drops words that repeat another word in the same depth bin:
// same `_ebed` text and all four rectangle coordinates within a tolerance.
// The tolerance is `_bgad` times the `_dafb` of the first word of the depth
// bin currently being visited. For each pair, the word met first is kept.
// Bins that become empty are deleted from `_efeb`.
func (_geafg *wordBag) removeDuplicates() {
	if _dbae {
		// Format decodes to "removeDuplicates: %q".
		_ad.Log.Info("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071", _geafg.text())
	}
	for _, depthIdx := range _geafg.depthIndexes() {
		// Earlier iterations may already have emptied or deleted this bin.
		if len(_geafg._efeb[depthIdx]) == 0 {
			continue
		}
		lead := _geafg._efeb[depthIdx][0]
		tol := _bgad * lead._dafb
		base := lead._gceede
		near := func(a, b float64) bool { return _ge.Abs(a-b) < tol }

		for _, binIdx := range _geafg.depthBand(base, base+tol) {
			dropped := map[*textWord]struct{}{}
			bin := _geafg._efeb[binIdx]
			for _, keeper := range bin {
				if _, gone := dropped[keeper]; gone {
					continue
				}
				for _, other := range bin {
					if _, gone := dropped[other]; gone {
						continue
					}
					if other == keeper || other._ebed != keeper._ebed {
						continue
					}
					if near(other.Llx, keeper.Llx) && near(other.Urx, keeper.Urx) &&
						near(other.Lly, keeper.Lly) && near(other.Ury, keeper.Ury) {
						dropped[other] = struct{}{}
					}
				}
			}
			if len(dropped) == 0 {
				continue
			}
			// Compact the survivors to the front of the bin, then trim.
			next := 0
			for _, w := range bin {
				if _, gone := dropped[w]; gone {
					continue
				}
				bin[next] = w
				next++
			}
			_geafg._efeb[binIdx] = bin[:len(bin)-len(dropped)]
			if len(_geafg._efeb[binIdx]) == 0 {
				delete(_geafg._efeb, binIdx)
			}
		}
	}
}

// depthRange returns, in increasing order, the keys of `_efeb` that lie in the
// closed interval [`_dgaf`, `_cccd`]. It returns nil when there are none.
func (_cdaa *wordBag) depthRange(_dgaf, _cccd int) []int {
	var hits []int
	for idx := range _cdaa._efeb {
		if idx < _dgaf || idx > _cccd {
			continue
		}
		hits = append(hits, idx)
	}
	if len(hits) == 0 {
		return nil
	}
	_cf.Ints(hits)
	return hits
}

// writeText writes every paragraph without `_ebfg` set to `_cdga`.
// After each written paragraph except the last list element, a single space is
// written when `_dgfd` reports true for it and its successor; otherwise two
// newlines are written. Two newlines always terminate the output.
// Errors returned by the writer are not checked.
func (_bbegd paraList) writeText(_cdga _a.Writer) {
	lastIdx := len(_bbegd) - 1
	for i, para := range _bbegd {
		if para._ebfg {
			continue
		}
		para.writeText(_cdga)
		if i == lastIdx {
			continue
		}
		if _dgfd(para, _bbegd[i+1]) {
			_cdga.Write([]byte("\u0020"))
			continue
		}
		_cdga.Write([]byte("\u000a"))
		_cdga.Write([]byte("\u000a"))
	}
	_cdga.Write([]byte("\u000a"))
	_cdga.Write([]byte("\u000a"))
}
// Tables returns the tables extracted from the page.
func (_acb PageText )Tables ()[]TextTable {if _bedbb {_ad .Log .Info ("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064",len (_acb ._feb ));};return _acb ._feb ;};func (_befbg paraList )findTableGrid (_fadgc gridTiling )(*textTable ,map[*textPara ]struct{}){_egafb :=len (_fadgc ._cgdb );_debd :=len (_fadgc ._cfeab );_faaea :=textTable {_eafc :true ,_gddg :_egafb ,_adfe :_debd ,_efbae :make (map[uint64 ]*textPara ,_egafb *_debd ),_baba :make (map[uint64 ]compositeCell ,_egafb *_debd )};_cagcc :=make (map[*textPara ]struct{});_fcagb :=int ((1.0-_eaceg )*float64 (_egafb *_debd ));_cadf :=0;if _dgdd {_ad .Log .Info ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064",_egafb ,_debd );};for _abeg ,_daeg :=range _fadgc ._cfeab {_dadca ,_febd :=_fadgc ._ggac [_daeg ];if !_febd {continue ;};for _gdae ,_edefa :=range _fadgc ._cgdb {_gfba ,_gdba :=_dadca [_edefa ];if !_gdba {continue ;};_ebdb :=_befbg .inTile (_gfba );if len (_ebdb )==0{_cadf ++;if _cadf > _fcagb {if _dgdd {_ad .Log .Info ("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064",_cadf );};return nil ,nil ;};}else {_faaea .putComposite (_gdae ,_abeg ,_ebdb ,_gfba .PdfRectangle );for _ ,_dcbe :=range _ebdb {_cagcc [_dcbe ]=struct{}{};};};};};_bfagf :=0;for _gbcdc :=0;_gbcdc < _egafb ;_gbcdc ++{_efga :=_faaea .get (_gbcdc ,0);if _efga ==nil ||!_efga ._ebfg {_bfagf ++;};};if _bfagf ==0{if _dgdd {_ad .Log .Info ("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030");};return nil ,nil ;};_cccbd :=_faaea .reduceTiling (_fadgc ,_efgc );_cccbd =_cccbd .subdivide ();return _cccbd ,_cagcc ;};func _dfgfe (_fbcdf float64 )float64 {return _aaabg *_ge .Round (_fbcdf /_aaabg )};func (_dgb lineRuling )xMean ()float64 {return 0.5*(_dgb ._aeeac .X +_dgb ._daeb .X )};const (_cgea =1.0e-6;_aaabg =1.0e-4;_cccae =10;_dfca =6;_fdae =0.5;_bdbbe =0.12;_dcgd =0.19;_dbff =0.04;_dffg =0.04;_efbe =1.0;_agfg =0.04;_gafbg =0.4;_ccfc 
=0.7;_ebca =1.0;_ddda =0.1;_abfd =1.4;_abe =0.46;_cgeg =0.02;_bgad =0.2;_eggb =0.5;_gfga =4;_ccac =4.0;_ega =6;_eaceg =0.3;_egfe =0.01;_agde =0.02;_geda =2;_aaed =2;_cfgg =500;_bbag =4.0;_fdc =4.0;_gcbf =0.05;_eaea =0.1;_dad =2.0;_gbg =2.0;_aedb =1.5;_efgc =3.0;_bgce =0.25;);
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` < tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
func (_afcc *TextMarkArray )RangeOffset (start ,end int )(*TextMarkArray ,error ){if _afcc ==nil {return nil ,_f .New ("\u006da\u003d\u003d\u006e\u0069\u006c");};if end < start {return nil ,_dc .Errorf ("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020",start ,end );};_efe :=len (_afcc ._dcf );if _efe ==0{return _afcc ,nil ;};if start < _afcc ._dcf [0].Offset {start =_afcc ._dcf [0].Offset ;};if end > _afcc ._dcf [_efe -1].Offset +1{end =_afcc ._dcf [_efe -1].Offset +1;};_bbdb :=_cf .Search (_efe ,func (_cbdgg int )bool {return _afcc ._dcf [_cbdgg ].Offset +len (_afcc ._dcf [_cbdgg ].Text )-1>=start });if !(0<=_bbdb &&_bbdb < _efe ){_edaeg :=_dc .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076",start ,_bbdb ,_efe ,_afcc ._dcf [0],_afcc ._dcf [_efe -1]);return nil ,_edaeg ;};_fde :=_cf .Search (_efe ,func (_defb int )bool {return _afcc ._dcf [_defb ].Offset > end -1});if !(0<=_fde &&_fde < _efe ){_bgcd :=_dc .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076",end ,_fde ,_efe ,_afcc ._dcf [0],_afcc ._dcf [_efe -1]);return nil ,_bgcd ;};if _fde <=_bbdb {return nil ,_dc .Errorf 
("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064",start ,end ,_bbdb ,_fde );};return &TextMarkArray {_dcf :_afcc ._dcf [_bbdb :_fde ]},nil ;};func (_gddbd compositeCell )parasBBox ()(paraList ,_dd .PdfRectangle ){return _gddbd .paraList ,_gddbd .PdfRectangle ;};func _gefb (_gfccf []int )[]int {_cfeaf :=make ([]int ,len (_gfccf ));for _acecb ,_gdfe :=range _gfccf {_cfeaf [len (_gfccf )-1-_acecb ]=_gdfe ;};return _cfeaf ;};func (_cda *textObject )reset (){_cda ._fddg =_cd .IdentityMatrix ();_cda ._cba =_cd .IdentityMatrix ();_cda ._bgdab =nil ;};func (_bbgb paraList )log (_efba string ){if !_gcdc {return ;};_ad .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_efba ,len (_bbgb ));for _gabg ,_ggfg :=range _bbgb {if _ggfg ==nil {continue ;};_cbcc :=_ggfg .text ();_aedg :="\u0020\u0020";if _ggfg ._cgf !=nil {_aedg =_dc .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_ggfg ._cgf ._gddg ,_ggfg ._cgf ._adfe );};_dc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_gabg ,_ggfg .PdfRectangle ,_aedg ,_cgbgd (_cbcc ,50));};};type textObject struct{_bfe *Extractor ;_eaae *_dd .PdfPageResources ;_bgda _ga .GraphicsState ;_bfdgc *textState ;_fea *stateStack ;_fddg _cd .Matrix ;_cba _cd .Matrix ;_bgdab []*textMark ;_eeg bool ;};func _ccfb (_gccb float64 )bool {return _ge .Abs (_gccb )< _gbg };func (_ebcb compositeCell )split (_acdf ,_bgccb []float64 )*textTable {_adcd :=len (_acdf )+1;_ddbg :=len (_bgccb )+1;if _bedbb {_ad .Log .Info 
("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_age *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_gg :=&imageExtractContext {_dcg :options };_acc :=_gg .extractContentStreamImages (_age ._add ,_age ._aa );if _acc !=nil {return nil ,_acc ;};return &PageImages {Images :_gg ._da },nil ;};var (_aecgf =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};);func (_afcg intSet )has (_badcd int )bool {_ ,_aafgd :=_afcg [_badcd ];return _aafgd };func (_gbgc *textPara )depth ()float64 {if _gbgc ._ebfg {return -1.0;};if len (_gbgc ._fcaad )> 0{return _gbgc ._fcaad [0]._dgfa ;};return _gbgc ._cgf .depth ();};func _ecgc (_cfag string ,_gefab []rulingList ){_ad .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_gefab ),_cfag );for _fbgb ,_bfgab :=range _gefab {_dc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fbgb ,_bfgab .String ());};};func (_dcef *textTable )reduceTiling (_bcabe gridTiling ,_gcaa float64 )*textTable {_dbbdb :=make ([]int ,0,_dcef ._adfe );_aebdb :=make ([]int ,0,_dcef ._gddg );_ggge :=_bcabe ._cgdb ;_eebec :=_bcabe ._cfeab ;for _gcfcd :=0;_gcfcd < _dcef ._adfe ;_gcfcd ++{_aabg :=_gcfcd > 0&&_ge .Abs (_eebec [_gcfcd -1]-_eebec [_gcfcd ])< _gcaa &&_dcef .emptyRow (_gcfcd );if !_aabg {_dbbdb =append (_dbbdb ,_gcfcd );};};for _face :=0;_face < _dcef ._gddg ;_face ++{_ecac :=_face < _dcef ._gddg -1&&_ge 
.Abs (_ggge [_face +1]-_ggge [_face ])< _gcaa &&_dcef .emptyColumn (_face );if !_ecac {_aebdb =append (_aebdb ,_face );};};if len (_dbbdb )==_dcef ._adfe &&len (_aebdb )==_dcef ._gddg {return _dcef ;};_abbdf :=textTable {_eafc :_dcef ._eafc ,_gddg :len (_aebdb ),_adfe :len (_dbbdb ),_baba :make (map[uint64 ]compositeCell ,len (_aebdb )*len (_dbbdb ))};if _bedbb {_ad .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_dcef ._gddg ,_dcef ._adfe ,len (_aebdb ),len (_dbbdb ));_ad .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_aebdb );_ad .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_dbbdb );};for _bfag ,_aeabe :=range _dbbdb {for _dgae ,_aagff :=range _aebdb {_dbgeb ,_ecdbd :=_dcef .getComposite (_aagff ,_aeabe );if len (_dbgeb )==0{continue ;};if _bedbb {_dc .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_dgae ,_bfag ,_aagff ,_aeabe ,_cgbgd (_dbgeb .merge ().text (),50));};_abbdf .putComposite (_dgae ,_bfag ,_dbgeb ,_ecdbd );};};return &_abbdf ;};func (_bfafa *textTable )getRight ()paraList {_dddbb :=make (paraList ,_bfafa ._adfe );for _gcfbc :=0;_gcfbc < _bfafa ._adfe ;_gcfbc ++{_adddf :=_bfafa .get (_bfafa ._gddg -1,_gcfbc )._geged ;if _adddf ==nil ||_adddf ._ebaf {return nil ;};_dddbb [_gcfbc ]=_adddf ;};for _gaced :=0;_gaced < _bfafa ._adfe -1;_gaced ++{if _dddbb [_gaced ]._gfaf !=_dddbb [_gaced +1]{return nil ;};};return _dddbb ;};func (_acef *wordBag )firstWord (_ggfd int )*textWord {return _acef ._efeb [_ggfd ][0]};func _egfac (_bebg int ,_agcf func (int ,int )bool )[]int {_cfdf :=make ([]int ,_bebg );for _ecaf :=range _cfdf {_cfdf [_ecaf ]=_ecaf ;};_cf .Slice (_cfdf ,func (_daada ,_bgfb int )bool {return _agcf (_cfdf [_daada ],_cfdf [_bgfb ])});return _cfdf 
;};func (_bffd *textObject )getCurrentFont ()*_dd .PdfFont {var _baa *_dd .PdfFont ;if !_bffd .
// PageText represents the layout of text on a device page.
type PageText struct{_gae []*textMark ;_gdcd string ;_gge []TextMark ;_feb []TextTable ;_bgc _dd .PdfRectangle ;_eea []pathSection ;_abgcd []pathSection ;};func _dbe (_cdg _dd .PdfRectangle ,_cdcd bounded )float64 {return _cdg .Ury -_cdcd .bbox ().Lly };func (_eeba *subpath )close (){if !_fefcb (_eeba ._addd [0],_eeba .last ()){_eeba .add (_eeba ._addd [0]);};_eeba ._eade =true ;_eeba .removeDuplicates ();};func _bfgb (_fdge ,_abcf _cd .Point )rulingKind {_dccff :=_ge .Abs (_fdge .X -_abcf .X );_fafb :=_ge .Abs (_fdge .Y -_abcf .Y );return _gbgfc (_dccff ,_fafb ,_bbag );};func (_ddff *textWord )appendMark (_caga *textMark ,_facg _dd .PdfRectangle ){_ddff ._dgcbf =append (_ddff ._dgcbf ,_caga );_ddff .PdfRectangle =_ceab (_ddff .PdfRectangle ,_caga .PdfRectangle );if _caga ._geafc > _ddff ._dafb {_ddff ._dafb =_caga ._geafc ;};_ddff ._gceede =_facg .Ury -_ddff .PdfRectangle .Lly ;};func (_faf *textObject )moveLP (_eba ,_eeb float64 ){_faf ._cba .Concat (_cd .NewMatrix (1,0,0,1,_eba ,_eeb ));_faf ._fddg =_faf ._cba ;};func (_dddae *textTable )log (_dacaa string ){if !_bedbb {return ;};_ad .Log .Info ("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066",_dacaa ,_dddae ._gddg ,_dddae ._adfe ,_dddae ._eafc ,_dddae .PdfRectangle );for _abbdfa :=0;_abbdfa < _dddae ._adfe ;_abbdfa ++{for _acge :=0;_acge < _dddae ._gddg ;_acge ++{_dfae :=_dddae .get (_acge ,_abbdfa );if _dfae ==nil {continue ;};_dc .Printf ("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a",_acge ,_abbdfa ,_dfae .PdfRectangle ,_cgbgd (_dfae .text (),50),_g .RuneCountInString (_dfae .text ()));};};};func _bbge (_eeff string )bool {for _ ,_bfcd :=range _eeff {if !_b .IsSpace (_bfcd ){return false ;};};return true ;};func (_faagd *textTable )compositeRowCorridors ()map[int ][]float64 {_dafg :=make (map[int ][]float64 ,_faagd 
._adfe );if _bedbb {_ad .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_faagd ._adfe );};for _dadg :=1;_dadg < _faagd ._adfe ;_dadg ++{var _cbdaf []compositeCell ;for _cefc :=0;_cefc < _faagd ._gddg ;_cefc ++{if _bbbe ,_ggbfe :=_faagd ._baba [_eddd (_cefc ,_dadg )];_ggbfe {_cbdaf =append (_cbdaf ,_bbbe );};};if len (_cbdaf )==0{continue ;};_bafd :=_egfb (_cbdaf );_dafg [_dadg ]=_bafd ;if _bedbb {_dc .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_dadg ,_bafd );};};return _dafg ;};func _ccf (_cfaba _dd .PdfRectangle )textState {return textState {_cdda :100,_babc :RenderModeFill ,_fcg :_cfaba };};func (_ffbf *textTable )putComposite (_affd ,_fccee int ,_dagbc paraList ,_bded _dd .PdfRectangle ){if len (_dagbc )==0{_ad .Log .Error ("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073");return ;};_fgca :=compositeCell {_bded ,_dagbc };if _bedbb {_dc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a",_affd ,_fccee ,_fgca .String ());};_fgca .updateBBox ();_ffbf ._baba [_eddd (_affd ,_fccee )]=_fgca ;};type event struct{_fafa float64 ;_gead bool ;_bdd int ;};func (_cbfb lineRuling )yMean ()float64 {return 0.5*(_cbfb ._aeeac .Y +_cbfb ._daeb .Y )};
// String returns a string describing `pt`.
func (_fccd PageText )String ()string {_bef :=_dc .Sprintf ("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073",len (_fccd ._gae ));_efc :=[]string {"\u002d"+_bef };for _ ,_fbea :=range _fccd ._gae {_efc =append (_efc ,_fbea .String ());};_efc =append (_efc ,"\u002b"+_bef );return _d .Join (_efc ,"\u000a");};func (_ggdb *shapesState )establishSubpath ()*subpath {_cbdb ,_gdda :=_ggdb .lastpointEstablished ();if !_gdda {_ggdb ._cgbd =append (_ggdb ._cgbd ,_fafc (_cbdb ));};if len (_ggdb ._cgbd )==0{return nil ;};_ggdb ._dedf =false ;return _ggdb ._cgbd [len (_ggdb ._cgbd )-1];};func (_ceef *ruling )equals (_eedc *ruling )bool {return _ceef ._abgae ==_eedc ._abgae &&_bdccd (_ceef ._acaf ,_eedc ._acaf )&&_bdccd (_ceef ._fgdd ,_eedc ._fgdd )&&_bdccd (_ceef ._cage ,_eedc ._cage );};func (_aaab *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_fgc :=make (map[int ]map[*textWord ]struct{},len (_aaab ._efeb ));for _dfc :=range _aaab ._efeb {_fgc [_dfc ]=make (map[*textWord ]struct{});};return _fgc ;};func (_affgc *textTable )isExportable ()bool {if _affgc ._eafc {return true ;};_feff :=func (_cccdb int )bool {_fcbc :=_affgc .get (0,_cccdb );if _fcbc ==nil {return false ;};_faag :=_fcbc .text ();_ddab :=_g .RuneCountInString (_faag );_dbbd :=_ffcf .MatchString (_faag );return _ddab <=1||_dbbd ;};for _eaedg :=0;_eaedg < _affgc ._adfe ;_eaedg ++{if !_feff (_eaedg ){return true ;};};return false ;};type compositeCell struct{_dd .PdfRectangle ;paraList ;};func _ggc (_ggce ,_cdad _dd .PdfRectangle )bool {return _ggce .Llx <=_cdad .Llx &&_cdad .Urx <=_ggce .Urx &&_ggce .Lly <=_cdad .Lly &&_cdad .Ury <=_ggce .Ury ;};func (_ddcg paraList )inTile (_cagbg gridTile )paraList {var _gfda paraList ;for _ ,_cecgg :=range _ddcg {if _cagbg .contains (_cecgg .PdfRectangle ){_gfda =append (_gfda ,_cecgg );};};if _bedbb {_dc .Printf ("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 
\u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n",_cagbg ,len (_gfda ));for _dfdgb ,_acdc :=range _gfda {_dc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dfdgb ,_acdc );};_dc .Println ("");};return _gfda ;};func _dbdfa (_bfbe _dd .PdfRectangle )*ruling {return &ruling {_abgae :_feae ,_acaf :_bfbe .Lly ,_fgdd :_bfbe .Llx ,_cage :_bfbe .Urx };};
// New returns an Extractor instance for extracting content from the input PDF page.
func New (page *_dd .PdfPage )(*Extractor ,error ){_def ,_eb :=page .GetAllContentStreams ();if _eb !=nil {return nil ,_eb ;};_bd ,_eb :=page .GetMediaBox ();if _eb !=nil {return nil ,_dc .Errorf ("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076",_eb );};_ba :=&Extractor {_add :_def ,_aa :page .Resources ,_eda :*_bd ,_dca :map[string ]fontEntry {},_gdg :map[string ]textResult {}};if _ba ._eda .Llx > _ba ._eda .Urx {_ad .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_ba ._eda );_ba ._eda .Llx ,_ba ._eda .Urx =_ba ._eda .Urx ,_ba ._eda .Llx ;};if _ba ._eda .Lly > _ba ._eda .Ury {_ad .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_ba ._eda );_ba ._eda .Lly ,_ba ._eda .Ury =_ba ._eda .Ury ,_ba ._eda .Lly ;};return _ba ,nil ;};func _egcc (_bafe _dd .PdfRectangle )rulingKind {_ecg :=_bafe .Width ();_eaga :=_bafe .Height ();if _ecg > _eaga {if _ecg >=_bbag {return _feae ;};}else {if _eaga >=_bbag {return _fdff ;};};return _cffb ;};func _afba (_ffb *_ga .ContentStreamOperation )(float64 ,error ){if len (_ffb .Params )!=1{_gcbd :=_f .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");_ad .Log .Debug 
("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_ffb .Operand ,1,len (_ffb .Params ),_ffb .Params );return 0.0,_gcbd ;};return _gd .GetNumberAsFloat (_ffb .Params [0]);};func (_feeg rulingList )sortStrict (){_cf .Slice (_feeg ,func (_cegf ,_fffb int )bool {_caddb ,_dfgf :=_feeg [_cegf ],_feeg [_fffb ];_ceddc ,_ebdde :=_caddb ._abgae ,_dfgf ._abgae ;if _ceddc !=_ebdde {return _ceddc > _ebdde ;};_fabe ,_gcde :=_caddb ._acaf ,_dfgf ._acaf ;if !_dgdb (_fabe -_gcde ){return _fabe < _gcde ;};_fabe ,_gcde =_caddb ._fgdd ,_dfgf ._fgdd ;if _fabe !=_gcde {return _fabe < _gcde ;};return _caddb ._cage < _dfgf ._cage ;});};type subpath struct{_addd []_cd .Point ;_eade bool ;};func (_gdec paraList )lines ()[]*textLine {var _eggg []*textLine ;for _ ,_feaae :=range _gdec {_eggg =append (_eggg ,_feaae ._fcaad ...);};return _eggg ;};func (_gccdf *wordBag )applyRemovals (_cbgff map[int ]map[*textWord ]struct{}){for _ffbd ,_aebc :=range _cbgff {if len (_aebc )==0{continue ;};_befb :=_gccdf ._efeb [_ffbd ];_babe :=len (_befb )-len (_aebc );if _babe ==0{delete (_gccdf ._efeb ,_ffbd );continue ;};_gafbd :=make ([]*textWord ,_babe );_dbd :=0;for _ ,_agga :=range _befb {if _ ,_eggf :=_aebc [_agga ];!_eggf {_gafbd [_dbd ]=_agga ;_dbd ++;};};_gccdf ._efeb [_ffbd ]=_gafbd ;};};type rulingList []*ruling ;var _ffcf =_e .MustCompile ("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024");func (_bgaff *textTable )subdivide ()*textTable {_bgaff .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");_bdbf :=_bgaff .compositeRowCorridors ();_gaeg :=_bgaff .compositeColCorridors ();if _bedbb {_ad .Log .Info 
("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_ddeeg (_bdbf ),_ddeeg (_gaeg ));};if len (_bdbf )==0||len (_gaeg )==0{return _bgaff ;};_bgba (_bdbf );_bgba (_gaeg );if _bedbb {_ad .Log
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int ;func _ffegf (_gbbg ,_fgeag _cd .Point )bool {_gdad :=_ge .Abs (_gbbg .X -_fgeag .X );_cgdd :=_ge .Abs (_gbbg .Y -_fgeag .Y );return _fcdf (_gdad ,_cgdd );};func (_cecf *textObject )setTextLeading (_eabg float64 ){if _cecf ==nil {return ;};_cecf ._bfdgc ._dbac =_eabg ;};func (_acbf *textTable )emptyColumn (_ebae int )bool {for _cfedg :=0;_cfedg < _acbf ._adfe ;_cfedg ++{_gdbgdc :=_acbf .get (_ebae ,_cfedg );if _gdbgdc !=nil &&_gdbgdc .text ()!=""{return false ;};};return true ;};func _dgfd (_gdgc ,_afcf *textPara )bool {if _gdgc ._ebfg ||_afcf ._ebfg {return true ;};return _dgdb (_gdgc .depth ()-_afcf .depth ());};func (_defe rulingList )snapToGroupsDirection ()rulingList {_defe .sortStrict ();_acbd :=make (map[*ruling ]rulingList ,len (_defe ));_cbgaf :=_defe [0];_cgff :=func (_adddd *ruling ){_cbgaf =_adddd ;_acbd [_cbgaf ]=rulingList {_adddd }};_cgff (_defe [0]);for _ ,_fddbb :=range _defe [1:]{if _fddbb ._acaf < _cbgaf ._acaf -_cgea {_ad .Log .Error ("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073",_cbgaf ,_fddbb );};if _fddbb ._acaf > _cbgaf ._acaf +_gbg {_cgff (_fddbb );}else {_acbd [_cbgaf ]=append (_acbd [_cbgaf ],_fddbb );};};_fgda :=make (map[*ruling ]float64 ,len (_acbd ));_acfg :=make (map[*ruling ]*ruling ,len (_defe ));for _faedd ,_bfff :=range _acbd {_fgda [_faedd ]=_bfff .mergePrimary ();for _ ,_ccdcf :=range _bfff {_acfg [_ccdcf ]=_faedd ;};};for _ ,_dabef :=range _defe {_dabef ._acaf =_fgda [_acfg [_dabef ]];};_gdccc :=make (rulingList ,0,len (_defe ));for _ ,_gbdb :=range _acbd {_fegg :=_gbdb .splitSec ();for _ebcd ,_gbcec :=range _fegg {_bcgf :=_gbcec .merge ();if len (_gdccc )> 0{_cbde :=_gdccc [len (_gdccc )-1];if _cbde .alignsPrimary (_bcgf )&&_cbde 
.alignsSec (_bcgf ){_ad .Log .Error ("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073",_ebcd ,_cbde ,_bcgf );continue ;};};_gdccc =append (_gdccc ,_bcgf );};};_gdccc .sortStrict ();return _gdccc ;};func (_defg *stateStack )empty ()bool {return len (*_defg )==0};func (_dbeaf *textWord )computeText ()string {_edecf :=make ([]string ,len (_dbeaf ._dgcbf ));for _cafgf ,_bafga :=range _dbeaf ._dgcbf {_edecf [_cafgf ]=_bafga ._fgeb ;};return _d .Join (_edecf ,"");};func (_ebfa rulingList )sort (){_cf .Slice (_ebfa ,_ebfa .comp )};
// ToTextMark returns the public TextMark view of the textMark.
func (_gbeg *textMark )ToTextMark ()TextMark {return TextMark {Text :_gbeg ._fgeb ,Original :_gbeg ._dbffe ,BBox :_gbeg ._eeag ,Font :_gbeg ._dfdg ,FontSize :_gbeg ._geafc ,FillColor :_gbeg ._aaad ,StrokeColor :_gbeg ._gadc ,Orientation :_gbeg ._deeg };};func (_gebec *wordBag )removeWord (_fcgc *textWord ,_bceb int ){_egeb :=_gebec ._efeb [_bceb ];_egeb =_dbgda (_egeb ,_fcgc );if len (_egeb )==0{delete (_gebec ._efeb ,_bceb );}else {_gebec ._efeb [_bceb ]=_egeb ;};};func (_dafc *textTable )logComposite (_ebba string ){if !_bedbb {return ;};_ad .Log .Info ("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_dafc ._gddg ,_dafc ._adfe ,_ebba );_dc .Printf ("\u0025\u0035\u0073 \u007c","");for _badg :=0;_badg < _dafc ._gddg ;_badg ++{_dc .Printf ("\u0025\u0033\u0064 \u007c",_badg );};_dc .Println ("");_dc .Printf ("\u0025\u0035\u0073 \u002b","");for _bcaa :=0;_bcaa < _dafc ._gddg ;_bcaa ++{_dc .Printf ("\u0025\u0033\u0073 \u002b","\u002d\u002d\u002d");};_dc .Println ("");for _gcceg :=0;_gcceg < _dafc ._adfe ;_gcceg ++{_dc .Printf ("\u0025\u0035\u0064 \u007c",_gcceg );for _cbaf :=0;_cbaf < _dafc ._gddg ;_cbaf ++{_gefcb ,_ :=_dafc ._baba [_eddd (_cbaf ,_gcceg )].parasBBox ();_dc .Printf ("\u0025\u0033\u0064 \u007c",len (_gefcb ));};_dc .Println ("");};_ad .Log .Info ("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_dafc ._gddg ,_dafc ._adfe ,_ebba );_dc .Printf ("\u0025\u0035\u0073 \u007c","");for _egbc :=0;_egbc < _dafc ._gddg ;_egbc ++{_dc .Printf ("\u0025\u0031\u0032\u0064\u0020\u007c",_egbc );};_dc .Println ("");_dc .Printf ("\u0025\u0035\u0073 \u002b","");for _baegc :=0;_baegc < _dafc ._gddg ;_baegc ++{_dc .Print ("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b");};_dc .Println ("");for _eacbc :=0;_eacbc < _dafc ._adfe ;_eacbc ++{_dc .Printf ("\u0025\u0035\u0064 \u007c",_eacbc );for _egcce :=0;_egcce < _dafc ._gddg ;_egcce ++{_fddcg ,_ :=_dafc ._baba 
[_eddd (_egcce ,_eacbc )].parasBBox ();_fabda :="";_ggafa :=_fddcg .merge ();if _ggafa !=nil {_fabda =_ggafa .text ();};_fabda =_dc .Sprintf ("\u0025\u0071",_cgbgd (_fabda ,12));_fabda =_fabda [1:len (_fabda )-1];_dc .Printf ("\u0025\u0031\u0032\u0073\u0020\u007c",_fabda );};_dc .Println ("");};};const (RenderModeStroke RenderMode =1<<iota ;RenderModeFill ;RenderModeClip ;);func (_fffa *textLine )bbox ()_dd .PdfRectangle {return _fffa .PdfRectangle };func (_cabde *textPara )writeCellText (_cgac _a .Writer ){for _fdcg ,_feag :=range _cabde ._fcaad {_afeeb :=_feag .text ();_efdad :=_cceg &&_feag .endsInHyphen ()&&_fdcg !=len (_cabde ._fcaad )-1;if _efdad {_afeeb =_bee (_afeeb );};_cgac .Write ([]byte (_afeeb ));if !(_efdad ||_fdcg ==len (_cabde ._fcaad )-1){_cgac .Write ([]byte (_baac (_feag ._dgfa ,_cabde ._fcaad [_fdcg +1]._dgfa )));};};};func _bggfc (_decda []*textMark ,_bdgg _dd .PdfRectangle )[]*textWord {var _aadcb []*textWord ;var _dfga *textWord ;if _cga {_ad .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_decda ));};_degge :=func (){if _dfga !=nil {_abdc :=_dfga .computeText ();if !_bbge (_abdc ){_dfga ._ebed =_abdc ;_aadcb =append (_aadcb ,_dfga );if _cga {_ad .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_aadcb )-1,_dfga .String ());for _bceag ,_dgafc :=range _dfga ._dgcbf {_dc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bceag ,_dgafc .String ());};};};_dfga =nil ;};};for _ ,_cgdgg :=range _decda {if _egb &&_dfga !=nil &&len (_dfga ._dgcbf )> 0{_bfeb :=_dfga ._dgcbf [len (_dfga ._dgcbf )-1];_fcada ,_aecd :=_affde (_cgdgg ._fgeb );_faage ,_ggddb :=_affde (_bfeb ._fgeb );if _aecd &&!_ggddb &&_bfeb .inDiacriticArea (_cgdgg ){_dfga .addDiacritic (_fcada );continue ;};if _ggddb &&!_aecd &&_cgdgg .inDiacriticArea (_bfeb ){_dfga ._dgcbf =_dfga 
._dgcbf [:len (_dfga ._dgcbf )-1];_dfga .appendMark (_cgdgg ,_bdgg );_dfga .addDiacritic (_faage );con
// String returns a human-readable description of the ruling list.
//
// An empty list is rendered as "{ EMPTY }". Otherwise the list is split with
// vertsHorzs; if either part is empty only the two counts are printed, else
// the counts are followed by a rectangle built from the _acaf values of the
// first and last entries of each part.
func (_cgbddb rulingList) String() string {
	if len(_cgbddb) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := _cgbddb.vertsHorzs()
	nVerts, nHorzs := len(verts), len(horzs)
	if nVerts == 0 || nHorzs == 0 {
		return _dc.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", nVerts, nHorzs)
	}
	bounds := _dd.PdfRectangle{
		Llx: verts[0]._acaf,
		Urx: verts[nVerts-1]._acaf,
		Lly: horzs[nHorzs-1]._acaf,
		Ury: horzs[0]._acaf,
	}
	return _dc.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", nVerts, nHorzs, bounds)
}

// blocks reports whether some ruling in the list lies between `_gadce` and
// `_eedef`.
//
// It returns false immediately when the [_fgdd, _cage] extents of the two
// rulings do not overlap. Otherwise a list member qualifies when its _acaf is
// between the two rulings' _acaf values (with a _gbg tolerance) and its own
// [_fgdd, _cage] extent reaches into the overlap of the two rulings' extents.
func (_eccd rulingList) blocks(_gadce, _eedef *ruling) bool {
	if _gadce._fgdd > _eedef._cage || _eedef._fgdd > _gadce._cage {
		return false
	}
	overlapLo := _ge.Max(_gadce._fgdd, _eedef._fgdd)
	overlapHi := _ge.Min(_gadce._cage, _eedef._cage)

	// Order the pair so that `near` has the smaller _acaf.
	near, far := _gadce, _eedef
	if near._acaf > far._acaf {
		near, far = far, near
	}
	for _, v := range _eccd {
		between := near._acaf <= v._acaf+_gbg && v._acaf <= far._acaf+_gbg
		reaches := v._fgdd <= overlapHi && overlapLo <= v._cage
		if between && reaches {
			return true
		}
	}
	return false
}

// setFont records the font size `_gafc` in the current state, looks up the
// font named `_bec` with getFont and, on success, stores it in the current
// state. The font is then propagated to the state stack (_fea): the current
// state is pushed when the stack is empty, otherwise the top entry's font is
// overwritten.
//
// A nil receiver is a no-op returning nil. If getFont fails its error is
// returned; the font size has already been updated at that point.
func (_baed *textObject) setFont(_bec string, _gafc float64) error {
	if _baed == nil {
		return nil
	}
	_baed._bfdgc._dccf = _gafc

	font, err := _baed.getFont(_bec)
	if err != nil {
		return err
	}
	_baed._bfdgc._eccb = font

	if !_baed._fea.empty() {
		_baed._fea.top()._eccb = _baed._bfdgc._eccb
		return nil
	}
	_baed._fea.push(_baed._bfdgc)
	return nil
}

// lineTo appends the point (`_edec`, `_gfbf`) via addPoint. When the _ecef
// flag is set it first logs the coordinates and the corresponding
// devicePoint.
func (_cabd *shapesState) lineTo(_edec, _gfbf float64) {
	if _ecef {
		_ad.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _edec, _gfbf, _cabd.devicePoint(_edec, _gfbf))
	}
	_cabd.addPoint(_edec, _gfbf)
}
// String returns a string describing the current state of the textState stack:
// a header line with the stack size followed by one line per entry, with nil
// entries shown as "<nil>".
func (_eccg *stateStack) String() string {
	lines := make([]string, 0, len(*_eccg)+1)
	lines = append(lines, _dc.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(*_eccg)))
	for i, state := range *_eccg {
		desc := "\u003c\u006e\u0069l\u003e"
		if state != nil {
			desc = state.String()
		}
		lines = append(lines, _dc.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", i, desc))
	}
	return _d.Join(lines, "\u000a")
}

// _dfgc builds a ruling of kind _feae positioned at the rectangle's Ury and
// extending from its Llx to its Urx.
func _dfgc(_aecg _dd.PdfRectangle) *ruling {
	return &ruling{
		_abgae: _feae,
		_acaf:  _aecg.Ury,
		_fgdd:  _aecg.Llx,
		_cage:  _aecg.Urx,
	}
}

// intersections returns, for each ruling index that takes part in at least one
// intersection, the set of indexes of the rulings it intersects. Only pairs
// made of one _fdff ruling and one _feae ruling are tested.
//
// It returns nil when there are fewer than _geda+1 rulings of kind _fdff or
// fewer than _aaed+1 of kind _feae, or when the two kinds together number
// more than _cfgg.
func (_cbda rulingList) intersections() map[int]intSet {
	var firstKind, secondKind []int
	for i, v := range _cbda {
		if v._abgae == _fdff {
			firstKind = append(firstKind, i)
		} else if v._abgae == _feae {
			secondKind = append(secondKind, i)
		}
	}
	if len(firstKind) < _geda+1 || len(secondKind) < _aaed+1 {
		return nil
	}
	total := len(firstKind) + len(secondKind)
	if total > _cfgg {
		_ad.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064", len(_cbda), len(firstKind), len(secondKind))
		return nil
	}

	crossings := make(map[int]intSet, total)
	for _, a := range firstKind {
		for _, b := range secondKind {
			if !_cbda[a].intersects(_cbda[b]) {
				continue
			}
			// Create the per-index sets lazily, then record the pair both ways.
			if _, ok := crossings[a]; !ok {
				crossings[a] = make(intSet)
			}
			if _, ok := crossings[b]; !ok {
				crossings[b] = make(intSet)
			}
			crossings[a].add(b)
			crossings[b].add(a)
		}
	}
	return crossings
}

// pop removes the top entry from the stack and returns a pointer to a copy of
// the state it pointed to. It returns nil when the stack is empty.
func (_ceb *stateStack) pop() *textState {
	if _ceb.empty() {
		return nil
	}
	last := len(*_ceb) - 1
	top := *(*_ceb)[last]
	*_ceb = (*_ceb)[:last]
	return &top
}

// aligned reports whether a sufficient share of the rulings line up with one
// another, as judged by gridIntersecting.
//
// Each ruling after the first is tested against the rulings recorded so far:
// on the first gridIntersecting hit the recorded ruling's counter is
// incremented, otherwise the ruling is recorded itself with a counter of 0.
// The result is true when the fraction (recorded rulings with counter 0) /
// (total rulings) is at most 1.0-_bgce. Lists with fewer than two rulings are
// never aligned.
func (_bbcb rulingList) aligned() bool {
	if len(_bbcb) < 2 {
		return false
	}
	hits := map[*ruling]int{_bbcb[0]: 0}
	for _, v := range _bbcb[1:] {
		matched := false
		for seen := range hits {
			if v.gridIntersecting(seen) {
				hits[seen]++
				matched = true
				break
			}
		}
		if !matched {
			hits[v] = 0
		}
	}

	unmatched := 0
	for _, count := range hits {
		if count == 0 {
			unmatched++
		}
	}
	ratio := float64(unmatched) / float64(len(_bbcb))
	ok := ratio <= 1.0-_bgce
	if _fggb {
		_ad.Log.Info("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", ok, ratio, unmatched, len(_bbcb), _bbcb.String())
	}
	return ok
}

// text returns the _ebed strings of all words in the bag (in allWords order)
// joined by single spaces.
func (_gacc *wordBag) text() string {
	words := _gacc.allWords()
	texts := make([]string, 0, len(words))
	for _, w := range words {
		texts = append(texts, w._ebed)
	}
	return _d.Join(texts, "\u0020")
}
// String returns a string describing the gridTile.
func (_edag gridTile )String ()string {_edfa :=func (_gabac bool ,_fecfd string )string {if _gabac {return _fecfd ;};return "\u005f";};return _dc .Sprintf ("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073",_edag .PdfRectangle ,_edfa (_edag ._gfgca ,"\u004c"),_edfa (_edag ._ebce ,"\u0052"),_edfa (_edag ._bgfee ,"\u0042"),_edfa (_edag ._aabe ,"\u0054"));};func (_afab *wordBag )getDepthIdx (_abfc float64 )int {_cdf :=_afab .depthIndexes ();_gaag :=_egc (_abfc );if _gaag < _cdf [0]{return _cdf [0];};if _gaag > _cdf [len (_cdf )-1]{return _cdf [len (_cdf )-1];};return _gaag ;};func (_eaeaa *subpath )isQuadrilateral ()bool {if len (_eaeaa ._addd )< 4||len (_eaeaa ._addd )> 5{return false ;};if len (_eaeaa ._addd )==5{_eeaab :=_eaeaa ._addd [0];_cdaf :=_eaeaa ._addd [4];if _eeaab .X !=_cdaf .X ||_eeaab .Y !=_cdaf .Y {return false ;};};return true ;};type gridTile struct{_dd .PdfRectangle ;_aabe ,_gfgca ,_bgfee ,_ebce bool ;};type intSet map[int ]struct{};func (_egfab *textTable )get (_ebde ,_dcce int )*textPara {return _egfab ._efbae [_eddd (_ebde ,_dcce )]};func _afgfd (_caff *PageText )error {_fgcaa :=_ag .GetLicenseKey ();if _fgcaa !=nil &&_fgcaa .IsLicensed ()||_ca {return nil ;};_dc .Printf ("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a");_dc .Println ("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f");return _f .New ("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064");};func (_gdgag *textTable )newTablePara ()*textPara {_ccgc :=_gdgag .computeBbox ();_dfbcf :=&textPara {PdfRectangle :_ccgc ,_gaca :_ccgc ,_cgf :_gdgag };if _bedbb {_ad 
.Log .Info ("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073",_dfbcf );};return _dfbcf ;};var _fac =map[markKind ]string {_dega :"\u0073\u0074\u0072\u006f\u006b\u0065",_ffa :"\u0066\u0069\u006c\u006c",_ffbde :"\u0061u\u0067\u006d\u0065\u006e\u0074"};func (_eacc paraList )applyTables (_fcce []*textTable )paraList {_cbcg :=make (map[*textPara ]struct{});var _becc paraList ;for _ ,_fadb :=range _fcce {for _ ,_dcfcg :=range _fadb ._efbae {_cbcg [_dcfcg ]=struct{}{};};_becc =append (_becc ,_fadb .newTablePara ());};for _ ,_gcge :=range _eacc {if _ ,_eaee :=_cbcg [_gcge ];!_eaee {_becc =append (_becc ,_gcge );};};return _becc ;};func _befe (_gdcdc ,_ecdb _dd .PdfRectangle )bool {return _gdcdc .Lly <=_ecdb .Ury &&_ecdb .Lly <=_gdcdc .Ury ;};func (_efce *textObject )newTextMark (_gccg string ,_ceec _cd .Matrix ,_gcac _cd .Point ,_cdcf float64 ,_gde *_dd .PdfFont ,_gade float64 ,_cgc ,_aded _ed .Color )(textMark ,bool ){_fbeg :=_ceec .Angle ();_fegb :=_fbae (_fbeg ,_cccae );var _gfab float64 ;if _fegb %180!=90{_gfab =_ceec .ScalingFactorY ();}else {_gfab =_ceec .ScalingFactorX ();};_agcb :=_aade (_ceec );_aafe :=_dd .PdfRectangle {Llx :_agcb .X ,Lly :_agcb .Y ,Urx :_gcac .X ,Ury :_gcac .Y };switch _fegb %360{case 90:_aafe .Urx -=_gfab ;case 180:_aafe .Ury -=_gfab ;case 270:_aafe .Urx +=_gfab ;case 0:_aafe .Ury +=_gfab ;default:_fegb =0;_aafe .Ury +=_gfab ;};if _aafe .Llx > _aafe .Urx {_aafe .Llx ,_aafe .Urx =_aafe .Urx ,_aafe .Llx ;};if _aafe .Lly > _aafe .Ury {_aafe .Lly ,_aafe .Ury =_aafe .Ury ,_aafe .Lly ;};_bdad ,_ffba :=_dbdg (_aafe ,_efce ._bfe ._eda );if !_ffba {_ad .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_aafe ,_efce ._bfe ._eda ,_gccg );};_aafe =_bdad ;_bfdc 
:=_aafe ;_ggcdb :=_efce ._bfe ._eda ;switch _fegb %360{case 90:_ggcdb .Urx ,_ggcdb .Ury =_ggcdb .Ury ,_gg
// Text returns the extracted page text, i.e. the string held in the
// PageText's _gdcd field.
func (_ddf PageText) Text() string {
	return _ddf._gdcd
}