unipdf/extractor/extractor.go

994 lines
216 KiB
Go
Raw Normal View History

2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2023-11-11 11:29:03 +00:00
package extractor ;import (_dfg "bytes";_a "errors";_gde "fmt";_ac "github.com/unidoc/unipdf/v3/common";_dcg "github.com/unidoc/unipdf/v3/contentstream";_dce "github.com/unidoc/unipdf/v3/core";_ec "github.com/unidoc/unipdf/v3/internal/license";_c "github.com/unidoc/unipdf/v3/internal/textencoding";
_dca "github.com/unidoc/unipdf/v3/internal/transform";_fg "github.com/unidoc/unipdf/v3/model";_ef "golang.org/x/image/draw";_fd "golang.org/x/text/unicode/norm";_gdf "golang.org/x/xerrors";_e "image";_be "image/color";_g "io";_dc "math";_b "reflect";_gd "regexp";
_ab "sort";_df "strings";_fc "unicode";_f "unicode/utf8";);
// reduceTiling returns a table from which redundant composite rows and columns
// have been removed.
//
// A row r is dropped when r > 0, the values `_ggef._cegbg[r-1]` and
// `_ggef._cegbg[r]` differ by less than `_ffbf`, and emptyCompositeRow(r) is true.
// A column c is dropped when it is not the last column, `_ggef._abeb[c+1]` and
// `_ggef._abeb[c]` differ by less than `_ffbf`, and emptyCompositeColumn(c) is true.
//
// If nothing is dropped the receiver itself is returned. Otherwise a new
// textTable is built and every non-empty composite cell of the kept rows and
// columns is stored at its renumbered (column, row) position.
func (_gdce *textTable )reduceTiling (_ggef gridTiling ,_ffbf float64 )*textTable {_egeac :=make ([]int ,0,_gdce ._gebeeb );_acgb :=make ([]int ,0,_gdce ._acddc );_acbg :=_ggef ._abeb ;_ccagb :=_ggef ._cegbg ;
for _cdfd :=0;_cdfd < _gdce ._gebeeb ;_cdfd ++{_ccdce :=_cdfd > 0&&_dc .Abs (_ccagb [_cdfd -1]-_ccagb [_cdfd ])< _ffbf &&_gdce .emptyCompositeRow (_cdfd );if !_ccdce {_egeac =append (_egeac ,_cdfd );};};for _ddbdd :=0;_ddbdd < _gdce ._acddc ;_ddbdd ++{_fdda :=_ddbdd < _gdce ._acddc -1&&_dc .Abs (_acbg [_ddbdd +1]-_acbg [_ddbdd ])< _ffbf &&_gdce .emptyCompositeColumn (_ddbdd );
if !_fdda {_acgb =append (_acgb ,_ddbdd );};};if len (_egeac )==_gdce ._gebeeb &&len (_acgb )==_gdce ._acddc {return _gdce ;};_dfgf :=textTable {_aefef :_gdce ._aefef ,_acddc :len (_acgb ),_gebeeb :len (_egeac ),_edbe :make (map[uint64 ]compositeCell ,len (_acgb )*len (_egeac ))};
if _eadb {_ac .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_gdce ._acddc ,_gdce ._gebeeb ,len (_acgb ),len (_egeac ));_ac .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_acgb );
_ac .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_egeac );};for _gegbb ,_afcg :=range _egeac {for _fdcbd ,_dbffea :=range _acgb {_dbed ,_ceaf :=_gdce .getComposite (_dbffea ,_afcg );if len (_dbed )==0{continue ;
};if _eadb {_gde .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_fdcbd ,_gegbb ,_dbffea ,_afcg ,_dbdbb (_dbed .merge ().text (),50));};_dfgf .putComposite (_fdcbd ,_gegbb ,_dbed ,_ceaf );
};};return &_dfgf ;};
// moveTo stores the device-space image of the point (`_ggbg`, `_edae`), as
// computed by devicePoint, in `_eaeeb` and sets the `_egfd` flag.
// When `_bdefa` is enabled the move is logged.
func (_bgf *shapesState )moveTo (_ggbg ,_edae float64 ){_bgf ._egfd =true ;_bgf ._eaeeb =_bgf .devicePoint (_ggbg ,_edae );if _bdefa {_ac .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_ggbg ,_edae ,_bgf ._eaeeb );
};};
// closePath closes the last subpath in `_efb`.
// If `_egfd` is set, a new subpath built by `_cdec` from `_eaeeb` is appended
// first and the flag is cleared. If the flag is not set and there are no
// subpaths, the flag is cleared and the call returns without closing anything.
func (_cfcd *shapesState )closePath (){if _cfcd ._egfd {_cfcd ._efb =append (_cfcd ._efb ,_cdec (_cfcd ._eaeeb ));_cfcd ._egfd =false ;}else if len (_cfcd ._efb )==0{if _bdefa {_ac .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");
};_cfcd ._egfd =false ;return ;};_cfcd ._efb [len (_cfcd ._efb )-1].close ();if _bdefa {_ac .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_cfcd );};};
// highestWord returns the first word stored under key `_abbb` in `_aac` whose
// `_adgge` value lies in the closed interval [`_ebfc`, `_cdef`], or nil if
// there is none.
// NOTE(review): the name suggests the words under each key are ordered so the
// first match is the "highest" one - confirm against the code that fills `_aac`.
func (_gadc *wordBag )highestWord (_abbb int ,_ebfc ,_cdef float64 )*textWord {for _ ,_cbega :=range _gadc ._aac [_abbb ]{if _ebfc <=_cbega ._adgge &&_cbega ._adgge <=_cdef {return _cbega ;
};};return nil ;};
// close appends the first point of the subpath again when `_ffea` reports that
// it differs from the last point, sets `_bbbb` to true and then removes
// duplicate points.
// It indexes `_gdgd[0]` unconditionally, so the subpath must hold at least one point.
func (_ddgc *subpath )close (){if !_ffea (_ddgc ._gdgd [0],_ddgc .last ()){_ddgc .add (_ddgc ._gdgd [0]);};_ddgc ._bbbb =true ;_ddgc .removeDuplicates ();};
// Error message texts. Decoded, the three strings read:
//   _bf:  "ERROR: Can't convert font object, invalid type"
//   _dcd: "ERROR: Can't get font properties, font not found"
//   _adc: "ERROR: Can't get font stream, invalid type"
const (_bf ="\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065";
_dcd ="\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064";
_adc ="\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065";);
2023-02-07 17:17:49 +00:00
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// Font represents the font properties on a PDF page.
type Font struct{PdfFont *_fg .PdfFont ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// FontName represents Font Name from font properties.
FontName string ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// FontType represents Font Subtype entry in the font dictionary inside page resources.
// Examples : type0, Type1, MMType1, Type3, TrueType, CIDFont.
FontType string ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// ToUnicode is true if font provides a `ToUnicode` mapping.
ToUnicode bool ;
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// IsCID is true if underlying font is a composite font.
// Composite font is represented by a font dictionary whose Subtype is `Type0`
IsCID bool ;
2023-04-06 19:57:40 +00:00
2023-11-11 11:29:03 +00:00
// IsSimple is true if font is simple font.
// A simple font is limited to only 8 bit (255) character codes.
IsSimple bool ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// FontData represents the raw data of the embedded font file.
// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CCF).
// FontData value can be indicates from `FontFile`, `FontFile2` or `FontFile3` inside Font Descriptor.
// At most, only one of `FontFile`, `FontFile2` or `FontFile3` will be FontData value.
FontData []byte ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// FontFileName is a name representing the font. it has format:
// (Font Name) + (Font Type Extension), example: helvetica.ttf.
FontFileName string ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
FontDescriptor *_fg .PdfFontDescriptor ;};func (_gecc *wordBag )firstReadingIndex (_geed int )int {_facfc :=_gecc .firstWord (_geed )._adecc ;_cfcb :=float64 (_geed +1)*_ddgf ;_ecgb :=_cfcb +_eega *_facfc ;_caec :=_geed ;for _ ,_agbf :=range _gecc .depthBand (_cfcb ,_ecgb ){if _gedbc (_gecc .firstWord (_agbf ),_gecc .firstWord (_caec ))< 0{_caec =_agbf ;
};};return _caec ;};
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	Images []ImageMark
}

// getCurrentFont returns the font held by the current text state (`_abcb._ecf`).
// When no font is set it logs a debug message and returns the model package's
// default font instead.
func (to *textObject) getCurrentFont() *_fg.PdfFont {
	if font := to._abcb._ecf; font != nil {
		return font
	}
	_ac.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e")
	return _fg.DefaultFont()
}

// _fbgdd builds a gridTile for rectangle `rect`. Each of the four flags is true
// when the corresponding ruling is non-nil and its encloses method accepts the
// rectangle's extent: the first two rulings are tested against the vertical
// extent (Lly, Ury), the last two against the horizontal extent (Llx, Urx).
func _fbgdd(rect _fg.PdfRectangle, v0, v1, h0, h1 *ruling) gridTile {
	spans := func(r *ruling, lo, hi float64) bool {
		return r != nil && r.encloses(lo, hi)
	}
	return gridTile{
		PdfRectangle: rect,
		_cbge:        spans(v0, rect.Lly, rect.Ury),
		_dafe:        spans(v1, rect.Lly, rect.Ury),
		_beacf:       spans(h0, rect.Llx, rect.Urx),
		_cbbgc:       spans(h1, rect.Llx, rect.Urx),
	}
}

// newTablePara returns a textPara that wraps the table: both its PdfRectangle
// and its `_ebadd` box are set to the table's computed bounding box and its
// `_edce` field points back at the table.
func (t *textTable) newTablePara() *textPara {
	box := t.computeBbox()
	para := &textPara{PdfRectangle: box, _ebadd: box, _edce: t}
	if _eadb {
		_ac.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// _ebaf converts `depth` to an integer index by dividing by `_ddgf` and
// truncating; for values that are not >= 0 the truncated quotient is reduced
// by one.
func _ebaf(depth float64) int {
	if depth >= 0 {
		return int(depth / _ddgf)
	}
	return int(depth/_ddgf) - 1
}

// _bddf reports whether the magnitude of `x` is below the threshold `_gbgf`.
func _bddf(x float64) bool {
	return _dc.Abs(x) < _gbgf
}

// _cda reports whether `c` equals the first byte of any entry of `_bgad`.
// Only the first byte of each entry is compared.
func _cda(c byte) bool {
	for _, entry := range _bgad {
		first := []byte(entry)[0]
		if first == c {
			return true
		}
	}
	return false
}

// pop removes the top element of the stack and returns a pointer to a copy of
// the state it referred to. It returns nil when the stack is empty.
func (s *stateStack) pop() *textState {
	if s.empty() {
		return nil
	}
	last := len(*s) - 1
	top := *(*s)[last]
	*s = (*s)[:last]
	return &top
}

// size returns the number of states on the stack.
func (s *stateStack) size() int {
	return len(*s)
}

// list is a node of a nested list structure: it carries its own text lines
// (`_ggcfd`), a kind tag (`_aeaa`), child lists (`_edge`) and a text value
// (`_fbfaf`).
type list struct {
	_ggcfd []*textLine
	_aeaa  string
	_edge  []*list
	_fbfaf string
}

// _ggde returns nil when a license key is present and licensed, or when the
// package flag `_de` is set. Otherwise it prints an "unlicensed copy" notice to
// standard output and returns an error. The PageText argument is not used.
func _ggde(_ *PageText) error {
	key := _ec.GetLicenseKey()
	licensed := key != nil && key.IsLicensed()
	if licensed || _de {
		return nil
	}
	_gde.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_gde.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _a.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}

// _edacg applies `_fcgc` to the `_ebadd` boxes of the two paragraphs.
func _edacg(a, b *textPara) bool {
	return _fcgc(a._ebadd, b._ebadd)
}
// newTextMark builds a textMark for `_eaedc` rendered with text rendering
// matrix `_bddg` and ending at point `_ccaa`.
//
// The orientation is the matrix angle rounded by `_bcddg` to a multiple of
// `_edef`. The mark height is the matrix's Y scaling factor, or its X scaling
// factor when the orientation is 90 or 270 (mod 180 == 90). The raw bounding
// box runs from the matrix translation to `_ccaa`, is extended by the height in
// the direction given by the orientation, and is normalised so Llx <= Urx and
// Lly <= Ury. If the extractor's box `_aa` has positive width, the raw box is
// replaced by the result of `_bcdd`; when `_bcdd` reports failure the returned
// bool is false. A second rectangle is then derived for orientations 90, 180
// and 270 from `_aa` and stored as the mark's PdfRectangle, while the
// unrotated box is kept in `_fbae`.
//
// Returns the mark and whether it lies on the page (false only when `_bcdd`
// reported failure). The `_cdde` parameter is not used.
func (to *textObject) newTextMark(_eaedc string, _bddg _dca.Matrix, _ccaa _dca.Point, _cdde float64, _ffdc *_fg.PdfFont, _eccb float64, _baba, _dabdb _be.Color, _afdd _dce.PdfObject, _ffcag []string, _aedag int, _ccfg int) (textMark, bool) {
	angle := _bddg.Angle()
	orient := _bcddg(angle, _edef)

	var height float64
	if orient%180 != 90 {
		height = _bddg.ScalingFactorY()
	} else {
		height = _bddg.ScalingFactorX()
	}

	start := _gcdf(_bddg)
	box := _fg.PdfRectangle{Llx: start.X, Lly: start.Y, Urx: _ccaa.X, Ury: _ccaa.Y}
	switch orient % 360 {
	case 90:
		box.Urx -= height
	case 180:
		box.Ury -= height
	case 270:
		box.Urx += height
	case 0:
		box.Ury += height
	default:
		// Any other orientation is treated as horizontal text.
		orient = 0
		box.Ury += height
	}
	if box.Llx > box.Urx {
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	onPage := true
	if to._ccb._aa.Width() > 0 {
		clipped, ok := _bcdd(box, to._ccb._aa)
		if !ok {
			onPage = false
			_ac.Log.Debug("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q", box, to._ccb._aa, _eaedc)
		}
		box = clipped
	}

	rotated := box
	page := to._ccb._aa
	switch orient % 360 {
	case 90:
		page.Urx, page.Ury = page.Ury, page.Urx
		rotated = _fg.PdfRectangle{Llx: page.Urx - box.Ury, Urx: page.Urx - box.Lly, Lly: box.Llx, Ury: box.Urx}
	case 180:
		rotated = _fg.PdfRectangle{Llx: page.Urx - box.Llx, Urx: page.Urx - box.Urx, Lly: page.Ury - box.Lly, Ury: page.Ury - box.Ury}
	case 270:
		page.Urx, page.Ury = page.Ury, page.Urx
		rotated = _fg.PdfRectangle{Llx: box.Ury, Urx: box.Lly, Lly: page.Ury - box.Llx, Ury: page.Ury - box.Urx}
	}
	if rotated.Llx > rotated.Urx {
		rotated.Llx, rotated.Urx = rotated.Urx, rotated.Llx
	}
	if rotated.Lly > rotated.Ury {
		rotated.Lly, rotated.Ury = rotated.Ury, rotated.Lly
	}

	mark := textMark{
		_efgdc:       _eaedc,
		PdfRectangle: rotated,
		_fbae:        box,
		_abef:        _ffdc,
		_cggc:        height,
		_accea:       _eccb,
		_bcbeg:       _bddg,
		_caeg:        _ccaa,
		_aaad:        orient,
		_cccae:       _baba,
		_dcgea:       _dabdb,
		_dafb:        _afdd,
		_facc:        _ffcag,
		Th:           to._abcb._gfad,
		Tw:           to._abcb._ggf,
		_fbcf:        _ccfg,
		_ccfa:        _aedag,
	}
	if _agge {
		_ac.Log.Info("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073", start, _ccaa, mark.String())
	}
	return mark, onPage
}

// showText forwards its arguments unchanged to renderText and returns its error.
func (to *textObject) showText(_fag _dce.PdfObject, _gfg []byte, _gebe int) error {
	return to.renderText(_fag, _gfg, _gebe)
}

// getFont returns the font named `_bdee`, using the extractor's font cache
// (`_bc`) when one is configured.
//
// With a cache, the font dictionary is looked up first and its String() form
// is the cache key. Each cached lookup increments the access counter `_ag`; on
// a hit the entry's access stamp is refreshed and written back to the map.
// The map stores fontEntry values, so the stamp would otherwise be set on a
// copy and lost. On a miss the font is loaded with getFontDirect and inserted;
// when the cache already holds `_acbd` or more entries, the entry with the
// smallest access stamp is evicted first.
//
// Returns the error from getFontDict or getFontDirect when either fails.
func (to *textObject) getFont(_bdee string) (*_fg.PdfFont, error) {
	ext := to._ccb
	if ext._bc != nil {
		dict, err := to.getFontDict(_bdee)
		if err != nil {
			_ac.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073", _bdee, err.Error())
			return nil, err
		}
		ext._ag++
		key := dict.String()
		if entry, ok := ext._bc[key]; ok {
			entry._faad = ext._ag
			// Store the refreshed entry; `entry` is only a copy of the map value.
			ext._bc[key] = entry
			return entry._fgf, nil
		}
	}

	dict, err := to.getFontDict(_bdee)
	if err != nil {
		return nil, err
	}
	font, err := to.getFontDirect(_bdee)
	if err != nil {
		return nil, err
	}

	if ext._bc != nil {
		fresh := fontEntry{font, ext._ag}
		if len(ext._bc) >= _acbd {
			// Evict the least recently used entry (smallest access stamp).
			oldestKey, oldestStamp, found := "", int64(0), false
			for key, entry := range ext._bc {
				if !found || entry._faad < oldestStamp {
					oldestKey, oldestStamp, found = key, entry._faad, true
				}
			}
			if found {
				delete(ext._bc, oldestKey)
			}
		}
		ext._bc[dict.String()] = fresh
	}
	return font, nil
}

// fontEntry is a font cache record: the font itself and the value of the
// access counter when the entry was last stamped.
type fontEntry struct {
	_fgf  *_fg.PdfFont
	_faad int64
}

// allWords returns every word held in `_aac`, concatenated in the iteration
// order of `_aac`.
func (b *wordBag) allWords() []*textWord {
	var words []*textWord
	for _, group := range b._aac {
		words = append(words, group...)
	}
	return words
}

// inDiacriticArea reports whether `other` is positioned close enough to the
// receiver: the sum of the differences of the left and right edges must be
// smaller in magnitude than the receiver's width times `_abbba`, and the
// difference of the bottom edges smaller in magnitude than the receiver's
// height times `_abbba`.
func (tm *textMark) inDiacriticArea(other *textMark) bool {
	dLeft := tm.Llx - other.Llx
	dRight := tm.Urx - other.Urx
	dBottom := tm.Lly - other.Lly
	return _dc.Abs(dLeft+dRight) < tm.Width()*_abbba &&
		_dc.Abs(dBottom) < tm.Height()*_abbba
}

// stateStack is a stack of text states; the last element is the top.
type stateStack []*textState

// quadraticTo adds only the end point (`_cac`, `_face`) to the current path;
// the first two arguments are not used.
func (s *shapesState) quadraticTo(_bddb, _daed, _cac, _face float64) {
	if _bdefa {
		_ac.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	s.addPoint(_cac, _face)
}
// ExtractFonts returns all font information from the page extractor, including
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
//
// The argument `previousPageFonts` is used when building a font catalog for multiple pages or the entire document.
// Its entries are appended to the page's own fonts unless `_bb` finds their FontName there already,
// so the result contains no duplicate entries.
//
// NOTE: If `previousPageFonts` is nil, only the fonts of this page are returned.
func (_eba *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	var page PageFonts
	if err := page.extractPageResourcesToFont(_eba._af); err != nil {
		return nil, err
	}
	if previousPageFonts != nil {
		for _, prev := range previousPageFonts.Fonts {
			if _bb(page.Fonts, prev.FontName) {
				continue
			}
			page.Fonts = append(page.Fonts, prev)
		}
	}
	return &PageFonts{Fonts: page.Fonts}, nil
}

// event is a small record with a float64 value, a flag and an int index.
type event struct {
	_eebcc float64
	_dacd  bool
	_dcgg  int
}

// checkOp validates the parameter count of content stream operation `op`.
//
// If the receiver is nil, a debug message is logged with at most `want`
// parameters of the operation. If `want` is negative, no count check is made.
// Otherwise the operation must have exactly `want` parameters; on a mismatch a
// debug message is logged and false is returned, together with an error when
// `hard` is true and a nil error otherwise.
func (to *textObject) checkOp(op *_dcg.ContentStreamOperation, want int, hard bool) (_dacg bool, _ddb error) {
	if to == nil {
		var shown []_dce.PdfObject
		if want > 0 {
			shown = op.Params
			if len(shown) > want {
				shown = shown[:want]
			}
		}
		_ac.Log.Debug("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076", op.Operand, shown)
	}

	if want < 0 || len(op.Params) == want {
		return true, nil
	}

	var err error
	if hard {
		err = _a.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	}
	_ac.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", op.Operand, want, len(op.Params), op.Params)
	return false, err
}

// sortTopoOrder reorders the paragraphs according to the order computed by
// topoOrder.
func (paras paraList) sortTopoOrder() {
	paras.reorder(paras.topoOrder())
}

// isQuadrilateral reports whether the subpath has four points, or five points
// of which the first and the last have identical coordinates.
func (sp *subpath) isQuadrilateral() bool {
	pts := sp._gdgd
	switch len(pts) {
	case 4:
		return true
	case 5:
		return pts[0].X == pts[4].X && pts[0].Y == pts[4].Y
	default:
		return false
	}
}

// _gcdf returns the translation component of matrix `m` as a point.
func _gcdf(m _dca.Matrix) _dca.Point {
	x, y := m.Translation()
	return _dca.Point{X: x, Y: y}
}
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
func (_cfaf *PageText )ApplyArea (bbox _fg .PdfRectangle ){_egb :=make ([]*textMark ,0,len (_cfaf ._fbga ));for _ ,_bbbg :=range _cfaf ._fbga {if _gege (_bbbg .bbox (),bbox ){_egb =append (_egb ,_bbbg );};};var _ddac paraList ;_aeb :=len (_egb );for _facb :=0;
_facb < 360&&_aeb > 0;_facb +=90{_eecf :=make ([]*textMark ,0,len (_egb )-_aeb );for _ ,_dgg :=range _egb {if _dgg ._aaad ==_facb {_eecf =append (_eecf ,_dgg );};};if len (_eecf )> 0{_cec :=_bgdg (_eecf ,_cfaf ._fgag ,nil ,nil ,_cfaf ._dbc ._dcc );_ddac =append (_ddac ,_cec ...);
_aeb -=len (_eecf );};};_gdgf :=new (_dfg .Buffer );_ddac .writeText (_gdgf );_cfaf ._edee =_gdgf .String ();_cfaf ._fdaf =_ddac .toTextMarks ();_cfaf ._agdc =_ddac .tables ();};func (_bcabe *textWord )toTextMarks (_egafb *int )[]TextMark {var _gbcfg []TextMark ;
for _ ,_eceec :=range _bcabe ._daafd {_gbcfg =_fgea (_gbcfg ,_egafb ,_eceec .ToTextMark ());};return _gbcfg ;};func _gdaf (_ega func (*wordBag ,*textWord ,float64 )bool ,_ccg float64 )func (*wordBag ,*textWord )bool {return func (_egdd *wordBag ,_aacf *textWord )bool {return _ega (_egdd ,_aacf ,_ccg )};
};func (_eec *stateStack )top ()*textState {if _eec .empty (){return nil ;};return (*_eec )[_eec .size ()-1];};func (_bbg *wordBag )arrangeText ()*textPara {_bbg .sort ();if _ade {_bbg .removeDuplicates ();};var _bgfgd []*textLine ;for _ ,_fdcd :=range _bbg .depthIndexes (){for !_bbg .empty (_fdcd ){_fbfdc :=_bbg .firstReadingIndex (_fdcd );
_dcgc :=_bbg .firstWord (_fbfdc );_adaff :=_aaca (_bbg ,_fbfdc );_dgdb :=_dcgc ._adecc ;_eegeb :=_dcgc ._adgge -_dgfb *_dgdb ;_agbc :=_dcgc ._adgge +_dgfb *_dgdb ;_gecd :=_acbbe *_dgdb ;_dbccg :=_eedd *_dgdb ;_edeb :for {var _efge *textWord ;_bcfa :=0;
for _ ,_ecbbe :=range _bbg .depthBand (_eegeb ,_agbc ){_eccce :=_bbg .highestWord (_ecbbe ,_eegeb ,_agbc );if _eccce ==nil {continue ;};_bbdec :=_debff (_eccce ,_adaff ._fgbe [len (_adaff ._fgbe )-1]);if _bbdec < -_dbccg {break _edeb ;};if _bbdec > _gecd {continue ;
};if _efge !=nil &&_gedbc (_eccce ,_efge )>=0{continue ;};_efge =_eccce ;_bcfa =_ecbbe ;};if _efge ==nil {break ;};_adaff .pullWord (_bbg ,_efge ,_bcfa );};_adaff .markWordBoundaries ();_bgfgd =append (_bgfgd ,_adaff );};};if len (_bgfgd )==0{return nil ;
};_ab .Slice (_bgfgd ,func (_cdag ,_faffb int )bool {return _fbba (_bgfgd [_cdag ],_bgfgd [_faffb ])< 0});_bdfec :=_ggdc (_bbg .PdfRectangle ,_bgfgd );if _fccf {_ac .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_bdfec .String ());
if _baaa {for _fgee ,_dfcf :=range _bdfec ._gfaae {_gde .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fgee ,_dfcf .String ());if _dbcb {for _aacfd ,_eegag :=range _dfcf ._fgbe {_gde .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_aacfd ,_eegag .String ());
for _dacgg ,_ebcfe :=range _eegag ._daafd {_gde .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_dacgg ,_ebcfe .String ());};};};};};};return _bdfec ;};type textObject struct{_ccb *Extractor ;_ccae *_fg .PdfPageResources ;_dbf _dcg .GraphicsState ;
_abcb *textState ;_ada *stateStack ;_acbc _dca .Matrix ;_abcf _dca .Matrix ;_ffc []*textMark ;_gbf bool ;};const _degg =1.0/1000.0;func (_gdff *shapesState )devicePoint (_fcfgb ,_ddgd float64 )_dca .Point {_bge :=_gdff ._dged .Mult (_gdff ._gebd );_fcfgb ,_ddgd =_bge .Transform (_fcfgb ,_ddgd );
return _dca .NewPoint (_fcfgb ,_ddgd );};func (_beaa *textPara )getListLines ()[]*textLine {var _eaff []*textLine ;_bfdd :=_aebgd (_beaa ._gfaae );for _ ,_gfgb :=range _beaa ._gfaae {_caef :=_gfgb ._fgbe [0]._fedgb [0];if _cda (_caef ){_eaff =append (_eaff ,_gfgb );
};};_eaff =append (_eaff ,_bfdd ...);return _eaff ;};func (_acff *textPara )isAtom ()*textTable {_gcfe :=_acff ;_bgfgc :=_acff ._abfec ;_ggca :=_acff ._fgbea ;if _bgfgc .taken ()||_ggca .taken (){return nil ;};_eeeaa :=_bgfgc ._fgbea ;if _eeeaa .taken ()||_eeeaa !=_ggca ._abfec {return nil ;
};return _dadcda (_gcfe ,_bgfgc ,_ggca ,_eeeaa );};type markKind int ;func (_abgee *ruling )equals (_gddg *ruling )bool {return _abgee ._gffa ==_gddg ._gffa &&_begea (_abgee ._cbag ,_gddg ._cbag )&&_begea (_abgee ._efgeb ,_gddg ._efgeb )&&_begea (_abgee ._bbge ,_gddg ._bbge );
2023-09-07 17:40:17 +00:00
};
2023-11-11 11:29:03 +00:00
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` < tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
//
// `start` is clamped to the offset of the first mark and `end` to one past the
// offset of the last mark. An error is returned when `ma` is nil, when
// `end` < `start`, when `start` lies beyond every mark, or when the selected
// range is empty. An empty `ma` is returned unchanged.
func (_ddg *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if _ddg == nil {
		return nil, _a.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _gde.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}
	marks := _ddg._gcc
	n := len(marks)
	if n == 0 {
		return _ddg, nil
	}
	if start < marks[0].Offset {
		start = marks[0].Offset
	}
	if end > marks[n-1].Offset+1 {
		end = marks[n-1].Offset + 1
	}

	// First mark whose last byte is at or after `start`.
	iStart := _ab.Search(n, func(i int) bool { return marks[i].Offset+len(marks[i].Text)-1 >= start })
	if !(0 <= iStart && iStart < n) {
		return nil, _gde.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, iStart, n, marks[0], marks[n-1])
	}

	// First mark that starts at or after `end`. sort.Search returns n when no
	// such mark exists, i.e. when the range extends to the last mark; n is a
	// valid exclusive slice bound, so it is accepted rather than reported as
	// out of range.
	iEnd := _ab.Search(n, func(i int) bool { return marks[i].Offset > end-1 })
	if iEnd <= iStart {
		return nil, _gde.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_gcc: marks[iStart:iEnd]}, nil
}

// bbox returns the bounding box of the rulings. The orientation of the first
// ruling (`_gffa` compared with `_faccd`) decides which of primMinMax and
// secMinMax supplies the horizontal and which the vertical extent.
// For an empty list an error is logged and the zero rectangle is returned.
func (rl rulingList) bbox() _fg.PdfRectangle {
	if len(rl) == 0 {
		_ac.Log.Error("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073")
		return _fg.PdfRectangle{}
	}
	var box _fg.PdfRectangle
	if rl[0]._gffa == _faccd {
		box.Llx, box.Urx = rl.secMinMax()
		box.Lly, box.Ury = rl.primMinMax()
	} else {
		box.Llx, box.Urx = rl.primMinMax()
		box.Lly, box.Ury = rl.secMinMax()
	}
	return box
}

// absorb pulls every word of `other` into the receiver, keyed by the index the
// word had in `other`, and then applies the collected removals to `other`.
func (b *wordBag) absorb(other *wordBag) {
	removals := other.makeRemovals()
	for idx, words := range other._aac {
		for _, word := range words {
			b.pullWord(word, idx, removals)
		}
	}
	other.applyRemovals(removals)
}

// writeText writes the text of the paragraph to `w`.
// A paragraph without a table (`_edce` nil) writes its own cell text. For a
// table, cells are written row by row: a missing cell is written as a tab,
// every cell is followed by a space, and rows are separated by newlines.
// Errors from `w` are not checked.
func (p *textPara) writeText(w _g.Writer) {
	table := p._edce
	if table == nil {
		p.writeCellText(w)
		return
	}
	for row := 0; row < table._gebeeb; row++ {
		for col := 0; col < table._acddc; col++ {
			if cell := table.get(col, row); cell == nil {
				w.Write([]byte("\u0009"))
			} else {
				cell.writeCellText(w)
			}
			w.Write([]byte("\u0020"))
		}
		if row < table._gebeeb-1 {
			w.Write([]byte("\u000a"))
		}
	}
}

// _bcddg rounds `v` to the nearest multiple of `step`. A `step` of 0 is
// treated as 1.
func _bcddg(v float64, step int) int {
	if step == 0 {
		step = 1
	}
	unit := float64(step)
	return int(_dc.Round(v/unit) * unit)
}

// _acc returns the translation matrix for the offset given by point `p`.
func _acc(p _dca.Point) _dca.Matrix {
	return _dca.TranslationMatrix(p.X, p.Y)
}
// Len returns the number of TextMarks in `ma`. A nil `ma` has length 0.
func (ma *TextMarkArray) Len() int {
	if ma != nil {
		return len(ma._gcc)
	}
	return 0
}

// _fgfb reports true when either paragraph has its `_bfge` flag set;
// otherwise it returns the result of `_ebfaf` for the difference of the two
// paragraphs' depths.
func _fgfb(a, b *textPara) bool {
	return a._bfge || b._bfge || _ebfaf(a.depth()-b.depth())
}

// extractFormImages extracts the images of the form XObject called `name` in
// `resources`. The form's content stream is processed with the form's own
// resources, falling back to `resources` when the form has none. The counter
// `_fbg` is incremented after a form has been processed successfully.
// A form that cannot be found (nil without error) is skipped silently.
// The graphics state argument is not used.
func (ctx *imageExtractContext) extractFormImages(name *_dce.PdfObjectName, _ _dcg.GraphicsState, resources *_fg.PdfPageResources) error {
	form, err := resources.GetXObjectFormByName(*name)
	if err != nil || form == nil {
		return err
	}

	content, err := form.GetContentStream()
	if err != nil {
		return err
	}

	formResources := form.Resources
	if formResources == nil {
		formResources = resources
	}

	if err = ctx.extractContentStreamImages(string(content), formResources); err != nil {
		return err
	}
	ctx._fbg++
	return nil
}
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_aea *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_feg :=&imageExtractContext {_ebac :options };_gf :=_feg .extractContentStreamImages (_aea ._geb ,_aea ._af );if _gf !=nil {return nil ,_gf ;};return &PageImages {Images :_feg ._dag },nil ;
};func (_bfaa paraList )computeEBBoxes (){if _defga {_ac .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a");};for _ ,_dfea :=range _bfaa {_dfea ._ebadd =_dfea .PdfRectangle ;};_fbce :=_bfaa .yNeighbours (0);
for _egaf ,_daba :=range _bfaa {_afga :=_daba ._ebadd ;_ccdb ,_abada :=-1.0e9,+1.0e9;for _ ,_fddg :=range _fbce [_daba ]{_dfdea :=_bfaa [_fddg ]._ebadd ;if _dfdea .Urx < _afga .Llx {_ccdb =_dc .Max (_ccdb ,_dfdea .Urx );}else if _afga .Urx < _dfdea .Llx {_abada =_dc .Min (_abada ,_dfdea .Llx );
};};for _gadg ,_cabd :=range _bfaa {_cbacc :=_cabd ._ebadd ;if _egaf ==_gadg ||_cbacc .Ury > _afga .Lly {continue ;};if _ccdb <=_cbacc .Llx &&_cbacc .Llx < _afga .Llx {_afga .Llx =_cbacc .Llx ;}else if _cbacc .Urx <=_abada &&_afga .Urx < _cbacc .Urx {_afga .Urx =_cbacc .Urx ;
};};if _defga {_gde .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_egaf ,_daba ._ebadd ,_afga ,_dbdbb (_daba .text (),50));};_daba ._ebadd =_afga ;};if _gcda {for _ ,_ffaa :=range _bfaa {_ffaa .PdfRectangle =_ffaa ._ebadd ;
};};};func _dgcc (_egda *list )[]*textLine {for _ ,_dgbaa :=range _egda ._edge {switch _dgbaa ._aeaa {case "\u004c\u0042\u006fd\u0079":if len (_dgbaa ._ggcfd )!=0{return _dgbaa ._ggcfd ;};return _dgcc (_dgbaa );case "\u0053\u0070\u0061\u006e":return _dgbaa ._ggcfd ;
case "I\u006e\u006c\u0069\u006e\u0065\u0053\u0068\u0061\u0070\u0065":return _dgbaa ._ggcfd ;};};return nil ;};type textTable struct{_fg .PdfRectangle ;_acddc ,_gebeeb int ;_aefef bool ;_cfgbb map[uint64 ]*textPara ;_edbe map[uint64 ]compositeCell ;};func (_cagb *shapesState )establishSubpath ()*subpath {_bgcb ,_fefc :=_cagb .lastpointEstablished ();
if !_fefc {_cagb ._efb =append (_cagb ._efb ,_cdec (_bgcb ));};if len (_cagb ._efb )==0{return nil ;};_cagb ._egfd =false ;return _cagb ._efb [len (_cagb ._efb )-1];};func (_gfe *imageExtractContext )extractContentStreamImages (_fcf string ,_cgf *_fg .PdfPageResources )error {_bfa :=_dcg .NewContentStreamParser (_fcf );
_gdeg ,_dd :=_bfa .Parse ();if _dd !=nil {return _dd ;};if _gfe ._fba ==nil {_gfe ._fba =map[*_dce .PdfObjectStream ]*cachedImage {};};if _gfe ._ebac ==nil {_gfe ._ebac =&ImageExtractOptions {};};_gg :=_dcg .NewContentStreamProcessor (*_gdeg );_gg .AddHandler (_dcg .HandlerConditionEnumAllOperands ,"",_gfe .processOperand );
return _gg .Process (_cgf );};func (_fgeb *textTable )log (_bfdcb string ){if !_eadb {return ;};_ac .Log .Info ("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066",_bfdcb ,_fgeb ._acddc ,_fgeb ._gebeeb ,_fgeb ._aefef ,_fgeb .PdfRectangle );
for _eeda :=0;_eeda < _fgeb ._gebeeb ;_eeda ++{for _fdcdc :=0;_fdcdc < _fgeb ._acddc ;_fdcdc ++{_bfgef :=_fgeb .get (_fdcdc ,_eeda );if _bfgef ==nil {continue ;};_gde .Printf ("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a",_fdcdc ,_eeda ,_bfgef .PdfRectangle ,_dbdbb (_bfgef .text (),50),_f .RuneCountInString (_bfgef .text ()));
};};};func (_bebcf *textTable )toTextTable ()TextTable {if _eadb {_ac .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_bebcf ._acddc ,_bebcf ._gebeeb );};_acec :=make ([][]TableCell ,_bebcf ._gebeeb );
for _eadc :=0;_eadc < _bebcf ._gebeeb ;_eadc ++{_acec [_eadc ]=make ([]TableCell ,_bebcf ._acddc );for _faefg :=0;_faefg < _bebcf ._acddc ;_faefg ++{_cdeae :=_bebcf .get (_faefg ,_eadc );if _cdeae ==nil {continue ;};if _eadb {_gde .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_faefg ,_eadc ,_cdeae );
};_acec [_eadc ][_faefg ].Text =_cdeae .text ();_efae :=0;_acec [_eadc ][_faefg ].Marks ._gcc =_cdeae .toTextMarks (&_efae );};};_bbgd :=TextTable {W :_bebcf ._acddc ,H :_bebcf ._gebeeb ,Cells :_acec };_bbgd .PdfRectangle =_bebcf .bbox ();return _bbgd ;
};func _gdfa (_gdea []*textLine ,_dffe map[float64 ][]*textLine ,_fdba []float64 ,_cfece int ,_dcga ,_fggd float64 )[]*list {_gfdc :=[]*list {};_egfec :=_cfece ;_cfece =_cfece +1;_aadfa :=_fdba [_egfec ];_bbbe :=_dffe [_aadfa ];_gcaa :=_fgca (_bbbe ,_fggd ,_dcga );
for _fdcb ,_beade :=range _gcaa {var _fabg float64 ;_cgcc :=[]*list {};_daaa :=_beade ._bfcg ;_fggde :=_fggd ;if _fdcb < len (_gcaa )-1{_fggde =_gcaa [_fdcb +1]._bfcg ;};if _cfece < len (_fdba ){_cgcc =_gdfa (_gdea ,_dffe ,_fdba ,_cfece ,_daaa ,_fggde );
};_fabg =_fggde ;if len (_cgcc )> 0{_bgfd :=_cgcc [0];if len (_bgfd ._ggcfd )> 0{_fabg =_bgfd ._ggcfd [0]._bfcg ;};};_cgfae :=[]*textLine {_beade };_aceg :=_bdca (_beade ,_gdea ,_fdba ,_daaa ,_fabg );_cgfae =append (_cgfae ,_aceg ...);_gdddf :=_bddc (_cgfae ,"\u0062\u0075\u006c\u006c\u0065\u0074",_cgcc );
_gdddf ._fbfaf =_gadcf (_cgfae ,"");_gfdc =append (_gfdc ,_gdddf );};return _gfdc ;};func (_aada rulingList )snapToGroups ()rulingList {_ccdg ,_gfefd :=_aada .vertsHorzs ();if len (_ccdg )> 0{_ccdg =_ccdg .snapToGroupsDirection ();};if len (_gfefd )> 0{_gfefd =_gfefd .snapToGroupsDirection ();
};_acdgb :=append (_ccdg ,_gfefd ...);_acdgb .log ("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073");return _acdgb ;};func (_gdbag *textTable )subdivide ()*textTable {_gdbag .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");
_edfbd :=_gdbag .compositeRowCorridors ();_eeeagf :=_gdbag .compositeColCorridors ();if _eadb {_ac .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_cbgbf (_edfbd ),_cbgbf (_eeeagf ));
};if len (_edfbd )==0||len (_eeeagf )==0{return _gdbag ;};_dcgaf (_edfbd );_dcgaf (_eeeagf );if _eadb {_ac .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_cbgbf (_edfbd ),_cbgbf (_eeeagf ));
};_cedff ,_begd :=_bfgee (_gdbag ._gebeeb ,_edfbd );_fgcc ,_abade :=_bfgee (_gdbag ._acddc ,_eeeagf );_cada :=make (map[uint64 ]*textPara ,_abade *_begd );_gddbf :=&textTable {PdfRectangle :_gdbag .PdfRectangle ,_aefef :_gdbag ._aefef ,_gebeeb :_begd ,_acddc :_abade ,_cfgbb :_cada };
if _eadb {_ac .Log .Info ("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_gdbag ._acddc ,_gdbag ._gebeeb ,_abade ,_begd ,_cbgbf (_edfbd ),_cbgbf (_eeeagf ),_cedff ,_fgcc );
};for _efac :=0;_efac < _gdbag ._gebeeb ;_efac ++{_cbgag :=_cedff [_efac ];for _adcd :=0;_adcd < _gdbag ._acddc ;_adcd ++{_dfaa :=_fgcc [_adcd ];if _eadb {_gde .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_adcd ,_efac ,_dfaa ,_cbgag );
};_gffaa ,_cfea :=_gdbag ._edbe [_bafcd (_adcd ,_efac )];if !_cfea {continue ;};_beda :=_gffaa .split (_edfbd [_efac ],_eeeagf [_adcd ]);for _cdecd :=0;_cdecd < _beda ._gebeeb ;_cdecd ++{for _facg :=0;_facg < _beda ._acddc ;_facg ++{_cbgaga :=_beda .get (_facg ,_cdecd );
_gddbf .put (_dfaa +_facg ,_cbgag +_cdecd ,_cbgaga );if _eadb {_gde .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_dfaa +_facg ,_cbgag +_cdecd ,_cbgaga );};};};};};return _gddbf ;};func _becd (_dbce _fg .PdfColorspace ,_dccdd _fg .PdfColor )_be .Color {if _dbce ==nil ||_dccdd ==nil {return _be .Black ;
};_afccf ,_cegfa :=_dbce .ColorToRGB (_dccdd );if _cegfa !=nil {_ac .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_dccdd ,_dbce ,_cegfa );
return _be .Black ;};_eafdf ,_ddcf :=_afccf .(*_fg .PdfColorDeviceRGB );if !_ddcf {_ac .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_afccf );
return _be .Black ;};return _be .NRGBA {R :uint8 (_eafdf .R ()*255),G :uint8 (_eafdf .G ()*255),B :uint8 (_eafdf .B ()*255),A :uint8 (255)};};func _ageb (_gebaf ,_fddd ,_eagd float64 )rulingKind {if _gebaf >=_eagd &&_edfa (_fddd ,_gebaf ){return _faccd ;
};if _fddd >=_eagd &&_edfa (_gebaf ,_fddd ){return _cbab ;};return _ccfb ;};func (_dbdb *textLine )appendWord (_gbaf *textWord ){_dbdb ._fgbe =append (_dbdb ._fgbe ,_gbaf );_dbdb .PdfRectangle =_ebge (_dbdb .PdfRectangle ,_gbaf .PdfRectangle );if _gbaf ._adecc > _dbdb ._ceacg {_dbdb ._ceacg =_gbaf ._adecc ;
};if _gbaf ._adgge > _dbdb ._bfcg {_dbdb ._bfcg =_gbaf ._adgge ;};};func _dgfe (_aacfb ,_bcbe _fg .PdfRectangle )bool {return _aacfb .Llx <=_bcbe .Llx &&_bcbe .Urx <=_aacfb .Urx &&_aacfb .Lly <=_bcbe .Lly &&_bcbe .Ury <=_aacfb .Ury ;};
// TableInfo gets table information of the textmark `tm`.
// It returns (nil, nil) when the mark's `_dgbe` flag is not set. Otherwise it returns the
// table stored in `_acbcf` together with whatever getCellInfo reports for this mark.
func (_ceae *TextMark) TableInfo() (*TextTable, [][]int) {
	if !_ceae._dgbe {
		return nil, nil
	}
	table := _ceae._acbcf
	cellInfo := table.getCellInfo(*_ceae)
	return table, cellInfo
}

// textPara is a rectangular block of text lines, optionally carrying a table and links to
// neighbouring paragraphs.
type textPara struct {
	_fg.PdfRectangle                  // Embedded bounding box of the paragraph.
	_ebadd           _fg.PdfRectangle // NOTE(review): second rectangle; its role is not visible in this chunk.
	_gfaae           []*textLine      // Text lines of the paragraph.
	_edce            *textTable       // Table attached to the paragraph, or nil.
	_dcada           bool             // Flag reported by taken().
	_bfge            bool             // NOTE(review): purpose not visible in this chunk.
	_bfdgg           *textPara        // Neighbour picked by addNeighbours among paras lying to the left.
	_abfec           *textPara        // Neighbour picked by addNeighbours among paras lying to the right.
	_caege           *textPara        // Neighbour picked by addNeighbours among paras lying above.
	_fgbea           *textPara        // Neighbour picked by addNeighbours among paras lying below.
	_ceab            []list           // NOTE(review): presumably lists detected in the paragraph; confirm.
}

// markWordBoundaries sets `_eadcb` on every word (except the first) whose `_debff` distance
// to the preceding word is at least `_aeeg` times the line's `_ceacg`.
// The line must contain at least one word; slicing `[1:]` of an empty slice panics.
func (_dcac *textLine) markWordBoundaries() {
	threshold := _aeeg * _dcac._ceacg
	for i, word := range _dcac._fgbe[1:] {
		// `i` indexes the slice that starts at word 1, so `_fgbe[i]` is the previous word.
		previous := _dcac._fgbe[i]
		if _debff(word, previous) >= threshold {
			word._eadcb = true
		}
	}
}

// parseStructTreeRoot fills the receiver from the structure tree root object `_bgde`.
// It reads the "K" entry (an array, or a single reference that is wrapped into a one-element
// array), parses every element into a structElement and stores the result in `_ccdea`.
// The string form of the "Type" entry is stored in `_cefga`.
//
// Nothing is modified when `_bgde` is nil or does not resolve to a dictionary. A missing
// "Type" entry yields an empty type string instead of a nil dereference.
func (_gfda *structTreeRoot) parseStructTreeRoot(_bgde _dce.PdfObject) {
	if _bgde == nil {
		return
	}
	dict, isDict := _dce.GetDict(_bgde)
	if !isDict {
		_ac.Log.Debug("\u0070\u0061\u0072s\u0065\u0053\u0074\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u003a\u0020\u0064\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006eo\u0074\u0020\u0066\u006f\u0075\u006e\u0064\u002e")
		// Without a dictionary there is nothing to parse; continuing would dereference nil.
		return
	}

	kids := dict.Get("\u004b")
	typeName := ""
	if typeObj := dict.Get("\u0054\u0079\u0070\u0065"); typeObj != nil {
		typeName = typeObj.String()
	}

	var kidArray *_dce.PdfObjectArray
	switch k := kids.(type) {
	case *_dce.PdfObjectArray:
		kidArray = k
	case *_dce.PdfObjectReference:
		kidArray = _dce.MakeArray(kids)
	}

	// Kept as a non-nil empty slice when there are no kids, as before.
	elements := []structElement{}
	for _, kid := range kidArray.Elements() {
		elem := &structElement{}
		elem.parseStructElement(kid)
		elements = append(elements, *elem)
	}
	_gfda._ccdea = elements
	_gfda._cefga = typeName
}

// emptyCompositeRow reports whether no composite cell in row `_cdfdf` holds any paragraph.
// Cells missing from the `_edbe` map count as empty.
func (_bdff *textTable) emptyCompositeRow(_cdfdf int) bool {
	for x := 0; x < _bdff._acddc; x++ {
		cell, present := _bdff._edbe[_bafcd(x, _cdfdf)]
		if present && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// _dfdf returns the keys of `_dccce` sorted in ascending order.
func _dfdf(_dccce map[int][]float64) []int {
	keys := make([]int, 0, len(_dccce))
	for key := range _dccce {
		keys = append(keys, key)
	}
	_ab.Ints(keys)
	return keys
}

// getFillColor converts the non-stroking colour of the text object's state `_dbf` with `_becd`.
func (_babb *textObject) getFillColor() _be.Color {
	state := _babb._dbf
	return _becd(state.ColorspaceNonStroking, state.ColorNonStroking)
}

// yNeighbours builds two events per paragraph from its Lly and Ury and passes them to
// eventNeighbours. When `_dfecc` is non-zero the interval is widened on both sides by
// `_dfecc` times the paragraph's fontsize().
func (_efaae paraList) yNeighbours(_dfecc float64) map[*textPara][]int {
	events := make([]event, 2*len(_efaae))
	if _dfecc == 0 {
		for i, para := range _efaae {
			events[2*i] = event{para.Lly, true, i}
			events[2*i+1] = event{para.Ury, false, i}
		}
	} else {
		for i, para := range _efaae {
			events[2*i] = event{para.Lly - _dfecc*para.fontsize(), true, i}
			events[2*i+1] = event{para.Ury + _dfecc*para.fontsize(), false, i}
		}
	}
	return _efaae.eventNeighbours(events)
}

// getDepthIdx maps `_fcee` to an index with `_ebaf` and clamps it to the range spanned by the
// first and last entries of depthIndexes().
// NOTE(review): assumes depthIndexes() is non-empty and ordered ascending; confirm with callers.
func (_befe *wordBag) getDepthIdx(_fcee float64) int {
	indexes := _befe.depthIndexes()
	idx := _ebaf(_fcee)
	lowest, highest := indexes[0], indexes[len(indexes)-1]
	if idx < lowest {
		return lowest
	}
	if idx > highest {
		return highest
	}
	return idx
}

// _dceb is a regular expression source with four alternatives anchored at the start of the
// text: a letter followed by ")" or ".", digits followed by ")" or ".", a letter in
// parentheses, and digits in parentheses.
var _dceb string = "\u005e\u005b\u0061\u002d\u007a\u0041\u002dZ\u005d\u0028\u005c)\u007c\u005c\u002e)\u007c\u005e[\u005c\u0064\u005d\u002b\u0028\u005c)\u007c\\.\u0029\u007c\u005e\u005c\u0028\u005b\u0061\u002d\u007a\u0041\u002d\u005a\u005d\u005c\u0029\u007c\u005e\u005c\u0028\u005b\u005c\u0064\u005d\u002b\u005c\u0029"
// writeCellText writes the text of every line of the paragraph to `_bgag`.
// A line that ends in a hyphen (and is not the last line) is passed through `_badfg` when
// `_ddcg` is enabled, and no separator is written after it. After every other non-final line
// the separator returned by `_eecc` for the `_bfcg` values of the current and next line is
// written. Errors returned by Write are ignored.
func (_abga *textPara )writeCellText (_bgag _g .Writer ){for _aacd ,_cgfdg :=range _abga ._gfaae {_fgfe :=_cgfdg .text ();_bbdd :=_ddcg &&_cgfdg .endsInHyphen ()&&_aacd !=len (_abga ._gfaae )-1;if _bbdd {_fgfe =_badfg (_fgfe );};_bgag .Write ([]byte (_fgfe ));
// (The line below finishes writeCellText.)
// _feea groups text lines by the `_fbcf` value of the first mark of their first word (an
// "mcid number" according to the debug message). It visits the lines of every paragraph in
// the list and, for paragraphs with a table, the lines of every table cell in `_cfgbb`.
// Lines rejected by `_gfdg` are skipped with a debug message.
if !(_bbdd ||_aacd ==len (_abga ._gfaae )-1){_bgag .Write ([]byte (_eecc (_cgfdg ._bfcg ,_abga ._gfaae [_aacd +1]._bfcg )));};};};func _feea (_cdae *paraList )map[int ][]*textLine {_bacb :=map[int ][]*textLine {};for _ ,_bdbc :=range *_cdae {for _ ,_bgdfd :=range _bdbc ._gfaae {if !_gfdg (_bgdfd ){_ac .Log .Debug ("g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e");
continue ;};_agga :=_bgdfd ._fgbe [0]._daafd [0]._fbcf ;_bacb [_agga ]=append (_bacb [_agga ],_bgdfd );};if _bdbc ._edce !=nil {_ffcad :=_bdbc ._edce ._cfgbb ;for _ ,_aagg :=range _ffcad {for _ ,_degf :=range _aagg ._gfaae {if !_gfdg (_degf ){_ac .Log .Debug ("g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e");
// (The line below finishes _feea.)
// snapToGroupsDirection snaps the rulings of the list, which must be non-empty, onto shared
// primary coordinates. After sortStrict it walks the rulings and starts a new group whenever
// a ruling's `_cbag` exceeds the current group leader's `_cbag` by more than `_gbgf`; a
// ruling that sorts before its leader by more than `_bafg` is reported as an error.
continue ;};_adgg :=_degf ._fgbe [0]._daafd [0]._fbcf ;_bacb [_adgg ]=append (_bacb [_adgg ],_degf );};};};};return _bacb ;};func (_cdcbc rulingList )snapToGroupsDirection ()rulingList {_cdcbc .sortStrict ();_geeg :=make (map[*ruling ]rulingList ,len (_cdcbc ));
_defbc :=_cdcbc [0];_ddgbg :=func (_fedfd *ruling ){_defbc =_fedfd ;_geeg [_defbc ]=rulingList {_fedfd }};_ddgbg (_cdcbc [0]);for _ ,_dgcd :=range _cdcbc [1:]{if _dgcd ._cbag < _defbc ._cbag -_bafg {_ac .Log .Error ("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073",_defbc ,_dgcd );
// Each group's mergePrimary() result becomes the `_cbag` of every ruling in that group
// (the rulings are modified in place).
};if _dgcd ._cbag > _defbc ._cbag +_gbgf {_ddgbg (_dgcd );}else {_geeg [_defbc ]=append (_geeg [_defbc ],_dgcd );};};_dffc :=make (map[*ruling ]float64 ,len (_geeg ));_fedgc :=make (map[*ruling ]*ruling ,len (_cdcbc ));for _aade ,_ggbcb :=range _geeg {_dffc [_aade ]=_ggbcb .mergePrimary ();
// Every group is then split with splitSec() and each part merged into one ruling. A merged
// ruling that aligns (primary and secondary) with the most recently appended one is logged
// as a duplicate and dropped. Groups are visited in map iteration order.
for _ ,_dcbgd :=range _ggbcb {_fedgc [_dcbgd ]=_aade ;};};for _ ,_afdca :=range _cdcbc {_afdca ._cbag =_dffc [_fedgc [_afdca ]];};_facfcd :=make (rulingList ,0,len (_cdcbc ));for _ ,_efgg :=range _geeg {_bcccd :=_efgg .splitSec ();for _efgf ,_ggceg :=range _bcccd {_gbfd :=_ggceg .merge ();
if len (_facfcd )> 0{_ggfea :=_facfcd [len (_facfcd )-1];if _ggfea .alignsPrimary (_gbfd )&&_ggfea .alignsSec (_gbfd ){_ac .Log .Error ("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073",_efgf ,_ggfea ,_gbfd );
// (The line below finishes snapToGroupsDirection: the result is sorted with sortStrict.)
// addNeighbours fills the four neighbour links of every paragraph in the list.
// The first helper splits candidate indexes into paragraphs entirely to the left
// (Urx <= Llx of the reference) and entirely to the right (Llx >= Urx). Both helpers
// allocate with capacity len(candidates)-1, so they must not be called with an empty slice.
continue ;};};_facfcd =append (_facfcd ,_gbfd );};};_facfcd .sortStrict ();return _facfcd ;};func (_fgdd paraList )addNeighbours (){_gcbbf :=func (_gacfb []int ,_aaaa *textPara )([]*textPara ,[]*textPara ){_bddcc :=make ([]*textPara ,0,len (_gacfb )-1);
// The second helper returns (paragraphs entirely above, paragraphs entirely below).
_beab :=make ([]*textPara ,0,len (_gacfb )-1);for _ ,_cgfg :=range _gacfb {_ecgc :=_fgdd [_cgfg ];if _ecgc .Urx <=_aaaa .Llx {_bddcc =append (_bddcc ,_ecgc );}else if _ecgc .Llx >=_aaaa .Urx {_beab =append (_beab ,_ecgc );};};return _bddcc ,_beab ;};_edcd :=func (_daaed []int ,_becc *textPara )([]*textPara ,[]*textPara ){_fcbe :=make ([]*textPara ,0,len (_daaed )-1);
_acfa :=make ([]*textPara ,0,len (_daaed )-1);for _ ,_baage :=range _daaed {_cadcb :=_fgdd [_baage ];if _cadcb .Ury <=_becc .Lly {_acfa =append (_acfa ,_cadcb );}else if _cadcb .Lly >=_becc .Ury {_fcbe =append (_fcbe ,_cadcb );};};return _fcbe ,_acfa ;
// Horizontal pass over yNeighbours(_aec): the left candidate is the one with the largest
// Urx. It is rejected when another left candidate reaches past its Llx or when `_cced`
// fails, otherwise it is stored in `_bfdgg`.
};_dabea :=_fgdd .yNeighbours (_aec );for _ ,_debdd :=range _fgdd {_eafdc :=_dabea [_debdd ];if len (_eafdc )==0{continue ;};_gbfb ,_acfd :=_gcbbf (_eafdc ,_debdd );if len (_gbfb )==0&&len (_acfd )==0{continue ;};if len (_gbfb )> 0{_gdab :=_gbfb [0];for _ ,_aagff :=range _gbfb [1:]{if _aagff .Urx >=_gdab .Urx {_gdab =_aagff ;
// The right candidate is the one with the smallest Llx, checked the same way and stored
// in `_abfec`.
};};for _ ,_eagg :=range _gbfb {if _eagg !=_gdab &&_eagg .Urx > _gdab .Llx {_gdab =nil ;break ;};};if _gdab !=nil &&_cced (_debdd .PdfRectangle ,_gdab .PdfRectangle ){_debdd ._bfdgg =_gdab ;};};if len (_acfd )> 0{_acebg :=_acfd [0];for _ ,_bbfac :=range _acfd [1:]{if _bbfac .Llx <=_acebg .Llx {_acebg =_bbfac ;
// Vertical pass over xNeighbours(_bdag), using `_fcgc` as the acceptance test.
};};for _ ,_begad :=range _acfd {if _begad !=_acebg &&_begad .Llx < _acebg .Urx {_acebg =nil ;break ;};};if _acebg !=nil &&_cced (_debdd .PdfRectangle ,_acebg .PdfRectangle ){_debdd ._abfec =_acebg ;};};};_dabea =_fgdd .xNeighbours (_bdag );for _ ,_gbffc :=range _fgdd {_bbcg :=_dabea [_gbffc ];
// Below: the candidate with the largest Ury, stored in `_fgbea`.
if len (_bbcg )==0{continue ;};_gbac ,_ecee :=_edcd (_bbcg ,_gbffc );if len (_gbac )==0&&len (_ecee )==0{continue ;};if len (_ecee )> 0{_efcbg :=_ecee [0];for _ ,_bdagdd :=range _ecee [1:]{if _bdagdd .Ury >=_efcbg .Ury {_efcbg =_bdagdd ;};};for _ ,_cgced :=range _ecee {if _cgced !=_efcbg &&_cgced .Ury > _efcbg .Lly {_efcbg =nil ;
// Above: the candidate with the smallest Lly, stored in `_caege`.
break ;};};if _efcbg !=nil &&_fcgc (_gbffc .PdfRectangle ,_efcbg .PdfRectangle ){_gbffc ._fgbea =_efcbg ;};};if len (_gbac )> 0{_dggga :=_gbac [0];for _ ,_abcba :=range _gbac [1:]{if _abcba .Lly <=_dggga .Lly {_dggga =_abcba ;};};for _ ,_ccda :=range _gbac {if _ccda !=_dggga &&_ccda .Lly < _dggga .Ury {_dggga =nil ;
// Final pass: a link is kept only when the neighbour links back in the opposite direction.
break ;};};if _dggga !=nil &&_fcgc (_gbffc .PdfRectangle ,_dggga .PdfRectangle ){_gbffc ._caege =_dggga ;};};};for _ ,_cdcc :=range _fgdd {if _cdcc ._bfdgg !=nil &&_cdcc ._bfdgg ._abfec !=_cdcc {_cdcc ._bfdgg =nil ;};if _cdcc ._caege !=nil &&_cdcc ._caege ._fgbea !=_cdcc {_cdcc ._caege =nil ;
// (The line below finishes addNeighbours.)
// isExportable returns true immediately when `_aefef` is set. Otherwise it inspects the
// cell in column 0 of every row: a row "passes" when that cell exists and its text is at
// most one rune long or matches `_bfaaa`. The table is exportable as soon as one row does
// not pass, and not exportable when every row passes.
};if _cdcc ._abfec !=nil &&_cdcc ._abfec ._bfdgg !=_cdcc {_cdcc ._abfec =nil ;};if _cdcc ._fgbea !=nil &&_cdcc ._fgbea ._caege !=_cdcc {_cdcc ._fgbea =nil ;};};};func (_facba *textTable )isExportable ()bool {if _facba ._aefef {return true ;};_cfce :=func (_fgac int )bool {_bgcec :=_facba .get (0,_fgac );
if _bgcec ==nil {return false ;};_agbe :=_bgcec .text ();_edgc :=_f .RuneCountInString (_agbe );_cade :=_bfaaa .MatchString (_agbe );return _edgc <=1||_cade ;};for _gagb :=0;_gagb < _facba ._gebeeb ;_gagb ++{if !_cfce (_gagb ){return true ;};};return false ;
// (The line below finishes isExportable.)
// paraList is a slice of paragraph pointers.
// _eeead returns the Ury of `_bbfa` minus the Lly of the bounding box of `_bcae`.
// structTreeRoot holds the parsed child elements (`_ccdea`) and the type string (`_cefga`).
// gridTile.contains returns false for tiles with fewer than three borders. Otherwise the
// rectangle is tested only against the sides whose border flag is set, with tolerance
// `_gagd`: `_cbge` guards Llx, `_dafe` guards Urx, `_beacf` guards Lly, `_cbbgc` guards Ury.
};type paraList []*textPara ;func _eeead (_bbfa _fg .PdfRectangle ,_bcae bounded )float64 {return _bbfa .Ury -_bcae .bbox ().Lly };type structTreeRoot struct{_ccdea []structElement ;_cefga string ;};func (_beacb gridTile )contains (_agca _fg .PdfRectangle )bool {if _beacb .numBorders ()< 3{return false ;
};if _beacb ._cbge &&_agca .Llx < _beacb .Llx -_gagd {return false ;};if _beacb ._dafe &&_agca .Urx > _beacb .Urx +_gagd {return false ;};if _beacb ._beacf &&_agca .Lly < _beacb .Lly -_gagd {return false ;};if _beacb ._cbbgc &&_agca .Ury > _beacb .Ury +_gagd {return false ;
// (The line below finishes gridTile.contains.)
// drawRectangle appends a closed rectangular subpath with corner (`_cgae`, `_gdfg`),
// width `_bbcdc` and height `_baec`. When `_bdefa` is enabled the device-space rectangle
// is logged first.
};return true ;};func (_cfg *shapesState )drawRectangle (_cgae ,_gdfg ,_bbcdc ,_baec float64 ){if _bdefa {_fbb :=_cfg .devicePoint (_cgae ,_gdfg );_ggfb :=_cfg .devicePoint (_cgae +_bbcdc ,_gdfg +_baec );_adcc :=_fg .PdfRectangle {Llx :_fbb .X ,Lly :_fbb .Y ,Urx :_ggfb .X ,Ury :_ggfb .Y };
_ac .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_adcc );};_cfg .newSubPath ();_cfg .moveTo (_cgae ,_gdfg );_cfg .lineTo (_cgae +_bbcdc ,_gdfg );_cfg .lineTo (_cgae +_bbcdc ,_gdfg +_baec );
// (The line below finishes drawRectangle.)
// alignsPrimary reports whether both rulings have the same kind (`_gffa`) and their
// primary coordinates (`_cbag`) differ by less than half of `_gbgf`.
// _edfa reports whether `_edafg` divided by max(`_cfca`, `_cdca`) is below `_edec`.
_cfg .lineTo (_cgae ,_gdfg +_baec );_cfg .closePath ();};func (_dbdbf *ruling )alignsPrimary (_bcbc *ruling )bool {return _dbdbf ._gffa ==_bcbc ._gffa &&_dc .Abs (_dbdbf ._cbag -_bcbc ._cbag )< _gbgf *0.5;};func _edfa (_edafg ,_cdca float64 )bool {return _edafg /_dc .Max (_cfca ,_cdca )< _edec };
// augmentGrid splits the list into vertical and horizontal rulings and, where the bounding
// box of one direction sticks out past the bounding box of the other by more than `_cebe`,
// adds a ruling of kind `_acaca` along the missing side. The original split is returned
// unchanged when either direction is empty or when nothing was added.
func (_ddae rulingList) augmentGrid() (rulingList, rulingList) {
	verts, horzs := _ddae.vertsHorzs()
	if len(verts) == 0 || len(horzs) == 0 {
		return verts, horzs
	}
	origVerts, origHorzs := verts, horzs
	vertBox := verts.bbox()
	horzBox := horzs.bbox()
	if _aebg {
		_ac.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066", vertBox)
		_ac.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066", horzBox)
	}

	// Horizontals reach further left than any vertical: prepend a vertical at their left end.
	if horzBox.Llx < vertBox.Llx-_cebe {
		left := &ruling{_adaa: _acaca, _gffa: _cbab, _cbag: horzBox.Llx, _efgeb: vertBox.Lly, _bbge: vertBox.Ury}
		verts = append(rulingList{left}, verts...)
	}
	// Horizontals reach further right than any vertical: append a vertical at their right end.
	if horzBox.Urx > vertBox.Urx+_cebe {
		right := &ruling{_adaa: _acaca, _gffa: _cbab, _cbag: horzBox.Urx, _efgeb: vertBox.Lly, _bbge: vertBox.Ury}
		verts = append(verts, right)
	}
	// Verticals reach further down than any horizontal: prepend a horizontal at their bottom.
	if vertBox.Lly < horzBox.Lly-_cebe {
		bottom := &ruling{_adaa: _acaca, _gffa: _faccd, _cbag: vertBox.Lly, _efgeb: horzBox.Llx, _bbge: horzBox.Urx}
		horzs = append(rulingList{bottom}, horzs...)
	}
	// Verticals reach further up than any horizontal: append a horizontal at their top.
	if vertBox.Ury > horzBox.Ury+_cebe {
		top := &ruling{_adaa: _acaca, _gffa: _faccd, _cbag: vertBox.Ury, _efgeb: horzBox.Llx, _bbge: horzBox.Urx}
		horzs = append(horzs, top)
	}

	if len(verts)+len(horzs) == len(_ddae) {
		return origVerts, origHorzs
	}
	combined := append(verts, horzs...)
	_ddae.log("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064")
	combined.log("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d")
	return verts, horzs
}

// Compile-time switches. All of them evaluate to false. The ones read in this part of the
// file (`_bdefa`, `_abfe`, `_eadb`, `_aebg`, `_gbde`) gate diagnostic logging.
const (
	_defga = false
	_agge  = false
	_faee  = false
	_cgcbc = false
	_bdefa = false
	_adbb  = false
	_abcbg = false
	_abfe  = false
	_fccf  = false
	_baaa  = _fccf && true
	_dbcb  = _baaa && false
	_bfcf  = _fccf && true
	_eadb  = false
	_gddd  = _eadb && false
	_cece  = _eadb && true
	_aebg  = false
	_aab   = _aebg && false
	_bdeff = _aebg && false
	_gbde  = _aebg && true
	_cadc  = _aebg && false
	_gbbdd = _aebg && false
)

// taken reports whether the paragraph is nil or has its `_dcada` flag set.
func (_daafg *textPara) taken() bool {
	if _daafg == nil {
		return true
	}
	return _daafg._dcada
}

// _dadcda builds a 2x2 table whose cells (0,0), (1,0), (0,1) and (1,1) are `_ddfgd`,
// `_ddfge`, `_gccg` and `_dbffe` respectively.
func _dadcda(_ddfgd, _ddfge, _gccg, _dbffe *textPara) *textTable {
	table := &textTable{
		_acddc:  2,
		_gebeeb: 2,
		_cfgbb:  make(map[uint64]*textPara, 4),
	}
	table.put(0, 0, _ddfgd)
	table.put(1, 0, _ddfge)
	table.put(0, 1, _gccg)
	table.put(1, 1, _dbffe)
	return table
}

// yMean returns the average of the Y coordinates of the ruling's two end points.
func (_cggae lineRuling) yMean() float64 {
	return 0.5 * (_cggae._cged.Y + _cggae._ggeaa.Y)
}

// lastpointEstablished returns `_eaeeb` and false when `_egfd` is set, the last point of the
// final subpath and false when that subpath has `_bbbb` set, and the zero point and true
// otherwise.
// NOTE(review): the boolean is false whenever a point is returned; confirm that callers
// expect this polarity.
func (_ace *shapesState) lastpointEstablished() (_dca.Point, bool) {
	if _ace._egfd {
		return _ace._eaeeb, false
	}
	if count := len(_ace._efb); count > 0 {
		if final := _ace._efb[count-1]; final._bbbb {
			return _ace._efb[count-1].last(), false
		}
	}
	return _dca.Point{}, true
}

// blocks reports whether some ruling in the list lies between `_eafcc` and `_dcef` along the
// primary axis (with tolerance `_gbgf`) and overlaps the secondary range the two share.
// It returns false when the two rulings do not overlap along the secondary axis at all.
func (_ceee rulingList) blocks(_eafcc, _dcef *ruling) bool {
	if _eafcc._efgeb > _dcef._bbge || _dcef._efgeb > _eafcc._bbge {
		return false
	}
	overlapLo := _dc.Max(_eafcc._efgeb, _dcef._efgeb)
	overlapHi := _dc.Min(_eafcc._bbge, _dcef._bbge)
	// Order the pair so that `near` has the smaller primary coordinate.
	near, far := _eafcc, _dcef
	if near._cbag > far._cbag {
		near, far = far, near
	}
	for _, candidate := range _ceee {
		between := near._cbag <= candidate._cbag+_gbgf && candidate._cbag <= far._cbag+_gbgf
		if between && candidate._efgeb <= overlapHi && overlapLo <= candidate._bbge {
			return true
		}
	}
	return false
}

// bbox returns the smallest rectangle containing every point of every subpath in the
// section. The first subpath must contain at least one point.
func (_cbfcg pathSection) bbox() _fg.PdfRectangle {
	first := _cbfcg._ged[0]._gdgd[0]
	box := _fg.PdfRectangle{Llx: first.X, Urx: first.X, Lly: first.Y, Ury: first.Y}
	extend := func(pt _dca.Point) {
		switch {
		case pt.X < box.Llx:
			box.Llx = pt.X
		case pt.X > box.Urx:
			box.Urx = pt.X
		}
		switch {
		case pt.Y < box.Lly:
			box.Lly = pt.Y
		case pt.Y > box.Ury:
			box.Ury = pt.Y
		}
	}
	for i, sub := range _cbfcg._ged {
		points := sub._gdgd
		if i == 0 {
			// The first point of the first subpath seeded the box already.
			points = points[1:]
		}
		for _, pt := range points {
			extend(pt)
		}
	}
	return box
}
// Text gets the extracted text contained in `l`.
// The text is assembled by `_bcdb`, which is handed the list, a string builder and a pointer
// to an initially empty string.
func (_daaf *list) Text() string {
	var (
		builder _df.Builder
		prefix  string
	)
	_bcdb(_daaf, &builder, &prefix)
	return builder.String()
}
// New returns an Extractor instance for extracting content from the input PDF page.
// It is shorthand for NewWithOptions with nil options.
func New(page *_fg.PdfPage) (*Extractor, error) {
	extractor, err := NewWithOptions(page, nil)
	return extractor, err
}
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
// On error the returned text is empty and the counts reported by ExtractPageText are passed
// through unchanged.
func (_bafa *Extractor) ExtractTextWithStats() (_bgg string, _dga int, _beb int, _bcg error) {
	pageText, _dga, _beb, _bcg := _bafa.ExtractPageText()
	if _bcg == nil {
		_bgg = pageText.Text()
	}
	return _bgg, _dga, _beb, _bcg
}

// _abgeef returns the smaller of its two arguments.
func _abgeef(_cdffda, _fbfed int) int {
	if _fbfed <= _cdffda {
		return _fbfed
	}
	return _cdffda
}

// _fgae classifies a rectangle as a ruling: `_faccd` when it is wider than tall and at least
// `_daee` wide, `_cbab` when it is at least as tall as wide and at least `_daee` tall, and
// `_ccfb` otherwise.
func _fgae(_afgea _fg.PdfRectangle) rulingKind {
	width, height := _afgea.Width(), _afgea.Height()
	wider := width > height
	switch {
	case wider && width >= _daee:
		return _faccd
	case !wider && height >= _daee:
		return _cbab
	}
	return _ccfb
}
// String returns a human readable description of `s`.
func (_fabee intSet )String ()string {var _dbgg []int ;for _fdfc :=range _fabee {if _fabee .has (_fdfc ){_dbgg =append (_dbgg ,_fdfc );};};_ab .Ints (_dbgg );return _gde .Sprintf ("\u0025\u002b\u0076",_dbgg );};func (_aecaf rulingList )asTiling ()gridTiling {if _gbde {_ac .Log .Info ("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_aecaf ));
};for _ddfaa ,_fcdab :=range _aecaf [1:]{_gbcbd :=_aecaf [_ddfaa ];if _gbcbd .alignsPrimary (_fcdab )&&_gbcbd .alignsSec (_fcdab ){_ac .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_fcdab ,_gbcbd );
};};_aecaf .sortStrict ();_aecaf .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_aegd ,_ebdf :=_aecaf .vertsHorzs ();_dbca :=_aegd .primaries ();_gcaef :=_ebdf .primaries ();_gafae :=len (_dbca )-1;_daca :=len (_gcaef )-1;if _gafae ==0||_daca ==0{return gridTiling {};
};_ecag :=_fg .PdfRectangle {Llx :_dbca [0],Urx :_dbca [_gafae ],Lly :_gcaef [0],Ury :_gcaef [_daca ]};if _gbde {_ac .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_aegd ));
for _dbff ,_ffafg :=range _aegd {_gde .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dbff ,_ffafg );};_ac .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_ebdf ));
for _fgbg ,_gabda :=range _ebdf {_gde .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fgbg ,_gabda );};_ac .Log .Info ("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_gafae ,_daca ,_dbca ,_gcaef );
};_dcdc :=make ([]gridTile ,_gafae *_daca );for _fdeac :=_daca -1;_fdeac >=0;_fdeac --{_gegga :=_gcaef [_fdeac ];_bdgd :=_gcaef [_fdeac +1];for _ggcff :=0;_ggcff < _gafae ;_ggcff ++{_fefcf :=_dbca [_ggcff ];_fdad :=_dbca [_ggcff +1];_dccg :=_aegd .findPrimSec (_fefcf ,_gegga );
_acgf :=_aegd .findPrimSec (_fdad ,_gegga );_cbcc :=_ebdf .findPrimSec (_gegga ,_fefcf );_ffcab :=_ebdf .findPrimSec (_bdgd ,_fefcf );_gbgba :=_fg .PdfRectangle {Llx :_fefcf ,Urx :_fdad ,Lly :_gegga ,Ury :_bdgd };_gafba :=_fbgdd (_gbgba ,_dccg ,_acgf ,_cbcc ,_ffcab );
_dcdc [_fdeac *_gafae +_ggcff ]=_gafba ;if _gbde {_gde .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_ggcff ,_fdeac ,_gafba .String (),_gafba .Width (),_gafba .Height ());
};};};if _gbde {_ac .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_ecag );
};_bagf :=make ([]map[float64 ]gridTile ,_daca );for _dcfg :=_daca -1;_dcfg >=0;_dcfg --{if _gbde {_gde .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_dcfg );};_bagf [_dcfg ]=make (map[float64 ]gridTile ,_gafae );for _daeg :=0;_daeg < _gafae ;
_daeg ++{_ebeca :=_dcdc [_dcfg *_gafae +_daeg ];if _gbde {_gde .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_daeg ,_ebeca );};if !_ebeca ._cbge {continue ;};_aeag :=_daeg ;for _bgabb :=_daeg +1;!_ebeca ._dafe &&_bgabb < _gafae ;
_bgabb ++{_fffb :=_dcdc [_dcfg *_gafae +_bgabb ];_ebeca .Urx =_fffb .Urx ;_ebeca ._cbbgc =_ebeca ._cbbgc ||_fffb ._cbbgc ;_ebeca ._beacf =_ebeca ._beacf ||_fffb ._beacf ;_ebeca ._dafe =_fffb ._dafe ;if _gbde {_gde .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_bgabb ,_fffb ,_ebeca );
};_aeag =_bgabb ;};if _gbde {_gde .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_daeg ,_aeag ,_ebeca );};_daeg =_aeag ;_bagf [_dcfg ][_ebeca .Llx ]=_ebeca ;};};_acfca :=make (map[float64 ]map[float64 ]gridTile ,_daca );
_efda :=make (map[float64 ]map[float64 ]struct{},_daca );for _fffg :=_daca -1;_fffg >=0;_fffg --{_ebgeg :=_dcdc [_fffg *_gafae ].Lly ;_acfca [_ebgeg ]=make (map[float64 ]gridTile ,_gafae );_efda [_ebgeg ]=make (map[float64 ]struct{},_gafae );};if _gbde {_ac .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_ecag );
};for _ebcae :=_daca -1;_ebcae >=0;_ebcae --{_fcgd :=_dcdc [_ebcae *_gafae ].Lly ;_gcbb :=_bagf [_ebcae ];if _gbde {_gde .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_ebcae );};for _ ,_abfac :=range _bbagb (_gcbb ){if _ ,_ebaed :=_efda [_fcgd ][_abfac ];
_ebaed {continue ;};_agae :=_gcbb [_abfac ];if _gbde {_gde .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_agae .String ());};for _gcgb :=_ebcae -1;_gcgb >=0;_gcgb --{if _agae ._beacf {break ;};_bacfd :=_bagf [_gcgb ];_facbc ,_agdb :=_bacfd [_abfac ];
if !_agdb {break ;};if _facbc .Urx !=_agae .Urx {break ;};_agae ._beacf =_facbc ._beacf ;_agae .Lly =_facbc .Lly ;if _gbde {_gde .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_facbc .String (),_agae .String ());
};_efda [_facbc .Lly ][_facbc .Llx ]=struct{}{};};if _ebcae ==0{_agae ._beacf =true ;};if _agae .complete (){_acfca [_fcgd ][_abfac ]=_agae ;};};};_ddgce :=gridTiling {PdfRectangle :_ecag ,_abeb :_efcda (_acfca ),_cegbg :_aded (_acfca ),_faeca :_acfca };
_ddgce .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");return _ddgce ;};func _aadf (_bedf *textLine )float64 {return _bedf ._fgbe [0].Llx };func (_gdbd *ruling )gridIntersecting (_bbgeb *ruling )bool {return _begea (_gdbd ._efgeb ,_bbgeb ._efgeb )&&_begea (_gdbd ._bbge ,_bbgeb ._bbge );
2023-09-07 17:40:17 +00:00
};
// String returns a human-readable description of the ruling.
func (_ccede *ruling )String ()string {if _ccede ._gffa ==_ccfb {return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047";};_bcefc ,_dbdgb :="\u0078","\u0079";if _ccede ._gffa ==_faccd {_bcefc ,_dbdgb ="\u0079","\u0078";};_adacb :="";if _ccede ._afdc !=0.0{_adacb =_gde .Sprintf (" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_ccede ._afdc );
};return _gde .Sprintf ("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",_ccede ._gffa ,_bcefc ,_ccede ._cbag ,_dbdgb ,_ccede ._efgeb ,_ccede ._bbge ,_ccede ._bbge -_ccede ._efgeb ,_ccede ._adaa ,_ccede .Color ,_adacb );
};func (_faef paraList )log (_bbdcb string ){if !_abfe {return ;};_ac .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_bbdcb ,len (_faef ));
for _fddgc ,_fcebe :=range _faef {if _fcebe ==nil {continue ;};_aabec :=_fcebe .text ();_aaeb :="\u0020\u0020";if _fcebe ._edce !=nil {_aaeb =_gde .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_fcebe ._edce ._acddc ,_fcebe ._edce ._gebeeb );};_gde .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_fddgc ,_fcebe .PdfRectangle ,_aaeb ,_dbdbb (_aabec ,50));
};};func (_cdb *wordBag )firstWord (_ggbf int )*textWord {return _cdb ._aac [_ggbf ][0]};const _acbd =10;func (_bded *subpath )last ()_dca .Point {return _bded ._gdgd [len (_bded ._gdgd )-1]};func (_dgc *textObject )renderText (_gcbc _dce .PdfObject ,_cbfc []byte ,_ddd int )error {if _dgc ._gbf {_ac .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");
return nil ;};_cab :=_dgc .getCurrentFont ();_cefe :=_cab .BytesToCharcodes (_cbfc );_cgad ,_cegda ,_cdg :=_cab .CharcodesToStrings (_cefe );if _cdg > 0{_ac .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_cegda ,_cdg );
};_dgc ._abcb ._ddcd +=_cegda ;_dgc ._abcb ._dcad +=_cdg ;_fdef :=_dgc ._abcb ;_bga :=_fdef ._dda ;_bbff :=_fdef ._gfad /100.0;_gad :=_degg ;if _cab .Subtype ()=="\u0054\u0079\u0070e\u0033"{_gad =1;};_gdc ,_gcgd :=_cab .GetRuneMetrics (' ');if !_gcgd {_gdc ,_gcgd =_cab .GetCharMetrics (32);
};if !_gcgd {_gdc ,_ =_fg .DefaultFont ().GetRuneMetrics (' ');};_ebfb :=_gdc .Wx *_gad ;_ac .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_ebfb ,_cgad ,_cab ,_bga );
_gcd :=_dca .NewMatrix (_bga *_bbff ,0,0,_bga ,0,_fdef ._cgfc );if _adbb {_ac .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_cefe ),_cefe ,_cgad );
};_ac .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_cefe ),_cefe ,len (_cgad ));_cgaa :=_dgc .getFillColor ();
_ecbg :=_dgc .getStrokeColor ();for _dcbd ,_cegf :=range _cgad {_egc :=[]rune (_cegf );if len (_egc )==1&&_egc [0]=='\x00'{continue ;};_gdcf :=_cefe [_dcbd ];_fcce :=_dgc ._dbf .CTM .Mult (_dgc ._acbc ).Mult (_gcd );_ggfa :=0.0;if len (_egc )==1&&_egc [0]==32{_ggfa =_fdef ._ggf ;
};_bccb ,_ggfd :=_cab .GetCharMetrics (_gdcf );if !_ggfd {_ac .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_gdcf ,_egc ,_egc ,_cab );
return _gde .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_cab .String (),_gdcf );};_dfge :=_dca .Point {X :_bccb .Wx *_gad ,Y :_bccb .Wy *_gad };
_fbee :=_dca .Point {X :(_dfge .X *_bga +_ggfa )*_bbff };_dbfe :=_dca .Point {X :(_dfge .X *_bga +_fdef ._aaec +_ggfa )*_bbff };if _adbb {_ac .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_bga ,_fdef ._aaec ,_fdef ._ggf ,_bbff );
_ac .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_dfge ,_fbee ,_dbfe );};_bffg :=_acc (_fbee );_afac :=_acc (_dbfe );_fbed :=_dgc ._dbf .CTM .Mult (_dgc ._acbc ).Mult (_bffg );
if _cgcbc {_ac .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_dgc ._dbf .CTM ,_dgc ._acbc ,_afac ,_gcdf (_dgc ._dbf .CTM .Mult (_dgc ._acbc ).Mult (_afac )),_bffg ,_fbed ,_gcdf (_fbed ));
};_cbcd ,_ecbb :=_dgc .newTextMark (_c .ExpandLigatures (_egc ),_fcce ,_gcdf (_fbed ),_dc .Abs (_ebfb *_fcce .ScalingFactorX ()),_cab ,_dgc ._abcb ._aaec ,_cgaa ,_ecbg ,_gcbc ,_cgad ,_dcbd ,_ddd );if !_ecbb {_ac .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");
continue ;};if _cab ==nil {_ac .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _cab .Encoder ()==nil {_ac .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_cab );
}else {if _dbfed ,_gabb :=_cab .Encoder ().CharcodeToRune (_gdcf );_gabb {_cbcd ._dcdf =string (_dbfed );};};_ac .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_dcbd ,_gdcf ,_cbcd ,_fcce );
_dgc ._ffc =append (_dgc ._ffc ,&_cbcd );_dgc ._acbc .Concat (_afac );};return nil ;};func _fdgdc (_gdee []pathSection )rulingList {_ecba (_gdee );if _aebg {_ac .Log .Info ("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073",len (_gdee ));
};var _dfcc rulingList ;for _ ,_baedg :=range _gdee {for _ ,_dfeaf :=range _baedg ._ged {if len (_dfeaf ._gdgd )< 2{continue ;};_afddf :=_dfeaf ._gdgd [0];for _ ,_aafa :=range _dfeaf ._gdgd [1:]{if _ccbb ,_bage :=_dfegb (_afddf ,_aafa ,_baedg .Color );
_bage {_dfcc =append (_dfcc ,_ccbb );};_afddf =_aafa ;};};};if _aebg {_ac .Log .Info ("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073",_dfcc );};return _dfcc ;};func (_ecbca rulingList )removeDuplicates ()rulingList {if len (_ecbca )==0{return nil ;
};_ecbca .sort ();_fage :=rulingList {_ecbca [0]};for _ ,_bccf :=range _ecbca [1:]{if _bccf .equals (_fage [len (_fage )-1]){continue ;};_fage =append (_fage ,_bccf );};return _fage ;};func _fgca (_bbcf []*textLine ,_cddg ,_dbcca float64 )[]*textLine {var _dbda []*textLine ;
for _ ,_debe :=range _bbcf {if _cddg ==-1{if _debe ._bfcg > _dbcca {_dbda =append (_dbda ,_debe );};}else {if _debe ._bfcg > _dbcca &&_debe ._bfcg < _cddg {_dbda =append (_dbda ,_debe );};};};return _dbda ;};func (_dadg *textObject )setTextRise (_cegd float64 ){if _dadg ==nil {return ;
};_dadg ._abcb ._cgfc =_cegd ;};
// String returns a description of `k`.
// A kind that has no entry in the `_adfb` name table is reported as
// "Not a ruling: <numeric value>".
func (_fddgg rulingKind) String() string {
	if label, known := _adfb[_fddgg]; known {
		return label
	}
	return _gde.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _fddgg)
}

// shapesState groups two matrices, a list of subpaths, a boolean flag, a point
// and a reference to a text object.
// NOTE(review): the role of each field beyond what is noted below is not
// visible in this part of the file.
type shapesState struct {
	_gebd  _dca.Matrix
	_dged  _dca.Matrix
	_efb   []*subpath // counted as "subpaths" by shapesState.String.
	_egfd  bool       // printed as "fresh" by shapesState.String.
	_eaeeb _dca.Point
	_cfda  *textObject
}

// writeText writes the text of every paragraph in the list to `_gage`.
// Paragraphs whose `_bfge` flag is set are skipped. After each written
// paragraph except the one at the last index, a single space is emitted when
// `_fgfb` reports true for the paragraph and its successor, otherwise two
// newlines. Two newlines always terminate the output. Write errors are ignored.
func (_gdga paraList) writeText(_gage _g.Writer) {
	lastIdx := len(_gdga) - 1
	for pos, para := range _gdga {
		if para._bfge {
			continue
		}
		para.writeText(_gage)
		if pos == lastIdx {
			continue
		}
		if _fgfb(para, _gdga[pos+1]) {
			_gage.Write([]byte("\u0020"))
			continue
		}
		_gage.Write([]byte("\u000a"))
		_gage.Write([]byte("\u000a"))
	}
	_gage.Write([]byte("\u000a"))
	_gage.Write([]byte("\u000a"))
}
// ToTextMark returns the public view of `tm`.
// Every field set here is copied from its private counterpart; TextMark fields
// that are not assigned keep their zero value.
func (_fbgce *textMark) ToTextMark() TextMark {
	var pub TextMark
	pub.Text = _fbgce._efgdc
	pub.Original = _fbgce._dcdf
	pub.BBox = _fbgce._fbae
	pub.Font = _fbgce._abef
	pub.FontSize = _fbgce._cggc
	pub.FillColor = _fbgce._cccae
	pub.StrokeColor = _fbgce._dcgea
	pub.Orientation = _fbgce._aaad
	pub.DirectObject = _fbgce._dafb
	pub.ObjString = _fbgce._facc
	pub.Tw = _fbgce.Tw
	pub.Th = _fbgce.Th
	pub.Tc = _fbgce._accea
	pub.Index = _fbgce._ccfa
	return pub
}

// depthIndexes returns the keys of the bag's `_aac` map in ascending order.
// It returns nil when the map is empty.
func (_dabe *wordBag) depthIndexes() []int {
	count := len(_dabe._aac)
	if count == 0 {
		return nil
	}
	keys := make([]int, 0, count)
	for key := range _dabe._aac {
		keys = append(keys, key)
	}
	_ab.Ints(keys)
	return keys
}
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_gbe PageText) Marks() *TextMarkArray {
	marks := TextMarkArray{_gcc: _gbe._fdaf}
	return &marks
}

// findGridTables asks findTableGrid for a table for each tiling in `_aabc`.
// Every non-nil table is logged, collected and has its cells marked; every
// paragraph in the second findTableGrid result gets its `_dcada` flag set.
// The collected tables are returned in tiling order.
func (_ggbfg paraList) findGridTables(_aabc []gridTiling) []*textTable {
	if _eadb {
		_ac.Log.Info("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073", len(_ggbfg))
		for pos, para := range _ggbfg {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", pos, para)
		}
	}
	var tables []*textTable
	for tilingIdx, tiling := range _aabc {
		table, used := _ggbfg.findTableGrid(tiling)
		if table != nil {
			table.log(_gde.Sprintf("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064", tilingIdx))
			tables = append(tables, table)
			table.markCells()
		}
		for para := range used {
			para._dcada = true
		}
	}
	if _eadb {
		_ac.Log.Info("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s", len(tables))
	}
	return tables
}

// reduce returns a table without the rows and columns that
// emptyCompositeRow / emptyCompositeColumn report as empty. When nothing is
// removed the receiver itself is returned; otherwise a new table is built and
// the surviving composite cells are copied to their compacted positions.
// NOTE(review): the new table pre-allocates `_cfgbb` but is filled through
// putComposite; confirm putComposite initialises whatever map it writes to.
func (_cbea *textTable) reduce() *textTable {
	keptRows := make([]int, 0, _cbea._gebeeb)
	keptCols := make([]int, 0, _cbea._acddc)
	for row := 0; row < _cbea._gebeeb; row++ {
		if _cbea.emptyCompositeRow(row) {
			continue
		}
		keptRows = append(keptRows, row)
	}
	for col := 0; col < _cbea._acddc; col++ {
		if _cbea.emptyCompositeColumn(col) {
			continue
		}
		keptCols = append(keptCols, col)
	}
	if len(keptRows) == _cbea._gebeeb && len(keptCols) == _cbea._acddc {
		return _cbea
	}
	reduced := textTable{
		_aefef:  _cbea._aefef,
		_acddc:  len(keptCols),
		_gebeeb: len(keptRows),
		_cfgbb:  make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _eadb {
		_ac.Log.Info("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064", _cbea._acddc, _cbea._gebeeb, len(keptCols), len(keptRows))
		_ac.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_ac.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}
	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			paras, rect := _cbea.getComposite(oldCol, oldRow)
			if paras == nil {
				continue
			}
			if _eadb {
				_gde.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newCol, newRow, oldCol, oldRow, _dbdbb(paras.merge().text(), 50))
			}
			reduced.putComposite(newCol, newRow, paras, rect)
		}
	}
	return &reduced
}

// parasBBox returns the cell's paragraph list together with its rectangle.
func (_cceag compositeCell) parasBBox() (paraList, _fg.PdfRectangle) {
	paras, rect := _cceag.paraList, _cceag.PdfRectangle
	return paras, rect
}

// _cecg reports whether the x-extent of `_cagf` intersects the x-extent of
// `_abea` after the latter is widened by `_ffdbd` on both sides.
func _cecg(_abea *wordBag, _cagf *textWord, _ffdbd float64) bool {
	startsBeforeBagEnd := _cagf.Llx < _abea.Urx+_ffdbd
	endsAfterBagStart := _abea.Llx-_ffdbd < _cagf.Urx
	return startsBeforeBagEnd && endsAfterBagStart
}

// wordBag is a rectangle plus the words assigned to it, keyed by an integer
// index.
type wordBag struct {
	_fg.PdfRectangle
	_fab         float64 // raised by pullWord to the largest `_adecc` of the pulled words.
	_egga, _bebd rulingList // consulted by blocked for the x and y directions respectively.
	_ggfe        float64
	_aac         map[int][]*textWord
}

// growTable repeatedly extends the table by one row (from getDown) and/or one
// column (from getRight) until neither is available.
// When both are available and they end in the same, not yet taken, paragraph,
// the row is added first, the column is re-queried and added if still
// available, and that shared paragraph is placed in the new corner cell.
// Otherwise a row is preferred over a column.
func (_aeace *textTable) growTable() {
	addRow := func(_dbbb paraList) {
		_aeace._gebeeb++
		for col := 0; col < _aeace._acddc; col++ {
			_aeace.put(col, _aeace._gebeeb-1, _dbbb[col])
		}
	}
	addColumn := func(_daff paraList) {
		_aeace._acddc++
		for row := 0; row < _aeace._gebeeb; row++ {
			_aeace.put(_aeace._acddc-1, row, _daff[row])
		}
	}
	if _gddd {
		_aeace.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}
	for pass := 0; ; pass++ {
		down := _aeace.getDown()
		right := _aeace.getRight()
		if _gddd {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", pass, _aeace)
			_gde.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", down)
			_gde.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", right)
		}
		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				addRow(down)
				// The column candidates are recomputed because the table just grew.
				if right = _aeace.getRight(); right != nil {
					addColumn(right)
					_aeace.put(_aeace._acddc-1, _aeace._gebeeb-1, corner)
				}
				continue
			}
		}
		if down != nil {
			addRow(down)
			continue
		}
		if right != nil {
			addColumn(right)
			continue
		}
		break
	}
}
2023-10-07 13:58:01 +00:00
// Text returns the extracted page text.
2023-11-11 11:29:03 +00:00
func (_adbg PageText) Text() string {
	return _adbg._edee
}

// showTextAdjusted walks the elements of the array `_ffd`.
// A float or integer element translates the matrix `_acbc` by
// -value*0.001*`_abcb._dda` along x. A string element is passed to renderText
// together with `_eea`; the error returned by renderText is not inspected.
// Any other element type, a number that cannot be converted, or a string whose
// bytes cannot be read makes the function return an error immediately.
// NOTE(review): renderText scales its advance by `_gfad`/100 but the numeric
// displacement here does not — confirm against the PDF TJ operator definition.
func (_bcee *textObject) showTextAdjusted(_ffd *_dce.PdfObjectArray, _eea int) error {
	// Never set to true in this function, so the axis swap below is inert.
	vertical := false
	for _, elem := range _ffd.Elements() {
		switch elem.(type) {
		case *_dce.PdfObjectString:
			direct := _dce.TraceToDirectObject(elem)
			raw, isString := _dce.GetStringBytes(direct)
			if !isString {
				_ac.Log.Trace("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _ffd)
				return _dce.ErrTypeError
			}
			_bcee.renderText(direct, raw, _eea)
		case *_dce.PdfObjectFloat, *_dce.PdfObjectInteger:
			amount, convErr := _dce.GetNumberAsFloat(elem)
			if convErr != nil {
				_ac.Log.Debug("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _ffd)
				return convErr
			}
			dx := -amount * 0.001 * _bcee._abcb._dda
			dy := 0.0
			if vertical {
				dx, dy = dy, dx
			}
			shift := _acc(_dca.Point{X: dx, Y: dy})
			_bcee._acbc.Concat(shift)
		default:
			_ac.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _ffd)
			return _dce.ErrTypeError
		}
	}
	return nil
}
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// String returns a human readable description of `ss`:
// the number of subpaths and the value of the `_egfd` flag.
func (_ddff *shapesState) String() string {
	subpathCount := len(_ddff._efb)
	return _gde.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", subpathCount, _ddff._egfd)
}

// addDiacritic appends `_bcged` to the text of the last element of the word's
// `_daafd` slice and stores the NFKC-normalised result back on that element.
// It panics when `_daafd` is empty.
func (_efdge *textWord) addDiacritic(_bcged string) {
	lastMark := _efdge._daafd[len(_efdge._daafd)-1]
	lastMark._efgdc += _bcged
	lastMark._efgdc = _fd.NFKC.String(lastMark._efgdc)
}

const _deg = 20

// cachedImage pairs an image with a colorspace.
type cachedImage struct {
	_gb  *_fg.Image
	_cbg _fg.PdfColorspace
}

// _dfegb builds a ruling from the segment `_cgbg`-`_eaeec` with colour
// `_cdecf`. It returns (nil, false) when `_cacg` classifies the segment as
// `_ccfb`; otherwise it returns the result of lineRuling.asRuling.
func _dfegb(_cgbg, _eaeec _dca.Point, _cdecf _be.Color) (*ruling, bool) {
	kind := _cacg(_cgbg, _eaeec)
	if kind == _ccfb {
		return nil, false
	}
	segment := lineRuling{_cged: _cgbg, _ggeaa: _eaeec, _gdggb: kind, Color: _cdecf}
	return segment.asRuling()
}

// _ecba replaces, in place, both coordinates of every point of every subpath
// in `_egfac` with the value `_ebdgb` returns for it. It does nothing when
// `_egac` is negative. With `_aebg` enabled, each point that `_ffea` reports
// as changed is printed with its section, subpath and point index.
func _ecba(_egfac []pathSection) {
	if _egac < 0.0 {
		return
	}
	if _aebg {
		_ac.Log.Info("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073", len(_egfac))
	}
	for sectionIdx, section := range _egfac {
		for pathIdx, path := range section._ged {
			for pointIdx, before := range path._gdgd {
				after := _dca.Point{X: _ebdgb(before.X), Y: _ebdgb(before.Y)}
				path._gdgd[pointIdx] = after
				if !_aebg || _ffea(before, after) {
					continue
				}
				delta := _dca.Point{X: after.X - before.X, Y: after.Y - before.Y}
				_gde.Printf("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a", sectionIdx, pathIdx, pointIdx, before, after, delta)
			}
		}
	}
}

// topoOrder returns paragraph indexes ordered by a depth-first traversal of
// the readBefore relation: starting from each unvisited index in ascending
// order, every unvisited index that the current one is read before is visited
// first, then the current index is recorded. The recorded sequence is passed
// through `_cgffb` before being returned.
func (_cdaed paraList) topoOrder() []int {
	if _abfe {
		_ac.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	total := len(_cdaed)
	seen := make([]bool, total)
	postOrder := make([]int, 0, total)
	llyOrder := _cdaed.llyOrdering()

	var visit func(idx int)
	visit = func(idx int) {
		seen[idx] = true
		for next := 0; next < total; next++ {
			if seen[next] {
				continue
			}
			if !_cdaed.readBefore(llyOrder, idx, next) {
				continue
			}
			visit(next)
		}
		postOrder = append(postOrder, idx)
	}
	for start := 0; start < total; start++ {
		if seen[start] {
			continue
		}
		visit(start)
	}
	return _cgffb(postOrder)
}

// _bgad is a fixed list of single-character strings: nine symbols and the
// letter "o".
// NOTE(review): presumably list-bullet markers — the code that reads this list
// is outside this part of the file.
var _bgad = []string{
	"\u2756",
	"\u27a2",
	"\u2713",
	"\u2022",
	"\uf0a7",
	"\u25a1",
	"\u2212",
	"\u25a0",
	"\u25aa",
	"\u006f",
}
// setWordSpacing stores `_fcc` in the `_ggf` field of the text object's state.
// Calling it on a nil receiver is a no-op.
func (_eeg *textObject) setWordSpacing(_fcc float64) {
	if _eeg != nil {
		_eeg._abcb._ggf = _fcc
	}
}
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	// IncludeInlineStencilMasks defaults to false.
	// NOTE(review): presumably selects whether inline stencil-mask images are
	// part of the extraction result — the code reading it is outside this
	// part of the file.
	IncludeInlineStencilMasks bool
}
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
type TextTable struct {
	_fg.PdfRectangle
	W, H  int
	Cells [][]TableCell
}

// bbox returns the rectangle embedded in the text mark.
func (_daga *textMark) bbox() _fg.PdfRectangle {
	rect := _daga.PdfRectangle
	return rect
}

// _eab returns a textState with `_gfad` set to 100, `_bgcc` set to
// RenderModeFill and `_cefd` set to `_cde`; all other fields are zero.
func _eab(_cde _fg.PdfRectangle) textState {
	var state textState
	state._gfad = 100
	state._bgcc = RenderModeFill
	state._cefd = _cde
	return state
}

// sort orders, in place, each word slice held in the bag's `_aac` map using
// the `_gedbc` comparison (a word goes first when `_gedbc` is negative).
func (_bddbe *wordBag) sort() {
	for _, words := range _bddbe._aac {
		before := func(i, j int) bool {
			return _gedbc(words[i], words[j]) < 0
		}
		_ab.Slice(words, before)
	}
}

// rulingKind is the integer type used to classify rulings.
type rulingKind int

// bbox returns the rectangle embedded in the paragraph.
func (_abgeb *textPara) bbox() _fg.PdfRectangle {
	rect := _abgeb.PdfRectangle
	return rect
}
// empty reports whether the stack holds no elements.
func (_bgbe *stateStack) empty() bool {
	depth := len(*_bgbe)
	return depth == 0
}

// text returns the `_fedgb` strings of all words in the bag, in allWords
// order, joined by single spaces.
func (_cabec *wordBag) text() string {
	words := _cabec.allWords()
	parts := make([]string, 0, len(words))
	for _, word := range words {
		parts = append(parts, word._fedgb)
	}
	return _df.Join(parts, "\u0020")
}

var _de = false

// _gadcf concatenates the text of `_dabde`, writing `_afdgg` before each line.
// A line is followed by a space when the next line has the same `_bfcg` value
// and by a newline otherwise. For the last line the "next" value is taken to
// be 0.0, so it ends with a space only if its own `_bfcg` is 0.0.
func _gadcf(_dabde []*textLine, _afdgg string) string {
	var out _df.Builder
	lastIdx := len(_dabde) - 1
	for idx, current := range _dabde {
		content := current.text()
		depth := current._bfcg
		nextDepth := 0.0
		if idx < lastIdx {
			nextDepth = _dabde[idx+1]._bfcg
		}
		out.WriteString(_afdgg)
		out.WriteString(content)
		if nextDepth == depth {
			out.WriteString("\u0020")
		} else {
			out.WriteString("\u000a")
		}
	}
	return out.String()
}

// removeDuplicates drops every point that `_ffea` reports as equal to the
// point kept immediately before it. An empty subpath is left unchanged.
func (_cegde *subpath) removeDuplicates() {
	if len(_cegde._gdgd) == 0 {
		return
	}
	kept := []_dca.Point{_cegde._gdgd[0]}
	for _, candidate := range _cegde._gdgd[1:] {
		if _ffea(candidate, kept[len(kept)-1]) {
			continue
		}
		kept = append(kept, candidate)
	}
	_cegde._gdgd = kept
}

// textResult bundles a PageText with two integer values.
type textResult struct {
	_ecca PageText
	_bbag int
	_ffg  int
}

// _adgb splits the list's `_fbfaf` string on newlines and returns the
// non-empty pieces, each prefixed with `*_cdbe` and terminated by a newline.
func _adgb(_fgdg *list, _cdbe *string) string {
	var out _df.Builder
	for _, row := range _df.Split(_fgdg._fbfaf, "\u000a") {
		if row == "" {
			continue
		}
		out.WriteString(*_cdbe)
		out.WriteString(row)
		out.WriteString("\u000a")
	}
	return out.String()
}

// _gfed returns `_edfa` applied to the absolute y-distance and the absolute
// x-distance (in that argument order) between the two points.
func _gfed(_bcdc, _ggad _dca.Point) bool {
	absDx := _dc.Abs(_bcdc.X - _ggad.X)
	absDy := _dc.Abs(_bcdc.Y - _ggad.Y)
	return _edfa(absDy, absDx)
}

// ruling is a straight line with a kind, a mark kind and a colour.
type ruling struct {
	_gffa rulingKind
	_adaa markKind
	_be.Color
	_cbag  float64 // coordinate matched by findPrimSec's first argument.
	_efgeb float64 // start of the range matched by findPrimSec's second argument.
	_bbge  float64 // end of that range.
	_afdc  float64 // printed as "width" by ruling.String when non-zero.
}

// markCells sets the `_dcada` flag on every non-nil cell of the table.
func (_eccbe *textTable) markCells() {
	for row := 0; row < _eccbe._gebeeb; row++ {
		for col := 0; col < _eccbe._acddc; col++ {
			if cell := _eccbe.get(col, row); cell != nil {
				cell._dcada = true
			}
		}
	}
}

const (
	_eafcg markKind = iota
	_efcd
	_babea
	_acaca
)

// gridTiling is a rectangle together with two coordinate lists and a
// two-level map of grid tiles keyed by float64 coordinates.
type gridTiling struct {
	_fg.PdfRectangle
	_abeb  []float64
	_cegbg []float64
	_faeca map[float64]map[float64]gridTile
}

// nextLine moves the line position by (0, -`_abcb._gcg`) via moveLP.
func (_bead *textObject) nextLine() {
	drop := -_bead._abcb._gcg
	_bead.moveLP(0, drop)
}
// String returns a string describing `i`.
// The rectangle is followed by four one-character flags in the order
// L, R, B, T; a flag that is not set is shown as "_".
func (_gaba gridTile) String() string {
	flag := func(set bool, label string) string {
		if !set {
			return "\u005f"
		}
		return label
	}
	left := flag(_gaba._cbge, "\u004c")
	right := flag(_gaba._dafe, "\u0052")
	bottom := flag(_gaba._beacf, "\u0042")
	top := flag(_gaba._cbbgc, "\u0054")
	return _gde.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", _gaba.PdfRectangle, left, right, bottom, top)
}

// _fgea appends `_gcf` to `_cfdf` after setting its Offset to `*_gggc`, then
// advances `*_gggc` by the byte length of the mark's Text. The extended slice
// is returned.
func _fgea(_cfdf []TextMark, _gggc *int, _gcf TextMark) []TextMark {
	_gcf.Offset = *_gggc
	*_gggc += len(_gcf.Text)
	return append(_cfdf, _gcf)
}

// primaries returns the distinct `_cbag` values of the rulings in ascending
// order. The result is an empty, non-nil slice when the list is empty.
func (_eggc rulingList) primaries() []float64 {
	distinct := make(map[float64]struct{}, len(_eggc))
	for _, r := range _eggc {
		distinct[r._cbag] = struct{}{}
	}
	values := make([]float64, 0, len(distinct))
	for value := range distinct {
		values = append(values, value)
	}
	_ab.Float64s(values)
	return values
}

// intSet is a set of ints.
type intSet map[int]struct{}
// String returns a string describing `tm`.
func (_abab TextMark )String ()string {_dggf :=_abab .BBox ;var _afab string ;if _abab .Font !=nil {_afab =_abab .Font .String ();if len (_afab )> 50{_afab =_afab [:50]+"\u002e\u002e\u002e";};};var _afb string ;if _abab .Meta {_afb ="\u0020\u002a\u004d\u002a";
};return _gde .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_abab .Offset ,_abab .Text ,[]rune (_abab .Text ),_dggf .Llx ,_dggf .Lly ,_dggf .Urx ,_dggf .Ury ,_afab ,_afb );
};func (_gaca compositeCell )split (_cgfag ,_gecef []float64 )*textTable {_abdd :=len (_cgfag )+1;_faed :=len (_gecef )+1;if _eadb {_ac .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_faed ,_abdd ,_gaca ,_cgfag ,_gecef );
_gde .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_gaca .paraList ));for _ggdgd ,_gdde :=range _gaca .paraList {_gde .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ggdgd ,_gdde .String ());
};_gde .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_gaca .lines ()));for _gcea ,_cbebg :=range _gaca .lines (){_gde .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gcea ,_cbebg );};};_cgfag =_gfgf (_cgfag ,_gaca .Ury ,_gaca .Lly );
_gecef =_gfgf (_gecef ,_gaca .Llx ,_gaca .Urx );_cdbg :=make (map[uint64 ]*textPara ,_faed *_abdd );_cbcf :=textTable {_acddc :_faed ,_gebeeb :_abdd ,_cfgbb :_cdbg };_fdbag :=_gaca .paraList ;_ab .Slice (_fdbag ,func (_cdgb ,_daedc int )bool {_fafbc ,_ebca :=_fdbag [_cdgb ],_fdbag [_daedc ];
_gfcgg ,_bdeec :=_fafbc .Lly ,_ebca .Lly ;if _gfcgg !=_bdeec {return _gfcgg < _bdeec ;};return _fafbc .Llx < _ebca .Llx ;});_gfgg :=make (map[uint64 ]_fg .PdfRectangle ,_faed *_abdd );for _cebg ,_dgee :=range _cgfag [1:]{_bbga :=_cgfag [_cebg ];for _gefe ,_adeb :=range _gecef [1:]{_cega :=_gecef [_gefe ];
_gfgg [_bafcd (_gefe ,_cebg )]=_fg .PdfRectangle {Llx :_cega ,Urx :_adeb ,Lly :_dgee ,Ury :_bbga };};};if _eadb {_ac .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073");
_gde .Printf ("\u0020\u0020\u0020\u0020");for _ggcc :=0;_ggcc < _faed ;_ggcc ++{_gde .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_ggcc );};_gde .Println ();for _cdba :=0;_cdba < _abdd ;_cdba ++{_gde .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_cdba );
for _eecca :=0;_eecca < _faed ;_eecca ++{_gde .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_gfgg [_bafcd (_eecca ,_cdba )]);};_gde .Println ();};};_eebb :=func (_ccfe *textLine )(int ,int ){for _bgfa :=0;_bgfa < _abdd ;_bgfa ++{for _aaga :=0;_aaga < _faed ;
_aaga ++{if _dgfe (_gfgg [_bafcd (_aaga ,_bgfa )],_ccfe .PdfRectangle ){return _aaga ,_bgfa ;};};};return -1,-1;};_gbege :=make (map[uint64 ][]*textLine ,_faed *_abdd );for _ ,_abfc :=range _fdbag .lines (){_aefc ,_cgcfa :=_eebb (_abfc );if _aefc < 0{continue ;
};_gbege [_bafcd (_aefc ,_cgcfa )]=append (_gbege [_bafcd (_aefc ,_cgcfa )],_abfc );};for _bdcaa :=0;_bdcaa < len (_cgfag )-1;_bdcaa ++{_ddabe :=_cgfag [_bdcaa ];_cgeac :=_cgfag [_bdcaa +1];for _agfbb :=0;_agfbb < len (_gecef )-1;_agfbb ++{_cbee :=_gecef [_agfbb ];
_aeed :=_gecef [_agfbb +1];_egag :=_fg .PdfRectangle {Llx :_cbee ,Urx :_aeed ,Lly :_cgeac ,Ury :_ddabe };_ecaaf :=_gbege [_bafcd (_agfbb ,_bdcaa )];if len (_ecaaf )==0{continue ;};_gdcg :=_ggdc (_egag ,_ecaaf );_cbcf .put (_agfbb ,_bdcaa ,_gdcg );};};return &_cbcf ;
};func (_faffbe paraList )extractTables (_gacf []gridTiling )paraList {if _eadb {_ac .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_faffbe ));
};if len (_faffbe )< _ffa {return _faffbe ;};_fbac :=_faffbe .findTables (_gacf );if _eadb {_ac .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_fbac ));
for _ccdd ,_bgbg :=range _fbac {_bgbg .log (_gde .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_ccdd ));};};return _faffbe .applyTables (_fbac );};func (_afec paraList )reorder (_bec []int ){_cccb :=make (paraList ,len (_afec ));
for _ccdc ,_baaca :=range _bec {_cccb [_ccdc ]=_afec [_baaca ];};copy (_afec ,_cccb );};func _ccag (_daede []TextMark ,_bbe *int ,_bcge string )[]TextMark {_ecec :=_bfec ;_ecec .Text =_bcge ;return _fgea (_daede ,_bbe ,_ecec );};const (_ddcg =true ;_ade =true ;
_fefd =true ;_gcda =false ;_ggfbe =false ;_fedg =6;_baecd =3.0;_aee =200;_dcea =true ;_aagdg =true ;_efbd =true ;_dfcb =true ;_ffdd =false ;);func (_ebcf *wordBag )depthRange (_dcbe ,_cae int )[]int {var _fcbb []int ;for _agfd :=range _ebcf ._aac {if _dcbe <=_agfd &&_agfd <=_cae {_fcbb =append (_fcbb ,_agfd );
};};if len (_fcbb )==0{return nil ;};_ab .Ints (_fcbb );return _fcbb ;};func (_gebf *textLine )pullWord (_bggd *wordBag ,_degge *textWord ,_afcf int ){_gebf .appendWord (_degge );_bggd .removeWord (_degge ,_afcf );};
// GetContentStreamOps returns the contentStreamOps field of `pt`.
func (_eedc *PageText) GetContentStreamOps() *_dcg.ContentStreamOperations {
	ops := _eedc._eaee
	return ops
}

// _ebge returns the smallest rectangle that contains both `_bacf` and `_bfga`.
func _ebge(_bacf, _bfga _fg.PdfRectangle) _fg.PdfRectangle {
	var union _fg.PdfRectangle
	union.Llx = _dc.Min(_bacf.Llx, _bfga.Llx)
	union.Lly = _dc.Min(_bacf.Lly, _bfga.Lly)
	union.Urx = _dc.Max(_bacf.Urx, _bfga.Urx)
	union.Ury = _dc.Max(_bacf.Ury, _bfga.Ury)
	return union
}

// merge sorts the list into reading order and combines all paragraphs into
// one: the union of their rectangles and the concatenation of their `_gfaae`
// line slices, passed to `_ggdc`. It returns nil for an empty list.
// NOTE(review): the concatenation appends onto the first paragraph's own
// slice, so spare capacity in that slice's backing array may be overwritten.
func (_bgcff paraList) merge() *textPara {
	_ac.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_bgcff))
	if len(_bgcff) == 0 {
		return nil
	}
	_bgcff.sortReadingOrder()
	first, rest := _bgcff[0], _bgcff[1:]
	bounds := first.PdfRectangle
	lines := first._gfaae
	for _, para := range rest {
		bounds = _ebge(bounds, para.PdfRectangle)
		lines = append(lines, para._gfaae...)
	}
	return _ggdc(bounds, lines)
}

// pullWord adds `_gbdd` to the bag under index `_abfa`: the bag's rectangle is
// extended to cover the word, `_fab` is raised to the word's `_adecc` if that
// is larger, and the word is recorded in `_dfd[_abfa]`.
// `_dfd[_abfa]` must already be a non-nil map.
func (_fgcd *wordBag) pullWord(_gbdd *textWord, _abfa int, _dfd map[int]map[*textWord]struct{}) {
	_fgcd.PdfRectangle = _ebge(_fgcd.PdfRectangle, _gbdd.PdfRectangle)
	if _gbdd._adecc > _fgcd._fab {
		_fgcd._fab = _gbdd._adecc
	}
	bucket := append(_fgcd._aac[_abfa], _gbdd)
	_fgcd._aac[_abfa] = bucket
	_dfd[_abfa][_gbdd] = struct{}{}
}

// blocked reports whether one of the bag's ruling lists blocks the path
// between the bag and `_ddfa`. `_egga` is consulted when the word lies
// entirely to the left or right of the bag, `_bebd` when it lies entirely
// below or above it. The x test runs first; if it does not block, the y test
// follows.
func (_facf *wordBag) blocked(_ddfa *textWord) bool {
	switch {
	case _ddfa.Urx < _facf.Llx:
		from := _fcad(_ddfa.PdfRectangle)
		to := _cccab(_facf.PdfRectangle)
		if _facf._egga.blocks(from, to) {
			if _gbbdd {
				_ac.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _ddfa, _facf)
			}
			return true
		}
	case _facf.Urx < _ddfa.Llx:
		from := _fcad(_facf.PdfRectangle)
		to := _cccab(_ddfa.PdfRectangle)
		if _facf._egga.blocks(from, to) {
			if _gbbdd {
				_ac.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s", _ddfa, _facf)
			}
			return true
		}
	}
	switch {
	case _ddfa.Ury < _facf.Lly:
		from := _fedf(_ddfa.PdfRectangle)
		to := _bdac(_facf.PdfRectangle)
		if _facf._bebd.blocks(from, to) {
			if _gbbdd {
				_ac.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _ddfa, _facf)
			}
			return true
		}
	case _facf.Ury < _ddfa.Lly:
		from := _fedf(_facf.PdfRectangle)
		to := _bdac(_ddfa.PdfRectangle)
		if _facf._bebd.blocks(from, to) {
			if _gbbdd {
				_ac.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s", _ddfa, _facf)
			}
			return true
		}
	}
	return false
}

// _bddc returns a new list holding the given lines, the string `_efaf` and the
// child lists `_dgca`.
func _bddc(_fgde []*textLine, _efaf string, _dgca []*list) *list {
	node := list{_ggcfd: _fgde, _aeaa: _efaf, _edge: _dgca}
	return &node
}

// findPrimSec returns the first ruling for which `_ebfaf` accepts the
// difference between its `_cbag` and `_gaef`, and whose range
// [`_efgeb`-`_cebe`, `_bbge`+`_cebe`] contains `_cgbf`. It returns nil when
// no ruling matches.
func (_daaeg rulingList) findPrimSec(_gaef, _cgbf float64) *ruling {
	for _, candidate := range _daaeg {
		if !_ebfaf(candidate._cbag - _gaef) {
			continue
		}
		inRange := candidate._efgeb-_cebe <= _cgbf && _cgbf <= candidate._bbge+_cebe
		if !inRange {
			continue
		}
		return candidate
	}
	return nil
}

// _cdfc returns the negated lower y coordinate of the bounding box of `_fdc`.
func _cdfc(_fdc bounded) float64 {
	box := _fdc.bbox()
	return -box.Lly
}

// _debff returns the left edge of `_ggcf` minus the right edge of `_dcdg`.
func _debff(_ggcf, _dcdg bounded) float64 {
	left := _ggcf.bbox().Llx
	right := _dcdg.bbox().Urx
	return left - right
}

// updateBBox grows the cell's rectangle so that it also covers the rectangle
// of every paragraph in the cell.
func (_eafa *compositeCell) updateBBox() {
	for _, para := range _eafa.paraList {
		grown := _ebge(_eafa.PdfRectangle, para.PdfRectangle)
		_eafa.PdfRectangle = grown
	}
}

// _cdec returns a new subpath whose only point is `_cgcdg`.
func _cdec(_cgcdg _dca.Point) *subpath {
	points := []_dca.Point{_cgcdg}
	return &subpath{_gdgd: points}
}

// _fbba returns `_bfc` of the two arguments unless `_ebfaf` accepts that
// value, in which case it returns `_gedbc` of the two arguments instead.
func _fbba(_ffeeb, _edgbb bounded) float64 {
	primary := _bfc(_ffeeb, _edgbb)
	if _ebfaf(primary) {
		return _gedbc(_ffeeb, _edgbb)
	}
	return primary
}

// fontsize returns the `_ceacg` value of the paragraph's first line.
// It panics when the paragraph has no lines.
func (_caae *textPara) fontsize() float64 {
	firstLine := _caae._gfaae[0]
	return firstLine._ceacg
}
// _ebage builds the top-level lists for the candidate lines in `_gada`, which are grouped
// by a float64 key. The keys are ordered by _ebecd and the lines under the first key each
// start one list item; deeper levels are delegated to _gdfa and the remaining lines of an
// item are collected by _bdca. It always returns a non-nil slice.
func _ebage(_fgbeg []*textLine, _gada map[float64][]*textLine) []*list {
	keys := _ebecd(_gada)
	result := []*list{}
	if len(keys) == 0 {
		return result
	}

	const nextLevel = 1
	top := _gada[keys[0]]
	for i, head := range top {
		start := head._bfcg
		// -1 is passed on when there is no following item at this level.
		end := -1.0
		if i+1 < len(top) {
			end = top[i+1]._bfcg
		}

		children := []*list{}
		if nextLevel < len(keys) {
			children = _gdfa(_fgbeg, _gada, keys, nextLevel, start, end)
		}

		// The item's own lines stop at its first child line when it has one.
		limit := end
		if len(children) > 0 && len(children[0]._ggcfd) > 0 {
			limit = children[0]._ggcfd[0]._bfcg
		}

		itemLines := append([]*textLine{head}, _bdca(head, _fgbeg, keys, start, limit)...)
		item := _bddc(itemLines, "\u0062\u0075\u006c\u006c\u0065\u0074", children)
		item._fbfaf = _gadcf(itemLines, "")
		result = append(result, item)
	}
	return result
}

// asRuling converts the rectangle into a ruling of the rectangle's kind `_eegf`.
// The primary coordinate is the midpoint of the rectangle across the ruling and the
// secondary extent runs along it. It returns (nil, false) when checkWidth rejects the
// rectangle's thickness or when the kind is neither `_cbab` nor `_faccd`.
func (_edadf rectRuling) asRuling() (*ruling, bool) {
	var (
		primary, lo, hi float64
		sideA, sideB    float64
		widthFailure    string
	)
	switch _edadf._eegf {
	case _cbab:
		primary = 0.5 * (_edadf.Llx + _edadf.Urx)
		lo, hi = _edadf.Lly, _edadf.Ury
		sideA, sideB = _edadf.Llx, _edadf.Urx
		widthFailure = "\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076"
	case _faccd:
		primary = 0.5 * (_edadf.Lly + _edadf.Ury)
		lo, hi = _edadf.Llx, _edadf.Urx
		sideA, sideB = _edadf.Lly, _edadf.Ury
		widthFailure = "\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076"
	default:
		_ac.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _edadf._eegf)
		return nil, false
	}

	width, ok := _edadf.checkWidth(sideA, sideB)
	if !ok {
		if _cadc {
			_ac.Log.Error(widthFailure, _edadf)
		}
		return nil, false
	}

	return &ruling{
		_gffa:  _edadf._eegf,
		Color:  _edadf.Color,
		_adaa:  _babea,
		_cbag:  primary,
		_efgeb: lo,
		_bbge:  hi,
		_afdc:  width,
	}, true
}
// String returns a multi-line description of the stack: a header with the number of
// entries followed by one indexed line per entry. Nil entries are shown as "<nil>".
func (_ffde *stateStack) String() string {
	var out _df.Builder
	out.WriteString(_gde.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(*_ffde)))
	for idx, state := range *_ffde {
		desc := "\u003c\u006e\u0069l\u003e"
		if state != nil {
			desc = state.String()
		}
		out.WriteString("\u000a")
		out.WriteString(_gde.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", idx, desc))
	}
	return out.String()
}
// String returns a one-line description of the text state: the `_aaec`, `_ggf` and `_dda`
// values and the base font name, or "[NOT SET]" when no font is set.
func (_bedd *textState) String() string {
	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	if font := _bedd._ecf; font != nil {
		fontName = font.BaseFont()
	}
	return _gde.Sprintf(
		"\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071",
		_bedd._aaec, _bedd._ggf, _bedd._dda, fontName,
	)
}

// moveText is a thin wrapper that forwards the offsets to moveLP.
func (_dcba *textObject) moveText(_ebe, _fbfa float64) {
	_dcba.moveLP(_ebe, _fbfa)
}

// clearPath drops the accumulated `_efb` value and resets the `_egfd` flag.
func (_ebeb *shapesState) clearPath() {
	_ebeb._egfd = false
	_ebeb._efb = nil
	if !_bdefa {
		return
	}
	_ac.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _ebeb)
}

// computeBbox returns the rectangle obtained by merging (via _ebge) the rectangles of all
// non-nil cells of the table. It returns the zero rectangle when the table has no cells.
func (_acba *textTable) computeBbox() _fg.PdfRectangle {
	var (
		box    _fg.PdfRectangle
		seeded bool
	)
	for row := 0; row < _acba._gebeeb; row++ {
		for col := 0; col < _acba._acddc; col++ {
			cell := _acba.get(col, row)
			switch {
			case cell == nil:
				// Empty slot: nothing to merge.
			case seeded:
				box = _ebge(box, cell.PdfRectangle)
			default:
				box, seeded = cell.PdfRectangle, true
			}
		}
	}
	return box
}

// _aaca starts a new text line from the first word of `_efcb` at depth index `_ggeb`.
// The line takes its rectangle, font size and depth from that word, which is then moved
// into the line with pullWord.
func _aaca(_efcb *wordBag, _ggeb int) *textLine {
	seed := _efcb.firstWord(_ggeb)
	ln := &textLine{
		PdfRectangle: seed.PdfRectangle,
		_ceacg:       seed._adecc,
		_bfcg:        seed._adgge,
	}
	ln.pullWord(_efcb, seed, _ggeb)
	return ln
}

// _ggdc returns a paragraph with bounding box `_fbgcc` that holds the lines `_efaa`.
func _ggdc(_fbgcc _fg.PdfRectangle, _efaa []*textLine) *textPara {
	para := new(textPara)
	para.PdfRectangle = _fbgcc
	para._gfaae = _efaa
	return para
}

// _bfc returns the difference between the _cdfc values of `_fgfd` and `_gbcb`.
func _bfc(_fgfd, _gbcb bounded) float64 {
	first, second := _cdfc(_fgfd), _cdfc(_gbcb)
	return first - second
}
// _dcdb reports whether `_ccde` has at least `_ggg` runes, ends in a rune of the
// unicode.Hyphen class, and has a non-space rune directly before that hyphen.
func _dcdb(_ccde string) bool {
	if _f.RuneCountInString(_ccde) < _ggg {
		return false
	}
	last, size := _f.DecodeLastRuneInString(_ccde)
	if size <= 0 {
		return false
	}
	if !_fc.Is(_fc.Hyphen, last) {
		return false
	}
	before, beforeSize := _f.DecodeLastRuneInString(_ccde[:len(_ccde)-size])
	if beforeSize <= 0 {
		return false
	}
	return !_fc.IsSpace(before)
}

// lineTo adds the point (`_dgba`, `_ceb`) to the current path via addPoint.
func (_adgfg *shapesState) lineTo(_dgba, _ceb float64) {
	if _bdefa {
		devPt := _adgfg.devicePoint(_dgba, _ceb)
		_ac.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _dgba, _ceb, devPt)
	}
	_adgfg.addPoint(_dgba, _ceb)
}

// _fcgc reports whether the x-ranges of the two rectangles overlap (touching counts).
func _fcgc(_abgb, _bdgc _fg.PdfRectangle) bool {
	if _bdgc.Llx > _abgb.Urx {
		return false
	}
	return _abgb.Llx <= _bdgc.Urx
}

// text returns the paragraph's text as produced by writeText.
func (_bdab *textPara) text() string {
	var buf _dfg.Buffer
	_bdab.writeText(&buf)
	return buf.String()
}

// lines returns the lines of all paragraphs in the list, concatenated in list order.
func (_acdeg paraList) lines() []*textLine {
	var all []*textLine
	for i := range _acdeg {
		all = append(all, _acdeg[i]._gfaae...)
	}
	return all
}

// _fedf returns a ruling of kind `_faccd` along the top edge of `_acfc`.
func _fedf(_acfc _fg.PdfRectangle) *ruling {
	top := new(ruling)
	top._gffa = _faccd
	top._cbag = _acfc.Ury
	top._efgeb = _acfc.Llx
	top._bbge = _acfc.Urx
	return top
}

// toTextMarks returns the text marks of all paragraphs whose `_bfge` flag is not set.
// Between a paragraph and its successor in the list, _ccag is called once with a space
// when _fgfb accepts the pair and twice with a line feed otherwise. _ccag is called with
// two more line feeds at the end. A running offset is threaded through all calls.
func (_egdc paraList) toTextMarks() []TextMark {
	var (
		offset int
		marks  []TextMark
	)
	last := len(_egdc) - 1
	for i, para := range _egdc {
		if para._bfge {
			continue
		}
		marks = append(marks, para.toTextMarks(&offset)...)
		if i == last {
			continue
		}
		if _fgfb(para, _egdc[i+1]) {
			marks = _ccag(marks, &offset, "\u0020")
			continue
		}
		marks = _ccag(marks, &offset, "\u000a")
		marks = _ccag(marks, &offset, "\u000a")
	}
	marks = _ccag(marks, &offset, "\u000a")
	marks = _ccag(marks, &offset, "\u000a")
	return marks
}

// _bbagb returns the keys of `_fgage` in increasing order.
func _bbagb(_fgage map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_fgage))
	for k := range _fgage {
		keys = append(keys, k)
	}
	_ab.Float64s(keys)
	return keys
}

// _bfgee returns, for each index i in [0, `_fega`), the running total of
// len(`_cbebf`[j])+1 over all j < i, together with the grand total over all indices.
func _bfgee(_fega int, _cbebf map[int][]float64) ([]int, int) {
	starts := make([]int, _fega)
	total := 0
	for i := range starts {
		starts[i] = total
		total += 1 + len(_cbebf[i])
	}
	return starts, total
}
// String returns a short description of the ruling list: "{ EMPTY }" for an empty list,
// the vertical x horizontal counts when either kind is missing, and otherwise the counts
// plus the rectangle spanned by the first and last rulings of each kind as returned by
// vertsHorzs.
func (_dddgg rulingList) String() string {
	if len(_dddgg) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := _dddgg.vertsHorzs()
	nVerts, nHorzs := len(verts), len(horzs)
	if nVerts == 0 || nHorzs == 0 {
		return _gde.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", nVerts, nHorzs)
	}
	span := _fg.PdfRectangle{
		Llx: verts[0]._cbag,
		Urx: verts[nVerts-1]._cbag,
		Lly: horzs[nHorzs-1]._cbag,
		Ury: horzs[0]._cbag,
	}
	return _gde.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", nVerts, nHorzs, span)
}

// depthBand returns the depth indexes between the indexes of depths `_egef` and `_dddd`,
// as computed by getDepthIdx and depthRange. It returns nil for an empty bag.
func (_bfg *wordBag) depthBand(_egef, _dddd float64) []int {
	if len(_bfg._aac) == 0 {
		return nil
	}
	lo := _bfg.getDepthIdx(_egef)
	hi := _bfg.getDepthIdx(_dddd)
	return _bfg.depthRange(lo, hi)
}

// _bfec is a meta TextMark with text "[X]", a single space as original text and white
// fill and stroke colors.
var _bfec = TextMark{
	Text:        "\u005b\u0058\u005d",
	Original:    "\u0020",
	Meta:        true,
	FillColor:   _be.White,
	StrokeColor: _be.White,
}
// depth returns -1 when the paragraph's `_bfge` flag is set, otherwise the depth of its
// first line, and when it has no lines the depth reported by its `_edce` member.
func (_cagag *textPara) depth() float64 {
	switch {
	case _cagag._bfge:
		return -1.0
	case len(_cagag._gfaae) > 0:
		return _cagag._gfaae[0]._bfcg
	default:
		return _cagag._edce.depth()
	}
}

// _eabef returns the larger of the two integers.
func _eabef(_fefcfa, _daabf int) int {
	if _daabf >= _fefcfa {
		return _daabf
	}
	return _fefcfa
}

// _agfe classifies the segment between the two points by passing its absolute x and y
// extents, together with `_edec`, to _ageb.
func _agfe(_cgde, _febd _dca.Point) rulingKind {
	return _ageb(_dc.Abs(_cgde.X-_febd.X), _dc.Abs(_cgde.Y-_febd.Y), _edec)
}
// PageFonts holds the fonts extracted from a PDF page.
type PageFonts struct {
	// Fonts lists the extracted fonts.
	Fonts []Font
}
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct{
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Text is the extracted text.
Text string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// BBox is the bounding box of the text.
2023-11-11 11:29:03 +00:00
BBox _fg .PdfRectangle ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Font is the font the text was drawn with.
2023-11-11 11:29:03 +00:00
Font *_fg .PdfFont ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2023-11-11 11:29:03 +00:00
FillColor _be .Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2023-11-11 11:29:03 +00:00
StrokeColor _be .Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Orientation is the text orientation
Orientation int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// DirectObject is the underlying PdfObject (Text Object) that represents the visible texts. This is introduced to get
// a simple access to the TextObject in case editing or replacment of some text is needed. E.g during redaction.
2023-11-11 11:29:03 +00:00
DirectObject _dce .PdfObject ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// ObjString is a decoded string operand of a text-showing operator. It has the same value as `Text` attribute except
// when many glyphs are represented with the same Text Object that contains multiple length string operand in which case
// ObjString spans more than one character string that falls in different TextMark objects.
2023-11-11 11:29:03 +00:00
ObjString []string ;Tw float64 ;Th float64 ;Tc float64 ;Index int ;_dgbe bool ;_acbcf *TextTable ;};func (_fefbb rulingList )sortStrict (){_ab .Slice (_fefbb ,func (_dfdeg ,_gafe int )bool {_gadgd ,_dcfa :=_fefbb [_dfdeg ],_fefbb [_gafe ];_aeaba ,_ebcd :=_gadgd ._gffa ,_dcfa ._gffa ;
if _aeaba !=_ebcd {return _aeaba > _ebcd ;};_ffgd ,_dcdac :=_gadgd ._cbag ,_dcfa ._cbag ;if !_ebfaf (_ffgd -_dcdac ){return _ffgd < _dcdac ;};_ffgd ,_dcdac =_gadgd ._efgeb ,_dcfa ._efgeb ;if _ffgd !=_dcdac {return _ffgd < _dcdac ;};return _gadgd ._bbge < _dcfa ._bbge ;
});};func _cgabd (_febe []pathSection )rulingList {_ecba (_febe );if _aebg {_ac .Log .Info ("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs",len (_febe ));};var _cbbe rulingList ;
for _ ,_acea :=range _febe {for _ ,_eeeb :=range _acea ._ged {if !_eeeb .isQuadrilateral (){if _aebg {_ac .Log .Error ("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073",_eeeb );};continue ;};if _gadb ,_deded :=_eeeb .makeRectRuling (_acea .Color );
_deded {_cbbe =append (_cbbe ,_gadb );}else {if _cadc {_ac .Log .Error ("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073",_eeeb );};};};};if _aebg {_ac .Log .Info ("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073",_cbbe .String ());
};return _cbbe ;};type subpath struct{_gdgd []_dca .Point ;_bbbb bool ;};type compositeCell struct{_fg .PdfRectangle ;paraList ;};var _adfb =map[rulingKind ]string {_ccfb :"\u006e\u006f\u006e\u0065",_faccd :"\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_cbab :"\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"};
2023-10-07 13:58:01 +00:00
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// Tables returns the tables extracted from the page.
func (_fac PageText) Tables() []TextTable {
	tables := _fac._agdc
	if _eadb {
		_ac.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}

// removeWord removes `_affa` (via _gced) from the words stored under depth index
// `_agdcf` and deletes the index entry when no words remain.
func (_cbgc *wordBag) removeWord(_affa *textWord, _agdcf int) {
	remaining := _gced(_cbgc._aac[_agdcf], _affa)
	if len(remaining) > 0 {
		_cbgc._aac[_agdcf] = remaining
		return
	}
	delete(_cbgc._aac, _agdcf)
}

// parseStructElement fills the element from the structure dictionary `_affag`.
// The "S" entry (as a string) is stored in `_affb` and the "Pg" entry in `_ebad`. The "K"
// entry decides the rest:
//   - an integer is stored in `_ecaf`;
//   - an array, or a reference (handled as a one-element array), is walked: integers are
//     stored in `_ecaf` (the last one wins, -1 when there is none) and every other entry
//     is parsed recursively into a child element stored in `_dfcd`. An array holding a
//     single integer only sets `_ecaf` and leaves `_dfcd` untouched.
//
// Nothing is changed when `_affag` is not a dictionary.
func (_bfac *structElement) parseStructElement(_affag _dce.PdfObject) {
	dict, isDict := _dce.GetDict(_affag)
	if !isDict {
		_ac.Log.Debug("\u0070\u0061\u0072\u0073\u0065\u0053\u0074\u0072u\u0063\u0074\u0045le\u006d\u0065\u006e\u0074\u003a\u0020d\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006f\u0062\u006a\u0065\u0063t\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075n\u0064\u002e")
		return
	}

	structType := dict.Get("\u0053")
	page := dict.Get("\u0050\u0067")
	typeName := ""
	if structType != nil {
		typeName = structType.String()
	}
	kids := dict.Get("\u004b")

	_bfac._affb = typeName
	_bfac._ebad = page

	// readKids handles the array form of "K".
	readKids := func(entries *_dce.PdfObjectArray) {
		_bfac._ecaf = -1
		if entries.Len() == 1 {
			if single, isInt := entries.Elements()[0].(*_dce.PdfObjectInteger); isInt {
				_bfac._ecaf = int64(*single)
				return
			}
		}
		children := []structElement{}
		for _, entry := range entries.Elements() {
			if num, isInt := entry.(*_dce.PdfObjectInteger); isInt {
				_bfac._ecaf = int64(*num)
				continue
			}
			child := &structElement{}
			child.parseStructElement(entry)
			children = append(children, *child)
		}
		_bfac._dfcd = children
	}

	switch kid := kids.(type) {
	case *_dce.PdfObjectInteger:
		_bfac._ecaf = int64(*kid)
	case *_dce.PdfObjectReference:
		readKids(_dce.MakeArray(kid))
	case *_dce.PdfObjectArray:
		readKids(kid)
	}
}

// bbox returns the bounding box of the word.
func (_faae *textWord) bbox() _fg.PdfRectangle {
	return _faae.PdfRectangle
}

// push appends a copy of `_bbbd` to the stack, so later changes to the caller's state do
// not affect the stacked entry.
func (_cbaf *stateStack) push(_bbbd *textState) {
	saved := new(textState)
	*saved = *_bbbd
	*_cbaf = append(*_cbaf, saved)
}
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// TableCell is a cell in a TextTable.
type TableCell struct {
	_fg.PdfRectangle

	// Text is the extracted text.
	Text string

	// Marks are the TextMarks corresponding to the text in Text.
	Marks TextMarkArray
}

// _fede returns the structure elements three levels below `_cdaee` whose `_affb` value
// is "L". It always returns a non-nil slice.
func _fede(_cdaee structElement) []structElement {
	_fbgdc := []structElement{}
	for _, _aadd := range _cdaee._dfcd {
		for _, _gfee := range _aadd._dfcd {
			for _, _aeda := range _gfee._dfcd {
				if _aeda._affb == "\u004c" {
					_fbgdc = append(_fbgdc, _aeda)
				}
			}
		}
	}
	return _fbgdc
}

// _egfaa returns a word bag that contains only `_bbfb`. The word is stored under the depth
// index that _ebaf computes for it; the bag takes the word's rectangle and font size and
// keeps `_egee` and the two ruling lists `_cge` and `_fgcg`.
func _egfaa(_bbfb *textWord, _egee float64, _cge, _fgcg rulingList) *wordBag {
	_gafb := _ebaf(_bbfb._adgge)
	return &wordBag{
		_aac:         map[int][]*textWord{_gafb: {_bbfb}},
		PdfRectangle: _bbfb.PdfRectangle,
		_fab:         _bbfb._adecc,
		_ggfe:        _egee,
		_egga:        _cge,
		_bebd:        _fgcg,
	}
}

// vertsHorzs splits the list into the rulings of kind `_cbab` (first result) and those of
// kind `_faccd` (second result), keeping list order. Rulings of other kinds are dropped.
func (_gbcgb rulingList) vertsHorzs() (rulingList, rulingList) {
	var _gcefb, _ccbfd rulingList
	for _, _bcbd := range _gbcgb {
		switch _bcbd._gffa {
		case _cbab:
			_gcefb = append(_gcefb, _bcbd)
		case _faccd:
			_ccbfd = append(_ccbfd, _bcbd)
		}
	}
	return _gcefb, _ccbfd
}

// _bdac returns a ruling of kind `_faccd` along the bottom edge of `_bdeb`.
func _bdac(_bdeb _fg.PdfRectangle) *ruling {
	return &ruling{_gffa: _faccd, _cbag: _bdeb.Lly, _efgeb: _bdeb.Llx, _bbge: _bdeb.Urx}
}

// _bada compares two bounded objects by _gedbc (difference of left edges) first and falls
// back to _bfc when _ebfaf reports the first difference as negligible.
func _bada(_edea, _gdge bounded) float64 {
	_cfgg := _gedbc(_edea, _gdge)
	if !_ebfaf(_cfgg) {
		return _cfgg
	}
	return _bfc(_edea, _gdge)
}

// _aeea removes the last rune from the last mark of `_bfcbe` and updates the running
// offset `*_egdb` to match.
//
// When the last mark holds a single rune the whole mark is dropped and the offset is set
// to the end of the new last mark (or to the start of the dropped mark when no marks
// remain). Otherwise the mark's text is replaced by the result of _badfg and the offset
// is moved by the change in byte length.
//
// Differences from the previous implementation:
//   - the shortened text is now stored in the slice; before, only a local copy of the mark
//     was changed, so the offset moved while the mark kept its old text;
//   - an empty `_bfcbe` is returned unchanged and dropping the only mark no longer indexes
//     out of range.
func _aeea(_bfcbe []TextMark, _egdb *int) []TextMark {
	if len(_bfcbe) == 0 {
		return _bfcbe
	}
	_lastIdx := len(_bfcbe) - 1
	_aedd := _bfcbe[_lastIdx]

	if _f.RuneCountInString(_aedd.Text) == 1 {
		_bfcbe = _bfcbe[:_lastIdx]
		if len(_bfcbe) == 0 {
			*_egdb = _aedd.Offset
			return _bfcbe
		}
		_bcfc := _bfcbe[len(_bfcbe)-1]
		*_egdb = _bcfc.Offset + len(_bcfc.Text)
		return _bfcbe
	}

	_aagfc := _badfg(_aedd.Text)
	*_egdb += len(_aagfc) - len(_aedd.Text)
	// Write through the slice: `_aedd` is only a copy of the element.
	_bfcbe[_lastIdx].Text = _aagfc
	return _bfcbe
}

// moveLP applies a translation by (`_bdec`, `_caf`) to the matrix `_abcf` and then copies
// the result into `_acbc`.
func (_agc *textObject) moveLP(_bdec, _caf float64) {
	_agc._abcf.Concat(_dca.NewMatrix(1, 0, 0, 1, _bdec, _caf))
	_agc._acbc = _agc._abcf
}
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Options extractor options.
type Options struct{
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// DisableDocumentTags specifies whether to use the document tags during list extraction.
DisableDocumentTags bool ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// ApplyCropBox will extract page text based on page cropbox if set to `true`.
ApplyCropBox bool ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// UseSimplerExtractionProcess will skip topological text ordering and table processing.
//
// NOTE: While normally the extra processing is beneficial, it can also lead to problems when it does not work.
// Thus it is a flag to allow the user to control this process.
//
// Skipping some extraction processes would also lead to the reduced processing time.
2023-11-11 11:29:03 +00:00
UseSimplerExtractionProcess bool ;};func (_bddd *shapesState )addPoint (_faca ,_gbgd float64 ){_fadc :=_bddd .establishSubpath ();_gbdg :=_bddd .devicePoint (_faca ,_gbgd );if _fadc ==nil {_bddd ._egfd =true ;_bddd ._eaeeb =_gbdg ;}else {_fadc .add (_gbdg );
};};func (_dfac paraList )sortReadingOrder (){_ac .Log .Trace ("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_dfac ));
if len (_dfac )<=1{return ;};_dfac .computeEBBoxes ();_ab .Slice (_dfac ,func (_efgb ,_afgb int )bool {return _fbba (_dfac [_efgb ],_dfac [_afgb ])<=0});};func _begea (_abfd ,_egbg float64 )bool {return _dc .Abs (_abfd -_egbg )<=_cebe };func (_afcbe *textTable )get (_cgffg ,_cdebg int )*textPara {return _afcbe ._cfgbb [_bafcd (_cgffg ,_cdebg )];
};func (_cabg *textLine )bbox ()_fg .PdfRectangle {return _cabg .PdfRectangle };const (RenderModeStroke RenderMode =1<<iota ;RenderModeFill ;RenderModeClip ;);func (_gbegf rulingList )primMinMax ()(float64 ,float64 ){_bbec ,_beafa :=_gbegf [0]._cbag ,_gbegf [0]._cbag ;
for _ ,_eaag :=range _gbegf [1:]{if _eaag ._cbag < _bbec {_bbec =_eaag ._cbag ;}else if _eaag ._cbag > _beafa {_beafa =_eaag ._cbag ;};};return _bbec ,_beafa ;};func (_bfdee compositeCell )String ()string {_gcec :="";if len (_bfdee .paraList )> 0{_gcec =_dbdbb (_bfdee .paraList .merge ().text (),50);
};return _gde .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071",_bfdee .PdfRectangle ,len (_bfdee .paraList ),_gcec );};func _faefe (_ccbd ,_bbfff _e .Image )_e .Image {_gcbef ,_facab :=_bbfff .Bounds ().Size (),_ccbd .Bounds ().Size ();
_gdgdd ,_eebbf :=_gcbef .X ,_gcbef .Y ;if _facab .X > _gdgdd {_gdgdd =_facab .X ;};if _facab .Y > _eebbf {_eebbf =_facab .Y ;};_feadb :=_e .Rect (0,0,_gdgdd ,_eebbf );if _gcbef .X !=_gdgdd ||_gcbef .Y !=_eebbf {_abbg :=_e .NewRGBA (_feadb );_ef .BiLinear .Scale (_abbg ,_feadb ,_ccbd ,_bbfff .Bounds (),_ef .Over ,nil );
_bbfff =_abbg ;};if _facab .X !=_gdgdd ||_facab .Y !=_eebbf {_ecfc :=_e .NewRGBA (_feadb );_ef .BiLinear .Scale (_ecfc ,_feadb ,_ccbd ,_ccbd .Bounds (),_ef .Over ,nil );_ccbd =_ecfc ;};_dceab :=_e .NewRGBA (_feadb );_ef .DrawMask (_dceab ,_feadb ,_ccbd ,_e .Point {},_bbfff ,_e .Point {},_ef .Over );
return _dceab ;};func (_eedce gridTiling )complete ()bool {for _ ,_bfaaf :=range _eedce ._faeca {for _ ,_fbfc :=range _bfaaf {if !_fbfc .complete (){return false ;};};};return true ;};func (_ceff paraList )findTables (_cdgd []gridTiling )[]*textTable {_ceff .addNeighbours ();
_ab .Slice (_ceff ,func (_ceeea ,_bfbd int )bool {return _bada (_ceff [_ceeea ],_ceff [_bfbd ])< 0});var _ffdac []*textTable ;if _dcea {_ebdg :=_ceff .findGridTables (_cdgd );_ffdac =append (_ffdac ,_ebdg ...);};if _aagdg {_faeb :=_ceff .findTextTables ();
_ffdac =append (_ffdac ,_faeb ...);};return _ffdac ;};func _gedbc (_gbbb ,_feb bounded )float64 {return _gbbb .bbox ().Llx -_feb .bbox ().Llx };func _efcda (_edfdb map[float64 ]map[float64 ]gridTile )[]float64 {_deggef :=make ([]float64 ,0,len (_edfdb ));
_abgbc :=make (map[float64 ]struct{},len (_edfdb ));for _ ,_bgcd :=range _edfdb {for _deca :=range _bgcd {if _ ,_gbgbg :=_abgbc [_deca ];_gbgbg {continue ;};_deggef =append (_deggef ,_deca );_abgbc [_deca ]=struct{}{};};};_ab .Float64s (_deggef );return _deggef ;
};func (_cfaa *textTable )compositeColCorridors ()map[int ][]float64 {_dfcg :=make (map[int ][]float64 ,_cfaa ._acddc );if _eadb {_ac .Log .Info ("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020",_cfaa ._acddc );
};for _cbae :=0;_cbae < _cfaa ._acddc ;_cbae ++{_dfcg [_cbae ]=nil ;};return _dfcg ;};func (_eddf intSet )add (_agebb int ){_eddf [_agebb ]=struct{}{}};func (_aefdd rulingList )toGrids ()[]rulingList {if _aebg {_ac .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_aefdd );
};_efag :=_aefdd .intersections ();if _aebg {_ac .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_aefdd ),len (_efag ));
for _ ,_gefa :=range _efgdb (_efag ){_gde .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_gefa ,_efag [_gefa ]);};};_afdag :=make (map[int ]intSet ,len (_aefdd ));for _cedfc :=range _aefdd {_cddag :=_aefdd .connections (_efag ,_cedfc );if len (_cddag )> 0{_afdag [_cedfc ]=_cddag ;
};};if _aebg {_ac .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_afdag ));for _ ,_bccg :=range _efgdb (_afdag ){_gde .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_bccg ,_afdag [_bccg ]);
};};_dcgf :=_afbef (len (_aefdd ),func (_ffaf ,_gfbf int )bool {_bfcfg ,_eefb :=len (_afdag [_ffaf ]),len (_afdag [_gfbf ]);if _bfcfg !=_eefb {return _bfcfg > _eefb ;};return _aefdd .comp (_ffaf ,_gfbf );});if _aebg {_ac .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_dcgf );
};_adec :=[][]int {{_dcgf [0]}};_dgeg :for _ ,_cbcbc :=range _dcgf [1:]{for _ecda ,_ddcae :=range _adec {for _ ,_dcde :=range _ddcae {if _afdag [_dcde ].has (_cbcbc ){_adec [_ecda ]=append (_ddcae ,_cbcbc );continue _dgeg ;};};};_adec =append (_adec ,[]int {_cbcbc });
};if _aebg {_ac .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_adec );};_ab .SliceStable (_adec ,func (_fcfa ,_cfbf int )bool {return len (_adec [_fcfa ])> len (_adec [_cfbf ])});for _ ,_ccge :=range _adec {_ab .Slice (_ccge ,func (_bcab ,_eafe int )bool {return _aefdd .comp (_ccge [_bcab ],_ccge [_eafe ])});
};_aeede :=make ([]rulingList ,len (_adec ));for _ceec ,_bedc :=range _adec {_ddddb :=make (rulingList ,len (_bedc ));for _cbef ,_fgcdf :=range _bedc {_ddddb [_cbef ]=_aefdd [_fgcdf ];};_aeede [_ceec ]=_ddddb ;};if _aebg {_ac .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_aeede );
};var _ddfgc []rulingList ;for _ ,_acfg :=range _aeede {if _gefcg ,_cbbeg :=_acfg .isActualGrid ();_cbbeg {_acfg =_gefcg ;_acfg =_acfg .snapToGroups ();_ddfgc =append (_ddfgc ,_acfg );};};if _aebg {_agcd ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_ddfgc );
_ac .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_aeede ),len (_ddfgc ));};return _ddfgc ;};func (_gegg *textObject )getStrokeColor ()_be .Color {return _becd (_gegg ._dbf .ColorspaceStroking ,_gegg ._dbf .ColorStroking );
};func _egbac (_debd *textLine ,_aadg []*textLine ,_fdab []float64 )float64 {var _cgdf float64 =-1;for _ ,_gadd :=range _aadg {if _gadd ._bfcg > _debd ._bfcg {if _dc .Round (_gadd .Llx )>=_dc .Round (_debd .Llx ){_cgdf =_gadd ._bfcg ;}else {break ;};};
};return _cgdf ;};func (_ddab *shapesState )newSubPath (){_ddab .clearPath ();if _bdefa {_ac .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_ddab );};};
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// String returns a description of `t`.
func (_beca *textTable )String ()string {return _gde .Sprintf ("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074",_beca ._acddc ,_beca ._gebeeb ,_beca ._aefef );};func _ebcgb (_dfgfa []*textWord ,_bgba int )[]*textWord {_bagbe :=len (_dfgfa );
copy (_dfgfa [_bgba :],_dfgfa [_bgba +1:]);return _dfgfa [:_bagbe -1];};func (_adeeg intSet )del (_ddbc int ){delete (_adeeg ,_ddbc )};func (_effg paraList )list ()[]*list {var _dddbf []*textLine ;var _gdcdg []*textLine ;for _ ,_bceac :=range _effg {_gbfe :=_bceac .getListLines ();
_dddbf =append (_dddbf ,_gbfe ...);_gdcdg =append (_gdcdg ,_bceac ._gfaae ...);};_dcdbc :=_ebeg (_dddbf );_dde :=_ebage (_gdcdg ,_dcdbc );return _dde ;};func (_gdggd *ruling )encloses (_dafaf ,_afgc float64 )bool {return _gdggd ._efgeb -_cebe <=_dafaf &&_afgc <=_gdggd ._bbge +_cebe ;
2023-09-07 17:40:17 +00:00
};
2023-11-11 11:29:03 +00:00
// NewFromContents creates a new extractor from a content stream string and the page
// resources it refers to. The extractor starts with empty `_bc` and `_bca` maps, and the
// call is reported to the license tracker. The returned error is always nil.
func NewFromContents(contents string, resources *_fg.PdfPageResources) (*Extractor, error) {
	const usageKey = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"

	ex := new(Extractor)
	ex._geb = contents
	ex._af = resources
	ex._bc = map[string]fontEntry{}
	ex._bca = map[string]textResult{}

	_ec.TrackUse(usageKey)
	return ex, nil
}

// toTextMarks returns the text marks of the line's words in order. For a word whose
// `_eadcb` flag is set, _ccag is first called with a space. `_bffe` is the running
// offset shared with the callees.
func (_ccgc *textLine) toTextMarks(_bffe *int) []TextMark {
	var marks []TextMark
	for _, word := range _ccgc._fgbe {
		if word._eadcb {
			marks = _ccag(marks, _bffe, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_bffe)...)
	}
	return marks
}
2023-08-03 17:30:04 +00:00
2023-10-07 13:58:01 +00:00
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
//
// Replace with a function like Extract() (*PageText, error)
func (_ccc *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_fdec ,_bafd ,_cgab ,_bcgf :=_ccc .extractPageText (_ccc ._geb ,_ccc ._af ,_dca .IdentityMatrix (),0);if _bcgf !=nil &&_bcgf !=_fg .ErrColorOutOfRange {return nil ,0,0,_bcgf ;};if _ccc ._cc !=nil {_fdec ._dbc ._dcc =_ccc ._cc .UseSimplerExtractionProcess ;
};_fdec .computeViews ();_bcgf =_ggde (_fdec );if _bcgf !=nil {return nil ,0,0,_bcgf ;};if _ccc ._cc !=nil {if _ccc ._cc .ApplyCropBox &&_ccc ._fb !=nil {_fdec .ApplyArea (*_ccc ._fb );};_fdec ._dbc ._dbcd =_ccc ._cc .DisableDocumentTags ;};return _fdec ,_bafd ,_cgab ,nil ;
};func (_egcc *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_eacc :=make (map[int ]map[*textWord ]struct{},len (_egcc ._aac ));for _acf :=range _egcc ._aac {_eacc [_acf ]=make (map[*textWord ]struct{});};return _eacc ;};func (_gefca gridTile )numBorders ()int {_faag :=0;
if _gefca ._cbge {_faag ++;};if _gefca ._dafe {_faag ++;};if _gefca ._beacf {_faag ++;};if _gefca ._cbbgc {_faag ++;};return _faag ;};func (_efgbe *textTable )emptyCompositeColumn (_acca int )bool {for _aecg :=0;_aecg < _efgbe ._gebeeb ;_aecg ++{if _fbfe ,_afbf :=_efgbe ._edbe [_bafcd (_acca ,_aecg )];
_afbf {if len (_fbfe .paraList )> 0{return false ;};};};return true ;};func (_ffbc rulingList )toTilings ()(rulingList ,[]gridTiling ){_ffbc .log ("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s");if len (_ffbc )==0{return nil ,nil ;};_ffbc =_ffbc .tidied ("\u0061\u006c\u006c");
_ffbc .log ("\u0074\u0069\u0064\u0069\u0065\u0064");_acbe :=_ffbc .toGrids ();_bfadf :=make ([]gridTiling ,len (_acbe ));for _fbbe ,_gbfcc :=range _acbe {_bfadf [_fbbe ]=_gbfcc .asTiling ();};return _ffbc ,_bfadf ;};type structElement struct{_affb string ;
_dfcd []structElement ;_ecaf int64 ;_ebad _dce .PdfObject ;};func (_accbe *textTable )getDown ()paraList {_dbeg :=make (paraList ,_accbe ._acddc );for _ddbdcc :=0;_ddbdcc < _accbe ._acddc ;_ddbdcc ++{_eefc :=_accbe .get (_ddbdcc ,_accbe ._gebeeb -1)._fgbea ;
if _eefc .taken (){return nil ;};_dbeg [_ddbdcc ]=_eefc ;};for _dgef :=0;_dgef < _accbe ._acddc -1;_dgef ++{if _dbeg [_dgef ]._abfec !=_dbeg [_dgef +1]{return nil ;};};return _dbeg ;};func (_bbbgc *wordBag )applyRemovals (_acac map[int ]map[*textWord ]struct{}){for _cccd ,_gfge :=range _acac {if len (_gfge )==0{continue ;
};_dadea :=_bbbgc ._aac [_cccd ];_cbafg :=len (_dadea )-len (_gfge );if _cbafg ==0{delete (_bbbgc ._aac ,_cccd );continue ;};_gabd :=make ([]*textWord ,_cbafg );_bffc :=0;for _ ,_gda :=range _dadea {if _ ,_eefd :=_gfge [_gda ];!_eefd {_gabd [_bffc ]=_gda ;
_bffc ++;};};_bbbgc ._aac [_cccd ]=_gabd ;};};func (_ggege intSet )has (_ccgdb int )bool {_ ,_cfcac :=_ggege [_ccgdb ];return _cfcac };func (_edgef paraList )findTextTables ()[]*textTable {var _eedf []*textTable ;for _ ,_fgcb :=range _edgef {if _fgcb .taken ()||_fgcb .Width ()==0{continue ;
};_aedagf :=_fgcb .isAtom ();if _aedagf ==nil {continue ;};_aedagf .growTable ();if _aedagf ._acddc *_aedagf ._gebeeb < _ffa {continue ;};_aedagf .markCells ();_aedagf .log ("\u0067\u0072\u006fw\u006e");_eedf =append (_eedf ,_aedagf );};return _eedf ;};
// intersections returns, for every ruling index that takes part in at least one
// intersection, the set of indices of the rulings it intersects.
// Rulings are first split by kind into `_cbab` and `_faccd` index lists (other
// kinds are ignored). nil is returned when either list is shorter than
// `_fbff+1` / `_gaab+1`, or when the two lists together exceed `_bcccg` entries.
// NOTE(review): `_cbab`/`_faccd` look like the vertical/horizontal kinds - confirm.
func (_aeac rulingList )intersections ()map[int ]intSet {var _afbeb ,_eeeae []int ;for _caee ,_adee :=range _aeac {switch _adee ._gffa {case _cbab :_afbeb =append (_afbeb ,_caee );case _faccd :_eeeae =append (_eeeae ,_caee );};};if len (_afbeb )< _fbff +1||len (_eeeae )< _gaab +1{return nil ;
};if len (_afbeb )+len (_eeeae )> _bcccg {_ac .Log .Debug ("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",len (_aeac ),len (_afbeb ),len (_eeeae ));
return nil ;};_fegf :=make (map[int ]intSet ,len (_afbeb )+len (_eeeae ));for _ ,_ebae :=range _afbeb {for _ ,_bfdc :=range _eeeae {if _aeac [_ebae ].intersects (_aeac [_bfdc ]){if _ ,_ceda :=_fegf [_ebae ];!_ceda {_fegf [_ebae ]=make (intSet );};if _ ,_adef :=_fegf [_bfdc ];
!_adef {_fegf [_bfdc ]=make (intSet );};_fegf [_ebae ].add (_bfdc );_fegf [_bfdc ].add (_ebae );};};};return _fegf ;};
// getFontDict looks up the font object named `_edac` in the text object's page
// resources (`_ccae`).
// It returns (nil, nil) and logs at debug level when there are no resources, and
// returns the error "font not in resources" when the name is not found.
func (_edab *textObject )getFontDict (_edac string )(_cabb _dce .PdfObject ,_bbd error ){_dffg :=_edab ._ccae ;if _dffg ==nil {_ac .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_edac );
return nil ,nil ;};_cabb ,_bdd :=_dffg .GetFontByName (_dce .PdfObjectName (_edac ));if !_bdd {_ac .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_edac );
return nil ,_a .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _cabb ,nil ;};
// extractPageResourcesToFont appends one Font entry to `_dcf.Fonts` for every key
// of the resources' font dictionary whose font name is not already present.
// For each font it records the name, subtype, CID/simple flags, whether a
// ToUnicode map is present (non-empty), and the decoded embedded font program
// taken from the first non-nil of FontFile (".pfb"), FontFile2 (".ttf") or
// FontFile3 (".cff"). A debug message is logged when no font file name was set.
// It returns an error when the font entry is not a dictionary, when a key cannot
// be resolved, or when font construction or stream decoding fails.
// NOTE(review): the result of FontDescriptor() is dereferenced without a nil
// check - confirm it can never be nil for the fonts reaching this code.
func (_dcf *PageFonts )extractPageResourcesToFont (_cf *_fg .PdfPageResources )error {_ced ,_afa :=_dce .GetDict (_cf .Font );
if !_afa {return _a .New (_bf );};for _ ,_bd :=range _ced .Keys (){var (_cg =true ;_afe []byte ;_dcag string ;);_da ,_age :=_cf .GetFontByName (_bd );if !_age {return _a .New (_dcd );};_dfb ,_bde :=_fg .NewPdfFontFromPdfObject (_da );if _bde !=nil {return _bde ;
};_ffe :=_dfb .FontDescriptor ();_ed :=_dfb .FontDescriptor ().FontName .String ();_cedc :=_dfb .Subtype ();if _bb (_dcf .Fonts ,_ed ){continue ;};if len (_dfb .ToUnicode ())==0{_cg =false ;};if _ffe .FontFile !=nil {if _def ,_ebb :=_dce .GetStream (_ffe .FontFile );
_ebb {_afe ,_bde =_dce .DecodeStream (_def );if _bde !=nil {return _bde ;};_dcag =_ed +"\u002e\u0070\u0066\u0062";};}else if _ffe .FontFile2 !=nil {if _cd ,_ae :=_dce .GetStream (_ffe .FontFile2 );_ae {_afe ,_bde =_dce .DecodeStream (_cd );if _bde !=nil {return _bde ;
};_dcag =_ed +"\u002e\u0074\u0074\u0066";};}else if _ffe .FontFile3 !=nil {if _ba ,_efd :=_dce .GetStream (_ffe .FontFile3 );_efd {_afe ,_bde =_dce .DecodeStream (_ba );if _bde !=nil {return _bde ;};_dcag =_ed +"\u002e\u0063\u0066\u0066";};};if len (_dcag )< 1{_ac .Log .Debug (_adc );
};_eg :=Font {FontName :_ed ,PdfFont :_dfb ,IsCID :_dfb .IsCID (),IsSimple :_dfb .IsSimple (),ToUnicode :_cg ,FontType :_cedc ,FontData :_afe ,FontFileName :_dcag ,FontDescriptor :_ffe };_dcf .Fonts =append (_dcf .Fonts ,_eg );};return nil ;};
// _bacd reports whether the left edge (Llx) of word `_dfgbe` lies in the
// half-open interval [bag.Urx, bag.Urx+_gdba), i.e. at or right of the bag's
// right edge and closer than `_gdba` to it.
func _bacd (_eeeag *wordBag ,_dfgbe *textWord ,_gdba float64 )bool {return _eeeag .Urx <=_dfgbe .Llx &&_dfgbe .Llx < _eeeag .Urx +_gdba ;
};
// minDepth returns `_ggfe - (Ury - _fab)` for the bag.
// NOTE(review): `_ggfe` looks like the page height and `_fab` the font size - confirm.
func (_caad *wordBag )minDepth ()float64 {return _caad ._ggfe -(_caad .Ury -_caad ._fab )};
// log dumps the tiling for debugging when `_gbde` is enabled: the sizes of
// `_abeb` and `_cegbg` with the caller-supplied title, both coordinate lists
// (printed as "llx" and "lly"), and every tile found in `_faeca` at those
// coordinates. It does nothing when `_gbde` is false.
func (_ffdba gridTiling )log (_gafac string ){if !_gbde {return ;};_ac .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_ffdba ._abeb ),len (_ffdba ._cegbg ),_gafac );
_gde .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_ffdba ._abeb );_gde .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_ffdba ._cegbg );for _bdagd ,_geeb :=range _ffdba ._cegbg {_gddf ,_ccbc :=_ffdba ._faeca [_geeb ];
if !_ccbc {continue ;};_gde .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_bdagd ,_geeb );for _dbafc ,_cgceb :=range _ffdba ._abeb {_bdefaf ,_bdada :=_gddf [_cgceb ];if !_bdada {continue ;};_gde .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dbafc ,_bdefaf .String ());
};};};
2023-08-03 17:30:04 +00:00
2023-11-11 11:29:03 +00:00
// String returns a multi-line description of the page text: a "-" header line
// with the element count, one line per element, and a matching "+" footer line.
func (_aaag PageText) String() string {
	header := _gde.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_aaag._fbga))

	var out _df.Builder
	out.WriteString("\u002d" + header)
	for _, element := range _aaag._fbga {
		out.WriteString("\u000a")
		out.WriteString(element.String())
	}
	out.WriteString("\u000a")
	out.WriteString("\u002b" + header)
	return out.String()
}
2023-05-29 17:26:33 +00:00
2023-11-11 11:29:03 +00:00
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	_gcc []TextMark
}

// getTextMarkAtOffset returns the first mark whose Offset equals `_gdcb`, or nil
// when there is none. The returned pointer refers to a copy of the mark, not to
// the element stored in the array.
func (_dbge *TextMarkArray) getTextMarkAtOffset(_gdcb int) *TextMark {
	for i := range _dbge._gcc {
		if _dbge._gcc[i].Offset != _gdcb {
			continue
		}
		found := _dbge._gcc[i] // deliberate copy, as in the original range loop
		return &found
	}
	return nil
}

// moveTextSetLeading sets `_gcg` of the text state to `-_eaea` and then moves
// the line position by (`_cbf`, `_eaea`) via moveLP.
func (_gfa *textObject) moveTextSetLeading(_cbf, _eaea float64) {
	state := _gfa._abcb
	state._gcg = -_eaea
	_gfa.moveLP(_cbf, _eaea)
}
2023-02-07 17:17:49 +00:00
2023-11-11 11:29:03 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	_geb string
	_af  *_fg.PdfPageResources
	_aa  _fg.PdfRectangle
	_fb  *_fg.PdfRectangle
	_bc  map[string]fontEntry
	_bca map[string]textResult
	_ag  int64
	_ca  int
	_cc  *Options
	_agb *_dce.PdfObject
	_ce  _dce.PdfObject
}

// _eded builds a textObject bound to the given extractor, resources, graphics
// state, text state and state stack. Both of its matrices (`_acbc`, `_abcf`)
// start as the identity matrix.
func _eded(_dae *Extractor, _aaaf *_fg.PdfPageResources, _aefe _dcg.GraphicsState, _defa *textState, _cad *stateStack) *textObject {
	to := new(textObject)
	to._ccb = _dae
	to._ccae = _aaaf
	to._dbf = _aefe
	to._ada = _cad
	to._abcb = _defa
	to._acbc = _dca.IdentityMatrix()
	to._abcf = _dca.IdentityMatrix()
	return to
}

// tables returns the exported form (TextTable) of every paragraph's table
// (`_edce`) that is non-nil and reports itself as exportable.
func (_gfbb paraList) tables() []TextTable {
	var exported []TextTable
	if _eadb {
		_ac.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range _gfbb {
		tbl := para._edce
		if tbl == nil || !tbl.isExportable() {
			continue
		}
		exported = append(exported, tbl.toTextTable())
	}
	return exported
}

// _bdce maps a standalone (spacing) accent rune to the string holding the
// corresponding combining mark in the U+0300 block.
var _bdce = map[rune]string{
	0x0060: "\u0300", 0x02CB: "\u0300",
	0x0027: "\u0301", 0x00B4: "\u0301", 0x02B9: "\u0301", 0x02CA: "\u0301",
	0x005E: "\u0302", 0x02C6: "\u0302",
	0x007E: "\u0303", 0x02DC: "\u0303",
	0x00AF: "\u0304", 0x02C9: "\u0304",
	0x02D8: "\u0306",
	0x02D9: "\u0307",
	0x00A8: "\u0308",
	0x00B0: "\u030a", 0x02DA: "\u030a",
	0x02BA: "\u030b", 0x02DD: "\u030b",
	0x02C7: "\u030c",
	0x02C8: "\u030d",
	0x0022: "\u030e",
	0x02BB: "\u0312",
	0x02BC: "\u0313", 0x0486: "\u0313", 0x055A: "\u0313",
	0x02BD: "\u0314", 0x0485: "\u0314", 0x0559: "\u0314",
	0x02D4: "\u031d",
	0x02D5: "\u031e",
	0x02D6: "\u031f",
	0x02D7: "\u0320",
	0x02B2: "\u0321",
	0x00B8: "\u0327",
	0x02CC: "\u0329",
	0x02B7: "\u032b",
	0x02CD: "\u0331",
	0x005F: "\u0332",
	0x204E: "\u0359",
}
2022-06-27 19:58:38 +00:00
2023-10-07 13:58:01 +00:00
// Elements returns the TextMarks in `ma`.
2023-11-11 11:29:03 +00:00
// Elements returns the TextMarks held by the array.
func (_cdf *TextMarkArray) Elements() []TextMark {
	return _cdf._gcc
}

// setTextRenderMode stores `_cdce` as the render mode (`_bgcc`) of the text
// state. Calling it on a nil textObject is a no-op.
func (_cbd *textObject) setTextRenderMode(_cdce int) {
	if _cbd != nil {
		_cbd._abcb._bgcc = RenderMode(_cdce)
	}
}

// toTextMarks returns the TextMarks of the paragraph, threading the running
// offset `_abge` through the helpers.
// A paragraph without a table (`_edce` == nil) yields its own cell marks.
// Otherwise the table is walked row by row: each cell contributes its marks (a
// tab stands in for a missing cell) followed by a space, and rows are separated
// by a newline. When the table is exportable the marks are finally passed
// through `_daae` together with the exported table.
func (_ceabf *textPara) toTextMarks(_abge *int) []TextMark {
	table := _ceabf._edce
	if table == nil {
		return _ceabf.toCellTextMarks(_abge)
	}

	var marks []TextMark
	for row := 0; row < table._gebeeb; row++ {
		for col := 0; col < table._acddc; col++ {
			if cell := table.get(col, row); cell != nil {
				cellMarks := cell.toCellTextMarks(_abge)
				marks = append(marks, cellMarks...)
			} else {
				marks = _ccag(marks, _abge, "\u0009")
			}
			marks = _ccag(marks, _abge, "\u0020")
		}
		if row < table._gebeeb-1 {
			marks = _ccag(marks, _abge, "\u000a")
		}
	}

	if table.isExportable() {
		exported := table.toTextTable()
		marks = _daae(marks, &exported)
	}
	return marks
}

// _cagg reports whether every rune of `_ffcg` is Unicode white space. The empty
// string counts as all-space.
func _cagg(_ffcg string) bool {
	for _, r := range _ffcg {
		if _fc.IsSpace(r) {
			continue
		}
		return false
	}
	return true
}

// bounded is implemented by anything that can report its bounding box.
type bounded interface {
	bbox() _fg.PdfRectangle
}

// depth returns the smallest depth() among the cells of row 0, skipping missing
// cells and cells flagged `_bfge`. It returns 1e10 when no cell qualifies.
func (_abgf *textTable) depth() float64 {
	shallowest := 1e10
	for col := 0; col < _abgf._acddc; col++ {
		if cell := _abgf.get(col, 0); cell != nil && !cell._bfge {
			shallowest = _dc.Min(shallowest, cell.depth())
		}
	}
	return shallowest
}

// intersects reports whether the two rulings are of the two different kinds
// `_cbab`/`_faccd` and each ruling's `_cbag` coordinate lies within the other's
// [`_efgeb`, `_bbge`] span widened by the tolerance `_cebe`.
func (_bgca *ruling) intersects(_dgafg *ruling) bool {
	// within reports whether b's primary coordinate falls inside a's span.
	within := func(a, b *ruling) bool {
		return a._efgeb-_cebe <= b._cbag && b._cbag <= a._bbge+_cebe
	}

	var orthogonal bool
	switch {
	case _bgca._gffa == _cbab && _dgafg._gffa == _faccd:
		orthogonal = true
	case _dgafg._gffa == _cbab && _bgca._gffa == _faccd:
		orthogonal = true
	}
	overlap1 := within(_bgca, _dgafg)
	overlap2 := within(_dgafg, _bgca)
	crossing := orthogonal && overlap1 && overlap2

	if _aebg {
		_gde.Printf("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a", orthogonal, overlap1, overlap2, crossing, _bgca, _dgafg)
	}
	return crossing
}
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// NewWithOptions returns an Extractor instance for extracting content from the
// input PDF page, configured with `options`.
// It fails when the page's content streams or its media box cannot be obtained.
// A media box whose lower-left/upper-right coordinates are reversed is
// normalized (and the fact is logged). A missing StructTreeRoot is only logged.
func NewWithOptions(page *_fg.PdfPage, options *Options) (*Extractor, error) {
	const usageKey = "\u0065x\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077W\u0069\u0074\u0068\u004f\u0070\u0074\u0069\u006f\u006e\u0073"

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}

	structTreeRoot, tagged := page.GetStructTreeRoot()
	if !tagged {
		_ac.Log.Info("T\u0068\u0065\u0020\u0070\u0064\u0066\u0020\u0064\u006f\u0063\u0075\u006d\u0065\u006e\u0074\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020\u0074\u0061\u0067g\u0065d\u002e\u0020\u0053\u0074r\u0075\u0063t\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e\u0027\u0074\u0020\u0065\u0078\u0069\u0073\u0074\u002e")
	}

	container := page.GetContainingPdfObject()

	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _gde.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	ext := &Extractor{
		_geb: contents,
		_af:  page.Resources,
		_aa:  *mediaBox,
		_fb:  page.CropBox,
		_bc:  map[string]fontEntry{},
		_bca: map[string]textResult{},
		_cc:  options,
		_agb: structTreeRoot,
		_ce:  container,
	}

	// Normalize the extractor's own copy of the media box so that Ll* <= Ur*.
	box := &ext._aa
	if box.Llx > box.Urx {
		_ac.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", *box)
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		_ac.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", *box)
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	_ec.TrackUse(usageKey)
	return ext, nil
}
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
type ImageMark struct{Image *_fg .Image ;
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ;Height float64 ;
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// Position of the image in PDF coordinates (lower left corner).
X float64 ;Y float64 ;
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// Angle in degrees, if rotated.
Angle float64 ;};func (_baeba *textWord )absorb (_eafce *textWord ){_baeba .PdfRectangle =_ebge (_baeba .PdfRectangle ,_eafce .PdfRectangle );_baeba ._daafd =append (_baeba ._daafd ,_eafce ._daafd ...);};func _dbdbb (_decc string ,_adag int )string {if len (_decc )< _adag {return _decc ;
};return _decc [:_adag ];};func (_deed *subpath )clear (){*_deed =subpath {}};func _fcad (_adeba _fg .PdfRectangle )*ruling {return &ruling {_gffa :_cbab ,_cbag :_adeba .Urx ,_efgeb :_adeba .Lly ,_bbge :_adeba .Ury };};var _fffdg =map[markKind ]string {_efcd :"\u0073\u0074\u0072\u006f\u006b\u0065",_babea :"\u0066\u0069\u006c\u006c",_acaca :"\u0061u\u0067\u006d\u0065\u006e\u0074"};
// _badfg returns `_cdge` without its last rune.
// The empty string is returned unchanged; previously it caused a
// slice-bounds panic.
func _badfg(_cdge string) string {
	runes := []rune(_cdge)
	if len(runes) == 0 {
		return ""
	}
	return string(runes[:len(runes)-1])
}

// bbox returns the table's bounding rectangle.
func (_ccfgg *textTable) bbox() _fg.PdfRectangle {
	return _ccfgg.PdfRectangle
}

// _cacg classifies the segment between the two points by passing its absolute
// X and Y extents, together with the tolerance `_daee`, to `_ageb`.
func _cacg(_defeg, _adba _dca.Point) rulingKind {
	dx := _dc.Abs(_defeg.X - _adba.X)
	dy := _dc.Abs(_defeg.Y - _adba.Y)
	return _ageb(dx, dy, _daee)
}

// compositeRowCorridors returns, for every row index from 1 upward that has at
// least one composite cell, the values computed by `_gcfc` from that row's
// composite cells. Row 0 and rows without composite cells have no entry.
func (_fcbba *textTable) compositeRowCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, _fcbba._gebeeb)
	if _eadb {
		_ac.Log.Info("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064", _fcbba._gebeeb)
	}
	for row := 1; row < _fcbba._gebeeb; row++ {
		var cells []compositeCell
		for col := 0; col < _fcbba._acddc; col++ {
			if cell, ok := _fcbba._edbe[_bafcd(col, row)]; ok {
				cells = append(cells, cell)
			}
		}
		if len(cells) == 0 {
			continue
		}
		rowCorridors := _gcfc(cells)
		corridors[row] = rowCorridors
		if _eadb {
			_gde.Printf("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a", row, rowCorridors)
		}
	}
	return corridors
}

// maxDepth returns `_ggfe - Lly` for the bag.
// NOTE(review): `_ggfe` looks like the page height - confirm.
func (_dffd *wordBag) maxDepth() float64 {
	return _dffd._ggfe - _dffd.Lly
}
// xNeighbours builds two events per paragraph (one at its Llx flagged true, one
// at its Urx flagged false, both carrying the paragraph index) and returns the
// result of eventNeighbours on them. When `_dcbeg` is non-zero each
// paragraph's interval is widened on both sides by `_dcbeg` times its fontsize().
func (_baeb paraList )xNeighbours (_dcbeg float64 )map[*textPara ][]int {_eada :=make ([]event ,2*len (_baeb ));if _dcbeg ==0{for _cfee ,_gegc :=range _baeb {_eada [2*_cfee ]=event {_gegc .Llx ,true ,_cfee };_eada [2*_cfee +1]=event {_gegc .Urx ,false ,_cfee };
};}else {for _aece ,_bdgbd :=range _baeb {_eada [2*_aece ]=event {_bdgbd .Llx -_dcbeg *_bdgbd .fontsize (),true ,_aece };_eada [2*_aece +1]=event {_bdgbd .Urx +_dcbeg *_bdgbd .fontsize (),false ,_aece };};};return _baeb .eventNeighbours (_eada );};
// toCellTextMarks concatenates the TextMarks of the paragraph's lines
// (`_gfaae`), threading the running offset `_ddfad` through the helpers.
// When `_ddcg` is set and a line other than the last ends in a hyphen, that
// line's marks are passed through `_aeea` and no separator follows it.
// Otherwise every line except the last is followed by the separator that
// `_eecc` derives from the `_bfcg` values of the line and its successor.
func (_bbde *textPara )toCellTextMarks (_ddfad *int )[]TextMark {var _gbge []TextMark ;
for _faac ,_dede :=range _bbde ._gfaae {_bcedg :=_dede .toTextMarks (_ddfad );_ffgg :=_ddcg &&_dede .endsInHyphen ()&&_faac !=len (_bbde ._gfaae )-1;if _ffgg {_bcedg =_aeea (_bcedg ,_ddfad );};_gbge =append (_gbge ,_bcedg ...);if !(_ffgg ||_faac ==len (_bbde ._gfaae )-1){_gbge =_ccag (_gbge ,_ddfad ,_eecc (_dede ._bfcg ,_bbde ._gfaae [_faac +1]._bfcg ));
};};return _gbge ;};
// connections returns the set of ruling indices reachable from `_eebe` through
// the intersection relation `_gdffa`. Each visit adds every index whose
// intersection set contains the visited index and then recurses into all
// indices collected so far; a visited set stops repeated work.
func (_eccgf rulingList )connections (_gdffa map[int ]intSet ,_eebe int )intSet {_bgafe :=make (intSet );_dedee :=make (intSet );var _acda func (int );_acda =func (_dbgc int ){if !_dedee .has (_dbgc ){_dedee .add (_dbgc );for _eddg :=range _eccgf {if _gdffa [_eddg ].has (_dbgc ){_bgafe .add (_eddg );
};};for _agegbc :=range _eccgf {if _bgafe .has (_agegbc ){_acda (_agegbc );};};};};_acda (_eebe );return _bgafe ;};
// empty reports whether the bag has no entry for key `_fbab` in `_aac`.
func (_acga *wordBag )empty (_fbab int )bool {_ ,_efga :=_acga ._aac [_fbab ];return !_efga };
// comp is a less-function over the rulings at indices `_ecfaa` and `_adbf`.
// Rulings of different kinds are ordered by kind, larger kind first. Two
// rulings of kind `_ccfb` are never less than each other. Otherwise `_cbag`,
// then `_efgeb`, then `_bbge` are compared, with the sense of every
// comparison inverted when the kind is not `_faccd`.
func (_dcddd rulingList )comp (_ecfaa ,_adbf int )bool {_fabgb ,_adab :=_dcddd [_ecfaa ],_dcddd [_adbf ];
_eeaf ,_acee :=_fabgb ._gffa ,_adab ._gffa ;if _eeaf !=_acee {return _eeaf > _acee ;};if _eeaf ==_ccfb {return false ;};_facbf :=func (_gdegc bool )bool {if _eeaf ==_faccd {return _gdegc ;};return !_gdegc ;};_fceg ,_dabec :=_fabgb ._cbag ,_adab ._cbag ;
if _fceg !=_dabec {return _facbf (_fceg > _dabec );};_fceg ,_dabec =_fabgb ._efgeb ,_adab ._efgeb ;if _fceg !=_dabec {return _facbf (_fceg < _dabec );};return _facbf (_fabgb ._bbge < _adab ._bbge );};
// _aded returns the keys of `_afaf` sorted in decreasing order (sorted
// ascending, then reversed in place).
func _aded (_afaf map[float64 ]map[float64 ]gridTile )[]float64 {_bfdeea :=make ([]float64 ,0,len (_afaf ));
for _cggbc :=range _afaf {_bfdeea =append (_bfdeea ,_cggbc );};_ab .Float64s (_bfdeea );_fbfcc :=len (_bfdeea );for _ccac :=0;_ccac < _fbfcc /2;_ccac ++{_bfdeea [_ccac ],_bfdeea [_fbfcc -1-_ccac ]=_bfdeea [_fbfcc -1-_ccac ],_bfdeea [_ccac ];};return _bfdeea ;
};
// _gfgf returns `_cgacf`, followed by the values of `_bccfd` that lie strictly
// between the smaller and the larger of the two bounds, followed by `_gdffd`.
// Scanning stops at the first value at or above the upper bound, so
// presumably `_bccfd` is expected to be sorted ascending - TODO confirm.
func _gfgf (_bccfd []float64 ,_cgacf ,_gdffd float64 )[]float64 {_ffgge ,_bcfec :=_cgacf ,_gdffd ;if _bcfec < _ffgge {_ffgge ,_bcfec =_bcfec ,_ffgge ;};_facd :=make ([]float64 ,0,len (_bccfd )+2);_facd =append (_facd ,_cgacf );for _ ,_fdcg :=range _bccfd {if _fdcg <=_ffgge {continue ;
}else if _fdcg >=_bcfec {break ;};_facd =append (_facd ,_fdcg );};_facd =append (_facd ,_gdffd );return _facd ;};
// getFontDirect resolves the font named `_fdb` with getFontDict and builds a
// PdfFont from the resulting object. A lookup error is returned as is; a
// construction error is logged at debug level and returned together with
// whatever font value the constructor produced.
func (_adga *textObject )getFontDirect (_fdb string )(*_fg .PdfFont ,error ){_egbf ,_ddce :=_adga .getFontDict (_fdb );if _ddce !=nil {return nil ,_ddce ;
};_dec ,_ddce :=_fg .NewPdfFontFromPdfObject (_egbf );if _ddce !=nil {_ac .Log .Debug ("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fdb ,_ddce );
};return _dec ,_ddce ;};
// _cdcee looks `_egeg` up in the accent table `_bdce`. It returns the mapped
// string and true only when `_egeg` consists of exactly one rune that is a
// key of the table; otherwise it returns ("", false).
func _cdcee (_egeg string )(string ,bool ){_agggd :=[]rune (_egeg );if len (_agggd )!=1{return "",false ;};_dcbdf ,_fegd :=_bdce [_agggd [0]];return _dcbdf ,_fegd ;};
// isActualGrid decides whether the rulings form a grid and, if so, returns the
// two ruling lists produced by augmentGrid concatenated into one list.
// It returns (nil, false) when augmentGrid yields fewer than `_fbff+1` rulings
// in its first result or fewer than `_gaab+1` in its second. When `_ffdd` is
// set, the first and last ruling of each list must additionally line up with
// each other according to `_bddf`; otherwise both lists must be aligned().
// Debug output is emitted when `_aebg` (or `_bdeff` for one message) is set.
func (_eebf rulingList )isActualGrid ()(rulingList ,bool ){_abca ,_gceff :=_eebf .augmentGrid ();
if !(len (_abca )>=_fbff +1&&len (_gceff )>=_gaab +1){if _aebg {_ac .Log .Info ("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064",len (_abca ),len (_gceff ),_fbff +1,_gaab +1);
};return nil ,false ;};if _aebg {_ac .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074",_eebf ,len (_abca )>=2,len (_gceff )>=2,len (_abca )>=2&&len (_gceff )>=2);
for _feeff ,_cafe :=range _eebf {_gde .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a",_feeff ,_cafe );};};if _ffdd {_dfdc ,_fddgd :=_abca [0],_abca [len (_abca )-1];_afgd ,_gdgfg :=_gceff [0],_gceff [len (_gceff )-1];if !(_bddf (_dfdc ._cbag -_afgd ._efgeb )&&_bddf (_fddgd ._cbag -_afgd ._bbge )&&_bddf (_afgd ._cbag -_dfdc ._bbge )&&_bddf (_gdgfg ._cbag -_dfdc ._efgeb )){if _aebg {_ac .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073",_dfdc ,_fddgd ,_afgd ,_gdgfg );
};return nil ,false ;};}else {if !_abca .aligned (){if _bdeff {_ac .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064",len (_abca ));
};return nil ,false ;};if !_gceff .aligned (){if _aebg {_ac .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064",len (_gceff ));
};return nil ,false ;};};_cbefd :=append (_abca ,_gceff ...);return _cbefd ,true ;};
// rectRuling is a rectangle together with a ruling kind, a mark kind and a
// colour.
type rectRuling struct{_eegf rulingKind ;_fadf markKind ;_be .Color ;_fg .PdfRectangle ;};
// _ffea reports whether the two points have exactly equal X and Y coordinates.
func _ffea (_faeeg ,_dccgb _dca .Point )bool {return _faeeg .X ==_dccgb .X &&_faeeg .Y ==_dccgb .Y };
// _bcdd returns the rectangle common to `_bbdc` and `_gbca` (largest lower-left,
// smallest upper-right coordinates) and true, or a zero rectangle and false
// when `_gege` reports false for the pair.
func _bcdd (_bbdc ,_gbca _fg .PdfRectangle )(_fg .PdfRectangle ,bool ){if !_gege (_bbdc ,_gbca ){return _fg .PdfRectangle {},false ;};return _fg .PdfRectangle {Llx :_dc .Max (_bbdc .Llx ,_gbca .Llx ),Urx :_dc .Min (_bbdc .Urx ,_gbca .Urx ),Lly :_dc .Max (_bbdc .Lly ,_gbca .Lly ),Ury :_dc .Min (_bbdc .Ury ,_gbca .Ury )},true ;
};
// _cccab returns a ruling of kind `_cbab` located at the rectangle's Llx and
// spanning from its Lly to its Ury (i.e. built from the left edge).
func _cccab (_eaaf _fg .PdfRectangle )*ruling {return &ruling {_gffa :_cbab ,_cbag :_eaaf .Llx ,_efgeb :_eaaf .Lly ,_bbge :_eaaf .Ury };};

// _ffcd merges word bags that are contained in larger ones.
// The bags are sorted in place by decreasing area (ties broken by decreasing
// height). Each bag that has not been absorbed then absorbs every later,
// not-yet-absorbed bag whose rectangle `_dgfe` reports as contained in its own
// rectangle extended to the left by `_fab`. The surviving bags are returned.
// Slices with at most one bag are returned unchanged.
func _ffcd(_bgd []*wordBag) []*wordBag {
	if len(_bgd) <= 1 {
		return _bgd
	}
	if _fccf {
		_ac.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	_ab.Slice(_bgd, func(_cefb, _cbce int) bool {
		_deef, _aagd := _bgd[_cefb], _bgd[_cbce]
		_aaac := _deef.Width() * _deef.Height()
		_eaed := _aagd.Width() * _aagd.Height()
		if _aaac != _eaed {
			return _aaac > _eaed
		}
		if _deef.Height() != _aagd.Height() {
			return _deef.Height() > _aagd.Height()
		}
		return _cefb < _cbce
	})
	var _cbegd []*wordBag
	_ddbd := make(intSet)
	for _dceec := 0; _dceec < len(_bgd); _dceec++ {
		if _ddbd.has(_dceec) {
			continue
		}
		_degef := _bgd[_dceec]
		for _baac := _dceec + 1; _baac < len(_bgd); _baac++ {
			// Skip candidates that an earlier bag already absorbed. The
			// original tested the outer index here, which is never in the set
			// at this point, so a bag could be absorbed twice and its words
			// duplicated.
			if _ddbd.has(_baac) {
				continue
			}
			_fabd := _bgd[_baac]
			_fbfd := _degef.PdfRectangle
			_fbfd.Llx -= _degef._fab
			if _dgfe(_fbfd, _fabd.PdfRectangle) {
				_degef.absorb(_fabd)
				_ddbd.add(_baac)
			}
		}
		_cbegd = append(_cbegd, _degef)
	}
	// Sanity check: every input bag is either kept or absorbed.
	if len(_bgd) != len(_cbegd)+len(_ddbd) {
		_ac.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_bgd), len(_cbegd), len(_ddbd))
	}
	return _cbegd
}

// _cdcbcc groups the text marks `_babd` into words.
// A mark whose text is all white space ends the current word. A mark that
// `_cdcee` recognises as an accent is folded into the neighbouring mark as a
// diacritic when `_fefd` is set and the marks overlap per inDiacriticArea.
// A mark starts a new word when its gap to the current word, relative to the
// word's `_adecc`, is at least `_ffca` or falls outside the tolerances
// `_faaf`/`_cded`; otherwise it is appended to the current word. Words whose
// computed text is all white space are dropped.
func _cdcbcc (_babd []*textMark ,_ageed _fg .PdfRectangle )[]*textWord {var _cefgea []*textWord ;var _cfcbc *textWord ;if _agge {_ac .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_babd ));
};_aeceb :=func (){if _cfcbc !=nil {_bbbdg :=_cfcbc .computeText ();if !_cagg (_bbbdg ){_cfcbc ._fedgb =_bbbdg ;_cefgea =append (_cefgea ,_cfcbc );if _agge {_ac .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_cefgea )-1,_cfcbc .String ());
for _cgfda ,_aafe :=range _cfcbc ._daafd {_gde .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cgfda ,_aafe .String ());};};};_cfcbc =nil ;};};for _ ,_abcg :=range _babd {if _fefd &&_cfcbc !=nil &&len (_cfcbc ._daafd )> 0{_adda :=_cfcbc ._daafd [len (_cfcbc ._daafd )-1];
_ccfc ,_cfbb :=_cdcee (_abcg ._efgdc );_fabb ,_eafae :=_cdcee (_adda ._efgdc );if _cfbb &&!_eafae &&_adda .inDiacriticArea (_abcg ){_cfcbc .addDiacritic (_ccfc );continue ;};if _eafae &&!_cfbb &&_abcg .inDiacriticArea (_adda ){_cfcbc ._daafd =_cfcbc ._daafd [:len (_cfcbc ._daafd )-1];
_cfcbc .appendMark (_abcg ,_ageed );_cfcbc .addDiacritic (_fabb );continue ;};};_efbcb :=_cagg (_abcg ._efgdc );if _efbcb {_aeceb ();continue ;};if _cfcbc ==nil &&!_efbcb {_cfcbc =_bbfe ([]*textMark {_abcg },_ageed );continue ;};_aaeg :=_cfcbc ._adecc ;
_eegd :=_dc .Abs (_eeead (_ageed ,_abcg )-_cfcbc ._adgge )/_aaeg ;_abdf :=_debff (_abcg ,_cfcbc )/_aaeg ;if _abdf >=_ffca ||!(-_faaf <=_abdf &&_eegd <=_cded ){_aeceb ();_cfcbc =_bbfe ([]*textMark {_abcg },_ageed );continue ;};_cfcbc .appendMark (_abcg ,_ageed );
};_aeceb ();return _cefgea ;};
// _dfa partitions the words of `_faffa` into new word bags.
// For every depth index it repeatedly removes the first word in reading order,
// seeds a new bag from it via `_egfaa`, and keeps calling scanBand to pull
// further words from `_faffa` into that bag until a pass adds nothing.
// `_faffa` is emptied as a side effect.
// NOTE(review): the scanBand labels suggest vertical, horizontal and "other"
// passes whose gaps scale with the bag's `_fab` - confirm against scanBand.
func _dfa (_faffa *wordBag ,_bffa float64 ,_afdfe ,_dbbc rulingList )[]*wordBag {var _gged []*wordBag ;for _ ,_cgea :=range _faffa .depthIndexes (){_bebf :=false ;for !_faffa .empty (_cgea ){_gaed :=_faffa .firstReadingIndex (_cgea );
_fbea :=_faffa .firstWord (_gaed );_dgec :=_egfaa (_fbea ,_bffa ,_afdfe ,_dbbc );_faffa .removeWord (_fbea ,_gaed );if _abcbg {_ac .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_fbea .String ());
};for _adea :=true ;_adea ;_adea =_bebf {_bebf =false ;_fbagd :=_bgdf *_dgec ._fab ;_bagb :=_afdf *_dgec ._fab ;_cede :=_ggce *_dgec ._fab ;if _abcbg {_ac .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_dgec .minDepth (),_dgec .maxDepth (),_cede ,_bagb );
};if _faffa .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_dgec ,_gdaf (_cecg ,0),_dgec .minDepth ()-_cede ,_dgec .maxDepth ()+_cede ,_aebe ,false ,false )> 0{_bebf =true ;};if _faffa .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_dgec ,_gdaf (_cecg ,_bagb ),_dgec .minDepth (),_dgec .maxDepth (),_adca ,false ,false )> 0{_bebf =true ;
};if _bebf {continue ;};_fgbeb :=_faffa .scanBand ("",_dgec ,_gdaf (_bacd ,_fbagd ),_dgec .minDepth (),_dgec .maxDepth (),_ggfbd ,true ,false );if _fgbeb > 0{_dcae :=(_dgec .maxDepth ()-_dgec .minDepth ())/_dgec ._fab ;if (_fgbeb > 1&&float64 (_fgbeb )> 0.3*_dcae )||_fgbeb <=10{if _faffa .scanBand ("\u006f\u0074\u0068e\u0072",_dgec ,_gdaf (_bacd ,_fbagd ),_dgec .minDepth (),_dgec .maxDepth (),_ggfbd ,false ,true )> 0{_bebf =true ;
};};};};_gged =append (_gged ,_dgec );};};return _gged ;};
// textWord is a run of text marks treated as one word, with its bounding
// rectangle, its text (`_fedgb`) and its marks (`_daafd`).
type textWord struct{_fg .PdfRectangle ;_adgge float64 ;_fedgb string ;_daafd []*textMark ;_adecc float64 ;_eadcb bool ;};
// fill appends to `*_agcg` a pathSection made of the state's current subpaths
// (`_efb`) and the current fill colour. Debug output is printed when `_aebg`
// is set (and the first subpaths, up to index 10, when `_aab` is set).
func (_faeg *shapesState )fill (_agcg *[]pathSection ){_gabg :=pathSection {_ged :_faeg ._efb ,Color :_faeg ._cfda .getFillColor ()};
*_agcg =append (*_agcg ,_gabg );if _aebg {_ggd :=_gabg .bbox ();_gde .Printf ("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",len (*_agcg ),len (_gabg ._ged ),_faeg ,_gabg .Color ,_ggd ,_ggd .Width (),_ggd .Height ());
if _aab {for _debf ,_agdd :=range _gabg ._ged {_gde .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_debf ,_agdd );if _debf ==10{break ;};};};};};
// _agcd logs the number of ruling lists in `_ebda` under the title `_fgbef`
// and prints each list on its own line.
func _agcd (_fgbef string ,_ebda []rulingList ){_ac .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_ebda ),_fgbef );
for _eecbe ,_faafb :=range _ebda {_gde .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_eecbe ,_faafb .String ());};};
// aligned reports whether enough of the rulings match up with one another.
// Each ruling after the first either bumps the counter of an already-seen
// ruling it is gridIntersecting with, or is recorded with a zero counter.
// The list is aligned when the fraction of rulings left with a zero counter
// is at most 1.0 - `_egab`. Lists with fewer than two rulings are never aligned.
func (_bcadg rulingList )aligned ()bool {if len (_bcadg )< 2{return false ;};_dccc :=make (map[*ruling ]int );_dccc [_bcadg [0]]=0;
for _ ,_dgaea :=range _bcadg [1:]{_bbee :=false ;for _fdea :=range _dccc {if _dgaea .gridIntersecting (_fdea ){_dccc [_fdea ]++;_bbee =true ;break ;};};if !_bbee {_dccc [_dgaea ]=0;};};_gaee :=0;for _ ,_bafga :=range _dccc {if _bafga ==0{_gaee ++;};};_dedeef :=float64 (_gaee )/float64 (len (_bcadg ));
_bfed :=_dedeef <=1.0-_egab ;if _aebg {_ac .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_bfed ,_dedeef ,_gaee ,len (_bcadg ),_bcadg .String ());
};return _bfed ;};
// lineRuling is a line segment between two points together with a ruling
// kind, a mark kind and a colour.
type lineRuling struct{_gdggb rulingKind ;_daab markKind ;_be .Color ;_cged ,_ggeaa _dca .Point ;};
// exists reports whether the array already holds a mark with the same Text and
// deeply-equal DirectObject and BBox as `_gbg`.
func (_cfd *TextMarkArray )exists (_gbg TextMark )bool {for _ ,_cgfb :=range _cfd .Elements (){if _b .DeepEqual (_gbg .DirectObject ,_cgfb .DirectObject )&&_b .DeepEqual (_gbg .BBox ,_cgfb .BBox )&&_cgfb .Text ==_gbg .Text {return true ;
};};return false ;};
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// String returns a description of the text mark: its bounding rectangle, its
// `_cggc` value (labelled "fontsize") and its quoted text.
func (_bgee *textMark) String() string {
	const layout = "\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022"
	return _gde.Sprintf(layout, _bgee.PdfRectangle, _bgee._cggc, _bgee._efgdc)
}

// _bb reports whether `_bbc` contains a font whose FontName equals `_cfa`.
func _bb(_bbc []Font, _cfa string) bool {
	for i := range _bbc {
		if _bbc[i].FontName == _cfa {
			return true
		}
	}
	return false
}

// reset restores both matrices of the text object to the identity matrix and
// clears `_ffc`.
func (_eadd *textObject) reset() {
	_eadd._ffc = nil
	_eadd._acbc = _dca.IdentityMatrix()
	_eadd._abcf = _dca.IdentityMatrix()
}

// _bgdg turns the text marks of a page into an ordered list of paragraphs.
// The marks are grouped into words, the words are gathered into a bag and
// split into paragraph-sized bags, contained bags are merged, and every bag is
// arranged into a paragraph (bags that arrange to nil are dropped).
// Unless `_bgfg` is set, tables are extracted when there are at least `_ffa`
// paragraphs, and the result is additionally sorted with sortTopoOrder after
// sortReadingOrder. nil is returned when there are no marks or no words.
func _bgdg(_gbaaf []*textMark, _ecbd _fg.PdfRectangle, _fgfc rulingList, _dbba []gridTiling, _bgfg bool) paraList {
	_ac.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_gbaaf), _ecbd)
	if len(_gbaaf) == 0 {
		return nil
	}

	words := _cdcbcc(_gbaaf, _ecbd)
	if len(words) == 0 {
		return nil
	}

	_fgfc.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := _fgfc.vertsHorzs()

	pageBag := _ddfff(words, _ecbd.Ury, verts, horzs)
	paraBags := _ffcd(_dfa(pageBag, _ecbd.Ury, verts, horzs))

	paras := make(paraList, 0, len(paraBags))
	for _, bag := range paraBags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}

	withTables := !_bgfg
	if withTables && len(paras) >= _ffa {
		paras = paras.extractTables(_dbba)
	}
	paras.sortReadingOrder()
	if withTables {
		paras.sortTopoOrder()
	}
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// String returns a human readable description of `path`: the number of points
// followed by all points (five or fewer) or by the first two and the last one.
func (_fgb *subpath) String() string {
	points := _fgb._gdgd
	n := len(points)
	if n <= 5 {
		return _gde.Sprintf("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f", n, points)
	}
	return _gde.Sprintf("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f", n, points[0], points[1], points[n-1])
}

// makeRectRuling tries to interpret the first four points of the subpath as a
// rectangle and to convert that rectangle into a ruling of colour `_bdad`.
//
// Each of the four edges (point i to point (i+1)%4) is classified with _agfe.
// The path is accepted when it has two edges of kind _faccd ("horzs" in the
// debug output) and two of kind _cbab ("verts"), or exactly two of one kind
// and none of the other, provided _gfed / _caba accepts the pair.
//
// Returns (ruling, true) on success. On failure the bool is false and the
// ruling is either an empty &ruling{} (edges do not form a rectangle, or the
// path has fewer than four points) or nil (rectangle kind is _ccfb, or
// asRuling rejected it).
func (_bcead *subpath) makeRectRuling(_bdad _be.Color) (*ruling, bool) {
	if _cadc {
		_ac.Log.Info("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076", _bcead)
	}
	// Guard: slicing [:4] on a shorter path would panic (or expose stale
	// backing-array contents when the capacity happens to allow it).
	if len(_bcead._gdgd) < 4 {
		return &ruling{}, false
	}
	corners := _bcead._gdgd[:4]

	kinds := make(map[int]rulingKind, len(corners))
	for i, p := range corners {
		next := _bcead._gdgd[(i+1)%4]
		kinds[i] = _agfe(p, next)
		if _cadc {
			_gde.Printf("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066", i, kinds[i], p, next)
		}
	}
	if _cadc {
		_gde.Printf("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a", kinds)
	}

	// Walk the edges in index order. Ranging over the map directly would
	// visit them in a random order, making the index lists (and the argument
	// order passed to _gfed/_caba below) differ from run to run.
	var verts, horzs []int
	for i := range corners {
		switch kinds[i] {
		case _faccd:
			horzs = append(horzs, i)
		case _cbab:
			verts = append(verts, i)
		}
	}
	if _cadc {
		_gde.Printf("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(horzs), horzs)
		_gde.Printf("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(verts), verts)
	}

	ok := (len(horzs) == 2 && len(verts) == 2) ||
		(len(horzs) == 2 && len(verts) == 0 && _gfed(corners[horzs[0]], corners[horzs[1]])) ||
		(len(verts) == 2 && len(horzs) == 0 && _caba(corners[verts[0]], corners[verts[1]]))
	if _cadc {
		_gde.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
	}
	if !ok {
		if _cadc {
			_ac.Log.Error("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v", _bcead)
			_gde.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
		}
		return &ruling{}, false
	}

	// When only one kind was found, the remaining edges stand in for the
	// other kind.
	if len(verts) == 0 {
		for i := range corners {
			if kinds[i] != _faccd {
				verts = append(verts, i)
			}
		}
	}
	if len(horzs) == 0 {
		for i := range corners {
			if kinds[i] != _cbab {
				horzs = append(horzs, i)
			}
		}
	}
	if _cadc {
		_ac.Log.Info("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076", len(horzs), len(verts), len(corners), horzs, verts, corners)
	}

	// Pick the higher/lower of the two "horzs" points and the larger/smaller-X
	// of the two "verts" points; the rectangle is normalised right after.
	var xHi, xLo, yHi, yLo _dca.Point
	if corners[horzs[0]].Y > corners[horzs[1]].Y {
		yHi, yLo = corners[horzs[0]], corners[horzs[1]]
	} else {
		yHi, yLo = corners[horzs[1]], corners[horzs[0]]
	}
	if corners[verts[0]].X > corners[verts[1]].X {
		xHi, xLo = corners[verts[0]], corners[verts[1]]
	} else {
		xHi, xLo = corners[verts[1]], corners[verts[0]]
	}
	bbox := _fg.PdfRectangle{Llx: xHi.X, Urx: xLo.X, Lly: yLo.Y, Ury: yHi.Y}
	if bbox.Llx > bbox.Urx {
		bbox.Llx, bbox.Urx = bbox.Urx, bbox.Llx
	}
	if bbox.Lly > bbox.Ury {
		bbox.Lly, bbox.Ury = bbox.Ury, bbox.Lly
	}

	rect := rectRuling{PdfRectangle: bbox, _eegf: _fgae(bbox), Color: _bdad}
	if rect._eegf == _ccfb {
		if _cadc {
			_ac.Log.Error("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c")
		}
		return nil, false
	}
	result, isRuling := rect.asRuling()
	if !isRuling {
		if _cadc {
			_ac.Log.Error("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg")
		}
		return nil, false
	}
	if _aebg {
		_gde.Printf("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a", result.String())
	}
	return result, true
}

// findTableGrid tries to build a table from the paragraphs in `_dccec` that
// fall inside the tiles of `_gffc`.
//
// The table has len(_gffc._abeb) columns and len(_gffc._cegbg) rows. Every
// tile found in _gffc._faeca is filled with the paragraphs that inTile reports
// for it. The search is abandoned (nil, nil) once the number of empty tiles
// exceeds (1 - _eabd) of all cells, or when every cell of row 0 is non-nil
// with _bfge set (logged as "!numHeader=0").
//
// On success it returns the table after reduceTiling and subdivide, plus the
// set of paragraphs that were placed in it.
func (_dccec paraList) findTableGrid(_gffc gridTiling) (*textTable, map[*textPara]struct{}) {
	numCols := len(_gffc._abeb)
	numRows := len(_gffc._cegbg)
	table := textTable{
		_aefef:  true,
		_acddc:  numCols,
		_gebeeb: numRows,
		_cfgbb:  make(map[uint64]*textPara, numCols*numRows),
		_edbe:   make(map[uint64]compositeCell, numCols*numRows),
	}
	table.PdfRectangle = _gffc.PdfRectangle
	used := make(map[*textPara]struct{})
	maxEmpty := int((1.0 - _eabd) * float64(numCols*numRows))
	numEmpty := 0
	if _gbde {
		_ac.Log.Info("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064", numCols, numRows)
	}

	for rowIdx, rowKey := range _gffc._cegbg {
		rowTiles, haveRow := _gffc._faeca[rowKey]
		if !haveRow {
			continue
		}
		for colIdx, colKey := range _gffc._abeb {
			tile, haveTile := rowTiles[colKey]
			if !haveTile {
				continue
			}
			inside := _dccec.inTile(tile)
			if len(inside) > 0 {
				table.putComposite(colIdx, rowIdx, inside, tile.PdfRectangle)
				for _, para := range inside {
					used[para] = struct{}{}
				}
				continue
			}
			numEmpty++
			if numEmpty > maxEmpty {
				if _gbde {
					_ac.Log.Info("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064", numEmpty)
				}
				return nil, nil
			}
		}
	}

	// Count the row-0 cells that are missing or do not have _bfge set.
	numHeader := 0
	for col := 0; col < numCols; col++ {
		cell := table.get(col, 0)
		if cell == nil || !cell._bfge {
			numHeader++
		}
	}
	if numHeader == 0 {
		if _gbde {
			_ac.Log.Info("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030")
		}
		return nil, nil
	}

	reduced := table.reduceTiling(_gffc, _ddba)
	reduced = reduced.subdivide()
	return reduced, used
}
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// String returns a description of `w`: its _adgge value, bounding rectangle,
// the _adecc value (labelled "fontsize") and the quoted _fedgb text.
func (_gedbg *textWord) String() string {
	return _gde.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",
		_gedbg._adgge, _gedbg.PdfRectangle, _gedbg._adecc, _gedbg._fedgb)
}

// llyOrdering returns the indexes of `_cdbbd` ordered by increasing Lly.
// The sort is stable, so paragraphs with equal Lly keep their relative order.
func (_cdbbd paraList) llyOrdering() []int {
	order := make([]int, len(_cdbbd))
	for i := range _cdbbd {
		order[i] = i
	}
	_ab.SliceStable(order, func(i, j int) bool {
		return _cdbbd[order[i]].Lly < _cdbbd[order[j]].Lly
	})
	return order
}

// merge combines the rulings in `_ccbff` into one ruling. _gffa, _adaa and
// Color are taken from the first element, _cbag is the mean over all elements
// and [_efgeb, _bbge] is the smallest range covering every element.
// The list must not be empty.
func (_ccbff rulingList) merge() *ruling {
	first := _ccbff[0]
	sum, lo, hi := first._cbag, first._efgeb, first._bbge
	for _, r := range _ccbff[1:] {
		sum += r._cbag
		if r._efgeb < lo {
			lo = r._efgeb
		}
		if r._bbge > hi {
			hi = r._bbge
		}
	}
	merged := &ruling{
		_gffa:  _ccbff[0]._gffa,
		_adaa:  _ccbff[0]._adaa,
		Color:  _ccbff[0].Color,
		_cbag:  sum / float64(len(_ccbff)),
		_efgeb: lo,
		_bbge:  hi,
	}
	if _bdeff {
		_ac.Log.Info("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073", len(_ccbff), merged)
		for i, r := range _ccbff {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r)
		}
	}
	return merged
}

// setHorizScaling stores `_acg` in the _gfad field of the text state.
// It is a no-op on a nil receiver.
func (_bff *textObject) setHorizScaling(_acg float64) {
	if _bff == nil {
		return
	}
	_bff._abcb._gfad = _acg
}

// getCellInfo looks for the first cell whose Marks contain `_dcge` (see
// TextMarkArray.exists) and returns its position as {{outer index}, {inner
// index}} into _daa.Cells, or nil when no cell contains the mark.
func (_daa TextTable) getCellInfo(_dcge TextMark) [][]int {
	for i, cells := range _daa.Cells {
		for j, cell := range cells {
			marks := &cell.Marks
			if marks.exists(_dcge) {
				return [][]int{{i}, {j}}
			}
		}
	}
	return nil
}

// endsInHyphen reports whether the line ends in a hyphen according to _dcdb.
// The last rune of the last word must belong to unicode.Hyphen; then either
// the word has _eadcb set and _dcdb accepts the word text, or _dcdb accepts
// the text of the whole line. A line without words never ends in a hyphen.
func (_dcadg *textLine) endsInHyphen() bool {
	// Guard against an empty word list, which would otherwise panic below.
	if len(_dcadg._fgbe) == 0 {
		return false
	}
	lastWord := _dcadg._fgbe[len(_dcadg._fgbe)-1]
	wordText := lastWord._fedgb
	r, size := _f.DecodeLastRuneInString(wordText)
	if size <= 0 || !_fc.Is(_fc.Hyphen, r) {
		return false
	}
	if lastWord._eadcb && _dcdb(wordText) {
		return true
	}
	return _dcdb(_dcadg.text())
}

// extractXObjectImage appends an ImageMark for the XObject named `_edc` in
// the resources `_gbd`, positioned and sized from the CTM of `_fae`.
//
// Decoded images are cached in _gba._fba, keyed by the XObject stream. On a
// cache miss the image is decoded and, when it has a Mask or SMask, the mask
// is combined with it via _faefe before caching. A mask that cannot be read
// is only logged. Returns nil without adding a mark when the XObject or its
// image is missing.
func (_gba *imageExtractContext) extractXObjectImage(_edc *_dce.PdfObjectName, _fae _dcg.GraphicsState, _gbd *_fg.PdfPageResources) error {
	stream, _ := _gbd.GetXObjectByName(*_edc)
	if stream == nil {
		return nil
	}

	cached, found := _gba._fba[stream]
	if !found {
		xImg, err := _gbd.GetXObjectImageByName(*_edc)
		if err != nil {
			return err
		}
		if xImg == nil {
			return nil
		}
		img, err := xImg.ToImage()
		if err != nil {
			return err
		}

		// An explicit Mask takes precedence over a soft mask.
		var mask _e.Image
		if xImg.Mask != nil {
			if mask, err = _decf(xImg.Mask, _be.Opaque); err != nil {
				_ac.Log.Debug("\u0057\u0041\u0052\u004e\u003a \u0063\u006f\u0075\u006c\u0064 \u006eo\u0074\u0020\u0067\u0065\u0074\u0020\u0065\u0078\u0070\u006c\u0069\u0063\u0069\u0074\u0020\u0069\u006d\u0061\u0067e\u0020\u006d\u0061\u0073\u006b\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e")
			}
		} else if xImg.SMask != nil {
			mask, err = _eafda(xImg.SMask, _be.Opaque)
			if err != nil {
				_ac.Log.Debug("W\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0067\u0065\u0074\u0020\u0073\u006f\u0066\u0074\u0020\u0069\u006da\u0067e\u0020\u006d\u0061\u0073k\u002e\u0020O\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063\u0074\u002e")
			}
		}

		if mask != nil {
			goImg, convErr := img.ToGoImage()
			if convErr != nil {
				return convErr
			}
			goImg = _faefe(goImg, mask)
			switch xImg.ColorSpace.String() {
			case "\u0044\u0065\u0076\u0069\u0063\u0065\u0047\u0072\u0061\u0079", "\u0049n\u0064\u0065\u0078\u0065\u0064":
				img, convErr = _fg.ImageHandling.NewGrayImageFromGoImage(goImg)
				if convErr != nil {
					return convErr
				}
			default:
				img, convErr = _fg.ImageHandling.NewImageFromGoImage(goImg)
				if convErr != nil {
					return convErr
				}
			}
		}

		cached = &cachedImage{_gb: img, _cbg: xImg.ColorSpace}
		_gba._fba[stream] = cached
	}

	img := cached._gb
	colorSpace := cached._cbg
	rgb, err := colorSpace.ImageToRGB(*img)
	if err != nil {
		return err
	}
	_ac.Log.Debug("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073", _fae.CTM.String())

	mark := ImageMark{
		Image:  &rgb,
		Width:  _fae.CTM.ScalingFactorX(),
		Height: _fae.CTM.ScalingFactorY(),
		Angle:  _fae.CTM.Angle(),
	}
	mark.X, mark.Y = _fae.CTM.Translation()
	_gba._dag = append(_gba._dag, mark)
	_gba._fbd++
	return nil
}

// lists is a slice of *list.
type lists []*list

// cubicTo handles a cubic curve segment. Only the final point
// (`_decg`, `_fcca`) is added to the shape; the first four arguments are
// ignored.
func (_egea *shapesState) cubicTo(_bbcd, _eaec, _aefdg, _egfa, _decg, _fcca float64) {
	if _bdefa {
		_ac.Log.Info("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a")
	}
	_egea.addPoint(_decg, _fcca)
}

// put stores `_aeacb` in the _cfgbb map under the key _bafcd(_fafg, _fggdg).
func (_cbcbdd *textTable) put(_fafg, _fggdg int, _aeacb *textPara) {
	key := _bafcd(_fafg, _fggdg)
	_cbcbdd._cfgbb[key] = _aeacb
}

// textState holds the text state values of a content stream.
// NOTE(review): only the setters visible nearby ground the comments below;
// the meaning of the remaining fields should be confirmed against their users.
type textState struct {
	_aaec float64 // written by setCharSpacing
	_ggf  float64
	_gfad float64 // written by setHorizScaling
	_gcg  float64 // written by setTextLeading
	_dda  float64 // written by setFont (its float64 argument)
	_bgcc RenderMode
	_cgfc float64
	_ecf  *_fg.PdfFont // written by setFont
	_cefd _fg.PdfRectangle
	_ddcd int
	_dcad int
}

// _bbfe creates a textWord from the marks `_cafca`, which must not be empty.
// The word's rectangle is the _ebge combination of all mark rectangles, its
// _adecc is the largest _cggc among the marks and its _adgge is
// `_caacb`.Ury minus the Lly of the combined rectangle.
func _bbfe(_cafca []*textMark, _caacb _fg.PdfRectangle) *textWord {
	bbox := _cafca[0].PdfRectangle
	maxSize := _cafca[0]._cggc
	for _, mark := range _cafca[1:] {
		bbox = _ebge(bbox, mark.PdfRectangle)
		if mark._cggc > maxSize {
			maxSize = mark._cggc
		}
	}
	return &textWord{
		PdfRectangle: bbox,
		_daafd:       _cafca,
		_adgge:       _caacb.Ury - bbox.Lly,
		_adecc:       maxSize,
	}
}

// _efdd is a regular expression source. Decoded, it matches (case
// insensitively) a roman numeral at the start of the text that is either
// followed by ")" or "." or enclosed in parentheses.
// NOTE(review): where this pattern is compiled and used is outside this view.
var _efdd string = "\u0028\u003f\u0069\u0029\u005e\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028?\u003a\u0044\u007cM\u0029\u007c\u0044\u003f\u0043{\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028\u003f\u003a\u004c\u007c\u0043\u0029\u007cL\u003f\u0058\u007b\u0030\u002c\u0033}\u0029\u0028\u0049\u0028\u003f\u003a\u0056\u007c\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u005c\u0029\u007c\u005c\u002e\u0029\u007c\u005e\u005c\u0028\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028\u003f\u003aD\u007cM\u0029\u007c\u0044\u003f\u0043\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028?\u003a\u004c\u007c\u0043\u0029\u007c\u004c?\u0058\u007b0\u002c\u0033\u007d\u0029(\u0049\u0028\u003f\u003a\u0056|\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u005c\u0029"
// readBefore reports whether paragraph `_gfeg` should be read before
// paragraph `_cgeg`.
//
// It is true when _edacg accepts the pair and the first paragraph has the
// larger Lly. Otherwise the first paragraph's _ebadd box must lie entirely
// left of the second's (Urx < Llx), and no third paragraph whose Lly is in
// the range between the two (looked up through `_caefd` with llyRange) may
// overlap the horizontal span computed from the two boxes.
func (_gafd paraList) readBefore(_caefd []int, _gfeg, _cgeg int) bool {
	a, b := _gafd[_gfeg], _gafd[_cgeg]

	if _edacg(a, b) && a.Lly > b.Lly {
		return true
	}
	if !(a._ebadd.Urx < b._ebadd.Llx) {
		return false
	}

	// Order the two Lly values so that lo <= hi.
	lo, hi := a.Lly, b.Lly
	if lo > hi {
		hi, lo = lo, hi
	}
	spanLlx := _dc.Max(a._ebadd.Llx, b._ebadd.Llx)
	spanUrx := _dc.Min(a._ebadd.Urx, b._ebadd.Urx)

	for _, k := range _gafd.llyRange(_caefd, lo, hi) {
		if k == _gfeg || k == _cgeg {
			continue
		}
		other := _gafd[k]
		if other._ebadd.Llx <= spanUrx && spanLlx <= other._ebadd.Urx {
			return false
		}
	}
	return true
}
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future
// major version. Please use Text() instead.
func (_faga PageText) ToText() string {
	text := _faga.Text()
	return text
}

// _cced reports whether the vertical ranges [Lly, Ury] of the two rectangles
// overlap; ranges that only touch count as overlapping.
func _cced(_dcage, _dafca _fg.PdfRectangle) bool {
	if _dcage.Lly <= _dafca.Ury {
		return _dafca.Lly <= _dcage.Ury
	}
	return false
}
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// String returns a description of `b`.
func (_ffcc *wordBag )String ()string {var _cefa []string ;for _ ,_gac :=range _ffcc .depthIndexes (){_fdf :=_ffcc ._aac [_gac ];for _ ,_fadgg :=range _fdf {_cefa =append (_cefa ,_fadgg ._fedgb );};};return _gde .Sprintf ("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071",_ffcc .PdfRectangle ,_ffcc ._fab ,len (_cefa ),_cefa );
2023-10-07 13:58:01 +00:00
};
2022-07-13 21:28:43 +00:00
2023-11-11 11:29:03 +00:00
// Text returns the text content of the `bulletLists`: the Text() of every
// list, concatenated in order with no separator added.
func (_cgdd *lists) Text() string {
	var sb _df.Builder
	for _, l := range *_cgdd {
		sb.WriteString(l.Text())
	}
	return sb.String()
}

// getParagraphs arranges the page's text marks (_fbga) into paragraphs.
//
// Rulings are collected from _ffge (when _efbd is set) and from _cedf (when
// _dfcb is set) and converted with toTilings. The marks are then processed
// in four groups, by their _aaad value 0, 90, 180 and 270, and the
// paragraphs built by _bgdg for each group are concatenated in that order.
// Marks with any other _aaad value are not included.
func (_bdecg *PageText) getParagraphs() paraList {
	var rulings rulingList
	if _efbd {
		rulings = append(rulings, _fdgdc(_bdecg._ffge)...)
	}
	if _dfcb {
		rulings = append(rulings, _cgabd(_bdecg._cedf)...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(_bdecg._fbga)
	// Stop early once every mark has been assigned to a group.
	for angle := 0; angle < 360 && remaining > 0; angle += 90 {
		group := make([]*textMark, 0, len(_bdecg._fbga)-remaining)
		for _, mark := range _bdecg._fbga {
			if mark._aaad == angle {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _bgdg(group, _bdecg._fgag, rulings, tilings, _bdecg._dbc._dcc)...)
		remaining -= len(group)
	}
	return paras
}
// hasLines reports whether the rectangle of any line in `_ggeaf` is accepted
// by _gege together with the rectangle of the composite cell.
func (_bddda compositeCell) hasLines(_ggeaf []*textLine) bool {
	for i, line := range _ggeaf {
		hit := _gege(_bddda.PdfRectangle, line.PdfRectangle)
		if _eadb {
			_gde.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", hit, i, len(_ggeaf))
			_gde.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", _bddda)
			_gde.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if hit {
			return true
		}
	}
	return false
}

// textMark is the internal representation of a piece of text on a page,
// together with its bounding rectangle.
// NOTE(review): only the fields commented below have a meaning that nearby
// code makes visible; confirm the rest against their users.
type textMark struct {
	_fg.PdfRectangle
	_aaad  int    // compared against 0, 90, 180, 270 in getParagraphs
	_efgdc string // printed as the quoted text by String
	_dcdf  string
	_abef  *_fg.PdfFont
	_cggc  float64 // printed as "fontsize" by String
	_accea float64
	_bcbeg _dca.Matrix
	_caeg  _dca.Point
	_fbae  _fg.PdfRectangle
	_cccae _be.Color
	_dcgea _be.Color
	_dafb  _dce.PdfObject
	_facc  []string
	Tw     float64
	Th     float64
	_fbcf  int
	_ccfa  int
}

// setFont stores `_aedf` in the text state (_dda) and then sets the state's
// font (_ecf) to the one getFont returns for `_eee`. If the lookup fails its
// error is returned and the font is left unchanged; _dda has already been
// updated at that point. It is a no-op on a nil receiver.
func (_fggg *textObject) setFont(_eee string, _aedf float64) error {
	if _fggg == nil {
		return nil
	}
	_fggg._abcb._dda = _aedf
	font, err := _fggg.getFont(_eee)
	if err != nil {
		return err
	}
	_fggg._abcb._ecf = font
	return nil
}

// secMinMax returns the smallest _efgeb and the largest _bbge over all
// rulings in the list, which must not be empty.
func (_fageg rulingList) secMinMax() (float64, float64) {
	lo, hi := _fageg[0]._efgeb, _fageg[0]._bbge
	for _, r := range _fageg[1:] {
		if r._efgeb < lo {
			lo = r._efgeb
		}
		if r._bbge > hi {
			hi = r._bbge
		}
	}
	return lo, hi
}

// _bdca selects lines from `_afeg` relative to the line `_bdfe`. Only lines
// whose _bfcg is at least `_dgdc` are considered.
//
// With an upper bound (`_bfff` != -1): lines with _bfcg below the bound and a
// text different from `_bdfe`'s are collected, and the scan stops at the
// first such line whose rounded Llx is left of `_bdfe`'s.
// Without one (`_bfff` == -1): lines with the same _bfcg as `_bdfe` are
// collected when their text differs; other lines are collected when their
// _bfcg does not exceed the limit returned by _egbac (-1 means no limit
// found).
// The result is never nil.
func _bdca(_bdfe *textLine, _afeg []*textLine, _fefb []float64, _dgdc, _bfff float64) []*textLine {
	selected := []*textLine{}
	for _, line := range _afeg {
		if !(line._bfcg >= _dgdc) {
			continue
		}
		if _bfff != -1 && line._bfcg < _bfff {
			if line.text() != _bdfe.text() {
				if _dc.Round(line.Llx) < _dc.Round(_bdfe.Llx) {
					break
				}
				selected = append(selected, line)
			}
		} else if _bfff == -1 {
			if line._bfcg == _bdfe._bfcg {
				if line.text() != _bdfe.text() {
					selected = append(selected, line)
				}
				continue
			}
			limit := _egbac(_bdfe, _afeg, _fefb)
			if limit != -1 && line._bfcg <= limit {
				selected = append(selected, line)
			}
		}
	}
	return selected
}

// gridTile is a rectangle with four boolean flags.
// NOTE(review): what each flag stands for is not visible in this part of the
// file.
type gridTile struct {
	_fg.PdfRectangle
	_cbbgc, _cbge, _beacf, _dafe bool
}

// _ebfaf reports whether the absolute value of `_eebg` is below _bafg.
func _ebfaf(_eebg float64) bool {
	return _dc.Abs(_eebg) < _bafg
}

// setTextMatrix sets both matrices of the text object (_acbc and _abcf) to
// the matrix built from the six values in `_dege`. Input of any other length
// is logged and ignored.
func (_deb *textObject) setTextMatrix(_dege []float64) {
	if len(_dege) != 6 {
		_ac.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", len(_dege))
		return
	}
	m := _dca.NewMatrix(_dege[0], _dege[1], _dege[2], _dege[3], _dege[4], _dege[5])
	_deb._acbc = m
	_deb._abcf = _deb._acbc
}

// _dcgaf adjusts the slices stored in `_cefge` (the debug output calls this
// step "fixCells"), walking the keys in the order returned by _dfdf and
// starting at the first key with a non-nil slice. Maps with at most one
// entry are left untouched.
//
// NOTE(review): inside the loop the slice compared against cur[0] is read
// from the current key, not from the previous key prevKey, so it is the same
// slice as cur. The debug output prints "k0"/"k1" for prevKey/key, which
// suggests a comparison between neighbouring keys was intended. The existing
// behaviour is kept as is; confirm the intent before changing it.
func _dcgaf(_cefge map[int][]float64) {
	if len(_cefge) <= 1 {
		return
	}
	keys := _dfdf(_cefge)
	if _eadb {
		_ac.Log.Info("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076", keys)
	}
	var start, prevKey int
	for start, prevKey = range keys {
		if _cefge[prevKey] != nil {
			break
		}
	}
	for offset, key := range keys[start:] {
		cur := _cefge[key]
		if cur == nil {
			continue
		}
		if _eadb {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a", start+offset, prevKey, key)
		}
		same := _cefge[key]
		if same[len(same)-1] > cur[0] {
			same[len(same)-1] = cur[0]
			_cefge[prevKey] = same
		}
		prevKey = key
	}
}

// getComposite returns the result of parasBBox for the composite cell stored
// under _bafcd(_ddcc, _gbff), or (nil, empty rectangle) when there is none.
func (_deaf *textTable) getComposite(_ddcc, _gbff int) (paraList, _fg.PdfRectangle) {
	cell, found := _deaf._edbe[_bafcd(_ddcc, _gbff)]
	if _eadb {
		_gde.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a", _ddcc, _gbff, cell.String())
	}
	if !found {
		return nil, _fg.PdfRectangle{}
	}
	return cell.parasBBox()
}

// _bcdb writes the text produced by _adgb for `_cbeb` to `_dccd` and then
// recurses into the list's _edge children, passing each a prefix that is
// `*_fcag` extended by three spaces.
func _bcdb(_cbeb *list, _dccd *_df.Builder, _fcag *string) {
	_dccd.WriteString(_adgb(_cbeb, _fcag))
	for _, child := range _cbeb._edge {
		childPrefix := *_fcag + "\u0020\u0020\u0020"
		_bcdb(child, _dccd, &childPrefix)
	}
}

// _efgdb returns the keys of `_aaecb` in increasing order.
func _efgdb(_aaecb map[int]intSet) []int {
	keys := make([]int, 0, len(_aaecb))
	for k := range _aaecb {
		keys = append(keys, k)
	}
	_ab.Ints(keys)
	return keys
}

// eventNeighbours sweeps over `_ddedd` and returns, for every paragraph that
// has an event with _dacd set, the indexes of the paragraphs that were active
// at the same time. A paragraph becomes active at its _dacd event and stops
// being active at its other event.
//
// `_ddedd` is sorted in place by _eebcc; at equal _eebcc, events with _dacd
// set come first. Paragraphs without neighbours map to nil. The order of the
// indexes in each result slice is unspecified.
func (_gfeb paraList) eventNeighbours(_ddedd []event) map[*textPara][]int {
	// A stable sort replaces sort.Slice with an "i < j" tie-break: the indexes
	// passed to the comparator are positions during sorting, not a property
	// of the elements, so they cannot serve as a tie-breaker.
	_ab.SliceStable(_ddedd, func(i, j int) bool {
		a, b := _ddedd[i], _ddedd[j]
		if a._eebcc != b._eebcc {
			return a._eebcc < b._eebcc
		}
		return a._dacd && !b._dacd
	})

	neighbours := make(map[int]intSet)
	active := make(intSet)
	for _, ev := range _ddedd {
		if !ev._dacd {
			active.del(ev._dcgg)
			continue
		}
		neighbours[ev._dcgg] = make(intSet)
		for other := range active {
			if other != ev._dcgg {
				neighbours[ev._dcgg].add(other)
				neighbours[other].add(ev._dcgg)
			}
		}
		active.add(ev._dcgg)
	}

	result := map[*textPara][]int{}
	for idx, set := range neighbours {
		para := _gfeb[idx]
		if len(set) == 0 {
			result[para] = nil
			continue
		}
		list := make([]int, len(set))
		n := 0
		for other := range set {
			list[n] = other
			n++
		}
		result[para] = list
	}
	return result
}

// inTile returns the paragraphs in `_beee` whose rectangle the tile `_fbbb`
// contains.
func (_beee paraList) inTile(_fbbb gridTile) paraList {
	var inside paraList
	for _, para := range _beee {
		if _fbbb.contains(para.PdfRectangle) {
			inside = append(inside, para)
		}
	}
	if _eadb {
		_gde.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _fbbb, len(inside))
		for i, para := range inside {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
		_gde.Println("")
	}
	return inside
}
2023-08-03 17:30:04 +00:00
2023-11-11 11:29:03 +00:00
// PageText represents the layout of text on a device page.
type PageText struct {
	_fbga []*textMark // marks that getParagraphs groups by their _aaad value
	_edee string      // assigned by computeViews from the written paragraph text
	_fdaf []TextMark  // assigned by computeViews (toTextMarks)
	_agdc []TextTable // assigned by computeViews (tables)
	_fgag _fg.PdfRectangle
	_ffge []pathSection // turned into rulings by getParagraphs when _efbd is set
	_cedf []pathSection // turned into rulings by getParagraphs when _dfcb is set
	_cabf *_dce.PdfObject
	_gefc _dce.PdfObject
	_eaee *_dcg.ContentStreamOperations
	_dbc  PageTextOptions
}

// _bcda flattens `_dggg` into a single ruling list and returns the two lists
// that vertsHorzs produces for it.
func _bcda(_dggg []rulingList) (rulingList, rulingList) {
	var all rulingList
	for i := range _dggg {
		all = append(all, _dggg[i]...)
	}
	return all.vertsHorzs()
}

// rulingList is a slice of *ruling.
type rulingList []*ruling

// pathSection is a group of subpaths that share one colour.
type pathSection struct {
	_ged []*subpath
	_be.Color
}

// appendMark adds `_gfab` to the word. The word's rectangle grows through
// _ebge to include the mark, _adecc is raised to the mark's _cggc when that
// is larger, and _adgge is recomputed as `_aadad`.Ury minus the Lly of the
// updated rectangle.
func (_fgeg *textWord) appendMark(_gfab *textMark, _aadad _fg.PdfRectangle) {
	_fgeg._daafd = append(_fgeg._daafd, _gfab)
	_fgeg.PdfRectangle = _ebge(_fgeg.PdfRectangle, _gfab.PdfRectangle)
	if size := _gfab._cggc; size > _fgeg._adecc {
		_fgeg._adecc = size
	}
	_fgeg._adgge = _aadad.Ury - _fgeg.PdfRectangle.Lly
}

// _gege reports whether both _fcgc and _cced accept the pair of rectangles.
func _gege(_afge, _dgae _fg.PdfRectangle) bool {
	if !_fcgc(_afge, _dgae) {
		return false
	}
	return _cced(_afge, _dgae)
}

// _dcfd converts the mask image `_deceg` into an RGBA image of the same
// size. Pixels whose red, green and blue components sum to zero are painted
// with `_cdbd`; all other pixels become transparent. Pixels whose colour
// cannot be read are logged and left at the zero value of the new image.
func _dcfd(_deceg *_fg.Image, _cdbd _be.Color) _e.Image {
	width, height := int(_deceg.Width), int(_deceg.Height)
	out := _e.NewRGBA(_e.Rect(0, 0, width, height))
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			c, err := _deceg.ColorAt(x, y)
			if err != nil {
				_ac.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e", x, y)
				continue
			}
			r, g, b, _ := c.RGBA()
			var px _be.Color = _be.Transparent
			if r+g+b == 0 {
				px = _cdbd
			}
			out.Set(x, y, px)
		}
	}
	return out
}
2023-05-29 17:26:33 +00:00
2023-11-11 11:29:03 +00:00
// String returns a description of `l`: its _bfcg value, bounding rectangle,
// the _ceacg value (labelled "fontsize") and the quoted text of the line.
func (_bffd *textLine) String() string {
	text := _bffd.text()
	return _gde.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022", _bffd._bfcg, _bffd.PdfRectangle, _bffd._ceacg, text)
}
2023-01-08 22:34:27 +00:00
2023-11-11 11:29:03 +00:00
// String returns a description of `k`: its name from the _fffdg table, or
// "Not a mark: <n>" for a value that has no entry there.
func (_abffa markKind) String() string {
	if name, known := _fffdg[_abffa]; known {
		return name
	}
	return _gde.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _abffa)
}

// getRight collects, for every row of the table, the _abfec paragraph of the
// cell in the last column (_acddc-1). It returns nil as soon as one of those
// paragraphs reports taken(), or when the collected paragraphs are not
// chained: the _fgbea of each entry must be the next entry.
func (_aegdd *textTable) getRight() paraList {
	numRows := _aegdd._gebeeb
	lastCol := _aegdd._acddc - 1
	paras := make(paraList, numRows)
	for row := 0; row < numRows; row++ {
		para := _aegdd.get(lastCol, row)._abfec
		if para.taken() {
			return nil
		}
		paras[row] = para
	}
	for row := 0; row+1 < numRows; row++ {
		if paras[row]._fgbea != paras[row+1] {
			return nil
		}
	}
	return paras
}

// imageExtractContext holds the state of an image extraction run.
// NOTE(review): only _dag, _fbd and _fba are used by code visible nearby;
// the meaning of the other fields should be confirmed against their users.
type imageExtractContext struct {
	_dag  []ImageMark // image marks found so far (appended by extractXObjectImage)
	_cfc  int
	_fbd  int // incremented once per mark added by extractXObjectImage
	_fbg  int
	_fba  map[*_dce.PdfObjectStream]*cachedImage // decoded images, keyed by XObject stream
	_ebac *ImageExtractOptions
	_defe bool
}

// _aebgd returns the lines of `_dfec` whose text contains a match of the
// _cfec pattern. The result is never nil.
func _aebgd(_dfec []*textLine) []*textLine {
	matched := []*textLine{}
	for _, line := range _dfec {
		if _cfec.Find([]byte(line.text())) == nil {
			continue
		}
		matched = append(matched, line)
	}
	return matched
}
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
func (_eag *Extractor) ExtractText() (string, error) {
	// The two statistics returned by ExtractTextWithStats are discarded.
	text, _, _, err := _eag.ExtractTextWithStats()
	return text, err
}

// asRuling converts the line into a ruling of kind _efcd.
//
// For a line of kind _cbab the primary coordinate (_cbag) is the mean X and
// the secondary range [_efgeb, _bbge] spans the Y values of the two end
// points. For kind _faccd it is the mean Y and the X values. Any other kind
// is logged as an error and yields (nil, false).
func (_cbfb lineRuling) asRuling() (*ruling, bool) {
	r := ruling{_gffa: _cbfb._gdggb, Color: _cbfb.Color, _adaa: _efcd}
	p1, p2 := _cbfb._cged, _cbfb._ggeaa
	switch _cbfb._gdggb {
	case _cbab:
		r._cbag = _cbfb.xMean()
		r._efgeb = _dc.Min(p1.Y, p2.Y)
		r._bbge = _dc.Max(p1.Y, p2.Y)
	case _faccd:
		r._cbag = _cbfb.yMean()
		r._efgeb = _dc.Min(p1.X, p2.X)
		r._bbge = _dc.Max(p1.X, p2.X)
	default:
		_ac.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _cbfb._gdggb)
		return nil, false
	}
	return &r, true
}

// buildList builds the lists found in the structure tree.
//
// The top element is the single root child when that child's type is
// "Document", "Sect", "Part", "Div" or "Art"; with zero or several root
// children a synthetic element wrapping all of them is used instead. A single
// child of any other type yields nil. Children of type "L" are used directly
// and children of type "Table" are expanded with _fede. The resulting
// elements are converted with _abcc and each converted list is expanded with
// _egddb.
//
// Returns nil for a nil receiver or when no top element could be determined.
func (_gdbae *structTreeRoot) buildList(_cagfc map[int][]*textLine, _cabbe _dce.PdfObject) []*list {
	if _gdbae == nil {
		_ac.Log.Debug("\u0062\u0075\u0069\u006c\u0064\u004c\u0069\u0073\u0074\u003a\u0020t\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0069\u0073 \u006e\u0069\u006c")
		return nil
	}

	var top *structElement
	if len(_gdbae._ccdea) == 1 {
		switch _gdbae._ccdea[0]._affb {
		case "\u0044\u006f\u0063\u0075\u006d\u0065\u006e\u0074", "\u0053\u0065\u0063\u0074", "\u0050\u0061\u0072\u0074", "\u0044\u0069\u0076", "\u0041\u0072\u0074":
			top = &_gdbae._ccdea[0]
		}
	} else {
		top = &structElement{_dfcd: _gdbae._ccdea, _affb: _gdbae._cefga}
	}
	if top == nil {
		_ac.Log.Debug("\u0062\u0075\u0069\u006cd\u004c\u0069\u0073\u0074\u003a\u0020\u0074\u006f\u0070\u0045l\u0065m\u0065\u006e\u0074\u0020\u0069\u0073\u0020n\u0069\u006c")
		return nil
	}

	elements := []structElement{}
	for _, child := range top._dfcd {
		switch child._affb {
		case "\u004c":
			elements = append(elements, child)
		case "\u0054\u0061\u0062l\u0065":
			elements = append(elements, _fede(child)...)
		}
	}

	var result []*list
	for _, l := range _abcc(elements, _cagfc, _cabbe) {
		result = append(result, _egddb(l)...)
	}
	return result
}
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// String returns a string describing `ma`: "EMPTY" when it has no elements,
// otherwise the element count followed by the first and the last element.
func (_bdb TextMarkArray) String() string {
	marks := _bdb._gcc
	if len(marks) == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	n := len(marks)
	return _gde.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", n, marks[0], marks[n-1])
}

// setCharSpacing stores `_bgb` in the _aaec field of the text state.
// It is a no-op on a nil receiver.
func (_fbdfc *textObject) setCharSpacing(_bgb float64) {
	if _fbdfc == nil {
		return
	}
	_fbdfc._abcb._aaec = _bgb
	if !_adbb {
		return
	}
	_ac.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _bgb, _fbdfc._abcb.String())
}

// _abcc converts the structure elements `_ggfc` into lists, recursing into
// their children (_dfcd).
//
// An element receives the lines stored in `_begec` under its _ecaf value only
// when that value is not -1, its _ebad is a *PdfObjectReference, and that
// reference is deeply equal to the reference of `_acef`, which must be a
// *PdfIndirectObject. Each list is then built with _bddc from the lines, the
// element type (_affb) and the child lists.
// The result is never nil.
func _abcc(_ggfc []structElement, _begec map[int][]*textLine, _acef _dce.PdfObject) []*list {
	result := []*list{}
	for _, elem := range _ggfc {
		children := elem._dfcd
		key := int(elem._ecaf)
		kind := elem._affb
		lines := []*textLine{}
		subLists := []*list{}

		ref, isRef := elem._ebad.(*_dce.PdfObjectReference)
		if !isRef {
			_ac.Log.Debug("\u0066\u0061\u0069l\u0065\u0064\u0020\u006f\u0074\u0020\u0063\u0061\u0073\u0074\u0020\u0074\u006f\u0020\u002a\u0063\u006f\u0072\u0065\u002e\u0050\u0064\u0066\u004f\u0062\u006a\u0065\u0063\u0074R\u0065\u0066\u0065\u0072\u0065\u006e\u0063\u0065")
		}
		if key != -1 && ref != nil {
			if found, haveLines := _begec[key]; haveLines {
				if indirect, isIndirect := _acef.(*_dce.PdfIndirectObject); isIndirect {
					if _b.DeepEqual(*ref, indirect.PdfObjectReference) {
						lines = found
					}
				}
			}
		}
		if children != nil {
			subLists = _abcc(children, _begec, _acef)
		}
		result = append(result, _bddc(lines, kind, subLists))
	}
	return result
}

// setTextLeading stores `_fbgc` in the _gcg field of the text state.
// It is a no-op on a nil receiver.
func (_aedc *textObject) setTextLeading(_fbgc float64) {
	if _aedc == nil {
		return
	}
	_aedc._abcb._gcg = _fbgc
}

// computeViews derives the page's views from its paragraphs: the plain text
// (_edee), the text marks (_fdaf) and the tables (_agdc).
func (_dgag *PageText) computeViews() {
	paras := _dgag.getParagraphs()

	var buf _dfg.Buffer
	paras.writeText(&buf)
	_dgag._edee = buf.String()

	_dgag._fdaf = paras.toTextMarks()
	_dgag._agdc = paras.tables()
	if _eadb {
		_ac.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(_dgag._agdc))
	}
}

// _caa returns the single numeric parameter of the operation `_ffee` as a
// float64. An operation with any other number of parameters is logged and
// yields an "incorrect parameter count" error.
func _caa(_ffee *_dcg.ContentStreamOperation) (float64, error) {
	if len(_ffee.Params) == 1 {
		return _dce.GetNumberAsFloat(_ffee.Params[0])
	}
	err := _a.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	_ac.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _ffee.Operand, 1, len(_ffee.Params), _ffee.Params)
	return 0.0, err
}

// _bafcd packs two ints into one map key: `_cdcff` multiplied by 0x1000000
// plus `_gefbd`.
func _bafcd(_cdcff, _gefbd int) uint64 {
	hi := uint64(_cdcff) * 0x1000000
	return hi + uint64(_gefbd)
}

// mergePrimary returns the mean _cbag of the rulings in the list, which must
// not be empty.
func (_cgfcc rulingList) mergePrimary() float64 {
	sum := _cgfcc[0]._cbag
	for i := 1; i < len(_cgfcc); i++ {
		sum += _cgfcc[i]._cbag
	}
	return sum / float64(len(_cgfcc))
}
2023-05-29 17:26:33 +00:00
2023-09-07 17:40:17 +00:00
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
2023-11-11 11:29:03 +00:00
type RenderMode int ;func (_ggfab lineRuling )xMean ()float64 {return 0.5*(_ggfab ._cged .X +_ggfab ._ggeaa .X )};func _daae (_edbg []TextMark ,_gfef *TextTable )[]TextMark {var _ecgf []TextMark ;for _ ,_egefc :=range _edbg {_egefc ._dgbe =true ;_egefc ._acbcf =_gfef ;
_ecgf =append (_ecgf ,_egefc );};return _ecgf ;};

// removeDuplicates removes words that duplicate another word held in the same
// depth bin of the bag. Two distinct words are treated as duplicates when their
// `_fedgb` values are equal and all four bounding-box coordinates differ by
// less than `_bfdb` times the `_adecc` of the first word in the scanned bin.
// NOTE(review): `_adecc` is presumably the word's font size and `_adgge` its
// depth on the page -- confirm against textWord.
func (_agab *wordBag) removeDuplicates() {
	if _bfcf {
		_ac.Log.Info("removeDuplicates: %q", _agab.text())
	}
	for _, depthIdx := range _agab.depthIndexes() {
		// Bins may have been emptied or deleted by an earlier iteration.
		if len(_agab._aac[depthIdx]) == 0 {
			continue
		}
		first := _agab._aac[depthIdx][0]
		tol := _bfdb * first._adecc
		minDepth := first._adgge
		for _, bandIdx := range _agab.depthBand(minDepth, minDepth+tol) {
			dups := map[*textWord]struct{}{}
			words := _agab._aac[bandIdx]
			for _, w := range words {
				if _, isDup := dups[w]; isDup {
					continue
				}
				for _, other := range words {
					if _, isDup := dups[other]; isDup {
						continue
					}
					if other != w && other._fedgb == w._fedgb &&
						_dc.Abs(other.Llx-w.Llx) < tol &&
						_dc.Abs(other.Urx-w.Urx) < tol &&
						_dc.Abs(other.Lly-w.Lly) < tol &&
						_dc.Abs(other.Ury-w.Ury) < tol {
						dups[other] = struct{}{}
					}
				}
			}
			if len(dups) > 0 {
				// Compact the surviving words to the front of the bin in place.
				kept := 0
				for _, w := range words {
					if _, isDup := dups[w]; !isDup {
						words[kept] = w
						kept++
					}
				}
				_agab._aac[bandIdx] = words[:len(words)-len(dups)]
				if len(_agab._aac[bandIdx]) == 0 {
					delete(_agab._aac, bandIdx)
				}
			}
		}
	}
}

// _gced returns the result of `_ebcgb(_dgdff, i)` for the first index i at
// which `_dgdff` holds `_gbaac`. If the word is not present it logs an error
// and returns nil.
func _gced(_dgdff []*textWord, _gbaac *textWord) []*textWord {
	for i, w := range _dgdff {
		if w == _gbaac {
			return _ebcgb(_dgdff, i)
		}
	}
	_ac.Log.Error("removeWord: words doesn't contain word=%s", _gbaac)
	return nil
}

// extractPageText parses the content stream `_cbc` and processes every
// operation with a handler that tracks the graphics/text state, collects the
// text marks of each text object and records stroked/filled paths.
//
// `_adb` are the resources used to process the stream, `_ageg` is multiplied
// onto the CTM of text objects and nested forms, and `_cag` is the current
// form-XObject recursion level (an error is returned once it exceeds `_deg`).
//
// It returns the collected PageText, the `_ddcd` and `_dcad` counters of the
// text state, and any error encountered.
func (_efg *Extractor) extractPageText(_cbc string, _adb *_fg.PdfPageResources, _ageg _dca.Matrix, _cag int) (*PageText, int, int, error) {
	_ac.Log.Trace("extractPageText: level=%d", _cag)
	pageText := &PageText{_fgag: _efg._aa, _cabf: _efg._agb, _gefc: _efg._ce}
	state := _eab(_efg._aa)
	var saved stateStack
	to := _eded(_efg, _adb, _dcg.GraphicsState{}, &state, &saved)
	shapes := shapesState{_dged: _ageg, _gebd: _dca.IdentityMatrix(), _cfda: to}
	var inTextObj bool
	mcid := -1

	if _cag > _deg {
		overflowErr := _a.New("form stack overflow")
		_ac.Log.Debug("ERROR: extractPageText. recursion level=%d err=%v", _cag, overflowErr)
		return pageText, state._ddcd, state._dcad, overflowErr
	}

	parser := _dcg.NewContentStreamParser(_cbc)
	operations, err := parser.Parse()
	if err != nil {
		_ac.Log.Debug("ERROR: extractPageText parse failed. err=%v", err)
		return pageText, state._ddcd, state._dcad, err
	}
	pageText._eaee = operations

	processor := _dcg.NewContentStreamProcessor(*operations)
	processor.AddHandler(_dcg.HandlerConditionEnumAllOperands, "", func(op *_dcg.ContentStreamOperation, gs _dcg.GraphicsState, resources *_fg.PdfPageResources) error {
		operand := op.Operand
		if _faee {
			_ac.Log.Info("&&& op=%s", op)
		}
		switch operand {
		case "q":
			if _bdefa {
				_ac.Log.Info("ctm=%s", shapes._gebd)
			}
			saved.push(&state)
		case "Q":
			if !saved.empty() {
				state = *saved.pop()
			}
			shapes._gebd = gs.CTM
			if _bdefa {
				_ac.Log.Info("ctm=%s", shapes._gebd)
			}
		case "BDC":
			// Guard the operand count; Params[1] used to be indexed blindly.
			if len(op.Params) < 2 {
				_ac.Log.Debug("ERROR: BDC op=%s GetDict failed", op)
				return nil
			}
			dict, isDict := _dce.GetDict(op.Params[1])
			if !isDict {
				// Not treated as fatal: the operation is skipped and the
				// current marked-content id is left unchanged.
				_ac.Log.Debug("ERROR: BDC op=%s GetDict failed", op)
				return nil
			}
			mcidObj := dict.Get("MCID")
			if mcidObj != nil {
				id, isInt := _dce.GetIntVal(mcidObj)
				if !isInt {
					_ac.Log.Debug("ERROR: BDC op=%s. Bad numerical object. o=%s", op, mcidObj)
				}
				mcid = id
			} else {
				mcid = -1
			}
		case "EMC":
			mcid = -1
		case "BT":
			if inTextObj {
				// Nested BT: keep the marks gathered so far before starting over.
				_ac.Log.Debug("BT called while in a text object")
				pageText._fbga = append(pageText._fbga, to._ffc...)
			}
			inTextObj = true
			textGS := gs
			textGS.CTM = _ageg.Mult(textGS.CTM)
			to = _eded(_efg, resources, textGS, &state, &saved)
			shapes._cfda = to
		case "ET":
			if !inTextObj {
				_ac.Log.Debug("ET called outside of a text object")
			}
			inTextObj = false
			pageText._fbga = append(pageText._fbga, to._ffc...)
			to.reset()
		case "T*":
			to.nextLine()
		case "Td":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_ac.Log.Debug("ERROR: err=%v", err)
				return err
			}
			tx, ty, err := _ecgcg(op.Params)
			if err != nil {
				return err
			}
			to.moveText(tx, ty)
		case "TD":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_ac.Log.Debug("ERROR: err=%v", err)
				return err
			}
			tx, ty, err := _ecgcg(op.Params)
			if err != nil {
				_ac.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.moveTextSetLeading(tx, ty)
		case "Tj":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ac.Log.Debug("ERROR: Tj op=%s err=%v", op, err)
				return err
			}
			strObj := _dce.TraceToDirectObject(op.Params[0])
			data, isString := _dce.GetStringBytes(strObj)
			if !isString {
				_ac.Log.Debug("ERROR: Tj op=%s GetStringBytes failed", op)
				return _dce.ErrTypeError
			}
			return to.showText(strObj, data, mcid)
		case "TJ":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ac.Log.Debug("ERROR: TJ err=%v", err)
				return err
			}
			array, isArray := _dce.GetArray(op.Params[0])
			if !isArray {
				// The original returned the (always nil) parse error here, so a
				// malformed TJ is skipped rather than aborting extraction.
				_ac.Log.Debug("ERROR: TJ op=%s GetArrayVal failed", op)
				return nil
			}
			return to.showTextAdjusted(array, mcid)
		case "'":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ac.Log.Debug("ERROR: ' err=%v", err)
				return err
			}
			strObj := _dce.TraceToDirectObject(op.Params[0])
			data, isString := _dce.GetStringBytes(strObj)
			if !isString {
				_ac.Log.Debug("ERROR: ' op=%s GetStringBytes failed", op)
				return _dce.ErrTypeError
			}
			to.nextLine()
			return to.showText(strObj, data, mcid)
		case "\"":
			if ok, err := to.checkOp(op, 3, true); !ok {
				_ac.Log.Debug("ERROR: \" err=%v", err)
				return err
			}
			wordSpacing, charSpacing, err := _ecgcg(op.Params[:2])
			if err != nil {
				return err
			}
			strObj := _dce.TraceToDirectObject(op.Params[2])
			data, isString := _dce.GetStringBytes(strObj)
			if !isString {
				_ac.Log.Debug("ERROR: \" op=%s GetStringBytes failed", op)
				return _dce.ErrTypeError
			}
			// PDF operands are `aw ac string "`: the first number is the word
			// spacing and the second the character spacing. They used to be
			// applied the other way round.
			to.setWordSpacing(wordSpacing)
			to.setCharSpacing(charSpacing)
			to.nextLine()
			return to.showText(strObj, data, mcid)
		case "TL":
			leading, err := _caa(op)
			if err != nil {
				_ac.Log.Debug("ERROR: TL err=%v", err)
				return err
			}
			to.setTextLeading(leading)
		case "Tc":
			spacing, err := _caa(op)
			if err != nil {
				_ac.Log.Debug("ERROR: Tc err=%v", err)
				return err
			}
			to.setCharSpacing(spacing)
		case "Tf":
			if ok, err := to.checkOp(op, 2, true); !ok {
				_ac.Log.Debug("ERROR: Tf err=%v", err)
				return err
			}
			fontName, isName := _dce.GetNameVal(op.Params[0])
			if !isName {
				_ac.Log.Debug("ERROR: Tf op=%s GetNameVal failed", op)
				return _dce.ErrTypeError
			}
			size, err := _dce.GetNumberAsFloat(op.Params[1])
			// Check the conversion error itself; the original re-tested the
			// name lookup flag, so a bad size operand was never detected.
			if err != nil {
				_ac.Log.Debug("ERROR: Tf op=%s GetFloatVal failed. err=%v", op, err)
				return err
			}
			err = to.setFont(fontName, size)
			to._gbf = _gdf.Is(err, _dce.ErrNotSupported)
			if err != nil && !to._gbf {
				return err
			}
		case "Tm":
			if ok, err := to.checkOp(op, 6, true); !ok {
				_ac.Log.Debug("ERROR: Tm err=%v", err)
				return err
			}
			matrix, err := _dce.GetNumbersAsFloat(op.Params)
			if err != nil {
				_ac.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setTextMatrix(matrix)
		case "Tr":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ac.Log.Debug("ERROR: Tr err=%v", err)
				return err
			}
			mode, isInt := _dce.GetIntVal(op.Params[0])
			if !isInt {
				_ac.Log.Debug("ERROR: Tr op=%s GetIntVal failed", op)
				return _dce.ErrTypeError
			}
			to.setTextRenderMode(mode)
		case "Ts":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ac.Log.Debug("ERROR: Ts err=%v", err)
				return err
			}
			rise, err := _dce.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_ac.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setTextRise(rise)
		case "Tw":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ac.Log.Debug("ERROR: err=%v", err)
				return err
			}
			spacing, err := _dce.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_ac.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setWordSpacing(spacing)
		case "Tz":
			if ok, err := to.checkOp(op, 1, true); !ok {
				_ac.Log.Debug("ERROR: err=%v", err)
				return err
			}
			scaling, err := _dce.GetNumberAsFloat(op.Params[0])
			if err != nil {
				_ac.Log.Debug("ERROR: err=%v", err)
				return err
			}
			to.setHorizScaling(scaling)
		case "cm":
			shapes._gebd = gs.CTM
			if shapes._gebd.Singular() {
				// Replace a singular CTM by a pure translation.
				replacement := _dca.IdentityMatrix().Translate(shapes._gebd.Translation())
				_ac.Log.Debug("Singular ctm=%s\u2192%s", shapes._gebd, replacement)
				shapes._gebd = replacement
			}
			if _bdefa {
				_ac.Log.Info("ctm=%s", shapes._gebd)
			}
		case "m":
			if len(op.Params) != 2 {
				_ac.Log.Debug("WARN: error while processing `m` operator: %v. Output may be incorrect.", _efa)
				return nil
			}
			pt, err := _dce.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.moveTo(pt[0], pt[1])
		case "l":
			if len(op.Params) != 2 {
				_ac.Log.Debug("WARN: error while processing `l` operator: %v. Output may be incorrect.", _efa)
				return nil
			}
			pt, err := _dce.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.lineTo(pt[0], pt[1])
		case "c":
			if len(op.Params) != 6 {
				return _efa
			}
			pts, err := _dce.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_ac.Log.Debug("Cubic bezier params: %.2f", pts)
			shapes.cubicTo(pts[0], pts[1], pts[2], pts[3], pts[4], pts[5])
		case "v", "y":
			if len(op.Params) != 4 {
				return _efa
			}
			pts, err := _dce.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			_ac.Log.Debug("Cubic bezier params: %.2f", pts)
			shapes.quadraticTo(pts[0], pts[1], pts[2], pts[3])
		case "h":
			shapes.closePath()
		case "re":
			if len(op.Params) != 4 {
				return _efa
			}
			rect, err := _dce.GetNumbersAsFloat(op.Params)
			if err != nil {
				return err
			}
			shapes.drawRectangle(rect[0], rect[1], rect[2], rect[3])
			shapes.closePath()
		case "S":
			shapes.stroke(&pageText._ffge)
			shapes.clearPath()
		case "s":
			shapes.closePath()
			shapes.stroke(&pageText._ffge)
			shapes.clearPath()
		case "F":
			shapes.fill(&pageText._cedf)
			shapes.clearPath()
		case "f", "f*":
			shapes.closePath()
			shapes.fill(&pageText._cedf)
			shapes.clearPath()
		case "B", "B*":
			shapes.fill(&pageText._cedf)
			shapes.stroke(&pageText._ffge)
			shapes.clearPath()
		case "b", "b*":
			shapes.closePath()
			shapes.fill(&pageText._cedf)
			shapes.stroke(&pageText._ffge)
			shapes.clearPath()
		case "n":
			shapes.clearPath()
		case "Do":
			if len(op.Params) == 0 {
				_ac.Log.Debug("ERROR: expected XObject name operand for Do operator. Got %+v.", op.Params)
				return _dce.ErrRangeError
			}
			name, isName := _dce.GetName(op.Params[0])
			if !isName {
				_ac.Log.Debug("ERROR: invalid Do operator XObject name operand: %+v.", op.Params[0])
				return _dce.ErrTypeError
			}
			_, xtype := resources.GetXObjectByName(*name)
			if xtype != _fg.XObjectTypeForm {
				break
			}
			// Form results are cached on the extractor, keyed by XObject name.
			formResult, cached := _efg._bca[name.String()]
			if !cached {
				xform, err := resources.GetXObjectFormByName(*name)
				if err != nil {
					_ac.Log.Debug("ERROR: %v", err)
					return err
				}
				formContent, err := xform.GetContentStream()
				if err != nil {
					_ac.Log.Debug("ERROR: %v", err)
					return err
				}
				formResources := xform.Resources
				if formResources == nil {
					formResources = resources
				}
				formCTM := gs.CTM
				if matrixArr, hasMatrix := _dce.GetArray(xform.Matrix); hasMatrix {
					vals, err := matrixArr.GetAsFloat64Slice()
					if err != nil {
						return err
					}
					if len(vals) != 6 {
						return _efa
					}
					formMatrix := _dca.NewMatrix(vals[0], vals[1], vals[2], vals[3], vals[4], vals[5])
					formCTM = gs.CTM.Mult(formMatrix)
				}
				formText, count1, count2, err := _efg.extractPageText(string(formContent), formResources, _ageg.Mult(formCTM), _cag+1)
				if err != nil {
					_ac.Log.Debug("ERROR: %v", err)
					return err
				}
				formResult = textResult{*formText, count1, count2}
				_efg._bca[name.String()] = formResult
			}
			shapes._gebd = gs.CTM
			if _bdefa {
				_ac.Log.Info("ctm=%s", shapes._gebd)
			}
			pageText._fbga = append(pageText._fbga, formResult._ecca._fbga...)
			pageText._ffge = append(pageText._ffge, formResult._ecca._ffge...)
			pageText._cedf = append(pageText._cedf, formResult._ecca._cedf...)
			state._ddcd += formResult._bbag
			state._dcad += formResult._ffg
		case "rg", "g", "k", "cs", "sc", "scn":
			to._dbf.ColorspaceNonStroking = gs.ColorspaceNonStroking
			to._dbf.ColorNonStroking = gs.ColorNonStroking
		case "RG", "G", "K", "CS", "SC", "SCN":
			to._dbf.ColorspaceStroking = gs.ColorspaceStroking
			to._dbf.ColorStroking = gs.ColorStroking
		}
		return nil
	})

	err = processor.Process(_adb)
	return pageText, state._ddcd, state._dcad, err
}

// text returns the concatenation of the `_fedgb` of every word in the line,
// inserting a single space before each word whose `_eadcb` flag is set.
func (_dddb *textLine) text() string {
	var parts []string
	for _, w := range _dddb._fgbe {
		if w._eadcb {
			parts = append(parts, " ")
		}
		parts = append(parts, w._fedgb)
	}
	return _df.Join(parts, "")
}
2023-10-07 13:58:01 +00:00
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
2023-11-11 11:29:03 +00:00
func (_gdd *TextMarkArray )BBox ()(_fg .PdfRectangle ,bool ){var _feec _fg .PdfRectangle ;_abbd :=false ;for _ ,_aggg :=range _gdd ._gcc {if _aggg .Meta ||_cagg (_aggg .Text ){continue ;};if _abbd {_feec =_ebge (_feec ,_aggg .BBox );}else {_feec =_aggg .BBox ;
_abbd =true ;};};return _feec ,_abbd ;};func (_cbabg rectRuling )checkWidth (_fcba ,_bebc float64 )(float64 ,bool ){_bbgf :=_bebc -_fcba ;_cgeacc :=_bbgf <=_gbgf ;return _bbgf ,_cgeacc ;};func _egddb (_gggde *list )[]*list {var _abbdf []*list ;for _ ,_begg :=range _gggde ._edge {switch _begg ._aeaa {case "\u004c\u0049":_dfeg :=_dgcc (_begg );
_eacce :=_egddb (_begg );_ddbdc :=_bddc (_dfeg ,"\u0062\u0075\u006c\u006c\u0065\u0074",_eacce );_feba :=_gadcf (_dfeg ,"");_ddbdc ._fbfaf =_feba ;_abbdf =append (_abbdf ,_ddbdc );case "\u004c\u0042\u006fd\u0079":return _egddb (_begg );case "\u004c":_gefg :=_egddb (_begg );
_abbdf =append (_abbdf ,_gefg ...);return _abbdf ;};};return _abbdf ;};func (_fcfc paraList )applyTables (_dfcca []*textTable )paraList {var _bbgc paraList ;for _ ,_fbgdb :=range _dfcca {_bbgc =append (_bbgc ,_fbgdb .newTablePara ());};for _ ,_egacbf :=range _fcfc {if _egacbf ._dcada {continue ;
};_bbgc =append (_bbgc ,_egacbf );};return _bbgc ;};func (_baae rulingList )log (_geec string ){if !_aebg {return ;};_ac .Log .Info ("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_geec ,_baae .String ());
for _gcbe ,_ebdc :=range _baae {_gde .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gcbe ,_ebdc .String ());};};
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// Append appends `mark` to the mark array.
func (_cgcd *TextMarkArray )Append (mark TextMark ){_cgcd ._gcc =append (_cgcd ._gcc ,mark )};func (_agf *subpath )add (_dgge ..._dca .Point ){_agf ._gdgd =append (_agf ._gdgd ,_dgge ...)};func _gfdg (_bdcfb *textLine )bool {_fdfa :=true ;_abad :=-1;for _ ,_eacg :=range _bdcfb ._fgbe {for _ ,_cee :=range _eacg ._daafd {_caga :=_cee ._fbcf ;
if _abad ==-1{_abad =_caga ;}else {if _abad !=_caga {_fdfa =false ;break ;};};};};return _fdfa ;};func _ddfff (_cdda []*textWord ,_aff float64 ,_eeea ,_bbcb rulingList )*wordBag {_dada :=_egfaa (_cdda [0],_aff ,_eeea ,_bbcb );for _ ,_edgbe :=range _cdda [1:]{_bbcdb :=_ebaf (_edgbe ._adgge );
_dada ._aac [_bbcdb ]=append (_dada ._aac [_bbcdb ],_edgbe );_dada .PdfRectangle =_ebge (_dada .PdfRectangle ,_edgbe .PdfRectangle );};_dada .sort ();return _dada ;};
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// PageTextOptions holds various options available in extraction process.
type PageTextOptions struct{_dbcd bool ;_dcc bool ;};func (_abddc gridTile )complete ()bool {return _abddc .numBorders ()==4};var _bfaaa =_gd .MustCompile ("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024");
// Numeric thresholds used throughout the extractor.
// NOTE(review): the meaning of each constant is not visible in this part of
// the file; consult the use sites before changing any value.
const (
	_bafg  = 1.0e-6
	_egac  = 1.0e-4
	_edef  = 10
	_ddgf  = 6
	_dgfb  = 0.5
	_ffca  = 0.12
	_faaf  = 0.19
	_cded  = 0.04
	_gebab = 0.04
	_ggce  = 1.0
	_aebe  = 0.04
	_afdf  = 0.4
	_adca  = 0.7
	_bgdf  = 1.0
	_ggfbd = 0.1
	_acbbe = 1.4
	_eedd  = 0.46
	_aeeg  = 0.02
	_bfdb  = 0.2
	_abbba = 0.5
	_ggg   = 4
	_eega  = 4.0
	_ffa   = 6
	_eabd  = 0.3
	_bdag  = 0.01
	_aec   = 0.02
	_fbff  = 2
	_gaab  = 2
	_bcccg = 500
	_daee  = 4.0
	_ccbf  = 4.0
	_edec  = 0.05
	_cfca  = 0.1
	_cebe  = 2.0
	_gbgf  = 2.0
	_gagd  = 1.5
	_ddba  = 3.0
	_egab  = 0.25
)

// processOperand handles the content stream operations that can carry images:
//   - "BI": extracts the inline image, unless it is a stencil mask and
//     IncludeInlineStencilMasks is off.
//   - "Do": extracts an image XObject, or the images of a form XObject.
//   - "scn"/"SCN" while `_defe` is set: extracts images from the content
//     stream of a tiling pattern.
//   - "cs"/"CS": sets `_defe` to whether the operand is "Pattern".
//
// Any other operation is ignored and nil is returned.
func (_bbb *imageExtractContext) processOperand(_eae *_dcg.ContentStreamOperation, _ecc _dcg.GraphicsState, _dac *_fg.PdfPageResources) error {
	switch {
	case _eae.Operand == "BI" && len(_eae.Params) == 1:
		inline, isInline := _eae.Params[0].(*_dcg.ContentStreamInlineImage)
		if !isInline {
			return nil
		}
		if isMask, isBool := _dce.GetBoolVal(inline.ImageMask); isBool {
			if isMask && !_bbb._ebac.IncludeInlineStencilMasks {
				return nil
			}
		}
		return _bbb.extractInlineImage(inline, _ecc, _dac)

	case _eae.Operand == "Do" && len(_eae.Params) == 1:
		name, isName := _dce.GetName(_eae.Params[0])
		if !isName {
			_ac.Log.Debug("ERROR: Type")
			return _ge
		}
		_, xtype := _dac.GetXObjectByName(*name)
		switch xtype {
		case _fg.XObjectTypeImage:
			return _bbb.extractXObjectImage(name, _ecc, _dac)
		case _fg.XObjectTypeForm:
			return _bbb.extractFormImages(name, _ecc, _dac)
		}

	case _bbb._defe && (_eae.Operand == "scn" || _eae.Operand == "SCN") && len(_eae.Params) == 1:
		name, isName := _dce.GetName(_eae.Params[0])
		if !isName {
			_ac.Log.Debug("ERROR: Type")
			return _ge
		}
		pattern, found := _dac.GetPatternByName(*name)
		if !found {
			_ac.Log.Debug("ERROR: Pattern not found")
			return nil
		}
		if pattern.IsTiling() {
			tiling := pattern.GetAsTilingPattern()
			content, err := tiling.GetContentStream()
			if err != nil {
				return err
			}
			err = _bbb.extractContentStreamImages(string(content), tiling.Resources)
			if err != nil {
				return err
			}
		}

	case (_eae.Operand == "cs" || _eae.Operand == "CS") && len(_eae.Params) >= 1:
		_bbb._defe = _eae.Params[0].String() == "Pattern"
	}
	return nil
}

// scanBand scans the words of the receiver whose `_adgge` lies within
// [`_cebc`-tol, `_bcafc`+tol], where tol is `_dgfb` times the `_fab` of
// `_aegc` at entry, and counts those that
//   - are accepted by `_accf(_aegc, word)`,
//   - pass the size test when `_gddb` > 0, and
//   - are not blocked according to `_aegc.blocked`.
//
// Unless `_fead` is set, every counted word is pulled into `_aegc` and removed
// from the receiver; with `_fead` set nothing is moved and scanning of a bin
// stops at the first counted word. Unless `_eaf` is set, the band limits are
// widened to include each counted word. `_eddd` is not used.
func (_eeac *wordBag) scanBand(_eddd string, _aegc *wordBag, _accf func(_dadcd *wordBag, _gecb *textWord) bool, _cebc, _bcafc, _gddb float64, _fead, _eaf bool) int {
	initialSize := _aegc._fab
	var removals map[int]map[*textWord]struct{}
	if !_fead {
		removals = _eeac.makeRemovals()
	}
	tol := _dgfb * initialSize
	count := 0
	for _, bin := range _eeac.depthBand(_cebc-tol, _bcafc+tol) {
		if len(_eeac._aac[bin]) == 0 {
			continue
		}
		for _, word := range _eeac._aac[bin] {
			if !(_cebc-tol <= word._adgge && word._adgge <= _bcafc+tol) {
				continue
			}
			if !_accf(_aegc, word) {
				continue
			}
			// `_aegc._fab` is re-read on purpose: pulling words may change it.
			relDiff := 2.0 * _dc.Abs(word._adecc-_aegc._fab) / (word._adecc + _aegc._fab)
			ratio := _dc.Max(word._adecc/_aegc._fab, _aegc._fab/word._adecc)
			sizeMeasure := _dc.Min(relDiff, ratio)
			if _gddb > 0 && sizeMeasure > _gddb {
				continue
			}
			if _aegc.blocked(word) {
				continue
			}
			if !_fead {
				_aegc.pullWord(word, bin, removals)
			}
			count++
			if !_eaf {
				if word._adgge < _cebc {
					_cebc = word._adgge
				}
				if word._adgge > _bcafc {
					_bcafc = word._adgge
				}
			}
			if _fead {
				break
			}
		}
	}
	if !_fead {
		_eeac.applyRemovals(removals)
	}
	return count
}

// _ecgcg converts exactly two PDF number objects to float64 values. It
// returns an error if `_ebce` does not hold two elements or if either element
// cannot be converted.
func _ecgcg(_ebce []_dce.PdfObject) (_eefa, _edgbbe float64, _ggfcc error) {
	if len(_ebce) != 2 {
		return 0, 0, _gde.Errorf("invalid number of params: %d", len(_ebce))
	}
	values, err := _dce.GetNumbersAsFloat(_ebce)
	if err != nil {
		return 0, 0, err
	}
	return values[0], values[1], nil
}

// stroke appends a pathSection made of the current subpaths (`_efb`) and the
// current stroke color to `*_fcfg`. With `_aebg` enabled it also prints a
// summary, and with `_aab` the first subpaths (indexes 0 to 10).
func (_acbbb *shapesState) stroke(_fcfg *[]pathSection) {
	section := pathSection{_ged: _acbbb._efb, Color: _acbbb._cfda.getStrokeColor()}
	*_fcfg = append(*_fcfg, section)
	if !_aebg {
		return
	}
	_gde.Printf("\u0020 \u0020\u0020STROKE: %d strokes ss=%s color=%+v %6.2f\n", len(*_fcfg), _acbbb, _acbbb._cfda.getStrokeColor(), section.bbox())
	if !_aab {
		return
	}
	for i, sp := range _acbbb._efb {
		_gde.Printf("%8d: %s\n", i, sp)
		if i == 10 {
			break
		}
	}
}

// _afbef returns the indexes 0.._ebea-1 sorted with `_egeec` as the "less"
// function, which receives the original indexes of the two items compared.
func _afbef(_ebea int, _egeec func(int, int) bool) []int {
	order := make([]int, _ebea)
	for i := range order {
		order[i] = i
	}
	_ab.Slice(order, func(a, b int) bool { return _egeec(order[a], order[b]) })
	return order
}

// llyRange returns the sub-slice of `_baga` whose paras have Lly in the closed
// range [`_ddbfg`, `_cegb`]. `_baga` is expected to index the receiver in
// increasing Lly order (the binary searches rely on it). It returns nil when
// no para can be in range, when the receiver is empty, or when `_baga` has
// fewer entries than the receiver.
func (_eedb paraList) llyRange(_baga []int, _ddbfg, _cegb float64) []int {
	n := len(_eedb)
	// Guard against the out-of-range panics the unchecked indexing below
	// would otherwise cause.
	if n == 0 || len(_baga) < n {
		return nil
	}
	if _cegb < _eedb[_baga[0]].Lly || _ddbfg > _eedb[_baga[n-1]].Lly {
		return nil
	}
	first := _ab.Search(n, func(i int) bool { return _eedb[_baga[i]].Lly >= _ddbfg })
	end := _ab.Search(n, func(i int) bool { return _eedb[_baga[i]].Lly > _cegb })
	return _baga[first:end]
}

// sort sorts the rulings in place using the list's `comp` ordering.
func (_cgcbcg rulingList) sort() {
	_ab.Slice(_cgcbcg, _cgcbcg.comp)
}
func _decf (_dfgc _dce .PdfObject ,_abddcf _be .Color )(_e .Image ,error ){_dbfg ,_bgafa :=_dce .GetStream (_dfgc );if !_bgafa {return nil ,nil ;};_ggba ,_edgg :=_fg .NewXObjectImageFromStream (_dbfg );if _edgg !=nil {return nil ,_edgg ;};_deff ,_edgg :=_ggba .ToImage ();
if _edgg !=nil {return nil ,_edgg ;};return _dcfd (_deff ,_abddcf ),nil ;};func _caba (_eebc ,_ebbd _dca .Point )bool {_caff :=_dc .Abs (_eebc .X -_ebbd .X );_abba :=_dc .Abs (_eebc .Y -_ebbd .Y );return _edfa (_caff ,_abba );};type textLine struct{_fg .PdfRectangle ;
_bfcg float64 ;_fgbe []*textWord ;_ceacg float64 ;};func _dfebg (_egdee *_fg .Image ,_agaad _be .Color )_e .Image {_dfeac ,_fdefe :=int (_egdee .Width ),int (_egdee .Height );_dbgf :=_e .NewRGBA (_e .Rect (0,0,_dfeac ,_fdefe ));for _abddb :=0;_abddb < _fdefe ;
_abddb ++{for _adeegd :=0;_adeegd < _dfeac ;_adeegd ++{_eccf ,_egefcf :=_egdee .ColorAt (_adeegd ,_abddb );if _egefcf !=nil {_ac .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e",_adeegd ,_abddb );
continue ;};_cbgagd ,_ffafc ,_gacfd ,_ :=_eccf .RGBA ();var _gfabf _be .Color ;if _cbgagd +_ffafc +_gacfd ==0{_gfabf =_be .Transparent ;}else {_gfabf =_agaad ;};_dbgf .Set (_adeegd ,_abddb ,_gfabf );};};return _dbgf ;};func _cbgbf (_ecfef map[int ][]float64 )string {_ffff :=_dfdf (_ecfef );
_agcaa :=make ([]string ,len (_ecfef ));for _fefbc ,_bdda :=range _ffff {_agcaa [_fefbc ]=_gde .Sprintf ("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066",_bdda ,_ecfef [_bdda ]);};return _gde .Sprintf ("\u007b\u0025\u0073\u007d",_df .Join (_agcaa ,"\u002c\u0020"));
2023-10-07 13:58:01 +00:00
};
2023-11-11 11:29:03 +00:00
// String returns a description of `p`.
func (_fcgeb *textPara )String ()string {if _fcgeb ._bfge {return _gde .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d",_fcgeb .PdfRectangle );};_ebbed :="";if _fcgeb ._edce !=nil {_ebbed =_gde .Sprintf ("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020",_fcgeb ._edce ._acddc ,_fcgeb ._edce ._gebeeb );
};return _gde .Sprintf ("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071",_fcgeb .PdfRectangle ,_ebbed ,len (_fcgeb ._gfaae ),_dbdbb (_fcgeb .text (),50));};func _eafda (_dfca _dce .PdfObject ,_baea _be .Color )(_e .Image ,error ){_gacdb ,_facge :=_dce .GetStream (_dfca );
if !_facge {return nil ,nil ;};_ggecf ,_cadaf :=_fg .NewXObjectImageFromStream (_gacdb );if _cadaf !=nil {return nil ,_cadaf ;};_fffc ,_cadaf :=_ggecf .ToImage ();if _cadaf !=nil {return nil ,_cadaf ;};return _dfebg (_fffc ,_baea ),nil ;};func (_bgfe *ruling )alignsSec (_daac *ruling )bool {const _cecgb =_gbgf +1.0;
return _bgfe ._efgeb -_cecgb <=_daac ._bbge &&_daac ._efgeb -_cecgb <=_bgfe ._bbge ;};func (_gbefe *textTable )putComposite (_bdadb ,_fgdb int ,_fabe paraList ,_fcgg _fg .PdfRectangle ){if len (_fabe )==0{_ac .Log .Error ("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073");
return ;};_aabg :=compositeCell {PdfRectangle :_fcgg ,paraList :_fabe };if _eadb {_gde .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a",_bdadb ,_fgdb ,_aabg .String ());
};_aabg .updateBBox ();_gbefe ._edbe [_bafcd (_bdadb ,_fgdb )]=_aabg ;};func (_abf *imageExtractContext )extractInlineImage (_bfad *_dcg .ContentStreamInlineImage ,_cbgd _dcg .GraphicsState ,_eda *_fg .PdfPageResources )error {_edd ,_fa :=_bfad .ToImage (_eda );
if _fa !=nil {return _fa ;};_ece ,_fa :=_bfad .GetColorSpace (_eda );if _fa !=nil {return _fa ;};if _ece ==nil {_ece =_fg .NewPdfColorspaceDeviceGray ();};_abc ,_fa :=_ece .ImageToRGB (*_edd );if _fa !=nil {return _fa ;};_dab :=ImageMark {Image :&_abc ,Width :_cbgd .CTM .ScalingFactorX (),Height :_cbgd .CTM .ScalingFactorY (),Angle :_cbgd .CTM .Angle ()};
_dab .X ,_dab .Y =_cbgd .CTM .Translation ();_abf ._dag =append (_abf ._dag ,_dab );_abf ._cfc ++;return nil ;};func (_eccef *textWord )computeText ()string {_febgd :=make ([]string ,len (_eccef ._daafd ));for _ecgg ,_gfec :=range _eccef ._daafd {_febgd [_ecgg ]=_gfec ._efgdc ;
};return _df .Join (_febgd ,"");};const (_ccfb rulingKind =iota ;_faccd ;_cbab ;);func (_adgcc *textTable )logComposite (_gbcag string ){if !_eadb {return ;};_ac .Log .Info ("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_adgcc ._acddc ,_adgcc ._gebeeb ,_gbcag );
_gde .Printf ("\u0025\u0035\u0073 \u007c","");for _cedd :=0;_cedd < _adgcc ._acddc ;_cedd ++{_gde .Printf ("\u0025\u0033\u0064 \u007c",_cedd );};_gde .Println ("");_gde .Printf ("\u0025\u0035\u0073 \u002b","");for _ceba :=0;_ceba < _adgcc ._acddc ;_ceba ++{_gde .Printf ("\u0025\u0033\u0073 \u002b","\u002d\u002d\u002d");
};_gde .Println ("");for _eedda :=0;_eedda < _adgcc ._gebeeb ;_eedda ++{_gde .Printf ("\u0025\u0035\u0064 \u007c",_eedda );for _cbfg :=0;_cbfg < _adgcc ._acddc ;_cbfg ++{_beaad ,_ :=_adgcc ._edbe [_bafcd (_cbfg ,_eedda )].parasBBox ();_gde .Printf ("\u0025\u0033\u0064 \u007c",len (_beaad ));
};_gde .Println ("");};_ac .Log .Info ("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_adgcc ._acddc ,_adgcc ._gebeeb ,_gbcag );_gde .Printf ("\u0025\u0035\u0073 \u007c","");for _acgaa :=0;_acgaa < _adgcc ._acddc ;
_acgaa ++{_gde .Printf ("\u0025\u0031\u0032\u0064\u0020\u007c",_acgaa );};_gde .Println ("");_gde .Printf ("\u0025\u0035\u0073 \u002b","");for _egagc :=0;_egagc < _adgcc ._acddc ;_egagc ++{_gde .Print ("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b");
};_gde .Println ("");for _bfgfa :=0;_bfgfa < _adgcc ._gebeeb ;_bfgfa ++{_gde .Printf ("\u0025\u0035\u0064 \u007c",_bfgfa );for _dffdf :=0;_dffdf < _adgcc ._acddc ;_dffdf ++{_eeegd ,_ :=_adgcc ._edbe [_bafcd (_dffdf ,_bfgfa )].parasBBox ();_beea :="";_aace :=_eeegd .merge ();
if _aace !=nil {_beea =_aace .text ();};_beea =_gde .Sprintf ("\u0025\u0071",_dbdbb (_beea ,12));_beea =_beea [1:len (_beea )-1];_gde .Printf ("\u0025\u0031\u0032\u0073\u0020\u007c",_beea );};_gde .Println ("");};};func _ebeg (_daad []*textLine )map[float64 ][]*textLine {_ab .Slice (_daad ,func (_ecceb ,_cdac int )bool {return _daad [_ecceb ]._bfcg < _daad [_cdac ]._bfcg });
_aead :=map[float64 ][]*textLine {};for _ ,_fbcd :=range _daad {_dggce :=_aadf (_fbcd );_dggce =_dc .Round (_dggce );_aead [_dggce ]=append (_aead [_dggce ],_fbcd );};return _aead ;};func _gcfc (_edfbde []compositeCell )[]float64 {var _bcfd []*textLine ;
_gfea :=0;for _ ,_acgd :=range _edfbde {_gfea +=len (_acgd .paraList );_bcfd =append (_bcfd ,_acgd .lines ()...);};_ab .Slice (_bcfd ,func (_edceg ,_ccfbc int )bool {_ecdf ,_fbdca :=_bcfd [_edceg ],_bcfd [_ccfbc ];_ffagd ,_bfcd :=_ecdf ._bfcg ,_fbdca ._bfcg ;
if !_ebfaf (_ffagd -_bfcd ){return _ffagd < _bfcd ;};return _ecdf .Llx < _fbdca .Llx ;});if _eadb {_gde .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_gfea ,len (_bcfd ));
for _aefa ,_aecb :=range _bcfd {_gde .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_aefa ,_aecb );};};var _edfe []float64 ;_edfca :=_bcfd [0];var _cfebg [][]*textLine ;_caac :=[]*textLine {_edfca };for _faacd ,_abbad :=range _bcfd [1:]{if _abbad .Ury < _edfca .Lly {_abcdg :=0.5*(_abbad .Ury +_edfca .Lly );
if _eadb {_gde .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_faacd ,_abbad .Ury ,_edfca .Lly ,_abcdg ,_edfca ,_abbad );
};_edfe =append (_edfe ,_abcdg );_cfebg =append (_cfebg ,_caac );_caac =nil ;};_caac =append (_caac ,_abbad );if _abbad .Lly < _edfca .Lly {_edfca =_abbad ;};};if len (_caac )> 0{_cfebg =append (_cfebg ,_caac );};if _eadb {_gde .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_edfe );
};if _eadb {_ac .Log .Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len (_edfbde ));for _cdeb ,_fcfae :=range _edfbde {_gde .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_cdeb ,_fcfae );};_ac .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_cfebg ));
for _aegb ,_eedga :=range _cfebg {_gde .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_aegb ,len (_eedga ));for _gebc ,_affbg :=range _eedga {_gde .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_gebc ,_affbg );};};};_ggcg :=true ;
for _fcab ,_gdegd :=range _cfebg {_bdfbb :=true ;for _adde ,_fdcf :=range _edfbde {if _eadb {_gde .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_fcab ,len (_cfebg ),_adde ,len (_edfbde ),_fdcf );
};if !_fdcf .hasLines (_gdegd ){if _eadb {_gde .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_fcab ,len (_cfebg ),_adde ,len (_edfbde ));
};_bdfbb =false ;break ;};};if !_bdfbb {_ggcg =false ;break ;};};if !_ggcg {if _eadb {_ac .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg");
};_edfe =nil ;};if _eadb &&_edfe !=nil {_gde .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_edfe );};return _edfe ;};
// _cgffb returns a newly allocated slice holding the elements of _ggdf in
// reverse order. The input slice is not modified. The result is always
// non-nil, even when _ggdf is nil or empty.
func _cgffb(_ggdf []int) []int {
	n := len(_ggdf)
	reversed := make([]int, n)
	// Walk the source from its last element while filling the result from the front.
	for dst, src := 0, n-1; src >= 0; dst, src = dst+1, src-1 {
		reversed[dst] = _ggdf[src]
	}
	return reversed
}

var (
	// _ge is a package-level error value; its message decodes to "type check error".
	_ge = _a.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")

	// _efa is a package-level error value; its message decodes to "range check error".
	_efa = _a.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// List returns all the list objects detected on the page.
// It detects all the bullet point lists on a given PDF page and builds a slice of bullet list objects.
// A given bullet list object has a tree structure.
// Each bullet point list is extracted with the text content it contains and all the sub lists found under it as children in the tree.
// The remaining content of the PDF is ignored; only the text in the bullet point lists is extracted.
// The list extraction is done in two ways.
// 1. If the document is tagged then the lists are extracted using the tags provided in the document.
// 2. Otherwise the bullet lists are extracted from the raw text using regex matching.
// By default the document tag is used if available.
// However this can be disabled using `DisableDocumentTags` in the `Options` object.
// Disabling the document tags option may give better bullet list extraction when the document was tagged incorrectly.
// Example:
//
//	options := &Options{
//		DisableDocumentTags: false, // this means use document tag if available
//	}
//	ex, err := NewWithOptions(page, options)
//	// handle error
//	pageText, _, _, err := ex.ExtractPageText()
//	// handle error
//	lists := pageText.List()
//	txt := lists.Text()
func (_cfef PageText) List() lists {
	// NOTE(review): _dbcd is presumably the DisableDocumentTags option — confirm.
	tagsAllowed := !_cfef._dbc._dbcd
	paras := _cfef.getParagraphs()
	// The struct tree is usable only when both the pointer and the value it points to are non-nil.
	hasStructTree := _cfef._cabf != nil && *_cfef._cabf != nil

	// The paragraph-derived lists are computed up front; they are the result
	// whenever the struct tree path is not taken or yields no content.
	textLists := paras.list()
	if !hasStructTree || !tagsAllowed {
		return textLists
	}

	// NOTE(review): _feea is an opaque helper defined elsewhere; its result is only passed on to buildList.
	paraInfo := _feea(&paras)
	root := &structTreeRoot{}
	root.parseStructTreeRoot(*_cfef._cabf)
	if root._ccdea == nil {
		// Message decodes to:
		// "List: structTreeRoot doesn't have any content, using text matching method instead."
		_ac.Log.Debug("\u004c\u0069\u0073\u0074\u003a\u0020\u0073t\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e'\u0074\u0020\u0068\u0061\u0076e\u0020\u0061\u006e\u0079\u0020\u0063\u006f\u006e\u0074e\u006e\u0074\u002c\u0020\u0075\u0073\u0069\u006e\u0067\u0020\u0074\u0065\u0078\u0074\u0020\u006d\u0061\u0074\u0063\u0068\u0069\u006e\u0067\u0020\u006d\u0065\u0074\u0068\u006f\u0064\u0020\u0069\u006e\u0073\u0074\u0065\u0061\u0064\u002e")
		return textLists
	}
	return root.buildList(paraInfo, _cfef._gefc)
}

// _ebecd returns the keys of _cccda sorted in increasing order.
// The result is non-nil even when the map is empty.
func _ebecd(_cccda map[float64][]*textLine) []float64 {
	keys := make([]float64, 0, len(_cccda))
	for k := range _cccda {
		keys = append(keys, k)
	}
	_ab.Float64s(keys)
	return keys
}

// tidied returns the receiver after removeDuplicates, snapToGroups and sort
// have been applied in that order. It returns nil when snapToGroups returns
// nil. _facac is used only as a label in the optional log line.
func (_dfgaf rulingList) tidied(_facac string) rulingList {
	uniques := _dfgaf.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073") // "uniques"

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()

	// NOTE(review): _aebg looks like a package-level verbose/debug switch — confirm.
	if _aebg {
		// Format decodes to: "tidied: %q vecs=%d uniques=%d coalesced=%d"
		_ac.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", _facac, len(_dfgaf), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d") // "coalesced"
	return coalesced
}

// splitSec sorts the receiver in place by (_efgeb, _bbge) and then partitions
// it into groups. A group is seeded with the first ruling not yet assigned to
// any group and is grown by a single pass over the sorted list, adding every
// unassigned ruling for which alignsSec reports true against a ruling already
// in the group.
//
// An empty receiver yields nil. Previously it panicked with an index out of
// range, because the first element was accessed unconditionally.
func (_affg rulingList) splitSec() []rulingList {
	// NOTE(review): _efgeb/_bbge are presumably the low/high extents of a ruling — confirm.
	_ab.Slice(_affg, func(i, j int) bool {
		a, b := _affg[i], _affg[j]
		if a._efgeb != b._efgeb {
			return a._efgeb < b._efgeb
		}
		return a._bbge < b._bbge
	})

	// assigned records every ruling that already belongs to a group.
	assigned := make(map[*ruling]struct{}, len(_affg))

	// collect builds the group seeded by seed, marking each member as assigned.
	collect := func(seed *ruling) rulingList {
		group := rulingList{seed}
		assigned[seed] = struct{}{}
		for _, candidate := range _affg {
			if _, done := assigned[candidate]; done {
				continue
			}
			for _, member := range group {
				if candidate.alignsSec(member) {
					group = append(group, candidate)
					assigned[candidate] = struct{}{}
					break
				}
			}
		}
		return group
	}

	// Iterating the whole list (rather than special-casing element 0) gives the
	// same groups for non-empty input and simply produces nil for empty input.
	var groups []rulingList
	for _, r := range _affg {
		if _, done := assigned[r]; done {
			continue
		}
		groups = append(groups, collect(r))
	}
	return groups
}

// _eecc returns a newline ("\u000a") when _ebfaf reports false for the
// difference _fceb-_becf, and a single space ("\u0020") otherwise.
// NOTE(review): _ebfaf is presumably an "is approximately zero" test — confirm.
func _eecc(_fceb, _becf float64) string {
	if _ebfaf(_fceb - _becf) {
		return "\u0020"
	}
	return "\u000a"
}

// _ebdgb rounds _geef to the nearest multiple of the package-level value _egac.
func _ebdgb(_geef float64) float64 {
	steps := _dc.Round(_geef / _egac)
	return _egac * steps
}
// _cfec is compiled once at package initialization from the patterns _efdd
// and _dceb joined with "|" (written here as "\u007c"), i.e. a regexp
// alternation of the two. MustCompile panics if the joined pattern is invalid.
var _cfec = _gd.MustCompile(_efdd + "\u007c" + _dceb)