mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
1002 lines
219 KiB
Go
1002 lines
219 KiB
Go
//
|
||
// Copyright 2020 FoxyUtils ehf. All rights reserved.
|
||
//
|
||
// This is a commercial product and requires a license to operate.
|
||
// A trial license can be obtained at https://unidoc.io
|
||
//
|
||
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
|
||
//
|
||
// Use of this source code is governed by the UniDoc End User License Agreement
|
||
// terms that can be accessed at https://unidoc.io/eula/
|
||
|
||
// Package extractor is used for quickly extracting PDF content through a simple interface.
|
||
// Currently offers functionality for extracting textual content.
|
||
package extractor ;import (_dc "bytes";_b "errors";_agc "fmt";_ga "github.com/unidoc/unipdf/v3/common";_fc "github.com/unidoc/unipdf/v3/contentstream";_add "github.com/unidoc/unipdf/v3/core";_ce "github.com/unidoc/unipdf/v3/internal/license";_cg "github.com/unidoc/unipdf/v3/internal/textencoding";
|
||
_agf "github.com/unidoc/unipdf/v3/internal/transform";_ba "github.com/unidoc/unipdf/v3/model";_ed "golang.org/x/image/draw";_ea "golang.org/x/text/unicode/norm";_ag "image";_gff "image/color";_gg "io";_gf "math";_ad "reflect";_d "regexp";_e "sort";_f "strings";
|
||
_c "unicode";_a "unicode/utf8";);var _dddb string ="\u005e\u005b\u0061\u002d\u007a\u0041\u002dZ\u005d\u0028\u005c)\u007c\u005c\u002e)\u007c\u005e[\u005c\u0064\u005d\u002b\u0028\u005c)\u007c\\.\u0029\u007c\u005e\u005c\u0028\u005b\u0061\u002d\u007a\u0041\u002d\u005a\u005d\u005c\u0029\u007c\u005e\u005c\u0028\u005b\u005c\u0064\u005d\u002b\u005c\u0029";
|
||
func (_eabe *wordBag )minDepth ()float64 {return _eabe ._ebgd -(_eabe .Ury -_eabe ._cdea )};func (_egee *textObject )getFontDirect (_faafg string )(*_ba .PdfFont ,error ){_ecgc ,_afgg :=_egee .getFontDict (_faafg );if _afgg !=nil {return nil ,_afgg ;};
|
||
_cea ,_afgg :=_ba .NewPdfFontFromPdfObject (_ecgc );if _afgg !=nil {_ga .Log .Debug ("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_faafg ,_afgg );
|
||
};return _cea ,_afgg ;};func _eacc (_gdaab ,_bafbe _agf .Point ,_egbc _gff .Color )(*ruling ,bool ){_bggdd :=lineRuling {_eded :_gdaab ,_badee :_bafbe ,_cebc :_ffdg (_gdaab ,_bafbe ),Color :_egbc };if _bggdd ._cebc ==_fbcc {return nil ,false ;};return _bggdd .asRuling ();
|
||
};
|
||
|
||
// String returns a description of `p`.
|
||
func (_cegf *textPara )String ()string {if _cegf ._cddef {return _agc .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d",_cegf .PdfRectangle );};_gbcf :="";if _cegf ._fbbea !=nil {_gbcf =_agc .Sprintf ("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020",_cegf ._fbbea ._eacg ,_cegf ._fbbea ._cgae );
|
||
};return _agc .Sprintf ("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071",_cegf .PdfRectangle ,_gbcf ,len (_cegf ._fdec ),_adgd (_cegf .text (),50));};type gridTiling struct{_ba .PdfRectangle ;_fece []float64 ;
|
||
_dfca []float64 ;_gdcg map[float64 ]map[float64 ]gridTile ;};func _cgbc (_efb _ba .PdfRectangle )textState {return textState {_def :100,_gdf :RenderModeFill ,_ade :_efb };};func _cbde (_fdda *wordBag ,_bdcf *textWord ,_ffea float64 )bool {return _fdda .Urx <=_bdcf .Llx &&_bdcf .Llx < _fdda .Urx +_ffea ;
|
||
};func (_gbgg *stateStack )top ()*textState {if _gbgg .empty (){return nil ;};return (*_gbgg )[_gbgg .size ()-1];};const (_fbcc rulingKind =iota ;_aaad ;_cfae ;);func (_cggb *PageText )computeViews (){_dbca :=_cggb .getParagraphs ();_gfgb :=new (_dc .Buffer );
|
||
_dbca .writeText (_gfgb );_cggb ._ggff =_gfgb .String ();_cggb ._ccca =_dbca .toTextMarks ();_cggb ._gdba =_dbca .tables ();if _efda {_ga .Log .Info ("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064",len (_cggb ._gdba ));
|
||
};};var _cffd *_d .Regexp =_d .MustCompile (_geef +"\u007c"+_dddb );func _dag (_fbfdf bounded )float64 {return -_fbfdf .bbox ().Lly };func (_gfcdg *textTable )growTable (){_agdf :=func (_bfea paraList ){_gfcdg ._cgae ++;for _adgf :=0;_adgf < _gfcdg ._eacg ;
|
||
_adgf ++{_fbege :=_bfea [_adgf ];_gfcdg .put (_adgf ,_gfcdg ._cgae -1,_fbege );};};_baag :=func (_begfc paraList ){_gfcdg ._eacg ++;for _fcdgc :=0;_fcdgc < _gfcdg ._cgae ;_fcdgc ++{_bbbbb :=_begfc [_fcdgc ];_gfcdg .put (_gfcdg ._eacg -1,_fcdgc ,_bbbbb );
|
||
};};if _baace {_gfcdg .log ("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce");};for _egccaa :=0;;_egccaa ++{_abdf :=false ;_bbdf :=_gfcdg .getDown ();_gdge :=_gfcdg .getRight ();if _baace {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_egccaa ,_gfcdg );
|
||
_agc .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a",_bbdf );_agc .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a",_gdge );};if _bbdf !=nil &&_gdge !=nil {_aefa :=_bbdf [len (_bbdf )-1];
|
||
if !_aefa .taken ()&&_aefa ==_gdge [len (_gdge )-1]{_agdf (_bbdf );if _gdge =_gfcdg .getRight ();_gdge !=nil {_baag (_gdge );_gfcdg .put (_gfcdg ._eacg -1,_gfcdg ._cgae -1,_aefa );};_abdf =true ;};};if !_abdf &&_bbdf !=nil {_agdf (_bbdf );_abdf =true ;
|
||
};if !_abdf &&_gdge !=nil {_baag (_gdge );_abdf =true ;};if !_abdf {break ;};};};func _cbgc (_ggffg map[float64 ]gridTile )[]float64 {_fgddf :=make ([]float64 ,0,len (_ggffg ));for _cccg :=range _ggffg {_fgddf =append (_fgddf ,_cccg );};_e .Float64s (_fgddf );
|
||
return _fgddf ;};
|
||
|
||
// Options holds the settings that control how an extractor processes a page.
// The zero value enables document tags for list extraction, ignores the crop
// box, uses the full extraction process and skips annotations.
type Options struct {
	// DisableDocumentTags turns off the use of document tags during list
	// extraction. When it is `false` the tags are used if the document
	// provides them.
	DisableDocumentTags bool

	// ApplyCropBox will extract page text based on the page crop box if set to `true`.
	ApplyCropBox bool

	// UseSimplerExtractionProcess will skip topological text ordering and table processing.
	//
	// NOTE: While normally the extra processing is beneficial, it can also lead
	// to problems when it does not work, so this flag lets the user control it.
	//
	// Skipping these extraction steps also reduces the processing time.
	UseSimplerExtractionProcess bool

	// IncludeAnnotations specifies whether to include annotations in the
	// extraction process. The default value is `false`.
	IncludeAnnotations bool
}
|
||
|
||
// String returns a description of `t`.
|
||
func (_ffcd *textTable )String ()string {return _agc .Sprintf ("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074",_ffcd ._eacg ,_ffcd ._cgae ,_ffcd ._edeg );};
|
||
|
||
// Text returns the extracted page text.
|
||
func (_gge PageText )Text ()string {return _gge ._ggff };func (_gdcgc *textTable )logComposite (_fefaa string ){if !_efda {return ;};_ga .Log .Info ("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_gdcgc ._eacg ,_gdcgc ._cgae ,_fefaa );
|
||
_agc .Printf ("\u0025\u0035\u0073 \u007c","");for _ecef :=0;_ecef < _gdcgc ._eacg ;_ecef ++{_agc .Printf ("\u0025\u0033\u0064 \u007c",_ecef );};_agc .Println ("");_agc .Printf ("\u0025\u0035\u0073 \u002b","");for _beggf :=0;_beggf < _gdcgc ._eacg ;_beggf ++{_agc .Printf ("\u0025\u0033\u0073 \u002b","\u002d\u002d\u002d");
|
||
};_agc .Println ("");for _dcae :=0;_dcae < _gdcgc ._cgae ;_dcae ++{_agc .Printf ("\u0025\u0035\u0064 \u007c",_dcae );for _bdfg :=0;_bdfg < _gdcgc ._eacg ;_bdfg ++{_gbdacb ,_ :=_gdcgc ._aaaga [_bgcc (_bdfg ,_dcae )].parasBBox ();_agc .Printf ("\u0025\u0033\u0064 \u007c",len (_gbdacb ));
|
||
};_agc .Println ("");};_ga .Log .Info ("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073",_gdcgc ._eacg ,_gdcgc ._cgae ,_fefaa );_agc .Printf ("\u0025\u0035\u0073 \u007c","");for _geee :=0;_geee < _gdcgc ._eacg ;
|
||
_geee ++{_agc .Printf ("\u0025\u0031\u0032\u0064\u0020\u007c",_geee );};_agc .Println ("");_agc .Printf ("\u0025\u0035\u0073 \u002b","");for _affe :=0;_affe < _gdcgc ._eacg ;_affe ++{_agc .Print ("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b");
|
||
};_agc .Println ("");for _aaagg :=0;_aaagg < _gdcgc ._cgae ;_aaagg ++{_agc .Printf ("\u0025\u0035\u0064 \u007c",_aaagg );for _ffbg :=0;_ffbg < _gdcgc ._eacg ;_ffbg ++{_dcgf ,_ :=_gdcgc ._aaaga [_bgcc (_ffbg ,_aaagg )].parasBBox ();_agbae :="";_eccdd :=_dcgf .merge ();
|
||
if _eccdd !=nil {_agbae =_eccdd .text ();};_agbae =_agc .Sprintf ("\u0025\u0071",_adgd (_agbae ,12));_agbae =_agbae [1:len (_agbae )-1];_agc .Printf ("\u0025\u0031\u0032\u0073\u0020\u007c",_agbae );};_agc .Println ("");};};func (_gfdbf *textTable )newTablePara ()*textPara {_bcfbf :=_gfdbf .computeBbox ();
|
||
_cfbf :=&textPara {PdfRectangle :_bcfbf ,_dgabg :_bcfbf ,_fbbea :_gfdbf };if _efda {_ga .Log .Info ("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073",_cfbf );};return _cfbf ;};func (_badd rulingList )tidied (_ebafce string )rulingList {_dedaa :=_badd .removeDuplicates ();
|
||
_dedaa .log ("\u0075n\u0069\u0071\u0075\u0065\u0073");_dedc :=_dedaa .snapToGroups ();if _dedc ==nil {return nil ;};_dedc .sort ();if _adgbf {_ga .Log .Info ("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064",_ebafce ,len (_badd ),len (_dedaa ),len (_dedc ));
|
||
};_dedc .log ("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d");return _dedc ;};func (_bbbc *wordBag )arrangeText ()*textPara {_bbbc .sort ();if _cafd {_bbbc .removeDuplicates ();};var _dedf []*textLine ;for _ ,_cfff :=range _bbbc .depthIndexes (){for !_bbbc .empty (_cfff ){_ccgf :=_bbbc .firstReadingIndex (_cfff );
|
||
_aacg :=_bbbc .firstWord (_ccgf );_cfaga :=_eedf (_bbbc ,_ccgf );_gcde :=_aacg ._eabbf ;_aaeag :=_aacg ._dfagd -_fcca *_gcde ;_gfdf :=_aacg ._dfagd +_fcca *_gcde ;_adec :=_gcfb *_gcde ;_dbff :=_bbad *_gcde ;_dddba :for {var _bagc *textWord ;_cacc :=0;for _ ,_abgb :=range _bbbc .depthBand (_aaeag ,_gfdf ){_dbab :=_bbbc .highestWord (_abgb ,_aaeag ,_gfdf );
|
||
if _dbab ==nil {continue ;};_bdgcd :=_daf (_dbab ,_cfaga ._cdcg [len (_cfaga ._cdcg )-1]);if _bdgcd < -_dbff {break _dddba ;};if _bdgcd > _adec {continue ;};if _bagc !=nil &&_eddba (_dbab ,_bagc )>=0{continue ;};_bagc =_dbab ;_cacc =_abgb ;};if _bagc ==nil {break ;
|
||
};_cfaga .pullWord (_bbbc ,_bagc ,_cacc );};_cfaga .markWordBoundaries ();_dedf =append (_dedf ,_cfaga );};};if len (_dedf )==0{return nil ;};_e .Slice (_dedf ,func (_gffaa ,_fcdff int )bool {return _cgfgf (_dedf [_gffaa ],_dedf [_fcdff ])< 0});_aeac :=_bfgg (_bbbc .PdfRectangle ,_dedf );
|
||
if _cbbb {_ga .Log .Info ("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073",_aeac .String ());if _abfa {for _bgafb ,_ffee :=range _aeac ._fdec {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bgafb ,_ffee .String ());
|
||
if _gaba {for _dgbe ,_faefb :=range _ffee ._cdcg {_agc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dgbe ,_faefb .String ());for _dada ,_eedfd :=range _faefb ._abcee {_agc .Printf ("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n",_dada ,_eedfd .String ());
|
||
};};};};};};return _aeac ;};func (_eadc paraList )yNeighbours (_bgbc float64 )map[*textPara ][]int {_geec :=make ([]event ,2*len (_eadc ));if _bgbc ==0{for _dcceb ,_fbccf :=range _eadc {_geec [2*_dcceb ]=event {_fbccf .Lly ,true ,_dcceb };_geec [2*_dcceb +1]=event {_fbccf .Ury ,false ,_dcceb };
|
||
};}else {for _cecf ,_fgdg :=range _eadc {_geec [2*_cecf ]=event {_fgdg .Lly -_bgbc *_fgdg .fontsize (),true ,_cecf };_geec [2*_cecf +1]=event {_fgdg .Ury +_bgbc *_fgdg .fontsize (),false ,_cecf };};};return _eadc .eventNeighbours (_geec );};func (_gbf *textObject )reset (){_gbf ._eefe =_agf .IdentityMatrix ();
|
||
_gbf ._dbc =_agf .IdentityMatrix ();_gbf ._cfde =nil ;};func _aada (_cggf *textLine )bool {_fbbag :=true ;_efbb :=-1;for _ ,_bgada :=range _cggf ._cdcg {for _ ,_gccg :=range _bgada ._abcee {_cgdcd :=_gccg ._ebbb ;if _efbb ==-1{_efbb =_cgdcd ;}else {if _efbb !=_cgdcd {_fbbag =false ;
|
||
break ;};};};};return _fbbag ;};func (_deadc *textTable )markCells (){for _ceff :=0;_ceff < _deadc ._cgae ;_ceff ++{for _ccag :=0;_ccag < _deadc ._eacg ;_ccag ++{_fdbd :=_deadc .get (_ccag ,_ceff );if _fdbd !=nil {_fdbd ._gecb =true ;};};};};func (_gfde *shapesState )newSubPath (){_gfde .clearPath ();
|
||
if _fdbg {_ga .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_gfde );};};func (_ceec *TextMarkArray )getTextMarkAtOffset (_begg int )*TextMark {for _ ,_agb :=range _ceec ._ffca {if _agb .Offset ==_begg {return &_agb ;
|
||
};};return nil ;};func (_acb *textObject )getFillColor ()_gff .Color {return _eddaee (_acb ._bace .ColorspaceNonStroking ,_acb ._bace .ColorNonStroking );};func (_edcf *textObject )getFont (_decc string )(*_ba .PdfFont ,error ){if _edcf ._cdb ._bd !=nil {_dcea ,_fbcd :=_edcf .getFontDict (_decc );
|
||
if _fbcd !=nil {_ga .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073",_decc ,_fbcd .Error ());return nil ,_fbcd ;
|
||
};_edcf ._cdb ._ae ++;_fgd ,_cgfb :=_edcf ._cdb ._bd [_dcea .String ()];if _cgfb {_fgd ._ccg =_edcf ._cdb ._ae ;return _fgd ._cbad ,nil ;};};_faaf ,_gbgd :=_edcf .getFontDict (_decc );if _gbgd !=nil {return nil ,_gbgd ;};_cgfg ,_gbgd :=_edcf .getFontDirect (_decc );
|
||
if _gbgd !=nil {return nil ,_gbgd ;};if _edcf ._cdb ._bd !=nil {_cfag :=fontEntry {_cgfg ,_edcf ._cdb ._ae };if len (_edcf ._cdb ._bd )>=_bcgf {var _gfca []string ;for _eaaa :=range _edcf ._cdb ._bd {_gfca =append (_gfca ,_eaaa );};_e .Slice (_gfca ,func (_ffae ,_fdef int )bool {return _edcf ._cdb ._bd [_gfca [_ffae ]]._ccg < _edcf ._cdb ._bd [_gfca [_fdef ]]._ccg ;
|
||
});delete (_edcf ._cdb ._bd ,_gfca [0]);};_edcf ._cdb ._bd [_faaf .String ()]=_cfag ;};return _cgfg ,nil ;};func (_gcfa *compositeCell )updateBBox (){for _ ,_gdaa :=range _gcfa .paraList {_gcfa .PdfRectangle =_bbbafc (_gcfa .PdfRectangle ,_gdaa .PdfRectangle );
|
||
};};func (_faee rulingList )vertsHorzs ()(rulingList ,rulingList ){var _ccfgc ,_eaba rulingList ;for _ ,_egfg :=range _faee {switch _egfg ._bbce {case _cfae :_ccfgc =append (_ccfgc ,_egfg );case _aaad :_eaba =append (_eaba ,_egfg );};};return _ccfgc ,_eaba ;
|
||
};
|
||
|
||
// Font represents the font properties on a PDF page.
|
||
type Font struct{PdfFont *_ba .PdfFont ;
|
||
|
||
// FontName represents Font Name from font properties.
|
||
FontName string ;
|
||
|
||
// FontType represents Font Subtype entry in the font dictionary inside page resources.
|
||
// Examples : type0, Type1, MMType1, Type3, TrueType, CIDFont.
|
||
FontType string ;
|
||
|
||
// ToUnicode is true if font provides a `ToUnicode` mapping.
|
||
ToUnicode bool ;
|
||
|
||
// IsCID is true if underlying font is a composite font.
|
||
// Composite font is represented by a font dictionary whose Subtype is `Type0`
|
||
IsCID bool ;
|
||
|
||
// IsSimple is true if font is simple font.
|
||
// A simple font is limited to only 8 bit (255) character codes.
|
||
IsSimple bool ;
|
||
|
||
// FontData represents the raw data of the embedded font file.
|
||
// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CCF).
|
||
// FontData value can be indicates from `FontFile`, `FontFile2` or `FontFile3` inside Font Descriptor.
|
||
// At most, only one of `FontFile`, `FontFile2` or `FontFile3` will be FontData value.
|
||
FontData []byte ;
|
||
|
||
// FontFileName is a name representing the font. it has format:
|
||
// (Font Name) + (Font Type Extension), example: helvetica.ttf.
|
||
FontFileName string ;
|
||
|
||
// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
|
||
FontDescriptor *_ba .PdfFontDescriptor ;};func _fbcde (_fdcf _ba .PdfRectangle )*ruling {return &ruling {_bbce :_aaad ,_edga :_fdcf .Lly ,_fcec :_fdcf .Llx ,_abeg :_fdcf .Urx };};func (_eedee *textWord )toTextMarks (_cgfbf *int )[]TextMark {var _bdaf []TextMark ;
|
||
for _ ,_adae :=range _eedee ._abcee {_bdaf =_aadbb (_bdaf ,_cgfbf ,_adae .ToTextMark ());};return _bdaf ;};var _gbdc =TextMark {Text :"\u005b\u0058\u005d",Original :"\u0020",Meta :true ,FillColor :_gff .White ,StrokeColor :_gff .White };
|
||
|
||
// String returns a human readable description of `vecs`.
|
||
func (_acag rulingList )String ()string {if len (_acag )==0{return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}";};_afbf ,_eege :=_acag .vertsHorzs ();_eddcd :=len (_afbf );_egfgb :=len (_eege );if _eddcd ==0||_egfgb ==0{return _agc .Sprintf ("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}",_eddcd ,_egfgb );
|
||
};_aebee :=_ba .PdfRectangle {Llx :_afbf [0]._edga ,Urx :_afbf [_eddcd -1]._edga ,Lly :_eege [_egfgb -1]._edga ,Ury :_eege [0]._edga };return _agc .Sprintf ("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d",_eddcd ,_egfgb ,_aebee );
|
||
};
|
||
|
||
// TableCell is a cell in a TextTable.
|
||
type TableCell struct{_ba .PdfRectangle ;
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Marks returns the TextMarks corresponding to the text in Text.
|
||
Marks TextMarkArray ;};
|
||
|
||
// List returns all the list objects detected on the page.
|
||
// It detects all the bullet point Lists from a given pdf page and builds a slice of bullet list objects.
|
||
// A given bullet list object has a tree structure.
|
||
// Each bullet point list is extracted with the text content it contains and all the sub lists found under it as children in the tree.
|
||
// The rest content of the pdf is ignored and only text in the bullet point lists are extracted.
|
||
// The list extraction is done in two ways.
|
||
// 1. If the document is tagged then the lists are extracted using the tags provided in the document.
|
||
// 2. Otherwise the bullet lists are extracted from the raw text using regex matching.
|
||
// By default the document tag is used if available.
|
||
// However this can be disabled using `DisableDocumentTags` in the `Options` object.
|
||
// Sometimes disabling document tags option might give a better bullet list extraction if the document was tagged incorrectly.
|
||
//
|
||
// options := &Options{
|
||
// DisableDocumentTags: false, // this means use document tag if available
|
||
// }
|
||
// ex, err := NewWithOptions(page, options)
|
||
// // handle error
|
||
// pageText, _, _, err := ex.ExtractPageText()
|
||
// // handle error
|
||
// lists := pageText.List()
|
||
// txt := lists.Text()
|
||
func (_bbddb PageText )List ()lists {_aebe :=!_bbddb ._eaag ._bcc ;_cbdbb :=_bbddb .getParagraphs ();_beedf :=true ;if _bbddb ._abge ==nil ||*_bbddb ._abge ==nil {_beedf =false ;};_gecc :=_cbdbb .list ();if _beedf &&_aebe {_dcfb :=_edgg (&_cbdbb );_cbcd :=&structTreeRoot {};
|
||
_cbcd .parseStructTreeRoot (*_bbddb ._abge );if _cbcd ._faef ==nil {_ga .Log .Debug ("\u004c\u0069\u0073\u0074\u003a\u0020\u0073t\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e'\u0074\u0020\u0068\u0061\u0076e\u0020\u0061\u006e\u0079\u0020\u0063\u006f\u006e\u0074e\u006e\u0074\u002c\u0020\u0075\u0073\u0069\u006e\u0067\u0020\u0074\u0065\u0078\u0074\u0020\u006d\u0061\u0074\u0063\u0068\u0069\u006e\u0067\u0020\u006d\u0065\u0074\u0068\u006f\u0064\u0020\u0069\u006e\u0073\u0074\u0065\u0061\u0064\u002e");
|
||
return _gecc ;};_gecc =_cbcd .buildList (_dcfb ,_bbddb ._abad );};return _gecc ;};func _dbcba (_cdbba []*textMark ,_fagdf _ba .PdfRectangle )[]*textWord {var _fgfcd []*textWord ;var _cbgca *textWord ;if _bfgae {_ga .Log .Info ("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073",len (_cdbba ));
|
||
};_fbab :=func (){if _cbgca !=nil {_bggada :=_cbgca .computeText ();if !_efcbe (_bggada ){_cbgca ._eaae =_bggada ;_fgfcd =append (_fgfcd ,_cbgca );if _bfgae {_ga .Log .Info ("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",len (_fgfcd )-1,_cbgca .String ());
|
||
for _gbefa ,_gffad :=range _cbgca ._abcee {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gbefa ,_gffad .String ());};};};_cbgca =nil ;};};for _ ,_ceda :=range _cdbba {if _fecd &&_cbgca !=nil &&len (_cbgca ._abcee )> 0{_ggfag :=_cbgca ._abcee [len (_cbgca ._abcee )-1];
|
||
_egff ,_eafaa :=_ecfb (_ceda ._bfdb );_eagg ,_edba :=_ecfb (_ggfag ._bfdb );if _eafaa &&!_edba &&_ggfag .inDiacriticArea (_ceda ){_cbgca .addDiacritic (_egff );continue ;};if _edba &&!_eafaa &&_ceda .inDiacriticArea (_ggfag ){_cbgca ._abcee =_cbgca ._abcee [:len (_cbgca ._abcee )-1];
|
||
_cbgca .appendMark (_ceda ,_fagdf );_cbgca .addDiacritic (_eagg );continue ;};};_bebb :=_efcbe (_ceda ._bfdb );if _bebb {_fbab ();continue ;};if _cbgca ==nil &&!_bebb {_cbgca =_adcdc ([]*textMark {_ceda },_fagdf );continue ;};_dbbeb :=_cbgca ._eabbf ;_acegf :=_gf .Abs (_eegf (_fagdf ,_ceda )-_cbgca ._dfagd )/_dbbeb ;
|
||
_bdagf :=_daf (_ceda ,_cbgca )/_dbbeb ;if _bdagf >=_afbgg ||!(-_cdfe <=_bdagf &&_acegf <=_ebfgc ){_fbab ();_cbgca =_adcdc ([]*textMark {_ceda },_fagdf );continue ;};_cbgca .appendMark (_ceda ,_fagdf );};_fbab ();return _fgfcd ;};func (_gafg *subpath )close (){if !_gaeg (_gafg ._aeee [0],_gafg .last ()){_gafg .add (_gafg ._aeee [0]);
|
||
};_gafg ._dbe =true ;_gafg .removeDuplicates ();};func (_efec paraList )sortReadingOrder (){_ga .Log .Trace ("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_efec ));
|
||
if len (_efec )<=1{return ;};_efec .computeEBBoxes ();_e .Slice (_efec ,func (_aaff ,_cbadf int )bool {return _cgfgf (_efec [_aaff ],_efec [_cbadf ])<=0});};func (_bfdg *wordBag )empty (_dfef int )bool {_ ,_aged :=_bfdg ._fcgd [_dfef ];return !_aged };
|
||
// _dafaf returns the larger of its two arguments.
func _dafaf(_fcda, _bdea int) int {
	if _bdea >= _fcda {
		return _bdea
	}
	return _fcda
}
|
||
|
||
// Len returns the number of TextMarks in `ma`.
|
||
func (_gda *TextMarkArray )Len ()int {if _gda ==nil {return 0;};return len (_gda ._ffca );};func _eafa (_aaea _agf .Matrix )_agf .Point {_bfga ,_bea :=_aaea .Translation ();return _agf .Point {X :_bfga ,Y :_bea };};func _bggg (_ggc ,_caf _ba .PdfRectangle )bool {return _ggc .Lly <=_caf .Ury &&_caf .Lly <=_ggc .Ury };
|
||
func _dabg (_fgdc structElement )[]structElement {_fgcd :=[]structElement {};for _ ,_faede :=range _fgdc ._abff {for _ ,_degc :=range _faede ._abff {for _ ,_efggb :=range _degc ._abff {if _efggb ._bbag =="\u004c"{_fgcd =append (_fgcd ,_efggb );};};};};
|
||
return _fgcd ;};func (_gbba *structTreeRoot )buildList (_ccfe map[int ][]*textLine ,_cfaf _add .PdfObject )[]*list {if _gbba ==nil {_ga .Log .Debug ("\u0062\u0075\u0069\u006c\u0064\u004c\u0069\u0073\u0074\u003a\u0020t\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0069\u0073 \u006e\u0069\u006c");
|
||
return nil ;};var _cabe *structElement ;_cdbd :=[]structElement {};if len (_gbba ._faef )==1{_gdda :=_gbba ._faef [0]._bbag ;if _gdda =="\u0044\u006f\u0063\u0075\u006d\u0065\u006e\u0074"||_gdda =="\u0053\u0065\u0063\u0074"||_gdda =="\u0050\u0061\u0072\u0074"||_gdda =="\u0044\u0069\u0076"||_gdda =="\u0041\u0072\u0074"{_cabe =&_gbba ._faef [0];
|
||
};}else {_cabe =&structElement {_abff :_gbba ._faef ,_bbag :_gbba ._adbca };};if _cabe ==nil {_ga .Log .Debug ("\u0062\u0075\u0069\u006cd\u004c\u0069\u0073\u0074\u003a\u0020\u0074\u006f\u0070\u0045l\u0065m\u0065\u006e\u0074\u0020\u0069\u0073\u0020n\u0069\u006c");
|
||
return nil ;};for _ ,_ebega :=range _cabe ._abff {if _ebega ._bbag =="\u004c"{_cdbd =append (_cdbd ,_ebega );}else if _ebega ._bbag =="\u0054\u0061\u0062l\u0065"{_cbcc :=_dabg (_ebega );_cdbd =append (_cdbd ,_cbcc ...);};};_bgaf :=_gagf (_cdbd ,_ccfe ,_cfaf );
|
||
var _deef []*list ;for _ ,_dgcg :=range _bgaf {_beag :=_abda (_dgcg );_deef =append (_deef ,_beag ...);};return _deef ;};
|
||
|
||
// Elements returns the TextMarks in `ma`.
|
||
func (_gbeca *TextMarkArray )Elements ()[]TextMark {return _gbeca ._ffca };func (_bdfd paraList )lines ()[]*textLine {var _dccbb []*textLine ;for _ ,_fdad :=range _bdfd {_dccbb =append (_dccbb ,_fdad ._fdec ...);};return _dccbb ;};func (_eafdc paraList )eventNeighbours (_dbge []event )map[*textPara ][]int {_e .Slice (_dbge ,func (_cdgdb ,_gcgb int )bool {_ecdg ,_fdga :=_dbge [_cdgdb ],_dbge [_gcgb ];
|
||
_edfa ,_fgbb :=_ecdg ._cdbc ,_fdga ._cdbc ;if _edfa !=_fgbb {return _edfa < _fgbb ;};if _ecdg ._dddef !=_fdga ._dddef {return _ecdg ._dddef ;};return _cdgdb < _gcgb ;});_fgca :=make (map[int ]intSet );_fffd :=make (intSet );for _ ,_ecffc :=range _dbge {if _ecffc ._dddef {_fgca [_ecffc ._dcfcc ]=make (intSet );
|
||
for _cebgc :=range _fffd {if _cebgc !=_ecffc ._dcfcc {_fgca [_ecffc ._dcfcc ].add (_cebgc );_fgca [_cebgc ].add (_ecffc ._dcfcc );};};_fffd .add (_ecffc ._dcfcc );}else {_fffd .del (_ecffc ._dcfcc );};};_afea :=map[*textPara ][]int {};for _dceag ,_cbca :=range _fgca {_eafe :=_eafdc [_dceag ];
|
||
if len (_cbca )==0{_afea [_eafe ]=nil ;continue ;};_eacgb :=make ([]int ,len (_cbca ));_eefef :=0;for _bdbed :=range _cbca {_eacgb [_eefef ]=_bdbed ;_eefef ++;};_afea [_eafe ]=_eacgb ;};return _afea ;};func _aga (_dcf []Font ,_aeb string )bool {for _ ,_fcc :=range _dcf {if _fcc .FontName ==_aeb {return true ;
|
||
};};return false ;};type textState struct{_ffaf float64 ;_gdeg float64 ;_def float64 ;_bfa float64 ;_dcd float64 ;_gdf RenderMode ;_eaaf float64 ;_gbdg *_ba .PdfFont ;_ade _ba .PdfRectangle ;_abd int ;_bafdf int ;};func (_egabd *textPara )isAtom ()*textTable {_gbgcb :=_egabd ;
|
||
_daec :=_egabd ._fdgf ;_fcbce :=_egabd ._ecada ;if _daec .taken ()||_fcbce .taken (){return nil ;};_ebdcd :=_daec ._ecada ;if _ebdcd .taken ()||_ebdcd !=_fcbce ._fdgf {return nil ;};return _baff (_gbgcb ,_daec ,_fcbce ,_ebdcd );};func _bgcc (_dccfb ,_gfdbe int )uint64 {return uint64 (_dccfb )*0x1000000+uint64 (_gfdbe )};
|
||
func (_efaba *ruling )equals (_dfff *ruling )bool {return _efaba ._bbce ==_dfff ._bbce &&_gecbg (_efaba ._edga ,_dfff ._edga )&&_gecbg (_efaba ._fcec ,_dfff ._fcec )&&_gecbg (_efaba ._abeg ,_dfff ._abeg );};func (_bgad *wordBag )makeRemovals ()map[int ]map[*textWord ]struct{}{_gfdda :=make (map[int ]map[*textWord ]struct{},len (_bgad ._fcgd ));
|
||
for _fcgc :=range _bgad ._fcgd {_gfdda [_fcgc ]=make (map[*textWord ]struct{});};return _gfdda ;};func (_gefb *wordBag )depthBand (_cdbg ,_dccd float64 )[]int {if len (_gefb ._fcgd )==0{return nil ;};return _gefb .depthRange (_gefb .getDepthIdx (_cdbg ),_gefb .getDepthIdx (_dccd ));
|
||
};
|
||
|
||
// String returns a description of `k`.
|
||
func (_ccdb markKind )String ()string {_cbggg ,_dbbg :=_decf [_ccdb ];if !_dbbg {return _agc .Sprintf ("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064",_ccdb );};return _cbggg ;};func (_gaa *imageExtractContext )extractXObjectImage (_aab *_add .PdfObjectName ,_bge _fc .GraphicsState ,_aea *_ba .PdfPageResources )error {_fac ,_ :=_aea .GetXObjectByName (*_aab );
|
||
if _fac ==nil {return nil ;};_fge ,_gcc :=_gaa ._ca [_fac ];if !_gcc {_cad ,_acg :=_aea .GetXObjectImageByName (*_aab );if _acg !=nil {return _acg ;};if _cad ==nil {return nil ;};_egc ,_acg :=_cad .ToImage ();if _acg !=nil {return _acg ;};var _fgf _ag .Image ;
|
||
if _cad .Mask !=nil {if _fgf ,_acg =_caac (_cad .Mask ,_gff .Opaque );_acg !=nil {_ga .Log .Debug ("\u0057\u0041\u0052\u004e\u003a \u0063\u006f\u0075\u006c\u0064 \u006eo\u0074\u0020\u0067\u0065\u0074\u0020\u0065\u0078\u0070\u006c\u0069\u0063\u0069\u0074\u0020\u0069\u006d\u0061\u0067e\u0020\u006d\u0061\u0073\u006b\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e");
|
||
};}else if _cad .SMask !=nil {_fgf ,_acg =_ggee (_cad .SMask ,_gff .Opaque );if _acg !=nil {_ga .Log .Debug ("W\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0067\u0065\u0074\u0020\u0073\u006f\u0066\u0074\u0020\u0069\u006da\u0067e\u0020\u006d\u0061\u0073k\u002e\u0020O\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063\u0074\u002e");
|
||
};};if _fgf !=nil {_bdb ,_bbb :=_egc .ToGoImage ();if _bbb !=nil {return _bbb ;};_bdb =_aebeec (_bdb ,_fgf );switch _cad .ColorSpace .String (){case "\u0044\u0065\u0076\u0069\u0063\u0065\u0047\u0072\u0061\u0079","\u0049n\u0064\u0065\u0078\u0065\u0064":_egc ,_bbb =_ba .ImageHandling .NewGrayImageFromGoImage (_bdb );
|
||
if _bbb !=nil {return _bbb ;};default:_egc ,_bbb =_ba .ImageHandling .NewImageFromGoImage (_bdb );if _bbb !=nil {return _bbb ;};};};_fge =&cachedImage {_fed :_egc ,_addb :_cad .ColorSpace };_gaa ._ca [_fac ]=_fge ;};_fcdf :=_fge ._fed ;_gdd :=_fge ._addb ;
|
||
_faa ,_ceb :=_gdd .ImageToRGB (*_fcdf );if _ceb !=nil {return _ceb ;};_ga .Log .Debug ("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073",_bge .CTM .String ());_ecf :=ImageMark {Image :&_faa ,Width :_bge .CTM .ScalingFactorX (),Height :_bge .CTM .ScalingFactorY (),Angle :_bge .CTM .Angle ()};
|
||
_ecf .X ,_ecf .Y =_bge .CTM .Translation ();_gaa ._dcc =append (_gaa ._dcc ,_ecf );_gaa ._fbd ++;return nil ;};func (_befd *subpath )makeRectRuling (_becff _gff .Color )(*ruling ,bool ){if _adbfb {_ga .Log .Info ("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076",_befd );
|
||
};_cddd :=_befd ._aeee [:4];_ffbc :=make (map[int ]rulingKind ,len (_cddd ));for _beff ,_ggaac :=range _cddd {_bfged :=_befd ._aeee [(_beff +1)%4];_ffbc [_beff ]=_ddbf (_ggaac ,_bfged );if _adbfb {_agc .Printf ("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066",_beff ,_ffbc [_beff ],_ggaac ,_bfged );
|
||
};};if _adbfb {_agc .Printf ("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a",_ffbc );};var _bcgfc ,_eedd []int ;for _aceg ,_fgfed :=range _ffbc {switch _fgfed {case _aaad :_eedd =append (_eedd ,_aceg );case _cfae :_bcgfc =append (_bcgfc ,_aceg );
|
||
};};if _adbfb {_agc .Printf ("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_eedd ),_eedd );_agc .Printf ("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a",len (_bcgfc ),_bcgfc );
|
||
};_edgbd :=(len (_eedd )==2&&len (_bcgfc )==2)||(len (_eedd )==2&&len (_bcgfc )==0&&_fdaf (_cddd [_eedd [0]],_cddd [_eedd [1]]))||(len (_bcgfc )==2&&len (_eedd )==0&&_adbb (_cddd [_bcgfc [0]],_cddd [_bcgfc [1]]));if _adbfb {_agc .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_eedd ),len (_bcgfc ),_edgbd );
|
||
};if !_edgbd {if _adbfb {_ga .Log .Error ("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v",_befd );_agc .Printf (" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a",len (_eedd ),len (_bcgfc ),_edgbd );
|
||
};return &ruling {},false ;};if len (_bcgfc )==0{for _bgbf ,_gfdg :=range _ffbc {if _gfdg !=_aaad {_bcgfc =append (_bcgfc ,_bgbf );};};};if len (_eedd )==0{for _cbfa ,_agce :=range _ffbc {if _agce !=_cfae {_eedd =append (_eedd ,_cbfa );};};};if _adbfb {_ga .Log .Info ("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",len (_eedd ),len (_bcgfc ),len (_cddd ),_eedd ,_bcgfc ,_cddd );
|
||
};var _cebd ,_accd ,_bcfba ,_fdff _agf .Point ;if _cddd [_eedd [0]].Y > _cddd [_eedd [1]].Y {_bcfba ,_fdff =_cddd [_eedd [0]],_cddd [_eedd [1]];}else {_bcfba ,_fdff =_cddd [_eedd [1]],_cddd [_eedd [0]];};if _cddd [_bcgfc [0]].X > _cddd [_bcgfc [1]].X {_cebd ,_accd =_cddd [_bcgfc [0]],_cddd [_bcgfc [1]];
|
||
}else {_cebd ,_accd =_cddd [_bcgfc [1]],_cddd [_bcgfc [0]];};_dead :=_ba .PdfRectangle {Llx :_cebd .X ,Urx :_accd .X ,Lly :_fdff .Y ,Ury :_bcfba .Y };if _dead .Llx > _dead .Urx {_dead .Llx ,_dead .Urx =_dead .Urx ,_dead .Llx ;};if _dead .Lly > _dead .Ury {_dead .Lly ,_dead .Ury =_dead .Ury ,_dead .Lly ;
|
||
};_abdd :=rectRuling {PdfRectangle :_dead ,_ecaa :_aaeed (_dead ),Color :_becff };if _abdd ._ecaa ==_fbcc {if _adbfb {_ga .Log .Error ("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c");
|
||
};return nil ,false ;};_dfbb ,_ccfd :=_abdd .asRuling ();if !_ccfd {if _adbfb {_ga .Log .Error ("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg");};return nil ,false ;};if _adgbf {_agc .Printf ("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a",_dfbb .String ());
|
||
};return _dfbb ,true ;};type event struct{_cdbc float64 ;_dddef bool ;_dcfcc int ;};type lineRuling struct{_cebc rulingKind ;_beda markKind ;_gff .Color ;_eded ,_badee _agf .Point ;};
|
||
|
||
// ToText returns the page text as a single string.
|
||
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
|
||
// Text() instead.
|
||
func (_ebeb PageText )ToText ()string {return _ebeb .Text ()};func _bdefa (_bfbgb *_ba .Image ,_dedeb _gff .Color )_ag .Image {_acfba ,_fecfg :=int (_bfbgb .Width ),int (_bfbgb .Height );_cgcc :=_ag .NewRGBA (_ag .Rect (0,0,_acfba ,_fecfg ));for _egadc :=0;
|
||
_egadc < _fecfg ;_egadc ++{for _acgf :=0;_acgf < _acfba ;_acgf ++{_cfab ,_gfadg :=_bfbgb .ColorAt (_acgf ,_egadc );if _gfadg !=nil {_ga .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e",_acgf ,_egadc );
|
||
continue ;};_ffcb ,_cegg ,_efcff ,_ :=_cfab .RGBA ();var _aeegg _gff .Color ;if _ffcb +_cegg +_efcff ==0{_aeegg =_gff .Transparent ;}else {_aeegg =_dedeb ;};_cgcc .Set (_acgf ,_egadc ,_aeegg );};};return _cgcc ;};func (_face paraList )findTableGrid (_gfcc gridTiling )(*textTable ,map[*textPara ]struct{}){_bdcfab :=len (_gfcc ._fece );
|
||
_cacef :=len (_gfcc ._dfca );_adbe :=textTable {_edeg :true ,_eacg :_bdcfab ,_cgae :_cacef ,_egfea :make (map[uint64 ]*textPara ,_bdcfab *_cacef ),_aaaga :make (map[uint64 ]compositeCell ,_bdcfab *_cacef )};_adbe .PdfRectangle =_gfcc .PdfRectangle ;_efcbg :=make (map[*textPara ]struct{});
|
||
_ggda :=int ((1.0-_beec )*float64 (_bdcfab *_cacef ));_dcgd :=0;if _dfge {_ga .Log .Info ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064",_bdcfab ,_cacef );};for _dcab ,_adcga :=range _gfcc ._dfca {_dagg ,_egdb :=_gfcc ._gdcg [_adcga ];
|
||
if !_egdb {continue ;};for _bcec ,_baffd :=range _gfcc ._fece {_cfcc ,_gddae :=_dagg [_baffd ];if !_gddae {continue ;};_bbdaa :=_face .inTile (_cfcc );if len (_bbdaa )==0{_dcgd ++;if _dcgd > _ggda {if _dfge {_ga .Log .Info ("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064",_dcgd );
|
||
};return nil ,nil ;};}else {_adbe .putComposite (_bcec ,_dcab ,_bbdaa ,_cfcc .PdfRectangle );for _ ,_ceef :=range _bbdaa {_efcbg [_ceef ]=struct{}{};};};};};_gaac :=0;for _gfeb :=0;_gfeb < _bdcfab ;_gfeb ++{_cdbga :=_adbe .get (_gfeb ,0);if _cdbga ==nil ||!_cdbga ._cddef {_gaac ++;
|
||
};};if _gaac ==0{if _dfge {_ga .Log .Info ("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030");};return nil ,nil ;};_eggf :=_adbe .reduceTiling (_gfcc ,_gaea );_eggf =_eggf .subdivide ();return _eggf ,_efcbg ;};const (_aba ="\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065";
|
||
_cgc ="\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064";
|
||
_aef ="\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065";);
|
||
|
||
|
||
// GetContentStreamOps returns the contentStreamOps field of `pt`.
|
||
func (_gbff *PageText )GetContentStreamOps ()*_fc .ContentStreamOperations {return _gbff ._cde };func _adgc (_eadab *textLine )float64 {return _eadab ._cdcg [0].Llx };func (_adbc *wordBag )pullWord (_gbad *textWord ,_fcfde int ,_fefag map[int ]map[*textWord ]struct{}){_adbc .PdfRectangle =_bbbafc (_adbc .PdfRectangle ,_gbad .PdfRectangle );
|
||
if _gbad ._eabbf > _adbc ._cdea {_adbc ._cdea =_gbad ._eabbf ;};_adbc ._fcgd [_fcfde ]=append (_adbc ._fcgd [_fcfde ],_gbad );_fefag [_fcfde ][_gbad ]=struct{}{};};const (_cebg =false ;_bfgae =false ;_eeeb =false ;_cgbd =false ;_fdbg =false ;_cfca =false ;
|
||
_eeab =false ;_afga =false ;_cbbb =false ;_abfa =_cbbb &&true ;_gaba =_abfa &&false ;_bdgb =_cbbb &&true ;_efda =false ;_baace =_efda &&false ;_ggcd =_efda &&true ;_adgbf =false ;_ebce =_adgbf &&false ;_eda =_adgbf &&false ;_dfge =_adgbf &&true ;_adbfb =_adgbf &&false ;
|
||
_ggaeg =_adgbf &&false ;);func (_eaeea *textTable )computeBbox ()_ba .PdfRectangle {var _edcbe _ba .PdfRectangle ;_cfeg :=false ;for _gggbe :=0;_gggbe < _eaeea ._cgae ;_gggbe ++{for _gad :=0;_gad < _eaeea ._eacg ;_gad ++{_gfffe :=_eaeea .get (_gad ,_gggbe );
|
||
if _gfffe ==nil {continue ;};if !_cfeg {_edcbe =_gfffe .PdfRectangle ;_cfeg =true ;}else {_edcbe =_bbbafc (_edcbe ,_gfffe .PdfRectangle );};};};return _edcbe ;};
|
||
|
||
// Text gets the extracted text contained in `l`.
|
||
func (_caec *list )Text ()string {_ggab :=&_f .Builder {};_gcbc :="";_dbbbd (_caec ,_ggab ,&_gcbc );return _ggab .String ();};func (_bbcf *shapesState )establishSubpath ()*subpath {_agdg ,_ddec :=_bbcf .lastpointEstablished ();if !_ddec {_bbcf ._gfce =append (_bbcf ._gfce ,_ddbd (_agdg ));
|
||
};if len (_bbcf ._gfce )==0{return nil ;};_bbcf ._gbdgg =false ;return _bbcf ._gfce [len (_bbcf ._gfce )-1];};func (_aadac rulingList )primaries ()[]float64 {_dgac :=make (map[float64 ]struct{},len (_aadac ));for _ ,_bcde :=range _aadac {_dgac [_bcde ._edga ]=struct{}{};
|
||
};_dfcf :=make ([]float64 ,len (_dgac ));_beffd :=0;for _fcab :=range _dgac {_dfcf [_beffd ]=_fcab ;_beffd ++;};_e .Float64s (_dfcf );return _dfcf ;};func _aedcc (_bbdfg int ,_gcfcb func (int ,int )bool )[]int {_ccce :=make ([]int ,_bbdfg );for _fadad :=range _ccce {_ccce [_fadad ]=_fadad ;
|
||
};_e .Slice (_ccce ,func (_deccc ,_dccc int )bool {return _gcfcb (_ccce [_deccc ],_ccce [_dccc ])});return _ccce ;};func (_faac *shapesState )drawRectangle (_cgbce ,_edb ,_cdfc ,_bfed float64 ){if _fdbg {_bfec :=_faac .devicePoint (_cgbce ,_edb );_gacc :=_faac .devicePoint (_cgbce +_cdfc ,_edb +_bfed );
|
||
_fcbg :=_ba .PdfRectangle {Llx :_bfec .X ,Lly :_bfec .Y ,Urx :_gacc .X ,Ury :_gacc .Y };_ga .Log .Info ("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066",_fcbg );};_faac .newSubPath ();_faac .moveTo (_cgbce ,_edb );
|
||
_faac .lineTo (_cgbce +_cdfc ,_edb );_faac .lineTo (_cgbce +_cdfc ,_edb +_bfed );_faac .lineTo (_cgbce ,_edb +_bfed );_faac .closePath ();};func (_fgbfc *shapesState )clearPath (){_fgbfc ._gfce =nil ;_fgbfc ._gbdgg =false ;if _fdbg {_ga .Log .Info ("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073",_fgbfc );
|
||
};};func (_ddaed rulingList )secMinMax ()(float64 ,float64 ){_gffba ,_fgcg :=_ddaed [0]._fcec ,_ddaed [0]._abeg ;for _ ,_gdaaf :=range _ddaed [1:]{if _gdaaf ._fcec < _gffba {_gffba =_gdaaf ._fcec ;};if _gdaaf ._abeg > _fgcg {_fgcg =_gdaaf ._abeg ;};};return _gffba ,_fgcg ;
|
||
};func _fdgb (_decgc []*wordBag )[]*wordBag {if len (_decgc )<=1{return _decgc ;};if _cbbb {_ga .Log .Info ("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a");};_e .Slice (_decgc ,func (_dgeb ,_daag int )bool {_dfdd ,_decgg :=_decgc [_dgeb ],_decgc [_daag ];
|
||
_agdb :=_dfdd .Width ()*_dfdd .Height ();_cedg :=_decgg .Width ()*_decgg .Height ();if _agdb !=_cedg {return _agdb > _cedg ;};if _dfdd .Height ()!=_decgg .Height (){return _dfdd .Height ()> _decgg .Height ();};return _dgeb < _daag ;});var _fbdd []*wordBag ;
|
||
_dac :=make (intSet );for _dabba :=0;_dabba < len (_decgc );_dabba ++{if _dac .has (_dabba ){continue ;};_beeb :=_decgc [_dabba ];for _gdcfb :=_dabba +1;_gdcfb < len (_decgc );_gdcfb ++{if _dac .has (_dabba ){continue ;};_gcdc :=_decgc [_gdcfb ];_dadc :=_beeb .PdfRectangle ;
|
||
_dadc .Llx -=_beeb ._cdea ;if _gcdb (_dadc ,_gcdc .PdfRectangle ){_beeb .absorb (_gcdc );_dac .add (_gdcfb );};};_fbdd =append (_fbdd ,_beeb );};if len (_decgc )!=len (_fbdd )+len (_dac ){_ga .Log .Error ("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064",len (_decgc ),len (_fbdd ),len (_dac ));
|
||
};return _fbdd ;};func (_eade paraList )computeEBBoxes (){if _cebg {_ga .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a");};for _ ,_aaegg :=range _eade {_aaegg ._dgabg =_aaegg .PdfRectangle ;};_ebed :=_eade .yNeighbours (0);
|
||
for _dgab ,_bbgfb :=range _eade {_gfcd :=_bbgfb ._dgabg ;_ffff ,_gedfd :=-1.0e9,+1.0e9;for _ ,_fgfb :=range _ebed [_bbgfb ]{_babe :=_eade [_fgfb ]._dgabg ;if _babe .Urx < _gfcd .Llx {_ffff =_gf .Max (_ffff ,_babe .Urx );}else if _gfcd .Urx < _babe .Llx {_gedfd =_gf .Min (_gedfd ,_babe .Llx );
|
||
};};for _ffdd ,_bfgb :=range _eade {_egbf :=_bfgb ._dgabg ;if _dgab ==_ffdd ||_egbf .Ury > _gfcd .Lly {continue ;};if _ffff <=_egbf .Llx &&_egbf .Llx < _gfcd .Llx {_gfcd .Llx =_egbf .Llx ;}else if _egbf .Urx <=_gedfd &&_gfcd .Urx < _egbf .Urx {_gfcd .Urx =_egbf .Urx ;
|
||
};};if _cebg {_agc .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_dgab ,_bbgfb ._dgabg ,_gfcd ,_adgd (_bbgfb .text (),50));};_bbgfb ._dgabg =_gfcd ;};if _afgf {for _ ,_cfad :=range _eade {_cfad .PdfRectangle =_cfad ._dgabg ;
|
||
};};};func (_aadc rulingList )blocks (_abeb ,_bbge *ruling )bool {if _abeb ._fcec > _bbge ._abeg ||_bbge ._fcec > _abeb ._abeg {return false ;};_edced :=_gf .Max (_abeb ._fcec ,_bbge ._fcec );_gaeaf :=_gf .Min (_abeb ._abeg ,_bbge ._abeg );if _abeb ._edga > _bbge ._edga {_abeb ,_bbge =_bbge ,_abeb ;
|
||
};for _ ,_edbd :=range _aadc {if _abeb ._edga <=_edbd ._edga +_gbb &&_edbd ._edga <=_bbge ._edga +_gbb &&_edbd ._fcec <=_gaeaf &&_edced <=_edbd ._abeg {return true ;};};return false ;};func (_beeg rulingList )mergePrimary ()float64 {_edaae :=_beeg [0]._edga ;
|
||
for _ ,_beecf :=range _beeg [1:]{_edaae +=_beecf ._edga ;};return _edaae /float64 (len (_beeg ));};var _geef string ="\u0028\u003f\u0069\u0029\u005e\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028?\u003a\u0044\u007cM\u0029\u007c\u0044\u003f\u0043{\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028\u003f\u003a\u004c\u007c\u0043\u0029\u007cL\u003f\u0058\u007b\u0030\u002c\u0033}\u0029\u0028\u0049\u0028\u003f\u003a\u0056\u007c\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u005c\u0029\u007c\u005c\u002e\u0029\u007c\u005e\u005c\u0028\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028\u003f\u003aD\u007cM\u0029\u007c\u0044\u003f\u0043\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028?\u003a\u004c\u007c\u0043\u0029\u007c\u004c?\u0058\u007b0\u002c\u0033\u007d\u0029(\u0049\u0028\u003f\u003a\u0056|\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u005c\u0029";
|
||
type bounded interface{bbox ()_ba .PdfRectangle };func _eegf (_efff _ba .PdfRectangle ,_adcc bounded )float64 {return _efff .Ury -_adcc .bbox ().Lly };func (_dddg gridTile )numBorders ()int {_eddd :=0;if _dddg ._ecaf {_eddd ++;};if _dddg ._bfab {_eddd ++;
|
||
};if _dddg ._bgbfd {_eddd ++;};if _dddg ._dgccd {_eddd ++;};return _eddd ;};func _ddbf (_eadac ,_deaa _agf .Point )rulingKind {_efaea :=_gf .Abs (_eadac .X -_deaa .X );_aafa :=_gf .Abs (_eadac .Y -_deaa .Y );return _cbggc (_efaea ,_aafa ,_beed );};func (_ecaag gridTiling )complete ()bool {for _ ,_gddcc :=range _ecaag ._gdcg {for _ ,_ccggf :=range _gddcc {if !_ccggf .complete (){return false ;
|
||
};};};return true ;};func _ffeg (_defd []TextMark ,_deea *int ,_fccd string )[]TextMark {_fggg :=_gbdc ;_fggg .Text =_fccd ;return _aadbb (_defd ,_deea ,_fggg );};func (_cagg *textObject )setTextRise (_bcb float64 ){if _cagg ==nil {return ;};_cagg ._efed ._eaaf =_bcb ;
|
||
};func _caeb (_geeb ,_efcc float64 )bool {return _geeb /_gf .Max (_dccf ,_efcc )< _beed };type lists []*list ;func (_cdab *wordBag )applyRemovals (_ggef map[int ]map[*textWord ]struct{}){for _abcb ,_egba :=range _ggef {if len (_egba )==0{continue ;};_gffgg :=_cdab ._fcgd [_abcb ];
|
||
_gcg :=len (_gffgg )-len (_egba );if _gcg ==0{delete (_cdab ._fcgd ,_abcb );continue ;};_efab :=make ([]*textWord ,_gcg );_gag :=0;for _ ,_dab :=range _gffgg {if _ ,_aecf :=_egba [_dab ];!_aecf {_efab [_gag ]=_dab ;_gag ++;};};_cdab ._fcgd [_abcb ]=_efab ;
|
||
};};func (_egecg paraList )findTables (_cdcd []gridTiling )[]*textTable {_egecg .addNeighbours ();_e .Slice (_egecg ,func (_fbec ,_dafd int )bool {return _bafa (_egecg [_fbec ],_egecg [_dafd ])< 0});var _gabca []*textTable ;if _bfcf {_cgge :=_egecg .findGridTables (_cdcd );
|
||
_gabca =append (_gabca ,_cgge ...);};if _cbdec {_gfaa :=_egecg .findTextTables ();_gabca =append (_gabca ,_gfaa ...);};return _gabca ;};func (_ebe *textObject )setFont (_egf string ,_cbaa float64 )error {if _ebe ==nil {return nil ;};_ebe ._efed ._dcd =_cbaa ;
|
||
_fefd ,_dcbe :=_ebe .getFont (_egf );if _dcbe !=nil {return _dcbe ;};_ebe ._efed ._gbdg =_fefd ;return nil ;};func (_dcec *textObject )moveTextSetLeading (_faab ,_cae float64 ){_dcec ._efed ._bfa =-_cae ;_dcec .moveLP (_faab ,_cae );};func _dbfg (_cade []*textLine ,_ecff map[float64 ][]*textLine )[]*list {_gbecc :=_bdfa (_ecff );
|
||
_fgbg :=[]*list {};if len (_gbecc )==0{return _fgbg ;};_gaefg :=_gbecc [0];_fbbee :=1;_gegf :=_ecff [_gaefg ];for _bgdc ,_agge :=range _gegf {var _edgf float64 ;_gfge :=[]*list {};_ebab :=_agge ._bbfg ;_aedgd :=-1.0;if _bgdc < len (_gegf )-1{_aedgd =_gegf [_bgdc +1]._bbfg ;
|
||
};if _fbbee < len (_gbecc ){_gfge =_eeag (_cade ,_ecff ,_gbecc ,_fbbee ,_ebab ,_aedgd );};_edgf =_aedgd ;if len (_gfge )> 0{_eaad :=_gfge [0];if len (_eaad ._aebd )> 0{_edgf =_eaad ._aebd [0]._bbfg ;};};_bded :=[]*textLine {_agge };_ecfde :=_ggdbe (_agge ,_cade ,_gbecc ,_ebab ,_edgf );
|
||
_bded =append (_bded ,_ecfde ...);_efcd :=_dgda (_bded ,"\u0062\u0075\u006c\u006c\u0065\u0074",_gfge );_efcd ._ddef =_efgc (_bded ,"");_fgbg =append (_fgbg ,_efcd );};return _fgbg ;};type paraList []*textPara ;func (_ceed *textTable )getComposite (_dacd ,_bdge int )(paraList ,_ba .PdfRectangle ){_dcde ,_bdae :=_ceed ._aaaga [_bgcc (_dacd ,_bdge )];
|
||
if _efda {_agc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a",_dacd ,_bdge ,_dcde .String ());};if !_bdae {return nil ,_ba .PdfRectangle {};
|
||
};return _dcde .parasBBox ();};func (_fdcc rulingList )connections (_bcdd map[int ]intSet ,_efba int )intSet {_gbdac :=make (intSet );_gffag :=make (intSet );var _aeaa func (int );_aeaa =func (_fbgcb int ){if !_gffag .has (_fbgcb ){_gffag .add (_fbgcb );
|
||
for _geab :=range _fdcc {if _bcdd [_geab ].has (_fbgcb ){_gbdac .add (_geab );};};for _ggfe :=range _fdcc {if _gbdac .has (_ggfe ){_aeaa (_ggfe );};};};};_aeaa (_efba );return _gbdac ;};func _adad (_bbe []*textWord ,_cbc float64 ,_ecad ,_effa rulingList )*wordBag {_dbbb :=_bfcbd (_bbe [0],_cbc ,_ecad ,_effa );
|
||
for _ ,_fcce :=range _bbe [1:]{_efg :=_ceee (_fcce ._dfagd );_dbbb ._fcgd [_efg ]=append (_dbbb ._fcgd [_efg ],_fcce );_dbbb .PdfRectangle =_bbbafc (_dbbb .PdfRectangle ,_fcce .PdfRectangle );};_dbbb .sort ();return _dbbb ;};const _ebaf =1.0/1000.0;func (_dgec *textPara )getListLines ()[]*textLine {var _cdcf []*textLine ;
|
||
_cedb :=_ecaea (_dgec ._fdec );for _ ,_bfcd :=range _dgec ._fdec {_bfdgc :=_bfcd ._cdcg [0]._eaae [0];if _ecfd (_bfdgc ){_cdcf =append (_cdcf ,_bfcd );};};_cdcf =append (_cdcf ,_cedb ...);return _cdcf ;};func _abda (_debg *list )[]*list {var _bdfc []*list ;
|
||
for _ ,_dceb :=range _debg ._abcc {switch _dceb ._cdde {case "\u004c\u0049":_debd :=_acdc (_dceb );_ggfb :=_abda (_dceb );_agcaf :=_dgda (_debd ,"\u0062\u0075\u006c\u006c\u0065\u0074",_ggfb );_bfbf :=_efgc (_debd ,"");_agcaf ._ddef =_bfbf ;_bdfc =append (_bdfc ,_agcaf );
|
||
case "\u004c\u0042\u006fd\u0079":return _abda (_dceb );case "\u004c":_dggc :=_abda (_dceb );_bdfc =append (_bdfc ,_dggc ...);return _bdfc ;};};return _bdfc ;};var _de =false ;func (_gebc *textObject )setHorizScaling (_gfd float64 ){if _gebc ==nil {return ;
|
||
};_gebc ._efed ._def =_gfd ;};func (_dadbc *wordBag )absorb (_bbfc *wordBag ){_ffe :=_bbfc .makeRemovals ();for _dbbd ,_bbde :=range _bbfc ._fcgd {for _ ,_gaef :=range _bbde {_dadbc .pullWord (_gaef ,_dbbd ,_ffe );};};_bbfc .applyRemovals (_ffe );};func (_ccgfc *textPara )taken ()bool {return _ccgfc ==nil ||_ccgfc ._gecb };
|
||
func _fgfag (_addcb ,_agef _ba .PdfRectangle )bool {return _agef .Llx <=_addcb .Urx &&_addcb .Llx <=_agef .Urx ;};func (_edce *shapesState )moveTo (_cbg ,_fff float64 ){_edce ._gbdgg =true ;_edce ._gcaaf =_edce .devicePoint (_cbg ,_fff );if _fdbg {_ga .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_cbg ,_fff ,_edce ._gcaaf );
|
||
};};func _cgfd (_bffg *wordBag ,_eeac *textWord ,_efef float64 )bool {return _eeac .Llx < _bffg .Urx +_efef &&_bffg .Llx -_efef < _eeac .Urx ;};func (_gggd rulingList )removeDuplicates ()rulingList {if len (_gggd )==0{return nil ;};_gggd .sort ();_fade :=rulingList {_gggd [0]};
|
||
for _ ,_gccfc :=range _gggd [1:]{if _gccfc .equals (_fade [len (_fade )-1]){continue ;};_fade =append (_fade ,_gccfc );};return _fade ;};func _dcdb (_gacd []TextMark ,_ecbfg *TextTable )[]TextMark {var _bedda []TextMark ;for _ ,_bcbcb :=range _gacd {_bcbcb ._efeg =true ;
|
||
_bcbcb ._cfaa =_ecbfg ;_bedda =append (_bedda ,_bcbcb );};return _bedda ;};func (_ccddf paraList )log (_dcba string ){if !_afga {return ;};_ga .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_dcba ,len (_ccddf ));
|
||
for _cccc ,_dfab :=range _ccddf {if _dfab ==nil {continue ;};_bcgbc :=_dfab .text ();_eefdf :="\u0020\u0020";if _dfab ._fbbea !=nil {_eefdf =_agc .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_dfab ._fbbea ._eacg ,_dfab ._fbbea ._cgae );};_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_cccc ,_dfab .PdfRectangle ,_eefdf ,_adgd (_bcgbc ,50));
|
||
};};func (_aebb *textTable )getRight ()paraList {_fgea :=make (paraList ,_aebb ._cgae );for _ccdbe :=0;_ccdbe < _aebb ._cgae ;_ccdbe ++{_aaedaa :=_aebb .get (_aebb ._eacg -1,_ccdbe )._fdgf ;if _aaedaa .taken (){return nil ;};_fgea [_ccdbe ]=_aaedaa ;};
|
||
for _eec :=0;_eec < _aebb ._cgae -1;_eec ++{if _fgea [_eec ]._ecada !=_fgea [_eec +1]{return nil ;};};return _fgea ;};func (_acab lineRuling )yMean ()float64 {return 0.5*(_acab ._eded .Y +_acab ._badee .Y )};func _abag (_ebc ,_bdabb bounded )float64 {return _dag (_ebc )-_dag (_bdabb )};
|
||
func (_ced *textObject )renderText (_gbga _add .PdfObject ,_gbfc []byte ,_gceg int )error {if _ced ._dgcf {_ga .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");
|
||
return nil ;};_eaac :=_ced .getCurrentFont ();_decg :=_eaac .BytesToCharcodes (_gbfc );_fdb ,_cbe ,_egcd :=_eaac .CharcodesToStrings (_decg );if _egcd > 0{_ga .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_cbe ,_egcd );
|
||
};_ced ._efed ._abd +=_cbe ;_ced ._efed ._bafdf +=_egcd ;_bae :=_ced ._efed ;_dggb :=_bae ._dcd ;_ggf :=_bae ._def /100.0;_baaf :=_ebaf ;if _eaac .Subtype ()=="\u0054\u0079\u0070e\u0033"{_baaf =1;};_bbabg ,_bbg :=_eaac .GetRuneMetrics (' ');if !_bbg {_bbabg ,_bbg =_eaac .GetCharMetrics (32);
|
||
};if !_bbg {_bbabg ,_ =_ba .DefaultFont ().GetRuneMetrics (' ');};_gab :=_bbabg .Wx *_baaf ;_ga .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_gab ,_fdb ,_eaac ,_dggb );
|
||
_cdfb :=_agf .NewMatrix (_dggb *_ggf ,0,0,_dggb ,0,_bae ._eaaf );if _cfca {_ga .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_decg ),_decg ,_fdb );
|
||
};_ga .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_decg ),_decg ,len (_fdb ));_fab :=_ced .getFillColor ();
|
||
_cgf :=_ced .getStrokeColor ();for _aaf ,_cda :=range _fdb {_deg :=[]rune (_cda );if len (_deg )==1&&_deg [0]=='\x00'{continue ;};_bafdg :=_decg [_aaf ];_gbec :=_ced ._bace .CTM .Mult (_ced ._eefe ).Mult (_cdfb );_gcaa :=0.0;if len (_deg )==1&&_deg [0]==32{_gcaa =_bae ._gdeg ;
|
||
};_aff ,_dccb :=_eaac .GetCharMetrics (_bafdg );if !_dccb {_ga .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_bafdg ,_deg ,_deg ,_eaac );
|
||
return _agc .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_eaac .String (),_bafdg );};_cgdd :=_agf .Point {X :_aff .Wx *_baaf ,Y :_aff .Wy *_baaf };
|
||
_dbf :=_agf .Point {X :(_cgdd .X *_dggb +_gcaa )*_ggf };_egeb :=_agf .Point {X :(_cgdd .X *_dggb +_bae ._ffaf +_gcaa )*_ggf };if _cfca {_ga .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_dggb ,_bae ._ffaf ,_bae ._gdeg ,_ggf );
|
||
_ga .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_cgdd ,_dbf ,_egeb );};_fcdg :=_gfdd (_dbf );_aceb :=_gfdd (_egeb );_egec :=_ced ._bace .CTM .Mult (_ced ._eefe ).Mult (_fcdg );
|
||
if _cgbd {_ga .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_ced ._bace .CTM ,_ced ._eefe ,_aceb ,_eafa (_ced ._bace .CTM .Mult (_ced ._eefe ).Mult (_aceb )),_fcdg ,_egec ,_eafa (_egec ));
|
||
};_dadg ,_egbg :=_ced .newTextMark (_cg .ExpandLigatures (_deg ),_gbec ,_eafa (_egec ),_gf .Abs (_gab *_gbec .ScalingFactorX ()),_eaac ,_ced ._efed ._ffaf ,_fab ,_cgf ,_gbga ,_fdb ,_aaf ,_gceg );if !_egbg {_ga .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");
|
||
continue ;};if _eaac ==nil {_ga .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _eaac .Encoder ()==nil {_ga .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_eaac );
|
||
}else {if _ecfc ,_bgd :=_eaac .Encoder ().CharcodeToRune (_bafdg );_bgd {_dadg ._gccf =string (_ecfc );};};_ga .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_aaf ,_bafdg ,_dadg ,_gbec );
|
||
_ced ._cfde =append (_ced ._cfde ,&_dadg );_ced ._eefe .Concat (_aceb );};return nil ;};func (_fgbgd paraList )topoOrder ()[]int {if _afga {_ga .Log .Info ("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a");};_ecadg :=len (_fgbgd );_ddde :=make ([]bool ,_ecadg );
|
||
_gddada :=make ([]int ,0,_ecadg );_bcgbg :=_fgbgd .llyOrdering ();var _cbce func (_dfegg int );_cbce =func (_bbdac int ){_ddde [_bbdac ]=true ;for _fbbae :=0;_fbbae < _ecadg ;_fbbae ++{if !_ddde [_fbbae ]{if _fgbgd .readBefore (_bcgbg ,_bbdac ,_fbbae ){_cbce (_fbbae );
|
||
};};};_gddada =append (_gddada ,_bbdac );};for _aecfa :=0;_aecfa < _ecadg ;_aecfa ++{if !_ddde [_aecfa ]{_cbce (_aecfa );};};return _bdbc (_gddada );};func (_gef *PageText )getParagraphs ()paraList {var _egd rulingList ;if _gbge {_aeeb :=_agde (_gef ._efea );
|
||
_egd =append (_egd ,_aeeb ...);};if _gaedd {_bfcb :=_cgdde (_gef ._ebafc );_egd =append (_egd ,_bfcb ...);};_egd ,_gdga :=_egd .toTilings ();var _bfaf paraList ;_bcga :=len (_gef ._aebf );for _bee :=0;_bee < 360&&_bcga > 0;_bee +=90{_debac :=make ([]*textMark ,0,len (_gef ._aebf )-_bcga );
|
||
for _ ,_dggg :=range _gef ._aebf {if _dggg ._gfbg ==_bee {_debac =append (_debac ,_dggg );};};if len (_debac )> 0{_bedd :=_cabcg (_debac ,_gef ._bbdc ,_egd ,_gdga ,_gef ._eaag ._caab );_bfaf =append (_bfaf ,_bedd ...);_bcga -=len (_debac );};};return _bfaf ;
|
||
};type textLine struct{_ba .PdfRectangle ;_bbfg float64 ;_cdcg []*textWord ;_fdfb float64 ;};
|
||
|
||
// ImageMark represents an image drawn on a page and its position in device coordinates.
|
||
// All coordinates are in device coordinates.
|
||
type ImageMark struct{Image *_ba .Image ;
|
||
|
||
// Dimensions of the image as displayed in the PDF.
|
||
Width float64 ;Height float64 ;
|
||
|
||
// Position of the image in PDF coordinates (lower left corner).
|
||
X float64 ;Y float64 ;
|
||
|
||
// Angle in degrees, if rotated.
|
||
Angle float64 ;};func (_begf *wordBag )highestWord (_bade int ,_faafd ,_bcea float64 )*textWord {for _ ,_ebda :=range _begf ._fcgd [_bade ]{if _faafd <=_ebda ._dfagd &&_ebda ._dfagd <=_bcea {return _ebda ;};};return nil ;};
|
||
|
||
// Extractor stores and offers functionality for extracting content from PDF pages.
|
||
type Extractor struct{_ac string ;_gbe *_ba .PdfPageResources ;_cb _ba .PdfRectangle ;_eb *_ba .PdfRectangle ;_bd map[string ]fontEntry ;_baa map[string ]textResult ;_gfe map[string ]textResult ;_ae int64 ;_edc int ;_cf *Options ;_ab *_add .PdfObject ;
|
||
_dd _add .PdfObject ;_bg []*_ba .PdfAnnotation ;};func _beaa (_ecee ,_ceabd float64 )string {_bccef :=!_dbeae (_ecee -_ceabd );if _bccef {return "\u000a";};return "\u0020";};func _dbbbd (_dddcf *list ,_ccfc *_f .Builder ,_gfea *string ){_fgfcc :=_fbbd (_dddcf ,_gfea );
|
||
_ccfc .WriteString (_fgfcc );for _ ,_bcfb :=range _dddcf ._abcc {_bffga :=*_gfea +"\u0020\u0020\u0020";_dbbbd (_bcfb ,_ccfc ,&_bffga );};};func (_ffccd *textTable )log (_fadd string ){if !_efda {return ;};_ga .Log .Info ("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066",_fadd ,_ffccd ._eacg ,_ffccd ._cgae ,_ffccd ._edeg ,_ffccd .PdfRectangle );
|
||
for _aabca :=0;_aabca < _ffccd ._cgae ;_aabca ++{for _ebaff :=0;_ebaff < _ffccd ._eacg ;_ebaff ++{_gddce :=_ffccd .get (_ebaff ,_aabca );if _gddce ==nil {continue ;};_agc .Printf ("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a",_ebaff ,_aabca ,_gddce .PdfRectangle ,_adgd (_gddce .text (),50),_a .RuneCountInString (_gddce .text ()));
|
||
};};};func _ggad (_afecf map[int ]intSet )[]int {_cadc :=make ([]int ,0,len (_afecf ));for _gfdeg :=range _afecf {_cadc =append (_cadc ,_gfdeg );};_e .Ints (_cadc );return _cadc ;};func (_bgce *textObject )moveText (_bfbd ,_facc float64 ){_bgce .moveLP (_bfbd ,_facc )};
|
||
func _adcdc (_afcf []*textMark ,_fdcda _ba .PdfRectangle )*textWord {_bbgea :=_afcf [0].PdfRectangle ;_bgcde :=_afcf [0]._gccfd ;for _ ,_eaadc :=range _afcf [1:]{_bbgea =_bbbafc (_bbgea ,_eaadc .PdfRectangle );if _eaadc ._gccfd > _bgcde {_bgcde =_eaadc ._gccfd ;
|
||
};};return &textWord {PdfRectangle :_bbgea ,_abcee :_afcf ,_dfagd :_fdcda .Ury -_bbgea .Lly ,_eabbf :_bgcde };};func _bcfa (_aded func (*wordBag ,*textWord ,float64 )bool ,_bdbe float64 )func (*wordBag ,*textWord )bool {return func (_edgea *wordBag ,_afagd *textWord )bool {return _aded (_edgea ,_afagd ,_bdbe )};
|
||
};func _cgdde (_dgcfgg []pathSection )rulingList {_beebf (_dgcfgg );if _adgbf {_ga .Log .Info ("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs",len (_dgcfgg ));};var _bdggb rulingList ;
|
||
for _ ,_cdeca :=range _dgcfgg {for _ ,_fefg :=range _cdeca ._dbdc {if !_fefg .isQuadrilateral (){if _adgbf {_ga .Log .Error ("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073",_fefg );};continue ;};if _eccde ,_febed :=_fefg .makeRectRuling (_cdeca .Color );
|
||
_febed {_bdggb =append (_bdggb ,_eccde );}else {if _adbfb {_ga .Log .Error ("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073",_fefg );};};};};if _adgbf {_ga .Log .Info ("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073",_bdggb .String ());
|
||
};return _bdggb ;};func _gabc (_ddcb ,_ebge _ba .PdfRectangle )bool {return _fgfag (_ddcb ,_ebge )&&_bggg (_ddcb ,_ebge )};
|
||
|
||
// PageImages represents extracted images on a PDF page with spatial information:
|
||
// display position and size.
|
||
type PageImages struct{Images []ImageMark ;};var (_gb =_b .New ("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072");_gfg =_b .New ("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072");
|
||
);func (_ebeg *textLine )text ()string {var _fdfg []string ;for _ ,_aeae :=range _ebeg ._cdcg {if _aeae ._gdec {_fdfg =append (_fdfg ,"\u0020");};_fdfg =append (_fdfg ,_aeae ._eaae );};return _f .Join (_fdfg ,"");};func (_bdbbf rectRuling )checkWidth (_becfc ,_bccfa float64 )(float64 ,bool ){_cfbb :=_bccfa -_becfc ;
|
||
_cdbdd :=_cfbb <=_gbb ;return _cfbb ,_cdbdd ;};
|
||
|
||
// String returns a description of `w`.
|
||
func (_beagc *textWord )String ()string {return _agc .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_beagc ._dfagd ,_beagc .PdfRectangle ,_beagc ._eabbf ,_beagc ._eaae );
|
||
};
|
||
|
||
// Text returns the text content of the `bulletLists`.
|
||
func (_dged *lists )Text ()string {_cfdb :=&_f .Builder {};for _ ,_fcfb :=range *_dged {_dba :=_fcfb .Text ();_cfdb .WriteString (_dba );};return _cfdb .String ();};func (_daab *textTable )reduceTiling (_bcaga gridTiling ,_dadad float64 )*textTable {_afgfb :=make ([]int ,0,_daab ._cgae );
|
||
_eacf :=make ([]int ,0,_daab ._eacg );_fdea :=_bcaga ._fece ;_gbaeb :=_bcaga ._dfca ;for _bdcfa :=0;_bdcfa < _daab ._cgae ;_bdcfa ++{_cbfae :=_bdcfa > 0&&_gf .Abs (_gbaeb [_bdcfa -1]-_gbaeb [_bdcfa ])< _dadad &&_daab .emptyCompositeRow (_bdcfa );if !_cbfae {_afgfb =append (_afgfb ,_bdcfa );
|
||
};};for _bgbb :=0;_bgbb < _daab ._eacg ;_bgbb ++{_ccbe :=_bgbb < _daab ._eacg -1&&_gf .Abs (_fdea [_bgbb +1]-_fdea [_bgbb ])< _dadad &&_daab .emptyCompositeColumn (_bgbb );if !_ccbe {_eacf =append (_eacf ,_bgbb );};};if len (_afgfb )==_daab ._cgae &&len (_eacf )==_daab ._eacg {return _daab ;
|
||
};_abcdf :=textTable {_edeg :_daab ._edeg ,_eacg :len (_eacf ),_cgae :len (_afgfb ),_aaaga :make (map[uint64 ]compositeCell ,len (_eacf )*len (_afgfb ))};if _efda {_ga .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_daab ._eacg ,_daab ._cgae ,len (_eacf ),len (_afgfb ));
|
||
_ga .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_eacf );_ga .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_afgfb );};for _fgcfc ,_faefc :=range _afgfb {for _gdfe ,_ebcf :=range _eacf {_cfdd ,_bcddg :=_daab .getComposite (_ebcf ,_faefc );
|
||
if len (_cfdd )==0{continue ;};if _efda {_agc .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_gdfe ,_fgcfc ,_ebcf ,_faefc ,_adgd (_cfdd .merge ().text (),50));};_abcdf .putComposite (_gdfe ,_fgcfc ,_cfdd ,_bcddg );
|
||
};};return &_abcdf ;};type rulingList []*ruling ;func (_becb paraList )tables ()[]TextTable {var _agag []TextTable ;if _efda {_ga .Log .Info ("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a");};for _ ,_afefe :=range _becb {_cce :=_afefe ._fbbea ;
|
||
if _cce !=nil &&_cce .isExportable (){_agag =append (_agag ,_cce .toTextTable ());};};return _agag ;};func (_adga *textTable )emptyCompositeRow (_cfgc int )bool {for _egaba :=0;_egaba < _adga ._eacg ;_egaba ++{if _bfac ,_gcab :=_adga ._aaaga [_bgcc (_egaba ,_cfgc )];
|
||
_gcab {if len (_bfac .paraList )> 0{return false ;};};};return true ;};func _ecfb (_ecbbe string )(string ,bool ){_gege :=[]rune (_ecbbe );if len (_gege )!=1{return "",false ;};_gcgdf ,_dafgd :=_gbefd [_gege [0]];return _gcgdf ,_dafgd ;};
|
||
|
||
// Marks returns the TextMark collection for a page. It represents all the text on the page.
|
||
func (_addc PageText )Marks ()*TextMarkArray {return &TextMarkArray {_ffca :_addc ._ccca }};func _gfdd (_bgge _agf .Point )_agf .Matrix {return _agf .TranslationMatrix (_bgge .X ,_bgge .Y )};
|
||
|
||
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
|
||
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
|
||
// `start` and `end` are offsets in the extracted text.
|
||
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
|
||
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
|
||
func (_gac *TextMarkArray )RangeOffset (start ,end int )(*TextMarkArray ,error ){if _gac ==nil {return nil ,_b .New ("\u006da\u003d\u003d\u006e\u0069\u006c");};if end < start {return nil ,_agc .Errorf ("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020",start ,end );
|
||
};_aabaa :=len (_gac ._ffca );if _aabaa ==0{return _gac ,nil ;};if start < _gac ._ffca [0].Offset {start =_gac ._ffca [0].Offset ;};if end > _gac ._ffca [_aabaa -1].Offset +1{end =_gac ._ffca [_aabaa -1].Offset +1;};_gaf :=_e .Search (_aabaa ,func (_aec int )bool {return _gac ._ffca [_aec ].Offset +len (_gac ._ffca [_aec ].Text )-1>=start });
|
||
if !(0<=_gaf &&_gaf < _aabaa ){_dfde :=_agc .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076",start ,_gaf ,_aabaa ,_gac ._ffca [0],_gac ._ffca [_aabaa -1]);
|
||
return nil ,_dfde ;};_edge :=_e .Search (_aabaa ,func (_aeaf int )bool {return _gac ._ffca [_aeaf ].Offset > end -1});if !(0<=_edge &&_edge < _aabaa ){_cfbd :=_agc .Errorf ("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076",end ,_edge ,_aabaa ,_gac ._ffca [0],_gac ._ffca [_aabaa -1]);
|
||
return nil ,_cfbd ;};if _edge <=_gaf {return nil ,_agc .Errorf ("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064",start ,end ,_gaf ,_edge );
|
||
};return &TextMarkArray {_ffca :_gac ._ffca [_gaf :_edge ]},nil ;};func _degf (_faea _ba .PdfRectangle )*ruling {return &ruling {_bbce :_cfae ,_edga :_faea .Urx ,_fcec :_faea .Lly ,_abeg :_faea .Ury };};var (_gbefd =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};
|
||
);func _efgc (_bggdg []*textLine ,_dgeaf string )string {var _afda _f .Builder ;_afebd :=0.0;for _ebec ,_ggbg :=range _bggdg {_bbee :=_ggbg .text ();_degcd :=_ggbg ._bbfg ;if _ebec < len (_bggdg )-1{_afebd =_bggdg [_ebec +1]._bbfg ;}else {_afebd =0.0;};
|
||
_afda .WriteString (_dgeaf );_afda .WriteString (_bbee );if _afebd !=_degcd {_afda .WriteString ("\u000a");}else {_afda .WriteString ("\u0020");};};return _afda .String ();};const (_gcca markKind =iota ;_bcfc ;_ccfg ;_degg ;);type rectRuling struct{_ecaa rulingKind ;
|
||
_bdbfa markKind ;_gff .Color ;_ba .PdfRectangle ;};const _fad =20;func (_cgged *textWord )bbox ()_ba .PdfRectangle {return _cgged .PdfRectangle };func (_dea *textPara )fontsize ()float64 {return _dea ._fdec [0]._fdfb };func (_fefe paraList )writeText (_fbdbd _gg .Writer ){for _gddd ,_facg :=range _fefe {if _facg ._cddef {continue ;
|
||
};_facg .writeText (_fbdbd );if _gddd !=len (_fefe )-1{if _gfgc (_facg ,_fefe [_gddd +1]){_fbdbd .Write ([]byte ("\u0020"));}else {_fbdbd .Write ([]byte ("\u000a"));_fbdbd .Write ([]byte ("\u000a"));};};};_fbdbd .Write ([]byte ("\u000a"));_fbdbd .Write ([]byte ("\u000a"));
|
||
};func (_adf *textObject )checkOp (_cdc *_fc .ContentStreamOperation ,_cgde int ,_fafe bool )(_gbda bool ,_acc error ){if _adf ==nil {var _ffc []_add .PdfObject ;if _cgde > 0{_ffc =_cdc .Params ;if len (_ffc )> _cgde {_ffc =_ffc [:_cgde ];};};_ga .Log .Debug ("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076",_cdc .Operand ,_ffc );
|
||
};if _cgde >=0{if len (_cdc .Params )!=_cgde {if _fafe {_acc =_b .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");};_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_cdc .Operand ,_cgde ,len (_cdc .Params ),_cdc .Params );
|
||
return false ,_acc ;};};return true ,nil ;};func (_bggb rulingList )splitSec ()[]rulingList {_e .Slice (_bggb ,func (_fcdc ,_dbea int )bool {_bdade ,_gbce :=_bggb [_fcdc ],_bggb [_dbea ];if _bdade ._fcec !=_gbce ._fcec {return _bdade ._fcec < _gbce ._fcec ;
|
||
};return _bdade ._abeg < _gbce ._abeg ;});_aacgg :=make (map[*ruling ]struct{},len (_bggb ));_dbec :=func (_dfgce *ruling )rulingList {_cgcb :=rulingList {_dfgce };_aacgg [_dfgce ]=struct{}{};for _ ,_bega :=range _bggb {if _ ,_ecabad :=_aacgg [_bega ];
|
||
_ecabad {continue ;};for _ ,_adfbc :=range _cgcb {if _bega .alignsSec (_adfbc ){_cgcb =append (_cgcb ,_bega );_aacgg [_bega ]=struct{}{};break ;};};};return _cgcb ;};_edgfc :=[]rulingList {_dbec (_bggb [0])};for _ ,_bdcb :=range _bggb [1:]{if _ ,_eccdec :=_aacgg [_bdcb ];
|
||
_eccdec {continue ;};_edgfc =append (_edgfc ,_dbec (_bdcb ));};return _edgfc ;};
|
||
|
||
// TableInfo gets table information of the textmark `tm`.
|
||
func (_dee *TextMark )TableInfo ()(*TextTable ,[][]int ){if !_dee ._efeg {return nil ,nil ;};_cdgc :=_dee ._cfaa ;_bbdb :=_cdgc .getCellInfo (*_dee );return _cdgc ,_bbdb ;};const (_cbdb =1.0e-6;_dgaf =1.0e-4;_cbbd =10;_dage =6;_fcca =0.5;_afbgg =0.12;_cdfe =0.19;
|
||
_ebfgc =0.04;_gagd =0.04;_bfcae =1.0;_fae =0.04;_cac =0.4;_fcag =0.7;_dgcc =1.0;_cfcf =0.1;_gcfb =1.4;_bbad =0.46;_fadg =0.02;_dgea =0.2;_befe =0.5;_afeg =4;_bag =4.0;_ggca =6;_beec =0.3;_efega =0.01;_agba =0.02;_afec =2;_dccbc =2;_ccbb =500;_ggdg =4.0;
|
||
_cgef =4.0;_beed =0.05;_dccf =0.1;_dfed =2.0;_gbb =2.0;_bddc =1.5;_gaea =3.0;_efeb =0.25;);type textPara struct{_ba .PdfRectangle ;_dgabg _ba .PdfRectangle ;_fdec []*textLine ;_fbbea *textTable ;_gecb bool ;_cddef bool ;_cfee *textPara ;_fdgf *textPara ;
|
||
_cgag *textPara ;_ecada *textPara ;_fcdfg []list ;};func (_agaa *textLine )toTextMarks (_ggaa *int )[]TextMark {var _daef []TextMark ;for _ ,_cdbb :=range _agaa ._cdcg {if _cdbb ._gdec {_daef =_ffeg (_daef ,_ggaa ,"\u0020");};_dcad :=_cdbb .toTextMarks (_ggaa );
|
||
_daef =append (_daef ,_dcad ...);};return _daef ;};func (_cdae *textObject )getCurrentFont ()*_ba .PdfFont {_gegc :=_cdae ._efed ._gbdg ;if _gegc ==nil {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e");
|
||
return _ba .DefaultFont ();};return _gegc ;};func (_fdgc *shapesState )devicePoint (_eddb ,_gfcf float64 )_agf .Point {_agca :=_fdgc ._gdfd .Mult (_fdgc ._gafa );_eddb ,_gfcf =_agca .Transform (_eddb ,_gfcf );return _agf .NewPoint (_eddb ,_gfcf );};func _dbeae (_gfegb float64 )bool {return _gf .Abs (_gfegb )< _cbdb };
|
||
// _gecbg returns true if `_dfdb` and `_ddgc` differ by no more than the tolerance `_dfed`.
func _gecbg(_dfdb, _ddgc float64) bool {
	gap := _gf.Abs(_dfdb - _ddgc)
	return gap <= _dfed
}
|
||
|
||
// String returns a description of `state`.
|
||
func (_bdc *textState )String ()string {_bac :="\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]";if _bdc ._gbdg !=nil {_bac =_bdc ._gbdg .BaseFont ();};return _agc .Sprintf ("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071",_bdc ._ffaf ,_bdc ._gdeg ,_bdc ._dcd ,_bac );
|
||
};func (_bcbe rulingList )asTiling ()gridTiling {if _dfge {_ga .Log .Info ("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_bcbe ));
|
||
};for _aabf ,_cccb :=range _bcbe [1:]{_efaac :=_bcbe [_aabf ];if _efaac .alignsPrimary (_cccb )&&_efaac .alignsSec (_cccb ){_ga .Log .Error ("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073",_cccb ,_efaac );
|
||
};};_bcbe .sortStrict ();_bcbe .log ("\u0073n\u0061\u0070\u0070\u0065\u0064");_bdgd ,_aacda :=_bcbe .vertsHorzs ();_dabbe :=_bdgd .primaries ();_ccccc :=_aacda .primaries ();_dgdg :=len (_dabbe )-1;_ecffg :=len (_ccccc )-1;if _dgdg ==0||_ecffg ==0{return gridTiling {};
|
||
};_fbbgb :=_ba .PdfRectangle {Llx :_dabbe [0],Urx :_dabbe [_dgdg ],Lly :_ccccc [0],Ury :_ccccc [_ecffg ]};if _dfge {_ga .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064",len (_bdgd ));
|
||
for _bafdd ,_dacfe :=range _bdgd {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bafdd ,_dacfe );};_ga .Log .Info ("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064",len (_aacda ));
|
||
for _fddb ,_adcgd :=range _aacda {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_fddb ,_adcgd );};_ga .Log .Info ("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f",_dgdg ,_ecffg ,_dabbe ,_ccccc );
|
||
};_eadec :=make ([]gridTile ,_dgdg *_ecffg );for _fege :=_ecffg -1;_fege >=0;_fege --{_ffcaf :=_ccccc [_fege ];_dgdc :=_ccccc [_fege +1];for _ggbb :=0;_ggbb < _dgdg ;_ggbb ++{_egggeb :=_dabbe [_ggbb ];_cecb :=_dabbe [_ggbb +1];_dddcd :=_bdgd .findPrimSec (_egggeb ,_ffcaf );
|
||
_fefgc :=_bdgd .findPrimSec (_cecb ,_ffcaf );_dgadf :=_aacda .findPrimSec (_ffcaf ,_egggeb );_cdeed :=_aacda .findPrimSec (_dgdc ,_egggeb );_faae :=_ba .PdfRectangle {Llx :_egggeb ,Urx :_cecb ,Lly :_ffcaf ,Ury :_dgdc };_efegg :=_ebba (_faae ,_dddcd ,_fefgc ,_dgadf ,_cdeed );
|
||
_eadec [_fege *_dgdg +_ggbb ]=_efegg ;if _dfge {_agc .Printf ("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_ggbb ,_fege ,_efegg .String (),_efegg .Width (),_efegg .Height ());
|
||
};};};if _dfge {_ga .Log .Info ("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_fbbgb );
|
||
};_dgdgg :=make ([]map[float64 ]gridTile ,_ecffg );for _gbbaf :=_ecffg -1;_gbbaf >=0;_gbbaf --{if _dfge {_agc .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_gbbaf );};_dgdgg [_gbbaf ]=make (map[float64 ]gridTile ,_dgdg );for _gcdcg :=0;_gcdcg < _dgdg ;
|
||
_gcdcg ++{_eafg :=_eadec [_gbbaf *_dgdg +_gcdcg ];if _dfge {_agc .Printf ("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gcdcg ,_eafg );};if !_eafg ._ecaf {continue ;};_aaag :=_gcdcg ;for _abef :=_gcdcg +1;!_eafg ._bfab &&_abef < _dgdg ;
|
||
_abef ++{_fbda :=_eadec [_gbbaf *_dgdg +_abef ];_eafg .Urx =_fbda .Urx ;_eafg ._dgccd =_eafg ._dgccd ||_fbda ._dgccd ;_eafg ._bgbfd =_eafg ._bgbfd ||_fbda ._bgbfd ;_eafg ._bfab =_fbda ._bfab ;if _dfge {_agc .Printf ("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a",_abef ,_fbda ,_eafg );
|
||
};_aaag =_abef ;};if _dfge {_agc .Printf (" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n",_gcdcg ,_aaag ,_eafg );};_gcdcg =_aaag ;_dgdgg [_gbbaf ][_eafg .Llx ]=_eafg ;};};_dabc :=make (map[float64 ]map[float64 ]gridTile ,_ecffg );
|
||
_gbbe :=make (map[float64 ]map[float64 ]struct{},_ecffg );for _bdfca :=_ecffg -1;_bdfca >=0;_bdfca --{_ggdc :=_eadec [_bdfca *_dgdg ].Lly ;_dabc [_ggdc ]=make (map[float64 ]gridTile ,_dgdg );_gbbe [_ggdc ]=make (map[float64 ]struct{},_dgdg );};if _dfge {_ga .Log .Info ("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066",_fbbgb );
|
||
};for _agbff :=_ecffg -1;_agbff >=0;_agbff --{_effd :=_eadec [_agbff *_dgdg ].Lly ;_dcbaf :=_dgdgg [_agbff ];if _dfge {_agc .Printf ("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a",_agbff );};for _ ,_eaea :=range _cbgc (_dcbaf ){if _ ,_aaec :=_gbbe [_effd ][_eaea ];
|
||
_aaec {continue ;};_ddad :=_dcbaf [_eaea ];if _dfge {_agc .Printf (" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_ddad .String ());};for _gbdf :=_agbff -1;_gbdf >=0;_gbdf --{if _ddad ._bgbfd {break ;};_bffdb :=_dgdgg [_gbdf ];_efcb ,_caaab :=_bffdb [_eaea ];
|
||
if !_caaab {break ;};if _efcb .Urx !=_ddad .Urx {break ;};_ddad ._bgbfd =_efcb ._bgbfd ;_ddad .Lly =_efcb .Lly ;if _dfge {_agc .Printf ("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a",_efcb .String (),_ddad .String ());
|
||
};_gbbe [_efcb .Lly ][_efcb .Llx ]=struct{}{};};if _agbff ==0{_ddad ._bgbfd =true ;};if _ddad .complete (){_dabc [_effd ][_eaea ]=_ddad ;};};};_aaef :=gridTiling {PdfRectangle :_fbbgb ,_fece :_ffad (_dabc ),_dfca :_ecbcb (_dabc ),_gdcg :_dabc };_aaef .log ("\u0043r\u0065\u0061\u0074\u0065\u0064");
|
||
return _aaef ;};func _cceb (_gdbec map[int ][]float64 ){if len (_gdbec )<=1{return ;};_abaee :=_adgbc (_gdbec );if _efda {_ga .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_abaee );};var _ebgbe ,_bfdf int ;
|
||
for _ebgbe ,_bfdf =range _abaee {if _gdbec [_bfdf ]!=nil {break ;};};for _addg ,_afae :=range _abaee [_ebgbe :]{_dadf :=_gdbec [_afae ];if _dadf ==nil {continue ;};if _efda {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_ebgbe +_addg ,_bfdf ,_afae );
|
||
};_egdea :=_gdbec [_afae ];if _egdea [len (_egdea )-1]> _dadf [0]{_egdea [len (_egdea )-1]=_dadf [0];_gdbec [_bfdf ]=_egdea ;};_bfdf =_afae ;};};type stateStack []*textState ;func (_fgaaf *textObject )setTextMatrix (_cbd []float64 ){if len (_cbd )!=6{_ga .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029",len (_cbd ));
|
||
return ;};_bba ,_eea ,_abg ,_adgb ,_gec ,_eeg :=_cbd [0],_cbd [1],_cbd [2],_cbd [3],_cbd [4],_cbd [5];_fgaaf ._eefe =_agf .NewMatrix (_bba ,_eea ,_abg ,_adgb ,_gec ,_eeg );_fgaaf ._dbc =_fgaaf ._eefe ;};func (_dfgfc intSet )add (_dega int ){_dfgfc [_dega ]=struct{}{}};
|
||
func (_fbcb paraList )inTile (_cbggf gridTile )paraList {var _fbfb paraList ;for _ ,_dagcf :=range _fbcb {if _cbggf .contains (_dagcf .PdfRectangle ){_fbfb =append (_fbfb ,_dagcf );};};if _efda {_agc .Printf ("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n",_cbggf ,len (_fbfb ));
|
||
for _gfgcc ,_ebga :=range _fbfb {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gfgcc ,_ebga );};_agc .Println ("");};return _fbfb ;};const _bcgf =10;func (_gafb *shapesState )closePath (){if _gafb ._gbdgg {_gafb ._gfce =append (_gafb ._gfce ,_ddbd (_gafb ._gcaaf ));
|
||
_gafb ._gbdgg =false ;}else if len (_gafb ._gfce )==0{if _fdbg {_ga .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");};_gafb ._gbdgg =false ;return ;};_gafb ._gfce [len (_gafb ._gfce )-1].close ();
|
||
if _fdbg {_ga .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_gafb );};};type rulingKind int ;func (_gdbf *shapesState )addPoint (_gbdaf ,_fcfd float64 ){_bfbb :=_gdbf .establishSubpath ();_gaec :=_gdbf .devicePoint (_gbdaf ,_fcfd );
|
||
if _bfbb ==nil {_gdbf ._gbdgg =true ;_gdbf ._gcaaf =_gaec ;}else {_bfbb .add (_gaec );};};
|
||
|
||
// TextTable represents a table.
|
||
// Cells are ordered top-to-bottom, left-to-right.
|
||
// Cells[y] is the (0-offset) y'th row in the table.
|
||
// Cells[y][x] is the (0-offset) x'th column in the table.
|
||
type TextTable struct{_ba .PdfRectangle ;W ,H int ;Cells [][]TableCell ;};func (_ffce paraList )merge ()*textPara {_ga .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_ffce ));
|
||
if len (_ffce )==0{return nil ;};_ffce .sortReadingOrder ();_fagbg :=_ffce [0].PdfRectangle ;_gcfg :=_ffce [0]._fdec ;for _ ,_adfg :=range _ffce [1:]{_fagbg =_bbbafc (_fagbg ,_adfg .PdfRectangle );_gcfg =append (_gcfg ,_adfg ._fdec ...);};return _bfgg (_fagbg ,_gcfg );
|
||
};func (_cafdc paraList )addNeighbours (){_ecgfb :=func (_eeadg []int ,_ecdf *textPara )([]*textPara ,[]*textPara ){_gfbd :=make ([]*textPara ,0,len (_eeadg )-1);_acgd :=make ([]*textPara ,0,len (_eeadg )-1);for _ ,_baegc :=range _eeadg {_dbffc :=_cafdc [_baegc ];
|
||
if _dbffc .Urx <=_ecdf .Llx {_gfbd =append (_gfbd ,_dbffc );}else if _dbffc .Llx >=_ecdf .Urx {_acgd =append (_acgd ,_dbffc );};};return _gfbd ,_acgd ;};_dcdef :=func (_addd []int ,_gfffc *textPara )([]*textPara ,[]*textPara ){_cfacb :=make ([]*textPara ,0,len (_addd )-1);
|
||
_cbabd :=make ([]*textPara ,0,len (_addd )-1);for _ ,_befee :=range _addd {_egefg :=_cafdc [_befee ];if _egefg .Ury <=_gfffc .Lly {_cbabd =append (_cbabd ,_egefg );}else if _egefg .Lly >=_gfffc .Ury {_cfacb =append (_cfacb ,_egefg );};};return _cfacb ,_cbabd ;
|
||
};_ecgg :=_cafdc .yNeighbours (_agba );for _ ,_bfdd :=range _cafdc {_cefbb :=_ecgg [_bfdd ];if len (_cefbb )==0{continue ;};_ggabg ,_egbbb :=_ecgfb (_cefbb ,_bfdd );if len (_ggabg )==0&&len (_egbbb )==0{continue ;};if len (_ggabg )> 0{_facb :=_ggabg [0];
|
||
for _ ,_deee :=range _ggabg [1:]{if _deee .Urx >=_facb .Urx {_facb =_deee ;};};for _ ,_dcfc :=range _ggabg {if _dcfc !=_facb &&_dcfc .Urx > _facb .Llx {_facb =nil ;break ;};};if _facb !=nil &&_bggg (_bfdd .PdfRectangle ,_facb .PdfRectangle ){_bfdd ._cfee =_facb ;
|
||
};};if len (_egbbb )> 0{_adbed :=_egbbb [0];for _ ,_bffce :=range _egbbb [1:]{if _bffce .Llx <=_adbed .Llx {_adbed =_bffce ;};};for _ ,_abfc :=range _egbbb {if _abfc !=_adbed &&_abfc .Llx < _adbed .Urx {_adbed =nil ;break ;};};if _adbed !=nil &&_bggg (_bfdd .PdfRectangle ,_adbed .PdfRectangle ){_bfdd ._fdgf =_adbed ;
|
||
};};};_ecgg =_cafdc .xNeighbours (_efega );for _ ,_bdeg :=range _cafdc {_adee :=_ecgg [_bdeg ];if len (_adee )==0{continue ;};_ddafa ,_ebgec :=_dcdef (_adee ,_bdeg );if len (_ddafa )==0&&len (_ebgec )==0{continue ;};if len (_ebgec )> 0{_ccaef :=_ebgec [0];
|
||
for _ ,_gccef :=range _ebgec [1:]{if _gccef .Ury >=_ccaef .Ury {_ccaef =_gccef ;};};for _ ,_gefd :=range _ebgec {if _gefd !=_ccaef &&_gefd .Ury > _ccaef .Lly {_ccaef =nil ;break ;};};if _ccaef !=nil &&_fgfag (_bdeg .PdfRectangle ,_ccaef .PdfRectangle ){_bdeg ._ecada =_ccaef ;
|
||
};};if len (_ddafa )> 0{_afgdd :=_ddafa [0];for _ ,_badf :=range _ddafa [1:]{if _badf .Lly <=_afgdd .Lly {_afgdd =_badf ;};};for _ ,_aafd :=range _ddafa {if _aafd !=_afgdd &&_aafd .Lly < _afgdd .Ury {_afgdd =nil ;break ;};};if _afgdd !=nil &&_fgfag (_bdeg .PdfRectangle ,_afgdd .PdfRectangle ){_bdeg ._cgag =_afgdd ;
|
||
};};};for _ ,_efecc :=range _cafdc {if _efecc ._cfee !=nil &&_efecc ._cfee ._fdgf !=_efecc {_efecc ._cfee =nil ;};if _efecc ._cgag !=nil &&_efecc ._cgag ._ecada !=_efecc {_efecc ._cgag =nil ;};if _efecc ._fdgf !=nil &&_efecc ._fdgf ._cfee !=_efecc {_efecc ._fdgf =nil ;
|
||
};if _efecc ._ecada !=nil &&_efecc ._ecada ._cgag !=_efecc {_efecc ._ecada =nil ;};};};
|
||
|
||
// String returns a string describing `pt`.
|
||
func (_affc PageText )String ()string {_cfa :=_agc .Sprintf ("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073",len (_affc ._aebf ));_ece :=[]string {"\u002d"+_cfa };for _ ,_ddac :=range _affc ._aebf {_ece =append (_ece ,_ddac .String ());
|
||
};_ece =append (_ece ,"\u002b"+_cfa );return _f .Join (_ece ,"\u000a");};func (_efc *TextMarkArray )exists (_dadd TextMark )bool {for _ ,_bdec :=range _efc .Elements (){if _ad .DeepEqual (_dadd .DirectObject ,_bdec .DirectObject )&&_ad .DeepEqual (_dadd .BBox ,_bdec .BBox )&&_bdec .Text ==_dadd .Text {return true ;
|
||
};};return false ;};func (_gfgd *subpath )removeDuplicates (){if len (_gfgd ._aeee )==0{return ;};_aeda :=[]_agf .Point {_gfgd ._aeee [0]};for _ ,_gcbg :=range _gfgd ._aeee [1:]{if !_gaeg (_gcbg ,_aeda [len (_aeda )-1]){_aeda =append (_aeda ,_gcbg );};
|
||
};_gfgd ._aeee =_aeda ;};func _bfgg (_gfgce _ba .PdfRectangle ,_fbgg []*textLine )*textPara {return &textPara {PdfRectangle :_gfgce ,_fdec :_fbgg };};func (_bgea *textLine )pullWord (_dffb *wordBag ,_adfb *textWord ,_gaee int ){_bgea .appendWord (_adfb );
|
||
_dffb .removeWord (_adfb ,_gaee );};func (_efedd compositeCell )parasBBox ()(paraList ,_ba .PdfRectangle ){return _efedd .paraList ,_efedd .PdfRectangle ;};func (_ecaba rulingList )sortStrict (){_e .Slice (_ecaba ,func (_gcdbb ,_gecf int )bool {_eccgf ,_deaf :=_ecaba [_gcdbb ],_ecaba [_gecf ];
|
||
_eefdc ,_ffged :=_eccgf ._bbce ,_deaf ._bbce ;if _eefdc !=_ffged {return _eefdc > _ffged ;};_aadg ,_ebafg :=_eccgf ._edga ,_deaf ._edga ;if !_dbeae (_aadg -_ebafg ){return _aadg < _ebafg ;};_aadg ,_ebafg =_eccgf ._fcec ,_deaf ._fcec ;if _aadg !=_ebafg {return _aadg < _ebafg ;
|
||
};return _eccgf ._abeg < _deaf ._abeg ;});};func (_caccg *ruling )gridIntersecting (_dbag *ruling )bool {return _gecbg (_caccg ._fcec ,_dbag ._fcec )&&_gecbg (_caccg ._abeg ,_dbag ._abeg );};func _adgd (_gbed string ,_gdgef int )string {if len (_gbed )< _gdgef {return _gbed ;
|
||
};return _gbed [:_gdgef ];};func _cgfgf (_aeeg ,_cedf bounded )float64 {_cbga :=_abag (_aeeg ,_cedf );if !_dbeae (_cbga ){return _cbga ;};return _eddba (_aeeg ,_cedf );};func _gbbc (_cbaf string ,_bcaf []rulingList ){_ga .Log .Info ("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073",len (_bcaf ),_cbaf );
|
||
for _dbcd ,_aaaee :=range _bcaf {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_dbcd ,_aaaee .String ());};};
|
||
|
||
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
|
||
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
|
||
//
|
||
// Replace with a function like Extract() (*PageText, error)
|
||
func (_ccfa *Extractor )ExtractPageText ()(*PageText ,int ,int ,error ){_dae ,_baf ,_cfb ,_fba :=_ccfa .extractPageText (_ccfa ._ac ,_ccfa ._gbe ,_agf .IdentityMatrix (),0,false );if _fba !=nil &&_fba !=_ba .ErrColorOutOfRange {return nil ,0,0,_fba ;};
|
||
if _ccfa ._cf !=nil {_dae ._eaag ._caab =_ccfa ._cf .UseSimplerExtractionProcess ;};_dae .computeViews ();_fba =_ccab (_dae );if _fba !=nil {return nil ,0,0,_fba ;};if _ccfa ._cf !=nil {if _ccfa ._cf .ApplyCropBox &&_ccfa ._eb !=nil {_dae .ApplyArea (*_ccfa ._eb );
|
||
};_dae ._eaag ._bcc =_ccfa ._cf .DisableDocumentTags ;};return _dae ,_baf ,_cfb ,nil ;};func (_dfdec *wordBag )removeWord (_abae *textWord ,_cfe int ){_acfc :=_dfdec ._fcgd [_cfe ];_acfc =_gcdff (_acfc ,_abae );if len (_acfc )==0{delete (_dfdec ._fcgd ,_cfe );
|
||
}else {_dfdec ._fcgd [_cfe ]=_acfc ;};};func _cafaa (_eggd float64 ,_gbfg int )int {if _gbfg ==0{_gbfg =1;};_fecb :=float64 (_gbfg );return int (_gf .Round (_eggd /_fecb )*_fecb );};func (_beg *textObject )setCharSpacing (_feb float64 ){if _beg ==nil {return ;
|
||
};_beg ._efed ._ffaf =_feb ;if _cfca {_ga .Log .Info ("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073",_feb ,_beg ._efed .String ());};};func (_fdcdf *wordBag )firstWord (_aeg int )*textWord {return _fdcdf ._fcgd [_aeg ][0]};
|
||
func (_ebgf paraList )reorder (_ceafe []int ){_cead :=make (paraList ,len (_ebgf ));for _aceef ,_abced :=range _ceafe {_cead [_aceef ]=_ebgf [_abced ];};copy (_ebgf ,_cead );};func _dgda (_dbeb []*textLine ,_bdecee string ,_bfda []*list )*list {return &list {_aebd :_dbeb ,_cdde :_bdecee ,_abcc :_bfda };
|
||
};func _eeag (_fagd []*textLine ,_fgeb map[float64 ][]*textLine ,_acdg []float64 ,_gafga int ,_gbffc ,_bgcdc float64 )[]*list {_bccf :=[]*list {};_gfcae :=_gafga ;_gafga =_gafga +1;_ccbgg :=_acdg [_gfcae ];_dcbed :=_fgeb [_ccbgg ];_afc :=_acfa (_dcbed ,_bgcdc ,_gbffc );
|
||
for _dgga ,_faedc :=range _afc {var _ggdb float64 ;_edag :=[]*list {};_cefg :=_faedc ._bbfg ;_fdcec :=_bgcdc ;if _dgga < len (_afc )-1{_fdcec =_afc [_dgga +1]._bbfg ;};if _gafga < len (_acdg ){_edag =_eeag (_fagd ,_fgeb ,_acdg ,_gafga ,_cefg ,_fdcec );
|
||
};_ggdb =_fdcec ;if len (_edag )> 0{_ffdc :=_edag [0];if len (_ffdc ._aebd )> 0{_ggdb =_ffdc ._aebd [0]._bbfg ;};};_cdaf :=[]*textLine {_faedc };_dagf :=_ggdbe (_faedc ,_fagd ,_acdg ,_cefg ,_ggdb );_cdaf =append (_cdaf ,_dagf ...);_gfed :=_dgda (_cdaf ,"\u0062\u0075\u006c\u006c\u0065\u0074",_edag );
|
||
_gfed ._ddef =_efgc (_cdaf ,"");_bccf =append (_bccf ,_gfed );};return _bccf ;};func _dggea (_cdbbd []_add .PdfObject )(_fbfg ,_gaade float64 ,_agcbb error ){if len (_cdbbd )!=2{return 0,0,_agc .Errorf ("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064",len (_cdbbd ));
|
||
};_feae ,_agcbb :=_add .GetNumbersAsFloat (_cdbbd );if _agcbb !=nil {return 0,0,_agcbb ;};return _feae [0],_feae [1],nil ;};func _gaeg (_ccged ,_cedfa _agf .Point )bool {return _ccged .X ==_cedfa .X &&_ccged .Y ==_cedfa .Y };func (_bgcdcd *wordBag )removeDuplicates (){if _bdgb {_ga .Log .Info ("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071",_bgcdcd .text ());
|
||
};for _ ,_dfae :=range _bgcdcd .depthIndexes (){if len (_bgcdcd ._fcgd [_dfae ])==0{continue ;};_ffeb :=_bgcdcd ._fcgd [_dfae ][0];_facge :=_dgea *_ffeb ._eabbf ;_ggfa :=_ffeb ._dfagd ;for _ ,_dddf :=range _bgcdcd .depthBand (_ggfa ,_ggfa +_facge ){_ggdf :=map[*textWord ]struct{}{};
|
||
_febaf :=_bgcdcd ._fcgd [_dddf ];for _ ,_gbfef :=range _febaf {if _ ,_bfbc :=_ggdf [_gbfef ];_bfbc {continue ;};for _ ,_effgg :=range _febaf {if _ ,_eedfg :=_ggdf [_effgg ];_eedfg {continue ;};if _effgg !=_gbfef &&_effgg ._eaae ==_gbfef ._eaae &&_gf .Abs (_effgg .Llx -_gbfef .Llx )< _facge &&_gf .Abs (_effgg .Urx -_gbfef .Urx )< _facge &&_gf .Abs (_effgg .Lly -_gbfef .Lly )< _facge &&_gf .Abs (_effgg .Ury -_gbfef .Ury )< _facge {_ggdf [_effgg ]=struct{}{};
|
||
};};};if len (_ggdf )> 0{_bdbb :=0;for _ ,_eggb :=range _febaf {if _ ,_abffe :=_ggdf [_eggb ];!_abffe {_febaf [_bdbb ]=_eggb ;_bdbb ++;};};_bgcdcd ._fcgd [_dddf ]=_febaf [:len (_febaf )-len (_ggdf )];if len (_bgcdcd ._fcgd [_dddf ])==0{delete (_bgcdcd ._fcgd ,_dddf );
|
||
};};};};};func (_gdab rulingList )toTilings ()(rulingList ,[]gridTiling ){_gdab .log ("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s");if len (_gdab )==0{return nil ,nil ;};_gdab =_gdab .tidied ("\u0061\u006c\u006c");_gdab .log ("\u0074\u0069\u0064\u0069\u0065\u0064");
|
||
_agcafa :=_gdab .toGrids ();_fbafg :=make ([]gridTiling ,len (_agcafa ));for _fbddd ,_becd :=range _agcafa {_fbafg [_fbddd ]=_becd .asTiling ();};return _gdab ,_fbafg ;};func (_afag pathSection )bbox ()_ba .PdfRectangle {_gabea :=_afag ._dbdc [0]._aeee [0];
|
||
_cdgb :=_ba .PdfRectangle {Llx :_gabea .X ,Urx :_gabea .X ,Lly :_gabea .Y ,Ury :_gabea .Y };_bggd :=func (_egab _agf .Point ){if _egab .X < _cdgb .Llx {_cdgb .Llx =_egab .X ;}else if _egab .X > _cdgb .Urx {_cdgb .Urx =_egab .X ;};if _egab .Y < _cdgb .Lly {_cdgb .Lly =_egab .Y ;
|
||
}else if _egab .Y > _cdgb .Ury {_cdgb .Ury =_egab .Y ;};};for _ ,_gbeb :=range _afag ._dbdc [0]._aeee [1:]{_bggd (_gbeb );};for _ ,_bbda :=range _afag ._dbdc [1:]{for _ ,_bgeg :=range _bbda ._aeee {_bggd (_bgeg );};};return _cdgb ;};func (_cedc rulingList )snapToGroups ()rulingList {_ecfda ,_bcdf :=_cedc .vertsHorzs ();
|
||
if len (_ecfda )> 0{_ecfda =_ecfda .snapToGroupsDirection ();};if len (_bcdf )> 0{_bcdf =_bcdf .snapToGroupsDirection ();};_gbafg :=append (_ecfda ,_bcdf ...);_gbafg .log ("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073");return _gbafg ;
|
||
};func (_dcdd rectRuling )asRuling ()(*ruling ,bool ){_bccc :=ruling {_bbce :_dcdd ._ecaa ,Color :_dcdd .Color ,_ccaa :_ccfg };switch _dcdd ._ecaa {case _cfae :_bccc ._edga =0.5*(_dcdd .Llx +_dcdd .Urx );_bccc ._fcec =_dcdd .Lly ;_bccc ._abeg =_dcdd .Ury ;
|
||
_afed ,_cefb :=_dcdd .checkWidth (_dcdd .Llx ,_dcdd .Urx );if !_cefb {if _adbfb {_ga .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_dcdd );
|
||
};return nil ,false ;};_bccc ._ebcee =_afed ;case _aaad :_bccc ._edga =0.5*(_dcdd .Lly +_dcdd .Ury );_bccc ._fcec =_dcdd .Llx ;_bccc ._abeg =_dcdd .Urx ;_bfcbg ,_fcea :=_dcdd .checkWidth (_dcdd .Lly ,_dcdd .Ury );if !_fcea {if _adbfb {_ga .Log .Error ("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076",_dcdd );
|
||
};return nil ,false ;};_bccc ._ebcee =_bfcbg ;default:_ga .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_dcdd ._ecaa );return nil ,false ;};return &_bccc ,true ;};var _gfcgc =map[rulingKind ]string {_fbcc :"\u006e\u006f\u006e\u0065",_aaad :"\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_cfae :"\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c"};
|
||
func (_geba *wordBag )depthRange (_afe ,_geff int )[]int {var _fadf []int ;for _faabb :=range _geba ._fcgd {if _afe <=_faabb &&_faabb <=_geff {_fadf =append (_fadf ,_faabb );};};if len (_fadf )==0{return nil ;};_e .Ints (_fadf );return _fadf ;};func _bfba (_bacfa ,_dgdcc int )int {if _bacfa < _dgdcc {return _bacfa ;
|
||
};return _dgdcc ;};func _gcdff (_dfcbf []*textWord ,_ageb *textWord )[]*textWord {for _efdc ,_daddf :=range _dfcbf {if _daddf ==_ageb {return _fecf (_dfcbf ,_efdc );};};_ga .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_ageb );
|
||
return nil ;};func (_bada rulingList )sort (){_e .Slice (_bada ,_bada .comp )};
|
||
|
||
// String returns a human readable description of `s`.
|
||
func (_ffdec intSet )String ()string {var _eaab []int ;for _aafab :=range _ffdec {if _ffdec .has (_aafab ){_eaab =append (_eaab ,_aafab );};};_e .Ints (_eaab );return _agc .Sprintf ("\u0025\u002b\u0076",_eaab );};func (_gebd *textLine )endsInHyphen ()bool {_edff :=_gebd ._cdcg [len (_gebd ._cdcg )-1];
|
||
_cgdc :=_edff ._eaae ;_bdagd ,_gcaf :=_a .DecodeLastRuneInString (_cgdc );if _gcaf <=0||!_c .Is (_c .Hyphen ,_bdagd ){return false ;};if _edff ._gdec &&_dfc (_cgdc ){return true ;};return _dfc (_gebd .text ());};func (_caea *textObject )nextLine (){_caea .moveLP (0,-_caea ._efed ._bfa )};
|
||
func (_abac *shapesState )fill (_bbgb *[]pathSection ){_gbc :=pathSection {_dbdc :_abac ._gfce ,Color :_abac ._edf .getFillColor ()};*_bbgb =append (*_bbgb ,_gbc );if _adgbf {_cfdc :=_gbc .bbox ();_agc .Printf ("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a",len (*_bbgb ),len (_gbc ._dbdc ),_abac ,_gbc .Color ,_cfdc ,_cfdc .Width (),_cfdc .Height ());
|
||
if _ebce {for _gedg ,_babc :=range _gbc ._dbdc {_agc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_gedg ,_babc );if _gedg ==10{break ;};};};};};func (_daagf *textTable )bbox ()_ba .PdfRectangle {return _daagf .PdfRectangle };func (_egggf *textWord )addDiacritic (_eaeb string ){_geabb :=_egggf ._abcee [len (_egggf ._abcee )-1];
|
||
_geabb ._bfdb +=_eaeb ;_geabb ._bfdb =_ea .NFKC .String (_geabb ._bfdb );};
|
||
|
||
// ExtractFonts returns all font information from the page extractor, including
|
||
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
|
||
//
|
||
// The argument `previousPageFonts` is used when trying to build a complete font catalog for multiple pages or the entire document.
|
||
// The entries from `previousPageFonts` are added to the returned result unless already included in the page, i.e. no duplicate entries.
|
||
//
|
||
// NOTE: If previousPageFonts is nil, all fonts from the page will be returned. Use it when building up a full list of fonts for a document or page range.
|
||
func (_gd *Extractor )ExtractFonts (previousPageFonts *PageFonts )(*PageFonts ,error ){_eg :=PageFonts {};_fbe :=_eg .extractPageResourcesToFont (_gd ._gbe );if _fbe !=nil {return nil ,_fbe ;};if previousPageFonts !=nil {for _ ,_dcb :=range previousPageFonts .Fonts {if !_aga (_eg .Fonts ,_dcb .FontName ){_eg .Fonts =append (_eg .Fonts ,_dcb );
|
||
};};};return &PageFonts {Fonts :_eg .Fonts },nil ;};func (_ffge paraList )list ()[]*list {var _aacb []*textLine ;var _bfgc []*textLine ;for _ ,_ccac :=range _ffge {_afdb :=_ccac .getListLines ();_aacb =append (_aacb ,_afdb ...);_bfgc =append (_bfgc ,_ccac ._fdec ...);
|
||
};_edfc :=_ccba (_aacb );_befg :=_dbfg (_bfgc ,_edfc );return _befg ;};func _fgdcg (_bbcb int ,_gggbb map[int ][]float64 )([]int ,int ){_ecbff :=make ([]int ,_bbcb );_cfagab :=0;for _adbcf :=0;_adbcf < _bbcb ;_adbcf ++{_ecbff [_adbcf ]=_cfagab ;_cfagab +=len (_gggbb [_adbcf ])+1;
|
||
};return _ecbff ,_cfagab ;};
|
||
|
||
// ApplyArea processes the page text only within the specified area `bbox`.
|
||
// Each time ApplyArea is called, it updates the result set in `pt`.
|
||
// Can be called multiple times in a row with different bounding boxes.
|
||
func (_cfbg *PageText )ApplyArea (bbox _ba .PdfRectangle ){_ddae :=make ([]*textMark ,0,len (_cfbg ._aebf ));for _ ,_ecbd :=range _cfbg ._aebf {if _gabc (_ecbd .bbox (),bbox ){_ddae =append (_ddae ,_ecbd );};};var _aag paraList ;_egfd :=len (_ddae );for _ecea :=0;
|
||
_ecea < 360&&_egfd > 0;_ecea +=90{_bfca :=make ([]*textMark ,0,len (_ddae )-_egfd );for _ ,_fbc :=range _ddae {if _fbc ._gfbg ==_ecea {_bfca =append (_bfca ,_fbc );};};if len (_bfca )> 0{_gae :=_cabcg (_bfca ,_cfbg ._bbdc ,nil ,nil ,_cfbg ._eaag ._caab );
|
||
_aag =append (_aag ,_gae ...);_egfd -=len (_bfca );};};_fcb :=new (_dc .Buffer );_aag .writeText (_fcb );_cfbg ._ggff =_fcb .String ();_cfbg ._ccca =_aag .toTextMarks ();_cfbg ._gdba =_aag .tables ();};func _gcdb (_aeeec ,_bdecb _ba .PdfRectangle )bool {return _aeeec .Llx <=_bdecb .Llx &&_bdecb .Urx <=_aeeec .Urx &&_aeeec .Lly <=_bdecb .Lly &&_bdecb .Ury <=_aeeec .Ury ;
|
||
};
|
||
|
||
// PageTextOptions holds various options available in extraction process.
|
||
type PageTextOptions struct{_bcc bool ;_caab bool ;};func _ffad (_fbcf map[float64 ]map[float64 ]gridTile )[]float64 {_abgd :=make ([]float64 ,0,len (_fbcf ));_dgfe :=make (map[float64 ]struct{},len (_fbcf ));for _ ,_affgd :=range _fbcf {for _fbgeb :=range _affgd {if _ ,_ebfea :=_dgfe [_fbgeb ];
|
||
_ebfea {continue ;};_abgd =append (_abgd ,_fbgeb );_dgfe [_fbgeb ]=struct{}{};};};_e .Float64s (_abgd );return _abgd ;};func (_gffa *wordBag )maxDepth ()float64 {return _gffa ._ebgd -_gffa .Lly };func (_cbdge rulingList )isActualGrid ()(rulingList ,bool ){_bbgfa ,_baebg :=_cbdge .augmentGrid ();
|
||
if !(len (_bbgfa )>=_afec +1&&len (_baebg )>=_dccbc +1){if _adgbf {_ga .Log .Info ("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064",len (_bbgfa ),len (_baebg ),_afec +1,_dccbc +1);
|
||
};return nil ,false ;};if _adgbf {_ga .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074",_cbdge ,len (_bbgfa )>=2,len (_baebg )>=2,len (_bbgfa )>=2&&len (_baebg )>=2);
|
||
for _gfgbe ,_dddd :=range _cbdge {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a",_gfgbe ,_dddd );};};if _ecbfe {_ddfgd ,_efag :=_bbgfa [0],_bbgfa [len (_bbgfa )-1];_ggdbeg ,_ggfd :=_baebg [0],_baebg [len (_baebg )-1];if !(_cedbg (_ddfgd ._edga -_ggdbeg ._fcec )&&_cedbg (_efag ._edga -_ggdbeg ._abeg )&&_cedbg (_ggdbeg ._edga -_ddfgd ._abeg )&&_cedbg (_ggfd ._edga -_ddfgd ._fcec )){if _adgbf {_ga .Log .Info ("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073",_ddfgd ,_efag ,_ggdbeg ,_ggfd );
|
||
};return nil ,false ;};}else {if !_bbgfa .aligned (){if _eda {_ga .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064",len (_bbgfa ));
|
||
};return nil ,false ;};if !_baebg .aligned (){if _adgbf {_ga .Log .Info ("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064",len (_baebg ));
|
||
};return nil ,false ;};};_ebfe :=append (_bbgfa ,_baebg ...);return _ebfe ,true ;};
|
||
|
||
// PageFonts represents extracted fonts on a PDF page.
|
||
type PageFonts struct{Fonts []Font ;};func (_bedf *stateStack )size ()int {return len (*_bedf )};
|
||
|
||
// String returns a description of `l`.
|
||
func (_ecc *textLine )String ()string {return _agc .Sprintf ("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",_ecc ._bbfg ,_ecc .PdfRectangle ,_ecc ._fdfb ,_ecc .text ());
|
||
};func (_ecffe *textTable )put (_dbad ,_bcfg int ,_aabfg *textPara ){_ecffe ._egfea [_bgcc (_dbad ,_bcfg )]=_aabfg ;};type textResult struct{_egcbc PageText ;_dcga int ;_bcg int ;};func (_ffaff rulingList )findPrimSec (_cbabg ,_bgbfa float64 )*ruling {for _ ,_egaab :=range _ffaff {if _dbeae (_egaab ._edga -_cbabg )&&_egaab ._fcec -_dfed <=_bgbfa &&_bgbfa <=_egaab ._abeg +_dfed {return _egaab ;
|
||
};};return nil ;};func (_bdagc paraList )extractTables (_bcgc []gridTiling )paraList {if _efda {_ga .Log .Debug ("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_bdagc ));
|
||
};if len (_bdagc )< _ggca {return _bdagc ;};_dbgf :=_bdagc .findTables (_bcgc );if _efda {_ga .Log .Info ("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d",len (_dbgf ));
|
||
for _gfdbb ,_eede :=range _dbgf {_eede .log (_agc .Sprintf ("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064",_gfdbb ));};};return _bdagc .applyTables (_dbgf );};func _dfdc (_bbga ,_efbg *textPara )bool {return _fgfag (_bbga ._dgabg ,_efbg ._dgabg )};
|
||
func (_fgeg rulingList )merge ()*ruling {_cgbb :=_fgeg [0]._edga ;_dddgg :=_fgeg [0]._fcec ;_geca :=_fgeg [0]._abeg ;for _ ,_gfeg :=range _fgeg [1:]{_cgbb +=_gfeg ._edga ;if _gfeg ._fcec < _dddgg {_dddgg =_gfeg ._fcec ;};if _gfeg ._abeg > _geca {_geca =_gfeg ._abeg ;
|
||
};};_gfadf :=&ruling {_bbce :_fgeg [0]._bbce ,_ccaa :_fgeg [0]._ccaa ,Color :_fgeg [0].Color ,_edga :_cgbb /float64 (len (_fgeg )),_fcec :_dddgg ,_abeg :_geca };if _eda {_ga .Log .Info ("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073",len (_fgeg ),_gfadf );
|
||
for _ggec ,_bbcfe :=range _fgeg {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_ggec ,_bbcfe );};};return _gfadf ;};
|
||
|
||
// ExtractText processes and extracts all text data in content streams and returns as a string.
|
||
// It takes into account character encodings in the PDF file, which are decoded by
|
||
// CharcodeBytesToUnicode.
|
||
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
|
||
func (_egb *Extractor )ExtractText ()(string ,error ){_bbba ,_ ,_ ,_acf :=_egb .ExtractTextWithStats ();return _bbba ,_acf ;};type compositeCell struct{_ba .PdfRectangle ;paraList ;};func (_decd *textPara )writeText (_dgcd _gg .Writer ){if _decd ._fbbea ==nil {_decd .writeCellText (_dgcd );
|
||
return ;};for _bddeg :=0;_bddeg < _decd ._fbbea ._cgae ;_bddeg ++{for _fgddc :=0;_fgddc < _decd ._fbbea ._eacg ;_fgddc ++{_eeeff :=_decd ._fbbea .get (_fgddc ,_bddeg );if _eeeff ==nil {_dgcd .Write ([]byte ("\u0009"));}else {_eeeff .writeCellText (_dgcd );
|
||
};_dgcd .Write ([]byte ("\u0020"));};if _bddeg < _decd ._fbbea ._cgae -1{_dgcd .Write ([]byte ("\u000a"));};};};type markKind int ;type intSet map[int ]struct{};func (_daff intSet )has (_bcdg int )bool {_ ,_cdaec :=_daff [_bcdg ];return _cdaec };func (_ffafb *wordBag )scanBand (_ceaf string ,_bcgd *wordBag ,_cddg func (_bfafd *wordBag ,_dabb *textWord )bool ,_fbba ,_becae ,_ddbdc float64 ,_dgb ,_dabf bool )int {_fgg :=_bcgd ._cdea ;
|
||
var _becf map[int ]map[*textWord ]struct{};if !_dgb {_becf =_ffafb .makeRemovals ();};_caeg :=_fcca *_fgg ;_aeea :=0;for _ ,_fffe :=range _ffafb .depthBand (_fbba -_caeg ,_becae +_caeg ){if len (_ffafb ._fcgd [_fffe ])==0{continue ;};for _ ,_bcce :=range _ffafb ._fcgd [_fffe ]{if !(_fbba -_caeg <=_bcce ._dfagd &&_bcce ._dfagd <=_becae +_caeg ){continue ;
|
||
};if !_cddg (_bcgd ,_bcce ){continue ;};_edfd :=2.0*_gf .Abs (_bcce ._eabbf -_bcgd ._cdea )/(_bcce ._eabbf +_bcgd ._cdea );_bged :=_gf .Max (_bcce ._eabbf /_bcgd ._cdea ,_bcgd ._cdea /_bcce ._eabbf );_bcgag :=_gf .Min (_edfd ,_bged );if _ddbdc > 0&&_bcgag > _ddbdc {continue ;
|
||
};if _bcgd .blocked (_bcce ){continue ;};if !_dgb {_bcgd .pullWord (_bcce ,_fffe ,_becf );};_aeea ++;if !_dabf {if _bcce ._dfagd < _fbba {_fbba =_bcce ._dfagd ;};if _bcce ._dfagd > _becae {_becae =_bcce ._dfagd ;};};if _dgb {break ;};};};if !_dgb {_ffafb .applyRemovals (_becf );
|
||
};return _aeea ;};func _adbb (_abebg ,_dfbg _agf .Point )bool {_afgd :=_gf .Abs (_abebg .X -_dfbg .X );_becg :=_gf .Abs (_abebg .Y -_dfbg .Y );return _caeb (_afgd ,_becg );};func (_ebdd intSet )del (_aabg int ){delete (_ebdd ,_aabg )};func _eedf (_acef *wordBag ,_fbce int )*textLine {_egaa :=_acef .firstWord (_fbce );
|
||
_abdb :=textLine {PdfRectangle :_egaa .PdfRectangle ,_fdfb :_egaa ._eabbf ,_bbfg :_egaa ._dfagd };_abdb .pullWord (_acef ,_egaa ,_fbce );return &_abdb ;};
|
||
|
||
// ToTextMark returns the public view of `tm`.
|
||
func (_gdbe *textMark )ToTextMark ()TextMark {return TextMark {Text :_gdbe ._bfdb ,Original :_gdbe ._gccf ,BBox :_gdbe ._dded ,Font :_gdbe ._ccdg ,FontSize :_gdbe ._gccfd ,FillColor :_gdbe ._fbaf ,StrokeColor :_gdbe ._eaee ,Orientation :_gdbe ._gfbg ,DirectObject :_gdbe ._bbgf ,ObjString :_gdbe ._fcbc ,Tw :_gdbe .Tw ,Th :_gdbe .Th ,Tc :_gdbe ._gccd ,Index :_gdbe ._aeaee };
|
||
};
|
||
|
||
// New returns an Extractor instance for extracting content from the input PDF page.
|
||
func New (page *_ba .PdfPage )(*Extractor ,error ){return NewWithOptions (page ,nil )};
|
||
|
||
// NewWithOptions an Extractor instance for extracting content from the input PDF page with options.
|
||
func NewWithOptions (page *_ba .PdfPage ,options *Options )(*Extractor ,error ){const _cd ="\u0065x\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077W\u0069\u0074\u0068\u004f\u0070\u0074\u0069\u006f\u006e\u0073";_ee ,_ddc :=page .GetAllContentStreams ();
|
||
if _ddc !=nil {return nil ,_ddc ;};_ec ,_fb :=page .GetStructTreeRoot ();if !_fb {_ga .Log .Info ("T\u0068\u0065\u0020\u0070\u0064\u0066\u0020\u0064\u006f\u0063\u0075\u006d\u0065\u006e\u0074\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020\u0074\u0061\u0067g\u0065d\u002e\u0020\u0053\u0074r\u0075\u0063t\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e\u0027\u0074\u0020\u0065\u0078\u0069\u0073\u0074\u002e");
|
||
};_ggg :=page .GetContainingPdfObject ();_eaa ,_ddc :=page .GetMediaBox ();if _ddc !=nil {return nil ,_agc .Errorf ("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076",_ddc );
|
||
};_adc :=&Extractor {_ac :_ee ,_gbe :page .Resources ,_cb :*_eaa ,_eb :page .CropBox ,_bd :map[string ]fontEntry {},_baa :map[string ]textResult {},_gfe :map[string ]textResult {},_cf :options ,_ab :_ec ,_dd :_ggg };if _adc ._cb .Llx > _adc ._cb .Urx {_ga .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_adc ._cb );
|
||
_adc ._cb .Llx ,_adc ._cb .Urx =_adc ._cb .Urx ,_adc ._cb .Llx ;};if _adc ._cb .Lly > _adc ._cb .Ury {_ga .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_adc ._cb );
|
||
_adc ._cb .Lly ,_adc ._cb .Ury =_adc ._cb .Ury ,_adc ._cb .Lly ;};if _adc ._cf !=nil {if _adc ._cf .IncludeAnnotations {_adc ._bg ,_ddc =page .GetAnnotations ();if _ddc !=nil {_ga .Log .Debug ("\u0045\u0072r\u006f\u0072\u0020\u0067\u0065\u0074\u0074\u0069\u006e\u0067\u0020\u0061\u006e\u006e\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0073: \u0025\u0076",_ddc );
|
||
};};};_ce .TrackUse (_cd );return _adc ,nil ;};func (_dfga *textPara )toTextMarks (_afbc *int )[]TextMark {if _dfga ._fbbea ==nil {return _dfga .toCellTextMarks (_afbc );};var _acfe []TextMark ;for _agfb :=0;_agfb < _dfga ._fbbea ._cgae ;_agfb ++{for _beagd :=0;
|
||
_beagd < _dfga ._fbbea ._eacg ;_beagd ++{_ecgb :=_dfga ._fbbea .get (_beagd ,_agfb );if _ecgb ==nil {_acfe =_ffeg (_acfe ,_afbc ,"\u0009");}else {_cgdg :=_ecgb .toCellTextMarks (_afbc );_acfe =append (_acfe ,_cgdg ...);};_acfe =_ffeg (_acfe ,_afbc ,"\u0020");
|
||
};if _agfb < _dfga ._fbbea ._cgae -1{_acfe =_ffeg (_acfe ,_afbc ,"\u000a");};};_dgcfg :=_dfga ._fbbea ;if _dgcfg .isExportable (){_fbfa :=_dgcfg .toTextTable ();_acfe =_dcdb (_acfe ,&_fbfa );};return _acfe ;};func (_decee *textTable )reduce ()*textTable {_ceade :=make ([]int ,0,_decee ._cgae );
|
||
_ffecb :=make ([]int ,0,_decee ._eacg );for _gddcb :=0;_gddcb < _decee ._cgae ;_gddcb ++{if !_decee .emptyCompositeRow (_gddcb ){_ceade =append (_ceade ,_gddcb );};};for _ebcb :=0;_ebcb < _decee ._eacg ;_ebcb ++{if !_decee .emptyCompositeColumn (_ebcb ){_ffecb =append (_ffecb ,_ebcb );
|
||
};};if len (_ceade )==_decee ._cgae &&len (_ffecb )==_decee ._eacg {return _decee ;};_gdff :=textTable {_edeg :_decee ._edeg ,_eacg :len (_ffecb ),_cgae :len (_ceade ),_egfea :make (map[uint64 ]*textPara ,len (_ffecb )*len (_ceade ))};if _efda {_ga .Log .Info ("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064",_decee ._eacg ,_decee ._cgae ,len (_ffecb ),len (_ceade ));
|
||
_ga .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_ffecb );_ga .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_ceade );};for _dgdd ,_cdaca :=range _ceade {for _ecbfgc ,_gbgcbb :=range _ffecb {_bcafa ,_bdbdf :=_decee .getComposite (_gbgcbb ,_cdaca );
|
||
if _bcafa ==nil {continue ;};if _efda {_agc .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_ecbfgc ,_dgdd ,_gbgcbb ,_cdaca ,_adgd (_bcafa .merge ().text (),50));};_gdff .putComposite (_ecbfgc ,_dgdd ,_bcafa ,_bdbdf );
|
||
};};return &_gdff ;};func (_adcf *wordBag )getDepthIdx (_caef float64 )int {_ceg :=_adcf .depthIndexes ();_gaed :=_ceee (_caef );if _gaed < _ceg [0]{return _ceg [0];};if _gaed > _ceg [len (_ceg )-1]{return _ceg [len (_ceg )-1];};return _gaed ;};func (_fbfe *structElement )parseStructElement (_gegd _add .PdfObject ){_fgga ,_bfafg :=_add .GetDict (_gegd );
|
||
if !_bfafg {_ga .Log .Debug ("\u0070\u0061\u0072\u0073\u0065\u0053\u0074\u0072u\u0063\u0074\u0045le\u006d\u0065\u006e\u0074\u003a\u0020d\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006f\u0062\u006a\u0065\u0063t\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075n\u0064\u002e");
|
||
return ;};_eaga :=_fgga .Get ("\u0053");_efcf :=_fgga .Get ("\u0050\u0067");_effg :="";if _eaga !=nil {_effg =_eaga .String ();};_bafc :=_fgga .Get ("\u004b");_fbfe ._bbag =_effg ;_fbfe ._eada =_efcf ;switch _fbbad :=_bafc .(type ){case *_add .PdfObjectInteger :_fbfe ._bbag =_effg ;
|
||
_fbfe ._cfcb =int64 (*_fbbad );_fbfe ._eada =_efcf ;case *_add .PdfObjectReference :_dcdf :=*_add .MakeArray (_fbbad );var _cgdec int64 =-1;_fbfe ._cfcb =_cgdec ;if _dcdf .Len ()==1{_ccgg :=_dcdf .Elements ()[0];_ggcad ,_edaa :=_ccgg .(*_add .PdfObjectInteger );
|
||
if _edaa {_cgdec =int64 (*_ggcad );_fbfe ._cfcb =_cgdec ;_fbfe ._bbag =_effg ;_fbfe ._eada =_efcf ;return ;};};_bdgg :=[]structElement {};for _ ,_gcffb :=range _dcdf .Elements (){_dfbd ,_bfeg :=_gcffb .(*_add .PdfObjectInteger );if _bfeg {_cgdec =int64 (*_dfbd );
|
||
_fbfe ._cfcb =_cgdec ;_fbfe ._bbag =_effg ;}else {_afee :=&structElement {};_afee .parseStructElement (_gcffb );_bdgg =append (_bdgg ,*_afee );};_cgdec =-1;};_fbfe ._abff =_bdgg ;case *_add .PdfObjectArray :_edcb :=_bafc .(*_add .PdfObjectArray );var _aacd int64 =-1;
|
||
_fbfe ._cfcb =_aacd ;if _edcb .Len ()==1{_cbac :=_edcb .Elements ()[0];_bgfg ,_feba :=_cbac .(*_add .PdfObjectInteger );if _feba {_aacd =int64 (*_bgfg );_fbfe ._cfcb =_aacd ;_fbfe ._bbag =_effg ;_fbfe ._eada =_efcf ;return ;};};_bfaa :=[]structElement {};
|
||
for _ ,_cdff :=range _edcb .Elements (){_fbg ,_bfbbc :=_cdff .(*_add .PdfObjectInteger );if _bfbbc {_aacd =int64 (*_fbg );_fbfe ._cfcb =_aacd ;_fbfe ._bbag =_effg ;_fbfe ._eada =_efcf ;}else {_cafad :=&structElement {};_cafad .parseStructElement (_cdff );
|
||
_bfaa =append (_bfaa ,*_cafad );};_aacd =-1;};_fbfe ._abff =_bfaa ;};};
|
||
|
||
// ExtractPageImages returns the image contents of the page extractor, including data
|
||
// and position, size information for each image.
|
||
// A set of options to control page image extraction can be passed in. The options
|
||
// parameter can be nil for the default options. By default, inline stencil masks
|
||
// are not extracted.
|
||
func (_fa *Extractor )ExtractPageImages (options *ImageExtractOptions )(*PageImages ,error ){_fg :=&imageExtractContext {_gce :options };_gcd :=_fg .extractContentStreamImages (_fa ._ac ,_fa ._gbe );if _gcd !=nil {return nil ,_gcd ;};return &PageImages {Images :_fg ._dcc },nil ;
|
||
};func (_ffgf *wordBag )sort (){for _ ,_adbcc :=range _ffgf ._fcgd {_e .Slice (_adbcc ,func (_abb ,_fec int )bool {return _eddba (_adbcc [_abb ],_adbcc [_fec ])< 0});};};func (_bdcc *shapesState )cubicTo (_ebgg ,_egfc ,_gfddd ,_bcba ,_agd ,_edefc float64 ){if _fdbg {_ga .Log .Info ("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a");
|
||
};_bdcc .addPoint (_agd ,_edefc );};func (_bfgaeb rulingList )intersections ()map[int ]intSet {var _dbfd ,_gffc []int ;for _gcfef ,_bgced :=range _bfgaeb {switch _bgced ._bbce {case _cfae :_dbfd =append (_dbfd ,_gcfef );case _aaad :_gffc =append (_gffc ,_gcfef );
|
||
};};if len (_dbfd )< _afec +1||len (_gffc )< _dccbc +1{return nil ;};if len (_dbfd )+len (_gffc )> _ccbb {_ga .Log .Debug ("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064",len (_bfgaeb ),len (_dbfd ),len (_gffc ));
|
||
return nil ;};_gbae :=make (map[int ]intSet ,len (_dbfd )+len (_gffc ));for _ ,_ddbb :=range _dbfd {for _ ,_fceg :=range _gffc {if _bfgaeb [_ddbb ].intersects (_bfgaeb [_fceg ]){if _ ,_ceabb :=_gbae [_ddbb ];!_ceabb {_gbae [_ddbb ]=make (intSet );};if _ ,_aceee :=_gbae [_fceg ];
|
||
!_aceee {_gbae [_fceg ]=make (intSet );};_gbae [_ddbb ].add (_fceg );_gbae [_fceg ].add (_ddbb );};};};return _gbae ;};type fontEntry struct{_cbad *_ba .PdfFont ;_ccg int64 ;};var _decf =map[markKind ]string {_bcfc :"\u0073\u0074\u0072\u006f\u006b\u0065",_ccfg :"\u0066\u0069\u006c\u006c",_degg :"\u0061u\u0067\u006d\u0065\u006e\u0074"};
|
||
func (_gdc *imageExtractContext )extractFormImages (_egcb *_add .PdfObjectName ,_gedf _fc .GraphicsState ,_edca *_ba .PdfPageResources )error {_aae ,_cfd :=_edca .GetXObjectFormByName (*_egcb );if _cfd !=nil {return _cfd ;};if _aae ==nil {return nil ;};
|
||
_bec ,_cfd :=_aae .GetContentStream ();if _cfd !=nil {return _cfd ;};_fbb :=_aae .Resources ;if _fbb ==nil {_fbb =_edca ;};_cfd =_gdc .extractContentStreamImages (string (_bec ),_fbb );if _cfd !=nil {return _cfd ;};_gdc ._bde ++;return nil ;};
|
||
|
||
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
|
||
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
|
||
func (_dfe *Extractor )ExtractTextWithStats ()(_cgda string ,_ddb int ,_fbeb int ,_dfg error ){_dca ,_ddb ,_fbeb ,_dfg :=_dfe .ExtractPageText ();if _dfg !=nil {return "",_ddb ,_fbeb ,_dfg ;};return _dca .Text (),_ddb ,_fbeb ,nil ;};
|
||
|
||
// String returns a description of `k`.
|
||
func (_gegcf rulingKind )String ()string {_agdc ,_deec :=_gfcgc [_gegcf ];if !_deec {return _agc .Sprintf ("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064",_gegcf );};return _agdc ;};func (_deaad *textTable )compositeColCorridors ()map[int ][]float64 {_cfffb :=make (map[int ][]float64 ,_deaad ._eacg );
|
||
if _efda {_ga .Log .Info ("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020",_deaad ._eacg );};for _cfeee :=0;_cfeee < _deaad ._eacg ;_cfeee ++{_cfffb [_cfeee ]=nil ;
|
||
};return _cfffb ;};type ruling struct{_bbce rulingKind ;_ccaa markKind ;_gff .Color ;_edga float64 ;_fcec float64 ;_abeg float64 ;_ebcee float64 ;};func (_cebde gridTiling )log (_afaa string ){if !_dfge {return ;};_ga .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_cebde ._fece ),len (_cebde ._dfca ),_afaa );
|
||
_agc .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_cebde ._fece );_agc .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_cebde ._dfca );for _egdeg ,_cfac :=range _cebde ._dfca {_cgacf ,_ggcg :=_cebde ._gdcg [_cfac ];
|
||
if !_ggcg {continue ;};_agc .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_egdeg ,_cfac );for _bgdd ,_aggd :=range _cebde ._fece {_acfbe ,_dgeca :=_cgacf [_aggd ];if !_dgeca {continue ;};_agc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_bgdd ,_acfbe .String ());
|
||
};};};func _fecf (_efga []*textWord ,_gded int )[]*textWord {_efce :=len (_efga );copy (_efga [_gded :],_efga [_gded +1:]);return _efga [:_efce -1];};type structTreeRoot struct{_faef []structElement ;_adbca string ;};
|
||
|
||
// String returns a human readable description of `ss`.
|
||
func (_egecb *shapesState )String ()string {return _agc .Sprintf ("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d",len (_egecb ._gfce ),_egecb ._gbdgg );};func (_bfcg *textObject )getStrokeColor ()_gff .Color {return _eddaee (_bfcg ._bace .ColorspaceStroking ,_bfcg ._bace .ColorStroking );
|
||
};func (_ge *PageFonts )extractPageResourcesToFont (_db *_ba .PdfPageResources )error {_cga ,_gc :=_add .GetDict (_db .Font );if !_gc {return _b .New (_aba );};for _ ,_bgg :=range _cga .Keys (){var (_cc =true ;_gde []byte ;_eaf string ;);_abe ,_ecg :=_db .GetFontByName (_bgg );
|
||
if !_ecg {return _b .New (_cgc );};_dde ,_fbf :=_ba .NewPdfFontFromPdfObject (_abe );if _fbf !=nil {return _fbf ;};_fcd :=_dde .FontDescriptor ();_cgg :=_dde .FontDescriptor ().FontName .String ();_ff :=_dde .Subtype ();if _aga (_ge .Fonts ,_cgg ){continue ;
|
||
};if len (_dde .ToUnicode ())==0{_cc =false ;};if _fcd .FontFile !=nil {if _eca ,_da :=_add .GetStream (_fcd .FontFile );_da {_gde ,_fbf =_add .DecodeStream (_eca );if _fbf !=nil {return _fbf ;};_eaf =_cgg +"\u002e\u0070\u0066\u0062";};}else if _fcd .FontFile2 !=nil {if _be ,_bed :=_add .GetStream (_fcd .FontFile2 );
|
||
_bed {_gde ,_fbf =_add .DecodeStream (_be );if _fbf !=nil {return _fbf ;};_eaf =_cgg +"\u002e\u0074\u0074\u0066";};}else if _fcd .FontFile3 !=nil {if _ccb ,_fe :=_add .GetStream (_fcd .FontFile3 );_fe {_gde ,_fbf =_add .DecodeStream (_ccb );if _fbf !=nil {return _fbf ;
|
||
};_eaf =_cgg +"\u002e\u0063\u0066\u0066";};};if len (_eaf )< 1{_ga .Log .Debug (_aef );};_abc :=Font {FontName :_cgg ,PdfFont :_dde ,IsCID :_dde .IsCID (),IsSimple :_dde .IsSimple (),ToUnicode :_cc ,FontType :_ff ,FontData :_gde ,FontFileName :_eaf ,FontDescriptor :_fcd };
|
||
_ge .Fonts =append (_ge .Fonts ,_abc );};return nil ;};
|
||
|
||
// String returns a string describing `ma`.
|
||
func (_eeeg TextMarkArray )String ()string {_bfde :=len (_eeeg ._ffca );if _bfde ==0{return "\u0045\u004d\u0050T\u0059";};_gefa :=_eeeg ._ffca [0];_edgb :=_eeeg ._ffca [_bfde -1];return _agc .Sprintf ("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d",_bfde ,_gefa ,_edgb );
|
||
};func (_abaed *textPara )depth ()float64 {if _abaed ._cddef {return -1.0;};if len (_abaed ._fdec )> 0{return _abaed ._fdec [0]._bbfg ;};return _abaed ._fbbea .depth ();};func _gecgf (_gfbc []float64 ,_fgfac ,_ceea float64 )[]float64 {_fbbaf ,_gbdab :=_fgfac ,_ceea ;
|
||
if _gbdab < _fbbaf {_fbbaf ,_gbdab =_gbdab ,_fbbaf ;};_dbee :=make ([]float64 ,0,len (_gfbc )+2);_dbee =append (_dbee ,_fgfac );for _ ,_bbcee :=range _gfbc {if _bbcee <=_fbbaf {continue ;}else if _bbcee >=_gbdab {break ;};_dbee =append (_dbee ,_bbcee );
|
||
};_dbee =append (_dbee ,_ceea );return _dbee ;};
|
||
|
||
// String returns a description of `b`.
|
||
func (_gffb *wordBag )String ()string {var _bda []string ;for _ ,_fdcd :=range _gffb .depthIndexes (){_efgb :=_gffb ._fcgd [_fdcd ];for _ ,_fgdb :=range _efgb {_bda =append (_bda ,_fgdb ._eaae );};};return _agc .Sprintf ("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071",_gffb .PdfRectangle ,_gffb ._cdea ,len (_bda ),_bda );
|
||
};func (_ddab *wordBag )firstReadingIndex (_eedb int )int {_cdda :=_ddab .firstWord (_eedb )._eabbf ;_accb :=float64 (_eedb +1)*_dage ;_egabg :=_accb +_bag *_cdda ;_ddbdcb :=_eedb ;for _ ,_gaeb :=range _ddab .depthBand (_accb ,_egabg ){if _eddba (_ddab .firstWord (_gaeb ),_ddab .firstWord (_ddbdcb ))< 0{_ddbdcb =_gaeb ;
|
||
};};return _ddbdcb ;};func _aebeec (_bbgcf ,_gbbafd _ag .Image )_ag .Image {_abgce ,_aeadc :=_gbbafd .Bounds ().Size (),_bbgcf .Bounds ().Size ();_egbfgg ,_agcac :=_abgce .X ,_abgce .Y ;if _aeadc .X > _egbfgg {_egbfgg =_aeadc .X ;};if _aeadc .Y > _agcac {_agcac =_aeadc .Y ;
|
||
};_cbcdd :=_ag .Rect (0,0,_egbfgg ,_agcac );if _abgce .X !=_egbfgg ||_abgce .Y !=_agcac {_ffdeb :=_ag .NewRGBA (_cbcdd );_ed .BiLinear .Scale (_ffdeb ,_cbcdd ,_bbgcf ,_gbbafd .Bounds (),_ed .Over ,nil );_gbbafd =_ffdeb ;};if _aeadc .X !=_egbfgg ||_aeadc .Y !=_agcac {_dgfbg :=_ag .NewRGBA (_cbcdd );
|
||
_ed .BiLinear .Scale (_dgfbg ,_cbcdd ,_bbgcf ,_bbgcf .Bounds (),_ed .Over ,nil );_bbgcf =_dgfbg ;};_bfacg :=_ag .NewRGBA (_cbcdd );_ed .DrawMask (_bfacg ,_cbcdd ,_bbgcf ,_ag .Point {},_gbbafd ,_ag .Point {},_ed .Over );return _bfacg ;};
|
||
|
||
// String returns a string describing `i`.
|
||
func (_fgcdc gridTile )String ()string {_ggaacg :=func (_cdcc bool ,_gccfb string )string {if _cdcc {return _gccfb ;};return "\u005f";};return _agc .Sprintf ("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073",_fgcdc .PdfRectangle ,_ggaacg (_fgcdc ._ecaf ,"\u004c"),_ggaacg (_fgcdc ._bfab ,"\u0052"),_ggaacg (_fgcdc ._bgbfd ,"\u0042"),_ggaacg (_fgcdc ._dgccd ,"\u0054"));
|
||
};func _baff (_defb ,_fcdgd ,_gbeae ,_adfe *textPara )*textTable {_ecfa :=&textTable {_eacg :2,_cgae :2,_egfea :make (map[uint64 ]*textPara ,4)};_ecfa .put (0,0,_defb );_ecfa .put (1,0,_fcdgd );_ecfa .put (0,1,_gbeae );_ecfa .put (1,1,_adfe );return _ecfa ;
|
||
};func (_ggbd paraList )xNeighbours (_ffabb float64 )map[*textPara ][]int {_fdcfe :=make ([]event ,2*len (_ggbd ));if _ffabb ==0{for _dgadd ,_cgabe :=range _ggbd {_fdcfe [2*_dgadd ]=event {_cgabe .Llx ,true ,_dgadd };_fdcfe [2*_dgadd +1]=event {_cgabe .Urx ,false ,_dgadd };
|
||
};}else {for _dfbdb ,_gfcge :=range _ggbd {_fdcfe [2*_dfbdb ]=event {_gfcge .Llx -_ffabb *_gfcge .fontsize (),true ,_dfbdb };_fdcfe [2*_dfbdb +1]=event {_gfcge .Urx +_ffabb *_gfcge .fontsize (),false ,_dfbdb };};};return _ggbd .eventNeighbours (_fdcfe );
|
||
};func (_fefgb rulingList )aligned ()bool {if len (_fefgb )< 2{return false ;};_cacg :=make (map[*ruling ]int );_cacg [_fefgb [0]]=0;for _ ,_gffggc :=range _fefgb [1:]{_caaa :=false ;for _gffga :=range _cacg {if _gffggc .gridIntersecting (_gffga ){_cacg [_gffga ]++;
|
||
_caaa =true ;break ;};};if !_caaa {_cacg [_gffggc ]=0;};};_fddae :=0;for _ ,_edfba :=range _cacg {if _edfba ==0{_fddae ++;};};_aedb :=float64 (_fddae )/float64 (len (_fefgb ));_gacb :=_aedb <=1.0-_efeb ;if _adgbf {_ga .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_gacb ,_aedb ,_fddae ,len (_fefgb ),_fefgb .String ());
|
||
};return _gacb ;};func (_becda *ruling )encloses (_cbbf ,_adgbd float64 )bool {return _becda ._fcec -_dfed <=_cbbf &&_adgbd <=_becda ._abeg +_dfed ;};func (_egdf *wordBag )depthIndexes ()[]int {if len (_egdf ._fcgd )==0{return nil ;};_ffgb :=make ([]int ,len (_egdf ._fcgd ));
|
||
_cddb :=0;for _cbb :=range _egdf ._fcgd {_ffgb [_cddb ]=_cbb ;_cddb ++;};_e .Ints (_ffgb );return _ffgb ;};func (_agae *wordBag )blocked (_gbaff *textWord )bool {if _gbaff .Urx < _agae .Llx {_affa :=_degf (_gbaff .PdfRectangle );_cbab :=_bdccc (_agae .PdfRectangle );
|
||
if _agae ._febe .blocks (_affa ,_cbab ){if _ggaeg {_ga .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_gbaff ,_agae );};return true ;};}else if _agae .Urx < _gbaff .Llx {_dbcag :=_degf (_agae .PdfRectangle );
|
||
_bdece :=_bdccc (_gbaff .PdfRectangle );if _agae ._febe .blocks (_dbcag ,_bdece ){if _ggaeg {_ga .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s",_gbaff ,_agae );};return true ;};};if _gbaff .Ury < _agae .Lly {_bdfb :=_faafc (_gbaff .PdfRectangle );
|
||
_dfdg :=_fbcde (_agae .PdfRectangle );if _agae ._cgdae .blocks (_bdfb ,_dfdg ){if _ggaeg {_ga .Log .Info ("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073",_gbaff ,_agae );};return true ;};}else if _agae .Ury < _gbaff .Lly {_ccdd :=_faafc (_agae .PdfRectangle );
|
||
_eff :=_fbcde (_gbaff .PdfRectangle );if _agae ._cgdae .blocks (_ccdd ,_eff ){if _ggaeg {_ga .Log .Info ("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s",_gbaff ,_agae );};return true ;};};return false ;};func (_feac compositeCell )String ()string {_acff :="";
|
||
if len (_feac .paraList )> 0{_acff =_adgd (_feac .paraList .merge ().text (),50);};return _agc .Sprintf ("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071",_feac .PdfRectangle ,len (_feac .paraList ),_acff );
|
||
};func (_eaec paraList )llyOrdering ()[]int {_eefb :=make ([]int ,len (_eaec ));for _ddge :=range _eaec {_eefb [_ddge ]=_ddge ;};_e .SliceStable (_eefb ,func (_efad ,_beeef int )bool {_dbbbg ,_dcfd :=_eefb [_efad ],_eefb [_beeef ];return _eaec [_dbbbg ].Lly < _eaec [_dcfd ].Lly ;
|
||
});return _eefb ;};func (_adac *subpath )isQuadrilateral ()bool {if len (_adac ._aeee )< 4||len (_adac ._aeee )> 5{return false ;};if len (_adac ._aeee )==5{_ebad :=_adac ._aeee [0];_dede :=_adac ._aeee [4];if _ebad .X !=_dede .X ||_ebad .Y !=_dede .Y {return false ;
|
||
};};return true ;};func (_cbgcf *ruling )alignsSec (_bcfdf *ruling )bool {const _abgc =_gbb +1.0;return _cbgcf ._fcec -_abgc <=_bcfdf ._abeg &&_bcfdf ._fcec -_abgc <=_cbgcf ._abeg ;};func (_acea *textTable )emptyCompositeColumn (_faafdg int )bool {for _cgaea :=0;
|
||
_cgaea < _acea ._cgae ;_cgaea ++{if _bcdfc ,_acdad :=_acea ._aaaga [_bgcc (_faafdg ,_cgaea )];_acdad {if len (_bcdfc .paraList )> 0{return false ;};};};return true ;};func (_gcfcd *textTable )subdivide ()*textTable {_gcfcd .logComposite ("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e");
|
||
_ecac :=_gcfcd .compositeRowCorridors ();_eefdd :=_gcfcd .compositeColCorridors ();if _efda {_ga .Log .Info ("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",_edaf (_ecac ),_edaf (_eefdd ));
|
||
};if len (_ecac )==0||len (_eefdd )==0{return _gcfcd ;};_cceb (_ecac );_cceb (_eefdd );if _efda {_ga .Log .Info ("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",_edaf (_ecac ),_edaf (_eefdd ));
|
||
};_agdeg ,_eced :=_fgdcg (_gcfcd ._cgae ,_ecac );_ccgbb ,_ccae :=_fgdcg (_gcfcd ._eacg ,_eefdd );_dcbc :=make (map[uint64 ]*textPara ,_ccae *_eced );_bdfad :=&textTable {PdfRectangle :_gcfcd .PdfRectangle ,_edeg :_gcfcd ._edeg ,_cgae :_eced ,_eacg :_ccae ,_egfea :_dcbc };
|
||
if _efda {_ga .Log .Info ("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",_gcfcd ._eacg ,_gcfcd ._cgae ,_ccae ,_eced ,_edaf (_ecac ),_edaf (_eefdd ),_agdeg ,_ccgbb );
|
||
};for _ebagg :=0;_ebagg < _gcfcd ._cgae ;_ebagg ++{_eaega :=_agdeg [_ebagg ];for _ggcf :=0;_ggcf < _gcfcd ._eacg ;_ggcf ++{_faca :=_ccgbb [_ggcf ];if _efda {_agc .Printf ("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a",_ggcf ,_ebagg ,_faca ,_eaega );
|
||
};_cgefd ,_gbbbc :=_gcfcd ._aaaga [_bgcc (_ggcf ,_ebagg )];if !_gbbbc {continue ;};_aeed :=_cgefd .split (_ecac [_ebagg ],_eefdd [_ggcf ]);for _dgeafg :=0;_dgeafg < _aeed ._cgae ;_dgeafg ++{for _egfdfa :=0;_egfdfa < _aeed ._eacg ;_egfdfa ++{_abdae :=_aeed .get (_egfdfa ,_dgeafg );
|
||
_bdfad .put (_faca +_egfdfa ,_eaega +_dgeafg ,_abdae );if _efda {_agc .Printf ("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_faca +_egfdfa ,_eaega +_dgeafg ,_abdae );};};};};};return _bdfad ;};
|
||
|
||
// Append appends `mark` to the mark array.
|
||
func (_edef *TextMarkArray )Append (mark TextMark ){_edef ._ffca =append (_edef ._ffca ,mark )};
// subpath holds a sequence of points (`_aeee`) together with a boolean flag (`_dbe`).
// NOTE(review): `_dbe` presumably records whether the subpath has been closed — confirm against the code that sets it.
type subpath struct{_aeee []_agf .Point ;_dbe bool ;};
|
||
|
||
// Tables returns the tables extracted from the page.
|
||
func (_afbe PageText )Tables ()[]TextTable {if _efda {_ga .Log .Info ("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064",len (_afbe ._gdba ));};return _afbe ._gdba ;};func (_dddc *structTreeRoot )parseStructTreeRoot (_gebcc _add .PdfObject ){if _gebcc !=nil {_ddece ,_dffbe :=_add .GetDict (_gebcc );
|
||
if !_dffbe {_ga .Log .Debug ("\u0070\u0061\u0072s\u0065\u0053\u0074\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u003a\u0020\u0064\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006eo\u0074\u0020\u0066\u006f\u0075\u006e\u0064\u002e");
|
||
};K :=_ddece .Get ("\u004b");_cdgd :=_ddece .Get ("\u0054\u0079\u0070\u0065").String ();var _gecg *_add .PdfObjectArray ;switch _eafd :=K .(type ){case *_add .PdfObjectArray :_gecg =_eafd ;case *_add .PdfObjectReference :_gecg =_add .MakeArray (K );};_fcge :=[]structElement {};
|
||
for _ ,_becc :=range _gecg .Elements (){_fgbfa :=&structElement {};_fgbfa .parseStructElement (_becc );_fcge =append (_fcge ,*_fgbfa );};_dddc ._faef =_fcge ;_dddc ._adbca =_cdgd ;};};func (_aaee *Extractor )extractPageText (_gcfd string ,_cdg *_ba .PdfPageResources ,_baac _agf .Matrix ,_fagb int ,_adb bool )(*PageText ,int ,int ,error ){_ga .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_fagb );
|
||
_dge :=&PageText {_bbdc :_aaee ._cb ,_abge :_aaee ._ab ,_abad :_aaee ._dd };_adcg :=_cgbc (_aaee ._cb );var _cee stateStack ;_bce :=_acac (_aaee ,_cdg ,_fc .GraphicsState {},&_adcg ,&_cee );_egeg :=shapesState {_gdfd :_baac ,_gafa :_agf .IdentityMatrix (),_edf :_bce };
|
||
var _ccd bool ;_bbf :=-1;if _fagb > _fad {_fd :=_b .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_fagb ,_fd );
|
||
return _dge ,_adcg ._abd ,_adcg ._bafdf ,_fd ;};_fef :=_fc .NewContentStreamParser (_gcfd );_cge ,_fca :=_fef .Parse ();if _fca !=nil {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fca );
|
||
return _dge ,_adcg ._abd ,_adcg ._bafdf ,_fca ;};_dge ._cde =_cge ;_bafd :=_fc .NewContentStreamProcessor (*_cge );_bafd .AddHandler (_fc .HandlerConditionEnumAllOperands ,"",func (_fbeg *_fc .ContentStreamOperation ,_ef _fc .GraphicsState ,_ceeb *_ba .PdfPageResources )error {_dcg :=_fbeg .Operand ;
|
||
if _eeeb {_ga .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_fbeg );};switch _dcg {case "\u0071":if _fdbg {_ga .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_egeg ._gafa );};_cee .push (&_adcg );case "\u0051":if !_cee .empty (){_adcg =*_cee .pop ();
|
||
};_egeg ._gafa =_ef .CTM ;if _fdbg {_ga .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_egeg ._gafa );};case "\u0042\u0044\u0043":_aee ,_fbbe :=_add .GetDict (_fbeg .Params [1]);if !_fbbe {_ga .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0042D\u0043\u0020\u006f\u0070\u003d\u0025\u0073 \u0047\u0065\u0074\u0044\u0069\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_fbeg );
|
||
return _fca ;};_deb :=_aee .Get ("\u004d\u0043\u0049\u0044");if _deb !=nil {_dece ,_bcf :=_add .GetIntVal (_deb );if !_bcf {_ga .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0042\u0044C\u0020\u006f\u0070=\u0025\u0073\u002e\u0020\u0042\u0061\u0064\u0020\u006eum\u0065\u0072\u0069c\u0061\u006c \u006f\u0062\u006a\u0065\u0063\u0074.\u0020\u006f=\u0025\u0073",_fbeg ,_deb );
|
||
};_bbf =_dece ;}else {_bbf =-1;};case "\u0045\u004d\u0043":_bbf =-1;case "\u0042\u0054":if _ccd {_ga .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
|
||
_dge ._aebf =append (_dge ._aebf ,_bce ._cfde ...);};_ccd =true ;_gcfc :=_ef ;if _adb {_gcfc =_fc .GraphicsState {};_gcfc .CTM =_egeg ._gafa ;};_gcfc .CTM =_baac .Mult (_gcfc .CTM );_bce =_acac (_aaee ,_ceeb ,_gcfc ,&_adcg ,&_cee );_egeg ._edf =_bce ;case "\u0045\u0054":if !_ccd {_ga .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
|
||
};_ccd =false ;_dge ._aebf =append (_dge ._aebf ,_bce ._cfde ...);_bce .reset ();case "\u0054\u002a":_bce .nextLine ();case "\u0054\u0064":if _dfeg ,_aed :=_bce .checkOp (_fbeg ,2,true );!_dfeg {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aed );
|
||
return _aed ;};_aedg ,_gca ,_adg :=_dggea (_fbeg .Params );if _adg !=nil {return _adg ;};_bce .moveText (_aedg ,_gca );case "\u0054\u0044":if _fea ,_dfb :=_bce .checkOp (_fbeg ,2,true );!_fea {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dfb );
|
||
return _dfb ;};_ggb ,_bfbg ,_fcf :=_dggea (_fbeg .Params );if _fcf !=nil {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fcf );return _fcf ;};_bce .moveTextSetLeading (_ggb ,_bfbg );case "\u0054\u006a":if _efa ,_eee :=_bce .checkOp (_fbeg ,1,true );
|
||
!_efa {_ga .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_fbeg ,_eee );return _eee ;};_cgdf :=_add .TraceToDirectObject (_fbeg .Params [0]);_ebf ,_fdd :=_add .GetStringBytes (_cgdf );
|
||
if !_fdd {_ga .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_fbeg );return _add .ErrTypeError ;
|
||
};return _bce .showText (_cgdf ,_ebf ,_bbf );case "\u0054\u004a":if _eef ,_bfg :=_bce .checkOp (_fbeg ,1,true );!_eef {_ga .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bfg );return _bfg ;};_geg ,_ddd :=_add .GetArray (_fbeg .Params [0]);
|
||
if !_ddd {_ga .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_fbeg );return _fca ;};return _bce .showTextAdjusted (_geg ,_bbf );
|
||
case "\u0027":if _faf ,_bbc :=_bce .checkOp (_fbeg ,1,true );!_faf {_ga .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bbc );return _bbc ;};_geb :=_add .TraceToDirectObject (_fbeg .Params [0]);_cdf ,_ggd :=_add .GetStringBytes (_geb );
|
||
if !_ggd {_ga .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_fbeg );return _add .ErrTypeError ;};_bce .nextLine ();return _bce .showText (_geb ,_cdf ,_bbf );
|
||
case "\u0022":if _fgb ,_bab :=_bce .checkOp (_fbeg ,3,true );!_fgb {_ga .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bab );return _bab ;};_cba ,_ffa ,_fcdb :=_dggea (_fbeg .Params [:2]);if _fcdb !=nil {return _fcdb ;
|
||
};_fde :=_add .TraceToDirectObject (_fbeg .Params [2]);_cff ,_bff :=_add .GetStringBytes (_fde );if !_bff {_ga .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_fbeg );
|
||
return _add .ErrTypeError ;};_bce .setCharSpacing (_cba );_bce .setWordSpacing (_ffa );_bce .nextLine ();return _bce .showText (_fde ,_cff ,_bbf );case "\u0054\u004c":_bfc ,_aac :=_cage (_fbeg );if _aac !=nil {_ga .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aac );
|
||
return _aac ;};_bce .setTextLeading (_bfc );case "\u0054\u0063":_egg ,_dcaa :=_cage (_fbeg );if _dcaa !=nil {_ga .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dcaa );return _dcaa ;};_bce .setCharSpacing (_egg );
|
||
case "\u0054\u0066":if _ffd ,_eeef :=_bce .checkOp (_fbeg ,2,true );!_ffd {_ga .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_eeef );return _eeef ;};_facf ,_aaba :=_add .GetNameVal (_fbeg .Params [0]);
|
||
if !_aaba {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_fbeg );return _add .ErrTypeError ;};_agcb ,_caa :=_add .GetNumberAsFloat (_fbeg .Params [1]);
|
||
if !_aaba {_ga .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fbeg ,_caa );
|
||
return _caa ;};_caa =_bce .setFont (_facf ,_agcb );_bce ._dgcf =_b .Is (_caa ,_add .ErrNotSupported );if _caa !=nil &&!_bce ._dgcf {return _caa ;};case "\u0054\u006d":if _bdda ,_gee :=_bce .checkOp (_fbeg ,6,true );!_bdda {_ga .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gee );
|
||
return _gee ;};_ffg ,_afg :=_add .GetNumbersAsFloat (_fbeg .Params );if _afg !=nil {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_afg );return _afg ;};_bce .setTextMatrix (_ffg );case "\u0054\u0072":if _eab ,_abab :=_bce .checkOp (_fbeg ,1,true );
|
||
!_eab {_ga .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_abab );return _abab ;};_eeed ,_fdg :=_add .GetIntVal (_fbeg .Params [0]);if !_fdg {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_fbeg );
|
||
return _add .ErrTypeError ;};_bce .setTextRenderMode (_eeed );case "\u0054\u0073":if _ebd ,_ede :=_bce .checkOp (_fbeg ,1,true );!_ebd {_ga .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ede );return _ede ;
|
||
};_bca ,_edg :=_add .GetNumberAsFloat (_fbeg .Params [0]);if _edg !=nil {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_edg );return _edg ;};_bce .setTextRise (_bca );case "\u0054\u0077":if _acd ,_fgff :=_bce .checkOp (_fbeg ,1,true );
|
||
!_acd {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fgff );return _fgff ;};_fdf ,_gdg :=_add .GetNumberAsFloat (_fbeg .Params [0]);if _gdg !=nil {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gdg );
|
||
return _gdg ;};_bce .setWordSpacing (_fdf );case "\u0054\u007a":if _ecab ,_dgf :=_bce .checkOp (_fbeg ,1,true );!_ecab {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dgf );return _dgf ;};_ebfg ,_cef :=_add .GetNumberAsFloat (_fbeg .Params [0]);
|
||
if _cef !=nil {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cef );return _cef ;};_bce .setHorizScaling (_ebfg );case "\u0063\u006d":if !_adb {_egeg ._gafa =_ef .CTM ;};if _egeg ._gafa .Singular (){_gcb :=_agf .IdentityMatrix ().Translate (_egeg ._gafa .Translation ());
|
||
_ga .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_egeg ._gafa ,_gcb );_egeg ._gafa =_gcb ;};if _fdbg {_ga .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_egeg ._gafa );};case "\u006d":if len (_fbeg .Params )!=2{_ga .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_gfg );
|
||
return nil ;};_eeb ,_cag :=_add .GetNumbersAsFloat (_fbeg .Params );if _cag !=nil {return _cag ;};_egeg .moveTo (_eeb [0],_eeb [1]);case "\u006c":if len (_fbeg .Params )!=2{_ga .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_gfg );
|
||
return nil ;};_bef ,_deda :=_add .GetNumbersAsFloat (_fbeg .Params );if _deda !=nil {return _deda ;};_egeg .lineTo (_bef [0],_bef [1]);case "\u0063":if len (_fbeg .Params )!=6{return _gfg ;};_eddg ,_fgc :=_add .GetNumbersAsFloat (_fbeg .Params );if _fgc !=nil {return _fgc ;
|
||
};_ga .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_eddg );_egeg .cubicTo (_eddg [0],_eddg [1],_eddg [2],_eddg [3],_eddg [4],_eddg [5]);case "\u0076","\u0079":if len (_fbeg .Params )!=4{return _gfg ;
|
||
};_bdde ,_ace :=_add .GetNumbersAsFloat (_fbeg .Params );if _ace !=nil {return _ace ;};_ga .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_bdde );_egeg .quadraticTo (_bdde [0],_bdde [1],_bdde [2],_bdde [3]);
|
||
case "\u0068":_egeg .closePath ();case "\u0072\u0065":if len (_fbeg .Params )!=4{return _gfg ;};_agfa ,_fefa :=_add .GetNumbersAsFloat (_fbeg .Params );if _fefa !=nil {return _fefa ;};_egeg .drawRectangle (_agfa [0],_agfa [1],_agfa [2],_agfa [3]);_egeg .closePath ();
|
||
case "\u0053":_egeg .stroke (&_dge ._efea );_egeg .clearPath ();case "\u0073":_egeg .closePath ();_egeg .stroke (&_dge ._efea );_egeg .clearPath ();case "\u0046":_egeg .fill (&_dge ._ebafc );_egeg .clearPath ();case "\u0066","\u0066\u002a":_egeg .closePath ();
|
||
_egeg .fill (&_dge ._ebafc );_egeg .clearPath ();case "\u0042","\u0042\u002a":_egeg .fill (&_dge ._ebafc );_egeg .stroke (&_dge ._efea );_egeg .clearPath ();case "\u0062","\u0062\u002a":_egeg .closePath ();_egeg .fill (&_dge ._ebafc );_egeg .stroke (&_dge ._efea );
|
||
_egeg .clearPath ();case "\u006e":_egeg .clearPath ();case "\u0044\u006f":if len (_fbeg .Params )==0{_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_fbeg .Params );
|
||
return _add .ErrRangeError ;};_deba ,_ead :=_add .GetName (_fbeg .Params [0]);if !_ead {_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_fbeg .Params [0]);
|
||
return _add .ErrTypeError ;};_ ,_gddc :=_ceeb .GetXObjectByName (*_deba );if _gddc !=_ba .XObjectTypeForm {break ;};_bad ,_ead :=_aaee ._baa [_deba .String ()];if !_ead {_abed ,_efe :=_ceeb .GetXObjectFormByName (*_deba );if _efe !=nil {_ga .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_efe );
|
||
return _efe ;};_fbdb ,_efe :=_abed .GetContentStream ();if _efe !=nil {_ga .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_efe );return _efe ;};_eae :=_abed .Resources ;if _eae ==nil {_eae =_ceeb ;};_dedb :=_ef .CTM ;if _adca ,_fgcf :=_add .GetArray (_abed .Matrix );
|
||
_fgcf {_cdd ,_cab :=_adca .GetAsFloat64Slice ();if _cab !=nil {return _cab ;};if len (_cdd )!=6{return _gfg ;};_ada :=_agf .NewMatrix (_cdd [0],_cdd [1],_cdd [2],_cdd [3],_cdd [4],_cdd [5]);_dedb =_ef .CTM .Mult (_ada );};_eefa ,_feg ,_ggae ,_efe :=_aaee .extractPageText (string (_fbdb ),_eae ,_baac .Mult (_dedb ),_fagb +1,false );
|
||
if _efe !=nil {_ga .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_efe );return _efe ;};_bad =textResult {*_eefa ,_feg ,_ggae };_aaee ._baa [_deba .String ()]=_bad ;};_egeg ._gafa =_ef .CTM ;if _fdbg {_ga .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_egeg ._gafa );
|
||
};_dge ._aebf =append (_dge ._aebf ,_bad ._egcbc ._aebf ...);_dge ._efea =append (_dge ._efea ,_bad ._egcbc ._efea ...);_dge ._ebafc =append (_dge ._ebafc ,_bad ._egcbc ._ebafc ...);_adcg ._abd +=_bad ._dcga ;_adcg ._bafdf +=_bad ._bcg ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_bce ._bace .ColorspaceNonStroking =_ef .ColorspaceNonStroking ;
|
||
_bce ._bace .ColorNonStroking =_ef .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_bce ._bace .ColorspaceStroking =_ef .ColorspaceStroking ;_bce ._bace .ColorStroking =_ef .ColorStroking ;};return nil ;
|
||
});_fca =_bafd .Process (_cdg );if _aaee ._cf !=nil &&_aaee ._cf .IncludeAnnotations &&!_adb {for _ ,_ccc :=range _aaee ._bg {_cgeg ,_eaeg :=_add .GetDict (_ccc .AP );if !_eaeg {continue ;};_dad ,_eaeg :=_cgeg .Get ("\u004e").(*_add .PdfObjectStream );
|
||
if !_eaeg {continue ;};_fcfc ,_bcfd :=_add .DecodeStream (_dad );if _bcfd !=nil {_ga .Log .Debug ("\u0045\u0072\u0072\u006f\u0072\u0020\u006f\u006e\u0020\u0064\u0065c\u006f\u0064\u0065\u0020\u0073\u0074\u0072\u0065\u0061\u006d:\u0020\u0025\u0076",_bcfd );
|
||
continue ;};_beca :=_dad .PdfObjectDictionary .Get ("\u0052e\u0073\u006f\u0075\u0072\u0063\u0065s");_efd ,_bcfd :=_ba .NewPdfPageResourcesFromDict (_beca .(*_add .PdfObjectDictionary ));if _bcfd !=nil {_ga .Log .Debug ("\u0045\u0072\u0072\u006f\u0072 \u006f\u006e\u0020\u0067\u0065\u0074\u0074\u0069\u006e\u0067\u0020\u0061\u006en\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0020\u0072\u0065\u0073\u006f\u0075\u0072\u0063\u0065\u0073\u003a\u0020\u0025\u0076",_bcfd );
|
||
continue ;};_bfd :=_agf .IdentityMatrix ();_gdcf ,_eaeg :=_dad .PdfObjectDictionary .Get ("\u004d\u0061\u0074\u0072\u0069\u0078").(*_add .PdfObjectArray );if _eaeg {_feff ,_cgb :=_gdcf .GetAsFloat64Slice ();if _cgb !=nil {_ga .Log .Debug ("\u0045\u0072\u0072or\u0020\u006f\u006e\u0020\u0067\u0065\u0074\u0074\u0069n\u0067 \u0066l\u006fa\u0074\u0036\u0034\u0020\u0073\u006c\u0069\u0063\u0065\u003a\u0020\u0025\u0076",_cgb );
|
||
continue ;};if len (_feff )!=6{_ga .Log .Debug ("I\u006e\u0076\u0061\u006c\u0069\u0064 \u006d\u0061\u0074\u0072\u0069\u0078\u0020\u0073\u006ci\u0063\u0065\u0020l\u0065n\u0067\u0074\u0068");continue ;};_bfd =_agf .NewMatrix (_feff [0],_feff [1],_feff [2],_feff [3],_feff [4],_feff [5]);
|
||
};_bcac ,_eaeg :=_aaee ._gfe [_dad .String ()];if !_eaeg {_ecb ,_edeb ,_dgc ,_edcc :=_aaee .extractPageText (string (_fcfc ),_efd ,_bfd ,_fagb +1,true );if _edcc !=nil {_ga .Log .Debug ("\u0045\u0052R\u004f\u0052\u0020\u0065x\u0074\u0072a\u0063\u0074\u0069\u006e\u0067\u0020\u0061\u006en\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0020\u0074\u0065\u0078\u0074s\u003a\u0020\u0025\u0076",_edcc );
|
||
continue ;};_bcac =textResult {*_ecb ,_edeb ,_dgc };_aaee ._gfe [_dad .String ()]=_bcac ;};_dge ._aebf =append (_dge ._aebf ,_bcac ._egcbc ._aebf ...);_dge ._efea =append (_dge ._efea ,_bcac ._egcbc ._efea ...);_dge ._ebafc =append (_dge ._ebafc ,_bcac ._egcbc ._ebafc ...);
|
||
_adcg ._abd +=_bcac ._dcga ;_adcg ._bafdf +=_bcac ._bcg ;};};return _dge ,_adcg ._abd ,_adcg ._bafdf ,_fca ;};func _gfgc (_gcac ,_gebbg *textPara )bool {if _gcac ._cddef ||_gebbg ._cddef {return true ;};return _dbeae (_gcac .depth ()-_gebbg .depth ());
|
||
};func (_bdbf *textObject )moveLP (_ebg ,_bdf float64 ){_bdbf ._dbc .Concat (_agf .NewMatrix (1,0,0,1,_ebg ,_bdf ));_bdbf ._eefe =_bdbf ._dbc ;};func _ggee (_aggec _add .PdfObject ,_eceb _gff .Color )(_ag .Image ,error ){_bdfbb ,_gbfce :=_add .GetStream (_aggec );
|
||
if !_gbfce {return nil ,nil ;};_ecfbg ,_bccfe :=_ba .NewXObjectImageFromStream (_bdfbb );if _bccfe !=nil {return nil ,_bccfe ;};_dffc ,_bccfe :=_ecfbg .ToImage ();if _bccfe !=nil {return nil ,_bccfe ;};return _bdefa (_dffc ,_eceb ),nil ;};type pathSection struct{_dbdc []*subpath ;
|
||
_gff .Color ;};func _edgg (_fcfe *paraList )map[int ][]*textLine {_fecg :=map[int ][]*textLine {};for _ ,_bcbc :=range *_fcfe {for _ ,_bdaa :=range _bcbc ._fdec {if !_aada (_bdaa ){_ga .Log .Debug ("g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e");
|
||
continue ;};_fggag :=_bdaa ._cdcg [0]._abcee [0]._ebbb ;_fecg [_fggag ]=append (_fecg [_fggag ],_bdaa );};if _bcbc ._fbbea !=nil {_afeb :=_bcbc ._fbbea ._egfea ;for _ ,_gbfd :=range _afeb {for _ ,_aacdb :=range _gbfd ._fdec {if !_aada (_aacdb ){_ga .Log .Debug ("g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e");
|
||
continue ;};_dfbdc :=_aacdb ._cdcg [0]._abcee [0]._ebbb ;_fecg [_dfbdc ]=append (_fecg [_dfbdc ],_aacdb );};};};};return _fecg ;};func (_aaed *textPara )bbox ()_ba .PdfRectangle {return _aaed .PdfRectangle };type textMark struct{_ba .PdfRectangle ;_gfbg int ;
|
||
_bfdb string ;_gccf string ;_ccdg *_ba .PdfFont ;_gccfd float64 ;_gccd float64 ;_aaccc _agf .Matrix ;_acdgd _agf .Point ;_dded _ba .PdfRectangle ;_fbaf _gff .Color ;_eaee _gff .Color ;_bbgf _add .PdfObject ;_fcbc []string ;Tw float64 ;Th float64 ;_ebbb int ;
|
||
_aeaee int ;};func _fdaf (_cfg ,_aabc _agf .Point )bool {_aebg :=_gf .Abs (_cfg .X -_aabc .X );_aabd :=_gf .Abs (_cfg .Y -_aabc .Y );return _caeb (_aabd ,_aebg );};func (_eabc *stateStack )push (_accg *textState ){_dfd :=*_accg ;*_eabc =append (*_eabc ,&_dfd )};
|
||
// findTextTables looks for tables among the paragraphs of the list. Paragraphs that are
// already taken or have zero width are skipped. For every other paragraph isAtom() supplies
// a candidate table (nil means none); the candidate is grown with growTable() and dropped
// when _eacg*_cgae is smaller than _ggca. Surviving tables get their cells marked, are
// logged as "grown" and are returned.
func (_fafc paraList )findTextTables ()[]*textTable {var _cabec []*textTable ;for _ ,_aefb :=range _fafc {if _aefb .taken ()||_aefb .Width ()==0{continue ;};_egdeb :=_aefb .isAtom ();if _egdeb ==nil {continue ;};_egdeb .growTable ();if _egdeb ._eacg *_egdeb ._cgae < _ggca {continue ;
|
||
// The line below finishes findTextTables and then defines:
//   - paraList.sortTopoOrder: reorders the list with the order returned by topoOrder().
//   - shapesState.stroke: appends a pathSection made of the current subpaths (_gfce) and the
//     current stroke color to *_agbf. Everything guarded by _adgbf / _ebce is debug output.
};_egdeb .markCells ();_egdeb .log ("\u0067\u0072\u006fw\u006e");_cabec =append (_cabec ,_egdeb );};return _cabec ;};func (_dafg paraList )sortTopoOrder (){_gcfe :=_dafg .topoOrder ();_dafg .reorder (_gcfe )};func (_cbaab *shapesState )stroke (_agbf *[]pathSection ){_dfgb :=pathSection {_dbdc :_cbaab ._gfce ,Color :_cbaab ._edf .getStrokeColor ()};
|
||
*_agbf =append (*_agbf ,_dfgb );if _adgbf {_agc .Printf ("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",len (*_agbf ),_cbaab ,_cbaab ._edf .getStrokeColor (),_dfgb .bbox ());
|
||
// The subpath dump stops after index 10. Once stroke ends, textObject is declared: it holds
// the owning Extractor, the page resources, a graphics state, the text state with its
// stack, two matrices, the collected text marks and a bool flag.
if _ebce {for _dbcb ,_dbfb :=range _cbaab ._gfce {_agc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dbcb ,_dbfb );if _dbcb ==10{break ;};};};};};type textObject struct{_cdb *Extractor ;_dbg *_ba .PdfPageResources ;_bace _fc .GraphicsState ;
|
||
// After the remaining textObject fields the line below defines:
//   - stateStack.empty: reports whether the stack holds no states.
//   - rulingList.snapToGroupsDirection (continues on the following lines): sorts the rulings
//     with sortStrict(), groups them by their _edga value, snaps every ruling to its group's
//     merged _edga, then merges the pieces of each group and returns them sorted.
// NOTE(review): _cecec[0] is read without a length check, so an empty list would panic —
// confirm that callers never pass one.
_efed *textState ;_fgbf *stateStack ;_eefe _agf .Matrix ;_dbc _agf .Matrix ;_cfde []*textMark ;_dgcf bool ;};func (_cbf *stateStack )empty ()bool {return len (*_cbf )==0};func (_cecec rulingList )snapToGroupsDirection ()rulingList {_cecec .sortStrict ();
|
||
// _bfcbgb maps a group leader to the rulings of its group. A ruling whose _edga is below
// the leader's by more than _cbdb is only reported as a wrong primary order.
_bfcbgb :=make (map[*ruling ]rulingList ,len (_cecec ));_aeff :=_cecec [0];_ecadgb :=func (_ggbad *ruling ){_aeff =_ggbad ;_bfcbgb [_aeff ]=rulingList {_ggbad }};_ecadgb (_cecec [0]);for _ ,_gffbe :=range _cecec [1:]{if _gffbe ._edga < _aeff ._edga -_cbdb {_ga .Log .Error ("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073",_aeff ,_gffbe );
|
||
// A ruling more than _gbb beyond the current leader starts a new group; otherwise it joins
// the current one. _daac then records mergePrimary() per group and _ecaaa maps every ruling
// back to its leader.
};if _gffbe ._edga > _aeff ._edga +_gbb {_ecadgb (_gffbe );}else {_bfcbgb [_aeff ]=append (_bfcbgb [_aeff ],_gffbe );};};_daac :=make (map[*ruling ]float64 ,len (_bfcbgb ));_ecaaa :=make (map[*ruling ]*ruling ,len (_cecec ));for _ggeb ,_ecga :=range _bfcbgb {_daac [_ggeb ]=_ecga .mergePrimary ();
|
||
// Every ruling's _edga is overwritten with its group's merged value. Each group is then
// split with splitSec() and every piece merged. Groups are visited in map iteration order;
// the result is re-sorted with sortStrict() before it is returned.
for _ ,_cbda :=range _ecga {_ecaaa [_cbda ]=_ggeb ;};};for _ ,_dffff :=range _cecec {_dffff ._edga =_daac [_ecaaa [_dffff ]];};_gbef :=make (rulingList ,0,len (_cecec ));for _ ,_decfa :=range _bfcbgb {_agee :=_decfa .splitSec ();for _caded ,_gaga :=range _agee {_cada :=_gaga .merge ();
|
||
// A merged ruling that aligns with the previously emitted one on both axes is logged as a
// duplicate and skipped.
if len (_gbef )> 0{_bgfa :=_gbef [len (_gbef )-1];if _bgfa .alignsPrimary (_cada )&&_bgfa .alignsSec (_cada ){_ga .Log .Error ("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073",_caded ,_bgfa ,_cada );
|
||
// After snapToGroupsDirection ends, _acfa begins: it filters text lines by their _bbfg
// value. With _edcg == -1 it keeps lines whose _bbfg is greater than _egfb; otherwise it
// keeps lines with _egfb < _bbfg < _edcg.
continue ;};};_gbef =append (_gbef ,_cada );};};_gbef .sortStrict ();return _gbef ;};func _acfa (_acfb []*textLine ,_edcg ,_egfb float64 )[]*textLine {var _aacc []*textLine ;for _ ,_ddfg :=range _acfb {if _edcg ==-1{if _ddfg ._bbfg > _egfb {_aacc =append (_aacc ,_ddfg );
|
||
// After _acfa ends, _ccab begins: the license gate. It returns nil when a license key exists
// and IsLicensed() is true, or when _de is set. Otherwise it prints an "unlicensed" notice
// to standard output and returns an error. The _adabe argument is not used in the body.
};}else {if _ddfg ._bbfg > _egfb &&_ddfg ._bbfg < _edcg {_aacc =append (_aacc ,_ddfg );};};};return _aacc ;};func _ccab (_adabe *PageText )error {_dgdge :=_ce .GetLicenseKey ();if _dgdge !=nil &&_dgdge .IsLicensed ()||_de {return nil ;};_agc .Printf ("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a");
|
||
_agc .Println ("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f");
|
||
return _b .New ("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064");};
|
||
|
||
// ImageExtractOptions contains options for controlling image extraction from
|
||
// PDF pages.
|
||
// The line below declares ImageExtractOptions (a single flag, IncludeInlineStencilMasks) and
// then begins compositeCell.split, which divides the composite cell into a textTable along
// the given corridors. Going by the debug format string, _aabb holds the row corridors and
// _dedd the column corridors; the table's _eacg is len(_dedd)+1 and its _cgae is
// len(_aabb)+1. Everything guarded by _efda is debug output only.
type ImageExtractOptions struct{IncludeInlineStencilMasks bool ;};func (_eacd compositeCell )split (_aabb ,_dedd []float64 )*textTable {_cebe :=len (_aabb )+1;_dbgd :=len (_dedd )+1;if _efda {_ga .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",_dbgd ,_cebe ,_eacd ,_aabb ,_dedd );
|
||
_agc .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a",len (_eacd .paraList ));for _feca ,_cbdbe :=range _eacd .paraList {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_feca ,_cbdbe .String ());
|
||
};_agc .Printf ("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",len (_eacd .lines ()));for _gfad ,_adcbe :=range _eacd .lines (){_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_gfad ,_adcbe );};};_aabb =_gecgf (_aabb ,_eacd .Ury ,_eacd .Lly );
|
||
// Both corridor lists are passed through _gecgf together with the cell's own bounds
// (presumably adding the outer edges — confirm in _gecgf). Note that _e.Slice sorts
// _eacd.paraList in place (by Lly, then Llx) because _dedfb shares its backing array.
_dedd =_gecgf (_dedd ,_eacd .Llx ,_eacd .Urx );_afbcf :=make (map[uint64 ]*textPara ,_dbgd *_cebe );_fdbc :=textTable {_eacg :_dbgd ,_cgae :_cebe ,_egfea :_afbcf };_dedfb :=_eacd .paraList ;_e .Slice (_dedfb ,func (_ebbe ,_facgf int )bool {_gccc ,_ggdff :=_dedfb [_ebbe ],_dedfb [_facgf ];
|
||
// _addcd maps every cell key _bgcc(column, row) to the rectangle bounded by consecutive
// corridor values: row i spans _aabb[i] (Ury) down to _aabb[i+1] (Lly), column j spans
// _dedd[j] (Llx) to _dedd[j+1] (Urx).
_dgggc ,_cded :=_gccc .Lly ,_ggdff .Lly ;if _dgggc !=_cded {return _dgggc < _cded ;};return _gccc .Llx < _ggdff .Llx ;});_addcd :=make (map[uint64 ]_ba .PdfRectangle ,_dbgd *_cebe );for _cece ,_cfadd :=range _aabb [1:]{_bedc :=_aabb [_cece ];for _ecfe ,_edefe :=range _dedd [1:]{_cggbf :=_dedd [_ecfe ];
|
||
_addcd [_bgcc (_ecfe ,_cece )]=_ba .PdfRectangle {Llx :_cggbf ,Urx :_edefe ,Lly :_cfadd ,Ury :_bedc };};};if _efda {_ga .Log .Info ("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073");
|
||
_agc .Printf ("\u0020\u0020\u0020\u0020");for _ccddfa :=0;_ccddfa < _dbgd ;_ccddfa ++{_agc .Printf ("\u0025\u0033\u0030\u0064\u002c\u0020",_ccddfa );};_agc .Println ();for _gaadg :=0;_gaadg < _cebe ;_gaadg ++{_agc .Printf ("\u0020\u0020\u0025\u0032\u0064\u003a",_gaadg );
|
||
// After the debug dump, _babdg is defined: it returns the (column, row) of the first cell
// rectangle for which _gcdb(rect, line rectangle) is true, scanning row by row, or (-1, -1)
// when no cell matches.
for _bfbgf :=0;_bfbgf < _dbgd ;_bfbgf ++{_agc .Printf ("\u00256\u002e\u0032\u0066\u002c\u0020",_addcd [_bgcc (_bfbgf ,_gaadg )]);};_agc .Println ();};};_babdg :=func (_bbgc *textLine )(int ,int ){for _eddc :=0;_eddc < _cebe ;_eddc ++{for _bfbge :=0;_bfbge < _dbgd ;
|
||
_bfbge ++{if _gcdb (_addcd [_bgcc (_bfbge ,_eddc )],_bbgc .PdfRectangle ){return _bfbge ,_eddc ;};};};return -1,-1;};_dgbc :=make (map[uint64 ][]*textLine ,_dbgd *_cebe );for _ ,_afeec :=range _dedfb .lines (){_dabfe ,_efgbe :=_babdg (_afeec );if _dabfe < 0{continue ;
|
||
// Lines without a matching cell are skipped; the others are bucketed per cell in _dgbc.
// Every non-empty bucket is then turned into a paragraph by _bfgg and stored with put().
};_dgbc [_bgcc (_dabfe ,_efgbe )]=append (_dgbc [_bgcc (_dabfe ,_efgbe )],_afeec );};for _eagc :=0;_eagc < len (_aabb )-1;_eagc ++{_ccgb :=_aabb [_eagc ];_ecffa :=_aabb [_eagc +1];for _gccgb :=0;_gccgb < len (_dedd )-1;_gccgb ++{_beef :=_dedd [_gccgb ];
|
||
_gbbb :=_dedd [_gccgb +1];_bcef :=_ba .PdfRectangle {Llx :_beef ,Urx :_gbbb ,Lly :_ecffa ,Ury :_ccgb };_baeg :=_dgbc [_bgcc (_gccgb ,_eagc )];if len (_baeg )==0{continue ;};_fecaa :=_bfgg (_bcef ,_baeg );_fdbc .put (_gccgb ,_eagc ,_fecaa );};};return &_fdbc ;
|
||
// _acdc (defined after split ends on the line below) walks the children (_abcc) of a list
// node and returns the text lines (_aebd) of the first child tagged "LBody", "Span" or
// "InlineShape"; an "LBody" child without lines is searched recursively instead. Children
// with other tags are skipped, and nil is returned when nothing matches.
};func _acdc (_agaea *list )[]*textLine {for _ ,_gecd :=range _agaea ._abcc {switch _gecd ._cdde {case "\u004c\u0042\u006fd\u0079":if len (_gecd ._aebd )!=0{return _gecd ._aebd ;};return _acdc (_gecd );case "\u0053\u0070\u0061\u006e":return _gecd ._aebd ;
|
||
// textTable.depth (begins on the line below) returns the smallest depth() among the cells
// get(x, 0) for x in [0, _eacg), ignoring nil cells and cells with _cddef set. The result
// stays at 1e10 when no cell qualifies.
case "I\u006e\u006c\u0069\u006e\u0065\u0053\u0068\u0061\u0070\u0065":return _gecd ._aebd ;};};return nil ;};func (_dfee *textTable )depth ()float64 {_efcg :=1e10;for _bacea :=0;_bacea < _dfee ._eacg ;_bacea ++{_adff :=_dfee .get (_bacea ,0);if _adff ==nil ||_adff ._cddef {continue ;
|
||
// paraList.findGridTables (begins on the line below) calls findTableGrid for each tiling.
// A non-nil table is logged, collected and has its cells marked; every key of the second
// findTableGrid result gets _gecb set to true.
};_efcg =_gf .Min (_efcg ,_adff .depth ());};return _efcg ;};func (_efdea paraList )findGridTables (_fbae []gridTiling )[]*textTable {if _efda {_ga .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_efdea ));
|
||
for _egfgd ,_efdd :=range _efdea {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_egfgd ,_efdd );};};var _fedg []*textTable ;for _bfcbc ,_abdab :=range _fbae {_dafa ,_gfcgg :=_efdea .findTableGrid (_abdab );if _dafa !=nil {_dafa .log (_agc .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_bfcbc ));
|
||
_fedg =append (_fedg ,_dafa );_dafa .markCells ();};for _dgbbe :=range _gfcgg {_dgbbe ._gecb =true ;};};if _efda {_ga .Log .Info ("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s",len (_fedg ));
|
||
// _caac (begins on the line below) builds an image from a stream object. It returns
// (nil, nil) when _ggcb is not a stream, passes on errors from NewXObjectImageFromStream
// and ToImage, and otherwise returns _ggfef(image, _afdad).
};return _fedg ;};func _caac (_ggcb _add .PdfObject ,_afdad _gff .Color )(_ag .Image ,error ){_geabd ,_aecc :=_add .GetStream (_ggcb );if !_aecc {return nil ,nil ;};_dfgad ,_badg :=_ba .NewXObjectImageFromStream (_geabd );if _badg !=nil {return nil ,_badg ;
|
||
// After _caac: textTable.get returns the cell stored under _bgcc(x, y) (nil when absent).
// _ffdg classifies the segment between two points from |dx| and |dy| via _cbggc and _ggdg.
};_dcdeb ,_badg :=_dfgad .ToImage ();if _badg !=nil {return nil ,_badg ;};return _ggfef (_dcdeb ,_afdad ),nil ;};func (_ebbd *textTable )get (_bedgb ,_gcag int )*textPara {return _ebbd ._egfea [_bgcc (_bedgb ,_gcag )]};func _ffdg (_egbb ,_adeg _agf .Point )rulingKind {_cbdd :=_gf .Abs (_egbb .X -_adeg .X );
|
||
// _fbbab returns the intersection of two rectangles and true, or a zero rectangle and false
// when _gabc(_aaa, _aadb) is false.
_afcgg :=_gf .Abs (_egbb .Y -_adeg .Y );return _cbggc (_cbdd ,_afcgg ,_ggdg );};func _fbbab (_aaa ,_aadb _ba .PdfRectangle )(_ba .PdfRectangle ,bool ){if !_gabc (_aaa ,_aadb ){return _ba .PdfRectangle {},false ;};return _ba .PdfRectangle {Llx :_gf .Max (_aaa .Llx ,_aadb .Llx ),Urx :_gf .Min (_aaa .Urx ,_aadb .Urx ),Lly :_gf .Max (_aaa .Lly ,_aadb .Lly ),Ury :_gf .Min (_aaa .Ury ,_aadb .Ury )},true ;
|
||
// _aadbb stamps the mark with the current offset, appends it to the slice and advances the
// offset by len(Text) in bytes. subpath.add appends the given points to the subpath.
};func _aadbb (_dace []TextMark ,_dbbcab *int ,_eebg TextMark )[]TextMark {_eebg .Offset =*_dbbcab ;_dace =append (_dace ,_eebg );*_dbbcab +=len (_eebg .Text );return _dace ;};func (_edde *subpath )add (_bdgc ..._agf .Point ){_edde ._aeee =append (_edde ._aeee ,_bdgc ...)};
|
||
func _cebgb (_daea []compositeCell )[]float64 {var _bbea []*textLine ;_cgba :=0;for _ ,_aadeg :=range _daea {_cgba +=len (_aadeg .paraList );_bbea =append (_bbea ,_aadeg .lines ()...);};_e .Slice (_bbea ,func (_cacb ,_ddcc int )bool {_feeg ,_cdgf :=_bbea [_cacb ],_bbea [_ddcc ];
|
||
_bebc ,_dabe :=_feeg ._bbfg ,_cdgf ._bbfg ;if !_dbeae (_bebc -_dabe ){return _bebc < _dabe ;};return _feeg .Llx < _cdgf .Llx ;});if _efda {_agc .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_cgba ,len (_bbea ));
|
||
for _fgddg ,_efdeg :=range _bbea {_agc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_fgddg ,_efdeg );};};var _agdd []float64 ;_bggad :=_bbea [0];var _dgeg [][]*textLine ;_eecf :=[]*textLine {_bggad };for _eafc ,_dgge :=range _bbea [1:]{if _dgge .Ury < _bggad .Lly {_eead :=0.5*(_dgge .Ury +_bggad .Lly );
|
||
if _efda {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_eafc ,_dgge .Ury ,_bggad .Lly ,_eead ,_bggad ,_dgge );
|
||
};_agdd =append (_agdd ,_eead );_dgeg =append (_dgeg ,_eecf );_eecf =nil ;};_eecf =append (_eecf ,_dgge );if _dgge .Lly < _bggad .Lly {_bggad =_dgge ;};};if len (_eecf )> 0{_dgeg =append (_dgeg ,_eecf );};if _efda {_agc .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_agdd );
|
||
};if _efda {_ga .Log .Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len (_daea ));for _bacdg ,_adbg :=range _daea {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bacdg ,_adbg );};_ga .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_dgeg ));
|
||
for _aefeaa ,_bbff :=range _dgeg {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_aefeaa ,len (_bbff ));for _bgcedd ,_bdef :=range _bbff {_agc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_bgcedd ,_bdef );};};};_ebdbc :=true ;
|
||
for _cdag ,_bccg :=range _dgeg {_bbdea :=true ;for _bcag ,_edac :=range _daea {if _efda {_agc .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_cdag ,len (_dgeg ),_bcag ,len (_daea ),_edac );
|
||
};if !_edac .hasLines (_bccg ){if _efda {_agc .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_cdag ,len (_dgeg ),_bcag ,len (_daea ));
|
||
};_bbdea =false ;break ;};};if !_bbdea {_ebdbc =false ;break ;};};if !_ebdbc {if _efda {_ga .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg");
|
||
};_agdd =nil ;};if _efda &&_agdd !=nil {_agc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_agdd );};return _agdd ;};
|
||
func (_bgee *textPara )toCellTextMarks (_bafe *int )[]TextMark {var _dfea []TextMark ;for _edcgb ,_fbcg :=range _bgee ._fdec {_cbgf :=_fbcg .toTextMarks (_bafe );_cgdcg :=_efgbd &&_fbcg .endsInHyphen ()&&_edcgb !=len (_bgee ._fdec )-1;if _cgdcg {_cbgf =_bgaca (_cbgf ,_bafe );
|
||
};_dfea =append (_dfea ,_cbgf ...);if !(_cgdcg ||_edcgb ==len (_bgee ._fdec )-1){_dfea =_ffeg (_dfea ,_bafe ,_beaa (_fbcg ._bbfg ,_bgee ._fdec [_edcgb +1]._bbfg ));};};return _dfea ;};func (_afacc compositeCell )hasLines (_gaeaa []*textLine )bool {for _fbebg ,_bacc :=range _gaeaa {_eac :=_gabc (_afacc .PdfRectangle ,_bacc .PdfRectangle );
|
||
if _efda {_agc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a",_eac ,_fbebg ,len (_gaeaa ));_agc .Printf ("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a",_afacc );
|
||
_agc .Printf ("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a",_bacc );};if _eac {return true ;};};return false ;};func (_ddabb *textTable )getDown ()paraList {_aeca :=make (paraList ,_ddabb ._eacg );for _gcgd :=0;_gcgd < _ddabb ._eacg ;
|
||
_gcgd ++{_afff :=_ddabb .get (_gcgd ,_ddabb ._cgae -1)._ecada ;if _afff .taken (){return nil ;};_aeca [_gcgd ]=_afff ;};for _dbac :=0;_dbac < _ddabb ._eacg -1;_dbac ++{if _aeca [_dbac ]._fdgf !=_aeca [_dbac +1]{return nil ;};};return _aeca ;};func _ggfef (_cgefc *_ba .Image ,_dfac _gff .Color )_ag .Image {_aacbc ,_dbgg :=int (_cgefc .Width ),int (_cgefc .Height );
|
||
_ecgd :=_ag .NewRGBA (_ag .Rect (0,0,_aacbc ,_dbgg ));for _defbc :=0;_defbc < _dbgg ;_defbc ++{for _fbca :=0;_fbca < _aacbc ;_fbca ++{_dcbb ,_eabeb :=_cgefc .ColorAt (_fbca ,_defbc );if _eabeb !=nil {_ga .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e",_fbca ,_defbc );
|
||
continue ;};_acbc ,_edec ,_dedg ,_ :=_dcbb .RGBA ();var _eefba _gff .Color ;if _acbc +_edec +_dedg ==0{_eefba =_dfac ;}else {_eefba =_gff .Transparent ;};_ecgd .Set (_fbca ,_defbc ,_eefba );};};return _ecgd ;};func _adgbc (_acebed map[int ][]float64 )[]int {_gegca :=make ([]int ,len (_acebed ));
|
||
_cface :=0;for _degce :=range _acebed {_gegca [_cface ]=_degce ;_cface ++;};_e .Ints (_gegca );return _gegca ;};
|
||
|
||
// String returns a string describing the TextMark: its offset, the quoted text followed by
// its runes in hex, the lower-left and upper-right corners of BBox, the font description
// (cut to 50 bytes plus "...") and a " *M*" suffix when Meta is set.
|
||
func (_caag TextMark )String ()string {_ddbg :=_caag .BBox ;var _gabe string ;if _caag .Font !=nil {_gabe =_caag .Font .String ();if len (_gabe )> 50{_gabe =_gabe [:50]+"\u002e\u002e\u002e";};};var _age string ;if _caag .Meta {_age ="\u0020\u002a\u004d\u002a";
|
||
};return _agc .Sprintf ("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",_caag .Offset ,_caag .Text ,[]rune (_caag .Text ),_ddbg .Llx ,_ddbg .Lly ,_ddbg .Urx ,_ddbg .Ury ,_gabe ,_age );
|
||
// The line below ends TextMark.String and declares textTable: a rectangle, the dimensions
// _eacg and _cgae, a flag, the cells keyed by uint64 (_egfea) and composite cells (_aaaga).
// It then begins _aaeed, which classifies a rectangle as a ruling kind: _aaad when it is
// wider than tall and at least _ggdg wide, _cfae when it is not wider than tall and at
// least _ggdg tall, and _fbcc otherwise.
};type textTable struct{_ba .PdfRectangle ;_eacg ,_cgae int ;_edeg bool ;_egfea map[uint64 ]*textPara ;_aaaga map[uint64 ]compositeCell ;};func _aaeed (_bdgbf _ba .PdfRectangle )rulingKind {_egbfg :=_bdgbf .Width ();_bgegc :=_bdgbf .Height ();if _egbfg > _bgegc {if _egbfg >=_ggdg {return _aaad ;
|
||
// After _aaeed: textWord.absorb merges another word into this one (the rectangles are
// combined with _bbbafc and the other word's marks are appended). rulingList.bbox begins.
};}else {if _bgegc >=_ggdg {return _cfae ;};};return _fbcc ;};func (_baga *textWord )absorb (_cbgcd *textWord ){_baga .PdfRectangle =_bbbafc (_baga .PdfRectangle ,_cbgcd .PdfRectangle );_baga ._abcee =append (_baga ._abcee ,_cbgcd ._abcee ...);};func (_ebgb rulingList )bbox ()_ba .PdfRectangle {var _ecbe _ba .PdfRectangle ;
|
||
// rulingList.bbox logs an error and returns a zero rectangle for an empty list. Otherwise
// the kind of the first ruling selects the axes: for _aaad the x range comes from
// secMinMax() and the y range from primMinMax(); for any other kind it is the reverse.
if len (_ebgb )==0{_ga .Log .Error ("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073");return _ba .PdfRectangle {};};if _ebgb [0]._bbce ==_aaad {_ecbe .Llx ,_ecbe .Urx =_ebgb .secMinMax ();
|
||
// newTextMark (begins after bbox on the line below) builds a textMark for the text _eeagb
// drawn with matrix _afcg and ending at point _ddca. The bool result is false when the
// mark's box does not intersect the extractor's media box (_cdb._cb). The _gbfe argument is
// not used in the body.
_ecbe .Lly ,_ecbe .Ury =_ebgb .primMinMax ();}else {_ecbe .Llx ,_ecbe .Urx =_ebgb .primMinMax ();_ecbe .Lly ,_ecbe .Ury =_ebgb .secMinMax ();};return _ecbe ;};func (_degd *textObject )newTextMark (_eeagb string ,_afcg _agf .Matrix ,_ddca _agf .Point ,_gbfe float64 ,_ddeg *_ba .PdfFont ,_ddbgf float64 ,_deff ,_gagfg _gff .Color ,_abde _add .PdfObject ,_fegde []string ,_adag int ,_adcd int )(textMark ,bool ){_fcae :=_afcg .Angle ();
|
||
// _eefad is the matrix angle passed through _cafaa with _cbbd (presumably rounding to a
// multiple of _cbbd — confirm in _cafaa). _eccd is ScalingFactorX when _eefad%180 == 90 and
// ScalingFactorY otherwise. _bcgb initially spans from _eafa(_afcg) to _ddca.
_eefad :=_cafaa (_fcae ,_cbbd );var _eccd float64 ;if _eefad %180!=90{_eccd =_afcg .ScalingFactorY ();}else {_eccd =_afcg .ScalingFactorX ();};_feda :=_eafa (_afcg );_bcgb :=_ba .PdfRectangle {Llx :_feda .X ,Lly :_feda .Y ,Urx :_ddca .X ,Ury :_ddca .Y };
|
||
// The box is extended by _eccd on the side given by the orientation (any value other than
// 0, 90, 180 or 270 is treated as 0) and then normalised so that Llx <= Urx and Lly <= Ury.
switch _eefad %360{case 90:_bcgb .Urx -=_eccd ;case 180:_bcgb .Ury -=_eccd ;case 270:_bcgb .Urx +=_eccd ;case 0:_bcgb .Ury +=_eccd ;default:_eefad =0;_bcgb .Ury +=_eccd ;};if _bcgb .Llx > _bcgb .Urx {_bcgb .Llx ,_bcgb .Urx =_bcgb .Urx ,_bcgb .Llx ;};if _bcgb .Lly > _bcgb .Ury {_bcgb .Lly ,_bcgb .Ury =_bcgb .Ury ,_bcgb .Lly ;
|
||
// When the media box has a positive width the box is clipped to it. If the two do not
// intersect, _bcgb becomes the zero rectangle returned by _fbbab and _baedf is false.
};_baedf :=true ;if _degd ._cdb ._cb .Width ()> 0{_efca ,_afgb :=_fbbab (_bcgb ,_degd ._cdb ._cb );if !_afgb {_baedf =false ;_ga .Log .Debug ("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q",_bcgb ,_degd ._cdb ._cb ,_eeagb );
|
||
// _dabgb is _bcgb remapped for the 90, 180 and 270 orientations using the media box
// extents; it becomes the mark's PdfRectangle while the unrotated _bcgb is kept in _dded.
};_bcgb =_efca ;};_dabgb :=_bcgb ;_fbea :=_degd ._cdb ._cb ;switch _eefad %360{case 90:_fbea .Urx ,_fbea .Ury =_fbea .Ury ,_fbea .Urx ;_dabgb =_ba .PdfRectangle {Llx :_fbea .Urx -_bcgb .Ury ,Urx :_fbea .Urx -_bcgb .Lly ,Lly :_bcgb .Llx ,Ury :_bcgb .Urx };
|
||
case 180:_dabgb =_ba .PdfRectangle {Llx :_fbea .Urx -_bcgb .Llx ,Urx :_fbea .Urx -_bcgb .Urx ,Lly :_fbea .Ury -_bcgb .Lly ,Ury :_fbea .Ury -_bcgb .Ury };case 270:_fbea .Urx ,_fbea .Ury =_fbea .Ury ,_fbea .Urx ;_dabgb =_ba .PdfRectangle {Llx :_bcgb .Ury ,Urx :_bcgb .Lly ,Lly :_fbea .Ury -_bcgb .Llx ,Ury :_fbea .Ury -_bcgb .Urx };
|
||
// Normalise _dabgb and assemble the mark; Th and Tw are copied from the current text state.
};if _dabgb .Llx > _dabgb .Urx {_dabgb .Llx ,_dabgb .Urx =_dabgb .Urx ,_dabgb .Llx ;};if _dabgb .Lly > _dabgb .Ury {_dabgb .Lly ,_dabgb .Ury =_dabgb .Ury ,_dabgb .Lly ;};_cffee :=textMark {_bfdb :_eeagb ,PdfRectangle :_dabgb ,_dded :_bcgb ,_ccdg :_ddeg ,_gccfd :_eccd ,_gccd :_ddbgf ,_aaccc :_afcg ,_acdgd :_ddca ,_gfbg :_eefad ,_fbaf :_deff ,_eaee :_gagfg ,_bbgf :_abde ,_fcbc :_fegde ,Th :_degd ._efed ._def ,Tw :_degd ._efed ._gdeg ,_ebbb :_adcd ,_aeaee :_adag };
|
||
if _bfgae {_ga .Log .Info ("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073",_feda ,_ddca ,_cffee .String ());};return _cffee ,_baedf ;
|
||
// textPara.writeCellText (begins on the line below) writes the paragraph's lines to _fbbg.
// When _efgbd is set and a line other than the last ends in a hyphen, its text goes through
// _cabf and no separator follows. Otherwise consecutive lines are separated by
// _beaa(this line's _bbfg, next line's _bbfg). Errors returned by Write are ignored.
};func (_bgac *textPara )writeCellText (_fbbg _gg .Writer ){for _daaa ,_dfcb :=range _bgac ._fdec {_ccda :=_dfcb .text ();_bagb :=_efgbd &&_dfcb .endsInHyphen ()&&_daaa !=len (_bgac ._fdec )-1;if _bagb {_ccda =_cabf (_ccda );};_fbbg .Write ([]byte (_ccda ));
|
||
if !(_bagb ||_daaa ==len (_bgac ._fdec )-1){_fbbg .Write ([]byte (_beaa (_dfcb ._bbfg ,_bgac ._fdec [_daaa +1]._bbfg )));};};};
|
||
|
||
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
|
||
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
|
||
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
|
||
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
|
||
// The RenderModeStroke, RenderModeFill and RenderModeClip constants of this type are
// declared with 1<<iota, i.e. they have the values 1, 2 and 4.
type RenderMode int ;
|
||
|
||
// PageText represents the layout of text on a device page.
|
||
// All fields are unexported. Judging by their types only (roles to be confirmed against the
// methods of PageText): _aebf holds the raw text marks, _ggff a string, _ccca the exported
// TextMark values, _gdba the extracted TextTables, _bbdc a page rectangle, _efea and _ebafc
// path sections, _abge and _abad PDF objects, _cde the content stream operations and _eaag
// the extraction options.
type PageText struct{_aebf []*textMark ;_ggff string ;_ccca []TextMark ;_gdba []TextTable ;_bbdc _ba .PdfRectangle ;_efea []pathSection ;_ebafc []pathSection ;_abge *_add .PdfObject ;_abad _add .PdfObject ;_cde *_fc .ContentStreamOperations ;_eaag PageTextOptions ;
|
||
// The line below closes PageText and then defines textObject.setTextLeading, which does
// nothing for a nil receiver and otherwise stores _fbfd in the text state's _bfa field.
// It ends with the wordBag declaration: a rectangle, two float64 values (_cdea, _ebgd), two
// ruling lists and a map from int to slices of words.
};func (_fegb *textObject )setTextLeading (_fbfd float64 ){if _fegb ==nil {return ;};_fegb ._efed ._bfa =_fbfd ;};type wordBag struct{_ba .PdfRectangle ;_cdea float64 ;_febe ,_cgdae rulingList ;_ebgd float64 ;_fcgd map[int ][]*textWord ;};
|
||
|
||
// String returns a description of the ruling: its kind, the _edga coordinate, the _fcec to
// _abeg range with its length in parentheses, _ccaa, the color and, when _ebcee is non-zero,
// the width. The coordinate labels are "x" then "y", swapped to "y" then "x" for kind _aaad.
// A ruling of kind _fbcc is described as "NOT RULING".
|
||
func (_ebag *ruling )String ()string {if _ebag ._bbce ==_fbcc {return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047";};_ggba ,_ccbd :="\u0078","\u0079";if _ebag ._bbce ==_aaad {_ggba ,_ccbd ="\u0079","\u0078";};_gbdd :="";if _ebag ._ebcee !=0.0{_gbdd =_agc .Sprintf (" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_ebag ._ebcee );
|
||
};return _agc .Sprintf ("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",_ebag ._bbce ,_ggba ,_ebag ._edga ,_ccbd ,_ebag ._fcec ,_ebag ._abeg ,_ebag ._abeg -_ebag ._fcec ,_ebag ._ccaa ,_ebag .Color ,_gbdd );
|
||
// The line below ends ruling.String and defines _bafa, which returns _eddba(a, b) unless
// _dbeae reports true for that value (presumably a near-zero test — confirm), in which case
// _abag(a, b) is returned. shapesState.lineTo then begins: it logs the point and its device
// coordinates when _fdbg is set and adds the point to the path.
};func _bafa (_faabf ,_dfeb bounded )float64 {_addba :=_eddba (_faabf ,_dfeb );if !_dbeae (_addba ){return _addba ;};return _abag (_faabf ,_dfeb );};func (_fda *shapesState )lineTo (_ggdd ,_gcbfg float64 ){if _fdbg {_ga .Log .Info ("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066",_ggdd ,_gcbfg ,_fda .devicePoint (_ggdd ,_gcbfg ));
|
||
// imageExtractContext.extractContentStreamImages (begins on the line below) parses the
// content stream _bf, lazily creates the image cache (_ca) and the default options (_gce),
// and runs a content stream processor that passes every operand to processOperand.
};_fda .addPoint (_ggdd ,_gcbfg );};func (_fga *imageExtractContext )extractContentStreamImages (_bf string ,_bb *_ba .PdfPageResources )error {_ccf :=_fc .NewContentStreamParser (_bf );_dce ,_bfe :=_ccf .Parse ();if _bfe !=nil {return _bfe ;};if _fga ._ca ==nil {_fga ._ca =map[*_add .PdfObjectStream ]*cachedImage {};
|
||
// _ecbcb (begins on the line below) returns the keys of the map in descending order: they
// are sorted ascending and then reversed in place.
};if _fga ._gce ==nil {_fga ._gce =&ImageExtractOptions {};};_edd :=_fc .NewContentStreamProcessor (*_dce );_edd .AddHandler (_fc .HandlerConditionEnumAllOperands ,"",_fga .processOperand );return _edd .Process (_bb );};func _ecbcb (_dcca map[float64 ]map[float64 ]gridTile )[]float64 {_efbga :=make ([]float64 ,0,len (_dcca ));
|
||
// paraList.applyTables (begins on the line below) returns a new list holding one table
// paragraph (newTablePara) per table, followed by every paragraph whose _gecb flag is unset.
for _cecg :=range _dcca {_efbga =append (_efbga ,_cecg );};_e .Float64s (_efbga );_gcfbd :=len (_efbga );for _gbea :=0;_gbea < _gcfbd /2;_gbea ++{_efbga [_gbea ],_efbga [_gcfbd -1-_gbea ]=_efbga [_gcfbd -1-_gbea ],_efbga [_gbea ];};return _efbga ;};func (_gdgb paraList )applyTables (_gebbf []*textTable )paraList {var _aefgd paraList ;
|
||
// gridTile.complete (end of the line below) reports whether numBorders() equals 4.
for _ ,_ecaeb :=range _gebbf {_aefgd =append (_aefgd ,_ecaeb .newTablePara ());};for _ ,_fecc :=range _gdgb {if _fecc ._gecb {continue ;};_aefgd =append (_aefgd ,_fecc );};return _aefgd ;};func (_eeage gridTile )complete ()bool {return _eeage .numBorders ()==4};
|
||
func (_gbg *textObject )setWordSpacing (_fded float64 ){if _gbg ==nil {return ;};_gbg ._efed ._gdeg =_fded ;};func _ecaea (_acbf []*textLine )[]*textLine {_fced :=[]*textLine {};for _ ,_acebe :=range _acbf {_dgfc :=_acebe .text ();_gfdb :=_cffd .Find ([]byte (_dgfc ));
|
||
if _gfdb !=nil {_fced =append (_fced ,_acebe );};};return _fced ;};var _ccacg =_d .MustCompile ("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024");func _cage (_ddea *_fc .ContentStreamOperation )(float64 ,error ){if len (_ddea .Params )!=1{_gcff :=_b .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");
|
||
_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_ddea .Operand ,1,len (_ddea .Params ),_ddea .Params );
|
||
return 0.0,_gcff ;};return _add .GetNumberAsFloat (_ddea .Params [0]);};
|
||
|
||
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
|
||
func (_gfa *TextMarkArray )BBox ()(_ba .PdfRectangle ,bool ){var _adbf _ba .PdfRectangle ;_bgdb :=false ;for _ ,_eadd :=range _gfa ._ffca {if _eadd .Meta ||_efcbe (_eadd .Text ){continue ;};if _bgdb {_adbf =_bbbafc (_adbf ,_eadd .BBox );}else {_adbf =_eadd .BBox ;
|
||
_bgdb =true ;};};return _adbf ,_bgdb ;};type structElement struct{_bbag string ;_abff []structElement ;_cfcb int64 ;_eada _add .PdfObject ;};func _fbbd (_fgaf *list ,_efedf *string )string {_bbeb :=_f .Split (_fgaf ._ddef ,"\u000a");_cabd :=&_f .Builder {};
|
||
for _ ,_cafae :=range _bbeb {if _cafae !=""{_cabd .WriteString (*_efedf );_cabd .WriteString (_cafae );_cabd .WriteString ("\u000a");};};return _cabd .String ();};func (_dcac *shapesState )quadraticTo (_cdee ,_beaf ,_dcce ,_agcf float64 ){if _fdbg {_ga .Log .Info ("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a");
|
||
};_dcac .addPoint (_dcce ,_agcf );};func _bgaca (_bfgag []TextMark ,_acfed *int )[]TextMark {_adgg :=_bfgag [len (_bfgag )-1];_badef :=[]rune (_adgg .Text );if len (_badef )==1{_bfgag =_bfgag [:len (_bfgag )-1];_egfbg :=_bfgag [len (_bfgag )-1];*_acfed =_egfbg .Offset +len (_egfbg .Text );
|
||
}else {_gbgcg :=_cabf (_adgg .Text );*_acfed +=len (_gbgcg )-len (_adgg .Text );_adgg .Text =_gbgcg ;};return _bfgag ;};func (_bffa *textObject )showText (_bbab _add .PdfObject ,_egbd []byte ,_dgd int )error {return _bffa .renderText (_bbab ,_egbd ,_dgd );
|
||
};func _agde (_aagff []pathSection )rulingList {_beebf (_aagff );if _adgbf {_ga .Log .Info ("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073",len (_aagff ));
|
||
};var _egga rulingList ;for _ ,_dafe :=range _aagff {for _ ,_dbbdg :=range _dafe ._dbdc {if len (_dbbdg ._aeee )< 2{continue ;};_dcff :=_dbbdg ._aeee [0];for _ ,_ecgbb :=range _dbbdg ._aeee [1:]{if _ddag ,_geea :=_eacc (_dcff ,_ecgbb ,_dafe .Color );_geea {_egga =append (_egga ,_ddag );
|
||
};_dcff =_ecgbb ;};};};if _adgbf {_ga .Log .Info ("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073",_egga );};return _egga ;};func (_ccdc *textPara )text ()string {_affg :=new (_dc .Buffer );_ccdc .writeText (_affg );
|
||
return _affg .String ();};
|
||
|
||
// TextMark represents extracted text on a page with information regarding both textual content,
|
||
// formatting (font and size) and positioning.
|
||
// It is the smallest unit of text on a PDF page, typically a single character.
|
||
//
|
||
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
|
||
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
|
||
// `bbox` of substring `term` in `text`.
|
||
//
|
||
// ex, _ := New(page)
|
||
// // handle errors
|
||
// pageText, _, _, err := ex.ExtractPageText()
|
||
// // handle errors
|
||
// text := pageText.Text()
|
||
// textMarks := pageText.Marks()
|
||
//
|
||
// start := strings.Index(text, term)
|
||
// end := start + len(term)
|
||
// spanMarks, err := textMarks.RangeOffset(start, end)
|
||
// // handle errors
|
||
// bbox, ok := spanMarks.BBox()
|
||
// // handle errors
|
||
type TextMark struct{
|
||
|
||
// Text is the extracted text.
|
||
Text string ;
|
||
|
||
// Original is the text in the PDF. It has not been decoded like `Text`.
|
||
Original string ;
|
||
|
||
// BBox is the bounding box of the text.
|
||
BBox _ba .PdfRectangle ;
|
||
|
||
// Font is the font the text was drawn with.
|
||
Font *_ba .PdfFont ;
|
||
|
||
// FontSize is the font size the text was drawn with.
|
||
FontSize float64 ;
|
||
|
||
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
|
||
// text, textMarks := pageText.Text(), pageText.Marks()
|
||
// marks := textMarks.Elements()
|
||
// then marks[i].Offset is the offset of marks[i].Text in text.
|
||
Offset int ;
|
||
|
||
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
|
||
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
|
||
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
|
||
Meta bool ;
|
||
|
||
// FillColor is the fill color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
FillColor _gff .Color ;
|
||
|
||
// StrokeColor is the stroke color of the text.
|
||
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
|
||
StrokeColor _gff .Color ;
|
||
|
||
// Orientation is the text orientation
|
||
Orientation int ;
|
||
|
||
// DirectObject is the underlying PdfObject (Text Object) that represents the visible texts. This is introduced to get
|
||
// a simple access to the TextObject in case editing or replacement of some text is needed, e.g. during redaction.
|
||
DirectObject _add .PdfObject ;
|
||
|
||
// ObjString is a decoded string operand of a text-showing operator. It has the same value as `Text` attribute except
|
||
// when many glyphs are represented with the same Text Object that contains multiple length string operand in which case
|
||
// ObjString spans more than one character string that falls in different TextMark objects.
|
||
ObjString []string ;Tw float64 ;Th float64 ;Tc float64 ;Index int ;_efeg bool ;_cfaa *TextTable ;};func _cabcg (_caae []*textMark ,_cagb _ba .PdfRectangle ,_agbac rulingList ,_ccffd []gridTiling ,_adea bool )paraList {_ga .Log .Trace ("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066",len (_caae ),_cagb );
|
||
if len (_caae )==0{return nil ;};_bacd :=_dbcba (_caae ,_cagb );if len (_bacd )==0{return nil ;};_agbac .log ("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065");_fdfa ,_beee :=_agbac .vertsHorzs ();_aaae :=_adad (_bacd ,_cagb .Ury ,_fdfa ,_beee );
|
||
_eccg :=_fbgb (_aaae ,_cagb .Ury ,_fdfa ,_beee );_eccg =_fdgb (_eccg );_faga :=make (paraList ,0,len (_eccg ));for _ ,_fbac :=range _eccg {_eefd :=_fbac .arrangeText ();if _eefd !=nil {_faga =append (_faga ,_eefd );};};if !_adea &&len (_faga )>=_ggca {_faga =_faga .extractTables (_ccffd );
|
||
};_faga .sortReadingOrder ();if !_adea {_faga .sortTopoOrder ();};_faga .log ("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072");return _faga ;};const (RenderModeStroke RenderMode =1<<iota ;
|
||
RenderModeFill ;RenderModeClip ;);func (_bdad *textLine )appendWord (_ccbg *textWord ){_bdad ._cdcg =append (_bdad ._cdcg ,_ccbg );_bdad .PdfRectangle =_bbbafc (_bdad .PdfRectangle ,_ccbg .PdfRectangle );if _ccbg ._eabbf > _bdad ._fdfb {_bdad ._fdfb =_ccbg ._eabbf ;
|
||
};if _ccbg ._dfagd > _bdad ._bbfg {_bdad ._bbfg =_ccbg ._dfagd ;};};func _ddbd (_ebac _agf .Point )*subpath {return &subpath {_aeee :[]_agf .Point {_ebac }}};func _eddaee (_ccggd _ba .PdfColorspace ,_gccdd _ba .PdfColor )_gff .Color {if _ccggd ==nil ||_gccdd ==nil {return _gff .Black ;
|
||
};_dfdgf ,_ccffa :=_ccggd .ColorToRGB (_gccdd );if _ccffa !=nil {_ga .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073",_gccdd ,_ccggd ,_ccffa );
|
||
return _gff .Black ;};_ffcae ,_debgc :=_dfdgf .(*_ba .PdfColorDeviceRGB );if !_debgc {_ga .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076",_dfdgf );
|
||
return _gff .Black ;};return _gff .NRGBA {R :uint8 (_ffcae .R ()*255),G :uint8 (_ffcae .G ()*255),B :uint8 (_ffcae .B ()*255),A :uint8 (255)};};func (_cfce rulingList )toGrids ()[]rulingList {if _adgbf {_ga .Log .Info ("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073",_cfce );
|
||
};_afede :=_cfce .intersections ();if _adgbf {_ga .Log .Info ("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020",len (_cfce ),len (_afede ));
|
||
for _ ,_abccg :=range _ggad (_afede ){_agc .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_abccg ,_afede [_abccg ]);};};_aefea :=make (map[int ]intSet ,len (_cfce ));for _dbbcg :=range _cfce {_ddagc :=_cfce .connections (_afede ,_dbbcg );if len (_ddagc )> 0{_aefea [_dbbcg ]=_ddagc ;
|
||
};};if _adgbf {_ga .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064",len (_aefea ));for _ ,_ebcd :=range _ggad (_aefea ){_agc .Printf ("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n",_ebcd ,_aefea [_ebcd ]);
|
||
};};_adef :=_aedcc (len (_cfce ),func (_bbfd ,_gcfae int )bool {_eeec ,_dbga :=len (_aefea [_bbfd ]),len (_aefea [_gcfae ]);if _eeec !=_dbga {return _eeec > _dbga ;};return _cfce .comp (_bbfd ,_gcfae );});if _adgbf {_ga .Log .Info ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076",_adef );
|
||
};_dacf :=[][]int {{_adef [0]}};_gggc :for _ ,_dfgc :=range _adef [1:]{for _aeec ,_bgacf :=range _dacf {for _ ,_cfec :=range _bgacf {if _aefea [_cfec ].has (_dfgc ){_dacf [_aeec ]=append (_bgacf ,_dfgc );continue _gggc ;};};};_dacf =append (_dacf ,[]int {_dfgc });
|
||
};if _adgbf {_ga .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076",_dacf );};_e .SliceStable (_dacf ,func (_fafd ,_caee int )bool {return len (_dacf [_fafd ])> len (_dacf [_caee ])});for _ ,_gaeed :=range _dacf {_e .Slice (_gaeed ,func (_eeeca ,_fgab int )bool {return _cfce .comp (_gaeed [_eeeca ],_gaeed [_fgab ])});
|
||
};_gdeab :=make ([]rulingList ,len (_dacf ));for _ffde ,_acdb :=range _dacf {_cbeb :=make (rulingList ,len (_acdb ));for _fabc ,_ebfa :=range _acdb {_cbeb [_fabc ]=_cfce [_ebfa ];};_gdeab [_ffde ]=_cbeb ;};if _adgbf {_ga .Log .Info ("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076",_gdeab );
|
||
};var _fggagg []rulingList ;for _ ,_fbfdg :=range _gdeab {if _dccg ,_dbbef :=_fbfdg .isActualGrid ();_dbbef {_fbfdg =_dccg ;_fbfdg =_fbfdg .snapToGroups ();_fggagg =append (_fggagg ,_fbfdg );};};if _adgbf {_gbbc ("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073",_fggagg );
|
||
_ga .Log .Info ("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064",len (_gdeab ),len (_fggagg ));};return _fggagg ;};func _beebf (_fcde []pathSection ){if _dgaf < 0.0{return ;
|
||
};if _adgbf {_ga .Log .Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_fcde ));};for _beaag ,_fegc :=range _fcde {for _afbee ,_efbbee :=range _fegc ._dbdc {for _egggeg ,_ffeea :=range _efbbee ._aeee {_efbbee ._aeee [_egggeg ]=_agf .Point {X :_babb (_ffeea .X ),Y :_babb (_ffeea .Y )};
|
||
if _adgbf {_aafe :=_efbbee ._aeee [_egggeg ];if !_gaeg (_ffeea ,_aafe ){_egfde :=_agf .Point {X :_aafe .X -_ffeea .X ,Y :_aafe .Y -_ffeea .Y };_agc .Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_beaag ,_afbee ,_egggeg ,_ffeea ,_aafe ,_egfde );
|
||
};};};};};};func _acac (_bbcg *Extractor ,_fgfa *_ba .PdfPageResources ,_efaa _fc .GraphicsState ,_cadd *textState ,_gdce *stateStack )*textObject {return &textObject {_cdb :_bbcg ,_dbg :_fgfa ,_bace :_efaa ,_fgbf :_gdce ,_efed :_cadd ,_eefe :_agf .IdentityMatrix (),_dbc :_agf .IdentityMatrix ()};
|
||
};type textWord struct{_ba .PdfRectangle ;_dfagd float64 ;_eaae string ;_abcee []*textMark ;_eabbf float64 ;_gdec bool ;};func _bbbafc (_bgga ,_dfda _ba .PdfRectangle )_ba .PdfRectangle {return _ba .PdfRectangle {Llx :_gf .Min (_bgga .Llx ,_dfda .Llx ),Lly :_gf .Min (_bgga .Lly ,_dfda .Lly ),Urx :_gf .Max (_bgga .Urx ,_dfda .Urx ),Ury :_gf .Max (_bgga .Ury ,_dfda .Ury )};
|
||
};func (_dbcf *textTable )isExportable ()bool {if _dbcf ._edeg {return true ;};_gged :=func (_effed int )bool {_fgec :=_dbcf .get (0,_effed );if _fgec ==nil {return false ;};_eabea :=_fgec .text ();_bfbbf :=_a .RuneCountInString (_eabea );_gbefc :=_ccacg .MatchString (_eabea );
|
||
return _bfbbf <=1||_gbefc ;};for _cddgc :=0;_cddgc < _dbcf ._cgae ;_cddgc ++{if !_gged (_cddgc ){return true ;};};return false ;};type shapesState struct{_gafa _agf .Matrix ;_gdfd _agf .Matrix ;_gfce []*subpath ;_gbdgg bool ;_gcaaf _agf .Point ;_edf *textObject ;
|
||
};const (_efgbd =true ;_cafd =true ;_fecd =true ;_afgf =false ;_bbdcd =false ;_ebcg =6;_cgdda =3.0;_ceab =200;_bfcf =true ;_cbdec =true ;_gbge =true ;_gaedd =true ;_ecbfe =false ;);func _ceee (_agfd float64 )int {var _eeff int ;if _agfd >=0{_eeff =int (_agfd /_dage );
|
||
}else {_eeff =int (_agfd /_dage )-1;};return _eeff ;};
|
||
|
||
// TextMarkArray is a collection of TextMarks.
|
||
type TextMarkArray struct{_ffca []TextMark };func (_ebea rulingList )augmentGrid ()(rulingList ,rulingList ){_egfcc ,_eebf :=_ebea .vertsHorzs ();if len (_egfcc )==0||len (_eebf )==0{return _egfcc ,_eebf ;};_faeb ,_dgaa :=_egfcc ,_eebf ;_bcad :=_egfcc .bbox ();
|
||
_cace :=_eebf .bbox ();if _adgbf {_ga .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066",_bcad );_ga .Log .Info ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066",_cace );
|
||
};var _acgc ,_ecde ,_effb ,_cfbeb *ruling ;if _cace .Llx < _bcad .Llx -_dfed {_acgc =&ruling {_ccaa :_degg ,_bbce :_cfae ,_edga :_cace .Llx ,_fcec :_bcad .Lly ,_abeg :_bcad .Ury };_egfcc =append (rulingList {_acgc },_egfcc ...);};if _cace .Urx > _bcad .Urx +_dfed {_ecde =&ruling {_ccaa :_degg ,_bbce :_cfae ,_edga :_cace .Urx ,_fcec :_bcad .Lly ,_abeg :_bcad .Ury };
|
||
_egfcc =append (_egfcc ,_ecde );};if _bcad .Lly < _cace .Lly -_dfed {_effb =&ruling {_ccaa :_degg ,_bbce :_aaad ,_edga :_bcad .Lly ,_fcec :_cace .Llx ,_abeg :_cace .Urx };_eebf =append (rulingList {_effb },_eebf ...);};if _bcad .Ury > _cace .Ury +_dfed {_cfbeb =&ruling {_ccaa :_degg ,_bbce :_aaad ,_edga :_bcad .Ury ,_fcec :_cace .Llx ,_abeg :_cace .Urx };
|
||
_eebf =append (_eebf ,_cfbeb );};if len (_egfcc )+len (_eebf )==len (_ebea ){return _faeb ,_dgaa ;};_bagg :=append (_egfcc ,_eebf ...);_ebea .log ("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064");_bagg .log ("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d");
|
||
return _egfcc ,_eebf ;};func _bdfa (_bcegd map[float64 ][]*textLine )[]float64 {_dcee :=[]float64 {};for _fbgc :=range _bcegd {_dcee =append (_dcee ,_fbgc );};_e .Float64s (_dcee );return _dcee ;};func (_dfgef paraList )toTextMarks ()[]TextMark {_cfbe :=0;
|
||
var _cbbdg []TextMark ;for _bccd ,_dbce :=range _dfgef {if _dbce ._cddef {continue ;};_efae :=_dbce .toTextMarks (&_cfbe );_cbbdg =append (_cbbdg ,_efae ...);if _bccd !=len (_dfgef )-1{if _gfgc (_dbce ,_dfgef [_bccd +1]){_cbbdg =_ffeg (_cbbdg ,&_cfbe ,"\u0020");
|
||
}else {_cbbdg =_ffeg (_cbbdg ,&_cfbe ,"\u000a");_cbbdg =_ffeg (_cbbdg ,&_cfbe ,"\u000a");};};};_cbbdg =_ffeg (_cbbdg ,&_cfbe ,"\u000a");_cbbdg =_ffeg (_cbbdg ,&_cfbe ,"\u000a");return _cbbdg ;};func (_gcccd *textTable )compositeRowCorridors ()map[int ][]float64 {_dbbbdb :=make (map[int ][]float64 ,_gcccd ._cgae );
|
||
if _efda {_ga .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_gcccd ._cgae );};for _faadf :=1;_faadf < _gcccd ._cgae ;_faadf ++{var _bebg []compositeCell ;
|
||
for _fgdf :=0;_fgdf < _gcccd ._eacg ;_fgdf ++{if _abbb ,_ggbed :=_gcccd ._aaaga [_bgcc (_fgdf ,_faadf )];_ggbed {_bebg =append (_bebg ,_abbb );};};if len (_bebg )==0{continue ;};_gdgeg :=_cebgb (_bebg );_dbbbdb [_faadf ]=_gdgeg ;if _efda {_agc .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_faadf ,_gdgeg );
|
||
};};return _dbbbdb ;};func (_dbcc *textWord )computeText ()string {_afad :=make ([]string ,len (_dbcc ._abcee ));for _dbbf ,_bdfdd :=range _dbcc ._abcee {_afad [_dbbf ]=_bdfdd ._bfdb ;};return _f .Join (_afad ,"");};func _bfcbd (_acce *textWord ,_gfae float64 ,_bgcd ,_bbbaf rulingList )*wordBag {_ebafd :=_ceee (_acce ._dfagd );
|
||
_gdgd :=[]*textWord {_acce };_agg :=wordBag {_fcgd :map[int ][]*textWord {_ebafd :_gdgd },PdfRectangle :_acce .PdfRectangle ,_cdea :_acce ._eabbf ,_ebgd :_gfae ,_febe :_bgcd ,_cgdae :_bbbaf };return &_agg ;};func (_aagf TextTable )getCellInfo (_aad TextMark )[][]int {for _gbfa ,_gfc :=range _aagf .Cells {for _dadb :=range _gfc {_fdce :=&_gfc [_dadb ].Marks ;
|
||
if _fdce .exists (_aad ){return [][]int {{_gbfa },{_dadb }};};};};return nil ;};func (_bacdd rulingList )primMinMax ()(float64 ,float64 ){_aade ,_ebdc :=_bacdd [0]._edga ,_bacdd [0]._edga ;for _ ,_cfaec :=range _bacdd [1:]{if _cfaec ._edga < _aade {_aade =_cfaec ._edga ;
|
||
}else if _cfaec ._edga > _ebdc {_ebdc =_cfaec ._edga ;};};return _aade ,_ebdc ;};func (_eddae lineRuling )asRuling ()(*ruling ,bool ){_affaf :=ruling {_bbce :_eddae ._cebc ,Color :_eddae .Color ,_ccaa :_bcfc };switch _eddae ._cebc {case _cfae :_affaf ._edga =_eddae .xMean ();
|
||
_affaf ._fcec =_gf .Min (_eddae ._eded .Y ,_eddae ._badee .Y );_affaf ._abeg =_gf .Max (_eddae ._eded .Y ,_eddae ._badee .Y );case _aaad :_affaf ._edga =_eddae .yMean ();_affaf ._fcec =_gf .Min (_eddae ._eded .X ,_eddae ._badee .X );_affaf ._abeg =_gf .Max (_eddae ._eded .X ,_eddae ._badee .X );
|
||
default:_ga .Log .Error ("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064",_eddae ._cebc );return nil ,false ;};return &_affaf ,true ;};func _daf (_fegd ,_aebc bounded )float64 {return _fegd .bbox ().Llx -_aebc .bbox ().Urx };
|
||
func (_beeca paraList )llyRange (_gefc []int ,_ffgd ,_beb float64 )[]int {_dbae :=len (_beeca );if _beb < _beeca [_gefc [0]].Lly ||_ffgd > _beeca [_gefc [_dbae -1]].Lly {return nil ;};_fdee :=_e .Search (_dbae ,func (_eeabf int )bool {return _beeca [_gefc [_eeabf ]].Lly >=_ffgd });
|
||
_dbdd :=_e .Search (_dbae ,func (_fgdd int )bool {return _beeca [_gefc [_fgdd ]].Lly > _beb });return _gefc [_fdee :_dbdd ];};func _cedbg (_gfcadb float64 )bool {return _gf .Abs (_gfcadb )< _gbb };func _babb (_degcb float64 )float64 {return _dgaf *_gf .Round (_degcb /_dgaf )};
|
||
func _ccba (_bcge []*textLine )map[float64 ][]*textLine {_e .Slice (_bcge ,func (_bffc ,_feee int )bool {return _bcge [_bffc ]._bbfg < _bcge [_feee ]._bbfg });_cbfb :=map[float64 ][]*textLine {};for _ ,_fbbed :=range _bcge {_gedde :=_adgc (_fbbed );_gedde =_gf .Round (_gedde );
|
||
_cbfb [_gedde ]=append (_cbfb [_gedde ],_fbbed );};return _cbfb ;};
|
||
|
||
// String returns a description of `tm`.
|
||
func (_agdgd *textMark )String ()string {return _agc .Sprintf ("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022",_agdgd .PdfRectangle ,_agdgd ._gccfd ,_agdgd ._bfdb );};func (_ege *imageExtractContext )processOperand (_dff *_fc .ContentStreamOperation ,_fag _fc .GraphicsState ,_fgaa *_ba .PdfPageResources )error {if _dff .Operand =="\u0042\u0049"&&len (_dff .Params )==1{_gdb ,_aca :=_dff .Params [0].(*_fc .ContentStreamInlineImage );
|
||
if !_aca {return nil ;};if _bc ,_gba :=_add .GetBoolVal (_gdb .ImageMask );_gba {if _bc &&!_ege ._gce .IncludeInlineStencilMasks {return nil ;};};return _ege .extractInlineImage (_gdb ,_fag ,_fgaa );}else if _dff .Operand =="\u0044\u006f"&&len (_dff .Params )==1{_gbd ,_bbd :=_add .GetName (_dff .Params [0]);
|
||
if !_bbd {_ga .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");return _gb ;};_ ,_bcd :=_fgaa .GetXObjectByName (*_gbd );switch _bcd {case _ba .XObjectTypeImage :return _ege .extractXObjectImage (_gbd ,_fag ,_fgaa );case _ba .XObjectTypeForm :return _ege .extractFormImages (_gbd ,_fag ,_fgaa );
|
||
};}else if _ege ._gfb &&(_dff .Operand =="\u0073\u0063\u006e"||_dff .Operand =="\u0053\u0043\u004e")&&len (_dff .Params )==1{_gcf ,_bgf :=_add .GetName (_dff .Params [0]);if !_bgf {_ga .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065");
|
||
return _gb ;};_gffg ,_bgf :=_fgaa .GetPatternByName (*_gcf );if !_bgf {_ga .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0050\u0061\u0074\u0074\u0065\u0072n\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075\u006e\u0064");return nil ;};if _gffg .IsTiling (){_ecae :=_gffg .GetAsTilingPattern ();
|
||
_cfc ,_dffd :=_ecae .GetContentStream ();if _dffd !=nil {return _dffd ;};_dffd =_ege .extractContentStreamImages (string (_cfc ),_ecae .Resources );if _dffd !=nil {return _dffd ;};};}else if (_dff .Operand =="\u0063\u0073"||_dff .Operand =="\u0043\u0053")&&len (_dff .Params )>=1{_ege ._gfb =_dff .Params [0].String ()=="\u0050a\u0074\u0074\u0065\u0072\u006e";
|
||
};return nil ;};func (_ebdb *subpath )clear (){*_ebdb =subpath {}};func _ebba (_ecgf _ba .PdfRectangle ,_afefg ,_agcg ,_fdbf ,_gbfda *ruling )gridTile {_ceebf :=_ecgf .Llx ;_cdac :=_ecgf .Urx ;_eabb :=_ecgf .Lly ;_dgcgf :=_ecgf .Ury ;return gridTile {PdfRectangle :_ecgf ,_ecaf :_afefg !=nil &&_afefg .encloses (_eabb ,_dgcgf ),_bfab :_agcg !=nil &&_agcg .encloses (_eabb ,_dgcgf ),_bgbfd :_fdbf !=nil &&_fdbf .encloses (_ceebf ,_cdac ),_dgccd :_gbfda !=nil &&_gbfda .encloses (_ceebf ,_cdac )};
|
||
};func (_faccb *stateStack )pop ()*textState {if _faccb .empty (){return nil ;};_acda :=*(*_faccb )[len (*_faccb )-1];*_faccb =(*_faccb )[:len (*_faccb )-1];return &_acda ;};func _bdbc (_aeagb []int )[]int {_eggg :=make ([]int ,len (_aeagb ));for _cfef ,_gddg :=range _aeagb {_eggg [len (_aeagb )-1-_cfef ]=_gddg ;
|
||
};return _eggg ;};func (_eeeda *textLine )markWordBoundaries (){_egfe :=_fadg *_eeeda ._fdfb ;for _fce ,_fcgdd :=range _eeeda ._cdcg [1:]{if _daf (_fcgdd ,_eeeda ._cdcg [_fce ])>=_egfe {_fcgdd ._gdec =true ;};};};func (_bceg *textObject )getFontDict (_bcdc string )(_bafb _add .PdfObject ,_egad error ){_gedd :=_bceg ._dbg ;
|
||
if _gedd ==nil {_ga .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_bcdc );return nil ,nil ;};_bafb ,_fagg :=_gedd .GetFontByName (_add .PdfObjectName (_bcdc ));
|
||
if !_fagg {_ga .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_bcdc );
|
||
return nil ,_b .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _bafb ,nil ;};func (_ega *textObject )setTextRenderMode (_dgfg int ){if _ega ==nil {return ;};_ega ._efed ._gdf =RenderMode (_dgfg );
|
||
};func _cabf (_babd string )string {_cgac :=[]rune (_babd );return string (_cgac [:len (_cgac )-1])};type gridTile struct{_ba .PdfRectangle ;_dgccd ,_ecaf ,_bgbfd ,_bfab bool ;};func (_ggag *wordBag )text ()string {_agga :=_ggag .allWords ();_bdbd :=make ([]string ,len (_agga ));
|
||
for _dga ,_cffe :=range _agga {_bdbd [_dga ]=_cffe ._eaae ;};return _f .Join (_bdbd ,"\u0020");};func (_ebbg paraList )readBefore (_faba []int ,_aead ,_afecg int )bool {_gdbfc ,_dfefg :=_ebbg [_aead ],_ebbg [_afecg ];if _dfdc (_gdbfc ,_dfefg )&&_gdbfc .Lly > _dfefg .Lly {return true ;
|
||
};if !(_gdbfc ._dgabg .Urx < _dfefg ._dgabg .Llx ){return false ;};_egef ,_effe :=_gdbfc .Lly ,_dfefg .Lly ;if _egef > _effe {_effe ,_egef =_egef ,_effe ;};_gfcad :=_gf .Max (_gdbfc ._dgabg .Llx ,_dfefg ._dgabg .Llx );_fffed :=_gf .Min (_gdbfc ._dgabg .Urx ,_dfefg ._dgabg .Urx );
|
||
_faaa :=_ebbg .llyRange (_faba ,_egef ,_effe );for _ ,_aedc :=range _faaa {if _aedc ==_aead ||_aedc ==_afecg {continue ;};_abga :=_ebbg [_aedc ];if _abga ._dgabg .Llx <=_fffed &&_gfcad <=_abga ._dgabg .Urx {return false ;};};return true ;};func _edaf (_cggg map[int ][]float64 )string {_dafec :=_adgbc (_cggg );
|
||
_ebfb :=make ([]string ,len (_cggg ));for _ebfaf ,_ccbf :=range _dafec {_ebfb [_ebfaf ]=_agc .Sprintf ("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066",_ccbf ,_cggg [_ccbf ]);};return _agc .Sprintf ("\u007b\u0025\u0073\u007d",_f .Join (_ebfb ,"\u002c\u0020"));
|
||
};func (_faag lineRuling )xMean ()float64 {return 0.5*(_faag ._eded .X +_faag ._badee .X )};func (_bcab *textWord )appendMark (_adab *textMark ,_cadg _ba .PdfRectangle ){_bcab ._abcee =append (_bcab ._abcee ,_adab );_bcab .PdfRectangle =_bbbafc (_bcab .PdfRectangle ,_adab .PdfRectangle );
|
||
if _adab ._gccfd > _bcab ._eabbf {_bcab ._eabbf =_adab ._gccfd ;};_bcab ._dfagd =_cadg .Ury -_bcab .PdfRectangle .Lly ;};func (_eaagd rulingList )log (_abgf string ){if !_adgbf {return ;};_ga .Log .Info ("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_abgf ,_eaagd .String ());
|
||
for _bbbcb ,_efgbea :=range _eaagd {_agc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bbbcb ,_efgbea .String ());};};func (_fdbe *textTable )toTextTable ()TextTable {if _efda {_ga .Log .Info ("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064",_fdbe ._eacg ,_fdbe ._cgae );
|
||
};_fgbe :=make ([][]TableCell ,_fdbe ._cgae );for _dggag :=0;_dggag < _fdbe ._cgae ;_dggag ++{_fgbe [_dggag ]=make ([]TableCell ,_fdbe ._eacg );for _adbd :=0;_adbd < _fdbe ._eacg ;_adbd ++{_gggcd :=_fdbe .get (_adbd ,_dggag );if _gggcd ==nil {continue ;
|
||
};if _efda {_agc .Printf ("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a",_adbd ,_dggag ,_gggcd );};_fgbe [_dggag ][_adbd ].Text =_gggcd .text ();_dagc :=0;_fgbe [_dggag ][_adbd ].Marks ._ffca =_gggcd .toTextMarks (&_dagc );};};_egcg :=TextTable {W :_fdbe ._eacg ,H :_fdbe ._cgae ,Cells :_fgbe };
|
||
_egcg .PdfRectangle =_fdbe .bbox ();return _egcg ;};func (_cabc *textMark )bbox ()_ba .PdfRectangle {return _cabc .PdfRectangle };func (_bfgge gridTile )contains (_afdf _ba .PdfRectangle )bool {if _bfgge .numBorders ()< 3{return false ;};if _bfgge ._ecaf &&_afdf .Llx < _bfgge .Llx -_bddc {return false ;
|
||
};if _bfgge ._bfab &&_afdf .Urx > _bfgge .Urx +_bddc {return false ;};if _bfgge ._bgbfd &&_afdf .Lly < _bfgge .Lly -_bddc {return false ;};if _bfgge ._dgccd &&_afdf .Ury > _bfgge .Ury +_bddc {return false ;};return true ;};func (_daa *textObject )showTextAdjusted (_abf *_add .PdfObjectArray ,_bdg int )error {_ebb :=false ;
|
||
for _ ,_fee :=range _abf .Elements (){switch _fee .(type ){case *_add .PdfObjectFloat ,*_add .PdfObjectInteger :_gaad ,_debb :=_add .GetNumberAsFloat (_fee );if _debb !=nil {_ga .Log .Debug ("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_fee ,_abf );
|
||
return _debb ;};_eag ,_dbd :=-_gaad *0.001*_daa ._efed ._dcd ,0.0;if _ebb {_dbd ,_eag =_eag ,_dbd ;};_gcbf :=_gfdd (_agf .Point {X :_eag ,Y :_dbd });_daa ._eefe .Concat (_gcbf );case *_add .PdfObjectString :_afb :=_add .TraceToDirectObject (_fee );_bgb ,_abfg :=_add .GetStringBytes (_afb );
|
||
if !_abfg {_ga .Log .Trace ("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_fee ,_abf );
|
||
return _add .ErrTypeError ;};_daa .renderText (_afb ,_bgb ,_bdg );default:_ga .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076",_fee ,_abf );
|
||
return _add .ErrTypeError ;};};return nil ;};
|
||
|
||
// NewFromContents creates a new extractor from contents and page resources.
|
||
func NewFromContents (contents string ,resources *_ba .PdfPageResources )(*Extractor ,error ){const _dg ="\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s";_cgd :=&Extractor {_ac :contents ,_gbe :resources ,_bd :map[string ]fontEntry {},_baa :map[string ]textResult {}};
|
||
_ce .TrackUse (_dg );return _cgd ,nil ;};func _dgbb (_gdbff *textLine ,_bbec []*textLine ,_dbbca []float64 )float64 {var _afac float64 =-1;for _ ,_bgdf :=range _bbec {if _bgdf ._bbfg > _gdbff ._bbfg {if _gf .Round (_bgdf .Llx )>=_gf .Round (_gdbff .Llx ){_afac =_bgdf ._bbfg ;
|
||
}else {break ;};};};return _afac ;};func _faafc (_bcaa _ba .PdfRectangle )*ruling {return &ruling {_bbce :_aaad ,_edga :_bcaa .Ury ,_fcec :_bcaa .Llx ,_abeg :_bcaa .Urx };};type imageExtractContext struct{_dcc []ImageMark ;_dgg int ;_fbd int ;_bde int ;
|
||
_ca map[*_add .PdfObjectStream ]*cachedImage ;_gce *ImageExtractOptions ;_gfb bool ;};func _efcbe (_fbbf string )bool {for _ ,_aedca :=range _fbbf {if !_c .IsSpace (_aedca ){return false ;};};return true ;};func (_egge *textMark )inDiacriticArea (_dfdge *textMark )bool {_adaa :=_egge .Llx -_dfdge .Llx ;
|
||
_dcbf :=_egge .Urx -_dfdge .Urx ;_gcdf :=_egge .Lly -_dfdge .Lly ;return _gf .Abs (_adaa +_dcbf )< _egge .Width ()*_befe &&_gf .Abs (_gcdf )< _egge .Height ()*_befe ;};func _gagf (_gfef []structElement ,_cbgg map[int ][]*textLine ,_fcfa _add .PdfObject )[]*list {_abcd :=[]*list {};
|
||
for _ ,_aefd :=range _gfef {_dbbe :=_aefd ._abff ;_dgafc :=int (_aefd ._cfcb );_gabf :=_aefd ._bbag ;_dgad :=[]*textLine {};_afgc :=[]*list {};_fdfbe :=_aefd ._eada ;_edgc ,_fgfe :=(_fdfbe .(*_add .PdfObjectReference ));if !_fgfe {_ga .Log .Debug ("\u0066\u0061\u0069l\u0065\u0064\u0020\u006f\u0074\u0020\u0063\u0061\u0073\u0074\u0020\u0074\u006f\u0020\u002a\u0063\u006f\u0072\u0065\u002e\u0050\u0064\u0066\u004f\u0062\u006a\u0065\u0063\u0074R\u0065\u0066\u0065\u0072\u0065\u006e\u0063\u0065");
|
||
};if _dgafc !=-1&&_edgc !=nil {if _gedb ,_baeb :=_cbgg [_dgafc ];_baeb {if _egcc ,_afef :=_fcfa .(*_add .PdfIndirectObject );_afef {_caca :=_egcc .PdfObjectReference ;if _ad .DeepEqual (*_edgc ,_caca ){_dgad =_gedb ;};};};};if _dbbe !=nil {_afgc =_gagf (_dbbe ,_cbgg ,_fcfa );
|
||
};_dcfa :=_dgda (_dgad ,_gabf ,_afgc );_abcd =append (_abcd ,_dcfa );};return _abcd ;};func _fbgb (_bbdg *wordBag ,_bbeg float64 ,_egbe ,_fada rulingList )[]*wordBag {var _egcca []*wordBag ;for _ ,_cgcg :=range _bbdg .depthIndexes (){_dfa :=false ;for !_bbdg .empty (_cgcg ){_ggaag :=_bbdg .firstReadingIndex (_cgcg );
|
||
_cdec :=_bbdg .firstWord (_ggaag );_edfe :=_bfcbd (_cdec ,_bbeg ,_egbe ,_fada );_bbdg .removeWord (_cdec ,_ggaag );if _eeab {_ga .Log .Info ("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073",_cdec .String ());
|
||
};for _acga :=true ;_acga ;_acga =_dfa {_dfa =false ;_daaff :=_dgcc *_edfe ._cdea ;_bcbb :=_cac *_edfe ._cdea ;_ecbb :=_bfcae *_edfe ._cdea ;if _eeab {_ga .Log .Info ("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066",_edfe .minDepth (),_edfe .maxDepth (),_ecbb ,_bcbb );
|
||
};if _bbdg .scanBand ("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",_edfe ,_bcfa (_cgfd ,0),_edfe .minDepth ()-_ecbb ,_edfe .maxDepth ()+_ecbb ,_fae ,false ,false )> 0{_dfa =true ;};if _bbdg .scanBand ("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",_edfe ,_bcfa (_cgfd ,_bcbb ),_edfe .minDepth (),_edfe .maxDepth (),_fcag ,false ,false )> 0{_dfa =true ;
|
||
};if _dfa {continue ;};_dfag :=_bbdg .scanBand ("",_edfe ,_bcfa (_cbde ,_daaff ),_edfe .minDepth (),_edfe .maxDepth (),_cfcf ,true ,false );if _dfag > 0{_ffecd :=(_edfe .maxDepth ()-_edfe .minDepth ())/_edfe ._cdea ;if (_dfag > 1&&float64 (_dfag )> 0.3*_ffecd )||_dfag <=10{if _bbdg .scanBand ("\u006f\u0074\u0068e\u0072",_edfe ,_bcfa (_cbde ,_daaff ),_edfe .minDepth (),_edfe .maxDepth (),_cfcf ,false ,true )> 0{_dfa =true ;
|
||
};};};};_egcca =append (_egcca ,_edfe );};};return _egcca ;};func _ecfee (_efbbe []rulingList )(rulingList ,rulingList ){var _bffd rulingList ;for _ ,_efdeb :=range _efbbe {_bffd =append (_bffd ,_efdeb ...);};return _bffd .vertsHorzs ();};
|
||
|
||
// String returns a human readable description of `path`.
|
||
func (_afbg *subpath )String ()string {_ecbf :=_afbg ._aeee ;_gebb :=len (_ecbf );if _gebb <=5{return _agc .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_gebb ,_ecbf );};return _agc .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_gebb ,_ecbf [0],_ecbf [1],_ecbf [_gebb -1]);
|
||
};func _bdccc (_ecbc _ba .PdfRectangle )*ruling {return &ruling {_bbce :_cfae ,_edga :_ecbc .Llx ,_fcec :_ecbc .Lly ,_abeg :_ecbc .Ury };};func _ecfd (_geae byte )bool {for _ ,_faed :=range _cafa {if []byte (_faed )[0]==_geae {return true ;};};return false ;
|
||
};func _eddba (_caead ,_cgcf bounded )float64 {return _caead .bbox ().Llx -_cgcf .bbox ().Llx };func (_gbfaf *ruling )intersects (_cegd *ruling )bool {_dabfeg :=(_gbfaf ._bbce ==_cfae &&_cegd ._bbce ==_aaad )||(_cegd ._bbce ==_cfae &&_gbfaf ._bbce ==_aaad );
|
||
_bbef :=func (_deccf ,_ebcda *ruling )bool {return _deccf ._fcec -_dfed <=_ebcda ._edga &&_ebcda ._edga <=_deccf ._abeg +_dfed ;};_ccdac :=_bbef (_gbfaf ,_cegd );_dfgcg :=_bbef (_cegd ,_gbfaf );if _adgbf {_agc .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_dabfeg ,_ccdac ,_dfgcg ,_dabfeg &&_ccdac &&_dfgcg ,_gbfaf ,_cegd );
|
||
};return _dabfeg &&_ccdac &&_dfgcg ;};type list struct{_aebd []*textLine ;_cdde string ;_abcc []*list ;_ddef string ;};func (_gbgc *wordBag )allWords ()[]*textWord {var _fdgd []*textWord ;for _ ,_efgg :=range _gbgc ._fcgd {_fdgd =append (_fdgd ,_efgg ...);
|
||
};return _fdgd ;};func (_afa *shapesState )lastpointEstablished ()(_agf .Point ,bool ){if _afa ._gbdgg {return _afa ._gcaaf ,false ;};_gfcg :=len (_afa ._gfce );if _gfcg > 0&&_afa ._gfce [_gfcg -1]._dbe {return _afa ._gfce [_gfcg -1].last (),false ;};return _agf .Point {},true ;
|
||
};func (_gaead *textLine )bbox ()_ba .PdfRectangle {return _gaead .PdfRectangle };type cachedImage struct{_fed *_ba .Image ;_addb _ba .PdfColorspace ;};func (_bga *subpath )last ()_agf .Point {return _bga ._aeee [len (_bga ._aeee )-1]};
|
||
|
||
// String returns a string describing the current state of the textState stack.
|
||
func (_aaeg *stateStack )String ()string {_adfa :=[]string {_agc .Sprintf ("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064",len (*_aaeg ))};for _fgee ,_gaab :=range *_aaeg {_fdc :="\u003c\u006e\u0069l\u003e";
|
||
if _gaab !=nil {_fdc =_gaab .String ();};_adfa =append (_adfa ,_agc .Sprintf ("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073",_fgee ,_fdc ));};return _f .Join (_adfa ,"\u000a");};func (_eba *imageExtractContext )extractInlineImage (_bdd *_fc .ContentStreamInlineImage ,_dec _fc .GraphicsState ,_gdea *_ba .PdfPageResources )error {_gggb ,_ded :=_bdd .ToImage (_gdea );
|
||
if _ded !=nil {return _ded ;};_fcg ,_ded :=_bdd .GetColorSpace (_gdea );if _ded !=nil {return _ded ;};if _fcg ==nil {_fcg =_ba .NewPdfColorspaceDeviceGray ();};_ged ,_ded :=_fcg .ImageToRGB (*_gggb );if _ded !=nil {return _ded ;};_bfb :=ImageMark {Image :&_ged ,Width :_dec .CTM .ScalingFactorX (),Height :_dec .CTM .ScalingFactorY (),Angle :_dec .CTM .Angle ()};
|
||
_bfb .X ,_bfb .Y =_dec .CTM .Translation ();_eba ._dcc =append (_eba ._dcc ,_bfb );_eba ._dgg ++;return nil ;};func _dfc (_gbfcc string )bool {if _a .RuneCountInString (_gbfcc )< _afeg {return false ;};_bbdd ,_dbbc :=_a .DecodeLastRuneInString (_gbfcc );
|
||
if _dbbc <=0||!_c .Is (_c .Hyphen ,_bbdd ){return false ;};_bbdd ,_dbbc =_a .DecodeLastRuneInString (_gbfcc [:len (_gbfcc )-_dbbc ]);return _dbbc > 0&&!_c .IsSpace (_bbdd );};func _ggdbe (_fgba *textLine ,_egfdf []*textLine ,_ccde []float64 ,_cffg ,_adcb float64 )[]*textLine {_eegfg :=[]*textLine {};
|
||
for _ ,_bfge :=range _egfdf {if _bfge ._bbfg >=_cffg {if _adcb !=-1&&_bfge ._bbfg < _adcb {if _bfge .text ()!=_fgba .text (){if _gf .Round (_bfge .Llx )< _gf .Round (_fgba .Llx ){break ;};_eegfg =append (_eegfg ,_bfge );};}else if _adcb ==-1{if _bfge ._bbfg ==_fgba ._bbfg {if _bfge .text ()!=_fgba .text (){_eegfg =append (_eegfg ,_bfge );
|
||
};continue ;};_bgcda :=_dgbb (_fgba ,_egfdf ,_ccde );if _bgcda !=-1&&_bfge ._bbfg <=_bgcda {_eegfg =append (_eegfg ,_bfge );};};};};return _eegfg ;};func (_ffbf rulingList )comp (_faad ,_agdge int )bool {_geed ,_eagd :=_ffbf [_faad ],_ffbf [_agdge ];_agfg ,_bfeb :=_geed ._bbce ,_eagd ._bbce ;
|
||
if _agfg !=_bfeb {return _agfg > _bfeb ;};if _agfg ==_fbcc {return false ;};_bbgcd :=func (_becce bool )bool {if _agfg ==_aaad {return _becce ;};return !_becce ;};_egde ,_aefg :=_geed ._edga ,_eagd ._edga ;if _egde !=_aefg {return _bbgcd (_egde > _aefg );
|
||
};_egde ,_aefg =_geed ._fcec ,_eagd ._fcec ;if _egde !=_aefg {return _bbgcd (_egde < _aefg );};return _bbgcd (_geed ._abeg < _eagd ._abeg );};func (_edbg *textTable )putComposite (_ffcc ,_aaccb int ,_accgg paraList ,_adeac _ba .PdfRectangle ){if len (_accgg )==0{_ga .Log .Error ("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073");
|
||
return ;};_ffddd :=compositeCell {PdfRectangle :_adeac ,paraList :_accgg };if _efda {_agc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a",_ffcc ,_aaccb ,_ffddd .String ());
|
||
};_ffddd .updateBBox ();_edbg ._aaaga [_bgcc (_ffcc ,_aaccb )]=_ffddd ;};var _cafa =[]string {"\u2756","\u27a2","\u2713","\u2022","\uf0a7","\u25a1","\u2212","\u25a0","\u25aa","\u006f"};func _cbggc (_edee ,_gcef ,_fbfc float64 )rulingKind {if _edee >=_fbfc &&_caeb (_gcef ,_edee ){return _aaad ;
|
||
};if _gcef >=_fbfc &&_caeb (_edee ,_gcef ){return _cfae ;};return _fbcc ;};func (_bacf *ruling )alignsPrimary (_bedg *ruling )bool {return _bacf ._bbce ==_bedg ._bbce &&_gf .Abs (_bacf ._edga -_bedg ._edga )< _gbb *0.5;}; |