unipdf/extractor/extractor.go

1003 lines
219 KiB
Go
Raw Normal View History

2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2020-08-27 21:45:09 +00:00
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
2024-05-29 17:04:37 +00:00
package extractor ;import (_ae "bytes";_b "errors";_efc "fmt";_ag "github.com/unidoc/unipdf/v3/common";_aa "github.com/unidoc/unipdf/v3/contentstream";_gf "github.com/unidoc/unipdf/v3/core";_d "github.com/unidoc/unipdf/v3/internal/license";_cb "github.com/unidoc/unipdf/v3/internal/textencoding";
_aae "github.com/unidoc/unipdf/v3/internal/transform";_af "github.com/unidoc/unipdf/v3/model";_cc "golang.org/x/image/draw";_c "golang.org/x/text/unicode/norm";_ef "image";_fe "image/color";_fc "io";_ea "math";_ed "reflect";_g "regexp";_e "sort";_a "strings";
_fg "unicode";_bb "unicode/utf8";);func (_ccdd *wordBag )highestWord (_ffagg int ,_effd ,_ffed float64 )*textWord {for _ ,_ccfg :=range _ccdd ._cdbc [_ffagg ]{if _effd <=_ccfg ._accb &&_ccfg ._accb <=_ffed {return _ccfg ;};};return nil ;};
// String renders the stack for debugging: a header line carrying the number
// of entries, followed by one tab-indented "index: value" line per entry in
// slice order. Nil entries are printed as "<nil>".
func (_ege *stateStack) String() string {
	var out _a.Builder
	_efc.Fprintf(&out, "---- font stack: %d", len(*_ege))
	for pos, entry := range *_ege {
		desc := "<nil>"
		if entry != nil {
			desc = entry.String()
		}
		// Each entry starts with the newline that separates it from the
		// previous line, so the result has no trailing newline.
		_efc.Fprintf(&out, "\n\t%2d: %s", pos, desc)
	}
	return out.String()
}

// _ebdca decodes the image carried by the stream object _fffdb and hands the
// decoded image, together with the colour _fbgda, to _eccab, whose result is
// returned.
//
// It returns (nil, nil) when _fffdb is not a stream, and (nil, err) when the
// stream cannot be turned into an image.
func _ebdca(_fffdb _gf.PdfObject, _fbgda _fe.Color) (_ef.Image, error) {
	stream, isStream := _gf.GetStream(_fffdb)
	if !isStream {
		return nil, nil
	}

	xobj, err := _af.NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}

	decoded, err := xobj.ToImage()
	if err != nil {
		return nil, err
	}
	return _eccab(decoded, _fbgda), nil
}

// rulingList is a collection of rulings.
type rulingList []*ruling

// comp reports whether the ruling at index _ecea orders before the ruling at
// index _fgfc.
//
// Rulings are ordered first by kind (larger kind value first). Rulings of kind
// _ceag never order before one another. Within a kind, the keys are _aeef
// (descending), then _ggdb (ascending), then _gbca (ascending); for every kind
// other than _eeg each of those three comparisons is inverted.
func (_fabf rulingList) comp(_ecea, _fgfc int) bool {
	first, second := _fabf[_ecea], _fabf[_fgfc]
	kind := first._ecfb

	if kind != second._ecfb {
		return kind > second._ecfb
	}
	if kind == _ceag {
		return false
	}

	// direct is true when a comparison result is used as-is (kind _eeg) and
	// false when it has to be negated (any other kind).
	direct := kind == _eeg

	if first._aeef != second._aeef {
		return (first._aeef > second._aeef) == direct
	}
	if first._ggdb != second._ggdb {
		return (first._ggdb < second._ggdb) == direct
	}
	return (first._gbca < second._gbca) == direct
}
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right:
// Cells[y] is the (0-offset) y'th row of the table and
// Cells[y][x] is the cell in the (0-offset) x'th column of that row.
type TextTable struct {
	_af.PdfRectangle
	W, H  int
	Cells [][]TableCell
}

// _bebf builds a ruling of kind _gecdf from a rectangle: its _aeef coordinate
// is the rectangle's Urx and it spans Lly.._Ury (_ggdb.._gbca), i.e. it lies
// along the rectangle's right-hand edge.
func _bebf(_bcfd _af.PdfRectangle) *ruling {
	return &ruling{
		_ecfb: _gecdf,
		_aeef: _bcfd.Urx,
		_ggdb: _bcfd.Lly,
		_gbca: _bcfd.Ury,
	}
}

// cubicTo records the end point of a cubic curve segment. Only the last
// coordinate pair (_bece, _ccbcd) is used: it is passed to addPoint. The first
// four arguments are ignored.
func (_aabc *shapesState) cubicTo(_adg, _ece, _dbee, _becb, _bece, _ccbcd float64) {
	if _cece {
		_ag.Log.Info("cubicTo:")
	}
	_aabc.addPoint(_bece, _ccbcd)
}

// reduce returns a table without the rows and columns for which
// emptyCompositeRow / emptyCompositeColumn report true. When nothing would be
// removed the receiver itself is returned; otherwise a new table is built and
// the surviving composite cells are copied into it at their compacted
// positions.
func (_dbgdd *textTable) reduce() *textTable {
	keptRows := make([]int, 0, _dbgdd._cegga)
	keptCols := make([]int, 0, _dbgdd._aageb)
	for row := 0; row < _dbgdd._cegga; row++ {
		if !_dbgdd.emptyCompositeRow(row) {
			keptRows = append(keptRows, row)
		}
	}
	for col := 0; col < _dbgdd._aageb; col++ {
		if !_dbgdd.emptyCompositeColumn(col) {
			keptCols = append(keptCols, col)
		}
	}
	if len(keptRows) == _dbgdd._cegga && len(keptCols) == _dbgdd._aageb {
		return _dbgdd
	}

	reduced := textTable{
		_caagg: _dbgdd._caagg,
		_aageb: len(keptCols),
		_cegga: len(keptRows),
		_dgcf:  make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _dedc {
		_ag.Log.Info("reduce: %dx%d -> %dx%d", _dbgdd._aageb, _dbgdd._cegga, len(keptCols), len(keptRows))
		_ag.Log.Info("reducedCols: %+v", keptCols)
		_ag.Log.Info("reducedRows: %+v", keptRows)
	}

	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			composite, extra := _dbgdd.getComposite(oldCol, oldRow)
			if composite == nil {
				continue
			}
			if _dedc {
				_efc.Printf("  %2d, %2d (%2d, %2d) %q\n", newCol, newRow, oldCol, oldRow, _efcca(composite.merge().text(), 50))
			}
			reduced.putComposite(newCol, newRow, composite, extra)
		}
	}
	return &reduced
}

// _eebe converts a slice of structure elements into list nodes, one node per
// element, recursing into each element's children (_befc).
//
// An element receives the text lines stored in _abceg under its id (_fbge)
// only when that id is not -1, the element's _bffdf is a
// *core.PdfObjectReference, and that reference is deeply equal to the
// reference embedded in _fbcd (which must be a *core.PdfIndirectObject).
// Otherwise the node is built with an empty, non-nil line slice.
func _eebe(_cafe []structElement, _abceg map[int][]*textLine, _fbcd _gf.PdfObject) []*list {
	nodes := make([]*list, 0, len(_cafe))
	for _, elem := range _cafe {
		id := int(elem._fbge)
		lines := []*textLine{}
		children := []*list{}

		ref, isRef := elem._bffdf.(*_gf.PdfObjectReference)
		if !isRef {
			// Not fatal: the element is simply emitted without text lines.
			_ag.Log.Debug("failed to cast to *core.PdfObjectReference")
		}

		if id != -1 && ref != nil {
			if found, ok := _abceg[id]; ok {
				if indirect, ok := _fbcd.(*_gf.PdfIndirectObject); ok {
					if _ed.DeepEqual(*ref, indirect.PdfObjectReference) {
						lines = found
					}
				}
			}
		}

		if elem._befc != nil {
			children = _eebe(elem._befc, _abceg, _fbcd)
		}
		nodes = append(nodes, _abda(lines, elem._dccda, children))
	}
	return nodes
}

// applyRemovals deletes words from the bag. _cfgb maps a bin key of _cdbc to
// the set of words to drop from that bin. A bin that ends up with no words is
// removed from _cdbc altogether.
//
// The surviving words are collected by filtering rather than by sizing the
// result as len(bin)-len(removals): the arithmetic form panics or leaves nil
// entries whenever the removal set is not an exact, duplicate-free subset of
// the bin.
func (_cgcb *wordBag) applyRemovals(_cfgb map[int]map[*textWord]struct{}) {
	for bin, removals := range _cfgb {
		if len(removals) == 0 {
			continue
		}
		words := _cgcb._cdbc[bin]

		// Capacity hint only; clamp so a mismatched removal set cannot make it negative.
		hint := len(words) - len(removals)
		if hint < 0 {
			hint = 0
		}
		kept := make([]*textWord, 0, hint)
		for _, word := range words {
			if _, drop := removals[word]; !drop {
				kept = append(kept, word)
			}
		}

		if len(kept) == 0 {
			delete(_cgcb._cdbc, bin)
			continue
		}
		_cgcb._cdbc[bin] = kept
	}
}

// _eaf returns the translation component of the matrix as a point.
func _eaf(_gbbd _aae.Matrix) _aae.Point {
	x, y := _gbbd.Translation()
	return _aae.Point{X: x, Y: y}
}

// lineRuling is a ruling candidate described by its kind, the kind of mark it
// came from, its colour and two end points.
// NOTE(review): field roles are inferred from the field types only — confirm
// against the code that populates them.
type lineRuling struct {
	_faab rulingKind
	_fffg markKind
	_fe.Color
	_bbee, _efge _aae.Point
}

// addNeighbours links every paragraph in the list to at most one neighbour per
// side, writing the result into these textPara fields:
//
//	_caagd - neighbour entirely to the left  (its Urx <= paragraph Llx)
//	_aggd  - neighbour entirely to the right (its Llx >= paragraph Urx)
//	_cabda - neighbour entirely below        (its Ury <= paragraph Lly)
//	_ecdfc - neighbour entirely above        (its Lly >= paragraph Ury)
//
// Horizontal candidates come from yNeighbours(_gcaf) and vertical candidates
// from xNeighbours(_bcfa). On each side the closest candidate is chosen; it is
// discarded when another candidate on that side overlaps it along the search
// axis, or when _dfba (horizontal) / _gfb (vertical) rejects the pair. A final
// pass clears every link that is not reciprocated by the paragraph it points
// to.
func (_faff paraList) addNeighbours() {
	// splitLeftRight partitions candidate indexes into paragraphs lying
	// entirely left of and entirely right of para. Callers guarantee
	// len(idxs) > 0, so the capacity is never negative.
	splitLeftRight := func(idxs []int, para *textPara) ([]*textPara, []*textPara) {
		left := make([]*textPara, 0, len(idxs)-1)
		right := make([]*textPara, 0, len(idxs)-1)
		for _, idx := range idxs {
			other := _faff[idx]
			if other.Urx <= para.Llx {
				left = append(left, other)
			} else if other.Llx >= para.Urx {
				right = append(right, other)
			}
		}
		return left, right
	}
	// splitAboveBelow partitions candidate indexes into paragraphs lying
	// entirely above and entirely below para, in that order.
	splitAboveBelow := func(idxs []int, para *textPara) ([]*textPara, []*textPara) {
		above := make([]*textPara, 0, len(idxs)-1)
		below := make([]*textPara, 0, len(idxs)-1)
		for _, idx := range idxs {
			other := _faff[idx]
			if other.Ury <= para.Lly {
				below = append(below, other)
			} else if other.Lly >= para.Ury {
				above = append(above, other)
			}
		}
		return above, below
	}

	// Horizontal neighbours.
	candidates := _faff.yNeighbours(_gcaf)
	for _, para := range _faff {
		idxs := candidates[para]
		if len(idxs) == 0 {
			continue
		}
		left, right := splitLeftRight(idxs, para)
		if len(left) == 0 && len(right) == 0 {
			continue
		}
		if len(left) > 0 {
			// Closest on the left = largest Urx.
			best := left[0]
			for _, other := range left[1:] {
				if other.Urx >= best.Urx {
					best = other
				}
			}
			// Reject when another left candidate reaches past best's left edge.
			for _, other := range left {
				if other != best && other.Urx > best.Llx {
					best = nil
					break
				}
			}
			if best != nil && _dfba(para.PdfRectangle, best.PdfRectangle) {
				para._caagd = best
			}
		}
		if len(right) > 0 {
			// Closest on the right = smallest Llx.
			best := right[0]
			for _, other := range right[1:] {
				if other.Llx <= best.Llx {
					best = other
				}
			}
			for _, other := range right {
				if other != best && other.Llx < best.Urx {
					best = nil
					break
				}
			}
			if best != nil && _dfba(para.PdfRectangle, best.PdfRectangle) {
				para._aggd = best
			}
		}
	}

	// Vertical neighbours.
	candidates = _faff.xNeighbours(_bcfa)
	for _, para := range _faff {
		idxs := candidates[para]
		if len(idxs) == 0 {
			continue
		}
		above, below := splitAboveBelow(idxs, para)
		if len(above) == 0 && len(below) == 0 {
			continue
		}
		if len(below) > 0 {
			// Closest below = largest Ury.
			best := below[0]
			for _, other := range below[1:] {
				if other.Ury >= best.Ury {
					best = other
				}
			}
			for _, other := range below {
				if other != best && other.Ury > best.Lly {
					best = nil
					break
				}
			}
			if best != nil && _gfb(para.PdfRectangle, best.PdfRectangle) {
				para._cabda = best
			}
		}
		if len(above) > 0 {
			// Closest above = smallest Lly.
			best := above[0]
			for _, other := range above[1:] {
				if other.Lly <= best.Lly {
					best = other
				}
			}
			for _, other := range above {
				if other != best && other.Lly < best.Ury {
					best = nil
					break
				}
			}
			if best != nil && _gfb(para.PdfRectangle, best.PdfRectangle) {
				para._ecdfc = best
			}
		}
	}

	// Keep only reciprocated links. The four checks run in this order for each
	// paragraph in turn; links cleared earlier affect later checks.
	for _, para := range _faff {
		if para._caagd != nil && para._caagd._aggd != para {
			para._caagd = nil
		}
		if para._ecdfc != nil && para._ecdfc._cabda != para {
			para._ecdfc = nil
		}
		if para._aggd != nil && para._aggd._caagd != para {
			para._aggd = nil
		}
		if para._cabda != nil && para._cabda._ecdfc != para {
			para._cabda = nil
		}
	}
}

// Package-level numeric thresholds used by the extraction code.
// NOTE(review): the obfuscated names carry no meaning; the role of each value
// has to be read off its use sites.
const (
	_gefg  = 1.0e-6
	_fbf   = 1.0e-4
	_dgbd  = 10
	_cdcb  = 6
	_egeb  = 0.5
	_fbbf  = 0.12
	_gegc  = 0.19
	_ceca  = 0.04
	_cgdgc = 0.04
	_cgbc  = 1.0
	_afdg  = 0.04
	_cecf  = 0.4
	_bea   = 0.7
	_daf   = 1.0
	_egdfc = 0.1
	_geac  = 1.4
	_efbf  = 0.46
	_bgcc  = 0.02
	_fcfce = 0.2
	_bbcc  = 0.5
	_eade  = 4
	_fcbe  = 4.0
	_gaba  = 6
	_ccfc  = 0.3
	_bcfa  = 0.01
	_gcaf  = 0.02
	_fcad  = 2
	_bce   = 2
	_ebdb  = 500
	_abca  = 4.0
	_gabg  = 4.0
	_ebffe = 0.05
	_gcffb = 0.1
	_bcae  = 2.0
	_cggd  = 2.0
	_caf   = 1.5
	_bfad  = 3.0
	_bbce  = 0.25
)

// _afee groups text lines by the id stored in the first mark of their first
// word (_cfcb[0]._ffcd[0]._ffbdg; the log message calls it the "mcid number").
// It visits the lines of every paragraph in the list and, for paragraphs that
// own a table (_befe), the lines of every paragraph stored in that table.
// Lines rejected by _ccga are logged and skipped.
//
// Table paragraphs are visited in map-iteration order, so the relative order
// of lines contributed by a table is not deterministic.
func _afee(_fgfef *paraList) map[int][]*textLine {
	byID := map[int][]*textLine{}

	collect := func(lines []*textLine) {
		for _, line := range lines {
			if !_ccga(line) {
				_ag.Log.Debug("groupLines: The text line contains more than one mcid number. It should be split.")
				continue
			}
			id := line._cfcb[0]._ffcd[0]._ffbdg
			byID[id] = append(byID[id], line)
		}
	}

	for _, para := range *_fgfef {
		collect(para._aage)
		if para._befe != nil {
			for _, cellPara := range para._befe._dgcf {
				collect(cellPara._aage)
			}
		}
	}
	return byID
}

// textObject holds the state kept while processing text: the owning
// extractor, the page resources, a graphics state, the current text state and
// its stack, two matrices, the text marks collected so far and a flag.
// NOTE(review): the description follows the field types only; confirm the
// role of each matrix and of _cdcc at their use sites.
type textObject struct {
	_dbe  *Extractor
	_dae  *_af.PdfPageResources
	_aef  _aa.GraphicsState
	_ecff *textState
	_aega *stateStack
	_dbc  _aae.Matrix
	_ebc  _aae.Matrix
	_afff []*textMark
	_cdcc bool
}
2024-02-11 21:29:32 +00:00
// Font represents the font properties on a PDF page.
2024-05-29 17:04:37 +00:00
type Font struct{PdfFont *_af .PdfFont ;
2024-02-11 21:29:32 +00:00
// FontName represents Font Name from font properties.
FontName string ;
// FontType represents Font Subtype entry in the font dictionary inside page resources.
// Examples : type0, Type1, MMType1, Type3, TrueType, CIDFont.
FontType string ;
// ToUnicode is true if font provides a `ToUnicode` mapping.
ToUnicode bool ;
// IsCID is true if underlying font is a composite font.
// Composite font is represented by a font dictionary whose Subtype is `Type0`
IsCID bool ;
// IsSimple is true if font is simple font.
// A simple font is limited to only 8 bit (255) character codes.
IsSimple bool ;
// FontData represents the raw data of the embedded font file.
// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CCF).
// FontData value can be indicates from `FontFile`, `FontFile2` or `FontFile3` inside Font Descriptor.
// At most, only one of `FontFile`, `FontFile2` or `FontFile3` will be FontData value.
FontData []byte ;
// FontFileName is a name representing the font. it has format:
// (Font Name) + (Font Type Extension), example: helvetica.ttf.
FontFileName string ;
// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
2024-05-29 17:04:37 +00:00
FontDescriptor *_af .PdfFontDescriptor ;};func _ebgc (_babf []*textLine ,_aabcg string )string {var _fdfce _a .Builder ;_bcac :=0.0;for _abcfe ,_ebec :=range _babf {_agbd :=_ebec .text ();_acae :=_ebec ._addd ;if _abcfe < len (_babf )-1{_bcac =_babf [_abcfe +1]._addd ;
}else {_bcac =0.0;};_fdfce .WriteString (_aabcg );_fdfce .WriteString (_agbd );if _bcac !=_acae {_fdfce .WriteString ("\u000a");}else {_fdfce .WriteString ("\u0020");};};return _fdfce .String ();};func (_deegd gridTile )contains (_bddc _af .PdfRectangle )bool {if _deegd .numBorders ()< 3{return false ;
};if _deegd ._gceeb &&_bddc .Llx < _deegd .Llx -_caf {return false ;};if _deegd ._gdcbg &&_bddc .Urx > _deegd .Urx +_caf {return false ;};if _deegd ._dbafa &&_bddc .Lly < _deegd .Lly -_caf {return false ;};if _deegd ._ffdf &&_bddc .Ury > _deegd .Ury +_caf {return false ;
};return true ;};
2024-03-27 22:34:33 +00:00
2024-05-29 17:04:37 +00:00
// List returns all the list objects detected on the page.
// It detects the bullet point lists of a PDF page and builds a slice of bullet list objects.
// Each bullet list object has a tree structure: a bullet point is extracted with the text
// content it contains, and the sub lists found under it become its children in the tree.
// The remaining content of the page is ignored; only text inside bullet point lists is extracted.
//
// The list extraction is done in one of two ways:
//  1. If the document is tagged, the lists are extracted using the tags provided in the document.
//  2. Otherwise the bullet lists are extracted from the raw text using regex matching.
//
// By default the document tags are used if available. This can be disabled with
// `DisableDocumentTags` in the `Options` object, which sometimes gives a better bullet list
// extraction when the document was tagged incorrectly.
//
//	options := &Options{
//		DisableDocumentTags: false, // this means use document tag if available
//	}
//	ex, err := NewWithOptions(page, options)
//	// handle error
//	pageText, _, _, err := ex.ExtractPageText()
//	// handle error
//	lists := pageText.List()
//	txt := lists.Text()
func (_gaedc PageText) List() lists {
	tagsEnabled := !_gaedc._gbg._gcgg
	paras := _gaedc.getParagraphs()
	hasStructTree := _gaedc._cgad != nil && *_gaedc._cgad != nil

	// The text-matching result is always computed; it doubles as the fallback.
	matched := paras.list()
	if !hasStructTree || !tagsEnabled {
		return matched
	}

	linesByID := _afee(&paras)
	root := &structTreeRoot{}
	root.parseStructTreeRoot(*_gaedc._cgad)
	if root._cfbfg == nil {
		_ag.Log.Debug("List: structTreeRoot doesn't have any content, using text matching method instead.")
		return matched
	}
	return root.buildList(linesByID, _gaedc._dfc)
}

// exists reports whether the array already holds a mark whose DirectObject and
// BBox are deeply equal to those of _gag and whose Text is the same.
func (_bffb *TextMarkArray) exists(_gag TextMark) bool {
	for _, mark := range _bffb.Elements() {
		if !_ed.DeepEqual(_gag.DirectObject, mark.DirectObject) {
			continue
		}
		if !_ed.DeepEqual(_gag.BBox, mark.BBox) {
			continue
		}
		if mark.Text == _gag.Text {
			return true
		}
	}
	return false
}

// extractInlineImage converts an inline image to RGB and appends it to the
// context's image list (_cgg) as an ImageMark, then increments the _ffa
// counter. The mark's size, angle and position are read from the CTM of the
// graphics state _bdd. A missing colour space defaults to DeviceGray.
// Any conversion error is returned unchanged and nothing is appended.
func (_aaa *imageExtractContext) extractInlineImage(_ddf *_aa.ContentStreamInlineImage, _bdd _aa.GraphicsState, _eac *_af.PdfPageResources) error {
	raw, err := _ddf.ToImage(_eac)
	if err != nil {
		return err
	}

	colorspace, err := _ddf.GetColorSpace(_eac)
	if err != nil {
		return err
	}
	if colorspace == nil {
		colorspace = _af.NewPdfColorspaceDeviceGray()
	}

	rgb, err := colorspace.ImageToRGB(*raw)
	if err != nil {
		return err
	}

	ctm := _bdd.CTM
	mark := ImageMark{
		Image:  &rgb,
		Width:  ctm.ScalingFactorX(),
		Height: ctm.ScalingFactorY(),
		Angle:  ctm.Angle(),
	}
	mark.X, mark.Y = ctm.Translation()

	_aaa._cgg = append(_aaa._cgg, mark)
	_aaa._ffa++
	return nil
}

// _fdag classifies the segment between two points by passing the absolute
// differences of their X and Y coordinates, together with the threshold _abca,
// to _fcgef.
func _fdag(_bcfac, _acad _aae.Point) rulingKind {
	deltaX := _ea.Abs(_bcfac.X - _acad.X)
	deltaY := _ea.Abs(_bcfac.Y - _acad.Y)
	return _fcgef(deltaX, deltaY, _abca)
}

// textState is the set of text parameters tracked while processing text.
// NOTE(review): only the field types are visible here (numeric parameters, a
// render mode, the current font, a rectangle and two counters); confirm the
// meaning of each field at its use sites.
type textState struct {
	_fdad  float64
	_febe  float64
	_dba   float64
	_cdc   float64
	_gbbgg float64
	_aaeb  RenderMode
	_dgef  float64
	_fgfgb *_af.PdfFont
	_bcbc  _af.PdfRectangle
	_cfg   int
	_dacb  int
}

// computeBbox returns the combination (via _cfab) of the rectangles of all
// non-nil cells of the table, visiting cells row by row. A table without any
// cell yields the zero rectangle.
func (_edbcc *textTable) computeBbox() _af.PdfRectangle {
	var bbox _af.PdfRectangle
	seeded := false
	for row := 0; row < _edbcc._cegga; row++ {
		for col := 0; col < _edbcc._aageb; col++ {
			cell := _edbcc.get(col, row)
			if cell == nil {
				continue
			}
			if seeded {
				bbox = _cfab(bbox, cell.PdfRectangle)
				continue
			}
			bbox = cell.PdfRectangle
			seeded = true
		}
	}
	return bbox
}

// blocked reports whether a ruling separates the word _ffd from the bag.
//
// Horizontally, when the word lies entirely to one side of the bag, the
// facing edges (built by _bebf and _ggbea) are tested with _dcaa.blocks.
// Vertically, when the word lies entirely above or below the bag, the facing
// edges (built by _bbaf and _gdgf) are tested with _bfcg.blocks.
// The first positive test returns true; otherwise the result is false.
func (_becbe *wordBag) blocked(_ffd *textWord) bool {
	switch {
	case _ffd.Urx < _becbe.Llx:
		near := _bebf(_ffd.PdfRectangle)
		far := _ggbea(_becbe.PdfRectangle)
		if _becbe._dcaa.blocks(near, far) {
			if _bgeg {
				_ag.Log.Info("blocked \u2190x: %s %s", _ffd, _becbe)
			}
			return true
		}
	case _becbe.Urx < _ffd.Llx:
		near := _bebf(_becbe.PdfRectangle)
		far := _ggbea(_ffd.PdfRectangle)
		if _becbe._dcaa.blocks(near, far) {
			if _bgeg {
				_ag.Log.Info("blocked x\u2192 : %s %s", _ffd, _becbe)
			}
			return true
		}
	}

	switch {
	case _ffd.Ury < _becbe.Lly:
		near := _bbaf(_ffd.PdfRectangle)
		far := _gdgf(_becbe.PdfRectangle)
		if _becbe._bfcg.blocks(near, far) {
			if _bgeg {
				_ag.Log.Info("blocked \u2190y: %s %s", _ffd, _becbe)
			}
			return true
		}
	case _becbe.Ury < _ffd.Lly:
		near := _bbaf(_becbe.PdfRectangle)
		far := _gdgf(_ffd.PdfRectangle)
		if _becbe._bfcg.blocks(near, far) {
			if _bgeg {
				_ag.Log.Info("blocked y\u2192 : %s %s", _ffd, _becbe)
			}
			return true
		}
	}
	return false
}

// processOperand handles one content stream operation on behalf of the image
// extractor:
//
//   - "BI" with one parameter: extracts the inline image, unless it is a
//     stencil mask and IncludeInlineStencilMasks is off.
//   - "Do" with one parameter: extracts an image XObject or descends into a
//     form XObject; other XObject types are ignored.
//   - "scn"/"SCN" with one parameter, while the _eed flag is set: extracts the
//     images of the named tiling pattern's content stream.
//   - "cs"/"CS" with at least one parameter: sets _eed to whether the first
//     parameter's string form is "Pattern".
//
// Every other operation is a no-op. A non-name operand of "Do" or "scn"/"SCN"
// yields the error _ca.
func (_ecbe *imageExtractContext) processOperand(_dd *_aa.ContentStreamOperation, _feb _aa.GraphicsState, _cfd *_af.PdfPageResources) error {
	op, paramCount := _dd.Operand, len(_dd.Params)

	switch {
	case op == "BI" && paramCount == 1:
		inline, isInline := _dd.Params[0].(*_aa.ContentStreamInlineImage)
		if !isInline {
			return nil
		}
		if isMask, known := _gf.GetBoolVal(inline.ImageMask); known && isMask && !_ecbe._dac.IncludeInlineStencilMasks {
			return nil
		}
		return _ecbe.extractInlineImage(inline, _feb, _cfd)

	case op == "Do" && paramCount == 1:
		name, isName := _gf.GetName(_dd.Params[0])
		if !isName {
			_ag.Log.Debug("ERROR: Type")
			return _ca
		}
		_, xobjType := _cfd.GetXObjectByName(*name)
		switch xobjType {
		case _af.XObjectTypeImage:
			return _ecbe.extractXObjectImage(name, _feb, _cfd)
		case _af.XObjectTypeForm:
			return _ecbe.extractFormImages(name, _feb, _cfd)
		}

	case _ecbe._eed && (op == "scn" || op == "SCN") && paramCount == 1:
		name, isName := _gf.GetName(_dd.Params[0])
		if !isName {
			_ag.Log.Debug("ERROR: Type")
			return _ca
		}
		pattern, found := _cfd.GetPatternByName(*name)
		if !found {
			_ag.Log.Debug("ERROR: Pattern not found")
			return nil
		}
		if pattern.IsTiling() {
			tiling := pattern.GetAsTilingPattern()
			content, err := tiling.GetContentStream()
			if err != nil {
				return err
			}
			if err = _ecbe.extractContentStreamImages(string(content), tiling.Resources); err != nil {
				return err
			}
		}

	case (op == "cs" || op == "CS") && paramCount >= 1:
		_ecbe._eed = _dd.Params[0].String() == "Pattern"
	}
	return nil
}

// Messages used when font handling fails.
const (
	_ebd = "ERROR: Can't convert font object, invalid type"
	_dfe = "ERROR: Can't get font properties, font not found"
	_cbe = "ERROR: Can't get font stream, invalid type"
)
// split divides the composite cell into a grid and returns it as a table.
//
// _fcge holds the row boundaries and _efea the column boundaries; both are
// completed by _cdee with the cell's own limits (Ury..Lly for rows, Llx..Urx
// for columns). The resulting table has len(_efea)+1 columns and len(_fcge)+1
// rows. Every text line of the cell is assigned to the first grid rectangle
// (scanning row by row) accepted by _aeca; lines matching no rectangle are
// dropped. Each non-empty grid position is filled with a paragraph built by
// _adbde from its rectangle and lines.
//
// Side effect: the cell's paragraph list is sorted in place by Lly, then Llx.
func (_fggb compositeCell) split(_fcge, _efea []float64) *textTable {
	rowCount := len(_fcge) + 1
	colCount := len(_efea) + 1

	if _dedc {
		_ag.Log.Info("compositeCell.split: %d x %d\n\tcomposite=%s\n"+
			"\trowCorridors=%6.2f\n\tcolCorridors=%6.2f", colCount, rowCount, _fggb, _fcge, _efea)
		_efc.Printf("    %d paras\n", len(_fggb.paraList))
		for idx, para := range _fggb.paraList {
			_efc.Printf("%4d: %s\n", idx, para.String())
		}
		_efc.Printf("    %d lines\n", len(_fggb.lines()))
		for idx, line := range _fggb.lines() {
			_efc.Printf("%4d: %s\n", idx, line)
		}
	}

	// Add the outer limits of the cell to the boundary lists.
	_fcge = _cdee(_fcge, _fggb.Ury, _fggb.Lly)
	_efea = _cdee(_efea, _fggb.Llx, _fggb.Urx)

	table := textTable{
		_aageb: colCount,
		_cegga: rowCount,
		_dgcf:  make(map[uint64]*textPara, colCount*rowCount),
	}

	paras := _fggb.paraList
	_e.Slice(paras, func(i, j int) bool {
		left, right := paras[i], paras[j]
		if left.Lly != right.Lly {
			return left.Lly < right.Lly
		}
		return left.Llx < right.Llx
	})

	// Rectangle of every grid position, keyed by _cdgd(col, row).
	rects := make(map[uint64]_af.PdfRectangle, colCount*rowCount)
	for row, lower := range _fcge[1:] {
		upper := _fcge[row]
		for col, right := range _efea[1:] {
			left := _efea[col]
			rects[_cdgd(col, row)] = _af.PdfRectangle{Llx: left, Urx: right, Lly: lower, Ury: upper}
		}
	}

	if _dedc {
		_ag.Log.Info("compositeCell.split: rects")
		_efc.Printf("    ")
		for col := 0; col < colCount; col++ {
			_efc.Printf("%30d, ", col)
		}
		_efc.Println()
		for row := 0; row < rowCount; row++ {
			_efc.Printf("  %2d:", row)
			for col := 0; col < colCount; col++ {
				_efc.Printf("%6.2f, ", rects[_cdgd(col, row)])
			}
			_efc.Println()
		}
	}

	// locate returns the (col, row) of the first grid rectangle accepted by
	// _aeca for the line, or (-1, -1) when there is none.
	locate := func(line *textLine) (int, int) {
		for row := 0; row < rowCount; row++ {
			for col := 0; col < colCount; col++ {
				if _aeca(rects[_cdgd(col, row)], line.PdfRectangle) {
					return col, row
				}
			}
		}
		return -1, -1
	}

	linesAt := make(map[uint64][]*textLine, colCount*rowCount)
	for _, line := range paras.lines() {
		col, row := locate(line)
		if col < 0 {
			continue
		}
		linesAt[_cdgd(col, row)] = append(linesAt[_cdgd(col, row)], line)
	}

	for row := 0; row < len(_fcge)-1; row++ {
		upper, lower := _fcge[row], _fcge[row+1]
		for col := 0; col < len(_efea)-1; col++ {
			left, right := _efea[col], _efea[col+1]
			bounds := _af.PdfRectangle{Llx: left, Urx: right, Lly: lower, Ury: upper}
			lines := linesAt[_cdgd(col, row)]
			if len(lines) == 0 {
				continue
			}
			table.put(col, row, _adbde(bounds, lines))
		}
	}
	return &table
}
2024-03-27 22:34:33 +00:00
2024-05-29 17:04:37 +00:00
// PageImages holds the images extracted from a PDF page. Every entry carries
// spatial information: the position and size at which the image is displayed.
type PageImages struct {
	Images []ImageMark
}
2024-04-16 11:40:43 +00:00
2024-05-29 17:04:37 +00:00
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	_aec []TextMark
}

// snapToGroupsDirection groups rulings whose primary coordinate (_aeef) lies
// within _cggd of the first ruling of the group, sets the primary coordinate
// of every ruling in a group to the group's mergePrimary value, then splits
// each group with splitSec and merges each part into a single ruling.
// A merged ruling that aligns (alignsPrimary and alignsSec) with the ruling
// appended just before it is logged as a duplicate and skipped.
// The result is sorted with sortStrict. The input list is sorted in place and
// its rulings are modified.
//
// An empty list yields nil.
func (_bbacd rulingList) snapToGroupsDirection() rulingList {
	if len(_bbacd) == 0 {
		// Nothing to group; indexing element 0 below would panic.
		return nil
	}
	_bbacd.sortStrict()

	groups := make(map[*ruling]rulingList, len(_bbacd))
	head := _bbacd[0]
	startGroup := func(r *ruling) {
		head = r
		groups[head] = rulingList{r}
	}
	startGroup(_bbacd[0])
	for _, r := range _bbacd[1:] {
		if r._aeef < head._aeef-_gefg {
			_ag.Log.Error("snapToGroupsDirection: Wrong primary order.\n\tv0=%s\n\t v=%s", head, r)
		}
		if r._aeef > head._aeef+_cggd {
			startGroup(r)
		} else {
			groups[head] = append(groups[head], r)
		}
	}

	// Snap every ruling to the merged primary coordinate of its group.
	merged := make(map[*ruling]float64, len(groups))
	headOf := make(map[*ruling]*ruling, len(_bbacd))
	for groupHead, members := range groups {
		merged[groupHead] = members.mergePrimary()
		for _, member := range members {
			headOf[member] = groupHead
		}
	}
	for _, r := range _bbacd {
		r._aeef = merged[headOf[r]]
	}

	snapped := make(rulingList, 0, len(_bbacd))
	for _, members := range groups {
		for idx, part := range members.splitSec() {
			candidate := part.merge()
			if len(snapped) > 0 {
				previous := snapped[len(snapped)-1]
				if previous.alignsPrimary(candidate) && previous.alignsSec(candidate) {
					_ag.Log.Error("snapToGroupsDirection: Duplicate i=%d\n\tw=%s\n\tv=%s", idx, previous, candidate)
					continue
				}
			}
			snapped = append(snapped, candidate)
		}
	}
	snapped.sortStrict()
	return snapped
}

// _fcegf converts stroked paths into rulings. After passing the sections to
// _abbaa, it walks every sub-path of every section, feeds each pair of
// consecutive points and the section colour to _gccad, and keeps the rulings
// _gccad accepts. Sub-paths with fewer than two points are skipped.
func _fcegf(_adea []pathSection) rulingList {
	_abbaa(_adea)
	if _gdeb {
		_ag.Log.Info("makeStrokeRulings: %d strokes", len(_adea))
	}

	var rulings rulingList
	for _, section := range _adea {
		for _, subpath := range section._bgbeg {
			points := subpath._aaebg
			if len(points) < 2 {
				continue
			}
			from := points[0]
			for _, to := range points[1:] {
				if r, ok := _gccad(from, to, section.Color); ok {
					rulings = append(rulings, r)
				}
				from = to
			}
		}
	}

	if _gdeb {
		_ag.Log.Info("makeStrokeRulings: %s", rulings)
	}
	return rulings
}

// getParagraphs builds the paragraphs of the page text.
//
// Rulings are gathered from the stroke paths (_gggf, when _adef is set) and
// from _aefg(_afbg) (when _bgce is set) and converted with toTilings. The text
// marks (_fecaa) are then processed in four passes, one per orientation value
// 0, 90, 180 and 270 of their _ddfdb field; the marks of each orientation are
// turned into paragraphs by _bdfg and appended to the result. The passes stop
// early once every mark has been consumed.
func (_cegc *PageText) getParagraphs() paraList {
	var rulings rulingList
	if _adef {
		rulings = append(rulings, _fcegf(_cegc._gggf)...)
	}
	if _bgce {
		rulings = append(rulings, _aefg(_cegc._afbg)...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(_cegc._fecaa)
	for angle := 0; angle < 360 && remaining > 0; angle += 90 {
		// At most `remaining` marks can still match an orientation.
		marks := make([]*textMark, 0, remaining)
		for _, mark := range _cegc._fecaa {
			if mark._ddfdb == angle {
				marks = append(marks, mark)
			}
		}
		if len(marks) == 0 {
			continue
		}
		paras = append(paras, _bdfg(marks, _cegc._cdf, rulings, tilings, _cegc._gbg._dbed)...)
		remaining -= len(marks)
	}
	return paras
}

// _gbef returns _fdbb(_cfbfc, _gffe) unless _cdaea reports true for that
// value, in which case it falls back to _gdfa(_cfbfc, _gffe).
func _gbef(_cfbfc, _gffe bounded) float64 {
	primary := _fdbb(_cfbfc, _gffe)
	if !_cdaea(primary) {
		return primary
	}
	return _gdfa(_cfbfc, _gffe)
}

// _dbbb returns _fcdd without its last rune. The empty string is returned
// unchanged instead of panicking on an out-of-range slice bound.
func _dbbb(_fcdd string) string {
	runes := []rune(_fcdd)
	if len(runes) == 0 {
		return ""
	}
	return string(runes[:len(runes)-1])
}

// primMinMax returns the smallest and largest primary coordinate (_aeef) in
// the list. The list must not be empty: element 0 is read unconditionally.
func (_beef rulingList) primMinMax() (float64, float64) {
	lowest, highest := _beef[0]._aeef, _beef[0]._aeef
	for _, r := range _beef[1:] {
		if r._aeef < lowest {
			lowest = r._aeef
		} else if r._aeef > highest {
			highest = r._aeef
		}
	}
	return lowest, highest
}

// _dbeee combines text marks into words.
//
// Marks are consumed in order. A mark whose text (_cgeb) satisfies _fcegd ends
// the current word. Otherwise the mark either starts a new word or is appended
// to the current one; a new word is started when the gap to the current word,
// relative to the word's _abcc value, is at least _fbbf or below -_gegc, or
// when the relative offset between _eeecd(_egff, mark) and the word's _accb
// exceeds _ceca. When _geaa is set, a mark recognised by _fcgg that lies in
// the diacritic area of its neighbour is merged with it through addDiacritic.
// Words whose computed text satisfies _fcegd are discarded.
func _dbeee(_abef []*textMark, _egff _af.PdfRectangle) []*textWord {
	var words []*textWord
	var current *textWord

	if _aebe {
		_ag.Log.Info("makeTextWords: %d marks", len(_abef))
	}

	// flush finishes the word under construction, keeping it unless its text
	// satisfies _fcegd.
	flush := func() {
		if current == nil {
			return
		}
		text := current.computeText()
		if !_fcegd(text) {
			current._ccbcc = text
			words = append(words, current)
			if _aebe {
				_ag.Log.Info("addNewWord: %d: word=%s", len(words)-1, current.String())
				for idx, mark := range current._ffcd {
					_efc.Printf("%4d: %s\n", idx, mark.String())
				}
			}
		}
		current = nil
	}

	for _, mark := range _abef {
		if _geaa && current != nil && len(current._ffcd) > 0 {
			previous := current._ffcd[len(current._ffcd)-1]
			markDiacritic, markIsDiacritic := _fcgg(mark._cgeb)
			prevDiacritic, prevIsDiacritic := _fcgg(previous._cgeb)
			if markIsDiacritic && !prevIsDiacritic && previous.inDiacriticArea(mark) {
				current.addDiacritic(markDiacritic)
				continue
			}
			if prevIsDiacritic && !markIsDiacritic && mark.inDiacriticArea(previous) {
				// The diacritic came first: replace it with the base mark and
				// re-attach it as a diacritic.
				current._ffcd = current._ffcd[:len(current._ffcd)-1]
				current.appendMark(mark, _egff)
				current.addDiacritic(prevDiacritic)
				continue
			}
		}

		if _fcegd(mark._cgeb) {
			flush()
			continue
		}
		if current == nil {
			current = _eaae([]*textMark{mark}, _egff)
			continue
		}

		scale := current._abcc
		offset := _ea.Abs(_eeecd(_egff, mark)-current._accb) / scale
		gap := _egec(mark, current) / scale
		if gap >= _fbbf || !(-_gegc <= gap && offset <= _ceca) {
			flush()
			current = _eaae([]*textMark{mark}, _egff)
			continue
		}
		current.appendMark(mark, _egff)
	}
	flush()
	return words
}

// toCellTextMarks returns the text marks of all lines of the paragraph, in
// line order. _dcgbc is passed through to the helpers that produce the marks.
//
// When _dadc is set and a line other than the last ends in a hyphen, its marks
// are post-processed by _cgab and no separator follows it. After every other
// line except the last, _dbce appends a separator chosen by _gcccd from the
// _addd values of the line and its successor.
func (_gged *textPara) toCellTextMarks(_dcgbc *int) []TextMark {
	var marks []TextMark
	lastIdx := len(_gged._aage) - 1
	for idx, line := range _gged._aage {
		lineMarks := line.toTextMarks(_dcgbc)
		dehyphenate := _dadc && line.endsInHyphen() && idx != lastIdx
		if dehyphenate {
			lineMarks = _cgab(lineMarks, _dcgbc)
		}
		marks = append(marks, lineMarks...)
		if !(dehyphenate || idx == lastIdx) {
			marks = _dbce(marks, _dcgbc, _gcccd(line._addd, _gged._aage[idx+1]._addd))
		}
	}
	return marks
}
2024-03-27 22:34:33 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
2024-05-29 17:04:37 +00:00
type ImageMark struct{Image *_af .Image ;
2024-03-27 22:34:33 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ;Height float64 ;
// Position of the image in PDF coordinates (lower left corner).
X float64 ;Y float64 ;
// Angle in degrees, if rotated.
2024-05-29 17:04:37 +00:00
Angle float64 ;};func (_gabc paraList )sortReadingOrder (){_ag .Log .Trace ("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_gabc ));
if len (_gabc )<=1{return ;};_gabc .computeEBBoxes ();_e .Slice (_gabc ,func (_ebad ,_fddd int )bool {return _bcbe (_gabc [_ebad ],_gabc [_fddd ])<=0});};func (_gcec *ruling )intersects (_gdda *ruling )bool {_ddfa :=(_gcec ._ecfb ==_gecdf &&_gdda ._ecfb ==_eeg )||(_gdda ._ecfb ==_gecdf &&_gcec ._ecfb ==_eeg );
_egbcf :=func (_eeeg ,_defcdb *ruling )bool {return _eeeg ._ggdb -_bcae <=_defcdb ._aeef &&_defcdb ._aeef <=_eeeg ._gbca +_bcae ;};_gdcdd :=_egbcf (_gcec ,_gdda );_gecaa :=_egbcf (_gdda ,_gcec );if _gdeb {_efc .Printf ("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a",_ddfa ,_gdcdd ,_gecaa ,_ddfa &&_gdcdd &&_gecaa ,_gcec ,_gdda );
};return _ddfa &&_gdcdd &&_gecaa ;};func (_eff *shapesState )closePath (){if _eff ._gbee {_eff ._baca =append (_eff ._baca ,_ggda (_eff ._faa ));_eff ._gbee =false ;}else if len (_eff ._baca )==0{if _cece {_ag .Log .Debug ("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068");
};_eff ._gbee =false ;return ;};_eff ._baca [len (_eff ._baca )-1].close ();if _cece {_ag .Log .Info ("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073",_eff );};};func (_gagb *shapesState )moveTo (_dcf ,_defd float64 ){_gagb ._gbee =true ;
_gagb ._faa =_gagb .devicePoint (_dcf ,_defd );if _cece {_ag .Log .Info ("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066",_dcf ,_defd ,_gagb ._faa );
};};func _fabfa (_cefd ,_gcafg _ef .Image )_ef .Image {_dfcac ,_abbe :=_gcafg .Bounds ().Size (),_cefd .Bounds ().Size ();_bcgc ,_dcad :=_dfcac .X ,_dfcac .Y ;if _abbe .X > _bcgc {_bcgc =_abbe .X ;};if _abbe .Y > _dcad {_dcad =_abbe .Y ;};_baegf :=_ef .Rect (0,0,_bcgc ,_dcad );
if _dfcac .X !=_bcgc ||_dfcac .Y !=_dcad {_bdea :=_ef .NewRGBA (_baegf );_cc .BiLinear .Scale (_bdea ,_baegf ,_cefd ,_gcafg .Bounds (),_cc .Over ,nil );_gcafg =_bdea ;};if _abbe .X !=_bcgc ||_abbe .Y !=_dcad {_ddee :=_ef .NewRGBA (_baegf );_cc .BiLinear .Scale (_ddee ,_baegf ,_cefd ,_cefd .Bounds (),_cc .Over ,nil );
_cefd =_ddee ;};_bgffe :=_ef .NewRGBA (_baegf );_cc .DrawMask (_bgffe ,_baegf ,_cefd ,_ef .Point {},_gcafg ,_ef .Point {},_cc .Over );return _bgffe ;};func (_cefg paraList )tables ()[]TextTable {var _fbcb []TextTable ;if _dedc {_ag .Log .Info ("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a");
};for _ ,_efae :=range _cefg {_fgff :=_efae ._befe ;if _fgff !=nil &&_fgff .isExportable (){_fbcb =append (_fbcb ,_fgff .toTextTable ());};};return _fbcb ;};func (_baa *PageFonts )extractPageResourcesToFont (_bc *_af .PdfPageResources )error {_dg ,_dbg :=_gf .GetDict (_bc .Font );
if !_dbg {return _b .New (_ebd );};for _ ,_be :=range _dg .Keys (){var (_bbf =true ;_ga []byte ;_bag string ;);_cfb ,_ebde :=_bc .GetFontByName (_be );if !_ebde {return _b .New (_dfe );};_ceg ,_cbd :=_af .NewPdfFontFromPdfObject (_cfb );if _cbd !=nil {return _cbd ;
};_abd :=_ceg .FontDescriptor ();_ecb :=_ceg .FontDescriptor ().FontName .String ();_dbd :=_ceg .Subtype ();if _dgg (_baa .Fonts ,_ecb ){continue ;};if len (_ceg .ToUnicode ())==0{_bbf =false ;};if _abd .FontFile !=nil {if _bbg ,_fb :=_gf .GetStream (_abd .FontFile );
_fb {_ga ,_cbd =_gf .DecodeStream (_bbg );if _cbd !=nil {return _cbd ;};_bag =_ecb +"\u002e\u0070\u0066\u0062";};}else if _abd .FontFile2 !=nil {if _de ,_gg :=_gf .GetStream (_abd .FontFile2 );_gg {_ga ,_cbd =_gf .DecodeStream (_de );if _cbd !=nil {return _cbd ;
};_bag =_ecb +"\u002e\u0074\u0074\u0066";};}else if _abd .FontFile3 !=nil {if _da ,_ccg :=_gf .GetStream (_abd .FontFile3 );_ccg {_ga ,_cbd =_gf .DecodeStream (_da );if _cbd !=nil {return _cbd ;};_bag =_ecb +"\u002e\u0063\u0066\u0066";};};if len (_bag )< 1{_ag .Log .Debug (_cbe );
};_ebab :=Font {FontName :_ecb ,PdfFont :_ceg ,IsCID :_ceg .IsCID (),IsSimple :_ceg .IsSimple (),ToUnicode :_bbf ,FontType :_dbd ,FontData :_ga ,FontFileName :_bag ,FontDescriptor :_abd };_baa .Fonts =append (_baa .Fonts ,_ebab );};return nil ;};func (_ged *wordBag )allWords ()[]*textWord {var _bdef []*textWord ;
for _ ,_fceg :=range _ged ._cdbc {_bdef =append (_bdef ,_fceg ...);};return _bdef ;};type subpath struct{_aaebg []_aae .Point ;_cedc bool ;};func _dfga (_bbefg []*textLine )map[float64 ][]*textLine {_e .Slice (_bbefg ,func (_baac ,_gbda int )bool {return _bbefg [_baac ]._addd < _bbefg [_gbda ]._addd });
_bffec :=map[float64 ][]*textLine {};for _ ,_bgeac :=range _bbefg {_cgfd :=_bedc (_bgeac );_cgfd =_ea .Round (_cgfd );_bffec [_cgfd ]=append (_bffec [_cgfd ],_bgeac );};return _bffec ;};func (_fee *textObject )getFontDirect (_ebea string )(*_af .PdfFont ,error ){_cbeg ,_gbfa :=_fee .getFontDict (_ebea );
if _gbfa !=nil {return nil ,_gbfa ;};_ffcc ,_gbfa :=_af .NewPdfFontFromPdfObject (_cbeg );if _gbfa !=nil {_ag .Log .Debug ("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ebea ,_gbfa );
};return _ffcc ,_gbfa ;};func (_ccea compositeCell )parasBBox ()(paraList ,_af .PdfRectangle ){return _ccea .paraList ,_ccea .PdfRectangle ;};func (_egf *wordBag )scanBand (_cfbe string ,_eeec *wordBag ,_eacf func (_ebbee *wordBag ,_bcba *textWord )bool ,_deb ,_gccge ,_beca float64 ,_fdbe ,_cdd bool )int {_fcaf :=_eeec ._cdac ;
var _bbcg map[int ]map[*textWord ]struct{};if !_fdbe {_bbcg =_egf .makeRemovals ();};_cdfa :=_egeb *_fcaf ;_egab :=0;for _ ,_cfcg :=range _egf .depthBand (_deb -_cdfa ,_gccge +_cdfa ){if len (_egf ._cdbc [_cfcg ])==0{continue ;};for _ ,_faebb :=range _egf ._cdbc [_cfcg ]{if !(_deb -_cdfa <=_faebb ._accb &&_faebb ._accb <=_gccge +_cdfa ){continue ;
};if !_eacf (_eeec ,_faebb ){continue ;};_cdba :=2.0*_ea .Abs (_faebb ._abcc -_eeec ._cdac )/(_faebb ._abcc +_eeec ._cdac );_deag :=_ea .Max (_faebb ._abcc /_eeec ._cdac ,_eeec ._cdac /_faebb ._abcc );_cgdg :=_ea .Min (_cdba ,_deag );if _beca > 0&&_cgdg > _beca {continue ;
};if _eeec .blocked (_faebb ){continue ;};if !_fdbe {_eeec .pullWord (_faebb ,_cfcg ,_bbcg );};_egab ++;if !_cdd {if _faebb ._accb < _deb {_deb =_faebb ._accb ;};if _faebb ._accb > _gccge {_gccge =_faebb ._accb ;};};if _fdbe {break ;};};};if !_fdbe {_egf .applyRemovals (_bbcg );
};return _egab ;};func _gfb (_cfff ,_bba _af .PdfRectangle )bool {return _bba .Llx <=_cfff .Urx &&_cfff .Llx <=_bba .Urx };func _eaaa (_fdba float64 )bool {return _ea .Abs (_fdba )< _cggd };func (_adeaf *textTable )putComposite (_dfgafg ,_egea int ,_eedf paraList ,_faaed _af .PdfRectangle ){if len (_eedf )==0{_ag .Log .Error ("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073");
return ;};_bbdf :=compositeCell {PdfRectangle :_faaed ,paraList :_eedf };if _dedc {_efc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a",_dfgafg ,_egea ,_bbdf .String ());
};_bbdf .updateBBox ();_adeaf ._becfc [_cdgd (_dfgafg ,_egea )]=_bbdf ;};type rulingKind int ;func (_bfce *textObject )setWordSpacing (_cbf float64 ){if _bfce ==nil {return ;};_bfce ._ecff ._febe =_cbf ;};
2024-03-27 22:34:33 +00:00
2024-05-29 17:04:37 +00:00
// Append appends `mark` to the mark array.
func (_dca *TextMarkArray) Append(mark TextMark) {
	_dca._aec = append(_dca._aec, mark)
}

// reorder rearranges the receiver in place: after the call, position i holds
// the paragraph that was at index `_acbd[i]` before the call.
func (_gfbg paraList) reorder(_acbd []int) {
	// Build the permutation in a scratch list so no entry is overwritten
	// before it has been read.
	permuted := make(paraList, len(_gfbg))
	for dst := range _acbd {
		permuted[dst] = _gfbg[_acbd[dst]]
	}
	copy(_gfbg, permuted)
}

// tidied returns the receiver's rulings after removeDuplicates, snapToGroups
// and sort have been applied in that order. `_feccb` only labels the log
// output. The result is nil when snapToGroups returns nil.
func (_baga rulingList) tidied(_feccb string) rulingList {
	uniques := _baga.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()

	if _gdeb {
		_ag.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", _feccb, len(_baga), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// empty reports whether the stack holds no entries.
func (_eeac *stateStack) empty() bool {
	return len(*_eeac) == 0
}
2024-03-27 22:34:33 +00:00
2024-05-29 17:04:37 +00:00
// Text returns the concatenated text of every element of the receiver, in order.
func (_gegg *lists) Text() string {
	var sb _a.Builder
	for _, item := range *_gegg {
		sb.WriteString(item.Text())
	}
	return sb.String()
}
// String returns a human readable description of `path`.
func (_dfg *subpath )String ()string {_caad :=_dfg ._aaebg ;_bgfa :=len (_caad );if _bgfa <=5{return _efc .Sprintf ("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f",_bgfa ,_caad );};return _efc .Sprintf ("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f",_bgfa ,_caad [0],_caad [1],_caad [_bgfa -1]);
};func (_efbdd *textTable )growTable (){_bcfea :=func (_fadbbb paraList ){_efbdd ._cegga ++;for _beefb :=0;_beefb < _efbdd ._aageb ;_beefb ++{_ddec :=_fadbbb [_beefb ];_efbdd .put (_beefb ,_efbdd ._cegga -1,_ddec );};};_bacbe :=func (_ebaee paraList ){_efbdd ._aageb ++;
for _bgaf :=0;_bgaf < _efbdd ._cegga ;_bgaf ++{_fadgb :=_ebaee [_bgaf ];_efbdd .put (_efbdd ._aageb -1,_bgaf ,_fadgb );};};if _gddf {_efbdd .log ("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce");};for _bead :=0;;_bead ++{_fcgd :=false ;_gedcg :=_efbdd .getDown ();
_fadab :=_efbdd .getRight ();if _gddf {_efc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_bead ,_efbdd );_efc .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a",_gedcg );_efc .Printf ("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a",_fadab );
};if _gedcg !=nil &&_fadab !=nil {_eede :=_gedcg [len (_gedcg )-1];if !_eede .taken ()&&_eede ==_fadab [len (_fadab )-1]{_bcfea (_gedcg );if _fadab =_efbdd .getRight ();_fadab !=nil {_bacbe (_fadab );_efbdd .put (_efbdd ._aageb -1,_efbdd ._cegga -1,_eede );
};_fcgd =true ;};};if !_fcgd &&_gedcg !=nil {_bcfea (_gedcg );_fcgd =true ;};if !_fcgd &&_fadab !=nil {_bacbe (_fadab );_fcgd =true ;};if !_fcgd {break ;};};};type textTable struct{_af .PdfRectangle ;_aageb ,_cegga int ;_caagg bool ;_dgcf map[uint64 ]*textPara ;
_becfc map[uint64 ]compositeCell ;};func _eccab (_bfcfa *_af .Image ,_aggbc _fe .Color )_ef .Image {_feefg ,_bcab :=int (_bfcfa .Width ),int (_bfcfa .Height );_cacd :=_ef .NewRGBA (_ef .Rect (0,0,_feefg ,_bcab ));for _ddeca :=0;_ddeca < _bcab ;_ddeca ++{for _dedd :=0;
_dedd < _feefg ;_dedd ++{_dddf ,_fegg :=_bfcfa .ColorAt (_dedd ,_ddeca );if _fegg !=nil {_ag .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e",_dedd ,_ddeca );
continue ;};_ecffg ,_dfcb ,_fcefb ,_ :=_dddf .RGBA ();var _aagbg _fe .Color ;if _ecffg +_dfcb +_fcefb ==0{_aagbg =_fe .Transparent ;}else {_aagbg =_aggbc ;};_cacd .Set (_dedd ,_ddeca ,_aagbg );};};return _cacd ;};func (_dcbb *textPara )toTextMarks (_aafee *int )[]TextMark {if _dcbb ._befe ==nil {return _dcbb .toCellTextMarks (_aafee );
};var _fgcd []TextMark ;for _cbffg :=0;_cbffg < _dcbb ._befe ._cegga ;_cbffg ++{for _fcdf :=0;_fcdf < _dcbb ._befe ._aageb ;_fcdf ++{_aggg :=_dcbb ._befe .get (_fcdf ,_cbffg );if _aggg ==nil {_fgcd =_dbce (_fgcd ,_aafee ,"\u0009");}else {_gfca :=_aggg .toCellTextMarks (_aafee );
_fgcd =append (_fgcd ,_gfca ...);};_fgcd =_dbce (_fgcd ,_aafee ,"\u0020");};if _cbffg < _dcbb ._befe ._cegga -1{_fgcd =_dbce (_fgcd ,_aafee ,"\u000a");};};_fgbfd :=_dcbb ._befe ;if _fgbfd .isExportable (){_ccbbe :=_fgbfd .toTextTable ();_fgcd =_egdfd (_fgcd ,&_ccbbe );
};return _fgcd ;};func (_ffgf paraList )sortTopoOrder (){_defcd :=_ffgf .topoOrder ();_ffgf .reorder (_defcd )};func (_agcd *subpath )clear (){*_agcd =subpath {}};func _aeca (_bfcbf ,_ccbae _af .PdfRectangle )bool {return _bfcbf .Llx <=_ccbae .Llx &&_ccbae .Urx <=_bfcbf .Urx &&_bfcbf .Lly <=_ccbae .Lly &&_ccbae .Ury <=_bfcbf .Ury ;
2024-04-16 11:40:43 +00:00
};
2024-02-11 21:29:32 +00:00
2024-04-30 12:24:05 +00:00
// Options extractor options.
type Options struct{
2024-03-27 22:34:33 +00:00
2024-04-30 12:24:05 +00:00
// DisableDocumentTags specifies whether to use the document tags during list extraction.
DisableDocumentTags bool ;
2024-04-16 11:40:43 +00:00
2024-04-30 12:24:05 +00:00
// ApplyCropBox will extract page text based on page cropbox if set to `true`.
ApplyCropBox bool ;
2024-04-16 11:40:43 +00:00
2024-04-30 12:24:05 +00:00
// UseSimplerExtractionProcess will skip topological text ordering and table processing.
//
// NOTE: While normally the extra processing is beneficial, it can also lead to problems when it does not work.
// Thus it is a flag to allow the user to control this process.
//
// Skipping some extraction processes would also lead to the reduced processing time.
UseSimplerExtractionProcess bool ;
2024-03-27 22:34:33 +00:00
2024-04-30 12:24:05 +00:00
// IncludeAnnotations specifies whether to include annotations in the extraction process, default value is `false`.
2024-05-29 17:04:37 +00:00
IncludeAnnotations bool ;};const (_ceag rulingKind =iota ;_eeg ;_gecdf ;);type paraList []*textPara ;func (_bbff paraList )writeText (_gdde _fc .Writer ){for _ddegf ,_fggf :=range _bbff {if _fggf ._bdgc {continue ;};_fggf .writeText (_gdde );if _ddegf !=len (_bbff )-1{if _adab (_fggf ,_bbff [_ddegf +1]){_gdde .Write ([]byte ("\u0020"));
}else {_gdde .Write ([]byte ("\u000a"));_gdde .Write ([]byte ("\u000a"));};};};_gdde .Write ([]byte ("\u000a"));_gdde .Write ([]byte ("\u000a"));};func _feca (_fcdc *_aa .ContentStreamOperation )(float64 ,error ){if len (_fcdc .Params )!=1{_dbdd :=_b .New ("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et");
_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076",_fcdc .Operand ,1,len (_fcdc .Params ),_fcdc .Params );
return 0.0,_dbdd ;};return _gf .GetNumberAsFloat (_fcdc .Params [0]);};func (_bdbfd paraList )xNeighbours (_gbcg float64 )map[*textPara ][]int {_afabb :=make ([]event ,2*len (_bdbfd ));if _gbcg ==0{for _ddga ,_agbfb :=range _bdbfd {_afabb [2*_ddga ]=event {_agbfb .Llx ,true ,_ddga };
_afabb [2*_ddga +1]=event {_agbfb .Urx ,false ,_ddga };};}else {for _cgdb ,_edcef :=range _bdbfd {_afabb [2*_cgdb ]=event {_edcef .Llx -_gbcg *_edcef .fontsize (),true ,_cgdb };_afabb [2*_cgdb +1]=event {_edcef .Urx +_gbcg *_edcef .fontsize (),false ,_cgdb };
};};return _bdbfd .eventNeighbours (_afabb );};func (_fcgf paraList )log (_dccb string ){if !_efbd {return ;};_ag .Log .Info ("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d",_dccb ,len (_fcgf ));
for _ggedc ,_efad :=range _fcgf {if _efad ==nil {continue ;};_gaffd :=_efad .text ();_feaf :="\u0020\u0020";if _efad ._befe !=nil {_feaf =_efc .Sprintf ("\u005b%\u0064\u0078\u0025\u0064\u005d",_efad ._befe ._aageb ,_efad ._befe ._cegga );};_efc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a",_ggedc ,_efad .PdfRectangle ,_feaf ,_efcca (_gaffd ,50));
};};func (_aee *textObject )getFontDict (_ededf string )(_ecca _gf .PdfObject ,_aeag error ){_cgf :=_aee ._dae ;if _cgf ==nil {_ag .Log .Debug ("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071",_ededf );
return nil ,nil ;};_ecca ,_gbac :=_cgf .GetFontByName (_gf .PdfObjectName (_ededf ));if !_gbac {_ag .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071",_ededf );
return nil ,_b .New ("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073");};return _ecca ,nil ;};func _adbde (_gbgg _af .PdfRectangle ,_cgcc []*textLine )*textPara {return &textPara {PdfRectangle :_gbgg ,_aage :_cgcc };
};const (_dadc =true ;_deeb =true ;_geaa =true ;_adfb =false ;_deff =false ;_fcda =6;_gefe =3.0;_defc =200;_eeab =true ;_dcdb =true ;_adef =true ;_bgce =true ;_bedg =false ;);func _cgab (_gggff []TextMark ,_bega *int )[]TextMark {_ceced :=_gggff [len (_gggff )-1];
_acee :=[]rune (_ceced .Text );if len (_acee )==1{_gggff =_gggff [:len (_gggff )-1];_fbcg :=_gggff [len (_gggff )-1];*_bega =_fbcg .Offset +len (_fbcg .Text );}else {_fccc :=_dbbb (_ceced .Text );*_bega +=len (_fccc )-len (_ceced .Text );_ceced .Text =_fccc ;
};return _gggff ;};func (_gbacd *compositeCell )updateBBox (){for _ ,_fgaff :=range _gbacd .paraList {_gbacd .PdfRectangle =_cfab (_gbacd .PdfRectangle ,_fgaff .PdfRectangle );};};var _defba string ="\u005e\u005b\u0061\u002d\u007a\u0041\u002dZ\u005d\u0028\u005c)\u007c\u005c\u002e)\u007c\u005e[\u005c\u0064\u005d\u002b\u0028\u005c)\u007c\\.\u0029\u007c\u005e\u005c\u0028\u005b\u0061\u002d\u007a\u0041\u002d\u005a\u005d\u005c\u0029\u007c\u005e\u005c\u0028\u005b\u005c\u0064\u005d\u002b\u005c\u0029";
// removeDuplicates drops words that duplicate another word in the same
// bucket: same text (`_ccbcc`) and all four rectangle edges closer than
// `_fcfce` times the size (`_abcc`) of the first word of the depth bucket
// being visited. Buckets that become empty are deleted from `_cdbc`.
func (_adcf *wordBag) removeDuplicates() {
	if _gbgbd {
		_ag.Log.Info("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071", _adcf.text())
	}
	for _, depthIdx := range _adcf.depthIndexes() {
		if len(_adcf._cdbc[depthIdx]) == 0 {
			continue
		}
		first := _adcf._cdbc[depthIdx][0]
		tol := _fcfce * first._abcc
		depth := first._accb
		near := func(a, b float64) bool { return _ea.Abs(a-b) < tol }

		for _, idx := range _adcf.depthBand(depth, depth+tol) {
			duplicates := map[*textWord]struct{}{}
			words := _adcf._cdbc[idx]
			for _, word := range words {
				if _, marked := duplicates[word]; marked {
					continue
				}
				for _, other := range words {
					if _, marked := duplicates[other]; marked {
						continue
					}
					if other != word && other._ccbcc == word._ccbcc &&
						near(other.Llx, word.Llx) && near(other.Urx, word.Urx) &&
						near(other.Lly, word.Lly) && near(other.Ury, word.Ury) {
						duplicates[other] = struct{}{}
					}
				}
			}
			if len(duplicates) == 0 {
				continue
			}
			// Compact the kept words to the front of the bucket, then cut the tail.
			kept := 0
			for _, word := range words {
				if _, marked := duplicates[word]; !marked {
					words[kept] = word
					kept++
				}
			}
			_adcf._cdbc[idx] = words[:len(words)-len(duplicates)]
			if len(_adcf._cdbc[idx]) == 0 {
				delete(_adcf._cdbc, idx)
			}
		}
	}
}

// _baee is a compiled pattern. Decoded it reads `^\s*(\d+\.?|[Iiv]+)\s*\)?$`.
var _baee = _g.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")

// llyRange returns the sub-slice of `_faae` whose paragraphs have Lly within
// [`_egdfa`, `_adfe`]. It binary-searches, so `_faae` presumably lists
// paragraph indexes in ascending Lly order (verify against the caller). The
// search length is the number of paragraphs, not len(_faae). It returns nil
// when the range lies completely outside the paragraphs' Lly values.
func (_cfgad paraList) llyRange(_faae []int, _egdfa, _adfe float64) []int {
	n := len(_cfgad)
	llyAt := func(i int) float64 { return _cfgad[_faae[i]].Lly }
	if _adfe < llyAt(0) || _egdfa > llyAt(n-1) {
		return nil
	}
	from := _e.Search(n, func(i int) bool { return llyAt(i) >= _egdfa })
	to := _e.Search(n, func(i int) bool { return llyAt(i) > _adfe })
	return _faae[from:to]
}

// _gdgf returns a ruling of kind `_eeg` positioned at the rectangle's Lly and
// spanning from its Llx to its Urx.
func _gdgf(_beae _af.PdfRectangle) *ruling {
	r := &ruling{_ecfb: _eeg}
	r._aeef = _beae.Lly
	r._ggdb = _beae.Llx
	r._gbca = _beae.Urx
	return r
}

// isQuadrilateral reports whether the subpath has four points, or five points
// of which the first and the last coincide.
func (_cdggg *subpath) isQuadrilateral() bool {
	points := _cdggg._aaebg
	switch len(points) {
	case 4:
		return true
	case 5:
		return points[0].X == points[4].X && points[0].Y == points[4].Y
	default:
		return false
	}
}

// _ggbea returns a ruling of kind `_gecdf` positioned at the rectangle's Llx
// and spanning from its Lly to its Ury.
func _ggbea(_bfgc _af.PdfRectangle) *ruling {
	r := &ruling{_ecfb: _gecdf}
	r._aeef = _bfgc.Llx
	r._ggdb = _bfgc.Lly
	r._gbca = _bfgc.Ury
	return r
}

// readBefore reports whether paragraph `_egedg` is read before paragraph
// `_ecab`. That is the case when
//   - `_eeed` holds for the pair and the first one has the larger Lly, or
//   - the first one's `_gbgbb` box lies entirely to the left of the second
//     one's and no third paragraph in the Lly range between them overlaps the
//     horizontal span [max of the two Llx, min of the two Urx].
//
// `_adgfa` is the paragraph order that llyRange searches.
func (_dce paraList) readBefore(_adgfa []int, _egedg, _ecab int) bool {
	a, b := _dce[_egedg], _dce[_ecab]
	if _eeed(a, b) && a.Lly > b.Lly {
		return true
	}
	if !(a._gbgbb.Urx < b._gbgbb.Llx) {
		return false
	}

	lowLly, highLly := a.Lly, b.Lly
	if lowLly > highLly {
		lowLly, highLly = highLly, lowLly
	}
	spanLlx := _ea.Max(a._gbgbb.Llx, b._gbgbb.Llx)
	spanUrx := _ea.Min(a._gbgbb.Urx, b._gbgbb.Urx)

	for _, idx := range _dce.llyRange(_adgfa, lowLly, highLly) {
		if idx == _egedg || idx == _ecab {
			continue
		}
		between := _dce[idx]
		if between._gbgbb.Llx <= spanUrx && spanLlx <= between._gbgbb.Urx {
			return false
		}
	}
	return true
}

// depth returns -1 for a paragraph flagged `_bdgc`, otherwise the `_addd` of
// its first line, or the depth of its table when it has no lines.
func (_bfga *textPara) depth() float64 {
	switch {
	case _bfga._bdgc:
		return -1.0
	case len(_bfga._aage) > 0:
		return _bfga._aage[0]._addd
	default:
		return _bfga._befe.depth()
	}
}

// _bcfb splits the words of `_cggc` into bags. For each depth index a bag is
// seeded with the first word in reading order and then grown with scanBand
// passes until a full round adds nothing; `_cggc` is emptied in the process.
// `_aeagc`, `_bbefb` and `_cfgega` are passed through to `_cgd` when a new bag
// is created.
func _bcfb(_cggc *wordBag, _aeagc float64, _bbefb, _cfgega rulingList) []*wordBag {
	var bags []*wordBag
	for _, depthIdx := range _cggc.depthIndexes() {
		for !_cggc.empty(depthIdx) {
			seedIdx := _cggc.firstReadingIndex(depthIdx)
			seed := _cggc.firstWord(seedIdx)
			bag := _cgd(seed, _aeagc, _bbefb, _cfgega)
			_cggc.removeWord(seed, seedIdx)
			if _cfgf {
				_ag.Log.Info("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073", seed.String())
			}

			// Keep scanning for as long as the previous round added words.
			for grew := true; grew; {
				grew = false
				// The gaps scale with the bag's current size, which may change
				// as words are added.
				otherGap := _daf * bag._cdac
				readingGap := _cecf * bag._cdac
				depthGap := _cgbc * bag._cdac
				if _cfgf {
					_ag.Log.Info("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066", bag.minDepth(), bag.maxDepth(), depthGap, readingGap)
				}
				if _cggc.scanBand("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c", bag, _cbdb(_gacff, 0), bag.minDepth()-depthGap, bag.maxDepth()+depthGap, _afdg, false, false) > 0 {
					grew = true
				}
				if _cggc.scanBand("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c", bag, _cbdb(_gacff, readingGap), bag.minDepth(), bag.maxDepth(), _bea, false, false) > 0 {
					grew = true
				}
				if grew {
					continue
				}
				// Dry run first (no words are moved), then pull for real only
				// if the count looks plausible.
				candidates := _cggc.scanBand("", bag, _cbdb(_ebgf, otherGap), bag.minDepth(), bag.maxDepth(), _egdfc, true, false)
				if candidates <= 0 {
					continue
				}
				depthSpan := (bag.maxDepth() - bag.minDepth()) / bag._cdac
				if (candidates > 1 && float64(candidates) > 0.3*depthSpan) || candidates <= 10 {
					if _cggc.scanBand("\u006f\u0074\u0068e\u0072", bag, _cbdb(_ebgf, otherGap), bag.minDepth(), bag.maxDepth(), _egdfc, false, true) > 0 {
						grew = true
					}
				}
			}
			bags = append(bags, bag)
		}
	}
	return bags
}

// toTilings tidies the rulings and converts every grid found in them into a
// tiling. It returns the tidied rulings together with the tilings, or
// (nil, nil) for an empty list.
func (_ccded rulingList) toTilings() (rulingList, []gridTiling) {
	_ccded.log("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s")
	if len(_ccded) == 0 {
		return nil, nil
	}
	_ccded = _ccded.tidied("\u0061\u006c\u006c")
	_ccded.log("\u0074\u0069\u0064\u0069\u0065\u0064")

	grids := _ccded.toGrids()
	tilings := make([]gridTiling, len(grids))
	for i := range grids {
		tilings[i] = grids[i].asTiling()
	}
	return _ccded, tilings
}

// _ggfca collects lines from `_cddf` that are related to `_eeaa`. Only lines
// with `_addd` >= `_gfba` are considered.
//
// With an upper bound (`_cfbg` != -1): lines below that bound whose text
// differs from `_eeaa` are collected, and the scan stops at the first such
// line whose rounded Llx is left of `_eeaa`'s.
//
// Without one (`_cfbg` == -1): lines at the same `_addd` as `_eeaa` with a
// different text are collected, as are lines whose `_addd` does not exceed the
// limit returned by `_egbgea` (when that limit is not -1).
//
// The result is never nil.
func _ggfca(_eeaa *textLine, _cddf []*textLine, _dccga []float64, _gfba, _cfbg float64) []*textLine {
	matches := []*textLine{}
	for _, line := range _cddf {
		if !(line._addd >= _gfba) {
			continue
		}
		switch {
		case _cfbg != -1 && line._addd < _cfbg:
			if line.text() == _eeaa.text() {
				continue
			}
			if _ea.Round(line.Llx) < _ea.Round(_eeaa.Llx) {
				return matches
			}
			matches = append(matches, line)
		case _cfbg == -1:
			if line._addd == _eeaa._addd {
				if line.text() != _eeaa.text() {
					matches = append(matches, line)
				}
				continue
			}
			limit := _egbgea(_eeaa, _cddf, _dccga)
			if limit != -1 && line._addd <= limit {
				matches = append(matches, line)
			}
		}
	}
	return matches
}

// computeText returns the concatenated `_cgeb` text of the word's marks.
func (_ddecf *textWord) computeText() string {
	var sb _a.Builder
	for _, mark := range _ddecf._ffcd {
		sb.WriteString(mark._cgeb)
	}
	return sb.String()
}

// bbox returns the bounding rectangle of the rulings. Which of primMinMax and
// secMinMax supplies the x range and which the y range depends on the kind of
// the first ruling. An empty list is logged as an error and yields the zero
// rectangle.
func (_fcegc rulingList) bbox() _af.PdfRectangle {
	if len(_fcegc) == 0 {
		_ag.Log.Error("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073")
		return _af.PdfRectangle{}
	}
	var box _af.PdfRectangle
	if _fcegc[0]._ecfb == _eeg {
		box.Llx, box.Urx = _fcegc.secMinMax()
		box.Lly, box.Ury = _fcegc.primMinMax()
		return box
	}
	box.Llx, box.Urx = _fcegc.primMinMax()
	box.Lly, box.Ury = _fcegc.secMinMax()
	return box
}

// showTextAdjusted processes the elements of the array `_gfa` in order:
// numbers shift the text matrix `_dbc` by -n*0.001*`_gbbgg` along x, strings
// are rendered with renderText. Any other element type, or a malformed number
// or string, aborts with an error. The value returned by renderText is not
// inspected.
func (_cgca *textObject) showTextAdjusted(_gfa *_gf.PdfObjectArray, _bggd int) error {
	// vertical is never set here; when true the shift would apply to y instead of x.
	vertical := false
	for _, elem := range _gfa.Elements() {
		switch elem.(type) {
		case *_gf.PdfObjectFloat, *_gf.PdfObjectInteger:
			amount, err := _gf.GetNumberAsFloat(elem)
			if err != nil {
				_ag.Log.Debug("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _gfa)
				return err
			}
			dx, dy := -amount*0.001*_cgca._ecff._gbbgg, 0.0
			if vertical {
				dy, dx = dx, dy
			}
			shift := _add(_aae.Point{X: dx, Y: dy})
			_cgca._dbc.Concat(shift)
		case *_gf.PdfObjectString:
			direct := _gf.TraceToDirectObject(elem)
			data, ok := _gf.GetStringBytes(direct)
			if !ok {
				_ag.Log.Trace("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _gfa)
				return _gf.ErrTypeError
			}
			_cgca.renderText(direct, data, _bggd)
		default:
			_ag.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _gfa)
			return _gf.ErrTypeError
		}
	}
	return nil
}

// gridIntersecting reports whether both ends (`_ggdb` and `_gbca`) of the two
// rulings match according to `_edddc`.
func (_bfbee *ruling) gridIntersecting(_beag *ruling) bool {
	sameStart := _edddc(_bfbee._ggdb, _beag._ggdb)
	return sameStart && _edddc(_bfbee._gbca, _beag._gbca)
}
// String returns a human readable description of `vecs`.
// The output is the number of rulings in each of the two lists returned by
// vertsHorzs and, when both are non-empty, a rectangle built from the `_aeef`
// of the first and last ruling of each list.
func (_dadcf rulingList) String() string {
	if len(_dadcf) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := _dadcf.vertsHorzs()
	numVerts, numHorzs := len(verts), len(horzs)
	if numVerts == 0 || numHorzs == 0 {
		return _efc.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", numVerts, numHorzs)
	}
	var extent _af.PdfRectangle
	extent.Llx = verts[0]._aeef
	extent.Urx = verts[numVerts-1]._aeef
	extent.Lly = horzs[numHorzs-1]._aeef
	extent.Ury = horzs[0]._aeef
	return _efc.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", numVerts, numHorzs, extent)
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// NewWithOptions an Extractor instance for extracting content from the input PDF page with options.
2024-05-29 17:04:37 +00:00
func NewWithOptions (page *_af .PdfPage ,options *Options )(*Extractor ,error ){const _ce ="\u0065x\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077W\u0069\u0074\u0068\u004f\u0070\u0074\u0069\u006f\u006e\u0073";_afe ,_ff :=page .GetAllContentStreams ();
if _ff !=nil {return nil ,_ff ;};_df ,_gda :=page .GetStructTreeRoot ();if !_gda {_ag .Log .Info ("T\u0068\u0065\u0020\u0070\u0064\u0066\u0020\u0064\u006f\u0063\u0075\u006d\u0065\u006e\u0074\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020\u0074\u0061\u0067g\u0065d\u002e\u0020\u0053\u0074r\u0075\u0063t\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e\u0027\u0074\u0020\u0065\u0078\u0069\u0073\u0074\u002e");
};_bd :=page .GetContainingPdfObject ();_ab ,_ff :=page .GetMediaBox ();if _ff !=nil {return nil ,_efc .Errorf ("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076",_ff );
};_cg :=&Extractor {_fgb :_afe ,_cf :page .Resources ,_fd :*_ab ,_fed :page .CropBox ,_fgf :map[string ]fontEntry {},_ec :map[string ]textResult {},_agf :map[string ]textResult {},_efe :options ,_ba :_df ,_ac :_bd };if _cg ._fd .Llx > _cg ._fd .Urx {_ag .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_cg ._fd );
_cg ._fd .Llx ,_cg ._fd .Urx =_cg ._fd .Urx ,_cg ._fd .Llx ;};if _cg ._fd .Lly > _cg ._fd .Ury {_ag .Log .Info ("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e",_cg ._fd );
_cg ._fd .Lly ,_cg ._fd .Ury =_cg ._fd .Ury ,_cg ._fd .Lly ;};if _cg ._efe !=nil {if _cg ._efe .IncludeAnnotations {_cg ._cae ,_ff =page .GetAnnotations ();if _ff !=nil {_ag .Log .Debug ("\u0045\u0072r\u006f\u0072\u0020\u0067\u0065\u0074\u0074\u0069\u006e\u0067\u0020\u0061\u006e\u006e\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0073: \u0025\u0076",_ff );
};};};_d .TrackUse (_ce );return _cg ,nil ;};func (_bbda *TextMarkArray )getTextMarkAtOffset (_dgde int )*TextMark {for _ ,_faf :=range _bbda ._aec {if _faf .Offset ==_dgde {return &_faf ;};};return nil ;};func _abbaa (_bccg []pathSection ){if _fbf < 0.0{return ;
};if _gdeb {_ag .Log .Info ("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073",len (_bccg ));};for _egcec ,_dcbgc :=range _bccg {for _faebe ,_bgafd :=range _dcbgc ._bgbeg {for _fdedc ,_fcca :=range _bgafd ._aaebg {_bgafd ._aaebg [_fdedc ]=_aae .Point {X :_affg (_fcca .X ),Y :_affg (_fcca .Y )};
if _gdeb {_aebc :=_bgafd ._aaebg [_fdedc ];if !_bgfgg (_fcca ,_aebc ){_abbdd :=_aae .Point {X :_aebc .X -_fcca .X ,Y :_aebc .Y -_fcca .Y };_efc .Printf ("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a",_egcec ,_faebe ,_fdedc ,_fcca ,_aebc ,_abbdd );
};};};};};};func (_gae *textObject )moveText (_ggd ,_befg float64 ){_gae .moveLP (_ggd ,_befg )};func _gcccd (_cafd ,_gcbfd float64 )string {_cedcc :=!_cdaea (_cafd -_gcbfd );if _cedcc {return "\u000a";};return "\u0020";};type textWord struct{_af .PdfRectangle ;
_accb float64 ;_ccbcc string ;_ffcd []*textMark ;_abcc float64 ;_dgeeg bool ;};func _fcgef (_egag ,_dddab ,_aggb float64 )rulingKind {if _egag >=_aggb &&_gdcgf (_dddab ,_egag ){return _eeg ;};if _dddab >=_aggb &&_gdcgf (_egag ,_dddab ){return _gecdf ;};
return _ceag ;};func (_fegb *textTable )put (_gdee ,_ecbdc int ,_acegc *textPara ){_fegb ._dgcf [_cdgd (_gdee ,_ecbdc )]=_acegc ;};func _ggda (_abec _aae .Point )*subpath {return &subpath {_aaebg :[]_aae .Point {_abec }}};func (_bafg *textTable )emptyCompositeRow (_gggfc int )bool {for _dfgaf :=0;
_dfgaf < _bafg ._aageb ;_dfgaf ++{if _fgfba ,_acbe :=_bafg ._becfc [_cdgd (_dfgaf ,_gggfc )];_acbe {if len (_fgfba .paraList )> 0{return false ;};};};return true ;};
2024-04-16 11:40:43 +00:00
2024-05-29 17:04:37 +00:00
// String returns a description of `k`.
// Kinds present in the _ecccgb table are rendered by their registered name; any other
// value falls back to a formatted message that includes the numeric kind.
func (_ccdca rulingKind) String() string {
	if name, known := _ecccgb[_ccdca]; known {
		return name
	}
	return _efc.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _ccdca)
}

// text returns the concatenated text of the words in the line (_cfcb), in order.
// A single space is emitted in front of every word whose _dgeeg flag is set.
func (_ecdg *textLine) text() string {
	var sb _a.Builder
	for _, word := range _ecdg._cfcb {
		if word._dgeeg {
			sb.WriteString("\u0020")
		}
		sb.WriteString(word._ccbcc)
	}
	return sb.String()
}
// Elements returns the TextMarks in `ma`.
// The slice returned is the array's own _aec slice, not a copy.
func (_fbg *TextMarkArray) Elements() []TextMark {
	return _fbg._aec
}

// _cbdb binds the float64 argument of a three-argument predicate, yielding a
// two-argument predicate that always calls _cdga with _agdd as the last argument.
func _cbdb(_cdga func(*wordBag, *textWord, float64) bool, _agdd float64) func(*wordBag, *textWord) bool {
	bound := func(bag *wordBag, word *textWord) bool {
		return _cdga(bag, word, _agdd)
	}
	return bound
}

// logComposite dumps the composite table (_becfc) to stdout as two grids when the
// _dedc debug flag is set: first the number of paragraphs per cell, then the first
// 12 characters (via _efcca) of each cell's merged text. `_cbad` is appended to the
// log headings. It does nothing when _dedc is false.
func (_ebcf *textTable) logComposite(_cbad string) {
	if !_dedc {
		return
	}
	cols, rows := _ebcf._aageb, _ebcf._cegga

	// Grid 1: paragraph counts.
	_ag.Log.Info("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _ebcf._aageb, _ebcf._cegga, _cbad)
	_efc.Printf("\u0025\u0035\u0073 \u007c", "")
	for col := 0; col < cols; col++ {
		_efc.Printf("\u0025\u0033\u0064 \u007c", col)
	}
	_efc.Println("")
	_efc.Printf("\u0025\u0035\u0073 \u002b", "")
	for col := 0; col < cols; col++ {
		_efc.Printf("\u0025\u0033\u0073 \u002b", "\u002d\u002d\u002d")
	}
	_efc.Println("")
	for row := 0; row < rows; row++ {
		_efc.Printf("\u0025\u0035\u0064 \u007c", row)
		for col := 0; col < cols; col++ {
			paras, _ := _ebcf._becfc[_cdgd(col, row)].parasBBox()
			_efc.Printf("\u0025\u0033\u0064 \u007c", len(paras))
		}
		_efc.Println("")
	}

	// Grid 2: truncated cell text.
	_ag.Log.Info("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _ebcf._aageb, _ebcf._cegga, _cbad)
	_efc.Printf("\u0025\u0035\u0073 \u007c", "")
	for col := 0; col < cols; col++ {
		_efc.Printf("\u0025\u0031\u0032\u0064\u0020\u007c", col)
	}
	_efc.Println("")
	_efc.Printf("\u0025\u0035\u0073 \u002b", "")
	for col := 0; col < cols; col++ {
		_efc.Print("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b")
	}
	_efc.Println("")
	for row := 0; row < rows; row++ {
		_efc.Printf("\u0025\u0035\u0064 \u007c", row)
		for col := 0; col < cols; col++ {
			paras, _ := _ebcf._becfc[_cdgd(col, row)].parasBBox()
			cellText := ""
			merged := paras.merge()
			if merged != nil {
				cellText = merged.text()
			}
			// Quote the truncated text, then strip the surrounding quote characters.
			quoted := _efc.Sprintf("\u0025\u0071", _efcca(cellText, 12))
			quoted = quoted[1 : len(quoted)-1]
			_efc.Printf("\u0025\u0031\u0032\u0073\u0020\u007c", quoted)
		}
		_efc.Println("")
	}
}

// _ccfge lists symbol strings; _debe compares a byte against the first byte of each.
var _ccfge = []string{
	"\u2756", "\u27a2", "\u2713", "\u2022", "\uf0a7",
	"\u25a1", "\u2212", "\u25a0", "\u25aa", "\u006f",
}
// subdivide expands the composite table into a table of individual cells.
// It computes row and column corridors, returns the receiver unchanged when either
// set is empty, normalises them with _bcgac, derives per-row and per-column offsets with
// _deadf, then splits every composite cell and stores the resulting paragraphs at
// (column offset + x, row offset + y) in the new table. Debug output is gated on _dedc.
func (_ebbf *textTable) subdivide() *textTable {
	_ebbf.logComposite("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e")
	_acde := _ebbf.compositeRowCorridors()
	_efag := _ebbf.compositeColCorridors()
	if _dedc {
		_ag.Log.Info("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073", _gfae(_acde), _gfae(_efag))
	}
	if len(_acde) == 0 || len(_efag) == 0 {
		return _ebbf
	}
	_bcgac(_acde)
	_bcgac(_efag)
	if _dedc {
		_ag.Log.Info("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073", _gfae(_acde), _gfae(_efag))
	}
	_edgg, _defg := _deadf(_ebbf._cegga, _acde)
	_gebd, _dcff := _deadf(_ebbf._aageb, _efag)
	_gcbbb := make(map[uint64]*textPara, _dcff*_defg)
	_edbfd := &textTable{PdfRectangle: _ebbf.PdfRectangle, _caagg: _ebbf._caagg, _cegga: _defg, _aageb: _dcff, _dgcf: _gcbbb}
	if _dedc {
		_ag.Log.Info("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076", _ebbf._aageb, _ebbf._cegga, _dcff, _defg, _gfae(_acde), _gfae(_efag), _edgg, _gebd)
	}
	for _cbded := 0; _cbded < _ebbf._cegga; _cbded++ {
		_dgdbd := _edgg[_cbded]
		for _dcdd := 0; _dcdd < _ebbf._aageb; _dcdd++ {
			_baace := _gebd[_dcdd]
			if _dedc {
				_efc.Printf("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a", _dcdd, _cbded, _baace, _dgdbd)
			}
			_dafe, _afgaa := _ebbf._becfc[_cdgd(_dcdd, _cbded)]
			if !_afgaa {
				continue
			}
			_dgdc := _dafe.split(_acde[_cbded], _efag[_dcdd])
			for _ffade := 0; _ffade < _dgdc._cegga; _ffade++ {
				for _ecga := 0; _ecga < _dgdc._aageb; _ecga++ {
					_bgee := _dgdc.get(_ecga, _ffade)
					_edbfd.put(_baace+_ecga, _dgdbd+_ffade, _bgee)
					if _dedc {
						_efc.Printf("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", _baace+_ecga, _dgdbd+_ffade, _bgee)
					}
				}
			}
		}
	}
	return _edbfd
}

// getRight collects, for every row, the _aggd neighbour of the cell in the last column.
// It returns nil when any of those neighbours reports taken(), or when the _cabda link of
// one row's neighbour is not the neighbour collected for the next row.
func (_ggffg *textTable) getRight() paraList {
	_gffa := make(paraList, _ggffg._cegga)
	for _gbefe := 0; _gbefe < _ggffg._cegga; _gbefe++ {
		_efef := _ggffg.get(_ggffg._aageb-1, _gbefe)._aggd
		if _efef.taken() {
			return nil
		}
		_gffa[_gbefe] = _efef
	}
	for _dgace := 0; _dgace < _ggffg._cegga-1; _dgace++ {
		if _gffa[_dgace]._cabda != _gffa[_dgace+1] {
			return nil
		}
	}
	return _gffa
}

// _abda builds a list from its lines (_fged), kind tag (_fdgc) and child lists (_fbef).
func _abda(_eagc []*textLine, _dfea string, _fbbe []*list) *list {
	return &list{_fged: _eagc, _fdgc: _dfea, _fbef: _fbbe}
}

// _eeed reports the result of _gfb applied to the _gbgbb fields of the two paragraphs.
func _eeed(_fcdef, _egbgg *textPara) bool {
	return _gfb(_fcdef._gbgbb, _egbgg._gbgbb)
}

// _bggb recursively builds nested "bullet" lists.
// `_cfgd[_ebaaf]` selects the current key into `_fdfad`; the lines stored under it are
// filtered by _dgbf using the bounds `_bcfaf` and `_bccd`. For each surviving line the
// function recurses one level deeper (while further keys remain), bounded by this line's
// _addd and the next line's _addd (or `_bcfaf` for the last line), then gathers the line
// plus the follow-on lines returned by _ggfca into a new list.
// The result is always a non-nil slice.
func _bggb(_egecb []*textLine, _fdfad map[float64][]*textLine, _cfgd []float64, _ebaaf int, _bccd, _bcfaf float64) []*list {
	_gbeeg := []*list{}
	_dccg := _ebaaf
	_ebaaf = _ebaaf + 1
	_febb := _cfgd[_dccg]
	_egcg := _fdfad[_febb]
	_gddfg := _dgbf(_egcg, _bcfaf, _bccd)
	for _cdgc, _fded := range _gddfg {
		var _gcaaf float64
		_gdegd := []*list{}
		_bcbg := _fded._addd
		_dfbb := _bcfaf
		if _cdgc < len(_gddfg)-1 {
			_dfbb = _gddfg[_cdgc+1]._addd
		}
		if _ebaaf < len(_cfgd) {
			_gdegd = _bggb(_egecb, _fdfad, _cfgd, _ebaaf, _bcbg, _dfbb)
		}
		// The item's own text stops at the first line of its first child list, if any.
		_gcaaf = _dfbb
		if len(_gdegd) > 0 {
			_gfdg := _gdegd[0]
			if len(_gfdg._fged) > 0 {
				_gcaaf = _gfdg._fged[0]._addd
			}
		}
		_ddddc := []*textLine{_fded}
		_bdfc := _ggfca(_fded, _egecb, _cfgd, _bcbg, _gcaaf)
		_ddddc = append(_ddddc, _bdfc...)
		_ebbg := _abda(_ddddc, "\u0062\u0075\u006c\u006c\u0065\u0074", _gdegd)
		_ebbg._cbda = _ebgc(_ddddc, "")
		_gbeeg = append(_gbeeg, _ebbg)
	}
	return _gbeeg
}

// toTextMarks converts each mark of the word to a TextMark and accumulates them with
// _fgec, which also receives the running offset pointer `_gfaga`.
func (_gacg *textWord) toTextMarks(_gfaga *int) []TextMark {
	var _ebdea []TextMark
	for _, _dgda := range _gacg._ffcd {
		_ebdea = _fgec(_ebdea, _gfaga, _dgda.ToTextMark())
	}
	return _ebdea
}

// close appends the first point again unless _bgfgg reports it equal to the last point,
// marks the subpath closed (_cedc) and then removes duplicate points.
func (_efa *subpath) close() {
	if !_bgfgg(_efa._aaebg[0], _efa.last()) {
		_efa.add(_efa._aaebg[0])
	}
	_efa._cedc = true
	_efa.removeDuplicates()
}

// toTextMarks returns the TextMarks of every word in the line. A " " mark is added via
// _dbce in front of each word whose _dgeeg flag is set.
func (_bcde *textLine) toTextMarks(_ebcg *int) []TextMark {
	var _edfb []TextMark
	for _, _geged := range _bcde._cfcb {
		if _geged._dgeeg {
			_edfb = _dbce(_edfb, _ebcg, "\u0020")
		}
		_babc := _geged.toTextMarks(_ebcg)
		_edfb = append(_edfb, _babc...)
	}
	return _edfb
}

// _bffd is the size at which getFont evicts an entry from the font cache before adding one.
const _bffd = 10

// structElement is a node of the parsed structure tree, filled in by parseStructElement.
type structElement struct {
	_dccda string          // String() of the element's "S" entry ("" when absent).
	_befc  []structElement // Children parsed from non-integer kids.
	_fbge  int64           // Integer kid value, or -1 when none was recorded.
	_bffdf _gf.PdfObject   // The element's "Pg" entry.
}

// _afc converts the children of `_ggcd` into bullet lists.
// An "LI" child becomes a "bullet" list built from its lines (_fcgc) and its own
// converted children. An "LBody" child makes the function return that child's
// conversion directly, discarding anything gathered so far. An "L" child has its
// conversion appended and the function returns immediately.
func _afc(_ggcd *list) []*list {
	var _gbgfa []*list
	for _, _ddg := range _ggcd._fbef {
		switch _ddg._fdgc {
		case "\u004c\u0049":
			_gbdge := _fcgc(_ddg)
			_eddfbg := _afc(_ddg)
			_acgf := _abda(_gbdge, "\u0062\u0075\u006c\u006c\u0065\u0074", _eddfbg)
			_dabfc := _ebgc(_gbdge, "")
			_acgf._cbda = _dabfc
			_gbgfa = append(_gbgfa, _acgf)
		case "\u004c\u0042\u006fd\u0079":
			return _afc(_ddg)
		case "\u004c":
			_dfef := _afc(_ddg)
			_gbgfa = append(_gbgfa, _dfef...)
			return _gbgfa
		}
	}
	return _gbgfa
}

// _facf merges word bags that are contained in larger ones.
// The bags are sorted by decreasing area, then decreasing height. Each bag that has not
// itself been absorbed then absorbs every later, not-yet-absorbed bag whose rectangle
// lies (per _aeca) inside its own rectangle widened on the left by _cdac. The surviving
// bags are returned. The input slice is reordered in place.
func _facf(_bgbf []*wordBag) []*wordBag {
	if len(_bgbf) <= 1 {
		return _bgbf
	}
	if _fbeb {
		_ag.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	_e.Slice(_bgbf, func(_cccf, _dbgd int) bool {
		_gfeg, _efeb := _bgbf[_cccf], _bgbf[_dbgd]
		_ccbg := _gfeg.Width() * _gfeg.Height()
		_cca := _efeb.Width() * _efeb.Height()
		if _ccbg != _cca {
			return _ccbg > _cca
		}
		if _gfeg.Height() != _efeb.Height() {
			return _gfeg.Height() > _efeb.Height()
		}
		return _cccf < _dbgd
	})
	var _gdcb []*wordBag
	_gcgd := make(intSet)
	for _geca := 0; _geca < len(_bgbf); _geca++ {
		if _gcgd.has(_geca) {
			continue
		}
		_gdcg := _bgbf[_geca]
		for _bcgb := _geca + 1; _bcgb < len(_bgbf); _bcgb++ {
			// Skip candidates that an earlier bag already absorbed. The original tested
			// the outer index here, which is never in the set at this point, so the
			// guard had no effect.
			if _gcgd.has(_bcgb) {
				continue
			}
			_ffbe := _bgbf[_bcgb]
			_dfgc := _gdcg.PdfRectangle
			_dfgc.Llx -= _gdcg._cdac
			if _aeca(_dfgc, _ffbe.PdfRectangle) {
				_gdcg.absorb(_ffbe)
				_gcgd.add(_bcgb)
			}
		}
		_gdcb = append(_gdcb, _gdcg)
	}
	// Sanity check: every input bag must be either kept or absorbed.
	if len(_bgbf) != len(_gdcb)+len(_gcgd) {
		_ag.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_bgbf), len(_gdcb), len(_gcgd))
	}
	return _gdcb
}

// addDiacritic appends `_aedbf` to the _cgeb text of the word's last mark and rewrites
// that text in NFKC normal form.
// NOTE(review): indexes the last element of _ffcd unconditionally, so it panics when the
// word has no marks; confirm callers guarantee at least one.
func (_cbbg *textWord) addDiacritic(_aedbf string) {
	_feaabe := _cbbg._ffcd[len(_cbbg._ffcd)-1]
	_feaabe._cgeb += _aedbf
	_feaabe._cgeb = _c.NFKC.String(_feaabe._cgeb)
}

// findTables links neighbouring paragraphs, sorts the list in place with _gbef, and
// returns the tables found by findGridTables (when _eeab is set) followed by those found
// by findTextTables (when _dcdb is set).
func (_dagbc paraList) findTables(_febc []gridTiling) []*textTable {
	_dagbc.addNeighbours()
	_e.Slice(_dagbc, func(_cbgc, _cffedg int) bool { return _gbef(_dagbc[_cbgc], _dagbc[_cffedg]) < 0 })
	var _cdgbd []*textTable
	if _eeab {
		_afce := _dagbc.findGridTables(_febc)
		_cdgbd = append(_cdgbd, _afce...)
	}
	if _dcdb {
		_ecdb := _dagbc.findTextTables()
		_cdgbd = append(_cdgbd, _ecdb...)
	}
	return _cdgbd
}

// _cdaea reports whether the magnitude of `_dgag` is strictly below the tolerance _gefg.
func _cdaea(_dgag float64) bool {
	return _ea.Abs(_dgag) < _gefg
}

// _fcgg looks up the single rune of `_cgebe` in the _cbba table. It returns ("", false)
// when the string is not exactly one rune long or the rune has no entry.
func _fcgg(_cgebe string) (string, bool) {
	_fbfc := []rune(_cgebe)
	if len(_fbfc) != 1 {
		return "", false
	}
	_bfdd, _adbef := _cbba[_fbfc[0]]
	return _bfdd, _adbef
}

// _bagb is the recursion level above which extractPageText reports a form stack overflow.
const _bagb = 20

// checkOp validates the operand count of a content stream operation.
// When the receiver is nil the operation is outside a text object; this is logged
// together with at most `_gcff` of its parameters. When `_gcff` is non-negative and the
// operation does not have exactly that many parameters, the mismatch is logged and
// (false, err) is returned, where err is non-nil only if `_bgd` is set.
// Otherwise it returns (true, nil).
func (_edgf *textObject) checkOp(_bgc *_aa.ContentStreamOperation, _gcff int, _bgd bool) (_bacb bool, _cfeb error) {
	if _edgf == nil {
		var _ddcb []_gf.PdfObject
		if _gcff > 0 {
			_ddcb = _bgc.Params
			if len(_ddcb) > _gcff {
				_ddcb = _ddcb[:_gcff]
			}
		}
		_ag.Log.Debug("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076", _bgc.Operand, _ddcb)
	}
	if _gcff >= 0 {
		if len(_bgc.Params) != _gcff {
			if _bgd {
				_cfeb = _b.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
			}
			_ag.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _bgc.Operand, _gcff, len(_bgc.Params), _bgc.Params)
			return false, _cfeb
		}
	}
	return true, nil
}

// _ebaad builds a gridTile for rectangle `_feccc`. Each of the four flags records whether
// the corresponding ruling is non-nil and encloses the tile's extent: the first two
// rulings are tested against the tile's y range (Lly..Ury), the last two against its
// x range (Llx..Urx).
func _ebaad(_feccc _af.PdfRectangle, _gafb, _bbgge, _fbga, _abde *ruling) gridTile {
	_dagf := _feccc.Llx
	_fbbdb := _feccc.Urx
	_ffea := _feccc.Lly
	_cgafc := _feccc.Ury
	return gridTile{
		PdfRectangle: _feccc,
		_gceeb:       _gafb != nil && _gafb.encloses(_ffea, _cgafc),
		_gdcbg:       _bbgge != nil && _bbgge.encloses(_ffea, _cgafc),
		_dbafa:       _fbga != nil && _fbga.encloses(_dagf, _fbbdb),
		_ffdf:        _abde != nil && _abde.encloses(_dagf, _fbbdb),
	}
}

// text returns the text of all words in the bag (in allWords order) joined by spaces.
func (_bfea *wordBag) text() string {
	_aagf := _bfea.allWords()
	_cedf := make([]string, len(_aagf))
	for _ebfe, _abbd := range _aagf {
		_cedf[_ebfe] = _abbd._ccbcc
	}
	return _a.Join(_cedf, "\u0020")
}

// lists is a slice of list pointers.
type lists []*list

// depthRange returns, in ascending order, the keys of _cdbc that lie in the inclusive
// range [_aecd, _efdeg], or nil when there are none.
func (_gbbf *wordBag) depthRange(_aecd, _efdeg int) []int {
	var _dccce []int
	for _gfee := range _gbbf._cdbc {
		if _aecd <= _gfee && _gfee <= _efdeg {
			_dccce = append(_dccce, _gfee)
		}
	}
	if len(_dccce) == 0 {
		return nil
	}
	_e.Ints(_dccce)
	return _dccce
}
// String returns a description of `p`.
// Paragraphs flagged _bdgc are shown as "[EMPTY]" with their rectangle only. Otherwise
// the output has the rectangle, the dimensions of the attached table (when _befe is
// set), the line count and the first 50 characters of the text (via _efcca).
func (_cfebc *textPara) String() string {
	if _cfebc._bdgc {
		return _efc.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", _cfebc.PdfRectangle)
	}
	var tableDims string
	if table := _cfebc._befe; table != nil {
		tableDims = _efc.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", table._aageb, table._cegga)
	}
	return _efc.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", _cfebc.PdfRectangle, tableDims, len(_cfebc._aage), _efcca(_cfebc.text(), 50))
}

// asRuling converts a filled rectangle into a ruling tagged with mark kind _bddf.
// For kind _gecdf the primary coordinate is the mean of Llx and Urx and the span runs
// from Lly to Ury. For kind _eeg it is the mean of Lly and Ury with a span from Llx to
// Urx. The width is taken from checkWidth over the short axis; if checkWidth rejects
// it, or the kind is neither of the two, the result is (nil, false).
func (_babac rectRuling) asRuling() (*ruling, bool) {
	result := ruling{_ecfb: _babac._beda, Color: _babac.Color, _agff: _bddf}
	switch _babac._beda {
	case _gecdf:
		result._aeef = 0.5 * (_babac.Llx + _babac.Urx)
		result._ggdb = _babac.Lly
		result._gbca = _babac.Ury
		width, ok := _babac.checkWidth(_babac.Llx, _babac.Urx)
		if !ok {
			if _fccf {
				_ag.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _babac)
			}
			return nil, false
		}
		result._faba = width
	case _eeg:
		result._aeef = 0.5 * (_babac.Lly + _babac.Ury)
		result._ggdb = _babac.Llx
		result._gbca = _babac.Urx
		width, ok := _babac.checkWidth(_babac.Lly, _babac.Ury)
		if !ok {
			if _fccf {
				_ag.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _babac)
			}
			return nil, false
		}
		result._faba = width
	default:
		_ag.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _babac._beda)
		return nil, false
	}
	return &result, true
}

// get returns the paragraph stored in _dgcf under the key _cdgd(_bbfeb, _bfbed), or nil
// when the map has no entry for it.
func (_abaa *textTable) get(_bbfeb, _bfbed int) *textPara {
	key := _cdgd(_bbfeb, _bfbed)
	return _abaa._dgcf[key]
}
// getFont returns the font named `_dccd`, using the extractor's font cache (_fgf) when
// one is configured.
//
// With a cache: the font dictionary is resolved, the access counter (_db) is bumped and
// the cache is consulted under the dictionary's String() key. A hit refreshes the
// entry's access stamp and returns the cached font. On a miss the font is loaded with
// getFontDirect and inserted; if the cache already holds _bffd entries, the entry with
// the smallest access stamp is evicted first.
//
// Without a cache the font is simply loaded with getFontDirect.
// Errors from getFontDict and getFontDirect are returned as-is.
func (_decd *textObject) getFont(_dccd string) (*_af.PdfFont, error) {
	if _decd._dbe._fgf != nil {
		_ccbc, _bee := _decd.getFontDict(_dccd)
		if _bee != nil {
			_ag.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073", _dccd, _bee.Error())
			return nil, _bee
		}
		_decd._dbe._db++
		_cacheKey := _ccbc.String()
		_deee, _gaa := _decd._dbe._fgf[_cacheKey]
		if _gaa {
			_deee._edeb = _decd._dbe._db
			// The map stores fontEntry by value, so the refreshed access stamp must be
			// written back; previously it was set on a local copy and lost, so eviction
			// never took cache hits into account.
			_decd._dbe._fgf[_cacheKey] = _deee
			return _deee._gggfe, nil
		}
	}
	_gbea, _deca := _decd.getFontDict(_dccd)
	if _deca != nil {
		return nil, _deca
	}
	_cag, _deca := _decd.getFontDirect(_dccd)
	if _deca != nil {
		return nil, _deca
	}
	if _decd._dbe._fgf != nil {
		_cfcd := fontEntry{_cag, _decd._dbe._db}
		if len(_decd._dbe._fgf) >= _bffd {
			// Cache full: order the keys by access stamp and drop the oldest.
			var _agdg []string
			for _eecd := range _decd._dbe._fgf {
				_agdg = append(_agdg, _eecd)
			}
			_e.Slice(_agdg, func(_gddc, _fecc int) bool {
				return _decd._dbe._fgf[_agdg[_gddc]]._edeb < _decd._dbe._fgf[_agdg[_fecc]]._edeb
			})
			delete(_decd._dbe._fgf, _agdg[0])
		}
		_decd._dbe._fgf[_gbea.String()] = _cfcd
	}
	return _cag, nil
}

// _bcbe returns _gdfa(a, b) unless _cdaea treats that value as zero, in which case it
// falls back to _fdbb(a, b).
func _bcbe(_agfa, _gabec bounded) float64 {
	_cfaeb := _gdfa(_agfa, _gabec)
	if !_cdaea(_cfaeb) {
		return _cfaeb
	}
	return _fdbb(_agfa, _gabec)
}

// _afeec rounds `_cgfb` to the nearest multiple of `_eccg` and returns it as an int.
// A step of 0 is treated as 1.
func _afeec(_cgfb float64, _eccg int) int {
	if _eccg == 0 {
		_eccg = 1
	}
	_ggdfa := float64(_eccg)
	return int(_ea.Round(_cgfb/_ggdfa) * _ggdfa)
}

// asRuling converts a stroked line into a ruling tagged with mark kind _cbeb.
// For kind _gecdf the primary coordinate is xMean() and the span is the ordered Y range
// of the two end points. For kind _eeg it is yMean() with the ordered X range. Any other
// kind is logged as an error and yields (nil, false).
func (_edgc lineRuling) asRuling() (*ruling, bool) {
	_cabe := ruling{_ecfb: _edgc._faab, Color: _edgc.Color, _agff: _cbeb}
	switch _edgc._faab {
	case _gecdf:
		_cabe._aeef = _edgc.xMean()
		_cabe._ggdb = _ea.Min(_edgc._bbee.Y, _edgc._efge.Y)
		_cabe._gbca = _ea.Max(_edgc._bbee.Y, _edgc._efge.Y)
	case _eeg:
		_cabe._aeef = _edgc.yMean()
		_cabe._ggdb = _ea.Min(_edgc._bbee.X, _edgc._efge.X)
		_cabe._gbca = _ea.Max(_edgc._bbee.X, _edgc._efge.X)
	default:
		_ag.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _edgc._faab)
		return nil, false
	}
	return &_cabe, true
}

// setTextRenderMode stores `_fdda` as the RenderMode of the text state.
// It is a no-op on a nil receiver.
func (_cga *textObject) setTextRenderMode(_fdda int) {
	if _cga == nil {
		return
	}
	_cga._ecff._aaeb = RenderMode(_fdda)
}

// _cdaf is a case-insensitive regular expression source. It matches a roman numeral
// followed by ")" or "." at the start of a string, or a roman numeral wrapped in
// parentheses.
var _cdaf string = "\u0028\u003f\u0069\u0029\u005e\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028?\u003a\u0044\u007cM\u0029\u007c\u0044\u003f\u0043{\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028\u003f\u003a\u004c\u007c\u0043\u0029\u007cL\u003f\u0058\u007b\u0030\u002c\u0033}\u0029\u0028\u0049\u0028\u003f\u003a\u0056\u007c\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u005c\u0029\u007c\u005c\u002e\u0029\u007c\u005e\u005c\u0028\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028\u003f\u003aD\u007cM\u0029\u007c\u0044\u003f\u0043\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028?\u003a\u004c\u007c\u0043\u0029\u007c\u004c?\u0058\u007b0\u002c\u0033\u007d\u0029(\u0049\u0028\u003f\u003a\u0056|\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u005c\u0029"
// minDepth returns _ecba minus the gap between the bag's Ury and its _cdac value.
func (_dfge *wordBag) minDepth() float64 {
	gap := _dfge.Ury - _dfge._cdac
	return _dfge._ecba - gap
}

// extractFormImages extracts the images drawn by the form XObject named `_eddb`.
// It looks the form up in `_bfe`, returns nil if it does not exist, and otherwise runs
// extractContentStreamImages over the form's content stream using the form's own
// resources (falling back to `_bfe` when it has none). The _gga counter is incremented
// on success. `_afa` is not used.
func (_fdfa *imageExtractContext) extractFormImages(_eddb *_gf.PdfObjectName, _afa _aa.GraphicsState, _bfe *_af.PdfPageResources) error {
	form, err := _bfe.GetXObjectFormByName(*_eddb)
	if err != nil {
		return err
	}
	if form == nil {
		return nil
	}
	content, err := form.GetContentStream()
	if err != nil {
		return err
	}
	resources := form.Resources
	if resources == nil {
		resources = _bfe
	}
	if err = _fdfa.extractContentStreamImages(string(content), resources); err != nil {
		return err
	}
	_fdfa._gga++
	return nil
}

// intersections maps the index of each ruling to the set of indexes of the rulings it
// intersects. Only _gecdf/_eeg pairs are tested. The result is nil when there are fewer
// than _fcad+1 rulings of kind _gecdf or fewer than _bce+1 of kind _eeg, or when the two
// groups together exceed _ebdb.
func (_dbga rulingList) intersections() map[int]intSet {
	var vertIdx, horzIdx []int
	for i, r := range _dbga {
		switch r._ecfb {
		case _gecdf:
			vertIdx = append(vertIdx, i)
		case _eeg:
			horzIdx = append(horzIdx, i)
		}
	}
	if len(vertIdx) < _fcad+1 || len(horzIdx) < _bce+1 {
		return nil
	}
	if len(vertIdx)+len(horzIdx) > _ebdb {
		_ag.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064", len(_dbga), len(vertIdx), len(horzIdx))
		return nil
	}
	crossings := make(map[int]intSet, len(vertIdx)+len(horzIdx))
	for _, v := range vertIdx {
		for _, h := range horzIdx {
			if !_dbga[v].intersects(_dbga[h]) {
				continue
			}
			// Create the per-ruling sets lazily, then record the crossing both ways.
			if _, seen := crossings[v]; !seen {
				crossings[v] = make(intSet)
			}
			if _, seen := crossings[h]; !seen {
				crossings[h] = make(intSet)
			}
			crossings[v].add(h)
			crossings[h].add(v)
		}
	}
	return crossings
}

// _egec returns the left edge of the first box minus the right edge of the second.
func _egec(_gega, _defb bounded) float64 {
	left := _gega.bbox().Llx
	right := _defb.bbox().Urx
	return left - right
}

// _bcgd classifies the segment between two points by passing its absolute X and Y
// extents, together with the threshold _ebffe, to _fcgef.
func _bcgd(_bfab, _faee _aae.Point) rulingKind {
	dx := _ea.Abs(_bfab.X - _faee.X)
	dy := _ea.Abs(_bfab.Y - _faee.Y)
	return _fcgef(dx, dy, _ebffe)
}

// imageExtractContext carries the state of one image extraction run.
type imageExtractContext struct {
	_cgg []ImageMark
	_ffa int
	_fcc int
	_gga int // Incremented by extractFormImages after each form is processed.
	_ebf map[*_gf.PdfObjectStream]*cachedImage
	_dac *ImageExtractOptions
	_eed bool
}

// _cbdd holds the display names of the mark kinds, as returned by markKind.String.
var _cbdd = map[markKind]string{
	_cbeb:  "\u0073\u0074\u0072\u006f\u006b\u0065",
	_bddf:  "\u0066\u0069\u006c\u006c",
	_gcfgb: "\u0061u\u0067\u006d\u0065\u006e\u0074",
}
func _add (_dcda _aae .Point )_aae .Matrix {return _aae .TranslationMatrix (_dcda .X ,_dcda .Y )};func (_bddbd paraList )findGridTables (_bcggf []gridTiling )[]*textTable {if _dedc {_ag .Log .Info ("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073",len (_bddbd ));
for _edcee ,_ccgb :=range _bddbd {_efc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_edcee ,_ccgb );};};var _cddg []*textTable ;for _dfab ,_ddfe :=range _bcggf {_gaag ,_adcgd :=_bddbd .findTableGrid (_ddfe );if _gaag !=nil {_gaag .log (_efc .Sprintf ("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064",_dfab ));
_cddg =append (_cddg ,_gaag );_gaag .markCells ();};for _egfd :=range _adcgd {_egfd ._fcdcf =true ;};};if _dedc {_ag .Log .Info ("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s",len (_cddg ));
};return _cddg ;};var (_cbba =map[rune ]string {0x0060:"\u0300",0x02CB:"\u0300",0x0027:"\u0301",0x00B4:"\u0301",0x02B9:"\u0301",0x02CA:"\u0301",0x005E:"\u0302",0x02C6:"\u0302",0x007E:"\u0303",0x02DC:"\u0303",0x00AF:"\u0304",0x02C9:"\u0304",0x02D8:"\u0306",0x02D9:"\u0307",0x00A8:"\u0308",0x00B0:"\u030a",0x02DA:"\u030a",0x02BA:"\u030b",0x02DD:"\u030b",0x02C7:"\u030c",0x02C8:"\u030d",0x0022:"\u030e",0x02BB:"\u0312",0x02BC:"\u0313",0x0486:"\u0313",0x055A:"\u0313",0x02BD:"\u0314",0x0485:"\u0314",0x0559:"\u0314",0x02D4:"\u031d",0x02D5:"\u031e",0x02D6:"\u031f",0x02D7:"\u0320",0x02B2:"\u0321",0x00B8:"\u0327",0x02CC:"\u0329",0x02B7:"\u032b",0x02CD:"\u0331",0x005F:"\u0332",0x204E:"\u0359"};
);type intSet map[int ]struct{};func (_cecg *textTable )log (_cdag string ){if !_dedc {return ;};_ag .Log .Info ("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066",_cdag ,_cecg ._aageb ,_cecg ._cegga ,_cecg ._caagg ,_cecg .PdfRectangle );
for _gdgg :=0;_gdgg < _cecg ._cegga ;_gdgg ++{for _ecfbc :=0;_ecfbc < _cecg ._aageb ;_ecfbc ++{_fefg :=_cecg .get (_ecfbc ,_gdgg );if _fefg ==nil {continue ;};_efc .Printf ("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a",_ecfbc ,_gdgg ,_fefg .PdfRectangle ,_efcca (_fefg .text (),50),_bb .RuneCountInString (_fefg .text ()));
};};};func (_fad *shapesState )newSubPath (){_fad .clearPath ();if _cece {_ag .Log .Info ("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073",_fad );};};func (_eddfg *wordBag )getDepthIdx (_ggb float64 )int {_edba :=_eddfg .depthIndexes ();
_bdff :=_fdgf (_ggb );if _bdff < _edba [0]{return _edba [0];};if _bdff > _edba [len (_edba )-1]{return _edba [len (_edba )-1];};return _bdff ;};func (_bgaa *textObject )moveLP (_fddf ,_dgfa float64 ){_bgaa ._ebc .Concat (_aae .NewMatrix (1,0,0,1,_fddf ,_dgfa ));
_bgaa ._dbc =_bgaa ._ebc ;};func (_ddge *textTable )compositeColCorridors ()map[int ][]float64 {_dgbdb :=make (map[int ][]float64 ,_ddge ._aageb );if _dedc {_ag .Log .Info ("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020",_ddge ._aageb );
};for _bcfge :=0;_bcfge < _ddge ._aageb ;_bcfge ++{_dgbdb [_bcfge ]=nil ;};return _dgbdb ;};func (_cfc *stateStack )size ()int {return len (*_cfc )};func (_bcfde *textTable )isExportable ()bool {if _bcfde ._caagg {return true ;};_gecbb :=func (_eecc int )bool {_gcdc :=_bcfde .get (0,_eecc );
if _gcdc ==nil {return false ;};_baeb :=_gcdc .text ();_gabef :=_bb .RuneCountInString (_baeb );_ggbed :=_baee .MatchString (_baeb );return _gabef <=1||_ggbed ;};for _bfcd :=0;_bfcd < _bcfde ._cegga ;_bfcd ++{if !_gecbb (_bfcd ){return true ;};};return false ;
2024-04-30 12:24:05 +00:00
};
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	_fgb string
	_cf  *_af.PdfPageResources
	_fd  _af.PdfRectangle // Passed to _ecd and copied into PageText by extractPageText.
	_fed *_af.PdfRectangle
	_fgf map[string]fontEntry // Font cache used by getFont, keyed by the font dictionary's String().
	_ec  map[string]textResult
	_agf map[string]textResult
	_db  int64 // Access counter bumped by getFont; stamps font cache entries.
	_ebe int
	_efe *Options
	_ba  *_gf.PdfObject // Copied into PageText by extractPageText.
	_ac  _gf.PdfObject  // Copied into PageText by extractPageText.
	_cae []*_af.PdfAnnotation
}
// String returns a description of `k`: the name registered for the mark kind in _cbdd,
// or a "Not a mark: <n>" message when the kind has no entry.
func (_dece markKind )String ()string {_caafc ,_bbed :=_cbdd [_dece ];if !_bbed {return _efc .Sprintf ("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064",_dece );};return _caafc ;};type textResult struct{_eae PageText ;_cec int ;
// The line above also opens the textResult struct (a PageText plus two int fields).
// The next line completes it and then defines:
//   setTextLeading - stores the value in _ecff._cdc; a no-op on a nil receiver.
//   stateStack     - a slice of *textState.
//   writeCellText  - writes the text of each line in _aage to the writer. When _dadc is
//                    set and a line other than the last ends in a hyphen, its text is
//                    passed through _dbbb and no separator follows it. Every other line
//                    except the last is followed by _gcccd(this line's _addd, next line's
//                    _addd). Errors returned by Write are discarded.
_gabe int ;};func (_bcb *textObject )setTextLeading (_cebc float64 ){if _bcb ==nil {return ;};_bcb ._ecff ._cdc =_cebc ;};type stateStack []*textState ;func (_gdef *textPara )writeCellText (_cccff _fc .Writer ){for _debae ,_bgdc :=range _gdef ._aage {_fdbd :=_bgdc .text ();
_ggaa :=_dadc &&_bgdc .endsInHyphen ()&&_debae !=len (_gdef ._aage )-1;if _ggaa {_fdbd =_dbbb (_fdbd );};_cccff .Write ([]byte (_fdbd ));if !(_ggaa ||_debae ==len (_gdef ._aage )-1){_cccff .Write ([]byte (_gcccd (_bgdc ._addd ,_gdef ._aage [_debae +1]._addd )));
// The next line closes writeCellText and then defines:
//   quadraticTo - logs when _cece is set and adds only the point given by its last two
//                 arguments via addPoint; the first two arguments are not used.
//   _fecd       - appends _ccgd(list, indent) to the builder, then recurses into each
//                 child in _fbef with the indent extended by three spaces.
};};};func (_bbcbf *shapesState )quadraticTo (_bgbc ,_acg ,_dcdag ,_cfaa float64 ){if _cece {_ag .Log .Info ("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a");};_bbcbf .addPoint (_dcdag ,_cfaa );};func _fecd (_eceg *list ,_abgb *_a .Builder ,_cdcbcc *string ){_fgfb :=_ccgd (_eceg ,_cdcbcc );
// The next line finishes _fecd and defines _debe, which reports whether the given byte
// equals the first byte of any string in _ccfge. Only that leading byte is compared, not
// the whole (possibly multi-byte) symbol.
_abgb .WriteString (_fgfb );for _ ,_cdega :=range _eceg ._fbef {_fdfd :=*_cdcbcc +"\u0020\u0020\u0020";_fecd (_cdega ,_abgb ,&_fdfd );};};func _debe (_edgeb byte )bool {for _ ,_eefdc :=range _ccfge {if []byte (_eefdc )[0]==_edgeb {return true ;};};return false ;
// The next line closes _debe and opens parseStructElement, which fills the receiver from
// a structure element dictionary. It logs and returns if the object is not a
// dictionary. Otherwise it reads the "S", "Pg" and "K" entries: _dccda becomes the
// String() of "S" ("" when absent) and _bffdf becomes "Pg". "K" is then handled by type:
//   integer   - stored in _fbge.
//   reference - wrapped in a one-element array and handled like the array case.
//   array     - _fbge starts at -1. A single integer element is stored in _fbge and the
//               function returns. Otherwise integer elements overwrite _fbge and every
//               other element is parsed recursively and collected in _befc.
// Any other kind of "K" (including a missing one) leaves _fbge and _befc untouched.
};func (_bffe *structElement )parseStructElement (_eeff _gf .PdfObject ){_egbef ,_ddcc :=_gf .GetDict (_eeff );if !_ddcc {_ag .Log .Debug ("\u0070\u0061\u0072\u0073\u0065\u0053\u0074\u0072u\u0063\u0074\u0045le\u006d\u0065\u006e\u0074\u003a\u0020d\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006f\u0062\u006a\u0065\u0063t\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075n\u0064\u002e");
return ;};_afab :=_egbef .Get ("\u0053");_gdeg :=_egbef .Get ("\u0050\u0067");_fgbc :="";if _afab !=nil {_fgbc =_afab .String ();};_bfcgf :=_egbef .Get ("\u004b");_bffe ._dccda =_fgbc ;_bffe ._bffdf =_gdeg ;switch _ecaf :=_bfcgf .(type ){case *_gf .PdfObjectInteger :_bffe ._dccda =_fgbc ;
_bffe ._fbge =int64 (*_ecaf );_bffe ._bffdf =_gdeg ;case *_gf .PdfObjectReference :_aaebb :=*_gf .MakeArray (_ecaf );var _caagc int64 =-1;_bffe ._fbge =_caagc ;if _aaebb .Len ()==1{_egcf :=_aaebb .Elements ()[0];_bacc ,_bgca :=_egcf .(*_gf .PdfObjectInteger );
if _bgca {_caagc =int64 (*_bacc );_bffe ._fbge =_caagc ;_bffe ._dccda =_fgbc ;_bffe ._bffdf =_gdeg ;return ;};};_agg :=[]structElement {};for _ ,_egafa :=range _aaebb .Elements (){_fecca ,_eeacd :=_egafa .(*_gf .PdfObjectInteger );if _eeacd {_caagc =int64 (*_fecca );
_bffe ._fbge =_caagc ;_bffe ._dccda =_fgbc ;}else {_ffccb :=&structElement {};_ffccb .parseStructElement (_egafa );_agg =append (_agg ,*_ffccb );};_caagc =-1;};_bffe ._befc =_agg ;case *_gf .PdfObjectArray :_ffbd :=_bfcgf .(*_gf .PdfObjectArray );var _gggc int64 =-1;
_bffe ._fbge =_gggc ;if _ffbd .Len ()==1{_bbdc :=_ffbd .Elements ()[0];_eeae ,_ecdc :=_bbdc .(*_gf .PdfObjectInteger );if _ecdc {_gggc =int64 (*_eeae );_bffe ._fbge =_gggc ;_bffe ._dccda =_fgbc ;_bffe ._bffdf =_gdeg ;return ;};};_eebff :=[]structElement {};
for _ ,_dfee :=range _ffbd .Elements (){_efbb ,_egdb :=_dfee .(*_gf .PdfObjectInteger );if _egdb {_gggc =int64 (*_efbb );_bffe ._fbge =_gggc ;_bffe ._dccda =_fgbc ;_bffe ._bffdf =_gdeg ;}else {_agdba :=&structElement {};_agdba .parseStructElement (_dfee );
// The next line ends parseStructElement and defines endsInHyphen. It takes the last word
// of the line and returns false unless that word's final rune is in unicode.Hyphen. It
// then returns true if the word's _dgeeg flag is set and _edeg accepts the word text;
// otherwise (on the following line) it returns _edeg applied to the whole line's text.
// NOTE(review): indexes the last element of _cfcb unconditionally, so it panics on a
// line with no words; confirm callers never pass one.
_eebff =append (_eebff ,*_agdba );};_gggc =-1;};_bffe ._befc =_eebff ;};};func (_fceb *textLine )endsInHyphen ()bool {_gcggg :=_fceb ._cfcb [len (_fceb ._cfcb )-1];_dcfg :=_gcggg ._ccbcc ;_gaeed ,_cccg :=_bb .DecodeLastRuneInString (_dcfg );if _cccg <=0||!_fg .Is (_fg .Hyphen ,_gaeed ){return false ;
// The next line finishes endsInHyphen and defines _aecg, which returns the keys of the
// map sorted in ascending order.
};if _gcggg ._dgeeg &&_edeg (_dcfg ){return true ;};return _edeg (_fceb .text ());};func _aecg (_cacc map[float64 ]gridTile )[]float64 {_debc :=make ([]float64 ,0,len (_cacc ));for _ddcd :=range _cacc {_debc =append (_debc ,_ddcd );};_e .Float64s (_debc );
// The next line closes _aecg and opens yNeighbours. It builds two events per paragraph,
// one at Lly (flag true) and one at Ury (flag false), each carrying the paragraph's
// index. When `_geef` is non-zero both edges are pushed outwards by _geef times the
// paragraph's fontsize(). The events are handed to eventNeighbours, whose result is
// returned.
return _debc ;};func (_agcf paraList )yNeighbours (_geef float64 )map[*textPara ][]int {_bgcdg :=make ([]event ,2*len (_agcf ));if _geef ==0{for _ddba ,_cegea :=range _agcf {_bgcdg [2*_ddba ]=event {_cegea .Lly ,true ,_ddba };_bgcdg [2*_ddba +1]=event {_cegea .Ury ,false ,_ddba };
};}else {for _ggeee ,_baacca :=range _agcf {_bgcdg [2*_ggeee ]=event {_baacca .Lly -_geef *_baacca .fontsize (),true ,_ggeee };_bgcdg [2*_ggeee +1]=event {_baacca .Ury +_geef *_baacca .fontsize (),false ,_ggeee };};};return _agcf .eventNeighbours (_bgcdg );
};func (_bfa *Extractor )extractPageText (_ecf string ,_bdb *_af .PdfPageResources ,_ebfc _aae .Matrix ,_bgf int ,_eadd bool )(*PageText ,int ,int ,error ){_ag .Log .Trace ("\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d",_bgf );
_fbc :=&PageText {_cdf :_bfa ._fd ,_cgad :_bfa ._ba ,_dfc :_bfa ._ac };_abg :=_ecd (_bfa ._fd );var _dcg stateStack ;_gab :=_ffag (_bfa ,_bdb ,_aa .GraphicsState {},&_abg ,&_dcg );_acd :=shapesState {_gdec :_ebfc ,_cffc :_aae .IdentityMatrix (),_cegf :_gab };
var _fde bool ;_ddd :=-1;if _bgf > _bagb {_fff :=_b .New ("\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077");_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076",_bgf ,_fff );
return _fbc ,_abg ._cfg ,_abg ._dacb ,_fff ;};_cged :=_aa .NewContentStreamParser (_ecf );_fcab ,_ced :=_cged .Parse ();if _ced !=nil {_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ced );
return _fbc ,_abg ._cfg ,_abg ._dacb ,_ced ;};_fbc ._cfa =_fcab ;_fdfc :=_aa .NewContentStreamProcessor (*_fcab );_fdfc .AddHandler (_aa .HandlerConditionEnumAllOperands ,"",func (_bbd *_aa .ContentStreamOperation ,_gbba _aa .GraphicsState ,_fgfg *_af .PdfPageResources )error {_bga :=_bbd .Operand ;
if _egbg {_ag .Log .Info ("\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s",_bbd );};switch _bga {case "\u0071":if _cece {_ag .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_acd ._cffc );};_dcg .push (&_abg );case "\u0051":if !_dcg .empty (){_abg =*_dcg .pop ();
};_acd ._cffc =_gbba .CTM ;if _cece {_ag .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_acd ._cffc );};case "\u0042\u0044\u0043":_fab ,_dge :=_gf .GetDict (_bbd .Params [1]);if !_dge {_ag .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0042D\u0043\u0020\u006f\u0070\u003d\u0025\u0073 \u0047\u0065\u0074\u0044\u0069\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_bbd );
return _ced ;};_gac :=_fab .Get ("\u004d\u0043\u0049\u0044");if _gac !=nil {_dff ,_fda :=_gf .GetIntVal (_gac );if !_fda {_ag .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u0042\u0044C\u0020\u006f\u0070=\u0025\u0073\u002e\u0020\u0042\u0061\u0064\u0020\u006eum\u0065\u0072\u0069c\u0061\u006c \u006f\u0062\u006a\u0065\u0063\u0074.\u0020\u006f=\u0025\u0073",_bbd ,_gac );
};_ddd =_dff ;}else {_ddd =-1;};case "\u0045\u004d\u0043":_ddd =-1;case "\u0042\u0054":if _fde {_ag .Log .Debug ("\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
_fbc ._fecaa =append (_fbc ._fecaa ,_gab ._afff ...);};_fde =true ;_eag :=_gbba ;if _eadd {_eag =_aa .GraphicsState {};_eag .CTM =_acd ._cffc ;};_eag .CTM =_ebfc .Mult (_eag .CTM );_gab =_ffag (_bfa ,_fgfg ,_eag ,&_abg ,&_dcg );_acd ._cegf =_gab ;case "\u0045\u0054":if !_fde {_ag .Log .Debug ("\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074");
};_fde =false ;_fbc ._fecaa =append (_fbc ._fecaa ,_gab ._afff ...);_gab .reset ();case "\u0054\u002a":_gab .nextLine ();case "\u0054\u0064":if _ccc ,_dga :=_gab .checkOp (_bbd ,2,true );!_ccc {_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dga );
return _dga ;};_eeb ,_eg ,_eebf :=_bbgag (_bbd .Params );if _eebf !=nil {return _eebf ;};_gab .moveText (_eeb ,_eg );case "\u0054\u0044":if _aac ,_cd :=_gab .checkOp (_bbd ,2,true );!_aac {_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_cd );
return _cd ;};_aba ,_ecc ,_gcf :=_bbgag (_bbd .Params );if _gcf !=nil {_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gcf );return _gcf ;};_gab .moveTextSetLeading (_aba ,_ecc );case "\u0054\u006a":if _gdf ,_bfb :=_gab .checkOp (_bbd ,1,true );
!_gdf {_ag .Log .Debug ("\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076",_bbd ,_bfb );return _bfb ;};_befb :=_gf .TraceToDirectObject (_bbd .Params [0]);_dgd ,_caed :=_gf .GetStringBytes (_befb );
if !_caed {_ag .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064",_bbd );return _gf .ErrTypeError ;
};return _gab .showText (_befb ,_dgd ,_ddd );case "\u0054\u004a":if _bcg ,_gbbg :=_gab .checkOp (_bbd ,1,true );!_bcg {_ag .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gbbg );return _gbbg ;};_ceb ,_bgff :=_gf .GetArray (_bbd .Params [0]);
if !_bgff {_ag .Log .Debug ("\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_bbd );return _ced ;};return _gab .showTextAdjusted (_ceb ,_ddd );
case "\u0027":if _ggfc ,_gef :=_gab .checkOp (_bbd ,1,true );!_ggfc {_ag .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_gef );return _gef ;};_afd :=_gf .TraceToDirectObject (_bbd .Params [0]);_efeg ,_dcc :=_gf .GetStringBytes (_afd );
if !_dcc {_ag .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_bbd );return _gf .ErrTypeError ;};_gab .nextLine ();return _gab .showText (_afd ,_efeg ,_ddd );
case "\u0022":if _geg ,_eea :=_gab .checkOp (_bbd ,3,true );!_geg {_ag .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_eea );return _eea ;};_ggga ,_dcb ,_bac :=_bbgag (_bbd .Params [:2]);if _bac !=nil {return _bac ;
};_abf :=_gf .TraceToDirectObject (_bbd .Params [2]);_fba ,_bgbg :=_gf .GetStringBytes (_abf );if !_bgbg {_ag .Log .Debug ("\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064",_bbd );
return _gf .ErrTypeError ;};_gab .setCharSpacing (_ggga );_gab .setWordSpacing (_dcb );_gab .nextLine ();return _gab .showText (_abf ,_fba ,_ddd );case "\u0054\u004c":_eefe ,_eda :=_feca (_bbd );if _eda !=nil {_ag .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_eda );
return _eda ;};_gab .setTextLeading (_eefe );case "\u0054\u0063":_fccd ,_dfa :=_feca (_bbd );if _dfa !=nil {_ag .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_dfa );return _dfa ;};_gab .setCharSpacing (_fccd );
case "\u0054\u0066":if _fcf ,_fec :=_gab .checkOp (_bbd ,2,true );!_fcf {_ag .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fec );return _fec ;};_cgc ,_ccba :=_gf .GetNameVal (_bbd .Params [0]);if !_ccba {_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064",_bbd );
return _gf .ErrTypeError ;};_egd ,_ecbd :=_gf .GetNumberAsFloat (_bbd .Params [1]);if !_ccba {_ag .Log .Debug ("\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bbd ,_ecbd );
return _ecbd ;};_ecbd =_gab .setFont (_cgc ,_egd );_gab ._cdcc =_b .Is (_ecbd ,_gf .ErrNotSupported );if _ecbd !=nil &&!_gab ._cdcc {return _ecbd ;};case "\u0054\u006d":if _fea ,_abge :=_gab .checkOp (_bbd ,6,true );!_fea {_ag .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_abge );
return _abge ;};_bccb ,_efb :=_gf .GetNumbersAsFloat (_bbd .Params );if _efb !=nil {_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_efb );return _efb ;};_gab .setTextMatrix (_bccb );case "\u0054\u0072":if _adf ,_fgg :=_gab .checkOp (_bbd ,1,true );
!_adf {_ag .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_fgg );return _fgg ;};_fef ,_daa :=_gf .GetIntVal (_bbd .Params [0]);if !_daa {_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064",_bbd );
return _gf .ErrTypeError ;};_gab .setTextRenderMode (_fef );case "\u0054\u0073":if _cce ,_ecfd :=_gab .checkOp (_bbd ,1,true );!_cce {_ag .Log .Debug ("\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_ecfd );return _ecfd ;
};_cba ,_aab :=_gf .GetNumberAsFloat (_bbd .Params [0]);if _aab !=nil {_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_aab );return _aab ;};_gab .setTextRise (_cba );case "\u0054\u0077":if _eagf ,_egb :=_gab .checkOp (_bbd ,1,true );
!_eagf {_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_egb );return _egb ;};_dbgf ,_bfcb :=_gf .GetNumberAsFloat (_bbd .Params [0]);if _bfcb !=nil {_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_bfcb );
return _bfcb ;};_gab .setWordSpacing (_dbgf );case "\u0054\u007a":if _edec ,_edb :=_gab .checkOp (_bbd ,1,true );!_edec {_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_edb );return _edb ;};_gbe ,_agb :=_gf .GetNumberAsFloat (_bbd .Params [0]);
if _agb !=nil {_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076",_agb );return _agb ;};_gab .setHorizScaling (_gbe );case "\u0063\u006d":if !_eadd {_acd ._cffc =_gbba .CTM ;};if _acd ._cffc .Singular (){_afb :=_aae .IdentityMatrix ().Translate (_acd ._cffc .Translation ());
_ag .Log .Debug ("S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s",_acd ._cffc ,_afb );_acd ._cffc =_afb ;};if _cece {_ag .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_acd ._cffc );};case "\u006d":if len (_bbd .Params )!=2{_ag .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_eb );
return nil ;};_daaf ,_dfed :=_gf .GetNumbersAsFloat (_bbd .Params );if _dfed !=nil {return _dfed ;};_acd .moveTo (_daaf [0],_daaf [1]);case "\u006c":if len (_bbd .Params )!=2{_ag .Log .Debug ("\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e",_eb );
return nil ;};_cfe ,_cfea :=_gf .GetNumbersAsFloat (_bbd .Params );if _cfea !=nil {return _cfea ;};_acd .lineTo (_cfe [0],_cfe [1]);case "\u0063":if len (_bbd .Params )!=6{return _eb ;};_ddfc ,_adb :=_gf .GetNumbersAsFloat (_bbd .Params );if _adb !=nil {return _adb ;
};_ag .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_ddfc );_acd .cubicTo (_ddfc [0],_ddfc [1],_ddfc [2],_ddfc [3],_ddfc [4],_ddfc [5]);case "\u0076","\u0079":if len (_bbd .Params )!=4{return _eb ;
};_dfeg ,_dddd :=_gf .GetNumbersAsFloat (_bbd .Params );if _dddd !=nil {return _dddd ;};_ag .Log .Debug ("\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f",_dfeg );_acd .quadraticTo (_dfeg [0],_dfeg [1],_dfeg [2],_dfeg [3]);
case "\u0068":_acd .closePath ();case "\u0072\u0065":if len (_bbd .Params )!=4{return _eb ;};_bgg ,_ffad :=_gf .GetNumbersAsFloat (_bbd .Params );if _ffad !=nil {return _ffad ;};_acd .drawRectangle (_bgg [0],_bgg [1],_bgg [2],_bgg [3]);_acd .closePath ();
case "\u0053":_acd .stroke (&_fbc ._gggf );_acd .clearPath ();case "\u0073":_acd .closePath ();_acd .stroke (&_fbc ._gggf );_acd .clearPath ();case "\u0046":_acd .fill (&_fbc ._afbg );_acd .clearPath ();case "\u0066","\u0066\u002a":_acd .closePath ();_acd .fill (&_fbc ._afbg );
_acd .clearPath ();case "\u0042","\u0042\u002a":_acd .fill (&_fbc ._afbg );_acd .stroke (&_fbc ._gggf );_acd .clearPath ();case "\u0062","\u0062\u002a":_acd .closePath ();_acd .fill (&_fbc ._afbg );_acd .stroke (&_fbc ._gggf );_acd .clearPath ();case "\u006e":_acd .clearPath ();
case "\u0044\u006f":if len (_bbd .Params )==0{_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e",_bbd .Params );
return _gf .ErrRangeError ;};_fae ,_eddfb :=_gf .GetName (_bbd .Params [0]);if !_eddfb {_ag .Log .Debug ("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e",_bbd .Params [0]);
return _gf .ErrTypeError ;};_ ,_baae :=_fgfg .GetXObjectByName (*_fae );if _baae !=_af .XObjectTypeForm {break ;};_ade ,_eddfb :=_bfa ._ec [_fae .String ()];if !_eddfb {_fcg ,_cccc :=_fgfg .GetXObjectFormByName (*_fae );if _cccc !=nil {_ag .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_cccc );
return _cccc ;};_dcd ,_cccc :=_fcg .GetContentStream ();if _cccc !=nil {_ag .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_cccc );return _cccc ;};_gca :=_fcg .Resources ;if _gca ==nil {_gca =_fgfg ;};_dgb :=_gbba .CTM ;if _age ,_eec :=_gf .GetArray (_fcg .Matrix );
_eec {_bebc ,_gea :=_age .GetAsFloat64Slice ();if _gea !=nil {return _gea ;};if len (_bebc )!=6{return _eb ;};_gbf :=_aae .NewMatrix (_bebc [0],_bebc [1],_bebc [2],_bebc [3],_bebc [4],_bebc [5]);_dgb =_gbba .CTM .Mult (_gbf );};_cabf ,_gfc ,_cbdf ,_cccc :=_bfa .extractPageText (string (_dcd ),_gca ,_ebfc .Mult (_dgb ),_bgf +1,false );
if _cccc !=nil {_ag .Log .Debug ("\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v",_cccc );return _cccc ;};_ade =textResult {*_cabf ,_gfc ,_cbdf };_bfa ._ec [_fae .String ()]=_ade ;};_acd ._cffc =_gbba .CTM ;if _cece {_ag .Log .Info ("\u0063\u0074\u006d\u003d\u0025\u0073",_acd ._cffc );
};_fbc ._fecaa =append (_fbc ._fecaa ,_ade ._eae ._fecaa ...);_fbc ._gggf =append (_fbc ._gggf ,_ade ._eae ._gggf ...);_fbc ._afbg =append (_fbc ._afbg ,_ade ._eae ._afbg ...);_abg ._cfg +=_ade ._cec ;_abg ._dacb +=_ade ._gabe ;case "\u0072\u0067","\u0067","\u006b","\u0063\u0073","\u0073\u0063","\u0073\u0063\u006e":_gab ._aef .ColorspaceNonStroking =_gbba .ColorspaceNonStroking ;
_gab ._aef .ColorNonStroking =_gbba .ColorNonStroking ;case "\u0052\u0047","\u0047","\u004b","\u0043\u0053","\u0053\u0043","\u0053\u0043\u004e":_gab ._aef .ColorspaceStroking =_gbba .ColorspaceStroking ;_gab ._aef .ColorStroking =_gbba .ColorStroking ;
};return nil ;});_ced =_fdfc .Process (_bdb );if _bfa ._efe !=nil &&_bfa ._efe .IncludeAnnotations &&!_eadd {for _ ,_ebb :=range _bfa ._cae {_aaf ,_daaff :=_gf .GetDict (_ebb .AP );if !_daaff {continue ;};_afbb ,_daaff :=_aaf .Get ("\u004e").(*_gf .PdfObjectStream );
if !_daaff {continue ;};_bfef ,_edf :=_gf .DecodeStream (_afbb );if _edf !=nil {_ag .Log .Debug ("\u0045\u0072\u0072\u006f\u0072\u0020\u006f\u006e\u0020\u0064\u0065c\u006f\u0064\u0065\u0020\u0073\u0074\u0072\u0065\u0061\u006d:\u0020\u0025\u0076",_edf );
continue ;};_eca :=_afbb .PdfObjectDictionary .Get ("\u0052e\u0073\u006f\u0075\u0072\u0063\u0065s");_cgeg ,_edf :=_af .NewPdfPageResourcesFromDict (_eca .(*_gf .PdfObjectDictionary ));if _edf !=nil {_ag .Log .Debug ("\u0045\u0072\u0072\u006f\u0072 \u006f\u006e\u0020\u0067\u0065\u0074\u0074\u0069\u006e\u0067\u0020\u0061\u006en\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0020\u0072\u0065\u0073\u006f\u0075\u0072\u0063\u0065\u0073\u003a\u0020\u0025\u0076",_edf );
continue ;};_gfge :=_aae .IdentityMatrix ();_bggg ,_daaff :=_afbb .PdfObjectDictionary .Get ("\u004d\u0061\u0074\u0072\u0069\u0078").(*_gf .PdfObjectArray );if _daaff {_dde ,_aabf :=_bggg .GetAsFloat64Slice ();if _aabf !=nil {_ag .Log .Debug ("\u0045\u0072\u0072or\u0020\u006f\u006e\u0020\u0067\u0065\u0074\u0074\u0069n\u0067 \u0066l\u006fa\u0074\u0036\u0034\u0020\u0073\u006c\u0069\u0063\u0065\u003a\u0020\u0025\u0076",_aabf );
continue ;};if len (_dde )!=6{_ag .Log .Debug ("I\u006e\u0076\u0061\u006c\u0069\u0064 \u006d\u0061\u0074\u0072\u0069\u0078\u0020\u0073\u006ci\u0063\u0065\u0020l\u0065n\u0067\u0074\u0068");continue ;};_gfge =_aae .NewMatrix (_dde [0],_dde [1],_dde [2],_dde [3],_dde [4],_dde [5]);
};_bca ,_daaff :=_bfa ._agf [_afbb .String ()];if !_daaff {_ccd ,_gafe ,_acf ,_gcfe :=_bfa .extractPageText (string (_bfef ),_cgeg ,_gfge ,_bgf +1,true );if _gcfe !=nil {_ag .Log .Debug ("\u0045\u0052R\u004f\u0052\u0020\u0065x\u0074\u0072a\u0063\u0074\u0069\u006e\u0067\u0020\u0061\u006en\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0020\u0074\u0065\u0078\u0074s\u003a\u0020\u0025\u0076",_gcfe );
continue ;};_bca =textResult {*_ccd ,_gafe ,_acf };_bfa ._agf [_afbb .String ()]=_bca ;};_fbc ._fecaa =append (_fbc ._fecaa ,_bca ._eae ._fecaa ...);_fbc ._gggf =append (_fbc ._gggf ,_bca ._eae ._gggf ...);_fbc ._afbg =append (_fbc ._afbg ,_bca ._eae ._afbg ...);
_abg ._cfg +=_bca ._cec ;_abg ._dacb +=_bca ._gabe ;};};return _fbc ,_abg ._cfg ,_abg ._dacb ,_ced ;};func (_eafg rectRuling )checkWidth (_eaccd ,_cafg float64 )(float64 ,bool ){_dcfce :=_cafg -_eaccd ;_gdfg :=_dcfce <=_cggd ;return _dcfce ,_gdfg ;};func _egbgea (_dffg *textLine ,_dgc []*textLine ,_ffga []float64 )float64 {var _gcba float64 =-1;
for _ ,_dabg :=range _dgc {if _dabg ._addd > _dffg ._addd {if _ea .Round (_dabg .Llx )>=_ea .Round (_dffg .Llx ){_gcba =_dabg ._addd ;}else {break ;};};};return _gcba ;};func _fcgc (_gagg *list )[]*textLine {for _ ,_bgdf :=range _gagg ._fbef {switch _bgdf ._fdgc {case "\u004c\u0042\u006fd\u0079":if len (_bgdf ._fged )!=0{return _bgdf ._fged ;
};return _fcgc (_bgdf );case "\u0053\u0070\u0061\u006e":return _bgdf ._fged ;case "I\u006e\u006c\u0069\u006e\u0065\u0053\u0068\u0061\u0070\u0065":return _bgdf ._fged ;};};return nil ;};func (_fgef paraList )computeEBBoxes (){if _cffe {_ag .Log .Info ("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a");
};for _ ,_gccb :=range _fgef {_gccb ._gbgbb =_gccb .PdfRectangle ;};_eggc :=_fgef .yNeighbours (0);for _eggf ,_gbgdd :=range _fgef {_feaac :=_gbgdd ._gbgbb ;_dfbec ,_cgfbb :=-1.0e9,+1.0e9;for _ ,_dcbf :=range _eggc [_gbgdd ]{_aaec :=_fgef [_dcbf ]._gbgbb ;
if _aaec .Urx < _feaac .Llx {_dfbec =_ea .Max (_dfbec ,_aaec .Urx );}else if _feaac .Urx < _aaec .Llx {_cgfbb =_ea .Min (_cgfbb ,_aaec .Llx );};};for _gcga ,_ddag :=range _fgef {_acgbf :=_ddag ._gbgbb ;if _eggf ==_gcga ||_acgbf .Ury > _feaac .Lly {continue ;
};if _dfbec <=_acgbf .Llx &&_acgbf .Llx < _feaac .Llx {_feaac .Llx =_acgbf .Llx ;}else if _acgbf .Urx <=_cgfbb &&_feaac .Urx < _acgbf .Urx {_feaac .Urx =_acgbf .Urx ;};};if _cffe {_efc .Printf ("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a",_eggf ,_gbgdd ._gbgbb ,_feaac ,_efcca (_gbgdd .text (),50));
};_gbgdd ._gbgbb =_feaac ;};if _adfb {for _ ,_ccdc :=range _fgef {_ccdc .PdfRectangle =_ccdc ._gbgbb ;};};};
2024-03-27 22:34:33 +00:00
2024-05-29 17:04:37 +00:00
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
// The text is empty when ExtractPageText fails; the two counters and the error
// are passed through unchanged.
func (_gff *Extractor) ExtractTextWithStats() (_fdd string, _ffe int, _bbe int, _efd error) {
	page, _ffe, _bbe, _efd := _gff.ExtractPageText()
	if _efd == nil {
		_fdd = page.Text()
	}
	return _fdd, _ffe, _bbe, _efd
}

// buildList collects the list ("L") structure elements found directly under
// the top structure element, plus whatever `_dbddc` returns for each "Table"
// child, and converts them into lists via `_eebe` and `_afc`.
//
// The top element is the single root child when it is tagged Document, Sect,
// Part, Div or Art. When the root has zero or several children, a synthetic
// element wrapping all of them is used. Returns nil when the receiver is nil
// or no top element can be determined.
func (_acaa *structTreeRoot) buildList(_bgga map[int][]*textLine, _bfdb _gf.PdfObject) []*list {
	if _acaa == nil {
		_ag.Log.Debug("\u0062\u0075\u0069\u006c\u0064\u004c\u0069\u0073\u0074\u003a\u0020t\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0069\u0073 \u006e\u0069\u006c")
		return nil
	}

	var top *structElement
	switch len(_acaa._cfbfg) {
	case 1:
		switch _acaa._cfbfg[0]._dccda {
		case "\u0044\u006f\u0063\u0075\u006d\u0065\u006e\u0074", // Document
			"\u0053\u0065\u0063\u0074", // Sect
			"\u0050\u0061\u0072\u0074", // Part
			"\u0044\u0069\u0076",       // Div
			"\u0041\u0072\u0074":       // Art
			top = &_acaa._cfbfg[0]
		}
	default:
		top = &structElement{_befc: _acaa._cfbfg, _dccda: _acaa._gfbe}
	}
	if top == nil {
		_ag.Log.Debug("\u0062\u0075\u0069\u006cd\u004c\u0069\u0073\u0074\u003a\u0020\u0074\u006f\u0070\u0045l\u0065m\u0065\u006e\u0074\u0020\u0069\u0073\u0020n\u0069\u006c")
		return nil
	}

	listElems := []structElement{}
	for _, kid := range top._befc {
		switch kid._dccda {
		case "\u004c": // L
			listElems = append(listElems, kid)
		case "\u0054\u0061\u0062l\u0065": // Table
			listElems = append(listElems, _dbddc(kid)...)
		}
	}

	var result []*list
	for _, built := range _eebe(listElems, _bgga, _bfdb) {
		result = append(result, _afc(built)...)
	}
	return result
}
2024-04-16 11:40:43 +00:00
2024-05-29 17:04:37 +00:00
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int

// stroke appends a path section built from the current path (`_baca`) and the
// current stroke colour to `*_eabc`. With `_gdeb` set it prints a summary, and
// with `_edebg` also set it prints the first eleven path entries.
func (_aegc *shapesState) stroke(_eabc *[]pathSection) {
	section := pathSection{_bgbeg: _aegc._baca, Color: _aegc._cegf.getStrokeColor()}
	*_eabc = append(*_eabc, section)
	if !_gdeb {
		return
	}
	_efc.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*_eabc), _aegc, _aegc._cegf.getStrokeColor(), section.bbox())
	if !_edebg {
		return
	}
	for i, sub := range _aegc._baca {
		_efc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sub)
		if i == 10 {
			break
		}
	}
}

// maxDepth returns the bag's `_ecba` value minus its Lly.
func (_ddebg *wordBag) maxDepth() float64 {
	return _ddebg._ecba - _ddebg.Lly
}

// _edeg reports whether `_ebcb` has at least `_eade` runes, ends in a Unicode
// hyphen, and the rune directly before that hyphen exists and is not
// whitespace.
func _edeg(_ebcb string) bool {
	if _bb.RuneCountInString(_ebcb) < _eade {
		return false
	}
	last, size := _bb.DecodeLastRuneInString(_ebcb)
	if size <= 0 || !_fg.Is(_fg.Hyphen, last) {
		return false
	}
	prev, prevSize := _bb.DecodeLastRuneInString(_ebcb[:len(_ebcb)-size])
	if prevSize <= 0 {
		return false
	}
	return !_fg.IsSpace(prev)
}

// emptyCompositeColumn reports whether no cell in column `_cdde` (rows 0 up to
// `_cegga`) has any paragraphs.
func (_gaad *textTable) emptyCompositeColumn(_cdde int) bool {
	for row := 0; row < _gaad._cegga; row++ {
		if cell, ok := _gaad._becfc[_cdgd(_cdde, row)]; ok && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// firstReadingIndex starts from depth index `_baf` and returns the depth index
// whose first word compares lowest under `_fdbb`, considering the indexes
// yielded by depthBand over a band derived from `_baf`, `_cdcb`, `_fcbe` and
// the `_abcc` value of the first word at `_baf`.
func (_fafa *wordBag) firstReadingIndex(_baf int) int {
	size := _fafa.firstWord(_baf)._abcc
	bandStart := float64(_baf+1) * _cdcb
	bandEnd := bandStart + _fcbe*size
	best := _baf
	for _, idx := range _fafa.depthBand(bandStart, bandEnd) {
		if _fdbb(_fafa.firstWord(idx), _fafa.firstWord(best)) < 0 {
			best = idx
		}
	}
	return best
}

// gridTile is a rectangle plus four boolean flags.
// NOTE(review): the flags look like per-side border markers (the tiling code
// treats `_gceeb`/`_gdcbg` as the horizontal start/stop sides and `_dbafa` as
// the lower side) — confirm against the tile constructor.
type gridTile struct {
	_af.PdfRectangle
	_ffdf, _gceeb, _dbafa, _gdcbg bool
}

// setFont stores `_faeb` as the font size of the text state, then looks up the
// font `_fefa` and stores it as well. The lookup error, if any, is returned
// and the font left unchanged. A nil receiver is a no-op.
func (_fgag *textObject) setFont(_fefa string, _faeb float64) error {
	if _fgag == nil {
		return nil
	}
	_fgag._ecff._gbbgg = _faeb
	font, err := _fgag.getFont(_fefa)
	if err == nil {
		_fgag._ecff._fgfgb = font
	}
	return err
}

// structTreeRoot holds the top-level structure elements (`_cfbfg`) and a tag
// string (`_gfbe`) that buildList uses for the synthetic top element.
type structTreeRoot struct {
	_cfbfg []structElement
	_gfbe  string
}
2024-04-16 11:40:43 +00:00
2024-05-29 17:04:37 +00:00
// ExtractFonts returns all font information from the page extractor, including
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
//
// The argument `previousPageFonts` is used when trying to build a complete font catalog for multiple pages or the entire document.
// The entries from `previousPageFonts` are added to the returned result unless already included in the page, i.e. no duplicate entries.
//
// NOTE: If previousPageFonts is nil, all fonts from the page will be returned. Use it when building up a full list of fonts for a document or page range.
func (_eba *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	var fonts PageFonts
	if err := fonts.extractPageResourcesToFont(_eba._cf); err != nil {
		return nil, err
	}
	if previousPageFonts != nil {
		for _, prev := range previousPageFonts.Fonts {
			// Skip fonts whose name is already present for this page.
			if _dgg(fonts.Fonts, prev.FontName) {
				continue
			}
			fonts.Fonts = append(fonts.Fonts, prev)
		}
	}
	return &PageFonts{Fonts: fonts.Fonts}, nil
}

// _deadf computes running offsets for `_cbdaf` consecutive groups, where group
// i occupies len(_cbfda[i])+1 slots. It returns the start offset of every
// group and the total number of slots.
func _deadf(_cbdaf int, _cbfda map[int][]float64) ([]int, int) {
	offsets := make([]int, _cbdaf)
	total := 0
	for i := range offsets {
		offsets[i] = total
		total += len(_cbfda[i]) + 1
	}
	return offsets, total
}

// _aacg concatenates all ruling lists in `_bdbbd` and returns the result of
// vertsHorzs on the combined list.
func _aacg(_bdbbd []rulingList) (rulingList, rulingList) {
	var combined rulingList
	for _, rulings := range _bdbbd {
		combined = append(combined, rulings...)
	}
	return combined.vertsHorzs()
}

// asTiling converts the rulings into a gridTiling.
//
// Steps:
//  1. Log adjacent rulings that align on both coordinates, then sort.
//  2. Build one gridTile per cell of the grid spanned by the primary
//     coordinates of the vertical and horizontal rulings.
//  3. Coalesce tiles horizontally: starting at a tile with `_gceeb` set,
//     absorb the tiles that follow in x until one has `_gdcbg` set.
//  4. Coalesce the merged tiles vertically, from the highest row index down,
//     while the tile below has the same Llx and Urx and the current tile does
//     not have `_dbafa` set. Only tiles for which complete() holds are kept.
//
// An empty gridTiling is returned when the list is empty or when either
// orientation has fewer than two distinct primary coordinates.
func (_cagc rulingList) asTiling() gridTiling {
	if _fgge {
		_ag.Log.Info("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(_cagc))
	}
	// Slicing an empty list with [1:] panics, so bail out first.
	if len(_cagc) == 0 {
		return gridTiling{}
	}
	for i, next := range _cagc[1:] {
		prev := _cagc[i]
		if prev.alignsPrimary(next) && prev.alignsSec(next) {
			_ag.Log.Error("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073", next, prev)
		}
	}
	_cagc.sortStrict()
	_cagc.log("\u0073n\u0061\u0070\u0070\u0065\u0064")

	verts, horzs := _cagc.vertsHorzs()
	xs := verts.primaries()
	ys := horzs.primaries()
	w := len(xs) - 1
	h := len(ys) - 1
	// `<= 0` rather than `== 0`: with no rulings of one orientation the count
	// is -1, and the original then indexed an empty slice.
	if w <= 0 || h <= 0 {
		return gridTiling{}
	}
	bbox := _af.PdfRectangle{Llx: xs[0], Urx: xs[w], Lly: ys[0], Ury: ys[h]}
	if _fgge {
		_ag.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064", len(verts))
		for i, v := range verts {
			_efc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, v)
		}
		_ag.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064", len(horzs))
		for i, hz := range horzs {
			_efc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, hz)
		}
		_ag.Log.Info("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f", w, h, xs, ys)
	}

	// One tile per grid cell, stored row-major (row y, column x).
	tiles := make([]gridTile, w*h)
	for y := h - 1; y >= 0; y-- {
		lly := ys[y]
		ury := ys[y+1]
		for x := 0; x < w; x++ {
			llx := xs[x]
			urx := xs[x+1]
			vAtLlx := verts.findPrimSec(llx, lly)
			vAtUrx := verts.findPrimSec(urx, lly)
			hAtLly := horzs.findPrimSec(lly, llx)
			hAtUry := horzs.findPrimSec(ury, llx)
			cell := _af.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			tile := _ebaad(cell, vAtLlx, vAtUrx, hAtLly, hAtUry)
			tiles[y*w+x] = tile
			if _fgge {
				_efc.Printf("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a", x, y, tile.String(), tile.Width(), tile.Height())
			}
		}
	}

	if _fgge {
		_ag.Log.Info("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", bbox)
	}
	// Horizontal pass: rows[y] maps the Llx of each merged tile to the tile.
	rows := make([]map[float64]gridTile, h)
	for y := h - 1; y >= 0; y-- {
		if _fgge {
			_efc.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", y)
		}
		rows[y] = make(map[float64]gridTile, w)
		for x := 0; x < w; x++ {
			tile := tiles[y*w+x]
			if _fgge {
				_efc.Printf("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", x, tile)
			}
			if !tile._gceeb {
				continue
			}
			lastMerged := x
			for x2 := x + 1; !tile._gdcbg && x2 < w; x2++ {
				next := tiles[y*w+x2]
				tile.Urx = next.Urx
				tile._ffdf = tile._ffdf || next._ffdf
				tile._dbafa = tile._dbafa || next._dbafa
				tile._gdcbg = next._gdcbg
				if _fgge {
					_efc.Printf("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a", x2, next, tile)
				}
				lastMerged = x2
			}
			if _fgge {
				_efc.Printf(" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n", x, lastMerged, tile)
			}
			// Skip the columns that were absorbed into this tile.
			x = lastMerged
			rows[y][tile.Llx] = tile
		}
	}

	// Vertical pass: results and the set of already-absorbed tiles are keyed
	// by the row's Lly and then by Llx.
	merged := make(map[float64]map[float64]gridTile, h)
	absorbed := make(map[float64]map[float64]struct{}, h)
	for y := h - 1; y >= 0; y-- {
		rowLly := tiles[y*w].Lly
		merged[rowLly] = make(map[float64]gridTile, w)
		absorbed[rowLly] = make(map[float64]struct{}, w)
	}
	if _fgge {
		_ag.Log.Info("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", bbox)
	}
	for y := h - 1; y >= 0; y-- {
		rowLly := tiles[y*w].Lly
		row := rows[y]
		if _fgge {
			_efc.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", y)
		}
		for _, llx := range _aecg(row) {
			if _, done := absorbed[rowLly][llx]; done {
				continue
			}
			tile := row[llx]
			if _fgge {
				_efc.Printf(" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a", tile.String())
			}
			for y2 := y - 1; y2 >= 0; y2-- {
				if tile._dbafa {
					break
				}
				below, found := rows[y2][llx]
				if !found {
					break
				}
				if below.Urx != tile.Urx {
					break
				}
				tile._dbafa = below._dbafa
				tile.Lly = below.Lly
				if _fgge {
					_efc.Printf("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a", below.String(), tile.String())
				}
				absorbed[below.Lly][below.Llx] = struct{}{}
			}
			// Row 0 is the last row; force `_dbafa` on there.
			if y == 0 {
				tile._dbafa = true
			}
			if tile.complete() {
				merged[rowLly][llx] = tile
			}
		}
	}

	tiling := gridTiling{PdfRectangle: bbox, _beefa: _eedcg(merged), _eeba: _dgca(merged), _agba: merged}
	tiling.log("\u0043r\u0065\u0061\u0074\u0065\u0064")
	return tiling
}

// augmentGrid splits the rulings with vertsHorzs and, where the bounding box
// of one orientation extends beyond the other by more than `_bcae`, adds a
// ruling of kind `_gcfgb` on that side so that both orientations cover the
// same extent. It returns the (possibly extended) vertical and horizontal
// lists. If either list is empty, both are returned unchanged.
func (_feaab rulingList) augmentGrid() (rulingList, rulingList) {
	verts, horzs := _feaab.vertsHorzs()
	if len(verts) == 0 || len(horzs) == 0 {
		return verts, horzs
	}
	origVerts, origHorzs := verts, horzs
	boxV := verts.bbox()
	boxH := horzs.bbox()
	if _gdeb {
		_ag.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066", boxV)
		_ag.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066", boxH)
	}

	// Horizontals reach further in x than the verticals: add a vertical there.
	if boxH.Llx < boxV.Llx-_bcae {
		added := &ruling{_agff: _gcfgb, _ecfb: _gecdf, _aeef: boxH.Llx, _ggdb: boxV.Lly, _gbca: boxV.Ury}
		verts = append(rulingList{added}, verts...)
	}
	if boxH.Urx > boxV.Urx+_bcae {
		added := &ruling{_agff: _gcfgb, _ecfb: _gecdf, _aeef: boxH.Urx, _ggdb: boxV.Lly, _gbca: boxV.Ury}
		verts = append(verts, added)
	}
	// Verticals reach further in y than the horizontals: add a horizontal.
	if boxV.Lly < boxH.Lly-_bcae {
		added := &ruling{_agff: _gcfgb, _ecfb: _eeg, _aeef: boxV.Lly, _ggdb: boxH.Llx, _gbca: boxH.Urx}
		horzs = append(rulingList{added}, horzs...)
	}
	if boxV.Ury > boxH.Ury+_bcae {
		added := &ruling{_agff: _gcfgb, _ecfb: _eeg, _aeef: boxV.Ury, _ggdb: boxH.Llx, _gbca: boxH.Urx}
		horzs = append(horzs, added)
	}

	// The total count is unchanged, so nothing was added.
	if len(verts)+len(horzs) == len(_feaab) {
		return origVerts, origHorzs
	}
	all := append(verts, horzs...)
	_feaab.log("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064")
	all.log("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d")
	return verts, horzs
}

// Opening of gridTiling.log, reproduced verbatim; its body continues on the
// source lines that follow this block.
func (_eddc gridTiling )log (_gfdf string ){if !_fgge {return ;};_ag .Log .Info ("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071",len (_eddc ._beefa ),len (_eddc ._eeba ),_gfdf );_efc .Printf ("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a",_eddc ._beefa );
_efc .Printf ("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a",_eddc ._eeba );for _facb ,_gcda :=range _eddc ._eeba {_bfec ,_bgdgb :=_eddc ._agba [_gcda ];if !_bgdgb {continue ;};_efc .Printf ("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a",_facb ,_gcda );
for _dcbfg ,_ggdc :=range _eddc ._beefa {_baacb ,_dfgf :=_bfec [_ggdc ];if !_dfgf {continue ;};_efc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dcbfg ,_baacb .String ());};};};func (_aegf *shapesState )establishSubpath ()*subpath {_cfae ,_bgfc :=_aegf .lastpointEstablished ();
if !_bgfc {_aegf ._baca =append (_aegf ._baca ,_ggda (_cfae ));};if len (_aegf ._baca )==0{return nil ;};_aegf ._gbee =false ;return _aegf ._baca [len (_aegf ._baca )-1];};func (_dedcc rulingList )splitSec ()[]rulingList {_e .Slice (_dedcc ,func (_adga ,_fgfd int )bool {_daafb ,_cffdf :=_dedcc [_adga ],_dedcc [_fgfd ];
if _daafb ._ggdb !=_cffdf ._ggdb {return _daafb ._ggdb < _cffdf ._ggdb ;};return _daafb ._gbca < _cffdf ._gbca ;});_ddaf :=make (map[*ruling ]struct{},len (_dedcc ));_gafdb :=func (_dgga *ruling )rulingList {_gdcge :=rulingList {_dgga };_ddaf [_dgga ]=struct{}{};
for _ ,_bdfga :=range _dedcc {if _ ,_cbafb :=_ddaf [_bdfga ];_cbafb {continue ;};for _ ,_edgfg :=range _gdcge {if _bdfga .alignsSec (_edgfg ){_gdcge =append (_gdcge ,_bdfga );_ddaf [_bdfga ]=struct{}{};break ;};};};return _gdcge ;};_aadc :=[]rulingList {_gafdb (_dedcc [0])};
for _ ,_ccgfg :=range _dedcc [1:]{if _ ,_dggd :=_ddaf [_ccgfg ];_dggd {continue ;};_aadc =append (_aadc ,_gafdb (_ccgfg ));};return _aadc ;};func (_acbb *textPara )writeText (_decgc _fc .Writer ){if _acbb ._befe ==nil {_acbb .writeCellText (_decgc );return ;
};for _gaeedg :=0;_gaeedg < _acbb ._befe ._cegga ;_gaeedg ++{for _ccbb :=0;_ccbb < _acbb ._befe ._aageb ;_ccbb ++{_ecbdf :=_acbb ._befe .get (_ccbb ,_gaeedg );if _ecbdf ==nil {_decgc .Write ([]byte ("\u0009"));}else {_ecbdf .writeCellText (_decgc );};_decgc .Write ([]byte ("\u0020"));
};if _gaeedg < _acbb ._befe ._cegga -1{_decgc .Write ([]byte ("\u000a"));};};};func (_daffdg paraList )topoOrder ()[]int {if _efbd {_ag .Log .Info ("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a");};_egbga :=len (_daffdg );_bbbc :=make ([]bool ,_egbga );
_bbadf :=make ([]int ,0,_egbga );_badc :=_daffdg .llyOrdering ();var _adgf func (_abgccg int );_adgf =func (_bcge int ){_bbbc [_bcge ]=true ;for _afba :=0;_afba < _egbga ;_afba ++{if !_bbbc [_afba ]{if _daffdg .readBefore (_badc ,_bcge ,_afba ){_adgf (_afba );
};};};_bbadf =append (_bbadf ,_bcge );};for _adgg :=0;_adgg < _egbga ;_adgg ++{if !_bbbc [_adgg ]{_adgf (_adgg );};};return _deeg (_bbadf );};func _gdcgf (_becf ,_fcbcf float64 )bool {return _becf /_ea .Max (_gcffb ,_fcbcf )< _ebffe };
2024-04-16 11:40:43 +00:00
2024-05-29 17:04:37 +00:00
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are the marks tm with `start` < tm.Offset + len(tm.Text) && tm.Offset < `end`, where
// `start` and `end` are offsets in the extracted text.
// `start` is clamped up to the first mark's offset and `end` is clamped down to one past the last
// mark's offset. An empty `ma` is returned unchanged.
// An error is returned if `ma` is nil, if `end` < `start`, if `start` lies beyond the last mark,
// or if no mark overlaps the clamped range.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
func (ma *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if ma == nil {
		return nil, _b.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _efc.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}

	marks := ma._aec
	n := len(marks)
	if n == 0 {
		return ma, nil
	}

	// Clamp the requested range to the span covered by the marks.
	if first := marks[0].Offset; start < first {
		start = first
	}
	if limit := marks[n-1].Offset + 1; end > limit {
		end = limit
	}

	// iStart is the first mark whose last byte is at or after `start`.
	iStart := _e.Search(n, func(i int) bool {
		return marks[i].Offset+len(marks[i].Text)-1 >= start
	})
	if iStart >= n {
		return nil, _efc.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, iStart, n, marks[0], marks[n-1])
	}

	// iEnd is the first mark that starts at or after `end`, i.e. the exclusive upper bound.
	// sort.Search returns n when there is no such mark, which simply means the range runs through
	// the last mark. That is a valid slice bound and must not be treated as an error; doing so
	// made every range that includes the final mark fail.
	iEnd := _e.Search(n, func(i int) bool { return marks[i].Offset > end-1 })

	if iEnd <= iStart {
		return nil, _efc.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_aec: marks[iStart:iEnd]}, nil
}
// Tables returns the tables extracted from the page.
// When the `_dedc` debug flag is set the number of tables is logged first.
func (pt PageText) Tables() []TextTable {
	tables := pt._gacf
	if _dedc {
		_ag.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}
// New returns an Extractor instance for extracting content from the input PDF page.
// It is shorthand for NewWithOptions with nil options.
func New(page *_af.PdfPage) (*Extractor, error) {
	return NewWithOptions(page, nil)
}

// setTextMatrix builds a matrix from the six values in `f` and stores it in both `_dbc` and `_ebc`.
// A slice of any other length is reported at debug level and otherwise ignored.
func (to *textObject) setTextMatrix(f []float64) {
	if n := len(f); n != 6 {
		_ag.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", n)
		return
	}
	to._dbc = _aae.NewMatrix(f[0], f[1], f[2], f[3], f[4], f[5])
	to._ebc = to._dbc
}
// NewFromContents creates a new extractor from contents and page resources.
// The extractor starts with empty `_fgf` and `_ec` maps, and the call is recorded with the license
// tracker. The returned error is always nil.
func NewFromContents(contents string, resources *_af.PdfPageResources) (*Extractor, error) {
	const usageKey = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"

	ex := &Extractor{
		_fgb: contents,
		_cf:  resources,
		_fgf: make(map[string]fontEntry),
		_ec:  make(map[string]textResult),
	}
	_d.TrackUse(usageKey)
	return ex, nil
}
// String returns a human readable description of `ss`.
func (ss *shapesState) String() string {
	numSubpaths := len(ss._baca)
	return _efc.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", numSubpaths, ss._gbee)
}

// computeViews recomputes the page's text (`_ffb`), text marks (`_gdbf`) and tables (`_gacf`)
// from the paragraphs returned by getParagraphs().
func (pt *PageText) computeViews() {
	paras := pt.getParagraphs()

	var buf _ae.Buffer
	paras.writeText(&buf)

	pt._ffb = buf.String()
	pt._gdbf = paras.toTextMarks()
	pt._gacf = paras.tables()
	if _dedc {
		_ag.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(pt._gacf))
	}
}

// taken reports whether `p` is nil or has its `_fcdcf` flag set.
func (p *textPara) taken() bool {
	if p == nil {
		return true
	}
	return p._fcdcf
}

// _gccad builds a ruling from the line p1-p2 with color `c`.
// It returns (nil, false) when `_fdag` classifies the line as `_ceag`; otherwise it returns
// whatever lineRuling.asRuling() returns.
func _gccad(p1, p2 _aae.Point, c _fe.Color) (*ruling, bool) {
	kind := _fdag(p1, p2)
	if kind == _ceag {
		return nil, false
	}
	line := lineRuling{_bbee: p1, _efge: p2, _faab: kind, Color: c}
	return line.asRuling()
}

// String returns a description of the cell: its rectangle, its number of paragraphs and the
// merged paragraph text passed through `_efcca` with the argument 50.
func (cc compositeCell) String() string {
	var text string
	if len(cc.paraList) > 0 {
		text = _efcca(cc.paraList.merge().text(), 50)
	}
	return _efc.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071", cc.PdfRectangle, len(cc.paraList), text)
}
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
//
// Marks whose bounding box passes `_bdcb` against `bbox` are grouped by orientation
// (0, 90, 180, 270), turned into paragraphs with `_bdfg`, and the text, text marks and tables of
// `pt` are rebuilt from those paragraphs.
func (pt *PageText) ApplyArea(bbox _af.PdfRectangle) {
	inArea := make([]*textMark, 0, len(pt._fecaa))
	for _, tm := range pt._fecaa {
		if _bdcb(tm.bbox(), bbox) {
			inArea = append(inArea, tm)
		}
	}

	var paras paraList
	remaining := len(inArea)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		// At most `remaining` marks can still match an orientation, so that is the right
		// capacity hint. (The previous hint, len(inArea)-remaining, was 0 on the first pass.)
		marks := make([]*textMark, 0, remaining)
		for _, tm := range inArea {
			if tm._ddfdb == orient {
				marks = append(marks, tm)
			}
		}
		if len(marks) == 0 {
			continue
		}
		paras = append(paras, _bdfg(marks, pt._cdf, nil, nil, pt._gbg._dbed)...)
		remaining -= len(marks)
	}

	var buf _ae.Buffer
	paras.writeText(&buf)
	pt._ffb = buf.String()
	pt._gdbf = paras.toTextMarks()
	pt._gacf = paras.tables()
}

// absorb extends the word's rectangle to cover `other` and appends other's marks to the word.
func (w *textWord) absorb(other *textWord) {
	w.PdfRectangle = _cfab(w.PdfRectangle, other.PdfRectangle)
	w._ffcd = append(w._ffcd, other._ffcd...)
}

// findTableGrid tries to build a table from the tiles of `tiling`, placing in each cell the
// paragraphs that inTile() reports for that tile.
// It returns the table (after reduceTiling and subdivide) and the set of paragraphs it used, or
// (nil, nil) when the number of empty tiles exceeds the `_ccfc`-derived limit or when every cell
// in row 0 is non-nil with `_bdgc` set.
func (paras paraList) findTableGrid(tiling gridTiling) (*textTable, map[*textPara]struct{}) {
	w := len(tiling._beefa)
	h := len(tiling._eeba)
	table := textTable{
		_caagg: true,
		_aageb: w,
		_cegga: h,
		_dgcf:  make(map[uint64]*textPara, w*h),
		_becfc: make(map[uint64]compositeCell, w*h),
	}
	table.PdfRectangle = tiling.PdfRectangle

	used := make(map[*textPara]struct{})
	maxEmpty := int((1.0 - _ccfc) * float64(w*h))
	numEmpty := 0
	if _fgge {
		_ag.Log.Info("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064", w, h)
	}

	for y, lly := range tiling._eeba {
		row, ok := tiling._agba[lly]
		if !ok {
			continue
		}
		for x, llx := range tiling._beefa {
			tile, ok := row[llx]
			if !ok {
				continue
			}
			cellParas := paras.inTile(tile)
			if len(cellParas) == 0 {
				numEmpty++
				if numEmpty > maxEmpty {
					if _fgge {
						_ag.Log.Info("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064", numEmpty)
					}
					return nil, nil
				}
				continue
			}
			table.putComposite(x, y, cellParas, tile.PdfRectangle)
			for _, p := range cellParas {
				used[p] = struct{}{}
			}
		}
	}

	// Count the row-0 cells that are missing or do not have `_bdgc` set.
	count := 0
	for x := 0; x < w; x++ {
		if cell := table.get(x, 0); cell == nil || !cell._bdgc {
			count++
		}
	}
	if count == 0 {
		if _fgge {
			_ag.Log.Info("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030")
		}
		return nil, nil
	}

	result := table.reduceTiling(tiling, _bfad)
	result = result.subdivide()
	return result, used
}

// _dcbd returns a 2x2 table with `a` at (0,0), `b` at (1,0), `c` at (0,1) and `d` at (1,1).
func _dcbd(a, b, c, d *textPara) *textTable {
	t := &textTable{_aageb: 2, _cegga: 2, _dgcf: make(map[uint64]*textPara, 4)}
	t.put(0, 0, a)
	t.put(1, 0, b)
	t.put(0, 1, c)
	t.put(1, 1, d)
	return t
}

// _ebfg returns the larger of `a` and `b`.
func _ebfg(a, b int) int {
	if a > b {
		return a
	}
	return b
}

// _gded is a Meta text mark with the text "[X]", a single space as its original text and white
// fill and stroke colors.
var _gded = TextMark{
	Text:        "\u005b\u0058\u005d",
	Original:    "\u0020",
	Meta:        true,
	FillColor:   _fe.White,
	StrokeColor: _fe.White,
}

// newTablePara returns a paragraph that wraps table `t`. Both of the paragraph's rectangles are
// set to the table's computed bounding box.
func (t *textTable) newTablePara() *textPara {
	bbox := t.computeBbox()
	para := &textPara{PdfRectangle: bbox, _gbgbb: bbox, _befe: t}
	if _dedc {
		_ag.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}
// PageText represents the layout of text on a device page.
type PageText struct {
	_fecaa []*textMark  // Source text marks; ApplyArea filters these by bounding box.
	_ffb   string       // Extracted text, rebuilt by computeViews and ApplyArea.
	_gdbf  []TextMark   // Text marks matching `_ffb`.
	_gacf  []TextTable  // Extracted tables, returned by Tables().
	_cdf   _af.PdfRectangle // Passed to `_bdfg` when building paragraphs (presumably the page size - TODO confirm).
	_gggf  []pathSection
	_afbg  []pathSection
	_cgad  *_gf.PdfObject
	_dfc   _gf.PdfObject
	_cfa   *_aa.ContentStreamOperations
	_gbg   PageTextOptions // Extraction options; ApplyArea reads `_dbed` from it.
}

// Compile-time debug switches. All are false, so the guarded logging is compiled out.
const (
	_cffe  = false
	_aebe  = false
	_egbg  = false
	_gfgca = false
	_cece  = false
	_bgbd  = false
	_cfgf  = false
	_efbd  = false
	_fbeb  = false
	_fdfb  = _fbeb && true
	_aedg  = _fdfb && false
	_gbgbd = _fbeb && true
	_dedc  = false
	_gddf  = _dedc && false
	_edge  = _dedc && true
	_gdeb  = false
	_edebg = _gdeb && false
	_gcbf  = _gdeb && false
	_fgge  = _gdeb && true
	_fccf  = _gdeb && false
	_bgeg  = _gdeb && false
)

// compositeCell is a rectangle together with the paragraphs placed in it.
type compositeCell struct {
	_af.PdfRectangle
	paraList
}

// toGrids partitions the rulings into groups of connected rulings and returns those groups for
// which isActualGrid() reports true, each replaced by the list isActualGrid() returns and then
// passed through snapToGroups().
// Groups are ordered by decreasing size and the rulings inside a group are ordered with comp().
// An empty ruling list yields nil.
func (_aefa rulingList) toGrids() []rulingList {
	if _gdeb {
		_ag.Log.Info("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073", _aefa)
	}
	// Without this guard the ordering[0] access below panics when there are no rulings.
	if len(_aefa) == 0 {
		return nil
	}

	intersects := _aefa.intersections()
	if _gdeb {
		_ag.Log.Info("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020", len(_aefa), len(intersects))
		for _, i := range _adca(intersects) {
			_efc.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", i, intersects[i])
		}
	}

	// connected[i] holds the indexes of the rulings connected to ruling i; empty sets are omitted.
	connected := make(map[int]intSet, len(_aefa))
	for i := range _aefa {
		if set := _aefa.connections(intersects, i); len(set) > 0 {
			connected[i] = set
		}
	}
	if _gdeb {
		_ag.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064", len(connected))
		for _, i := range _adca(connected) {
			_efc.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", i, connected[i])
		}
	}

	// Order the rulings by decreasing number of connections, breaking ties with comp().
	ordering := _bebec(len(_aefa), func(a, b int) bool {
		na, nb := len(connected[a]), len(connected[b])
		if na != nb {
			return na > nb
		}
		return _aefa.comp(a, b)
	})
	if _gdeb {
		_ag.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076", ordering)
	}

	// Add each ruling to the first group that has a member connected to it, else start a new group.
	groups := [][]int{{ordering[0]}}
nextRuling:
	for _, idx := range ordering[1:] {
		for gi, group := range groups {
			for _, member := range group {
				if connected[member].has(idx) {
					groups[gi] = append(group, idx)
					continue nextRuling
				}
			}
		}
		groups = append(groups, []int{idx})
	}
	if _gdeb {
		_ag.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076", groups)
	}

	_e.SliceStable(groups, func(i, j int) bool { return len(groups[i]) > len(groups[j]) })
	for _, group := range groups {
		_e.Slice(group, func(i, j int) bool { return _aefa.comp(group[i], group[j]) })
	}

	grids := make([]rulingList, len(groups))
	for gi, group := range groups {
		vecs := make(rulingList, len(group))
		for k, idx := range group {
			vecs[k] = _aefa[idx]
		}
		grids[gi] = vecs
	}
	if _gdeb {
		_ag.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076", grids)
	}

	var actual []rulingList
	for _, grid := range grids {
		augmented, ok := grid.isActualGrid()
		if !ok {
			continue
		}
		actual = append(actual, augmented.snapToGroups())
	}
	if _gdeb {
		_acgbd("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073", actual)
		_ag.Log.Info("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064", len(grids), len(actual))
	}
	return actual
}

// text returns the paragraph's text as produced by writeText().
func (p *textPara) text() string {
	var buf _ae.Buffer
	p.writeText(&buf)
	return buf.String()
}

// merge returns one ruling that stands for the whole list: `_aeef` is the mean of the members'
// `_aeef`, `_ggdb` their minimum `_ggdb` and `_gbca` their maximum `_gbca`. `_ecfb`, `_agff` and
// Color are taken from the first member.
// NOTE: indexes element 0 unconditionally, so the list must not be empty.
func (_abeb rulingList) merge() *ruling {
	first := _abeb[0]
	sum, lo, hi := first._aeef, first._ggdb, first._gbca
	for _, r := range _abeb[1:] {
		sum += r._aeef
		if r._ggdb < lo {
			lo = r._ggdb
		}
		if r._gbca > hi {
			hi = r._gbca
		}
	}
	merged := &ruling{
		_ecfb: first._ecfb,
		_agff: first._agff,
		Color: first.Color,
		_aeef: sum / float64(len(_abeb)),
		_ggdb: lo,
		_gbca: hi,
	}
	if _gcbf {
		_ag.Log.Info("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073", len(_abeb), merged)
		for i, r := range _abeb {
			_efc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r)
		}
	}
	return merged
}

// fontsize returns the `_afeb` value of the paragraph's first line.
// NOTE: indexes line 0 unconditionally, so the paragraph must have at least one line.
func (p *textPara) fontsize() float64 {
	return p._aage[0]._afeb
}

// pop removes the top element from the stack and returns a pointer to a copy of it, or nil if the
// stack is empty.
func (s *stateStack) pop() *textState {
	if s.empty() {
		return nil
	}
	top := len(*s) - 1
	state := *(*s)[top]
	*s = (*s)[:top]
	return &state
}

// _bgfgg reports whether the two points have identical X and Y coordinates.
func _bgfgg(a, b _aae.Point) bool {
	return a.X == b.X && a.Y == b.Y
}

// last returns the final point of the subpath.
// NOTE: the subpath must contain at least one point.
func (sp *subpath) last() _aae.Point {
	points := sp._aaebg
	return points[len(points)-1]
}

// list gathers the list lines (getListLines) and all lines of every paragraph, then builds the
// result with `_dfga` and `_caegg`.
func (paras paraList) list() []*list {
	var listLines, allLines []*textLine
	for _, para := range paras {
		listLines = append(listLines, para.getListLines()...)
		allLines = append(allLines, para._aage...)
	}
	return _caegg(allLines, _dfga(listLines))
}

// extractXObjectImage records an ImageMark for the image XObject `name` drawn under graphics
// state `gs`.
// The decoded image is cached per XObject in `_ebf`. On a cache miss the image is loaded from
// `resources` and, when it carries a Mask or SMask that can be read, combined with that mask via
// `_fabfa`. Failure to read a mask is only logged.
// Returns nil without recording anything if the XObject or its image is not found.
func (ctx *imageExtractContext) extractXObjectImage(name *_gf.PdfObjectName, gs _aa.GraphicsState, resources *_af.PdfPageResources) error {
	stream, _ := resources.GetXObjectByName(*name)
	if stream == nil {
		return nil
	}

	cached, found := ctx._ebf[stream]
	if !found {
		ximg, err := resources.GetXObjectImageByName(*name)
		if err != nil {
			return err
		}
		if ximg == nil {
			return nil
		}
		img, err := ximg.ToImage()
		if err != nil {
			return err
		}

		var mask _ef.Image
		if ximg.Mask != nil {
			if mask, err = _edfff(ximg.Mask, _fe.Opaque); err != nil {
				_ag.Log.Debug("\u0057\u0041\u0052\u004e\u003a \u0063\u006f\u0075\u006c\u0064 \u006eo\u0074\u0020\u0067\u0065\u0074\u0020\u0065\u0078\u0070\u006c\u0069\u0063\u0069\u0074\u0020\u0069\u006d\u0061\u0067e\u0020\u006d\u0061\u0073\u006b\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e")
			}
		} else if ximg.SMask != nil {
			mask, err = _ebdca(ximg.SMask, _fe.Opaque)
			if err != nil {
				_ag.Log.Debug("W\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0067\u0065\u0074\u0020\u0073\u006f\u0066\u0074\u0020\u0069\u006da\u0067e\u0020\u006d\u0061\u0073k\u002e\u0020O\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063\u0074\u002e")
			}
		}

		if mask != nil {
			goImg, convErr := img.ToGoImage()
			if convErr != nil {
				return convErr
			}
			goImg = _fabfa(goImg, mask)
			switch ximg.ColorSpace.String() {
			case "\u0044\u0065\u0076\u0069\u0063\u0065\u0047\u0072\u0061\u0079", "\u0049n\u0064\u0065\u0078\u0065\u0064":
				img, convErr = _af.ImageHandling.NewGrayImageFromGoImage(goImg)
			default:
				img, convErr = _af.ImageHandling.NewImageFromGoImage(goImg)
			}
			if convErr != nil {
				return convErr
			}
		}

		cached = &cachedImage{_aag: img, _gbb: ximg.ColorSpace}
		ctx._ebf[stream] = cached
	}

	rgb, err := cached._gbb.ImageToRGB(*cached._aag)
	if err != nil {
		return err
	}
	_ag.Log.Debug("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073", gs.CTM.String())

	// Size, rotation and position of the mark all come from the current transformation matrix.
	mark := ImageMark{
		Image:  &rgb,
		Width:  gs.CTM.ScalingFactorX(),
		Height: gs.CTM.ScalingFactorY(),
		Angle:  gs.CTM.Angle(),
	}
	mark.X, mark.Y = gs.CTM.Translation()
	ctx._cgg = append(ctx._cgg, mark)
	ctx._fcc++
	return nil
}
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct{
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Text is the extracted text.
Text string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// BBox is the bounding box of the text.
2024-05-29 17:04:37 +00:00
BBox _af .PdfRectangle ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Font is the font the text was drawn with.
2024-05-29 17:04:37 +00:00
Font *_af .PdfFont ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2024-05-29 17:04:37 +00:00
FillColor _fe .Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2024-05-29 17:04:37 +00:00
StrokeColor _fe .Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Orientation is the text orientation.
Orientation int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// DirectObject is the underlying PdfObject (Text Object) that represents the visible text. It is exposed to give
// simple access to the Text Object in case editing or replacement of some text is needed, e.g. during redaction.
2024-05-29 17:04:37 +00:00
DirectObject _gf .PdfObject ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// ObjString is a decoded string operand of a text-showing operator. It has the same value as the `Text` attribute except
// when many glyphs are represented by the same Text Object, whose string operand holds more than one character; in that
// case ObjString spans more than one character string and those strings fall in different TextMark objects.
2024-05-29 17:04:37 +00:00
ObjString []string ;
// Tw, Th and Tc are copied from the internal text mark by ToTextMark.
// NOTE(review): presumably the PDF text-state word spacing (Tw), horizontal scaling (Th) and
// character spacing (Tc) in effect when the text was shown - confirm.
Tw float64 ;Th float64 ;Tc float64 ;
// Index is copied from the internal text mark by ToTextMark.
// NOTE(review): its exact meaning is not visible here - confirm before relying on it.
Index int ;
// _bacg gates TableInfo: when it is false TableInfo returns (nil, nil).
_bacg bool ;
// _efce is the table that TableInfo returns and queries for cell information.
_efce *TextTable ;};
2024-04-16 11:40:43 +00:00
2024-05-29 17:04:37 +00:00
// ToTextMark returns the public view of `tm`.
// Offset, Meta and the table fields are not populated here and keep their zero values.
func (tm *textMark) ToTextMark() TextMark {
	var out TextMark
	out.Text = tm._cgeb
	out.Original = tm._faaf
	out.BBox = tm._fbaa
	out.Font = tm._dbgfg
	out.FontSize = tm._acaf
	out.FillColor = tm._ccbff
	out.StrokeColor = tm._bcfc
	out.Orientation = tm._ddfdb
	out.DirectObject = tm._bae
	out.ObjString = tm._afcb
	out.Tw = tm.Tw
	out.Th = tm.Th
	out.Tc = tm._bdeb
	out.Index = tm._feg
	return out
}
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// TableInfo gets table information of the textmark `tm`.
// It returns (nil, nil) when the mark's `_bacg` flag is not set; otherwise it returns the mark's
// table together with the result of the table's getCellInfo() for this mark.
func (tm *TextMark) TableInfo() (*TextTable, [][]int) {
	if !tm._bacg {
		return nil, nil
	}
	table := tm._efce
	return table, table.getCellInfo(*tm)
}

// makeRemovals returns a map holding an empty word set for every key of `_cdbc`.
func (b *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	removals := make(map[int]map[*textWord]struct{}, len(b._cdbc))
	for idx := range b._cdbc {
		removals[idx] = make(map[*textWord]struct{})
	}
	return removals
}

// cachedImage pairs a decoded image with its color space.
type cachedImage struct {
	_aag *_af.Image
	_gbb _af.PdfColorspace
}

// lines returns the lines of all paragraphs in `paras`, in paragraph order.
func (paras paraList) lines() []*textLine {
	var all []*textLine
	for _, para := range paras {
		all = append(all, para._aage...)
	}
	return all
}

// arrangeText consumes the words in the bag and arranges them into lines, which are then sorted
// with `_bcbe` and combined into a paragraph by `_adbde`. It returns nil if no line was built.
//
// For each depth index, while that index still has words, a line is started at the first word in
// reading order. Words are then pulled into the line one at a time from the depth band around
// that first word until no candidate is left.
func (b *wordBag) arrangeText() *textPara {
	b.sort()
	if _deeb {
		b.removeDuplicates()
	}

	var lines []*textLine
	for _, depthIdx := range b.depthIndexes() {
		for !b.empty(depthIdx) {
			firstIdx := b.firstReadingIndex(depthIdx)
			first := b.firstWord(firstIdx)
			line := _gfgde(b, firstIdx)

			// All thresholds are scaled by the first word's `_abcc`.
			size := first._abcc
			minDepth := first._accb - _egeb*size
			maxDepth := first._accb + _egeb*size
			maxGap := _geac * size
			maxOverlap := _efbf * size

		extend:
			for {
				var best *textWord
				bestIdx := 0
				for _, idx := range b.depthBand(minDepth, maxDepth) {
					word := b.highestWord(idx, minDepth, maxDepth)
					if word == nil {
						continue
					}
					gap := _egec(word, line._cfcb[len(line._cfcb)-1])
					if gap < -maxOverlap {
						// A candidate this far behind the line's last word ends the line.
						break extend
					}
					if gap > maxGap {
						continue
					}
					if best != nil && _fdbb(word, best) >= 0 {
						continue
					}
					best = word
					bestIdx = idx
				}
				if best == nil {
					break
				}
				line.pullWord(b, best, bestIdx)
			}

			line.markWordBoundaries()
			lines = append(lines, line)
		}
	}
	if len(lines) == 0 {
		return nil
	}

	_e.Slice(lines, func(i, j int) bool { return _bcbe(lines[i], lines[j]) < 0 })
	para := _adbde(b.PdfRectangle, lines)

	if _fbeb {
		_ag.Log.Info("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073", para.String())
		if _fdfb {
			for li, ln := range para._aage {
				_efc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", li, ln.String())
				if _aedg {
					for wi, word := range ln._cfcb {
						_efc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", wi, word.String())
						for mi, mark := range word._ffcd {
							_efc.Printf("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n", mi, mark.String())
						}
					}
				}
			}
		}
	}
	return para
}

// appendMark adds `tm` to the word, grows the word's rectangle to include it, raises `_abcc` to
// the mark's `_acaf` if that is larger, and recomputes `_accb` as pageSize.Ury minus the word's Lly.
func (w *textWord) appendMark(tm *textMark, pageSize _af.PdfRectangle) {
	w._ffcd = append(w._ffcd, tm)
	w.PdfRectangle = _cfab(w.PdfRectangle, tm.PdfRectangle)
	if w._abcc < tm._acaf {
		w._abcc = tm._acaf
	}
	w._accb = pageSize.Ury - w.PdfRectangle.Lly
}
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// PageTextOptions holds various options available in extraction process.
type PageTextOptions struct {
	_gcgg bool
	_dbed bool // Passed by ApplyArea to `_bdfg` when building paragraphs.
}

// _eaae builds a word from `marks`. The word's rectangle is the union of the marks' rectangles,
// `_abcc` is the largest `_acaf` among the marks and `_accb` is pageSize.Ury minus the word's Lly.
// NOTE: indexes mark 0 unconditionally, so `marks` must not be empty.
func _eaae(marks []*textMark, pageSize _af.PdfRectangle) *textWord {
	bbox := marks[0].PdfRectangle
	size := marks[0]._acaf
	for _, tm := range marks[1:] {
		bbox = _cfab(bbox, tm.PdfRectangle)
		if size < tm._acaf {
			size = tm._acaf
		}
	}
	return &textWord{
		PdfRectangle: bbox,
		_ffcd:        marks,
		_accb:        pageSize.Ury - bbox.Lly,
		_abcc:        size,
	}
}

// top returns the element at the top of the stack without removing it, or nil if the stack is
// empty.
func (s *stateStack) top() *textState {
	if s.empty() {
		return nil
	}
	last := s.size() - 1
	return (*s)[last]
}

// getDown returns, for each column, the `_cabda` paragraph of the cell in the table's last row.
// It returns nil if any of those paragraphs is taken(), or if the `_aggd` link of one of them is
// not the paragraph found for the next column.
func (t *textTable) getDown() paraList {
	cells := make(paraList, t._aageb)
	for x := range cells {
		below := t.get(x, t._cegga-1)._cabda
		if below.taken() {
			return nil
		}
		cells[x] = below
	}
	for x := 0; x+1 < t._aageb; x++ {
		if cells[x]._aggd != cells[x+1] {
			return nil
		}
	}
	return cells
}

// markCells sets the `_fcdcf` flag on every non-nil cell of the table.
func (t *textTable) markCells() {
	for y := 0; y < t._cegga; y++ {
		for x := 0; x < t._aageb; x++ {
			cell := t.get(x, y)
			if cell == nil {
				continue
			}
			cell._fcdcf = true
		}
	}
}
2024-04-30 12:24:05 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
2024-05-29 17:04:37 +00:00
type ImageExtractOptions struct {
	// IncludeInlineStencilMasks presumably controls whether inline stencil masks are included in
	// the extracted images - TODO confirm against the code that reads it.
	IncludeInlineStencilMasks bool
}

// complete reports whether every tile in `_agba` is complete.
func (gt gridTiling) complete() bool {
	for _, row := range gt._agba {
		for _, tile := range row {
			if !tile.complete() {
				return false
			}
		}
	}
	return true
}

// _ffadg returns the keys of `m` sorted in increasing order. The result is never nil.
func _ffadg(m map[float64][]*textLine) []float64 {
	keys := make([]float64, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	_e.Float64s(keys)
	return keys
}

// llyOrdering returns the paragraph indexes stably sorted by increasing Lly.
func (paras paraList) llyOrdering() []int {
	order := make([]int, len(paras))
	for i := range order {
		order[i] = i
	}
	_e.SliceStable(order, func(a, b int) bool {
		return paras[order[a]].Lly < paras[order[b]].Lly
	})
	return order
}

// xMean returns the mean of the X coordinates of the line's two end points.
func (lr lineRuling) xMean() float64 {
	return 0.5 * (lr._bbee.X + lr._efge.X)
}

const _edac = 1.0 / 1000.0

// _adab returns true if either paragraph has `_bdgc` set; otherwise it returns the result of
// `_cdaea` applied to the difference of the paragraphs' depths.
func _adab(a, b *textPara) bool {
	return a._bdgc || b._bdgc || _cdaea(a.depth()-b.depth())
}
2024-04-16 11:40:43 +00:00
2024-05-29 17:04:37 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20>).
func (_ggg *Extractor) ExtractText() (string, error) {
	text, _, _, err := _ggg.ExtractTextWithStats()
	return text, err
}

// _cfeg converts the PDF color `_bbccb`, expressed in colorspace `_cdeb`, to a
// Go color. Black is returned when either argument is nil, when the conversion
// to RGB fails, or when the converted color is not a *PdfColorDeviceRGB.
func _cfeg(_cdeb _af.PdfColorspace, _bbccb _af.PdfColor) _fe.Color {
	if _cdeb == nil || _bbccb == nil {
		return _fe.Black
	}
	converted, err := _cdeb.ColorToRGB(_bbccb)
	if err != nil {
		_ag.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", _bbccb, _cdeb, err)
		return _fe.Black
	}
	rgb, isRGB := converted.(*_af.PdfColorDeviceRGB)
	if !isRGB {
		_ag.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", converted)
		return _fe.Black
	}
	return _fe.NRGBA{
		R: uint8(rgb.R() * 255),
		G: uint8(rgb.G() * 255),
		B: uint8(rgb.B() * 255),
		A: uint8(255),
	}
}

// _dgbf returns the lines of `_bcaca` whose `_addd` value is strictly greater
// than `_eccd` and, unless `_gefd` is -1, strictly less than `_gefd`.
// The result is nil when nothing matches.
func _dgbf(_bcaca []*textLine, _gefd, _eccd float64) []*textLine {
	var selected []*textLine
	for _, line := range _bcaca {
		aboveLower := line._addd > _eccd
		if aboveLower && (_gefd == -1 || line._addd < _gefd) {
			selected = append(selected, line)
		}
	}
	return selected
}

// _dgfg returns the intersection of the two rectangles. The boolean is false,
// and the rectangle zero, when `_bdcb` reports that the rectangles do not
// qualify.
func _dgfg(_bebg, _egee _af.PdfRectangle) (_af.PdfRectangle, bool) {
	if !_bdcb(_bebg, _egee) {
		return _af.PdfRectangle{}, false
	}
	var overlap _af.PdfRectangle
	overlap.Llx = _ea.Max(_bebg.Llx, _egee.Llx)
	overlap.Urx = _ea.Min(_bebg.Urx, _egee.Urx)
	overlap.Lly = _ea.Max(_bebg.Lly, _egee.Lly)
	overlap.Ury = _ea.Min(_bebg.Ury, _egee.Ury)
	return overlap, true
}

// mergePrimary returns the arithmetic mean of the `_aeef` values of the
// rulings. It indexes the first element, so the list must not be empty.
func (_dfbf rulingList) mergePrimary() float64 {
	total := _dfbf[0]._aeef
	for _, r := range _dfbf[1:] {
		total += r._aeef
	}
	return total / float64(len(_dfbf))
}

// sort orders the rulings in place using the list's comp function.
func (_agdc rulingList) sort() {
	_e.Slice(_agdc, _agdc.comp)
}

// connections returns the set of ruling indexes reachable from `_ggec`
// through the relation `_fdaeg`: index i is linked to j when `_fdaeg[i]`
// contains j. The search follows links transitively and visits each index
// at most once.
func (_ffbc rulingList) connections(_fdaeg map[int]intSet, _ggec int) intSet {
	linked := make(intSet)
	visited := make(intSet)
	var visit func(int)
	visit = func(idx int) {
		if visited.has(idx) {
			return
		}
		visited.add(idx)
		for i := range _ffbc {
			if _fdaeg[i].has(idx) {
				linked.add(i)
			}
		}
		for i := range _ffbc {
			if linked.has(i) {
				visit(i)
			}
		}
	}
	visit(_ggec)
	return linked
}

// lineTo appends the point (`_bed`, `_gfcg`) to the current path via addPoint,
// logging the call first when `_cece` is set.
func (_deg *shapesState) lineTo(_bed, _gfcg float64) {
	if _cece {
		_ag.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _bed, _gfcg, _deg.devicePoint(_bed, _gfcg))
	}
	_deg.addPoint(_bed, _gfcg)
}

// findPrimSec returns the first ruling for which `_cdaea` accepts the
// difference between its `_aeef` and `_edfg`, and whose [`_ggdb`, `_gbca`]
// range, widened by `_bcae` at both ends, contains `_ebead`. It returns nil
// when no ruling matches.
func (_adfeb rulingList) findPrimSec(_edfg, _ebead float64) *ruling {
	for _, r := range _adfeb {
		if !_cdaea(r._aeef - _edfg) {
			continue
		}
		if r._ggdb-_bcae <= _ebead && _ebead <= r._gbca+_bcae {
			return r
		}
	}
	return nil
}

// rectRuling is a rectangle together with its ruling kind, mark kind and color.
type rectRuling struct {
	_beda  rulingKind
	_fdfdf markKind
	_fe.Color
	_af.PdfRectangle
}

// getCurrentFont returns the font stored in the text state, or the model's
// default font (after logging) when none is set.
func (_gfe *textObject) getCurrentFont() *_af.PdfFont {
	if font := _gfe._ecff._fgfgb; font != nil {
		return font
	}
	_ag.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e")
	return _af.DefaultFont()
}

// isActualGrid checks whether the rulings form a grid. It splits the augmented
// rulings into two lists (logged as verts and horzs), requires at least
// `_fcad`+1 of the first and `_bce`+1 of the second, and then validates their
// alignment: by comparing the outermost rulings of each list when `_bedg` is
// set, otherwise by calling aligned() on each list. On success it returns the
// first list followed by the second, and true.
func (_geab rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := _geab.augmentGrid()
	if len(verts) < _fcad+1 || len(horzs) < _bce+1 {
		if _gdeb {
			_ag.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(verts), len(horzs), _fcad+1, _bce+1)
		}
		return nil, false
	}
	if _gdeb {
		_ag.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", _geab, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for i, r := range _geab {
			_efc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", i, r)
		}
	}
	if _bedg {
		v0, v1 := verts[0], verts[len(verts)-1]
		h0, h1 := horzs[0], horzs[len(horzs)-1]
		cornersMatch := _eaaa(v0._aeef-h0._ggdb) &&
			_eaaa(v1._aeef-h0._gbca) &&
			_eaaa(h0._aeef-v0._gbca) &&
			_eaaa(h1._aeef-v0._ggdb)
		if !cornersMatch {
			if _gdeb {
				_ag.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", v0, v1, h0, h1)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			if _gcbf {
				_ag.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _gdeb {
				_ag.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(horzs))
			}
			return nil, false
		}
	}
	grid := append(verts, horzs...)
	return grid, true
}

// pathSection is a list of subpaths that share one color.
type pathSection struct {
	_bgbeg []*subpath
	_fe.Color
}

// _fdfbc passes the absolute Y distance and the absolute X distance between
// the two points, in that order, to `_gdcgf` and returns its verdict.
func _fdfbc(_acbgc, _gdada _aae.Point) bool {
	dx := _ea.Abs(_acbgc.X - _gdada.X)
	dy := _ea.Abs(_acbgc.Y - _gdada.Y)
	return _gdcgf(dy, dx)
}

// _ffag builds a textObject bound to the given extractor, page resources,
// graphics state, text state and state stack. Both of its matrices start as
// the identity.
func _ffag(_egaf *Extractor, _dgf *_af.PdfPageResources, _bdfe _aa.GraphicsState, _fdbc *textState, _feae *stateStack) *textObject {
	obj := textObject{
		_dbe:  _egaf,
		_dae:  _dgf,
		_aef:  _bdfe,
		_aega: _feae,
		_ecff: _fdbc,
		_dbc:  _aae.IdentityMatrix(),
		_ebc:  _aae.IdentityMatrix(),
	}
	return &obj
}

// del removes `_efefb` from the set.
func (_afdb intSet) del(_efefb int) {
	delete(_afdb, _efefb)
}

// bbox returns the paragraph's embedded rectangle.
func (_gada *textPara) bbox() _af.PdfRectangle {
	return _gada.PdfRectangle
}
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// String returns a string describing `pt`.
func (_gege PageText) String() string {
	summary := _efc.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_gege._fecaa))
	lines := make([]string, 0, len(_gege._fecaa)+2)
	lines = append(lines, "\u002d"+summary)
	for _, mark := range _gege._fecaa {
		lines = append(lines, mark.String())
	}
	lines = append(lines, "\u002b"+summary)
	return _a.Join(lines, "\u000a")
}

// _fdgf maps a depth to an integer bin index by dividing by `_cdcb` and
// truncating; one is subtracted from the truncated value for depths that are
// not >= 0.
func _fdgf(_cfgg float64) int {
	bin := int(_cfgg / _cdcb)
	if _cfgg >= 0 {
		return bin
	}
	return bin - 1
}

// _efg builds a wordBag from `_ebge`. The bag is created from the first word
// by `_cgd`; every remaining word is appended to the bin selected by `_fdgf`
// of its `_accb`, and the bag rectangle is grown to include it. The bag is
// sorted before it is returned. `_ebge` must not be empty.
func _efg(_ebge []*textWord, _ggfcd float64, _bgaag, _aaae rulingList) *wordBag {
	bag := _cgd(_ebge[0], _ggfcd, _bgaag, _aaae)
	for _, w := range _ebge[1:] {
		bin := _fdgf(w._accb)
		bag._cdbc[bin] = append(bag._cdbc[bin], w)
		bag.PdfRectangle = _cfab(bag.PdfRectangle, w.PdfRectangle)
	}
	bag.sort()
	return bag
}

// setCharSpacing stores `_feda` in the text state's `_fdad` field. It is a
// no-op on a nil receiver.
func (_fdcc *textObject) setCharSpacing(_feda float64) {
	if _fdcc == nil {
		return
	}
	_fdcc._ecff._fdad = _feda
	if !_bgbd {
		return
	}
	_ag.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _feda, _fdcc._ecff.String())
}

// _eedcg returns the distinct inner keys of the two-level map `_caac`, sorted
// in ascending order. The returned slice is never nil.
func _eedcg(_caac map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_caac))
	seen := make(map[float64]struct{}, len(_caac))
	for _, inner := range _caac {
		for key := range inner {
			if _, dup := seen[key]; !dup {
				keys = append(keys, key)
				seen[key] = struct{}{}
			}
		}
	}
	_e.Float64s(keys)
	return keys
}

// extractTables finds the tables among the paragraphs (using the tilings in
// `_bcaf`) and returns a list in which every table is a single table paragraph
// and the paragraphs that became cells are dropped. Lists shorter than `_gaba`
// are returned unchanged.
func (_dcgea paraList) extractTables(_bcaf []gridTiling) paraList {
	if _dedc {
		_ag.Log.Debug("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_dcgea))
	}
	if len(_dcgea) < _gaba {
		return _dcgea
	}
	tables := _dcgea.findTables(_bcaf)
	if _dedc {
		_ag.Log.Info("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(tables))
		for i, table := range tables {
			table.log(_efc.Sprintf("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064", i))
		}
	}
	return _dcgea.applyTables(tables)
}

// applyTables returns one table paragraph per table in `_aafcf`, followed by
// every paragraph of the receiver whose `_fcdcf` flag is not set. The result
// is nil when both inputs contribute nothing.
func (_aeaa paraList) applyTables(_aafcf []*textTable) paraList {
	var merged paraList
	for _, table := range _aafcf {
		merged = append(merged, table.newTablePara())
	}
	for _, para := range _aeaa {
		if !para._fcdcf {
			merged = append(merged, para)
		}
	}
	return merged
}
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// String returns a string describing `ma`.
func (_gace TextMarkArray) String() string {
	marks := _gace._aec
	if len(marks) == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	first, last := marks[0], marks[len(marks)-1]
	return _efc.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", len(marks), first, last)
}

// RenderMode values are single-bit flags (1, 2 and 4), so they can be OR-ed
// together.
const (
	RenderModeStroke RenderMode = 1 << iota
	RenderModeFill
	RenderModeClip
)
2024-04-30 12:24:05 +00:00
// TableCell is a cell in a TextTable.
2024-05-29 17:04:37 +00:00
type TableCell struct{_af .PdfRectangle ;
2024-04-30 12:24:05 +00:00
// Text is the extracted text.
Text string ;
// Marks returns the TextMarks corresponding to the text in Text.
2024-05-29 17:04:37 +00:00
Marks TextMarkArray ;};func _bfeb (_dfbe []*textLine )[]*textLine {_fdea :=[]*textLine {};for _ ,_fadbe :=range _dfbe {_gcd :=_fadbe .text ();_ddfd :=_fbgge .Find ([]byte (_gcd ));if _ddfd !=nil {_fdea =append (_fdea ,_fadbe );};};return _fdea ;};var (_ca =_b .New ("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072");
_eb =_b .New ("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072"););func _ebgf (_ccdf *wordBag ,_gedd *textWord ,_cffd float64 )bool {return _ccdf .Urx <=_gedd .Llx &&_gedd .Llx < _ccdf .Urx +_cffd ;};func (_afga *wordBag )pullWord (_dffa *textWord ,_fadc int ,_cegad map[int ]map[*textWord ]struct{}){_afga .PdfRectangle =_cfab (_afga .PdfRectangle ,_dffa .PdfRectangle );
if _dffa ._abcc > _afga ._cdac {_afga ._cdac =_dffa ._abcc ;};_afga ._cdbc [_fadc ]=append (_afga ._cdbc [_fadc ],_dffa );_cegad [_fadc ][_dffa ]=struct{}{};};func (_cege *textObject )renderText (_edbc _gf .PdfObject ,_bbdg []byte ,_fac int )error {if _cege ._cdcc {_ag .Log .Debug ("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e");
return nil ;};_cfge :=_cege .getCurrentFont ();_ffadc :=_cfge .BytesToCharcodes (_bbdg );_bcd ,_acb ,_fgbf :=_cfge .CharcodesToStrings (_ffadc );if _fgbf > 0{_ag .Log .Debug ("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064",_acb ,_fgbf );
};_cege ._ecff ._cfg +=_acb ;_cege ._ecff ._dacb +=_fgbf ;_afdf :=_cege ._ecff ;_afaa :=_afdf ._gbbgg ;_dee :=_afdf ._dba /100.0;_eded :=_edac ;if _cfge .Subtype ()=="\u0054\u0079\u0070e\u0033"{_eded =1;};_fcfb ,_cef :=_cfge .GetRuneMetrics (' ');if !_cef {_fcfb ,_cef =_cfge .GetCharMetrics (32);
};if !_cef {_fcfb ,_ =_af .DefaultFont ().GetRuneMetrics (' ');};_edcg :=_fcfb .Wx *_eded ;_ag .Log .Trace ("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066",_edcg ,_bcd ,_cfge ,_afaa );
_fccb :=_aae .NewMatrix (_afaa *_dee ,0,0,_afaa ,0,_afdf ._dgef );if _bgbd {_ag .Log .Info ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071",len (_ffadc ),_ffadc ,_bcd );
};_ag .Log .Trace ("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071",len (_ffadc ),_ffadc ,len (_bcd ));_eeaf :=_cege .getFillColor ();
_gge :=_cege .getStrokeColor ();for _bbb ,_bbbe :=range _bcd {_adfdf :=[]rune (_bbbe );if len (_adfdf )==1&&_adfdf [0]=='\x00'{continue ;};_caa :=_ffadc [_bbb ];_dcdg :=_cege ._aef .CTM .Mult (_cege ._dbc ).Mult (_fccb );_daad :=0.0;if len (_adfdf )==1&&_adfdf [0]==32{_daad =_afdf ._febe ;
};_gdc ,_fbe :=_cfge .GetCharMetrics (_caa );if !_fbe {_ag .Log .Debug ("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073",_caa ,_adfdf ,_adfdf ,_cfge );
return _efc .Errorf ("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064",_cfge .String (),_caa );};_ecfc :=_aae .Point {X :_gdc .Wx *_eded ,Y :_gdc .Wy *_eded };
_edbf :=_aae .Point {X :(_ecfc .X *_afaa +_daad )*_dee };_bbcb :=_aae .Point {X :(_ecfc .X *_afaa +_afdf ._fdad +_daad )*_dee };if _bgbd {_ag .Log .Info ("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066",_afaa ,_afdf ._fdad ,_afdf ._febe ,_dee );
_ag .Log .Info ("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f",_ecfc ,_edbf ,_bbcb );};_abba :=_add (_edbf );_bgdb :=_add (_bbcb );_aaef :=_cege ._aef .CTM .Mult (_cege ._dbc ).Mult (_abba );
if _gfgca {_ag .Log .Info ("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073",_cege ._aef .CTM ,_cege ._dbc ,_bgdb ,_eaf (_cege ._aef .CTM .Mult (_cege ._dbc ).Mult (_bgdb )),_abba ,_aaef ,_eaf (_aaef ));
};_cabfb ,_cebd :=_cege .newTextMark (_cb .ExpandLigatures (_adfdf ),_dcdg ,_eaf (_aaef ),_ea .Abs (_edcg *_dcdg .ScalingFactorX ()),_cfge ,_cege ._ecff ._fdad ,_eeaf ,_gge ,_edbc ,_bcd ,_bbb ,_fac );if !_cebd {_ag .Log .Debug ("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067");
continue ;};if _cfge ==nil {_ag .Log .Debug ("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e");}else if _cfge .Encoder ()==nil {_ag .Log .Debug ("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073",_cfge );
}else {if _gfac ,_defe :=_cfge .Encoder ().CharcodeToRune (_caa );_defe {_cabfb ._faaf =string (_gfac );};};_ag .Log .Trace ("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073",_bbb ,_caa ,_cabfb ,_dcdg );
_cege ._afff =append (_cege ._afff ,&_cabfb );_cege ._dbc .Concat (_bgdb );};return nil ;};
2024-04-16 11:40:43 +00:00
2024-05-29 17:04:37 +00:00
// String returns a description of `w`.
func (_bfeg *textWord) String() string {
	return _efc.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022", _bfeg._accb, _bfeg.PdfRectangle, _bfeg._abcc, _bfeg._ccbcc)
}

// _efcca returns `_afed` cut to at most `_abefc` bytes. Strings shorter than
// `_abefc` are returned unchanged.
// NOTE(review): the cut is byte-based and can split a multi-byte UTF-8 rune.
func _efcca(_afed string, _abefc int) string {
	if len(_afed) < _abefc {
		return _afed
	}
	return _afed[:_abefc]
}

// parseStructTreeRoot fills the receiver from the structure tree root object
// `_gced`: `_gfbe` receives the string form of the "Type" entry and `_cfbfg`
// the elements parsed from the "K" entry (an array, or a single reference
// wrapped into an array).
//
// The receiver is left untouched when `_gced` is nil or is not a dictionary.
// Previously the non-dictionary case was only logged and the failed lookup
// result was dereferenced anyway.
func (_cagg *structTreeRoot) parseStructTreeRoot(_gced _gf.PdfObject) {
	if _gced == nil {
		return
	}
	dict, isDict := _gf.GetDict(_gced)
	if !isDict {
		_ag.Log.Debug("\u0070\u0061\u0072s\u0065\u0053\u0074\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u003a\u0020\u0064\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006eo\u0074\u0020\u0066\u006f\u0075\u006e\u0064\u002e")
		return
	}

	K := dict.Get("\u004b")

	// A missing Type entry yields an empty type name rather than a nil
	// interface method call.
	var typeName string
	if typeObj := dict.Get("\u0054\u0079\u0070\u0065"); typeObj != nil {
		typeName = typeObj.String()
	}

	var kids *_gf.PdfObjectArray
	switch k := K.(type) {
	case *_gf.PdfObjectArray:
		kids = k
	case *_gf.PdfObjectReference:
		kids = _gf.MakeArray(K)
	}

	elements := []structElement{}
	if kids != nil {
		for _, kid := range kids.Elements() {
			elem := &structElement{}
			elem.parseStructElement(kid)
			elements = append(elements, *elem)
		}
	}
	_cagg._cfbfg = elements
	_cagg._gfbe = typeName
}

// makeRectRuling tries to interpret the first four points of the subpath as a
// rectangle and convert it to a ruling with color `_egeda`.
//
// Each of the four sides is classified by `_bcgd`. The path qualifies when it
// has two sides of each of the kinds `_eeg` (logged as horzs) and `_gecdf`
// (logged as verts), or two of one kind, none of the other, and the matching
// point test (`_fdfbc` or `_cbcee`) passes. The subpath must hold at least
// four points.
//
// On failure the boolean is false; the returned pointer is an empty ruling for
// the shape test and nil for the later checks.
func (_geae *subpath) makeRectRuling(_egeda _fe.Color) (*ruling, bool) {
	if _fccf {
		_ag.Log.Info("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076", _geae)
	}
	points := _geae._aaebg[:4]
	kinds := make(map[int]rulingKind, len(points))
	for i, p := range points {
		next := _geae._aaebg[(i+1)%4]
		kinds[i] = _bcgd(p, next)
		if _fccf {
			_efc.Printf("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066", i, kinds[i], p, next)
		}
	}
	if _fccf {
		_efc.Printf("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a", kinds)
	}

	// Walk the sides by index rather than by ranging over the map so that the
	// index lists (and the debug output) come out in a deterministic order.
	var verts, horzs []int
	for i := range points {
		switch kinds[i] {
		case _eeg:
			horzs = append(horzs, i)
		case _gecdf:
			verts = append(verts, i)
		}
	}
	if _fccf {
		_efc.Printf("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(horzs), horzs)
		_efc.Printf("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(verts), verts)
	}

	ok := (len(horzs) == 2 && len(verts) == 2) ||
		(len(horzs) == 2 && len(verts) == 0 && _fdfbc(points[horzs[0]], points[horzs[1]])) ||
		(len(verts) == 2 && len(horzs) == 0 && _cbcee(points[verts[0]], points[verts[1]]))
	if _fccf {
		_efc.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
	}
	if !ok {
		if _fccf {
			_ag.Log.Error("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v", _geae)
			_efc.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
		}
		return &ruling{}, false
	}

	// When one kind is absent, the two sides that are not of the other kind
	// stand in for it.
	if len(verts) == 0 {
		for i := range points {
			if kinds[i] != _eeg {
				verts = append(verts, i)
			}
		}
	}
	if len(horzs) == 0 {
		for i := range points {
			if kinds[i] != _gecdf {
				horzs = append(horzs, i)
			}
		}
	}
	if _fccf {
		_ag.Log.Info("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076", len(horzs), len(verts), len(points), horzs, verts, points)
	}

	var right, left, top, bottom _aae.Point
	if points[horzs[0]].Y > points[horzs[1]].Y {
		top, bottom = points[horzs[0]], points[horzs[1]]
	} else {
		top, bottom = points[horzs[1]], points[horzs[0]]
	}
	if points[verts[0]].X > points[verts[1]].X {
		right, left = points[verts[0]], points[verts[1]]
	} else {
		right, left = points[verts[1]], points[verts[0]]
	}

	// The X bounds are assigned swapped and then normalized below.
	rect := _af.PdfRectangle{Llx: right.X, Urx: left.X, Lly: bottom.Y, Ury: top.Y}
	if rect.Llx > rect.Urx {
		rect.Llx, rect.Urx = rect.Urx, rect.Llx
	}
	if rect.Lly > rect.Ury {
		rect.Lly, rect.Ury = rect.Ury, rect.Lly
	}

	rr := rectRuling{PdfRectangle: rect, _beda: _bbbee(rect), Color: _egeda}
	if rr._beda == _ceag {
		if _fccf {
			_ag.Log.Error("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c")
		}
		return nil, false
	}
	result, isRuling := rr.asRuling()
	if !isRuling {
		if _fccf {
			_ag.Log.Error("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg")
		}
		return nil, false
	}
	if _gdeb {
		_efc.Printf("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a", result.String())
	}
	return result, true
}

// add appends the given points to the subpath.
func (_faccf *subpath) add(_bccf ..._aae.Point) {
	_faccf._aaebg = append(_faccf._aaebg, _bccf...)
}

// fill appends a pathSection made of the current subpaths (`_baca`) and the
// current fill color to `*_dfd`.
func (_aabb *shapesState) fill(_dfd *[]pathSection) {
	section := pathSection{_bgbeg: _aabb._baca, Color: _aabb._cegf.getFillColor()}
	*_dfd = append(*_dfd, section)
	if !_gdeb {
		return
	}
	box := section.bbox()
	_efc.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*_dfd), len(section._bgbeg), _aabb, section.Color, box, box.Width(), box.Height())
	if _edebg {
		// Print at most the first eleven subpaths.
		for i, sp := range section._bgbeg {
			_efc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sp)
			if i == 10 {
				break
			}
		}
	}
}

// _adca returns the keys of `_accgf` in ascending order.
func _adca(_accgf map[int]intSet) []int {
	keys := make([]int, 0, len(_accgf))
	for key := range _accgf {
		keys = append(keys, key)
	}
	_e.Ints(keys)
	return keys
}

// _edddc reports whether the two values differ by at most `_bcae`.
func _edddc(_abcg, _cgfdb float64) bool {
	return _ea.Abs(_abcg-_cgfdb) <= _bcae
}

// findTextTables grows a table from every paragraph that is not taken, has a
// non-zero width and for which isAtom returns a table. Tables whose
// `_aageb` x `_cegga` product is below `_gaba` are discarded; the cells of the
// others are marked and the tables returned.
func (_fafcf paraList) findTextTables() []*textTable {
	var tables []*textTable
	for _, para := range _fafcf {
		if para.taken() || para.Width() == 0 {
			continue
		}
		table := para.isAtom()
		if table == nil {
			continue
		}
		table.growTable()
		if table._aageb*table._cegga < _gaba {
			continue
		}
		table.markCells()
		table.log("\u0067\u0072\u006fw\u006e")
		tables = append(tables, table)
	}
	return tables
}

// log prints the rulings, prefixed by `_cdfad`, when `_gdeb` is set.
func (_gccca rulingList) log(_cdfad string) {
	if !_gdeb {
		return
	}
	_ag.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _cdfad, _gccca.String())
	for i, r := range _gccca {
		_efc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r.String())
	}
}

// firstWord returns the first word stored in bin `_ceae`. The bin must exist
// and be non-empty.
func (_dgged *wordBag) firstWord(_ceae int) *textWord {
	return _dgged._cdbc[_ceae][0]
}

// _cdee returns `_egdc`, then the values of `_deafe` lying strictly between
// the two bounds, then `_fbcab`. Scanning stops at the first value that
// reaches the larger bound, so `_deafe` is expected to be in ascending order.
func _cdee(_deafe []float64, _egdc, _fbcab float64) []float64 {
	lo, hi := _egdc, _fbcab
	if hi < lo {
		lo, hi = hi, lo
	}
	out := make([]float64, 0, len(_deafe)+2)
	out = append(out, _egdc)
	for _, v := range _deafe {
		if v <= lo {
			continue
		} else if v >= hi {
			break
		}
		out = append(out, v)
	}
	out = append(out, _fbcab)
	return out
}

// _acgbd logs the number of grids with the title `_dgbe`, then prints each grid.
func _acgbd(_dgbe string, _cbddd []rulingList) {
	_ag.Log.Info("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073", len(_cbddd), _dgbe)
	for i, grid := range _cbddd {
		_efc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, grid.String())
	}
}

// toTextMarks flattens the paragraphs into TextMarks, skipping paragraphs with
// `_bdgc` set. Between a paragraph and its successor it inserts a single space
// when `_adab` accepts the pair and two line feeds otherwise; two line feeds
// are appended at the end.
func (_gece paraList) toTextMarks() []TextMark {
	offset := 0
	var marks []TextMark
	for i, para := range _gece {
		if para._bdgc {
			continue
		}
		marks = append(marks, para.toTextMarks(&offset)...)
		if i != len(_gece)-1 {
			if _adab(para, _gece[i+1]) {
				marks = _dbce(marks, &offset, "\u0020")
			} else {
				marks = _dbce(marks, &offset, "\u000a")
				marks = _dbce(marks, &offset, "\u000a")
			}
		}
	}
	marks = _dbce(marks, &offset, "\u000a")
	marks = _dbce(marks, &offset, "\u000a")
	return marks
}

// list is a node of a list structure: its own text lines, two strings and its
// child lists.
type list struct {
	_fged []*textLine
	_fdgc string
	_fbef []*list
	_cbda string
}

// moveTextSetLeading stores -`_agbf` in the text state's `_cdc` field and then
// moves the line position by (`_cabdc`, `_agbf`).
func (_cbac *textObject) moveTextSetLeading(_cabdc, _agbf float64) {
	_cbac._ecff._cdc = -_agbf
	_cbac.moveLP(_cabdc, _agbf)
}

// sort orders the words of every bin in place using `_fdbb` as comparator.
func (_dad *wordBag) sort() {
	for _, words := range _dad._cdbc {
		_e.Slice(words, func(i, j int) bool {
			return _fdbb(words[i], words[j]) < 0
		})
	}
}

// _gdfa returns `_fbce` of the first argument minus `_fbce` of the second.
func _gdfa(_ddae, _gfgd bounded) float64 {
	return _fbce(_ddae) - _fbce(_gfgd)
}
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// String returns a string describing `i`.
func (_cbbc gridTile )String ()string {_bcdg :=func (_eccdf bool ,_bfgag string )string {if _eccdf {return _bfgag ;};return "\u005f";};return _efc .Sprintf ("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073",_cbbc .PdfRectangle ,_bcdg (_cbbc ._gceeb ,"\u004c"),_bcdg (_cbbc ._gdcbg ,"\u0052"),_bcdg (_cbbc ._dbafa ,"\u0042"),_bcdg (_cbbc ._ffdf ,"\u0054"));
};func _cfab (_fbaf ,_edee _af .PdfRectangle )_af .PdfRectangle {return _af .PdfRectangle {Llx :_ea .Min (_fbaf .Llx ,_edee .Llx ),Lly :_ea .Min (_fbaf .Lly ,_edee .Lly ),Urx :_ea .Max (_fbaf .Urx ,_edee .Urx ),Ury :_ea .Max (_fbaf .Ury ,_edee .Ury )};
};func (_bbega *ruling )alignsSec (_bgaef *ruling )bool {const _caab =_cggd +1.0;return _bbega ._ggdb -_caab <=_bgaef ._gbca &&_bgaef ._ggdb -_caab <=_bbega ._gbca ;};func (_abe *shapesState )addPoint (_bfg ,_bebb float64 ){_edbb :=_abe .establishSubpath ();
_gafeg :=_abe .devicePoint (_bfg ,_bebb );if _edbb ==nil {_abe ._gbee =true ;_abe ._faa =_gafeg ;}else {_edbb .add (_gafeg );};};func _dbce (_bced []TextMark ,_bffg *int ,_dbba string )[]TextMark {_cegfg :=_gded ;_cegfg .Text =_dbba ;return _fgec (_bced ,_bffg ,_cegfg );
};func (_ccac lineRuling )yMean ()float64 {return 0.5*(_ccac ._bbee .Y +_ccac ._efge .Y )};func (_acdc *textObject )nextLine (){_acdc .moveLP (0,-_acdc ._ecff ._cdc )};const (_afdgf markKind =iota ;_cbeb ;_bddf ;_gcfgb ;);func (_ggfdd rulingList )vertsHorzs ()(rulingList ,rulingList ){var _geagb ,_ecbc rulingList ;
for _ ,_edefe :=range _ggfdd {switch _edefe ._ecfb {case _gecdf :_geagb =append (_geagb ,_edefe );case _eeg :_ecbc =append (_ecbc ,_edefe );};};return _geagb ,_ecbc ;};func (_cbaa *ruling )alignsPrimary (_bfgd *ruling )bool {return _cbaa ._ecfb ==_bfgd ._ecfb &&_ea .Abs (_cbaa ._aeef -_bfgd ._aeef )< _cggd *0.5;
2024-04-30 12:24:05 +00:00
};
2024-05-29 17:04:37 +00:00
// String returns a human readable description of `s`.
func (_gfaa intSet) String() string {
	var members []int
	for key := range _gfaa {
		if _gfaa.has(key) {
			members = append(members, key)
		}
	}
	_e.Ints(members)
	return _efc.Sprintf("\u0025\u002b\u0076", members)
}

// markWordBoundaries sets `_dgeeg` on every word after the first for which
// `_egec` of the word and its predecessor is at least `_bgcc` times the
// line's `_afeb`.
func (_aagb *textLine) markWordBoundaries() {
	threshold := _bgcc * _aagb._afeb
	// i indexes the tail slice, so _cfcb[i] is the word preceding w.
	for i, w := range _aagb._cfcb[1:] {
		previous := _aagb._cfcb[i]
		if _egec(w, previous) >= threshold {
			w._dgeeg = true
		}
	}
}

// _gaccd returns the keys of `_aedb` in ascending order.
func _gaccd(_aedb map[int][]float64) []int {
	keys := make([]int, 0, len(_aedb))
	for key := range _aedb {
		keys = append(keys, key)
	}
	_e.Ints(keys)
	return keys
}

// eventNeighbours sorts `_fbfb` in place by `_gafa`, with events whose
// `_ccbeb` flag is set ahead of unset ones at the same `_gafa`, and then
// sweeps over it. An event with `_ccbeb` set pairs its index `_ggfab` with
// every index currently open and opens it; an event with the flag unset closes
// its index. The result maps each paragraph that was opened to the indexes
// paired with it (nil when there are none, in unspecified order otherwise).
func (_cbcb paraList) eventNeighbours(_fbfb []event) map[*textPara][]int {
	_e.Slice(_fbfb, func(i, j int) bool {
		a, b := _fbfb[i], _fbfb[j]
		if a._gafa != b._gafa {
			return a._gafa < b._gafa
		}
		if a._ccbeb != b._ccbeb {
			return a._ccbeb
		}
		return i < j
	})

	pairs := make(map[int]intSet)
	open := make(intSet)
	for _, ev := range _fbfb {
		if !ev._ccbeb {
			open.del(ev._ggfab)
			continue
		}
		pairs[ev._ggfab] = make(intSet)
		for other := range open {
			if other != ev._ggfab {
				pairs[ev._ggfab].add(other)
				pairs[other].add(ev._ggfab)
			}
		}
		open.add(ev._ggfab)
	}

	neighbours := map[*textPara][]int{}
	for idx, set := range pairs {
		para := _cbcb[idx]
		if len(set) == 0 {
			neighbours[para] = nil
			continue
		}
		ids := make([]int, 0, len(set))
		for id := range set {
			ids = append(ids, id)
		}
		neighbours[para] = ids
	}
	return neighbours
}

// primaries returns the distinct `_aeef` values of the rulings in ascending
// order.
func (_dbeca rulingList) primaries() []float64 {
	distinct := make(map[float64]struct{}, len(_dbeca))
	for _, r := range _dbeca {
		distinct[r._aeef] = struct{}{}
	}
	values := make([]float64, 0, len(distinct))
	for v := range distinct {
		values = append(values, v)
	}
	_e.Float64s(values)
	return values
}

// _dgg reports whether `_ead` contains a font whose FontName equals `_edd`.
func _dgg(_ead []Font, _edd string) bool {
	for i := range _ead {
		if _ead[i].FontName == _edd {
			return true
		}
	}
	return false
}

// removeWord removes `_bfbe` from bin `_dbaf` via `_fgcbe` and deletes the bin
// when it becomes empty.
func (_ddb *wordBag) removeWord(_bfbe *textWord, _dbaf int) {
	remaining := _fgcbe(_ddb._cdbc[_dbaf], _bfbe)
	if len(remaining) > 0 {
		_ddb._cdbc[_dbaf] = remaining
		return
	}
	delete(_ddb._cdbc, _dbaf)
}

// extractContentStreamImages parses the content stream `_fca` and runs it
// through a content stream processor with processOperand registered for all
// operands. The image cache `_ebf` and the options `_dac` are initialized on
// first use. Parse and processing errors are returned as is.
func (_gcg *imageExtractContext) extractContentStreamImages(_fca string, _bg *_af.PdfPageResources) error {
	parser := _aa.NewContentStreamParser(_fca)
	operations, err := parser.Parse()
	if err != nil {
		return err
	}
	if _gcg._ebf == nil {
		_gcg._ebf = map[*_gf.PdfObjectStream]*cachedImage{}
	}
	if _gcg._dac == nil {
		_gcg._dac = &ImageExtractOptions{}
	}
	processor := _aa.NewContentStreamProcessor(*operations)
	processor.AddHandler(_aa.HandlerConditionEnumAllOperands, "", _gcg.processOperand)
	return processor.Process(_bg)
}

// getStrokeColor returns the graphics state's stroking color as a Go color.
func (_adfd *textObject) getStrokeColor() _fe.Color {
	gs := _adfd._aef
	return _cfeg(gs.ColorspaceStroking, gs.ColorStroking)
}

// newTextMark builds a textMark for the text `_dcgb` rendered with matrix
// `_bcacb` and ending at `_egde`.
//
// The orientation is derived from the matrix angle via `_afeec`; the bounding
// box runs from the matrix translation to `_egde`, extended by the matrix
// scaling factor on the side selected by the orientation. When the extractor's
// media box (`_fd`) has a positive width the box is clipped to it; the boolean
// result is false when there is no intersection. A second rectangle, remapped
// according to the orientation, becomes the mark's PdfRectangle.
//
// `_egbc` is accepted but not used.
func (_aaac *textObject) newTextMark(_dcgb string, _bcacb _aae.Matrix, _egde _aae.Point, _egbc float64, _dgac *_af.PdfFont, _afde float64, _bagc, _dbgdg _fe.Color, _eeca _gf.PdfObject, _acdb []string, _cdbb int, _bagce int) (textMark, bool) {
	angle := _bcacb.Angle()
	orient := _afeec(angle, _dgbd)

	var height float64
	if orient%180 == 90 {
		height = _bcacb.ScalingFactorX()
	} else {
		height = _bcacb.ScalingFactorY()
	}

	start := _eaf(_bcacb)
	box := _af.PdfRectangle{Llx: start.X, Lly: start.Y, Urx: _egde.X, Ury: _egde.Y}
	switch orient % 360 {
	case 90:
		box.Urx -= height
	case 180:
		box.Ury -= height
	case 270:
		box.Urx += height
	case 0:
		box.Ury += height
	default:
		// Any other orientation is treated as 0.
		orient = 0
		box.Ury += height
	}
	if box.Llx > box.Urx {
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	onPage := true
	if _aaac._dbe._fd.Width() > 0 {
		clipped, intersects := _dgfg(box, _aaac._dbe._fd)
		if !intersects {
			onPage = false
			_ag.Log.Debug("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q", box, _aaac._dbe._fd, _dcgb)
		}
		box = clipped
	}

	remapped := box
	media := _aaac._dbe._fd
	switch orient % 360 {
	case 90:
		media.Urx, media.Ury = media.Ury, media.Urx
		remapped = _af.PdfRectangle{
			Llx: media.Urx - box.Ury,
			Urx: media.Urx - box.Lly,
			Lly: box.Llx,
			Ury: box.Urx,
		}
	case 180:
		remapped = _af.PdfRectangle{
			Llx: media.Urx - box.Llx,
			Urx: media.Urx - box.Urx,
			Lly: media.Ury - box.Lly,
			Ury: media.Ury - box.Ury,
		}
	case 270:
		media.Urx, media.Ury = media.Ury, media.Urx
		remapped = _af.PdfRectangle{
			Llx: box.Ury,
			Urx: box.Lly,
			Lly: media.Ury - box.Llx,
			Ury: media.Ury - box.Urx,
		}
	}
	if remapped.Llx > remapped.Urx {
		remapped.Llx, remapped.Urx = remapped.Urx, remapped.Llx
	}
	if remapped.Lly > remapped.Ury {
		remapped.Lly, remapped.Ury = remapped.Ury, remapped.Lly
	}

	mark := textMark{
		_cgeb:        _dcgb,
		PdfRectangle: remapped,
		_fbaa:        box,
		_dbgfg:       _dgac,
		_acaf:        height,
		_bdeb:        _afde,
		_acgb:        _bcacb,
		_efab:        _egde,
		_ddfdb:       orient,
		_ccbff:       _bagc,
		_bcfc:        _dbgdg,
		_bae:         _eeca,
		_afcb:        _acdb,
		Th:           _aaac._ecff._dba,
		Tw:           _aaac._ecff._febe,
		_ffbdg:       _bagce,
		_feg:         _cdbb,
	}
	if _aebe {
		_ag.Log.Info("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073", start, _egde, mark.String())
	}
	return mark, onPage
}

// gridTiling is a rectangle with two coordinate lists and a two-level map of
// the tiles keyed by coordinates.
type gridTiling struct {
	_af.PdfRectangle
	_beefa []float64
	_eeba  []float64
	_agba  map[float64]map[float64]gridTile
}

// _egdfd returns copies of the marks in `_acdd` with `_bacg` set and `_efce`
// pointing at `_dcaad`. The result is nil when `_acdd` is empty.
func _egdfd(_acdd []TextMark, _dcaad *TextTable) []TextMark {
	var tagged []TextMark
	for _, mark := range _acdd {
		mark._bacg = true
		mark._efce = _dcaad
		tagged = append(tagged, mark)
	}
	return tagged
}

// _edfff decodes the stream object `_acdfa` as an XObject image and passes the
// decoded image together with `_fbgab` to `_gaeb`. It returns (nil, nil) when
// the object is not a stream.
func _edfff(_acdfa _gf.PdfObject, _fbgab _fe.Color) (_ef.Image, error) {
	stream, isStream := _gf.GetStream(_acdfa)
	if !isStream {
		return nil, nil
	}
	ximg, err := _af.NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}
	img, err := ximg.ToImage()
	if err != nil {
		return nil, err
	}
	return _gaeb(img, _fbgab), nil
}

// snapToGroups applies snapToGroupsDirection separately to the two ruling
// lists returned by vertsHorzs and returns the results concatenated.
func (_fecga rulingList) snapToGroups() rulingList {
	verts, horzs := _fecga.vertsHorzs()
	if len(verts) > 0 {
		verts = verts.snapToGroupsDirection()
	}
	if len(horzs) > 0 {
		horzs = horzs.snapToGroupsDirection()
	}
	snapped := append(verts, horzs...)
	snapped.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return snapped
}

// pullWord appends `_ccfb` to the line and removes it from bin `_bgcd` of
// `_cbde`.
func (_bfeaa *textLine) pullWord(_cbde *wordBag, _ccfb *textWord, _bgcd int) {
	_bfeaa.appendWord(_ccfb)
	_cbde.removeWord(_ccfb, _bgcd)
}

// getFillColor returns the graphics state's non-stroking color as a Go color.
func (_dfb *textObject) getFillColor() _fe.Color {
	gs := _dfb._aef
	return _cfeg(gs.ColorspaceNonStroking, gs.ColorNonStroking)
}

// _affg rounds `_cagbd` to the nearest multiple of `_fbf`.
func _affg(_cagbd float64) float64 {
	steps := _ea.Round(_cagbd / _fbf)
	return _fbf * steps
}

// _ecd returns a textState with `_dba` set to 100, `_aaeb` set to
// RenderModeFill and `_bcbc` set to `_gbc`.
func _ecd(_gbc _af.PdfRectangle) textState {
	var state textState
	state._dba = 100
	state._aaeb = RenderModeFill
	state._bcbc = _gbc
	return state
}

// depthBand returns the result of depthRange for the depth indexes of `_gaed`
// and `_dab`, or nil when the bag has no bins.
func (_bgdbe *wordBag) depthBand(_gaed, _dab float64) []int {
	if len(_bgdbe._cdbc) == 0 {
		return nil
	}
	return _bgdbe.depthRange(_bgdbe.getDepthIdx(_gaed), _bgdbe.getDepthIdx(_dab))
}
2024-04-16 11:40:43 +00:00
2024-05-29 17:04:37 +00:00
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
// Marks flagged as Meta and marks whose text consists only of whitespace are skipped.
// The boolean result is false when no mark contributed to the rectangle; this
// includes a nil receiver, which is tolerated in the same way Len tolerates it.
func (_facc *TextMarkArray) BBox() (_af.PdfRectangle, bool) {
	var _cfcc _af.PdfRectangle
	if _facc == nil {
		// Nothing to enclose: report "no box" instead of dereferencing nil.
		return _cfcc, false
	}
	_eacc := false
	for _, _cff := range _facc._aec {
		if _cff.Meta || _fcegd(_cff.Text) {
			continue
		}
		if !_eacc {
			// The first contributing mark seeds the rectangle.
			_cfcc = _cff.BBox
			_eacc = true
			continue
		}
		_cfcc = _cfab(_cfcc, _cff.BBox)
	}
	return _cfcc, _eacc
}

// _deeg returns a new slice holding the elements of `_deba` in reverse order.
// The input slice is not modified.
func _deeg(_deba []int) []int {
	_egdbc := make([]int, len(_deba))
	for _adba, _ccgf := range _deba {
		_egdbc[len(_deba)-1-_adba] = _ccgf
	}
	return _egdbc
}
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// Text returns the extracted page text.
func (_gcb PageText) Text() string {
	return _gcb._ffb
}

// _gd is a package-level switch; when it is true, _ecbg returns nil even
// without a licensed key.
var _gd = false

// lastpointEstablished returns a point and a flag. The flag is false when a
// point is available (either the stored point `_faa` when `_gbee` is set, or
// the last point of the most recent subpath when that subpath has `_cedc`
// set) and true when the zero point is returned.
func (_fcfc *shapesState) lastpointEstablished() (_aae.Point, bool) {
	if _fcfc._gbee {
		return _fcfc._faa, false
	}
	if count := len(_fcfc._baca); count > 0 {
		if tail := _fcfc._baca[count-1]; tail._cedc {
			return tail.last(), false
		}
	}
	return _aae.Point{}, true
}

// _bdfg builds the list of paragraphs for a page from its text marks.
// It returns nil when there are no marks or when `_dbeee` yields nothing.
// When `_agac` is false, table extraction (given at least `_gaba` paragraphs)
// and the topological sort are applied in addition to the reading-order sort.
func _bdfg(_bcgg []*textMark, _adcg _af.PdfRectangle, _acdcb rulingList, _acbae []gridTiling, _agac bool) paraList {
	_ag.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_bcgg), _adcg)
	if len(_bcgg) == 0 {
		return nil
	}
	words := _dbeee(_bcgg, _adcg)
	if len(words) == 0 {
		return nil
	}
	_acdcb.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := _acdcb.vertsHorzs()
	pageTop := _adcg.Ury
	bags := _efg(words, pageTop, verts, horzs)
	bags = _facf(_bcfb(bags, pageTop, verts, horzs))

	paras := make(paraList, 0, len(bags))
	for _, bag := range bags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}
	wantTables := !_agac
	if wantTables && len(paras) >= _gaba {
		paras = paras.extractTables(_acbae)
	}
	paras.sortReadingOrder()
	if wantTables {
		paras.sortTopoOrder()
	}
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}

// _cdgd packs two ints into a single uint64 key: the first argument is
// shifted up by 24 bits and the second is added to it.
func _cdgd(_bdcba, _ecddg int) uint64 {
	return (uint64(_bdcba) << 24) + uint64(_ecddg)
}
// reduceTiling returns a table with empty composite rows/columns removed.
// A row index i is dropped when i > 0, the tiling's `_eeba` values at i-1 and i
// differ by less than `_gebdf`, and emptyCompositeRow(i) is true. A column
// index i is dropped when it is not the last column, the tiling's `_beefa`
// values at i and i+1 differ by less than `_gebdf`, and emptyCompositeColumn(i)
// is true. If nothing is dropped the receiver itself is returned; otherwise a
// new textTable is built and the surviving composite cells are copied to their
// renumbered positions. Extra diagnostics are printed when `_dedc` is set.
// NOTE(review): `_eeba`/`_beefa` are indexed with the table's row/column counts;
// presumably the tiling has at least that many entries — confirm against callers.
func (_ebdcb *textTable )reduceTiling (_acfbb gridTiling ,_gebdf float64 )*textTable {_abeg :=make ([]int ,0,_ebdcb ._cegga );_bfcaf :=make ([]int ,0,_ebdcb ._aageb );_dccca :=_acfbb ._beefa ;_egccg :=_acfbb ._eeba ;for _ccaa :=0;_ccaa < _ebdcb ._cegga ;
_ccaa ++{_dccfa :=_ccaa > 0&&_ea .Abs (_egccg [_ccaa -1]-_egccg [_ccaa ])< _gebdf &&_ebdcb .emptyCompositeRow (_ccaa );if !_dccfa {_abeg =append (_abeg ,_ccaa );};};for _bbeaa :=0;_bbeaa < _ebdcb ._aageb ;_bbeaa ++{_cgcbb :=_bbeaa < _ebdcb ._aageb -1&&_ea .Abs (_dccca [_bbeaa +1]-_dccca [_bbeaa ])< _gebdf &&_ebdcb .emptyCompositeColumn (_bbeaa );
if !_cgcbb {_bfcaf =append (_bfcaf ,_bbeaa );};};if len (_abeg )==_ebdcb ._cegga &&len (_bfcaf )==_ebdcb ._aageb {return _ebdcb ;};_bged :=textTable {_caagg :_ebdcb ._caagg ,_aageb :len (_bfcaf ),_cegga :len (_abeg ),_becfc :make (map[uint64 ]compositeCell ,len (_bfcaf )*len (_abeg ))};
if _dedc {_ag .Log .Info ("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064",_ebdcb ._aageb ,_ebdcb ._cegga ,len (_bfcaf ),len (_abeg ));_ag .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076",_bfcaf );
_ag .Log .Info ("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076",_abeg );};for _adcfa ,_fcddeg :=range _abeg {for _gdfac ,_fcdee :=range _bfcaf {_cgcac ,_cfcaa :=_ebdcb .getComposite (_fcdee ,_fcddeg );if len (_cgcac )==0{continue ;
};if _dedc {_efc .Printf ("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n",_gdfac ,_adcfa ,_fcdee ,_fcddeg ,_efcca (_cgcac .merge ().text (),50));};_bged .putComposite (_gdfac ,_adcfa ,_cgcac ,_cfcaa );
};};return &_bged ;};
// _eeecd returns the distance from the top edge (Ury) of `_bbeg` down to the
// bottom edge (Lly) of the bounding box of `_bbbd`.
func _eeecd (_bbeg _af .PdfRectangle ,_bbbd bounded )float64 {return _bbeg .Ury -_bbbd .bbox ().Lly };
// isAtom tries to form a table from the receiver and three neighbouring
// paragraphs: its `_aggd` and `_cabda` links and the `_cabda` link of `_aggd`.
// It returns nil if any of the three is taken(), or if the fourth paragraph is
// not also the `_aggd` link of the receiver's `_cabda` neighbour; otherwise it
// returns the result of `_dcbd` for the four paragraphs.
// NOTE(review): the spatial meaning of the `_aggd`/`_cabda` links (e.g.
// right/below) is not visible here — confirm where the links are assigned.
func (_edfe *textPara )isAtom ()*textTable {_fbebb :=_edfe ;_gcfa :=_edfe ._aggd ;_dbfb :=_edfe ._cabda ;if _gcfa .taken ()||_dbfb .taken (){return nil ;
};_fbcgf :=_gcfa ._cabda ;if _fbcgf .taken ()||_fbcgf !=_dbfb ._aggd {return nil ;};return _dcbd (_fbebb ,_gcfa ,_dbfb ,_fbcgf );};
// push appends a copy of `*_efcc` to the stack, so later changes to the
// caller's textState do not affect the stored entry.
func (_cdg *stateStack )push (_efcc *textState ){_cbb :=*_efcc ;*_cdg =append (*_cdg ,&_cbb )};
// textLine is a bounding rectangle plus the words it contains (`_cfcb`).
// appendWord keeps `_addd` as the largest `_accb` and `_afeb` as the largest
// `_abcc` among the appended words; String prints `_afeb` as "fontsize".
type textLine struct{_af .PdfRectangle ;
_addd float64 ;_cfcb []*textWord ;_afeb float64 ;};
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// String returns a description of `t`: its column count, its row count and
// the value of its boolean `_caagg` flag.
func (_gdcdc *textTable) String() string {
	cols, rows := _gdcdc._aageb, _gdcdc._cegga
	return _efc.Sprintf("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074", cols, rows, _gdcdc._caagg)
}

// bbox returns the table's bounding rectangle.
func (_ebba *textTable) bbox() _af.PdfRectangle {
	rect := _ebba.PdfRectangle
	return rect
}
// depth returns the smallest depth() among the cells in row 0 of the table,
// skipping cells that are nil or have `_bdgc` set. If no cell qualifies the
// initial value 1e10 is returned.
func (_aeagcb *textTable )depth ()float64 {_fgbgd :=1e10;for _dgad :=0;_dgad < _aeagcb ._aageb ;_dgad ++{_fbbda :=_aeagcb .get (_dgad ,0);if _fbbda ==nil ||_fbbda ._bdgc {continue ;};_fgbgd =_ea .Min (_fgbgd ,_fbbda .depth ());};return _fgbgd ;};
// fontEntry pairs a font with an int64 value.
// NOTE(review): the meaning of `_edeb` is not visible in this part of the file.
type fontEntry struct{_gggfe *_af .PdfFont ;
_edeb int64 ;};
// _afgg computes candidate horizontal borders for the composite cells of one
// row. It gathers the lines of all cells and sorts them by `_addd`, falling
// back to Llx when `_cdaea` reports the `_addd` difference as negligible. It
// then walks the sorted lines while tracking the line with the lowest Lly seen
// so far; whenever a line's Ury lies below that Lly, the midpoint between the
// two is recorded as a border and a new group of lines is started. Finally
// every cell must report hasLines for every group; if any cell fails, the
// borders are discarded and nil is returned. Diagnostics are printed when
// `_dedc` is set.
// NOTE(review): `_cfbab [0]` is read without an emptiness check, so this
// panics if the cells contain no lines — confirm callers guarantee at least one.
func _afgg (_caebb []compositeCell )[]float64 {var _cfbab []*textLine ;_cfgfd :=0;for _ ,_gedb :=range _caebb {_cfgfd +=len (_gedb .paraList );_cfbab =append (_cfbab ,_gedb .lines ()...);};_e .Slice (_cfbab ,func (_deada ,_defa int )bool {_gaec ,_fbcdd :=_cfbab [_deada ],_cfbab [_defa ];
_dgee ,_dddg :=_gaec ._addd ,_fbcdd ._addd ;if !_cdaea (_dgee -_dddg ){return _dgee < _dddg ;};return _gaec .Llx < _fbcdd .Llx ;});if _dedc {_efc .Printf ("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a",_cfgfd ,len (_cfbab ));
for _ebgef ,_eedce :=range _cfbab {_efc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_ebgef ,_eedce );};};var _eagfg []float64 ;_acfca :=_cfbab [0];var _dace [][]*textLine ;_aagfb :=[]*textLine {_acfca };for _cacgb ,_fffga :=range _cfbab [1:]{if _fffga .Ury < _acfca .Lly {_dfff :=0.5*(_fffga .Ury +_acfca .Lly );
if _dedc {_efc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a",_cacgb ,_fffga .Ury ,_acfca .Lly ,_dfff ,_acfca ,_fffga );
};_eagfg =append (_eagfg ,_dfff );_dace =append (_dace ,_aagfb );_aagfb =nil ;};_aagfb =append (_aagfb ,_fffga );if _fffga .Lly < _acfca .Lly {_acfca =_fffga ;};};if len (_aagfb )> 0{_dace =append (_dace ,_aagfb );};if _dedc {_efc .Printf (" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a",_eagfg );
};if _dedc {_ag .Log .Info ("\u0072\u006f\u0077\u003d\u0025\u0064",len (_caebb ));for _eddec ,_cedd :=range _caebb {_efc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a",_eddec ,_cedd );};_ag .Log .Info ("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d",len (_dace ));
for _gfec ,_dbeac :=range _dace {_efc .Printf ("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a",_gfec ,len (_dbeac ));for _dbfa ,_gfed :=range _dbeac {_efc .Printf ("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a",_dbfa ,_gfed );};};};_dbceg :=true ;
for _adbca ,_fgabb :=range _dace {_defeg :=true ;for _cgcg ,_bgfac :=range _caebb {if _dedc {_efc .Printf ("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a",_adbca ,len (_dace ),_cgcg ,len (_caebb ),_bgfac );
};if !_bgfac .hasLines (_fgabb ){if _dedc {_efc .Printf ("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a",_adbca ,len (_dace ),_cgcg ,len (_caebb ));
};_defeg =false ;break ;};};if !_defeg {_dbceg =false ;break ;};};if !_dbceg {if _dedc {_ag .Log .Info ("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg");
};_eagfg =nil ;};if _dedc &&_eagfg !=nil {_efc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a",_eagfg );};return _eagfg ;
};
// _dfba reports whether the vertical extents [Lly, Ury] of the two rectangles
// overlap (touching edges count as overlapping).
func _dfba (_ecg ,_bddb _af .PdfRectangle )bool {return _ecg .Lly <=_bddb .Ury &&_bddb .Lly <=_ecg .Ury };
// setTextRise stores `_gdgc` in the `_dgef` field of the text object's state.
// It is a no-op on a nil receiver.
func (_dccfe *textObject )setTextRise (_gdgc float64 ){if _dccfe ==nil {return ;};_dccfe ._ecff ._dgef =_gdgc ;};
// markKind is an integer-valued kind; ruling stores one in its `_agff` field.
type markKind int ;
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// String returns a string describing `tm`: its offset, its text (quoted and
// as hex runes), its bounding box, the font description truncated to 50 bytes,
// and a marker when the mark is Meta.
func (_egbe TextMark) String() string {
	fontDesc := ""
	if font := _egbe.Font; font != nil {
		fontDesc = font.String()
		if len(fontDesc) > 50 {
			fontDesc = fontDesc[:50] + "\u002e\u002e\u002e"
		}
	}

	metaTag := ""
	if _egbe.Meta {
		metaTag = "\u0020\u002a\u004d\u002a"
	}

	box := _egbe.BBox
	return _efc.Sprintf(
		"\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",
		_egbe.Offset, _egbe.Text, []rune(_egbe.Text),
		box.Llx, box.Lly, box.Urx, box.Ury,
		fontDesc, metaTag,
	)
}

// add inserts `_ddedb` into the set.
func (_fgfbd intSet) add(_ddedb int) {
	_fgfbd[_ddedb] = struct{}{}
}
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// String returns a description of `v`. A ruling of kind `_ceag` is reported
// with a fixed marker string. Otherwise the primary and secondary coordinates
// are labelled x/y, swapped to y/x for kind `_eeg`, and a width suffix is
// appended only when `_faba` is non-zero.
func (_eada *ruling) String() string {
	primName, secName := "\u0078", "\u0079"
	switch _eada._ecfb {
	case _ceag:
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	case _eeg:
		primName, secName = "\u0079", "\u0078"
	}

	var widthDesc string
	if _eada._faba != 0.0 {
		widthDesc = _efc.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _eada._faba)
	}

	lo, hi := _eada._ggdb, _eada._gbca
	return _efc.Sprintf(
		"\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",
		_eada._ecfb, primName, _eada._aeef, secName, lo, hi, hi-lo, _eada._agff, _eada.Color, widthDesc,
	)
}

// _fbgge matches either of the two patterns `_cdaf` and `_defba`; it is
// compiled once at package initialisation.
var _fbgge *_g.Regexp = _g.MustCompile(_cdaf + "\u007c" + _defba)
2024-04-16 11:40:43 +00:00
2024-05-29 17:04:37 +00:00
// String returns a description of `b`: its rectangle, its `_cdac` value
// (printed as fontsize), and the texts of its words visited in the order of
// depthIndexes.
func (_ffgc *wordBag) String() string {
	var texts []string
	for _, depthIdx := range _ffgc.depthIndexes() {
		for _, word := range _ffgc._cdbc[depthIdx] {
			texts = append(texts, word._ccbcc)
		}
	}
	return _efc.Sprintf("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071", _ffgc.PdfRectangle, _ffgc._cdac, len(texts), texts)
}

// inTile returns the paragraphs whose rectangle the tile `_ebga` contains.
// Diagnostics are printed when `_dedc` is set.
func (_bbdda paraList) inTile(_ebga gridTile) paraList {
	var inside paraList
	for _, para := range _bbdda {
		if !_ebga.contains(para.PdfRectangle) {
			continue
		}
		inside = append(inside, para)
	}
	if !_dedc {
		return inside
	}
	_efc.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _ebga, len(inside))
	for idx, para := range inside {
		_efc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, para)
	}
	_efc.Println("")
	return inside
}

// _fgec appends `_daff` to `_fgcc` after setting its Offset to `*_bfgf`, then
// advances `*_bfgf` by the byte length of the mark's text.
func _fgec(_fgcc []TextMark, _bfgf *int, _daff TextMark) []TextMark {
	_daff.Offset = *_bfgf
	*_bfgf += len(_daff.Text)
	return append(_fgcc, _daff)
}

// _ccgd returns the `_cbda` text of the list with `*_fgede` prepended to every
// non-empty line; empty lines are dropped and each emitted line ends in "\n".
func _ccgd(_eabe *list, _fgede *string) string {
	var out _a.Builder
	for _, line := range _a.Split(_eabe._cbda, "\u000a") {
		if line == "" {
			continue
		}
		out.WriteString(*_fgede)
		out.WriteString(line)
		out.WriteString("\u000a")
	}
	return out.String()
}

// _dgca returns the keys of `_edfae` sorted in descending order.
func _dgca(_edfae map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_edfae))
	for key := range _edfae {
		keys = append(keys, key)
	}
	_e.Float64s(keys)
	// Reverse the ascending sort in place.
	for lo, hi := 0, len(keys)-1; lo < hi; lo, hi = lo+1, hi-1 {
		keys[lo], keys[hi] = keys[hi], keys[lo]
	}
	return keys
}

// _bedc returns the Llx of the first word of the line.
func _bedc(_fdbf *textLine) float64 {
	first := _fdbf._cfcb[0]
	return first.Llx
}

// shapesState holds two matrices, the list of subpaths collected so far, a
// flag/point pair consulted by lastpointEstablished, and the associated text
// object.
type shapesState struct {
	_cffc _aae.Matrix
	_gdec _aae.Matrix
	_baca []*subpath
	_gbee bool
	_faa  _aae.Point
	_cegf *textObject
}
2024-04-30 12:24:05 +00:00
// GetContentStreamOps returns the contentStreamOps field of `pt`.
2024-05-29 17:04:37 +00:00
func (_edbff *PageText )GetContentStreamOps ()*_aa .ContentStreamOperations {return _edbff ._cfa };
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// The two int results are passed through unchanged from extractPageText. An
// `ErrColorOutOfRange` error from extraction is tolerated; any other error, or
// a failed license check, is returned with a nil PageText and zero counts.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
//
// Replace with a function like Extract() (*PageText, error)
func (_bdg *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pageText, statA, statB, err := _bdg.extractPageText(_bdg._fgb, _bdg._cf, _aae.IdentityMatrix(), 0, false)
	if err != nil && err != _af.ErrColorOutOfRange {
		return nil, 0, 0, err
	}

	opts := _bdg._efe
	if opts != nil {
		pageText._gbg._dbed = opts.UseSimplerExtractionProcess
	}
	pageText.computeViews()

	if licErr := _ecbg(pageText); licErr != nil {
		return nil, 0, 0, licErr
	}

	if opts != nil {
		if opts.ApplyCropBox && _bdg._fed != nil {
			pageText.ApplyArea(*_bdg._fed)
		}
		pageText._gbg._gcgg = opts.DisableDocumentTags
	}
	return pageText, statA, statB, nil
}

// reset restores both matrices of the text object to the identity and clears
// its `_afff` field.
func (_fcga *textObject) reset() {
	_fcga._dbc = _aae.IdentityMatrix()
	_fcga._ebc = _aae.IdentityMatrix()
	_fcga._afff = nil
}

// _gacff reports whether the horizontal extent of the word overlaps that of
// the bag once the bag is widened by `_dbeb` on both sides.
func _gacff(_fcde *wordBag, _ddeg *textWord, _dbeb float64) bool {
	startsBeforeBagEnd := _ddeg.Llx < _fcde.Urx+_dbeb
	return startsBeforeBagEnd && _fcde.Llx-_dbeb < _ddeg.Urx
}

// _ecccgb maps each rulingKind to its display name.
var _ecccgb = map[rulingKind]string{
	_ceag:  "\u006e\u006f\u006e\u0065",
	_eeg:   "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",
	_gecdf: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",
}

// _bbgag converts exactly two PDF objects to a pair of floats. It returns an
// error when the number of objects is not 2 or when the numeric conversion
// fails.
func _bbgag(_daec []_gf.PdfObject) (_ccfgc, _gfbgc float64, _aabeg error) {
	if count := len(_daec); count != 2 {
		return 0, 0, _efc.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", count)
	}
	values, convErr := _gf.GetNumbersAsFloat(_daec)
	if convErr != nil {
		return 0, 0, convErr
	}
	return values[0], values[1], nil
}

// wordBag is a rectangle together with words bucketed by an integer depth
// index (`_cdbc`), two ruling lists, and two float values (`_cdac`, which
// String prints as fontsize, and `_ecba`).
type wordBag struct {
	_af.PdfRectangle
	_cdac        float64
	_dcaa, _bfcg rulingList
	_ecba        float64
	_cdbc        map[int][]*textWord
}

// complete reports whether the tile has all four borders.
func (_dcbg gridTile) complete() bool {
	return _dcbg.numBorders() == 4
}
// event is a small record of a float, a bool and an int.
// NOTE(review): its use is not visible in this part of the file.
type event struct{_gafa float64 ;_ccbeb bool ;_ggfab int ;};
// empty reports whether the bag has no entry for depth index `_badd`.
func (_gdfe *wordBag )empty (_badd int )bool {_ ,_bbca :=_gdfe ._cdbc [_badd ];return !_bbca };
// _fdbb returns the difference between the Llx of `_bbfe` and the Llx of `_bfff`.
func _fdbb (_bbfe ,_bfff bounded )float64 {return _bbfe .bbox ().Llx -_bfff .bbox ().Llx };
// _caaa returns the smaller of the two ints.
func _caaa (_cggf ,_dgbg int )int {if _cggf < _dgbg {return _cggf ;
};return _dgbg ;};
// getComposite looks up the composite cell stored under key
// `_cdgd (_dgfb ,_gddcc )`. It returns (nil, zero rectangle) when there is no
// such cell, otherwise the result of the cell's parasBBox(). The looked-up
// value is printed when `_dedc` is set.
func (_dfefa *textTable )getComposite (_dgfb ,_gddcc int )(paraList ,_af .PdfRectangle ){_eabfgb ,_dbcf :=_dfefa ._becfc [_cdgd (_dgfb ,_gddcc )];if _dedc {_efc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a",_dgfb ,_gddcc ,_eabfgb .String ());
};if !_dbcf {return nil ,_af .PdfRectangle {};};return _eabfgb .parasBBox ();};
// encloses reports whether the interval [`_eecb`, `_bdcbd`] lies within the
// ruling's [`_ggdb`, `_gbca`] range widened by `_bcae` at both ends.
func (_dbbc *ruling )encloses (_eecb ,_bdcbd float64 )bool {return _dbbc ._ggdb -_bcae <=_eecb &&_bdcbd <=_dbbc ._gbca +_bcae ;};
// _bcgac adjusts the slices stored in `_dgedb` in place, visiting keys in the
// order returned by `_gaccd` and starting at the first key with a non-nil
// slice. For each later non-nil entry, if its last element is greater than its
// first element, the last element is set to the first and the slice is stored
// under the previously visited key. Maps with at most one entry are untouched.
// NOTE(review): `_ebfadb` and `_fcbea` are both read from `_dgedb [_gcgdad ]`,
// so the comparison is between two elements of the same slice; it looks like
// one of them may have been meant to be `_dgedb [_dfaf ]` — confirm intent
// before changing.
func _bcgac (_dgedb map[int ][]float64 ){if len (_dgedb )<=1{return ;
};_cgfc :=_gaccd (_dgedb );if _dedc {_ag .Log .Info ("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076",_cgfc );};var _bace ,_dfaf int ;for _bace ,_dfaf =range _cgfc {if _dgedb [_dfaf ]!=nil {break ;};};for _aebfc ,_gcgdad :=range _cgfc [_bace :]{_fcbea :=_dgedb [_gcgdad ];
if _fcbea ==nil {continue ;};if _dedc {_efc .Printf ("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a",_bace +_aebfc ,_dfaf ,_gcgdad );};_ebfadb :=_dgedb [_gcgdad ];if _ebfadb [len (_ebfadb )-1]> _fcbea [0]{_ebfadb [len (_ebfadb )-1]=_fcbea [0];
_dgedb [_dfaf ]=_ebfadb ;};_dfaf =_gcgdad ;};};
// compositeRowCorridors returns, for each row index from 1 upward that has at
// least one composite cell, the borders computed by `_afgg` for the cells of
// that row. Row 0 is not processed.
func (_geeb *textTable )compositeRowCorridors ()map[int ][]float64 {_bfageb :=make (map[int ][]float64 ,_geeb ._cegga );if _dedc {_ag .Log .Info ("c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064",_geeb ._cegga );
};for _dfda :=1;_dfda < _geeb ._cegga ;_dfda ++{var _dfgd []compositeCell ;for _eggd :=0;_eggd < _geeb ._aageb ;_eggd ++{if _ddgc ,_fdbbb :=_geeb ._becfc [_cdgd (_eggd ,_dfda )];_fdbbb {_dfgd =append (_dfgd ,_ddgc );};};if len (_dfgd )==0{continue ;};_bcef :=_afgg (_dfgd );
_bfageb [_dfda ]=_bcef ;if _dedc {_efc .Printf ("\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a",_dfda ,_bcef );};};return _bfageb ;};
// _cgd creates a wordBag containing the single word `_gcaa`, filed under the
// depth index `_fdgf` computes from the word's `_accb`. The bag takes the
// word's rectangle and `_abcc` value and stores `_aegaf` and the two ruling
// lists as given.
func _cgd (_gcaa *textWord ,_aegaf float64 ,_decb ,_abgc rulingList )*wordBag {_fbgg :=_fdgf (_gcaa ._accb );
_ecdf :=[]*textWord {_gcaa };_fggd :=wordBag {_cdbc :map[int ][]*textWord {_fbgg :_ecdf },PdfRectangle :_gcaa .PdfRectangle ,_cdac :_gcaa ._abcc ,_ecba :_aegaf ,_dcaa :_decb ,_bfcg :_abgc };return &_fggd ;};
// absorb moves every word of `_fdde` into the receiver via pullWord, using a
// removals record created by `_fdde .makeRemovals ()` that is applied to
// `_fdde` after all words have been visited.
func (_ddbb *wordBag )absorb (_fdde *wordBag ){_bbggb :=_fdde .makeRemovals ();
for _cbff ,_aabce :=range _fdde ._cdbc {for _ ,_edbd :=range _aabce {_ddbb .pullWord (_edbd ,_cbff ,_bbggb );};};_fdde .applyRemovals (_bbggb );};
// _aefg converts filled path sections into rulings. `_abbaa` is applied to the
// sections first. Each subpath that isQuadrilateral and for which
// makeRectRuling succeeds contributes one ruling; the others are skipped, with
// logging gated by the `_gdeb` and `_fccf` flags.
func _aefg (_beab []pathSection )rulingList {_abbaa (_beab );if _gdeb {_ag .Log .Info ("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs",len (_beab ));
};var _dfeef rulingList ;for _ ,_bbdcg :=range _beab {for _ ,_ceedd :=range _bbdcg ._bgbeg {if !_ceedd .isQuadrilateral (){if _gdeb {_ag .Log .Error ("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073",_ceedd );
};continue ;};if _aeabg ,_cegbe :=_ceedd .makeRectRuling (_bbdcg .Color );_cegbe {_dfeef =append (_dfeef ,_aeabg );}else {if _fccf {_ag .Log .Error ("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073",_ceedd );
};};};};if _gdeb {_ag .Log .Info ("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073",_dfeef .String ());};return _dfeef ;};
2024-04-30 12:24:05 +00:00
// Marks returns the TextMark collection for a page. It represents all the text on the page.
2024-05-29 17:04:37 +00:00
func (_aca PageText )Marks ()*TextMarkArray {return &TextMarkArray {_aec :_aca ._gdbf }};
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_cab *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	ctx := imageExtractContext{_dac: options}
	if err := ctx.extractContentStreamImages(_cab._fgb, _cab._cf); err != nil {
		return nil, err
	}
	return &PageImages{Images: ctx._cgg}, nil
}

// _bbaf returns a ruling of kind `_eeg` running along the top edge (Ury) of
// the rectangle, from its Llx to its Urx.
func _bbaf(_baeg _af.PdfRectangle) *ruling {
	top := ruling{
		_ecfb: _eeg,
		_aeef: _baeg.Ury,
		_ggdb: _baeg.Llx,
		_gbca: _baeg.Urx,
	}
	return &top
}

// getListLines returns the lines of the paragraph whose first word begins with
// a byte accepted by `_debe`, followed by the lines returned by `_bfeb` for
// the paragraph's lines.
func (_ecccg *textPara) getListLines() []*textLine {
	// _bfeb is evaluated before the scan, as in the original statement order.
	extra := _bfeb(_ecccg._aage)

	var matched []*textLine
	for _, line := range _ecccg._aage {
		firstWord := line._cfcb[0]
		if _debe(firstWord._ccbcc[0]) {
			matched = append(matched, line)
		}
	}
	return append(matched, extra...)
}

// inDiacriticArea reports whether `_dgddg` sits close enough to the receiver:
// the sum of the left-edge and right-edge offsets must be within `_bbcc` times
// the receiver's width, and the bottom-edge offset within `_bbcc` times its
// height.
func (_ddef *textMark) inDiacriticArea(_dgddg *textMark) bool {
	leftOff := _ddef.Llx - _dgddg.Llx
	rightOff := _ddef.Urx - _dgddg.Urx
	bottomOff := _ddef.Lly - _dgddg.Lly
	if !(_ea.Abs(leftOff+rightOff) < _ddef.Width()*_bbcc) {
		return false
	}
	return _ea.Abs(bottomOff) < _ddef.Height()*_bbcc
}

// _bbbee classifies a rectangle as a ruling: kind `_eeg` when it is wider than
// tall and at least `_abca` wide, kind `_gecdf` when it is not wider than tall
// and at least `_abca` high, and `_ceag` otherwise.
func _bbbee(_adgea _af.PdfRectangle) rulingKind {
	width, height := _adgea.Width(), _adgea.Height()
	wider := width > height
	switch {
	case wider && width >= _abca:
		return _eeg
	case !wider && height >= _abca:
		return _gecdf
	}
	return _ceag
}

// bounded is implemented by anything that can report a bounding rectangle.
type bounded interface {
	bbox() _af.PdfRectangle
}
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// ToText returns the page text as a single string.
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_fecg PageText) ToText() string {
	text := _fecg.Text()
	return text
}

// _ecbg performs the license check. It returns nil when a license key is
// present and licensed, or when the package flag `_gd` is set; otherwise it
// prints a notice to standard output and returns an error.
func _ecbg(_cgage *PageText) error {
	key := _d.GetLicenseKey()
	licensed := key != nil && key.IsLicensed()
	if licensed || _gd {
		return nil
	}
	_efc.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_efc.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _b.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}

// clearPath drops all collected subpaths and clears the `_gbee` flag. The
// resulting state is logged when `_cece` is set.
func (_bfcc *shapesState) clearPath() {
	_bfcc._baca, _bfcc._gbee = nil, false
	if !_cece {
		return
	}
	_ag.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _bfcc)
}
2024-04-30 12:24:05 +00:00
// String returns a description of `state`.
2024-05-29 17:04:37 +00:00
func (_dedf *textState )String ()string {_gfafg :="\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]";if _dedf ._fgfgb !=nil {_gfafg =_dedf ._fgfgb .BaseFont ();};return _efc .Sprintf ("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071",_dedf ._fdad ,_dedf ._febe ,_dedf ._gbbgg ,_gfafg );
};func (_ggedf rulingList )secMinMax ()(float64 ,float64 ){_ddbf ,_fced :=_ggedf [0]._ggdb ,_ggedf [0]._gbca ;for _ ,_ccge :=range _ggedf [1:]{if _ccge ._ggdb < _ddbf {_ddbf =_ccge ._ggdb ;};if _ccge ._gbca > _fced {_fced =_ccge ._gbca ;};};return _ddbf ,_fced ;
};func _fgcbe (_egaag []*textWord ,_bcee *textWord )[]*textWord {for _edgebf ,_efgc :=range _egaag {if _efgc ==_bcee {return _adfc (_egaag ,_edgebf );};};_ag .Log .Error ("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073",_bcee );
return nil ;};func (_ffcb *textObject )showText (_eccc _gf .PdfObject ,_ega []byte ,_ded int )error {return _ffcb .renderText (_eccc ,_ega ,_ded );};func (_bgcb pathSection )bbox ()_af .PdfRectangle {_fbd :=_bgcb ._bgbeg [0]._aaebg [0];_aaeg :=_af .PdfRectangle {Llx :_fbd .X ,Urx :_fbd .X ,Lly :_fbd .Y ,Ury :_fbd .Y };
_fcac :=func (_fge _aae .Point ){if _fge .X < _aaeg .Llx {_aaeg .Llx =_fge .X ;}else if _fge .X > _aaeg .Urx {_aaeg .Urx =_fge .X ;};if _fge .Y < _aaeg .Lly {_aaeg .Lly =_fge .Y ;}else if _fge .Y > _aaeg .Ury {_aaeg .Ury =_fge .Y ;};};for _ ,_agc :=range _bgcb ._bgbeg [0]._aaebg [1:]{_fcac (_agc );
};for _ ,_gfgc :=range _bgcb ._bgbeg [1:]{for _ ,_bfd :=range _gfgc ._aaebg {_fcac (_bfd );};};return _aaeg ;};func (_dgfae *textLine )appendWord (_cdcbc *textWord ){_dgfae ._cfcb =append (_dgfae ._cfcb ,_cdcbc );_dgfae .PdfRectangle =_cfab (_dgfae .PdfRectangle ,_cdcbc .PdfRectangle );
if _cdcbc ._abcc > _dgfae ._afeb {_dgfae ._afeb =_cdcbc ._abcc ;};if _cdcbc ._accb > _dgfae ._addd {_dgfae ._addd =_cdcbc ._accb ;};};
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// String returns a description of `tm`: its rectangle, its `_acaf` value
// (printed as fontsize) and its `_cgeb` text.
func (_cddfb *textMark) String() string {
	rect, size, text := _cddfb.PdfRectangle, _cddfb._acaf, _cddfb._cgeb
	return _efc.Sprintf("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022", rect, size, text)
}

// _ccga reports whether every mark of every word in the line carries the same
// `_ffbdg` value. The reference value is taken from the marks seen while the
// reference is still -1. A line without marks yields true.
func _ccga(_gddb *textLine) bool {
	ref := -1
	for _, word := range _gddb._cfcb {
		for _, mark := range word._ffcd {
			val := mark._ffbdg
			if ref == -1 {
				ref = val
				continue
			}
			if ref != val {
				// One mismatch settles the answer.
				return false
			}
		}
	}
	return true
}
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// String returns a description of `l`: its `_addd` value, its rectangle, its
// `_afeb` value (printed as fontsize) and its text.
func (_fccg *textLine) String() string {
	return _efc.Sprintf(
		"\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",
		_fccg._addd, _fccg.PdfRectangle, _fccg._afeb, _fccg.text(),
	)
}

// textPara is a paragraph: a bounding rectangle, a second rectangle, its
// lines, an optional table, two flags, four links to other paragraphs and a
// slice of lists.
type textPara struct {
	_af.PdfRectangle
	_gbgbb _af.PdfRectangle
	_aage  []*textLine
	_befe  *textTable
	_fcdcf bool
	_bdgc  bool
	_caagd *textPara
	_aggd  *textPara
	_ecdfc *textPara
	_cabda *textPara
	_fadbb []list
}

// _gaeb renders a mask image as RGBA: pixels whose red, green and blue
// components sum to zero are painted with `_ebgag`, all others are made
// transparent. Pixels whose colour cannot be read are logged and left unset.
func _gaeb(_ccdda *_af.Image, _ebgag _fe.Color) _ef.Image {
	width, height := int(_ccdda.Width), int(_ccdda.Height)
	out := _ef.NewRGBA(_ef.Rect(0, 0, width, height))
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			pixel, err := _ccdda.ColorAt(x, y)
			if err != nil {
				_ag.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e", x, y)
				continue
			}
			r, g, b, _ := pixel.RGBA()
			var fill _fe.Color = _fe.Transparent
			if r+g+b == 0 {
				fill = _ebgag
			}
			out.Set(x, y, fill)
		}
	}
	return out
}

// numBorders returns how many of the tile's four border flags are set.
func (_gdebf gridTile) numBorders() int {
	count := 0
	for _, present := range [...]bool{_gdebf._gceeb, _gdebf._gdcbg, _gdebf._dbafa, _gdebf._ffdf} {
		if present {
			count++
		}
	}
	return count
}

// _cbcee passes the absolute X and Y distances between the two points to
// `_gdcgf` and returns its verdict.
func _cbcee(_cccb, _bfffd _aae.Point) bool {
	return _gdcgf(_ea.Abs(_cccb.X-_bfffd.X), _ea.Abs(_cccb.Y-_bfffd.Y))
}

// getCellInfo searches the table's cells for one whose marks contain `_ccde`.
// It returns {{outer index}, {inner index}} of the first match in `Cells`, or
// nil when no cell contains the mark.
func (_adc TextTable) getCellInfo(_ccde TextMark) [][]int {
	for outer := range _adc.Cells {
		cells := _adc.Cells[outer]
		for inner := range cells {
			if marks := &cells[inner].Marks; marks.exists(_ccde) {
				return [][]int{{outer}, {inner}}
			}
		}
	}
	return nil
}

// _bdcb reports whether both `_gfb` and `_dfba` hold for the two rectangles.
func _bdcb(_fgafb, _bcf _af.PdfRectangle) bool {
	if !_gfb(_fgafb, _bcf) {
		return false
	}
	return _dfba(_fgafb, _bcf)
}

// _adfc removes the element at index `_dcdc` by shifting the following
// elements down in place; the returned slice shares the input's backing array
// and is one element shorter.
func _adfc(_gecbbg []*textWord, _dcdc int) []*textWord {
	last := len(_gecbbg) - 1
	copy(_gecbbg[_dcdc:], _gecbbg[_dcdc+1:])
	return _gecbbg[:last]
}

// bbox returns the mark's bounding rectangle.
func (_gcgda *textMark) bbox() _af.PdfRectangle {
	rect := _gcgda.PdfRectangle
	return rect
}

// _fcegd reports whether the string contains only whitespace runes; the empty
// string counts as whitespace-only.
func _fcegd(_bage string) bool {
	notSpace := func(r rune) bool { return !_fg.IsSpace(r) }
	return _a.IndexFunc(_bage, notSpace) < 0
}

// removeDuplicates collapses runs of consecutive points that `_bgfgg` treats
// as equal, keeping the first point of each run. An empty subpath is left
// untouched.
func (_cbab *subpath) removeDuplicates() {
	points := _cbab._aaebg
	if len(points) == 0 {
		return
	}
	kept := []_aae.Point{points[0]}
	for _, pt := range points[1:] {
		if _bgfgg(pt, kept[len(kept)-1]) {
			continue
		}
		kept = append(kept, pt)
	}
	_cbab._aaebg = kept
}
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// Len returns the number of TextMarks in `ma`.
// A nil receiver has length 0.
func (_fbb *TextMarkArray )Len ()int {if _fbb ==nil {return 0;};return len (_fbb ._aec );};
// _fbce returns the negated Lly of the bounding box of `_ebeab`.
func _fbce (_ebeab bounded )float64 {return -_ebeab .bbox ().Lly };
// _bebec returns the indexes 0.._acca-1 sorted with `_aagfc` as the
// less-than comparison between index values.
func _bebec (_acca int ,_aagfc func (int ,int )bool )[]int {_abfeb :=make ([]int ,_acca );for _ecbbc :=range _abfeb {_abfeb [_ecbbc ]=_ecbbc ;
};_e .Slice (_abfeb ,func (_fefaf ,_bcbgc int )bool {return _aagfc (_abfeb [_fefaf ],_abfeb [_bcbgc ])});return _abfeb ;};
// aligned reports whether enough of the rulings line up with one another.
// Lists with fewer than two rulings are never aligned. Each ruling is either
// counted against an already recorded ruling it is gridIntersecting with, or
// recorded itself with a count of zero. The fraction of recorded rulings whose
// count stayed at zero, relative to the list length, must not exceed
// 1.0 - `_bbce`. The result is logged when `_gdeb` is set.
// NOTE(review): the recorded rulings are visited by ranging over a map, so
// which recorded ruling receives a match can vary between runs.
func (_cdgb rulingList )aligned ()bool {if len (_cdgb )< 2{return false ;};_cgcff :=make (map[*ruling ]int );_cgcff [_cdgb [0]]=0;
for _ ,_bgbb :=range _cdgb [1:]{_gdcd :=false ;for _afdfc :=range _cgcff {if _bgbb .gridIntersecting (_afdfc ){_cgcff [_afdfc ]++;_gdcd =true ;break ;};};if !_gdcd {_cgcff [_bgbb ]=0;};};_adgca :=0;for _ ,_cfbef :=range _cgcff {if _cfbef ==0{_adgca ++;
};};_ffacb :=float64 (_adgca )/float64 (len (_cdgb ));_ecgcd :=_ffacb <=1.0-_bbce ;if _gdeb {_ag .Log .Info ("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073",_ecgcd ,_ffacb ,_adgca ,len (_cdgb ),_cdgb .String ());
};return _ecgcd ;};
// has reports whether `_cgbf` is in the set.
func (_afcf intSet )has (_cgbf int )bool {_ ,_ffdb :=_afcf [_cgbf ];return _ffdb };
// merge combines the paragraphs into one. The list is first sorted into
// reading order (in place); the result is built by `_adbde` from the union of
// the rectangles and the concatenation of the lines. An empty list yields nil.
// NOTE(review): lines are appended onto the first paragraph's `_aage` slice,
// which may write into its backing array if it has spare capacity — confirm
// this aliasing is harmless.
func (_cbed paraList )merge ()*textPara {_ag .Log .Trace ("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d",len (_cbed ));
if len (_cbed )==0{return nil ;};_cbed .sortReadingOrder ();_bgcg :=_cbed [0].PdfRectangle ;_gcbad :=_cbed [0]._aage ;for _ ,_cbdbg :=range _cbed [1:]{_bgcg =_cfab (_bgcg ,_cbdbg .PdfRectangle );_gcbad =append (_gcbad ,_cbdbg ._aage ...);};return _adbde (_bgcg ,_gcbad );
};
// hasLines reports whether at least one of the given lines satisfies `_bdcb`
// together with the cell's rectangle. Diagnostics are printed when `_dedc` is
// set.
func (_dgbc compositeCell )hasLines (_ccffd []*textLine )bool {for _agdab ,_eaba :=range _ccffd {_agdabf :=_bdcb (_dgbc .PdfRectangle ,_eaba .PdfRectangle );if _dedc {_efc .Printf ("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a",_agdabf ,_agdab ,len (_ccffd ));
_efc .Printf ("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a",_dgbc );_efc .Printf ("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a",_eaba );};if _agdabf {return true ;
};};return false ;};
// bbox returns the word's bounding rectangle.
func (_ffae *textWord )bbox ()_af .PdfRectangle {return _ffae .PdfRectangle };
// _dbddc collects the structure elements three levels below `_effff` (children
// of children of children, via `_befc`) whose `_dccda` equals "L".
func _dbddc (_effff structElement )[]structElement {_cfba :=[]structElement {};for _ ,_gbdg :=range _effff ._befc {for _ ,_dbae :=range _gbdg ._befc {for _ ,_cced :=range _dbae ._befc {if _cced ._dccda =="\u004c"{_cfba =append (_cfba ,_cced );
};};};};return _cfba ;};
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// Text gets the extracted text contained in `l`.
// The text is produced by `_fecd`, starting from an empty prefix string.
func (_cfac *list) Text() string {
	var sb _a.Builder
	prefix := ""
	_fecd(_cfac, &sb, &prefix)
	return sb.String()
}

// _gfae formats the map as "{key: values, key: values, ...}" with the keys in
// the order returned by `_gaccd`.
func _gfae(_fgce map[int][]float64) string {
	keys := _gaccd(_fgce)
	parts := make([]string, len(_fgce))
	for idx, key := range keys {
		parts[idx] = _efc.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", key, _fgce[key])
	}
	joined := _a.Join(parts, "\u002c\u0020")
	return _efc.Sprintf("\u007b\u0025\u0073\u007d", joined)
}

// _gfgde starts a new line from the first word of the bag at depth index
// `_cfbc`. The line takes that word's rectangle, `_abcc` and `_accb`, and the
// word is then pulled out of the bag into the line.
func _gfgde(_eaeg *wordBag, _cfbc int) *textLine {
	seed := _eaeg.firstWord(_cfbc)
	line := &textLine{
		PdfRectangle: seed.PdfRectangle,
		_afeb:        seed._abcc,
		_addd:        seed._accb,
	}
	line.pullWord(_eaeg, seed, _cfbc)
	return line
}

// toTextTable converts the internal table into the exported TextTable. Each
// non-nil cell contributes its text and its text marks (with offsets counted
// from zero per cell); missing cells stay at their zero value. Diagnostics
// are printed when `_dedc` is set.
func (_babcc *textTable) toTextTable() TextTable {
	cols, rows := _babcc._aageb, _babcc._cegga
	if _dedc {
		_ag.Log.Info("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064", cols, rows)
	}
	grid := make([][]TableCell, rows)
	for y := range grid {
		grid[y] = make([]TableCell, cols)
		for x := 0; x < cols; x++ {
			para := _babcc.get(x, y)
			if para == nil {
				continue
			}
			if _dedc {
				_efc.Printf("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", x, y, para)
			}
			cell := &grid[y][x]
			cell.Text = para.text()
			offset := 0
			cell.Marks._aec = para.toTextMarks(&offset)
		}
	}
	result := TextTable{W: cols, H: rows, Cells: grid}
	result.PdfRectangle = _babcc.bbox()
	return result
}

// ruling is a line segment with a kind, a mark kind, a colour, a primary
// coordinate (`_aeef`), a secondary range (`_ggdb` to `_gbca`) and a width
// (`_faba`).
type ruling struct {
	_ecfb rulingKind
	_agff markKind
	_fe.Color
	_aeef float64
	_ggdb float64
	_gbca float64
	_faba float64
}

// depthIndexes returns the bag's depth indexes in ascending order, or nil
// when the bag holds no words.
func (_gafc *wordBag) depthIndexes() []int {
	if len(_gafc._cdbc) == 0 {
		return nil
	}
	indexes := make([]int, 0, len(_gafc._cdbc))
	for depthIdx := range _gafc._cdbc {
		indexes = append(indexes, depthIdx)
	}
	_e.Ints(indexes)
	return indexes
}
2024-04-30 12:24:05 +00:00
2024-05-29 17:04:37 +00:00
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct {
	Fonts []Font
}

// bbox returns the line's embedded PdfRectangle.
func (l *textLine) bbox() _af.PdfRectangle {
	return l.PdfRectangle
}

// blocks reports whether any ruling in the list lies between rulings a and b.
// It returns false immediately when the [_ggdb, _gbca] intervals of a and b do
// not overlap. Otherwise a list entry counts when its _aeef value falls
// between the two _aeef values (using the package tolerance _cggd) and its own
// [_ggdb, _gbca] interval touches the overlap of a and b.
func (rl rulingList) blocks(a, b *ruling) bool {
	if a._ggdb > b._gbca || b._ggdb > a._gbca {
		return false
	}
	overlapLo := _ea.Max(a._ggdb, b._ggdb)
	overlapHi := _ea.Min(a._gbca, b._gbca)

	// Order the pair so that lo carries the smaller _aeef.
	lo, hi := a, b
	if a._aeef > b._aeef {
		lo, hi = b, a
	}

	for _, r := range rl {
		between := lo._aeef <= r._aeef+_cggd && r._aeef <= hi._aeef+_cggd
		touches := r._ggdb <= overlapHi && overlapLo <= r._gbca
		if between && touches {
			return true
		}
	}
	return false
}

// equals reports whether two rulings have the same kind (_ecfb) and whether
// _edddc accepts each of their _aeef, _ggdb and _gbca pairs, checked in that
// order.
func (r *ruling) equals(other *ruling) bool {
	if r._ecfb != other._ecfb {
		return false
	}
	if !_edddc(r._aeef, other._aeef) {
		return false
	}
	if !_edddc(r._ggdb, other._ggdb) {
		return false
	}
	return _edddc(r._gbca, other._gbca)
}

// devicePoint transforms (x, y) with the matrix s._gdec.Mult(s._cffc) and
// returns the transformed coordinates as a point.
func (s *shapesState) devicePoint(x, y float64) _aae.Point {
	m := s._gdec.Mult(s._cffc)
	dx, dy := m.Transform(x, y)
	return _aae.NewPoint(dx, dy)
}

// setHorizScaling stores v in the text object's state (_ecff._dba).
// It does nothing when the receiver is nil.
func (to *textObject) setHorizScaling(v float64) {
	if to != nil {
		to._ecff._dba = v
	}
}

// drawRectangle appends a closed four-sided sub-path with corners (x, y),
// (x+w, y), (x+w, y+h) and (x, y+h) to the current shape. When the package
// flag _cece is set, the rectangle's device-space corners are logged first.
func (s *shapesState) drawRectangle(x, y, w, h float64) {
	x2, y2 := x+w, y+h

	if _cece {
		p1 := s.devicePoint(x, y)
		p2 := s.devicePoint(x2, y2)
		rect := _af.PdfRectangle{Llx: p1.X, Lly: p1.Y, Urx: p2.X, Ury: p2.Y}
		_ag.Log.Info("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066", rect)
	}

	s.newSubPath()
	s.moveTo(x, y)
	s.lineTo(x2, y)
	s.lineTo(x2, y2)
	s.lineTo(x, y2)
	s.closePath()
}

// _caegg builds one list item per line stored under the first key returned by
// _ffadg(byKey). For each such line it:
//   - asks _bggb for nested lists when more than one key exists, passing the
//     line's _addd and the next sibling's _addd (-1 for the last line);
//   - asks _ggfca for further lines belonging to the item, using as limit the
//     _addd of the first line of the first nested list, or the next sibling's
//     _addd when there is none;
//   - creates the item with _abda (tagged "bullet") and sets its _cbda via
//     _ebgc.
//
// The returned slice is never nil; it is empty when there are no keys.
func _caegg(lines []*textLine, byKey map[float64][]*textLine) []*list {
	keys := _ffadg(byKey)
	items := []*list{}
	if len(keys) == 0 {
		return items
	}

	nextKeyIdx := 1
	heads := byKey[keys[0]]
	for i, head := range heads {
		start := head._addd
		next := -1.0
		if i < len(heads)-1 {
			next = heads[i+1]._addd
		}

		nested := []*list{}
		if nextKeyIdx < len(keys) {
			nested = _bggb(lines, byKey, keys, nextKeyIdx, start, next)
		}

		limit := next
		if len(nested) > 0 {
			if firstNested := nested[0]; len(firstNested._fged) > 0 {
				limit = firstNested._fged[0]._addd
			}
		}

		body := []*textLine{head}
		body = append(body, _ggfca(head, lines, keys, start, limit)...)

		item := _abda(body, "\u0062\u0075\u006c\u006c\u0065\u0074", nested)
		item._cbda = _ebgc(body, "")
		items = append(items, item)
	}
	return items
}

// textMark is the extractor's internal record for a text mark. It embeds a
// PdfRectangle and carries a font pointer, matrix, point, colours and a PDF
// object alongside numeric and string data.
// NOTE(review): the meaning of the individual obfuscated fields is not
// visible in this part of the file -- confirm against their users.
type textMark struct {
	_af.PdfRectangle
	_ddfdb        int
	_cgeb, _faaf  string
	_dbgfg        *_af.PdfFont
	_acaf, _bdeb  float64
	_acgb         _aae.Matrix
	_efab         _aae.Point
	_fbaa         _af.PdfRectangle
	_ccbff, _bcfc _fe.Color
	_bae          _gf.PdfObject
	_afcb         []string
	Tw, Th        float64
	_ffbdg, _feg  int
}
// removeDuplicates sorts the list in place with sort() and returns a new list
// in which each run of consecutive rulings that compare equal (per
// ruling.equals) is reduced to its first member. It returns nil for an empty
// list.
func (rl rulingList) removeDuplicates() rulingList {
	if len(rl) == 0 {
		return nil
	}
	rl.sort()

	last := rl[0]
	unique := rulingList{last}
	for _, r := range rl[1:] {
		if !r.equals(last) {
			unique = append(unique, r)
			last = r
		}
	}
	return unique
}

// sortStrict sorts the list in place. Rulings are ordered by descending kind
// (_ecfb), then by ascending _aeef unless _cdaea accepts the difference of the
// two values, then by ascending _ggdb, and finally by ascending _gbca.
func (rl rulingList) sortStrict() {
	_e.Slice(rl, func(i, j int) bool {
		a, b := rl[i], rl[j]
		switch {
		case a._ecfb != b._ecfb:
			return a._ecfb > b._ecfb
		case !_cdaea(a._aeef - b._aeef):
			return a._aeef < b._aeef
		case a._ggdb != b._ggdb:
			return a._ggdb < b._ggdb
		default:
			return a._gbca < b._gbca
		}
	})
}