2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2018-03-22 14:03:47 +00:00
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2023-11-11 11:29:03 +00:00
package extractor ; import ( _dfg "bytes" ; _a "errors" ; _gde "fmt" ; _ac "github.com/unidoc/unipdf/v3/common" ; _dcg "github.com/unidoc/unipdf/v3/contentstream" ; _dce "github.com/unidoc/unipdf/v3/core" ; _ec "github.com/unidoc/unipdf/v3/internal/license" ; _c "github.com/unidoc/unipdf/v3/internal/textencoding" ;
_dca "github.com/unidoc/unipdf/v3/internal/transform" ; _fg "github.com/unidoc/unipdf/v3/model" ; _ef "golang.org/x/image/draw" ; _fd "golang.org/x/text/unicode/norm" ; _gdf "golang.org/x/xerrors" ; _e "image" ; _be "image/color" ; _g "io" ; _dc "math" ; _b "reflect" ; _gd "regexp" ;
_ab "sort" ; _df "strings" ; _fc "unicode" ; _f "unicode/utf8" ; ) ; /* reduceTiling returns a table in which empty grid rows and columns that lie within `_ffbf` of a neighbouring grid coordinate have been dropped. `_ggef` supplies the grid coordinates (`_abeb` is indexed by column, `_cegbg` by row). The receiver itself is returned when nothing is dropped; otherwise a new textTable is built and the non-empty composite cells are copied to their compacted positions. */ func ( _gdce * textTable ) reduceTiling ( _ggef gridTiling , _ffbf float64 ) * textTable { _egeac := make ( [ ] int , 0 , _gdce . _gebeeb ) ; _acgb := make ( [ ] int , 0 , _gdce . _acddc ) ; _acbg := _ggef . _abeb ; _ccagb := _ggef . _cegbg ;
// Row pass: row i is dropped only when i > 0, its coordinate is within _ffbf of row i-1's and emptyCompositeRow(i) is true.
// Column pass (begins on the same line): column j is dropped only when it is not the last column, its coordinate is within
// _ffbf of column j+1's and emptyCompositeColumn(j) is true. NOTE(review): rows compare with the previous index, columns with the next.
for _cdfd := 0 ; _cdfd < _gdce . _gebeeb ; _cdfd ++ { _ccdce := _cdfd > 0 && _dc . Abs ( _ccagb [ _cdfd - 1 ] - _ccagb [ _cdfd ] ) < _ffbf && _gdce . emptyCompositeRow ( _cdfd ) ; if ! _ccdce { _egeac = append ( _egeac , _cdfd ) ; } ; } ; for _ddbdd := 0 ; _ddbdd < _gdce . _acddc ; _ddbdd ++ { _fdda := _ddbdd < _gdce . _acddc - 1 && _dc . Abs ( _acbg [ _ddbdd + 1 ] - _acbg [ _ddbdd ] ) < _ffbf && _gdce . emptyCompositeColumn ( _ddbdd ) ;
// If every row and column was kept the table is already minimal and is returned as is.
if ! _fdda { _acgb = append ( _acgb , _ddbdd ) ; } ; } ; if len ( _egeac ) == _gdce . _gebeeb && len ( _acgb ) == _gdce . _acddc { return _gdce ; } ; _dfgf := textTable { _aefef : _gdce . _aefef , _acddc : len ( _acgb ) , _gebeeb : len ( _egeac ) , _edbe : make ( map [ uint64 ] compositeCell , len ( _acgb ) * len ( _egeac ) ) } ;
// Debug-only logging of the old and new dimensions and of the kept column/row indexes.
if _eadb { _ac . Log . Info ( "\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064" , _gdce . _acddc , _gdce . _gebeeb , len ( _acgb ) , len ( _egeac ) ) ; _ac . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076" , _acgb ) ;
// Copy loop: (_fdcbd, _gegbb) are the new column/row indexes, (_dbffea, _afcg) the original ones. Empty composites are skipped.
_ac . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076" , _egeac ) ; } ; for _gegbb , _afcg := range _egeac { for _fdcbd , _dbffea := range _acgb { _dbed , _ceaf := _gdce . getComposite ( _dbffea , _afcg ) ; if len ( _dbed ) == 0 { continue ;
} ; if _eadb { _gde . Printf ( "\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n" , _fdcbd , _gegbb , _dbffea , _afcg , _dbdbb ( _dbed . merge ( ) . text ( ) , 50 ) ) ; } ; _dfgf . putComposite ( _fdcbd , _gegbb , _dbed , _ceaf ) ;
} ; } ; return & _dfgf ; } ; /* moveTo marks that a new start point is pending (`_egfd`) and stores the device-space position of (`_ggbg`, `_edae`) in `_eaeeb`. No subpath is created here. */ func ( _bgf * shapesState ) moveTo ( _ggbg , _edae float64 ) { _bgf . _egfd = true ; _bgf . _eaeeb = _bgf . devicePoint ( _ggbg , _edae ) ; if _bdefa { _ac . Log . Info ( "\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066" , _ggbg , _edae , _bgf . _eaeeb ) ;
} ; } ; /* closePath closes the most recent subpath. If a start point is pending, a new subpath built from it by `_cdec` is appended first; if there is no pending point and no subpath at all, the call is a no-op apart from an optional debug message. */ func ( _cfcd * shapesState ) closePath ( ) { if _cfcd . _egfd { _cfcd . _efb = append ( _cfcd . _efb , _cdec ( _cfcd . _eaeeb ) ) ; _cfcd . _egfd = false ; } else if len ( _cfcd . _efb ) == 0 { if _bdefa { _ac . Log . Debug ( "\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068" ) ;
} ; _cfcd . _egfd = false ; return ; } ; _cfcd . _efb [ len ( _cfcd . _efb ) - 1 ] . close ( ) ; if _bdefa { _ac . Log . Info ( "\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073" , _cfcd ) ; } ; } ; /* highestWord returns the first word in bucket `_aac[_abbb]` whose `_adgge` value lies in the closed interval [`_ebfc`, `_cdef`], or nil when the bucket has no such word. NOTE(review): "highest" presumably relies on the bucket's sort order — confirm. */ func ( _gadc * wordBag ) highestWord ( _abbb int , _ebfc , _cdef float64 ) * textWord { for _ , _cbega := range _gadc . _aac [ _abbb ] { if _ebfc <= _cbega . _adgge && _cbega . _adgge <= _cdef { return _cbega ;
} ; } ; return nil ; } ; /* close appends the first point again unless `_ffea` reports that it already matches the last point, flags the subpath as closed (`_bbbb`) and removes duplicate points. It indexes `_gdgd[0]`, so the subpath must hold at least one point. */ func ( _ddgc * subpath ) close ( ) { if ! _ffea ( _ddgc . _gdgd [ 0 ] , _ddgc . last ( ) ) { _ddgc . add ( _ddgc . _gdgd [ 0 ] ) ; } ; _ddgc . _bbbb = true ; _ddgc . removeDuplicates ( ) ; } ; /* Error message texts for font conversion, font lookup and font stream failures. */ const ( _bf = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065" ;
_dcd = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064" ;
_adc = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065" ; ) ;
2023-02-07 17:17:49 +00:00
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// Font represents the font properties on a PDF page.
type Font struct { PdfFont * _fg . PdfFont ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// FontName represents Font Name from font properties.
FontName string ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// FontType represents Font Subtype entry in the font dictionary inside page resources.
// Examples : type0, Type1, MMType1, Type3, TrueType, CIDFont.
FontType string ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// ToUnicode is true if font provides a `ToUnicode` mapping.
ToUnicode bool ;
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// IsCID is true if underlying font is a composite font.
// Composite font is represented by a font dictionary whose Subtype is `Type0`
IsCID bool ;
2023-04-06 19:57:40 +00:00
2023-11-11 11:29:03 +00:00
// IsSimple is true if font is simple font.
// A simple font is limited to only 8 bit (255) character codes.
IsSimple bool ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// FontData represents the raw data of the embedded font file.
// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CCF).
// FontData value can be indicates from `FontFile`, `FontFile2` or `FontFile3` inside Font Descriptor.
// At most, only one of `FontFile`, `FontFile2` or `FontFile3` will be FontData value.
FontData [ ] byte ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// FontFileName is a name representing the font. it has format:
// (Font Name) + (Font Type Extension), example: helvetica.ttf.
FontFileName string ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
FontDescriptor * _fg . PdfFontDescriptor ; } ; func ( _gecc * wordBag ) firstReadingIndex ( _geed int ) int { _facfc := _gecc . firstWord ( _geed ) . _adecc ; _cfcb := float64 ( _geed + 1 ) * _ddgf ; _ecgb := _cfcb + _eega * _facfc ; _caec := _geed ; for _ , _agbf := range _gecc . depthBand ( _cfcb , _ecgb ) { if _gedbc ( _gecc . firstWord ( _agbf ) , _gecc . firstWord ( _caec ) ) < 0 { _caec = _agbf ;
} ; } ; return _caec ; } ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	Images []ImageMark
}

// getCurrentFont returns the font of the current text state. When no font is
// set it logs a debug message and returns the model's default font.
func (_cbec *textObject) getCurrentFont() *_fg.PdfFont {
	if font := _cbec._abcb._ecf; font != nil {
		return font
	}
	_ac.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e")
	return _fg.DefaultFont()
}

// _fbgdd builds a gridTile for rectangle `_badg`. Each of the four flags is
// true when the corresponding ruling is non-nil and encloses the rectangle's
// extent: the first two rulings are tested against the y-range, the last two
// against the x-range.
func _fbgdd(_badg _fg.PdfRectangle, _bgeg, _bbabc, _agegd, _aecc *ruling) gridTile {
	xLo, xHi := _badg.Llx, _badg.Urx
	yLo, yHi := _badg.Lly, _badg.Ury
	return gridTile{
		PdfRectangle: _badg,
		_cbge:        _bgeg != nil && _bgeg.encloses(yLo, yHi),
		_dafe:        _bbabc != nil && _bbabc.encloses(yLo, yHi),
		_beacf:       _agegd != nil && _agegd.encloses(xLo, xHi),
		_cbbgc:       _aecc != nil && _aecc.encloses(xLo, xHi),
	}
}

// newTablePara wraps the table in a textPara whose bounding box and `_ebadd`
// box are both the table's computed bounding box.
func (_dbeae *textTable) newTablePara() *textPara {
	box := _dbeae.computeBbox()
	para := &textPara{PdfRectangle: box, _ebadd: box, _edce: _dbeae}
	if _eadb {
		_ac.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// _ebaf converts a depth value to a bucket index of width `_ddgf`. Negative
// depths map to one less than the truncated quotient.
func _ebaf(_gfdd float64) int {
	if _gfdd >= 0 {
		return int(_gfdd / _ddgf)
	}
	return int(_gfdd/_ddgf) - 1
}

// _bddf reports whether the magnitude of `_babf` is below the tolerance `_gbgf`.
func _bddf(_babf float64) bool {
	return _dc.Abs(_babf) < _gbgf
}

// _cda reports whether `_gfbg` equals the first byte of any entry of `_bgad`.
func _cda(_gfbg byte) bool {
	for _, candidate := range _bgad {
		if []byte(candidate)[0] == _gfbg {
			return true
		}
	}
	return false
}

// pop removes the top state from the stack and returns a pointer to a copy of
// it, or nil when the stack is empty.
func (_aae *stateStack) pop() *textState {
	if _aae.empty() {
		return nil
	}
	last := len(*_aae) - 1
	state := *(*_aae)[last]
	*_aae = (*_aae)[:last]
	return &state
}

// size returns the number of states on the stack.
func (_dbd *stateStack) size() int {
	return len(*_dbd)
}

// list is a node of a list structure: its own text lines, a tag string
// (`_aeaa`), child nodes and one further string attribute.
type list struct {
	_ggcfd []*textLine
	_aeaa  string
	_edge  []*list
	_fbfaf string
}

// _ggde returns nil when a licensed key is loaded or `_de` is set. Otherwise
// it prints an "unlicensed" notice to standard output and returns an error.
// The `_egdcc` argument is not used.
func _ggde(_egdcc *PageText) error {
	key := _ec.GetLicenseKey()
	if key != nil && key.IsLicensed() || _de {
		return nil
	}
	_gde.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_gde.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _a.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}

// _edacg applies `_fcgc` to the `_ebadd` boxes of the two paragraphs.
func _edacg(_dfde, _ddgfee *textPara) bool {
	return _fcgc(_dfde._ebadd, _ddgfee._ebadd)
}

// newTextMark builds the textMark for text `_eaedc` rendered with matrix
// `_bddg` and ending at point `_ccaa`.
//
// The orientation is the matrix angle rounded by `_bcddg` to a multiple of
// `_edef`; orientations other than 0/90/180/270 (mod 360) are treated as 0.
// The second result is false when the extractor's media box has positive
// width and `_bcdd` reports that the mark's box lies outside it.
func (_aacb *textObject) newTextMark(_eaedc string, _bddg _dca.Matrix, _ccaa _dca.Point, _cdde float64, _ffdc *_fg.PdfFont, _eccb float64, _baba, _dabdb _be.Color, _afdd _dce.PdfObject, _ffcag []string, _aedag int, _ccfg int) (textMark, bool) {
	// normalize orders the corners so that Llx <= Urx and Lly <= Ury.
	normalize := func(r *_fg.PdfRectangle) {
		if r.Llx > r.Urx {
			r.Llx, r.Urx = r.Urx, r.Llx
		}
		if r.Lly > r.Ury {
			r.Lly, r.Ury = r.Ury, r.Lly
		}
	}

	orient := _bcddg(_bddg.Angle(), _edef)

	// For 90/270 text the extent is taken from the matrix x-scaling,
	// otherwise from its y-scaling.
	var extent float64
	if orient%180 == 90 {
		extent = _bddg.ScalingFactorX()
	} else {
		extent = _bddg.ScalingFactorY()
	}

	// The box runs from the matrix translation to the end point and is then
	// grown by `extent` perpendicular to the text direction.
	start := _gcdf(_bddg)
	box := _fg.PdfRectangle{Llx: start.X, Lly: start.Y, Urx: _ccaa.X, Ury: _ccaa.Y}
	switch orient % 360 {
	case 0:
		box.Ury += extent
	case 90:
		box.Urx -= extent
	case 180:
		box.Ury -= extent
	case 270:
		box.Urx += extent
	default:
		orient = 0
		box.Ury += extent
	}
	normalize(&box)

	mediaBox := _aacb._ccb._aa
	onPage := true
	if mediaBox.Width() > 0 {
		clipped, ok := _bcdd(box, mediaBox)
		if !ok {
			onPage = false
			_ac.Log.Debug("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q", box, mediaBox, _eaedc)
		}
		box = clipped
	}

	// Second box: `box` re-expressed per orientation relative to the media
	// box (width and height swapped for 90/270).
	rotated := box
	switch orient % 360 {
	case 90:
		mediaBox.Urx, mediaBox.Ury = mediaBox.Ury, mediaBox.Urx
		rotated = _fg.PdfRectangle{Llx: mediaBox.Urx - box.Ury, Urx: mediaBox.Urx - box.Lly, Lly: box.Llx, Ury: box.Urx}
	case 180:
		rotated = _fg.PdfRectangle{Llx: mediaBox.Urx - box.Llx, Urx: mediaBox.Urx - box.Urx, Lly: mediaBox.Ury - box.Lly, Ury: mediaBox.Ury - box.Ury}
	case 270:
		mediaBox.Urx, mediaBox.Ury = mediaBox.Ury, mediaBox.Urx
		rotated = _fg.PdfRectangle{Llx: box.Ury, Urx: box.Lly, Lly: mediaBox.Ury - box.Llx, Ury: mediaBox.Ury - box.Urx}
	}
	normalize(&rotated)

	mark := textMark{
		_efgdc:       _eaedc,
		PdfRectangle: rotated,
		_fbae:        box,
		_abef:        _ffdc,
		_cggc:        extent,
		_accea:       _eccb,
		_bcbeg:       _bddg,
		_caeg:        _ccaa,
		_aaad:        orient,
		_cccae:       _baba,
		_dcgea:       _dabdb,
		_dafb:        _afdd,
		_facc:        _ffcag,
		Th:           _aacb._abcb._gfad,
		Tw:           _aacb._abcb._ggf,
		_fbcf:        _ccfg,
		_ccfa:        _aedag,
	}
	if _agge {
		_ac.Log.Info("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073", start, _ccaa, mark.String())
	}
	return mark, onPage
}

// showText forwards its arguments unchanged to renderText.
func (_dgd *textObject) showText(_fag _dce.PdfObject, _gfg []byte, _gebe int) error {
	return _dgd.renderText(_fag, _gfg, _gebe)
}

// getFont returns the font named `_bdee`, using the extractor's font cache
// when one is configured (non-nil `_bc`).
//
// Cache entries are keyed by the String() form of the font dictionary. Every
// cached lookup increments the extractor's access counter; a hit stamps the
// entry with the new counter value and a miss loads the font with
// getFontDirect and stores it, first evicting the entry with the smallest
// stamp once the cache holds `_acbd` entries or more.
func (_cefec *textObject) getFont(_bdee string) (*_fg.PdfFont, error) {
	ext := _cefec._ccb
	cached := ext._bc != nil

	fontDict, err := _cefec.getFontDict(_bdee)
	if err != nil {
		if cached {
			_ac.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073", _bdee, err.Error())
		}
		return nil, err
	}

	var key string
	if cached {
		key = fontDict.String()
		ext._ag++
		if entry, ok := ext._bc[key]; ok {
			// The map stores fontEntry by value, so the refreshed stamp
			// must be written back or the entry never looks recently used.
			entry._faad = ext._ag
			ext._bc[key] = entry
			return entry._fgf, nil
		}
	}

	font, err := _cefec.getFontDirect(_bdee)
	if err != nil {
		return nil, err
	}
	if !cached {
		return font, nil
	}

	if len(ext._bc) >= _acbd {
		// Evict the least recently used entry (smallest access stamp).
		var (
			oldestKey   string
			oldestStamp int64
			found       bool
		)
		for k, e := range ext._bc {
			if !found || e._faad < oldestStamp {
				oldestKey, oldestStamp, found = k, e._faad, true
			}
		}
		if found {
			delete(ext._bc, oldestKey)
		}
	}
	ext._bc[key] = fontEntry{font, ext._ag}
	return font, nil
}

// fontEntry is a font cache entry: the font and the access-counter value
// recorded when it was stored or last refreshed.
type fontEntry struct {
	_fgf  *_fg.PdfFont
	_faad int64
}

// allWords returns the words of all buckets in `_aac` as one slice. `_aac` is
// ranged over directly, so if it is a map the bucket order is unspecified.
func (_dgagg *wordBag) allWords() []*textWord {
	var words []*textWord
	for _, bucket := range _dgagg._aac {
		words = append(words, bucket...)
	}
	return words
}

// inDiacriticArea reports whether `_gafa` sits close enough to the receiver:
// the sum of the left-edge and right-edge x offsets must be smaller in
// magnitude than `_abbba` times the receiver's width, and the bottom-edge y
// offset smaller in magnitude than `_abbba` times its height.
func (_ffdec *textMark) inDiacriticArea(_gafa *textMark) bool {
	dLeft := _ffdec.Llx - _gafa.Llx
	dRight := _ffdec.Urx - _gafa.Urx
	dBottom := _ffdec.Lly - _gafa.Lly
	return _dc.Abs(dLeft+dRight) < _ffdec.Width()*_abbba &&
		_dc.Abs(dBottom) < _ffdec.Height()*_abbba
}

// stateStack is a stack of text states; the last element is the top.
type stateStack []*textState

// quadraticTo adds only the end point (`_cac`, `_face`) to the current path;
// the control point (`_bddb`, `_daed`) is ignored.
func (_gcbd *shapesState) quadraticTo(_bddb, _daed, _cac, _face float64) {
	if _bdefa {
		_ac.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	_gcbd.addPoint(_cac, _face)
}
// ExtractFonts returns all font information from the page extractor, including
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
//
// The argument `previousPageFonts` is used when building a font catalog for several pages or a whole document:
// its entries are appended to the result unless a font with the same name is already present for this page,
// so the result contains no duplicates.
//
// NOTE: If previousPageFonts is nil, only the fonts of this page are returned.
func (_eba *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	page := PageFonts{}
	if err := page.extractPageResourcesToFont(_eba._af); err != nil {
		return nil, err
	}
	if previousPageFonts != nil {
		for _, prev := range previousPageFonts.Fonts {
			if _bb(page.Fonts, prev.FontName) {
				continue
			}
			page.Fonts = append(page.Fonts, prev)
		}
	}
	return &PageFonts{Fonts: page.Fonts}, nil
}

// event is a small record of one float64 value, one flag and one integer.
type event struct {
	_eebcc float64
	_dacd  bool
	_dcgg  int
}

// checkOp validates the parameter count of content stream operation `_bcb`.
//
// A nil receiver only triggers a debug message (showing at most `_bfde`
// parameters). When `_bfde` is non-negative and the operation does not have
// exactly `_bfde` parameters, the mismatch is logged and false is returned;
// the returned error is non-nil only if `_bccc` is set. In every other case
// the result is (true, nil).
func (_debc *textObject) checkOp(_bcb *_dcg.ContentStreamOperation, _bfde int, _bccc bool) (_dacg bool, _ddb error) {
	if _debc == nil {
		var shown []_dce.PdfObject
		if _bfde > 0 {
			shown = _bcb.Params
			if len(shown) > _bfde {
				shown = shown[:_bfde]
			}
		}
		_ac.Log.Debug("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076", _bcb.Operand, shown)
	}

	if _bfde < 0 || len(_bcb.Params) == _bfde {
		return true, nil
	}

	if _bccc {
		_ddb = _a.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	}
	_ac.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _bcb.Operand, _bfde, len(_bcb.Params), _bcb.Params)
	return false, _ddb
}

// sortTopoOrder reorders the list in place using the order computed by topoOrder.
func (_ddfg paraList) sortTopoOrder() {
	_ddfg.reorder(_ddfg.topoOrder())
}

// isQuadrilateral reports whether the subpath has exactly four points, or
// five points of which the last coincides exactly with the first.
func (_daecg *subpath) isQuadrilateral() bool {
	pts := _daecg._gdgd
	switch len(pts) {
	case 4:
		return true
	case 5:
		return pts[0].X == pts[4].X && pts[0].Y == pts[4].Y
	default:
		return false
	}
}

// _gcdf returns the translation component of matrix `_eagf` as a point.
func _gcdf(_eagf _dca.Matrix) _dca.Point {
	tx, ty := _eagf.Translation()
	return _dca.Point{X: tx, Y: ty}
}
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
func ( _cfaf * PageText ) ApplyArea ( bbox _fg . PdfRectangle ) { _egb := make ( [ ] * textMark , 0 , len ( _cfaf . _fbga ) ) ; for _ , _bbbg := range _cfaf . _fbga { if _gege ( _bbbg . bbox ( ) , bbox ) { _egb = append ( _egb , _bbbg ) ; } ; } ; var _ddac paraList ; _aeb := len ( _egb ) ; for _facb := 0 ;
_facb < 360 && _aeb > 0 ; _facb += 90 { _eecf := make ( [ ] * textMark , 0 , len ( _egb ) - _aeb ) ; for _ , _dgg := range _egb { if _dgg . _aaad == _facb { _eecf = append ( _eecf , _dgg ) ; } ; } ; if len ( _eecf ) > 0 { _cec := _bgdg ( _eecf , _cfaf . _fgag , nil , nil , _cfaf . _dbc . _dcc ) ; _ddac = append ( _ddac , _cec ... ) ;
_aeb -= len ( _eecf ) ; } ; } ; _gdgf := new ( _dfg . Buffer ) ; _ddac . writeText ( _gdgf ) ; _cfaf . _edee = _gdgf . String ( ) ; _cfaf . _fdaf = _ddac . toTextMarks ( ) ; _cfaf . _agdc = _ddac . tables ( ) ; } ; func ( _bcabe * textWord ) toTextMarks ( _egafb * int ) [ ] TextMark { var _gbcfg [ ] TextMark ;
for _ , _eceec := range _bcabe . _daafd { _gbcfg = _fgea ( _gbcfg , _egafb , _eceec . ToTextMark ( ) ) ; } ; return _gbcfg ; } ; func _gdaf ( _ega func ( * wordBag , * textWord , float64 ) bool , _ccg float64 ) func ( * wordBag , * textWord ) bool { return func ( _egdd * wordBag , _aacf * textWord ) bool { return _ega ( _egdd , _aacf , _ccg ) } ;
} ; func ( _eec * stateStack ) top ( ) * textState { if _eec . empty ( ) { return nil ; } ; return ( * _eec ) [ _eec . size ( ) - 1 ] ; } ; func ( _bbg * wordBag ) arrangeText ( ) * textPara { _bbg . sort ( ) ; if _ade { _bbg . removeDuplicates ( ) ; } ; var _bgfgd [ ] * textLine ; for _ , _fdcd := range _bbg . depthIndexes ( ) { for ! _bbg . empty ( _fdcd ) { _fbfdc := _bbg . firstReadingIndex ( _fdcd ) ;
_dcgc := _bbg . firstWord ( _fbfdc ) ; _adaff := _aaca ( _bbg , _fbfdc ) ; _dgdb := _dcgc . _adecc ; _eegeb := _dcgc . _adgge - _dgfb * _dgdb ; _agbc := _dcgc . _adgge + _dgfb * _dgdb ; _gecd := _acbbe * _dgdb ; _dbccg := _eedd * _dgdb ; _edeb : for { var _efge * textWord ; _bcfa := 0 ;
for _ , _ecbbe := range _bbg . depthBand ( _eegeb , _agbc ) { _eccce := _bbg . highestWord ( _ecbbe , _eegeb , _agbc ) ; if _eccce == nil { continue ; } ; _bbdec := _debff ( _eccce , _adaff . _fgbe [ len ( _adaff . _fgbe ) - 1 ] ) ; if _bbdec < - _dbccg { break _edeb ; } ; if _bbdec > _gecd { continue ;
} ; if _efge != nil && _gedbc ( _eccce , _efge ) >= 0 { continue ; } ; _efge = _eccce ; _bcfa = _ecbbe ; } ; if _efge == nil { break ; } ; _adaff . pullWord ( _bbg , _efge , _bcfa ) ; } ; _adaff . markWordBoundaries ( ) ; _bgfgd = append ( _bgfgd , _adaff ) ; } ; } ; if len ( _bgfgd ) == 0 { return nil ;
} ; _ab . Slice ( _bgfgd , func ( _cdag , _faffb int ) bool { return _fbba ( _bgfgd [ _cdag ] , _bgfgd [ _faffb ] ) < 0 } ) ; _bdfec := _ggdc ( _bbg . PdfRectangle , _bgfgd ) ; if _fccf { _ac . Log . Info ( "\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073" , _bdfec . String ( ) ) ;
if _baaa { for _fgee , _dfcf := range _bdfec . _gfaae { _gde . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _fgee , _dfcf . String ( ) ) ; if _dbcb { for _aacfd , _eegag := range _dfcf . _fgbe { _gde . Printf ( "\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a" , _aacfd , _eegag . String ( ) ) ;
for _dacgg , _ebcfe := range _eegag . _daafd { _gde . Printf ( "\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n" , _dacgg , _ebcfe . String ( ) ) ; } ; } ; } ; } ; } ; } ; return _bdfec ; } ; type textObject struct { _ccb * Extractor ; _ccae * _fg . PdfPageResources ; _dbf _dcg . GraphicsState ;
_abcb * textState ; _ada * stateStack ; _acbc _dca . Matrix ; _abcf _dca . Matrix ; _ffc [ ] * textMark ; _gbf bool ; } ; const _degg = 1.0 / 1000.0 ; func ( _gdff * shapesState ) devicePoint ( _fcfgb , _ddgd float64 ) _dca . Point { _bge := _gdff . _dged . Mult ( _gdff . _gebd ) ; _fcfgb , _ddgd = _bge . Transform ( _fcfgb , _ddgd ) ;
return _dca . NewPoint ( _fcfgb , _ddgd ) ; } ; func ( _beaa * textPara ) getListLines ( ) [ ] * textLine { var _eaff [ ] * textLine ; _bfdd := _aebgd ( _beaa . _gfaae ) ; for _ , _gfgb := range _beaa . _gfaae { _caef := _gfgb . _fgbe [ 0 ] . _fedgb [ 0 ] ; if _cda ( _caef ) { _eaff = append ( _eaff , _gfgb ) ;
} ; } ; _eaff = append ( _eaff , _bfdd ... ) ; return _eaff ; } ; func ( _acff * textPara ) isAtom ( ) * textTable { _gcfe := _acff ; _bgfgc := _acff . _abfec ; _ggca := _acff . _fgbea ; if _bgfgc . taken ( ) || _ggca . taken ( ) { return nil ; } ; _eeeaa := _bgfgc . _fgbea ; if _eeeaa . taken ( ) || _eeeaa != _ggca . _abfec { return nil ;
} ; return _dadcda ( _gcfe , _bgfgc , _ggca , _eeeaa ) ; } ; type markKind int ; func ( _abgee * ruling ) equals ( _gddg * ruling ) bool { return _abgee . _gffa == _gddg . _gffa && _begea ( _abgee . _cbag , _gddg . _cbag ) && _begea ( _abgee . _efgeb , _gddg . _efgeb ) && _begea ( _abgee . _bbge , _gddg . _bbge ) ;
2023-09-07 17:40:17 +00:00
} ;
2023-11-11 11:29:03 +00:00
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// `start` is raised to the first mark's offset and `end` lowered to the last mark's offset + 1 if
// they fall outside that range. An empty array is returned unchanged.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
func (_ddg *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if _ddg == nil {
		return nil, _a.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _gde.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}

	marks := _ddg._gcc
	n := len(marks)
	if n == 0 {
		return _ddg, nil
	}

	// Clamp the requested range to the offsets covered by the marks.
	if lo := marks[0].Offset; start < lo {
		start = lo
	}
	if hi := marks[n-1].Offset + 1; end > hi {
		end = hi
	}

	// First mark whose last character is at or after `start`.
	iStart := _ab.Search(n, func(i int) bool {
		return marks[i].Offset+len(marks[i].Text)-1 >= start
	})
	if iStart < 0 || iStart >= n {
		return nil, _gde.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, iStart, n, marks[0], marks[n-1])
	}

	// First mark that starts after the last requested offset.
	iEnd := _ab.Search(n, func(i int) bool {
		return marks[i].Offset > end-1
	})
	if iEnd < 0 || iEnd >= n {
		return nil, _gde.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076", end, iEnd, n, marks[0], marks[n-1])
	}

	if iEnd <= iStart {
		return nil, _gde.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_gcc: marks[iStart:iEnd]}, nil
}

// bbox returns the bounding box of the rulings. Which axis receives the
// primary and which the secondary min/max depends on whether the first
// ruling's `_gffa` equals `_faccd`. An empty list logs an error and yields
// the zero rectangle.
func (_ggeg rulingList) bbox() _fg.PdfRectangle {
	if len(_ggeg) == 0 {
		_ac.Log.Error("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073")
		return _fg.PdfRectangle{}
	}

	var box _fg.PdfRectangle
	if _ggeg[0]._gffa == _faccd {
		box.Llx, box.Urx = _ggeg.secMinMax()
		box.Lly, box.Ury = _ggeg.primMinMax()
		return box
	}
	box.Llx, box.Urx = _ggeg.primMinMax()
	box.Lly, box.Ury = _ggeg.secMinMax()
	return box
}

// absorb pulls every word of `_eecd` into the receiver and then applies the
// removals recorded while doing so to `_eecd`.
func (_ebab *wordBag) absorb(_eecd *wordBag) {
	removals := _eecd.makeRemovals()
	for depthIdx, bucket := range _eecd._aac {
		for _, word := range bucket {
			_ebab.pullWord(word, depthIdx, removals)
		}
	}
	_eecd.applyRemovals(removals)
}

// writeText writes the paragraph's text to `_efcc`. A paragraph without a
// table writes its own cell text. A table paragraph writes its cells row by
// row: a tab for a missing cell, a space after every cell and a newline
// between rows. Write errors are not reported.
func (_bged *textPara) writeText(_efcc _g.Writer) {
	table := _bged._edce
	if table == nil {
		_bged.writeCellText(_efcc)
		return
	}
	for row := 0; row < table._gebeeb; row++ {
		for col := 0; col < table._acddc; col++ {
			if cell := table.get(col, row); cell != nil {
				cell.writeCellText(_efcc)
			} else {
				_efcc.Write([]byte("\u0009"))
			}
			_efcc.Write([]byte("\u0020"))
		}
		if row < table._gebeeb-1 {
			_efcc.Write([]byte("\u000a"))
		}
	}
}

// _bcddg rounds `_dggfc` to the nearest multiple of `_adaf`; a zero `_adaf`
// is treated as 1.
func _bcddg(_dggfc float64, _adaf int) int {
	if _adaf == 0 {
		_adaf = 1
	}
	step := float64(_adaf)
	multiples := _dc.Round(_dggfc / step)
	return int(multiples * step)
}

// _acc returns the translation matrix that moves the origin to point `_adac`.
func _acc(_adac _dca.Point) _dca.Matrix {
	return _dca.TranslationMatrix(_adac.X, _adac.Y)
}
// Len returns the number of TextMarks in `ma`. A nil array has length 0.
func (_ebg *TextMarkArray) Len() int {
	if _ebg != nil {
		return len(_ebg._gcc)
	}
	return 0
}

// _fgfb reports true when either paragraph has its `_bfge` flag set;
// otherwise it returns `_ebfaf` applied to the difference of their depths.
func _fgfb(_bfeg, _ecdb *textPara) bool {
	return _bfeg._bfge || _ecdb._bfge || _ebfaf(_bfeg.depth()-_ecdb.depth())
}

// extractFormImages extracts the images of the form XObject named `_ceg`
// found in resources `_bfb`. The form's content stream is processed with the
// form's own resources, or with `_bfb` when the form has none. A missing form
// is not an error. The `_fbg` counter is incremented after each successfully
// processed form. The graphics state `_edg` is not used.
func (_dfgb *imageExtractContext) extractFormImages(_ceg *_dce.PdfObjectName, _edg _dcg.GraphicsState, _bfb *_fg.PdfPageResources) error {
	form, err := _bfb.GetXObjectFormByName(*_ceg)
	if err != nil {
		return err
	}
	if form == nil {
		return nil
	}

	stream, err := form.GetContentStream()
	if err != nil {
		return err
	}

	resources := form.Resources
	if resources == nil {
		resources = _bfb
	}

	if err = _dfgb.extractContentStreamImages(string(stream), resources); err != nil {
		return err
	}
	_dfgb._fbg++
	return nil
}
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
// The images are collected by running extractContentStreamImages over the extractor's
// content stream (`_geb`) and resources (`_af`); any error from that step is returned
// with a nil result.
func ( _aea * Extractor ) ExtractPageImages ( options * ImageExtractOptions ) ( * PageImages , error ) { _feg := & imageExtractContext { _ebac : options } ; _gf := _feg . extractContentStreamImages ( _aea . _geb , _aea . _af ) ; if _gf != nil { return nil , _gf ; } ; return & PageImages { Images : _feg . _dag } , nil ;
} ;
// computeEBBoxes computes the `_ebadd` rectangle of every paragraph in the list.
// Each `_ebadd` starts as the paragraph's PdfRectangle. For a paragraph, the neighbours
// returned by yNeighbours(0) give two limits: `_ccdb`, the largest Urx of a neighbour
// lying entirely to its left, and `_abada`, the smallest Llx of a neighbour lying
// entirely to its right (defaults -1e9 / +1e9). Then, for every other paragraph whose
// Ury is not above this paragraph's Lly, the rectangle's Llx is moved out to that
// paragraph's Llx, or else its Urx out to that paragraph's Urx, as long as the limits are
// respected. When `_gcda` is set the resulting `_ebadd` values replace the PdfRectangles.
// `_defga` only enables debug output.
func ( _bfaa paraList ) computeEBBoxes ( ) { if _defga { _ac . Log . Info ( "\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a" ) ; } ; for _ , _dfea := range _bfaa { _dfea . _ebadd = _dfea . PdfRectangle ; } ; _fbce := _bfaa . yNeighbours ( 0 ) ;
for _egaf , _daba := range _bfaa { _afga := _daba . _ebadd ; _ccdb , _abada := - 1.0e9 , + 1.0e9 ; for _ , _fddg := range _fbce [ _daba ] { _dfdea := _bfaa [ _fddg ] . _ebadd ; if _dfdea . Urx < _afga . Llx { _ccdb = _dc . Max ( _ccdb , _dfdea . Urx ) ; } else if _afga . Urx < _dfdea . Llx { _abada = _dc . Min ( _abada , _dfdea . Llx ) ;
} ; } ; for _gadg , _cabd := range _bfaa { _cbacc := _cabd . _ebadd ; if _egaf == _gadg || _cbacc . Ury > _afga . Lly { continue ; } ; if _ccdb <= _cbacc . Llx && _cbacc . Llx < _afga . Llx { _afga . Llx = _cbacc . Llx ; } else if _cbacc . Urx <= _abada && _afga . Urx < _cbacc . Urx { _afga . Urx = _cbacc . Urx ;
} ; } ; if _defga { _gde . Printf ( "\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a" , _egaf , _daba . _ebadd , _afga , _dbdbb ( _daba . text ( ) , 50 ) ) ; } ; _daba . _ebadd = _afga ; } ; if _gcda { for _ , _ffaa := range _bfaa { _ffaa . PdfRectangle = _ffaa . _ebadd ;
} ; } ; } ;
// _dgcc returns the text lines of the first child of `_egda` (in `_edge` order) whose
// tag `_aeaa` is "LBody", "Span" or "InlineShape". For "LBody" the child's own lines
// are returned when it has any, otherwise the search recurses into that child.
// It returns nil when no child carries one of these tags.
func _dgcc ( _egda * list ) [ ] * textLine { for _ , _dgbaa := range _egda . _edge { switch _dgbaa . _aeaa { case "\u004c\u0042\u006fd\u0079" : if len ( _dgbaa . _ggcfd ) != 0 { return _dgbaa . _ggcfd ; } ; return _dgcc ( _dgbaa ) ; case "\u0053\u0070\u0061\u006e" : return _dgbaa . _ggcfd ;
case "I\u006e\u006c\u0069\u006e\u0065\u0053\u0068\u0061\u0070\u0065" : return _dgbaa . _ggcfd ; } ; } ; return nil ; } ;
// textTable is a table of paragraphs with a bounding rectangle.
// `_acddc` is the number of columns and `_gebeeb` the number of rows (see the loops in
// log and toTextTable). `_cfgbb` holds the cell paragraphs and `_edbe` the composite
// cells, both keyed by a uint64 (the composite key is built with `_bafcd(col, row)`).
// NOTE(review): `_aefef` appears to mark tables derived from a ruling grid (it is
// printed as "grid=" in log) - confirm.
type textTable struct { _fg . PdfRectangle ; _acddc , _gebeeb int ; _aefef bool ; _cfgbb map [ uint64 ] * textPara ; _edbe map [ uint64 ] compositeCell ; } ;
// establishSubpath returns the last subpath in `_efb`, first appending a new one built
// by `_cdec` from the point reported by lastpointEstablished when that call's second
// result is false. It clears the `_egfd` flag and returns nil if there is no subpath.
func ( _cagb * shapesState ) establishSubpath ( ) * subpath { _bgcb , _fefc := _cagb . lastpointEstablished ( ) ;
if ! _fefc { _cagb . _efb = append ( _cagb . _efb , _cdec ( _bgcb ) ) ; } ; if len ( _cagb . _efb ) == 0 { return nil ; } ; _cagb . _egfd = false ; return _cagb . _efb [ len ( _cagb . _efb ) - 1 ] ; } ;
// extractContentStreamImages parses the content stream `_fcf` and processes it with
// `_cgf` as resources, routing every operand to processOperand. The image cache `_fba`
// and the options `_ebac` are lazily initialised to empty values if unset.
// Parse and processing errors are returned unchanged.
func ( _gfe * imageExtractContext ) extractContentStreamImages ( _fcf string , _cgf * _fg . PdfPageResources ) error { _bfa := _dcg . NewContentStreamParser ( _fcf ) ;
_gdeg , _dd := _bfa . Parse ( ) ; if _dd != nil { return _dd ; } ; if _gfe . _fba == nil { _gfe . _fba = map [ * _dce . PdfObjectStream ] * cachedImage { } ; } ; if _gfe . _ebac == nil { _gfe . _ebac = & ImageExtractOptions { } ; } ; _gg := _dcg . NewContentStreamProcessor ( * _gdeg ) ; _gg . AddHandler ( _dcg . HandlerConditionEnumAllOperands , "" , _gfe . processOperand ) ;
return _gg . Process ( _cgf ) ; } ;
// log prints a debug dump of the table under the title `_bfdcb`: dimensions, the
// `_aefef` flag and bounding box, followed by one line per non-nil cell.
// It does nothing unless the `_eadb` debug constant is true.
func ( _fgeb * textTable ) log ( _bfdcb string ) { if ! _eadb { return ; } ; _ac . Log . Info ( "~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066" , _bfdcb , _fgeb . _acddc , _fgeb . _gebeeb , _fgeb . _aefef , _fgeb . PdfRectangle ) ;
for _eeda := 0 ; _eeda < _fgeb . _gebeeb ; _eeda ++ { for _fdcdc := 0 ; _fdcdc < _fgeb . _acddc ; _fdcdc ++ { _bfgef := _fgeb . get ( _fdcdc , _eeda ) ; if _bfgef == nil { continue ; } ; _gde . Printf ( "%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a" , _fdcdc , _eeda , _bfgef . PdfRectangle , _dbdbb ( _bfgef . text ( ) , 50 ) , _f . RuneCountInString ( _bfgef . text ( ) ) ) ;
} ; } ; } ;
// toTextTable converts the internal table into the exported TextTable.
// Cells is indexed [row][column]; a missing cell is left as the zero TableCell. For a
// present cell, Text is the paragraph text and Marks holds the paragraph's text marks,
// with the offset counter passed to toTextMarks starting at 0 for every cell.
// The result's PdfRectangle is taken from bbox().
func ( _bebcf * textTable ) toTextTable ( ) TextTable { if _eadb { _ac . Log . Info ( "t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064" , _bebcf . _acddc , _bebcf . _gebeeb ) ; } ; _acec := make ( [ ] [ ] TableCell , _bebcf . _gebeeb ) ;
for _eadc := 0 ; _eadc < _bebcf . _gebeeb ; _eadc ++ { _acec [ _eadc ] = make ( [ ] TableCell , _bebcf . _acddc ) ; for _faefg := 0 ; _faefg < _bebcf . _acddc ; _faefg ++ { _cdeae := _bebcf . get ( _faefg , _eadc ) ; if _cdeae == nil { continue ; } ; if _eadb { _gde . Printf ( "\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a" , _faefg , _eadc , _cdeae ) ;
} ; _acec [ _eadc ] [ _faefg ] . Text = _cdeae . text ( ) ; _efae := 0 ; _acec [ _eadc ] [ _faefg ] . Marks . _gcc = _cdeae . toTextMarks ( & _efae ) ; } ; } ; _bbgd := TextTable { W : _bebcf . _acddc , H : _bebcf . _gebeeb , Cells : _acec } ; _bbgd . PdfRectangle = _bebcf . bbox ( ) ; return _bbgd ;
} ;
// _gdfa recursively builds nested lists. `_fdba[_cfece]` selects the key of the current
// level in `_dffe`; the lines for that key are filtered by `_fgca` using the bounds
// `_fggd` and `_dcga`. For each remaining line a "bullet" list is built with `_bddc`
// from the line, the extra lines returned by `_bdca`, and the child lists obtained by
// recursing into the next level (only while `_cfece+1 < len(_fdba)`). The range passed
// down is bounded by this line's `_bfcg` and the next line's `_bfcg` (or `_fggd` for the
// last line); the bound given to `_bdca` is tightened to the first child line's `_bfcg`
// when children exist. The list's `_fbfaf` is set from `_gadcf`.
// NOTE(review): `_fdba` is assumed to be a sorted list of indentation keys - confirm.
func _gdfa ( _gdea [ ] * textLine , _dffe map [ float64 ] [ ] * textLine , _fdba [ ] float64 , _cfece int , _dcga , _fggd float64 ) [ ] * list { _gfdc := [ ] * list { } ; _egfec := _cfece ; _cfece = _cfece + 1 ; _aadfa := _fdba [ _egfec ] ; _bbbe := _dffe [ _aadfa ] ; _gcaa := _fgca ( _bbbe , _fggd , _dcga ) ;
for _fdcb , _beade := range _gcaa { var _fabg float64 ; _cgcc := [ ] * list { } ; _daaa := _beade . _bfcg ; _fggde := _fggd ; if _fdcb < len ( _gcaa ) - 1 { _fggde = _gcaa [ _fdcb + 1 ] . _bfcg ; } ; if _cfece < len ( _fdba ) { _cgcc = _gdfa ( _gdea , _dffe , _fdba , _cfece , _daaa , _fggde ) ;
} ; _fabg = _fggde ; if len ( _cgcc ) > 0 { _bgfd := _cgcc [ 0 ] ; if len ( _bgfd . _ggcfd ) > 0 { _fabg = _bgfd . _ggcfd [ 0 ] . _bfcg ; } ; } ; _cgfae := [ ] * textLine { _beade } ; _aceg := _bdca ( _beade , _gdea , _fdba , _daaa , _fabg ) ; _cgfae = append ( _cgfae , _aceg ... ) ; _gdddf := _bddc ( _cgfae , "\u0062\u0075\u006c\u006c\u0065\u0074" , _cgcc ) ;
_gdddf . _fbfaf = _gadcf ( _cgfae , "" ) ; _gfdc = append ( _gfdc , _gdddf ) ; } ; return _gfdc ; } ;
// snapToGroups splits the rulings with vertsHorzs, applies snapToGroupsDirection to each
// non-empty half, and returns the two results concatenated (first half, then second).
func ( _aada rulingList ) snapToGroups ( ) rulingList { _ccdg , _gfefd := _aada . vertsHorzs ( ) ; if len ( _ccdg ) > 0 { _ccdg = _ccdg . snapToGroupsDirection ( ) ; } ; if len ( _gfefd ) > 0 { _gfefd = _gfefd . snapToGroupsDirection ( ) ;
} ; _acdgb := append ( _ccdg , _gfefd ... ) ; _acdgb . log ( "\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073" ) ; return _acdgb ; } ;
// subdivide expands a table of composite cells into a table of plain cells.
// Row and column corridors are computed (compositeRowCorridors/compositeColCorridors)
// and normalised with `_dcgaf`; if either set is empty the receiver is returned
// unchanged. `_bfgee` turns the corridors into per-row / per-column offsets and the new
// total size. Every composite cell present in `_edbe` is split along its row and column
// corridors and the resulting sub-cells are put into the new table at the composite
// cell's offsets. The new table keeps the receiver's PdfRectangle and `_aefef` flag.
func ( _gdbag * textTable ) subdivide ( ) * textTable { _gdbag . logComposite ( "\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e" ) ;
_edfbd := _gdbag . compositeRowCorridors ( ) ; _eeeagf := _gdbag . compositeColCorridors ( ) ; if _eadb { _ac . Log . Info ( "\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073" , _cbgbf ( _edfbd ) , _cbgbf ( _eeeagf ) ) ;
} ; if len ( _edfbd ) == 0 || len ( _eeeagf ) == 0 { return _gdbag ; } ; _dcgaf ( _edfbd ) ; _dcgaf ( _eeeagf ) ; if _eadb { _ac . Log . Info ( "\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073" , _cbgbf ( _edfbd ) , _cbgbf ( _eeeagf ) ) ;
} ; _cedff , _begd := _bfgee ( _gdbag . _gebeeb , _edfbd ) ; _fgcc , _abade := _bfgee ( _gdbag . _acddc , _eeeagf ) ; _cada := make ( map [ uint64 ] * textPara , _abade * _begd ) ; _gddbf := & textTable { PdfRectangle : _gdbag . PdfRectangle , _aefef : _gdbag . _aefef , _gebeeb : _begd , _acddc : _abade , _cfgbb : _cada } ;
if _eadb { _ac . Log . Info ( "\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a" + "\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a" + "\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a" + "\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a" + "\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076" , _gdbag . _acddc , _gdbag . _gebeeb , _abade , _begd , _cbgbf ( _edfbd ) , _cbgbf ( _eeeagf ) , _cedff , _fgcc ) ;
} ; for _efac := 0 ; _efac < _gdbag . _gebeeb ; _efac ++ { _cbgag := _cedff [ _efac ] ; for _adcd := 0 ; _adcd < _gdbag . _acddc ; _adcd ++ { _dfaa := _fgcc [ _adcd ] ; if _eadb { _gde . Printf ( "\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a" , _adcd , _efac , _dfaa , _cbgag ) ;
} ; _gffaa , _cfea := _gdbag . _edbe [ _bafcd ( _adcd , _efac ) ] ; if ! _cfea { continue ; } ; _beda := _gffaa . split ( _edfbd [ _efac ] , _eeeagf [ _adcd ] ) ; for _cdecd := 0 ; _cdecd < _beda . _gebeeb ; _cdecd ++ { for _facg := 0 ; _facg < _beda . _acddc ; _facg ++ { _cbgaga := _beda . get ( _facg , _cdecd ) ;
_gddbf . put ( _dfaa + _facg , _cbgag + _cdecd , _cbgaga ) ; if _eadb { _gde . Printf ( "\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a" , _dfaa + _facg , _cbgag + _cdecd , _cbgaga ) ; } ; } ; } ; } ; } ; return _gddbf ; } ;
// _becd converts the PDF colour `_dccdd` in colourspace `_dbce` to a Go color.Color.
// It returns black when either argument is nil, when ColorToRGB fails, or when the
// converted colour is not a *PdfColorDeviceRGB (the last two cases are logged at debug
// level). Otherwise it returns an opaque NRGBA whose channels are the RGB components
// multiplied by 255 and truncated to uint8.
func _becd ( _dbce _fg . PdfColorspace , _dccdd _fg . PdfColor ) _be . Color { if _dbce == nil || _dccdd == nil { return _be . Black ;
} ; _afccf , _cegfa := _dbce . ColorToRGB ( _dccdd ) ; if _cegfa != nil { _ac . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073" , _dccdd , _dbce , _cegfa ) ;
return _be . Black ; } ; _eafdf , _ddcf := _afccf . ( * _fg . PdfColorDeviceRGB ) ; if ! _ddcf { _ac . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076" , _afccf ) ;
return _be . Black ; } ; return _be . NRGBA { R : uint8 ( _eafdf . R ( ) * 255 ) , G : uint8 ( _eafdf . G ( ) * 255 ) , B : uint8 ( _eafdf . B ( ) * 255 ) , A : uint8 ( 255 ) } ; } ;
// _ageb classifies a shape by its two extents `_gebaf` and `_fddd` and the minimum
// length `_eagd`. It returns `_faccd` when the first extent is at least `_eagd` and
// `_edfa(_fddd, _gebaf)` holds, `_cbab` for the symmetric case, and `_ccfb` otherwise.
// NOTE(review): `_faccd`/`_cbab`/`_ccfb` look like horizontal / vertical / no ruling,
// with the arguments being width and height - confirm against the callers.
func _ageb ( _gebaf , _fddd , _eagd float64 ) rulingKind { if _gebaf >= _eagd && _edfa ( _fddd , _gebaf ) { return _faccd ;
} ; if _fddd >= _eagd && _edfa ( _gebaf , _fddd ) { return _cbab ; } ; return _ccfb ; } ;
// appendWord appends `_gbaf` to the line's words, replaces the line's PdfRectangle with
// `_ebge` of the line and word rectangles, and raises the line's `_ceacg` and `_bfcg`
// to the word's `_adecc` and `_adgge` when those are larger.
func ( _dbdb * textLine ) appendWord ( _gbaf * textWord ) { _dbdb . _fgbe = append ( _dbdb . _fgbe , _gbaf ) ; _dbdb . PdfRectangle = _ebge ( _dbdb . PdfRectangle , _gbaf . PdfRectangle ) ; if _gbaf . _adecc > _dbdb . _ceacg { _dbdb . _ceacg = _gbaf . _adecc ;
} ; if _gbaf . _adgge > _dbdb . _bfcg { _dbdb . _bfcg = _gbaf . _adgge ; } ; } ;
// _dgfe reports whether rectangle `_bcbe` lies entirely inside rectangle `_aacfb`
// (edges touching count as inside).
func _dgfe ( _aacfb , _bcbe _fg . PdfRectangle ) bool { return _aacfb . Llx <= _bcbe . Llx && _bcbe . Urx <= _aacfb . Urx && _aacfb . Lly <= _bcbe . Lly && _bcbe . Ury <= _aacfb . Ury ; } ;
// TableInfo gets table information of the textmark `tm`.
// It returns (nil, nil) when the mark's `_dgbe` flag is not set. Otherwise it returns
// the table stored on the mark and the result of that table's getCellInfo for the mark.
func (_ceae *TextMark) TableInfo() (*TextTable, [][]int) {
	if !_ceae._dgbe {
		return nil, nil
	}
	_aefd := _ceae._acbcf
	_edf := _aefd.getCellInfo(*_ceae)
	return _aefd, _edf
}

// textPara is a paragraph of text lines with its bounding rectangle.
type textPara struct {
	_fg.PdfRectangle
	// _ebadd is the extended rectangle maintained by paraList.computeEBBoxes.
	_ebadd _fg.PdfRectangle
	// _gfaae holds the lines of the paragraph.
	_gfaae []*textLine
	// _edce is the table held by this paragraph, or nil for a plain paragraph.
	_edce *textTable
	// _dcada is the flag reported by taken().
	_dcada bool
	_bfge  bool
	// Neighbouring paragraphs, filled in by paraList.addNeighbours.
	_bfdgg *textPara
	_abfec *textPara
	_caege *textPara
	_fgbea *textPara
	_ceab  []list
}

// markWordBoundaries sets `_eadcb` on every word (from the second onwards) whose
// `_debff` value relative to the preceding word is at least `_aeeg` times the line's
// `_ceacg`. The line must contain at least one word.
func (_dcac *textLine) markWordBoundaries() {
	_ecfe := _aeeg * _dcac._ceacg
	for _dcdde, _abed := range _dcac._fgbe[1:] {
		// _dcdde indexes the slice starting at word 1, so _fgbe[_dcdde] is the previous word.
		if _debff(_abed, _dcac._fgbe[_dcdde]) >= _ecfe {
			_abed._eadcb = true
		}
	}
}

// parseStructTreeRoot fills the receiver from the structure tree root object `_bgde`.
//
// The children are read from the "K" entry: an array is used as is, a single
// reference is wrapped in a one-element array, and any other value yields no children.
// The "Type" entry is stored as its String() form, or "" when the entry is absent.
//
// A nil object is ignored. An object that is not a dictionary is logged at debug level
// and ignored; previously execution continued and dereferenced the nil dictionary.
func (_gfda *structTreeRoot) parseStructTreeRoot(_bgde _dce.PdfObject) {
	if _bgde == nil {
		return
	}
	_gaea, _gggd := _dce.GetDict(_bgde)
	if !_gggd {
		_ac.Log.Debug("\u0070\u0061\u0072s\u0065\u0053\u0074\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u003a\u0020\u0064\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006eo\u0074\u0020\u0066\u006f\u0075\u006e\u0064\u002e")
		// Nothing can be read from a non-dictionary object.
		return
	}

	K := _gaea.Get("\u004b")

	// Get returns nil for a missing key; guard before calling String on it.
	var _cdgg string
	if _typeObj := _gaea.Get("\u0054\u0079\u0070\u0065"); _typeObj != nil {
		_cdgg = _typeObj.String()
	}

	var _eafc *_dce.PdfObjectArray
	switch _feab := K.(type) {
	case *_dce.PdfObjectArray:
		_eafc = _feab
	case *_dce.PdfObjectReference:
		_eafc = _dce.MakeArray(K)
	}

	_eafd := []structElement{}
	if _eafc != nil {
		for _, _bgaf := range _eafc.Elements() {
			_gcdc := &structElement{}
			_gcdc.parseStructElement(_bgaf)
			_eafd = append(_eafd, *_gcdc)
		}
	}
	_gfda._ccdea = _eafd
	_gfda._cefga = _cdgg
}

// emptyCompositeRow reports whether no composite cell in row `_cdfdf` contains any
// paragraph. Columns without a composite cell count as empty.
func (_bdff *textTable) emptyCompositeRow(_cdfdf int) bool {
	for _efdg := 0; _efdg < _bdff._acddc; _efdg++ {
		if _bfcdd, _dbcbe := _bdff._edbe[_bafcd(_efdg, _cdfdf)]; _dbcbe {
			if len(_bfcdd.paraList) > 0 {
				return false
			}
		}
	}
	return true
}

// _dfdf returns the keys of `_dccce` sorted in increasing order.
func _dfdf(_dccce map[int][]float64) []int {
	_fadgd := make([]int, len(_dccce))
	_gcbee := 0
	for _ggbga := range _dccce {
		_fadgd[_gcbee] = _ggbga
		_gcbee++
	}
	_ab.Ints(_fadgd)
	return _fadgd
}

// getFillColor returns the non-stroking colour of the text object's graphics state
// `_dbf`, converted by `_becd`.
func (_babb *textObject) getFillColor() _be.Color {
	return _becd(_babb._dbf.ColorspaceNonStroking, _babb._dbf.ColorNonStroking)
}

// yNeighbours builds two events per paragraph from its vertical extent (Lly flagged
// true, Ury flagged false, both tagged with the paragraph index) and returns the result
// of eventNeighbours on them. When `_dfecc` is non-zero each extent is widened on both
// sides by `_dfecc` times the paragraph's fontsize().
func (_efaae paraList) yNeighbours(_dfecc float64) map[*textPara][]int {
	_dcdfe := make([]event, 2*len(_efaae))
	if _dfecc == 0 {
		for _ggfgc, _ceded := range _efaae {
			_dcdfe[2*_ggfgc] = event{_ceded.Lly, true, _ggfgc}
			_dcdfe[2*_ggfgc+1] = event{_ceded.Ury, false, _ggfgc}
		}
	} else {
		for _ecdd, _eaddc := range _efaae {
			_dcdfe[2*_ecdd] = event{_eaddc.Lly - _dfecc*_eaddc.fontsize(), true, _ecdd}
			_dcdfe[2*_ecdd+1] = event{_eaddc.Ury + _dfecc*_eaddc.fontsize(), false, _ecdd}
		}
	}
	return _efaae.eventNeighbours(_dcdfe)
}

// getDepthIdx maps `_fcee` to an index with `_ebaf` and clamps it to the range spanned
// by the first and last entries of depthIndexes(). depthIndexes() must be non-empty.
func (_befe *wordBag) getDepthIdx(_fcee float64) int {
	_fegg := _befe.depthIndexes()
	_gabbg := _ebaf(_fcee)
	if _gabbg < _fegg[0] {
		return _fegg[0]
	}
	if _gabbg > _fegg[len(_fegg)-1] {
		return _fegg[len(_fegg)-1]
	}
	return _gabbg
}

// _dceb is a regular expression source. The escapes decode to
//
//	^[a-zA-Z](\)|\.)|^[\d]+(\)|\.)|^\([a-zA-Z]\)|^\([\d]+\)
//
// i.e. a leading "a)", "a.", "1)", "1.", "(a)" or "(1)" style marker.
var _dceb string = "\u005e\u005b\u0061\u002d\u007a\u0041\u002dZ\u005d\u0028\u005c)\u007c\u005c\u002e)\u007c\u005e[\u005c\u0064\u005d\u002b\u0028\u005c)\u007c\\.\u0029\u007c\u005e\u005c\u0028\u005b\u0061\u002d\u007a\u0041\u002d\u005a\u005d\u005c\u0029\u007c\u005e\u005c\u0028\u005b\u005c\u0064\u005d\u002b\u005c\u0029"
// writeCellText writes the paragraph's lines to `_bgag`. A line that ends in a hyphen
// and is not the last line has its text passed through `_badfg` when `_ddcg` is enabled,
// and no separator follows it. Every other line except the last is followed by the
// separator `_eecc` computes from the `_bfcg` values of this line and the next.
// The results of Write are discarded.
func ( _abga * textPara ) writeCellText ( _bgag _g . Writer ) { for _aacd , _cgfdg := range _abga . _gfaae { _fgfe := _cgfdg . text ( ) ; _bbdd := _ddcg && _cgfdg . endsInHyphen ( ) && _aacd != len ( _abga . _gfaae ) - 1 ; if _bbdd { _fgfe = _badfg ( _fgfe ) ; } ; _bgag . Write ( [ ] byte ( _fgfe ) ) ;
if ! ( _bbdd || _aacd == len ( _abga . _gfaae ) - 1 ) { _bgag . Write ( [ ] byte ( _eecc ( _cgfdg . _bfcg , _abga . _gfaae [ _aacd + 1 ] . _bfcg ) ) ) ; } ; } ; } ;
// _feea groups text lines by the integer `_fbcf` of the first mark of their first word.
// It visits the lines of every paragraph in the list and, for paragraphs that hold a
// table, the lines of every cell paragraph in the table's `_cfgbb`. Lines rejected by
// `_gfdg` are skipped with a debug message.
// NOTE(review): per the log text ("more than one mcid number") the key is presumably
// the marked-content ID - confirm.
func _feea ( _cdae * paraList ) map [ int ] [ ] * textLine { _bacb := map [ int ] [ ] * textLine { } ; for _ , _bdbc := range * _cdae { for _ , _bgdfd := range _bdbc . _gfaae { if ! _gfdg ( _bgdfd ) { _ac . Log . Debug ( "g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e" ) ;
continue ; } ; _agga := _bgdfd . _fgbe [ 0 ] . _daafd [ 0 ] . _fbcf ; _bacb [ _agga ] = append ( _bacb [ _agga ] , _bgdfd ) ; } ; if _bdbc . _edce != nil { _ffcad := _bdbc . _edce . _cfgbb ; for _ , _aagg := range _ffcad { for _ , _degf := range _aagg . _gfaae { if ! _gfdg ( _degf ) { _ac . Log . Debug ( "g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e" ) ;
continue ; } ; _adgg := _degf . _fgbe [ 0 ] . _daafd [ 0 ] . _fbcf ; _bacb [ _adgg ] = append ( _bacb [ _adgg ] , _degf ) ; } ; } ; } ; } ; return _bacb ; } ;
// snapToGroupsDirection snaps rulings with nearby primary coordinates (`_cbag`) onto a
// shared coordinate and merges them. The receiver must be non-empty (element 0 is read
// unconditionally; snapToGroups only calls it on non-empty lists).
//
// Steps: the list is sorted with sortStrict; a new group is started whenever a ruling's
// `_cbag` exceeds the current group leader's by more than `_gbgf`; each group's common
// coordinate comes from mergePrimary and is written back to every member; each group is
// then split with splitSec and every piece merged into one ruling. A merged ruling that
// aligns (alignsPrimary and alignsSec) with the previously appended one is reported as
// a duplicate and dropped. The result is sorted again before it is returned.
func ( _cdcbc rulingList ) snapToGroupsDirection ( ) rulingList { _cdcbc . sortStrict ( ) ; _geeg := make ( map [ * ruling ] rulingList , len ( _cdcbc ) ) ;
_defbc := _cdcbc [ 0 ] ; _ddgbg := func ( _fedfd * ruling ) { _defbc = _fedfd ; _geeg [ _defbc ] = rulingList { _fedfd } } ; _ddgbg ( _cdcbc [ 0 ] ) ; for _ , _dgcd := range _cdcbc [ 1 : ] { if _dgcd . _cbag < _defbc . _cbag - _bafg { _ac . Log . Error ( "\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073" , _defbc , _dgcd ) ;
} ; if _dgcd . _cbag > _defbc . _cbag + _gbgf { _ddgbg ( _dgcd ) ; } else { _geeg [ _defbc ] = append ( _geeg [ _defbc ] , _dgcd ) ; } ; } ; _dffc := make ( map [ * ruling ] float64 , len ( _geeg ) ) ; _fedgc := make ( map [ * ruling ] * ruling , len ( _cdcbc ) ) ; for _aade , _ggbcb := range _geeg { _dffc [ _aade ] = _ggbcb . mergePrimary ( ) ;
for _ , _dcbgd := range _ggbcb { _fedgc [ _dcbgd ] = _aade ; } ; } ; for _ , _afdca := range _cdcbc { _afdca . _cbag = _dffc [ _fedgc [ _afdca ] ] ; } ; _facfcd := make ( rulingList , 0 , len ( _cdcbc ) ) ; for _ , _efgg := range _geeg { _bcccd := _efgg . splitSec ( ) ; for _efgf , _ggceg := range _bcccd { _gbfd := _ggceg . merge ( ) ;
if len ( _facfcd ) > 0 { _ggfea := _facfcd [ len ( _facfcd ) - 1 ] ; if _ggfea . alignsPrimary ( _gbfd ) && _ggfea . alignsSec ( _gbfd ) { _ac . Log . Error ( "\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073" , _efgf , _ggfea , _gbfd ) ;
continue ; } ; } ; _facfcd = append ( _facfcd , _gbfd ) ; } ; } ; _facfcd . sortStrict ( ) ; return _facfcd ; } ;
// addNeighbours fills in the four neighbour links of every paragraph in the list.
//
// Horizontal pass (candidates from yNeighbours(_aec)): `_bfdgg` is set to the candidate
// lying entirely to the left (Urx <= Llx) with the largest Urx, and `_abfec` to the
// candidate lying entirely to the right (Llx >= Urx) with the smallest Llx.
// Vertical pass (candidates from xNeighbours(_bdag)): `_fgbea` is set to the candidate
// lying entirely below (Ury <= Lly) with the largest Ury, and `_caege` to the candidate
// lying entirely above (Lly >= Ury) with the smallest Lly.
//
// In every case the chosen candidate is rejected if another candidate on the same side
// overlaps it along that axis, and it must also pass `_cced` (horizontal) or `_fcgc`
// (vertical) against the paragraph. Finally any link that is not reciprocated by the
// opposite link on the neighbour is cleared.
func ( _fgdd paraList ) addNeighbours ( ) { _gcbbf := func ( _gacfb [ ] int , _aaaa * textPara ) ( [ ] * textPara , [ ] * textPara ) { _bddcc := make ( [ ] * textPara , 0 , len ( _gacfb ) - 1 ) ;
_beab := make ( [ ] * textPara , 0 , len ( _gacfb ) - 1 ) ; for _ , _cgfg := range _gacfb { _ecgc := _fgdd [ _cgfg ] ; if _ecgc . Urx <= _aaaa . Llx { _bddcc = append ( _bddcc , _ecgc ) ; } else if _ecgc . Llx >= _aaaa . Urx { _beab = append ( _beab , _ecgc ) ; } ; } ; return _bddcc , _beab ; } ; _edcd := func ( _daaed [ ] int , _becc * textPara ) ( [ ] * textPara , [ ] * textPara ) { _fcbe := make ( [ ] * textPara , 0 , len ( _daaed ) - 1 ) ;
_acfa := make ( [ ] * textPara , 0 , len ( _daaed ) - 1 ) ; for _ , _baage := range _daaed { _cadcb := _fgdd [ _baage ] ; if _cadcb . Ury <= _becc . Lly { _acfa = append ( _acfa , _cadcb ) ; } else if _cadcb . Lly >= _becc . Ury { _fcbe = append ( _fcbe , _cadcb ) ; } ; } ; return _fcbe , _acfa ;
} ; _dabea := _fgdd . yNeighbours ( _aec ) ; for _ , _debdd := range _fgdd { _eafdc := _dabea [ _debdd ] ; if len ( _eafdc ) == 0 { continue ; } ; _gbfb , _acfd := _gcbbf ( _eafdc , _debdd ) ; if len ( _gbfb ) == 0 && len ( _acfd ) == 0 { continue ; } ; if len ( _gbfb ) > 0 { _gdab := _gbfb [ 0 ] ; for _ , _aagff := range _gbfb [ 1 : ] { if _aagff . Urx >= _gdab . Urx { _gdab = _aagff ;
} ; } ; for _ , _eagg := range _gbfb { if _eagg != _gdab && _eagg . Urx > _gdab . Llx { _gdab = nil ; break ; } ; } ; if _gdab != nil && _cced ( _debdd . PdfRectangle , _gdab . PdfRectangle ) { _debdd . _bfdgg = _gdab ; } ; } ; if len ( _acfd ) > 0 { _acebg := _acfd [ 0 ] ; for _ , _bbfac := range _acfd [ 1 : ] { if _bbfac . Llx <= _acebg . Llx { _acebg = _bbfac ;
} ; } ; for _ , _begad := range _acfd { if _begad != _acebg && _begad . Llx < _acebg . Urx { _acebg = nil ; break ; } ; } ; if _acebg != nil && _cced ( _debdd . PdfRectangle , _acebg . PdfRectangle ) { _debdd . _abfec = _acebg ; } ; } ; } ; _dabea = _fgdd . xNeighbours ( _bdag ) ; for _ , _gbffc := range _fgdd { _bbcg := _dabea [ _gbffc ] ;
if len ( _bbcg ) == 0 { continue ; } ; _gbac , _ecee := _edcd ( _bbcg , _gbffc ) ; if len ( _gbac ) == 0 && len ( _ecee ) == 0 { continue ; } ; if len ( _ecee ) > 0 { _efcbg := _ecee [ 0 ] ; for _ , _bdagdd := range _ecee [ 1 : ] { if _bdagdd . Ury >= _efcbg . Ury { _efcbg = _bdagdd ; } ; } ; for _ , _cgced := range _ecee { if _cgced != _efcbg && _cgced . Ury > _efcbg . Lly { _efcbg = nil ;
break ; } ; } ; if _efcbg != nil && _fcgc ( _gbffc . PdfRectangle , _efcbg . PdfRectangle ) { _gbffc . _fgbea = _efcbg ; } ; } ; if len ( _gbac ) > 0 { _dggga := _gbac [ 0 ] ; for _ , _abcba := range _gbac [ 1 : ] { if _abcba . Lly <= _dggga . Lly { _dggga = _abcba ; } ; } ; for _ , _ccda := range _gbac { if _ccda != _dggga && _ccda . Lly < _dggga . Ury { _dggga = nil ;
break ; } ; } ; if _dggga != nil && _fcgc ( _gbffc . PdfRectangle , _dggga . PdfRectangle ) { _gbffc . _caege = _dggga ; } ; } ; } ; for _ , _cdcc := range _fgdd { if _cdcc . _bfdgg != nil && _cdcc . _bfdgg . _abfec != _cdcc { _cdcc . _bfdgg = nil ; } ; if _cdcc . _caege != nil && _cdcc . _caege . _fgbea != _cdcc { _cdcc . _caege = nil ;
} ; if _cdcc . _abfec != nil && _cdcc . _abfec . _bfdgg != _cdcc { _cdcc . _abfec = nil ; } ; if _cdcc . _fgbea != nil && _cdcc . _fgbea . _caege != _cdcc { _cdcc . _fgbea = nil ; } ; } ; } ;
// isExportable reports whether the table should be exported. Tables with `_aefef` set
// always are. Otherwise the first column is inspected row by row: the table is
// exportable as soon as one first-column cell is missing, or holds more than one rune
// and does not match the regular expression `_bfaaa`. If every first-column cell is at
// most one rune long or matches `_bfaaa`, the table is not exportable.
func ( _facba * textTable ) isExportable ( ) bool { if _facba . _aefef { return true ; } ; _cfce := func ( _fgac int ) bool { _bgcec := _facba . get ( 0 , _fgac ) ;
if _bgcec == nil { return false ; } ; _agbe := _bgcec . text ( ) ; _edgc := _f . RuneCountInString ( _agbe ) ; _cade := _bfaaa . MatchString ( _agbe ) ; return _edgc <= 1 || _cade ; } ; for _gagb := 0 ; _gagb < _facba . _gebeeb ; _gagb ++ { if ! _cfce ( _gagb ) { return true ; } ; } ; return false ;
} ;
// paraList is a sequence of paragraphs.
type paraList [ ] * textPara ;
// _eeead returns the Ury of `_bbfa` minus the Lly of the bounding box of `_bcae`.
func _eeead ( _bbfa _fg . PdfRectangle , _bcae bounded ) float64 { return _bbfa . Ury - _bcae . bbox ( ) . Lly } ;
// structTreeRoot holds what parseStructTreeRoot reads from a structure tree root:
// the parsed child elements (`_ccdea`) and the string form of its "Type" entry (`_cefga`).
type structTreeRoot struct { _ccdea [ ] structElement ; _cefga string ; } ;
// contains reports whether rectangle `_agca` fits inside the tile. A tile with fewer
// than three borders (numBorders) never contains anything. For each side whose border
// flag is set (`_cbge` with Llx, `_dafe` with Urx, `_beacf` with Lly, `_cbbgc` with Ury)
// the rectangle may extend past the tile by at most the tolerance `_gagd`; sides without
// a border are not checked.
func ( _beacb gridTile ) contains ( _agca _fg . PdfRectangle ) bool { if _beacb . numBorders ( ) < 3 { return false ;
} ; if _beacb . _cbge && _agca . Llx < _beacb . Llx - _gagd { return false ; } ; if _beacb . _dafe && _agca . Urx > _beacb . Urx + _gagd { return false ; } ; if _beacb . _beacf && _agca . Lly < _beacb . Lly - _gagd { return false ; } ; if _beacb . _cbbgc && _agca . Ury > _beacb . Ury + _gagd { return false ;
} ; return true ; } ;
// drawRectangle adds a closed rectangular subpath with corner (`_cgae`, `_gdfg`), width
// `_bbcdc` and height `_baec`: newSubPath, moveTo the corner, three lineTo calls around
// the rectangle, then closePath. With the `_bdefa` debug constant set it first logs the
// rectangle in device coordinates.
func ( _cfg * shapesState ) drawRectangle ( _cgae , _gdfg , _bbcdc , _baec float64 ) { if _bdefa { _fbb := _cfg . devicePoint ( _cgae , _gdfg ) ; _ggfb := _cfg . devicePoint ( _cgae + _bbcdc , _gdfg + _baec ) ; _adcc := _fg . PdfRectangle { Llx : _fbb . X , Lly : _fbb . Y , Urx : _ggfb . X , Ury : _ggfb . Y } ;
_ac . Log . Info ( "d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066" , _adcc ) ; } ; _cfg . newSubPath ( ) ; _cfg . moveTo ( _cgae , _gdfg ) ; _cfg . lineTo ( _cgae + _bbcdc , _gdfg ) ; _cfg . lineTo ( _cgae + _bbcdc , _gdfg + _baec ) ;
_cfg . lineTo ( _cgae , _gdfg + _baec ) ; _cfg . closePath ( ) ; } ;
// alignsPrimary reports whether both rulings have the same kind (`_gffa`) and their
// primary coordinates (`_cbag`) differ by less than half of `_gbgf`.
func ( _dbdbf * ruling ) alignsPrimary ( _bcbc * ruling ) bool { return _dbdbf . _gffa == _bcbc . _gffa && _dc . Abs ( _dbdbf . _cbag - _bcbc . _cbag ) < _gbgf * 0.5 ; } ;
// _edfa reports whether `_edafg` divided by the larger of `_cfca` and `_cdca` is below
// the threshold `_edec`; `_cfca` acts as a floor for the divisor.
func _edfa ( _edafg , _cdca float64 ) bool { return _edafg / _dc . Max ( _cfca , _cdca ) < _edec } ;
// augmentGrid splits the rulings with vertsHorzs and, where the bounding box of one
// half sticks out past the other by more than `_cebe`, adds a synthetic ruling (marked
// `_acaca`) on that side: a `_cbab` ruling at the outer Llx/Urx of the second half
// spanning the first half's vertical extent, and a `_faccd` ruling at the outer Lly/Ury
// of the first half spanning the second half's horizontal extent.
// If either half is empty, or nothing was added, the unmodified halves are returned.
func (_ddae rulingList) augmentGrid() (rulingList, rulingList) {
	first, second := _ddae.vertsHorzs()
	if len(first) == 0 || len(second) == 0 {
		return first, second
	}
	origFirst, origSecond := first, second

	boxFirst := first.bbox()
	boxSecond := second.bbox()
	if _aebg {
		_ac.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066", boxFirst)
		_ac.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066", boxSecond)
	}

	// synth builds one synthetic ruling of the given kind.
	synth := func(kind rulingKind, primary, lo, hi float64) *ruling {
		return &ruling{_adaa: _acaca, _gffa: kind, _cbag: primary, _efgeb: lo, _bbge: hi}
	}

	if boxSecond.Llx < boxFirst.Llx-_cebe {
		first = append(rulingList{synth(_cbab, boxSecond.Llx, boxFirst.Lly, boxFirst.Ury)}, first...)
	}
	if boxSecond.Urx > boxFirst.Urx+_cebe {
		first = append(first, synth(_cbab, boxSecond.Urx, boxFirst.Lly, boxFirst.Ury))
	}
	if boxFirst.Lly < boxSecond.Lly-_cebe {
		second = append(rulingList{synth(_faccd, boxFirst.Lly, boxSecond.Llx, boxSecond.Urx)}, second...)
	}
	if boxFirst.Ury > boxSecond.Ury+_cebe {
		second = append(second, synth(_faccd, boxFirst.Ury, boxSecond.Llx, boxSecond.Urx))
	}

	// Same total count as the input means no ruling was added.
	if len(first)+len(second) == len(_ddae) {
		return origFirst, origSecond
	}
	combined := append(first, second...)
	_ddae.log("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064")
	combined.log("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d")
	return first, second
}

// Debug switches. All are compile-time false; the derived ones are gated on their parent.
const (
	_defga = false
	_agge  = false
	_faee  = false
	_cgcbc = false
	_bdefa = false
	_adbb  = false
	_abcbg = false
	_abfe  = false
	_fccf  = false
	_baaa  = _fccf && true
	_dbcb  = _baaa && false
	_bfcf  = _fccf && true
	_eadb  = false
	_gddd  = _eadb && false
	_cece  = _eadb && true
	_aebg  = false
	_aab   = _aebg && false
	_bdeff = _aebg && false
	_gbde  = _aebg && true
	_cadc  = _aebg && false
	_gbbdd = _aebg && false
)

// taken reports true for a nil paragraph and otherwise returns the paragraph's
// `_dcada` flag.
func (_daafg *textPara) taken() bool {
	if _daafg == nil {
		return true
	}
	return _daafg._dcada
}

// _dadcda builds a 2x2 table from four paragraphs, placed at (column, row) positions
// (0,0), (1,0), (0,1) and (1,1) in argument order.
func _dadcda(_ddfgd, _ddfge, _gccg, _dbffe *textPara) *textTable {
	table := &textTable{_acddc: 2, _gebeeb: 2, _cfgbb: make(map[uint64]*textPara, 4)}
	grid := [2][2]*textPara{
		{_ddfgd, _ddfge},
		{_gccg, _dbffe},
	}
	for row, cells := range grid {
		for col, para := range cells {
			table.put(col, row, para)
		}
	}
	return table
}

// yMean returns the mean of the Y coordinates of the two end points of the ruling.
func (_cggae lineRuling) yMean() float64 {
	sum := _cggae._cged.Y + _cggae._ggeaa.Y
	return 0.5 * sum
}

// lastpointEstablished returns (`_eaeeb`, false) when the `_egfd` flag is set, and
// (last point of the final subpath, false) when that subpath has `_bbbb` set.
// In every other case it returns the zero point and true.
func (_ace *shapesState) lastpointEstablished() (_dca.Point, bool) {
	if _ace._egfd {
		return _ace._eaeeb, false
	}
	if n := len(_ace._efb); n > 0 {
		if tail := _ace._efb[n-1]; tail._bbbb {
			return tail.last(), false
		}
	}
	return _dca.Point{}, true
}

// blocks reports whether some ruling of the list lies between `_eafcc` and `_dcef`:
// its `_cbag` is within the two rulings' `_cbag` values (with tolerance `_gbgf`) and its
// [`_efgeb`, `_bbge`] span intersects the span the two rulings have in common.
// It is false when the two rulings' spans do not overlap at all.
func (_ceee rulingList) blocks(_eafcc, _dcef *ruling) bool {
	if _eafcc._efgeb > _dcef._bbge || _dcef._efgeb > _eafcc._bbge {
		return false
	}
	// Span shared by both rulings.
	sharedLo := _dc.Max(_eafcc._efgeb, _dcef._efgeb)
	sharedHi := _dc.Min(_eafcc._bbge, _dcef._bbge)

	// Order the pair by primary coordinate.
	near, far := _eafcc, _dcef
	if near._cbag > far._cbag {
		near, far = far, near
	}

	for _, r := range _ceee {
		if !(near._cbag <= r._cbag+_gbgf) || !(r._cbag <= far._cbag+_gbgf) {
			continue
		}
		if !(r._efgeb <= sharedHi) || !(sharedLo <= r._bbge) {
			continue
		}
		return true
	}
	return false
}

// bbox returns the smallest rectangle containing every point of every subpath in the
// section. The section must have at least one subpath with at least one point.
func (_cbfcg pathSection) bbox() _fg.PdfRectangle {
	seed := _cbfcg._ged[0]._gdgd[0]
	box := _fg.PdfRectangle{Llx: seed.X, Urx: seed.X, Lly: seed.Y, Ury: seed.Y}

	for i, sub := range _cbfcg._ged {
		points := sub._gdgd
		if i == 0 {
			// The first point of the first subpath seeded the box.
			points = points[1:]
		}
		for _, pt := range points {
			switch {
			case pt.X < box.Llx:
				box.Llx = pt.X
			case pt.X > box.Urx:
				box.Urx = pt.X
			}
			switch {
			case pt.Y < box.Lly:
				box.Lly = pt.Y
			case pt.Y > box.Ury:
				box.Ury = pt.Y
			}
		}
	}
	return box
}
// Text gets the extracted text contained in `l`.
// The text is produced by `_bcdb`, which is handed the list, a string builder to write
// into, and a pointer to a scratch string that starts out empty.
func (_daaf *list) Text() string {
	var (
		out     _df.Builder
		scratch string
	)
	_bcdb(_daaf, &out, &scratch)
	return out.String()
}
// New returns an Extractor instance for extracting content from the input PDF page.
// It is shorthand for NewWithOptions(page, nil), so the result and error are exactly
// what NewWithOptions returns for nil options.
func New ( page * _fg . PdfPage ) ( * Extractor , error ) { return NewWithOptions ( page , nil ) } ;
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
// On error the text is empty and the counts and error from ExtractPageText are passed through.
func (_bafa *Extractor) ExtractTextWithStats() (_bgg string, _dga int, _beb int, _bcg error) {
	pageText, numChars, numMisses, err := _bafa.ExtractPageText()
	if err == nil {
		return pageText.Text(), numChars, numMisses, nil
	}
	return "", numChars, numMisses, err
}

// _abgeef returns the smaller of its two arguments.
func _abgeef(_cdffda, _fbfed int) int {
	if _fbfed <= _cdffda {
		return _fbfed
	}
	return _cdffda
}

// _fgae classifies a rectangle by its longer side: `_faccd` when it is wider than tall
// and the width is at least `_daee`, `_cbab` when it is not wider than tall and the
// height is at least `_daee`, and `_ccfb` otherwise.
func _fgae(_afgea _fg.PdfRectangle) rulingKind {
	width, height := _afgea.Width(), _afgea.Height()
	wider := width > height
	switch {
	case wider && width >= _daee:
		return _faccd
	case !wider && height >= _daee:
		return _cbab
	}
	return _ccfb
}
// String returns a human readable description of `s`.
func ( _fabee intSet ) String ( ) string { var _dbgg [ ] int ; for _fdfc := range _fabee { if _fabee . has ( _fdfc ) { _dbgg = append ( _dbgg , _fdfc ) ; } ; } ; _ab . Ints ( _dbgg ) ; return _gde . Sprintf ( "\u0025\u002b\u0076" , _dbgg ) ; } ; func ( _aecaf rulingList ) asTiling ( ) gridTiling { if _gbde { _ac . Log . Info ( "r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d" , len ( _aecaf ) ) ;
} ; for _ddfaa , _fcdab := range _aecaf [ 1 : ] { _gbcbd := _aecaf [ _ddfaa ] ; if _gbcbd . alignsPrimary ( _fcdab ) && _gbcbd . alignsSec ( _fcdab ) { _ac . Log . Error ( "a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073" , _fcdab , _gbcbd ) ;
} ; } ; _aecaf . sortStrict ( ) ; _aecaf . log ( "\u0073n\u0061\u0070\u0070\u0065\u0064" ) ; _aegd , _ebdf := _aecaf . vertsHorzs ( ) ; _dbca := _aegd . primaries ( ) ; _gcaef := _ebdf . primaries ( ) ; _gafae := len ( _dbca ) - 1 ; _daca := len ( _gcaef ) - 1 ; if _gafae == 0 || _daca == 0 { return gridTiling { } ;
} ; _ecag := _fg . PdfRectangle { Llx : _dbca [ 0 ] , Urx : _dbca [ _gafae ] , Lly : _gcaef [ 0 ] , Ury : _gcaef [ _daca ] } ; if _gbde { _ac . Log . Info ( "\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064" , len ( _aegd ) ) ;
for _dbff , _ffafg := range _aegd { _gde . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _dbff , _ffafg ) ; } ; _ac . Log . Info ( "\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064" , len ( _ebdf ) ) ;
for _fgbg , _gabda := range _ebdf { _gde . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _fgbg , _gabda ) ; } ; _ac . Log . Info ( "\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f" , _gafae , _daca , _dbca , _gcaef ) ;
} ; _dcdc := make ( [ ] gridTile , _gafae * _daca ) ; for _fdeac := _daca - 1 ; _fdeac >= 0 ; _fdeac -- { _gegga := _gcaef [ _fdeac ] ; _bdgd := _gcaef [ _fdeac + 1 ] ; for _ggcff := 0 ; _ggcff < _gafae ; _ggcff ++ { _fefcf := _dbca [ _ggcff ] ; _fdad := _dbca [ _ggcff + 1 ] ; _dccg := _aegd . findPrimSec ( _fefcf , _gegga ) ;
_acgf := _aegd . findPrimSec ( _fdad , _gegga ) ; _cbcc := _ebdf . findPrimSec ( _gegga , _fefcf ) ; _ffcab := _ebdf . findPrimSec ( _bdgd , _fefcf ) ; _gbgba := _fg . PdfRectangle { Llx : _fefcf , Urx : _fdad , Lly : _gegga , Ury : _bdgd } ; _gafba := _fbgdd ( _gbgba , _dccg , _acgf , _cbcc , _ffcab ) ;
_dcdc [ _fdeac * _gafae + _ggcff ] = _gafba ; if _gbde { _gde . Printf ( "\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a" , _ggcff , _fdeac , _gafba . String ( ) , _gafba . Width ( ) , _gafba . Height ( ) ) ;
} ; } ; } ; if _gbde { _ac . Log . Info ( "r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066" , _ecag ) ;
} ; _bagf := make ( [ ] map [ float64 ] gridTile , _daca ) ; for _dcfg := _daca - 1 ; _dcfg >= 0 ; _dcfg -- { if _gbde { _gde . Printf ( "\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a" , _dcfg ) ; } ; _bagf [ _dcfg ] = make ( map [ float64 ] gridTile , _gafae ) ; for _daeg := 0 ; _daeg < _gafae ;
_daeg ++ { _ebeca := _dcdc [ _dcfg * _gafae + _daeg ] ; if _gbde { _gde . Printf ( "\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _daeg , _ebeca ) ; } ; if ! _ebeca . _cbge { continue ; } ; _aeag := _daeg ; for _bgabb := _daeg + 1 ; ! _ebeca . _dafe && _bgabb < _gafae ;
_bgabb ++ { _fffb := _dcdc [ _dcfg * _gafae + _bgabb ] ; _ebeca . Urx = _fffb . Urx ; _ebeca . _cbbgc = _ebeca . _cbbgc || _fffb . _cbbgc ; _ebeca . _beacf = _ebeca . _beacf || _fffb . _beacf ; _ebeca . _dafe = _fffb . _dafe ; if _gbde { _gde . Printf ( "\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a" , _bgabb , _fffb , _ebeca ) ;
} ; _aeag = _bgabb ; } ; if _gbde { _gde . Printf ( " \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n" , _daeg , _aeag , _ebeca ) ; } ; _daeg = _aeag ; _bagf [ _dcfg ] [ _ebeca . Llx ] = _ebeca ; } ; } ; _acfca := make ( map [ float64 ] map [ float64 ] gridTile , _daca ) ;
_efda := make ( map [ float64 ] map [ float64 ] struct { } , _daca ) ; for _fffg := _daca - 1 ; _fffg >= 0 ; _fffg -- { _ebgeg := _dcdc [ _fffg * _gafae ] . Lly ; _acfca [ _ebgeg ] = make ( map [ float64 ] gridTile , _gafae ) ; _efda [ _ebgeg ] = make ( map [ float64 ] struct { } , _gafae ) ; } ; if _gbde { _ac . Log . Info ( "\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066" , _ecag ) ;
} ; for _ebcae := _daca - 1 ; _ebcae >= 0 ; _ebcae -- { _fcgd := _dcdc [ _ebcae * _gafae ] . Lly ; _gcbb := _bagf [ _ebcae ] ; if _gbde { _gde . Printf ( "\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a" , _ebcae ) ; } ; for _ , _abfac := range _bbagb ( _gcbb ) { if _ , _ebaed := _efda [ _fcgd ] [ _abfac ] ;
_ebaed { continue ; } ; _agae := _gcbb [ _abfac ] ; if _gbde { _gde . Printf ( " \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a" , _agae . String ( ) ) ; } ; for _gcgb := _ebcae - 1 ; _gcgb >= 0 ; _gcgb -- { if _agae . _beacf { break ; } ; _bacfd := _bagf [ _gcgb ] ; _facbc , _agdb := _bacfd [ _abfac ] ;
if ! _agdb { break ; } ; if _facbc . Urx != _agae . Urx { break ; } ; _agae . _beacf = _facbc . _beacf ; _agae . Lly = _facbc . Lly ; if _gbde { _gde . Printf ( "\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a" , _facbc . String ( ) , _agae . String ( ) ) ;
} ; _efda [ _facbc . Lly ] [ _facbc . Llx ] = struct { } { } ; } ; if _ebcae == 0 { _agae . _beacf = true ; } ; if _agae . complete ( ) { _acfca [ _fcgd ] [ _abfac ] = _agae ; } ; } ; } ; _ddgce := gridTiling { PdfRectangle : _ecag , _abeb : _efcda ( _acfca ) , _cegbg : _aded ( _acfca ) , _faeca : _acfca } ;
_ddgce . log ( "\u0043r\u0065\u0061\u0074\u0065\u0064" ) ; return _ddgce ; } ; func _aadf ( _bedf * textLine ) float64 { return _bedf . _fgbe [ 0 ] . Llx } ; func ( _gdbd * ruling ) gridIntersecting ( _bbgeb * ruling ) bool { return _begea ( _gdbd . _efgeb , _bbgeb . _efgeb ) && _begea ( _gdbd . _bbge , _bbgeb . _bbge ) ;
2023-09-07 17:40:17 +00:00
} ;
2023-11-11 11:29:03 +00:00
// String returns a description of `v`.
// A ruling whose kind is _ccfb is reported as "NOT RULING". Otherwise the
// output lists the kind, the primary coordinate (_cbag), the secondary range
// (_efgeb to _bbge) and its length, the mark kind, the color and, when
// non-zero, the width. The two axis labels are swapped for kind _faccd.
func ( _ccede * ruling ) String ( ) string { if _ccede . _gffa == _ccfb { return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047" ; } ; _bcefc , _dbdgb := "\u0078" , "\u0079" ; if _ccede . _gffa == _faccd { _bcefc , _dbdgb = "\u0079" , "\u0078" ; } ; _adacb := "" ; if _ccede . _afdc != 0.0 { _adacb = _gde . Sprintf ( " \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066" , _ccede . _afdc ) ;
} ; return _gde . Sprintf ( "\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073" , _ccede . _gffa , _bcefc , _ccede . _cbag , _dbdgb , _ccede . _efgeb , _ccede . _bbge , _ccede . _bbge - _ccede . _efgeb , _ccede . _adaa , _ccede . Color , _adacb ) ;
} ;
// log prints the paragraphs in the list under the heading `_bbdcb`, but only
// when the package flag _abfe is set. Nil entries are skipped. Each line shows
// the index, the bounding rectangle, the table dimensions in brackets when the
// paragraph has a table (_edce), and the text as returned by _dbdbb(text, 50).
func ( _faef paraList ) log ( _bbdcb string ) { if ! _abfe { return ; } ; _ac . Log . Info ( "%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d" , _bbdcb , len ( _faef ) ) ;
for _fddgc , _fcebe := range _faef { if _fcebe == nil { continue ; } ; _aabec := _fcebe . text ( ) ; _aaeb := "\u0020\u0020" ; if _fcebe . _edce != nil { _aaeb = _gde . Sprintf ( "\u005b%\u0064\u0078\u0025\u0064\u005d" , _fcebe . _edce . _acddc , _fcebe . _edce . _gebeeb ) ; } ; _gde . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a" , _fddgc , _fcebe . PdfRectangle , _aaeb , _dbdbb ( _aabec , 50 ) ) ;
} ; } ;
// firstWord returns the first word stored under key `_ggbf` in the bag's _aac
// map. It panics if that key is absent or its slice is empty.
func ( _cdb * wordBag ) firstWord ( _ggbf int ) * textWord { return _cdb . _aac [ _ggbf ] [ 0 ] } ;
// _acbd is a package constant; its use is not visible in this part of the file.
const _acbd = 10 ;
// last returns the final point of the subpath. It panics if the subpath has no points.
func ( _bded * subpath ) last ( ) _dca . Point { return _bded . _gdgd [ len ( _bded . _gdgd ) - 1 ] } ;
// renderText converts the raw string bytes `_cbfc` into text marks and appends
// them to the text object's mark list (_ffc), advancing the text matrix
// (_acbc) after each glyph.
//
// `_gcbc` and `_ddd` are passed through unchanged to newTextMark.
//
// It returns nil without doing anything when the _gbf flag is set (logged as
// an invalid font), and returns an error when the current font has no metrics
// for one of the character codes. Marks that newTextMark reports as not usable
// are skipped.
func ( _dgc * textObject ) renderText ( _gcbc _dce . PdfObject , _cbfc [ ] byte , _ddd int ) error { if _dgc . _gbf { _ac . Log . Debug ( "\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e" ) ;
return nil ; } ; _cab := _dgc . getCurrentFont ( ) ; _cefe := _cab . BytesToCharcodes ( _cbfc ) ; _cgad , _cegda , _cdg := _cab . CharcodesToStrings ( _cefe ) ; if _cdg > 0 { _ac . Log . Debug ( "\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064" , _cegda , _cdg ) ;
} ; _dgc . _abcb . _ddcd += _cegda ; _dgc . _abcb . _dcad += _cdg ; _fdef := _dgc . _abcb ; _bga := _fdef . _dda ; _bbff := _fdef . _gfad / 100.0 ; _gad := _degg ; if _cab . Subtype ( ) == "\u0054\u0079\u0070e\u0033" { _gad = 1 ; } ; _gdc , _gcgd := _cab . GetRuneMetrics ( ' ' ) ; if ! _gcgd { _gdc , _gcgd = _cab . GetCharMetrics ( 32 ) ;
} ; if ! _gcgd { _gdc , _ = _fg . DefaultFont ( ) . GetRuneMetrics ( ' ' ) ; } ; _ebfb := _gdc . Wx * _gad ; _ac . Log . Trace ( "\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066" , _ebfb , _cgad , _cab , _bga ) ;
// _gcd scales by (_dda * _gfad/100, _dda) and translates vertically by _cgfc.
_gcd := _dca . NewMatrix ( _bga * _bbff , 0 , 0 , _bga , 0 , _fdef . _cgfc ) ; if _adbb { _ac . Log . Info ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071" , len ( _cefe ) , _cefe , _cgad ) ;
// NOTE(review): the Trace call below ends its format with %q but passes
// len(_cgad), an int — confirm whether _cgad itself was intended.
} ; _ac . Log . Trace ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071" , len ( _cefe ) , _cefe , len ( _cgad ) ) ; _cgaa := _dgc . getFillColor ( ) ;
// One iteration per decoded string; strings consisting of a single NUL rune are ignored.
_ecbg := _dgc . getStrokeColor ( ) ; for _dcbd , _cegf := range _cgad { _egc := [ ] rune ( _cegf ) ; if len ( _egc ) == 1 && _egc [ 0 ] == '\x00' { continue ; } ; _gdcf := _cefe [ _dcbd ] ; _fcce := _dgc . _dbf . CTM . Mult ( _dgc . _acbc ) . Mult ( _gcd ) ; _ggfa := 0.0 ; if len ( _egc ) == 1 && _egc [ 0 ] == 32 { _ggfa = _fdef . _ggf ;
} ; _bccb , _ggfd := _cab . GetCharMetrics ( _gdcf ) ; if ! _ggfd { _ac . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073" , _gdcf , _egc , _egc , _cab ) ;
return _gde . Errorf ( "\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064" , _cab . String ( ) , _gdcf ) ; } ; _dfge := _dca . Point { X : _bccb . Wx * _gad , Y : _bccb . Wy * _gad } ;
// _fbee is the glyph displacement without the _aaec term; _dbfe includes it
// and is the one applied to the text matrix at the end of the iteration.
_fbee := _dca . Point { X : ( _dfge . X * _bga + _ggfa ) * _bbff } ; _dbfe := _dca . Point { X : ( _dfge . X * _bga + _fdef . _aaec + _ggfa ) * _bbff } ; if _adbb { _ac . Log . Info ( "\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066" , _bga , _fdef . _aaec , _fdef . _ggf , _bbff ) ;
_ac . Log . Info ( "\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f" , _dfge , _fbee , _dbfe ) ; } ; _bffg := _acc ( _fbee ) ; _afac := _acc ( _dbfe ) ; _fbed := _dgc . _dbf . CTM . Mult ( _dgc . _acbc ) . Mult ( _bffg ) ;
if _cgcbc { _ac . Log . Info ( "e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a" + "\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a" + "\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073" , _dgc . _dbf . CTM , _dgc . _acbc , _afac , _gcdf ( _dgc . _dbf . CTM . Mult ( _dgc . _acbc ) . Mult ( _afac ) ) , _bffg , _fbed , _gcdf ( _fbed ) ) ;
// NOTE(review): when newTextMark reports failure the loop continues before
// _acbc.Concat(_afac) runs, so the text matrix is not advanced for that glyph.
} ; _cbcd , _ecbb := _dgc . newTextMark ( _c . ExpandLigatures ( _egc ) , _fcce , _gcdf ( _fbed ) , _dc . Abs ( _ebfb * _fcce . ScalingFactorX ( ) ) , _cab , _dgc . _abcb . _aaec , _cgaa , _ecbg , _gcbc , _cgad , _dcbd , _ddd ) ; if ! _ecbb { _ac . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067" ) ;
// NOTE(review): _cab was already dereferenced above, so the nil test below
// cannot be the first place a nil font is noticed.
continue ; } ; if _cab == nil { _ac . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e" ) ; } else if _cab . Encoder ( ) == nil { _ac . Log . Debug ( "E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073" , _cab ) ;
} else { if _dbfed , _gabb := _cab . Encoder ( ) . CharcodeToRune ( _gdcf ) ; _gabb { _cbcd . _dcdf = string ( _dbfed ) ; } ; } ; _ac . Log . Trace ( "i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073" , _dcbd , _gdcf , _cbcd , _fcce ) ;
_dgc . _ffc = append ( _dgc . _ffc , & _cbcd ) ; _dgc . _acbc . Concat ( _afac ) ; } ; return nil ; } ;
// _fdgdc builds a rulingList from path sections. It first passes the sections
// to _ecba, then walks every subpath that has at least two points and offers
// each pair of consecutive points, with the section's Color, to _dfegb; the
// rulings that _dfegb accepts are collected in order.
func _fdgdc ( _gdee [ ] pathSection ) rulingList { _ecba ( _gdee ) ; if _aebg { _ac . Log . Info ( "\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073" , len ( _gdee ) ) ;
} ; var _dfcc rulingList ; for _ , _baedg := range _gdee { for _ , _dfeaf := range _baedg . _ged { if len ( _dfeaf . _gdgd ) < 2 { continue ; } ; _afddf := _dfeaf . _gdgd [ 0 ] ; for _ , _aafa := range _dfeaf . _gdgd [ 1 : ] { if _ccbb , _bage := _dfegb ( _afddf , _aafa , _baedg . Color ) ;
_bage { _dfcc = append ( _dfcc , _ccbb ) ; } ; _afddf = _aafa ; } ; } ; } ; if _aebg { _ac . Log . Info ( "m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073" , _dfcc ) ; } ; return _dfcc ; } ;
// removeDuplicates calls sort on the receiver and returns a new list in which
// every ruling that equals the previously kept one is dropped. It returns nil
// for an empty list.
func ( _ecbca rulingList ) removeDuplicates ( ) rulingList { if len ( _ecbca ) == 0 { return nil ;
} ; _ecbca . sort ( ) ; _fage := rulingList { _ecbca [ 0 ] } ; for _ , _bccf := range _ecbca [ 1 : ] { if _bccf . equals ( _fage [ len ( _fage ) - 1 ] ) { continue ; } ; _fage = append ( _fage , _bccf ) ; } ; return _fage ; } ;
// _fgca returns the lines whose _bfcg value is strictly greater than `_dbcca`
// and, unless `_cddg` is -1, strictly less than `_cddg`. The input order is kept.
func _fgca ( _bbcf [ ] * textLine , _cddg , _dbcca float64 ) [ ] * textLine { var _dbda [ ] * textLine ;
for _ , _debe := range _bbcf { if _cddg == - 1 { if _debe . _bfcg > _dbcca { _dbda = append ( _dbda , _debe ) ; } ; } else { if _debe . _bfcg > _dbcca && _debe . _bfcg < _cddg { _dbda = append ( _dbda , _debe ) ; } ; } ; } ; return _dbda ; } ;
// setTextRise stores `_cegd` in the text state's _cgfc field, which renderText
// uses as the vertical translation of its glyph matrix. It is a no-op on a nil
// receiver.
func ( _dadg * textObject ) setTextRise ( _cegd float64 ) { if _dadg == nil { return ;
} ; _dadg . _abcb . _cgfc = _cegd ; } ;
// String returns a description of `k`.
// Kinds that have no entry in the _adfb table are reported with their numeric value.
func (_fddgg rulingKind) String() string {
	if name, known := _adfb[_fddgg]; known {
		return name
	}
	return _gde.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _fddgg)
}

// shapesState holds the state used while collecting path shapes.
type shapesState struct {
	_gebd  _dca.Matrix
	_dged  _dca.Matrix
	_efb   []*subpath // the collected subpaths (counted by String)
	_egfd  bool       // reported as "fresh" by String
	_eaeeb _dca.Point
	_cfda  *textObject
}

// writeText writes the text of every paragraph in the list to `_gage`,
// skipping paragraphs whose _bfge flag is set. After each written paragraph
// except the last list element, it emits a single space when _fgfb reports
// true for the paragraph and its successor, and two newlines otherwise. Two
// newlines are always written at the end. Errors from Write are not checked.
func (_gdga paraList) writeText(_gage _g.Writer) {
	lastIdx := len(_gdga) - 1
	for idx, para := range _gdga {
		if para._bfge {
			continue
		}
		para.writeText(_gage)
		if idx == lastIdx {
			continue
		}
		if _fgfb(para, _gdga[idx+1]) {
			_gage.Write([]byte("\u0020"))
			continue
		}
		_gage.Write([]byte("\u000a"))
		_gage.Write([]byte("\u000a"))
	}
	_gage.Write([]byte("\u000a"))
	_gage.Write([]byte("\u000a"))
}
// ToTextMark returns the public view of `tm`.
// Every exported field of the result is copied from the corresponding field of
// the internal mark; no other fields are set.
func (_fbgce *textMark) ToTextMark() TextMark {
	return TextMark{
		Text:         _fbgce._efgdc,
		Original:     _fbgce._dcdf,
		BBox:         _fbgce._fbae,
		Font:         _fbgce._abef,
		FontSize:     _fbgce._cggc,
		FillColor:    _fbgce._cccae,
		StrokeColor:  _fbgce._dcgea,
		Orientation:  _fbgce._aaad,
		DirectObject: _fbgce._dafb,
		ObjString:    _fbgce._facc,
		Tw:           _fbgce.Tw,
		Th:           _fbgce.Th,
		Tc:           _fbgce._accea,
		Index:        _fbgce._ccfa,
	}
}

// depthIndexes returns the keys of the bag's _aac map in ascending order, or
// nil when the map is empty.
func (_dabe *wordBag) depthIndexes() []int {
	count := len(_dabe._aac)
	if count == 0 {
		return nil
	}
	keys := make([]int, 0, count)
	for key := range _dabe._aac {
		keys = append(keys, key)
	}
	_ab.Ints(keys)
	return keys
}
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_gbe PageText) Marks() *TextMarkArray {
	return &TextMarkArray{_gcc: _gbe._fdaf}
}

// findGridTables looks for one table per tiling in `_aabc` using
// findTableGrid. Every table found is logged, appended to the result and has
// its cells marked; every paragraph in the second value returned by
// findTableGrid gets its _dcada flag set, whether or not a table was found.
func (_ggbfg paraList) findGridTables(_aabc []gridTiling) []*textTable {
	if _eadb {
		_ac.Log.Info("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073", len(_ggbfg))
		for idx, para := range _ggbfg {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, para)
		}
	}
	var tables []*textTable
	for idx, tiling := range _aabc {
		table, used := _ggbfg.findTableGrid(tiling)
		if table != nil {
			table.log(_gde.Sprintf("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064", idx))
			tables = append(tables, table)
			table.markCells()
		}
		for para := range used {
			para._dcada = true
		}
	}
	if _eadb {
		_ac.Log.Info("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s", len(tables))
	}
	return tables
}

// reduce returns a table without the rows and columns for which
// emptyCompositeRow / emptyCompositeColumn report true. When nothing is
// removed the receiver itself is returned; otherwise a new table is built and
// the remaining composite cells are copied to their compacted positions.
func (_cbea *textTable) reduce() *textTable {
	keptRows := make([]int, 0, _cbea._gebeeb)
	keptCols := make([]int, 0, _cbea._acddc)
	for row := 0; row < _cbea._gebeeb; row++ {
		if !_cbea.emptyCompositeRow(row) {
			keptRows = append(keptRows, row)
		}
	}
	for col := 0; col < _cbea._acddc; col++ {
		if !_cbea.emptyCompositeColumn(col) {
			keptCols = append(keptCols, col)
		}
	}
	if len(keptRows) == _cbea._gebeeb && len(keptCols) == _cbea._acddc {
		return _cbea
	}

	reduced := textTable{
		_aefef:  _cbea._aefef,
		_acddc:  len(keptCols),
		_gebeeb: len(keptRows),
		_cfgbb:  make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _eadb {
		_ac.Log.Info("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064", _cbea._acddc, _cbea._gebeeb, len(keptCols), len(keptRows))
		_ac.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_ac.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}
	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			paras, rect := _cbea.getComposite(oldCol, oldRow)
			if paras == nil {
				continue
			}
			if _eadb {
				_gde.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newCol, newRow, oldCol, oldRow, _dbdbb(paras.merge().text(), 50))
			}
			reduced.putComposite(newCol, newRow, paras, rect)
		}
	}
	return &reduced
}

// parasBBox returns the cell's paragraph list and its bounding rectangle.
func (_cceag compositeCell) parasBBox() (paraList, _fg.PdfRectangle) {
	return _cceag.paraList, _cceag.PdfRectangle
}

// _cecg reports whether the word's horizontal extent overlaps the bag's
// horizontal extent after the bag is widened by `_ffdbd` on both sides.
func _cecg(_abea *wordBag, _cagf *textWord, _ffdbd float64) bool {
	startsBeforeBagEnd := _cagf.Llx < _abea.Urx+_ffdbd
	endsAfterBagStart := _abea.Llx-_ffdbd < _cagf.Urx
	return startsBeforeBagEnd && endsAfterBagStart
}

// wordBag is a rectangle together with the words it contains.
type wordBag struct {
	_fg.PdfRectangle
	_fab         float64
	_egga, _bebd rulingList
	_ggfe        float64
	_aac         map[int][]*textWord // words grouped under integer keys
}

// growTable extends the table one row and/or column at a time until neither
// getDown nor getRight offers more paragraphs.
//
// In each round, if both a row below and a column to the right are available
// and they share an untaken last paragraph, the row is added first, getRight
// is queried again, and, if it still yields a column, that column is added
// with the shared paragraph placed in the new bottom-right cell. Otherwise a
// row is added if one is available, else a column.
func (_aeace *textTable) growTable() {
	appendRow := func(row paraList) {
		_aeace._gebeeb++
		for x := 0; x < _aeace._acddc; x++ {
			cell := row[x]
			_aeace.put(x, _aeace._gebeeb-1, cell)
		}
	}
	appendColumn := func(column paraList) {
		_aeace._acddc++
		for y := 0; y < _aeace._gebeeb; y++ {
			cell := column[y]
			_aeace.put(_aeace._acddc-1, y, cell)
		}
	}

	if _gddd {
		_aeace.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}
	for round := 0; ; round++ {
		grown := false
		down := _aeace.getDown()
		right := _aeace.getRight()
		if _gddd {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", round, _aeace)
			_gde.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", down)
			_gde.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", right)
		}
		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				appendRow(down)
				// The table has changed, so ask for the right-hand column again.
				if regrown := _aeace.getRight(); regrown != nil {
					appendColumn(regrown)
					_aeace.put(_aeace._acddc-1, _aeace._gebeeb-1, corner)
				}
				grown = true
			}
		}
		switch {
		case grown:
			// Row (and possibly column) already added in this round.
		case down != nil:
			appendRow(down)
		case right != nil:
			appendColumn(right)
		default:
			return
		}
	}
}
2023-10-07 13:58:01 +00:00
// Text returns the extracted page text.
2023-11-11 11:29:03 +00:00
// Text returns the page text held in `pt`.
func (_adbg PageText) Text() string {
	return _adbg._edee
}

// showTextAdjusted processes the elements of the array `_ffd` in order.
//
// A number translates the text matrix (_acbc) horizontally by
// -number * 0.001 * _dda. A string is resolved with TraceToDirectObject and
// rendered with renderText, with `_eea` passed through; the error returned by
// renderText is not checked. Any other element type, a number that cannot be
// read, or a string whose bytes cannot be obtained ends processing with an
// error.
func (_bcee *textObject) showTextAdjusted(_ffd *_dce.PdfObjectArray, _eea int) error {
	// Never set to true in this function, so the swap below does not run.
	swapAxes := false
	for _, elem := range _ffd.Elements() {
		switch elem.(type) {
		case *_dce.PdfObjectFloat, *_dce.PdfObjectInteger:
			amount, err := _dce.GetNumberAsFloat(elem)
			if err != nil {
				_ac.Log.Debug("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _ffd)
				return err
			}
			dx, dy := -amount*0.001*_bcee._abcb._dda, 0.0
			if swapAxes {
				dy, dx = dx, dy
			}
			shift := _acc(_dca.Point{X: dx, Y: dy})
			_bcee._acbc.Concat(shift)
		case *_dce.PdfObjectString:
			direct := _dce.TraceToDirectObject(elem)
			raw, isString := _dce.GetStringBytes(direct)
			if !isString {
				_ac.Log.Trace("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _ffd)
				return _dce.ErrTypeError
			}
			_bcee.renderText(direct, raw, _eea)
		default:
			_ac.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", elem, _ffd)
			return _dce.ErrTypeError
		}
	}
	return nil
}
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// String returns a human readable description of `ss`: the number of subpaths
// and the value of the flag it prints as "fresh".
func (_ddff *shapesState) String() string {
	return _gde.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", len(_ddff._efb), _ddff._egfd)
}

// addDiacritic appends `_bcged` to the text of the word's last mark and
// replaces that text with its NFKC normalization. It panics if the word has no
// marks.
func (_efdge *textWord) addDiacritic(_bcged string) {
	lastMark := _efdge._daafd[len(_efdge._daafd)-1]
	lastMark._efgdc = _fd.NFKC.String(lastMark._efgdc + _bcged)
}

// _deg is a package constant; its use is not visible in this part of the file.
const _deg = 20

// cachedImage pairs an image with a colorspace.
type cachedImage struct {
	_gb  *_fg.Image
	_cbg _fg.PdfColorspace
}

// _dfegb builds a ruling from the segment between the two points, with the
// given color. It returns (nil, false) when _cacg classifies the segment as
// _ccfb; otherwise it returns whatever lineRuling.asRuling yields.
func _dfegb(_cgbg, _eaeec _dca.Point, _cdecf _be.Color) (*ruling, bool) {
	kind := _cacg(_cgbg, _eaeec)
	if kind == _ccfb {
		return nil, false
	}
	line := lineRuling{_cged: _cgbg, _ggeaa: _eaeec, _gdggb: kind, Color: _cdecf}
	return line.asRuling()
}

// _ecba replaces, in place, both coordinates of every subpath point in the
// given sections with the result of _ebdgb. It does nothing when _egac is
// negative. With _aebg set, each point that _ffea no longer considers equal to
// its original is printed together with the shift.
func _ecba(_egfac []pathSection) {
	if _egac < 0.0 {
		return
	}
	if _aebg {
		_ac.Log.Info("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073", len(_egfac))
	}
	for secIdx, section := range _egfac {
		for pathIdx, path := range section._ged {
			for ptIdx, before := range path._gdgd {
				after := _dca.Point{X: _ebdgb(before.X), Y: _ebdgb(before.Y)}
				path._gdgd[ptIdx] = after
				if _aebg && !_ffea(before, after) {
					shift := _dca.Point{X: after.X - before.X, Y: after.Y - before.Y}
					_gde.Printf("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a", secIdx, pathIdx, ptIdx, before, after, shift)
				}
			}
		}
	}
}

// topoOrder returns an ordering of the paragraph indexes derived from the
// readBefore relation. It runs a depth-first traversal that, from each index,
// first visits every unvisited index for which readBefore reports true, and
// records the index once those visits finish. The recorded sequence is passed
// through _cgffb before being returned.
func (_cdaed paraList) topoOrder() []int {
	if _abfe {
		_ac.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	count := len(_cdaed)
	seen := make([]bool, count)
	finished := make([]int, 0, count)
	ordering := _cdaed.llyOrdering()

	var visit func(int)
	visit = func(from int) {
		seen[from] = true
		for to := 0; to < count; to++ {
			if !seen[to] && _cdaed.readBefore(ordering, from, to) {
				visit(to)
			}
		}
		finished = append(finished, from)
	}
	for start := 0; start < count; start++ {
		if !seen[start] {
			visit(start)
		}
	}
	return _cgffb(finished)
}

// _bgad is a list of single-character strings (bullet-like symbols and the
// letter "o"); its use is not visible in this part of the file.
var _bgad = []string{"\u2756", "\u27a2", "\u2713", "\u2022", "\uf0a7", "\u25a1", "\u2212", "\u25a0", "\u25aa", "\u006f"}
// setWordSpacing stores `_fcc` in the text state's _ggf field. It is a no-op
// on a nil receiver.
func (_eeg *textObject) setWordSpacing(_fcc float64) {
	if _eeg != nil {
		_eeg._abcb._ggf = _fcc
	}
}
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	// IncludeInlineStencilMasks presumably requests that inline stencil masks
	// be included among the extracted images; the consuming code is not in
	// this part of the file.
	IncludeInlineStencilMasks bool
}
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
type TextTable struct {
	_fg.PdfRectangle
	W, H  int
	Cells [][]TableCell
}

// bbox returns the mark's bounding rectangle.
func (_daga *textMark) bbox() _fg.PdfRectangle {
	return _daga.PdfRectangle
}

// _eab returns a textState with _gfad set to 100, the render mode set to
// RenderModeFill and _cefd set to the given rectangle.
func _eab(_cde _fg.PdfRectangle) textState {
	return textState{
		_gfad: 100,
		_bgcc: RenderModeFill,
		_cefd: _cde,
	}
}

// sort orders the words of every group in the bag's _aac map in place, using
// _gedbc as the comparison.
func (_bddbe *wordBag) sort() {
	for key := range _bddbe._aac {
		words := _bddbe._aac[key]
		_ab.Slice(words, func(i, j int) bool {
			return _gedbc(words[i], words[j]) < 0
		})
	}
}

// rulingKind identifies the kind of a ruling.
type rulingKind int

// bbox returns the paragraph's bounding rectangle.
func (_abgeb *textPara) bbox() _fg.PdfRectangle {
	return _abgeb.PdfRectangle
}
// empty reports whether the stack holds no entries.
func (_bgbe *stateStack) empty() bool {
	return len(*_bgbe) == 0
}

// text returns the _fedgb strings of all words in the bag, in the order given
// by allWords, joined with single spaces.
func (_cabec *wordBag) text() string {
	words := _cabec.allWords()
	parts := make([]string, 0, len(words))
	for _, word := range words {
		parts = append(parts, word._fedgb)
	}
	return _df.Join(parts, "\u0020")
}

// _de is a package flag, off by default; its use is not visible in this part
// of the file.
var _de = false

// _gadcf concatenates the text of the lines, each preceded by `_afdgg`. A line
// is followed by a space when the next line has the same _bfcg value and by a
// newline otherwise; for the last line the comparison value is 0.
func _gadcf(_dabde []*textLine, _afdgg string) string {
	var out _df.Builder
	lastIdx := len(_dabde) - 1
	for idx, line := range _dabde {
		content := line.text()
		current := line._bfcg
		following := 0.0
		if idx < lastIdx {
			following = _dabde[idx+1]._bfcg
		}
		out.WriteString(_afdgg)
		out.WriteString(content)
		if following == current {
			out.WriteString("\u0020")
		} else {
			out.WriteString("\u000a")
		}
	}
	return out.String()
}

// removeDuplicates drops every point that _ffea considers equal to the
// previously kept point. An empty subpath is left untouched.
func (_cegde *subpath) removeDuplicates() {
	if len(_cegde._gdgd) == 0 {
		return
	}
	kept := []_dca.Point{_cegde._gdgd[0]}
	for _, pt := range _cegde._gdgd[1:] {
		if _ffea(pt, kept[len(kept)-1]) {
			continue
		}
		kept = append(kept, pt)
	}
	_cegde._gdgd = kept
}

// textResult bundles a PageText with two integer counters.
type textResult struct {
	_ecca PageText
	_bbag int
	_ffg  int
}

// _adgb splits the list's _fbfaf string on newlines and returns the non-empty
// pieces, each preceded by the string `_cdbe` points to and followed by a
// newline.
func _adgb(_fgdg *list, _cdbe *string) string {
	var out _df.Builder
	for _, piece := range _df.Split(_fgdg._fbfaf, "\u000a") {
		if piece == "" {
			continue
		}
		out.WriteString(*_cdbe)
		out.WriteString(piece)
		out.WriteString("\u000a")
	}
	return out.String()
}

// _gfed returns the result of _edfa applied to the absolute Y difference and
// the absolute X difference of the two points, in that order.
func _gfed(_bcdc, _ggad _dca.Point) bool {
	absDx := _dc.Abs(_bcdc.X - _ggad.X)
	absDy := _dc.Abs(_bcdc.Y - _ggad.Y)
	return _edfa(absDy, absDx)
}

// ruling describes a ruling line by its kind, a primary coordinate and a
// range along the other axis.
type ruling struct {
	_gffa rulingKind // kind of ruling
	_adaa markKind
	_be.Color
	_cbag  float64 // primary coordinate
	_efgeb float64 // start of the range along the other axis
	_bbge  float64 // end of the range along the other axis
	_afdc  float64 // width; printed by String only when non-zero
}

// markCells sets the _dcada flag on every non-nil cell of the table.
func (_eccbe *textTable) markCells() {
	for row := 0; row < _eccbe._gebeeb; row++ {
		for col := 0; col < _eccbe._acddc; col++ {
			if cell := _eccbe.get(col, row); cell != nil {
				cell._dcada = true
			}
		}
	}
}

// The markKind values, numbered consecutively from 0.
const (
	_eafcg markKind = iota
	_efcd
	_babea
	_acaca
)

// gridTiling is a rectangle divided into grid tiles.
type gridTiling struct {
	_fg.PdfRectangle
	_abeb  []float64
	_cegbg []float64
	_faeca map[float64]map[float64]gridTile // tiles in a two-level map keyed by coordinates
}

// nextLine moves the line position by (0, -_gcg) using moveLP.
func (_bead *textObject) nextLine() {
	_bead.moveLP(0, -_bead._abcb._gcg)
}
// String returns a string describing `i`: the tile's rectangle followed by
// four one-character flags, where a flag that is not set is shown as an
// underscore.
func (_gaba gridTile) String() string {
	flag := func(set bool, label string) string {
		if !set {
			return "\u005f"
		}
		return label
	}
	left := flag(_gaba._cbge, "\u004c")
	right := flag(_gaba._dafe, "\u0052")
	bottom := flag(_gaba._beacf, "\u0042")
	top := flag(_gaba._cbbgc, "\u0054")
	return _gde.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", _gaba.PdfRectangle, left, right, bottom, top)
}

// _fgea appends the mark `_gcf` to `_cfdf` after setting its Offset to the
// current value of `*_gggc`, then advances `*_gggc` by the byte length of the
// mark's Text. The extended slice is returned.
func _fgea(_cfdf []TextMark, _gggc *int, _gcf TextMark) []TextMark {
	_gcf.Offset = *_gggc
	*_gggc += len(_gcf.Text)
	return append(_cfdf, _gcf)
}

// primaries returns the distinct primary coordinates (_cbag) of the rulings in
// ascending order.
func (_eggc rulingList) primaries() []float64 {
	distinct := make(map[float64]struct{}, len(_eggc))
	for _, r := range _eggc {
		distinct[r._cbag] = struct{}{}
	}
	values := make([]float64, 0, len(distinct))
	for value := range distinct {
		values = append(values, value)
	}
	_ab.Float64s(values)
	return values
}

// intSet is a set of ints.
type intSet map[int]struct{}
// String returns a string describing `tm`.
// The description holds the offset, the quoted text, the text's runes in hex,
// the bounding-box corners, the font description (cut to 50 bytes and followed
// by "...") and a " *M*" suffix when the mark is a meta mark.
func (tm TextMark) String() string {
	fontDesc := ""
	if font := tm.Font; font != nil {
		fontDesc = font.String()
		if len(fontDesc) > 50 {
			fontDesc = fontDesc[:50] + "\u002e\u002e\u002e"
		}
	}
	metaTag := ""
	if tm.Meta {
		metaTag = "\u0020\u002a\u004d\u002a"
	}
	box := tm.BBox
	return _gde.Sprintf("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d",
		tm.Offset, tm.Text, []rune(tm.Text), box.Llx, box.Lly, box.Urx, box.Ury, fontDesc, metaTag)
}

// split distributes the lines of the cell's paragraphs over the grid described
// by `rowCorridors` and `colCorridors` and returns the resulting table.
//
// The corridors are first passed through _gfgf together with the cell's own
// edges. Each pair of consecutive row values and consecutive column values then
// defines one grid rectangle. A line goes to the first rectangle (scanning row
// by row) for which _dgfe(rect, line) holds; lines matching no rectangle are
// dropped and rectangles that receive no lines stay empty in the table.
//
// Note that the cell's paragraph list is sorted in place (by Lly, then Llx).
func (cell compositeCell) split(rowCorridors, colCorridors []float64) *textTable {
	numRows := len(rowCorridors) + 1
	numCols := len(colCorridors) + 1
	if _eadb {
		_ac.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066",
			numCols, numRows, cell, rowCorridors, colCorridors)
		_gde.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a", len(cell.paraList))
		for i, para := range cell.paraList {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para.String())
		}
		_gde.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", len(cell.lines()))
		for i, line := range cell.lines() {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, line)
		}
	}

	rowCorridors = _gfgf(rowCorridors, cell.Ury, cell.Lly)
	colCorridors = _gfgf(colCorridors, cell.Llx, cell.Urx)

	table := textTable{
		_acddc:  numCols,
		_gebeeb: numRows,
		_cfgbb:  make(map[uint64]*textPara, numCols*numRows),
	}

	// Shares its backing array with cell.paraList, so this sort is visible to the caller.
	paras := cell.paraList
	_ab.Slice(paras, func(i, j int) bool {
		pi, pj := paras[i], paras[j]
		if pi.Lly != pj.Lly {
			return pi.Lly < pj.Lly
		}
		return pi.Llx < pj.Llx
	})

	// One rectangle per grid position, keyed by _bafcd(column, row).
	rects := make(map[uint64]_fg.PdfRectangle, numCols*numRows)
	for y, bottom := range rowCorridors[1:] {
		top := rowCorridors[y]
		for x, right := range colCorridors[1:] {
			rects[_bafcd(x, y)] = _fg.PdfRectangle{Llx: colCorridors[x], Urx: right, Lly: bottom, Ury: top}
		}
	}
	if _eadb {
		_ac.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073")
		_gde.Printf("\u0020\u0020\u0020\u0020")
		for x := 0; x < numCols; x++ {
			_gde.Printf("\u0025\u0033\u0030\u0064\u002c\u0020", x)
		}
		_gde.Println()
		for y := 0; y < numRows; y++ {
			_gde.Printf("\u0020\u0020\u0025\u0032\u0064\u003a", y)
			for x := 0; x < numCols; x++ {
				_gde.Printf("\u00256\u002e\u0032\u0066\u002c\u0020", rects[_bafcd(x, y)])
			}
			_gde.Println()
		}
	}

	// locate returns the (column, row) of the first grid rectangle accepted by
	// _dgfe for `line`, or (-1, -1) when there is none.
	locate := func(line *textLine) (int, int) {
		for y := 0; y < numRows; y++ {
			for x := 0; x < numCols; x++ {
				if _dgfe(rects[_bafcd(x, y)], line.PdfRectangle) {
					return x, y
				}
			}
		}
		return -1, -1
	}

	linesByCell := make(map[uint64][]*textLine, numCols*numRows)
	for _, line := range paras.lines() {
		x, y := locate(line)
		if x < 0 {
			continue
		}
		key := _bafcd(x, y)
		linesByCell[key] = append(linesByCell[key], line)
	}

	for y := 0; y < len(rowCorridors)-1; y++ {
		top, bottom := rowCorridors[y], rowCorridors[y+1]
		for x := 0; x < len(colCorridors)-1; x++ {
			inCell := linesByCell[_bafcd(x, y)]
			if len(inCell) == 0 {
				continue
			}
			rect := _fg.PdfRectangle{Llx: colCorridors[x], Urx: colCorridors[x+1], Lly: bottom, Ury: top}
			table.put(x, y, _ggdc(rect, inCell))
		}
	}
	return &table
}

// extractTables finds the tables among the paragraphs (using `tilings`) and
// returns the paragraph list with those tables applied. Lists shorter than
// _ffa are returned unchanged.
func (paras paraList) extractTables(tilings []gridTiling) paraList {
	if _eadb {
		_ac.Log.Debug("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(paras))
	}
	if len(paras) < _ffa {
		return paras
	}
	tables := paras.findTables(tilings)
	if _eadb {
		_ac.Log.Info("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(tables))
		for i, table := range tables {
			table.log(_gde.Sprintf("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064", i))
		}
	}
	return paras.applyTables(tables)
}

// reorder rearranges the list in place so that position i holds the paragraph
// that was previously at position order[i]. Positions not covered by `order`
// end up nil.
func (paras paraList) reorder(order []int) {
	shuffled := make(paraList, len(paras))
	for dst, src := range order {
		shuffled[dst] = paras[src]
	}
	copy(paras, shuffled)
}

// _ccag appends a copy of the template mark _bfec, with its text set to
// `text`, to `marks` via _fgea and returns the extended slice.
func _ccag(marks []TextMark, offset *int, text string) []TextMark {
	mark := _bfec
	mark.Text = text
	return _fgea(marks, offset, mark)
}

// Package-level switches and numeric settings. Their consumers lie outside this
// part of the file, so their individual meaning is not documented here.
const (
	_ddcg  = true
	_ade   = true
	_fefd  = true
	_gcda  = false
	_ggfbe = false
	_fedg  = 6
	_baecd = 3.0
	_aee   = 200
	_dcea  = true
	_aagdg = true
	_efbd  = true
	_dfcb  = true
	_ffdd  = false
)

// depthRange returns, in increasing order, the keys of the bag's `_aac` map
// that lie in the closed interval [minIdx, maxIdx]. It returns nil when no key
// qualifies.
func (bag *wordBag) depthRange(minIdx, maxIdx int) []int {
	var hits []int
	for idx := range bag._aac {
		if idx < minIdx || idx > maxIdx {
			continue
		}
		hits = append(hits, idx)
	}
	if len(hits) == 0 {
		return nil
	}
	_ab.Ints(hits)
	return hits
}

// pullWord appends `word` to the line and removes it from `bag` under the key
// `depthIdx`.
func (line *textLine) pullWord(bag *wordBag, word *textWord, depthIdx int) {
	line.appendWord(word)
	bag.removeWord(word, depthIdx)
}
// GetContentStreamOps returns the contentStreamOps field of `pt`.
func (pt *PageText) GetContentStreamOps() *_dcg.ContentStreamOperations {
	return pt._eaee
}

// _ebge returns the smallest rectangle that contains both `a` and `b`.
func _ebge(a, b _fg.PdfRectangle) _fg.PdfRectangle {
	var union _fg.PdfRectangle
	union.Llx = _dc.Min(a.Llx, b.Llx)
	union.Lly = _dc.Min(a.Lly, b.Lly)
	union.Urx = _dc.Max(a.Urx, b.Urx)
	union.Ury = _dc.Max(a.Ury, b.Ury)
	return union
}

// merge sorts the paragraphs into reading order (in place) and combines them
// into a single paragraph whose rectangle is the union of all rectangles and
// whose lines are the concatenation of all lines. It returns nil for an empty
// list.
func (paras paraList) merge() *textPara {
	_ac.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(paras))
	if len(paras) == 0 {
		return nil
	}
	paras.sortReadingOrder()
	first := paras[0]
	bounds := first.PdfRectangle
	merged := first._gfaae
	for _, para := range paras[1:] {
		bounds = _ebge(bounds, para.PdfRectangle)
		merged = append(merged, para._gfaae...)
	}
	return _ggdc(bounds, merged)
}

// pullWord adds `word` to the bag under the key `depthIdx`: the bag's rectangle
// grows to include the word, `_fab` is raised to the word's `_adecc` when that
// is larger, and the word is recorded in `taken[depthIdx]`.
// `taken[depthIdx]` must already be a non-nil map.
func (bag *wordBag) pullWord(word *textWord, depthIdx int, taken map[int]map[*textWord]struct{}) {
	bag.PdfRectangle = _ebge(bag.PdfRectangle, word.PdfRectangle)
	if bag._fab < word._adecc {
		bag._fab = word._adecc
	}
	bag._aac[depthIdx] = append(bag._aac[depthIdx], word)
	taken[depthIdx][word] = struct{}{}
}

// blocked reports whether one of the bag's ruling lists separates `word` from
// the bag. The `_egga` list is consulted when the word lies entirely to one
// side of the bag in x, the `_bebd` list when it lies entirely above or below
// it in y.
func (bag *wordBag) blocked(word *textWord) bool {
	switch {
	case word.Urx < bag.Llx:
		from := _fcad(word.PdfRectangle)
		to := _cccab(bag.PdfRectangle)
		if bag._egga.blocks(from, to) {
			if _gbbdd {
				_ac.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073", word, bag)
			}
			return true
		}
	case bag.Urx < word.Llx:
		from := _fcad(bag.PdfRectangle)
		to := _cccab(word.PdfRectangle)
		if bag._egga.blocks(from, to) {
			if _gbbdd {
				_ac.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s", word, bag)
			}
			return true
		}
	}

	switch {
	case word.Ury < bag.Lly:
		from := _fedf(word.PdfRectangle)
		to := _bdac(bag.PdfRectangle)
		if bag._bebd.blocks(from, to) {
			if _gbbdd {
				_ac.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073", word, bag)
			}
			return true
		}
	case bag.Ury < word.Lly:
		from := _fedf(bag.PdfRectangle)
		to := _bdac(word.PdfRectangle)
		if bag._bebd.blocks(from, to) {
			if _gbbdd {
				_ac.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s", word, bag)
			}
			return true
		}
	}
	return false
}

// _bddc builds a list node from its lines, its `_aeaa` string and its child
// nodes.
func _bddc(lines []*textLine, kind string, children []*list) *list {
	node := list{_ggcfd: lines, _aeaa: kind, _edge: children}
	return &node
}

// findPrimSec returns the first ruling whose primary coordinate matches
// `primary` (per _ebfaf) and whose [_efgeb, _bbge] extent, widened by _cebe on
// both ends, contains `secondary`. It returns nil when there is no such ruling.
func (rl rulingList) findPrimSec(primary, secondary float64) *ruling {
	for _, r := range rl {
		if !_ebfaf(r._cbag - primary) {
			continue
		}
		if r._efgeb-_cebe <= secondary && secondary <= r._bbge+_cebe {
			return r
		}
	}
	return nil
}

// _cdfc returns the negated lower y coordinate of `b`'s bounding box.
func _cdfc(b bounded) float64 {
	box := b.bbox()
	return -box.Lly
}

// _debff returns the horizontal gap from the right edge of `right` to the left
// edge of `left`, i.e. left.Llx - right.Urx.
func _debff(left, right bounded) float64 {
	return left.bbox().Llx - right.bbox().Urx
}

// updateBBox grows the cell's rectangle so that it contains the rectangle of
// every paragraph in the cell.
func (cell *compositeCell) updateBBox() {
	for i := range cell.paraList {
		cell.PdfRectangle = _ebge(cell.PdfRectangle, cell.paraList[i].PdfRectangle)
	}
}

// _cdec returns a new subpath whose only point is `start`.
func _cdec(start _dca.Point) *subpath {
	points := []_dca.Point{start}
	return &subpath{_gdgd: points}
}

// _fbba compares `a` and `b` by _bfc first; when _ebfaf treats that difference
// as a tie, the _gedbc difference is returned instead.
func _fbba(a, b bounded) float64 {
	if diff := _bfc(a, b); !_ebfaf(diff) {
		return diff
	}
	return _gedbc(a, b)
}

// fontsize returns the `_ceacg` value of the paragraph's first line.
// It panics if the paragraph has no lines.
func (para *textPara) fontsize() float64 {
	first := para._gfaae[0]
	return first._ceacg
}
// _ebage builds one "bullet" list node for every line stored under the first
// key returned by _ebecd(byKey). For each such head line it
//   - asks _gdfa for child nodes (only when more than one key exists),
//   - gathers the follow-on lines with _bdca, bounded by the depth of the first
//     child's first line when there is one, otherwise by the depth of the next
//     head line (-1 after the last head),
//   - stores the text produced by _gadcf in the node's `_fbfaf` field.
//
// The result is never nil.
func _ebage(allLines []*textLine, byKey map[float64][]*textLine) []*list {
	keys := _ebecd(byKey)
	nodes := []*list{}
	if len(keys) == 0 {
		return nodes
	}

	// Index into `keys` handed to _gdfa for the nested level.
	const childKeyIdx = 1

	heads := byKey[keys[0]]
	for i, head := range heads {
		start := head._bfcg
		next := -1.0
		if i+1 < len(heads) {
			next = heads[i+1]._bfcg
		}

		children := []*list{}
		if childKeyIdx < len(keys) {
			children = _gdfa(allLines, byKey, keys, childKeyIdx, start, next)
		}

		limit := next
		if len(children) > 0 {
			if firstChild := children[0]; len(firstChild._ggcfd) > 0 {
				limit = firstChild._ggcfd[0]._bfcg
			}
		}

		body := []*textLine{head}
		body = append(body, _bdca(head, allLines, keys, start, limit)...)

		node := _bddc(body, "\u0062\u0075\u006c\u006c\u0065\u0074", children)
		node._fbfaf = _gadcf(body, "")
		nodes = append(nodes, node)
	}
	return nodes
}

// asRuling converts the rectangle into a ruling of the rectangle's kind.
// For a vertical ruling (_cbab) the primary coordinate is the x centre and the
// extent runs from Lly to Ury; for a horizontal ruling (_faccd) the primary is
// the y centre and the extent runs from Llx to Urx. The width comes from
// checkWidth. The second result is false (and the ruling nil) when checkWidth
// rejects the rectangle or the kind is neither vertical nor horizontal.
func (rr rectRuling) asRuling() (*ruling, bool) {
	out := ruling{_gffa: rr._eegf, Color: rr.Color, _adaa: _babea}

	switch rr._eegf {
	case _cbab:
		width, ok := rr.checkWidth(rr.Llx, rr.Urx)
		if !ok {
			if _cadc {
				_ac.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", rr)
			}
			return nil, false
		}
		out._cbag = 0.5 * (rr.Llx + rr.Urx)
		out._efgeb = rr.Lly
		out._bbge = rr.Ury
		out._afdc = width
	case _faccd:
		width, ok := rr.checkWidth(rr.Lly, rr.Ury)
		if !ok {
			if _cadc {
				_ac.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", rr)
			}
			return nil, false
		}
		out._cbag = 0.5 * (rr.Lly + rr.Ury)
		out._efgeb = rr.Llx
		out._bbge = rr.Urx
		out._afdc = width
	default:
		_ac.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", rr._eegf)
		return nil, false
	}
	return &out, true
}
// String returns a string describing the current state of the textState stack:
// a header line with the stack size followed by one indexed line per entry.
// Nil entries are shown as "<nil>".
func (stack *stateStack) String() string {
	states := *stack
	rows := make([]string, 0, len(states)+1)
	rows = append(rows, _gde.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(states)))
	for i, state := range states {
		desc := "\u003c\u006e\u0069l\u003e"
		if state != nil {
			desc = state.String()
		}
		rows = append(rows, _gde.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", i, desc))
	}
	return _df.Join(rows, "\u000a")
}
// String returns a description of `state`: the values printed under the labels
// tc, tw and tfs, plus the quoted base font name ("[NOT SET]" when the state
// has no font).
func (state *textState) String() string {
	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	if font := state._ecf; font != nil {
		fontName = font.BaseFont()
	}
	return _gde.Sprintf("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071",
		state._aaec, state._ggf, state._dda, fontName)
}

// moveText translates the text object by (tx, ty); it simply forwards to moveLP.
func (to *textObject) moveText(tx, ty float64) {
	to.moveLP(tx, ty)
}

// clearPath discards the accumulated path (`_efb`) and resets the `_egfd` flag.
func (ss *shapesState) clearPath() {
	ss._efb, ss._egfd = nil, false
	if _bdefa {
		_ac.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", ss)
	}
}

// computeBbox returns the union of the rectangles of all non-nil cells in the
// table. The zero rectangle is returned when every cell is nil.
func (table *textTable) computeBbox() _fg.PdfRectangle {
	var (
		bounds _fg.PdfRectangle
		seeded bool
	)
	for y := 0; y < table._gebeeb; y++ {
		for x := 0; x < table._acddc; x++ {
			cell := table.get(x, y)
			switch {
			case cell == nil:
				// Empty grid position: nothing to add.
			case seeded:
				bounds = _ebge(bounds, cell.PdfRectangle)
			default:
				bounds, seeded = cell.PdfRectangle, true
			}
		}
	}
	return bounds
}

// _aaca starts a new line from the word returned by bag.firstWord(depthIdx):
// the line takes that word's rectangle, `_adecc` and `_adgge` values, and the
// word is then moved from the bag into the line.
func _aaca(bag *wordBag, depthIdx int) *textLine {
	word := bag.firstWord(depthIdx)
	line := &textLine{
		PdfRectangle: word.PdfRectangle,
		_ceacg:       word._adecc,
		_bfcg:        word._adgge,
	}
	line.pullWord(bag, word, depthIdx)
	return line
}

// _ggdc returns a new paragraph with the given rectangle and lines.
func _ggdc(bounds _fg.PdfRectangle, lines []*textLine) *textPara {
	para := textPara{PdfRectangle: bounds, _gfaae: lines}
	return &para
}

// _bfc returns _cdfc(a) - _cdfc(b).
func _bfc(a, b bounded) float64 {
	da, db := _cdfc(a), _cdfc(b)
	return da - db
}
// _dcdb reports whether `text` has at least _ggg runes, ends in a rune from the
// Unicode Hyphen range table, and the rune just before that hyphen exists and
// is not white space.
func _dcdb(text string) bool {
	if _f.RuneCountInString(text) < _ggg {
		return false
	}
	last, lastSize := _f.DecodeLastRuneInString(text)
	if !(lastSize > 0 && _fc.Is(_fc.Hyphen, last)) {
		return false
	}
	prev, prevSize := _f.DecodeLastRuneInString(text[:len(text)-lastSize])
	if prevSize <= 0 {
		return false
	}
	return !_fc.IsSpace(prev)
}

// lineTo adds the point (x, y) to the current path.
func (ss *shapesState) lineTo(x, y float64) {
	if _bdefa {
		_ac.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", x, y, ss.devicePoint(x, y))
	}
	ss.addPoint(x, y)
}

// _fcgc reports whether the x extents of `a` and `b` overlap (touching edges
// count as overlapping).
func _fcgc(a, b _fg.PdfRectangle) bool {
	if b.Llx > a.Urx {
		return false
	}
	return a.Llx <= b.Urx
}

// text returns the paragraph's text as produced by writeText.
func (para *textPara) text() string {
	var buf _dfg.Buffer
	para.writeText(&buf)
	return buf.String()
}

// lines returns the lines of all paragraphs, in list order. The result is nil
// when there are no lines.
func (paras paraList) lines() []*textLine {
	var all []*textLine
	for i := range paras {
		all = append(all, paras[i]._gfaae...)
	}
	return all
}

// _fedf returns a horizontal ruling along the top edge of `rect`.
func _fedf(rect _fg.PdfRectangle) *ruling {
	top := ruling{_gffa: _faccd, _cbag: rect.Ury, _efgeb: rect.Llx, _bbge: rect.Urx}
	return &top
}

// toTextMarks returns the text marks of all paragraphs whose `_bfge` flag is
// not set, with running offsets. Between a paragraph and its successor a single
// space mark is inserted when _fgfb(para, next) holds, otherwise two line-break
// marks. Two line-break marks always terminate the result.
func (paras paraList) toTextMarks() []TextMark {
	var marks []TextMark
	offset := 0
	lastIdx := len(paras) - 1
	for i, para := range paras {
		if para._bfge {
			continue
		}
		marks = append(marks, para.toTextMarks(&offset)...)
		if i == lastIdx {
			continue
		}
		if _fgfb(para, paras[i+1]) {
			marks = _ccag(marks, &offset, "\u0020")
			continue
		}
		marks = _ccag(marks, &offset, "\u000a")
		marks = _ccag(marks, &offset, "\u000a")
	}
	marks = _ccag(marks, &offset, "\u000a")
	marks = _ccag(marks, &offset, "\u000a")
	return marks
}

// _bbagb returns the keys of `tiles` in increasing order. The result is never
// nil.
func _bbagb(tiles map[float64]gridTile) []float64 {
	keys := make([]float64, len(tiles))
	i := 0
	for key := range tiles {
		keys[i] = key
		i++
	}
	_ab.Float64s(keys)
	return keys
}

// _bfgee computes running start positions for `count` groups, where group i
// occupies len(sizes[i])+1 slots. It returns the start position of every group
// and the total number of slots.
func _bfgee(count int, sizes map[int][]float64) ([]int, int) {
	starts := make([]int, count)
	total := 0
	for i := range starts {
		starts[i] = total
		total += len(sizes[i]) + 1
	}
	return starts, total
}
// String returns a human readable description of the ruling list: "{ EMPTY }"
// for an empty list, the vertical and horizontal counts when either is zero,
// and otherwise the counts together with the rectangle spanned by the first and
// last vertical and horizontal rulings.
func (rl rulingList) String() string {
	if len(rl) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := rl.vertsHorzs()
	nVerts, nHorzs := len(verts), len(horzs)
	if nVerts == 0 || nHorzs == 0 {
		return _gde.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", nVerts, nHorzs)
	}
	bounds := _fg.PdfRectangle{
		Llx: verts[0]._cbag,
		Urx: verts[nVerts-1]._cbag,
		Lly: horzs[nHorzs-1]._cbag,
		Ury: horzs[0]._cbag,
	}
	return _gde.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", nVerts, nHorzs, bounds)
}

// depthBand returns the sorted keys of the bag's word map that lie between the
// depth indexes of `minDepth` and `maxDepth` (inclusive). It returns nil for an
// empty bag.
func (bag *wordBag) depthBand(minDepth, maxDepth float64) []int {
	if len(bag._aac) == 0 {
		return nil
	}
	lo := bag.getDepthIdx(minDepth)
	hi := bag.getDepthIdx(maxDepth)
	return bag.depthRange(lo, hi)
}

// _bfec is the template for marks inserted by the extractor itself (see _ccag):
// a meta mark with white fill and stroke colours.
var _bfec = TextMark{
	Text:        "\u005b\u0058\u005d",
	Original:    "\u0020",
	Meta:        true,
	FillColor:   _be.White,
	StrokeColor: _be.White,
}

// depth returns -1 when the paragraph's `_bfge` flag is set, otherwise the
// depth (`_bfcg`) of its first line, falling back to the depth of `_edce` when
// the paragraph has no lines.
func (para *textPara) depth() float64 {
	switch {
	case para._bfge:
		return -1.0
	case len(para._gfaae) > 0:
		return para._gfaae[0]._bfcg
	default:
		return para._edce.depth()
	}
}

// _eabef returns the larger of `a` and `b`.
func _eabef(a, b int) int {
	if b >= a {
		return b
	}
	return a
}

// _agfe classifies the segment from `p1` to `p2` by handing its absolute x and
// y extents, together with _edec, to _ageb.
func _agfe(p1, p2 _dca.Point) rulingKind {
	dx := _dc.Abs(p1.X - p2.X)
	dy := _dc.Abs(p1.Y - p2.Y)
	return _ageb(dx, dy, _edec)
}
// PageFonts represents extracted fonts on a PDF page.
// Its only field, Fonts, holds the extracted Font values as a slice.
type PageFonts struct { Fonts [ ] Font ; } ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct {
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Text is the extracted text.
Text string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// BBox is the bounding box of the text.
2023-11-11 11:29:03 +00:00
BBox _fg . PdfRectangle ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Font is the font the text was drawn with.
2023-11-11 11:29:03 +00:00
Font * _fg . PdfFont ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2023-11-11 11:29:03 +00:00
FillColor _be . Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2023-11-11 11:29:03 +00:00
StrokeColor _be . Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Orientation is the text orientation.
Orientation int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// DirectObject is the underlying PdfObject (Text Object) that represents the visible texts. This is introduced to get
// simple access to the TextObject in case editing or replacement of some text is needed, e.g. during redaction.
2023-11-11 11:29:03 +00:00
DirectObject _dce . PdfObject ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// ObjString is a decoded string operand of a text-showing operator. It has the same value as `Text` attribute except
// when many glyphs are represented with the same Text Object that contains multiple length string operand in which case
// ObjString spans more than one character string that falls in different TextMark objects.
2023-11-11 11:29:03 +00:00
// NOTE(review): the fields Tw, Th, Tc, Index, _dgbe and _acbcf that follow ObjString on the next
// line are undocumented in this file. Tw, Th and Tc presumably mirror the PDF text-state word
// spacing, horizontal scaling and character spacing; confirm before relying on that.
//
// rulingList.sortStrict, which starts on the next line after the struct closes, sorts the list in
// place: first by `_gffa` in decreasing order, then by `_cbag` in increasing order when _ebfaf
// does not treat the two values as equal, then by `_efgeb`, and finally by `_bbge`.
ObjString [ ] string ; Tw float64 ; Th float64 ; Tc float64 ; Index int ; _dgbe bool ; _acbcf * TextTable ; } ; func ( _fefbb rulingList ) sortStrict ( ) { _ab . Slice ( _fefbb , func ( _dfdeg , _gafe int ) bool { _gadgd , _dcfa := _fefbb [ _dfdeg ] , _fefbb [ _gafe ] ; _aeaba , _ebcd := _gadgd . _gffa , _dcfa . _gffa ;
if _aeaba != _ebcd { return _aeaba > _ebcd ; } ; _ffgd , _dcdac := _gadgd . _cbag , _dcfa . _cbag ; if ! _ebfaf ( _ffgd - _dcdac ) { return _ffgd < _dcdac ; } ; _ffgd , _dcdac = _gadgd . _efgeb , _dcfa . _efgeb ; if _ffgd != _dcdac { return _ffgd < _dcdac ; } ; return _gadgd . _bbge < _dcfa . _bbge ;
// _cgabd, which starts on the next line once sortStrict closes, first passes `_febe` to _ecba and
// then walks every subpath (`_ged`) of every section: subpaths that are not quadrilaterals are
// skipped, and each one for which makeRectRuling succeeds contributes a ruling to the result.
} ) ; } ; func _cgabd ( _febe [ ] pathSection ) rulingList { _ecba ( _febe ) ; if _aebg { _ac . Log . Info ( "\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs" , len ( _febe ) ) ; } ; var _cbbe rulingList ;
for _ , _acea := range _febe { for _ , _eeeb := range _acea . _ged { if ! _eeeb . isQuadrilateral ( ) { if _aebg { _ac . Log . Error ( "!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073" , _eeeb ) ; } ; continue ; } ; if _gadb , _deded := _eeeb . makeRectRuling ( _acea . Color ) ;
_deded { _cbbe = append ( _cbbe , _gadb ) ; } else { if _cadc { _ac . Log . Error ( "\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073" , _eeeb ) ; } ; } ; } ; } ; if _aebg { _ac . Log . Info ( "\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073" , _cbbe . String ( ) ) ;
// The next line also declares: subpath (a point list `_gdgd` plus the flag `_bbbb`), compositeCell
// (a rectangle together with the paragraphs it contains) and _adfb (the display names "none",
// "horizontal" and "vertical" for the ruling kinds _ccfb, _faccd and _cbab).
} ; return _cbbe ; } ; type subpath struct { _gdgd [ ] _dca . Point ; _bbbb bool ; } ; type compositeCell struct { _fg . PdfRectangle ; paraList ; } ; var _adfb = map [ rulingKind ] string { _ccfb : "\u006e\u006f\u006e\u0065" , _faccd : "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c" , _cbab : "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c" } ;
2023-10-07 13:58:01 +00:00
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// Tables returns the tables extracted from the page.
func (pt PageText) Tables() []TextTable {
	if _eadb {
		_ac.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(pt._agdc))
	}
	return pt._agdc
}

// removeWord removes `word` from the words stored under `depthIdx` (using
// _gced) and deletes the map entry altogether when no words remain there.
func (bag *wordBag) removeWord(word *textWord, depthIdx int) {
	remaining := _gced(bag._aac[depthIdx], word)
	if len(remaining) > 0 {
		bag._aac[depthIdx] = remaining
		return
	}
	delete(bag._aac, depthIdx)
}

// parseStructElement fills the element from the structure dictionary `obj`.
//
// The "S" entry (as a string) is stored in `_affb` and the "Pg" entry in
// `_ebad`. The "K" entry decides the rest:
//   - an integer is stored in `_ecaf`;
//   - a reference is treated like a one-element array holding that reference;
//   - an array sets `_ecaf` to -1 first. If it holds exactly one integer, that
//     integer becomes `_ecaf` and nothing else changes. Otherwise every integer
//     element overwrites `_ecaf` and every other element is parsed recursively
//     into a child stored in `_dfcd`.
//
// Any other kind of "K" entry leaves `_ecaf` and `_dfcd` untouched. When `obj`
// is not a dictionary the element is left unchanged and a debug message is
// logged.
func (se *structElement) parseStructElement(obj _dce.PdfObject) {
	dict, isDict := _dce.GetDict(obj)
	if !isDict {
		_ac.Log.Debug("\u0070\u0061\u0072\u0073\u0065\u0053\u0074\u0072u\u0063\u0074\u0045le\u006d\u0065\u006e\u0074\u003a\u0020d\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006f\u0062\u006a\u0065\u0063t\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075n\u0064\u002e")
		return
	}

	typeObj := dict.Get("\u0053")
	pageObj := dict.Get("\u0050\u0067")
	typeName := ""
	if typeObj != nil {
		typeName = typeObj.String()
	}
	kidsObj := dict.Get("\u004b")

	se._affb = typeName
	se._ebad = pageObj

	var kids *_dce.PdfObjectArray
	switch k := kidsObj.(type) {
	case *_dce.PdfObjectInteger:
		se._ecaf = int64(*k)
		return
	case *_dce.PdfObjectReference:
		kids = _dce.MakeArray(k)
	case *_dce.PdfObjectArray:
		kids = k
	default:
		return
	}

	se._ecaf = -1
	if kids.Len() == 1 {
		if id, isInt := kids.Elements()[0].(*_dce.PdfObjectInteger); isInt {
			se._ecaf = int64(*id)
			return
		}
	}

	children := []structElement{}
	for _, kid := range kids.Elements() {
		if id, isInt := kid.(*_dce.PdfObjectInteger); isInt {
			se._ecaf = int64(*id)
			continue
		}
		child := &structElement{}
		child.parseStructElement(kid)
		children = append(children, *child)
	}
	se._dfcd = children
}

// bbox returns the word's bounding rectangle.
func (word *textWord) bbox() _fg.PdfRectangle {
	return word.PdfRectangle
}

// push appends a copy of `state` to the stack, so later changes to `state` do
// not affect the stored entry.
func (stack *stateStack) push(state *textState) {
	saved := *state
	*stack = append(*stack, &saved)
}
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// TableCell is a cell in a TextTable.
type TableCell struct {
	_fg.PdfRectangle

	// Text is the extracted text.
	Text string

	// Marks holds the TextMarks corresponding to the text in Text.
	Marks TextMarkArray
}

// _fede returns the elements three levels below `root` (its
// great-grandchildren) whose `_affb` value is "L". The result is never nil.
func _fede(root structElement) []structElement {
	found := []structElement{}
	for _, child := range root._dfcd {
		for _, grandchild := range child._dfcd {
			for _, elem := range grandchild._dfcd {
				if elem._affb == "\u004c" {
					found = append(found, elem)
				}
			}
		}
	}
	return found
}

// _egfaa returns a new word bag that contains only `word`, filed under the
// depth index _ebaf(word._adgge). The bag starts with the word's rectangle and
// `_adecc` value and keeps `pageSize` and the two ruling lists for later use.
func _egfaa(word *textWord, pageSize float64, vertRulings, horzRulings rulingList) *wordBag {
	depthIdx := _ebaf(word._adgge)
	return &wordBag{
		_aac:         map[int][]*textWord{depthIdx: {word}},
		PdfRectangle: word.PdfRectangle,
		_fab:         word._adecc,
		_ggfe:        pageSize,
		_egga:        vertRulings,
		_bebd:        horzRulings,
	}
}

// vertsHorzs splits the list into its vertical (_cbab) and horizontal (_faccd)
// rulings, preserving order. Rulings of any other kind are dropped.
func (rl rulingList) vertsHorzs() (rulingList, rulingList) {
	var verts, horzs rulingList
	for _, r := range rl {
		switch r._gffa {
		case _cbab:
			verts = append(verts, r)
		case _faccd:
			horzs = append(horzs, r)
		}
	}
	return verts, horzs
}

// _bdac returns a horizontal ruling along the bottom edge of `rect`.
func _bdac(rect _fg.PdfRectangle) *ruling {
	return &ruling{_gffa: _faccd, _cbag: rect.Lly, _efgeb: rect.Llx, _bbge: rect.Urx}
}

// _bada compares `a` and `b` by _gedbc first; when _ebfaf treats that
// difference as a tie, the _bfc difference is returned instead.
func _bada(a, b bounded) float64 {
	if diff := _gedbc(a, b); !_ebfaf(diff) {
		return diff
	}
	return _bfc(a, b)
}

// _aeea removes the last rune from the text of the last mark in `marks` and
// updates `*offset` to match.
//
// If the last mark consists of a single rune, the whole mark is dropped and
// `*offset` is set to the end of the new last mark (or to the dropped mark's
// own offset when no mark remains). Otherwise the mark's text is shortened with
// _badfg and `*offset` is reduced by the number of bytes removed.
//
// An empty `marks` slice is returned unchanged.
func _aeea(marks []TextMark, offset *int) []TextMark {
	if len(marks) == 0 {
		return marks
	}
	lastIdx := len(marks) - 1
	// Work on the element itself, not a copy, so the shortened text is kept.
	last := &marks[lastIdx]

	if len([]rune(last.Text)) == 1 {
		droppedAt := last.Offset
		marks = marks[:lastIdx]
		if lastIdx == 0 {
			*offset = droppedAt
			return marks
		}
		prev := marks[lastIdx-1]
		*offset = prev.Offset + len(prev.Text)
		return marks
	}

	trimmed := _badfg(last.Text)
	*offset += len(trimmed) - len(last.Text)
	last.Text = trimmed
	return marks
}

// moveLP concatenates a translation by (tx, ty) onto the matrix `_abcf` and
// then copies the result into `_acbc`.
func (to *textObject) moveLP(tx, ty float64) {
	to._abcf.Concat(_dca.NewMatrix(1, 0, 0, 1, tx, ty))
	to._acbc = to._abcf
}
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Options holds the extractor options.
type Options struct {
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// DisableDocumentTags specifies whether to use the document tags during list extraction.
DisableDocumentTags bool ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// ApplyCropBox will extract page text based on page cropbox if set to `true`.
ApplyCropBox bool ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// UseSimplerExtractionProcess will skip topological text ordering and table processing.
//
// NOTE: While normally the extra processing is beneficial, it can also lead to problems when it does not work.
// Thus it is a flag to allow the user to control this process.
//
// Skipping some extraction processes would also lead to the reduced processing time.
2023-11-11 11:29:03 +00:00
UseSimplerExtractionProcess bool ; } ; func ( _bddd * shapesState ) addPoint ( _faca , _gbgd float64 ) { _fadc := _bddd . establishSubpath ( ) ; _gbdg := _bddd . devicePoint ( _faca , _gbgd ) ; if _fadc == nil { _bddd . _egfd = true ; _bddd . _eaeeb = _gbdg ; } else { _fadc . add ( _gbdg ) ;
} ; } ; func ( _dfac paraList ) sortReadingOrder ( ) { _ac . Log . Trace ( "\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d" , len ( _dfac ) ) ;
if len ( _dfac ) <= 1 { return ; } ; _dfac . computeEBBoxes ( ) ; _ab . Slice ( _dfac , func ( _efgb , _afgb int ) bool { return _fbba ( _dfac [ _efgb ] , _dfac [ _afgb ] ) <= 0 } ) ; } ; func _begea ( _abfd , _egbg float64 ) bool { return _dc . Abs ( _abfd - _egbg ) <= _cebe } ; func ( _afcbe * textTable ) get ( _cgffg , _cdebg int ) * textPara { return _afcbe . _cfgbb [ _bafcd ( _cgffg , _cdebg ) ] ;
} ; func ( _cabg * textLine ) bbox ( ) _fg . PdfRectangle { return _cabg . PdfRectangle } ; const ( RenderModeStroke RenderMode = 1 << iota ; RenderModeFill ; RenderModeClip ; ) ; func ( _gbegf rulingList ) primMinMax ( ) ( float64 , float64 ) { _bbec , _beafa := _gbegf [ 0 ] . _cbag , _gbegf [ 0 ] . _cbag ;
for _ , _eaag := range _gbegf [ 1 : ] { if _eaag . _cbag < _bbec { _bbec = _eaag . _cbag ; } else if _eaag . _cbag > _beafa { _beafa = _eaag . _cbag ; } ; } ; return _bbec , _beafa ; } ; func ( _bfdee compositeCell ) String ( ) string { _gcec := "" ; if len ( _bfdee . paraList ) > 0 { _gcec = _dbdbb ( _bfdee . paraList . merge ( ) . text ( ) , 50 ) ;
} ; return _gde . Sprintf ( "\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071" , _bfdee . PdfRectangle , len ( _bfdee . paraList ) , _gcec ) ; } ; func _faefe ( _ccbd , _bbfff _e . Image ) _e . Image { _gcbef , _facab := _bbfff . Bounds ( ) . Size ( ) , _ccbd . Bounds ( ) . Size ( ) ;
_gdgdd , _eebbf := _gcbef . X , _gcbef . Y ; if _facab . X > _gdgdd { _gdgdd = _facab . X ; } ; if _facab . Y > _eebbf { _eebbf = _facab . Y ; } ; _feadb := _e . Rect ( 0 , 0 , _gdgdd , _eebbf ) ; if _gcbef . X != _gdgdd || _gcbef . Y != _eebbf { _abbg := _e . NewRGBA ( _feadb ) ; _ef . BiLinear . Scale ( _abbg , _feadb , _ccbd , _bbfff . Bounds ( ) , _ef . Over , nil ) ;
_bbfff = _abbg ; } ; if _facab . X != _gdgdd || _facab . Y != _eebbf { _ecfc := _e . NewRGBA ( _feadb ) ; _ef . BiLinear . Scale ( _ecfc , _feadb , _ccbd , _ccbd . Bounds ( ) , _ef . Over , nil ) ; _ccbd = _ecfc ; } ; _dceab := _e . NewRGBA ( _feadb ) ; _ef . DrawMask ( _dceab , _feadb , _ccbd , _e . Point { } , _bbfff , _e . Point { } , _ef . Over ) ;
return _dceab ; } ; func ( _eedce gridTiling ) complete ( ) bool { for _ , _bfaaf := range _eedce . _faeca { for _ , _fbfc := range _bfaaf { if ! _fbfc . complete ( ) { return false ; } ; } ; } ; return true ; } ; func ( _ceff paraList ) findTables ( _cdgd [ ] gridTiling ) [ ] * textTable { _ceff . addNeighbours ( ) ;
_ab . Slice ( _ceff , func ( _ceeea , _bfbd int ) bool { return _bada ( _ceff [ _ceeea ] , _ceff [ _bfbd ] ) < 0 } ) ; var _ffdac [ ] * textTable ; if _dcea { _ebdg := _ceff . findGridTables ( _cdgd ) ; _ffdac = append ( _ffdac , _ebdg ... ) ; } ; if _aagdg { _faeb := _ceff . findTextTables ( ) ;
_ffdac = append ( _ffdac , _faeb ... ) ; } ; return _ffdac ; } ; func _gedbc ( _gbbb , _feb bounded ) float64 { return _gbbb . bbox ( ) . Llx - _feb . bbox ( ) . Llx } ; func _efcda ( _edfdb map [ float64 ] map [ float64 ] gridTile ) [ ] float64 { _deggef := make ( [ ] float64 , 0 , len ( _edfdb ) ) ;
_abgbc := make ( map [ float64 ] struct { } , len ( _edfdb ) ) ; for _ , _bgcd := range _edfdb { for _deca := range _bgcd { if _ , _gbgbg := _abgbc [ _deca ] ; _gbgbg { continue ; } ; _deggef = append ( _deggef , _deca ) ; _abgbc [ _deca ] = struct { } { } ; } ; } ; _ab . Float64s ( _deggef ) ; return _deggef ;
} ; func ( _cfaa * textTable ) compositeColCorridors ( ) map [ int ] [ ] float64 { _dfcg := make ( map [ int ] [ ] float64 , _cfaa . _acddc ) ; if _eadb { _ac . Log . Info ( "\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020" , _cfaa . _acddc ) ;
} ; for _cbae := 0 ; _cbae < _cfaa . _acddc ; _cbae ++ { _dfcg [ _cbae ] = nil ; } ; return _dfcg ; } ; func ( _eddf intSet ) add ( _agebb int ) { _eddf [ _agebb ] = struct { } { } } ; func ( _aefdd rulingList ) toGrids ( ) [ ] rulingList { if _aebg { _ac . Log . Info ( "t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073" , _aefdd ) ;
} ; _efag := _aefdd . intersections ( ) ; if _aebg { _ac . Log . Info ( "\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020" , len ( _aefdd ) , len ( _efag ) ) ;
for _ , _gefa := range _efgdb ( _efag ) { _gde . Printf ( "\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n" , _gefa , _efag [ _gefa ] ) ; } ; } ; _afdag := make ( map [ int ] intSet , len ( _aefdd ) ) ; for _cedfc := range _aefdd { _cddag := _aefdd . connections ( _efag , _cedfc ) ; if len ( _cddag ) > 0 { _afdag [ _cedfc ] = _cddag ;
} ; } ; if _aebg { _ac . Log . Info ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064" , len ( _afdag ) ) ; for _ , _bccg := range _efgdb ( _afdag ) { _gde . Printf ( "\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n" , _bccg , _afdag [ _bccg ] ) ;
} ; } ; _dcgf := _afbef ( len ( _aefdd ) , func ( _ffaf , _gfbf int ) bool { _bfcfg , _eefb := len ( _afdag [ _ffaf ] ) , len ( _afdag [ _gfbf ] ) ; if _bfcfg != _eefb { return _bfcfg > _eefb ; } ; return _aefdd . comp ( _ffaf , _gfbf ) ; } ) ; if _aebg { _ac . Log . Info ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076" , _dcgf ) ;
} ; _adec := [ ] [ ] int { { _dcgf [ 0 ] } } ; _dgeg : for _ , _cbcbc := range _dcgf [ 1 : ] { for _ecda , _ddcae := range _adec { for _ , _dcde := range _ddcae { if _afdag [ _dcde ] . has ( _cbcbc ) { _adec [ _ecda ] = append ( _ddcae , _cbcbc ) ; continue _dgeg ; } ; } ; } ; _adec = append ( _adec , [ ] int { _cbcbc } ) ;
} ; if _aebg { _ac . Log . Info ( "\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076" , _adec ) ; } ; _ab . SliceStable ( _adec , func ( _fcfa , _cfbf int ) bool { return len ( _adec [ _fcfa ] ) > len ( _adec [ _cfbf ] ) } ) ; for _ , _ccge := range _adec { _ab . Slice ( _ccge , func ( _bcab , _eafe int ) bool { return _aefdd . comp ( _ccge [ _bcab ] , _ccge [ _eafe ] ) } ) ;
} ; _aeede := make ( [ ] rulingList , len ( _adec ) ) ; for _ceec , _bedc := range _adec { _ddddb := make ( rulingList , len ( _bedc ) ) ; for _cbef , _fgcdf := range _bedc { _ddddb [ _cbef ] = _aefdd [ _fgcdf ] ; } ; _aeede [ _ceec ] = _ddddb ; } ; if _aebg { _ac . Log . Info ( "\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076" , _aeede ) ;
} ; var _ddfgc [ ] rulingList ; for _ , _acfg := range _aeede { if _gefcg , _cbbeg := _acfg . isActualGrid ( ) ; _cbbeg { _acfg = _gefcg ; _acfg = _acfg . snapToGroups ( ) ; _ddfgc = append ( _ddfgc , _acfg ) ; } ; } ; if _aebg { _agcd ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073" , _ddfgc ) ;
_ac . Log . Info ( "\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064" , len ( _aeede ) , len ( _ddfgc ) ) ; } ; return _ddfgc ; } ; func ( _gegg * textObject ) getStrokeColor ( ) _be . Color { return _becd ( _gegg . _dbf . ColorspaceStroking , _gegg . _dbf . ColorStroking ) ;
} ; func _egbac ( _debd * textLine , _aadg [ ] * textLine , _fdab [ ] float64 ) float64 { var _cgdf float64 = - 1 ; for _ , _gadd := range _aadg { if _gadd . _bfcg > _debd . _bfcg { if _dc . Round ( _gadd . Llx ) >= _dc . Round ( _debd . Llx ) { _cgdf = _gadd . _bfcg ; } else { break ; } ; } ;
} ; return _cgdf ; } ; func ( _ddab * shapesState ) newSubPath ( ) { _ddab . clearPath ( ) ; if _bdefa { _ac . Log . Info ( "\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073" , _ddab ) ; } ; } ;
2023-09-07 17:40:17 +00:00
2023-11-11 11:29:03 +00:00
// String returns a description of `t`.
func ( _beca * textTable ) String ( ) string { return _gde . Sprintf ( "\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074" , _beca . _acddc , _beca . _gebeeb , _beca . _aefef ) ; } ; func _ebcgb ( _dfgfa [ ] * textWord , _bgba int ) [ ] * textWord { _bagbe := len ( _dfgfa ) ;
copy ( _dfgfa [ _bgba : ] , _dfgfa [ _bgba + 1 : ] ) ; return _dfgfa [ : _bagbe - 1 ] ; } ; func ( _adeeg intSet ) del ( _ddbc int ) { delete ( _adeeg , _ddbc ) } ; func ( _effg paraList ) list ( ) [ ] * list { var _dddbf [ ] * textLine ; var _gdcdg [ ] * textLine ; for _ , _bceac := range _effg { _gbfe := _bceac . getListLines ( ) ;
_dddbf = append ( _dddbf , _gbfe ... ) ; _gdcdg = append ( _gdcdg , _bceac . _gfaae ... ) ; } ; _dcdbc := _ebeg ( _dddbf ) ; _dde := _ebage ( _gdcdg , _dcdbc ) ; return _dde ; } ; func ( _gdggd * ruling ) encloses ( _dafaf , _afgc float64 ) bool { return _gdggd . _efgeb - _cebe <= _dafaf && _afgc <= _gdggd . _bbge + _cebe ;
2023-09-07 17:40:17 +00:00
} ;
2023-11-11 11:29:03 +00:00
// NewFromContents creates a new extractor from contents and page resources.
func NewFromContents(contents string, resources *_fg.PdfPageResources) (*Extractor, error) {
	const _ea = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"
	ext := &Extractor{
		_geb: contents,
		_af:  resources,
		_bc:  map[string]fontEntry{},
		_bca: map[string]textResult{},
	}
	_ec.TrackUse(_ea)
	return ext, nil
}

// toTextMarks returns the text marks of all entries of `_fgbe` in order. An entry whose `_eadcb`
// flag is set is preceded by a space mark added with `_ccag`. `_bffe` is passed on to the calls
// that build the marks.
func (_ccgc *textLine) toTextMarks(_bffe *int) []TextMark {
	var marks []TextMark
	for _, word := range _ccgc._fgbe {
		if word._eadcb {
			marks = _ccag(marks, _bffe, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_bffe)...)
	}
	return marks
}
2023-08-03 17:30:04 +00:00
2023-10-07 13:58:01 +00:00
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
2023-09-07 17:40:17 +00:00
//
2023-10-07 13:58:01 +00:00
// Replace with a function like Extract() (*PageText, error)
2023-11-11 11:29:03 +00:00
func ( _ccc * Extractor ) ExtractPageText ( ) ( * PageText , int , int , error ) { _fdec , _bafd , _cgab , _bcgf := _ccc . extractPageText ( _ccc . _geb , _ccc . _af , _dca . IdentityMatrix ( ) , 0 ) ; if _bcgf != nil && _bcgf != _fg . ErrColorOutOfRange { return nil , 0 , 0 , _bcgf ; } ; if _ccc . _cc != nil { _fdec . _dbc . _dcc = _ccc . _cc . UseSimplerExtractionProcess ;
} ; _fdec . computeViews ( ) ; _bcgf = _ggde ( _fdec ) ; if _bcgf != nil { return nil , 0 , 0 , _bcgf ; } ; if _ccc . _cc != nil { if _ccc . _cc . ApplyCropBox && _ccc . _fb != nil { _fdec . ApplyArea ( * _ccc . _fb ) ; } ; _fdec . _dbc . _dbcd = _ccc . _cc . DisableDocumentTags ; } ; return _fdec , _bafd , _cgab , nil ;
} ; func ( _egcc * wordBag ) makeRemovals ( ) map [ int ] map [ * textWord ] struct { } { _eacc := make ( map [ int ] map [ * textWord ] struct { } , len ( _egcc . _aac ) ) ; for _acf := range _egcc . _aac { _eacc [ _acf ] = make ( map [ * textWord ] struct { } ) ; } ; return _eacc ; } ; func ( _gefca gridTile ) numBorders ( ) int { _faag := 0 ;
if _gefca . _cbge { _faag ++ ; } ; if _gefca . _dafe { _faag ++ ; } ; if _gefca . _beacf { _faag ++ ; } ; if _gefca . _cbbgc { _faag ++ ; } ; return _faag ; } ; func ( _efgbe * textTable ) emptyCompositeColumn ( _acca int ) bool { for _aecg := 0 ; _aecg < _efgbe . _gebeeb ; _aecg ++ { if _fbfe , _afbf := _efgbe . _edbe [ _bafcd ( _acca , _aecg ) ] ;
_afbf { if len ( _fbfe . paraList ) > 0 { return false ; } ; } ; } ; return true ; } ; func ( _ffbc rulingList ) toTilings ( ) ( rulingList , [ ] gridTiling ) { _ffbc . log ( "\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s" ) ; if len ( _ffbc ) == 0 { return nil , nil ; } ; _ffbc = _ffbc . tidied ( "\u0061\u006c\u006c" ) ;
_ffbc . log ( "\u0074\u0069\u0064\u0069\u0065\u0064" ) ; _acbe := _ffbc . toGrids ( ) ; _bfadf := make ( [ ] gridTiling , len ( _acbe ) ) ; for _fbbe , _gbfcc := range _acbe { _bfadf [ _fbbe ] = _gbfcc . asTiling ( ) ; } ; return _ffbc , _bfadf ; } ; type structElement struct { _affb string ;
_dfcd [ ] structElement ; _ecaf int64 ; _ebad _dce . PdfObject ; } ; func ( _accbe * textTable ) getDown ( ) paraList { _dbeg := make ( paraList , _accbe . _acddc ) ; for _ddbdcc := 0 ; _ddbdcc < _accbe . _acddc ; _ddbdcc ++ { _eefc := _accbe . get ( _ddbdcc , _accbe . _gebeeb - 1 ) . _fgbea ;
if _eefc . taken ( ) { return nil ; } ; _dbeg [ _ddbdcc ] = _eefc ; } ; for _dgef := 0 ; _dgef < _accbe . _acddc - 1 ; _dgef ++ { if _dbeg [ _dgef ] . _abfec != _dbeg [ _dgef + 1 ] { return nil ; } ; } ; return _dbeg ; } ; func ( _bbbgc * wordBag ) applyRemovals ( _acac map [ int ] map [ * textWord ] struct { } ) { for _cccd , _gfge := range _acac { if len ( _gfge ) == 0 { continue ;
} ; _dadea := _bbbgc . _aac [ _cccd ] ; _cbafg := len ( _dadea ) - len ( _gfge ) ; if _cbafg == 0 { delete ( _bbbgc . _aac , _cccd ) ; continue ; } ; _gabd := make ( [ ] * textWord , _cbafg ) ; _bffc := 0 ; for _ , _gda := range _dadea { if _ , _eefd := _gfge [ _gda ] ; ! _eefd { _gabd [ _bffc ] = _gda ;
_bffc ++ ; } ; } ; _bbbgc . _aac [ _cccd ] = _gabd ; } ; } ; func ( _ggege intSet ) has ( _ccgdb int ) bool { _ , _cfcac := _ggege [ _ccgdb ] ; return _cfcac } ; func ( _edgef paraList ) findTextTables ( ) [ ] * textTable { var _eedf [ ] * textTable ; for _ , _fgcb := range _edgef { if _fgcb . taken ( ) || _fgcb . Width ( ) == 0 { continue ;
} ; _aedagf := _fgcb . isAtom ( ) ; if _aedagf == nil { continue ; } ; _aedagf . growTable ( ) ; if _aedagf . _acddc * _aedagf . _gebeeb < _ffa { continue ; } ; _aedagf . markCells ( ) ; _aedagf . log ( "\u0067\u0072\u006fw\u006e" ) ; _eedf = append ( _eedf , _aedagf ) ; } ; return _eedf ; } ;
// intersections returns, for every ruling index, the set of indexes of the rulings it
// intersects. Only pairs made of one `_cbab` ruling and one `_faccd` ruling are tested.
//
// It returns nil when there are fewer than `_fbff`+1 rulings of kind `_cbab` or fewer than
// `_gaab`+1 of kind `_faccd`, or when the two kinds together exceed `_bcccg` rulings.
func (_aeac rulingList) intersections() map[int]intSet {
	var firstKind, secondKind []int
	for i, r := range _aeac {
		if r._gffa == _cbab {
			firstKind = append(firstKind, i)
		} else if r._gffa == _faccd {
			secondKind = append(secondKind, i)
		}
	}
	if len(firstKind) < _fbff+1 || len(secondKind) < _gaab+1 {
		return nil
	}
	if len(firstKind)+len(secondKind) > _bcccg {
		_ac.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064", len(_aeac), len(firstKind), len(secondKind))
		return nil
	}

	result := make(map[int]intSet, len(firstKind)+len(secondKind))
	for _, a := range firstKind {
		for _, b := range secondKind {
			if !_aeac[a].intersects(_aeac[b]) {
				continue
			}
			if _, ok := result[a]; !ok {
				result[a] = make(intSet)
			}
			if _, ok := result[b]; !ok {
				result[b] = make(intSet)
			}
			result[a].add(b)
			result[b].add(a)
		}
	}
	return result
}

// getFontDict looks up the font named `_edac` in the text object's resources `_ccae`.
// It returns (nil, nil) when there are no resources and an error when the font is not found.
func (_edab *textObject) getFontDict(_edac string) (_cabb _dce.PdfObject, _bbd error) {
	resources := _edab._ccae
	if resources == nil {
		_ac.Log.Debug("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071", _edac)
		return nil, nil
	}
	var found bool
	_cabb, found = resources.GetFontByName(_dce.PdfObjectName(_edac))
	if found {
		return _cabb, nil
	}
	_ac.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071", _edac)
	return nil, _a.New("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073")
}

// extractPageResourcesToFont appends a Font entry to `_dcf.Fonts` for every font in the Font
// dictionary of `_cf`, skipping names for which `_bb(_dcf.Fonts, name)` is true.
//
// The font program is decoded from the first non-nil of FontFile, FontFile2 and FontFile3 of the
// font descriptor, and the file name is the font name plus ".pfb", ".ttf" or ".cff" respectively.
// An error is returned if the Font entry is not a dictionary, a font cannot be looked up or
// loaded, or a font stream fails to decode.
func (_dcf *PageFonts) extractPageResourcesToFont(_cf *_fg.PdfPageResources) error {
	fontDict, isDict := _dce.GetDict(_cf.Font)
	if !isDict {
		return _a.New(_bf)
	}
	for _, key := range fontDict.Keys() {
		var (
			hasToUnicode = true
			fontData     []byte
			fileName     string
		)
		fontObj, found := _cf.GetFontByName(key)
		if !found {
			return _a.New(_dcd)
		}
		font, err := _fg.NewPdfFontFromPdfObject(fontObj)
		if err != nil {
			return err
		}
		// NOTE(review): the descriptor is dereferenced without a nil check — confirm that
		// FontDescriptor() cannot return nil for fonts reaching this point.
		descriptor := font.FontDescriptor()
		fontName := font.FontDescriptor().FontName.String()
		subtype := font.Subtype()
		if _bb(_dcf.Fonts, fontName) {
			continue
		}
		if len(font.ToUnicode()) == 0 {
			hasToUnicode = false
		}

		switch {
		case descriptor.FontFile != nil:
			if stream, ok := _dce.GetStream(descriptor.FontFile); ok {
				if fontData, err = _dce.DecodeStream(stream); err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0070\u0066\u0062"
			}
		case descriptor.FontFile2 != nil:
			if stream, ok := _dce.GetStream(descriptor.FontFile2); ok {
				if fontData, err = _dce.DecodeStream(stream); err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0074\u0074\u0066"
			}
		case descriptor.FontFile3 != nil:
			if stream, ok := _dce.GetStream(descriptor.FontFile3); ok {
				if fontData, err = _dce.DecodeStream(stream); err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0063\u0066\u0066"
			}
		}
		if len(fileName) < 1 {
			_ac.Log.Debug(_adc)
		}

		_dcf.Fonts = append(_dcf.Fonts, Font{
			FontName:       fontName,
			PdfFont:        font,
			IsCID:          font.IsCID(),
			IsSimple:       font.IsSimple(),
			ToUnicode:      hasToUnicode,
			FontType:       subtype,
			FontData:       fontData,
			FontFileName:   fileName,
			FontDescriptor: descriptor,
		})
	}
	return nil
}

// _bacd reports whether the Llx of `_dfgbe` lies in the half-open interval starting at the Urx
// of `_eeeag` and extending `_gdba` beyond it.
func _bacd(_eeeag *wordBag, _dfgbe *textWord, _gdba float64) bool {
	left := _dfgbe.Llx
	return _eeeag.Urx <= left && left < _eeeag.Urx+_gdba
}

// minDepth returns `_ggfe - (Ury - _fab)`.
func (_caad *wordBag) minDepth() float64 {
	offset := _caad.Ury - _caad._fab
	return _caad._ggfe - offset
}

// log prints the tiling, titled `_gafac`, when `_gbde` is set: the `_abeb` and `_cegbg` lists
// followed by every tile present in `_faeca`.
func (_ffdba gridTiling) log(_gafac string) {
	if !_gbde {
		return
	}
	_ac.Log.Info("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071", len(_ffdba._abeb), len(_ffdba._cegbg), _gafac)
	_gde.Printf("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a", _ffdba._abeb)
	_gde.Printf("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a", _ffdba._cegbg)
	for rowIdx, rowKey := range _ffdba._cegbg {
		row, haveRow := _ffdba._faeca[rowKey]
		if !haveRow {
			continue
		}
		_gde.Printf("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a", rowIdx, rowKey)
		for colIdx, colKey := range _ffdba._abeb {
			if tile, haveTile := row[colKey]; haveTile {
				_gde.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", colIdx, tile.String())
			}
		}
	}
}
2023-08-03 17:30:04 +00:00
2023-11-11 11:29:03 +00:00
// String returns a string describing `pt`: a summary line prefixed with "-", one line per
// element of `_fbga`, and the same summary line prefixed with "+".
func (_aaag PageText) String() string {
	summary := _gde.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_aaag._fbga))
	lines := make([]string, 0, len(_aaag._fbga)+2)
	lines = append(lines, "\u002d"+summary)
	for _, element := range _aaag._fbga {
		lines = append(lines, element.String())
	}
	lines = append(lines, "\u002b"+summary)
	return _df.Join(lines, "\u000a")
}
2023-05-29 17:26:33 +00:00
2023-11-11 11:29:03 +00:00
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	_gcc []TextMark
}

// getTextMarkAtOffset returns a pointer to a copy of the first mark whose Offset equals `_gdcb`,
// or nil if there is no such mark. The returned mark does not alias the array's storage.
func (_dbge *TextMarkArray) getTextMarkAtOffset(_gdcb int) *TextMark {
	marks := _dbge._gcc
	for i := range marks {
		if marks[i].Offset != _gdcb {
			continue
		}
		found := marks[i]
		return &found
	}
	return nil
}

// moveTextSetLeading sets the `_gcg` field of the text state to -`_eaea` and then moves by
// (`_cbf`, `_eaea`) with moveLP.
func (_gfa *textObject) moveTextSetLeading(_cbf, _eaea float64) {
	state := _gfa._abcb
	state._gcg = -_eaea
	_gfa.moveLP(_cbf, _eaea)
}
2023-02-07 17:17:49 +00:00
2023-11-11 11:29:03 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	_geb string
	_af  *_fg.PdfPageResources
	_aa  _fg.PdfRectangle
	_fb  *_fg.PdfRectangle
	_bc  map[string]fontEntry
	_bca map[string]textResult
	_ag  int64
	_ca  int
	_cc  *Options
	_agb *_dce.PdfObject
	_ce  _dce.PdfObject
}

// _eded returns a new textObject wired to the given extractor, resources, graphics state, text
// state and state stack, with both of its matrices set to the identity.
func _eded(_dae *Extractor, _aaaf *_fg.PdfPageResources, _aefe _dcg.GraphicsState, _defa *textState, _cad *stateStack) *textObject {
	obj := &textObject{
		_ccb:  _dae,
		_ccae: _aaaf,
		_dbf:  _aefe,
		_ada:  _cad,
		_abcb: _defa,
		_acbc: _dca.IdentityMatrix(),
		_abcf: _dca.IdentityMatrix(),
	}
	return obj
}

// tables returns the exportable tables (`_edce`) of the paras, converted with toTextTable.
func (_gfbb paraList) tables() []TextTable {
	var result []TextTable
	if _eadb {
		_ac.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range _gfbb {
		table := para._edce
		if table == nil || !table.isExportable() {
			continue
		}
		result = append(result, table.toTextTable())
	}
	return result
}

// _bdce maps runes to strings holding a single code point from the Unicode combining
// diacritical marks range (U+0300 to U+0359).
var _bdce = map[rune]string{
	0x0060: "\u0300",
	0x02CB: "\u0300",
	0x0027: "\u0301",
	0x00B4: "\u0301",
	0x02B9: "\u0301",
	0x02CA: "\u0301",
	0x005E: "\u0302",
	0x02C6: "\u0302",
	0x007E: "\u0303",
	0x02DC: "\u0303",
	0x00AF: "\u0304",
	0x02C9: "\u0304",
	0x02D8: "\u0306",
	0x02D9: "\u0307",
	0x00A8: "\u0308",
	0x00B0: "\u030a",
	0x02DA: "\u030a",
	0x02BA: "\u030b",
	0x02DD: "\u030b",
	0x02C7: "\u030c",
	0x02C8: "\u030d",
	0x0022: "\u030e",
	0x02BB: "\u0312",
	0x02BC: "\u0313",
	0x0486: "\u0313",
	0x055A: "\u0313",
	0x02BD: "\u0314",
	0x0485: "\u0314",
	0x0559: "\u0314",
	0x02D4: "\u031d",
	0x02D5: "\u031e",
	0x02D6: "\u031f",
	0x02D7: "\u0320",
	0x02B2: "\u0321",
	0x00B8: "\u0327",
	0x02CC: "\u0329",
	0x02B7: "\u032b",
	0x02CD: "\u0331",
	0x005F: "\u0332",
	0x204E: "\u0359",
}
2022-06-27 19:58:38 +00:00
2023-10-07 13:58:01 +00:00
// Elements returns the TextMarks in `ma`.
2023-11-11 11:29:03 +00:00
func ( _cdf * TextMarkArray ) Elements ( ) [ ] TextMark { return _cdf . _gcc } ; func ( _cbd * textObject ) setTextRenderMode ( _cdce int ) { if _cbd == nil { return ; } ; _cbd . _abcb . _bgcc = RenderMode ( _cdce ) ; } ; func ( _ceabf * textPara ) toTextMarks ( _abge * int ) [ ] TextMark { if _ceabf . _edce == nil { return _ceabf . toCellTextMarks ( _abge ) ;
} ; var _gggfa [ ] TextMark ; for _bfdge := 0 ; _bfdge < _ceabf . _edce . _gebeeb ; _bfdge ++ { for _dagc := 0 ; _dagc < _ceabf . _edce . _acddc ; _dagc ++ { _baed := _ceabf . _edce . get ( _dagc , _bfdge ) ; if _baed == nil { _gggfa = _ccag ( _gggfa , _abge , "\u0009" ) ; } else { _fgec := _baed . toCellTextMarks ( _abge ) ;
_gggfa = append ( _gggfa , _fgec ... ) ; } ; _gggfa = _ccag ( _gggfa , _abge , "\u0020" ) ; } ; if _bfdge < _ceabf . _edce . _gebeeb - 1 { _gggfa = _ccag ( _gggfa , _abge , "\u000a" ) ; } ; } ; _ccbaf := _ceabf . _edce ; if _ccbaf . isExportable ( ) { _eeae := _ccbaf . toTextTable ( ) ; _gggfa = _daae ( _gggfa , & _eeae ) ;
} ; return _gggfa ; } ; func _cagg ( _ffcg string ) bool { for _ , _afdcf := range _ffcg { if ! _fc . IsSpace ( _afdcf ) { return false ; } ; } ; return true ; } ; type bounded interface { bbox ( ) _fg . PdfRectangle } ; func ( _abgf * textTable ) depth ( ) float64 { _ebfa := 1e10 ; for _baaag := 0 ;
_baaag < _abgf . _acddc ; _baaag ++ { _fggdb := _abgf . get ( _baaag , 0 ) ; if _fggdb == nil || _fggdb . _bfge { continue ; } ; _ebfa = _dc . Min ( _ebfa , _fggdb . depth ( ) ) ; } ; return _ebfa ; } ; func ( _bgca * ruling ) intersects ( _dgafg * ruling ) bool { _gdgb := ( _bgca . _gffa == _cbab && _dgafg . _gffa == _faccd ) || ( _dgafg . _gffa == _cbab && _bgca . _gffa == _faccd ) ;
_bfgac := func ( _ffda , _fagf * ruling ) bool { return _ffda . _efgeb - _cebe <= _fagf . _cbag && _fagf . _cbag <= _ffda . _bbge + _cebe ; } ; _bbeca := _bfgac ( _bgca , _dgafg ) ; _fdbdb := _bfgac ( _dgafg , _bgca ) ; if _aebg { _gde . Printf ( "\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a" + "\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a" + " \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a" , _gdgb , _bbeca , _fdbdb , _gdgb && _bbeca && _fdbdb , _bgca , _dgafg ) ;
} ; return _gdgb && _bbeca && _fdbdb ; } ;
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// NewWithOptions creates an Extractor instance for extracting content from the input PDF page
// with options.
//
// It returns an error if the page's content streams or media box cannot be obtained. A media box
// whose lower-left and upper-right coordinates are reversed is normalized.
func NewWithOptions(page *_fg.PdfPage, options *Options) (*Extractor, error) {
	const _ad = "\u0065x\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077W\u0069\u0074\u0068\u004f\u0070\u0074\u0069\u006f\u006e\u0073"

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}

	structTreeRoot, tagged := page.GetStructTreeRoot()
	if !tagged {
		_ac.Log.Info("T\u0068\u0065\u0020\u0070\u0064\u0066\u0020\u0064\u006f\u0063\u0075\u006d\u0065\u006e\u0074\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020\u0074\u0061\u0067g\u0065d\u002e\u0020\u0053\u0074r\u0075\u0063t\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e\u0027\u0074\u0020\u0065\u0078\u0069\u0073\u0074\u002e")
	}
	container := page.GetContainingPdfObject()

	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _gde.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	ext := &Extractor{
		_geb: contents,
		_af:  page.Resources,
		_aa:  *mediaBox,
		_fb:  page.CropBox,
		_bc:  map[string]fontEntry{},
		_bca: map[string]textResult{},
		_cc:  options,
		_agb: structTreeRoot,
		_ce:  container,
	}

	box := &ext._aa
	if box.Llx > box.Urx {
		_ac.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", *box)
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		_ac.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", *box)
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	_ec.TrackUse(_ad)
	return ext, nil
}
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
type ImageMark struct { Image * _fg . Image ;
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ; Height float64 ;
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// Position of the image in PDF coordinates (lower left corner).
X float64 ; Y float64 ;
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// Angle in degrees, if rotated.
Angle float64 ; } ; func ( _baeba * textWord ) absorb ( _eafce * textWord ) { _baeba . PdfRectangle = _ebge ( _baeba . PdfRectangle , _eafce . PdfRectangle ) ; _baeba . _daafd = append ( _baeba . _daafd , _eafce . _daafd ... ) ; } ; func _dbdbb ( _decc string , _adag int ) string { if len ( _decc ) < _adag { return _decc ;
} ; return _decc [ : _adag ] ; } ; func ( _deed * subpath ) clear ( ) { * _deed = subpath { } } ; func _fcad ( _adeba _fg . PdfRectangle ) * ruling { return & ruling { _gffa : _cbab , _cbag : _adeba . Urx , _efgeb : _adeba . Lly , _bbge : _adeba . Ury } ; } ; var _fffdg = map [ markKind ] string { _efcd : "\u0073\u0074\u0072\u006f\u006b\u0065" , _babea : "\u0066\u0069\u006c\u006c" , _acaca : "\u0061u\u0067\u006d\u0065\u006e\u0074" } ;
func _badfg ( _cdge string ) string { _beac := [ ] rune ( _cdge ) ; return string ( _beac [ : len ( _beac ) - 1 ] ) } ; func ( _ccfgg * textTable ) bbox ( ) _fg . PdfRectangle { return _ccfgg . PdfRectangle } ; func _cacg ( _defeg , _adba _dca . Point ) rulingKind { _gcce := _dc . Abs ( _defeg . X - _adba . X ) ;
_cbbb := _dc . Abs ( _defeg . Y - _adba . Y ) ; return _ageb ( _gcce , _cbbb , _daee ) ; } ; func ( _fcbba * textTable ) compositeRowCorridors ( ) map [ int ] [ ] float64 { _faaa := make ( map [ int ] [ ] float64 , _fcbba . _gebeeb ) ; if _eadb { _ac . Log . Info ( "c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064" , _fcbba . _gebeeb ) ;
} ; for _edecf := 1 ; _edecf < _fcbba . _gebeeb ; _edecf ++ { var _edacd [ ] compositeCell ; for _fddf := 0 ; _fddf < _fcbba . _acddc ; _fddf ++ { if _efccg , _fgaf := _fcbba . _edbe [ _bafcd ( _fddf , _edecf ) ] ; _fgaf { _edacd = append ( _edacd , _efccg ) ; } ; } ; if len ( _edacd ) == 0 { continue ;
} ; _egfed := _gcfc ( _edacd ) ; _faaa [ _edecf ] = _egfed ; if _eadb { _gde . Printf ( "\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a" , _edecf , _egfed ) ; } ; } ; return _faaa ; } ; func ( _dffd * wordBag ) maxDepth ( ) float64 { return _dffd . _ggfe - _dffd . Lly } ;
// xNeighbours builds two events per paragraph (one at its left edge flagged
// true, one at its right edge flagged false) and passes them to
// eventNeighbours. When `_dcbeg` is non-zero each edge is pushed outwards by
// `_dcbeg` times the paragraph's fontsize().
func (_baeb paraList) xNeighbours(_dcbeg float64) map[*textPara][]int {
	events := make([]event, 2*len(_baeb))
	for idx, para := range _baeb {
		if _dcbeg == 0 {
			events[2*idx] = event{para.Llx, true, idx}
			events[2*idx+1] = event{para.Urx, false, idx}
			continue
		}
		events[2*idx] = event{para.Llx - _dcbeg*para.fontsize(), true, idx}
		events[2*idx+1] = event{para.Urx + _dcbeg*para.fontsize(), false, idx}
	}
	return _baeb.eventNeighbours(events)
}

// toCellTextMarks concatenates the text marks of every line of the paragraph.
// When `_ddcg` is set and a non-final line ends in a hyphen, that line's marks
// are passed through `_aeea` and nothing is inserted after them. Otherwise the
// result of `_ccag` (fed with `_eecc` of the two neighbouring lines' `_bfcg`)
// is placed between consecutive lines.
func (_bbde *textPara) toCellTextMarks(_ddfad *int) []TextMark {
	var out []TextMark
	for idx, line := range _bbde._gfaae {
		lineMarks := line.toTextMarks(_ddfad)
		isLast := idx == len(_bbde._gfaae)-1
		joinHyphen := _ddcg && line.endsInHyphen() && !isLast
		if joinHyphen {
			lineMarks = _aeea(lineMarks, _ddfad)
		}
		out = append(out, lineMarks...)
		if !joinHyphen && !isLast {
			next := _bbde._gfaae[idx+1]
			out = _ccag(out, _ddfad, _eecc(line._bfcg, next._bfcg))
		}
	}
	return out
}

// connections returns the set of ruling indexes reachable from `_eebe` through
// `_gdffa`: an index i is linked to j when `_gdffa[i]` contains j, and the
// search recurses from every index linked so far.
func (_eccgf rulingList) connections(_gdffa map[int]intSet, _eebe int) intSet {
	linked := make(intSet)
	seen := make(intSet)
	var visit func(int)
	visit = func(idx int) {
		if seen.has(idx) {
			return
		}
		seen.add(idx)
		for i := range _eccgf {
			if _gdffa[i].has(idx) {
				linked.add(i)
			}
		}
		for i := range _eccgf {
			if linked.has(i) {
				visit(i)
			}
		}
	}
	visit(_eebe)
	return linked
}

// empty reports whether the bag has no entry for index `_fbab` in `_aac`.
func (_acga *wordBag) empty(_fbab int) bool {
	if _, present := _acga._aac[_fbab]; present {
		return false
	}
	return true
}

// comp is the ordering predicate for two rulings of the list, addressed by
// index. Rulings of different kinds are ordered by kind, larger kind first.
// Rulings of kind `_ccfb` never compare as "before". Otherwise the rulings
// are compared on `_cbag`, then `_efgeb`, then `_bbge`; the outcome of each
// comparison is used as-is for kind `_faccd` and inverted for any other kind.
func (_dcddd rulingList) comp(_ecfaa, _adbf int) bool {
	first, second := _dcddd[_ecfaa], _dcddd[_adbf]
	kind, otherKind := first._gffa, second._gffa
	if kind != otherKind {
		return kind > otherKind
	}
	if kind == _ccfb {
		return false
	}
	keep := kind == _faccd
	// orient returns v for kind `_faccd` and !v for every other kind.
	orient := func(v bool) bool { return v == keep }

	if a, b := first._cbag, second._cbag; a != b {
		return orient(a > b)
	}
	if a, b := first._efgeb, second._efgeb; a != b {
		return orient(a < b)
	}
	return orient(first._bbge < second._bbge)
}

// _aded returns the keys of `_afaf` sorted in descending order.
func _aded(_afaf map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_afaf))
	for k := range _afaf {
		keys = append(keys, k)
	}
	_ab.Float64s(keys)
	// Reverse the ascending result in place.
	for lo, hi := 0, len(keys)-1; lo < hi; lo, hi = lo+1, hi-1 {
		keys[lo], keys[hi] = keys[hi], keys[lo]
	}
	return keys
}

// _gfgf returns `_cgacf`, followed by the values of `_bccfd` that lie strictly
// between the smaller and the larger of `_cgacf` and `_gdffd`, followed by
// `_gdffd`. Scanning of `_bccfd` stops at the first value that reaches the
// upper bound.
func _gfgf(_bccfd []float64, _cgacf, _gdffd float64) []float64 {
	lower, upper := _cgacf, _gdffd
	if upper < lower {
		lower, upper = upper, lower
	}
	out := make([]float64, 0, len(_bccfd)+2)
	out = append(out, _cgacf)
	for _, v := range _bccfd {
		if v <= lower {
			continue
		}
		if v >= upper {
			break
		}
		out = append(out, v)
	}
	return append(out, _gdffd)
}

// getFontDirect looks up the font dictionary named `_fdb` and builds a
// PdfFont from it. A lookup failure is returned immediately; a construction
// failure is logged at debug level and returned together with whatever
// NewPdfFontFromPdfObject produced.
func (_adga *textObject) getFontDirect(_fdb string) (*_fg.PdfFont, error) {
	fontObj, err := _adga.getFontDict(_fdb)
	if err != nil {
		return nil, err
	}
	font, err := _fg.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		_ac.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _fdb, err)
	}
	return font, err
}

// _cdcee looks `_egeg` up in `_bdce`. The lookup only happens when `_egeg` is
// exactly one rune long; any other length yields ("", false).
func _cdcee(_egeg string) (string, bool) {
	runes := []rune(_egeg)
	if len(runes) != 1 {
		return "", false
	}
	mapped, found := _bdce[runes[0]]
	return mapped, found
}

// isActualGrid checks whether the rulings form a usable grid. The two lists
// returned by augmentGrid must hold at least `_fbff`+1 and `_gaab`+1 rulings.
// Then, if `_ffdd` is set, the first and last ruling of each list must meet
// at their ends according to `_bddf`; otherwise both lists must be aligned().
// On success the two lists are returned concatenated.
func (_eebf rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := _eebf.augmentGrid()
	if len(verts) < _fbff+1 || len(horzs) < _gaab+1 {
		if _aebg {
			_ac.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(verts), len(horzs), _fbff+1, _gaab+1)
		}
		return nil, false
	}
	if _aebg {
		_ac.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", _eebf, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for idx, r := range _eebf {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", idx, r)
		}
	}

	if _ffdd {
		firstVert, lastVert := verts[0], verts[len(verts)-1]
		firstHorz, lastHorz := horzs[0], horzs[len(horzs)-1]
		cornersMeet := _bddf(firstVert._cbag-firstHorz._efgeb) &&
			_bddf(lastVert._cbag-firstHorz._bbge) &&
			_bddf(firstHorz._cbag-firstVert._bbge) &&
			_bddf(lastHorz._cbag-firstVert._efgeb)
		if !cornersMeet {
			if _aebg {
				_ac.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", firstVert, lastVert, firstHorz, lastHorz)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			if _bdeff {
				_ac.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _aebg {
				_ac.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(horzs))
			}
			return nil, false
		}
	}

	grid := append(verts, horzs...)
	return grid, true
}

// rectRuling is a rectangle together with a ruling kind, a mark kind and a
// color.
type rectRuling struct {
	_eegf rulingKind
	_fadf markKind
	_be.Color
	_fg.PdfRectangle
}

// _ffea reports whether the two points have exactly equal X and Y.
func _ffea(_faeeg, _dccgb _dca.Point) bool {
	if _faeeg.X != _dccgb.X {
		return false
	}
	return _faeeg.Y == _dccgb.Y
}
// _bcdd returns the intersection of the two rectangles and true when `_gege`
// accepts the pair; otherwise it returns the zero rectangle and false.
func _bcdd(_bbdc, _gbca _fg.PdfRectangle) (_fg.PdfRectangle, bool) {
	if !_gege(_bbdc, _gbca) {
		return _fg.PdfRectangle{}, false
	}
	return _fg.PdfRectangle{
		Llx: _dc.Max(_bbdc.Llx, _gbca.Llx),
		Urx: _dc.Min(_bbdc.Urx, _gbca.Urx),
		Lly: _dc.Max(_bbdc.Lly, _gbca.Lly),
		Ury: _dc.Min(_bbdc.Ury, _gbca.Ury),
	}, true
}

// _cccab builds a ruling of kind `_cbab` positioned at the rectangle's Llx and
// spanning from its Lly to its Ury.
func _cccab(_eaaf _fg.PdfRectangle) *ruling {
	return &ruling{_gffa: _cbab, _cbag: _eaaf.Llx, _efgeb: _eaaf.Lly, _bbge: _eaaf.Ury}
}

// _ffcd (logged as "mergeWordBags") merges word bags that lie inside larger
// ones. The bags are sorted by decreasing area, then decreasing height. Each
// bag that has not been absorbed then absorbs every later, not yet absorbed
// bag that `_dgfe` accepts against its rectangle widened on the left by its
// `_fab`. The surviving bags are returned. `_bgd` is reordered in place.
//
// Fix: the inner loop used to test the outer index against the absorbed set,
// which can never be true there, so a bag already absorbed by an earlier bag
// could be absorbed a second time by a later one. It now tests the inner
// index.
func _ffcd(_bgd []*wordBag) []*wordBag {
	if len(_bgd) <= 1 {
		return _bgd
	}
	if _fccf {
		_ac.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	_ab.Slice(_bgd, func(_cefb, _cbce int) bool {
		bagA, bagB := _bgd[_cefb], _bgd[_cbce]
		areaA := bagA.Width() * bagA.Height()
		areaB := bagB.Width() * bagB.Height()
		if areaA != areaB {
			return areaA > areaB
		}
		if bagA.Height() != bagB.Height() {
			return bagA.Height() > bagB.Height()
		}
		// NOTE(review): tie-break on the current slice positions, kept as in
		// the original; sort.Slice does not guarantee a stable order.
		return _cefb < _cbce
	})

	var merged []*wordBag
	absorbed := make(intSet)
	for outer := 0; outer < len(_bgd); outer++ {
		if absorbed.has(outer) {
			continue
		}
		host := _bgd[outer]
		for inner := outer + 1; inner < len(_bgd); inner++ {
			if absorbed.has(inner) {
				// Already merged into an earlier bag; do not absorb it twice.
				continue
			}
			candidate := _bgd[inner]
			// The host's rectangle is re-read on every pass because absorb may
			// have changed it.
			reach := host.PdfRectangle
			reach.Llx -= host._fab
			if _dgfe(reach, candidate.PdfRectangle) {
				host.absorb(candidate)
				absorbed.add(inner)
			}
		}
		merged = append(merged, host)
	}
	if len(_bgd) != len(merged)+len(absorbed) {
		_ac.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_bgd), len(merged), len(absorbed))
	}
	return merged
}

// _cdcbcc (logged as "makeTextWords") groups the marks in `_babd` into words.
// A mark whose text `_cagg` accepts closes the current word. When `_fefd` is
// set, marks recognised by `_cdcee` that fall in the diacritic area of their
// neighbour are folded into the current word. Any other mark either extends
// the current word or starts a new one, depending on its gap to the current
// word measured relative to the word's `_adecc`. Words whose computed text
// `_cagg` accepts are dropped.
func _cdcbcc(_babd []*textMark, _ageed _fg.PdfRectangle) []*textWord {
	var words []*textWord
	var current *textWord
	if _agge {
		_ac.Log.Info("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073", len(_babd))
	}

	// flush finalises the current word, if any, and resets `current`.
	flush := func() {
		if current == nil {
			return
		}
		text := current.computeText()
		if !_cagg(text) {
			current._fedgb = text
			words = append(words, current)
			if _agge {
				_ac.Log.Info("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", len(words)-1, current.String())
				for idx, tm := range current._daafd {
					_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, tm.String())
				}
			}
		}
		current = nil
	}

	for _, mark := range _babd {
		if _fefd && current != nil && len(current._daafd) > 0 {
			prev := current._daafd[len(current._daafd)-1]
			markDia, markIsDia := _cdcee(mark._efgdc)
			prevDia, prevIsDia := _cdcee(prev._efgdc)
			if markIsDia && !prevIsDia && prev.inDiacriticArea(mark) {
				// The diacritic follows its base mark: attach it to the word.
				current.addDiacritic(markDia)
				continue
			}
			if prevIsDia && !markIsDia && mark.inDiacriticArea(prev) {
				// The diacritic came first: replace it with the base mark,
				// then re-attach the diacritic.
				current._daafd = current._daafd[:len(current._daafd)-1]
				current.appendMark(mark, _ageed)
				current.addDiacritic(prevDia)
				continue
			}
		}

		if _cagg(mark._efgdc) {
			flush()
			continue
		}
		if current == nil {
			current = _bbfe([]*textMark{mark}, _ageed)
			continue
		}

		fontsize := current._adecc
		depthGap := _dc.Abs(_eeead(_ageed, mark)-current._adgge) / fontsize
		readingGap := _debff(mark, current) / fontsize
		if readingGap >= _ffca || !(-_faaf <= readingGap && depthGap <= _cded) {
			flush()
			current = _bbfe([]*textMark{mark}, _ageed)
			continue
		}
		current.appendMark(mark, _ageed)
	}
	flush()
	return words
}

// _dfa splits the word bag `_faffa` into a list of smaller bags. For each
// depth index it repeatedly takes the first remaining word, seeds a new bag
// from it via `_egfaa`, and grows that bag with scanBand passes until a full
// round adds no more words. Words are removed from `_faffa` as they are used.
func _dfa(_faffa *wordBag, _bffa float64, _afdfe, _dbbc rulingList) []*wordBag {
	var bags []*wordBag
	for _, depthIdx := range _faffa.depthIndexes() {
		grew := false
		for !_faffa.empty(depthIdx) {
			readingIdx := _faffa.firstReadingIndex(depthIdx)
			seed := _faffa.firstWord(readingIdx)
			bag := _egfaa(seed, _bffa, _afdfe, _dbbc)
			_faffa.removeWord(seed, readingIdx)
			if _abcbg {
				_ac.Log.Info("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073", seed.String())
			}
			// Do-while: run at least once, repeat while the last round grew
			// the bag.
			for again := true; again; again = grew {
				grew = false
				// The limits scale with the bag's current `_fab`.
				otherLimit := _bgdf * bag._fab
				readingLimit := _afdf * bag._fab
				depthLimit := _ggce * bag._fab
				if _abcbg {
					_ac.Log.Info("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066", bag.minDepth(), bag.maxDepth(), depthLimit, readingLimit)
				}
				if _faffa.scanBand("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c", bag, _gdaf(_cecg, 0), bag.minDepth()-depthLimit, bag.maxDepth()+depthLimit, _aebe, false, false) > 0 {
					grew = true
				}
				if _faffa.scanBand("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c", bag, _gdaf(_cecg, readingLimit), bag.minDepth(), bag.maxDepth(), _adca, false, false) > 0 {
					grew = true
				}
				if grew {
					continue
				}
				// Trial pass first; the "other" pass only runs if the trial
				// count passes the check below.
				found := _faffa.scanBand("", bag, _gdaf(_bacd, otherLimit), bag.minDepth(), bag.maxDepth(), _ggfbd, true, false)
				if found > 0 {
					depthSpan := (bag.maxDepth() - bag.minDepth()) / bag._fab
					if (found > 1 && float64(found) > 0.3*depthSpan) || found <= 10 {
						if _faffa.scanBand("\u006f\u0074\u0068e\u0072", bag, _gdaf(_bacd, otherLimit), bag.minDepth(), bag.maxDepth(), _ggfbd, false, true) > 0 {
							grew = true
						}
					}
				}
			}
			bags = append(bags, bag)
		}
	}
	return bags
}

// textWord is a run of text marks with a bounding rectangle.
type textWord struct {
	_fg.PdfRectangle
	_adgge float64     // set by `_bbfe` to the page rectangle's Ury minus the word's Lly
	_fedgb string      // the word's text, set from computeText() in `_cdcbcc`
	_daafd []*textMark // the marks that make up the word
	_adecc float64     // set by `_bbfe` to the largest `_cggc` among the marks
	_eadcb bool
}

// fill appends a pathSection to `*_agcg`, built from the state's current
// subpaths (`_efb`) and fill color. Debug details are printed when `_aebg`
// is set.
func (_faeg *shapesState) fill(_agcg *[]pathSection) {
	section := pathSection{_ged: _faeg._efb, Color: _faeg._cfda.getFillColor()}
	*_agcg = append(*_agcg, section)
	if !_aebg {
		return
	}
	box := section.bbox()
	_gde.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*_agcg), len(section._ged), _faeg, section.Color, box, box.Width(), box.Height())
	if _aab {
		// Print the subpaths with index 0 through 10 only.
		for idx, sp := range section._ged {
			_gde.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", idx, sp)
			if idx == 10 {
				break
			}
		}
	}
}

// _agcd logs the number of grids in `_ebda` under the title `_fgbef` and
// prints each grid.
func _agcd(_fgbef string, _ebda []rulingList) {
	_ac.Log.Info("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073", len(_ebda), _fgbef)
	for idx, grid := range _ebda {
		_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, grid.String())
	}
}

// aligned reports whether enough of the rulings line up with each other.
// Each ruling either bumps the counter of one already-recorded ruling that it
// is gridIntersecting with, or is recorded itself with a zero counter. The
// fraction of rulings left with a zero counter must not exceed 1.0 - `_egab`.
// Lists with fewer than two rulings are never aligned.
func (_bcadg rulingList) aligned() bool {
	if len(_bcadg) < 2 {
		return false
	}
	matches := make(map[*ruling]int)
	matches[_bcadg[0]] = 0
	for _, r := range _bcadg[1:] {
		matched := false
		for recorded := range matches {
			if r.gridIntersecting(recorded) {
				matches[recorded]++
				matched = true
				break
			}
		}
		if !matched {
			matches[r] = 0
		}
	}
	unmatched := 0
	for _, count := range matches {
		if count == 0 {
			unmatched++
		}
	}
	fraction := float64(unmatched) / float64(len(_bcadg))
	ok := fraction <= 1.0-_egab
	if _aebg {
		_ac.Log.Info("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", ok, fraction, unmatched, len(_bcadg), _bcadg.String())
	}
	return ok
}

// lineRuling is a line segment between two points together with a ruling
// kind, a mark kind and a color.
type lineRuling struct {
	_gdggb rulingKind
	_daab  markKind
	_be.Color
	_cged, _ggeaa _dca.Point
}

// exists reports whether the array already holds a mark with the same
// DirectObject and BBox (compared with reflect.DeepEqual) and the same Text
// as `_gbg`.
func (_cfd *TextMarkArray) exists(_gbg TextMark) bool {
	for _, mark := range _cfd.Elements() {
		if _b.DeepEqual(_gbg.DirectObject, mark.DirectObject) && _b.DeepEqual(_gbg.BBox, mark.BBox) && mark.Text == _gbg.Text {
			return true
		}
	}
	return false
}
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// String returns a description of the text mark: its rectangle, its `_cggc`
// value (labelled "fontsize") and its quoted text.
func (_bgee *textMark) String() string {
	return _gde.Sprintf("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022", _bgee.PdfRectangle, _bgee._cggc, _bgee._efgdc)
}

// _bb reports whether any font in `_bbc` has the FontName `_cfa`.
func _bb(_bbc []Font, _cfa string) bool {
	for idx := range _bbc {
		if _bbc[idx].FontName == _cfa {
			return true
		}
	}
	return false
}

// reset sets both matrices of the text object back to the identity matrix and
// clears `_ffc`.
func (_eadd *textObject) reset() {
	_eadd._ffc = nil
	_eadd._acbc = _dca.IdentityMatrix()
	_eadd._abcf = _dca.IdentityMatrix()
}

// _bgdg (logged as "makeTextPage") turns the text marks of a page into a list
// of paragraphs: marks are grouped into words, words into word bags, and each
// bag is arranged into a paragraph. Unless `_bgfg` is set, tables are
// extracted when at least `_ffa` paragraphs exist, and the paragraphs are
// additionally put in topological order. It returns nil when there are no
// marks or no words.
func _bgdg(_gbaaf []*textMark, _ecbd _fg.PdfRectangle, _fgfc rulingList, _dbba []gridTiling, _bgfg bool) paraList {
	_ac.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_gbaaf), _ecbd)
	if len(_gbaaf) == 0 {
		return nil
	}
	words := _cdcbcc(_gbaaf, _ecbd)
	if len(words) == 0 {
		return nil
	}

	_fgfc.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := _fgfc.vertsHorzs()
	pageBag := _ddfff(words, _ecbd.Ury, verts, horzs)
	bags := _ffcd(_dfa(pageBag, _ecbd.Ury, verts, horzs))

	paras := make(paraList, 0, len(bags))
	for _, bag := range bags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}

	if !_bgfg && len(paras) >= _ffa {
		paras = paras.extractTables(_dbba)
	}
	paras.sortReadingOrder()
	if !_bgfg {
		paras.sortTopoOrder()
	}
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// String returns a human readable description of the subpath: the number of
// points followed by all points when there are at most five, otherwise by the
// first two points and the last point.
func (_fgb *subpath) String() string {
	points := _fgb._gdgd
	count := len(points)
	if count > 5 {
		return _gde.Sprintf("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f", count, points[0], points[1], points[count-1])
	}
	return _gde.Sprintf("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f", count, points)
}

// makeRectRuling tries to interpret the first four points of the subpath as a
// rectangle and to convert that rectangle into a ruling of color `_bdad`.
// Each side is classified with `_agfe`. The path is accepted when it has two
// sides of kind `_faccd` and two of kind `_cbab`, or two sides of one of these
// kinds, none of the other, and the pair passes `_gfed` / `_caba`
// respectively. The second result is false when the path is rejected or the
// rectangle does not yield a ruling.
//
// NOTE(review): the subpath is assumed to hold at least four points; this is
// not checked here.
func (_bcead *subpath) makeRectRuling(_bdad _be.Color) (*ruling, bool) {
	if _cadc {
		_ac.Log.Info("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076", _bcead)
	}
	corners := _bcead._gdgd[:4]
	sideKinds := make(map[int]rulingKind, len(corners))
	for idx, from := range corners {
		to := _bcead._gdgd[(idx+1)%4]
		sideKinds[idx] = _agfe(from, to)
		if _cadc {
			_gde.Printf("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066", idx, sideKinds[idx], from, to)
		}
	}
	if _cadc {
		_gde.Printf("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a", sideKinds)
	}

	// Indexes of the sides classified `_cbab` and `_faccd` respectively
	// (labelled "verts" and "horzs" in the debug output).
	var vertIdx, horzIdx []int
	for idx, kind := range sideKinds {
		switch kind {
		case _faccd:
			horzIdx = append(horzIdx, idx)
		case _cbab:
			vertIdx = append(vertIdx, idx)
		}
	}
	if _cadc {
		_gde.Printf("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(horzIdx), horzIdx)
		_gde.Printf("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(vertIdx), vertIdx)
	}

	var accepted bool
	switch {
	case len(horzIdx) == 2 && len(vertIdx) == 2:
		accepted = true
	case len(horzIdx) == 2 && len(vertIdx) == 0:
		accepted = _gfed(corners[horzIdx[0]], corners[horzIdx[1]])
	case len(vertIdx) == 2 && len(horzIdx) == 0:
		accepted = _caba(corners[vertIdx[0]], corners[vertIdx[1]])
	}
	if _cadc {
		_gde.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzIdx), len(vertIdx), accepted)
	}
	if !accepted {
		if _cadc {
			_ac.Log.Error("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v", _bcead)
			_gde.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzIdx), len(vertIdx), accepted)
		}
		return &ruling{}, false
	}

	// When one kind is missing entirely, use the sides that are not of the
	// other kind in its place.
	if len(vertIdx) == 0 {
		for idx, kind := range sideKinds {
			if kind != _faccd {
				vertIdx = append(vertIdx, idx)
			}
		}
	}
	if len(horzIdx) == 0 {
		for idx, kind := range sideKinds {
			if kind != _cbab {
				horzIdx = append(horzIdx, idx)
			}
		}
	}
	if _cadc {
		_ac.Log.Info("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076", len(horzIdx), len(vertIdx), len(corners), horzIdx, vertIdx, corners)
	}

	// Pick the higher/lower of the two "horz" corners and the larger-X /
	// smaller-X of the two "vert" corners.
	yHigh, yLow := corners[horzIdx[1]], corners[horzIdx[0]]
	if corners[horzIdx[0]].Y > corners[horzIdx[1]].Y {
		yHigh, yLow = corners[horzIdx[0]], corners[horzIdx[1]]
	}
	xHigh, xLow := corners[vertIdx[1]], corners[vertIdx[0]]
	if corners[vertIdx[0]].X > corners[vertIdx[1]].X {
		xHigh, xLow = corners[vertIdx[0]], corners[vertIdx[1]]
	}

	// Llx starts as the larger X and Urx as the smaller; the swaps below put
	// the rectangle's edges in order.
	rect := _fg.PdfRectangle{Llx: xHigh.X, Urx: xLow.X, Lly: yLow.Y, Ury: yHigh.Y}
	if rect.Llx > rect.Urx {
		rect.Llx, rect.Urx = rect.Urx, rect.Llx
	}
	if rect.Lly > rect.Ury {
		rect.Lly, rect.Ury = rect.Ury, rect.Lly
	}

	candidate := rectRuling{PdfRectangle: rect, _eegf: _fgae(rect), Color: _bdad}
	if candidate._eegf == _ccfb {
		if _cadc {
			_ac.Log.Error("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c")
		}
		return nil, false
	}
	result, isRuling := candidate.asRuling()
	if !isRuling {
		if _cadc {
			_ac.Log.Error("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg")
		}
		return nil, false
	}
	if _aebg {
		_gde.Printf("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a", result.String())
	}
	return result, true
}

// findTableGrid tries to build a table from the tiling `_gffc`. For every
// tile of the tiling, the paragraphs inside it (per inTile) are stored as a
// composite cell. The attempt is abandoned (nil, nil) when more tiles are
// empty than the share allowed by `_eabd`, or when the row-0 check below
// counts zero cells. On success it returns the table after reduceTiling and
// subdivide, together with the set of paragraphs that were placed in it.
func (_dccec paraList) findTableGrid(_gffc gridTiling) (*textTable, map[*textPara]struct{}) {
	cols := len(_gffc._abeb)
	rows := len(_gffc._cegbg)
	table := textTable{
		_aefef:  true,
		_acddc:  cols,
		_gebeeb: rows,
		_cfgbb:  make(map[uint64]*textPara, cols*rows),
		_edbe:   make(map[uint64]compositeCell, cols*rows),
	}
	table.PdfRectangle = _gffc.PdfRectangle

	placed := make(map[*textPara]struct{})
	emptyLimit := int((1.0 - _eabd) * float64(cols*rows))
	emptyCount := 0
	if _gbde {
		_ac.Log.Info("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064", cols, rows)
	}

	for rowIdx, rowKey := range _gffc._cegbg {
		rowTiles, haveRow := _gffc._faeca[rowKey]
		if !haveRow {
			continue
		}
		for colIdx, colKey := range _gffc._abeb {
			tile, haveTile := rowTiles[colKey]
			if !haveTile {
				continue
			}
			inside := _dccec.inTile(tile)
			if len(inside) != 0 {
				table.putComposite(colIdx, rowIdx, inside, tile.PdfRectangle)
				for _, para := range inside {
					placed[para] = struct{}{}
				}
				continue
			}
			emptyCount++
			if emptyCount > emptyLimit {
				if _gbde {
					_ac.Log.Info("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064", emptyCount)
				}
				return nil, nil
			}
		}
	}

	// Count the row-0 cells that are missing or whose `_bfge` flag is unset.
	headerCount := 0
	for colIdx := 0; colIdx < cols; colIdx++ {
		cell := table.get(colIdx, 0)
		if cell == nil || !cell._bfge {
			headerCount++
		}
	}
	if headerCount == 0 {
		if _gbde {
			_ac.Log.Info("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030")
		}
		return nil, nil
	}

	result := table.reduceTiling(_gffc, _ddba)
	result = result.subdivide()
	return result, placed
}
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// String returns a description of the word: its `_adgge` value, its
// rectangle, its `_adecc` value (labelled "fontsize") and its quoted text.
func (_gedbg *textWord) String() string {
	return _gde.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022", _gedbg._adgge, _gedbg.PdfRectangle, _gedbg._adecc, _gedbg._fedgb)
}

// llyOrdering returns the paragraph indexes ordered by increasing Lly.
// Paragraphs with equal Lly keep their relative order.
func (_cdbbd paraList) llyOrdering() []int {
	order := make([]int, len(_cdbbd))
	for idx := range _cdbbd {
		order[idx] = idx
	}
	_ab.SliceStable(order, func(_fedb, _affd int) bool {
		return _cdbbd[order[_fedb]].Lly < _cdbbd[order[_affd]].Lly
	})
	return order
}

// merge combines the rulings of the list into a single new ruling. Kind,
// `_adaa` and color come from the first ruling; `_cbag` is the mean over all
// rulings; `_efgeb` is the smallest and `_bbge` the largest value found.
//
// NOTE(review): the list is assumed to be non-empty; this is not checked here.
func (_ccbff rulingList) merge() *ruling {
	first := _ccbff[0]
	total := first._cbag
	low := first._efgeb
	high := first._bbge
	for _, r := range _ccbff[1:] {
		total += r._cbag
		if r._efgeb < low {
			low = r._efgeb
		}
		if r._bbge > high {
			high = r._bbge
		}
	}
	merged := &ruling{
		_gffa:  _ccbff[0]._gffa,
		_adaa:  _ccbff[0]._adaa,
		Color:  _ccbff[0].Color,
		_cbag:  total / float64(len(_ccbff)),
		_efgeb: low,
		_bbge:  high,
	}
	if _bdeff {
		_ac.Log.Info("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073", len(_ccbff), merged)
		for idx, r := range _ccbff {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, r)
		}
	}
	return merged
}

// setHorizScaling stores `_acg` in the `_gfad` field of the text object's
// `_abcb`. It does nothing on a nil receiver.
func (_bff *textObject) setHorizScaling(_acg float64) {
	if _bff != nil {
		_bff._abcb._gfad = _acg
	}
}

// getCellInfo searches the table's cells for the mark `_dcge`. It returns the
// two indexes of the first cell whose marks contain it, as
// {{outer index}, {inner index}}, or nil when no cell contains the mark.
func (_daa TextTable) getCellInfo(_dcge TextMark) [][]int {
	for outer, cells := range _daa.Cells {
		for inner, cell := range cells {
			marks := &cell.Marks
			if marks.exists(_dcge) {
				return [][]int{{outer}, {inner}}
			}
		}
	}
	return nil
}

// endsInHyphen reports whether the line's last word ends with a rune of the
// unicode.Hyphen class and `_dcdb` additionally accepts either that word's
// text (only considered when the word's `_eadcb` flag is set) or the text of
// the whole line.
//
// NOTE(review): the line is assumed to contain at least one word.
func (_dcadg *textLine) endsInHyphen() bool {
	last := _dcadg._fgbe[len(_dcadg._fgbe)-1]
	wordText := last._fedgb
	lastRune, size := _f.DecodeLastRuneInString(wordText)
	if size <= 0 || !_fc.Is(_fc.Hyphen, lastRune) {
		return false
	}
	if last._eadcb && _dcdb(wordText) {
		return true
	}
	return _dcdb(_dcadg.text())
}

// extractXObjectImage records an ImageMark for the XObject named `*_edc`.
// The decoded image is cached per XObject in `_fba`. On a cache miss the
// image is loaded, and, if it has a Mask or an SMask, the mask is applied
// before caching. The mark's size, angle and position come from the CTM of
// `_fae`. A missing XObject or XObject image is not an error: nil is
// returned and nothing is recorded.
func (_gba *imageExtractContext) extractXObjectImage(_edc *_dce.PdfObjectName, _fae _dcg.GraphicsState, _gbd *_fg.PdfPageResources) error {
	xobj, _ := _gbd.GetXObjectByName(*_edc)
	if xobj == nil {
		return nil
	}

	entry, cached := _gba._fba[xobj]
	if !cached {
		ximg, err := _gbd.GetXObjectImageByName(*_edc)
		if err != nil {
			return err
		}
		if ximg == nil {
			return nil
		}
		img, err := ximg.ToImage()
		if err != nil {
			return err
		}

		// A mask failure is only logged; extraction carries on with whatever
		// the mask helper returned.
		var mask _e.Image
		if ximg.Mask != nil {
			if mask, err = _decf(ximg.Mask, _be.Opaque); err != nil {
				_ac.Log.Debug("\u0057\u0041\u0052\u004e\u003a \u0063\u006f\u0075\u006c\u0064 \u006eo\u0074\u0020\u0067\u0065\u0074\u0020\u0065\u0078\u0070\u006c\u0069\u0063\u0069\u0074\u0020\u0069\u006d\u0061\u0067e\u0020\u006d\u0061\u0073\u006b\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e")
			}
		} else if ximg.SMask != nil {
			mask, err = _eafda(ximg.SMask, _be.Opaque)
			if err != nil {
				_ac.Log.Debug("W\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0067\u0065\u0074\u0020\u0073\u006f\u0066\u0074\u0020\u0069\u006da\u0067e\u0020\u006d\u0061\u0073k\u002e\u0020O\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063\u0074\u002e")
			}
		}

		if mask != nil {
			goImg, convErr := img.ToGoImage()
			if convErr != nil {
				return convErr
			}
			goImg = _faefe(goImg, mask)
			switch ximg.ColorSpace.String() {
			case "\u0044\u0065\u0076\u0069\u0063\u0065\u0047\u0072\u0061\u0079", "\u0049n\u0064\u0065\u0078\u0065\u0064":
				img, convErr = _fg.ImageHandling.NewGrayImageFromGoImage(goImg)
			default:
				img, convErr = _fg.ImageHandling.NewImageFromGoImage(goImg)
			}
			if convErr != nil {
				return convErr
			}
		}

		entry = &cachedImage{_gb: img, _cbg: ximg.ColorSpace}
		_gba._fba[xobj] = entry
	}

	source := entry._gb
	colorspace := entry._cbg
	rgb, err := colorspace.ImageToRGB(*source)
	if err != nil {
		return err
	}
	_ac.Log.Debug("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073", _fae.CTM.String())
	mark := ImageMark{
		Image:  &rgb,
		Width:  _fae.CTM.ScalingFactorX(),
		Height: _fae.CTM.ScalingFactorY(),
		Angle:  _fae.CTM.Angle(),
	}
	mark.X, mark.Y = _fae.CTM.Translation()
	_gba._dag = append(_gba._dag, mark)
	_gba._fbd++
	return nil
}

// lists is a slice of list pointers.
type lists []*list

// cubicTo handles a cubic curve segment by adding only its end point
// (`_decg`, `_fcca`) to the current path; the four control-point coordinates
// are not used.
func (_egea *shapesState) cubicTo(_bbcd, _eaec, _aefdg, _egfa, _decg, _fcca float64) {
	if _bdefa {
		_ac.Log.Info("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a")
	}
	_egea.addPoint(_decg, _fcca)
}

// put stores the paragraph `_aeacb` in the table under the key that `_bafcd`
// derives from (`_fafg`, `_fggdg`).
func (_cbcbdd *textTable) put(_fafg, _fggdg int, _aeacb *textPara) {
	key := _bafcd(_fafg, _fggdg)
	_cbcbdd._cfgbb[key] = _aeacb
}

// textState groups text-related state values.
// NOTE(review): the meaning of the individual fields is not visible in this
// part of the file.
type textState struct {
	_aaec float64
	_ggf  float64
	_gfad float64
	_gcg  float64
	_dda  float64
	_bgcc RenderMode
	_cgfc float64
	_ecf  *_fg.PdfFont
	_cefd _fg.PdfRectangle
	_ddcd int
	_dcad int
}

// _bbfe creates a word from the marks in `_cafca`. The word's rectangle is
// the `_ebge` combination of all mark rectangles, `_adecc` is the largest
// `_cggc` among the marks, and `_adgge` is the Ury of `_caacb` minus the
// word's Lly.
//
// NOTE(review): `_cafca` is assumed to be non-empty.
func _bbfe(_cafca []*textMark, _caacb _fg.PdfRectangle) *textWord {
	head, rest := _cafca[0], _cafca[1:]
	box := head.PdfRectangle
	largest := head._cggc
	for _, mark := range rest {
		box = _ebge(box, mark.PdfRectangle)
		if mark._cggc > largest {
			largest = mark._cggc
		}
	}
	return &textWord{
		PdfRectangle: box,
		_daafd:       _cafca,
		_adgge:       _caacb.Ury - box.Lly,
		_adecc:       largest,
	}
}

// _efdd is a case-insensitive regular expression source matching a Roman
// numeral followed by ")" or ".", or a Roman numeral enclosed in parentheses.
var _efdd string = "\u0028\u003f\u0069\u0029\u005e\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028?\u003a\u0044\u007cM\u0029\u007c\u0044\u003f\u0043{\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028\u003f\u003a\u004c\u007c\u0043\u0029\u007cL\u003f\u0058\u007b\u0030\u002c\u0033}\u0029\u0028\u0049\u0028\u003f\u003a\u0056\u007c\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u005c\u0029\u007c\u005c\u002e\u0029\u007c\u005e\u005c\u0028\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028\u003f\u003aD\u007cM\u0029\u007c\u0044\u003f\u0043\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028?\u003a\u004c\u007c\u0043\u0029\u007c\u004c?\u0058\u007b0\u002c\u0033\u007d\u0029(\u0049\u0028\u003f\u003a\u0056|\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u005c\u0029"
// readBefore reports whether paragraph `_gfeg` is read before paragraph
// `_cgeg`. This is the case when `_edacg` accepts the pair and the first has
// the larger Lly. Failing that, it is the case when the first paragraph's
// `_ebadd` lies entirely to the left of the second's and no other paragraph
// in the Lly range between the two overlaps their common X extent.
// `_caefd` is handed to llyRange to find the paragraphs in that range.
func (_gafd paraList) readBefore(_caefd []int, _gfeg, _cgeg int) bool {
	first, second := _gafd[_gfeg], _gafd[_cgeg]
	if _edacg(first, second) && first.Lly > second.Lly {
		return true
	}
	if !(first._ebadd.Urx < second._ebadd.Llx) {
		return false
	}

	low, high := first.Lly, second.Lly
	if low > high {
		low, high = high, low
	}
	left := _dc.Max(first._ebadd.Llx, second._ebadd.Llx)
	right := _dc.Min(first._ebadd.Urx, second._ebadd.Urx)

	for _, idx := range _gafd.llyRange(_caefd, low, high) {
		if idx == _gfeg || idx == _cgeg {
			continue
		}
		other := _gafd[idx]
		if other._ebadd.Llx <= right && left <= other._ebadd.Urx {
			return false
		}
	}
	return true
}
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_faga PageText) ToText() string {
	text := _faga.Text()
	return text
}

// _cced reports whether the closed intervals [Lly, Ury] of the two rectangles intersect.
// Rectangles that merely touch (one's Lly equals the other's Ury) count as intersecting.
func _cced(_dcage, _dafca _fg.PdfRectangle) bool {
	if !(_dcage.Lly <= _dafca.Ury) {
		return false
	}
	return _dafca.Lly <= _dcage.Ury
}
2022-06-27 19:58:38 +00:00
2023-11-11 11:29:03 +00:00
// String returns a description of `b`.
func ( _ffcc * wordBag ) String ( ) string { var _cefa [ ] string ; for _ , _gac := range _ffcc . depthIndexes ( ) { _fdf := _ffcc . _aac [ _gac ] ; for _ , _fadgg := range _fdf { _cefa = append ( _cefa , _fadgg . _fedgb ) ; } ; } ; return _gde . Sprintf ( "\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071" , _ffcc . PdfRectangle , _ffcc . _fab , len ( _cefa ) , _cefa ) ;
2023-10-07 13:58:01 +00:00
} ;
2022-07-13 21:28:43 +00:00
2023-11-11 11:29:03 +00:00
// Text returns the text content of the `bulletLists`: the Text() of every element, concatenated
// in order with no separator added here.
func (_cgdd *lists) Text() string {
	var sb _df.Builder
	for _, item := range *_cgdd {
		sb.WriteString(item.Text())
	}
	return sb.String()
}

// getParagraphs builds the paragraph list for the page.
//
// Rulings are gathered from `_ffge` (when `_efbd` is set) and from `_cedf` (when `_dfcb` is set)
// and converted with toTilings(). The text marks in `_fbga` are then processed in four passes, one
// per `_aaad` value 0, 90, 180 and 270; each non-empty group is handed to `_bgdg` and the results
// are appended in that order. Marks whose `_aaad` is none of those four values are never emitted.
func (_bdecg *PageText) getParagraphs() paraList {
	var rulings rulingList
	if _efbd {
		rulings = append(rulings, _fdgdc(_bdecg._ffge)...)
	}
	if _dfcb {
		rulings = append(rulings, _cgabd(_bdecg._cedf)...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(_bdecg._fbga) // marks not yet assigned to a rotation group
	for rot := 0; rot < 360 && remaining > 0; rot += 90 {
		// At most `remaining` marks can match this rotation, so that is the right capacity.
		// (The previous bound, len(marks)-remaining, is the count already consumed and is 0 on
		// the first pass, which made the pre-allocation useless in the common case.)
		group := make([]*textMark, 0, remaining)
		for _, mark := range _bdecg._fbga {
			if mark._aaad == rot {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _bgdg(group, _bdecg._fgag, rulings, tilings, _bdecg._dbc._dcc)...)
		remaining -= len(group)
	}
	return paras
}
// hasLines reports whether any line in `_ggeaf` has a rectangle for which `_gege` holds against
// the cell's rectangle. When `_eadb` is set, each comparison is printed to stdout.
func (_bddda compositeCell) hasLines(_ggeaf []*textLine) bool {
	total := len(_ggeaf)
	for idx, line := range _ggeaf {
		hit := _gege(_bddda.PdfRectangle, line.PdfRectangle)
		if _eadb {
			_gde.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", hit, idx, total)
			_gde.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", _bddda)
			_gde.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if hit {
			return true
		}
	}
	return false
}

// textMark is the extractor's internal record for one piece of shown text.
// Field names are obfuscated; only what this file visibly does with them is noted.
type textMark struct {
	_fg.PdfRectangle
	// _aaad is matched against 0, 90, 180 and 270 when marks are grouped in getParagraphs.
	_aaad  int
	_efgdc string
	_dcdf  string
	_abef  *_fg.PdfFont
	// _cggc feeds the running maximum stored in textWord._adecc.
	_cggc  float64
	_accea float64
	_bcbeg _dca.Matrix
	_caeg  _dca.Point
	_fbae  _fg.PdfRectangle
	_cccae _be.Color
	_dcgea _be.Color
	_dafb  _dce.PdfObject
	_facc  []string
	Tw     float64
	Th     float64
	_fbcf  int
	_ccfa  int
}

// setFont records `_aedf` in the text state and then looks up the font named `_eee`.
// A nil receiver is a no-op. The size is stored even when the lookup fails; the font itself is
// only stored on success. The lookup error, if any, is returned unchanged.
func (_fggg *textObject) setFont(_eee string, _aedf float64) error {
	if _fggg == nil {
		return nil
	}
	_fggg._abcb._dda = _aedf
	font, err := _fggg.getFont(_eee)
	if err == nil {
		_fggg._abcb._ecf = font
	}
	return err
}

// secMinMax returns the smallest `_efgeb` and the largest `_bbge` over all rulings in the list.
// The list must be non-empty; element 0 is read unconditionally.
func (_fageg rulingList) secMinMax() (float64, float64) {
	lo, hi := _fageg[0]._efgeb, _fageg[0]._bbge
	for i := 1; i < len(_fageg); i++ {
		r := _fageg[i]
		if r._efgeb < lo {
			lo = r._efgeb
		}
		if r._bbge > hi {
			hi = r._bbge
		}
	}
	return lo, hi
}

// _bdca selects lines from `_afeg` relative to the reference line `_bdfe`.
//
// Only lines with `_bfcg` >= `_dgdc` are considered. `_bfff` chooses the mode:
//   - `_bfff` != -1: lines with `_bfcg` < `_bfff` whose text differs from the reference are kept;
//     the scan stops at the first such line whose rounded Llx is left of the reference's.
//   - `_bfff` == -1: lines with the same `_bfcg` as the reference are kept when their text
//     differs; other lines are kept when `_egbac` returns a value other than -1 and the line's
//     `_bfcg` does not exceed it.
//
// The result is always a non-nil slice.
func _bdca(_bdfe *textLine, _afeg []*textLine, _fefb []float64, _dgdc, _bfff float64) []*textLine {
	picked := make([]*textLine, 0)
	for _, cand := range _afeg {
		if !(cand._bfcg >= _dgdc) {
			continue
		}
		switch {
		case _bfff != -1 && cand._bfcg < _bfff:
			if cand.text() == _bdfe.text() {
				continue
			}
			if _dc.Round(cand.Llx) < _dc.Round(_bdfe.Llx) {
				// Equivalent to leaving the loop: nothing follows it but the return.
				return picked
			}
			picked = append(picked, cand)
		case _bfff == -1:
			if cand._bfcg == _bdfe._bfcg {
				if cand.text() != _bdfe.text() {
					picked = append(picked, cand)
				}
				continue
			}
			limit := _egbac(_bdfe, _afeg, _fefb)
			if limit != -1 && cand._bfcg <= limit {
				picked = append(picked, cand)
			}
		}
	}
	return picked
}

// gridTile is a rectangle plus four boolean flags.
// NOTE(review): the flags presumably mark the four sides of the tile; their meaning is not
// visible in this part of the file, so confirm before relying on it.
type gridTile struct {
	_fg.PdfRectangle
	_cbbgc, _cbge, _beacf, _dafe bool
}

// _ebfaf reports whether the magnitude of `_eebg` is below the package tolerance `_bafg`.
func _ebfaf(_eebg float64) bool {
	magnitude := _dc.Abs(_eebg)
	return magnitude < _bafg
}

// setTextMatrix builds a matrix from the six numbers in `_dege` and stores it in both `_acbc`
// and `_abcf`. Any other length is logged at debug level and ignored.
// Unlike the other setters on textObject there is no nil-receiver guard here.
func (_deb *textObject) setTextMatrix(_dege []float64) {
	if len(_dege) != 6 {
		_ac.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", len(_dege))
		return
	}
	m := _dca.NewMatrix(_dege[0], _dege[1], _dege[2], _dege[3], _dege[4], _dege[5])
	_deb._acbc = m
	_deb._abcf = _deb._acbc
}

// _dcgaf adjusts the entries of `_cefge` in place, walking its keys in the order returned by
// `_dfdf`. Maps with at most one entry are left untouched. Nil entries are skipped.
func _dcgaf(_cefge map[int][]float64) {
	if len(_cefge) <= 1 {
		return
	}
	keys := _dfdf(_cefge)
	if _eadb {
		_ac.Log.Info("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076", keys)
	}

	// Find the first key with a non-nil entry. If there is none, start/prevKey end up at the
	// last position and the second loop skips everything.
	var start, prevKey int
	for start, prevKey = range keys {
		if _cefge[prevKey] != nil {
			break
		}
	}

	for offset, key := range keys[start:] {
		next := _cefge[key]
		if next == nil {
			continue
		}
		if _eadb {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a", start+offset, prevKey, key)
		}
		// NOTE(review): this re-reads the entry for `key`, i.e. the same slice as `next`, and
		// then stores it under `prevKey`. Reading the `prevKey` entry here looks like what was
		// intended; behaviour is kept as found. Confirm against the unobfuscated source.
		cur := _cefge[key]
		last := len(cur) - 1
		if cur[last] > next[0] {
			cur[last] = next[0]
			_cefge[prevKey] = cur
		}
		prevKey = key
	}
}

// getComposite looks up the composite cell stored under the key `_bafcd(_ddcc, _gbff)` and
// returns the result of its parasBBox(). A missing entry yields (nil, zero rectangle).
func (_deaf *textTable) getComposite(_ddcc, _gbff int) (paraList, _fg.PdfRectangle) {
	cell, found := _deaf._edbe[_bafcd(_ddcc, _gbff)]
	if _eadb {
		// Printed before the presence check, so a missing entry prints its zero value.
		_gde.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a", _ddcc, _gbff, cell.String())
	}
	if found {
		return cell.parasBBox()
	}
	return nil, _fg.PdfRectangle{}
}

// _bcdb writes the text of `_cbeb` (as produced by `_adgb`) to `_dccd` and then recurses into
// each entry of `_cbeb._edge` with the prefix `*_fcag` extended by three spaces.
func _bcdb(_cbeb *list, _dccd *_df.Builder, _fcag *string) {
	_dccd.WriteString(_adgb(_cbeb, _fcag))
	for _, child := range _cbeb._edge {
		// A fresh string per child: the callee receives a pointer and each child must get its
		// own copy, exactly as before.
		childPrefix := *_fcag + "\u0020\u0020\u0020"
		_bcdb(child, _dccd, &childPrefix)
	}
}

// _efgdb returns the keys of `_aaecb` in ascending order.
func _efgdb(_aaecb map[int]intSet) []int {
	keys := make([]int, 0, len(_aaecb))
	for k := range _aaecb {
		keys = append(keys, k)
	}
	_ab.Ints(keys)
	return keys
}

// eventNeighbours sorts `_ddedd` in place and sweeps over it to find, for every paragraph that
// has an event with `_dacd` set, the indexes of the paragraphs that were active at that moment.
//
// Events are ordered by `_eebcc`, then events with `_dacd` set come first, then by slice index.
// An event with `_dacd` set adds its `_dcgg` to the active set after pairing it (symmetrically)
// with every other active index; an event without it removes `_dcgg` from the active set.
// The result maps each paragraph to its neighbour indexes (nil when it has none); the order of
// the indexes within a slice is unspecified because it comes from map iteration.
func (_gfeb paraList) eventNeighbours(_ddedd []event) map[*textPara][]int {
	_ab.Slice(_ddedd, func(i, j int) bool {
		ei, ej := _ddedd[i], _ddedd[j]
		switch {
		case ei._eebcc != ej._eebcc:
			return ei._eebcc < ej._eebcc
		case ei._dacd != ej._dacd:
			return ei._dacd
		default:
			return i < j
		}
	})

	neighbours := make(map[int]intSet)
	active := make(intSet)
	for _, ev := range _ddedd {
		id := ev._dcgg
		if !ev._dacd {
			active.del(id)
			continue
		}
		neighbours[id] = make(intSet)
		for other := range active {
			if other == id {
				continue
			}
			neighbours[id].add(other)
			neighbours[other].add(id)
		}
		active.add(id)
	}

	result := map[*textPara][]int{}
	for id, set := range neighbours {
		para := _gfeb[id]
		if len(set) == 0 {
			result[para] = nil
			continue
		}
		ids := make([]int, 0, len(set))
		for other := range set {
			ids = append(ids, other)
		}
		result[para] = ids
	}
	return result
}

// inTile returns the paragraphs whose rectangle satisfies `_fbbb.contains`, in their original
// order. The result is nil when none do. When `_eadb` is set the selection is printed to stdout.
func (_beee paraList) inTile(_fbbb gridTile) paraList {
	var inside paraList
	for _, para := range _beee {
		if !_fbbb.contains(para.PdfRectangle) {
			continue
		}
		inside = append(inside, para)
	}
	if !_eadb {
		return inside
	}
	_gde.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _fbbb, len(inside))
	for idx, para := range inside {
		_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, para)
	}
	_gde.Println("")
	return inside
}
2023-08-03 17:30:04 +00:00
2023-11-11 11:29:03 +00:00
// PageText represents the layout of text on a device page.
type PageText struct {
	// _fbga holds the text marks collected while the content stream is processed.
	_fbga []*textMark
	// _edee, _fdaf and _agdc are filled by computeViews from the paragraph list
	// (written text, text marks and tables respectively).
	_edee string
	_fdaf []TextMark
	_agdc []TextTable
	// _fgag is passed to the paragraph builder together with the marks.
	_fgag _fg.PdfRectangle
	// _ffge and _cedf are path sections from which getParagraphs derives rulings.
	_ffge []pathSection
	_cedf []pathSection
	_cabf *_dce.PdfObject
	_gefc _dce.PdfObject
	// _eaee is the parsed content stream this page text was extracted from.
	_eaee *_dcg.ContentStreamOperations
	_dbc  PageTextOptions
}

// _bcda concatenates all ruling lists in `_dggg` and returns the result of vertsHorzs() on the
// combined list.
func _bcda(_dggg []rulingList) (rulingList, rulingList) {
	var all rulingList
	for i := range _dggg {
		all = append(all, _dggg[i]...)
	}
	return all.vertsHorzs()
}

// rulingList is a list of rulings.
type rulingList []*ruling

// pathSection groups a set of subpaths with the colour they carry.
type pathSection struct {
	_ged []*subpath
	_be.Color
}

// appendMark adds `_gfab` to the word, grows the word's rectangle to include the mark's
// rectangle, raises `_adecc` to the mark's `_cggc` when that is larger, and recomputes `_adgge`
// as `_aadad.Ury` minus the word's updated Lly.
func (_fgeg *textWord) appendMark(_gfab *textMark, _aadad _fg.PdfRectangle) {
	_fgeg._daafd = append(_fgeg._daafd, _gfab)

	merged := _ebge(_fgeg.PdfRectangle, _gfab.PdfRectangle)
	_fgeg.PdfRectangle = merged

	if size := _gfab._cggc; size > _fgeg._adecc {
		_fgeg._adecc = size
	}
	_fgeg._adgge = _aadad.Ury - _fgeg.PdfRectangle.Lly
}

// _gege reports whether both `_fcgc` and `_cced` hold for the two rectangles.
// `_cced` is only evaluated when `_fcgc` is true.
func _gege(_afge, _dgae _fg.PdfRectangle) bool {
	if !_fcgc(_afge, _dgae) {
		return false
	}
	return _cced(_afge, _dgae)
}

// _dcfd renders `_deceg` as an RGBA image of the same dimensions in which every pixel whose
// R, G and B components are all zero is painted with `_cdbd` and every other pixel is fully
// transparent. Pixels whose colour cannot be read are logged at debug level and left unset.
func _dcfd(_deceg *_fg.Image, _cdbd _be.Color) _e.Image {
	width, height := int(_deceg.Width), int(_deceg.Height)
	out := _e.NewRGBA(_e.Rect(0, 0, width, height))
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			src, err := _deceg.ColorAt(x, y)
			if err != nil {
				_ac.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e", x, y)
				continue
			}
			r, g, b, _ := src.RGBA()
			var pixel _be.Color = _be.Transparent
			if r+g+b == 0 {
				pixel = _cdbd
			}
			out.Set(x, y, pixel)
		}
	}
	return out
}
2023-05-29 17:26:33 +00:00
2023-11-11 11:29:03 +00:00
// String returns a description of the line: its `_bfcg` value, its bounding rectangle, its
// `_ceacg` value (labelled "fontsize" in the output) and its text in double quotes.
func (_bffd *textLine) String() string {
	// Decodes to `%.2f %6.2f fontsize=%.2f "%s"`.
	const format = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _gde.Sprintf(format, _bffd._bfcg, _bffd.PdfRectangle, _bffd._ceacg, _bffd.text())
}
2023-01-08 22:34:27 +00:00
2023-11-11 11:29:03 +00:00
// String returns a description of the mark kind: its name from the `_fffdg` table, or a
// "Not a mark: <n>" message for values missing from that table.
func (_abffa markKind) String() string {
	if name, known := _fffdg[_abffa]; known {
		return name
	}
	return _gde.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _abffa)
}

// getRight collects the `_abfec` value of every cell in the table's last column (column index
// `_acddc`-1), one per row for `_gebeeb` rows. It returns nil if any of them reports taken(), or
// if for some consecutive pair the first one's `_fgbea` is not the second one.
func (_aegdd *textTable) getRight() paraList {
	column := make(paraList, _aegdd._gebeeb)
	for row := range column {
		para := _aegdd.get(_aegdd._acddc-1, row)._abfec
		if para.taken() {
			return nil
		}
		column[row] = para
	}
	for row := 1; row < len(column); row++ {
		if column[row-1]._fgbea != column[row] {
			return nil
		}
	}
	return column
}

// imageExtractContext carries the state used while extracting images.
// Field names are obfuscated and their uses lie outside this part of the file.
type imageExtractContext struct {
	_dag  []ImageMark
	_cfc  int
	_fbd  int
	_fbg  int
	_fba  map[*_dce.PdfObjectStream]*cachedImage
	_ebac *ImageExtractOptions
	_defe bool
}

// _aebgd returns the lines of `_dfec` whose text matches the package-level pattern `_cfec`,
// preserving order. The result is always a non-nil slice.
func _aebgd(_dfec []*textLine) []*textLine {
	matched := make([]*textLine, 0)
	for _, line := range _dfec {
		if _cfec.Find([]byte(line.text())) == nil {
			continue
		}
		matched = append(matched, line)
	}
	return matched
}
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20> ).
func ( _eag * Extractor ) ExtractText ( ) ( string , error ) { _fdd , _ , _ , _eca := _eag . ExtractTextWithStats ( ) ; return _fdd , _eca ; } ; func ( _cbfb lineRuling ) asRuling ( ) ( * ruling , bool ) { _efeb := ruling { _gffa : _cbfb . _gdggb , Color : _cbfb . Color , _adaa : _efcd } ; switch _cbfb . _gdggb { case _cbab : _efeb . _cbag = _cbfb . xMean ( ) ;
_efeb . _efgeb = _dc . Min ( _cbfb . _cged . Y , _cbfb . _ggeaa . Y ) ; _efeb . _bbge = _dc . Max ( _cbfb . _cged . Y , _cbfb . _ggeaa . Y ) ; case _faccd : _efeb . _cbag = _cbfb . yMean ( ) ; _efeb . _efgeb = _dc . Min ( _cbfb . _cged . X , _cbfb . _ggeaa . X ) ; _efeb . _bbge = _dc . Max ( _cbfb . _cged . X , _cbfb . _ggeaa . X ) ;
default : _ac . Log . Error ( "\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064" , _cbfb . _gdggb ) ; return nil , false ; } ; return & _efeb , true ; } ; func ( _gdbae * structTreeRoot ) buildList ( _cagfc map [ int ] [ ] * textLine , _cabbe _dce . PdfObject ) [ ] * list { if _gdbae == nil { _ac . Log . Debug ( "\u0062\u0075\u0069\u006c\u0064\u004c\u0069\u0073\u0074\u003a\u0020t\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0069\u0073 \u006e\u0069\u006c" ) ;
return nil ; } ; var _egacb * structElement ; _gece := [ ] structElement { } ; if len ( _gdbae . _ccdea ) == 1 { _gefb := _gdbae . _ccdea [ 0 ] . _affb ; if _gefb == "\u0044\u006f\u0063\u0075\u006d\u0065\u006e\u0074" || _gefb == "\u0053\u0065\u0063\u0074" || _gefb == "\u0050\u0061\u0072\u0074" || _gefb == "\u0044\u0069\u0076" || _gefb == "\u0041\u0072\u0074" { _egacb = & _gdbae . _ccdea [ 0 ] ;
} ; } else { _egacb = & structElement { _dfcd : _gdbae . _ccdea , _affb : _gdbae . _cefga } ; } ; if _egacb == nil { _ac . Log . Debug ( "\u0062\u0075\u0069\u006cd\u004c\u0069\u0073\u0074\u003a\u0020\u0074\u006f\u0070\u0045l\u0065m\u0065\u006e\u0074\u0020\u0069\u0073\u0020n\u0069\u006c" ) ;
return nil ; } ; for _ , _fdbd := range _egacb . _dfcd { if _fdbd . _affb == "\u004c" { _gece = append ( _gece , _fdbd ) ; } else if _fdbd . _affb == "\u0054\u0061\u0062l\u0065" { _ccba := _fede ( _fdbd ) ; _gece = append ( _gece , _ccba ... ) ; } ; } ; _ebba := _abcc ( _gece , _cagfc , _cabbe ) ;
var _abeac [ ] * list ; for _ , _acbdd := range _ebba { _ffdg := _egddb ( _acbdd ) ; _abeac = append ( _abeac , _ffdg ... ) ; } ; return _abeac ; } ;
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// String returns a string describing the array: "EMPTY" when it holds no marks, otherwise the
// number of marks followed by the first and the last one.
func (_bdb TextMarkArray) String() string {
	count := len(_bdb._gcc)
	if count == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	first, last := _bdb._gcc[0], _bdb._gcc[count-1]
	return _gde.Sprintf("\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d", count, first, last)
}

// setCharSpacing stores `_bgb` in the text state field `_aaec`. A nil receiver is a no-op.
// When `_adbb` is set the new value and the state are logged at info level.
func (_fbdfc *textObject) setCharSpacing(_bgb float64) {
	if _fbdfc == nil {
		return
	}
	_fbdfc._abcb._aaec = _bgb
	if !_adbb {
		return
	}
	_ac.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _bgb, _fbdfc._abcb.String())
}

// _abcc converts each structure element in `_ggfc` into a list via `_bddc`, recursing into the
// element's children first.
//
// An element gets the lines stored in `_begec` under its `_ecaf` value only when that value is
// not -1, the element's `_ebad` is a *PdfObjectReference, `_acef` is a *PdfIndirectObject, and
// the two references are deeply equal. Otherwise it gets an empty line slice. A failed
// reference assertion is logged at debug level but does not stop processing.
// The result is always a non-nil slice with one entry per input element.
func _abcc(_ggfc []structElement, _begec map[int][]*textLine, _acef _dce.PdfObject) []*list {
	built := make([]*list, 0)
	for _, elem := range _ggfc {
		key := int(elem._ecaf)
		lines := []*textLine{}

		ref, isRef := elem._ebad.(*_dce.PdfObjectReference)
		if !isRef {
			_ac.Log.Debug("\u0066\u0061\u0069l\u0065\u0064\u0020\u006f\u0074\u0020\u0063\u0061\u0073\u0074\u0020\u0074\u006f\u0020\u002a\u0063\u006f\u0072\u0065\u002e\u0050\u0064\u0066\u004f\u0062\u006a\u0065\u0063\u0074R\u0065\u0066\u0065\u0072\u0065\u006e\u0063\u0065")
		}
		if key != -1 && ref != nil {
			if candidate, present := _begec[key]; present {
				if indirect, isIndirect := _acef.(*_dce.PdfIndirectObject); isIndirect {
					if _b.DeepEqual(*ref, indirect.PdfObjectReference) {
						lines = candidate
					}
				}
			}
		}

		children := []*list{}
		if elem._dfcd != nil {
			children = _abcc(elem._dfcd, _begec, _acef)
		}
		built = append(built, _bddc(lines, elem._affb, children))
	}
	return built
}

// setTextLeading stores `_fbgc` in the text state field `_gcg`. A nil receiver is a no-op.
func (_aedc *textObject) setTextLeading(_fbgc float64) {
	if _aedc != nil {
		_aedc._abcb._gcg = _fbgc
	}
}

// computeViews derives the page's text string (`_edee`), text marks (`_fdaf`) and tables
// (`_agdc`) from the paragraphs returned by getParagraphs.
func (_dgag *PageText) computeViews() {
	paras := _dgag.getParagraphs()

	var buf _dfg.Buffer
	paras.writeText(&buf)

	_dgag._edee = buf.String()
	_dgag._fdaf = paras.toTextMarks()
	_dgag._agdc = paras.tables()
	if _eadb {
		_ac.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(_dgag._agdc))
	}
}

// _caa returns the single parameter of `_ffee` as a float64.
// An operation with any other number of parameters is logged at debug level and yields
// (0, error); otherwise the result and error come from GetNumberAsFloat.
func _caa(_ffee *_dcg.ContentStreamOperation) (float64, error) {
	if got := len(_ffee.Params); got != 1 {
		err := _a.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
		_ac.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _ffee.Operand, 1, got, _ffee.Params)
		return 0.0, err
	}
	return _dce.GetNumberAsFloat(_ffee.Params[0])
}

// _bafcd packs two ints into one uint64 key: `_cdcff` is multiplied by 2^24 and `_gefbd` is
// added. Keys are only unique while `_gefbd` stays within [0, 2^24).
func _bafcd(_cdcff, _gefbd int) uint64 {
	const stride uint64 = 0x1000000
	return uint64(_cdcff)*stride + uint64(_gefbd)
}

// mergePrimary returns the arithmetic mean of `_cbag` over the rulings in the list.
// The list must be non-empty; element 0 is read unconditionally.
func (_cgfcc rulingList) mergePrimary() float64 {
	total := _cgfcc[0]._cbag
	for i := 1; i < len(_cgfcc); i++ {
		total += _cgfcc[i]._cbag
	}
	return total / float64(len(_cgfcc))
}
2023-05-29 17:26:33 +00:00
2023-09-07 17:40:17 +00:00
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
2023-11-11 11:29:03 +00:00
type RenderMode int ; func ( _ggfab lineRuling ) xMean ( ) float64 { return 0.5 * ( _ggfab . _cged . X + _ggfab . _ggeaa . X ) } ; func _daae ( _edbg [ ] TextMark , _gfef * TextTable ) [ ] TextMark { var _ecgf [ ] TextMark ; for _ , _egefc := range _edbg { _egefc . _dgbe = true ; _egefc . _acbcf = _gfef ;
_ecgf = append ( _ecgf , _egefc ) ; } ; return _ecgf ; } ; func ( _agab * wordBag ) removeDuplicates ( ) { if _bfcf { _ac . Log . Info ( "r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071" , _agab . text ( ) ) ; } ; for _ , _gbfc := range _agab . depthIndexes ( ) { if len ( _agab . _aac [ _gbfc ] ) == 0 { continue ;
} ; _agdg := _agab . _aac [ _gbfc ] [ 0 ] ; _fbdc := _bfdb * _agdg . _adecc ; _ebgcb := _agdg . _adgge ; for _ , _abae := range _agab . depthBand ( _ebgcb , _ebgcb + _fbdc ) { _aggf := map [ * textWord ] struct { } { } ; _gfbe := _agab . _aac [ _abae ] ; for _ , _aabe := range _gfbe { if _ , _abff := _aggf [ _aabe ] ;
_abff { continue ; } ; for _ , _eggb := range _gfbe { if _ , _dded := _aggf [ _eggb ] ; _dded { continue ; } ; if _eggb != _aabe && _eggb . _fedgb == _aabe . _fedgb && _dc . Abs ( _eggb . Llx - _aabe . Llx ) < _fbdc && _dc . Abs ( _eggb . Urx - _aabe . Urx ) < _fbdc && _dc . Abs ( _eggb . Lly - _aabe . Lly ) < _fbdc && _dc . Abs ( _eggb . Ury - _aabe . Ury ) < _fbdc { _aggf [ _eggb ] = struct { } { } ;
} ; } ; } ; if len ( _aggf ) > 0 { _faadg := 0 ; for _ , _gccb := range _gfbe { if _ , _dbaag := _aggf [ _gccb ] ; ! _dbaag { _gfbe [ _faadg ] = _gccb ; _faadg ++ ; } ; } ; _agab . _aac [ _abae ] = _gfbe [ : len ( _gfbe ) - len ( _aggf ) ] ; if len ( _agab . _aac [ _abae ] ) == 0 { delete ( _agab . _aac , _abae ) ;
} ; } ; } ; } ; } ; func _gced ( _dgdff [ ] * textWord , _gbaac * textWord ) [ ] * textWord { for _bbaf , _adfc := range _dgdff { if _adfc == _gbaac { return _ebcgb ( _dgdff , _bbaf ) ; } ; } ; _ac . Log . Error ( "\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073" , _gbaac ) ;
return nil ; } ; func ( _efg * Extractor ) extractPageText ( _cbc string , _adb * _fg . PdfPageResources , _ageg _dca . Matrix , _cag int ) ( * PageText , int , int , error ) { _ac . Log . Trace ( "\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d" , _cag ) ;
_gdg := & PageText { _fgag : _efg . _aa , _cabf : _efg . _agb , _gefc : _efg . _ce } ; _fdg := _eab ( _efg . _aa ) ; var _dcdd stateStack ; _ddc := _eded ( _efg , _adb , _dcg . GraphicsState { } , & _fdg , & _dcdd ) ; _afg := shapesState { _dged : _ageg , _gebd : _dca . IdentityMatrix ( ) , _cfda : _ddc } ;
var _bgc bool ; _gee := - 1 ; if _cag > _deg { _edcf := _a . New ( "\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077" ) ; _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076" , _cag , _edcf ) ;
return _gdg , _fdg . _ddcd , _fdg . _dcad , _edcf ; } ; _bdc := _dcg . NewContentStreamParser ( _cbc ) ; _efc , _bae := _bdc . Parse ( ) ; if _bae != nil { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bae ) ;
return _gdg , _fdg . _ddcd , _fdg . _dcad , _bae ; } ; _gdg . _eaee = _efc ; _aed := _dcg . NewContentStreamProcessor ( * _efc ) ; _aed . AddHandler ( _dcg . HandlerConditionEnumAllOperands , "" , func ( _gaa * _dcg . ContentStreamOperation , _gdef _dcg . GraphicsState , _adf * _fg . PdfPageResources ) error { _ebc := _gaa . Operand ;
if _faee { _ac . Log . Info ( "\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s" , _gaa ) ; } ; switch _ebc { case "\u0071" : if _bdefa { _ac . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _afg . _gebd ) ; } ; _dcdd . push ( & _fdg ) ; case "\u0051" : if ! _dcdd . empty ( ) { _fdg = * _dcdd . pop ( ) ;
} ; _afg . _gebd = _gdef . CTM ; if _bdefa { _ac . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _afg . _gebd ) ; } ; case "\u0042\u0044\u0043" : _gc , _fda := _dce . GetDict ( _gaa . Params [ 1 ] ) ; if ! _fda { _ac . Log . Debug ( "\u0045\u0052\u0052O\u0052\u003a\u0020\u0042D\u0043\u0020\u006f\u0070\u003d\u0025\u0073 \u0047\u0065\u0074\u0044\u0069\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _gaa ) ;
return _bae ; } ; _egd := _gc . Get ( "\u004d\u0043\u0049\u0044" ) ; if _egd != nil { _adg , _ecg := _dce . GetIntVal ( _egd ) ; if ! _ecg { _ac . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u0042\u0044C\u0020\u006f\u0070=\u0025\u0073\u002e\u0020\u0042\u0061\u0064\u0020\u006eum\u0065\u0072\u0069c\u0061\u006c \u006f\u0062\u006a\u0065\u0063\u0074.\u0020\u006f=\u0025\u0073" , _gaa , _egd ) ;
} ; _gee = _adg ; } else { _gee = - 1 ; } ; case "\u0045\u004d\u0043" : _gee = - 1 ; case "\u0042\u0054" : if _bgc { _ac . Log . Debug ( "\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
_gdg . _fbga = append ( _gdg . _fbga , _ddc . _ffc ... ) ; } ; _bgc = true ; _adgc := _gdef ; _adgc . CTM = _ageg . Mult ( _adgc . CTM ) ; _ddc = _eded ( _efg , _adf , _adgc , & _fdg , & _dcdd ) ; _afg . _cfda = _ddc ; case "\u0045\u0054" : if ! _bgc { _ac . Log . Debug ( "\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
} ; _bgc = false ; _gdg . _fbga = append ( _gdg . _fbga , _ddc . _ffc ... ) ; _ddc . reset ( ) ; case "\u0054\u002a" : _ddc . nextLine ( ) ; case "\u0054\u0064" : if _dff , _dcb := _ddc . checkOp ( _gaa , 2 , true ) ; ! _dff { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dcb ) ;
return _dcb ; } ; _cgff , _fee , _dfba := _ecgcg ( _gaa . Params ) ; if _dfba != nil { return _dfba ; } ; _ddc . moveText ( _cgff , _fee ) ; case "\u0054\u0044" : if _acdg , _dad := _ddc . checkOp ( _gaa , 2 , true ) ; ! _acdg { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dad ) ;
return _dad ; } ; _cfed , _edaf , _fga := _ecgcg ( _gaa . Params ) ; if _fga != nil { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fga ) ; return _fga ; } ; _ddc . moveTextSetLeading ( _cfed , _edaf ) ; case "\u0054\u006a" : if _ceac , _efe := _ddc . checkOp ( _gaa , 1 , true ) ;
! _ceac { _ac . Log . Debug ( "\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076" , _gaa , _efe ) ; return _efe ; } ; _ecab := _dce . TraceToDirectObject ( _gaa . Params [ 0 ] ) ; _dee , _faa := _dce . GetStringBytes ( _ecab ) ;
if ! _faa { _ac . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064" , _gaa ) ; return _dce . ErrTypeError ;
} ; return _ddc . showText ( _ecab , _dee , _gee ) ; case "\u0054\u004a" : if _fbe , _bdeg := _ddc . checkOp ( _gaa , 1 , true ) ; ! _fbe { _ac . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bdeg ) ; return _bdeg ; } ; _cba , _agbb := _dce . GetArray ( _gaa . Params [ 0 ] ) ;
if ! _agbb { _ac . Log . Debug ( "\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _gaa ) ; return _bae ; } ; return _ddc . showTextAdjusted ( _cba , _gee ) ;
case "\u0027" : if _gbcf , _gbad := _ddc . checkOp ( _gaa , 1 , true ) ; ! _gbcf { _ac . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gbad ) ; return _gbad ; } ; _ecbc := _dce . TraceToDirectObject ( _gaa . Params [ 0 ] ) ; _bda , _gea := _dce . GetStringBytes ( _ecbc ) ;
if ! _gea { _ac . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _gaa ) ; return _dce . ErrTypeError ; } ; _ddc . nextLine ( ) ; return _ddc . showText ( _ecbc , _bda , _gee ) ;
case "\u0022" : if _gfd , _fad := _ddc . checkOp ( _gaa , 3 , true ) ; ! _gfd { _ac . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fad ) ; return _fad ; } ; _dafc , _gfb , _fgg := _ecgcg ( _gaa . Params [ : 2 ] ) ; if _fgg != nil { return _fgg ;
} ; _bab := _dce . TraceToDirectObject ( _gaa . Params [ 2 ] ) ; _cfb , _cbe := _dce . GetStringBytes ( _bab ) ; if ! _cbe { _ac . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _gaa ) ;
return _dce . ErrTypeError ; } ; _ddc . setCharSpacing ( _dafc ) ; _ddc . setWordSpacing ( _gfb ) ; _ddc . nextLine ( ) ; return _ddc . showText ( _bab , _cfb , _gee ) ; case "\u0054\u004c" : _gec , _gag := _caa ( _gaa ) ; if _gag != nil { _ac . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gag ) ;
return _gag ; } ; _ddc . setTextLeading ( _gec ) ; case "\u0054\u0063" : _eac , _aage := _caa ( _gaa ) ; if _aage != nil { _ac . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _aage ) ; return _aage ; } ; _ddc . setCharSpacing ( _eac ) ;
case "\u0054\u0066" : if _beg , _gef := _ddc . checkOp ( _gaa , 2 , true ) ; ! _beg { _ac . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gef ) ; return _gef ; } ; _gfba , _cce := _dce . GetNameVal ( _gaa . Params [ 0 ] ) ; if ! _cce { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064" , _gaa ) ;
return _dce . ErrTypeError ; } ; _eed , _gge := _dce . GetNumberAsFloat ( _gaa . Params [ 1 ] ) ; if ! _cce { _ac . Log . Debug ( "\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gaa , _gge ) ;
return _gge ; } ; _gge = _ddc . setFont ( _gfba , _eed ) ; _ddc . _gbf = _gdf . Is ( _gge , _dce . ErrNotSupported ) ; if _gge != nil && ! _ddc . _gbf { return _gge ; } ; case "\u0054\u006d" : if _defb , _agg := _ddc . checkOp ( _gaa , 6 , true ) ; ! _defb { _ac . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _agg ) ;
return _agg ; } ; _ead , _aba := _dce . GetNumbersAsFloat ( _gaa . Params ) ; if _aba != nil { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _aba ) ; return _aba ; } ; _ddc . setTextMatrix ( _ead ) ; case "\u0054\u0072" : if _gcb , _cgb := _ddc . checkOp ( _gaa , 1 , true ) ;
! _gcb { _ac . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _cgb ) ; return _cgb ; } ; _cbgg , _fed := _dce . GetIntVal ( _gaa . Params [ 0 ] ) ; if ! _fed { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _gaa ) ;
return _dce . ErrTypeError ; } ; _ddc . setTextRenderMode ( _cbgg ) ; case "\u0054\u0073" : if _fbdf , _fbf := _ddc . checkOp ( _gaa , 1 , true ) ; ! _fbdf { _ac . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fbf ) ; return _fbf ;
} ; _dbaa , _ggc := _dce . GetNumberAsFloat ( _gaa . Params [ 0 ] ) ; if _ggc != nil { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _ggc ) ; return _ggc ; } ; _ddc . setTextRise ( _dbaa ) ; case "\u0054\u0077" : if _adfa , _bea := _ddc . checkOp ( _gaa , 1 , true ) ;
! _adfa { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bea ) ; return _bea ; } ; _fge , _gca := _dce . GetNumberAsFloat ( _gaa . Params [ 0 ] ) ; if _gca != nil { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gca ) ;
return _gca ; } ; _ddc . setWordSpacing ( _fge ) ; case "\u0054\u007a" : if _agegb , _ebf := _ddc . checkOp ( _gaa , 1 , true ) ; ! _agegb { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _ebf ) ; return _ebf ; } ; _dfe , _fbgf := _dce . GetNumberAsFloat ( _gaa . Params [ 0 ] ) ;
if _fbgf != nil { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fbgf ) ; return _fbgf ; } ; _ddc . setHorizScaling ( _dfe ) ; case "\u0063\u006d" : _afg . _gebd = _gdef . CTM ; if _afg . _gebd . Singular ( ) { _aedg := _dca . IdentityMatrix ( ) . Translate ( _afg . _gebd . Translation ( ) ) ;
_ac . Log . Debug ( "S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s" , _afg . _gebd , _aedg ) ; _afg . _gebd = _aedg ; } ; if _bdefa { _ac . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _afg . _gebd ) ; } ; case "\u006d" : if len ( _gaa . Params ) != 2 { _ac . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _efa ) ;
return nil ; } ; _gfc , _eace := _dce . GetNumbersAsFloat ( _gaa . Params ) ; if _eace != nil { return _eace ; } ; _afg . moveTo ( _gfc [ 0 ] , _gfc [ 1 ] ) ; case "\u006c" : if len ( _gaa . Params ) != 2 { _ac . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _efa ) ;
return nil ; } ; _gab , _bbf := _dce . GetNumbersAsFloat ( _gaa . Params ) ; if _bbf != nil { return _bbf ; } ; _afg . lineTo ( _gab [ 0 ] , _gab [ 1 ] ) ; case "\u0063" : if len ( _gaa . Params ) != 6 { return _efa ; } ; _bdcf , _dcee := _dce . GetNumbersAsFloat ( _gaa . Params ) ; if _dcee != nil { return _dcee ;
} ; _ac . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _bdcf ) ; _afg . cubicTo ( _bdcf [ 0 ] , _bdcf [ 1 ] , _bdcf [ 2 ] , _bdcf [ 3 ] , _bdcf [ 4 ] , _bdcf [ 5 ] ) ; case "\u0076" , "\u0079" : if len ( _gaa . Params ) != 4 { return _efa ;
} ; _ecd , _dbg := _dce . GetNumbersAsFloat ( _gaa . Params ) ; if _dbg != nil { return _dbg ; } ; _ac . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _ecd ) ; _afg . quadraticTo ( _ecd [ 0 ] , _ecd [ 1 ] , _ecd [ 2 ] , _ecd [ 3 ] ) ;
case "\u0068" : _afg . closePath ( ) ; case "\u0072\u0065" : if len ( _gaa . Params ) != 4 { return _efa ; } ; _fbgfa , _cgc := _dce . GetNumbersAsFloat ( _gaa . Params ) ; if _cgc != nil { return _cgc ; } ; _afg . drawRectangle ( _fbgfa [ 0 ] , _fbgfa [ 1 ] , _fbgfa [ 2 ] , _fbgfa [ 3 ] ) ; _afg . closePath ( ) ;
case "\u0053" : _afg . stroke ( & _gdg . _ffge ) ; _afg . clearPath ( ) ; case "\u0073" : _afg . closePath ( ) ; _afg . stroke ( & _gdg . _ffge ) ; _afg . clearPath ( ) ; case "\u0046" : _afg . fill ( & _gdg . _cedf ) ; _afg . clearPath ( ) ; case "\u0066" , "\u0066\u002a" : _afg . closePath ( ) ; _afg . fill ( & _gdg . _cedf ) ;
_afg . clearPath ( ) ; case "\u0042" , "\u0042\u002a" : _afg . fill ( & _gdg . _cedf ) ; _afg . stroke ( & _gdg . _ffge ) ; _afg . clearPath ( ) ; case "\u0062" , "\u0062\u002a" : _afg . closePath ( ) ; _afg . fill ( & _gdg . _cedf ) ; _afg . stroke ( & _gdg . _ffge ) ; _afg . clearPath ( ) ; case "\u006e" : _afg . clearPath ( ) ;
case "\u0044\u006f" : if len ( _gaa . Params ) == 0 { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e" , _gaa . Params ) ;
return _dce . ErrRangeError ; } ; _cgaf , _bba := _dce . GetName ( _gaa . Params [ 0 ] ) ; if ! _bba { _ac . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e" , _gaa . Params [ 0 ] ) ;
return _dce . ErrTypeError ; } ; _ , _eef := _adf . GetXObjectByName ( * _cgaf ) ; if _eef != _fg . XObjectTypeForm { break ; } ; _ccce , _bba := _efg . _bca [ _cgaf . String ( ) ] ; if ! _bba { _eagb , _fded := _adf . GetXObjectFormByName ( * _cgaf ) ; if _fded != nil { _ac . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _fded ) ;
return _fded ; } ; _afd , _fded := _eagb . GetContentStream ( ) ; if _fded != nil { _ac . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _fded ) ; return _fded ; } ; _gbbd := _eagb . Resources ; if _gbbd == nil { _gbbd = _adf ; } ; _dgb := _gdef . CTM ; if _eccg , _egg := _dce . GetArray ( _eagb . Matrix ) ;
_egg { _ffeg , _dacf := _eccg . GetAsFloat64Slice ( ) ; if _dacf != nil { return _dacf ; } ; if len ( _ffeg ) != 6 { return _efa ; } ; _edad := _dca . NewMatrix ( _ffeg [ 0 ] , _ffeg [ 1 ] , _ffeg [ 2 ] , _ffeg [ 3 ] , _ffeg [ 4 ] , _ffeg [ 5 ] ) ; _dgb = _gdef . CTM . Mult ( _edad ) ; } ; _cca , _bcaf , _cedcc , _fded := _efg . extractPageText ( string ( _afd ) , _gbbd , _ageg . Mult ( _dgb ) , _cag + 1 ) ;
if _fded != nil { _ac . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _fded ) ; return _fded ; } ; _ccce = textResult { * _cca , _bcaf , _cedcc } ; _efg . _bca [ _cgaf . String ( ) ] = _ccce ; } ; _afg . _gebd = _gdef . CTM ; if _bdefa { _ac . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _afg . _gebd ) ;
} ; _gdg . _fbga = append ( _gdg . _fbga , _ccce . _ecca . _fbga ... ) ; _gdg . _ffge = append ( _gdg . _ffge , _ccce . _ecca . _ffge ... ) ; _gdg . _cedf = append ( _gdg . _cedf , _ccce . _ecca . _cedf ... ) ; _fdg . _ddcd += _ccce . _bbag ; _fdg . _dcad += _ccce . _ffg ; case "\u0072\u0067" , "\u0067" , "\u006b" , "\u0063\u0073" , "\u0073\u0063" , "\u0073\u0063\u006e" : _ddc . _dbf . ColorspaceNonStroking = _gdef . ColorspaceNonStroking ;
_ddc . _dbf . ColorNonStroking = _gdef . ColorNonStroking ; case "\u0052\u0047" , "\u0047" , "\u004b" , "\u0043\u0053" , "\u0053\u0043" , "\u0053\u0043\u004e" : _ddc . _dbf . ColorspaceStroking = _gdef . ColorspaceStroking ; _ddc . _dbf . ColorStroking = _gdef . ColorStroking ;
} ; return nil ; } ) ; _bae = _aed . Process ( _adb ) ; return _gdg , _fdg . _ddcd , _fdg . _dcad , _bae ; } ; func ( _dddb * textLine ) text ( ) string { var _cdbb [ ] string ; for _ , _daef := range _dddb . _fgbe { if _daef . _eadcb { _cdbb = append ( _cdbb , "\u0020" ) ; } ; _cdbb = append ( _cdbb , _daef . _fedgb ) ;
} ; return _df . Join ( _cdbb , "" ) ; } ;
2023-10-07 13:58:01 +00:00
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
2023-11-11 11:29:03 +00:00
// BBox computes the bounding box of the marks held by the array.
// Marks flagged as Meta, and marks whose Text is rejected by _cagg, do not
// contribute. The boolean result reports whether at least one mark was used;
// when it is false the returned rectangle is the zero value.
func (_gdd *TextMarkArray) BBox() (_fg.PdfRectangle, bool) {
	var (
		bounds _fg.PdfRectangle
		seeded bool
	)
	for _, mark := range _gdd._gcc {
		switch {
		case mark.Meta || _cagg(mark.Text):
			// Excluded from the box.
		case seeded:
			bounds = _ebge(bounds, mark.BBox)
		default:
			// The first contributing mark initialises the box.
			bounds, seeded = mark.BBox, true
		}
	}
	return bounds, seeded
}

// checkWidth returns the distance from _fcba to _bebc together with a flag
// that is true when this distance does not exceed the _gbgf threshold.
func (_cbabg rectRuling) checkWidth(_fcba, _bebc float64) (float64, bool) {
	span := _bebc - _fcba
	return span, span <= _gbgf
}

// _egddb walks the children of _gggde and collects list nodes according to
// each child's tag (_aeaa):
//   - "LI": a "bullet" list node is built from the child's marks and its
//     nested lists, its text is filled in, and the walk continues;
//   - "LBody": the lists found beneath that child are returned immediately;
//   - "L": the lists found beneath that child are appended to what has been
//     collected so far and the result is returned immediately.
//
// Children with any other tag are ignored.
func _egddb(_gggde *list) []*list {
	var collected []*list
	for _, child := range _gggde._edge {
		tag := child._aeaa
		if tag == "\u004c\u0049" {
			marks := _dgcc(child)
			nested := _egddb(child)
			item := _bddc(marks, "\u0062\u0075\u006c\u006c\u0065\u0074", nested)
			item._fbfaf = _gadcf(marks, "")
			collected = append(collected, item)
			continue
		}
		if tag == "\u004c\u0042\u006fd\u0079" {
			return _egddb(child)
		}
		if tag == "\u004c" {
			return append(collected, _egddb(child)...)
		}
	}
	return collected
}

// applyTables returns a new paragraph list made of one table paragraph per
// entry of _dfcca, followed by every paragraph of the receiver whose _dcada
// flag is not set. The receiver itself is not modified.
func (_fcfc paraList) applyTables(_dfcca []*textTable) paraList {
	var merged paraList
	for i := range _dfcca {
		merged = append(merged, _dfcca[i].newTablePara())
	}
	for _, para := range _fcfc {
		if !para._dcada {
			merged = append(merged, para)
		}
	}
	return merged
}

// log dumps the ruling list under the title _geec: a summary goes to the
// package logger and one line per ruling goes to standard output. Nothing is
// emitted unless the _aebg debug switch is on.
func (_baae rulingList) log(_geec string) {
	if _aebg {
		_ac.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _geec, _baae.String())
		for idx, entry := range _baae {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, entry.String())
		}
	}
}
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// Append appends `mark` to the mark array.
func (_cgcd *TextMarkArray) Append(mark TextMark) {
	_cgcd._gcc = append(_cgcd._gcc, mark)
}

// add appends the given points to the end of the subpath.
func (_agf *subpath) add(_dgge ..._dca.Point) {
	_agf._gdgd = append(_agf._gdgd, _dgge...)
}

// _gfdg reports whether every mark of every word on the line carries the same
// _fbcf value. A line without marks yields true.
//
// The value -1 means "no reference picked yet", so marks whose _fbcf is -1 do
// not fix the reference value.
func _gfdg(_bdcfb *textLine) bool {
	const unset = -1
	reference := unset
	for _, word := range _bdcfb._fgbe {
		for _, mark := range word._daafd {
			switch {
			case reference == unset:
				reference = mark._fbcf
			case reference != mark._fbcf:
				// The answer cannot change any more: stop scanning instead of
				// walking the remaining words.
				return false
			}
		}
	}
	return true
}

// _ddfff builds a word bag from _cdda. The bag is created from the first word
// (together with _aff and the two ruling lists); every further word is filed
// into the bin selected by _ebaf for its _adgge value and the bag rectangle
// is extended to cover it. The bag is sorted before it is returned.
//
// _cdda must contain at least one word.
func _ddfff(_cdda []*textWord, _aff float64, _eeea, _bbcb rulingList) *wordBag {
	bag := _egfaa(_cdda[0], _aff, _eeea, _bbcb)
	for _, word := range _cdda[1:] {
		bin := _ebaf(word._adgge)
		bag._aac[bin] = append(bag._aac[bin], word)
		bag.PdfRectangle = _ebge(bag.PdfRectangle, word.PdfRectangle)
	}
	bag.sort()
	return bag
}
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// PageTextOptions holds various options available in extraction process.
type PageTextOptions struct {
	// NOTE(review): PageText.List negates a field named _dbcd to decide
	// whether document tags are used; this looks like that switch. Confirm.
	_dbcd bool
	_dcc  bool
}

// complete reports whether numBorders returns 4 for the tile.
func (_abddc gridTile) complete() bool {
	const allBorders = 4
	return _abddc.numBorders() == allBorders
}

// _bfaaa is compiled once at package initialisation. The escaped literal
// decodes to the pattern ^\s*(\d+\.?|[Iiv]+)\s*\)?$ : optional whitespace,
// then either digits with an optional dot or a run of the letters I, i and v,
// then optional whitespace and an optional closing parenthesis.
var _bfaaa = _gd.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")
// Numeric thresholds used by the extraction heuristics. All of them are
// untyped constants; their literal form (integer vs. floating point) is kept
// as is.
const (
	_bafg  = 1.0e-6
	_egac  = 1.0e-4
	_edef  = 10
	_ddgf  = 6
	_dgfb  = 0.5 // wordBag.scanBand pads its depth window by _dgfb times the target bag's _fab.
	_ffca  = 0.12
	_faaf  = 0.19
	_cded  = 0.04
	_gebab = 0.04
	_ggce  = 1.0
	_aebe  = 0.04
	_afdf  = 0.4
	_adca  = 0.7
	_bgdf  = 1.0
	_ggfbd = 0.1
	_acbbe = 1.4
	_eedd  = 0.46
	_aeeg  = 0.02
	_bfdb  = 0.2
	_abbba = 0.5
	_ggg   = 4
	_eega  = 4.0
	_ffa   = 6
	_eabd  = 0.3
	_bdag  = 0.01
	_aec   = 0.02
	_fbff  = 2
	_gaab  = 2
	_bcccg = 500
	_daee  = 4.0
	_ccbf  = 4.0
	_edec  = 0.05
	_cfca  = 0.1
	_cebe  = 2.0
	_gbgf  = 2.0 // Limit used by rectRuling.checkWidth; ruling.alignsSec uses _gbgf + 1.0.
	_gagd  = 1.5
	_ddba  = 3.0
	_egab  = 0.25
)

// processOperand inspects one content stream operation and extracts the
// images it refers to:
//   - "BI" with one parameter: the inline image is extracted, unless it is an
//     image mask and IncludeInlineStencilMasks is off;
//   - "Do" with one parameter: image XObjects and form XObjects are handed to
//     their respective extractors, other XObject types are ignored;
//   - "scn"/"SCN" with one parameter while the Pattern colour space is
//     selected: the content stream of a tiling pattern is scanned for images;
//   - "cs"/"CS": records whether the selected colour space is named Pattern.
//
// It returns _ge when a name operand has the wrong type, any error reported
// by the extractors, and nil otherwise (including for unknown patterns and
// operations it does not handle).
func (_bbb *imageExtractContext) processOperand(_eae *_dcg.ContentStreamOperation, _ecc _dcg.GraphicsState, _dac *_fg.PdfPageResources) error {
	op, argc := _eae.Operand, len(_eae.Params)
	switch {
	case op == "\u0042\u0049" && argc == 1:
		inline, isInline := _eae.Params[0].(*_dcg.ContentStreamInlineImage)
		if !isInline {
			return nil
		}
		if isMask, known := _dce.GetBoolVal(inline.ImageMask); known && isMask && !_bbb._ebac.IncludeInlineStencilMasks {
			return nil
		}
		return _bbb.extractInlineImage(inline, _ecc, _dac)

	case op == "\u0044\u006f" && argc == 1:
		name, isName := _dce.GetName(_eae.Params[0])
		if !isName {
			_ac.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065")
			return _ge
		}
		_, xobjType := _dac.GetXObjectByName(*name)
		switch xobjType {
		case _fg.XObjectTypeImage:
			return _bbb.extractXObjectImage(name, _ecc, _dac)
		case _fg.XObjectTypeForm:
			return _bbb.extractFormImages(name, _ecc, _dac)
		}

	case _bbb._defe && (op == "\u0073\u0063\u006e" || op == "\u0053\u0043\u004e") && argc == 1:
		name, isName := _dce.GetName(_eae.Params[0])
		if !isName {
			_ac.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065")
			return _ge
		}
		pattern, found := _dac.GetPatternByName(*name)
		if !found {
			_ac.Log.Debug("\u0045R\u0052\u004f\u0052\u003a\u0020\u0050\u0061\u0074\u0074\u0065\u0072n\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075\u006e\u0064")
			return nil
		}
		if pattern.IsTiling() {
			tiling := pattern.GetAsTilingPattern()
			content, err := tiling.GetContentStream()
			if err != nil {
				return err
			}
			if err = _bbb.extractContentStreamImages(string(content), tiling.Resources); err != nil {
				return err
			}
		}

	case (op == "\u0063\u0073" || op == "\u0043\u0053") && argc >= 1:
		_bbb._defe = _eae.Params[0].String() == "\u0050a\u0074\u0074\u0065\u0072\u006e"
	}
	return nil
}

// scanBand looks through the words of the receiver whose _adgge lies within
// [_cebc - pad, _bcafc + pad], where pad is _dgfb times the _fab of _aegc at
// entry, and counts those that
//   - are accepted by the _accf predicate,
//   - pass the size tolerance _gddb (ignored when _gddb <= 0), and
//   - are not blocked by _aegc.
//
// Unless _fead is set, each counted word is moved into _aegc and removed from
// the receiver once the scan is over; when _fead is set the scan is read-only
// and leaves each bin after the first hit. Unless _eaf is set, the band
// limits grow to include every counted word. _eddd is not used.
//
// The number of counted words is returned.
func (_eeac *wordBag) scanBand(_eddd string, _aegc *wordBag, _accf func(_dadcd *wordBag, _gecb *textWord) bool, _cebc, _bcafc, _gddb float64, _fead, _eaf bool) int {
	entrySize := _aegc._fab
	var removals map[int]map[*textWord]struct{}
	if !_fead {
		removals = _eeac.makeRemovals()
	}
	pad := _dgfb * entrySize
	matched := 0
	for _, binIdx := range _eeac.depthBand(_cebc-pad, _bcafc+pad) {
		bin := _eeac._aac[binIdx]
		for _, word := range bin {
			// The limits may have moved since the bins were selected, so the
			// word is checked against the current band.
			if !(_cebc-pad <= word._adgge && word._adgge <= _bcafc+pad) {
				continue
			}
			if !_accf(_aegc, word) {
				continue
			}
			// The smaller of the two size measures is compared with the
			// tolerance. _aegc._fab is read afresh for every word.
			relDiff := 2.0 * _dc.Abs(word._adecc-_aegc._fab) / (word._adecc + _aegc._fab)
			ratio := _dc.Max(word._adecc/_aegc._fab, _aegc._fab/word._adecc)
			measure := _dc.Min(relDiff, ratio)
			if _gddb > 0 && measure > _gddb {
				continue
			}
			if _aegc.blocked(word) {
				continue
			}
			if !_fead {
				_aegc.pullWord(word, binIdx, removals)
			}
			matched++
			if !_eaf {
				if word._adgge < _cebc {
					_cebc = word._adgge
				}
				if word._adgge > _bcafc {
					_bcafc = word._adgge
				}
			}
			if _fead {
				break
			}
		}
	}
	if !_fead {
		_eeac.applyRemovals(removals)
	}
	return matched
}

// _ecgcg converts a two element operand list into a pair of floats. It fails
// when the list does not hold exactly two entries or when an entry is not a
// number.
func _ecgcg(_ebce []_dce.PdfObject) (_eefa, _edgbbe float64, _ggfcc error) {
	if count := len(_ebce); count != 2 {
		return 0, 0, _gde.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", count)
	}
	values, err := _dce.GetNumbersAsFloat(_ebce)
	if err != nil {
		return 0, 0, err
	}
	return values[0], values[1], nil
}

// stroke appends the current subpaths, paired with the current stroke colour,
// to *_fcfg. With the _aebg debug switch on it prints a summary, and with
// _aab also on it prints the subpaths with index 0 through 10.
func (_acbbb *shapesState) stroke(_fcfg *[]pathSection) {
	section := pathSection{_ged: _acbbb._efb, Color: _acbbb._cfda.getStrokeColor()}
	*_fcfg = append(*_fcfg, section)
	if !_aebg {
		return
	}
	_gde.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*_fcfg), _acbbb, _acbbb._cfda.getStrokeColor(), section.bbox())
	if !_aab {
		return
	}
	for idx, sub := range _acbbb._efb {
		_gde.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", idx, sub)
		if idx == 10 {
			break
		}
	}
}

// _afbef returns the integers 0.._ebea-1 sorted with _egeec, which is called
// with the values being compared (not with positions in the result).
func _afbef(_ebea int, _egeec func(int, int) bool) []int {
	order := make([]int, _ebea)
	for i := 0; i < _ebea; i++ {
		order[i] = i
	}
	byValue := func(a, b int) bool { return _egeec(order[a], order[b]) }
	_ab.Slice(order, byValue)
	return order
}

// llyRange returns the part of _baga whose paragraphs have
// _ddbfg <= Lly <= _cegb. _baga must list paragraph indexes in ascending Lly
// order, because the bounds are located with a binary search.
//
// nil is returned when no paragraph can fall in the range, and also when the
// list is empty or _baga is shorter than the list (both cases used to panic).
func (_eedb paraList) llyRange(_baga []int, _ddbfg, _cegb float64) []int {
	count := len(_eedb)
	if count == 0 || len(_baga) < count {
		return nil
	}
	lowest, highest := _eedb[_baga[0]].Lly, _eedb[_baga[count-1]].Lly
	if _cegb < lowest || _ddbfg > highest {
		return nil
	}
	from := _ab.Search(count, func(i int) bool { return _eedb[_baga[i]].Lly >= _ddbfg })
	to := _ab.Search(count, func(i int) bool { return _eedb[_baga[i]].Lly > _cegb })
	return _baga[from:to]
}

// sort orders the rulings in place using the list's comp method.
func (_cgcbcg rulingList) sort() {
	_ab.Slice(_cgcbcg, _cgcbcg.comp)
}
func _decf ( _dfgc _dce . PdfObject , _abddcf _be . Color ) ( _e . Image , error ) { _dbfg , _bgafa := _dce . GetStream ( _dfgc ) ; if ! _bgafa { return nil , nil ; } ; _ggba , _edgg := _fg . NewXObjectImageFromStream ( _dbfg ) ; if _edgg != nil { return nil , _edgg ; } ; _deff , _edgg := _ggba . ToImage ( ) ;
if _edgg != nil { return nil , _edgg ; } ; return _dcfd ( _deff , _abddcf ) , nil ; } ; func _caba ( _eebc , _ebbd _dca . Point ) bool { _caff := _dc . Abs ( _eebc . X - _ebbd . X ) ; _abba := _dc . Abs ( _eebc . Y - _ebbd . Y ) ; return _edfa ( _caff , _abba ) ; } ; type textLine struct { _fg . PdfRectangle ;
_bfcg float64 ; _fgbe [ ] * textWord ; _ceacg float64 ; } ; func _dfebg ( _egdee * _fg . Image , _agaad _be . Color ) _e . Image { _dfeac , _fdefe := int ( _egdee . Width ) , int ( _egdee . Height ) ; _dbgf := _e . NewRGBA ( _e . Rect ( 0 , 0 , _dfeac , _fdefe ) ) ; for _abddb := 0 ; _abddb < _fdefe ;
_abddb ++ { for _adeegd := 0 ; _adeegd < _dfeac ; _adeegd ++ { _eccf , _egefcf := _egdee . ColorAt ( _adeegd , _abddb ) ; if _egefcf != nil { _ac . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e" , _adeegd , _abddb ) ;
continue ; } ; _cbgagd , _ffafc , _gacfd , _ := _eccf . RGBA ( ) ; var _gfabf _be . Color ; if _cbgagd + _ffafc + _gacfd == 0 { _gfabf = _be . Transparent ; } else { _gfabf = _agaad ; } ; _dbgf . Set ( _adeegd , _abddb , _gfabf ) ; } ; } ; return _dbgf ; } ; func _cbgbf ( _ecfef map [ int ] [ ] float64 ) string { _ffff := _dfdf ( _ecfef ) ;
_agcaa := make ( [ ] string , len ( _ecfef ) ) ; for _fefbc , _bdda := range _ffff { _agcaa [ _fefbc ] = _gde . Sprintf ( "\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066" , _bdda , _ecfef [ _bdda ] ) ; } ; return _gde . Sprintf ( "\u007b\u0025\u0073\u007d" , _df . Join ( _agcaa , "\u002c\u0020" ) ) ;
2023-10-07 13:58:01 +00:00
} ;
2023-11-11 11:29:03 +00:00
// String returns a description of `p`.
// A paragraph whose _bfge flag is set is rendered as its rectangle followed
// by "[EMPTY]". Otherwise the description holds the rectangle, the table
// dimensions when the paragraph carries a table, the number of lines and the
// text shortened through _dbdbb(..., 50).
func (_fcgeb *textPara) String() string {
	if _fcgeb._bfge {
		return _gde.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", _fcgeb.PdfRectangle)
	}
	var tableInfo string
	if table := _fcgeb._edce; table != nil {
		tableInfo = _gde.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", table._acddc, table._gebeeb)
	}
	return _gde.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", _fcgeb.PdfRectangle, tableInfo, len(_fcgeb._gfaae), _dbdbb(_fcgeb.text(), 50))
}

// _eafda decodes the image XObject stored in the stream _dfca and converts it
// with _dfebg using the colour _baea. When _dfca is not a stream it returns a
// nil image and a nil error.
func _eafda(_dfca _dce.PdfObject, _baea _be.Color) (_e.Image, error) {
	stream, isStream := _dce.GetStream(_dfca)
	if !isStream {
		return nil, nil
	}
	xobj, err := _fg.NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}
	decoded, err := xobj.ToImage()
	if err != nil {
		return nil, err
	}
	return _dfebg(decoded, _baea), nil
}

// alignsSec reports whether the [_efgeb, _bbge] intervals of the two rulings
// overlap once each lower bound has been relaxed by _gbgf + 1.0.
func (_bgfe *ruling) alignsSec(_daac *ruling) bool {
	const _cecgb = _gbgf + 1.0
	startsBeforeOtherEnds := _bgfe._efgeb-_cecgb <= _daac._bbge
	otherStartsBeforeEnd := _daac._efgeb-_cecgb <= _bgfe._bbge
	return startsBeforeOtherEnds && otherStartsBeforeEnd
}

// putComposite stores a composite cell made of the paragraphs _fabe and the
// rectangle _fcgg under the key _bafcd(_bdadb, _fgdb). The cell's updateBBox
// method is called before the cell is stored. An empty paragraph list is
// logged as an error and nothing is stored.
func (_gbefe *textTable) putComposite(_bdadb, _fgdb int, _fabe paraList, _fcgg _fg.PdfRectangle) {
	if len(_fabe) == 0 {
		_ac.Log.Error("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073")
		return
	}
	cell := compositeCell{PdfRectangle: _fcgg, paraList: _fabe}
	if _eadb {
		// Printed before updateBBox, so this shows the rectangle as passed in.
		_gde.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a", _bdadb, _fgdb, cell.String())
	}
	cell.updateBBox()
	_gbefe._edbe[_bafcd(_bdadb, _fgdb)] = cell
}

// extractInlineImage converts the inline image to RGB and records it as an
// ImageMark whose size, angle and position are taken from the CTM of _cbgd.
// DeviceGray is used when the image does not name a colour space. The inline
// image counter _cfc is incremented on success.
func (_abf *imageExtractContext) extractInlineImage(_bfad *_dcg.ContentStreamInlineImage, _cbgd _dcg.GraphicsState, _eda *_fg.PdfPageResources) error {
	raw, err := _bfad.ToImage(_eda)
	if err != nil {
		return err
	}
	colorspace, err := _bfad.GetColorSpace(_eda)
	if err != nil {
		return err
	}
	if colorspace == nil {
		colorspace = _fg.NewPdfColorspaceDeviceGray()
	}
	rgb, err := colorspace.ImageToRGB(*raw)
	if err != nil {
		return err
	}
	ctm := _cbgd.CTM
	mark := ImageMark{
		Image:  &rgb,
		Width:  ctm.ScalingFactorX(),
		Height: ctm.ScalingFactorY(),
		Angle:  ctm.Angle(),
	}
	mark.X, mark.Y = ctm.Translation()
	_abf._dag = append(_abf._dag, mark)
	_abf._cfc++
	return nil
}

// computeText concatenates the _efgdc strings of the word's marks, in order.
func (_eccef *textWord) computeText() string {
	var sb _df.Builder
	for _, mark := range _eccef._daafd {
		sb.WriteString(mark._efgdc)
	}
	return sb.String()
}

// The three ruling kinds, numbered from zero in declaration order.
const (
	_ccfb rulingKind = iota
	_faccd
	_cbab
)

// logComposite prints two grids for the table under the title _gbcag: first
// the number of paragraphs in each composite cell, then the (shortened) text
// of each cell. Nothing is printed unless the _eadb debug switch is on.
func (_adgcc *textTable) logComposite(_gbcag string) {
	if !_eadb {
		return
	}
	cols, rows := _adgcc._acddc, _adgcc._gebeeb

	// Grid 1: paragraph count per cell.
	_ac.Log.Info("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", cols, rows, _gbcag)
	_gde.Printf("\u0025\u0035\u0073 \u007c", "")
	for col := 0; col < cols; col++ {
		_gde.Printf("\u0025\u0033\u0064 \u007c", col)
	}
	_gde.Println("")
	_gde.Printf("\u0025\u0035\u0073 \u002b", "")
	for col := 0; col < cols; col++ {
		_gde.Printf("\u0025\u0033\u0073 \u002b", "\u002d\u002d\u002d")
	}
	_gde.Println("")
	for row := 0; row < rows; row++ {
		_gde.Printf("\u0025\u0035\u0064 \u007c", row)
		for col := 0; col < cols; col++ {
			paras, _ := _adgcc._edbe[_bafcd(col, row)].parasBBox()
			_gde.Printf("\u0025\u0033\u0064 \u007c", len(paras))
		}
		_gde.Println("")
	}

	// Grid 2: cell text.
	_ac.Log.Info("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", cols, rows, _gbcag)
	_gde.Printf("\u0025\u0035\u0073 \u007c", "")
	for col := 0; col < cols; col++ {
		_gde.Printf("\u0025\u0031\u0032\u0064\u0020\u007c", col)
	}
	_gde.Println("")
	_gde.Printf("\u0025\u0035\u0073 \u002b", "")
	for col := 0; col < cols; col++ {
		_gde.Print("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b")
	}
	_gde.Println("")
	for row := 0; row < rows; row++ {
		_gde.Printf("\u0025\u0035\u0064 \u007c", row)
		for col := 0; col < cols; col++ {
			paras, _ := _adgcc._edbe[_bafcd(col, row)].parasBBox()
			cellText := ""
			if merged := paras.merge(); merged != nil {
				cellText = merged.text()
			}
			// Quote the text to escape control characters, then drop the
			// surrounding quotes again.
			quoted := _gde.Sprintf("\u0025\u0071", _dbdbb(cellText, 12))
			quoted = quoted[1 : len(quoted)-1]
			_gde.Printf("\u0025\u0031\u0032\u0073\u0020\u007c", quoted)
		}
		_gde.Println("")
	}
}

// _ebeg sorts _daad in place by _bfcg and then groups the lines by the value
// of _aadf rounded to the nearest integer.
func _ebeg(_daad []*textLine) map[float64][]*textLine {
	_ab.Slice(_daad, func(i, j int) bool { return _daad[i]._bfcg < _daad[j]._bfcg })
	groups := map[float64][]*textLine{}
	for _, line := range _daad {
		key := _dc.Round(_aadf(line))
		groups[key] = append(groups[key], line)
	}
	return groups
}

// _gcfc looks for horizontal gaps that separate the lines of a table row into
// groups. The lines of all cells are ordered by _bfcg (ties, as judged by
// _ebfaf, are broken by Llx); whenever a line's Ury lies below the lowest Lly
// seen so far a border is placed midway between the two and a new group is
// started. The borders are returned only if every cell has lines in every
// group (cell.hasLines); otherwise nil is returned.
//
// nil is also returned when the cells contribute no lines at all; this case
// used to panic.
func _gcfc(_edfbde []compositeCell) []float64 {
	var lines []*textLine
	paraCount := 0
	for _, cell := range _edfbde {
		paraCount += len(cell.paraList)
		lines = append(lines, cell.lines()...)
	}
	_ab.Slice(lines, func(i, j int) bool {
		a, b := lines[i], lines[j]
		if keyA, keyB := a._bfcg, b._bfcg; !_ebfaf(keyA - keyB) {
			return keyA < keyB
		}
		return a.Llx < b.Llx
	})
	if _eadb {
		_gde.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", paraCount, len(lines))
		for idx, line := range lines {
			_gde.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", idx, line)
		}
	}
	if len(lines) == 0 {
		// Nothing to split: there can be no borders.
		return nil
	}

	var borders []float64
	var groups [][]*textLine
	lowest := lines[0]
	current := []*textLine{lowest}
	for idx, line := range lines[1:] {
		if line.Ury < lowest.Lly {
			border := 0.5 * (line.Ury + lowest.Lly)
			if _eadb {
				_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a", idx, line.Ury, lowest.Lly, border, lowest, line)
			}
			borders = append(borders, border)
			groups = append(groups, current)
			current = nil
		}
		current = append(current, line)
		if line.Lly < lowest.Lly {
			lowest = line
		}
	}
	if len(current) > 0 {
		groups = append(groups, current)
	}

	if _eadb {
		_gde.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", borders)
		_ac.Log.Info("\u0072\u006f\u0077\u003d\u0025\u0064", len(_edfbde))
		for idx, cell := range _edfbde {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, cell)
		}
		_ac.Log.Info("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d", len(groups))
		for gi, group := range groups {
			_gde.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a", gi, len(group))
			for li, line := range group {
				_gde.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", li, line)
			}
		}
	}

	// The borders are usable only if every cell has lines in every group.
	spansAll := true
check:
	for gi, group := range groups {
		for ci, cell := range _edfbde {
			if _eadb {
				_gde.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", gi, len(groups), ci, len(_edfbde), cell)
			}
			if !cell.hasLines(group) {
				if _eadb {
					_gde.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", gi, len(groups), ci, len(_edfbde))
				}
				spansAll = false
				break check
			}
		}
	}
	if !spansAll {
		if _eadb {
			_ac.Log.Info("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg")
		}
		borders = nil
	}
	if _eadb && borders != nil {
		_gde.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", borders)
	}
	return borders
}
// _cgffb returns a new slice holding the elements of _ggdf in reverse order.
// The input is left untouched and the result is never nil.
func _cgffb(_ggdf []int) []int {
	reversed := make([]int, len(_ggdf))
	for src, dst := 0, len(_ggdf)-1; dst >= 0; src, dst = src+1, dst-1 {
		reversed[dst] = _ggdf[src]
	}
	return reversed
}

// Sentinel errors returned by the operand checks of this package.
var (
	// _ge reads "type check error".
	_ge = _a.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")
	// _efa reads "range check error".
	_efa = _a.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// List returns all the list objects detected on the page.
// It detects all the bullet point Lists from a given pdf page and builds a slice of bullet list objects.
// A given bullet list object has a tree structure.
// Each bullet point list is extracted with the text content it contains and all the sub lists found under it as children in the tree.
// The remaining content of the PDF is ignored; only the text in the bullet point lists is extracted.
// The list extraction is done in two ways.
// 1. If the document is tagged then the lists are extracted using the tags provided in the document.
// 2. Otherwise the bullet lists are extracted from the raw text using regex matching.
// By default the document tag is used if available.
// However this can be disabled using `DisableDocumentTags` in the `Options` object.
// Sometimes disabling document tags option might give a better bullet list extraction if the document was tagged incorrectly.
// options := &Options{
// DisableDocumentTags: false, // this means use document tag if available
// }
// ex, err := NewWithOptions(page, options)
// // handle error
// pageText, _, _, err := ex.ExtractPageText()
// // handle error
// lists := pageText.List()
// txt := lists.Text()
func ( _cfef PageText ) List ( ) lists { _aeba := ! _cfef . _dbc . _dbcd ; _agda := _cfef . getParagraphs ( ) ; _acce := true ; if _cfef . _cabf == nil || * _cfef . _cabf == nil { _acce = false ; } ; _abd := _agda . list ( ) ; if _acce && _aeba { _bgda := _feea ( & _agda ) ; _acde := & structTreeRoot { } ;
_acde . parseStructTreeRoot ( * _cfef . _cabf ) ; if _acde . _ccdea == nil { _ac . Log . Debug ( "\u004c\u0069\u0073\u0074\u003a\u0020\u0073t\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e'\u0074\u0020\u0068\u0061\u0076e\u0020\u0061\u006e\u0079\u0020\u0063\u006f\u006e\u0074e\u006e\u0074\u002c\u0020\u0075\u0073\u0069\u006e\u0067\u0020\u0074\u0065\u0078\u0074\u0020\u006d\u0061\u0074\u0063\u0068\u0069\u006e\u0067\u0020\u006d\u0065\u0074\u0068\u006f\u0064\u0020\u0069\u006e\u0073\u0074\u0065\u0061\u0064\u002e" ) ;
return _abd ; } ; _abd = _acde . buildList ( _bgda , _cfef . _gefc ) ; } ; return _abd ; } ; func _ebecd ( _cccda map [ float64 ] [ ] * textLine ) [ ] float64 { _cccdd := [ ] float64 { } ; for _bdaag := range _cccda { _cccdd = append ( _cccdd , _bdaag ) ; } ; _ab . Float64s ( _cccdd ) ; return _cccdd ;
} ; func ( _dfgaf rulingList ) tidied ( _facac string ) rulingList { _daead := _dfgaf . removeDuplicates ( ) ; _daead . log ( "\u0075n\u0069\u0071\u0075\u0065\u0073" ) ; _cgbc := _daead . snapToGroups ( ) ; if _cgbc == nil { return nil ; } ; _cgbc . sort ( ) ; if _aebg { _ac . Log . Info ( "\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064" , _facac , len ( _dfgaf ) , len ( _daead ) , len ( _cgbc ) ) ;
} ; _cgbc . log ( "\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d" ) ; return _cgbc ; } ; func ( _affg rulingList ) splitSec ( ) [ ] rulingList { _ab . Slice ( _affg , func ( _fbaeg , _cefgf int ) bool { _dfee , _geac := _affg [ _fbaeg ] , _affg [ _cefgf ] ; if _dfee . _efgeb != _geac . _efgeb { return _dfee . _efgeb < _geac . _efgeb ;
} ; return _dfee . _bbge < _geac . _bbge ; } ) ; _agad := make ( map [ * ruling ] struct { } , len ( _affg ) ) ; _gbbg := func ( _ccabb * ruling ) rulingList { _dece := rulingList { _ccabb } ; _agad [ _ccabb ] = struct { } { } ; for _ , _aface := range _affg { if _ , _fgcee := _agad [ _aface ] ; _fgcee { continue ;
} ; for _ , _agcf := range _dece { if _aface . alignsSec ( _agcf ) { _dece = append ( _dece , _aface ) ; _agad [ _aface ] = struct { } { } ; break ; } ; } ; } ; return _dece ; } ; _eeafg := [ ] rulingList { _gbbg ( _affg [ 0 ] ) } ; for _ , _ccdeb := range _affg [ 1 : ] { if _ , _cffc := _agad [ _ccdeb ] ;
_cffc { continue ; } ; _eeafg = append ( _eeafg , _gbbg ( _ccdeb ) ) ; } ; return _eeafg ; } ; func _eecc ( _fceb , _becf float64 ) string { _agddf := ! _ebfaf ( _fceb - _becf ) ; if _agddf { return "\u000a" ; } ; return "\u0020" ; } ; func _ebdgb ( _geef float64 ) float64 { return _egac * _dc . Round ( _geef / _egac ) } ;
// _cfec matches either of the two alternative patterns _efdd and _dceb, which
// are joined with "|". It is compiled once at package initialisation;
// MustCompile panics if the combined pattern is invalid.
var _cfec = _gd.MustCompile(_efdd + "\u007c" + _dceb)