2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2018-03-22 14:03:47 +00:00
2020-08-27 21:45:09 +00:00
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
2024-04-16 11:40:43 +00:00
package extractor ; import ( _dc "bytes" ; _b "errors" ; _agc "fmt" ; _ga "github.com/unidoc/unipdf/v3/common" ; _fc "github.com/unidoc/unipdf/v3/contentstream" ; _add "github.com/unidoc/unipdf/v3/core" ; _ce "github.com/unidoc/unipdf/v3/internal/license" ; _cg "github.com/unidoc/unipdf/v3/internal/textencoding" ;
_agf "github.com/unidoc/unipdf/v3/internal/transform" ; _ba "github.com/unidoc/unipdf/v3/model" ; _ed "golang.org/x/image/draw" ; _ea "golang.org/x/text/unicode/norm" ; _ag "image" ; _gff "image/color" ; _gg "io" ; _gf "math" ; _ad "reflect" ; _d "regexp" ; _e "sort" ; _f "strings" ;
_c "unicode" ; _a "unicode/utf8" ; ) ; var _dddb string = "\u005e\u005b\u0061\u002d\u007a\u0041\u002dZ\u005d\u0028\u005c)\u007c\u005c\u002e)\u007c\u005e[\u005c\u0064\u005d\u002b\u0028\u005c)\u007c\\.\u0029\u007c\u005e\u005c\u0028\u005b\u0061\u002d\u007a\u0041\u002d\u005a\u005d\u005c\u0029\u007c\u005e\u005c\u0028\u005b\u005c\u0064\u005d\u002b\u005c\u0029" ;
func ( _eabe * wordBag ) minDepth ( ) float64 { return _eabe . _ebgd - ( _eabe . Ury - _eabe . _cdea ) } ; func ( _egee * textObject ) getFontDirect ( _faafg string ) ( * _ba . PdfFont , error ) { _ecgc , _afgg := _egee . getFontDict ( _faafg ) ; if _afgg != nil { return nil , _afgg ; } ;
_cea , _afgg := _ba . NewPdfFontFromPdfObject ( _ecgc ) ; if _afgg != nil { _ga . Log . Debug ( "\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _faafg , _afgg ) ;
} ; return _cea , _afgg ; } ; func _eacc ( _gdaab , _bafbe _agf . Point , _egbc _gff . Color ) ( * ruling , bool ) { _bggdd := lineRuling { _eded : _gdaab , _badee : _bafbe , _cebc : _ffdg ( _gdaab , _bafbe ) , Color : _egbc } ; if _bggdd . _cebc == _fbcc { return nil , false ; } ; return _bggdd . asRuling ( ) ;
2024-01-22 01:16:41 +00:00
} ;
2024-04-16 11:40:43 +00:00
// String returns a description of `p`.
// A paragraph flagged `_cddef` is rendered as its rectangle followed by
// "[EMPTY]". Otherwise the rectangle is followed by the optional "[WxH] "
// dimensions of the attached table, the number of lines and the quoted result
// of `_adgd(p.text(), 50)`.
func (p *textPara) String() string {
	if p._cddef {
		return _agc.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", p.PdfRectangle)
	}
	var tableDims string
	if tbl := p._fbbea; tbl != nil {
		tableDims = _agc.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", tbl._eacg, tbl._cgae)
	}
	return _agc.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", p.PdfRectangle, tableDims, len(p._fdec), _adgd(p.text(), 50))
}

// gridTiling is a rectangle together with two float64 coordinate lists and a
// two-level map from coordinate pairs to gridTile values.
// NOTE(review): which list holds x and which holds y is not visible here.
type gridTiling struct {
	_ba.PdfRectangle
	_fece []float64
	_dfca []float64
	_gdcg map[float64]map[float64]gridTile
}

// _cgbc returns the initial textState for the rectangle `_efb`: `_def` is
// 100, `_gdf` is RenderModeFill, `_ade` is `_efb` and every other field is
// zero.
func _cgbc(_efb _ba.PdfRectangle) textState {
	var state textState
	state._def = 100
	state._gdf = RenderModeFill
	state._ade = _efb
	return state
}

// _cbde reports whether the left edge of word `_bdcf` lies in the half-open
// interval [Urx, Urx+_ffea) measured from the right edge of bag `_fdda`.
func _cbde(_fdda *wordBag, _bdcf *textWord, _ffea float64) bool {
	left := _bdcf.Llx
	if !(_fdda.Urx <= left) {
		return false
	}
	return left < _fdda.Urx+_ffea
}

// top returns the last element of the stack, or nil when the stack is empty.
func (s *stateStack) top() *textState {
	if s.empty() {
		return nil
	}
	last := s.size() - 1
	return (*s)[last]
}

// Ruling kinds. `_fbcc` is the zero value; `_eacc` treats it as "not a
// ruling". `_aaad` is the kind `_fbcde` assigns to a ruling built from a
// rectangle's Lly/Llx/Urx, and `_cfae` is the kind collected first by
// rulingList.vertsHorzs.
const (
	_fbcc rulingKind = iota
	_aaad
	_cfae
)

// computeViews derives the cached views of the page from its paragraphs:
// the plain text (`_ggff`), the text marks (`_ccca`) and the tables (`_gdba`).
func (pt *PageText) computeViews() {
	paras := pt.getParagraphs()

	var buf _dc.Buffer
	paras.writeText(&buf)

	pt._ggff = buf.String()
	pt._ccca = paras.toTextMarks()
	pt._gdba = paras.tables()

	if _efda {
		_ga.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(pt._gdba))
	}
}

// _cffd matches either the `_geef` pattern or the `_dddb` pattern; it is
// compiled once at package initialisation.
var _cffd *_d.Regexp = _d.MustCompile(_geef + "\u007c" + _dddb)

// _dag returns the negated lower y coordinate of the bounding box of `_fbfdf`.
func _dag(_fbfdf bounded) float64 {
	box := _fbfdf.bbox()
	return -box.Lly
}

// growTable repeatedly extends the table until neither getDown nor getRight
// offers anything more.
//
// On each pass, if both a "down" and a "right" list exist and they end in the
// same untaken paragraph, the down list is added as a new row, the right list
// is re-queried and (if still present) added as a new column, and the shared
// paragraph is stored in the new corner cell. Otherwise the down list is
// preferred over the right list.
func (t *textTable) growTable() {
	// addRow increments `_cgae` and fills the new last row from `row`.
	addRow := func(row paraList) {
		t._cgae++
		for x := 0; x < t._eacg; x++ {
			cell := row[x]
			t.put(x, t._cgae-1, cell)
		}
	}
	// addCol increments `_eacg` and fills the new last column from `col`.
	addCol := func(col paraList) {
		t._eacg++
		for y := 0; y < t._cgae; y++ {
			cell := col[y]
			t.put(t._eacg-1, y, cell)
		}
	}

	if _baace {
		t.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}

	for pass := 0; ; pass++ {
		down := t.getDown()
		right := t.getRight()
		if _baace {
			_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", pass, t)
			_agc.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", down)
			_agc.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", right)
		}

		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				addRow(down)
				if right = t.getRight(); right != nil {
					addCol(right)
					t.put(t._eacg-1, t._cgae-1, corner)
				}
				continue
			}
		}

		switch {
		case down != nil:
			addRow(down)
		case right != nil:
			addCol(right)
		default:
			// Nothing left to add in either direction.
			return
		}
	}
}

// _cbgc returns the keys of `_ggffg` in ascending order.
func _cbgc(_ggffg map[float64]gridTile) []float64 {
	keys := make([]float64, len(_ggffg))
	n := 0
	for key := range _ggffg {
		keys[n] = key
		n++
	}
	_e.Float64s(keys)
	return keys
}
2024-02-11 21:29:32 +00:00
2024-04-16 11:40:43 +00:00
// Options holds the extractor options.
type Options struct {
	// DisableDocumentTags, when `true`, makes list extraction ignore the
	// document tags; when `false` the tags are used if the document has them.
	DisableDocumentTags bool

	// ApplyCropBox will extract page text based on the page cropbox if set to `true`.
	ApplyCropBox bool

	// UseSimplerExtractionProcess will skip topological text ordering and table processing.
	//
	// NOTE: While the extra processing is normally beneficial, it can also lead
	// to problems when it does not work, so this flag lets the user control it.
	//
	// Skipping these extraction steps also reduces the processing time.
	UseSimplerExtractionProcess bool

	// IncludeAnnotations specifies whether to include annotations in the
	// extraction process. The default value is `false`.
	IncludeAnnotations bool
}
2024-02-11 21:29:32 +00:00
2024-04-16 11:40:43 +00:00
// String returns a description of `t` in the form "<_eacg> x <_cgae> <_edeg>",
// i.e. two integers and a boolean ("%d x %d %t").
func (t *textTable) String() string {
	w, h, flag := t._eacg, t._cgae, t._edeg
	return _agc.Sprintf("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074", w, h, flag)
}
2024-02-11 21:29:32 +00:00
2024-04-16 11:40:43 +00:00
// Text returns the extracted page text.
func (pt PageText) Text() string {
	return pt._ggff
}

// logComposite dumps the table to the log and to stdout when the `_efda`
// debug switch is on; otherwise it does nothing.
//
// Two grids are printed, each under a "~~~Para"/"~~~Text" header carrying the
// table dimensions and the caller-supplied title `_fefaa`: the first shows
// the number of paragraphs per cell, the second the merged cell text passed
// through `_adgd(…, 12)`.
func (t *textTable) logComposite(_fefaa string) {
	if !_efda {
		return
	}

	// Grid 1: paragraph count per cell.
	_ga.Log.Info("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", t._eacg, t._cgae, _fefaa)
	_agc.Printf("\u0025\u0035\u0073 \u007c", "")
	for x := 0; x < t._eacg; x++ {
		_agc.Printf("\u0025\u0033\u0064 \u007c", x)
	}
	_agc.Println("")
	_agc.Printf("\u0025\u0035\u0073 \u002b", "")
	for x := 0; x < t._eacg; x++ {
		_agc.Printf("\u0025\u0033\u0073 \u002b", "\u002d\u002d\u002d")
	}
	_agc.Println("")
	for y := 0; y < t._cgae; y++ {
		_agc.Printf("\u0025\u0035\u0064 \u007c", y)
		for x := 0; x < t._eacg; x++ {
			paras, _ := t._aaaga[_bgcc(x, y)].parasBBox()
			_agc.Printf("\u0025\u0033\u0064 \u007c", len(paras))
		}
		_agc.Println("")
	}

	// Grid 2: text per cell.
	_ga.Log.Info("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", t._eacg, t._cgae, _fefaa)
	_agc.Printf("\u0025\u0035\u0073 \u007c", "")
	for x := 0; x < t._eacg; x++ {
		_agc.Printf("\u0025\u0031\u0032\u0064\u0020\u007c", x)
	}
	_agc.Println("")
	_agc.Printf("\u0025\u0035\u0073 \u002b", "")
	for x := 0; x < t._eacg; x++ {
		_agc.Print("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b")
	}
	_agc.Println("")
	for y := 0; y < t._cgae; y++ {
		_agc.Printf("\u0025\u0035\u0064 \u007c", y)
		for x := 0; x < t._eacg; x++ {
			paras, _ := t._aaaga[_bgcc(x, y)].parasBBox()
			cellText := ""
			if merged := paras.merge(); merged != nil {
				cellText = merged.text()
			}
			// Quote to escape control characters, then strip the surrounding quotes.
			cellText = _agc.Sprintf("\u0025\u0071", _adgd(cellText, 12))
			cellText = cellText[1 : len(cellText)-1]
			_agc.Printf("\u0025\u0031\u0032\u0073\u0020\u007c", cellText)
		}
		_agc.Println("")
	}
}

// newTablePara returns a paragraph that wraps the table: both of its
// rectangles (`PdfRectangle` and `_dgabg`) are the table's computed bounding
// box and `_fbbea` points back at the table.
func (t *textTable) newTablePara() *textPara {
	box := t.computeBbox()
	para := &textPara{PdfRectangle: box, _dgabg: box, _fbbea: t}
	if _efda {
		_ga.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// tidied returns the rulings with duplicates removed, snapped to groups and
// sorted. It returns nil when snapToGroups yields nil. `_ebafce` is only used
// as a title in debug logging.
func (rl rulingList) tidied(_ebafce string) rulingList {
	uniques := rl.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()

	if _adgbf {
		_ga.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", _ebafce, len(rl), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// arrangeText drains the bag into text lines and returns them as one
// paragraph, or nil when no line was produced.
//
// For every depth index, while that index still holds words, a new line is
// seeded with `_eedf` at the first reading index. The line is then extended
// by repeatedly picking, from the depth band around the seed word, the
// candidate that `_eddba` ranks lowest among those whose `_daf` distance to the
// line's last word lies within the limits derived from `_gcfb` and `_bbad`.
// A candidate below the lower limit ends the line immediately.
func (b *wordBag) arrangeText() *textPara {
	b.sort()
	if _cafd {
		b.removeDuplicates()
	}

	var paraLines []*textLine
	for _, depthIdx := range b.depthIndexes() {
		for !b.empty(depthIdx) {
			readingIdx := b.firstReadingIndex(depthIdx)
			seed := b.firstWord(readingIdx)
			line := _eedf(b, readingIdx)

			scale := seed._eabbf
			bandLo := seed._dfagd - _fcca*scale
			bandHi := seed._dfagd + _fcca*scale
			upperLimit := _gcfb * scale
			lowerLimit := _bbad * scale

		extend:
			for {
				var best *textWord
				bestIdx := 0
				for _, idx := range b.depthBand(bandLo, bandHi) {
					cand := b.highestWord(idx, bandLo, bandHi)
					if cand == nil {
						continue
					}
					gap := _daf(cand, line._cdcg[len(line._cdcg)-1])
					if gap < -lowerLimit {
						break extend
					}
					if gap > upperLimit {
						continue
					}
					if best != nil && _eddba(cand, best) >= 0 {
						continue
					}
					best = cand
					bestIdx = idx
				}
				if best == nil {
					break
				}
				line.pullWord(b, best, bestIdx)
			}

			line.markWordBoundaries()
			paraLines = append(paraLines, line)
		}
	}
	if len(paraLines) == 0 {
		return nil
	}

	_e.Slice(paraLines, func(i, j int) bool {
		return _cgfgf(paraLines[i], paraLines[j]) < 0
	})
	para := _bfgg(b.PdfRectangle, paraLines)

	if _cbbb {
		_ga.Log.Info("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073", para.String())
		if _abfa {
			for i, ln := range para._fdec {
				_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, ln.String())
				if _gaba {
					for j, word := range ln._cdcg {
						_agc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", j, word.String())
						for k, mark := range word._abcee {
							_agc.Printf("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n", k, mark.String())
						}
					}
				}
			}
		}
	}
	return para
}

// yNeighbours builds two events per paragraph, an "enter" at Lly and a
// "leave" at Ury, and returns the result of eventNeighbours on them.
// When `_bgbc` is non-zero each interval is widened on both sides by
// `_bgbc` times the paragraph's fontsize().
func (paras paraList) yNeighbours(_bgbc float64) map[*textPara][]int {
	events := make([]event, 2*len(paras))
	if _bgbc == 0 {
		for i, p := range paras {
			events[2*i] = event{p.Lly, true, i}
			events[2*i+1] = event{p.Ury, false, i}
		}
	} else {
		for i, p := range paras {
			events[2*i] = event{p.Lly - _bgbc*p.fontsize(), true, i}
			events[2*i+1] = event{p.Ury + _bgbc*p.fontsize(), false, i}
		}
	}
	return paras.eventNeighbours(events)
}

// reset sets both matrices (`_eefe`, `_dbc`) to the identity and clears `_cfde`.
func (to *textObject) reset() {
	to._eefe = _agf.IdentityMatrix()
	to._dbc = _agf.IdentityMatrix()
	to._cfde = nil
}

// _aada reports whether every mark in the line carries the same `_ebbb`
// value as the first mark seen. A line without marks yields true.
func _aada(_cggf *textLine) bool {
	first := -1 // -1 means "no value recorded yet"
	for _, word := range _cggf._cdcg {
		for _, mark := range word._abcee {
			value := mark._ebbb
			if first == -1 {
				first = value
				continue
			}
			if first != value {
				return false
			}
		}
	}
	return true
}

// markCells sets `_gecb` on every non-nil paragraph stored in the table.
func (t *textTable) markCells() {
	for y := 0; y < t._cgae; y++ {
		for x := 0; x < t._eacg; x++ {
			if para := t.get(x, y); para != nil {
				para._gecb = true
			}
		}
	}
}

// newSubPath clears the current path and, with `_fdbg` enabled, logs the state.
func (ss *shapesState) newSubPath() {
	ss.clearPath()
	if _fdbg {
		_ga.Log.Info("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073", ss)
	}
}

// getTextMarkAtOffset returns a pointer to a copy of the first mark whose
// Offset equals `_begg`, or nil if there is none. Because a copy is returned,
// writes through the pointer do not affect the array.
func (ma *TextMarkArray) getTextMarkAtOffset(_begg int) *TextMark {
	for i := range ma._ffca {
		if ma._ffca[i].Offset == _begg {
			mark := ma._ffca[i]
			return &mark
		}
	}
	return nil
}

// getFillColor converts the graphics state's non-stroking colour, in its
// non-stroking colourspace, with `_eddaee`.
func (to *textObject) getFillColor() _gff.Color {
	gs := to._bace
	return _eddaee(gs.ColorspaceNonStroking, gs.ColorNonStroking)
}

// getFont returns the font for the resource name `_decc`.
//
// When the extractor has a font cache (`_cdb._bd` non-nil) the font
// dictionary's String() is used as the cache key. Every lookup increments the
// access counter `_cdb._ae`; a hit stamps the entry with the new counter value
// and returns the cached font. On a miss the font is built with
// getFontDirect and stored, evicting the entry with the smallest stamp first
// when the cache already holds `_bcgf` or more entries.
//
// Without a cache the font is simply built with getFontDirect. Errors from
// getFontDict or getFontDirect are returned with a nil font.
func (to *textObject) getFont(_decc string) (*_ba.PdfFont, error) {
	if to._cdb._bd != nil {
		dict, err := to.getFontDict(_decc)
		if err != nil {
			_ga.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073", _decc, err.Error())
			return nil, err
		}
		to._cdb._ae++
		key := dict.String()
		if entry, ok := to._cdb._bd[key]; ok {
			// The map stores fontEntry by value, so `entry` is a copy: the
			// refreshed stamp must be written back or it is lost and the
			// eviction below would never see recent use.
			entry._ccg = to._cdb._ae
			to._cdb._bd[key] = entry
			return entry._cbad, nil
		}
	}

	dict, err := to.getFontDict(_decc)
	if err != nil {
		return nil, err
	}
	font, err := to.getFontDirect(_decc)
	if err != nil {
		return nil, err
	}

	if cache := to._cdb._bd; cache != nil {
		fresh := fontEntry{font, to._cdb._ae}
		if len(cache) >= _bcgf {
			// Evict the entry with the smallest access stamp.
			var (
				victimKey string
				victim    fontEntry
				found     bool
			)
			for k, e := range cache {
				if !found || e._ccg < victim._ccg {
					victimKey, victim, found = k, e, true
				}
			}
			if found {
				delete(cache, victimKey)
			}
		}
		cache[dict.String()] = fresh
	}
	return font, nil
}

// updateBBox folds the rectangle of every paragraph in the cell into the
// cell's own rectangle using `_bbbafc`.
func (c *compositeCell) updateBBox() {
	for _, para := range c.paraList {
		c.PdfRectangle = _bbbafc(c.PdfRectangle, para.PdfRectangle)
	}
}

// vertsHorzs splits the list by kind, preserving order: the first result
// holds the rulings of kind `_cfae`, the second those of kind `_aaad`.
// Rulings of any other kind are dropped.
func (rl rulingList) vertsHorzs() (rulingList, rulingList) {
	var first, second rulingList
	for _, r := range rl {
		switch r._bbce {
		case _cfae:
			first = append(first, r)
		case _aaad:
			second = append(second, r)
		}
	}
	return first, second
}
2024-02-11 21:29:32 +00:00
// Font represents the font properties on a PDF page.
2024-04-16 11:40:43 +00:00
type Font struct { PdfFont * _ba . PdfFont ;
2024-02-11 21:29:32 +00:00
// FontName represents Font Name from font properties.
FontName string ;
// FontType represents Font Subtype entry in the font dictionary inside page resources.
// Examples : type0, Type1, MMType1, Type3, TrueType, CIDFont.
FontType string ;
// ToUnicode is true if font provides a `ToUnicode` mapping.
ToUnicode bool ;
// IsCID is true if underlying font is a composite font.
// Composite font is represented by a font dictionary whose Subtype is `Type0`
IsCID bool ;
// IsSimple is true if font is simple font.
// A simple font is limited to only 8 bit (255) character codes.
IsSimple bool ;
// FontData represents the raw data of the embedded font file.
// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CCF).
// FontData value can be indicates from `FontFile`, `FontFile2` or `FontFile3` inside Font Descriptor.
// At most, only one of `FontFile`, `FontFile2` or `FontFile3` will be FontData value.
FontData [ ] byte ;
// FontFileName is a name representing the font. it has format:
// (Font Name) + (Font Type Extension), example: helvetica.ttf.
FontFileName string ;
// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
2024-04-16 11:40:43 +00:00
FontDescriptor * _ba . PdfFontDescriptor ; } ; func _fbcde ( _fdcf _ba . PdfRectangle ) * ruling { return & ruling { _bbce : _aaad , _edga : _fdcf . Lly , _fcec : _fdcf . Llx , _abeg : _fdcf . Urx } ; } ; func ( _eedee * textWord ) toTextMarks ( _cgfbf * int ) [ ] TextMark { var _bdaf [ ] TextMark ;
for _ , _adae := range _eedee . _abcee { _bdaf = _aadbb ( _bdaf , _cgfbf , _adae . ToTextMark ( ) ) ; } ; return _bdaf ; } ; var _gbdc = TextMark { Text : "\u005b\u0058\u005d" , Original : "\u0020" , Meta : true , FillColor : _gff . White , StrokeColor : _gff . White } ;
2024-02-11 21:29:32 +00:00
2024-04-16 11:40:43 +00:00
// String returns a human readable description of `vecs`.
// An empty list prints as "{ EMPTY }". If either group returned by
// vertsHorzs is empty only the two counts are printed. Otherwise the counts
// are followed by the rectangle spanned by the first and last ruling of each
// group.
func (vecs rulingList) String() string {
	if len(vecs) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}

	verts, horzs := vecs.vertsHorzs()
	nVerts, nHorzs := len(verts), len(horzs)
	if nVerts == 0 || nHorzs == 0 {
		return _agc.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", nVerts, nHorzs)
	}

	var span _ba.PdfRectangle
	span.Llx = verts[0]._edga
	span.Urx = verts[nVerts-1]._edga
	span.Lly = horzs[nHorzs-1]._edga
	span.Ury = horzs[0]._edga
	return _agc.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", nVerts, nHorzs, span)
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// TableCell is a cell in a TextTable.
type TableCell struct { _ba . PdfRectangle ;
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// Text is the extracted text.
Text string ;
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// Marks returns the TextMarks corresponding to the text in Text.
Marks TextMarkArray ; } ;
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// List returns all the list objects detected on the page.
// It detects all the bullet point lists on a given PDF page and builds a slice of bullet list objects.
// A bullet list object has a tree structure: each bullet point list is extracted with the text
// content it contains, and all the sub lists found under it become its children in the tree.
// The remaining content of the page is ignored; only text in bullet point lists is extracted.
//
// The list extraction is done in one of two ways:
//  1. If the document is tagged, the lists are extracted using the tags provided in the document.
//  2. Otherwise the bullet lists are extracted from the raw text using regex matching.
//
// By default the document tags are used if available. This can be disabled with
// `DisableDocumentTags` in the `Options` object, which sometimes gives a better
// bullet list extraction when the document was tagged incorrectly.
// If the structure tree turns out to have no content, the text-matching result is returned.
//
//	options := &Options{
//	    DisableDocumentTags: false, // this means use document tag if available
//	}
//	ex, err := NewWithOptions(page, options)
//	// handle error
//	pageText, _, _, err := ex.ExtractPageText()
//	// handle error
//	lists := pageText.List()
//	txt := lists.Text()
func (pt PageText) List() lists {
	useTags := !pt._eaag._bcc
	paras := pt.getParagraphs()
	tagged := pt._abge != nil && *pt._abge != nil

	// Text-matching result; used unless the tag-based extraction succeeds.
	result := paras.list()
	if !tagged || !useTags {
		return result
	}

	linesByKey := _edgg(&paras)
	root := &structTreeRoot{}
	root.parseStructTreeRoot(*pt._abge)
	if root._faef == nil {
		_ga.Log.Debug("\u004c\u0069\u0073\u0074\u003a\u0020\u0073t\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e'\u0074\u0020\u0068\u0061\u0076e\u0020\u0061\u006e\u0079\u0020\u0063\u006f\u006e\u0074e\u006e\u0074\u002c\u0020\u0075\u0073\u0069\u006e\u0067\u0020\u0074\u0065\u0078\u0074\u0020\u006d\u0061\u0074\u0063\u0068\u0069\u006e\u0067\u0020\u006d\u0065\u0074\u0068\u006f\u0064\u0020\u0069\u006e\u0073\u0074\u0065\u0061\u0064\u002e")
		return result
	}
	result = root.buildList(linesByKey, pt._abad)
	return result
}

// _dbcba ("makeTextWords" in its log output) groups the marks `_cdbba` into
// words for a page of size `_fagdf`.
//
// A mark for which `_efcbe` is true ends the current word and is itself
// dropped. A mark starts a new word when there is no current word, or when
// its `_daf` gap to the current word is at least `_afbgg`, is below
// `-_cdfe`, or its depth differs from the word's by more than `_ebfgc`
// (gap and depth difference both relative to the word's `_eabbf`). With
// `_fecd` enabled, diacritic marks recognised by `_ecfb` are merged into the
// neighbouring mark instead. Words whose computed text satisfies `_efcbe` are
// discarded.
func _dbcba(_cdbba []*textMark, _fagdf _ba.PdfRectangle) []*textWord {
	var words []*textWord
	var current *textWord

	if _bfgae {
		_ga.Log.Info("\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073", len(_cdbba))
	}

	// flush finishes the word under construction, keeping it only when its
	// text passes the `_efcbe` check, and resets `current`.
	flush := func() {
		if current == nil {
			return
		}
		text := current.computeText()
		if !_efcbe(text) {
			current._eaae = text
			words = append(words, current)
			if _bfgae {
				_ga.Log.Info("\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", len(words)-1, current.String())
				for i, m := range current._abcee {
					_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, m.String())
				}
			}
		}
		current = nil
	}

	for _, mark := range _cdbba {
		if _fecd && current != nil && len(current._abcee) > 0 {
			prev := current._abcee[len(current._abcee)-1]
			markDia, markIsDia := _ecfb(mark._bfdb)
			prevDia, prevIsDia := _ecfb(prev._bfdb)
			if markIsDia && !prevIsDia && prev.inDiacriticArea(mark) {
				// Diacritic follows its base character.
				current.addDiacritic(markDia)
				continue
			}
			if prevIsDia && !markIsDia && mark.inDiacriticArea(prev) {
				// Diacritic preceded its base character: replace it with the
				// base mark and re-apply the diacritic.
				current._abcee = current._abcee[:len(current._abcee)-1]
				current.appendMark(mark, _fagdf)
				current.addDiacritic(prevDia)
				continue
			}
		}

		if _efcbe(mark._bfdb) {
			flush()
			continue
		}
		if current == nil {
			current = _adcdc([]*textMark{mark}, _fagdf)
			continue
		}

		scale := current._eabbf
		depthGap := _gf.Abs(_eegf(_fagdf, mark)-current._dfagd) / scale
		readingGap := _daf(mark, current) / scale
		if readingGap >= _afbgg || !(-_cdfe <= readingGap && depthGap <= _ebfgc) {
			flush()
			current = _adcdc([]*textMark{mark}, _fagdf)
			continue
		}
		current.appendMark(mark, _fagdf)
	}
	flush()
	return words
}

// close appends the first point again unless `_gaeg` already considers the
// first and last points equal, marks the subpath closed and removes
// duplicates.
// NOTE(review): indexes the first point unconditionally, so it presumably
// must not be called on a subpath without points; confirm against callers.
func (sp *subpath) close() {
	first := sp._aeee[0]
	if !_gaeg(first, sp.last()) {
		sp.add(sp._aeee[0])
	}
	sp._dbe = true
	sp.removeDuplicates()
}

// sortReadingOrder sorts the paragraphs in place by `_cgfgf` after computing
// their expanded boxes with computeEBBoxes. Lists of at most one paragraph
// are left untouched.
func (paras paraList) sortReadingOrder() {
	_ga.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(paras))
	if len(paras) <= 1 {
		return
	}
	paras.computeEBBoxes()
	// The less function must be strict: with `<=` two equal paragraphs would
	// each claim to sort before the other, which sort.Slice does not allow.
	_e.Slice(paras, func(i, j int) bool {
		return _cgfgf(paras[i], paras[j]) < 0
	})
}

// empty reports whether the bag has no entry for depth index `_dfef`.
func (b *wordBag) empty(_dfef int) bool {
	_, present := b._fcgd[_dfef]
	return !present
}
// _dafaf returns the larger of its two integer arguments.
func _dafaf(_fcda, _bdea int) int {
	if _bdea >= _fcda {
		return _bdea
	}
	return _fcda
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// Len returns the number of TextMarks in `ma`. A nil array has length 0.
func (ma *TextMarkArray) Len() int {
	if ma != nil {
		return len(ma._ffca)
	}
	return 0
}

// _eafa returns the translation component of the matrix `_aaea` as a point.
func _eafa(_aaea _agf.Matrix) _agf.Point {
	var pt _agf.Point
	pt.X, pt.Y = _aaea.Translation()
	return pt
}

// _bggg reports whether the vertical extents [Lly, Ury] of the two rectangles
// overlap; touching edges count as overlapping.
func _bggg(_ggc, _caf _ba.PdfRectangle) bool {
	if !(_ggc.Lly <= _caf.Ury) {
		return false
	}
	return _caf.Lly <= _ggc.Ury
}
// _dabg collects the elements tagged "L" that sit exactly three levels below
// `_fgdc` (child, grandchild, great-grandchild). The result is never nil.
func _dabg(_fgdc structElement) []structElement {
	found := []structElement{}
	level1 := _fgdc._abff
	for i := range level1 {
		level2 := level1[i]._abff
		for j := range level2 {
			level3 := level2[j]._abff
			for k := range level3 {
				if level3[k]._bbag == "\u004c" {
					found = append(found, level3[k])
				}
			}
		}
	}
	return found
}

// buildList extracts the lists described by the structure tree.
//
// The top element is the single root child when it is tagged "Document",
// "Sect", "Part", "Div" or "Art"; with several (or no) root children a
// synthetic element wrapping all of them is used. A single root child with
// any other tag yields nil. From the top element's direct children, "L"
// elements are taken as they are and "Table" elements contribute the "L"
// elements found by `_dabg`. The collected elements are passed to `_gagf`
// together with `_ccfe` and `_cfaf`, and every result is expanded with
// `_abda`.
func (root *structTreeRoot) buildList(_ccfe map[int][]*textLine, _cfaf _add.PdfObject) []*list {
	if root == nil {
		_ga.Log.Debug("\u0062\u0075\u0069\u006c\u0064\u004c\u0069\u0073\u0074\u003a\u0020t\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0069\u0073 \u006e\u0069\u006c")
		return nil
	}

	var top *structElement
	if len(root._faef) != 1 {
		top = &structElement{_abff: root._faef, _bbag: root._adbca}
	} else {
		switch root._faef[0]._bbag {
		case "\u0044\u006f\u0063\u0075\u006d\u0065\u006e\u0074", "\u0053\u0065\u0063\u0074", "\u0050\u0061\u0072\u0074", "\u0044\u0069\u0076", "\u0041\u0072\u0074":
			top = &root._faef[0]
		}
	}
	if top == nil {
		_ga.Log.Debug("\u0062\u0075\u0069\u006cd\u004c\u0069\u0073\u0074\u003a\u0020\u0074\u006f\u0070\u0045l\u0065m\u0065\u006e\u0074\u0020\u0069\u0073\u0020n\u0069\u006c")
		return nil
	}

	listElems := []structElement{}
	for _, child := range top._abff {
		switch child._bbag {
		case "\u004c":
			listElems = append(listElems, child)
		case "\u0054\u0061\u0062l\u0065":
			listElems = append(listElems, _dabg(child)...)
		}
	}

	var out []*list
	for _, item := range _gagf(listElems, _ccfe, _cfaf) {
		out = append(out, _abda(item)...)
	}
	return out
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// Elements returns the TextMarks in `ma`. A nil array yields nil, matching
// the nil tolerance of Len.
func (ma *TextMarkArray) Elements() []TextMark {
	if ma == nil {
		return nil
	}
	return ma._ffca
}

// lines returns the lines of all paragraphs, concatenated in paragraph order.
func (paras paraList) lines() []*textLine {
	var all []*textLine
	for _, p := range paras {
		all = append(all, p._fdec...)
	}
	return all
}

// eventNeighbours sweeps over `_dbge` and returns, for each paragraph that
// has an "enter" event, the indexes of the paragraphs whose intervals were
// open at the same time. Paragraphs without any neighbour map to nil.
//
// Events are processed in ascending coordinate order; at equal coordinates
// "enter" events come before "leave" events, so intervals that merely touch
// are still neighbours. The order of each neighbour slice is unspecified.
// The slice `_dbge` is sorted in place.
func (paras paraList) eventNeighbours(_dbge []event) map[*textPara][]int {
	_e.Slice(_dbge, func(i, j int) bool {
		a, b := _dbge[i], _dbge[j]
		if a._cdbc != b._cdbc {
			return a._cdbc < b._cdbc
		}
		if a._dddef != b._dddef {
			return a._dddef
		}
		// Break remaining ties on the paragraph index stored in the event.
		// Comparing slice positions is not a property of the elements and
		// so does not define an ordering. The tie order has no influence on
		// the neighbour sets built below.
		return a._dcfcc < b._dcfcc
	})

	neighbours := make(map[int]intSet)
	open := make(intSet)
	for _, ev := range _dbge {
		if !ev._dddef {
			open.del(ev._dcfcc)
			continue
		}
		neighbours[ev._dcfcc] = make(intSet)
		for other := range open {
			if other != ev._dcfcc {
				neighbours[ev._dcfcc].add(other)
				neighbours[other].add(ev._dcfcc)
			}
		}
		open.add(ev._dcfcc)
	}

	result := map[*textPara][]int{}
	for idx, set := range neighbours {
		para := paras[idx]
		if len(set) == 0 {
			result[para] = nil
			continue
		}
		list := make([]int, 0, len(set))
		for other := range set {
			list = append(list, other)
		}
		result[para] = list
	}
	return result
}

// _aga reports whether `_dcf` contains a font whose FontName equals `_aeb`.
func _aga(_dcf []Font, _aeb string) bool {
	for i := range _dcf {
		if _dcf[i].FontName == _aeb {
			return true
		}
	}
	return false
}

// textState holds the text parameters tracked while processing a page.
// `_cgbc` creates it with `_def` = 100, `_gdf` = RenderModeFill and `_ade` set
// to the page rectangle; every other field starts at zero.
type textState struct {
	_ffaf  float64
	_gdeg  float64
	_def   float64
	_bfa   float64
	_dcd   float64
	_gdf   RenderMode
	_eaaf  float64
	_gbdg  *_ba.PdfFont
	_ade   _ba.PdfRectangle
	_abd   int
	_bafdf int
}

// isAtom checks whether the paragraph is the first cell of a 2x2 block and,
// if so, returns the table built by `_baff` from the four paragraphs.
//
// The block consists of the paragraph itself, its `_fdgf` and `_ecada`
// neighbours and the `_ecada` neighbour of the `_fdgf` neighbour. nil is
// returned when any of the three neighbours is taken, or when the fourth
// paragraph is not also the `_fdgf` neighbour of the `_ecada` neighbour.
func (p *textPara) isAtom() *textTable {
	sideA := p._fdgf
	sideB := p._ecada
	if sideA.taken() || sideB.taken() {
		return nil
	}
	diagonal := sideA._ecada
	if diagonal.taken() || diagonal != sideB._fdgf {
		return nil
	}
	return _baff(p, sideA, sideB, diagonal)
}

// _bgcc packs the cell coordinates into one key: `_dccfb` is multiplied by
// 0x1000000 (2^24) and `_gfdbe` is added.
func _bgcc(_dccfb, _gfdbe int) uint64 {
	return uint64(_dccfb)*0x1000000 + uint64(_gfdbe)
}
// equals reports whether both rulings have the same kind and their three
// coordinates (`_edga`, `_fcec`, `_abeg`) each compare equal under `_gecbg`.
func (r *ruling) equals(_dfff *ruling) bool {
	if r._bbce != _dfff._bbce {
		return false
	}
	if !_gecbg(r._edga, _dfff._edga) {
		return false
	}
	if !_gecbg(r._fcec, _dfff._fcec) {
		return false
	}
	return _gecbg(r._abeg, _dfff._abeg)
}

// makeRemovals returns a map holding an empty word set for every depth index
// currently present in the bag.
func (b *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	removals := make(map[int]map[*textWord]struct{}, len(b._fcgd))
	for depthIdx := range b._fcgd {
		removals[depthIdx] = map[*textWord]struct{}{}
	}
	return removals
}

// depthBand returns the depth indexes between the indexes of depths `_cdbg`
// and `_dccd`, as computed by depthRange, or nil when the bag has no entries.
func (b *wordBag) depthBand(_cdbg, _dccd float64) []int {
	if len(b._fcgd) == 0 {
		return nil
	}
	lo := b.getDepthIdx(_cdbg)
	hi := b.getDepthIdx(_dccd)
	return b.depthRange(lo, hi)
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// String returns the name registered for `_ccdb` in the `_decf` table, or a
// "Not a mark: %d" message when the value has no entry.
func (_ccdb markKind) String() string {
	if name, known := _decf[_ccdb]; known {
		return name
	}
	return _agc.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _ccdb)
}

// extractXObjectImage records an ImageMark for the image XObject named `_aab`
// in the resources `_aea`, positioned using the CTM of graphics state `_bge`.
//
// The decoded image and its colorspace are cached in `_gaa._ca`, keyed by the
// XObject, so each XObject is decoded at most once. When the image has a Mask
// (or, failing that, an SMask) the mask is applied to the decoded image before
// it is cached; failure to build the mask is only logged. Returns nil without
// recording anything when the XObject or the image cannot be found.
func (_gaa *imageExtractContext) extractXObjectImage(_aab *_add.PdfObjectName, _bge _fc.GraphicsState, _aea *_ba.PdfPageResources) error {
	xobj, _ := _aea.GetXObjectByName(*_aab)
	if xobj == nil {
		return nil
	}

	cached, hit := _gaa._ca[xobj]
	if !hit {
		ximg, err := _aea.GetXObjectImageByName(*_aab)
		if err != nil {
			return err
		}
		if ximg == nil {
			return nil
		}
		img, err := ximg.ToImage()
		if err != nil {
			return err
		}

		// An explicit mask takes precedence over a soft mask.
		var mask _ag.Image
		switch {
		case ximg.Mask != nil:
			mask, err = _caac(ximg.Mask, _gff.Opaque)
			if err != nil {
				_ga.Log.Debug("\u0057\u0041\u0052\u004e\u003a \u0063\u006f\u0075\u006c\u0064 \u006eo\u0074\u0020\u0067\u0065\u0074\u0020\u0065\u0078\u0070\u006c\u0069\u0063\u0069\u0074\u0020\u0069\u006d\u0061\u0067e\u0020\u006d\u0061\u0073\u006b\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e")
			}
		case ximg.SMask != nil:
			mask, err = _ggee(ximg.SMask, _gff.Opaque)
			if err != nil {
				_ga.Log.Debug("W\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0067\u0065\u0074\u0020\u0073\u006f\u0066\u0074\u0020\u0069\u006da\u0067e\u0020\u006d\u0061\u0073k\u002e\u0020O\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063\u0074\u002e")
			}
		}

		if mask != nil {
			goImg, err := img.ToGoImage()
			if err != nil {
				return err
			}
			goImg = _aebeec(goImg, mask)

			// Rebuild the model image from the masked Go image, keeping
			// gray/indexed sources as gray images.
			switch ximg.ColorSpace.String() {
			case "\u0044\u0065\u0076\u0069\u0063\u0065\u0047\u0072\u0061\u0079", "\u0049n\u0064\u0065\u0078\u0065\u0064":
				img, err = _ba.ImageHandling.NewGrayImageFromGoImage(goImg)
			default:
				img, err = _ba.ImageHandling.NewImageFromGoImage(goImg)
			}
			if err != nil {
				return err
			}
		}

		cached = &cachedImage{_fed: img, _addb: ximg.ColorSpace}
		_gaa._ca[xobj] = cached
	}

	rgb, err := cached._addb.ImageToRGB(*cached._fed)
	if err != nil {
		return err
	}
	_ga.Log.Debug("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073", _bge.CTM.String())

	mark := ImageMark{
		Image:  &rgb,
		Width:  _bge.CTM.ScalingFactorX(),
		Height: _bge.CTM.ScalingFactorY(),
		Angle:  _bge.CTM.Angle(),
	}
	mark.X, mark.Y = _bge.CTM.Translation()
	_gaa._dcc = append(_gaa._dcc, mark)
	_gaa._fbd++
	return nil
}

// makeRectRuling tries to turn the first four points of `_befd` into a ruling
// with color `_becff`.
//
// Each of the four sides (point i to point (i+1)%4) is classified with `_ddbf`.
// The path is accepted when it has two `_aaad` ("horzs" in the debug output)
// and two `_cfae` ("verts") sides, or two sides of one kind and none of the
// other with the pair also passing `_fdaf` / `_adbb` respectively. The bounding
// rectangle of the accepted sides is then converted with rectRuling.asRuling.
// The boolean result is false whenever no ruling could be produced.
//
// NOTE(review): the slice expression `_befd._aeee[:4]` panics for paths with
// fewer than four points; presumably callers guarantee this - confirm.
func (_befd *subpath) makeRectRuling(_becff _gff.Color) (*ruling, bool) {
	if _adbfb {
		_ga.Log.Info("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076", _befd)
	}

	corners := _befd._aeee[:4]
	kinds := make(map[int]rulingKind, len(corners))
	for i, from := range corners {
		to := _befd._aeee[(i+1)%4]
		kinds[i] = _ddbf(from, to)
		if _adbfb {
			_agc.Printf("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066", i, kinds[i], from, to)
		}
	}
	if _adbfb {
		_agc.Printf("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a", kinds)
	}

	// Split the side indexes by kind.
	var verts, horzs []int
	for i, kind := range kinds {
		if kind == _aaad {
			horzs = append(horzs, i)
		} else if kind == _cfae {
			verts = append(verts, i)
		}
	}
	if _adbfb {
		_agc.Printf("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(horzs), horzs)
		_agc.Printf("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(verts), verts)
	}

	// The three accepted shapes are mutually exclusive.
	var ok bool
	switch {
	case len(horzs) == 2 && len(verts) == 2:
		ok = true
	case len(horzs) == 2 && len(verts) == 0:
		ok = _fdaf(corners[horzs[0]], corners[horzs[1]])
	case len(verts) == 2 && len(horzs) == 0:
		ok = _adbb(corners[verts[0]], corners[verts[1]])
	}
	if _adbfb {
		_agc.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
	}
	if !ok {
		if _adbfb {
			_ga.Log.Error("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v", _befd)
			_agc.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
		}
		return &ruling{}, false
	}

	// When one kind is missing entirely, use every side that is not of the
	// other kind in its place.
	if len(verts) == 0 {
		for i, kind := range kinds {
			if kind != _aaad {
				verts = append(verts, i)
			}
		}
	}
	if len(horzs) == 0 {
		for i, kind := range kinds {
			if kind != _cfae {
				horzs = append(horzs, i)
			}
		}
	}
	if _adbfb {
		_ga.Log.Info("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076", len(horzs), len(verts), len(corners), horzs, verts, corners)
	}

	// Pick the points with the larger/smaller Y among the horizontal sides
	// and the larger/smaller X among the vertical sides.
	h0, h1 := corners[horzs[0]], corners[horzs[1]]
	yHigh, yLow := h1, h0
	if h0.Y > h1.Y {
		yHigh, yLow = h0, h1
	}
	v0, v1 := corners[verts[0]], corners[verts[1]]
	xHigh, xLow := v1, v0
	if v0.X > v1.X {
		xHigh, xLow = v0, v1
	}

	rect := _ba.PdfRectangle{Llx: xHigh.X, Urx: xLow.X, Lly: yLow.Y, Ury: yHigh.Y}
	// Normalize so that Llx <= Urx and Lly <= Ury.
	if rect.Llx > rect.Urx {
		rect.Llx, rect.Urx = rect.Urx, rect.Llx
	}
	if rect.Lly > rect.Ury {
		rect.Lly, rect.Ury = rect.Ury, rect.Lly
	}

	candidate := rectRuling{PdfRectangle: rect, _ecaa: _aaeed(rect), Color: _becff}
	if candidate._ecaa == _fbcc {
		if _adbfb {
			_ga.Log.Error("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c")
		}
		return nil, false
	}
	result, isRuling := candidate.asRuling()
	if !isRuling {
		if _adbfb {
			_ga.Log.Error("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg")
		}
		return nil, false
	}
	if _adgbf {
		_agc.Printf("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a", result.String())
	}
	return result, true
}

// event is a small record of a float64 value, a flag and an int index.
// NOTE(review): how these are used is not visible in this part of the file;
// presumably a sweep-line event - confirm.
type event struct {
	_cdbc  float64
	_dddef bool
	_dcfcc int
}

// lineRuling is a colored line between the points `_eded` and `_badee`, tagged
// with a rulingKind and a markKind.
type lineRuling struct {
	_cebc rulingKind
	_beda markKind
	_gff.Color
	_eded, _badee _agf.Point
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_ebeb PageText) ToText() string {
	text := _ebeb.Text()
	return text
}

// _bdefa builds an RGBA image of the same size as `_bfbgb` in which every pixel
// whose R, G and B components are all zero becomes color.Transparent and every
// other pixel becomes `_dedeb`. Pixels whose color cannot be read are logged
// and left at the zero value of the new image.
func _bdefa(_bfbgb *_ba.Image, _dedeb _gff.Color) _ag.Image {
	width := int(_bfbgb.Width)
	height := int(_bfbgb.Height)
	out := _ag.NewRGBA(_ag.Rect(0, 0, width, height))
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			src, err := _bfbgb.ColorAt(x, y)
			if err != nil {
				_ga.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e", x, y)
				continue
			}
			r, g, b, _ := src.RGBA()
			pixel := _dedeb
			if r+g+b == 0 {
				pixel = _gff.Transparent
			}
			out.Set(x, y, pixel)
		}
	}
	return out
}

// findTableGrid tries to build a table from the paragraphs of `_face` that fall
// inside the tiles of the grid `_gfcc`.
//
// For every tile present in `_gfcc._gdcg` the paragraphs returned by inTile are
// stored as a composite cell. The search is abandoned (nil, nil) as soon as the
// number of empty tiles exceeds (1 - `_beec`) of the grid size, or when every
// cell of the first row exists and has `_cddef` set. On success it returns the
// reduced and subdivided table together with the set of paragraphs that were
// placed into it.
func (_face paraList) findTableGrid(_gfcc gridTiling) (*textTable, map[*textPara]struct{}) {
	numCols := len(_gfcc._fece)
	numRows := len(_gfcc._dfca)
	numCells := numCols * numRows

	table := textTable{
		_edeg:  true,
		_eacg:  numCols,
		_cgae:  numRows,
		_egfea: make(map[uint64]*textPara, numCells),
		_aaaga: make(map[uint64]compositeCell, numCells),
	}
	table.PdfRectangle = _gfcc.PdfRectangle

	used := map[*textPara]struct{}{}
	maxEmpty := int((1.0 - _beec) * float64(numCols*numRows))
	numEmpty := 0
	if _dfge {
		_ga.Log.Info("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064", numCols, numRows)
	}

	for y, rowKey := range _gfcc._dfca {
		row, haveRow := _gfcc._gdcg[rowKey]
		if !haveRow {
			continue
		}
		for x, colKey := range _gfcc._fece {
			tile, haveTile := row[colKey]
			if !haveTile {
				continue
			}
			paras := _face.inTile(tile)
			if len(paras) > 0 {
				table.putComposite(x, y, paras, tile.PdfRectangle)
				for _, para := range paras {
					used[para] = struct{}{}
				}
				continue
			}
			numEmpty++
			if numEmpty > maxEmpty {
				if _dfge {
					_ga.Log.Info("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064", numEmpty)
				}
				return nil, nil
			}
		}
	}

	// Count first-row cells that are missing or do not have `_cddef` set.
	numHeader := 0
	for x := 0; x < numCols; x++ {
		if cell := table.get(x, 0); cell == nil || !cell._cddef {
			numHeader++
		}
	}
	if numHeader == 0 {
		if _dfge {
			_ga.Log.Info("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030")
		}
		return nil, nil
	}

	reduced := table.reduceTiling(_gfcc, _gaea)
	reduced = reduced.subdivide()
	return reduced, used
}

// Error message strings related to font handling.
const (
	// _aba is "ERROR: Can't convert font object, invalid type".
	_aba = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
	// _cgc is "ERROR: Can't get font properties, font not found".
	_cgc = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"
	// _aef is "ERROR: Can't get font stream, invalid type".
	_aef = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
)
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// GetContentStreamOps returns the content stream operations stored in the
// `_cde` field of `_gbff`.
func (_gbff *PageText) GetContentStreamOps() *_fc.ContentStreamOperations {
	ops := _gbff._cde
	return ops
}

// _adgc returns the Llx of the first element of `_eadab._cdcg`. It panics when
// that slice is empty.
func _adgc(_eadab *textLine) float64 {
	first := _eadab._cdcg[0]
	return first.Llx
}

// pullWord adds `_gbad` to the bin `_fcfde` of `_adbc` and records it in
// `_fefag[_fcfde]` so that the bag it came from can drop it later.
// The bounding box of `_adbc` is extended with `_bbbafc` to include the word,
// and `_adbc._cdea` is raised to the word's `_eabbf` when that is larger.
func (_adbc *wordBag) pullWord(_gbad *textWord, _fcfde int, _fefag map[int]map[*textWord]struct{}) {
	_adbc.PdfRectangle = _bbbafc(_adbc.PdfRectangle, _gbad.PdfRectangle)
	if _adbc._cdea < _gbad._eabbf {
		_adbc._cdea = _gbad._eabbf
	}
	bin := append(_adbc._fcgd[_fcfde], _gbad)
	_adbc._fcgd[_fcfde] = bin
	_fefag[_fcfde][_gbad] = struct{}{}
}

// Compile-time switches. Those visible in this file guard debug logging
// (for example `_fdbg`, `_cebg`, `_adbfb`, `_dfge`). All are false in this build;
// the derived flags stay off while their parent flag is off.
const (
	_cebg  = false
	_bfgae = false
	_eeeb  = false
	_cgbd  = false
	_fdbg  = false
	_cfca  = false
	_eeab  = false
	_afga  = false

	// Group controlled by `_cbbb`.
	_cbbb = false
	_abfa = _cbbb && true
	_gaba = _abfa && false
	_bdgb = _cbbb && true

	// Group controlled by `_efda`.
	_efda  = false
	_baace = _efda && false
	_ggcd  = _efda && true

	// Group controlled by `_adgbf`.
	_adgbf = false
	_ebce  = _adgbf && false
	_eda   = _adgbf && false
	_dfge  = _adgbf && true
	_adbfb = _adgbf && false
	_ggaeg = _adgbf && false
)

// computeBbox returns the union (via `_bbbafc`) of the rectangles of all
// non-nil cells of `_eaeea`, or the zero rectangle when there are none.
func (_eaeea *textTable) computeBbox() _ba.PdfRectangle {
	var bbox _ba.PdfRectangle
	seeded := false
	for y := 0; y < _eaeea._cgae; y++ {
		for x := 0; x < _eaeea._eacg; x++ {
			cell := _eaeea.get(x, y)
			switch {
			case cell == nil:
				// Nothing at this position.
			case !seeded:
				bbox = cell.PdfRectangle
				seeded = true
			default:
				bbox = _bbbafc(bbox, cell.PdfRectangle)
			}
		}
	}
	return bbox
}
// Text gets the extracted text contained in the list `_caec`, as written into a
// strings.Builder by `_dbbbd`.
func (_caec *list) Text() string {
	var sb _f.Builder
	var s string
	_dbbbd(_caec, &sb, &s)
	return sb.String()
}

// establishSubpath returns the last subpath of `_bbcf`. If lastpointEstablished
// reports false, a new subpath created by `_ddbd` from the returned point is
// appended first. Returns nil when there is no subpath at all; otherwise
// `_bbcf._gbdgg` is cleared before returning.
func (_bbcf *shapesState) establishSubpath() *subpath {
	point, established := _bbcf.lastpointEstablished()
	if !established {
		_bbcf._gfce = append(_bbcf._gfce, _ddbd(point))
	}
	n := len(_bbcf._gfce)
	if n == 0 {
		return nil
	}
	_bbcf._gbdgg = false
	return _bbcf._gfce[n-1]
}

// primaries returns the distinct `_edga` values of the rulings in `_aadac`,
// sorted in increasing order.
func (_aadac rulingList) primaries() []float64 {
	seen := make(map[float64]struct{}, len(_aadac))
	for _, r := range _aadac {
		seen[r._edga] = struct{}{}
	}
	values := make([]float64, 0, len(seen))
	for v := range seen {
		values = append(values, v)
	}
	_e.Float64s(values)
	return values
}

// _aedcc returns the integers 0.._bbdfg-1 ordered by `_gcfcb`, where
// `_gcfcb(a, b)` reports whether value a must sort before value b.
func _aedcc(_bbdfg int, _gcfcb func(int, int) bool) []int {
	order := make([]int, _bbdfg)
	for i := 0; i < len(order); i++ {
		order[i] = i
	}
	_e.Slice(order, func(a, b int) bool {
		return _gcfcb(order[a], order[b])
	})
	return order
}

// drawRectangle appends a closed rectangular subpath with corner
// (`_cgbce`, `_edb`), width `_cdfc` and height `_bfed` to `_faac`.
func (_faac *shapesState) drawRectangle(_cgbce, _edb, _cdfc, _bfed float64) {
	x1 := _cgbce + _cdfc
	y1 := _edb + _bfed
	if _fdbg {
		ll := _faac.devicePoint(_cgbce, _edb)
		ur := _faac.devicePoint(x1, y1)
		rect := _ba.PdfRectangle{Llx: ll.X, Lly: ll.Y, Urx: ur.X, Ury: ur.Y}
		_ga.Log.Info("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066", rect)
	}
	_faac.newSubPath()
	_faac.moveTo(_cgbce, _edb)
	_faac.lineTo(x1, _edb)
	_faac.lineTo(x1, y1)
	_faac.lineTo(_cgbce, y1)
	_faac.closePath()
}

// clearPath drops all subpaths of `_fgbfc` and clears its `_gbdgg` flag.
func (_fgbfc *shapesState) clearPath() {
	_fgbfc._gfce, _fgbfc._gbdgg = nil, false
	if _fdbg {
		_ga.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _fgbfc)
	}
}

// secMinMax returns the smallest `_fcec` and the largest `_abeg` over all
// rulings in `_ddaed`. It panics when `_ddaed` is empty.
func (_ddaed rulingList) secMinMax() (float64, float64) {
	lo, hi := _ddaed[0]._fcec, _ddaed[0]._abeg
	for i := 1; i < len(_ddaed); i++ {
		r := _ddaed[i]
		if r._fcec < lo {
			lo = r._fcec
		}
		if r._abeg > hi {
			hi = r._abeg
		}
	}
	return lo, hi
}

// _fdgb merges word bags that are contained in larger word bags.
//
// The bags are sorted in place by decreasing area, then decreasing height.
// Each bag that has not yet been absorbed then absorbs every later, not yet
// absorbed bag whose rectangle is contained (per `_gcdb`) in its own rectangle
// widened to the left by its `_cdea`. The surviving bags are returned in sorted
// order. Inputs with at most one bag are returned unchanged.
func _fdgb(_decgc []*wordBag) []*wordBag {
	if len(_decgc) <= 1 {
		return _decgc
	}
	if _cbbb {
		_ga.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}

	// NOTE(review): the final tie-break compares slice positions rather than
	// elements; kept as in the original.
	_e.Slice(_decgc, func(x, y int) bool {
		a, b := _decgc[x], _decgc[y]
		areaA := a.Width() * a.Height()
		areaB := b.Width() * b.Height()
		if areaA != areaB {
			return areaA > areaB
		}
		if a.Height() != b.Height() {
			return a.Height() > b.Height()
		}
		return x < y
	})

	var merged []*wordBag
	absorbed := make(intSet)
	for i := 0; i < len(_decgc); i++ {
		if absorbed.has(i) {
			continue
		}
		outer := _decgc[i]
		for j := i + 1; j < len(_decgc); j++ {
			// Skip bags already absorbed by an earlier bag. The original
			// tested the outer index here, which can never be set at this
			// point, so emptied bags were examined and absorbed again.
			if absorbed.has(j) {
				continue
			}
			inner := _decgc[j]
			bounds := outer.PdfRectangle
			bounds.Llx -= outer._cdea
			if _gcdb(bounds, inner.PdfRectangle) {
				outer.absorb(inner)
				absorbed.add(j)
			}
		}
		merged = append(merged, outer)
	}

	// Sanity check: every bag is either kept or absorbed.
	if len(_decgc) != len(merged)+len(absorbed) {
		_ga.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_decgc), len(merged), len(absorbed))
	}
	return merged
}

// computeEBBoxes computes the `_dgabg` rectangle of every paragraph in `_eade`.
//
// Each `_dgabg` starts as the paragraph's own rectangle. It is then widened
// horizontally to the Llx/Urx of other paragraphs whose `_dgabg.Ury` is not
// above this paragraph's Lly, but never past the nearest neighbour returned by
// yNeighbours(0) on the left or right. When `_afgf` is set the paragraph
// rectangles are replaced by the computed ones.
func (_eade paraList) computeEBBoxes() {
	if _cebg {
		_ga.Log.Info("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a")
	}
	for _, para := range _eade {
		para._dgabg = para.PdfRectangle
	}

	neighbours := _eade.yNeighbours(0)
	for i, para := range _eade {
		ebox := para._dgabg

		// Horizontal limits imposed by the neighbours on either side.
		leftLimit, rightLimit := -1.0e9, +1.0e9
		for _, k := range neighbours[para] {
			other := _eade[k]._dgabg
			switch {
			case other.Urx < ebox.Llx:
				leftLimit = _gf.Max(leftLimit, other.Urx)
			case ebox.Urx < other.Llx:
				rightLimit = _gf.Min(rightLimit, other.Llx)
			}
		}

		for k, candidate := range _eade {
			other := candidate._dgabg
			if i == k || other.Ury > ebox.Lly {
				continue
			}
			switch {
			case leftLimit <= other.Llx && other.Llx < ebox.Llx:
				ebox.Llx = other.Llx
			case other.Urx <= rightLimit && ebox.Urx < other.Urx:
				ebox.Urx = other.Urx
			}
		}
		if _cebg {
			_agc.Printf("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a", i, para._dgabg, ebox, _adgd(para.text(), 50))
		}
		para._dgabg = ebox
	}

	if _afgf {
		for _, para := range _eade {
			para.PdfRectangle = para._dgabg
		}
	}
}

// blocks reports whether some ruling in `_aadc` lies between `_abeb` and `_bbge`:
// its `_edga` is within their `_edga` values (with tolerance `_gbb`) and its
// [`_fcec`, `_abeg`] interval overlaps the overlap of theirs. It returns false
// when the [`_fcec`, `_abeg`] intervals of `_abeb` and `_bbge` do not overlap.
func (_aadc rulingList) blocks(_abeb, _bbge *ruling) bool {
	if _abeb._fcec > _bbge._abeg || _bbge._fcec > _abeb._abeg {
		return false
	}
	overlapLo := _gf.Max(_abeb._fcec, _bbge._fcec)
	overlapHi := _gf.Min(_abeb._abeg, _bbge._abeg)

	// Order the pair by `_edga`.
	if _abeb._edga > _bbge._edga {
		_abeb, _bbge = _bbge, _abeb
	}
	for _, r := range _aadc {
		if !(_abeb._edga <= r._edga+_gbb) {
			continue
		}
		if !(r._edga <= _bbge._edga+_gbb) {
			continue
		}
		if r._fcec <= overlapHi && overlapLo <= r._abeg {
			return true
		}
	}
	return false
}

// mergePrimary returns the arithmetic mean of the `_edga` values of `_beeg`.
// It panics when `_beeg` is empty.
func (_beeg rulingList) mergePrimary() float64 {
	total := _beeg[0]._edga
	for i := 1; i < len(_beeg); i++ {
		total += _beeg[i]._edga
	}
	return total / float64(len(_beeg))
}

// _geef is a regular expression source. Decoded, it is a case-insensitive
// pattern matching a Roman numeral at the start of the text that is either
// followed by ")" or "." or enclosed in parentheses.
var _geef string = "\u0028\u003f\u0069\u0029\u005e\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028?\u003a\u0044\u007cM\u0029\u007c\u0044\u003f\u0043{\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028\u003f\u003a\u004c\u007c\u0043\u0029\u007cL\u003f\u0058\u007b\u0030\u002c\u0033}\u0029\u0028\u0049\u0028\u003f\u003a\u0056\u007c\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u005c\u0029\u007c\u005c\u002e\u0029\u007c\u005e\u005c\u0028\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028\u003f\u003aD\u007cM\u0029\u007c\u0044\u003f\u0043\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028?\u003a\u004c\u007c\u0043\u0029\u007c\u004c?\u0058\u007b0\u002c\u0033\u007d\u0029(\u0049\u0028\u003f\u003a\u0056|\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u005c\u0029"
// bounded is implemented by anything that has a bounding rectangle.
type bounded interface {
	bbox() _ba.PdfRectangle
}

// _eegf returns `_efff.Ury` minus the Lly of the bounding box of `_adcc`.
func _eegf(_efff _ba.PdfRectangle, _adcc bounded) float64 {
	box := _adcc.bbox()
	return _efff.Ury - box.Lly
}

// numBorders returns how many of the four flags `_ecaf`, `_bfab`, `_bgbfd` and
// `_dgccd` are set on the tile.
func (_dddg gridTile) numBorders() int {
	count := 0
	for _, set := range [...]bool{_dddg._ecaf, _dddg._bfab, _dddg._bgbfd, _dddg._dgccd} {
		if set {
			count++
		}
	}
	return count
}

// _ddbf classifies the segment from `_eadac` to `_deaa` by passing its absolute
// X and Y extents and the tolerance `_beed` to `_cbggc`.
func _ddbf(_eadac, _deaa _agf.Point) rulingKind {
	dx := _gf.Abs(_eadac.X - _deaa.X)
	dy := _gf.Abs(_eadac.Y - _deaa.Y)
	return _cbggc(dx, dy, _beed)
}

// complete reports whether every tile stored in `_ecaag._gdcg` is complete.
func (_ecaag gridTiling) complete() bool {
	for _, row := range _ecaag._gdcg {
		for _, tile := range row {
			if tile.complete() {
				continue
			}
			return false
		}
	}
	return true
}

// _ffeg appends a copy of the template mark `_gbdc`, with its Text set to
// `_fccd`, to `_defd` using `_aadbb`.
func _ffeg(_defd []TextMark, _deea *int, _fccd string) []TextMark {
	mark := _gbdc
	mark.Text = _fccd
	return _aadbb(_defd, _deea, mark)
}

// setTextRise stores `_bcb` in the `_eaaf` field of the text state. It is a
// no-op on a nil receiver.
func (_cagg *textObject) setTextRise(_bcb float64) {
	if _cagg != nil {
		_cagg._efed._eaaf = _bcb
	}
}

// _caeb reports whether `_geeb` divided by the larger of `_dccf` and `_efcc` is
// below `_beed`.
func _caeb(_geeb, _efcc float64) bool {
	denom := _gf.Max(_dccf, _efcc)
	return _geeb/denom < _beed
}

// lists is a slice of list pointers.
type lists []*list

// applyRemovals deletes from the bins of `_cdab` all the words recorded in
// `_ggef`. A bin that loses all of its words is removed from `_cdab._fcgd`.
func (_cdab *wordBag) applyRemovals(_ggef map[int]map[*textWord]struct{}) {
	for key, removed := range _ggef {
		if len(removed) == 0 {
			continue
		}
		current := _cdab._fcgd[key]
		remaining := len(current) - len(removed)
		if remaining == 0 {
			delete(_cdab._fcgd, key)
			continue
		}
		kept := make([]*textWord, remaining)
		next := 0
		for _, word := range current {
			if _, drop := removed[word]; drop {
				continue
			}
			kept[next] = word
			next++
		}
		_cdab._fcgd[key] = kept
	}
}

// findTables links neighbouring paragraphs, sorts `_egecg` in place with
// `_bafa`, and returns the tables found by findGridTables (when `_bfcf` is set)
// followed by those found by findTextTables (when `_cbdec` is set).
func (_egecg paraList) findTables(_cdcd []gridTiling) []*textTable {
	_egecg.addNeighbours()
	_e.Slice(_egecg, func(a, b int) bool {
		return _bafa(_egecg[a], _egecg[b]) < 0
	})

	var tables []*textTable
	if _bfcf {
		tables = append(tables, _egecg.findGridTables(_cdcd)...)
	}
	if _cbdec {
		tables = append(tables, _egecg.findTextTables()...)
	}
	return tables
}

// setFont stores `_cbaa` in the `_dcd` field of the text state and then looks
// up the font named `_egf`, storing it in `_gbdg`. The lookup error is
// returned, in which case `_dcd` has already been updated. It is a no-op on a
// nil receiver.
func (_ebe *textObject) setFont(_egf string, _cbaa float64) error {
	if _ebe == nil {
		return nil
	}
	_ebe._efed._dcd = _cbaa
	font, err := _ebe.getFont(_egf)
	if err == nil {
		_ebe._efed._gbdg = font
	}
	return err
}

// moveTextSetLeading sets the `_bfa` field of the text state to -`_cae` and then
// calls moveLP(`_faab`, `_cae`).
func (_dcec *textObject) moveTextSetLeading(_faab, _cae float64) {
	_dcec._efed._bfa = -_cae
	_dcec.moveLP(_faab, _cae)
}

// _dbfg builds one "bullet" list per line stored under the first key returned
// by `_bdfa(_ecff)`.
//
// For each such line, nested lists are collected with `_eeag` from the
// following keys (when there are any), further lines belonging to the item are
// collected with `_ggdbe`, and the list text is produced by `_efgc`. An empty,
// non-nil slice is returned when `_bdfa` yields no keys.
func _dbfg(_cade []*textLine, _ecff map[float64][]*textLine) []*list {
	keys := _bdfa(_ecff)
	result := []*list{}
	if len(keys) == 0 {
		return result
	}

	const nextKeyIdx = 1
	topLines := _ecff[keys[0]]
	last := len(topLines) - 1
	for i, line := range topLines {
		start := line._bbfg
		// `_bbfg` of the next top-level line, or -1 for the last one.
		end := -1.0
		if i < last {
			end = topLines[i+1]._bbfg
		}

		nested := []*list{}
		if nextKeyIdx < len(keys) {
			nested = _eeag(_cade, _ecff, keys, nextKeyIdx, start, end)
		}

		// The item's own lines stop at its first nested list, if that list
		// has any lines.
		limit := end
		if len(nested) > 0 && len(nested[0]._aebd) > 0 {
			limit = nested[0]._aebd[0]._bbfg
		}

		itemLines := []*textLine{line}
		itemLines = append(itemLines, _ggdbe(line, _cade, keys, start, limit)...)

		item := _dgda(itemLines, "\u0062\u0075\u006c\u006c\u0065\u0074", nested)
		item._ddef = _efgc(itemLines, "")
		result = append(result, item)
	}
	return result
}

// paraList is a slice of paragraphs.
type paraList []*textPara

// getComposite returns the paragraphs and bounding box of the composite cell
// stored at (`_dacd`, `_bdge`), or nil and the zero rectangle when there is none.
func (_ceed *textTable) getComposite(_dacd, _bdge int) (paraList, _ba.PdfRectangle) {
	cell, found := _ceed._aaaga[_bgcc(_dacd, _bdge)]
	if _efda {
		_agc.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a", _dacd, _bdge, cell.String())
	}
	if found {
		return cell.parasBBox()
	}
	return nil, _ba.PdfRectangle{}
}

// connections returns the set of indexes of `_fdcc` reachable from `_efba` by
// repeatedly following `_bcdd`, where `_bcdd[i].has(j)` links i to j.
func (_fdcc rulingList) connections(_bcdd map[int]intSet, _efba int) intSet {
	reached := make(intSet)
	visited := make(intSet)

	var visit func(int)
	visit = func(from int) {
		if visited.has(from) {
			return
		}
		visited.add(from)
		for i := range _fdcc {
			if _bcdd[i].has(from) {
				reached.add(i)
			}
		}
		for i := range _fdcc {
			if reached.has(i) {
				visit(i)
			}
		}
	}
	visit(_efba)
	return reached
}

// _adad creates a word bag from `_bbe`. The bag is created by `_bfcbd` from the
// first word; every other word is appended to the bin selected by `_ceee` and
// merged into the bag's rectangle. The bag is sorted before it is returned.
// It panics when `_bbe` is empty.
func _adad(_bbe []*textWord, _cbc float64, _ecad, _effa rulingList) *wordBag {
	bag := _bfcbd(_bbe[0], _cbc, _ecad, _effa)
	for i := 1; i < len(_bbe); i++ {
		word := _bbe[i]
		bin := _ceee(word._dfagd)
		bag._fcgd[bin] = append(bag._fcgd[bin], word)
		bag.PdfRectangle = _bbbafc(bag.PdfRectangle, word.PdfRectangle)
	}
	bag.sort()
	return bag
}

// _ebaf is the scale factor 1/1000.
const _ebaf = 1.0 / 1000.0

// getListLines returns the lines of `_dgec` whose first mark of the first word
// satisfies `_ecfd`, followed by the lines returned by `_ecaea`.
func (_dgec *textPara) getListLines() []*textLine {
	extra := _ecaea(_dgec._fdec)

	var lines []*textLine
	for _, line := range _dgec._fdec {
		if firstMark := line._cdcg[0]._eaae[0]; _ecfd(firstMark) {
			lines = append(lines, line)
		}
	}
	return append(lines, extra...)
}

// _abda converts the children of `_debg` into "bullet" lists according to
// their `_cdde` tag:
//   - "LI":    the child becomes a list whose nested lists come from a
//     recursive call;
//   - "LBody": the result of recursing into the child is returned immediately;
//   - "L":     the lists of the child are appended and the result is returned
//     immediately.
//
// Children with any other tag are ignored.
func _abda(_debg *list) []*list {
	var result []*list
	for _, child := range _debg._abcc {
		tag := child._cdde
		if tag == "\u004c\u0049" {
			lines := _acdc(child)
			nested := _abda(child)
			item := _dgda(lines, "\u0062\u0075\u006c\u006c\u0065\u0074", nested)
			item._ddef = _efgc(lines, "")
			result = append(result, item)
		} else if tag == "\u004c\u0042\u006fd\u0079" {
			return _abda(child)
		} else if tag == "\u004c" {
			return append(result, _abda(child)...)
		}
	}
	return result
}

// _de is a package-level flag, false by default.
var _de = false

// setHorizScaling stores `_gfd` in the `_def` field of the text state. It is a
// no-op on a nil receiver.
func (_gebc *textObject) setHorizScaling(_gfd float64) {
	if _gebc != nil {
		_gebc._efed._def = _gfd
	}
}

// absorb moves every word of `_bbfc` into `_dadbc` and then removes the moved
// words from `_bbfc`.
func (_dadbc *wordBag) absorb(_bbfc *wordBag) {
	removals := _bbfc.makeRemovals()
	for bin, words := range _bbfc._fcgd {
		for i := range words {
			_dadbc.pullWord(words[i], bin, removals)
		}
	}
	_bbfc.applyRemovals(removals)
}

// taken reports whether `_ccgfc` is nil or has its `_gecb` flag set.
func (_ccgfc *textPara) taken() bool {
	if _ccgfc == nil {
		return true
	}
	return _ccgfc._gecb
}
// _fgfag reports whether the X ranges of `_addcb` and `_agef` overlap or touch.
func _fgfag(_addcb, _agef _ba.PdfRectangle) bool {
	if _agef.Llx > _addcb.Urx {
		return false
	}
	return _addcb.Llx <= _agef.Urx
}

// moveTo sets the `_gbdgg` flag and stores the device-space position of
// (`_cbg`, `_fff`) in `_edce._gcaaf`.
func (_edce *shapesState) moveTo(_cbg, _fff float64) {
	_edce._gbdgg = true
	_edce._gcaaf = _edce.devicePoint(_cbg, _fff)
	if !_fdbg {
		return
	}
	_ga.Log.Info("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066", _cbg, _fff, _edce._gcaaf)
}

// _cgfd reports whether the X range of `_eeac` strictly overlaps the X range of
// `_bffg` after the latter is widened by `_efef` on both sides.
func _cgfd(_bffg *wordBag, _eeac *textWord, _efef float64) bool {
	if !(_eeac.Llx < _bffg.Urx+_efef) {
		return false
	}
	return _bffg.Llx-_efef < _eeac.Urx
}

// removeDuplicates sorts `_gggd` in place and returns a new list in which
// consecutive rulings that are equal (per ruling.equals) are collapsed into
// one. Returns nil for an empty list.
func (_gggd rulingList) removeDuplicates() rulingList {
	if len(_gggd) == 0 {
		return nil
	}
	_gggd.sort()
	unique := rulingList{_gggd[0]}
	for i := 1; i < len(_gggd); i++ {
		candidate := _gggd[i]
		if !candidate.equals(unique[len(unique)-1]) {
			unique = append(unique, candidate)
		}
	}
	return unique
}

// _dcdb returns copies of the marks in `_gacd` with `_efeg` set to true and
// `_cfaa` set to `_ecbfg`. The input slice is not modified.
func _dcdb(_gacd []TextMark, _ecbfg *TextTable) []TextMark {
	var tagged []TextMark
	for i := range _gacd {
		mark := _gacd[i]
		mark._efeg = true
		mark._cfaa = _ecbfg
		tagged = append(tagged, mark)
	}
	return tagged
}

// log prints a one-line summary of every non-nil paragraph in `_ccddf`, headed
// by the title `_dcba`. It does nothing unless `_afga` is set.
func (_ccddf paraList) log(_dcba string) {
	if !_afga {
		return
	}
	_ga.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _dcba, len(_ccddf))
	for i, para := range _ccddf {
		if para == nil {
			continue
		}
		text := para.text()
		// Table dimensions when the paragraph holds a table, two spaces otherwise.
		dims := "\u0020\u0020"
		if table := para._fbbea; table != nil {
			dims = _agc.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", table._eacg, table._cgae)
		}
		_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", i, para.PdfRectangle, dims, _adgd(text, 50))
	}
}

// getRight returns, for every row of `_aebb`, the `_fdgf` neighbour of the cell
// in the last column. It returns nil when any of those neighbours is taken or
// when they are not chained to each other through `_ecada`.
func (_aebb *textTable) getRight() paraList {
	column := make(paraList, _aebb._cgae)
	for y := range column {
		neighbour := _aebb.get(_aebb._eacg-1, y)._fdgf
		if neighbour.taken() {
			return nil
		}
		column[y] = neighbour
	}
	for y := 1; y < _aebb._cgae; y++ {
		if column[y-1]._ecada != column[y] {
			return nil
		}
	}
	return column
}

// yMean returns the mean of the Y coordinates of the two end points of `_acab`.
func (_acab lineRuling) yMean() float64 {
	sum := _acab._eded.Y + _acab._badee.Y
	return 0.5 * sum
}

// _abag returns `_dag(_ebc)` minus `_dag(_bdabb)`.
func _abag(_ebc, _bdabb bounded) float64 {
	first := _dag(_ebc)
	second := _dag(_bdabb)
	return first - second
}
// renderText processes the string bytes `_gbfc` with the current font and
// appends one text mark per decoded string to `_ced._cfde`.
// `_gbga` and `_gceg` are passed through unchanged to newTextMark.
//
// Outline, as visible in the body:
//   - If `_ced._dgcf` is set the text is not processed and nil is returned.
//   - The bytes are converted to charcodes and then to strings; the character
//     and miss counters of the text state are updated.
//   - The width of a space is taken from the font (rune ' ', then charcode 32),
//     falling back to the default font.
//   - Glyph metrics are scaled by `_ebaf`, or by 1 for Type3 fonts.
//   - Strings consisting of a single NUL rune are skipped.
//   - Word spacing (`_gdeg`) is added only when the string is the single rune 32.
//   - After every mark the text matrix `_eefe` is advanced by the displacement
//     that includes character spacing.
//
// An error is returned when the font has no metrics for a charcode.
func ( _ced * textObject ) renderText ( _gbga _add . PdfObject , _gbfc [ ] byte , _gceg int ) error { if _ced . _dgcf { _ga . Log . Debug ( "\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e" ) ;
return nil ; } ; _eaac := _ced . getCurrentFont ( ) ; _decg := _eaac . BytesToCharcodes ( _gbfc ) ; _fdb , _cbe , _egcd := _eaac . CharcodesToStrings ( _decg ) ; if _egcd > 0 { _ga . Log . Debug ( "\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064" , _cbe , _egcd ) ;
} ; _ced . _efed . _abd += _cbe ; _ced . _efed . _bafdf += _egcd ; _bae := _ced . _efed ; _dggb := _bae . _dcd ; _ggf := _bae . _def / 100.0 ; _baaf := _ebaf ; if _eaac . Subtype ( ) == "\u0054\u0079\u0070e\u0033" { _baaf = 1 ; } ; _bbabg , _bbg := _eaac . GetRuneMetrics ( ' ' ) ; if ! _bbg { _bbabg , _bbg = _eaac . GetCharMetrics ( 32 ) ;
} ; if ! _bbg { _bbabg , _ = _ba . DefaultFont ( ) . GetRuneMetrics ( ' ' ) ; } ; _gab := _bbabg . Wx * _baaf ; _ga . Log . Trace ( "\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066" , _gab , _fdb , _eaac , _dggb ) ;
_cdfb := _agf . NewMatrix ( _dggb * _ggf , 0 , 0 , _dggb , 0 , _bae . _eaaf ) ; if _cfca { _ga . Log . Info ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071" , len ( _decg ) , _decg , _fdb ) ;
} ; _ga . Log . Trace ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071" , len ( _decg ) , _decg , len ( _fdb ) ) ; _fab := _ced . getFillColor ( ) ;
_cgf := _ced . getStrokeColor ( ) ; for _aaf , _cda := range _fdb { _deg := [ ] rune ( _cda ) ; if len ( _deg ) == 1 && _deg [ 0 ] == '\x00' { continue ; } ; _bafdg := _decg [ _aaf ] ; _gbec := _ced . _bace . CTM . Mult ( _ced . _eefe ) . Mult ( _cdfb ) ; _gcaa := 0.0 ; if len ( _deg ) == 1 && _deg [ 0 ] == 32 { _gcaa = _bae . _gdeg ;
} ; _aff , _dccb := _eaac . GetCharMetrics ( _bafdg ) ; if ! _dccb { _ga . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073" , _bafdg , _deg , _deg , _eaac ) ;
return _agc . Errorf ( "\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064" , _eaac . String ( ) , _bafdg ) ; } ; _cgdd := _agf . Point { X : _aff . Wx * _baaf , Y : _aff . Wy * _baaf } ;
_dbf := _agf . Point { X : ( _cgdd . X * _dggb + _gcaa ) * _ggf } ; _egeb := _agf . Point { X : ( _cgdd . X * _dggb + _bae . _ffaf + _gcaa ) * _ggf } ; if _cfca { _ga . Log . Info ( "\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066" , _dggb , _bae . _ffaf , _bae . _gdeg , _ggf ) ;
_ga . Log . Info ( "\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f" , _cgdd , _dbf , _egeb ) ; } ; _fcdg := _gfdd ( _dbf ) ; _aceb := _gfdd ( _egeb ) ; _egec := _ced . _bace . CTM . Mult ( _ced . _eefe ) . Mult ( _fcdg ) ;
if _cgbd { _ga . Log . Info ( "e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a" + "\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a" + "\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073" , _ced . _bace . CTM , _ced . _eefe , _aceb , _eafa ( _ced . _bace . CTM . Mult ( _ced . _eefe ) . Mult ( _aceb ) ) , _fcdg , _egec , _eafa ( _egec ) ) ;
} ; _dadg , _egbg := _ced . newTextMark ( _cg . ExpandLigatures ( _deg ) , _gbec , _eafa ( _egec ) , _gf . Abs ( _gab * _gbec . ScalingFactorX ( ) ) , _eaac , _ced . _efed . _ffaf , _fab , _cgf , _gbga , _fdb , _aaf , _gceg ) ; if ! _egbg { _ga . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067" ) ;
continue ; } ; if _eaac == nil { _ga . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e" ) ; } else if _eaac . Encoder ( ) == nil { _ga . Log . Debug ( "E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073" , _eaac ) ;
} else { if _ecfc , _bgd := _eaac . Encoder ( ) . CharcodeToRune ( _bafdg ) ; _bgd { _dadg . _gccf = string ( _ecfc ) ; } ; } ; _ga . Log . Trace ( "i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073" , _aaf , _bafdg , _dadg , _gbec ) ;
_ced . _cfde = append ( _ced . _cfde , & _dadg ) ; _ced . _eefe . Concat ( _aceb ) ; } ; return nil ; } ;
// topoOrder returns an ordering of the paragraph indexes derived from the
// readBefore relation. It runs a depth-first traversal in which, from
// paragraph i, every unvisited paragraph j with readBefore(order, i, j) is
// visited first, and i is recorded after them. The recorded sequence is
// passed through _bdbc before being returned (presumably a reversal of the
// post-order; confirm against _bdbc).
func ( _fgbgd paraList ) topoOrder ( ) [ ] int { if _afga { _ga . Log . Info ( "\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a" ) ; } ; _ecadg := len ( _fgbgd ) ; _ddde := make ( [ ] bool , _ecadg ) ;
_gddada := make ( [ ] int , 0 , _ecadg ) ; _bcgbg := _fgbgd . llyOrdering ( ) ; var _cbce func ( _dfegg int ) ; _cbce = func ( _bbdac int ) { _ddde [ _bbdac ] = true ; for _fbbae := 0 ; _fbbae < _ecadg ; _fbbae ++ { if ! _ddde [ _fbbae ] { if _fgbgd . readBefore ( _bcgbg , _bbdac , _fbbae ) { _cbce ( _fbbae ) ;
} ; } ; } ; _gddada = append ( _gddada , _bbdac ) ; } ; for _aecfa := 0 ; _aecfa < _ecadg ; _aecfa ++ { if ! _ddde [ _aecfa ] { _cbce ( _aecfa ) ; } ; } ; return _bdbc ( _gddada ) ; } ;

// getParagraphs builds the paragraphs of the page from its text marks.
//
// Rulings are gathered from the page's `_efea` (when `_gbge` is set) and
// `_ebafc` (when `_gaedd` is set) and converted with toTilings. The marks are
// then processed in four passes, one per value 0, 90, 180 and 270 of the
// mark field `_gfbg`; the marks of each pass are handed to _cabcg and the
// resulting paragraphs are appended in pass order. The passes stop early once
// every mark has been consumed.
func (_gef *PageText) getParagraphs() paraList {
	var _egd rulingList
	if _gbge {
		_aeeb := _agde(_gef._efea)
		_egd = append(_egd, _aeeb...)
	}
	if _gaedd {
		_bfcb := _cgdde(_gef._ebafc)
		_egd = append(_egd, _bfcb...)
	}
	_egd, _gdga := _egd.toTilings()

	var _bfaf paraList
	// _bcga is the number of marks not yet consumed by an earlier pass.
	_bcga := len(_gef._aebf)
	for _bee := 0; _bee < 360 && _bcga > 0; _bee += 90 {
		// No more than the _bcga unconsumed marks can match this pass, so that
		// is the capacity to reserve. (The previous bound, len(marks)-_bcga,
		// was 0 on the first pass and so reserved nothing.)
		_debac := make([]*textMark, 0, _bcga)
		for _, _dggg := range _gef._aebf {
			if _dggg._gfbg == _bee {
				_debac = append(_debac, _dggg)
			}
		}
		if len(_debac) > 0 {
			_bedd := _cabcg(_debac, _gef._bbdc, _egd, _gdga, _gef._eaag._caab)
			_bfaf = append(_bfaf, _bedd...)
			_bcga -= len(_debac)
		}
	}
	return _bfaf
}

// textLine is a bounding rectangle together with the words (`_cdcg`) it
// contains and two float64 attributes, `_bbfg` and `_fdfb`.
// `_fdfb` is what textPara.fontsize returns for the first line of a paragraph.
type textLine struct { _ba . PdfRectangle ; _bbfg float64 ; _cdcg [ ] * textWord ; _fdfb float64 ; } ;
2024-03-27 22:34:33 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
2024-04-16 11:40:43 +00:00
type ImageMark struct { Image * _ba . Image ;
2024-03-27 22:34:33 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ; Height float64 ;
// Position of the image in PDF coordinates (lower left corner).
X float64 ; Y float64 ;
// Angle in degrees, if rotated.
2024-04-16 11:40:43 +00:00
Angle float64 ; } ;
// highestWord returns the first word in bucket `_bade` of the bag (`_fcgd[_bade]`)
// whose `_dfagd` value lies in the closed interval [_faafd, _bcea].
// It returns nil when the bucket holds no such word.
// "First" is in the stored order of the bucket; the ordering itself is
// established elsewhere.
func ( _begf * wordBag ) highestWord ( _bade int , _faafd , _bcea float64 ) * textWord { for _ , _ebda := range _begf . _fcgd [ _bade ] { if _faafd <= _ebda . _dfagd && _ebda . _dfagd <= _bcea { return _ebda ; } ; } ; return nil ; } ;
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
// Among its fields are the page resources (`_gbe`), two font/text caches keyed
// by string (`_bd`, `_baa`, `_gfe`), the extraction options (`_cf`) and the
// page annotations (`_bg`).
type Extractor struct { _ac string ; _gbe * _ba . PdfPageResources ; _cb _ba . PdfRectangle ; _eb * _ba . PdfRectangle ; _bd map [ string ] fontEntry ; _baa map [ string ] textResult ; _gfe map [ string ] textResult ; _ae int64 ; _edc int ; _cf * Options ; _ab * _add . PdfObject ;
_dd _add . PdfObject ; _bg [ ] * _ba . PdfAnnotation ; } ;
// _beaa returns the separator to write between two items whose positions are
// `_ecee` and `_ceabd`: a newline when the two values differ
// (_dbeae(_ecee-_ceabd) is false), otherwise a single space.
func _beaa ( _ecee , _ceabd float64 ) string { _bccef := ! _dbeae ( _ecee - _ceabd ) ; if _bccef { return "\u000a" ; } ; return "\u0020" ; } ;
// _dbbbd writes the text of `_dddcf` (as produced by _fbbd with the indent
// `*_gfea`) to `_ccfc`, then recurses into each child list in `_abcc` with
// the indent extended by three spaces.
func _dbbbd ( _dddcf * list , _ccfc * _f . Builder , _gfea * string ) { _fgfcc := _fbbd ( _dddcf , _gfea ) ;
_ccfc . WriteString ( _fgfcc ) ; for _ , _bcfb := range _dddcf . _abcc { _bffga := * _gfea + "\u0020\u0020\u0020" ; _dbbbd ( _bcfb , _ccfc , & _bffga ) ; } ; } ;
// log writes a debug dump of the table titled `_fadd`. It does nothing unless
// the `_efda` flag is set. The header (dimensions `_eacg` x `_cgae`, the
// `_edeg` flag and the bounding box) goes through Log.Info; each non-nil cell
// is then printed to stdout row by row with its column, row, bounding box,
// text (via _adgd(text, 50)) and rune count.
func ( _ffccd * textTable ) log ( _fadd string ) { if ! _efda { return ; } ; _ga . Log . Info ( "~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066" , _fadd , _ffccd . _eacg , _ffccd . _cgae , _ffccd . _edeg , _ffccd . PdfRectangle ) ;
for _aabca := 0 ; _aabca < _ffccd . _cgae ; _aabca ++ { for _ebaff := 0 ; _ebaff < _ffccd . _eacg ; _ebaff ++ { _gddce := _ffccd . get ( _ebaff , _aabca ) ; if _gddce == nil { continue ; } ; _agc . Printf ( "%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a" , _ebaff , _aabca , _gddce . PdfRectangle , _adgd ( _gddce . text ( ) , 50 ) , _a . RuneCountInString ( _gddce . text ( ) ) ) ;
} ; } ; } ;
// _ggad returns the keys of `_afecf` sorted in increasing order.
func _ggad ( _afecf map [ int ] intSet ) [ ] int { _cadc := make ( [ ] int , 0 , len ( _afecf ) ) ; for _gfdeg := range _afecf { _cadc = append ( _cadc , _gfdeg ) ; } ; _e . Ints ( _cadc ) ; return _cadc ; } ;
// moveText forwards the offsets (`_bfbd`, `_facc`) to moveLP.
func ( _bgce * textObject ) moveText ( _bfbd , _facc float64 ) { _bgce . moveLP ( _bfbd , _facc ) } ;
// _adcdc builds a textWord from the marks `_afcf`.
// The word's rectangle is the combination (via _bbbafc) of the rectangles of
// all marks, its `_eabbf` is the largest `_gccfd` among the marks, and its
// `_dfagd` is `_fdcda.Ury` minus the lower edge (Lly) of the word's rectangle.
// `_afcf` must be non-empty: its first element is read unconditionally.
func _adcdc ( _afcf [ ] * textMark , _fdcda _ba . PdfRectangle ) * textWord { _bbgea := _afcf [ 0 ] . PdfRectangle ; _bgcde := _afcf [ 0 ] . _gccfd ; for _ , _eaadc := range _afcf [ 1 : ] { _bbgea = _bbbafc ( _bbgea , _eaadc . PdfRectangle ) ; if _eaadc . _gccfd > _bgcde { _bgcde = _eaadc . _gccfd ;
} ; } ; return & textWord { PdfRectangle : _bbgea , _abcee : _afcf , _dfagd : _fdcda . Ury - _bbgea . Lly , _eabbf : _bgcde } ; } ;
// _bcfa adapts the three-argument predicate `_aded` to a two-argument one by
// fixing its float64 argument to `_bdbe`.
func _bcfa ( _aded func ( * wordBag , * textWord , float64 ) bool , _bdbe float64 ) func ( * wordBag , * textWord ) bool { return func ( _edgea * wordBag , _afagd * textWord ) bool { return _aded ( _edgea , _afagd , _bdbe ) } ;
} ;
// _cgdde converts the filled paths `_dgcfgg` into rulings.
// _beebf is applied to the sections first. Then every subpath (`_dbdc`) of
// every section that isQuadrilateral() is passed, with the section's Color,
// to makeRectRuling, and the rulings for which that call succeeds are
// collected. Subpaths that are not quadrilaterals are skipped.
// Diagnostics are emitted only when the `_adgbf` / `_adbfb` flags are set.
func _cgdde ( _dgcfgg [ ] pathSection ) rulingList { _beebf ( _dgcfgg ) ; if _adgbf { _ga . Log . Info ( "\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs" , len ( _dgcfgg ) ) ; } ; var _bdggb rulingList ;
for _ , _cdeca := range _dgcfgg { for _ , _fefg := range _cdeca . _dbdc { if ! _fefg . isQuadrilateral ( ) { if _adgbf { _ga . Log . Error ( "!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073" , _fefg ) ; } ; continue ; } ; if _eccde , _febed := _fefg . makeRectRuling ( _cdeca . Color ) ;
_febed { _bdggb = append ( _bdggb , _eccde ) ; } else { if _adbfb { _ga . Log . Error ( "\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073" , _fefg ) ; } ; } ; } ; } ; if _adgbf { _ga . Log . Info ( "\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073" , _bdggb . String ( ) ) ;
} ; return _bdggb ; } ;
// _gabc reports whether both _fgfag and _bggg hold for the two rectangles.
func _gabc ( _ddcb , _ebge _ba . PdfRectangle ) bool { return _fgfag ( _ddcb , _ebge ) && _bggg ( _ddcb , _ebge ) } ;
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	Images []ImageMark
}

// Sentinel errors shared by the extractor.
var (
	_gb  = _b.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")
	_gfg = _b.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)

// text returns the text of the line: the text (`_eaae`) of every word in
// order, with a single space inserted before each word whose `_gdec` flag is
// set.
func (_ebeg *textLine) text() string {
	var sb _f.Builder
	for _, word := range _ebeg._cdcg {
		if word._gdec {
			sb.WriteString("\u0020")
		}
		sb.WriteString(word._eaae)
	}
	return sb.String()
}

// checkWidth returns the extent `_bccfa - _becfc` and whether that extent is
// no larger than the `_gbb` limit.
func (_bdbbf rectRuling) checkWidth(_becfc, _bccfa float64) (float64, bool) {
	extent := _bccfa - _becfc
	return extent, extent <= _gbb
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// String returns a description of the word: its `_dfagd` value, bounding
// rectangle, font size and quoted text.
func (_beagc *textWord) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _agc.Sprintf(
		layout,
		_beagc._dfagd,
		_beagc.PdfRectangle,
		_beagc._eabbf,
		_beagc._eaae,
	)
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// Text returns the text content of the lists: the Text() of every list in
// `_dged`, concatenated in order.
func ( _dged * lists ) Text ( ) string { _cfdb := & _f . Builder { } ; for _ , _fcfb := range * _dged { _dba := _fcfb . Text ( ) ; _cfdb . WriteString ( _dba ) ; } ; return _cfdb . String ( ) ; } ;
// reduceTiling returns the table with near-degenerate empty rows and columns
// of the tiling `_bcaga` removed.
//
// Row r is dropped when r > 0, the tiling coordinates `_dfca[r-1]` and
// `_dfca[r]` are closer than `_dadad`, and emptyCompositeRow(r) holds.
// Column c is dropped when c is not the last column, `_fece[c]` and
// `_fece[c+1]` are closer than `_dadad`, and emptyCompositeColumn(c) holds.
//
// When nothing is dropped the receiver itself is returned. Otherwise a new
// table is returned that holds the surviving composite cells re-indexed to
// the reduced grid. Only `_edeg`, the dimensions and the composite cells are
// set on the new table; its PdfRectangle is left at the zero value here.
func ( _daab * textTable ) reduceTiling ( _bcaga gridTiling , _dadad float64 ) * textTable { _afgfb := make ( [ ] int , 0 , _daab . _cgae ) ;
_eacf := make ( [ ] int , 0 , _daab . _eacg ) ; _fdea := _bcaga . _fece ; _gbaeb := _bcaga . _dfca ; for _bdcfa := 0 ; _bdcfa < _daab . _cgae ; _bdcfa ++ { _cbfae := _bdcfa > 0 && _gf . Abs ( _gbaeb [ _bdcfa - 1 ] - _gbaeb [ _bdcfa ] ) < _dadad && _daab . emptyCompositeRow ( _bdcfa ) ; if ! _cbfae { _afgfb = append ( _afgfb , _bdcfa ) ;
} ; } ; for _bgbb := 0 ; _bgbb < _daab . _eacg ; _bgbb ++ { _ccbe := _bgbb < _daab . _eacg - 1 && _gf . Abs ( _fdea [ _bgbb + 1 ] - _fdea [ _bgbb ] ) < _dadad && _daab . emptyCompositeColumn ( _bgbb ) ; if ! _ccbe { _eacf = append ( _eacf , _bgbb ) ; } ; } ; if len ( _afgfb ) == _daab . _cgae && len ( _eacf ) == _daab . _eacg { return _daab ;
} ; _abcdf := textTable { _edeg : _daab . _edeg , _eacg : len ( _eacf ) , _cgae : len ( _afgfb ) , _aaaga : make ( map [ uint64 ] compositeCell , len ( _eacf ) * len ( _afgfb ) ) } ; if _efda { _ga . Log . Info ( "\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064" , _daab . _eacg , _daab . _cgae , len ( _eacf ) , len ( _afgfb ) ) ;
_ga . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076" , _eacf ) ; _ga . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076" , _afgfb ) ; } ; for _fgcfc , _faefc := range _afgfb { for _gdfe , _ebcf := range _eacf { _cfdd , _bcddg := _daab . getComposite ( _ebcf , _faefc ) ;
if len ( _cfdd ) == 0 { continue ; } ; if _efda { _agc . Printf ( "\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n" , _gdfe , _fgcfc , _ebcf , _faefc , _adgd ( _cfdd . merge ( ) . text ( ) , 50 ) ) ; } ; _abcdf . putComposite ( _gdfe , _fgcfc , _cfdd , _bcddg ) ;
} ; } ; return & _abcdf ; } ;
// rulingList is a list of rulings.
type rulingList [ ] * ruling ;
// tables returns a TextTable for every paragraph in `_becb` that carries a
// table (`_fbbea` is non-nil) for which isExportable() holds.
// NOTE(review): nil entries in `_becb` are dereferenced here; callers are
// presumably expected to pass no nil paragraphs.
func ( _becb paraList ) tables ( ) [ ] TextTable { var _agag [ ] TextTable ; if _efda { _ga . Log . Info ( "\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a" ) ; } ; for _ , _afefe := range _becb { _cce := _afefe . _fbbea ;
if _cce != nil && _cce . isExportable ( ) { _agag = append ( _agag , _cce . toTextTable ( ) ) ; } ; } ; return _agag ; } ;
// emptyCompositeRow reports whether no composite cell in row `_cfgc` contains
// any paragraph. Cells missing from the composite map count as empty.
func ( _adga * textTable ) emptyCompositeRow ( _cfgc int ) bool { for _egaba := 0 ; _egaba < _adga . _eacg ; _egaba ++ { if _bfac , _gcab := _adga . _aaaga [ _bgcc ( _egaba , _cfgc ) ] ;
_gcab { if len ( _bfac . paraList ) > 0 { return false ; } ; } ; } ; return true ; } ;
// _ecfb looks up the combining-mark replacement for `_ecbbe` in the `_gbefd`
// table. The lookup is attempted only when `_ecbbe` consists of exactly one
// rune; otherwise, or when the rune has no entry, it returns "" and false.
func _ecfb ( _ecbbe string ) ( string , bool ) { _gege := [ ] rune ( _ecbbe ) ; if len ( _gege ) != 1 { return "" , false ; } ; _gcgdf , _dafgd := _gbefd [ _gege [ 0 ] ] ; return _gcgdf , _dafgd ; } ;
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_addc PageText) Marks() *TextMarkArray {
	marks := TextMarkArray{_ffca: _addc._ccca}
	return &marks
}

// _gfdd returns the matrix that translates by the X and Y components of `_bgge`.
func _gfdd(_bgge _agf.Point) _agf.Matrix {
	dx, dy := _bgge.X, _bgge.Y
	return _agf.TranslationMatrix(dx, dy)
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
//
// `start` is clamped to the offset of the first mark and `end` to one past the
// offset of the last mark. An error is returned when the receiver is nil, when
// end < start, or when no mark can be found for either boundary. The end
// boundary is the first mark whose Offset is greater than end-1, so an error
// is also returned when no such mark exists.
// An empty array is returned unchanged.
func ( _gac * TextMarkArray ) RangeOffset ( start , end int ) ( * TextMarkArray , error ) { if _gac == nil { return nil , _b . New ( "\u006da\u003d\u003d\u006e\u0069\u006c" ) ; } ; if end < start { return nil , _agc . Errorf ( "\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020" , start , end ) ;
} ; _aabaa := len ( _gac . _ffca ) ; if _aabaa == 0 { return _gac , nil ; } ; if start < _gac . _ffca [ 0 ] . Offset { start = _gac . _ffca [ 0 ] . Offset ; } ; if end > _gac . _ffca [ _aabaa - 1 ] . Offset + 1 { end = _gac . _ffca [ _aabaa - 1 ] . Offset + 1 ; } ; _gaf := _e . Search ( _aabaa , func ( _aec int ) bool { return _gac . _ffca [ _aec ] . Offset + len ( _gac . _ffca [ _aec ] . Text ) - 1 >= start } ) ;
if ! ( 0 <= _gaf && _gaf < _aabaa ) { _dfde := _agc . Errorf ( "\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076" , start , _gaf , _aabaa , _gac . _ffca [ 0 ] , _gac . _ffca [ _aabaa - 1 ] ) ;
return nil , _dfde ; } ; _edge := _e . Search ( _aabaa , func ( _aeaf int ) bool { return _gac . _ffca [ _aeaf ] . Offset > end - 1 } ) ; if ! ( 0 <= _edge && _edge < _aabaa ) { _cfbd := _agc . Errorf ( "\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076" , end , _edge , _aabaa , _gac . _ffca [ 0 ] , _gac . _ffca [ _aabaa - 1 ] ) ;
return nil , _cfbd ; } ; if _edge <= _gaf { return nil , _agc . Errorf ( "\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064" , start , end , _gaf , _edge ) ;
} ; return & TextMarkArray { _ffca : _gac . _ffca [ _gaf : _edge ] } , nil ; } ;
// _degf returns a ruling of kind `_cfae` built from the rectangle `_faea`:
// `_edga` is its Urx, `_fcec` its Lly and `_abeg` its Ury.
func _degf ( _faea _ba . PdfRectangle ) * ruling { return & ruling { _bbce : _cfae , _edga : _faea . Urx , _fcec : _faea . Lly , _abeg : _faea . Ury } ; } ;
// _gbefd maps stand-alone (spacing) accent and modifier characters to the
// corresponding characters in the U+0300 combining-mark range.
// It is the lookup table used by _ecfb.
var ( _gbefd = map [ rune ] string { 0x0060 : "\u0300" , 0x02CB : "\u0300" , 0x0027 : "\u0301" , 0x00B4 : "\u0301" , 0x02B9 : "\u0301" , 0x02CA : "\u0301" , 0x005E : "\u0302" , 0x02C6 : "\u0302" , 0x007E : "\u0303" , 0x02DC : "\u0303" , 0x00AF : "\u0304" , 0x02C9 : "\u0304" , 0x02D8 : "\u0306" , 0x02D9 : "\u0307" , 0x00A8 : "\u0308" , 0x00B0 : "\u030a" , 0x02DA : "\u030a" , 0x02BA : "\u030b" , 0x02DD : "\u030b" , 0x02C7 : "\u030c" , 0x02C8 : "\u030d" , 0x0022 : "\u030e" , 0x02BB : "\u0312" , 0x02BC : "\u0313" , 0x0486 : "\u0313" , 0x055A : "\u0313" , 0x02BD : "\u0314" , 0x0485 : "\u0314" , 0x0559 : "\u0314" , 0x02D4 : "\u031d" , 0x02D5 : "\u031e" , 0x02D6 : "\u031f" , 0x02D7 : "\u0320" , 0x02B2 : "\u0321" , 0x00B8 : "\u0327" , 0x02CC : "\u0329" , 0x02B7 : "\u032b" , 0x02CD : "\u0331" , 0x005F : "\u0332" , 0x204E : "\u0359" } ;
) ;
// _efgc joins the text of `_bggdg` into one string. Every line is written as
// the prefix `_dgeaf` followed by the line's text and then a separator: a
// newline when the `_bbfg` value of the following line differs from that of
// the current line (0.0 stands in for the line after the last), otherwise a
// single space.
func _efgc ( _bggdg [ ] * textLine , _dgeaf string ) string { var _afda _f . Builder ; _afebd := 0.0 ; for _ebec , _ggbg := range _bggdg { _bbee := _ggbg . text ( ) ; _degcd := _ggbg . _bbfg ; if _ebec < len ( _bggdg ) - 1 { _afebd = _bggdg [ _ebec + 1 ] . _bbfg ; } else { _afebd = 0.0 ; } ;
_afda . WriteString ( _dgeaf ) ; _afda . WriteString ( _bbee ) ; if _afebd != _degcd { _afda . WriteString ( "\u000a" ) ; } else { _afda . WriteString ( "\u0020" ) ; } ; } ; return _afda . String ( ) ; } ;
// The markKind values, numbered consecutively from 0.
const ( _gcca markKind = iota ; _bcfc ; _ccfg ; _degg ; ) ;
// rectRuling is a coloured rectangle tagged with a ruling kind and a mark kind.
type rectRuling struct { _ecaa rulingKind ;
_bdbfa markKind ; _gff . Color ; _ba . PdfRectangle ; } ;
const _fad = 20 ;
// bbox returns the bounding rectangle of the word.
func ( _cgged * textWord ) bbox ( ) _ba . PdfRectangle { return _cgged . PdfRectangle } ;
// fontsize returns the `_fdfb` value of the first line of the paragraph.
// The paragraph must have at least one line.
func ( _dea * textPara ) fontsize ( ) float64 { return _dea . _fdec [ 0 ] . _fdfb } ;
// writeText writes the text of the paragraphs to `_fbdbd`.
// Paragraphs with the `_cddef` flag set are skipped. After every written
// paragraph except the last list element, a single space is written when
// _gfgc(paragraph, next element) holds and two newlines otherwise. Two
// newlines are always written at the end. Errors from Write are not reported.
func ( _fefe paraList ) writeText ( _fbdbd _gg . Writer ) { for _gddd , _facg := range _fefe { if _facg . _cddef { continue ;
} ; _facg . writeText ( _fbdbd ) ; if _gddd != len ( _fefe ) - 1 { if _gfgc ( _facg , _fefe [ _gddd + 1 ] ) { _fbdbd . Write ( [ ] byte ( "\u0020" ) ) ; } else { _fbdbd . Write ( [ ] byte ( "\u000a" ) ) ; _fbdbd . Write ( [ ] byte ( "\u000a" ) ) ; } ; } ; } ; _fbdbd . Write ( [ ] byte ( "\u000a" ) ) ; _fbdbd . Write ( [ ] byte ( "\u000a" ) ) ;
} ;
// checkOp validates the operand count of the content stream operation `_cdc`.
// When `_cgde` is non-negative the operation must have exactly `_cgde`
// parameters; on a mismatch it returns false, together with an error only if
// `_fafe` is set. In all other cases it returns true and a nil error.
// A nil receiver (an operator used outside a text object) is only logged.
func ( _adf * textObject ) checkOp ( _cdc * _fc . ContentStreamOperation , _cgde int , _fafe bool ) ( _gbda bool , _acc error ) { if _adf == nil { var _ffc [ ] _add . PdfObject ; if _cgde > 0 { _ffc = _cdc . Params ; if len ( _ffc ) > _cgde { _ffc = _ffc [ : _cgde ] ; } ; } ; _ga . Log . Debug ( "\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076" , _cdc . Operand , _ffc ) ;
} ; if _cgde >= 0 { if len ( _cdc . Params ) != _cgde { if _fafe { _acc = _b . New ( "\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et" ) ; } ; _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076" , _cdc . Operand , _cgde , len ( _cdc . Params ) , _cdc . Params ) ;
return false , _acc ; } ; } ; return true , nil ; } ;

// splitSec partitions the rulings into groups connected through alignsSec.
//
// The receiver is first sorted in place by (`_fcec`, `_abeg`). Groups are then
// grown greedily: a group starts from the first ruling not yet assigned, and
// every unassigned ruling that alignsSec with any ruling already in the group
// is added to it. Each ruling ends up in exactly one group.
//
// An empty list yields nil.
func (_bggb rulingList) splitSec() []rulingList {
	// Guard: the first element is used below to seed the first group.
	if len(_bggb) == 0 {
		return nil
	}
	_e.Slice(_bggb, func(_fcdc, _dbea int) bool {
		_bdade, _gbce := _bggb[_fcdc], _bggb[_dbea]
		if _bdade._fcec != _gbce._fcec {
			return _bdade._fcec < _gbce._fcec
		}
		return _bdade._abeg < _gbce._abeg
	})
	// _aacgg is the set of rulings already assigned to a group.
	_aacgg := make(map[*ruling]struct{}, len(_bggb))
	// _dbec collects the group seeded by _dfgce and marks its members as assigned.
	_dbec := func(_dfgce *ruling) rulingList {
		_cgcb := rulingList{_dfgce}
		_aacgg[_dfgce] = struct{}{}
		for _, _bega := range _bggb {
			if _, _ecabad := _aacgg[_bega]; _ecabad {
				continue
			}
			for _, _adfbc := range _cgcb {
				if _bega.alignsSec(_adfbc) {
					_cgcb = append(_cgcb, _bega)
					_aacgg[_bega] = struct{}{}
					break
				}
			}
		}
		return _cgcb
	}
	_edgfc := []rulingList{_dbec(_bggb[0])}
	for _, _bdcb := range _bggb[1:] {
		if _, _eccdec := _aacgg[_bdcb]; _eccdec {
			continue
		}
		_edgfc = append(_edgfc, _dbec(_bdcb))
	}
	return _edgfc
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// TableInfo gets table information of the textmark `tm`.
// It returns nil, nil when the mark is not flagged as belonging to a table
// (`_efeg` is false). Otherwise it returns the table stored in `_cfaa`
// together with the result of that table's getCellInfo for this mark.
func ( _dee * TextMark ) TableInfo ( ) ( * TextTable , [ ] [ ] int ) { if ! _dee . _efeg { return nil , nil ; } ; _cdgc := _dee . _cfaa ; _bbdb := _cdgc . getCellInfo ( * _dee ) ; return _cdgc , _bbdb ; } ;
// Numeric tolerances and thresholds used throughout the package.
// Uses visible in this part of the file: `_cbdb` is the tolerance of _dbeae,
// `_dfed` the tolerance of _gecbg and `_gbb` the limit in rectRuling.checkWidth.
const ( _cbdb = 1.0e-6 ; _dgaf = 1.0e-4 ; _cbbd = 10 ; _dage = 6 ; _fcca = 0.5 ; _afbgg = 0.12 ; _cdfe = 0.19 ;
_ebfgc = 0.04 ; _gagd = 0.04 ; _bfcae = 1.0 ; _fae = 0.04 ; _cac = 0.4 ; _fcag = 0.7 ; _dgcc = 1.0 ; _cfcf = 0.1 ; _gcfb = 1.4 ; _bbad = 0.46 ; _fadg = 0.02 ; _dgea = 0.2 ; _befe = 0.5 ; _afeg = 4 ; _bag = 4.0 ; _ggca = 6 ; _beec = 0.3 ; _efega = 0.01 ; _agba = 0.02 ; _afec = 2 ; _dccbc = 2 ; _ccbb = 500 ; _ggdg = 4.0 ;
_cgef = 4.0 ; _beed = 0.05 ; _dccf = 0.1 ; _dfed = 2.0 ; _gbb = 2.0 ; _bddc = 1.5 ; _gaea = 3.0 ; _efeb = 0.25 ; ) ;
// textPara is a paragraph: a bounding rectangle, its lines (`_fdec`), an
// optional table (`_fbbea`) and four links to other paragraphs (`_cfee`,
// `_fdgf`, `_cgag`, `_ecada`). Paragraphs with `_cddef` set are skipped by
// paraList.writeText.
type textPara struct { _ba . PdfRectangle ; _dgabg _ba . PdfRectangle ; _fdec [ ] * textLine ; _fbbea * textTable ; _gecb bool ; _cddef bool ; _cfee * textPara ; _fdgf * textPara ;
_cgag * textPara ; _ecada * textPara ; _fcdfg [ ] list ; } ;
// toTextMarks returns the TextMarks of all words of the line, in order.
// Before each word whose `_gdec` flag is set, a mark for a single space is
// appended through _ffeg. `_ggaa` is passed on to _ffeg and to the words'
// toTextMarks (presumably the running text offset; confirm against _ffeg).
func ( _agaa * textLine ) toTextMarks ( _ggaa * int ) [ ] TextMark { var _daef [ ] TextMark ; for _ , _cdbb := range _agaa . _cdcg { if _cdbb . _gdec { _daef = _ffeg ( _daef , _ggaa , "\u0020" ) ; } ; _dcad := _cdbb . toTextMarks ( _ggaa ) ;
_daef = append ( _daef , _dcad ... ) ; } ; return _daef ; } ;
// getCurrentFont returns the font of the current text state (`_efed._gbdg`).
// When no font is set it logs a debug message and returns the default font,
// so the result is never nil.
func ( _cdae * textObject ) getCurrentFont ( ) * _ba . PdfFont { _gegc := _cdae . _efed . _gbdg ; if _gegc == nil { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e" ) ;
return _ba . DefaultFont ( ) ; } ; return _gegc ; } ;
// devicePoint returns the point (`_eddb`, `_gfcf`) transformed by the product
// of the state's two matrices, `_gdfd.Mult(_gafa)`.
func ( _fdgc * shapesState ) devicePoint ( _eddb , _gfcf float64 ) _agf . Point { _agca := _fdgc . _gdfd . Mult ( _fdgc . _gafa ) ; _eddb , _gfcf = _agca . Transform ( _eddb , _gfcf ) ; return _agf . NewPoint ( _eddb , _gfcf ) ; } ;
// _dbeae reports whether `_gfegb` is zero to within the tolerance `_cbdb`.
func _dbeae ( _gfegb float64 ) bool { return _gf . Abs ( _gfegb ) < _cbdb } ;
// _gecbg reports whether `_dfdb` and `_ddgc` differ by no more than `_dfed`.
func _gecbg ( _dfdb , _ddgc float64 ) bool { return _gf . Abs ( _dfdb - _ddgc ) <= _dfed } ;
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// String returns a description of `state`: the `_ffaf`, `_gdeg` and `_dcd`
// values and the base font name, or "[NOT SET]" when no font is set.
func ( _bdc * textState ) String ( ) string { _bac := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]" ; if _bdc . _gbdg != nil { _bac = _bdc . _gbdg . BaseFont ( ) ; } ; return _agc . Sprintf ( "\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071" , _bdc . _ffaf , _bdc . _gdeg , _bdc . _dcd , _bac ) ;
} ;
// asTiling converts the rulings into a gridTiling.
//
// Steps, as visible in the body:
//  1. Adjacent rulings that align on both axes are reported as duplicates
//     (logged only), then the list is sorted with sortStrict.
//  2. The rulings are split into verticals and horizontals, and the distinct
//     primary coordinates of each give the grid lines. With a single vertical
//     or a single horizontal coordinate an empty gridTiling is returned.
//  3. One gridTile is built for every elementary cell from the rulings found
//     on its four sides (findPrimSec / _ebba).
//  4. Horizontal coalescing: in each row, a tile for which `_ecaf` is set is
//     extended to the right across following tiles until one with `_bfab`
//     set has been absorbed.
//  5. Vertical coalescing: rows are walked from the highest index down to 0;
//     a tile is extended into lower-index rows while the tile at the same Llx
//     has the same Urx and the tile does not yet have `_bgbfd` set. Tiles
//     absorbed this way are skipped later. Row 0 always sets `_bgbfd`. Only
//     tiles for which complete() holds are kept.
//
// The result maps Lly -> Llx -> tile (`_gdcg`), with `_fece` and `_dfca`
// derived from that map by _ffad and _ecbcb.
// NOTE(review): the receiver is sliced as `_bcbe[1:]` and the primaries are
// indexed at [0]; callers presumably guarantee a non-empty list with at least
// one vertical and one horizontal ruling. Confirm.
func ( _bcbe rulingList ) asTiling ( ) gridTiling { if _dfge { _ga . Log . Info ( "r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d" , len ( _bcbe ) ) ;
} ; for _aabf , _cccb := range _bcbe [ 1 : ] { _efaac := _bcbe [ _aabf ] ; if _efaac . alignsPrimary ( _cccb ) && _efaac . alignsSec ( _cccb ) { _ga . Log . Error ( "a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073" , _cccb , _efaac ) ;
} ; } ; _bcbe . sortStrict ( ) ; _bcbe . log ( "\u0073n\u0061\u0070\u0070\u0065\u0064" ) ; _bdgd , _aacda := _bcbe . vertsHorzs ( ) ; _dabbe := _bdgd . primaries ( ) ; _ccccc := _aacda . primaries ( ) ; _dgdg := len ( _dabbe ) - 1 ; _ecffg := len ( _ccccc ) - 1 ; if _dgdg == 0 || _ecffg == 0 { return gridTiling { } ;
} ; _fbbgb := _ba . PdfRectangle { Llx : _dabbe [ 0 ] , Urx : _dabbe [ _dgdg ] , Lly : _ccccc [ 0 ] , Ury : _ccccc [ _ecffg ] } ; if _dfge { _ga . Log . Info ( "\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064" , len ( _bdgd ) ) ;
for _bafdd , _dacfe := range _bdgd { _agc . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _bafdd , _dacfe ) ; } ; _ga . Log . Info ( "\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064" , len ( _aacda ) ) ;
for _fddb , _adcgd := range _aacda { _agc . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _fddb , _adcgd ) ; } ; _ga . Log . Info ( "\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f" , _dgdg , _ecffg , _dabbe , _ccccc ) ;
} ; _eadec := make ( [ ] gridTile , _dgdg * _ecffg ) ; for _fege := _ecffg - 1 ; _fege >= 0 ; _fege -- { _ffcaf := _ccccc [ _fege ] ; _dgdc := _ccccc [ _fege + 1 ] ; for _ggbb := 0 ; _ggbb < _dgdg ; _ggbb ++ { _egggeb := _dabbe [ _ggbb ] ; _cecb := _dabbe [ _ggbb + 1 ] ; _dddcd := _bdgd . findPrimSec ( _egggeb , _ffcaf ) ;
_fefgc := _bdgd . findPrimSec ( _cecb , _ffcaf ) ; _dgadf := _aacda . findPrimSec ( _ffcaf , _egggeb ) ; _cdeed := _aacda . findPrimSec ( _dgdc , _egggeb ) ; _faae := _ba . PdfRectangle { Llx : _egggeb , Urx : _cecb , Lly : _ffcaf , Ury : _dgdc } ; _efegg := _ebba ( _faae , _dddcd , _fefgc , _dgadf , _cdeed ) ;
_eadec [ _fege * _dgdg + _ggbb ] = _efegg ; if _dfge { _agc . Printf ( "\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a" , _ggbb , _fege , _efegg . String ( ) , _efegg . Width ( ) , _efegg . Height ( ) ) ;
} ; } ; } ; if _dfge { _ga . Log . Info ( "r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066" , _fbbgb ) ;
} ; _dgdgg := make ( [ ] map [ float64 ] gridTile , _ecffg ) ; for _gbbaf := _ecffg - 1 ; _gbbaf >= 0 ; _gbbaf -- { if _dfge { _agc . Printf ( "\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a" , _gbbaf ) ; } ; _dgdgg [ _gbbaf ] = make ( map [ float64 ] gridTile , _dgdg ) ; for _gcdcg := 0 ; _gcdcg < _dgdg ;
_gcdcg ++ { _eafg := _eadec [ _gbbaf * _dgdg + _gcdcg ] ; if _dfge { _agc . Printf ( "\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _gcdcg , _eafg ) ; } ; if ! _eafg . _ecaf { continue ; } ; _aaag := _gcdcg ; for _abef := _gcdcg + 1 ; ! _eafg . _bfab && _abef < _dgdg ;
_abef ++ { _fbda := _eadec [ _gbbaf * _dgdg + _abef ] ; _eafg . Urx = _fbda . Urx ; _eafg . _dgccd = _eafg . _dgccd || _fbda . _dgccd ; _eafg . _bgbfd = _eafg . _bgbfd || _fbda . _bgbfd ; _eafg . _bfab = _fbda . _bfab ; if _dfge { _agc . Printf ( "\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a" , _abef , _fbda , _eafg ) ;
} ; _aaag = _abef ; } ; if _dfge { _agc . Printf ( " \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n" , _gcdcg , _aaag , _eafg ) ; } ; _gcdcg = _aaag ; _dgdgg [ _gbbaf ] [ _eafg . Llx ] = _eafg ; } ; } ; _dabc := make ( map [ float64 ] map [ float64 ] gridTile , _ecffg ) ;
_gbbe := make ( map [ float64 ] map [ float64 ] struct { } , _ecffg ) ; for _bdfca := _ecffg - 1 ; _bdfca >= 0 ; _bdfca -- { _ggdc := _eadec [ _bdfca * _dgdg ] . Lly ; _dabc [ _ggdc ] = make ( map [ float64 ] gridTile , _dgdg ) ; _gbbe [ _ggdc ] = make ( map [ float64 ] struct { } , _dgdg ) ; } ; if _dfge { _ga . Log . Info ( "\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066" , _fbbgb ) ;
} ; for _agbff := _ecffg - 1 ; _agbff >= 0 ; _agbff -- { _effd := _eadec [ _agbff * _dgdg ] . Lly ; _dcbaf := _dgdgg [ _agbff ] ; if _dfge { _agc . Printf ( "\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a" , _agbff ) ; } ; for _ , _eaea := range _cbgc ( _dcbaf ) { if _ , _aaec := _gbbe [ _effd ] [ _eaea ] ;
_aaec { continue ; } ; _ddad := _dcbaf [ _eaea ] ; if _dfge { _agc . Printf ( " \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a" , _ddad . String ( ) ) ; } ; for _gbdf := _agbff - 1 ; _gbdf >= 0 ; _gbdf -- { if _ddad . _bgbfd { break ; } ; _bffdb := _dgdgg [ _gbdf ] ; _efcb , _caaab := _bffdb [ _eaea ] ;
if ! _caaab { break ; } ; if _efcb . Urx != _ddad . Urx { break ; } ; _ddad . _bgbfd = _efcb . _bgbfd ; _ddad . Lly = _efcb . Lly ; if _dfge { _agc . Printf ( "\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a" , _efcb . String ( ) , _ddad . String ( ) ) ;
} ; _gbbe [ _efcb . Lly ] [ _efcb . Llx ] = struct { } { } ; } ; if _agbff == 0 { _ddad . _bgbfd = true ; } ; if _ddad . complete ( ) { _dabc [ _effd ] [ _eaea ] = _ddad ; } ; } ; } ; _aaef := gridTiling { PdfRectangle : _fbbgb , _fece : _ffad ( _dabc ) , _dfca : _ecbcb ( _dabc ) , _gdcg : _dabc } ; _aaef . log ( "\u0043r\u0065\u0061\u0074\u0065\u0064" ) ;
return _aaef ; } ;
// _cceb walks the entries of `_gdbec` in the key order given by _adgbc,
// starting at the first key whose slice is non-nil and skipping nil slices.
// For each visited key it compares the last element of that key's slice with
// the first element of the same slice and, when the last is larger, lowers it
// to the first and stores the slice under the previously visited key.
// Maps with fewer than two entries are left untouched.
// NOTE(review): `_egdea` is read from the current key (`_afae`), the same
// slice as `_dadf`, and then written under the previous key (`_bfdf`).
// Reading the previous key's slice looks like the intent; confirm before
// relying on this function to trim overlaps.
func _cceb ( _gdbec map [ int ] [ ] float64 ) { if len ( _gdbec ) <= 1 { return ; } ; _abaee := _adgbc ( _gdbec ) ; if _efda { _ga . Log . Info ( "\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076" , _abaee ) ; } ; var _ebgbe , _bfdf int ;
for _ebgbe , _bfdf = range _abaee { if _gdbec [ _bfdf ] != nil { break ; } ; } ; for _addg , _afae := range _abaee [ _ebgbe : ] { _dadf := _gdbec [ _afae ] ; if _dadf == nil { continue ; } ; if _efda { _agc . Printf ( "\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a" , _ebgbe + _addg , _bfdf , _afae ) ;
} ; _egdea := _gdbec [ _afae ] ; if _egdea [ len ( _egdea ) - 1 ] > _dadf [ 0 ] { _egdea [ len ( _egdea ) - 1 ] = _dadf [ 0 ] ; _gdbec [ _bfdf ] = _egdea ; } ; _bfdf = _afae ; } ; } ;
// stateStack is a stack of text states.
type stateStack [ ] * textState ;
// setTextMatrix sets both matrices of the text object, `_eefe` and `_dbc`, to
// the matrix built from the six values in `_cbd`. When `_cbd` does not hold
// exactly six values the call is logged and ignored.
func ( _fgaaf * textObject ) setTextMatrix ( _cbd [ ] float64 ) { if len ( _cbd ) != 6 { _ga . Log . Debug ( "\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029" , len ( _cbd ) ) ;
return ; } ; _bba , _eea , _abg , _adgb , _gec , _eeg := _cbd [ 0 ] , _cbd [ 1 ] , _cbd [ 2 ] , _cbd [ 3 ] , _cbd [ 4 ] , _cbd [ 5 ] ; _fgaaf . _eefe = _agf . NewMatrix ( _bba , _eea , _abg , _adgb , _gec , _eeg ) ; _fgaaf . _dbc = _fgaaf . _eefe ; } ;
// add inserts `_dega` into the set.
func ( _dfgfc intSet ) add ( _dega int ) { _dfgfc [ _dega ] = struct { } { } } ;
// inTile returns the elements of `paras` whose bounding box `tile.contains` accepts.
// When the `_efda` debug flag is set, the selection is also printed to stdout.
func (paras paraList) inTile(tile gridTile) paraList {
	var inside paraList
	for _, para := range paras {
		if !tile.contains(para.PdfRectangle) {
			continue
		}
		inside = append(inside, para)
	}
	if !_efda {
		return inside
	}
	// Debug dump: header line, one line per selected para, then a blank line.
	_agc.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", tile, len(inside))
	for i, para := range inside {
		_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
	}
	_agc.Println("")
	return inside
}

const _bcgf = 10

// closePath closes the most recent subpath of `ss`.
// A pending point (flag `_gbdgg`) is first turned into a new subpath via `_ddbd`.
// If there is no pending point and no subpath at all, the call only clears the flag.
func (ss *shapesState) closePath() {
	switch {
	case ss._gbdgg:
		ss._gfce = append(ss._gfce, _ddbd(ss._gcaaf))
		ss._gbdgg = false
	case len(ss._gfce) == 0:
		// Nothing to close.
		if _fdbg {
			_ga.Log.Debug("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068")
		}
		ss._gbdgg = false
		return
	}
	last := len(ss._gfce) - 1
	ss._gfce[last].close()
	if _fdbg {
		_ga.Log.Info("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073", ss)
	}
}

type rulingKind int

// addPoint converts (`x`, `y`) with `devicePoint` and either appends it to the
// subpath returned by `establishSubpath`, or, when that returns nil, stores it
// as the pending point and sets the pending flag.
func (ss *shapesState) addPoint(x, y float64) {
	current := ss.establishSubpath()
	pt := ss.devicePoint(x, y)
	if current != nil {
		current.add(pt)
		return
	}
	ss._gbdgg = true
	ss._gcaaf = pt
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
type TextTable struct {
	_ba.PdfRectangle
	W, H  int
	Cells [][]TableCell
}

// merge combines all elements of `paras` into a single textPara.
// The paras are first put in reading order (in place); the result's rectangle is
// the `_bbbafc` combination of all rectangles and its lines are the concatenation
// of every para's `_fdec`. Returns nil for an empty list.
func (paras paraList) merge() *textPara {
	_ga.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(paras))
	if len(paras) == 0 {
		return nil
	}
	paras.sortReadingOrder()
	bbox := paras[0].PdfRectangle
	lines := paras[0]._fdec
	for i := 1; i < len(paras); i++ {
		bbox = _bbbafc(bbox, paras[i].PdfRectangle)
		lines = append(lines, paras[i]._fdec...)
	}
	return _bfgg(bbox, lines)
}

// addNeighbours fills in the four neighbour links (`_cfee`, `_fdgf`, `_ecada`, `_cgag`)
// of every para in `paras`.
//
// For each direction, the nearest candidate is chosen; the link is dropped if any
// other candidate overlaps the chosen one along that axis, or if the pairwise
// check (`_bggg` for x, `_fgfag` for y) fails. A final pass removes links that
// are not reciprocated by the para on the other end.
func (paras paraList) addNeighbours() {
	// splitX partitions the paras at `indexes` into those entirely at lower x
	// than `para` and those entirely at higher x.
	splitX := func(indexes []int, para *textPara) ([]*textPara, []*textPara) {
		lower := make([]*textPara, 0, len(indexes)-1)
		higher := make([]*textPara, 0, len(indexes)-1)
		for _, idx := range indexes {
			other := paras[idx]
			switch {
			case other.Urx <= para.Llx:
				lower = append(lower, other)
			case other.Llx >= para.Urx:
				higher = append(higher, other)
			}
		}
		return lower, higher
	}
	// splitY partitions the paras at `indexes` into those entirely at higher y
	// than `para` (first result) and those entirely at lower y (second result).
	splitY := func(indexes []int, para *textPara) ([]*textPara, []*textPara) {
		higher := make([]*textPara, 0, len(indexes)-1)
		lower := make([]*textPara, 0, len(indexes)-1)
		for _, idx := range indexes {
			other := paras[idx]
			switch {
			case other.Ury <= para.Lly:
				lower = append(lower, other)
			case other.Lly >= para.Ury:
				higher = append(higher, other)
			}
		}
		return higher, lower
	}

	// Pass 1: links along the x axis.
	candidates := paras.yNeighbours(_agba)
	for _, para := range paras {
		indexes := candidates[para]
		if len(indexes) == 0 {
			continue
		}
		lowerX, higherX := splitX(indexes, para)
		if len(lowerX) == 0 && len(higherX) == 0 {
			continue
		}
		if len(lowerX) > 0 {
			// Nearest lower-x candidate is the one with the greatest Urx (last wins ties).
			nearest := lowerX[0]
			for _, c := range lowerX[1:] {
				if c.Urx >= nearest.Urx {
					nearest = c
				}
			}
			for _, c := range lowerX {
				if c != nearest && c.Urx > nearest.Llx {
					nearest = nil
					break
				}
			}
			if nearest != nil && _bggg(para.PdfRectangle, nearest.PdfRectangle) {
				para._cfee = nearest
			}
		}
		if len(higherX) > 0 {
			// Nearest higher-x candidate is the one with the smallest Llx (last wins ties).
			nearest := higherX[0]
			for _, c := range higherX[1:] {
				if c.Llx <= nearest.Llx {
					nearest = c
				}
			}
			for _, c := range higherX {
				if c != nearest && c.Llx < nearest.Urx {
					nearest = nil
					break
				}
			}
			if nearest != nil && _bggg(para.PdfRectangle, nearest.PdfRectangle) {
				para._fdgf = nearest
			}
		}
	}

	// Pass 2: links along the y axis.
	candidates = paras.xNeighbours(_efega)
	for _, para := range paras {
		indexes := candidates[para]
		if len(indexes) == 0 {
			continue
		}
		higherY, lowerY := splitY(indexes, para)
		if len(higherY) == 0 && len(lowerY) == 0 {
			continue
		}
		if len(lowerY) > 0 {
			// Nearest lower-y candidate is the one with the greatest Ury (last wins ties).
			nearest := lowerY[0]
			for _, c := range lowerY[1:] {
				if c.Ury >= nearest.Ury {
					nearest = c
				}
			}
			for _, c := range lowerY {
				if c != nearest && c.Ury > nearest.Lly {
					nearest = nil
					break
				}
			}
			if nearest != nil && _fgfag(para.PdfRectangle, nearest.PdfRectangle) {
				para._ecada = nearest
			}
		}
		if len(higherY) > 0 {
			// Nearest higher-y candidate is the one with the smallest Lly (last wins ties).
			nearest := higherY[0]
			for _, c := range higherY[1:] {
				if c.Lly <= nearest.Lly {
					nearest = c
				}
			}
			for _, c := range higherY {
				if c != nearest && c.Lly < nearest.Ury {
					nearest = nil
					break
				}
			}
			if nearest != nil && _fgfag(para.PdfRectangle, nearest.PdfRectangle) {
				para._cgag = nearest
			}
		}
	}

	// Pass 3: keep only reciprocated links. The order of the four checks matters
	// because each one may clear a field that a later para's check reads.
	for _, para := range paras {
		if para._cfee != nil && para._cfee._fdgf != para {
			para._cfee = nil
		}
		if para._cgag != nil && para._cgag._ecada != para {
			para._cgag = nil
		}
		if para._fdgf != nil && para._fdgf._cfee != para {
			para._fdgf = nil
		}
		if para._ecada != nil && para._ecada._cgag != para {
			para._ecada = nil
		}
	}
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// String returns a string describing `pt`.
func (pt PageText) String() string {
	// The same summary line brackets the list: prefixed once with one marker
	// string, and once with another.
	summary := _agc.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(pt._aebf))
	parts := []string{"\u002d" + summary}
	for _, mark := range pt._aebf {
		parts = append(parts, mark.String())
	}
	parts = append(parts, "\u002b"+summary)
	return _f.Join(parts, "\u000a")
}

// exists reports whether `ma` already holds a mark whose DirectObject and BBox
// are deeply equal to those of `tm` and whose Text is identical.
func (ma *TextMarkArray) exists(tm TextMark) bool {
	for _, candidate := range ma.Elements() {
		if !_ad.DeepEqual(tm.DirectObject, candidate.DirectObject) {
			continue
		}
		if !_ad.DeepEqual(tm.BBox, candidate.BBox) {
			continue
		}
		if candidate.Text == tm.Text {
			return true
		}
	}
	return false
}

// removeDuplicates collapses runs of consecutive points that `_gaeg` reports as
// equal, keeping the first point of each run.
func (sp *subpath) removeDuplicates() {
	if len(sp._aeee) == 0 {
		return
	}
	kept := []_agf.Point{sp._aeee[0]}
	for _, pt := range sp._aeee[1:] {
		if _gaeg(pt, kept[len(kept)-1]) {
			continue
		}
		kept = append(kept, pt)
	}
	sp._aeee = kept
}

// _bfgg builds a textPara from a bounding box and its lines.
func _bfgg(bbox _ba.PdfRectangle, lines []*textLine) *textPara {
	para := textPara{PdfRectangle: bbox, _fdec: lines}
	return &para
}

// pullWord appends `word` to the line and removes it from `bag` at index `depthIdx`.
func (line *textLine) pullWord(bag *wordBag, word *textWord, depthIdx int) {
	line.appendWord(word)
	bag.removeWord(word, depthIdx)
}

// parasBBox returns the embedded paraList and PdfRectangle of the cell.
func (cell compositeCell) parasBBox() (paraList, _ba.PdfRectangle) {
	return cell.paraList, cell.PdfRectangle
}

// sortStrict sorts `rl` in place. Keys, in order: `_bbce` descending, `_edga`
// ascending (differences for which `_dbeae` returns true are treated as ties),
// `_fcec` ascending, `_abeg` ascending.
func (rl rulingList) sortStrict() {
	_e.Slice(rl, func(i, j int) bool {
		a, b := rl[i], rl[j]
		switch {
		case a._bbce != b._bbce:
			return a._bbce > b._bbce
		case !_dbeae(a._edga - b._edga):
			return a._edga < b._edga
		case a._fcec != b._fcec:
			return a._fcec < b._fcec
		default:
			return a._abeg < b._abeg
		}
	})
}

// gridIntersecting reports whether `_gecbg` accepts both the `_fcec` pair and
// the `_abeg` pair of the two rulings.
func (r *ruling) gridIntersecting(other *ruling) bool {
	if !_gecbg(r._fcec, other._fcec) {
		return false
	}
	return _gecbg(r._abeg, other._abeg)
}

// _adgd returns the first `n` bytes of `s`, or all of `s` when it is shorter than `n`.
func _adgd(s string, n int) string {
	if n <= len(s) {
		return s[:n]
	}
	return s
}

// _cgfgf returns `_abag(a, b)` unless `_dbeae` accepts that value, in which case
// it falls back to `_eddba(a, b)`.
func _cgfgf(a, b bounded) float64 {
	primary := _abag(a, b)
	if _dbeae(primary) {
		return _eddba(a, b)
	}
	return primary
}

// _gbbc logs a header containing `title` and the number of `grids`, then prints
// each grid on its own line to stdout.
func _gbbc(title string, grids []rulingList) {
	_ga.Log.Info("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073", len(grids), title)
	for i := range grids {
		_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, grids[i].String())
	}
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
//
// Replace with a function like Extract() (*PageText, error)
func (e *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pt, stat1, stat2, err := e.extractPageText(e._ac, e._gbe, _agf.IdentityMatrix(), 0, false)
	// ErrColorOutOfRange is tolerated. errors.Is is used so that the sentinel is
	// still recognised if it arrives wrapped.
	if err != nil && !_b.Is(err, _ba.ErrColorOutOfRange) {
		return nil, 0, 0, err
	}
	if e._cf != nil {
		pt._eaag._caab = e._cf.UseSimplerExtractionProcess
	}
	pt.computeViews()
	if err = _ccab(pt); err != nil {
		return nil, 0, 0, err
	}
	if e._cf != nil {
		// Cropping is applied only when requested and a crop box is available.
		if e._cf.ApplyCropBox && e._eb != nil {
			pt.ApplyArea(*e._eb)
		}
		pt._eaag._bcc = e._cf.DisableDocumentTags
	}
	return pt, stat1, stat2, nil
}

// removeWord removes `word` from the bucket at `depthIdx`; the bucket itself is
// deleted from the map once it has no words left.
func (b *wordBag) removeWord(word *textWord, depthIdx int) {
	remaining := _gcdff(b._fcgd[depthIdx], word)
	if len(remaining) == 0 {
		delete(b._fcgd, depthIdx)
		return
	}
	b._fcgd[depthIdx] = remaining
}

// _cafaa rounds `x` to the nearest multiple of `step` and returns it as an int.
// A `step` of 0 is treated as 1.
func _cafaa(x float64, step int) int {
	if step == 0 {
		step = 1
	}
	unit := float64(step)
	return int(_gf.Round(x/unit) * unit)
}

// setCharSpacing stores `spacing` in the text state field `_ffaf`.
// It is a no-op on a nil receiver.
func (to *textObject) setCharSpacing(spacing float64) {
	if to == nil {
		return
	}
	to._efed._ffaf = spacing
	if _cfca {
		_ga.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", spacing, to._efed.String())
	}
}

// firstWord returns the first word in the bucket at `depthIdx`.
// It panics if that bucket is missing or empty.
func (b *wordBag) firstWord(depthIdx int) *textWord {
	return b._fcgd[depthIdx][0]
}
// reorder rearranges `paras` in place so that position i holds the element that
// was at index `order[i]`.
func (paras paraList) reorder(order []int) {
	shuffled := make(paraList, len(paras))
	for dst, src := range order {
		shuffled[dst] = paras[src]
	}
	copy(paras, shuffled)
}

// _dgda builds a list from its lines, a kind string and its child lists.
func _dgda(lines []*textLine, kind string, children []*list) *list {
	l := list{_aebd: lines, _cdde: kind, _abcc: children}
	return &l
}

// _eeag recursively builds lists from the lines grouped under `keys[level]` in
// `byKey`, restricted by `_acfa` to the range given by `lo` and `hi`.
// Each selected line starts one list; its children come from a recursive call
// at `level+1`, bounded by this line's `_bbfg` and the next selected line's
// `_bbfg` (or `hi` for the last line).
// NOTE(review): the meaning of the keys and of `_bbfg` is not visible here — confirm.
func _eeag(allLines []*textLine, byKey map[float64][]*textLine, keys []float64, level int, lo, hi float64) []*list {
	lists := []*list{}
	nextLevel := level + 1
	selected := _acfa(byKey[keys[level]], hi, lo)
	for i, head := range selected {
		children := []*list{}
		start := head._bbfg
		// Upper bound for this item: the next selected line, else the caller's bound.
		limit := hi
		if i < len(selected)-1 {
			limit = selected[i+1]._bbfg
		}
		if nextLevel < len(keys) {
			children = _eeag(allLines, byKey, keys, nextLevel, start, limit)
		}
		// The item's own lines stop at its first child's first line, if there is one.
		end := limit
		if len(children) > 0 {
			if first := children[0]; len(first._aebd) > 0 {
				end = first._aebd[0]._bbfg
			}
		}
		itemLines := []*textLine{head}
		itemLines = append(itemLines, _ggdbe(head, allLines, keys, start, end)...)
		item := _dgda(itemLines, "\u0062\u0075\u006c\u006c\u0065\u0074", children)
		item._ddef = _efgc(itemLines, "")
		lists = append(lists, item)
	}
	return lists
}

// _dggea converts exactly two numeric PDF objects to float64 values.
// It returns an error if `params` does not have length 2 or if conversion fails.
func _dggea(params []_add.PdfObject) (first, second float64, err error) {
	if len(params) != 2 {
		return 0, 0, _agc.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", len(params))
	}
	values, err := _add.GetNumbersAsFloat(params)
	if err != nil {
		return 0, 0, err
	}
	return values[0], values[1], nil
}

// _gaeg reports whether the two points have exactly equal coordinates.
func _gaeg(a, b _agf.Point) bool {
	if a.X != b.X {
		return false
	}
	return a.Y == b.Y
}

// removeDuplicates removes words that have the same text (`_eaae`) as another word
// in the same bucket and whose four rectangle coordinates each differ by less
// than a tolerance. The tolerance is `_dgea` times the `_eabbf` of the first word
// of the bucket being scanned. Buckets that become empty are deleted.
func (b *wordBag) removeDuplicates() {
	if _bdgb {
		_ga.Log.Info("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071", b.text())
	}
	for _, depthIdx := range b.depthIndexes() {
		if len(b._fcgd[depthIdx]) == 0 {
			continue
		}
		anchor := b._fcgd[depthIdx][0]
		tol := _dgea * anchor._eabbf
		depth := anchor._dfagd
		for _, idx := range b.depthBand(depth, depth+tol) {
			dups := map[*textWord]struct{}{}
			words := b._fcgd[idx]
			for _, keep := range words {
				if _, isDup := dups[keep]; isDup {
					continue
				}
				for _, other := range words {
					if _, isDup := dups[other]; isDup {
						continue
					}
					if other != keep && other._eaae == keep._eaae &&
						_gf.Abs(other.Llx-keep.Llx) < tol &&
						_gf.Abs(other.Urx-keep.Urx) < tol &&
						_gf.Abs(other.Lly-keep.Lly) < tol &&
						_gf.Abs(other.Ury-keep.Ury) < tol {
						dups[other] = struct{}{}
					}
				}
			}
			if len(dups) == 0 {
				continue
			}
			// Compact the surviving words to the front, then truncate.
			n := 0
			for _, w := range words {
				if _, isDup := dups[w]; !isDup {
					words[n] = w
					n++
				}
			}
			b._fcgd[idx] = words[:len(words)-len(dups)]
			if len(b._fcgd[idx]) == 0 {
				delete(b._fcgd, idx)
			}
		}
	}
}

// toTilings tidies `rl`, splits it into grids and converts each grid to a tiling.
// It returns the tidied rulings together with the tilings, or (nil, nil) when
// `rl` is empty.
func (rl rulingList) toTilings() (rulingList, []gridTiling) {
	rl.log("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s")
	if len(rl) == 0 {
		return nil, nil
	}
	rl = rl.tidied("\u0061\u006c\u006c")
	rl.log("\u0074\u0069\u0064\u0069\u0065\u0064")
	grids := rl.toGrids()
	tilings := make([]gridTiling, len(grids))
	for i := range grids {
		tilings[i] = grids[i].asTiling()
	}
	return rl, tilings
}

// bbox returns the smallest rectangle containing every point of every subpath.
// It panics if there is no subpath or the first subpath has no points.
func (ps pathSection) bbox() _ba.PdfRectangle {
	origin := ps._dbdc[0]._aeee[0]
	box := _ba.PdfRectangle{Llx: origin.X, Urx: origin.X, Lly: origin.Y, Ury: origin.Y}
	grow := func(pt _agf.Point) {
		switch {
		case pt.X < box.Llx:
			box.Llx = pt.X
		case pt.X > box.Urx:
			box.Urx = pt.X
		}
		switch {
		case pt.Y < box.Lly:
			box.Lly = pt.Y
		case pt.Y > box.Ury:
			box.Ury = pt.Y
		}
	}
	for i, sp := range ps._dbdc {
		points := sp._aeee
		if i == 0 {
			// The first point already seeded the box.
			points = points[1:]
		}
		for _, pt := range points {
			grow(pt)
		}
	}
	return box
}

// snapToGroups splits `rl` with `vertsHorzs`, applies `snapToGroupsDirection` to
// each non-empty half and returns the two halves concatenated.
func (rl rulingList) snapToGroups() rulingList {
	verts, horzs := rl.vertsHorzs()
	if len(verts) > 0 {
		verts = verts.snapToGroupsDirection()
	}
	if len(horzs) > 0 {
		horzs = horzs.snapToGroupsDirection()
	}
	snapped := append(verts, horzs...)
	snapped.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return snapped
}

// asRuling converts the rectangle to a ruling according to its kind `_ecaa`.
// The primary coordinate `_edga` is the mid-line of the rectangle across its
// thin dimension, `_fcec`/`_abeg` are its extent along the other axis, and
// `_ebcee` is the value returned by `checkWidth`.
// It returns (nil, false) when `checkWidth` rejects the rectangle or the kind is
// neither `_cfae` nor `_aaad`.
func (rr rectRuling) asRuling() (*ruling, bool) {
	out := ruling{_bbce: rr._ecaa, Color: rr.Color, _ccaa: _ccfg}
	switch rr._ecaa {
	case _cfae:
		out._edga = 0.5 * (rr.Llx + rr.Urx)
		out._fcec = rr.Lly
		out._abeg = rr.Ury
		width, ok := rr.checkWidth(rr.Llx, rr.Urx)
		if !ok {
			if _adbfb {
				_ga.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", rr)
			}
			return nil, false
		}
		out._ebcee = width
	case _aaad:
		out._edga = 0.5 * (rr.Lly + rr.Ury)
		out._fcec = rr.Llx
		out._abeg = rr.Urx
		width, ok := rr.checkWidth(rr.Lly, rr.Ury)
		if !ok {
			if _adbfb {
				_ga.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", rr)
			}
			return nil, false
		}
		out._ebcee = width
	default:
		_ga.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", rr._ecaa)
		return nil, false
	}
	return &out, true
}

// _gfcgc maps each rulingKind to its display name.
var _gfcgc = map[rulingKind]string{
	_fbcc: "\u006e\u006f\u006e\u0065",
	_aaad: "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",
	_cfae: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",
}
// depthRange returns, in ascending order, the bucket indexes of `b` that lie in
// the closed interval [`minIdx`, `maxIdx`]. It returns nil when there are none.
func (b *wordBag) depthRange(minIdx, maxIdx int) []int {
	var found []int
	for idx := range b._fcgd {
		if idx < minIdx || idx > maxIdx {
			continue
		}
		found = append(found, idx)
	}
	if len(found) == 0 {
		return nil
	}
	_e.Ints(found)
	return found
}

// _bfba returns the smaller of the two ints.
func _bfba(a, b int) int {
	if b <= a {
		return b
	}
	return a
}

// _gcdff returns `words` with the first occurrence of `word` removed (via `_fecf`).
// If `word` is not present, an error is logged and nil is returned.
// NOTE(review): the nil result makes removeWord drop the entire bucket on a
// miss — confirm this is intended rather than returning `words` unchanged.
func _gcdff(words []*textWord, word *textWord) []*textWord {
	for i := 0; i < len(words); i++ {
		if words[i] == word {
			return _fecf(words, i)
		}
	}
	_ga.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", word)
	return nil
}

// sort sorts `rl` in place using the list's own `comp` comparator.
func (rl rulingList) sort() {
	_e.Slice(rl, rl.comp)
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// String returns a human readable description of `s`.
func (s intSet) String() string {
	// Every key yielded by ranging over the map is a member, so no extra
	// membership lookup is needed.
	keys := make([]int, 0, len(s))
	for k := range s {
		keys = append(keys, k)
	}
	_e.Ints(keys)
	return _agc.Sprintf("\u0025\u002b\u0076", keys)
}

// endsInHyphen reports whether the line ends in a hyphen.
// The last rune of the last word must be in unicode.Hyphen. The result is then
// true if that word has `_gdec` set and `_dfc` accepts its text; otherwise it is
// `_dfc` applied to the text of the whole line.
// A line with no words returns false.
func (l *textLine) endsInHyphen() bool {
	if len(l._cdcg) == 0 {
		// Guard: indexing the last word of an empty line would panic.
		return false
	}
	lastWord := l._cdcg[len(l._cdcg)-1]
	text := lastWord._eaae
	r, size := _a.DecodeLastRuneInString(text)
	if size <= 0 || !_c.Is(_c.Hyphen, r) {
		return false
	}
	if lastWord._gdec && _dfc(text) {
		return true
	}
	return _dfc(l.text())
}

// nextLine moves the line position by (0, -`_bfa`), where `_bfa` is read from the
// current text state.
func (to *textObject) nextLine() {
	to.moveLP(0, -to._efed._bfa)
}
// fill appends a pathSection made of the current subpaths and the current fill
// colour to `*fills`. With the `_adgbf` debug flag set it also prints a summary,
// and with `_ebce` additionally the subpaths up to and including index 10.
func (ss *shapesState) fill(fills *[]pathSection) {
	section := pathSection{_dbdc: ss._gfce, Color: ss._edf.getFillColor()}
	*fills = append(*fills, section)
	if !_adgbf {
		return
	}
	box := section.bbox()
	_agc.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*fills), len(section._dbdc), ss, section.Color, box, box.Width(), box.Height())
	if !_ebce {
		return
	}
	for i, sp := range section._dbdc {
		_agc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sp)
		if i == 10 {
			break
		}
	}
}

// bbox returns the table's embedded rectangle.
func (t *textTable) bbox() _ba.PdfRectangle {
	return t.PdfRectangle
}

// addDiacritic appends `diacritic` to the text (`_bfdb`) of the word's last mark
// and replaces that text with its NFKC-normalised form.
// It panics if the word has no marks.
func (w *textWord) addDiacritic(diacritic string) {
	last := w._abcee[len(w._abcee)-1]
	last._bfdb = _ea.NFKC.String(last._bfdb + diacritic)
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// ExtractFonts returns all font information from the page extractor, including
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
//
// The argument `previousPageFonts` is used when trying to build a complete font catalog for multiple pages or the entire document.
// The entries from `previousPageFonts` are added to the returned result unless already included in the page, i.e. no duplicate entries.
//
// NOTE: If previousPageFonts is nil, all fonts from the page will be returned. Use it when building up a full list of fonts for a document or page range.
func (e *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	var collected PageFonts
	if err := collected.extractPageResourcesToFont(e._gbe); err != nil {
		return nil, err
	}
	if previousPageFonts != nil {
		// Carry over earlier fonts whose name `_aga` does not find among this page's fonts.
		for _, font := range previousPageFonts.Fonts {
			if _aga(collected.Fonts, font.FontName) {
				continue
			}
			collected.Fonts = append(collected.Fonts, font)
		}
	}
	return &PageFonts{Fonts: collected.Fonts}, nil
}

// list gathers the list lines (`getListLines`) and all lines (`_fdec`) of every
// para, then returns `_dbfg` applied to all lines and the `_ccba` result for
// the list lines.
func (paras paraList) list() []*list {
	var listLines, allLines []*textLine
	for _, para := range paras {
		listLines = append(listLines, para.getListLines()...)
		allLines = append(allLines, para._fdec...)
	}
	grouped := _ccba(listLines)
	return _dbfg(allLines, grouped)
}

// _fgdcg returns, for i in [0, n), the running offset at which entry i starts
// when entry i occupies len(m[i])+1 slots, together with the total slot count.
func _fgdcg(n int, m map[int][]float64) ([]int, int) {
	offsets := make([]int, n)
	total := 0
	for i := range offsets {
		offsets[i] = total
		total += len(m[i]) + 1
	}
	return offsets, total
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
func (pt *PageText) ApplyArea(bbox _ba.PdfRectangle) {
	// Keep only the marks that `_gabc` accepts for `bbox`.
	selected := make([]*textMark, 0, len(pt._aebf))
	for _, mark := range pt._aebf {
		if _gabc(mark.bbox(), bbox) {
			selected = append(selected, mark)
		}
	}

	// Build paras separately for each orientation 0, 90, 180 and 270, stopping
	// early once every selected mark has been consumed.
	var paras paraList
	remaining := len(selected)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		// At most `remaining` marks can still match, so that is the capacity
		// hint. The previous hint was the count already consumed, which is 0 on
		// the first pass.
		marks := make([]*textMark, 0, remaining)
		for _, mark := range selected {
			if mark._gfbg == orient {
				marks = append(marks, mark)
			}
		}
		if len(marks) == 0 {
			continue
		}
		paras = append(paras, _cabcg(marks, pt._bbdc, nil, nil, pt._eaag._caab)...)
		remaining -= len(marks)
	}

	// Refresh the derived views of the page text from the new paras.
	buf := new(_dc.Buffer)
	paras.writeText(buf)
	pt._ggff = buf.String()
	pt._ccca = paras.toTextMarks()
	pt._gdba = paras.tables()
}

// _gcdb reports whether rectangle `outer` fully contains rectangle `inner`
// (edges may coincide).
func _gcdb(outer, inner _ba.PdfRectangle) bool {
	if inner.Llx < outer.Llx || outer.Urx < inner.Urx {
		return false
	}
	return outer.Lly <= inner.Lly && inner.Ury <= outer.Ury
}
2024-02-11 21:29:32 +00:00
2024-04-16 11:40:43 +00:00
// PageTextOptions holds various options available in extraction process.
type PageTextOptions struct {
	_bcc  bool
	_caab bool
}

// _ffad returns the distinct inner keys of the nested map `tiles`, sorted ascending.
func _ffad(tiles map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(tiles))
	seen := make(map[float64]struct{}, len(tiles))
	for _, inner := range tiles {
		for key := range inner {
			if _, dup := seen[key]; !dup {
				keys = append(keys, key)
				seen[key] = struct{}{}
			}
		}
	}
	_e.Float64s(keys)
	return keys
}

// maxDepth returns `_ebgd` minus the bag's lower y coordinate.
func (b *wordBag) maxDepth() float64 {
	return b._ebgd - b.Lly
}

// isActualGrid augments `rl` with `augmentGrid` and decides whether the result
// forms a grid. It needs at least `_afec`+1 rulings in the first group and
// `_dccbc`+1 in the second. Then, depending on `_ecbfe`, either the outermost
// rulings of the two groups must meet at their ends (per `_cedbg`), or both
// groups must report `aligned()`.
// On success it returns the two groups concatenated and true; otherwise (nil, false).
func (rl rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := rl.augmentGrid()
	if len(verts) < _afec+1 || len(horzs) < _dccbc+1 {
		if _adgbf {
			_ga.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(verts), len(horzs), _afec+1, _dccbc+1)
		}
		return nil, false
	}
	if _adgbf {
		_ga.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", rl, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for i, r := range rl {
			_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", i, r)
		}
	}
	if _ecbfe {
		// Corner check on the first/last ruling of each group.
		v0, v1 := verts[0], verts[len(verts)-1]
		h0, h1 := horzs[0], horzs[len(horzs)-1]
		if !_cedbg(v0._edga-h0._fcec) || !_cedbg(v1._edga-h0._abeg) ||
			!_cedbg(h0._edga-v0._abeg) || !_cedbg(h1._edga-v0._fcec) {
			if _adgbf {
				_ga.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", v0, v1, h0, h1)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			if _eda {
				_ga.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _adgbf {
				_ga.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(horzs))
			}
			return nil, false
		}
	}
	grid := append(verts, horzs...)
	return grid, true
}
2024-02-11 21:29:32 +00:00
2024-04-16 11:40:43 +00:00
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct {
	Fonts []Font
}

// size returns the number of entries currently on the stack.
func (stack *stateStack) size() int {
	entries := *stack
	return len(entries)
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// String returns a description of `l`.
func (l *textLine) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _agc.Sprintf(layout, l._bbfg, l.PdfRectangle, l._fdfb, l.text())
}

// put stores `cell` in the table under the key `_bgcc(x, y)`.
func (t *textTable) put(x, y int, cell *textPara) {
	key := _bgcc(x, y)
	t._egfea[key] = cell
}

// textResult bundles a PageText with two integer values.
type textResult struct {
	_egcbc PageText
	_dcga  int
	_bcg   int
}

// findPrimSec returns the first ruling whose primary coordinate `_edga` matches
// `prim` (per `_dbeae`) and whose [`_fcec`, `_abeg`] range, widened by `_dfed` at
// both ends, contains `sec`. It returns nil if there is none.
func (rl rulingList) findPrimSec(prim, sec float64) *ruling {
	for _, r := range rl {
		if !_dbeae(r._edga - prim) {
			continue
		}
		if r._fcec-_dfed <= sec && sec <= r._abeg+_dfed {
			return r
		}
	}
	return nil
}

// extractTables finds tables among `paras` using `tilings` and returns the paras
// with those tables applied. Lists shorter than `_ggca` are returned unchanged.
func (paras paraList) extractTables(tilings []gridTiling) paraList {
	if _efda {
		_ga.Log.Debug("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(paras))
	}
	if len(paras) < _ggca {
		return paras
	}
	tables := paras.findTables(tilings)
	if _efda {
		_ga.Log.Info("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(tables))
		for i, table := range tables {
			title := _agc.Sprintf("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064", i)
			table.log(title)
		}
	}
	return paras.applyTables(tables)
}

// _dfdc applies `_fgfag` to the `_dgabg` fields of the two paras.
func _dfdc(a, b *textPara) bool {
	return _fgfag(a._dgabg, b._dgabg)
}
// merge collapses `rl` into a single ruling. Kind (`_bbce`), `_ccaa` and Color are
// taken from the first element, the primary coordinate `_edga` is the mean of all
// elements, and `_fcec`/`_abeg` are the minimum and maximum over all elements.
// It panics on an empty list.
func (rl rulingList) merge() *ruling {
	first := rl[0]
	primarySum := first._edga
	lo, hi := first._fcec, first._abeg
	for _, r := range rl[1:] {
		primarySum += r._edga
		if r._fcec < lo {
			lo = r._fcec
		}
		if r._abeg > hi {
			hi = r._abeg
		}
	}
	merged := &ruling{
		_bbce: first._bbce,
		_ccaa: first._ccaa,
		Color: first.Color,
		_edga: primarySum / float64(len(rl)),
		_fcec: lo,
		_abeg: hi,
	}
	if !_eda {
		return merged
	}
	_ga.Log.Info("\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073", len(rl), merged)
	for i, r := range rl {
		_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, r)
	}
	return merged
}
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20> ).
func (e *Extractor) ExtractText() (string, error) {
	// The two statistics returned by ExtractTextWithStats are discarded.
	text, _, _, err := e.ExtractTextWithStats()
	return text, err
}

// compositeCell pairs a rectangle with a list of paras.
type compositeCell struct {
	_ba.PdfRectangle
	paraList
}

// writeText writes the para's text to `w`.
// Without a table (`_fbbea` nil) it delegates to `writeCellText`. Otherwise it
// walks the table row by row: a missing cell is written as a tab, every cell is
// followed by a space, and rows are separated by a newline.
// Write errors are not reported.
func (p *textPara) writeText(w _gg.Writer) {
	table := p._fbbea
	if table == nil {
		p.writeCellText(w)
		return
	}
	for y := 0; y < p._fbbea._cgae; y++ {
		for x := 0; x < p._fbbea._eacg; x++ {
			if cell := p._fbbea.get(x, y); cell != nil {
				cell.writeCellText(w)
			} else {
				w.Write([]byte("\u0009"))
			}
			w.Write([]byte("\u0020"))
		}
		if y < p._fbbea._cgae-1 {
			w.Write([]byte("\u000a"))
		}
	}
}

type markKind int

// intSet is a set of ints.
type intSet map[int]struct{}

// has reports whether `v` is in the set.
func (s intSet) has(v int) bool {
	_, present := s[v]
	return present
}

// scanBand scans the words of `b` whose depth (`_dfagd`) lies in
// [`minDepth`, `maxDepth`] widened by `_fcca` times `para._cdea`, and counts
// those that pass `accept`, the font-size tolerance `fontTol` (ignored when
// not positive) and `para.blocked`.
//
// Unless `detectOnly` is set, each counted word is pulled into `para` and the
// pending removals are applied to `b` at the end; with `detectOnly` the scan of
// a bucket stops at its first counted word. Unless `freezeDepth` is set, the
// depth range grows to include every counted word.
// The first parameter is unused.
func (b *wordBag) scanBand(_ string, para *wordBag, accept func(para *wordBag, word *textWord) bool, minDepth, maxDepth, fontTol float64, detectOnly, freezeDepth bool) int {
	var removals map[int]map[*textWord]struct{}
	if !detectOnly {
		removals = b.makeRemovals()
	}
	margin := _fcca * para._cdea
	count := 0
	for _, depthIdx := range b.depthBand(minDepth-margin, maxDepth+margin) {
		if len(b._fcgd[depthIdx]) == 0 {
			continue
		}
		for _, word := range b._fcgd[depthIdx] {
			if word._dfagd < minDepth-margin || maxDepth+margin < word._dfagd {
				continue
			}
			if !accept(para, word) {
				continue
			}
			// Two measures of font size mismatch; the smaller is compared to the tolerance.
			relDiff := 2.0 * _gf.Abs(word._eabbf-para._cdea) / (word._eabbf + para._cdea)
			ratio := _gf.Max(word._eabbf/para._cdea, para._cdea/word._eabbf)
			mismatch := _gf.Min(relDiff, ratio)
			if fontTol > 0 && mismatch > fontTol {
				continue
			}
			if para.blocked(word) {
				continue
			}
			if !detectOnly {
				para.pullWord(word, depthIdx, removals)
			}
			count++
			if !freezeDepth {
				if word._dfagd < minDepth {
					minDepth = word._dfagd
				}
				if word._dfagd > maxDepth {
					maxDepth = word._dfagd
				}
			}
			if detectOnly {
				break
			}
		}
	}
	if !detectOnly {
		b.applyRemovals(removals)
	}
	return count
}

// _adbb passes the absolute x and y distances between the two points to `_caeb`.
func _adbb(a, b _agf.Point) bool {
	dx := _gf.Abs(a.X - b.X)
	dy := _gf.Abs(a.Y - b.Y)
	return _caeb(dx, dy)
}

// del removes `v` from the set.
func (s intSet) del(v int) {
	delete(s, v)
}

// _eedf starts a new textLine from the first word of the bucket `depthIdx` of
// `bag`, taking the line's rectangle, `_fdfb` and `_bbfg` from that word, and
// moves the word from the bag into the line.
func _eedf(bag *wordBag, depthIdx int) *textLine {
	word := bag.firstWord(depthIdx)
	line := &textLine{PdfRectangle: word.PdfRectangle, _fdfb: word._eabbf, _bbfg: word._dfagd}
	line.pullWord(bag, word, depthIdx)
	return line
}
// ToTextMark returns the public view of `tm`.
func ( _gdbe * textMark ) ToTextMark ( ) TextMark { return TextMark { Text : _gdbe . _bfdb , Original : _gdbe . _gccf , BBox : _gdbe . _dded , Font : _gdbe . _ccdg , FontSize : _gdbe . _gccfd , FillColor : _gdbe . _fbaf , StrokeColor : _gdbe . _eaee , Orientation : _gdbe . _gfbg , DirectObject : _gdbe . _bbgf , ObjString : _gdbe . _fcbc , Tw : _gdbe . Tw , Th : _gdbe . Th , Tc : _gdbe . _gccd , Index : _gdbe . _aeaee } ;
2024-03-27 22:34:33 +00:00
} ;
2024-04-16 11:40:43 +00:00
// New returns an Extractor instance for extracting content from the input PDF page.
// It is shorthand for NewWithOptions(page, nil).
func New(page *_ba.PdfPage) (*Extractor, error) {
	extractor, err := NewWithOptions(page, nil)
	return extractor, err
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// NewWithOptions returns an Extractor instance for extracting content from the input PDF page
// with options. `options` may be nil.
//
// An error from reading the page content streams is returned as is; an error from reading the
// MediaBox is returned inside an "extractor requires mediaBox" message. A MediaBox whose
// lower-left and upper-right coordinates are reversed is put back in order. When
// options.IncludeAnnotations is set the page annotations are loaded too; a failure to load
// them is only logged.
func NewWithOptions(page *_ba.PdfPage, options *Options) (*Extractor, error) {
	const usageKey = "extractor.NewWithOptions"

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}

	treeRoot, tagged := page.GetStructTreeRoot()
	if !tagged {
		_ga.Log.Info("The pdf document is not tagged. StructTreeRoot doesn't exist.")
	}
	pageObj := page.GetContainingPdfObject()

	mediaBox, err := page.GetMediaBox()
	if err != nil {
		return nil, _agc.Errorf("extractor requires mediaBox. %v", err)
	}

	ext := &Extractor{
		_ac:  contents,
		_gbe: page.Resources,
		_cb:  *mediaBox,
		_eb:  page.CropBox,
		_bd:  map[string]fontEntry{},
		_baa: map[string]textResult{},
		_gfe: map[string]textResult{},
		_cf:  options,
		_ab:  treeRoot,
		_dd:  pageObj,
	}

	// Normalise a MediaBox given with its corners the wrong way round.
	box := &ext._cb
	if box.Llx > box.Urx {
		_ga.Log.Info("MediaBox has X coordinates reversed. %.2f Fixing.", *box)
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		_ga.Log.Info("MediaBox has Y coordinates reversed. %.2f Fixing.", *box)
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	if ext._cf != nil && ext._cf.IncludeAnnotations {
		ext._bg, err = page.GetAnnotations()
		if err != nil {
			// Best effort: extraction carries on without annotations.
			_ga.Log.Debug("Error getting annotations: %v", err)
		}
	}

	_ce.TrackUse(usageKey)
	return ext, nil
}

// toTextMarks returns the TextMarks of the paragraph, using `offset` as the running offset
// handed to the mark builders.
//
// A paragraph without a table is converted with toCellTextMarks. For a paragraph holding a
// table the cells are visited row by row: a missing cell contributes a tab mark, every cell is
// followed by a space mark, and rows are separated by a newline mark. If the table is
// exportable, the marks are finally passed through `_dcdb` together with the exported table.
func (p *textPara) toTextMarks(offset *int) []TextMark {
	table := p._fbbea
	if table == nil {
		return p.toCellTextMarks(offset)
	}

	var marks []TextMark
	lastRow := table._cgae - 1
	for y := 0; y < table._cgae; y++ {
		for x := 0; x < table._eacg; x++ {
			if cell := table.get(x, y); cell != nil {
				marks = append(marks, cell.toCellTextMarks(offset)...)
			} else {
				marks = _ffeg(marks, offset, "\t")
			}
			marks = _ffeg(marks, offset, " ")
		}
		if y < lastRow {
			marks = _ffeg(marks, offset, "\n")
		}
	}

	if table.isExportable() {
		exported := table.toTextTable()
		marks = _dcdb(marks, &exported)
	}
	return marks
}

// reduce returns a copy of the table without its empty composite rows and columns. When no row
// or column is empty the receiver itself is returned.
func (t *textTable) reduce() *textTable {
	keptRows := make([]int, 0, t._cgae)
	for y := 0; y < t._cgae; y++ {
		if !t.emptyCompositeRow(y) {
			keptRows = append(keptRows, y)
		}
	}
	keptCols := make([]int, 0, t._eacg)
	for x := 0; x < t._eacg; x++ {
		if !t.emptyCompositeColumn(x) {
			keptCols = append(keptCols, x)
		}
	}

	if len(keptRows) == t._cgae && len(keptCols) == t._eacg {
		return t
	}

	reduced := &textTable{
		_edeg:  t._edeg,
		_eacg:  len(keptCols),
		_cgae:  len(keptRows),
		_egfea: make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _efda {
		_ga.Log.Info("reduce: %dx%d -> %dx%d", t._eacg, t._cgae, len(keptCols), len(keptRows))
		_ga.Log.Info("reducedCols: %+v", keptCols)
		_ga.Log.Info("reducedRows: %+v", keptRows)
	}

	// (newX, newY) index the reduced table, (oldX, oldY) the original one.
	for newY, oldY := range keptRows {
		for newX, oldX := range keptCols {
			composite, rulings := t.getComposite(oldX, oldY)
			if composite == nil {
				continue
			}
			if _efda {
				_agc.Printf("  %2d, %2d (%2d, %2d) %q\n", newX, newY, oldX, oldY, _adgd(composite.merge().text(), 50))
			}
			reduced.putComposite(newX, newY, composite, rulings)
		}
	}
	return reduced
}

// getDepthIdx converts `depth` to a depth index with `_ceee` and clamps it to the range spanned
// by the bag's existing depth indexes. The bag must hold at least one depth index.
func (b *wordBag) getDepthIdx(depth float64) int {
	indexes := b.depthIndexes()
	idx := _ceee(depth)
	lowest, highest := indexes[0], indexes[len(indexes)-1]
	switch {
	case idx < lowest:
		return lowest
	case idx > highest:
		return highest
	}
	return idx
}

// parseStructElement fills the receiver from the structure element dictionary `obj`.
//
// The "S" entry (as a string, empty when absent) and the "Pg" entry are always stored. The "K"
// entry decides the rest:
//   - an integer is stored in `_cfcb`;
//   - an array (or a reference, which is treated as a one-element array) first resets `_cfcb`
//     to -1. An array consisting of a single integer just stores that integer. Otherwise every
//     integer element overwrites `_cfcb` and every other element is parsed recursively into
//     a child element stored in `_abff`;
//   - anything else leaves `_cfcb` and `_abff` untouched.
//
// If `obj` is not a dictionary the problem is logged and the receiver is left unchanged.
func (se *structElement) parseStructElement(obj _add.PdfObject) {
	dict, isDict := _add.GetDict(obj)
	if !isDict {
		_ga.Log.Debug("parseStructElement: dictionary object not found.")
		return
	}

	structType := ""
	if s := dict.Get("S"); s != nil {
		structType = s.String()
	}
	se._bbag = structType
	se._eada = dict.Get("Pg")

	var kids *_add.PdfObjectArray
	switch k := dict.Get("K").(type) {
	case *_add.PdfObjectInteger:
		se._cfcb = int64(*k)
		return
	case *_add.PdfObjectReference:
		kids = _add.MakeArray(k)
	case *_add.PdfObjectArray:
		kids = k
	default:
		return
	}

	se._cfcb = -1
	if kids.Len() == 1 {
		if only, isInt := kids.Elements()[0].(*_add.PdfObjectInteger); isInt {
			se._cfcb = int64(*only)
			return
		}
	}

	children := []structElement{}
	for _, kid := range kids.Elements() {
		if n, isInt := kid.(*_add.PdfObjectInteger); isInt {
			se._cfcb = int64(*n)
			continue
		}
		var child structElement
		child.parseStructElement(kid)
		children = append(children, child)
	}
	se._abff = children
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_fa *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	ctx := &imageExtractContext{_gce: options}
	if err := ctx.extractContentStreamImages(_fa._ac, _fa._gbe); err != nil {
		return nil, err
	}
	return &PageImages{Images: ctx._dcc}, nil
}

// sort orders the words inside every depth bin of the bag using the `_eddba` comparison.
func (b *wordBag) sort() {
	for _, words := range b._fcgd {
		words := words
		less := func(i, j int) bool {
			return _eddba(words[i], words[j]) < 0
		}
		_e.Slice(words, less)
	}
}

// cubicTo handles a cubic curve segment by adding only its end point (x3, y3) to the shape
// state; the two control points are ignored.
func (ss *shapesState) cubicTo(x1, y1, x2, y2, x3, y3 float64) {
	if _fdbg {
		_ga.Log.Info("cubicTo:")
	}
	ss.addPoint(x3, y3)
}

// intersections returns, for each ruling index, the set of indexes of the rulings it
// intersects. Only rulings of kind `_cfae` are tested against rulings of kind `_aaad`.
//
// nil is returned when there are fewer than `_afec`+1 rulings of the first kind or fewer than
// `_dccbc`+1 of the second, and also when the two groups together exceed `_ccbb` rulings.
func (vecs rulingList) intersections() map[int]intSet {
	var groupA, groupB []int
	for i, v := range vecs {
		switch v._bbce {
		case _cfae:
			groupA = append(groupA, i)
		case _aaad:
			groupB = append(groupB, i)
		}
	}

	if len(groupA) < _afec+1 || len(groupB) < _dccbc+1 {
		return nil
	}
	if len(groupA)+len(groupB) > _ccbb {
		_ga.Log.Debug("intersections: TOO MANY rulings vecs=%d = %d x %d", len(vecs), len(groupA), len(groupB))
		return nil
	}

	crossings := make(map[int]intSet, len(groupA)+len(groupB))
	for _, a := range groupA {
		for _, b := range groupB {
			if !vecs[a].intersects(vecs[b]) {
				continue
			}
			// Create the per-ruling sets on first use.
			if _, ok := crossings[a]; !ok {
				crossings[a] = make(intSet)
			}
			if _, ok := crossings[b]; !ok {
				crossings[b] = make(intSet)
			}
			crossings[a].add(b)
			crossings[b].add(a)
		}
	}
	return crossings
}

// fontEntry pairs a PDF font with an int64 value kept alongside it.
type fontEntry struct {
	_cbad *_ba.PdfFont
	_ccg  int64
}

// _decf maps each named markKind to its display name.
var _decf = map[markKind]string{
	_bcfc: "stroke",
	_ccfg: "fill",
	_degg: "augment",
}
// extractFormImages extracts the images drawn by the form XObject called `name` in
// `resources`, by running image extraction on the form's content stream.
//
// The form's own resources are used when it has any, otherwise the resources it was looked up
// in. A name that does not resolve to a form is not an error. The context's `_bde` counter is
// incremented after a form has been processed successfully. `gs` is not used by the body.
func (ctx *imageExtractContext) extractFormImages(name *_add.PdfObjectName, gs _fc.GraphicsState, resources *_ba.PdfPageResources) error {
	form, err := resources.GetXObjectFormByName(*name)
	switch {
	case err != nil:
		return err
	case form == nil:
		return nil
	}

	stream, err := form.GetContentStream()
	if err != nil {
		return err
	}

	formResources := form.Resources
	if formResources == nil {
		formResources = resources
	}

	if err = ctx.extractContentStreamImages(string(stream), formResources); err != nil {
		return err
	}
	ctx._bde++
	return nil
}
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
// When page text extraction fails, the returned text is empty and the counts and error are
// passed through from ExtractPageText.
func (_dfe *Extractor) ExtractTextWithStats() (_cgda string, _ddb int, _fbeb int, _dfg error) {
	pageText, _ddb, _fbeb, _dfg := _dfe.ExtractPageText()
	if _dfg == nil {
		return pageText.Text(), _ddb, _fbeb, nil
	}
	return "", _ddb, _fbeb, _dfg
}
2024-03-27 22:34:33 +00:00
// String returns a description of `k`.
2024-04-16 11:40:43 +00:00
func (k rulingKind) String() string {
	// Kinds missing from the name table are reported by their numeric value.
	if name, known := _gfcgc[k]; known {
		return name
	}
	return _agc.Sprintf("Not a ruling: %d", k)
}

// compositeColCorridors returns a map with one entry per table column, each set to nil.
func (t *textTable) compositeColCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, t._eacg)
	if _efda {
		_ga.Log.Info("compositeColCorridors: w=%d ", t._eacg)
	}
	for x := 0; x < t._eacg; x++ {
		corridors[x] = nil
	}
	return corridors
}

// ruling is a ruling line: its ruling kind, the kind of mark it came from, its colour and four
// float64 values.
type ruling struct {
	_bbce rulingKind
	_ccaa markKind
	_gff.Color
	_edga  float64
	_fcec  float64
	_abeg  float64
	_ebcee float64
}

// log prints a description of the tiling, prefixed by `title`, when the `_dfge` debug flag is
// set. Cells are listed per `_dfca` value and, within it, per `_fece` value; combinations with
// no cell are skipped.
func (g gridTiling) log(title string) {
	if !_dfge {
		return
	}
	_ga.Log.Info("tiling: %d x %d %q", len(g._fece), len(g._dfca), title)
	_agc.Printf("   llx=%.2f\n", g._fece)
	_agc.Printf("   lly=%.2f\n", g._dfca)
	for i, lly := range g._dfca {
		row, haveRow := g._gdcg[lly]
		if !haveRow {
			continue
		}
		_agc.Printf("%4d: %6.2f\n", i, lly)
		for j, llx := range g._fece {
			if cell, haveCell := row[llx]; haveCell {
				_agc.Printf("%8d: %s\n", j, cell.String())
			}
		}
	}
}

// _fecf removes the element at index `idx` from `words` by shifting the tail down one place in
// the same backing array, and returns the shortened slice.
func _fecf(words []*textWord, idx int) []*textWord {
	return append(words[:idx], words[idx+1:]...)
}

// structTreeRoot holds the parsed structure tree root: its child structure elements and
// a string value.
type structTreeRoot struct {
	_faef  []structElement
	_adbca string
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// String returns a human readable description of `ss`.
func (ss *shapesState) String() string {
	return _agc.Sprintf("{%d subpaths fresh=%t}", len(ss._gfce), ss._gbdgg)
}

// getStrokeColor returns the stroking colour of the text object's graphics state, converted
// by `_eddaee` from the stroking colour space and colour.
func (to *textObject) getStrokeColor() _gff.Color {
	gs := to._bace
	return _eddaee(gs.ColorspaceStroking, gs.ColorStroking)
}

// extractPageResourcesToFont appends a Font entry to `_ge.Fonts` for every font in the Font
// dictionary of `_db` whose descriptor font name is not already recorded.
//
// For each font the embedded font program, if present, is decoded from the descriptor's
// FontFile, FontFile2 or FontFile3 stream (checked in that order) and given the file name
// "<font name>.pfb", ".ttf" or ".cff" respectively. A font with no decodable font file is
// still recorded, with empty data and file name.
//
// An error is returned when the resources have no Font dictionary, a listed font cannot be
// loaded, a font stream cannot be decoded, or a font has no descriptor or descriptor font
// name. The last case previously caused a nil dereference.
func (_ge *PageFonts) extractPageResourcesToFont(_db *_ba.PdfPageResources) error {
	fontDict, ok := _add.GetDict(_db.Font)
	if !ok {
		return _b.New(_aba)
	}

	for _, key := range fontDict.Keys() {
		fontObj, found := _db.GetFontByName(key)
		if !found {
			return _b.New(_cgc)
		}
		font, err := _ba.NewPdfFontFromPdfObject(fontObj)
		if err != nil {
			return err
		}

		// The descriptor and its FontName are needed below; fail cleanly rather than panic
		// when a malformed font lacks either.
		desc := font.FontDescriptor()
		if desc == nil || desc.FontName == nil {
			return _b.New("font descriptor or font name not available")
		}
		name := desc.FontName.String()
		subtype := font.Subtype()

		if _aga(_ge.Fonts, name) {
			continue
		}

		hasToUnicode := len(font.ToUnicode()) != 0

		var (
			data     []byte
			fileName string
		)
		switch {
		case desc.FontFile != nil:
			if stream, isStream := _add.GetStream(desc.FontFile); isStream {
				if data, err = _add.DecodeStream(stream); err != nil {
					return err
				}
				fileName = name + ".pfb"
			}
		case desc.FontFile2 != nil:
			if stream, isStream := _add.GetStream(desc.FontFile2); isStream {
				if data, err = _add.DecodeStream(stream); err != nil {
					return err
				}
				fileName = name + ".ttf"
			}
		case desc.FontFile3 != nil:
			if stream, isStream := _add.GetStream(desc.FontFile3); isStream {
				if data, err = _add.DecodeStream(stream); err != nil {
					return err
				}
				fileName = name + ".cff"
			}
		}
		if len(fileName) < 1 {
			_ga.Log.Debug(_aef)
		}

		_ge.Fonts = append(_ge.Fonts, Font{
			FontName:       name,
			PdfFont:        font,
			IsCID:          font.IsCID(),
			IsSimple:       font.IsSimple(),
			ToUnicode:      hasToUnicode,
			FontType:       subtype,
			FontData:       data,
			FontFileName:   fileName,
			FontDescriptor: desc,
		})
	}
	return nil
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// String returns a string describing `ma`: "EMPTY" for an array without marks, otherwise the
// number of marks followed by the first and the last mark.
func (ma TextMarkArray) String() string {
	marks := ma._ffca
	if len(marks) == 0 {
		return "EMPTY"
	}
	first, last := marks[0], marks[len(marks)-1]
	return _agc.Sprintf("{TEXTMARKARRAY: %d elements\n\tfirst=%s\n\t last=%s}", len(marks), first, last)
}

// depth returns the depth of the paragraph: -1 when the `_cddef` flag is set, else the depth
// of its first line when it has lines, else the depth of its table.
func (p *textPara) depth() float64 {
	switch {
	case p._cddef:
		return -1.0
	case len(p._fdec) > 0:
		return p._fdec[0]._bbfg
	default:
		return p._fbbea.depth()
	}
}

// _gecgf returns `start`, followed by the values of `vals` lying strictly between `start` and
// `end`, followed by `end`. `start` and `end` may be given in either order. Scanning of `vals`
// stops at the first value at or beyond the larger bound, so values after it are not considered.
func _gecgf(vals []float64, start, end float64) []float64 {
	lower, upper := start, end
	if upper < lower {
		lower, upper = upper, lower
	}

	out := make([]float64, 0, len(vals)+2)
	out = append(out, start)
	for _, v := range vals {
		if v <= lower {
			continue
		}
		if v >= upper {
			break
		}
		out = append(out, v)
	}
	return append(out, end)
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// String returns a description of `b`: its bounding box, its font size, the number of words
// it holds and the text of those words, listed in order of increasing depth index.
func (b *wordBag) String() string {
	var texts []string
	for _, depthIdx := range b.depthIndexes() {
		for _, word := range b._fcgd[depthIdx] {
			texts = append(texts, word._eaae)
		}
	}
	return _agc.Sprintf("%.2f fontsize=%.2f %d %q", b.PdfRectangle, b._cdea, len(texts), texts)
}

// firstReadingIndex returns the depth index of the word that comes first, by the `_eddba`
// comparison, among the first words of the depth bins in a band starting at `minDepthIdx`.
// The band runs from (minDepthIdx+1)*`_dage` to that depth plus `_bag` times the size of the
// first word at `minDepthIdx`. `minDepthIdx` itself is returned when no bin in the band has an
// earlier first word.
func (b *wordBag) firstReadingIndex(minDepthIdx int) int {
	size := b.firstWord(minDepthIdx)._eabbf
	bandStart := float64(minDepthIdx+1) * _dage
	bandEnd := bandStart + _bag*size

	best := minDepthIdx
	for _, depthIdx := range b.depthBand(bandStart, bandEnd) {
		if _eddba(b.firstWord(depthIdx), b.firstWord(best)) < 0 {
			best = depthIdx
		}
	}
	return best
}

// _aebeec draws `img` through `mask` and returns the result as a new RGBA image.
//
// The output is as wide as the wider input and as tall as the taller input. Whichever of the
// two inputs does not already have that size is first scaled to it with bilinear
// interpolation.
//
// Fix: the mask used to be rescaled from the pixels of `img` (read through the mask's bounds),
// which threw the mask content away whenever the mask was the smaller image. It is now
// rescaled from its own pixels.
func _aebeec(img, mask _ag.Image) _ag.Image {
	maskSize, imgSize := mask.Bounds().Size(), img.Bounds().Size()

	width, height := maskSize.X, maskSize.Y
	if imgSize.X > width {
		width = imgSize.X
	}
	if imgSize.Y > height {
		height = imgSize.Y
	}
	canvas := _ag.Rect(0, 0, width, height)

	if maskSize.X != width || maskSize.Y != height {
		scaledMask := _ag.NewRGBA(canvas)
		_ed.BiLinear.Scale(scaledMask, canvas, mask, mask.Bounds(), _ed.Over, nil)
		mask = scaledMask
	}
	if imgSize.X != width || imgSize.Y != height {
		scaledImg := _ag.NewRGBA(canvas)
		_ed.BiLinear.Scale(scaledImg, canvas, img, img.Bounds(), _ed.Over, nil)
		img = scaledImg
	}

	out := _ag.NewRGBA(canvas)
	_ed.DrawMask(out, canvas, img, _ag.Point{}, mask, _ag.Point{}, _ed.Over)
	return out
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// String returns a string describing `i`: its rectangle followed by one character per flag,
// "L", "R", "B" and "T" in that order, with "_" standing in for a flag that is not set.
func (i gridTile) String() string {
	flag := func(set bool, letter string) string {
		if !set {
			return "_"
		}
		return letter
	}
	return _agc.Sprintf("%6.2f %1s%1s%1s%1s", i.PdfRectangle,
		flag(i._ecaf, "L"), flag(i._bfab, "R"), flag(i._bgbfd, "B"), flag(i._dgccd, "T"))
}

// _baff builds a 2x2 table from four paragraphs: `topLeft` at (0,0), `topRight` at (1,0),
// `bottomLeft` at (0,1) and `bottomRight` at (1,1), where the coordinates are (column, row).
func _baff(topLeft, topRight, bottomLeft, bottomRight *textPara) *textTable {
	table := &textTable{_eacg: 2, _cgae: 2, _egfea: make(map[uint64]*textPara, 4)}
	table.put(0, 0, topLeft)
	table.put(1, 0, topRight)
	table.put(0, 1, bottomLeft)
	table.put(1, 1, bottomRight)
	return table
}

// xNeighbours computes the neighbours of each paragraph from the horizontal extents of the
// paragraphs. Every paragraph contributes an opening event at its left edge and a closing event
// at its right edge; when `tol` is non-zero both edges are pushed outwards by `tol` times the
// paragraph's font size. The events are resolved by eventNeighbours.
func (paras paraList) xNeighbours(tol float64) map[*textPara][]int {
	events := make([]event, 2*len(paras))
	for i, para := range paras {
		left, right := para.Llx, para.Urx
		if tol != 0 {
			left -= tol * para.fontsize()
			right += tol * para.fontsize()
		}
		events[2*i] = event{left, true, i}
		events[2*i+1] = event{right, false, i}
	}
	return paras.eventNeighbours(events)
}

// aligned reports whether enough of the rulings line up with one another.
//
// Each ruling after the first is matched against the rulings tracked so far using
// gridIntersecting; on a match the matched ruling's count is incremented, otherwise the ruling
// starts being tracked itself. The list is aligned when the fraction of tracked rulings that
// were never matched, relative to the total number of rulings, is at most 1 - `_efeb`. Lists
// with fewer than two rulings are never aligned.
func (vecs rulingList) aligned() bool {
	if len(vecs) < 2 {
		return false
	}

	matches := map[*ruling]int{vecs[0]: 0}
	for _, vec := range vecs[1:] {
		found := false
		for tracked := range matches {
			if vec.gridIntersecting(tracked) {
				matches[tracked]++
				found = true
				break
			}
		}
		if !found {
			matches[vec] = 0
		}
	}

	unmatched := 0
	for _, n := range matches {
		if n == 0 {
			unmatched++
		}
	}

	fraction := float64(unmatched) / float64(len(vecs))
	isAligned := fraction <= 1.0-_efeb
	if _adgbf {
		_ga.Log.Info("aligned=%t unmatched=%.2f=%d/%d vecs=%s", isAligned, fraction, unmatched, len(vecs), vecs.String())
	}
	return isAligned
}

// encloses reports whether the interval [lo, hi] lies within the ruling's [`_fcec`, `_abeg`]
// interval widened by `_dfed` at both ends.
func (r *ruling) encloses(lo, hi float64) bool {
	if lo < r._fcec-_dfed {
		return false
	}
	return hi <= r._abeg+_dfed
}

// depthIndexes returns the depth indexes of the bag's bins in increasing order, or nil when
// the bag has no bins.
func (b *wordBag) depthIndexes() []int {
	if len(b._fcgd) == 0 {
		return nil
	}
	indexes := make([]int, 0, len(b._fcgd))
	for depthIdx := range b._fcgd {
		indexes = append(indexes, depthIdx)
	}
	_e.Ints(indexes)
	return indexes
}

// blocked reports whether a ruling of the bag separates `word` from the bag.
//
// Horizontally, when `word` is entirely to one side of the bag, the `_febe` rulings are asked
// whether they block the gap between the two rectangles. The same is then done vertically with
// the `_cgdae` rulings. Rectangles that overlap on an axis are not checked on that axis.
func (b *wordBag) blocked(word *textWord) bool {
	switch {
	case word.Urx < b.Llx:
		from, to := _degf(word.PdfRectangle), _bdccc(b.PdfRectangle)
		if b._febe.blocks(from, to) {
			if _ggaeg {
				_ga.Log.Info("blocked \u2190x: %s %s", word, b)
			}
			return true
		}
	case b.Urx < word.Llx:
		from, to := _degf(b.PdfRectangle), _bdccc(word.PdfRectangle)
		if b._febe.blocks(from, to) {
			if _ggaeg {
				_ga.Log.Info("blocked x\u2192 : %s %s", word, b)
			}
			return true
		}
	}

	switch {
	case word.Ury < b.Lly:
		from, to := _faafc(word.PdfRectangle), _fbcde(b.PdfRectangle)
		if b._cgdae.blocks(from, to) {
			if _ggaeg {
				_ga.Log.Info("blocked \u2190y: %s %s", word, b)
			}
			return true
		}
	case b.Ury < word.Lly:
		from, to := _faafc(b.PdfRectangle), _fbcde(word.PdfRectangle)
		if b._cgdae.blocks(from, to) {
			if _ggaeg {
				_ga.Log.Info("blocked y\u2192 : %s %s", word, b)
			}
			return true
		}
	}
	return false
}

// String returns a description of the composite cell: its rectangle, the number of paragraphs
// it holds and, when it holds any, their merged text passed through `_adgd` with the argument 50.
func (c compositeCell) String() string {
	var text string
	if len(c.paraList) > 0 {
		text = _adgd(c.paraList.merge().text(), 50)
	}
	return _agc.Sprintf("%6.2f %d paras %q", c.PdfRectangle, len(c.paraList), text)
}

// llyOrdering returns the indexes of the paragraphs ordered by increasing Lly. Paragraphs with
// equal Lly keep their original relative order.
func (paras paraList) llyOrdering() []int {
	order := make([]int, len(paras))
	for i := range order {
		order[i] = i
	}
	byLly := func(i, j int) bool {
		return paras[order[i]].Lly < paras[order[j]].Lly
	}
	_e.SliceStable(order, byLly)
	return order
}

// isQuadrilateral reports whether the subpath has four points, or five points of which the
// last coincides with the first.
func (sp *subpath) isQuadrilateral() bool {
	switch len(sp._aeee) {
	case 4:
		return true
	case 5:
		first, last := sp._aeee[0], sp._aeee[4]
		return first.X == last.X && first.Y == last.Y
	default:
		return false
	}
}

// alignsSec reports whether the [`_fcec`, `_abeg`] intervals of the two rulings overlap,
// allowing a gap of up to `_gbb`+1 between them.
func (r *ruling) alignsSec(other *ruling) bool {
	const slack = _gbb + 1.0
	if r._fcec-slack > other._abeg {
		return false
	}
	return other._fcec-slack <= r._abeg
}

// emptyCompositeColumn reports whether no composite cell in column `x` contains a paragraph.
func (t *textTable) emptyCompositeColumn(x int) bool {
	for y := 0; y < t._cgae; y++ {
		cell, ok := t._aaaga[_bgcc(x, y)]
		if ok && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// subdivide splits the composite cells of the table along its row and column corridors and
// returns the resulting table of plain cells.
//
// The corridors are adjusted with `_cceb`, then `_fgdcg` turns them into the offset at which
// each composite row/column starts in the new table together with the new table's height and
// width. Each composite cell is split and its sub-cells are copied to their offset positions.
// The receiver is returned unchanged when there are no row corridors or no column corridors.
func (t *textTable) subdivide() *textTable {
	t.logComposite("subdivide")

	rowCorridors := t.compositeRowCorridors()
	colCorridors := t.compositeColCorridors()
	if _efda {
		_ga.Log.Info("subdivide:\n\trowCorridors=%s\n\tcolCorridors=%s", _edaf(rowCorridors), _edaf(colCorridors))
	}
	if len(rowCorridors) == 0 || len(colCorridors) == 0 {
		return t
	}

	_cceb(rowCorridors)
	_cceb(colCorridors)
	if _efda {
		_ga.Log.Info("subdivide fixed:\n\trowCorridors=%s\n\tcolCorridors=%s", _edaf(rowCorridors), _edaf(colCorridors))
	}

	yOffsets, height := _fgdcg(t._cgae, rowCorridors)
	xOffsets, width := _fgdcg(t._eacg, colCorridors)

	result := &textTable{
		PdfRectangle: t.PdfRectangle,
		_edeg:        t._edeg,
		_cgae:        height,
		_eacg:        width,
		_egfea:       make(map[uint64]*textPara, width*height),
	}
	if _efda {
		_ga.Log.Info("subdivide: composite = %d x %d cells= %d x %d\n"+
			"\trowCorridors=%s\n"+
			"\tcolCorridors=%s\n"+
			"\tyOffsets=%+v\n"+
			"\txOffsets=%+v",
			t._eacg, t._cgae, width, height, _edaf(rowCorridors), _edaf(colCorridors), yOffsets, xOffsets)
	}

	for y := 0; y < t._cgae; y++ {
		y0 := yOffsets[y]
		for x := 0; x < t._eacg; x++ {
			x0 := xOffsets[x]
			if _efda {
				_agc.Printf("%6d, %2d: x0=%d y0=%d\n", x, y, x0, y0)
			}
			composite, ok := t._aaaga[_bgcc(x, y)]
			if !ok {
				continue
			}
			parts := composite.split(rowCorridors[y], colCorridors[x])
			for dy := 0; dy < parts._cgae; dy++ {
				for dx := 0; dx < parts._eacg; dx++ {
					cell := parts.get(dx, dy)
					result.put(x0+dx, y0+dy, cell)
					if _efda {
						_agc.Printf("%8d, %2d: %s\n", x0+dx, y0+dy, cell)
					}
				}
			}
		}
	}
	return result
}
// Append appends `mark` to the mark array.
func (ma *TextMarkArray) Append(mark TextMark) {
	marks := append(ma._ffca, mark)
	ma._ffca = marks
}

// subpath is a sequence of points together with a boolean flag.
type subpath struct {
	_aeee []_agf.Point
	_dbe  bool
}
2024-03-27 22:34:33 +00:00
// Tables returns the tables extracted from the page.
2024-04-16 11:40:43 +00:00
func (pt PageText) Tables() []TextTable {
	// The table count is only logged when the `_efda` debug flag is on.
	if _efda {
		_ga.Log.Info("Tables: %d", len(pt._gdba))
	}
	return pt._gdba
}

// parseStructTreeRoot fills the receiver from the structure tree root dictionary `obj`.
//
// The children come from the "K" entry, which may be an array or a reference (treated as
// a one-element array); each child is parsed with parseStructElement. The "Type" entry is
// stored as a string, empty when the entry is absent.
//
// A nil `obj` leaves the receiver untouched. So does an `obj` that is not a dictionary, after
// logging it: previously that case fell through and dereferenced the missing dictionary, and
// an absent "Type" entry caused a nil method call.
func (root *structTreeRoot) parseStructTreeRoot(obj _add.PdfObject) {
	if obj == nil {
		return
	}
	dict, isDict := _add.GetDict(obj)
	if !isDict {
		_ga.Log.Debug("parseStructTreeRoot: dictionary not found.")
		return
	}

	typeName := ""
	if typeObj := dict.Get("Type"); typeObj != nil {
		typeName = typeObj.String()
	}

	var kids *_add.PdfObjectArray
	switch k := dict.Get("K").(type) {
	case *_add.PdfObjectArray:
		kids = k
	case *_add.PdfObjectReference:
		kids = _add.MakeArray(k)
	}

	children := []structElement{}
	if kids != nil {
		for _, kid := range kids.Elements() {
			var child structElement
			child.parseStructElement(kid)
			children = append(children, child)
		}
	}
	root._faef = children
	root._adbca = typeName
}

func ( _aaee * Extractor ) extractPageText ( _gcfd string , _cdg * _ba . PdfPageResources , _baac _agf . Matrix , _fagb int , _adb bool ) ( * PageText , int , int , error ) { _ga . Log . Trace ( "\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d" , _fagb ) ;
_dge := & PageText { _bbdc : _aaee . _cb , _abge : _aaee . _ab , _abad : _aaee . _dd } ; _adcg := _cgbc ( _aaee . _cb ) ; var _cee stateStack ; _bce := _acac ( _aaee , _cdg , _fc . GraphicsState { } , & _adcg , & _cee ) ; _egeg := shapesState { _gdfd : _baac , _gafa : _agf . IdentityMatrix ( ) , _edf : _bce } ;
var _ccd bool ; _bbf := - 1 ; if _fagb > _fad { _fd := _b . New ( "\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077" ) ; _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076" , _fagb , _fd ) ;
return _dge , _adcg . _abd , _adcg . _bafdf , _fd ; } ; _fef := _fc . NewContentStreamParser ( _gcfd ) ; _cge , _fca := _fef . Parse ( ) ; if _fca != nil { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fca ) ;
return _dge , _adcg . _abd , _adcg . _bafdf , _fca ; } ; _dge . _cde = _cge ; _bafd := _fc . NewContentStreamProcessor ( * _cge ) ; _bafd . AddHandler ( _fc . HandlerConditionEnumAllOperands , "" , func ( _fbeg * _fc . ContentStreamOperation , _ef _fc . GraphicsState , _ceeb * _ba . PdfPageResources ) error { _dcg := _fbeg . Operand ;
if _eeeb { _ga . Log . Info ( "\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s" , _fbeg ) ; } ; switch _dcg { case "\u0071" : if _fdbg { _ga . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _egeg . _gafa ) ; } ; _cee . push ( & _adcg ) ; case "\u0051" : if ! _cee . empty ( ) { _adcg = * _cee . pop ( ) ;
} ; _egeg . _gafa = _ef . CTM ; if _fdbg { _ga . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _egeg . _gafa ) ; } ; case "\u0042\u0044\u0043" : _aee , _fbbe := _add . GetDict ( _fbeg . Params [ 1 ] ) ; if ! _fbbe { _ga . Log . Debug ( "\u0045\u0052\u0052O\u0052\u003a\u0020\u0042D\u0043\u0020\u006f\u0070\u003d\u0025\u0073 \u0047\u0065\u0074\u0044\u0069\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _fbeg ) ;
return _fca ; } ; _deb := _aee . Get ( "\u004d\u0043\u0049\u0044" ) ; if _deb != nil { _dece , _bcf := _add . GetIntVal ( _deb ) ; if ! _bcf { _ga . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u0042\u0044C\u0020\u006f\u0070=\u0025\u0073\u002e\u0020\u0042\u0061\u0064\u0020\u006eum\u0065\u0072\u0069c\u0061\u006c \u006f\u0062\u006a\u0065\u0063\u0074.\u0020\u006f=\u0025\u0073" , _fbeg , _deb ) ;
} ; _bbf = _dece ; } else { _bbf = - 1 ; } ; case "\u0045\u004d\u0043" : _bbf = - 1 ; case "\u0042\u0054" : if _ccd { _ga . Log . Debug ( "\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
_dge . _aebf = append ( _dge . _aebf , _bce . _cfde ... ) ; } ; _ccd = true ; _gcfc := _ef ; if _adb { _gcfc = _fc . GraphicsState { } ; _gcfc . CTM = _egeg . _gafa ; } ; _gcfc . CTM = _baac . Mult ( _gcfc . CTM ) ; _bce = _acac ( _aaee , _ceeb , _gcfc , & _adcg , & _cee ) ; _egeg . _edf = _bce ; case "\u0045\u0054" : if ! _ccd { _ga . Log . Debug ( "\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
} ; _ccd = false ; _dge . _aebf = append ( _dge . _aebf , _bce . _cfde ... ) ; _bce . reset ( ) ; case "\u0054\u002a" : _bce . nextLine ( ) ; case "\u0054\u0064" : if _dfeg , _aed := _bce . checkOp ( _fbeg , 2 , true ) ; ! _dfeg { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _aed ) ;
return _aed ; } ; _aedg , _gca , _adg := _dggea ( _fbeg . Params ) ; if _adg != nil { return _adg ; } ; _bce . moveText ( _aedg , _gca ) ; case "\u0054\u0044" : if _fea , _dfb := _bce . checkOp ( _fbeg , 2 , true ) ; ! _fea { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dfb ) ;
return _dfb ; } ; _ggb , _bfbg , _fcf := _dggea ( _fbeg . Params ) ; if _fcf != nil { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fcf ) ; return _fcf ; } ; _bce . moveTextSetLeading ( _ggb , _bfbg ) ; case "\u0054\u006a" : if _efa , _eee := _bce . checkOp ( _fbeg , 1 , true ) ;
! _efa { _ga . Log . Debug ( "\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076" , _fbeg , _eee ) ; return _eee ; } ; _cgdf := _add . TraceToDirectObject ( _fbeg . Params [ 0 ] ) ; _ebf , _fdd := _add . GetStringBytes ( _cgdf ) ;
if ! _fdd { _ga . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064" , _fbeg ) ; return _add . ErrTypeError ;
} ; return _bce . showText ( _cgdf , _ebf , _bbf ) ; case "\u0054\u004a" : if _eef , _bfg := _bce . checkOp ( _fbeg , 1 , true ) ; ! _eef { _ga . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bfg ) ; return _bfg ; } ; _geg , _ddd := _add . GetArray ( _fbeg . Params [ 0 ] ) ;
if ! _ddd { _ga . Log . Debug ( "\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _fbeg ) ; return _fca ; } ; return _bce . showTextAdjusted ( _geg , _bbf ) ;
case "\u0027" : if _faf , _bbc := _bce . checkOp ( _fbeg , 1 , true ) ; ! _faf { _ga . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bbc ) ; return _bbc ; } ; _geb := _add . TraceToDirectObject ( _fbeg . Params [ 0 ] ) ; _cdf , _ggd := _add . GetStringBytes ( _geb ) ;
if ! _ggd { _ga . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _fbeg ) ; return _add . ErrTypeError ; } ; _bce . nextLine ( ) ; return _bce . showText ( _geb , _cdf , _bbf ) ;
case "\u0022" : if _fgb , _bab := _bce . checkOp ( _fbeg , 3 , true ) ; ! _fgb { _ga . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bab ) ; return _bab ; } ; _cba , _ffa , _fcdb := _dggea ( _fbeg . Params [ : 2 ] ) ; if _fcdb != nil { return _fcdb ;
} ; _fde := _add . TraceToDirectObject ( _fbeg . Params [ 2 ] ) ; _cff , _bff := _add . GetStringBytes ( _fde ) ; if ! _bff { _ga . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _fbeg ) ;
return _add . ErrTypeError ; } ; _bce . setCharSpacing ( _cba ) ; _bce . setWordSpacing ( _ffa ) ; _bce . nextLine ( ) ; return _bce . showText ( _fde , _cff , _bbf ) ; case "\u0054\u004c" : _bfc , _aac := _cage ( _fbeg ) ; if _aac != nil { _ga . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _aac ) ;
return _aac ; } ; _bce . setTextLeading ( _bfc ) ; case "\u0054\u0063" : _egg , _dcaa := _cage ( _fbeg ) ; if _dcaa != nil { _ga . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dcaa ) ; return _dcaa ; } ; _bce . setCharSpacing ( _egg ) ;
case "\u0054\u0066" : if _ffd , _eeef := _bce . checkOp ( _fbeg , 2 , true ) ; ! _ffd { _ga . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _eeef ) ; return _eeef ; } ; _facf , _aaba := _add . GetNameVal ( _fbeg . Params [ 0 ] ) ;
if ! _aaba { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064" , _fbeg ) ; return _add . ErrTypeError ; } ; _agcb , _caa := _add . GetNumberAsFloat ( _fbeg . Params [ 1 ] ) ;
if ! _aaba { _ga . Log . Debug ( "\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fbeg , _caa ) ;
return _caa ; } ; _caa = _bce . setFont ( _facf , _agcb ) ; _bce . _dgcf = _b . Is ( _caa , _add . ErrNotSupported ) ; if _caa != nil && ! _bce . _dgcf { return _caa ; } ; case "\u0054\u006d" : if _bdda , _gee := _bce . checkOp ( _fbeg , 6 , true ) ; ! _bdda { _ga . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gee ) ;
return _gee ; } ; _ffg , _afg := _add . GetNumbersAsFloat ( _fbeg . Params ) ; if _afg != nil { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _afg ) ; return _afg ; } ; _bce . setTextMatrix ( _ffg ) ; case "\u0054\u0072" : if _eab , _abab := _bce . checkOp ( _fbeg , 1 , true ) ;
! _eab { _ga . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _abab ) ; return _abab ; } ; _eeed , _fdg := _add . GetIntVal ( _fbeg . Params [ 0 ] ) ; if ! _fdg { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _fbeg ) ;
return _add . ErrTypeError ; } ; _bce . setTextRenderMode ( _eeed ) ; case "\u0054\u0073" : if _ebd , _ede := _bce . checkOp ( _fbeg , 1 , true ) ; ! _ebd { _ga . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _ede ) ; return _ede ;
} ; _bca , _edg := _add . GetNumberAsFloat ( _fbeg . Params [ 0 ] ) ; if _edg != nil { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _edg ) ; return _edg ; } ; _bce . setTextRise ( _bca ) ; case "\u0054\u0077" : if _acd , _fgff := _bce . checkOp ( _fbeg , 1 , true ) ;
! _acd { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fgff ) ; return _fgff ; } ; _fdf , _gdg := _add . GetNumberAsFloat ( _fbeg . Params [ 0 ] ) ; if _gdg != nil { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gdg ) ;
return _gdg ; } ; _bce . setWordSpacing ( _fdf ) ; case "\u0054\u007a" : if _ecab , _dgf := _bce . checkOp ( _fbeg , 1 , true ) ; ! _ecab { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dgf ) ; return _dgf ; } ; _ebfg , _cef := _add . GetNumberAsFloat ( _fbeg . Params [ 0 ] ) ;
if _cef != nil { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _cef ) ; return _cef ; } ; _bce . setHorizScaling ( _ebfg ) ; case "\u0063\u006d" : if ! _adb { _egeg . _gafa = _ef . CTM ; } ; if _egeg . _gafa . Singular ( ) { _gcb := _agf . IdentityMatrix ( ) . Translate ( _egeg . _gafa . Translation ( ) ) ;
_ga . Log . Debug ( "S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s" , _egeg . _gafa , _gcb ) ; _egeg . _gafa = _gcb ; } ; if _fdbg { _ga . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _egeg . _gafa ) ; } ; case "\u006d" : if len ( _fbeg . Params ) != 2 { _ga . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _gfg ) ;
return nil ; } ; _eeb , _cag := _add . GetNumbersAsFloat ( _fbeg . Params ) ; if _cag != nil { return _cag ; } ; _egeg . moveTo ( _eeb [ 0 ] , _eeb [ 1 ] ) ; case "\u006c" : if len ( _fbeg . Params ) != 2 { _ga . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _gfg ) ;
return nil ; } ; _bef , _deda := _add . GetNumbersAsFloat ( _fbeg . Params ) ; if _deda != nil { return _deda ; } ; _egeg . lineTo ( _bef [ 0 ] , _bef [ 1 ] ) ; case "\u0063" : if len ( _fbeg . Params ) != 6 { return _gfg ; } ; _eddg , _fgc := _add . GetNumbersAsFloat ( _fbeg . Params ) ; if _fgc != nil { return _fgc ;
} ; _ga . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _eddg ) ; _egeg . cubicTo ( _eddg [ 0 ] , _eddg [ 1 ] , _eddg [ 2 ] , _eddg [ 3 ] , _eddg [ 4 ] , _eddg [ 5 ] ) ; case "\u0076" , "\u0079" : if len ( _fbeg . Params ) != 4 { return _gfg ;
} ; _bdde , _ace := _add . GetNumbersAsFloat ( _fbeg . Params ) ; if _ace != nil { return _ace ; } ; _ga . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _bdde ) ; _egeg . quadraticTo ( _bdde [ 0 ] , _bdde [ 1 ] , _bdde [ 2 ] , _bdde [ 3 ] ) ;
case "\u0068" : _egeg . closePath ( ) ; case "\u0072\u0065" : if len ( _fbeg . Params ) != 4 { return _gfg ; } ; _agfa , _fefa := _add . GetNumbersAsFloat ( _fbeg . Params ) ; if _fefa != nil { return _fefa ; } ; _egeg . drawRectangle ( _agfa [ 0 ] , _agfa [ 1 ] , _agfa [ 2 ] , _agfa [ 3 ] ) ; _egeg . closePath ( ) ;
case "\u0053" : _egeg . stroke ( & _dge . _efea ) ; _egeg . clearPath ( ) ; case "\u0073" : _egeg . closePath ( ) ; _egeg . stroke ( & _dge . _efea ) ; _egeg . clearPath ( ) ; case "\u0046" : _egeg . fill ( & _dge . _ebafc ) ; _egeg . clearPath ( ) ; case "\u0066" , "\u0066\u002a" : _egeg . closePath ( ) ;
_egeg . fill ( & _dge . _ebafc ) ; _egeg . clearPath ( ) ; case "\u0042" , "\u0042\u002a" : _egeg . fill ( & _dge . _ebafc ) ; _egeg . stroke ( & _dge . _efea ) ; _egeg . clearPath ( ) ; case "\u0062" , "\u0062\u002a" : _egeg . closePath ( ) ; _egeg . fill ( & _dge . _ebafc ) ; _egeg . stroke ( & _dge . _efea ) ;
_egeg . clearPath ( ) ; case "\u006e" : _egeg . clearPath ( ) ; case "\u0044\u006f" : if len ( _fbeg . Params ) == 0 { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e" , _fbeg . Params ) ;
return _add . ErrRangeError ; } ; _deba , _ead := _add . GetName ( _fbeg . Params [ 0 ] ) ; if ! _ead { _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e" , _fbeg . Params [ 0 ] ) ;
return _add . ErrTypeError ; } ; _ , _gddc := _ceeb . GetXObjectByName ( * _deba ) ; if _gddc != _ba . XObjectTypeForm { break ; } ; _bad , _ead := _aaee . _baa [ _deba . String ( ) ] ; if ! _ead { _abed , _efe := _ceeb . GetXObjectFormByName ( * _deba ) ; if _efe != nil { _ga . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _efe ) ;
return _efe ; } ; _fbdb , _efe := _abed . GetContentStream ( ) ; if _efe != nil { _ga . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _efe ) ; return _efe ; } ; _eae := _abed . Resources ; if _eae == nil { _eae = _ceeb ; } ; _dedb := _ef . CTM ; if _adca , _fgcf := _add . GetArray ( _abed . Matrix ) ;
_fgcf { _cdd , _cab := _adca . GetAsFloat64Slice ( ) ; if _cab != nil { return _cab ; } ; if len ( _cdd ) != 6 { return _gfg ; } ; _ada := _agf . NewMatrix ( _cdd [ 0 ] , _cdd [ 1 ] , _cdd [ 2 ] , _cdd [ 3 ] , _cdd [ 4 ] , _cdd [ 5 ] ) ; _dedb = _ef . CTM . Mult ( _ada ) ; } ; _eefa , _feg , _ggae , _efe := _aaee . extractPageText ( string ( _fbdb ) , _eae , _baac . Mult ( _dedb ) , _fagb + 1 , false ) ;
if _efe != nil { _ga . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _efe ) ; return _efe ; } ; _bad = textResult { * _eefa , _feg , _ggae } ; _aaee . _baa [ _deba . String ( ) ] = _bad ; } ; _egeg . _gafa = _ef . CTM ; if _fdbg { _ga . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _egeg . _gafa ) ;
} ; _dge . _aebf = append ( _dge . _aebf , _bad . _egcbc . _aebf ... ) ; _dge . _efea = append ( _dge . _efea , _bad . _egcbc . _efea ... ) ; _dge . _ebafc = append ( _dge . _ebafc , _bad . _egcbc . _ebafc ... ) ; _adcg . _abd += _bad . _dcga ; _adcg . _bafdf += _bad . _bcg ; case "\u0072\u0067" , "\u0067" , "\u006b" , "\u0063\u0073" , "\u0073\u0063" , "\u0073\u0063\u006e" : _bce . _bace . ColorspaceNonStroking = _ef . ColorspaceNonStroking ;
_bce . _bace . ColorNonStroking = _ef . ColorNonStroking ; case "\u0052\u0047" , "\u0047" , "\u004b" , "\u0043\u0053" , "\u0053\u0043" , "\u0053\u0043\u004e" : _bce . _bace . ColorspaceStroking = _ef . ColorspaceStroking ; _bce . _bace . ColorStroking = _ef . ColorStroking ; } ; return nil ;
} ) ; _fca = _bafd . Process ( _cdg ) ; if _aaee . _cf != nil && _aaee . _cf . IncludeAnnotations && ! _adb { for _ , _ccc := range _aaee . _bg { _cgeg , _eaeg := _add . GetDict ( _ccc . AP ) ; if ! _eaeg { continue ; } ; _dad , _eaeg := _cgeg . Get ( "\u004e" ) . ( * _add . PdfObjectStream ) ;
if ! _eaeg { continue ; } ; _fcfc , _bcfd := _add . DecodeStream ( _dad ) ; if _bcfd != nil { _ga . Log . Debug ( "\u0045\u0072\u0072\u006f\u0072\u0020\u006f\u006e\u0020\u0064\u0065c\u006f\u0064\u0065\u0020\u0073\u0074\u0072\u0065\u0061\u006d:\u0020\u0025\u0076" , _bcfd ) ;
continue ; } ; _beca := _dad . PdfObjectDictionary . Get ( "\u0052e\u0073\u006f\u0075\u0072\u0063\u0065s" ) ; _efd , _bcfd := _ba . NewPdfPageResourcesFromDict ( _beca . ( * _add . PdfObjectDictionary ) ) ; if _bcfd != nil { _ga . Log . Debug ( "\u0045\u0072\u0072\u006f\u0072 \u006f\u006e\u0020\u0067\u0065\u0074\u0074\u0069\u006e\u0067\u0020\u0061\u006en\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0020\u0072\u0065\u0073\u006f\u0075\u0072\u0063\u0065\u0073\u003a\u0020\u0025\u0076" , _bcfd ) ;
continue ; } ; _bfd := _agf . IdentityMatrix ( ) ; _gdcf , _eaeg := _dad . PdfObjectDictionary . Get ( "\u004d\u0061\u0074\u0072\u0069\u0078" ) . ( * _add . PdfObjectArray ) ; if _eaeg { _feff , _cgb := _gdcf . GetAsFloat64Slice ( ) ; if _cgb != nil { _ga . Log . Debug ( "\u0045\u0072\u0072or\u0020\u006f\u006e\u0020\u0067\u0065\u0074\u0074\u0069n\u0067 \u0066l\u006fa\u0074\u0036\u0034\u0020\u0073\u006c\u0069\u0063\u0065\u003a\u0020\u0025\u0076" , _cgb ) ;
continue ; } ; if len ( _feff ) != 6 { _ga . Log . Debug ( "I\u006e\u0076\u0061\u006c\u0069\u0064 \u006d\u0061\u0074\u0072\u0069\u0078\u0020\u0073\u006ci\u0063\u0065\u0020l\u0065n\u0067\u0074\u0068" ) ; continue ; } ; _bfd = _agf . NewMatrix ( _feff [ 0 ] , _feff [ 1 ] , _feff [ 2 ] , _feff [ 3 ] , _feff [ 4 ] , _feff [ 5 ] ) ;
} ; _bcac , _eaeg := _aaee . _gfe [ _dad . String ( ) ] ; if ! _eaeg { _ecb , _edeb , _dgc , _edcc := _aaee . extractPageText ( string ( _fcfc ) , _efd , _bfd , _fagb + 1 , true ) ; if _edcc != nil { _ga . Log . Debug ( "\u0045\u0052R\u004f\u0052\u0020\u0065x\u0074\u0072a\u0063\u0074\u0069\u006e\u0067\u0020\u0061\u006en\u006f\u0074\u0061\u0074\u0069\u006f\u006e\u0020\u0074\u0065\u0078\u0074s\u003a\u0020\u0025\u0076" , _edcc ) ;
continue ; } ; _bcac = textResult { * _ecb , _edeb , _dgc } ; _aaee . _gfe [ _dad . String ( ) ] = _bcac ; } ; _dge . _aebf = append ( _dge . _aebf , _bcac . _egcbc . _aebf ... ) ; _dge . _efea = append ( _dge . _efea , _bcac . _egcbc . _efea ... ) ; _dge . _ebafc = append ( _dge . _ebafc , _bcac . _egcbc . _ebafc ... ) ;
_adcg . _abd += _bcac . _dcga ; _adcg . _bafdf += _bcac . _bcg ; } ; } ; return _dge , _adcg . _abd , _adcg . _bafdf , _fca ; } ; func _gfgc ( _gcac , _gebbg * textPara ) bool { if _gcac . _cddef || _gebbg . _cddef { return true ; } ; return _dbeae ( _gcac . depth ( ) - _gebbg . depth ( ) ) ;
} ; func ( _bdbf * textObject ) moveLP ( _ebg , _bdf float64 ) { _bdbf . _dbc . Concat ( _agf . NewMatrix ( 1 , 0 , 0 , 1 , _ebg , _bdf ) ) ; _bdbf . _eefe = _bdbf . _dbc ; } ; func _ggee ( _aggec _add . PdfObject , _eceb _gff . Color ) ( _ag . Image , error ) { _bdfbb , _gbfce := _add . GetStream ( _aggec ) ;
if ! _gbfce { return nil , nil ; } ; _ecfbg , _bccfe := _ba . NewXObjectImageFromStream ( _bdfbb ) ; if _bccfe != nil { return nil , _bccfe ; } ; _dffc , _bccfe := _ecfbg . ToImage ( ) ; if _bccfe != nil { return nil , _bccfe ; } ; return _bdefa ( _dffc , _eceb ) , nil ; } ; type pathSection struct { _dbdc [ ] * subpath ;
_gff . Color ; } ; func _edgg ( _fcfe * paraList ) map [ int ] [ ] * textLine { _fecg := map [ int ] [ ] * textLine { } ; for _ , _bcbc := range * _fcfe { for _ , _bdaa := range _bcbc . _fdec { if ! _aada ( _bdaa ) { _ga . Log . Debug ( "g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e" ) ;
continue ; } ; _fggag := _bdaa . _cdcg [ 0 ] . _abcee [ 0 ] . _ebbb ; _fecg [ _fggag ] = append ( _fecg [ _fggag ] , _bdaa ) ; } ; if _bcbc . _fbbea != nil { _afeb := _bcbc . _fbbea . _egfea ; for _ , _gbfd := range _afeb { for _ , _aacdb := range _gbfd . _fdec { if ! _aada ( _aacdb ) { _ga . Log . Debug ( "g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e" ) ;
continue ; } ; _dfbdc := _aacdb . _cdcg [ 0 ] . _abcee [ 0 ] . _ebbb ; _fecg [ _dfbdc ] = append ( _fecg [ _dfbdc ] , _aacdb ) ; } ; } ; } ; } ; return _fecg ; } ; func ( _aaed * textPara ) bbox ( ) _ba . PdfRectangle { return _aaed . PdfRectangle } ; type textMark struct { _ba . PdfRectangle ; _gfbg int ;
_bfdb string ; _gccf string ; _ccdg * _ba . PdfFont ; _gccfd float64 ; _gccd float64 ; _aaccc _agf . Matrix ; _acdgd _agf . Point ; _dded _ba . PdfRectangle ; _fbaf _gff . Color ; _eaee _gff . Color ; _bbgf _add . PdfObject ; _fcbc [ ] string ; Tw float64 ; Th float64 ; _ebbb int ;
_aeaee int ; } ; func _fdaf ( _cfg , _aabc _agf . Point ) bool { _aebg := _gf . Abs ( _cfg . X - _aabc . X ) ; _aabd := _gf . Abs ( _cfg . Y - _aabc . Y ) ; return _caeb ( _aabd , _aebg ) ; } ; func ( _eabc * stateStack ) push ( _accg * textState ) { _dfd := * _accg ; * _eabc = append ( * _eabc , & _dfd ) } ;
// findTextTables tries to grow a table from every paragraph that is not
// already taken and has non-zero width. Tables whose width*height falls below
// `_ggca` are discarded; the rest have their cells marked and are returned.
func (_fafc paraList) findTextTables() []*textTable {
	var _cabec []*textTable
	for _, _aefb := range _fafc {
		if _aefb.taken() || _aefb.Width() == 0 {
			continue
		}
		_egdeb := _aefb.isAtom()
		if _egdeb == nil {
			continue
		}
		_egdeb.growTable()
		if _egdeb._eacg*_egdeb._cgae < _ggca {
			continue
		}
		_egdeb.markCells()
		_egdeb.log("\u0067\u0072\u006fw\u006e")
		_cabec = append(_cabec, _egdeb)
	}
	return _cabec
}

// sortTopoOrder reorders the list using the order computed by topoOrder.
func (_dafg paraList) sortTopoOrder() {
	_gcfe := _dafg.topoOrder()
	_dafg.reorder(_gcfe)
}

// stroke appends the current subpaths, tagged with the current stroke colour,
// to `*_agbf`. Diagnostic output is printed when `_adgbf` (and `_ebce`) are set.
func (_cbaab *shapesState) stroke(_agbf *[]pathSection) {
	_dfgb := pathSection{_dbdc: _cbaab._gfce, Color: _cbaab._edf.getStrokeColor()}
	*_agbf = append(*_agbf, _dfgb)
	if _adgbf {
		_agc.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*_agbf), _cbaab, _cbaab._edf.getStrokeColor(), _dfgb.bbox())
		if _ebce {
			for _dbcb, _dbfb := range _cbaab._gfce {
				_agc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", _dbcb, _dbfb)
				// Stop after the subpath at index 10.
				if _dbcb == 10 {
					break
				}
			}
		}
	}
}

// textObject is the state of one text object: the owning extractor, the
// resources and graphics state in effect, the shared text state and state
// stack, two matrices and the marks collected so far.
type textObject struct {
	_cdb  *Extractor
	_dbg  *_ba.PdfPageResources
	_bace _fc.GraphicsState
	_efed *textState
	_fgbf *stateStack
	_eefe _agf.Matrix
	_dbc  _agf.Matrix
	_cfde []*textMark
	_dgcf bool
}

// empty reports whether the stack has no entries.
func (_cbf *stateStack) empty() bool { return len(*_cbf) == 0 }

// snapToGroupsDirection groups rulings whose primary coordinate (`_edga`) lies
// within `_gbb` of the group's first ruling, sets every ruling's primary
// coordinate to its group's merged value, then splits each group along the
// secondary direction and merges the pieces. The merged rulings are returned
// in strict sort order.
//
// An empty list yields nil.
func (_cecec rulingList) snapToGroupsDirection() rulingList {
	// Everything below indexes _cecec[0]; bail out early on empty input.
	if len(_cecec) == 0 {
		return nil
	}
	_cecec.sortStrict()
	_bfcbgb := make(map[*ruling]rulingList, len(_cecec))
	_aeff := _cecec[0]
	// _ecadgb starts a new group keyed by its first ruling.
	_ecadgb := func(_ggbad *ruling) {
		_aeff = _ggbad
		_bfcbgb[_aeff] = rulingList{_ggbad}
	}
	_ecadgb(_cecec[0])
	for _, _gffbe := range _cecec[1:] {
		if _gffbe._edga < _aeff._edga-_cbdb {
			_ga.Log.Error("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073", _aeff, _gffbe)
		}
		if _gffbe._edga > _aeff._edga+_gbb {
			_ecadgb(_gffbe)
		} else {
			_bfcbgb[_aeff] = append(_bfcbgb[_aeff], _gffbe)
		}
	}
	_daac := make(map[*ruling]float64, len(_bfcbgb))
	_ecaaa := make(map[*ruling]*ruling, len(_cecec))
	for _ggeb, _ecga := range _bfcbgb {
		_daac[_ggeb] = _ecga.mergePrimary()
		for _, _cbda := range _ecga {
			_ecaaa[_cbda] = _ggeb
		}
	}
	for _, _dffff := range _cecec {
		_dffff._edga = _daac[_ecaaa[_dffff]]
	}
	_gbef := make(rulingList, 0, len(_cecec))
	for _, _decfa := range _bfcbgb {
		_agee := _decfa.splitSec()
		for _caded, _gaga := range _agee {
			_cada := _gaga.merge()
			if len(_gbef) > 0 {
				// Skip a ruling that aligns with the previously added one.
				_bgfa := _gbef[len(_gbef)-1]
				if _bgfa.alignsPrimary(_cada) && _bgfa.alignsSec(_cada) {
					_ga.Log.Error("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073", _caded, _bgfa, _cada)
					continue
				}
			}
			_gbef = append(_gbef, _cada)
		}
	}
	_gbef.sortStrict()
	return _gbef
}

// _acfa returns the lines whose `_bbfg` value is greater than `_egfb` and,
// unless `_edcg` is -1 (no upper bound), also less than `_edcg`.
func _acfa(_acfb []*textLine, _edcg, _egfb float64) []*textLine {
	var _aacc []*textLine
	for _, _ddfg := range _acfb {
		if _edcg == -1 {
			if _ddfg._bbfg > _egfb {
				_aacc = append(_aacc, _ddfg)
			}
		} else {
			if _ddfg._bbfg > _egfb && _ddfg._bbfg < _edcg {
				_aacc = append(_aacc, _ddfg)
			}
		}
	}
	return _aacc
}

// _ccab returns nil when a valid license key is present or `_de` is set.
// Otherwise it prints a notice to standard output and returns an error.
// The `_adabe` argument is not used.
func _ccab(_adabe *PageText) error {
	_dgdge := _ce.GetLicenseKey()
	if _dgdge != nil && _dgdge.IsLicensed() || _de {
		return nil
	}
	_agc.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_agc.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _b.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	IncludeInlineStencilMasks bool
}

// split divides the composite cell into a grid using the row corridors
// `_aabb` and column corridors `_dedd`, assigns each text line to the grid
// rectangle selected by `_gcdb`, and returns a table holding one paragraph per
// non-empty grid cell.
func (_eacd compositeCell) split(_aabb, _dedd []float64) *textTable {
	numRows := len(_aabb) + 1
	numCols := len(_dedd) + 1
	if _efda {
		_ga.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066", numCols, numRows, _eacd, _aabb, _dedd)
		_agc.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a", len(_eacd.paraList))
		for idx, para := range _eacd.paraList {
			_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, para.String())
		}
		_agc.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", len(_eacd.lines()))
		for idx, line := range _eacd.lines() {
			_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, line)
		}
	}

	// Extend the corridor lists with the outer bounds of the composite cell.
	_aabb = _gecgf(_aabb, _eacd.Ury, _eacd.Lly)
	_dedd = _gecgf(_dedd, _eacd.Llx, _eacd.Urx)

	cells := make(map[uint64]*textPara, numCols*numRows)
	table := textTable{_eacg: numCols, _cgae: numRows, _egfea: cells}

	// Order paragraphs by lower-left y, then lower-left x.
	paras := _eacd.paraList
	_e.Slice(paras, func(i, j int) bool {
		a, b := paras[i], paras[j]
		if a.Lly != b.Lly {
			return a.Lly < b.Lly
		}
		return a.Llx < b.Llx
	})

	// Build the rectangle for every (column, row) grid position.
	rects := make(map[uint64]_ba.PdfRectangle, numCols*numRows)
	for y, lly := range _aabb[1:] {
		ury := _aabb[y]
		for x, urx := range _dedd[1:] {
			llx := _dedd[x]
			rects[_bgcc(x, y)] = _ba.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
		}
	}
	if _efda {
		_ga.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073")
		_agc.Printf("\u0020\u0020\u0020\u0020")
		for x := 0; x < numCols; x++ {
			_agc.Printf("\u0025\u0033\u0030\u0064\u002c\u0020", x)
		}
		_agc.Println()
		for y := 0; y < numRows; y++ {
			_agc.Printf("\u0020\u0020\u0025\u0032\u0064\u003a", y)
			for x := 0; x < numCols; x++ {
				_agc.Printf("\u00256\u002e\u0032\u0066\u002c\u0020", rects[_bgcc(x, y)])
			}
			_agc.Println()
		}
	}

	// locate returns the first grid position (scanning row by row) whose
	// rectangle satisfies _gcdb for the line, or (-1, -1) if there is none.
	locate := func(line *textLine) (int, int) {
		for y := 0; y < numRows; y++ {
			for x := 0; x < numCols; x++ {
				if _gcdb(rects[_bgcc(x, y)], line.PdfRectangle) {
					return x, y
				}
			}
		}
		return -1, -1
	}

	cellLines := make(map[uint64][]*textLine, numCols*numRows)
	for _, line := range paras.lines() {
		x, y := locate(line)
		if x < 0 {
			continue
		}
		key := _bgcc(x, y)
		cellLines[key] = append(cellLines[key], line)
	}

	for y := 0; y < len(_aabb)-1; y++ {
		ury, lly := _aabb[y], _aabb[y+1]
		for x := 0; x < len(_dedd)-1; x++ {
			llx, urx := _dedd[x], _dedd[x+1]
			rect := _ba.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			content := cellLines[_bgcc(x, y)]
			if len(content) == 0 {
				continue
			}
			table.put(x, y, _bfgg(rect, content))
		}
	}
	return &table
}

// _acdc returns the text lines of the first child of `_agaea` whose kind is
// "LBody", "Span" or "InlineShape". An "LBody" child with no lines of its own
// is searched recursively. It returns nil when no such child exists.
func _acdc(_agaea *list) []*textLine {
	for _, child := range _agaea._abcc {
		switch child._cdde {
		case "\u004c\u0042\u006fd\u0079":
			if len(child._aebd) == 0 {
				return _acdc(child)
			}
			return child._aebd
		case "\u0053\u0070\u0061\u006e", "I\u006e\u006c\u0069\u006e\u0065\u0053\u0068\u0061\u0070\u0065":
			return child._aebd
		}
	}
	return nil
}

// depth returns the smallest depth among the row-0 cells that exist and do not
// have `_cddef` set, or 1e10 when there are none.
func (_dfee *textTable) depth() float64 {
	shallowest := 1e10
	for col := 0; col < _dfee._eacg; col++ {
		cell := _dfee.get(col, 0)
		if cell != nil && !cell._cddef {
			shallowest = _gf.Min(shallowest, cell.depth())
		}
	}
	return shallowest
}

// findGridTables looks for one table per grid tiling in `_fbae`. Each table
// found has its cells marked, and every paragraph returned alongside it by
// findTableGrid gets its `_gecb` flag set.
func (_efdea paraList) findGridTables(_fbae []gridTiling) []*textTable {
	if _efda {
		_ga.Log.Info("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073", len(_efdea))
		for idx, para := range _efdea {
			_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, para)
		}
	}
	var found []*textTable
	for idx, tiling := range _fbae {
		table, used := _efdea.findTableGrid(tiling)
		if table != nil {
			table.log(_agc.Sprintf("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064", idx))
			found = append(found, table)
			table.markCells()
		}
		for para := range used {
			para._gecb = true
		}
	}
	if _efda {
		_ga.Log.Info("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s", len(found))
	}
	return found
}

// _caac converts the image stream `_ggcb` to an image and passes it, with the
// colour `_afdad`, through `_ggfef`. It returns (nil, nil) when `_ggcb` is not
// a stream.
func _caac(_ggcb _add.PdfObject, _afdad _gff.Color) (_ag.Image, error) {
	stream, isStream := _add.GetStream(_ggcb)
	if !isStream {
		return nil, nil
	}
	ximg, err := _ba.NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}
	img, err := ximg.ToImage()
	if err != nil {
		return nil, err
	}
	return _ggfef(img, _afdad), nil
}

// get returns the cell stored under the key `_bgcc(_bedgb, _gcag)`, or nil if
// there is none.
func (_ebbd *textTable) get(_bedgb, _gcag int) *textPara {
	key := _bgcc(_bedgb, _gcag)
	return _ebbd._egfea[key]
}

// _ffdg classifies the segment between two points by passing its absolute x
// and y extents, with the tolerance `_ggdg`, to `_cbggc`.
func _ffdg(_egbb, _adeg _agf.Point) rulingKind {
	dx := _gf.Abs(_egbb.X - _adeg.X)
	dy := _gf.Abs(_egbb.Y - _adeg.Y)
	return _cbggc(dx, dy, _ggdg)
}

// _fbbab returns the intersection of two rectangles and true when `_gabc`
// accepts the pair, otherwise a zero rectangle and false.
func _fbbab(_aaa, _aadb _ba.PdfRectangle) (_ba.PdfRectangle, bool) {
	if _gabc(_aaa, _aadb) {
		overlap := _ba.PdfRectangle{
			Llx: _gf.Max(_aaa.Llx, _aadb.Llx),
			Urx: _gf.Min(_aaa.Urx, _aadb.Urx),
			Lly: _gf.Max(_aaa.Lly, _aadb.Lly),
			Ury: _gf.Min(_aaa.Ury, _aadb.Ury),
		}
		return overlap, true
	}
	return _ba.PdfRectangle{}, false
}

// _aadbb sets the mark's Offset to the running offset `*_dbbcab`, appends the
// mark to `_dace`, and advances the running offset by the byte length of the
// mark's text.
func _aadbb(_dace []TextMark, _dbbcab *int, _eebg TextMark) []TextMark {
	_eebg.Offset = *_dbbcab
	extended := append(_dace, _eebg)
	*_dbbcab += len(_eebg.Text)
	return extended
}

// add appends the given points to the subpath.
func (_edde *subpath) add(_bdgc ..._agf.Point) {
	for _, pt := range _bdgc {
		_edde._aeee = append(_edde._aeee, pt)
	}
}
// _cebgb returns the y-coordinates of the horizontal gaps ("row corridors" in
// the debug output) that separate vertically disjoint groups of text lines
// taken from all cells in `_daea`.
//
// The lines of every cell are pooled and sorted by their `_bbfg` field; when
// _dbeae reports the difference of two `_bbfg` values as true (presumably
// "approximately zero" -- confirm against _dbeae) the tie is broken by Llx.
// Walking the sorted lines, a new group starts whenever a line's Ury is below
// the lowest Lly seen so far, and the midpoint of that gap is recorded as a
// corridor.
//
// The corridors are returned only if every group overlaps every cell according
// to compositeCell.hasLines; otherwise nil is returned. nil is also returned
// when the cells contain no lines at all (previously this case panicked on an
// out-of-range index).
func _cebgb(_daea []compositeCell) []float64 {
	var lines []*textLine
	numParas := 0
	for _, cell := range _daea {
		numParas += len(cell.paraList)
		lines = append(lines, cell.lines()...)
	}
	_e.Slice(lines, func(i, j int) bool {
		li, lj := lines[i], lines[j]
		di, dj := li._bbfg, lj._bbfg
		if !_dbeae(di - dj) {
			return di < dj
		}
		return li.Llx < lj.Llx
	})
	if _efda {
		_agc.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", numParas, len(lines))
		for i, line := range lines {
			_agc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, line)
		}
	}

	// Nothing to separate: avoid indexing lines[0] on an empty slice.
	if len(lines) == 0 {
		return nil
	}

	var corridors []float64
	lowest := lines[0] // line with the smallest Lly seen so far
	var groups [][]*textLine
	group := []*textLine{lowest}
	for i, line := range lines[1:] {
		if line.Ury < lowest.Lly {
			// `line` lies entirely below everything seen so far: close the group.
			border := 0.5 * (line.Ury + lowest.Lly)
			if _efda {
				_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a", i, line.Ury, lowest.Lly, border, lowest, line)
			}
			corridors = append(corridors, border)
			groups = append(groups, group)
			group = nil
		}
		group = append(group, line)
		if line.Lly < lowest.Lly {
			lowest = line
		}
	}
	if len(group) > 0 {
		groups = append(groups, group)
	}
	if _efda {
		_agc.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", corridors)
	}
	if _efda {
		_ga.Log.Info("\u0072\u006f\u0077\u003d\u0025\u0064", len(_daea))
		for i, cell := range _daea {
			_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, cell)
		}
		_ga.Log.Info("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d", len(groups))
		for i, g := range groups {
			_agc.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a", i, len(g))
			for j, line := range g {
				_agc.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", j, line)
			}
		}
	}

	// The corridors are only usable if every group touches every cell.
	spansAll := true
	for gi, g := range groups {
		inEveryCell := true
		for ci, cell := range _daea {
			if _efda {
				_agc.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", gi, len(groups), ci, len(_daea), cell)
			}
			if !cell.hasLines(g) {
				if _efda {
					_agc.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", gi, len(groups), ci, len(_daea))
				}
				inEveryCell = false
				break
			}
		}
		if !inEveryCell {
			spansAll = false
			break
		}
	}
	if !spansAll {
		if _efda {
			_ga.Log.Info("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg")
		}
		corridors = nil
	}
	if _efda && corridors != nil {
		_agc.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", corridors)
	}
	return corridors
}
// toCellTextMarks returns the text marks of all lines in `_bgee._fdec`, in
// order. `_bafe` is the running offset that is handed to every helper.
//
// When _efgbd is set and a line other than the last ends in a hyphen, _bgaca is
// applied to that line's marks and nothing is inserted after it. Otherwise,
// between two consecutive lines, _ffeg is called with the value _beaa returns
// for the two lines' `_bbfg` fields.
func (_bgee *textPara) toCellTextMarks(_bafe *int) []TextMark {
	var marks []TextMark
	for i, line := range _bgee._fdec {
		lineMarks := line.toTextMarks(_bafe)
		dehyphenate := _efgbd && line.endsInHyphen() && i != len(_bgee._fdec)-1
		if dehyphenate {
			lineMarks = _bgaca(lineMarks, _bafe)
		}
		marks = append(marks, lineMarks...)
		if !dehyphenate && i != len(_bgee._fdec)-1 {
			separator := _beaa(line._bbfg, _bgee._fdec[i+1]._bbfg)
			marks = _ffeg(marks, _bafe, separator)
		}
	}
	return marks
}

// hasLines reports whether _gabc returns true for the cell's rectangle and the
// rectangle of at least one line in `_gaeaa`.
func (_afacc compositeCell) hasLines(_gaeaa []*textLine) bool {
	for i, line := range _gaeaa {
		hit := _gabc(_afacc.PdfRectangle, line.PdfRectangle)
		if _efda {
			_agc.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", hit, i, len(_gaeaa))
			_agc.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", _afacc)
			_agc.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if hit {
			return true
		}
	}
	return false
}

// getDown collects, for every first index in [0, _eacg), the `_ecada` neighbour
// of the table entry at second index `_cgae - 1`.
// It returns nil if any of those neighbours reports taken(), or if the `_fdgf`
// link of one collected entry is not the next collected entry.
func (_ddabb *textTable) getDown() paraList {
	cells := make(paraList, _ddabb._eacg)
	for x := range cells {
		candidate := _ddabb.get(x, _ddabb._cgae-1)._ecada
		if candidate.taken() {
			return nil
		}
		cells[x] = candidate
	}
	// Each candidate must be linked to the following one through `_fdgf`.
	for x := 1; x < len(cells); x++ {
		if cells[x-1]._fdgf != cells[x] {
			return nil
		}
	}
	return cells
}

// _ggfef renders `_cgefc` as an RGBA image of the same width and height:
// pixels whose red, green and blue components sum to zero are drawn in
// `_dfac`, all others are transparent. Pixels whose colour cannot be read are
// logged and left at the image's zero value.
func _ggfef(_cgefc *_ba.Image, _dfac _gff.Color) _ag.Image {
	width, height := int(_cgefc.Width), int(_cgefc.Height)
	out := _ag.NewRGBA(_ag.Rect(0, 0, width, height))
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			src, err := _cgefc.ColorAt(x, y)
			if err != nil {
				_ga.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e", x, y)
				continue
			}
			r, g, b, _ := src.RGBA()
			var pixel _gff.Color = _gff.Transparent
			if r+g+b == 0 {
				pixel = _dfac
			}
			out.Set(x, y, pixel)
		}
	}
	return out
}

// _adgbc returns the keys of `_acebed` in ascending order.
func _adgbc(_acebed map[int][]float64) []int {
	keys := make([]int, 0, len(_acebed))
	for key := range _acebed {
		keys = append(keys, key)
	}
	_e.Ints(keys)
	return keys
}
2024-03-27 22:34:33 +00:00
2024-04-16 11:40:43 +00:00
// String returns a string describing `_caag`: its offset, its text (quoted and
// as hex code points), the lower-left and upper-right corners of its bounding
// box, the font description (cut to 50 bytes and followed by "..." when
// longer) and a " *M*" suffix when Meta is set.
func ( _caag TextMark ) String ( ) string { _ddbg := _caag . BBox ; var _gabe string ; if _caag . Font != nil { _gabe = _caag . Font . String ( ) ; if len ( _gabe ) > 50 { _gabe = _gabe [ : 50 ] + "\u002e\u002e\u002e" ; } ; } ; var _age string ; if _caag . Meta { _age = "\u0020\u002a\u004d\u002a" ;
} ; return _agc . Sprintf ( "\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d" , _caag . Offset , _caag . Text , [ ] rune ( _caag . Text ) , _ddbg . Llx , _ddbg . Lly , _ddbg . Urx , _ddbg . Ury , _gabe , _age ) ;
} ;
// textTable is a rectangular region (embedded PdfRectangle) holding table
// entries in two maps keyed by uint64: `_egfea` (*textPara) and `_aaaga`
// (compositeCell). `_eacg` and `_cgae` bound the first and second index
// respectively in textTable.getDown.
type textTable struct { _ba . PdfRectangle ; _eacg , _cgae int ; _edeg bool ; _egfea map [ uint64 ] * textPara ; _aaaga map [ uint64 ] compositeCell ; } ;
// _aaeed classifies rectangle `_bdgbf` by its longer side: `_aaad` when it is
// wider than tall and its width is at least `_ggdg`, `_cfae` when it is not
// wider than tall and its height is at least `_ggdg`, and `_fbcc` otherwise.
func _aaeed ( _bdgbf _ba . PdfRectangle ) rulingKind { _egbfg := _bdgbf . Width ( ) ; _bgegc := _bdgbf . Height ( ) ; if _egbfg > _bgegc { if _egbfg >= _ggdg { return _aaad ;
} ; } else { if _bgegc >= _ggdg { return _cfae ; } ; } ; return _fbcc ; } ;
// absorb merges `_cbgcd` into `_baga`: the rectangle becomes the result of
// _bbbafc on both rectangles and the `_abcee` marks of `_cbgcd` are appended.
func ( _baga * textWord ) absorb ( _cbgcd * textWord ) { _baga . PdfRectangle = _bbbafc ( _baga . PdfRectangle , _cbgcd . PdfRectangle ) ; _baga . _abcee = append ( _baga . _abcee , _cbgcd . _abcee ... ) ; } ;
// bbox returns a rectangle built from the list's primMinMax and secMinMax
// ranges. The kind of the first ruling decides the axes: for `_aaad` the
// secondary range is x and the primary range is y, otherwise the reverse.
// An empty list logs an error and yields the zero rectangle.
func ( _ebgb rulingList ) bbox ( ) _ba . PdfRectangle { var _ecbe _ba . PdfRectangle ;
if len ( _ebgb ) == 0 { _ga . Log . Error ( "r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073" ) ; return _ba . PdfRectangle { } ; } ; if _ebgb [ 0 ] . _bbce == _aaad { _ecbe . Llx , _ecbe . Urx = _ebgb . secMinMax ( ) ;
_ecbe . Lly , _ecbe . Ury = _ebgb . primMinMax ( ) ; } else { _ecbe . Llx , _ecbe . Urx = _ebgb . primMinMax ( ) ; _ecbe . Lly , _ecbe . Ury = _ebgb . secMinMax ( ) ; } ; return _ecbe ; } ;
// newTextMark builds a textMark for text `_eeagb` drawn with matrix `_afcg`
// and ending at point `_ddca`. The remaining arguments are stored in the mark
// unchanged.
// The second result is false when the page rectangle `_degd._cdb._cb` has a
// positive width and the mark's box does not intersect it (see _fbbab).
func ( _degd * textObject ) newTextMark ( _eeagb string , _afcg _agf . Matrix , _ddca _agf . Point , _gbfe float64 , _ddeg * _ba . PdfFont , _ddbgf float64 , _deff , _gagfg _gff . Color , _abde _add . PdfObject , _fegde [ ] string , _adag int , _adcd int ) ( textMark , bool ) { _fcae := _afcg . Angle ( ) ;
// `_eefad` is the orientation derived from the matrix angle by _cafaa;
// `_eccd` is the scaling factor along the glyph-height axis for it.
_eefad := _cafaa ( _fcae , _cbbd ) ; var _eccd float64 ; if _eefad % 180 != 90 { _eccd = _afcg . ScalingFactorY ( ) ; } else { _eccd = _afcg . ScalingFactorX ( ) ; } ; _feda := _eafa ( _afcg ) ; _bcgb := _ba . PdfRectangle { Llx : _feda . X , Lly : _feda . Y , Urx : _ddca . X , Ury : _ddca . Y } ;
// Extend the start-to-end box by `_eccd` in the direction given by the
// orientation (unknown orientations are treated as 0), then normalise it so
// that Llx <= Urx and Lly <= Ury.
switch _eefad % 360 { case 90 : _bcgb . Urx -= _eccd ; case 180 : _bcgb . Ury -= _eccd ; case 270 : _bcgb . Urx += _eccd ; case 0 : _bcgb . Ury += _eccd ; default : _eefad = 0 ; _bcgb . Ury += _eccd ; } ; if _bcgb . Llx > _bcgb . Urx { _bcgb . Llx , _bcgb . Urx = _bcgb . Urx , _bcgb . Llx ; } ; if _bcgb . Lly > _bcgb . Ury { _bcgb . Lly , _bcgb . Ury = _bcgb . Ury , _bcgb . Lly ;
// Clip the box to the page rectangle; without an intersection the box
// becomes the zero rectangle and the mark is flagged as outside the page.
} ; _baedf := true ; if _degd . _cdb . _cb . Width ( ) > 0 { _efca , _afgb := _fbbab ( _bcgb , _degd . _cdb . _cb ) ; if ! _afgb { _baedf = false ; _ga . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q" , _bcgb , _degd . _cdb . _cb , _eeagb ) ;
// `_dabgb` is the box re-expressed for orientations 90, 180 and 270 using
// the page rectangle's upper-right corner; it equals `_bcgb` for 0.
} ; _bcgb = _efca ; } ; _dabgb := _bcgb ; _fbea := _degd . _cdb . _cb ; switch _eefad % 360 { case 90 : _fbea . Urx , _fbea . Ury = _fbea . Ury , _fbea . Urx ; _dabgb = _ba . PdfRectangle { Llx : _fbea . Urx - _bcgb . Ury , Urx : _fbea . Urx - _bcgb . Lly , Lly : _bcgb . Llx , Ury : _bcgb . Urx } ;
case 180 : _dabgb = _ba . PdfRectangle { Llx : _fbea . Urx - _bcgb . Llx , Urx : _fbea . Urx - _bcgb . Urx , Lly : _fbea . Ury - _bcgb . Lly , Ury : _fbea . Ury - _bcgb . Ury } ; case 270 : _fbea . Urx , _fbea . Ury = _fbea . Ury , _fbea . Urx ; _dabgb = _ba . PdfRectangle { Llx : _bcgb . Ury , Urx : _bcgb . Lly , Lly : _fbea . Ury - _bcgb . Llx , Ury : _fbea . Ury - _bcgb . Urx } ;
// Normalise the transformed box, then assemble the mark: PdfRectangle is the
// transformed box and `_dded` keeps the untransformed one.
} ; if _dabgb . Llx > _dabgb . Urx { _dabgb . Llx , _dabgb . Urx = _dabgb . Urx , _dabgb . Llx ; } ; if _dabgb . Lly > _dabgb . Ury { _dabgb . Lly , _dabgb . Ury = _dabgb . Ury , _dabgb . Lly ; } ; _cffee := textMark { _bfdb : _eeagb , PdfRectangle : _dabgb , _dded : _bcgb , _ccdg : _ddeg , _gccfd : _eccd , _gccd : _ddbgf , _aaccc : _afcg , _acdgd : _ddca , _gfbg : _eefad , _fbaf : _deff , _eaee : _gagfg , _bbgf : _abde , _fcbc : _fegde , Th : _degd . _efed . _def , Tw : _degd . _efed . _gdeg , _ebbb : _adcd , _aeaee : _adag } ;
if _bfgae { _ga . Log . Info ( "n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073" , _feda , _ddca , _cffee . String ( ) ) ; } ; return _cffee , _baedf ;
} ;
// writeCellText writes the text of every line in `_bgac._fdec` to `_fbbg`.
// When _efgbd is set and a line other than the last ends in a hyphen, the line
// text is passed through _cabf and nothing is written after it; otherwise the
// string returned by _beaa for the two lines' `_bbfg` values is written
// between consecutive lines. Errors returned by Write are ignored.
func ( _bgac * textPara ) writeCellText ( _fbbg _gg . Writer ) { for _daaa , _dfcb := range _bgac . _fdec { _ccda := _dfcb . text ( ) ; _bagb := _efgbd && _dfcb . endsInHyphen ( ) && _daaa != len ( _bgac . _fdec ) - 1 ; if _bagb { _ccda = _cabf ( _ccda ) ; } ; _fbbg . Write ( [ ] byte ( _ccda ) ) ;
if ! ( _bagb || _daaa == len ( _bgac . _fdec ) - 1 ) { _fbbg . Write ( [ ] byte ( _beaa ( _dfcb . _bbfg , _bgac . _fdec [ _daaa + 1 ] . _bbfg ) ) ) ; } ; } ; } ;
// RenderMode is the text rendering mode (Tmode). It determines whether showing
// text causes glyph outlines to be stroked, filled, used as a clipping
// boundary, or some combination of the three. Stroking, filling and clipping
// have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
//
// The underlying type is int; the RenderModeStroke, RenderModeFill and
// RenderModeClip constants are distinct bits.
type RenderMode int
// PageText represents the layout of text on a device page.
// All fields are unexported.
type PageText struct {
	_aebf  []*textMark
	_ggff  string
	_ccca  []TextMark
	_gdba  []TextTable
	_bbdc  _ba.PdfRectangle
	_efea  []pathSection
	_ebafc []pathSection
	_abge  *_add.PdfObject
	_abad  _add.PdfObject
	_cde   *_fc.ContentStreamOperations
	_eaag  PageTextOptions
}

// setTextLeading stores `_fbfd` in the `_bfa` field of the text state `_efed`.
// Calling it on a nil receiver is a no-op.
func (_fegb *textObject) setTextLeading(_fbfd float64) {
	if _fegb != nil {
		_fegb._efed._bfa = _fbfd
	}
}

// wordBag is a rectangular region (embedded PdfRectangle) that holds words
// grouped under integer keys in `_fcgd`, together with two ruling lists.
type wordBag struct {
	_ba.PdfRectangle
	_cdea  float64
	_febe  rulingList
	_cgdae rulingList
	_ebgd  float64
	_fcgd  map[int][]*textWord
}
// String returns a description of `_ebag`.
// Rulings of kind `_fbcc` are reported as "NOT RULING". For all others the
// output lists the kind, the primary coordinate, the secondary range and its
// length, `_ccaa`, the colour and, when `_ebcee` is non-zero, the width. The
// primary axis is labelled "y" for kind `_aaad` and "x" otherwise.
func (_ebag *ruling) String() string {
	if _ebag._bbce == _fbcc {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}
	var primAxis, secAxis string
	if _ebag._bbce == _aaad {
		primAxis, secAxis = "\u0079", "\u0078"
	} else {
		primAxis, secAxis = "\u0078", "\u0079"
	}
	var widthText string
	if _ebag._ebcee != 0.0 {
		widthText = _agc.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _ebag._ebcee)
	}
	return _agc.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",
		_ebag._bbce, primAxis, _ebag._edga, secAxis, _ebag._fcec, _ebag._abeg,
		_ebag._abeg-_ebag._fcec, _ebag._ccaa, _ebag.Color, widthText)
}

// _bafa returns _eddba(_faabf, _dfeb) unless _dbeae reports true for that
// value, in which case it falls back to _abag(_faabf, _dfeb).
func _bafa(_faabf, _dfeb bounded) float64 {
	if primary := _eddba(_faabf, _dfeb); !_dbeae(primary) {
		return primary
	}
	return _abag(_faabf, _dfeb)
}

// lineTo adds the point (`_ggdd`, `_gcbfg`) via addPoint, logging it first when
// `_fdbg` is set.
func (_fda *shapesState) lineTo(_ggdd, _gcbfg float64) {
	if _fdbg {
		_ga.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _ggdd, _gcbfg, _fda.devicePoint(_ggdd, _gcbfg))
	}
	_fda.addPoint(_ggdd, _gcbfg)
}

// extractContentStreamImages parses the content stream `_bf` and processes it
// against resources `_bb`, routing every operand to processOperand.
// The `_ca` cache and the `_gce` options are initialised to empty values when
// they are nil. Parse and processing errors are returned unchanged.
func (_fga *imageExtractContext) extractContentStreamImages(_bf string, _bb *_ba.PdfPageResources) error {
	operations, err := _fc.NewContentStreamParser(_bf).Parse()
	if err != nil {
		return err
	}
	if _fga._ca == nil {
		_fga._ca = make(map[*_add.PdfObjectStream]*cachedImage)
	}
	if _fga._gce == nil {
		_fga._gce = &ImageExtractOptions{}
	}
	processor := _fc.NewContentStreamProcessor(*operations)
	processor.AddHandler(_fc.HandlerConditionEnumAllOperands, "", _fga.processOperand)
	return processor.Process(_bb)
}

// _ecbcb returns the keys of `_dcca` sorted in descending order.
func _ecbcb(_dcca map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_dcca))
	for key := range _dcca {
		keys = append(keys, key)
	}
	_e.Float64s(keys)
	// Reverse the ascending result in place.
	for lo, hi := 0, len(keys)-1; lo < hi; lo, hi = lo+1, hi-1 {
		keys[lo], keys[hi] = keys[hi], keys[lo]
	}
	return keys
}

// applyTables returns a new list that starts with one newTablePara() entry per
// table in `_gebbf`, followed by the entries of `_gdgb` whose `_gecb` flag is
// not set.
func (_gdgb paraList) applyTables(_gebbf []*textTable) paraList {
	var result paraList
	for _, table := range _gebbf {
		result = append(result, table.newTablePara())
	}
	for _, para := range _gdgb {
		if !para._gecb {
			result = append(result, para)
		}
	}
	return result
}

// complete reports whether the tile has exactly four borders.
func (_eeage gridTile) complete() bool {
	return _eeage.numBorders() == 4
}
// setWordSpacing stores `_fded` in the `_gdeg` field of the text state `_efed`.
// Calling it on a nil receiver is a no-op.
func (_gbg *textObject) setWordSpacing(_fded float64) {
	if _gbg != nil {
		_gbg._efed._gdeg = _fded
	}
}

// _ecaea returns the lines of `_acbf` for which `_cffd.Find` yields a non-nil
// match on the line's text. The result is never nil.
func _ecaea(_acbf []*textLine) []*textLine {
	matched := make([]*textLine, 0)
	for _, line := range _acbf {
		if _cffd.Find([]byte(line.text())) != nil {
			matched = append(matched, line)
		}
	}
	return matched
}

// _ccacg matches a whole string consisting of a run of digits with an optional
// trailing dot, or a run of the letters I, i and v, optionally followed by ")"
// and surrounded by optional whitespace.
var _ccacg = _d.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")

// _cage returns the single parameter of `_ddea` as a float64.
// If the operation does not have exactly one parameter, the mismatch is logged
// and an "incorrect parameter count" error is returned.
func _cage(_ddea *_fc.ContentStreamOperation) (float64, error) {
	if len(_ddea.Params) == 1 {
		return _add.GetNumberAsFloat(_ddea.Params[0])
	}
	err := _b.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	_ga.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _ddea.Operand, 1, len(_ddea.Params), _ddea.Params)
	return 0.0, err
}
// BBox returns the smallest axis-aligned rectangle that encloses all the
// TextMarks in `_gfa`, skipping marks that have Meta set or for which _efcbe
// reports true on their text. The second result is false when no mark was
// included.
func (_gfa *TextMarkArray) BBox() (_ba.PdfRectangle, bool) {
	var bbox _ba.PdfRectangle
	found := false
	for _, mark := range _gfa._ffca {
		if mark.Meta || _efcbe(mark.Text) {
			continue
		}
		if found {
			bbox = _bbbafc(bbox, mark.BBox)
		} else {
			bbox = mark.BBox
			found = true
		}
	}
	return bbox, found
}

// structElement is a node of a tree: it carries a string, child elements, an
// int64 and a PDF object.
type structElement struct {
	_bbag string
	_abff []structElement
	_cfcb int64
	_eada _add.PdfObject
}

// _fbbd splits `_fgaf._ddef` on newlines and returns its non-empty lines, each
// prefixed with `*_efedf` and terminated by a newline.
func _fbbd(_fgaf *list, _efedf *string) string {
	lines := _f.Split(_fgaf._ddef, "\u000a")
	out := &_f.Builder{}
	for _, line := range lines {
		if line != "" {
			out.WriteString(*_efedf)
			out.WriteString(line)
			out.WriteString("\u000a")
		}
	}
	return out.String()
}

// quadraticTo adds only the end point (`_dcce`, `_agcf`) via addPoint; the
// first coordinate pair is not used.
func (_dcac *shapesState) quadraticTo(_cdee, _beaf, _dcce, _agcf float64) {
	if _fdbg {
		_ga.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	_dcac.addPoint(_dcce, _agcf)
}

// _bgaca removes the trailing rune from the last mark of `_bfgag` and moves
// `*_acfed` back accordingly.
//
// If the last mark holds a single rune, the mark is dropped and the offset is
// set to the end of the mark before it, or to the dropped mark's own Offset
// when it was the only mark. Otherwise the mark's text is replaced in the
// slice by _cabf(text) and the offset is changed by the difference in byte
// length.
//
// Fixes over the previous version: the shortened text is written back to the
// slice element (it used to be assigned to a local copy and lost), and an
// empty slice or a slice emptied by the removal no longer causes an
// out-of-range panic.
func _bgaca(_bfgag []TextMark, _acfed *int) []TextMark {
	if len(_bfgag) == 0 {
		return _bfgag
	}
	lastIdx := len(_bfgag) - 1
	last := _bfgag[lastIdx]

	if _a.RuneCountInString(last.Text) == 1 {
		_bfgag = _bfgag[:lastIdx]
		if lastIdx == 0 {
			// No mark is left: rewind to where the dropped mark started.
			*_acfed = last.Offset
			return _bfgag
		}
		prev := _bfgag[lastIdx-1]
		*_acfed = prev.Offset + len(prev.Text)
		return _bfgag
	}

	trimmed := _cabf(last.Text)
	*_acfed += len(trimmed) - len(last.Text)
	// Update the element itself; `last` is only a copy.
	_bfgag[lastIdx].Text = trimmed
	return _bfgag
}

// showText forwards its arguments to renderText and returns its error.
func (_bffa *textObject) showText(_bbab _add.PdfObject, _egbd []byte, _dgd int) error {
	return _bffa.renderText(_bbab, _egbd, _dgd)
}

// _agde converts the path sections `_aagff` into rulings.
// It first calls _beebf on the sections, then offers every pair of consecutive
// points of every subpath, together with the section colour, to _eacc and
// keeps the results for which _eacc reports true.
func _agde(_aagff []pathSection) rulingList {
	_beebf(_aagff)
	if _adgbf {
		_ga.Log.Info("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073", len(_aagff))
	}
	var rulings rulingList
	for _, section := range _aagff {
		for _, path := range section._dbdc {
			if len(path._aeee) < 2 {
				continue
			}
			from := path._aeee[0]
			for _, to := range path._aeee[1:] {
				if r, ok := _eacc(from, to, section.Color); ok {
					rulings = append(rulings, r)
				}
				from = to
			}
		}
	}
	if _adgbf {
		_ga.Log.Info("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073", rulings)
	}
	return rulings
}

// text returns everything writeText emits for the paragraph as a string.
func (_ccdc *textPara) text() string {
	buf := new(_dc.Buffer)
	_ccdc.writeText(buf)
	return buf.String()
}
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct {
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Text is the extracted text.
Text string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// BBox is the bounding box of the text.
2024-04-16 11:40:43 +00:00
BBox _ba . PdfRectangle ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Font is the font the text was drawn with.
2024-04-16 11:40:43 +00:00
Font * _ba . PdfFont ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2024-04-16 11:40:43 +00:00
FillColor _gff . Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2024-04-16 11:40:43 +00:00
StrokeColor _gff . Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Orientation is the text orientation
Orientation int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// DirectObject is the underlying PdfObject (Text Object) that represents the visible texts. This is introduced to get
// a simple access to the TextObject in case editing or replacment of some text is needed. E.g during redaction.
2024-04-16 11:40:43 +00:00
DirectObject _add . PdfObject ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// ObjString is a decoded string operand of a text-showing operator. It has the same value as `Text` attribute except
// when many glyphs are represented with the same Text Object that contains multiple length string operand in which case
// ObjString spans more than one character string that falls in different TextMark objects.
2024-04-16 11:40:43 +00:00
ObjString [ ] string ; Tw float64 ; Th float64 ; Tc float64 ; Index int ; _efeg bool ; _cfaa * TextTable ; } ; func _cabcg ( _caae [ ] * textMark , _cagb _ba . PdfRectangle , _agbac rulingList , _ccffd [ ] gridTiling , _adea bool ) paraList { _ga . Log . Trace ( "\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066" , len ( _caae ) , _cagb ) ;
if len ( _caae ) == 0 { return nil ; } ; _bacd := _dbcba ( _caae , _cagb ) ; if len ( _bacd ) == 0 { return nil ; } ; _agbac . log ( "\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065" ) ; _fdfa , _beee := _agbac . vertsHorzs ( ) ; _aaae := _adad ( _bacd , _cagb . Ury , _fdfa , _beee ) ;
_eccg := _fbgb ( _aaae , _cagb . Ury , _fdfa , _beee ) ; _eccg = _fdgb ( _eccg ) ; _faga := make ( paraList , 0 , len ( _eccg ) ) ; for _ , _fbac := range _eccg { _eefd := _fbac . arrangeText ( ) ; if _eefd != nil { _faga = append ( _faga , _eefd ) ; } ; } ; if ! _adea && len ( _faga ) >= _ggca { _faga = _faga . extractTables ( _ccffd ) ;
} ; _faga . sortReadingOrder ( ) ; if ! _adea { _faga . sortTopoOrder ( ) ; } ; _faga . log ( "\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072" ) ; return _faga ; } ; const ( RenderModeStroke RenderMode = 1 << iota ;
RenderModeFill ; RenderModeClip ; ) ; func ( _bdad * textLine ) appendWord ( _ccbg * textWord ) { _bdad . _cdcg = append ( _bdad . _cdcg , _ccbg ) ; _bdad . PdfRectangle = _bbbafc ( _bdad . PdfRectangle , _ccbg . PdfRectangle ) ; if _ccbg . _eabbf > _bdad . _fdfb { _bdad . _fdfb = _ccbg . _eabbf ;
} ; if _ccbg . _dfagd > _bdad . _bbfg { _bdad . _bbfg = _ccbg . _dfagd ; } ; } ; func _ddbd ( _ebac _agf . Point ) * subpath { return & subpath { _aeee : [ ] _agf . Point { _ebac } } } ; func _eddaee ( _ccggd _ba . PdfColorspace , _gccdd _ba . PdfColor ) _gff . Color { if _ccggd == nil || _gccdd == nil { return _gff . Black ;
} ; _dfdgf , _ccffa := _ccggd . ColorToRGB ( _gccdd ) ; if _ccffa != nil { _ga . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073" , _gccdd , _ccggd , _ccffa ) ;
return _gff . Black ; } ; _ffcae , _debgc := _dfdgf . ( * _ba . PdfColorDeviceRGB ) ; if ! _debgc { _ga . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076" , _dfdgf ) ;
return _gff . Black ; } ; return _gff . NRGBA { R : uint8 ( _ffcae . R ( ) * 255 ) , G : uint8 ( _ffcae . G ( ) * 255 ) , B : uint8 ( _ffcae . B ( ) * 255 ) , A : uint8 ( 255 ) } ; } ; func ( _cfce rulingList ) toGrids ( ) [ ] rulingList { if _adgbf { _ga . Log . Info ( "t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073" , _cfce ) ;
} ; _afede := _cfce . intersections ( ) ; if _adgbf { _ga . Log . Info ( "\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020" , len ( _cfce ) , len ( _afede ) ) ;
for _ , _abccg := range _ggad ( _afede ) { _agc . Printf ( "\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n" , _abccg , _afede [ _abccg ] ) ; } ; } ; _aefea := make ( map [ int ] intSet , len ( _cfce ) ) ; for _dbbcg := range _cfce { _ddagc := _cfce . connections ( _afede , _dbbcg ) ; if len ( _ddagc ) > 0 { _aefea [ _dbbcg ] = _ddagc ;
} ; } ; if _adgbf { _ga . Log . Info ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064" , len ( _aefea ) ) ; for _ , _ebcd := range _ggad ( _aefea ) { _agc . Printf ( "\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n" , _ebcd , _aefea [ _ebcd ] ) ;
} ; } ; _adef := _aedcc ( len ( _cfce ) , func ( _bbfd , _gcfae int ) bool { _eeec , _dbga := len ( _aefea [ _bbfd ] ) , len ( _aefea [ _gcfae ] ) ; if _eeec != _dbga { return _eeec > _dbga ; } ; return _cfce . comp ( _bbfd , _gcfae ) ; } ) ; if _adgbf { _ga . Log . Info ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076" , _adef ) ;
} ; _dacf := [ ] [ ] int { { _adef [ 0 ] } } ; _gggc : for _ , _dfgc := range _adef [ 1 : ] { for _aeec , _bgacf := range _dacf { for _ , _cfec := range _bgacf { if _aefea [ _cfec ] . has ( _dfgc ) { _dacf [ _aeec ] = append ( _bgacf , _dfgc ) ; continue _gggc ; } ; } ; } ; _dacf = append ( _dacf , [ ] int { _dfgc } ) ;
} ; if _adgbf { _ga . Log . Info ( "\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076" , _dacf ) ; } ; _e . SliceStable ( _dacf , func ( _fafd , _caee int ) bool { return len ( _dacf [ _fafd ] ) > len ( _dacf [ _caee ] ) } ) ; for _ , _gaeed := range _dacf { _e . Slice ( _gaeed , func ( _eeeca , _fgab int ) bool { return _cfce . comp ( _gaeed [ _eeeca ] , _gaeed [ _fgab ] ) } ) ;
} ; _gdeab := make ( [ ] rulingList , len ( _dacf ) ) ; for _ffde , _acdb := range _dacf { _cbeb := make ( rulingList , len ( _acdb ) ) ; for _fabc , _ebfa := range _acdb { _cbeb [ _fabc ] = _cfce [ _ebfa ] ; } ; _gdeab [ _ffde ] = _cbeb ; } ; if _adgbf { _ga . Log . Info ( "\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076" , _gdeab ) ;
} ; var _fggagg [ ] rulingList ; for _ , _fbfdg := range _gdeab { if _dccg , _dbbef := _fbfdg . isActualGrid ( ) ; _dbbef { _fbfdg = _dccg ; _fbfdg = _fbfdg . snapToGroups ( ) ; _fggagg = append ( _fggagg , _fbfdg ) ; } ; } ; if _adgbf { _gbbc ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073" , _fggagg ) ;
_ga . Log . Info ( "\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064" , len ( _gdeab ) , len ( _fggagg ) ) ; } ; return _fggagg ; } ; func _beebf ( _fcde [ ] pathSection ) { if _dgaf < 0.0 { return ;
} ; if _adgbf { _ga . Log . Info ( "\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073" , len ( _fcde ) ) ; } ; for _beaag , _fegc := range _fcde { for _afbee , _efbbee := range _fegc . _dbdc { for _egggeg , _ffeea := range _efbbee . _aeee { _efbbee . _aeee [ _egggeg ] = _agf . Point { X : _babb ( _ffeea . X ) , Y : _babb ( _ffeea . Y ) } ;
if _adgbf { _aafe := _efbbee . _aeee [ _egggeg ] ; if ! _gaeg ( _ffeea , _aafe ) { _egfde := _agf . Point { X : _aafe . X - _ffeea . X , Y : _aafe . Y - _ffeea . Y } ; _agc . Printf ( "\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a" , _beaag , _afbee , _egggeg , _ffeea , _aafe , _egfde ) ;
} ; } ; } ; } ; } ; } ; func _acac ( _bbcg * Extractor , _fgfa * _ba . PdfPageResources , _efaa _fc . GraphicsState , _cadd * textState , _gdce * stateStack ) * textObject { return & textObject { _cdb : _bbcg , _dbg : _fgfa , _bace : _efaa , _fgbf : _gdce , _efed : _cadd , _eefe : _agf . IdentityMatrix ( ) , _dbc : _agf . IdentityMatrix ( ) } ;
} ; type textWord struct { _ba . PdfRectangle ; _dfagd float64 ; _eaae string ; _abcee [ ] * textMark ; _eabbf float64 ; _gdec bool ; } ; func _bbbafc ( _bgga , _dfda _ba . PdfRectangle ) _ba . PdfRectangle { return _ba . PdfRectangle { Llx : _gf . Min ( _bgga . Llx , _dfda . Llx ) , Lly : _gf . Min ( _bgga . Lly , _dfda . Lly ) , Urx : _gf . Max ( _bgga . Urx , _dfda . Urx ) , Ury : _gf . Max ( _bgga . Ury , _dfda . Ury ) } ;
} ; func ( _dbcf * textTable ) isExportable ( ) bool { if _dbcf . _edeg { return true ; } ; _gged := func ( _effed int ) bool { _fgec := _dbcf . get ( 0 , _effed ) ; if _fgec == nil { return false ; } ; _eabea := _fgec . text ( ) ; _bfbbf := _a . RuneCountInString ( _eabea ) ; _gbefc := _ccacg . MatchString ( _eabea ) ;
return _bfbbf <= 1 || _gbefc ; } ; for _cddgc := 0 ; _cddgc < _dbcf . _cgae ; _cddgc ++ { if ! _gged ( _cddgc ) { return true ; } ; } ; return false ; } ; type shapesState struct { _gafa _agf . Matrix ; _gdfd _agf . Matrix ; _gfce [ ] * subpath ; _gbdgg bool ; _gcaaf _agf . Point ; _edf * textObject ;
} ; const ( _efgbd = true ; _cafd = true ; _fecd = true ; _afgf = false ; _bbdcd = false ; _ebcg = 6 ; _cgdda = 3.0 ; _ceab = 200 ; _bfcf = true ; _cbdec = true ; _gbge = true ; _gaedd = true ; _ecbfe = false ; ) ; func _ceee ( _agfd float64 ) int { var _eeff int ; if _agfd >= 0 { _eeff = int ( _agfd / _dage ) ;
} else { _eeff = int ( _agfd / _dage ) - 1 ; } ; return _eeff ; } ;
// TextMarkArray is a collection of TextMarks.
// The marks are kept in the unexported slice _ffca.
type TextMarkArray struct { _ffca [ ] TextMark } ;
// augmentGrid splits the list into its two ruling lists via vertsHorzs and,
// where the bounding box of one list sticks out beyond the other's by more
// than the tolerance _dfed, adds a synthetic ruling (tagged _degg) on that
// side:
//   - a _cfae ruling at the second list's Llx / Urx, spanning the first list's
//     Lly..Ury, is prepended / appended to the first list;
//   - an _aaad ruling at the first list's Lly / Ury, spanning the second list's
//     Llx..Urx, is prepended / appended to the second list.
// If either list is empty the two lists are returned unchanged. If the
// combined length still equals len(_ebea) the original lists are returned;
// otherwise both versions are logged and the augmented lists are returned.
func ( _ebea rulingList ) augmentGrid ( ) ( rulingList , rulingList ) { _egfcc , _eebf := _ebea . vertsHorzs ( ) ; if len ( _egfcc ) == 0 || len ( _eebf ) == 0 { return _egfcc , _eebf ; } ; _faeb , _dgaa := _egfcc , _eebf ; _bcad := _egfcc . bbox ( ) ;
_cace := _eebf . bbox ( ) ; if _adgbf { _ga . Log . Info ( "\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066" , _bcad ) ; _ga . Log . Info ( "\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066" , _cace ) ;
} ; var _acgc , _ecde , _effb , _cfbeb * ruling ; if _cace . Llx < _bcad . Llx - _dfed { _acgc = & ruling { _ccaa : _degg , _bbce : _cfae , _edga : _cace . Llx , _fcec : _bcad . Lly , _abeg : _bcad . Ury } ; _egfcc = append ( rulingList { _acgc } , _egfcc ... ) ; } ; if _cace . Urx > _bcad . Urx + _dfed { _ecde = & ruling { _ccaa : _degg , _bbce : _cfae , _edga : _cace . Urx , _fcec : _bcad . Lly , _abeg : _bcad . Ury } ;
_egfcc = append ( _egfcc , _ecde ) ; } ; if _bcad . Lly < _cace . Lly - _dfed { _effb = & ruling { _ccaa : _degg , _bbce : _aaad , _edga : _bcad . Lly , _fcec : _cace . Llx , _abeg : _cace . Urx } ; _eebf = append ( rulingList { _effb } , _eebf ... ) ; } ; if _bcad . Ury > _cace . Ury + _dfed { _cfbeb = & ruling { _ccaa : _degg , _bbce : _aaad , _edga : _bcad . Ury , _fcec : _cace . Llx , _abeg : _cace . Urx } ;
_eebf = append ( _eebf , _cfbeb ) ; } ; if len ( _egfcc ) + len ( _eebf ) == len ( _ebea ) { return _faeb , _dgaa ; } ; _bagg := append ( _egfcc , _eebf ... ) ; _ebea . log ( "u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064" ) ; _bagg . log ( "\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d" ) ;
return _egfcc , _eebf ; } ;
// _bdfa returns the keys of `_bcegd` sorted in increasing order. The result is
// a non-nil (possibly empty) slice.
func _bdfa ( _bcegd map [ float64 ] [ ] * textLine ) [ ] float64 { _dcee := [ ] float64 { } ; for _fbgc := range _bcegd { _dcee = append ( _dcee , _fbgc ) ; } ; _e . Float64s ( _dcee ) ; return _dcee ; } ;
// toTextMarks concatenates the TextMarks of every paragraph in the list,
// skipping paragraphs whose _cddef flag is set. A running offset (_cfbe) is
// threaded through each paragraph's toTextMarks and through _ffeg.
// After each non-skipped paragraph that is not the last list element, a single
// "\u0020" mark is added via _ffeg when _gfgc(para, next) holds, otherwise two
// "\u000a" marks. Two "\u000a" marks are always added at the very end.
// NOTE(review): _ffeg presumably appends a synthetic mark carrying the given
// text and advances the offset — confirm against its definition.
func ( _dfgef paraList ) toTextMarks ( ) [ ] TextMark { _cfbe := 0 ;
var _cbbdg [ ] TextMark ; for _bccd , _dbce := range _dfgef { if _dbce . _cddef { continue ; } ; _efae := _dbce . toTextMarks ( & _cfbe ) ; _cbbdg = append ( _cbbdg , _efae ... ) ; if _bccd != len ( _dfgef ) - 1 { if _gfgc ( _dbce , _dfgef [ _bccd + 1 ] ) { _cbbdg = _ffeg ( _cbbdg , & _cfbe , "\u0020" ) ;
} else { _cbbdg = _ffeg ( _cbbdg , & _cfbe , "\u000a" ) ; _cbbdg = _ffeg ( _cbbdg , & _cfbe , "\u000a" ) ; } ; } ; } ; _cbbdg = _ffeg ( _cbbdg , & _cfbe , "\u000a" ) ; _cbbdg = _ffeg ( _cbbdg , & _cfbe , "\u000a" ) ; return _cbbdg ; } ;
// compositeRowCorridors returns, for each row index y in [1, _cgae) that has
// at least one composite cell stored in _aaaga, the value _cebgb computes from
// that row's composite cells. Row 0 is never examined and rows without
// composite cells are left out of the map. Diagnostics are printed when _efda
// is set.
func ( _gcccd * textTable ) compositeRowCorridors ( ) map [ int ] [ ] float64 { _dbbbdb := make ( map [ int ] [ ] float64 , _gcccd . _cgae ) ;
if _efda { _ga . Log . Info ( "c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064" , _gcccd . _cgae ) ; } ; for _faadf := 1 ; _faadf < _gcccd . _cgae ; _faadf ++ { var _bebg [ ] compositeCell ;
for _fgdf := 0 ; _fgdf < _gcccd . _eacg ; _fgdf ++ { if _abbb , _ggbed := _gcccd . _aaaga [ _bgcc ( _fgdf , _faadf ) ] ; _ggbed { _bebg = append ( _bebg , _abbb ) ; } ; } ; if len ( _bebg ) == 0 { continue ; } ; _gdgeg := _cebgb ( _bebg ) ; _dbbbdb [ _faadf ] = _gdgeg ; if _efda { _agc . Printf ( "\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a" , _faadf , _gdgeg ) ;
} ; } ; return _dbbbdb ; } ;
// computeText returns the concatenation, without separators, of the _bfdb
// strings of the word's marks in order.
func ( _dbcc * textWord ) computeText ( ) string { _afad := make ( [ ] string , len ( _dbcc . _abcee ) ) ; for _dbbf , _bdfdd := range _dbcc . _abcee { _afad [ _dbbf ] = _bdfdd . _bfdb ; } ; return _f . Join ( _afad , "" ) ; } ;
// _bfcbd returns a new wordBag that contains only `_acce`. The word is stored
// under the bin index _ceee(_acce._dfagd); the bag takes the word's rectangle,
// uses the word's _eabbf as _cdea, stores `_gfae` in _ebgd and keeps the two
// ruling lists in _febe and _cgdae.
func _bfcbd ( _acce * textWord , _gfae float64 , _bgcd , _bbbaf rulingList ) * wordBag { _ebafd := _ceee ( _acce . _dfagd ) ;
_gdgd := [ ] * textWord { _acce } ; _agg := wordBag { _fcgd : map [ int ] [ ] * textWord { _ebafd : _gdgd } , PdfRectangle : _acce . PdfRectangle , _cdea : _acce . _eabbf , _ebgd : _gfae , _febe : _bgcd , _cgdae : _bbbaf } ; return & _agg ; } ;
// getCellInfo searches the table's cells for one whose Marks contain `_aad`
// (as decided by Marks.exists). For the first match it returns
// [][]int{{i}, {j}} where the cell is Cells[i][j]; it returns nil when no cell
// contains the mark.
func ( _aagf TextTable ) getCellInfo ( _aad TextMark ) [ ] [ ] int { for _gbfa , _gfc := range _aagf . Cells { for _dadb := range _gfc { _fdce := & _gfc [ _dadb ] . Marks ;
if _fdce . exists ( _aad ) { return [ ] [ ] int { { _gbfa } , { _dadb } } ; } ; } ; } ; return nil ; } ;
// primMinMax returns the smallest and largest _edga value in the list.
// The list must not be empty: element 0 is read unconditionally.
func ( _bacdd rulingList ) primMinMax ( ) ( float64 , float64 ) { _aade , _ebdc := _bacdd [ 0 ] . _edga , _bacdd [ 0 ] . _edga ; for _ , _cfaec := range _bacdd [ 1 : ] { if _cfaec . _edga < _aade { _aade = _cfaec . _edga ;
} else if _cfaec . _edga > _ebdc { _ebdc = _cfaec . _edga ; } ; } ; return _aade , _ebdc ; } ;
// asRuling converts the line into a ruling tagged _bcfc that keeps the line's
// kind and Color.
//   - kind _cfae: _edga is the mean X of the end points and _fcec/_abeg are the
//     smaller/larger end-point Y.
//   - kind _aaad: _edga is the mean Y of the end points and _fcec/_abeg are the
//     smaller/larger end-point X.
// Any other kind is logged as an error and (nil, false) is returned.
func ( _eddae lineRuling ) asRuling ( ) ( * ruling , bool ) { _affaf := ruling { _bbce : _eddae . _cebc , Color : _eddae . Color , _ccaa : _bcfc } ; switch _eddae . _cebc { case _cfae : _affaf . _edga = _eddae . xMean ( ) ;
_affaf . _fcec = _gf . Min ( _eddae . _eded . Y , _eddae . _badee . Y ) ; _affaf . _abeg = _gf . Max ( _eddae . _eded . Y , _eddae . _badee . Y ) ; case _aaad : _affaf . _edga = _eddae . yMean ( ) ; _affaf . _fcec = _gf . Min ( _eddae . _eded . X , _eddae . _badee . X ) ; _affaf . _abeg = _gf . Max ( _eddae . _eded . X , _eddae . _badee . X ) ;
default : _ga . Log . Error ( "\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064" , _eddae . _cebc ) ; return nil , false ; } ; return & _affaf , true ; } ;
// _daf returns the signed horizontal distance from the right edge (Urx) of
// `_aebc` to the left edge (Llx) of `_fegd`. The result is negative when the
// two boxes overlap horizontally.
func _daf ( _fegd , _aebc bounded ) float64 { return _fegd . bbox ( ) . Llx - _aebc . bbox ( ) . Urx } ;
// llyRange returns the sub-slice of `_gefc` whose paragraphs have an Lly value
// inside the closed interval [`_ffgd`, `_beb`].
//
// `_gefc` holds indexes into the receiver and is binary-searched on Lly, so it
// must list the paragraphs in non-decreasing Lly order. nil is returned when
// the interval lies entirely below the first or above the last paragraph.
// Neither the receiver nor `_gefc` may be empty.
func (_beeca paraList) llyRange(_gefc []int, _ffgd, _beb float64) []int {
	count := len(_beeca)
	// llyAt reads the Lly of the paragraph at position pos of the ordering.
	llyAt := func(pos int) float64 { return _beeca[_gefc[pos]].Lly }

	if _beb < llyAt(0) {
		return nil
	}
	if _ffgd > llyAt(count-1) {
		return nil
	}

	// first position whose Lly is >= the lower bound
	from := _e.Search(count, func(pos int) bool { return llyAt(pos) >= _ffgd })
	// first position whose Lly is > the upper bound
	to := _e.Search(count, func(pos int) bool { return llyAt(pos) > _beb })
	return _gefc[from:to]
}

// _cedbg reports whether the magnitude of `_gfcadb` is strictly below the
// tolerance _gbb.
func _cedbg(_gfcadb float64) bool {
	magnitude := _gf.Abs(_gfcadb)
	return magnitude < _gbb
}

// _babb rounds `_degcb` to the nearest multiple of the step _dgaf.
func _babb(_degcb float64) float64 {
	steps := _gf.Round(_degcb / _dgaf)
	return _dgaf * steps
}
// _ccba sorts `_bcge` in place by increasing _bbfg and then groups the lines
// by the rounded value of _adgc(line). Within each group the lines keep the
// sorted order. The returned map is never nil.
func _ccba(_bcge []*textLine) map[float64][]*textLine {
	less := func(i, j int) bool {
		return _bcge[i]._bbfg < _bcge[j]._bbfg
	}
	_e.Slice(_bcge, less)

	groups := make(map[float64][]*textLine)
	for idx := range _bcge {
		line := _bcge[idx]
		key := _gf.Round(_adgc(line))
		groups[key] = append(groups[key], line)
	}
	return groups
}
// String returns a description of `tm`: its rectangle, its _gccfd value
// (labelled "fontsize" in the output) and its text in double quotes.
func ( _agdgd * textMark ) String ( ) string { return _agc . Sprintf ( "\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022" , _agdgd . PdfRectangle , _agdgd . _gccfd , _agdgd . _bfdb ) ; } ;
// processOperand inspects one content stream operation and extracts any image
// it refers to.
//   - "BI" with one inline-image parameter: the inline image is extracted,
//     unless it is an image mask and IncludeInlineStencilMasks is off.
//   - "Do" with one name parameter: image XObjects and form XObjects are
//     handed to extractXObjectImage / extractFormImages; other XObject types
//     are ignored. A non-name parameter yields the error _gb.
//   - "scn"/"SCN" with one parameter while the _gfb flag is set: the named
//     pattern is looked up and, if it is a tiling pattern, the images in its
//     content stream are extracted. A missing pattern is logged and ignored; a
//     non-name parameter yields the error _gb.
//   - "cs"/"CS": sets _gfb to whether the first parameter's String() is
//     "Pattern".
// Every other operation is ignored and nil is returned.
func ( _ege * imageExtractContext ) processOperand ( _dff * _fc . ContentStreamOperation , _fag _fc . GraphicsState , _fgaa * _ba . PdfPageResources ) error { if _dff . Operand == "\u0042\u0049" && len ( _dff . Params ) == 1 { _gdb , _aca := _dff . Params [ 0 ] . ( * _fc . ContentStreamInlineImage ) ;
if ! _aca { return nil ; } ; if _bc , _gba := _add . GetBoolVal ( _gdb . ImageMask ) ; _gba { if _bc && ! _ege . _gce . IncludeInlineStencilMasks { return nil ; } ; } ; return _ege . extractInlineImage ( _gdb , _fag , _fgaa ) ; } else if _dff . Operand == "\u0044\u006f" && len ( _dff . Params ) == 1 { _gbd , _bbd := _add . GetName ( _dff . Params [ 0 ] ) ;
if ! _bbd { _ga . Log . Debug ( "E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065" ) ; return _gb ; } ; _ , _bcd := _fgaa . GetXObjectByName ( * _gbd ) ; switch _bcd { case _ba . XObjectTypeImage : return _ege . extractXObjectImage ( _gbd , _fag , _fgaa ) ; case _ba . XObjectTypeForm : return _ege . extractFormImages ( _gbd , _fag , _fgaa ) ;
} ; } else if _ege . _gfb && ( _dff . Operand == "\u0073\u0063\u006e" || _dff . Operand == "\u0053\u0043\u004e" ) && len ( _dff . Params ) == 1 { _gcf , _bgf := _add . GetName ( _dff . Params [ 0 ] ) ; if ! _bgf { _ga . Log . Debug ( "E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065" ) ;
return _gb ; } ; _gffg , _bgf := _fgaa . GetPatternByName ( * _gcf ) ; if ! _bgf { _ga . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0050\u0061\u0074\u0074\u0065\u0072n\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075\u006e\u0064" ) ; return nil ; } ; if _gffg . IsTiling ( ) { _ecae := _gffg . GetAsTilingPattern ( ) ;
_cfc , _dffd := _ecae . GetContentStream ( ) ; if _dffd != nil { return _dffd ; } ; _dffd = _ege . extractContentStreamImages ( string ( _cfc ) , _ecae . Resources ) ; if _dffd != nil { return _dffd ; } ; } ; } else if ( _dff . Operand == "\u0063\u0073" || _dff . Operand == "\u0043\u0053" ) && len ( _dff . Params ) >= 1 { _ege . _gfb = _dff . Params [ 0 ] . String ( ) == "\u0050a\u0074\u0074\u0065\u0072\u006e" ;
} ; return nil ; } ;
// clear resets the subpath to its zero value.
func ( _ebdb * subpath ) clear ( ) { * _ebdb = subpath { } } ;
// _ebba builds the gridTile for rectangle `_ecgf`. Each border flag is set
// when the corresponding ruling is non-nil and encloses the tile's extent
// along that border: `_afefg` (-> _ecaf) and `_agcg` (-> _bfab) are tested
// against Lly..Ury, `_fdbf` (-> _bgbfd) and `_gbfda` (-> _dgccd) against
// Llx..Urx.
func _ebba ( _ecgf _ba . PdfRectangle , _afefg , _agcg , _fdbf , _gbfda * ruling ) gridTile { _ceebf := _ecgf . Llx ; _cdac := _ecgf . Urx ; _eabb := _ecgf . Lly ; _dgcgf := _ecgf . Ury ; return gridTile { PdfRectangle : _ecgf , _ecaf : _afefg != nil && _afefg . encloses ( _eabb , _dgcgf ) , _bfab : _agcg != nil && _agcg . encloses ( _eabb , _dgcgf ) , _bgbfd : _fdbf != nil && _fdbf . encloses ( _ceebf , _cdac ) , _dgccd : _gbfda != nil && _gbfda . encloses ( _ceebf , _cdac ) } ;
} ;
// pop removes the top entry of the stack and returns a pointer to a copy of
// it. It returns nil when the stack is empty.
func ( _faccb * stateStack ) pop ( ) * textState { if _faccb . empty ( ) { return nil ; } ; _acda := * ( * _faccb ) [ len ( * _faccb ) - 1 ] ; * _faccb = ( * _faccb ) [ : len ( * _faccb ) - 1 ] ; return & _acda ; } ;
// _bdbc returns a new slice holding the elements of `_aeagb` in reverse
// order. The input is not modified.
func _bdbc ( _aeagb [ ] int ) [ ] int { _eggg := make ( [ ] int , len ( _aeagb ) ) ; for _cfef , _gddg := range _aeagb { _eggg [ len ( _aeagb ) - 1 - _cfef ] = _gddg ;
} ; return _eggg ; } ;
// markWordBoundaries sets the _gdec flag on every word (after the first) whose
// gap to the preceding word, as measured by _daf, is at least
// _fadg * _fdfb. Lines with fewer than two words are left untouched.
func (_eeeda *textLine) markWordBoundaries() {
	// Slicing an empty word list with [1:] would panic, and a single word has
	// no predecessor to compare against.
	if len(_eeeda._cdcg) < 2 {
		return
	}
	_egfe := _fadg * _eeeda._fdfb
	// _fce indexes the sub-slice starting at element 1, so _cdcg[_fce] is the
	// word immediately before _fcgdd.
	for _fce, _fcgdd := range _eeeda._cdcg[1:] {
		if _daf(_fcgdd, _eeeda._cdcg[_fce]) >= _egfe {
			_fcgdd._gdec = true
		}
	}
};
// getFontDict looks up the font object named `_bcdc` in the text object's
// page resources. It returns (nil, nil) when the text object has no
// resources, and (nil, error) when the resources do not contain the font.
func ( _bceg * textObject ) getFontDict ( _bcdc string ) ( _bafb _add . PdfObject , _egad error ) { _gedd := _bceg . _dbg ;
if _gedd == nil { _ga . Log . Debug ( "g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071" , _bcdc ) ; return nil , nil ; } ; _bafb , _fagg := _gedd . GetFontByName ( _add . PdfObjectName ( _bcdc ) ) ;
if ! _fagg { _ga . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071" , _bcdc ) ;
return nil , _b . New ( "f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073" ) ; } ; return _bafb , nil ; } ;
// setTextRenderMode stores `_dgfg` as the RenderMode of the text state. It is
// a no-op on a nil receiver.
func ( _ega * textObject ) setTextRenderMode ( _dgfg int ) { if _ega == nil { return ; } ; _ega . _efed . _gdf = RenderMode ( _dgfg ) ;
} ;
// _cabf returns `_babd` with its last rune removed. An empty input yields an
// empty string instead of panicking on the out-of-range slice bound.
func _cabf(_babd string) string {
	_cgac := []rune(_babd)
	if len(_cgac) == 0 {
		return ""
	}
	return string(_cgac[:len(_cgac)-1])
};
// gridTile is a rectangle with four border flags. As used by contains:
// _ecaf guards the Llx side, _bfab the Urx side, _bgbfd the Lly side and
// _dgccd the Ury side.
type gridTile struct { _ba . PdfRectangle ; _dgccd , _ecaf , _bgbfd , _bfab bool ; } ;
// text returns the _eaae strings of all words in the bag (in the order
// produced by allWords) joined by single spaces.
func ( _ggag * wordBag ) text ( ) string { _agga := _ggag . allWords ( ) ; _bdbd := make ( [ ] string , len ( _agga ) ) ;
for _dga , _cffe := range _agga { _bdbd [ _dga ] = _cffe . _eaae ; } ; return _f . Join ( _bdbd , "\u0020" ) ; } ;
// readBefore reports whether paragraph `_aead` should be read before
// paragraph `_afecg`. `_faba` is the index ordering passed on to llyRange.
//  1. If _dfdc holds for the pair and the first paragraph has the larger Lly,
//     the answer is true.
//  2. Otherwise the first paragraph's _dgabg box must lie entirely left of the
//     second's (Urx < Llx), or the answer is false.
//  3. Every other paragraph whose Lly lies between the two paragraphs' Lly
//     values is then checked: if its _dgabg box reaches from the first
//     paragraph's right side to the second's left side (Llx <= min Urx and
//     max Llx <= Urx), the answer is false.
// If no such paragraph exists the answer is true.
func ( _ebbg paraList ) readBefore ( _faba [ ] int , _aead , _afecg int ) bool { _gdbfc , _dfefg := _ebbg [ _aead ] , _ebbg [ _afecg ] ; if _dfdc ( _gdbfc , _dfefg ) && _gdbfc . Lly > _dfefg . Lly { return true ;
} ; if ! ( _gdbfc . _dgabg . Urx < _dfefg . _dgabg . Llx ) { return false ; } ; _egef , _effe := _gdbfc . Lly , _dfefg . Lly ; if _egef > _effe { _effe , _egef = _egef , _effe ; } ; _gfcad := _gf . Max ( _gdbfc . _dgabg . Llx , _dfefg . _dgabg . Llx ) ; _fffed := _gf . Min ( _gdbfc . _dgabg . Urx , _dfefg . _dgabg . Urx ) ;
_faaa := _ebbg . llyRange ( _faba , _egef , _effe ) ; for _ , _aedc := range _faaa { if _aedc == _aead || _aedc == _afecg { continue ; } ; _abga := _ebbg [ _aedc ] ; if _abga . _dgabg . Llx <= _fffed && _gfcad <= _abga . _dgabg . Urx { return false ; } ; } ; return true ; } ;
// _edaf formats `_cggg` as "{k: v, k: v, ...}", visiting the keys in the
// order returned by _adgbc.
// NOTE(review): _adgbc presumably returns the sorted keys of the map — confirm.
func _edaf ( _cggg map [ int ] [ ] float64 ) string { _dafec := _adgbc ( _cggg ) ;
_ebfb := make ( [ ] string , len ( _cggg ) ) ; for _ebfaf , _ccbf := range _dafec { _ebfb [ _ebfaf ] = _agc . Sprintf ( "\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066" , _ccbf , _cggg [ _ccbf ] ) ; } ; return _agc . Sprintf ( "\u007b\u0025\u0073\u007d" , _f . Join ( _ebfb , "\u002c\u0020" ) ) ;
} ;
// xMean returns the mean X coordinate of the line's two end points.
func ( _faag lineRuling ) xMean ( ) float64 { return 0.5 * ( _faag . _eded . X + _faag . _badee . X ) } ;
// appendMark adds `_adab` to the word, grows the word's rectangle to include
// the mark's rectangle, raises _eabbf to the mark's _gccfd if that is larger,
// and recomputes _dfagd as `_cadg`.Ury minus the word's Lly.
func ( _bcab * textWord ) appendMark ( _adab * textMark , _cadg _ba . PdfRectangle ) { _bcab . _abcee = append ( _bcab . _abcee , _adab ) ; _bcab . PdfRectangle = _bbbafc ( _bcab . PdfRectangle , _adab . PdfRectangle ) ;
if _adab . _gccfd > _bcab . _eabbf { _bcab . _eabbf = _adab . _gccfd ; } ; _bcab . _dfagd = _cadg . Ury - _bcab . PdfRectangle . Lly ; } ;
// log writes the list under the title `_abgf` to the log, followed by one
// printed line per ruling. It does nothing unless _adgbf is set.
func ( _eaagd rulingList ) log ( _abgf string ) { if ! _adgbf { return ; } ; _ga . Log . Info ( "\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073" , _abgf , _eaagd . String ( ) ) ;
for _bbbcb , _efgbea := range _eaagd { _agc . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _bbbcb , _efgbea . String ( ) ) ; } ; } ;
// toTextTable converts the internal table into the exported TextTable.
// The result has W = _eacg, H = _cgae and Cells[y][x] filled from get(x, y):
// each present cell contributes its text and its text marks (offsets start at
// 0 for every cell); absent cells stay zero-valued. The table's rectangle is
// the receiver's bbox.
func ( _fdbe * textTable ) toTextTable ( ) TextTable { if _efda { _ga . Log . Info ( "t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064" , _fdbe . _eacg , _fdbe . _cgae ) ;
} ; _fgbe := make ( [ ] [ ] TableCell , _fdbe . _cgae ) ; for _dggag := 0 ; _dggag < _fdbe . _cgae ; _dggag ++ { _fgbe [ _dggag ] = make ( [ ] TableCell , _fdbe . _eacg ) ; for _adbd := 0 ; _adbd < _fdbe . _eacg ; _adbd ++ { _gggcd := _fdbe . get ( _adbd , _dggag ) ; if _gggcd == nil { continue ;
} ; if _efda { _agc . Printf ( "\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a" , _adbd , _dggag , _gggcd ) ; } ; _fgbe [ _dggag ] [ _adbd ] . Text = _gggcd . text ( ) ; _dagc := 0 ; _fgbe [ _dggag ] [ _adbd ] . Marks . _ffca = _gggcd . toTextMarks ( & _dagc ) ; } ; } ; _egcg := TextTable { W : _fdbe . _eacg , H : _fdbe . _cgae , Cells : _fgbe } ;
_egcg . PdfRectangle = _fdbe . bbox ( ) ; return _egcg ; } ;
// bbox returns the mark's bounding rectangle.
func ( _cabc * textMark ) bbox ( ) _ba . PdfRectangle { return _cabc . PdfRectangle } ;
// contains reports whether `_afdf` lies inside the tile. Tiles with fewer
// than three borders never contain anything. Only sides whose border flag is
// set are checked, each with a tolerance of _bddc.
func ( _bfgge gridTile ) contains ( _afdf _ba . PdfRectangle ) bool { if _bfgge . numBorders ( ) < 3 { return false ; } ; if _bfgge . _ecaf && _afdf . Llx < _bfgge . Llx - _bddc { return false ;
} ; if _bfgge . _bfab && _afdf . Urx > _bfgge . Urx + _bddc { return false ; } ; if _bfgge . _bgbfd && _afdf . Lly < _bfgge . Lly - _bddc { return false ; } ; if _bfgge . _dgccd && _afdf . Ury > _bfgge . Ury + _bddc { return false ; } ; return true ; } ;
// showTextAdjusted processes the elements of the array `_abf` in order.
//   - Numbers move the text position: the offset -n * 0.001 * _efed._dcd is
//     applied along X (the swap to Y is guarded by _ebb, which is always false
//     here) by concatenating a translation onto _eefe.
//   - Strings are passed to renderText together with `_bdg`; any value
//     renderText returns is not inspected.
//   - Any other element type, or a string/number that cannot be decoded,
//     aborts processing with an error.
func ( _daa * textObject ) showTextAdjusted ( _abf * _add . PdfObjectArray , _bdg int ) error { _ebb := false ;
for _ , _fee := range _abf . Elements ( ) { switch _fee . ( type ) { case * _add . PdfObjectFloat , * _add . PdfObjectInteger : _gaad , _debb := _add . GetNumberAsFloat ( _fee ) ; if _debb != nil { _ga . Log . Debug ( "\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _fee , _abf ) ;
return _debb ; } ; _eag , _dbd := - _gaad * 0.001 * _daa . _efed . _dcd , 0.0 ; if _ebb { _dbd , _eag = _eag , _dbd ; } ; _gcbf := _gfdd ( _agf . Point { X : _eag , Y : _dbd } ) ; _daa . _eefe . Concat ( _gcbf ) ; case * _add . PdfObjectString : _afb := _add . TraceToDirectObject ( _fee ) ; _bgb , _abfg := _add . GetStringBytes ( _afb ) ;
if ! _abfg { _ga . Log . Trace ( "s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _fee , _abf ) ;
return _add . ErrTypeError ; } ; _daa . renderText ( _afb , _bgb , _bdg ) ; default : _ga . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _fee , _abf ) ;
return _add . ErrTypeError ; } ; } ; return nil ; } ;
// NewFromContents creates a new extractor from contents and page resources.
// The extractor starts with empty font and text-result caches. The call is
// reported to the license package through TrackUse. The returned error is
// always nil.
func NewFromContents ( contents string , resources * _ba . PdfPageResources ) ( * Extractor , error ) { const _dg = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s" ; _cgd := & Extractor { _ac : contents , _gbe : resources , _bd : map [ string ] fontEntry { } , _baa : map [ string ] textResult { } } ;
_ce . TrackUse ( _dg ) ; return _cgd , nil ; } ;
// _dgbb scans `_bbec` in order and looks only at lines whose _bbfg is greater
// than that of `_gdbff`. While such lines have a rounded Llx that is not left
// of `_gdbff`'s rounded Llx, their _bbfg is remembered; the scan stops at the
// first one that starts further left. The last remembered _bbfg is returned,
// or -1 if there was none. The `_dbbca` argument is not used.
func _dgbb ( _gdbff * textLine , _bbec [ ] * textLine , _dbbca [ ] float64 ) float64 { var _afac float64 = - 1 ; for _ , _bgdf := range _bbec { if _bgdf . _bbfg > _gdbff . _bbfg { if _gf . Round ( _bgdf . Llx ) >= _gf . Round ( _gdbff . Llx ) { _afac = _bgdf . _bbfg ;
} else { break ; } ; } ; } ; return _afac ; } ;
// _faafc returns a ruling of kind _aaad located at the top edge (Ury) of
// `_bcaa` and spanning Llx..Urx.
func _faafc ( _bcaa _ba . PdfRectangle ) * ruling { return & ruling { _bbce : _aaad , _edga : _bcaa . Ury , _fcec : _bcaa . Llx , _abeg : _bcaa . Urx } ; } ;
// imageExtractContext carries the state of one image extraction run.
//   - _dcc: the image marks collected so far.
//   - _dgg: incremented by extractInlineImage for every inline image added.
//   - _fbd, _bde: further counters (NOTE(review): their meaning is not
//     visible in this part of the file).
//   - _ca:  cache of images keyed by their stream object.
//   - _gce: the extraction options.
//   - _gfb: set by processOperand when the most recent cs/CS operator selected
//     "Pattern".
type imageExtractContext struct { _dcc [ ] ImageMark ; _dgg int ; _fbd int ; _bde int ;
_ca map [ * _add . PdfObjectStream ] * cachedImage ; _gce * ImageExtractOptions ; _gfb bool ; } ;
// _efcbe reports whether `_fbbf` consists solely of Unicode white space. The
// empty string yields true.
func _efcbe ( _fbbf string ) bool { for _ , _aedca := range _fbbf { if ! _c . IsSpace ( _aedca ) { return false ; } ; } ; return true ; } ;
// inDiacriticArea reports whether `_dfdge` sits close to the receiver: the sum
// of the differences between the two Llx values and between the two Urx values
// must be smaller in magnitude than the receiver's width times _befe, and the
// Lly difference smaller in magnitude than the receiver's height times _befe.
func ( _egge * textMark ) inDiacriticArea ( _dfdge * textMark ) bool { _adaa := _egge . Llx - _dfdge . Llx ;
_dcbf := _egge . Urx - _dfdge . Urx ; _gcdf := _egge . Lly - _dfdge . Lly ; return _gf . Abs ( _adaa + _dcbf ) < _egge . Width ( ) * _befe && _gf . Abs ( _gcdf ) < _egge . Height ( ) * _befe ; } ;
// _gagf converts a tree of structElements into a parallel tree of lists.
// For each element:
//   - its _eada value is asserted to be a *PdfObjectReference (a failed
//     assertion is only logged);
//   - if its _cfcb value, converted to int, is not -1, the reference is
//     non-nil, `_cbgg` has lines for that key, and `_fcfa` is an indirect
//     object whose reference deep-equals the element's reference, those lines
//     are attached to the element;
//   - its children (_abff), if any, are converted recursively;
//   - the list is built by _dgda from the lines, the element's _bbag value
//     and the converted children.
func _gagf ( _gfef [ ] structElement , _cbgg map [ int ] [ ] * textLine , _fcfa _add . PdfObject ) [ ] * list { _abcd := [ ] * list { } ;
for _ , _aefd := range _gfef { _dbbe := _aefd . _abff ; _dgafc := int ( _aefd . _cfcb ) ; _gabf := _aefd . _bbag ; _dgad := [ ] * textLine { } ; _afgc := [ ] * list { } ; _fdfbe := _aefd . _eada ; _edgc , _fgfe := ( _fdfbe . ( * _add . PdfObjectReference ) ) ; if ! _fgfe { _ga . Log . Debug ( "\u0066\u0061\u0069l\u0065\u0064\u0020\u006f\u0074\u0020\u0063\u0061\u0073\u0074\u0020\u0074\u006f\u0020\u002a\u0063\u006f\u0072\u0065\u002e\u0050\u0064\u0066\u004f\u0062\u006a\u0065\u0063\u0074R\u0065\u0066\u0065\u0072\u0065\u006e\u0063\u0065" ) ;
} ; if _dgafc != - 1 && _edgc != nil { if _gedb , _baeb := _cbgg [ _dgafc ] ; _baeb { if _egcc , _afef := _fcfa . ( * _add . PdfIndirectObject ) ; _afef { _caca := _egcc . PdfObjectReference ; if _ad . DeepEqual ( * _edgc , _caca ) { _dgad = _gedb ; } ; } ; } ; } ; if _dbbe != nil { _afgc = _gagf ( _dbbe , _cbgg , _fcfa ) ;
} ; _dcfa := _dgda ( _dgad , _gabf , _afgc ) ; _abcd = append ( _abcd , _dcfa ) ; } ; return _abcd ; } ;
// _fbgb partitions the words of `_bbdg` into a list of new wordBags, emptying
// `_bbdg` in the process.
//
// For every depth index of `_bbdg`, while that bin still has words, the first
// word in reading order seeds a new bag (built by _bfcbd with `_bbeg` and the
// two ruling lists) and is removed from `_bbdg`. The bag is then grown by
// repeated scanBand passes, with gap limits scaled by the bag's _cdea value:
//  1. a "vertical" pass over the bag's depth range widened by _bfcae*_cdea;
//  2. a "horizontal" pass over the bag's exact depth range;
//  3. only if neither added anything: a probing pass (empty title) and,
//     depending on its count relative to the bag's depth extent, an "other"
//     pass.
// The passes repeat until one full round adds nothing, after which the bag is
// appended to the result.
// NOTE(review): scanBand is defined elsewhere; its result is treated here as
// the number of words it found or moved — confirm against its definition.
func _fbgb ( _bbdg * wordBag , _bbeg float64 , _egbe , _fada rulingList ) [ ] * wordBag { var _egcca [ ] * wordBag ; for _ , _cgcg := range _bbdg . depthIndexes ( ) { _dfa := false ; for ! _bbdg . empty ( _cgcg ) { _ggaag := _bbdg . firstReadingIndex ( _cgcg ) ;
_cdec := _bbdg . firstWord ( _ggaag ) ; _edfe := _bfcbd ( _cdec , _bbeg , _egbe , _fada ) ; _bbdg . removeWord ( _cdec , _ggaag ) ; if _eeab { _ga . Log . Info ( "\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073" , _cdec . String ( ) ) ;
} ; for _acga := true ; _acga ; _acga = _dfa { _dfa = false ; _daaff := _dgcc * _edfe . _cdea ; _bcbb := _cac * _edfe . _cdea ; _ecbb := _bfcae * _edfe . _cdea ; if _eeab { _ga . Log . Info ( "\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066" , _edfe . minDepth ( ) , _edfe . maxDepth ( ) , _ecbb , _bcbb ) ;
} ; if _bbdg . scanBand ( "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c" , _edfe , _bcfa ( _cgfd , 0 ) , _edfe . minDepth ( ) - _ecbb , _edfe . maxDepth ( ) + _ecbb , _fae , false , false ) > 0 { _dfa = true ; } ; if _bbdg . scanBand ( "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c" , _edfe , _bcfa ( _cgfd , _bcbb ) , _edfe . minDepth ( ) , _edfe . maxDepth ( ) , _fcag , false , false ) > 0 { _dfa = true ;
} ; if _dfa { continue ; } ; _dfag := _bbdg . scanBand ( "" , _edfe , _bcfa ( _cbde , _daaff ) , _edfe . minDepth ( ) , _edfe . maxDepth ( ) , _cfcf , true , false ) ; if _dfag > 0 { _ffecd := ( _edfe . maxDepth ( ) - _edfe . minDepth ( ) ) / _edfe . _cdea ; if ( _dfag > 1 && float64 ( _dfag ) > 0.3 * _ffecd ) || _dfag <= 10 { if _bbdg . scanBand ( "\u006f\u0074\u0068e\u0072" , _edfe , _bcfa ( _cbde , _daaff ) , _edfe . minDepth ( ) , _edfe . maxDepth ( ) , _cfcf , false , true ) > 0 { _dfa = true ;
} ; } ; } ; } ; _egcca = append ( _egcca , _edfe ) ; } ; } ; return _egcca ; } ;
// _ecfee concatenates all ruling lists in `_efbbe` into one list and returns
// the two lists that vertsHorzs derives from it.
func _ecfee ( _efbbe [ ] rulingList ) ( rulingList , rulingList ) { var _bffd rulingList ; for _ , _efdeb := range _efbbe { _bffd = append ( _bffd , _efdeb ... ) ; } ; return _bffd . vertsHorzs ( ) ; } ;
// String returns a human readable description of `path`.
// Subpaths with at most five points are printed in full; longer ones are
// abbreviated to their first two points and their last point.
func (_afbg *subpath) String() string {
	_ecbf := _afbg._aeee
	_gebb := len(_ecbf)
	if _gebb <= 5 {
		return _agc.Sprintf("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f", _gebb, _ecbf)
	}
	return _agc.Sprintf("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f", _gebb, _ecbf[0], _ecbf[1], _ecbf[_gebb-1])
}

// _bdccc returns a ruling of kind _cfae located at the left edge (Llx) of
// `_ecbc` and spanning Lly..Ury.
func _bdccc(_ecbc _ba.PdfRectangle) *ruling {
	return &ruling{_bbce: _cfae, _edga: _ecbc.Llx, _fcec: _ecbc.Lly, _abeg: _ecbc.Ury}
}

// _ecfd reports whether `_geae` equals the first byte of any entry of _cafa.
// Strings are indexed directly, which avoids a []byte conversion per entry,
// and empty entries are skipped rather than indexed out of range.
func _ecfd(_geae byte) bool {
	for _, _faed := range _cafa {
		if len(_faed) > 0 && _faed[0] == _geae {
			return true
		}
	}
	return false
}

// _eddba returns the signed difference between the left edges (Llx) of
// `_caead` and `_cgcf`.
func _eddba(_caead, _cgcf bounded) float64 {
	return _caead.bbox().Llx - _cgcf.bbox().Llx
}

// intersects reports whether the two rulings cross. They must be of the two
// different kinds _cfae and _aaad, and the primary coordinate (_edga) of each
// must lie within the other's _fcec.._abeg span, widened by the tolerance
// _dfed on both ends. A diagnostic line is printed when _adgbf is set.
func (_gbfaf *ruling) intersects(_cegd *ruling) bool {
	_dabfeg := (_gbfaf._bbce == _cfae && _cegd._bbce == _aaad) ||
		(_cegd._bbce == _cfae && _gbfaf._bbce == _aaad)
	// _bbef reports whether the primary coordinate of _ebcda lies within the
	// (tolerance-widened) span of _deccf.
	_bbef := func(_deccf, _ebcda *ruling) bool {
		return _deccf._fcec-_dfed <= _ebcda._edga && _ebcda._edga <= _deccf._abeg+_dfed
	}
	_ccdac := _bbef(_gbfaf, _cegd)
	_dfgcg := _bbef(_cegd, _gbfaf)
	if _adgbf {
		_agc.Printf("\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a"+"\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a"+" \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a", _dabfeg, _ccdac, _dfgcg, _dabfeg && _ccdac && _dfgcg, _gbfaf, _cegd)
	}
	return _dabfeg && _ccdac && _dfgcg
}

// list is a node in a tree of lists: it holds text lines, two strings and
// child lists.
type list struct {
	_aebd []*textLine
	_cdde string
	_abcc []*list
	_ddef string
}

// allWords returns every word in the bag. Bins are visited in increasing key
// order and each bin's internal order is kept, so the result is the same on
// every call; ranging over the map directly would yield a different order on
// each run. A bag without words yields a nil slice.
func (_gbgc *wordBag) allWords() []*textWord {
	_keys := make([]int, 0, len(_gbgc._fcgd))
	_total := 0
	for _key, _efgg := range _gbgc._fcgd {
		_keys = append(_keys, _key)
		_total += len(_efgg)
	}
	if _total == 0 {
		return nil
	}
	_e.Ints(_keys)
	_fdgd := make([]*textWord, 0, _total)
	for _, _key := range _keys {
		_fdgd = append(_fdgd, _gbgc._fcgd[_key]...)
	}
	return _fdgd
}

// lastpointEstablished returns a point and a flag. The flag is false when a
// point is available: either _gcaaf (when _gbdgg is set) or the last point of
// the final subpath (when that subpath's _dbe flag is set). It is true, with
// a zero point, when neither is the case.
func (_afa *shapesState) lastpointEstablished() (_agf.Point, bool) {
	if _afa._gbdgg {
		return _afa._gcaaf, false
	}
	_gfcg := len(_afa._gfce)
	if _gfcg > 0 && _afa._gfce[_gfcg-1]._dbe {
		return _afa._gfce[_gfcg-1].last(), false
	}
	return _agf.Point{}, true
}

// bbox returns the line's bounding rectangle.
func (_gaead *textLine) bbox() _ba.PdfRectangle { return _gaead.PdfRectangle }

// cachedImage pairs an image with its colorspace.
type cachedImage struct {
	_fed  *_ba.Image
	_addb _ba.PdfColorspace
}

// last returns the final point of the subpath. The subpath must contain at
// least one point.
func (_bga *subpath) last() _agf.Point { return _bga._aeee[len(_bga._aeee)-1] }
// String returns a string describing the current state of the textState stack.
// The first line gives the stack size; each following line gives an entry's
// index and its String() value, or "<nil>" for a nil entry.
func ( _aaeg * stateStack ) String ( ) string { _adfa := [ ] string { _agc . Sprintf ( "\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064" , len ( * _aaeg ) ) } ; for _fgee , _gaab := range * _aaeg { _fdc := "\u003c\u006e\u0069l\u003e" ;
if _gaab != nil { _fdc = _gaab . String ( ) ; } ; _adfa = append ( _adfa , _agc . Sprintf ( "\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073" , _fgee , _fdc ) ) ; } ; return _f . Join ( _adfa , "\u000a" ) ; } ;
// extractInlineImage decodes the inline image `_bdd`, converts it to RGB
// using the image's colorspace (DeviceGray when the image declares none) and
// appends an ImageMark for it. The mark's size, angle and position are taken
// from the CTM of `_dec`. The inline-image counter _dgg is incremented on
// success; any decoding or conversion error is returned unchanged.
func ( _eba * imageExtractContext ) extractInlineImage ( _bdd * _fc . ContentStreamInlineImage , _dec _fc . GraphicsState , _gdea * _ba . PdfPageResources ) error { _gggb , _ded := _bdd . ToImage ( _gdea ) ;
if _ded != nil { return _ded ; } ; _fcg , _ded := _bdd . GetColorSpace ( _gdea ) ; if _ded != nil { return _ded ; } ; if _fcg == nil { _fcg = _ba . NewPdfColorspaceDeviceGray ( ) ; } ; _ged , _ded := _fcg . ImageToRGB ( * _gggb ) ; if _ded != nil { return _ded ; } ; _bfb := ImageMark { Image : & _ged , Width : _dec . CTM . ScalingFactorX ( ) , Height : _dec . CTM . ScalingFactorY ( ) , Angle : _dec . CTM . Angle ( ) } ;
_bfb . X , _bfb . Y = _dec . CTM . Translation ( ) ; _eba . _dcc = append ( _eba . _dcc , _bfb ) ; _eba . _dgg ++ ; return nil ; } ;
// _dfc reports whether `_gbfcc` ends in a hyphen that follows a non-space
// rune. Strings with fewer than _afeg runes are rejected. The last rune must
// belong to unicode.Hyphen, and the rune before it must exist and must not be
// white space.
func _dfc ( _gbfcc string ) bool { if _a . RuneCountInString ( _gbfcc ) < _afeg { return false ; } ; _bbdd , _dbbc := _a . DecodeLastRuneInString ( _gbfcc ) ;
if _dbbc <= 0 || ! _c . Is ( _c . Hyphen , _bbdd ) { return false ; } ; _bbdd , _dbbc = _a . DecodeLastRuneInString ( _gbfcc [ : len ( _gbfcc ) - _dbbc ] ) ; return _dbbc > 0 && ! _c . IsSpace ( _bbdd ) ; } ;
// _ggdbe selects lines from `_egfdf` relative to the reference line `_fgba`.
// Only lines with _bbfg >= `_cffg` are considered.
//   - When `_adcb` is not -1: lines with _bbfg < `_adcb` whose text differs
//     from the reference's are collected, and the scan stops at the first such
//     line whose rounded Llx lies left of the reference's.
//   - When `_adcb` is -1: lines with the same _bbfg as the reference are
//     collected if their text differs; any other line is collected when
//     _dgbb(reference, lines, `_ccde`) is not -1 and the line's _bbfg does not
//     exceed it.
// The result is a non-nil (possibly empty) slice.
func _ggdbe ( _fgba * textLine , _egfdf [ ] * textLine , _ccde [ ] float64 , _cffg , _adcb float64 ) [ ] * textLine { _eegfg := [ ] * textLine { } ;
for _ , _bfge := range _egfdf { if _bfge . _bbfg >= _cffg { if _adcb != - 1 && _bfge . _bbfg < _adcb { if _bfge . text ( ) != _fgba . text ( ) { if _gf . Round ( _bfge . Llx ) < _gf . Round ( _fgba . Llx ) { break ; } ; _eegfg = append ( _eegfg , _bfge ) ; } ; } else if _adcb == - 1 { if _bfge . _bbfg == _fgba . _bbfg { if _bfge . text ( ) != _fgba . text ( ) { _eegfg = append ( _eegfg , _bfge ) ;
} ; continue ; } ; _bgcda := _dgbb ( _fgba , _egfdf , _ccde ) ; if _bgcda != - 1 && _bfge . _bbfg <= _bgcda { _eegfg = append ( _eegfg , _bfge ) ; } ; } ; } ; } ; return _eegfg ; } ;
// comp is a "less" function over the list: it reports whether element `_faad`
// sorts before element `_agdge`.
// Rulings of different kinds are ordered by kind, larger kind value first.
// Rulings of kind _fbcc are never ordered relative to each other. Otherwise
// the rulings are compared by _edga, then _fcec, then _abeg; the helper
// _bbgcd flips the direction of each comparison for kinds other than _aaad.
// NOTE(review): for a non-_aaad kind with all three keys equal, the final
// comparison returns true, so comp(i, j) and comp(j, i) both hold — confirm
// that callers tolerate this.
func ( _ffbf rulingList ) comp ( _faad , _agdge int ) bool { _geed , _eagd := _ffbf [ _faad ] , _ffbf [ _agdge ] ; _agfg , _bfeb := _geed . _bbce , _eagd . _bbce ;
if _agfg != _bfeb { return _agfg > _bfeb ; } ; if _agfg == _fbcc { return false ; } ; _bbgcd := func ( _becce bool ) bool { if _agfg == _aaad { return _becce ; } ; return ! _becce ; } ; _egde , _aefg := _geed . _edga , _eagd . _edga ; if _egde != _aefg { return _bbgcd ( _egde > _aefg ) ;
} ; _egde , _aefg = _geed . _fcec , _eagd . _fcec ; if _egde != _aefg { return _bbgcd ( _egde < _aefg ) ; } ; return _bbgcd ( _geed . _abeg < _eagd . _abeg ) ; } ;
// putComposite stores the paragraphs `_accgg`, together with the rectangle
// `_adeac`, as the composite cell at (`_ffcc`, `_aaccb`). An empty paragraph
// list is logged as an error and nothing is stored. The cell's updateBBox is
// called before it is stored.
func ( _edbg * textTable ) putComposite ( _ffcc , _aaccb int , _accgg paraList , _adeac _ba . PdfRectangle ) { if len ( _accgg ) == 0 { _ga . Log . Error ( "\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073" ) ;
return ; } ; _ffddd := compositeCell { PdfRectangle : _adeac , paraList : _accgg } ; if _efda { _agc . Printf ( "\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a" , _ffcc , _aaccb , _ffddd . String ( ) ) ;
} ; _ffddd . updateBBox ( ) ; _edbg . _aaaga [ _bgcc ( _ffcc , _aaccb ) ] = _ffddd ; } ;
// _cafa lists ten marker strings (nine symbol characters and the letter "o").
// _ecfd compares a byte against the first byte of each entry.
var _cafa = [ ] string { "\u2756" , "\u27a2" , "\u2713" , "\u2022" , "\uf0a7" , "\u25a1" , "\u2212" , "\u25a0" , "\u25aa" , "\u006f" } ;
// _cbggc classifies a pair of extents: it returns _aaad when `_edee` is at
// least `_fbfc` and _caeb(`_gcef`, `_edee`) holds, _cfae when `_gcef` is at
// least `_fbfc` and _caeb(`_edee`, `_gcef`) holds, and _fbcc otherwise.
func _cbggc ( _edee , _gcef , _fbfc float64 ) rulingKind { if _edee >= _fbfc && _caeb ( _gcef , _edee ) { return _aaad ;
} ; if _gcef >= _fbfc && _caeb ( _edee , _gcef ) { return _cfae ; } ; return _fbcc ; } ;
// alignsPrimary reports whether the two rulings have the same kind and their
// primary coordinates (_edga) differ by less than half of _gbb.
func ( _bacf * ruling ) alignsPrimary ( _bedg * ruling ) bool { return _bacf . _bbce == _bedg . _bbce && _gf . Abs ( _bacf . _edga - _bedg . _edga ) < _gbb * 0.5 ; } ;