2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2018-03-22 14:03:47 +00:00
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2022-06-27 19:58:38 +00:00
// Overview of the definitions packed onto the minified lines below:
//
//   - _bbbca(_gcade, _gefe) removes element _gefe from _gcade by copying the
//     tail one slot to the left and returns the slice shortened by one. The
//     backing array of the argument is modified in place.
//   - (*wordBag).empty(_cbba) reports whether the _fadg map has no entry for
//     key _cbba.
//   - _ccbff(rect) compares the rectangle's width and height: a wider-than-tall
//     rectangle whose width is at least _cafe yields _ddga, any other rectangle
//     whose height is at least _cafe yields _gaba, and everything else yields
//     _eedb.
//   - (*textPara).writeCellText(w) writes the text of every line in _gadg to w.
//     When _addg is set and a line other than the last ends in a hyphen, the
//     final rune is dropped via _bfe and no separator follows; otherwise the
//     separator returned by _begc is written between consecutive lines. Errors
//     returned by w.Write are ignored.
//   - (paraList).findTables(_fggc) calls addNeighbours, sorts the receiver in
//     place with _fdcf, then collects grid tables (when _adbe is set) followed
//     by text tables (when _aaeaf is set).
//   - (*textObject).setWordSpacing(v) stores v in _dff._fec; it is a no-op on a
//     nil receiver.
//   - _gade(rect, lines) builds a textPara from a bounding box and its lines.

package extractor ; import ( _cde "bytes" ; _f "errors" ; _be "fmt" ; _eg "github.com/unidoc/unipdf/v3/common" ; _cce "github.com/unidoc/unipdf/v3/contentstream" ; _db "github.com/unidoc/unipdf/v3/core" ; _ba "github.com/unidoc/unipdf/v3/internal/license" ; _a "github.com/unidoc/unipdf/v3/internal/textencoding" ;
_g "github.com/unidoc/unipdf/v3/internal/transform" ; _bd "github.com/unidoc/unipdf/v3/model" ; _fd "golang.org/x/text/unicode/norm" ; _fg "golang.org/x/xerrors" ; _fb "image/color" ; _d "io" ; _bf "math" ; _e "regexp" ; _cc "sort" ; _dc "strings" ; _cd "unicode" ; _c "unicode/utf8" ;
) ; func _bbbca ( _gcade [ ] * textWord , _gefe int ) [ ] * textWord { _bdgec := len ( _gcade ) ; copy ( _gcade [ _gefe : ] , _gcade [ _gefe + 1 : ] ) ; return _gcade [ : _bdgec - 1 ] ; } ; func ( _edegf * wordBag ) empty ( _cbba int ) bool { _ , _gee := _edegf . _fadg [ _cbba ] ; return ! _gee } ; func _ccbff ( _fdgab _bd . PdfRectangle ) rulingKind { _aaed := _fdgab . Width ( ) ;
_cbdd := _fdgab . Height ( ) ; if _aaed > _cbdd { if _aaed >= _cafe { return _ddga ; } ; } else { if _cbdd >= _cafe { return _gaba ; } ; } ; return _eedb ; } ; func ( _bged * textPara ) writeCellText ( _cceb _d . Writer ) { for _baacf , _dbbe := range _bged . _gadg { _gacfe := _dbbe . text ( ) ;
_cgcg := _addg && _dbbe . endsInHyphen ( ) && _baacf != len ( _bged . _gadg ) - 1 ; if _cgcg { _gacfe = _bfe ( _gacfe ) ; } ; _cceb . Write ( [ ] byte ( _gacfe ) ) ; if ! ( _cgcg || _baacf == len ( _bged . _gadg ) - 1 ) { _cceb . Write ( [ ] byte ( _begc ( _dbbe . _gddec , _bged . _gadg [ _baacf + 1 ] . _gddec ) ) ) ;
} ; } ; } ; func ( _afbfe paraList ) findTables ( _fggc [ ] gridTiling ) [ ] * textTable { _afbfe . addNeighbours ( ) ; _cc . Slice ( _afbfe , func ( _cafec , _bbbdff int ) bool { return _fdcf ( _afbfe [ _cafec ] , _afbfe [ _bbbdff ] ) < 0 } ) ; var _faded [ ] * textTable ; if _adbe { _gcgd := _afbfe . findGridTables ( _fggc ) ;
_faded = append ( _faded , _gcgd ... ) ; } ; if _aaeaf { _cceg := _afbfe . findTextTables ( ) ; _faded = append ( _faded , _cceg ... ) ; } ; return _faded ; } ; func ( _bab * textObject ) setWordSpacing ( _dad float64 ) { if _bab == nil { return ; } ; _bab . _dff . _fec = _dad ; } ; func _gade ( _bcba _bd . PdfRectangle , _efbfd [ ] * textLine ) * textPara { return & textPara { PdfRectangle : _bcba , _gadg : _efbfd } ;
2022-04-27 00:10:33 +00:00
} ;
2022-06-27 19:58:38 +00:00
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	Images []ImageMark
}

// sort orders, in place, the words stored under every key of the bag using
// _afdf as the comparison function.
func (_ecee *wordBag) sort() {
	for _, words := range _ecee._fadg {
		less := func(i, j int) bool { return _afdf(words[i], words[j]) < 0 }
		_cc.Slice(words, less)
	}
}

// getComposite looks up the composite cell stored at (_gcegg, _ccgf) and
// returns the result of its parasBBox method. When no cell is stored there it
// returns nil and a zero rectangle. With _eeca set the lookup is printed first.
func (_ddbad *textTable) getComposite(_gcegg, _ccgf int) (paraList, _bd.PdfRectangle) {
	cell, found := _ddbad._agga[_bddbg(_gcegg, _ccgf)]
	if _eeca {
		_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a", _gcegg, _ccgf, cell.String())
	}
	if found {
		return cell.parasBBox()
	}
	return nil, _bd.PdfRectangle{}
}

// ruling carries a ruling kind, a mark kind, a colour and four float64 values.
type ruling struct {
	_gggf  rulingKind
	_beaec markKind
	_fb.Color
	_ccb, _gaad, _gdaf, _cfbf float64
}

// _cecgg reports whether the two values differ by no more than _cebae.
func _cecgg(_fefg, _bdcc float64) bool {
	gap := _bf.Abs(_fefg - _bdcc)
	return gap <= _cebae
}

// _fdcf compares two bounded values: the result of _afdf is used unless
// _bcaga accepts it, in which case _ebbf supplies the result.
func _fdcf(_gfbg, _edag bounded) float64 {
	if primary := _afdf(_gfbg, _edag); !_bcaga(primary) {
		return primary
	}
	return _ebbf(_gfbg, _edag)
}

// push appends a private copy of *_cebd to the stack, so later changes to the
// caller's state do not affect the saved entry.
func (_aadb *stateStack) push(_cebd *textState) {
	saved := new(textState)
	*saved = *_cebd
	*_aadb = append(*_aadb, saved)
}

// split builds a textTable by cutting the composite cell along the supplied
// corridor positions. _adea is bracketed by the cell's Ury/Lly and _cbece by
// its Llx/Urx (both through _abfgd); every text line that lies inside one of
// the resulting rectangles (per _gbae) is assigned to that rectangle, and each
// non-empty rectangle becomes a textPara in the returned table.
// The receiver's paraList is sorted in place as a side effect.
func (_fbeg compositeCell) split(_adea, _cbece []float64) *textTable {
	height := len(_adea) + 1
	width := len(_cbece) + 1
	if _eeca {
		_eg.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066", width, height, _fbeg, _adea, _cbece)
		_be.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a", len(_fbeg.paraList))
		for i, para := range _fbeg.paraList {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para.String())
		}
		_be.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", len(_fbeg.lines()))
		for i, line := range _fbeg.lines() {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, line)
		}
	}

	rowEdges := _abfgd(_adea, _fbeg.Ury, _fbeg.Lly)
	colEdges := _abfgd(_cbece, _fbeg.Llx, _fbeg.Urx)

	cells := make(map[uint64]*textPara, width*height)
	table := textTable{_bgcfb: width, _gccb: height, _deedc: cells}

	paras := _fbeg.paraList
	_cc.Slice(paras, func(i, j int) bool {
		left, right := paras[i], paras[j]
		if left.Lly != right.Lly {
			return left.Lly < right.Lly
		}
		return left.Llx < right.Llx
	})

	// Rectangle of every grid position, keyed by _bddbg(column, row).
	rects := make(map[uint64]_bd.PdfRectangle, width*height)
	for row, bottom := range rowEdges[1:] {
		top := rowEdges[row]
		for col, right := range colEdges[1:] {
			left := colEdges[col]
			rects[_bddbg(col, row)] = _bd.PdfRectangle{Llx: left, Urx: right, Lly: bottom, Ury: top}
		}
	}

	if _eeca {
		_eg.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073")
		_be.Printf("\u0020\u0020\u0020\u0020")
		for col := 0; col < width; col++ {
			_be.Printf("\u0025\u0033\u0030\u0064\u002c\u0020", col)
		}
		_be.Println()
		for row := 0; row < height; row++ {
			_be.Printf("\u0020\u0020\u0025\u0032\u0064\u003a", row)
			for col := 0; col < width; col++ {
				_be.Printf("\u00256\u002e\u0032\u0066\u002c\u0020", rects[_bddbg(col, row)])
			}
			_be.Println()
		}
	}

	// locate returns the first grid position, scanning row by row, whose
	// rectangle contains the line, or (-1, -1) when there is none.
	locate := func(line *textLine) (int, int) {
		for row := 0; row < height; row++ {
			for col := 0; col < width; col++ {
				if _gbae(rects[_bddbg(col, row)], line.PdfRectangle) {
					return col, row
				}
			}
		}
		return -1, -1
	}

	linesByCell := make(map[uint64][]*textLine, width*height)
	for _, line := range paras.lines() {
		col, row := locate(line)
		if col < 0 {
			continue
		}
		linesByCell[_bddbg(col, row)] = append(linesByCell[_bddbg(col, row)], line)
	}

	for row := 0; row < len(rowEdges)-1; row++ {
		top, bottom := rowEdges[row], rowEdges[row+1]
		for col := 0; col < len(colEdges)-1; col++ {
			left, right := colEdges[col], colEdges[col+1]
			bbox := _bd.PdfRectangle{Llx: left, Urx: right, Lly: bottom, Ury: top}
			members := linesByCell[_bddbg(col, row)]
			if len(members) == 0 {
				continue
			}
			table.put(col, row, _gade(bbox, members))
		}
	}
	return &table
}
2022-04-27 00:10:33 +00:00
2022-06-06 22:48:24 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
// All of its fields are unexported, so an Extractor can only be populated from
// inside this package. It holds a string, a pointer to page resources, a
// rectangle, two string-keyed maps (of fontEntry and textResult values) and two
// integer fields.
2022-06-27 19:58:38 +00:00
type Extractor struct { _da string ; _dbb * _bd . PdfPageResources ; _dd _bd . PdfRectangle ; _bg map [ string ] fontEntry ; _dg map [ string ] textResult ; _gb int64 ; _dbf int ; } ;
2022-04-27 00:10:33 +00:00
2022-06-27 19:58:38 +00:00
// String returns a description of `w`.
// The output lists the _acag value, the bounding box, the _efag value and the
// quoted _debad text, in that order.
func (_cedc *textWord) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _be.Sprintf(layout, _cedc._acag, _cedc.PdfRectangle, _cedc._efag, _cedc._debad)
}

const _gcg = 1.0 / 1000.0

// depthIndexes returns the keys of the bag's _fadg map in ascending order, or
// nil when the map is empty.
func (_dgc *wordBag) depthIndexes() []int {
	count := len(_dgc._fadg)
	if count == 0 {
		return nil
	}
	keys := make([]int, 0, count)
	for key := range _dgc._fadg {
		keys = append(keys, key)
	}
	_cc.Ints(keys)
	return keys
}

// _aagde returns the keys of _eegdd in ascending order. The result is never
// nil, even for an empty map.
func _aagde(_eegdd map[int]intSet) []int {
	keys := make([]int, len(_eegdd))
	next := 0
	for key := range _eegdd {
		keys[next] = key
		next++
	}
	_cc.Ints(keys)
	return keys
}

// setTextLeading stores _eae in _dff._dde. It is a no-op on a nil receiver.
func (_eaa *textObject) setTextLeading(_eae float64) {
	if _eaa != nil {
		_eaa._dff._dde = _eae
	}
}

// _beage builds a gridTile for rectangle _ffea. Each of the four flags is set
// when the corresponding ruling is non-nil and its encloses method accepts the
// rectangle's extent: the first two rulings are tested against (Lly, Ury), the
// last two against (Llx, Urx).
func _beage(_ffea _bd.PdfRectangle, _daabd, _aaeb, _dfbcaf, _fcbd *ruling) gridTile {
	covers := func(r *ruling, lo, hi float64) bool {
		return r != nil && r.encloses(lo, hi)
	}
	tile := gridTile{PdfRectangle: _ffea}
	tile._fgbc = covers(_daabd, _ffea.Lly, _ffea.Ury)
	tile._eafc = covers(_aaeb, _ffea.Lly, _ffea.Ury)
	tile._ccadg = covers(_dfbcaf, _ffea.Llx, _ffea.Urx)
	tile._bfgeab = covers(_fcbd, _ffea.Llx, _ffea.Urx)
	return tile
}

// firstReadingIndex starts from key _cabgd and examines the keys returned by
// depthBand for the range beginning at (_cabgd+1)*_abab and extending by _ggba
// times the _efag of the first word under _cabgd. It returns the key whose
// first word sorts earliest according to _afdf.
func (_fede *wordBag) firstReadingIndex(_cabgd int) int {
	size := _fede.firstWord(_cabgd)._efag
	lower := float64(_cabgd+1) * _abab
	upper := lower + _ggba*size
	best := _cabgd
	for _, candidate := range _fede.depthBand(lower, upper) {
		if _afdf(_fede.firstWord(candidate), _fede.firstWord(best)) < 0 {
			best = candidate
		}
	}
	return best
}

// absorb merges _fdageb into the receiver: the bounding box becomes the result
// of _gcff on both boxes and the other word's _bbacg entries are appended.
func (_gbedg *textWord) absorb(_fdageb *textWord) {
	merged := _gcff(_gbedg.PdfRectangle, _fdageb.PdfRectangle)
	_gbedg.PdfRectangle = merged
	_gbedg._bbacg = append(_gbedg._bbacg, _fdageb._bbacg...)
}

// bbox returns the smallest rectangle containing every point of every subpath
// in the section. It panics if the section has no subpaths or if its first
// subpath has no points.
func (_ddgc pathSection) bbox() _bd.PdfRectangle {
	origin := _ddgc._aga[0]._gcbb[0]
	box := _bd.PdfRectangle{Llx: origin.X, Urx: origin.X, Lly: origin.Y, Ury: origin.Y}
	for i, sub := range _ddgc._aga {
		points := sub._gcbb
		if i == 0 {
			// The first point already seeded the box.
			points = points[1:]
		}
		for _, pt := range points {
			switch {
			case pt.X < box.Llx:
				box.Llx = pt.X
			case pt.X > box.Urx:
				box.Urx = pt.X
			}
			switch {
			case pt.Y < box.Lly:
				box.Lly = pt.Y
			case pt.Y > box.Ury:
				box.Ury = pt.Y
			}
		}
	}
	return box
}

// _eaab binds _eddb as the third argument of _gbc and returns the resulting
// two-argument predicate.
func _eaab(_gbc func(*wordBag, *textWord, float64) bool, _eddb float64) func(*wordBag, *textWord) bool {
	bound := func(bag *wordBag, word *textWord) bool {
		return _gbc(bag, word, _eddb)
	}
	return bound
}

// _gddeb returns the keys of _aegdg in ascending order. The result is never
// nil, even for an empty map.
func _gddeb(_aegdg map[float64]gridTile) []float64 {
	keys := make([]float64, len(_aegdg))
	next := 0
	for key := range _aegdg {
		keys[next] = key
		next++
	}
	_cc.Float64s(keys)
	return keys
}

// _cbfbe returns the smaller of its two arguments.
func _cbfbe(_begb, _bffb int) int {
	if _bffb <= _begb {
		return _bffb
	}
	return _begb
}

// addPoint converts (_ddba, _eagef) with devicePoint and adds it to the
// subpath returned by establishSubpath. When that subpath is nil the point is
// stored in _eee instead and _beg is set.
func (_ddcf *shapesState) addPoint(_ddba, _eagef float64) {
	current := _ddcf.establishSubpath()
	pt := _ddcf.devicePoint(_ddba, _eagef)
	if current != nil {
		current.add(pt)
		return
	}
	_ddcf._beg = true
	_ddcf._eee = pt
}

// _fafbc reports whether the two points have identical coordinates.
func _fafbc(_ecdaf, _cdge _g.Point) bool {
	if _ecdaf.X != _cdge.X {
		return false
	}
	return _ecdaf.Y == _cdge.Y
}
// absorb pulls every word held by _degea into the receiver. The words are
// recorded in a removal set created by _degea.makeRemovals and are dropped
// from _degea in a single applyRemovals call once all have been pulled.
func (_ddab *wordBag) absorb(_degea *wordBag) {
	pending := _degea.makeRemovals()
	for depthIdx, words := range _degea._fadg {
		for _, word := range words {
			_ddab.pullWord(word, depthIdx, pending)
		}
	}
	_degea.applyRemovals(pending)
}
2022-04-27 00:10:33 +00:00
2022-06-27 19:58:38 +00:00
// TextMarkArray is a collection of TextMarks.
// The marks are held in a single unexported slice, so the collection can only
// be built and modified from inside this package.
type TextMarkArray struct { _ggbg [ ] TextMark } ;
2022-04-27 00:10:33 +00:00
// String returns a string describing `ma`.
2022-06-27 19:58:38 +00:00
// String implements fmt.Stringer. An empty array is rendered as a fixed
// placeholder; otherwise the output contains the number of marks followed by
// the first and the last mark.
func (_fega TextMarkArray) String() string {
	marks := _fega._ggbg
	if len(marks) == 0 {
		return "\u0045\u004d\u0050T\u0059"
	}
	const layout = "\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d"
	return _be.Sprintf(layout, len(marks), marks[0], marks[len(marks)-1])
}

// imageExtractContext collects the ImageMarks found so far together with three
// integer counters, a per-stream cache of images and the extraction options.
type imageExtractContext struct {
	_daa          []ImageMark
	_ec, _de, _gf int
	_ac           map[*_db.PdfObjectStream]*cachedImage
	_gfe          *ImageExtractOptions
}

// _gfac builds a ruling from the segment _ceca-_dfda drawn in colour _ebefb.
// The segment's kind is computed by _ggcd; when that kind is _eedb the result
// is (nil, false), otherwise the result of lineRuling.asRuling is returned.
func _gfac(_ceca, _dfda _g.Point, _ebefb _fb.Color) (*ruling, bool) {
	kind := _ggcd(_ceca, _dfda)
	if kind == _eedb {
		return nil, false
	}
	segment := lineRuling{_eadee: _ceca, _feab: _dfda, _dgfg: kind, Color: _ebefb}
	return segment.asRuling()
}

// last returns the final point of the subpath. It panics on a subpath without
// points.
func (_baff *subpath) last() _g.Point {
	points := _baff._gcbb
	return points[len(points)-1]
}
2021-01-07 14:20:10 +00:00
// Overview of the helpers that follow the TextMark struct on the minified
// lines below:
//
//   - _efcg(m) renders a map[int][]float64 as "{k: v, k: v}" with the keys in
//     the order returned by _ggadbb.
//   - (*textTable).compositeColCorridors() returns a map holding a nil entry
//     for every column index from 0 to _bgcfb-1; it logs when _eeca is set.
//   - (*textTable).compositeRowCorridors() visits the row indexes from 1 to
//     _gccb-1, gathers the composite cells present in _agga for that row and,
//     when there is at least one, stores the result of _fdebfb for them under
//     the row index. Rows without composite cells get no entry.
//   - _aggc(rect) returns a ruling of kind _gaba whose _ccb, _gaad and _gdaf
//     are the rectangle's Urx, Lly and Ury.
//   - _aceb(a, b) returns _ecag applied to the _bgca fields of both paras.

// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
//	ex, _ := New(page)
//	// handle errors
//	pageText, _, _, err := ex.ExtractPageText()
//	// handle errors
//	text := pageText.Text()
//	textMarks := pageText.Marks()
//
//	start := strings.Index(text, term)
//	end := start + len(term)
//	spanMarks, err := textMarks.RangeOffset(start, end)
//	// handle errors
//	bbox, ok := spanMarks.BBox()
//	// handle errors
type TextMark struct {
2020-11-23 22:15:56 +00:00
2020-12-06 13:03:03 +00:00
// Text is the extracted text.
Text string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// Original is the text as it appears in the PDF. Unlike `Text` it has not been decoded.
Original string ;
2020-11-23 22:15:56 +00:00
2021-01-07 14:20:10 +00:00
// BBox is the bounding box of the text.
2022-06-27 19:58:38 +00:00
BBox _bd . PdfRectangle ;
2021-01-07 14:20:10 +00:00
// Font is the font the text was drawn with. It may be nil.
2022-06-27 19:58:38 +00:00
Font * _bd . PdfFont ;
2021-01-07 14:20:10 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
//
//	text, textMarks := pageText.Text(), pageText.Marks()
//	marks := textMarks.Elements()
//
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-06-27 19:58:38 +00:00
FillColor _fb . Color ;
2021-01-07 14:20:10 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-06-27 19:58:38 +00:00
StrokeColor _fb . Color ;
2021-01-07 14:20:10 +00:00
// Orientation is the text orientation.
// NOTE(review): the unit is not visible here (presumably degrees) - confirm.
2022-06-27 19:58:38 +00:00
Orientation int ; } ; func _efcg ( _gaaf map [ int ] [ ] float64 ) string { _bfcgc := _ggadbb ( _gaaf ) ; _gdaeb := make ( [ ] string , len ( _gaaf ) ) ; for _acga , _ecac := range _bfcgc { _gdaeb [ _acga ] = _be . Sprintf ( "\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066" , _ecac , _gaaf [ _ecac ] ) ;
} ; return _be . Sprintf ( "\u007b\u0025\u0073\u007d" , _dc . Join ( _gdaeb , "\u002c\u0020" ) ) ; } ; func ( _adacb * textTable ) compositeColCorridors ( ) map [ int ] [ ] float64 { _edad := make ( map [ int ] [ ] float64 , _adacb . _bgcfb ) ; if _eeca { _eg . Log . Info ( "\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020" , _adacb . _bgcfb ) ;
} ; for _dgga := 0 ; _dgga < _adacb . _bgcfb ; _dgga ++ { _edad [ _dgga ] = nil ; } ; return _edad ; } ; func ( _dfae * textTable ) compositeRowCorridors ( ) map [ int ] [ ] float64 { _afec := make ( map [ int ] [ ] float64 , _dfae . _gccb ) ; if _eeca { _eg . Log . Info ( "c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064" , _dfae . _gccb ) ;
} ; for _edagab := 1 ; _edagab < _dfae . _gccb ; _edagab ++ { var _aeed [ ] compositeCell ; for _cgdd := 0 ; _cgdd < _dfae . _bgcfb ; _cgdd ++ { if _fgafce , _fbef := _dfae . _agga [ _bddbg ( _cgdd , _edagab ) ] ; _fbef { _aeed = append ( _aeed , _fgafce ) ; } ; } ; if len ( _aeed ) == 0 { continue ;
} ; _caca := _fdebfb ( _aeed ) ; _afec [ _edagab ] = _caca ; if _eeca { _be . Printf ( "\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a" , _edagab , _caca ) ; } ; } ; return _afec ; } ; func _aggc ( _dgba _bd . PdfRectangle ) * ruling { return & ruling { _gggf : _gaba , _ccb : _dgba . Urx , _gaad : _dgba . Lly , _gdaf : _dgba . Ury } ;
} ; func _aceb ( _dcff , _aff * textPara ) bool { return _ecag ( _dcff . _bgca , _aff . _bgca ) } ;
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// String returns a string describing `tm`.
// The font description is cut to 50 bytes followed by an ellipsis when it is
// longer than that, and a marker is appended when the Meta field is set.
func (_eefe TextMark) String() string {
	box := _eefe.BBox
	fontDesc := ""
	if _eefe.Font != nil {
		fontDesc = _eefe.Font.String()
		if len(fontDesc) > 50 {
			fontDesc = fontDesc[:50] + "\u002e\u002e\u002e"
		}
	}
	metaTag := ""
	if _eefe.Meta {
		metaTag = "\u0020\u002a\u004d\u002a"
	}
	const layout = "\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d"
	return _be.Sprintf(layout, _eefe.Offset, _eefe.Text, []rune(_eefe.Text), box.Llx, box.Lly, box.Urx, box.Ury, fontDesc, metaTag)
}

// scanBand walks the receiver's words whose _acag lies within [_gbf, _edbeb],
// widened on both sides by _bbcc times the _dbga of _egge as read on entry.
// A word is accepted when _bcce approves it, its size mismatch against the
// current _egge._dbga does not exceed _acbf (checked only when _acbf > 0) and
// _egge.blocked rejects nothing. The number of accepted words is returned.
//
// When _dccg is false every accepted word is pulled into _egge and removed
// from the receiver at the end; when it is true nothing is moved and at most
// one word per depth index is counted. Unless _bdga is set, the band grows to
// include the _acag of each accepted word. _dbaa is not used.
func (_fdag *wordBag) scanBand(_dbaa string, _egge *wordBag, _bcce func(_dgbe *wordBag, _bcfc *textWord) bool, _gbf, _edbeb, _acbf float64, _dccg, _bdga bool) int {
	entrySize := _egge._dbga
	var pending map[int]map[*textWord]struct{}
	if !_dccg {
		pending = _fdag.makeRemovals()
	}
	margin := _bbcc * entrySize
	accepted := 0
	for _, depthIdx := range _fdag.depthBand(_gbf-margin, _edbeb+margin) {
		if len(_fdag._fadg[depthIdx]) == 0 {
			continue
		}
		for _, word := range _fdag._fadg[depthIdx] {
			if !(_gbf-margin <= word._acag && word._acag <= _edbeb+margin) {
				continue
			}
			if !_bcce(_egge, word) {
				continue
			}
			// _egge._dbga is re-read here because pulling words may change it.
			relDiff := 2.0 * _bf.Abs(word._efag-_egge._dbga) / (word._efag + _egge._dbga)
			ratio := _bf.Max(word._efag/_egge._dbga, _egge._dbga/word._efag)
			mismatch := _bf.Min(relDiff, ratio)
			if _acbf > 0 && mismatch > _acbf {
				continue
			}
			if _egge.blocked(word) {
				continue
			}
			if !_dccg {
				_egge.pullWord(word, depthIdx, pending)
			}
			accepted++
			if !_bdga {
				if word._acag < _gbf {
					_gbf = word._acag
				}
				if word._acag > _edbeb {
					_edbeb = word._acag
				}
			}
			if _dccg {
				break
			}
		}
	}
	if !_dccg {
		_fdag.applyRemovals(pending)
	}
	return accepted
}

// empty reports whether the stack holds no states.
func (_dcd *stateStack) empty() bool {
	stack := *_dcd
	return len(stack) == 0
}

// text returns the concatenated _debad text of the line's words, inserting a
// single space before every word whose _fgbg flag is set.
func (_cgg *textLine) text() string {
	var out _dc.Builder
	for _, word := range _cgg._ebge {
		if word._fgbg {
			out.WriteString("\u0020")
		}
		out.WriteString(word._debad)
	}
	return out.String()
}
2022-03-13 12:41:53 +00:00
2022-06-27 19:58:38 +00:00
// ToTextMark returns the public view of `tm`.
// Offset and Meta are not populated here and keep their zero values.
func (_gaed *textMark) ToTextMark() TextMark {
	return TextMark{
		Text:        _gaed._aec,
		Original:    _gaed._degeed,
		BBox:        _gaed._gadc,
		Font:        _gaed._bafgd,
		FontSize:    _gaed._gaaaf,
		FillColor:   _gaed._cefe,
		StrokeColor: _gaed._gbda,
		Orientation: _gaed._ddggc,
	}
}

// textMark is the package-internal representation of a piece of extracted
// text; ToTextMark converts it to the exported TextMark.
type textMark struct {
	_bd.PdfRectangle
	_ddggc  int
	_aec    string
	_degeed string
	_bafgd  *_bd.PdfFont
	_gaaaf  float64
	_cfae   float64
	_bddg   _g.Matrix
	_debc   _g.Point
	_gadc   _bd.PdfRectangle
	_cefe   _fb.Color
	_gbda   _fb.Color
}

// getStrokeColor returns the colour that _cgdc derives from the stroking
// colour space and stroking colour of the text object's _cddag state.
func (_dfd *textObject) getStrokeColor() _fb.Color {
	return _cgdc(_dfd._cddag.ColorspaceStroking, _dfd._cddag.ColorStroking)
}

// _ecaa returns the integers 0.._afefe-1 ordered so that _bdgg(a, b) holds
// for earlier-before-later elements. _bdgg receives the values themselves, not
// positions in the result.
func _ecaa(_afefe int, _bdgg func(int, int) bool) []int {
	order := make([]int, _afefe)
	for i := range order {
		order[i] = i
	}
	_cc.Slice(order, func(a, b int) bool { return _bdgg(order[a], order[b]) })
	return order
}

// getCurrentFont returns the font stored in the text state. When none is set
// it logs a debug message and falls back to the model's default font.
func (_deb *textObject) getCurrentFont() *_bd.PdfFont {
	if font := _deb._dff._degee; font != nil {
		return font
	}
	_eg.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e")
	return _bd.DefaultFont()
}

// writeText writes every para whose _gfce flag is clear to _cfceb. Between a
// written para and its successor a single space is emitted when _gbgg accepts
// the pair, otherwise two line feeds; two line feeds always end the output.
// Write errors are deliberately ignored (best-effort output).
func (_babf paraList) writeText(_cfceb _d.Writer) {
	last := len(_babf) - 1
	for i, para := range _babf {
		if para._gfce {
			continue
		}
		para.writeText(_cfceb)
		if i == last {
			continue
		}
		if _gbgg(para, _babf[i+1]) {
			_cfceb.Write([]byte("\u0020"))
			continue
		}
		_cfceb.Write([]byte("\u000a"))
		_cfceb.Write([]byte("\u000a"))
	}
	_cfceb.Write([]byte("\u000a"))
	_cfceb.Write([]byte("\u000a"))
}

// splitSec partitions the rulings into groups. The receiver is first sorted in
// place by _gaad and then _gdaf. Each group is seeded with the first ruling
// that is not yet grouped and absorbs, in sorted order, every ungrouped ruling
// for which alignsSec holds against a ruling already in the group.
//
// An empty receiver yields nil; previously it caused an index-out-of-range
// panic.
func (_bagce rulingList) splitSec() []rulingList {
	if len(_bagce) == 0 {
		return nil
	}
	_cc.Slice(_bagce, func(i, j int) bool {
		a, b := _bagce[i], _bagce[j]
		if a._gaad != b._gaad {
			return a._gaad < b._gaad
		}
		return a._gdaf < b._gdaf
	})

	grouped := make(map[*ruling]struct{}, len(_bagce))
	collect := func(seed *ruling) rulingList {
		group := rulingList{seed}
		grouped[seed] = struct{}{}
		for _, candidate := range _bagce {
			if _, done := grouped[candidate]; done {
				continue
			}
			for _, member := range group {
				if candidate.alignsSec(member) {
					group = append(group, candidate)
					grouped[candidate] = struct{}{}
					break
				}
			}
		}
		return group
	}

	groups := []rulingList{collect(_bagce[0])}
	for _, r := range _bagce[1:] {
		if _, done := grouped[r]; done {
			continue
		}
		groups = append(groups, collect(r))
	}
	return groups
}

// connections returns the set of ruling indexes reachable from _ddff, where
// index i is linked to index j when _ebafg[i] contains j. The walk follows
// links transitively and visits every index at most once.
func (_ddce rulingList) connections(_ebafg map[int]intSet, _ddff int) intSet {
	connected := make(intSet)
	visited := make(intSet)
	var visit func(int)
	visit = func(target int) {
		if visited.has(target) {
			return
		}
		visited.add(target)
		for i := range _ddce {
			if _ebafg[i].has(target) {
				connected.add(i)
			}
		}
		for i := range _ddce {
			if connected.has(i) {
				visit(i)
			}
		}
	}
	visit(_ddff)
	return connected
}

// emptyCompositeRow reports whether no composite cell in row _eaae holds any
// paras.
func (_efefd *textTable) emptyCompositeRow(_eaae int) bool {
	for col := 0; col < _efefd._bgcfb; col++ {
		cell, ok := _efefd._agga[_bddbg(col, _eaae)]
		if ok && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// _gbae reports whether rectangle _bcdgb lies entirely within _eeae
// (touching edges count as inside).
func _gbae(_eeae, _bcdgb _bd.PdfRectangle) bool {
	return _eeae.Llx <= _bcdgb.Llx && _bcdgb.Urx <= _eeae.Urx &&
		_eeae.Lly <= _bcdgb.Lly && _bcdgb.Ury <= _eeae.Ury
}

// _gagcd returns nil when a licensed key is installed or _fdd is set.
// Otherwise it prints a notice to standard output and returns an error.
// The _gbggg argument is not used.
func _gagcd(_gbggg *PageText) error {
	key := _ba.GetLicenseKey()
	if (key != nil && key.IsLicensed()) || _fdd {
		return nil
	}
	_be.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_be.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _f.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}
2022-03-13 12:41:53 +00:00
2022-06-27 19:58:38 +00:00
// String returns a description of `k`.
// Kinds missing from the _ecgab table are rendered with their numeric value.
func (_beefa markKind) String() string {
	if name, known := _ecgab[_beefa]; known {
		return name
	}
	return _be.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _beefa)
}

// toTextMarks converts the para to TextMarks, threading the running offset
// through _aaee. A para without a table is converted by toCellTextMarks. For a
// table the cells are emitted row by row: a missing cell contributes a tab,
// every cell is followed by a space, and every row but the last by a line feed.
func (_ggbc *textPara) toTextMarks(_aaee *int) []TextMark {
	if _ggbc._dbfdg == nil {
		return _ggbc.toCellTextMarks(_aaee)
	}
	var marks []TextMark
	for row := 0; row < _ggbc._dbfdg._gccb; row++ {
		for col := 0; col < _ggbc._dbfdg._bgcfb; col++ {
			if cell := _ggbc._dbfdg.get(col, row); cell != nil {
				marks = append(marks, cell.toCellTextMarks(_aaee)...)
			} else {
				marks = _efbc(marks, _aaee, "\u0009")
			}
			marks = _efbc(marks, _aaee, "\u0020")
		}
		if row < _ggbc._dbfdg._gccb-1 {
			marks = _efbc(marks, _aaee, "\u000a")
		}
	}
	return marks
}

// getDown returns the _egad neighbour of every cell in the table's last row.
// It returns nil when any of those neighbours is taken, or when the
// neighbours are not chained through their _bbbdd links from left to right.
func (_cbaa *textTable) getDown() paraList {
	below := make(paraList, _cbaa._bgcfb)
	for col := 0; col < _cbaa._bgcfb; col++ {
		neighbour := _cbaa.get(col, _cbaa._gccb-1)._egad
		if neighbour.taken() {
			return nil
		}
		below[col] = neighbour
	}
	for col := 0; col < _cbaa._bgcfb-1; col++ {
		if below[col]._bbbdd != below[col+1] {
			return nil
		}
	}
	return below
}

// get returns the para stored at (_cebf, _cacgc), or nil when the position is
// empty.
func (_faac *textTable) get(_cebf, _cacgc int) *textPara {
	key := _bddbg(_cebf, _cacgc)
	return _faac._deedc[key]
}

// bbox returns the para's bounding rectangle.
func (_ebgge *textPara) bbox() _bd.PdfRectangle {
	return _ebgge.PdfRectangle
}

// Sentinel errors of the package.
var (
	_cg  = _f.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")
	_bdg = _f.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)

// _aead returns the larger of its two arguments.
func _aead(_bdbaf, _abgb int) int {
	if _abgb >= _bdbaf {
		return _abgb
	}
	return _bdbaf
}

// _fdeba converts a depth to an index by dividing by _abab and truncating;
// one is subtracted for every input that is not >= 0.
func _fdeba(_cabb float64) int {
	index := int(_cabb / _abab)
	if !(_cabb >= 0) {
		index--
	}
	return index
}
2022-02-05 21:34:53 +00:00
2022-06-06 22:48:24 +00:00
// Overview of the helpers that follow the ImageMark struct on the minified
// lines below:
//
//   - _dbfdf(a, b) computes the absolute X and Y distances between two points
//     and returns _acbb(|dy|, |dx|).
//   - (*compositeCell).updateBBox() folds the rectangle of every para in the
//     cell into the cell's own rectangle using _gcff.
//   - (*wordBag).allWords() concatenates all word slices held in _fadg. The
//     map is iterated directly, so the order across keys is unspecified.
//   - textResult pairs a PageText with two integer values.
//   - (*wordBag).maxDepth() returns _adbg minus the bag's Lly.

// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
2022-06-27 19:58:38 +00:00
type ImageMark struct { Image * _bd . Image ;
2021-12-14 01:08:28 +00:00
2022-06-06 22:48:24 +00:00
// Width and Height are the dimensions of the image as displayed in the PDF.
Width float64 ; Height float64 ;
// X and Y give the position of the image in PDF coordinates (lower left corner).
X float64 ; Y float64 ;
// Angle is the rotation angle in degrees, if the image is rotated.
2022-06-27 19:58:38 +00:00
Angle float64 ; } ; func _dbfdf ( _cgde , _gcec _g . Point ) bool { _geea := _bf . Abs ( _cgde . X - _gcec . X ) ; _cbed := _bf . Abs ( _cgde . Y - _gcec . Y ) ; return _acbb ( _cbed , _geea ) ; } ; func ( _cacb * compositeCell ) updateBBox ( ) { for _ , _aace := range _cacb . paraList { _cacb . PdfRectangle = _gcff ( _cacb . PdfRectangle , _aace . PdfRectangle ) ;
} ; } ; func ( _debf * wordBag ) allWords ( ) [ ] * textWord { var _dbcc [ ] * textWord ; for _ , _fcbg := range _debf . _fadg { _dbcc = append ( _dbcc , _fcbg ... ) ; } ; return _dbcc ; } ; type textResult struct { _bdf PageText ; _cfd int ; _acg int ; } ; func ( _dgff * wordBag ) maxDepth ( ) float64 { return _dgff . _adbg - _dgff . Lly } ;
// _bfe returns _dede with its final rune removed. The cut is made on rune
// boundaries, so a trailing multi-byte character is dropped as a whole.
// An empty string is returned unchanged; previously it caused a
// slice-bounds panic.
func _bfe(_dede string) string {
	runes := []rune(_dede)
	if len(runes) == 0 {
		return ""
	}
	return string(runes[:len(runes)-1])
}

// firstWord returns the first word stored under key _efcf. It panics when no
// words are stored under that key.
func (_cdec *wordBag) firstWord(_efcf int) *textWord {
	return _cdec._fadg[_efcf][0]
}
2021-12-14 01:08:28 +00:00
2022-06-27 19:58:38 +00:00
// String returns a human readable description of `ss`: the number of
// entries in _gdbg and the value of the _beg flag.
func (_fee *shapesState) String() string {
	count := len(_fee._gdbg)
	fresh := _fee._beg
	const layout = "\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d"
	return _be.Sprintf(layout, count, fresh)
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// Text returns the extracted page text.
func (_fcb PageText) Text() string {
	text := _fcb._ebce
	return text
}

// makeRemovals returns a fresh removal set containing an empty word set for
// every key currently present in the bag's _fadg map.
func (_efbg *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	removals := make(map[int]map[*textWord]struct{}, len(_efbg._fadg))
	for depthIdx := range _efbg._fadg {
		removals[depthIdx] = map[*textWord]struct{}{}
	}
	return removals
}

// log prints a one-line summary of every non-nil para, headed by _fggf and the
// number of paras. Paras holding a table are tagged with the table's
// dimensions. Nothing is printed unless _cbec is set.
func (_bga paraList) log(_fggf string) {
	if !_cbec {
		return
	}
	_eg.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _fggf, len(_bga))
	for i, para := range _bga {
		if para == nil {
			continue
		}
		content := para.text()
		tag := "\u0020\u0020"
		if table := para._dbfdg; table != nil {
			tag = _be.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", table._bgcfb, table._gccb)
		}
		_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", i, para.PdfRectangle, tag, _dbec(content, 50))
	}
}

// xMean returns the mean of the X coordinates of the ruling's two end points.
func (_beag lineRuling) xMean() float64 {
	sum := _beag._eadee.X + _beag._feab.X
	return 0.5 * sum
}
2022-06-06 22:48:24 +00:00
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
2022-06-27 19:58:38 +00:00
func ( _cbc * TextMarkArray ) RangeOffset ( start , end int ) ( * TextMarkArray , error ) { if _cbc == nil { return nil , _f . New ( "\u006da\u003d\u003d\u006e\u0069\u006c" ) ; } ; if end < start { return nil , _be . Errorf ( "\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020" , start , end ) ;
} ; _aegc := len ( _cbc . _ggbg ) ; if _aegc == 0 { return _cbc , nil ; } ; if start < _cbc . _ggbg [ 0 ] . Offset { start = _cbc . _ggbg [ 0 ] . Offset ; } ; if end > _cbc . _ggbg [ _aegc - 1 ] . Offset + 1 { end = _cbc . _ggbg [ _aegc - 1 ] . Offset + 1 ; } ; _fbad := _cc . Search ( _aegc , func ( _bgfb int ) bool { return _cbc . _ggbg [ _bgfb ] . Offset + len ( _cbc . _ggbg [ _bgfb ] . Text ) - 1 >= start } ) ;
if ! ( 0 <= _fbad && _fbad < _aegc ) { _aede := _be . Errorf ( "\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076" , start , _fbad , _aegc , _cbc . _ggbg [ 0 ] , _cbc . _ggbg [ _aegc - 1 ] ) ;
return nil , _aede ; } ; _gdea := _cc . Search ( _aegc , func ( _eeab int ) bool { return _cbc . _ggbg [ _eeab ] . Offset > end - 1 } ) ; if ! ( 0 <= _gdea && _gdea < _aegc ) { _caaa := _be . Errorf ( "\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076" , end , _gdea , _aegc , _cbc . _ggbg [ 0 ] , _cbc . _ggbg [ _aegc - 1 ] ) ;
return nil , _caaa ; } ; if _gdea <= _fbad { return nil , _be . Errorf ( "\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064" , start , end , _fbad , _gdea ) ;
} ; return & TextMarkArray { _ggbg : _cbc . _ggbg [ _fbad : _gdea ] } , nil ; } ; func _fdbbd ( _abfa [ ] pathSection ) { if _ebba < 0.0 { return ; } ; if _daafa { _eg . Log . Info ( "\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073" , len ( _abfa ) ) ;
} ; for _dgbeff , _faeb := range _abfa { for _eeag , _abacd := range _faeb . _aga { for _dabg , _abdd := range _abacd . _gcbb { _abacd . _gcbb [ _dabg ] = _g . Point { X : _cfgb ( _abdd . X ) , Y : _cfgb ( _abdd . Y ) } ; if _daafa { _fcgg := _abacd . _gcbb [ _dabg ] ; if ! _fafbc ( _abdd , _fcgg ) { _cecd := _g . Point { X : _fcgg . X - _abdd . X , Y : _fcgg . Y - _abdd . Y } ;
_be . Printf ( "\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a" , _dgbeff , _eeag , _dabg , _abdd , _fcgg , _cecd ) ; } ; } ; } ; } ; } ; } ; func _fggaa ( _edegb [ ] * textMark , _caeg _bd . PdfRectangle ) [ ] * textWord { var _fgabd [ ] * textWord ;
var _gcadc * textWord ; if _eeegf { _eg . Log . Info ( "\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073" , len ( _edegb ) ) ; } ; _cade := func ( ) { if _gcadc != nil { _deaad := _gcadc . computeText ( ) ;
if ! _gagac ( _deaad ) { _gcadc . _debad = _deaad ; _fgabd = append ( _fgabd , _gcadc ) ; if _eeegf { _eg . Log . Info ( "\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073" , len ( _fgabd ) - 1 , _gcadc . String ( ) ) ;
for _dgdc , _ebada := range _gcadc . _bbacg { _be . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _dgdc , _ebada . String ( ) ) ; } ; } ; } ; _gcadc = nil ; } ; } ; for _ , _aacf := range _edegb { if _dgeb && _gcadc != nil && len ( _gcadc . _bbacg ) > 0 { _dceca := _gcadc . _bbacg [ len ( _gcadc . _bbacg ) - 1 ] ;
_ggcbb , _fdfca := _egfd ( _aacf . _aec ) ; _gaedf , _fcagf := _egfd ( _dceca . _aec ) ; if _fdfca && ! _fcagf && _dceca . inDiacriticArea ( _aacf ) { _gcadc . addDiacritic ( _ggcbb ) ; continue ; } ; if _fcagf && ! _fdfca && _aacf . inDiacriticArea ( _dceca ) { _gcadc . _bbacg = _gcadc . _bbacg [ : len ( _gcadc . _bbacg ) - 1 ] ;
_gcadc . appendMark ( _aacf , _caeg ) ; _gcadc . addDiacritic ( _gaedf ) ; continue ; } ; } ; _cdaae := _gagac ( _aacf . _aec ) ; if _cdaae { _cade ( ) ; continue ; } ; if _gcadc == nil && ! _cdaae { _gcadc = _bccf ( [ ] * textMark { _aacf } , _caeg ) ; continue ; } ; _aefa := _gcadc . _efag ; _cdfgg := _bf . Abs ( _acec ( _caeg , _aacf ) - _gcadc . _acag ) / _aefa ;
_deae := _edagd ( _aacf , _gcadc ) / _aefa ; if _deae >= _dadd || ! ( - _afbbc <= _deae && _cdfgg <= _afeb ) { _cade ( ) ; _gcadc = _bccf ( [ ] * textMark { _aacf } , _caeg ) ; continue ; } ; _gcadc . appendMark ( _aacf , _caeg ) ; } ; _cade ( ) ; return _fgabd ; } ; func ( _ffad * textObject ) reset ( ) { _ffad . _cbff = _g . IdentityMatrix ( ) ;
_ffad . _bdb = _g . IdentityMatrix ( ) ; _ffad . _dcaa = nil ; } ; func ( _eddad * textObject ) newTextMark ( _aeeg string , _cdbb _g . Matrix , _bbfaf _g . Point , _cfce float64 , _ebbd * _bd . PdfFont , _eaeg float64 , _acgg , _cdece _fb . Color ) ( textMark , bool ) { _bfgc := _cdbb . Angle ( ) ;
_eddac := _ecage ( _bfgc , _gef ) ; var _dfgae float64 ; if _eddac % 180 != 90 { _dfgae = _cdbb . ScalingFactorY ( ) ; } else { _dfgae = _cdbb . ScalingFactorX ( ) ; } ; _cebbg := _cecc ( _cdbb ) ; _gac := _bd . PdfRectangle { Llx : _cebbg . X , Lly : _cebbg . Y , Urx : _bbfaf . X , Ury : _bbfaf . Y } ;
switch _eddac % 360 { case 90 : _gac . Urx -= _dfgae ; case 180 : _gac . Ury -= _dfgae ; case 270 : _gac . Urx += _dfgae ; case 0 : _gac . Ury += _dfgae ; default : _eddac = 0 ; _gac . Ury += _dfgae ; } ; if _gac . Llx > _gac . Urx { _gac . Llx , _gac . Urx = _gac . Urx , _gac . Llx ; } ; if _gac . Lly > _gac . Ury { _gac . Lly , _gac . Ury = _gac . Ury , _gac . Lly ;
} ; _ebad := true ; if _eddad . _fafb . _dd . Width ( ) > 0 { _ebca , _eded := _dbaee ( _gac , _eddad . _fafb . _dd ) ; if ! _eded { _ebad = false ; _eg . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q" , _gac , _eddad . _fafb . _dd , _aeeg ) ;
} ; _gac = _ebca ; } ; _cbcc := _gac ; _dgbf := _eddad . _fafb . _dd ; switch _eddac % 360 { case 90 : _dgbf . Urx , _dgbf . Ury = _dgbf . Ury , _dgbf . Urx ; _cbcc = _bd . PdfRectangle { Llx : _dgbf . Urx - _gac . Ury , Urx : _dgbf . Urx - _gac . Lly , Lly : _gac . Llx , Ury : _gac . Urx } ; case 180 : _cbcc = _bd . PdfRectangle { Llx : _dgbf . Urx - _gac . Llx , Urx : _dgbf . Urx - _gac . Urx , Lly : _dgbf . Ury - _gac . Lly , Ury : _dgbf . Ury - _gac . Ury } ;
case 270 : _dgbf . Urx , _dgbf . Ury = _dgbf . Ury , _dgbf . Urx ; _cbcc = _bd . PdfRectangle { Llx : _gac . Ury , Urx : _gac . Lly , Lly : _dgbf . Ury - _gac . Llx , Ury : _dgbf . Ury - _gac . Urx } ; } ; if _cbcc . Llx > _cbcc . Urx { _cbcc . Llx , _cbcc . Urx = _cbcc . Urx , _cbcc . Llx ; } ;
if _cbcc . Lly > _cbcc . Ury { _cbcc . Lly , _cbcc . Ury = _cbcc . Ury , _cbcc . Lly ; } ; _bggcd := textMark { _aec : _aeeg , PdfRectangle : _cbcc , _gadc : _gac , _bafgd : _ebbd , _gaaaf : _dfgae , _cfae : _eaeg , _bddg : _cdbb , _debc : _bbfaf , _ddggc : _eddac , _cefe : _acgg , _gbda : _cdece } ;
if _eeegf { _eg . Log . Info ( "n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073" , _cebbg , _bbfaf , _bggcd . String ( ) ) ; } ;
return _bggcd , _ebad ; } ; type gridTile struct { _bd . PdfRectangle ; _bfgeab , _fgbc , _ccadg , _eafc bool ; } ; type event struct { _bbcd float64 ; _cfgg bool ; _agecb int ; } ; func ( _befcb * textWord ) computeText ( ) string { _dcbb := make ( [ ] string , len ( _befcb . _bbacg ) ) ;
for _faba , _feeee := range _befcb . _bbacg { _dcbb [ _faba ] = _feeee . _aec ; } ; return _dc . Join ( _dcbb , "" ) ; } ; func _ecage ( _cbee float64 , _gdfc int ) int { if _gdfc == 0 { _gdfc = 1 ; } ; _cdfa := float64 ( _gdfc ) ; return int ( _bf . Round ( _cbee / _cdfa ) * _cdfa ) ; } ; const _gcb = 20 ;
// taken reports whether the paragraph is nil or has its _deed flag set.
func (p *textPara) taken() bool {
	if p == nil {
		return true
	}
	return p._deed
}

// _dfe returns _ebbf(a, b) unless _bcaga accepts that value, in which case it
// returns _afdf(a, b) instead.
func _dfe(a, b bounded) float64 {
	if v := _ebbf(a, b); !_bcaga(v) {
		return v
	}
	return _afdf(a, b)
}

// toTextMarks returns the TextMarks of all paragraphs whose _gfce flag is not
// set. Between a paragraph and its successor in the list a single " " mark is
// inserted when _gbgg reports true for the pair, otherwise two "\n" marks.
// Two "\n" marks are always appended at the end.
func (paras paraList) toTextMarks() []TextMark {
	var (
		offset int
		marks  []TextMark
	)
	last := len(paras) - 1
	for i, para := range paras {
		if para._gfce {
			continue
		}
		paraMarks := para.toTextMarks(&offset)
		marks = append(marks, paraMarks...)
		if i == last {
			continue
		}
		if _gbgg(para, paras[i+1]) {
			marks = _efbc(marks, &offset, " ")
		} else {
			marks = _efbc(marks, &offset, "\n")
			marks = _efbc(marks, &offset, "\n")
		}
	}
	marks = _efbc(marks, &offset, "\n")
	marks = _efbc(marks, &offset, "\n")
	return marks
}

// asTiling converts the rulings into a gridTiling.
//
// Steps:
//  1. Log adjacent rulings that align on both axes (duplicates).
//  2. Sort, split into vertical and horizontal rulings and take the distinct
//     primary coordinates of each as the grid lines (xs, ys).
//  3. Build one gridTile per grid square with _beage.
//  4. Merge tiles along each row towards higher X until a tile with _eafc set.
//  5. Merge the row cells towards lower Y while the cell below has the same
//     Llx and Urx and the current cell's _ccadg is not set.
//  6. Keep the cells for which complete() is true.
//
// An empty gridTiling is returned when the list is empty or when there are
// fewer than two distinct vertical or horizontal grid lines. (Previously an
// empty list, or a list with no rulings of one orientation, caused an
// index-out-of-range panic.)
func (rl rulingList) asTiling() gridTiling {
	if _gdcf {
		_eg.Log.Info("rulingList.asTiling: vecs=%d ==================+++ ==================", len(rl))
	}
	if len(rl) == 0 {
		// rl[1:] below would panic; there is nothing to tile anyway.
		return gridTiling{}
	}
	for i, v := range rl[1:] {
		w := rl[i] // the ruling just before v
		if w.alignsPrimary(v) && w.alignsSec(v) {
			_eg.Log.Error("asTiling: Duplicate rulings.\n\tv=%s\n\tw=%s", v, w)
		}
	}
	rl.sortStrict()
	rl.log("snapped")

	verts, horzs := rl.vertsHorzs()
	xs := verts.primaries()
	ys := horzs.primaries()
	nx := len(xs) - 1
	ny := len(ys) - 1
	if nx <= 0 || ny <= 0 {
		return gridTiling{}
	}
	bbox := _bd.PdfRectangle{Llx: xs[0], Urx: xs[nx], Lly: ys[0], Ury: ys[ny]}
	if _gdcf {
		_eg.Log.Info("rulingList.asTiling: verts=%d", len(verts))
		for i, v := range verts {
			_be.Printf("%4d: %s\n", i, v)
		}
		_eg.Log.Info("rulingList.asTiling: horzs=%d", len(horzs))
		for i, v := range horzs {
			_be.Printf("%4d: %s\n", i, v)
		}
		_eg.Log.Info("rulingList.asTiling:  wxh=%dx%d\n\tllx=%.2f\n\tlly=%.2f", nx, ny, xs, ys)
	}

	// One tile per grid square, stored row-major: tiles[y*nx+x].
	tiles := make([]gridTile, nx*ny)
	for y := ny - 1; y >= 0; y-- {
		lly := ys[y]
		ury := ys[y+1]
		for x := 0; x < nx; x++ {
			llx := xs[x]
			urx := xs[x+1]
			left := verts.findPrimSec(llx, lly)
			right := verts.findPrimSec(urx, lly)
			bottom := horzs.findPrimSec(lly, llx)
			top := horzs.findPrimSec(ury, llx)
			square := _bd.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			tile := _beage(square, left, right, bottom, top)
			tiles[y*nx+x] = tile
			if _gdcf {
				_be.Printf("  x=%2d y=%2d: %s %6.2f x %6.2f\n", x, y, tile.String(), tile.Width(), tile.Height())
			}
		}
	}

	if _gdcf {
		_eg.Log.Info("rulingList.asTiling: coalesce horizontal. %6.2f", bbox)
	}
	// rows[y] maps Llx -> horizontally merged cell for row y.
	rows := make([]map[float64]gridTile, ny)
	for y := ny - 1; y >= 0; y-- {
		if _gdcf {
			_be.Printf("  y=%2d\n", y)
		}
		rows[y] = make(map[float64]gridTile, nx)
		for x := 0; x < nx; x++ {
			cell := tiles[y*nx+x]
			if _gdcf {
				_be.Printf("  %4d: %s\n", x, cell)
			}
			if !cell._fgbc {
				continue
			}
			lastX := x
			for x2 := x + 1; !cell._eafc && x2 < nx; x2++ {
				next := tiles[y*nx+x2]
				cell.Urx = next.Urx
				cell._bfgeab = cell._bfgeab || next._bfgeab
				cell._ccadg = cell._ccadg || next._ccadg
				cell._eafc = next._eafc
				if _gdcf {
					_be.Printf("   %4d: %s \u2192 %s\n", x2, next, cell)
				}
				lastX = x2
			}
			if _gdcf {
				_be.Printf("   %2d - %2d \u2192 %s\n", x, lastX, cell)
			}
			x = lastX // skip the tiles just merged
			rows[y][cell.Llx] = cell
		}
	}

	// cells[lly][llx] holds the final cells; absorbed[lly][llx] marks row
	// cells already merged into a cell that started in a higher row.
	cells := make(map[float64]map[float64]gridTile, ny)
	absorbed := make(map[float64]map[float64]struct{}, ny)
	for y := ny - 1; y >= 0; y-- {
		rowLly := tiles[y*nx].Lly
		cells[rowLly] = make(map[float64]gridTile, nx)
		absorbed[rowLly] = make(map[float64]struct{}, nx)
	}

	if _gdcf {
		_eg.Log.Info("rulingList.asTiling: coalesce vertical. %6.2f", bbox)
	}
	for y := ny - 1; y >= 0; y-- {
		rowLly := tiles[y*nx].Lly
		row := rows[y]
		if _gdcf {
			_be.Printf("  y=%2d\n", y)
		}
		for _, llx := range _gddeb(row) {
			if _, done := absorbed[rowLly][llx]; done {
				continue
			}
			cell := row[llx]
			if _gdcf {
				_be.Printf("     v0=%s\n", cell.String())
			}
			for y2 := y - 1; y2 >= 0; y2-- {
				if cell._ccadg {
					break
				}
				below, ok := rows[y2][llx]
				if !ok {
					break
				}
				if below.Urx != cell.Urx {
					break
				}
				cell._ccadg = below._ccadg
				cell.Lly = below.Lly
				if _gdcf {
					_be.Printf("       v=%s v0=%s\n", below.String(), cell.String())
				}
				absorbed[below.Lly][below.Llx] = struct{}{}
			}
			if y == 0 {
				cell._ccadg = true
			}
			if cell.complete() {
				cells[rowLly][llx] = cell
			}
		}
	}

	tiling := gridTiling{PdfRectangle: bbox, _defd: _cbge(cells), _eabe: _dagae(cells), _bdgaf: cells}
	tiling.log("Created")
	return tiling
}

// encloses reports whether the interval [lo, hi] lies within the ruling's
// [_gaad, _gdaf] interval widened by the tolerance _cebae at both ends.
func (r *ruling) encloses(lo, hi float64) bool {
	if lo < r._gaad-_cebae {
		return false
	}
	return hi <= r._gdaf+_cebae
}

// _dbec returns `s` unchanged when it is shorter than `n` bytes and its first
// `n` bytes otherwise. The cut is byte based, so it may fall inside a
// multi-byte UTF-8 sequence.
func _dbec(s string, n int) string {
	if len(s) >= n {
		return s[:n]
	}
	return s
}

// moveLP concatenates a translation by (tx, ty) onto the _bdb matrix and then
// copies _bdb into _cbff.
func (to *textObject) moveLP(tx, ty float64) {
	translation := _g.NewMatrix(1, 0, 0, 1, tx, ty)
	to._bdb.Concat(translation)
	to._cbff = to._bdb
}

// textPara is a paragraph of text lines with a bounding box.
type textPara struct {
	_bd.PdfRectangle
	_bgca  _bd.PdfRectangle
	_gadg  []*textLine // the lines of the paragraph
	_dbfdg *textTable  // non-nil when a table is attached (see paraList.log)
	_deed  bool        // reported by taken()
	_gfce  bool        // when set, paraList.toTextMarks skips the paragraph
	_ceabb *textPara
	_bbbdd *textPara
	_edaad *textPara
	_egad  *textPara
}

// _gfde passes the absolute X and Y distances between `a` and `b` to _acbb
// and returns its verdict.
func _gfde(a, b _g.Point) bool {
	dx := _bf.Abs(a.X - b.X)
	dy := _bf.Abs(a.Y - b.Y)
	return _acbb(dx, dy)
}

// setHorizScaling stores `v` in the text state's _aee field.
// It is a no-op on a nil receiver.
func (to *textObject) setHorizScaling(v float64) {
	if to != nil {
		to._dff._aee = v
	}
}

var _fdd = false

// topoOrder returns an ordering of the paragraph indexes computed by a
// depth-first traversal: starting from each unvisited index in turn, every
// unvisited index j for which readBefore(order, i, j) holds is visited before
// i is recorded. The recorded sequence is passed through _gfec before being
// returned.
func (paras paraList) topoOrder() []int {
	if _cbec {
		_eg.Log.Info("topoOrder:")
	}
	n := len(paras)
	visited := make([]bool, n)
	sequence := make([]int, 0, n)
	llyOrder := paras.llyOrdering()

	var visit func(i int)
	visit = func(i int) {
		visited[i] = true
		for j := 0; j < n; j++ {
			if !visited[j] && paras.readBefore(llyOrder, i, j) {
				visit(j)
			}
		}
		sequence = append(sequence, i)
	}
	for i := 0; i < n; i++ {
		if !visited[i] {
			visit(i)
		}
	}
	return _gfec(sequence)
}

// eventNeighbours sorts `events` in place (by _bbcd, then adding events before
// removing ones, then by position) and sweeps through them while maintaining
// the set of currently active paragraph indexes. Every paragraph that is added
// becomes a neighbour of each paragraph active at that moment, and vice
// versa. The result maps each paragraph to the indexes of its neighbours
// (nil when it has none); the order of the indexes is unspecified.
func (paras paraList) eventNeighbours(events []event) map[*textPara][]int {
	_cc.Slice(events, func(i, j int) bool {
		a, b := events[i], events[j]
		switch {
		case a._bbcd != b._bbcd:
			return a._bbcd < b._bbcd
		case a._cfgg != b._cfgg:
			return a._cfgg
		default:
			return i < j
		}
	})

	neighbours := make(map[int]intSet)
	active := make(intSet)
	for _, ev := range events {
		idx := ev._agecb
		if !ev._cfgg {
			active.del(idx)
			continue
		}
		neighbours[idx] = make(intSet)
		for other := range active {
			if other != idx {
				neighbours[idx].add(other)
				neighbours[other].add(idx)
			}
		}
		active.add(idx)
	}

	result := map[*textPara][]int{}
	for idx, set := range neighbours {
		para := paras[idx]
		if len(set) == 0 {
			result[para] = nil
			continue
		}
		list := make([]int, 0, len(set))
		for other := range set {
			list = append(list, other)
		}
		result[para] = list
	}
	return result
}

// _dbee returns a new textTable with _bgcfb = 2 and _gccb = 2 holding `a` at
// (0,0), `b` at (1,0), `c` at (0,1) and `d` at (1,1).
func _dbee(a, b, c, d *textPara) *textTable {
	table := &textTable{_bgcfb: 2, _gccb: 2, _deedc: make(map[uint64]*textPara, 4)}
	table.put(0, 0, a)
	table.put(1, 0, b)
	table.put(0, 1, c)
	table.put(1, 1, d)
	return table
}

// has reports whether `v` is in the set.
func (s intSet) has(v int) bool {
	_, ok := s[v]
	return ok
}

// _efbc appends to `marks`, via _ecfc, a copy of the template mark _bcfa
// whose Text is `text`. `offset` is forwarded to _ecfc.
func _efbc(marks []TextMark, offset *int, text string) []TextMark {
	mark := _bcfa
	mark.Text = text
	return _ecfc(marks, offset, mark)
}

// extractPageResourcesToFont appends one Font entry to pf.Fonts for every
// font in the resources' font dictionary whose name is not already listed
// (as decided by _edd).
//
// The embedded font program is taken from the first non-nil entry among the
// descriptor's FontFile, FontFile2 and FontFile3; the file name is the font
// name plus ".pfb", ".ttf" or ".cff" respectively. When no font program is
// found the _ag message is logged at debug level and FontData stays empty.
//
// Errors: _ab when the resources have no font dictionary, _bge when a listed
// font cannot be fetched, and any error from loading the font or decoding
// its stream.
func (pf *PageFonts) extractPageResourcesToFont(resources *_bd.PdfPageResources) error {
	fontDict, ok := _db.GetDict(resources.Font)
	if !ok {
		return _f.New(_ab)
	}
	for _, key := range fontDict.Keys() {
		fontObj, found := resources.GetFontByName(key)
		if !found {
			return _f.New(_bge)
		}
		font, err := _bd.NewPdfFontFromPdfObject(fontObj)
		if err != nil {
			return err
		}
		desc := font.FontDescriptor()
		name := font.FontDescriptor().FontName.String()
		subtype := font.Subtype()
		if _edd(pf.Fonts, name) {
			continue
		}
		hasToUnicode := len(font.ToUnicode()) != 0

		var (
			data     []byte
			fileName string
		)
		switch {
		case desc.FontFile != nil:
			if stream, isStream := _db.GetStream(desc.FontFile); isStream {
				if data, err = _db.DecodeStream(stream); err != nil {
					return err
				}
				fileName = name + ".pfb"
			}
		case desc.FontFile2 != nil:
			if stream, isStream := _db.GetStream(desc.FontFile2); isStream {
				if data, err = _db.DecodeStream(stream); err != nil {
					return err
				}
				fileName = name + ".ttf"
			}
		case desc.FontFile3 != nil:
			if stream, isStream := _db.GetStream(desc.FontFile3); isStream {
				if data, err = _db.DecodeStream(stream); err != nil {
					return err
				}
				fileName = name + ".cff"
			}
		}
		if len(fileName) < 1 {
			_eg.Log.Debug(_ag)
		}

		entry := Font{
			FontName:       name,
			PdfFont:        font,
			IsCID:          font.IsCID(),
			IsSimple:       font.IsSimple(),
			ToUnicode:      hasToUnicode,
			FontType:       subtype,
			FontData:       data,
			FontFileName:   fileName,
			FontDescriptor: desc,
		}
		pf.Fonts = append(pf.Fonts, entry)
	}
	return nil
}

// cachedImage pairs an image with a colorspace.
type cachedImage struct {
	_fdg *_bd.Image
	_daf _bd.PdfColorspace
}

// top returns the last element of the stack, or nil when the stack is empty.
func (s *stateStack) top() *textState {
	if s.empty() {
		return nil
	}
	stack := *s
	return stack[s.size()-1]
}

// intersects reports whether the two rulings are of kinds _gaba and _ddga
// (one each) and the primary coordinate of each lies within the other's
// [_gaad, _gdaf] interval widened by the tolerance _cebae.
func (r *ruling) intersects(other *ruling) bool {
	orthogonal := (r._gggf == _gaba && other._gggf == _ddga) ||
		(other._gggf == _gaba && r._gggf == _ddga)
	// spans reports whether b's primary coordinate falls inside a's extent.
	spans := func(a, b *ruling) bool {
		return a._gaad-_cebae <= b._ccb && b._ccb <= a._gdaf+_cebae
	}
	o1 := spans(r, other)
	o2 := spans(other, r)
	result := orthogonal && o1 && o2
	if _daafa {
		_be.Printf("    intersects:  orthogonal=%t o1=%t o2=%t \u2192 %t\n"+
			"       v=%s\n"+
			"      w=%s\n",
			orthogonal, o1, o2, result, r, other)
	}
	return result
}

// addDiacritic appends `text` to the text of the word's last mark and
// replaces that text with its NFKC-normalised form. The word must contain at
// least one mark.
func (w *textWord) addDiacritic(text string) {
	last := w._bbacg[len(w._bbacg)-1]
	last._aec += text
	last._aec = _fd.NFKC.String(last._aec)
}

// setTextRise stores `v` in the text state's _cabf field.
// It is a no-op on a nil receiver.
func (to *textObject) setTextRise(v float64) {
	if to != nil {
		to._dff._cabf = v
	}
}

// isActualGrid augments the rulings with augmentGrid and checks that the
// result forms a grid: there must be at least _gafg+1 rulings in the first
// returned list and _ebgg+1 in the second, and they must be aligned. With
// _beegc set, alignment means that the first and last rulings of each list
// meet the ends of the other list's first ruling (as judged by _cdgde);
// otherwise both lists must satisfy aligned(). On success the two lists are
// returned concatenated together with true; otherwise nil and false.
func (rl rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := rl.augmentGrid()
	if len(verts) < _gafg+1 || len(horzs) < _ebgg+1 {
		if _daafa {
			_eg.Log.Info("isActualGrid: Not aligned. %d x %d < %d x %d", len(verts), len(horzs), _gafg+1, _ebgg+1)
		}
		return nil, false
	}
	if _daafa {
		_eg.Log.Info("isActualGrid: %s : %t & %t \u2192 %t", rl, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for i, r := range rl {
			_be.Printf("%4d: %v\n", i, r)
		}
	}

	if _beegc {
		v0, v1 := verts[0], verts[len(verts)-1]
		h0, h1 := horzs[0], horzs[len(horzs)-1]
		cornersMeet := _cdgde(v0._ccb-h0._gaad) &&
			_cdgde(v1._ccb-h0._gdaf) &&
			_cdgde(h0._ccb-v0._gdaf) &&
			_cdgde(h1._ccb-v0._gaad)
		if !cornersMeet {
			if _daafa {
				_eg.Log.Info("isActualGrid:  Not aligned.\n\tv0=%s\n\tv1=%s\n\th0=%s\n\th1=%s", v0, v1, h0, h1)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			if _cddf {
				_eg.Log.Info("isActualGrid: Not aligned verts. %d", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _daafa {
				_eg.Log.Info("isActualGrid: Not aligned horzs. %d", len(horzs))
			}
			return nil, false
		}
	}

	grid := append(verts, horzs...)
	return grid, true
}

// _bccf returns a new textWord made of `marks`, which must not be empty.
// The word's bounding box is the combination (via _gcff) of the mark boxes,
// _acag is box.Ury minus the Lly of that bounding box and _efag is the
// largest _gaaaf among the marks.
func _bccf(marks []*textMark, box _bd.PdfRectangle) *textWord {
	first := marks[0]
	bounds := first.PdfRectangle
	maxScale := first._gaaaf
	for _, mark := range marks[1:] {
		bounds = _gcff(bounds, mark.PdfRectangle)
		if mark._gaaaf > maxScale {
			maxScale = mark._gaaaf
		}
	}
	return &textWord{
		PdfRectangle: bounds,
		_bbacg:       marks,
		_acag:        box.Ury - bounds.Lly,
		_efag:        maxScale,
	}
}

// _ccc maps each rulingKind to its display name.
var _ccc = map[rulingKind]string{
	_eedb: "none",
	_ddga: "horizontal",
	_gaba: "vertical",
}
// _ccdde converts a list of exactly two PDF number objects to a pair of
// floats. It returns an error when the list does not have two entries or when
// GetNumbersAsFloat fails.
func _ccdde(params []_db.PdfObject) (x, y float64, err error) {
	if count := len(params); count != 2 {
		return 0, 0, _be.Errorf("invalid number of params: %d", count)
	}
	nums, err := _db.GetNumbersAsFloat(params)
	if err != nil {
		return 0, 0, err
	}
	return nums[0], nums[1], nil
}

// compositeCell is a rectangle together with a list of paragraphs.
type compositeCell struct {
	_bd.PdfRectangle
	paraList
}

// drawRectangle adds a closed rectangular subpath with corner (x, y), width
// `w` and height `h`: it starts a new subpath, moves to (x, y), draws lines
// to the other three corners and closes the path. With the _dded debug flag
// set, the rectangle spanned by the device points of the two opposite corners
// is logged first.
func (ss *shapesState) drawRectangle(x, y, w, h float64) {
	if _dded {
		ll := ss.devicePoint(x, y)
		ur := ss.devicePoint(x+w, y+h)
		rect := _bd.PdfRectangle{Llx: ll.X, Lly: ll.Y, Urx: ur.X, Ury: ur.Y}
		_eg.Log.Info("drawRectangle: %6.2f", rect)
	}
	ss.newSubPath()
	ss.moveTo(x, y)
	ss.lineTo(x+w, y)
	ss.lineTo(x+w, y+h)
	ss.lineTo(x, y+h)
	ss.closePath()
}

// depthBand returns depthRange applied to the depth indexes of `minDepth` and
// `maxDepth`, or nil when the bag's _fadg index is empty.
func (bag *wordBag) depthBand(minDepth, maxDepth float64) []int {
	if len(bag._fadg) == 0 {
		return nil
	}
	lo := bag.getDepthIdx(minDepth)
	hi := bag.getDepthIdx(maxDepth)
	return bag.depthRange(lo, hi)
}

// toTextMarks returns the TextMarks of all words of the line, in order.
// A " " mark is inserted (via _efbc) before every word whose _fgbg flag is
// set. `offset` is forwarded to the helpers that build the marks.
func (line *textLine) toTextMarks(offset *int) []TextMark {
	var marks []TextMark
	for _, word := range line._ebge {
		if word._fgbg {
			marks = _efbc(marks, offset, " ")
		}
		wordMarks := word.toTextMarks(offset)
		marks = append(marks, wordMarks...)
	}
	return marks
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// ExtractFonts returns the fonts used by the extractor's page: font name, font type,
// the raw data of the embedded font file (if embedded), the font descriptor and more.
//
// `previousPageFonts` is meant for building a font catalog that spans several pages or a
// whole document. Its entries are appended to the result unless a font with the same name
// was already found on this page, so the result contains no duplicates.
//
// NOTE: If previousPageFonts is nil, only the fonts of the current page are returned.
func (_fa *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	var page PageFonts
	if err := page.extractPageResourcesToFont(_fa._dbb); err != nil {
		return nil, err
	}
	if previousPageFonts != nil {
		for _, font := range previousPageFonts.Fonts {
			if _edd(page.Fonts, font.FontName) {
				continue
			}
			page.Fonts = append(page.Fonts, font)
		}
	}
	return &PageFonts{Fonts: page.Fonts}, nil
}

// _cecc returns the translation component of `m` as a point.
func _cecc(m _g.Matrix) _g.Point {
	tx, ty := m.Translation()
	return _g.Point{X: tx, Y: ty}
}

// subpath is a sequence of points plus a flag that subpath.close sets to true.
type subpath struct {
	_gcbb []_g.Point
	_bad  bool
}
2022-06-06 22:48:24 +00:00
// Font represents the font properties on a PDF page.
2022-06-27 19:58:38 +00:00
type Font struct { PdfFont * _bd . PdfFont ;
2022-06-06 22:48:24 +00:00
// FontName represents Font Name from font properties.
FontName string ;
// FontType represents Font Subtype entry in the font dictionary inside page resources.
// Examples : type0, Type1, MMType1, Type3, TrueType, CIDFont.
FontType string ;
// ToUnicode is true if font provides a `ToUnicode` mapping.
ToUnicode bool ;
// IsCID is true if underlying font is a composite font.
// Composite font is represented by a font dictionary whose Subtype is `Type0`
IsCID bool ;
// IsSimple is true if font is simple font.
// A simple font is limited to only 8 bit (255) character codes.
IsSimple bool ;
// FontData represents the raw data of the embedded font file.
// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CCF).
// FontData value can be indicates from `FontFile`, `FontFile2` or `FontFile3` inside Font Descriptor.
// At most, only one of `FontFile`, `FontFile2` or `FontFile3` will be FontData value.
FontData [ ] byte ;
// FontFileName is a name representing the font. it has format:
// (Font Name) + (Font Type Extension), example: helvetica.ttf.
FontFileName string ;
// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
2022-06-27 19:58:38 +00:00
FontDescriptor * _bd . PdfFontDescriptor ; } ; func ( _eefcf rulingList ) merge ( ) * ruling { _gfgde := _eefcf [ 0 ] . _ccb ; _dgce := _eefcf [ 0 ] . _gaad ; _cgcf := _eefcf [ 0 ] . _gdaf ; for _ , _fceg := range _eefcf [ 1 : ] { _gfgde += _fceg . _ccb ; if _fceg . _gaad < _dgce { _dgce = _fceg . _gaad ;
} ; if _fceg . _gdaf > _cgcf { _cgcf = _fceg . _gdaf ; } ; } ; _abcg := & ruling { _gggf : _eefcf [ 0 ] . _gggf , _beaec : _eefcf [ 0 ] . _beaec , Color : _eefcf [ 0 ] . Color , _ccb : _gfgde / float64 ( len ( _eefcf ) ) , _gaad : _dgce , _gdaf : _cgcf } ; if _cddf { _eg . Log . Info ( "\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073" , len ( _eefcf ) , _abcg ) ;
for _efcfe , _cffc := range _eefcf { _be . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _efcfe , _cffc ) ; } ; } ; return _abcg ; } ; func ( _fggff * ruling ) alignsPrimary ( _fbda * ruling ) bool { return _fggff . _gggf == _fbda . _gggf && _bf . Abs ( _fggff . _ccb - _fbda . _ccb ) < _ffd * 0.5 ;
} ; func ( _cfcef intSet ) del ( _cgabg int ) { delete ( _cfcef , _cgabg ) } ; func ( _gbe * textObject ) setFont ( _ade string , _eea float64 ) error { if _gbe == nil { return nil ; } ; _gbe . _dff . _bcdg = _eea ; _aad , _ecdd := _gbe . getFont ( _ade ) ; if _ecdd != nil { return _ecdd ;
} ; _gbe . _dff . _degee = _aad ; return nil ; } ; func ( _ddfg rulingList ) primaries ( ) [ ] float64 { _cdeef := make ( map [ float64 ] struct { } , len ( _ddfg ) ) ; for _ , _fcbgc := range _ddfg { _cdeef [ _fcbgc . _ccb ] = struct { } { } ; } ; _eeaab := make ( [ ] float64 , len ( _cdeef ) ) ; _dcfad := 0 ;
for _egab := range _cdeef { _eeaab [ _dcfad ] = _egab ; _dcfad ++ ; } ; _cc . Float64s ( _eeaab ) ; return _eeaab ; } ;
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// PageText represents the layout of text on a device page.
// It carries the raw marks (`_fgd`), a text string (`_ebce`), the exported
// marks and tables (`_dged`, `_bfbb`), a page rectangle (`_fbd`) and two lists
// of path sections (`_egg`, `_dbe`).
type PageText struct {
	_fgd  []*textMark
	_ebce string
	_dged []TextMark
	_bfbb []TextTable
	_fbd  _bd.PdfRectangle
	_egg  []pathSection
	_dbe  []pathSection
}

// close marks the subpath as closed. If the last point differs from the first
// (per _fafbc) the first point is appended again, then duplicate points are
// removed.
func (_aef *subpath) close() {
	start := _aef._gcbb[0]
	if !_fafbc(start, _aef.last()) {
		_aef.add(start)
	}
	_aef._bad = true
	_aef.removeDuplicates()
}

// _cfgee partitions the words of `_ddfe` into a list of word bags.
// For every depth index it repeatedly seeds a new bag (built by _gcab from the
// first word in reading order) and grows it with scanBand until a full pass
// adds nothing. Seed words are removed from `_ddfe` as they are consumed.
func _cfgee(_ddfe *wordBag, _ageg float64, _edcag, _deca rulingList) []*wordBag {
	var bags []*wordBag
	for _, depthIdx := range _ddfe.depthIndexes() {
		for !_ddfe.empty(depthIdx) {
			readingIdx := _ddfe.firstReadingIndex(depthIdx)
			seed := _ddfe.firstWord(readingIdx)
			bag := _gcab(seed, _ageg, _edcag, _deca)
			_ddfe.removeWord(seed, readingIdx)
			if _cba {
				// "firstWord ^^^^ %s"
				_eg.Log.Info("\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073", seed.String())
			}
			for {
				grew := false
				// The gaps are derived from the bag's `_dbga` value at the
				// start of every pass, before any scan can change the bag.
				maxInterGap := _gbfd * bag._dbga
				maxReadGap := _gga * bag._dbga
				maxDepthGap := _beedf * bag._dbga
				if _cba {
					_eg.Log.Info("\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066", bag.minDepth(), bag.maxDepth(), maxDepthGap, maxReadGap)
				}
				// minDepth/maxDepth are re-read for each scan on purpose: an
				// earlier scan may already have grown the bag.
				if _ddfe.scanBand("\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c", bag, _eaab(_ccg, 0), bag.minDepth()-maxDepthGap, bag.maxDepth()+maxDepthGap, _bbga, false, false) > 0 {
					grew = true
				}
				if _ddfe.scanBand("\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c", bag, _eaab(_ccg, maxReadGap), bag.minDepth(), bag.maxDepth(), _bbab, false, false) > 0 {
					grew = true
				}
				if grew {
					continue
				}
				candidates := _ddfe.scanBand("", bag, _eaab(_bgce, maxInterGap), bag.minDepth(), bag.maxDepth(), _gffb, true, false)
				if candidates > 0 {
					lineCount := (bag.maxDepth() - bag.minDepth()) / bag._dbga
					if (candidates > 1 && float64(candidates) > 0.3*lineCount) || candidates <= 10 {
						if _ddfe.scanBand("\u006f\u0074\u0068e\u0072", bag, _eaab(_bgce, maxInterGap), bag.minDepth(), bag.maxDepth(), _gffb, false, true) > 0 {
							grew = true
						}
					}
				}
				if !grew {
					break
				}
			}
			bags = append(bags, bag)
		}
	}
	return bags
}

// textWord is a rectangle-bounded word: its text (`_debad`), the marks it was
// built from (`_bbacg`), two float attributes (`_acag`, `_efag`) and a flag
// (`_fgbg`).
type textWord struct {
	_bd.PdfRectangle
	_acag  float64
	_debad string
	_bbacg []*textMark
	_efag  float64
	_fgbg  bool
}

// stateStack is a stack of text states.
type stateStack []*textState
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func ( _bdbf PageText ) Marks ( ) * TextMarkArray { return & TextMarkArray { _ggbg : _bdbf . _dged } } ;
// inTile returns the paras of the list whose PdfRectangle is reported as contained by
// the tile. When the `_eeca` debug flag is set the result is also printed.
func ( _dbed paraList ) inTile ( _cdeee gridTile ) paraList { var _eaff paraList ; for _ , _cdgc := range _dbed { if _cdeee . contains ( _cdgc . PdfRectangle ) { _eaff = append ( _eaff , _cdgc ) ;
} ; } ; if _eeca { _be . Printf ( "\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n" , _cdeee , len ( _eaff ) ) ; for _fcag , _egaeb := range _eaff { _be . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _fcag , _egaeb ) ;
} ; _be . Println ( "" ) ; } ; return _eaff ; } ;
// _bddbg packs two ints into a single uint64 key: first*0x1000000 + second.
// NOTE(review): keys are only unique while the second value stays below 0x1000000.
func _bddbg ( _edfd , _fgab int ) uint64 { return uint64 ( _edfd ) * 0x1000000 + uint64 ( _fgab ) } ;
// fontEntry pairs a font with an int64 value (meaning not visible here — TODO confirm).
type fontEntry struct { _fadc * _bd . PdfFont ; _fegb int64 ; } ;
// reduce returns a table that keeps only the rows and columns for which
// emptyCompositeRow / emptyCompositeColumn report false. If nothing is dropped the
// receiver itself is returned; otherwise the surviving composite cells are copied,
// re-indexed, into a new table.
// NOTE(review): the new table literal sets `_deedc` but not `_agga`; confirm that
// putComposite can be called on it safely.
func ( _edbef * textTable ) reduce ( ) * textTable { _ggcb := make ( [ ] int , 0 , _edbef . _gccb ) ;
_fcce := make ( [ ] int , 0 , _edbef . _bgcfb ) ; for _cdeg := 0 ; _cdeg < _edbef . _gccb ; _cdeg ++ { if ! _edbef . emptyCompositeRow ( _cdeg ) { _ggcb = append ( _ggcb , _cdeg ) ; } ; } ; for _eagd := 0 ; _eagd < _edbef . _bgcfb ; _eagd ++ { if ! _edbef . emptyCompositeColumn ( _eagd ) { _fcce = append ( _fcce , _eagd ) ;
} ; } ; if len ( _ggcb ) == _edbef . _gccb && len ( _fcce ) == _edbef . _bgcfb { return _edbef ; } ; _ccafe := textTable { _cdgb : _edbef . _cdgb , _bgcfb : len ( _fcce ) , _gccb : len ( _ggcb ) , _deedc : make ( map [ uint64 ] * textPara , len ( _fcce ) * len ( _ggcb ) ) } ; if _eeca { _eg . Log . Info ( "\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064" , _edbef . _bgcfb , _edbef . _gccb , len ( _fcce ) , len ( _ggcb ) ) ;
_eg . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076" , _fcce ) ; _eg . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076" , _ggcb ) ; } ; for _cede , _eaeccd := range _ggcb { for _fgae , _bgagd := range _fcce { _bfbe , _egaec := _edbef . getComposite ( _bgagd , _eaeccd ) ;
if _bfbe == nil { continue ; } ; if _eeca { _be . Printf ( "\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n" , _fgae , _cede , _bgagd , _eaeccd , _dbec ( _bfbe . merge ( ) . text ( ) , 50 ) ) ; } ; _ccafe . putComposite ( _fgae , _cede , _bfbe , _egaec ) ;
} ; } ; return & _ccafe ; } ;
// clearPath resets the state's `_gdbg` to nil and `_beg` to false, logging the state
// when the `_dded` debug flag is set.
func ( _eeeg * shapesState ) clearPath ( ) { _eeeg . _gdbg = nil ; _eeeg . _beg = false ; if _dded { _eg . Log . Info ( "\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073" , _eeeg ) ; } ; } ;
// getRight collects, for every row, the `_bbbdd` para of the cell in the last column.
// It returns nil if any of those paras reports taken(), or if two consecutive entries
// are not linked through `_egad`.
func ( _gaeec * textTable ) getRight ( ) paraList { _bdab := make ( paraList , _gaeec . _gccb ) ;
for _aecf := 0 ; _aecf < _gaeec . _gccb ; _aecf ++ { _edfag := _gaeec . get ( _gaeec . _bgcfb - 1 , _aecf ) . _bbbdd ; if _edfag . taken ( ) { return nil ; } ; _bdab [ _aecf ] = _edfag ; } ; for _gfbee := 0 ; _gfbee < _gaeec . _gccb - 1 ; _gfbee ++ { if _bdab [ _gfbee ] . _egad != _bdab [ _gfbee + 1 ] { return nil ;
} ; } ; return _bdab ; } ;
// _begc returns the separator to emit between two values: "\n" when _bcaga reports
// false for their difference, otherwise a single space.
func _begc ( _feed , _affa float64 ) string { _ceadg := ! _bcaga ( _feed - _affa ) ; if _ceadg { return "\u000a" ; } ; return "\u0020" ; } ;
// _bdcd is a debug helper that logs the number of ruling lists under the title
// `_dbda` and prints each list on its own line.
func _bdcd ( _dbda string , _eafa [ ] rulingList ) { _eg . Log . Info ( "\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073" , len ( _eafa ) , _dbda ) ;
for _fdaa , _bdaf := range _eafa { _be . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _fdaa , _bdaf . String ( ) ) ; } ; } ;
// nextLine calls moveLP(0, -`_dff._dde`).
func ( _aae * textObject ) nextLine ( ) { _aae . moveLP ( 0 , - _aae . _dff . _dde ) } ;
// _abfgd returns `_ageb`, followed by the values of `_cddc` that lie strictly between
// min(_ageb, _gagfe) and max(_ageb, _gagfe), followed by `_gagfe`.
// The scan stops at the first value >= the upper bound, so it presumably expects
// `_cddc` to be sorted ascending — TODO confirm.
func _abfgd ( _cddc [ ] float64 , _ageb , _gagfe float64 ) [ ] float64 { _gcffd , _dgedb := _ageb , _gagfe ;
if _dgedb < _gcffd { _gcffd , _dgedb = _dgedb , _gcffd ; } ; _ccbc := make ( [ ] float64 , 0 , len ( _cddc ) + 2 ) ; _ccbc = append ( _ccbc , _ageb ) ; for _ , _gfecd := range _cddc { if _gfecd <= _gcffd { continue ; } else if _gfecd >= _dgedb { break ; } ; _ccbc = append ( _ccbc , _gfecd ) ;
} ; _ccbc = append ( _ccbc , _gagfe ) ; return _ccbc ; } ;
// makeRectRuling tries to interpret the first four points of the subpath as a
// rectangle and convert it into a ruling of colour `_bbdeg`.
// Each of the four edges is classified with _ebgee; the shape is accepted when there
// are two edges of each kind, or two of one kind and none of the other with the pair
// passing _dbfdf / _gfde. The boolean result is false when the shape is rejected, when
// _ccbff classifies the rectangle as `_eedb`, or when asRuling fails.
// NOTE(review): the first rejection returns `&ruling{}` while the later ones return
// nil; the method also indexes `_gcbb[:4]` without a length check.
func ( _beef * subpath ) makeRectRuling ( _bbdeg _fb . Color ) ( * ruling , bool ) { if _dddf { _eg . Log . Info ( "\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076" , _beef ) ;
} ; _ggda := _beef . _gcbb [ : 4 ] ; _ecga := make ( map [ int ] rulingKind , len ( _ggda ) ) ; for _eefbf , _gege := range _ggda { _eefecd := _beef . _gcbb [ ( _eefbf + 1 ) % 4 ] ; _ecga [ _eefbf ] = _ebgee ( _gege , _eefecd ) ; if _dddf { _be . Printf ( "\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066" , _eefbf , _ecga [ _eefbf ] , _gege , _eefecd ) ;
} ; } ; if _dddf { _be . Printf ( "\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a" , _ecga ) ; } ; var _dagf , _adbc [ ] int ; for _bbfafa , _acgb := range _ecga { switch _acgb { case _ddga : _adbc = append ( _adbc , _bbfafa ) ; case _gaba : _dagf = append ( _dagf , _bbfafa ) ;
} ; } ; if _dddf { _be . Printf ( "\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a" , len ( _adbc ) , _adbc ) ; _be . Printf ( "\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a" , len ( _dagf ) , _dagf ) ;
} ; _fgcgc := ( len ( _adbc ) == 2 && len ( _dagf ) == 2 ) || ( len ( _adbc ) == 2 && len ( _dagf ) == 0 && _dbfdf ( _ggda [ _adbc [ 0 ] ] , _ggda [ _adbc [ 1 ] ] ) ) || ( len ( _dagf ) == 2 && len ( _adbc ) == 0 && _gfde ( _ggda [ _dagf [ 0 ] ] , _ggda [ _dagf [ 1 ] ] ) ) ; if _dddf { _be . Printf ( " \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a" , len ( _adbc ) , len ( _dagf ) , _fgcgc ) ;
} ; if ! _fgcgc { if _dddf { _eg . Log . Error ( "\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v" , _beef ) ; _be . Printf ( " \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a" , len ( _adbc ) , len ( _dagf ) , _fgcgc ) ;
} ; return & ruling { } , false ; } ; if len ( _dagf ) == 0 { for _ebbfg , _daec := range _ecga { if _daec != _ddga { _dagf = append ( _dagf , _ebbfg ) ; } ; } ; } ; if len ( _adbc ) == 0 { for _dadb , _befe := range _ecga { if _befe != _gaba { _adbc = append ( _adbc , _dadb ) ; } ; } ; } ; if _dddf { _eg . Log . Info ( "\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a" + "\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a" + "\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a" + "\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076" , len ( _adbc ) , len ( _dagf ) , len ( _ggda ) , _adbc , _dagf , _ggda ) ;
// Pick the Y extent from the first two `_adbc` points and the X extent from the first
// two `_dagf` points; the rectangle is normalised afterwards so Llx<=Urx and Lly<=Ury.
} ; var _aeae , _abee , _cfcg , _eagc _g . Point ; if _ggda [ _adbc [ 0 ] ] . Y > _ggda [ _adbc [ 1 ] ] . Y { _cfcg , _eagc = _ggda [ _adbc [ 0 ] ] , _ggda [ _adbc [ 1 ] ] ; } else { _cfcg , _eagc = _ggda [ _adbc [ 1 ] ] , _ggda [ _adbc [ 0 ] ] ; } ; if _ggda [ _dagf [ 0 ] ] . X > _ggda [ _dagf [ 1 ] ] . X { _aeae , _abee = _ggda [ _dagf [ 0 ] ] , _ggda [ _dagf [ 1 ] ] ;
} else { _aeae , _abee = _ggda [ _dagf [ 1 ] ] , _ggda [ _dagf [ 0 ] ] ; } ; _affd := _bd . PdfRectangle { Llx : _aeae . X , Urx : _abee . X , Lly : _eagc . Y , Ury : _cfcg . Y } ; if _affd . Llx > _affd . Urx { _affd . Llx , _affd . Urx = _affd . Urx , _affd . Llx ; } ; if _affd . Lly > _affd . Ury { _affd . Lly , _affd . Ury = _affd . Ury , _affd . Lly ;
} ; _cfed := rectRuling { PdfRectangle : _affd , _aegd : _ccbff ( _affd ) , Color : _bbdeg } ; if _cfed . _aegd == _eedb { if _dddf { _eg . Log . Error ( "\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c" ) ;
} ; return nil , false ; } ; _eddgb , _dccgd := _cfed . asRuling ( ) ; if ! _dccgd { if _dddf { _eg . Log . Error ( "\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg" ) ; } ; return nil , false ; } ; if _daafa { _be . Printf ( "\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a" , _eddgb . String ( ) ) ;
} ; return _eddgb , true ; } ;
// intSet is a set of ints.
type intSet map [ int ] struct { } ;
// _dbaee returns the intersection of two rectangles (max of the lower bounds, min of
// the upper bounds) and true, or a zero rectangle and false when _bfgg reports false
// for the pair.
func _dbaee ( _afe , _eeff _bd . PdfRectangle ) ( _bd . PdfRectangle , bool ) { if ! _bfgg ( _afe , _eeff ) { return _bd . PdfRectangle { } , false ; } ; return _bd . PdfRectangle { Llx : _bf . Max ( _afe . Llx , _eeff . Llx ) , Urx : _bf . Min ( _afe . Urx , _eeff . Urx ) , Lly : _bf . Max ( _afe . Lly , _eeff . Lly ) , Ury : _bf . Min ( _afe . Ury , _eeff . Ury ) } , true ;
} ;
// renderText converts the bytes `_cfdd` to character codes and strings using the
// current font, and appends one text mark per decoded string to `_dcaa`, advancing the
// matrix `_cbff` after each glyph.
// It returns nil without doing anything when `_gdcg` is set, and an error when the
// font has no metrics for a character code. Strings that decode to a single NUL rune
// and marks rejected by newTextMark are skipped.
func ( _gbd * textObject ) renderText ( _cfdd [ ] byte ) error { if _gbd . _gdcg { _eg . Log . Debug ( "\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e" ) ;
return nil ; } ; _gbg := _gbd . getCurrentFont ( ) ; _gdcd := _gbg . BytesToCharcodes ( _cfdd ) ; _fbbe , _dbfe , _caa := _gbg . CharcodesToStrings ( _gdcd ) ; if _caa > 0 { _eg . Log . Debug ( "\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064" , _dbfe , _caa ) ;
// Accumulate the char/miss counters, then determine the glyph scale (`_gcg`, or 1 for
// Type3 fonts) and the width of a space, falling back from the font's rune metrics to
// its metrics for code 32 and finally to the default font.
} ; _gbd . _dff . _fdfc += _dbfe ; _gbd . _dff . _dcef += _caa ; _age := _gbd . _dff ; _adce := _age . _bcdg ; _gffd := _age . _aee / 100.0 ; _faaed := _gcg ; if _gbg . Subtype ( ) == "\u0054\u0079\u0070e\u0033" { _faaed = 1 ; } ; _bfd , _agf := _gbg . GetRuneMetrics ( ' ' ) ; if ! _agf { _bfd , _agf = _gbg . GetCharMetrics ( 32 ) ;
} ; if ! _agf { _bfd , _ = _bd . DefaultFont ( ) . GetRuneMetrics ( ' ' ) ; } ; _eaad := _bfd . Wx * _faaed ; _eg . Log . Trace ( "\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066" , _eaad , _fbbe , _gbg , _adce ) ;
_bgf := _g . NewMatrix ( _adce * _gffd , 0 , 0 , _adce , 0 , _age . _cabf ) ; if _agbg { _eg . Log . Info ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071" , len ( _gdcd ) , _gdcd , _fbbe ) ;
} ; _eg . Log . Trace ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071" , len ( _gdcd ) , _gdcd , len ( _fbbe ) ) ; _aege := _gbd . getFillColor ( ) ;
// Per-glyph loop: `_ebgf` is CTM x `_cbff` x `_bgf` for the current glyph; `_gdeg` is
// the extra `_fec` spacing applied only to a lone space (rune 32).
_fcgf := _gbd . getStrokeColor ( ) ; for _ecf , _bbd := range _fbbe { _bfgf := [ ] rune ( _bbd ) ; if len ( _bfgf ) == 1 && _bfgf [ 0 ] == '\x00' { continue ; } ; _cagb := _gdcd [ _ecf ] ; _ebgf := _gbd . _cddag . CTM . Mult ( _gbd . _cbff ) . Mult ( _bgf ) ; _gdeg := 0.0 ; if len ( _bfgf ) == 1 && _bfgf [ 0 ] == 32 { _gdeg = _age . _fec ;
} ; _bdgb , _gea := _gbg . GetCharMetrics ( _cagb ) ; if ! _gea { _eg . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073" , _cagb , _bfgf , _bfgf , _gbg ) ;
return _be . Errorf ( "\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064" , _gbg . String ( ) , _cagb ) ; } ; _eaec := _g . Point { X : _bdgb . Wx * _faaed , Y : _bdgb . Wy * _faaed } ;
// `_eagg` is the glyph advance without the `_cab` spacing, `_aacg` the advance with it;
// the former positions the mark's end, the latter is concatenated onto `_cbff`.
_eagg := _g . Point { X : ( _eaec . X * _adce + _gdeg ) * _gffd } ; _aacg := _g . Point { X : ( _eaec . X * _adce + _age . _cab + _gdeg ) * _gffd } ; if _agbg { _eg . Log . Info ( "\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066" , _adce , _age . _cab , _age . _fec , _gffd ) ;
_eg . Log . Info ( "\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f" , _eaec , _eagg , _aacg ) ; } ; _fcd := _fdde ( _eagg ) ; _cge := _fdde ( _aacg ) ; _gfc := _gbd . _cddag . CTM . Mult ( _gbd . _cbff ) . Mult ( _fcd ) ;
if _cgaa { _eg . Log . Info ( "e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a" + "\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a" + "\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073" , _gbd . _cddag . CTM , _gbd . _cbff , _cge , _cecc ( _gbd . _cddag . CTM . Mult ( _gbd . _cbff ) . Mult ( _cge ) ) , _fcd , _gfc , _cecc ( _gfc ) ) ;
} ; _cgee , _ceba := _gbd . newTextMark ( _a . ExpandLigatures ( _bfgf ) , _ebgf , _cecc ( _gfc ) , _bf . Abs ( _eaad * _ebgf . ScalingFactorX ( ) ) , _gbg , _gbd . _dff . _cab , _aege , _fcgf ) ; if ! _ceba { _eg . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067" ) ;
// NOTE(review): `_gbg` has already been dereferenced above, so the nil check below can
// never be the first place a nil font is noticed.
continue ; } ; if _gbg == nil { _eg . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e" ) ; } else if _gbg . Encoder ( ) == nil { _eg . Log . Debug ( "E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073" , _gbg ) ;
} else { if _geae , _gcbf := _gbg . Encoder ( ) . CharcodeToRune ( _cagb ) ; _gcbf { _cgee . _degeed = string ( _geae ) ; } ; } ; _eg . Log . Trace ( "i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073" , _ecf , _cagb , _cgee , _ebgf ) ;
_gbd . _dcaa = append ( _gbd . _dcaa , & _cgee ) ; _gbd . _cbff . Concat ( _cge ) ; } ; return nil ; } ;
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// PageFonts is the set of fonts extracted from a single PDF page.
type PageFonts struct {
	Fonts []Font
}

// appendWord adds `_debb` to the end of the line's word list, replaces the
// line's bounding box with _gcff(line box, word box), and raises the line's
// `_aefd` and `_gddec` to the word's `_efag` and `_acag` when those are larger.
func (_daafd *textLine) appendWord(_debb *textWord) {
	_daafd._ebge = append(_daafd._ebge, _debb)
	_daafd.PdfRectangle = _gcff(_daafd.PdfRectangle, _debb.PdfRectangle)
	if _daafd._aefd < _debb._efag {
		_daafd._aefd = _debb._efag
	}
	if _daafd._gddec < _debb._acag {
		_daafd._gddec = _debb._acag
	}
}

// newSubPath discards the current path via clearPath and, when the `_dded`
// debug flag is set, logs the resulting state.
func (_ggbf *shapesState) newSubPath() {
	_ggbf.clearPath()
	if !_dded {
		return
	}
	_eg.Log.Info("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073", _ggbf)
}

// Error messages used when font data cannot be obtained.
const (
	// "ERROR: Can't convert font object, invalid type"
	_ab = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
	// "ERROR: Can't get font properties, font not found"
	_bge = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"
	// "ERROR: Can't get font stream, invalid type"
	_ag = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
)
// sortStrict sorts the rulings in place. Keys, in order of precedence:
//  1. `_gggf`, larger value first;
//  2. `_ccb` ascending, considered only when _bcaga reports false for the
//     difference of the two values;
//  3. `_gaad` ascending;
//  4. `_gdaf` ascending.
func (_aeea rulingList) sortStrict() {
	less := func(i, j int) bool {
		a, b := _aeea[i], _aeea[j]
		switch {
		case a._gggf != b._gggf:
			return a._gggf > b._gggf
		case !_bcaga(a._ccb - b._ccb):
			return a._ccb < b._ccb
		case a._gaad != b._gaad:
			return a._gaad < b._gaad
		default:
			return a._gdaf < b._gdaf
		}
	}
	_cc.Slice(_aeea, less)
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// String returns a multi-line description of the page text: a header line, one
// line per text mark, and a footer line.
func (_bcf PageText) String() string {
	summary := _be.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_bcf._fgd))
	parts := make([]string, 0, len(_bcf._fgd)+2)
	parts = append(parts, "\u002d"+summary)
	for _, mark := range _bcf._fgd {
		parts = append(parts, mark.String())
	}
	parts = append(parts, "\u002b"+summary)
	return _dc.Join(parts, "\u000a")
}

// findTableGrid builds a table from the tiles of `_bccb`, placing in each cell
// the paras that lie inside the corresponding tile.
// It returns the table (after reduceTiling and subdivide) and the set of paras
// that were placed in it, or (nil, nil) when the number of empty tiles exceeds
// (1 - _fcbe) of all cells, or when every cell get(x, 0) is non-nil with `_gfce`
// set.
func (_gabf paraList) findTableGrid(_bccb gridTiling) (*textTable, map[*textPara]struct{}) {
	cols := len(_bccb._defd)
	rows := len(_bccb._eabe)
	table := textTable{
		_cdgb:  true,
		_bgcfb: cols,
		_gccb:  rows,
		_deedc: make(map[uint64]*textPara, cols*rows),
		_agga:  make(map[uint64]compositeCell, cols*rows),
	}
	used := make(map[*textPara]struct{})
	maxEmpty := int((1.0 - _fcbe) * float64(cols*rows))
	empties := 0
	if _gdcf {
		_eg.Log.Info("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064", cols, rows)
	}
	for y, rowKey := range _bccb._eabe {
		rowTiles, found := _bccb._bdgaf[rowKey]
		if !found {
			continue
		}
		for x, colKey := range _bccb._defd {
			tile, present := rowTiles[colKey]
			if !present {
				continue
			}
			inside := _gabf.inTile(tile)
			if len(inside) > 0 {
				table.putComposite(x, y, inside, tile.PdfRectangle)
				for _, para := range inside {
					used[para] = struct{}{}
				}
				continue
			}
			// Empty tile: give up once too many of them have been seen.
			empties++
			if empties > maxEmpty {
				if _gdcf {
					_eg.Log.Info("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064", empties)
				}
				return nil, nil
			}
		}
	}
	// Count the cells of row 0 that are missing or do not have `_gfce` set.
	count := 0
	for x := 0; x < cols; x++ {
		cell := table.get(x, 0)
		if cell == nil || !cell._gfce {
			count++
		}
	}
	if count == 0 {
		if _gdcf {
			_eg.Log.Info("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030")
		}
		return nil, nil
	}
	result := table.reduceTiling(_bccb, _ceacc)
	result = result.subdivide()
	return result, used
}

// complete reports whether the tile has all four borders.
func (_eafd gridTile) complete() bool {
	return _eafd.numBorders() == 4
}

// _bace concatenates all the ruling lists and splits the result with
// vertsHorzs.
func _bace(_dfge []rulingList) (rulingList, rulingList) {
	var all rulingList
	for _, list := range _dfge {
		all = append(all, list...)
	}
	return all.vertsHorzs()
}

// _bcfa is a meta TextMark with text "[X]" standing for an original " ",
// drawn with white fill and stroke.
var _bcfa = TextMark{
	Text:        "\u005b\u0058\u005d",
	Original:    "\u0020",
	Meta:        true,
	FillColor:   _fb.White,
	StrokeColor: _fb.White,
}

// _gaeb starts a new text line from the first word of `_eedg` at index
// `_acgc`: the line takes the word's rectangle, `_efag` and `_acag`, and the
// word is then moved from the bag into the line with pullWord.
func _gaeb(_eedg *wordBag, _acgc int) *textLine {
	word := _eedg.firstWord(_acgc)
	line := &textLine{
		PdfRectangle: word.PdfRectangle,
		_aefd:        word._efag,
		_gddec:       word._acag,
	}
	line.pullWord(_eedg, word, _acgc)
	return line
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// Elements returns the TextMarks held by the array.
func (_dgge *TextMarkArray) Elements() []TextMark {
	return _dgge._ggbg
}

// putComposite stores the paras `_ccead`, bounded by `_geeb`, as the composite
// cell at column `_efeg`, row `_ggde`. An empty para list is reported as an
// error and ignored. The cell's bounding box is refreshed with updateBBox
// before it is stored.
func (_affdd *textTable) putComposite(_efeg, _ggde int, _ccead paraList, _geeb _bd.PdfRectangle) {
	if len(_ccead) == 0 {
		_eg.Log.Error("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073")
		return
	}
	_fecdd := compositeCell{PdfRectangle: _geeb, paraList: _ccead}
	if _eeca {
		_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a", _efeg, _ggde, _fecdd.String())
	}
	_fecdd.updateBBox()
	// A textTable can be built without its composite-cell map (the table
	// literal in reduce sets only `_deedc`). Writing to a nil map panics, so
	// allocate it on first use.
	if _affdd._agga == nil {
		_affdd._agga = make(map[uint64]compositeCell)
	}
	_affdd._agga[_bddbg(_efeg, _ggde)] = _fecdd
}

// parasBBox returns the cell's paras and its bounding rectangle.
func (_fgcge compositeCell) parasBBox() (paraList, _bd.PdfRectangle) {
	return _fgcge.paraList, _fgcge.PdfRectangle
}

// minDepth returns `_adbg` - (Ury - `_dbga`) for the bag.
func (_gccec *wordBag) minDepth() float64 {
	return _gccec._adbg - (_gccec.Ury - _gccec._dbga)
}
// merge combines all the paras of the list into one para.
// The list is first sorted in place with sortReadingOrder; the result is built
// by _gade from the combined rectangle (folded with _gcff) and the
// concatenation of every para's `_gadg` lines. It returns nil for an empty list.
func (_dbgag paraList) merge() *textPara {
	_eg.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_dbgag))
	if len(_dbgag) == 0 {
		return nil
	}
	_dbgag.sortReadingOrder()
	_decb := _dbgag[0].PdfRectangle
	// Start from a copy of the first para's lines. Appending directly onto
	// `_dbgag[0]._gadg` could write into spare capacity of that para's backing
	// array and corrupt any other slice sharing it.
	_eefb := append(_dbgag[0]._gadg[:0:0], _dbgag[0]._gadg...)
	for _, _dffd := range _dbgag[1:] {
		_decb = _gcff(_decb, _dffd.PdfRectangle)
		_eefb = append(_eefb, _dffd._gadg...)
	}
	return _gade(_decb, _eefb)
}

// showTextAdjusted processes an array of strings and numbers.
// Each number is scaled by -0.001 * `_dff._bcdg` and applied as a translation
// to `_cbff`; each string is rendered with renderText. Any other element type,
// or a value that cannot be converted, aborts with an error.
func (_eac *textObject) showTextAdjusted(_ead *_db.PdfObjectArray) error {
	// Always false here, so the offset is always applied along X.
	_ggg := false
	for _, _addf := range _ead.Elements() {
		switch _addf.(type) {
		case *_db.PdfObjectFloat, *_db.PdfObjectInteger:
			_cbd, _fgee := _db.GetNumberAsFloat(_addf)
			if _fgee != nil {
				_eg.Log.Debug("\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", _addf, _ead)
				return _fgee
			}
			_gdbd, _cdda := -_cbd*0.001*_eac._dff._bcdg, 0.0
			if _ggg {
				_cdda, _gdbd = _gdbd, _cdda
			}
			_afa := _fdde(_g.Point{X: _gdbd, Y: _cdda})
			_eac._cbff.Concat(_afa)
		case *_db.PdfObjectString:
			_daed, _bec := _db.GetStringBytes(_addf)
			if !_bec {
				_eg.Log.Trace("s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", _addf, _ead)
				return _db.ErrTypeError
			}
			// NOTE(review): the error from renderText is dropped here, whereas
			// showText returns it. Kept as-is so one bad glyph does not abort
			// the rest of the array — confirm this is intended.
			_eac.renderText(_daed)
		default:
			_eg.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076", _addf, _ead)
			return _db.ErrTypeError
		}
	}
	return nil
}

// bounded is implemented by anything that has a bounding rectangle.
type bounded interface {
	bbox() _bd.PdfRectangle
}

// getFontDirect looks up the font dictionary named `_abfg` and builds a font
// from it. A failure to build the font is logged and returned together with
// whatever NewPdfFontFromPdfObject produced.
func (_fea *textObject) getFontDirect(_abfg string) (*_bd.PdfFont, error) {
	_dacg, _edcd := _fea.getFontDict(_abfg)
	if _edcd != nil {
		return nil, _edcd
	}
	_gfee, _edcd := _bd.NewPdfFontFromPdfObject(_dacg)
	if _edcd != nil {
		_eg.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _abfg, _edcd)
	}
	return _gfee, _edcd
}

// equals reports whether two rulings have the same `_gggf` and whether _cecgg
// reports true for each of their `_ccb`, `_gaad` and `_gdaf` pairs.
func (_cdca *ruling) equals(_cdef *ruling) bool {
	return _cdca._gggf == _cdef._gggf &&
		_cecgg(_cdca._ccb, _cdef._ccb) &&
		_cecgg(_cdca._gaad, _cdef._gaad) &&
		_cecgg(_cdca._gdaf, _cdef._gdaf)
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// String returns a multi-line description of the stack: a header with the
// number of entries followed by one indexed line per entry ("<nil>" for nil
// entries).
func (_ded *stateStack) String() string {
	entries := *_ded
	out := make([]string, 0, len(entries)+1)
	out = append(out, _be.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(entries)))
	for idx, state := range entries {
		var desc string
		if state == nil {
			desc = "\u003c\u006e\u0069l\u003e"
		} else {
			desc = state.String()
		}
		out = append(out, _be.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", idx, desc))
	}
	return _dc.Join(out, "\u000a")
}

// setTextRenderMode stores `_gdf` as the RenderMode of the text state.
// It is a no-op on a nil receiver.
func (_bbgb *textObject) setTextRenderMode(_gdf int) {
	if _bbgb != nil {
		_bbgb._dff._ebb = RenderMode(_gdf)
	}
}

// yMean returns the mean of the Y coordinates of the ruling's two end points.
func (_dfbb lineRuling) yMean() float64 {
	ySum := _dfbb._eadee.Y + _dfbb._feab.Y
	return 0.5 * ySum
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// String returns the name registered for the kind in `_ccc`, or a
// "Not a ruling" message containing the numeric value when there is none.
func (_bfcgf rulingKind) String() string {
	if name, known := _ccc[_bfcgf]; known {
		return name
	}
	return _be.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _bfcgf)
}

// readBefore reports whether para `_ggab` should be read before para `_degc`.
// It is true when _aceb holds for the pair and the first para has the larger
// Lly, or when the first para's `_bgca` lies entirely to the left of the
// second's and no third para in the Lly range between them overlaps the
// horizontal extent [max Llx, min Urx] of the two. `_gdff` is the ordering
// passed on to llyRange.
func (_adcec paraList) readBefore(_gdff []int, _ggab, _degc int) bool {
	a, b := _adcec[_ggab], _adcec[_degc]
	if _aceb(a, b) && a.Lly > b.Lly {
		return true
	}
	if !(a._bgca.Urx < b._bgca.Llx) {
		return false
	}
	lo, hi := a.Lly, b.Lly
	if lo > hi {
		lo, hi = hi, lo
	}
	left := _bf.Max(a._bgca.Llx, b._bgca.Llx)
	right := _bf.Min(a._bgca.Urx, b._bgca.Urx)
	for _, idx := range _adcec.llyRange(_gdff, lo, hi) {
		if idx == _ggab || idx == _degc {
			continue
		}
		other := _adcec[idx]
		if other._bgca.Llx <= right && left <= other._bgca.Urx {
			return false
		}
	}
	return true
}

// rectRuling is a coloured rectangle together with its ruling kind and mark
// kind.
type rectRuling struct {
	_aegd  rulingKind
	_bfege markKind
	_fb.Color
	_bd.PdfRectangle
}

// _edagd returns the left edge of `_fdage` minus the right edge of `_cae`.
func _edagd(_fdage, _cae bounded) float64 {
	leftEdge := _fdage.bbox().Llx
	rightEdge := _cae.bbox().Urx
	return leftEdge - rightEdge
}

// toCellTextMarks returns the text marks of all the para's lines in order.
// When `_addg` is set and a non-final line ends in a hyphen, its marks are
// passed through _gfbae and no separator follows it; otherwise every non-final
// line is followed by the separator chosen by _begc, added with _efbc.
// `_bffe` is handed through to the helpers that build the marks.
func (_aaba *textPara) toCellTextMarks(_bffe *int) []TextMark {
	var marks []TextMark
	lastIdx := len(_aaba._gadg) - 1
	for i, line := range _aaba._gadg {
		lineMarks := line.toTextMarks(_bffe)
		isLast := i == lastIdx
		dehyphenate := _addg && line.endsInHyphen() && !isLast
		if dehyphenate {
			lineMarks = _gfbae(lineMarks, _bffe)
		}
		marks = append(marks, lineMarks...)
		if !dehyphenate && !isLast {
			marks = _efbc(marks, _bffe, _begc(line._gddec, _aaba._gadg[i+1]._gddec))
		}
	}
	return marks
}

// llyOrdering returns the indexes of the paras stably sorted by ascending Lly.
func (_acfd paraList) llyOrdering() []int {
	order := make([]int, len(_acfd))
	for i := range order {
		order[i] = i
	}
	_cc.SliceStable(order, func(x, y int) bool {
		return _acfd[order[x]].Lly < _acfd[order[y]].Lly
	})
	return order
}

// _bgce reports whether the left edge of `_dec` lies in the half-open interval
// [bag.Urx, bag.Urx + _aadc).
func _bgce(_bcac *wordBag, _dec *textWord, _aadc float64) bool {
	if _dec.Llx < _bcac.Urx {
		return false
	}
	return _dec.Llx < _bcac.Urx+_aadc
}

// llyRange returns the sub-slice of the ordering `_fbddb` whose paras have Lly
// in [_baaed, _feda], or nil when that interval lies entirely outside the Lly
// values of the first and last ordered paras.
// NOTE(review): the binary searches presumably require `_fbddb` to be sorted
// by ascending Lly — confirm against callers.
func (_bcbd paraList) llyRange(_fbddb []int, _baaed, _feda float64) []int {
	n := len(_bcbd)
	if _feda < _bcbd[_fbddb[0]].Lly || _baaed > _bcbd[_fbddb[n-1]].Lly {
		return nil
	}
	lyAt := func(i int) float64 { return _bcbd[_fbddb[i]].Lly }
	first := _cc.Search(n, func(i int) bool { return lyAt(i) >= _baaed })
	end := _cc.Search(n, func(i int) bool { return lyAt(i) > _feda })
	return _fbddb[first:end]
}

// snapToGroups splits the rulings with vertsHorzs, runs
// snapToGroupsDirection on each non-empty part, and returns the first part
// followed by the second.
func (_edcg rulingList) snapToGroups() rulingList {
	first, second := _edcg.vertsHorzs()
	if len(first) > 0 {
		first = first.snapToGroupsDirection()
	}
	if len(second) > 0 {
		second = second.snapToGroupsDirection()
	}
	combined := append(first, second...)
	combined.log("\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073")
	return combined
}

// augmentGrid splits the rulings with vertsHorzs and, where the bounding box of
// one part extends past the other by more than `_cebae`, adds a ruling of mark
// kind `_bcbda` on that side: kind `_gaba` rulings are added to the first
// list at the second list's Llx / Urx, and kind `_ddga` rulings to the second
// list at the first list's Lly / Ury.
// If either part is empty, or the combined length still equals the original
// list's length, the unmodified parts are returned.
func (_effb rulingList) augmentGrid() (rulingList, rulingList) {
	partA, partB := _effb.vertsHorzs()
	if len(partA) == 0 || len(partB) == 0 {
		return partA, partB
	}
	origA, origB := partA, partB
	boxA := partA.bbox()
	boxB := partB.bbox()
	if _daafa {
		_eg.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066", boxA)
		_eg.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066", boxB)
	}
	if boxB.Llx < boxA.Llx-_cebae {
		added := &ruling{_beaec: _bcbda, _gggf: _gaba, _ccb: boxB.Llx, _gaad: boxA.Lly, _gdaf: boxA.Ury}
		partA = append(rulingList{added}, partA...)
	}
	if boxB.Urx > boxA.Urx+_cebae {
		added := &ruling{_beaec: _bcbda, _gggf: _gaba, _ccb: boxB.Urx, _gaad: boxA.Lly, _gdaf: boxA.Ury}
		partA = append(partA, added)
	}
	if boxA.Lly < boxB.Lly-_cebae {
		added := &ruling{_beaec: _bcbda, _gggf: _ddga, _ccb: boxA.Lly, _gaad: boxB.Llx, _gdaf: boxB.Urx}
		partB = append(rulingList{added}, partB...)
	}
	if boxA.Ury > boxB.Ury+_cebae {
		added := &ruling{_beaec: _bcbda, _gggf: _ddga, _ccb: boxA.Ury, _gaad: boxB.Llx, _gdaf: boxB.Urx}
		partB = append(partB, added)
	}
	if len(partA)+len(partB) == len(_effb) {
		return origA, origB
	}
	augmented := append(partA, partB...)
	_effb.log("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064")
	augmented.log("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d")
	return partA, partB
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// ExtractPageImages returns the image contents of the page extractor, including
// data and position/size information for each image.
// A set of options to control page image extraction can be passed in. The
// options parameter can be nil for the default options. By default, inline
// stencil masks are not extracted.
func (_cb *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	ctx := &imageExtractContext{_gfe: options}
	if err := ctx.extractContentStreamImages(_cb._da, _cb._dbb); err != nil {
		return nil, err
	}
	return &PageImages{Images: ctx._daa}, nil
}

// getDepthIdx converts _dfdb to an index with _fdeba and clamps it to the range
// spanned by the first and last entries of depthIndexes().
// It panics if depthIndexes() is empty.
func (_cefa *wordBag) getDepthIdx(_dfdb float64) int {
	indexes := _cefa.depthIndexes()
	idx := _fdeba(_dfdb)
	lowest, highest := indexes[0], indexes[len(indexes)-1]
	switch {
	case idx < lowest:
		return lowest
	case idx > highest:
		return highest
	}
	return idx
}

// showText forwards the string bytes _ccde to renderText and returns its error.
func (_bbc *textObject) showText(_ccde []byte) error {
	return _bbc.renderText(_ccde)
}
// processOperand is the content stream handler used for image extraction.
//
// It reacts to two operators, each only when exactly one parameter is present:
//   - "BI": the parameter must be an inline image. Images whose ImageMask
//     resolves to true are skipped unless IncludeInlineStencilMasks is set.
//   - "Do": the parameter must be a name. The named XObject is dispatched to
//     the image or form extractor according to its type.
//
// All other operations are ignored and nil is returned.
func (_cf *imageExtractContext) processOperand(_fedf *_cce.ContentStreamOperation, _edc _cce.GraphicsState, _fad *_bd.PdfPageResources) error {
	if _fedf.Operand == "\u0042\u0049" && len(_fedf.Params) == 1 {
		_df, _bc := _fedf.Params[0].(*_cce.ContentStreamInlineImage)
		if !_bc {
			return nil
		}
		if _cfc, _eba := _db.GetBoolVal(_df.ImageMask); _eba {
			if _cfc && !_cf._gfe.IncludeInlineStencilMasks {
				return nil
			}
		}
		return _cf.extractInlineImage(_df, _edc, _fad)
	} else if _fedf.Operand == "\u0044\u006f" && len(_fedf.Params) == 1 {
		_bff, _gg := _db.GetName(_fedf.Params[0])
		if !_gg {
			_eg.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065")
			return _cg
		}
		_, _faeg := _fad.GetXObjectByName(*_bff)
		switch _faeg {
		case _bd.XObjectTypeImage:
			return _cf.extractXObjectImage(_bff, _edc, _fad)
		case _bd.XObjectTypeForm:
			return _cf.extractFormImages(_bff, _edc, _fad)
		}
	}
	return nil
}

// checkOp validates the parameter count of operation _bee.
//
// If the receiver is nil it only logs that the operand occurred outside a text
// object (showing at most _gcda parameters) and then continues with the check.
// When _gcda is non-negative and the operation does not have exactly _gcda
// parameters, it logs the mismatch and returns false; the returned error is
// non-nil only when _bfb is true. Otherwise it returns (true, nil).
func (_cfdc *textObject) checkOp(_bee *_cce.ContentStreamOperation, _gcda int, _bfb bool) (_aage bool, _beeg error) {
	if _cfdc == nil {
		var _faec []_db.PdfObject
		if _gcda > 0 {
			_faec = _bee.Params
			if len(_faec) > _gcda {
				_faec = _faec[:_gcda]
			}
		}
		_eg.Log.Debug("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076", _bee.Operand, _faec)
	}
	if _gcda >= 0 {
		if len(_bee.Params) != _gcda {
			if _bfb {
				_beeg = _f.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
			}
			_eg.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _bee.Operand, _gcda, len(_bee.Params), _bee.Params)
			return false, _beeg
		}
	}
	return true, nil
}

// secMinMax returns the smallest _gaad and the largest _gdaf over all rulings
// in the list. The list must be non-empty; element 0 is read unconditionally.
func (_efce rulingList) secMinMax() (float64, float64) {
	_gcgg, _fbfb := _efce[0]._gaad, _efce[0]._gdaf
	for _, _adbaf := range _efce[1:] {
		if _adbaf._gaad < _gcgg {
			_gcgg = _adbaf._gaad
		}
		if _adbaf._gdaf > _fbfb {
			_fbfb = _adbaf._gdaf
		}
	}
	return _gcgg, _fbfb
}

// _abca builds rulings from filled path sections (its log messages call this
// "makeFillRulings"). After passing the sections through _fdbbd, every subpath
// that is a quadrilateral and for which makeRectRuling succeeds contributes one
// ruling, coloured with the section's Color.
func _abca(_cebbd []pathSection) rulingList {
	_fdbbd(_cebbd)
	if _daafa {
		_eg.Log.Info("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs", len(_cebbd))
	}
	var _degd rulingList
	for _, _ecaf := range _cebbd {
		for _, _dgadc := range _ecaf._aga {
			if !_dgadc.isQuadrilateral() {
				if _daafa {
					_eg.Log.Error("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073", _dgadc)
				}
				continue
			}
			if _dced, _adae := _dgadc.makeRectRuling(_ecaf.Color); _adae {
				_degd = append(_degd, _dced)
			} else if _dddf {
				_eg.Log.Error("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073", _dgadc)
			}
		}
	}
	if _daafa {
		_eg.Log.Info("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073", _degd.String())
	}
	return _degd
}

// removeDuplicates drops every point that _fafbc reports as equal to the point
// kept immediately before it, so runs of repeated points collapse to one.
func (_gafe *subpath) removeDuplicates() {
	if len(_gafe._gcbb) == 0 {
		return
	}
	_cbfb := []_g.Point{_gafe._gcbb[0]}
	for _, _cfa := range _gafe._gcbb[1:] {
		if !_fafbc(_cfa, _cbfb[len(_cbfb)-1]) {
			_cbfb = append(_cbfb, _cfa)
		}
	}
	_gafe._gcbb = _cbfb
}

// extractTables finds tables among the paragraphs using the tilings _egbcf and
// returns the result of applying them. Lists shorter than _baad are returned
// unchanged.
func (_egbe paraList) extractTables(_egbcf []gridTiling) paraList {
	if _eeca {
		_eg.Log.Debug("\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_egbe))
	}
	if len(_egbe) < _baad {
		return _egbe
	}
	_cfef := _egbe.findTables(_egbcf)
	if _eeca {
		_eg.Log.Info("c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(_cfef))
		for _dcgd, _fegc := range _cfef {
			_fegc.log(_be.Sprintf("c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064", _dcgd))
		}
	}
	return _egbe.applyTables(_cfef)
}

// gridTiling is a rectangle together with two float64 slices and a two-level
// map from float64 keys to gridTile values.
// NOTE(review): presumably the slices are the grid-line coordinates in each
// direction and the map is keyed by them; confirm against the code that fills it.
type gridTiling struct {
	_bd.PdfRectangle
	_defd  []float64
	_eabe  []float64
	_bdgaf map[float64]map[float64]gridTile
}

// _ebgee classifies the segment between two points by passing its absolute
// X and Y extents, together with _cged, to _bebd.
func _ebgee(_cggb, _aagb _g.Point) rulingKind {
	_gdffd := _bf.Abs(_cggb.X - _aagb.X)
	_acbg := _bf.Abs(_cggb.Y - _aagb.Y)
	return _bebd(_gdffd, _acbg, _cged)
}

// logComposite prints two debug grids for the composite cells in _agga, both
// labelled with _egddg: first the number of paragraphs in each cell, then the
// merged text of each cell truncated via _dbec(…, 12). It does nothing unless
// _eeca is set.
func (_dbdfg *textTable) logComposite(_egddg string) {
	if !_eeca {
		return
	}

	// Grid 1: paragraph counts.
	_eg.Log.Info("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _dbdfg._bgcfb, _dbdfg._gccb, _egddg)
	_be.Printf("\u0025\u0035\u0073 \u007c", "")
	for _bedgc := 0; _bedgc < _dbdfg._bgcfb; _bedgc++ {
		_be.Printf("\u0025\u0033\u0064 \u007c", _bedgc)
	}
	_be.Println("")
	_be.Printf("\u0025\u0035\u0073 \u002b", "")
	for _bcbge := 0; _bcbge < _dbdfg._bgcfb; _bcbge++ {
		_be.Printf("\u0025\u0033\u0073 \u002b", "\u002d\u002d\u002d")
	}
	_be.Println("")
	for _bgad := 0; _bgad < _dbdfg._gccb; _bgad++ {
		_be.Printf("\u0025\u0035\u0064 \u007c", _bgad)
		for _gbbb := 0; _gbbb < _dbdfg._bgcfb; _gbbb++ {
			_faeab, _ := _dbdfg._agga[_bddbg(_gbbb, _bgad)].parasBBox()
			_be.Printf("\u0025\u0033\u0064 \u007c", len(_faeab))
		}
		_be.Println("")
	}

	// Grid 2: cell text.
	_eg.Log.Info("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", _dbdfg._bgcfb, _dbdfg._gccb, _egddg)
	_be.Printf("\u0025\u0035\u0073 \u007c", "")
	for _fbfad := 0; _fbfad < _dbdfg._bgcfb; _fbfad++ {
		_be.Printf("\u0025\u0031\u0032\u0064\u0020\u007c", _fbfad)
	}
	_be.Println("")
	_be.Printf("\u0025\u0035\u0073 \u002b", "")
	for _aaeg := 0; _aaeg < _dbdfg._bgcfb; _aaeg++ {
		_be.Print("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b")
	}
	_be.Println("")
	for _aeeaf := 0; _aeeaf < _dbdfg._gccb; _aeeaf++ {
		_be.Printf("\u0025\u0035\u0064 \u007c", _aeeaf)
		for _abeb := 0; _abeb < _dbdfg._bgcfb; _abeb++ {
			_bfdgg, _ := _dbdfg._agga[_bddbg(_abeb, _aeeaf)].parasBBox()
			_gbdb := ""
			_babe := _bfdgg.merge()
			if _babe != nil {
				_gbdb = _babe.text()
			}
			// Quote with %q to escape control characters, then strip the
			// surrounding quotes again.
			_gbdb = _be.Sprintf("\u0025\u0071", _dbec(_gbdb, 12))
			_gbdb = _gbdb[1 : len(_gbdb)-1]
			_be.Printf("\u0025\u0031\u0032\u0073\u0020\u007c", _gbdb)
		}
		_be.Println("")
	}
}

// findPrimSec returns the first ruling whose _ccb coordinate matches _egbc
// (according to _bcaga) and whose [_gaad, _gdaf] range, widened by _cebae on
// both sides, contains _aaceg. It returns nil if there is no such ruling.
func (_ebgb rulingList) findPrimSec(_egbc, _aaceg float64) *ruling {
	for _, _geba := range _ebgb {
		if _bcaga(_geba._ccb-_egbc) && _geba._gaad-_cebae <= _aaceg && _aaceg <= _geba._gdaf+_cebae {
			return _geba
		}
	}
	return nil
}

// log prints the table dimensions, its _cdgb flag and bounding box, followed by
// one line per non-nil cell with its position, bounding box, text truncated via
// _dbec(…, 50) and rune count. It does nothing unless _eeca is set.
func (_efgc *textTable) log(_bacdc string) {
	if !_eeca {
		return
	}
	_eg.Log.Info("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066", _bacdc, _efgc._bgcfb, _efgc._gccb, _efgc._cdgb, _efgc.PdfRectangle)
	for _fefde := 0; _fefde < _efgc._gccb; _fefde++ {
		for _fdbf := 0; _fdbf < _efgc._bgcfb; _fdbf++ {
			_cbagb := _efgc.get(_fdbf, _fefde)
			if _cbagb == nil {
				continue
			}
			_be.Printf("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a", _fdbf, _fefde, _cbagb.PdfRectangle, _dbec(_cbagb.text(), 50), _c.RuneCountInString(_cbagb.text()))
		}
	}
}

// yNeighbours builds two events per paragraph, one at its lower and one at its
// upper y bound, and hands them to eventNeighbours. When _cceaf is non-zero
// each bound is pushed outwards by _cceaf times the paragraph's font size.
func (_fcdge paraList) yNeighbours(_cceaf float64) map[*textPara][]int {
	_dbggc := make([]event, 2*len(_fcdge))
	if _cceaf == 0 {
		for _cfafb, _affe := range _fcdge {
			_dbggc[2*_cfafb] = event{_affe.Lly, true, _cfafb}
			_dbggc[2*_cfafb+1] = event{_affe.Ury, false, _cfafb}
		}
	} else {
		for _bafd, _fcfg := range _fcdge {
			_dbggc[2*_bafd] = event{_fcfg.Lly - _cceaf*_fcfg.fontsize(), true, _bafd}
			_dbggc[2*_bafd+1] = event{_fcfg.Ury + _cceaf*_fcfg.fontsize(), false, _bafd}
		}
	}
	return _fcdge.eventNeighbours(_dbggc)
}

// aligned reports whether enough of the rulings line up with one another.
//
// Each ruling after the first either bumps the counter of one already-seen
// ruling that it gridIntersects, or is recorded itself with a count of zero.
// The result is true when the fraction of zero-count entries relative to the
// list length is at most 1.0-_bfba. Lists with fewer than two rulings are never
// aligned.
// NOTE(review): the inner loop ranges over a map, so which entry is bumped is
// not deterministic when a ruling intersects several entries.
func (_cgagf rulingList) aligned() bool {
	if len(_cgagf) < 2 {
		return false
	}
	_ebae := make(map[*ruling]int)
	_ebae[_cgagf[0]] = 0
	for _, _gcca := range _cgagf[1:] {
		_dgbff := false
		for _abeg := range _ebae {
			if _gcca.gridIntersecting(_abeg) {
				_ebae[_abeg]++
				_dgbff = true
				break
			}
		}
		if !_dgbff {
			_ebae[_gcca] = 0
		}
	}
	_dgdb := 0
	for _, _eeed := range _ebae {
		if _eeed == 0 {
			_dgdb++
		}
	}
	_eeea := float64(_dgdb) / float64(len(_cgagf))
	_dfadb := _eeea <= 1.0-_bfba
	if _daafa {
		_eg.Log.Info("\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _dfadb, _eeea, _dgdb, len(_cgagf), _cgagf.String())
	}
	return _dfadb
}

// _eaada builds rulings from stroked path sections (its log messages call this
// "makeStrokeRulings"). After passing the sections through _fdbbd, every pair
// of consecutive points in a subpath is offered to _gfac and kept when it
// reports success. Subpaths with fewer than two points are skipped.
func _eaada(_ggadb []pathSection) rulingList {
	_fdbbd(_ggadb)
	if _daafa {
		_eg.Log.Info("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073", len(_ggadb))
	}
	var _dggef rulingList
	for _, _eff := range _ggadb {
		for _, _bdeb := range _eff._aga {
			if len(_bdeb._gcbb) < 2 {
				continue
			}
			_gecg := _bdeb._gcbb[0]
			for _, _egea := range _bdeb._gcbb[1:] {
				if _fdebf, _ecdc := _gfac(_gecg, _egea, _eff.Color); _ecdc {
					_dggef = append(_dggef, _fdebf)
				}
				_gecg = _egea
			}
		}
	}
	if _daafa {
		_eg.Log.Info("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073", _dggef)
	}
	return _dggef
}

// cubicTo handles a cubic curve segment by adding only its end point
// (_edbee, _ffc); the four control-point coordinates are ignored.
func (_fdcg *shapesState) cubicTo(_fdba, _ccaa, _ffeb, _faea, _edbee, _ffc float64) {
	if _dded {
		_eg.Log.Info("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a")
	}
	_fdcg.addPoint(_edbee, _ffc)
}

// _gfbae removes the last rune from the text marks _afebf and keeps the running
// byte offset *_egae in step.
//
// If the last mark holds exactly one rune the whole mark is dropped and the
// offset is reset to the end of the new last mark. Otherwise the mark's text is
// shortened with _bfe and the offset is adjusted by the change in byte length.
//
// Fixes over the previous version:
//   - the shortened text is written back into the slice element; before, it was
//     assigned to a local copy and lost while the offset was still adjusted;
//   - an empty slice is returned unchanged instead of panicking;
//   - when the only mark is dropped, the offset is set to that mark's Offset
//     instead of indexing an empty slice.
//     NOTE(review): assumes Offset is the position at which a mark's text
//     starts, so this is where the next mark would begin; confirm with callers.
func _gfbae(_afebf []TextMark, _egae *int) []TextMark {
	if len(_afebf) == 0 {
		return _afebf
	}
	_lastIdx := len(_afebf) - 1
	_ffaeb := _afebf[_lastIdx]
	_eggf := []rune(_ffaeb.Text)
	if len(_eggf) == 1 {
		_afebf = _afebf[:_lastIdx]
		if len(_afebf) == 0 {
			*_egae = _ffaeb.Offset
			return _afebf
		}
		_cfgf := _afebf[len(_afebf)-1]
		*_egae = _cfgf.Offset + len(_cfgf.Text)
	} else {
		_bbgg := _bfe(_ffaeb.Text)
		*_egae += len(_bbgg) - len(_ffaeb.Text)
		// Assign through the slice: _ffaeb is only a copy of the element.
		_afebf[_lastIdx].Text = _bbgg
	}
	return _afebf
}

// lastpointEstablished returns a point and a flag.
//
// When _beg is set it returns _eee and false. Otherwise, if the last entry of
// _gdbg has _bad set, it returns that entry's last() point and false. In all
// other cases it returns the zero point and true.
func (_cdf *shapesState) lastpointEstablished() (_g.Point, bool) {
	if _cdf._beg {
		return _cdf._eee, false
	}
	_agbf := len(_cdf._gdbg)
	if _agbf > 0 && _cdf._gdbg[_agbf-1]._bad {
		return _cdf._gdbg[_agbf-1].last(), false
	}
	return _g.Point{}, true
}
2022-06-06 22:48:24 +00:00
2022-06-27 19:58:38 +00:00
// String returns a description of the text mark: its bounding box, the _gaaaf
// value (labelled "fontsize" in the output) and its text.
func (_efbgc *textMark) String() string {
	return _be.Sprintf("\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022", _efbgc.PdfRectangle, _efbgc._gaaaf, _efbgc._aec)
}

// textLine is a rectangle holding an ordered list of words plus two float64
// attributes. _gddec is the primary key used to order lines in _fdebfb.
type textLine struct {
	_bd.PdfRectangle
	_gddec float64
	_ebge  []*textWord
	_aefd  float64
}

// textTable is a rectangular grid of cells. growTable increments _gccb when it
// adds a run of _bgcfb cells and increments _bgcfb when it adds a run of _gccb
// cells, so they act as the two grid dimensions.
// NOTE(review): presumably _bgcfb is the column count and _gccb the row count;
// confirm against put/get.
type textTable struct {
	_bd.PdfRectangle
	_bgcfb, _gccb int
	_cdgb         bool
	_deedc        map[uint64]*textPara
	_agga         map[uint64]compositeCell
}

// extractInlineImage converts the inline image _ecc to RGB and appends it to
// the context's image list as an ImageMark whose size, angle and position are
// taken from the CTM of the graphics state _adc. DeviceGray is used when the
// image reports no colour space. The first error encountered is returned.
func (_ege *imageExtractContext) extractInlineImage(_ecc *_cce.ContentStreamInlineImage, _adc _cce.GraphicsState, _bfc *_bd.PdfPageResources) error {
	_eaf, _daab := _ecc.ToImage(_bfc)
	if _daab != nil {
		return _daab
	}
	_dbd, _daab := _ecc.GetColorSpace(_bfc)
	if _daab != nil {
		return _daab
	}
	if _dbd == nil {
		_dbd = _bd.NewPdfColorspaceDeviceGray()
	}
	_agd, _daab := _dbd.ImageToRGB(*_eaf)
	if _daab != nil {
		return _daab
	}
	_ca := ImageMark{Image: &_agd, Width: _adc.CTM.ScalingFactorX(), Height: _adc.CTM.ScalingFactorY(), Angle: _adc.CTM.Angle()}
	_ca.X, _ca.Y = _adc.CTM.Translation()
	_ege._daa = append(_ege._daa, _ca)
	_ege._ec++
	return nil
}

// growTable repeatedly extends the table with the paragraphs returned by
// getDown and getRight until neither yields anything.
//
// In each round, if both are available, their last elements are the same
// paragraph and that paragraph is not yet taken, the table grows in both
// directions and the shared paragraph becomes the new corner cell. Otherwise it
// grows with the getDown result if there is one, else with the getRight result.
func (_bded *textTable) growTable() {
	// Adds one line of _bgcfb cells at index _gccb-1 in the second dimension.
	_gdbb := func(_dfgf paraList) {
		_bded._gccb++
		for _fcge := 0; _fcge < _bded._bgcfb; _fcge++ {
			_deaab := _dfgf[_fcge]
			_bded.put(_fcge, _bded._gccb-1, _deaab)
		}
	}
	// Adds one line of _gccb cells at index _bgcfb-1 in the first dimension.
	_ddfc := func(_ebccd paraList) {
		_bded._bgcfb++
		for _bbace := 0; _bbace < _bded._gccb; _bbace++ {
			_ffef := _ebccd[_bbace]
			_bded.put(_bded._bgcfb-1, _bbace, _ffef)
		}
	}
	if _cgbdc {
		_bded.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}
	for _cagd := 0; ; _cagd++ {
		_dcecb := false
		_deef := _bded.getDown()
		_aedb := _bded.getRight()
		if _cgbdc {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _cagd, _bded)
			_be.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", _deef)
			_be.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", _aedb)
		}
		if _deef != nil && _aedb != nil {
			_eebc := _deef[len(_deef)-1]
			if !_eebc.taken() && _eebc == _aedb[len(_aedb)-1] {
				_gdbb(_deef)
				// Re-query after growing; the earlier result is stale.
				if _aedb = _bded.getRight(); _aedb != nil {
					_ddfc(_aedb)
					_bded.put(_bded._bgcfb-1, _bded._gccb-1, _eebc)
				}
				_dcecb = true
			}
		}
		if !_dcecb && _deef != nil {
			_gdbb(_deef)
			_dcecb = true
		}
		if !_dcecb && _aedb != nil {
			_ddfc(_aedb)
			_dcecb = true
		}
		if !_dcecb {
			break
		}
	}
}

// _edd reports whether any font in _ceb has FontName equal to _eb.
func _edd(_ceb []Font, _eb string) bool {
	for _, _dbfb := range _ceb {
		if _dbfb.FontName == _eb {
			return true
		}
	}
	return false
}

// _efd returns a textState with _aee set to 100, _ebb set to RenderModeFill
// and _egdd set to the rectangle _dbg.
func _efd(_dbg _bd.PdfRectangle) textState {
	return textState{_aee: 100, _ebb: RenderModeFill, _egdd: _dbg}
}

// arrangeText drains the word bag into lines and returns them as a paragraph,
// or nil if no line was produced.
//
// For every depth index, while that index still holds words, a new line is
// seeded via _gaeb from the first reading index. Words are then pulled into the
// line one at a time from the depth band around the seed word (its _acag
// plus/minus _bbcc times its _efag). A candidate is skipped when its _edagd
// distance from the line's current last word exceeds _bbfe*_efag; the line is
// closed as soon as a candidate lies more than _ffbe*_efag on the negative
// side. Among acceptable candidates the one ordered first by _afdf wins.
// Finished lines are sorted with _dfe before being wrapped by _gade.
func (_gagb *wordBag) arrangeText() *textPara {
	_gagb.sort()
	if _cgffa {
		_gagb.removeDuplicates()
	}
	var _fgbd []*textLine
	for _, _ceea := range _gagb.depthIndexes() {
		for !_gagb.empty(_ceea) {
			_efac := _gagb.firstReadingIndex(_ceea)
			_eecg := _gagb.firstWord(_efac)
			_debag := _gaeb(_gagb, _efac)
			_gacb := _eecg._efag
			_bdba := _eecg._acag - _bbcc*_gacb
			_daba := _eecg._acag + _bbcc*_gacb
			_dgfff := _bbfe * _gacb
			_dcffg := _ffbe * _gacb
		_acge:
			for {
				var _aega *textWord
				_bbbf := 0
				for _, _ddggd := range _gagb.depthBand(_bdba, _daba) {
					_ecgbf := _gagb.highestWord(_ddggd, _bdba, _daba)
					if _ecgbf == nil {
						continue
					}
					_gfea := _edagd(_ecgbf, _debag._ebge[len(_debag._ebge)-1])
					if _gfea < -_dcffg {
						break _acge
					}
					if _gfea > _dgfff {
						continue
					}
					if _aega != nil && _afdf(_ecgbf, _aega) >= 0 {
						continue
					}
					_aega = _ecgbf
					_bbbf = _ddggd
				}
				if _aega == nil {
					break
				}
				_debag.pullWord(_gagb, _aega, _bbbf)
			}
			_debag.markWordBoundaries()
			_fgbd = append(_fgbd, _debag)
		}
	}
	if len(_fgbd) == 0 {
		return nil
	}
	_cc.Slice(_fgbd, func(_fbed, _agag int) bool { return _dfe(_fgbd[_fbed], _fgbd[_agag]) < 0 })
	_aabf := _gade(_gagb.PdfRectangle, _fgbd)
	if _eafe {
		_eg.Log.Info("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073", _aabf.String())
		if _dcaca {
			for _abfca, _abe := range _aabf._gadg {
				_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _abfca, _abe.String())
				if _dega {
					for _dcb, _bgddc := range _abe._ebge {
						_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", _dcb, _bgddc.String())
						for _dgbef, _caee := range _bgddc._bbacg {
							_be.Printf("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n", _dgbef, _caee.String())
						}
					}
				}
			}
		}
	}
	return _aabf
}

// extractContentStreamImages parses the content stream _fge and processes it
// with processOperand registered as the handler for all operands. The image
// cache and the options are initialised to empty defaults if they are nil.
func (_bgc *imageExtractContext) extractContentStreamImages(_fge string, _fff *_bd.PdfPageResources) error {
	_ebe := _cce.NewContentStreamParser(_fge)
	_bda, _edbg := _ebe.Parse()
	if _edbg != nil {
		return _edbg
	}
	if _bgc._ac == nil {
		_bgc._ac = map[*_db.PdfObjectStream]*cachedImage{}
	}
	if _bgc._gfe == nil {
		_bgc._gfe = &ImageExtractOptions{}
	}
	_bbb := _cce.NewContentStreamProcessor(*_bda)
	_bbb.AddHandler(_cce.HandlerConditionEnumAllOperands, "", _bgc.processOperand)
	return _bbb.Process(_fff)
}

// _fdebfb computes y positions that separate the lines of the cells _dbbaa into
// groups (the debug output calls them "rowBorders"/"rowCorridors").
//
// All lines of all cells are sorted by _gddec, then by Llx when the _gddec
// values are equal according to _bcaga. Walking that order, whenever a line's
// top (Ury) lies below the lowest bottom (Lly) seen so far, the midpoint
// between the two becomes a border and a new group starts. The borders are
// returned only if every cell hasLines for every group; otherwise nil.
//
// Returns nil when the cells contain no lines at all. The previous version
// panicked in that case by indexing an empty slice.
func _fdebfb(_dbbaa []compositeCell) []float64 {
	var _cfdb []*textLine
	_gfca := 0
	for _, _ggabb := range _dbbaa {
		_gfca += len(_ggabb.paraList)
		_cfdb = append(_cfdb, _ggabb.lines()...)
	}
	_cc.Slice(_cfdb, func(_agef, _cfdec int) bool {
		_bddae, _dbfdb := _cfdb[_agef], _cfdb[_cfdec]
		_bdccc, _eagbe := _bddae._gddec, _dbfdb._gddec
		if !_bcaga(_bdccc - _eagbe) {
			return _bdccc < _eagbe
		}
		return _bddae.Llx < _dbfdb.Llx
	})
	if _eeca {
		_be.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", _gfca, len(_cfdb))
		for _adacd, _bbca := range _cfdb {
			_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", _adacd, _bbca)
		}
	}
	// Nothing to separate: avoid indexing an empty slice below.
	if len(_cfdb) == 0 {
		return nil
	}
	var _deeg []float64
	_dedfc := _cfdb[0]
	var _ecafgc [][]*textLine
	_dcefe := []*textLine{_dedfc}
	for _faga, _cbcg := range _cfdb[1:] {
		if _cbcg.Ury < _dedfc.Lly {
			_dggba := 0.5 * (_cbcg.Ury + _dedfc.Lly)
			if _eeca {
				_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a", _faga, _cbcg.Ury, _dedfc.Lly, _dggba, _dedfc, _cbcg)
			}
			_deeg = append(_deeg, _dggba)
			_ecafgc = append(_ecafgc, _dcefe)
			_dcefe = nil
		}
		_dcefe = append(_dcefe, _cbcg)
		// Track the line with the lowest bottom edge seen so far.
		if _cbcg.Lly < _dedfc.Lly {
			_dedfc = _cbcg
		}
	}
	if len(_dcefe) > 0 {
		_ecafgc = append(_ecafgc, _dcefe)
	}
	if _eeca {
		_be.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", _deeg)
	}
	if _eeca {
		_eg.Log.Info("\u0072\u006f\u0077\u003d\u0025\u0064", len(_dbbaa))
		for _baafc, _ggbcd := range _dbbaa {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _baafc, _ggbcd)
		}
		_eg.Log.Info("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d", len(_ecafgc))
		for _ebdd, _adag := range _ecafgc {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a", _ebdd, len(_adag))
			for _gagg, _ddac := range _adag {
				_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", _gagg, _ddac)
			}
		}
	}
	// The borders are only valid if every cell has lines in every group.
	_ffbcf := true
	for _aggeb, _gggd := range _ecafgc {
		_edde := true
		for _feaf, _ggfdc := range _dbbaa {
			if _eeca {
				_be.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", _aggeb, len(_ecafgc), _feaf, len(_dbbaa), _ggfdc)
			}
			if !_ggfdc.hasLines(_gggd) {
				if _eeca {
					_be.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", _aggeb, len(_ecafgc), _feaf, len(_dbbaa))
				}
				_edde = false
				break
			}
		}
		if !_edde {
			_ffbcf = false
			break
		}
	}
	if !_ffbcf {
		if _eeca {
			_eg.Log.Info("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg")
		}
		_deeg = nil
	}
	if _eeca && _deeg != nil {
		_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", _deeg)
	}
	return _deeg
}
func ( _deg * Extractor ) extractPageText ( _gda string , _gfg * _bd . PdfPageResources , _baf _g . Matrix , _feg int ) ( * PageText , int , int , error ) { _eg . Log . Trace ( "\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d" , _feg ) ;
_egd := & PageText { _fbd : _deg . _dd } ; _bbbg := _efd ( _deg . _dd ) ; var _dege stateStack ; _fdc := _dge ( _deg , _gfg , _cce . GraphicsState { } , & _bbbg , & _dege ) ; _ebaf := shapesState { _gaga : _baf , _eccf : _g . IdentityMatrix ( ) , _baafg : _fdc } ; var _fdb bool ; if _feg > _gcb { _aag := _f . New ( "\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077" ) ;
_eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076" , _feg , _aag ) ;
return _egd , _bbbg . _fdfc , _bbbg . _dcef , _aag ; } ; _cag := _cce . NewContentStreamParser ( _gda ) ; _ebc , _bcd := _cag . Parse ( ) ; if _bcd != nil { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bcd ) ;
return _egd , _bbbg . _fdfc , _bbbg . _dcef , _bcd ; } ; _aba := _cce . NewContentStreamProcessor ( * _ebc ) ; _aba . AddHandler ( _cce . HandlerConditionEnumAllOperands , "" , func ( _dca * _cce . ContentStreamOperation , _aeb _cce . GraphicsState , _ddg * _bd . PdfPageResources ) error { _cga := _dca . Operand ;
if _befc { _eg . Log . Info ( "\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s" , _dca ) ; } ; switch _cga { case "\u0071" : if _dded { _eg . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _ebaf . _eccf ) ; } ; _dege . push ( & _bbbg ) ; case "\u0051" : if ! _dege . empty ( ) { _bbbg = * _dege . pop ( ) ;
} ; _ebaf . _eccf = _aeb . CTM ; if _dded { _eg . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _ebaf . _eccf ) ; } ; case "\u0042\u0054" : if _fdb { _eg . Log . Debug ( "\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
_egd . _fgd = append ( _egd . _fgd , _fdc . _dcaa ... ) ; } ; _fdb = true ; _cdd := _aeb ; _cdd . CTM = _baf . Mult ( _cdd . CTM ) ; _fdc = _dge ( _deg , _ddg , _cdd , & _bbbg , & _dege ) ; _ebaf . _baafg = _fdc ; case "\u0045\u0054" : if ! _fdb { _eg . Log . Debug ( "\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
} ; _fdb = false ; _egd . _fgd = append ( _egd . _fgd , _fdc . _dcaa ... ) ; _fdc . reset ( ) ; case "\u0054\u002a" : _fdc . nextLine ( ) ; case "\u0054\u0064" : if _gff , _edcf := _fdc . checkOp ( _dca , 2 , true ) ; ! _gff { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _edcf ) ;
return _edcf ; } ; _gbb , _abc , _efb := _ccdde ( _dca . Params ) ; if _efb != nil { return _efb ; } ; _fdc . moveText ( _gbb , _abc ) ; case "\u0054\u0044" : if _bae , _fbc := _fdc . checkOp ( _dca , 2 , true ) ; ! _bae { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fbc ) ;
return _fbc ; } ; _edbe , _gfb , _bgg := _ccdde ( _dca . Params ) ; if _bgg != nil { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bgg ) ; return _bgg ; } ; _fdc . moveTextSetLeading ( _edbe , _gfb ) ; case "\u0054\u006a" : if _bac , _gaef := _fdc . checkOp ( _dca , 1 , true ) ;
! _bac { _eg . Log . Debug ( "\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076" , _dca , _gaef ) ; return _gaef ; } ; _gca , _acd := _db . GetStringBytes ( _dca . Params [ 0 ] ) ; if ! _acd { _eg . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064" , _dca ) ;
return _db . ErrTypeError ; } ; return _fdc . showText ( _gca ) ; case "\u0054\u004a" : if _def , _efa := _fdc . checkOp ( _dca , 1 , true ) ; ! _def { _eg . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _efa ) ; return _efa ;
} ; _cdc , _adg := _db . GetArray ( _dca . Params [ 0 ] ) ; if ! _adg { _eg . Log . Debug ( "\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _dca ) ;
return _bcd ; } ; return _fdc . showTextAdjusted ( _cdc ) ; case "\u0027" : if _faae , _ffa := _fdc . checkOp ( _dca , 1 , true ) ; ! _faae { _eg . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _ffa ) ; return _ffa ; } ; _edf , _bgcd := _db . GetStringBytes ( _dca . Params [ 0 ] ) ;
if ! _bgcd { _eg . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _dca ) ; return _db . ErrTypeError ; } ; _fdc . nextLine ( ) ; return _fdc . showText ( _edf ) ;
case "\u0022" : if _fefd , _cea := _fdc . checkOp ( _dca , 3 , true ) ; ! _fefd { _eg . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _cea ) ; return _cea ; } ; _gfd , _cgff , _fdbe := _ccdde ( _dca . Params [ : 2 ] ) ; if _fdbe != nil { return _fdbe ;
} ; _dbc , _fdga := _db . GetStringBytes ( _dca . Params [ 2 ] ) ; if ! _fdga { _eg . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _dca ) ;
return _db . ErrTypeError ; } ; _fdc . setCharSpacing ( _gfd ) ; _fdc . setWordSpacing ( _cgff ) ; _fdc . nextLine ( ) ; return _fdc . showText ( _dbc ) ; case "\u0054\u004c" : _fcg , _cfg := _fgfa ( _dca ) ; if _cfg != nil { _eg . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _cfg ) ;
return _cfg ; } ; _fdc . setTextLeading ( _fcg ) ; case "\u0054\u0063" : _ccdd , _bcdc := _fgfa ( _dca ) ; if _bcdc != nil { _eg . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bcdc ) ; return _bcdc ; } ; _fdc . setCharSpacing ( _ccdd ) ;
case "\u0054\u0066" : if _gaee , _acde := _fdc . checkOp ( _dca , 2 , true ) ; ! _gaee { _eg . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _acde ) ; return _acde ; } ; _gfae , _ddd := _db . GetNameVal ( _dca . Params [ 0 ] ) ;
if ! _ddd { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064" , _dca ) ; return _db . ErrTypeError ; } ; _edda , _gdc := _db . GetNumberAsFloat ( _dca . Params [ 1 ] ) ;
if ! _ddd { _eg . Log . Debug ( "\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dca , _gdc ) ;
return _gdc ; } ; _gdc = _fdc . setFont ( _gfae , _edda ) ; _fdc . _gdcg = _fg . Is ( _gdc , _db . ErrNotSupported ) ; if _gdc != nil && ! _fdc . _gdcg { return _gdc ; } ; case "\u0054\u006d" : if _ebf , _fag := _fdc . checkOp ( _dca , 6 , true ) ; ! _ebf { _eg . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fag ) ;
return _fag ; } ; _cdb , _eec := _db . GetNumbersAsFloat ( _dca . Params ) ; if _eec != nil { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _eec ) ; return _eec ; } ; _fdc . setTextMatrix ( _cdb ) ; case "\u0054\u0072" : if _dgg , _af := _fdc . checkOp ( _dca , 1 , true ) ;
! _dgg { _eg . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _af ) ; return _af ; } ; _efbf , _adb := _db . GetIntVal ( _dca . Params [ 0 ] ) ; if ! _adb { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _dca ) ;
return _db . ErrTypeError ; } ; _fdc . setTextRenderMode ( _efbf ) ; case "\u0054\u0073" : if _fac , _dea := _fdc . checkOp ( _dca , 1 , true ) ; ! _fac { _eg . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dea ) ; return _dea ;
} ; _agg , _ebg := _db . GetNumberAsFloat ( _dca . Params [ 0 ] ) ; if _ebg != nil { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _ebg ) ; return _ebg ; } ; _fdc . setTextRise ( _agg ) ; case "\u0054\u0077" : if _dfb , _dcf := _fdc . checkOp ( _dca , 1 , true ) ;
! _dfb { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dcf ) ; return _dcf ; } ; _agb , _fdf := _db . GetNumberAsFloat ( _dca . Params [ 0 ] ) ; if _fdf != nil { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fdf ) ;
return _fdf ; } ; _fdc . setWordSpacing ( _agb ) ; case "\u0054\u007a" : if _ffae , _aed := _fdc . checkOp ( _dca , 1 , true ) ; ! _ffae { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _aed ) ; return _aed ; } ; _daaf , _bef := _db . GetNumberAsFloat ( _dca . Params [ 0 ] ) ;
if _bef != nil { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bef ) ; return _bef ; } ; _fdc . setHorizScaling ( _daaf ) ; case "\u0063\u006d" : _ebaf . _eccf = _aeb . CTM ; if _ebaf . _eccf . Singular ( ) { _cee := _g . IdentityMatrix ( ) . Translate ( _ebaf . _eccf . Translation ( ) ) ;
_eg . Log . Debug ( "S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s" , _ebaf . _eccf , _cee ) ; _ebaf . _eccf = _cee ; } ; if _dded { _eg . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _ebaf . _eccf ) ; } ; case "\u006d" : if len ( _dca . Params ) != 2 { _eg . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _bdg ) ;
return nil ; } ; _gafd , _daff := _db . GetNumbersAsFloat ( _dca . Params ) ; if _daff != nil { return _daff ; } ; _ebaf . moveTo ( _gafd [ 0 ] , _gafd [ 1 ] ) ; case "\u006c" : if len ( _dca . Params ) != 2 { _eg . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _bdg ) ;
return nil ; } ; _dfbc , _ada := _db . GetNumbersAsFloat ( _dca . Params ) ; if _ada != nil { return _ada ; } ; _ebaf . lineTo ( _dfbc [ 0 ] , _dfbc [ 1 ] ) ; case "\u0063" : if len ( _dca . Params ) != 6 { return _bdg ; } ; _afb , _dagb := _db . GetNumbersAsFloat ( _dca . Params ) ; if _dagb != nil { return _dagb ;
} ; _eg . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _afb ) ; _ebaf . cubicTo ( _afb [ 0 ] , _afb [ 1 ] , _afb [ 2 ] , _afb [ 3 ] , _afb [ 4 ] , _afb [ 5 ] ) ; case "\u0076" , "\u0079" : if len ( _dca . Params ) != 4 { return _bdg ;
} ; _bfg , _afbb := _db . GetNumbersAsFloat ( _dca . Params ) ; if _afbb != nil { return _afbb ; } ; _eg . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _bfg ) ; _ebaf . quadraticTo ( _bfg [ 0 ] , _bfg [ 1 ] , _bfg [ 2 ] , _bfg [ 3 ] ) ;
case "\u0068" : _ebaf . closePath ( ) ; case "\u0072\u0065" : if len ( _dca . Params ) != 4 { return _bdg ; } ; _cad , _gcf := _db . GetNumbersAsFloat ( _dca . Params ) ; if _gcf != nil { return _gcf ; } ; _ebaf . drawRectangle ( _cad [ 0 ] , _cad [ 1 ] , _cad [ 2 ] , _cad [ 3 ] ) ; _ebaf . closePath ( ) ;
case "\u0053" : _ebaf . stroke ( & _egd . _egg ) ; _ebaf . clearPath ( ) ; case "\u0073" : _ebaf . closePath ( ) ; _ebaf . stroke ( & _egd . _egg ) ; _ebaf . clearPath ( ) ; case "\u0046" : _ebaf . fill ( & _egd . _dbe ) ; _ebaf . clearPath ( ) ; case "\u0066" , "\u0066\u002a" : _ebaf . closePath ( ) ;
_ebaf . fill ( & _egd . _dbe ) ; _ebaf . clearPath ( ) ; case "\u0042" , "\u0042\u002a" : _ebaf . fill ( & _egd . _dbe ) ; _ebaf . stroke ( & _egd . _egg ) ; _ebaf . clearPath ( ) ; case "\u0062" , "\u0062\u002a" : _ebaf . closePath ( ) ; _ebaf . fill ( & _egd . _dbe ) ; _ebaf . stroke ( & _egd . _egg ) ;
_ebaf . clearPath ( ) ; case "\u006e" : _ebaf . clearPath ( ) ; case "\u0044\u006f" : if len ( _dca . Params ) == 0 { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e" , _dca . Params ) ;
return _db . ErrRangeError ; } ; _fba , _fga := _db . GetName ( _dca . Params [ 0 ] ) ; if ! _fga { _eg . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e" , _dca . Params [ 0 ] ) ;
return _db . ErrTypeError ; } ; _ , _dfa := _ddg . GetXObjectByName ( * _fba ) ; if _dfa != _bd . XObjectTypeForm { break ; } ; _fbgb , _fga := _deg . _dg [ _fba . String ( ) ] ; if ! _fga { _fagc , _gcad := _ddg . GetXObjectFormByName ( * _fba ) ; if _gcad != nil { _eg . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _gcad ) ;
return _gcad ; } ; _cbbe , _gcad := _fagc . GetContentStream ( ) ; if _gcad != nil { _eg . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _gcad ) ; return _gcad ; } ; _gfgd := _fagc . Resources ; if _gfgd == nil { _gfgd = _ddg ; } ; _fgg , _ecd , _fdgb , _gcad := _deg . extractPageText ( string ( _cbbe ) , _gfgd , _baf . Mult ( _aeb . CTM ) , _feg + 1 ) ;
if _gcad != nil { _eg . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _gcad ) ; return _gcad ; } ; _fbgb = textResult { * _fgg , _ecd , _fdgb } ; _deg . _dg [ _fba . String ( ) ] = _fbgb ; } ; _ebaf . _eccf = _aeb . CTM ; if _dded { _eg . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _ebaf . _eccf ) ;
} ; _egd . _fgd = append ( _egd . _fgd , _fbgb . _bdf . _fgd ... ) ; _egd . _egg = append ( _egd . _egg , _fbgb . _bdf . _egg ... ) ; _egd . _dbe = append ( _egd . _dbe , _fbgb . _bdf . _dbe ... ) ; _bbbg . _fdfc += _fbgb . _cfd ; _bbbg . _dcef += _fbgb . _acg ; case "\u0072\u0067" , "\u0067" , "\u006b" , "\u0063\u0073" , "\u0073\u0063" , "\u0073\u0063\u006e" : _fdc . _cddag . ColorspaceNonStroking = _aeb . ColorspaceNonStroking ;
_fdc . _cddag . ColorNonStroking = _aeb . ColorNonStroking ; case "\u0052\u0047" , "\u0047" , "\u004b" , "\u0043\u0053" , "\u0053\u0043" , "\u0053\u0043\u004e" : _fdc . _cddag . ColorspaceStroking = _aeb . ColorspaceStroking ; _fdc . _cddag . ColorStroking = _aeb . ColorStroking ;
} ; return nil ; } ) ; _bcd = _aba . Process ( _gfg ) ; return _egd , _bbbg . _fdfc , _bbbg . _dcef , _bcd ; } ; func ( _decbb paraList ) lines ( ) [ ] * textLine { var _gaeab [ ] * textLine ; for _ , _bce := range _decbb { _gaeab = append ( _gaeab , _bce . _gadg ... ) ; } ; return _gaeab ; } ;
2021-12-14 01:08:28 +00:00
2022-06-27 19:58:38 +00:00
// String returns a human readable description of `path`.
func ( _ddaf * subpath ) String ( ) string { _cddb := _ddaf . _gcbb ; _dfcb := len ( _cddb ) ; if _dfcb <= 5 { return _be . Sprintf ( "\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f" , _dfcb , _cddb ) ; } ; return _be . Sprintf ( "\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f" , _dfcb , _cddb [ 0 ] , _cddb [ 1 ] , _cddb [ _dfcb - 1 ] ) ;
} ; func ( _adde rulingList ) primMinMax ( ) ( float64 , float64 ) { _acecg , _dbbc := _adde [ 0 ] . _ccb , _adde [ 0 ] . _ccb ; for _ , _ecff := range _adde [ 1 : ] { if _ecff . _ccb < _acecg { _acecg = _ecff . _ccb ; } else if _ecff . _ccb > _dbbc { _dbbc = _ecff . _ccb ; } ; } ; return _acecg , _dbbc ;
2022-03-13 12:41:53 +00:00
} ;
2021-12-14 01:08:28 +00:00
2022-06-27 19:58:38 +00:00
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_adgf PageText) ToText() string {
	// Thin alias kept for backwards compatibility; all the work happens in Text().
	text := _adgf.Text()
	return text
}

// _ecfc appends the mark `_aded` to `_fbbf` and returns the extended slice.
// The mark's Offset is set to the current value of `*_efgf`, and `*_efgf` is then advanced by
// the byte length of the mark's Text so the next mark continues where this one ends.
func _ecfc(_fbbf []TextMark, _efgf *int, _aded TextMark) []TextMark {
	offset := *_efgf
	_aded.Offset = offset
	*_efgf = offset + len(_aded.Text)
	return append(_fbbf, _aded)
}
// New returns an Extractor instance for extracting content from the input PDF page.
// It returns an error when `page` is nil, when the page's content streams cannot be read, or
// when the page's MediaBox cannot be obtained.
func New(page *_bd.PdfPage) (*Extractor, error) {
	// Usage-tracking key; decodes to "extractor.New".
	const _ee = "\u0065\u0078\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077"

	// A nil page would otherwise be dereferenced below and panic.
	if page == nil {
		return nil, _f.New("extractor requires a non-nil page")
	}

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}
	mediaBox, err := page.GetMediaBox()
	if err != nil {
		// Decodes to "extractor requires mediaBox. %v".
		return nil, _be.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	extractor := &Extractor{
		_da:  contents,
		_dbb: page.Resources,
		_dd:  *mediaBox,
		_bg:  map[string]fontEntry{},
		_dg:  map[string]textResult{},
	}

	// Normalise the stored copy of the MediaBox so that Llx <= Urx and Lly <= Ury.
	box := &extractor._dd
	if box.Llx > box.Urx {
		// Decodes to "MediaBox has X coordinates reversed. %.2f Fixing."
		_eg.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", extractor._dd)
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		// Decodes to "MediaBox has Y coordinates reversed. %.2f Fixing."
		_eg.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", extractor._dd)
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	_ba.TrackUse(_ee)
	return extractor, nil
}
2022-06-06 22:48:24 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
2022-06-27 19:58:38 +00:00
type ImageExtractOptions struct {
	// NOTE(review): presumably controls whether inline stencil masks are returned by image
	// extraction; confirm against the image extraction code, which is not visible here.
	IncludeInlineStencilMasks bool
}

// asRuling converts the line `_abbde` into a ruling marked with kind `_eagf`.
// For a `_gaba` line the primary coordinate is the mean x and the extent runs over y; for a
// `_ddga` line the primary coordinate is the mean y and the extent runs over x.
// Any other kind is logged as an error and yields (nil, false).
func (_abbde lineRuling) asRuling() (*ruling, bool) {
	start, end := _abbde._eadee, _abbde._feab

	var primary, extentA, extentB float64
	switch _abbde._dgfg {
	case _gaba:
		primary = _abbde.xMean()
		extentA, extentB = start.Y, end.Y
	case _ddga:
		primary = _abbde.yMean()
		extentA, extentB = start.X, end.X
	default:
		// Decodes to "bad primary kind=%d".
		_eg.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _abbde._dgfg)
		return nil, false
	}

	return &ruling{
		_gggf:  _abbde._dgfg,
		Color:  _abbde.Color,
		_beaec: _eagf,
		_ccb:   primary,
		_gaad:  _bf.Min(extentA, extentB),
		_gdaf:  _bf.Max(extentA, extentB),
	}, true
}

// numBorders returns how many of the four border flags of `_ecadg` are set.
func (_ecadg gridTile) numBorders() int {
	count := 0
	for _, present := range [...]bool{_ecadg._fgbc, _ecadg._eafc, _ecadg._ccadg, _ecadg._bfgeab} {
		if present {
			count++
		}
	}
	return count
}

// lineRuling is a straight line between two points together with its ruling kind, its mark
// kind and its colour.
type lineRuling struct {
	_dgfg rulingKind // Kind of ruling; asRuling accepts only `_gaba` and `_ddga`.
	_egcg markKind
	_fb.Color
	_eadee, _feab _g.Point // The two end points of the line.
}
// String returns a string describing `i`.
func (_gebd gridTile) String() string {
	// Each border is shown as its letter when present and as "_" otherwise.
	left, right, bottom, top := "\u005f", "\u005f", "\u005f", "\u005f"
	if _gebd._fgbc {
		left = "\u004c" // "L"
	}
	if _gebd._eafc {
		right = "\u0052" // "R"
	}
	if _gebd._ccadg {
		bottom = "\u0042" // "B"
	}
	if _gebd._bfgeab {
		top = "\u0054" // "T"
	}
	// The format decodes to "%6.2f %1s%1s%1s%1s".
	return _be.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073", _gebd.PdfRectangle, left, right, bottom, top)
}

// computeEBBoxes computes the `_bgca` box of every paragraph in `_dadg`.
// Each box starts as the paragraph's own rectangle. It is then widened horizontally towards
// paragraphs that do not extend above its bottom edge, but never past the nearest y-neighbour
// on either side. When `_fddf` is set the paragraphs' rectangles are overwritten with the result.
func (_dadg paraList) computeEBBoxes() {
	if _dafff {
		// Decodes to "computeEBBoxes:".
		_eg.Log.Info("\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a")
	}

	for _, para := range _dadg {
		para._bgca = para.PdfRectangle
	}

	neighbours := _dadg.yNeighbours(0)
	for i, para := range _dadg {
		box := para._bgca

		// Horizontal limits set by the closest neighbours lying entirely to the left or right.
		leftLimit, rightLimit := -1.0e9, +1.0e9
		for _, k := range neighbours[para] {
			other := _dadg[k]._bgca
			switch {
			case other.Urx < box.Llx:
				leftLimit = _bf.Max(leftLimit, other.Urx)
			case box.Urx < other.Llx:
				rightLimit = _bf.Min(rightLimit, other.Llx)
			}
		}

		// Boxes updated by earlier iterations are visible here because they are re-read each time.
		for j, candidate := range _dadg {
			below := candidate._bgca
			if i == j || below.Ury > box.Lly {
				continue
			}
			switch {
			case leftLimit <= below.Llx && below.Llx < box.Llx:
				box.Llx = below.Llx
			case below.Urx <= rightLimit && box.Urx < below.Urx:
				box.Urx = below.Urx
			}
		}

		if _dafff {
			// Decodes to "%4d: %6.2f→%6.2f %q\n".
			_be.Printf("\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a", i, para._bgca, box, _dbec(para.text(), 50))
		}
		para._bgca = box
	}

	if !_fddf {
		return
	}
	for _, para := range _dadg {
		para.PdfRectangle = para._bgca
	}
}

// _cbge returns the distinct inner keys of the nested map `_dabb` in ascending order.
func _cbge(_dabb map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_dabb))
	seen := make(map[float64]struct{}, len(_dabb))
	for _, inner := range _dabb {
		for key := range inner {
			if _, duplicate := seen[key]; !duplicate {
				keys = append(keys, key)
				seen[key] = struct{}{}
			}
		}
	}
	_cc.Float64s(keys)
	return keys
}

// _cfgb rounds `_bebdg` to the nearest multiple of `_ebba`.
func _cfgb(_bebdg float64) float64 {
	steps := _bf.Round(_bebdg / _ebba)
	return _ebba * steps
}
// extractFormImages extracts the images found in the content stream of the form XObject named
// `_dfcd`, looked up in `_ace`. A missing form is not an error. The form's own resources are
// used when present, otherwise `_ace` is used. On success the counter `_gf` is incremented.
// The graphics state `_eda` is not used.
func (_dbba *imageExtractContext) extractFormImages(_dfcd *_db.PdfObjectName, _eda _cce.GraphicsState, _ace *_bd.PdfPageResources) error {
	form, err := _ace.GetXObjectFormByName(*_dfcd)
	switch {
	case err != nil:
		return err
	case form == nil:
		return nil
	}

	stream, err := form.GetContentStream()
	if err != nil {
		return err
	}

	resources := _ace
	if form.Resources != nil {
		resources = form.Resources
	}

	if err = _dbba.extractContentStreamImages(string(stream), resources); err != nil {
		return err
	}
	_dbba._gf++
	return nil
}

// gridIntersecting reports whether `_cecgg` holds both for the two rulings' `_gaad` values and
// for their `_gdaf` values.
func (_bfegc *ruling) gridIntersecting(_fbgg *ruling) bool {
	if !_cecgg(_bfegc._gaad, _fbgg._gaad) {
		return false
	}
	return _cecgg(_bfegc._gdaf, _fbgg._gdaf)
}

// bbox returns the bounding rectangle of the word.
func (_bbgbd *textWord) bbox() _bd.PdfRectangle {
	rect := _bbgbd.PdfRectangle
	return rect
}
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
type TextTable struct {
	W     int           // Number of cells in each row.
	H     int           // Number of rows.
	Cells [][]TableCell // Indexed as Cells[y][x].
}

// _ccg reports whether the horizontal extent of the word `_dacgd` overlaps the horizontal
// extent of the bag `_ffcf` after the bag has been widened by `_cbfc` on both sides.
func _ccg(_ffcf *wordBag, _dacgd *textWord, _cbfc float64) bool {
	startsBeforeBagEnd := _dacgd.Llx < _ffcf.Urx+_cbfc
	endsAfterBagStart := _ffcf.Llx-_cbfc < _dacgd.Urx
	return startsBeforeBagEnd && endsAfterBagStart
}
// Tables returns the tables extracted from the page.
func (_dgaf PageText) Tables() []TextTable {
	tables := _dgaf._bfbb
	if _eeca {
		// Decodes to "Tables: %d".
		_eg.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}

// lineTo adds the point (`_ceeb`, `_gcdb`) to the shapes state via addPoint, logging the point
// and its device-space position first when `_dded` is set.
func (_dgged *shapesState) lineTo(_ceeb, _gcdb float64) {
	if _dded {
		device := _dgged.devicePoint(_ceeb, _gcdb)
		// Decodes to "lineTo(%.2f,%.2f p=%.2f".
		_eg.Log.Info("\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066", _ceeb, _gcdb, device)
	}
	_dgged.addPoint(_ceeb, _gcdb)
}

// complete reports whether every tile stored in the tiling is itself complete.
func (_bgaf gridTiling) complete() bool {
	for _, row := range _bgaf._bdgaf {
		for _, tile := range row {
			if tile.complete() {
				continue
			}
			return false
		}
	}
	return true
}

// checkWidth returns the width `_cecg - _cbea` and whether that width is at most `_ffd`.
func (_fdgeb rectRuling) checkWidth(_cbea, _cecg float64) (float64, bool) {
	width := _cecg - _cbea
	return width, width <= _ffd
}

// inDiacriticArea compares the position of `_beaa` with that of `_feeg`. It reports true when
// the summed offsets of the left and right edges are within `_edaa` times the width of `_beaa`
// and the offset of the bottom edges is within `_edaa` times the height of `_beaa`.
func (_beaa *textMark) inDiacriticArea(_feeg *textMark) bool {
	leftOffset := _beaa.Llx - _feeg.Llx
	rightOffset := _beaa.Urx - _feeg.Urx
	bottomOffset := _beaa.Lly - _feeg.Lly

	if !(_bf.Abs(leftOffset+rightOffset) < _beaa.Width()*_edaa) {
		return false
	}
	return _bf.Abs(bottomOffset) < _beaa.Height()*_edaa
}

// _gecc returns a `_ddga` ruling lying along the top edge of `_cefg`: its primary coordinate is
// the rectangle's Ury and it spans from Llx to Urx.
func _gecc(_cefg _bd.PdfRectangle) *ruling {
	top := new(ruling)
	top._gggf = _ddga
	top._ccb = _cefg.Ury
	top._gaad = _cefg.Llx
	top._gdaf = _cefg.Urx
	return top
}
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
func (_abf *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pageText, statA, statB, err := _abf.extractPageText(_abf._da, _abf._dbb, _g.IdentityMatrix(), 0)
	switch err {
	case nil, _bd.ErrColorOutOfRange:
		// ErrColorOutOfRange is tolerated: extraction carries on with the text gathered so far.
	default:
		return nil, 0, 0, err
	}

	pageText.computeViews()
	if err = _gagcd(pageText); err != nil {
		return nil, 0, 0, err
	}
	return pageText, statA, statB, nil
}

// isExportable reports whether the table should be exported.
// A table flagged with `_cdgb` always is. Otherwise the first cell of every row is inspected:
// the table is exportable unless every one of those cells exists and holds at most one rune or
// text matching `_abggb`.
func (_agdda *textTable) isExportable() bool {
	if _agdda._cdgb {
		return true
	}
	for row := 0; row < _agdda._gccb; row++ {
		cell := _agdda.get(0, row)
		if cell == nil {
			return true
		}
		content := cell.text()
		if _c.RuneCountInString(content) <= 1 || _abggb.MatchString(content) {
			continue
		}
		return true
	}
	return false
}

// toTilings tidies the rulings, groups them with toGrids and converts every grid to a tiling.
// It returns the tidied rulings together with the tilings, or (nil, nil) for an empty list.
func (_cdcec rulingList) toTilings() (rulingList, []gridTiling) {
	_cdcec.log("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s") // "toTilings"
	if len(_cdcec) == 0 {
		return nil, nil
	}

	tidy := _cdcec.tidied("\u0061\u006c\u006c") // "all"
	tidy.log("\u0074\u0069\u0064\u0069\u0065\u0064") // "tidied"

	grids := tidy.toGrids()
	tilings := make([]gridTiling, 0, len(grids))
	for _, grid := range grids {
		tilings = append(tilings, grid.asTiling())
	}
	return tidy, tilings
}

// reduceTiling drops empty rows and columns that lie within `_ccea` of a neighbouring grid line
// of `_gefbf`. A row is dropped when it is empty and closer than `_ccea` to the previous row; a
// column is dropped when it is empty and closer than `_ccea` to the next column.
// The receiver is returned unchanged when nothing is dropped, otherwise a new table is built.
func (_bdfa *textTable) reduceTiling(_gefbf gridTiling, _ccea float64) *textTable {
	keptRows := make([]int, 0, _bdfa._gccb)
	keptCols := make([]int, 0, _bdfa._bgcfb)
	xs, ys := _gefbf._defd, _gefbf._eabe

	for row := 0; row < _bdfa._gccb; row++ {
		if row > 0 && _bf.Abs(ys[row-1]-ys[row]) < _ccea && _bdfa.emptyCompositeRow(row) {
			continue
		}
		keptRows = append(keptRows, row)
	}
	for col := 0; col < _bdfa._bgcfb; col++ {
		if col < _bdfa._bgcfb-1 && _bf.Abs(xs[col+1]-xs[col]) < _ccea && _bdfa.emptyCompositeColumn(col) {
			continue
		}
		keptCols = append(keptCols, col)
	}

	if len(keptRows) == _bdfa._gccb && len(keptCols) == _bdfa._bgcfb {
		return _bdfa
	}

	reduced := textTable{
		_cdgb:  _bdfa._cdgb,
		_bgcfb: len(keptCols),
		_gccb:  len(keptRows),
		_agga:  make(map[uint64]compositeCell, len(keptCols)*len(keptRows)),
	}
	if _eeca {
		// Decodes to "reduceTiling: %dx%d -> %dx%d", "reducedCols: %+v" and "reducedRows: %+v".
		_eg.Log.Info("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064", _bdfa._bgcfb, _bdfa._gccb, len(keptCols), len(keptRows))
		_eg.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_eg.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}

	// Copy every non-empty composite cell to its position in the reduced table.
	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			paras, rect := _bdfa.getComposite(oldCol, oldRow)
			if len(paras) == 0 {
				continue
			}
			if _eeca {
				_be.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newCol, newRow, oldCol, oldRow, _dbec(paras.merge().text(), 50))
			}
			reduced.putComposite(newCol, newRow, paras, rect)
		}
	}
	return &reduced
}

// text returns the text of the paragraph as produced by writeText.
func (_dddc *textPara) text() string {
	var buffer _cde.Buffer
	_dddc.writeText(&buffer)
	return buffer.String()
}

// _bebd classifies a line from its extents `_cccb` and `_bfbag` using the minimum length
// `_aafb`. It returns `_ddga` when `_cccb` is at least `_aafb` and `_acbb(_bfbag, _cccb)` holds,
// `_gaba` for the mirrored condition, and `_eedb` otherwise.
func _bebd(_cccb, _bfbag, _aafb float64) rulingKind {
	switch {
	case _cccb >= _aafb && _acbb(_bfbag, _cccb):
		return _ddga
	case _bfbag >= _aafb && _acbb(_cccb, _bfbag):
		return _gaba
	}
	return _eedb
}

// _gagac reports whether `_fecg` consists solely of Unicode white space. The empty string
// counts as all white space.
func _gagac(_fecg string) bool {
	notSpace := func(r rune) bool { return !_cd.IsSpace(r) }
	return _dc.IndexFunc(_fecg, notSpace) < 0
}

// _fdde returns the matrix that translates by the coordinates of `_fgfae`.
func _fdde(_fgfae _g.Point) _g.Matrix {
	dx, dy := _fgfae.X, _fgfae.Y
	return _g.TranslationMatrix(dx, dy)
}

// getFillColor returns the non-stroking colour of the text object's graphics state, converted
// by `_cgdc` using the non-stroking colorspace.
func (_ceg *textObject) getFillColor() _fb.Color {
	state := _ceg._cddag
	return _cgdc(state.ColorspaceNonStroking, state.ColorNonStroking)
}

// _abggb matches text made up only of a number with an optional trailing dot, or of a run of
// the letters I, i and v, optionally followed by a closing parenthesis.
// The pattern decodes to `^\s*(\d+\.?|[Iiv]+)\s*\)?$`.
var _abggb = _e.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")

// computeBbox returns the union of the rectangles of all non-nil cells in the table, or the
// zero rectangle when the table has no cells.
func (_fgeea *textTable) computeBbox() _bd.PdfRectangle {
	var union _bd.PdfRectangle
	seeded := false
	for row := 0; row < _fgeea._gccb; row++ {
		for col := 0; col < _fgeea._bgcfb; col++ {
			cell := _fgeea.get(col, row)
			if cell == nil {
				continue
			}
			if seeded {
				union = _gcff(union, cell.PdfRectangle)
				continue
			}
			union = cell.PdfRectangle
			seeded = true
		}
	}
	return union
}

// removeDuplicates removes words that duplicate another word in the same depth bin.
// Two distinct words are duplicates when their `_debad` strings are equal and all four edges of
// their rectangles differ by less than a tolerance. The tolerance is `_abae` times the `_efag`
// value of the first word of the depth bin that started the scan.
// Bins left empty are deleted from the bag.
func (_bgebb *wordBag) removeDuplicates() {
	if _decd {
		// Decodes to "removeDuplicates: %q".
		_eg.Log.Info("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071", _bgebb.text())
	}

	for _, depthIdx := range _bgebb.depthIndexes() {
		// The bin may have been emptied or deleted by an earlier pass.
		if len(_bgebb._fadg[depthIdx]) == 0 {
			continue
		}
		first := _bgebb._fadg[depthIdx][0]
		tolerance := _abae * first._efag
		depth := first._acag
		near := func(a, b float64) bool { return _bf.Abs(a-b) < tolerance }

		for _, bandIdx := range _bgebb.depthBand(depth, depth+tolerance) {
			dropped := map[*textWord]struct{}{}
			words := _bgebb._fadg[bandIdx]

			for _, keep := range words {
				if _, gone := dropped[keep]; gone {
					continue
				}
				for _, other := range words {
					if _, gone := dropped[other]; gone {
						continue
					}
					if other != keep && other._debad == keep._debad &&
						near(other.Llx, keep.Llx) && near(other.Urx, keep.Urx) &&
						near(other.Lly, keep.Lly) && near(other.Ury, keep.Ury) {
						dropped[other] = struct{}{}
					}
				}
			}

			if len(dropped) == 0 {
				continue
			}

			// Compact the survivors to the front of the slice, then trim the tail.
			next := 0
			for _, word := range words {
				if _, gone := dropped[word]; gone {
					continue
				}
				words[next] = word
				next++
			}
			_bgebb._fadg[bandIdx] = words[:len(words)-len(dropped)]
			if len(_bgebb._fadg[bandIdx]) == 0 {
				delete(_bgebb._fadg, bandIdx)
			}
		}
	}
}

// _gbcc returns, for each index i in [0, `_dgbffb`), the running offset at which group i starts
// when group k occupies `len(_gbac[k]) + 1` slots. It also returns the total number of slots.
func _gbcc(_dgbffb int, _gbac map[int][]float64) ([]int, int) {
	offsets := make([]int, _dgbffb)
	total := 0
	for i := range offsets {
		offsets[i] = total
		total += len(_gbac[i]) + 1
	}
	return offsets, total
}

// endsInHyphen reports whether the line ends in a hyphen that passes the `_bcfg` check.
// The last rune of the last word must be in the Unicode Hyphen class. The result is then true
// if the word has `_fgbg` set and `_bcfg` accepts the word's text, and otherwise is whatever
// `_bcfg` reports for the text of the whole line.
// The line must contain at least one word.
func (_cagg *textLine) endsInHyphen() bool {
	lastWord := _cagg._ebge[len(_cagg._ebge)-1]
	wordText := lastWord._debad

	lastRune, size := _c.DecodeLastRuneInString(wordText)
	if size <= 0 {
		return false
	}
	if !_cd.Is(_cd.Hyphen, lastRune) {
		return false
	}

	if lastWord._fgbg && _bcfg(wordText) {
		return true
	}
	return _bcfg(_cagg.text())
}

// _acbb reports whether `_addeg` divided by the larger of `_accb` and `_begf` is below `_cged`.
func _acbb(_addeg, _begf float64) bool {
	denominator := _bf.Max(_accb, _begf)
	return _addeg/denominator < _cged
}
// stroke appends a path section made of the current subpaths and the current stroke colour to
// `*_ggbfc`. Debug output is printed when `_daafa` is set; when `_faca` is also set, the
// subpaths with indexes 0 through 10 are listed.
func (_fcdg *shapesState) stroke(_ggbfc *[]pathSection) {
	section := pathSection{_aga: _fcdg._gdbg, Color: _fcdg._baafg.getStrokeColor()}
	*_ggbfc = append(*_ggbfc, section)

	if !_daafa {
		return
	}
	// Decodes to "   STROKE: %d strokes ss=%s color=%+v %6.2f\n".
	_be.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*_ggbfc), _fcdg, _fcdg._baafg.getStrokeColor(), section.bbox())

	if !_faca {
		return
	}
	subpaths := _fcdg._gdbg
	for i := 0; i < len(subpaths) && i <= 10; i++ {
		// Decodes to "%8d: %s\n".
		_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, subpaths[i])
	}
}

// closePath closes the most recent subpath.
// If a move is pending (`_beg`), a new subpath is first started at `_eee`. If no move is
// pending and there are no subpaths, nothing is closed.
func (_dafb *shapesState) closePath() {
	switch {
	case _dafb._beg:
		_dafb._gdbg = append(_dafb._gdbg, _gfgdb(_dafb._eee))
		_dafb._beg = false
	case len(_dafb._gdbg) == 0:
		if _dded {
			// Decodes to "closePath with no path".
			_eg.Log.Debug("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068")
		}
		_dafb._beg = false
		return
	}

	last := len(_dafb._gdbg) - 1
	_dafb._gdbg[last].close()
	if _dded {
		// Decodes to "closePath: %s".
		_eg.Log.Info("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073", _dafb)
	}
}

// fontsize returns the `_aefd` value of the first line of the paragraph. The paragraph must
// contain at least one line.
func (_ebd *textPara) fontsize() float64 {
	firstLine := _ebd._gadg[0]
	return firstLine._aefd
}

// intersections returns, for every ruling that intersects a ruling of the other kind, the set
// of indexes of the rulings it intersects. Only `_gaba` / `_ddga` pairs are tested.
// It returns nil when there are fewer than `_gafg + 1` `_gaba` rulings, fewer than
// `_ebgg + 1` `_ddga` rulings, or more than `_ffcfg` rulings of those two kinds in total.
func (_fgga rulingList) intersections() map[int]intSet {
	var gabaIdx, ddgaIdx []int
	for i, r := range _fgga {
		if r._gggf == _gaba {
			gabaIdx = append(gabaIdx, i)
		} else if r._gggf == _ddga {
			ddgaIdx = append(ddgaIdx, i)
		}
	}

	if len(gabaIdx) < _gafg+1 || len(ddgaIdx) < _ebgg+1 {
		return nil
	}
	if len(gabaIdx)+len(ddgaIdx) > _ffcfg {
		// Decodes to "intersections: TOO MANY rulings vecs=%d = %d x %d".
		_eg.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064", len(_fgga), len(gabaIdx), len(ddgaIdx))
		return nil
	}

	crossings := make(map[int]intSet, len(gabaIdx)+len(ddgaIdx))
	// link records that ruling `to` intersects ruling `from`, creating the set on first use.
	link := func(from, to int) {
		if _, exists := crossings[from]; !exists {
			crossings[from] = make(intSet)
		}
		crossings[from].add(to)
	}
	for _, a := range gabaIdx {
		for _, b := range ddgaIdx {
			if !_fgga[a].intersects(_fgga[b]) {
				continue
			}
			link(a, b)
			link(b, a)
		}
	}
	return crossings
}

// log prints the tiling, titled `_gbgca`, when `_gdcf` is set: first the two coordinate lists,
// then every tile found under each `_eabe` / `_defd` coordinate pair.
func (_fdcc gridTiling) log(_gbgca string) {
	if !_gdcf {
		return
	}
	// Decodes to "tiling: %d x %d %q", "   llx=%.2f\n" and "   lly=%.2f\n".
	_eg.Log.Info("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071", len(_fdcc._defd), len(_fdcc._eabe), _gbgca)
	_be.Printf("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a", _fdcc._defd)
	_be.Printf("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a", _fdcc._eabe)

	for rowIdx, y := range _fdcc._eabe {
		row, found := _fdcc._bdgaf[y]
		if !found {
			continue
		}
		// Decodes to "%4d: %6.2f\n".
		_be.Printf("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a", rowIdx, y)
		for colIdx, x := range _fdcc._defd {
			if tile, present := row[x]; present {
				// Decodes to "%8d: %s\n".
				_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", colIdx, tile.String())
			}
		}
	}
}

// hasLines reports whether `_bfgg` holds between the cell's rectangle and the rectangle of at
// least one of the lines in `_dffde`.
func (_efcc compositeCell) hasLines(_dffde []*textLine) bool {
	total := len(_dffde)
	for i, line := range _dffde {
		hit := _bfgg(_efcc.PdfRectangle, line.PdfRectangle)
		if _eeca {
			_be.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", hit, i, total)
			_be.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", _efcc)
			_be.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if hit {
			return true
		}
	}
	return false
}

// _acec returns the top of `_gaaac` minus the bottom of the bounding box of `_gfeg`.
func _acec(_gaaac _bd.PdfRectangle, _gfeg bounded) float64 {
	bottom := _gfeg.bbox().Lly
	return _gaaac.Ury - bottom
}

// sort orders the rulings in place using the list's `comp` comparison.
func (_fbcf rulingList) sort() {
	less := _fbcf.comp
	_cc.Slice(_fbcf, less)
}

// pathSection is a group of subpaths that share one colour.
type pathSection struct {
	_aga []*subpath
	_fb.Color
}

// _ecgab maps mark kinds to their display names.
var _ecgab = map[markKind]string{
	_eagf:  "\u0073\u0074\u0072\u006f\u006b\u0065", // "stroke"
	_edaga: "\u0066\u0069\u006c\u006c",             // "fill"
	_bcbda: "\u0061u\u0067\u006d\u0065\u006e\u0074", // "augment"
}
// _ggcd classifies the line between `_egdea` and `_fgbe` by passing the absolute x and y
// differences of the two points to `_bebd` with the minimum length `_cafe`.
func _ggcd(_egdea, _fgbe _g.Point) rulingKind {
	dx := _bf.Abs(_egdea.X - _fgbe.X)
	dy := _bf.Abs(_egdea.Y - _fgbe.Y)
	return _bebd(dx, dy, _cafe)
}

// _gcadg returns the negated bottom coordinate of the bounding box of `_geb`.
func _gcadg(_geb bounded) float64 {
	box := _geb.bbox()
	return -box.Lly
}

// isQuadrilateral reports whether the subpath has exactly four points, or five points of which
// the last coincides with the first.
func (_egec *subpath) isQuadrilateral() bool {
	points := _egec._gcbb
	switch len(points) {
	case 4:
		return true
	case 5:
		first, last := points[0], points[4]
		return first.X == last.X && first.Y == last.Y
	default:
		return false
	}
}

// establishSubpath returns the subpath that new points should be added to.
// When lastpointEstablished reports false, a new subpath is started at the point it returned.
// It returns nil if there is still no subpath afterwards, and otherwise clears `_beg`.
func (_gffa *shapesState) establishSubpath() *subpath {
	if point, established := _gffa.lastpointEstablished(); !established {
		_gffa._gdbg = append(_gffa._gdbg, _gfgdb(point))
	}
	count := len(_gffa._gdbg)
	if count == 0 {
		return nil
	}
	_gffa._beg = false
	return _gffa._gdbg[count-1]
}

// _fegg reports whether the vertical extents of the two rectangles overlap or touch.
func _fegg(_acda, _efde _bd.PdfRectangle) bool {
	firstStartsBelowSecondTop := _acda.Lly <= _efde.Ury
	secondStartsBelowFirstTop := _efde.Lly <= _acda.Ury
	return firstStartsBelowSecondTop && secondStartsBelowFirstTop
}

// _ddag removes the first occurrence of `_egfdd` from `_befcd` via `_bbbca` and returns the
// result. If the word is absent an error is logged and nil is returned.
func _ddag(_befcd []*textWord, _egfdd *textWord) []*textWord {
	position := -1
	for i := range _befcd {
		if _befcd[i] == _egfdd {
			position = i
			break
		}
	}
	if position < 0 {
		// Decodes to "removeWord: words doesn't contain word=%s".
		_eg.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", _egfdd)
		return nil
	}
	return _bbbca(_befcd, position)
}

// _bfgg reports whether both `_ecag` and the vertical overlap test `_fegg` hold for the two
// rectangles.
func _bfgg(_cddg, _bafg _bd.PdfRectangle) bool {
	if !_ecag(_cddg, _bafg) {
		return false
	}
	return _fegg(_cddg, _bafg)
}

// log prints the rulings, titled `_ecec`, when `_daafa` is set.
func (_bdda rulingList) log(_ecec string) {
	if !_daafa {
		return
	}
	// Decodes to "### %10s: vecs=%s".
	_eg.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _ecec, _bdda.String())
	for i := range _bdda {
		// Decodes to "%4d: %s\n".
		_be.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, _bdda[i].String())
	}
}

// tables returns the exportable tables attached to the paragraphs, converted to TextTable.
func (_bedg paraList) tables() []TextTable {
	var exported []TextTable
	if _eeca {
		// Decodes to "paras.tables:".
		_eg.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for i := range _bedg {
		table := _bedg[i]._dbfdg
		if table == nil || !table.isExportable() {
			continue
		}
		exported = append(exported, table.toTextTable())
	}
	return exported
}

// sortReadingOrder reorders the paragraphs in place. It computes the extended boxes, sorts the
// paragraphs with `_dfe`, then applies the order returned by topoOrder.
// Lists with at most one paragraph are left as they are.
func (_dgbbd paraList) sortReadingOrder() {
	_eg.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_dgbbd))
	if len(_dgbbd) <= 1 {
		return
	}

	_dgbbd.computeEBBoxes()

	// NOTE(review): `<= 0` makes this comparison non-strict, so it reports "less" for equal
	// elements. sort.Slice documents a strict ordering; confirm the tie behaviour is intended
	// before changing it, because the order of ties feeds into topoOrder.
	before := func(i, j int) bool { return _dfe(_dgbbd[i], _dgbbd[j]) <= 0 }
	_cc.Slice(_dgbbd, before)

	order := _dgbbd.topoOrder()
	_dgbbd.reorder(order)
}

// _ebbf returns the difference between the `_gcadg` values of `_bdgad` and `_bbcb`.
func _ebbf(_bdgad, _bbcb bounded) float64 {
	first := _gcadg(_bdgad)
	second := _gcadg(_bbcb)
	return first - second
}
// depthRange returns, in ascending order, the keys of `_fadg` that lie in the closed interval
// [`_egaf`, `_eaef`]. It returns nil when no key lies in that interval.
func (_afbf *wordBag) depthRange(_egaf, _eaef int) []int {
	var depths []int
	for depth := range _afbf._fadg {
		if depth < _egaf || depth > _eaef {
			continue
		}
		depths = append(depths, depth)
	}
	if len(depths) == 0 {
		return nil
	}
	_cc.Ints(depths)
	return depths
}

// shapesState holds the path construction state used while processing path operators.
type shapesState struct {
	_eccf  _g.Matrix
	_gaga  _g.Matrix
	_gdbg  []*subpath  // Subpaths built so far.
	_beg   bool        // Set by moveTo; cleared once a subpath has been started or closed.
	_eee   _g.Point    // Device-space point of the most recent moveTo.
	_baafg *textObject // Source of the stroke colour used by stroke().
}

// moveTo records the device-space position of (`_dffg`, `_fbgc`) as the pending start point
// and sets `_beg`.
func (_gcc *shapesState) moveTo(_dffg, _fbgc float64) {
	_gcc._beg = true
	_gcc._eee = _gcc.devicePoint(_dffg, _fbgc)
	if !_dded {
		return
	}
	// Decodes to "moveTo: %.2f,%.2f device=%.2f".
	_eg.Log.Info("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066", _dffg, _fbgc, _gcc._eee)
}

// blocks reports whether some ruling in `_abfb` lies between `_debaf` and `_gedb`.
// The two rulings must overlap in their `_gaad`..`_gdaf` extents. A ruling in the list blocks
// them when its `_ccb` lies between theirs, within the tolerance `_ffd`, and its extent
// reaches the part the two rulings have in common.
func (_abfb rulingList) blocks(_debaf, _gedb *ruling) bool {
	if _debaf._gaad > _gedb._gdaf || _gedb._gaad > _debaf._gdaf {
		return false
	}
	sharedLo := _bf.Max(_debaf._gaad, _gedb._gaad)
	sharedHi := _bf.Min(_debaf._gdaf, _gedb._gdaf)

	// Order the pair by primary coordinate so that `near` never exceeds `far`.
	near, far := _debaf, _gedb
	if near._ccb > far._ccb {
		near, far = far, near
	}

	for _, candidate := range _abfb {
		if !(near._ccb <= candidate._ccb+_ffd) || !(candidate._ccb <= far._ccb+_ffd) {
			continue
		}
		if candidate._gaad <= sharedHi && sharedLo <= candidate._gdaf {
			return true
		}
	}
	return false
}

// Mark kinds. The zero value `_cece` is followed by the three kinds that are given names in
// the `_ecgab` table.
const (
	_cece markKind = iota
	_eagf
	_edaga
	_bcbda
)

// _gfgdb returns a new subpath whose only point is `_fcga`.
func _gfgdb(_fcga _g.Point) *subpath {
	points := []_g.Point{_fcga}
	return &subpath{_gcbb: points}
}
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd', the Unicode replacement character).
func (_cbf *Extractor) ExtractText() (string, error) {
	// Delegate to ExtractTextWithStats and drop the two counters.
	text, _, _, err := _cbf.ExtractTextWithStats()
	return text, err
}

// bbox returns the rectangle spanned by the rulings in the list.
// The kind of the first ruling decides which of primMinMax/secMinMax supplies
// the x extent and which the y extent. An empty list is logged as an error
// and yields the zero rectangle.
func (_aagf rulingList) bbox() _bd.PdfRectangle {
	if len(_aagf) == 0 {
		_eg.Log.Error("r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073")
		return _bd.PdfRectangle{}
	}
	var box _bd.PdfRectangle
	if _aagf[0]._gggf != _ddga {
		box.Llx, box.Urx = _aagf.primMinMax()
		box.Lly, box.Ury = _aagf.secMinMax()
		return box
	}
	box.Llx, box.Urx = _aagf.secMinMax()
	box.Lly, box.Ury = _aagf.primMinMax()
	return box
}

// toTextTable converts the internal table into the exported TextTable form.
// Cells for which get returns nil are left as zero-valued TableCells.
func (_bffc *textTable) toTextTable() TextTable {
	if _eeca {
		_eg.Log.Info("t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064", _bffc._bgcfb, _bffc._gccb)
	}
	grid := make([][]TableCell, _bffc._gccb)
	for y := 0; y < _bffc._gccb; y++ {
		grid[y] = make([]TableCell, _bffc._bgcfb)
		for x := 0; x < _bffc._bgcfb; x++ {
			para := _bffc.get(x, y)
			if para == nil {
				continue
			}
			if _eeca {
				_be.Printf("\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", x, y, para)
			}
			cell := &grid[y][x]
			cell.Text = para.text()
			// Every cell starts its marks with a fresh counter of zero.
			var offset int
			cell.Marks._ggbg = para.toTextMarks(&offset)
		}
	}
	return TextTable{W: _bffc._bgcfb, H: _bffc._gccb, Cells: grid}
}

// _bcaga reports whether the magnitude of _eaffa is below _gdfe.
func _bcaga(_eaffa float64) bool {
	magnitude := _bf.Abs(_eaffa)
	return magnitude < _gdfe
}
// appendMark adds _cgdfg to the word's marks (_bbacg), grows the word's
// rectangle to include the mark's rectangle via _gcff, raises _efag to the
// mark's _gaaaf if that is larger, and sets _acag to the distance from the
// top of _efbcc (Ury) to the bottom of the word's rectangle (Lly).
func ( _feba * textWord ) appendMark ( _cgdfg * textMark , _efbcc _bd . PdfRectangle ) { _feba . _bbacg = append ( _feba . _bbacg , _cgdfg ) ; _feba . PdfRectangle = _gcff ( _feba . PdfRectangle , _cgdfg . PdfRectangle ) ; if _cgdfg . _gaaaf > _feba . _efag { _feba . _efag = _cgdfg . _gaaaf ;
} ; _feba . _acag = _efbcc . Ury - _feba . PdfRectangle . Lly ; } ;
// _baee maps a rune to a string holding a single code point in the range
// U+0300..U+0359. The keys are stand-alone accent-like characters (for
// example 0x0060 and 0x00B4).
// NOTE(review): presumably used to replace spacing diacritics with their
// combining forms — confirm against the users of this table.
var ( _baee = map [ rune ] string { 0x0060 : "\u0300" , 0x02CB : "\u0300" , 0x0027 : "\u0301" , 0x00B4 : "\u0301" , 0x02B9 : "\u0301" , 0x02CA : "\u0301" , 0x005E : "\u0302" , 0x02C6 : "\u0302" , 0x007E : "\u0303" , 0x02DC : "\u0303" , 0x00AF : "\u0304" , 0x02C9 : "\u0304" , 0x02D8 : "\u0306" , 0x02D9 : "\u0307" , 0x00A8 : "\u0308" , 0x00B0 : "\u030a" , 0x02DA : "\u030a" , 0x02BA : "\u030b" , 0x02DD : "\u030b" , 0x02C7 : "\u030c" , 0x02C8 : "\u030d" , 0x0022 : "\u030e" , 0x02BB : "\u0312" , 0x02BC : "\u0313" , 0x0486 : "\u0313" , 0x055A : "\u0313" , 0x02BD : "\u0314" , 0x0485 : "\u0314" , 0x0559 : "\u0314" , 0x02D4 : "\u031d" , 0x02D5 : "\u031e" , 0x02D6 : "\u031f" , 0x02D7 : "\u0320" , 0x02B2 : "\u0321" , 0x00B8 : "\u0327" , 0x02CC : "\u0329" , 0x02B7 : "\u032b" , 0x02CD : "\u0331" , 0x005F : "\u0332" , 0x204E : "\u0359" } ;
) ;
// findGridTables calls findTableGrid once per tiling in _daad. Every non-nil
// table returned is logged, collected and has its cells marked (markCells);
// every key of the second return value gets its _deed flag set.
// It returns the collected tables.
func ( _cbdc paraList ) findGridTables ( _daad [ ] gridTiling ) [ ] * textTable { if _eeca { _eg . Log . Info ( "\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073" , len ( _cbdc ) ) ; for _cbceb , _fbca := range _cbdc { _be . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _cbceb , _fbca ) ;
} ; } ; var _bbggf [ ] * textTable ; for _cefgf , _bcff := range _daad { _gcgc , _cebg := _cbdc . findTableGrid ( _bcff ) ; if _gcgc != nil { _gcgc . log ( _be . Sprintf ( "\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064" , _cefgf ) ) ;
_bbggf = append ( _bbggf , _gcgc ) ; _gcgc . markCells ( ) ; } ; for _cebab := range _cebg { _cebab . _deed = true ; } ; } ; if _eeca { _eg . Log . Info ( "\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s" , len ( _bbggf ) ) ;
} ; return _bbggf ; } ;
// depth returns -1.0 when _gfce is set; otherwise the _gddec of the first
// entry of _gadg when there is one; otherwise the depth of the table _dbfdg.
func ( _addfb * textPara ) depth ( ) float64 { if _addfb . _gfce { return - 1.0 ; } ; if len ( _addfb . _gadg ) > 0 { return _addfb . _gadg [ 0 ] . _gddec ; } ; return _addfb . _dbfdg . depth ( ) ; } ;
// addNeighbours fills in the four neighbour links of every paragraph:
// _ceabb (candidate with Urx <= this.Llx), _bbbdd (Llx >= this.Urx),
// _egad (Ury <= this.Lly) and _edaad (Lly >= this.Ury).
// Candidates come from yNeighbours(_fgaa) for the horizontal links and from
// xNeighbours(_gfge) for the vertical links. In each direction the closest
// candidate is chosen, rejected if any other candidate extends past its near
// edge, and accepted only if _fegg (horizontal) or _ecag (vertical) holds.
// A final pass clears every link that is not reciprocated.
// The helper closures size their slices with len-1, so they must not be
// called with an empty index list; both call sites skip empty lists.
func ( _dbbg paraList ) addNeighbours ( ) { _ddee := func ( _ffbcaa [ ] int , _cffec * textPara ) ( [ ] * textPara , [ ] * textPara ) { _fafg := make ( [ ] * textPara , 0 , len ( _ffbcaa ) - 1 ) ;
_ecgc := make ( [ ] * textPara , 0 , len ( _ffbcaa ) - 1 ) ; for _ , _egfgf := range _ffbcaa { _efdb := _dbbg [ _egfgf ] ; if _efdb . Urx <= _cffec . Llx { _fafg = append ( _fafg , _efdb ) ; } else if _efdb . Llx >= _cffec . Urx { _ecgc = append ( _ecgc , _efdb ) ; } ; } ; return _fafg , _ecgc ; } ;
_daca := func ( _edadd [ ] int , _eecf * textPara ) ( [ ] * textPara , [ ] * textPara ) { _fbbec := make ( [ ] * textPara , 0 , len ( _edadd ) - 1 ) ; _bfgda := make ( [ ] * textPara , 0 , len ( _edadd ) - 1 ) ; for _ , _eegdf := range _edadd { _aebb := _dbbg [ _eegdf ] ; if _aebb . Ury <= _eecf . Lly { _bfgda = append ( _bfgda , _aebb ) ;
} else if _aebb . Lly >= _eecf . Ury { _fbbec = append ( _fbbec , _aebb ) ; } ; } ; return _fbbec , _bfgda ; } ; _ccae := _dbbg . yNeighbours ( _fgaa ) ; for _ , _bdabe := range _dbbg { _adcf := _ccae [ _bdabe ] ; if len ( _adcf ) == 0 { continue ; } ; _adge , _caeec := _ddee ( _adcf , _bdabe ) ;
if len ( _adge ) == 0 && len ( _caeec ) == 0 { continue ; } ; if len ( _adge ) > 0 { _adcg := _adge [ 0 ] ; for _ , _ecea := range _adge [ 1 : ] { if _ecea . Urx >= _adcg . Urx { _adcg = _ecea ; } ; } ; for _ , _aebe := range _adge { if _aebe != _adcg && _aebe . Urx > _adcg . Llx { _adcg = nil ; break ;
} ; } ; if _adcg != nil && _fegg ( _bdabe . PdfRectangle , _adcg . PdfRectangle ) { _bdabe . _ceabb = _adcg ; } ; } ; if len ( _caeec ) > 0 { _faeca := _caeec [ 0 ] ; for _ , _bffcd := range _caeec [ 1 : ] { if _bffcd . Llx <= _faeca . Llx { _faeca = _bffcd ; } ; } ; for _ , _ffga := range _caeec { if _ffga != _faeca && _ffga . Llx < _faeca . Urx { _faeca = nil ;
break ; } ; } ; if _faeca != nil && _fegg ( _bdabe . PdfRectangle , _faeca . PdfRectangle ) { _bdabe . _bbbdd = _faeca ; } ; } ; } ; _ccae = _dbbg . xNeighbours ( _gfge ) ; for _ , _ddafe := range _dbbg { _gcef := _ccae [ _ddafe ] ; if len ( _gcef ) == 0 { continue ; } ; _dedeb , _gdceg := _daca ( _gcef , _ddafe ) ;
if len ( _dedeb ) == 0 && len ( _gdceg ) == 0 { continue ; } ; if len ( _gdceg ) > 0 { _befd := _gdceg [ 0 ] ; for _ , _fbae := range _gdceg [ 1 : ] { if _fbae . Ury >= _befd . Ury { _befd = _fbae ; } ; } ; for _ , _fecdf := range _gdceg { if _fecdf != _befd && _fecdf . Ury > _befd . Lly { _befd = nil ;
break ; } ; } ; if _befd != nil && _ecag ( _ddafe . PdfRectangle , _befd . PdfRectangle ) { _ddafe . _egad = _befd ; } ; } ; if len ( _dedeb ) > 0 { _gecga := _dedeb [ 0 ] ; for _ , _fbcba := range _dedeb [ 1 : ] { if _fbcba . Lly <= _gecga . Lly { _gecga = _fbcba ; } ; } ; for _ , _gbeg := range _dedeb { if _gbeg != _gecga && _gbeg . Lly < _gecga . Ury { _gecga = nil ;
break ; } ; } ; if _gecga != nil && _ecag ( _ddafe . PdfRectangle , _gecga . PdfRectangle ) { _ddafe . _edaad = _gecga ; } ; } ; } ; for _ , _bcdca := range _dbbg { if _bcdca . _ceabb != nil && _bcdca . _ceabb . _bbbdd != _bcdca { _bcdca . _ceabb = nil ; } ; if _bcdca . _edaad != nil && _bcdca . _edaad . _egad != _bcdca { _bcdca . _edaad = nil ;
} ; if _bcdca . _bbbdd != nil && _bcdca . _bbbdd . _ceabb != _bcdca { _bcdca . _bbbdd = nil ; } ; if _bcdca . _egad != nil && _bcdca . _egad . _edaad != _bcdca { _bcdca . _egad = nil ; } ; } ; } ;
// subdivide builds a new table in which each entry of the receiver's _agga
// map is split along the row and column corridors computed by
// compositeRowCorridors and compositeColCorridors.
// The receiver itself is returned when either corridor map is empty.
// Otherwise both corridor maps are adjusted in place by _bffd, _gbcc supplies
// the per-row/per-column offsets and the new dimensions, and every piece
// produced by split is stored in the new table at (offset + local index).
func ( _becff * textTable ) subdivide ( ) * textTable { _becff . logComposite ( "\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e" ) ;
_eecgg := _becff . compositeRowCorridors ( ) ; _ccbfg := _becff . compositeColCorridors ( ) ; if _eeca { _eg . Log . Info ( "\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073" , _efcg ( _eecgg ) , _efcg ( _ccbfg ) ) ;
} ; if len ( _eecgg ) == 0 || len ( _ccbfg ) == 0 { return _becff ; } ; _bffd ( _eecgg ) ; _bffd ( _ccbfg ) ; if _eeca { _eg . Log . Info ( "\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073" , _efcg ( _eecgg ) , _efcg ( _ccbfg ) ) ;
} ; _eefbfd , _ffdff := _gbcc ( _becff . _gccb , _eecgg ) ; _eagffb , _gefb := _gbcc ( _becff . _bgcfb , _ccbfg ) ; _cbde := make ( map [ uint64 ] * textPara , _gefb * _ffdff ) ; _deac := & textTable { PdfRectangle : _becff . PdfRectangle , _cdgb : _becff . _cdgb , _gccb : _ffdff , _bgcfb : _gefb , _deedc : _cbde } ;
if _eeca { _eg . Log . Info ( "\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a" + "\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a" + "\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a" + "\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a" + "\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076" , _becff . _bgcfb , _becff . _gccb , _gefb , _ffdff , _efcg ( _eecgg ) , _efcg ( _ccbfg ) , _eefbfd , _eagffb ) ;
} ; for _efgd := 0 ; _efgd < _becff . _gccb ; _efgd ++ { _bfef := _eefbfd [ _efgd ] ; for _fecc := 0 ; _fecc < _becff . _bgcfb ; _fecc ++ { _cedf := _eagffb [ _fecc ] ; if _eeca { _be . Printf ( "\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a" , _fecc , _efgd , _cedf , _bfef ) ;
} ; _gdae , _cdbbc := _becff . _agga [ _bddbg ( _fecc , _efgd ) ] ; if ! _cdbbc { continue ; } ; _ebcd := _gdae . split ( _eecgg [ _efgd ] , _ccbfg [ _fecc ] ) ; for _gdac := 0 ; _gdac < _ebcd . _gccb ; _gdac ++ { for _afebe := 0 ; _afebe < _ebcd . _bgcfb ; _afebe ++ { _fegf := _ebcd . get ( _afebe , _gdac ) ;
_deac . put ( _cedf + _afebe , _bfef + _gdac , _fegf ) ; if _eeca { _be . Printf ( "\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a" , _cedf + _afebe , _bfef + _gdac , _fegf ) ; } ; } ; } ; } ; } ; return _deac ; } ;
// toGrids partitions the rulings into groups of connected rulings and returns
// the groups that isActualGrid accepts, each passed through snapToGroups.
// Steps: compute intersections; build a per-index connection set; order the
// indexes by descending number of connections (ties broken by comp); greedily
// attach each index to the first group containing an index it is connected
// to, or start a new group; stable-sort the groups by descending size and
// sort each group's members with comp.
// NOTE(review): `_ccbfe [ 0 ]` is read unconditionally, so an empty receiver
// would panic unless _ecaa returns a non-empty slice for length 0 — confirm
// that callers never pass an empty list.
func ( _gdgg rulingList ) toGrids ( ) [ ] rulingList { if _daafa { _eg . Log . Info ( "t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073" , _gdgg ) ;
} ; _bfdgd := _gdgg . intersections ( ) ; if _daafa { _eg . Log . Info ( "\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020" , len ( _gdgg ) , len ( _bfdgd ) ) ;
for _ , _eagff := range _aagde ( _bfdgd ) { _be . Printf ( "\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n" , _eagff , _bfdgd [ _eagff ] ) ; } ; } ; _fbedd := make ( map [ int ] intSet , len ( _gdgg ) ) ; for _dbggd := range _gdgg { _cgbb := _gdgg . connections ( _bfdgd , _dbggd ) ; if len ( _cgbb ) > 0 { _fbedd [ _dbggd ] = _cgbb ;
} ; } ; if _daafa { _eg . Log . Info ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064" , len ( _fbedd ) ) ; for _ , _dcfd := range _aagde ( _fbedd ) { _be . Printf ( "\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n" , _dcfd , _fbedd [ _dcfd ] ) ;
} ; } ; _ccbfe := _ecaa ( len ( _gdgg ) , func ( _cfeda , _fdfa int ) bool { _ffaf , _ecgbfa := len ( _fbedd [ _cfeda ] ) , len ( _fbedd [ _fdfa ] ) ; if _ffaf != _ecgbfa { return _ffaf > _ecgbfa ; } ; return _gdgg . comp ( _cfeda , _fdfa ) ; } ) ; if _daafa { _eg . Log . Info ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076" , _ccbfe ) ;
} ; _efacg := [ ] [ ] int { { _ccbfe [ 0 ] } } ; _acfb : for _ , _cccg := range _ccbfe [ 1 : ] { for _ffbca , _caga := range _efacg { for _ , _aecc := range _caga { if _fbedd [ _aecc ] . has ( _cccg ) { _efacg [ _ffbca ] = append ( _caga , _cccg ) ; continue _acfb ; } ; } ; } ; _efacg = append ( _efacg , [ ] int { _cccg } ) ;
} ; if _daafa { _eg . Log . Info ( "\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076" , _efacg ) ; } ; _cc . SliceStable ( _efacg , func ( _dcfg , _dedfd int ) bool { return len ( _efacg [ _dcfg ] ) > len ( _efacg [ _dedfd ] ) } ) ; for _ , _bggg := range _efacg { _cc . Slice ( _bggg , func ( _gdec , _bcae int ) bool { return _gdgg . comp ( _bggg [ _gdec ] , _bggg [ _bcae ] ) } ) ;
} ; _dcad := make ( [ ] rulingList , len ( _efacg ) ) ; for _bcbg , _ggfa := range _efacg { _ddfd := make ( rulingList , len ( _ggfa ) ) ; for _efdee , _abec := range _ggfa { _ddfd [ _efdee ] = _gdgg [ _abec ] ; } ; _dcad [ _bcbg ] = _ddfd ; } ; if _daafa { _eg . Log . Info ( "\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076" , _dcad ) ;
} ; var _afbg [ ] rulingList ; for _ , _afgg := range _dcad { if _ecge , _fbbbd := _afgg . isActualGrid ( ) ; _fbbbd { _afgg = _ecge ; _afgg = _afgg . snapToGroups ( ) ; _afbg = append ( _afbg , _afgg ) ; } ; } ; if _daafa { _bdcd ( "t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073" , _afbg ) ;
_eg . Log . Info ( "\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064" , len ( _dcad ) , len ( _afbg ) ) ; } ; return _afbg ; } ;
// snapToGroupsDirection groups rulings whose primary coordinate (_ccb) lies
// within _ffd of the group's first ruling, sets every ruling's _ccb to its
// group's mergePrimary value, then rebuilds the list from the merged pieces
// of each group's splitSec result. A piece that aligns with the previously
// appended one (alignsPrimary and alignsSec) is logged and dropped. The
// result is sorted with sortStrict.
// Side effects: the receiver is sorted in place and its rulings' _ccb fields
// are modified.
// NOTE(review): `_abcfa [ 0 ]` is read unconditionally — assumes a non-empty
// receiver; confirm at the call sites. Groups are visited in map iteration
// order before the final sortStrict.
func ( _abcfa rulingList ) snapToGroupsDirection ( ) rulingList { _abcfa . sortStrict ( ) ;
_cfbb := make ( map [ * ruling ] rulingList , len ( _abcfa ) ) ; _bbbcf := _abcfa [ 0 ] ; _agge := func ( _ddggb * ruling ) { _bbbcf = _ddggb ; _cfbb [ _bbbcf ] = rulingList { _ddggb } } ; _agge ( _abcfa [ 0 ] ) ; for _ , _bagc := range _abcfa [ 1 : ] { if _bagc . _ccb < _bbbcf . _ccb - _gdfe { _eg . Log . Error ( "\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073" , _bbbcf , _bagc ) ;
} ; if _bagc . _ccb > _bbbcf . _ccb + _ffd { _agge ( _bagc ) ; } else { _cfbb [ _bbbcf ] = append ( _cfbb [ _bbbcf ] , _bagc ) ; } ; } ; _bfaa := make ( map [ * ruling ] float64 , len ( _cfbb ) ) ; _eaba := make ( map [ * ruling ] * ruling , len ( _abcfa ) ) ; for _fddfd , _ebggc := range _cfbb { _bfaa [ _fddfd ] = _ebggc . mergePrimary ( ) ;
for _ , _gfbb := range _ebggc { _eaba [ _gfbb ] = _fddfd ; } ; } ; for _ , _cfgd := range _abcfa { _cfgd . _ccb = _bfaa [ _eaba [ _cfgd ] ] ; } ; _fbce := make ( rulingList , 0 , len ( _abcfa ) ) ; for _ , _bgggf := range _cfbb { _accg := _bgggf . splitSec ( ) ; for _gfeaf , _dbbef := range _accg { _cfdgg := _dbbef . merge ( ) ;
if len ( _fbce ) > 0 { _dcec := _fbce [ len ( _fbce ) - 1 ] ; if _dcec . alignsPrimary ( _cfdgg ) && _dcec . alignsSec ( _cfdgg ) { _eg . Log . Error ( "\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073" , _gfeaf , _dcec , _cfdgg ) ;
continue ; } ; } ; _fbce = append ( _fbce , _cfdgg ) ; } ; } ; _fbce . sortStrict ( ) ; return _fbce ; } ;
// _dbceag returns a ruling of kind _gaba whose primary coordinate is the
// rectangle's Llx and whose range is [Lly, Ury].
func _dbceag ( _feefg _bd . PdfRectangle ) * ruling { return & ruling { _gggf : _gaba , _ccb : _feefg . Llx , _gaad : _feefg . Lly , _gdaf : _feefg . Ury } ; } ;
// _ecag reports whether the x ranges [Llx, Urx] of the two rectangles
// intersect (touching counts as intersecting).
func _ecag ( _cegg , _aeaa _bd . PdfRectangle ) bool { return _aeaa . Llx <= _cegg . Urx && _cegg . Llx <= _aeaa . Urx ;
} ;
// String returns a description of the word bag: its rectangle, its _dbga
// value, the number of words and the quoted word texts, visited in
// depthIndexes order.
func (_fceb *wordBag) String() string {
	var texts []string
	for _, depthIdx := range _fceb.depthIndexes() {
		for _, word := range _fceb._fadg[depthIdx] {
			texts = append(texts, word._debad)
		}
	}
	return _be.Sprintf("\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071", _fceb.PdfRectangle, _fceb._dbga, len(texts), texts)
}

// moveTextSetLeading stores the negated _deaa in the text state's _dde field
// and then moves by (_ffe, _deaa) via moveLP.
func (_baae *textObject) moveTextSetLeading(_ffe, _deaa float64) {
	state := _baae._dff
	state._dde = -_deaa
	_baae.moveLP(_ffe, _deaa)
}
// ExtractTextWithStats works like ExtractText but additionally returns the
// number of characters in the output (`numChars`) and the number of
// characters that were not decoded (`numMisses`), as reported by
// ExtractPageText. On error the returned text is empty.
func (_ef *Extractor) ExtractTextWithStats() (_bbg string, _dae int, _aaf int, _acee error) {
	pageText, _dae, _aaf, _acee := _ef.ExtractPageText()
	if _acee == nil {
		return pageText.Text(), _dae, _aaf, nil
	}
	return "", _dae, _aaf, _acee
}

// tidied returns the list with duplicates removed, snapped to groups and
// sorted. It returns nil when snapToGroups returns nil. _afac is used only in
// the debug log line.
func (_egcae rulingList) tidied(_afac string) rulingList {
	uniques := _egcae.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()
	if _daafa {
		_eg.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", _afac, len(_egcae), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// _dge returns a new textObject wired to the given extractor, resources,
// graphics state, text state and state stack, with both of its matrices set
// to the identity.
func _dge(_cca *Extractor, _ddgg *_bd.PdfPageResources, _dgf _cce.GraphicsState, _cff *textState, _edg *stateStack) *textObject {
	obj := &textObject{
		_fafb:  _cca,
		_ggb:   _ddgg,
		_cddag: _dgf,
		_daga:  _edg,
		_dff:   _cff,
		_cbff:  _g.IdentityMatrix(),
		_bdb:   _g.IdentityMatrix(),
	}
	return obj
}

// blocked reports whether a ruling separates the word _acb from the bag.
// When the word lies entirely to one side of the bag in x, the rulings in
// _faad are tested; when it lies entirely to one side in y, the rulings in
// _acae are tested. In each case the two facing edges are converted to
// rulings by the _aggc/_dbceag and _gecc/_ffbg helpers and passed to blocks.
func (_aagd *wordBag) blocked(_acb *textWord) bool {
	switch {
	case _acb.Urx < _aagd.Llx:
		nearEdge := _aggc(_acb.PdfRectangle)
		farEdge := _dbceag(_aagd.PdfRectangle)
		if _aagd._faad.blocks(nearEdge, farEdge) {
			if _eegd {
				_eg.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _acb, _aagd)
			}
			return true
		}
	case _aagd.Urx < _acb.Llx:
		nearEdge := _aggc(_aagd.PdfRectangle)
		farEdge := _dbceag(_acb.PdfRectangle)
		if _aagd._faad.blocks(nearEdge, farEdge) {
			if _eegd {
				_eg.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s", _acb, _aagd)
			}
			return true
		}
	}

	switch {
	case _acb.Ury < _aagd.Lly:
		nearEdge := _gecc(_acb.PdfRectangle)
		farEdge := _ffbg(_aagd.PdfRectangle)
		if _aagd._acae.blocks(nearEdge, farEdge) {
			if _eegd {
				_eg.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _acb, _aagd)
			}
			return true
		}
	case _aagd.Ury < _acb.Lly:
		nearEdge := _gecc(_aagd.PdfRectangle)
		farEdge := _ffbg(_acb.PdfRectangle)
		if _aagd._acae.blocks(nearEdge, farEdge) {
			if _eegd {
				_eg.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s", _acb, _aagd)
			}
			return true
		}
	}
	return false
}
// Len returns the number of TextMarks in the array. A nil receiver has
// length 0.
func (_eca *TextMarkArray) Len() int {
	if _eca != nil {
		return len(_eca._ggbg)
	}
	return 0
}

// paraList is a list of paragraphs.
type paraList []*textPara

// isAtom tries to build a table from the receiver together with its _bbbdd
// neighbour, its _egad neighbour and the paragraph that completes the square
// (the _egad neighbour of the first, which must also be the _bbbdd neighbour
// of the second). It returns nil if any of the three is reported as taken or
// if the square does not close; otherwise it returns the result of _dbee.
func (_begg *textPara) isAtom() *textTable {
	origin := _begg
	beside := _begg._bbbdd
	under := _begg._egad
	if beside.taken() || under.taken() {
		return nil
	}
	diagonal := beside._egad
	if diagonal.taken() || diagonal != under._bbbdd {
		return nil
	}
	return _dbee(origin, beside, under, diagonal)
}

// contains reports whether _ebffc fits inside the tile. A tile with fewer
// than 3 borders never contains anything. For each side whose flag is set,
// the rectangle may overshoot that side by at most _bggc.
func (_cdefg gridTile) contains(_ebffc _bd.PdfRectangle) bool {
	switch {
	case _cdefg.numBorders() < 3:
		return false
	case _cdefg._fgbc && _ebffc.Llx < _cdefg.Llx-_bggc:
		return false
	case _cdefg._eafc && _ebffc.Urx > _cdefg.Urx+_bggc:
		return false
	case _cdefg._ccadg && _ebffc.Lly < _cdefg.Lly-_bggc:
		return false
	case _cdefg._bfgeab && _ebffc.Ury > _cdefg.Ury+_bggc:
		return false
	}
	return true
}

// newTablePara returns a paragraph that wraps the table: its rectangle and
// its _bgca are both the table's computed bounding box and its _dbfdg is the
// table itself.
func (_bcbf *textTable) newTablePara() *textPara {
	box := _bcbf.computeBbox()
	wrapper := &textPara{PdfRectangle: box, _bgca: box, _dbfdg: _bcbf}
	if _eeca {
		_eg.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", wrapper)
	}
	return wrapper
}

// mergePrimary returns the arithmetic mean of the primary coordinates (_ccb)
// of the rulings in the list. The list must not be empty.
func (_acgf rulingList) mergePrimary() float64 {
	total := _acgf[0]._ccb
	for i := 1; i < len(_acgf); i++ {
		total += _acgf[i]._ccb
	}
	return total / float64(len(_acgf))
}

// bbox returns the line's rectangle.
func (_gfgf *textLine) bbox() _bd.PdfRectangle {
	return _gfgf.PdfRectangle
}

// quadraticTo adds only the end point (_bdce, _ddf) to the path; the control
// point (_caf, _baef) is not used.
func (_edee *shapesState) quadraticTo(_caf, _baef, _bdce, _ddf float64) {
	if _dded {
		_eg.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	_edee.addPoint(_bdce, _ddf)
}

// applyRemovals deletes from each bin of _fadg the words listed for that bin
// in _fddc. A bin whose size equals the number of removals is deleted
// outright; entries with no removals are skipped.
func (_gdde *wordBag) applyRemovals(_fddc map[int]map[*textWord]struct{}) {
	for depthIdx, removed := range _fddc {
		if len(removed) == 0 {
			continue
		}
		current := _gdde._fadg[depthIdx]
		remaining := len(current) - len(removed)
		if remaining == 0 {
			delete(_gdde._fadg, depthIdx)
			continue
		}
		// The survivors slice is sized from the counts above, so every
		// removed word is expected to be present in the bin.
		survivors := make([]*textWord, remaining)
		next := 0
		for _, word := range current {
			if _, gone := removed[word]; gone {
				continue
			}
			survivors[next] = word
			next++
		}
		_gdde._fadg[depthIdx] = survivors
	}
}

// markCells sets the _deed flag on every non-nil cell of the table.
func (_bcge *textTable) markCells() {
	for y := 0; y < _bcge._gccb; y++ {
		for x := 0; x < _bcge._bgcfb; x++ {
			if cell := _bcge.get(x, y); cell != nil {
				cell._deed = true
			}
		}
	}
}

// findTextTables tries to grow a table from every paragraph that is not taken
// and has non-zero width. Tables with fewer than _baad cells are discarded;
// the others have their cells marked and are returned.
func (_gfeag paraList) findTextTables() []*textTable {
	var found []*textTable
	for _, seed := range _gfeag {
		if seed.taken() || seed.Width() == 0 {
			continue
		}
		candidate := seed.isAtom()
		if candidate == nil {
			continue
		}
		candidate.growTable()
		if candidate._bgcfb*candidate._gccb < _baad {
			continue
		}
		candidate.markCells()
		candidate.log("\u0067\u0072\u006fw\u006e")
		found = append(found, candidate)
	}
	return found
}

// Switches for diagnostic output. Every one of them evaluates to false.
const (
	_dafff = false
	_eeegf = false
	_befc  = false
	_cgaa  = false
	_dded  = false
	_agbg  = false
	_cba   = false
	_cbec  = false
	_eafe  = false
	_dcaca = _eafe && true
	_dega  = _dcaca && false
	_decd  = _eafe && true
	_eeca  = false
	_cgbdc = _eeca && false
	_agdc  = _eeca && true
	_daafa = false
	_faca  = _daafa && false
	_cddf  = _daafa && false
	_gdcf  = _daafa && true
	_dddf  = _daafa && false
	_eegd  = _daafa && false
)
// NewFromContents creates a new extractor from contents and page resources.
// The extractor starts with empty _bg and _dg maps. The call is reported to
// the license package via TrackUse and the returned error is always nil.
func NewFromContents(contents string, resources *_bd.PdfPageResources) (*Extractor, error) {
	const _ed = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s"
	extractor := &Extractor{
		_da:  contents,
		_dbb: resources,
		_bg:  map[string]fontEntry{},
		_dg:  map[string]textResult{},
	}
	_ba.TrackUse(_ed)
	return extractor, nil
}

// devicePoint transforms (_bbfb, _dbcg) by the matrix _gaga.Mult(_eccf) and
// returns the result as a point.
func (_feca *shapesState) devicePoint(_bbfb, _dbcg float64) _g.Point {
	combined := _feca._gaga.Mult(_feca._eccf)
	x, y := combined.Transform(_bbfb, _dbcg)
	return _g.NewPoint(x, y)
}

// toTextMarks converts the word's marks to TextMarks, accumulating them with
// _ecfc, which also receives the _ffba pointer.
func (_efeb *textWord) toTextMarks(_ffba *int) []TextMark {
	var out []TextMark
	for _, mark := range _efeb._bbacg {
		out = _ecfc(out, _ffba, mark.ToTextMark())
	}
	return out
}

// depth returns the smallest depth among the cells of row 0, skipping nil
// cells and cells whose _gfce flag is set. It returns 1e10 if no cell
// qualifies.
func (_efbb *textTable) depth() float64 {
	shallowest := 1e10
	for x := 0; x < _efbb._bgcfb; x++ {
		cell := _efbb.get(x, 0)
		if cell != nil && !cell._gfce {
			shallowest = _bf.Min(shallowest, cell.depth())
		}
	}
	return shallowest
}

// _bcfg reports whether _geee has at least _eed runes, ends with a rune in
// unicode.Hyphen, and has a non-space rune immediately before it.
func _bcfg(_geee string) bool {
	if _c.RuneCountInString(_geee) < _eed {
		return false
	}
	lastRune, lastSize := _c.DecodeLastRuneInString(_geee)
	if lastSize <= 0 || !_cd.Is(_cd.Hyphen, lastRune) {
		return false
	}
	prevRune, prevSize := _c.DecodeLastRuneInString(_geee[:len(_geee)-lastSize])
	if prevSize <= 0 {
		return false
	}
	return !_cd.IsSpace(prevRune)
}

// _cdgde reports whether the magnitude of _cggg is below _ffd.
func _cdgde(_cggg float64) bool {
	magnitude := _bf.Abs(_cggg)
	return magnitude < _ffd
}
// String returns a description of the paragraph: its rectangle, an optional
// "[WxH] " prefix when it wraps a table, the number of lines and the text
// truncated by _dbec to 50. Paragraphs with _gfce set are shown as empty.
func (_abaa *textPara) String() string {
	if _abaa._gfce {
		return _be.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", _abaa.PdfRectangle)
	}
	tablePrefix := ""
	if _abaa._dbfdg != nil {
		tablePrefix = _be.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", _abaa._dbfdg._bgcfb, _abaa._dbfdg._gccb)
	}
	return _be.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", _abaa.PdfRectangle, tablePrefix, len(_abaa._gadg), _dbec(_abaa.text(), 50))
}

// comp is a "less" function over the rulings at indexes _dfbf and _aceag.
// Rulings are ordered by descending kind first. Rulings of kind _eedb are
// never less than one another. Within a kind the order is by primary
// coordinate (_ccb), then _gaad, then _gdaf; the direction of each comparison
// is as written for kind _ddga and inverted for the other kind.
//
// Two rulings that agree on all four values compare as equal (false in both
// directions). Previously the inverted direction returned true for such a
// pair, which made comp(i, i) true and broke the strict ordering that the
// sort package requires of a less function.
func (_fffe rulingList) comp(_dfbf, _aceag int) bool {
	a, b := _fffe[_dfbf], _fffe[_aceag]
	kindA, kindB := a._gggf, b._gggf
	if kindA != kindB {
		return kindA > kindB
	}
	if kindA == _eedb {
		return false
	}
	// directed flips the verdict for every kind other than _ddga.
	directed := func(verdict bool) bool {
		if kindA == _ddga {
			return verdict
		}
		return !verdict
	}
	if a._ccb != b._ccb {
		return directed(a._ccb > b._ccb)
	}
	if a._gaad != b._gaad {
		return directed(a._gaad < b._gaad)
	}
	if a._gdaf == b._gdaf {
		// Identical keys: neither ruling sorts before the other.
		return false
	}
	return directed(a._gdaf < b._gdaf)
}

// pullWord adds _eeef to bin _afdg of the bag, growing the bag's rectangle
// and raising _dbga to the word's _efag if needed, and records the word in
// _ecba[_afdg].
// NOTE(review): _ecba[_afdg] must already hold a non-nil map, otherwise the
// final assignment panics — confirm that callers initialise it.
func (_bgfbc *wordBag) pullWord(_eeef *textWord, _afdg int, _ecba map[int]map[*textWord]struct{}) {
	_bgfbc.PdfRectangle = _gcff(_bgfbc.PdfRectangle, _eeef.PdfRectangle)
	if _eeef._efag > _bgfbc._dbga {
		_bgfbc._dbga = _eeef._efag
	}
	_bgfbc._fadg[_afdg] = append(_bgfbc._fadg[_afdg], _eeef)
	_ecba[_afdg][_eeef] = struct{}{}
}

// moveText moves by (_dfcc, _gad) via moveLP.
func (_dfag *textObject) moveText(_dfcc, _gad float64) {
	_dfag.moveLP(_dfcc, _gad)
}

// pullWord appends _abcf to the line and removes it from bin _acdad of the
// bag _bgdd.
func (_dagbe *textLine) pullWord(_bgdd *wordBag, _abcf *textWord, _acdad int) {
	_dagbe.appendWord(_abcf)
	_bgdd.removeWord(_abcf, _acdad)
}

// text returns the texts of all words in the bag joined by single spaces, in
// allWords order.
func (_acc *wordBag) text() string {
	words := _acc.allWords()
	texts := make([]string, len(words))
	for i, word := range words {
		texts[i] = word._debad
	}
	return _dc.Join(texts, "\u0020")
}
// Append appends `mark` to the mark array.
func (_eage *TextMarkArray) Append(mark TextMark) {
	_eage._ggbg = append(_eage._ggbg, mark)
}

// markWordBoundaries sets _fgbg on every word after the first whose _edagd
// value relative to the preceding word is at least _gfed times the line's
// _aefd.
func (_gbef *textLine) markWordBoundaries() {
	threshold := _gfed * _gbef._aefd
	// Ranging over _ebge[1:] makes index i refer to the word before `word`.
	for i, word := range _gbef._ebge[1:] {
		if _edagd(word, _gbef._ebge[i]) >= threshold {
			word._fgbg = true
		}
	}
}

// vertsHorzs splits the list by kind: rulings of kind _gaba go into the first
// result, rulings of kind _ddga into the second. Other kinds are dropped.
func (_ggbe rulingList) vertsHorzs() (rulingList, rulingList) {
	var first, second rulingList
	for _, r := range _ggbe {
		switch r._gggf {
		case _gaba:
			first = append(first, r)
		case _ddga:
			second = append(second, r)
		}
	}
	return first, second
}

// _bgcc builds a word bag from _adaa: the bag is created from the first word
// with _gcab, the remaining words are added to the bins selected by
// _fdeba(word._acag) while the bag's rectangle is grown, and the bag is then
// sorted. _adaa must not be empty.
func _bgcc(_adaa []*textWord, _fgfc float64, _aeaf, _bca rulingList) *wordBag {
	bag := _gcab(_adaa[0], _fgfc, _aeaf, _bca)
	for _, word := range _adaa[1:] {
		depthIdx := _fdeba(word._acag)
		bag._fadg[depthIdx] = append(bag._fadg[depthIdx], word)
		bag.PdfRectangle = _gcff(bag.PdfRectangle, word.PdfRectangle)
	}
	bag.sort()
	return bag
}

// xNeighbours returns the result of eventNeighbours for one enter event (at
// Llx) and one leave event (at Urx) per paragraph. When _adbb is non-zero
// each interval is widened on both sides by _adbb times the paragraph's
// fontsize.
func (_geag paraList) xNeighbours(_adbb float64) map[*textPara][]int {
	events := make([]event, 2*len(_geag))
	if _adbb == 0 {
		for i, para := range _geag {
			events[2*i] = event{para.Llx, true, i}
			events[2*i+1] = event{para.Urx, false, i}
		}
	} else {
		for i, para := range _geag {
			events[2*i] = event{para.Llx - _adbb*para.fontsize(), true, i}
			events[2*i+1] = event{para.Urx + _adbb*para.fontsize(), false, i}
		}
	}
	return _geag.eventNeighbours(events)
}

// rulingKind enumeration values. _ccbff returns _ddga for rectangles wider
// than tall, _gaba for the others, and _eedb when the longer side is shorter
// than _cafe.
const (
	_eedb rulingKind = iota
	_ddga
	_gaba
)

// _cbe merges word bags: after sorting the bags by descending area (then
// descending height), every bag absorbs each later bag whose rectangle is
// contained (per _gbae) in its own rectangle extended to the left by its
// _dbga. Absorbed bags are omitted from the result. The input slice is
// reordered in place.
//
// The inner loop skips bags that were already absorbed by an earlier bag.
// Previously it re-tested the outer index, which is never absorbed at that
// point, so a bag contained in two larger bags could be absorbed twice.
func _cbe(_cdfg []*wordBag) []*wordBag {
	if len(_cdfg) <= 1 {
		return _cdfg
	}
	if _eafe {
		_eg.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	_cc.Slice(_cdfg, func(i, j int) bool {
		a, b := _cdfg[i], _cdfg[j]
		areaA := a.Width() * a.Height()
		areaB := b.Width() * b.Height()
		if areaA != areaB {
			return areaA > areaB
		}
		if a.Height() != b.Height() {
			return a.Height() > b.Height()
		}
		return i < j
	})

	var merged []*wordBag
	absorbed := make(intSet)
	for i := 0; i < len(_cdfg); i++ {
		if absorbed.has(i) {
			continue
		}
		host := _cdfg[i]
		for j := i + 1; j < len(_cdfg); j++ {
			if absorbed.has(j) {
				continue
			}
			guest := _cdfg[j]
			reach := host.PdfRectangle
			reach.Llx -= host._dbga
			if _gbae(reach, guest.PdfRectangle) {
				host.absorb(guest)
				absorbed.add(j)
			}
		}
		merged = append(merged, host)
	}
	if len(_cdfg) != len(merged)+len(absorbed) {
		_eg.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_cdfg), len(merged), len(absorbed))
	}
	return merged
}

// bbox returns the table's rectangle.
func (_dgcd *textTable) bbox() _bd.PdfRectangle {
	return _dgcd.PdfRectangle
}
// TableCell is a cell in a TextTable.
// Cells for which the source table has no content are left zero-valued.
type TableCell struct {
// Text is the extracted text of the cell.
Text string ;
// Marks holds the TextMarks corresponding to the text in Text.
Marks TextMarkArray ; } ;
// String returns a description of the table in the form "W x H flag", where
// the flag is the boolean _cdgb.
func (_dfdbg *textTable) String() string {
	width, height := _dfdbg._bgcfb, _dfdbg._gccb
	return _be.Sprintf("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074", width, height, _dfdbg._cdgb)
}

// _afdf returns the difference between the Llx of the two bounding boxes
// (first minus second).
func _afdf(_gedf, _fgaf bounded) float64 {
	first := _gedf.bbox().Llx
	second := _fgaf.bbox().Llx
	return first - second
}
// _bffd walks the entries of `_cffe` in ascending key order, starting at the
// first key whose slice is non-nil, and clamps the last element of a slice to
// the first element of the current slice when it is larger. Maps with at most
// one entry are left untouched. The map and its slices are modified in place.
func _bffd(_cffe map[int][]float64) {
	if len(_cffe) <= 1 {
		return
	}
	keys := _ggadbb(_cffe)
	if _eeca {
		_eg.Log.Info("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076", keys)
	}

	// Locate the first key that has a non-nil slice.
	var start, prevKey int
	for start, prevKey = range keys {
		if _cffe[prevKey] != nil {
			break
		}
	}

	for offset, key := range keys[start:] {
		current := _cffe[key]
		if current == nil {
			continue
		}
		if _eeca {
			_be.Printf("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a", start+offset, prevKey, key)
		}
		// NOTE(review): this second lookup uses the same key as `current`, yet
		// the result is stored under the previous key. Confirm whether the
		// lookup was meant to use the previous key.
		edge := _cffe[key]
		if edge[len(edge)-1] > current[0] {
			edge[len(edge)-1] = current[0]
			_cffe[prevKey] = edge
		}
		prevKey = key
	}
}

// Package-level switches and limits.
const (
	_addg  = true
	_cgffa = true
	_dgeb  = true
	_fddf  = false
	_feebf = false
	_dfade = 6
	_dbce  = 3.0
	_ecddd = 200
	_adbe  = true
	_aaeaf = true
	_adba  = true // enables the `_eaada` ruling source in computeViews
	_eccfg = true // enables the `_abca` ruling source in computeViews
	_beegc = false
)

// _gbgg reports true when either paragraph has its `_gfce` flag set; otherwise
// it returns the result of `_bcaga` applied to the difference of their depths.
func _gbgg(_eaeeb, _gfba *textPara) bool {
	if !_eaeeb._gfce && !_gfba._gfce {
		return _bcaga(_eaeeb.depth() - _gfba.depth())
	}
	return true
}

// _ebeb is the font cache size at which getFont evicts an entry before adding.
const _ebeb = 10

// rulingList is a list of rulings.
type rulingList []*ruling

// Numeric thresholds used by the extractor.
const (
	_gdfe  = 1.0e-6
	_ebba  = 1.0e-4
	_gef   = 10
	_abab  = 6
	_bbcc  = 0.5
	_dadd  = 0.12
	_afbbc = 0.19
	_afeb  = 0.04
	_bfbf  = 0.04
	_beedf = 1.0
	_bbga  = 0.04
	_gga   = 0.4
	_bbab  = 0.7
	_gbfd  = 1.0
	_gffb  = 0.1
	_bbfe  = 1.4
	_ffbe  = 0.46
	_gfed  = 0.02
	_abae  = 0.2
	_edaa  = 0.5
	_eed   = 4
	_ggba  = 4.0
	_baad  = 6
	_fcbe  = 0.3
	_gfge  = 0.01
	_fgaa  = 0.02
	_gafg  = 2
	_ebgg  = 2
	_ffcfg = 500
	_cafe  = 4.0
	_gbgbb = 4.0
	_cged  = 0.05
	_accb  = 0.1
	_cebae = 2.0
	_ffd   = 2.0
	_bggc  = 1.5
	_ceacc = 3.0
	_bfba  = 0.25
)

// setCharSpacing stores `_feb` as the character spacing of the current text
// state. It is a no-op on a nil receiver.
func (_gde *textObject) setCharSpacing(_feb float64) {
	if _gde == nil {
		return
	}
	state := _gde._dff
	state._cab = _feb
	if _agbg {
		_eg.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _feb, state.String())
	}
}

// textObject holds the state used while processing text.
type textObject struct {
	_fafb  *Extractor            // owning extractor; getFont uses its font cache
	_ggb   *_bd.PdfPageResources // resources consulted by getFontDict
	_cddag _cce.GraphicsState
	_dff   *textState // current text state
	_daga  *stateStack
	_cbff  _g.Matrix // assigned by setTextMatrix
	_bdb   _g.Matrix // set to the same value as _cbff by setTextMatrix
	_dcaa  []*textMark
	_gdcg  bool
}

// textState is the set of text parameters tracked for a textObject.
type textState struct {
	_cab   float64 // character spacing (written by setCharSpacing)
	_fec   float64
	_aee   float64
	_dde   float64
	_bcdg  float64
	_ebb   RenderMode
	_cabf  float64
	_degee *_bd.PdfFont
	_egdd  _bd.PdfRectangle
	_fdfc  int
	_dcef  int
}

// add appends `_bdeg` to the points of the subpath.
func (_cbcee *subpath) add(_bdeg ..._g.Point) {
	_cbcee._gcbb = append(_cbcee._gcbb, _bdeg...)
}

// alignsSec reports whether the [_gaad, _gdaf] ranges of the two rulings
// overlap, allowing a slack of `_ffd + 1.0`.
func (_bccdf *ruling) alignsSec(_dbceb *ruling) bool {
	const slack = _ffd + 1.0
	if !(_bccdf._gaad-slack <= _dbceb._gdaf) {
		return false
	}
	return _dbceb._gaad-slack <= _bccdf._gdaf
}

// _ggadbb returns the keys of `_acaec` sorted in ascending order.
func _ggadbb(_acaec map[int][]float64) []int {
	keys := make([]int, 0, len(_acaec))
	for key := range _acaec {
		keys = append(keys, key)
	}
	_cc.Ints(keys)
	return keys
}

// put stores `_acca` in the table under the key `_bddbg(_dgaaa, _ccgc)`.
func (_dedb *textTable) put(_dgaaa, _ccgc int, _acca *textPara) {
	key := _bddbg(_dgaaa, _ccgc)
	_dedb._deedc[key] = _acca
}

// applyTables returns a new list containing one paragraph for each table in
// `_fdfbc` (built with newTablePara), followed by every paragraph of the
// receiver whose `_deed` flag is not set.
func (_gdagf paraList) applyTables(_fdfbc []*textTable) paraList {
	var result paraList
	for _, table := range _fdfbc {
		result = append(result, table.newTablePara())
	}
	for _, para := range _gdagf {
		if !para._deed {
			result = append(result, para)
		}
	}
	return result
}

// _egfd looks `_eddaaf` up in `_baee` when it consists of exactly one rune.
// It returns ("", false) for any other string length.
func _egfd(_eddaaf string) (string, bool) {
	runes := []rune(_eddaaf)
	if len(runes) != 1 {
		return "", false
	}
	mapped, found := _baee[runes[0]]
	return mapped, found
}

// removeDuplicates sorts the receiver in place and returns a list in which
// each ruling that `equals` the previously kept one has been dropped. It
// returns nil for an empty list.
func (_bagf rulingList) removeDuplicates() rulingList {
	if len(_bagf) == 0 {
		return nil
	}
	_bagf.sort()
	kept := rulingList{_bagf[0]}
	for _, candidate := range _bagf[1:] {
		if !candidate.equals(kept[len(kept)-1]) {
			kept = append(kept, candidate)
		}
	}
	return kept
}

// emptyCompositeColumn reports whether none of the composite cells stored for
// first index `_fbddd` contains any paragraphs.
func (_bfcb *textTable) emptyCompositeColumn(_fbddd int) bool {
	for y := 0; y < _bfcb._gccb; y++ {
		cell, present := _bfcb._agga[_bddbg(_fbddd, y)]
		if !present {
			continue
		}
		if len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// rulingKind identifies the kind of a ruling.
type rulingKind int

// removeWord removes `_cdfc` from the word list stored under `_edfc` and
// deletes the map entry once that list becomes empty.
func (_fgfe *wordBag) removeWord(_cdfc *textWord, _edfc int) {
	remaining := _ddag(_fgfe._fadg[_edfc], _cdfc)
	if len(remaining) == 0 {
		delete(_fgfe._fadg, _edfc)
		return
	}
	_fgfe._fadg[_edfc] = remaining
}
// String returns a description of `l`: its `_gddec` value, bounding rectangle,
// `_aefd` value and quoted text.
func (_bdcf *textLine) String() string {
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _be.Sprintf(layout, _bdcf._gddec, _bdcf.PdfRectangle, _bdcf._aefd, _bdcf.text())
}
// String returns a human readable description of `s`: its members in
// ascending order.
func (_deag intSet) String() string {
	var members []int
	for member := range _deag {
		if _deag.has(member) {
			members = append(members, member)
		}
	}
	_cc.Ints(members)
	return _be.Sprintf("\u0025\u002b\u0076", members)
}

// highestWord returns the first word stored under index `_aaea` whose `_acag`
// value lies in the closed interval [`_ged`, `_cffb`], or nil if there is none.
func (_gbgb *wordBag) highestWord(_aaea int, _ged, _cffb float64) *textWord {
	for _, word := range _gbgb._fadg[_aaea] {
		if _ged <= word._acag && word._acag <= _cffb {
			return word
		}
	}
	return nil
}

// setTextMatrix builds a matrix from the six values in `_dfad` and assigns it
// to both `_cbff` and `_bdb`. Any other length is logged and ignored.
func (_cdg *textObject) setTextMatrix(_dfad []float64) {
	if len(_dfad) != 6 {
		_eg.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029", len(_dfad))
		return
	}
	_cdg._cbff = _g.NewMatrix(_dfad[0], _dfad[1], _dfad[2], _dfad[3], _dfad[4], _dfad[5])
	_cdg._bdb = _cdg._cbff
}

// fill appends a path section built from the current subpaths and fill color
// to `*_gdg`. Diagnostics are printed when `_daafa` (and `_faca`) are enabled.
func (_edfa *shapesState) fill(_gdg *[]pathSection) {
	section := pathSection{_aga: _edfa._gdbg, Color: _edfa._baafg.getFillColor()}
	*_gdg = append(*_gdg, section)
	if !_daafa {
		return
	}
	box := section.bbox()
	_be.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*_gdg), len(section._aga), _edfa, section.Color, box, box.Width(), box.Height())
	if _faca {
		// Print at most the first eleven subpaths (indices 0 through 10).
		for i, sub := range section._aga {
			_be.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sub)
			if i == 10 {
				break
			}
		}
	}
}

// getFontDict looks up the font object named `_ecg` in the page resources.
// It returns (nil, nil) when the text object has no resources, and an error
// when the resources do not contain the font.
func (_gbbc *textObject) getFontDict(_ecg string) (_afc _db.PdfObject, _acea error) {
	resources := _gbbc._ggb
	if resources == nil {
		_eg.Log.Debug("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071", _ecg)
		return nil, nil
	}
	_afc, found := resources.GetFontByName(_db.PdfObjectName(_ecg))
	if !found {
		_eg.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071", _ecg)
		return nil, _f.New("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073")
	}
	return _afc, nil
}

// Text rendering mode flags. The values are distinct bits (1, 2 and 4).
const (
	// RenderModeStroke is the stroke flag (value 1).
	RenderModeStroke RenderMode = 1 << iota
	// RenderModeFill is the fill flag (value 2).
	RenderModeFill
	// RenderModeClip is the clip flag (value 4).
	RenderModeClip
)

// _cgdc converts `_bfcgd` from colorspace `_gcgb` to an opaque image color.
// It returns black when either argument is nil, when the conversion fails, or
// when the converted color is not a device RGB color.
func _cgdc(_gcgb _bd.PdfColorspace, _bfcgd _bd.PdfColor) _fb.Color {
	if _gcgb == nil || _bfcgd == nil {
		return _fb.Black
	}
	converted, err := _gcgb.ColorToRGB(_bfcgd)
	if err != nil {
		_eg.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", _bfcgd, _gcgb, err)
		return _fb.Black
	}
	rgb, isRGB := converted.(*_bd.PdfColorDeviceRGB)
	if !isRGB {
		_eg.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", converted)
		return _fb.Black
	}
	return _fb.NRGBA{
		R: uint8(rgb.R() * 255),
		G: uint8(rgb.G() * 255),
		B: uint8(rgb.B() * 255),
		A: uint8(255),
	}
}

// _dagae returns the keys of `_efgg` sorted in descending order.
func _dagae(_efgg map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_efgg))
	for key := range _efgg {
		keys = append(keys, key)
	}
	_cc.Float64s(keys)
	// Reverse the ascending order in place.
	for lo, hi := 0, len(keys)-1; lo < hi; lo, hi = lo+1, hi-1 {
		keys[lo], keys[hi] = keys[hi], keys[lo]
	}
	return keys
}

// getFont returns the font named `_fdge`. When the extractor has a font cache
// the font is looked up there first, keyed by the string form of its font
// object; on a miss the font is loaded with getFontDirect and cached, evicting
// the entry with the smallest access stamp once the cache holds `_ebeb` fonts.
//
// It returns an error when the font object cannot be found or loaded.
func (_ecfg *textObject) getFont(_fdge string) (*_bd.PdfFont, error) {
	extractor := _ecfg._fafb
	if extractor._bg != nil {
		fontObj, err := _ecfg.getFontDict(_fdge)
		if err != nil {
			_eg.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073", _fdge, err.Error())
			return nil, err
		}
		extractor._gb++
		// getFontDict returns (nil, nil) when there are no resources, so the
		// object must be checked before it is used as a cache key.
		if fontObj != nil {
			key := fontObj.String()
			if entry, hit := extractor._bg[key]; hit {
				// The map holds fontEntry values, so `entry` is a copy; the
				// refreshed access stamp has to be stored back or eviction
				// would never see it.
				entry._fegb = extractor._gb
				extractor._bg[key] = entry
				return entry._fadc, nil
			}
		}
	}

	fontObj, err := _ecfg.getFontDict(_fdge)
	if err != nil {
		return nil, err
	}
	font, err := _ecfg.getFontDirect(_fdge)
	if err != nil {
		return nil, err
	}

	// Only cache when there is a font object to derive the key from.
	if extractor._bg != nil && fontObj != nil {
		entry := fontEntry{font, extractor._gb}
		if len(extractor._bg) >= _ebeb {
			// Evict the entry with the smallest access stamp.
			var names []string
			for name := range extractor._bg {
				names = append(names, name)
			}
			_cc.Slice(names, func(i, j int) bool {
				return extractor._bg[names[i]]._fegb < extractor._bg[names[j]]._fegb
			})
			delete(extractor._bg, names[0])
		}
		extractor._bg[fontObj.String()] = entry
	}
	return font, nil
}

// _ffbg returns a ruling of kind `_ddga` located at the bottom edge (Lly) of
// `_bfcg` and spanning from its Llx to its Urx.
func _ffbg(_bfcg _bd.PdfRectangle) *ruling {
	return &ruling{_gggf: _ddga, _ccb: _bfcg.Lly, _gaad: _bfcg.Llx, _gdaf: _bfcg.Urx}
}

// writeText writes the text of the paragraph to `_abde`. A paragraph without
// a table is written with writeCellText. For a table, each cell is followed by
// a space, a missing cell is written as a tab, and rows are separated by
// newlines. Errors returned by the writer are not checked.
func (_deedd *textPara) writeText(_abde _d.Writer) {
	table := _deedd._dbfdg
	if table == nil {
		_deedd.writeCellText(_abde)
		return
	}
	for y := 0; y < table._gccb; y++ {
		for x := 0; x < table._bgcfb; x++ {
			if cell := table.get(x, y); cell == nil {
				_abde.Write([]byte("\u0009"))
			} else {
				cell.writeCellText(_abde)
			}
			_abde.Write([]byte("\u0020"))
		}
		if y < table._gccb-1 {
			_abde.Write([]byte("\u000a"))
		}
	}
}

// _gcff returns the smallest rectangle that contains both `_befa` and `_adff`.
func _gcff(_befa, _adff _bd.PdfRectangle) _bd.PdfRectangle {
	return _bd.PdfRectangle{
		Llx: _bf.Min(_befa.Llx, _adff.Llx),
		Lly: _bf.Min(_befa.Lly, _adff.Lly),
		Urx: _bf.Max(_befa.Urx, _adff.Urx),
		Ury: _bf.Max(_befa.Ury, _adff.Ury),
	}
}

// _bdff builds the paragraphs for the text marks `_fgbf` on a page of size
// `_ccfc`. The marks are turned into words, grouped into word bags with the
// help of the rulings in `_bcded`, merged, and arranged into paragraphs.
// Tables are extracted using `_fedg` when there are at least `_baad`
// paragraphs, and the result is sorted into reading order. It returns nil
// when there are no marks or no words.
func _bdff(_fgbf []*textMark, _ccfc _bd.PdfRectangle, _bcded rulingList, _fedg []gridTiling) paraList {
	_eg.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_fgbf), _ccfc)
	if len(_fgbf) == 0 {
		return nil
	}
	words := _fggaa(_fgbf, _ccfc)
	if len(words) == 0 {
		return nil
	}
	_bcded.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := _bcded.vertsHorzs()
	pageBag := _bgcc(words, _ccfc.Ury, verts, horzs)
	bags := _cbe(_cfgee(pageBag, _ccfc.Ury, verts, horzs))

	paras := make(paraList, 0, len(bags))
	for _, bag := range bags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}
	if len(paras) >= _baad {
		paras = paras.extractTables(_fedg)
	}
	paras.sortReadingOrder()
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}

// _gfec returns a new slice holding the elements of `_cgd` in reverse order.
func _gfec(_cgd []int) []int {
	reversed := make([]int, len(_cgd))
	last := len(_cgd) - 1
	for i, value := range _cgd {
		reversed[last-i] = value
	}
	return reversed
}
// String returns a human readable description of `vecs`: the number of
// rulings in each of the two groups returned by vertsHorzs and, when both
// groups are non-empty, the rectangle spanned by their outermost rulings.
func (_bfbc rulingList) String() string {
	if len(_bfbc) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := _bfbc.vertsHorzs()
	numVerts, numHorzs := len(verts), len(horzs)
	if numVerts == 0 || numHorzs == 0 {
		return _be.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", numVerts, numHorzs)
	}
	extent := _bd.PdfRectangle{
		Llx: verts[0]._ccb,
		Urx: verts[numVerts-1]._ccb,
		Lly: horzs[numHorzs-1]._ccb,
		Ury: horzs[0]._ccb,
	}
	return _be.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", numVerts, numHorzs, extent)
}

// clear resets the subpath to its zero value.
func (_bbbe *subpath) clear() {
	*_bbbe = subpath{}
}
// String returns a description of `state`: three of its numeric parameters
// and the base font name, or a placeholder when no font is set.
func (_eddaa *textState) String() string {
	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	if font := _eddaa._degee; font != nil {
		fontName = font.BaseFont()
	}
	return _be.Sprintf("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071", _eddaa._cab, _eddaa._fec, _eddaa._bcdg, fontName)
}

// wordBag is a bounded collection of words grouped under integer indexes.
type wordBag struct {
	_bd.PdfRectangle                     // bounding box of the bag
	_dbga            float64             // initialised from the first word's `_efag`
	_faad, _acae     rulingList          // the two ruling lists passed to `_gcab`
	_adbg            float64             // the float passed to `_gcab`
	_fadg            map[int][]*textWord // words keyed by the index `_fdeba` computes
}

// _gcab returns a new word bag containing only `_ffce`. The bag takes its
// rectangle and `_dbga` from the word and stores `_afg`, `_ecda` and `_dafd`.
func _gcab(_ffce *textWord, _afg float64, _ecda, _dafd rulingList) *wordBag {
	index := _fdeba(_ffce._acag)
	bag := wordBag{
		_fadg:        map[int][]*textWord{index: {_ffce}},
		PdfRectangle: _ffce.PdfRectangle,
		_dbga:        _ffce._efag,
		_adbg:        _afg,
		_faad:        _ecda,
		_acae:        _dafd,
	}
	return &bag
}

// reorder rearranges the list in place so that position i holds the element
// that was at index `_bfge[i]`.
func (_gacf paraList) reorder(_bfge []int) {
	permuted := make(paraList, len(_gacf))
	for dst, src := range _bfge {
		permuted[dst] = _gacf[src]
	}
	copy(_gacf, permuted)
}

// computeViews rebuilds the text, text marks and tables of the page from its
// text marks. The marks are processed in groups by their `_ddggc` value (0,
// 90, 180, 270), using the rulings collected from the sources that the
// `_adba` and `_eccfg` switches enable.
func (_bedf *PageText) computeViews() {
	var rulings rulingList
	if _adba {
		rulings = append(rulings, _eaada(_bedf._egg)...)
	}
	if _eccfg {
		rulings = append(rulings, _abca(_bedf._dbe)...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(_bedf._fgd)
	// Stop early once every mark has been assigned to a group.
	for angle := 0; angle < 360 && remaining > 0; angle += 90 {
		group := make([]*textMark, 0, len(_bedf._fgd)-remaining)
		for _, mark := range _bedf._fgd {
			if mark._ddggc == angle {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _bdff(group, _bedf._fbd, rulings, tilings)...)
		remaining -= len(group)
	}

	buffer := new(_cde.Buffer)
	paras.writeText(buffer)
	_bedf._ebce = buffer.String()
	_bedf._dged = paras.toTextMarks()
	_bedf._bfbb = paras.tables()
	if _eeca {
		_eg.Log.Info("\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064", len(_bedf._bfbb))
	}
}

// _fgfa returns the single parameter of `_fagb` as a float. It returns an
// error when the operation does not have exactly one parameter or when the
// parameter cannot be converted.
func _fgfa(_fagb *_cce.ContentStreamOperation) (float64, error) {
	if len(_fagb.Params) == 1 {
		return _db.GetNumberAsFloat(_fagb.Params[0])
	}
	err := _f.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	_eg.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _fagb.Operand, 1, len(_fagb.Params), _fagb.Params)
	return 0.0, err
}
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
type RenderMode int

// asRuling converts the rectangle into a ruling that runs along the
// rectangle's centre line. For kind `_gaba` the centre is taken in x and the
// ruling spans Lly to Ury; for kind `_ddga` the centre is taken in y and the
// ruling spans Llx to Urx. It returns (nil, false) when checkWidth rejects
// the rectangle or when the kind is neither of those two.
func (_eggcg rectRuling) asRuling() (*ruling, bool) {
	result := ruling{_gggf: _eggcg._aegd, Color: _eggcg.Color, _beaec: _edaga}
	switch _eggcg._aegd {
	case _gaba:
		result._ccb = 0.5 * (_eggcg.Llx + _eggcg.Urx)
		result._gaad = _eggcg.Lly
		result._gdaf = _eggcg.Ury
		width, ok := _eggcg.checkWidth(_eggcg.Llx, _eggcg.Urx)
		if !ok {
			if _dddf {
				_eg.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _eggcg)
			}
			return nil, false
		}
		result._cfbf = width
	case _ddga:
		result._ccb = 0.5 * (_eggcg.Lly + _eggcg.Ury)
		result._gaad = _eggcg.Llx
		result._gdaf = _eggcg.Urx
		width, ok := _eggcg.checkWidth(_eggcg.Lly, _eggcg.Ury)
		if !ok {
			if _dddf {
				_eg.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _eggcg)
			}
			return nil, false
		}
		result._cfbf = width
	default:
		_eg.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _eggcg._aegd)
		return nil, false
	}
	return &result, true
}

// pop removes the top state from the stack and returns a pointer to a copy of
// it, or nil when the stack is empty.
func (_fcgb *stateStack) pop() *textState {
	if _fcgb.empty() {
		return nil
	}
	stack := *_fcgb
	last := len(stack) - 1
	top := *stack[last]
	*_fcgb = stack[:last]
	return &top
}

// String returns a description of the cell: its rectangle, the number of
// paragraphs and, when there are paragraphs, their merged text passed through
// `_dbec` with the argument 50.
func (_fab compositeCell) String() string {
	var text string
	if len(_fab.paraList) > 0 {
		text = _dbec(_fab.paraList.merge().text(), 50)
	}
	return _be.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071", _fab.PdfRectangle, len(_fab.paraList), text)
}
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
// Marks flagged as Meta and marks whose text `_gagac` reports true for are ignored.
// The boolean result is false when no mark contributed to the rectangle.
func (_ggd *TextMarkArray) BBox() (_bd.PdfRectangle, bool) {
	var (
		box   _bd.PdfRectangle
		found bool
	)
	for _, mark := range _ggd._ggbg {
		if mark.Meta || _gagac(mark.Text) {
			continue
		}
		if !found {
			// The first contributing mark seeds the box.
			box = mark.BBox
			found = true
			continue
		}
		box = _gcff(box, mark.BBox)
	}
	return box, found
}
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
func (_afd *PageText) ApplyArea(bbox _bd.PdfRectangle) {
	// Keep only the marks that `_bfgg` accepts for the requested area.
	selected := make([]*textMark, 0, len(_afd._fgd))
	for _, mark := range _afd._fgd {
		if _bfgg(mark.bbox(), bbox) {
			selected = append(selected, mark)
		}
	}

	var paras paraList
	remaining := len(selected)
	// Process the marks grouped by their `_ddggc` value (0, 90, 180, 270),
	// stopping once every selected mark has been handled.
	for angle := 0; angle < 360 && remaining > 0; angle += 90 {
		group := make([]*textMark, 0, len(selected)-remaining)
		for _, mark := range selected {
			if mark._ddggc == angle {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		// No rulings or tilings are supplied here.
		paras = append(paras, _bdff(group, _afd._fbd, nil, nil)...)
		remaining -= len(group)
	}

	buffer := new(_cde.Buffer)
	paras.writeText(buffer)
	_afd._ebce = buffer.String()
	_afd._dged = paras.toTextMarks()
	_afd._bfbb = paras.tables()
}

// add inserts `_afgac` into the set.
func (_egdec intSet) add(_afgac int) {
	_egdec[_afgac] = struct{}{}
}
// String returns a description of `v`: its kind, its position, the start, end
// and length of its span, and its color. The width is appended when it is
// non-zero. A ruling of kind `_eedb` is described by a fixed string.
func (_ccbf *ruling) String() string {
	if _ccbf._gggf == _eedb {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}
	// Axis labels; they are swapped for rulings of kind `_ddga`.
	primary, secondary := "\u0078", "\u0079"
	if _ccbf._gggf == _ddga {
		primary, secondary = "\u0079", "\u0078"
	}
	var widthText string
	if _ccbf._cfbf != 0.0 {
		widthText = _be.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _ccbf._cfbf)
	}
	span := _ccbf._gdaf - _ccbf._gaad
	return _be.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073", _ccbf._gggf, primary, _ccbf._ccb, secondary, _ccbf._gaad, _ccbf._gdaf, span, _ccbf._beaec, _ccbf.Color, widthText)
}

// bbox returns the rectangle embedded in the text mark.
func (_cacd *textMark) bbox() _bd.PdfRectangle {
	return _cacd.PdfRectangle
}

// size returns the number of states on the stack.
func (_dbfd *stateStack) size() int {
	return len(*_dbfd)
}

// extractXObjectImage records an ImageMark for the XObject image named `_fef`
// in the resources `_fdeb`. Decoded images are cached per XObject in `_ac`.
// The image is converted to RGB and its size, angle and position are taken
// from the CTM of `_fgeb`. It returns nil without recording anything when the
// XObject or its image is missing.
func (_cebb *imageExtractContext) extractXObjectImage(_fef *_db.PdfObjectName, _fgeb _cce.GraphicsState, _fdeb *_bd.PdfPageResources) error {
	xobject, _ := _fdeb.GetXObjectByName(*_fef)
	if xobject == nil {
		return nil
	}

	cached, hit := _cebb._ac[xobject]
	if !hit {
		ximage, err := _fdeb.GetXObjectImageByName(*_fef)
		if err != nil {
			return err
		}
		if ximage == nil {
			return nil
		}
		decoded, err := ximage.ToImage()
		if err != nil {
			return err
		}
		cached = &cachedImage{_fdg: decoded, _daf: ximage.ColorSpace}
		_cebb._ac[xobject] = cached
	}

	rgbImage, err := cached._daf.ImageToRGB(*cached._fdg)
	if err != nil {
		return err
	}
	_eg.Log.Debug("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073", _fgeb.CTM.String())

	mark := ImageMark{
		Image:  &rgbImage,
		Width:  _fgeb.CTM.ScalingFactorX(),
		Height: _fgeb.CTM.ScalingFactorY(),
		Angle:  _fgeb.CTM.Angle(),
	}
	mark.X, mark.Y = _fgeb.CTM.Translation()
	_cebb._daa = append(_cebb._daa, mark)
	_cebb._de++
	return nil
}

// markKind identifies the kind of a mark.
type markKind int