2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2018-03-22 14:03:47 +00:00
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2023-12-17 13:54:01 +00:00
package extractor ; import ( _df "bytes" ; _d "errors" ; _ge "fmt" ; _ec "github.com/unidoc/unipdf/v3/common" ; _ag "github.com/unidoc/unipdf/v3/contentstream" ; _gb "github.com/unidoc/unipdf/v3/core" ; _bd "github.com/unidoc/unipdf/v3/internal/license" ; _fd "github.com/unidoc/unipdf/v3/internal/textencoding" ;
_bc "github.com/unidoc/unipdf/v3/internal/transform" ; _ce "github.com/unidoc/unipdf/v3/model" ; _b "golang.org/x/image/draw" ; _ef "golang.org/x/text/unicode/norm" ; _dfc "golang.org/x/xerrors" ; _ded "image" ; _fg "image/color" ; _a "io" ; _aa "math" ; _gc "reflect" ; _e "regexp" ;
_c "sort" ; _gd "strings" ; _de "unicode" ; _g "unicode/utf8" ; ) ; type lineRuling struct { _fbbag rulingKind ; _aagg markKind ; _fg . Color ; _ddgfc , _eacg _bc . Point ; } ; func _ccacc ( _effca map [ float64 ] gridTile ) [ ] float64 { _cabg := make ( [ ] float64 , 0 , len ( _effca ) ) ;
for _decbc := range _effca { _cabg = append ( _cabg , _decbc ) ; } ; _c . Float64s ( _cabg ) ; return _cabg ; } ; func ( _gadb * textObject ) showTextAdjusted ( _dae * _gb . PdfObjectArray , _fdbf int ) error { _afc := false ; for _ , _gaa := range _dae . Elements ( ) { switch _gaa . ( type ) { case * _gb . PdfObjectFloat , * _gb . PdfObjectInteger : _gcacc , _dceg := _gb . GetNumberAsFloat ( _gaa ) ;
if _dceg != nil { _ec . Log . Debug ( "\u0045\u0052\u0052\u004fR\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078t\u0041\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0042\u0061\u0064\u0020\u006e\u0075\u006d\u0065r\u0069\u0063\u0061\u006c\u0020a\u0072\u0067\u002e\u0020\u006f\u003d\u0025\u0073\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _gaa , _dae ) ;
return _dceg ; } ; _ceeg , _bfcf := - _gcacc * 0.001 * _gadb . _befa . _ccf , 0.0 ; if _afc { _bfcf , _ceeg = _ceeg , _bfcf ; } ; _cddc := _dgfb ( _bc . Point { X : _ceeg , Y : _bfcf } ) ; _gadb . _eee . Concat ( _cddc ) ; case * _gb . PdfObjectString : _abgfd := _gb . TraceToDirectObject ( _gaa ) ;
_caa , _bfg := _gb . GetStringBytes ( _abgfd ) ; if ! _bfg { _ec . Log . Trace ( "s\u0068\u006f\u0077\u0054\u0065\u0078\u0074\u0041\u0064j\u0075\u0073\u0074\u0065\u0064\u003a\u0020Ba\u0064\u0020\u0073\u0074r\u0069\u006e\u0067\u0020\u0061\u0072\u0067\u002e\u0020o=\u0025\u0073 \u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _gaa , _dae ) ;
return _gb . ErrTypeError ; } ; _gadb . renderText ( _abgfd , _caa , _fdbf ) ; default : _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0073\u0068\u006f\u0077\u0054\u0065\u0078\u0074A\u0064\u006a\u0075\u0073\u0074\u0065\u0064\u002e\u0020\u0055\u006e\u0065\u0078p\u0065\u0063\u0074\u0065\u0064\u0020\u0074\u0079\u0070\u0065\u0020\u0028%T\u0029\u0020\u0061\u0072\u0067\u0073\u003d\u0025\u002b\u0076" , _gaa , _dae ) ;
return _gb . ErrTypeError ; } ; } ; return nil ; } ; func ( _cdeg * textTable ) getDown ( ) paraList { _abfe := make ( paraList , _cdeg . _ecbf ) ; for _efdbg := 0 ; _efdbg < _cdeg . _ecbf ; _efdbg ++ { _gfffb := _cdeg . get ( _efdbg , _cdeg . _dcfg - 1 ) . _fdgbd ; if _gfffb . taken ( ) { return nil ;
} ; _abfe [ _efdbg ] = _gfffb ; } ; for _egccf := 0 ; _egccf < _cdeg . _ecbf - 1 ; _egccf ++ { if _abfe [ _egccf ] . _gaca != _abfe [ _egccf + 1 ] { return nil ; } ; } ; return _abfe ; } ; func ( _ecbe * textTable ) compositeRowCorridors ( ) map [ int ] [ ] float64 { _edadb := make ( map [ int ] [ ] float64 , _ecbe . _dcfg ) ;
if _afcg { _ec . Log . Info ( "c\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0052\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073:\u0020h\u003d\u0025\u0064" , _ecbe . _dcfg ) ; } ; for _fffe := 1 ; _fffe < _ecbe . _dcfg ; _fffe ++ { var _abag [ ] compositeCell ;
for _bbfba := 0 ; _bbfba < _ecbe . _ecbf ; _bbfba ++ { if _cdcb , _cdfcf := _ecbe . _egfe [ _aaca ( _bbfba , _fffe ) ] ; _cdfcf { _abag = append ( _abag , _cdcb ) ; } ; } ; if len ( _abag ) == 0 { continue ; } ; _ebef := _ccef ( _abag ) ; _edadb [ _fffe ] = _ebef ; if _afcg { _ge . Printf ( "\u0020\u0020\u0020\u0025\u0032\u0064\u003a\u0020\u00256\u002e\u0032\u0066\u000a" , _fffe , _ebef ) ;
} ; } ; return _edadb ; } ; func ( _egcc pathSection ) bbox ( ) _ce . PdfRectangle { _fgfa := _egcc . _fbfe [ 0 ] . _gfefe [ 0 ] ; _dabg := _ce . PdfRectangle { Llx : _fgfa . X , Urx : _fgfa . X , Lly : _fgfa . Y , Ury : _fgfa . Y } ; _cae := func ( _bgbe _bc . Point ) { if _bgbe . X < _dabg . Llx { _dabg . Llx = _bgbe . X ;
} else if _bgbe . X > _dabg . Urx { _dabg . Urx = _bgbe . X ; } ; if _bgbe . Y < _dabg . Lly { _dabg . Lly = _bgbe . Y ; } else if _bgbe . Y > _dabg . Ury { _dabg . Ury = _bgbe . Y ; } ; } ; for _ , _gcgb := range _egcc . _fbfe [ 0 ] . _gfefe [ 1 : ] { _cae ( _gcgb ) ; } ; for _ , _addf := range _egcc . _fbfe [ 1 : ] { for _ , _gggf := range _addf . _gfefe { _cae ( _gggf ) ;
} ; } ; return _dabg ; } ; func ( _fdeea paraList ) readBefore ( _dddgd [ ] int , _cbgba , _cbbc int ) bool { _aggf , _deed := _fdeea [ _cbgba ] , _fdeea [ _cbbc ] ; if _adcg ( _aggf , _deed ) && _aggf . Lly > _deed . Lly { return true ; } ; if ! ( _aggf . _ebcf . Urx < _deed . _ebcf . Llx ) { return false ;
} ; _cgec , _caaec := _aggf . Lly , _deed . Lly ; if _cgec > _caaec { _caaec , _cgec = _cgec , _caaec ; } ; _agef := _aa . Max ( _aggf . _ebcf . Llx , _deed . _ebcf . Llx ) ; _agbf := _aa . Min ( _aggf . _ebcf . Urx , _deed . _ebcf . Urx ) ; _ddgf := _fdeea . llyRange ( _dddgd , _cgec , _caaec ) ;
for _ , _egde := range _ddgf { if _egde == _cbgba || _egde == _cbbc { continue ; } ; _fgbg := _fdeea [ _egde ] ; if _fgbg . _ebcf . Llx <= _agbf && _agef <= _fgbg . _ebcf . Urx { return false ; } ; } ; return true ; } ; func ( _badb * textObject ) nextLine ( ) { _badb . moveLP ( 0 , - _badb . _befa . _dcc ) } ;
// quadraticTo appends the end point (`_gaga`, `_dcba`) to the current path.
// The control point (`_aad`, `_dfdc`) is not used.
func (_ggff *shapesState) quadraticTo(_aad, _dfdc, _gaga, _dcba float64) {
	if _cbag {
		// "quadraticTo:"
		_ec.Log.Info("\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a")
	}
	_ggff.addPoint(_gaga, _dcba)
}

// textLine is a bounding rectangle plus the words it contains.
// `_bcdg` is the value textPara.depth() reports for a paragraph's first line.
type textLine struct {
	_ce.PdfRectangle
	_bcdg float64
	_aebc []*textWord
	_ecag float64
}

// setTextRenderMode stores `_cdfe` as the RenderMode of the text state.
// It is a no-op on a nil receiver.
func (_gecd *textObject) setTextRenderMode(_cdfe int) {
	if _gecd != nil {
		_gecd._befa._bbd = RenderMode(_cdfe)
	}
}

// getParagraphs builds the page's paragraphs from its text marks.
//
// Rulings are gathered from the stroke sections (`_geda`, when `_gfag` is set)
// and from the fill sections (`_gegc`, when `_dbbc` is set) and converted with
// toTilings. Marks are then grouped by their `_edge` value for the angles
// 0, 90, 180 and 270, and each non-empty group is handed to `_fbcc`. The loop
// stops as soon as every mark has been assigned to a group.
func (_aeg *PageText) getParagraphs() paraList {
	var rulings rulingList
	if _gfag {
		rulings = append(rulings, _beca(_aeg._geda)...)
	}
	if _dbbc {
		rulings = append(rulings, _fdab(_aeg._gegc)...)
	}
	rulings, tilings := rulings.toTilings()

	var paras paraList
	remaining := len(_aeg._fcag)
	for angle := 0; angle < 360 && remaining > 0; angle += 90 {
		// At most `remaining` marks can still match, so that is the capacity to
		// reserve. The previous `len(marks)-remaining` was the number already
		// consumed (zero on the first pass), which forced append to regrow.
		group := make([]*textMark, 0, remaining)
		for _, mark := range _aeg._fcag {
			if mark._edge == angle {
				group = append(group, mark)
			}
		}
		if len(group) == 0 {
			continue
		}
		paras = append(paras, _fbcc(group, _aeg._fbbg, rulings, tilings, _aeg._ccg._fcgfa)...)
		remaining -= len(group)
	}
	return paras
}

// fill appends the current path, tagged with the current fill colour, to
// `*_bfaed`. With the `_gfgc` debug flag set it also prints a summary, and with
// `_deaga` additionally the first subpaths (up to and including index 10).
func (_efce *shapesState) fill(_bfaed *[]pathSection) {
	section := pathSection{_fbfe: _efce._abgb, Color: _efce._ecfc.getFillColor()}
	*_bfaed = append(*_bfaed, section)
	if !_gfgc {
		return
	}
	box := section.bbox()
	_ge.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*_bfaed), len(section._fbfe), _efce, section.Color, box, box.Width(), box.Height())
	if !_deaga {
		return
	}
	for idx, sub := range section._fbfe {
		_ge.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", idx, sub)
		if idx == 10 {
			break
		}
	}
}

// equals reports whether both rulings have the same `_bfbc` and whether
// `_gecae` accepts each of the `_abbgc`, `_cebe` and `_deee` pairs
// (presumably a tolerant float comparison - confirm against `_gecae`).
func (_dfbf *ruling) equals(_gafce *ruling) bool {
	if _dfbf._bfbc != _gafce._bfbc {
		return false
	}
	if !_gecae(_dfbf._abbgc, _gafce._abbgc) {
		return false
	}
	if !_gecae(_dfbf._cebe, _gafce._cebe) {
		return false
	}
	return _gecae(_dfbf._deee, _gafce._deee)
}

// makeRemovals returns a map holding one empty word set for every key
// currently present in the bag's `_faba` map.
func (_agge *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	removals := make(map[int]map[*textWord]struct{}, len(_agge._faba))
	for key := range _agge._faba {
		removals[key] = map[*textWord]struct{}{}
	}
	return removals
}

// moveTo sets the `_edee` flag and records the device-space position of
// (`_eba`, `_cff`) in `_ebfb`.
func (_bffcd *shapesState) moveTo(_eba, _cff float64) {
	_bffcd._edee = true
	_bffcd._ebfb = _bffcd.devicePoint(_eba, _cff)
	if !_cbag {
		return
	}
	// "moveTo: %.2f,%.2f device=%.2f"
	_ec.Log.Info("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066", _eba, _cff, _bffcd._ebfb)
}

// sort orders the words of every entry in `_faba` using the comparator `_fabg`.
func (_ecbb *wordBag) sort() {
	for _, words := range _ecbb._faba {
		less := func(i, j int) bool {
			return _fabg(words[i], words[j]) < 0
		}
		_c.Slice(words, less)
	}
}

// primMinMax returns the smallest and largest `_abbgc` value in the list.
// The list must not be empty: the first element is read unconditionally.
func (_bedb rulingList) primMinMax() (float64, float64) {
	lowest := _bedb[0]._abbgc
	highest := lowest
	for _, r := range _bedb[1:] {
		switch value := r._abbgc; {
		case value < lowest:
			lowest = value
		case value > highest:
			highest = value
		}
	}
	return lowest, highest
}

// _fcd reports whether the X extents of the two rectangles intersect
// (touching edges count as intersecting).
func _fcd(_cdaf, _gdca _ce.PdfRectangle) bool {
	if _gdca.Llx > _cdaf.Urx {
		return false
	}
	return _cdaf.Llx <= _gdca.Urx
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// ImageMark represents an image drawn on a page and its position in device coordinates.
// All coordinates are in device coordinates.
type ImageMark struct { Image * _ce . Image ;
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// Dimensions of the image as displayed in the PDF.
Width float64 ; Height float64 ;
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// Position of the image in PDF coordinates (lower left corner).
X float64 ; Y float64 ;
2023-04-06 19:57:40 +00:00
2023-12-17 13:54:01 +00:00
// Angle in degrees, if rotated.
Angle float64 ; } ; func ( _gbca * wordBag ) applyRemovals ( _acee map [ int ] map [ * textWord ] struct { } ) { for _fffg , _edgd := range _acee { if len ( _edgd ) == 0 { continue ; } ; _eccg := _gbca . _faba [ _fffg ] ; _gfcf := len ( _eccg ) - len ( _edgd ) ; if _gfcf == 0 { delete ( _gbca . _faba , _fffg ) ;
continue ; } ; _egea := make ( [ ] * textWord , _gfcf ) ; _gfgfg := 0 ; for _ , _fecg := range _eccg { if _ , _edef := _edgd [ _fecg ] ; ! _edef { _egea [ _gfgfg ] = _fecg ; _gfgfg ++ ; } ; } ; _gbca . _faba [ _fffg ] = _egea ; } ; } ; type textTable struct { _ce . PdfRectangle ; _ecbf , _dcfg int ;
_beaeg bool ; _gcbga map [ uint64 ] * textPara ; _egfe map [ uint64 ] compositeCell ; } ; func ( _bgf * textObject ) setTextMatrix ( _aaf [ ] float64 ) { if len ( _aaf ) != 6 { _ec . Log . Debug ( "\u0045\u0052\u0052OR\u003a\u0020\u006c\u0065\u006e\u0028\u0066\u0029\u0020\u0021\u003d\u0020\u0036\u0020\u0028\u0025\u0064\u0029" , len ( _aaf ) ) ;
return ; } ; _gcac , _fbed , _bbgf , _dac , _geg , _cgda := _aaf [ 0 ] , _aaf [ 1 ] , _aaf [ 2 ] , _aaf [ 3 ] , _aaf [ 4 ] , _aaf [ 5 ] ; _bgf . _eee = _bc . NewMatrix ( _gcac , _fbed , _bbgf , _dac , _geg , _cgda ) ; _bgf . _ecg = _bgf . _eee ; } ; func _cacf ( _bbcg [ ] TextMark , _adce * int , _gcbb string ) [ ] TextMark { _dded := _efe ;
_dded . Text = _gcbb ; return _cfaf ( _bbcg , _adce , _dded ) ; } ; func ( _bbfe * textPara ) bbox ( ) _ce . PdfRectangle { return _bbfe . PdfRectangle } ;
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// Text returns the concatenated text of every list in the collection,
// in order, with no separator added between them.
func (_aegg *lists) Text() string {
	var out _gd.Builder
	for _, item := range *_aegg {
		out.WriteString(item.Text())
	}
	return out.String()
}

// list gathers, across all paragraphs, the lines returned by getListLines and
// the full set of lines (`_bdbcg`), then builds the result by passing the full
// set and `_egbg(list lines)` to `_cbbae`.
func (_dcdd paraList) list() []*list {
	var (
		listLines []*textLine
		allLines  []*textLine
	)
	for _, para := range _dcdd {
		listLines = append(listLines, para.getListLines()...)
		allLines = append(allLines, para._bdbcg...)
	}
	return _cbbae(allLines, _egbg(listLines))
}

// getStrokeColor converts the graphics state's stroking colourspace and
// stroking colour into a color.Color through `_agefd`.
func (_bgfg *textObject) getStrokeColor() _fg.Color {
	state := _bgfg._gbe
	return _agefd(state.ColorspaceStroking, state.ColorStroking)
}

// bbox returns the word's bounding rectangle.
func (_cfbbg *textWord) bbox() _ce.PdfRectangle {
	return _cfbbg.PdfRectangle
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// Options extractor options.
type Options struct {
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// DisableDocumentTags specifies whether to use the document tags during list extraction.
DisableDocumentTags bool ;
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// ApplyCropBox will extract page text based on page cropbox if set to `true`.
ApplyCropBox bool ;
2023-11-11 11:29:03 +00:00
2023-12-17 13:54:01 +00:00
// UseSimplerExtractionProcess will skip topological text ordering and table processing.
2023-11-11 11:29:03 +00:00
//
2023-12-17 13:54:01 +00:00
// NOTE: While normally the extra processing is beneficial, it can also lead to problems when it does not work.
// Thus it is a flag to allow the user to control this process.
2023-11-11 11:29:03 +00:00
//
2023-12-17 13:54:01 +00:00
// Skipping some extraction processes would also lead to the reduced processing time.
UseSimplerExtractionProcess bool ; } ; func _beca ( _aceeg [ ] pathSection ) rulingList { _bdged ( _aceeg ) ; if _gfgc { _ec . Log . Info ( "\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073" , len ( _aceeg ) ) ;
} ; var _eabca rulingList ; for _ , _fdga := range _aceeg { for _ , _eeed := range _fdga . _fbfe { if len ( _eeed . _gfefe ) < 2 { continue ; } ; _gceb := _eeed . _gfefe [ 0 ] ; for _ , _bfbcc := range _eeed . _gfefe [ 1 : ] { if _gadbd , _eabfd := _gfbbf ( _gceb , _bfbcc , _fdga . Color ) ;
_eabfd { _eabca = append ( _eabca , _gadbd ) ; } ; _gceb = _bfbcc ; } ; } ; } ; if _gfgc { _ec . Log . Info ( "m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073" , _eabca ) ; } ; return _eabca ; } ; func _bcdee ( _deeef , _bgae _bc . Point ) rulingKind { _gcbce := _aa . Abs ( _deeef . X - _bgae . X ) ;
_fcefg := _aa . Abs ( _deeef . Y - _bgae . Y ) ; return _gbbf ( _gcbce , _fcefg , _cffa ) ; } ; func ( _ddbg * textPara ) depth ( ) float64 { if _ddbg . _bedf { return - 1.0 ; } ; if len ( _ddbg . _bdbcg ) > 0 { return _ddbg . _bdbcg [ 0 ] . _bcdg ; } ; return _ddbg . _bddea . depth ( ) ; } ;
// String returns a string describing the text mark: its offset, its text
// (quoted and as hex code points), the corners of its bounding box, the font
// description (cut to 50 bytes followed by "...") and a " *M*" tag for meta marks.
func (_cfa TextMark) String() string {
	fontDesc := ""
	if _cfa.Font != nil {
		fontDesc = _cfa.Font.String()
		if len(fontDesc) > 50 {
			fontDesc = fontDesc[:50] + "\u002e\u002e\u002e"
		}
	}
	metaTag := ""
	if _cfa.Meta {
		metaTag = "\u0020\u002a\u004d\u002a"
	}
	box := _cfa.BBox
	// "{TextMark: %d %q=%02x (%6.2f, %6.2f) (%6.2f, %6.2f) %s%s}"
	return _ge.Sprintf("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d", _cfa.Offset, _cfa.Text, []rune(_cfa.Text), box.Llx, box.Lly, box.Urx, box.Ury, fontDesc, metaTag)
}

// merge sorts the paragraphs into reading order and combines them into a
// single paragraph: the rectangles are folded together with `_cdggc` and the
// lines are concatenated in order. It returns nil for an empty list.
func (_fffca paraList) merge() *textPara {
	// "merge: paras=%d ===========x============="
	_ec.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_fffca))
	if len(_fffca) == 0 {
		return nil
	}
	_fffca.sortReadingOrder()

	first := _fffca[0]
	bounds := first.PdfRectangle
	// Cap the capacity at the length so that the appends below always copy
	// into fresh storage instead of writing into spare capacity of the first
	// paragraph's backing array, which other slices may share.
	lines := first._bdbcg[:len(first._bdbcg):len(first._bdbcg)]
	for _, para := range _fffca[1:] {
		bounds = _cdggc(bounds, para.PdfRectangle)
		lines = append(lines, para._bdbcg...)
	}
	return _ffec(bounds, lines)
}
2023-11-11 11:29:03 +00:00
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
2023-12-17 13:54:01 +00:00
// ApplyArea, step by step:
//  1. keep only the marks of `_gdg._fcag` for which `_decgb(mark.bbox(), bbox)` is true
//     (presumably a containment/overlap test - confirm against `_decgb`);
//  2. for each angle 0, 90, 180 and 270, collect the kept marks whose `_edge` equals
//     that angle and pass each non-empty group to `_fbcc` with nil rulings; the loop
//     ends early once every kept mark has been consumed;
//  3. write the resulting paragraphs to a buffer and overwrite `_gcee` (text),
//     `_ebfc` (text marks) and `_eadb` (tables) on the receiver.
// NOTE(review): the per-angle slice capacity `len(_gefe)-_eaae` is the number of marks
// already consumed (0 on the first pass), not the number still remaining.
func ( _gdg * PageText ) ApplyArea ( bbox _ce . PdfRectangle ) { _gefe := make ( [ ] * textMark , 0 , len ( _gdg . _fcag ) ) ; for _ , _edc := range _gdg . _fcag { if _decgb ( _edc . bbox ( ) , bbox ) { _gefe = append ( _gefe , _edc ) ; } ; } ; var _fbcf paraList ; _eaae := len ( _gefe ) ; for _cgac := 0 ;
_cgac < 360 && _eaae > 0 ; _cgac += 90 { _dgee := make ( [ ] * textMark , 0 , len ( _gefe ) - _eaae ) ; for _ , _ceff := range _gefe { if _ceff . _edge == _cgac { _dgee = append ( _dgee , _ceff ) ; } ; } ; if len ( _dgee ) > 0 { _gfgf := _fbcc ( _dgee , _gdg . _fbbg , nil , nil , _gdg . _ccg . _fcgfa ) ;
// The next line finishes ApplyArea and then starts `_bddac`, which builds a textWord
// from the marks in `_fgaac`: the word rectangle is the fold of all mark rectangles
// through `_cdggc`, `_ddgee` is the largest `_bfaca` among the marks, and `_cffdg` is
// `_bbad.Ury` minus the word's Lly. `_fgaac` must be non-empty (element 0 is read
// unconditionally).
_fbcf = append ( _fbcf , _gfgf ... ) ; _eaae -= len ( _dgee ) ; } ; } ; _bgdf := new ( _df . Buffer ) ; _fbcf . writeText ( _bgdf ) ; _gdg . _gcee = _bgdf . String ( ) ; _gdg . _ebfc = _fbcf . toTextMarks ( ) ; _gdg . _eadb = _fbcf . tables ( ) ; } ; func _bddac ( _fgaac [ ] * textMark , _bbad _ce . PdfRectangle ) * textWord { _gadac := _fgaac [ 0 ] . PdfRectangle ;
_bdbaa := _fgaac [ 0 ] . _bfaca ; for _ , _bcgce := range _fgaac [ 1 : ] { _gadac = _cdggc ( _gadac , _bcgce . PdfRectangle ) ; if _bcgce . _bfaca > _bdbaa { _bdbaa = _bcgce . _bfaca ; } ; } ; return & textWord { PdfRectangle : _gadac , _bgeaa : _fgaac , _cffdg : _bbad . Ury - _gadac . Lly , _ddgee : _bdbaa } ;
// The next line closes `_bddac` and starts rulingList.bbox. For an empty list it logs
// "rulingList.bbox: no rulings" at error level and returns the zero rectangle. Otherwise
// the first ruling's `_bfbc` selects the axis mapping: when it equals `_ecac` the X range
// comes from secMinMax and the Y range from primMinMax; in every other case the two
// sources are swapped.
} ; func ( _fcee rulingList ) bbox ( ) _ce . PdfRectangle { var _eeba _ce . PdfRectangle ; if len ( _fcee ) == 0 { _ec . Log . Error ( "r\u0075\u006c\u0069\u006e\u0067\u004ci\u0073\u0074\u002e\u0062\u0062\u006f\u0078\u003a\u0020n\u006f\u0020\u0072u\u006ci\u006e\u0067\u0073" ) ;
return _ce . PdfRectangle { } ; } ; if _fcee [ 0 ] . _bfbc == _ecac { _eeba . Llx , _eeba . Urx = _fcee . secMinMax ( ) ; _eeba . Lly , _eeba . Ury = _fcee . primMinMax ( ) ; } else { _eeba . Llx , _eeba . Urx = _fcee . primMinMax ( ) ; _eeba . Lly , _eeba . Ury = _fcee . secMinMax ( ) ; } ; return _eeba ;
// The next line closes rulingList.bbox and defines two more functions:
//   - `_aaca` packs two ints into one uint64 key as first*0x1000000 + second.
//   - lastpointEstablished (its final return is on the following source line) returns
//     `_ebfb` and false when `_edee` is set, the last point of the final entry of `_abgb`
//     and false when that entry has `_cgde` set, and otherwise a zero Point and true.
} ; func _aaca ( _egcg , _dbbbc int ) uint64 { return uint64 ( _egcg ) * 0x1000000 + uint64 ( _dbbbc ) } ; func ( _bgcd * shapesState ) lastpointEstablished ( ) ( _bc . Point , bool ) { if _bgcd . _edee { return _bgcd . _ebfb , false ; } ; _gbbg := len ( _bgcd . _abgb ) ; if _gbbg > 0 && _bgcd . _abgb [ _gbbg - 1 ] . _cgde { return _bgcd . _abgb [ _gbbg - 1 ] . last ( ) , false ;
} ; return _bc . Point { } , true ; } ; func ( _ffcb * Extractor ) extractPageText ( _fdf string , _gccf * _ce . PdfPageResources , _dg _bc . Matrix , _gccd int ) ( * PageText , int , int , error ) { _ec . Log . Trace ( "\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d" , _gccd ) ;
_eaa := & PageText { _fbbg : _ffcb . _gda , _bbb : _ffcb . _ba , _fefc : _ffcb . _eg } ; _dee := _cab ( _ffcb . _gda ) ; var _bfa stateStack ; _gfc := _egf ( _ffcb , _gccf , _ag . GraphicsState { } , & _dee , & _bfa ) ; _ggc := shapesState { _dag : _dg , _aabgc : _bc . IdentityMatrix ( ) , _ecfc : _gfc } ;
var _ccce bool ; _fdb := - 1 ; if _gccd > _adee { _fff := _d . New ( "\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077" ) ; _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076" , _gccd , _fff ) ;
return _eaa , _dee . _baf , _dee . _afgb , _fff ; } ; _abd := _ag . NewContentStreamParser ( _fdf ) ; _fbf , _bac := _abd . Parse ( ) ; if _bac != nil { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bac ) ;
return _eaa , _dee . _baf , _dee . _afgb , _bac ; } ; _eaa . _ecfe = _fbf ; _dfa := _ag . NewContentStreamProcessor ( * _fbf ) ; _dfa . AddHandler ( _ag . HandlerConditionEnumAllOperands , "" , func ( _dce * _ag . ContentStreamOperation , _gbge _ag . GraphicsState , _bag * _ce . PdfPageResources ) error { _cgd := _dce . Operand ;
if _acab { _ec . Log . Info ( "\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s" , _dce ) ; } ; switch _cgd { case "\u0071" : if _cbag { _ec . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _ggc . _aabgc ) ; } ; _bfa . push ( & _dee ) ; case "\u0051" : if ! _bfa . empty ( ) { _dee = * _bfa . pop ( ) ;
} ; _ggc . _aabgc = _gbge . CTM ; if _cbag { _ec . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _ggc . _aabgc ) ; } ; case "\u0042\u0044\u0043" : _gce , _bacc := _gb . GetDict ( _dce . Params [ 1 ] ) ; if ! _bacc { _ec . Log . Debug ( "\u0045\u0052\u0052O\u0052\u003a\u0020\u0042D\u0043\u0020\u006f\u0070\u003d\u0025\u0073 \u0047\u0065\u0074\u0044\u0069\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _dce ) ;
return _bac ; } ; _badc := _gce . Get ( "\u004d\u0043\u0049\u0044" ) ; if _badc != nil { _fce , _cdf := _gb . GetIntVal ( _badc ) ; if ! _cdf { _ec . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u0042\u0044C\u0020\u006f\u0070=\u0025\u0073\u002e\u0020\u0042\u0061\u0064\u0020\u006eum\u0065\u0072\u0069c\u0061\u006c \u006f\u0062\u006a\u0065\u0063\u0074.\u0020\u006f=\u0025\u0073" , _dce , _badc ) ;
} ; _fdb = _fce ; } else { _fdb = - 1 ; } ; case "\u0045\u004d\u0043" : _fdb = - 1 ; case "\u0042\u0054" : if _ccce { _ec . Log . Debug ( "\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
_eaa . _fcag = append ( _eaa . _fcag , _gfc . _cfb ... ) ; } ; _ccce = true ; _ceac := _gbge ; _ceac . CTM = _dg . Mult ( _ceac . CTM ) ; _gfc = _egf ( _ffcb , _bag , _ceac , & _dee , & _bfa ) ; _ggc . _ecfc = _gfc ; case "\u0045\u0054" : if ! _ccce { _ec . Log . Debug ( "\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
} ; _ccce = false ; _eaa . _fcag = append ( _eaa . _fcag , _gfc . _cfb ... ) ; _gfc . reset ( ) ; case "\u0054\u002a" : _gfc . nextLine ( ) ; case "\u0054\u0064" : if _bfb , _aec := _gfc . checkOp ( _dce , 2 , true ) ; ! _bfb { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _aec ) ;
return _aec ; } ; _aecd , _bacd , _gba := _fbda ( _dce . Params ) ; if _gba != nil { return _gba ; } ; _gfc . moveText ( _aecd , _bacd ) ; case "\u0054\u0044" : if _dff , _abg := _gfc . checkOp ( _dce , 2 , true ) ; ! _dff { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _abg ) ;
return _abg ; } ; _ece , _af , _fab := _fbda ( _dce . Params ) ; if _fab != nil { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fab ) ; return _fab ; } ; _gfc . moveTextSetLeading ( _ece , _af ) ; case "\u0054\u006a" : if _ced , _cdc := _gfc . checkOp ( _dce , 1 , true ) ;
! _ced { _ec . Log . Debug ( "\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076" , _dce , _cdc ) ; return _cdc ; } ; _faad := _gb . TraceToDirectObject ( _dce . Params [ 0 ] ) ; _badfg , _caff := _gb . GetStringBytes ( _faad ) ;
if ! _caff { _ec . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064" , _dce ) ; return _gb . ErrTypeError ;
} ; return _gfc . showText ( _faad , _badfg , _fdb ) ; case "\u0054\u004a" : if _cdce , _cba := _gfc . checkOp ( _dce , 1 , true ) ; ! _cdce { _ec . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _cba ) ; return _cba ; } ; _aefb , _aed := _gb . GetArray ( _dce . Params [ 0 ] ) ;
if ! _aed { _ec . Log . Debug ( "\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _dce ) ; return _bac ; } ; return _gfc . showTextAdjusted ( _aefb , _fdb ) ;
case "\u0027" : if _fcgf , _bff := _gfc . checkOp ( _dce , 1 , true ) ; ! _fcgf { _ec . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bff ) ; return _bff ; } ; _cdd := _gb . TraceToDirectObject ( _dce . Params [ 0 ] ) ; _bdd , _fac := _gb . GetStringBytes ( _cdd ) ;
if ! _fac { _ec . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _dce ) ; return _gb . ErrTypeError ; } ; _gfc . nextLine ( ) ; return _gfc . showText ( _cdd , _bdd , _fdb ) ;
case "\u0022" : if _bbc , _gge := _gfc . checkOp ( _dce , 3 , true ) ; ! _bbc { _ec . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gge ) ; return _gge ; } ; _gcd , _gbaf , _fbe := _fbda ( _dce . Params [ : 2 ] ) ; if _fbe != nil { return _fbe ;
} ; _cf := _gb . TraceToDirectObject ( _dce . Params [ 2 ] ) ; _eda , _fga := _gb . GetStringBytes ( _cf ) ; if ! _fga { _ec . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _dce ) ;
return _gb . ErrTypeError ; } ; _gfc . setCharSpacing ( _gcd ) ; _gfc . setWordSpacing ( _gbaf ) ; _gfc . nextLine ( ) ; return _gfc . showText ( _cf , _eda , _fdb ) ; case "\u0054\u004c" : _eaf , _bgd := _dgd ( _dce ) ; if _bgd != nil { _ec . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bgd ) ;
return _bgd ; } ; _gfc . setTextLeading ( _eaf ) ; case "\u0054\u0063" : _cega , _fef := _dgd ( _dce ) ; if _fef != nil { _ec . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fef ) ; return _fef ; } ; _gfc . setCharSpacing ( _cega ) ;
case "\u0054\u0066" : if _bfe , _deag := _gfc . checkOp ( _dce , 2 , true ) ; ! _bfe { _ec . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _deag ) ; return _deag ; } ; _fcef , _fca := _gb . GetNameVal ( _dce . Params [ 0 ] ) ;
if ! _fca { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064" , _dce ) ; return _gb . ErrTypeError ; } ; _egb , _eade := _gb . GetNumberAsFloat ( _dce . Params [ 1 ] ) ;
if ! _fca { _ec . Log . Debug ( "\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dce , _eade ) ;
return _eade ; } ; _eade = _gfc . setFont ( _fcef , _egb ) ; _gfc . _efbb = _dfc . Is ( _eade , _gb . ErrNotSupported ) ; if _eade != nil && ! _gfc . _efbb { return _eade ; } ; case "\u0054\u006d" : if _dgf , _dade := _gfc . checkOp ( _dce , 6 , true ) ; ! _dgf { _ec . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dade ) ;
return _dade ; } ; _eac , _fee := _gb . GetNumbersAsFloat ( _dce . Params ) ; if _fee != nil { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fee ) ; return _fee ; } ; _gfc . setTextMatrix ( _eac ) ; case "\u0054\u0072" : if _cdb , _fcff := _gfc . checkOp ( _dce , 1 , true ) ;
! _cdb { _ec . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fcff ) ; return _fcff ; } ; _dgg , _acag := _gb . GetIntVal ( _dce . Params [ 0 ] ) ; if ! _acag { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _dce ) ;
return _gb . ErrTypeError ; } ; _gfc . setTextRenderMode ( _dgg ) ; case "\u0054\u0073" : if _afg , _bgc := _gfc . checkOp ( _dce , 1 , true ) ; ! _afg { _ec . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bgc ) ; return _bgc ;
} ; _afa , _cfc := _gb . GetNumberAsFloat ( _dce . Params [ 0 ] ) ; if _cfc != nil { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _cfc ) ; return _cfc ; } ; _gfc . setTextRise ( _afa ) ; case "\u0054\u0077" : if _bgbc , _gbc := _gfc . checkOp ( _dce , 1 , true ) ;
! _bgbc { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gbc ) ; return _gbc ; } ; _abgf , _cbab := _gb . GetNumberAsFloat ( _dce . Params [ 0 ] ) ; if _cbab != nil { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _cbab ) ;
return _cbab ; } ; _gfc . setWordSpacing ( _abgf ) ; case "\u0054\u007a" : if _aaa , _bbg := _gfc . checkOp ( _dce , 1 , true ) ; ! _aaa { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bbg ) ; return _bbg ; } ; _dfb , _bagg := _gb . GetNumberAsFloat ( _dce . Params [ 0 ] ) ;
if _bagg != nil { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bagg ) ; return _bagg ; } ; _gfc . setHorizScaling ( _dfb ) ; case "\u0063\u006d" : _ggc . _aabgc = _gbge . CTM ; if _ggc . _aabgc . Singular ( ) { _add := _bc . IdentityMatrix ( ) . Translate ( _ggc . _aabgc . Translation ( ) ) ;
_ec . Log . Debug ( "S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s" , _ggc . _aabgc , _add ) ; _ggc . _aabgc = _add ; } ; if _cbag { _ec . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _ggc . _aabgc ) ; } ; case "\u006d" : if len ( _dce . Params ) != 2 { _ec . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _fe ) ;
return nil ; } ; _beb , _efbd := _gb . GetNumbersAsFloat ( _dce . Params ) ; if _efbd != nil { return _efbd ; } ; _ggc . moveTo ( _beb [ 0 ] , _beb [ 1 ] ) ; case "\u006c" : if len ( _dce . Params ) != 2 { _ec . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _fe ) ;
return nil ; } ; _dada , _bfdb := _gb . GetNumbersAsFloat ( _dce . Params ) ; if _bfdb != nil { return _bfdb ; } ; _ggc . lineTo ( _dada [ 0 ] , _dada [ 1 ] ) ; case "\u0063" : if len ( _dce . Params ) != 6 { return _fe ; } ; _afgf , _ecc := _gb . GetNumbersAsFloat ( _dce . Params ) ; if _ecc != nil { return _ecc ;
} ; _ec . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _afgf ) ; _ggc . cubicTo ( _afgf [ 0 ] , _afgf [ 1 ] , _afgf [ 2 ] , _afgf [ 3 ] , _afgf [ 4 ] , _afgf [ 5 ] ) ; case "\u0076" , "\u0079" : if len ( _dce . Params ) != 4 { return _fe ;
} ; _cdfc , _edg := _gb . GetNumbersAsFloat ( _dce . Params ) ; if _edg != nil { return _edg ; } ; _ec . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _cdfc ) ; _ggc . quadraticTo ( _cdfc [ 0 ] , _cdfc [ 1 ] , _cdfc [ 2 ] , _cdfc [ 3 ] ) ;
case "\u0068" : _ggc . closePath ( ) ; case "\u0072\u0065" : if len ( _dce . Params ) != 4 { return _fe ; } ; _gca , _adf := _gb . GetNumbersAsFloat ( _dce . Params ) ; if _adf != nil { return _adf ; } ; _ggc . drawRectangle ( _gca [ 0 ] , _gca [ 1 ] , _gca [ 2 ] , _gca [ 3 ] ) ; _ggc . closePath ( ) ;
case "\u0053" : _ggc . stroke ( & _eaa . _geda ) ; _ggc . clearPath ( ) ; case "\u0073" : _ggc . closePath ( ) ; _ggc . stroke ( & _eaa . _geda ) ; _ggc . clearPath ( ) ; case "\u0046" : _ggc . fill ( & _eaa . _gegc ) ; _ggc . clearPath ( ) ; case "\u0066" , "\u0066\u002a" : _ggc . closePath ( ) ; _ggc . fill ( & _eaa . _gegc ) ;
_ggc . clearPath ( ) ; case "\u0042" , "\u0042\u002a" : _ggc . fill ( & _eaa . _gegc ) ; _ggc . stroke ( & _eaa . _geda ) ; _ggc . clearPath ( ) ; case "\u0062" , "\u0062\u002a" : _ggc . closePath ( ) ; _ggc . fill ( & _eaa . _gegc ) ; _ggc . stroke ( & _eaa . _geda ) ; _ggc . clearPath ( ) ; case "\u006e" : _ggc . clearPath ( ) ;
case "\u0044\u006f" : if len ( _dce . Params ) == 0 { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e" , _dce . Params ) ;
return _gb . ErrRangeError ; } ; _deda , _eff := _gb . GetName ( _dce . Params [ 0 ] ) ; if ! _eff { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e" , _dce . Params [ 0 ] ) ;
return _gb . ErrTypeError ; } ; _ , _geaf := _bag . GetXObjectByName ( * _deda ) ; if _geaf != _ce . XObjectTypeForm { break ; } ; _egbd , _eff := _ffcb . _fa [ _deda . String ( ) ] ; if ! _eff { _agcd , _ddf := _bag . GetXObjectFormByName ( * _deda ) ; if _ddf != nil { _ec . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _ddf ) ;
return _ddf ; } ; _edf , _ddf := _agcd . GetContentStream ( ) ; if _ddf != nil { _ec . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _ddf ) ; return _ddf ; } ; _bdg := _agcd . Resources ; if _bdg == nil { _bdg = _bag ; } ; _gcdc := _gbge . CTM ; if _ggg , _ada := _gb . GetArray ( _agcd . Matrix ) ;
_ada { _cgc , _gced := _ggg . GetAsFloat64Slice ( ) ; if _gced != nil { return _gced ; } ; if len ( _cgc ) != 6 { return _fe ; } ; _bcc := _bc . NewMatrix ( _cgc [ 0 ] , _cgc [ 1 ] , _cgc [ 2 ] , _cgc [ 3 ] , _cgc [ 4 ] , _cgc [ 5 ] ) ; _gcdc = _gbge . CTM . Mult ( _bcc ) ; } ; _gff , _cdbg , _edgg , _ddf := _ffcb . extractPageText ( string ( _edf ) , _bdg , _dg . Mult ( _gcdc ) , _gccd + 1 ) ;
if _ddf != nil { _ec . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _ddf ) ; return _ddf ; } ; _egbd = textResult { * _gff , _cdbg , _edgg } ; _ffcb . _fa [ _deda . String ( ) ] = _egbd ; } ; _ggc . _aabgc = _gbge . CTM ; if _cbag { _ec . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _ggc . _aabgc ) ;
} ; _eaa . _fcag = append ( _eaa . _fcag , _egbd . _ceee . _fcag ... ) ; _eaa . _geda = append ( _eaa . _geda , _egbd . _ceee . _geda ... ) ; _eaa . _gegc = append ( _eaa . _gegc , _egbd . _ceee . _gegc ... ) ; _dee . _baf += _egbd . _agcc ; _dee . _afgb += _egbd . _dge ; case "\u0072\u0067" , "\u0067" , "\u006b" , "\u0063\u0073" , "\u0073\u0063" , "\u0073\u0063\u006e" : _gfc . _gbe . ColorspaceNonStroking = _gbge . ColorspaceNonStroking ;
_gfc . _gbe . ColorNonStroking = _gbge . ColorNonStroking ; case "\u0052\u0047" , "\u0047" , "\u004b" , "\u0043\u0053" , "\u0053\u0043" , "\u0053\u0043\u004e" : _gfc . _gbe . ColorspaceStroking = _gbge . ColorspaceStroking ; _gfc . _gbe . ColorStroking = _gbge . ColorStroking ;
} ; return nil ; } ) ; _bac = _dfa . Process ( _gccf ) ; return _eaa , _dee . _baf , _dee . _afgb , _bac ; } ;
2023-11-11 11:29:03 +00:00
2023-12-17 13:54:01 +00:00
// Tables returns the tables extracted from the page.
// When the _afcg debug flag is set the number of tables is logged first.
func (_gdfa PageText) Tables() []TextTable {
	tables := _gdfa._eadb
	if _afcg {
		_ec.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}

// _fddd decodes the image XObject held in the stream _dcbdd and returns the result of passing the
// decoded image and _gdgcc to _abgbe.
// If _dcbdd is not a stream it returns (nil, nil); decoding failures are returned as errors.
func _fddd(_dcbdd _gb.PdfObject, _gdgcc _fg.Color) (_ded.Image, error) {
	stream, isStream := _gb.GetStream(_dcbdd)
	if !isStream {
		return nil, nil
	}

	xobj, err := _ce.NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}

	decoded, err := xobj.ToImage()
	if err != nil {
		return nil, err
	}
	return _abgbe(decoded, _gdgcc), nil
}

// _ffef binds the float64 argument of the three-argument predicate _gfae to _bdbc and returns the
// resulting two-argument predicate.
func _ffef(_gfae func(*wordBag, *textWord, float64) bool, _bdbc float64) func(*wordBag, *textWord) bool {
	bound := func(bag *wordBag, word *textWord) bool {
		return _gfae(bag, word, _bdbc)
	}
	return bound
}

// _deca recursively builds a tree of "bullet" lists.
//
// The lines stored in _fffa under key _afacg[_bfbda] are filtered by _bdbf using the bounds _ebbd
// and _bead. Each surviving line becomes the head of one list entry:
//   - its children are produced by recursing on index _bfbda+1 (when that index exists), bounded
//     by the head's _bcdg value and the _bcdg value of the next head (or _ebbd for the last head);
//   - its remaining lines are collected by _dcbf, bounded by the head's _bcdg value and either the
//     first line of the first child or, when there is none, the same upper bound as above.
//
// NOTE(review): _afacg[_bfbda] is indexed without a bounds check; callers must pass a valid index.
func _deca(_fbgeb []*textLine, _fffa map[float64][]*textLine, _afacg []float64, _bfbda int, _bead, _ebbd float64) []*list {
	entries := []*list{}
	nextLevel := _bfbda + 1

	heads := _bdbf(_fffa[_afacg[_bfbda]], _ebbd, _bead)
	for i, head := range heads {
		start := head._bcdg

		// The entry extends to the next head on this level, or to the outer bound for the last one.
		bound := _ebbd
		if i+1 < len(heads) {
			bound = heads[i+1]._bcdg
		}

		children := []*list{}
		if nextLevel < len(_afacg) {
			children = _deca(_fbgeb, _fffa, _afacg, nextLevel, start, bound)
		}

		// The entry's own text stops where its first child starts, if it has one.
		textBound := bound
		if len(children) > 0 {
			if childLines := children[0]._ggdb; len(childLines) > 0 {
				textBound = childLines[0]._bcdg
			}
		}

		body := []*textLine{head}
		body = append(body, _dcbf(head, _fbgeb, _afacg, start, textBound)...)

		entry := _baba(body, "\u0062\u0075\u006c\u006c\u0065\u0074", children)
		entry._dage = _efaf(body, "")
		entries = append(entries, entry)
	}
	return entries
}

// _eeggd matches the pattern `^\s*(\d+\.?|[Iiv]+)\s*\)?$`.
var _eeggd = _e.MustCompile("\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024")
// Error messages used by PageFonts.extractPageResourcesToFont.
const (
	// _be decodes to "ERROR: Can't convert font object, invalid type".
	// It is returned when the page resources' Font entry is not a dictionary.
	_be = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"

	// _cc decodes to "ERROR: Can't get font properties, font not found".
	// It is returned when a font named in the Font dictionary cannot be looked up.
	_cc = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"

	// _cb decodes to "ERROR: Can't get font stream, invalid type".
	// It is logged at debug level when no embedded font file could be extracted for a font.
	_cb = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
)
// growTable repeatedly extends the table with the cells returned by getDown and getRight until
// neither returns anything.
//
// On every pass:
//   - if both are available, their last elements are the same paragraph and that paragraph is not
//     taken, the getDown cells are added first, getRight is queried again and (if non-nil) added,
//     and the shared paragraph is stored in the new corner cell;
//   - otherwise the getDown cells are added if there are any;
//   - otherwise the getRight cells are added if there are any;
//   - otherwise the loop ends.
//
// Adding getDown cells increments _dcfg; adding getRight cells increments _ecbf.
func (_adea *textTable) growTable() {
	// appendDown stores cells[i] at (i, _dcfg-1) after incrementing _dcfg.
	appendDown := func(cells paraList) {
		_adea._dcfg++
		for col := 0; col < _adea._ecbf; col++ {
			_adea.put(col, _adea._dcfg-1, cells[col])
		}
	}
	// appendRight stores cells[i] at (_ecbf-1, i) after incrementing _ecbf.
	appendRight := func(cells paraList) {
		_adea._ecbf++
		for row := 0; row < _adea._dcfg; row++ {
			_adea.put(_adea._ecbf-1, row, cells[row])
		}
	}

	if _dcedc {
		_adea.log("\u0067r\u006f\u0077\u0054\u0061\u0062\u006ce")
	}

	for pass := 0; ; pass++ {
		below := _adea.getDown()
		beside := _adea.getRight()
		if _dcedc {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", pass, _adea)
			_ge.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0020\u0064\u006f\u0077\u006e\u003d\u0025\u0073\u000a", below)
			_ge.Printf("\u0020\u0020 \u0020\u0020\u0020 \u0072\u0069\u0067\u0068\u0074\u003d\u0025\u0073\u000a", beside)
		}

		if below != nil && beside != nil {
			corner := below[len(below)-1]
			if !corner.taken() && corner == beside[len(beside)-1] {
				appendDown(below)
				if beside = _adea.getRight(); beside != nil {
					appendRight(beside)
					_adea.put(_adea._ecbf-1, _adea._dcfg-1, corner)
				}
				continue
			}
		}

		switch {
		case below != nil:
			appendDown(below)
		case beside != nil:
			appendRight(beside)
		default:
			return
		}
	}
}

// _edeff builds a ruling of kind _ecac from the top edge of _cbed: _abbgc is set to Ury and the
// extent (_cebe, _deee) to (Llx, Urx).
func _edeff(_cbed _ce.PdfRectangle) *ruling {
	top := &ruling{_bfbc: _ecac}
	top._abbgc = _cbed.Ury
	top._cebe = _cbed.Llx
	top._deee = _cbed.Urx
	return top
}

// computeText returns the concatenation of the _ecaa strings of the word's marks, in order.
func (_abee *textWord) computeText() string {
	var sb _gd.Builder
	for _, mark := range _abee._bgeaa {
		sb.WriteString(mark._ecaa)
	}
	return sb.String()
}

// log dumps the tiling when the _bea debug flag is set and does nothing otherwise.
// It prints the sizes and contents of _cgecb and _agbb followed by every tile present in _bage,
// iterating _agbb in the outer loop and _cgecb in the inner loop.
func (_fddcf gridTiling) log(_cdcf string) {
	if !_bea {
		return
	}
	_ec.Log.Info("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071", len(_fddcf._cgecb), len(_fddcf._agbb), _cdcf)
	_ge.Printf("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a", _fddcf._cgecb)
	_ge.Printf("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a", _fddcf._agbb)

	for rowIdx, rowKey := range _fddcf._agbb {
		row, haveRow := _fddcf._bage[rowKey]
		if !haveRow {
			continue
		}
		_ge.Printf("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a", rowIdx, rowKey)
		for colIdx, colKey := range _fddcf._cgecb {
			if tile, haveTile := row[colKey]; haveTile {
				_ge.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", colIdx, tile.String())
			}
		}
	}
}

// _eccb returns the lines of _bdff whose text contains a match of the _ccfe regular expression.
// The result is always non-nil.
func _eccb(_bdff []*textLine) []*textLine {
	matching := []*textLine{}
	for _, line := range _bdff {
		if _ccfe.Find([]byte(line.text())) == nil {
			continue
		}
		matching = append(matching, line)
	}
	return matching
}
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
// ex, _ := New(page)
// // handle errors
// pageText, _, _, err := ex.ExtractPageText()
// // handle errors
// text := pageText.Text()
// textMarks := pageText.Marks()
//
// start := strings.Index(text, term)
// end := start + len(term)
// spanMarks, err := textMarks.RangeOffset(start, end)
// // handle errors
// bbox, ok := spanMarks.BBox()
// // handle errors
type TextMark struct {
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Text is the extracted text.
Text string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Original is the text in the PDF. It has not been decoded like `Text`.
Original string ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// BBox is the bounding box of the text.
2023-12-17 13:54:01 +00:00
BBox _ce . PdfRectangle ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Font is the font the text was drawn with.
2023-12-17 13:54:01 +00:00
Font * _ce . PdfFont ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
// text, textMarks := pageText.Text(), pageText.Marks()
// marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2023-12-17 13:54:01 +00:00
FillColor _fg . Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2023-12-17 13:54:01 +00:00
StrokeColor _fg . Color ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// Orientation is the text orientation
Orientation int ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// DirectObject is the underlying PdfObject (Text Object) that represents the visible texts. This is introduced to get
// a simple access to the TextObject in case editing or replacment of some text is needed. E.g during redaction.
2023-12-17 13:54:01 +00:00
DirectObject _gb . PdfObject ;
2023-09-07 17:40:17 +00:00
2023-10-07 13:58:01 +00:00
// ObjString is a decoded string operand of a text-showing operator. It has the same value as `Text` attribute except
// when many glyphs are represented with the same Text Object that contains multiple length string operand in which case
// ObjString spans more than one character string that falls in different TextMark objects.
2023-12-17 13:54:01 +00:00
ObjString [ ] string ; Tw float64 ; Th float64 ; Tc float64 ; Index int ; _ebd bool ; _afe * TextTable ; } ;
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_fbeda PageText) ToText() string { return _fbeda.Text() }

// shapesState holds the path-construction state used while processing path operators.
// _aabgc is assigned the graphics state's CTM by the content stream processor; _abgb holds the
// subpaths. NOTE(review): the roles of the remaining fields are not evident from this section.
type shapesState struct {
	_aabgc _bc.Matrix
	_dag   _bc.Matrix
	_abgb  []*subpath
	_edee  bool
	_ebfb  _bc.Point
	_ecfc  *textObject
}

// eventNeighbours performs a sweep over _cgcee and returns, for every paragraph that has an
// "enter" event (_cgcbd == true), the indexes of the paragraphs that were open at the same time.
//
// Events are ordered by their _gcaf coordinate; at equal coordinates "enter" events come before
// "leave" events. _cgcee is sorted in place. Each event's _bffcfb is an index into the receiver.
// Paragraphs without neighbours map to nil. Neighbour indexes are returned in ascending order.
func (_dbcc paraList) eventNeighbours(_cgcee []event) map[*textPara][]int {
	// A stable sort keeps tied events in their input order. Tie-breaking on the positional
	// indexes handed to the less function is not a consistent ordering, because elements move
	// while the slice is being sorted.
	_c.SliceStable(_cgcee, func(i, j int) bool {
		a, b := _cgcee[i], _cgcee[j]
		if a._gcaf != b._gcaf {
			return a._gcaf < b._gcaf
		}
		return a._cgcbd && !b._cgcbd
	})

	overlaps := make(map[int]intSet)
	open := make(intSet)
	for _, ev := range _cgcee {
		if !ev._cgcbd {
			open.del(ev._bffcfb)
			continue
		}
		overlaps[ev._bffcfb] = make(intSet)
		for other := range open {
			if other != ev._bffcfb {
				overlaps[ev._bffcfb].add(other)
				overlaps[other].add(ev._bffcfb)
			}
		}
		open.add(ev._bffcfb)
	}

	neighbours := map[*textPara][]int{}
	for idx, set := range overlaps {
		para := _dbcc[idx]
		if len(set) == 0 {
			neighbours[para] = nil
			continue
		}
		indexes := make([]int, 0, len(set))
		for other := range set {
			indexes = append(indexes, other)
		}
		// Map iteration order is random; sort so that the result is reproducible.
		_c.Ints(indexes)
		neighbours[para] = indexes
	}
	return neighbours
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// PageFonts represents extracted fonts on a PDF page.
type PageFonts struct {
	Fonts []Font
}

// log dumps the table when the _afcg debug flag is set and does nothing otherwise.
// It logs _ecaga with the table's _ecbf, _dcfg, _beaeg and bounding rectangle, then prints every
// non-nil cell with its coordinates, rectangle, text shortened by _adagc(…, 50) and rune count.
func (_caed *textTable) log(_ecaga string) {
	if !_afcg {
		return
	}
	_ec.Log.Info("~\u007e\u007e\u0020\u0025\u0073\u003a \u0025\u0064\u0020\u0078\u0020\u0025d\u0020\u0067\u0072\u0069\u0064\u003d\u0025t\u000a\u0020\u0020\u0020\u0020\u0020\u0020\u0025\u0036\u002e2\u0066", _ecaga, _caed._ecbf, _caed._dcfg, _caed._beaeg, _caed.PdfRectangle)

	for y := 0; y < _caed._dcfg; y++ {
		for x := 0; x < _caed._ecbf; x++ {
			if cell := _caed.get(x, y); cell != nil {
				_ge.Printf("%\u0034\u0064\u0020\u00252d\u003a \u0025\u0036\u002e\u0032\u0066 \u0025\u0071\u0020\u0025\u0064\u000a", x, y, cell.PdfRectangle, _adagc(cell.text(), 50), _g.RuneCountInString(cell.text()))
			}
		}
	}
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// List returns all the list objects detected on the page.
// It detects all the bullet point lists from a given PDF page and builds a slice of bullet list objects.
// A given bullet list object has a tree structure.
// Each bullet point list is extracted with the text content it contains and all the sub lists found
// under it as children in the tree.
// The rest of the content of the PDF is ignored and only text in the bullet point lists is extracted.
// The list extraction is done in two ways.
//  1. If the document is tagged then the lists are extracted using the tags provided in the document.
//  2. Otherwise the bullet lists are extracted from the raw text using regex matching.
//
// By default the document tag is used if available.
// However this can be disabled using `DisableDocumentTags` in the `Options` object.
// Sometimes disabling the document tags option might give a better bullet list extraction if the
// document was tagged incorrectly.
//
//	options := &Options{
//		DisableDocumentTags: false, // this means use document tag if available
//	}
//	ex, err := NewWithOptions(page, options)
//	// handle error
//	pageText, _, _, err := ex.ExtractPageText()
//	// handle error
//	lists := pageText.List()
//	txt := lists.Text()
func (_eaeg PageText) List() lists {
	useTags := !_eaeg._ccg._efab
	paras := _eaeg.getParagraphs()
	hasStructTree := _eaeg._bbb != nil && *_eaeg._bbb != nil

	// The text-matching result doubles as the fallback when tags are unavailable or unusable.
	result := paras.list()
	if !hasStructTree || !useTags {
		return result
	}

	tagged := _dafd(&paras)
	root := &structTreeRoot{}
	root.parseStructTreeRoot(*_eaeg._bbb)
	if root._dgbb == nil {
		_ec.Log.Debug("\u004c\u0069\u0073\u0074\u003a\u0020\u0073t\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e'\u0074\u0020\u0068\u0061\u0076e\u0020\u0061\u006e\u0079\u0020\u0063\u006f\u006e\u0074e\u006e\u0074\u002c\u0020\u0075\u0073\u0069\u006e\u0067\u0020\u0074\u0065\u0078\u0074\u0020\u006d\u0061\u0074\u0063\u0068\u0069\u006e\u0067\u0020\u006d\u0065\u0074\u0068\u006f\u0064\u0020\u0069\u006e\u0073\u0074\u0065\u0061\u0064\u002e")
		return result
	}
	return root.buildList(tagged, _eaeg._fefc)
}

// findTextTables tries every paragraph that is not taken and has non-zero width as a table seed.
// A seed for which isAtom returns a table is grown with growTable; tables whose cell count
// (_ecbf*_dcfg) is below _ddd are dropped, the others have their cells marked and are returned.
func (_edca paraList) findTextTables() []*textTable {
	var tables []*textTable
	for _, para := range _edca {
		if para.taken() || para.Width() == 0 {
			continue
		}
		table := para.isAtom()
		if table == nil {
			continue
		}
		table.growTable()
		if table._ecbf*table._dcfg < _ddd {
			continue
		}
		table.markCells()
		table.log("\u0067\u0072\u006fw\u006e")
		tables = append(tables, table)
	}
	return tables
}

// textObject holds the state associated with a PDF text object: the owning extractor, the page
// resources, a copy of the graphics state, the text state, a state stack, two matrices and the
// marks collected so far.
type textObject struct {
	_bgcb  *Extractor
	_bacad *_ce.PdfPageResources
	_gbe   _ag.GraphicsState
	_befa  *textState
	_ebf   *stateStack
	_eee   _bc.Matrix
	_ecg   _bc.Matrix
	_cfb   []*textMark
	_efbb  bool
}

// toTextMarks returns the TextMarks of all paragraphs whose _bedf flag is not set, in order.
// After each such paragraph except the last element of the list, _cacf appends either one " " mark
// (when _eeacg reports true for the paragraph and its successor) or two "\n" marks. Two "\n" marks
// are always appended at the end. A running offset is threaded through all calls.
func (_deba paraList) toTextMarks() []TextMark {
	offset := 0
	var marks []TextMark
	lastIdx := len(_deba) - 1
	for i, para := range _deba {
		if para._bedf {
			continue
		}
		marks = append(marks, para.toTextMarks(&offset)...)
		if i == lastIdx {
			continue
		}
		if _eeacg(para, _deba[i+1]) {
			marks = _cacf(marks, &offset, "\u0020")
		} else {
			marks = _cacf(marks, &offset, "\u000a")
			marks = _cacf(marks, &offset, "\u000a")
		}
	}
	marks = _cacf(marks, &offset, "\u000a")
	marks = _cacf(marks, &offset, "\u000a")
	return marks
}

// drawRectangle starts a new subpath and traces the rectangle with corner (_dga, _ebed), width
// _aada and height _age, then closes it. With the _cbag debug flag set the rectangle is also
// logged in device coordinates.
func (_abab *shapesState) drawRectangle(_dga, _ebed, _aada, _age float64) {
	if _cbag {
		ll := _abab.devicePoint(_dga, _ebed)
		ur := _abab.devicePoint(_dga+_aada, _ebed+_age)
		rect := _ce.PdfRectangle{Llx: ll.X, Lly: ll.Y, Urx: ur.X, Ury: ur.Y}
		_ec.Log.Info("d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066", rect)
	}
	_abab.newSubPath()
	_abab.moveTo(_dga, _ebed)
	_abab.lineTo(_dga+_aada, _ebed)
	_abab.lineTo(_dga+_aada, _ebed+_age)
	_abab.lineTo(_dga, _ebed+_age)
	_abab.closePath()
}

// _dcgbg returns "\n" when _ffegg(_ebdd-_badd) is false and " " otherwise.
func _dcgbg(_ebdd, _badd float64) string {
	if !_ffegg(_ebdd - _badd) {
		return "\u000a"
	}
	return "\u0020"
}

// extractPageResourcesToFont appends one Font entry to _fec.Fonts for every font in the Font
// dictionary of _ccc whose name is not already present (as decided by _efg).
//
// The embedded font program is taken from the first non-nil of FontFile, FontFile2 and FontFile3
// in the font descriptor, and the file name is the font name plus ".pfb", ".ttf" or ".cff"
// respectively. When no font program could be read, _cb is logged and the entry is added without
// font data.
//
// It returns an error when the Font entry is not a dictionary (_be), a font cannot be found (_cc),
// a font object cannot be converted, or a font stream cannot be decoded.
//
// NOTE(review): the result of FontDescriptor() is dereferenced without a nil check.
func (_fec *PageFonts) extractPageResourcesToFont(_ccc *_ce.PdfPageResources) error {
	fontDict, isDict := _gb.GetDict(_ccc.Font)
	if !isDict {
		return _d.New(_be)
	}

	for _, key := range fontDict.Keys() {
		var (
			hasToUnicode = true
			fontData     []byte
			fileName     string
		)

		fontObj, found := _ccc.GetFontByName(key)
		if !found {
			return _d.New(_cc)
		}
		font, err := _ce.NewPdfFontFromPdfObject(fontObj)
		if err != nil {
			return err
		}

		descriptor := font.FontDescriptor()
		fontName := font.FontDescriptor().FontName.String()
		subtype := font.Subtype()
		if _efg(_fec.Fonts, fontName) {
			continue
		}
		if len(font.ToUnicode()) == 0 {
			hasToUnicode = false
		}

		switch {
		case descriptor.FontFile != nil:
			if stream, isStream := _gb.GetStream(descriptor.FontFile); isStream {
				fontData, err = _gb.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0070\u0066\u0062"
			}
		case descriptor.FontFile2 != nil:
			if stream, isStream := _gb.GetStream(descriptor.FontFile2); isStream {
				fontData, err = _gb.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0074\u0074\u0066"
			}
		case descriptor.FontFile3 != nil:
			if stream, isStream := _gb.GetStream(descriptor.FontFile3); isStream {
				fontData, err = _gb.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + "\u002e\u0063\u0066\u0066"
			}
		}
		if len(fileName) < 1 {
			_ec.Log.Debug(_cb)
		}

		_fec.Fonts = append(_fec.Fonts, Font{
			FontName:       fontName,
			PdfFont:        font,
			IsCID:          font.IsCID(),
			IsSimple:       font.IsSimple(),
			ToUnicode:      hasToUnicode,
			FontType:       subtype,
			FontData:       fontData,
			FontFileName:   fileName,
			FontDescriptor: descriptor,
		})
	}
	return nil
}

// depthRange returns, in ascending order, the keys of _faba that lie in the closed interval
// [_efea, _fdcf]. It returns nil when there are none.
func (_dbae *wordBag) depthRange(_efea, _fdcf int) []int {
	var keys []int
	for key := range _dbae._faba {
		if _efea <= key && key <= _fdcf {
			keys = append(keys, key)
		}
	}
	if len(keys) == 0 {
		return nil
	}
	_c.Ints(keys)
	return keys
}

// textMark is the package-internal representation of a piece of extracted text.
// It embeds the mark's rectangle and stores, among other things, its text (_ecaa), font (_bcdb),
// fill/stroke colours (_abgd, _eedd), the originating PDF object (_bfade) and the Tw/Th values.
// NOTE(review): the meaning of the remaining fields is not evident from this section.
type textMark struct {
	_ce.PdfRectangle
	_edge  int
	_ecaa  string
	_gdcgd string
	_bcdb  *_ce.PdfFont
	_bfaca float64
	_cgdb  float64
	_dadd  _bc.Matrix
	_feea  _bc.Point
	_aefef _ce.PdfRectangle
	_abgd  _fg.Color
	_eedd  _fg.Color
	_bfade _gb.PdfObject
	_fcdc  []string
	Tw     float64
	Th     float64
	_fefe  int
	_beaa  int
}

// removeDuplicates collapses runs of consecutive points that _dcbd reports as equal, keeping the
// first point of each run. An empty subpath is left untouched.
func (_egfb *subpath) removeDuplicates() {
	if len(_egfb._gfefe) == 0 {
		return
	}
	kept := []_bc.Point{_egfb._gfefe[0]}
	for _, pt := range _egfb._gfefe[1:] {
		if !_dcbd(pt, kept[len(kept)-1]) {
			kept = append(kept, pt)
		}
	}
	_egfb._gfefe = kept
}

// _gaagg removes the last rune from the text of the last mark in _faab and updates the running
// text offset *_gbaa to match.
//
// If the last mark holds exactly one rune the mark itself is dropped and the offset is set to the
// end of the new last mark (or to the start of the dropped mark when none remains). Otherwise the
// mark's Text is shortened in place and the offset is reduced by the number of bytes removed.
// An empty slice is returned unchanged.
func _gaagg(_faab []TextMark, _gbaa *int) []TextMark {
	if len(_faab) == 0 {
		return _faab
	}
	lastIdx := len(_faab) - 1
	last := _faab[lastIdx]

	if len([]rune(last.Text)) == 1 {
		_faab = _faab[:lastIdx]
		if lastIdx == 0 {
			// Nothing precedes the removed mark: the text now ends where that mark began.
			*_gbaa = last.Offset
			return _faab
		}
		prev := _faab[lastIdx-1]
		*_gbaa = prev.Offset + len(prev.Text)
		return _faab
	}

	trimmed := _edcg(last.Text)
	*_gbaa += len(trimmed) - len(last.Text)
	// Write through the slice: `last` is a copy, so assigning to it would leave the stored mark
	// with its old text while the offset has already been adjusted.
	_faab[lastIdx].Text = trimmed
	return _faab
}

// text returns everything writeText emits for the paragraph as a string.
func (_faeb *textPara) text() string {
	buf := new(_df.Buffer)
	_faeb.writeText(buf)
	return buf.String()
}

// _dgd returns the single numeric operand of _cbabd as a float64.
// It returns an error when the operation does not have exactly one parameter or when the
// parameter is not a number.
func _dgd(_cbabd *_ag.ContentStreamOperation) (float64, error) {
	if len(_cbabd.Params) != 1 {
		err := _d.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
		_ec.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _cbabd.Operand, 1, len(_cbabd.Params), _cbabd.Params)
		return 0.0, err
	}
	return _gb.GetNumberAsFloat(_cbabd.Params[0])
}

// _baba returns a new list with lines _bded, kind string _cafa and children _efgac.
func _baba(_bded []*textLine, _cafa string, _efgac []*list) *list {
	return &list{_ggdb: _bded, _ffcg: _cafa, _gbab: _efgac}
}

// extractFormImages extracts the images drawn by the form XObject named _gbgb in _fbd.
// The form's content stream is processed with the form's own resources, falling back to _fbd when
// the form has none, and the _agb counter is incremented on success. A nil form is not an error.
// The graphics state _cefa is not used.
func (_bbfa *imageExtractContext) extractFormImages(_gbgb *_gb.PdfObjectName, _cefa _ag.GraphicsState, _fbd *_ce.PdfPageResources) error {
	form, err := _fbd.GetXObjectFormByName(*_gbgb)
	if err != nil {
		return err
	}
	if form == nil {
		return nil
	}

	content, err := form.GetContentStream()
	if err != nil {
		return err
	}

	resources := form.Resources
	if resources == nil {
		resources = _fbd
	}
	if err = _bbfa.extractContentStreamImages(string(content), resources); err != nil {
		return err
	}
	_bbfa._agb++
	return nil
}

// imageExtractContext carries the state of one image extraction run: the image marks found so
// far, three counters, a per-stream cache of decoded images and the extraction options.
type imageExtractContext struct {
	_gbf  []ImageMark
	_da   int
	_fbc  int
	_agb  int
	_aabe map[*_gb.PdfObjectStream]*cachedImage
	_cef  *ImageExtractOptions
	_dedf bool
}

// _edcg returns _dgfbg without its last rune. The empty string is returned unchanged.
func _edcg(_dgfbg string) string {
	runes := []rune(_dgfbg)
	if len(runes) == 0 {
		return ""
	}
	return string(runes[:len(runes)-1])
}

// _fbga decodes the image XObject held in the stream _agfb and returns the result of passing the
// decoded image and _faaga to _bcfde.
// If _agfb is not a stream it returns (nil, nil); decoding failures are returned as errors.
func _fbga(_agfb _gb.PdfObject, _faaga _fg.Color) (_ded.Image, error) {
	stream, isStream := _gb.GetStream(_agfb)
	if !isStream {
		return nil, nil
	}
	xobj, err := _ce.NewXObjectImageFromStream(stream)
	if err != nil {
		return nil, err
	}
	decoded, err := xobj.ToImage()
	if err != nil {
		return nil, err
	}
	return _bcfde(decoded, _faaga), nil
}
2023-11-11 11:29:03 +00:00
2023-12-17 13:54:01 +00:00
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
//
// `start` is clamped to the first mark's offset and `end` to the last mark's offset plus one.
// An error is returned for a nil receiver, for end < start, and when no mark satisfies either
// search. An empty array is returned as is.
//
// NOTE(review): because the upper search must find a mark whose Offset is greater than end-1, a
// range that reaches the last mark is reported as out of range.
func (_bcg *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	switch {
	case _bcg == nil:
		return nil, _d.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	case end < start:
		return nil, _ge.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}

	marks := _bcg._dec
	count := len(marks)
	if count == 0 {
		return _bcg, nil
	}
	first, last := marks[0], marks[count-1]

	if start < first.Offset {
		start = first.Offset
	}
	if end > last.Offset+1 {
		end = last.Offset + 1
	}

	// lo is the first mark whose last byte lies at or after start.
	lo := _c.Search(count, func(i int) bool {
		return marks[i].Offset+len(marks[i].Text)-1 >= start
	})
	if lo < 0 || lo >= count {
		return nil, _ge.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, lo, count, first, last)
	}

	// hi is the first mark that starts at or after end.
	hi := _c.Search(count, func(i int) bool {
		return marks[i].Offset > end-1
	})
	if hi < 0 || hi >= count {
		return nil, _ge.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076", end, hi, count, first, last)
	}

	if hi <= lo {
		return nil, _ge.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, lo, hi)
	}
	return &TextMarkArray{_dec: marks[lo:hi]}, nil
}

const _gdgb = 10

// String describes the cell: its rectangle, its number of paragraphs and, when it has any, the
// merged paragraph text shortened by _adagc(…, 50).
func (_effb compositeCell) String() string {
	var summary string
	if len(_effb.paraList) > 0 {
		summary = _adagc(_effb.paraList.merge().text(), 50)
	}
	return _ge.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071", _effb.PdfRectangle, len(_effb.paraList), summary)
}

// _eabf returns the non-empty lines of the list's _dage text, each prefixed with *_gddd and
// terminated by a newline.
func _eabf(_cdfcb *list, _gddd *string) string {
	out := &_gd.Builder{}
	for _, line := range _gd.Split(_cdfcb._dage, "\u000a") {
		if line == "" {
			continue
		}
		out.WriteString(*_gddd)
		out.WriteString(line)
		out.WriteString("\u000a")
	}
	return out.String()
}

// vertsHorzs splits the rulings by kind: those of kind _ebdaf are returned first, those of kind
// _ecac second. Rulings of any other kind are dropped.
func (_bdbd rulingList) vertsHorzs() (rulingList, rulingList) {
	var verts, horzs rulingList
	for _, r := range _bdbd {
		if r._bfbc == _ebdaf {
			verts = append(verts, r)
		} else if r._bfbc == _ecac {
			horzs = append(horzs, r)
		}
	}
	return verts, horzs
}
2023-11-11 11:29:03 +00:00
2023-12-17 13:54:01 +00:00
// ExtractTextWithStats works like ExtractText but also returns the number of characters in the
// output (second result) and the number of characters that were not decoded (third result).
// It is a thin wrapper around ExtractPageText: on failure the text is empty and the counts and
// error reported by ExtractPageText are passed through.
func (_ceef *Extractor) ExtractTextWithStats() (_ddg string, _cce int, _fed int, _dda error) {
	pageText, numChars, numMisses, err := _ceef.ExtractPageText()
	if err != nil {
		return "", numChars, numMisses, err
	}
	return pageText.Text(), numChars, numMisses, nil
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// PageTextOptions holds various options available in extraction process.
type PageTextOptions struct {
	_efab  bool
	_fcgfa bool
}

// subdivide builds a new table from the receiver's composite cells. Each
// composite cell is split along the corridors of its row and column, and the
// resulting paragraphs are stored in the new table at the offsets computed by
// `_daeg`. The receiver itself is returned when there are no row corridors or
// no column corridors.
func (_ddafg *textTable) subdivide() *textTable {
	_ddafg.logComposite("\u0073u\u0062\u0064\u0069\u0076\u0069\u0064e")
	rowCorridors := _ddafg.compositeRowCorridors()
	colCorridors := _ddafg.compositeColCorridors()
	if _afcg {
		_ec.Log.Info("\u0073u\u0062\u0064i\u0076\u0069\u0064\u0065:\u000a\u0009\u0072o\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s=\u0025\u0073\u000a\t\u0063\u006fl\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073=\u0025\u0073",
			_bfbfd(rowCorridors), _bfbfd(colCorridors))
	}
	if len(rowCorridors) == 0 || len(colCorridors) == 0 {
		return _ddafg
	}

	_acgg(rowCorridors)
	_acgg(colCorridors)
	if _afcg {
		_ec.Log.Info("\u0073\u0075\u0062\u0064\u0069\u0076\u0069\u0064\u0065\u0020\u0066\u0069\u0078\u0065\u0064\u003a\u000a\u0009r\u006f\u0077\u0043\u006f\u0072\u0072\u0069d\u006f\u0072\u0073\u003d\u0025\u0073\u000a\u0009\u0063\u006f\u006cC\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d\u0025\u0073",
			_bfbfd(rowCorridors), _bfbfd(colCorridors))
	}

	yOffsets, height := _daeg(_ddafg._dcfg, rowCorridors)
	xOffsets, width := _daeg(_ddafg._ecbf, colCorridors)
	result := &textTable{
		PdfRectangle: _ddafg.PdfRectangle,
		_beaeg:       _ddafg._beaeg,
		_dcfg:        height,
		_ecbf:        width,
		_gcbga:       make(map[uint64]*textPara, width*height),
	}
	if _afcg {
		_ec.Log.Info("\u0073\u0075b\u0064\u0069\u0076\u0069\u0064\u0065\u003a\u0020\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0020\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0063\u0065\u006c\u006c\u0073\u003d\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a"+
			"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+
			"\u0009\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072s\u003d\u0025\u0073\u000a"+
			"\u0009\u0079\u004f\u0066\u0066\u0073\u0065\u0074\u0073=\u0025\u002b\u0076\u000a"+
			"\u0009\u0078\u004f\u0066\u0066\u0073\u0065\u0074\u0073\u003d\u0025\u002b\u0076",
			_ddafg._ecbf, _ddafg._dcfg, width, height,
			_bfbfd(rowCorridors), _bfbfd(colCorridors), yOffsets, xOffsets)
	}

	for y := 0; y < _ddafg._dcfg; y++ {
		y0 := yOffsets[y]
		for x := 0; x < _ddafg._ecbf; x++ {
			x0 := xOffsets[x]
			if _afcg {
				_ge.Printf("\u0025\u0036\u0064\u002c %\u0032\u0064\u003a\u0020\u0078\u0030\u003d\u0025\u0064\u0020\u0079\u0030\u003d\u0025d\u000a", x, y, x0, y0)
			}
			composite, present := _ddafg._egfe[_aaca(x, y)]
			if !present {
				continue
			}
			pieces := composite.split(rowCorridors[y], colCorridors[x])
			for dy := 0; dy < pieces._dcfg; dy++ {
				for dx := 0; dx < pieces._ecbf; dx++ {
					cell := pieces.get(dx, dy)
					result.put(x0+dx, y0+dy, cell)
					if _afcg {
						_ge.Printf("\u0025\u0038\u0064\u002c\u0020\u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a", x0+dx, y0+dy, cell)
					}
				}
			}
		}
	}
	return result
}

// _dca creates a word bag seeded with the first word of `_ccga` (via `_bbea`),
// files every remaining word under the index `_fece` derives from it, grows the
// bag rectangle with `_cdggc`, and sorts the bag. `_ccga` must not be empty.
func _dca(_ccga []*textWord, _aag float64, _dgge, _acd rulingList) *wordBag {
	bag := _bbea(_ccga[0], _aag, _dgge, _acd)
	for _, word := range _ccga[1:] {
		idx := _fece(word._cffdg)
		bag._faba[idx] = append(bag._faba[idx], word)
		bag.PdfRectangle = _cdggc(bag.PdfRectangle, word.PdfRectangle)
	}
	bag.sort()
	return bag
}

// tidied removes duplicate rulings, snaps the remainder to groups and sorts the
// result. It returns nil when snapping yields nil. `_bcdfe` is only used to
// label the log message.
func (_fbfca rulingList) tidied(_bcdfe string) rulingList {
	uniques := _fbfca.removeDuplicates()
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()
	if _gfgc {
		_ec.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064",
			_bcdfe, len(_fbfca), len(uniques), len(coalesced))
	}
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// makeRectRuling tries to interpret the first four points of the subpath as a
// rectangle and convert it to a ruling of colour `_bbaf`. The second result
// reports success. The subpath must hold at least four points.
func (_cdag *subpath) makeRectRuling(_bbaf _fg.Color) (*ruling, bool) {
	if _feceb {
		_ec.Log.Info("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076", _cdag)
	}

	// Classify each of the four sides (point i to point i+1, wrapping around).
	points := _cdag._gfefe[:4]
	kinds := make(map[int]rulingKind, len(points))
	for i, from := range points {
		to := _cdag._gfefe[(i+1)%4]
		kinds[i] = _bcdee(from, to)
		if _feceb {
			_ge.Printf("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066", i, kinds[i], from, to)
		}
	}
	if _feceb {
		_ge.Printf("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a", kinds)
	}

	var verts, horzs []int
	for i, kind := range kinds {
		switch kind {
		case _ecac:
			horzs = append(horzs, i)
		case _ebdaf:
			verts = append(verts, i)
		}
	}
	if _feceb {
		_ge.Printf("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(horzs), horzs)
		_ge.Printf("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(verts), verts)
	}

	// Accept two sides of each kind, or two sides of one kind and none of the
	// other provided the extra check on those two sides passes.
	ok := (len(horzs) == 2 && len(verts) == 2) ||
		(len(horzs) == 2 && len(verts) == 0 && _acdeg(points[horzs[0]], points[horzs[1]])) ||
		(len(verts) == 2 && len(horzs) == 0 && _ddafc(points[verts[0]], points[verts[1]]))
	if _feceb {
		_ge.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
	}
	if !ok {
		if _feceb {
			_ec.Log.Error("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v", _cdag)
			_ge.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
		}
		return &ruling{}, false
	}

	// When one kind is missing, treat every side that is not of the other kind
	// as belonging to it.
	if len(verts) == 0 {
		for i, kind := range kinds {
			if kind != _ecac {
				verts = append(verts, i)
			}
		}
	}
	if len(horzs) == 0 {
		for i, kind := range kinds {
			if kind != _ebdaf {
				horzs = append(horzs, i)
			}
		}
	}
	if _feceb {
		_ec.Log.Info("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+
			"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+
			"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+
			"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076",
			len(horzs), len(verts), len(points), horzs, verts, points)
	}

	var xHi, xLo, yHi, yLo _bc.Point
	if points[horzs[0]].Y > points[horzs[1]].Y {
		yHi, yLo = points[horzs[0]], points[horzs[1]]
	} else {
		yHi, yLo = points[horzs[1]], points[horzs[0]]
	}
	if points[verts[0]].X > points[verts[1]].X {
		xHi, xLo = points[verts[0]], points[verts[1]]
	} else {
		xHi, xLo = points[verts[1]], points[verts[0]]
	}

	// The corners are normalised right after construction, so the initial
	// assignment of the x extremes does not matter.
	rect := _ce.PdfRectangle{Llx: xHi.X, Urx: xLo.X, Lly: yLo.Y, Ury: yHi.Y}
	if rect.Llx > rect.Urx {
		rect.Llx, rect.Urx = rect.Urx, rect.Llx
	}
	if rect.Lly > rect.Ury {
		rect.Lly, rect.Ury = rect.Ury, rect.Lly
	}

	candidate := rectRuling{PdfRectangle: rect, _dfec: _gcgf(rect), Color: _bbaf}
	if candidate._dfec == _gdcf {
		if _feceb {
			_ec.Log.Error("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c")
		}
		return nil, false
	}
	result, isRuling := candidate.asRuling()
	if !isRuling {
		if _feceb {
			_ec.Log.Error("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg")
		}
		return nil, false
	}
	if _gfgc {
		_ge.Printf("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a", result.String())
	}
	return result, true
}

// get returns the paragraph stored under the key `_aaca` derives from the two
// indices, or nil when nothing is stored there.
func (_ecaab *textTable) get(_dcafe, _dafcc int) *textPara {
	key := _aaca(_dcafe, _dafcc)
	return _ecaab._gcbga[key]
}

// _eacf rounds `_aebfc` to the nearest multiple of `_dgeec`.
func _eacf(_aebfc float64) float64 {
	steps := _aa.Round(_aebfc / _dgeec)
	return _dgeec * steps
}

// writeText writes the text of every paragraph whose `_bedf` flag is unset to
// `_eafd`. Consecutive paragraphs are separated by a single space when `_eeacg`
// says so and by two newlines otherwise; two newlines terminate the output.
// Write errors are ignored.
func (_aaed paraList) writeText(_eafd _a.Writer) {
	last := len(_aaed) - 1
	for i, para := range _aaed {
		if para._bedf {
			continue
		}
		para.writeText(_eafd)
		if i == last {
			continue
		}
		if _eeacg(para, _aaed[i+1]) {
			_eafd.Write([]byte("\u0020"))
			continue
		}
		_eafd.Write([]byte("\u000a"))
		_eafd.Write([]byte("\u000a"))
	}
	_eafd.Write([]byte("\u000a"))
	_eafd.Write([]byte("\u000a"))
}

// _eecad classifies the segment between two points from its absolute x and y
// extents, using `_cec` as the threshold passed to `_gbbf`.
func _eecad(_cbbcf, _eabef _bc.Point) rulingKind {
	dx := _aa.Abs(_cbbcf.X - _eabef.X)
	dy := _aa.Abs(_cbbcf.Y - _eabef.Y)
	return _gbbf(dx, dy, _cec)
}

// _gfbbf converts the segment between two points into a ruling of colour
// `_cgg`. It returns (nil, false) when the segment has the zero ruling kind.
func _gfbbf(_eaeb, _begg _bc.Point, _cgg _fg.Color) (*ruling, bool) {
	kind := _eecad(_eaeb, _begg)
	line := lineRuling{_ddgfc: _eaeb, _eacg: _begg, _fbbag: kind, Color: _cgg}
	if line._fbbag == _gdcf {
		return nil, false
	}
	return line.asRuling()
}

// pullWord adds `_bfab` to the bag under index `_fceff`, growing the bag's
// rectangle and raising `_egfa` when the word's `_ddgee` exceeds it, and records
// the word in `_dcbg[_fceff]`. The inner map of `_dcbg` must already exist.
func (_cbae *wordBag) pullWord(_bfab *textWord, _fceff int, _dcbg map[int]map[*textWord]struct{}) {
	_cbae.PdfRectangle = _cdggc(_cbae.PdfRectangle, _bfab.PdfRectangle)
	if _cbae._egfa < _bfab._ddgee {
		_cbae._egfa = _bfab._ddgee
	}
	bucket := append(_cbae._faba[_fceff], _bfab)
	_cbae._faba[_fceff] = bucket
	_dcbg[_fceff][_bfab] = struct{}{}
}

// _dafd groups the text lines of every paragraph, and of every cell of a
// paragraph's table, by the `_fefe` value of the line's first mark. Lines
// rejected by `_bcgc` are logged and skipped.
func _dafd(_bbde *paraList) map[int][]*textLine {
	groups := map[int][]*textLine{}
	collect := func(lines []*textLine) {
		for _, line := range lines {
			if !_bcgc(line) {
				_ec.Log.Debug("g\u0072\u006f\u0075p\u004c\u0069\u006e\u0065\u0073\u003a\u0020\u0054\u0068\u0065\u0020\u0074\u0065\u0078\u0074\u0020\u006c\u0069\u006e\u0065\u0020\u0063\u006f\u006e\u0074a\u0069\u006e\u0073 \u006d\u006f\u0072\u0065\u0020\u0074\u0068\u0061\u006e\u0020\u006f\u006e\u0065 \u006d\u0063\u0069\u0064 \u006e\u0075\u006d\u0062e\u0072\u002e\u0020\u0049\u0074\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020\u0062\u0065\u0020\u0073p\u006c\u0069\u0074\u002e")
				continue
			}
			key := line._aebc[0]._bgeaa[0]._fefe
			groups[key] = append(groups[key], line)
		}
	}
	for _, para := range *_bbde {
		collect(para._bdbcg)
		if para._bddea == nil {
			continue
		}
		for _, cell := range para._bddea._gcbga {
			collect(cell._bdbcg)
		}
	}
	return groups
}

// establishSubpath returns the most recent subpath, first appending a new one
// built from the point reported by lastpointEstablished when that call reports
// false. It returns nil when there are no subpaths; otherwise it clears `_edee`.
func (_fgcad *shapesState) establishSubpath() *subpath {
	if point, established := _fgcad.lastpointEstablished(); !established {
		_fgcad._abgb = append(_fgcad._abgb, _fae(point))
	}
	count := len(_fgcad._abgb)
	if count == 0 {
		return nil
	}
	_fgcad._edee = false
	return _fgcad._abgb[count-1]
}

// xMean returns the mean of the x coordinates of the two end points.
func (_cafe lineRuling) xMean() float64 {
	sum := _cafe._ddgfc.X + _cafe._eacg.X
	return 0.5 * sum
}

// mergePrimary returns the mean `_abbgc` value of the rulings in the list,
// which must not be empty.
func (_egef rulingList) mergePrimary() float64 {
	total := _egef[0]._abbgc
	for i := 1; i < len(_egef); i++ {
		total += _egef[i]._abbgc
	}
	return total / float64(len(_egef))
}

// wordBag is a rectangle-bounded collection of words bucketed by an integer
// index (see pullWord and `_dca`).
type wordBag struct {
	_ce.PdfRectangle
	_egfa        float64
	_eeca, _gbef rulingList
	_cfg         float64
	_faba        map[int][]*textWord
}

// devicePoint transforms (`_ceba`, `_ecgc`) by the product of the state's
// `_dag` and `_aabgc` matrices and returns the result as a point.
func (_faae *shapesState) devicePoint(_ceba, _ecgc float64) _bc.Point {
	combined := _faae._dag.Mult(_faae._aabgc)
	x, y := combined.Transform(_ceba, _ecgc)
	return _bc.NewPoint(x, y)
}

// has reports whether `_gdcff` is a member of the set.
func (_fegd intSet) has(_gdcff int) bool {
	_, found := _fegd[_gdcff]
	return found
}

// augmentGrid splits the rulings into vertical and horizontal lists and adds a
// synthetic border ruling on each side where the bounding box of one list
// extends past the other's by more than `_ebbf`. If either list is empty, or
// the combined length after augmentation equals len of the receiver, the
// unaugmented lists are returned.
func (_bbbc rulingList) augmentGrid() (rulingList, rulingList) {
	verts, horzs := _bbbc.vertsHorzs()
	if len(verts) == 0 || len(horzs) == 0 {
		return verts, horzs
	}
	origVerts, origHorzs := verts, horzs

	bboxV := verts.bbox()
	bboxH := horzs.bbox()
	if _gfgc {
		_ec.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066", bboxV)
		_ec.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066", bboxH)
	}

	if bboxH.Llx < bboxV.Llx-_ebbf {
		left := &ruling{_bgaa: _cgacb, _bfbc: _ebdaf, _abbgc: bboxH.Llx, _cebe: bboxV.Lly, _deee: bboxV.Ury}
		verts = append(rulingList{left}, verts...)
	}
	if bboxH.Urx > bboxV.Urx+_ebbf {
		right := &ruling{_bgaa: _cgacb, _bfbc: _ebdaf, _abbgc: bboxH.Urx, _cebe: bboxV.Lly, _deee: bboxV.Ury}
		verts = append(verts, right)
	}
	if bboxV.Lly < bboxH.Lly-_ebbf {
		bottom := &ruling{_bgaa: _cgacb, _bfbc: _ecac, _abbgc: bboxV.Lly, _cebe: bboxH.Llx, _deee: bboxH.Urx}
		horzs = append(rulingList{bottom}, horzs...)
	}
	if bboxV.Ury > bboxH.Ury+_ebbf {
		top := &ruling{_bgaa: _cgacb, _bfbc: _ecac, _abbgc: bboxV.Ury, _cebe: bboxH.Llx, _deee: bboxH.Urx}
		horzs = append(horzs, top)
	}

	if len(verts)+len(horzs) == len(_bbbc) {
		return origVerts, origHorzs
	}
	augmented := append(verts, horzs...)
	_bbbc.log("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064")
	augmented.log("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d")
	return verts, horzs
}

// Tuning constants. Only the uses visible alongside these declarations are
// noted; the rest are consumed elsewhere in the package.
const (
	_fdac  = 1.0e-6
	_dgeec = 1.0e-4 // rounding step used by _eacf
	_fafa  = 10
	_edbf  = 6
	_cdca  = 0.5
	_edd   = 0.12
	_dfac  = 0.19
	_bagb  = 0.04
	_fffgc = 0.04
	_cfebd = 1.0
	_bdgd  = 0.04
	_ccab  = 0.4
	_gfff  = 0.7
	_gceeb = 1.0
	_baae  = 0.1
	_ebfa  = 1.4
	_gecdb = 0.46
	_ddag  = 0.02
	_bada  = 0.2
	_geb   = 0.5
	_ggafb = 4
	_bafc  = 4.0
	_ddd   = 6
	_fbbd  = 0.3
	_gccfg = 0.01
	_gcga  = 0.02
	_cbbb  = 2
	_acgd  = 2
	_fcae  = 500
	_cec   = 4.0 // threshold _eecad passes to _gbbf
	_aace  = 4.0
	_cffa  = 0.05 // ratio limit used by _beeee
	_edeeb = 0.1  // lower bound on the divisor in _beeee
	_ebbf  = 2.0  // bounding-box tolerance used by augmentGrid
	_gcef  = 2.0
	_fdgb  = 1.5
	_ffee  = 3.0
	_cfea  = 0.25
)

// newSubPath clears the current path and, when `_cbag` is set, logs the state.
func (_bgcg *shapesState) newSubPath() {
	_bgcg.clearPath()
	if !_cbag {
		return
	}
	_ec.Log.Info("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073", _bgcg)
}

// _adcg applies `_fcd` to the `_ebcf` rectangles of the two paragraphs.
func _adcg(_adfee, _fcbg *textPara) bool {
	first, second := _adfee._ebcf, _fcbg._ebcf
	return _fcd(first, second)
}

// complete reports whether the tile has all four borders.
func (_fcfa gridTile) complete() bool {
	borders := _fcfa.numBorders()
	return borders == 4
}

// structElement is a node in a tree of structure elements; `_cagb` holds its
// children.
type structElement struct {
	_bacdb string
	_cagb  []structElement
	_efcc  int64
	_geaa  _gb.PdfObject
}

// Sentinel errors for type and range check failures.
var (
	_gdf = _d.New("\u0074\u0079p\u0065\u0020\u0063h\u0065\u0063\u006b\u0020\u0065\u0072\u0072\u006f\u0072")
	_fe  = _d.New("\u0072\u0061\u006e\u0067\u0065\u0020\u0063\u0068\u0065\u0063\u006b\u0020e\u0072\u0072\u006f\u0072")
)

// _gbbf classifies a segment from its two extents. It returns `_ecac` when
// `_deafb` is at least `_cgddb` and `_beeee(_ccdd, _deafb)` holds, `_ebdaf` for
// the mirrored condition, and `_gdcf` otherwise.
func _gbbf(_deafb, _ccdd, _cgddb float64) rulingKind {
	switch {
	case _deafb >= _cgddb && _beeee(_ccdd, _deafb):
		return _ecac
	case _ccdd >= _cgddb && _beeee(_deafb, _ccdd):
		return _ebdaf
	default:
		return _gdcf
	}
}

// stroke appends a path section made of the current subpaths and the stroke
// colour to `*_gceed`. With `_gfgc` set it prints a summary, and with `_deaga`
// also set it prints the subpaths up to and including index 10.
func (_bae *shapesState) stroke(_gceed *[]pathSection) {
	section := pathSection{_fbfe: _bae._abgb, Color: _bae._ecfc.getStrokeColor()}
	*_gceed = append(*_gceed, section)
	if !_gfgc {
		return
	}
	_ge.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a",
		len(*_gceed), _bae, _bae._ecfc.getStrokeColor(), section.bbox())
	if !_deaga {
		return
	}
	for i, sp := range _bae._abgb {
		_ge.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, sp)
		if i == 10 {
			break
		}
	}
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct {
	// IncludeInlineStencilMasks asks for inline stencil masks to be included;
	// the zero value leaves them out.
	IncludeInlineStencilMasks bool
}

// _fgabc rounds `_cbec` to the nearest multiple of `_cbfe` and returns it as an
// int. A `_cbfe` of zero is treated as one, so the value is rounded to the
// nearest integer. Halves round away from zero, as math.Round does.
func _fgabc(_cbec float64, _cbfe int) int {
	step := _cbfe
	if step == 0 {
		step = 1
	}
	unit := float64(step)
	multiples := _aa.Round(_cbec / unit)
	return int(multiples * unit)
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	// _bcf and _ab are the two arguments ExtractPageImages hands to
	// extractContentStreamImages — presumably the page content stream and its
	// resources; confirm against the constructor.
	_bcf string
	_ab  *_ce.PdfPageResources

	_gda _ce.PdfRectangle
	_ed  *_ce.PdfRectangle
	_cd  map[string]fontEntry
	_fa  map[string]textResult
	_bg  int64
	_eb  int
	_fgb *Options
	_ba  *_gb.PdfObject
	_eg  _gb.PdfObject
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct {
	_dec []TextMark
}

// _fafg reports whether rectangle `_aafg` lies within rectangle `_gagb`
// (edges may coincide).
func _fafg(_gagb, _aafg _ce.PdfRectangle) bool {
	withinX := _gagb.Llx <= _aafg.Llx && _aafg.Urx <= _gagb.Urx
	withinY := _gagb.Lly <= _aafg.Lly && _aafg.Ury <= _gagb.Ury
	return withinX && withinY
}

// emptyCompositeColumn reports whether no composite cell in column `_bdgc`
// holds any paragraphs.
func (_cdddg *textTable) emptyCompositeColumn(_bdgc int) bool {
	for row := 0; row < _cdddg._dcfg; row++ {
		cell, present := _cdddg._egfe[_aaca(_bdgc, row)]
		if present && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// _bdcdcf returns the keys of `_afbef` sorted in descending order.
func _bdcdcf(_afbef map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_afbef))
	for key := range _afbef {
		keys = append(keys, key)
	}
	_c.Float64s(keys)
	// Reverse the ascending result in place.
	for lo, hi := 0, len(keys)-1; lo < hi; lo, hi = lo+1, hi-1 {
		keys[lo], keys[hi] = keys[hi], keys[lo]
	}
	return keys
}

// compositeColCorridors returns a map with one entry per table column, each
// mapped to a nil corridor slice.
func (_baggf *textTable) compositeColCorridors() map[int][]float64 {
	width := _baggf._ecbf
	corridors := make(map[int][]float64, width)
	if _afcg {
		_ec.Log.Info("\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020", _baggf._ecbf)
	}
	for col := 0; col < _baggf._ecbf; col++ {
		corridors[col] = nil
	}
	return corridors
}

// findGridTables looks for a table in each of the given tilings and returns
// those found, after marking their cells. Every paragraph in the second result
// of findTableGrid has its `_bfcd` flag set, whether or not a table was found.
func (_geaaf paraList) findGridTables(_bdbde []gridTiling) []*textTable {
	if _afcg {
		_ec.Log.Info("\u0066i\u006e\u0064\u0047\u0072\u0069\u0064\u0054\u0061\u0062\u006c\u0065s\u003a\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073", len(_geaaf))
		for i, para := range _geaaf {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para)
		}
	}

	var found []*textTable
	for i, tiling := range _bdbde {
		table, used := _geaaf.findTableGrid(tiling)
		if table != nil {
			table.log(_ge.Sprintf("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0057\u0069\u0074\u0068\u0047\u0072\u0069\u0064\u0073\u003a\u0020%\u0064", i))
			found = append(found, table)
			table.markCells()
		}
		for para := range used {
			para._bfcd = true
		}
	}

	if _afcg {
		_ec.Log.Info("\u0066i\u006e\u0064\u0047\u0072i\u0064\u0054\u0061\u0062\u006ce\u0073:\u0020%\u0064\u0020\u0074\u0061\u0062\u006c\u0065s", len(found))
	}
	return found
}
2023-09-07 17:40:17 +00:00
2023-12-17 13:54:01 +00:00
// Elements returns the TextMarks in `ma`.
func ( _ecb * TextMarkArray ) Elements ( ) [ ] TextMark { return _ecb . _dec } ; func ( _gecdc lineRuling ) yMean ( ) float64 { return 0.5 * ( _gecdc . _ddgfc . Y + _gecdc . _eacg . Y ) } ; func ( _bggbe rulingList ) snapToGroups ( ) rulingList { _gecce , _ffgda := _bggbe . vertsHorzs ( ) ;
if len ( _gecce ) > 0 { _gecce = _gecce . snapToGroupsDirection ( ) ; } ; if len ( _ffgda ) > 0 { _ffgda = _ffgda . snapToGroupsDirection ( ) ; } ; _gfeb := append ( _gecce , _ffgda ... ) ; _gfeb . log ( "\u0073\u006e\u0061p\u0054\u006f\u0047\u0072\u006f\u0075\u0070\u0073" ) ; return _gfeb ;
2023-09-07 17:40:17 +00:00
} ;
2023-12-17 13:54:01 +00:00
// String returns a description of the ruling: its kind, primary coordinate,
// the two ends of its extent and their difference, its `_bgaa` value, colour
// and, when non-zero, its width.
func (_faga *ruling) String() string {
	if _faga._bfbc == _gdcf {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}

	// The axis labels swap for the `_ecac` kind.
	primaryAxis, secondaryAxis := "\u0078", "\u0079"
	if _faga._bfbc == _ecac {
		primaryAxis, secondaryAxis = "\u0079", "\u0078"
	}

	var width string
	if _faga._bcgdf != 0.0 {
		width = _ge.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _faga._bcgdf)
	}

	extent := _faga._deee - _faga._cebe
	return _ge.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",
		_faga._bfbc, primaryAxis, _faga._abbgc, secondaryAxis, _faga._cebe, _faga._deee,
		extent, _faga._bgaa, _faga.Color, width)
}

// textPara is a paragraph of text lines, optionally carrying a table
// (`_bddea`), with links to four other paragraphs.
type textPara struct {
	_ce.PdfRectangle
	_ebcf  _ce.PdfRectangle
	_bdbcg []*textLine
	_bddea *textTable
	_bfcd  bool
	_bedf  bool
	_egab  *textPara
	_gaca  *textPara
	_abdda *textPara
	_fdgbd *textPara
	_efafd []list
}

// Render mode bit flags.
const (
	RenderModeStroke RenderMode = 1 << iota
	RenderModeFill
	RenderModeClip
)

// extractInlineImage converts an inline image to RGB and appends it to the
// context as an ImageMark whose size, angle and position come from the CTM of
// `_gfe`. A missing colour space is replaced by DeviceGray. The context's
// `_da` counter is incremented on success.
func (_aee *imageExtractContext) extractInlineImage(_cgae *_ag.ContentStreamInlineImage, _gfe _ag.GraphicsState, _fcg *_ce.PdfPageResources) error {
	img, err := _cgae.ToImage(_fcg)
	if err != nil {
		return err
	}

	colorspace, err := _cgae.GetColorSpace(_fcg)
	if err != nil {
		return err
	}
	if colorspace == nil {
		colorspace = _ce.NewPdfColorspaceDeviceGray()
	}

	rgb, err := colorspace.ImageToRGB(*img)
	if err != nil {
		return err
	}

	mark := ImageMark{
		Image:  &rgb,
		Width:  _gfe.CTM.ScalingFactorX(),
		Height: _gfe.CTM.ScalingFactorY(),
		Angle:  _gfe.CTM.Angle(),
	}
	mark.X, mark.Y = _gfe.CTM.Translation()

	_aee._gbf = append(_aee._gbf, mark)
	_aee._da++
	return nil
}

// _bfbfd formats `_ggeb` as "{key: values, ...}" with the keys in the order
// returned by `_fadg`.
func _bfbfd(_ggeb map[int][]float64) string {
	keys := _fadg(_ggeb)
	entries := make([]string, len(_ggeb))
	for i, key := range keys {
		entries[i] = _ge.Sprintf("\u0025\u0064\u003a\u0020\u0025\u002e\u0032\u0066", key, _ggeb[key])
	}
	joined := _gd.Join(entries, "\u002c\u0020")
	return _ge.Sprintf("\u007b\u0025\u0073\u007d", joined)
}

// _dgdc returns the distance from the top of `_fcgfe` down to the bottom of
// the bounding box of `_bcfb`.
func _dgdc(_fcgfe _ce.PdfRectangle, _bcfb bounded) float64 {
	bottom := _bcfb.bbox().Lly
	return _fcgfe.Ury - bottom
}

// toTextMarks returns the text marks of the paragraph. For a paragraph without
// a table these are its cell text marks. Otherwise the table is walked row by
// row: an empty cell contributes a tab, every cell is followed by a space, and
// rows other than the last are followed by a newline. An exportable table is
// finally passed to `_ddedf` together with the marks.
func (_adbb *textPara) toTextMarks(_bbeb *int) []TextMark {
	table := _adbb._bddea
	if table == nil {
		return _adbb.toCellTextMarks(_bbeb)
	}

	var marks []TextMark
	for y := 0; y < table._dcfg; y++ {
		for x := 0; x < table._ecbf; x++ {
			if cell := table.get(x, y); cell != nil {
				cellMarks := cell.toCellTextMarks(_bbeb)
				marks = append(marks, cellMarks...)
			} else {
				marks = _cacf(marks, _bbeb, "\u0009")
			}
			marks = _cacf(marks, _bbeb, "\u0020")
		}
		if y < table._dcfg-1 {
			marks = _cacf(marks, _bbeb, "\u000a")
		}
	}

	if table.isExportable() {
		exported := table.toTextTable()
		marks = _ddedf(marks, &exported)
	}
	return marks
}

// depthBand returns the result of depthRange for the depth indexes of `_bfef`
// and `_efga`, or nil when the bag holds no words.
func (_ebag *wordBag) depthBand(_bfef, _efga float64) []int {
	if len(_ebag._faba) == 0 {
		return nil
	}
	from := _ebag.getDepthIdx(_bfef)
	to := _ebag.getDepthIdx(_efga)
	return _ebag.depthRange(from, to)
}
2023-08-03 17:30:04 +00:00
2023-12-17 13:54:01 +00:00
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_dea *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	ctx := &imageExtractContext{_cef: options}
	if err := ctx.extractContentStreamImages(_dea._bcf, _dea._ab); err != nil {
		return nil, err
	}
	images := &PageImages{Images: ctx._gbf}
	return images, nil
}
2023-08-03 17:30:04 +00:00
2023-12-17 13:54:01 +00:00
// String returns a description of the ruling kind: its name from `_baff`, or a
// "Not a ruling" message carrying the numeric value for an unknown kind.
func (_cgdbg rulingKind) String() string {
	if name, known := _baff[_cgdbg]; known {
		return name
	}
	return _ge.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _cgdbg)
}

// tables returns the exported form of every exportable table attached to a
// paragraph in the list.
func (_cfad paraList) tables() []TextTable {
	var exported []TextTable
	if _afcg {
		_ec.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range _cfad {
		table := para._bddea
		if table == nil || !table.isExportable() {
			continue
		}
		exported = append(exported, table.toTextTable())
	}
	return exported
}

// moveText forwards the two offsets to moveLP.
func (_gad *textObject) moveText(_dfcf, _dfcfa float64) {
	_gad.moveLP(_dfcf, _dfcfa)
}

// rulingKind identifies the orientation of a ruling.
type rulingKind int

// _baff maps each ruling kind to its display name.
var _baff = map[rulingKind]string{
	_gdcf:  "\u006e\u006f\u006e\u0065",
	_ecac:  "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",
	_ebdaf: "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",
}

// toGrids partitions the rulings into groups of connected rulings and returns
// the groups that isActualGrid accepts, each snapped to groups.
//
// Rulings are visited from most to fewest connections (ties broken by comp).
// Each ruling joins the first existing group containing a ruling it is
// connected to, or starts a new group. Groups are then ordered by decreasing
// size and their members sorted with comp.
//
// It returns nil when there are no rulings to order. Previously that case
// indexed the first element of an empty ordering and panicked.
func (_ggfg rulingList) toGrids() []rulingList {
	if _gfgc {
		_ec.Log.Info("t\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0025\u0073", _ggfg)
	}

	intersects := _ggfg.intersections()
	if _gfgc {
		_ec.Log.Info("\u0074\u006f\u0047r\u0069\u0064\u0073\u003a \u0076\u0065\u0063\u0073\u003d\u0025\u0064 \u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003d\u0025\u0064\u0020", len(_ggfg), len(intersects))
		for _, key := range _afaae(intersects) {
			_ge.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", key, intersects[key])
		}
	}

	// Record, for each ruling index, the set of ruling indexes it connects to.
	connects := make(map[int]intSet, len(_ggfg))
	for i := range _ggfg {
		if linked := _ggfg.connections(intersects, i); len(linked) > 0 {
			connects[i] = linked
		}
	}
	if _gfgc {
		_ec.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0063\u006fn\u006e\u0065\u0063\u0074s=\u0025\u0064", len(connects))
		for _, key := range _afaae(connects) {
			_ge.Printf("\u00254\u0064\u003a\u0020\u0025\u002b\u0076\n", key, connects[key])
		}
	}

	ordering := _debaa(len(_ggfg), func(a, b int) bool {
		na, nb := len(connects[a]), len(connects[b])
		if na != nb {
			return na > nb
		}
		return _ggfg.comp(a, b)
	})
	if _gfgc {
		_ec.Log.Info("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u006f\u0072d\u0065\u0072\u0069\u006eg=\u0025\u0076", ordering)
	}
	// Guard the ordering[0] access below: with nothing to order there are no grids.
	if len(ordering) == 0 {
		return nil
	}

	igrids := [][]int{{ordering[0]}}
nextRuling:
	for _, candidate := range ordering[1:] {
		for g, members := range igrids {
			for _, member := range members {
				if connects[member].has(candidate) {
					igrids[g] = append(members, candidate)
					continue nextRuling
				}
			}
		}
		igrids = append(igrids, []int{candidate})
	}
	if _gfgc {
		_ec.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020i\u0067\u0072i\u0064\u0073\u003d\u0025\u0076", igrids)
	}

	_c.SliceStable(igrids, func(a, b int) bool { return len(igrids[a]) > len(igrids[b]) })
	for _, members := range igrids {
		_c.Slice(members, func(a, b int) bool { return _ggfg.comp(members[a], members[b]) })
	}

	grids := make([]rulingList, len(igrids))
	for g, members := range igrids {
		grid := make(rulingList, len(members))
		for k, idx := range members {
			grid[k] = _ggfg[idx]
		}
		grids[g] = grid
	}
	if _gfgc {
		_ec.Log.Info("\u0074o\u0047r\u0069\u0064\u0073\u003a\u0020g\u0072\u0069d\u0073\u003d\u0025\u002b\u0076", grids)
	}

	var actualGrids []rulingList
	for _, grid := range grids {
		if accepted, ok := grid.isActualGrid(); ok {
			actualGrids = append(actualGrids, accepted.snapToGroups())
		}
	}
	if _gfgc {
		_fcegc("t\u006fG\u0072\u0069\u0064\u0073\u003a\u0020\u0061\u0063t\u0075\u0061\u006c\u0047ri\u0064\u0073", actualGrids)
		_ec.Log.Info("\u0074\u006f\u0047\u0072\u0069\u0064\u0073\u003a\u0020\u0067\u0072\u0069\u0064\u0073\u003d%\u0064 \u0061\u0063\u0074\u0075\u0061\u006c\u0047\u0072\u0069\u0064\u0073\u003d\u0025\u0064", len(grids), len(actualGrids))
	}
	return actualGrids
}

// _beeee reports whether `_fdgg` is less than the fraction `_cffa` of
// `_gbcbf`, with `_gbcbf` bounded below by `_edeeb`.
func _beeee(_fdgg, _gbcbf float64) bool {
	divisor := _aa.Max(_edeeb, _gbcbf)
	return _fdgg/divisor < _cffa
}
// _ebebc walks _bdbbb in order and returns the _bcdg of the last line that has
// a larger _bcdg than _egad and whose rounded Llx is not smaller than _egad's.
// The walk stops at the first line with a larger _bcdg whose rounded Llx is
// smaller. It returns -1 when no line qualifies. _agaf is not used.
func _ebebc(_egad *textLine, _bdbbb []*textLine, _agaf []float64) float64 {
	result := -1.0
	for _, candidate := range _bdbbb {
		if !(candidate._bcdg > _egad._bcdg) {
			continue
		}
		if !(_aa.Round(candidate.Llx) >= _aa.Round(_egad.Llx)) {
			break
		}
		result = candidate._bcdg
	}
	return result
}

// _egee builds a textLine seeded from the first word of _dcd at index _edde
// (its rectangle, _ddgee and _cffdg) and then pulls that word into the line.
func _egee(_dcd *wordBag, _edde int) *textLine {
	first := _dcd.firstWord(_edde)
	line := textLine{
		PdfRectangle: first.PdfRectangle,
		_ecag:        first._ddgee,
		_bcdg:        first._cffdg,
	}
	line.pullWord(_dcd, first, _edde)
	return &line
}

// topoOrder performs a depth-first traversal over the paras, following
// readBefore edges, collects the indexes in post-order and returns the result
// of passing that slice through _ccba.
func (_bcdf paraList) topoOrder() []int {
	if _eaba {
		_ec.Log.Info("\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a")
	}
	count := len(_bcdf)
	seen := make([]bool, count)
	postOrder := make([]int, 0, count)
	ordering := _bcdf.llyOrdering()

	var visit func(_acge int)
	visit = func(_cddd int) {
		seen[_cddd] = true
		for next := range seen {
			if seen[next] || !_bcdf.readBefore(ordering, _cddd, next) {
				continue
			}
			visit(next)
		}
		postOrder = append(postOrder, _cddd)
	}
	for start := range seen {
		if !seen[start] {
			visit(start)
		}
	}
	return _ccba(postOrder)
}

// endsInHyphen reports whether the line ends in a hyphen: the last rune of the
// last word must be in unicode.Hyphen, and either the word is flagged _gfffc
// and passes _fccf, or the whole line text passes _fccf.
func (_gage *textLine) endsInHyphen() bool {
	lastWord := _gage._aebc[len(_gage._aebc)-1]
	wordText := lastWord._bbdb
	r, size := _g.DecodeLastRuneInString(wordText)
	if size <= 0 || !_de.Is(_de.Hyphen, r) {
		return false
	}
	return (lastWord._gfffc && _fccf(wordText)) || _fccf(_gage.text())
}

// _fae returns a new subpath whose only point is _bbdd.
func _fae(_bbdd _bc.Point) *subpath {
	points := []_bc.Point{_bbdd}
	return &subpath{_gfefe: points}
}

// intSet is a set of ints.
type intSet map[int]struct{}

// _acdeg returns _beeee applied to the absolute Y distance and the absolute X
// distance between the two points, in that order.
func _acdeg(_bggb, _caeg _bc.Point) bool {
	dx := _aa.Abs(_bggb.X - _caeg.X)
	dy := _aa.Abs(_bggb.Y - _caeg.Y)
	return _beeee(dy, dx)
}

// _beecb converts the children of _aggd into a slice of lists, dispatching on
// each child's _ffcg tag. "LI" children become bullet items, an "LBody" child
// returns its own conversion immediately, and an "L" child appends its
// conversion and returns immediately.
func _beecb(_aggd *list) []*list {
	var items []*list
	for _, child := range _aggd._gbab {
		switch child._ffcg {
		case "\u004c\u0049":
			lines := _fdecc(child)
			nested := _beecb(child)
			item := _baba(lines, "\u0062\u0075\u006c\u006c\u0065\u0074", nested)
			item._dage = _efaf(lines, "")
			items = append(items, item)
		case "\u004c\u0042\u006fd\u0079":
			return _beecb(child)
		case "\u004c":
			return append(items, _beecb(child)...)
		}
	}
	return items
}

// _dcbf selects lines from _gfee whose _bcdg is at least _dagdd.
// When _cfcb is not -1, lines with _bcdg below _cfcb and text different from
// _ddfg are kept, stopping at the first such line whose rounded Llx is left of
// _ddfg's. When _cfcb is -1, lines sharing _ddfg's _bcdg (with different text)
// are kept, as are lines whose _bcdg does not exceed the bound from _ebebc.
func _dcbf(_ddfg *textLine, _gfee []*textLine, _agde []float64, _dagdd, _cfcb float64) []*textLine {
	selected := []*textLine{}
	for _, cand := range _gfee {
		if !(cand._bcdg >= _dagdd) {
			continue
		}
		if _cfcb != -1 && cand._bcdg < _cfcb {
			if cand.text() == _ddfg.text() {
				continue
			}
			if _aa.Round(cand.Llx) < _aa.Round(_ddfg.Llx) {
				break
			}
			selected = append(selected, cand)
		} else if _cfcb == -1 {
			if cand._bcdg == _ddfg._bcdg {
				if cand.text() != _ddfg.text() {
					selected = append(selected, cand)
				}
				continue
			}
			limit := _ebebc(_ddfg, _gfee, _agde)
			if limit != -1 && cand._bcdg <= limit {
				selected = append(selected, cand)
			}
		}
	}
	return selected
}

// empty reports whether the stack holds no elements.
func (_gaec *stateStack) empty() bool {
	return len(*_gaec) == 0
}

// _agce returns the difference of _deage between the two arguments.
func _agce(_gbfb, _cgeea bounded) float64 {
	return _deage(_gbfb) - _deage(_cgeea)
}

// textResult bundles a PageText with two int counters.
type textResult struct {
	_ceee PageText
	_agcc int
	_dge  int
}
2023-05-29 17:26:33 +00:00
2023-12-17 13:54:01 +00:00
// TableInfo returns the table associated with the text mark together with the
// cell information computed by the table's getCellInfo for this mark.
// It returns (nil, nil) when the mark's _ebd flag is not set.
func (_bafd *TextMark) TableInfo() (*TextTable, [][]int) {
	if !_bafd._ebd {
		return nil, nil
	}
	table := _bafd._afe
	return table, table.getCellInfo(*_bafd)
}

// _daeg returns, for each index i in [0, _dcabd), the running offset obtained
// by summing len(_aeef[j])+1 over all j < i, plus the grand total over all i.
func _daeg(_dcabd int, _aeef map[int][]float64) ([]int, int) {
	offsets := make([]int, _dcabd)
	total := 0
	for i := range offsets {
		offsets[i] = total
		total += len(_aeef[i]) + 1
	}
	return offsets, total
}

// _cfgd is a regular expression source string (escaped form kept verbatim).
var _cfgd string = "\u0028\u003f\u0069\u0029\u005e\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028?\u003a\u0044\u007cM\u0029\u007c\u0044\u003f\u0043{\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028\u003f\u003a\u004c\u007c\u0043\u0029\u007cL\u003f\u0058\u007b\u0030\u002c\u0033}\u0029\u0028\u0049\u0028\u003f\u003a\u0056\u007c\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u005c\u0029\u007c\u005c\u002e\u0029\u007c\u005e\u005c\u0028\u0028\u004d\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0043\u0028\u003f\u003aD\u007cM\u0029\u007c\u0044\u003f\u0043\u007b\u0030\u002c\u0033\u007d\u0029\u0028\u0058\u0028?\u003a\u004c\u007c\u0043\u0029\u007c\u004c?\u0058\u007b0\u002c\u0033\u007d\u0029(\u0049\u0028\u003f\u003a\u0056|\u0058\u0029\u007c\u0056\u003f\u0049\u007b\u0030\u002c\u0033\u007d\u0029\u005c\u0029"
2023-02-07 17:17:49 +00:00
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// Font represents the font properties on a PDF page.
type Font struct { PdfFont * _ce . PdfFont ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// FontName represents Font Name from font properties.
FontName string ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// FontType represents Font Subtype entry in the font dictionary inside page resources.
// Examples : type0, Type1, MMType1, Type3, TrueType, CIDFont.
FontType string ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// ToUnicode is true if font provides a `ToUnicode` mapping.
ToUnicode bool ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// IsCID is true if underlying font is a composite font.
// Composite font is represented by a font dictionary whose Subtype is `Type0`
IsCID bool ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// IsSimple is true if font is simple font.
// A simple font is limited to only 8 bit (255) character codes.
IsSimple bool ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// FontData represents the raw data of the embedded font file.
// It can have format TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CCF).
// FontData value can be indicates from `FontFile`, `FontFile2` or `FontFile3` inside Font Descriptor.
// At most, only one of `FontFile`, `FontFile2` or `FontFile3` will be FontData value.
FontData [ ] byte ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// FontFileName is a name representing the font. it has format:
// (Font Name) + (Font Type Extension), example: helvetica.ttf.
FontFileName string ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// FontDescriptor represents metrics and other attributes inside font properties from PDF Structure (Font Descriptor).
FontDescriptor * _ce . PdfFontDescriptor ; } ; func ( _fdeef * textPara ) writeCellText ( _cbfec _a . Writer ) { for _abff , _defb := range _fdeef . _bdbcg { _efgg := _defb . text ( ) ; _gaadf := _dcgd && _defb . endsInHyphen ( ) && _abff != len ( _fdeef . _bdbcg ) - 1 ; if _gaadf { _efgg = _edcg ( _efgg ) ;
} ; _cbfec . Write ( [ ] byte ( _efgg ) ) ; if ! ( _gaadf || _abff == len ( _fdeef . _bdbcg ) - 1 ) { _cbfec . Write ( [ ] byte ( _dcgbg ( _defb . _bcdg , _fdeef . _bdbcg [ _abff + 1 ] . _bcdg ) ) ) ; } ; } ; } ; func ( _gefgf * textTable ) toTextTable ( ) TextTable { if _afcg { _ec . Log . Info ( "t\u006fT\u0065\u0078\u0074\u0054\u0061\u0062\u006c\u0065:\u0020\u0025\u0064\u0020x \u0025\u0064" , _gefgf . _ecbf , _gefgf . _dcfg ) ;
} ; _bgga := make ( [ ] [ ] TableCell , _gefgf . _dcfg ) ; for _cdbe := 0 ; _cdbe < _gefgf . _dcfg ; _cdbe ++ { _bgga [ _cdbe ] = make ( [ ] TableCell , _gefgf . _ecbf ) ; for _gcbe := 0 ; _gcbe < _gefgf . _ecbf ; _gcbe ++ { _egdc := _gefgf . get ( _gcbe , _cdbe ) ; if _egdc == nil { continue ; } ;
if _afcg { _ge . Printf ( "\u0025\u0034\u0064 \u0025\u0032\u0064\u003a\u0020\u0025\u0073\u000a" , _gcbe , _cdbe , _egdc ) ; } ; _bgga [ _cdbe ] [ _gcbe ] . Text = _egdc . text ( ) ; _adbcf := 0 ; _bgga [ _cdbe ] [ _gcbe ] . Marks . _dec = _egdc . toTextMarks ( & _adbcf ) ; } ; } ; _gdaaf := TextTable { W : _gefgf . _ecbf , H : _gefgf . _dcfg , Cells : _bgga } ;
_gdaaf . PdfRectangle = _gefgf . bbox ( ) ; return _gdaaf ; } ; var _fdgf = map [ markKind ] string { _gbffb : "\u0073\u0074\u0072\u006f\u006b\u0065" , _eecbc : "\u0066\u0069\u006c\u006c" , _cgacb : "\u0061u\u0067\u006d\u0065\u006e\u0074" } ; func ( _bfc * imageExtractContext ) extractContentStreamImages ( _ae string , _fgf * _ce . PdfPageResources ) error { _dfca := _ag . NewContentStreamParser ( _ae ) ;
_cea , _ga := _dfca . Parse ( ) ; if _ga != nil { return _ga ; } ; if _bfc . _aabe == nil { _bfc . _aabe = map [ * _gb . PdfObjectStream ] * cachedImage { } ; } ; if _bfc . _cef == nil { _bfc . _cef = & ImageExtractOptions { } ; } ; _dbe := _ag . NewContentStreamProcessor ( * _cea ) ; _dbe . AddHandler ( _ag . HandlerConditionEnumAllOperands , "" , _bfc . processOperand ) ;
return _dbe . Process ( _fgf ) ; } ; func _cbbae ( _ffda [ ] * textLine , _cagca map [ float64 ] [ ] * textLine ) [ ] * list { _acf := _bcfe ( _cagca ) ; _ccgd := [ ] * list { } ; if len ( _acf ) == 0 { return _ccgd ; } ; _facd := _acf [ 0 ] ; _eebg := 1 ; _eeda := _cagca [ _facd ] ; for _gaad , _dbgd := range _eeda { var _bdcd float64 ;
_bafaa := [ ] * list { } ; _caacf := _dbgd . _bcdg ; _gcdd := - 1.0 ; if _gaad < len ( _eeda ) - 1 { _gcdd = _eeda [ _gaad + 1 ] . _bcdg ; } ; if _eebg < len ( _acf ) { _bafaa = _deca ( _ffda , _cagca , _acf , _eebg , _caacf , _gcdd ) ; } ; _bdcd = _gcdd ; if len ( _bafaa ) > 0 { _eaff := _bafaa [ 0 ] ;
if len ( _eaff . _ggdb ) > 0 { _bdcd = _eaff . _ggdb [ 0 ] . _bcdg ; } ; } ; _cbbe := [ ] * textLine { _dbgd } ; _cadg := _dcbf ( _dbgd , _ffda , _acf , _caacf , _bdcd ) ; _cbbe = append ( _cbbe , _cadg ... ) ; _aeba := _baba ( _cbbe , "\u0062\u0075\u006c\u006c\u0065\u0074" , _bafaa ) ; _aeba . _dage = _efaf ( _cbbe , "" ) ;
_ccgd = append ( _ccgd , _aeba ) ; } ; return _ccgd ; } ; func ( _geff * ruling ) intersects ( _dbfa * ruling ) bool { _acbf := ( _geff . _bfbc == _ebdaf && _dbfa . _bfbc == _ecac ) || ( _dbfa . _bfbc == _ebdaf && _geff . _bfbc == _ecac ) ; _dbce := func ( _fdgab , _aceg * ruling ) bool { return _fdgab . _cebe - _ebbf <= _aceg . _abbgc && _aceg . _abbgc <= _fdgab . _deee + _ebbf ;
} ; _cfge := _dbce ( _geff , _dbfa ) ; _ggdd := _dbce ( _dbfa , _geff ) ; if _gfgc { _ge . Printf ( "\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a" + "\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a" + " \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a" , _acbf , _cfge , _ggdd , _acbf && _cfge && _ggdd , _geff , _dbfa ) ;
} ; return _acbf && _cfge && _ggdd ; } ; func ( _bcfa * textObject ) setTextRise ( _dfga float64 ) { if _bcfa == nil { return ; } ; _bcfa . _befa . _gegg = _dfga ; } ; func ( _baa * textObject ) setFont ( _dedfd string , _gcbc float64 ) error { if _baa == nil { return nil ; } ; _baa . _befa . _ccf = _gcbc ;
_dffb , _ggea := _baa . getFont ( _dedfd ) ; if _ggea != nil { return _ggea ; } ; _baa . _befa . _badfc = _dffb ; return nil ; } ; func ( _bdec rulingList ) intersections ( ) map [ int ] intSet { var _eebc , _bcab [ ] int ; for _fabga , _aebab := range _bdec { switch _aebab . _bfbc { case _ebdaf : _eebc = append ( _eebc , _fabga ) ;
case _ecac : _bcab = append ( _bcab , _fabga ) ; } ; } ; if len ( _eebc ) < _cbbb + 1 || len ( _bcab ) < _acgd + 1 { return nil ; } ; if len ( _eebc ) + len ( _bcab ) > _fcae { _ec . Log . Debug ( "\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064" , len ( _bdec ) , len ( _eebc ) , len ( _bcab ) ) ;
return nil ; } ; _geeb := make ( map [ int ] intSet , len ( _eebc ) + len ( _bcab ) ) ; for _ , _dbgg := range _eebc { for _ , _agdd := range _bcab { if _bdec [ _dbgg ] . intersects ( _bdec [ _agdd ] ) { if _ , _baac := _geeb [ _dbgg ] ; ! _baac { _geeb [ _dbgg ] = make ( intSet ) ; } ; if _ , _fage := _geeb [ _agdd ] ;
! _fage { _geeb [ _agdd ] = make ( intSet ) ; } ; _geeb [ _dbgg ] . add ( _agdd ) ; _geeb [ _agdd ] . add ( _dbgg ) ; } ; } ; } ; return _geeb ; } ; func ( _dbff rulingList ) aligned ( ) bool { if len ( _dbff ) < 2 { return false ; } ; _edce := make ( map [ * ruling ] int ) ; _edce [ _dbff [ 0 ] ] = 0 ; for _ , _dddgg := range _dbff [ 1 : ] { _fggg := false ;
for _bfffe := range _edce { if _dddgg . gridIntersecting ( _bfffe ) { _edce [ _bfffe ] ++ ; _fggg = true ; break ; } ; } ; if ! _fggg { _edce [ _dddgg ] = 0 ; } ; } ; _bgbf := 0 ; for _ , _aabb := range _edce { if _aabb == 0 { _bgbf ++ ; } ; } ; _gafdc := float64 ( _bgbf ) / float64 ( len ( _dbff ) ) ;
_cbacg := _gafdc <= 1.0 - _cfea ; if _gfgc { _ec . Log . Info ( "\u0061\u006c\u0069\u0067\u006e\u0065\u0064\u003d\u0025\u0074\u0020\u0075\u006em\u0061\u0074\u0063\u0068\u0065\u0064=\u0025\u002e\u0032\u0066\u003d\u0025\u0064\u002f\u0025\u0064\u0020\u0076\u0065c\u0073\u003d\u0025\u0073" , _cbacg , _gafdc , _bgbf , len ( _dbff ) , _dbff . String ( ) ) ;
} ; return _cbacg ; } ; func ( _aafe rulingList ) connections ( _abfa map [ int ] intSet , _bacg int ) intSet { _dbee := make ( intSet ) ; _fdeca := make ( intSet ) ; var _agdb func ( int ) ; _agdb = func ( _fdaf int ) { if ! _fdeca . has ( _fdaf ) { _fdeca . add ( _fdaf ) ; for _eeabg := range _aafe { if _abfa [ _eeabg ] . has ( _fdaf ) { _dbee . add ( _eeabg ) ;
} ; } ; for _bege := range _aafe { if _dbee . has ( _bege ) { _agdb ( _bege ) ; } ; } ; } ; } ; _agdb ( _bacg ) ; return _dbee ; } ; func _fabgb ( _cfbb , _dcac bounded ) float64 { _gfeg := _fabg ( _cfbb , _dcac ) ; if ! _ffegg ( _gfeg ) { return _gfeg ; } ; return _agce ( _cfbb , _dcac ) ; } ; func ( _dccd * shapesState ) lineTo ( _cbc , _faac float64 ) { if _cbag { _ec . Log . Info ( "\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066" , _cbc , _faac , _dccd . devicePoint ( _cbc , _faac ) ) ;
} ; _dccd . addPoint ( _cbc , _faac ) ; } ; func ( _dgdg * subpath ) close ( ) { if ! _dcbd ( _dgdg . _gfefe [ 0 ] , _dgdg . last ( ) ) { _dgdg . add ( _dgdg . _gfefe [ 0 ] ) ; } ; _dgdg . _cgde = true ; _dgdg . removeDuplicates ( ) ; } ;
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// ExtractFonts returns the font information found on the extractor's page:
// font name, font type, the raw data of the embedded font file (if embedded),
// font descriptor and more.
//
// `previousPageFonts` supports building a font catalog across several pages or
// a whole document: its entries are appended to the result unless a font with
// the same name was already found on this page, so no duplicates are produced.
//
// NOTE: When previousPageFonts is nil only the fonts of this page are returned.
func (_cg *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	var pageFonts PageFonts
	if err := pageFonts.extractPageResourcesToFont(_cg._ab); err != nil {
		return nil, err
	}
	if previousPageFonts == nil {
		return &PageFonts{Fonts: pageFonts.Fonts}, nil
	}
	for _, previous := range previousPageFonts.Fonts {
		if _efg(pageFonts.Fonts, previous.FontName) {
			continue
		}
		pageFonts.Fonts = append(pageFonts.Fonts, previous)
	}
	return &PageFonts{Fonts: pageFonts.Fonts}, nil
}

// clearPath resets the _abgb and _edee fields of the shapes state.
func (_ggag *shapesState) clearPath() {
	_ggag._abgb, _ggag._edee = nil, false
	if _cbag {
		_ec.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _ggag)
	}
}

// _bdbf returns the lines whose _bcdg is greater than _ebab and, unless _bbgd
// is -1, also less than _bbgd.
func _bdbf(_cgca []*textLine, _bbgd, _ebab float64) []*textLine {
	var kept []*textLine
	for _, line := range _cgca {
		if line._bcdg > _ebab && (_bbgd == -1 || line._bcdg < _bbgd) {
			kept = append(kept, line)
		}
	}
	return kept
}
2022-06-27 19:58:38 +00:00
2023-12-17 13:54:01 +00:00
// String returns a human readable description of `path`.
func ( _ccfa * subpath ) String ( ) string { _dagd := _ccfa . _gfefe ; _cged := len ( _dagd ) ; if _cged <= 5 { return _ge . Sprintf ( "\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f" , _cged , _dagd ) ; } ; return _ge . Sprintf ( "\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f" , _cged , _dagd [ 0 ] , _dagd [ 1 ] , _dagd [ _cged - 1 ] ) ;
} ; func ( _egccc * ruling ) encloses ( _cbaf , _gefeb float64 ) bool { return _egccc . _cebe - _ebbf <= _cbaf && _gefeb <= _egccc . _deee + _ebbf ; } ; func ( _eecf paraList ) findTables ( _bgfa [ ] gridTiling ) [ ] * textTable { _eecf . addNeighbours ( ) ; _c . Slice ( _eecf , func ( _adbee , _acda int ) bool { return _fabgb ( _eecf [ _adbee ] , _eecf [ _acda ] ) < 0 } ) ;
var _fedga [ ] * textTable ; if _dacb { _abffb := _eecf . findGridTables ( _bgfa ) ; _fedga = append ( _fedga , _abffb ... ) ; } ; if _eccc { _cbfdf := _eecf . findTextTables ( ) ; _fedga = append ( _fedga , _cbfdf ... ) ; } ; return _fedga ; } ; func ( _ggceb * textObject ) getCurrentFont ( ) * _ce . PdfFont { _ccea := _ggceb . _befa . _badfc ;
if _ccea == nil { _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e" ) ; return _ce . DefaultFont ( ) ;
} ; return _ccea ; } ; func ( _ababb * textPara ) toCellTextMarks ( _baaeb * int ) [ ] TextMark { var _bcga [ ] TextMark ; for _fcgfag , _cgcff := range _ababb . _bdbcg { _bfff := _cgcff . toTextMarks ( _baaeb ) ; _debac := _dcgd && _cgcff . endsInHyphen ( ) && _fcgfag != len ( _ababb . _bdbcg ) - 1 ;
if _debac { _bfff = _gaagg ( _bfff , _baaeb ) ; } ; _bcga = append ( _bcga , _bfff ... ) ; if ! ( _debac || _fcgfag == len ( _ababb . _bdbcg ) - 1 ) { _bcga = _cacf ( _bcga , _baaeb , _dcgbg ( _cgcff . _bcdg , _ababb . _bdbcg [ _fcgfag + 1 ] . _bcdg ) ) ; } ; } ; return _bcga ; } ; const ( _fad = false ;
_cfgg = false ; _acab = false ; _fgba = false ; _cbag = false ; _dggee = false ; _becc = false ; _eaba = false ; _egd = false ; _eeg = _egd && true ; _aacb = _eeg && false ; _fbfc = _egd && true ; _afcg = false ; _dcedc = _afcg && false ; _fefa = _afcg && true ; _gfgc = false ; _deaga = _gfgc && false ;
_fcfe = _gfgc && false ; _bea = _gfgc && true ; _feceb = _gfgc && false ; _aecc = _gfgc && false ; ) ; func _geedc ( _aeda _ce . PdfRectangle , _baed , _deaab , _afdbf , _gdaac * ruling ) gridTile { _bfgb := _aeda . Llx ; _dfdgd := _aeda . Urx ; _defe := _aeda . Lly ; _facca := _aeda . Ury ;
return gridTile { PdfRectangle : _aeda , _ebdg : _baed != nil && _baed . encloses ( _defe , _facca ) , _ebga : _deaab != nil && _deaab . encloses ( _defe , _facca ) , _ddbaf : _afdbf != nil && _afdbf . encloses ( _bfgb , _dfdgd ) , _fgde : _gdaac != nil && _gdaac . encloses ( _bfgb , _dfdgd ) } ;
} ; func ( _feca * ruling ) gridIntersecting ( _acfc * ruling ) bool { return _gecae ( _feca . _cebe , _acfc . _cebe ) && _gecae ( _feca . _deee , _acfc . _deee ) ; } ; func ( _bagd gridTile ) numBorders ( ) int { _egecc := 0 ; if _bagd . _ebdg { _egecc ++ ; } ; if _bagd . _ebga { _egecc ++ ;
} ; if _bagd . _ddbaf { _egecc ++ ; } ; if _bagd . _fgde { _egecc ++ ; } ; return _egecc ; } ; const ( _dcgd = true ; _cafd = true ; _efbf = true ; _dbde = false ; _gaee = false ; _gafd = 6 ; _aedg = 3.0 ; _abgc = 200 ; _dacb = true ; _eccc = true ; _gfag = true ; _dbbc = true ; _gdcg = false ; ) ; func _cedg ( _gbfd * wordBag , _deagd * textWord , _dbbb float64 ) bool { return _gbfd . Urx <= _deagd . Llx && _deagd . Llx < _gbfd . Urx + _dbbb ;
} ; func ( _ccd * textObject ) setHorizScaling ( _ecd float64 ) { if _ccd == nil { return ; } ; _ccd . _befa . _gdc = _ecd ; } ; func _deage ( _bfbb bounded ) float64 { return - _bfbb . bbox ( ) . Lly } ; func _bcfe ( _fbba map [ float64 ] [ ] * textLine ) [ ] float64 { _ecde := [ ] float64 { } ;
for _aaaa := range _fbba { _ecde = append ( _ecde , _aaaa ) ; } ; _c . Float64s ( _ecde ) ; return _ecde ; } ; func ( _babgb * textObject ) newTextMark ( _fagg string , _aeea _bc . Matrix , _edae _bc . Point , _bfdfg float64 , _dbbf * _ce . PdfFont , _cccb float64 , _dbbce , _becca _fg . Color , _cffd _gb . PdfObject , _cefgg [ ] string , _fbae int , _gcfe int ) ( textMark , bool ) { _gaeec := _aeea . Angle ( ) ;
_ceefg := _fgabc ( _gaeec , _fafa ) ; var _caefe float64 ; if _ceefg % 180 != 90 { _caefe = _aeea . ScalingFactorY ( ) ; } else { _caefe = _aeea . ScalingFactorX ( ) ; } ; _gbdaa := _afb ( _aeea ) ; _fcdd := _ce . PdfRectangle { Llx : _gbdaa . X , Lly : _gbdaa . Y , Urx : _edae . X , Ury : _edae . Y } ;
switch _ceefg % 360 { case 90 : _fcdd . Urx -= _caefe ; case 180 : _fcdd . Ury -= _caefe ; case 270 : _fcdd . Urx += _caefe ; case 0 : _fcdd . Ury += _caefe ; default : _ceefg = 0 ; _fcdd . Ury += _caefe ; } ; if _fcdd . Llx > _fcdd . Urx { _fcdd . Llx , _fcdd . Urx = _fcdd . Urx , _fcdd . Llx ;
} ; if _fcdd . Lly > _fcdd . Ury { _fcdd . Lly , _fcdd . Ury = _fcdd . Ury , _fcdd . Lly ; } ; _adbec := true ; if _babgb . _bgcb . _gda . Width ( ) > 0 { _facc , _gbfe := _cad ( _fcdd , _babgb . _bgcb . _gda ) ; if ! _gbfe { _adbec = false ; _ec . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q" , _fcdd , _babgb . _bgcb . _gda , _fagg ) ;
} ; _fcdd = _facc ; } ; _fffda := _fcdd ; _aacf := _babgb . _bgcb . _gda ; switch _ceefg % 360 { case 90 : _aacf . Urx , _aacf . Ury = _aacf . Ury , _aacf . Urx ; _fffda = _ce . PdfRectangle { Llx : _aacf . Urx - _fcdd . Ury , Urx : _aacf . Urx - _fcdd . Lly , Lly : _fcdd . Llx , Ury : _fcdd . Urx } ;
case 180 : _fffda = _ce . PdfRectangle { Llx : _aacf . Urx - _fcdd . Llx , Urx : _aacf . Urx - _fcdd . Urx , Lly : _aacf . Ury - _fcdd . Lly , Ury : _aacf . Ury - _fcdd . Ury } ; case 270 : _aacf . Urx , _aacf . Ury = _aacf . Ury , _aacf . Urx ; _fffda = _ce . PdfRectangle { Llx : _fcdd . Ury , Urx : _fcdd . Lly , Lly : _aacf . Ury - _fcdd . Llx , Ury : _aacf . Ury - _fcdd . Urx } ;
} ; if _fffda . Llx > _fffda . Urx { _fffda . Llx , _fffda . Urx = _fffda . Urx , _fffda . Llx ; } ; if _fffda . Lly > _fffda . Ury { _fffda . Lly , _fffda . Ury = _fffda . Ury , _fffda . Lly ; } ; _ecdb := textMark { _ecaa : _fagg , PdfRectangle : _fffda , _aefef : _fcdd , _bcdb : _dbbf , _bfaca : _caefe , _cgdb : _cccb , _dadd : _aeea , _feea : _edae , _edge : _ceefg , _abgd : _dbbce , _eedd : _becca , _bfade : _cffd , _fcdc : _cefgg , Th : _babgb . _befa . _gdc , Tw : _babgb . _befa . _eag , _fefe : _gcfe , _beaa : _fbae } ;
if _cfgg { _ec . Log . Info ( "n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073" , _gbdaa , _edae , _ecdb . String ( ) ) ; } ; return _ecdb , _adbec ;
2023-10-07 13:58:01 +00:00
} ;
2022-07-13 21:28:43 +00:00
2023-12-17 13:54:01 +00:00
// String returns a multi-line description of the stack: a header with the
// number of entries followed by one line per entry ("<nil>" for nil entries).
func (_bcbg *stateStack) String() string {
	lines := make([]string, 0, len(*_bcbg)+1)
	lines = append(lines, _ge.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(*_bcbg)))
	for idx, state := range *_bcbg {
		desc := "\u003c\u006e\u0069l\u003e"
		if state != nil {
			desc = state.String()
		}
		lines = append(lines, _ge.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", idx, desc))
	}
	return _gd.Join(lines, "\u000a")
}

// setTextLeading stores _dbdd in the text state's _dcc field. No-op on a nil receiver.
func (_ffb *textObject) setTextLeading(_dbdd float64) {
	if _ffb != nil {
		_ffb._befa._dcc = _dbdd
	}
}

// findTableGrid tries to build a table from the tiling _bafeb by placing the
// paras found inside each tile into the matching cell. It gives up (returning
// nil, nil) when the number of empty tiles exceeds the allowed count, or when
// every cell of row 0 is non-nil with its _bedf flag set. On success it
// returns the reduced and subdivided table and the set of paras it used.
func (_beeb paraList) findTableGrid(_bafeb gridTiling) (*textTable, map[*textPara]struct{}) {
	width := len(_bafeb._cgecb)
	height := len(_bafeb._agbb)
	table := textTable{
		_beaeg: true,
		_ecbf:  width,
		_dcfg:  height,
		_gcbga: make(map[uint64]*textPara, width*height),
		_egfe:  make(map[uint64]compositeCell, width*height),
	}
	table.PdfRectangle = _bafeb.PdfRectangle
	used := make(map[*textPara]struct{})
	maxEmpty := int((1.0 - _fbbd) * float64(width*height))
	numEmpty := 0
	if _bea {
		_ec.Log.Info("\u0066\u0069\u006e\u0064Ta\u0062\u006c\u0065\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0064\u0020\u0078\u0020%\u0064", width, height)
	}
	for y, rowKey := range _bafeb._agbb {
		row, haveRow := _bafeb._bage[rowKey]
		if !haveRow {
			continue
		}
		for x, colKey := range _bafeb._cgecb {
			tile, haveTile := row[colKey]
			if !haveTile {
				continue
			}
			paras := _beeb.inTile(tile)
			if len(paras) != 0 {
				table.putComposite(x, y, paras, tile.PdfRectangle)
				for _, para := range paras {
					used[para] = struct{}{}
				}
				continue
			}
			numEmpty++
			if numEmpty > maxEmpty {
				if _bea {
					_ec.Log.Info("\u0021\u006e\u0075m\u0045\u006d\u0070\u0074\u0079\u003d\u0025\u0064", numEmpty)
				}
				return nil, nil
			}
		}
	}
	counted := 0
	for x := 0; x < width; x++ {
		if cell := table.get(x, 0); cell == nil || !cell._bedf {
			counted++
		}
	}
	if counted == 0 {
		if _bea {
			_ec.Log.Info("\u0021\u006e\u0075m\u0048\u0065\u0061\u0064\u0065\u0072\u003d\u0030")
		}
		return nil, nil
	}
	reduced := table.reduceTiling(_bafeb, _ffee)
	reduced = reduced.subdivide()
	return reduced, used
}

// isExportable reports whether the table should be exported. Tables flagged
// _beaeg always are. Otherwise the table is exportable as soon as one cell in
// column 0 is nil, or has text longer than one rune that does not match _eeggd.
func (_faggb *textTable) isExportable() bool {
	if _faggb._beaeg {
		return true
	}
	trivial := func(_eebf int) bool {
		cell := _faggb.get(0, _eebf)
		if cell == nil {
			return false
		}
		content := cell.text()
		runes := _g.RuneCountInString(content)
		matched := _eeggd.MatchString(content)
		return runes <= 1 || matched
	}
	for y := 0; y < _faggb._dcfg; y++ {
		if !trivial(y) {
			return true
		}
	}
	return false
}

// _fece converts _fbff to an integer index by dividing by _edbf and
// truncating; one is subtracted unless _fbff >= 0.
func _fece(_fbff float64) int {
	index := int(_fbff / _edbf)
	if !(_fbff >= 0) {
		index--
	}
	return index
}

// _fadg returns the keys of _eaffb sorted in increasing order.
func _fadg(_eaffb map[int][]float64) []int {
	keys := make([]int, 0, len(_eaffb))
	for key := range _eaffb {
		keys = append(keys, key)
	}
	_c.Ints(keys)
	return keys
}

// rectRuling is a rectangle with a colour, a ruling kind and a mark kind.
type rectRuling struct {
	_dfec  rulingKind
	_effbc markKind
	_fg.Color
	_ce.PdfRectangle
}
// String returns a human readable description of the ruling list: the counts
// of the two slices returned by vertsHorzs and, when both are non-empty, a
// rectangle built from the _abbgc of their first and last entries.
func (_dfcg rulingList) String() string {
	if len(_dfcg) == 0 {
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := _dfcg.vertsHorzs()
	numVerts, numHorzs := len(verts), len(horzs)
	if numVerts == 0 || numHorzs == 0 {
		return _ge.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", numVerts, numHorzs)
	}
	extent := _ce.PdfRectangle{
		Llx: verts[0]._abbgc,
		Urx: verts[numVerts-1]._abbgc,
		Lly: horzs[numHorzs-1]._abbgc,
		Ury: horzs[0]._abbgc,
	}
	return _ge.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", numVerts, numHorzs, extent)
}

// setCharSpacing stores _cdfg in the text state's _ggf field. No-op on a nil receiver.
func (_gac *textObject) setCharSpacing(_cdfg float64) {
	if _gac == nil {
		return
	}
	state := _gac._befa
	state._ggf = _cdfg
	_gac._befa = state
	if _dggee {
		_ec.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _cdfg, _gac._befa.String())
	}
}

// toTextMarks returns the text marks of all words of the line. A single-space
// mark is inserted (via _cacf) before every word whose _gfffc flag is set.
func (_bfad *textLine) toTextMarks(_ccfg *int) []TextMark {
	var marks []TextMark
	for _, word := range _bfad._aebc {
		if word._gfffc {
			marks = _cacf(marks, _ccfg, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_ccfg)...)
	}
	return marks
}
// String returns a string describing the tile: its rectangle followed by one
// character per border flag ("L", "R", "B", "T"), or "_" where a flag is unset.
func (_dcea gridTile) String() string {
	flag := func(_eaga bool, _dbfff string) string {
		if !_eaga {
			return "\u005f"
		}
		return _dbfff
	}
	return _ge.Sprintf("\u00256\u002e2\u0066\u0020\u0025\u0031\u0073%\u0031\u0073%\u0031\u0073\u0025\u0031\u0073",
		_dcea.PdfRectangle,
		flag(_dcea._ebdg, "\u004c"),
		flag(_dcea._ebga, "\u0052"),
		flag(_dcea._ddbaf, "\u0042"),
		flag(_dcea._fgde, "\u0054"))
}

// _fdecc returns the text lines of the first child of _fdeg tagged "LBody",
// "Span" or "InlineShape". For an "LBody" child without lines of its own the
// search recurses into that child. It returns nil when no such child exists.
func _fdecc(_fdeg *list) []*textLine {
	for _, child := range _fdeg._gbab {
		switch child._ffcg {
		case "\u004c\u0042\u006fd\u0079":
			if len(child._ggdb) == 0 {
				return _fdecc(child)
			}
			return child._ggdb
		case "\u0053\u0070\u0061\u006e", "I\u006e\u006c\u0069\u006e\u0065\u0053\u0068\u0061\u0070\u0065":
			return child._ggdb
		}
	}
	return nil
}

// _gcgf classifies a rectangle by its longer side: _ecac when the width is
// larger and at least _cec, _ebdaf when the height is not smaller than the
// width and at least _cec, and _gdcf otherwise.
func _gcgf(_fdba _ce.PdfRectangle) rulingKind {
	width := _fdba.Width()
	height := _fdba.Height()
	if width > height {
		if width >= _cec {
			return _ecac
		}
		return _gdcf
	}
	if height >= _cec {
		return _ebdaf
	}
	return _gdcf
}

// renderText converts the bytes _bed into text marks using the current font
// and appends them to the text object, advancing the text matrix after every
// mark that was kept. It returns an error when the font has no metrics for a
// character code. Nothing is processed when the _efbb flag is set.
func (_cbgb *textObject) renderText(_egg _gb.PdfObject, _bed []byte, _cdfd int) error {
	if _cbgb._efbb {
		_ec.Log.Debug("\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e")
		return nil
	}
	font := _cbgb.getCurrentFont()
	codes := font.BytesToCharcodes(_bed)
	texts, numChars, numMisses := font.CharcodesToStrings(codes)
	if numMisses > 0 {
		_ec.Log.Debug("\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064", numChars, numMisses)
	}
	_cbgb._befa._baf += numChars
	_cbgb._befa._afgb += numMisses

	state := _cbgb._befa
	fontSize := state._ccf
	horizScale := state._gdc / 100.0
	glyphScale := _fgc
	if font.Subtype() == "\u0054\u0079\u0070e\u0033" {
		glyphScale = 1
	}

	// Space metrics: by rune, then by char code 32, then from the default font.
	spaceMetrics, ok := font.GetRuneMetrics(' ')
	if !ok {
		spaceMetrics, ok = font.GetCharMetrics(32)
	}
	if !ok {
		spaceMetrics, _ = _ce.DefaultFont().GetRuneMetrics(' ')
	}
	spaceWidth := spaceMetrics.Wx * glyphScale
	_ec.Log.Trace("\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066", spaceWidth, texts, font, fontSize)

	stateMatrix := _bc.NewMatrix(fontSize*horizScale, 0, 0, fontSize, 0, state._gegg)
	if _dggee {
		_ec.Log.Info("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071", len(codes), codes, texts)
	}
	_ec.Log.Trace("\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071", len(codes), codes, len(texts))

	fillColor := _cbgb.getFillColor()
	strokeColor := _cbgb.getStrokeColor()
	for i, s := range texts {
		runes := []rune(s)
		// A lone NUL rune produces no mark and no advance.
		if len(runes) == 1 && runes[0] == '\x00' {
			continue
		}
		code := codes[i]
		trm := _cbgb._gbe.CTM.Mult(_cbgb._eee).Mult(stateMatrix)
		wordSpacing := 0.0
		if len(runes) == 1 && runes[0] == 32 {
			wordSpacing = state._eag
		}
		metrics, found := font.GetCharMetrics(code)
		if !found {
			_ec.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073", code, runes, runes, font)
			return _ge.Errorf("\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064", font.String(), code)
		}
		glyph := _bc.Point{X: metrics.Wx * glyphScale, Y: metrics.Wy * glyphScale}
		// t0 excludes the character spacing, t includes it.
		t0 := _bc.Point{X: (glyph.X*fontSize + wordSpacing) * horizScale}
		t := _bc.Point{X: (glyph.X*fontSize + state._ggf + wordSpacing) * horizScale}
		if _dggee {
			_ec.Log.Info("\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066", fontSize, state._ggf, state._eag, horizScale)
			_ec.Log.Info("\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f", glyph, t0, t)
		}
		td0 := _dgfb(t0)
		td := _dgfb(t)
		end := _cbgb._gbe.CTM.Mult(_cbgb._eee).Mult(td0)
		if _fgba {
			_ec.Log.Info("e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a"+"\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a"+"\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073", _cbgb._gbe.CTM, _cbgb._eee, td, _afb(_cbgb._gbe.CTM.Mult(_cbgb._eee).Mult(td)), td0, end, _afb(end))
		}
		mark, onPage := _cbgb.newTextMark(_fd.ExpandLigatures(runes), trm, _afb(end), _aa.Abs(spaceWidth*trm.ScalingFactorX()), font, _cbgb._befa._ggf, fillColor, strokeColor, _egg, texts, i, _cdfd)
		if !onPage {
			// Skipped marks do not advance the text matrix.
			_ec.Log.Debug("\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067")
			continue
		}
		if font == nil {
			_ec.Log.Debug("\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e")
		} else if font.Encoder() == nil {
			_ec.Log.Debug("E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073", font)
		} else {
			if original, have := font.Encoder().CharcodeToRune(code); have {
				mark._gdcgd = string(original)
			}
		}
		_ec.Log.Trace("i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073", i, code, mark, trm)
		_cbgb._cfb = append(_cbgb._cfb, &mark)
		_cbgb._eee.Concat(td)
	}
	return nil
}

// asRuling converts the line into a ruling. For kind _ebdaf the position is
// the mean X and the extent is the Y range; for kind _ecac the position is the
// mean Y and the extent is the X range. Any other kind logs an error and
// yields (nil, false).
func (_bcgaa lineRuling) asRuling() (*ruling, bool) {
	result := ruling{_bfbc: _bcgaa._fbbag, Color: _bcgaa.Color, _bgaa: _gbffb}
	p1, p2 := _bcgaa._ddgfc, _bcgaa._eacg
	switch _bcgaa._fbbag {
	case _ebdaf:
		result._abbgc = _bcgaa.xMean()
		result._cebe = _aa.Min(p1.Y, p2.Y)
		result._deee = _aa.Max(p1.Y, p2.Y)
	case _ecac:
		result._abbgc = _bcgaa.yMean()
		result._cebe = _aa.Min(p1.X, p2.X)
		result._deee = _aa.Max(p1.X, p2.X)
	default:
		_ec.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _bcgaa._fbbag)
		return nil, false
	}
	return &result, true
}

// toTextMarks returns the word's marks converted with ToTextMark and
// accumulated through _cfaf.
func (_ccgae *textWord) toTextMarks(_dbadg *int) []TextMark {
	var marks []TextMark
	for _, mark := range _ccgae._bgeaa {
		converted := mark.ToTextMark()
		marks = _cfaf(marks, _dbadg, converted)
	}
	return marks
}

// scanBand scans the words of the bag whose _cffdg lies in
// [_bacdd, _ecgcc] widened by _cdca times _bdde's _egfa, counting those
// accepted by _ffcf, within the size tolerance _cdfea (when positive) and not
// blocked by _bdde. Unless _eef is set, accepted words are pulled into _bdde
// and removed from the bag; when _eef is set the scan of a depth index stops
// at its first accepted word. Unless _dffg is set the band grows to include
// each accepted word. It returns the number of accepted words.
func (_fgff *wordBag) scanBand(_ceea string, _bdde *wordBag, _ffcf func(_dfdg *wordBag, _ggd *textWord) bool, _bacdd, _ecgcc, _cdfea float64, _eef, _dffg bool) int {
	bagSize := _bdde._egfa
	var removals map[int]map[*textWord]struct{}
	if !_eef {
		removals = _fgff.makeRemovals()
	}
	tol := _cdca * bagSize
	count := 0
	for _, depthIdx := range _fgff.depthBand(_bacdd-tol, _ecgcc+tol) {
		if len(_fgff._faba[depthIdx]) == 0 {
			continue
		}
		for _, word := range _fgff._faba[depthIdx] {
			if !(_bacdd-tol <= word._cffdg && word._cffdg <= _ecgcc+tol) {
				continue
			}
			if !_ffcf(_bdde, word) {
				continue
			}
			relDiff := 2.0 * _aa.Abs(word._ddgee-_bdde._egfa) / (word._ddgee + _bdde._egfa)
			ratio := _aa.Max(word._ddgee/_bdde._egfa, _bdde._egfa/word._ddgee)
			sizeDiff := _aa.Min(relDiff, ratio)
			if _cdfea > 0 && sizeDiff > _cdfea {
				continue
			}
			if _bdde.blocked(word) {
				continue
			}
			if !_eef {
				_bdde.pullWord(word, depthIdx, removals)
			}
			count++
			if !_dffg {
				if word._cffdg < _bacdd {
					_bacdd = word._cffdg
				}
				if word._cffdg > _ecgcc {
					_ecgcc = word._cffdg
				}
			}
			if _eef {
				break
			}
		}
	}
	if !_eef {
		_fgff.applyRemovals(removals)
	}
	return count
}

// _cab returns a textState with _gdc set to 100, _bbd set to RenderModeFill
// and _dab set to _eec.
func _cab(_eec _ce.PdfRectangle) textState {
	state := textState{_gdc: 100, _bbd: RenderModeFill}
	state._dab = _eec
	return state
}

// minDepth returns _cfg minus the difference between Ury and _egfa.
func (_fagc *wordBag) minDepth() float64 {
	offset := _fagc.Ury - _fagc._egfa
	return _fagc._cfg - offset
}

// _ggabe reports whether the absolute value of _deabe is below _gcef.
func _ggabe(_deabe float64) bool {
	magnitude := _aa.Abs(_deabe)
	return magnitude < _gcef
}
// xNeighbours builds two events per paragraph -- one at its left edge (flagged true) and one
// at its right edge (flagged false), both tagged with the paragraph's index -- and hands
// them to eventNeighbours. When `_gcdda` is non-zero each edge is first pushed outwards by
// `_gcdda` times the paragraph's fontsize().
func (_dbed paraList) xNeighbours(_gcdda float64) map[*textPara][]int {
	events := make([]event, 0, 2*len(_dbed))
	for idx, para := range _dbed {
		left, right := para.Llx, para.Urx
		if _gcdda != 0 {
			left = para.Llx - _gcdda*para.fontsize()
			right = para.Urx + _gcdda*para.fontsize()
		}
		events = append(events, event{left, true, idx}, event{right, false, idx})
	}
	return _dbed.eventNeighbours(events)
}
// String returns a description of `t`: its two dimensions (`_ecbf` x `_dcfg`) followed by
// the `_beaeg` flag.
func (_bdgda *textTable) String() string {
	w, h := _bdgda._ecbf, _bdgda._dcfg
	return _ge.Sprintf("\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u0020\u0025\u0074", w, h, _bdgda._beaeg)
}

// _afb returns the translation part of the matrix as a point.
func _afb(_cced _bc.Matrix) _bc.Point {
	var pt _bc.Point
	pt.X, pt.Y = _cced.Translation()
	return pt
}

// logComposite dumps the composite-cell table to the log and stdout when `_afcg` is set.
// It prints two grids: the number of paragraphs in every cell, then the (truncated, quoted)
// merged text of every cell. `_bgeb` is a caller-supplied title.
func (_fbfg *textTable) logComposite(_bgeb string) {
	if !_afcg {
		return
	}
	cols, rows := _fbfg._ecbf, _fbfg._dcfg

	// header prints the column-number row followed by the separator row.
	header := func(colFmt string, rule func()) {
		_ge.Printf("\u0025\u0035\u0073 \u007c", "")
		for col := 0; col < cols; col++ {
			_ge.Printf(colFmt, col)
		}
		_ge.Println("")
		_ge.Printf("\u0025\u0035\u0073 \u002b", "")
		for col := 0; col < cols; col++ {
			rule()
		}
		_ge.Println("")
	}

	// First grid: paragraph counts.
	_ec.Log.Info("\u007e~\u007eP\u0061\u0072\u0061\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", cols, rows, _bgeb)
	header("\u0025\u0033\u0064 \u007c", func() {
		_ge.Printf("\u0025\u0033\u0073 \u002b", "\u002d\u002d\u002d")
	})
	for row := 0; row < rows; row++ {
		_ge.Printf("\u0025\u0035\u0064 \u007c", row)
		for col := 0; col < cols; col++ {
			paras, _ := _fbfg._egfe[_aaca(col, row)].parasBBox()
			_ge.Printf("\u0025\u0033\u0064 \u007c", len(paras))
		}
		_ge.Println("")
	}

	// Second grid: cell text.
	_ec.Log.Info("\u007e~\u007eT\u0065\u0078\u0074\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0073", cols, rows, _bgeb)
	header("\u0025\u0031\u0032\u0064\u0020\u007c", func() {
		_ge.Print("\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d\u002d-\u002d\u002d\u002d\u002b")
	})
	for row := 0; row < rows; row++ {
		_ge.Printf("\u0025\u0035\u0064 \u007c", row)
		for col := 0; col < cols; col++ {
			paras, _ := _fbfg._egfe[_aaca(col, row)].parasBBox()
			var text string
			if merged := paras.merge(); merged != nil {
				text = merged.text()
			}
			// %q escapes the text; the surrounding quotes are then stripped.
			quoted := _ge.Sprintf("\u0025\u0071", _adagc(text, 12))
			_ge.Printf("\u0025\u0031\u0032\u0073\u0020\u007c", quoted[1:len(quoted)-1])
		}
		_ge.Println("")
	}
}

// log prints one line per non-nil paragraph (index, bounding box, table dimensions if the
// paragraph has a table, and truncated text) when `_eaba` is set. `_eedee` is a title.
func (_ebabb paraList) log(_eedee string) {
	if !_eaba {
		return
	}
	_ec.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _eedee, len(_ebabb))
	for idx, para := range _ebabb {
		if para == nil {
			continue
		}
		text := para.text()
		dims := "\u0020\u0020"
		if table := para._bddea; table != nil {
			dims = _ge.Sprintf("\u005b%\u0064\u0078\u0025\u0064\u005d", table._ecbf, table._dcfg)
		}
		_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0025s\u0020\u0025\u0071\u000a", idx, para.PdfRectangle, dims, _adagc(text, 50))
	}
}
// ToTextMark returns the public view of `tm`.
func (_cfcg *textMark) ToTextMark() TextMark {
	return TextMark{
		Text:         _cfcg._ecaa,
		Original:     _cfcg._gdcgd,
		BBox:         _cfcg._aefef,
		Font:         _cfcg._bcdb,
		FontSize:     _cfcg._bfaca,
		FillColor:    _cfcg._abgd,
		StrokeColor:  _cfcg._eedd,
		Orientation:  _cfcg._edge,
		DirectObject: _cfcg._bfade,
		ObjString:    _cfcg._fcdc,
		Tw:           _cfcg.Tw,
		Th:           _cfcg.Th,
		Tc:           _cfcg._cgdb,
		Index:        _cfcg._beaa,
	}
}

// structTreeRoot holds what parseStructTreeRoot reads from a structure tree root dictionary:
// the parsed elements of its "K" entry and the string form of its "Type" entry.
type structTreeRoot struct {
	_dgbb []structElement
	_fdec string
}

// parseStructTreeRoot fills the receiver from the given object.
//
// Nothing is changed when `_daec` is nil or is not a dictionary. The original code logged
// the "dictionary not found" case and then still dereferenced the missing dictionary.
// A missing "Type" entry now yields an empty type string instead of a nil-interface call.
// "K" is used when it is an array, or wrapped in a one-element array when it is a reference;
// any other form yields no elements.
func (_edfe *structTreeRoot) parseStructTreeRoot(_daec _gb.PdfObject) {
	if _daec == nil {
		return
	}
	_adbe, _edgcg := _gb.GetDict(_daec)
	if !_edgcg {
		_ec.Log.Debug("\u0070\u0061\u0072s\u0065\u0053\u0074\u0072\u0075\u0063\u0074\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u003a\u0020\u0064\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006eo\u0074\u0020\u0066\u006f\u0075\u006e\u0064\u002e")
		return
	}
	K := _adbe.Get("\u004b")
	var _aggb string
	if _typeObj := _adbe.Get("\u0054\u0079\u0070\u0065"); _typeObj != nil {
		_aggb = _typeObj.String()
	}
	var _fffd *_gb.PdfObjectArray
	switch _accd := K.(type) {
	case *_gb.PdfObjectArray:
		_fffd = _accd
	case *_gb.PdfObjectReference:
		_fffd = _gb.MakeArray(K)
	}
	_ega := []structElement{}
	if _fffd != nil {
		for _, _dcce := range _fffd.Elements() {
			_ddfc := &structElement{}
			_ddfc.parseStructElement(_dcce)
			_ega = append(_ega, *_ddfc)
		}
	}
	_edfe._dgbb = _ega
	_edfe._fdec = _aggb
}

// primaries returns the distinct `_abbgc` values of the rulings in ascending order.
func (_aefgf rulingList) primaries() []float64 {
	_fecb := make(map[float64]struct{}, len(_aefgf))
	for _, _cfga := range _aefgf {
		_fecb[_cfga._abbgc] = struct{}{}
	}
	_effa := make([]float64, len(_fecb))
	_ccaag := 0
	for _afdga := range _fecb {
		_effa[_ccaag] = _afdga
		_ccaag++
	}
	_c.Float64s(_effa)
	return _effa
}

// findPrimSec returns the first ruling whose `_abbgc` matches `_abgfc` according to `_ffegg`
// and whose [`_cebe`, `_deee`] range, widened by `_ebbf`, contains `_cfafg`. It returns nil
// when there is no such ruling.
func (_fcbgb rulingList) findPrimSec(_abgfc, _cfafg float64) *ruling {
	for _, _cbbg := range _fcbgb {
		if _ffegg(_cbbg._abbgc-_abgfc) && _cbbg._cebe-_ebbf <= _cfafg && _cfafg <= _cbbg._deee+_ebbf {
			return _cbbg
		}
	}
	return nil
}

const _fgc = 1.0 / 1000.0

// toTilings tidies the rulings, groups them into grids with toGrids and converts every grid
// with asTiling. It returns the tidied rulings and the tilings, or (nil, nil) for an empty
// list.
func (_begd rulingList) toTilings() (rulingList, []gridTiling) {
	_begd.log("\u0074o\u0054\u0069\u006c\u0069\u006e\u0067s")
	if len(_begd) == 0 {
		return nil, nil
	}
	_begd = _begd.tidied("\u0061\u006c\u006c")
	_begd.log("\u0074\u0069\u0064\u0069\u0065\u0064")
	_daa := _begd.toGrids()
	_feac := make([]gridTiling, len(_daa))
	for _debb, _abffe := range _daa {
		_feac[_debb] = _abffe.asTiling()
	}
	return _begd, _feac
}

// sortTopoOrder reorders the list in place using the order computed by topoOrder.
func (_bfgf paraList) sortTopoOrder() {
	_bdcg := _bfgf.topoOrder()
	_bfgf.reorder(_bdcg)
}

// _egf returns a new textObject wired to the given extractor, resources, graphics state,
// text state and state stack, with both of its matrices set to the identity.
func _egf(_bebb *Extractor, _bdb *_ce.PdfPageResources, _eab _ag.GraphicsState, _fgfe *textState, _ebc *stateStack) *textObject {
	return &textObject{
		_bgcb:  _bebb,
		_bacad: _bdb,
		_gbe:   _eab,
		_ebf:   _ebc,
		_befa:  _fgfe,
		_eee:   _bc.IdentityMatrix(),
		_ecg:   _bc.IdentityMatrix(),
	}
}

// isActualGrid reports whether the rulings form a grid. The two lists returned by
// augmentGrid must have at least `_cbbb`+1 and `_acgd`+1 entries respectively. Then, when
// `_gdcg` is set, the outermost rulings of both lists must meet at the corners (checked with
// `_ggabe`); otherwise both lists must be aligned(). On success the concatenation of the
// two lists is returned.
//
// NOTE(review): the "Not aligned verts" message is gated by `_fcfe` while every other
// message here is gated by `_gfgc` -- confirm this is intentional.
func (_aacg rulingList) isActualGrid() (rulingList, bool) {
	_cbea, _eaegd := _aacg.augmentGrid()
	if !(len(_cbea) >= _cbbb+1 && len(_eaegd) >= _acgd+1) {
		if _gfgc {
			_ec.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(_cbea), len(_eaegd), _cbbb+1, _acgd+1)
		}
		return nil, false
	}
	if _gfgc {
		_ec.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", _aacg, len(_cbea) >= 2, len(_eaegd) >= 2, len(_cbea) >= 2 && len(_eaegd) >= 2)
		for _gcgc, _gfce := range _aacg {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", _gcgc, _gfce)
		}
	}
	if _gdcg {
		_bgbb, _dgbc := _cbea[0], _cbea[len(_cbea)-1]
		_cdfda, _cdac := _eaegd[0], _eaegd[len(_eaegd)-1]
		if !(_ggabe(_bgbb._abbgc-_cdfda._cebe) && _ggabe(_dgbc._abbgc-_cdfda._deee) && _ggabe(_cdfda._abbgc-_bgbb._deee) && _ggabe(_cdac._abbgc-_bgbb._cebe)) {
			if _gfgc {
				_ec.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", _bgbb, _dgbc, _cdfda, _cdac)
			}
			return nil, false
		}
	} else {
		if !_cbea.aligned() {
			if _fcfe {
				_ec.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(_cbea))
			}
			return nil, false
		}
		if !_eaegd.aligned() {
			if _gfgc {
				_ec.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(_eaegd))
			}
			return nil, false
		}
	}
	_cbfeca := append(_cbea, _eaegd...)
	return _cbfeca, true
}
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_fba PageText) Marks() *TextMarkArray {
	return &TextMarkArray{_dec: _fba._ebfc}
}

// put stores `_abgbb` in the cell addressed by (`_bfgfd`, `_cgga`); the map key is produced
// by `_aaca`.
func (_fbbda *textTable) put(_bfgfd, _cgga int, _abgbb *textPara) {
	_fbbda._gcbga[_aaca(_bfgfd, _cgga)] = _abgbb
}

// _eggg returns a new 2 x 2 textTable holding the four paragraphs at (0,0), (1,0), (0,1)
// and (1,1), in argument order.
func _eggg(_fdce, _eddee, _aafgf, _eddcc *textPara) *textTable {
	_ddcg := &textTable{_ecbf: 2, _dcfg: 2, _gcbga: make(map[uint64]*textPara, 4)}
	_ddcg.put(0, 0, _fdce)
	_ddcg.put(1, 0, _eddee)
	_ddcg.put(0, 1, _aafgf)
	_ddcg.put(1, 1, _eddcc)
	return _ddcg
}

// getDepthIdx converts `_dggd` to an index with `_fece` and clamps it to the range spanned
// by the first and last entries of depthIndexes().
//
// When the bag has no depth indexes there is nothing to clamp against, so the unclamped
// index is returned; the original indexed element 0 unconditionally and panicked.
// NOTE(review): the clamp assumes depthIndexes() is sorted ascending -- confirm.
func (_ggcg *wordBag) getDepthIdx(_dggd float64) int {
	_edcb := _ggcg.depthIndexes()
	_dfgac := _fece(_dggd)
	if len(_edcb) == 0 {
		return _dfgac
	}
	if _dfgac < _edcb[0] {
		return _edcb[0]
	}
	if _dfgac > _edcb[len(_edcb)-1] {
		return _edcb[len(_edcb)-1]
	}
	return _dfgac
}
// Text returns the extracted page text.
func (_bge PageText) Text() string {
	text := _bge._gcee
	return text
}

// getFontDict looks up the font named `_decf` in the text object's page resources.
// It returns (nil, nil) when the text object has no resources, and an error when the
// resources do not contain the font.
func (_degd *textObject) getFontDict(_decf string) (_aefg _gb.PdfObject, _aaee error) {
	resources := _degd._bacad
	if resources == nil {
		_ec.Log.Debug("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071", _decf)
		return nil, nil
	}
	fontObj, found := resources.GetFontByName(_gb.PdfObjectName(_decf))
	if found {
		return fontObj, nil
	}
	_ec.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071", _decf)
	return nil, _d.New("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073")
}

// _decgb reports whether both `_fcd` and `_fdg` hold for the two rectangles; `_fdg` is only
// evaluated when `_fcd` is true.
func _decgb(_bbcb, _fcgb _ce.PdfRectangle) bool {
	if !_fcd(_bbcb, _fcgb) {
		return false
	}
	return _fdg(_bbcb, _fcgb)
}

// last returns the final point of the subpath. It panics on an empty subpath.
func (_fecf *subpath) last() _bc.Point {
	points := _fecf._gfefe
	return points[len(points)-1]
}
// markWordBoundaries sets `_gfffc` on every word (from the second onwards) whose gap to the
// preceding word, as measured by `_gdgbc`, is at least `_ddag` times the line's `_ecag`.
//
// A line with no words is left untouched; the original sliced `_aebc[1:]` unconditionally,
// which panics on an empty slice.
func (_fagb *textLine) markWordBoundaries() {
	_dged := _ddag * _fagb._ecag
	_words := _fagb._aebc
	for _ccff := 1; _ccff < len(_words); _ccff++ {
		_egeeg := _words[_ccff]
		if _gdgbc(_egeeg, _words[_ccff-1]) >= _dged {
			_egeeg._gfffc = true
		}
	}
}
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
type TextTable struct {
	_ce.PdfRectangle
	W, H  int
	Cells [][]TableCell
}

// add inserts `_gefd` into the set.
func (_aeeff intSet) add(_gefd int) {
	_aeeff[_gefd] = struct{}{}
}

// _abgbe renders an image mask into an RGBA image of the same size: pixels whose red, green
// and blue components sum to zero are painted with `_gbdc`, all others are transparent.
// Pixels whose color cannot be read are logged and left unset.
func _abgbe(_adbbg *_ce.Image, _gbdc _fg.Color) _ded.Image {
	width, height := int(_adbbg.Width), int(_adbbg.Height)
	canvas := _ded.NewRGBA(_ded.Rect(0, 0, width, height))
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			pixel, err := _adbbg.ColorAt(x, y)
			if err != nil {
				_ec.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e", x, y)
				continue
			}
			var fill _fg.Color = _fg.Transparent
			if r, g, b, _ := pixel.RGBA(); r+g+b == 0 {
				fill = _gbdc
			}
			canvas.Set(x, y, fill)
		}
	}
	return canvas
}

// _cbda returns nil when a license key is present and licensed, or when `_ff` is set.
// Otherwise it prints an "unlicensed" notice to stdout and returns an error. The argument
// is not used.
func _cbda(_bdaf *PageText) error {
	key := _bd.GetLicenseKey()
	licensed := key != nil && key.IsLicensed()
	if licensed || _ff {
		return nil
	}
	_ge.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_ge.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _d.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}

// checkWidth returns `_afab - _dccaf` and whether that difference is at most `_gcef`.
func (_beecg rectRuling) checkWidth(_dccaf, _afab float64) (float64, bool) {
	width := _afab - _dccaf
	return width, width <= _gcef
}

// cubicTo records only the end point (`_dcec`, `_gadf`) of the curve; the four control-point
// coordinates are ignored.
func (_bfac *shapesState) cubicTo(_agf, _dbg, _fgca, _cbaae, _dcec, _gadf float64) {
	if _cbag {
		_ec.Log.Info("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a")
	}
	_bfac.addPoint(_dcec, _gadf)
}

// _acgg walks the keys returned by `_fadg`, starting at the first key with a non-nil value,
// and adjusts the float lists stored in the map in place. Maps with at most one entry are
// left alone.
//
// NOTE(review): inside the loop both `current` and `ys` are read from the *current* key, so
// the comparison is between the last and first element of the same list, and the write
// stores the current list under the *previous* key. This looks like it was meant to read
// the previous key's list -- confirm against the intended behaviour before changing it.
func _acgg(_bddeg map[int][]float64) {
	if len(_bddeg) <= 1 {
		return
	}
	keys := _fadg(_bddeg)
	if _afcg {
		_ec.Log.Info("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076", keys)
	}
	var start, prevKey int
	for start, prevKey = range keys {
		if _bddeg[prevKey] != nil {
			break
		}
	}
	for offset, key := range keys[start:] {
		current := _bddeg[key]
		if current == nil {
			continue
		}
		if _afcg {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a", start+offset, prevKey, key)
		}
		ys := _bddeg[key]
		last := len(ys) - 1
		if ys[last] > current[0] {
			ys[last] = current[0]
			_bddeg[prevKey] = ys
		}
		prevKey = key
	}
}

// _fbcc builds the paragraphs of a page from its text marks. The marks are grouped by
// `_fecgf`, `_dca`, `_cgbf` and `_bddf`, each resulting group is turned into a paragraph
// with arrangeText, and the paragraphs are sorted into reading order. Unless `_fbgd` is set,
// tables are extracted (when there are at least `_ddd` paragraphs) and the paragraphs are
// additionally put in topological order. It returns nil when there is nothing to arrange.
func _fbcc(_agbg []*textMark, _gbefe _ce.PdfRectangle, _afgd rulingList, _dffd []gridTiling, _fbgd bool) paraList {
	_ec.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_agbg), _gbefe)
	if len(_agbg) == 0 {
		return nil
	}
	elems := _fecgf(_agbg, _gbefe)
	if len(elems) == 0 {
		return nil
	}
	_afgd.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := _afgd.vertsHorzs()
	top := _gbefe.Ury
	groups := _bddf(_cgbf(_dca(elems, top, verts, horzs), top, verts, horzs))

	paras := make(paraList, 0, len(groups))
	for _, group := range groups {
		if para := group.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}
	if !_fbgd && len(paras) >= _ddd {
		paras = paras.extractTables(_dffd)
	}
	paras.sortReadingOrder()
	if !_fbgd {
		paras.sortTopoOrder()
	}
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}
// Append appends `mark` to the mark array.
func (_ceae *TextMarkArray) Append(mark TextMark) {
	marks := append(_ceae._dec, mark)
	_ceae._dec = marks
}

// inTile returns the paragraphs whose bounding rectangle is contained in `_dedc`, in their
// original order. The result is dumped to stdout when `_afcg` is set.
func (_gfdf paraList) inTile(_dedc gridTile) paraList {
	var inside paraList
	for _, para := range _gfdf {
		if !_dedc.contains(para.PdfRectangle) {
			continue
		}
		inside = append(inside, para)
	}
	if !_afcg {
		return inside
	}
	_ge.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _dedc, len(inside))
	for idx, para := range inside {
		_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, para)
	}
	_ge.Println("")
	return inside
}
// String returns a description of `b`: its bounding rectangle, its font size (`_egfa`), the
// number of words and the `_bbdb` string of every word, visited in depthIndexes() order.
func ( _cbf * wordBag ) String ( ) string { var _ccdg [ ] string ; for _ , _ebfd := range _cbf . depthIndexes ( ) { _aeb := _cbf . _faba [ _ebfd ] ; for _ , _efba := range _aeb { _ccdg = append ( _ccdg , _efba . _bbdb ) ; } ; } ; return _ge . Sprintf ( "\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071" , _cbf . PdfRectangle , _cbf . _egfa , len ( _ccdg ) , _ccdg ) ;
// The definitions that start on the next line are:
//
// subpath.add appends the given points to the subpath.
//
// arrangeText empties the bag into text lines and returns them as one *textPara, or nil
// when no line was produced. The bag is sorted first and, when `_cafd` is set, duplicates
// are removed. For every depth index, while that bin still has words, a new line is started
// (via `_egee`) at the bin chosen by firstReadingIndex and is then grown one word at a
// time: every bin in the depth band around the first word offers its highestWord, and among
// the offers whose gap to the line's last word (`_gdgbc`) is acceptable the one that `_fabg`
// orders first is pulled out of the bag into the line. Finished lines get their word
// boundaries marked, are sorted with `_cbcf` and are combined by `_ffec`.
func ( _cbbfe * subpath ) add ( _bcde ... _bc . Point ) { _cbbfe . _gfefe = append ( _cbbfe . _gfefe , _bcde ... ) } ; func ( _ebcg * wordBag ) arrangeText ( ) * textPara { _ebcg . sort ( ) ; if _cafd { _ebcg . removeDuplicates ( ) ; } ; var _gdced [ ] * textLine ; for _ , _bga := range _ebcg . depthIndexes ( ) { for ! _ebcg . empty ( _bga ) { _cgbc := _ebcg . firstReadingIndex ( _bga ) ;
// All thresholds below are scaled by `_ddgee` of the line's first word: the depth band is
// `_cffdg` +/- `_cdca`*size, the largest accepted gap is `_ebfa`*size and the largest accepted
// negative gap is `_gecdb`*size.
_fgcfa := _ebcg . firstWord ( _cgbc ) ; _fgbdd := _egee ( _ebcg , _cgbc ) ; _fdeb := _fgcfa . _ddgee ; _geed := _fgcfa . _cffdg - _cdca * _fdeb ; _ccabb := _fgcfa . _cffdg + _cdca * _fdeb ; _ddbe := _ebfa * _fdeb ; _aecce := _gecdb * _fdeb ; _aabf : for { var _agaa * textWord ; _egdb := 0 ;
// A candidate whose gap is below -`_aecce` ends the whole line (labelled break); a candidate
// whose gap is above `_ddbe` is merely skipped.
for _ , _baggg := range _ebcg . depthBand ( _geed , _ccabb ) { _geggfe := _ebcg . highestWord ( _baggg , _geed , _ccabb ) ; if _geggfe == nil { continue ; } ; _cgeaa := _gdgbc ( _geggfe , _fgbdd . _aebc [ len ( _fgbdd . _aebc ) - 1 ] ) ; if _cgeaa < - _aecce { break _aabf ; } ; if _cgeaa > _ddbe { continue ;
// The unlabelled break below (no candidate found) also ends the line; pullWord removes the
// chosen word from the bag, which is what lets the enclosing `for !empty` loop terminate.
} ; if _agaa != nil && _fabg ( _geggfe , _agaa ) >= 0 { continue ; } ; _agaa = _geggfe ; _egdb = _baggg ; } ; if _agaa == nil { break ; } ; _fgbdd . pullWord ( _ebcg , _agaa , _egdb ) ; } ; _fgbdd . markWordBoundaries ( ) ; _gdced = append ( _gdced , _fgbdd ) ; } ; } ; if len ( _gdced ) == 0 { return nil ;
} ; _c . Slice ( _gdced , func ( _abffd , _fbfed int ) bool { return _cbcf ( _gdced [ _abffd ] , _gdced [ _fbfed ] ) < 0 } ) ; _gfbc := _ffec ( _ebcg . PdfRectangle , _gdced ) ; if _egd { _ec . Log . Info ( "\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073" , _gfbc . String ( ) ) ;
// Nested debug dumps: lines when `_eeg` is set, their words when `_aacb` is also set, and
// each word's marks.
if _eeg { for _cagg , _bdad := range _gfbc . _bdbcg { _ge . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _cagg , _bdad . String ( ) ) ; if _aacb { for _fbbc , _edfg := range _bdad . _aebc { _ge . Printf ( "\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a" , _fbbc , _edfg . String ( ) ) ;
// _fbcfg (starts on the next line) looks up a single-rune string in the `_eeff` table and
// returns the mapped string and whether it was found. Strings that are not exactly one rune
// long return ("", false).
for _dfe , _ddbd := range _edfg . _bgeaa { _ge . Printf ( "\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n" , _dfe , _ddbd . String ( ) ) ; } ; } ; } ; } ; } ; } ; return _gfbc ; } ; func _fbcfg ( _ffcbg string ) ( string , bool ) { _geacfe := [ ] rune ( _ffcbg ) ; if len ( _geacfe ) != 1 { return "" , false ;
} ; _bffg , _bcfgd := _eeff [ _geacfe [ 0 ] ] ; return _bffg , _bcfgd ; } ;
2023-08-03 17:30:04 +00:00
2023-11-11 11:29:03 +00:00
// PageText represents the layout of text on a device page.
2023-12-17 13:54:01 +00:00
type PageText struct {
	_fcag []*textMark
	// _gcee is the string returned by Text().
	_gcee string
	// _ebfc is the mark slice wrapped by Marks().
	_ebfc []TextMark
	_eadb []TextTable
	_fbbg _ce.PdfRectangle
	_geda []pathSection
	_gegc []pathSection
	_bbb  *_gb.PdfObject
	_fefc _gb.PdfObject
	_ecfe *_ag.ContentStreamOperations
	_ccg  PageTextOptions
}

// _bgeba returns the larger of the two ints.
func _bgeba(_ceabc, _abgg int) int {
	if _ceabc > _abgg {
		return _ceabc
	}
	return _abgg
}

// asTiling converts the rulings of one grid into a gridTiling.
//
// Steps, as performed below:
//  1. adjacent rulings that align on both axes are reported as duplicates;
//  2. the rulings are sorted (sortStrict) and split with vertsHorzs; the distinct primaries
//     of each half give the column and row boundaries;
//  3. one gridTile is built per cell with `_geedc` from the rulings found on its four sides;
//  4. tiles are coalesced along each row, then down the rows, and only tiles for which
//     complete() holds are kept.
//
// An empty gridTiling is returned when the list is empty or when either axis has fewer
// than two distinct primaries. The original only tested for a cell count of exactly 0, so
// an axis with no primaries (count -1) fell through to an out-of-range index, and an empty
// list panicked on `[1:]`.
//
// NOTE(review): `_ebdg`/`_ebga`/`_fgde`/`_ddbaf` are presumably the tile's border flags;
// their exact sides are not established by this function -- confirm against gridTile.
func (_aagf rulingList) asTiling() gridTiling {
	if _bea {
		_ec.Log.Info("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(_aagf))
	}
	if len(_aagf) == 0 {
		return gridTiling{}
	}
	for _bfacf, _afged := range _aagf[1:] {
		_egada := _aagf[_bfacf]
		if _egada.alignsPrimary(_afged) && _egada.alignsSec(_afged) {
			_ec.Log.Error("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073", _afged, _egada)
		}
	}
	_aagf.sortStrict()
	_aagf.log("\u0073n\u0061\u0070\u0070\u0065\u0064")
	_cdge, _bacf := _aagf.vertsHorzs()
	_dbcg := _cdge.primaries()
	_geag := _bacf.primaries()
	// Number of columns and rows: one fewer than the number of boundaries.
	_dfdb := len(_dbcg) - 1
	_ecacf := len(_geag) - 1
	if _dfdb <= 0 || _ecacf <= 0 {
		return gridTiling{}
	}
	_gaae := _ce.PdfRectangle{Llx: _dbcg[0], Urx: _dbcg[_dfdb], Lly: _geag[0], Ury: _geag[_ecacf]}
	if _bea {
		_ec.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064", len(_cdge))
		for _dgbe, _aabfb := range _cdge {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _dgbe, _aabfb)
		}
		_ec.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064", len(_bacf))
		for _eagcf, _fcaf := range _bacf {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _eagcf, _fcaf)
		}
		_ec.Log.Info("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f", _dfdb, _ecacf, _dbcg, _geag)
	}

	// Build one tile per cell, stored row-major.
	_eedf := make([]gridTile, _dfdb*_ecacf)
	for _feffe := _ecacf - 1; _feffe >= 0; _feffe-- {
		_eabfg := _geag[_feffe]
		_bdffb := _geag[_feffe+1]
		for _gdeb := 0; _gdeb < _dfdb; _gdeb++ {
			_efad := _dbcg[_gdeb]
			_bfgca := _dbcg[_gdeb+1]
			_dgaac := _cdge.findPrimSec(_efad, _eabfg)
			_efadd := _cdge.findPrimSec(_bfgca, _eabfg)
			_gabcb := _bacf.findPrimSec(_eabfg, _efad)
			_fefcb := _bacf.findPrimSec(_bdffb, _efad)
			_gecda := _ce.PdfRectangle{Llx: _efad, Urx: _bfgca, Lly: _eabfg, Ury: _bdffb}
			_dbdbf := _geedc(_gecda, _dgaac, _efadd, _gabcb, _fefcb)
			_eedf[_feffe*_dfdb+_gdeb] = _dbdbf
			if _bea {
				_ge.Printf("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a", _gdeb, _feffe, _dbdbf.String(), _dbdbf.Width(), _dbdbf.Height())
			}
		}
	}

	// Coalesce along each row: a tile with `_ebdg` set absorbs its right-hand neighbours
	// until `_ebga` becomes set; the merged tile is keyed by its Llx.
	if _bea {
		_ec.Log.Info("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", _gaae)
	}
	_decbd := make([]map[float64]gridTile, _ecacf)
	for _egfda := _ecacf - 1; _egfda >= 0; _egfda-- {
		if _bea {
			_ge.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", _egfda)
		}
		_decbd[_egfda] = make(map[float64]gridTile, _dfdb)
		for _eefac := 0; _eefac < _dfdb; _eefac++ {
			_dcaf := _eedf[_egfda*_dfdb+_eefac]
			if _bea {
				_ge.Printf("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", _eefac, _dcaf)
			}
			if !_dcaf._ebdg {
				continue
			}
			_adga := _eefac
			for _dbfd := _eefac + 1; !_dcaf._ebga && _dbfd < _dfdb; _dbfd++ {
				_abbfd := _eedf[_egfda*_dfdb+_dbfd]
				_dcaf.Urx = _abbfd.Urx
				_dcaf._fgde = _dcaf._fgde || _abbfd._fgde
				_dcaf._ddbaf = _dcaf._ddbaf || _abbfd._ddbaf
				_dcaf._ebga = _abbfd._ebga
				if _bea {
					_ge.Printf("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a", _dbfd, _abbfd, _dcaf)
				}
				_adga = _dbfd
			}
			if _bea {
				_ge.Printf(" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n", _eefac, _adga, _dcaf)
			}
			// Skip the columns that were absorbed.
			_eefac = _adga
			_decbd[_egfda][_dcaf.Llx] = _dcaf
		}
	}

	// Coalesce down the rows. `_ffgf` remembers the tiles already absorbed by a tile above.
	_eeea := make(map[float64]map[float64]gridTile, _ecacf)
	_ffgf := make(map[float64]map[float64]struct{}, _ecacf)
	for _dcfbc := _ecacf - 1; _dcfbc >= 0; _dcfbc-- {
		_eaec := _eedf[_dcfbc*_dfdb].Lly
		_eeea[_eaec] = make(map[float64]gridTile, _dfdb)
		_ffgf[_eaec] = make(map[float64]struct{}, _dfdb)
	}
	if _bea {
		_ec.Log.Info("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", _gaae)
	}
	for _cfda := _ecacf - 1; _cfda >= 0; _cfda-- {
		_dfceb := _eedf[_cfda*_dfdb].Lly
		_ecagb := _decbd[_cfda]
		if _bea {
			_ge.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", _cfda)
		}
		for _, _aeca := range _ccacc(_ecagb) {
			if _, _afeb := _ffgf[_dfceb][_aeca]; _afeb {
				continue
			}
			_dbdac := _ecagb[_aeca]
			if _bea {
				_ge.Printf(" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a", _dbdac.String())
			}
			for _beea := _cfda - 1; _beea >= 0; _beea-- {
				if _dbdac._ddbaf {
					break
				}
				_adba := _decbd[_beea]
				_cgf, _gefa := _adba[_aeca]
				if !_gefa {
					break
				}
				if _cgf.Urx != _dbdac.Urx {
					break
				}
				_dbdac._ddbaf = _cgf._ddbaf
				_dbdac.Lly = _cgf.Lly
				if _bea {
					_ge.Printf("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a", _cgf.String(), _dbdac.String())
				}
				_ffgf[_cgf.Lly][_cgf.Llx] = struct{}{}
			}
			if _cfda == 0 {
				_dbdac._ddbaf = true
			}
			if _dbdac.complete() {
				_eeea[_dfceb][_aeca] = _dbdac
			}
		}
	}
	_gefaa := gridTiling{PdfRectangle: _gaae, _cgecb: _edege(_eeea), _agbb: _bdcdcf(_eeea), _bage: _eeea}
	_gefaa.log("\u0043r\u0065\u0061\u0074\u0065\u0064")
	return _gefaa
}

// firstReadingIndex returns the depth index, among `_febc` and the bins in the band from
// (`_febc`+1)*`_edbf` to that value plus `_bafc` times the first word's `_ddgee`, whose
// first word `_fabg` orders earliest.
func (_gfb *wordBag) firstReadingIndex(_febc int) int {
	_aadb := _gfb.firstWord(_febc)._ddgee
	_def := float64(_febc+1) * _edbf
	_efgde := _def + _bafc*_aadb
	_gdgf := _febc
	for _, _gdab := range _gfb.depthBand(_def, _efgde) {
		if _fabg(_gfb.firstWord(_gdab), _gfb.firstWord(_gdgf)) < 0 {
			_gdgf = _gdab
		}
	}
	return _gdgf
}

// updateBBox folds the rectangle of every paragraph in the cell into the cell's own
// rectangle using `_cdggc`.
func (_gcdb *compositeCell) updateBBox() {
	for _, _bbbg := range _gcdb.paraList {
		_gcdb.PdfRectangle = _cdggc(_gcdb.PdfRectangle, _bbbg.PdfRectangle)
	}
}

// _bgec returns a ruling of kind `_ebdaf` at the rectangle's Llx, spanning Lly to Ury.
func _bgec(_dagce _ce.PdfRectangle) *ruling {
	return &ruling{_bfbc: _ebdaf, _abbgc: _dagce.Llx, _cebe: _dagce.Lly, _deee: _dagce.Ury}
}

// _ccfe matches either of the two patterns `_cfgd` and `_befc`.
var _ccfe *_e.Regexp = _e.MustCompile(_cfgd + "\u007c" + _befc)
2023-05-29 17:26:33 +00:00
2023-12-17 13:54:01 +00:00
// NewWithOptions returns an Extractor instance for extracting content from the input PDF page with options.
//
// It returns an error when `page` is nil, when the page's content streams cannot be read or
// when the page has no usable media box; the media box error is wrapped so callers can
// inspect it with errors.Is / errors.As. A media box whose coordinates are reversed is
// normalised (and logged) rather than rejected. `options` is stored as given.
func NewWithOptions(page *_ce.PdfPage, options *Options) (*Extractor, error) {
	const _gcc = "\u0065x\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077W\u0069\u0074\u0068\u004f\u0070\u0074\u0069\u006f\u006e\u0073"
	if page == nil {
		// The original dereferenced a nil page and panicked.
		return nil, _d.New("extractor requires a page")
	}
	_ged, _ca := page.GetAllContentStreams()
	if _ca != nil {
		return nil, _ca
	}
	_dc, _ffc := page.GetStructTreeRoot()
	if !_ffc {
		_ec.Log.Info("T\u0068\u0065\u0020\u0070\u0064\u0066\u0020\u0064\u006f\u0063\u0075\u006d\u0065\u006e\u0074\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020\u0074\u0061\u0067g\u0065d\u002e\u0020\u0053\u0074r\u0075\u0063t\u0054\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0064\u006f\u0065\u0073\u006e\u0027\u0074\u0020\u0065\u0078\u0069\u0073\u0074\u002e")
	}
	_dd := page.GetContainingPdfObject()
	_bb, _ca := page.GetMediaBox()
	if _ca != nil {
		// Same message text as before; %w (instead of %v) keeps the cause in the error chain.
		return nil, _ge.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0077", _ca)
	}
	_gbg := &Extractor{
		_bcf: _ged,
		_ab:  page.Resources,
		_gda: *_bb,
		_ed:  page.CropBox,
		_cd:  map[string]fontEntry{},
		_fa:  map[string]textResult{},
		_fgb: options,
		_ba:  _dc,
		_eg:  _dd,
	}
	if _gbg._gda.Llx > _gbg._gda.Urx {
		_ec.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", _gbg._gda)
		_gbg._gda.Llx, _gbg._gda.Urx = _gbg._gda.Urx, _gbg._gda.Llx
	}
	if _gbg._gda.Lly > _gbg._gda.Ury {
		_ec.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", _gbg._gda)
		_gbg._gda.Lly, _gbg._gda.Ury = _gbg._gda.Ury, _gbg._gda.Lly
	}
	_bd.TrackUse(_gcc)
	return _gbg, nil
}
2023-01-08 22:34:27 +00:00
2023-12-17 13:54:01 +00:00
// String returns a human readable description of `ss`, formatted as
// "{<number of subpaths> subpaths fresh=<flag>}".
func (_accb *shapesState) String() string {
	subpathCount := len(_accb._abgb)
	return _ge.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", subpathCount, _accb._edee)
}

// moveTextSetLeading stores the negated second argument in the text state and
// then forwards both arguments to moveLP.
// NOTE(review): this looks like the PDF `TD` operator (leading = -ty) — confirm.
func (_gdaa *textObject) moveTextSetLeading(_cgdc, _dfcb float64) {
	_gdaa._befa._dcc = -_dfcb
	_gdaa.moveLP(_cgdc, _dfcb)
}

// log writes the list summary (tagged with `_gcaca`) to the logger and then
// prints every element to stdout. It does nothing unless the package-level
// flag _gfgc is set.
func (_bbdec rulingList) log(_gcaca string) {
	if _gfgc {
		_ec.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _gcaca, _bbdec.String())
		for idx := range _bbdec {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, _bbdec[idx].String())
		}
	}
}

// pullWord appends `_fceg` to the line and then removes it from the bag
// `_eede`, passing `_fedc` through to removeWord.
func (_dcdf *textLine) pullWord(_eede *wordBag, _fceg *textWord, _fedc int) {
	_dcdf.appendWord(_fceg)
	_eede.removeWord(_fceg, _fedc)
}

// removeDuplicates returns the list with runs of consecutive equal elements
// collapsed to their first member. The receiver is sorted in place first.
// An empty receiver yields nil.
func (_eecg rulingList) removeDuplicates() rulingList {
	if len(_eecg) == 0 {
		return nil
	}
	_eecg.sort()
	unique := rulingList{_eecg[0]}
	for _, candidate := range _eecg[1:] {
		last := unique[len(unique)-1]
		if !candidate.equals(last) {
			unique = append(unique, candidate)
		}
	}
	return unique
}

// isQuadrilateral reports whether the subpath has 4 points, or 5 points where
// the fifth coincides exactly with the first.
func (_agcdb *subpath) isQuadrilateral() bool {
	switch count := len(_agcdb._gfefe); {
	case count < 4 || count > 5:
		return false
	case count == 5:
		first, last := _agcdb._gfefe[0], _agcdb._gfefe[4]
		return first.X == last.X && first.Y == last.Y
	}
	return true
}

// _cecd concatenates all the given ruling lists and returns the two lists
// produced by vertsHorzs on the concatenation.
func _cecd(_eacgc []rulingList) (rulingList, rulingList) {
	var combined rulingList
	for idx := range _eacgc {
		combined = append(combined, _eacgc[idx]...)
	}
	return combined.vertsHorzs()
}

// _dbga builds a ruling of kind _ebdaf from a rectangle: primary coordinate
// Urx, secondary range [Lly, Ury].
func _dbga(_fagba _ce.PdfRectangle) *ruling {
	edge := ruling{
		_bfbc:  _ebdaf,
		_abbgc: _fagba.Urx,
		_cebe:  _fagba.Lly,
		_deee:  _fagba.Ury,
	}
	return &edge
}

// _baafc returns the smaller of two ints.
func _baafc(_gfebd, _gdaace int) int {
	if _gdaace <= _gfebd {
		return _gdaace
	}
	return _gfebd
}

// hasLines reports whether _decgb returns true for the cell's rectangle and
// the rectangle of at least one of the given lines. When the debug flag _afcg
// is set, each comparison is printed to stdout.
func (_bcdbf compositeCell) hasLines(_cddcb []*textLine) bool {
	total := len(_cddcb)
	for idx, line := range _cddcb {
		hit := _decgb(_bcdbf.PdfRectangle, line.PdfRectangle)
		if _afcg {
			_ge.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a", hit, idx, total)
			_ge.Printf("\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a", _bcdbf)
			_ge.Printf("\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a", line)
		}
		if hit {
			return true
		}
	}
	return false
}

// The markKind values, numbered consecutively from zero.
const (
	_beagd markKind = iota
	_gbffb
	_eecbc
	_cgacb
)

// _fccf reports whether `_ebeb` has at least _ggafb runes, ends in a rune in
// the Unicode Hyphen property, and has a non-space rune immediately before it.
func _fccf(_ebeb string) bool {
	if _g.RuneCountInString(_ebeb) < _ggafb {
		return false
	}
	lastRune, lastSize := _g.DecodeLastRuneInString(_ebeb)
	if lastSize <= 0 || !_de.Is(_de.Hyphen, lastRune) {
		return false
	}
	head := _ebeb[:len(_ebeb)-lastSize]
	prevRune, prevSize := _g.DecodeLastRuneInString(head)
	return prevSize > 0 && !_de.IsSpace(prevRune)
}

// fontEntry pairs a font with an int64 stamp.
// NOTE(review): the stamp appears to be a last-access counter for a font cache — confirm.
type fontEntry struct {
	_ddaa *_ce.PdfFont
	_addb int64
}

// _gecae reports whether the two values differ by no more than _ebbf.
func _gecae(_effdd, _ceddb float64) bool {
	return _aa.Abs(_effdd-_ceddb) <= _ebbf
}
// String returns a description of `w`, formatted as
// `<%.2f value> <bounding box> fontsize=<size> "<text>"`.
func (_beaf *textWord) String() string {
	return _ge.Sprintf("\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022",
		_beaf._cffdg, _beaf.PdfRectangle, _beaf._ddgee, _beaf._bbdb)
}

// _ddedf returns copies of the given marks with the _ebd flag set and _afe
// pointing at `_egec`. The input slice is not modified; an empty input gives nil.
func _ddedf(_agcg []TextMark, _egec *TextTable) []TextMark {
	var tagged []TextMark
	for idx := range _agcg {
		mark := _agcg[idx] // work on a copy so the caller's marks stay untouched
		mark._ebd = true
		mark._afe = _egec
		tagged = append(tagged, mark)
	}
	return tagged
}

// markKind is an integer enumeration.
type markKind int

// reduce returns the table with rows and columns dropped for which
// emptyCompositeRow / emptyCompositeColumn report true. When nothing is
// dropped the receiver itself is returned; otherwise a new table is built
// and the surviving composites are copied to their compacted positions.
func (_gegdd *textTable) reduce() *textTable {
	keptRows := make([]int, 0, _gegdd._dcfg)
	keptCols := make([]int, 0, _gegdd._ecbf)
	for row := 0; row < _gegdd._dcfg; row++ {
		if !_gegdd.emptyCompositeRow(row) {
			keptRows = append(keptRows, row)
		}
	}
	for col := 0; col < _gegdd._ecbf; col++ {
		if !_gegdd.emptyCompositeColumn(col) {
			keptCols = append(keptCols, col)
		}
	}
	if len(keptRows) == _gegdd._dcfg && len(keptCols) == _gegdd._ecbf {
		return _gegdd
	}

	reduced := textTable{
		_beaeg: _gegdd._beaeg,
		_ecbf:  len(keptCols),
		_dcfg:  len(keptRows),
		_gcbga: make(map[uint64]*textPara, len(keptCols)*len(keptRows)),
	}
	if _afcg {
		_ec.Log.Info("\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064", _gegdd._ecbf, _gegdd._dcfg, len(keptCols), len(keptRows))
		_ec.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		_ec.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}
	for newRow, oldRow := range keptRows {
		for newCol, oldCol := range keptCols {
			paras, bbox := _gegdd.getComposite(oldCol, oldRow)
			if paras == nil {
				continue
			}
			if _afcg {
				_ge.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newCol, newRow, oldCol, oldRow, _adagc(paras.merge().text(), 50))
			}
			reduced.putComposite(newCol, newRow, paras, bbox)
		}
	}
	return &reduced
}

// sortStrict sorts the list in place by: kind (descending), then primary
// coordinate (ascending, treated as equal when _ffegg accepts the difference),
// then secondary start, then secondary end.
func (_bcbgb rulingList) sortStrict() {
	_c.Slice(_bcbgb, func(_ddcdg, _befg int) bool {
		left, right := _bcbgb[_ddcdg], _bcbgb[_befg]
		switch {
		case left._bfbc != right._bfbc:
			return left._bfbc > right._bfbc
		case !_ffegg(left._abbgc - right._abbgc):
			return left._abbgc < right._abbgc
		case left._cebe != right._cebe:
			return left._cebe < right._cebe
		default:
			return left._deee < right._deee
		}
	})
}

// _ffegg reports whether the magnitude of `_cfac` is below the tolerance _fdac.
func _ffegg(_cfac float64) bool {
	return _aa.Abs(_cfac) < _fdac
}

// _bcfde renders `_debfc` as an RGBA image of the same dimensions: pixels
// whose R, G and B all read as zero become transparent, every other pixel
// takes the colour `_cgedf`. Pixels whose colour cannot be read are logged
// and left at the image's zero value.
func _bcfde(_debfc *_ce.Image, _cgedf _fg.Color) _ded.Image {
	width, height := int(_debfc.Width), int(_debfc.Height)
	out := _ded.NewRGBA(_ded.Rect(0, 0, width, height))
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			pixel, err := _debfc.ColorAt(x, y)
			if err != nil {
				_ec.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063o\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0072\u0065\u0074\u0072\u0069\u0065v\u0065 \u0069\u006d\u0061\u0067\u0065\u0020m\u0061\u0073\u006b\u0020\u0076\u0061\u006cu\u0065\u0020\u0061\u0074\u0020\u0028\u0025\u0064\u002c\u0020\u0025\u0064\u0029\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006da\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063t\u002e", x, y)
				continue
			}
			red, green, blue, _ := pixel.RGBA()
			fill := _cgedf
			if red+green+blue == 0 {
				fill = _fg.Transparent
			}
			out.Set(x, y, fill)
		}
	}
	return out
}

// pathSection is a group of subpaths together with a colour.
type pathSection struct {
	_fbfe []*subpath
	_fg.Color
}

// setWordSpacing records `_eaag` in the text state. A nil receiver is a no-op.
func (_bcb *textObject) setWordSpacing(_eaag float64) {
	if _bcb != nil {
		_bcb._befa._eag = _eaag
	}
}

// size returns the number of entries on the stack.
func (_ggb *stateStack) size() int {
	return len(*_ggb)
}

// showText forwards its arguments unchanged to renderText.
func (_ddae *textObject) showText(_efa _gb.PdfObject, _ede []byte, _baca int) error {
	return _ddae.renderText(_efa, _ede, _baca)
}

// _efaf joins the text of the given lines. Each line is prefixed with
// `_bcbba` and followed by a newline when the next line's _bcdg differs from
// its own (the last line is compared against 0), or by a space otherwise.
func _efaf(_efag []*textLine, _bcbba string) string {
	var out _gd.Builder
	lastIdx := len(_efag) - 1
	for idx, line := range _efag {
		text := line.text()
		depth := line._bcdg
		nextDepth := 0.0
		if idx < lastIdx {
			nextDepth = _efag[idx+1]._bcdg
		}
		out.WriteString(_bcbba)
		out.WriteString(text)
		if nextDepth == depth {
			out.WriteString("\u0020")
		} else {
			out.WriteString("\u000a")
		}
	}
	return out.String()
}

var _ff = false

// _fcegc logs the number of grids under the title `_ceab` and prints each
// grid's String() to stdout.
func _fcegc(_ceab string, _dcgca []rulingList) {
	_ec.Log.Info("\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073", len(_dcgca), _ceab)
	for idx := range _dcgca {
		_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, _dcgca[idx].String())
	}
}

// _bcgc reports whether every mark in every word of the line carries the same
// _fefe value. A value of -1 is treated as "not yet seen".
func _bcgc(_cege *textLine) bool {
	seen := -1
	for _, word := range _cege._aebc {
		for _, mark := range word._bgeaa {
			switch value := mark._fefe; {
			case seen == -1:
				seen = value
			case seen != value:
				// Once a mismatch is found the answer can no longer change.
				return false
			}
		}
	}
	return true
}

// blocked reports whether a ruling recorded on the bag separates the bag from
// `_febb`. The word is tested horizontally against the bag's _eeca rulings
// when it lies wholly to one side, and vertically against the _gbef rulings
// when it lies wholly above or below.
func (_gbd *wordBag) blocked(_febb *textWord) bool {
	switch {
	case _febb.Urx < _gbd.Llx:
		if _gbd._eeca.blocks(_dbga(_febb.PdfRectangle), _bgec(_gbd.PdfRectangle)) {
			if _aecc {
				_ec.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _febb, _gbd)
			}
			return true
		}
	case _gbd.Urx < _febb.Llx:
		if _gbd._eeca.blocks(_dbga(_gbd.PdfRectangle), _bgec(_febb.PdfRectangle)) {
			if _aecc {
				_ec.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s", _febb, _gbd)
			}
			return true
		}
	}

	switch {
	case _febb.Ury < _gbd.Lly:
		if _gbd._gbef.blocks(_edeff(_febb.PdfRectangle), _fgee(_gbd.PdfRectangle)) {
			if _aecc {
				_ec.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _febb, _gbd)
			}
			return true
		}
	case _gbd.Ury < _febb.Lly:
		if _gbd._gbef.blocks(_edeff(_gbd.PdfRectangle), _fgee(_febb.PdfRectangle)) {
			if _aecc {
				_ec.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s", _febb, _gbd)
			}
			return true
		}
	}
	return false
}

// maxDepth returns the bag's _cfg value minus its Lly.
func (_ggda *wordBag) maxDepth() float64 {
	return _ggda._cfg - _ggda.Lly
}
// _agefd converts a PDF colour in the given colourspace to a Go colour.
//
// Black is returned when either argument is nil, when the conversion to RGB
// fails, or when the converted colour is not a *PdfColorDeviceRGB (the last
// two cases are logged). Otherwise an opaque NRGBA colour is returned.
//
// Each component is clamped to [0, 1] before scaling to a byte: converting an
// out-of-range float to uint8 is implementation-dependent in Go, and PDF
// content can carry out-of-range colour values.
func _agefd(_cfcag _ce.PdfColorspace, _cadgd _ce.PdfColor) _fg.Color {
	if _cfcag == nil || _cadgd == nil {
		return _fg.Black
	}
	converted, err := _cfcag.ColorToRGB(_cadgd)
	if err != nil {
		_ec.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", _cadgd, _cfcag, err)
		return _fg.Black
	}
	rgb, ok := converted.(*_ce.PdfColorDeviceRGB)
	if !ok {
		_ec.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", converted)
		return _fg.Black
	}
	// toByte scales a [0, 1] component to 0..255; NaN and negatives map to 0,
	// values of 1 or more map to 255.
	toByte := func(component float64) uint8 {
		if !(component > 0) {
			return 0
		}
		if component >= 1 {
			return 255
		}
		return uint8(component * 255)
	}
	return _fg.NRGBA{R: toByte(rgb.R()), G: toByte(rgb.G()), B: toByte(rgb.B()), A: uint8(255)}
}

// yNeighbours builds two events per para — one at its lower and one at its
// upper Y bound — and returns the result of eventNeighbours on them. When
// `_abfag` is non-zero each bound is pushed outwards by `_abfag` times the
// para's font size.
func (_faedf paraList) yNeighbours(_abfag float64) map[*textPara][]int {
	events := make([]event, 2*len(_faedf))
	if _abfag == 0 {
		for i, para := range _faedf {
			events[2*i] = event{para.Lly, true, i}
			events[2*i+1] = event{para.Ury, false, i}
		}
	} else {
		for i, para := range _faedf {
			events[2*i] = event{para.Lly - _abfag*para.fontsize(), true, i}
			events[2*i+1] = event{para.Ury + _abfag*para.fontsize(), false, i}
		}
	}
	return _faedf.eventNeighbours(events)
}

// _cfaf sets the mark's Offset to `*_adag`, appends the mark to `_gbeb`,
// advances `*_adag` by the byte length of the mark's Text and returns the
// extended slice.
func _cfaf(_gbeb []TextMark, _adag *int, _accfb TextMark) []TextMark {
	_accfb.Offset = *_adag
	_gbeb = append(_gbeb, _accfb)
	*_adag += len(_accfb.Text)
	return _gbeb
}

// getRight returns, for each row, the _gaca neighbour of the cell in the last
// column. It returns nil if any of those neighbours is taken, or if they are
// not chained top to bottom through their _fdgbd links.
func (_fcgad *textTable) getRight() paraList {
	cells := make(paraList, _fcgad._dcfg)
	for row := 0; row < _fcgad._dcfg; row++ {
		cell := _fcgad.get(_fcgad._ecbf-1, row)._gaca
		if cell.taken() {
			return nil
		}
		cells[row] = cell
	}
	for row := 0; row < _fcgad._dcfg-1; row++ {
		if cells[row]._fdgbd != cells[row+1] {
			return nil
		}
	}
	return cells
}

// isAtom tries to form a 2x2 table from the para, its _gaca neighbour, its
// _fdgbd neighbour and the para diagonal to it. It returns nil when any of the
// three neighbours is taken or when the two routes to the diagonal disagree;
// otherwise it returns the table built by _eggg.
func (_gaefc *textPara) isAtom() *textTable {
	origin := _gaefc
	right := _gaefc._gaca
	below := _gaefc._fdgbd
	if right.taken() || below.taken() {
		return nil
	}
	diagonal := right._fdgbd
	if diagonal.taken() || diagonal != below._gaca {
		return nil
	}
	return _eggg(origin, right, below, diagonal)
}

// _ccef looks for horizontal gaps shared by every cell in `_bgcfe`.
//
// All lines of all cells are sorted by _bcdg (ties within the _ffegg tolerance
// are broken by Llx) and swept in that order. A border is recorded midway
// between a line and the lowest Lly seen so far whenever the line's top lies
// below it, and the lines seen since the previous border form a group.
// The borders are returned only if every cell has a line in every group
// (per hasLines); otherwise nil is returned. nil is also returned when the
// cells contain no lines at all.
func _ccef(_bgcfe []compositeCell) []float64 {
	var lines []*textLine
	paraCount := 0
	for _, cell := range _bgcfe {
		paraCount += len(cell.paraList)
		lines = append(lines, cell.lines()...)
	}
	_c.Slice(lines, func(_dbac, _ecdd int) bool {
		a, b := lines[_dbac], lines[_ecdd]
		da, db := a._bcdg, b._bcdg
		if !_ffegg(da - db) {
			return da < db
		}
		return a.Llx < b.Llx
	})
	if _afcg {
		_ge.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", paraCount, len(lines))
		for i, line := range lines {
			_ge.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, line)
		}
	}
	// Without any lines there is nothing to separate; indexing lines[0] below
	// would panic.
	if len(lines) == 0 {
		return nil
	}

	var borders []float64
	lowest := lines[0]
	var groups [][]*textLine
	current := []*textLine{lowest}
	for i, line := range lines[1:] {
		if line.Ury < lowest.Lly {
			border := 0.5 * (line.Ury + lowest.Lly)
			if _afcg {
				_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a", i, line.Ury, lowest.Lly, border, lowest, line)
			}
			borders = append(borders, border)
			groups = append(groups, current)
			current = nil
		}
		current = append(current, line)
		if line.Lly < lowest.Lly {
			lowest = line
		}
	}
	if len(current) > 0 {
		groups = append(groups, current)
	}
	if _afcg {
		_ge.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", borders)
	}
	if _afcg {
		_ec.Log.Info("\u0072\u006f\u0077\u003d\u0025\u0064", len(_bgcfe))
		for i, cell := range _bgcfe {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, cell)
		}
		_ec.Log.Info("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d", len(groups))
		for gi, group := range groups {
			_ge.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a", gi, len(group))
			for li, line := range group {
				_ge.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", li, line)
			}
		}
	}

	// The borders are only usable if each cell is represented in each group.
	spansAll := true
	for gi, group := range groups {
		groupOK := true
		for ci, cell := range _bgcfe {
			if _afcg {
				_ge.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", gi, len(groups), ci, len(_bgcfe), cell)
			}
			if !cell.hasLines(group) {
				if _afcg {
					_ge.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", gi, len(groups), ci, len(_bgcfe))
				}
				groupOK = false
				break
			}
		}
		if !groupOK {
			spansAll = false
			break
		}
	}
	if !spansAll {
		if _afcg {
			_ec.Log.Info("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg")
		}
		borders = nil
	}
	if _afcg && borders != nil {
		_ge.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", borders)
	}
	return borders
}

// getTextMarkAtOffset returns the first mark whose Offset equals `_cbaa`, or
// nil if there is none. The returned pointer refers to a copy of the mark,
// not to the element stored in the array.
func (_aff *TextMarkArray) getTextMarkAtOffset(_cbaa int) *TextMark {
	for _, mark := range _aff._dec {
		if mark.Offset == _cbaa {
			return &mark
		}
	}
	return nil
}
2023-10-07 13:58:01 +00:00
2023-11-11 11:29:03 +00:00
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20> ).
2023-12-17 13:54:01 +00:00
// ExtractText returns the text and error from ExtractTextWithStats, discarding
// the two middle return values of that call.
func ( _fgg * Extractor ) ExtractText ( ) ( string , error ) { _badf , _ , _ , _gae := _fgg . ExtractTextWithStats ( ) ; return _badf , _gae ; } ;
// parasBBox returns the cell's embedded paraList and PdfRectangle.
func ( _fbag compositeCell ) parasBBox ( ) ( paraList , _ce . PdfRectangle ) { return _fbag . paraList , _fbag . PdfRectangle ; } ;
// _ddafc passes the absolute X and Y distances between the two points to
// _beeee and returns its result.
// NOTE(review): presumably a "points are close enough" test whose tolerance lives in _beeee — confirm.
func _ddafc ( _bfffa , _fadcg _bc . Point ) bool { _adge := _aa . Abs ( _bfffa . X - _fadcg . X ) ;
_cdeb := _aa . Abs ( _bfffa . Y - _fadcg . Y ) ; return _beeee ( _adge , _cdeb ) ; } ;
// writeText writes the para's text to `_fecd`. A para without a table
// (_bddea == nil) is written with writeCellText. Otherwise the table is
// written row by row: each cell is its cell text (or a tab when the cell is
// nil) followed by a space, and rows are separated by a newline with none
// after the last row. Errors returned by Write are ignored.
func ( _gcddc * textPara ) writeText ( _fecd _a . Writer ) { if _gcddc . _bddea == nil { _gcddc . writeCellText ( _fecd ) ; return ; } ; for _gcbf := 0 ; _gcbf < _gcddc . _bddea . _dcfg ; _gcbf ++ { for _dadcc := 0 ;
_dadcc < _gcddc . _bddea . _ecbf ; _dadcc ++ { _gfcg := _gcddc . _bddea . get ( _dadcc , _gcbf ) ; if _gfcg == nil { _fecd . Write ( [ ] byte ( "\u0009" ) ) ; } else { _gfcg . writeCellText ( _fecd ) ; } ; _fecd . Write ( [ ] byte ( "\u0020" ) ) ; } ; if _gcbf < _gcddc . _bddea . _dcfg - 1 { _fecd . Write ( [ ] byte ( "\u000a" ) ) ;
} ; } ; } ;
// sort sorts the list in place using the list's comp method as the less function.
func ( _dbbfc rulingList ) sort ( ) { _c . Slice ( _dbbfc , _dbbfc . comp ) } ;
// _eeff maps spacing accent/modifier runes to a combining mark string; every
// value lies in U+0300..U+0359 (the Combining Diacritical Marks block).
// NOTE(review): presumably used to attach a detached diacritic to the preceding character — confirm against callers.
var ( _eeff = map [ rune ] string { 0x0060 : "\u0300" , 0x02CB : "\u0300" , 0x0027 : "\u0301" , 0x00B4 : "\u0301" , 0x02B9 : "\u0301" , 0x02CA : "\u0301" , 0x005E : "\u0302" , 0x02C6 : "\u0302" , 0x007E : "\u0303" , 0x02DC : "\u0303" , 0x00AF : "\u0304" , 0x02C9 : "\u0304" , 0x02D8 : "\u0306" , 0x02D9 : "\u0307" , 0x00A8 : "\u0308" , 0x00B0 : "\u030a" , 0x02DA : "\u030a" , 0x02BA : "\u030b" , 0x02DD : "\u030b" , 0x02C7 : "\u030c" , 0x02C8 : "\u030d" , 0x0022 : "\u030e" , 0x02BB : "\u0312" , 0x02BC : "\u0313" , 0x0486 : "\u0313" , 0x055A : "\u0313" , 0x02BD : "\u0314" , 0x0485 : "\u0314" , 0x0559 : "\u0314" , 0x02D4 : "\u031d" , 0x02D5 : "\u031e" , 0x02D6 : "\u031f" , 0x02D7 : "\u0320" , 0x02B2 : "\u0321" , 0x00B8 : "\u0327" , 0x02CC : "\u0329" , 0x02B7 : "\u032b" , 0x02CD : "\u0331" , 0x005F : "\u0332" , 0x204E : "\u0359" } ;
) ;
// emptyCompositeRow reports whether row `_acbg` has no composite cell with a
// non-empty paraList. Every column index in [0, _ecbf) is looked up in _egfe
// under the key _aaca(column, row); missing entries count as empty.
func ( _cgeba * textTable ) emptyCompositeRow ( _acbg int ) bool { for _aegcb := 0 ; _aegcb < _cgeba . _ecbf ; _aegcb ++ { if _bdfe , _eaead := _cgeba . _egfe [ _aaca ( _aegcb , _acbg ) ] ; _eaead { if len ( _bdfe . paraList ) > 0 { return false ; } ; } ; } ; return true ; } ;
// extractXObjectImage resolves the XObject named `_cee` in `_fead`, converts
// it to an RGB image and appends an ImageMark for it, placed according to the
// CTM of graphics state `_faa`.
//
// Decoded images are cached in _aabe keyed by the XObject, so each XObject is
// decoded once. On a cache miss the image's Mask (or, failing that, SMask) is
// turned into an alpha image and combined with the decoded image through
// _eaecc. Failures to build the mask are only logged and the image is then
// used without a mask.
//
// It returns nil without recording anything when the XObject or image cannot
// be found, and returns the underlying error when lookup, decoding or
// conversion fails.
func ( _dcg * imageExtractContext ) extractXObjectImage ( _cee * _gb . PdfObjectName , _faa _ag . GraphicsState , _fead * _ce . PdfPageResources ) error { _ebb , _ := _fead . GetXObjectByName ( * _cee ) ;
if _ebb == nil { return nil ; } ; _gdff , _bad := _dcg . _aabe [ _ebb ] ; if ! _bad { _ddb , _gcb := _fead . GetXObjectImageByName ( * _cee ) ; if _gcb != nil { return _gcb ; } ; if _ddb == nil { return nil ; } ; _gg , _gcb := _ddb . ToImage ( ) ; if _gcb != nil { return _gcb ; } ; var _agg _ded . Image ;
// An explicit Mask takes precedence over a soft mask (SMask).
if _ddb . Mask != nil { if _agg , _gcb = _fddd ( _ddb . Mask , _fg . Opaque ) ; _gcb != nil { _ec . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a \u0063\u006f\u0075\u006c\u0064 \u006eo\u0074\u0020\u0067\u0065\u0074\u0020\u0065\u0078\u0070\u006c\u0069\u0063\u0069\u0074\u0020\u0069\u006d\u0061\u0067e\u0020\u006d\u0061\u0073\u006b\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" ) ;
} ; } else if _ddb . SMask != nil { _agg , _gcb = _fbga ( _ddb . SMask , _fg . Opaque ) ; if _gcb != nil { _ec . Log . Debug ( "W\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u0075\u006c\u0064\u0020\u006e\u006f\u0074\u0020\u0067\u0065\u0074\u0020\u0073\u006f\u0066\u0074\u0020\u0069\u006da\u0067e\u0020\u006d\u0061\u0073k\u002e\u0020O\u0075\u0074\u0070\u0075\u0074\u0020\u006d\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063\u006f\u0072\u0072\u0065\u0063\u0074\u002e" ) ;
} ; } ;
// With a mask available, apply it to the Go image and rebuild the model
// image: as gray for the "DeviceGray" and "Indexed" colorspaces, via
// NewImageFromGoImage otherwise.
if _agg != nil { _dbb , _cag := _gg . ToGoImage ( ) ; if _cag != nil { return _cag ; } ; _dbb = _eaecc ( _dbb , _agg ) ; switch _ddb . ColorSpace . String ( ) { case "\u0044\u0065\u0076\u0069\u0063\u0065\u0047\u0072\u0061\u0079" , "\u0049n\u0064\u0065\u0078\u0065\u0064" : _gg , _cag = _ce . ImageHandling . NewGrayImageFromGoImage ( _dbb ) ;
if _cag != nil { return _cag ; } ; default : _gg , _cag = _ce . ImageHandling . NewImageFromGoImage ( _dbb ) ; if _cag != nil { return _cag ; } ; } ; } ; _gdff = & cachedImage { _aca : _gg , _abb : _ddb . ColorSpace } ; _dcg . _aabe [ _ebb ] = _gdff ; } ; _gag := _gdff . _aca ; _agc := _gdff . _abb ;
_ege , _dbc := _agc . ImageToRGB ( * _gag ) ; if _dbc != nil { return _dbc ; } ; _ec . Log . Debug ( "@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073" , _faa . CTM . String ( ) ) ; _gdbf := ImageMark { Image : & _ege , Width : _faa . CTM . ScalingFactorX ( ) , Height : _faa . CTM . ScalingFactorY ( ) , Angle : _faa . CTM . Angle ( ) } ;
_gdbf . X , _gdbf . Y = _faa . CTM . Translation ( ) ; _dcg . _gbf = append ( _dcg . _gbf , _gdbf ) ; _dcg . _fbc ++ ; return nil ; } ;
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// String returns a description of `tm`, formatted as
// `<bounding box> fontsize=<size> "<text>"`.
func ( _ccgdg * textMark ) String ( ) string { return _ge . Sprintf ( "\u0025\u002e\u0032f \u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\u0022\u0025\u0073\u0022" , _ccgdg . PdfRectangle , _ccgdg . _bfaca , _ccgdg . _ecaa ) ; } ;
// _cgbe returns the struct elements found exactly three levels below `_dfce`
// (children of children of its children) whose _bacdb field equals "L".
// The result is an empty, non-nil slice when nothing matches.
func _cgbe ( _dfce structElement ) [ ] structElement { _egbb := [ ] structElement { } ;
for _ , _edggc := range _dfce . _cagb { for _ , _fgbd := range _edggc . _cagb { for _ , _dffe := range _fgbd . _cagb { if _dffe . _bacdb == "\u004c" { _egbb = append ( _egbb , _dffe ) ; } ; } ; } ; } ; return _egbb ; } ;
// llyRange returns the sub-slice of `_cde` (indexes into the para list) whose
// paras have Lly in [_cefga, _cabdb], or nil when that range misses every para.
// NOTE(review): assumes `_cde` orders the paras by increasing Lly and has the
// same length as the list (len of the list bounds the searches); panics on an
// empty list — confirm with callers.
func ( _abddc paraList ) llyRange ( _cde [ ] int , _cefga , _cabdb float64 ) [ ] int { _beg := len ( _abddc ) ;
if _cabdb < _abddc [ _cde [ 0 ] ] . Lly || _cefga > _abddc [ _cde [ _beg - 1 ] ] . Lly { return nil ; } ; _dfaf := _c . Search ( _beg , func ( _bcaf int ) bool { return _abddc [ _cde [ _bcaf ] ] . Lly >= _cefga } ) ; _edeef := _c . Search ( _beg , func ( _afag int ) bool { return _abddc [ _cde [ _afag ] ] . Lly > _cabdb } ) ;
return _cde [ _dfaf : _edeef ] ; } ;
// addDiacritic appends `_eccfb` to the text of the word's last mark and
// replaces that text with its NFKC normalization.
// The word must contain at least one mark.
func ( _edgce * textWord ) addDiacritic ( _eccfb string ) { _ccdfb := _edgce . _bgeaa [ len ( _edgce . _bgeaa ) - 1 ] ; _ccdfb . _ecaa += _eccfb ; _ccdfb . _ecaa = _ef . NFKC . String ( _ccdfb . _ecaa ) ; } ;
// secMinMax returns the smallest _cebe and the largest _deee over all rulings
// in the list. The list must not be empty.
func ( _ffgdd rulingList ) secMinMax ( ) ( float64 , float64 ) { _face , _cefag := _ffgdd [ 0 ] . _cebe , _ffgdd [ 0 ] . _deee ;
for _ , _ebdf := range _ffgdd [ 1 : ] { if _ebdf . _cebe < _face { _face = _ebdf . _cebe ; } ; if _ebdf . _deee > _cefag { _cefag = _ebdf . _deee ; } ; } ; return _face , _cefag ; } ;
// addNeighbours links each para to its nearest left (_egab), right (_gaca),
// above (_abdda) and below (_fdgbd) neighbour.
//
// Horizontal candidates come from yNeighbours(_gcga) and vertical candidates
// from xNeighbours(_gccfg). On each side the closest candidate is chosen, but
// the link is dropped if another candidate on that side overlaps the chosen
// one along the search axis, or if the _fdg (horizontal) / _fcd (vertical)
// check on the two rectangles fails. A final pass clears every link that is
// not reciprocated by the para it points to.
func ( _beggg paraList ) addNeighbours ( ) {
// _fcffc splits the candidate indexes into paras entirely left of and
// entirely right of the given para.
_fcffc := func ( _agefc [ ] int , _ebbad * textPara ) ( [ ] * textPara , [ ] * textPara ) { _gfbd := make ( [ ] * textPara , 0 , len ( _agefc ) - 1 ) ;
_acfg := make ( [ ] * textPara , 0 , len ( _agefc ) - 1 ) ; for _ , _dgadg := range _agefc { _bfcb := _beggg [ _dgadg ] ; if _bfcb . Urx <= _ebbad . Llx { _gfbd = append ( _gfbd , _bfcb ) ; } else if _bfcb . Llx >= _ebbad . Urx { _acfg = append ( _acfg , _bfcb ) ; } ; } ; return _gfbd , _acfg ; } ;
// _eccgb splits the candidate indexes into paras entirely above (first
// result) and entirely below (second result) the given para.
_eccgb := func ( _cbfgbf [ ] int , _gcbeb * textPara ) ( [ ] * textPara , [ ] * textPara ) { _febg := make ( [ ] * textPara , 0 , len ( _cbfgbf ) - 1 ) ; _aegcf := make ( [ ] * textPara , 0 , len ( _cbfgbf ) - 1 ) ; for _ , _ccbaf := range _cbfgbf { _beaga := _beggg [ _ccbaf ] ; if _beaga . Ury <= _gcbeb . Lly { _aegcf = append ( _aegcf , _beaga ) ;
} else if _beaga . Lly >= _gcbeb . Ury { _febg = append ( _febg , _beaga ) ; } ; } ; return _febg , _aegcf ; } ;
// Pass 1: left/right links. The left choice is the candidate with the
// largest Urx; the right choice is the one with the smallest Llx.
_eccf := _beggg . yNeighbours ( _gcga ) ; for _ , _afbg := range _beggg { _cgaf := _eccf [ _afbg ] ; if len ( _cgaf ) == 0 { continue ; } ; _bffb , _eaecb := _fcffc ( _cgaf , _afbg ) ;
if len ( _bffb ) == 0 && len ( _eaecb ) == 0 { continue ; } ; if len ( _bffb ) > 0 { _aafc := _bffb [ 0 ] ; for _ , _eccfa := range _bffb [ 1 : ] { if _eccfa . Urx >= _aafc . Urx { _aafc = _eccfa ; } ; } ; for _ , _cegab := range _bffb { if _cegab != _aafc && _cegab . Urx > _aafc . Llx { _aafc = nil ;
break ; } ; } ; if _aafc != nil && _fdg ( _afbg . PdfRectangle , _aafc . PdfRectangle ) { _afbg . _egab = _aafc ; } ; } ; if len ( _eaecb ) > 0 { _dgc := _eaecb [ 0 ] ; for _ , _gbbfd := range _eaecb [ 1 : ] { if _gbbfd . Llx <= _dgc . Llx { _dgc = _gbbfd ; } ; } ; for _ , _bccaf := range _eaecb { if _bccaf != _dgc && _bccaf . Llx < _dgc . Urx { _dgc = nil ;
break ; } ; } ; if _dgc != nil && _fdg ( _afbg . PdfRectangle , _dgc . PdfRectangle ) { _afbg . _gaca = _dgc ; } ; } ; } ;
// Pass 2: above/below links. The below choice is the candidate with the
// largest Ury; the above choice is the one with the smallest Lly.
_eccf = _beggg . xNeighbours ( _gccfg ) ; for _ , _ffbgf := range _beggg { _gcad := _eccf [ _ffbgf ] ; if len ( _gcad ) == 0 { continue ; } ; _edbde , _adbae := _eccgb ( _gcad , _ffbgf ) ;
if len ( _edbde ) == 0 && len ( _adbae ) == 0 { continue ; } ; if len ( _adbae ) > 0 { _abeb := _adbae [ 0 ] ; for _ , _acbb := range _adbae [ 1 : ] { if _acbb . Ury >= _abeb . Ury { _abeb = _acbb ; } ; } ; for _ , _dcad := range _adbae { if _dcad != _abeb && _dcad . Ury > _abeb . Lly { _abeb = nil ;
break ; } ; } ; if _abeb != nil && _fcd ( _ffbgf . PdfRectangle , _abeb . PdfRectangle ) { _ffbgf . _fdgbd = _abeb ; } ; } ; if len ( _edbde ) > 0 { _bgad := _edbde [ 0 ] ; for _ , _abfgd := range _edbde [ 1 : ] { if _abfgd . Lly <= _bgad . Lly { _bgad = _abfgd ; } ; } ; for _ , _cdbf := range _edbde { if _cdbf != _bgad && _cdbf . Lly < _bgad . Ury { _bgad = nil ;
break ; } ; } ; if _bgad != nil && _fcd ( _ffbgf . PdfRectangle , _bgad . PdfRectangle ) { _ffbgf . _abdda = _bgad ; } ; } ; } ;
// Pass 3: keep only links that are mutual.
for _ , _caaeb := range _beggg { if _caaeb . _egab != nil && _caaeb . _egab . _gaca != _caaeb { _caaeb . _egab = nil ; } ; if _caaeb . _abdda != nil && _caaeb . _abdda . _fdgbd != _caaeb { _caaeb . _abdda = nil ;
} ; if _caaeb . _gaca != nil && _caaeb . _gaca . _egab != _caaeb { _caaeb . _gaca = nil ; } ; if _caaeb . _fdgbd != nil && _caaeb . _fdgbd . _abdda != _caaeb { _caaeb . _fdgbd = nil ; } ; } ; } ;
// depthIndexes returns the keys of the bag's _faba map in increasing order,
// or nil when the map is empty.
func ( _fdfd * wordBag ) depthIndexes ( ) [ ] int { if len ( _fdfd . _faba ) == 0 { return nil ; } ; _afeg := make ( [ ] int , len ( _fdfd . _faba ) ) ;
_fagd := 0 ; for _agfd := range _fdfd . _faba { _afeg [ _fagd ] = _agfd ; _fagd ++ ; } ; _c . Ints ( _afeg ) ; return _afeg ; } ;
// complete reports whether every tile in the tiling's _bage collection
// reports itself complete.
func ( _ddaac gridTiling ) complete ( ) bool { for _ , _ddcff := range _ddaac . _bage { for _ , _edcec := range _ddcff { if ! _edcec . complete ( ) { return false ;
} ; } ; } ; return true ; } ;
// checkOp validates the parameter count of content stream operation `_fdbff`.
//
// With a nil receiver it logs that the operand occurred outside a text
// object (showing at most `_efd` parameters) and then continues with the count
// check. When `_efd` is non-negative and the operation does not have exactly
// `_efd` parameters, the mismatch is logged and (false, err) is returned,
// where err is non-nil only if `_feb` is set. Otherwise it returns (true, nil).
func ( _fdd * textObject ) checkOp ( _fdbff * _ag . ContentStreamOperation , _efd int , _feb bool ) ( _efdb bool , _fdaa error ) { if _fdd == nil { var _eeab [ ] _gb . PdfObject ; if _efd > 0 { _eeab = _fdbff . Params ; if len ( _eeab ) > _efd { _eeab = _eeab [ : _efd ] ;
} ; } ; _ec . Log . Debug ( "\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076" , _fdbff . Operand , _eeab ) ; } ; if _efd >= 0 { if len ( _fdbff . Params ) != _efd { if _feb { _fdaa = _d . New ( "\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et" ) ;
} ; _ec . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076" , _fdbff . Operand , _efd , len ( _fdbff . Params ) , _fdbff . Params ) ;
return false , _fdaa ; } ; } ; return true , nil ; } ;
// blocks reports whether any ruling in the list lies between `_fgec` and
// `_dfbfe`. The two rulings must overlap in their secondary ranges
// ([_cebe, _deee]); a list ruling blocks when its primary coordinate lies
// between theirs (with _gcef slack on each comparison) and its secondary
// range intersects that overlap.
func ( _aegdb rulingList ) blocks ( _fgec , _dfbfe * ruling ) bool { if _fgec . _cebe > _dfbfe . _deee || _dfbfe . _cebe > _fgec . _deee { return false ; } ; _dfbce := _aa . Max ( _fgec . _cebe , _dfbfe . _cebe ) ; _gdcfc := _aa . Min ( _fgec . _deee , _dfbfe . _deee ) ;
if _fgec . _abbgc > _dfbfe . _abbgc { _fgec , _dfbfe = _dfbfe , _fgec ; } ; for _ , _ffaga := range _aegdb { if _fgec . _abbgc <= _ffaga . _abbgc + _gcef && _ffaga . _abbgc <= _dfbfe . _abbgc + _gcef && _ffaga . _cebe <= _gdcfc && _dfbce <= _ffaga . _deee { return true ; } ; } ; return false ;
} ;
// New returns an Extractor instance for extracting content from the input PDF
// page. It is shorthand for NewWithOptions(page, nil).
func New(page *_ce.PdfPage) (*Extractor, error) {
	return NewWithOptions(page, nil)
}

// inDiacriticArea reports whether `_bced` sits close enough to the receiver:
// the summed offsets of the left and right edges must be smaller than the
// receiver's width times _geb, and the offset of the bottom edges smaller than
// its height times _geb.
func (_gacff *textMark) inDiacriticArea(_bced *textMark) bool {
	leftShift := _gacff.Llx - _bced.Llx
	rightShift := _gacff.Urx - _bced.Urx
	bottomShift := _gacff.Lly - _bced.Lly
	if !(_aa.Abs(leftShift+rightShift) < _gacff.Width()*_geb) {
		return false
	}
	return _aa.Abs(bottomShift) < _gacff.Height()*_geb
}

// getFillColor returns the Go colour for the non-stroking colour of the
// current graphics state.
func (_bec *textObject) getFillColor() _fg.Color {
	state := _bec._gbe
	return _agefd(state.ColorspaceNonStroking, state.ColorNonStroking)
}

// highestWord returns the first word stored under key `_efbg` whose _cffdg
// lies in [_egfd, _cegf], or nil if there is none.
func (_bgcc *wordBag) highestWord(_efbg int, _egfd, _cegf float64) *textWord {
	words := _bgcc._faba[_efbg]
	for idx := range words {
		word := words[idx]
		if !(_egfd <= word._cffdg && word._cffdg <= _cegf) {
			continue
		}
		return word
	}
	return nil
}

// list holds a slice of text lines, two strings and a slice of nested lists.
type list struct {
	_ggdb []*textLine
	_ffcg string
	_gbab []*list
	_dage string
}

// textWord is a bounded run of text marks together with its text, a font size
// and a float value (_cffdg) that other code uses to filter and order words.
type textWord struct {
	_ce.PdfRectangle
	_cffdg float64
	_bbdb  string
	_bgeaa []*textMark
	_ddgee float64
	_gfffc bool
}

// gridTile is a rectangle with four boolean flags.
// NOTE(review): the flags presumably record which of the four sides have a ruling — confirm.
type gridTile struct {
	_ce.PdfRectangle
	_fgde, _ebdg, _ddbaf, _ebga bool
}

// alignsPrimary reports whether the two rulings are of the same kind and their
// primary coordinates differ by less than half of _gcef.
func (_aeadcd *ruling) alignsPrimary(_efgb *ruling) bool {
	if _aeadcd._bfbc != _efgb._bfbc {
		return false
	}
	return _aa.Abs(_aeadcd._abbgc-_efgb._abbgc) < _gcef*0.5
}
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
//
// Replace with a function like Extract() (*PageText, error)
//
// An ErrColorOutOfRange error from the extraction step is tolerated; any other error aborts.
// When options are set, the crop box and document-tag flags are applied after the views are computed.
func ( _ead * Extractor ) ExtractPageText ( ) ( * PageText , int , int , error ) { _gfef , _adc , _ebg , _bgb := _ead . extractPageText ( _ead . _bcf , _ead . _ab , _bc . IdentityMatrix ( ) , 0 ) ; if _bgb != nil && _bgb != _ce . ErrColorOutOfRange { return nil , 0 , 0 , _bgb ; } ; if _ead . _fgb != nil { _gfef . _ccg . _fcgfa = _ead . _fgb . UseSimplerExtractionProcess ;
} ; _gfef . computeViews ( ) ; _bgb = _cbda ( _gfef ) ; if _bgb != nil { return nil , 0 , 0 , _bgb ; } ; if _ead . _fgb != nil { if _ead . _fgb . ApplyCropBox && _ead . _ed != nil { _gfef . ApplyArea ( * _ead . _ed ) ; } ; _gfef . _ccg . _efab = _ead . _fgb . DisableDocumentTags ; } ; return _gfef , _adc , _ebg , nil ;
} ;

// getFont returns the font named `_bce`, consulting the extractor's font cache (`_cd`) when
// one exists. The cache is keyed by the string form of the font dictionary and every entry
// carries an access stamp taken from the counter `_bg`. When the cache already holds `_gdgb`
// or more entries, the entry with the smallest stamp is evicted before the new font is stored.
func (_ceda *textObject) getFont(_bce string) (*_ce.PdfFont, error) {
	if _ceda._bgcb._cd != nil {
		_bacb, _bffc := _ceda.getFontDict(_bce)
		if _bffc != nil {
			_ec.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073", _bce, _bffc.Error())
			return nil, _bffc
		}
		_ceda._bgcb._bg++
		_cacheKey := _bacb.String()
		_ffa, _caac := _ceda._bgcb._cd[_cacheKey]
		if _caac {
			_ffa._addb = _ceda._bgcb._bg
			// The map stores fontEntry values, so `_ffa` is a copy. Store it back,
			// otherwise the refreshed access stamp is lost and eviction stops being
			// least-recently-used.
			_ceda._bgcb._cd[_cacheKey] = _ffa
			return _ffa._ddaa, nil
		}
	}
	_ace, _afea := _ceda.getFontDict(_bce)
	if _afea != nil {
		return nil, _afea
	}
	_caab, _afea := _ceda.getFontDirect(_bce)
	if _afea != nil {
		return nil, _afea
	}
	if _ceda._bgcb._cd != nil {
		_bfgc := fontEntry{_caab, _ceda._bgcb._bg}
		if len(_ceda._bgcb._cd) >= _gdgb {
			// Evict the entry with the oldest access stamp.
			var _fgcf []string
			for _fafb := range _ceda._bgcb._cd {
				_fgcf = append(_fgcf, _fafb)
			}
			_c.Slice(_fgcf, func(_cdgd, _gffb int) bool {
				return _ceda._bgcb._cd[_fgcf[_cdgd]]._addb < _ceda._bgcb._cd[_fgcf[_gffb]]._addb
			})
			delete(_ceda._bgcb._cd, _fgcf[0])
		}
		_ceda._bgcb._cd[_ace.String()] = _bfgc
	}
	return _caab, nil
}

// merge combines the rulings of the list into a single ruling: `_abbgc` is the mean of all
// elements, `_cebe` the minimum, `_deee` the maximum, and kind, mark kind and colour are
// taken from the first element. The list must not be empty (element 0 is read unconditionally).
func ( _bgbaa rulingList ) merge ( ) * ruling { _gdfdb := _bgbaa [ 0 ] . _abbgc ; _bbbb := _bgbaa [ 0 ] . _cebe ; _bfba := _bgbaa [ 0 ] . _deee ; for _ , _gbefc := range _bgbaa [ 1 : ] { _gdfdb += _gbefc . _abbgc ;
if _gbefc . _cebe < _bbbb { _bbbb = _gbefc . _cebe ; } ; if _gbefc . _deee > _bfba { _bfba = _gbefc . _deee ; } ; } ; _dggdg := & ruling { _bfbc : _bgbaa [ 0 ] . _bfbc , _bgaa : _bgbaa [ 0 ] . _bgaa , Color : _bgbaa [ 0 ] . Color , _abbgc : _gdfdb / float64 ( len ( _bgbaa ) ) , _cebe : _bbbb , _deee : _bfba } ;
if _fcfe { _ec . Log . Info ( "\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073" , len ( _bgbaa ) , _dggdg ) ; for _fcad , _geegc := range _bgbaa { _ge . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _fcad , _geegc ) ;
} ; } ; return _dggdg ; } ;

// taken reports whether the paragraph is nil or has its `_bfcd` flag set.
func ( _acca * textPara ) taken ( ) bool { return _acca == nil || _acca . _bfcd } ;

// splitSec sorts the list in place by (`_cebe`, `_deee`) and partitions it into groups.
// Each group is seeded with the first ruling not yet assigned, and then collects, in one pass
// over the sorted list, every unassigned ruling that alignsSec with a ruling already in the
// group. The list must not be empty (element 0 is read unconditionally).
func ( _fdfe rulingList ) splitSec ( ) [ ] rulingList { _c . Slice ( _fdfe , func ( _geffd , _bgca int ) bool { _egdg , _bbge := _fdfe [ _geffd ] , _fdfe [ _bgca ] ; if _egdg . _cebe != _bbge . _cebe { return _egdg . _cebe < _bbge . _cebe ;
} ; return _egdg . _deee < _bbge . _deee ; } ) ; _dffeb := make ( map [ * ruling ] struct { } , len ( _fdfe ) ) ; _gabed := func ( _fbcfb * ruling ) rulingList { _fgbbd := rulingList { _fbcfb } ; _dffeb [ _fbcfb ] = struct { } { } ; for _ , _decc := range _fdfe { if _ , _debe := _dffeb [ _decc ] ; _debe { continue ;
} ; for _ , _afgaa := range _fgbbd { if _decc . alignsSec ( _afgaa ) { _fgbbd = append ( _fgbbd , _decc ) ; _dffeb [ _decc ] = struct { } { } ; break ; } ; } ; } ; return _fgbbd ; } ; _fdgff := [ ] rulingList { _gabed ( _fdfe [ 0 ] ) } ; for _ , _feede := range _fdfe [ 1 : ] { if _ , _gead := _dffeb [ _feede ] ;
_gead { continue ; } ; _fdgff = append ( _fdgff , _gabed ( _feede ) ) ; } ; return _fdgff ; } ;

// alignsSec reports whether the [_cebe, _deee] ranges of the two rulings overlap once each
// lower bound has been relaxed by `_gcef + 1.0`.
func ( _agbd * ruling ) alignsSec ( _cgdae * ruling ) bool { const _ecce = _gcef + 1.0 ; return _agbd . _cebe - _ecce <= _cgdae . _deee && _cgdae . _cebe - _ecce <= _agbd . _deee ; } ;

// removeWord removes `_cdaeb` from bin `_effe` using `_beff`, and deletes the bin from the
// map when the resulting slice is empty.
func ( _dfge * wordBag ) removeWord ( _cdaeb * textWord , _effe int ) { _eeac := _dfge . _faba [ _effe ] ;
_eeac = _beff ( _eeac , _cdaeb ) ; if len ( _eeac ) == 0 { delete ( _dfge . _faba , _effe ) ; } else { _dfge . _faba [ _effe ] = _eeac ; } ; } ;

// reduceTiling drops rows and columns of the table that are empty composites and lie closer
// than `_egabb` to a neighbouring row or column of the tiling `_aadf`. Rows are compared with
// the previous coordinate in `_agbb`, columns with the next coordinate in `_cgecb`. The receiver
// itself is returned when nothing is dropped; otherwise a new table holding the surviving
// composite cells, re-indexed, is returned.
func ( _fbgg * textTable ) reduceTiling ( _aadf gridTiling , _egabb float64 ) * textTable { _cbcg := make ( [ ] int , 0 , _fbgg . _dcfg ) ; _cdgee := make ( [ ] int , 0 , _fbgg . _ecbf ) ;
_dadfb := _aadf . _cgecb ; _gbed := _aadf . _agbb ; for _ecfg := 0 ; _ecfg < _fbgg . _dcfg ; _ecfg ++ { _dgbbf := _ecfg > 0 && _aa . Abs ( _gbed [ _ecfg - 1 ] - _gbed [ _ecfg ] ) < _egabb && _fbgg . emptyCompositeRow ( _ecfg ) ; if ! _dgbbf { _cbcg = append ( _cbcg , _ecfg ) ; } ; } ; for _fcdfa := 0 ;
_fcdfa < _fbgg . _ecbf ; _fcdfa ++ { _bace := _fcdfa < _fbgg . _ecbf - 1 && _aa . Abs ( _dadfb [ _fcdfa + 1 ] - _dadfb [ _fcdfa ] ) < _egabb && _fbgg . emptyCompositeColumn ( _fcdfa ) ; if ! _bace { _cdgee = append ( _cdgee , _fcdfa ) ; } ; } ; if len ( _cbcg ) == _fbgg . _dcfg && len ( _cdgee ) == _fbgg . _ecbf { return _fbgg ;
} ; _fefeb := textTable { _beaeg : _fbgg . _beaeg , _ecbf : len ( _cdgee ) , _dcfg : len ( _cbcg ) , _egfe : make ( map [ uint64 ] compositeCell , len ( _cdgee ) * len ( _cbcg ) ) } ; if _afcg { _ec . Log . Info ( "\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064" , _fbgg . _ecbf , _fbgg . _dcfg , len ( _cdgee ) , len ( _cbcg ) ) ;
_ec . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076" , _cdgee ) ; _ec . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076" , _cbcg ) ; } ; for _egeca , _aabgd := range _cbcg { for _gceaa , _ddgfb := range _cdgee { _aafdd , _fbcfa := _fbgg . getComposite ( _ddgfb , _aabgd ) ;
if len ( _aafdd ) == 0 { continue ; } ; if _afcg { _ge . Printf ( "\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n" , _gceaa , _egeca , _ddgfb , _aabgd , _adagc ( _aafdd . merge ( ) . text ( ) , 50 ) ) ; } ; _fefeb . putComposite ( _gceaa , _egeca , _aafdd , _fbcfa ) ;
} ; } ; return & _fefeb ; } ;

// text returns the `_bbdb` strings of all words in the bag joined by single spaces.
func ( _dfcfg * wordBag ) text ( ) string { _ccbfg := _dfcfg . allWords ( ) ; _fbg := make ( [ ] string , len ( _ccbfg ) ) ; for _fcfg , _fcge := range _ccbfg { _fbg [ _fcfg ] = _fcge . _bbdb ; } ; return _gd . Join ( _fbg , "\u0020" ) ; } ;

// _abdg reports whether the horizontal extents of bag `_dced` and word `_cdfcc` overlap after
// the bag's extent has been widened by `_gbcaa` on both sides.
func _abdg ( _dced * wordBag , _cdfcc * textWord , _gbcaa float64 ) bool { return _cdfcc . Llx < _dced . Urx + _gbcaa && _dced . Llx - _gbcaa < _cdfcc . Urx ;
} ;
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
// Meta marks and marks whose text is only whitespace are ignored. The boolean result is
// false when no mark contributed to the rectangle.
func (_daea *TextMarkArray) BBox() (_ce.PdfRectangle, bool) {
	var (
		box   _ce.PdfRectangle
		found bool
	)
	for _, mark := range _daea._dec {
		if mark.Meta || _ecga(mark.Text) {
			continue
		}
		if !found {
			box, found = mark.BBox, true
			continue
		}
		box = _cdggc(box, mark.BBox)
	}
	return box, found
}

// _ccaa writes the text of `_bafg` (as produced by `_eabf`) to `_ddcf` and then recurses into
// the child lists, each with an indent three spaces longer than `*_babg`.
func _ccaa(_bafg *list, _ddcf *_gd.Builder, _babg *string) {
	_ddcf.WriteString(_eabf(_bafg, _babg))
	for i := range _bafg._gbab {
		// A fresh indent variable per child: the callee receives its address.
		childIndent := *_babg + "\u0020\u0020\u0020"
		_ccaa(_bafg._gbab[i], _ddcf, &childIndent)
	}
}

// _ecga reports whether `_bdadd` contains only Unicode whitespace (true for the empty string).
func _ecga(_bdadd string) bool {
	notSpace := func(r rune) bool { return !_de.IsSpace(r) }
	return _gd.IndexFunc(_bdadd, notSpace) < 0
}

// bbox returns the bounding rectangle of the line.
func (_gfbb *textLine) bbox() _ce.PdfRectangle {
	return _gfbb.PdfRectangle
}

// _gdfab converts each struct element in `_bfcg` into a list via `_baba`. An element receives
// the lines `_ebagg[id]` only when its id (`_efcc`) is not -1, its `_geaa` object is a reference,
// `_bgbce` is an indirect object, and the two references are deeply equal. Child elements are
// converted recursively.
func _gdfab(_bfcg []structElement, _ebagg map[int][]*textLine, _bgbce _gb.PdfObject) []*list {
	out := []*list{}
	for _, elem := range _bfcg {
		id := int(elem._efcc)
		lines := []*textLine{}
		children := []*list{}

		ref, isRef := elem._geaa.(*_gb.PdfObjectReference)
		if !isRef {
			_ec.Log.Debug("\u0066\u0061\u0069l\u0065\u0064\u0020\u006f\u0074\u0020\u0063\u0061\u0073\u0074\u0020\u0074\u006f\u0020\u002a\u0063\u006f\u0072\u0065\u002e\u0050\u0064\u0066\u004f\u0062\u006a\u0065\u0063\u0074R\u0065\u0066\u0065\u0072\u0065\u006e\u0063\u0065")
		}
		if id != -1 && ref != nil {
			if candidate, present := _ebagg[id]; present {
				if indirect, isIndirect := _bgbce.(*_gb.PdfIndirectObject); isIndirect {
					if _gc.DeepEqual(*ref, indirect.PdfObjectReference) {
						lines = candidate
					}
				}
			}
		}
		if elem._cagb != nil {
			children = _gdfab(elem._cagb, _ebagg, _bgbce)
		}
		out = append(out, _baba(lines, elem._bacdb, children))
	}
	return out
}
// Text gets the extracted text contained in `l`.
// The text is assembled by `_ccaa`, starting with an empty indent.
func (_ccdc *list) Text() string {
	var sb _gd.Builder
	indent := ""
	_ccaa(_ccdc, &sb, &indent)
	return sb.String()
}

// event is a small record of a float value, a flag and an integer.
type event struct {
	_gcaf   float64
	_cgcbd  bool
	_bffcfb int
}
// String returns a description of `p`: its rectangle, an optional "[WxH] " table-size prefix,
// the number of lines and the text truncated by `_adagc` to 50. Paragraphs flagged `_bedf`
// are described as "[EMPTY]".
func (_bcbd *textPara) String() string {
	if _bcbd._bedf {
		return _ge.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", _bcbd.PdfRectangle)
	}
	var tablePrefix string
	if table := _bcbd._bddea; table != nil {
		tablePrefix = _ge.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", table._ecbf, table._dcfg)
	}
	lineCount := len(_bcbd._bdbcg)
	summary := _adagc(_bcbd.text(), 50)
	return _ge.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", _bcbd.PdfRectangle, tablePrefix, lineCount, summary)
}

// bbox returns the bounding rectangle of the mark.
func (_dcag *textMark) bbox() _ce.PdfRectangle {
	return _dcag.PdfRectangle
}
// GetContentStreamOps returns the contentStreamOps field of `pt`.
func (_bdab *PageText) GetContentStreamOps() *_ag.ContentStreamOperations {
	return _bdab._ecfe
}

// stateStack is a stack of text states (see its push, pop and top methods).
type stateStack []*textState

// _dgfb returns the translation matrix that moves the origin to the point `_dgda`.
func _dgfb(_dgda _bc.Point) _bc.Matrix {
	return _bc.TranslationMatrix(_dgda.X, _dgda.Y)
}

// _efe is a meta TextMark with text "[X]", a single space as original text,
// and white fill and stroke colours.
var _efe = TextMark{
	Text:        "\u005b\u0058\u005d",
	Original:    "\u0020",
	Meta:        true,
	FillColor:   _fg.White,
	StrokeColor: _fg.White,
}
// contains reports whether `_ecef` lies inside the tile. Tiles with fewer than three borders
// never contain anything. A side is checked only when its border flag is set, and `_ecef`
// may stick out past a checked side by at most `_fdgb`.
func (_eeaa gridTile) contains(_ecef _ce.PdfRectangle) bool {
	switch {
	case _eeaa.numBorders() < 3:
		return false
	case _eeaa._ebdg && _ecef.Llx < _eeaa.Llx-_fdgb:
		return false
	case _eeaa._ebga && _ecef.Urx > _eeaa.Urx+_fdgb:
		return false
	case _eeaa._ddbaf && _ecef.Lly < _eeaa.Lly-_fdgb:
		return false
	case _eeaa._fgde && _ecef.Ury > _eeaa.Ury+_fdgb:
		return false
	}
	return true
}

// removeDuplicates removes words that duplicate another word in the same bin: same text and
// all four rectangle coordinates within a tolerance of `_bada` times the `_ddgee` value of the
// first word of the bin being visited. One word of each duplicate group is kept.
func (_bcbdg *wordBag) removeDuplicates() {
	if _fbfc {
		_ec.Log.Info("r\u0065m\u006f\u0076\u0065\u0044\u0075\u0070\u006c\u0069c\u0061\u0074\u0065\u0073: \u0025\u0071", _bcbdg.text())
	}
	for _, outerIdx := range _bcbdg.depthIndexes() {
		if len(_bcbdg._faba[outerIdx]) == 0 {
			continue
		}
		first := _bcbdg._faba[outerIdx][0]
		tol := _bada * first._ddgee
		base := first._cffdg
		for _, binIdx := range _bcbdg.depthBand(base, base+tol) {
			dups := map[*textWord]struct{}{}
			words := _bcbdg._faba[binIdx]
			for _, keep := range words {
				if _, marked := dups[keep]; marked {
					continue
				}
				for _, cand := range words {
					if _, marked := dups[cand]; marked {
						continue
					}
					if cand == keep || cand._bbdb != keep._bbdb {
						continue
					}
					if _aa.Abs(cand.Llx-keep.Llx) < tol &&
						_aa.Abs(cand.Urx-keep.Urx) < tol &&
						_aa.Abs(cand.Lly-keep.Lly) < tol &&
						_aa.Abs(cand.Ury-keep.Ury) < tol {
						dups[cand] = struct{}{}
					}
				}
			}
			if len(dups) == 0 {
				continue
			}
			// Compact the survivors to the front of the slice, then truncate.
			n := 0
			for _, w := range words {
				if _, marked := dups[w]; marked {
					continue
				}
				words[n] = w
				n++
			}
			_bcbdg._faba[binIdx] = words[:len(words)-len(dups)]
			if len(_bcbdg._faba[binIdx]) == 0 {
				delete(_bcbdg._faba, binIdx)
			}
		}
	}
}

// moveLP concatenates a translation by (`_efgd`, `_badcf`) onto the matrix `_ecg` and then
// copies `_ecg` into `_eee`.
func (_ddge *textObject) moveLP(_efgd, _badcf float64) {
	shift := _bc.NewMatrix(1, 0, 0, 1, _efgd, _badcf)
	_ddge._ecg.Concat(shift)
	_ddge._eee = _ddge._ecg
}
// String returns a description of `l`: its `_bcdg` value, rectangle, `_ecag` value (labelled
// "fontsize" in the format string) and quoted text.
func ( _gdcef * textLine ) String ( ) string { return _ge . Sprintf ( "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022" , _gdcef . _bcdg , _gdcef . PdfRectangle , _gdcef . _ecag , _gdcef . text ( ) ) ;
} ;
// split divides the composite cell into a table of len(_cbd)+1 rows by len(_ggffa)+1 columns.
// `_cbd` and `_ggffa` are first extended by `_ggba` with the cell's (Ury, Lly) and (Llx, Urx)
// edges respectively. The cell's paragraphs are sorted in place by (Lly, Llx). Each line is then
// assigned to the first sub-rectangle for which `_fafg` returns true, and every sub-rectangle
// that received at least one line gets a paragraph built by `_ffec`. Lines matching no
// sub-rectangle are dropped. Debug output is printed when `_afcg` is set.
func ( _dfee compositeCell ) split ( _cbd , _ggffa [ ] float64 ) * textTable { _fcgc := len ( _cbd ) + 1 ; _bffcg := len ( _ggffa ) + 1 ; if _afcg { _ec . Log . Info ( "\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a" + "\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066" , _bffcg , _fcgc , _dfee , _cbd , _ggffa ) ;
_ge . Printf ( "\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a" , len ( _dfee . paraList ) ) ; for _deff , _beage := range _dfee . paraList { _ge . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _deff , _beage . String ( ) ) ; } ;
_ge . Printf ( "\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a" , len ( _dfee . lines ( ) ) ) ; for _egac , _ddcd := range _dfee . lines ( ) { _ge . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _egac , _ddcd ) ; } ; } ; _cbd = _ggba ( _cbd , _dfee . Ury , _dfee . Lly ) ;
_ggffa = _ggba ( _ggffa , _dfee . Llx , _dfee . Urx ) ; _gbee := make ( map [ uint64 ] * textPara , _bffcg * _fcgc ) ; _dfeb := textTable { _ecbf : _bffcg , _dcfg : _fcgc , _gcbga : _gbee } ; _ddbb := _dfee . paraList ; _c . Slice ( _ddbb , func ( _cfdb , _fafae int ) bool { _dggg , _adcga := _ddbb [ _cfdb ] , _ddbb [ _fafae ] ;
_ecgd , _cgbdg := _dggg . Lly , _adcga . Lly ; if _ecgd != _cgbdg { return _ecgd < _cgbdg ; } ; return _dggg . Llx < _adcga . Llx ; } ) ; _abbc := make ( map [ uint64 ] _ce . PdfRectangle , _bffcg * _fcgc ) ; for _efaa , _cccd := range _cbd [ 1 : ] { _ccfgc := _cbd [ _efaa ] ; for _gagg , _ffdd := range _ggffa [ 1 : ] { _ggef := _ggffa [ _gagg ] ;
_abbc [ _aaca ( _gagg , _efaa ) ] = _ce . PdfRectangle { Llx : _ggef , Urx : _ffdd , Lly : _cccd , Ury : _ccfgc } ; } ; } ; if _afcg { _ec . Log . Info ( "\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073" ) ;
_ge . Printf ( "\u0020\u0020\u0020\u0020" ) ; for _fgbb := 0 ; _fgbb < _bffcg ; _fgbb ++ { _ge . Printf ( "\u0025\u0033\u0030\u0064\u002c\u0020" , _fgbb ) ; } ; _ge . Println ( ) ; for _cdef := 0 ; _cdef < _fcgc ; _cdef ++ { _ge . Printf ( "\u0020\u0020\u0025\u0032\u0064\u003a" , _cdef ) ;
for _ebbe := 0 ; _ebbe < _bffcg ; _ebbe ++ { _ge . Printf ( "\u00256\u002e\u0032\u0066\u002c\u0020" , _abbc [ _aaca ( _ebbe , _cdef ) ] ) ; } ; _ge . Println ( ) ; } ; } ; _dbf := func ( _aecfd * textLine ) ( int , int ) { for _ddgc := 0 ; _ddgc < _fcgc ; _ddgc ++ { for _abdcd := 0 ; _abdcd < _bffcg ;
_abdcd ++ { if _fafg ( _abbc [ _aaca ( _abdcd , _ddgc ) ] , _aecfd . PdfRectangle ) { return _abdcd , _ddgc ; } ; } ; } ; return - 1 , - 1 ; } ; _eadg := make ( map [ uint64 ] [ ] * textLine , _bffcg * _fcgc ) ; for _ , _adfa := range _ddbb . lines ( ) { _cdgab , _ccde := _dbf ( _adfa ) ; if _cdgab < 0 { continue ;
} ; _eadg [ _aaca ( _cdgab , _ccde ) ] = append ( _eadg [ _aaca ( _cdgab , _ccde ) ] , _adfa ) ; } ; for _dgfg := 0 ; _dgfg < len ( _cbd ) - 1 ; _dgfg ++ { _gffa := _cbd [ _dgfg ] ; _adgg := _cbd [ _dgfg + 1 ] ; for _fggc := 0 ; _fggc < len ( _ggffa ) - 1 ; _fggc ++ { _ebcc := _ggffa [ _fggc ] ; _dbdb := _ggffa [ _fggc + 1 ] ;
_eggc := _ce . PdfRectangle { Llx : _ebcc , Urx : _dbdb , Lly : _adgg , Ury : _gffa } ; _afaa := _eadg [ _aaca ( _fggc , _dgfg ) ] ; if len ( _afaa ) == 0 { continue ; } ; _eaded := _ffec ( _eggc , _afaa ) ; _dfeb . put ( _fggc , _dgfg , _eaded ) ; } ; } ; return & _dfeb ; } ;
// _fbda converts exactly two operands to floats. It returns an error when the number of
// operands is not 2 or when the numeric conversion fails.
func _fbda ( _aeeg [ ] _gb . PdfObject ) ( _ffgeg , _fcbga float64 , _acgec error ) { if len ( _aeeg ) != 2 { return 0 , 0 , _ge . Errorf ( "\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064" , len ( _aeeg ) ) ;
} ; _fdecb , _acgec := _gb . GetNumbersAsFloat ( _aeeg ) ; if _acgec != nil { return 0 , 0 , _acgec ; } ; return _fdecb [ 0 ] , _fdecb [ 1 ] , nil ; } ;
// subpath is a sequence of points plus a flag; presumably the flag marks a closed path (confirm at the definition of close()).
type subpath struct { _gfefe [ ] _bc . Point ; _cgde bool ; } ;
// newTablePara wraps the table in a textPara whose rectangle and `_ebcf` are both set to the
// table's computed bounding box.
func ( _bedc * textTable ) newTablePara ( ) * textPara { _fbfeg := _bedc . computeBbox ( ) ;
_cadab := & textPara { PdfRectangle : _fbfeg , _ebcf : _fbfeg , _bddea : _bedc } ; if _afcg { _ec . Log . Info ( "\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073" , _cadab ) ; } ; return _cadab ; } ;
// fontsize returns the `_ecag` value of the paragraph's first line. It panics when the
// paragraph has no lines.
func ( _ebbc * textPara ) fontsize ( ) float64 { return _ebbc . _bdbcg [ 0 ] . _ecag } ;
// top returns the state on top of the stack without removing it, or nil when the stack is empty.
func ( _adfe * stateStack ) top ( ) * textState { if _adfe . empty ( ) { return nil ; } ; return ( * _adfe ) [ _adfe . size ( ) - 1 ] ; } ;

// pop removes the top state and returns a pointer to a copy of it, or nil when the stack is empty.
func ( _cbbf * stateStack ) pop ( ) * textState { if _cbbf . empty ( ) { return nil ; } ; _aefc := * ( * _cbbf ) [ len ( * _cbbf ) - 1 ] ; * _cbbf = ( * _cbbf ) [ : len ( * _cbbf ) - 1 ] ;
return & _aefc ; } ;

// _beff returns `_cafb` with the first occurrence of `_fefbf` (compared by pointer) removed.
// When the word is not present an error is logged and the slice is returned unchanged, so that
// callers such as removeWord do not discard unrelated words.
func _beff(_cafb []*textWord, _fefbf *textWord) []*textWord {
	for _eadcd, _gdaga := range _cafb {
		if _gdaga == _fefbf {
			return _ddbf(_cafb, _eadcd)
		}
	}
	_ec.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", _fefbf)
	// Returning nil here used to make removeWord delete the whole bin.
	return _cafb
}

const _adee = 20

// empty reports whether the bag has no bin with index `_ggaf`.
func ( _dcee * wordBag ) empty ( _ggaf int ) bool { _ , _cdgb := _dcee . _faba [ _ggaf ] ; return ! _cdgb } ;

// rulingList is a list of rulings.
type rulingList [ ] * ruling ;

// applyTables returns a new paragraph list: one table paragraph per table in `_gece`,
// followed by every paragraph of the receiver whose `_bfcd` flag is not set.
func ( _decfg paraList ) applyTables ( _gece [ ] * textTable ) paraList { var _ebfff paraList ; for _ , _bcgcf := range _gece { _ebfff = append ( _ebfff , _bcgcf . newTablePara ( ) ) ;
} ; for _ , _bede := range _decfg { if _bede . _bfcd { continue ; } ; _ebfff = append ( _ebfff , _bede ) ; } ; return _ebfff ; } ;

// _fabg returns the difference between the left edges of `_fggd` and `_cbee`.
func _fabg ( _fggd , _cbee bounded ) float64 { return _fggd . bbox ( ) . Llx - _cbee . bbox ( ) . Llx } ;

// _eaecc draws image `_gcggc` through mask `_dgea` onto a new RGBA image and returns it.
// The output is as large as the larger of the two inputs in each dimension; whichever input is
// smaller than that is first resampled (bilinear) to the output size.
func _eaecc(_gcggc, _dgea _ded.Image) _ded.Image {
	_bdgee, _ffdb := _dgea.Bounds().Size(), _gcggc.Bounds().Size()
	_cbgbb, _gfaec := _bdgee.X, _bdgee.Y
	if _ffdb.X > _cbgbb {
		_cbgbb = _ffdb.X
	}
	if _ffdb.Y > _gfaec {
		_gfaec = _ffdb.Y
	}
	_ffad := _ded.Rect(0, 0, _cbgbb, _gfaec)
	if _bdgee.X != _cbgbb || _bdgee.Y != _gfaec {
		_gcfc := _ded.NewRGBA(_ffad)
		// Resample the mask itself. The previous code sampled the image here, which
		// replaced the mask with image pixels.
		_b.BiLinear.Scale(_gcfc, _ffad, _dgea, _dgea.Bounds(), _b.Over, nil)
		_dgea = _gcfc
	}
	if _ffdb.X != _cbgbb || _ffdb.Y != _gfaec {
		_bgfab := _ded.NewRGBA(_ffad)
		_b.BiLinear.Scale(_bgfab, _ffad, _gcggc, _gcggc.Bounds(), _b.Over, nil)
		_gcggc = _bgfab
	}
	_dgdf := _ded.NewRGBA(_ffad)
	_b.DrawMask(_dgdf, _ffad, _gcggc, _ded.Point{}, _dgea, _ded.Point{}, _b.Over)
	return _dgdf
}
// NewFromContents creates a new extractor from contents and page resources.
// The extractor starts with empty font and text-result caches, and the call is reported to the
// license tracker.
func NewFromContents ( contents string , resources * _ce . PdfPageResources ) ( * Extractor , error ) { const _abf = "\u0065x\u0074\u0072\u0061\u0063t\u006f\u0072\u002e\u004e\u0065w\u0046r\u006fm\u0043\u006f\u006e\u0074\u0065\u006e\u0074s" ; _ac := & Extractor { _bcf : contents , _ab : resources , _cd : map [ string ] fontEntry { } , _fa : map [ string ] textResult { } } ;
_bd . TrackUse ( _abf ) ; return _ac , nil ; } ;

// ruling holds a kind, a mark kind, a colour, the coordinate `_abbgc`, the range [_cebe, _deee]
// and one further float.
type ruling struct { _bfbc rulingKind ; _bgaa markKind ; _fg . Color ; _abbgc float64 ; _cebe float64 ; _deee float64 ; _bcgdf float64 ; } ;

// reset sets both text matrices to the identity and clears `_cfb`.
func ( _bgfc * textObject ) reset ( ) { _bgfc . _eee = _bc . IdentityMatrix ( ) ; _bgfc . _ecg = _bc . IdentityMatrix ( ) ;
_bgfc . _cfb = nil ; } ;

// extractTables finds tables among the paragraphs using the tilings `_bafge` and returns the
// paragraph list with those tables applied. Lists shorter than `_ddd` are returned unchanged.
func ( _cebb paraList ) extractTables ( _bafge [ ] gridTiling ) paraList { if _afcg { _ec . Log . Debug ( "\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d" , len ( _cebb ) ) ;
} ; if len ( _cebb ) < _ddd { return _cebb ; } ; _deeee := _cebb . findTables ( _bafge ) ; if _afcg { _ec . Log . Info ( "c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d" , len ( _deeee ) ) ;
for _dbeba , _dcbfg := range _deeee { _dcbfg . log ( _ge . Sprintf ( "c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064" , _dbeba ) ) ; } ; } ; return _cebb . applyTables ( _deeee ) ; } ;

// _edege returns the distinct inner keys of the nested map in ascending order.
func _edege ( _eccbb map [ float64 ] map [ float64 ] gridTile ) [ ] float64 { _fege := make ( [ ] float64 , 0 , len ( _eccbb ) ) ;
_gaagc := make ( map [ float64 ] struct { } , len ( _eccbb ) ) ; for _ , _dffda := range _eccbb { for _aceb := range _dffda { if _ , _ggabg := _gaagc [ _aceb ] ; _ggabg { continue ; } ; _fege = append ( _fege , _aceb ) ; _gaagc [ _aceb ] = struct { } { } ; } ; } ; _c . Float64s ( _fege ) ; return _fege ;
} ;

// _dcbd reports whether the two points have exactly equal coordinates.
func _dcbd ( _egaga , _fagcg _bc . Point ) bool { return _egaga . X == _fagcg . X && _egaga . Y == _fagcg . Y } ;

// _cad returns the intersection of the two rectangles, and false when `_decgb` reports that
// they do not intersect.
func _cad ( _adef , _dcgg _ce . PdfRectangle ) ( _ce . PdfRectangle , bool ) { if ! _decgb ( _adef , _dcgg ) { return _ce . PdfRectangle { } , false ; } ; return _ce . PdfRectangle { Llx : _aa . Max ( _adef . Llx , _dcgg . Llx ) , Urx : _aa . Min ( _adef . Urx , _dcgg . Urx ) , Lly : _aa . Max ( _adef . Lly , _dcgg . Lly ) , Ury : _aa . Min ( _adef . Ury , _dcgg . Ury ) } , true ;
} ;

// _fdg reports whether the vertical extents of the two rectangles overlap (touching counts).
func _fdg ( _gfaf , _bdbb _ce . PdfRectangle ) bool { return _gfaf . Lly <= _bdbb . Ury && _bdbb . Lly <= _gfaf . Ury ; } ;

// getComposite returns the paragraphs and bounding box stored for the composite cell at
// (`_cgcba`, `_cdcbg`), or nil and an empty rectangle when no such cell exists.
func ( _fcea * textTable ) getComposite ( _cgcba , _cdcbg int ) ( paraList , _ce . PdfRectangle ) { _gggd , _cedf := _fcea . _egfe [ _aaca ( _cgcba , _cdcbg ) ] ;
if _afcg { _ge . Printf ( "\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a" , _cgcba , _cdcbg , _gggd . String ( ) ) ; } ; if ! _cedf { return nil , _ce . PdfRectangle { } ;
} ; return _gggd . parasBBox ( ) ; } ;

// markCells sets the `_bfcd` flag on every non-nil cell paragraph of the table.
func ( _bcaa * textTable ) markCells ( ) { for _bcae := 0 ; _bcae < _bcaa . _dcfg ; _bcae ++ { for _badfcb := 0 ; _badfcb < _bcaa . _ecbf ; _badfcb ++ { _aece := _bcaa . get ( _badfcb , _bcae ) ; if _aece != nil { _aece . _bfcd = true ; } ; } ; } ; } ;

// llyOrdering returns the paragraph indexes stably sorted by ascending Lly.
func ( _gcea paraList ) llyOrdering ( ) [ ] int { _debf := make ( [ ] int , len ( _gcea ) ) ;
for _bfaec := range _gcea { _debf [ _bfaec ] = _bfaec ; } ; _c . SliceStable ( _debf , func ( _ddaf , _beag int ) bool { _bcbge , _ceaeg := _debf [ _ddaf ] , _debf [ _beag ] ; return _gcea [ _bcbge ] . Lly < _gcea [ _ceaeg ] . Lly ; } ) ; return _debf ; } ;

// depth returns the smallest depth() among the non-nil, non-empty cells of row 0,
// or 1e10 when that row has no such cell.
func ( _ebcfd * textTable ) depth ( ) float64 { _cfgef := 1e10 ;
for _dcgaa := 0 ; _dcgaa < _ebcfd . _ecbf ; _dcgaa ++ { _cdcef := _ebcfd . get ( _dcgaa , 0 ) ; if _cdcef == nil || _cdcef . _bedf { continue ; } ; _cfgef = _aa . Min ( _cfgef , _cdcef . depth ( ) ) ; } ; return _cfgef ; } ;

// _afaae returns the keys of the map in ascending order.
func _afaae ( _cecfe map [ int ] intSet ) [ ] int { _febbf := make ( [ ] int , 0 , len ( _cecfe ) ) ;
for _fbfcb := range _cecfe { _febbf = append ( _febbf , _fbfcb ) ; } ; _c . Ints ( _febbf ) ; return _febbf ; } ;

// _bbea creates a word bag holding only `_gcbgg`, filed under the bin index `_fece` computes
// from the word's `_cffdg`, and carrying the given float and the two ruling lists.
func _bbea ( _gcbgg * textWord , _dbeb float64 , _cagf , _bbeg rulingList ) * wordBag { _aagd := _fece ( _gcbgg . _cffdg ) ; _gdbfb := [ ] * textWord { _gcbgg } ; _feee := wordBag { _faba : map [ int ] [ ] * textWord { _aagd : _gdbfb } , PdfRectangle : _gcbgg . PdfRectangle , _egfa : _gcbgg . _ddgee , _cfg : _dbeb , _eeca : _cagf , _gbef : _bbeg } ;
return & _feee ; } ;

// parseStructElement fills the receiver from the structure-element dictionary `_eead`.
// The "S" entry becomes `_bacdb` and "Pg" becomes `_geaa`. The "K" entry is handled by type:
// an integer is stored in `_efcc`; a reference or an array has its integer items stored in
// `_efcc` and its other items parsed recursively into `_cagb`. Nothing is done when `_eead`
// is not a dictionary.
func ( _ccdge * structElement ) parseStructElement ( _eead _gb . PdfObject ) { _fdad , _fafe := _gb . GetDict ( _eead ) ; if ! _fafe { _ec . Log . Debug ( "\u0070\u0061\u0072\u0073\u0065\u0053\u0074\u0072u\u0063\u0074\u0045le\u006d\u0065\u006e\u0074\u003a\u0020d\u0069\u0063\u0074\u0069\u006f\u006e\u0061\u0072\u0079\u0020\u006f\u0062\u006a\u0065\u0063t\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075n\u0064\u002e" ) ;
return ; } ; _aecde := _fdad . Get ( "\u0053" ) ; _gedc := _fdad . Get ( "\u0050\u0067" ) ; _dfbc := "" ; if _aecde != nil { _dfbc = _aecde . String ( ) ; } ; _gbda := _fdad . Get ( "\u004b" ) ; _ccdge . _bacdb = _dfbc ; _ccdge . _geaa = _gedc ; switch _dedb := _gbda . ( type ) { case * _gb . PdfObjectInteger : _ccdge . _bacdb = _dfbc ;
_ccdge . _efcc = int64 ( * _dedb ) ; _ccdge . _geaa = _gedc ; case * _gb . PdfObjectReference : _aebf := * _gb . MakeArray ( _dedb ) ; var _gdcc int64 = - 1 ; _ccdge . _efcc = _gdcc ; if _aebf . Len ( ) == 1 { _abe := _aebf . Elements ( ) [ 0 ] ; _acec , _ceega := _abe . ( * _gb . PdfObjectInteger ) ;
if _ceega { _gdcc = int64 ( * _acec ) ; _ccdge . _efcc = _gdcc ; _ccdge . _bacdb = _dfbc ; _ccdge . _geaa = _gedc ; return ; } ; } ; _aegd := [ ] structElement { } ; for _ , _fgaa := range _aebf . Elements ( ) { _fdee , _fdaaf := _fgaa . ( * _gb . PdfObjectInteger ) ; if _fdaaf { _gdcc = int64 ( * _fdee ) ;
_ccdge . _efcc = _gdcc ; _ccdge . _bacdb = _dfbc ; } else { _dddg := & structElement { } ; _dddg . parseStructElement ( _fgaa ) ; _aegd = append ( _aegd , * _dddg ) ; } ; _gdcc = - 1 ; } ; _ccdge . _cagb = _aegd ; case * _gb . PdfObjectArray : _gdcab := _gbda . ( * _gb . PdfObjectArray ) ; var _baef int64 = - 1 ;
_ccdge . _efcc = _baef ; if _gdcab . Len ( ) == 1 { _acdd := _gdcab . Elements ( ) [ 0 ] ; _afac , _gbbgd := _acdd . ( * _gb . PdfObjectInteger ) ; if _gbbgd { _baef = int64 ( * _afac ) ; _ccdge . _efcc = _baef ; _ccdge . _bacdb = _dfbc ; _ccdge . _geaa = _gedc ; return ; } ; } ; _dcfc := [ ] structElement { } ;
for _ , _gbcb := range _gdcab . Elements ( ) { _effg , _caaa := _gbcb . ( * _gb . PdfObjectInteger ) ; if _caaa { _baef = int64 ( * _effg ) ; _ccdge . _efcc = _baef ; _ccdge . _bacdb = _dfbc ; _ccdge . _geaa = _gedc ; } else { _fbdd := & structElement { } ; _fbdd . parseStructElement ( _gbcb ) ;
_dcfc = append ( _dcfc , * _fbdd ) ; } ; _baef = - 1 ; } ; _ccdge . _cagb = _dcfc ; } ; } ;

// buildList collects the "L" elements and, via `_cgbe`, the contents of "Table" elements found
// directly under the top element, and converts them into lists using the per-id lines `_cbfb`
// and the page object `_gcge`. A single root child is used as the top element only when its
// type is Document, Sect, Part, Div or Art. The result is nil when the receiver is nil or no
// top element could be chosen.
func ( _cdgbf * structTreeRoot ) buildList ( _cbfb map [ int ] [ ] * textLine , _gcge _gb . PdfObject ) [ ] * list { if _cdgbf == nil { _ec . Log . Debug ( "\u0062\u0075\u0069\u006c\u0064\u004c\u0069\u0073\u0074\u003a\u0020t\u0072\u0065\u0065\u0052\u006f\u006f\u0074\u0020\u0069\u0073 \u006e\u0069\u006c" ) ;
return nil ; } ; var _eabc * structElement ; _dadc := [ ] structElement { } ; if len ( _cdgbf . _dgbb ) == 1 { _efae := _cdgbf . _dgbb [ 0 ] . _bacdb ; if _efae == "\u0044\u006f\u0063\u0075\u006d\u0065\u006e\u0074" || _efae == "\u0053\u0065\u0063\u0074" || _efae == "\u0050\u0061\u0072\u0074" || _efae == "\u0044\u0069\u0076" || _efae == "\u0041\u0072\u0074" { _eabc = & _cdgbf . _dgbb [ 0 ] ;
} ; } else { _eabc = & structElement { _cagb : _cdgbf . _dgbb , _bacdb : _cdgbf . _fdec } ; } ; if _eabc == nil { _ec . Log . Debug ( "\u0062\u0075\u0069\u006cd\u004c\u0069\u0073\u0074\u003a\u0020\u0074\u006f\u0070\u0045l\u0065m\u0065\u006e\u0074\u0020\u0069\u0073\u0020n\u0069\u006c" ) ;
return nil ; } ; for _ , _aead := range _eabc . _cagb { if _aead . _bacdb == "\u004c" { _dadc = append ( _dadc , _aead ) ; } else if _aead . _bacdb == "\u0054\u0061\u0062l\u0065" { _dcegf := _cgbe ( _aead ) ; _dadc = append ( _dadc , _dcegf ... ) ; } ; } ; _adgfc := _gdfab ( _dadc , _cbfb , _gcge ) ;
var _ebebe [ ] * list ; for _ , _eegf := range _adgfc { _bfcc := _beecb ( _eegf ) ; _ebebe = append ( _ebebe , _bfcc ... ) ; } ; return _ebebe ; } ;

// getCellInfo returns {{row}, {column}} of the first cell whose marks contain `_gcaa`,
// or nil when no cell contains it.
func ( _adeea TextTable ) getCellInfo ( _gcaa TextMark ) [ ] [ ] int { for _eadbb , _ebec := range _adeea . Cells { for _edb , _agca := range _ebec { _deg := & _agca . Marks ;
if _deg . exists ( _gcaa ) { return [ ] [ ] int { { _eadbb } , { _edb } } ; } ; } ; } ; return nil ; } ;

// lines returns the lines of all paragraphs concatenated in paragraph order.
func ( _bgba paraList ) lines ( ) [ ] * textLine { var _dafgg [ ] * textLine ; for _ , _abffg := range _bgba { _dafgg = append ( _dafgg , _abffg . _bdbcg ... ) ; } ; return _dafgg ; } ;

// closePath closes the most recent subpath. A pending start point (`_edee`) is first turned
// into a new subpath. With no pending point and no subpaths the call does nothing.
func ( _bcef * shapesState ) closePath ( ) { if _bcef . _edee { _bcef . _abgb = append ( _bcef . _abgb , _fae ( _bcef . _ebfb ) ) ;
_bcef . _edee = false ; } else if len ( _bcef . _abgb ) == 0 { if _cbag { _ec . Log . Debug ( "\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068" ) ; } ; _bcef . _edee = false ; return ; } ; _bcef . _abgb [ len ( _bcef . _abgb ) - 1 ] . close ( ) ;
if _cbag { _ec . Log . Info ( "\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073" , _bcef ) ; } ; } ;

// push appends a copy of `_adb` to the stack.
func ( _dcge * stateStack ) push ( _adb * textState ) { _ffbg := * _adb ; * _dcge = append ( * _dcge , & _ffbg ) } ;

// _bddf merges word bags. The bags are sorted in place by descending area, then descending
// height, then index. Each bag that has not been absorbed absorbs every later, not yet
// absorbed bag for which `_fafg` returns true against its rectangle with the left edge moved
// left by `_egfa`. The surviving bags are returned in sorted order.
func _bddf(_efgea []*wordBag) []*wordBag {
	if len(_efgea) <= 1 {
		return _efgea
	}
	if _egd {
		_ec.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	_c.Slice(_efgea, func(_ebda, _eebb int) bool {
		_bcade, _bfdf := _efgea[_ebda], _efgea[_eebb]
		_aacc := _bcade.Width() * _bcade.Height()
		_bcdcf := _bfdf.Width() * _bfdf.Height()
		if _aacc != _bcdcf {
			return _aacc > _bcdcf
		}
		if _bcade.Height() != _bfdf.Height() {
			return _bcade.Height() > _bfdf.Height()
		}
		return _ebda < _eebb
	})
	var _bgfb []*wordBag
	_cefad := make(intSet)
	for _cabd := 0; _cabd < len(_efgea); _cabd++ {
		if _cefad.has(_cabd) {
			continue
		}
		_bfdc := _efgea[_cabd]
		for _bfgg := _cabd + 1; _bfgg < len(_efgea); _bfgg++ {
			// Skip candidates that an earlier bag already absorbed. The previous code
			// tested the outer index here, which is never absorbed at this point, so a
			// bag could be absorbed twice and its words duplicated.
			if _cefad.has(_bfgg) {
				continue
			}
			_ffg := _efgea[_bfgg]
			_bcgd := _bfdc.PdfRectangle
			_bcgd.Llx -= _bfdc._egfa
			if _fafg(_bcgd, _ffg.PdfRectangle) {
				_bfdc.absorb(_ffg)
				_cefad.add(_bfgg)
			}
		}
		_bgfb = append(_bgfb, _bfdc)
	}
	if len(_efgea) != len(_bgfb)+len(_cefad) {
		_ec.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_efgea), len(_bgfb), len(_cefad))
	}
	return _bgfb
}

// getListLines returns the lines whose first word starts with a byte accepted by `_fdfa`,
// followed by the lines returned by `_eccb` for the paragraph. Every line must have at least
// one word with non-empty text.
func ( _cceb * textPara ) getListLines ( ) [ ] * textLine { var _gegb [ ] * textLine ; _cefg := _eccb ( _cceb . _bdbcg ) ; for _ , _cbeb := range _cceb . _bdbcg { _gcba := _cbeb . _aebc [ 0 ] . _bbdb [ 0 ] ; if _fdfa ( _gcba ) { _gegb = append ( _gegb , _cbeb ) ; } ; } ; _gegb = append ( _gegb , _cefg ... ) ;
return _gegb ; } ;

// computeViews derives the page's text, text marks and tables from its paragraphs.
func ( _abdd * PageText ) computeViews ( ) { _gefc := _abdd . getParagraphs ( ) ; _ceeec := new ( _df . Buffer ) ; _gefc . writeText ( _ceeec ) ; _abdd . _gcee = _ceeec . String ( ) ; _abdd . _ebfc = _gefc . toTextMarks ( ) ; _abdd . _eadb = _gefc . tables ( ) ; if _afcg { _ec . Log . Info ( "\u0063\u006f\u006dpu\u0074\u0065\u0056\u0069\u0065\u0077\u0073\u003a\u0020\u0074\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064" , len ( _abdd . _eadb ) ) ;
} ; } ;

// _bdged replaces, in place, both coordinates of every subpath point with `_eacf` of that
// coordinate. It does nothing when `_dgeec` is negative.
func _bdged ( _fcaed [ ] pathSection ) { if _dgeec < 0.0 { return ; } ; if _gfgc { _ec . Log . Info ( "\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073" , len ( _fcaed ) ) ;
} ; for _cgdbd , _cacg := range _fcaed { for _bdcb , _befab := range _cacg . _fbfe { for _agfde , _efee := range _befab . _gfefe { _befab . _gfefe [ _agfde ] = _bc . Point { X : _eacf ( _efee . X ) , Y : _eacf ( _efee . Y ) } ; if _gfgc { _ddgfa := _befab . _gfefe [ _agfde ] ; if ! _dcbd ( _efee , _ddgfa ) { _ffgb := _bc . Point { X : _ddgfa . X - _efee . X , Y : _ddgfa . Y - _efee . Y } ;
_ge . Printf ( "\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a" , _cgdbd , _bdcb , _agfde , _efee , _ddgfa , _ffgb ) ; } ; } ; } ; } ; } ; } ;

// putComposite stores `_fabfg` with bounding box `_ggagg` as the composite cell at
// (`_fgggd`, `_egadc`), after updating the cell's bounding box. An empty paragraph list is
// rejected with an error log.
func ( _cedaf * textTable ) putComposite ( _fgggd , _egadc int , _fabfg paraList , _ggagg _ce . PdfRectangle ) { if len ( _fabfg ) == 0 { _ec . Log . Error ( "\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073" ) ;
return ; } ; _gbgd := compositeCell { PdfRectangle : _ggagg , paraList : _fabfg } ; if _afcg { _ge . Printf ( "\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a" , _fgggd , _egadc , _gbgd . String ( ) ) ;
} ; _gbgd . updateBBox ( ) ; _cedaf . _egfe [ _aaca ( _fgggd , _egadc ) ] = _gbgd ; } ;

// addPoint adds the device-space point for (`_cgee`, `_bcad`) to the current subpath. When no
// subpath is established, the point is remembered as the pending start point instead.
func ( _eeb * shapesState ) addPoint ( _cgee , _bcad float64 ) { _edgc := _eeb . establishSubpath ( ) ; _cbac := _eeb . devicePoint ( _cgee , _bcad ) ; if _edgc == nil { _eeb . _edee = true ; _eeb . _ebfb = _cbac ;
} else { _edgc . add ( _cbac ) ; } ; } ;
2023-05-29 17:26:33 +00:00
2023-09-07 17:40:17 +00:00
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
2023-12-17 13:54:01 +00:00
type RenderMode int

// bounded is satisfied by any value that can report a bounding rectangle.
type bounded interface {
	bbox() _ce.PdfRectangle
}

// textState groups text-related state values.
// Only the fields printed by its String method can be identified from this part of the file:
// _ggf is printed as "tc", _eag as "tw", _ccf as "tfs" and _badfc is the font.
// NOTE(review): the meaning of the remaining fields is not visible here — confirm at their use sites.
type textState struct {
	_ggf, _eag, _gdc, _dcc, _ccf float64
	_bbd                         RenderMode
	_gegg                        float64
	_badfc                       *_ce.PdfFont
	_dab                         _ce.PdfRectangle
	_baf, _afgb                  int
}

// firstWord returns element 0 of the words stored under `idx` in the bag's _faba collection.
// It panics if that entry has no words, exactly as direct indexing would.
func (bag *wordBag) firstWord(idx int) *textWord {
	words := bag._faba[idx]
	return words[0]
}

// _accf returns the Llx coordinate of the first word (_aebc[0]) of `line`.
// It panics on a line without words.
func _accf(line *textLine) float64 {
	first := line._aebc[0]
	return first.Llx
}

// _gdgbc returns the signed horizontal distance from the right edge (Urx) of `b`
// to the left edge (Llx) of `a`.
func _gdgbc(a, b bounded) float64 {
	left := a.bbox().Llx
	right := b.bbox().Urx
	return left - right
}
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// String returns a description of `state`.
// The result has the form "tc=… tw=… tfs=… font=…"; the font is shown as "[NOT SET]"
// when no font has been assigned.
func (state *textState) String() string {
	// Decoded: "[NOT SET]".
	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	if font := state._badfc; font != nil {
		fontName = font.BaseFont()
	}
	// Decoded: "tc=%.2f tw=%.2f tfs=%.2f font=%q".
	const format = "\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071"
	return _ge.Sprintf(format, state._ggf, state._eag, state._ccf, fontName)
}
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// PageImages represents extracted images on a PDF page with spatial information:
// display position and size.
type PageImages struct {
	// Images holds the image marks extracted from the page.
	Images []ImageMark
}
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// String returns a human readable description of `s`.
// The members are listed in ascending order using the "%+v" format of an int slice.
func (s intSet) String() string {
	var members []int
	for key := range s {
		if !s.has(key) {
			continue
		}
		members = append(members, key)
	}
	_c.Ints(members)
	return _ge.Sprintf("\u0025\u002b\u0076", members)
}

// text returns the concatenated text (_bbdb) of the words of `line`, in order, with a single
// space written before each word whose _gfffc flag is set.
func (line *textLine) text() string {
	var sb _gd.Builder
	for _, word := range line._aebc {
		if word._gfffc {
			sb.WriteString("\u0020")
		}
		sb.WriteString(word._bbdb)
	}
	return sb.String()
}

// _efg reports whether any entry of `fonts` has FontName equal to `name`.
func _efg(fonts []Font, name string) bool {
	for i := range fonts {
		if fonts[i].FontName == name {
			return true
		}
	}
	return false
}

// _fgee returns a new ruling of kind _ecac whose primary coordinate is the rectangle's Lly
// and whose secondary range runs from the rectangle's Llx to its Urx.
func _fgee(rect _ce.PdfRectangle) *ruling {
	r := ruling{
		_bfbc:  _ecac,
		_abbgc: rect.Lly,
		_cebe:  rect.Llx,
		_deee:  rect.Urx,
	}
	return &r
}

// bbox returns the table's bounding rectangle.
func (table *textTable) bbox() _ce.PdfRectangle {
	return table.PdfRectangle
}

// comp is an ordering predicate over the rulings at indexes `i` and `j`.
// Rulings of different kinds are ordered by descending kind value. Two rulings of kind _gdcf
// always compare false. Otherwise the rulings are compared by primary coordinate (_abbgc,
// larger first), then by _cebe and _deee (smaller first); for kinds other than _ecac each of
// those comparisons is inverted.
func (_dcbad rulingList) comp(i, j int) bool {
	a, b := _dcbad[i], _dcbad[j]
	kind := a._bfbc
	if other := b._bfbc; kind != other {
		return kind > other
	}
	if kind == _gdcf {
		return false
	}
	keep := kind == _ecac
	// orient returns v unchanged for _ecac rulings and !v for every other kind.
	orient := func(v bool) bool { return v == keep }
	switch {
	case a._abbgc != b._abbgc:
		return orient(a._abbgc > b._abbgc)
	case a._cebe != b._cebe:
		return orient(a._cebe < b._cebe)
	default:
		return orient(a._deee < b._deee)
	}
}

// absorb grows the word's rectangle to the union (via _cdggc) with that of `other`
// and appends the marks (_bgeaa) of `other` to the word's own.
func (word *textWord) absorb(other *textWord) {
	merged := _cdggc(word.PdfRectangle, other.PdfRectangle)
	word.PdfRectangle = merged
	word._bgeaa = append(word._bgeaa, other._bgeaa...)
}

// _dbgc is a list of single-character strings (symbol glyphs plus the letter "o").
// NOTE(review): presumably the characters treated as list bullets — confirm against its users.
var _dbgc = []string{
	"\u2756",
	"\u27a2",
	"\u2713",
	"\u2022",
	"\uf0a7",
	"\u25a1",
	"\u2212",
	"\u25a0",
	"\u25aa",
	"\u006f",
}

// absorb hands every word of `other` to pullWord together with the key it is stored under in
// `other` and a removals value obtained from other.makeRemovals; the removals are applied to
// `other` after all words have been visited.
func (bag *wordBag) absorb(other *wordBag) {
	removals := other.makeRemovals()
	for key, words := range other._faba {
		for _, w := range words {
			bag.pullWord(w, key, removals)
		}
	}
	other.applyRemovals(removals)
}

// _adagc returns `s` cut down to its first `n` bytes; strings shorter than `n` are returned
// unchanged. The cut is made on a byte index, not a rune boundary.
func _adagc(s string, n int) string {
	if len(s) >= n {
		return s[:n]
	}
	return s
}

// asRuling converts the rectangle into a ruling of the same kind.
// For kind _ebdaf the primary coordinate is the horizontal midpoint and the secondary range is
// Lly..Ury; for kind _ecac it is the vertical midpoint and Llx..Urx. The extent across the
// ruling is passed to checkWidth, whose first result is stored in _bcgdf.
// It returns (nil, false) when checkWidth rejects the rectangle or the kind is neither of the two.
func (rr rectRuling) asRuling() (*ruling, bool) {
	out := ruling{_bfbc: rr._dfec, Color: rr.Color, _bgaa: _eecbc}

	var (
		lo, hi  float64
		failMsg string
	)
	switch rr._dfec {
	case _ebdaf:
		out._abbgc = 0.5 * (rr.Llx + rr.Urx)
		out._cebe, out._deee = rr.Lly, rr.Ury
		lo, hi = rr.Llx, rr.Urx
		// Decoded: "rectRuling.asRuling: rulingVert !checkWidth v=%+v".
		failMsg = "\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076"
	case _ecac:
		out._abbgc = 0.5 * (rr.Lly + rr.Ury)
		out._cebe, out._deee = rr.Llx, rr.Urx
		lo, hi = rr.Lly, rr.Ury
		// Decoded: "rectRuling.asRuling: rulingHorz !checkWidth v=%+v".
		failMsg = "\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076"
	default:
		// Decoded: "bad primary kind=%d".
		_ec.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", rr._dfec)
		return nil, false
	}

	width, ok := rr.checkWidth(lo, hi)
	if !ok {
		if _feceb {
			_ec.Log.Error(failMsg, rr)
		}
		return nil, false
	}
	out._bcgdf = width
	return &out, true
}

// exists reports whether the array already holds a mark whose DirectObject and BBox are
// deeply equal to those of `mark` and whose Text is identical.
func (ma *TextMarkArray) exists(mark TextMark) bool {
	for _, el := range ma.Elements() {
		if !_gc.DeepEqual(mark.DirectObject, el.DirectObject) {
			continue
		}
		if !_gc.DeepEqual(mark.BBox, el.BBox) {
			continue
		}
		if el.Text == mark.Text {
			return true
		}
	}
	return false
}

// _befc decodes to `^[a-zA-Z](\)|\.)|^[\d]+(\)|\.)|^\([a-zA-Z]\)|^\([\d]+\)`.
// NOTE(review): presumably a regular expression for leading list markers such as "a)", "1.",
// "(a)" or "(1)" — confirm where it is compiled.
var _befc = "\u005e\u005b\u0061\u002d\u007a\u0041\u002dZ\u005d\u0028\u005c)\u007c\u005c\u002e)\u007c\u005e[\u005c\u0064\u005d\u002b\u0028\u005c)\u007c\\.\u0029\u007c\u005e\u005c\u0028\u005b\u0061\u002d\u007a\u0041\u002d\u005a\u005d\u005c\u0029\u007c\u005e\u005c\u0028\u005b\u005c\u0064\u005d\u002b\u005c\u0029"
// appendWord adds `word` to the end of the line, grows the line's rectangle to include the
// word's rectangle (via _cdggc) and raises the line's _ecag and _bcdg to the word's _ddgee
// and _cffdg when those are larger.
func (line *textLine) appendWord(word *textWord) {
	line._aebc = append(line._aebc, word)
	line.PdfRectangle = _cdggc(line.PdfRectangle, word.PdfRectangle)
	if line._ecag < word._ddgee {
		line._ecag = word._ddgee
	}
	if line._bcdg < word._cffdg {
		line._bcdg = word._cffdg
	}
}

// _fdab converts filled path sections into rulings (its log messages call it "makeFillRulings").
// The sections are first passed to _bdged. Every path that is a quadrilateral is then offered
// to makeRectRuling with the section's colour; accepted results are collected in order, while
// paths that are not quadrilaterals or are rejected are skipped. Diagnostics are logged only
// when the flags _gfgc / _feceb are set.
func _fdab(fills []pathSection) rulingList {
	_bdged(fills)
	if _gfgc {
		// Decoded: "makeFillRulings: %d fills".
		_ec.Log.Info("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs", len(fills))
	}

	var rulings rulingList
	for _, section := range fills {
		for _, path := range section._fbfe {
			if !path.isQuadrilateral() {
				if _gfgc {
					// Decoded: "!isQuadrilateral: %s".
					_ec.Log.Error("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073", path)
				}
				continue
			}
			rr, ok := path.makeRectRuling(section.Color)
			if !ok {
				if _feceb {
					// Decoded: "!makeRectRuling: %s".
					_ec.Log.Error("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073", path)
				}
				continue
			}
			rulings = append(rulings, rr)
		}
	}

	if _gfgc {
		// Decoded: "makeFillRulings: %s".
		_ec.Log.Info("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073", rulings.String())
	}
	return rulings
}

// _eeacg returns true when either para has its _bedf flag set; otherwise it returns the
// result of _ffegg applied to the difference of the two paras' depths.
func _eeacg(a, b *textPara) bool {
	return a._bedf || b._bedf || _ffegg(a.depth()-b.depth())
}

// _cbcf returns _agce(a, b), unless _ffegg reports true for that value, in which case it
// returns _fabg(a, b) instead.
func _cbcf(a, b bounded) float64 {
	primary := _agce(a, b)
	if _ffegg(primary) {
		return _fabg(a, b)
	}
	return primary
}
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// String returns a string describing `ma`.
// An empty array is described as "EMPTY"; otherwise the description gives the number of
// elements followed by the first and the last element.
func ( _cbba TextMarkArray ) String ( ) string { _cgcf := len ( _cbba . _dec ) ; if _cgcf == 0 { return "\u0045\u004d\u0050T\u0059" ; } ; _egbdd := _cbba . _dec [ 0 ] ; _cgdac := _cbba . _dec [ _cgcf - 1 ] ; return _ge . Sprintf ( "\u007b\u0054\u0045\u0058\u0054\u004d\u0041\u0052K\u0041\u0052\u0052AY\u003a\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025s\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d%\u0073\u007d" , _cgcf , _egbdd , _cgdac ) ;
// (The "} ;" that opens the next line ends String.)
// _ddbf removes the element at the given index by copying the tail one place to the left and
// returns the slice shortened by one; the result shares the argument's backing array.
// cachedImage pairs an image (_aca) with a colorspace (_abb).
// _cgbf splits the words of the given bag into a list of new bags. For every depth index of
// the source bag, and for as long as that index is not empty, it takes the first word at the
// first reading index, seeds a new bag from it with _bbea and removes the word from the source.
} ; func _ddbf ( _bbag [ ] * textWord , _ababe int ) [ ] * textWord { _bgcfc := len ( _bbag ) ; copy ( _bbag [ _ababe : ] , _bbag [ _ababe + 1 : ] ) ; return _bbag [ : _bgcfc - 1 ] ; } ; type cachedImage struct { _aca * _ce . Image ; _abb _ce . PdfColorspace ; } ; func _cgbf ( _eecd * wordBag , _cddg float64 , _cfebc , _dbbg rulingList ) [ ] * wordBag { var _abbec [ ] * wordBag ;
for _ , _faff := range _eecd . depthIndexes ( ) { _ecdbg := false ; for ! _eecd . empty ( _faff ) { _bcca := _eecd . firstReadingIndex ( _faff ) ; _aeee := _eecd . firstWord ( _bcca ) ; _fge := _bbea ( _aeee , _cddg , _cfebc , _dbbg ) ; _eecd . removeWord ( _aeee , _bcca ) ; if _becc { _ec . Log . Info ( "\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073" , _aeee . String ( ) ) ;
// Growth loop: the body runs once and is then repeated for as long as the previous pass left
// _ecdbg set, i.e. some scanBand call returned a value > 0. The three gaps are package-level
// factors multiplied by the new bag's _egfa; the debug message labels _bgbed
// "maxIntraDepthGap" and _fdeeb "maxIntraReadingGap".
} ; for _gddc := true ; _gddc ; _gddc = _ecdbg { _ecdbg = false ; _dfacb := _gceeb * _fge . _egfa ; _fdeeb := _ccab * _fge . _egfa ; _bgbed := _cfebd * _fge . _egfa ; if _becc { _ec . Log . Info ( "\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066" , _fge . minDepth ( ) , _fge . maxDepth ( ) , _bgbed , _fdeeb ) ;
// Scan 1 ("vertical"): depth range of the new bag widened by _bgbed on both sides.
// Scan 2 ("horizontal"): unmodified depth range, with _fdeeb passed to _ffef.
} ; if _eecd . scanBand ( "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c" , _fge , _ffef ( _abdg , 0 ) , _fge . minDepth ( ) - _bgbed , _fge . maxDepth ( ) + _bgbed , _bdgd , false , false ) > 0 { _ecdbg = true ; } ; if _eecd . scanBand ( "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c" , _fge , _ffef ( _abdg , _fdeeb ) , _fge . minDepth ( ) , _fge . maxDepth ( ) , _gfff , false , false ) > 0 { _ecdbg = true ;
// If either scan returned > 0 the pass restarts. Otherwise an unnamed scan is made with the
// second-to-last flag set, and only if its result passes the test below is the "other" scan
// (last flag set) run. NOTE(review): the unnamed scan presumably only counts candidates — confirm in scanBand.
} ; if _ecdbg { continue ; } ; _eaea := _eecd . scanBand ( "" , _fge , _ffef ( _cedg , _dfacb ) , _fge . minDepth ( ) , _fge . maxDepth ( ) , _baae , true , false ) ; if _eaea > 0 { _aedd := ( _fge . maxDepth ( ) - _fge . minDepth ( ) ) / _fge . _egfa ; if ( _eaea > 1 && float64 ( _eaea ) > 0.3 * _aedd ) || _eaea <= 10 { if _eecd . scanBand ( "\u006f\u0074\u0068e\u0072" , _fge , _ffef ( _cedg , _dfacb ) , _fge . minDepth ( ) , _fge . maxDepth ( ) , _baae , false , true ) > 0 { _ecdbg = true ;
// Each finished bag is appended to the result of _cgbf.
// _ffec returns a textPara with the given rectangle and lines.
// textWord.appendMark appends the mark to the word's marks; it continues on the next line.
} ; } ; } ; } ; _abbec = append ( _abbec , _fge ) ; } ; } ; return _abbec ; } ; func _ffec ( _afcgbe _ce . PdfRectangle , _ddea [ ] * textLine ) * textPara { return & textPara { PdfRectangle : _afcgbe , _bdbcg : _ddea } ; } ; func ( _edbc * textWord ) appendMark ( _edaf * textMark , _aecac _ce . PdfRectangle ) { _edbc . _bgeaa = append ( _edbc . _bgeaa , _edaf ) ;
// Rest of appendMark: the word's rectangle becomes the union with the mark's, _ddgee is
// raised to the mark's _bfaca when larger, and _cffdg is set to the Ury of the rectangle
// argument minus the word's Lly.
// subpath.clear resets the subpath to its zero value.
// _fdfa reports whether the byte equals the first byte of any string in _dbgc.
_edbc . PdfRectangle = _cdggc ( _edbc . PdfRectangle , _edaf . PdfRectangle ) ; if _edaf . _bfaca > _edbc . _ddgee { _edbc . _ddgee = _edaf . _bfaca ; } ; _edbc . _cffdg = _aecac . Ury - _edbc . PdfRectangle . Lly ; } ; func ( _gfa * subpath ) clear ( ) { * _gfa = subpath { } } ; func _fdfa ( _adgb byte ) bool { for _ , _abddd := range _dbgc { if [ ] byte ( _abddd ) [ 0 ] == _adgb { return true ;
// _fecgf groups text marks into words (its log messages call it "makeTextWords").
// _facgf is the word currently being built; _gafbf collects the finished words.
} ; } ; return false ; } ; func _fecgf ( _cbca [ ] * textMark , _daef _ce . PdfRectangle ) [ ] * textWord { var _gafbf [ ] * textWord ; var _facgf * textWord ; if _cfgg { _ec . Log . Info ( "\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073" , len ( _cbca ) ) ;
// _bacba finishes the current word, if any: its text is computed, and unless _ecga reports
// true for that text the word is stored in the result. The current word is then reset to nil.
} ; _bacba := func ( ) { if _facgf != nil { _dgbd := _facgf . computeText ( ) ; if ! _ecga ( _dgbd ) { _facgf . _bbdb = _dgbd ; _gafbf = append ( _gafbf , _facgf ) ; if _cfgg { _ec . Log . Info ( "\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073" , len ( _gafbf ) - 1 , _facgf . String ( ) ) ;
// Main loop over the marks. When _efbf is set and the current word has marks, the new mark
// is first compared with the word's last mark for diacritic handling.
for _egded , _ccfgcd := range _facgf . _bgeaa { _ge . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _egded , _ccfgcd . String ( ) ) ; } ; } ; } ; _facgf = nil ; } ; } ; for _ , _dcbc := range _cbca { if _efbf && _facgf != nil && len ( _facgf . _bgeaa ) > 0 { _eced := _facgf . _bgeaa [ len ( _facgf . _bgeaa ) - 1 ] ;
// Case 1: _fbcfg accepts the new mark's text but not the last mark's, and the new mark lies in
// the last mark's diacritic area: the diacritic is added to the word and the mark is consumed.
// Case 2 (the reverse): the last mark is dropped from the word, the new mark is appended in
// its place and the last mark's diacritic is added.
_gdfc , _fcca := _fbcfg ( _dcbc . _ecaa ) ; _bcdeb , _agefe := _fbcfg ( _eced . _ecaa ) ; if _fcca && ! _agefe && _eced . inDiacriticArea ( _dcbc ) { _facgf . addDiacritic ( _gdfc ) ; continue ; } ; if _agefe && ! _fcca && _dcbc . inDiacriticArea ( _eced ) { _facgf . _bgeaa = _facgf . _bgeaa [ : len ( _facgf . _bgeaa ) - 1 ] ;
// A mark for whose text _ecga reports true ends the current word and is itself discarded.
// With no current word, the mark starts a new one (built by _bddac).
_facgf . appendMark ( _dcbc , _daef ) ; _facgf . addDiacritic ( _bcdeb ) ; continue ; } ; } ; _agcgb := _ecga ( _dcbc . _ecaa ) ; if _agcgb { _bacba ( ) ; continue ; } ; if _facgf == nil && ! _agcgb { _facgf = _bddac ( [ ] * textMark { _dcbc } , _daef ) ; continue ; } ; _dbefd := _facgf . _ddgee ;
// _beefe is the absolute difference between _dgdc(rect, mark) and the word's _cffdg, and _dgga is
// the gap from the word's right edge to the mark's left edge, both divided by the word's _ddgee.
// The mark starts a new word when _dgga >= _edd, when _dgga < -_dfac, or when _beefe > _bagb;
// otherwise it is appended to the current word.
_beefe := _aa . Abs ( _dgdc ( _daef , _dcbc ) - _facgf . _cffdg ) / _dbefd ; _dgga := _gdgbc ( _dcbc , _facgf ) / _dbefd ; if _dgga >= _edd || ! ( - _dfac <= _dgga && _beefe <= _bagb ) { _bacba ( ) ; _facgf = _bddac ( [ ] * textMark { _dcbc } , _daef ) ; continue ; } ; _facgf . appendMark ( _dcbc , _daef ) ;
// After the loop the last word is flushed and the collected words are returned.
// textTable.computeBbox returns the union (via _cdggc) of the rectangles of all non-nil cells
// returned by get(x, y) for x in [0, _ecbf) and y in [0, _dcfg); with no cells the zero
// rectangle is returned.
} ; _bacba ( ) ; return _gafbf ; } ; func ( _cedb * textTable ) computeBbox ( ) _ce . PdfRectangle { var _edeb _ce . PdfRectangle ; _dbbcf := false ; for _fegea := 0 ; _fegea < _cedb . _dcfg ; _fegea ++ { for _ccbc := 0 ; _ccbc < _cedb . _ecbf ; _ccbc ++ { _ffeab := _cedb . get ( _ccbc , _fegea ) ;
if _ffeab == nil { continue ; } ; if ! _dbbcf { _edeb = _ffeab . PdfRectangle ; _dbbcf = true ; } else { _edeb = _cdggc ( _edeb , _ffeab . PdfRectangle ) ; } ; } ; } ; return _edeb ; } ;
2023-10-07 13:58:01 +00:00
2023-12-17 13:54:01 +00:00
// String returns a description of `k`.
// The name is looked up in _fdgf; a kind without an entry is rendered as "Not a mark: <n>".
func (k markKind) String() string {
	if name, ok := _fdgf[k]; ok {
		return name
	}
	// Decoded: "Not a mark: %d".
	return _ge.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", k)
}

// reorder rearranges the list in place so that position i receives the para that was at
// position order[i]. The new arrangement is built in a scratch list of the same length and
// then copied back.
func (paras paraList) reorder(order []int) {
	scratch := make(paraList, len(paras))
	for dst := 0; dst < len(order); dst++ {
		scratch[dst] = paras[order[dst]]
	}
	copy(paras, scratch)
}

// lists is a slice of list pointers.
type lists []*list

// getFontDirect looks up the font dictionary for `name` with getFontDict and builds a font
// from it with NewPdfFontFromPdfObject. A lookup error is returned with a nil font; a
// construction error is logged at debug level and returned together with whatever font value
// the constructor produced.
func (to *textObject) getFontDirect(name string) (*_ce.PdfFont, error) {
	fontObj, err := to.getFontDict(name)
	if err != nil {
		return nil, err
	}
	font, err := _ce.NewPdfFontFromPdfObject(fontObj)
	if err == nil {
		return font, nil
	}
	// Decoded: "getFontDirect: NewPdfFontFromPdfObject failed. name=%#q err=%v".
	_ec.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", name, err)
	return font, err
}

// _cdggc returns the smallest rectangle containing both `a` and `b`: the minimum of the
// lower-left coordinates and the maximum of the upper-right coordinates.
func _cdggc(a, b _ce.PdfRectangle) _ce.PdfRectangle {
	var union _ce.PdfRectangle
	union.Llx = _aa.Min(a.Llx, b.Llx)
	union.Lly = _aa.Min(a.Lly, b.Lly)
	union.Urx = _aa.Max(a.Urx, b.Urx)
	union.Ury = _aa.Max(a.Ury, b.Ury)
	return union
}

// compositeCell is a rectangle together with the list of paras it contains.
type compositeCell struct {
	_ce.PdfRectangle
	paraList
}

// sortReadingOrder sorts the paras in place. Lists of fewer than two paras are left alone;
// otherwise computeEBBoxes is run first and the list is sorted with the comparison
// _cbcf(a, b) <= 0. The number of paras is always traced.
func (paras paraList) sortReadingOrder() {
	// Decoded: "sortReadingOrder: paras=%d " followed by a separator made of '=' characters and an 'x'.
	_ec.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(paras))
	if len(paras) > 1 {
		paras.computeEBBoxes()
		before := func(i, j int) bool {
			return _cbcf(paras[i], paras[j]) <= 0
		}
		_c.Slice(paras, before)
	}
}
// String returns a string describing `pt`.
// The description is a header line "-PageText: <n> elements", one line per element of _fcag
// (each element's own String), and a closing line "+PageText: <n> elements", joined by newlines.
func ( _gacc PageText ) String ( ) string { _bbfc := _ge . Sprintf ( "P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073" , len ( _gacc . _fcag ) ) ; _cdae := [ ] string { "\u002d" + _bbfc } ; for _ , _acc := range _gacc . _fcag { _cdae = append ( _cdae , _acc . String ( ) ) ;
// (The "} ;" that opens the next line ends the element loop of String.)
// paraList.computeEBBoxes computes an extended rectangle (_ebcf) for every para.
// Step 1: each para's _ebcf starts out as its own PdfRectangle.
} ; _cdae = append ( _cdae , "\u002b" + _bbfc ) ; return _gd . Join ( _cdae , "\u000a" ) ; } ; func ( _fgdc paraList ) computeEBBoxes ( ) { if _fad { _ec . Log . Info ( "\u0063o\u006dp\u0075\u0074\u0065\u0045\u0042\u0042\u006f\u0078\u0065\u0073\u003a" ) ; } ; for _ , _bffe := range _fgdc { _bffe . _ebcf = _bffe . PdfRectangle ;
// Step 2: for each para, the neighbours returned by yNeighbours(0) bound how far it may grow:
// _gaeeg is the largest Urx among neighbours lying entirely to its left, _fdefd the smallest
// Llx among neighbours lying entirely to its right (defaults -1e9 / +1e9).
} ; _fbede := _fgdc . yNeighbours ( 0 ) ; for _gbebc , _dfcd := range _fgdc { _dcfb := _dfcd . _ebcf ; _gaeeg , _fdefd := - 1.0e9 , + 1.0e9 ; for _ , _ddfce := range _fbede [ _dfcd ] { _dafde := _fgdc [ _ddfce ] . _ebcf ; if _dafde . Urx < _dcfb . Llx { _gaeeg = _aa . Max ( _gaeeg , _dafde . Urx ) ;
// Step 3: every other para whose Ury is not above this para's Lly may widen the rectangle:
// to the left when its Llx lies in [_gaeeg, current Llx), otherwise to the right when its Urx
// lies in (current Urx, _fdefd].
} else if _dcfb . Urx < _dafde . Llx { _fdefd = _aa . Min ( _fdefd , _dafde . Llx ) ; } ; } ; for _cgbea , _efgad := range _fgdc { _afef := _efgad . _ebcf ; if _gbebc == _cgbea || _afef . Ury > _dcfb . Lly { continue ; } ; if _gaeeg <= _afef . Llx && _afef . Llx < _dcfb . Llx { _dcfb . Llx = _afef . Llx ;
// Debug output (when _fad is set): para index, old and new rectangle, and the first 50 bytes of its text.
} else if _afef . Urx <= _fdefd && _dcfb . Urx < _afef . Urx { _dcfb . Urx = _afef . Urx ; } ; } ; if _fad { _ge . Printf ( "\u0025\u0034\u0064\u003a %\u0036\u002e\u0032\u0066\u2192\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0071\u000a" , _gbebc , _dfcd . _ebcf , _dcfb , _adagc ( _dfcd . text ( ) , 50 ) ) ;
// The widened rectangle is stored in _ebcf; when _dbde is set it also replaces each para's PdfRectangle.
// wordBag.allWords returns the words of every entry of _faba concatenated into one slice
// (in the iteration order of _faba).
} ; _dfcd . _ebcf = _dcfb ; } ; if _dbde { for _ , _abbb := range _fgdc { _abbb . PdfRectangle = _abbb . _ebcf ; } ; } ; } ; func ( _gadg * wordBag ) allWords ( ) [ ] * textWord { var _afgbd [ ] * textWord ; for _ , _edeg := range _gadg . _faba { _afgbd = append ( _afgbd , _edeg ... ) ; } ; return _afgbd ;
// gridTiling is a rectangle with two float64 slices (_cgecb, _agbb) and a two-level map of
// gridTile values keyed by float64.
// imageExtractContext.processOperand dispatches on the content stream operator.
// "BI" with one parameter: an inline image.
} ; type gridTiling struct { _ce . PdfRectangle ; _cgecb [ ] float64 ; _agbb [ ] float64 ; _bage map [ float64 ] map [ float64 ] gridTile ; } ; func ( _feab * imageExtractContext ) processOperand ( _ccb * _ag . ContentStreamOperation , _cbge _ag . GraphicsState , _gee * _ce . PdfPageResources ) error { if _ccb . Operand == "\u0042\u0049" && len ( _ccb . Params ) == 1 { _dbd , _gddb := _ccb . Params [ 0 ] . ( * _ag . ContentStreamInlineImage ) ;
// A parameter of another type is ignored. An inline image whose ImageMask is true is skipped
// unless the IncludeInlineStencilMasks option is set; otherwise the image is extracted.
// "Do" with one parameter: an XObject referenced by name.
if ! _gddb { return nil ; } ; if _bef , _feg := _gb . GetBoolVal ( _dbd . ImageMask ) ; _feg { if _bef && ! _feab . _cef . IncludeInlineStencilMasks { return nil ; } ; } ; return _feab . extractInlineImage ( _dbd , _cbge , _gee ) ; } else if _ccb . Operand == "\u0044\u006f" && len ( _ccb . Params ) == 1 { _dfg , _gbb := _gb . GetName ( _ccb . Params [ 0 ] ) ;
// A non-name parameter is logged ("ERROR: Type") and reported as _gdf. Image XObjects go to
// extractXObjectImage, form XObjects to extractFormImages; other types fall through to the final return nil.
if ! _gbb { _ec . Log . Debug ( "E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065" ) ; return _gdf ; } ; _ , _gf := _gee . GetXObjectByName ( * _dfg ) ; switch _gf { case _ce . XObjectTypeImage : return _feab . extractXObjectImage ( _dfg , _cbge , _gee ) ; case _ce . XObjectTypeForm : return _feab . extractFormImages ( _dfg , _cbge , _gee ) ;
// "scn" / "SCN" with one parameter, only while _dedf is set (see the "cs"/"CS" branch below):
// the parameter names a pattern.
} ; } else if _feab . _dedf && ( _ccb . Operand == "\u0073\u0063\u006e" || _ccb . Operand == "\u0053\u0043\u004e" ) && len ( _ccb . Params ) == 1 { _bde , _gdag := _gb . GetName ( _ccb . Params [ 0 ] ) ; if ! _gdag { _ec . Log . Debug ( "E\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0079\u0070\u0065" ) ;
// An unknown pattern is logged ("ERROR: Pattern not found") and ignored. For a tiling
// pattern, images are extracted from the pattern's own content stream and resources.
return _gdf ; } ; _bcd , _gdag := _gee . GetPatternByName ( * _bde ) ; if ! _gdag { _ec . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0050\u0061\u0074\u0074\u0065\u0072n\u0020\u006e\u006f\u0074\u0020\u0066\u006f\u0075\u006e\u0064" ) ; return nil ; } ; if _bcd . IsTiling ( ) { _cga := _bcd . GetAsTilingPattern ( ) ;
// "cs" / "CS": _dedf records whether the first parameter's String() is "Pattern".
_caf , _efge := _cga . GetContentStream ( ) ; if _efge != nil { return _efge ; } ; _efge = _feab . extractContentStreamImages ( string ( _caf ) , _cga . Resources ) ; if _efge != nil { return _efge ; } ; } ; } else if ( _ccb . Operand == "\u0063\u0073" || _ccb . Operand == "\u0043\u0053" ) && len ( _ccb . Params ) >= 1 { _feab . _dedf = _ccb . Params [ 0 ] . String ( ) == "\u0050a\u0074\u0074\u0065\u0072\u006e" ;
// _ccba returns a new slice holding the elements of its argument in reverse order.
// rulingList.snapToGroupsDirection starts by sorting the list with sortStrict.
} ; return nil ; } ; func _ccba ( _gcgeb [ ] int ) [ ] int { _fcbf := make ( [ ] int , len ( _gcgeb ) ) ; for _dcgb , _fafge := range _gcgeb { _fcbf [ len ( _gcgeb ) - 1 - _dcgb ] = _fafge ; } ; return _fcbf ; } ; func ( _baad rulingList ) snapToGroupsDirection ( ) rulingList { _baad . sortStrict ( ) ;
// Grouping: _cfgae maps the first ruling of each group to the group's members; _febfa is the
// head of the group currently being filled. A ruling whose primary coordinate (_abbgc) is
// below the head's by more than _fdac is reported as an ordering error.
// NOTE(review): _baad[0] is indexed unconditionally, so an empty list would panic — presumably callers never pass one; confirm.
_cfgae := make ( map [ * ruling ] rulingList , len ( _baad ) ) ; _febfa := _baad [ 0 ] ; _fcfec := func ( _aebcg * ruling ) { _febfa = _aebcg ; _cfgae [ _febfa ] = rulingList { _aebcg } } ; _fcfec ( _baad [ 0 ] ) ; for _ , _bgdde := range _baad [ 1 : ] { if _bgdde . _abbgc < _febfa . _abbgc - _fdac { _ec . Log . Error ( "\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073" , _febfa , _bgdde ) ;
// A ruling more than _gcef beyond the head's primary coordinate starts a new group; any other
// ruling joins the current group. Then _efaag holds each group's mergePrimary() value and
// _addbd maps every ruling back to its group head.
} ; if _bgdde . _abbgc > _febfa . _abbgc + _gcef { _fcfec ( _bgdde ) ; } else { _cfgae [ _febfa ] = append ( _cfgae [ _febfa ] , _bgdde ) ; } ; } ; _efaag := make ( map [ * ruling ] float64 , len ( _cfgae ) ) ; _addbd := make ( map [ * ruling ] * ruling , len ( _baad ) ) ; for _ffeg , _aaecbe := range _cfgae { _efaag [ _ffeg ] = _aaecbe . mergePrimary ( ) ;
// Snapping: every ruling's primary coordinate is overwritten with its group's merged value.
// Each group is then split with splitSec and every part is merged into a single ruling.
for _ , _cegae := range _aaecbe { _addbd [ _cegae ] = _ffeg ; } ; } ; for _ , _babc := range _baad { _babc . _abbgc = _efaag [ _addbd [ _babc ] ] ; } ; _bccd := make ( rulingList , 0 , len ( _baad ) ) ; for _ , _ecba := range _cfgae { _egdf := _ecba . splitSec ( ) ; for _adca , _facgc := range _egdf { _cbde := _facgc . merge ( ) ;
// A merged ruling that aligns (alignsPrimary and alignsSec) with the most recently kept one is
// logged as a duplicate and dropped.
if len ( _bccd ) > 0 { _cgge := _bccd [ len ( _bccd ) - 1 ] ; if _cgge . alignsPrimary ( _cbde ) && _cgge . alignsSec ( _cbde ) { _ec . Log . Error ( "\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073" , _adca , _cgge , _cbde ) ;
// The kept rulings are sorted with sortStrict before being returned.
continue ; } ; } ; _bccd = append ( _bccd , _cbde ) ; } ; } ; _bccd . sortStrict ( ) ; return _bccd ; } ;
// Len returns the number of TextMarks in `ma`.
// A nil array has length 0.
func (ma *TextMarkArray) Len() int {
	if ma != nil {
		return len(ma._dec)
	}
	return 0
}

// _ggba returns a new slice that starts with `first`, ends with `last`, and in between holds
// the values of `vals` that lie strictly between the smaller and the larger of the two.
// Values are taken in the order given; values not above the lower bound are skipped and the
// scan stops at the first value that reaches the upper bound.
// NOTE(review): stopping early only matches a full filter if `vals` is ascending — confirm at the call sites.
func _ggba(vals []float64, first, last float64) []float64 {
	lo, hi := first, last
	if last < first {
		lo, hi = last, first
	}
	out := make([]float64, 0, len(vals)+2)
	out = append(out, first)
	for _, v := range vals {
		if v <= lo {
			continue
		}
		if v >= hi {
			break
		}
		out = append(out, v)
	}
	return append(out, last)
}

// paraList is a slice of textPara pointers.
type paraList []*textPara

// _debaa returns the integers 0..n-1 sorted with the caller's `less`, which is called with
// the values being compared (not their positions in the result).
func _debaa(n int, less func(int, int) bool) []int {
	order := make([]int, n)
	for i := 0; i < n; i++ {
		order[i] = i
	}
	byValue := func(a, b int) bool {
		return less(order[a], order[b])
	}
	_c.Slice(order, byValue)
	return order
}

// _egbg sorts `lines` in place by ascending _bcdg and then groups them, in that order, under
// the rounded value of _accf (the Llx of each line's first word).
func _egbg(lines []*textLine) map[float64][]*textLine {
	_c.Slice(lines, func(i, j int) bool {
		return lines[i]._bcdg < lines[j]._bcdg
	})
	groups := make(map[float64][]*textLine)
	for _, line := range lines {
		key := _aa.Round(_accf(line))
		groups[key] = append(groups[key], line)
	}
	return groups
}

// del removes `key` from the set; removing an absent key is a no-op.
func (s intSet) del(key int) {
	delete(s, key)
}
// TableCell is a cell in a TextTable.
type TableCell struct {
	// PdfRectangle is the embedded rectangle of the cell.
	_ce.PdfRectangle

	// Text is the extracted text.
	Text string

	// Marks holds the TextMarks corresponding to the text in Text.
	Marks TextMarkArray
}

// Ruling kinds, numbered from 0 in declaration order.
const (
	// _gdcf is the zero value of rulingKind.
	_gdcf rulingKind = iota
	// _ecac is the kind that rectRuling.asRuling's log message calls "rulingHorz".
	_ecac
	// _ebdaf is the kind that rectRuling.asRuling's log message calls "rulingVert".
	_ebdaf
)