2020-08-27 21:45:09 +00:00
//
// Copyright 2020 FoxyUtils ehf. All rights reserved.
//
// This is a commercial product and requires a license to operate.
// A trial license can be obtained at https://unidoc.io
//
// DO NOT EDIT: generated by unitwist Go source code obfuscator.
//
// Use of this source code is governed by the UniDoc End User License Agreement
// terms that can be accessed at https://unidoc.io/eula/
2018-03-22 14:03:47 +00:00
2020-08-27 21:45:09 +00:00
//
// Package extractor is used for quickly extracting PDF content through a simple interface.
// Currently offers functionality for extracting textual content.
//
2022-10-27 19:04:58 +00:00
package extractor ; import ( _gd "bytes" ; _g "errors" ; _gdf "fmt" ; _ag "github.com/unidoc/unipdf/v3/common" ; _da "github.com/unidoc/unipdf/v3/contentstream" ; _bg "github.com/unidoc/unipdf/v3/core" ; _cd "github.com/unidoc/unipdf/v3/internal/license" ; _ab "github.com/unidoc/unipdf/v3/internal/textencoding" ;
_ee "github.com/unidoc/unipdf/v3/internal/transform" ; _bf "github.com/unidoc/unipdf/v3/model" ; _bd "golang.org/x/text/unicode/norm" ; _fb "golang.org/x/xerrors" ; _eda "image/color" ; _ed "io" ; _b "math" ; _c "regexp" ; _f "sort" ; _d "strings" ; _gg "unicode" ; _a "unicode/utf8" ;
) ;
2022-02-05 21:34:53 +00:00
2022-10-27 19:04:58 +00:00
// GetContentStreamOps returns the contentStreamOps field of `pt`.
func (_gfbc *PageText) GetContentStreamOps() *_da.ContentStreamOperations {
	return _gfbc._bfg
}

// checkOp verifies that content stream operation `_gadbg` carries exactly `_gae` parameters.
//
// A negative `_gae` disables the count check. When the receiver is nil the operator is first
// logged as appearing outside a text object, showing at most `_gae` of its parameters.
// On a count mismatch it returns false; the error is non-nil only when `_bgb` is true.
func (_dfe *textObject) checkOp(_gadbg *_da.ContentStreamOperation, _gae int, _bgb bool) (_ccec bool, _cag error) {
	if _dfe == nil {
		var shown []_bg.PdfObject
		if _gae > 0 {
			shown = _gadbg.Params
			if len(shown) > _gae {
				shown = shown[:_gae]
			}
		}
		// Decodes to: "%#q operand outside text. params=%+v"
		_ag.Log.Debug("\u0025\u0023q \u006f\u0070\u0065r\u0061\u006e\u0064\u0020out\u0073id\u0065\u0020\u0074\u0065\u0078\u0074\u002e p\u0061\u0072\u0061\u006d\u0073\u003d\u0025+\u0076", _gadbg.Operand, shown)
	}

	// Nothing to check, or the count matches.
	if _gae < 0 || len(_gadbg.Params) == _gae {
		return true, nil
	}

	if _bgb {
		// Decodes to: "incorrect parameter count"
		_cag = _g.New("\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et")
	}
	// Decodes to: "ERROR: %#q should have %d input params, got %d %+v"
	_ag.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076", _gadbg.Operand, _gae, len(_gadbg.Params), _gadbg.Params)
	return false, _cag
}
2022-09-23 18:05:51 +00:00
// RenderMode specifies the text rendering mode (Tmode), which determines whether showing text shall cause
// glyph outlines to be stroked, filled, used as a clipping boundary, or some combination of the three.
// Stroking, filling, and clipping shall have the same effects for a text object as they do for a path object
// (see 8.5.3, "Path-Painting Operators" and 8.5.4, "Clipping Path Operators").
2022-10-27 19:04:58 +00:00
type RenderMode int

// _eagae builds a ruling of kind `_beec` from rectangle `_aaafd`: its primary coordinate is the
// rectangle's Urx and its extent runs from Lly to Ury.
func _eagae(_aaafd _bf.PdfRectangle) *ruling {
	return &ruling{
		_cgef: _beec,
		_eead: _aaafd.Urx,
		_bgeb: _aaafd.Lly,
		_eecc: _aaafd.Ury,
	}
}

// _fafd returns `_ebab` with its last rune removed.
// An empty string is returned unchanged; previously it caused a slice-bounds panic.
func _fafd(_ebab string) string {
	runes := []rune(_ebab)
	if len(runes) == 0 {
		return ""
	}
	return string(runes[:len(runes)-1])
}

// _def returns the translation matrix that moves the origin to point `_bce`.
func _def(_bce _ee.Point) _ee.Matrix {
	return _ee.TranslationMatrix(_bce.X, _bce.Y)
}

// _cdgf starts a new textLine from the first word of `_gafcf` at depth index `_cdffd` and pulls
// that word out of the bag into the line.
func _cdgf(_gafcf *wordBag, _cdffd int) *textLine {
	word := _gafcf.firstWord(_cdffd)
	line := &textLine{
		PdfRectangle: word.PdfRectangle,
		_ddef:        word._fbgge,
		_dcfd:        word._ecgcg,
	}
	line.pullWord(_gafcf, word, _cdffd)
	return line
}

// text returns whatever writeText emits for the paragraph, as a string.
func (_adbed *textPara) text() string {
	var buf _gd.Buffer
	_adbed.writeText(&buf)
	return buf.String()
}

// _gdaf concatenates all ruling lists in `_fbggc` and returns the result of vertsHorzs() on the
// combined list.
func _gdaf(_fbggc []rulingList) (rulingList, rulingList) {
	var combined rulingList
	for _, list := range _fbggc {
		combined = append(combined, list...)
	}
	return combined.vertsHorzs()
}
2022-09-23 18:05:51 +00:00
2022-10-27 19:04:58 +00:00
// RangeOffset returns the TextMarks in `ma` that overlap text[start:end] in the extracted text.
// These are tm: `start` <= tm.Offset + len(tm.Text) && tm.Offset < `end` where
// `start` and `end` are offsets in the extracted text.
// NOTE: TextMarks can contain multiple characters. e.g. "ffi" for the ffi ligature so the first and
// last elements of the returned TextMarkArray may only partially overlap text[start:end].
//
// `start` is clamped up to the first mark's offset and `end` down to the last mark's offset + 1.
// An error is returned for a nil receiver, when end < start, and when either binary search
// lands outside the marks. An empty receiver is returned as is.
func (_gbb *TextMarkArray) RangeOffset(start, end int) (*TextMarkArray, error) {
	if _gbb == nil {
		// Decodes to: "ma==nil"
		return nil, _g.New("\u006da\u003d\u003d\u006e\u0069\u006c")
	}
	if end < start {
		return nil, _gdf.Errorf("\u0065\u006e\u0064\u0020\u003c\u0020\u0073\u0074\u0061\u0072\u0074\u002e\u0020\u0052\u0061n\u0067\u0065\u004f\u0066\u0066\u0073\u0065\u0074\u0020\u006e\u006f\u0074\u0020d\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020\u0073\u0074\u0061\u0072t=\u0025\u0064\u0020\u0065\u006e\u0064\u003d\u0025\u0064\u0020", start, end)
	}

	marks := _gbb._fceg
	count := len(marks)
	if count == 0 {
		return _gbb, nil
	}

	first, last := marks[0], marks[count-1]
	if start < first.Offset {
		start = first.Offset
	}
	if end > last.Offset+1 {
		end = last.Offset + 1
	}

	// Index of the first mark whose final byte lies at or after `start`.
	iStart := _f.Search(count, func(i int) bool {
		return marks[i].Offset+len(marks[i].Text)-1 >= start
	})
	if iStart < 0 || iStart >= count {
		return nil, _gdf.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020\u0072\u0061\u006e\u0067\u0065\u002e\u0020\u0073\u0074\u0061\u0072\u0074\u003d%\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009 \u006c\u0061\u0073\u0074\u003d%\u0076", start, iStart, count, first, last)
	}

	// Index of the first mark that starts after `end`-1.
	// NOTE(review): sort.Search yields `count` when no mark starts after end-1, which is then
	// reported as out of range below — confirm this is intended for ranges reaching the last mark.
	iEnd := _f.Search(count, func(i int) bool {
		return marks[i].Offset > end-1
	})
	if iEnd < 0 || iEnd >= count {
		return nil, _gdf.Errorf("\u004f\u0075\u0074\u0020\u006f\u0066\u0020r\u0061\u006e\u0067e\u002e\u0020\u0065n\u0064\u003d%\u0064\u0020\u0069\u0045\u006e\u0064=\u0025d \u006c\u0065\u006e\u003d\u0025\u0064\u000a\u0009\u0066\u0069\u0072\u0073\u0074\u003d\u0025\u0076\u000a\u0009\u0020\u006c\u0061\u0073\u0074\u003d\u0025\u0076", end, iEnd, count, first, last)
	}
	if iEnd <= iStart {
		return nil, _gdf.Errorf("\u0069\u0045\u006e\u0064\u0020\u003c=\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003a\u0020\u0073\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020\u0065\u006ed\u003d\u0025\u0064\u0020\u0069\u0053\u0074\u0061\u0072\u0074\u003d\u0025\u0064\u0020i\u0045n\u0064\u003d\u0025\u0064", start, end, iStart, iEnd)
	}
	return &TextMarkArray{_fceg: marks[iStart:iEnd]}, nil
}

// _acdc removes the last rune from the last mark of `_fefb` and adjusts `*_fgbc` to match.
//
// If the last mark holds exactly one rune the mark itself is dropped and `*_fgbc` is set to the
// end of the new last mark. Otherwise the last mark's Text is shortened by one rune and
// `*_fgbc` is reduced by the number of bytes removed.
//
// Fixes relative to the previous implementation:
//   - the shortened text is written back to the slice element (it used to be assigned to a
//     local copy of the mark, so the mark kept its old text while the offset shrank);
//   - an empty slice is returned unchanged instead of panicking;
//   - dropping the only mark no longer indexes an empty slice; `*_fgbc` is then set to the
//     dropped mark's Offset (NOTE(review): chosen as the start of the removed text — confirm).
func _acdc(_fefb []TextMark, _fgbc *int) []TextMark {
	if len(_fefb) == 0 {
		return _fefb
	}
	lastIdx := len(_fefb) - 1
	lastMark := _fefb[lastIdx]

	if len([]rune(lastMark.Text)) == 1 {
		_fefb = _fefb[:lastIdx]
		if len(_fefb) == 0 {
			*_fgbc = lastMark.Offset
			return _fefb
		}
		prev := _fefb[len(_fefb)-1]
		*_fgbc = prev.Offset + len(prev.Text)
		return _fefb
	}

	trimmed := _fafd(lastMark.Text)
	*_fgbc += len(trimmed) - len(lastMark.Text)
	_fefb[lastIdx].Text = trimmed
	return _fefb
}

// event is one endpoint fed to eventNeighbours: a coordinate, a flag that is true for the
// lower (entering) edge and false for the upper (leaving) edge, and the index of the para.
type event struct {
	_cega  float64
	_fddb  bool
	_dbdfc int
}

// xNeighbours builds two x-axis events per para (Llx and Urx) and hands them to
// eventNeighbours. When `_dcba` is non-zero each interval is widened on both sides by
// `_dcba` times the para's fontsize().
func (_befg paraList) xNeighbours(_dcba float64) map[*textPara][]int {
	events := make([]event, 2*len(_befg))
	for i, para := range _befg {
		if _dcba == 0 {
			events[2*i] = event{para.Llx, true, i}
			events[2*i+1] = event{para.Urx, false, i}
			continue
		}
		events[2*i] = event{para.Llx - _dcba*para.fontsize(), true, i}
		events[2*i+1] = event{para.Urx + _dcba*para.fontsize(), false, i}
	}
	return _befg.eventNeighbours(events)
}

// asRuling converts the rectangle ruling to a ruling along its centre line.
//
// For kind `_beec` the primary coordinate is the x midpoint and the extent is Lly..Ury; for
// kind `_ebabd` it is the y midpoint with extent Llx..Urx. The width comes from checkWidth on
// the two edges being averaged. It returns (nil, false) if checkWidth rejects the rectangle or
// the kind is neither of the two.
func (_eddg rectRuling) asRuling() (*ruling, bool) {
	result := ruling{_cgef: _eddg._adea, Color: _eddg.Color, _gbgb: _gaea}

	switch _eddg._adea {
	case _beec:
		result._eead = 0.5 * (_eddg.Llx + _eddg.Urx)
		result._bgeb = _eddg.Lly
		result._eecc = _eddg.Ury
		width, ok := _eddg.checkWidth(_eddg.Llx, _eddg.Urx)
		if !ok {
			if _bded {
				_ag.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067V\u0065\u0072\u0074\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _eddg)
			}
			return nil, false
		}
		result._efgd = width
	case _ebabd:
		result._eead = 0.5 * (_eddg.Lly + _eddg.Ury)
		result._bgeb = _eddg.Llx
		result._eecc = _eddg.Urx
		width, ok := _eddg.checkWidth(_eddg.Lly, _eddg.Ury)
		if !ok {
			if _bded {
				_ag.Log.Error("\u0072\u0065\u0063\u0074\u0052\u0075l\u0069\u006e\u0067\u002e\u0061\u0073\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0072\u0075\u006c\u0069\u006e\u0067H\u006f\u0072\u007a\u0020\u0021\u0063\u0068\u0065\u0063\u006b\u0057\u0069\u0064\u0074h\u0020v\u003d\u0025\u002b\u0076", _eddg)
			}
			return nil, false
		}
		result._efgd = width
	default:
		// Decodes to: "bad primary kind=%d"
		_ag.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _eddg._adea)
		return nil, false
	}
	return &result, true
}

// getComposite looks up the entry stored under key `_fcbc(_edbb, _dafaf)` and returns its
// parasBBox(). It returns (nil, empty rectangle) when there is no such entry.
func (_deece *textTable) getComposite(_edbb, _dafaf int) (paraList, _bf.PdfRectangle) {
	composite, found := _deece._bbfb[_fcbc(_edbb, _dafaf)]
	if _beae {
		_gdf.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0067\u0065\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u002d\u003e\u0025\u0073\u000a", _edbb, _dafaf, composite.String())
	}
	if !found {
		return nil, _bf.PdfRectangle{}
	}
	return composite.parasBBox()
}

// setTextRenderMode stores `_cead` as the RenderMode of the text state. No-op on a nil receiver.
func (_ebb *textObject) setTextRenderMode(_cead int) {
	if _ebb == nil {
		return
	}
	_ebb._degf._egea = RenderMode(_cead)
}

// closePath closes the most recent subpath.
//
// If a point is pending (`_ecd`), a new subpath is first started from `_decg`. If nothing is
// pending and there are no subpaths, the call only clears the pending flag.
func (_ebeb *shapesState) closePath() {
	if _ebeb._ecd {
		_ebeb._cfgd = append(_ebeb._cfgd, _bddc(_ebeb._decg))
		_ebeb._ecd = false
	} else if len(_ebeb._cfgd) == 0 {
		if _bccf {
			// Decodes to: "closePath with no path"
			_ag.Log.Debug("\u0063\u006c\u006f\u0073eP\u0061\u0074\u0068\u0020\u0077\u0069\u0074\u0068\u0020\u006e\u006f\u0020\u0070\u0061t\u0068")
		}
		_ebeb._ecd = false
		return
	}

	_ebeb._cfgd[len(_ebeb._cfgd)-1].close()
	if _bccf {
		// Decodes to: "closePath: %s"
		_ag.Log.Info("\u0063\u006c\u006f\u0073\u0065\u0050\u0061\u0074\u0068\u003a\u0020\u0025\u0073", _ebeb)
	}
}
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// TextMark represents extracted text on a page with information regarding both textual content,
// formatting (font and size) and positioning.
// It is the smallest unit of text on a PDF page, typically a single character.
//
// getBBox() in test_text.go shows how to compute bounding boxes of substrings of extracted text.
// The following code extracts the text on PDF page `page` into `text` then finds the bounding box
// `bbox` of substring `term` in `text`.
//
//	ex, _ := New(page)
//	// handle errors
//	pageText, _, _, err := ex.ExtractPageText()
//	// handle errors
//	text := pageText.Text()
//	textMarks := pageText.Marks()
//
//	start := strings.Index(text, term)
//	end := start + len(term)
//	spanMarks, err := textMarks.RangeOffset(start, end)
//	// handle errors
//	bbox, ok := spanMarks.BBox()
//	// handle the !ok case
type TextMark struct {
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Text is the extracted (decoded) text.
Text string ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Original is the text as it appears in the PDF. Unlike `Text`, it has not been decoded.
Original string ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// BBox is the bounding box of the text.
2022-10-27 19:04:58 +00:00
BBox _bf . PdfRectangle ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Font is the font the text was drawn with.
2022-10-27 19:04:58 +00:00
Font * _bf . PdfFont ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// FontSize is the font size the text was drawn with.
FontSize float64 ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Offset is the offset of the start of TextMark.Text in the extracted text. If you do this
//	text, textMarks := pageText.Text(), pageText.Marks()
//	marks := textMarks.Elements()
// then marks[i].Offset is the offset of marks[i].Text in text.
Offset int ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Meta is set true for spaces and line breaks that we insert in the extracted text. We insert
// spaces (line breaks) when we see characters that are over a threshold horizontal (vertical)
// distance apart. See wordJoiner (lineJoiner) in PageText.computeViews().
Meta bool ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// FillColor is the fill color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-10-27 19:04:58 +00:00
FillColor _eda . Color ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// StrokeColor is the stroke color of the text.
// The color is nil for spaces and line breaks (i.e. the Meta field is true).
2022-10-27 19:04:58 +00:00
StrokeColor _eda . Color ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// Orientation is the text orientation.
Orientation int ;
2022-06-27 19:58:38 +00:00
2022-07-13 21:28:43 +00:00
// DirectObject is the underlying PdfObject (Text Object) that represents the visible text. It is
// exposed to give simple access to the Text Object when some text needs to be edited or
// replaced, e.g. during redaction.
2022-10-27 19:04:58 +00:00
DirectObject _bg . PdfObject ;
2022-07-13 21:28:43 +00:00
// ObjString is a decoded string operand of a text-showing operator. It has the same value as the
// `Text` attribute, except when many glyphs are represented by one Text Object whose string
// operand holds several characters; in that case ObjString spans more than one character string
// and those strings fall in different TextMark objects.
2022-10-27 19:04:58 +00:00
// Tw, Th, Tc and Index are declared on the same line as ObjString and are undocumented in the
// original source. NOTE(review): presumably Tw/Th/Tc are the PDF text-state word spacing,
// horizontal scaling and character spacing in effect for this mark, and Index is the mark's
// position in some sequence — confirm against the code that populates them.
ObjString [ ] string ; Tw float64 ; Th float64 ; Tc float64 ; Index int ; } ;
2022-06-27 19:58:38 +00:00
2022-10-27 19:04:58 +00:00
// New returns an Extractor instance for extracting content from the input PDF page.
func New(page *_bf.PdfPage) (*Extractor, error) {
	return NewWithOptions(page, nil)
}

// _gcfa creates a wordBag that initially holds only `_cgeb`, filed under the depth index
// computed by `_feef` from the word's `_ecgcg` value. The bag takes its bounding box and
// `_gaed` value from the word; `_aaee`, `_fcfb` and `_fega` are stored unchanged.
func _gcfa(_cgeb *textWord, _aaee float64, _fcfb, _fega rulingList) *wordBag {
	depthIdx := _feef(_cgeb._ecgcg)
	return &wordBag{
		_bbf:         map[int][]*textWord{depthIdx: {_cgeb}},
		PdfRectangle: _cgeb.PdfRectangle,
		_gaed:        _cgeb._fbgge,
		_fcgc:        _aaee,
		_fff:         _fcfb,
		_aede:        _fega,
	}
}

// absorb merges `_bdce` into the receiver: the bounding box becomes `_effa` of both boxes and
// the other word's `_gdgg` elements are appended to the receiver's.
func (_dccf *textWord) absorb(_bdce *textWord) {
	_dccf.PdfRectangle = _effa(_dccf.PdfRectangle, _bdce.PdfRectangle)
	_dccf._gdgg = append(_dccf._gdgg, _bdce._gdgg...)
}

// tidied returns the rulings with duplicates removed, snapped to groups and sorted.
// `_aeade` is only used as a label in the log output. It returns nil if snapToGroups does.
func (_ggffc rulingList) tidied(_aeade string) rulingList {
	uniques := _ggffc.removeDuplicates()
	// Decodes to: "uniques"
	uniques.log("\u0075n\u0069\u0071\u0075\u0065\u0073")

	coalesced := uniques.snapToGroups()
	if coalesced == nil {
		return nil
	}
	coalesced.sort()
	if _efa {
		// Decodes to: "tidied: %q vecs=%d uniques=%d coalesced=%d"
		_ag.Log.Info("\u0074\u0069\u0064i\u0065\u0064\u003a\u0020\u0025\u0071\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u0075\u006e\u0069\u0071\u0075\u0065\u0073\u003d\u0025\u0064\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0064\u003d\u0025\u0064", _aeade, len(_ggffc), len(uniques), len(coalesced))
	}
	// Decodes to: "coalesced"
	coalesced.log("\u0063o\u0061\u006c\u0065\u0073\u0063\u0065d")
	return coalesced
}

// _efedf maps stand-alone (spacing) accent runes to the corresponding Unicode combining mark.
var (
	_efedf = map[rune]string{
		0x0060: "\u0300",
		0x02CB: "\u0300",
		0x0027: "\u0301",
		0x00B4: "\u0301",
		0x02B9: "\u0301",
		0x02CA: "\u0301",
		0x005E: "\u0302",
		0x02C6: "\u0302",
		0x007E: "\u0303",
		0x02DC: "\u0303",
		0x00AF: "\u0304",
		0x02C9: "\u0304",
		0x02D8: "\u0306",
		0x02D9: "\u0307",
		0x00A8: "\u0308",
		0x00B0: "\u030a",
		0x02DA: "\u030a",
		0x02BA: "\u030b",
		0x02DD: "\u030b",
		0x02C7: "\u030c",
		0x02C8: "\u030d",
		0x0022: "\u030e",
		0x02BB: "\u0312",
		0x02BC: "\u0313",
		0x0486: "\u0313",
		0x055A: "\u0313",
		0x02BD: "\u0314",
		0x0485: "\u0314",
		0x0559: "\u0314",
		0x02D4: "\u031d",
		0x02D5: "\u031e",
		0x02D6: "\u031f",
		0x02D7: "\u0320",
		0x02B2: "\u0321",
		0x00B8: "\u0327",
		0x02CC: "\u0329",
		0x02B7: "\u032b",
		0x02CD: "\u0331",
		0x005F: "\u0332",
		0x204E: "\u0359",
	}
)

// _ffff reports the result of `_eagg` applied to the `_dcfc` fields of the two paras.
func _ffff(_adaa, _ecfa *textPara) bool {
	return _eagg(_adaa._dcfc, _ecfa._dcfc)
}

// setTextLeading stores `_dcbb` in the text state's `_acfa` field. No-op on a nil receiver.
func (_aced *textObject) setTextLeading(_dcbb float64) {
	if _aced == nil {
		return
	}
	_aced._degf._acfa = _dcbb
}

// getFont returns the font named `_fecf`, using the extractor's font cache when it is enabled
// (non-nil). Cache entries are keyed by the String() of the font dictionary and carry the value
// of the access counter at their last use; when the cache holds `_gaeg` or more entries the
// entry with the smallest access stamp is evicted before a new one is inserted.
//
// Changes relative to the previous implementation:
//   - on a cache hit the refreshed access stamp is written back to the map (it used to be set
//     on a copy of the map value and was lost, so eviction ignored recent use);
//   - getFontDict is called once instead of twice on a cache miss (NOTE(review): assumes
//     getFontDict is a side-effect-free lookup — confirm);
//   - the eviction victim is found with a linear scan and an empty cache is never indexed.
func (_cdbb *textObject) getFont(_fecf string) (*_bf.PdfFont, error) {
	cache := _cdbb._ccae._ac

	fontDict, err := _cdbb.getFontDict(_fecf)
	if err != nil {
		if cache != nil {
			// Logged only in cached mode, as before.
			// Decodes to: "ERROR: getFont: name=%s, error: %s"
			_ag.Log.Debug("\u0045\u0052\u0052OR\u003a\u0020\u0067\u0065\u0074\u0046\u006f\u006e\u0074:\u0020n\u0061m\u0065=\u0025\u0073\u002c\u0020\u0065\u0072\u0072\u006f\u0072\u003a\u0020\u0025\u0073", _fecf, err.Error())
		}
		return nil, err
	}

	if cache != nil {
		_cdbb._ccae._eb++
		key := fontDict.String()
		if entry, ok := cache[key]; ok {
			entry._ffa = _cdbb._ccae._eb
			cache[key] = entry // map values are copies; store the refreshed stamp
			return entry._bdgeg, nil
		}
	}

	font, err := _cdbb.getFontDirect(_fecf)
	if err != nil {
		return nil, err
	}

	if cache != nil {
		if len(cache) >= _gaeg {
			// Evict the least recently used entry.
			var victim string
			found := false
			for key, entry := range cache {
				if !found || entry._ffa < cache[victim]._ffa {
					victim, found = key, true
				}
			}
			if found {
				delete(cache, victim)
			}
		}
		cache[fontDict.String()] = fontEntry{font, _cdbb._ccae._eb}
	}
	return font, nil
}
// String returns a description of `k`.
// Kinds missing from the `_gecc` table are rendered as "Not a mark: <number>".
func (_effbd markKind) String() string {
	if name, ok := _gecc[_effbd]; ok {
		return name
	}
	// Decodes to: "Not a mark: %d"
	return _gdf.Sprintf("\u004e\u006f\u0074\u0020\u0061\u0020\u006d\u0061\u0072k\u003a\u0020\u0025\u0064", _effbd)
}

// Package-level switches and thresholds. Their individual meanings are not visible from
// this part of the file.
const (
	_badf  = true
	_bccb  = true
	_bfe   = true
	_gfea  = false
	_effe  = false
	_afce  = 6
	_ecag  = 3.0
	_cacb  = 200
	_bagf  = true
	_bfcda = true
	_ecec  = true
	_dcaed = true
	_cdc   = false
)

// _acgf compares `_fbcd` and `_baae`: it returns `_dagb` of the pair unless `_fbga` reports
// that value as true, in which case it falls back to `_addb` of the pair.
func _acgf(_fbcd, _baae bounded) float64 {
	if primary := _dagb(_fbcd, _baae); !_fbga(primary) {
		return primary
	}
	return _addb(_fbcd, _baae)
}

// sortReadingOrder reorders the paras in place: after computeEBBoxes they are sorted with
// `_acgf` as the comparison and then rearranged according to topoOrder().
// Lists of zero or one para are left untouched.
//
// The sort comparison is strict (`< 0`). The previous `<= 0` reported equal elements as
// "less than" each other, which violates the ordering contract that sort.Slice requires.
func (_fbccg paraList) sortReadingOrder() {
	// Decodes to: "sortReadingOrder: paras=%d ===========x============="
	_ag.Log.Trace("\u0073\u006fr\u0074\u0052\u0065\u0061\u0064i\u006e\u0067\u004f\u0072\u0064e\u0072\u003a\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_fbccg))
	if len(_fbccg) <= 1 {
		return
	}
	_fbccg.computeEBBoxes()
	_f.Slice(_fbccg, func(i, j int) bool {
		return _acgf(_fbccg[i], _fbccg[j]) < 0
	})
	order := _fbccg.topoOrder()
	_fbccg.reorder(order)
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// String returns a human readable description of `path`.
2022-10-27 19:04:58 +00:00
// String returns a human readable description of the subpath: the number of points followed
// by all points when there are five or fewer, otherwise by the first two and the last point.
func (_dbcc *subpath) String() string {
	points := _dbcc._fcdc
	if count := len(points); count > 5 {
		// Decodes to: "%d: %6.2f %6.2f ... %6.2f"
		return _gdf.Sprintf("\u0025d\u003a\u0020\u0025\u0036.\u0032\u0066\u0020\u0025\u0036.\u0032f\u0020.\u002e\u002e\u0020\u0025\u0036\u002e\u0032f", count, points[0], points[1], points[count-1])
	}
	// Decodes to: "%d: %6.2f"
	return _gdf.Sprintf("\u0025d\u003a\u0020\u0025\u0036\u002e\u0032f", len(points), points)
}
2022-09-10 15:35:04 +00:00
2022-10-27 19:04:58 +00:00
// String returns a string describing the current state of the textState stack:
// a header line with the stack depth followed by one line per entry.
func (_efdc *stateStack) String() string {
	// Header decodes to: "---- font stack: %d"
	lines := []string{_gdf.Sprintf("\u002d\u002d\u002d\u002d f\u006f\u006e\u0074\u0020\u0073\u0074\u0061\u0063\u006b\u003a\u0020\u0025\u0064", len(*_efdc))}
	for idx, state := range *_efdc {
		desc := "\u003c\u006e\u0069l\u003e" // "<nil>"
		if state != nil {
			desc = state.String()
		}
		lines = append(lines, _gdf.Sprintf("\u0009\u0025\u0032\u0064\u003a\u0020\u0025\u0073", idx, desc))
	}
	return _d.Join(lines, "\u000a")
}

// _aeca builds a ruling from the line segment `_eegb`-`_dcef` with color `_caeb`.
// It returns (nil, false) when `_bega` classifies the segment as `_gdbb`; otherwise it
// returns the result of lineRuling.asRuling().
func _aeca(_eegb, _dcef _ee.Point, _caeb _eda.Color) (*ruling, bool) {
	line := lineRuling{
		_abec: _eegb,
		_eacb: _dcef,
		_aedc: _bega(_eegb, _dcef),
		Color: _caeb,
	}
	if line._aedc == _gdbb {
		return nil, false
	}
	return line.asRuling()
}

// augmentGrid splits the rulings with vertsHorzs() and, where the bounding box of one group
// extends more than `_deff` beyond the other's, adds a synthetic ruling (kind `_eddbd`) on
// that side. If either group is empty, or nothing was added, the unmodified groups are returned.
func (_fbffe rulingList) augmentGrid() (rulingList, rulingList) {
	verts, horzs := _fbffe.vertsHorzs()
	if len(verts) == 0 || len(horzs) == 0 {
		return verts, horzs
	}
	origVerts, origHorzs := verts, horzs

	bboxV := verts.bbox()
	bboxH := horzs.bbox()
	if _efa {
		_ag.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0056\u003d\u0025\u0036\u002e\u0032\u0066", bboxV)
		_ag.Log.Info("\u0061u\u0067\u006d\u0065\u006e\u0074\u0047\u0072\u0069\u0064\u003a\u0020b\u0062\u006f\u0078\u0048\u003d\u0025\u0036\u002e\u0032\u0066", bboxH)
	}

	// Left edge.
	if bboxH.Llx < bboxV.Llx-_deff {
		left := &ruling{_gbgb: _eddbd, _cgef: _beec, _eead: bboxH.Llx, _bgeb: bboxV.Lly, _eecc: bboxV.Ury}
		verts = append(rulingList{left}, verts...)
	}
	// Right edge.
	if bboxH.Urx > bboxV.Urx+_deff {
		right := &ruling{_gbgb: _eddbd, _cgef: _beec, _eead: bboxH.Urx, _bgeb: bboxV.Lly, _eecc: bboxV.Ury}
		verts = append(verts, right)
	}
	// Bottom edge.
	if bboxV.Lly < bboxH.Lly-_deff {
		bottom := &ruling{_gbgb: _eddbd, _cgef: _ebabd, _eead: bboxV.Lly, _bgeb: bboxH.Llx, _eecc: bboxH.Urx}
		horzs = append(rulingList{bottom}, horzs...)
	}
	// Top edge.
	if bboxV.Ury > bboxH.Ury+_deff {
		top := &ruling{_gbgb: _eddbd, _cgef: _ebabd, _eead: bboxV.Ury, _bgeb: bboxH.Llx, _eecc: bboxH.Urx}
		horzs = append(horzs, top)
	}

	if len(verts)+len(horzs) == len(_fbffe) {
		return origVerts, origHorzs
	}

	// Log before/after lists.
	combined := append(verts, horzs...)
	_fbffe.log("u\u006e\u0061\u0075\u0067\u006d\u0065\u006e\u0074\u0065\u0064")
	combined.log("\u0061u\u0067\u006d\u0065\u006e\u0074\u0065d")
	return verts, horzs
}

// tables returns the TextTable of every para whose table (`_bccg`) is non-nil and exportable.
func (_aecg paraList) tables() []TextTable {
	var result []TextTable
	if _beae {
		// Decodes to: "paras.tables:"
		_ag.Log.Info("\u0070\u0061\u0072\u0061\u0073\u002e\u0074\u0061\u0062\u006c\u0065\u0073\u003a")
	}
	for _, para := range _aecg {
		table := para._bccg
		if table == nil || !table.isExportable() {
			continue
		}
		result = append(result, table.toTextTable())
	}
	return result
}

// isActualGrid reports whether the rulings form a grid and, if so, returns the augmented
// vertical rulings followed by the augmented horizontal rulings.
//
// It requires at least `_agec`+1 vertical and `_bafa`+1 horizontal rulings after augmentGrid().
// With `_cdc` set it checks that the outermost rulings meet at the corners (via `_geccf`);
// otherwise it requires both groups to be aligned().
func (_gegd rulingList) isActualGrid() (rulingList, bool) {
	verts, horzs := _gegd.augmentGrid()
	if len(verts) < _agec+1 || len(horzs) < _bafa+1 {
		if _efa {
			_ag.Log.Info("\u0069s\u0041\u0063t\u0075\u0061\u006c\u0047r\u0069\u0064\u003a \u004e\u006f\u0074\u0020\u0061\u006c\u0069\u0067\u006eed\u002e\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u003c\u0020\u0025d\u0020\u0078 \u0025\u0064", len(verts), len(horzs), _agec+1, _bafa+1)
		}
		return nil, false
	}

	if _efa {
		_ag.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075a\u006c\u0047\u0072\u0069\u0064\u003a\u0020\u0025\u0073\u0020\u003a\u0020\u0025t\u0020\u0026\u0020\u0025\u0074\u0020\u2192 \u0025\u0074", _gegd, len(verts) >= 2, len(horzs) >= 2, len(verts) >= 2 && len(horzs) >= 2)
		for idx, r := range _gegd {
			_gdf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0076\u000a", idx, r)
		}
	}

	if _cdc {
		v0, v1 := verts[0], verts[len(verts)-1]
		h0, h1 := horzs[0], horzs[len(horzs)-1]
		cornersMeet := _geccf(v0._eead-h0._bgeb) &&
			_geccf(v1._eead-h0._eecc) &&
			_geccf(h0._eead-v0._eecc) &&
			_geccf(h1._eead-v0._bgeb)
		if !cornersMeet {
			if _efa {
				_ag.Log.Info("\u0069\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069d\u003a\u0020\u0020N\u006f\u0074 \u0061\u006c\u0069\u0067\u006e\u0065d\u002e\n\t\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0076\u0031\u003d\u0025\u0073\u000a\u0009\u0068\u0030\u003d\u0025\u0073\u000a\u0009\u0068\u0031\u003d\u0025\u0073", v0, v1, h0, h1)
			}
			return nil, false
		}
	} else {
		if !verts.aligned() {
			if _cecg {
				_ag.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0076\u0065\u0072\u0074\u0073\u002e\u0020%\u0064", len(verts))
			}
			return nil, false
		}
		if !horzs.aligned() {
			if _efa {
				_ag.Log.Info("i\u0073\u0041\u0063\u0074\u0075\u0061l\u0047\u0072\u0069\u0064\u003a\u0020N\u006f\u0074\u0020\u0061\u006c\u0069\u0067n\u0065\u0064\u0020\u0068\u006f\u0072\u007a\u0073\u002e\u0020%\u0064", len(horzs))
			}
			return nil, false
		}
	}

	grid := append(verts, horzs...)
	return grid, true
}

// _ddce converts filled path sections to rulings. After `_bcff` has processed the sections,
// every quadrilateral subpath for which makeRectRuling succeeds contributes one ruling.
func _ddce(_ggce []pathSection) rulingList {
	_bcff(_ggce)
	if _efa {
		// Decodes to: "makeFillRulings: %d fills"
		_ag.Log.Info("\u006da\u006b\u0065\u0046\u0069l\u006c\u0052\u0075\u006c\u0069n\u0067s\u003a \u0025\u0064\u0020\u0066\u0069\u006c\u006cs", len(_ggce))
	}

	var rulings rulingList
	for _, section := range _ggce {
		for _, sub := range section._gdbf {
			if !sub.isQuadrilateral() {
				if _efa {
					_ag.Log.Error("!\u0069s\u0051\u0075\u0061\u0064\u0072\u0069\u006c\u0061t\u0065\u0072\u0061\u006c: \u0025\u0073", sub)
				}
				continue
			}
			r, ok := sub.makeRectRuling(section.Color)
			if ok {
				rulings = append(rulings, r)
				continue
			}
			if _bded {
				_ag.Log.Error("\u0021\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0073", sub)
			}
		}
	}

	if _efa {
		// Decodes to: "makeFillRulings: %s"
		_ag.Log.Info("\u006d\u0061\u006b\u0065Fi\u006c\u006c\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0073", rulings.String())
	}
	return rulings
}

// _afed returns a newline when `_fbga` reports false for the difference `_cbdg`-`_dedg`,
// and a single space otherwise.
func _afed(_cbdg, _dedg float64) string {
	if _fbga(_cbdg - _dedg) {
		return "\u0020"
	}
	return "\u000a"
}

// newSubPath clears the current path state so that a new subpath can be started.
func (_dgdg *shapesState) newSubPath() {
	_dgdg.clearPath()
	if _bccf {
		// Decodes to: "newSubPath: %s"
		_ag.Log.Info("\u006e\u0065\u0077\u0053\u0075\u0062\u0050\u0061\u0074h\u003a\u0020\u0025\u0073", _dgdg)
	}
}

var _gf = false

// _fcbc packs two indices into a single map key: `_fagegf` * 0x1000000 + `_fdbb`.
func _fcbc(_fagegf, _fdbb int) uint64 {
	high := uint64(_fagegf) * 0x1000000
	return high + uint64(_fdbb)
}
2022-07-13 21:28:43 +00:00
2022-10-27 19:04:58 +00:00
// ToText returns the page text as a single string.
//
// Deprecated: This function is deprecated and will be removed in a future major version. Please use
// Text() instead.
func (_egd PageText) ToText() string {
	text := _egd.Text()
	return text
}
2022-07-13 21:28:43 +00:00
2022-10-27 19:04:58 +00:00
// ExtractTextWithStats works like ExtractText but returns the number of characters in the output
// (`numChars`) and the number of characters that were not decoded (`numMisses`).
// On error the text is empty and the counts reported by ExtractPageText are passed through.
func (_bbd *Extractor) ExtractTextWithStats() (_fbf string, _fcf int, _cadd int, _ceag error) {
	pageText, numChars, numMisses, err := _bbd.ExtractPageText()
	if err != nil {
		return "", numChars, numMisses, err
	}
	return pageText.Text(), numChars, numMisses, nil
}

// highestWord returns the first word stored under depth index `_bff` whose `_ecgcg` value lies
// in the closed interval [`_eaaf`, `_agf`], or nil if there is none.
func (_ecee *wordBag) highestWord(_bff int, _eaaf, _agf float64) *textWord {
	words := _ecee._bbf[_bff]
	for i := range words {
		if depth := words[i]._ecgcg; _eaaf <= depth && depth <= _agf {
			return words[i]
		}
	}
	return nil
}

// gridTiling is a rectangle together with two coordinate lists and a two-level map of
// gridTile values keyed by float64 coordinates.
type gridTiling struct {
	_bf.PdfRectangle
	_fdeac []float64
	_gccf  []float64
	_ffdd  map[float64]map[float64]gridTile
}

// _gbae returns the `_efedf` table entry for `_eacbe` when it consists of exactly one rune.
// The bool is false if the string is not a single rune or the rune is not in the table.
func _gbae(_eacbe string) (string, bool) {
	runes := []rune(_eacbe)
	if len(runes) != 1 {
		return "", false
	}
	combining, ok := _efedf[runes[0]]
	return combining, ok
}

// _cacbb returns `_fcee`, then the values of `_febe` strictly between the smaller and larger of
// `_fcee` and `_abaf`, then `_abaf`. Scanning stops at the first value that reaches the upper
// bound, so `_febe` is presumably sorted ascending — confirm at the call sites.
func _cacbb(_febe []float64, _fcee, _abaf float64) []float64 {
	lower, upper := _fcee, _abaf
	if upper < lower {
		lower, upper = upper, lower
	}

	result := make([]float64, 0, len(_febe)+2)
	result = append(result, _fcee)
	for _, v := range _febe {
		if v <= lower {
			continue
		}
		if v >= upper {
			break
		}
		result = append(result, v)
	}
	return append(result, _abaf)
}

// inDiacriticArea reports whether `_agbg` sits close enough to the receiver: the sum of the
// Llx and Urx differences must be below Width()*`_fge` in magnitude and the Lly difference
// below Height()*`_fge` in magnitude.
func (_gbc *textMark) inDiacriticArea(_agbg *textMark) bool {
	dLlx := _gbc.Llx - _agbg.Llx
	dUrx := _gbc.Urx - _agbg.Urx
	dLly := _gbc.Lly - _agbg.Lly
	xClose := _b.Abs(dLlx+dUrx) < _gbc.Width()*_fge
	return xClose && _b.Abs(dLly) < _gbc.Height()*_fge
}

// _gafce builds the gridTile for rectangle `_cfbba`. Each of the four flags is true when the
// corresponding ruling is non-nil and encloses the tile's extent: the first two rulings are
// tested against Lly..Ury, the last two against Llx..Urx.
func _gafce(_cfbba _bf.PdfRectangle, _ededf, _gaec, _fccab, _gbfe *ruling) gridTile {
	llx, urx := _cfbba.Llx, _cfbba.Urx
	lly, ury := _cfbba.Lly, _cfbba.Ury
	return gridTile{
		PdfRectangle: _cfbba,
		_bceg:        _ededf != nil && _ededf.encloses(lly, ury),
		_abdfe:       _gaec != nil && _gaec.encloses(lly, ury),
		_abag:        _fccab != nil && _fccab.encloses(llx, urx),
		_gfga:        _gbfe != nil && _gbfe.encloses(llx, urx),
	}
}

// _dgcd reports whether `_ffgbd` divided by max(`_gbef`, `_cdea`) is below `_cdfc`.
func _dgcd(_ffgbd, _cdea float64) bool {
	denom := _b.Max(_gbef, _cdea)
	return _ffgbd/denom < _cdfc
}
2022-07-13 21:28:43 +00:00
2022-10-27 19:04:58 +00:00
// ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
// The two int results are the statistics reported by the internal extractPageText call.
// An ErrColorOutOfRange error from that call is tolerated; any other error aborts extraction.
// TODO(peterwilliams97): The stats complicate this function signature and aren't very useful.
// Replace with a function like Extract() (*PageText, error)
func (_ggba *Extractor) ExtractPageText() (*PageText, int, int, error) {
	pageText, statA, statB, err := _ggba.extractPageText(_ggba._fc, _ggba._ad, _ee.IdentityMatrix(), 0)
	if err != nil && err != _bf.ErrColorOutOfRange {
		return nil, 0, 0, err
	}
	pageText.computeViews()
	if err = _eabb(pageText); err != nil {
		return nil, 0, 0, err
	}
	// Restrict the result to the crop box only when the options ask for it.
	if _ggba._bc != nil && _ggba._bc.ApplyCropBox && _ggba._ga != nil {
		pageText.ApplyArea(*_ggba._ga)
	}
	return pageText, statA, statB, nil
}

// has reports whether `_egaa` is a key of the set.
func (_cgfb intSet) has(_egaa int) bool {
	_, present := _cgfb[_egaa]
	return present
}

// _eabb is the license gate: it returns nil when a licensed key is installed or the
// package flag _gf is set; otherwise it prints a notice to stdout and returns an error.
func _eabb(_cgdaa *PageText) error {
	key := _cd.GetLicenseKey()
	if (key != nil && key.IsLicensed()) || _gf {
		return nil
	}
	_gdf.Printf("\u0055\u006e\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0064\u0020c\u006f\u0070\u0079\u0020\u006f\u0066\u0020\u0055\u006e\u0069P\u0044\u0046\u000a")
	_gdf.Println("-\u0020\u0047\u0065\u0074\u0020\u0061\u0020\u0066\u0072e\u0065\u0020\u0074\u0072\u0069\u0061\u006c l\u0069\u0063\u0065\u006es\u0065\u0020\u006f\u006e\u0020\u0068\u0074\u0074\u0070s:\u002f\u002fu\u006e\u0069\u0064\u006f\u0063\u002e\u0069\u006f")
	return _g.New("\u0075\u006e\u0069\u0070d\u0066\u0020\u006c\u0069\u0063\u0065\u006e\u0073\u0065\u0020c\u006fd\u0065\u0020\u0072\u0065\u0071\u0075\u0069r\u0065\u0064")
}

// newTextMark builds a textMark for text `_dfa` rendered with matrix `_bffc` and ending
// at `_ddea`. The bool result is false when the mark's box does not intersect the box
// held in _baba._ccae._bb (only checked when that box has positive width).
func (_baba *textObject) newTextMark(_dfa string, _bffc _ee.Matrix, _ddea _ee.Point, _gafa float64, _cbgb *_bf.PdfFont, _bcea float64, _caefc, _ffe _eda.Color, _gddb _bg.PdfObject, _cggcb []string, _bcbcc int) (textMark, bool) {
	theta := _bffc.Angle()
	orient := _fccec(theta, _ggdc)

	// The mark height comes from the X scale for 90/270 orientations, else the Y scale.
	var height float64
	if orient%180 == 90 {
		height = _bffc.ScalingFactorX()
	} else {
		height = _bffc.ScalingFactorY()
	}

	start := _ecc(_bffc)
	box := _bf.PdfRectangle{Llx: start.X, Lly: start.Y, Urx: _ddea.X, Ury: _ddea.Y}
	switch orient % 360 {
	case 90:
		box.Urx -= height
	case 180:
		box.Ury -= height
	case 270:
		box.Urx += height
	case 0:
		box.Ury += height
	default:
		// Any other orientation is treated as upright.
		orient = 0
		box.Ury += height
	}
	if box.Llx > box.Urx {
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	onPage := true
	if _baba._ccae._bb.Width() > 0 {
		clipped, intersects := _agcbf(box, _baba._ccae._bb)
		if !intersects {
			onPage = false
			_ag.Log.Debug("\u0054\u0065\u0078\u0074\u0020m\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069\u0064\u0065\u0020\u0070a\u0067\u0065\u002e\u0020\u0062\u0062\u006f\u0078\u003d\u0025\u0067\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u003d\u0025\u0067\u0020\u0074\u0065\u0078\u0074\u003d\u0025q", box, _baba._ccae._bb, _dfa)
		}
		box = clipped
	}

	// Derive the orientation-adjusted rectangle from the (possibly clipped) box.
	rotated := box
	page := _baba._ccae._bb
	switch orient % 360 {
	case 90:
		page.Urx, page.Ury = page.Ury, page.Urx
		rotated = _bf.PdfRectangle{Llx: page.Urx - box.Ury, Urx: page.Urx - box.Lly, Lly: box.Llx, Ury: box.Urx}
	case 180:
		rotated = _bf.PdfRectangle{Llx: page.Urx - box.Llx, Urx: page.Urx - box.Urx, Lly: page.Ury - box.Lly, Ury: page.Ury - box.Ury}
	case 270:
		page.Urx, page.Ury = page.Ury, page.Urx
		rotated = _bf.PdfRectangle{Llx: box.Ury, Urx: box.Lly, Lly: page.Ury - box.Llx, Ury: page.Ury - box.Urx}
	}
	if rotated.Llx > rotated.Urx {
		rotated.Llx, rotated.Urx = rotated.Urx, rotated.Llx
	}
	if rotated.Lly > rotated.Ury {
		rotated.Lly, rotated.Ury = rotated.Ury, rotated.Lly
	}

	mark := textMark{
		_eeaf:        _dfa,
		PdfRectangle: rotated,
		_bbgf:        box,
		_becff:       _cbgb,
		_abba:        height,
		_dbe:         _bcea,
		_ecbd:        _bffc,
		_fcced:       _ddea,
		_bfbc:        orient,
		_agbe:        _caefc,
		_dggfa:       _ffe,
		_gded:        _gddb,
		_cedf:        _cggcb,
		Th:           _baba._degf._gdef,
		Tw:           _baba._degf._gab,
		_eced:        _bcbcc,
	}
	if _eccd {
		_ag.Log.Info("n\u0065\u0077\u0054\u0065\u0078\u0074M\u0061\u0072\u006b\u003a\u0020\u0073t\u0061\u0072\u0074\u003d\u0025\u002e\u0032f\u0020\u0065\u006e\u0064\u003d\u0025\u002e\u0032\u0066\u0020%\u0073", start, _ddea, mark.String())
	}
	return mark, onPage
}

// isAtom returns a 2x2 textTable made of the receiver, its _bfdc and _egec neighbours
// and the paragraph reached through both of them, or nil when any of those neighbours
// is taken or the two paths do not meet at the same paragraph.
func (_dfdad *textPara) isAtom() *textTable {
	first := _dfdad
	second := _dfdad._bfdc
	third := _dfdad._egec
	if second.taken() || third.taken() {
		return nil
	}
	fourth := second._egec
	if fourth.taken() || fourth != third._bfdc {
		return nil
	}
	return _fffd(first, second, third, fourth)
}

// _dcdd applies _dgcd to the absolute Y difference (numerator) and the absolute X
// difference (denominator) of the two points.
func _dcdd(_fadb, _affbb _ee.Point) bool {
	dx := _b.Abs(_fadb.X - _affbb.X)
	dy := _b.Abs(_fadb.Y - _affbb.Y)
	return _dgcd(dy, dx)
}

// _fbedc maps each rulingKind to its display name.
var _fbedc = map[rulingKind]string{
	_gdbb:  "\u006e\u006f\u006e\u0065",
	_ebabd: "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c",
	_beec:  "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c",
}
// complete reports whether every tile stored in the tiling is itself complete.
func (_acdde gridTiling) complete() bool {
	for _, row := range _acdde._ffdd {
		for _, tile := range row {
			if !tile.complete() {
				return false
			}
		}
	}
	return true
}

// mergePrimary returns the arithmetic mean of the _eead values of the rulings.
// The list must be non-empty.
func (_bdaf rulingList) mergePrimary() float64 {
	total := _bdaf[0]._eead
	for i := 1; i < len(_bdaf); i++ {
		total += _bdaf[i]._eead
	}
	return total / float64(len(_bdaf))
}

// toTextMarks flattens the paragraphs into a single TextMark slice, skipping
// paragraphs whose _dgggff flag is set. Between consecutive paragraphs it inserts
// either one space mark (when _ceabf says so) or two newline marks, and it always
// finishes with two newline marks.
func (_fcdda paraList) toTextMarks() []TextMark {
	offset := 0
	var marks []TextMark
	lastIdx := len(_fcdda) - 1
	for idx, para := range _fcdda {
		if para._dgggff {
			continue
		}
		marks = append(marks, para.toTextMarks(&offset)...)
		if idx == lastIdx {
			continue
		}
		if _ceabf(para, _fcdda[idx+1]) {
			marks = _abbf(marks, &offset, "\u0020")
			continue
		}
		marks = _abbf(marks, &offset, "\u000a")
		marks = _abbf(marks, &offset, "\u000a")
	}
	marks = _abbf(marks, &offset, "\u000a")
	marks = _abbf(marks, &offset, "\u000a")
	return marks
}

// rectRuling is a coloured rectangle tagged with a ruling kind and a mark kind.
type rectRuling struct {
	_adea  rulingKind
	_febad markKind
	_eda.Color
	_bf.PdfRectangle
}

// addPoint converts (`_bgf`, `_fgff`) to a device point and appends it to the subpath
// returned by establishSubpath; when there is none, the point is stored as the
// pending point and the _ecd flag is set.
func (_bafe *shapesState) addPoint(_bgf, _fgff float64) {
	current := _bafe.establishSubpath()
	pt := _bafe.devicePoint(_bgf, _fgff)
	if current != nil {
		current.add(pt)
		return
	}
	_bafe._ecd = true
	_bafe._decg = pt
}

// fill appends the current subpaths, coloured with the fill colour, to `*_gdbg`.
// Diagnostic output is printed when the package debug flags are enabled.
func (_cdad *shapesState) fill(_gdbg *[]pathSection) {
	section := pathSection{_gdbf: _cdad._cfgd, Color: _cdad._gdcg.getFillColor()}
	*_gdbg = append(*_gdbg, section)
	if !_efa {
		return
	}
	box := section.bbox()
	_gdf.Printf("\u0020 \u0020\u0020\u0046\u0049\u004c\u004c\u003a %\u0032\u0064\u0020\u0066\u0069\u006c\u006c\u0073\u0020\u0028\u0025\u0064\u0020\u006ee\u0077\u0029 \u0073\u0073\u003d%\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d\u0025\u0033\u0076\u0020\u0025\u0036\u002e\u0032f\u003d\u00256.\u0032\u0066\u0078%\u0036\u002e\u0032\u0066\u000a", len(*_gdbg), len(section._gdbf), _cdad, section.Color, box, box.Width(), box.Height())
	if !_bfga {
		return
	}
	// Dump at most the first eleven subpaths.
	for idx, sp := range section._gdbf {
		_gdf.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", idx, sp)
		if idx == 10 {
			break
		}
	}
}

// stroke appends the current subpaths, coloured with the stroke colour, to `*_fbgg`.
// Diagnostic output is printed when the package debug flags are enabled.
func (_deb *shapesState) stroke(_fbgg *[]pathSection) {
	section := pathSection{_gdbf: _deb._cfgd, Color: _deb._gdcg.getStrokeColor()}
	*_fbgg = append(*_fbgg, section)
	if !_efa {
		return
	}
	_gdf.Printf("\u0020 \u0020\u0020S\u0054\u0052\u004fK\u0045\u003a\u0020\u0025\u0064\u0020\u0073t\u0072\u006f\u006b\u0065\u0073\u0020s\u0073\u003d\u0025\u0073\u0020\u0063\u006f\u006c\u006f\u0072\u003d%\u002b\u0076\u0020\u0025\u0036\u002e\u0032\u0066\u000a", len(*_fbgg), _deb, _deb._gdcg.getStrokeColor(), section.bbox())
	if !_bfga {
		return
	}
	// Dump at most the first eleven subpaths.
	for idx, sp := range _deb._cfgd {
		_gdf.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", idx, sp)
		if idx == 10 {
			break
		}
	}
}

// _addb returns the difference between the left edges (Llx) of the two bounding boxes.
func _addb(_cceg, _gafb bounded) float64 {
	left := _cceg.bbox().Llx
	otherLeft := _gafb.bbox().Llx
	return left - otherLeft
}

// moveText forwards the offsets to moveLP.
func (_cadg *textObject) moveText(_fgf, _dda float64) {
	_cadg.moveLP(_fgf, _dda)
}
// Log messages used when a font cannot be resolved.
const (
	// _dg: the font object has an unexpected type.
	_dg = "\u0045\u0052R\u004f\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074\u002c\u0020\u0069\u006e\u0076\u0061\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
	// _edf: the font was not found.
	_edf = "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0043a\u006e\u0027\u0074 g\u0065\u0074\u0020\u0066\u006f\u006et\u0020\u0070\u0072\u006f\u0070\u0065\u0072\u0074\u0069\u0065\u0073\u002c\u0020\u0066\u006fn\u0074\u0020\u006e\u006f\u0074\u0020\u0066\u006fu\u006e\u0064"
	// _cgc: the font stream has an unexpected type.
	_cgc = "\u0045\u0052\u0052O\u0052\u003a\u0020\u0043\u0061\u006e\u0027\u0074\u0020\u0067\u0065\u0074\u0020\u0066\u006f\u006e\u0074\u0020\u0073\u0074\u0072\u0065\u0061\u006d\u002c\u0020\u0069\u006e\u0076a\u006c\u0069\u0064\u0020\u0074\u0079\u0070\u0065"
)
// _bcff rewrites every point of every subpath in `_fece` in place, passing each
// coordinate through _ccede. It does nothing when the package value _cegd is negative.
func _bcff(_fece []pathSection) {
	if _cegd < 0.0 {
		return
	}
	if _efa {
		_ag.Log.Info("\u0067\u0072\u0061\u006e\u0075\u006c\u0061\u0072\u0069\u007a\u0065\u003a\u0020\u0025\u0064 \u0073u\u0062\u0070\u0061\u0074\u0068\u0020\u0073\u0065\u0063\u0074\u0069\u006f\u006e\u0073", len(_fece))
	}
	for secIdx, section := range _fece {
		for pathIdx, sp := range section._gdbf {
			for ptIdx, before := range sp._fcdc {
				sp._fcdc[ptIdx] = _ee.Point{X: _ccede(before.X), Y: _ccede(before.Y)}
				if !_efa {
					continue
				}
				// Report only the points that actually moved.
				after := sp._fcdc[ptIdx]
				if _egag(before, after) {
					continue
				}
				delta := _ee.Point{X: after.X - before.X, Y: after.Y - before.Y}
				_gdf.Printf("\u0025\u0034d \u002d\u0020\u00254\u0064\u0020\u002d\u0020%4d\u003a %\u002e\u0032\u0066\u0020\u2192\u0020\u0025.2\u0066\u0020\u0028\u0025\u0067\u0029\u000a", secIdx, pathIdx, ptIdx, before, after, delta)
			}
		}
	}
}

// extractXObjectImage records an ImageMark for the XObject image named `_acf`.
// Decoded images are cached in _ebe._gda keyed by the XObject, so an image drawn
// several times is only converted once. A missing XObject or image is not an error.
func (_ebe *imageExtractContext) extractXObjectImage(_acf *_bg.PdfObjectName, _eca _da.GraphicsState, _gdfa *_bf.PdfPageResources) error {
	xobj, _ := _gdfa.GetXObjectByName(*_acf)
	if xobj == nil {
		return nil
	}

	entry, cached := _ebe._gda[xobj]
	if !cached {
		ximg, err := _gdfa.GetXObjectImageByName(*_acf)
		if err != nil {
			return err
		}
		if ximg == nil {
			return nil
		}
		decoded, err := ximg.ToImage()
		if err != nil {
			return err
		}
		entry = &cachedImage{_fbc: decoded, _aac: ximg.ColorSpace}
		_ebe._gda[xobj] = entry
	}

	rgb, err := entry._aac.ImageToRGB(*entry._fbc)
	if err != nil {
		return err
	}
	_ag.Log.Debug("@\u0044\u006f\u0020\u0043\u0054\u004d\u003a\u0020\u0025\u0073", _eca.CTM.String())

	mark := ImageMark{
		Image:  &rgb,
		Width:  _eca.CTM.ScalingFactorX(),
		Height: _eca.CTM.ScalingFactorY(),
		Angle:  _eca.CTM.Angle(),
	}
	mark.X, mark.Y = _eca.CTM.Translation()
	_ebe._dba = append(_ebe._dba, mark)
	_ebe._fe++
	return nil
}

// lastpointEstablished returns the pending point when _ecd is set, otherwise the last
// point of the final subpath when that subpath's _fcfe flag is set; in both cases the
// bool is false. When neither applies it returns the zero point and true.
func (_cedca *shapesState) lastpointEstablished() (_ee.Point, bool) {
	if _cedca._ecd {
		return _cedca._decg, false
	}
	if n := len(_cedca._cfgd); n > 0 && _cedca._cfgd[n-1]._fcfe {
		return _cedca._cfgd[n-1].last(), false
	}
	return _ee.Point{}, true
}

// inTile returns the paragraphs whose rectangles are contained in tile `_cdaf`,
// in their original order.
func (_gafd paraList) inTile(_cdaf gridTile) paraList {
	var inside paraList
	for _, para := range _gafd {
		if !_cdaf.contains(para.PdfRectangle) {
			continue
		}
		inside = append(inside, para)
	}
	if _beae {
		_gdf.Printf("\u0020 \u0020\u0069\u006e\u0054i\u006c\u0065\u003a\u0020\u0020%\u0073 \u0069n\u0073\u0069\u0064\u0065\u003d\u0025\u0064\n", _cdaf, len(inside))
		for idx, para := range inside {
			_gdf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, para)
		}
		_gdf.Println("")
	}
	return inside
}

// fontEntry pairs a font with an int64 value.
// NOTE(review): _ffa is presumably a last-access stamp for the font cache — confirm against its users.
type fontEntry struct {
	_bdgeg *_bf.PdfFont
	_ffa   int64
}
// ImageExtractOptions contains options for controlling image extraction from
// PDF pages.
type ImageExtractOptions struct { IncludeInlineStencilMasks bool ; } ; func ( _adff * wordBag ) text ( ) string { _bedb := _adff . allWords ( ) ; _gbbb := make ( [ ] string , len ( _bedb ) ) ; for _efdca , _gefe := range _bedb { _gbbb [ _efdca ] = _gefe . _bcaa ; } ; return _d . Join ( _gbbb , "\u0020" ) ;
2022-09-10 15:35:04 +00:00
} ;
2022-10-27 19:04:58 +00:00
// Text returns the extracted page text.
func (_ceab PageText) Text() string {
	return _ceab._gcea
}

// blocks reports whether some ruling in the list separates `_bagbc` and `_edefa`:
// the two must overlap in their [_bgeb, _eecc] ranges, and a list member must have
// its _eead between theirs (with tolerance _dcfe) while spanning the overlap.
func (_dgce rulingList) blocks(_bagbc, _edefa *ruling) bool {
	if _bagbc._bgeb > _edefa._eecc || _edefa._bgeb > _bagbc._eecc {
		return false
	}
	overlapLo := _b.Max(_bagbc._bgeb, _edefa._bgeb)
	overlapHi := _b.Min(_bagbc._eecc, _edefa._eecc)

	// Order the pair so that `near` has the smaller _eead.
	near, far := _bagbc, _edefa
	if near._eead > far._eead {
		near, far = far, near
	}
	for _, r := range _dgce {
		between := near._eead <= r._eead+_dcfe && r._eead <= far._eead+_dcfe
		spans := r._bgeb <= overlapHi && overlapLo <= r._eecc
		if between && spans {
			return true
		}
	}
	return false
}

// encloses reports whether [`_gegad`, `_bbab`] lies within the ruling's
// [_bgeb, _eecc] range widened by _deff at both ends.
func (_fageg *ruling) encloses(_gegad, _bbab float64) bool {
	if _fageg._bgeb-_deff > _gegad {
		return false
	}
	return _bbab <= _fageg._eecc+_deff
}
// String returns a description of `k`.
// Kinds missing from _fbedc are rendered with their numeric value.
func (_acede rulingKind) String() string {
	if name, known := _fbedc[_acede]; known {
		return name
	}
	return _gdf.Sprintf("\u004e\u006ft\u0020\u0061\u0020r\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0025\u0064", _acede)
}

// _dgecg returns the left edge (Llx) of `_dbdgb` minus the right edge (Urx) of `_afbg`.
func _dgecg(_dbdgb, _afbg bounded) float64 {
	left := _dbdgb.bbox().Llx
	right := _afbg.bbox().Urx
	return left - right
}
// split divides the composite cell into a table using the row corridors `_bddg`
// (y values) and column corridors `_dedd` (x values). Each text line of the cell is
// assigned to the first grid rectangle that _ceba accepts for it, and every non-empty
// rectangle becomes one table cell built by _egecc.
// Note: sorting reorders the receiver's shared paraList backing array.
func (_fgfa compositeCell) split(_bddg, _dedd []float64) *textTable {
	numRows := len(_bddg) + 1
	numCols := len(_dedd) + 1
	if _beae {
		_ag.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069t\u0065\u0043\u0065l\u006c\u002e\u0073\u0070l\u0069\u0074\u003a\u0020\u0025\u0064\u0020\u0078\u0020\u0025\u0064\u000a\u0009\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025\u0073\u000a"+"\u0009\u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073=\u0025\u0036\u002e\u0032\u0066\u000a\t\u0063\u006f\u006c\u0043\u006f\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u003d%\u0036\u002e\u0032\u0066", numCols, numRows, _fgfa, _bddg, _dedd)
		_gdf.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u0070\u0061\u0072\u0061\u0073\u000a", len(_fgfa.paraList))
		for i, para := range _fgfa.paraList {
			_gdf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, para.String())
		}
		_gdf.Printf("\u0020\u0020\u0020\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", len(_fgfa.lines()))
		for i, line := range _fgfa.lines() {
			_gdf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, line)
		}
	}

	// Add the cell's own edges as the outermost boundaries (rows run from Ury to Lly).
	_bddg = _cacbb(_bddg, _fgfa.Ury, _fgfa.Lly)
	_dedd = _cacbb(_dedd, _fgfa.Llx, _fgfa.Urx)

	cells := make(map[uint64]*textPara, numCols*numRows)
	table := textTable{_baee: numCols, _cabfg: numRows, _aagc: cells}

	paras := _fgfa.paraList
	_f.Slice(paras, func(i, j int) bool {
		pi, pj := paras[i], paras[j]
		yi, yj := pi.Lly, pj.Lly
		if yi != yj {
			return yi < yj
		}
		return pi.Llx < pj.Llx
	})

	// rects holds the rectangle of every grid position, keyed by _fcbc(x, y).
	rects := make(map[uint64]_bf.PdfRectangle, numCols*numRows)
	for y, lly := range _bddg[1:] {
		ury := _bddg[y]
		for x, urx := range _dedd[1:] {
			llx := _dedd[x]
			rects[_fcbc(x, y)] = _bf.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
		}
	}
	if _beae {
		_ag.Log.Info("\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u0043\u0065l\u006c\u002e\u0073\u0070\u006c\u0069\u0074\u003a\u0020\u0072e\u0063\u0074\u0073")
		_gdf.Printf("\u0020\u0020\u0020\u0020")
		for x := 0; x < numCols; x++ {
			_gdf.Printf("\u0025\u0033\u0030\u0064\u002c\u0020", x)
		}
		_gdf.Println()
		for y := 0; y < numRows; y++ {
			_gdf.Printf("\u0020\u0020\u0025\u0032\u0064\u003a", y)
			for x := 0; x < numCols; x++ {
				_gdf.Printf("\u00256\u002e\u0032\u0066\u002c\u0020", rects[_fcbc(x, y)])
			}
			_gdf.Println()
		}
	}

	// findCell returns the grid position of `line`, or (-1, -1) when no rectangle matches.
	findCell := func(line *textLine) (int, int) {
		for y := 0; y < numRows; y++ {
			for x := 0; x < numCols; x++ {
				if _ceba(rects[_fcbc(x, y)], line.PdfRectangle) {
					return x, y
				}
			}
		}
		return -1, -1
	}

	cellLines := make(map[uint64][]*textLine, numCols*numRows)
	for _, line := range paras.lines() {
		x, y := findCell(line)
		if x < 0 {
			continue
		}
		key := _fcbc(x, y)
		cellLines[key] = append(cellLines[key], line)
	}

	for y := 0; y < len(_bddg)-1; y++ {
		ury := _bddg[y]
		lly := _bddg[y+1]
		for x := 0; x < len(_dedd)-1; x++ {
			llx := _dedd[x]
			urx := _dedd[x+1]
			rect := _bf.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			lines := cellLines[_fcbc(x, y)]
			if len(lines) == 0 {
				continue
			}
			table.put(x, y, _egecc(rect, lines))
		}
	}
	return &table
}

// makeRectRuling tries to interpret the first four points of the subpath as a thin
// rectangle and convert it to a ruling with colour `_dggcb`. The bool result is false
// when the path does not qualify; the accompanying pointer must not be used then.
func (_fecb *subpath) makeRectRuling(_dggcb _eda.Color) (*ruling, bool) {
	if _bded {
		_ag.Log.Info("\u006d\u0061\u006beR\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006e\u0067\u003a\u0020\u0070\u0061\u0074\u0068\u003d\u0025\u0076", _fecb)
	}
	// Guard the [:4] slice below: fewer than four points cannot form a rectangle.
	if len(_fecb._fcdc) < 4 {
		return &ruling{}, false
	}
	points := _fecb._fcdc[:4]

	// kinds[i] classifies the edge from point i to point (i+1)%4.
	kinds := make(map[int]rulingKind, len(points))
	for i, p := range points {
		next := _fecb._fcdc[(i+1)%4]
		kinds[i] = _ddedb(p, next)
		if _bded {
			_gdf.Printf("\u0025\u0034\u0064: \u0025\u0073\u0020\u003d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066", i, kinds[i], p, next)
		}
	}
	if _bded {
		_gdf.Printf("\u0020\u0020\u0020\u006b\u0069\u006e\u0064\u0073\u003d\u0025\u002b\u0076\u000a", kinds)
	}

	// Walk the edges by index (not by ranging over the map) so the index lists are
	// built in a deterministic order.
	var verts, horzs []int
	for i := range points {
		switch kinds[i] {
		case _ebabd:
			horzs = append(horzs, i)
		case _beec:
			verts = append(verts, i)
		}
	}
	if _bded {
		_gdf.Printf("\u0020\u0020 \u0068\u006f\u0072z\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(horzs), horzs)
		_gdf.Printf("\u0020\u0020 \u0076\u0065\u0072t\u0073\u003d\u0025\u0064\u0020\u0025\u002b\u0076\u000a", len(verts), verts)
	}

	ok := (len(horzs) == 2 && len(verts) == 2) ||
		(len(horzs) == 2 && len(verts) == 0 && _dcdd(points[horzs[0]], points[horzs[1]])) ||
		(len(verts) == 2 && len(horzs) == 0 && _cada(points[verts[0]], points[verts[1]]))
	if _bded {
		_gdf.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
	}
	if !ok {
		if _bded {
			_ag.Log.Error("\u0021!\u006d\u0061\u006b\u0065R\u0065\u0063\u0074\u0052\u0075l\u0069n\u0067:\u0020\u0070\u0061\u0074\u0068\u003d\u0025v", _fecb)
			_gdf.Printf(" \u0020\u0020\u0068\u006f\u0072\u007as\u003d\u0025\u0064\u0020\u0076\u0065\u0072\u0074\u0073=\u0025\u0064\u0020o\u006b=\u0025\u0074\u000a", len(horzs), len(verts), ok)
		}
		return &ruling{}, false
	}

	// When one orientation is missing, use every edge that is not of the other orientation.
	if len(verts) == 0 {
		for i := range points {
			if kinds[i] != _ebabd {
				verts = append(verts, i)
			}
		}
	}
	if len(horzs) == 0 {
		for i := range points {
			if kinds[i] != _beec {
				horzs = append(horzs, i)
			}
		}
	}
	if _bded {
		_ag.Log.Info("\u006da\u006b\u0065R\u0065\u0063\u0074\u0052u\u006c\u0069\u006eg\u003a\u0020\u0068\u006f\u0072\u007a\u0073\u003d\u0025d \u0076\u0065\u0072t\u0073\u003d%\u0064\u0020\u0070\u006f\u0069\u006et\u0073\u003d%\u0064\u000a"+"\u0009\u0020\u0068o\u0072\u007a\u0073\u003d\u0025\u002b\u0076\u000a"+"\u0009\u0020\u0076e\u0072\u0074\u0073\u003d\u0025\u002b\u0076\u000a"+"\t\u0070\u006f\u0069\u006e\u0074\u0073\u003d\u0025\u002b\u0076", len(horzs), len(verts), len(points), horzs, verts, points)
	}

	var xHi, xLo, yHi, yLo _ee.Point
	if points[horzs[0]].Y > points[horzs[1]].Y {
		yHi, yLo = points[horzs[0]], points[horzs[1]]
	} else {
		yHi, yLo = points[horzs[1]], points[horzs[0]]
	}
	if points[verts[0]].X > points[verts[1]].X {
		xHi, xLo = points[verts[0]], points[verts[1]]
	} else {
		xHi, xLo = points[verts[1]], points[verts[0]]
	}
	// The corners are normalised right after construction, so the initial order is irrelevant.
	rect := _bf.PdfRectangle{Llx: xHi.X, Urx: xLo.X, Lly: yLo.Y, Ury: yHi.Y}
	if rect.Llx > rect.Urx {
		rect.Llx, rect.Urx = rect.Urx, rect.Llx
	}
	if rect.Lly > rect.Ury {
		rect.Lly, rect.Ury = rect.Ury, rect.Lly
	}

	candidate := rectRuling{PdfRectangle: rect, _adea: _eceb(rect), Color: _dggcb}
	if candidate._adea == _gdbb {
		if _bded {
			_ag.Log.Error("\u006da\u006b\u0065\u0052\u0065\u0063\u0074\u0052\u0075\u006c\u0069\u006eg\u003a\u0020\u006b\u0069\u006e\u0064\u003d\u006e\u0069\u006c")
		}
		return nil, false
	}
	result, isRuling := candidate.asRuling()
	if !isRuling {
		if _bded {
			_ag.Log.Error("\u006da\u006b\u0065\u0052\u0065c\u0074\u0052\u0075\u006c\u0069n\u0067:\u0020!\u0069\u0073\u0052\u0075\u006c\u0069\u006eg")
		}
		return nil, false
	}
	if _efa {
		_gdf.Printf("\u0020\u0020\u0020\u0072\u003d\u0025\u0073\u000a", result.String())
	}
	return result, true
}

// makeRemovals returns a map with one empty word set for every key of _deeg._bbf.
func (_deeg *wordBag) makeRemovals() map[int]map[*textWord]struct{} {
	removals := make(map[int]map[*textWord]struct{}, len(_deeg._bbf))
	for key := range _deeg._bbf {
		removals[key] = make(map[*textWord]struct{})
	}
	return removals
}

// _fffd builds a 2x2 textTable with `_febc` at (0,0), `_cged` at (1,0),
// `_fbbfc` at (0,1) and `_gbfa` at (1,1).
func _fffd(_febc, _cged, _fbbfc, _gbfa *textPara) *textTable {
	table := &textTable{_baee: 2, _cabfg: 2, _aagc: make(map[uint64]*textPara, 4)}
	table.put(0, 0, _febc)
	table.put(1, 0, _cged)
	table.put(0, 1, _fbbfc)
	table.put(1, 1, _gbfa)
	return table
}

// compositeCell is a rectangle together with the paragraphs associated with it.
type compositeCell struct {
	_bf.PdfRectangle
	paraList
}

// moveTo stores the device-space image of (`_aabf`, `_ada`) as the pending point
// and sets the _ecd flag.
func (_bdbd *shapesState) moveTo(_aabf, _ada float64) {
	_bdbd._ecd = true
	_bdbd._decg = _bdbd.devicePoint(_aabf, _ada)
	if _bccf {
		_ag.Log.Info("\u006d\u006fv\u0065\u0054\u006f\u003a\u0020\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0064\u0065\u0076\u0069\u0063\u0065\u003d%.\u0032\u0066", _aabf, _ada, _bdbd._decg)
	}
}

// _babd returns `_cabgd` cut to at most `_adafg` bytes. A negative length yields
// the empty string instead of panicking on the slice expression.
func _babd(_cabgd string, _adafg int) string {
	if _adafg < 0 {
		return ""
	}
	if len(_cabgd) < _adafg {
		return _cabgd
	}
	return _cabgd[:_adafg]
}

// asTiling converts the rulings into a gridTiling. It builds one tile per pair of
// adjacent primaries, merges tiles horizontally until a right border is met, then
// merges them vertically until a bottom border is met, and keeps only tiles that are
// complete(). It returns an empty gridTiling when the list is empty or when there are
// fewer than two distinct vertical or horizontal primaries.
func (_cffce rulingList) asTiling() gridTiling {
	if _efbc {
		_ag.Log.Info("r\u0075\u006ci\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0063s\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u002b\u002b\u002b\u0020\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d", len(_cffce))
	}
	// Slicing an empty list with [1:] would panic; no rulings means no tiling.
	if len(_cffce) == 0 {
		return gridTiling{}
	}
	for i, v := range _cffce[1:] {
		w := _cffce[i]
		if w.alignsPrimary(v) && w.alignsSec(v) {
			_ag.Log.Error("a\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0044\u0075\u0070\u006c\u0069\u0063\u0061\u0074\u0065 \u0072\u0075\u006c\u0069\u006e\u0067\u0073\u002e\u000a\u0009v=\u0025\u0073\u000a\t\u0077=\u0025\u0073", v, w)
		}
	}
	_cffce.sortStrict()
	_cffce.log("\u0073n\u0061\u0070\u0070\u0065\u0064")

	verts, horzs := _cffce.vertsHorzs()
	xs := verts.primaries()
	ys := horzs.primaries()
	nx := len(xs) - 1
	ny := len(ys) - 1
	// `<= 0` also covers empty primaries (-1), which would otherwise index xs[0]/ys[0]
	// or request a negative-length slice below.
	if nx <= 0 || ny <= 0 {
		return gridTiling{}
	}
	bounds := _bf.PdfRectangle{Llx: xs[0], Urx: xs[nx], Lly: ys[0], Ury: ys[ny]}
	if _efbc {
		_ag.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0076\u0065\u0072\u0074s=\u0025\u0064", len(verts))
		for i, v := range verts {
			_gdf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, v)
		}
		_ag.Log.Info("\u0072\u0075l\u0069\u006e\u0067\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0068\u006f\u0072\u007as=\u0025\u0064", len(horzs))
		for i, h := range horzs {
			_gdf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, h)
		}
		_ag.Log.Info("\u0072\u0075\u006c\u0069\u006eg\u004c\u0069\u0073\u0074\u002e\u0061\u0073\u0054\u0069\u006c\u0069\u006e\u0067:\u0020\u0020\u0077\u0078\u0068\u003d\u0025\u0064\u0078\u0025\u0064\u000a\u0009\u006c\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a\u0009\u006c\u006c\u0079\u003d\u0025\u002e\u0032f", nx, ny, xs, ys)
	}

	// Step 1: one elementary tile per grid position, stored row-major at y*nx+x.
	tiles := make([]gridTile, nx*ny)
	for y := ny - 1; y >= 0; y-- {
		lly := ys[y]
		ury := ys[y+1]
		for x := 0; x < nx; x++ {
			llx := xs[x]
			urx := xs[x+1]
			left := verts.findPrimSec(llx, lly)
			right := verts.findPrimSec(urx, lly)
			bottom := horzs.findPrimSec(lly, llx)
			top := horzs.findPrimSec(ury, llx)
			rect := _bf.PdfRectangle{Llx: llx, Urx: urx, Lly: lly, Ury: ury}
			tile := _gafce(rect, left, right, bottom, top)
			tiles[y*nx+x] = tile
			if _efbc {
				_gdf.Printf("\u0020\u0020\u0078\u003d\u0025\u0032\u0064\u0020\u0079\u003d\u0025\u0032\u0064\u003a\u0020%\u0073 \u0025\u0036\u002e\u0032\u0066\u0020\u0078\u0020\u0025\u0036\u002e\u0032\u0066\u000a", x, y, tile.String(), tile.Width(), tile.Height())
			}
		}
	}

	// Step 2: within each row, extend every tile that has a left border rightwards
	// until a tile with a right border is absorbed.
	if _efbc {
		_ag.Log.Info("r\u0075\u006c\u0069\u006e\u0067\u004c\u0069\u0073\u0074.\u0061\u0073\u0054\u0069\u006c\u0069\u006eg:\u0020\u0063\u006f\u0061l\u0065\u0073\u0063\u0065\u0020\u0068\u006f\u0072\u0069zo\u006e\u0074a\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", bounds)
	}
	rows := make([]map[float64]gridTile, ny)
	for y := ny - 1; y >= 0; y-- {
		if _efbc {
			_gdf.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", y)
		}
		rows[y] = make(map[float64]gridTile, nx)
		for x := 0; x < nx; x++ {
			tile := tiles[y*nx+x]
			if _efbc {
				_gdf.Printf("\u0020\u0020\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", x, tile)
			}
			if !tile._bceg {
				continue
			}
			last := x
			for x2 := x + 1; !tile._abdfe && x2 < nx; x2++ {
				next := tiles[y*nx+x2]
				tile.Urx = next.Urx
				tile._gfga = tile._gfga || next._gfga
				tile._abag = tile._abag || next._abag
				tile._abdfe = next._abdfe
				if _efbc {
					_gdf.Printf("\u0020 \u0020%\u0034\u0064\u003a\u0020\u0025s\u0020\u2192 \u0025\u0073\u000a", x2, next, tile)
				}
				last = x2
			}
			if _efbc {
				_gdf.Printf(" \u0020 \u0025\u0032\u0064\u0020\u002d\u0020\u0025\u0032d\u0020\u2192\u0020\u0025s\n", x, last, tile)
			}
			// Skip the positions that were merged into this tile.
			x = last
			rows[y][tile.Llx] = tile
		}
	}

	// Step 3: from the top row down, extend each tile downwards through tiles with the
	// same Llx and Urx until a bottom border is found. Absorbed tiles are remembered in
	// `merged` so they are not emitted again.
	grid := make(map[float64]map[float64]gridTile, ny)
	merged := make(map[float64]map[float64]struct{}, ny)
	for y := ny - 1; y >= 0; y-- {
		lly := tiles[y*nx].Lly
		grid[lly] = make(map[float64]gridTile, nx)
		merged[lly] = make(map[float64]struct{}, nx)
	}
	if _efbc {
		_ag.Log.Info("\u0072u\u006c\u0069n\u0067\u004c\u0069s\u0074\u002e\u0061\u0073\u0054\u0069\u006ci\u006e\u0067\u003a\u0020\u0063\u006fa\u006c\u0065\u0073\u0063\u0065\u0020\u0076\u0065\u0072\u0074\u0069c\u0061\u006c\u002e\u0020\u0025\u0036\u002e\u0032\u0066", bounds)
	}
	for y := ny - 1; y >= 0; y-- {
		lly := tiles[y*nx].Lly
		row := rows[y]
		if _efbc {
			_gdf.Printf("\u0020\u0020\u0079\u003d\u0025\u0032\u0064\u000a", y)
		}
		for _, llx := range _gfbg(row) {
			if _, done := merged[lly][llx]; done {
				continue
			}
			tile := row[llx]
			if _efbc {
				_gdf.Printf(" \u0020\u0020\u0020\u0020\u0076\u0030\u003d\u0025\u0073\u000a", tile.String())
			}
			for y2 := y - 1; y2 >= 0; y2-- {
				if tile._abag {
					break
				}
				below, found := rows[y2][llx]
				if !found {
					break
				}
				if below.Urx != tile.Urx {
					break
				}
				tile._abag = below._abag
				tile.Lly = below.Lly
				if _efbc {
					_gdf.Printf("\u0020\u0020\u0020\u0020 \u0020\u0020\u0076\u003d\u0025\u0073\u0020\u0076\u0030\u003d\u0025\u0073\u000a", below.String(), tile.String())
				}
				merged[below.Lly][below.Llx] = struct{}{}
			}
			// The bottom row is always treated as closed at the bottom.
			if y == 0 {
				tile._abag = true
			}
			if tile.complete() {
				grid[lly][llx] = tile
			}
		}
	}

	tiling := gridTiling{PdfRectangle: bounds, _fdeac: _cffd(grid), _gccf: _badbf(grid), _ffdd: grid}
	tiling.log("\u0043r\u0065\u0061\u0074\u0065\u0064")
	return tiling
}
// Extractor stores and offers functionality for extracting content from PDF pages.
type Extractor struct {
	// _fc and _ad are the first two arguments ExtractPageText passes to extractPageText.
	// NOTE(review): presumably the page content stream and its resources — confirm.
	_fc string
	_ad *_bf.PdfPageResources

	// _bb is a page rectangle; newTextMark logs it as "mediaBox".
	_bb _bf.PdfRectangle
	// _ga, when non-nil and Options.ApplyCropBox is set, is applied to the extracted text.
	_ga *_bf.PdfRectangle

	// NOTE(review): the next four fields look like font/result caches with their
	// bookkeeping counters — confirm against their users elsewhere in the file.
	_ac  map[string]fontEntry
	_fd  map[string]textResult
	_eb  int64
	_cgf int

	// _bc holds the optional extraction options; it may be nil.
	_bc *Options
}
2022-09-23 18:05:51 +00:00
// ExtractFonts returns all font information from the page extractor, including
// font name, font type, the raw data of the embedded font file (if embedded), font descriptor and more.
//
// The argument `previousPageFonts` is used when trying to build a complete font catalog for multiple pages or the entire document.
// The entries from `previousPageFonts` are added to the returned result unless already included in the page, i.e. no duplicate entries.
//
// NOTE: If previousPageFonts is nil, all fonts from the page will be returned. Use it when building up a full list of fonts for a document or page range.
2022-10-27 19:04:58 +00:00
func (_ca *Extractor) ExtractFonts(previousPageFonts *PageFonts) (*PageFonts, error) {
	pageFonts := PageFonts{}
	if err := pageFonts.extractPageResourcesToFont(_ca._ad); err != nil {
		return nil, err
	}
	if previousPageFonts != nil {
		// Carry over fonts from earlier pages unless _ea reports the name as already present.
		for _, font := range previousPageFonts.Fonts {
			if !_ea(pageFonts.Fonts, font.FontName) {
				pageFonts.Fonts = append(pageFonts.Fonts, font)
			}
		}
	}
	return &PageFonts{Fonts: pageFonts.Fonts}, nil
}

// Sentinel errors returned by the content stream handlers in this package.
var (
	_cg = _g.New("type check error")
	_fa = _g.New("range check error")
)

// _fbga reports whether the magnitude of `_acgfg` is below the package threshold _ffba.
func _fbga(_acgfg float64) bool { return _b.Abs(_acgfg) < _ffba }

// findTableGrid tries to build a grid table by placing the paragraphs of
// `_cdede` into the tiles of `_bbedg`.
//
// It returns the resulting table together with the set of paragraphs that
// were placed in it, or (nil, nil) when more tiles are empty than the
// fraction allowed by _bbgg, or when every cell in row 0 is flagged `_dgggff`.
func (_cdede paraList) findTableGrid(_bbedg gridTiling) (*textTable, map[*textPara]struct{}) {
	width := len(_bbedg._fdeac)
	height := len(_bbedg._gccf)
	table := textTable{
		_acgbd: true,
		_baee:  width,
		_cabfg: height,
		_aagc:  make(map[uint64]*textPara, width*height),
		_bbfb:  make(map[uint64]compositeCell, width*height),
	}
	used := make(map[*textPara]struct{})
	maxEmpty := int((1.0 - _bbgg) * float64(width*height))
	numEmpty := 0
	if _efbc {
		_ag.Log.Info("findTableGrid: %d x %d", width, height)
	}
	for y, rowKey := range _bbedg._gccf {
		row, ok := _bbedg._ffdd[rowKey]
		if !ok {
			continue
		}
		for x, colKey := range _bbedg._fdeac {
			tile, ok := row[colKey]
			if !ok {
				continue
			}
			paras := _cdede.inTile(tile)
			if len(paras) == 0 {
				numEmpty++
				if numEmpty > maxEmpty {
					if _efbc {
						_ag.Log.Info("!numEmpty=%d", numEmpty)
					}
					return nil, nil
				}
			} else {
				table.putComposite(x, y, paras, tile.PdfRectangle)
				for _, para := range paras {
					used[para] = struct{}{}
				}
			}
		}
	}

	// Count the cells of row 0 that are missing or not flagged `_dgggff`.
	numHeader := 0
	for x := 0; x < width; x++ {
		cell := table.get(x, 0)
		if cell == nil || !cell._dgggff {
			numHeader++
		}
	}
	if numHeader == 0 {
		if _efbc {
			_ag.Log.Info("!numHeader=0")
		}
		return nil, nil
	}
	reduced := table.reduceTiling(_bbedg, _dded)
	reduced = reduced.subdivide()
	return reduced, used
}

// setTextMatrix sets both matrices of the text object (`_gcb` and `_baff`)
// to the matrix described by the 6 values in `_bdeea`. Any other length is
// logged and ignored.
func (_aceb *textObject) setTextMatrix(_bdeea []float64) {
	if len(_bdeea) != 6 {
		_ag.Log.Debug("ERROR: len(f) != 6 (%d)", len(_bdeea))
		return
	}
	_aceb._gcb = _ee.NewMatrix(_bdeea[0], _bdeea[1], _bdeea[2], _bdeea[3], _bdeea[4], _bdeea[5])
	_aceb._baff = _aceb._gcb
}

// computeViews processes the page's text marks one orientation at a time
// (0, 90, 180, 270) and stores the derived views on `_cgfa`: the text written
// by paraList.writeText, the result of toTextMarks and the result of tables.
func (_cgfa *PageText) computeViews() {
	var rulings rulingList
	if _ecec {
		fromStrokes := _gabdb(_cgfa._fcdb)
		rulings = append(rulings, fromStrokes...)
	}
	if _dcaed {
		fromFills := _ddce(_cgfa._cddd)
		rulings = append(rulings, fromFills...)
	}
	// toTilings is defined elsewhere; both of its results are handed to _aadf unchanged.
	rulings, other := rulings.toTilings()

	var paras paraList
	remaining := len(_cgfa._cac)
	for orient := 0; orient < 360 && remaining > 0; orient += 90 {
		// At most `remaining` marks can still match an orientation, so that is
		// the useful capacity bound. The previous `len - remaining` was 0 on
		// the first pass, where most marks are normally collected.
		marks := make([]*textMark, 0, remaining)
		for _, mark := range _cgfa._cac {
			if mark._bfbc == orient {
				marks = append(marks, mark)
			}
		}
		if len(marks) > 0 {
			orientParas := _aadf(marks, _cgfa._fabf, rulings, other)
			paras = append(paras, orientParas...)
			remaining -= len(marks)
		}
	}

	buf := new(_gd.Buffer)
	paras.writeText(buf)
	_cgfa._gcea = buf.String()
	_cgfa._cagb = paras.toTextMarks()
	_cgfa._ece = paras.tables()
	if _beae {
		_ag.Log.Info("computeViews: tables=%d", len(_cgfa._ece))
	}
}

// computeEBBoxes computes an extended bounding box (`_dcfc`) for every
// paragraph in `_feedf`.
//
// Each box starts as the paragraph's own rectangle and is widened
// horizontally to the edges of paragraphs lying entirely below it, without
// crossing the nearest neighbours returned by yNeighbours(0). Boxes are
// updated in list order, so later paragraphs see the already extended boxes
// of earlier ones. When _gfea is set the extended boxes replace the
// paragraphs' rectangles.
func (_feedf paraList) computeEBBoxes() {
	if _ceabd {
		_ag.Log.Info("computeEBBoxes:")
	}
	for _, para := range _feedf {
		para._dcfc = para.PdfRectangle
	}
	neighbours := _feedf.yNeighbours(0)
	for i, para := range _feedf {
		ebbox := para._dcfc

		// Nearest neighbour edges to the left and right limit the extension.
		leftLimit, rightLimit := -1.0e9, +1.0e9
		for _, k := range neighbours[para] {
			nb := _feedf[k]._dcfc
			if nb.Urx < ebbox.Llx {
				leftLimit = _b.Max(leftLimit, nb.Urx)
			} else if ebbox.Urx < nb.Llx {
				rightLimit = _b.Min(rightLimit, nb.Llx)
			}
		}

		for j, candidate := range _feedf {
			cb := candidate._dcfc
			if i == j || cb.Ury > ebbox.Lly {
				continue
			}
			if leftLimit <= cb.Llx && cb.Llx < ebbox.Llx {
				ebbox.Llx = cb.Llx
			} else if cb.Urx <= rightLimit && ebbox.Urx < cb.Urx {
				ebbox.Urx = cb.Urx
			}
		}
		if _ceabd {
			_gdf.Printf("%4d: %6.2f\u2192%6.2f %q\n", i, para._dcfc, ebbox, _babd(para.text(), 50))
		}
		para._dcfc = ebbox
	}
	if _gfea {
		for _, para := range _feedf {
			para.PdfRectangle = para._dcfc
		}
	}
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// String returns a human readable description of `s`.
2022-10-27 19:04:58 +00:00
func (_gbbbb intSet) String() string {
	var members []int
	for k := range _gbbbb {
		if _gbbbb.has(k) {
			members = append(members, k)
		}
	}
	// Map iteration order is random; sort for stable output.
	_f.Ints(members)
	return _gdf.Sprintf("%+v", members)
}

// _cada passes the absolute X and Y distances between the two points to _dgcd
// and returns its verdict.
func _cada(_gcbed, _abac _ee.Point) bool {
	dx := _b.Abs(_gcbed.X - _abac.X)
	dy := _b.Abs(_gcbed.Y - _abac.Y)
	return _dgcd(dx, dy)
}

// merge sorts `_acfbf` into reading order and combines it into a single
// paragraph whose rectangle is built from all members with _effa and whose
// `_ecbdg` elements are the concatenation of theirs.
// It returns nil for an empty list. The list itself is reordered in place.
func (_acfbf paraList) merge() *textPara {
	_ag.Log.Trace("\u006d\u0065\u0072\u0067\u0065:\u0020\u0070\u0061\u0072\u0061\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d", len(_acfbf))
	if len(_acfbf) == 0 {
		return nil
	}
	_acfbf.sortReadingOrder()
	bbox := _acfbf[0].PdfRectangle

	// Clip the capacity so that the appends below allocate a fresh backing
	// array instead of writing into spare capacity owned by the first
	// paragraph's slice.
	first := _acfbf[0]._ecbdg
	elems := first[:len(first):len(first)]
	for _, para := range _acfbf[1:] {
		bbox = _effa(bbox, para.PdfRectangle)
		elems = append(elems, para._ecbdg...)
	}
	return _egecc(bbox, elems)
}
2022-09-10 15:35:04 +00:00
2022-10-27 19:04:58 +00:00
// String returns a string describing `ma`.
func (_fed TextMarkArray) String() string {
	n := len(_fed._fceg)
	if n == 0 {
		return "EMPTY"
	}
	first := _fed._fceg[0]
	last := _fed._fceg[n-1]
	return _gdf.Sprintf("{TEXTMARKARRAY: %d elements\n\tfirst=%s\n\t last=%s}", n, first, last)
}

// splitSec sorts `_cfgf` in place by (`_bgeb`, `_eecc`) and partitions it into
// groups. A group is seeded with the first ruling not yet assigned and then
// collects, in sorted order, every unassigned ruling for which alignsSec is
// true against any ruling already in the group.
// An empty list yields nil.
func (_cfgf rulingList) splitSec() []rulingList {
	if len(_cfgf) == 0 {
		// Nothing to group; indexing _cfgf[0] below would panic.
		return nil
	}
	_f.Slice(_cfgf, func(i, j int) bool {
		a, b := _cfgf[i], _cfgf[j]
		if a._bgeb != b._bgeb {
			return a._bgeb < b._bgeb
		}
		return a._eecc < b._eecc
	})

	assigned := make(map[*ruling]struct{}, len(_cfgf))
	collect := func(seed *ruling) rulingList {
		group := rulingList{seed}
		assigned[seed] = struct{}{}
		for _, candidate := range _cfgf {
			if _, done := assigned[candidate]; done {
				continue
			}
			for _, member := range group {
				if candidate.alignsSec(member) {
					group = append(group, candidate)
					assigned[candidate] = struct{}{}
					break
				}
			}
		}
		return group
	}

	groups := []rulingList{collect(_cfgf[0])}
	for _, r := range _cfgf[1:] {
		if _, done := assigned[r]; done {
			continue
		}
		groups = append(groups, collect(r))
	}
	return groups
}

// processOperand handles the two image-related content stream operators:
//   - "BI" with an inline image parameter: the image is extracted unless it is
//     a stencil mask and IncludeInlineStencilMasks is off.
//   - "Do" with a name parameter: image XObjects are extracted directly and
//     form XObjects are searched for images.
//
// Every other operation is ignored. A "Do" operand that is not a name yields _cg.
func (_ceb *imageExtractContext) processOperand(_dcb *_da.ContentStreamOperation, _cce _da.GraphicsState, _ccb *_bf.PdfPageResources) error {
	if len(_dcb.Params) != 1 {
		return nil
	}
	switch _dcb.Operand {
	case "BI":
		inline, ok := _dcb.Params[0].(*_da.ContentStreamInlineImage)
		if !ok {
			return nil
		}
		if isMask, ok := _bg.GetBoolVal(inline.ImageMask); ok {
			if isMask && !_ceb._gfa.IncludeInlineStencilMasks {
				return nil
			}
		}
		return _ceb.extractInlineImage(inline, _cce, _ccb)
	case "Do":
		name, ok := _bg.GetName(_dcb.Params[0])
		if !ok {
			_ag.Log.Debug("ERROR: Type")
			return _cg
		}
		_, xtype := _ccb.GetXObjectByName(*name)
		switch xtype {
		case _bf.XObjectTypeImage:
			return _ceb.extractXObjectImage(name, _cce, _ccb)
		case _bf.XObjectTypeForm:
			return _ceb.extractFormImages(name, _cce, _ccb)
		}
	}
	return nil
}

// log prints a one-line summary of every non-nil paragraph in `_afedg`,
// prefixed by `_bebe`. It does nothing unless _dada is set.
func (_afedg paraList) log(_bebe string) {
	if !_dada {
		return
	}
	_ag.Log.Info("%\u0038\u0073\u003a\u0020\u0025\u0064 \u0070\u0061\u0072\u0061\u0073\u0020=\u003d\u003d\u003d\u003d\u003d\u003d\u002d-\u002d\u002d\u002d\u002d\u002d\u003d\u003d\u003d\u003d\u003d=\u003d", _bebe, len(_afedg))
	for i, para := range _afedg {
		if para == nil {
			continue
		}
		text := para.text()
		dims := "  "
		if para._bccg != nil {
			dims = _gdf.Sprintf("[%dx%d]", para._bccg._baee, para._bccg._cabfg)
		}
		_gdf.Printf("%4d: %6.2f %s %q\n", i, para.PdfRectangle, dims, _babd(text, 50))
	}
}

// equals reports whether the two rulings have the same `_cgef` and whether
// _cgcd accepts each of their `_eead`, `_bgeb` and `_eecc` pairs.
func (_fdfg *ruling) equals(_fdec *ruling) bool {
	if _fdfg._cgef != _fdec._cgef {
		return false
	}
	return _cgcd(_fdfg._eead, _fdec._eead) &&
		_cgcd(_fdfg._bgeb, _fdec._bgeb) &&
		_cgcd(_fdfg._eecc, _fdec._eecc)
}

// reset restores both matrices of the text object to identity and drops its
// collected marks (`_dbdg`).
func (_ccaa *textObject) reset() {
	_ccaa._gcb = _ee.IdentityMatrix()
	_ccaa._baff = _ee.IdentityMatrix()
	_ccaa._dbdg = nil
}
2022-09-10 15:35:04 +00:00
2022-10-27 19:04:58 +00:00
// ToTextMark returns the public view of `tm`.
func (_gdgf *textMark) ToTextMark() TextMark {
	var public TextMark

	// Text content.
	public.Text = _gdgf._eeaf
	public.Original = _gdgf._abcec

	// Geometry and appearance.
	public.BBox = _gdgf._bbgf
	public.Font = _gdgf._becff
	public.FontSize = _gdgf._abba
	public.FillColor = _gdgf._agbe
	public.StrokeColor = _gdgf._dggfa
	public.Orientation = _gdgf._bfbc

	// Originating object.
	public.DirectObject = _gdgf._gded
	public.ObjString = _gdgf._cedf

	// Text state values and position index.
	public.Tw = _gdgf.Tw
	public.Th = _gdgf.Th
	public.Tc = _gdgf._dbe
	public.Index = _gdgf._eced

	return public
}
// String returns a string describing `i`: its rectangle followed by four
// one-character flags (L, R, B, T), each shown as "_" when the matching
// field is false.
func (_aaaca gridTile) String() string {
	flag := func(set bool, label string) string {
		if !set {
			return "_"
		}
		return label
	}
	left := flag(_aaaca._bceg, "L")
	right := flag(_aaaca._abdfe, "R")
	bottom := flag(_aaaca._abag, "B")
	top := flag(_aaaca._gfga, "T")
	return _gdf.Sprintf("%6.2f %1s%1s%1s%1s", _aaaca.PdfRectangle, left, right, bottom, top)
}

// _aecc binds the float argument `_bccd` into the three-argument predicate
// `_ffad`, yielding a two-argument predicate.
func _aecc(_ffad func(*wordBag, *textWord, float64) bool, _bccd float64) func(*wordBag, *textWord) bool {
	return func(bag *wordBag, word *textWord) bool {
		return _ffad(bag, word, _bccd)
	}
}

// taken reports whether the paragraph is nil or has its `_bggb` flag set.
func (_cabg *textPara) taken() bool {
	if _cabg == nil {
		return true
	}
	return _cabg._bggb
}
type pathSection struct { _gdbf [ ] * subpath ; _eda . Color ; } ; func _afa ( _bbfg , _dfcca bounded ) float64 { _fbcfdb := _addb ( _bbfg , _dfcca ) ; if ! _fbga ( _fbcfdb ) { return _fbcfdb ; } ; return _dagb ( _bbfg , _dfcca ) ; } ; func ( _agde * compositeCell ) updateBBox ( ) { for _ , _bdeb := range _agde . paraList { _agde . PdfRectangle = _effa ( _agde . PdfRectangle , _bdeb . PdfRectangle ) ;
} ; } ; func ( _cffb * textPara ) bbox ( ) _bf . PdfRectangle { return _cffb . PdfRectangle } ; func _ceabf ( _efff , _gfaf * textPara ) bool { if _efff . _dgggff || _gfaf . _dgggff { return true ; } ; return _fbga ( _efff . depth ( ) - _gfaf . depth ( ) ) ; } ; type rulingKind int ; func _ceba ( _fggb , _fede _bf . PdfRectangle ) bool { return _fggb . Llx <= _fede . Llx && _fede . Urx <= _fggb . Urx && _fggb . Lly <= _fede . Lly && _fede . Ury <= _fggb . Ury ;
2022-09-10 15:35:04 +00:00
} ;
2022-09-23 18:05:51 +00:00
// Options holds the configurable options of an Extractor.
type Options struct {
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// ApplyCropBox will extract page text based on page cropbox if set to `true`.
2022-10-27 19:04:58 +00:00
ApplyCropBox bool ; } ; func _ccagd ( _dfdd map [ int ] intSet ) [ ] int { _ecbaf := make ( [ ] int , 0 , len ( _dfdd ) ) ; for _dccdf := range _dfdd { _ecbaf = append ( _ecbaf , _dccdf ) ; } ; _f . Ints ( _ecbaf ) ; return _ecbaf ; } ; type paraList [ ] * textPara ; func ( _ffeg * textTable ) compositeColCorridors ( ) map [ int ] [ ] float64 { _ecbdd := make ( map [ int ] [ ] float64 , _ffeg . _baee ) ;
if _beae { _ag . Log . Info ( "\u0063\u006f\u006d\u0070o\u0073\u0069\u0074\u0065\u0043\u006f\u006c\u0043\u006f\u0072r\u0069d\u006f\u0072\u0073\u003a\u0020\u0077\u003d%\u0064\u0020" , _ffeg . _baee ) ; } ; for _fggc := 0 ; _fggc < _ffeg . _baee ; _fggc ++ { _ecbdd [ _fggc ] = nil ;
} ; return _ecbdd ; } ; func ( _ecb * shapesState ) drawRectangle ( _gfag , _bdaa , _afca , _cceb float64 ) { if _bccf { _gacg := _ecb . devicePoint ( _gfag , _bdaa ) ; _cdeg := _ecb . devicePoint ( _gfag + _afca , _bdaa + _cceb ) ; _gdba := _bf . PdfRectangle { Llx : _gacg . X , Lly : _gacg . Y , Urx : _cdeg . X , Ury : _cdeg . Y } ;
_ag . Log . Info ( "d\u0072a\u0077\u0052\u0065\u0063\u0074\u0061\u006e\u0067l\u0065\u003a\u0020\u00256.\u0032\u0066" , _gdba ) ; } ; _ecb . newSubPath ( ) ; _ecb . moveTo ( _gfag , _bdaa ) ; _ecb . lineTo ( _gfag + _afca , _bdaa ) ; _ecb . lineTo ( _gfag + _afca , _bdaa + _cceb ) ;
_ecb . lineTo ( _gfag , _bdaa + _cceb ) ; _ecb . closePath ( ) ; } ; func ( _gbee * wordBag ) maxDepth ( ) float64 { return _gbee . _fcgc - _gbee . Lly } ; func ( _fcdgd * wordBag ) removeWord ( _ffaf * textWord , _fag int ) { _abbd := _fcdgd . _bbf [ _fag ] ; _abbd = _fbcac ( _abbd , _ffaf ) ;
if len ( _abbd ) == 0 { delete ( _fcdgd . _bbf , _fag ) ; } else { _fcdgd . _bbf [ _fag ] = _abbd ; } ; } ; func ( _cbgfa * textTable ) reduce ( ) * textTable { _eacce := make ( [ ] int , 0 , _cbgfa . _cabfg ) ; _debfc := make ( [ ] int , 0 , _cbgfa . _baee ) ; for _dddef := 0 ; _dddef < _cbgfa . _cabfg ;
_dddef ++ { if ! _cbgfa . emptyCompositeRow ( _dddef ) { _eacce = append ( _eacce , _dddef ) ; } ; } ; for _efbf := 0 ; _efbf < _cbgfa . _baee ; _efbf ++ { if ! _cbgfa . emptyCompositeColumn ( _efbf ) { _debfc = append ( _debfc , _efbf ) ; } ; } ; if len ( _eacce ) == _cbgfa . _cabfg && len ( _debfc ) == _cbgfa . _baee { return _cbgfa ;
} ; _ggbfc := textTable { _acgbd : _cbgfa . _acgbd , _baee : len ( _debfc ) , _cabfg : len ( _eacce ) , _aagc : make ( map [ uint64 ] * textPara , len ( _debfc ) * len ( _eacce ) ) } ; if _beae { _ag . Log . Info ( "\u0072\u0065\u0064\u0075ce\u003a\u0020\u0025\u0064\u0078\u0025\u0064\u0020\u002d\u003e\u0020\u0025\u0064\u0078%\u0064" , _cbgfa . _baee , _cbgfa . _cabfg , len ( _debfc ) , len ( _eacce ) ) ;
_ag . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076" , _debfc ) ; _ag . Log . Info ( "\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076" , _eacce ) ; } ; for _dfae , _fedd := range _eacce { for _acgg , _effef := range _debfc { _gged , _accbc := _cbgfa . getComposite ( _effef , _fedd ) ;
if _gged == nil { continue ; } ; if _beae { _gdf . Printf ( "\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n" , _acgg , _dfae , _effef , _fedd , _babd ( _gged . merge ( ) . text ( ) , 50 ) ) ; } ; _ggbfc . putComposite ( _acgg , _dfae , _gged , _accbc ) ;
} ; } ; return & _ggbfc ; } ; func _ddedb ( _aecf , _ddfeg _ee . Point ) rulingKind { _cgcgb := _b . Abs ( _aecf . X - _ddfeg . X ) ; _gfcbg := _b . Abs ( _aecf . Y - _ddfeg . Y ) ; return _bfcbe ( _cgcgb , _gfcbg , _cdfc ) ; } ; func ( _ddc * Extractor ) extractPageText ( _age string , _ded * _bf . PdfPageResources , _ccge _ee . Matrix , _fgg int ) ( * PageText , int , int , error ) { _ag . Log . Trace ( "\u0065x\u0074\u0072\u0061\u0063t\u0050\u0061\u0067\u0065\u0054e\u0078t\u003a \u006c\u0065\u0076\u0065\u006c\u003d\u0025d" , _fgg ) ;
_cdb := & PageText { _fabf : _ddc . _bb } ; _ffb := _cde ( _ddc . _bb ) ; var _gca stateStack ; _dbb := _eag ( _ddc , _ded , _da . GraphicsState { } , & _ffb , & _gca ) ; _ge := shapesState { _efgg : _ccge , _egb : _ee . IdentityMatrix ( ) , _gdcg : _dbb } ; var _gge bool ; if _fgg > _egc { _gga := _g . New ( "\u0066\u006f\u0072\u006d s\u0074\u0061\u0063\u006b\u0020\u006f\u0076\u0065\u0072\u0066\u006c\u006f\u0077" ) ;
_ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0065\u0078\u0074\u0072\u0061\u0063\u0074\u0050\u0061\u0067\u0065\u0054\u0065\u0078\u0074\u002e\u0020\u0072\u0065\u0063u\u0072\u0073\u0069\u006f\u006e\u0020\u006c\u0065\u0076\u0065\u006c\u003d\u0025\u0064 \u0065r\u0072\u003d\u0025\u0076" , _fgg , _gga ) ;
return _cdb , _ffb . _fce , _ffb . _ddga , _gga ; } ; _feb := _da . NewContentStreamParser ( _age ) ; _facf , _gad := _feb . Parse ( ) ; if _gad != nil { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020e\u0078\u0074\u0072a\u0063\u0074\u0050\u0061g\u0065\u0054\u0065\u0078\u0074\u0020\u0070\u0061\u0072\u0073\u0065\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gad ) ;
return _cdb , _ffb . _fce , _ffb . _ddga , _gad ; } ; _cdb . _bfg = _facf ; _gde := _da . NewContentStreamProcessor ( * _facf ) ; _gde . AddHandler ( _da . HandlerConditionEnumAllOperands , "" , func ( _bac * _da . ContentStreamOperation , _aea _da . GraphicsState , _aae * _bf . PdfPageResources ) error { _ffce := _bac . Operand ;
if _ddb { _ag . Log . Info ( "\u0026&\u0026\u0020\u006f\u0070\u003d\u0025s" , _bac ) ; } ; switch _ffce { case "\u0071" : if _bccf { _ag . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _ge . _egb ) ; } ; _gca . push ( & _ffb ) ; case "\u0051" : if ! _gca . empty ( ) { _ffb = * _gca . pop ( ) ;
} ; _ge . _egb = _aea . CTM ; if _bccf { _ag . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _ge . _egb ) ; } ; case "\u0042\u0054" : if _gge { _ag . Log . Debug ( "\u0042\u0054\u0020\u0063\u0061\u006c\u006c\u0065\u0064\u0020\u0077\u0068\u0069\u006c\u0065 \u0069n\u0020\u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
_cdb . _cac = append ( _cdb . _cac , _dbb . _dbdg ... ) ; } ; _gge = true ; _abcc := _aea ; _abcc . CTM = _ccge . Mult ( _abcc . CTM ) ; _dbb = _eag ( _ddc , _aae , _abcc , & _ffb , & _gca ) ; _ge . _gdcg = _dbb ; case "\u0045\u0054" : if ! _gge { _ag . Log . Debug ( "\u0045\u0054\u0020ca\u006c\u006c\u0065\u0064\u0020\u006f\u0075\u0074\u0073i\u0064e\u0020o\u0066 \u0061\u0020\u0074\u0065\u0078\u0074\u0020\u006f\u0062\u006a\u0065\u0063\u0074" ) ;
} ; _gge = false ; _cdb . _cac = append ( _cdb . _cac , _dbb . _dbdg ... ) ; _dbb . reset ( ) ; case "\u0054\u002a" : _dbb . nextLine ( ) ; case "\u0054\u0064" : if _gdg , _adb := _dbb . checkOp ( _bac , 2 , true ) ; ! _gdg { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _adb ) ;
return _adb ; } ; _egae , _egga , _ade := _ggfdc ( _bac . Params ) ; if _ade != nil { return _ade ; } ; _dbb . moveText ( _egae , _egga ) ; case "\u0054\u0044" : if _bdcb , _gfb := _dbb . checkOp ( _bac , 2 , true ) ; ! _bdcb { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gfb ) ;
return _gfb ; } ; _cee , _cdd , _gff := _ggfdc ( _bac . Params ) ; if _gff != nil { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gff ) ; return _gff ; } ; _dbb . moveTextSetLeading ( _cee , _cdd ) ; case "\u0054\u006a" : if _aad , _dbfb := _dbb . checkOp ( _bac , 1 , true ) ;
! _aad { _ag . Log . Debug ( "\u0045\u0052\u0052\u004fR:\u0020\u0054\u006a\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0065\u0072\u0072\u003d%\u0076" , _bac , _dbfb ) ; return _dbfb ; } ; _bcb := _bg . TraceToDirectObject ( _bac . Params [ 0 ] ) ; _bab , _dgea := _bg . GetStringBytes ( _bcb ) ;
if ! _dgea { _ag . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a\u0020T\u006a\u0020o\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074S\u0074\u0072\u0069\u006e\u0067\u0042\u0079\u0074\u0065\u0073\u0020\u0066a\u0069\u006c\u0065\u0064" , _bac ) ; return _bg . ErrTypeError ;
} ; return _dbb . showText ( _bcb , _bab ) ; case "\u0054\u004a" : if _fba , _ace := _dbb . checkOp ( _bac , 1 , true ) ; ! _fba { _ag . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _ace ) ; return _ace ; } ; _ead , _acc := _bg . GetArray ( _bac . Params [ 0 ] ) ;
if ! _acc { _ag . Log . Debug ( "\u0045\u0052\u0052OR\u003a\u0020\u0054\u004a\u0020\u006f\u0070\u003d\u0025s\u0020G\u0065t\u0041r\u0072\u0061\u0079\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _bac ) ; return _gad ; } ; return _dbb . showTextAdjusted ( _ead ) ;
case "\u0027" : if _fab , _aade := _dbb . checkOp ( _bac , 1 , true ) ; ! _fab { _ag . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0027\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _aade ) ; return _aade ; } ; _fca := _bg . TraceToDirectObject ( _bac . Params [ 0 ] ) ; _gcg , _bcd := _bg . GetStringBytes ( _fca ) ;
if ! _bcd { _ag . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020'\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _bac ) ; return _bg . ErrTypeError ; } ; _dbb . nextLine ( ) ; return _dbb . showText ( _fca , _gcg ) ;
case "\u0022" : if _ccbb , _aab := _dbb . checkOp ( _bac , 3 , true ) ; ! _ccbb { _ag . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u0022\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _aab ) ; return _aab ; } ; _ggbd , _eaa , _bdee := _ggfdc ( _bac . Params [ : 2 ] ) ; if _bdee != nil { return _bdee ;
} ; _efc := _bg . TraceToDirectObject ( _bac . Params [ 2 ] ) ; _bef , _afe := _bg . GetStringBytes ( _efc ) ; if ! _afe { _ag . Log . Debug ( "\u0045\u0052RO\u0052\u003a\u0020\"\u0020\u006f\u0070\u003d%s \u0047et\u0053\u0074\u0072\u0069\u006e\u0067\u0042yt\u0065\u0073\u0020\u0066\u0061\u0069\u006ce\u0064" , _bac ) ;
return _bg . ErrTypeError ; } ; _dbb . setCharSpacing ( _ggbd ) ; _dbb . setWordSpacing ( _eaa ) ; _dbb . nextLine ( ) ; return _dbb . showText ( _efc , _bef ) ; case "\u0054\u004c" : _bdg , _adf := _efd ( _bac ) ; if _adf != nil { _ag . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u004c\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _adf ) ;
return _adf ; } ; _dbb . setTextLeading ( _bdg ) ; case "\u0054\u0063" : _fgc , _cbc := _efd ( _bac ) ; if _cbc != nil { _ag . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0063\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _cbc ) ; return _cbc ; } ; _dbb . setCharSpacing ( _fgc ) ;
case "\u0054\u0066" : if _ffg , _bdcd := _dbb . checkOp ( _bac , 2 , true ) ; ! _ffg { _ag . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0066\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bdcd ) ; return _bdcd ; } ; _gddg , _dbc := _bg . GetNameVal ( _bac . Params [ 0 ] ) ;
if ! _dbc { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a \u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u004ea\u006d\u0065\u0056\u0061\u006c\u0020\u0066a\u0069\u006c\u0065\u0064" , _bac ) ; return _bg . ErrTypeError ; } ; _cgd , _acbe := _bg . GetNumberAsFloat ( _bac . Params [ 1 ] ) ;
if ! _dbc { _ag . Log . Debug ( "\u0045\u0052\u0052O\u0052\u003a\u0020\u0054\u0066\u0020\u006f\u0070\u003d\u0025\u0073\u0020\u0047\u0065\u0074\u0046\u006c\u006f\u0061\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065d\u002e\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bac , _acbe ) ;
return _acbe ; } ; _acbe = _dbb . setFont ( _gddg , _cgd ) ; _dbb . _cdbc = _fb . Is ( _acbe , _bg . ErrNotSupported ) ; if _acbe != nil && ! _dbb . _cdbc { return _acbe ; } ; case "\u0054\u006d" : if _ecg , _gadb := _dbb . checkOp ( _bac , 6 , true ) ; ! _ecg { _ag . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u006d\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _gadb ) ;
return _gadb ; } ; _dgd , _bdb := _bg . GetNumbersAsFloat ( _bac . Params ) ; if _bdb != nil { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bdb ) ; return _bdb ; } ; _dbb . setTextMatrix ( _dgd ) ; case "\u0054\u0072" : if _abe , _acec := _dbb . checkOp ( _bac , 1 , true ) ;
! _abe { _ag . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0072\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _acec ) ; return _acec ; } ; _eea , _gced := _bg . GetIntVal ( _bac . Params [ 0 ] ) ; if ! _gced { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0054\u0072\u0020\u006f\u0070\u003d\u0025\u0073 \u0047e\u0074\u0049\u006e\u0074\u0056\u0061\u006c\u0020\u0066\u0061\u0069\u006c\u0065\u0064" , _bac ) ;
return _bg . ErrTypeError ; } ; _dbb . setTextRenderMode ( _eea ) ; case "\u0054\u0073" : if _aeb , _dag := _dbb . checkOp ( _bac , 1 , true ) ; ! _aeb { _ag . Log . Debug ( "\u0045\u0052R\u004f\u0052\u003a \u0054\u0073\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _dag ) ; return _dag ;
} ; _efg , _ccc := _bg . GetNumberAsFloat ( _bac . Params [ 0 ] ) ; if _ccc != nil { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _ccc ) ; return _ccc ; } ; _dbb . setTextRise ( _efg ) ; case "\u0054\u0077" : if _gag , _fdgf := _dbb . checkOp ( _bac , 1 , true ) ;
! _gag { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _fdgf ) ; return _fdgf ; } ; _ebaa , _bda := _bg . GetNumberAsFloat ( _bac . Params [ 0 ] ) ; if _bda != nil { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bda ) ;
return _bda ; } ; _dbb . setWordSpacing ( _ebaa ) ; case "\u0054\u007a" : if _dafg , _bcbc := _dbb . checkOp ( _bac , 1 , true ) ; ! _dafg { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _bcbc ) ; return _bcbc ; } ; _cfc , _eae := _bg . GetNumberAsFloat ( _bac . Params [ 0 ] ) ;
if _eae != nil { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0072\u0072\u003d\u0025\u0076" , _eae ) ; return _eae ; } ; _dbb . setHorizScaling ( _cfc ) ; case "\u0063\u006d" : _ge . _egb = _aea . CTM ; if _ge . _egb . Singular ( ) { _dgee := _ee . IdentityMatrix ( ) . Translate ( _ge . _egb . Translation ( ) ) ;
_ag . Log . Debug ( "S\u0069n\u0067\u0075\u006c\u0061\u0072\u0020\u0063\u0074m\u003d\u0025\u0073\u2192%s" , _ge . _egb , _dgee ) ; _ge . _egb = _dgee ; } ; if _bccf { _ag . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _ge . _egb ) ; } ; case "\u006d" : if len ( _bac . Params ) != 2 { _ag . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006d\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _fa ) ;
return nil ; } ; _dbaa , _accg := _bg . GetNumbersAsFloat ( _bac . Params ) ; if _accg != nil { return _accg ; } ; _ge . moveTo ( _dbaa [ 0 ] , _dbaa [ 1 ] ) ; case "\u006c" : if len ( _bac . Params ) != 2 { _ag . Log . Debug ( "\u0057\u0041\u0052\u004e\u003a\u0020\u0065\u0072\u0072o\u0072\u0020\u0077\u0068\u0069\u006c\u0065\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u0020\u0060\u006c\u0060\u0020o\u0070\u0065r\u0061\u0074o\u0072\u003a\u0020\u0025\u0076\u002e\u0020\u004f\u0075\u0074\u0070\u0075\u0074 m\u0061\u0079\u0020\u0062\u0065\u0020\u0069\u006e\u0063o\u0072\u0072\u0065\u0063\u0074\u002e" , _fa ) ;
return nil ; } ; _cfb , _bdgg := _bg . GetNumbersAsFloat ( _bac . Params ) ; if _bdgg != nil { return _bdgg ; } ; _ge . lineTo ( _cfb [ 0 ] , _cfb [ 1 ] ) ; case "\u0063" : if len ( _bac . Params ) != 6 { return _fa ; } ; _accgg , _acga := _bg . GetNumbersAsFloat ( _bac . Params ) ; if _acga != nil { return _acga ;
} ; _ag . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _accgg ) ; _ge . cubicTo ( _accgg [ 0 ] , _accgg [ 1 ] , _accgg [ 2 ] , _accgg [ 3 ] , _accgg [ 4 ] , _accgg [ 5 ] ) ; case "\u0076" , "\u0079" : if len ( _bac . Params ) != 4 { return _fa ;
} ; _cge , _dfc := _bg . GetNumbersAsFloat ( _bac . Params ) ; if _dfc != nil { return _dfc ; } ; _ag . Log . Debug ( "\u0043u\u0062\u0069\u0063\u0020b\u0065\u007a\u0069\u0065\u0072 \u0070a\u0072a\u006d\u0073\u003a\u0020\u0025\u002e\u0032f" , _cge ) ; _ge . quadraticTo ( _cge [ 0 ] , _cge [ 1 ] , _cge [ 2 ] , _cge [ 3 ] ) ;
case "\u0068" : _ge . closePath ( ) ; case "\u0072\u0065" : if len ( _bac . Params ) != 4 { return _fa ; } ; _adgf , _aadee := _bg . GetNumbersAsFloat ( _bac . Params ) ; if _aadee != nil { return _aadee ; } ; _ge . drawRectangle ( _adgf [ 0 ] , _adgf [ 1 ] , _adgf [ 2 ] , _adgf [ 3 ] ) ; _ge . closePath ( ) ;
case "\u0053" : _ge . stroke ( & _cdb . _fcdb ) ; _ge . clearPath ( ) ; case "\u0073" : _ge . closePath ( ) ; _ge . stroke ( & _cdb . _fcdb ) ; _ge . clearPath ( ) ; case "\u0046" : _ge . fill ( & _cdb . _cddd ) ; _ge . clearPath ( ) ; case "\u0066" , "\u0066\u002a" : _ge . closePath ( ) ; _ge . fill ( & _cdb . _cddd ) ;
_ge . clearPath ( ) ; case "\u0042" , "\u0042\u002a" : _ge . fill ( & _cdb . _cddd ) ; _ge . stroke ( & _cdb . _fcdb ) ; _ge . clearPath ( ) ; case "\u0062" , "\u0062\u002a" : _ge . closePath ( ) ; _ge . fill ( & _cdb . _cddd ) ; _ge . stroke ( & _cdb . _fcdb ) ; _ge . clearPath ( ) ; case "\u006e" : _ge . clearPath ( ) ;
case "\u0044\u006f" : if len ( _bac . Params ) == 0 { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0065\u0078\u0070\u0065\u0063\u0074\u0065\u0064\u0020\u0058\u004fbj\u0065c\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006f\u0070\u0065\u0072\u0061n\u0064\u0020\u0066\u006f\u0072\u0020\u0044\u006f\u0020\u006f\u0070\u0065\u0072\u0061\u0074\u006f\u0072.\u0020\u0047\u006f\u0074\u0020\u0025\u002b\u0076\u002e" , _bac . Params ) ;
return _bg . ErrRangeError ; } ; _afc , _gee := _bg . GetName ( _bac . Params [ 0 ] ) ; if ! _gee { _ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u0044\u006f\u0020\u006f\u0070e\u0072a\u0074\u006f\u0072\u0020\u0058\u004f\u0062\u006a\u0065\u0063\u0074\u0020\u006e\u0061\u006d\u0065\u0020\u006fp\u0065\u0072\u0061\u006e\u0064\u003a\u0020\u0025\u002b\u0076\u002e" , _bac . Params [ 0 ] ) ;
return _bg . ErrTypeError ; } ; _ , _cgce := _aae . GetXObjectByName ( * _afc ) ; if _cgce != _bf . XObjectTypeForm { break ; } ; _ege , _gee := _ddc . _fd [ _afc . String ( ) ] ; if ! _gee { _gaf , _bdd := _aae . GetXObjectFormByName ( * _afc ) ; if _bdd != nil { _ag . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _bdd ) ;
return _bdd ; } ; _bdf , _bdd := _gaf . GetContentStream ( ) ; if _bdd != nil { _ag . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _bdd ) ; return _bdd ; } ; _dcc := _gaf . Resources ; if _dcc == nil { _dcc = _aae ; } ; _aed := _aea . CTM ; if _gbf , _efcd := _bg . GetArray ( _gaf . Matrix ) ;
_efcd { _bag , _fea := _gbf . GetAsFloat64Slice ( ) ; if _fea != nil { return _fea ; } ; if len ( _bag ) != 6 { return _fa ; } ; _cfbe := _ee . NewMatrix ( _bag [ 0 ] , _bag [ 1 ] , _bag [ 2 ] , _bag [ 3 ] , _bag [ 4 ] , _bag [ 5 ] ) ; _aed = _aea . CTM . Mult ( _cfbe ) ; } ; _cebf , _ceg , _bagg , _bdd := _ddc . extractPageText ( string ( _bdf ) , _dcc , _ccge . Mult ( _aed ) , _fgg + 1 ) ;
if _bdd != nil { _ag . Log . Debug ( "\u0045R\u0052\u004f\u0052\u003a\u0020\u0025v" , _bdd ) ; return _bdd ; } ; _ege = textResult { * _cebf , _ceg , _bagg } ; _ddc . _fd [ _afc . String ( ) ] = _ege ; } ; _ge . _egb = _aea . CTM ; if _bccf { _ag . Log . Info ( "\u0063\u0074\u006d\u003d\u0025\u0073" , _ge . _egb ) ;
} ; _cdb . _cac = append ( _cdb . _cac , _ege . _bafb . _cac ... ) ; _cdb . _fcdb = append ( _cdb . _fcdb , _ege . _bafb . _fcdb ... ) ; _cdb . _cddd = append ( _cdb . _cddd , _ege . _bafb . _cddd ... ) ; _ffb . _fce += _ege . _dfcc ; _ffb . _ddga += _ege . _bcbb ; case "\u0072\u0067" , "\u0067" , "\u006b" , "\u0063\u0073" , "\u0073\u0063" , "\u0073\u0063\u006e" : _dbb . _fcdg . ColorspaceNonStroking = _aea . ColorspaceNonStroking ;
_dbb . _fcdg . ColorNonStroking = _aea . ColorNonStroking ; case "\u0052\u0047" , "\u0047" , "\u004b" , "\u0043\u0053" , "\u0053\u0043" , "\u0053\u0043\u004e" : _dbb . _fcdg . ColorspaceStroking = _aea . ColorspaceStroking ; _dbb . _fcdg . ColorStroking = _aea . ColorStroking ;
} ; return nil ; } ) ; _gad = _gde . Process ( _ded ) ; return _cdb , _ffb . _fce , _ffb . _ddga , _gad ; } ; func _agcbf ( _cbge , _egcb _bf . PdfRectangle ) ( _bf . PdfRectangle , bool ) { if ! _gcgg ( _cbge , _egcb ) { return _bf . PdfRectangle { } , false ; } ; return _bf . PdfRectangle { Llx : _b . Max ( _cbge . Llx , _egcb . Llx ) , Urx : _b . Min ( _cbge . Urx , _egcb . Urx ) , Lly : _b . Max ( _cbge . Lly , _egcb . Lly ) , Ury : _b . Min ( _cbge . Ury , _egcb . Ury ) } , true ;
} ; func ( _afeb * textTable ) emptyCompositeColumn ( _cfbbde int ) bool { for _eege := 0 ; _eege < _afeb . _cabfg ; _eege ++ { if _caee , _bgfda := _afeb . _bbfb [ _fcbc ( _cfbbde , _eege ) ] ; _bgfda { if len ( _caee . paraList ) > 0 { return false ; } ; } ; } ; return true ; } ; func ( _abc * imageExtractContext ) extractFormImages ( _aaf * _bg . PdfObjectName , _ddg _da . GraphicsState , _dab * _bf . PdfPageResources ) error { _cbd , _fdg := _dab . GetXObjectFormByName ( * _aaf ) ;
if _fdg != nil { return _fdg ; } ; if _cbd == nil { return nil ; } ; _bfdb , _fdg := _cbd . GetContentStream ( ) ; if _fdg != nil { return _fdg ; } ; _efe := _cbd . Resources ; if _efe == nil { _efe = _dab ; } ; _fdg = _abc . extractContentStreamImages ( string ( _bfdb ) , _efe ) ; if _fdg != nil { return _fdg ;
} ; _abc . _cc ++ ; return nil ; } ; func ( _cfec rulingList ) sort ( ) { _f . Slice ( _cfec , _cfec . comp ) } ; func ( _dafgf paraList ) topoOrder ( ) [ ] int { if _dada { _ag . Log . Info ( "\u0074\u006f\u0070\u006f\u004f\u0072\u0064\u0065\u0072\u003a" ) ; } ; _dafa := len ( _dafgf ) ; _dfdb := make ( [ ] bool , _dafa ) ;
_cdcg := make ( [ ] int , 0 , _dafa ) ; _egfe := _dafgf . llyOrdering ( ) ; var _adbe func ( _eecgf int ) ; _adbe = func ( _ddaac int ) { _dfdb [ _ddaac ] = true ; for _cfbbd := 0 ; _cfbbd < _dafa ; _cfbbd ++ { if ! _dfdb [ _cfbbd ] { if _dafgf . readBefore ( _egfe , _ddaac , _cfbbd ) { _adbe ( _cfbbd ) ;
} ; } ; } ; _cdcg = append ( _cdcg , _ddaac ) ; } ; for _bede := 0 ; _bede < _dafa ; _bede ++ { if ! _dfdb [ _bede ] { _adbe ( _bede ) ; } ; } ; return _bceab ( _cdcg ) ; } ; func ( _babfg * ruling ) alignsSec ( _cadea * ruling ) bool { const _gffa = _dcfe + 1.0 ; return _babfg . _bgeb - _gffa <= _cadea . _eecc && _cadea . _bgeb - _gffa <= _babfg . _eecc ;
} ; type gridTile struct { _bf . PdfRectangle ; _gfga , _bceg , _abag , _abdfe bool ; } ; func ( _ggeec * textMark ) bbox ( ) _bf . PdfRectangle { return _ggeec . PdfRectangle } ; func ( _debe compositeCell ) hasLines ( _ceabfb [ ] * textLine ) bool { for _bdgb , _bgfdd := range _ceabfb { _becag := _gcgg ( _debe . PdfRectangle , _bgfdd . PdfRectangle ) ;
if _beae { _gdf . Printf ( "\u0020\u0020\u0020\u0020\u0020\u0020\u005e\u005e\u005e\u0069\u006e\u0074\u0065\u0072\u0073e\u0063t\u0073\u003d\u0025\u0074\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u000a" , _becag , _bdgb , len ( _ceabfb ) ) ; _gdf . Printf ( "\u0020\u0020\u0020\u0020 \u005e\u005e\u005e\u0063\u006f\u006d\u0070\u006f\u0073\u0069\u0074\u0065\u003d\u0025s\u000a" , _debe ) ;
_gdf . Printf ( "\u0020 \u0020 \u0020\u0020\u0020\u006c\u0069\u006e\u0065\u003d\u0025\u0073\u000a" , _bgfdd ) ; } ; if _becag { return true ; } ; } ; return false ; } ;
2022-09-10 15:35:04 +00:00
2022-10-27 19:04:58 +00:00
// Tables returns the tables extracted from the page.
// When the `_beae` debug switch is on, the number of tables is logged first.
func (_dadf PageText) Tables() []TextTable {
	tables := _dadf._ece
	if _beae {
		_ag.Log.Info("\u0054\u0061\u0062\u006c\u0065\u0073\u003a\u0020\u0025\u0064", len(tables))
	}
	return tables
}

// extractContentStreamImages parses the content stream text `_eec` and feeds
// every parsed operation to processOperand, processing against the resources
// `_cad`. The `_gda` cache and the `_gfa` options are allocated here when they
// are still nil. A parse error is returned unchanged.
func (_bfc *imageExtractContext) extractContentStreamImages(_eec string, _cad *_bf.PdfPageResources) error {
	parser := _da.NewContentStreamParser(_eec)
	operations, err := parser.Parse()
	if err != nil {
		return err
	}

	if _bfc._gda == nil {
		_bfc._gda = map[*_bg.PdfObjectStream]*cachedImage{}
	}
	if _bfc._gfa == nil {
		_bfc._gfa = &ImageExtractOptions{}
	}

	processor := _da.NewContentStreamProcessor(*operations)
	processor.AddHandler(_da.HandlerConditionEnumAllOperands, "", _bfc.processOperand)
	return processor.Process(_cad)
}

// isExportable reports true when the `_acgbd` flag is set, or when at least
// one cell get(0, i), for i in [0, _cabfg), is either missing or holds text
// that is longer than one rune and does not match the `_ebbb` pattern.
func (_bgdcd *textTable) isExportable() bool {
	if _bgdcd._acgbd {
		return true
	}

	// trivial is true for an existing cell whose text is at most one rune
	// long or matches `_ebbb`.
	trivial := func(_fcdbd int) bool {
		cell := _bgdcd.get(0, _fcdbd)
		if cell == nil {
			return false
		}
		content := cell.text()
		matched := _ebbb.MatchString(content)
		if _a.RuneCountInString(content) <= 1 {
			return true
		}
		return matched
	}

	for idx := 0; idx < _bgdcd._cabfg; idx++ {
		if !trivial(idx) {
			return true
		}
	}
	return false
}

// textMark is the extractor's internal record for one piece of rendered text.
// It embeds the mark's rectangle; the field set is unchanged from the
// original declaration, only laid out one field per line.
type textMark struct {
	_bf.PdfRectangle
	_bfbc  int
	_eeaf  string
	_abcec string
	_becff *_bf.PdfFont
	_abba  float64
	_dbe   float64
	_ecbd  _ee.Matrix
	_fcced _ee.Point
	_bbgf  _bf.PdfRectangle
	_agbe  _eda.Color
	_dggfa _eda.Color
	_gded  _bg.PdfObject
	_cedf  []string
	Tw     float64
	Th     float64
	_eced  int
}

// depthBand maps the two depths `_efea` and `_acfe` to depth indexes and
// returns the result of depthRange for them. An empty `_bbf` yields nil.
func (_aggf *wordBag) depthBand(_efea, _acfe float64) []int {
	if len(_aggf._bbf) == 0 {
		return nil
	}
	lowIdx := _aggf.getDepthIdx(_efea)
	highIdx := _aggf.getDepthIdx(_acfe)
	return _aggf.depthRange(lowIdx, highIdx)
}
// ImageMark describes a single image drawn on a page, together with where it
// is placed and how large it is shown.
// All coordinates are in device coordinates.
type ImageMark struct {
	// Image is the image the mark refers to.
	Image *_bf.Image

	// Dimensions of the image as displayed in the PDF.
	Width  float64
	Height float64

	// Position of the image in PDF coordinates (lower left corner).
	X float64
	Y float64

	// Angle in degrees, if rotated.
	Angle float64
}

// _ebf is a Meta text mark whose Text is "[X]" and whose Original is a single
// space; both its fill and stroke colours are white.
var _ebf = TextMark{
	Meta:        true,
	Text:        "\u005b\u0058\u005d",
	Original:    "\u0020",
	FillColor:   _eda.White,
	StrokeColor: _eda.White,
}

// empty reports whether the stack currently holds no entries.
func (_gagb *stateStack) empty() bool {
	depth := len(*_gagb)
	return depth == 0
}
// String returns a description of `w`: its `_ecgcg` value, its rectangle, its
// `_fbgge` value (printed under the "fontsize" label) and its `_bcaa` text.
func (_eaee *textWord) String() string {
	// Decodes to: %.2f %6.2f fontsize=%.2f "%s"
	const layout = "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022"
	return _gdf.Sprintf(layout, _eaee._ecgcg, _eaee.PdfRectangle, _eaee._fbgge, _eaee._bcaa)
}

const _gaeg = 10

// _egecc builds a textPara that has `_bccfc` as its rectangle and `_gacd` as
// its lines; every other field keeps its zero value.
func _egecc(_bccfc _bf.PdfRectangle, _gacd []*textLine) *textPara {
	para := new(textPara)
	para.PdfRectangle = _bccfc
	para._ecbdg = _gacd
	return para
}
// String returns a description of `p`: its rectangle, an optional "[WxH] "
// prefix taken from the attached table (`_bccg`), the number of lines and the
// paragraph text shortened through `_babd(..., 50)`. A paragraph whose
// `_dgggff` flag is set is rendered as its rectangle followed by "[EMPTY]".
func (_gfdb *textPara) String() string {
	if _gfdb._dgggff {
		return _gdf.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u005b\u0045\u004d\u0050\u0054\u0059\u005d", _gfdb.PdfRectangle)
	}

	var tableDims string
	if table := _gfdb._bccg; table != nil {
		tableDims = _gdf.Sprintf("\u005b\u0025\u0064\u0078\u0025\u0064\u005d\u0020", table._baee, table._cabfg)
	}

	preview := _babd(_gfdb.text(), 50)
	return _gdf.Sprintf("\u0025\u0036\u002e\u0032f \u0025\u0073\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u0020\u0025\u0071", _gfdb.PdfRectangle, tableDims, len(_gfdb._ecbdg), preview)
}

// intersections returns, for every ruling index that takes part in at least
// one crossing, the set of ruling indexes it intersects. Only pairs made of
// one `_beec` ruling and one `_ebabd` ruling are tested (asRuling gives the
// former an x primary and the latter a y primary). nil is returned when
// either kind has fewer than `_agec`+1 / `_bafa`+1 members, or when the two
// kinds together exceed `_aedf` rulings.
func (_bdfdg rulingList) intersections() map[int]intSet {
	var verts, horzs []int
	for idx, r := range _bdfdg {
		if r._cgef == _beec {
			verts = append(verts, idx)
		} else if r._cgef == _ebabd {
			horzs = append(horzs, idx)
		}
	}

	if len(verts) < _agec+1 || len(horzs) < _bafa+1 {
		return nil
	}
	if len(verts)+len(horzs) > _aedf {
		_ag.Log.Debug("\u0069\u006e\u0074\u0065\u0072\u0073e\u0063\u0074\u0069\u006f\u006e\u0073\u003a\u0020\u0054\u004f\u004f\u0020\u004d\u0041\u004e\u0059\u0020\u0072\u0075\u006ci\u006e\u0067\u0073\u0020\u0076\u0065\u0063\u0073\u003d\u0025\u0064\u0020\u003d\u0020%\u0064 \u0078\u0020\u0025\u0064", len(_bdfdg), len(verts), len(horzs))
		return nil
	}

	crossings := make(map[int]intSet, len(verts)+len(horzs))
	// link records `to` in the set of `from`, creating the set on first use.
	link := func(from, to int) {
		if _, present := crossings[from]; !present {
			crossings[from] = make(intSet)
		}
		crossings[from].add(to)
	}

	for _, v := range verts {
		for _, h := range horzs {
			if !_bdfdg[v].intersects(_bdfdg[h]) {
				continue
			}
			link(v, h)
			link(h, v)
		}
	}
	return crossings
}

// yMean returns the average of the Y coordinates of the two end points.
func (_fcgb lineRuling) yMean() float64 {
	sum := _fcgb._abec.Y + _fcgb._eacb.Y
	return 0.5 * sum
}

// _gecbc returns the larger of its two arguments.
func _gecbc(_eecca, _afegcc int) int {
	if _afegcc >= _eecca {
		return _afegcc
	}
	return _eecca
}

// toTextMarks returns the text marks of the paragraph, with `_fegbf` passed
// through to every helper as the running offset. Without an attached table
// the paragraph's own cell marks are returned. With a table, cells are
// visited with the second index of get() in the outer loop: a missing cell
// contributes a tab mark, every cell is followed by a space mark, and every
// outer iteration except the last is followed by a newline mark.
func (_fadf *textPara) toTextMarks(_fegbf *int) []TextMark {
	table := _fadf._bccg
	if table == nil {
		return _fadf.toCellTextMarks(_fegbf)
	}

	var marks []TextMark
	for row := 0; row < table._cabfg; row++ {
		for col := 0; col < table._baee; col++ {
			if cell := table.get(col, row); cell != nil {
				marks = append(marks, cell.toCellTextMarks(_fegbf)...)
			} else {
				marks = _abbf(marks, _fegbf, "\u0009")
			}
			marks = _abbf(marks, _fegbf, "\u0020")
		}
		if row < table._cabfg-1 {
			marks = _abbf(marks, _fegbf, "\u000a")
		}
	}
	return marks
}

// fontsize returns the `_ddef` value of the first line. The paragraph must
// hold at least one line; an empty `_ecbdg` makes the index expression panic.
func (_efgee *textPara) fontsize() float64 {
	first := _efgee._ecbdg[0]
	return first._ddef
}

// getStrokeColor converts the stroking colorspace/color pair of the graphics
// state `_fcdg` through `_gfdbb`.
func (_bdfa *textObject) getStrokeColor() _eda.Color {
	return _gfdbb(
		_bdfa._fcdg.ColorspaceStroking,
		_bdfa._fcdg.ColorStroking,
	)
}

// setCharSpacing stores `_fbg` in the `_bfcb` field of the text state and,
// when `_dbde` is on, logs the new state. A nil receiver is a no-op.
func (_bea *textObject) setCharSpacing(_fbg float64) {
	if _bea != nil {
		_bea._degf._bfcb = _fbg
		if _dbde {
			_ag.Log.Info("\u0073\u0065t\u0043\u0068\u0061\u0072\u0053\u0070\u0061\u0063\u0069\u006e\u0067\u003a\u0020\u0025\u002e\u0032\u0066\u0020\u0073\u0074\u0061\u0074e=\u0025\u0073", _fbg, _bea._degf.String())
		}
	}
}

// primaries returns the distinct `_eead` values of the rulings in ascending
// order.
func (_ddec rulingList) primaries() []float64 {
	distinct := make(map[float64]struct{}, len(_ddec))
	for i := range _ddec {
		distinct[_ddec[i]._eead] = struct{}{}
	}

	values := make([]float64, 0, len(distinct))
	for v := range distinct {
		values = append(values, v)
	}
	_f.Float64s(values)
	return values
}

// setWordSpacing stores `_gfd` in the `_gab` field of the text state.
// A nil receiver is a no-op.
func (_abfd *textObject) setWordSpacing(_gfd float64) {
	if _abfd != nil {
		_abfd._degf._gab = _gfd
	}
}

// _ccdef returns the integers 0.._bdad-1 sorted with `_fgae` as the
// less-than relation between two of those integers.
func _ccdef(_bdad int, _fgae func(int, int) bool) []int {
	order := make([]int, _bdad)
	for i := 0; i < _bdad; i++ {
		order[i] = i
	}
	less := func(a, b int) bool { return _fgae(order[a], order[b]) }
	_f.Slice(order, less)
	return order
}

// _eagg reports whether the x extents [Llx, Urx] of the two rectangles
// overlap; extents that merely touch count as overlapping.
func _eagg(_becf, _bfa _bf.PdfRectangle) bool {
	return _becf.Urx >= _bfa.Llx && _bfa.Urx >= _becf.Llx
}

// asRuling converts the line into a ruling of the same kind and colour, with
// `_gbgb` set to `_caade`. For kind `_beec` the primary (`_eead`) is the mean
// x and `_bgeb`/`_eecc` are the smaller/larger end-point Y; for kind `_ebabd`
// the primary is the mean y and the range comes from the end-point X values.
// Any other kind is logged as an error and yields (nil, false).
func (_ggafe lineRuling) asRuling() (*ruling, bool) {
	var primary, low, high float64
	from, to := _ggafe._abec, _ggafe._eacb

	switch _ggafe._aedc {
	case _beec:
		primary = _ggafe.xMean()
		low = _b.Min(from.Y, to.Y)
		high = _b.Max(from.Y, to.Y)
	case _ebabd:
		primary = _ggafe.yMean()
		low = _b.Min(from.X, to.X)
		high = _b.Max(from.X, to.X)
	default:
		_ag.Log.Error("\u0062\u0061\u0064\u0020pr\u0069\u006d\u0061\u0072\u0079\u0020\u006b\u0069\u006e\u0064\u003d\u0025\u0064", _ggafe._aedc)
		return nil, false
	}

	return &ruling{
		_cgef: _ggafe._aedc,
		Color: _ggafe.Color,
		_gbgb: _caade,
		_eead: primary,
		_bgeb: low,
		_eecc: high,
	}, true
}

// _gcgg reports whether both `_eagg` and `_efeg` hold for the two rectangles.
func _gcgg(_adcd, _bbdb _bf.PdfRectangle) bool {
	if !_eagg(_adcd, _bbdb) {
		return false
	}
	return _efeg(_adcd, _bbdb)
}
// markWordBoundaries sets the `_egcbd` flag on every word after the first
// whose `_dgecg` value against the word directly before it is at least
// `_gaca` times the line's `_ddef`. The slice expression `_egad[1:]` requires
// the line to hold at least one word.
func ( _fcea * textLine ) markWordBoundaries ( ) { _dgggf := _gaca * _fcea . _ddef ; for _beca , _cgfgd := range _fcea . _egad [ 1 : ] { if _dgecg ( _cgfgd , _fcea . _egad [ _beca ] ) >= _dgggf { _cgfgd . _egcbd = true ; } ; } ; } ;
// _cdg builds a wordBag from `_cga`. The bag is created from the first word
// through `_gcfa`; each remaining word is appended to the `_bbf` bucket
// selected by `_feef(word._ecgcg)` and the bag rectangle is combined with the
// word rectangle through `_effa`. The bag is sorted before it is returned.
// `_cga` must not be empty because `_cga[0]` is read unconditionally.
func _cdg ( _cga [ ] * textWord , _cacg float64 , _bgc , _dbfce rulingList ) * wordBag { _ggca := _gcfa ( _cga [ 0 ] , _cacg , _bgc , _dbfce ) ;
for _ , _ddd := range _cga [ 1 : ] { _geeg := _feef ( _ddd . _ecgcg ) ; _ggca . _bbf [ _geeg ] = append ( _ggca . _bbf [ _geeg ] , _ddd ) ; _ggca . PdfRectangle = _effa ( _ggca . PdfRectangle , _ddd . PdfRectangle ) ; } ; _ggca . sort ( ) ; return _ggca ; } ;
// renderText turns the string bytes `_gffg` into text marks appended to
// `_abfa._dbdg`, advancing the text matrix `_abfa._gcb` after every mark.
// `_dea` is handed unchanged to newTextMark.
//
// Steps visible in the body:
//   - if `_abfa._cdbc` is set, a debug message is logged and nil is returned;
//   - the bytes are converted to charcodes and then to strings with the
//     current font; the returned counts are added to `_degf._fce` and
//     `_degf._ddga`;
//   - the width of a space is looked up (rune metrics, then charcode 32, then
//     the default font) and scaled by `_ecgg`, which is `_edec` except for
//     fonts of subtype "Type3", where it is 1;
//   - for every string a mark is created, the encoder's rune for the charcode
//     is stored in the mark's `_abcec` when available, and the text matrix is
//     concatenated with `_feec`.
//
// It returns an error when the font has no metrics for a charcode; marks for
// which newTextMark reports false are skipped without advancing `_gcb`.
func ( _abfa * textObject ) renderText ( _dea _bg . PdfObject , _gffg [ ] byte ) error { if _abfa . _cdbc { _ag . Log . Debug ( "\u0072\u0065\u006e\u0064\u0065r\u0054\u0065\u0078\u0074\u003a\u0020\u0049\u006e\u0076\u0061\u006c\u0069\u0064 \u0066\u006f\u006e\u0074\u002e\u0020\u004e\u006f\u0074\u0020\u0070\u0072\u006f\u0063\u0065\u0073\u0073\u0069\u006e\u0067\u002e" ) ;
return nil ; } ; _dgb := _abfa . getCurrentFont ( ) ; _aedd := _dgb . BytesToCharcodes ( _gffg ) ; _cfad , _eafg , _dggf := _dgb . CharcodesToStrings ( _aedd ) ; if _dggf > 0 { _ag . Log . Debug ( "\u0072\u0065nd\u0065\u0072\u0054e\u0078\u0074\u003a\u0020num\u0043ha\u0072\u0073\u003d\u0025\u0064\u0020\u006eum\u004d\u0069\u0073\u0073\u0065\u0073\u003d%\u0064" , _eafg , _dggf ) ;
} ; _abfa . _degf . _fce += _eafg ; _abfa . _degf . _ddga += _dggf ; _gadf := _abfa . _degf ; _eadf := _gadf . _adfb ; _dgdb := _gadf . _gdef / 100.0 ; _ecgg := _edec ; if _dgb . Subtype ( ) == "\u0054\u0079\u0070e\u0033" { _ecgg = 1 ; } ; _dce , _eadb := _dgb . GetRuneMetrics ( ' ' ) ; if ! _eadb { _dce , _eadb = _dgb . GetCharMetrics ( 32 ) ;
} ; if ! _eadb { _dce , _ = _bf . DefaultFont ( ) . GetRuneMetrics ( ' ' ) ; } ; _cadc := _dce . Wx * _ecgg ; _ag . Log . Trace ( "\u0073p\u0061\u0063e\u0057\u0069\u0064t\u0068\u003d\u0025\u002e\u0032\u0066\u0020t\u0065\u0078\u0074\u003d\u0025\u0071 \u0066\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0066\u006f\u006et\u0053\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066" , _cadc , _cfad , _dgb , _eadf ) ;
_fgd := _ee . NewMatrix ( _eadf * _dgdb , 0 , 0 , _eadf , 0 , _gadf . _bacc ) ; if _dbde { _ag . Log . Info ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0074\u0065\u0078t\u0073\u003d\u0025\u0071" , len ( _aedd ) , _aedd , _cfad ) ;
// NOTE(review): the last verb of the Trace format below is %q but the
// matching argument is len(_cfad), an int - confirm whether `_cfad` itself
// (as in the Info call above) was intended.
} ; _ag . Log . Trace ( "\u0072\u0065\u006e\u0064\u0065\u0072T\u0065\u0078\u0074\u003a\u0020\u0025\u0064\u0020\u0063\u006f\u0064\u0065\u0073=\u0025\u002b\u0076\u0020\u0072\u0075\u006ee\u0073\u003d\u0025\u0071" , len ( _aedd ) , _aedd , len ( _cfad ) ) ; _eee := _abfa . getFillColor ( ) ;
// Per-string loop: a string that is the single rune NUL is skipped, and the
// word spacing `_gadf._gab` is only applied (via `_gbe`) to a string that is
// the single rune 32.
_cfcf := _abfa . getStrokeColor ( ) ; for _bbda , _edfb := range _cfad { _efge := [ ] rune ( _edfb ) ; if len ( _efge ) == 1 && _efge [ 0 ] == '\x00' { continue ; } ; _fbff := _aedd [ _bbda ] ; _dccg := _abfa . _fcdg . CTM . Mult ( _abfa . _gcb ) . Mult ( _fgd ) ; _gbe := 0.0 ; if len ( _efge ) == 1 && _efge [ 0 ] == 32 { _gbe = _gadf . _gab ;
} ; _dagg , _eaab := _dgb . GetCharMetrics ( _fbff ) ; if ! _eaab { _ag . Log . Debug ( "\u0045R\u0052\u004fR\u003a\u0020\u004e\u006f \u006d\u0065\u0074r\u0069\u0063\u0020\u0066\u006f\u0072\u0020\u0063\u006fde\u003d\u0025\u0064 \u0072\u003d0\u0078\u0025\u0030\u0034\u0078\u003d%\u002b\u0071 \u0025\u0073" , _fbff , _efge , _efge , _dgb ) ;
return _gdf . Errorf ( "\u006e\u006f\u0020\u0063\u0068\u0061\u0072\u0020\u006d\u0065\u0074\u0072\u0069\u0063\u0073:\u0020f\u006f\u006e\u0074\u003d\u0025\u0073\u0020\u0063\u006f\u0064\u0065\u003d\u0025\u0064" , _dgb . String ( ) , _fbff ) ; } ; _aeab := _ee . Point { X : _dagg . Wx * _ecgg , Y : _dagg . Wy * _ecgg } ;
// `_dfd` is the horizontal displacement without the character spacing
// `_gadf._bfcb`; `_fbed` includes it. Both are scaled by `_dgdb`.
_dfd := _ee . Point { X : ( _aeab . X * _eadf + _gbe ) * _dgdb } ; _fbed := _ee . Point { X : ( _aeab . X * _eadf + _gadf . _bfcb + _gbe ) * _dgdb } ; if _dbde { _ag . Log . Info ( "\u0074\u0066\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0063\u003d\u0025\u002e\u0032f\u0020t\u0077\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0068\u003d\u0025\u002e\u0032\u0066" , _eadf , _gadf . _bfcb , _gadf . _gab , _dgdb ) ;
_ag . Log . Info ( "\u0064x\u002c\u0064\u0079\u003d%\u002e\u0033\u0066\u0020\u00740\u003d%\u002e3\u0066\u0020\u0074\u003d\u0025\u002e\u0033f" , _aeab , _dfd , _fbed ) ; } ; _gdcc := _def ( _dfd ) ; _feec := _def ( _fbed ) ; _bfcd := _abfa . _fcdg . CTM . Mult ( _abfa . _gcb ) . Mult ( _gdcc ) ;
if _dbcf { _ag . Log . Info ( "e\u006e\u0064\u003a\u000a\tC\u0054M\u003d\u0025\u0073\u000a\u0009 \u0074\u006d\u003d\u0025\u0073\u000a" + "\u0009\u0020t\u0064\u003d\u0025s\u0020\u0078\u006c\u0061\u0074\u003d\u0025\u0073\u000a" + "\u0009t\u0064\u0030\u003d\u0025s\u000a\u0009\u0020\u0020\u2192 \u0025s\u0020x\u006c\u0061\u0074\u003d\u0025\u0073" , _abfa . _fcdg . CTM , _abfa . _gcb , _feec , _ecc ( _abfa . _fcdg . CTM . Mult ( _abfa . _gcb ) . Mult ( _feec ) ) , _gdcc , _bfcd , _ecc ( _bfcd ) ) ;
} ; _fcg , _dcab := _abfa . newTextMark ( _ab . ExpandLigatures ( _efge ) , _dccg , _ecc ( _bfcd ) , _b . Abs ( _cadc * _dccg . ScalingFactorX ( ) ) , _dgb , _abfa . _degf . _bfcb , _eee , _cfcf , _dea , _cfad , _bbda ) ; if ! _dcab { _ag . Log . Debug ( "\u0054\u0065\u0078\u0074\u0020\u006d\u0061\u0072\u006b\u0020\u006f\u0075\u0074\u0073\u0069d\u0065 \u0070\u0061\u0067\u0065\u002e\u0020\u0053\u006b\u0069\u0070\u0070\u0069\u006e\u0067" ) ;
// NOTE(review): `_dgb` has already had methods called on it above, so the
// `_dgb == nil` branch below looks unreachable in practice - confirm.
continue ; } ; if _dgb == nil { _ag . Log . Debug ( "\u0045R\u0052O\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u002e" ) ; } else if _dgb . Encoder ( ) == nil { _ag . Log . Debug ( "E\u0052\u0052\u004f\u0052\u003a\u0020N\u006f\u0020\u0065\u006e\u0063\u006f\u0064\u0069\u006eg\u002e\u0020\u0066o\u006et\u003d\u0025\u0073" , _dgb ) ;
} else { if _ddf , _caea := _dgb . Encoder ( ) . CharcodeToRune ( _fbff ) ; _caea { _fcg . _abcec = string ( _ddf ) ; } ; } ; _ag . Log . Trace ( "i\u003d\u0025\u0064\u0020\u0063\u006fd\u0065\u003d\u0025\u0064\u0020\u006d\u0061\u0072\u006b=\u0025\u0073\u0020t\u0072m\u003d\u0025\u0073" , _bbda , _fbff , _fcg , _dccg ) ;
// Record the mark, then concatenate `_feec` (built from the displacement
// that includes character spacing) onto the text matrix.
_abfa . _dbdg = append ( _abfa . _dbdg , & _fcg ) ; _abfa . _gcb . Concat ( _feec ) ; } ; return nil ; } ;
// _ecc returns the translation component of `_efed` as a point.
func _ecc ( _efed _ee . Matrix ) _ee . Point { _fdff , _efda := _efed . Translation ( ) ; return _ee . Point { X : _fdff , Y : _efda } ; } ;
// rulingList is a slice of ruling pointers.
type rulingList [ ] * ruling ;
// cachedImage pairs an image with a colorspace; it is the value type of the
// `_gda` map of imageExtractContext.
type cachedImage struct { _fbc * _bf . Image ;
_aac _bf . PdfColorspace ; } ;
// shapesState holds two matrices, a list of subpaths, a flag, a point and a
// reference to a text object.
type shapesState struct { _egb _ee . Matrix ; _efgg _ee . Matrix ; _cfgd [ ] * subpath ; _ecd bool ; _decg _ee . Point ; _gdcg * textObject ; } ;
// quadraticTo logs when `_bccf` is on and then adds only the point
// (`_cbb`, `_bggg`) to the state; `_ddfe` and `_cbca` are not used.
func ( _abd * shapesState ) quadraticTo ( _ddfe , _cbca , _cbb , _bggg float64 ) { if _bccf { _ag . Log . Info ( "\u0071\u0075\u0061d\u0072\u0061\u0074\u0069\u0063\u0054\u006f\u003a" ) ;
} ; _abd . addPoint ( _cbb , _bggg ) ; } ;
// writeText writes every paragraph whose `_dgggff` flag is not set to
// `_efeag`. After each written paragraph except the one at the last index, a
// single space is written when `_ceabf(para, next)` is true and two newlines
// otherwise; `next` is the following list element even if that element is
// flagged. Two newlines are written at the very end. Results of Write are
// not checked.
func ( _deab paraList ) writeText ( _efeag _ed . Writer ) { for _dbgc , _abdg := range _deab { if _abdg . _dgggff { continue ; } ; _abdg . writeText ( _efeag ) ; if _dbgc != len ( _deab ) - 1 { if _ceabf ( _abdg , _deab [ _dbgc + 1 ] ) { _efeag . Write ( [ ] byte ( "\u0020" ) ) ;
} else { _efeag . Write ( [ ] byte ( "\u000a" ) ) ; _efeag . Write ( [ ] byte ( "\u000a" ) ) ; } ; } ; } ; _efeag . Write ( [ ] byte ( "\u000a" ) ) ; _efeag . Write ( [ ] byte ( "\u000a" ) ) ; } ;
// _efd returns the single parameter of `_bgdb` as a float64. When the
// operation does not have exactly one parameter, the mismatch is logged and
// an "incorrect parameter count" error is returned together with 0.
func _efd ( _bgdb * _da . ContentStreamOperation ) ( float64 , error ) { if len ( _bgdb . Params ) != 1 { _agc := _g . New ( "\u0069n\u0063\u006f\u0072\u0072e\u0063\u0074\u0020\u0070\u0061r\u0061m\u0065t\u0065\u0072\u0020\u0063\u006f\u0075\u006et" ) ;
_ag . Log . Debug ( "\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u0025\u0023\u0071\u0020\u0073\u0068\u006f\u0075\u006c\u0064\u0020h\u0061\u0076\u0065\u0020\u0025\u0064\u0020i\u006e\u0070\u0075\u0074\u0020\u0070\u0061\u0072\u0061\u006d\u0073,\u0020\u0067\u006f\u0074\u0020\u0025\u0064\u0020\u0025\u002b\u0076" , _bgdb . Operand , 1 , len ( _bgdb . Params ) , _bgdb . Params ) ;
return 0.0 , _agc ; } ; return _bg . GetNumberAsFloat ( _bgdb . Params [ 0 ] ) ; } ;
// _gfbg returns the keys of `_dbcge` in ascending order.
func _gfbg ( _dbcge map [ float64 ] gridTile ) [ ] float64 { _begd := make ( [ ] float64 , 0 , len ( _dbcge ) ) ; for _eecf := range _dbcge { _begd = append ( _begd , _eecf ) ; } ; _f . Float64s ( _begd ) ; return _begd ;
} ;
// _bcdg splits the words of `_bbe` into a list of new word bags, removing
// words from `_bbe` as they are consumed. For each depth index of `_bbe`,
// and for as long as that depth is not empty, the first word in reading
// order seeds a new bag (via `_gcfa`); the bag is then grown by repeated
// scanBand passes until a full pass adds nothing, and appended to the
// result. The three gap limits of a pass are multiples (`_efef`, `_cfede`,
// `_cccf`) of the bag's `_gaed`.
func _bcdg ( _bbe * wordBag , _gbgdb float64 , _ffag , _ebc rulingList ) [ ] * wordBag { var _dbbgc [ ] * wordBag ; for _ , _bacfg := range _bbe . depthIndexes ( ) { _dfab := false ; for ! _bbe . empty ( _bacfg ) { _eggdf := _bbe . firstReadingIndex ( _bacfg ) ; _gdbe := _bbe . firstWord ( _eggdf ) ;
// The inner `for` below behaves as a do-while: it always runs once and
// repeats while the previous pass set `_dfab`.
_bdcfd := _gcfa ( _gdbe , _gbgdb , _ffag , _ebc ) ; _bbe . removeWord ( _gdbe , _eggdf ) ; if _gfcbc { _ag . Log . Info ( "\u0066\u0069\u0072\u0073\u0074\u0057\u006f\u0072\u0064\u0020\u005e\u005e^\u005e\u0020\u0025\u0073" , _gdbe . String ( ) ) ; } ; for _ecaf := true ; _ecaf ; _ecaf = _dfab { _dfab = false ;
_ccdd := _efef * _bdcfd . _gaed ; _aegf := _cfede * _bdcfd . _gaed ; _efce := _cccf * _bdcfd . _gaed ; if _gfcbc { _ag . Log . Info ( "\u0070a\u0072a\u0057\u006f\u0072\u0064\u0073\u0020\u0064\u0065\u0070\u0074\u0068 \u0025\u002e\u0032\u0066 \u002d\u0020\u0025\u002e\u0032f\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061\u0044\u0065\u0070\u0074\u0068\u0047\u0061\u0070\u003d\u0025\u002e\u0032\u0066\u0020\u006d\u0061\u0078\u0049\u006e\u0074\u0072\u0061R\u0065\u0061\u0064\u0069\u006e\u0067\u0047\u0061p\u003d\u0025\u002e\u0032\u0066" , _bdcfd . minDepth ( ) , _bdcfd . maxDepth ( ) , _efce , _aegf ) ;
// First two scans ("vertical" over the depth range widened by `_efce`, then
// "horizontal" over the exact depth range): any hit restarts the pass.
} ; if _bbe . scanBand ( "\u0076\u0065\u0072\u0074\u0069\u0063\u0061\u006c" , _bdcfd , _aecc ( _cdag , 0 ) , _bdcfd . minDepth ( ) - _efce , _bdcfd . maxDepth ( ) + _efce , _aaff , false , false ) > 0 { _dfab = true ; } ; if _bbe . scanBand ( "\u0068\u006f\u0072\u0069\u007a\u006f\u006e\u0074\u0061\u006c" , _bdcfd , _aecc ( _cdag , _aegf ) , _bdcfd . minDepth ( ) , _bdcfd . maxDepth ( ) , _bdea , false , false ) > 0 { _dfab = true ;
// Otherwise a scan with the second-to-last flag set yields the count `_cbef`;
// the "other" scan runs only if that count is at most 10, or is above 1 and
// above 0.3 times the bag's depth extent divided by `_gaed`.
} ; if _dfab { continue ; } ; _cbef := _bbe . scanBand ( "" , _bdcfd , _aecc ( _ggdb , _ccdd ) , _bdcfd . minDepth ( ) , _bdcfd . maxDepth ( ) , _bee , true , false ) ; if _cbef > 0 { _cfae := ( _bdcfd . maxDepth ( ) - _bdcfd . minDepth ( ) ) / _bdcfd . _gaed ; if ( _cbef > 1 && float64 ( _cbef ) > 0.3 * _cfae ) || _cbef <= 10 { if _bbe . scanBand ( "\u006f\u0074\u0068e\u0072" , _bdcfd , _aecc ( _ggdb , _ccdd ) , _bdcfd . minDepth ( ) , _bdcfd . maxDepth ( ) , _bee , false , true ) > 0 { _dfab = true ;
} ; } ; } ; } ; _dbbgc = append ( _dbbgc , _bdcfd ) ; } ; } ; return _dbbgc ; } ;
// String returns a string describing `pt`: a "-PageText: N elements" line,
// one line per text mark in `_cac`, and a closing "+PageText: N elements"
// line, joined with newlines.
func (_gafc PageText) String() string {
	header := _gdf.Sprintf("P\u0061\u0067\u0065\u0054ex\u0074:\u0020\u0025\u0064\u0020\u0065l\u0065\u006d\u0065\u006e\u0074\u0073", len(_gafc._cac))

	lines := make([]string, 0, len(_gafc._cac)+2)
	lines = append(lines, "\u002d"+header)
	for _, mark := range _gafc._cac {
		lines = append(lines, mark.String())
	}
	lines = append(lines, "\u002b"+header)

	return _d.Join(lines, "\u000a")
}

// textState is the text state carried by a textObject (its `_degf` field).
// In this file, setCharSpacing writes `_bfcb`, setWordSpacing writes `_gab`,
// getCurrentFont reads `_dde`, and renderText reads `_adfb`, `_gdef` (divided
// by 100) and `_bacc` and adds to the `_fce` / `_ddga` counters.
type textState struct {
	_bfcb float64
	_gab  float64
	_gdef float64
	_acfa float64
	_adfb float64
	_egea RenderMode
	_bacc float64
	_dde  *_bf.PdfFont
	_dadc _bf.PdfRectangle
	_fce  int
	_ddga int
}

// _affac appends `_bcgaa` to `_afcd` with its Offset set to the current value
// of `*_fedf`, then advances `*_fedf` by the byte length of the mark's Text.
func _affac(_afcd []TextMark, _fedf *int, _bcgaa TextMark) []TextMark {
	start := *_fedf
	_bcgaa.Offset = start
	*_fedf = start + len(_bcgaa.Text)
	return append(_afcd, _bcgaa)
}

// reorder rearranges the list in place so that position i holds the element
// that was at `_gdgdf[i]`. Positions not covered by `_gdgdf` end up nil,
// because the scratch list is copied back over the whole receiver.
func (_efbb paraList) reorder(_gdgdf []int) {
	shuffled := make(paraList, len(_efbb))
	for dst := range _gdgdf {
		shuffled[dst] = _efbb[_gdgdf[dst]]
	}
	copy(_efbb, shuffled)
}

// computeBbox combines, through `_effa`, the rectangles of all non-nil cells
// of the table. A table without any cell yields the zero rectangle.
func (_daee *textTable) computeBbox() _bf.PdfRectangle {
	var (
		bounds _bf.PdfRectangle
		seeded bool
	)
	for y := 0; y < _daee._cabfg; y++ {
		for x := 0; x < _daee._baee; x++ {
			cell := _daee.get(x, y)
			switch {
			case cell == nil:
				// Nothing stored at this position.
			case seeded:
				bounds = _effa(bounds, cell.PdfRectangle)
			default:
				bounds = cell.PdfRectangle
				seeded = true
			}
		}
	}
	return bounds
}

// imageExtractContext is the state used while extracting images. `_gda` and
// `_gfa` are created on demand by extractContentStreamImages, and `_cc` is
// incremented by extractFormImages.
type imageExtractContext struct {
	_dba []ImageMark
	_gcc int
	_fe  int
	_cc  int
	_gda map[*_bg.PdfObjectStream]*cachedImage
	_gfa *ImageExtractOptions
}

// intSet is a set of ints.
type intSet map[int]struct{}

// addNeighbours fills in the four neighbour links of every paragraph in the
// list: `_fgeg` (smaller x), `_bfdc` (larger x), `_egec` (smaller y) and
// `_adad` (larger y).
//
// For the x links, candidates come from yNeighbours(`_cfeg`); among those
// lying wholly on one side, the closest one is chosen, dropped again if any
// other candidate on that side reaches past its near edge, and accepted only
// if `_efeg` holds for the pair. The y links work the same way with
// xNeighbours(`_fbcc`) and `_eagg`. A final pass clears every link that the
// paragraph at the other end does not point back through.
func (_fdbg paraList) addNeighbours() {
	// splitByX sorts the candidates into those ending at or before the left
	// edge of `ref` and those starting at or after its right edge.
	splitByX := func(candidates []int, ref *textPara) ([]*textPara, []*textPara) {
		lower := make([]*textPara, 0, len(candidates)-1)
		upper := make([]*textPara, 0, len(candidates)-1)
		for _, idx := range candidates {
			other := _fdbg[idx]
			if other.Urx <= ref.Llx {
				lower = append(lower, other)
			} else if other.Llx >= ref.Urx {
				upper = append(upper, other)
			}
		}
		return lower, upper
	}

	// splitByY returns (candidates starting at or above the top edge of
	// `ref`, candidates ending at or below its bottom edge).
	splitByY := func(candidates []int, ref *textPara) ([]*textPara, []*textPara) {
		upper := make([]*textPara, 0, len(candidates)-1)
		lower := make([]*textPara, 0, len(candidates)-1)
		for _, idx := range candidates {
			other := _fdbg[idx]
			if other.Ury <= ref.Lly {
				lower = append(lower, other)
			} else if other.Lly >= ref.Ury {
				upper = append(upper, other)
			}
		}
		return upper, lower
	}

	neighbours := _fdbg.yNeighbours(_cfeg)
	for _, para := range _fdbg {
		candidates := neighbours[para]
		if len(candidates) == 0 {
			continue
		}
		lowerX, upperX := splitByX(candidates, para)
		if len(lowerX) == 0 && len(upperX) == 0 {
			continue
		}

		if len(lowerX) > 0 {
			// Closest on the low-x side: the greatest Urx (later entry wins ties).
			best := lowerX[0]
			for _, other := range lowerX[1:] {
				if other.Urx >= best.Urx {
					best = other
				}
			}
			for _, other := range lowerX {
				if other != best && other.Urx > best.Llx {
					best = nil
					break
				}
			}
			if best != nil && _efeg(para.PdfRectangle, best.PdfRectangle) {
				para._fgeg = best
			}
		}

		if len(upperX) > 0 {
			// Closest on the high-x side: the smallest Llx (later entry wins ties).
			best := upperX[0]
			for _, other := range upperX[1:] {
				if other.Llx <= best.Llx {
					best = other
				}
			}
			for _, other := range upperX {
				if other != best && other.Llx < best.Urx {
					best = nil
					break
				}
			}
			if best != nil && _efeg(para.PdfRectangle, best.PdfRectangle) {
				para._bfdc = best
			}
		}
	}

	neighbours = _fdbg.xNeighbours(_fbcc)
	for _, para := range _fdbg {
		candidates := neighbours[para]
		if len(candidates) == 0 {
			continue
		}
		upperY, lowerY := splitByY(candidates, para)
		if len(upperY) == 0 && len(lowerY) == 0 {
			continue
		}

		if len(lowerY) > 0 {
			// Closest on the low-y side: the greatest Ury (later entry wins ties).
			best := lowerY[0]
			for _, other := range lowerY[1:] {
				if other.Ury >= best.Ury {
					best = other
				}
			}
			for _, other := range lowerY {
				if other != best && other.Ury > best.Lly {
					best = nil
					break
				}
			}
			if best != nil && _eagg(para.PdfRectangle, best.PdfRectangle) {
				para._egec = best
			}
		}

		if len(upperY) > 0 {
			// Closest on the high-y side: the smallest Lly (later entry wins ties).
			best := upperY[0]
			for _, other := range upperY[1:] {
				if other.Lly <= best.Lly {
					best = other
				}
			}
			for _, other := range upperY {
				if other != best && other.Lly < best.Ury {
					best = nil
					break
				}
			}
			if best != nil && _eagg(para.PdfRectangle, best.PdfRectangle) {
				para._adad = best
			}
		}
	}

	// Keep only links that are mutual.
	for _, para := range _fdbg {
		if para._fgeg != nil && para._fgeg._bfdc != para {
			para._fgeg = nil
		}
		if para._adad != nil && para._adad._egec != para {
			para._adad = nil
		}
		if para._bfdc != nil && para._bfdc._fgeg != para {
			para._bfdc = nil
		}
		if para._egec != nil && para._egec._adad != para {
			para._egec = nil
		}
	}
}
// PageImages holds the images extracted from a PDF page together with their
// spatial information: display position and size.
type PageImages struct {
	// Images lists one mark per extracted image.
	Images []ImageMark
}

// _fccec rounds `_fgdcd` to the nearest multiple of `_edc` and returns it as
// an int. A step of 0 is treated as 1.
func _fccec(_fgdcd float64, _edc int) int {
	step := float64(_edc)
	if _edc == 0 {
		step = 1
	}
	return int(_b.Round(_fgdcd/step) * step)
}

// sort orders the words inside every `_bbf` bucket so that `_addb` is
// negative for a word compared with any word placed after it.
func (_ecdd *wordBag) sort() {
	for key := range _ecdd._bbf {
		words := _ecdd._bbf[key]
		_f.Slice(words, func(i, j int) bool {
			return _addb(words[i], words[j]) < 0
		})
	}
}

// getCurrentFont returns the font stored in the text state. When none is set
// it logs a debug message and returns the default font instead.
func (_ffd *textObject) getCurrentFont() *_bf.PdfFont {
	if font := _ffd._degf._dde; font != nil {
		return font
	}
	_ag.Log.Debug("\u0045\u0052\u0052\u004f\u0052\u003a\u0020\u004e\u006f\u0020\u0066\u006f\u006e\u0074\u0020\u0064\u0065\u0066\u0069\u006e\u0065\u0064\u002e\u0020U\u0073\u0069\u006e\u0067\u0020d\u0065\u0066a\u0075\u006c\u0074\u002e")
	return _bf.DefaultFont()
}

// log writes a summary of the list to the logger under the title `_fadcg`
// and then prints one "index: ruling" line per element to standard output.
// It does nothing unless `_efa` is on.
func (_cgcb rulingList) log(_fadcg string) {
	if !_efa {
		return
	}
	_ag.Log.Info("\u0023\u0023\u0023\u0020\u0025\u0031\u0030\u0073\u003a\u0020\u0076\u0065c\u0073\u003d\u0025\u0073", _fadcg, _cgcb.String())
	for idx := range _cgcb {
		_gdf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", idx, _cgcb[idx].String())
	}
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// PageFonts represents extracted fonts on a PDF page.
2022-10-27 19:04:58 +00:00
type PageFonts struct {
	// Fonts lists the extracted fonts.
	Fonts []Font
}

// bbox returns the rectangle embedded in the word.
func (_feece *textWord) bbox() _bf.PdfRectangle {
	rect := _feece.PdfRectangle
	return rect
}

// xMean returns the average of the X coordinates of the two end points.
func (_bdfc lineRuling) xMean() float64 {
	sum := _bdfc._abec.X + _bdfc._eacb.X
	return 0.5 * sum
}

// _fbcac looks for the first element of `_fdfc` that is the same pointer as
// `_eceda` and returns the result of `_bbcg` for that index. When the word is
// not in the slice, an error is logged and nil is returned.
func _fbcac(_fdfc []*textWord, _eceda *textWord) []*textWord {
	for i := 0; i < len(_fdfc); i++ {
		if _fdfc[i] != _eceda {
			continue
		}
		return _bbcg(_fdfc, i)
	}
	_ag.Log.Error("\u0072\u0065\u006d\u006f\u0076e\u0057\u006f\u0072\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u0073\u0020\u0064o\u0065\u0073\u006e\u0027\u0074\u0020\u0063\u006f\u006e\u0074\u0061\u0069\u006e\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073", _eceda)
	return nil
}

// add inserts `_cgfc` into the set.
func (_ddcgf intSet) add(_cgfc int) {
	var member struct{}
	_ddcgf[_cgfc] = member
}

// size returns the number of entries on the stack.
func (_cgbg *stateStack) size() int {
	entries := *_cgbg
	return len(entries)
}
2022-09-10 15:35:04 +00:00
2022-10-27 19:04:58 +00:00
// PageText represents the layout of text on a device page.
type PageText struct {
	_cac  []*textMark      // all text marks; ApplyArea filters these by bounding box
	_gcea string           // text produced by the last layout pass
	_cagb []TextMark       // public text marks produced by the last layout pass
	_ece  []TextTable      // tables produced by the last layout pass
	_fabf _bf.PdfRectangle // rectangle handed to the layout pass as the page size
	_fcdb []pathSection
	_cddd []pathSection
	_bfg  *_da.ContentStreamOperations // returned by GetContentStreamOps
}

// bbox returns the smallest rectangle containing every point of every subpath
// of the section. It indexes the first point of the first subpath directly, so
// the section must contain at least one non-empty leading subpath.
func (_gaef pathSection) bbox() _bf.PdfRectangle {
	seed := _gaef._gdbf[0]._fcdc[0]
	box := _bf.PdfRectangle{Llx: seed.X, Urx: seed.X, Lly: seed.Y, Ury: seed.Y}
	grow := func(p _ee.Point) {
		switch {
		case p.X < box.Llx:
			box.Llx = p.X
		case p.X > box.Urx:
			box.Urx = p.X
		}
		switch {
		case p.Y < box.Lly:
			box.Lly = p.Y
		case p.Y > box.Ury:
			box.Ury = p.Y
		}
	}
	// Revisiting the seed point is harmless: it is neither below nor above
	// the bounds it initialised.
	for _, sp := range _gaef._gdbf {
		for _, p := range sp._fcdc {
			grow(p)
		}
	}
	return box
}

// String returns a one-line description of the cell: its rectangle, the number
// of paragraphs it holds and the merged paragraph text shortened by `_babd`.
func (_gdcce compositeCell) String() string {
	var summary string
	count := len(_gdcce.paraList)
	if count > 0 {
		summary = _babd(_gdcce.paraList.merge().text(), 50)
	}
	// Format: "%6.2f %d paras %q"
	return _gdf.Sprintf("\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0070\u0061\u0072a\u0073\u0020\u0025\u0071", _gdcce.PdfRectangle, count, summary)
}

// minDepth returns `_fcgc - (Ury - _gaed)` for the bag.
func (_cgfab *wordBag) minDepth() float64 {
	return _cgfab._fcgc - (_cgfab.Ury - _cgfab._gaed)
}

// emptyCompositeRow reports whether none of the composite cells stored for row
// `_baccb` holds any paragraph.
func (_aeeg *textTable) emptyCompositeRow(_baccb int) bool {
	for x := 0; x < _aeeg._baee; x++ {
		// A missing key yields a zero cell with no paragraphs, so the
		// presence flag and the length test can be combined.
		cell, present := _aeeg._bbfb[_fcbc(x, _baccb)]
		if present && len(cell.paraList) > 0 {
			return false
		}
	}
	return true
}

// connections returns the indexes of the list that are linked, directly or
// transitively, to index `_cdadf`. Index i counts as linked to j when
// `_gcaf[i]` contains j.
func (_fggg rulingList) connections(_gcaf map[int]intSet, _cdadf int) intSet {
	linked := make(intSet)
	visited := make(intSet)
	var visit func(int)
	visit = func(target int) {
		if visited.has(target) {
			return
		}
		visited.add(target)
		for i := range _fggg {
			if _gcaf[i].has(target) {
				linked.add(i)
			}
		}
		// Follow everything collected so far; the visited set stops cycles.
		for i := range _fggg {
			if linked.has(i) {
				visit(i)
			}
		}
	}
	visit(_cdadf)
	return linked
}

// writeCellText writes the text of each line of the paragraph to `_bdba`.
// When the package flag `_badf` is set and a non-final line ends in a hyphen,
// its text is passed through `_fafd` and no separator follows it; otherwise
// non-final lines are followed by the separator `_afed` computes from the
// depths of the two adjacent lines. Write errors are not checked.
func (_cced *textPara) writeCellText(_bdba _ed.Writer) {
	lines := _cced._ecbdg
	last := len(lines) - 1
	for i, line := range lines {
		text := line.text()
		joinHyphen := _badf && line.endsInHyphen() && i != last
		if joinHyphen {
			text = _fafd(text)
		}
		_bdba.Write([]byte(text))
		if joinHyphen || i == last {
			continue
		}
		_bdba.Write([]byte(_afed(line._dcfd, lines[i+1]._dcfd)))
	}
}

// reduceTiling returns a table with near-duplicate empty rows and columns removed.
// A row is dropped when it is not the first, its tiling coordinate is within
// `_eebc` of the previous row's and it holds no composite paragraphs; a column
// is dropped when it is not the last, its coordinate is within `_eebc` of the
// next column's and `emptyCompositeColumn` reports it empty.
// If nothing is dropped the receiver itself is returned.
func (_addg *textTable) reduceTiling(_bfge gridTiling, _eebc float64) *textTable {
	keptRows := make([]int, 0, _addg._cabfg)
	keptCols := make([]int, 0, _addg._baee)
	colCoords := _bfge._fdeac
	rowCoords := _bfge._gccf

	for y := 0; y < _addg._cabfg; y++ {
		if y > 0 && _b.Abs(rowCoords[y-1]-rowCoords[y]) < _eebc && _addg.emptyCompositeRow(y) {
			continue
		}
		keptRows = append(keptRows, y)
	}
	for x := 0; x < _addg._baee; x++ {
		if x < _addg._baee-1 && _b.Abs(colCoords[x+1]-colCoords[x]) < _eebc && _addg.emptyCompositeColumn(x) {
			continue
		}
		keptCols = append(keptCols, x)
	}

	if len(keptRows) == _addg._cabfg && len(keptCols) == _addg._baee {
		return _addg
	}

	reduced := textTable{
		_acgbd: _addg._acgbd,
		_baee:  len(keptCols),
		_cabfg: len(keptRows),
		_bbfb:  make(map[uint64]compositeCell, len(keptCols)*len(keptRows)),
	}
	if _beae {
		// Format: "reduceTiling: %dx%d -> %dx%d"
		_ag.Log.Info("\u0072\u0065\u0064\u0075c\u0065\u0054\u0069\u006c\u0069\u006e\u0067\u003a\u0020\u0025d\u0078%\u0064\u0020\u002d\u003e\u0020\u0025\u0064x\u0025\u0064", _addg._baee, _addg._cabfg, len(keptCols), len(keptRows))
		// Format: "reducedCols: %+v"
		_ag.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0043\u006f\u006c\u0073\u003a\u0020\u0025\u002b\u0076", keptCols)
		// Format: "reducedRows: %+v"
		_ag.Log.Info("\u0072\u0065d\u0075\u0063\u0065d\u0052\u006f\u0077\u0073\u003a\u0020\u0025\u002b\u0076", keptRows)
	}

	// Copy every non-empty composite cell to its position in the reduced grid.
	for newY, oldY := range keptRows {
		for newX, oldX := range keptCols {
			paras, rect := _addg.getComposite(oldX, oldY)
			if len(paras) == 0 {
				continue
			}
			if _beae {
				// Format: "  %2d, %2d (%2d, %2d) %q\n"
				_gdf.Printf("\u0020 \u0025\u0032\u0064\u002c \u0025\u0032\u0064\u0020\u0028%\u0032d\u002c \u0025\u0032\u0064\u0029\u0020\u0025\u0071\n", newX, newY, oldX, oldY, _babd(paras.merge().text(), 50))
			}
			reduced.putComposite(newX, newY, paras, rect)
		}
	}
	return &reduced
}

// _bbcg removes the element at index `_cfdd` from `_ggefba` by shifting the
// tail left one place. The backing array is reused, so the caller's slice is
// modified.
func _bbcg(_ggefba []*textWord, _cfdd int) []*textWord {
	last := len(_ggefba) - 1
	copy(_ggefba[_cfdd:], _ggefba[_cfdd+1:])
	return _ggefba[:last]
}

// bbox returns the rectangle embedded in the table.
func (_ffca *textTable) bbox() _bf.PdfRectangle {
	return _ffca.PdfRectangle
}
// setHorizScaling stores `_ffcg` in the `_gdef` field of the text state.
// It is a no-op on a nil text object.
func (_dfccd *textObject) setHorizScaling(_ffcg float64) {
	if _dfccd != nil {
		_dfccd._degf._gdef = _ffcg
	}
}

// _bceab returns a new slice holding the elements of `_daca` in reverse order.
// The input is not modified.
func _bceab(_daca []int) []int {
	reversed := make([]int, len(_daca))
	for src, dst := 0, len(_daca)-1; dst >= 0; src, dst = src+1, dst-1 {
		reversed[dst] = _daca[src]
	}
	return reversed
}

// lines returns the lines of all paragraphs in the list, concatenated in list
// order. The result is nil when there are no lines.
func (_efdcf paraList) lines() []*textLine {
	var all []*textLine
	for i := range _efdcf {
		all = append(all, _efdcf[i]._ecbdg...)
	}
	return all
}

// _ggfdc converts exactly two numeric PDF objects to float64 values.
// It returns an error when `_gdea` does not hold two objects or when the
// numeric conversion fails.
func _ggfdc(_gdea []_bg.PdfObject) (_fgdce, _abfg float64, _dadcfd error) {
	if n := len(_gdea); n != 2 {
		// Format: "invalid number of params: %d"
		return 0, 0, _gdf.Errorf("\u0069\u006e\u0076\u0061l\u0069\u0064\u0020\u006e\u0075\u006d\u0062\u0065\u0072\u0020o\u0066 \u0070\u0061\u0072\u0061\u006d\u0073\u003a \u0025\u0064", n)
	}
	values, convErr := _bg.GetNumbersAsFloat(_gdea)
	if convErr != nil {
		return 0, 0, convErr
	}
	return values[0], values[1], nil
}

// getFontDirect looks up the font object named `_aafd` with getFontDict and
// builds a font from it. A lookup error is returned as is; a construction
// error is logged and returned together with whatever the constructor produced.
func (_efdcb *textObject) getFontDirect(_aafd string) (*_bf.PdfFont, error) {
	fontObj, err := _efdcb.getFontDict(_aafd)
	if err != nil {
		return nil, err
	}
	font, err := _bf.NewPdfFontFromPdfObject(fontObj)
	if err != nil {
		_ag.Log.Debug("\u0067\u0065\u0074\u0046\u006f\u006e\u0074\u0044\u0069\u0072\u0065\u0063\u0074\u003a\u0020\u004e\u0065\u0077Pd\u0066F\u006f\u006e\u0074\u0046\u0072\u006f\u006d\u0050\u0064\u0066\u004f\u0062j\u0065\u0063\u0074\u0020\u0066\u0061\u0069\u006c\u0065\u0064\u002e\u0020\u006e\u0061\u006d\u0065\u003d%\u0023\u0071\u0020\u0065\u0072\u0072\u003d\u0025\u0076", _aafd, err)
	}
	return font, err
}

// add appends the given points to the subpath.
func (_egbb *subpath) add(_geeb ..._ee.Point) {
	_egbb._fcdc = append(_egbb._fcdc, _geeb...)
}
2022-09-10 15:35:04 +00:00
2022-09-23 18:05:51 +00:00
// String returns a description of `state`.
2022-10-27 19:04:58 +00:00
func (_gbg *textState) String() string {
	// Placeholder "[NOT SET]" is shown when no font has been assigned.
	fontName := "\u005bN\u004f\u0054\u0020\u0053\u0045\u0054]"
	if font := _gbg._dde; font != nil {
		fontName = font.BaseFont()
	}
	// Format: "tc=%.2f tw=%.2f tfs=%.2f font=%q"
	return _gdf.Sprintf("\u0074\u0063\u003d\u0025\u002e\u0032\u0066\u0020\u0074\u0077\u003d\u0025\u002e\u0032\u0066 \u0074f\u0073\u003d\u0025\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u003d\u0025\u0071", _gbg._bfcb, _gbg._gab, _gbg._adfb, fontName)
}

// complete reports whether the tile has all four borders.
func (_ccaad gridTile) complete() bool {
	return _ccaad.numBorders() == 4
}

// removeDuplicates sorts the list in place and returns a new list in which
// each ruling that `equals` the one kept before it has been dropped.
// An empty list yields nil.
func (_dfdg rulingList) removeDuplicates() rulingList {
	if len(_dfdg) == 0 {
		return nil
	}
	_dfdg.sort()
	kept := _dfdg[0]
	unique := rulingList{kept}
	for _, candidate := range _dfdg[1:] {
		if !candidate.equals(kept) {
			unique = append(unique, candidate)
			kept = candidate
		}
	}
	return unique
}

// llyOrdering returns the indexes of the list ordered by increasing Lly.
// The sort is stable, so paragraphs with equal Lly keep their list order.
func (_baac paraList) llyOrdering() []int {
	order := make([]int, 0, len(_baac))
	for i := range _baac {
		order = append(order, i)
	}
	_f.SliceStable(order, func(a, b int) bool {
		return _baac[order[a]].Lly < _baac[order[b]].Lly
	})
	return order
}

// allWords returns every word held in the bag's `_bbf` map. Because the map is
// ranged over, the order of the groups is unspecified.
func (_babb *wordBag) allWords() []*textWord {
	var words []*textWord
	for key := range _babb._bbf {
		words = append(words, _babb._bbf[key]...)
	}
	return words
}

// _bddc returns a new subpath whose only point is `_bcdb`.
func _bddc(_bcdb _ee.Point) *subpath {
	sp := subpath{_fcdc: []_ee.Point{_bcdb}}
	return &sp
}
// absorb pulls every word of `_dcae` into the receiver, recording each move in
// a removals set that is applied to `_dcae` once all words have been transferred.
func (_ebgd *wordBag) absorb(_dcae *wordBag) {
	removals := _dcae.makeRemovals()
	for key, words := range _dcae._bbf {
		for i := range words {
			_ebgd.pullWord(words[i], key, removals)
		}
	}
	_dcae.applyRemovals(removals)
}

// _bega classifies the segment between two points by handing its absolute X
// and Y extents, together with the package threshold `_ffdeb`, to `_bfcbe`.
func _bega(_ebcf, _bdfef _ee.Point) rulingKind {
	dx, dy := _b.Abs(_ebcf.X-_bdfef.X), _b.Abs(_ebcf.Y-_bdfef.Y)
	return _bfcbe(dx, dy, _ffdeb)
}

// checkWidth returns the difference `_ceggc - _cegg` and whether that
// difference is no larger than the package threshold `_dcfe`.
func (_acfdb rectRuling) checkWidth(_cegg, _ceggc float64) (float64, bool) {
	width := _ceggc - _cegg
	return width, width <= _dcfe
}

// extractInlineImage converts an inline image to RGB and appends an ImageMark
// for it to the context, using the CTM of `_cb` for position, size and angle.
// A missing colorspace defaults to DeviceGray. Any decoding or conversion
// error is returned and nothing is recorded.
func (_bfcg *imageExtractContext) extractInlineImage(_eaf *_da.ContentStreamInlineImage, _cb _da.GraphicsState, _acb *_bf.PdfPageResources) error {
	img, err := _eaf.ToImage(_acb)
	if err != nil {
		return err
	}
	colorspace, err := _eaf.GetColorSpace(_acb)
	if err != nil {
		return err
	}
	if colorspace == nil {
		colorspace = _bf.NewPdfColorspaceDeviceGray()
	}
	rgb, err := colorspace.ImageToRGB(*img)
	if err != nil {
		return err
	}

	mark := ImageMark{
		Image:  &rgb,
		Width:  _cb.CTM.ScalingFactorX(),
		Height: _cb.CTM.ScalingFactorY(),
		Angle:  _cb.CTM.Angle(),
	}
	mark.X, mark.Y = _cb.CTM.Translation()

	_bfcg._dba = append(_bfcg._dba, mark)
	_bfcg._gcc++
	return nil
}

// textObject holds the state used while processing text operators.
type textObject struct {
	_ccae *Extractor
	_caa  *_bf.PdfPageResources // resources consulted when looking up fonts by name
	_fcdg _da.GraphicsState
	_degf *textState // current text state (font, font size, scaling, rise, ...)
	_afg  *stateStack
	_gcb  _ee.Matrix
	_baff _ee.Matrix
	_dbdg []*textMark
	_cdbc bool
}

// alignsPrimary reports whether both rulings have the same kind and their
// `_eead` coordinates differ by less than half of `_dcfe`.
func (_ddfg *ruling) alignsPrimary(_feede *ruling) bool {
	if _ddfg._cgef != _feede._cgef {
		return false
	}
	return _b.Abs(_ddfg._eead-_feede._eead) < _dcfe*0.5
}

// get returns the paragraph stored for cell (`_cabad`, `_facd`); the result is
// nil when the cell has no entry.
func (_gcgbf *textTable) get(_cabad, _facd int) *textPara {
	key := _fcbc(_cabad, _facd)
	return _gcgbf._aagc[key]
}
// ExtractPageImages returns the image contents of the page extractor, including data
// and position, size information for each image.
// A set of options to control page image extraction can be passed in. The options
// parameter can be nil for the default options. By default, inline stencil masks
// are not extracted.
func (_adde *Extractor) ExtractPageImages(options *ImageExtractOptions) (*PageImages, error) {
	ctx := &imageExtractContext{_gfa: options}
	if err := ctx.extractContentStreamImages(_adde._fc, _adde._ad); err != nil {
		return nil, err
	}
	return &PageImages{Images: ctx._dba}, nil
}

// _eagab computes horizontal borders that separate the lines of the cells in
// `_cebgb` into vertically disjoint groups.
//
// All lines of all cells are sorted by their `_dcfd` value (ties, as decided by
// `_fbga`, are broken by Llx). Walking the sorted lines, a border is placed
// midway between a line's Ury and the lowest Lly seen so far whenever the line
// lies entirely below that Lly; the lines between borders form a group.
//
// The borders are returned only if every cell `hasLines` for every group;
// otherwise nil is returned. nil is also returned when the cells contain no
// lines at all (previously this case panicked on an out-of-range index).
// When the package flag `_beae` is set, progress is printed for debugging.
func _eagab(_cebgb []compositeCell) []float64 {
	var lines []*textLine
	paraCount := 0
	for _, cell := range _cebgb {
		paraCount += len(cell.paraList)
		lines = append(lines, cell.lines()...)
	}
	_f.Slice(lines, func(i, j int) bool {
		a, b := lines[i], lines[j]
		da, db := a._dcfd, b._dcfd
		if !_fbga(da - db) {
			return da < db
		}
		return a.Llx < b.Llx
	})
	if _beae {
		_gdf.Printf("\u0020\u0020\u0020 r\u006f\u0077\u0042\u006f\u0072\u0064\u0065\u0072\u0073:\u0020%\u0064 \u0070a\u0072\u0061\u0073\u0020\u0025\u0064\u0020\u006c\u0069\u006e\u0065\u0073\u000a", paraCount, len(lines))
		for i, line := range lines {
			_gdf.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", i, line)
		}
	}

	// Without any line there is nothing to separate; bail out before
	// indexing lines[0].
	if len(lines) == 0 {
		return nil
	}

	var borders []float64
	var groups [][]*textLine
	lowest := lines[0]
	current := []*textLine{lowest}
	for i, line := range lines[1:] {
		if line.Ury < lowest.Lly {
			// A vertical gap: close the current group and record the border.
			border := 0.5 * (line.Ury + lowest.Lly)
			if _beae {
				_gdf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u003c\u0020\u0025\u0036.\u0032f\u0020\u0062\u006f\u0072\u0064\u0065\u0072\u003d\u0025\u0036\u002e\u0032\u0066\u000a"+"\u0009\u0020\u0071\u003d\u0025\u0073\u000a\u0009\u0020p\u003d\u0025\u0073\u000a", i, line.Ury, lowest.Lly, border, lowest, line)
			}
			borders = append(borders, border)
			groups = append(groups, current)
			current = nil
		}
		current = append(current, line)
		if line.Lly < lowest.Lly {
			lowest = line
		}
	}
	if len(current) > 0 {
		groups = append(groups, current)
	}

	if _beae {
		_gdf.Printf(" \u0020\u0020\u0020\u0020\u0020\u0020 \u0072\u006f\u0077\u0043\u006f\u0072\u0072\u0069\u0064o\u0072\u0073\u003d%\u0036.\u0032\u0066\u000a", borders)
		_ag.Log.Info("\u0072\u006f\u0077\u003d\u0025\u0064", len(_cebgb))
		for i, cell := range _cebgb {
			_gdf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, cell)
		}
		_ag.Log.Info("\u0067r\u006f\u0075\u0070\u0073\u003d\u0025d", len(groups))
		for i, group := range groups {
			_gdf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0064\u000a", i, len(group))
			for j, line := range group {
				_gdf.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", j, line)
			}
		}
	}

	// The borders are only usable when every group has lines in every cell.
	spansAll := true
	for gi, group := range groups {
		inEveryCell := true
		for ci, cell := range _cebgb {
			if _beae {
				_gdf.Printf("\u0020\u0020\u0020\u007e\u007e\u007e\u0067\u0072\u006f\u0075\u0070\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064\u0020\u0063\u0065\u006cl\u0020\u0025\u0064\u0020\u006ff\u0020\u0025d\u0020\u0025\u0073\u000a", gi, len(groups), ci, len(_cebgb), cell)
			}
			if !cell.hasLines(group) {
				if _beae {
					_gdf.Printf("\u0020\u0020\u0020\u0021\u0021\u0021\u0067\u0072\u006f\u0075\u0070\u0020\u0025d\u0020\u006f\u0066\u0020\u0025\u0064 \u0063\u0065\u006c\u006c\u0020\u0025\u0064\u0020\u006f\u0066\u0020\u0025\u0064 \u004f\u0055\u0054\u000a", gi, len(groups), ci, len(_cebgb))
				}
				inEveryCell = false
				break
			}
		}
		if !inEveryCell {
			spansAll = false
			break
		}
	}
	if !spansAll {
		if _beae {
			_ag.Log.Info("\u0072\u006f\u0077\u0020\u0063o\u0072\u0072\u0069\u0064\u006f\u0072\u0073\u0020\u0064\u006f\u006e\u0027\u0074 \u0073\u0070\u0061\u006e\u0020\u0061\u006c\u006c\u0020\u0063\u0065\u006c\u006c\u0073\u0020\u0069\u006e\u0020\u0072\u006f\u0077\u002e\u0020\u0069\u0067\u006e\u006f\u0072\u0069\u006eg")
		}
		borders = nil
	}
	if _beae && borders != nil {
		_gdf.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u002a\u002a*\u0072\u006f\u0077\u0043\u006f\u0072\u0072i\u0064\u006f\u0072\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u000a", borders)
	}
	return borders
}
// setTextRise stores `_cgg` in the `_bacc` field of the text state.
// It is a no-op on a nil text object.
func (_cca *textObject) setTextRise(_cgg float64) {
	if _cca != nil {
		_cca._degf._bacc = _cgg
	}
}
// NewWithOptions returns an Extractor instance for extracting content from the
// input PDF page with options.
// It fails when the page's content streams or its media box cannot be read.
// A media box whose X or Y coordinates are reversed is normalized.
func NewWithOptions(page *_bf.PdfPage, options *Options) (*Extractor, error) {
	// Usage key reported to the license tracker: "extractor.NewWithOptions".
	const _dd = "\u0065x\u0074\u0072\u0061\u0063\u0074\u006f\u0072\u002e\u004e\u0065\u0077W\u0069\u0074\u0068\u004f\u0070\u0074\u0069\u006f\u006e\u0073"

	contents, err := page.GetAllContentStreams()
	if err != nil {
		return nil, err
	}
	mediaBox, err := page.GetMediaBox()
	if err != nil {
		// Format: "extractor requires mediaBox. %v"
		return nil, _gdf.Errorf("\u0065\u0078\u0074r\u0061\u0063\u0074\u006fr\u0020\u0072\u0065\u0071\u0075\u0069\u0072e\u0073\u0020\u006d\u0065\u0064\u0069\u0061\u0042\u006f\u0078\u002e\u0020\u0025\u0076", err)
	}

	extractor := &Extractor{
		_fc: contents,
		_ad: page.Resources,
		_bb: *mediaBox,
		_ga: page.CropBox,
		_ac: map[string]fontEntry{},
		_fd: map[string]textResult{},
		_bc: options,
	}

	box := &extractor._bb
	if box.Llx > box.Urx {
		_ag.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0058\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", *box)
		box.Llx, box.Urx = box.Urx, box.Llx
	}
	if box.Lly > box.Ury {
		_ag.Log.Info("\u004d\u0065\u0064\u0069\u0061\u0042o\u0078\u0020\u0068\u0061\u0073\u0020\u0059\u0020\u0063\u006f\u006f\u0072\u0064\u0069\u006e\u0061\u0074\u0065\u0073\u0020r\u0065\u0076\u0065\u0072\u0073\u0065\u0064\u002e\u0020\u0025\u002e\u0032\u0066\u0020F\u0069x\u0069\u006e\u0067\u002e", *box)
		box.Lly, box.Ury = box.Ury, box.Lly
	}

	_cd.TrackUse(_dd)
	return extractor, nil
}

// endsInHyphen reports whether the line ends in a hyphen that `_fef` accepts.
// The last rune of the last word must be a Unicode hyphen. The line then
// qualifies if the last word has its `_egcbd` flag set and `_fef` accepts the
// word's text, or if `_fef` accepts the text of the whole line.
// The line must contain at least one word.
func (_dbggd *textLine) endsInHyphen() bool {
	lastWord := _dbggd._egad[len(_dbggd._egad)-1]
	wordText := lastWord._bcaa
	lastRune, size := _a.DecodeLastRuneInString(wordText)
	if !(size > 0 && _gg.Is(_gg.Hyphen, lastRune)) {
		return false
	}
	if lastWord._egcbd && _fef(wordText) {
		return true
	}
	return _fef(_dbggd.text())
}

// del removes `_ggag` from the set; removing an absent value is a no-op.
func (_gadc intSet) del(_ggag int) {
	delete(_gadc, _ggag)
}

// Values of markKind, numbered consecutively from 0.
const (
	_dcbf markKind = iota
	_caade
	_gaea
	_eddbd
)
// String returns a human readable description of `vecs`: the number of
// rulings in each of the two lists returned by vertsHorzs and, when both are
// non-empty, the rectangle spanned by their first and last `_eead` coordinates.
func (_gedfd rulingList) String() string {
	if len(_gedfd) == 0 {
		// "{ EMPTY }"
		return "\u007b \u0045\u004d\u0050\u0054\u0059\u0020}"
	}
	verts, horzs := _gedfd.vertsHorzs()
	nVerts, nHorzs := len(verts), len(horzs)
	if nVerts == 0 || nHorzs == 0 {
		// Format: "{%d x %d}"
		return _gdf.Sprintf("\u007b%\u0064\u0020\u0078\u0020\u0025\u0064}", nVerts, nHorzs)
	}
	extent := _bf.PdfRectangle{
		Llx: verts[0]._eead,
		Urx: verts[nVerts-1]._eead,
		Lly: horzs[nHorzs-1]._eead,
		Ury: horzs[0]._eead,
	}
	// Format: "{%d x %d: %6.2f}"
	return _gdf.Sprintf("\u007b\u0025d\u0020\u0078\u0020%\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u007d", nVerts, nHorzs, extent)
}

// snapToGroupsDirection groups rulings whose `_eead` coordinates are close
// together, gives every member of a group the coordinate computed by the
// group's mergePrimary, then splits each group with splitSec and merges each
// part into a single ruling. A merged ruling that aligns (both alignsPrimary
// and alignsSec) with the one emitted just before it is logged as a duplicate
// and skipped. The result is returned sorted by sortStrict.
//
// The receiver is sorted and its rulings are modified in place. It must not be
// empty, since its first element is indexed unconditionally.
func (_gede rulingList) snapToGroupsDirection() rulingList {
	_gede.sortStrict()

	// Each group is keyed by the ruling that opened it.
	groups := make(map[*ruling]rulingList, len(_gede))
	leader := _gede[0]
	openGroup := func(r *ruling) {
		leader = r
		groups[leader] = rulingList{r}
	}
	openGroup(_gede[0])
	for _, r := range _gede[1:] {
		if r._eead < leader._eead-_ffba {
			_ag.Log.Error("\u0073\u006e\u0061\u0070T\u006f\u0047\u0072\u006f\u0075\u0070\u0073\u0044\u0069r\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0057\u0072\u006f\u006e\u0067\u0020\u0070\u0072\u0069\u006da\u0072\u0079\u0020\u006f\u0072d\u0065\u0072\u002e\u000a\u0009\u0076\u0030\u003d\u0025\u0073\u000a\u0009\u0020\u0076\u003d\u0025\u0073", leader, r)
		}
		if r._eead > leader._eead+_dcfe {
			openGroup(r)
			continue
		}
		groups[leader] = append(groups[leader], r)
	}

	// Compute every group's common coordinate before touching any ruling.
	snapped := make(map[*ruling]float64, len(groups))
	leaderOf := make(map[*ruling]*ruling, len(_gede))
	for groupLeader, members := range groups {
		snapped[groupLeader] = members.mergePrimary()
		for _, member := range members {
			leaderOf[member] = groupLeader
		}
	}
	for _, r := range _gede {
		r._eead = snapped[leaderOf[r]]
	}

	merged := make(rulingList, 0, len(_gede))
	for _, members := range groups {
		for i, part := range members.splitSec() {
			candidate := part.merge()
			if n := len(merged); n > 0 {
				previous := merged[n-1]
				if previous.alignsPrimary(candidate) && previous.alignsSec(candidate) {
					_ag.Log.Error("\u0073\u006e\u0061\u0070\u0054\u006fG\u0072\u006f\u0075\u0070\u0073\u0044\u0069\u0072\u0065\u0063\u0074\u0069\u006f\u006e\u003a\u0020\u0044\u0075\u0070\u006ci\u0063\u0061\u0074\u0065\u0020\u0069\u003d\u0025\u0064\u000a\u0009\u0077\u003d\u0025s\u000a\t\u0076\u003d\u0025\u0073", i, previous, candidate)
					continue
				}
			}
			merged = append(merged, candidate)
		}
	}
	merged.sortStrict()
	return merged
}

// depthRange returns, in increasing order, the keys of the bag's `_bbf` map
// that lie in the closed interval [`_dbgd`, `_bcbd`]. It returns nil when no
// key falls in the interval.
func (_gfdg *wordBag) depthRange(_dbgd, _bcbd int) []int {
	var keys []int
	for key := range _gfdg._bbf {
		if key < _dbgd || key > _bcbd {
			continue
		}
		keys = append(keys, key)
	}
	if len(keys) == 0 {
		return nil
	}
	_f.Ints(keys)
	return keys
}

// establishSubpath returns the most recent subpath of the state. If
// lastpointEstablished reports that no point is established, a new subpath
// seeded with the returned point is appended first. It returns nil when the
// state holds no subpaths; otherwise the `_ecd` flag is cleared.
func (_caad *shapesState) establishSubpath() *subpath {
	point, established := _caad.lastpointEstablished()
	if !established {
		_caad._cfgd = append(_caad._cfgd, _bddc(point))
	}
	count := len(_caad._cfgd)
	if count == 0 {
		return nil
	}
	_caad._ecd = false
	return _caad._cfgd[count-1]
}

// depth returns -1 when the `_dgggff` flag is set, otherwise the `_dcfd` value
// of the first line if the paragraph has lines, otherwise the depth of its table.
func (_ffga *textPara) depth() float64 {
	switch {
	case _ffga._dgggff:
		return -1.0
	case len(_ffga._ecbdg) > 0:
		return _ffga._ecbdg[0]._dcfd
	default:
		return _ffga._bccg.depth()
	}
}

// _gfdbb converts a PDF colour in the given colorspace to a Go colour.
// Black is returned when either argument is nil, when the conversion to RGB
// fails, or when the converted colour is not a *PdfColorDeviceRGB.
func _gfdbb(_ggdcb _bf.PdfColorspace, _badeg _bf.PdfColor) _eda.Color {
	if _ggdcb == nil || _badeg == nil {
		return _eda.Black
	}
	converted, err := _ggdcb.ColorToRGB(_badeg)
	if err != nil {
		_ag.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006fu\u006c\u0064\u0020no\u0074\u0020\u0063\u006f\u006e\u0076e\u0072\u0074\u0020\u0063\u006f\u006c\u006f\u0072\u0020\u0025\u0076\u0020\u0028\u0025\u0076)\u0020\u0074\u006f\u0020\u0052\u0047\u0042\u003a \u0025\u0073", _badeg, _ggdcb, err)
		return _eda.Black
	}
	rgb, isRGB := converted.(*_bf.PdfColorDeviceRGB)
	if !isRGB {
		_ag.Log.Debug("\u0057\u0041\u0052\u004e\u003a\u0020\u0063\u006f\u006e\u0076\u0065\u0072\u0074\u0065\u0064 \u0063\u006f\u006c\u006f\u0072\u0020\u0069\u0073\u0020\u006e\u006f\u0074\u0020i\u006e\u0020\u0074\u0068\u0065\u0020\u0052\u0047\u0042\u0020\u0063\u006flo\u0072\u0073\u0070\u0061\u0063\u0065\u003a\u0020\u0025\u0076", converted)
		return _eda.Black
	}
	// Each component is scaled by 255 and truncated to 8 bits; alpha is opaque.
	return _eda.NRGBA{
		R: uint8(rgb.R() * 255),
		G: uint8(rgb.G() * 255),
		B: uint8(rgb.B() * 255),
		A: uint8(255),
	}
}

// getFontDict returns the font object registered under `_ecf` in the text
// object's resources. With no resources it logs and returns (nil, nil); when
// the name is not found it returns an error.
func (_ebgg *textObject) getFontDict(_ecf string) (_cfd _bg.PdfObject, _gec error) {
	resources := _ebgg._caa
	if resources == nil {
		_ag.Log.Debug("g\u0065\u0074\u0046\u006f\u006e\u0074D\u0069\u0063\u0074\u002e\u0020\u004eo\u0020\u0072\u0065\u0073\u006f\u0075\u0072c\u0065\u0073\u002e\u0020\u006e\u0061\u006d\u0065\u003d\u0025#\u0071", _ecf)
		return nil, nil
	}
	fontObj, found := resources.GetFontByName(_bg.PdfObjectName(_ecf))
	if !found {
		_ag.Log.Debug("\u0045R\u0052\u004fR\u003a\u0020\u0067\u0065t\u0046\u006f\u006et\u0044\u0069\u0063\u0074\u003a\u0020\u0046\u006f\u006et \u006e\u006f\u0074 \u0066\u006fu\u006e\u0064\u003a\u0020\u006e\u0061m\u0065\u003d%\u0023\u0071", _ecf)
		// Error text: "font not in resources"
		return nil, _g.New("f\u006f\u006e\u0074\u0020no\u0074 \u0069\u006e\u0020\u0072\u0065s\u006f\u0075\u0072\u0063\u0065\u0073")
	}
	return fontObj, nil
}
// Append appends `mark` to the mark array.
func (_cbe *TextMarkArray) Append(mark TextMark) {
	_cbe._fceg = append(_cbe._fceg, mark)
}

// eventNeighbours sweeps over `_ffbag` and returns, for every paragraph that
// has an entering event, the indexes of the paragraphs that were active at the
// same time.
//
// The events are sorted in place by their `_cega` value; at equal values,
// events with the `_fddb` flag set come first. An event with `_fddb` set
// activates the paragraph index `_dbdfc` and links it with every currently
// active index; an event without it deactivates that index. A paragraph with
// no neighbours maps to nil. The order of the neighbour indexes is unspecified.
func (_ebbcf paraList) eventNeighbours(_ffbag []event) map[*textPara][]int {
	_f.Slice(_ffbag, func(i, j int) bool {
		a, b := _ffbag[i], _ffbag[j]
		posA, posB := a._cega, b._cega
		if posA != posB {
			return posA < posB
		}
		if a._fddb != b._fddb {
			return a._fddb
		}
		return i < j
	})

	neighbours := make(map[int]intSet)
	active := make(intSet)
	for _, ev := range _ffbag {
		id := ev._dbdfc
		if !ev._fddb {
			active.del(id)
			continue
		}
		own := make(intSet)
		neighbours[id] = own
		for other := range active {
			if other == id {
				continue
			}
			own.add(other)
			neighbours[other].add(id)
		}
		active.add(id)
	}

	result := map[*textPara][]int{}
	for id, set := range neighbours {
		para := _ebbcf[id]
		if len(set) == 0 {
			result[para] = nil
			continue
		}
		ids := make([]int, 0, len(set))
		for other := range set {
			ids = append(ids, other)
		}
		result[para] = ids
	}
	return result
}
// String returns a description of `v`: its kind, its primary and secondary
// coordinates labelled with their axis names, the secondary extent, the
// `_gbgb` field, the colour and, when non-zero, the width.
// A ruling of kind `_gdbb` is described as "NOT RULING".
func (_ffffg *ruling) String() string {
	if _ffffg._cgef == _gdbb {
		return "\u004e\u004f\u0054\u0020\u0052\u0055\u004c\u0049\u004e\u0047"
	}
	// Axis labels are "x"/"y", swapped for rulings of kind `_ebabd`.
	primaryAxis, secondaryAxis := "\u0078", "\u0079"
	if _ffffg._cgef == _ebabd {
		primaryAxis, secondaryAxis = "\u0079", "\u0078"
	}
	var widthText string
	if _ffffg._efgd != 0.0 {
		// Format: " width=%.2f"
		widthText = _gdf.Sprintf(" \u0077\u0069\u0064\u0074\u0068\u003d\u0025\u002e\u0032\u0066", _ffffg._efgd)
	}
	// Format: "%10s %s=%6.2f %s=%6.2f - %6.2f (%6.2f) %s %v%s"
	return _gdf.Sprintf("\u0025\u00310\u0073\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u0025\u0073\u003d\u0025\u0036\u002e\u0032\u0066\u0020\u002d\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0020\u0025\u0076\u0025\u0073",
		_ffffg._cgef, primaryAxis, _ffffg._eead, secondaryAxis, _ffffg._bgeb, _ffffg._eecc, _ffffg._eecc-_ffffg._bgeb, _ffffg._gbgb, _ffffg.Color, widthText)
}

// removeDuplicates drops every point that `_egag` reports equal to the point
// kept immediately before it. An empty subpath is left untouched.
func (_bbgaa *subpath) removeDuplicates() {
	points := _bbgaa._fcdc
	if len(points) == 0 {
		return
	}
	kept := []_ee.Point{points[0]}
	for i := 1; i < len(points); i++ {
		if _egag(points[i], kept[len(kept)-1]) {
			continue
		}
		kept = append(kept, points[i])
	}
	_bbgaa._fcdc = kept
}

// getRight collects, for every row, the `_bfdc` neighbour of the cell in the
// table's last column. It returns nil if any of these paragraphs is already
// taken, or if the collected paragraphs are not chained through their `_egec`
// links from one row to the next.
func (_efaf *textTable) getRight() paraList {
	rows := _efaf._cabfg
	lastCol := _efaf._baee - 1
	column := make(paraList, rows)
	for y := 0; y < rows; y++ {
		neighbour := _efaf.get(lastCol, y)._bfdc
		if neighbour.taken() {
			return nil
		}
		column[y] = neighbour
	}
	for y := 0; y+1 < rows; y++ {
		if column[y]._egec != column[y+1] {
			return nil
		}
	}
	return column
}

// findTextTables tries to grow a table from every paragraph that is not yet
// taken and has non-zero width. A candidate is kept when isAtom yields a table
// and, after growTable, its cell count is at least `_gdfd`; kept tables have
// their cells marked and are logged.
func (_feaf paraList) findTextTables() []*textTable {
	var tables []*textTable
	for _, para := range _feaf {
		if para.taken() || para.Width() == 0 {
			continue
		}
		table := para.isAtom()
		if table == nil {
			continue
		}
		table.growTable()
		if cells := table._baee * table._cabfg; cells < _gdfd {
			continue
		}
		table.markCells()
		// Label: "grown"
		table.log("\u0067\u0072\u006fw\u006e")
		tables = append(tables, table)
	}
	return tables
}

// contains reports whether `_gagag` fits inside the tile. The tile must have
// at least three borders, and for each side whose flag is set `_gagag` may not
// extend past that side by more than the tolerance `_egfc`.
func (_dage gridTile) contains(_gagag _bf.PdfRectangle) bool {
	switch {
	case _dage.numBorders() < 3,
		_dage._bceg && _gagag.Llx < _dage.Llx-_egfc,
		_dage._abdfe && _gagag.Urx > _dage.Urx+_egfc,
		_dage._abag && _gagag.Lly < _dage.Lly-_egfc,
		_dage._gfga && _gagag.Ury > _dage.Ury+_egfc:
		return false
	}
	return true
}
// ApplyArea processes the page text only within the specified area `bbox`.
// Each time ApplyArea is called, it updates the result set in `pt`.
// Can be called multiple times in a row with different bounding boxes.
//
// The marks selected by `_gcgg` for `bbox` are laid out separately for each of
// the orientations 0, 90, 180 and 270, and the resulting paragraphs are
// concatenated in that order.
func (_dgeb *PageText) ApplyArea(bbox _bf.PdfRectangle) {
	selected := make([]*textMark, 0, len(_dgeb._cac))
	for _, mark := range _dgeb._cac {
		if _gcgg(mark.bbox(), bbox) {
			selected = append(selected, mark)
		}
	}

	var paras paraList
	remaining := len(selected)
	for angle := 0; angle < 360 && remaining > 0; angle += 90 {
		// At most `remaining` marks can still have this orientation. The
		// previous hint was the number already consumed, i.e. 0 on the first pass.
		oriented := make([]*textMark, 0, remaining)
		for _, mark := range selected {
			if mark._bfbc == angle {
				oriented = append(oriented, mark)
			}
		}
		if len(oriented) > 0 {
			paras = append(paras, _aadf(oriented, _dgeb._fabf, nil, nil)...)
			remaining -= len(oriented)
		}
	}

	buf := new(_gd.Buffer)
	paras.writeText(buf)
	_dgeb._gcea = buf.String()
	_dgeb._cagb = paras.toTextMarks()
	_dgeb._ece = paras.tables()
}

// text returns the concatenated text of the words in the line. A single space
// is inserted before every word whose `_egcbd` flag is set.
func (_gcgb *textLine) text() string {
	var sb _d.Builder
	for _, word := range _gcgb._egad {
		if word._egcbd {
			sb.WriteString("\u0020")
		}
		sb.WriteString(word._bcaa)
	}
	return sb.String()
}

// clearPath discards all subpaths and clears the `_ecd` flag.
func (_ggcda *shapesState) clearPath() {
	_ggcda._cfgd, _ggcda._ecd = nil, false
	if _bccf {
		// Format: "CLEAR: ss=%s"
		_ag.Log.Info("\u0043\u004c\u0045A\u0052\u003a\u0020\u0073\u0073\u003d\u0025\u0073", _ggcda)
	}
}

// cubicTo handles a cubic curve segment by adding only its end point
// (`_bcbab`, `_bca`); the four control-point coordinates are ignored.
func (_adfd *shapesState) cubicTo(_cgbb, _egbg, _eadg, _gdga, _bcbab, _bca float64) {
	if _bccf {
		// Message: "cubicTo:"
		_ag.Log.Info("\u0063\u0075\u0062\u0069\u0063\u0054\u006f\u003a")
	}
	_adfd.addPoint(_bcbab, _bca)
}

// textPara is a paragraph of text lines, or a stand-in for a table.
type textPara struct {
	_bf.PdfRectangle
	_dcfc   _bf.PdfRectangle
	_ecbdg  []*textLine // lines of the paragraph
	_bccg   *textTable  // table consulted by depth() when there are no lines
	_bggb   bool        // paragraphs with this flag are dropped by applyTables
	_dgggff bool        // when set, depth() reports -1
	// Links to other paragraphs. getRight reads `_bfdc` as the neighbour of
	// the last column and follows `_egec` from one row to the next.
	_fgeg *textPara
	_bfdc *textPara
	_adad *textPara
	_egec *textPara
}

// _gabdb turns stroked path sections into rulings. After `_bcff` has processed
// the sections, every pair of consecutive points of every subpath with at
// least two points is offered to `_aeca` together with the section colour, and
// the rulings it accepts are collected.
func _gabdb(_bdda []pathSection) rulingList {
	_bcff(_bdda)
	if _efa {
		// Format: "makeStrokeRulings: %d strokes"
		_ag.Log.Info("\u006d\u0061k\u0065\u0053\u0074\u0072\u006f\u006b\u0065\u0052\u0075\u006c\u0069\u006e\u0067\u0073\u003a\u0020\u0025\u0064\u0020\u0073\u0074\u0072ok\u0065\u0073", len(_bdda))
	}
	var rulings rulingList
	for _, section := range _bdda {
		for _, sp := range section._gdbf {
			points := sp._fcdc
			if len(points) < 2 {
				continue
			}
			for i := 1; i < len(points); i++ {
				if r, ok := _aeca(points[i-1], points[i], section.Color); ok {
					rulings = append(rulings, r)
				}
			}
		}
	}
	if _efa {
		// Format: "makeStrokeRulings: %s"
		_ag.Log.Info("m\u0061\u006b\u0065\u0053tr\u006fk\u0065\u0052\u0075\u006c\u0069n\u0067\u0073\u003a\u0020\u0025\u0073", rulings)
	}
	return rulings
}

const _edec = 1.0 / 1000.0

// _fef reports whether `_bcgg` has at least `_feba` runes, ends in a Unicode
// hyphen, and has a non-space rune immediately before that hyphen.
func _fef(_bcgg string) bool {
	if _a.RuneCountInString(_bcgg) < _feba {
		return false
	}
	last, size := _a.DecodeLastRuneInString(_bcgg)
	if !(size > 0 && _gg.Is(_gg.Hyphen, last)) {
		return false
	}
	before, size := _a.DecodeLastRuneInString(_bcgg[:len(_bcgg)-size])
	return size > 0 && !_gg.IsSpace(before)
}

// setFont records the font size `_bdcdd` and then the font named `_cgfg` in
// the text state. The size is stored even when the font lookup fails, in which
// case the lookup error is returned and the font is left unchanged.
// It is a no-op on a nil text object.
func (_gegg *textObject) setFont(_cgfg string, _bdcdd float64) error {
	if _gegg == nil {
		return nil
	}
	_gegg._degf._adfb = _bdcdd
	font, err := _gegg.getFont(_cgfg)
	if err != nil {
		return err
	}
	_gegg._degf._dde = font
	return nil
}

// appendMark adds `_dcfdg` to the word: the word's rectangle is combined with
// the mark's via `_effa`, `_fbgge` is raised to the mark's `_abba` if that is
// larger, and `_ecgcg` is recomputed as `_agfb.Ury` minus the word's new Lly.
func (_fddd *textWord) appendMark(_dcfdg *textMark, _agfb _bf.PdfRectangle) {
	_fddd._gdgg = append(_fddd._gdgg, _dcfdg)
	_fddd.PdfRectangle = _effa(_fddd.PdfRectangle, _dcfdg.PdfRectangle)
	if size := _dcfdg._abba; size > _fddd._fbgge {
		_fddd._fbgge = size
	}
	_fddd._ecgcg = _agfb.Ury - _fddd.PdfRectangle.Lly
}

// _eceb classifies a rectangle as a ruling: `_ebabd` when it is wider than
// tall and at least `_ffdeb` wide, `_beec` when it is not wider than tall and
// at least `_ffdeb` high, and `_gdbb` otherwise.
func _eceb(_bgcca _bf.PdfRectangle) rulingKind {
	width, height := _bgcca.Width(), _bgcca.Height()
	switch wide := width > height; {
	case wide && width >= _ffdeb:
		return _ebabd
	case !wide && height >= _ffdeb:
		return _beec
	}
	return _gdbb
}

// showText renders the text `_bad` via renderText.
func (_eegd *textObject) showText(_beg _bg.PdfObject, _bad []byte) error {
	return _eegd.renderText(_beg, _bad)
}

// isQuadrilateral reports whether the subpath has four points, or five points
// of which the last coincides exactly with the first.
func (_bgbg *subpath) isQuadrilateral() bool {
	points := _bgbg._fcdc
	switch len(points) {
	case 4:
		return true
	case 5:
		first, last := points[0], points[4]
		return first.X == last.X && first.Y == last.Y
	default:
		return false
	}
}

// putComposite stores the paragraphs `_cgae` with rectangle `_egcfc` as the
// composite cell at (`_cccd`, `_daaeb`). The cell's bounding box is refreshed
// with updateBBox before it is stored. An empty paragraph list is logged as an
// error and ignored.
func (_gebc *textTable) putComposite(_cccd, _daaeb int, _cgae paraList, _egcfc _bf.PdfRectangle) {
	if len(_cgae) == 0 {
		_ag.Log.Error("\u0074\u0065xt\u0054\u0061\u0062l\u0065\u0029\u0020\u0070utC\u006fmp\u006f\u0073\u0069\u0074\u0065\u003a\u0020em\u0070\u0074\u0079\u0020\u0070\u0061\u0072a\u0073")
		return
	}
	cell := compositeCell{PdfRectangle: _egcfc, paraList: _cgae}
	if _beae {
		// Format: "        putComposite(%d,%d)<-%s\n"
		_gdf.Printf("\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0020\u0070\u0075\u0074\u0043\u006f\u006d\u0070o\u0073i\u0074\u0065\u0028\u0025\u0064\u002c\u0025\u0064\u0029\u003c\u002d\u0025\u0073\u000a", _cccd, _daaeb, cell.String())
	}
	cell.updateBBox()
	_gebc._bbfb[_fcbc(_cccd, _daaeb)] = cell
}

// parasBBox returns the cell's paragraphs and its rectangle.
func (_cadf compositeCell) parasBBox() (paraList, _bf.PdfRectangle) {
	return _cadf.paraList, _cadf.PdfRectangle
}

// _dfdae returns the smaller of its two arguments.
func _dfdae(_egdc, _cedaf int) int {
	if _cedaf <= _egdc {
		return _cedaf
	}
	return _egdc
}

type markKind int

// applyTables returns a new list made of one table paragraph per table in
// `_baed`, followed by the paragraphs of the receiver whose `_bggb` flag is
// not set.
func (_fbcdc paraList) applyTables(_baed []*textTable) paraList {
	var merged paraList
	for _, table := range _baed {
		merged = append(merged, table.newTablePara())
	}
	for _, para := range _fbcdc {
		if !para._bggb {
			merged = append(merged, para)
		}
	}
	return merged
}

// pullWord adds `_dgebb` to the bag under key `_agad`: the bag's rectangle is
// combined with the word's via `_effa`, `_gaed` is raised to the word's
// `_fbgge` if that is larger, and the word is recorded in `_gagbb[_agad]`.
// `_gagbb[_agad]` must already be initialised.
func (_abg *wordBag) pullWord(_dgebb *textWord, _agad int, _gagbb map[int]map[*textWord]struct{}) {
	_abg.PdfRectangle = _effa(_abg.PdfRectangle, _dgebb.PdfRectangle)
	if size := _dgebb._fbgge; size > _abg._gaed {
		_abg._gaed = size
	}
	_abg._bbf[_agad] = append(_abg._bbf[_agad], _dgebb)
	_gagbb[_agad][_dgebb] = struct{}{}
}

// _ea reports whether `_gddf` contains a font whose FontName equals `_aa`.
func _ea(_gddf []Font, _aa string) bool {
	for i := range _gddf {
		if _gddf[i].FontName == _aa {
			return true
		}
	}
	return false
}

// _dagb returns the difference between the `_acbd` values of its two arguments.
func _dagb(_acegd, _geff bounded) float64 {
	return _acbd(_acegd) - _acbd(_geff)
}
// _ggdb reports whether the left edge (Llx) of word `_fbfb` lies in the half-open
// interval [Urx, Urx+_ccfa) measured from the right edge (Urx) of bag `_acbg`.
func _ggdb(_acbg *wordBag, _fbfb *textWord, _ccfa float64) bool {
	bagRight, wordLeft := _acbg.Urx, _fbfb.Llx
	if !(bagRight <= wordLeft) {
		return false
	}
	return wordLeft < bagRight+_ccfa
}
// TableCell is a cell in a TextTable.
type TableCell struct {
	// Text is the extracted text.
	Text string
	// Marks is the collection of TextMarks corresponding to the text in Text.
	Marks TextMarkArray
}

// _aadf builds the paragraphs of a page from its text marks.
//
// `_afae` are the marks, `_fege` is the page rectangle, `_ecfb` the rulings and
// `_gada` the grid tilings handed on to extractTables. The marks are grouped into
// words (_cdbag), the words into bags (_cdg, _bcdg, _eceed), and each bag is
// arranged into a paragraph. When at least `_gdfd` paragraphs result, tables are
// extracted. The result is put in reading order by sortReadingOrder. It returns nil
// when there are no marks or no words.
func _aadf(_afae []*textMark, _fege _bf.PdfRectangle, _ecfb rulingList, _gada []gridTiling) paraList {
	_ag.Log.Trace("\u006d\u0061\u006b\u0065\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065\u003a \u0025\u0064\u0020\u0065\u006c\u0065m\u0065\u006e\u0074\u0073\u0020\u0070\u0061\u0067\u0065\u0053\u0069\u007a\u0065=\u0025\u002e\u0032\u0066", len(_afae), _fege)
	if len(_afae) == 0 {
		return nil
	}
	pageWords := _cdbag(_afae, _fege)
	if len(pageWords) == 0 {
		return nil
	}
	_ecfb.log("\u006d\u0061\u006be\u0054\u0065\u0078\u0074\u0050\u0061\u0067\u0065")
	verts, horzs := _ecfb.vertsHorzs()
	pageBag := _cdg(pageWords, _fege.Ury, verts, horzs)
	bags := _bcdg(pageBag, _fege.Ury, verts, horzs)
	bags = _eceed(bags)
	paras := make(paraList, 0, len(bags))
	for _, bag := range bags {
		if para := bag.arrangeText(); para != nil {
			paras = append(paras, para)
		}
	}
	if len(paras) >= _gdfd {
		paras = paras.extractTables(_gada)
	}
	paras.sortReadingOrder()
	paras.log("\u0073\u006f\u0072te\u0064\u0020\u0069\u006e\u0020\u0072\u0065\u0061\u0064\u0069\u006e\u0067\u0020\u006f\u0072\u0064\u0065\u0072")
	return paras
}

// _beda walks the non-nil rows of `_adfbc` in the key order returned by `_bacfa`
// and, for each pair of consecutive rows, clamps the last value of the earlier row
// to the first value of the later row when the earlier one is larger. The slices
// are modified in place.
//
// The earlier row is looked up under the previous key. Reading it from the current
// key would compare every row with itself, so the first row, which has no
// predecessor, is only used as the starting point. Empty rows are skipped so they
// cannot cause an index panic.
func _beda(_adfbc map[int][]float64) {
	if len(_adfbc) <= 1 {
		return
	}
	keys := _bacfa(_adfbc)
	if _beae {
		_ag.Log.Info("\u0066i\u0078C\u0065\u006c\u006c\u0073\u003a \u006b\u0065y\u0073\u003d\u0025\u002b\u0076", keys)
	}
	// Locate the first key that has a row.
	var start, prevKey int
	for start, prevKey = range keys {
		if _adfbc[prevKey] != nil {
			break
		}
	}
	for offset, key := range keys[start:] {
		cur := _adfbc[key]
		if cur == nil {
			continue
		}
		if _beae {
			_gdf.Printf("\u0025\u0034\u0064\u003a\u0020\u006b\u0030\u003d\u0025\u0064\u0020\u006b1\u003d\u0025\u0064\u000a", start+offset, prevKey, key)
		}
		if key != prevKey && len(cur) > 0 {
			prev := _adfbc[prevKey]
			if n := len(prev); n > 0 && prev[n-1] > cur[0] {
				prev[n-1] = cur[0]
			}
		}
		prevKey = key
	}
}
// String returns a description of `l`.
func ( _eegf * textLine ) String ( ) string { return _gdf . Sprintf ( "\u0025\u002e2\u0066\u0020\u0025\u0036\u002e\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065\u003d\u0025\u002e\u0032\u0066\u0020\"%\u0073\u0022" , _eegf . _dcfd , _eegf . PdfRectangle , _eegf . _ddef , _eegf . text ( ) ) ;
} ;
// extractTables returns the receiver unchanged when it has fewer than `_gdfd`
// elements; otherwise it runs findTables and returns applyTables of the result.
// Extra logging is emitted when `_beae` is set.
func ( _ecfg paraList ) extractTables ( _aafcb [ ] gridTiling ) paraList { if _beae { _ag . Log . Debug ( "\u0065\u0078\u0074r\u0061\u0063\u0074\u0054\u0061\u0062\u006c\u0065\u0073\u003d\u0025\u0064\u0020\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u0078\u003d\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d" , len ( _ecfg ) ) ;
} ; if len ( _ecfg ) < _gdfd { return _ecfg ; } ; _aeed := _ecfg . findTables ( _aafcb ) ; if _beae { _ag . Log . Info ( "c\u006f\u006d\u0062\u0069\u006e\u0065d\u0020\u0074\u0061\u0062\u006c\u0065s\u0020\u0025\u0064\u0020\u003d\u003d\u003d=\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d=\u003d" , len ( _aeed ) ) ;
for _dbcfg , _fadcb := range _aeed { _fadcb . log ( _gdf . Sprintf ( "c\u006f\u006d\u0062\u0069\u006e\u0065\u0064\u0020\u0025\u0064" , _dbcfg ) ) ; } ; } ; return _ecfg . applyTables ( _aeed ) ; } ;
// sortStrict sorts the list in place: by `_cgef` descending, then by `_eead`
// ascending (only when `_fbga` of the difference is false), then by `_bgeb`
// ascending, then by `_eecc` ascending.
func ( _bcaf rulingList ) sortStrict ( ) { _f . Slice ( _bcaf , func ( _efcee , _agcbcg int ) bool { _cbfed , _cgeba := _bcaf [ _efcee ] , _bcaf [ _agcbcg ] ;
_gcdd , _gegga := _cbfed . _cgef , _cgeba . _cgef ; if _gcdd != _gegga { return _gcdd > _gegga ; } ; _aeae , _dacc := _cbfed . _eead , _cgeba . _eead ; if ! _fbga ( _aeae - _dacc ) { return _aeae < _dacc ; } ; _aeae , _dacc = _cbfed . _bgeb , _cgeba . _bgeb ; if _aeae != _dacc { return _aeae < _dacc ;
} ; return _cbfed . _eecc < _cgeba . _eecc ; } ) ; } ;
// _cdbag groups the marks `_efbg` into words. A mark whose text satisfies `_cgcdb`
// ends the current word and is dropped. A mark starts a new word when its gap to
// the current word (`_dgecg`, relative to the word's `_fbgge`) is >= `_gfdad` or
// < -`_edge`, or when its relative depth difference exceeds `_cbgg`. When `_bfe`
// is set, marks identified by `_gbae` and inDiacriticArea are merged into the
// neighbouring mark through addDiacritic. Words whose computed text satisfies
// `_cgcdb` are discarded.
func _cdbag ( _efbg [ ] * textMark , _bdcdg _bf . PdfRectangle ) [ ] * textWord { var _cdefb [ ] * textWord ; var _eeedg * textWord ; if _eccd { _ag . Log . Info ( "\u006d\u0061\u006beT\u0065\u0078\u0074\u0057\u006f\u0072\u0064\u0073\u003a\u0020\u0025\u0064\u0020\u006d\u0061\u0072\u006b\u0073" , len ( _efbg ) ) ;
} ; _bacca := func ( ) { if _eeedg != nil { _befcd := _eeedg . computeText ( ) ; if ! _cgcdb ( _befcd ) { _eeedg . _bcaa = _befcd ; _cdefb = append ( _cdefb , _eeedg ) ; if _eccd { _ag . Log . Info ( "\u0061\u0064\u0064Ne\u0077\u0057\u006f\u0072\u0064\u003a\u0020\u0025\u0064\u003a\u0020\u0077\u006f\u0072\u0064\u003d\u0025\u0073" , len ( _cdefb ) - 1 , _eeedg . String ( ) ) ;
for _effede , _adae := range _eeedg . _gdgg { _gdf . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _effede , _adae . String ( ) ) ; } ; } ; } ; _eeedg = nil ; } ; } ; for _ , _bgff := range _efbg { if _bfe && _eeedg != nil && len ( _eeedg . _gdgg ) > 0 { _gbedd := _eeedg . _gdgg [ len ( _eeedg . _gdgg ) - 1 ] ;
_cfebf , _acbbd := _gbae ( _bgff . _eeaf ) ; _gggf , _abbc := _gbae ( _gbedd . _eeaf ) ; if _acbbd && ! _abbc && _gbedd . inDiacriticArea ( _bgff ) { _eeedg . addDiacritic ( _cfebf ) ; continue ; } ; if _abbc && ! _acbbd && _bgff . inDiacriticArea ( _gbedd ) { _eeedg . _gdgg = _eeedg . _gdgg [ : len ( _eeedg . _gdgg ) - 1 ] ;
_eeedg . appendMark ( _bgff , _bdcdg ) ; _eeedg . addDiacritic ( _gggf ) ; continue ; } ; } ; _bgdcb := _cgcdb ( _bgff . _eeaf ) ; if _bgdcb { _bacca ( ) ; continue ; } ; if _eeedg == nil && ! _bgdcb { _eeedg = _cedeb ( [ ] * textMark { _bgff } , _bdcdg ) ; continue ; } ; _gabda := _eeedg . _fbgge ;
_fecec := _b . Abs ( _aaegg ( _bdcdg , _bgff ) - _eeedg . _ecgcg ) / _gabda ; _begdf := _dgecg ( _bgff , _eeedg ) / _gabda ; if _begdf >= _gfdad || ! ( - _edge <= _begdf && _fecec <= _cbgg ) { _bacca ( ) ; _eeedg = _cedeb ( [ ] * textMark { _bgff } , _bdcdg ) ; continue ; } ; _eeedg . appendMark ( _bgff , _bdcdg ) ;
} ; _bacca ( ) ; return _cdefb ; } ;
// addDiacritic appends `_eafda` to the text of the word's last mark and replaces
// that text with its NFKC-normalized form. The word must hold at least one mark.
func ( _dcecg * textWord ) addDiacritic ( _eafda string ) { _bbeg := _dcecg . _gdgg [ len ( _dcecg . _gdgg ) - 1 ] ; _bbeg . _eeaf += _eafda ; _bbeg . _eeaf = _bd . NFKC . String ( _bbeg . _eeaf ) ; } ;
// _cffd returns the distinct keys of all inner maps of `_gbagg`, sorted ascending.
func _cffd ( _gbagg map [ float64 ] map [ float64 ] gridTile ) [ ] float64 { _cbec := make ( [ ] float64 , 0 , len ( _gbagg ) ) ;
_defe := make ( map [ float64 ] struct { } , len ( _gbagg ) ) ; for _ , _cfdca := range _gbagg { for _gebb := range _cfdca { if _ , _ffcgf := _defe [ _gebb ] ; _ffcgf { continue ; } ; _cbec = append ( _cbec , _gebb ) ; _defe [ _gebb ] = struct { } { } ; } ; } ; _f . Float64s ( _cbec ) ; return _cbec ;
} ;
// getFillColor returns `_gfdbb` applied to the non-stroking colorspace and
// non-stroking color held in `_fcdg`.
func ( _ddad * textObject ) getFillColor ( ) _eda . Color { return _gfdbb ( _ddad . _fcdg . ColorspaceNonStroking , _ddad . _fcdg . ColorNonStroking ) ; } ;
// applyRemovals drops from `_bbf[idx]` every word listed in `_abce[idx]`. An index
// whose remaining count is zero is deleted from `_bbf`; empty removal sets are skipped.
func ( _affa * wordBag ) applyRemovals ( _abce map [ int ] map [ * textWord ] struct { } ) { for _gfcb , _cfac := range _abce { if len ( _cfac ) == 0 { continue ;
} ; _cdf := _affa . _bbf [ _gfcb ] ; _dafbe := len ( _cdf ) - len ( _cfac ) ; if _dafbe == 0 { delete ( _affa . _bbf , _gfcb ) ; continue ; } ; _fde := make ( [ ] * textWord , _dafbe ) ; _gea := 0 ; for _ , _bgfd := range _cdf { if _ , _geea := _cfac [ _bgfd ] ; ! _geea { _fde [ _gea ] = _bgfd ; _gea ++ ;
} ; } ; _affa . _bbf [ _gfcb ] = _fde ; } ; } ;
// _ebbb matches text that is only a run of digits with an optional trailing ".",
// or a run of the letters I, i, v, optionally followed by ")" and surrounded by
// optional whitespace.
var _ebbb = _c . MustCompile ( "\u005e\u005c\u0073\u002a\u0028\u005c\u0064\u002b\u005c\u002e\u003f|\u005b\u0049\u0069\u0076\u005d\u002b\u0029\u005c\u0073\u002a\\\u0029\u003f\u0024" ) ;
// vertsHorzs splits the list by kind and returns the `_beec` rulings first and the
// `_ebabd` rulings second; rulings of any other kind are dropped.
func ( _bgeg rulingList ) vertsHorzs ( ) ( rulingList , rulingList ) { var _agbf , _fdga rulingList ;
for _ , _daab := range _bgeg { switch _daab . _cgef { case _beec : _agbf = append ( _agbf , _daab ) ; case _ebabd : _fdga = append ( _fdga , _daab ) ; } ; } ; return _agbf , _fdga ; } ;
// lineTo adds the point (`_bcba`, `_fbcfd`) via addPoint, logging it first when `_bccf` is set.
func ( _ddfc * shapesState ) lineTo ( _bcba , _fbcfd float64 ) { if _bccf { _ag . Log . Info ( "\u006c\u0069\u006eeT\u006f\u0028\u0025\u002e\u0032\u0066\u002c\u0025\u002e\u0032\u0066\u0020\u0070\u003d\u0025\u002e\u0032\u0066" , _bcba , _fbcfd , _ddfc . devicePoint ( _bcba , _fbcfd ) ) ;
} ; _ddfc . addPoint ( _bcba , _fbcfd ) ; } ;
// firstWord returns the first word stored at index `_ccde`; it panics when there is none.
func ( _adgg * wordBag ) firstWord ( _ccde int ) * textWord { return _adgg . _bbf [ _ccde ] [ 0 ] } ;
// intersects reports whether the two rulings are of different kinds (`_beec` and
// `_ebabd`) and each ruling's `_eead` lies within the other's
// [`_bgeb`-`_deff`, `_eecc`+`_deff`] range.
func ( _efggd * ruling ) intersects ( _cedcd * ruling ) bool { _fgec := ( _efggd . _cgef == _beec && _cedcd . _cgef == _ebabd ) || ( _cedcd . _cgef == _beec && _efggd . _cgef == _ebabd ) ;
_cdfd := func ( _efac , _fcfa * ruling ) bool { return _efac . _bgeb - _deff <= _fcfa . _eead && _fcfa . _eead <= _efac . _eecc + _deff ; } ; _aadea := _cdfd ( _efggd , _cedcd ) ; _fgfb := _cdfd ( _cedcd , _efggd ) ; if _efa { _gdf . Printf ( "\u0020\u0020\u0020\u0020\u0069\u006e\u0074\u0065\u0072\u0073\u0065\u0063\u0074\u0073\u003a\u0020\u0020\u006fr\u0074\u0068\u006f\u0067\u006f\u006e\u0061l\u003d\u0025\u0074\u0020\u006f\u0031\u003d\u0025\u0074\u0020\u006f2\u003d\u0025\u0074\u0020\u2192\u0020\u0025\u0074\u000a" + "\u0020\u0020\u0020 \u0020\u0020\u0020\u0076\u003d\u0025\u0073\u000a" + " \u0020\u0020\u0020\u0020\u0020\u0077\u003d\u0025\u0073\u000a" , _fgec , _aadea , _fgfb , _fgec && _aadea && _fgfb , _efggd , _cedcd ) ;
} ; return _fgec && _aadea && _fgfb ; } ;
// rulingKind values, numbered from 0 in declaration order.
const ( _gdbb rulingKind = iota ; _ebabd ; _beec ; ) ;
// numBorders returns how many of the tile's four border flags are set.
func ( _gdbab gridTile ) numBorders ( ) int { _fafb := 0 ; if _gdbab . _bceg { _fafb ++ ; } ; if _gdbab . _abdfe { _fafb ++ ; } ; if _gdbab . _abag { _fafb ++ ; } ; if _gdbab . _gfga { _fafb ++ ; } ; return _fafb ;
} ;
// _aadc logs the number of ruling lists with the title `_dcea` and prints each list.
func _aadc ( _dcea string , _gdgaa [ ] rulingList ) { _ag . Log . Info ( "\u0024\u0024 \u0025\u0064\u0020g\u0072\u0069\u0064\u0073\u0020\u002d\u0020\u0025\u0073" , len ( _gdgaa ) , _dcea ) ; for _ddee , _fgea := range _gdgaa { _gdf . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _ddee , _fgea . String ( ) ) ;
} ; } ;
// comp is a less-function over list indexes. Rulings are ordered by `_cgef`
// descending; two `_gdbb` rulings never compare less. For equal kinds the order is
// by `_eead`, then `_bgeb`, then `_eecc`, with every comparison negated for kinds
// other than `_ebabd`.
func ( _gead rulingList ) comp ( _cbfa , _dgeeg int ) bool { _gfec , _gcec := _gead [ _cbfa ] , _gead [ _dgeeg ] ; _cffga , _baab := _gfec . _cgef , _gcec . _cgef ; if _cffga != _baab { return _cffga > _baab ; } ; if _cffga == _gdbb { return false ; } ; _cedcc := func ( _baded bool ) bool { if _cffga == _ebabd { return _baded ;
} ; return ! _baded ; } ; _beab , _afdgd := _gfec . _eead , _gcec . _eead ; if _beab != _afdgd { return _cedcc ( _beab > _afdgd ) ; } ; _beab , _afdgd = _gfec . _bgeb , _gcec . _bgeb ; if _beab != _afdgd { return _cedcc ( _beab < _afdgd ) ; } ; return _cedcc ( _gfec . _eecc < _gcec . _eecc ) ;
} ;
// depthIndexes returns the keys of `_bbf` sorted ascending, or nil when the map is empty.
func ( _dbgg * wordBag ) depthIndexes ( ) [ ] int { if len ( _dbgg . _bbf ) == 0 { return nil ; } ; _gggcd := make ( [ ] int , len ( _dbgg . _bbf ) ) ; _edbe := 0 ; for _gdab := range _dbgg . _bbf { _gggcd [ _edbe ] = _gdab ; _edbe ++ ; } ; _f . Ints ( _gggcd ) ; return _gggcd ; } ;
// merge returns a new ruling whose `_eead` is the mean of the list's `_eead`
// values, `_bgeb` the minimum and `_eecc` the maximum; kind, mark kind and Color
// are copied from the first element. The list must not be empty.
func ( _dggfg rulingList ) merge ( ) * ruling { _cegb := _dggfg [ 0 ] . _eead ;
_eeea := _dggfg [ 0 ] . _bgeb ; _ebced := _dggfg [ 0 ] . _eecc ; for _ , _fegdf := range _dggfg [ 1 : ] { _cegb += _fegdf . _eead ; if _fegdf . _bgeb < _eeea { _eeea = _fegdf . _bgeb ; } ; if _fegdf . _eecc > _ebced { _ebced = _fegdf . _eecc ; } ; } ; _efgf := & ruling { _cgef : _dggfg [ 0 ] . _cgef , _gbgb : _dggfg [ 0 ] . _gbgb , Color : _dggfg [ 0 ] . Color , _eead : _cegb / float64 ( len ( _dggfg ) ) , _bgeb : _eeea , _eecc : _ebced } ;
if _cecg { _ag . Log . Info ( "\u006de\u0072g\u0065\u003a\u0020\u0025\u0032d\u0020\u0076e\u0063\u0073\u0020\u0025\u0073" , len ( _dggfg ) , _efgf ) ; for _cdfca , _cedae := range _dggfg { _gdf . Printf ( "\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a" , _cdfca , _cedae ) ;
} ; } ; return _efgf ; } ;
// ruling holds a kind, a mark kind, a Color and four float64 values (`_eead`,
// `_bgeb`, `_eecc`, `_efgd`).
type ruling struct { _cgef rulingKind ; _gbgb markKind ; _eda . Color ; _eead float64 ; _bgeb float64 ; _eecc float64 ; _efgd float64 ; } ;
// wordBag is a bounding rectangle plus words stored per int index in `_bbf`,
// together with two ruling lists and two float64 values.
type wordBag struct { _bf . PdfRectangle ; _gaed float64 ; _fff , _aede rulingList ; _fcgc float64 ; _bbf map [ int ] [ ] * textWord ;
} ;
// _abbf passes a copy of the template mark `_ebf`, with Text set to `_beb`, to
// `_affac` together with `_cffca` and `_feea`, and returns its result.
func _abbf ( _cffca [ ] TextMark , _feea * int , _beb string ) [ ] TextMark { _debd := _ebf ; _debd . Text = _beb ; return _affac ( _cffca , _feea , _debd ) ; } ;
// depth returns the smallest depth() among the cells get(x, 0) that are non-nil
// and whose `_dgggff` flag is false, or 1e10 when there is none.
func ( _ffgac * textTable ) depth ( ) float64 { _eaadc := 1e10 ; for _ccfe := 0 ; _ccfe < _ffgac . _baee ; _ccfe ++ { _cgced := _ffgac . get ( _ccfe , 0 ) ;
if _cgced == nil || _cgced . _dgggff { continue ; } ; _eaadc = _b . Min ( _eaadc , _cgced . depth ( ) ) ; } ; return _eaadc ; } ;
// primMinMax returns the minimum and maximum `_eead` in the list, which must not be empty.
func ( _bbed rulingList ) primMinMax ( ) ( float64 , float64 ) { _gdddf , _acbb := _bbed [ 0 ] . _eead , _bbed [ 0 ] . _eead ; for _ , _bccec := range _bbed [ 1 : ] { if _bccec . _eead < _gdddf { _gdddf = _bccec . _eead ;
} else if _bccec . _eead > _acbb { _acbb = _bccec . _eead ; } ; } ; return _gdddf , _acbb ; } ;
// close appends the first point again unless `_egag` reports it equal to the last
// point, sets `_fcfe` to true and calls removeDuplicates.
// NOTE(review): indexes `_fcdc[0]`, so it panics on a subpath without points.
func ( _eff * subpath ) close ( ) { if ! _egag ( _eff . _fcdc [ 0 ] , _eff . last ( ) ) { _eff . add ( _eff . _fcdc [ 0 ] ) ; } ; _eff . _fcfe = true ; _eff . removeDuplicates ( ) ; } ;
// RenderMode bit flags: RenderModeStroke is 1, RenderModeFill is 2, RenderModeClip is 4.
const ( RenderModeStroke RenderMode = 1 << iota ;
RenderModeFill ; RenderModeClip ; ) ;
// _bbgcb returns, for each i in [0, `_acbf`), the running total of
// len(`_bgae`[j])+1 over all j < i, together with the grand total.
func _bbgcb ( _acbf int , _bgae map [ int ] [ ] float64 ) ( [ ] int , int ) { _agge := make ( [ ] int , _acbf ) ; _ecbeg := 0 ; for _abgfd := 0 ; _abgfd < _acbf ; _abgfd ++ { _agge [ _abgfd ] = _ecbeg ; _ecbeg += len ( _bgae [ _abgfd ] ) + 1 ; } ; return _agge , _ecbeg ;
} ;
// _aaegg returns `_decb.Ury` minus the Lly of `_ceaf`'s bounding box.
func _aaegg ( _decb _bf . PdfRectangle , _ceaf bounded ) float64 { return _decb . Ury - _ceaf . bbox ( ) . Lly } ;
// last returns the final point of the subpath; it panics when there are no points.
func ( _dfda * subpath ) last ( ) _ee . Point { return _dfda . _fcdc [ len ( _dfda . _fcdc ) - 1 ] } ;
// _gecc maps each markKind to its display name.
var _gecc = map [ markKind ] string { _caade : "\u0073\u0074\u0072\u006f\u006b\u0065" , _gaea : "\u0066\u0069\u006c\u006c" , _eddbd : "\u0061u\u0067\u006d\u0065\u006e\u0074" } ;
// extractPageResourcesToFont appends one Font entry to `_ec.Fonts` for every font
// in the page resources `_de` whose name is not already present.
//
// It returns an error when the resources hold no font dictionary (`_dg`), when a
// listed font cannot be looked up (`_edf`), when the font object cannot be loaded,
// or when an embedded font stream cannot be decoded.
//
// The font name comes from the descriptor's FontName. A font without a descriptor,
// or with a descriptor lacking FontName, falls back to its BaseFont and then to its
// resource key instead of causing a nil dereference; such a font is recorded
// without font file data.
func (_ec *PageFonts) extractPageResourcesToFont(_de *_bf.PdfPageResources) error {
	fontDict, ok := _bg.GetDict(_de.Font)
	if !ok {
		return _g.New(_dg)
	}
	for _, key := range fontDict.Keys() {
		fontObj, found := _de.GetFontByName(key)
		if !found {
			return _g.New(_edf)
		}
		font, err := _bf.NewPdfFontFromPdfObject(fontObj)
		if err != nil {
			return err
		}
		desc := font.FontDescriptor()
		var fontName string
		switch {
		case desc != nil && desc.FontName != nil:
			fontName = desc.FontName.String()
		case font.BaseFont() != "":
			fontName = font.BaseFont()
		default:
			fontName = string(key)
		}
		subtype := font.Subtype()
		if _ea(_ec.Fonts, fontName) {
			continue
		}
		hasToUnicode := len(font.ToUnicode()) != 0

		// Only the first non-nil FontFile entry is considered, in the order
		// FontFile, FontFile2, FontFile3.
		var (
			fontFile _bg.PdfObject
			ext      string
		)
		if desc != nil {
			switch {
			case desc.FontFile != nil:
				fontFile, ext = desc.FontFile, "\u002e\u0070\u0066\u0062"
			case desc.FontFile2 != nil:
				fontFile, ext = desc.FontFile2, "\u002e\u0074\u0074\u0066"
			case desc.FontFile3 != nil:
				fontFile, ext = desc.FontFile3, "\u002e\u0063\u0066\u0066"
			}
		}
		var (
			fontData []byte
			fileName string
		)
		if fontFile != nil {
			if stream, isStream := _bg.GetStream(fontFile); isStream {
				fontData, err = _bg.DecodeStream(stream)
				if err != nil {
					return err
				}
				fileName = fontName + ext
			}
		}
		if len(fileName) < 1 {
			_ag.Log.Debug(_cgc)
		}
		_ec.Fonts = append(_ec.Fonts, Font{
			FontName:       fontName,
			PdfFont:        font,
			IsCID:          font.IsCID(),
			IsSimple:       font.IsSimple(),
			ToUnicode:      hasToUnicode,
			FontType:       subtype,
			FontData:       fontData,
			FontFileName:   fileName,
			FontDescriptor: desc,
		})
	}
	return nil
}

// toTextMarks returns the TextMarks of all words on the line in order. A mark with
// the text " " built by `_abbf` precedes every word whose `_egcbd` flag is set.
// `_bfdd` is passed on to `_abbf` and to each word's toTextMarks.
func (_bdfad *textLine) toTextMarks(_bfdd *int) []TextMark {
	var marks []TextMark
	for _, word := range _bdfad._egad {
		if word._egcbd {
			marks = _abbf(marks, _bfdd, "\u0020")
		}
		marks = append(marks, word.toTextMarks(_bfdd)...)
	}
	return marks
}

// textTable is a bounding rectangle with two int dimensions, a flag, and two maps
// keyed by uint64: `_aagc` holding paragraphs and `_bbfb` holding composite cells.
type textTable struct {
	_bf.PdfRectangle
	_baee, _cabfg int
	_acgbd        bool
	_aagc         map[uint64]*textPara
	_bbfb         map[uint64]compositeCell
}

// _bfcbe classifies a pair of extents: `_ebabd` when `_fcca` is at least `_afadd`
// and `_dgcd(_faec, _fcca)` holds, `_beec` when `_faec` is at least `_afadd` and
// `_dgcd(_fcca, _faec)` holds, and `_gdbb` otherwise.
func _bfcbe(_fcca, _faec, _afadd float64) rulingKind {
	if _fcca >= _afadd && _dgcd(_faec, _fcca) {
		return _ebabd
	}
	if _faec >= _afadd && _dgcd(_fcca, _faec) {
		return _beec
	}
	return _gdbb
}

// arrangeText empties the bag into text lines and returns them as one paragraph
// built by `_egecc`, or nil when no line was produced.
//
// Each line starts at the bag's first reading word and keeps pulling the best
// candidate from the depth band around that word until none is left or a candidate
// overlaps the end of the line by more than the allowed amount. The lines are then
// sorted with `_acgf`.
func (_gedf *wordBag) arrangeText() *textPara {
	_gedf.sort()
	if _bccb {
		_gedf.removeDuplicates()
	}
	var lines []*textLine
	for _, depthIdx := range _gedf.depthIndexes() {
		for !_gedf.empty(depthIdx) {
			startIdx := _gedf.firstReadingIndex(depthIdx)
			head := _gedf.firstWord(startIdx)
			line := _cdgf(_gedf, startIdx)
			fontsize := head._fbgge
			minDepth := head._ecgcg - _bgcc*fontsize
			maxDepth := head._ecgcg + _bgcc*fontsize
			maxGap := _dcca * fontsize
			maxOverlap := _ceda * fontsize
		_aba:
			for {
				var next *textWord
				nextIdx := 0
				for _, idx := range _gedf.depthBand(minDepth, maxDepth) {
					cand := _gedf.highestWord(idx, minDepth, maxDepth)
					if cand == nil {
						continue
					}
					gap := _dgecg(cand, line._egad[len(line._egad)-1])
					if gap < -maxOverlap {
						// Candidate overlaps the line end too far: the line is finished.
						break _aba
					}
					if gap > maxGap {
						continue
					}
					if next != nil && _addb(cand, next) >= 0 {
						continue
					}
					next = cand
					nextIdx = idx
				}
				if next == nil {
					break
				}
				line.pullWord(_gedf, next, nextIdx)
			}
			line.markWordBoundaries()
			lines = append(lines, line)
		}
	}
	if len(lines) == 0 {
		return nil
	}
	_f.Slice(lines, func(_bbb, _bfdf int) bool { return _acgf(lines[_bbb], lines[_bfdf]) < 0 })
	para := _egecc(_gedf.PdfRectangle, lines)
	if _cfed {
		_ag.Log.Info("\u0061\u0072\u0072an\u0067\u0065\u0054\u0065\u0078\u0074\u0020\u0021\u0021\u0021\u0020\u0070\u0061\u0072\u0061\u003d\u0025\u0073", para.String())
		if _bcf {
			for i, ln := range para._ecbdg {
				_gdf.Printf("\u0025\u0034\u0064\u003a\u0020\u0025\u0073\u000a", i, ln.String())
				if _aebf {
					for j, word := range ln._egad {
						_gdf.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", j, word.String())
						for k, mark := range word._gdgg {
							_gdf.Printf("\u00251\u0032\u0064\u003a\u0020\u0025\u0073\n", k, mark.String())
						}
					}
				}
			}
		}
	}
	return para
}
// Marks returns the TextMark collection for a page. It represents all the text on the page.
func (_caef PageText) Marks() *TextMarkArray {
	marks := new(TextMarkArray)
	marks._fceg = _caef._cagb
	return marks
}

// subpath is a sequence of points together with a flag that subpath.close sets.
type subpath struct {
	_fcdc []_ee.Point
	_fcfe bool
}
// Font represents the font properties on a PDF page.
type Font struct {
	PdfFont *_bf.PdfFont

	// FontName is the font name taken from the font properties.
	FontName string

	// FontType is the Subtype entry of the font dictionary inside the page resources.
	// Examples: Type0, Type1, MMType1, Type3, TrueType, CIDFont.
	FontType string

	// ToUnicode is true if the font provides a `ToUnicode` mapping.
	ToUnicode bool

	// IsCID is true if the underlying font is a composite font.
	// A composite font is represented by a font dictionary whose Subtype is `Type0`.
	IsCID bool

	// IsSimple is true if the font is a simple font.
	// A simple font is limited to 8-bit character codes.
	IsSimple bool

	// FontData is the raw data of the embedded font file.
	// Its format is TrueType (TTF), PostScript Font (PFB) or Compact Font Format (CFF).
	// It is taken from `FontFile`, `FontFile2` or `FontFile3` of the font descriptor;
	// at most one of these supplies the value.
	FontData []byte

	// FontFileName is a file name for the font, in the form
	// (font name) + (font type extension), for example helvetica.ttf.
	FontFileName string

	// FontDescriptor holds the metrics and other attributes of the font taken from
	// the PDF structure (font descriptor).
	FontDescriptor *_bf.PdfFontDescriptor
}

// _gfaeb returns a ruling of kind `_ebabd` positioned at the rectangle's Lly and
// spanning from its Llx to its Urx.
func _gfaeb(_gcda _bf.PdfRectangle) *ruling {
	edge := new(ruling)
	edge._cgef = _ebabd
	edge._eead = _gcda.Lly
	edge._bgeb, edge._eecc = _gcda.Llx, _gcda.Urx
	return edge
}

// lineRuling is a ruling kind, a mark kind, a Color and two end points.
type lineRuling struct {
	_aedc rulingKind
	_daff markKind
	_eda.Color
	_abec, _eacb _ee.Point
}

// appendWord adds `_eaff` to the end of the line, grows the line's rectangle with
// `_effa` to cover the word, and raises the line's `_ddef` and `_dcfd` to the
// word's `_fbgge` and `_ecgcg` when those are larger.
func (_ggff *textLine) appendWord(_eaff *textWord) {
	_ggff._egad = append(_ggff._egad, _eaff)
	_ggff.PdfRectangle = _effa(_ggff.PdfRectangle, _eaff.PdfRectangle)
	if size := _eaff._fbgge; size > _ggff._ddef {
		_ggff._ddef = size
	}
	if depth := _eaff._ecgcg; depth > _ggff._dcfd {
		_ggff._dcfd = depth
	}
}
// TextMarkArray is a collection of TextMarks.
type TextMarkArray struct{ _fceg []TextMark }

// llyRange returns the sub-slice of `_aceba` whose paragraphs have Lly within
// [`_egbe`, `_gddd`]. `_aceba` must list indexes into the receiver ordered by
// ascending Lly and have the same length as the receiver; both must be non-empty.
func (_cabf paraList) llyRange(_aceba []int, _egbe, _gddd float64) []int {
	n := len(_cabf)
	if _gddd < _cabf[_aceba[0]].Lly || _egbe > _cabf[_aceba[n-1]].Lly {
		return nil
	}
	lo := _f.Search(n, func(i int) bool { return _cabf[_aceba[i]].Lly >= _egbe })
	hi := _f.Search(n, func(i int) bool { return _cabf[_aceba[i]].Lly > _gddd })
	return _aceba[lo:hi]
}

// getDepthIdx returns the depth index of `_adbc` clamped to the range of indexes
// present in the bag.
// NOTE(review): indexes the result of depthIndexes, so it panics on an empty bag.
func (_ggea *wordBag) getDepthIdx(_adbc float64) int {
	indexes := _ggea.depthIndexes()
	idx := _feef(_adbc)
	if first := indexes[0]; idx < first {
		return first
	}
	if last := indexes[len(indexes)-1]; idx > last {
		return last
	}
	return idx
}

// scanBand visits the words of the receiver whose `_ecgcg` lies between `_dbbb` and
// `_ceed`, widened on both sides by `_bgcc` times the `_gaed` that `_eafa` had on
// entry, and counts those that are accepted.
//
// A word is accepted when `_bgdc(_eafa, word)` holds, its font size measure is
// within `_dabe` (ignored unless `_dabe` > 0), and `_eafa.blocked(word)` is false.
// Unless `_bdbf` is set, accepted words are moved from the receiver into `_eafa`.
// With `_bdbf` set nothing is moved and scanning of a depth index stops at its
// first accepted word. Unless `_eac` is set, the band grows to include each
// accepted word. `_eafc` is not used.
func (_acgaf *wordBag) scanBand(_eafc string, _eafa *wordBag, _bgdc func(_bbff *wordBag, _efb *textWord) bool, _dbbb, _ceed, _dabe float64, _bdbf, _eac bool) int {
	fontsize := _eafa._gaed
	var removals map[int]map[*textWord]struct{}
	if !_bdbf {
		removals = _acgaf.makeRemovals()
	}
	tol := _bgcc * fontsize
	count := 0
	for _, idx := range _acgaf.depthBand(_dbbb-tol, _ceed+tol) {
		if len(_acgaf._bbf[idx]) == 0 {
			continue
		}
		for _, word := range _acgaf._bbf[idx] {
			if !(_dbbb-tol <= word._ecgcg && word._ecgcg <= _ceed+tol) {
				continue
			}
			if !_bgdc(_eafa, word) {
				continue
			}
			relDiff := 2.0 * _b.Abs(word._fbgge-_eafa._gaed) / (word._fbgge + _eafa._gaed)
			ratio := _b.Max(word._fbgge/_eafa._gaed, _eafa._gaed/word._fbgge)
			measure := _b.Min(relDiff, ratio)
			if _dabe > 0 && measure > _dabe {
				continue
			}
			if _eafa.blocked(word) {
				continue
			}
			if !_bdbf {
				_eafa.pullWord(word, idx, removals)
			}
			count++
			if !_eac {
				if word._ecgcg < _dbbb {
					_dbbb = word._ecgcg
				}
				if word._ecgcg > _ceed {
					_ceed = word._ecgcg
				}
			}
			if _bdbf {
				break
			}
		}
	}
	if !_bdbf {
		_acgaf.applyRemovals(removals)
	}
	return count
}

// _feef returns the index of the bin of width `_ffde` that contains `_ddcg`, that
// is floor(`_ddcg` / `_ffde`).
//
// Bin i covers [i*_ffde, (i+1)*_ffde). Floor is used so that a negative depth that
// is an exact multiple of the bin width falls in the bin starting at that depth;
// truncating and subtracting one would place it one bin too low.
func _feef(_ddcg float64) int {
	return int(_b.Floor(_ddcg / _ffde))
}

// findTables links neighbouring paragraphs, sorts the receiver in place with `_afa`
// and returns the tables found by findGridTables (when `_bagf` is set) followed by
// those found by findTextTables (when `_bfcda` is set).
func (_accad paraList) findTables(_adbeb []gridTiling) []*textTable {
	_accad.addNeighbours()
	_f.Slice(_accad, func(_bgbc, _cacfa int) bool { return _afa(_accad[_bgbc], _accad[_cacfa]) < 0 })
	var tables []*textTable
	if _bagf {
		tables = append(tables, _accad.findGridTables(_adbeb)...)
	}
	if _bfcda {
		tables = append(tables, _accad.findTextTables()...)
	}
	return tables
}

// blocked reports whether one of the bag's rulings lies between the bag and
// `_eded`. The `_fff` rulings are checked when the word is entirely to the left or
// right of the bag, and the `_aede` rulings when it is entirely below or above.
func (_eaeb *wordBag) blocked(_eded *textWord) bool {
	if _eded.Urx < _eaeb.Llx {
		near := _eagae(_eded.PdfRectangle)
		far := _debdg(_eaeb.PdfRectangle)
		if _eaeb._fff.blocks(near, far) {
			if _fgad {
				_ag.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0078\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _eded, _eaeb)
			}
			return true
		}
	} else if _eaeb.Urx < _eded.Llx {
		near := _eagae(_eaeb.PdfRectangle)
		far := _debdg(_eded.PdfRectangle)
		if _eaeb._fff.blocks(near, far) {
			if _fgad {
				_ag.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0078\u2192\u0020:\u0020\u0025\u0073\u0020%s", _eded, _eaeb)
			}
			return true
		}
	}
	if _eded.Ury < _eaeb.Lly {
		near := _eeee(_eded.PdfRectangle)
		far := _gfaeb(_eaeb.PdfRectangle)
		if _eaeb._aede.blocks(near, far) {
			if _fgad {
				_ag.Log.Info("\u0062\u006c\u006f\u0063ke\u0064\u0020\u2190\u0079\u003a\u0020\u0025\u0073\u0020\u0025\u0073", _eded, _eaeb)
			}
			return true
		}
	} else if _eaeb.Ury < _eded.Lly {
		near := _eeee(_eaeb.PdfRectangle)
		far := _gfaeb(_eded.PdfRectangle)
		if _eaeb._aede.blocks(near, far) {
			if _fgad {
				_ag.Log.Info("b\u006co\u0063\u006b\u0065\u0064\u0020\u0079\u2192\u0020:\u0020\u0025\u0073\u0020%s", _eded, _eaeb)
			}
			return true
		}
	}
	return false
}

// firstReadingIndex returns the depth index whose first word comes first according
// to `_addb`, chosen among `_fadc` and the indexes of the depth band that starts at
// the end of bin `_fadc` and extends `_cdge` times the font size of that bin's
// first word.
func (_fffc *wordBag) firstReadingIndex(_fadc int) int {
	fontsize := _fffc.firstWord(_fadc)._fbgge
	minDepth := float64(_fadc+1) * _ffde
	maxDepth := minDepth + _cdge*fontsize
	best := _fadc
	for _, idx := range _fffc.depthBand(minDepth, maxDepth) {
		if _addb(_fffc.firstWord(idx), _fffc.firstWord(best)) < 0 {
			best = idx
		}
	}
	return best
}

// computeText returns the concatenation of the text of all marks in the word.
func (_bbdd *textWord) computeText() string {
	var sb _d.Builder
	for _, mark := range _bbdd._gdgg {
		sb.WriteString(mark._eeaf)
	}
	return sb.String()
}

// nextLine moves the line position by (0, -`_degf._acfa`).
func (_dgg *textObject) nextLine() { _dgg.moveLP(0, -_dgg._degf._acfa) }
// secMinMax returns the smallest `_bgeb` and the largest `_eecc` in the list, which
// must not be empty.
func (_eaed rulingList) secMinMax() (float64, float64) {
	lo, hi := _eaed[0]._bgeb, _eaed[0]._eecc
	for i := 1; i < len(_eaed); i++ {
		r := _eaed[i]
		if r._bgeb < lo {
			lo = r._bgeb
		}
		if r._eecc > hi {
			hi = r._eecc
		}
	}
	return lo, hi
}

// newTablePara returns a paragraph that wraps the table: both of its rectangles are
// the table's computed bounding box and `_bccg` points back at the table.
func (_fffb *textTable) newTablePara() *textPara {
	bbox := _fffb.computeBbox()
	para := new(textPara)
	para.PdfRectangle = bbox
	para._dcfc = bbox
	para._bccg = _fffb
	if _beae {
		_ag.Log.Info("\u006e\u0065w\u0054\u0061\u0062l\u0065\u0050\u0061\u0072\u0061\u003a\u0020\u0025\u0073", para)
	}
	return para
}

// _ccede rounds `_efbdf` to the nearest multiple of `_cegd`.
func _ccede(_efbdf float64) float64 {
	steps := _b.Round(_efbdf / _cegd)
	return _cegd * steps
}
// String returns a description of `b`.
func ( _fgaf * wordBag ) String ( ) string { var _caefb [ ] string ; for _ , _bdbg := range _fgaf . depthIndexes ( ) { _cgfag := _fgaf . _bbf [ _bdbg ] ; for _ , _cff := range _cgfag { _caefb = append ( _caefb , _cff . _bcaa ) ; } ; } ; return _gdf . Sprintf ( "\u0025.\u0032\u0066\u0020\u0066\u006f\u006e\u0074\u0073\u0069\u007a\u0065=\u0025\u002e\u0032\u0066\u0020\u0025\u0064\u0020\u0025\u0071" , _fgaf . PdfRectangle , _fgaf . _gaed , len ( _caefb ) , _caefb ) ;
2022-09-23 18:05:51 +00:00
} ;
2022-09-10 15:35:04 +00:00
2022-10-27 19:04:58 +00:00
// Elements returns the TextMarks in `ma`.
func (_fae *TextMarkArray) Elements() []TextMark { return _fae._fceg }

// top returns the last state on the stack, or nil when the stack is empty.
func (_dbd *stateStack) top() *textState {
	if _dbd.empty() {
		return nil
	}
	return (*_dbd)[_dbd.size()-1]
}

// _effa returns the smallest rectangle that contains both `_fegb` and `_gcfc`.
func _effa(_fegb, _gcfc _bf.PdfRectangle) _bf.PdfRectangle {
	return _bf.PdfRectangle{
		Llx: _b.Min(_fegb.Llx, _gcfc.Llx),
		Lly: _b.Min(_fegb.Lly, _gcfc.Lly),
		Urx: _b.Max(_fegb.Urx, _gcfc.Urx),
		Ury: _b.Max(_fegb.Ury, _gcfc.Ury),
	}
}

// _eceed merges word bags that lie inside other word bags and returns the bags
// that remain.
//
// The bags are sorted in place by area, largest first, then by height; bags that
// tie keep their relative order. Each bag that has not been absorbed then absorbs
// every later bag whose rectangle `_ceba` reports as contained in its own
// rectangle widened to the left by its `_gaed`. A bag is absorbed at most once:
// bags already taken by an earlier, larger bag are skipped.
func _eceed(_fegf []*wordBag) []*wordBag {
	if len(_fegf) <= 1 {
		return _fegf
	}
	if _cfed {
		_ag.Log.Info("\u006d\u0065\u0072\u0067\u0065\u0057\u006f\u0072\u0064B\u0061\u0067\u0073\u003a")
	}
	// A stable sort gives the deterministic tie order that comparing slice
	// positions inside the less function cannot guarantee.
	_f.SliceStable(_fegf, func(_eeff, _debc int) bool {
		a, b := _fegf[_eeff], _fegf[_debc]
		areaA := a.Width() * a.Height()
		areaB := b.Width() * b.Height()
		if areaA != areaB {
			return areaA > areaB
		}
		return a.Height() > b.Height()
	})
	var merged []*wordBag
	absorbed := make(intSet)
	for i := 0; i < len(_fegf); i++ {
		if absorbed.has(i) {
			continue
		}
		outer := _fegf[i]
		for j := i + 1; j < len(_fegf); j++ {
			if absorbed.has(j) {
				continue
			}
			inner := _fegf[j]
			area := outer.PdfRectangle
			area.Llx -= outer._gaed
			if _ceba(area, inner.PdfRectangle) {
				outer.absorb(inner)
				absorbed.add(j)
			}
		}
		merged = append(merged, outer)
	}
	if len(_fegf) != len(merged)+len(absorbed) {
		_ag.Log.Error("\u006d\u0065\u0072ge\u0057\u006f\u0072\u0064\u0042\u0061\u0067\u0073\u003a \u0025d\u2192%\u0064 \u0061\u0062\u0073\u006f\u0072\u0062\u0065\u0064\u003d\u0025\u0064", len(_fegf), len(merged), len(absorbed))
	}
	return merged
}
// String returns a human readable description of `ss`.
func (_bcce *shapesState) String() string {
	count := len(_bcce._cfgd)
	return _gdf.Sprintf("\u007b\u0025\u0064\u0020su\u0062\u0070\u0061\u0074\u0068\u0073\u0020\u0066\u0072\u0065\u0073\u0068\u003d\u0025t\u007d", count, _bcce._ecd)
}

// _badbf returns the keys of `_fagba` sorted in descending order.
func _badbf(_fagba map[float64]map[float64]gridTile) []float64 {
	keys := make([]float64, 0, len(_fagba))
	for key := range _fagba {
		keys = append(keys, key)
	}
	_f.Float64s(keys)
	// Reverse the ascending order in place.
	for i, j := 0, len(keys)-1; i < j; i, j = i+1, j-1 {
		keys[i], keys[j] = keys[j], keys[i]
	}
	return keys
}

// _cde returns a textState with `_gdef` set to 100, `_egea` set to RenderModeFill
// and `_dadc` set to `_bcg`.
func _cde(_bcg _bf.PdfRectangle) textState {
	var state textState
	state._gdef = 100
	state._egea = RenderModeFill
	state._dadc = _bcg
	return state
}

// log prints the tiling under the title `_acca`: the sizes of `_fdeac` and `_gccf`,
// both slices, and every tile present in `_ffdd`. It does nothing unless `_efbc`
// is set.
func (_egbc gridTiling) log(_acca string) {
	if !_efbc {
		return
	}
	_ag.Log.Info("\u0074i\u006ci\u006e\u0067\u003a\u0020\u0025d\u0020\u0078 \u0025\u0064\u0020\u0025\u0071", len(_egbc._fdeac), len(_egbc._gccf), _acca)
	_gdf.Printf("\u0020\u0020\u0020l\u006c\u0078\u003d\u0025\u002e\u0032\u0066\u000a", _egbc._fdeac)
	_gdf.Printf("\u0020\u0020\u0020l\u006c\u0079\u003d\u0025\u002e\u0032\u0066\u000a", _egbc._gccf)
	for row, lly := range _egbc._gccf {
		tiles, hasRow := _egbc._ffdd[lly]
		if !hasRow {
			continue
		}
		_gdf.Printf("%\u0034\u0064\u003a\u0020\u0025\u0036\u002e\u0032\u0066\u000a", row, lly)
		for col, llx := range _egbc._fdeac {
			if tile, hasTile := tiles[llx]; hasTile {
				_gdf.Printf("\u0025\u0038\u0064\u003a\u0020\u0025\u0073\u000a", col, tile.String())
			}
		}
	}
}

// moveLP concatenates a translation by (`_gacc`, `_gdce`) onto `_baff` and copies
// the result to `_gcb`.
func (_dbcg *textObject) moveLP(_gacc, _gdce float64) {
	shift := _ee.NewMatrix(1, 0, 0, 1, _gacc, _gdce)
	_dbcg._baff.Concat(shift)
	_dbcg._gcb = _dbcg._baff
}

// getDown returns the `_egec` neighbours of the cells in the table's last row, one
// per column. It returns nil when any of them is taken or when they are not linked
// to each other left to right through `_bfdc`.
func (_ffagb *textTable) getDown() paraList {
	width := _ffagb._baee
	below := make(paraList, width)
	for x := 0; x < width; x++ {
		next := _ffagb.get(x, _ffagb._cabfg-1)._egec
		if next.taken() {
			return nil
		}
		below[x] = next
	}
	for x := 0; x+1 < width; x++ {
		if below[x]._bfdc != below[x+1] {
			return nil
		}
	}
	return below
}

// gridIntersecting reports whether both rulings have the same `_bgeb` and the same
// `_eecc`, each within the tolerance applied by `_cgcd`.
func (_bdddd *ruling) gridIntersecting(_cgfdc *ruling) bool {
	if !_cgcd(_bdddd._bgeb, _cgfdc._bgeb) {
		return false
	}
	return _cgcd(_bdddd._eecc, _cgfdc._eecc)
}

// _cgcd reports whether `_dacg` and `_gecbf` differ by no more than `_deff`.
func _cgcd(_dacg, _gecbf float64) bool {
	diff := _b.Abs(_dacg - _gecbf)
	return diff <= _deff
}

// textWord is a bounding rectangle, the marks that make up the word, its text and
// two float64 measures (`_ecgcg`, `_fbgge`), plus the `_egcbd` flag.
type textWord struct {
	_bf.PdfRectangle
	_ecgcg float64
	_bcaa  string
	_gdgg  []*textMark
	_fbgge float64
	_egcbd bool
}
// String returns a string describing `tm`: its offset, its text (quoted and as hex
// runes), its bounding box, the font description cut to 50 bytes, and a marker
// when the mark is a Meta mark.
func (_dcg TextMark) String() string {
	fontDesc := ""
	if _dcg.Font != nil {
		fontDesc = _dcg.Font.String()
		if len(fontDesc) > 50 {
			fontDesc = fontDesc[:50] + "\u002e\u002e\u002e"
		}
	}
	metaTag := ""
	if _dcg.Meta {
		metaTag = "\u0020\u002a\u004d\u002a"
	}
	box := _dcg.BBox
	return _gdf.Sprintf("\u007b\u0054\u0065\u0078t\u004d\u0061\u0072\u006b\u003a\u0020\u0025\u0064\u0020%\u0071\u003d\u0025\u0030\u0032\u0078\u0020\u0028\u0025\u0036\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e2\u0066\u0029\u0020\u0028\u00256\u002e\u0032\u0066\u002c\u0020\u0025\u0036\u002e\u0032\u0066\u0029\u0020\u0025\u0073\u0025\u0073\u007d", _dcg.Offset, _dcg.Text, []rune(_dcg.Text), box.Llx, box.Lly, box.Urx, box.Ury, fontDesc, metaTag)
}
// BBox returns the smallest axis-aligned rectangle that encloses all the TextMarks in `ma`.
// Meta marks and marks whose text consists only of white space (or is empty) are ignored.
// The boolean result is false when no mark contributed to the rectangle, in which case the
// returned rectangle is the zero value.
// A nil `ma` is treated as an empty array, consistent with Len.
func (_fad *TextMarkArray) BBox() (_bf.PdfRectangle, bool) {
	var bbox _bf.PdfRectangle
	if _fad == nil {
		// Mirror Len(): a nil array has no marks rather than causing a nil dereference.
		return bbox, false
	}
	found := false
	for _, mark := range _fad._fceg {
		if mark.Meta || _cgcdb(mark.Text) {
			continue
		}
		if !found {
			// The first contributing mark seeds the rectangle.
			bbox, found = mark.BBox, true
			continue
		}
		// _effa combines two rectangles (presumably their union -- confirm at its definition).
		bbox = _effa(bbox, mark.BBox)
	}
	return bbox, found
}
// TextTable represents a table.
// Cells are ordered top-to-bottom, left-to-right.
// Cells[y] is the (0-offset) y'th row in the table.
// Cells[y][x] is the (0-offset) x'th column in the table.
type TextTable struct {
	W, H  int
	Cells [][]TableCell
}

// readBefore reports whether the paragraph with index `_bgccb` is ordered before the paragraph
// with index `_edfa`. It returns true when
//   - _ffff(a, b) holds and a's Lly is above b's Lly, or
//   - a's `_dcfc` box lies entirely to the left of b's and no third paragraph whose Lly is in the
//     range spanned by the two (as selected by llyRange over `_gdgd`) has a `_dcfc` box that
//     overlaps the x-interval [max(Llx), min(Urx)] of the two boxes.
//
// NOTE(review): _ffff is presumably an x-overlap test -- confirm at its definition.
func (_cdcb paraList) readBefore(_gdgd []int, _bgccb, _edfa int) bool {
	a, b := _cdcb[_bgccb], _cdcb[_edfa]
	if _ffff(a, b) && a.Lly > b.Lly {
		return true
	}
	if !(a._dcfc.Urx < b._dcfc.Llx) {
		return false
	}

	lo, hi := a.Lly, b.Lly
	if lo > hi {
		hi, lo = lo, hi
	}
	llx := _b.Max(a._dcfc.Llx, b._dcfc.Llx)
	urx := _b.Min(a._dcfc.Urx, b._dcfc.Urx)

	for _, k := range _cdcb.llyRange(_gdgd, lo, hi) {
		if k == _bgccb || k == _edfa {
			continue
		}
		c := _cdcb[k]
		if c._dcfc.Llx <= urx && llx <= c._dcfc.Urx {
			return false
		}
	}
	return true
}

// showTextAdjusted processes the elements of the array `_fdb` in order.
//   - Numbers are scaled by -0.001 * `_degf._adfb`, turned into a matrix by _def and concatenated
//     onto `_gcb`.
//   - Strings are passed to renderText.
//   - Any other element type, or a string/number that cannot be read, is logged and an error is
//     returned immediately.
//
// NOTE(review): `_adfb` is presumably the current font size -- confirm.
func (_gef *textObject) showTextAdjusted(_fdb *_bg.PdfObjectArray) error {
	// The offset is always applied along X; the swap below is never taken while this is false.
	vertical := false
	for _, obj := range _fdb.Elements() {
		switch obj.(type) {
		case *_bg.PdfObjectFloat, *_bg.PdfObjectInteger:
			x, err := _bg.GetNumberAsFloat(obj)
			if err != nil {
				_ag.Log.Debug("ERROR: showTextAdjusted. Bad numerical arg. o=%s args=%+v", obj, _fdb)
				return err
			}
			dx, dy := -x*0.001*_gef._degf._adfb, 0.0
			if vertical {
				dy, dx = dx, dy
			}
			td := _def(_ee.Point{X: dx, Y: dy})
			_gef._gcb.Concat(td)
		case *_bg.PdfObjectString:
			strObj := _bg.TraceToDirectObject(obj)
			data, ok := _bg.GetStringBytes(strObj)
			if !ok {
				_ag.Log.Trace("showTextAdjusted: Bad string arg. o=%s args=%+v", obj, _fdb)
				return _bg.ErrTypeError
			}
			_gef.renderText(strObj, data)
		default:
			_ag.Log.Debug("ERROR: showTextAdjusted. Unexpected type (%T) args=%+v", obj, _fdb)
			return _bg.ErrTypeError
		}
	}
	return nil
}

// _acbd returns the negated Lly of the bounding box of `_ccea`.
func _acbd(_ccea bounded) float64 {
	return -_ccea.bbox().Lly
}

// pullWord appends `_edde` to the line and removes it from `_cebg`, passing `_gbgd` through to
// removeWord.
func (_gega *textLine) pullWord(_cebg *wordBag, _edde *textWord, _gbgd int) {
	_gega.appendWord(_edde)
	_cebg.removeWord(_edde, _gbgd)
}

// toGrids partitions the rulings into groups of connected rulings and returns those groups that
// isActualGrid accepts, each passed through snapToGroups.
//
// The steps are:
//  1. compute the intersections and, for each ruling index, its set of connections;
//  2. order the ruling indexes by decreasing number of connections (ties broken by comp);
//  3. greedily add each index to the first group containing a ruling it is connected to,
//     starting a new group if there is none;
//  4. order the groups by decreasing size and the members of each group by comp;
//  5. keep the groups that isActualGrid accepts.
//
// It returns nil when there is nothing to order (presumably an empty rulingList); previously
// this case indexed an empty slice and panicked.
// Logging is only active when the `_efa` constant is true.
func (_bcbcg rulingList) toGrids() []rulingList {
	if _efa {
		_ag.Log.Info("toGrids: %s", _bcbcg)
	}
	intersects := _bcbcg.intersections()
	if _efa {
		_ag.Log.Info("toGrids: vecs=%d intersects=%d ", len(_bcbcg), len(intersects))
		for _, i := range _ccagd(intersects) {
			_gdf.Printf("%4d: %+v\n", i, intersects[i])
		}
	}

	connects := make(map[int]intSet, len(_bcbcg))
	for i := range _bcbcg {
		if c := _bcbcg.connections(intersects, i); len(c) > 0 {
			connects[i] = c
		}
	}
	if _efa {
		_ag.Log.Info("toGrids: connects=%d", len(connects))
		for _, i := range _ccagd(connects) {
			_gdf.Printf("%4d: %+v\n", i, connects[i])
		}
	}

	ordering := _ccdef(len(_bcbcg), func(i, j int) bool {
		ni, nj := len(connects[i]), len(connects[j])
		if ni != nj {
			return ni > nj
		}
		return _bcbcg.comp(i, j)
	})
	if _efa {
		_ag.Log.Info("toGrids: ordering=%v", ordering)
	}
	if len(ordering) == 0 {
		// Nothing to group. Without this guard ordering[0] below is out of range.
		return nil
	}

	igrids := [][]int{{ordering[0]}}
nextIndex:
	for _, o := range ordering[1:] {
		for g, grid := range igrids {
			for _, member := range grid {
				if connects[member].has(o) {
					igrids[g] = append(grid, o)
					continue nextIndex
				}
			}
		}
		igrids = append(igrids, []int{o})
	}
	if _efa {
		_ag.Log.Info("toGrids: igrids=%v", igrids)
	}

	_f.SliceStable(igrids, func(i, j int) bool { return len(igrids[i]) > len(igrids[j]) })
	for _, grid := range igrids {
		_f.Slice(grid, func(i, j int) bool { return _bcbcg.comp(grid[i], grid[j]) })
	}

	grids := make([]rulingList, len(igrids))
	for g, grid := range igrids {
		vecs := make(rulingList, len(grid))
		for i, idx := range grid {
			vecs[i] = _bcbcg[idx]
		}
		grids[g] = vecs
	}
	if _efa {
		_ag.Log.Info("toGrids: grids=%+v", grids)
	}

	var actualGrids []rulingList
	for _, vecs := range grids {
		if actual, ok := vecs.isActualGrid(); ok {
			actualGrids = append(actualGrids, actual.snapToGroups())
		}
	}
	if _efa {
		_aadc("toGrids: actualGrids", actualGrids)
		_ag.Log.Info("toGrids: grids=%d actualGrids=%d", len(grids), len(actualGrids))
	}
	return actualGrids
}

// yNeighbours builds two events per paragraph, one at its lower y bound (flag true) and one at
// its upper y bound (flag false), and returns the result of eventNeighbours on them.
// When `_cbddd` is non-zero, the bounds are widened by `_cbddd` times the paragraph's fontsize().
func (_bdeec paraList) yNeighbours(_cbddd float64) map[*textPara][]int {
	events := make([]event, 2*len(_bdeec))
	if _cbddd == 0 {
		for i, para := range _bdeec {
			events[2*i] = event{para.Lly, true, i}
			events[2*i+1] = event{para.Ury, false, i}
		}
	} else {
		for i, para := range _bdeec {
			events[2*i] = event{para.Lly - _cbddd*para.fontsize(), true, i}
			events[2*i+1] = event{para.Ury + _cbddd*para.fontsize(), false, i}
		}
	}
	return _bdeec.eventNeighbours(events)
}

// markCells sets the `_bggb` flag on every non-nil cell of the table.
func (_edcg *textTable) markCells() {
	for y := 0; y < _edcg._cabfg; y++ {
		for x := 0; x < _edcg._baee; x++ {
			if cell := _edcg.get(x, y); cell != nil {
				cell._bggb = true
			}
		}
	}
}

// textLine is a rectangle (embedded PdfRectangle) holding a list of words.
// NOTE(review): `_dcfd` and `_ddef` are presumably the line's depth and font size -- confirm.
type textLine struct {
	_bf.PdfRectangle
	_dcfd float64
	_egad []*textWord
	_ddef float64
}

// _egag reports whether the two points have exactly equal X and Y coordinates.
func _egag(_dcbd, _bdcda _ee.Point) bool {
	return _dcbd.X == _bdcda.X && _dcbd.Y == _bdcda.Y
}
// growTable repeatedly extends the table by one row (from getDown) and/or one column (from
// getRight) until neither is available.
// When both are available and they share a last element that is not taken, the row is added
// first, getRight is re-evaluated for the taller table, and that shared element is placed in the
// new bottom-right corner.
func (_aadcd *textTable) growTable() {
	addRow := func(row paraList) {
		_aadcd._cabfg++
		for x := 0; x < _aadcd._baee; x++ {
			_aadcd.put(x, _aadcd._cabfg-1, row[x])
		}
	}
	addCol := func(col paraList) {
		_aadcd._baee++
		for y := 0; y < _aadcd._cabfg; y++ {
			_aadcd.put(_aadcd._baee-1, y, col[y])
		}
	}

	if _ddgf {
		_aadcd.log("growTable")
	}
	for pass := 0; ; pass++ {
		grown := false
		down := _aadcd.getDown()
		right := _aadcd.getRight()
		if _ddgf {
			_gdf.Printf("%4d: %s\n", pass, _aadcd)
			_gdf.Printf("        down=%s\n", down)
			_gdf.Printf("       right=%s\n", right)
		}

		if down != nil && right != nil {
			corner := down[len(down)-1]
			if !corner.taken() && corner == right[len(right)-1] {
				addRow(down)
				if right = _aadcd.getRight(); right != nil {
					addCol(right)
					_aadcd.put(_aadcd._baee-1, _aadcd._cabfg-1, corner)
				}
				grown = true
			}
		}
		if grown {
			continue
		}

		switch {
		case down != nil:
			addRow(down)
		case right != nil:
			addCol(right)
		default:
			// Nothing left to add in either direction.
			return
		}
	}
}

// _bacfa returns the keys of `_ebba` in increasing order.
func _bacfa(_ebba map[int][]float64) []int {
	keys := make([]int, 0, len(_ebba))
	for k := range _ebba {
		keys = append(keys, k)
	}
	_f.Ints(keys)
	return keys
}

// aligned reports whether enough of the rulings line up with each other.
// Each ruling after the first is matched against the representatives collected so far using
// gridIntersecting; a match increments that representative's count, otherwise the ruling
// becomes a new representative with count 0. The list is aligned when the fraction of
// representatives with count 0, relative to the number of rulings, is at most 1.0 - `_cagg`.
// Lists with fewer than 2 rulings are never aligned.
func (_baeb rulingList) aligned() bool {
	if len(_baeb) < 2 {
		return false
	}

	matches := map[*ruling]int{_baeb[0]: 0}
	for _, v := range _baeb[1:] {
		matched := false
		for rep := range matches {
			if v.gridIntersecting(rep) {
				matches[rep]++
				matched = true
				break
			}
		}
		if !matched {
			matches[v] = 0
		}
	}

	unmatched := 0
	for _, n := range matches {
		if n == 0 {
			unmatched++
		}
	}
	frac := float64(unmatched) / float64(len(_baeb))
	isAligned := frac <= 1.0-_cagg
	if _efa {
		_ag.Log.Info("aligned=%t unmatched=%.2f=%d/%d vecs=%s",
			isAligned, frac, unmatched, len(_baeb), _baeb.String())
	}
	return isAligned
}

// subdivide splits the composite cells of the table along the row and column corridors and
// returns a new table holding the resulting cells.
// If either set of corridors is empty, the receiver itself is returned unchanged.
// Each composite cell at (x, y) is split with the corridors for its row and column and the
// pieces are placed at the x and y offsets computed by _bbgcb.
func (_gdfag *textTable) subdivide() *textTable {
	_gdfag.logComposite("subdivide")

	rowCorridors := _gdfag.compositeRowCorridors()
	colCorridors := _gdfag.compositeColCorridors()
	if _beae {
		_ag.Log.Info("subdivide:\n\trowCorridors=%s\n\tcolCorridors=%s",
			_cgbbf(rowCorridors), _cgbbf(colCorridors))
	}
	if len(rowCorridors) == 0 || len(colCorridors) == 0 {
		return _gdfag
	}

	_beda(rowCorridors)
	_beda(colCorridors)
	if _beae {
		_ag.Log.Info("subdivide fixed:\n\trowCorridors=%s\n\tcolCorridors=%s",
			_cgbbf(rowCorridors), _cgbbf(colCorridors))
	}

	yOffsets, h := _bbgcb(_gdfag._cabfg, rowCorridors)
	xOffsets, w := _bbgcb(_gdfag._baee, colCorridors)
	cells := make(map[uint64]*textPara, w*h)
	table := &textTable{
		PdfRectangle: _gdfag.PdfRectangle,
		_acgbd:       _gdfag._acgbd,
		_cabfg:       h,
		_baee:        w,
		_aagc:        cells,
	}
	if _beae {
		_ag.Log.Info("subdivide: composite = %d x %d cells= %d x %d\n"+
			"\trowCorridors=%s\n"+
			"\tcolCorridors=%s\n"+
			"\tyOffsets=%+v\n"+
			"\txOffsets=%+v",
			_gdfag._baee, _gdfag._cabfg, w, h,
			_cgbbf(rowCorridors), _cgbbf(colCorridors), yOffsets, xOffsets)
	}

	for y := 0; y < _gdfag._cabfg; y++ {
		y0 := yOffsets[y]
		for x := 0; x < _gdfag._baee; x++ {
			x0 := xOffsets[x]
			if _beae {
				_gdf.Printf("%6d, %2d: x0=%d y0=%d\n", x, y, x0, y0)
			}
			composite, ok := _gdfag._bbfb[_fcbc(x, y)]
			if !ok {
				continue
			}
			sub := composite.split(rowCorridors[y], colCorridors[x])
			for sy := 0; sy < sub._cabfg; sy++ {
				for sx := 0; sx < sub._baee; sx++ {
					cell := sub.get(sx, sy)
					table.put(x0+sx, y0+sy, cell)
					if _beae {
						_gdf.Printf("%8d, %2d: %s\n", x0+sx, y0+sy, cell)
					}
				}
			}
		}
	}
	return table
}

// Tuning constants (thresholds, tolerances and limits) used by the extraction code.
// Uses visible nearby: `_deff` is the tolerance in _cgcd and findPrimSec, `_dcfe` the bound in
// _geccf, `_cagg` the threshold in rulingList.aligned and `_gefd` the font-size factor in
// wordBag.removeDuplicates.
const (
	_ffba  = 1.0e-6
	_cegd  = 1.0e-4
	_ggdc  = 10
	_ffde  = 6
	_bgcc  = 0.5
	_gfdad = 0.12
	_edge  = 0.19
	_cbgg  = 0.04
	_ggcf  = 0.04
	_cccf  = 1.0
	_aaff  = 0.04
	_cfede = 0.4
	_bdea  = 0.7
	_efef  = 1.0
	_bee   = 0.1
	_dcca  = 1.4
	_ceda  = 0.46
	_gaca  = 0.02
	_gefd  = 0.2
	_fge   = 0.5
	_feba  = 4
	_cdge  = 4.0
	_gdfd  = 6
	_bbgg  = 0.3
	_fbcc  = 0.01
	_cfeg  = 0.02
	_agec  = 2
	_bafa  = 2
	_aedf  = 500
	_ffdeb = 4.0
	_dbgdc = 4.0
	_cdfc  = 0.05
	_gbef  = 0.1
	_deff  = 2.0
	_dcfe  = 2.0
	_egfc  = 1.5
	_dded  = 3.0
	_cagg  = 0.25
)

const _egc = 20

// _debdg returns a ruling of kind `_beec` built from the left edge of `_gcca`: its `_eead` is
// Llx and it spans Lly to Ury.
func _debdg(_gcca _bf.PdfRectangle) *ruling {
	return &ruling{
		_cgef: _beec,
		_eead: _gcca.Llx,
		_bgeb: _gcca.Lly,
		_eecc: _gcca.Ury,
	}
}

// _cgbbf formats `_gfeb` as "{key: values, key: values, ...}" with keys in increasing order and
// values printed with 2 decimal places.
func _cgbbf(_gfeb map[int][]float64) string {
	keys := _bacfa(_gfeb)
	parts := make([]string, 0, len(keys))
	for _, k := range keys {
		parts = append(parts, _gdf.Sprintf("%d: %.2f", k, _gfeb[k]))
	}
	return "{" + _d.Join(parts, ", ") + "}"
}

// clear resets the subpath to its zero value.
func (_bacf *subpath) clear() {
	var zero subpath
	*_bacf = zero
}
// ExtractText processes and extracts all text data in content streams and returns as a string.
// It takes into account character encodings in the PDF file, which are decoded by
// CharcodeBytesToUnicode.
// Characters that can't be decoded are replaced with MissingCodeRune ('\ufffd' = <20> ).
func ( _be * Extractor ) ExtractText ( ) ( string , error ) { _fac , _ , _ , _dge := _be . ExtractTextWithStats ( ) ; return _fac , _dge ; } ; func _cgcdb ( _bfddc string ) bool { for _ , _cceeg := range _bfddc { if ! _gg . IsSpace ( _cceeg ) { return false ; } ; } ; return true ; } ; func ( _eeeb * textPara ) toCellTextMarks ( _bga * int ) [ ] TextMark { var _gbd [ ] TextMark ;
for _cfdc , _gfeab := range _eeeb . _ecbdg { _cfef := _gfeab . toTextMarks ( _bga ) ; _ceede := _badf && _gfeab . endsInHyphen ( ) && _cfdc != len ( _eeeb . _ecbdg ) - 1 ; if _ceede { _cfef = _acdc ( _cfef , _bga ) ; } ; _gbd = append ( _gbd , _cfef ... ) ; if ! ( _ceede || _cfdc == len ( _eeeb . _ecbdg ) - 1 ) { _gbd = _abbf ( _gbd , _bga , _afed ( _gfeab . _dcfd , _eeeb . _ecbdg [ _cfdc + 1 ] . _dcfd ) ) ;
} ; } ; return _gbd ; } ; func _eag ( _gbgc * Extractor , _fccd * _bf . PdfPageResources , _fbcf _da . GraphicsState , _ebg * textState , _daa * stateStack ) * textObject { return & textObject { _ccae : _gbgc , _caa : _fccd , _fcdg : _fbcf , _afg : _daa , _degf : _ebg , _gcb : _ee . IdentityMatrix ( ) , _baff : _ee . IdentityMatrix ( ) } ;
} ;
// String returns a description of `t` in the form "<w> x <h> <isGrid>".
func (_eeege *textTable) String() string {
	w, h, isGrid := _eeege._baee, _eeege._cabfg, _eeege._acgbd
	return _gdf.Sprintf("%d x %d %t", w, h, isGrid)
}
// String returns a description of `tm`: its rectangle, its `_abba` value (labelled "fontsize")
// and its text.
func (_gfae *textMark) String() string {
	return _gdf.Sprintf(`%.2f fontsize=%.2f "%s"`, _gfae.PdfRectangle, _gfae._abba, _gfae._eeaf)
}

// snapToGroups splits the rulings with vertsHorzs, applies snapToGroupsDirection to each
// non-empty part and returns the two parts concatenated (first part first).
func (_acad rulingList) snapToGroups() rulingList {
	verts, horzs := _acad.vertsHorzs()
	if len(verts) > 0 {
		verts = verts.snapToGroupsDirection()
	}
	if len(horzs) > 0 {
		horzs = horzs.snapToGroupsDirection()
	}
	snapped := append(verts, horzs...)
	snapped.log("snapToGroups")
	return snapped
}

// log writes a description of the table and of each non-nil cell, titled `_dfcd`.
// It does nothing unless the `_beae` constant is true.
func (_befc *textTable) log(_dfcd string) {
	if !_beae {
		return
	}
	_ag.Log.Info("~~~ %s: %d x %d grid=%t\n      %6.2f",
		_dfcd, _befc._baee, _befc._cabfg, _befc._acgbd, _befc.PdfRectangle)
	for y := 0; y < _befc._cabfg; y++ {
		for x := 0; x < _befc._baee; x++ {
			cell := _befc.get(x, y)
			if cell == nil {
				continue
			}
			_gdf.Printf("%4d %2d: %6.2f %q %d\n",
				x, y, cell.PdfRectangle, _babd(cell.text(), 50), _a.RuneCountInString(cell.text()))
		}
	}
}

// findGridTables calls findTableGrid for every tiling in `_cdfb` and returns the non-nil tables
// found. The cells of each returned table, and every paragraph in the second result of
// findTableGrid, get their `_bggb` flag set.
func (_cfdbd paraList) findGridTables(_cdfb []gridTiling) []*textTable {
	if _beae {
		_ag.Log.Info("findGridTables: %d paras", len(_cfdbd))
		for i, para := range _cfdbd {
			_gdf.Printf("%4d: %s\n", i, para)
		}
	}

	var tables []*textTable
	for i, tiling := range _cdfb {
		table, used := _cfdbd.findTableGrid(tiling)
		if table != nil {
			table.log(_gdf.Sprintf("findTableWithGrids: %d", i))
			tables = append(tables, table)
			table.markCells()
		}
		for para := range used {
			para._bggb = true
		}
	}

	if _beae {
		_ag.Log.Info("findGridTables: %d tables", len(tables))
	}
	return tables
}

// stateStack is a stack of text states.
type stateStack []*textState

// toTextMarks converts the marks of the word to TextMarks, accumulating them with _affac.
// `_cfdg` is passed through to _affac.
func (_gagf *textWord) toTextMarks(_cfdg *int) []TextMark {
	var marks []TextMark
	for _, tm := range _gagf._gdgg {
		marks = _affac(marks, _cfdg, tm.ToTextMark())
	}
	return marks
}

// pop removes the top entry from the stack and returns a pointer to a copy of it.
// It returns nil if the stack is empty.
func (_gaee *stateStack) pop() *textState {
	if _gaee.empty() {
		return nil
	}
	stack := *_gaee
	top := len(stack) - 1
	state := *stack[top]
	*_gaee = stack[:top]
	return &state
}

// put stores `_efcdc` as the cell at column `_cagd`, row `_bgccbg`.
func (_eagd *textTable) put(_cagd, _bgccbg int, _efcdc *textPara) {
	key := _fcbc(_cagd, _bgccbg)
	_eagd._aagc[key] = _efcdc
}

// empty reports whether the bag has no entry for index `_gggb`.
func (_dff *wordBag) empty(_gggb int) bool {
	if _, present := _dff._bbf[_gggb]; present {
		return false
	}
	return true
}

// compositeRowCorridors returns, for each row y >= 1 that has at least one composite cell, the
// value computed by _eagab from the composite cells of that row.
func (_fgbb *textTable) compositeRowCorridors() map[int][]float64 {
	corridors := make(map[int][]float64, _fgbb._cabfg)
	if _beae {
		_ag.Log.Info("compositeRowCorridors: h=%d", _fgbb._cabfg)
	}
	for y := 1; y < _fgbb._cabfg; y++ {
		var row []compositeCell
		for x := 0; x < _fgbb._baee; x++ {
			if cell, ok := _fgbb._bbfb[_fcbc(x, y)]; ok {
				row = append(row, cell)
			}
		}
		if len(row) == 0 {
			continue
		}
		corridor := _eagab(row)
		corridors[y] = corridor
		if _beae {
			_gdf.Printf("   %2d: %6.2f\n", y, corridor)
		}
	}
	return corridors
}

// bounded is implemented by anything that has a bounding box.
type bounded interface {
	bbox() _bf.PdfRectangle
}

// writeText writes the text of the paragraph to `_fdbf`.
// A paragraph without a table is written with writeCellText. Otherwise the table is written row
// by row: each cell's text (or a tab for a missing cell) followed by a space, with a newline
// between rows. Write errors are not checked.
func (_acdd *textPara) writeText(_fdbf _ed.Writer) {
	table := _acdd._bccg
	if table == nil {
		_acdd.writeCellText(_fdbf)
		return
	}
	for y := 0; y < _acdd._bccg._cabfg; y++ {
		for x := 0; x < _acdd._bccg._baee; x++ {
			if cell := _acdd._bccg.get(x, y); cell != nil {
				cell.writeCellText(_fdbf)
			} else {
				_fdbf.Write([]byte("\t"))
			}
			_fdbf.Write([]byte(" "))
		}
		if y < _acdd._bccg._cabfg-1 {
			_fdbf.Write([]byte("\n"))
		}
	}
}

// logComposite prints two grids for the composite cells of the table, titled `_ccfaf`: the
// number of paragraphs in each composite cell, and the (truncated) merged text of each cell.
// It does nothing unless the `_beae` constant is true.
func (_cdaeb *textTable) logComposite(_ccfaf string) {
	if !_beae {
		return
	}

	// Grid 1: number of paragraphs per composite cell.
	_ag.Log.Info("~~~Para %d x %d %s", _cdaeb._baee, _cdaeb._cabfg, _ccfaf)
	_gdf.Printf("%5s |", "")
	for x := 0; x < _cdaeb._baee; x++ {
		_gdf.Printf("%3d |", x)
	}
	_gdf.Println("")
	_gdf.Printf("%5s +", "")
	for x := 0; x < _cdaeb._baee; x++ {
		_gdf.Printf("%3s +", "---")
	}
	_gdf.Println("")
	for y := 0; y < _cdaeb._cabfg; y++ {
		_gdf.Printf("%5d |", y)
		for x := 0; x < _cdaeb._baee; x++ {
			paras, _ := _cdaeb._bbfb[_fcbc(x, y)].parasBBox()
			_gdf.Printf("%3d |", len(paras))
		}
		_gdf.Println("")
	}

	// Grid 2: merged text per composite cell, cut to 12 characters by _babd.
	_ag.Log.Info("~~~Text %d x %d %s", _cdaeb._baee, _cdaeb._cabfg, _ccfaf)
	_gdf.Printf("%5s |", "")
	for x := 0; x < _cdaeb._baee; x++ {
		_gdf.Printf("%12d |", x)
	}
	_gdf.Println("")
	_gdf.Printf("%5s +", "")
	for x := 0; x < _cdaeb._baee; x++ {
		_gdf.Print("-------------+")
	}
	_gdf.Println("")
	for y := 0; y < _cdaeb._cabfg; y++ {
		_gdf.Printf("%5d |", y)
		for x := 0; x < _cdaeb._baee; x++ {
			paras, _ := _cdaeb._bbfb[_fcbc(x, y)].parasBBox()
			text := ""
			if merged := paras.merge(); merged != nil {
				text = merged.text()
			}
			quoted := _gdf.Sprintf("%q", _babd(text, 12))
			// Drop the surrounding quotes added by %q.
			text = quoted[1 : len(quoted)-1]
			_gdf.Printf("%12s |", text)
		}
		_gdf.Println("")
	}
}

// Debug-logging switches. All are false, so every block guarded by one of them is disabled.
const (
	_ceabd = false
	_eccd  = false
	_ddb   = false
	_dbcf  = false
	_bccf  = false
	_dbde  = false
	_gfcbc = false
	_dada  = false
	_cfed  = false
	_bcf   = _cfed && true
	_aebf  = _bcf && false
	_dbbge = _cfed && true
	_beae  = false
	_ddgf  = _beae && false
	_fcff  = _beae && true
	_efa   = false
	_bfga  = _efa && false
	_cecg  = _efa && false
	_efbc  = _efa && true
	_bded  = _efa && false
	_fgad  = _efa && false
)

// _geccf reports whether the magnitude of `_facc` is below `_dcfe`.
func _geccf(_facc float64) bool {
	return _b.Abs(_facc) < _dcfe
}
// Len returns the number of TextMarks in `ma`.
// A nil `ma` has length 0.
func (_bdge *TextMarkArray) Len() int {
	if _bdge != nil {
		return len(_bdge._fceg)
	}
	return 0
}
// NewFromContents creates a new extractor from contents and page resources.
// The returned error is always nil.
func NewFromContents(contents string, resources *_bf.PdfPageResources) (*Extractor, error) {
	const _eeb = "extractor.NewFromContents"
	extractor := &Extractor{
		_fc: contents,
		_ad: resources,
		_ac: map[string]fontEntry{},
		_fd: map[string]textResult{},
	}
	_cd.TrackUse(_eeb)
	return extractor, nil
}

// push appends a copy of `_fee` to the stack, so later changes to `_fee` do not affect the
// saved entry.
func (_egcf *stateStack) push(_fee *textState) {
	saved := *_fee
	*_egcf = append(*_egcf, &saved)
}

// moveTextSetLeading sets `_degf._acfa` to -`_cef` and then calls moveLP(`_cba`, `_cef`).
func (_ffbd *textObject) moveTextSetLeading(_cba, _cef float64) {
	_ffbd._degf._acfa = -_cef
	_ffbd.moveLP(_cba, _cef)
}

// _cedeb builds a textWord from the marks `_egbce`, which must not be empty.
// The word's rectangle combines the rectangles of all marks (via _effa), its `_fbgge` is the
// largest `_abba` of the marks and its `_ecgcg` is the Ury of `_eebe` minus the word's Lly.
func _cedeb(_egbce []*textMark, _eebe _bf.PdfRectangle) *textWord {
	first := _egbce[0]
	bbox, fontsize := first.PdfRectangle, first._abba
	for _, tm := range _egbce[1:] {
		bbox = _effa(bbox, tm.PdfRectangle)
		if tm._abba > fontsize {
			fontsize = tm._abba
		}
	}
	return &textWord{
		PdfRectangle: bbox,
		_gdgg:        _egbce,
		_ecgcg:       _eebe.Ury - bbox.Lly,
		_fbgge:       fontsize,
	}
}

// _eeee returns a ruling of kind `_ebabd` built from the top edge of `_febg`: its `_eead` is Ury
// and it spans Llx to Urx.
func _eeee(_febg _bf.PdfRectangle) *ruling {
	return &ruling{
		_cgef: _ebabd,
		_eead: _febg.Ury,
		_bgeb: _febg.Llx,
		_eecc: _febg.Urx,
	}
}

// bbox returns the rectangle of the line.
func (_geca *textLine) bbox() _bf.PdfRectangle {
	return _geca.PdfRectangle
}
// toTilings tidies the rulings, groups them with toGrids and converts each group with asTiling.
// It returns the tidied rulings and the tilings, or (nil, nil) for an empty list.
func (_gdfg rulingList) toTilings() (rulingList, []gridTiling) {
	_gdfg.log("toTilings")
	if len(_gdfg) == 0 {
		return nil, nil
	}
	_gdfg = _gdfg.tidied("all")
	_gdfg.log("tidied")

	grids := _gdfg.toGrids()
	tilings := make([]gridTiling, len(grids))
	for i := range grids {
		tilings[i] = grids[i].asTiling()
	}
	return _gdfg, tilings
}

// _cdag reports whether the x-extents of the bag and the word overlap once the bag is widened
// by `_cdbg` on both sides.
func _cdag(_fbegd *wordBag, _cffc *textWord, _cdbg float64) bool {
	return _cffc.Llx < _fbegd.Urx+_cdbg && _fbegd.Llx-_cdbg < _cffc.Urx
}

// toTextTable converts the table to a TextTable with the same width and height.
// Positions without a cell are left as zero-valued TableCells. For each existing cell the text
// and the TextMarks are filled in; the marks of every cell are generated from a counter that
// starts at 0.
func (_cfefe *textTable) toTextTable() TextTable {
	if _beae {
		_ag.Log.Info("toTextTable: %d x %d", _cfefe._baee, _cfefe._cabfg)
	}
	rows := make([][]TableCell, _cfefe._cabfg)
	for y := 0; y < _cfefe._cabfg; y++ {
		row := make([]TableCell, _cfefe._baee)
		rows[y] = row
		for x := 0; x < _cfefe._baee; x++ {
			cell := _cfefe.get(x, y)
			if cell == nil {
				continue
			}
			if _beae {
				_gdf.Printf("%4d %2d: %s\n", x, y, cell)
			}
			row[x].Text = cell.text()
			offset := 0
			row[x].Marks._fceg = cell.toTextMarks(&offset)
		}
	}
	return TextTable{W: _cfefe._baee, H: _cfefe._cabfg, Cells: rows}
}

// _efeg reports whether the y-extents of the two rectangles overlap (touching counts).
func _efeg(_faf, _dggg _bf.PdfRectangle) bool {
	return _faf.Lly <= _dggg.Ury && _dggg.Lly <= _faf.Ury
}
// bbox returns the rectangle spanned by the rulings.
// Which of primMinMax/secMinMax supplies the x range and which the y range depends on the kind
// of the first ruling. For an empty list an error is logged and the zero rectangle is returned.
func (_fbcg rulingList) bbox() _bf.PdfRectangle {
	if len(_fbcg) == 0 {
		_ag.Log.Error("rulingList.bbox: no rulings")
		return _bf.PdfRectangle{}
	}
	var rect _bf.PdfRectangle
	if _fbcg[0]._cgef == _ebabd {
		rect.Llx, rect.Urx = _fbcg.secMinMax()
		rect.Lly, rect.Ury = _fbcg.primMinMax()
	} else {
		rect.Llx, rect.Urx = _fbcg.primMinMax()
		rect.Lly, rect.Ury = _fbcg.secMinMax()
	}
	return rect
}

// textResult holds a PageText and two integer counters.
// NOTE(review): the counters are presumably the character and miss counts reported by
// ExtractTextWithStats -- confirm.
type textResult struct {
	_bafb PageText
	_dfcc int
	_bcbb int
}

// devicePoint transforms the point (`_eaaa`, `_dbdc`) with the product `_efgg`.Mult(`_egb`) and
// returns the result.
func (_eecd *shapesState) devicePoint(_eaaa, _dbdc float64) _ee.Point {
	m := _eecd._efgg.Mult(_eecd._egb)
	x, y := m.Transform(_eaaa, _dbdc)
	return _ee.NewPoint(x, y)
}

// removeDuplicates removes words that duplicate another word in the same bin.
// Two distinct words are duplicates when they have the same `_bcaa` and each of their four
// rectangle coordinates differs by less than `_gefd` times the `_fbgge` of the first word of the
// depth bin being scanned. Bins that become empty are deleted.
func (_eabe *wordBag) removeDuplicates() {
	if _dbbge {
		_ag.Log.Info("removeDuplicates: %q", _eabe.text())
	}
	for _, depthIdx := range _eabe.depthIndexes() {
		if len(_eabe._bbf[depthIdx]) == 0 {
			continue
		}
		head := _eabe._bbf[depthIdx][0]
		tol := _gefd * head._fbgge
		minDepth := head._ecgcg
		near := func(a, b float64) bool { return _b.Abs(a-b) < tol }

		for _, idx := range _eabe.depthBand(minDepth, minDepth+tol) {
			dupes := map[*textWord]struct{}{}
			words := _eabe._bbf[idx]
			for _, w := range words {
				if _, isDup := dupes[w]; isDup {
					continue
				}
				for _, other := range words {
					if _, isDup := dupes[other]; isDup {
						continue
					}
					if other != w && other._bcaa == w._bcaa &&
						near(other.Llx, w.Llx) && near(other.Urx, w.Urx) &&
						near(other.Lly, w.Lly) && near(other.Ury, w.Ury) {
						dupes[other] = struct{}{}
					}
				}
			}
			if len(dupes) == 0 {
				continue
			}

			// Compact the surviving words to the front of the bin, then truncate.
			n := 0
			for _, w := range words {
				if _, isDup := dupes[w]; !isDup {
					words[n] = w
					n++
				}
			}
			_eabe._bbf[idx] = words[:len(words)-len(dupes)]
			if len(_eabe._bbf[idx]) == 0 {
				delete(_eabe._bbf, idx)
			}
		}
	}
}

// findPrimSec returns the first ruling whose `_eead` matches `_gcbe` according to _fbga and
// whose range [`_bgeb`, `_eecc`], widened by `_deff` at both ends, contains `_efee`.
// It returns nil if there is no such ruling.
func (_eeac rulingList) findPrimSec(_gcbe, _efee float64) *ruling {
	for _, r := range _eeac {
		if !_fbga(r._eead - _gcbe) {
			continue
		}
		if r._bgeb-_deff <= _efee && _efee <= r._eecc+_deff {
			return r
		}
	}
	return nil
}