mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-27 13:48:51 +08:00
Insert a space when a numeric entry in a TJ array offsets the text by more than a threshold
That threshold is hard-coded as -100, i.e. entries below -100 produce a space (is this font-specific?)
This commit is contained in:
parent
bdf676e045
commit
30ff60a44e
@ -131,8 +131,17 @@ func (this *ContentStreamParser) ExtractText() (string, error) {
|
||||
return "", fmt.Errorf("Invalid parameter type, no array (%T)", op.Params[0])
|
||||
}
|
||||
for _, obj := range *paramList {
|
||||
if strObj, ok := obj.(*PdfObjectString); ok {
|
||||
txt += string(*strObj)
|
||||
switch v := obj.(type) {
|
||||
case *PdfObjectString:
|
||||
txt += string(*v)
|
||||
case *PdfObjectFloat:
|
||||
if *v < -100 {
|
||||
txt += " "
|
||||
}
|
||||
case *PdfObjectInteger:
|
||||
if *v < -100 {
|
||||
txt += " "
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if inText && op.Operand == "Tj" {
|
||||
|
25
pdf/contentstream/contentstream_test.go
Normal file
25
pdf/contentstream/contentstream_test.go
Normal file
@ -0,0 +1,25 @@
|
||||
package contentstream
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestOperandTJSpacing(t *testing.T) {
|
||||
|
||||
content := `BT
|
||||
[(are)-328(h)5(ypothesized)-328(to)-327(in\003uence)-328(the)-328(stability)-328(of)-328(the)-328(upstream)-327(glaciers,)-328(and)-328(thus)-328(of)-328(the)-328(entire)-327(ice)-328(sheet)]TJ
|
||||
ET`
|
||||
referenceText := "are hypothesized to in\003uence the stability of the upstream glaciers, and thus of the entire ice sheet"
|
||||
|
||||
cStreamParser := NewContentStreamParser(content)
|
||||
|
||||
text, err := cStreamParser.ExtractText()
|
||||
if err != nil {
|
||||
t.Error()
|
||||
}
|
||||
|
||||
if text != referenceText {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user