Peter Williams 9ebcfcf168 Finding bounding boxes of substrings of extracted text. (#109)
* Added text bounding box extraction.
* Add `font` field to textMark struct;
Create a new method `TextComponents` to retrieve all the text components of the extracted text in the page, with position and character information
* Reorganizing extractor/text.go
* Added a text extraction position test.
* Added another text extraction location test.
* Text extraction location testing.
* Added tests for text extraction with location information.
* Cleaned up text extraction tests. No changes to functionality.
* Simplifying text extraction code.
* Simplified line construction in text.go
* Returning TextMark's in TextMarkArray which are based on PdfObjectArray but read-only, so not pointers.
* Added text extraction to show PDFs marked-up with bounding boxes of substring in extracted text.
* Add comments explaining how to calculate text bounding boxes.
* Made text_test.go naming consistent with function comments in text.go
* Use tm, pt, tl for textMark/TextMark PageText and TextLine receivers and local variables.
* Uncommented text stress test. Use go test --short to skip
* TextMark.Offset is now an index into the extracted text. It was an index into []rune(text)
2019-07-18 06:41:47 +00:00

59 lines
1.4 KiB
Go

/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*
* Based on pdf/contentstream/draw/point.go
*/
package transform
import (
"fmt"
"math"
)
// Point is an (X,Y) coordinate pair in Cartesian space.
type Point struct {
	X, Y float64
}
// NewPoint builds a Point whose X coordinate is `x` and whose Y coordinate is `y`.
func NewPoint(x, y float64) Point {
	var pt Point
	pt.X = x
	pt.Y = y
	return pt
}
// Set overwrites the coordinates of `p` in place so that it becomes `(x, y)`.
func (p *Point) Set(x, y float64) {
	p.X = x
	p.Y = y
}
// Transform applies the affine transformation described by a, b, c, d, tx, ty to `p`,
// modifying `p` in place. The six values are passed unchanged to NewMatrix and the
// resulting matrix is applied to the point.
func (p *Point) Transform(a, b, c, d, tx, ty float64) {
	p.transformByMatrix(NewMatrix(a, b, c, d, tx, ty))
}
// Displace returns the Point obtained by adding `delta` to `p` component-wise.
// `p` has a value receiver, so the caller's Point is left untouched.
func (p Point) Displace(delta Point) Point {
	p.X += delta.X
	p.Y += delta.Y
	return p
}
// Rotate returns a new Point at `p` rotated by `theta` degrees about the origin.
// Adding a positive `theta` increases the polar angle of the point, i.e. the
// rotation is counter-clockwise when the Y axis points up. `p` itself is not modified.
//
// The rotation is computed directly as
//
//	x' = x*cos(theta) - y*sin(theta)
//	y' = x*sin(theta) + y*cos(theta)
//
// rather than by converting to polar coordinates and back, which avoids the extra
// rounding error of that round trip and guarantees that Rotate(0) returns `p` exactly.
func (p Point) Rotate(theta float64) Point {
	sin, cos := math.Sincos(theta / 180.0 * math.Pi)
	return Point{
		X: p.X*cos - p.Y*sin,
		Y: p.X*sin + p.Y*cos,
	}
}
// transformByMatrix replaces the coordinates of `p` with the result of passing its
// current coordinates through `m.Transform`, modifying `p` in place.
func (p *Point) transformByMatrix(m Matrix) {
	x, y := m.Transform(p.X, p.Y)
	p.X = x
	p.Y = y
}
// String formats `p` as "(X,Y)" with each coordinate printed to two decimal places.
func (p Point) String() string {
	x, y := p.X, p.Y
	return fmt.Sprintf("(%.2f,%.2f)", x, y)
}