mirror of
https://github.com/unidoc/unipdf.git
synced 2025-04-26 13:48:55 +08:00

* Added text bounding box extraction. * Add `font` field to textMark struct; create a new method `TextComponents` to retrieve all the text components of the extracted text in the page, with position and character information. * Reorganizing extractor/text.go * Added a text extraction position test. * Added another text extraction location test. * Text extraction location testing. * Added tests for text extraction with location information. * Cleaned up text extraction tests. No changes to functionality. * Simplifying text extraction code. * Simplified line construction in text.go * Returning TextMarks in TextMarkArray, which are based on PdfObjectArray but read-only, so not pointers. * Added text extraction to show PDFs marked up with bounding boxes of substrings in extracted text. * Add comments explaining how to calculate text bounding boxes. * Made text_test.go naming consistent with function comments in text.go * Use tm, pt, tl for textMark/TextMark, PageText and TextLine receivers and local variables. * Uncommented text stress test. Use go test --short to skip it. * TextMark.Offset is now an index into the extracted text. It was an index into []rune(text).
59 lines
1.4 KiB
Go
59 lines
1.4 KiB
Go
/*
 * This file is subject to the terms and conditions defined in
 * file 'LICENSE.md', which is part of this source code package.
 *
 * Based on pdf/contentstream/draw/point.go
 */
|
|
|
|
package transform
|
|
|
|
import (
|
|
"fmt"
|
|
"math"
|
|
)
|
|
|
|
// Point is a point (X,Y) expressed in Cartesian coordinates.
type Point struct {
	// X and Y are the two coordinates of the point, in that order.
	X, Y float64
}
|
|
|
|
// NewPoint returns a Point at `(x,y)`.
|
|
func NewPoint(x, y float64) Point {
|
|
return Point{X: x, Y: y}
|
|
}
|
|
|
|
// Set mutates `p` and sets to coordinates `(x, y)`.
|
|
func (p *Point) Set(x, y float64) {
|
|
p.X, p.Y = x, y
|
|
}
|
|
|
|
// Transform mutates and transforms `p` by the affine transformation a, b, c, d, tx, ty.
|
|
func (p *Point) Transform(a, b, c, d, tx, ty float64) {
|
|
m := NewMatrix(a, b, c, d, tx, ty)
|
|
p.transformByMatrix(m)
|
|
}
|
|
|
|
// Displace returns a new Point at location `p` + `delta`.
|
|
func (p Point) Displace(delta Point) Point {
|
|
return Point{p.X + delta.X, p.Y + delta.Y}
|
|
}
|
|
|
|
// Rotate returns a new Point at `p` rotated by `theta` degrees.
|
|
func (p Point) Rotate(theta float64) Point {
|
|
r := math.Hypot(p.X, p.Y)
|
|
t := math.Atan2(p.Y, p.X)
|
|
sin, cos := math.Sincos(t + theta/180.0*math.Pi)
|
|
return Point{r * cos, r * sin}
|
|
}
|
|
|
|
// transformByMatrix mutates and transforms `p` by the affine transformation `m`.
|
|
func (p *Point) transformByMatrix(m Matrix) {
|
|
p.X, p.Y = m.Transform(p.X, p.Y)
|
|
}
|
|
|
|
// String returns a string describing `p`.
|
|
func (p Point) String() string {
|
|
return fmt.Sprintf("(%.2f,%.2f)", p.X, p.Y)
|
|
}
|