unipdf/extractor/text_utils.go

74 lines
1.6 KiB
Go

/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package extractor
import (
"fmt"
"math"
"path/filepath"
"runtime"
)
// serialState holds the serial numbers given to text* structs, with one counter per struct kind.
type serialState struct {
	mark   int // Serial number for textMark.
	word   int // Serial number for textWord.
	strata int // Serial number for textStrata.
	line   int // Serial number for textLine.
	para   int // Serial number for textPara.
}

// serial is the package-wide set of counters used to add serial numbers to all text* instances.
var serial serialState

// reset sets every counter in the receiver back to zero.
func (s *serialState) reset() {
	*s = serialState{}
}
// TOL is the tolerance within which coordinates are considered equal. It is large enough to
// absorb all rounding errors, yet small enough that a difference of TOL points on a page is not
// visible.
const TOL = 1e-6
// isZero reports whether x is within TOL of 0.0, i.e. whether its absolute value is strictly
// less than TOL.
func isZero(x float64) bool {
	magnitude := math.Abs(x)
	return TOL > magnitude
}
// minInt returns the lesser of `a` and `b`.
func minInt(a, b int) int {
	smaller := a
	if b < a {
		smaller = b
	}
	return smaller
}
// maxInt returns the greater of `a` and `b`.
func maxInt(a, b int) int {
	larger := a
	if b > a {
		larger = b
	}
	return larger
}
// fileLine returns a "file:line" string for the caller `skip` levels up the call stack, where a
// `skip` of 0 refers to the function that called fileLine.
//
// Only the base name of the file is used, and the line number is left-justified in a field of
// width 4. If the requested frame cannot be found, the file is shown as "???" and the line as 0.
// If `doSecond` is true, a ':' and the line number of the frame one level further up the stack
// are appended, formatted the same way.
func fileLine(skip int, doSecond bool) string {
	// The +1 skips fileLine's own frame.
	_, name, lineNum, found := runtime.Caller(skip + 1)
	if found {
		name = filepath.Base(name)
	} else {
		name, lineNum = "???", 0
	}

	location := fmt.Sprintf("%s:%-4d", name, lineNum)
	if doSecond {
		// The ok result is not checked here, so whatever line runtime.Caller reports is used.
		_, _, parentLine, _ := runtime.Caller(skip + 2)
		location = fmt.Sprintf("%s:%-4d", location, parentLine)
	}
	return location
}