diff --git a/annotator/field_appearance.go b/annotator/field_appearance.go
index 07b2d7ef..1a4c12ea 100644
--- a/annotator/field_appearance.go
+++ b/annotator/field_appearance.go
@@ -13,6 +13,7 @@ import (
 
 	"github.com/unidoc/unipdf/v3/common"
 	"github.com/unidoc/unipdf/v3/contentstream"
+	"github.com/unidoc/unipdf/v3/contentstream/draw"
 	"github.com/unidoc/unipdf/v3/core"
 	"github.com/unidoc/unipdf/v3/internal/textencoding"
 	"github.com/unidoc/unipdf/v3/model"
@@ -175,12 +176,14 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT
 	width := rect.Width()
 	height := rect.Height()
 
+	var rotation float64
 	if mkDict, has := core.GetDict(wa.MK); has {
 		bsDict, _ := core.GetDict(wa.BS)
 		err := style.applyAppearanceCharacteristics(mkDict, bsDict, nil)
 		if err != nil {
 			return nil, err
 		}
+		rotation, _ = core.GetNumberAsFloat(mkDict.Get("R"))
 	}
 
 	// Get and process the default appearance string (DA) operands.
@@ -192,6 +195,7 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT
 	}
 
 	cc := contentstream.NewContentCreator()
+
 	if style.BorderSize > 0 {
 		drawRect(cc, style, width, height)
 	}
@@ -205,6 +209,28 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT
 
 	cc.Add_BMC("Tx")
 	cc.Add_q()
+
+	bboxWidth, bboxHeight := width, height
+	if rotation != 0 {
+		// Calculate bounding box before rotation.
+		revRotation := -rotation
+		bbox := draw.Path{Points: []draw.Point{
+			draw.NewPoint(0, 0).Rotate(revRotation),
+			draw.NewPoint(width, 0).Rotate(revRotation),
+			draw.NewPoint(0, height).Rotate(revRotation),
+			draw.NewPoint(width, height).Rotate(revRotation),
+		}}.GetBoundingBox()
+
+		// Update width and height, as the appearance is generated based on
+		// the bounding of the annotation with no rotation.
+		width = bbox.Width
+		height = bbox.Height
+
+		// Apply rotation.
+		cc.RotateDeg(rotation)
+		cc.Translate(bbox.X, bbox.Y)
+	}
+
 	// Graphic state changes.
 	cc.Add_BT()
 
@@ -461,7 +487,7 @@ func genFieldTextAppearance(wa *model.PdfAnnotationWidget, ftxt *model.PdfFieldT
 
 	xform := model.NewXObjectForm()
 	xform.Resources = resources
-	xform.BBox = core.MakeArrayFromFloats([]float64{0, 0, width, height})
+	xform.BBox = core.MakeArrayFromFloats([]float64{0, 0, bboxWidth, bboxHeight})
 	xform.SetContentStream(cc.Bytes(), defStreamEncoder())
 
 	apDict := core.MakeDict()
diff --git a/extractor/README.md b/extractor/README.md
index 0e303708..0f7204ca 100644
--- a/extractor/README.md
+++ b/extractor/README.md
@@ -62,3 +62,54 @@ bruce.pdf for char spacing save/restore.
 
 challenging-modified.pdf
 transitions_test.pdf
+
+
+Code Restructure?
+-----------------
+```
+	type textPara struct {
+		serial             int                // Sequence number for debugging.
+		model.PdfRectangle                    // Bounding box.
+		w, h   int
+		cells []textCell
+	}
+
+	type textCell struct {
+		serial             int                // Sequence number for debugging.
+		model.PdfRectangle                    // Bounding box.
+		eBBox              model.PdfRectangle // Extended bounding box needed to compute reading order.
+		lines              []*textLine        // Paragraph text gets broken into lines.
+	}
+```
+
+  x     x    x      x     x     x
+  x
+  x     x
+  x
+  x     x           x
+  x
+  x
+
+1. Compute all row candidates
+     alignedY  No intervening paras
+2. Compute all column candidates
+     alignedX  No intervening paras
+
+Table candidate
+1. Top row fully populated
+2. Left column fully populated
+3. All cells in table are aligned with 1 top row element and 1 left column candidate
+4. Minimum number of cells must be filled
+
+Computation time
+1. Row candidates  O(N)
+   Sort top to bottom, left to right
+   Search
+2. Column candidates O(N)
+   Sort left to right, top to bottom
+   Search
+3. Find intersections  O(N^2)
+   For each row
+      Find columns that start at row -> table candidates
+   Sort table candidates by w x h descending
+4. Test each candidate O(N^4)
diff --git a/extractor/text.go b/extractor/text.go
index 29638b12..ef607d61 100644
--- a/extractor/text.go
+++ b/extractor/text.go
@@ -22,8 +22,6 @@ import (
 	"github.com/unidoc/unipdf/v3/model"
 )
 
-const verbose = false
-
 // maxFormStack is the maximum form stack recursion depth. It has to be low enough to avoid a stack
 // overflow and high enough to accomodate customers' PDFs
 const maxFormStack = 10
@@ -49,7 +47,7 @@ func (e *Extractor) ExtractTextWithStats() (extracted string, numChars int, numM
 
 // ExtractPageText returns the text contents of `e` (an Extractor for a page) as a PageText.
 func (e *Extractor) ExtractPageText() (*PageText, int, int, error) {
-	pt, numChars, numMisses, err := e.extractPageText(e.contents, e.resources, 0)
+	pt, numChars, numMisses, err := e.extractPageText(e.contents, e.resources, transform.IdentityMatrix(), 0)
 	if err != nil {
 		return nil, numChars, numMisses, err
 	}
@@ -62,7 +60,8 @@ func (e *Extractor) ExtractPageText() (*PageText, int, int, error) {
 // extractPageText returns the text contents of content stream `e` and resouces `resources` as a
 // PageText.
 // This can be called on a page or a form XObject.
-func (e *Extractor) extractPageText(contents string, resources *model.PdfPageResources, level int) (
+func (e *Extractor) extractPageText(contents string, resources *model.PdfPageResources,
+	parentCTM transform.Matrix, level int) (
 	*PageText, int, int, error) {
 	common.Log.Trace("extractPageText: level=%d", level)
 	pageText := &PageText{pageSize: e.mediaBox}
@@ -97,7 +96,7 @@ func (e *Extractor) extractPageText(contents string, resources *model.PdfPageRes
 
 			operand := op.Operand
 
-			if verbose {
+			if verboseGeom {
 				common.Log.Info("&&& op=%s", op)
 			}
 
@@ -106,7 +105,7 @@ func (e *Extractor) extractPageText(contents string, resources *model.PdfPageRes
 				savedStates.push(&state)
 				// common.Log.Info("Save state: stack=%d\n %s", len(savedStates), state.String())
 			case "Q":
-				if verbose {
+				if verboseGeom {
 					common.Log.Info("Restore state: %s", savedStates.String())
 				}
 				if !savedStates.empty() {
@@ -129,7 +128,10 @@ func (e *Extractor) extractPageText(contents string, resources *model.PdfPageRes
 					pageText.marks = append(pageText.marks, to.marks...)
 				}
 				inTextObj = true
-				to = newTextObject(e, resources, gs, &state, &savedStates)
+				graphicsState := gs
+				graphicsState.CTM = parentCTM.Mult(graphicsState.CTM)
+				to = newTextObject(e, resources, graphicsState, &state, &savedStates)
+
 			case "ET": // End Text
 				// End text object, discarding text matrix. If the current
 				// text object contains text marks, they are added to the
@@ -343,8 +345,9 @@ func (e *Extractor) extractPageText(contents string, resources *model.PdfPageRes
 					if formResources == nil {
 						formResources = resources
 					}
+
 					tList, numChars, numMisses, err := e.extractPageText(string(formContent),
-						formResources, level+1)
+						formResources, parentCTM.Mult(gs.CTM), level+1)
 					if err != nil {
 						common.Log.Debug("ERROR: %v", err)
 						return err
@@ -489,8 +492,8 @@ func (to *textObject) setCharSpacing(x float64) {
 		return
 	}
 	to.state.tc = x
-	if verbose {
-		common.Log.Info("setCharSpacing: %.2f state=%s", to.state.String())
+	if verboseGeom {
+		common.Log.Info("setCharSpacing: %.2f state=%s", x, to.state.String())
 	}
 }
 
@@ -758,7 +761,7 @@ func (to *textObject) renderText(data []byte) error {
 	}
 	font := to.getCurrentFont()
 	charcodes := font.BytesToCharcodes(data)
-	runeSlices, numChars, numMisses := font.CharcodesToRuneSlices(charcodes)
+	texts, numChars, numMisses := font.CharcodesToStrings(charcodes)
 	if numMisses > 0 {
 		common.Log.Debug("renderText: numChars=%d numMisses=%d", numChars, numMisses)
 	}
@@ -777,17 +780,20 @@ func (to *textObject) renderText(data []byte) error {
 		spaceMetrics, _ = model.DefaultFont().GetRuneMetrics(' ')
 	}
 	spaceWidth := spaceMetrics.Wx * glyphTextRatio
-	common.Log.Trace("spaceWidth=%.2f text=%q font=%s fontSize=%.1f", spaceWidth, runeSlices, font, tfs)
+	common.Log.Trace("spaceWidth=%.2f text=%q font=%s fontSize=%.2f", spaceWidth, texts, font, tfs)
 
 	stateMatrix := transform.NewMatrix(
 		tfs*th, 0,
 		0, tfs,
 		0, state.trise)
-	if verbose {
-		common.Log.Info("renderText: %d codes=%+v runes=%q", len(charcodes), charcodes, runeSlices)
+	if verboseGeom {
+		common.Log.Info("renderText: %d codes=%+v texts=%q", len(charcodes), charcodes, texts)
 	}
 
-	for i, r := range runeSlices {
+	common.Log.Trace("renderText: %d codes=%+v texts=%d", len(charcodes), charcodes, len(texts))
+
+	for i, text := range texts {
+		r := []rune(text)
 		if len(r) == 1 && r[0] == '\x00' {
 			continue
 		}
@@ -819,7 +825,7 @@ func (to *textObject) renderText(data []byte) error {
 		// t is the displacement of the text cursor when the character is rendered.
 		t0 := transform.Point{X: (c.X*tfs + w) * th}
 		t := transform.Point{X: (c.X*tfs + state.tc + w) * th}
-		if verbose {
+		if verboseGeom {
 			common.Log.Info("tfs=%.2f tc=%.2f tw=%.2f th=%.2f", tfs, state.tc, state.tw, th)
 			common.Log.Info("dx,dy=%.3f t0=%.2f t=%.2f", c, t0, t)
 		}
@@ -830,7 +836,7 @@ func (to *textObject) renderText(data []byte) error {
 		td := translationMatrix(t)
 		end := to.gs.CTM.Mult(to.tm).Mult(td0)
 
-		if verbose {
+		if verboseGeom {
 			common.Log.Info("end:\n\tCTM=%s\n\t tm=%s\n"+
 				"\t td=%s xlat=%s\n"+
 				"\ttd0=%s\n\t → %s xlat=%s",
@@ -865,7 +871,7 @@ func (to *textObject) renderText(data []byte) error {
 
 		// update the text matrix by the displacement of the text location.
 		to.tm.Concat(td)
-		if i != len(runeSlices)-1 {
+		if i != len(texts)-1 {
 			to.logCursor()
 		}
 	}
@@ -908,10 +914,11 @@ func isTextSpace(text string) bool {
 
 // PageText represents the layout of text on a device page.
 type PageText struct {
-	marks     []*textMark // Texts and their positions on a PDF page.
-	viewText  string      // Extracted page text.
-	viewMarks []TextMark  // Public view of `marks`.
-	pageSize  model.PdfRectangle
+	marks      []*textMark        // Texts and their positions on a PDF page.
+	viewText   string             // Extracted page text.
+	viewMarks  []TextMark         // Public view of the text marks.
+	viewTables []TextTable        // Public view of the text tables.
+	pageSize   model.PdfRectangle // Page size. Used to calculate depth.
 }
 
 // String returns a string describing `pt`.
@@ -942,6 +949,11 @@ func (pt PageText) Marks() *TextMarkArray {
 	return &TextMarkArray{marks: pt.viewMarks}
 }
 
+// Tables returns the tables extracted from the page.
+func (pt PageText) Tables() []TextTable {
+	return pt.viewTables
+}
+
 // computeViews processes the page TextMarks sorting by position and populates `pt.viewText` and
 // `pt.viewMarks` which represent the text and marks in the order which it is read on the page.
 // The comments above the TextMark definition describe how to use the []TextMark to
@@ -953,6 +965,7 @@ func (pt *PageText) computeViews() {
 	paras.writeText(b)
 	pt.viewText = b.String()
 	pt.viewMarks = paras.toTextMarks()
+	pt.viewTables = paras.toTables()
 }
 
 // TextMarkArray is a collection of TextMarks.
@@ -1119,6 +1132,13 @@ var spaceMark = TextMark{
 	Meta:     true,
 }
 
+// TextTable represents a table.
+// Cells are ordered top-to-bottom, left-to-right.
+type TextTable struct {
+	W, H  int        // NOTE(review): presumably the column and row counts; confirm against toTextTable.
+	Cells [][]string // Cell texts. NOTE(review): presumably indexed [row][column]; confirm.
+}
+
 // getCurrentFont returns the font on top of the font stack, or DefaultFont if the font stack is
 // empty.
 func (to *textObject) getCurrentFont() *model.PdfFont {
diff --git a/extractor/text_bound.go b/extractor/text_bound.go
index 52b13c0b..16afae4e 100644
--- a/extractor/text_bound.go
+++ b/extractor/text_bound.go
@@ -19,11 +19,11 @@ import (
 var serial serialState
 
 type serialState struct {
-	mark int
-	word int
-	bins int
-	line int
-	para int
+	mark   int
+	word   int
+	strata int
+	line   int
+	para   int
 }
 
 func (serial *serialState) reset() {
@@ -65,15 +65,25 @@ func diffReading(a, b bounded) float64 {
 	return a.bbox().Llx - b.bbox().Llx
 }
 
-// func boundedUnion(objs ...bounded) model.PdfRectangle {
-// 	rect := objs[0].bbox()
-// 	for _, r := range objs[1:] {
-// 		rect = rectUnion(rect, r.bbox())
-// 	}
-// 	return rect
-// }
+func boundedUnion(objs ...bounded) model.PdfRectangle {
+	rect := objs[0].bbox() // Panics if `objs` is empty; callers must pass at least one object.
+	for _, r := range objs[1:] {
+		rect = rectUnion(rect, r.bbox()) // Grow `rect` to also cover `r`.
+	}
+	return rect
+}
 
-// diffDepth returns `a` - `b` in the depth direction..
+// rectContainsBounded returns true if `a` contains the bounding box of `b`.
+func rectContainsBounded(a model.PdfRectangle, b bounded) bool {
+	return rectContainsRect(a, b.bbox())
+}
+
+// rectContainsRect returns true if `b` lies within `a`. Coincident edges count as contained.
+func rectContainsRect(a, b model.PdfRectangle) bool {
+	return a.Llx <= b.Llx && b.Urx <= a.Urx && a.Lly <= b.Lly && b.Ury <= a.Ury
+}
+
+// diffDepth returns `a` - `b` in the depth direction.
 func diffDepth(a, b bounded) float64 {
 	return bboxDepth(a) - bboxDepth(b)
 }
@@ -151,3 +161,19 @@ func overlappedXRect(r0, r1 model.PdfRectangle) bool {
 func overlappedYRect(r0, r1 model.PdfRectangle) bool {
 	return (r0.Lly <= r1.Lly && r1.Lly <= r0.Ury) || (r0.Lly <= r1.Ury && r1.Ury <= r0.Ury)
 }
+
+// minInt returns the lesser of `a` and `b`.
+func minInt(a, b int) int {
+	if a < b {
+		return a
+	}
+	return b
+}
+
+// maxInt returns the greater of `a` and `b`.
+func maxInt(a, b int) int {
+	if a > b {
+		return a
+	}
+	return b
+}
diff --git a/extractor/text_const.go b/extractor/text_const.go
index 4f964e1b..c1df77f7 100644
--- a/extractor/text_const.go
+++ b/extractor/text_const.go
@@ -5,8 +5,24 @@
 
 package extractor
 
+// The following constants configure debugging.
 const (
+	verbose      = false // General debug output, e.g. in computeEBBoxes.
+	verboseGeom  = false // Content stream operator and text geometry logging.
+	verbosePage  = false // Page division and reading order logging.
+	verbosePara  = false // NOTE(review): presumably paragraph logging; usage not visible here.
+	verboseTable = false // NOTE(review): presumably table logging; usage not visible here.
+)
 
+// The following constants control the approaches used in the code.
+const (
+	useTables = true  // If true, makeTextPage runs paras.extractTables().
+	doHyphens = true  // NOTE(review): presumably enables removal of line-ending hyphens; confirm at use sites.
+	useEBBox  = false // If true, computeEBBoxes replaces each para's bounding box with its eBBox.
+)
+
+// The following constants are the tuning parameters for text extraction.
+const (
 	// Size of depth bins in points
 	depthBinPoints = 6
 
diff --git a/extractor/text_line.go b/extractor/text_line.go
index 69bf98ed..cb315d66 100644
--- a/extractor/text_line.go
+++ b/extractor/text_line.go
@@ -20,10 +20,12 @@ type textLine struct {
 	model.PdfRectangle             // Bounding box (union of `marks` bounding boxes).
 	depth              float64     // Distance from bottom of line to top of page.
 	words              []*textWord // Words in this line.
-	fontsize           float64
-	hyphenated         bool
+	fontsize           float64     // Largest word font size.
+	hyphenated         bool        // Does line have at least minHyphenation runes and end in a hyphen.
 }
 
+const minHyphenation = 4
+
 // newTextLine creates a line with font and bbox size of `w`, removes `w` from p.bins[bestWordDepthIdx] and adds it to the line
 func newTextLine(p *textStrata, depthIdx int) *textLine {
 	words := p.getStratum(depthIdx)
@@ -60,31 +62,22 @@ func (l *textLine) text() string {
 		}
 	}
 	return strings.Join(words, "")
-
 }
 
 // toTextMarks returns the TextMarks contained in `l`.text().
 // `offset` is used to give the TextMarks the correct Offset values.
 func (l *textLine) toTextMarks(offset *int) []TextMark {
 	var marks []TextMark
-	addMark := func(mark TextMark) {
-		mark.Offset = *offset
-		marks = append(marks, mark)
-		*offset += len(mark.Text)
-	}
-	addSpaceMark := func(spaceChar string) {
-		mark := spaceMark
-		mark.Text = spaceChar
-		addMark(mark)
-	}
 	for _, word := range l.words {
-		for _, tm := range word.marks {
-			addMark(tm.ToTextMark())
-		}
+		wordMarks := word.toTextMarks(offset)
+		marks = append(marks, wordMarks...)
 		if word.spaceAfter {
-			addSpaceMark(" ")
+			marks = appendSpaceMark(marks, offset, " ")
 		}
 	}
+	if len(l.text()) > 0 && len(marks) == 0 { // Invariant: a line with text must yield marks.
+		panic(l.text()) // NOTE(review): panics in library code; consider logging instead.
+	}
 	return marks
 }
 
@@ -130,16 +123,13 @@ func (l *textLine) mergeWordFragments() {
 	}
 
 	// check for hyphen at end of line
-	runes := []rune(l.text())
-	l.hyphenated = len(runes) >= 4 &&
+	l.hyphenated = isHyphenated(l.text())
+}
+
+// isHyphenated returns true if `text` has >= minHyphenation runes and ends in a non-space rune then a hyphen.
+func isHyphenated(text string) bool {
+	runes := []rune(text)
+	return len(runes) >= minHyphenation &&
 		unicode.Is(unicode.Hyphen, runes[len(runes)-1]) &&
 		!unicode.IsSpace(runes[len(runes)-2])
-	// if l.hyphenated {
-	// 	// fmt.Fprintf(os.Stderr, "\n%q ", l.text())
-	// 	common.Log.Info("### %d %q\n\t%q:%t\n\t%q:%t",
-	// 		len(runes), l.text(),
-	// 		runes[len(runes)-1], unicode.Is(unicode.Hyphen, runes[len(runes)-1]),
-	// 		runes[len(runes)-2], !unicode.IsSpace(runes[len(runes)-2]),
-	// 	)
-	// }
 }
diff --git a/extractor/text_mark.go b/extractor/text_mark.go
index b7d9fcf8..f23d3a77 100644
--- a/extractor/text_mark.go
+++ b/extractor/text_mark.go
@@ -21,11 +21,6 @@ type textMark struct {
 	model.PdfRectangle                  // Bounding box.
 	text               string           // The text (decoded via ToUnicode).
 	original           string           // Original text (decoded).
-	orient             int              // The text orientation in degrees. This is the current TRM rounded to 10°.
-	orientedStart      transform.Point  // Left of text in orientation where text is horizontal.
-	orientedEnd        transform.Point  // Right of text in orientation where text is horizontal.
-	height             float64          // Text height.
-	spaceWidth         float64          // Best guess at the width of a space in the font the text was rendered with.
 	font               *model.PdfFont   // The font the mark was drawn with.
 	fontsize           float64          // The font size the mark was drawn with.
 	charspacing        float64          // TODO (peterwilliams97: Should this be exposed in TextMark?
@@ -74,25 +69,20 @@ func (to *textObject) newTextMark(text string, trm transform.Matrix, end transfo
 	bbox = clipped
 
 	tm := textMark{
-		text:          text,
-		orient:        orient,
-		PdfRectangle:  bbox,
-		orientedStart: start.Rotate(theta),
-		orientedEnd:   end.Rotate(theta),
-		height:        math.Abs(height),
-		spaceWidth:    spaceWidth,
-		font:          font,
-		fontsize:      height,
-		charspacing:   charspacing,
-		trm:           trm,
-		end:           end,
-		serial:        serial.mark,
+		text:         text,
+		PdfRectangle: bbox,
+		font:         font,
+		fontsize:     height,
+		charspacing:  charspacing,
+		trm:          trm,
+		end:          end,
+		serial:       serial.mark,
 	}
 	serial.mark++
 	if !isTextSpace(tm.text) && tm.Width() == 0.0 {
 		common.Log.Debug("ERROR: Zero width text. tm=%s", tm.String())
 	}
-	if verbose {
+	if verboseGeom {
 		common.Log.Info("newTextMark: start=%.2f end=%.2f %s", start, end, tm.String())
 	}
 
@@ -110,11 +100,6 @@ func (tm *textMark) bbox() model.PdfRectangle {
 	return tm.PdfRectangle
 }
 
-// Width returns the width of `tm`.text in the text direction.
-func (tm *textMark) Width() float64 {
-	return math.Abs(tm.orientedStart.X - tm.orientedEnd.X)
-}
-
 // ToTextMark returns the public view of `tm`.
 func (tm *textMark) ToTextMark() TextMark {
 	return TextMark{
@@ -127,6 +112,23 @@ func (tm *textMark) ToTextMark() TextMark {
 	}
 }
 
+// appendTextMark sets `mark`.Offset to `*offset`, appends `mark` to `marks` and advances `*offset`
+// by len(mark.Text). It returns the extended slice.
+func appendTextMark(marks []TextMark, offset *int, mark TextMark) []TextMark {
+	mark.Offset = *offset
+	marks = append(marks, mark)
+	*offset += len(mark.Text)
+	return marks
+}
+
+// appendSpaceMark appends a copy of spaceMark with text `spaceChar` to `marks` and updates `offset`,
+// the offset of the appended mark in the extracted text.
+func appendSpaceMark(marks []TextMark, offset *int, spaceChar string) []TextMark {
+	mark := spaceMark
+	mark.Text = spaceChar
+	return appendTextMark(marks, offset, mark)
+}
+
 // nearestMultiple return the integer multiple of `m` that is closest to `x`.
 func nearestMultiple(x float64, m int) int {
 	if m == 0 {
diff --git a/extractor/text_page.go b/extractor/text_page.go
index 2b8d2679..1830dabd 100644
--- a/extractor/text_page.go
+++ b/extractor/text_page.go
@@ -9,16 +9,12 @@ import (
 	"fmt"
 	"io"
 	"math"
-	"unicode"
+	"sort"
 
 	"github.com/unidoc/unipdf/v3/common"
 	"github.com/unidoc/unipdf/v3/model"
 )
 
-// paraList is a sequence of textPara. We use it so often that it is convenient to have its own
-// type so we can have methods on it.
-type paraList []*textPara
-
 // makeTextPage builds a paraList from `marks`, the textMarks on a page.
 func makeTextPage(marks []*textMark, pageSize model.PdfRectangle, rot int) paraList {
 	common.Log.Trace("makeTextPage: %d elements pageSize=%.2f", len(marks), pageSize)
@@ -35,28 +31,21 @@ func makeTextPage(marks []*textMark, pageSize model.PdfRectangle, rot int) paraL
 	for i, para := range paraStratas {
 		paras[i] = composePara(para)
 	}
-	if verbose || true {
-		common.Log.Info("unsorted=========----------=====")
-		for i, para := range paras {
-			common.Log.Info("paras[%d]=%.2f%q", i, para.PdfRectangle, truncate(paras[i].text(), 200))
-		}
-	}
 
+	paras.log("unsorted")
+	// paras.computeEBBoxes()
+
+	if useTables {
+		paras = paras.extractTables()
+	}
+	// paras.log("tables extracted")
 	paras.computeEBBoxes()
-	paras = paras.extractTables()
+	paras.log("EBBoxes 2")
 
 	// Sort the paras into reading order.
 	paras.sortReadingOrder()
-	if verbose || true {
-		common.Log.Info("para sorted in reading order -----------=========")
-		for i, para := range paras {
-			tab := ""
-			if para.table != nil {
-				tab = fmt.Sprintf("[%dx%d]", para.table.w, para.table.h)
-			}
-			fmt.Printf("%4d: %6.2f %s %q\n", i, para.PdfRectangle, tab, truncate(para.text(), 50))
-		}
-	}
+	paras.log("sorted in reading order")
+
 	return paras
 }
 
@@ -72,7 +61,7 @@ func dividePage(page *textStrata, pageHeight float64) []*textStrata {
 	// Some bins are emptied before they iterated to (seee "surving bin" above).
 	// If a `page` survives until it is iterated to then at least one `para` will be built around it.
 
-	if verbose {
+	if verbosePage {
 		common.Log.Info("dividePage")
 	}
 	cnt := 0
@@ -89,7 +78,7 @@ func dividePage(page *textStrata, pageHeight float64) []*textStrata {
 			firstReadingIdx := page.firstReadingIndex(depthIdx)
 			words := page.getStratum(firstReadingIdx)
 			moveWord(firstReadingIdx, page, para, words[0])
-			if verbose {
+			if verbosePage {
 				common.Log.Info("words[0]=%s", words[0].String())
 			}
 
@@ -105,7 +94,7 @@ func dividePage(page *textStrata, pageHeight float64) []*textStrata {
 
 				// Add words that are within maxIntraDepthGap of `para` in the depth direction.
 				// i.e. Stretch para in the depth direction, vertically for English text.
-				if verbose {
+				if verbosePage {
 					common.Log.Info("para depth %.2f - %.2f maxIntraDepthGap=%.2f ",
 						para.minDepth(), para.maxDepth(), maxIntraDepthGap)
 				}
@@ -159,6 +148,9 @@ func dividePage(page *textStrata, pageHeight float64) []*textStrata {
 
 			// Sort the words in `para`'s bins in the reading direction.
 			para.sort()
+			if verbosePage {
+				common.Log.Info("para=%s", para.String())
+			}
 			paraStratas = append(paraStratas, para)
 		}
 	}
@@ -166,40 +158,11 @@ func dividePage(page *textStrata, pageHeight float64) []*textStrata {
 	return paraStratas
 }
 
-const doHyphens = true
-const useTables = true
-
 // writeText writes the text in `paras` to `w`.
 func (paras paraList) writeText(w io.Writer) {
-	for ip, para := range paras {
-		if useTables {
-			para.writeText(w)
-		} else {
-			for il, line := range para.lines {
-				s := line.text()
-				reduced := false
-				if doHyphens {
-					if line.hyphenated && (il != len(para.lines)-1 || ip != len(paras)-1) {
-						// Line ending with hyphen. Remove it.
-						runes := []rune(s)
-						s = string(runes[:len(runes)-1])
-						reduced = true
-					}
-				}
-				w.Write([]byte(s))
-				if reduced {
-					// We removed the hyphen from the end of the line so we don't need a line ending.
-					continue
-				}
-				if il < len(para.lines)-1 && isZero(line.depth-para.lines[il+1].depth) {
-					// Next line is the same depth so it's the same line as this one in the extracted text
-					w.Write([]byte(" "))
-					continue
-				}
-				w.Write([]byte("\n"))
-			}
-			w.Write([]byte("\n"))
-		}
+	for _, para := range paras {
+		para.writeText(w)
+		w.Write([]byte("\n"))
 	}
 }
 
@@ -208,69 +171,35 @@ func (paras paraList) writeText(w io.Writer) {
 func (paras paraList) toTextMarks() []TextMark {
 	offset := 0
 	var marks []TextMark
-	addMark := func(mark TextMark) {
-		mark.Offset = offset
-		marks = append(marks, mark)
-		offset += len(mark.Text)
-	}
-	addSpaceMark := func(spaceChar string) {
-		mark := spaceMark
-		mark.Text = spaceChar
-		addMark(mark)
-	}
-	for ip, para := range paras {
-		if useTables {
-			paraMarks := para.toTextMarks(&offset)
-			marks = append(marks, paraMarks...)
-		} else {
-			for il, line := range para.lines {
-				lineMarks := line.toTextMarks(&offset)
-				marks = append(marks, lineMarks...)
-				reduced := false
-				if doHyphens {
-					if line.hyphenated && (il != len(para.lines)-1 || ip != len(paras)-1) {
-						tm := marks[len(marks)-1]
-						r := []rune(tm.Text)
-						if unicode.IsSpace(r[len(r)-1]) {
-							panic(tm)
-						}
-						if len(r) == 1 {
-							marks = marks[:len(marks)-1]
-							offset = marks[len(marks)-1].Offset + len(marks[len(marks)-1].Text)
-						} else {
-							s := string(r[:len(r)-1])
-							offset += len(s) - len(tm.Text)
-							tm.Text = s
-						}
-						reduced = true
-					}
-				}
-				if reduced {
-					continue
-				}
-				if il != len(para.lines)-1 && isZero(line.depth-para.lines[il+1].depth) {
-					// Next line is the same depth so it's the same line as this one in the extracted text
-					addSpaceMark(" ")
-					continue
-				}
-				addSpaceMark("\n")
-			}
-			if ip != len(paras)-1 {
-				addSpaceMark("\n")
-			}
-		}
+	for _, para := range paras {
+		paraMarks := para.toTextMarks(&offset)
+		marks = append(marks, paraMarks...)
+		marks = appendSpaceMark(marks, &offset, "\n")
 	}
 	return marks
 }
 
+func (paras paraList) toTables() []TextTable {
+	var tables []TextTable // One entry per para that has a table, in `paras` order.
+	for _, para := range paras {
+		if para.table != nil {
+			tables = append(tables, para.table.toTextTable())
+		}
+	}
+	return tables
+}
+
 // sortReadingOrder sorts `paras` in reading order.
 func (paras paraList) sortReadingOrder() {
 	common.Log.Debug("sortReadingOrder: paras=%d ===========x=============", len(paras))
 	if len(paras) <= 1 {
 		return
 	}
+	sort.Slice(paras, func(i, j int) bool { return diffDepthReading(paras[i], paras[j]) < 0 }) // Strict less.
+	paras.log("diffReadingDepth")
 	adj := paras.adjMatrix()
 	order := topoOrder(adj)
+	printAdj(adj) // Debug dump; does nothing unless verbosePage.
 	paras.reorder(order)
 }
 
@@ -290,22 +219,23 @@ func (paras paraList) adjMatrix() [][]bool {
 			adj[i][j], reasons[i][j] = paras.before(i, j)
 		}
 	}
-	if verbose && false {
+	if verbosePage {
+		show := func(a *textPara) string {
+			return fmt.Sprintf("%6.2f %q", a.eBBox, truncate(a.text(), 70))
+		}
 		common.Log.Info("adjMatrix =======")
 		for i := 0; i < n; i++ {
 			a := paras[i]
-			fmt.Printf("%4d: %q %.2f\n", i, truncate(a.text(), 50), a.PdfRectangle)
+			fmt.Printf("%4d: %s\n", i, show(a))
 			for j := 0; j < n; j++ {
 				if i == j {
 					continue
 				}
-				if !adj[i][j] {
+				if !adj[i][j] && i != 16 {
 					continue
 				}
 				b := paras[j]
-				fmt.Printf("%8d: %10s %q %.2f\n", j,
-					reasons[i][j], truncate(b.text(), 40), b.PdfRectangle)
-
+				fmt.Printf("%8d: %t %10s %s\n", j, adj[i][j], reasons[i][j], show(b))
 			}
 		}
 	}
@@ -344,7 +274,7 @@ func (paras paraList) before(i, j int) (bool, string) {
 			continue
 		}
 		if overlappedXPara(a, c) && overlappedXPara(c, b) {
-			return false, "Y intervening"
+			return false, fmt.Sprintf("Y intervening: %d: %s", k, c)
 		}
 	}
 	return true, "TO LEFT"
@@ -358,13 +288,21 @@ func overlappedXPara(r0, r1 *textPara) bool {
 
 // computeEBBoxes computes the eBBox fields in the elements of `paras`.
 func (paras paraList) computeEBBoxes() {
-	common.Log.Trace("computeEBBoxes:")
+	if verbose {
+		common.Log.Info("computeEBBoxes:")
+	}
 
-	for i, a := range paras {
-		// [llx, urx] is the reading direction interval for which no paras overlap `a`
+	for _, para := range paras {
+		para.eBBox = para.PdfRectangle
+	}
+
+	for i, aa := range paras {
+		a := aa.eBBox
+		// [llx, urx] is the reading direction interval for which no paras overlap `a`.
 		llx := -1.0e9
 		urx := +1.0e9
-		for j, b := range paras {
+		for j, bb := range paras {
+			b := bb.eBBox
 			if i == j || !(a.Lly <= b.Ury && b.Lly <= a.Ury) {
 				continue
 			}
@@ -385,27 +323,65 @@ func (paras paraList) computeEBBoxes() {
 
 		// Go through all paras below `a` within interval [llx, urx] in the reading direction and
 		// expand `a` as far as possible to left and right without overlapping any of them.
-		a.eBBox = a.PdfRectangle
-		for j, b := range paras {
+
+		for j, bb := range paras {
+			b := bb.eBBox
 			if i == j || b.Ury > a.Lly {
 				continue
 			}
 
 			// If `b` is completely to right of `llx`, extend `a` left to `b`.
 			if llx <= b.Llx {
-				a.eBBox.Llx = math.Min(a.eBBox.Llx, b.Llx)
+				a.Llx = math.Min(a.Llx, b.Llx)
 			}
 
 			// If `b` is completely to left of `urx`, extend `a` right to `b`.
 			if b.Urx <= urx {
-				a.eBBox.Urx = math.Max(a.eBBox.Urx, b.Urx)
+				a.Urx = math.Max(a.Urx, b.Urx)
 			}
 		}
+		if verbose {
+			fmt.Printf("%4d: %6.2f->%6.2f %q\n", i, aa.eBBox, a, truncate(aa.text(), 50))
+		}
+		aa.eBBox = a
+	}
+	if useEBBox {
+		for _, para := range paras {
+			para.PdfRectangle = para.eBBox
+		}
+	}
+}
+
+// printAdj prints `adj` to stdout as a grid with "X" for true entries. It does nothing unless verbosePage.
+func printAdj(adj [][]bool) {
+	if !verbosePage {
+		return
+	}
+	common.Log.Info("printAdj:")
+	n := len(adj)
+	fmt.Printf("%3s:", "")
+	for x := 0; x < n; x++ {
+		fmt.Printf("%3d", x)
+	}
+	fmt.Println()
+	for y := 0; y < n; y++ {
+		fmt.Printf("%3d:", y)
+		for x := 0; x < n; x++ {
+			s := ""
+			if adj[y][x] {
+				s = "X"
+			}
+			fmt.Printf("%3s", s)
+		}
+		fmt.Println()
+	}
 }
 
 // topoOrder returns the ordering of the topological sort of the nodes with adjacency matrix `adj`.
 func topoOrder(adj [][]bool) []int {
+	if verbosePage {
+		common.Log.Info("topoOrder:")
+	}
 	n := len(adj)
 	visited := make([]bool, n)
 	var order []int
@@ -427,11 +403,16 @@ func topoOrder(adj [][]bool) []int {
 			sortNode(idx)
 		}
 	}
-	// Order is currently reversed so change it to forward order.
-	for i := 0; i < n/2; i++ {
-		order[i], order[n-1-i] = order[n-1-i], order[i]
+	return reversed(order)
+}
+
+// reversed returns a new slice holding the elements of `order` in reverse order. `order` is not modified.
+func reversed(order []int) []int {
+	rev := make([]int, len(order))
+	for i, v := range order {
+		rev[len(order)-1-i] = v
 	}
-	return order
+	return rev
 }
 
 // reorder reorders `para` to the order in `order`.
diff --git a/extractor/text_para.go b/extractor/text_para.go
index a7d4549c..1384dd67 100644
--- a/extractor/text_para.go
+++ b/extractor/text_para.go
@@ -12,9 +12,14 @@ import (
 	"sort"
 	"unicode"
 
+	"github.com/unidoc/unipdf/v3/common"
 	"github.com/unidoc/unipdf/v3/model"
 )
 
+// paraList is a sequence of textPara. We use it so often that it is convenient to have its own
+// type so we can have methods on it.
+type paraList []*textPara
+
 // textPara is a group of words in a rectangular region of a page that get read together.
 // An peragraph in a document might span multiple pages. This is the paragraph framgent on one page.
 // We start by finding paragraph regions on a page, then we break the words into the textPara into
@@ -22,7 +27,7 @@ import (
 type textPara struct {
 	serial             int                // Sequence number for debugging.
 	model.PdfRectangle                    // Bounding box.
-	eBBox              model.PdfRectangle // Extented ounding box needed to compute reading order.
+	eBBox              model.PdfRectangle // Extended bounding box needed to compute reading order.
 	lines              []*textLine        // Paragraph text gets broken into lines.
 	table              *textTable
 }
@@ -39,8 +44,8 @@ func newTextPara(strata *textStrata) *textPara {
 
 // String returns a description of `p`.
 func (p *textPara) String() string {
-	return fmt.Sprintf("serial=%d %.2f %d lines\n%s\n-------------",
-		p.serial, p.PdfRectangle, len(p.lines), p.text())
+	return fmt.Sprintf("serial=%d %.2f %d lines %q",
+		p.serial, p.PdfRectangle, len(p.lines), truncate(p.text(), 50))
 }
 
 // text returns the text  of the lines in `p`.
@@ -52,47 +57,21 @@ func (p *textPara) text() string {
 
 // writeText writes the text of `p` including tables to `w`.
 func (p *textPara) writeText(w io.Writer) {
-	if p.table != nil {
-		for y := 0; y < p.table.h; y++ {
-			for x := 0; x < p.table.w; x++ {
-				cell := p.table.cells[y*p.table.w+x]
-				cell.writeCellText(w)
-				w.Write([]byte(" "))
-			}
-			w.Write([]byte("\n"))
-		}
-	} else {
+	if p.table == nil {
 		p.writeCellText(w)
-		w.Write([]byte("\n"))
+		return
 	}
-}
-
-// writeCellText writes the text of `p` not including tables to `w`.
-func (p *textPara) writeCellText(w io.Writer) {
-	// w := new(bytes.Buffer)
-	para := p
-	for il, line := range para.lines {
-		s := line.text()
-		reduced := false
-		if doHyphens {
-			if line.hyphenated && il != len(para.lines)-1 {
-				// Line ending with hyphen. Remove it.
-				runes := []rune(s)
-				s = string(runes[:len(runes)-1])
-				reduced = true
+	for y := 0; y < p.table.h; y++ {
+		for x := 0; x < p.table.w; x++ {
+			cell := p.table.get(x, y)
+			if cell == nil {
+				w.Write([]byte("\t"))
+			} else {
+				cell.writeCellText(w)
 			}
-		}
-		w.Write([]byte(s))
-		if reduced {
-			// We removed the hyphen from the end of the line so we don't need a line ending.
-			continue
-		}
-		if il < len(para.lines)-1 && isZero(line.depth-para.lines[il+1].depth) {
-			// Next line is the same depth so it's the same line as this one in the extracted text
 			w.Write([]byte(" "))
-			continue
 		}
-		if il < len(para.lines)-1 {
+		if y < p.table.h-1 {
 			w.Write([]byte("\n"))
 		}
 	}
@@ -101,90 +80,103 @@ func (p *textPara) writeCellText(w io.Writer) {
 // toTextMarks creates the TextMarkArray corresponding to the extracted text created by
 // paras `p`.writeText().
 func (p *textPara) toTextMarks(offset *int) []TextMark {
+	if p.table == nil {
+		return p.toCellTextMarks(offset)
+	}
 	var marks []TextMark
-	addMark := func(mark TextMark) {
-		mark.Offset = *offset
-		marks = append(marks, mark)
-		*offset += len(mark.Text)
-	}
-	addSpaceMark := func(spaceChar string) {
-		mark := spaceMark
-		mark.Text = spaceChar
-		addMark(mark)
-	}
-	if p.table != nil {
-		for y := 0; y < p.table.h; y++ {
-			for x := 0; x < p.table.w; x++ {
-				cell := p.table.cells[y*p.table.w+x]
+	for y := 0; y < p.table.h; y++ {
+		for x := 0; x < p.table.w; x++ {
+			cell := p.table.get(x, y)
+			if cell == nil {
+				marks = appendSpaceMark(marks, offset, "\t")
+			} else {
 				cellMarks := cell.toCellTextMarks(offset)
 				marks = append(marks, cellMarks...)
-				addSpaceMark(" ")
 			}
-			addSpaceMark("\n")
+			marks = appendSpaceMark(marks, offset, " ")
+		}
+		if y < p.table.h-1 {
+			marks = appendSpaceMark(marks, offset, "\n")
 		}
-	} else {
-		marks = p.toCellTextMarks(offset)
-		addSpaceMark("\n")
 	}
 	return marks
 }
 
-// toTextMarks creates the TextMarkArray corresponding to the extracted text created by
+// writeCellText writes the text of `p` not including tables to `w`.
+func (p *textPara) writeCellText(w io.Writer) {
+	for il, line := range p.lines {
+		lineText := line.text()
+		reduced := doHyphens && line.hyphenated && il != len(p.lines)-1
+		if reduced { // Line ending with hyphen. Remove it.
+			lineText = removeLastRune(lineText)
+		}
+		w.Write([]byte(lineText))
+		if !(reduced || il == len(p.lines)-1) {
+			w.Write([]byte(getSpace(line.depth, p.lines[il+1].depth)))
+		}
+	}
+}
+
+// toCellTextMarks creates the TextMarkArray corresponding to the extracted text created by
 // paras `paras`.writeCellText().
 func (p *textPara) toCellTextMarks(offset *int) []TextMark {
 	var marks []TextMark
-	addMark := func(mark TextMark) {
-		mark.Offset = *offset
-		marks = append(marks, mark)
-		*offset += len(mark.Text)
-	}
-	addSpaceMark := func(spaceChar string) {
-		mark := spaceMark
-		mark.Text = spaceChar
-		addMark(mark)
-	}
-	para := p
-
-	for il, line := range para.lines {
+	for il, line := range p.lines {
 		lineMarks := line.toTextMarks(offset)
-		marks = append(marks, lineMarks...)
-		reduced := false
-		if doHyphens {
-			if line.hyphenated && il != len(para.lines)-1 {
-				tm := marks[len(marks)-1]
-				r := []rune(tm.Text)
-				if unicode.IsSpace(r[len(r)-1]) {
-					panic(tm)
-				}
-				if len(r) == 1 {
-					marks = marks[:len(marks)-1]
-					*offset = marks[len(marks)-1].Offset + len(marks[len(marks)-1].Text)
-				} else {
-					s := string(r[:len(r)-1])
-					*offset += len(s) - len(tm.Text)
-					tm.Text = s
-				}
-				reduced = true
+		reduced := doHyphens && line.hyphenated && il != len(p.lines)-1
+		if reduced { // Line ending with hyphen. Remove it.
+			if len([]rune(line.text())) < minHyphenation {
+				panic(line.text())
 			}
+			if len(lineMarks) < 1 {
+				panic(line.text())
+			}
+			lineMarks = removeLastTextMarkRune(lineMarks, offset)
 		}
-		if reduced {
-			continue
-		}
-		if il < len(para.lines)-1 && isZero(line.depth-para.lines[il+1].depth) {
-			// Next line is the same depth so it's the same line as this one in the extracted text
-			addSpaceMark(" ")
-			continue
-		}
-		if il < len(para.lines)-1 {
-			addSpaceMark("\n")
+		marks = append(marks, lineMarks...)
+		if !(reduced || il == len(p.lines)-1) {
+			marks = appendSpaceMark(marks, offset, getSpace(line.depth, p.lines[il+1].depth))
 		}
 	}
-
-	addSpaceMark("\n")
-
 	return marks
 }
 
+// removeLastTextMarkRune removes the last rune of the last mark in `marks` and adjusts `*offset`.
+func removeLastTextMarkRune(marks []TextMark, offset *int) []TextMark {
+	tm := marks[len(marks)-1]
+	runes := []rune(tm.Text)
+	if unicode.IsSpace(runes[len(runes)-1]) {
+		panic(tm)
+	}
+	if len(runes) == 1 {
+		marks = marks[:len(marks)-1] // The mark held only the removed rune, so drop it.
+		*offset = marks[len(marks)-1].Offset + len(marks[len(marks)-1].Text)
+	} else {
+		text := removeLastRune(tm.Text)
+		*offset += len(text) - len(tm.Text)
+		marks[len(marks)-1].Text = text // tm is a copy, so update the element in `marks`.
+	}
+	return marks
+}
+
+func removeLastRune(text string) string {
+	runes := []rune(text)
+	if len(runes) < 2 {
+		panic(text)
+	}
+	return string(runes[:len(runes)-1])
+}
+
+// getSpace returns the space to insert between lines of depth `depth1` and `depth2`.
+// Lines at the same depth are joined with a space; lines at different depths get a line break.
+func getSpace(depth1, depth2 float64) string {
+	eol := !isZero(depth1 - depth2)
+	if eol {
+		return "\n"
+	}
+	return " "
+}
+
 // bbox makes textPara implement the `bounded` interface.
 func (p *textPara) bbox() model.PdfRectangle {
 	return p.PdfRectangle
@@ -271,5 +263,42 @@ func composePara(strata *textStrata) *textPara {
 	if len(para.lines) == 0 {
 		panic(para)
 	}
+	if verbosePara {
+		common.Log.Info("!!! para=%s", para.String())
+		for i, line := range para.lines {
+			fmt.Printf("%4d: %s\n", i, line)
+			for j, word := range line.words {
+				fmt.Printf("%8d: %s\n", j, word)
+				for k, mark := range word.marks {
+					fmt.Printf("%12d: %s\n", k, mark)
+				}
+			}
+		}
+	}
 	return para
 }
+
+// log logs the non-nil `paras` if verbosePage is set. It panics on a para with no text or no table cells.
+func (paras paraList) log(title string) {
+	if !verbosePage {
+		return
+	}
+	common.Log.Info("%8s: %d paras =======-------=======", title, len(paras))
+	for i, para := range paras {
+		if para == nil {
+			continue
+		}
+		text := para.text()
+		tabl := "  "
+		if para.table != nil {
+			tabl = fmt.Sprintf("[%dx%d]", para.table.w, para.table.h)
+		}
+		fmt.Printf("%4d: %6.2f %s %q\n", i, para.PdfRectangle, tabl, truncate(text, 50))
+		if len(text) == 0 {
+			panic("empty")
+		}
+		if para.table != nil && len(para.table.cells) == 0 {
+			panic(para)
+		}
+	}
+}
diff --git a/extractor/text_strata.go b/extractor/text_strata.go
index f24070d4..05afa833 100644
--- a/extractor/text_strata.go
+++ b/extractor/text_strata.go
@@ -38,14 +38,14 @@ func makeTextStrata(words []*textWord, pageHeight float64) *textStrata {
 
 // newTextStrata returns an empty textStrata with page height `pageHeight`.
 func newTextStrata(pageHeight float64) *textStrata {
-	bins := textStrata{
-		serial:       serial.bins,
+	strata := textStrata{
+		serial:       serial.strata,
 		bins:         map[int][]*textWord{},
 		PdfRectangle: model.PdfRectangle{Urx: -1.0, Ury: -1.0},
 		pageHeight:   pageHeight,
 	}
-	serial.bins++
-	return &bins
+	serial.strata++
+	return &strata
 }
 
 // String returns a description of `s`.
@@ -57,7 +57,9 @@ func (s *textStrata) String() string {
 			texts = append(texts, w.text())
 		}
 	}
-	return fmt.Sprintf("serial=%d %d %q", s.serial, len(texts), texts)
+	// return fmt.Sprintf("serial=%d %d %q", s.serial, )
+	return fmt.Sprintf("serial=%d %.2f fontsize=%.2f %d %q",
+		s.serial, s.PdfRectangle, s.fontsize, len(texts), texts)
 }
 
 // sort sorts the words in each bin in `s` in the reading direction.
@@ -129,10 +131,24 @@ func (s *textStrata) scanBand(title string, para *textStrata,
 			if !readingOverlap(para, word) {
 				continue
 			}
-			if fontTol > 0 && math.Abs(word.fontsize-fontsize) > fontTol*fontsize {
-				continue
+			fontRatio1 := math.Abs(word.fontsize-fontsize) / fontsize
+			fontRatio2 := word.fontsize / fontsize
+
+			fontRatio := math.Min(fontRatio1, fontRatio2)
+			if fontTol > 0 {
+				if fontRatio > fontTol {
+					continue
+				}
+			}
+			if fontTol <= 0 {
+				panic(fontTol)
 			}
 			if !detectOnly {
+				// if !para.isHomogenous(word) {
+				// 	panic(fmt.Errorf("not homogeneous fontTol=%.2f ratio=%.2f (%.2f->%.2f)\n\tpara=%s\n\tword=%s",
+				// 		fontTol, fontRatio, fontsize, word.fontsize,
+				// 		para.String(), word.String()))
+				// }
 				moveWord(depthIdx, s, para, word)
 			}
 			newWords = append(newWords, word)
@@ -155,11 +171,11 @@ func (s *textStrata) scanBand(title string, para *textStrata,
 	}
 	if verbose {
 		if len(title) > 0 {
-			common.Log.Info("scanBand: %s [%.2f %.2f]->[%.2f %.2f]  para=%.2f",
+			common.Log.Info("scanBand: %s [%.2f %.2f]->[%.2f %.2f] para=%.2f fontsize=%.2f",
 				title,
 				minDepth0, maxDepth0,
 				minDepth, maxDepth,
-				para.PdfRectangle)
+				para.PdfRectangle, para.fontsize)
 			for i, word := range newWords {
 				fmt.Printf("%4d: %s\n", i, word)
 			}
@@ -271,6 +287,36 @@ func moveWord(depthIdx int, page, para *textStrata, word *textWord) {
 	page.removeWord(depthIdx, word)
 }
 
+func (s *textStrata) allWords() []*textWord {
+	var wordList []*textWord
+	for _, words := range s.bins {
+		wordList = append(wordList, words...)
+	}
+	return wordList
+}
+
+func (s *textStrata) isHomogenous(w *textWord) bool {
+	words := s.allWords()
+	words = append(words, w)
+	if len(words) == 0 {
+		return true
+	}
+	minFont := words[0].fontsize
+	maxFont := minFont
+	for _, w := range words {
+		if w.fontsize < minFont {
+			minFont = w.fontsize
+		} else if w.fontsize > maxFont {
+			maxFont = w.fontsize
+		}
+	}
+	if maxFont/minFont > 1.3 {
+		common.Log.Error("font size range: %.2f - %.2f = %.1fx", minFont, maxFont, maxFont/minFont)
+		return false
+	}
+	return true
+}
+
 // removeWord removes `word`from `s`.bins[`depthIdx`].
 // NOTE: We delete bins as soon as they become empty to save code that calls other textStrata
 // functions from having to check for empty bins.
diff --git a/extractor/text_table.go b/extractor/text_table.go
index b04459a6..722fc3d5 100644
--- a/extractor/text_table.go
+++ b/extractor/text_table.go
@@ -17,52 +17,136 @@ import (
 type textTable struct {
 	model.PdfRectangle
 	w, h  int
-	cells cellList
+	cells cellMap
 }
 
-func (t textTable) bbox() model.PdfRectangle {
-	return t.PdfRectangle
+func newTextTable(w, h int) *textTable {
+	return &textTable{w: w, h: h, cells: cellMap{}}
 }
 
+func (t *textTable) String() string {
+	return fmt.Sprintf("[%dx%d] %6.2f", t.w, t.h, t.PdfRectangle)
+}
+
+func (t *textTable) bbox() model.PdfRectangle {
+	rect := model.PdfRectangle{Urx: -1, Ury: -1}
+	for _, cell := range t.cells {
+		if rect.Urx < rect.Llx {
+			rect = cell.PdfRectangle
+		} else {
+			rect = rectUnion(rect, cell.PdfRectangle)
+		}
+	}
+	return rect
+}
+
+func (t *textTable) get(x, y int) *textPara {
+	t.validate(x, y)
+	return t.cells[cellIndex{x, y}]
+}
+func (t *textTable) put(x, y int, cell *textPara) {
+	t.validate(x, y)
+	t.cells[cellIndex{x, y}] = cell
+}
+func (t *textTable) del(x, y int) {
+	t.validate(x, y)
+	delete(t.cells, cellIndex{x, y})
+}
+
+func (t *textTable) validate(x, y int) {
+	if !(0 <= x && x < t.w) {
+		panic(fmt.Errorf("bad x=%d t=%s", x, t))
+	}
+	if !(0 <= y && y < t.h) {
+		panic(fmt.Errorf("bad y=%d t=%s", y, t))
+	}
+}
+
+// fontsize returns the minimum font size of the non-nil cells of `t`, or -1 if there are none.
+func (t *textTable) fontsize() float64 {
+	size := -1.0
+	for _, p := range t.cells {
+		if p != nil {
+			if size < 0 {
+				size = p.fontsize()
+			} else {
+				size = math.Min(size, p.fontsize())
+			}
+		}
+	}
+	return size
+}
+
+func (t *textTable) expand(w, h int) {
+	if w < t.w {
+		panic(w)
+	}
+	if h < t.h {
+		panic(h)
+	}
+	t.w = w
+	t.h = h
+}
+
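+// NOTE(review): dead code below appears to be left over from the removed combineTablesY; port or delete it.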
+// w := combo.w
+// 		h := combo.h + t2.h - 1
+// 		common.Log.Info("COMBINE! %dx%d i1=%d i2=%d", w, h, i1, i2)
+// 		combined := make(cellList, w*h)
+// 		for y := 0; y < t1.h; y++ {
+// 			for x := 0; x < w; x++ {
+// 				combined[y*w+x] = combo.cells[y*w+x]
+// 			}
+// 		}
+// 		for y := 1; y < t2.h; y++ {
+// 			yy := y + combo.h - 1
+// 			for x := 0; x < w; x++ {
+// 				combined[yy*w+x] = t2.cells[y*w+x]
+// 			}
+// 		}
+// 		combo.cells = combined
+
+type cellIndex struct{ x, y int }
+
+type cellMap map[cellIndex]*textPara
 type cellList paraList
 
+func (cells cellList) String() string {
+	return fmt.Sprintf("%d %q", len(cells), cells.asStrings())
+}
+
+// bbox returns the union of the bounds of `cells`.
+func (cells cellList) bbox() model.PdfRectangle {
+	rect := cells[0].PdfRectangle
+	for _, r := range cells[1:] {
+		rect = rectUnion(rect, r.PdfRectangle)
+	}
+	return rect
+}
+
 const DBL_MIN, DBL_MAX = -1.0e10, +1.0e10
 
 // extractTables converts the`paras` that are table cells to tables containing those cells.
 func (paras paraList) extractTables() paraList {
 	common.Log.Debug("extractTables=%d ===========x=============", len(paras))
 	if len(paras) < 4 {
-		return nil
+		return paras
 	}
-	show := func(title string) {
-		common.Log.Info("%8s: %d=========----------=====", title, len(paras))
-		for i, para := range paras {
-			text := para.text()
-			tabl := "  "
-			if para.table != nil {
-				tabl = fmt.Sprintf("[%dx%d]", para.table.w, para.table.h)
-			}
-			fmt.Printf("%4d: %6.2f %s %q\n", i, para.PdfRectangle, tabl, truncate(text, 50))
-			if len(text) == 0 {
-				panic("empty")
-			}
-			if para.table != nil && len(para.table.cells) == 0 {
-				panic(para)
-			}
-		}
-	}
-	tables := paras.extractTableAtoms()
-	tables = combineTables(tables)
-	common.Log.Info("combined tables %d ================", len(tables))
-	for i, t := range tables {
-		t.log(fmt.Sprintf("combined %d", i))
-	}
-	// if len(tables) == 0 {panic("NO TABLES")}
-	show("tables extracted")
+
+	cells := cellList(paras)
+	tables := cells.findTables()
+	logTables(tables, "find tables")
+
+	// tables := paras.extractTableAtoms()
+	// logTables(tables, "table atoms")
+	// tables = combineTables(tables)
+	// logTables(tables, "table molecules")
+	// // if len(tables) == 0 {panic("NO TABLES")}
+	// showParas("tables extracted")
 	paras = paras.applyTables(tables)
-	show("tables applied")
+	paras.log("tables applied")
 	paras = paras.trimTables()
-	show("tables trimmed")
+	paras.log("tables trimmed")
 
 	return paras
 }
@@ -71,22 +155,28 @@ func (paras paraList) trimTables() paraList {
 	var recycledParas paraList
 	seen := map[*textPara]bool{}
 	for _, para := range paras {
+		table := para.table
+		if table == nil {
+			continue
+		}
 		for _, p := range paras {
 			if p == para {
 				continue
 			}
-			table := para.table
-			if table != nil && overlapped(table, p) {
-				table.log("REMOVE")
-				for _, cell := range table.cells {
-					if _, ok := seen[cell]; ok {
-						continue
-					}
-					recycledParas = append(recycledParas, cell)
-					seen[cell] = true
-				}
-				para.table.cells = nil
+			if !overlapped(table, p) {
+				continue
 			}
+			// common.Log.Info("overlap REMOVE:\n\ttable=%s\n\t p=%s", table.String(), p.String())
+			table.log("REMOVE")
+			for _, cell := range table.cells {
+				if _, ok := seen[cell]; ok {
+					continue
+				}
+				recycledParas = append(recycledParas, cell)
+				seen[cell] = true
+			}
+			para.table.cells = nil
+
 		}
 	}
 
@@ -99,7 +189,7 @@ func (paras paraList) trimTables() paraList {
 	return recycledParas
 }
 
-func (paras paraList) applyTables(tables []textTable) paraList {
+func (paras paraList) applyTables(tables []*textTable) paraList {
 	// if len(tables) == 0 {panic("no tables")}
 	consumed := map[*textPara]bool{}
 	for _, table := range tables {
@@ -124,278 +214,12 @@ func (paras paraList) applyTables(tables []textTable) paraList {
 			tabled = append(tabled, para)
 		}
 	}
+	if verboseTable {
+		common.Log.Info("applyTables: %d->%d tables=%d", len(paras), len(tabled), len(tables))
+	}
 	return tabled
 }
 
-// extractTableAtome returns all the 2x2 table candidateds in `paras`.
-func (paras paraList) extractTableAtoms() []textTable {
-	// Pre-sort by reading direction then depth
-	sort.Slice(paras, func(i, j int) bool {
-		return diffReadingDepth(paras[i], paras[j]) < 0
-	})
-
-	var llx0, lly0, llx1, lly1 float64
-	var tables []textTable
-
-	for _, para1 := range paras {
-		llx0, lly0 = DBL_MAX, DBL_MIN
-		llx1, lly1 = DBL_MAX, DBL_MIN
-
-		// Build a table fragment of 4 cells
-		//   0 1
-		//   2 3
-		// where
-		//   0 is `para1`
-		//   1 is on the right of 0 and overlaps with 0 in y axis
-		//   2 is under 0 and overlaps with 0 in x axis
-		//   3 is under 1 and on the right of 1 and closest to 0
-		cells := make(cellList, 4)
-		cells[0] = para1
-
-		for _, para2 := range paras {
-			if para1 == para2 {
-				continue
-			}
-			if yOverlap(para1, para2) && toRight(para2, para1) && para2.Llx < llx0 {
-				llx0 = para2.Llx
-				cells[1] = para2
-			} else if xOverlap(para1, para2) && below(para2, para1) && para2.Ury > lly0 {
-				lly0 = para2.Ury
-				cells[2] = para2
-			} else if toRight(para2, para1) && para2.Llx < llx1 && below(para2, para1) && para2.Ury > lly1 {
-				llx1 = para2.Llx
-				lly1 = para2.Ury
-				cells[3] = para2
-			}
-		}
-		// if we found any then look whether they form a table  !@#$
-		if !(cells[1] != nil && cells[2] != nil && cells[3] != nil) {
-			continue
-		}
-		// 1 cannot overlap with 2 in x and y
-		// 3 cannot overlap with 2 in x and with 1 in y
-		// 3 has to overlap with 2 in y and with 1 in x
-
-		if (xOverlap(cells[2], cells[3]) || yOverlap(cells[1], cells[3]) ||
-			xOverlap(cells[1], cells[2]) || yOverlap(cells[1], cells[2])) ||
-			!(xOverlap(cells[1], cells[3]) && yOverlap(cells[2], cells[3])) {
-			continue
-		}
-
-		// common.Log.Info("@@10 ip=%d %s", ip, truncate(para1.text(), 40))
-
-		deltaX := cells.fontsize()
-		deltaY := deltaX
-		//       deltaX *= minColSpacing1;  !@#$
-		//       deltaY *= maxIntraLineDelta;
-		deltaX *= maxIntraReadingGapR
-		deltaY *= lineDepthR
-
-		correspondenceX := cells.alignedX(cells.fontsize() * maxIntraReadingGapR)
-		correspondenceY := cells.alignedY(cells.fontsize() * lineDepthR)
-
-		// are blocks aligned in x and y ?
-		if correspondenceX > 0 && correspondenceY > 0 {
-			table := newTable(cells, 2, 2)
-			tables = append(tables, table)
-			table.log("New textTable")
-			// common.Log.Info("New textTable\n      %6.2f", table.PdfRectangle)
-			// for i, p := range cells {
-			// 	fmt.Printf("%4d: %6.2f %q\n", i, p.PdfRectangle, truncate(p.text(), 50))
-			// }
-		}
-	}
-	return tables
-}
-
-func (table textTable) log(title string) {
-	common.Log.Info("~~~ %s: %s: %d x %d\n      %6.2f", title, fileLine(1, false),
-		table.w, table.h, table.PdfRectangle)
-	for i, p := range table.cells {
-		fmt.Printf("%4d: %6.2f %q\n", i, p.PdfRectangle, truncate(p.text(), 50))
-	}
-}
-
-// 0 1
-// 2 3
-// A B
-// C
-// Extensions:
-//   A[1] == B[0] right
-//   A[2] == C[0] down
-func combineTables(tables []textTable) []textTable {
-	// if len(tables) == 0 {panic("tables")}
-	tablesY := combineTablesY(tables)
-	// if len(tablesY) == 0 {	panic("tablesY")}
-	heightTables := map[int][]textTable{}
-	for _, table := range tablesY {
-		heightTables[table.h] = append(heightTables[table.h], table)
-	}
-	// if len(heightTables) == 0 {panic("heightTables")}
-	var heights []int
-	for h := range heightTables {
-		heights = append(heights, h)
-	}
-	// Try to extend tallest tables to the right
-	sort.Slice(heights, func(i, j int) bool { return heights[i] > heights[j] })
-	// for _, h := range heights {
-	// 	columns := heightTables[h]
-	// 	if len(columns) < 2 {
-	// 		continue
-	// 	}
-	// 	heightTables[h] = combineTablesX(columns)
-	// }
-
-	var combined []textTable
-	for _, h := range heights {
-		combined = append(combined, heightTables[h]...)
-	}
-	for i, table := range combined {
-		table.log(fmt.Sprintf("Combined %d", i))
-	}
-	return combined
-}
-
-func combineTablesY(tables []textTable) []textTable {
-	sort.Slice(tables, func(i, j int) bool { return tables[i].Ury > tables[j].Ury })
-	removed := map[int]bool{}
-
-	var combinedTables []textTable
-	common.Log.Info("combineTablesY ------------------\n\t ------------------")
-	for i1, t1 := range tables {
-		if _, ok := removed[i1]; ok {
-			continue
-		}
-		fontsize := t1.cells.fontsize()
-		c1 := t1.corners()
-		var combo *textTable
-		for i2, t2 := range tables {
-			if _, ok := removed[i2]; ok {
-				continue
-			}
-			if t1.w != t2.w {
-				continue
-			}
-			c2 := t2.corners()
-			if c1[2] != c2[0] {
-				continue
-			}
-			// common.Log.Info("Comparing i1=%d i2=%d", i1, i2)
-			// t1.log("t1")
-			// t2.log("t2")
-			cells := cellList{
-				c1[0], c1[1],
-				c2[2], c2[3],
-			}
-			alX := cells.alignedX(fontsize * maxIntraReadingGapR)
-			alY := cells.alignedY(fontsize * lineDepthR)
-			common.Log.Info("alX=%d alY=%d", alX, alY)
-			if !(alX > 0 && alY > 0) {
-				if combo != nil {
-					combinedTables = append(combinedTables, *combo)
-				}
-				combo = nil
-				continue
-			}
-			if combo == nil {
-				combo = &t1
-				removed[i1] = true
-			}
-
-			w := combo.w
-			h := combo.h + t2.h - 1
-			common.Log.Info("COMBINE! %dx%d", w, h)
-			combined := make(cellList, w*h)
-			for y := 0; y < t1.h; y++ {
-				for x := 0; x < w; x++ {
-					combined[y*w+x] = combo.cells[y*w+x]
-				}
-			}
-			for y := 1; y < t2.h; y++ {
-				yy := y + combo.h - 1
-				for x := 0; x < w; x++ {
-					combined[yy*w+x] = t2.cells[y*w+x]
-				}
-			}
-			combo.cells = combined
-			combo.h = h
-			combo.log("combo")
-			removed[i2] = true
-			fontsize = combo.cells.fontsize()
-			c1 = combo.corners()
-		}
-		if combo != nil {
-			combinedTables = append(combinedTables, *combo)
-		}
-	}
-
-	common.Log.Info("combineTablesY a: combinedTables=%d", len(combinedTables))
-	for i, t := range tables {
-		if _, ok := removed[i]; ok {
-			continue
-		}
-		combinedTables = append(combinedTables, t)
-	}
-	common.Log.Info("combineTablesY b: combinedTables=%d", len(combinedTables))
-
-	return combinedTables
-}
-
-func combineTablesX(tables []textTable) []textTable {
-	sort.Slice(tables, func(i, j int) bool { return tables[i].Llx < tables[j].Llx })
-	removed := map[int]bool{}
-	for i1, t1 := range tables {
-		if _, ok := removed[i1]; ok {
-			continue
-		}
-		fontsize := t1.cells.fontsize()
-		c1 := t1.corners()
-		for i2, t2 := range tables {
-			if _, ok := removed[i2]; ok {
-				continue
-			}
-			if t1.w != t2.w {
-				continue
-			}
-			c2 := t2.corners()
-			if c1[1] != c2[0] {
-				continue
-			}
-			cells := cellList{
-				c1[0], c2[1],
-				c1[2], c2[3],
-			}
-			if !(cells.alignedX(fontsize*maxIntraReadingGapR) > 0 &&
-				cells.alignedY(fontsize*lineDepthR) > 0) {
-				continue
-			}
-			w := t1.w + t2.w
-			h := t1.h
-			combined := make(cellList, w*h)
-			for y := 0; y < h; y++ {
-				for x := 0; x < t1.w; x++ {
-					combined[y*w+x] = t1.cells[y*w+x]
-				}
-				for x := 0; x < t2.w; x++ {
-					xx := x + t1.w
-					combined[y*w+xx] = t1.cells[y*w+x]
-				}
-			}
-			removed[i2] = true
-			fontsize = t1.cells.fontsize()
-			c1 = t1.corners()
-		}
-	}
-	var reduced []textTable
-	for i, t := range tables {
-		if _, ok := removed[i]; ok {
-			continue
-		}
-		reduced = append(reduced, t)
-	}
-	return reduced
-}
-
 func yOverlap(para1, para2 *textPara) bool {
 	//  blk2->yMin <= blk1->yMax &&blk2->yMax >= blk1->yMin
 	return para2.Lly <= para1.Ury && para1.Lly <= para2.Ury
@@ -413,46 +237,46 @@ func below(para2, para1 *textPara) bool {
 	return para2.Ury < para1.Lly
 }
 
-func (paras cellList) cellDepths() []float64 {
-	topF := func(p *textPara) float64 { return p.Ury }
-	botF := func(p *textPara) float64 { return p.Lly }
-	top := paras.calcCellDepths(topF)
-	bottom := paras.calcCellDepths(botF)
-	if len(bottom) < len(top) {
-		return bottom
-	}
-	return top
-}
+// func (paras cellList) cellDepths() []float64 {
+// 	topF := func(p *textPara) float64 { return p.Ury }
+// 	botF := func(p *textPara) float64 { return p.Lly }
+// 	top := paras.calcCellDepths(topF)
+// 	bottom := paras.calcCellDepths(botF)
+// 	if len(bottom) < len(top) {
+// 		return bottom
+// 	}
+// 	return top
+// }
 
-func (paras cellList) calcCellDepths(getY func(*textPara) float64) []float64 {
-	depths := []float64{getY(paras[0])}
-	delta := paras.fontsize() * maxIntraDepthGapR
-	for _, para := range paras {
-		newDepth := true
-		y := getY(para)
-		for _, d := range depths {
-			if math.Abs(d-getY(para)) < delta {
-				newDepth = false
-				break
-			}
-		}
-		if newDepth {
-			depths = append(depths, y)
-		}
-	}
-	return depths
-}
+// func (paras cellList) calcCellDepths(getY func(*textPara) float64) []float64 {
+// 	depths := []float64{getY(paras[0])}
+// 	delta := paras.fontsize() * maxIntraDepthGapR
+// 	for _, para := range paras {
+// 		newDepth := true
+// 		y := getY(para)
+// 		for _, d := range depths {
+// 			if math.Abs(d-getY(para)) < delta {
+// 				newDepth = false
+// 				break
+// 			}
+// 		}
+// 		if newDepth {
+// 			depths = append(depths, y)
+// 		}
+// 	}
+// 	return depths
+// }
 
-func (c *textTable) corners() paraList {
-	w, h := c.w, c.h
+func (t *textTable) __corners() paraList {
+	w, h := t.w, t.h
 	if w == 0 || h == 0 {
-		panic(c)
+		panic(t)
 	}
 	cnrs := paraList{
-		c.cells[0],
-		c.cells[w-1],
-		c.cells[w*(h-1)],
-		c.cells[w*h-1],
+		t.get(0, 0),
+		t.get(w-1, 0),
+		t.get(0, h-1),
+		t.get(w-1, h-1),
 	}
 	for i0, c0 := range cnrs {
 		for _, c1 := range cnrs[:i0] {
@@ -464,38 +288,44 @@ func (c *textTable) corners() paraList {
 	return cnrs
 }
 
-func newTable(cells cellList, w, h int) textTable {
-	if w == 0 || h == 0 {
-		panic("emprty")
-	}
-	for i0, c0 := range cells {
-		for _, c1 := range cells[:i0] {
-			if c0.serial == c1.serial {
-				panic("dup")
-			}
-		}
-	}
-	rect := cells[0].PdfRectangle
-	for _, c := range cells[1:] {
-		rect = rectUnion(rect, c.PdfRectangle)
-	}
-	return textTable{
-		PdfRectangle: rect,
-		w:            w,
-		h:            h,
-		cells:        cells,
-	}
-}
+// func newTable(cells cellList, w, h int) textTable {
+// 	if w == 0 || h == 0 {
+// 		panic("emprty")
+// 	}
+// 	for i0, c0 := range cells {
+// 		for _, c1 := range cells[:i0] {
+// 			if c0.serial == c1.serial {
+// 				panic("dup")
+// 			}
+// 		}
+// 	}
+// 	rect := cells[0].PdfRectangle
+// 	for _, c := range cells[1:] {
+// 		rect = rectUnion(rect, c.PdfRectangle)
+// 	}
+// 	return textTable{
+// 		PdfRectangle: rect,
+// 		w:            w,
+// 		h:            h,
+// 		cells:        cells,
+// 	}
+// }
 
-func (table textTable) newTablePara() *textPara {
-	cells := table.cells
-	sort.Slice(cells, func(i, j int) bool { return diffDepthReading(cells[i], cells[j]) < 0 })
-	table.cells = cells
+func (table *textTable) newTablePara() *textPara {
+	// var cells cellList
+	// for _, cell := range table.cells {
+	// 	if cell != nil {
+	// 		cells = append(cells, cell)
+	// 	}
+	// }
+	// sort.Slice(cells, func(i, j int) bool { return diffDepthReading(cells[i], cells[j]) < 0 })
+	// table.cells = cells
+	bbox := table.bbox()
 	para := textPara{
 		serial:       serial.para,
-		PdfRectangle: table.PdfRectangle,
-		eBBox:        table.PdfRectangle,
-		table:        &table,
+		PdfRectangle: bbox,
+		eBBox:        bbox,
+		table:        table,
 	}
 	table.log(fmt.Sprintf("newTablePara: serial=%d", para.serial))
 
@@ -503,10 +333,28 @@ func (table textTable) newTablePara() *textPara {
 	return &para
 }
 
-func (cells cellList) alignedX(delta float64) int {
+// aligned2x2X returns an X alignment score for the 2x2 table atom `cells`.
+func (cells cellList) aligned2x2X(delta float64) int {
+	if len(cells) != 4 {
+		panic(fmt.Errorf("cells=%d", len(cells)))
+	}
 	matches := 0
 	for _, get := range gettersX {
-		if cells.aligned(0, 2, delta, get) && cells.aligned(1, 3, delta, get) {
+		if cells.aligned(get, delta, 0, 2) && cells.aligned(get, delta, 1, 3) {
+			matches++
+		}
+	}
+	return matches
+}
+
+// aligned2x2Y returns a Y alignment score for the 2x2 table atom `cells`.
+func (cells cellList) aligned2x2Y(delta float64) int {
+	if len(cells) != 4 {
+		panic(fmt.Errorf("cells=%d", len(cells)))
+	}
+	matches := 0
+	for _, get := range gettersY {
+		if cells.aligned(get, delta, 0, 1) && cells.aligned(get, delta, 2, 3) {
 			matches++
 		}
 	}
@@ -514,23 +362,568 @@ func (cells cellList) alignedX(delta float64) int {
 }
 
 func (cells cellList) alignedY(delta float64) int {
-	matches := 0
-	for _, get := range gettersY {
-		if cells.aligned(0, 1, delta, get) && cells.aligned(2, 3, delta, get) {
-			matches++
+	worstMatches := 100
+	for i := 1; i < len(cells); i++ {
+		matches := 0
+		for _, get := range gettersY {
+			if cells.aligned(get, delta, i-1, i) {
+				matches++
+			}
+		}
+		if matches < worstMatches {
+			worstMatches = matches
 		}
 	}
-	return matches
+	return worstMatches
 }
 
-func (cells cellList) aligned(i, j int, delta float64, get getter) bool {
-	return parasAligned(cells[i], cells[j], delta, get)
+// aligned returns true if `cells[i]` and `cells[j]` are aligned within `delta` on attribute `get`.
+func (cells cellList) aligned(get getter, delta float64, i, j int) bool {
+	if !(0 <= i && i < len(cells) && 0 <= j && j < len(cells)) {
+		panic(fmt.Errorf("i=%d j=%d cells=%d", i, j, len(cells)))
+	}
+	return parasAligned(get, delta, cells[i], cells[j])
+}
+
+// parasAligned returns true if `para1` and `para2` are aligned within `delta` for attribute `get`.
+func parasAligned(get getter, delta float64, para1, para2 *textPara) bool {
+	z1 := get(para1)
+	z2 := get(para2)
+	return math.Abs(z1-z2) <= delta
+}
+
+// fontsize for a cellList is the minimum font size of its non-nil paras, or -1 if it has none.
+func (paras cellList) fontsize() float64 {
+	size := -1.0
+	for _, p := range paras {
+		if p != nil {
+			if size < 0 {
+				size = p.fontsize()
+			} else {
+				size = math.Min(size, p.fontsize())
+			}
+		}
+	}
+	return size
+}
+
+// insertAt inserts `table` in `t` at `x`, `y`.
+func (t *textTable) insertAt(x, y int, table *textTable) {
+	if !(0 <= x && x < t.w) {
+		panic(fmt.Errorf("x=%d is an invalid insertion for %s", x, t))
+	}
+	if !(0 <= y && y < t.h) {
+		panic(fmt.Errorf("y=%d is an invalid insertion for %s", y, t))
+	}
+	if t.w < x+table.w {
+		panic(fmt.Errorf("x=%d is an invalid insertion for %s", x, t))
+	}
+	if t.h < y+table.h {
+		panic(fmt.Errorf("y=%d is an invalid insertion for %s", y, t))
+	}
+	for idx, cell := range table.cells {
+		idx.x += x
+		idx.y += y
+		t.cells[idx] = cell
+		t.PdfRectangle = rectUnion(t.PdfRectangle, cell.PdfRectangle)
+	}
+}
+
+// subTable returns the `w` x `h` subtable of `t` at 0,0.
+func (t *textTable) subTable(w, h int) *textTable {
+	if !(1 <= w && w <= t.w) {
+		panic(fmt.Errorf("w=%d is an invalid sub-width for %s", w, t))
+	}
+	if !(1 <= h && h <= t.h) {
+		panic(fmt.Errorf("h=%d is an invalid sub-height for %s", h, t))
+	}
+	table := newTextTable(w, h)
+	for y := 0; y < h; y++ {
+		for x := 0; x < w; x++ {
+			cell := t.get(x, y)
+			if cell == nil {
+				continue
+			}
+			table.put(x, y, cell)
+			table.PdfRectangle = rectUnion(table.PdfRectangle, cell.PdfRectangle)
+		}
+	}
+	return table
+}
+
+// row returns the (0-offset) `y`th row in `t`.
+func (t textTable) row(y int) cellList {
+	if !(0 <= y && y < t.h) {
+		panic(fmt.Errorf("y=%d is an invalid row for %s", y, t.String()))
+	}
+	cells := make(cellList, t.w)
+	for x := 0; x < t.w; x++ {
+		cells[x] = t.get(x, y)
+	}
+	return cells
+}
+
+// column returns the (0-offset) `x`th column in `t`.
+func (t textTable) column(x int) cellList {
+	if !(0 <= x && x < t.w) {
+		panic(fmt.Errorf("x=%d is an invalid column for %s", x, t.String()))
+	}
+	cells := make(cellList, t.h)
+	for y := 0; y < t.h; y++ {
+		cells[y] = t.get(x, y)
+	}
+	return cells
+}
+
+// cellSet returns `cells` as a set.
+func (cells cellList) cellSet() map[*textPara]bool {
+	set := map[*textPara]bool{}
+	for _, cell := range cells {
+		set[cell] = true
+	}
+	return set
+}
+
+// overlapRange returns i0, i1 where cells[i0,i1] is the maximum overlap with `other`.
+func (cells cellList) overlapRange(other cellList) (int, int) {
+	i0, i1 := -1, len(cells)
+	for i, c := range cells {
+		if i0 < 0 {
+			if c == other[0] {
+				i0 = i
+			}
+			continue
+		}
+		if i-i0 >= len(other) || c != other[i-i0] {
+			i1 = i
+			break
+		}
+	}
+	if i0 < 0 {
+		panic("no match")
+	}
+	return i0, i1
+}
+
+// toTextTable returns the TextTable corresponding to `t`.
+func (t textTable) toTextTable() TextTable {
+	cells := make([][]string, t.h)
+	for y := 0; y < t.h; y++ {
+		cells[y] = make([]string, t.w)
+		for x := 0; x < t.w; x++ {
+			cell := t.get(x, y)
+			if cell != nil {
+				cells[y][x] = cell.text()
+			}
+		}
+	}
+	return TextTable{W: t.w, H: t.h, Cells: cells}
+}
+
+//
+// Cell sorting
+//
+//   x     x    x      x     x     x
+//   x
+//   x     x
+//   x
+//   x     x           x
+//   x
+//   x
+
+// 1. Compute all row candidates
+//      alignedY  No intervening paras
+// 2. Compute all column candidates
+//      alignedX  No intervening paras
+
+// Table candidate
+// 1. Top row fully populated
+// 2. Left column fully populated
+// 3. All cells in table are aligned with 1 top row element and 1 left column candidate
+// 4. Minimum number of cells must be filled
+
+// Computation time
+// 1. Row candidates  O(N)
+//    Sort top to bottom, left to right
+//    Search
+// 2. Column candidates O(N)
+//    Sort left to right, top to bottom
+//    Search
+// 3. Find intersections  O(N^2)
+//    For each row
+//       Find columns that start at row -> table candidates
+//    Sort table candidates by w x h descending
+// 4. Test each candidate O(N^4)
+
+func (cells cellList) findTables() []*textTable {
+	if verboseTable {
+		common.Log.Info("findTables @@1: cells=%d", len(cells))
+	}
+
+	cols := cells.findGetterCandidates(getXLl, maxIntraReadingGapR, false)
+	rows := cells.findGetterCandidates(getYUr, lineDepthR, true)
+	sortContents(getYUr, true, cols)
+	sortContents(getXLl, false, rows)
+	if verboseTable {
+		common.Log.Info("findTables @@2: cols=%d rows=%d", len(cols), len(rows))
+	}
+	if len(cols) == 0 || len(rows) == 0 {
+		return nil
+	}
+
+	tables := cells.findTableCandidates(cols, rows)
+	logTables(tables, "candidates")
+	tables = removeDuplicateTables((tables))
+	logTables(tables, "distinct")
+	return tables
+}
+
+func removeDuplicateTables(tables []*textTable) []*textTable {
+	if len(tables) == 0 {
+		return nil
+	}
+	sort.Slice(tables, func(i, j int) bool {
+		ti, tj := tables[i], tables[j]
+		ai, aj := ti.w*ti.h, tj.w*tj.h
+		if ai != aj {
+			return ai > aj
+		}
+		return ti.Ury > tj.Ury
+	})
+	distinct := []*textTable{tables[0]}
+	tables[0].log("removeDuplicateTables 0")
+outer:
+	for _, t := range tables[1:] {
+		for _, d := range distinct {
+			if overlapped(t, d) {
+				continue outer
+			}
+		}
+		t.log("removeDuplicateTables x")
+		distinct = append(distinct, t)
+	}
+	return distinct
+}
+
+func (cells cellList) findTableCandidates(cols, rows []cellList) []*textTable {
+	if verboseTable {
+		common.Log.Info("findTableCandidates: cols=%d rows=%d\n\tcols=%s\n\trows=%s",
+			len(cols), len(rows), cols[0].String(), rows[0].String())
+	}
+
+	var candidates [][2]cellList
+	for _, col := range cols {
+		for _, row := range rows {
+			col2, row2 := makeCandidate(col, row)
+			if col2 != nil && len(col2) >= 2 && len(row2) >= 2 {
+				candidates = append(candidates, [2]cellList{col2, row2})
+			}
+		}
+	}
+	sort.Slice(candidates, func(i, j int) bool {
+		ci, cj := candidates[i], candidates[j]
+		ai := len(ci[0]) * len(ci[1])
+		aj := len(cj[0]) * len(cj[1])
+		if ai == 0 || aj == 0 {
+			panic("emprty")
+		}
+		if ai != aj {
+			return ai > aj
+		}
+		return i < j
+	})
+	var tables []*textTable
+	for i, cand := range candidates {
+		col, row := cand[0], cand[1]
+		if verboseTable {
+			fmt.Printf("%8d: findTableCandidates: col=%2d %6.2f row=%2d %6.2f\n\tcol=%s\n\trow=%s\n",
+				i, len(col), col.bbox(), len(row), row.bbox(), col.asStrings(), row.asStrings())
+		}
+
+		if col.equals(row) {
+			// panic(fmt.Errorf("columns can't be rows\n\tcol=%6.2f %q\n\trow=%6.2f %q",
+			// 	col.bbox(), col.asStrings(), row.bbox(), row.asStrings()))
+			// common.Log.Error("columns can't be rows\n\tcol=%6.2f %q\n\trow=%6.2f %q",
+			// 	col.bbox(), col.asStrings(), row.bbox(), row.asStrings())
+			continue
+		}
+		if len(col) == 0 || len(row) == 0 {
+			panic("emmmpty")
+		}
+		boundary := append(row, col...).bbox()
+
+		subset := cells.within(boundary)
+		table := subset.validTable(col, row)
+		// fmt.Printf("%12s boundary=%6.2f subset=%3d=%6.2f valid=%t\n", "",
+		// 	boundary, len(subset), subset.bbox(), table != nil)
+		if table != nil {
+			table.log("VALID!!")
+			tables = append(tables, table)
+		}
+	}
+	return tables
+}
+
+// within returns the elements of `cells` that are within `boundary`.
+func (cells cellList) within(boundary model.PdfRectangle) cellList {
+	var subset cellList
+	for _, cell := range cells {
+		if rectContainsBounded(boundary, cell) {
+			subset = append(subset, cell)
+		}
+	}
+	return subset
+}
+
+func makeCandidate(col, row cellList) (cellList, cellList) {
+	var col1, row1 cellList
+	for i, c := range col {
+		if c == row[0] {
+			col1 = col[i:]
+			row1 = row
+			break
+		}
+	}
+	var col2, row2 cellList
+	for i, c := range row {
+		if c == col[0] {
+			col2 = col
+			row2 = row[i:]
+			break
+		}
+	}
+	if col1 != nil && col2 != nil {
+		if len(col1)*len(row1) >= len(col2)*len(row2) {
+			return col1, row1
+		}
+		return col2, row2
+	}
+	if col1 != nil {
+		return col1, row1
+	}
+	return col2, row2
+}
+
+// validTable returns a sparse table containing `cells` if `cells` make up a valid table with `col`
+// on its left and `row` on its top.
+// nil is returned if there is no valid table.
+func (cells cellList) validTable(col, row cellList) *textTable {
+	w, h := len(row), len(col)
+	if col.equals(row) {
+		panic("columns can't be rows")
+	}
+	if col[0] != row[0] {
+		panic("bad intersection")
+	}
+	if verboseTable {
+		common.Log.Info("validTable: w=%d h=%d cells=%d", w, h, len(cells))
+	}
+
+	table := newTextTable(w, h)
+	for x, cell := range row {
+		table.put(x, 0, cell)
+	}
+	for y, cell := range col {
+		table.put(0, y, cell)
+	}
+	fontsize := table.fontsize()
+	for i, cell := range cells {
+		y := col.getAlignedIndex(getYUr, fontsize*lineDepthR, cell)
+		x := row.getAlignedIndex(getXLl, fontsize*maxIntraReadingGapR, cell)
+		if x < 0 || y < 0 {
+			if verboseTable {
+				common.Log.Error("bad element: x=%d y=%d cell=%s", x, y, cell.String())
+			}
+			return nil
+		}
+		if verboseTable {
+			fmt.Printf("%4d: y=%d x=%d %q\n", i, y, x, truncate(cell.text(), 50))
+		}
+		table.put(x, y, cell)
+		fontsize = table.fontsize()
+	}
+
+	w, h = table.maxDense()
+	if verboseTable {
+		common.Log.Info("maxDense: w=%d h=%d", w, h)
+	}
+	if w < 0 {
+		return nil
+	}
+	return table.subTable(w, h)
+}
+
+func (t *textTable) maxDense() (int, int) {
+	var product [][2]int
+	for h := 2; h <= t.h; h++ {
+		for w := 2; w <= t.w; w++ {
+			product = append(product, [2]int{w, h})
+		}
+	}
+	if len(product) == 0 {
+		return -1, -1
+	}
+	sort.Slice(product, func(i, j int) bool {
+		pi, pj := product[i], product[j]
+		ai := pi[0] * pi[1]
+		aj := pj[0] * pj[1]
+		if ai != aj {
+			return ai > aj
+		}
+		if pi[1] != pj[1] {
+			return pi[1] > pj[1]
+		}
+		return i < j
+	})
+	for i, p := range product {
+		w, h := p[0], p[1]
+		dense, reason := t.isDense(w, h)
+		if verboseTable {
+			fmt.Printf("%d: isDense w=%d h=%d dense=%5t %s\n", i, w, h, dense, reason)
+		}
+		if dense {
+			return w, h
+		}
+	}
+	return -1, -1
+}
+
+func (t *textTable) isDense(w, h int) (bool, string) {
+	minOccRow := 2
+	minOccCol := 2
+	minOccR := 0.3
+
+	count := 0
+	for x := 0; x < w; x++ {
+		n := t.column(x).count()
+		if n < minOccCol {
+			// common.Log.Error("col %d has %d entries", x, n, t.column(x).asStrings())
+			return false, fmt.Sprintf("col %d has %d entries %s", x, n, t.column(x).asStrings())
+		}
+		count += n
+	}
+	for y := 0; y < h; y++ {
+		n := t.row(y).count()
+		if n < minOccRow {
+			// common.Log.Error("row %d has %d entries %s", y, n, t.row(y).asStrings())
+			return false, fmt.Sprintf("row %d has %d entries %s", y, n, t.row(y).asStrings())
+		}
+	}
+	occupancy := float64(count) / float64(w*h)
+	if occupancy < minOccR {
+		// common.Log.Error("table has %d of %d = %.2f entries", count, t.w*t.h, occupancy)
+		return false, fmt.Sprintf("table has %d of %d = %.2f entries", count, w*h, occupancy)
+	}
+	return true, ""
+}
+
+func (cells cellList) count() int {
+	n := 0
+	for _, c := range cells {
+		if c != nil {
+			n++
+		}
+	}
+	return n
+}
+
+func (cells cellList) getAlignedIndex(get getter, delta float64, targetCell *textPara) int {
+	for i, cell := range cells {
+		if parasAligned(get, delta, targetCell, cell) {
+			return i
+		}
+	}
+	return -1
+}
+
+func sortContents(get getter, reverse bool, cols []cellList) {
+	for _, cells := range cols {
+		sort.Slice(cells, func(i, j int) bool {
+			ci, cj := cells[i], cells[j]
+			if reverse {
+				return get(ci) > get(cj)
+			}
+			return get(ci) < get(cj)
+		})
+	}
+}
+
+// findGetterCandidates returns lists of elements of `cells` that are aligned within `deltaR` x fontsize for attribute `get`.
+func (cells cellList) findGetterCandidates(get getter, deltaR float64, reverse bool) []cellList {
+	delta := cells.fontsize() * deltaR
+	xIndex := cells.makeIndex(getXLl)
+	var columns []cellList
+	addCol := func(col cellList) {
+		if len(col) > 1 {
+			columns = append(columns, col)
+		}
+	}
+	for i0, idx0 := range xIndex[:len(xIndex)-1] {
+		cell0 := cells[idx0]
+		col := cellList{cell0}
+		for _, idx := range xIndex[i0+1:] {
+			cell := cells[idx]
+			if getXLl(cell) > get(cell0)+delta {
+				addCol(col)
+				col = cellList{cell}
+			} else if parasAligned(get, delta, cell0, cell) {
+				col = append(col, cell)
+			}
+		}
+		addCol(col)
+	}
+	sort.Slice(columns, func(i, j int) bool {
+		ci, cj := columns[i], columns[j]
+		if len(ci) != len(cj) {
+			return len(ci) > len(cj)
+		}
+		if reverse {
+			return get(ci[0]) > get(cj[0])
+		}
+		return get(ci[0]) < get(cj[0])
+	})
+	return columns
+}
+
+func (cells cellList) equals(other cellList) bool {
+	if len(cells) != len(other) {
+		return false
+	}
+	for i, cell := range cells {
+		if other[i] != cell {
+			return false
+		}
+	}
+	return true
+}
+
+// xyIndexes returns indexes over cells on the `Llx` and `Ury` attributes.
+func (cells cellList) xyIndexes() ([]int, []int) {
+	xIndex := cells.makeIndex(getXLl)
+	yIndex := cells.makeIndex(getYUr)
+	return xIndex, yIndex
+}
+
+// makeIndex returns an index over cells sorted by the `get` attribute.
+func (cells cellList) makeIndex(get getter) []int {
+	index := make([]int, len(cells))
+	for i := range cells {
+		index[i] = i
+	}
+	sort.Slice(index, func(i, j int) bool {
+		zi := get(cells[index[i]])
+		zj := get(cells[index[j]])
+		return zi < zj
+	})
+	return index
 }
 
 type getter func(*textPara) float64
 
 var (
+	// gettersX get the x-center, left and right of cells.
 	gettersX = []getter{getXCe, getXLl, getXUr}
+	// gettersY get the y-center, bottom and top of cells.
 	gettersY = []getter{getYCe, getYLl, getYUr}
 )
 
@@ -540,18 +933,55 @@ func getXUr(para *textPara) float64 { return para.Urx }
 func getYCe(para *textPara) float64 { return 0.5 * (para.Lly + para.Ury) }
 func getYLl(para *textPara) float64 { return para.Lly }
 func getYUr(para *textPara) float64 { return para.Ury }
+func getTop(para *textPara) float64 { return -para.Ury }
 
-func parasAligned(para1, para2 *textPara, delta float64, get func(*textPara) float64) bool {
-	z1 := get(para1)
-	z2 := get(para2)
-	return math.Abs(z1-z2) <= delta
+func (cells cellList) log(title string) {
+	paraList(cells).log(title)
 }
 
-// fontsize for a paraList is the minimum font size of the paras.
-func (paras cellList) fontsize() float64 {
-	size := paras[0].fontsize()
-	for _, p := range paras[1:] {
-		size = math.Min(size, p.fontsize())
+// logTables logs the contents of `tables`.
+func logTables(tables []*textTable, title string) {
+	if !verboseTable {
+		return
+	}
+	common.Log.Info("%8s: %d tables =======!!!!!!!!=====", title, len(tables))
+	for i, t := range tables {
+		t.log(fmt.Sprintf("%s-%02d", title, i))
 	}
-	return size
+}
+
+// log logs the contents of `t`.
+func (t *textTable) log(title string) {
+	if !verboseTable {
+		return
+	}
+	fmt.Printf("%4s[%dx%d] %s ++++++++++\n", "", t.w, t.h, title)
+	if t.w == 0 || t.h == 0 {
+		return
+	}
+	top := t.row(0)
+	left := t.column(0)
+	fmt.Printf("%8s top=%q\n", "", top.asStrings())
+	fmt.Printf("%8sleft=%q\n", "", left.asStrings())
+	// return
+	// common.Log.Info("%8s: %s: %2d x %2d %6.2f =======//////////=====\n"+
+	// 	"      %6.2f", title, fileLine(1, false),
+	// 	table.w, table.h, table.PdfRectangle, table.PdfRectangle)
+	// for i, p := range table.cells {
+	// 	if p == nil {
+	// 		continue
+	// 	}
+	// 	fmt.Printf("%4d: %6.2f %q\n", i, p.PdfRectangle, truncate(p.text(), 50))
+	// }
+}
+
+func (cells cellList) asStrings() []string {
+	n := minInt(5, len(cells))
+	parts := make([]string, n)
+	for i, cell := range cells[:n] {
+		if cell != nil {
+			parts[i] = truncate(cell.text(), 20)
+		}
+	}
+	return parts
 }
diff --git a/extractor/text_test.go b/extractor/text_test.go
index 20a9038f..131216f3 100644
--- a/extractor/text_test.go
+++ b/extractor/text_test.go
@@ -175,7 +175,7 @@ func TestTermMarksFiles(t *testing.T) {
 	if !doStress {
 		t.Skip("skipping stress test")
 	}
-	common.Log.Info("Running text stress tests. go test --short to skip these.")
+	common.Log.Info("Running text stress tests.")
 	if len(corpusFolder) == 0 && !forceTest {
 		t.Log("Corpus folder not set - skipping")
 		return
@@ -736,6 +736,11 @@ func testTermMarks(t *testing.T, text string, textMarks *TextMarkArray, n int) {
 			ofs1d = len(text)
 		}
 		show := fmt.Sprintf("<%s|%s|%s>", text[ofs0d:ofs0], text[ofs0:ofs1], text[ofs1:ofs1d])
+		{
+			show = fmt.Sprintf("%q", show)
+			runes := []rune(show)
+			show = string(runes[1 : len(runes)-1])
+		}
 
 		// Get TextMarks spanning `term` with RangeOffset().
 		spanArray, err := textMarks.RangeOffset(ofs0, ofs1)
@@ -783,6 +788,7 @@ func startWith(str, sub string) bool {
 		if strings.HasPrefix(str, sub[n:]) {
 			return true
 		}
+		// common.Log.Error("!startsWith: str=%q sub=%q sub[%d:]=%q", str, sub, n, sub[n:])
 	}
 	return false
 }
diff --git a/extractor/text_word.go b/extractor/text_word.go
index 2f61ded6..20db6d78 100644
--- a/extractor/text_word.go
+++ b/extractor/text_word.go
@@ -170,6 +170,19 @@ func (w *textWord) text() string {
 	return strings.Join(texts, "")
 }
 
+// toTextMarks returns the TextMarks contained in `w`.text().
+// `offset` is used to give the TextMarks the correct Offset values.
+func (w *textWord) toTextMarks(offset *int) []TextMark {
+	var marks []TextMark
+	for _, tm := range w.marks {
+		marks = appendTextMark(marks, offset, tm.ToTextMark())
+	}
+	if len(w.text()) > 0 && len(marks) == 0 {
+		panic(w.text())
+	}
+	return marks
+}
+
 // font returns the fontID of the `idx`th rune in text.
 // compute on creation? !@#$
 func (w *textWord) font(idx int) string {
diff --git a/internal/cmap/cmap.go b/internal/cmap/cmap.go
index 11b2c634..2729f934 100644
--- a/internal/cmap/cmap.go
+++ b/internal/cmap/cmap.go
@@ -22,7 +22,7 @@ const (
 	// MissingCodeRune replaces runes that can't be decoded. '\ufffd' = �. Was '?'.
 	MissingCodeRune = '\ufffd' // �
 
-	// MissingCodeRune replaces strings that can't be decoded.
+	// MissingCodeString replaces strings that can't be decoded.
 	MissingCodeString = string(MissingCodeRune)
 )
 
@@ -44,7 +44,7 @@ type charRange struct {
 type fbRange struct {
 	code0 CharCode
 	code1 CharCode
-	r0    rune // TODO (peterwilliams97): Change to string for compound codes.
+	r0    string
 }
 
 // CIDSystemInfo contains information for identifying the character collection
@@ -110,8 +110,7 @@ type CMap struct {
 
 	// Used by ctype 2 CMaps.
 	codeToUnicode map[CharCode]string // CID -> Unicode string
-	// XXXX(peterwilliams97): Should unicodeToCode be the inverse of codeToUnicode?
-	unicodeToCode map[rune]CharCode // Unicode rune -> CID
+	unicodeToCode map[string]CharCode // Unicode string -> CID
 
 	// cached contains the raw CMap data. It is used by the Bytes method in
 	// order to avoid generating the data for every call.
@@ -137,10 +136,10 @@ func NewToUnicodeCMap(codeToRune map[CharCode]rune) *CMap {
 			Supplement: 0,
 		},
 		codespaces:    []Codespace{{Low: 0, High: 0xffff}},
-		codeToCID:     make(map[CharCode]CharCode),
-		cidToCode:     make(map[CharCode]CharCode),
 		codeToUnicode: codeToUnicode,
-		unicodeToCode: make(map[rune]CharCode),
+		unicodeToCode: make(map[string]CharCode, len(codeToRune)),
+		codeToCID:     make(map[CharCode]CharCode, len(codeToRune)),
+		cidToCode:     make(map[CharCode]CharCode, len(codeToRune)),
 	}
 
 	cmap.computeInverseMappings()
@@ -159,7 +158,7 @@ func newCMap(isSimple bool) *CMap {
 		codeToCID:     make(map[CharCode]CharCode),
 		cidToCode:     make(map[CharCode]CharCode),
 		codeToUnicode: make(map[CharCode]string),
-		unicodeToCode: make(map[rune]CharCode),
+		unicodeToCode: make(map[string]CharCode),
 	}
 }
 
@@ -265,13 +264,8 @@ func (cmap *CMap) computeInverseMappings() {
 
 	// Generate Unicode -> CID map.
 	for cid, s := range cmap.codeToUnicode {
-		// The CMap entries can be empty e.g. dobe_supplement_iso32000_1.pdf
-		if len(s) == 0 {
-			continue
-		}
-		r := rune0(s)
-		if c, ok := cmap.unicodeToCode[r]; !ok || (ok && c > cid) {
-			cmap.unicodeToCode[r] = cid
+		if c, ok := cmap.unicodeToCode[s]; !ok || (ok && c > cid) {
+			cmap.unicodeToCode[s] = cid
 		}
 	}
 
@@ -326,10 +320,10 @@ func (cmap *CMap) CharcodeToUnicode(code CharCode) (string, bool) {
 	return MissingCodeString, false
 }
 
-// RuneToCID maps the specified rune to a character identifier. If the provided
-// rune has no available mapping, the second return value is false.
-func (cmap *CMap) RuneToCID(r rune) (CharCode, bool) {
-	cid, ok := cmap.unicodeToCode[r]
+// StringToCID maps the specified string to a character identifier. If the provided
+// string has no available mapping, the bool return value is false.
+func (cmap *CMap) StringToCID(s string) (CharCode, bool) {
+	cid, ok := cmap.unicodeToCode[s]
 	return cid, ok
 }
 
@@ -484,10 +478,10 @@ func (cmap *CMap) toBfData() string {
 	// character codes have been mapped to code ranges.
 	var charRanges []charRange
 	currCharRange := charRange{codes[0], codes[0]}
-	prevRune := rune0(cmap.codeToUnicode[codes[0]])
+	prevRune := cmap.codeToUnicode[codes[0]]
 	for _, c := range codes[1:] {
-		currRune := rune0(cmap.codeToUnicode[c])
-		if c == currCharRange.code1+1 && currRune == prevRune+1 {
+		currRune := cmap.codeToUnicode[c]
+		if c == currCharRange.code1+1 && lastRune(currRune) == lastRune(prevRune)+1 {
 			currCharRange.code1 = c
 		} else {
 			charRanges = append(charRanges, currCharRange)
@@ -507,7 +501,7 @@ func (cmap *CMap) toBfData() string {
 			fbRanges = append(fbRanges, fbRange{
 				code0: cr.code0,
 				code1: cr.code1,
-				r0:    rune0(cmap.codeToUnicode[cr.code0]),
+				r0:    cmap.codeToUnicode[cr.code0],
 			})
 		}
 	}
@@ -522,8 +516,8 @@ func (cmap *CMap) toBfData() string {
 			lines = append(lines, fmt.Sprintf("%d beginbfchar", n))
 			for j := 0; j < n; j++ {
 				code := fbChars[i*maxBfEntries+j]
-				r := rune0(cmap.codeToUnicode[code])
-				lines = append(lines, fmt.Sprintf("<%04x> <%04x>", code, r))
+				s := cmap.codeToUnicode[code]
+				lines = append(lines, fmt.Sprintf("<%04x> %s", code, hexCode(s)))
 			}
 			lines = append(lines, "endbfchar")
 		}
@@ -535,8 +529,8 @@ func (cmap *CMap) toBfData() string {
 			lines = append(lines, fmt.Sprintf("%d beginbfrange", n))
 			for j := 0; j < n; j++ {
 				rng := fbRanges[i*maxBfEntries+j]
-				r := rng.r0
-				lines = append(lines, fmt.Sprintf("<%04x><%04x> <%04x>", rng.code0, rng.code1, r))
+				lines = append(lines, fmt.Sprintf("<%04x><%04x> %s",
+					rng.code0, rng.code1, hexCode(rng.r0)))
 			}
 			lines = append(lines, "endbfrange")
 		}
@@ -544,6 +538,22 @@ func (cmap *CMap) toBfData() string {
 	return strings.Join(lines, "\n")
 }
 
+// lastRune returns the last rune in `s`.
+func lastRune(s string) rune {
+	runes := []rune(s)
+	return runes[len(runes)-1]
+}
+
+// hexCode returns the CMap hex code for `s`: each rune formatted as %04x, joined and wrapped in <>.
+func hexCode(s string) string {
+	runes := []rune(s)
+	codes := make([]string, len(runes))
+	for i, r := range runes {
+		codes[i] = fmt.Sprintf("%04x", r)
+	}
+	return fmt.Sprintf("<%s>", strings.Join(codes, ""))
+}
+
 const (
 	maxBfEntries = 100 // Maximum number of entries in a bfchar or bfrange section.
 	cmapHeader   = `
@@ -563,9 +573,3 @@ end
 end
 `
 )
-
-// rune0 is a convenience function that returns the first rune in `s`.
-// Caller must check that `s` is not empty.
-func rune0(s string) rune {
-	return ([]rune(s))[0]
-}
diff --git a/internal/cmap/cmap_parser.go b/internal/cmap/cmap_parser.go
index a160f32c..7ee40ee2 100644
--- a/internal/cmap/cmap_parser.go
+++ b/internal/cmap/cmap_parser.go
@@ -105,7 +105,7 @@ func (cmap *CMap) parse() error {
 func (cmap *CMap) parseName() error {
 	name := ""
 	done := false
-	// /Users/peter/testdata/programming/pdf_text/columns/Berg.pdf
+	// NOTE(peterwilliams97): We need up to 20 iterations of this loop for some PDFs I have seen.
 	for i := 0; i < 20 && !done; i++ {
 		o, err := cmap.parseObject()
 		if err != nil {
diff --git a/internal/textencoding/cmap.go b/internal/textencoding/cmap.go
index 56b24c74..e727ab56 100644
--- a/internal/textencoding/cmap.go
+++ b/internal/textencoding/cmap.go
@@ -67,7 +67,7 @@ func (enc CMapEncoder) RuneToCharcode(r rune) (CharCode, bool) {
 	}
 
 	// Map rune to CID.
-	cid, ok := enc.cidToUnicode.RuneToCID(r)
+	cid, ok := enc.cidToUnicode.StringToCID(string(r))
 	if !ok {
 		return 0, false
 	}
diff --git a/internal/textencoding/glyphs_glyphlist.go b/internal/textencoding/glyphs_glyphlist.go
index 2567675f..0a8db594 100644
--- a/internal/textencoding/glyphs_glyphlist.go
+++ b/internal/textencoding/glyphs_glyphlist.go
@@ -23,7 +23,7 @@ const (
 	// MissingCodeRune replaces runes that can't be decoded. .
 	MissingCodeRune = '\ufffd' // �
 
-	// MissingCodeRune replaces strings that can't be decoded.
+	// MissingCodeString replaces strings that can't be decoded.
 	MissingCodeString = string(MissingCodeRune)
 )
 
diff --git a/model/font.go b/model/font.go
index c1a9b609..a676845d 100644
--- a/model/font.go
+++ b/model/font.go
@@ -421,31 +421,26 @@ func (font *PdfFont) BytesToCharcodes(data []byte) []textencoding.CharCode {
 	return charcodes
 }
 
-// CharcodesToUnicodeWithStats is identical to CharcodesToUnicode except returns more statistical
+// CharcodesToUnicodeWithStats is identical to CharcodesToUnicode except it returns more statistical
 // information about hits and misses from the reverse mapping process.
 // NOTE: The number of runes returned may be greater than the number of charcodes.
-// TODO(peterwilliams97): Deprecate?
+// TODO(peterwilliams97): Deprecate in v4 and use only CharcodesToStrings()
 func (font *PdfFont) CharcodesToUnicodeWithStats(charcodes []textencoding.CharCode) (runelist []rune, numHits, numMisses int) {
-	runeSlices, numHits, numMisses := font.CharcodesToRuneSlices(charcodes)
-	var runes []rune
-	for _, r := range runeSlices {
-		runes = append(runes, r...)
-	}
-	return runes, numHits, numMisses
+	texts, numHits, numMisses := font.CharcodesToStrings(charcodes)
+	return []rune(strings.Join(texts, "")), numHits, numMisses
 }
 
-// CharcodesToRuneSlices returns the unicode strings corresponding to `charcodes` as rune slices.
-// The int return is the number of unconvereted codes.
-// NOTE: The number of rune slices returned is equal to the number of charcodes
-func (font *PdfFont) CharcodesToRuneSlices(charcodes []textencoding.CharCode) ([][]rune, int, int) {
+// CharcodesToStrings returns the unicode strings corresponding to `charcodes`.
+// The int returns are the number of strings and the number of unconverted codes.
+// NOTE: The number of strings returned is equal to the number of charcodes
+func (font *PdfFont) CharcodesToStrings(charcodes []textencoding.CharCode) ([]string, int, int) {
 	fontBase := font.baseFields()
-	runeSlices := make([][]rune, 0, len(charcodes))
+	texts := make([]string, 0, len(charcodes))
 	numMisses := 0
 	for _, code := range charcodes {
 		if fontBase.toUnicodeCmap != nil {
 			if s, ok := fontBase.toUnicodeCmap.CharcodeToUnicode(cmap.CharCode(code)); ok {
-				runeSlices = append(runeSlices, []rune(s))
-				// common.Log.Info("CharcodesToRuneSlices1: code=%d s=`%s`", code, s)
+				texts = append(texts, s)
 				continue
 			}
 		}
@@ -454,9 +449,7 @@ func (font *PdfFont) CharcodesToRuneSlices(charcodes []textencoding.CharCode) ([
 		encoder := font.Encoder()
 		if encoder != nil {
 			if r, ok := encoder.CharcodeToRune(code); ok {
-				runeSlices = append(runeSlices, []rune{r})
-				// common.Log.Info("CharcodesToRuneSlices2: code=%d s=%q encoder=%s",
-				// 	code, string(r), encoder.String())
+				texts = append(texts, string(r))
 				continue
 			}
 		}
@@ -465,7 +458,7 @@ func (font *PdfFont) CharcodesToRuneSlices(charcodes []textencoding.CharCode) ([
 			"\tfont=%s\n\tencoding=%s",
 			code, charcodes, fontBase.isCIDFont(), font, encoder)
 		numMisses++
-		runeSlices = append(runeSlices, []rune{cmap.MissingCodeRune})
+		texts = append(texts, cmap.MissingCodeString)
 	}
 
 	if numMisses != 0 {
@@ -475,7 +468,7 @@ func (font *PdfFont) CharcodesToRuneSlices(charcodes []textencoding.CharCode) ([
 			len(charcodes), numMisses, font)
 	}
 
-	return runeSlices, len(runeSlices), numMisses
+	return texts, len(texts), numMisses
 }
 
 // CharcodeBytesToUnicode converts PDF character codes `data` to a Go unicode string.
@@ -499,8 +492,8 @@ func (font *PdfFont) CharcodeBytesToUnicode(data []byte) (string, int, int) {
 //  1) Use the ToUnicode CMap if there is one.
 //  2) Use the underlying font's encoding.
 func (font *PdfFont) CharcodesToUnicode(charcodes []textencoding.CharCode) []rune {
-	strlist, _, _ := font.CharcodesToUnicodeWithStats(charcodes)
-	return strlist
+	runes, _, _ := font.CharcodesToUnicodeWithStats(charcodes)
+	return runes
 }
 
 // RunesToCharcodeBytes maps the provided runes to charcode bytes and it